Merge pull request #25635 from hshankar61:master

PiperOrigin-RevId: 234876855
diff --git a/WORKSPACE b/WORKSPACE
index 957b8d8..9f07b9f 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -29,7 +29,7 @@
 bazel_toolchains_repositories()
 
 load(
-    "@io_bazel_rules_docker//container:container.bzl",
+    "@io_bazel_rules_docker//repositories:repositories.bzl",
     container_repositories = "repositories",
 )
 
@@ -43,29 +43,17 @@
 # Apple and Swift rules.
 http_archive(
     name = "build_bazel_rules_apple",
-    sha256 = "4fe4ee824200b48821730f89ff260984332dc3551db587c24691235d1d96a8a7",
-    strip_prefix = "rules_apple-0.10.0",
-    urls = ["https://github.com/bazelbuild/rules_apple/archive/0.10.0.tar.gz"],
-)
-http_archive(
-    name = "build_bazel_rules_swift",
-    sha256 = "6544ff5615febec0342de1127144d2f3e43ea80fb7f9b1ade65e6a184e39e618",
-    strip_prefix = "rules_swift-0.5.0",
-    urls = ["https://github.com/bazelbuild/rules_swift/archive/0.5.0.tar.gz"],
-)
-http_archive(
-    name = "bazel_skylib",
-    sha256 = "eb5c57e4c12e68c0c20bc774bfbc60a568e800d025557bc4ea022c6479acc867",
-    strip_prefix = "bazel-skylib-0.6.0",
-    urls = ["https://github.com/bazelbuild/bazel-skylib/archive/0.6.0.tar.gz"],
+    sha256 = "73b4980a318d203d3307f850e27e66ec5cc8d223147a3475a6f11597eb6438a5",
+    strip_prefix = "rules_apple-0.13.0",
+    urls = ["https://github.com/bazelbuild/rules_apple/archive/0.13.0.tar.gz"],
 )
 http_file(
     name = "xctestrunner",
     executable = 1,
-    urls = ["https://github.com/google/xctestrunner/releases/download/0.2.5/ios_test_runner.par"],
+    urls = ["https://github.com/google/xctestrunner/releases/download/0.2.6/ios_test_runner.par"],
 )
 load("@build_bazel_rules_apple//apple:repositories.bzl", "apple_rules_dependencies")
-apple_rules_dependencies(ignore_version_differences = True)
+apple_rules_dependencies()
 load("@build_bazel_rules_swift//swift:repositories.bzl", "swift_rules_dependencies")
 swift_rules_dependencies()
 
@@ -134,4 +122,3 @@
         "http://download.tensorflow.org/models/speech_commands_v0.01.zip",
     ],
 )
-
diff --git a/configure.py b/configure.py
index 08226dc..df73413 100644
--- a/configure.py
+++ b/configure.py
@@ -55,6 +55,12 @@
     'lib64/', 'lib/powerpc64le-linux-gnu/', 'lib/x86_64-linux-gnu/', ''
 ]
 
+# List of files to be configured for using Bazel on Apple platforms.
+APPLE_BAZEL_FILES = [
+    'tensorflow/lite/experimental/objc/BUILD',
+    'tensorflow/lite/experimental/swift/BUILD'
+]
+
 if platform.machine() == 'ppc64le':
   _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/'
 else:
@@ -256,6 +262,7 @@
   """Reset file that contains customized config settings."""
   open(_TF_BAZELRC, 'w').close()
 
+
 def cleanup_makefile():
   """Delete any leftover BUILD files from the Makefile build.
 
@@ -785,8 +792,7 @@
       environ_cp,
       var_name='GCC_HOST_COMPILER_PATH',
       var_default=default_gcc_host_compiler_path,
-      ask_for_var=
-      'Please specify which gcc should be used by nvcc as the host compiler.',
+      ask_for_var='Please specify which gcc should be used by nvcc as the host compiler.',
       check_success=os.path.exists,
       error_msg='Invalid gcc path. %s cannot be found.',
   )
@@ -1237,6 +1243,7 @@
   environ_cp['TF_NCCL_VERSION'] = tf_nccl_version
   write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version)
 
+
 def get_native_cuda_compute_capabilities(environ_cp):
   """Get native cuda compute capabilities.
 
@@ -1273,13 +1280,15 @@
 
     ask_cuda_compute_capabilities = (
         'Please specify a list of comma-separated '
-        'Cuda compute capabilities you want to '
+        'CUDA compute capabilities you want to '
         'build with.\nYou can find the compute '
         'capability of your device at: '
         'https://developer.nvidia.com/cuda-gpus.\nPlease'
         ' note that each additional compute '
         'capability significantly increases your '
-        'build time and binary size. [Default is: %s]: ' %
+        'build time and binary size, and that '
+        'TensorFlow only supports compute '
+        'capabilities >= 3.5 [Default is: %s]: ' %
         default_cuda_compute_capabilities)
     tf_cuda_compute_capabilities = get_from_env_or_user_or_default(
         environ_cp, 'TF_CUDA_COMPUTE_CAPABILITIES',
@@ -1292,12 +1301,14 @@
     for compute_capability in tf_cuda_compute_capabilities.split(','):
       m = re.match('[0-9]+.[0-9]+', compute_capability)
       if not m:
-        print('Invalid compute capability: ' % compute_capability)
+        print('Invalid compute capability: %s' % compute_capability)
         all_valid = False
       else:
-        ver = int(m.group(0).split('.')[0])
-        if ver < 3:
-          print('Only compute capabilities 3.0 or higher are supported.')
+        ver = float(m.group(0))
+        if ver < 3.5:
+          print('ERROR: TensorFlow only supports CUDA compute capabilities 3.5 '
+                'and higher. Please re-specify the list of compute '
+                'capabilities excluding version %s.' % ver)
           all_valid = False
 
     if all_valid:
@@ -1513,10 +1524,6 @@
   # The host and target platforms are the same in Windows build. So we don't
   # have to distinguish them. This avoids building the same targets twice.
   write_to_bazelrc('build --distinct_host_configuration=false')
-  # Enable short object file path to avoid long path issue on Windows.
-  # TODO(pcloudy): Remove this flag when upgrading Bazel to 0.16.0
-  # Short object file path will be enabled by default.
-  write_to_bazelrc('build --experimental_shortened_obj_file_path=true')
 
   if get_var(
       environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline',
@@ -1537,6 +1544,23 @@
   print('\t--config=%-12s\t# %s' % (name, help_text))
 
 
+def configure_apple_bazel_rules():
+  """Configures Bazel rules for building on Apple platforms.
+
+  Enables analyzing and building Apple Bazel rules on Apple platforms. This
+  function is a no-op unless `is_macos()` is true.
+  """
+  if not is_macos():
+    return
+  for filepath in APPLE_BAZEL_FILES:
+    print(
+        'Configuring %s file to analyze and build Bazel rules on Apple platforms.'
+        % filepath)
+    existing_filepath = os.path.join(_TF_WORKSPACE_ROOT, filepath + '.apple')
+    renamed_filepath = os.path.join(_TF_WORKSPACE_ROOT, filepath)
+    os.rename(existing_filepath, renamed_filepath)
+
+
 def main():
   global _TF_WORKSPACE_ROOT
   global _TF_BAZELRC
@@ -1577,6 +1601,8 @@
 
   if is_macos():
     environ_cp['TF_NEED_TENSORRT'] = '0'
+  else:
+    environ_cp['TF_CONFIGURE_APPLE_BAZEL_RULES'] = '0'
 
   # The numpy package on ppc64le uses OpenBLAS which has multi-threading
   # issues that lead to incorrect answers.  Set OMP_NUM_THREADS=1 at
@@ -1679,6 +1705,14 @@
     create_android_ndk_rule(environ_cp)
     create_android_sdk_rule(environ_cp)
 
+  if get_var(
+      environ_cp, 'TF_CONFIGURE_APPLE_BAZEL_RULES',
+      'Configure Bazel rules for Apple platforms', False,
+      ('Would you like to configure Bazel rules for building on Apple platforms?'
+      ), 'Configuring Bazel rules for Apple platforms.',
+      'Not configuring Bazel rules for Apple platforms.'):
+    configure_apple_bazel_rules()
+
   print('Preconfigured Bazel build configs. You can use any of the below by '
         'adding "--config=<>" to your build command. See .bazelrc for more '
         'details.')
@@ -1687,8 +1721,9 @@
   config_info_line('gdr', 'Build with GDR support.')
   config_info_line('verbs', 'Build with libverbs support.')
   config_info_line('ngraph', 'Build with Intel nGraph support.')
-  config_info_line('dynamic_kernels',
-                   '(Experimental) Build kernels into separate shared objects.')
+  config_info_line(
+      'dynamic_kernels',
+      '(Experimental) Build kernels into separate shared objects.')
 
   print('Preconfigured Bazel build configs to DISABLE default on features:')
   config_info_line('noaws', 'Disable AWS S3 filesystem support.')
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 0b63ee4..f53982f 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -462,8 +462,7 @@
         "//tensorflow:darwin": [],
         "//tensorflow:windows": [],
         "//conditions:default": [
-            "-Wl,--version-script",  #  This line must be directly followed by the version_script.lds file
-            "$(location //tensorflow:tf_framework_version_script.lds)",
+            "-Wl,--version-script,$(location //tensorflow:tf_framework_version_script.lds)",
         ],
     }),
     linkstatic = 1,
@@ -497,15 +496,13 @@
     name = "libtensorflow.so",
     linkopts = select({
         "//tensorflow:darwin": [
-            "-Wl,-exported_symbols_list",  # This line must be directly followed by the exported_symbols.lds file
-            "$(location //tensorflow/c:exported_symbols.lds)",
+            "-Wl,-exported_symbols_list,$(location //tensorflow/c:exported_symbols.lds)",
             "-Wl,-install_name,@rpath/libtensorflow.so",
         ],
         "//tensorflow:windows": [],
         "//conditions:default": [
             "-z defs",
-            "-Wl,--version-script",  #  This line must be directly followed by the version_script.lds file
-            "$(location //tensorflow/c:version_script.lds)",
+            "-Wl,--version-script,$(location //tensorflow/c:version_script.lds)",
         ],
     }),
     visibility = ["//visibility:public"],
@@ -523,14 +520,12 @@
     name = "libtensorflow_cc.so",
     linkopts = select({
         "//tensorflow:darwin": [
-            "-Wl,-exported_symbols_list",  # This line must be directly followed by the exported_symbols.lds file
-            "$(location //tensorflow:tf_exported_symbols.lds)",
+            "-Wl,-exported_symbols_list,$(location //tensorflow:tf_exported_symbols.lds)",
         ],
         "//tensorflow:windows": [],
         "//conditions:default": [
             "-z defs",
-            "-Wl,--version-script",  #  This line must be directly followed by the version_script.lds file
-            "$(location //tensorflow:tf_version_script.lds)",
+            "-Wl,--version-script,$(location //tensorflow:tf_version_script.lds)",
         ],
     }),
     visibility = ["//visibility:public"],
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index a93799b..ddcacfc 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -26,14 +26,28 @@
 
 # API IMPORTS PLACEHOLDER
 
+# Make sure directory containing top level submodules is in
+# the __path__ so that "from tensorflow.foo import bar" works.
+# We're using bitwise, but there's nothing special about that.
+_API_MODULE = bitwise  # pylint: disable=undefined-variable
+_current_module = _sys.modules[__name__]
+_tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__))
+if not hasattr(_current_module, '__path__'):
+  __path__ = [_tf_api_dir]
+elif _tf_api_dir not in __path__:
+  __path__.append(_tf_api_dir)
+
 # pylint: disable=g-bad-import-order
 from tensorflow.python.tools import component_api_helper as _component_api_helper
 _component_api_helper.package_hook(
     parent_package_str=__name__,
+    child_package_str=('tensorboard.summary._tf.summary'),
+    error_msg="Limited tf.summary API due to missing TensorBoard installation")
+_component_api_helper.package_hook(
+    parent_package_str=__name__,
     child_package_str=(
         'tensorflow_estimator.python.estimator.api._v2.estimator'))
 
-_current_module = _sys.modules[__name__]
 if not hasattr(_current_module, 'estimator'):
   _component_api_helper.package_hook(
       parent_package_str=__name__,
@@ -42,14 +56,6 @@
 _component_api_helper.package_hook(
     parent_package_str=__name__,
     child_package_str=('tensorflow.python.keras.api._v2.keras'))
-# Make sure directory containing top level submodules is in
-# the __path__ so that "from tensorflow.foo import bar" works.
-# We're using bitwise, but there's nothing special about that.
-_tf_api_dir = _os.path.dirname(_os.path.dirname(bitwise.__file__))  # pylint: disable=undefined-variable
-if not hasattr(_current_module, '__path__'):
-  __path__ = [_tf_api_dir]
-elif _tf_api_dir not in __path__:
-  __path__.append(_tf_api_dir)
 
 # Enable TF2 behaviors
 from tensorflow.python.compat import v2_compat as _compat  # pylint: disable=g-import-not-at-top
@@ -111,5 +117,10 @@
 except NameError:
   pass
 
+# Add module aliases
+if hasattr(_current_module, 'keras'):
+  losses = keras.losses
+  metrics = keras.metrics
+  optimizers = keras.optimizers
 
 # pylint: enable=undefined-variable
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index eeca8f0..5eb25a8 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -70,7 +70,7 @@
 
 # Make sure directory containing top level submodules is in
 # the __path__ so that "from tensorflow.foo import bar" works.
-_tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__))  # pylint: disable=undefined-variable
+_tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__))
 if not hasattr(_current_module, '__path__'):
   __path__ = [_tf_api_dir]
 elif _tf_api_dir not in __path__:
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 10a2992..ef7863d 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -356,6 +356,7 @@
     srcs = ["c_api_function_test.cc"],
     deps = [
         ":c_api",
+        ":c_api_internal",
         ":c_test_util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 6d6d026..245d7ba 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -641,7 +641,7 @@
                       dimvec.size(), base, size, DeleteArray, base);
 }
 
-Status MessageToBuffer(const tensorflow::protobuf::Message& in,
+Status MessageToBuffer(const tensorflow::protobuf::MessageLite& in,
                        TF_Buffer* out) {
   if (out->data != nullptr) {
     return InvalidArgument("Passing non-empty TF_Buffer is invalid.");
@@ -1311,6 +1311,13 @@
                      reinterpret_cast<const DataType*>(values), num_values));
 }
 
+void TF_SetAttrPlaceholder(TF_OperationDescription* desc, const char* attr_name,
+                           const char* placeholder) {
+  tensorflow::AttrValue attr_value;
+  attr_value.set_placeholder(placeholder);
+  desc->node_builder.Attr(attr_name, attr_value);
+}
+
 void TF_SetAttrFuncName(TF_OperationDescription* desc, const char* attr_name,
                         const char* value, size_t length) {
   tensorflow::NameAttrList func_name;
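A minimal sketch of how the new TF_SetAttrPlaceholder entry point might be used (the op name "CustomOp" and attr name "index" are hypothetical; any registered op with an int attr would do). The placeholder string names the attribute that a later TF_GraphToFunction call can lift into the FunctionDef signature:

    #include "tensorflow/c/c_api.h"

    /* Adds a node whose "index" attr is left as the placeholder "v1". */
    TF_Operation* AddNodeWithPlaceholderAttr(TF_Graph* graph, TF_Status* s) {
      TF_OperationDescription* desc = TF_NewOperation(graph, "CustomOp", "node1");
      TF_SetAttrPlaceholder(desc, "index", "v1");
      return TF_FinishOperation(desc, s);
    }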
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index ac04037..051de3a 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -549,6 +549,10 @@
                                               const char* attr_name,
                                               const TF_DataType* values,
                                               int num_values);
+TF_CAPI_EXPORT extern void TF_SetAttrPlaceholder(TF_OperationDescription* desc,
+                                                 const char* attr_name,
+                                                 const char* placeholder);
+
 // Set a 'func' attribute to the specified name.
 // `value` must point to a string of length `length` bytes.
 TF_CAPI_EXPORT extern void TF_SetAttrFuncName(TF_OperationDescription* desc,
@@ -1310,6 +1314,28 @@
     int noutputs, const TF_Output* outputs, const char* const* output_names,
     const TF_FunctionOptions* opts, const char* description, TF_Status* status);
 
+// Similar to TF_GraphToFunction but allows specifying control outputs of the
+// function.
+//
+//  The arguments of TF_GraphToFunction have the same meaning, but the new
+//  arguments are as follows:
+//
+//    ncontrol_outputs: Number of control outputs of the function.
+//    control_outputs: vector of TF_Operation objects to be marked as control
+//      outputs of the function. Operations marked as control outputs are
+//      guaranteed to execute.
+//    control_output_names: Optional. If not nullptr, vector of strings, one
+//      per control output, with their names to be added to the function's
+//      OpDef.
+TF_CAPI_EXPORT extern TF_Function* TF_GraphToFunctionWithControlOutputs(
+    const TF_Graph* fn_body, const char* fn_name,
+    unsigned char append_hash_to_fn_name, int num_opers,
+    const TF_Operation* const* opers, int ninputs, const TF_Output* inputs,
+    int noutputs, const TF_Output* outputs, const char* const* output_names,
+    int ncontrol_outputs, const TF_Operation* const* control_outputs,
+    const char* const* control_output_names, const TF_FunctionOptions* opts,
+    const char* description, TF_Status* status);
+
 // Returns the name of the graph function.
 // The return value points to memory that is only usable until the next
 // mutation to *func.
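A hedged sketch of calling the new TF_GraphToFunctionWithControlOutputs, assuming `graph` already holds an operation `assert_op` that must be guaranteed to run and a single data output `out` (all names here are illustrative):

    #include <stddef.h>
    #include "tensorflow/c/c_api.h"

    TF_Function* MakeFuncWithControlOutput(TF_Graph* graph,
                                           TF_Operation* assert_op,
                                           TF_Output out, TF_Status* s) {
      const TF_Operation* control_outputs[1] = {assert_op};
      const char* control_output_names[1] = {"must_run"};
      /* num_opers == -1 takes every operation in the graph as the body. */
      return TF_GraphToFunctionWithControlOutputs(
          graph, "func_with_control_output", /*append_hash_to_fn_name=*/0,
          /*num_opers=*/-1, /*opers=*/NULL, /*ninputs=*/0, /*inputs=*/NULL,
          /*noutputs=*/1, &out, /*output_names=*/NULL,
          /*ncontrol_outputs=*/1, control_outputs, control_output_names,
          /*opts=*/NULL, /*description=*/NULL, s);
    }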
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index a8325ce..7ff4084 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -9064,11 +9064,6 @@
       tensorflow::strings::StrCat(op_type, "_", trace_ctx->node_counter++);
   auto* desc =
       TF_NewOperation(trace_ctx->graph, op_type.c_str(), op_name.c_str());
-  for (auto* input : op->operation.Inputs()) {
-    auto symbolic_input = getOrCreateSymbolicTensor(trace_ctx, input, status);
-    if (!status->status.ok()) return nullptr;
-    TF_AddInput(desc, symbolic_input);
-  }
 
   VLOG(1) << "Adding attrs.";
   tensorflow::AttrValueMap attrs;
@@ -9077,6 +9072,34 @@
     desc->node_builder.Attr(attr.first, attr.second);
   }
 
+  VLOG(1) << "Adding inputs.";
+  const auto& inputs = op->operation.Inputs();
+  size_t inputIndex = 0;
+  const tensorflow::OpDef& op_def = desc->node_builder.op_def();
+  for (const tensorflow::OpDef::ArgDef& input_arg : op_def.input_arg()) {
+    // TODO(bgogul): Add support for number attributes.
+    DCHECK(input_arg.number_attr().empty())
+        << "Number attributes is not implemented yet.";
+    if (input_arg.type_list_attr().empty()) {
+      auto symbolic_input =
+          getOrCreateSymbolicTensor(trace_ctx, inputs[inputIndex++], status);
+      if (!status->status.ok()) return nullptr;
+      TF_AddInput(desc, symbolic_input);
+      continue;
+    }
+    const std::string& type_list_attr = input_arg.type_list_attr();
+    const auto& attr_value = attrs[type_list_attr];
+    DCHECK(attr_value.value_case() == tensorflow::AttrValue::kList)
+        << "Type list attribute should be a list!";
+    std::vector<TF_Output> list_inputs(attr_value.list().type_size());
+    for (TF_Output& list_input : list_inputs) {
+      list_input =
+          getOrCreateSymbolicTensor(trace_ctx, inputs[inputIndex++], status);
+      if (!status->status.ok()) return nullptr;
+    }
+    TF_AddInputList(desc, list_inputs.data(), list_inputs.size());
+  }
+
   auto* graph_op = TF_FinishOperation(desc, status);
   if (!status->status.ok()) return nullptr;
 
diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc
index 354ee5f..c54021a 100644
--- a/tensorflow/c/c_api_experimental_test.cc
+++ b/tensorflow/c/c_api_experimental_test.cc
@@ -446,5 +446,29 @@
   TFE_DeleteOp(squeeze);
 }
 
+TEST_F(AddEagerOpToGraphTest, ListInputsAreAddedCorrectly) {
+  TFE_TensorHandle* scalar = TestScalarTensorHandle();
+  TFE_Op* identityn = TFE_NewOp(eager_ctx_, "IdentityN", status_);
+  CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+  constexpr size_t kNumInputs = 3;
+  for (size_t i = 0; i < kNumInputs; ++i) {
+    TFE_OpAddInput(identityn, scalar, status_);
+  }
+  TF_DataType types[kNumInputs] = {TF_FLOAT, TF_FLOAT, TF_FLOAT};
+  TFE_OpSetAttrTypeList(identityn, "T", types, kNumInputs);
+  AddEagerOpToGraphAndCheck(
+      identityn, [this, kNumInputs](TF_Operation* graph_op) {
+        EXPECT_EQ(TF_OperationNumInputs(graph_op), kNumInputs);
+        EXPECT_EQ(TF_OperationInputListLength(graph_op, "input", status_),
+                  kNumInputs);
+        CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+        EXPECT_EQ(TF_OperationOutputListLength(graph_op, "output", status_),
+                  kNumInputs);
+        CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+      });
+  TFE_DeleteTensorHandle(scalar);
+  TFE_DeleteOp(identityn);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index 28b9f8d..03d65ec 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -162,6 +162,11 @@
     const std::vector<const Node*>& body_nodes,
     const std::unordered_map<string, string>& tensor_renaming,
     FunctionDef* fdef) {
+  std::unordered_set<string> func_attr_names;
+  for (const auto& func_attr : fdef->signature().attr()) {
+    func_attr_names.insert(func_attr.name());
+  }
+
   std::vector<const Edge*> in_edges;
   std::vector<const Edge*> control_edges;
   for (const Node* node : body_nodes) {
@@ -243,6 +248,48 @@
     if (node->op_def().is_stateful()) {
       fdef->mutable_signature()->set_is_stateful(true);
     }
+
+    // If this node has any attributes with placeholder value, add the
+    // attribute to FunctionDef signature.
+    for (const auto& iter : node->attrs()) {
+      if (iter.second.placeholder().empty()) {
+        continue;
+      }
+
+      // If we already added the attribute, skip it.
+      string func_attr_name = iter.second.placeholder();
+      if (func_attr_names.find(func_attr_name) != func_attr_names.end()) {
+        continue;
+      }
+
+      // This node's attribute is a placeholder value, so it does not have type
+      // information. We check node's OpDef for attribute type.
+      string node_attr_name = iter.first;
+      const OpDef::AttrDef* node_attr_def = nullptr;
+      for (const auto& node_attr : node->op_def().attr()) {
+        if (node_attr.name() == node_attr_name) {
+          node_attr_def = &node_attr;
+        }
+      }
+      if (!node_attr_def) {
+#ifdef TENSORFLOW_LITE_PROTOS
+        return errors::Unimplemented(
+            "Placeholder value is not supported for attributes not in OpDef. "
+            "Attribute: ",
+            node_attr_name);
+#else
+        return errors::Unimplemented(
+            "Placeholder value is not supported for attributes not in OpDef. "
+            "Attribute: ",
+            node_attr_name, ", OpDef: ", node->op_def().DebugString());
+#endif
+      }
+      OpDef::AttrDef* attr_def = fdef->mutable_signature()->add_attr();
+      attr_def->set_name(func_attr_name);
+      attr_def->set_type(node_attr_def->type());
+
+      func_attr_names.insert(func_attr_name);
+    }
   }
   return Status::OK();
 }
@@ -255,6 +302,8 @@
                           const std::vector<OutputTensor>& inputs,
                           const std::vector<OutputTensor>& outputs,
                           const std::vector<string>& output_names,
+                          const std::vector<const Node*>& control_outputs,
+                          const std::vector<string>& control_output_names,
                           const char* description, FunctionDef* fdef) {
   if (!output_names.empty()) {
     DCHECK_EQ(output_names.size(), outputs.size());
@@ -378,6 +427,29 @@
     fdef->mutable_signature()->set_name(fn_name);
   }
 
+  if (!control_output_names.empty() &&
+      (control_outputs.size() != control_output_names.size())) {
+    return InvalidArgument(
+        "Expected number of control outputs (", control_outputs.size(),
+        ") and the number of control output names (",
+        control_output_names.size(), ") to match but they do not.");
+  }
+  std::unordered_set<string> control_output_names_set;
+  for (int i = 0; i < control_outputs.size(); ++i) {
+    string signature_name;
+    if (!control_output_names.empty()) {
+      signature_name = control_output_names[i];
+    } else {
+      signature_name = control_outputs[i]->name();
+    }
+    if (!control_output_names_set.insert(signature_name).second) {
+      return errors::InvalidArgument("Repeated control output name: ",
+                                     signature_name);
+    }
+    fdef->mutable_signature()->add_control_output(signature_name);
+    (*fdef->mutable_control_ret())[signature_name] = control_outputs[i]->name();
+  }
+
   return Status::OK();
 }
 
@@ -485,14 +557,14 @@
 using tensorflow::Node;
 using tensorflow::string;
 
-TF_Function* TF_GraphToFunction(const TF_Graph* fn_body, const char* fn_name,
-                                unsigned char append_hash_to_fn_name,
-                                int num_opers, const TF_Operation* const* opers,
-                                int ninputs, const TF_Output* inputs,
-                                int noutputs, const TF_Output* outputs,
-                                const char* const* output_names,
-                                const TF_FunctionOptions* opts,
-                                const char* description, TF_Status* status) {
+TF_Function* TF_GraphToFunctionWithControlOutputs(
+    const TF_Graph* fn_body, const char* fn_name,
+    unsigned char append_hash_to_fn_name, int num_opers,
+    const TF_Operation* const* opers, int ninputs, const TF_Output* inputs,
+    int noutputs, const TF_Output* outputs, const char* const* output_names,
+    int ncontrol_outputs, const TF_Operation* const* control_outputs,
+    const char* const* control_output_names, const TF_FunctionOptions* opts,
+    const char* description, TF_Status* status) {
   tensorflow::mutex_lock l(*const_cast<tensorflow::mutex*>(&fn_body->mu));
 
   // Process inputs.
@@ -517,19 +589,34 @@
     }
   }
 
+  // Process control output names.
+  std::vector<string> control_output_names_vec;
+  if (control_output_names) {
+    control_output_names_vec.reserve(ncontrol_outputs);
+    for (int i = 0; i < ncontrol_outputs; ++i) {
+      control_output_names_vec.push_back(string(control_output_names[i]));
+    }
+  }
+
   // Compute body nodes.
   std::vector<const Node*> body_nodes;
   status->status = tensorflow::ComputeBodyNodes(
       fn_body, fn_name, num_opers, opers, input_nodes, &body_nodes);
   if (!status->status.ok()) return nullptr;
 
+  // Collect control output nodes.
+  std::vector<const Node*> control_output_nodes;
+  for (int i = 0; i < ncontrol_outputs; ++i) {
+    control_output_nodes.push_back(&control_outputs[i]->node);
+  }
+
   // Do the actual function creation.
   TF_Function* tf_function = new TF_Function();
   DCHECK(append_hash_to_fn_name <= 1);
   status->status = tensorflow::GraphToFunctionDef(
       fn_body->graph, fn_name, append_hash_to_fn_name != 0, body_nodes,
-      input_tensors, output_tensors, output_names_vec, description,
-      &tf_function->fdef);
+      input_tensors, output_tensors, output_names_vec, control_output_nodes,
+      control_output_names_vec, description, &tf_function->fdef);
   if (!status->status.ok()) {
     TF_DeleteFunction(tf_function);
     return nullptr;
@@ -537,6 +624,20 @@
   return tf_function;
 }
 
+TF_Function* TF_GraphToFunction(const TF_Graph* fn_body, const char* fn_name,
+                                unsigned char append_hash_to_fn_name,
+                                int num_opers, const TF_Operation* const* opers,
+                                int ninputs, const TF_Output* inputs,
+                                int noutputs, const TF_Output* outputs,
+                                const char* const* output_names,
+                                const TF_FunctionOptions* opts,
+                                const char* description, TF_Status* status) {
+  return TF_GraphToFunctionWithControlOutputs(
+      fn_body, fn_name, append_hash_to_fn_name, num_opers, opers, ninputs,
+      inputs, noutputs, outputs, output_names, 0, nullptr, nullptr, opts,
+      description, status);
+}
+
 const char* TF_FunctionName(TF_Function* func) {
   return func->fdef.signature().name().c_str();
 }
diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc
index 73fe737..946f8c4 100644
--- a/tensorflow/c/c_api_function_test.cc
+++ b/tensorflow/c/c_api_function_test.cc
@@ -15,6 +15,7 @@
 
 #include "tensorflow/c/c_api.h"
 
+#include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/c_test_util.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
@@ -1230,6 +1231,53 @@
   ASSERT_NE(*func, nullptr);
 }
 
+REGISTER_OP("CustomOp")
+    .Output("output: float32")
+    .Attr("index: int")
+    .SetShapeFn(tensorflow::shape_inference::UnknownShape);
+
+void NodeWithPlaceholderAttrHelper(TF_Graph* graph, TF_Status* s,
+                                   const char* name, const char* placeholder,
+                                   TF_Operation** op) {
+  TF_OperationDescription* desc = TF_NewOperation(graph, "CustomOp", name);
+  TF_SetAttrPlaceholder(desc, "index", placeholder);
+  *op = TF_FinishOperation(desc, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  ASSERT_NE(*op, nullptr);
+}
+
+TEST_F(CApiFunctionTest, GraphToFunctionDefWithPlaceholderAttr) {
+  std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)> func_graph(
+      TF_NewGraph(), TF_DeleteGraph);
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> s(TF_NewStatus(),
+                                                           TF_DeleteStatus);
+
+  TF_Operation *node1, *node2, *node3;
+  NodeWithPlaceholderAttrHelper(func_graph.get(), s.get(), "node1", "v1",
+                                &node1);
+  NodeWithPlaceholderAttrHelper(func_graph.get(), s.get(), "node2", "v1",
+                                &node2);
+  NodeWithPlaceholderAttrHelper(func_graph.get(), s.get(), "node3", "v2",
+                                &node3);
+
+  TF_Output inputs[] = {};
+  TF_Output outputs[] = {{node1, 0}, {node2, 0}, {node3, 0}};
+  func_ = TF_GraphToFunction(
+      func_graph.get(), "func", /*append_hash_to_fn_name=*/false, -1,
+      /*opers=*/nullptr, 0, inputs, 3, outputs,
+      /*output_names=*/nullptr,
+      /*opts=*/nullptr, /*description=*/nullptr, s.get());
+  ASSERT_EQ(TF_OK, TF_GetCode(s.get())) << TF_Message(s.get());
+  ASSERT_NE(func_, nullptr);
+
+  // Verify that FunctionDef has 2 attributes, "v1" and "v2".
+  ASSERT_EQ(func_->fdef.signature().attr().size(), 2);
+  EXPECT_EQ(func_->fdef.signature().attr(0).name(), "v1");
+  EXPECT_EQ(func_->fdef.signature().attr(0).type(), "int");
+  EXPECT_EQ(func_->fdef.signature().attr(1).name(), "v2");
+  EXPECT_EQ(func_->fdef.signature().attr(1).type(), "int");
+}
+
 TEST_F(CApiFunctionTest, SetGradientAndRun) {
   // Define the function and its grad
   DefineFunction(func_name_, &func_);
diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h
index 73283d7..d520b6b 100644
--- a/tensorflow/c/c_api_internal.h
+++ b/tensorflow/c/c_api_internal.h
@@ -204,7 +204,8 @@
 
 TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status);
 
-Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out);
+Status MessageToBuffer(const tensorflow::protobuf::MessageLite& in,
+                       TF_Buffer* out);
 
 // Set the shapes and types of the output's handle.
 //
diff --git a/tensorflow/c/c_test.c b/tensorflow/c/c_test.c
index b86d8eb..7468122 100644
--- a/tensorflow/c/c_test.c
+++ b/tensorflow/c/c_test.c
@@ -25,6 +25,16 @@
 #include "tensorflow/c/env.h"
 #include "tensorflow/c/kernels.h"
 
+// A create function. This will never actually get called in this test, it's
+// just nice to know that it compiles.
+void* create(TF_OpKernelConstruction* ctx) {
+  TF_DataType type;
+  TF_Status* s = TF_NewStatus();
+  TF_OpKernelConstruction_GetAttrType(ctx, "foobar", &type, s);
+  TF_DeleteStatus(s);
+  return NULL;
+}
+
 // A compute function. This will never actually get called in this test, it's
 // just nice to know that it compiles.
 void compute(void* kernel, TF_OpKernelContext* ctx) {
@@ -32,12 +42,7 @@
   TF_Status* s = TF_NewStatus();
   TF_GetInput(ctx, 0, &input, s);
   TF_DeleteTensor(input);
-
-  TF_DataType type;
-  TF_OpKernelContext_GetAttrType(ctx, "foobar", &type, s);
-
   TF_DeleteStatus(s);
-
 }
 
 // Exercises tensorflow's C API.
@@ -80,7 +85,7 @@
   TF_StringStreamDone(s);
 
   TF_KernelBuilder* b =
-      TF_NewKernelBuilder("SomeOp", "SomeDevice", NULL, &compute, NULL);
+      TF_NewKernelBuilder("SomeOp", "SomeDevice", &create, &compute, NULL);
   TF_RegisterKernelBuilder("someKernel", b, status);
 
   TF_DeleteStatus(status);
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 257be63..c1fd41b 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -230,7 +230,6 @@
         ":c_api_test_util",
         "//tensorflow/c:c_test_util",
         "//tensorflow/cc/profiler",
-        "//tensorflow/contrib/tpu/profiler:trace_events_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index af13f48..45701c7 100755
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -762,11 +762,13 @@
 }
 
 void TFE_ContextEnableRunMetadata(TFE_Context* ctx) {
-  ctx->context.SetShouldStoreMetadata(true);
+  ctx->context.SetShouldStoreGraphs(true);
+  ctx->context.SetShouldStoreStepStats(true);
 }
 
 void TFE_ContextDisableRunMetadata(TFE_Context* ctx) {
-  ctx->context.SetShouldStoreMetadata(false);
+  ctx->context.SetShouldStoreGraphs(false);
+  ctx->context.SetShouldStoreStepStats(false);
 }
 
 }  // extern "C"
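The split above means TFE_ContextEnableRunMetadata keeps its old behavior (graphs plus step stats), while the new graph-collection toggles declared in c_api_experimental.h flip only SetShouldStoreGraphs. A rough usage sketch, assuming a live TFE_Context* ctx:

    #include "tensorflow/c/eager/c_api_experimental.h"

    void CollectGraphsOnly(TFE_Context* ctx) {
      TFE_ContextEnableGraphCollection(ctx);  /* graphs only, no step stats */
      /* ... execute ops under ctx ... */
      TFE_ContextDisableGraphCollection(ctx);
    }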
diff --git a/tensorflow/c/eager/c_api_experimental.cc b/tensorflow/c/eager/c_api_experimental.cc
index dab1750..af7f1bb 100644
--- a/tensorflow/c/eager/c_api_experimental.cc
+++ b/tensorflow/c/eager/c_api_experimental.cc
@@ -25,10 +25,14 @@
   op->operation.ConsumeInput(h->handle);
 }
 
-TFE_Profiler* TFE_NewProfiler(TFE_Context* ctx) {
+TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx) {
   return new TFE_Profiler(ctx);
 }
 
+bool TFE_ProfilerIsOk(TFE_Profiler* profiler) {
+  return profiler->profiler->Status().ok();
+}
+
 void TFE_DeleteProfiler(TFE_Profiler* profiler) { delete profiler; }
 
 void TFE_ProfilerSerializeToString(TFE_Context* ctx, TFE_Profiler* profiler,
@@ -46,7 +50,29 @@
   };
 }
 
-void TFE_StartProfilerServer(TFE_Context* ctx, int port) {
-  auto server_thread = tensorflow::StartProfilerServer(&ctx->context, port);
-  ctx->context.AddChildThread(std::move(server_thread));
+TFE_ProfilerContext* TFE_NewProfilerContext() {
+  return new TFE_ProfilerContext;
+}
+
+void TFE_ProfilerContextSetEagerContext(TFE_ProfilerContext* profiler_context,
+                                        TFE_Context* eager_context) {
+  profiler_context->profiler_context.eager_context = &eager_context->context;
+}
+
+void TFE_DeleteProfilerContext(TFE_ProfilerContext* profiler_context) {
+  delete profiler_context;
+}
+
+void TFE_StartProfilerServer(TFE_ProfilerContext* context, int port) {
+  // Release the child thread intentionally. The child thread can be terminated
+  // by terminating the main thread.
+  tensorflow::StartProfilerServer(&context->profiler_context, port).release();
+}
+
+void TFE_ContextEnableGraphCollection(TFE_Context* ctx) {
+  ctx->context.SetShouldStoreGraphs(true);
+}
+
+void TFE_ContextDisableGraphCollection(TFE_Context* ctx) {
+  ctx->context.SetShouldStoreGraphs(false);
 }
diff --git a/tensorflow/c/eager/c_api_experimental.h b/tensorflow/c/eager/c_api_experimental.h
index 8c85d0e..eb57077 100644
--- a/tensorflow/c/eager/c_api_experimental.h
+++ b/tensorflow/c/eager/c_api_experimental.h
@@ -25,6 +25,8 @@
 TF_CAPI_EXPORT extern void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h,
                                               TF_Status* status);
 
+typedef struct TFE_ProfilerContext TFE_ProfilerContext;
+
 // A profiler which will start profiling when creating the object and will stop
 // when the object is destroyed. It will profile all operations run under the
 // given TFE_Context. Multiple instances of it can be created, but at most one
@@ -32,7 +34,8 @@
 // Thread-safety: TFE_Profiler is thread-safe.
 typedef struct TFE_Profiler TFE_Profiler;
 
-TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler(TFE_Context* ctx);
+TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx);
+TF_CAPI_EXPORT extern bool TFE_ProfilerIsOk(TFE_Profiler* profiler);
 TF_CAPI_EXPORT extern void TFE_DeleteProfiler(TFE_Profiler* profiler);
 
 // The output string is a binary string of tensorflow.tpu.Trace. User can write
@@ -42,14 +45,35 @@
                                                          TF_Buffer* buf,
                                                          TF_Status* status);
 
+// Return a new profiler context object.
+TF_CAPI_EXPORT extern TFE_ProfilerContext* TFE_NewProfilerContext(void);
+
+// Set the eager context in TFE_ProfilerContext.
+TF_CAPI_EXPORT extern void TFE_ProfilerContextSetEagerContext(
+    TFE_ProfilerContext* profiler_context, TFE_Context* eager_context);
+
+// Destroy a profiler context object.
+TF_CAPI_EXPORT extern void TFE_DeleteProfilerContext(
+    TFE_ProfilerContext* profiler_context);
+
 // Start a profiler grpc server which listens to specified port. It will start
-// the server on its own thread. It can be shutdown by destructing TFE_Context.
-// Creating multiple profiler server is allowed. The service defined in
+// the server on its own thread. It can be shut down by terminating tensorflow.
+// It can be used in both Eager mode and graph mode. Creating multiple profiler
+// servers is allowed. The service is defined in
 // tensorflow/contrib/tpu/profiler/tpu_profiler.proto. Please use
 // tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture a traceable
 // file following
 // https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace.
-TF_CAPI_EXPORT extern void TFE_StartProfilerServer(TFE_Context* ctx, int port);
+TF_CAPI_EXPORT extern void TFE_StartProfilerServer(TFE_ProfilerContext* context,
+                                                   int port);
+
+// Enables only graph collection in RunMetadata on the functions executed from
+// this context.
+TF_CAPI_EXPORT extern void TFE_ContextEnableGraphCollection(TFE_Context* ctx);
+
+// Disables only graph collection in RunMetadata on the functions executed from
+// this context.
+TF_CAPI_EXPORT extern void TFE_ContextDisableGraphCollection(TFE_Context* ctx);
 
 #ifdef __cplusplus
 } /* end extern "C" */
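Putting the new profiler API together, a hedged end-to-end sketch (assumes a live TFE_Context* ctx; at most one profiler session can be active at a time, which TFE_ProfilerIsOk detects):

    #include "tensorflow/c/eager/c_api.h"
    #include "tensorflow/c/eager/c_api_experimental.h"

    void ProfileOnce(TFE_Context* ctx, TF_Buffer* out, TF_Status* s) {
      TFE_ProfilerContext* pctx = TFE_NewProfilerContext();
      TFE_ProfilerContextSetEagerContext(pctx, ctx);
      TFE_Profiler* profiler = TFE_NewProfiler(pctx);
      if (!TFE_ProfilerIsOk(profiler)) {
        /* Another profiler session is already active. */
      }
      /* ... run ops under ctx, then serialize the trace ... */
      TFE_ProfilerSerializeToString(ctx, profiler, out, s);
      TFE_DeleteProfiler(profiler);
      TFE_DeleteProfilerContext(pctx);
    }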
diff --git a/tensorflow/c/eager/c_api_experimental_test.cc b/tensorflow/c/eager/c_api_experimental_test.cc
index af55fee..d85048c 100644
--- a/tensorflow/c/eager/c_api_experimental_test.cc
+++ b/tensorflow/c/eager/c_api_experimental_test.cc
@@ -18,12 +18,12 @@
 #include <string.h>
 #include "tensorflow/c/eager/c_api_test_util.h"
 #include "tensorflow/cc/profiler/profiler.h"
-#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/profiler/trace_events.pb.h"
 
 using tensorflow::string;
 
@@ -41,9 +41,12 @@
   TFE_ContextOptions* opts = TFE_NewContextOptions();
   TFE_ContextOptionsSetAsync(opts, static_cast<unsigned char>(async));
   TFE_Context* ctx = TFE_NewContext(opts, status);
-  TFE_Profiler* profiler = TFE_NewProfiler(ctx);
+  TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext();
+  TFE_ProfilerContextSetEagerContext(profiler_context, ctx);
+  TFE_Profiler* profiler = TFE_NewProfiler(profiler_context);
   CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TFE_DeleteContextOptions(opts);
+  TFE_DeleteProfilerContext(profiler_context);
 
   TFE_TensorHandle* m = TestMatrixTensorHandle();
   TFE_Op* matmul = MatMulOp(ctx, m, m);
@@ -70,7 +73,7 @@
   TFE_ProfilerSerializeToString(ctx, profiler, profiler_result, status);
   TFE_DeleteProfiler(profiler);
   ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
-  tensorflow::tpu::Trace profile_proto;
+  profiler::Trace profile_proto;
   EXPECT_TRUE(profile_proto.ParseFromString(
       {reinterpret_cast<const char*>(profiler_result->data),
        profiler_result->length}));
@@ -100,5 +103,27 @@
 TEST(CAPI, ExecuteWithTracing) { ExecuteWithProfiling(false); }
 TEST(CAPI, ExecuteWithTracingAsync) { ExecuteWithProfiling(true); }
 
+TEST(CAPI, MultipleProfilerSession) {
+  TF_Status* status = TF_NewStatus();
+  TFE_ContextOptions* opts = TFE_NewContextOptions();
+  TFE_ContextOptionsSetAsync(opts, static_cast<unsigned char>(false));
+  TFE_Context* ctx = TFE_NewContext(opts, status);
+  CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TFE_DeleteContextOptions(opts);
+
+  TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext();
+  TFE_ProfilerContextSetEagerContext(profiler_context, ctx);
+
+  TFE_Profiler* profiler1 = TFE_NewProfiler(profiler_context);
+  EXPECT_TRUE(TFE_ProfilerIsOk(profiler1));
+
+  TFE_Profiler* profiler2 = TFE_NewProfiler(profiler_context);
+  EXPECT_FALSE(TFE_ProfilerIsOk(profiler2));
+
+  TFE_DeleteProfiler(profiler1);
+  TFE_DeleteProfiler(profiler2);
+  TFE_DeleteProfilerContext(profiler_context);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h
index b70c0f1..a563e4b 100644
--- a/tensorflow/c/eager/c_api_internal.h
+++ b/tensorflow/c/eager/c_api_internal.h
@@ -107,9 +107,14 @@
   tensorflow::EagerOperation operation;
 };
 
+struct TFE_ProfilerContext {
+  tensorflow::ProfilerContext profiler_context;
+};
+
 struct TFE_Profiler {
-  TFE_Profiler(TFE_Context* ctx)
-      : profiler(tensorflow::ProfilerSession::Create(&ctx->context)) {}
+  TFE_Profiler(TFE_ProfilerContext* ctx) {
+    profiler = tensorflow::ProfilerSession::Create(&ctx->profiler_context);
+  }
 
   std::unique_ptr<tensorflow::ProfilerSession> profiler;
 };
diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 9505bf9..71181ae 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -173,9 +173,10 @@
   cc_ctx->CtxFailure(s);
 }
 
-#define DEFINE_TF_GETATTR_(struct_name, func, c_type, cc_type)                 \
-  void struct_name##_GetAttr##func(struct_name* ctx, const char* attr_name,    \
-                                   c_type* val, TF_Status* status) {           \
+#define DEFINE_TF_GETATTR(func, c_type, cc_type)                               \
+  void TF_OpKernelConstruction_GetAttr##func(TF_OpKernelConstruction* ctx,     \
+                                             const char* attr_name,            \
+                                             c_type* val, TF_Status* status) { \
     TF_SetStatus(status, TF_OK, "");                                           \
     cc_type v;                                                                 \
     auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelConstruction*>(ctx); \
@@ -186,10 +187,6 @@
     }                                                                          \
   }
 
-#define DEFINE_TF_GETATTR(func, c_type, cc_type)                     \
-  DEFINE_TF_GETATTR_(TF_OpKernelConstruction, func, c_type, cc_type) \
-  DEFINE_TF_GETATTR_(TF_OpKernelContext, func, c_type, cc_type)
-
 DEFINE_TF_GETATTR(Type, TF_DataType, tensorflow::DataType)
 
 TF_DataType TF_ExpectedOutputDataType(TF_OpKernelContext* ctx, int i) {
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index b015d01..c47bfa8 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -137,15 +137,6 @@
     TF_OpKernelConstruction* ctx, const char* attr_name, TF_DataType* val,
     TF_Status* status);
 
-// Interprets the named kernel context attribute as a TF_DataType and places it
-// into *val. *status is set to TF_OK.
-//
-// If the attribute could not be found or could not be interpreted as
-// TF_DataType, *status is populated with an error.
-TF_CAPI_EXPORT extern void TF_OpKernelContext_GetAttrType(
-    TF_OpKernelContext* ctx, const char* attr_name, TF_DataType* val,
-    TF_Status* status);
-
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
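With TF_OpKernelContext_GetAttrType removed, attributes can only be read at kernel construction time. A minimal sketch of the intended pattern (the op name "MyOp" and attr name "T" are hypothetical):

    #include <stddef.h>
    #include "tensorflow/c/c_api.h"
    #include "tensorflow/c/kernels.h"

    static void* MyKernel_Create(TF_OpKernelConstruction* ctx) {
      TF_DataType type;
      TF_Status* s = TF_NewStatus();
      /* Attrs live on TF_OpKernelConstruction, not TF_OpKernelContext. */
      TF_OpKernelConstruction_GetAttrType(ctx, "T", &type, s);
      TF_DeleteStatus(s);
      return NULL;  /* no per-kernel state in this sketch */
    }

    static void MyKernel_Compute(void* kernel, TF_OpKernelContext* ctx) {}

    void RegisterMyKernel(TF_Status* status) {
      TF_KernelBuilder* b = TF_NewKernelBuilder(
          "MyOp", "CPU", &MyKernel_Create, &MyKernel_Compute,
          /*delete_func=*/NULL);
      TF_RegisterKernelBuilder("MyKernel", b, status);
    }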
diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD
deleted file mode 100644
index e4ce218..0000000
--- a/tensorflow/c/kernels/BUILD
+++ /dev/null
@@ -1,35 +0,0 @@
-load(
-    "//tensorflow:tensorflow.bzl",
-    "tf_cc_test",
-    "tf_cuda_library",
-)
-
-package(
-    default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])  # Apache 2.0
-
-tf_cuda_library(
-    name = "bitcast_op",
-    srcs = ["bitcast_op.cc"],
-    deps = [
-        "//tensorflow/c:kernels",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:ops",
-        "@com_google_absl//absl/strings:str_format",
-    ],
-    alwayslink = 1,
-)
-
-tf_cc_test(
-    name = "bitcast_op_test",
-    srcs = ["bitcast_op_test.cc"],
-    deps = [
-        ":bitcast_op",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-    ],
-)
diff --git a/tensorflow/c/kernels/bitcast_op.cc b/tensorflow/c/kernels/bitcast_op.cc
deleted file mode 100644
index 90f32c9..0000000
--- a/tensorflow/c/kernels/bitcast_op.cc
+++ /dev/null
@@ -1,165 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "absl/strings/str_format.h"
-#include "tensorflow/c/kernels.h"
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/framework/types.h"
-
-// BitcastOp implements a bitcast kernel, creating an output tensor that shares
-// the same data buffer as the input but with a different shape and/or data
-// type. Its inputs are:
-//
-//   * the input tensor
-//   * an attribute named "T" containing the TF_DataType of the input tensor
-//   * an attribute named "type" containing the TF_DataType of the output tensor
-//
-// Given an input tensor of shape [...], if the input DataType "T" is larger
-// than the output DataType "type", then the shape changes from [...]
-// to [..., sizeof(T)/sizeof(type)].
-//
-// If "T" is smaller than "type", the operator requires that the rightmost
-// dimension be equal to sizeof(type)/sizeof(T). The shape then goes from
-// [..., sizeof(type)/sizeof(T)] to [...].
-//
-// Bitcast is implemented as a low-level cast, so machines with different endian
-// orderings will give different results.
-typedef struct BitcastOp {
-  TF_DataType input_data_type;
-  TF_DataType output_data_type;
-  size_t in_size;
-  size_t out_size;
-} BitcastOp;
-
-void* BitcastOp_Create(TF_OpKernelConstruction* ctx) {
-  auto* kernel = new BitcastOp;
-
-  TF_Status* s = TF_NewStatus();
-  TF_OpKernelConstruction_GetAttrType(ctx, "T", &kernel->input_data_type, s);
-
-  if (TF_GetCode(s) == TF_OK) {
-    TF_OpKernelConstruction_GetAttrType(ctx, "type", &kernel->output_data_type,
-                                        s);
-  }
-
-  if (TF_GetCode(s) == TF_OK) {
-    kernel->in_size = TF_DataTypeSize(kernel->input_data_type);
-    kernel->out_size = TF_DataTypeSize(kernel->output_data_type);
-
-    size_t check_size = std::max(kernel->in_size, kernel->out_size) %
-                        std::min(kernel->in_size, kernel->out_size);
-    if (check_size != 0) {
-      std::string err =
-          absl::StrFormat("cannot convert between datatype %d and %d",
-                          kernel->input_data_type, kernel->output_data_type);
-      TF_SetStatus(s, TF_INVALID_ARGUMENT, err.c_str());
-    }
-  }
-
-  if (TF_GetCode(s) != TF_OK) {
-    TF_OpKernelConstruction_Failure(ctx, s);
-    delete kernel;
-    kernel = nullptr;
-  }
-
-  TF_DeleteStatus(s);
-  return kernel;
-}
-
-void BitcastOp_Delete(void* kernel) { delete static_cast<BitcastOp*>(kernel); }
-
-void BitcastOp_Compute(void* kernel, TF_OpKernelContext* ctx) {
-  auto* k = static_cast<BitcastOp*>(kernel);
-  int dim_count = 0;
-
-  TF_Tensor* tensor;
-  TF_Status* status = TF_NewStatus();
-  TF_GetInput(ctx, 0, &tensor, status);
-  if (TF_GetCode(status) == TF_OK) {
-    dim_count = TF_NumDims(tensor);
-    if (!(k->in_size >= k->out_size ||
-          (dim_count > 0 &&
-           TF_Dim(tensor, dim_count - 1) == k->out_size / k->in_size))) {
-      std::string err =
-          absl::StrFormat("Cannot bitcast from %d to %d", k->input_data_type,
-                          k->output_data_type);
-      TF_SetStatus(status, TF_INVALID_ARGUMENT, err.c_str());
-    }
-  }
-
-  if (TF_GetCode(status) == TF_OK) {
-    auto* dims = new int64_t[dim_count + 1];
-    int new_dim_count = dim_count;
-    for (int dim = 0; dim < dim_count; ++dim) {
-      dims[dim] = TF_Dim(tensor, dim);
-    }
-    if (k->out_size < k->in_size) {
-      dims[new_dim_count++] = static_cast<int64_t>(k->in_size / k->out_size);
-    } else if (k->out_size > k->in_size) {
-      --new_dim_count;
-    }
-
-    TF_Tensor* output = TF_AllocateTensor(k->output_data_type, dims, 0,
-                                          TF_DataTypeSize(k->output_data_type));
-    TF_TensorBitcastFrom(tensor, k->output_data_type, output, dims,
-                         new_dim_count, status);
-    if (TF_GetCode(status) == TF_OK) {
-      TF_SetOutput(ctx, 0, output, status);
-    }
-    delete[] dims;
-    TF_DeleteTensor(output);
-  }
-
-  if (TF_GetCode(status) != TF_OK) {
-    TF_OpKernelContext_Failure(ctx, status);
-  }
-  TF_DeleteStatus(status);
-  TF_DeleteTensor(tensor);
-}
-
-void RegisterBitcastOp() {
-  TF_Status* status = TF_NewStatus();
-
-  {
-    auto* builder = TF_NewKernelBuilder("Bitcast", tensorflow::DEVICE_CPU,
-                                        &BitcastOp_Create, &BitcastOp_Compute,
-                                        &BitcastOp_Delete);
-    TF_RegisterKernelBuilder("BitcastOp", builder, status);
-    CHECK_EQ(TF_OK, TF_GetCode(status))
-        << "Error while registering bitcast kernel";
-  }
-
-#if GOOGLE_CUDA
-  {
-    auto* builder = TF_NewKernelBuilder("Bitcast", tensorflow::DEVICE_GPU,
-                                        &BitcastOp_Create, &BitcastOp_Compute,
-                                        &BitcastOp_Delete);
-    TF_RegisterKernelBuilder("BitcastOp", builder, status);
-    CHECK_EQ(TF_OK, TF_GetCode(status))
-        << "Error while registering CUDA bitcast kernel";
-  }
-#endif
-
-  TF_DeleteStatus(status);
-}
-
-// A dummy static variable initialized by a lambda whose side-effect is to
-// register the bitcast kernel.
-static bool BitcastOpIsRegistered = []() {
-  RegisterBitcastOp();
-  return true;
-}();
diff --git a/tensorflow/c/kernels/bitcast_op_test.cc b/tensorflow/c/kernels/bitcast_op_test.cc
deleted file mode 100644
index 06ffcca..0000000
--- a/tensorflow/c/kernels/bitcast_op_test.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/attr_value_util.h"
-#include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace {
-
-class DummyDevice : public DeviceBase {
- public:
-  DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {}
-  bool RequiresRecordingAccessedTensors() const override { return save_; }
-  Allocator* GetAllocator(AllocatorAttributes /*attr*/) override {
-    return cpu_allocator();
-  }
-
- private:
-  bool save_;
-};
-
-void TestBitcastOp(Tensor* input_tensor, DataType out_type,
-                   TensorShape expected_shape, error::Code expected_code) {
-  Status status;
-  NodeDef def;
-  def.set_op("Bitcast");
-  def.set_device(DEVICE_CPU);
-
-  AttrValue typeAttr;
-  SetAttrValue(input_tensor->dtype(), &typeAttr);
-
-  AttrValue outTypeAttr;
-  SetAttrValue(out_type, &outTypeAttr);
-
-  (*def.mutable_attr())["T"] = typeAttr;
-  (*def.mutable_attr())["type"] = outTypeAttr;
-
-  def.add_input(
-      strings::StrCat("input1: ", DataTypeString(input_tensor->dtype())));
-
-  std::unique_ptr<OpKernel> kernel =
-      CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, nullptr, def, 1, &status);
-  ASSERT_TRUE(status.ok()) << status.ToString();
-
-  OpKernelContext::Params params;
-  DummyDevice dummy_device(nullptr, false);
-  params.device = &dummy_device;
-  params.op_kernel = kernel.get();
-  gtl::InlinedVector<TensorValue, 4> inputs;
-  inputs.emplace_back(input_tensor);
-  params.inputs = &inputs;
-
-  OpKernelContext ctx(&params);
-  kernel->Compute(&ctx);
-  ASSERT_EQ(expected_code, ctx.status().code());
-  if (expected_code == error::OK) {
-    ASSERT_EQ(expected_shape, ctx.mutable_output(0)->shape())
-        << ctx.mutable_output(0)->shape().DebugString();
-  }
-}
-
-TEST(BitcastOpTest, TestUpcast) {
-  Tensor int8_input(DT_UINT8, {8});
-  for (int i = 0; i < 8; i++) {
-    int8_input.vec<uint8>()(i) = static_cast<uint8>(1);
-  }
-  TestBitcastOp(&int8_input, DT_UINT64, TensorShape(), error::OK);
-}
-
-TEST(BitcastOpTest, TestDowncast) {
-  Tensor int64_input(static_cast<uint64>(1));
-  TestBitcastOp(&int64_input, DT_UINT8, TensorShape({8}), error::OK);
-}
-
-TEST(BitcastOpTest, TestCastToSameSize) {
-  Tensor int32_input(DT_UINT32, {4, 6});
-  TestBitcastOp(&int32_input, DT_UINT8, TensorShape({4, 6, 4}), error::OK);
-}
-
-TEST(BitcastOpTest, TestImpossibleCast) {
-  Tensor int8_input(DT_UINT8, {1});
-  TestBitcastOp(&int8_input, DT_UINT32, TensorShape(), error::INVALID_ARGUMENT);
-}
-
-}  // namespace
-}  // namespace tensorflow
diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc
index 0d29547..6088877 100644
--- a/tensorflow/c/kernels_test.cc
+++ b/tensorflow/c/kernels_test.cc
@@ -36,6 +36,15 @@
   struct MyCustomKernel* s = new struct MyCustomKernel;
   s->created = true;
   s->compute_called = false;
+
+  // Exercise attribute reads.
+  TF_DataType type;
+  TF_Status* status = TF_NewStatus();
+  TF_OpKernelConstruction_GetAttrType(ctx, "SomeDataTypeAttr", &type, status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status));
+  EXPECT_EQ(TF_FLOAT, type);
+  TF_DeleteStatus(status);
+
   return s;
 }
 
@@ -43,17 +52,7 @@
   struct MyCustomKernel* s = static_cast<struct MyCustomKernel*>(kernel);
   s->compute_called = true;
   if (ctx != nullptr) {
-    TF_Status* status = TF_NewStatus();
-
     EXPECT_EQ(43, TF_StepId(ctx));
-
-    // Exercise attribute reads.
-    TF_DataType type;
-    TF_OpKernelContext_GetAttrType(ctx, "SomeDataTypeAttr", &type, status);
-    EXPECT_EQ(TF_OK, TF_GetCode(status));
-    EXPECT_EQ(TF_FLOAT, type);
-
-    TF_DeleteStatus(status);
   }
 }
 
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index a09becc..4c4d587 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -150,6 +150,7 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
     ],
 )
@@ -586,6 +587,25 @@
     pkg = "//tensorflow/core",
 )
 
+tf_gen_op_wrappers_cc(
+    name = "tpu_ops",
+    include_internal_ops = 1,
+    op_lib_names = [
+        "tpu_configuration_ops",
+        "tpu_cross_replica_ops",
+        "tpu_embedding_ops",
+        "tpu_functional_ops",
+        "tpu_heartbeat_ops",
+        "tpu_host_compute_ops",
+        "tpu_infeed_ops",
+        "tpu_outfeed_ops",
+        "tpu_ordinal_selector_ops",
+        "tpu_replication_ops",
+    ],
+    pkg = "//tensorflow/core",
+    visibility = ["//tensorflow:internal"],
+)
+
 cc_library_with_android_deps(
     name = "cc_op_gen_main",
     srcs = [
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index 3959337..43a33cb 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -321,6 +321,7 @@
           {"tensor", {"TensorProto", true}},
           {"list(tensor)", {"gtl::ArraySlice<TensorProto>", true}},
           {"func", {"NameAttrList", true}},
+          {"list(func)", {"gtl::ArraySlice<NameAttrList>", true}},
       };
 
   auto entry = attr_type_map->find(attr_type);
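To make the new map entry concrete: an attr declared as list(func) now surfaces in the generated C++ op wrappers as a gtl::ArraySlice<NameAttrList> parameter. A sketch under that assumption (op and attr names hypothetical):

    // Hypothetical op: REGISTER_OP("CaseN").Attr("branches: list(func)") ...
    // Generated wrapper signature (sketch):
    //   CaseN(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
    //         ::tensorflow::gtl::ArraySlice<NameAttrList> branches);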
diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc
index affd90b..d5ba56e 100644
--- a/tensorflow/cc/framework/gradients.cc
+++ b/tensorflow/cc/framework/gradients.cc
@@ -96,7 +96,7 @@
   // Used to identify nodes at which to stop backprop.
   std::unordered_set<int> GetStopBackpropNodes(
       const std::vector<bool>& reachable_nodes,
-      std::unordered_set<int> output_nodes);
+      const std::unordered_set<int>& output_nodes);
 
   const Scope& scope_;
   const ops::GradOpRegistry* registry_;
@@ -191,7 +191,7 @@
 
 std::unordered_set<int> SymbolicGradientBuilder::GetStopBackpropNodes(
     const std::vector<bool>& reachable_nodes,
-    std::unordered_set<int> output_nodes) {
+    const std::unordered_set<int>& output_nodes) {
   // Output nodes that get transitively consumed by other `outputs_` are stored
   // in `internal_outputs`.
   std::unordered_set<int> internal_outputs;
diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD
index 52345a3..dedd55f 100644
--- a/tensorflow/cc/saved_model/BUILD
+++ b/tensorflow/cc/saved_model/BUILD
@@ -81,6 +81,7 @@
     ] + if_not_mobile([
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
+        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
     ]) + if_android([
diff --git a/tensorflow/compat_template.__init__.py b/tensorflow/compat_template.__init__.py
index 1833b6a..2cf68c9 100644
--- a/tensorflow/compat_template.__init__.py
+++ b/tensorflow/compat_template.__init__.py
@@ -19,15 +19,21 @@
 from __future__ import print_function as _print_function
 
 import os as _os
+import sys as _sys
 
 # pylint: disable=g-bad-import-order
-from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
 
 # API IMPORTS PLACEHOLDER
 
 from tensorflow.python.tools import component_api_helper as _component_api_helper
 _component_api_helper.package_hook(
     parent_package_str=__name__,
+    child_package_str=('tensorboard.summary._tf.summary'),
+    error_msg=(
+        "Limited tf.compat.v2.summary API due to missing TensorBoard "
+        "installation"))
+_component_api_helper.package_hook(
+    parent_package_str=__name__,
     child_package_str=(
         'tensorflow_estimator.python.estimator.api._v2.estimator'))
 _component_api_helper.package_hook(
@@ -41,3 +47,10 @@
 #
 # This makes this one symbol available directly.
 from tensorflow.python.compat.v2_compat import enable_v2_behavior  # pylint: disable=g-import-not-at-top
+
+# Add module aliases
+_current_module = _sys.modules[__name__]
+if hasattr(_current_module, 'keras'):
+  losses = keras.losses
+  metrics = keras.metrics
+  optimizers = keras.optimizers
diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD
index 16151e7..af016bf 100644
--- a/tensorflow/compiler/aot/BUILD
+++ b/tensorflow/compiler/aot/BUILD
@@ -30,6 +30,7 @@
         "flags.h",
     ],
     deps = [
+        ":aot_only_var_handle_op",
         ":embedded_protocol_buffers",
         "//tensorflow/compiler/tf2xla",
         "//tensorflow/compiler/tf2xla:cpu_function_runtime",
@@ -71,6 +72,7 @@
         ":tfcompile_lib",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "@com_google_absl//absl/strings",
@@ -205,6 +207,15 @@
     ],
 )
 
+cc_library(
+    name = "aot_only_var_handle_op",
+    srcs = ["aot_only_var_handle_op.cc"],
+    deps = [
+        "//tensorflow/compiler/tf2xla:xla_compiler",
+    ],
+    alwayslink = 1,
+)
+
 tf_cc_test(
     name = "benchmark_test",
     srcs = ["benchmark_test.cc"],
diff --git a/tensorflow/compiler/aot/aot_only_var_handle_op.cc b/tensorflow/compiler/aot/aot_only_var_handle_op.cc
new file mode 100644
index 0000000..0ce36a9
--- /dev/null
+++ b/tensorflow/compiler/aot/aot_only_var_handle_op.cc
@@ -0,0 +1,56 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/xla_context.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+
+namespace tensorflow {
+namespace {
+
+// AOT-only implementation of VarHandleOp that binds the op to an XlaResource
+// of the same name. It is not safe to use this op in a JIT context.
+class XlaAotOnlyVarHandleOp : public XlaOpKernel {
+ public:
+  explicit XlaAotOnlyVarHandleOp(OpKernelConstruction* c);
+  void Compile(XlaOpKernelContext* context) override;
+
+ private:
+  string name_;
+};
+
+XlaAotOnlyVarHandleOp::XlaAotOnlyVarHandleOp(OpKernelConstruction* c)
+    : XlaOpKernel(c) {
+  OP_REQUIRES_OK(c, c->GetAttr("shared_name", &name_));
+}
+
+void XlaAotOnlyVarHandleOp::Compile(XlaOpKernelContext* context) {
+  // Look for a resource of the same name. TF also keys that on the container
+  // and type attributes, but that doesn't seem necessary.
+  for (const auto& resource : context->xla_context()->resources()) {
+    if (resource->kind() == XlaResource::kVariable &&
+        resource->name() == name_) {
+      context->SetResourceOutput(0, resource.get());
+      return;
+    }
+  }
+  context->SetStatus(
+      errors::InvalidArgument("Variable: ", name_, " not configured"));
+}
+}  // namespace
+
+REGISTER_XLA_OP(Name("VarHandleOp").CompilationOnly(), XlaAotOnlyVarHandleOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc
index d016632..da05987 100644
--- a/tensorflow/compiler/aot/codegen.cc
+++ b/tensorflow/compiler/aot/codegen.cc
@@ -168,12 +168,12 @@
                      const xla::ProgramShapeProto& ps,
                      const CompileResult& compile_result, string* methods) {
   size_t num_args = ps.parameters_size();
-  if (config.feed_size() != num_args) {
-    return errors::InvalidArgument("mismatch between feed_size(",
-                                   config.feed_size(), ") and num_args(",
-                                   num_args, ")");
+  if (config.feed_size() + config.variable_size() != num_args) {
+    return errors::InvalidArgument(
+        "mismatch between feed_size(", config.feed_size(), ")+variable_size(",
+        config.variable_size(), ") and num_args(", num_args, ")");
   }
-  for (int i = 0; i < num_args; ++i) {
+  for (int i = 0; i < config.feed_size(); ++i) {
     std::vector<std::pair<string, string>> rewrites;
     TF_RETURN_IF_ERROR(
         AddRewritesForShape(i, xla::Shape(ps.parameters(i)), &rewrites));
@@ -212,12 +212,14 @@
     // tuple result, and we rely on this to simplify code generation.
     return errors::Internal("codegen requires the XLA result to be a tuple");
   }
-  if (config.fetch_size() != ps.result().tuple_shapes_size()) {
+  size_t num_results = ps.result().tuple_shapes_size();
+  if (config.fetch_size() + config.variable_size() != num_results) {
     return errors::InvalidArgument("mismatch between fetch_size(",
-                                   config.feed_size(), ") and tuple_size(",
+                                   config.fetch_size(), ")+variable_size(",
+                                   config.variable_size(), ") and tuple_size(",
                                    ps.result().tuple_shapes_size(), ")");
   }
-  for (int i = 0; i < ps.result().tuple_shapes_size(); ++i) {
+  for (int i = 0; i < config.fetch_size(); ++i) {
     std::vector<std::pair<string, string>> rewrites;
     TF_RETURN_IF_ERROR(AddRewritesForShape(
         i, xla::Shape(ps.result().tuple_shapes(i)), &rewrites));
@@ -245,6 +247,51 @@
   return Status::OK();
 }
 
+// Generate methods for variables.
+Status GenVariableMethods(const tf2xla::Config& config,
+                          const xla::ProgramShapeProto& ps, string* methods) {
+  size_t num_args = ps.parameters_size();
+  for (int i = config.feed_size(); i < num_args; ++i) {
+    std::vector<std::pair<string, string>> rewrites;
+    TF_RETURN_IF_ERROR(
+        AddRewritesForShape(i, xla::Shape(ps.parameters(i)), &rewrites));
+    const string code = R"(
+  void set_var_{{NAME}}_input_data({{TYPE}}* data) {
+    set_arg_data({{I}}, data);
+  }
+)";
+    const tf2xla::Variable& var = config.variable(i - config.feed_size());
+    *methods += RewriteWithName(
+        var.name().empty() ? var.node_name() : var.name(), code, rewrites);
+  }
+  size_t num_results = ps.result().tuple_shapes_size();
+  for (int i = config.fetch_size(); i < num_results; ++i) {
+    std::vector<std::pair<string, string>> rewrites;
+    TF_RETURN_IF_ERROR(AddRewritesForShape(
+        i, xla::Shape(ps.result().tuple_shapes(i)), &rewrites));
+    string code = R"(
+  {{TYPE}}* var_{{NAME}}_result_data() {
+    return static_cast<{{TYPE}}*>(result_data({{I}}));
+  }
+  {{TYPE}}& var_{{NAME}}_result({{DIM_VARS}}) {
+    return (*static_cast<{{TYPE}}(*){{DIM_SIZES}}>(
+        result_data({{I}}))){{INDICES}};
+  }
+  const {{TYPE}}* var_{{NAME}}_result_data() const {
+    return static_cast<const {{TYPE}}*>(result_data({{I}}));
+  }
+  const {{TYPE}}& var_{{NAME}}_result({{DIM_VARS}}) const {
+    return (*static_cast<const {{TYPE}}(*){{DIM_SIZES}}>(
+        result_data({{I}}))){{INDICES}};
+  }
+)";
+    const tf2xla::Variable& var = config.variable(i - config.fetch_size());
+    *methods += RewriteWithName(
+        var.name().empty() ? var.node_name() : var.name(), code, rewrites);
+  }
+  return Status::OK();
+}
+
 // Generates code implementing {Arg,Result}Names(), where T is one of
 // tf2xla::{Feed,Fetch}. Each feed or fetch name results in a C-style string
 // literal in the array, with nullptr terminating the array.
@@ -291,6 +338,14 @@
       TF_RETURN_IF_ERROR(ValidateCppIdent(fetch.name(), "fetch name"));
     }
   }
+  for (const tf2xla::Variable& variable : config.variable()) {
+    if (!variable.name().empty()) {
+      TF_RETURN_IF_ERROR(ValidateCppIdent(variable.name(), "variable name"));
+    } else {
+      TF_RETURN_IF_ERROR(
+          ValidateCppIdent(variable.node_name(), "variable name"));
+    }
+  }
   return Status::OK();
 }
 
@@ -339,9 +394,10 @@
   std::vector<BufferInfo> buffer_infos_for_temps =
       ExtractTempBufferInfos(buffer_infos);
   const xla::ProgramShapeProto& ps = compile_result.program_shape;
-  string methods_arg, methods_result;
+  string methods_arg, methods_result, methods_variable;
   TF_RETURN_IF_ERROR(GenArgMethods(config, ps, compile_result, &methods_arg));
   TF_RETURN_IF_ERROR(GenResultMethods(config, ps, &methods_result));
+  TF_RETURN_IF_ERROR(GenVariableMethods(config, ps, &methods_variable));
   const size_t arg_bytes_aligned = cpu_function_runtime::AlignedBufferBytes(
       buffer_infos_for_args.data(), buffer_infos_for_args.size(),
       /*allocate_entry_params=*/true);
@@ -523,6 +579,21 @@
   // buffers are managed internally, and may change after each call to Run.
 {{METHODS_RESULT}}
 
+  // Methods for managing variable buffers. Buffers are in row-major order. The
+  // input and output buffers may or may not be identical.
+  //
+  // void set_var_X_input_data(T* data)
+  //   Sets the input buffer for variable X.
+  //
+  // T* var_X_result_data()
+  //   Returns the output buffer of type T for variable X.
+  //
+  // T& var_X_result(...dim indices...)
+  //   Returns a reference to the output value of type T for variable X,
+  //   with dim indices specifying which value. No bounds checking is performed
+  //   on dim indices.
+{{METHODS_VARIABLE}}
+
  private:
   // Number of buffers for the compiled computation.
   static constexpr size_t kNumBuffers = {{NUM_BUFFERS}};
@@ -589,6 +660,7 @@
        include_hlo_profile_printer_data_proto},
       {"{{METHODS_ARG}}\n", methods_arg},
       {"{{METHODS_RESULT}}\n", methods_result},
+      {"{{METHODS_VARIABLE}}\n", methods_variable},
       {"{{NS_END}}\n", ns_end},
       {"{{NS_START}}\n", ns_start},
       {"{{PROGRAM_SHAPE}}", xla::ShapeUtil::HumanString(xla::ProgramShape(ps))},
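A minimal usage sketch of the accessors GenVariableMethods emits (class and variable names hypothetical; compare the golden header and tfcompile_test.cc later in this change):

    MyComputation fn;             // hypothetical tfcompile-generated class
    float x = 1.0f;               // host buffer backing variable "x"
    fn.set_var_x_input_data(&x);  // bind the variable's input buffer
    fn.Run();
    float updated = fn.var_x_result();  // the variable's value after this step
    x = updated;  // feed it back so the next Run() sees the update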
diff --git a/tensorflow/compiler/aot/codegen_test.cc b/tensorflow/compiler/aot/codegen_test.cc
index c1788ca..5580e55 100644
--- a/tensorflow/compiler/aot/codegen_test.cc
+++ b/tensorflow/compiler/aot/codegen_test.cc
@@ -22,6 +22,8 @@
 #include "absl/strings/string_view.h"
 #include "llvm/Support/TargetSelect.h"
 #include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -172,6 +174,15 @@
   tf2xla::Fetch* fetch = config.add_fetch();
   fetch->mutable_id()->set_node_name("fetch0");
   fetch->set_name("myfetch");
+  tf2xla::Variable* variable = config.add_variable();
+  variable->set_node_name("myvar");
+  variable->mutable_shape()->add_dim()->set_size(1);
+  variable->set_type(DT_FLOAT);
+  tf2xla::Variable* variable2 = config.add_variable();
+  variable2->set_node_name("my/var");
+  variable2->set_name("myvar2");
+  variable2->mutable_shape()->add_dim()->set_size(5);
+  variable2->set_type(DT_INT32);
   CompileResult compile_result;
   compile_result.aot.reset(new xla::cpu::CpuAotCompilationResult(
       {},
@@ -186,9 +197,14 @@
           {
               xla::ShapeUtil::MakeShape(xla::F32, {1, 2}),
               xla::ShapeUtil::MakeShape(xla::S64, {3, 4}),
+              xla::ShapeUtil::MakeShape(xla::F32, {1}),
+              xla::ShapeUtil::MakeShape(xla::S32, {5}),
           },
-          xla::ShapeUtil::MakeTupleShape(
-              {xla::ShapeUtil::MakeShape(xla::U32, {5, 6})}))
+          xla::ShapeUtil::MakeTupleShape({
+              xla::ShapeUtil::MakeShape(xla::U32, {5, 6}),
+              xla::ShapeUtil::MakeShape(xla::F32, {1}),
+              xla::ShapeUtil::MakeShape(xla::S32, {5}),
+          }))
           .ToProto();
   compile_result.entry_point = "entry_point";
   compile_result.pointer_size = 8;
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index 35994fc..b5f33d6 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -52,7 +52,7 @@
 //   is guaranteed that no thread may call a non-const method.
 //
 // The logical function signature is:
-//   ((unknown): f32[1,2], (unknown): s64[3,4]) -> (u32[5,6])
+//   ((unknown): f32[1,2], (unknown): s64[3,4], (unknown): f32[1], (unknown): s32[5]) -> (u32[5,6], f32[1], s32[5])
 //
 // Memory stats:
 //   arg bytes total:    104
@@ -214,6 +214,58 @@
         result_data(0)))[dim0][dim1];
   }
 
+  // Methods for managing variable buffers. Buffers are in row-major order. The
+  // input and output buffers may or may not be identical.
+  //
+  // void set_var_X_input_data(T* data)
+  //   Sets the input buffer for variable X.
+  //
+  // T* var_X_result_data()
+  //   Returns the output buffer of type T for variable X.
+  //
+  // T& var_X_result(...dim indices...)
+  //   Returns a reference to the output value of type T for variable X,
+  //   with dim indices specifying which value. No bounds checking is performed
+  //   on dim indices.
+
+  void set_var_myvar_input_data(float* data) {
+    set_arg_data(2, data);
+  }
+
+  void set_var_myvar2_input_data(tensorflow::int32* data) {
+    set_arg_data(3, data);
+  }
+
+  float* var_myvar_result_data() {
+    return static_cast<float*>(result_data(1));
+  }
+  float& var_myvar_result() {
+    return (*static_cast<float(*)[1]>(
+        result_data(1)))[0];
+  }
+  const float* var_myvar_result_data() const {
+    return static_cast<const float*>(result_data(1));
+  }
+  const float& var_myvar_result() const {
+    return (*static_cast<const float(*)[1]>(
+        result_data(1)))[0];
+  }
+
+  tensorflow::int32* var_myvar2_result_data() {
+    return static_cast<tensorflow::int32*>(result_data(2));
+  }
+  tensorflow::int32& var_myvar2_result(size_t dim0) {
+    return (*static_cast<tensorflow::int32(*)[5]>(
+        result_data(2)))[dim0];
+  }
+  const tensorflow::int32* var_myvar2_result_data() const {
+    return static_cast<const tensorflow::int32*>(result_data(2));
+  }
+  const tensorflow::int32& var_myvar2_result(size_t dim0) const {
+    return (*static_cast<const tensorflow::int32(*)[5]>(
+        result_data(2)))[dim0];
+  }
+
  private:
   // Number of buffers for the compiled computation.
   static constexpr size_t kNumBuffers = 6;
@@ -257,7 +309,7 @@
   static const xla::ProgramShapeProto* StaticProgramShape() {
     static const xla::ProgramShapeProto* kShape = []() {
     xla::ProgramShapeProto* proto = new xla::ProgramShapeProto;
-    proto->ParseFromArray(&__tfcompile_foo_bar_MyClass_ProgramShapeProto_protobuf_array_contents[0], 64);
+    proto->ParseFromArray(&__tfcompile_foo_bar_MyClass_ProgramShapeProto_protobuf_array_contents[0], 132);
     return proto;
   }();
     return kShape;
diff --git a/tensorflow/compiler/aot/codegen_test_o.golden b/tensorflow/compiler/aot/codegen_test_o.golden
index 7f7b964..2884597 100644
--- a/tensorflow/compiler/aot/codegen_test_o.golden
+++ b/tensorflow/compiler/aot/codegen_test_o.golden
Binary files differ
diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD
index 10fa33a..444264b 100644
--- a/tensorflow/compiler/aot/tests/BUILD
+++ b/tensorflow/compiler/aot/tests/BUILD
@@ -69,6 +69,7 @@
         "test_graph_tfmatmulandadd.pb",
         "test_graph_tfsplits.pb",
         "test_graph_tftop_k.pb",
+        "test_graph_tfvariable.pb",
     ],
     # Set CUDA_VISIBLE_DEVICES='' to prevent the code we launch from using any
     # GPUs which might be present.  This is important because builds may run
@@ -222,6 +223,17 @@
     ],
 )
 
+tf_library(
+    name = "test_graph_tfvariable",
+    testonly = 1,
+    config = "test_graph_tfvariable.config.pbtxt",
+    cpp_class = "VariableComp",
+    graph = "test_graph_tfvariable.pb",
+    tags = [
+        "manual",
+    ],
+)
+
 tf_cc_test(
     name = "tfcompile_test",
     srcs = ["tfcompile_test.cc"],
@@ -241,6 +253,7 @@
         ":test_graph_tfmatmulandadd_with_profiling",
         ":test_graph_tfsplits",
         ":test_graph_tftop_k",
+        ":test_graph_tfvariable",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:xla_data_proto",
diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py
index 7bac79e..42f8812 100644
--- a/tensorflow/compiler/aot/tests/make_test_graphs.py
+++ b/tensorflow/compiler/aot/tests/make_test_graphs.py
@@ -149,6 +149,14 @@
   array_ops.identity(output[1], name='indices')
 
 
+def tfvariable(_):
+  x = variables.Variable(1000.0, name='x')
+  old_x = x.value()
+  with ops.control_dependencies([old_x]):
+    new_x = x.assign_add(42.0)
+  array_ops.stack([old_x, new_x], name='result')
+
+
 def write_graph(build_graph, out_dir):
   """Build a graph using build_graph and write it out."""
   g = ops.Graph()
@@ -171,6 +179,7 @@
   write_graph(tfmatmulandadd, FLAGS.out_dir)
   write_graph(tfsplits, FLAGS.out_dir)
   write_graph(tftop_k, FLAGS.out_dir)
+  write_graph(tfvariable, FLAGS.out_dir)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/compiler/aot/tests/test_graph_tfvariable.config.pbtxt b/tensorflow/compiler/aot/tests/test_graph_tfvariable.config.pbtxt
new file mode 100644
index 0000000..9b4c421
--- /dev/null
+++ b/tensorflow/compiler/aot/tests/test_graph_tfvariable.config.pbtxt
@@ -0,0 +1,12 @@
+# Text form of tensorflow.tf2xla.Config proto.
+fetch {
+  id { node_name: "result" }
+}
+
+variable {
+  node_name: "x"
+  shape {
+    dim { size: 1 }
+  }
+  type: DT_FLOAT
+}
diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc
index 4dd79e5..5f9316f 100644
--- a/tensorflow/compiler/aot/tests/tfcompile_test.cc
+++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc
@@ -30,6 +30,7 @@
 #include "tensorflow/compiler/aot/tests/test_graph_tfmatmulandadd_with_profiling.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tfsplits.h"
 #include "tensorflow/compiler/aot/tests/test_graph_tftop_k.h"
+#include "tensorflow/compiler/aot/tests/test_graph_tfvariable.h"
 #include "tensorflow/compiler/xla/service/hlo_profile_printer.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
@@ -473,6 +474,28 @@
   EXPECT_EQ(expected_indices[1], fn.result1(1));
 }
 
+TEST(TFCompileTest, Variable) {
+  Eigen::ThreadPool tp(1);
+  Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
+
+  VariableComp fn;
+  float x = 23;
+  fn.set_var_x_input_data(&x);
+
+  fn.set_thread_pool(&device);
+  fn.Run();
+  EXPECT_EQ(fn.result0(0, 0), 23);
+  EXPECT_EQ(fn.result0(1, 0), 65);
+  EXPECT_EQ(fn.var_x_result(), 65);
+
+  EXPECT_EQ(x, 23);
+  x = fn.var_x_result();
+  fn.Run();
+  EXPECT_EQ(fn.result0(0, 0), 65);
+  EXPECT_EQ(fn.result0(1, 0), 107);
+  EXPECT_EQ(fn.var_x_result(), 107);
+}
+
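The expected values above follow from the tfvariable graph built in make_test_graphs.py (new_x = x + 42, result = stack([old_x, new_x])): the first Run reads old_x = 23 and computes new_x = 23 + 42 = 65; after var_x_result() is copied back into the input buffer, the second Run reads 65 and computes 65 + 42 = 107.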
 TEST(TFCompileTest, AssertEqAndReturnDiff) {
   // Assert is converted into a no-op in XLA, so there is no failure even if the
   // two args are different.
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 3cae081..121de40 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -208,6 +208,7 @@
         "//tensorflow/core/kernels:variable_ops",
         "//tensorflow/core/kernels/data:generator_dataset_op",
         "//tensorflow/core/kernels/data:iterator_ops",
+        "//tensorflow/core/kernels/data:optional_ops",
         "//tensorflow/core/kernels/data:prefetch_dataset_op",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/synchronization",
@@ -282,7 +283,6 @@
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:dump_graph",
         "//tensorflow/compiler/tf2xla:xla_compiler",
-        "//tensorflow/compiler/xla:debug_options_flags",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla/client:client_library",
         "//tensorflow/compiler/xla/client:local_client",
@@ -465,7 +465,6 @@
         "//tensorflow/compiler/tf2xla:tf2xla_util",
         "//tensorflow/core:framework",
         "//tensorflow/core:graph",
-        "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
index 9f40426..285b1ef 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -115,6 +115,13 @@
     return;
   }
 
+  if (ctrl_edges.size() == 1 && ctrl_edges.front()->dst()->IsSink()) {
+    // Avoid creating a Merge node if we can just add an edge to _SINK
+    // instead.
+    s.graph()->AddControlEdge(new_node, s.graph()->sink_node());
+    return;
+  }
+
   // We can't merge control edges directly so we instead first "convert" them to
   // normal values that can be merged, merge the values and then "convert" the
   // merged value back into control.
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
index 390ffa6..c14c746 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
@@ -68,6 +68,8 @@
     }
   }
 
+  FixupSourceAndSinkEdges(graph.get());
+
   GraphOptimizationPassOptions opt_options;
   opt_options.graph = &graph;
   BuildXlaOpsPass pass(/*enable_lazy_compilation=*/true);
@@ -223,5 +225,23 @@
   ASSERT_NE(write_op_new, nullptr);
   EXPECT_THAT(write_op_new, assign_var);
 }
+
+TEST_F(BuildXlaOpsTest, NoExtraMergeForEdgeToSink) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  FunctionDefLibrary flib_def =
+      CreateFunctionDefLibWithConstFunction("cluster_0");
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+  Node* call;
+  TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call));
+
+  std::unique_ptr<Graph> graph;
+  TF_ASSERT_OK(BuildXlaOps(root, &graph));
+
+  Node* sink_node = graph->sink_node();
+  EXPECT_THAT(sink_node, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun")),
+                                           NodeWith(Op("cluster_0")),
+                                           NodeWith(Op("NoOp")))));
+}
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
index 1f8ec09..261519d 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
@@ -307,22 +307,6 @@
     .Attr("shapes: list(shape) >= 0")
     .SetShapeFn(::tensorflow::shape_inference::UnknownShape);
 
-REGISTER_OP("_XlaSendFromHost")
-    .Input("inputs: Tinputs")
-    .Input("dynamic_key: string")
-    .Attr("Tinputs: list(type) >= 0")
-    .Attr("key: string")
-    .Attr("device_ordinal: int")
-    .SetShapeFn(::tensorflow::shape_inference::UnknownShape);
-
-REGISTER_OP("_XlaRecvAtHost")
-    .Input("dynamic_key: string")
-    .Output("outputs: Toutputs")
-    .Attr("Toutputs: list(type) >= 0")
-    .Attr("key: string")
-    .Attr("device_ordinal: int")
-    .SetShapeFn(::tensorflow::shape_inference::UnknownShape);
-
 REGISTER_OP("InputTest")
     .Output("o: float")
     .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index c9ae717..f0c9d57 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -15,13 +15,17 @@
 
 #include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
 
+#include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
+#include "absl/strings/ascii.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -36,6 +40,25 @@
 
 const char* const kXlaClusterOutput = "XlaClusterOutput";
 
+bool IsCpuGpuCompile(const Graph* graph) {
+  for (Node* n : graph->nodes()) {
+    string name;
+    // Only consider nodes being compiled.
+    if (!GetNodeAttr(n->attrs(),
+                     EncapsulateXlaComputationsPass::kXlaClusterAttr, &name)
+             .ok())
+      continue;
+    // Early return for any node with a device that is not a CPU or GPU.
+    DeviceNameUtils::ParsedName parsed;
+    if (DeviceNameUtils::ParseFullName(n->requested_device(), &parsed)) {
+      if (parsed.type != DEVICE_CPU && parsed.type != DEVICE_GPU) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 // Checks if a graph node is marked to be a guaranteed constant.
 bool is_guaranteed_constant(const Node& n) {
   bool guaranteed_constant = false;
@@ -177,7 +200,7 @@
   auto serialized = absl::make_unique<char[]>(size);
   TF_RET_CHECK(SerializeToBufferDeterministic(gdef, serialized.get(), size));
   uint64 fingerprint = Fingerprint64(absl::string_view(serialized.get(), size));
-  LOG(INFO) << "Subgraph fingerprint:" << fingerprint;
+  VLOG(1) << "Subgraph fingerprint:" << fingerprint;
   call_def->set_op(absl::StrCat(call_def->op(), "_", fingerprint));
   return Status::OK();
 }
@@ -352,12 +375,19 @@
           << dump_graph::DumpGraphToFile("encapsulate_xla_computations_before",
                                          **options.graph, options.flib_def);
 
-  TF_RETURN_IF_ERROR(Encapsulate(options.graph, options.flib_def));
+  const char* additional_help =
+      IsCpuGpuCompile(options.graph->get())
+          ? xla::status_macros::kPossibleAutoJitAlternative
+          : "";
+
+  TF_RETURN_WITH_CONTEXT_IF_ERROR(Encapsulate(options.graph, options.flib_def),
+                                  additional_help);
   VLOG(1) << "EncapsulateXlaComputations() half-way: "
           << dump_graph::DumpGraphToFile("encapsulate_xla_computations_halfway",
                                          **options.graph, options.flib_def);
 
-  TF_RETURN_IF_ERROR(BuildXlaLaunchOps(options.graph->get()));
+  TF_RETURN_WITH_CONTEXT_IF_ERROR(BuildXlaLaunchOps(options.graph->get()),
+                                  additional_help);
   VLOG(1) << "EncapsulateXlaComputations() finished: "
           << dump_graph::DumpGraphToFile("encapsulate_xla_computations_after",
                                          **options.graph, options.flib_def);
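For context on the device check in IsCpuGpuCompile above, a sketch of what ParseFullName extracts (device string illustrative):

    #include "tensorflow/core/util/device_name_utils.h"

    tensorflow::DeviceNameUtils::ParsedName parsed;
    bool ok = tensorflow::DeviceNameUtils::ParseFullName(
        "/job:worker/replica:0/task:0/device:GPU:0", &parsed);
    // ok == true and parsed.type == "GPU", so the scan continues; a node
    // placed on "/device:TPU:0" would make IsCpuGpuCompile return false.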
diff --git a/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc b/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc
index ebfffc3..5287fd1 100644
--- a/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc
+++ b/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc
@@ -247,6 +247,7 @@
           .NewSubScope(absl::StrCat(slice->name(), "/static_shaped_slice"));
   Scope host_scope = main_scope.WithAssignedDevice(host_name);
 
+  // In the future we may want to be clever here and avoid the extra Cast ops.
   SliceInputs slice_inputs_int64 =
       MakeSliceIndexAndSizeInt64(host_scope, slice_inputs);
 
@@ -312,9 +313,9 @@
   return Status::OK();
 }
 
-// Return true if `n` is a slice we can rewrite to have a static shape
+// Return true if `n` is a slice we should rewrite to have a static shape
 // (i.e. have the output shape only depend on the "size" input).
-xla::StatusOr<bool> IsRewritableSlice(Node* n) {
+xla::StatusOr<bool> ShouldRewriteSlice(Node* n) {
   if (n->type_string() != "Slice") {
     return false;
   }
@@ -332,14 +333,20 @@
 
   // If slice_size[i] < -1 for any i then executing the slice will throw an
   // error, and we don't do anything here.
-  return absl::c_all_of(slice_inputs->size_as_vector,
-                        [](int64 size_i) { return size_i >= -1; });
+  bool slice_sizes_are_valid = absl::c_all_of(
+      slice_inputs->size_as_vector, [](int64 size_i) { return size_i >= -1; });
+  if (!slice_sizes_are_valid) {
+    return false;
+  }
+
+  // No point in rewriting slices that have both size and begin as constants.
+  return !slice_inputs->begin.node()->IsConstant();
 }
 
 Status FindAndRewriteSlices(Graph* g, bool* changed) {
   std::vector<Node*> slices_to_rewrite;
   for (Node* n : g->nodes()) {
-    TF_ASSIGN_OR_RETURN(bool is_rewritable, IsRewritableSlice(n));
+    TF_ASSIGN_OR_RETURN(bool is_rewritable, ShouldRewriteSlice(n));
     if (is_rewritable) {
       slices_to_rewrite.push_back(n);
     }
diff --git a/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass_test.cc b/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass_test.cc
index 32e3021..2add2c1 100644
--- a/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass_test.cc
+++ b/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass_test.cc
@@ -432,5 +432,26 @@
                                                       Name("dependency")))));
 }
 
+TEST(SliceToDynamicSliceRewriteTest, DontRewriteSliceWithConstBegin) {
+  Scope root = Scope::NewRootScope()
+                   .ExitOnError()
+                   .WithAssignedDevice(kDeviceName)
+                   .WithXlaCluster("cluster_0");
+
+  Output input = ops::Placeholder(root.WithOpName("input"), DT_FLOAT);
+  Output begin = ops::Const(root.WithOpName("begin"), {10, 10});
+  Output size = ops::Const(root.WithOpName("size"), {-1, 500});
+  Output slice = ops::Slice(root.WithOpName("slice"), input, begin, size);
+
+  std::unique_ptr<Graph> result;
+  TF_ASSERT_OK(IncreaseDynamismForAutoJit(root, &result));
+
+  Node* slice_node = testing::FindNodeByName(result.get(), "slice");
+  EXPECT_THAT(slice_node,
+              NodeWith(Op("Slice"), Inputs(Out(NodeWith(Op("Placeholder"))),
+                                           Out(NodeWith(Op("Const"))),
+                                           Out(NodeWith(Op("Const"))))));
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD
index bab824c..d0fa2c4 100644
--- a/tensorflow/compiler/jit/kernels/BUILD
+++ b/tensorflow/compiler/jit/kernels/BUILD
@@ -19,6 +19,7 @@
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:tf2xla_util",
         "//tensorflow/compiler/tf2xla:xla_compiler",
+        "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla/client:client_library",
         "//tensorflow/compiler/xla/client:local_client",
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
index ad71df5..997ef6e 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.cc
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -25,6 +25,7 @@
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/client_library.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
 #include "tensorflow/core/common_runtime/function.h"
@@ -35,6 +36,8 @@
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/variable_ops.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/core/util/stream_executor_util.h"
@@ -304,10 +307,19 @@
   xla::LocalExecutable* executable;
   std::map<int, OptionalTensor> variables;
 
-  OP_REQUIRES_OK(
-      ctx, CompileToLocalExecutable(ctx, function_, platform_info_, resources_,
-                                    constants_, /*lazy=*/false, &client,
-                                    &variables, &kernel, &executable));
+  {
+    Status s = CompileToLocalExecutable(
+        ctx, function_, platform_info_, resources_, constants_, /*lazy=*/false,
+        &client, &variables, &kernel, &executable);
+    if (!s.ok() && (platform_info_.device_type().type_string() == DEVICE_CPU ||
+                    platform_info_.device_type().type_string() == DEVICE_GPU)) {
+      // Suggest auto jit if the failure was with GPU or CPU.
+      errors::AppendToMessage(&s,
+                              xla::status_macros::kPossibleAutoJitAlternative);
+    }
+
+    OP_REQUIRES_OK(ctx, s);
+  }
 
   se::Stream* stream =
       ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
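The hunk above materializes the Status so a hint can be appended before OP_REQUIRES_OK. The pattern in isolation (call and hint text illustrative):

    tensorflow::Status s = SomeCompileStep();  // hypothetical call that may fail
    if (!s.ok()) {
      tensorflow::errors::AppendToMessage(&s, " Consider auto-jit instead.");
    }
    OP_REQUIRES_OK(ctx, s);  // fails the op with the augmented message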
diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc
index 8099386..3adcfef 100644
--- a/tensorflow/compiler/jit/xla_cluster_util.cc
+++ b/tensorflow/compiler/jit/xla_cluster_util.cc
@@ -43,7 +43,7 @@
     return "";
   }
 
-  auto node_name = [cycles, &graph](int node_id) {
+  auto node_name = [&graph](int node_id) {
     if (!FastBoundsCheck(node_id, graph.num_node_ids())) {
       return string("(null)");
     }
diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc
index c67b4f1..eceb47f 100644
--- a/tensorflow/compiler/jit/xla_device.cc
+++ b/tensorflow/compiler/jit/xla_device.cc
@@ -102,7 +102,8 @@
   }
 
   std::unique_ptr<XlaDeviceAllocator> alloc =
-      absl::make_unique<XlaDeviceAllocator>();
+      absl::make_unique<XlaDeviceAllocator>(
+          backend->stream_executors()[device_ordinal]);
   XlaDeviceAllocator* alloc_ptr = alloc.get();
   state.allocators_[{backend, device_ordinal}] = std::move(alloc);
   return alloc_ptr;
@@ -428,7 +429,7 @@
   // moment--when ThenEnqueueOnBackgroundThread is called--will have finished.
   // This achieves a device-wide sync.
   stream->ThenEnqueueOnBackgroundThread(
-      [this, stream, done](se::StreamExecutor*) {
+      [stream, done](se::StreamExecutor*) {
         tracing::ScopedActivity activity("XlaDevice::Sync::Callback",
                                          /*is_expensive=*/true);
         done(stream->ok() ? Status::OK()
diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index 28681bb..05b9c51 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -29,7 +29,10 @@
 namespace tensorflow {
 
 // The allocator used for Tensors assigned to the XLA device.
-XlaDeviceAllocator::XlaDeviceAllocator() {}
+XlaDeviceAllocator::XlaDeviceAllocator(
+    stream_executor::StreamExecutor* stream_executor)
+    : stream_executor_(stream_executor) {}
+
 XlaDeviceAllocator::~XlaDeviceAllocator() = default;
 
 string XlaDeviceAllocator::Name() { return "xla"; }
@@ -48,7 +51,21 @@
   delete XlaTensor::FromOpaquePointer(ptr);
 }
 
-void XlaDeviceAllocator::GetStats(AllocatorStats* stats) { stats->Clear(); }
+absl::optional<AllocatorStats> XlaDeviceAllocator::GetStats() {
+  absl::optional<stream_executor::AllocatorStats> se_stats =
+      stream_executor_->GetAllocatorStats();
+  if (!se_stats) {
+    return absl::nullopt;
+  }
+
+  tensorflow::AllocatorStats tf_stats;
+  tf_stats.num_allocs = se_stats->num_allocs;
+  tf_stats.bytes_in_use = se_stats->bytes_in_use;
+  tf_stats.peak_bytes_in_use = se_stats->peak_bytes_in_use;
+  tf_stats.largest_alloc_size = se_stats->largest_alloc_size;
+  tf_stats.bytes_limit = se_stats->bytes_limit;
+  return tf_stats;
+}
 
 XlaDeviceContext::XlaDeviceContext(
     std::shared_ptr<se::Stream> compute_stream,
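A sketch of how a caller consumes the optional-returning GetStats() (allocator pointer hypothetical; field names as populated above):

    if (absl::optional<tensorflow::AllocatorStats> stats = allocator->GetStats()) {
      LOG(INFO) << "num_allocs=" << stats->num_allocs
                << " bytes_in_use=" << stats->bytes_in_use
                << " peak_bytes_in_use=" << stats->peak_bytes_in_use;
    } else {
      LOG(INFO) << "allocator does not track stats";
    }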
diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h
index e45db98..1ce64ad 100644
--- a/tensorflow/compiler/jit/xla_device_context.h
+++ b/tensorflow/compiler/jit/xla_device_context.h
@@ -34,14 +34,18 @@
 // empty, XlaTensor.
 class XlaDeviceAllocator : public Allocator {
  public:
-  XlaDeviceAllocator();
+  explicit XlaDeviceAllocator(se::StreamExecutor* stream_executor);
   ~XlaDeviceAllocator() override;
 
   string Name() override;
 
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void* ptr) override;
-  void GetStats(AllocatorStats* stats) override;
+  absl::optional<AllocatorStats> GetStats() override;
+
+ private:
+  // The stream executor of the device.
+  se::StreamExecutor* stream_executor_;
 };
 
 // Helper class for managing data transfers between host and XLA devices.
diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h
index 927f983..09e04d2 100644
--- a/tensorflow/compiler/jit/xla_device_ops.h
+++ b/tensorflow/compiler/jit/xla_device_ops.h
@@ -25,6 +25,7 @@
 #include "tensorflow/core/kernels/control_flow_ops.h"
 #include "tensorflow/core/kernels/data/generator_dataset_op.h"
 #include "tensorflow/core/kernels/data/iterator_ops.h"
+#include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/data/prefetch_dataset_op.h"
 #include "tensorflow/core/kernels/fifo_queue.h"
 #include "tensorflow/core/kernels/function_ops.h"
@@ -241,6 +242,8 @@
                           data::AnonymousIteratorHandleOp);                    \
   REGISTER_KERNEL_BUILDER(Name("IteratorGetNext").Device(DEVICE),              \
                           data::IteratorGetNextOp);                            \
+  REGISTER_KERNEL_BUILDER(Name("IteratorGetNextAsOptional").Device(DEVICE),    \
+                          data::IteratorGetNextAsOptionalOp);                  \
   REGISTER_KERNEL_BUILDER(Name("IteratorGetNextSync").Device(DEVICE),          \
                           data::IteratorGetNextSyncOp);                        \
   REGISTER_KERNEL_BUILDER(Name("IteratorToStringHandle")                       \
@@ -251,6 +254,15 @@
                               .Device(DEVICE)                                  \
                               .HostMemory("string_handle"),                    \
                           data::IteratorFromStringHandleOp);                   \
+  REGISTER_KERNEL_BUILDER(Name("OptionalNone").Device(DEVICE),                 \
+                          data::OptionalNoneOp);                               \
+  REGISTER_KERNEL_BUILDER(Name("OptionalFromValue").Device(DEVICE),            \
+                          data::OptionalFromValueOp);                          \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("OptionalHasValue").Device(DEVICE).HostMemory("has_value"),         \
+      data::OptionalHasValueOp);                                               \
+  REGISTER_KERNEL_BUILDER(Name("OptionalGetValue").Device(DEVICE),             \
+                          data::OptionalGetValueOp);                           \
   REGISTER_KERNEL_BUILDER(Name(FunctionLibraryDefinition::kArgOp)              \
                               .Device(DEVICE)                                  \
                               .HostMemory("output")                            \
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 9b6ca40..7c1e0da 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -251,6 +251,29 @@
 )
 
 tf_xla_py_test(
+    name = "self_adjoint_eig_op_test",
+    size = "medium",
+    srcs = ["self_adjoint_eig_op_test.py"],
+    # TODO(kuny): remove it after b/124377352 is fixed.
+    disabled_backends = [
+        "cpu",
+        "gpu",
+        "cpu_ondemand",
+    ],
+    tags = ["optonly"],
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:map_fn",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:training",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+tf_xla_py_test(
     name = "matrix_triangular_solve_op_test",
     size = "small",
     timeout = "moderate",
diff --git a/tensorflow/compiler/tests/categorical_op_test.py b/tensorflow/compiler/tests/categorical_op_test.py
index 5d5e486..eec69ea 100644
--- a/tensorflow/compiler/tests/categorical_op_test.py
+++ b/tensorflow/compiler/tests/categorical_op_test.py
@@ -119,7 +119,7 @@
 
   def testSamplingCorrectness(self):
     np.random.seed(1618)  # Make it reproducible.
-    num_samples = 21000
+    num_samples = 40000
 
     rand_probs = np.random.dirichlet([1., 1., 2., 3.])
     rand_probs2 = np.random.dirichlet([1., 4., 5.], size=3)  # batched
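For scale: the standard error of each empirically estimated probability is sqrt(p(1-p)/n), so raising num_samples from 21000 to 40000 shrinks the sampling noise by a factor of sqrt(40000/21000), roughly 1.38, tightening the comparison against the expected distribution.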
diff --git a/tensorflow/compiler/tests/plugin.bzl b/tensorflow/compiler/tests/plugin.bzl
index fbc8781..46a854d 100644
--- a/tensorflow/compiler/tests/plugin.bzl
+++ b/tensorflow/compiler/tests/plugin.bzl
@@ -18,13 +18,12 @@
 #   git update-index --assume-unchanged tensorflow/compiler/tests/plugin.bzl
 
 plugins = {
-  #"example": {
-  #  "device":"XLA_MY_DEVICE",
-  #  "types":"DT_FLOAT,DT_HALF,DT_INT32",
-  #   "tags":[],
-  #   "args":["--disabled_manifest=tensorflow/compiler/plugin/example/disabled_manifest.txt"],
-  #   "data":["//tensorflow/compiler/plugin/example:disabled_manifest.txt"],
-  #   "deps":[],
-  #},
+    #"example": {
+    #  "device":"XLA_MY_DEVICE",
+    #  "types":"DT_FLOAT,DT_HALF,DT_INT32",
+    #   "tags":[],
+    #   "args":["--disabled_manifest=tensorflow/compiler/plugin/example/disabled_manifest.txt"],
+    #   "data":["//tensorflow/compiler/plugin/example:disabled_manifest.txt"],
+    #   "deps":[],
+    #},
 }
-
diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py
index 97ffad3..34f2465 100644
--- a/tensorflow/compiler/tests/random_ops_test.py
+++ b/tensorflow/compiler/tests/random_ops_test.py
@@ -122,8 +122,8 @@
         beta = (b - mu) / sigma
         z = normal_cdf(beta) - normal_cdf(alpha)
 
-        self.assertTrue((y >= a).sum() == count)
-        self.assertTrue((y <= b).sum() == count)
+        self.assertEqual((y >= a).sum(), count)
+        self.assertEqual((y <= b).sum(), count)
 
         # For more information on these calculations, see:
         # Burkardt, John. "The Truncated Normal Distribution".
diff --git a/tensorflow/compiler/tests/scatter_nd_op_test.py b/tensorflow/compiler/tests/scatter_nd_op_test.py
index 693f851..a9a87b8 100644
--- a/tensorflow/compiler/tests/scatter_nd_op_test.py
+++ b/tensorflow/compiler/tests/scatter_nd_op_test.py
@@ -134,6 +134,12 @@
     expected = np.array([0, 11, 0, 10, 9, 0, 0, 12], dtype=np.int32)
     self.assertAllEqual(expected, self._runScatterNd(indices, updates, [8]))
 
+  def testRepeatedIndices(self):
+    indices = np.array([[0], [1], [0], [1]], dtype=np.int32)
+    updates = np.array([9, 10, 11, 12], dtype=np.float32)
+    expected = np.array([20, 22], dtype=np.float32)
+    self.assertAllEqual(expected, self._runScatterNd(indices, updates, [2]))
+
   def testSimple2(self):
     indices = np.array([[1, 0], [1, 1]], dtype=np.int32)
     updates = np.array([11., 12.], dtype=np.float32)
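Worked values for testRepeatedIndices above: scatter_nd accumulates updates that collide on the same index, so out[0] = 9 + 11 = 20 and out[1] = 10 + 12 = 22, matching the expected array.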
diff --git a/tensorflow/compiler/tests/self_adjoint_eig_op_test.py b/tensorflow/compiler/tests/self_adjoint_eig_op_test.py
new file mode 100644
index 0000000..cfb5c82
--- /dev/null
+++ b/tensorflow/compiler/tests/self_adjoint_eig_op_test.py
@@ -0,0 +1,62 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow.ops.self_adjoint_eig."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.compiler.tests import xla_test
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.platform import test
+
+
+class SelfAdjointEigOpTest(xla_test.XLATestCase, parameterized.TestCase):
+
+  def _test(self, dtype, shape):
+    np.random.seed(1)
+    x_np = np.random.uniform(
+        low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype)
+    x_np = x_np + np.swapaxes(x_np, -1, -2)
+    n = shape[-1]
+
+    e_np, _ = np.linalg.eigh(x_np)
+    with self.cached_session() as sess:
+      x_tf = array_ops.placeholder(dtype)
+      with self.test_scope():
+        e, v = linalg_ops.self_adjoint_eig(x_tf)
+      e_val, v_val = sess.run([e, v], feed_dict={x_tf: x_np})
+
+      v_diff = np.matmul(v_val, np.swapaxes(v_val, -1, -2)) - np.eye(n)
+      self.assertAlmostEqual(np.mean(v_diff**2), 0.0, delta=1e-6)
+      self.assertAlmostEqual(np.mean((e_val - e_np)**2), 0.0, delta=1e-6)
+
+  SIZES = [1, 2, 5, 10, 32]
+  DTYPES = [np.float32]
+  PARAMS = itertools.product(SIZES, DTYPES)
+
+  @parameterized.parameters(*PARAMS)
+  def testSelfAdjointEig(self, n, dtype):
+    for batch_dims in [(), (3,)] + ([(3, 2)] if n < 10 else []):
+      self._test(dtype, batch_dims + (n, n))
+
+
+if __name__ == "__main__":
+  test.main()
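The assertions in _test check the two defining properties of the symmetric eigendecomposition of the symmetrized input A = x + x^T:

    A = V \Lambda V^{\mathsf T}, \qquad V V^{\mathsf T} = I

which is why the test measures the mean squared entries of V V^T - I and of the eigenvalue error against np.linalg.eigh, each expected to be near 0 within the 1e-6 delta.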
diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py
index ee7ca7e..df5914a 100644
--- a/tensorflow/compiler/tests/stateless_random_ops_test.py
+++ b/tensorflow/compiler/tests/stateless_random_ops_test.py
@@ -167,8 +167,8 @@
         beta = (b - mu) / sigma
         z = normal_cdf(beta) - normal_cdf(alpha)
 
-        self.assertTrue((y >= a).sum() == n)
-        self.assertTrue((y <= b).sum() == n)
+        self.assertEqual((y >= a).sum(), n)
+        self.assertEqual((y <= b).sum(), n)
 
         # For more information on these calculations, see:
         # Burkardt, John. "The Truncated Normal Distribution".
diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py
index 47e0f38..a380715 100644
--- a/tensorflow/compiler/tests/tensor_list_ops_test.py
+++ b/tensorflow/compiler/tests/tensor_list_ops_test.py
@@ -102,7 +102,7 @@
       _, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    "Set the max number of elements"):
-        self.assertEqual(sess.run(e), 1.0 * np.ones((7, 15)))
+        self.assertAllEqual(sess.run(e), 1.0 * np.ones((7, 15)))
 
   def testEmptyTensorListMax(self):
     with self.cached_session() as sess, self.test_scope():
@@ -136,6 +136,17 @@
       t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
       self.assertAllEqual(t, [3.0, 2.0])
 
+  def testSetDoesNotUpdatePushIndex(self):
+    with self.cached_session(), self.test_scope():
+      l = list_ops.empty_tensor_list(
+          element_shape=[], element_dtype=dtypes.float32, max_num_elements=2)
+      # SetItem should not change the push index.
+      l = list_ops.tensor_list_set_item(l, 1, 3.)
+      l = list_ops.tensor_list_push_back(l, 5.)
+      l = list_ops.tensor_list_push_back(l, 7.)
+      t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+      self.assertAllEqual(t, [5., 7.])
+
   def testGetSetReserved(self):
     with self.cached_session(), self.test_scope():
       l = list_ops.tensor_list_reserve(
@@ -146,6 +157,25 @@
       t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
       self.assertAllEqual(t, [3.0, 0.0])
 
+  def testSetStackReservedUnknownElementShape(self):
+    with self.cached_session(), self.test_scope():
+      l = list_ops.tensor_list_reserve(
+          element_dtype=dtypes.float32, element_shape=None, num_elements=2)
+      l = list_ops.tensor_list_set_item(l, 0, [3.0, 4.0])
+      t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+      self.assertAllEqual(t, [[3.0, 4.0], [0., 0.]])
+
+  def testPushInEmptyListWithUnknownElementShape(self):
+    with self.cached_session(), self.test_scope():
+      l = list_ops.empty_tensor_list(
+          element_dtype=dtypes.float32, element_shape=None, max_num_elements=2)
+      l = list_ops.tensor_list_push_back(l, [3.0, 4.0])
+      # Pushing an element with a different shape should raise an error.
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "Shape"):
+        l = list_ops.tensor_list_push_back(l, 5.)
+        self.evaluate(
+            list_ops.tensor_list_stack(l, element_dtype=dtypes.float32))
+
   def testGetSetReservedNonScalar(self):
     with self.cached_session() as sess, self.test_scope():
       l = list_ops.tensor_list_reserve(
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index 978ed66..f2e0eac 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -72,6 +72,7 @@
         output = op(pinp)
       result = session.run(output, {pinp: inp})
       if equality_test is None:
+        self.assertEqual(output.dtype, expected.dtype)
         self.assertAllCloseAccordingToType(
             result, expected, rtol=rtol, atol=atol, bfloat16_rtol=0.03)
       else:
@@ -260,7 +261,8 @@
       self._assertOpOutputMatchesExpected(
           math_ops.log1p,
           np.array([[1e-14, 1e-15, 0.6]], dtype=dtype),
-          expected=np.log1p(np.array([[1e-14, 1e-15, 0.6]], dtype=dtype)),
+          expected=np.log1p(np.array([[1e-14, 1e-15, 0.6]],
+                                     dtype=dtype)).astype(dtype),
           rtol=1e-4,
           atol=1e-6)
 
@@ -710,7 +712,7 @@
       self._assertOpOutputMatchesExpected(
           math_ops.abs,
           np.array([[2, -1]], dtype=dtype),
-          expected=np.array([[2, 1]], dtype=dtype))
+          expected=np.array([[2, 1]], dtype=np.real(dtype(0)).dtype))
 
       self._assertOpOutputMatchesExpected(
           math_ops.negative,
@@ -820,6 +822,12 @@
         np.array([1, 2, 0], np.int32),
         expected=np.array([2, 0, 1], dtype=np.int32))
 
+  def testInvertPermutationTwiceIsNoop(self):
+    self._assertOpOutputMatchesExpected(
+        lambda x: array_ops.invert_permutation(array_ops.invert_permutation(x)),
+        np.array([1, 2, 0], np.int32),
+        expected=np.array([1, 2, 0], dtype=np.int32))
+
   def testRank(self):
     rank_op = lambda x: array_ops.rank_internal(x, optimize=False)
     for dtype in self.numeric_types:
@@ -874,6 +882,17 @@
           np.array([[-1], [1], [4]], dtype=dtype),
           expected=np.int32(3))
 
+  def testSizeWithInt64OutType(self):
+
+    def size_op(x):
+      return array_ops.size_internal(x, optimize=False, out_type=np.int64)
+
+    for dtype in self.numeric_types:
+      self._assertOpOutputMatchesExpected(
+          size_op,
+          np.array([[-1], [1], [4]], dtype=dtype),
+          expected=np.int64(3))
+
   def testUnpack(self):
     self._assertOpOutputMatchesExpected(
         array_ops.unstack,
@@ -983,7 +1002,7 @@
   def _assertSoftplusMatchesExpected(self, features, dtype):
     features = np.array(features, dtype=dtype)
     zero = np.asarray(0).astype(dtype)
-    expected = np.logaddexp(zero, features)
+    expected = np.logaddexp(zero, features).astype(dtype)
     self._assertOpOutputMatchesExpected(
         nn_ops.softplus, features, expected=expected, rtol=1e-6, atol=9.1e-6)
 
diff --git a/tensorflow/compiler/tf2tensorrt/BUILD b/tensorflow/compiler/tf2tensorrt/BUILD
index 00d3c8c..63cad6a 100644
--- a/tensorflow/compiler/tf2tensorrt/BUILD
+++ b/tensorflow/compiler/tf2tensorrt/BUILD
@@ -160,6 +160,7 @@
     ],
     srcs_version = "PY2AND3",
     deps = [
+        "//tensorflow/python:errors",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:platform",
         "//tensorflow/python:resources",
@@ -170,13 +171,11 @@
     name = "trt_resources",
     srcs = [
         "utils/trt_int8_calibrator.cc",
-        "utils/trt_resource_manager.cc",
         "utils/trt_resources.cc",
     ],
     hdrs = [
         "utils/trt_int8_calibrator.h",
         "utils/trt_lru_cache.h",
-        "utils/trt_resource_manager.h",
         "utils/trt_resources.h",
     ],
     deps = [
@@ -265,7 +264,6 @@
         "//tensorflow/core:framework_lite",
         "//tensorflow/core:gpu_runtime",
         "//tensorflow/core:graph",
-        "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:devices",
@@ -351,6 +349,7 @@
         "segment/segment.h",
         "segment/union_find.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:graph",
         "//tensorflow/core:lib_proto_parsing",
@@ -360,11 +359,12 @@
     ],
 )
 
-tf_cc_test(
+tf_cuda_cc_test(
     name = "segment_test",
     size = "small",
     srcs = ["segment/segment_test.cc"],
     tags = [
+        "no_cuda_on_cpu_tap",
         "no_windows",
         "nomac",
     ],
@@ -430,7 +430,7 @@
     copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_proto_parsing",
     ],
 )
 
@@ -439,7 +439,7 @@
     srcs = ["utils/test_utils.cc"],
     hdrs = ["utils/test_utils.h"],
     deps = [
-        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_proto_parsing",
         "@com_googlesource_code_re2//:re2",
     ],
 )
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
index f3db425..1f3cae3 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
@@ -30,7 +30,6 @@
 #include "tensorflow/compiler/tf2tensorrt/plugin/trt_plugin_factory.h"
 #include "tensorflow/compiler/tf2tensorrt/segment/segment.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/test_utils.h"
-#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/trt_resources.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
@@ -90,29 +89,40 @@
 Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) {
   // TODO(laigd): move this set to TrtNodeValidator where it should belong.
   // LINT.IfChange
-  static const std::set<string> candidate_ops = {
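+  // Heap-allocated and intentionally leaked so that no destructor runs during
+  // static teardown (a common style rule for function-local statics).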
+  static const auto* candidate_ops = new std::set<string>{
       "Abs",
+      "Acos",
+      "Acosh",
       "Add",
+      "Asin",
+      "Asinh",
+      "Atan",
+      "Atanh",
       "AvgPool",
       "BatchMatMul",
       "BiasAdd",
+      "Ceil",
       "ConcatV2",
       "Const",
       "Conv2D",
       "Conv2DBackpropInput",
+      "Cos",
+      "Cosh",
       "DepthwiseConv2dNative",
       "Div",
       "Exp",
       "ExpandDims",
+      "Floor",
       "FusedBatchNorm",
       "FusedBatchNormV2",
+      "GatherV2",
       "Identity",
       "LeakyRelu",
       "Log",
       "MatMul",
       "Max",
-      "MaxPool",
       "Maximum",
+      "MaxPool",
       "Mean",
       "Min",
       "Minimum",
@@ -126,8 +136,10 @@
       "Relu6",
       "Reshape",
       "Rsqrt",
-      "Rsqrt",
       "Sigmoid",
+      "Sin",
+      "Sinh",
+      "Slice",
       "Snapshot",
       "Softmax",
       "Sqrt",
@@ -136,14 +148,15 @@
       "StridedSlice",
       "Sub",
       "Sum",
+      "Tan",
       "Tanh",
       "TopKV2",
       "Transpose",
   };
   bool is_supported_op_type =
-      (candidate_ops.count(node->type_string()) ||
+      (candidate_ops->count(node->type_string()) ||
        PluginFactoryTensorRT::GetInstance()->IsPlugin(node->type_string()));
-  static const std::set<string> quantize_ops = {
+  static const auto* quantize_ops = new std::set<string>{
       "QuantizeAndDequantizeV2",
       "QuantizeAndDequantizeV3",
       "FakeQuantWithMinMaxVars",
@@ -153,7 +166,7 @@
   // these ops to the relevant tensors. This happens regardless of the value of
   // use_calibration.
   if (precision_mode_ == TrtPrecisionMode::INT8 &&
-      quantize_ops.count(node->type_string())) {
+      quantize_ops->count(node->type_string())) {
     is_supported_op_type = true;
   }
   // LINT.ThenChange(//tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc)
@@ -190,55 +203,6 @@
 
 }  // namespace
 
-// Function to get calibration from ResourceMgr and put them into nodedef.
-tensorflow::Status ConvertCalibGraphToInferGraph(
-    const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph,
-    bool is_dyn_op) {
-  LOG(INFO) << "Starting Calib Conversion";
-  infer_graph->CopyFrom(graph_def);
-  auto trt_rm = TRTResourceManager::instance();
-  auto calib_rm = trt_rm->getManager("TRTCalibration");
-  int num_nodes = infer_graph->node_size();
-  if (!is_dyn_op) {
-    LOG(WARNING) << "Construction of static int8 engine is not implemented "
-                    "yet!. Dynamic engine will be constructed";
-  }
-  for (int i = 0; i < num_nodes; ++i) {
-    auto n = infer_graph->mutable_node(i);
-    if (n->op() == "TRTEngineOp") {
-      VLOG(1) << "Processing " << n->name();
-      const string& container_name = n->attr().at("segment_funcdef_name").s();
-      TRTCalibrationResource* cres = nullptr;
-      auto status = calib_rm->Lookup(container_name, "Calibrator", &cres);
-      if (!status.ok()) {
-        LOG(ERROR) << "Could not get Calibration information. Did you run with "
-                      "calibration data?";
-        return tensorflow::errors::FailedPrecondition(
-            "Need to run graph with calibration data first!");
-      }
-      tensorflow::core::ScopedUnref calib_sc(cres);
-      if (cres->calibrator_) {
-        cres->calibrator_->waitAndSetDone();
-        cres->thr_->join();
-        const auto& calibration_table =
-            cres->calibrator_->getCalibrationTableAsString();
-        if (!calibration_table.size()) {
-          LOG(ERROR) << "Calibration table is empty";
-          return tensorflow::errors::Unknown(
-              "Calibration table is missing. This shouldn't have happened!");
-        }
-        n->mutable_attr()->at("calibration_data").set_s(calibration_table);
-      } else {
-        LOG(ERROR) << "Can't get TRTCalibrator from resource manager!";
-        return tensorflow::errors::Unknown(
-            "Can't get TRTCalibrator from resource manager!");
-      }
-      TF_RETURN_IF_ERROR(calib_rm->Cleanup(container_name));
-    }
-  }
-  return tensorflow::Status::OK();
-}
-
 tensorflow::Status ConvertGraphDefToTensorRT(
     const tensorflow::GraphDef& graph_def,
     const std::vector<string>& output_names, size_t max_batch_size,
@@ -662,8 +626,8 @@
         info.use_calibration,
         /*convert_successfully=*/nullptr));
     TrtUniquePtrType<nvinfer1::IHostMemory> engine_data(engine->serialize());
-    segment_string =
-        string((const char*)engine_data->data(), engine_data->size());
+    segment_string = string(static_cast<const char*>(engine_data->data()),
+                            engine_data->size());
     if (calibrate_int8) {
       // See above comment about why not putting this inside the 'else' branch.
       segment_string = info.segment_graph_def.SerializeAsString();
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h
index 95cf022..80f68d3 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h
@@ -85,12 +85,6 @@
   std::vector<int> cached_engine_batches;  // list of cached engines
 };
 
-// This method extracts calibration information from the resource managers
-// and puts them in to engine nodedefs.
-tensorflow::Status ConvertCalibGraphToInferGraph(
-    const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def,
-    bool is_dyn_op);
-
 // - max_batch_size: maximum batch size which can be used for inference for
 //   optimization targets inference run with max batch size.
 // - max_workspace_size_bytes: The upper bound of memory allowance for engine
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc
index cabc6cc..1a75418 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc
@@ -75,7 +75,7 @@
                                          feed, const_1, matmul_attrs);
 
   // Unsupported op.
-  auto unsupported_op = ops::Sin(s.WithOpName("sin"), feed);
+  auto unsupported_op = ops::Erf(s.WithOpName("erf"), feed);
 
   // Incompatible input.
   auto incompatible_feed = ops::Placeholder(s.WithOpName("feed"), DT_DOUBLE);
@@ -108,7 +108,7 @@
         "transpose_a is not supported for TensorRT FullyConnected "
         "(op: MatMul), at: incompatible_matmul");
     ExpectStatus(selector.IsTensorRTCandidate(unsupported_op.operation.node()),
-                 error::UNIMPLEMENTED, "Op type Sin is not supported");
+                 error::UNIMPLEMENTED, "Op type Erf is not supported");
     ExpectStatus(
         selector.IsTensorRTCandidate(
             matmul_with_incompatible_input.operation.node()),
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
index 79b1cba..9a2ac8c 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
@@ -30,7 +30,6 @@
 #include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
 #include "tensorflow/compiler/tf2tensorrt/plugin/trt_plugin_factory.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
-#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/trt_resources.h"
 #include "tensorflow/core/framework/node_def.pb.h"  // NOLINT
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -94,7 +93,6 @@
 namespace convert {
 using absl::StrAppend;
 using absl::StrCat;
-using ::tensorflow::str_util::Split;
 
 inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype,
                                        nvinfer1::DataType* trt_dtype) {
@@ -194,6 +192,15 @@
   *trt_dims = TensorShapeToTrtDims(shape, /*ignore_first_dim=*/true);
   *batch_size = shape.dim_size(0);
 
+  // Don't convert empty tensors (dim value of 0).
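+  // e.g. an input of shape [8, 0, 3] is rejected because dimension 1 is 0.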
+  for (int d = 1; d < shape.dims(); ++d) {
+    if (shape.dim_size(d) == 0) {
+      return errors::Unimplemented(
+          "Input tensor with shape ", shape.DebugString(),
+          " is an empty tensor, which is not supported by TRT");
+    }
+  }
+
   if (validation_only) return Status::OK();
   // Following are validations at runtime.
 
@@ -297,8 +304,8 @@
 
   const int max_nb_dims = nvinfer1::Dims::MAX_DIMS + 1;
   auto compute_output_dims =
-      [max_nb_dims](const TRT_TensorOrWeights& input, int broadcast_num_dims,
-                    int* output_dims_array, nvinfer1::Dims* output_dims) {
+      [](const TRT_TensorOrWeights& input, int broadcast_num_dims,
+         int* output_dims_array, nvinfer1::Dims* output_dims) {
         const nvinfer1::Dims input_dims = input.GetTrtDims();
         std::fill(output_dims_array, output_dims_array + max_nb_dims, 1);
         std::copy(input_dims.d, input_dims.d + input_dims.nbDims,
@@ -380,6 +387,32 @@
   return Status::OK();
 }
 
+// Convert an axis from TF format to TRT format while validating. TF format
+// includes the batch dimension, while TRT does not. TF can also use negative
+// indices.
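+// Illustrative example: with trt_nb_dims = 3 (a rank-4 TF tensor), tf_axis
+// values of 3 and -1 both map to trt_axis = 2, while tf_axis = 0 (the batch
+// dimension) is rejected.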
+// TODO(tmorris): Use this method in more ops.
+tensorflow::Status ConvertAxis(int tf_axis, int trt_nb_dims,
+                               absl::string_view node_name, int* trt_axis) {
+  const int tf_nb_dims = trt_nb_dims + 1;
+  // Check bounds.
+  if (tf_axis < -tf_nb_dims || tf_axis >= tf_nb_dims) {
+    return tensorflow::errors::InvalidArgument(
+        "Axis value of ", tf_axis, " is out of bounds, must be in range [",
+        -tf_nb_dims, ", ", tf_nb_dims, "), at ", node_name);
+  }
+  // Make negative axis positive.
+  if (tf_axis < 0) tf_axis += tf_nb_dims;
+  // Don't allow axis to be the batch dimension.
+  if (tf_axis == 0) {
+    return tensorflow::errors::Unimplemented(
+        "TensorRT does not allow manipulation of the batch dimension, at ",
+        node_name);
+  }
+  // Remove batch dimension.
+  *trt_axis = tf_axis - 1;
+  return Status::OK();
+}
+
 inline bool DimsEqual(const nvinfer1::Dims& dim_l,
                       const nvinfer1::Dims& dim_r) {
   if (dim_l.nbDims != dim_r.nbDims) {
@@ -393,6 +426,15 @@
   return true;
 }
 
+bool AllLengthsEqual(const std::vector<std::vector<int>>& inputs) {
+  if (inputs.empty()) return true;
+  const size_t length = inputs.at(0).size();
+  for (size_t i = 1; i < inputs.size(); i++) {
+    if (inputs.at(i).size() != length) return false;
+  }
+  return true;
+}
+
 inline nvinfer1::Dims GetTrtDimsForTensor(const tensorflow::Tensor& tensor) {
   nvinfer1::Dims dims;
   dims.nbDims = tensor.dims();
@@ -530,6 +572,16 @@
   float getDynamicRange() const override { return 0; }
 #endif
 
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
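+  // Trivial implementations of the dynamic range API introduced in TRT 5.1;
+  // the range is reported as always set with zero bounds.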
+  bool dynamicRangeIsSet() const override { return true; }
+
+  void resetDynamicRange() override {}
+
+  float getDynamicRangeMin() const override { return 0.f; }
+
+  float getDynamicRangeMax() const override { return 0.f; }
+#endif
+
  private:
   nvinfer1::DataType trt_dtype_;
   nvinfer1::Dims trt_dims_;
@@ -921,7 +973,7 @@
   for (size_t i = 0; i < outputs.size(); ++i) {
     TRT_TensorOrWeights& output = outputs[i];
     string output_name = node_def.name();
-    if (i != 0) output_name = StrCat(output_name, ":", i);
+    if (i != 0) absl::StrAppend(&output_name, ":", i);
     // We need to check the name before setting it. If the input is one of the
     // engine input, setting the name here will overwrite engine input
     // bindings which will cause runtime error.
@@ -2107,7 +2159,7 @@
   // Mark axes to remove by setting them to 0.
   TFAttrs attrs(node_def);
   auto squeeze_dims = attrs.get<std::vector<int>>("squeeze_dims");
-  if (squeeze_dims.size() == 0) {
+  if (squeeze_dims.empty()) {
     return tensorflow::errors::Unimplemented(
         "Squeeze is only implemented for explicit dims, at ", node_def.name());
   }
@@ -2152,100 +2204,73 @@
   return tensorflow::Status::OK();
 }
 
-// Gets the bounds (start or end) from the weights of a StridedSlice op.
-tensorflow::Status GetStridedSliceBound(const std::vector<int>& input_dims,
-                                        const TRT_ShapedWeights& bound_weights,
-                                        int mask, bool begin, string node_name,
-                                        std::vector<int>* output_bound) {
-  const string bound_name = (begin) ? "begin" : "end";
-  const int* weights_ptr = static_cast<int*>(bound_weights.GetValues());
-  *output_bound =
-      std::vector<int>(weights_ptr, weights_ptr + bound_weights.count());
-  if (output_bound->size() != input_dims.size()) {
-    return tensorflow::errors::InvalidArgument(
-        "StridedSlice \"", bound_name, "\" specified ",
-        std::to_string(output_bound->size()), " dimensions, but input rank is ",
-        std::to_string(input_dims.size()), ", at ", node_name);
-  }
-  for (int i = 0; i < output_bound->size(); i++) {
-    if ((1 << i) & mask) {
-      // Apply mask.
-      (*output_bound)[i] = (begin) ? 0 : input_dims[i];
-      // Masked bound will always result in a valid, non-negative bound, so we
-      // don't need the following checks. For the common case of using masks on
-      // a undefined batch dim (-1), we specifically don't want to do the
-      // following checks because they will erroneously detect an out of range
-      // bound or try to correct the negative value.
-      continue;
-    }
-    // Make sure bound is valid.
-    if (((*output_bound)[i] < -input_dims[i]) ||
-        ((*output_bound)[i] > input_dims[i])) {
-      return tensorflow::errors::InvalidArgument(
-          bound_name, " value of ", std::to_string((*output_bound)[i]),
-          " for StridedSlice is invalid, must be in the range "
-          "[-dim_size(i), dim_size(i)], at ",
-          node_name);
-    }
-    // Convert negative values to their positive equivalent.
-    if ((*output_bound)[i] < 0) {
-      (*output_bound)[i] += input_dims[i];
-    }
-  }
-  return tensorflow::Status::OK();
-}
-
-tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
-  const auto& inputs = params->inputs;
+tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params,
+                                             const TRT_TensorOrWeights& input,
+                                             std::vector<int> begin,
+                                             std::vector<int> size,
+                                             const std::vector<int>& stride) {
   const auto& node_def = params->node_def;
-  TF_RETURN_IF_ERROR(CheckInputsWeights(
-      *params,
-      {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}}));
   // Get input dims.
-  nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
+  nvinfer1::Dims dims = input.GetTrtDims();
   std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
-  if (inputs.at(0).is_tensor()) {
-    // Temporarily add batch dimension so that indexes line up properly.
-    input_dims.insert(input_dims.begin(), inputs.at(0).batch_size());
-  }
-  if (input_dims.size() > 4) {
-    return tensorflow::errors::Unimplemented(
-        "StridedSlice is not implemented for tensors with rank > 4, at ",
-        node_def.name());
-  }
-  TFAttrs attrs(node_def);
-  // Get begin and end bounds per axis.
-  std::vector<int> begin, end;
-  TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(1).weights(),
-                                          attrs.get<int>("begin_mask"), true,
-                                          node_def.name(), &begin));
-  TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(2).weights(),
-                                          attrs.get<int>("end_mask"), false,
-                                          node_def.name(), &end));
-  // Get strides per axis (must all be 1).
-  TRT_ShapedWeights stride_weights = inputs.at(3).weights();
-  const int* stride_weights_ptr = static_cast<int*>(stride_weights.GetValues());
-  std::vector<int> strides(stride_weights_ptr,
-                           stride_weights_ptr + stride_weights.count());
-  for (int x : strides) {
-    if (x != 1) {
-      return tensorflow::errors::Unimplemented(
-          "StridedSlice is only implemented for stride of 1, at ",
+  // Temporarily add batch dimension so that indexes line up properly.
+  input_dims.insert(input_dims.begin(), -1);
+  // Check bounds.
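+  // e.g. for input_dims[i] = 5, begin[i] must lie in [0, 5] and
+  // begin[i] + size[i] must also lie in [0, 5], with size[i] > 0.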
+  for (int i = 1; i < input_dims.size(); i++) {
+    if (begin[i] < 0 || begin[i] > input_dims[i]) {
+      return tensorflow::errors::InvalidArgument(
+          "\"begin\" for dimension ", std::to_string(i), " in ", node_def.op(),
+          " is out of range, at ", node_def.name());
+    }
+    const int end = begin[i] + size[i];
+    if (end < 0 || end > input_dims[i]) {
+      return tensorflow::errors::InvalidArgument(
+          "\"begin\" + \"size\" for dimension ", std::to_string(i), " in ",
+          node_def.op(), " is out of range, at ", node_def.name());
+    }
+    if (size[i] <= 0) {
+      return tensorflow::errors::InvalidArgument(
+          "\"size\" cannot be negative or zero for ", node_def.op(), ", at ",
           node_def.name());
     }
   }
-  // Unsupported mask options.
-  for (const string& attr :
-       {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) {
-    int attr_val = attrs.get<int>(attr);
-    if (attr_val != 0) {
+// TRT 5.1 adds a slice layer. For older versions, we attempt to use the
+// padding layer with negative padding.
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
+  // Use ISliceLayer.
+  nvinfer1::Dims begin_dims, size_dims, stride_dims;
+  TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(begin, &begin_dims,
+                                               /*ignore_first_dim=*/true));
+  TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &size_dims,
+                                               /*ignore_first_dim=*/true));
+  TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(stride, &stride_dims,
+                                               /*ignore_first_dim=*/true));
+  if (params->validation_only) return Status::OK();
+
+  nvinfer1::ISliceLayer* layer = params->converter->network()->addSlice(
+      *const_cast<nvinfer1::ITensor*>(input.tensor()), begin_dims, size_dims,
+      stride_dims);
+  TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
+  params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
+  return tensorflow::Status::OK();
+#else
+  // Use IPaddingLayer.
+  // Strides must be 1 in this case.
+  for (int x : stride) {
+    if (x != 1) {
       return tensorflow::errors::Unimplemented(
-          attr, " is not supported for StridedSlice, at ", node_def.name());
+          "Strides other than 1 are not supported with this version of TRT, "
+          "at ",
+          node_def.name());
     }
   }
-
-  nvinfer1::ITensor* tensor =
-      const_cast<nvinfer1::ITensor*>(inputs.at(0).tensor());
+  // Rank must be 2, 3 or 4.
+  if (input_dims.size() > 4) {
+    return tensorflow::errors::Unimplemented(node_def.op(),
+                                             " for tensors with rank > 4 is "
+                                             "not supported in this version of "
+                                             "TRT, at ",
+                                             node_def.name());
+  }
   // Reshape if necessary to 4-D, since IPaddingLayer requires a 4-D input.
   const bool need_reshape = (input_dims.size() != 4);
   int reshape_dims_added = 0;
@@ -2255,7 +2280,7 @@
     while (input_dims.size() < 4) {
       input_dims.insert(input_dims.begin() + 1, 1);
       begin.insert(begin.begin() + 1, 0);
-      end.insert(end.begin() + 1, 1);
+      size.insert(size.begin() + 1, 1);
       reshape_dims_added++;
     }
     TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &reshape_dims,
@@ -2263,23 +2288,22 @@
   }
   // Find dimensions which need to be sliced.
   std::vector<int> pad_dims;
-  for (int i = 0; i < input_dims.size(); i++) {
-    if ((begin[i] != 0) || (end[i] != input_dims[i])) {
-      if (i == 0) {
-        return tensorflow::errors::Unimplemented(
-            "StridedSlice can't modify batch dim, at ", node_def.name());
-      } else if ((end[i] - begin[i]) < 0) {
-        return tensorflow::errors::InvalidArgument(
-            "New size of sliced dimension is negative, at ", node_def.name());
-      }
+  for (int i = 1; i < input_dims.size(); i++) {
+    if ((begin[i] != 0) || (begin[i] + size[i] != input_dims[i])) {
       pad_dims.push_back(i);
     }
   }
-  if (pad_dims.size() == 0) {
-    // No dimensions are changed. We could create a padding layer anyway with
-    // values of 0.
+  if (pad_dims.empty()) {
+    // No dimensions are changed, so this is a no-op. We could just return the
+    // input without creating a new layer. However, TRT will crash if we try
+    // to build an engine that contains no layers, so we add a no-op shuffle
+    // to keep our unit tests from breaking.
+    // TODO(tmorris): Allow empty engines in the unit tests and return the input
+    // as output here.
     if (params->validation_only) return Status::OK();
-    params->outputs->push_back(inputs.at(0));
+    nvinfer1::IShuffleLayer* layer = params->converter->network()->addShuffle(
+        *const_cast<nvinfer1::ITensor*>(input.tensor()));
+    params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
     return tensorflow::Status::OK();
   } else if (pad_dims.size() == 1) {
     // Only one dim is modified but we have to have 2, mark a second dim which
@@ -2292,16 +2316,19 @@
     }
   } else if (pad_dims.size() > 2) {
     return tensorflow::errors::Unimplemented(
-        "StridedSlice can only modify 2 dimensions, at ", node_def.name());
+        node_def.op(),
+        " can only modify up to 2 dimensions in this version of TRT, at ",
+        node_def.name());
   }
   std::sort(pad_dims.begin(), pad_dims.end());
   // Convert to pre/post padding values. Since TRT does not have a StridedSlice
-  // or Slice layer, we instead create an IPaddingLayer with negative padding.
+  // or Slice layer prior to 5.1, we instead create an IPaddingLayer with
+  // negative padding.
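+  // e.g. a dim of size 5 with begin = 1 and size = 3 gives pre_padding = -1
+  // and post_padding = (1 + 3) - 5 = -1, trimming one element on each side.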
   nvinfer1::DimsHW pre_padding, post_padding;
   for (int i = 0; i < pad_dims.size(); i++) {
     const int axis = pad_dims[i];
     pre_padding.d[i] = -begin[axis];
-    post_padding.d[i] = end[axis] - input_dims[axis];
+    post_padding.d[i] = (begin[axis] + size[axis]) - input_dims[axis];
   }
 
   // IPaddingLayer will always apply the padding to dims 2,3 (input format is
@@ -2321,10 +2348,11 @@
   if (params->validation_only) return Status::OK();
 
   // Start conversion.
+  nvinfer1::ITensor* tensor = const_cast<nvinfer1::ITensor*>(input.tensor());
   if (need_reshape) {
     const nvinfer1::ITensor* output_tensor = nullptr;
     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
-        inputs.at(0), reshape_dims, &output_tensor));
+        input, reshape_dims, &output_tensor));
     tensor = const_cast<nvinfer1::ITensor*>(output_tensor);
   }
   if (need_transpose) {
@@ -2333,7 +2361,6 @@
         tensor, transpose_order, &output_tensor));
     tensor = const_cast<nvinfer1::ITensor*>(output_tensor);
   }
-
   // Add padding layer
   nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
       *const_cast<nvinfer1::ITensor*>(tensor), pre_padding, post_padding);
@@ -2341,7 +2368,6 @@
   params->converter->MarkQuantizationRangesAsInferrable(tensor,
                                                         layer->getOutput(0));
   tensor = layer->getOutput(0);
-
   // Restore transpose
   if (need_transpose) {
     const nvinfer1::ITensor* output_tensor = nullptr;
@@ -2354,7 +2380,7 @@
     // Calculate output dimensions
     for (int i = 0; i < pad_dims.size(); i++) {
       const int axis = pad_dims[i];
-      input_dims[axis] = end[axis] - begin[axis];
+      input_dims[axis] = size[axis];
     }
     // Remove added 1 dimensions
     for (int i = 0; i < reshape_dims_added; i++) {
@@ -2378,6 +2404,135 @@
   params->outputs->push_back(
       TRT_TensorOrWeights(const_cast<nvinfer1::ITensor*>(tensor)));
   return tensorflow::Status::OK();
+#endif
+}
+
+tensorflow::Status ConvertSlice(OpConverterParams* params) {
+  const auto& inputs = params->inputs;
+  const auto& node_def = params->node_def;
+  TF_RETURN_IF_ERROR(CheckInputsWeights(
+      *params, {{"input", false}, {"begin", true}, {"size", true}}));
+  std::vector<int> begin = inputs.at(1).weights().ToVector<int>();
+  std::vector<int> size = inputs.at(2).weights().ToVector<int>();
+  // Get input dims.
+  nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
+  std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
+  // Add batch dimension so that indexes line up properly.
+  input_dims.insert(input_dims.begin(), inputs.at(0).batch_size());
+  if (!AllLengthsEqual({input_dims, begin, size})) {
+    return tensorflow::errors::InvalidArgument(
+        "Length of begin and size arguments must equal rank of input for "
+        "Slice, at ",
+        node_def.name());
+  }
+  // Check that batch dimension is unmodified.
+  const bool begin_is_modified = begin[0] != 0;
+  // If size[0] is not -1, we can only know whether the batch dimension is
+  // unmodified when the batch size is defined. When the batch size is
+  // undefined, we don't convert, to be safe.
+  const bool batch_size_is_defined = input_dims[0] > 0;
+  const bool size_is_modified =
+      size[0] != -1 && (!batch_size_is_defined || size[0] != input_dims[0]);
+  if (begin_is_modified || size_is_modified) {
+    return tensorflow::errors::Unimplemented(
+        "TensorRT does not allow modifications to the batch dimension, at ",
+        node_def.name());
+  }
+  // A size of -1 means "take all remaining elements".
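+  // e.g. begin = 1 on a dimension of size 5 turns size = -1 into 4.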
+  for (int i = 1; i < input_dims.size(); i++) {
+    if (size[i] == -1) {
+      size[i] = input_dims[i] - begin[i];
+    }
+  }
+  // Stride is 1 for all dims.
+  std::vector<int> stride(begin.size(), 1);
+  return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, stride);
+}
+
+tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
+  const auto& inputs = params->inputs;
+  const auto& node_def = params->node_def;
+  TF_RETURN_IF_ERROR(CheckInputsWeights(
+      *params,
+      {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}}));
+  // Get input dims.
+  nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
+  std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
+  // Add batch dimension so that indexes line up properly.
+  input_dims.insert(input_dims.begin(), inputs.at(0).batch_size());
+  // Get begin and end bounds per axis.
+  std::vector<int> begin = inputs.at(1).weights().ToVector<int>();
+  std::vector<int> end = inputs.at(2).weights().ToVector<int>();
+  std::vector<int> stride = inputs.at(3).weights().ToVector<int>();
+  if (!AllLengthsEqual({input_dims, begin, end, stride})) {
+    return tensorflow::errors::InvalidArgument(
+        "Length of begin, end, and stride arguments must equal rank of input "
+        "for StridedSlice, at ",
+        node_def.name());
+  }
+  // Unsupported mask options.
+  TFAttrs attrs(node_def);
+  for (const string& attr :
+       {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) {
+    int attr_val = attrs.get<int>(attr);
+    if (attr_val != 0) {
+      return tensorflow::errors::Unimplemented(
+          attr, " is not supported for StridedSlice, at ", node_def.name());
+    }
+  }
+  const int begin_mask = attrs.get<int>("begin_mask");
+  const int end_mask = attrs.get<int>("end_mask");
+  // Check that batch dimension is unmodified.
+  const bool begin_is_modified = !(begin_mask & 1) && begin[0] != 0;
+  const bool stride_is_modified = stride[0] != 1;
+  // If the end mask is not set, we can only know whether the batch dimension
+  // is unmodified when the batch size is defined. When the batch size is
+  // undefined, we don't convert, to be safe.
+  const bool batch_size_is_defined = input_dims[0] > 0;
+  const bool end_is_modified =
+      !(end_mask & 1) && (!batch_size_is_defined || end[0] != input_dims[0]);
+  if (begin_is_modified || stride_is_modified || end_is_modified) {
+    return tensorflow::errors::Unimplemented(
+        "TensorRT does not allow modifications to the batch dimension, at ",
+        node_def.name());
+  }
+  // Standardize begin and end bounds by applying masks, making negative
+  // values positive, and correcting out-of-bounds ranges (StridedSlice does
+  // this silently).
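+  // e.g. with input_dims[i] = 3, begin[i] = -1 becomes 2 and end[i] = 5 is
+  // clamped to 3.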
+  for (int i = 1; i < input_dims.size(); i++) {
+    // Begin
+    if ((1 << i) & begin_mask) {
+      begin[i] = 0;
+    } else if (begin[i] < 0) {
+      begin[i] += input_dims[i];
+    }
+    begin[i] = std::max(0, std::min(begin[i], input_dims[i]));
+    // End
+    if ((1 << i) & end_mask) {
+      end[i] = input_dims[i];
+    } else if (end[i] < 0) {
+      end[i] += input_dims[i];
+    }
+    end[i] = std::max(0, std::min(end[i], input_dims[i]));
+  }
+  // Negative or zero strides currently not supported.
+  for (int i = 0; i < input_dims.size(); i++) {
+    if (stride[i] <= 0) {
+      return tensorflow::errors::Unimplemented(
+          "Negative or zero stride values are not supported for StridedSlice, "
+          "at ",
+          node_def.name());
+    }
+  }
+  // The TRT Slice layer uses (begin, size) instead of (begin, end).
+  std::vector<int> size(input_dims.size());
+  for (int i = 0; i < input_dims.size(); i++) {
+    // Divide by stride (round up)
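+    // e.g. begin = 1, end = 6, stride = 2 -> size = (6 - 1 + 2 - 1) / 2 = 3
+    // (the elements at indices 1, 3, 5).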
+    size[i] = (end[i] - begin[i] + stride[i] - 1) / stride[i];
+  }
+  return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, stride);
 }
 
 tensorflow::Status ConvertConv2D(OpConverterParams* params) {
@@ -2947,58 +3102,104 @@
   return status;
 }
 
+tensorflow::Status ConvertRsqrt(OpConverterParams* params) {
+  const auto& inputs = params->inputs;
+  const auto& node_def = params->node_def;
+  TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
+  if (params->validation_only) return tensorflow::Status::OK();
+
+  // TODO(tmorris): params->converter is null during validation. Allow
+  // precision_mode and use_calibration to be accessed during validation and
+  // include this check in validation.
+  // We will need a quantization range for intermediate tensor if not using
+  // calibration.
+  //
+  //   x -> [Sqrt] -> sqrt(x) -> [Recip] -> 1/sqrt(x)
+  //                     ^
+  //               need range here
+  if (params->converter->precision_mode() == TrtPrecisionMode::INT8 &&
+      !params->converter->use_calibration()) {
+    return errors::Unimplemented(
+        "Intermediate quantization range cannot be determined without"
+        " calibration for Rsqrt, consider replacing with "
+        "Sqrt -> FakeQuant -> Reciprocal ops, at ",
+        node_def.name());
+  }
+  // Start conversion.
+  const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+  // Sqrt
+  nvinfer1::IUnaryLayer* sqrt_layer = params->converter->network()->addUnary(
+      *const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::UnaryOperation::kSQRT);
+  TFTRT_RETURN_ERROR_IF_NULLPTR(sqrt_layer, node_def.name());
+  // Recip
+  nvinfer1::IUnaryLayer* recip_layer = params->converter->network()->addUnary(
+      *sqrt_layer->getOutput(0), nvinfer1::UnaryOperation::kRECIP);
+  TFTRT_RETURN_ERROR_IF_NULLPTR(recip_layer, node_def.name());
+  params->outputs->push_back(TRT_TensorOrWeights(recip_layer->getOutput(0)));
+  return tensorflow::Status::OK();
+}
+
+const std::unordered_map<string, nvinfer1::UnaryOperation>*
+UnaryOperationMap() {
+  static auto* const m =
+      new std::unordered_map<string, nvinfer1::UnaryOperation>({
+        {"Neg", nvinfer1::UnaryOperation::kNEG},
+            {"Exp", nvinfer1::UnaryOperation::kEXP},
+            {"Log", nvinfer1::UnaryOperation::kLOG},
+            {"Sqrt", nvinfer1::UnaryOperation::kSQRT},
+            {"Abs", nvinfer1::UnaryOperation::kABS},
+            {"Reciprocal", nvinfer1::UnaryOperation::kRECIP},
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
+            {"Sin", nvinfer1::UnaryOperation::kSIN},
+            {"Cos", nvinfer1::UnaryOperation::kCOS},
+            {"Tan", nvinfer1::UnaryOperation::kTAN},
+            {"Sinh", nvinfer1::UnaryOperation::kSINH},
+            {"Cosh", nvinfer1::UnaryOperation::kCOSH},
+            {"Asin", nvinfer1::UnaryOperation::kASIN},
+            {"Acos", nvinfer1::UnaryOperation::kACOS},
+            {"Atan", nvinfer1::UnaryOperation::kATAN},
+            {"Asinh", nvinfer1::UnaryOperation::kASINH},
+            {"Acosh", nvinfer1::UnaryOperation::kACOSH},
+            {"Atanh", nvinfer1::UnaryOperation::kATANH},
+            {"Ceil", nvinfer1::UnaryOperation::kCEIL},
+            {"Floor", nvinfer1::UnaryOperation::kFLOOR},
+#endif
+      });
+  return m;
+}
+
 tensorflow::Status ConvertUnary(OpConverterParams* params) {
   const auto& inputs = params->inputs;
   const auto& node_def = params->node_def;
-  static const std::unordered_map<string, nvinfer1::UnaryOperation> ops{
-      {"Neg", nvinfer1::UnaryOperation::kNEG},
-      {"Exp", nvinfer1::UnaryOperation::kEXP},
-      {"Log", nvinfer1::UnaryOperation::kLOG},
-      {"Sqrt", nvinfer1::UnaryOperation::kSQRT},
-      {"Abs", nvinfer1::UnaryOperation::kABS},
-      {"Reciprocal", nvinfer1::UnaryOperation::kRECIP},
-  };
   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
-
-  // TODO(jie): check type
-  const nvinfer1::ITensor* tensor = nullptr;
-  TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
-      inputs.at(0), inputs.at(0).GetTrtDims(), &tensor));
-
-  nvinfer1::IUnaryLayer* layer;
-  if (node_def.op() == "Rsqrt") {
-    // We will need a quantization range for intermediate tensor if not using
-    // calibration.
-    //
-    //   x -> [Sqrt] -> sqrt(x) -> [Recip] -> 1/sqrt(x)
-    //                     ^
-    //               need range here
-    if (params->converter->precision_mode() == TrtPrecisionMode::INT8 &&
-        !params->converter->use_calibration()) {
-      return errors::Unimplemented(
-          "Intermediate quantization range cannot be determined without"
-          " calibration for Rsqrt, consider replacing with "
-          "Sqrt -> FakeQuant -> Reciprocal ops, at ",
-          node_def.name());
-    }
-    layer = params->converter->network()->addUnary(
-        *const_cast<nvinfer1::ITensor*>(tensor),
-        nvinfer1::UnaryOperation::kSQRT);
-    TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
-    tensor = layer->getOutput(0);
-    layer = params->converter->network()->addUnary(
-        *const_cast<nvinfer1::ITensor*>(tensor),
-        nvinfer1::UnaryOperation::kRECIP);
-  } else if (ops.count(node_def.op()) != 0) {
-    layer = params->converter->network()->addUnary(
-        *const_cast<nvinfer1::ITensor*>(tensor), ops.at(node_def.op()));
-  } else {
-    return tensorflow::errors::InvalidArgument(
-        "Binary op: ", node_def.op(), " not supported, at ", node_def.name());
+  auto op_pair = UnaryOperationMap()->find(node_def.op());
+  if (op_pair == UnaryOperationMap()->end()) {
+    return tensorflow::errors::Unimplemented(
+        "Unary op: ", node_def.op(), " not supported at: ", node_def.name());
   }
+  if (params->validation_only) return tensorflow::Status::OK();
 
+  // Start conversion.
+  const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+  nvinfer1::IUnaryLayer* layer = params->converter->network()->addUnary(
+      *const_cast<nvinfer1::ITensor*>(tensor), op_pair->second);
   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
+
+  // Set quantization ranges.
+  if (node_def.op() == "Sin" || node_def.op() == "Cos") {
+    params->converter->ProvideQuantizationRange(output_tensor, -1.0f, 1.0f);
+  } else if (node_def.op() == "Asin" || node_def.op() == "Atan") {
+    params->converter->ProvideQuantizationRange(output_tensor, -M_PI_2, M_PI_2);
+  } else if (node_def.op() == "Acos") {
+    params->converter->ProvideQuantizationRange(output_tensor, 0.0f, M_PI);
+  } else if (node_def.op() == "Neg" || node_def.op() == "Abs") {
+    // Neg and Abs will have the same range as the input, since TRT uses
+    // symmetric quantization.
+    // TODO(tmorris): Should we infer ranges for Ceil and Floor as well?
+    params->converter->MarkQuantizationRangesAsInferrable(
+        const_cast<nvinfer1::ITensor*>(tensor), output_tensor);
+  }
   params->outputs->push_back(
       TRT_TensorOrWeights(const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
@@ -3139,7 +3340,7 @@
   }
 
   // No padding at all, we should exit
-  if (pad_index.size() == 0) {
+  if (pad_index.empty()) {
     params->outputs->push_back(inputs.at(0));
     return tensorflow::Status::OK();
   }
@@ -3329,7 +3530,7 @@
   TRT_ShapedWeights dummy_power_weights(parameter_type);
   size_t nweight = 0;
   for (int i = 1; i < 5; i++) {
-    nweight = std::max(nweight, (size_t)inputs.at(i).weights().count());
+    nweight = std::max<size_t>(nweight, inputs.at(i).weights().count());
   }
   TRT_ShapedWeights* ptr_shape_weights = nullptr;
   for (int i = 1; i < 5; i++) {
@@ -3414,6 +3615,29 @@
   return tensorflow::Status::OK();
 }
 
+tensorflow::Status ConvertGather(OpConverterParams* params) {
+  const auto& inputs = params->inputs;
+  const auto& node_def = params->node_def;
+  TF_RETURN_IF_ERROR(CheckInputsWeights(
+      *params, {{"params", false}, {"indices", false}, {"axis", true}}));
+  absl::Span<const int> axis = inputs.at(2).weights().GetSpan<int>();
+  if (axis.size() != 1) {
+    return tensorflow::errors::InvalidArgument(
+        "Axis for GatherV2 must be a scalar, at ", node_def.name());
+  }
+  int trt_axis = 0;
+  TF_RETURN_IF_ERROR(ConvertAxis(axis[0], inputs.at(0).GetTrtDims().nbDims,
+                                 node_def.name(), &trt_axis));
+  if (params->validation_only) return Status::OK();
+
+  nvinfer1::IGatherLayer* layer = params->converter->network()->addGather(
+      *const_cast<nvinfer1::ITensor*>(inputs.at(0).tensor()),
+      *const_cast<nvinfer1::ITensor*>(inputs.at(1).tensor()), trt_axis);
+  TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
+  params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
+  return Status::OK();
+}
+
 tensorflow::Status ConvertMatMulHelper(OpConverterParams* params,
                                        TRT_TensorOrWeights tensor_input,
                                        TRT_ShapedWeights weights_raw,
@@ -3644,11 +3868,14 @@
   (*registration)["Conv2DBackpropInput"] = ConvertConv2DBackpropInput;
   (*registration)["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
   (*registration)["ExpandDims"] = ConvertExpandDims;
+  (*registration)["GatherV2"] = ConvertGather;
   (*registration)["LeakyRelu"] = ConvertLeakyRelu;
   (*registration)["MatMul"] = ConvertMatMul;
   (*registration)["Pad"] = ConvertPad;
   (*registration)["Relu6"] = ConvertRelu6;
   (*registration)["Reshape"] = ConvertReshape;
+  (*registration)["Rsqrt"] = ConvertRsqrt;
+  (*registration)["Slice"] = ConvertSlice;
   (*registration)["Square"] = ConvertSquare;
   (*registration)["Squeeze"] = ConvertSqueeze;
   (*registration)["StridedSlice"] = ConvertStridedSlice;
@@ -3673,6 +3900,9 @@
   for (auto normalization_op_type : {"FusedBatchNorm", "FusedBatchNormV2"}) {
     (*registration)[normalization_op_type] = ConvertFusedBatchNorm;
   }
+  for (auto unary_op_pair : *UnaryOperationMap()) {
+    (*registration)[unary_op_pair.first] = ConvertUnary;
+  }
 }
 
 void TrtNodeValidator::RegisterOpValidators() {
@@ -3685,14 +3915,6 @@
   op_registry_["Identity"] = ConvertIdentity;  // Identity should be removed
   op_registry_["Snapshot"] = ConvertIdentity;  // Snapshot should be removed
 
-  op_registry_["Rsqrt"] = ConvertUnary;
-  op_registry_["Reciprocal"] = ConvertUnary;
-  op_registry_["Exp"] = ConvertUnary;
-  op_registry_["Log"] = ConvertUnary;
-  op_registry_["Sqrt"] = ConvertUnary;
-  op_registry_["Abs"] = ConvertUnary;
-  op_registry_["Neg"] = ConvertUnary;
-
   op_registry_["Sum"] = ConvertReduce;
   op_registry_["Prod"] = ConvertReduce;
   op_registry_["Max"] = ConvertReduce;
@@ -3722,8 +3944,12 @@
   builder->setMaxWorkspaceSize(max_workspace_size_bytes);
   builder->setGpuAllocator(allocator);
   if (precision_mode == TrtPrecisionMode::FP16) {
-    builder->setHalf2Mode(true);
+    builder->setFp16Mode(true);
   } else if (precision_mode == TrtPrecisionMode::INT8) {
+    // Setting FP16 mode as well allows TRT to also consider FP16 kernels and
+    // use them in situations where they are faster than INT8 or where INT8 is
+    // not supported for a given layer.
+    builder->setFp16Mode(true);
     builder->setInt8Mode(true);
     if (use_calibration) {
       builder->setInt8Calibrator(calibrator);
@@ -3899,7 +4125,7 @@
     local_scope = GetCommonNameScope(local_scope, node->name());
     old_to_new_id_map[node->id()] = segment_def->node_size();
     auto snode = segment_def->add_node();
-    snode->CopyFrom(node->def());
+    *snode = node->def();
     VLOG(2) << "Copying " << snode->name() << " to subgraph";
   }
   // Update the inputs of the new input nodes to point to placeholder nodes.
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
index d1e30eb..7b37173 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
@@ -190,6 +190,17 @@
 
   string DebugString() const;
 
+  // Returns a read-only span over the underlying weight values.
+  template <typename T>
+  absl::Span<const T> GetSpan() const {
+    return absl::Span<const T>(tensor_.flat<T>().data(), count());
+  }
+
+  // Copies the weight values into a std::vector<T>.
+  template <typename T>
+  std::vector<T> ToVector() const {
+    auto span = GetSpan<T>();
+    return std::vector<T>(span.data(), span.data() + span.size());
+  }
+
   // TODO(aaroey): make these private.
   nvinfer1::Dims shape_;  // Note: shape.type[] is not used.
   tensorflow::DataType type_;
@@ -560,6 +571,9 @@
   friend class OpConverterTest;
 };
 
+// Map from TF op names to the supported nvinfer1::UnaryOperations.
+const std::unordered_map<string, nvinfer1::UnaryOperation>* UnaryOperationMap();
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
index 77221f6..0aa4891 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
@@ -60,6 +60,7 @@
 using absl::StrCat;
 using ::testing::ElementsAre;
 using ::testing::ElementsAreArray;
+using ::testing::NanSensitiveFloatNear;
 
 // TODO(laigd): put this into some test utils file.
 void ExpectStatus(Status status, error::Code code = error::OK,
@@ -241,6 +242,16 @@
   float getDynamicRange() const override { return dynamic_range_; }
 #endif
 
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
+  bool dynamicRangeIsSet() const override { return true; }
+
+  void resetDynamicRange() override {}
+
+  float getDynamicRangeMin() const override { return 0.f; }
+
+  float getDynamicRangeMax() const override { return 0.f; }
+#endif
+
  private:
   string name_;
   nvinfer1::Dims dims_;
@@ -1439,11 +1450,6 @@
   }
 
   struct TestParams {
-    TestParams(int input_batch_size, const std::vector<int>& input_tensor_dims,
-               const std::vector<int>& input_shape)
-        : batch_size(input_batch_size),
-          tensor_dims(input_tensor_dims),
-          shape(input_shape) {}
     int batch_size;
     std::vector<int> tensor_dims;
     std::vector<int> shape;
@@ -2381,11 +2387,6 @@
   }
 
   struct TestParams {
-    TestParams(const std::vector<int>& input_dims, int axis,
-               const std::vector<int>& expected_output_dims)
-        : input_dims(input_dims),
-          axis(axis),
-          expected_output_dims(expected_output_dims) {}
     std::vector<int> input_dims;
     int axis;
     std::vector<int> expected_output_dims;
@@ -2498,11 +2499,6 @@
   }
 
   struct TestParams {
-    TestParams(const std::vector<int>& input_dims, const std::vector<int>& axis,
-               const std::vector<int>& expected_output_dims)
-        : input_dims(input_dims),
-          axis(axis),
-          expected_output_dims(expected_output_dims) {}
     std::vector<int> input_dims;
     std::vector<int> axis;
     std::vector<int> expected_output_dims;
@@ -2621,46 +2617,62 @@
     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
     RunValidationAndConversion(
         node_def, error::UNIMPLEMENTED,
-        "StridedSlice can't modify batch dim, at my_strided_slice");
+        "TensorRT does not allow modifications to the batch dimension, at "
+        "my_strided_slice");
   }
   {
+    // Dynamic batch size without end_mask, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "TensorRT does not allow modifications to the batch dimension, at "
+        "my_strided_slice");
+  }
+  {
+    // Dynamic batch size but using end_mask, ok.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
+                                                 /*end_mask=*/1);
+    AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {0, 1, 2, 2});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(node_def);
+  }
+// TRT 5.1+ supports strides.
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
+  {
+    // Negative strides, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, -1});
+    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+                               "Negative or zero stride values are not "
+                               "supported for StridedSlice, at "
+                               "my_strided_slice");
+  }
+#else
+  {
     // Stride is not 1, should fail.
     Reset();
     NodeDef node_def = get_strided_slice_nodedef();
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
-    AddTestWeights<int32>("strides", {4}, {1, 2, -1, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 2, 1, 3});
     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
-                               "StridedSlice is only implemented for stride of "
-                               "1, at my_strided_slice");
+                               "Strides other than 1 are not supported with "
+                               "this version of TRT, at my_strided_slice");
   }
-  {
-    // Begin out of bounds, should fail.
-    Reset();
-    NodeDef node_def = get_strided_slice_nodedef();
-    AddTestTensor("input", {1, 2, 3});
-    AddTestWeights<int32>("begin", {4}, {1, 2, 3, 4});
-    AddTestWeights<int32>("end", {4}, {0, 1, 2, 3});
-    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
-    RunValidationAndConversion(
-        node_def, error::INVALID_ARGUMENT,
-        "begin value of 2 for StridedSlice is invalid, must be in the range "
-        "[-dim_size(i), dim_size(i)], at my_strided_slice");
-  }
-  {
-    // End out of bounds, should fail.
-    Reset();
-    NodeDef node_def = get_strided_slice_nodedef();
-    AddTestTensor("input", {1, 2, 3});
-    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
-    AddTestWeights<int32>("end", {4}, {1, 2, 3, 4});
-    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
-    RunValidationAndConversion(
-        node_def, error::INVALID_ARGUMENT,
-        "end value of 2 for StridedSlice is invalid, must be in the range "
-        "[-dim_size(i), dim_size(i)], at my_strided_slice");
-  }
+#endif
   {
     // Size of sliced dim is negative, should fail.
     Reset();
@@ -2669,126 +2681,183 @@
     AddTestWeights<int32>("begin", {4}, {0, 0, 2, 0});
     AddTestWeights<int32>("end", {4}, {1, 1, 0, 3});
     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
-    RunValidationAndConversion(
-        node_def, error::INVALID_ARGUMENT,
-        "New size of sliced dimension is negative, at my_strided_slice");
+    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+                               "\"size\" cannot be negative or zero for "
+                               "StridedSlice, at my_strided_slice");
   }
 
   struct TestParams {
-    TestParams(const std::vector<int>& input_dims,
-               const std::vector<int>& expected_output_dims,
-               const std::vector<int>& begin, const std::vector<int>& end,
-               const std::vector<int>& begin_mask,
-               const std::vector<int>& end_mask,
-               const std::vector<int>& expected_output)
-        : input_dims(input_dims),
-          expected_output_dims(expected_output_dims),
-          begin(begin),
-          end(end),
-          expected_output(expected_output) {
-      // Masks are provided in terms of vectors for readability. Convert them to
-      // binary here.
-      this->begin_mask = 0;
-      for (int i = 0; i < begin_mask.size(); i++) {
-        if (begin_mask[i]) this->begin_mask |= (1 << i);
-      }
-      this->end_mask = 0;
-      for (int i = 0; i < end_mask.size(); i++) {
-        if (end_mask[i]) this->end_mask |= (1 << i);
-      }
-    }
-
     std::vector<int> input_dims;
-    std::vector<int> expected_output_dims;
     std::vector<int> begin;
     std::vector<int> end;
+    std::vector<int> strides;
     int begin_mask;
     int end_mask;
-    std::vector<int> expected_output;
+    std::vector<int> expected_output_dims;
+    std::vector<float> expected_output;
   };
 
+  auto get_mask = [](const std::vector<int>& mask) {
+    int result = 0;
+    for (int i = 0; i < mask.size(); i++) {
+      if (mask[i]) result += (1 << i);
+    }
+    return result;
+  };
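+  // e.g. get_mask({1, 0, 1, 0}) == 0b0101 == 5.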
+
+  // Same input is used for all tests.
+  const std::vector<float> ok_input = {1, 2, 3, 4, 5, 6};
+
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
+  const int kStridedSliceOKCases = 23;
+#else
+  const int kStridedSliceOKCases = 19;
+#endif
   // Ok.
-  const int kStridedSliceOKCases = 18;
   TestParams ok_params[kStridedSliceOKCases] = {
-      // 2D Crop.
-      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
-                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 1, 2},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 0, 0},
-                 /*expected_output=*/{1, 2}},
-      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
-                 /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 0, 0, 0},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 1},
-                 /*expected_output=*/{5, 6}},
-      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
-                 /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 1, 2, 3},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 0, 0},
-                 /*expected_output=*/{5, 6}},
-      // 2D Crop, with transpose.
-      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 2, 1},
-                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 2, 1},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
-                 /*expected_output=*/{1, 2}},
-      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 2, 1},
-                 /*begin=*/{0, 1, 1, 0}, /*end=*/{0, 2, 3, 1},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
-                 /*expected_output=*/{5, 6}},
-      TestParams{/*input_dims=*/{2, 1, 3}, /*expected_output_dims=*/{1, 1, 2},
-                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 1, 2},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
-                 /*expected_output=*/{1, 2}},
-      TestParams{/*input_dims=*/{2, 1, 3}, /*expected_output_dims=*/{1, 1, 2},
-                 /*begin=*/{0, 1, 0, 1}, /*end=*/{0, 2, 1, 3},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
-                 /*expected_output=*/{5, 6}},
-      // 2D Crop, with reshape.
-      TestParams{/*input_dims=*/{2, 3}, /*expected_output_dims=*/{1, 2},
-                 /*begin=*/{0, 0, 0}, /*end=*/{0, 1, 2},
-                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 0},
-                 /*expected_output=*/{1, 2}},
-      TestParams{/*input_dims=*/{2, 3}, /*expected_output_dims=*/{1, 2},
-                 /*begin=*/{0, 1, 1}, /*end=*/{0, 0, 0},
-                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 1, 1},
-                 /*expected_output=*/{5, 6}},
-      // 1D Crop.
-      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 2, 2},
-                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 0, 2},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 0},
-                 /*expected_output=*/{1, 2, 4, 5}},
-      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 3},
-                 /*begin=*/{0, 0, 1, 0}, /*end=*/{0, 0, 0, 0},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 1},
-                 /*expected_output=*/{4, 5, 6}},
-      // 1D Crop, with transpose.
-      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 3, 1},
-                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 0, 0},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 1, 1},
-                 /*expected_output=*/{1, 2, 3}},
-      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 3, 1},
-                 /*begin=*/{0, 1, 0, 0}, /*end=*/{0, 0, 0, 0},
-                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 1},
-                 /*expected_output=*/{4, 5, 6}},
-      // 1D Crop, with reshape.
-      TestParams{/*input_dims=*/{6}, /*expected_output_dims=*/{3},
-                 /*begin=*/{0, 0}, /*end=*/{0, 3},
-                 /*begin_mask=*/{0, 0}, /*end_mask=*/{1, 0},
-                 /*expected_output=*/{1, 2, 3}},
-      TestParams{/*input_dims=*/{1, 6}, /*expected_output_dims=*/{1, 3},
-                 /*begin=*/{0, 0, 2}, /*end=*/{0, 0, 5},
-                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 1, 0},
-                 /*expected_output=*/{3, 4, 5}},
-      TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{3, 1},
-                 /*begin=*/{0, 2, 0}, /*end=*/{0, 5, 0},
-                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 1},
-                 /*expected_output=*/{3, 4, 5}},
-      // Negative axis.
-      TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{3, 1},
-                 /*begin=*/{0, -6, 0}, /*end=*/{0, -3, 0},
-                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 1},
-                 /*expected_output=*/{1, 2, 3}},
-      TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{5, 1},
-                 /*begin=*/{0, 0, 0}, /*end=*/{0, -1, 0},
-                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 1},
-                 /*expected_output=*/{1, 2, 3, 4, 5}},
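+    // Note: get_mask presumably packs these per-dimension {0, 1} flags into
+    // the integer bitmask tf.strided_slice expects (bit i set iff flag i is
+    // 1); e.g. get_mask({1, 1, 0, 0}) tells the op to ignore begin/end for
+    // dims 0 and 1.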
+    // 2D Crop.
+    TestParams{/*input_dims=*/{1, 2, 3}, /*begin=*/{0, 0, 0, 0},
+               /*end=*/{0, 0, 1, 2}, /*strides=*/{1, 1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0, 0}),
+               /*end_mask=*/get_mask({1, 1, 0, 0}),
+               /*expected_output_dims=*/{1, 1, 2}, /*expected_output=*/{1, 2}},
+    TestParams{
+        /*input_dims=*/{1, 2, 3},
+        /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 0, 0, 0}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 1, 1, 1}), /*expected_output_dims=*/{1, 1, 2},
+        /*expected_output=*/{5, 6}},
+    TestParams{
+        /*input_dims=*/{1, 2, 3},
+        /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 1, 2, 3}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 1, 0, 0}), /*expected_output_dims=*/{1, 1, 2},
+        /*expected_output=*/{5, 6}},
+    // 2D Crop, with transpose.
+    TestParams{
+        /*input_dims=*/{2, 3, 1},
+        /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 2, 1}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 2, 1},
+        /*expected_output=*/{1, 2}},
+    TestParams{
+        /*input_dims=*/{2, 3, 1},
+        /*begin=*/{0, 1, 1, 0}, /*end=*/{0, 2, 3, 1}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 2, 1},
+        /*expected_output=*/{5, 6}},
+    TestParams{
+        /*input_dims=*/{2, 1, 3},
+        /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 1, 2}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 1, 2},
+        /*expected_output=*/{1, 2}},
+    TestParams{
+        /*input_dims=*/{2, 1, 3},
+        /*begin=*/{0, 1, 0, 1}, /*end=*/{0, 2, 1, 3}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 1, 2},
+        /*expected_output=*/{5, 6}},
+    // 2D Crop, with reshape.
+    TestParams{/*input_dims=*/{2, 3},
+               /*begin=*/{0, 0, 0}, /*end=*/{0, 1, 2}, /*strides=*/{1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0}),
+               /*end_mask=*/get_mask({1, 0, 0}),
+               /*expected_output_dims=*/{1, 2},
+               /*expected_output=*/{1, 2}},
+    TestParams{/*input_dims=*/{2, 3},
+               /*begin=*/{0, 1, 1}, /*end=*/{0, 0, 0}, /*strides=*/{1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0}),
+               /*end_mask=*/get_mask({1, 1, 1}),
+               /*expected_output_dims=*/{1, 2},
+               /*expected_output=*/{5, 6}},
+    // 1D Crop.
+    TestParams{
+        /*input_dims=*/{1, 2, 3},
+        /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 0, 2}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 1, 1, 0}), /*expected_output_dims=*/{1, 2, 2},
+        /*expected_output=*/{1, 2, 4, 5}},
+    TestParams{
+        /*input_dims=*/{1, 2, 3},
+        /*begin=*/{0, 0, 1, 0}, /*end=*/{0, 0, 0, 0}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 1, 1, 1}), /*expected_output_dims=*/{1, 1, 3},
+        /*expected_output=*/{4, 5, 6}},
+    // 1D Crop, with transpose.
+    TestParams{
+        /*input_dims=*/{2, 3, 1},
+        /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 0, 0}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 0, 1, 1}), /*expected_output_dims=*/{1, 3, 1},
+        /*expected_output=*/{1, 2, 3}},
+    TestParams{
+        /*input_dims=*/{2, 3, 1},
+        /*begin=*/{0, 1, 0, 0}, /*end=*/{0, 0, 0, 0}, /*strides=*/{1, 1, 1, 1},
+        /*begin_mask=*/get_mask({0, 0, 0, 0}),
+        /*end_mask=*/get_mask({1, 1, 1, 1}), /*expected_output_dims=*/{1, 3, 1},
+        /*expected_output=*/{4, 5, 6}},
+    // 1D Crop, with reshape.
+    TestParams{/*input_dims=*/{6},
+               /*begin=*/{0, 0}, /*end=*/{0, 3}, /*strides=*/{1, 1},
+               /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}),
+               /*expected_output_dims=*/{3},
+               /*expected_output=*/{1, 2, 3}},
+    TestParams{/*input_dims=*/{1, 6},
+               /*begin=*/{0, 0, 2}, /*end=*/{0, 0, 5}, /*strides=*/{1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0}),
+               /*end_mask=*/get_mask({1, 1, 0}),
+               /*expected_output_dims=*/{1, 3},
+               /*expected_output=*/{3, 4, 5}},
+    TestParams{/*input_dims=*/{6, 1},
+               /*begin=*/{0, 2, 0}, /*end=*/{0, 5, 0}, /*strides=*/{1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0}),
+               /*end_mask=*/get_mask({1, 0, 1}),
+               /*expected_output_dims=*/{3, 1},
+               /*expected_output=*/{3, 4, 5}},
+    // Negative axis.
+    TestParams{/*input_dims=*/{6, 1},
+               /*begin=*/{0, -6, 0}, /*end=*/{0, -3, 0}, /*strides=*/{1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0}),
+               /*end_mask=*/get_mask({1, 0, 1}),
+               /*expected_output_dims=*/{3, 1},
+               /*expected_output=*/{1, 2, 3}},
+    TestParams{/*input_dims=*/{6, 1},
+               /*begin=*/{0, 0, 0}, /*end=*/{0, -1, 0}, /*strides=*/{1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0}),
+               /*end_mask=*/get_mask({1, 0, 1}),
+               /*expected_output_dims=*/{5, 1},
+               /*expected_output=*/{1, 2, 3, 4, 5}},
+    // Clamp out of bounds begin and end.
+    TestParams{/*input_dims=*/{1, 2, 3}, /*begin=*/{0, 0, -9999, -9},
+               /*end=*/{0, 1, 1000, 4}, /*strides=*/{1, 1, 1, 1},
+               /*begin_mask=*/get_mask({0, 0, 0, 0}),
+               /*end_mask=*/get_mask({1, 0, 0, 0}),
+               /*expected_output_dims=*/{1, 2, 3},
+               /*expected_output=*/{1, 2, 3, 4, 5, 6}},
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
+    // Strides
+    TestParams{/*input_dims=*/{6},
+               /*begin=*/{0, 0}, /*end=*/{0, 5}, /*strides=*/{1, 2},
+               /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}),
+               /*expected_output_dims=*/{3},
+               /*expected_output=*/{1, 3, 5}},
+    TestParams{/*input_dims=*/{6},
+               /*begin=*/{0, 0}, /*end=*/{0, 6}, /*strides=*/{1, 2},
+               /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}),
+               /*expected_output_dims=*/{3},
+               /*expected_output=*/{1, 3, 5}},
+    TestParams{/*input_dims=*/{6},
+               /*begin=*/{0, 1}, /*end=*/{0, 6}, /*strides=*/{1, 2},
+               /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}),
+               /*expected_output_dims=*/{3},
+               /*expected_output=*/{2, 4, 6}},
+    TestParams{/*input_dims=*/{6},
+               /*begin=*/{0, 2}, /*end=*/{0, 6}, /*strides=*/{1, 3},
+               /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}),
+               /*expected_output_dims=*/{2},
+               /*expected_output=*/{3, 6}},
+#endif
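+    // (The strided cases above require non-unit strides, which TensorRT only
+    // supports via its slice layer starting in 5.1; hence the version guard.)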
   };
 
   for (int i = 0; i < kStridedSliceOKCases; i++) {
@@ -2801,16 +2870,18 @@
                           ok_params[i].begin);
     AddTestWeights<int32>("end", {static_cast<int>(ok_params[i].end.size())},
                           ok_params[i].end);
-    std::vector<int> strides(ok_params[i].input_dims.size(), 1);
-    AddTestWeights<int32>("strides", {static_cast<int>(strides.size())},
-                          strides);
+    AddTestWeights<int32>("strides",
+                          {static_cast<int>(ok_params[i].strides.size())},
+                          ok_params[i].strides);
     RunValidationAndConversion(node_def);
 
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_strided_slice", &output));
+    EXPECT_TRUE(output.is_tensor());
+    ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
+                             output.tensor()->getDimensions());
 
-    const DataVec input_data{
-        {"input", test::AsTensor<float>({1, 2, 3, 4, 5, 6})}};
+    const DataVec input_data{{"input", test::AsTensor<float>(ok_input)}};
     DataVec output_data{
         {"my_strided_slice",
          ConstructTensor<float>(ok_params[i].expected_output.size())}};
@@ -2820,6 +2891,148 @@
   }
 }
 
+TEST_F(OpConverterTest, ConvertSlice) {
+  // Get the NodeDef for the Slice op.
+  auto get_slice_nodedef = []() -> NodeDef {
+    Scope s = Scope::NewRootScope();
+    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
+    auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
+    auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
+    auto slice = ops::Slice(s.WithOpName("my_slice"), input, begin, size);
+    return slice.operation.node()->def();
+  };
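+  // Reminder: tf.slice(input, begin, size) takes size[i] elements starting at
+  // begin[i] along each axis, and size[i] == -1 means "all remaining
+  // elements"; e.g. begin = {1}, size = {3} on {1, 2, 3, 4, 5, 6} yields
+  // {2, 3, 4}.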
+
+  {
+    // Begin is below bounds, should fail.
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, -1, 0});
+    AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "\"begin\" for dimension 2 in Slice is out of range, at my_slice");
+  }
+  {
+    // Begin is above bounds, should fail.
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 3, 0});
+    AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "\"begin\" for dimension 2 in Slice is out of range, at my_slice");
+  }
+  {
+    // Size is below bounds, should fail.
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("size", {4}, {1, 1, 2, -2});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "\"begin\" + \"size\" for dimension 3 in Slice is out of range, at "
+        "my_slice");
+  }
+  {
+    // Size is above bounds, should fail.
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("size", {4}, {1, 1, 3, 3});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "\"begin\" + \"size\" for dimension 2 in Slice is out of range, at "
+        "my_slice");
+  }
+  {
+    // Modify batch dim, should fail.
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("size", {4}, {0, 1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "TensorRT does not allow modifications to the batch dimension, at "
+        "my_slice");
+  }
+  {
+    // Dynamic batch size with size[0] not -1, should fail.
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "TensorRT does not allow modifications to the batch dimension, at "
+        "my_slice");
+  }
+  {
+    // Dynamic batch size but using size[0] of -1, ok.
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("size", {4}, {-1, 1, 2, 2});
+    RunValidationAndConversion(node_def);
+  }
+
+  struct TestParams {
+    std::vector<int> input_dims;
+    std::vector<int> begin;
+    std::vector<int> size;
+    std::vector<int> expected_output_dims;
+    std::vector<int> expected_output;
+  };
+
+  // Ok.
+  const int kSliceOKCases = 5;
+  TestParams ok_params[kSliceOKCases] = {
+      TestParams{{1, 2, 3},
+                 {0, 0, 0, 0},
+                 {-1, -1, -1, -1},
+                 {1, 2, 3},
+                 {1, 2, 3, 4, 5, 6}},
+      TestParams{
+          {1, 2, 3}, {0, 0, 0, 0}, {1, 1, 2, 3}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}},
+      TestParams{
+          {1, 2, 3}, {0, 0, 0, 0}, {1, -1, 2, 2}, {1, 2, 2}, {1, 2, 4, 5}},
+      TestParams{{6}, {0, 1}, {1, 5}, {5}, {2, 3, 4, 5, 6}},
+      TestParams{{6}, {0, 1}, {-1, 3}, {3}, {2, 3, 4}},
+  };
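+  // All begin/size vectors above include the implicit batch dimension, which
+  // must stay untouched: begin[0] == 0 and size[0] either -1 or the full
+  // batch size.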
+
+  for (int i = 0; i < kSliceOKCases; i++) {
+    Reset();
+    NodeDef node_def = get_slice_nodedef();
+    AddTestTensor("input", ok_params[i].input_dims);
+    AddTestWeights<int32>("begin",
+                          {static_cast<int>(ok_params[i].begin.size())},
+                          ok_params[i].begin);
+    AddTestWeights<int32>("size", {static_cast<int>(ok_params[i].size.size())},
+                          ok_params[i].size);
+    RunValidationAndConversion(node_def);
+
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_slice", &output));
+    EXPECT_TRUE(output.is_tensor());
+    ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
+                             output.tensor()->getDimensions());
+
+    const DataVec input_data{
+        {"input", test::AsTensor<float>({1, 2, 3, 4, 5, 6})}};
+    DataVec output_data{{"my_slice", ConstructTensor<float>(
+                                         ok_params[i].expected_output.size())}};
+    BuildAndRun(input_data, &output_data);
+    EXPECT_THAT(GetSpanForData<float>(output_data[0]),
+                ElementsAreArray(ok_params[i].expected_output));
+  }
+}
+
 TEST_F(OpConverterTest, ConvertConv2D) {
   {
     // Input list is empty, should fail.
@@ -2956,27 +3169,6 @@
   }
 
   struct TestParams {
-    TestParams(const std::vector<int>& input_dims,
-               const std::vector<float>& input,
-               const std::vector<int>& filter_dims,
-               const std::vector<float>& filter,
-               const std::vector<int>& strides, const string& padding,
-               const string& data_format, const std::vector<int>& dilations,
-               bool is_conv2d_backprop_input,
-               const std::vector<int>& expected_output_dims,
-               const std::vector<float>& expected_output)
-        : input_dims(input_dims),
-          input(input),
-          filter_dims(filter_dims),
-          filter(filter),
-          strides(strides),
-          padding(padding),
-          data_format(data_format),
-          dilations(dilations),
-          is_conv2d_backprop_input(is_conv2d_backprop_input),
-          expected_output_dims(expected_output_dims),
-          expected_output(expected_output) {}
-
     std::vector<int> input_dims;
     std::vector<float> input;
     std::vector<int> filter_dims;
@@ -3163,6 +3355,294 @@
   }
 }
 
+template <DataType dtype>
+void TestConvertGather(OpConverterTest* test) {
+  typedef typename EnumToDataType<dtype>::Type CType;
+
+  // Get the NodeDef for GatherV2.
+  Scope s = Scope::NewRootScope();
+  auto params = ops::Placeholder(s.WithOpName("params"), dtype);
+  auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
+  auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
+  auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis);
+  const NodeDef& node_def = gather.operation.node()->def();
+
+  struct TestParams {
+    std::vector<int> params_dims;
+    std::vector<int> indices_dims;
+    std::vector<int> indices;
+    int axis;
+    std::vector<int> expected_output_dims;
+    std::vector<int> expected_output;
+  };
+
+  // Input is the same {1, 2, 3, 4, 5, 6} for all cases.
+  const int kGatherOKCases = 5;
+  TestParams ok_params[kGatherOKCases] = {
+      // Vector indices (output is rank(params)).
+      TestParams{{1, 2, 3}, {1}, {0}, 3, {1, 2, 1}, {1, 4}},
+      TestParams{{1, 2, 3}, {1}, {1}, 3, {1, 2, 1}, {2, 5}},
+      TestParams{{1, 2, 3}, {1}, {2}, -1, {1, 2, 1}, {3, 6}},
+      TestParams{{1, 2, 3}, {3}, {2, 0, 1}, 3, {1, 2, 3}, {3, 1, 2, 6, 4, 5}},
+      // Higher rank indices (output is rank(params) + rank(indices) - 1).
+      TestParams{{1, 2, 3}, {1, 1}, {0}, 2, {1, 1, 1, 3}, {1, 2, 3}},
+  };
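+  // For reference, tf.gather produces output shape params.shape[:axis] +
+  // indices.shape + params.shape[axis + 1:]. The dims listed above exclude
+  // the implicit batch dimension, while axis appears to count it (axis 3
+  // addresses the last of the four total dims).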
+
+  // Ok.
+  for (int i = 0; i < kGatherOKCases; i++) {
+    test->Reset();
+    test->AddTestTensor("params", ok_params[i].params_dims, 1,
+                        TfDataTypeToTrt(dtype));
+    test->AddTestTensor("indices", ok_params[i].indices_dims, 1,
+                        nvinfer1::DataType::kINT32);
+    test->AddTestWeights<int32>("axis", {1}, {ok_params[i].axis});
+    test->RunValidationAndConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(test->GetTensorOrWeights("my_gather", &output));
+    EXPECT_TRUE(output.is_tensor());
+    ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
+                             output.tensor()->getDimensions());
+
+    // Create input in CType and convert expected output to CType.
+    std::vector<CType> inputs = {CType(1), CType(2), CType(3),
+                                 CType(4), CType(5), CType(6)};
+    std::vector<CType> converted_expected_output(
+        ok_params[i].expected_output.begin(),
+        ok_params[i].expected_output.end());
+
+    const DataVec input_data{
+        {"params", test::AsTensor<CType>(inputs)},
+        {"indices", test::AsTensor<int32>(ok_params[i].indices)}};
+    DataVec output_data{
+        {"my_gather",
+         ConstructTensor<CType>(ok_params[i].expected_output.size())}};
+    test->BuildAndRun(input_data, &output_data);
+    EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
+                ElementsAreArray(converted_expected_output));
+  }
+}
+
+TEST_F(OpConverterTest, ConvertGather) {
+  {
+    // Input list is empty, should fail.
+    NodeDef node_def = MakeNodeDef("my_gather", "GatherV2", {});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "GatherV2 got 0 inputs but expected 3, at my_gather");
+  }
+
+  // Get the NodeDef for GatherV2.
+  Scope s = Scope::NewRootScope();
+  auto params = ops::Placeholder(s.WithOpName("params"), DT_FLOAT);
+  auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
+  auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
+  auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis);
+  const NodeDef& node_def = gather.operation.node()->def();
+  {
+    // Axis is a tensor, should fail.
+    Reset();
+    AddTestTensor("params", {1, 2, 3});
+    AddTestTensor("indices", {2});
+    AddTestTensor("axis", {1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "The input \"axis\" for GatherV2 must be a constant, at my_gather");
+  }
+  {
+    // Axis is out of bounds, should fail.
+    Reset();
+    AddTestTensor("params", {1, 2, 3});
+    AddTestTensor("indices", {2});
+    AddTestWeights<int32>("axis", {1}, {4});
+    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+                               "Axis value of 4 is out of bounds, must be in "
+                               "range [-4, 4), at my_gather");
+  }
+  {
+    // Axis is batch dimension, should fail.
+    Reset();
+    AddTestTensor("params", {1, 2, 3});
+    AddTestTensor("indices", {2});
+    AddTestWeights<int32>("axis", {1}, {0});
+    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+                               "TensorRT does not allow manipulation of the "
+                               "batch dimension, at my_gather");
+  }
+
+  Reset();
+  TestConvertGather<DT_FLOAT>(this);
+  TestConvertGather<DT_HALF>(this);
+  TestConvertGather<DT_INT32>(this);
+}
+
+TEST_F(OpConverterTest, ConvertUnary) {
+  {
+    // Input list is empty, should fail.
+    NodeDef node_def = MakeNodeDef("my_unary", "Neg", {});
+    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+                               "Neg got 0 inputs but expected 1, at my_unary");
+  }
+  {
+    // Input is weights, should fail.
+    Reset();
+    Scope s = Scope::NewRootScope();
+    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
+    auto neg = ops::Neg(s.WithOpName("my_unary"), input);
+    const NodeDef& node_def = neg.operation.node()->def();
+    AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "The input \"x\" for Neg must be a tensor, at my_unary");
+  }
+
+  // Get the NodeDef for a given unary op.
+  auto get_unary_nodedef = [](string op_name) -> NodeDef {
+    Scope s = Scope::NewRootScope();
+    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
+    if (op_name == "Abs") {
+      auto unary = ops::Abs(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Acos") {
+      auto unary = ops::Acos(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Acosh") {
+      auto unary = ops::Acosh(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Asin") {
+      auto unary = ops::Asin(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Asinh") {
+      auto unary = ops::Asinh(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Atan") {
+      auto unary = ops::Atan(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Atanh") {
+      auto unary = ops::Atanh(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Ceil") {
+      auto unary = ops::Ceil(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Cos") {
+      auto unary = ops::Cos(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Cosh") {
+      auto unary = ops::Cosh(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Exp") {
+      auto unary = ops::Exp(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Floor") {
+      auto unary = ops::Floor(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Log") {
+      auto unary = ops::Log(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Neg") {
+      auto unary = ops::Neg(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Reciprocal") {
+      auto unary = ops::Reciprocal(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Rsqrt") {
+      auto unary = ops::Rsqrt(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Sin") {
+      auto unary = ops::Sin(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Sinh") {
+      auto unary = ops::Sinh(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Sqrt") {
+      auto unary = ops::Sqrt(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    } else if (op_name == "Tan") {
+      auto unary = ops::Tan(s.WithOpName("my_unary"), input);
+      return unary.operation.node()->def();
+    }
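+    // None of the supported op names matched; fail the test.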
+    EXPECT_TRUE(false);
+    return NodeDef();
+  };
+  // Compute the expected output for a given unary op.
+  auto get_unary_output = [](string op_name, float input) -> float {
+    if (op_name == "Abs") {
+      return std::abs(input);
+    } else if (op_name == "Acos") {
+      return std::acos(input);
+    } else if (op_name == "Acosh") {
+      return std::acosh(input);
+    } else if (op_name == "Asin") {
+      return std::asin(input);
+    } else if (op_name == "Asinh") {
+      return std::asinh(input);
+    } else if (op_name == "Atan") {
+      return std::atan(input);
+    } else if (op_name == "Atanh") {
+      return std::atanh(input);
+    } else if (op_name == "Ceil") {
+      return std::ceil(input);
+    } else if (op_name == "Cos") {
+      return std::cos(input);
+    } else if (op_name == "Cosh") {
+      return std::cosh(input);
+    } else if (op_name == "Exp") {
+      return std::exp(input);
+    } else if (op_name == "Floor") {
+      return std::floor(input);
+    } else if (op_name == "Log") {
+      return std::log(input);
+    } else if (op_name == "Neg") {
+      return -input;
+    } else if (op_name == "Reciprocal") {
+      return 1.0 / input;
+    } else if (op_name == "Rsqrt") {
+      return 1.0 / std::sqrt(input);
+    } else if (op_name == "Sin") {
+      return std::sin(input);
+    } else if (op_name == "Sinh") {
+      return std::sinh(input);
+    } else if (op_name == "Sqrt") {
+      return std::sqrt(input);
+    } else if (op_name == "Tan") {
+      return std::tan(input);
+    }
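+    // None of the supported op names matched; fail the test.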
+    EXPECT_TRUE(false);
+    return 0;
+  };
+
+  // Get list of ops to test.
+  std::vector<string> ops_to_test;
+  // Add all ops supported by ConvertUnary.
+  auto* map = UnaryOperationMap();
+  ops_to_test.reserve(map->size());
+  for (auto& pair : *map) {
+    ops_to_test.push_back(pair.first);
+  }
+  // Add other unary ops to test.
+  ops_to_test.push_back("Rsqrt");
+  // Ok.
+  for (string op_name : ops_to_test) {
+    Reset();
+    NodeDef node_def = get_unary_nodedef(op_name);
+    AddTestTensor("input", {1, 2, 3});
+    RunValidationAndConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_unary", &output));
+    EXPECT_TRUE(output.is_tensor());
+    ExpectTrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions());
+
+    const std::vector<float> input = {-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f};
+    const DataVec input_data{{"input", test::AsTensor<float>(input)}};
+    DataVec output_data{{"my_unary", ConstructTensor<float>(6)}};
+    BuildAndRun(input_data, &output_data);
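+    // The inputs include 0.0 and negatives, so ops such as Log, Sqrt, Rsqrt
+    // and Acosh yield NaN/Inf for some elements; NanSensitiveFloatNear treats
+    // two NaNs as equal so those cases still compare correctly.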
+    for (int i = 0; i < input.size(); ++i) {
+      const float expected_output = get_unary_output(op_name, input[i]);
+      EXPECT_THAT(GetSpanForData<float>(output_data[0])[i],
+                  NanSensitiveFloatNear(expected_output, 0.0001));
+    }
+  }
+}
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc
index f36aa55..0eedfca 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc
@@ -87,7 +87,7 @@
     LOG(INFO) << offset << "type             = " << cluster->type();
     LOG(INFO) << offset << "num warmup steps = " << cluster->NumWarmupSteps();
     const auto dev_names = cluster->GetDeviceNames();
-    if (dev_names.size()) {
+    if (!dev_names.empty()) {
       LOG(INFO) << offset << " Device names:";
       for (const auto s : dev_names) {
         LOG(INFO) << offset2 << s;
@@ -103,7 +103,7 @@
     }
 
     const auto dev_props = cluster->GetDevices();
-    if (dev_props.size()) {
+    if (!dev_props.empty()) {
       LOG(INFO) << offset << "Device properties:";
       for (auto k : dev_props) {
         LOG(INFO) << offset2 << k.first;
@@ -131,7 +131,7 @@
     }
   }
   LOG(INFO) << "item: " << item.id;
-  if (item.feed.size()) {
+  if (!item.feed.empty()) {
     LOG(INFO) << offset << "Feeds  :";
     for (const auto& f : item.feed) {
       const auto& shape = f.second.shape();
@@ -140,7 +140,7 @@
   } else {
     LOG(INFO) << offset << "No Feeds";
   }
-  if (item.fetch.size()) {
+  if (!item.fetch.empty()) {
     LOG(INFO) << offset << "Fetches  :";
     for (const auto& f : item.fetch) {
       LOG(INFO) << offset2 << f;
@@ -149,7 +149,7 @@
     LOG(INFO) << offset << "No Fetches";
   }
 
-  if (item.init_ops.size()) {
+  if (!item.init_ops.empty()) {
     LOG(INFO) << offset << "init ops  :";
     for (const auto& f : item.init_ops) {
       LOG(INFO) << offset2 << f;
@@ -160,7 +160,7 @@
   LOG(INFO) << "Save Op = " << item.save_op;
   LOG(INFO) << "Restore Op = " << item.restore_op;
   LOG(INFO) << "save_restore_loc_tensor = " << item.save_restore_loc_tensor;
-  if (item.keep_ops.size()) {
+  if (!item.keep_ops.empty()) {
     LOG(INFO) << offset << "keep ops  :";
     for (const auto& f : item.keep_ops) {
       LOG(INFO) << offset2 << f;
@@ -197,7 +197,7 @@
     PrintDebugInfo(cluster, item);
   }
   int max_dim = -1;
-  if (item.feed.size()) {
+  if (!item.feed.empty()) {
     for (const auto& f : item.feed) {
       const auto& shape = f.second.shape();
       if (shape.dims() > 0) {
diff --git a/tensorflow/compiler/tf2tensorrt/kernels/get_serialized_resource_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/get_serialized_resource_op.cc
index eae1f8e..81406b6 100644
--- a/tensorflow/compiler/tf2tensorrt/kernels/get_serialized_resource_op.cc
+++ b/tensorflow/compiler/tf2tensorrt/kernels/get_serialized_resource_op.cc
@@ -13,9 +13,6 @@
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_TF2TENSORRT_KERNELS_GET_SERIALIZED_RESOURCE_OP_H_
-#define TENSORFLOW_COMPILER_TF2TENSORRT_KERNELS_GET_SERIALIZED_RESOURCE_OP_H_
-
 #include <memory>
 #include <vector>
 
@@ -70,4 +67,3 @@
 
 #endif  // GOOGLE_TENSORRT
 #endif  // GOOGLE_CUDA
-#endif  // TENSORFLOW_COMPILER_TF2TENSORRT_KERNELS_GET_SERIALIZED_RESOURCE_OP_H_
diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc
index bc5335e..f6d387c 100644
--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc
+++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc
@@ -25,7 +25,6 @@
 #include "tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h"
-#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/trt_resources.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
@@ -216,8 +215,8 @@
                  context->GetAttr("use_calibration", &use_calibration_));
   calibration_mode_ =
       (use_calibration_ && precision_mode_ == TrtPrecisionMode::INT8 &&
-       calibration_data.size() == 0);
-  if (calibration_data.size()) {
+       calibration_data.empty());
+  if (!calibration_data.empty()) {
     calibrator_.reset(new TRTInt8Calibrator(calibration_data));
     calibration_data.resize(0);
   }
@@ -254,6 +253,7 @@
   opts.rendezvous = ctx->rendezvous();
   opts.cancellation_manager = ctx->cancellation_manager();
   opts.runner = ctx->runner();
+  inputs.reserve(ctx->num_inputs());
   for (int i = 0; i < ctx->num_inputs(); i++) {
     inputs.push_back(ctx->input(i));
   }
@@ -294,27 +294,6 @@
             return this->AllocateCalibrationResources(ctx, cr);
           }}));
   tensorflow::core::ScopedUnref calib_sc(calib_res);
-  // TODO(aaroey): here we also add the resource to the ResourceMgr singleton.
-  // This is needed before we migrate all uses of calib_graph_to_infer_graph()
-  // to the new calibration workflow. After that we'll remove this block.
-  {
-    auto deprecated_rm =
-        TRTResourceManager::instance()->getManager("TRTCalibration");
-    TRTCalibrationResource* copied_resource = nullptr;
-    // Check whether the resource exists, and create it if not.
-    if (deprecated_rm->Lookup(funcdef_name_, "Calibrator", &copied_resource)
-            .ok()) {
-      // Do nothing if the resource exists.
-      copied_resource->Unref();
-    } else {
-      copied_resource = calib_res;
-      // Increase the refcount by 1 then transfer the ownership of that refcount
-      // to the ResourceMgr singleton.
-      copied_resource->Ref();
-      OP_REQUIRES_OK(ctx, deprecated_rm->Create(funcdef_name_, "Calibrator",
-                                                copied_resource));
-    }
-  }
   int num_inputs = ctx->num_inputs();
   // Pass input data to calibrator
   std::unordered_map<string, void*> input_data;
@@ -385,8 +364,9 @@
   }
   // Get shapes of inputs to engine.
   std::vector<tensorflow::TensorShape> input_shapes;
+  input_shapes.reserve(ctx->num_inputs());
   for (int i = 0; i < ctx->num_inputs(); ++i) {
-    input_shapes.emplace_back(ctx->input(i).shape());
+    input_shapes.push_back(ctx->input(i).shape());
   }
   EngineContext* engine_context = GetEngine(input_shapes, ctx);
   if (!engine_context->cuda_engine) {
@@ -433,7 +413,8 @@
     auto dtype = cuda_engine->getBindingDataType(binding_index);
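+    // TensorRT's execution API takes a non-const void** bindings array, so
+    // the (const) input buffers need a const_cast even though TRT only reads
+    // them.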
     switch (dtype) {
       case nvinfer1::DataType::kFLOAT:
-        buffers[binding_index] = (void*)(input_tensor.flat<float>().data());
+        buffers[binding_index] =
+            const_cast<float*>(input_tensor.flat<float>().data());
         break;
       case nvinfer1::DataType::kHALF:
         LOG(ERROR) << "FP16 inputs are not supported yet!";
@@ -442,10 +423,11 @@
         LOG(ERROR) << "INT8 inputs are not supported yet!";
         return kRetry;
       case nvinfer1::DataType::kINT32:
-        buffers[binding_index] = (void*)(input_tensor.flat<int32>().data());
+        buffers[binding_index] =
+            const_cast<int32*>(input_tensor.flat<int32>().data());
         break;
       default:
-        LOG(ERROR) << "Unknown TRT data type: " << int(dtype);
+        LOG(ERROR) << "Unknown TRT data type: " << static_cast<int>(dtype);
         return kRetry;
     }
   }
@@ -484,7 +466,7 @@
     switch (dtype) {
       case nvinfer1::DataType::kFLOAT:
         buffers[binding_index] =
-            reinterpret_cast<void*>(output_tensor->flat<float>().data());
+            const_cast<float*>(output_tensor->flat<float>().data());
         break;
       case nvinfer1::DataType::kHALF:
         LOG(WARNING) << "half size is not supported yet!";
@@ -494,7 +476,7 @@
         return kRetry;
       case nvinfer1::DataType::kINT32:
         buffers[binding_index] =
-            reinterpret_cast<void*>(output_tensor->flat<int32>().data());
+            const_cast<int32*>(output_tensor->flat<int32>().data());
         break;
       default:
         LOG(WARNING) << "Unknown TRT data type: " << static_cast<int>(dtype);
@@ -616,11 +598,11 @@
     LOG(INFO) << "Building a new TensorRT engine for " << name()
               << " input shapes: "
               << TensorShapeUtils::ShapeListString(engine_input_shapes);
+
     // Convert to partial shapes
-    std::vector<PartialTensorShape> partial_shapes;
-    for (int i = 0; i < engine_input_shapes.size(); i++) {
-      partial_shapes.emplace_back(engine_input_shapes[i]);
-    }
+    std::vector<PartialTensorShape> partial_shapes(engine_input_shapes.begin(),
+                                                   engine_input_shapes.end());
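+    // (PartialTensorShape is constructible from TensorShape, so the
+    // iterator-range constructor replaces the manual emplace_back loop.)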
+
     // Up to this point, calibrator_ can never be empty, since otherwise it
     // means calibration_mode_ is true and this path won't get executed.
     auto status = convert::ConvertGraphDefToEngine(
diff --git a/tensorflow/compiler/tf2tensorrt/python/ops/trt_ops.py b/tensorflow/compiler/tf2tensorrt/python/ops/trt_ops.py
index 86bfabf..25fb3a1 100644
--- a/tensorflow/compiler/tf2tensorrt/python/ops/trt_ops.py
+++ b/tensorflow/compiler/tf2tensorrt/python/ops/trt_ops.py
@@ -18,17 +18,52 @@
 from __future__ import division
 from __future__ import print_function
 
+import threading
+
 import platform
+from tensorflow.python.framework import errors
 
-if platform.system() != "Windows":
-  # pylint: disable=wildcard-import,unused-import,g-import-not-at-top
-  from tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops import *
+_trt_ops_so = None
+_module_lock = threading.Lock()
 
-  from tensorflow.python.framework import load_library
-  from tensorflow.python.platform import resource_loader
-  # pylint: enable=wildcard-import,unused-import,g-import-not-at-top
 
-  _trt_ops = load_library.load_op_library(
-      resource_loader.get_path_to_datafile("_trt_ops.so"))
-else:
-  raise RuntimeError("Windows platforms are not supported")
+def load_trt_ops():
+  """Load TF-TRT op libraries so if it hasn't been loaded already."""
+  global _trt_ops_so
+
+  if platform.system() == "Windows":
+    raise RuntimeError("Windows platforms are not supported")
+
+  with _module_lock:
+    if _trt_ops_so:
+      return
+
+    try:
+      # pylint: disable=g-import-not-at-top,unused-variable
+      # This registers the TRT ops; it doesn't require loading the TRT library.
+      from tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops import trt_engine_op
+      # pylint: enable=g-import-not-at-top,unused-variable
+    except ImportError as e:
+      print("**** Failed to import TF-TRT ops. This is because the binary was "
+            "not built with CUDA or TensorRT enabled. ****")
+      raise e
+
+    # TODO(laigd): we should load TF-TRT kernels here as well after removing the
+    # swig binding.
+    try:
+      # pylint: disable=g-import-not-at-top
+      from tensorflow.python.framework import load_library
+      from tensorflow.python.platform import resource_loader
+      # pylint: enable=g-import-not-at-top
+
+      _trt_ops_so = load_library.load_op_library(
+          resource_loader.get_path_to_datafile("_trt_ops.so"))
+    except errors.NotFoundError as e:
+      no_trt_message = (
+          "**** Failed to initialize TensorRT. This is either because the "
+          "TensorRT installation path is not in LD_LIBRARY_PATH, or because "
+          "you do not have it installed. If not installed, please go to "
+          "https://developer.nvidia.com/tensorrt to download and install "
+          "TensorRT ****")
+      print(no_trt_message)
+      raise e
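+
+
+# Usage sketch (the exact call site depends on the TF-TRT conversion path):
+#   from tensorflow.compiler.tf2tensorrt.python.ops import trt_ops
+#   trt_ops.load_trt_ops()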
diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment.cc b/tensorflow/compiler/tf2tensorrt/segment/segment.cc
index 4a8a4ac..3794929 100644
--- a/tensorflow/compiler/tf2tensorrt/segment/segment.cc
+++ b/tensorflow/compiler/tf2tensorrt/segment/segment.cc
@@ -30,6 +30,9 @@
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/types.h"
 
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
 namespace tensorflow {
 namespace tensorrt {
 namespace segment {
@@ -725,3 +728,6 @@
 }  // namespace segment
 }  // namespace tensorrt
 }  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment.h b/tensorflow/compiler/tf2tensorrt/segment/segment.h
index 9a0ccc9..9622ddd 100644
--- a/tensorflow/compiler/tf2tensorrt/segment/segment.h
+++ b/tensorflow/compiler/tf2tensorrt/segment/segment.h
@@ -24,8 +24,10 @@
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/types.h"
 
-namespace tensorflow {
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
 
+namespace tensorflow {
 namespace tensorrt {
 namespace segment {
 
@@ -60,4 +62,7 @@
 }  // namespace tensorrt
 }  // namespace tensorflow
 
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
+
 #endif  // TENSORFLOW_COMPILER_TF2TENSORRT_SEGMENT_SEGMENT_H_
diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc b/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc
index 58512d3..e11ad27 100644
--- a/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc
+++ b/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc
@@ -26,6 +26,9 @@
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session.h"
 
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
 namespace tensorflow {
 namespace tensorrt {
 namespace segment {
@@ -265,3 +268,6 @@
 }  // namespace segment
 }  // namespace tensorrt
 }  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc b/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc
index 3bcca99..dd3c09d 100644
--- a/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc
+++ b/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc
@@ -19,7 +19,9 @@
 #include <vector>
 
 #include "re2/re2.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace tensorrt {
diff --git a/tensorflow/compiler/tf2tensorrt/utils/test_utils.h b/tensorflow/compiler/tf2tensorrt/utils/test_utils.h
index bcd628b..d858759 100644
--- a/tensorflow/compiler/tf2tensorrt/utils/test_utils.h
+++ b/tensorflow/compiler/tf2tensorrt/utils/test_utils.h
@@ -16,8 +16,7 @@
 #ifndef TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TEST_UTILS_H_
 #define TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TEST_UTILS_H_
 
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace tensorrt {
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.cc
index bf111d3..5213fce 100644
--- a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.cc
+++ b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.cc
@@ -135,7 +135,7 @@
 
 void TRTInt8Calibrator::writeCalibrationCache(const void* ptr,
                                               std::size_t length) {
-  calibration_table_ = string((const char*)ptr, length);
+  calibration_table_ = string(static_cast<const char*>(ptr), length);
   VLOG(1) << "Got calibration data for " << engine_name_ << " @" << ptr
           << " length=" << length;
 }
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h
index 10587e9..aa70b07 100644
--- a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h
+++ b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h
@@ -34,7 +34,12 @@
 // TRT's pull model for calibration. When TRT implements a means for
 // a push calibration, this class should be updated accordingly.
 
+// IInt8EntropyCalibrator2 is preferred for TRT 5.1+.
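+// Per NVIDIA's documentation it is the recommended entropy calibrator from
+// 5.1 on (and required when targeting DLA); the v1 interface is kept for
+// older TensorRT releases.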
+#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
+struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 {
+#else
 struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator {
+#endif
  public:
   // Construct a calibrator for future calibration.
   TRTInt8Calibrator(
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.cc
deleted file mode 100644
index 0a72a88..0000000
--- a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h"
-#include "tensorflow/core/platform/logging.h"
-
-namespace tensorflow {
-namespace tensorrt {
-
-std::shared_ptr<TRTResourceManager>
-tensorflow::tensorrt::TRTResourceManager::instance() {
-  static std::shared_ptr<TRTResourceManager> instance_(new TRTResourceManager);
-  return instance_;
-}
-
-std::shared_ptr<tensorflow::ResourceMgr>
-tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) {
-  // mutex is held for lookup only. Most instantiations where mutex will be held
-  // longer will be during op creation and should be ok.
-  tensorflow::mutex_lock lock(map_mutex_);
-  auto s = managers_.find(op_name);
-  if (s == managers_.end()) {
-    auto it = managers_.emplace(
-        op_name, std::make_shared<tensorflow::ResourceMgr>(op_name));
-    VLOG(1) << "Returning a new manager " << op_name;
-    return it.first->second;
-  }
-  VLOG(1) << "Returning old manager " << op_name;
-  return s->second;
-}
-
-}  // namespace tensorrt
-}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h b/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h
deleted file mode 100644
index 03879ff..0000000
--- a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TRT_RESOURCE_MANAGER_H_
-#define TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TRT_RESOURCE_MANAGER_H_
-#include <memory>
-
-#include <string>
-#include <unordered_map>
-#include "tensorflow/core/framework/resource_mgr.h"
-#include "tensorflow/core/platform/mutex.h"
-
-namespace tensorflow {
-namespace tensorrt {
-
-class TRTResourceManager {
-  TRTResourceManager() = default;
-
- public:
-  static std::shared_ptr<TRTResourceManager> instance();
-  // returns a manager for given op, if it doesn't exists it creates one
-  std::shared_ptr<tensorflow::ResourceMgr> getManager(const string& op_name);
-
- private:
-  std::unordered_map<string, std::shared_ptr<tensorflow::ResourceMgr>>
-      managers_;
-  tensorflow::mutex map_mutex_;
-};
-
-}  // namespace tensorrt
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TRT_RESOURCE_MANAGER_H_
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_resources.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_resources.cc
index 37f7fe9..2e55307 100644
--- a/tensorflow/compiler/tf2tensorrt/utils/trt_resources.cc
+++ b/tensorflow/compiler/tf2tensorrt/utils/trt_resources.cc
@@ -48,7 +48,7 @@
   calibrator_->waitAndSetDone();
   thr_->join();
   *serialized = calibrator_->getCalibrationTableAsString();
-  if (!serialized->size()) {
+  if (serialized->empty()) {
     return tensorflow::errors::Unknown("Calibration table is empty.");
   }
   return Status::OK();
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 02de951..7d9e7b9 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -24,7 +24,7 @@
 )
 
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
-load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library")
+load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library", "xla_py_proto_library")
 
 cc_library(
     name = "tf2xla_supported_ops_lib",
@@ -60,6 +60,14 @@
     ],
 )
 
+xla_py_proto_library(
+    name = "tf2xla_py",
+    has_services = False,
+    api_version = 2,
+    visibility = ["//visibility:public"],
+    deps = [":tf2xla_proto"],
+)
+
 xla_proto_library(
     name = "host_compute_metadata_proto",
     srcs = ["host_compute_metadata.proto"],
@@ -283,6 +291,7 @@
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
     ],
 )
 
@@ -448,6 +457,7 @@
     hdrs = [
         "dump_graph.h",
     ],
+    visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/compiler/jit:flags",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc
index c8341a2..8aa162b 100644
--- a/tensorflow/compiler/tf2xla/functionalize_cond.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc
@@ -63,6 +63,23 @@
   return Hash64Combine(h, std::hash<int>()(static_cast<int>(ancestor.type)));
 }
 
+typedef std::tuple<StateMap::CondId, StateMap::AncestorId, OutputTensor>
+    ClusterTuple;
+
+struct ClusterTupleLessThan {
+  bool operator()(const ClusterTuple& a, const ClusterTuple& b) const {
+    if (std::tie(std::get<0>(a), std::get<1>(a)) <
+        std::tie(std::get<0>(b), std::get<1>(b))) {
+      return true;
+    } else if (std::tie(std::get<0>(a), std::get<1>(a)) ==
+               std::tie(std::get<0>(b), std::get<1>(b))) {
+      return StateMap::OutputTensorLess()(std::get<2>(a), std::get<2>(b));
+    } else {
+      return false;
+    }
+  }
+};
+
 // TODO(jpienaar): Move to OutputTensor.
 string DebugString(const OutputTensor& tensor) {
   return absl::StrCat(tensor.node->name(), ":", tensor.index);
@@ -744,9 +761,9 @@
   }
   builder.Device(predicate_.node->assigned_device_name());
   // Conditional should be the first input ...
-  builder.Input(NodeDefBuilder::NodeOut(predicate_.node->name(),
-                                        predicate_.index,
-                                        predicate_.node->output_type(0)));
+  builder.Input(
+      NodeDefBuilder::NodeOut(predicate_.node->name(), predicate_.index,
+                              predicate_.node->output_type(predicate_.index)));
   // ... followed by the other inputs.
   builder.Input(inputs);
 
@@ -1393,16 +1410,30 @@
   // Sort the merge nodes from innermost outwards.
   SortMergeNodes(&merge_order);
 
-  // Cluster merge nodes by CondId and AncestorId in order of nesting.
-  using ClusterPair = std::pair<StateMap::CondId, StateMap::AncestorId>;
+  // Cluster merge nodes by (CondId, AncestorId, predicate) in order of
+  // nesting. (CondId, AncestorId) is not enough, e.g.
+  //   pred1 = array_ops.placeholder(dtypes.bool, name='pred1')
+  //   pred2 = array_ops.placeholder(dtypes.bool, name='pred2')
+  //   cond1 = control_flow_ops.cond(pred1, ...)
+  //   cond2 = control_flow_ops.cond(pred2, ...)
+  //   cond3 = control_flow_ops.cond(pred1, use cond1 and cond2)
+  //   cond4 = control_flow_ops.cond(pred2, use cond1 and cond2)
+  // cond3 and cond4 have the same (CondId, AncestorId), but they should not
+  // be merged into one "If" node (because they have different predicates).
   std::deque<std::vector<Node*>> merge_clusters;
-  std::map<ClusterPair, int> merge_cluster_index;
+  std::map<ClusterTuple, int, ClusterTupleLessThan> merge_cluster_index;
   for (Node* merge : merge_order) {
     auto cond_id = state_map_.LookupCondId(merge);
     if (state_map_.IsDead(cond_id)) continue;
 
-    ClusterPair key =
-        std::make_pair(cond_id, state_map_.LookupAncestorId(merge));
+    auto predicate = merge_to_predicate_.find(merge);
+    if (predicate == merge_to_predicate_.end()) {
+      return errors::Internal("Cannot find predicate for Merge node ",
+                              merge->name());
+    }
+
+    ClusterTuple key = std::make_tuple(
+        cond_id, state_map_.LookupAncestorId(merge), predicate->second);
     auto idx = merge_cluster_index.find(key);
     if (idx == merge_cluster_index.end()) {
       merge_cluster_index[key] = merge_clusters.size();
@@ -1422,7 +1453,7 @@
                      &state_map_);
     for (Node* merge : cluster) TF_RETURN_IF_ERROR(cond.AddMerge(merge));
     TF_RETURN_IF_ERROR(
-        cond.BuildAndReplace(graph_, library_, &merge_to_predicate_));
+        cond.BuildAndReplace(graph_, library_, &merge_to_replacement_));
 
     if (VLOG_IS_ON(4)) DumpGraphWithCondState("after_extract");
   }
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 69353fe..343568b 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -107,11 +107,13 @@
         "xla_pad_op.cc",
         "xla_reduce_op.cc",
         "xla_select_and_scatter_op.cc",
+        "xla_self_adjoint_eig_op.cc",
     ],
     hdrs = [
         "index_ops.h",
         "shape_util.h",
     ],
+    tags = ["optonly"],
     deps = [
         ":conv_op_helpers",
         ":if_op",
@@ -143,8 +145,8 @@
         "//tensorflow/compiler/xla/client/lib:prng",
         "//tensorflow/compiler/xla/client/lib:qr",
         "//tensorflow/compiler/xla/client/lib:quantize",
+        "//tensorflow/compiler/xla/client/lib:self_adjoint_eig",
         "//tensorflow/compiler/xla/client/lib:sorting",
-        "//tensorflow/compiler/xla/client/lib:triangular_solve",
         "//tensorflow/core:bitwise_ops_op_lib",
         "//tensorflow/core:control_flow_ops_op_lib",
         "//tensorflow/core:data_flow_ops_op_lib",
diff --git a/tensorflow/compiler/tf2xla/kernels/categorical_op.cc b/tensorflow/compiler/tf2xla/kernels/categorical_op.cc
index c2b4c28..a99c6ee 100644
--- a/tensorflow/compiler/tf2xla/kernels/categorical_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/categorical_op.cc
@@ -21,6 +21,7 @@
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
 #include "tensorflow/compiler/xla/client/lib/prng.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
@@ -112,9 +113,12 @@
                                     xla::PrimitiveType type,
                                     XlaOpKernelContext* ctx) {
     xla::XlaBuilder* builder = ctx->builder();
-    auto uniforms =
-        xla::RngUniform(XlaHelpers::Zero(builder, input_type(0)),
-                        XlaHelpers::One(builder, input_type(0)), uniform_shape);
+    // We want a number in (0, 1) rather than [0, 1) or (0, 1]:
+    // * log(-log(0)) is ∞.
+    // * log(-log(1)) is -∞.
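+    // (-log(-log U) with U ~ Uniform(0, 1) is a standard Gumbel(0, 1) sample,
+    // so argmax_i(logits_i - log(-log U_i)) draws from the categorical
+    // distribution: the Gumbel-max trick.)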
+    auto uniforms = xla::RngUniform(
+        xla::MinPositiveNormalValue(builder, type),
+        xla::One(builder, uniform_shape.element_type()), uniform_shape);
     return xla::Log(-xla::Log(uniforms));
   }
 
@@ -143,9 +147,13 @@
     if (uniform_shape.element_type() == xla::BF16) {
       uniform_shape.set_element_type(xla::F32);
     }
+    // We want a number in (0, 1) rather than [0, 1) or (0, 1]:
+    // * log(-log(0)) is ∞.
+    // * log(-log(1)) is -∞.
     auto uniforms = xla::StatelessRngUniform(
-        {seed0, seed1}, uniform_shape, XlaHelpers::Zero(builder, DT_FLOAT),
-        XlaHelpers::One(builder, DT_FLOAT));
+        {seed0, seed1}, uniform_shape,
+        xla::MinPositiveNormalValue(builder, uniform_shape.element_type()),
+        xla::One(builder, uniform_shape.element_type()));
     return xla::ConvertElementType(xla::Log(-xla::Log(uniforms)), type);
   }
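
The open interval matters because the Gumbel transform log(-log(u)) diverges at both endpoints, which is why the lower bound above moves to the smallest positive normal float (assuming the usual half-open [lo, hi) sampling semantics, 1 is excluded as well). A standalone C++ sketch, not part of this change, showing the endpoint behavior:

    #include <cmath>
    #include <cstdio>
    #include <limits>

    int main() {
      // The endpoints of [0, 1] break the Gumbel transform:
      std::printf("log(-log(0)) = %f\n", std::log(-std::log(0.0)));  // inf
      std::printf("log(-log(1)) = %f\n", std::log(-std::log(1.0)));  // -inf
      // Sampling from [min positive normal, 1) keeps the result finite:
      const double lo = std::numeric_limits<float>::min();
      std::printf("log(-log(min_normal)) = %f\n", std::log(-std::log(lo)));
    }
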
 
diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
index b96d453..d19d48e 100644
--- a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
@@ -13,6 +13,7 @@
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/types/span.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
@@ -134,14 +135,15 @@
 // If the 2D kernel would be very large, the 1D kernel can be applied once in
 // each dimension due to the symmetry of the kernel along all axes to reduce the
 // computational intensity.
-xla::XlaOp MakeBilinear1DKernel(xla::XlaBuilder* builder, int64 n) {
+xla::XlaOp MakeBilinear1DKernel(xla::XlaBuilder* builder,
+                                xla::PrimitiveType type, int64 n) {
   std::vector<float> kernel(n * 2 - 1);
   for (int64 i = 0; i < n; ++i) {
     float v = (i + 1.0f) / n;
     kernel[i] = v;
     kernel[n * 2 - 2 - i] = v;
   }
-  return xla::ConstantR1<float>(builder, kernel);
+  return xla::ConvertElementType(xla::ConstantR1<float>(builder, kernel), type);
 }
 
 // Unlike the bilinear kernel, which is triangular, the nearest neighbor
@@ -153,11 +155,12 @@
 // to the right (because an existing non-TPU kernel
 // for nearest neighbor resize already chose to default to the right,
 // and we want to be consistent).
-xla::XlaOp MakeNearestNeighbor1DKernel(xla::XlaBuilder* builder, int64 n) {
+xla::XlaOp MakeNearestNeighbor1DKernel(xla::XlaBuilder* builder,
+                                       xla::PrimitiveType type, int64 n) {
   std::vector<float> kernel(n * 2 - 1, 0.0f);
   std::fill(&kernel[n / 2], &kernel[(3 * n) / 2], 1.0f);
 
-  return xla::ConstantR1<float>(builder, kernel);
+  return xla::ConvertElementType(xla::ConstantR1<float>(builder, kernel), type);
 }
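
For intuition, a host-side C++ sketch, not part of this change, of the two 1D kernels for n = 3; the device code builds the same values as an XLA constant and then converts them to the requested element type:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      const int n = 3;
      // Bilinear: a triangle peaking at 1.0 in the middle.
      std::vector<float> bilinear(n * 2 - 1);
      for (int i = 0; i < n; ++i) {
        const float v = (i + 1.0f) / n;
        bilinear[i] = v;
        bilinear[n * 2 - 2 - i] = v;
      }
      // Nearest neighbor: a box of n ones, biased right for even n.
      std::vector<float> nearest(n * 2 - 1, 0.0f);
      std::fill(nearest.begin() + n / 2, nearest.begin() + (3 * n) / 2, 1.0f);
      for (float v : bilinear) std::printf("%.3f ", v);  // 0.333 0.667 1.000 0.667 0.333
      std::printf("\n");
      for (float v : nearest) std::printf("%.0f ", v);   // 0 1 1 1 0
      std::printf("\n");
    }
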
 
 // Kernels with more than 16 spatial elements are considered intense and the
@@ -165,42 +168,66 @@
 const int64 kMax2DKernelSize = 16;
 
 xla::XlaOp MakeGeneralResizeKernel(xla::XlaBuilder* builder,
+                                   xla::PrimitiveType type,
                                    absl::Span<const int64> kernel_size,
                                    int64 channels, bool is_kernel_bilinear) {
   auto make_kernel_func =
       is_kernel_bilinear ? MakeBilinear1DKernel : MakeNearestNeighbor1DKernel;
 
-  auto depthwise_kernel = xla::Broadcast(
-      xla::Zero(builder, xla::F32),
-      {(2 * kernel_size[0] - 1), (2 * kernel_size[1] - 1), channels, 1});
+  std::vector<int64> depthwise_kernel_sizes = {
+      (2 * kernel_size[0] - 1), (2 * kernel_size[1] - 1), channels, 1};
+  auto depthwise_kernel =
+      xla::BroadcastInDim(make_kernel_func(builder, type, kernel_size[1]),
+                          depthwise_kernel_sizes, /*broadcast_dimensions=*/{1});
 
-  return xla::Mul(
-      xla::Add(depthwise_kernel, make_kernel_func(builder, kernel_size[1]),
-               /*broadcast_dimensions=*/{1}),
-      make_kernel_func(builder, kernel_size[0]),
-      /*broadcast_dimensions=*/{0});
+  return xla::Mul(depthwise_kernel,
+                  make_kernel_func(builder, type, kernel_size[0]),
+                  /*broadcast_dimensions=*/{0});
 }
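
The BroadcastInDim followed by Mul above produces a separable 2D kernel, i.e. the outer product of the two 1D kernels. A host-side sketch of the resulting values, assuming bilinear kernels with n = 2 (1D kernel {0.5, 1.0, 0.5}):

    #include <cstdio>
    #include <vector>

    int main() {
      const std::vector<float> row = {0.5f, 1.0f, 0.5f};  // kernel_size[1] axis
      const std::vector<float> col = {0.5f, 1.0f, 0.5f};  // kernel_size[0] axis
      for (float c : col) {
        for (float r : row) std::printf("%.2f ", c * r);
        std::printf("\n");
      }
      // 0.25 0.50 0.25
      // 0.50 1.00 0.50
      // 0.25 0.50 0.25
    }
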
 
 xla::XlaOp MakeGeneralResizeKernelInDim(xla::XlaBuilder* builder,
+                                        xla::PrimitiveType type,
                                         absl::Span<const int64> kernel_size,
                                         int64 channels, int64 dim,
                                         bool is_kernel_bilinear) {
   auto make_kernel_func =
       is_kernel_bilinear ? MakeBilinear1DKernel : MakeNearestNeighbor1DKernel;
 
-  auto depthwise_kernel =
-      xla::Broadcast(xla::Zero(builder, xla::F32),
-                     {dim == 0 ? (2 * kernel_size[0] - 1) : 1,
-                      dim == 1 ? (2 * kernel_size[1] - 1) : 1, channels, 1});
-  return xla::Add(depthwise_kernel, make_kernel_func(builder, kernel_size[dim]),
-                  /*broadcast_dimensions=*/{dim});
+  std::vector<int64> depthwise_kernel_sizes = {
+      dim == 0 ? (2 * kernel_size[0] - 1) : 1,
+      dim == 1 ? (2 * kernel_size[1] - 1) : 1, channels, 1};
+  return xla::BroadcastInDim(make_kernel_func(builder, type, kernel_size[dim]),
+                             depthwise_kernel_sizes,
+                             /*broadcast_dimensions=*/{dim});
+}
+
+xla::XlaOp BroadcastSpatialDimensions(xla::XlaBuilder* builder,
+                                      const xla::XlaOp& input,
+                                      int32 spatial_dimensions_offset,
+                                      absl::Span<const int64> in_size,
+                                      absl::Span<const int64> out_size) {
+  // Add broadcasts to handle expanding from a size == 1 dimension to a
+  // size > 1 dimension.
+  auto broadcast_shape_or_status = builder->GetShape(input);
+  if (!broadcast_shape_or_status.ok()) {
+    return builder->ReportError(broadcast_shape_or_status.status());
+  }
+  xla::Shape broadcast_shape = broadcast_shape_or_status.ValueOrDie();
+  for (int32 i = 0; i < in_size.size(); ++i) {
+    if (in_size[i] == 1 && out_size[i] > 1) {
+      broadcast_shape.set_dimensions(spatial_dimensions_offset + i,
+                                     out_size[i]);
+    }
+  }
+  return xla::BroadcastInDim(input, broadcast_shape.dimensions(),
+                             /*broadcast_dimensions=*/{0, 1, 2, 3});
 }
 
 xla::XlaOp ResizeUsingDilationAndConvolution(
-    xla::XlaBuilder* builder, const xla::XlaOp& input,
-    const int num_spatial_dims, std::vector<int64> in_size,
-    std::vector<int64> out_size, const int64 channels, const bool align_corners,
-    bool is_kernel_bilinear) {
+    xla::XlaBuilder* builder, const xla::XlaOp& input, xla::PrimitiveType type,
+    const int num_spatial_dims, absl::Span<const int64> in_size,
+    absl::Span<const int64> out_size, const int64 channels,
+    const bool align_corners, bool is_kernel_bilinear) {
   // Picture for a 1x3 to 1x4 bilinear resize:
   // stride = 2, kernel size = 3
   // Input:
@@ -287,7 +314,7 @@
   // Split convolutions into independent dimensions if together they would
   // form a very large kernel.
   if (dims.kernel_size[0] * dims.kernel_size[1] < kMax2DKernelSize) {
-    xla::XlaOp kernel = MakeGeneralResizeKernel(builder, dims.kernel_size,
+    xla::XlaOp kernel = MakeGeneralResizeKernel(builder, type, dims.kernel_size,
                                                 channels, is_kernel_bilinear);
     output =
         xla::ConvGeneralDilated(input_data, kernel, dims.stride,
@@ -299,7 +326,7 @@
                                 /*feature_group_count=*/channels);
   } else {
     xla::XlaOp kernel0 = MakeGeneralResizeKernelInDim(
-        builder, dims.kernel_size, channels, 0, is_kernel_bilinear);
+        builder, type, dims.kernel_size, channels, 0, is_kernel_bilinear);
     output = xla::ConvGeneralDilated(
         input_data, kernel0, {dims.stride[0], 1},
         /*padding=*/
@@ -308,7 +335,7 @@
         /*rhs_dilation=*/{1, 1}, dimension_numbers,
         /*feature_group_count=*/channels);
     xla::XlaOp kernel1 = MakeGeneralResizeKernelInDim(
-        builder, dims.kernel_size, channels, 1, is_kernel_bilinear);
+        builder, type, dims.kernel_size, channels, 1, is_kernel_bilinear);
     output = xla::ConvGeneralDilated(
         output, kernel1, {1, dims.stride[1]},
         /*padding=*/
@@ -320,19 +347,14 @@
 
   // Add broadcasts to handle expanding from a size == 1 dimension to a
   // size > 1 dimension.
-  for (int i = 0; i < num_spatial_dims; ++i) {
-    if (in_size[i] == 1 && out_size[i] > 1) {
-      output = xla::Add(output, xla::ConstantR1<float>(builder, out_size[i], 0),
-                        /*broadcast_dimensions=*/{1 + i});
-    }
-  }
-  return output;
+  return BroadcastSpatialDimensions(
+      builder, output, /*spatial_dimensions_offset=*/1, in_size, out_size);
 }
 
 xla::XlaOp ResizeUsingDilationAndConvolutionGradOp(
-    xla::XlaBuilder* builder, const xla::XlaOp& grad,
-    const int num_spatial_dims, std::vector<int64> in_size,
-    std::vector<int64> grad_size, const int64 channels,
+    xla::XlaBuilder* builder, const xla::XlaOp& grad, xla::PrimitiveType type,
+    const int num_spatial_dims, absl::Span<const int64> in_size,
+    absl::Span<const int64> grad_size, const int64 channels,
     const bool align_corners, bool is_kernel_bilinear) {
   ResizeConvolutionDims dims =
       ComputeResizeConvolutionParameters(in_size, grad_size, align_corners);
@@ -353,19 +375,14 @@
   dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims);
   xla::XlaOp output;
   if (dims.kernel_size[0] * dims.kernel_size[1] < kMax2DKernelSize) {
-    xla::XlaOp kernel = MakeGeneralResizeKernel(builder, dims.kernel_size,
+    xla::XlaOp kernel = MakeGeneralResizeKernel(builder, type, dims.kernel_size,
                                                 channels, is_kernel_bilinear);
 
     // Broadcast the input kernel where the forward op expanded from a size == 1
     // dimension to a size > 1 dimension. This has the effect of summing the
     // gradient contributions in that dimension.
-    for (int i = 0; i < num_spatial_dims; ++i) {
-      if (in_size[i] == 1 && grad_size[i] > 1) {
-        kernel =
-            xla::Add(kernel, xla::ConstantR1<float>(builder, grad_size[i], 0),
-                     /*broadcast_dimensions=*/{i});
-      }
-    }
+    kernel = BroadcastSpatialDimensions(
+        builder, kernel, /*spatial_dimensions_offset=*/0, in_size, grad_size);
 
     output = xla::ConvGeneralDilated(
         grad, kernel, /*window_strides=*/dims.kernel_size,
@@ -377,22 +394,22 @@
         /*feature_group_count=*/channels);
   } else {
     xla::XlaOp kernel0 = MakeGeneralResizeKernelInDim(
-        builder, dims.kernel_size, channels, 0, is_kernel_bilinear);
+        builder, type, dims.kernel_size, channels, 0, is_kernel_bilinear);
     xla::XlaOp kernel1 = MakeGeneralResizeKernelInDim(
-        builder, dims.kernel_size, channels, 1, is_kernel_bilinear);
+        builder, type, dims.kernel_size, channels, 1, is_kernel_bilinear);
 
     // Broadcast the input kernel where the forward op expanded from a
     // size == 1 dimension to a size > 1 dimension. This has the effect of
     // summing the gradient contributions in that dimension.
     if (in_size[0] == 1 && grad_size[0] > 1) {
-      kernel0 =
-          xla::Add(kernel0, xla::ConstantR1<float>(builder, grad_size[0], 0),
-                   /*broadcast_dimensions=*/{0});
+      kernel0 = BroadcastSpatialDimensions(builder, kernel0,
+                                           /*spatial_dimensions_offset=*/0, {1},
+                                           {grad_size[0]});
     }
     if (in_size[1] == 1 && grad_size[1] > 1) {
-      kernel1 =
-          xla::Add(kernel0, xla::ConstantR1<float>(builder, grad_size[1], 0),
-                   /*broadcast_dimensions=*/{1});
+      kernel1 = BroadcastSpatialDimensions(builder, kernel1,
+                                           /*spatial_dimensions_offset=*/0,
+                                           in_size, grad_size);
     }
 
     output = xla::ConvGeneralDilated(
@@ -423,7 +440,7 @@
     }
   }
   if (pad_output) {
-    output = xla::Pad(output, xla::ConstantR0<float>(builder, 0.0f), padding);
+    output = xla::Pad(output, xla::Zero(builder, type), padding);
   }
   return output;
 }
@@ -458,6 +475,7 @@
   const int num_spatial_dims = 2;
 
   xla::XlaOp input = ctx->Input(0);
+  xla::PrimitiveType input_type = ctx->input_xla_type(0);
 
   // If in_size[i] > 1 and out_size[i] == 1, slice out the first input in
   // dimension i.
@@ -475,8 +493,11 @@
                        {batch, in_size[0], in_size[1], channels}, {1, 1, 1, 1});
   }
 
-  // Output is always type float.
-  input = xla::ConvertElementType(input, xla::F32);
+  // Output is always type float if 'is_kernel_bilinear' is true.
+  if (is_kernel_bilinear) {
+    input = xla::ConvertElementType(input, xla::F32);
+    input_type = xla::F32;
+  }
 
   // Special Case:
   // Instead of doing a ResizeUsingDilationAndConvolution directly,
@@ -504,19 +525,19 @@
         std::vector<int64> next_out_size = {(in_size[0] - 1) * 2 + 1,
                                             (in_size[1] - 1) * 2 + 1};
         output = ResizeUsingDilationAndConvolution(
-            b, input, num_spatial_dims, in_size, next_out_size, channels,
-            align_corners_, is_kernel_bilinear);
+            b, input, input_type, num_spatial_dims, in_size, next_out_size,
+            channels, align_corners_, is_kernel_bilinear);
         input = output;
         in_size = next_out_size;
       } else {
         output = ResizeUsingDilationAndConvolution(
-            b, input, num_spatial_dims, in_size, out_size, channels,
+            b, input, input_type, num_spatial_dims, in_size, out_size, channels,
             align_corners_, is_kernel_bilinear);
         in_size = out_size;
       }
     } else {
       output = ResizeUsingDilationAndConvolution(
-          b, input, num_spatial_dims, in_size, out_size, channels,
+          b, input, input_type, num_spatial_dims, in_size, out_size, channels,
           align_corners_, is_kernel_bilinear);
       in_size = out_size;
     }
@@ -631,19 +652,19 @@
           std::vector<int64> next_grad_size = {(in_size[0] - 1) * 2 + 1,
                                                (in_size[1] - 1) * 2 + 1};
           output = ResizeUsingDilationAndConvolutionGradOp(
-              b, grad, num_spatial_dims, in_size, next_grad_size, channels,
-              align_corners_, true);
+              b, grad, xla::F32, num_spatial_dims, in_size, next_grad_size,
+              channels, align_corners_, true);
           grad = output;
           in_size = next_grad_size;
         } else {
           output = ResizeUsingDilationAndConvolutionGradOp(
-              b, grad, num_spatial_dims, in_size, grad_size, channels,
+              b, grad, xla::F32, num_spatial_dims, in_size, grad_size, channels,
               align_corners_, true);
           in_size = grad_size;
         }
       } else {
         output = ResizeUsingDilationAndConvolutionGradOp(
-            b, grad, num_spatial_dims, in_size, grad_size, channels,
+            b, grad, xla::F32, num_spatial_dims, in_size, grad_size, channels,
             align_corners_, true);
         in_size = grad_size;
       }
diff --git a/tensorflow/compiler/tf2xla/kernels/matrix_triangular_solve_op.cc b/tensorflow/compiler/tf2xla/kernels/matrix_triangular_solve_op.cc
index b322495..5a6569c 100644
--- a/tensorflow/compiler/tf2xla/kernels/matrix_triangular_solve_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/matrix_triangular_solve_op.cc
@@ -15,7 +15,8 @@
 
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
-#include "tensorflow/compiler/xla/client/lib/triangular_solve.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
 
 namespace tensorflow {
 namespace {
@@ -31,8 +32,10 @@
   void Compile(XlaOpKernelContext* ctx) override {
     auto result = xla::TriangularSolve(
         ctx->Input(0), ctx->Input(1), /*left_side=*/true,
-        /*lower=*/lower_, /*transpose_a=*/adjoint_, /*conjugate_a=*/adjoint_,
-        /*unit_diagonal=*/false);
+        /*lower=*/lower_, /*unit_diagonal=*/false,
+        /*transpose_a=*/
+        adjoint_ ? xla::TriangularSolveOptions::ADJOINT
+                 : xla::TriangularSolveOptions::NO_TRANSPOSE);
     ctx->SetOutput(0, result);
   }
 
diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
index 01b047f..d6c70d4 100644
--- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
@@ -25,6 +25,7 @@
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
 #include "tensorflow/compiler/xla/client/lib/loops.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -279,9 +280,9 @@
 
     xla::XlaBuilder* b = ctx->builder();
 
-    xla::XlaOp one = XlaHelpers::FloatLiteral(b, dtype, 1.0);
+    xla::XlaOp one = xla::One(b, xla_shape.element_type());
     xla::XlaOp min_positive =
-        XlaHelpers::FloatLiteral(b, dtype, std::numeric_limits<float>::min());
+        xla::MinPositiveNormalValue(b, xla_shape.element_type());
     auto uniform = xla::RngUniform(min_positive, one, xla_shape);
     ctx->SetOutput(0, TruncatedNormal(uniform));
   }
diff --git a/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc b/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc
index a95e7ad..a1c18be 100644
--- a/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc
@@ -110,10 +110,16 @@
     auto updates = context->Input(1);
     auto result =
         XlaScatter(buffer, updates, indices,
-                   /*indices_are_vectors=*/true, /*combiner=*/{}, builder);
+                   /*indices_are_vectors=*/true, /*combiner=*/Combine, builder);
     OP_REQUIRES_OK(context, result.status());
     context->SetOutput(0, result.ValueOrDie());
   }
+
+ private:
+  static xla::XlaOp Combine(const xla::XlaOp& x, const xla::XlaOp& y,
+                            xla::XlaBuilder* builder) {
+    return xla::Add(x, y);
+  }
 };
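
With the Add combiner supplied above, updates that target the same index accumulate rather than overwriting each other, matching ScatterNd's documented duplicate-index semantics. A host-side C++ sketch of the intended behavior, illustrative only:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<float> buffer(5, 0.0f);
      const int indices[] = {1, 3, 1};             // index 1 appears twice
      const float updates[] = {10.f, 20.f, 30.f};
      for (int i = 0; i < 3; ++i) buffer[indices[i]] += updates[i];
      for (float v : buffer) std::printf("%.0f ", v);  // 0 40 0 20 0
      std::printf("\n");
    }
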
 
 REGISTER_XLA_OP(Name("ScatterNd").CompileTimeConstantInput("shape"),
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_op.cc b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
index 31d4cc1..280b683 100644
--- a/tensorflow/compiler/tf2xla/kernels/shape_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
@@ -104,7 +104,7 @@
     for (int64 i = 0; i < rank; ++i) {
       size = xla::Mul(size, xla::GetDimensionSize(ctx->Input(0), i));
     }
-    size = xla::ConvertElementType(size, xla::S32);
+    size = xla::ConvertElementType(size, ctx->output_xla_type(0));
     ctx->SetOutput(0, size);
   }
 };
diff --git a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc
index 50653d7..17f067e 100644
--- a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc
@@ -218,8 +218,8 @@
     OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(DT_FLOAT, shape, &xla_shape));
     auto uniform = xla::StatelessRngUniform(
         {seed0, seed1}, xla_shape,
-        xla::ConstantR0<float>(builder, std::numeric_limits<float>::min()),
-        xla::ConstantR0<float>(builder, 1.0));
+        xla::MinPositiveNormalValue(builder, xla_shape.element_type()),
+        xla::One(builder, xla_shape.element_type()));
     auto output = TruncatedNormal(uniform);
     output = MaybeConvertF32ToBF16(output, dtype_);
     ctx->SetOutput(0, output);
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc
index 6502001..8958a48 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc
@@ -26,6 +26,7 @@
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -35,6 +36,7 @@
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -69,6 +71,43 @@
 
 REGISTER_XLA_OP(Name("TensorListLength"), TensorListLengthOp);
 
+// Creates a zeros list with shape (leading_dim, *element_shape) if
+// element_shape is known at compile time. Otherwise creates one with shape
+// (leading_dim, 0), which gets initialized later in `GetInitializedList`.
+Status CreateZerosList(XlaOpKernelContext* ctx, int element_shape_index,
+                       int64 leading_dim, DataType dtype, xla::XlaOp* list) {
+  TensorShape list_shape;
+  list_shape.AddDim(leading_dim);
+  xla::XlaOp element_shape_handle = ctx->Input(element_shape_index);
+  TF_ASSIGN_OR_RETURN(
+      bool is_element_shape_compile_time_const,
+      element_shape_handle.builder()->IsConstant(element_shape_handle));
+  PartialTensorShape partial_element_shape;
+  if (is_element_shape_compile_time_const) {
+    TF_RETURN_IF_ERROR(ctx->ConstantInputAsPartialShape(
+        element_shape_index, &partial_element_shape));
+  }
+  if (is_element_shape_compile_time_const &&
+      partial_element_shape.IsFullyDefined()) {
+    TensorShape element_shape;
+    partial_element_shape.AsTensorShape(&element_shape);
+    list_shape.AppendShape(element_shape);
+  } else {
+    // If element_shape is not a compile time constant or if it is not fully
+    // defined we will have to wait for the first write call to fully allocate
+    // the array.
+    // TODO(srbs): We are using element_shape of [0] as a proxy to denote an
+    // uninitialized list. A better implementation may be to represent the
+    // list as a 3-tuple containing an explicit "initialized" flag. However,
+    // we would still need to create a dummy tensor for the first tuple
+    // element.
+    list_shape.AddDim(0);
+  }
+  *list = xla::Broadcast(XlaHelpers::Zero(ctx->builder(), dtype),
+                         list_shape.dim_sizes());
+  return Status::OK();
+}
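
A host-side sketch, not part of this change, of the shape decision CreateZerosList makes: a fully known element shape yields (leading_dim, *element_shape), while an unknown one yields the (leading_dim, 0) proxy that defers allocation to the first write:

    #include <cstdio>
    #include <vector>

    std::vector<long> ListShape(long leading_dim,
                                const std::vector<long>* element_shape) {
      std::vector<long> shape = {leading_dim};
      if (element_shape != nullptr) {
        shape.insert(shape.end(), element_shape->begin(), element_shape->end());
      } else {
        shape.push_back(0);  // proxy for "uninitialized"
      }
      return shape;
    }

    int main() {
      const std::vector<long> elem = {2, 3};
      for (long d : ListShape(10, &elem)) std::printf("%ld ", d);    // 10 2 3
      std::printf("\n");
      for (long d : ListShape(10, nullptr)) std::printf("%ld ", d);  // 10 0
      std::printf("\n");
    }
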
+
 class TensorListReserveOp : public XlaOpKernel {
  public:
   explicit TensorListReserveOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
@@ -76,20 +115,15 @@
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
-    TensorShape element_shape;
-    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &element_shape));
     int64 num_elements;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &num_elements));
 
-    TensorShape tensor_shape;
-    tensor_shape.AddDim(num_elements);
-    tensor_shape.AppendShape(element_shape);
+    xla::XlaOp list;
+    OP_REQUIRES_OK(ctx, CreateZerosList(ctx, 0, num_elements, dtype_, &list));
 
     xla::XlaBuilder* b = ctx->builder();
     ctx->SetTensorListOutput(
-        0, xla::Tuple(b, {xla::Broadcast(XlaHelpers::Zero(b, dtype_),
-                                         tensor_shape.dim_sizes()),
-                          xla::ConstantR0<int32>(b, num_elements)}));
+        0, xla::Tuple(b, {list, xla::ConstantR0<int32>(b, num_elements)}));
   }
 
  private:
@@ -110,8 +144,6 @@
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
-    TensorShape element_shape;
-    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &element_shape));
     int64 max_num_elements;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &max_num_elements));
     OP_REQUIRES(
@@ -119,15 +151,13 @@
         errors::InvalidArgument("XLA compilation requires a fixed tensor list "
                                 "size. Set the max number of elements."));
 
-    TensorShape tensor_shape;
-    tensor_shape.AddDim(max_num_elements);
-    tensor_shape.AppendShape(element_shape);
+    xla::XlaOp list;
+    OP_REQUIRES_OK(ctx,
+                   CreateZerosList(ctx, 0, max_num_elements, dtype_, &list));
 
     xla::XlaBuilder* b = ctx->builder();
     ctx->SetTensorListOutput(
-        0, xla::Tuple(b, {xla::Broadcast(XlaHelpers::Zero(b, dtype_),
-                                         tensor_shape.dim_sizes()),
-                          xla::ConstantR0<int32>(b, 0)}));
+        0, xla::Tuple(b, {list, xla::ConstantR0<int32>(b, 0)}));
   }
 
  private:
@@ -274,6 +304,36 @@
     Name("TensorListFromTensor").CompileTimeConstantInput("element_shape"),
     TensorListFromTensorOp);
 
+// Returns the 0th element of `tuple`, which holds the list tensor, if it has
+// already been initialized; otherwise creates one lazily. This allows lazy
+// initialization of the list on the first call to SetItem or PushBack.
+Status GetInitializedList(XlaOpKernelContext* ctx, const xla::XlaOp& tuple,
+                          const TensorShape& element_shape, DataType dtype,
+                          xla::XlaOp* list) {
+  *list = xla::GetTupleElement(tuple, 0);
+  TensorShape list_shape;
+  TF_RETURN_IF_ERROR(GetTensorListShape(ctx->builder(), tuple, &list_shape));
+  int64 leading_dim = list_shape.dim_size(0);
+  TensorShape list_element_shape = list_shape;
+  list_element_shape.RemoveDim(0);
+  // This checks for the lazy initialization contract set by CreateZerosList:
+  // when element_shape is not known at compile time, TensorListReserve
+  // creates a list with shape [leading_dim, 0].
+  if (element_shape != list_element_shape) {
+    if (list_element_shape.num_elements() != 0) {
+      return errors::InvalidArgument(
+          "Invalid shape of value in TensorListSetItem. Expected: ",
+          list_element_shape.DebugString(),
+          " Actual: ", element_shape.DebugString());
+    }
+    list_shape = element_shape;
+    list_shape.InsertDim(0, leading_dim);
+    *list = xla::Broadcast(XlaHelpers::Zero(ctx->builder(), dtype),
+                           list_shape.dim_sizes());
+  }
+  return Status::OK();
+}
+
 class TensorListSetItemOp : public XlaOpKernel {
  public:
   explicit TensorListSetItemOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
@@ -285,7 +345,9 @@
     xla::XlaOp tl = ctx->Input(0);
     TensorShape elem_shape = ctx->InputShape(2);
 
-    xla::XlaOp ta = xla::GetTupleElement(tl, 0);
+    xla::XlaOp list;
+    OP_REQUIRES_OK(ctx, GetInitializedList(ctx, tl, elem_shape, dtype_, &list));
+
     xla::XlaOp index = ctx->Input(1);
     xla::XlaOp value = ctx->Input(2);
 
@@ -299,8 +361,8 @@
     auto update = xla::Reshape(value, slice_shape.dim_sizes());
 
     ctx->SetTensorListOutput(
-        0, xla::Tuple(b, {xla::DynamicUpdateSlice(ta, update, start_indices),
-                          index + xla::ConstantR0<int32>(b, 1)}));
+        0, xla::Tuple(b, {xla::DynamicUpdateSlice(list, update, start_indices),
+                          xla::GetTupleElement(tl, 1)}));
   }
 
  private:
@@ -319,11 +381,14 @@
 
   void Compile(XlaOpKernelContext* ctx) override {
     xla::XlaBuilder* b = ctx->builder();
-    xla::XlaOp tl = ctx->Input(0);
+    xla::XlaOp list_tuple = ctx->Input(0);
     TensorShape elem_shape = ctx->InputShape(1);
 
-    xla::XlaOp ta = xla::GetTupleElement(tl, 0);
-    xla::XlaOp index = xla::GetTupleElement(tl, 1);
+    xla::XlaOp list;
+    OP_REQUIRES_OK(
+        ctx, GetInitializedList(ctx, list_tuple, elem_shape, dtype_, &list));
+
+    xla::XlaOp index = xla::GetTupleElement(list_tuple, 1);
     xla::XlaOp value = ctx->Input(1);
 
     // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
@@ -336,7 +401,7 @@
     auto update = xla::Reshape(value, slice_shape.dim_sizes());
 
     ctx->SetTensorListOutput(
-        0, xla::Tuple(b, {xla::DynamicUpdateSlice(ta, update, start_indices),
+        0, xla::Tuple(b, {xla::DynamicUpdateSlice(list, update, start_indices),
                           index + xla::ConstantR0<int32>(b, 1)}));
   }
 
diff --git a/tensorflow/compiler/tf2xla/kernels/transpose_op.cc b/tensorflow/compiler/tf2xla/kernels/transpose_op.cc
index 76793d6..4ac7143 100644
--- a/tensorflow/compiler/tf2xla/kernels/transpose_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/transpose_op.cc
@@ -19,6 +19,7 @@
 // helper.
 
 #include "tensorflow/core/kernels/transpose_op.h"
+#include "tensorflow/compiler/tf2xla/lib/scatter.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
@@ -128,29 +129,46 @@
                 errors::InvalidArgument("permutation of nonnegative int32s "
                                         "must have <= int32 max elements"));
 
-    std::vector<int64> perm;
-    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(0, &perm));
+    auto e = ctx->InputExpression(0);
+    auto tensor_or_status = e.ResolveConstant(ctx->compiler()->client());
+    OP_REQUIRES_OK(ctx, tensor_or_status.status());
+    // If the input is a constant, we also want the output to be a constant.
+    // Some models rely on the result of InvertPermutation being a constant.
+    // TODO(b/32495713): Remove this when we can check whether Scatter is
+    // constant. Right now, we always assume it is non-constant because we don't
+    // check the embedded computation.
+    if (tensor_or_status.ValueOrDie().has_value()) {
+      std::vector<int64> perm;
+      OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(0, &perm));
 
-    int size = perm.size();
+      int size = perm.size();
 
-    std::vector<int32> output(size);
-    std::fill_n(output.data(), size, -1);
-    for (int i = 0; i < size; ++i) {
-      const int64 d = perm[i];
-      OP_REQUIRES(ctx, FastBoundsCheck(d, size),
-                  errors::InvalidArgument(d, " is not between 0 and ", size));
-      OP_REQUIRES(ctx, output[d] == -1,
-                  errors::InvalidArgument(d, " is duplicated in the input."));
-      output[d] = i;
+      std::vector<int32> output(size);
+      std::fill_n(output.data(), size, -1);
+      for (int i = 0; i < size; ++i) {
+        const int64 d = perm[i];
+        OP_REQUIRES(ctx, FastBoundsCheck(d, size),
+                    errors::InvalidArgument(d, " is not between 0 and ", size));
+        OP_REQUIRES(ctx, output[d] == -1,
+                    errors::InvalidArgument(d, " is duplicated in the input."));
+        output[d] = i;
+      }
+
+      ctx->SetOutput(0, xla::ConstantR1<int32>(ctx->builder(), output));
+    } else {
+      auto indices = ctx->Input(0);
+      int size = ctx->InputShape(0).num_elements();
+      auto iota = xla::Iota(ctx->builder(), xla::S32, size);
+      auto result = XlaScatter(iota, iota, indices,
+                               /*indices_are_vectors=*/false, /*combiner=*/{},
+                               ctx->builder());
+      OP_REQUIRES_OK(ctx, result.status());
+      ctx->SetOutput(0, result.ValueOrDie());
     }
-
-    ctx->SetOutput(0, xla::ConstantR1<int32>(ctx->builder(), output));
   }
 };
 
-REGISTER_XLA_OP(Name("InvertPermutation")
-                    .TypeConstraint("T", DT_INT32)
-                    .CompileTimeConstantInput("x"),
+REGISTER_XLA_OP(Name("InvertPermutation").TypeConstraint("T", DT_INT32),
                 InvertPermutationOp);
 
 }  // namespace
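
The scatter path works because scattering iota(n) with the permutation as indices writes i into output[perm[i]], which is exactly the inverse permutation. A host-side C++ sketch of that identity:

    #include <cstdio>
    #include <vector>

    int main() {
      const std::vector<int> perm = {2, 0, 1};
      std::vector<int> inv(perm.size());
      // Scattering iota with `perm` as indices: output[perm[i]] = i.
      for (int i = 0; i < static_cast<int>(perm.size()); ++i) inv[perm[i]] = i;
      for (int v : inv) std::printf("%d ", v);  // 1 2 0
      std::printf("\n");
    }
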
diff --git a/tensorflow/compiler/tf2xla/kernels/xla_self_adjoint_eig_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_self_adjoint_eig_op.cc
new file mode 100644
index 0000000..233ac8e
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/xla_self_adjoint_eig_op.cc
@@ -0,0 +1,66 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/self_adjoint_eig.h"
+#include "tensorflow/core/lib/core/bits.h"
+
+namespace tensorflow {
+namespace {
+
+class XlaSelfAdjointEigOp : public XlaOpKernel {
+ public:
+  explicit XlaSelfAdjointEigOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("lower", &lower_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("max_iter", &max_iter_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("epsilon", &epsilon_));
+  }
+  void Compile(XlaOpKernelContext* ctx) override {
+    auto result =
+        xla::SelfAdjointEig(ctx->Input(0), lower_, max_iter_, epsilon_);
+    ctx->SetOutput(0, result.w);
+    ctx->SetOutput(1, result.v);
+  }
+
+ private:
+  bool lower_;
+  int32 max_iter_;
+  float epsilon_;
+};
+
+class SelfAdjointEigV2Op : public XlaOpKernel {
+ public:
+  explicit SelfAdjointEigV2Op(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+  void Compile(XlaOpKernelContext* ctx) override {
+    const TensorShape input_shape = ctx->InputShape("input");
+    int n = input_shape.dim_size(input_shape.dims() - 1);
+    // This is based on the heuristic that approximately log(n) sweep updates
+    // are needed. Note: the heuristic provides no theoretical guarantee; the
+    // max_iter and epsilon parameters determine the exit condition.
+    int max_iter = 2 * tensorflow::Log2Ceiling(n);
+    auto result = xla::SelfAdjointEig(ctx->Input(0), true, max_iter, 1e-6);
+    ctx->SetOutput(0, result.w);
+    ctx->SetOutput(1, result.v);
+  }
+};
+
+REGISTER_XLA_OP(Name("XlaSelfAdjointEig").TypeConstraint("T", kFloatTypes),
+                XlaSelfAdjointEigOp);
+REGISTER_XLA_OP(Name("SelfAdjointEigV2").TypeConstraint("T", kFloatTypes),
+                SelfAdjointEigV2Op);
+
+}  // namespace
+}  // namespace tensorflow
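
A standalone sketch of the sweep-count heuristic used in SelfAdjointEigV2Op above; Log2Ceiling here is a hypothetical host-side stand-in for tensorflow::Log2Ceiling from tensorflow/core/lib/core/bits.h:

    #include <cstdio>
    #include <initializer_list>

    // Hypothetical stand-in for tensorflow::Log2Ceiling.
    int Log2Ceiling(int n) {
      int bits = 0;
      for (int v = n - 1; v > 0; v >>= 1) ++bits;
      return bits;
    }

    int main() {
      for (int n : {4, 64, 1000}) {
        std::printf("n = %4d -> max_iter = %d\n", n, 2 * Log2Ceiling(n));
      }
      // n =    4 -> max_iter = 4
      // n =   64 -> max_iter = 12
      // n = 1000 -> max_iter = 20
    }
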
diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index af64113..ccd5807 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -56,6 +56,41 @@
 rhs_output: the broadcasted RHS tensor
 )doc");
 
+REGISTER_OP("XlaSelfAdjointEig")
+    .Input("a: T")
+    .Attr("lower: bool")
+    .Attr("max_iter: int")
+    .Attr("epsilon: float")
+    .Output("w: T")
+    .Output("v: T")
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Attr("T: numbertype")
+    .Doc(R"doc(
+Computes the eigendecomposition of a batch of self-adjoint matrices
+(Note: only real inputs are supported).
+
+Computes the eigenvalues and eigenvectors of the innermost N-by-N matrices in
+tensor such that tensor[..., :, :] * v[..., :, i] = w[..., i] * v[..., :, i],
+for i=0...N-1.
+
+a: the input tensor.
+
+lower: a boolean specifying whether the calculation is done with the lower
+  triangular part or the upper triangular part.
+
+max_iter: maximum number of sweep updates; a sweep covers the whole lower
+  (or upper, depending on `lower`) triangular part. Heuristically, it has been
+  argued that approximately log(N) sweeps are needed in practice (Ref: Golub &
+  van Loan, "Matrix Computations").
+
+epsilon: the tolerance ratio.
+
+w: The eigenvalues in ascending order, each repeated according to its
+  multiplicity.
+v: The column v[..., :, i] is the normalized eigenvector corresponding to the
+  eigenvalue w[..., i].
+)doc");
+
 REGISTER_OP("XlaConv")
     .Input("lhs: T")
     .Input("rhs: T")
diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py
index 345193c..de4710d 100644
--- a/tensorflow/compiler/tf2xla/python/xla.py
+++ b/tensorflow/compiler/tf2xla/python/xla.py
@@ -291,6 +291,10 @@
       name=name)
 
 
+def self_adjoint_eig(a, lower, max_iter, epsilon):
+  return gen_xla_ops.xla_self_adjoint_eig(a, lower, max_iter, epsilon)
+
+
 dynamic_slice = gen_xla_ops.xla_dynamic_slice
 dynamic_update_slice = gen_xla_ops.xla_dynamic_update_slice
 
diff --git a/tensorflow/compiler/tf2xla/tf2xla.cc b/tensorflow/compiler/tf2xla/tf2xla.cc
index cf48576..28a4566 100644
--- a/tensorflow/compiler/tf2xla/tf2xla.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla.cc
@@ -254,7 +254,8 @@
 
 // Converts the TensorFlow graph into an XLA computation by executing the
 // graph symbolically, with each op building up the XLA HLO.
-Status ConvertGraphToXla(std::unique_ptr<Graph> graph, xla::Client* client,
+Status ConvertGraphToXla(std::unique_ptr<Graph> graph,
+                         const tf2xla::Config& config, xla::Client* client,
                          xla::XlaComputation* computation) {
   XlaOpRegistry::RegisterCompilationKernels();
   for (Node* node : graph->nodes()) {
@@ -264,6 +265,19 @@
   std::vector<XlaCompiler::Argument> xla_args;
   TF_RETURN_IF_ERROR(CreateXlaArgs(*graph, &xla_args));
 
+  // Populate arguments with resource variables from the config. The variables
+  // get turned into inputs and outputs.
+  for (const tf2xla::Variable& variable : config.variable()) {
+    XlaCompiler::Argument arg;
+    arg.type = variable.type();
+    arg.kind = XlaCompiler::Argument::kResource;
+    arg.shape = variable.shape();
+    arg.name = variable.node_name();
+    arg.resource_kind = XlaResource::kVariable;
+    arg.initialized = true;
+    xla_args.push_back(std::move(arg));
+  }
+
   // Compile the graph into an XLA computation.
   XlaCompiler::Options compiler_options;
   compiler_options.client = client;
@@ -361,7 +375,8 @@
                             xla::XlaComputation* computation) {
   std::unique_ptr<Graph> graph;
   TF_RETURN_IF_ERROR(InitGraph(graph_def, config, &graph));
-  TF_RETURN_IF_ERROR(ConvertGraphToXla(std::move(graph), client, computation));
+  TF_RETURN_IF_ERROR(
+      ConvertGraphToXla(std::move(graph), config, client, computation));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/tf2xla/tf2xla.proto b/tensorflow/compiler/tf2xla/tf2xla.proto
index 18c9089..5627af7 100644
--- a/tensorflow/compiler/tf2xla/tf2xla.proto
+++ b/tensorflow/compiler/tf2xla/tf2xla.proto
@@ -39,6 +39,15 @@
   string name = 2;  // Optional name for generated code.
 };
 
+// Variable represents a resource variable with the given name, shape and type.
+message Variable {
+  string node_name = 1;
+  // Optional name for generated code. If empty, node_name will be used.
+  string name = 2;
+  TensorShapeProto shape = 3;
+  DataType type = 4;
+}
+
 // Config represents configuration information for tf2xla conversion.
 message Config {
   // Each feed is a positional input argument for the generated computation.
@@ -47,4 +56,6 @@
   // Each fetch is a positional output argument for the generated computation.
   // The order of each entry matches the order of each output argument.
   repeated Fetch fetch = 2;
+  // Each variable is a named input and output of the generated computation.
+  repeated Variable variable = 3;
 };
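
A sketch of how a caller might populate the new field (hypothetical node name and shape; this compiles only against the TensorFlow tree, since it uses the classes generated from tf2xla.proto):

    #include "tensorflow/compiler/tf2xla/tf2xla.pb.h"

    // Mark one resource variable so tf2xla turns it into an input and an
    // output of the generated computation.
    tensorflow::tf2xla::Config MakeConfig() {
      tensorflow::tf2xla::Config config;
      tensorflow::tf2xla::Variable* var = config.add_variable();
      var->set_node_name("my_variable");               // hypothetical graph node
      var->set_type(tensorflow::DT_FLOAT);
      var->mutable_shape()->add_dim()->set_size(128);  // shape [128]
      return config;
    }
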
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index c64f78e..88c03a6 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -122,7 +122,12 @@
 
   for (const auto& iter : const_input_index_to_node) {
     int arg_index = iter.first;
-    Node* const_node = g->CopyNode(iter.second);
+    NodeDef const_def = iter.second->def();
+    const_def.set_name(g->NewName(const_def.name()));
+    Status s;
+    Node* const_node = g->AddNode(const_def, &s);
+    TF_RETURN_IF_ERROR(s);
+
     Node* arg_node = arg_nodes[arg_index];
 
     // Collect all usages of the _Arg node.
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
index 9e9c3ce..28b4744 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
@@ -363,5 +363,58 @@
   TF_EXPECT_OK(PropagateConstIntoFunctionalNodes(&graph, &fld, &fld));
 }
 
+TEST(PropagateConstIntoFunctionalNodes, CopiedConstNodeHasUniqueName) {
+  FunctionLibraryDefinition fld(OpRegistry::Global(), {});
+  {
+    // Cond graph & body graph.
+    Scope scope = Scope::NewRootScope().ExitOnError();
+    auto pred = ops::_Arg(scope.WithOpName("arg0"), DT_BOOL, 0);
+    auto input = ops::_Arg(scope.WithOpName("arg1"), DT_BOOL, 1);
+    auto duplicate_name = ops::NoOp(scope.WithOpName("duplicate_name"));
+    auto ret = ops::_Retval(scope.WithOpName("ret"), pred, 0);
+    Graph graph(OpRegistry::Global());
+    TF_ASSERT_OK(scope.ToGraph(&graph));
+    FunctionDef cond_fdef;
+    TF_ASSERT_OK(GraphToFunctionDef(graph, "cond", &cond_fdef));
+    TF_ASSERT_OK(fld.AddFunctionDef(cond_fdef));
+    FunctionDef body_fdef;
+    TF_ASSERT_OK(GraphToFunctionDef(graph, "body", &body_fdef));
+    TF_ASSERT_OK(fld.AddFunctionDef(body_fdef));
+  }
+  Scope scope = Scope::NewRootScope().ExitOnError();
+  auto pred =
+      ops::Const(scope.WithOpName("duplicate_name"), false, TensorShape({}));
+  auto input = ops::Const(scope.WithOpName("input"), false, TensorShape({}));
+  NameAttrList cond_fn, body_fn;
+  cond_fn.set_name("cond");
+  body_fn.set_name("body");
+  auto while_op =
+      ops::While(scope.WithOpName("while"),
+                 std::initializer_list<Input>{pred, input}, cond_fn, body_fn);
+  Graph graph(OpRegistry::Global());
+  TF_ASSERT_OK(scope.ToGraph(&graph));
+
+  TF_EXPECT_OK(PropagateConstIntoFunctionalNodes(&graph, &fld, &fld));
+
+  // Check that in the rewritten body function, the NoOp node still has name
+  // "duplicate_name", and the copied Const node has name "duplicate_name/_0".
+  auto node_name_index = graph.BuildNodeNameIndex();
+  Node* while_node = node_name_index["while"];
+  ASSERT_NE(while_node, nullptr);
+  TF_ASSERT_OK(GetNodeAttr(while_node->def(), "body", &body_fn));
+  const FunctionDef* rewritten_body_fn = fld.Find(body_fn.name());
+  ASSERT_NE(rewritten_body_fn, nullptr);
+  std::unordered_map<string, NodeDef> nodes;
+  for (const NodeDef& node_def : rewritten_body_fn->node_def()) {
+    nodes[node_def.name()] = node_def;
+  }
+  auto noop_def = nodes.find("duplicate_name");
+  ASSERT_NE(noop_def, nodes.end());
+  EXPECT_EQ(noop_def->second.op(), "NoOp");
+  auto const_def = nodes.find("duplicate_name/_0");
+  ASSERT_NE(const_def, nodes.end());
+  EXPECT_EQ(const_def->second.op(), "Const");
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc
index ddb2849..5bd0277 100644
--- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc
+++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc
@@ -60,8 +60,6 @@
   // buffers, so they get ids to track.
   bool ShouldAllocateEmptyTensors() override { return true; }
 
-  void GetStats(AllocatorStats* stats) override { stats->Clear(); }
-
  private:
   // Don't run any constructors or destructors for complex objects,
   // since there is no backing store for the tensor to run them
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index 0833264..3221ec5 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -185,9 +185,10 @@
   std::vector<xla::XlaOp> elems;
   elems.reserve(retvals.size());
 
-  // Keeps track of which retvals have layout to update. The first element is
-  // the output index, second element is the new layout.
-  std::vector<std::pair<int64, xla::Layout>> retval_to_update_layout;
+  // Keeps track of the layout of each retval. If a retval is not in this
+  // list, a descending layout is used. The first element is the output index;
+  // the second element is the new layout.
+  std::vector<std::pair<int64, xla::Layout>> retval_index_and_layout;
   for (int i = 0; i < retvals.size(); ++i) {
     XlaCompiler::OutputDescription& output = (*outputs)[i];
     const XlaExpression& retval = retvals[i];
@@ -216,7 +217,7 @@
           TF_ASSIGN_OR_RETURN(xla::Shape shape, shape_representation_fn(
                                                     output.shape, output.type));
           value = xla::Reshape(value, xla::AsInt64Slice(shape.dimensions()));
-          retval_to_update_layout.emplace_back(elems.size(), shape.layout());
+          retval_index_and_layout.emplace_back(elems.size(), shape.layout());
         } else if (it != retval_cores.end()) {
           // Apply the sharding to the output, if there is a core assignment.
           value = identity_op(value);
@@ -289,6 +290,11 @@
       // Ensures the correct sharding is applied to the output.
       handle = identity_op(handle);
 
+      // Set layout of the retval to device representation layout.
+      if (resource->representation_shape().has_value()) {
+        retval_index_and_layout.emplace_back(
+            elems.size(), resource->representation_shape()->layout());
+      }
       elems.push_back(handle);
     }
   }
@@ -318,15 +324,15 @@
                       computation->GetProgramShape());
   *output_shape = program_shape.result();
   // Update the output layout to the layout of retval.
-  for (auto& update : retval_to_update_layout) {
+  for (auto& index_and_layout : retval_index_and_layout) {
     if (!always_return_tuple && elems.size() == 1) {
-      *output_shape->mutable_layout() = update.second;
+      *output_shape->mutable_layout() = index_and_layout.second;
       continue;
     }
 
-    xla::Shape* output_sub_shape =
-        xla::ShapeUtil::GetMutableSubshape(output_shape, {update.first});
-    *output_sub_shape->mutable_layout() = update.second;
+    xla::Shape* output_sub_shape = xla::ShapeUtil::GetMutableSubshape(
+        output_shape, {index_and_layout.first});
+    *output_sub_shape->mutable_layout() = index_and_layout.second;
   }
   return Status::OK();
 }
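
For reference, a "descending" layout is the minor-to-major order {rank-1, ..., 0}. A standalone sketch, independent of this change, of how a minor-to-major list maps to strides for a {2, 3} shape:

    #include <cstdio>
    #include <vector>

    int main() {
      const std::vector<long> dims = {2, 3};
      const std::vector<std::vector<int>> layouts = {{1, 0}, {0, 1}};
      for (const auto& minor_to_major : layouts) {
        std::vector<long> stride(dims.size());
        long s = 1;
        for (int d : minor_to_major) {  // walk from minor-most to major-most
          stride[d] = s;
          s *= dims[d];
        }
        std::printf("layout {%d,%d} -> strides {%ld,%ld}\n", minor_to_major[0],
                    minor_to_major[1], stride[0], stride[1]);
      }
      // layout {1,0} -> strides {3,1}   (row-major, the descending default)
      // layout {0,1} -> strides {1,2}   (column-major)
    }
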
diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc
index 492010f..b311378 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc
@@ -277,6 +277,97 @@
   EXPECT_TRUE(xla::LiteralTestUtil::Equal(param0_literal, actual_literal));
 }
 
+// Tests that the compiler can correctly propagate the layout assigned by
+// shape_representation_fn_ to return types.
+TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForRetVal) {
+  Scope scope = Scope::NewRootScope().ExitOnError();
+  auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0);
+  auto var = ops::_Arg(scope.WithOpName("V"), DT_RESOURCE, 1);
+  // Adds an identity op around the resource to make sure identity ops propagate
+  // resources correctly.
+  auto identity = ops::Identity(scope.WithOpName("VIdentity"), var);
+  auto write = ops::AssignAddVariableOp(scope, identity, a);
+  auto read = ops::ReadVariableOp(
+      scope.WithControlDependencies(std::vector<Operation>{write}), var,
+      DT_INT32);
+  auto read_plus_one = ops::Add(scope, read, ops::Const<int32>(scope, 1));
+  auto d = ops::_Retval(scope.WithOpName("D"), read_plus_one, 0);
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_ASSERT_OK(scope.ToGraph(graph.get()));
+
+  // Builds a description of the arguments.
+  std::vector<XlaCompiler::Argument> args(2);
+  args[0].kind = XlaCompiler::Argument::kParameter;
+  args[0].type = DT_INT32;
+  args[0].shape = TensorShape({2, 3});
+  args[1].kind = XlaCompiler::Argument::kResource;
+  args[1].resource_kind = XlaResource::kVariable;
+  args[1].initialized = true;
+  args[1].type = DT_INT32;
+  args[1].shape = TensorShape({2, 3});
+
+  auto options = DefaultOptions();
+  options.shape_representation_fn =
+      [](const TensorShape& shape, DataType dt) -> xla::StatusOr<xla::Shape> {
+    xla::Shape xla_shape;
+    TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dt, shape, &xla_shape));
+    *xla_shape.mutable_layout() = xla::LayoutUtil::MakeLayout({0, 1});
+    return xla_shape;
+  };
+  // Compiles the graph.
+  XlaCompiler compiler(options);
+
+  XlaCompiler::CompilationResult result;
+  TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "add",
+                                     std::move(graph), args, &result));
+  xla::Shape transposed =
+      xla::ShapeUtil::MakeShapeWithLayout(xla::S32, {2, 3}, {0, 1});
+  // Check that the return shapes are correctly transposed.
+  EXPECT_EQ(result.xla_output_shape,
+            xla::ShapeUtil::MakeTupleShape({transposed, transposed}));
+}
+
+// The layout of a resource variable shouldn't change after a transpose.
+TEST_F(XlaCompilerTest, TransposeVariables) {
+  Scope scope = Scope::NewRootScope().ExitOnError();
+  auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0);
+  auto var = ops::_Arg(scope.WithOpName("V"), DT_RESOURCE, 1);
+  // Adds an identity op around the resource to make sure identity ops propagate
+  // resources correctly.
+  auto identity = ops::Identity(scope.WithOpName("VIdentity"), var);
+  auto write = ops::AssignAddVariableOp(scope, identity, a);
+  auto read = ops::ReadVariableOp(
+      scope.WithControlDependencies(std::vector<Operation>{write}), var,
+      DT_INT32);
+  auto transposed_read = ops::Transpose(scope, read, {1, 0});
+  auto reshape = ops::Reshape(scope, transposed_read, {2, 3});
+  auto d = ops::_Retval(scope.WithOpName("D"), reshape, 0);
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_ASSERT_OK(scope.ToGraph(graph.get()));
+
+  // Builds a description of the arguments.
+  std::vector<XlaCompiler::Argument> args(2);
+  args[0].kind = XlaCompiler::Argument::kParameter;
+  args[0].type = DT_INT32;
+  args[0].shape = TensorShape({2, 3});
+  args[1].kind = XlaCompiler::Argument::kResource;
+  args[1].resource_kind = XlaResource::kVariable;
+  args[1].initialized = true;
+  args[1].type = DT_INT32;
+  args[1].shape = TensorShape({2, 3});
+  // Compiles the graph.
+  XlaCompiler compiler(DefaultOptions());
+
+  XlaCompiler::CompilationResult result;
+  TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "transpose",
+                                     std::move(graph), args, &result));
+  xla::Shape transposed =
+      xla::ShapeUtil::MakeShapeWithLayout(xla::S32, {2, 3}, {1, 0});
+  // Check that the return shapes are correctly transposed.
+  EXPECT_EQ(result.xla_output_shape,
+            xla::ShapeUtil::MakeTupleShape({transposed, transposed}));
+}
+
 // Tests that the compiler doesn't reorder the parameters.
 TEST_F(XlaCompilerTest, MixedOrderArguments) {
   for (bool swap_order : {false, true}) {
diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc
index 6139bf3..3f787fd 100644
--- a/tensorflow/compiler/tf2xla/xla_context.cc
+++ b/tensorflow/compiler/tf2xla/xla_context.cc
@@ -76,7 +76,7 @@
 }
 
 const xla::XlaComputation* XlaContext::GetOrCreateMax(const DataType type) {
-  return LookupOrCreate(type, &max_func_, [this, type] {
+  return LookupOrCreate(type, &max_func_, [type] {
     const string type_string = DataTypeString(type);
     VLOG(1) << "Building Max() for " << type_string;
     xla::XlaBuilder b("max<" + type_string + ">");
@@ -92,7 +92,7 @@
 }
 
 const xla::XlaComputation* XlaContext::GetOrCreateMin(const DataType type) {
-  return LookupOrCreate(type, &min_func_, [this, type] {
+  return LookupOrCreate(type, &min_func_, [type] {
     const string type_string = DataTypeString(type);
     VLOG(1) << "Building Min() for " << type_string;
     xla::XlaBuilder b("min<" + type_string + ">");
@@ -108,7 +108,7 @@
 }
 
 const xla::XlaComputation* XlaContext::GetOrCreateAdd(const DataType type) {
-  return LookupOrCreate(type, &add_func_, [this, type] {
+  return LookupOrCreate(type, &add_func_, [type] {
     const string type_string = DataTypeString(type);
     VLOG(1) << "Building Add() for " << type_string;
     xla::XlaBuilder b("add<" + type_string + ">");
@@ -124,7 +124,7 @@
 }
 
 const xla::XlaComputation* XlaContext::GetOrCreateMul(const DataType type) {
-  return LookupOrCreate(type, &mul_func_, [this, type] {
+  return LookupOrCreate(type, &mul_func_, [type] {
     const string type_string = DataTypeString(type);
     VLOG(1) << "Building Mul() for " << type_string;
     xla::XlaBuilder b("mul<" + type_string + ">");
diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc
index 04a5d93..7bb1ad2 100644
--- a/tensorflow/compiler/tf2xla/xla_helpers.cc
+++ b/tensorflow/compiler/tf2xla/xla_helpers.cc
@@ -81,61 +81,27 @@
   return Status::OK();
 }
 
-template <typename T>
-static Tensor MakeLinspaceTensor(const TensorShape& shape, int64 depth) {
-  Tensor linspace(DataTypeToEnum<T>::v(), shape);
-  auto linspace_flat = linspace.flat<T>();
-  for (int64 i = 0; i < depth; ++i) {
-    linspace_flat(i) = i;
-  }
-  return linspace;
-}
-
 Status XlaHelpers::OneHot(xla::XlaBuilder* builder, int64 depth, int axis,
                           DataType index_type, const TensorShape& indices_shape,
                           const xla::XlaOp& indices, const xla::XlaOp& on_value,
                           const xla::XlaOp& off_value, xla::XlaOp* one_hot) {
-  const int indices_dims = indices_shape.dims();
-  const int output_dims = indices_dims + 1;
-
-  TensorShape output_shape = indices_shape;
-  output_shape.InsertDim(axis, depth);
-
-  // Build a Tensor populated with values 0, 1, 2, ... depth.
-  std::vector<int64> linspace_dims(output_dims, 1);
-  linspace_dims[axis] = depth;
-  TensorShape linspace_shape(linspace_dims);
-  Tensor linspace;
-  switch (index_type) {
-    case DT_UINT8:
-      linspace = MakeLinspaceTensor<uint8>(linspace_shape, depth);
-      break;
-    case DT_INT32:
-      linspace = MakeLinspaceTensor<int32>(linspace_shape, depth);
-      break;
-    case DT_INT64:
-      linspace = MakeLinspaceTensor<int64>(linspace_shape, depth);
-      break;
-    default:
-      return errors::InvalidArgument("Invalid argument type ",
-                                     DataTypeString(index_type));
-  }
-
-  xla::BorrowingLiteral linspace_literal;
-  TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(linspace, &linspace_literal));
-
   // Broadcast an iota across the indices along the new axis, and test
   // equality at each position.
   std::vector<int64> broadcast_dims(indices_shape.dims());
   std::iota(broadcast_dims.begin(), broadcast_dims.begin() + axis, 0);
   std::iota(broadcast_dims.begin() + axis, broadcast_dims.end(), axis + 1);
-  xla::XlaOp one_hot_bool = xla::Eq(
-      indices, xla::ConstantLiteral(builder, linspace_literal), broadcast_dims);
+
+  TensorShape output_shape = indices_shape;
+  output_shape.InsertDim(axis, depth);
+  xla::Shape iota_shape;
+  TF_RETURN_IF_ERROR(
+      TensorShapeToXLAShape(index_type, output_shape, &iota_shape));
 
   // Selects the user-provided off_value and on_value values.
-  *one_hot = xla::Select(one_hot_bool,
-                         xla::Broadcast(on_value, output_shape.dim_sizes()),
-                         xla::Broadcast(off_value, output_shape.dim_sizes()));
+  *one_hot = xla::Select(
+      xla::Eq(indices, xla::Iota(builder, iota_shape, axis), broadcast_dims),
+      xla::Broadcast(on_value, output_shape.dim_sizes()),
+      xla::Broadcast(off_value, output_shape.dim_sizes()));
   return Status::OK();
 }
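
The rewrite replaces the materialized linspace constant with an Iota along the new axis; comparing the indices against that iota yields the one-hot mask directly. A host-side sketch of the comparison for depth = 4:

    #include <cstdio>
    #include <vector>

    int main() {
      const int depth = 4;
      const std::vector<int> indices = {1, 3, 0};
      for (int idx : indices) {
        for (int i = 0; i < depth; ++i) {  // i plays the role of the iota
          std::printf("%d ", i == idx ? 1 : 0);
        }
        std::printf("\n");
      }
      // 0 1 0 0
      // 0 0 0 1
      // 1 0 0 0
    }
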
 
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index 78bc2c9..ee11f3a 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -319,6 +319,27 @@
   return Status::OK();
 }
 
+Status XlaOpKernelContext::ConstantInputAsPartialShape(
+    int index, PartialTensorShape* shape) {
+  xla::Literal literal;
+  TF_RETURN_IF_ERROR(ConstantInput(index, &literal));
+  // If `literal` is a scalar it's value must be -1.
+  if (literal.shape().rank() == 0) {
+    int64 shape_val;
+    TF_RETURN_IF_ERROR(LiteralToInt64Scalar(literal, &shape_val));
+    if (shape_val != -1) {
+      return errors::InvalidArgument(
+          "Cannot convert value to PartialTensorShape: ", shape_val);
+    }
+    *shape = PartialTensorShape();  // Shape with unknown rank.
+    return Status::OK();
+  }
+  std::vector<int64> dims;
+  TF_RETURN_IF_ERROR(LiteralToInt64Vector(literal, &dims));
+  *shape = PartialTensorShape(dims);
+  return Status::OK();
+}
+
 Status XlaOpKernelContext::InputList(absl::string_view name,
                                      std::vector<xla::XlaOp>* handles,
                                      std::vector<TensorShape>* shapes) {
@@ -447,6 +468,16 @@
   }
 }
 
+xla::PrimitiveType XlaOpKernelContext::output_xla_type(int index) {
+  xla::PrimitiveType type;
+  Status status = DataTypeToPrimitiveType(expected_output_dtype(index), &type);
+  if (!status.ok()) {
+    SetStatus(status);
+    return xla::PRIMITIVE_TYPE_INVALID;
+  }
+  return type;
+}
+
 void XlaOpKernelContext::SetOutput(int index, const xla::XlaOp& handle) {
   SetOutputExpression(
       index,
@@ -503,6 +534,7 @@
     handle = xla::Reshape(handle,
                           xla::AsInt64Slice(representation_shape.dimensions()));
   }
+  variable->SetRepresentationShape(representation_shape);
   return variable->SetValue(handle);
 }
 
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h
index e44415f..cc2d5e8 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.h
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h
@@ -138,6 +138,10 @@
   // Converts a constant 1D int32 or int64 tensor into a TensorShape.
   Status ConstantInputAsShape(int index, TensorShape* shape);
 
+  // Converts a constant 1D int32 or int64 tensor, or a scalar with value -1,
+  // into a PartialTensorShape.
+  Status ConstantInputAsPartialShape(int index, PartialTensorShape* shape);
+
   // Returns the named list-valued immutable input in "list", as
   // defined in the OpDef.  If the named output is not list-valued,
   // returns a one-element list.
@@ -155,6 +159,11 @@
     return context_->expected_output_dtype(index);
   }
 
+  // Returns the type of output `index` as an xla::PrimitiveType. If the type
+  // is not representable as an XLA type, sets an error status and returns
+  // xla::PRIMITIVE_TYPE_INVALID.
+  xla::PrimitiveType output_xla_type(int index);
+
   // Sets output `index` to the XlaOp `handle`.
   // All outputs should be set using SetOutput and SetConstantOutput, not
   // via the underlying OpKernelContext.
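
The error convention documented for output_xla_type (record the status on the
context and return PRIMITIVE_TYPE_INVALID as a sentinel) keeps builder code
linear instead of threading StatusOr through every accessor. A hypothetical
stand-alone sketch of the same pattern, with illustrative names rather than
TensorFlow APIs:

#include <iostream>
#include <optional>
#include <string>
#include <utility>

struct Context {
  std::optional<std::string> status;  // nullopt means OK.
  void SetStatus(std::string s) {
    if (!status) status = std::move(s);  // Keep the first error.
  }
  // On failure, records the error and returns -1 as a sentinel (playing the
  // role of xla::PRIMITIVE_TYPE_INVALID) so the caller can check once later.
  int LookupType(bool representable) {
    if (!representable) {
      SetStatus("type not representable as an XLA type");
      return -1;
    }
    return 7;  // Some valid type id.
  }
};

int main() {
  Context ctx;
  const int t = ctx.LookupType(/*representable=*/false);
  std::cout << t << ": " << ctx.status.value_or("OK") << '\n';
}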
diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h
index 736588b..ab3a5bd 100644
--- a/tensorflow/compiler/tf2xla/xla_resource.h
+++ b/tensorflow/compiler/tf2xla/xla_resource.h
@@ -86,6 +86,12 @@
   // variables have new values that need to be written back.
   const xla::XlaOp& initial_value() const { return initial_value_; }
 
+  // An XLA shape that indicates how this resource variable is represented on
+  // device.
+  const absl::optional<xla::Shape>& representation_shape() const {
+    return representation_shape_;
+  }
+
   // A variable is initialized if it has a value.
   bool initialized() const { return value_.valid(); }
 
@@ -100,6 +106,11 @@
   // Sets the current value of the resource to an all-zero value.
   Status SetZeroValue(xla::XlaBuilder* builder);
 
+  // Sets the representation shape of the resource on device.
+  void SetRepresentationShape(const xla::Shape& shape) {
+    representation_shape_ = absl::make_optional(shape);
+  }
+
   // Looks up the gradient for `source`, or creates it if it does not already
   // exist. The call target must be an initialized TensorArray resource. A
   // TensorArray can have multiple named gradients; see the operator
@@ -160,6 +171,10 @@
   xla::XlaOp value_;
   xla::XlaOp initial_value_;
 
+  // An XLA shape that indicates how this resource variable is represented on
+  // device.
+  absl::optional<xla::Shape> representation_shape_;
+
   int64 max_array_size_ = -1;
   bool tensor_array_multiple_writes_aggregate_ = false;
 
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 636e5ef..ee6f7d5 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -150,8 +150,6 @@
     visibility = ["//visibility:public"],
     deps = [
         ":status",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
         "//tensorflow/stream_executor/lib",
     ],
 )
@@ -194,7 +192,7 @@
         ":types",
         ":util",
         "//tensorflow/core:lib",
-        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
     ],
 )
 
@@ -833,7 +831,6 @@
             "//tensorflow/compiler/xla:xla_proto",
             "//tensorflow/compiler/xla/service:hlo",
             "//tensorflow/core:framework_internal",
-            "//tensorflow/core:lib",
             "@com_google_absl//absl/strings",
         ],
 )
diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD
index 6be8154..c5dea5f 100644
--- a/tensorflow/compiler/xla/client/lib/BUILD
+++ b/tensorflow/compiler/xla/client/lib/BUILD
@@ -65,7 +65,6 @@
         "//tensorflow/compiler/xla/client/lib:loops",
         "//tensorflow/compiler/xla/client/lib:matrix",
         "//tensorflow/compiler/xla/client/lib:slicing",
-        "//tensorflow/compiler/xla/client/lib:triangular_solve",
         "//tensorflow/core:lib",
     ],
 )
@@ -231,6 +230,7 @@
     deps = [
         ":arithmetic",
         ":constants",
+        ":slicing",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status",
         "//tensorflow/compiler/xla:status_macros",
@@ -376,6 +376,7 @@
     srcs = ["sorting.cc"],
     hdrs = ["sorting.h"],
     deps = [
+        ":comparators",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
@@ -451,56 +452,12 @@
 )
 
 cc_library(
-    name = "triangular_solve",
-    srcs = ["triangular_solve.cc"],
-    hdrs = ["triangular_solve.h"],
-    deps = [
-        "//tensorflow/compiler/xla:literal",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla:util",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/client:xla_computation",
-        "//tensorflow/compiler/xla/client/lib:constants",
-        "//tensorflow/compiler/xla/client/lib:math",
-        "//tensorflow/compiler/xla/client/lib:matrix",
-        "//tensorflow/compiler/xla/client/lib:slicing",
-        "//tensorflow/core:lib",
-    ],
-)
-
-xla_test(
-    name = "triangular_solve_test",
-    srcs = ["triangular_solve_test.cc"],
-    tags = [
-        "enable_for_xla_interpreter",
-        "noasan",  # sometimes times out, http://b/78650012
-    ],
-    deps = [
-        ":math",
-        ":matrix",
-        ":triangular_solve",
-        "//tensorflow/compiler/xla:array2d",
-        "//tensorflow/compiler/xla:literal",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla:test",
-        "//tensorflow/compiler/xla:types",
-        "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/tests:client_library_test_base",
-        "//tensorflow/compiler/xla/tests:literal_test_util",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
-        "//tensorflow/core:test",
-    ],
-)
-
-cc_library(
-    name = "self_adjoint_eigen",
-    srcs = ["self_adjoint_eigen.cc"],
-    hdrs = ["self_adjoint_eigen.h"],
+    name = "self_adjoint_eig",
+    srcs = ["self_adjoint_eig.cc"],
+    hdrs = ["self_adjoint_eig.h"],
     deps = [
         ":arithmetic",
+        ":comparators",
         ":constants",
         ":loops",
         ":math",
@@ -517,9 +474,12 @@
 )
 
 xla_test(
-    name = "self_adjoint_eigen_test",
-    size = "medium",
-    srcs = ["self_adjoint_eigen_test.cc"],
+    name = "self_adjoint_eig_test",
+    srcs = ["self_adjoint_eig_test.cc"],
+    blacklisted_backends = [
+        "cpu",
+        "gpu",
+    ],
     real_hardware_only = True,
     shard_count = 10,
     tags = ["optonly"],
@@ -527,7 +487,7 @@
         ":arithmetic",
         ":constants",
         ":matrix",
-        ":self_adjoint_eigen",
+        ":self_adjoint_eig",
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:array3d",
         "//tensorflow/compiler/xla:literal",
diff --git a/tensorflow/compiler/xla/client/lib/cholesky.cc b/tensorflow/compiler/xla/client/lib/cholesky.cc
index 4578fff..bb41f99 100644
--- a/tensorflow/compiler/xla/client/lib/cholesky.cc
+++ b/tensorflow/compiler/xla/client/lib/cholesky.cc
@@ -23,7 +23,6 @@
 #include "tensorflow/compiler/xla/client/lib/math.h"
 #include "tensorflow/compiler/xla/client/lib/matrix.h"
 #include "tensorflow/compiler/xla/client/lib/slicing.h"
-#include "tensorflow/compiler/xla/client/lib/triangular_solve.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
@@ -194,13 +193,12 @@
         // l[i+k:, i:i+k] =
         //     trsm_right_transpose(l[i:i+k, i:i+k], a[i+k:, i:i+k])
         auto panel = SliceInMinorDims(a, {i + k, i}, {n, i + k});
-        auto update = TriangularSolve(factorized, panel,
-                                      /*left_side=*/false,
-                                      /*lower=*/true,
-                                      /*transpose_a=*/true,
-                                      /*conjugate_a=*/false,
-                                      /*unit_diagonal=*/false,
-                                      /*block_size=*/block_size);
+        auto update =
+            TriangularSolve(factorized, panel,
+                            /*left_side=*/false,
+                            /*lower=*/true,
+                            /*unit_diagonal=*/false,
+                            /*transpose_a=*/TriangularSolveOptions::TRANSPOSE);
         l = UpdateSliceInMinorDims(l, update, {i + k, i});
       }
     }
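
The updated call site delegates the blocked update to the TriangularSolve op
(right side, lower triangular, transposed). For intuition, with
left_side=false and transpose_a=TRANSPOSE the op solves X * L^T = B, which
reduces to a forward substitution per row of B. A minimal plain-C++ sketch for
a single right-hand-side row, assuming a well-conditioned lower-triangular L:

#include <cstdio>
#include <vector>

// Solves x * transpose(L) = b for one row x, with L lower triangular:
// b[j] = sum_{i <= j} x[i] * L[j][i], so x[j] falls out left to right.
std::vector<double> SolveRowXLt(const std::vector<std::vector<double>>& L,
                                const std::vector<double>& b) {
  const int n = static_cast<int>(b.size());
  std::vector<double> x(n);
  for (int j = 0; j < n; ++j) {
    double s = b[j];
    for (int i = 0; i < j; ++i) s -= x[i] * L[j][i];
    x[j] = s / L[j][j];
  }
  return x;
}

int main() {
  const std::vector<std::vector<double>> L = {{2, 0, 0}, {1, 3, 0}, {4, 5, 6}};
  const std::vector<double> x_true = {1, -2, 0.5};
  std::vector<double> b(3, 0.0);  // b = x_true * L^T.
  for (int j = 0; j < 3; ++j)
    for (int i = 0; i <= j; ++i) b[j] += x_true[i] * L[j][i];
  for (const double v : SolveRowXLt(L, b)) std::printf("%g ", v);
  std::printf("\n");  // Prints: 1 -2 0.5
}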
diff --git a/tensorflow/compiler/xla/client/lib/constants.cc b/tensorflow/compiler/xla/client/lib/constants.cc
index d0efd79..6bd56a8 100644
--- a/tensorflow/compiler/xla/client/lib/constants.cc
+++ b/tensorflow/compiler/xla/client/lib/constants.cc
@@ -80,6 +80,24 @@
   }
 }
 
+XlaOp MinPositiveNormalValue(XlaBuilder* builder, PrimitiveType type) {
+  switch (type) {
+    case F16:
+      return ConstantR0<Eigen::half>(builder,
+                                     std::numeric_limits<Eigen::half>::min());
+    case BF16:
+      return ConstantR0<bfloat16>(builder, bfloat16::min_positive_normal());
+    case F32:
+      return ConstantR0<float>(builder, std::numeric_limits<float>::min());
+    case F64:
+      return ConstantR0<double>(builder, std::numeric_limits<double>::min());
+    default:
+      return builder->ReportError(
+          InvalidArgument("Invalid type for MinPositiveNormalValue (%s).",
+                          PrimitiveType_Name(type)));
+  }
+}
+
 XlaOp MaxValue(XlaBuilder* builder, PrimitiveType type) {
   return ConstantLiteral(builder, LiteralUtil::MaxValue(type));
 }
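
One detail worth noting in MinPositiveNormalValue: for floating-point T,
std::numeric_limits<T>::min() is the smallest positive normal value, not the
most negative value, which is exactly the quantity wanted here. A quick
stand-alone check:

#include <iostream>
#include <limits>

int main() {
  // Smallest positive normal float (~1.17549e-38): what min() returns.
  std::cout << std::numeric_limits<float>::min() << '\n';
  // Smallest positive subnormal float (~1.4013e-45): NOT what min() returns.
  std::cout << std::numeric_limits<float>::denorm_min() << '\n';
  // Most negative finite float: use lowest(), not min(), for this.
  std::cout << std::numeric_limits<float>::lowest() << '\n';
}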
diff --git a/tensorflow/compiler/xla/client/lib/constants.h b/tensorflow/compiler/xla/client/lib/constants.h
index 77e7ca6..47b8f1b 100644
--- a/tensorflow/compiler/xla/client/lib/constants.h
+++ b/tensorflow/compiler/xla/client/lib/constants.h
@@ -135,6 +135,9 @@
 // point type, this is equal to -MaxFiniteValue().
 XlaOp MinFiniteValue(XlaBuilder* builder, PrimitiveType type);
 
+// Returns the minimum positive normal value for floating-point type `type`.
+XlaOp MinPositiveNormalValue(XlaBuilder* builder, PrimitiveType type);
+
 // Returns the maximum representable finite or infinite value for 'type'.
 // Returns 'inf' for floating-point types.
 XlaOp MaxValue(XlaBuilder* builder, PrimitiveType type);
diff --git a/tensorflow/compiler/xla/client/lib/matrix.cc b/tensorflow/compiler/xla/client/lib/matrix.cc
index a5aea96..a055a8e 100644
--- a/tensorflow/compiler/xla/client/lib/matrix.cc
+++ b/tensorflow/compiler/xla/client/lib/matrix.cc
@@ -27,6 +27,7 @@
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
 #include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/slicing.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status.h"
@@ -45,7 +46,7 @@
   return ConvertElementType(indicator, type);
 }
 
-XlaOp GetMatrixDiagonal(XlaOp x) {
+XlaOp GetMatrixDiagonal(XlaOp x, int k) {
   XlaBuilder* builder = x.builder();
   return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(x));
@@ -53,10 +54,13 @@
     TF_RET_CHECK(n_dims >= 2);
     const int64 m = shape.dimensions(n_dims - 2);
     const int64 n = shape.dimensions(n_dims - 1);
+
+    auto offset = ConstantR0WithType(builder, S32, k);
+
     absl::Span<const int64> major_dims =
         AsInt64Slice(shape.dimensions()).subspan(/*pos=*/0, /*len=*/n_dims - 2);
-    auto a = Iota(builder, U32, n);
-    auto b = Iota(builder, U32, m);
+    auto a = Iota(builder, S32, n);
+    auto b = Iota(builder, S32, m) + offset;
     auto indicator = Eq(b, Broadcast(a, {m}), /*broadcast_dimensions=*/{0});
     auto mask = Broadcast(indicator, major_dims);
 
@@ -66,9 +70,21 @@
         primitive_util::IsIntegralType(shape.element_type())
             ? CreateScalarOrComputation(shape.element_type(), builder)
             : CreateScalarAddComputation(shape.element_type(), builder);
-
-    return Reduce(Select(mask, x, Zeros(builder, shape)), ScalarLike(x, 0),
-                  reducer, {m >= n ? n_dims - 2 : n_dims - 1});
+    // When k == 0, we can save one slice op.
+    if (k == 0) {
+      return Reduce(Select(mask, x, Zeros(builder, shape)), ScalarLike(x, 0),
+                    reducer, {m >= n ? n_dims - 2 : n_dims - 1});
+    } else if (k > 0) {
+      auto result = Reduce(Select(mask, x, Zeros(builder, shape)),
+                           ScalarLike(x, 0), reducer, {n_dims - 2});
+      return SliceInMinorDims(result, {std::min<int64>(k, n)},
+                              {std::min(m + k, n)});
+    } else {
+      auto result = Reduce(Select(mask, x, Zeros(builder, shape)),
+                           ScalarLike(x, 0), reducer, {n_dims - 1});
+      return SliceInMinorDims(result, {std::min<int64>(-k, m)},
+                              {std::min(m, n - k)});
+    }
   });
 }
 
@@ -336,4 +352,5 @@
 XlaOp MaybeTransposeInMinorDims(XlaOp x, bool transpose) {
   return transpose ? TransposeInMinorDims(x) : x;
 }
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/client/lib/matrix.h b/tensorflow/compiler/xla/client/lib/matrix.h
index 491f1ea..60c41ec 100644
--- a/tensorflow/compiler/xla/client/lib/matrix.h
+++ b/tensorflow/compiler/xla/client/lib/matrix.h
@@ -30,10 +30,15 @@
 // else.
 XlaOp IdentityMatrix(XlaBuilder* builder, PrimitiveType type, int64 m, int64 n);
 
-// Get the diagonals of the last two dimensions. If 'x' has shape
-// [..., M, N], then the output has shape [..., min(M, N)], containing the
-// diagonal elements (i.e., with indices [..., i, i]).
-XlaOp GetMatrixDiagonal(XlaOp x);
+// Get the diagonal of the last two dimensions. Use k > 0 for diagonals above
+// the main diagonal, and k < 0 for diagonals below the main diagonal.
+//
+// If 'x' has shape [..., M, N]:
+//   If k >= 0, the output has shape [..., min(M, N - k)], containing the
+//   diagonal elements (i.e., with indices [..., i, i + k]).
+//   If k < 0, the output has shape [..., min(M + k, N)], containing the
+//   diagonal elements (i.e., with indices [..., i - k, i]).
+XlaOp GetMatrixDiagonal(XlaOp x, int k = 0);
 
 // Returns a lower-triangular mask, i.e., true below the `diagonal`-th diagonal
 // and false above that diagonal.
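
The shape rules above are easiest to verify against a direct loop. A
plain-C++ reference for the k-th diagonal of a single M x N matrix
(illustrative only; the XLA implementation in matrix.cc instead masks,
reduces, and slices):

#include <cstdio>
#include <vector>

using Matrix = std::vector<std::vector<int>>;

std::vector<int> KthDiagonal(const Matrix& x, int k) {
  const int m = static_cast<int>(x.size());
  const int n = static_cast<int>(x[0].size());
  std::vector<int> out;
  if (k >= 0) {
    for (int i = 0; i < m && i + k < n; ++i) out.push_back(x[i][i + k]);
  } else {
    for (int i = 0; i < n && i - k < m; ++i) out.push_back(x[i - k][i]);
  }
  return out;  // Length min(M, N - k) for k >= 0, min(M + k, N) for k < 0.
}

int main() {
  // One batch of the 2x3x4 iota input used in the test below.
  const Matrix x = {{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}};
  for (const int k : {0, 1, -1}) {
    std::printf("k=%d:", k);
    for (const int v : KthDiagonal(x, k)) std::printf(" %d", v);
    std::printf("\n");  // k=0: 0 5 10; k=1: 1 6 11; k=-1: 4 9
  }
}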
diff --git a/tensorflow/compiler/xla/client/lib/matrix_test.cc b/tensorflow/compiler/xla/client/lib/matrix_test.cc
index 61f91a5..a93fc2c 100644
--- a/tensorflow/compiler/xla/client/lib/matrix_test.cc
+++ b/tensorflow/compiler/xla/client/lib/matrix_test.cc
@@ -53,13 +53,24 @@
   XlaBuilder builder("GetMatrixDiagonal");
   Array3D<T> input(2, 3, 4);
   input.FillIota(0);
+  std::map<int, Array2D<T>> k_and_expected = {
+      {0, {{0, 5, 10}, {12, 17, 22}}},
+      {1, {{1, 6, 11}, {13, 18, 23}}},
+      {2, {{2, 7}, {14, 19}}},
+      {3, {{3}, {15}}},
+      {4, {{}, {}}},
+      {-1, {{4, 9}, {16, 21}}},
+      {-2, {{8}, {20}}},
+      {-3, {{}, {}}},
+      {-4, {{}, {}}},
+  };
+  for (const auto& kv : k_and_expected) {
+    XlaOp a;
+    auto a_data = CreateR3Parameter<T>(input, 0, "a", &builder, &a);
+    GetMatrixDiagonal(a, kv.first);
 
-  XlaOp a;
-  auto a_data = CreateR3Parameter<T>(input, 0, "a", &builder, &a);
-  GetMatrixDiagonal(a);
-  Array2D<T> expected({{0, 5, 10}, {12, 17, 22}});
-
-  ComputeAndCompareR2<T>(&builder, expected, {a_data.get()});
+    ComputeAndCompareR2<T>(&builder, kv.second, {a_data.get()});
+  }
 }
 
 XLA_TEST_F(MatrixTest, GetMatrixDiagonal_S32) { TestMatrixDiagonal<int32>(); }
diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eig.cc b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.cc
new file mode 100644
index 0000000..546127e
--- /dev/null
+++ b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.cc
@@ -0,0 +1,471 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/lib/self_adjoint_eig.h"
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/lib/comparators.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/loops.h"
+#include "tensorflow/compiler/xla/client/lib/math.h"
+#include "tensorflow/compiler/xla/client/lib/matrix.h"
+#include "tensorflow/compiler/xla/client/lib/slicing.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+
+namespace {
+
+// Jacobi rotation (also known as Givens rotation):
+// G = [[ c, s],
+//      [-s, c]]
+// matmul(G_T, G) = I
+struct SymmetricSchurDecomposition {
+  XlaOp c;  // cosine.
+  XlaOp s;  // sine.
+};
+
+// JacobiUpdate holds the intermediate orthogonal matrix V and the
+// Jacobi-rotated matrix W. After each Jacobi iteration, the off-diagonal
+// norm of W is reduced.
+struct JacobiUpdate {
+  XlaOp v;
+  XlaOp w;
+};
+
+struct FrobeniusNorms {
+  XlaOp off_diagonal_norm;
+  XlaOp total_norm;
+};
+
+// Given an n-by-n symmetric A and integers p and q that satisfy 0 <= p < q < n,
+// it computes a rotation matrix G = [[c, s], [-s, c]], such that
+//                        G_T * A[[p, q], [p, q]] * G
+// is diagonalized.
+//
+//  def sym_schur2x2(A, p, q):
+//      if np.abs(A[p, q]) > 1e-6:
+//          tau = (A[q, q] - A[p, p]) / (2 * A[p, q])
+//          if tau >= 0:
+//              t = 1.0 / (tau + np.sqrt(1 + tau ** 2))
+//          else:
+//              t = -1.0 / (-tau + np.sqrt(1 + tau ** 2))
+//          c = 1.0 / np.sqrt(1.0 + t ** 2)
+//          s = t * c
+//      else:
+//          c = 1.0
+//          s = 0.0
+//      return c, s
+StatusOr<SymmetricSchurDecomposition> SymmetricSchurDecomposition2x2(
+    XlaOp a, XlaOp p, XlaOp q, XlaOp tol) {
+  XlaBuilder* builder = a.builder();
+  TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
+
+  auto zero = ScalarLike(a, 0.0);
+  auto one = ScalarLike(a, 1.0);
+  auto two = ScalarLike(a, 2.0);
+
+  auto pqs = DynamicSliceInMinorDims(a, {p, q}, {1, 1});
+
+  auto ps = DynamicSliceInMinorDims(a, {p, p}, {1, 1});
+  auto qs = DynamicSliceInMinorDims(a, {q, q}, {1, 1});
+
+  auto tau = (qs - ps) / (pqs * two);
+  auto t_pos = one / (tau + Sqrt(one + Square(tau)));
+  auto t_neg = -one / (-tau + Sqrt(one + Square(tau)));
+  auto t = Select(Ge(tau, zero), t_pos, t_neg);
+
+  auto c_temp = Rsqrt(one + Square(t));
+  auto s_temp = t * c_temp;
+
+  auto c = Select(Ge(Abs(pqs), tol), c_temp, ZerosLike(c_temp) + one);
+  auto s = Select(Ge(Abs(pqs), tol), s_temp, ZerosLike(s_temp));
+  // Renormalize c and s to compensate for low-precision arithmetic; this step
+  // is redundant if a high-precision float type, like float64, is used.
+  auto rnorm = Rsqrt(Square(c) + Square(s));
+
+  SymmetricSchurDecomposition schur;
+
+  schur.c = c * rnorm;
+  schur.s = s * rnorm;
+
+  return schur;
+}
+
+StatusOr<JacobiUpdate> Update(JacobiUpdate jacobi_update, XlaOp p, XlaOp q,
+                              XlaOp tol, int64 n) {
+  XlaBuilder* builder = jacobi_update.w.builder();
+  TF_ASSIGN_OR_RETURN(
+      SymmetricSchurDecomposition schur,
+      SymmetricSchurDecomposition2x2(jacobi_update.w, p, q, tol));
+
+  TF_ASSIGN_OR_RETURN(Shape w_shape, builder->GetShape(jacobi_update.w));
+  const std::vector<int64> batch_dims(w_shape.dimensions().begin(),
+                                      w_shape.dimensions().end() - 2);
+  const int64 num_dims = w_shape.rank();
+
+  auto zero = ScalarLike(p, 0);
+
+  XlaOp c = schur.c;
+  XlaOp s = schur.s;
+
+  auto slice_p = DynamicSliceInMinorDims(jacobi_update.w, {p, zero}, {1, n});
+  auto slice_q = DynamicSliceInMinorDims(jacobi_update.w, {q, zero}, {1, n});
+
+  auto slice_p_new = c * slice_p - s * slice_q;
+  auto slice_q_new = s * slice_p + c * slice_q;
+
+  jacobi_update.w =
+      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_p_new, {p, zero});
+  jacobi_update.w =
+      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_q_new, {q, zero});
+
+  slice_p = DynamicSliceInMinorDims(jacobi_update.w, {zero, p}, {n, 1});
+  slice_q = DynamicSliceInMinorDims(jacobi_update.w, {zero, q}, {n, 1});
+
+  slice_p_new = c * slice_p - s * slice_q;
+  slice_q_new = s * slice_p + c * slice_q;
+
+  jacobi_update.w =
+      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_p_new, {zero, p});
+  jacobi_update.w =
+      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_q_new, {zero, q});
+
+  // Zero out a_{pq} explicitly.
+  std::vector<int64> pq_dims(batch_dims.begin(), batch_dims.end());
+  pq_dims.push_back(1);
+  pq_dims.push_back(1);
+  auto pq_zero = ScalarLike(jacobi_update.w, 0.0);
+  auto pq_zeros = Broadcast(pq_zero, pq_dims);
+  jacobi_update.w =
+      DynamicUpdateSliceInMinorDims(jacobi_update.w, pq_zeros, {p, q});
+  jacobi_update.w =
+      DynamicUpdateSliceInMinorDims(jacobi_update.w, pq_zeros, {q, p});
+
+  slice_p = DynamicSliceInMinorDims(jacobi_update.v, {zero, p}, {n, 1});
+  slice_q = DynamicSliceInMinorDims(jacobi_update.v, {zero, q}, {n, 1});
+
+  std::vector<int64> broadcast_dims(batch_dims.size());
+  std::iota(broadcast_dims.begin(), broadcast_dims.end(), 0);
+  broadcast_dims.push_back(num_dims - 1);
+
+  // Renormalize the p-th and q-th columns. This step is redundant for
+  // high-precision floats such as 64-bit float, but necessary for 32-bit
+  // float; it does not increase the overall complexity.
+  slice_p_new = c * slice_p - s * slice_q;
+  slice_p_new = Mul(
+      slice_p_new,
+      Rsqrt(Reduce(Square(slice_p_new), pq_zero,
+                   CreateScalarAddComputation(w_shape.element_type(), builder),
+                   {num_dims - 2})),
+      broadcast_dims);
+  slice_q_new = s * slice_p + c * slice_q;
+  slice_q_new = Mul(
+      slice_q_new,
+      Rsqrt(Reduce(Square(slice_q_new), pq_zero,
+                   CreateScalarAddComputation(w_shape.element_type(), builder),
+                   {num_dims - 2})),
+      broadcast_dims);
+
+  jacobi_update.v =
+      DynamicUpdateSliceInMinorDims(jacobi_update.v, slice_p_new, {zero, p});
+  jacobi_update.v =
+      DynamicUpdateSliceInMinorDims(jacobi_update.v, slice_q_new, {zero, q});
+
+  return jacobi_update;
+}
+
+StatusOr<FrobeniusNorms> ComputeFrobeniusNorms(XlaOp w) {
+  XlaBuilder* builder = w.builder();
+  TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(w));
+  const int64 num_dims = shape.rank();
+  auto frobenius_norm =
+      Sqrt(Reduce(Square(w), ScalarLike(w, 0.0),
+                  CreateScalarAddComputation(shape.element_type(), builder),
+                  {num_dims - 2, num_dims - 1}));
+  auto diag = GetMatrixDiagonal(w);
+  auto diag_square =
+      Reduce(Square(diag), ScalarLike(w, 0.0),
+             CreateScalarAddComputation(shape.element_type(), builder),
+             {num_dims - 2});
+
+  FrobeniusNorms frobenius_norms;
+
+  frobenius_norms.off_diagonal_norm =
+      Sqrt(Max(Square(frobenius_norm) - diag_square, ScalarLike(w, 0.0)));
+  frobenius_norms.total_norm = frobenius_norm;
+
+  return frobenius_norms;
+}
+
+StatusOr<std::vector<XlaOp>> WhileLoopFn(
+    absl::Span<const XlaOp> initial_values,  //
+    int matrix_dimension,                    //
+    int max_sweep_updates,                   //
+    PrimitiveType index_type,                //
+    absl::string_view name,                  //
+    XlaBuilder* builder) {
+  auto while_cond_fn = [&](absl::Span<const XlaOp> values,
+                           XlaBuilder* cond_builder) -> StatusOr<XlaOp> {
+    auto k = values[0];
+    auto max_sweeps = ScalarLike(k, max_sweep_updates);
+    auto sweep_update_cond = Gt(max_sweeps, k);
+
+    auto norms = ComputeFrobeniusNorms(values[2]).ValueOrDie();
+    auto tol = norms.total_norm * values[3];
+    auto tol_cond = ReduceAll(Lt(tol, norms.off_diagonal_norm),
+                              xla::ConstantR0<bool>(cond_builder, false),
+                              CreateScalarOrComputation(PRED, cond_builder));
+
+    return And(sweep_update_cond, tol_cond);
+  };
+
+  auto while_body_fn =
+      [&](absl::Span<const XlaOp> values,
+          XlaBuilder* body_builder) -> StatusOr<std::vector<XlaOp>> {
+    auto while_cond_fn_inner =
+        [&](absl::Span<const XlaOp> values_inner,
+            XlaBuilder* inner_cond_builder) -> StatusOr<XlaOp> {
+      auto p = values_inner[0];
+      return Lt(p, ScalarLike(p, matrix_dimension - 1));
+    };
+
+    auto while_body_fn_inner =
+        [&](absl::Span<const XlaOp> values_inner,
+            XlaBuilder* inner_body_builder) -> StatusOr<std::vector<XlaOp>> {
+      auto while_cond_fn_innermost =
+          [&](absl::Span<const XlaOp> values_innermost,
+              XlaBuilder* innermost_cond_builder) -> StatusOr<XlaOp> {
+        auto q = values_innermost[1];
+        return Lt(q, ScalarLike(q, matrix_dimension));
+      };
+      auto while_body_fn_innermost =
+          [&](absl::Span<const XlaOp> values_innermost,
+              XlaBuilder* innermost_body_builder)
+          -> StatusOr<std::vector<XlaOp>> {
+        auto p = values_innermost[0];
+        auto q = values_innermost[1];
+
+        JacobiUpdate jacobi_update;
+        jacobi_update.v = values_innermost[2];
+        jacobi_update.w = values_innermost[3];
+
+        auto tol = values_innermost[4];
+
+        TF_ASSIGN_OR_RETURN(jacobi_update,
+                            Update(jacobi_update, p, q, tol, matrix_dimension));
+
+        std::vector<XlaOp> updated_values_innermost;
+        updated_values_innermost.reserve(values_innermost.size());
+
+        updated_values_innermost.push_back(p);
+        updated_values_innermost.push_back(q + ScalarLike(q, 1));
+        updated_values_innermost.push_back(jacobi_update.v);
+        updated_values_innermost.push_back(jacobi_update.w);
+        updated_values_innermost.push_back(tol);
+
+        return updated_values_innermost;
+      };
+
+      std::vector<XlaOp> values_innermost(5);
+      auto p = values_inner[0];
+      auto q = p + ScalarLike(p, 1);
+      values_innermost[0] = p;                // index p.
+      values_innermost[1] = q;                // index q.
+      values_innermost[2] = values_inner[1];  // v.
+      values_innermost[3] = values_inner[2];  // w.
+      values_innermost[4] = values_inner[3];  // tol.
+      TF_ASSIGN_OR_RETURN(
+          values_innermost,
+          WhileLoopHelper(while_cond_fn_innermost, while_body_fn_innermost,
+                          values_innermost, absl::StrCat(name, "-Innermost"),
+                          inner_body_builder));
+
+      std::vector<XlaOp> updated_values_inner;
+      updated_values_inner.reserve(values_inner.size());
+
+      updated_values_inner.push_back(p + ScalarLike(p, 1));
+      updated_values_inner.push_back(values_innermost[2]);
+      updated_values_inner.push_back(values_innermost[3]);
+      updated_values_inner.push_back(values_innermost[4]);
+      return updated_values_inner;
+    };
+    // Indexes.
+    XlaOp k = values[0];
+
+    std::vector<XlaOp> values_inner(4);
+    values_inner[0] = ScalarLike(k, 0);  // index p.
+    values_inner[1] = values[1];         // v.
+    values_inner[2] = values[2];         // w.
+    values_inner[3] = values[3];         // tol.
+    TF_ASSIGN_OR_RETURN(
+        values_inner,
+        WhileLoopHelper(while_cond_fn_inner, while_body_fn_inner, values_inner,
+                        absl::StrCat(name, "-Inner"), body_builder));
+
+    std::vector<XlaOp> updated_values;
+    updated_values.reserve(values_inner.size());
+
+    updated_values.push_back(k + ScalarLike(k, 1));
+    updated_values.push_back(values_inner[1]);
+    updated_values.push_back(values_inner[2]);
+    updated_values.push_back(values_inner[3]);
+
+    return updated_values;
+  };
+  std::vector<XlaOp> values;
+  TF_ASSIGN_OR_RETURN(values, WhileLoopHelper(while_cond_fn, while_body_fn,
+                                              initial_values, name, builder));
+
+  return values;
+}
+
+StatusOr<SelfAdjointEigResult> SortByEigenvalues(SelfAdjointEigResult result) {
+  XlaBuilder* builder = result.v.builder();
+  TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(result.v));
+  const int64 num_dims = shape.rank();
+  auto dimensions = shape.dimensions();
+
+  std::vector<int64> broadcast_dims(num_dims - 1);
+  std::iota(broadcast_dims.begin(), broadcast_dims.end(), 0);
+  broadcast_dims[num_dims - 2] = num_dims - 1;
+  result.w = BroadcastInDim(result.w, dimensions, broadcast_dims);
+
+  XlaOp sort_result =
+      Sort({result.w, result.v},
+           CreateScalarLtComputation(
+               {shape.element_type(), shape.element_type()}, builder),
+           num_dims - 1);
+  result.w = GetMatrixDiagonal(GetTupleElement(sort_result, 0));
+  result.v = GetTupleElement(sort_result, 1);
+  return result;
+}
+
+}  // namespace
+
+// This is the cyclic Jacobi iteration. The Jacobi sweeps themselves leave the
+// eigenvalues unordered; the result is sorted by eigenvalue at the end via
+// SortByEigenvalues.
+//
+//  def jacobi(A):
+//      n, _ = A.shape
+//      V = np.eye(n)
+//      frobenius_norm = np.linalg.norm(A)
+//      diag_norm = np.linalg.norm(np.diag(A))
+//      off_diag_norm = np.sqrt(
+//          frobenius_norm - diag_norm) * np.sqrt(frobenius_norm + diag_norm)
+//      while off_diag_norm > 1e-6 * frobenius_norm:
+//          for p in range(n - 1):
+//              for q in range(p + 1, n):
+//                  c, s = sym_schur2x2(A, p, q)
+//                  A[[p, q], :] = np.matmul(np.array([[c, -s], [s, c]]),
+//                                           A[[p, q], :])
+//                  A[:, [p, q]] = np.matmul(A[:, [p, q]],
+//                                           np.array([[c, s], [-s, c]]))
+//                  V[:, [p, q]] = np.matmul(V[:, [p, q]],
+//                                           np.array([[c, s], [-s, c]]))
+//          frobenius_norm = np.linalg.norm(A)
+//          diag_norm = np.linalg.norm(np.diag(A))
+//          off_diag_norm = np.sqrt(
+//              frobenius_norm - diag_norm) * np.sqrt(
+//                  frobenius_norm + diag_norm)
+//
+//      return A, V
+//
+// TODO(kuny): Implement parallel order Jacobi.
+//
+SelfAdjointEigResult SelfAdjointEig(XlaOp a, bool lower, int64 max_iter,
+                                    float epsilon) {
+  XlaBuilder* builder = a.builder();
+  auto return_error = [&](const Status& status) {
+    SelfAdjointEigResult result;
+    result.v = builder->ReportError(status);
+    result.w = builder->ReportError(status);
+    return result;
+  };
+  auto shape_with_status = builder->GetShape(a);
+  if (!shape_with_status.status().ok()) {
+    return return_error(shape_with_status.status());
+  }
+  Shape a_shape = shape_with_status.ValueOrDie();
+  const int64 num_dims = a_shape.rank();
+  if (num_dims < 2) {
+    return return_error(InvalidArgument(
+        "Arguments to Eigen decomposition must have rank >= 2: got shape %s.",
+        a_shape.ToString()));
+  }
+  PrimitiveType type = a_shape.element_type();
+  if (!primitive_util::IsFloatingPointType(type)) {
+    return return_error(InvalidArgument(
+        "Type of the input matrix must be float: got %s.", a_shape.ToString()));
+  }
+
+  const int64 m = ShapeUtil::GetDimension(a_shape, -2);
+  const int64 n = ShapeUtil::GetDimension(a_shape, -1);
+
+  if (m != n) {
+    return return_error(InvalidArgument(
+        "Arguments to Eigen decomposition must be square matrices: got shape "
+        "(%d, %d).",
+        m, n));
+  }
+
+  const int64 num_batch_dims = num_dims - 2;
+  std::vector<int64> batch_dims(num_batch_dims);
+  for (int i = 0; i < num_batch_dims; ++i) {
+    batch_dims[i] = ShapeUtil::GetDimension(a_shape, i);
+  }
+
+  auto tol = ScalarLike(a, epsilon);
+
+  auto v_init = Broadcast(IdentityMatrix(builder, type, m, m), batch_dims);
+  auto w_init = Triangle(a, lower);
+  w_init = w_init + TransposeInMinorDims(w_init) - w_init * v_init;
+
+  auto output_with_status = WhileLoopFn(
+      {
+          Zero(builder, S32),  // k
+          v_init,              // v
+          w_init,              // w
+          tol,                 //
+      },                       //
+      n,                       //
+      max_iter,                //
+      S32,                     //
+      "CyclicJacobi",          //
+      builder);
+  if (!output_with_status.status().ok()) {
+    return return_error(output_with_status.status());
+  }
+
+  auto output = output_with_status.ValueOrDie();
+
+  SelfAdjointEigResult result;
+  result.v = output[1];
+  result.w = GetMatrixDiagonal(output[2]);
+
+  return SortByEigenvalues(result).ValueOrDie();
+}
+
+}  // namespace xla
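
The core of each Jacobi sweep is the 2x2 rotation: for symmetric A and indices
(p, q), the (c, s) pair from sym_schur2x2 makes the (p, q) entry of
G^T * A * G vanish. A stand-alone plain-C++ check of that property on one 2x2
block (values borrowed from the 4x4 test matrix below):

#include <cmath>
#include <cstdio>

int main() {
  // Symmetric 2x2 block A = [[app, apq], [apq, aqq]].
  const double app = 4.0, apq = 6.0, aqq = 45.0;
  // c and s exactly as in sym_schur2x2 above (|apq| is above any tolerance).
  const double tau = (aqq - app) / (2.0 * apq);
  const double t = tau >= 0 ? 1.0 / (tau + std::sqrt(1.0 + tau * tau))
                            : -1.0 / (-tau + std::sqrt(1.0 + tau * tau));
  const double c = 1.0 / std::sqrt(1.0 + t * t);
  const double s = t * c;
  const double G[2][2] = {{c, s}, {-s, c}};
  const double A[2][2] = {{app, apq}, {apq, aqq}};
  double B[2][2] = {{0, 0}, {0, 0}};  // B = G^T * A * G.
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 2; ++j)
      for (int k = 0; k < 2; ++k)
        for (int l = 0; l < 2; ++l) B[i][j] += G[k][i] * A[k][l] * G[l][j];
  // Off-diagonal entries are zero up to rounding; the diagonal holds the
  // eigenvalues of the block.
  std::printf("B = [[%g, %g], [%g, %g]]\n", B[0][0], B[0][1], B[1][0],
              B[1][1]);
}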
diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eig.h b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.h
new file mode 100644
index 0000000..2a08989
--- /dev/null
+++ b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.h
@@ -0,0 +1,40 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIG_H_
+#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIG_H_
+
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+
+namespace xla {
+
+// The result of the eigenvalue decomposition of a symmetric matrix; the
+// original matrix is recovered by v * diag(w) * v_t.
+struct SelfAdjointEigResult {
+  // The i-th column of v is the normalized eigenvector corresponding to the
+  // eigenvalue w[i]. If the input a is batched, v carries the same batch
+  // dimensions.
+  XlaOp v;
+  // The eigenvalues in ascending order, each repeated according to its
+  // multiplicity.
+  XlaOp w;
+};
+
+SelfAdjointEigResult SelfAdjointEig(XlaOp a, bool lower = true,
+                                    int64 max_iter = 100, float epsilon = 1e-6);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIG_H_
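
The recovery property in the struct comment (a == v * diag(w) * v_t for
orthonormal v) is what the tests below exercise via BatchDot. A minimal
plain-C++ check on a hand-built 2x2 case, where v is a rotation and w the
chosen eigenvalues:

#include <cmath>
#include <cstdio>

int main() {
  // Orthonormal eigenvectors (columns of v) from a rotation by theta, with
  // eigenvalues w = {2, 3}.
  const double theta = 0.3;
  const double v[2][2] = {{std::cos(theta), -std::sin(theta)},
                          {std::sin(theta), std::cos(theta)}};
  const double w[2] = {2.0, 3.0};
  double a[2][2] = {{0, 0}, {0, 0}};  // a = v * diag(w) * v^T.
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 2; ++j)
      for (int k = 0; k < 2; ++k) a[i][j] += v[i][k] * w[k] * v[j][k];
  // Each column of v is an eigenvector: a * v[:,k] - w[k] * v[:,k] == 0.
  for (int k = 0; k < 2; ++k) {
    for (int i = 0; i < 2; ++i) {
      const double av = a[i][0] * v[0][k] + a[i][1] * v[1][k];
      std::printf("residual[%d][%d] = %g\n", k, i, av - w[k] * v[i][k]);
    }
  }
}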
diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eig_test.cc b/tensorflow/compiler/xla/client/lib/self_adjoint_eig_test.cc
new file mode 100644
index 0000000..c8875df
--- /dev/null
+++ b/tensorflow/compiler/xla/client/lib/self_adjoint_eig_test.cc
@@ -0,0 +1,313 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/lib/self_adjoint_eig.h"
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/array3d.h"
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/matrix.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+
+class SelfAdjointEigTest : public ClientLibraryTestBase {
+ protected:
+  void SetUp() override {
+    ClientLibraryTestBase::SetUp();
+    batch_3d_4x4_ = Array3D<float>{
+        {
+            {4, 6, 8, 10},
+            {6, 45, 54, 63},
+            {8, 54, 146, 166},
+            {10, 63, 166, 310},
+        },
+        {
+            {16, 24, 8, 12},
+            {24, 61, 82, 48},
+            {8, 82, 100, 6},
+            {12, 48, 6, 62},
+        },
+    };
+    matrix2d_8x8_ = Array2D<float>{
+        {14., 123., 49., 112., 115., 173., 182., 125.},
+        {123., 14., 60., 118., 150., 130., 91., 72.},
+        {49., 60., 138., 111., 106., 101., 115., 142.},
+        {112., 118., 111., 142., 91., 130., 25., 61.},
+        {115., 150., 106., 91., 116., 121., 128., 85.},
+        {173., 130., 101., 130., 121., 70., 151., 132.},
+        {182., 91., 115., 25., 128., 151., 66., 92.},
+        {125., 72., 142., 61., 85., 132., 92., 156.},
+    };
+    low_rank_4x4_ = Array2D<float>{
+        // x = [[1, 2, 3, 4], [1, -1, 1, -1]]
+        // matmul(x.T, x)
+        {2, 1, 4, 3},
+        {1, 5, 5, 9},
+        {4, 5, 10, 11},
+        {3, 9, 11, 17},
+    };
+  }
+  void TearDown() override { ClientLibraryTestBase::TearDown(); }
+
+  Array3D<float> GetUnitMatrix3D(const Array3D<float>& matrix) {
+    Array3D<float> result(matrix.n1(), matrix.n2(), matrix.n3(), 0.0);
+    for (int i = 0; i < matrix.n1(); ++i) {
+      for (int j = 0; j < matrix.n2(); ++j) {
+        result({i, j, j}) = 1.0;
+      }
+    }
+    return result;
+  }
+
+  Array3D<float> ExtractTriangularMatrix(const Array3D<float>& matrix,
+                                         bool lower) {
+    Array3D<float> result(matrix);
+    for (int i = 0; i < result.n1(); ++i) {
+      for (int j = 0; j < result.n2(); ++j) {
+        if (lower) {
+          for (int k = j + 1; k < result.n3(); ++k) {
+            result({i, j, k}) = 0.0;
+          }
+        } else {
+          for (int k = 0; k < j; ++k) {
+            result({i, j, k}) = 0.0;
+          }
+        }
+      }
+    }
+    return result;
+  }
+
+  XlaOp ComputeMatmulVWVt(SelfAdjointEigResult result, XlaBuilder* builder) {
+    Shape shape = builder->GetShape(result.v).ValueOrDie();
+    std::vector<int64> out_dims = shape.dimensions();
+    std::vector<int64> broadcast_dims(shape.rank() - 1);
+    std::iota(broadcast_dims.begin(), broadcast_dims.end(), 0);
+
+    broadcast_dims[shape.rank() - 2] = shape.rank() - 1;
+    auto vw = Mul(result.v, BroadcastInDim(result.w, out_dims, broadcast_dims));
+    return BatchDot(vw, TransposeInMinorDims(result.v),
+                    PrecisionConfig::HIGHEST);
+  }
+
+  XlaOp GetAverageAbsoluteError(XlaOp m1, XlaOp m2, XlaBuilder* builder) {
+    Shape shape = builder->GetShape(m1).ValueOrDie();
+    int64 size = 1;
+    for (auto d : shape.dimensions()) {
+      size *= d;
+    }
+    return ReduceAll(Abs(m1 - m2), ConstantR0WithType(builder, F32, 0),
+                     CreateScalarAddComputation(F32, builder)) /
+           ConstantR0WithType(builder, F32, size);
+  }
+
+  Array2D<float> GenerateRandomSymmetricMatrix(int size) {
+    Array2D<float> result{size, size, 0.0};
+    result.FillRandom(10 /* stddev */, 2 /* mean */);
+    for (int i = 0; i < size; ++i) {
+      for (int j = 0; j < i; ++j) {
+        result({j, i}) = result({i, j});
+      }
+    }
+    return result;
+  }
+
+  Array3D<float> batch_3d_4x4_;
+  Array2D<float> matrix2d_8x8_;
+  Array2D<float> low_rank_4x4_;
+  Array2D<int> wrong_type_4x4_;
+};
+
+XLA_TEST_F(SelfAdjointEigTest, Test_VWVt_EQ_A_2x4x4) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a;
+  auto a_data = CreateR3Parameter<float>(batch_3d_4x4_, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  ComputeMatmulVWVt(result, &builder);
+
+  ComputeAndCompareR3<float>(&builder, batch_3d_4x4_, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Test_VWVt_EQ_A_Lower_2x4x4) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a;
+  auto a_data = CreateR3Parameter<float>(
+      ExtractTriangularMatrix(batch_3d_4x4_, true), 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  ComputeMatmulVWVt(result, &builder);
+
+  ComputeAndCompareR3<float>(&builder, batch_3d_4x4_, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Test_VWVt_EQ_A_Upper_2x4x4) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a;
+  auto a_data = CreateR3Parameter<float>(
+      ExtractTriangularMatrix(batch_3d_4x4_, false), 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a, false);
+  ComputeMatmulVWVt(result, &builder);
+
+  ComputeAndCompareR3<float>(&builder, batch_3d_4x4_, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Test_Orthogonality_2x4x4) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a;
+  auto a_data = CreateR3Parameter<float>(batch_3d_4x4_, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  BatchDot(result.v, TransposeInMinorDims(result.v), PrecisionConfig::HIGHEST);
+
+  ComputeAndCompareR3<float>(&builder, GetUnitMatrix3D(batch_3d_4x4_),
+                             {a_data.get()}, ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Test_VtWV_EQ_A_Rank_Deficient_4x4) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(low_rank_4x4_, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  ComputeMatmulVWVt(result, &builder);
+
+  ComputeAndCompareR2<float>(&builder, low_rank_4x4_, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Test_Eigen_8x8) {
+  XlaBuilder builder(TestName());
+
+  // This is computed by numpy.linalg.eigh with float32.
+  std::vector<float> expected{-182.69205, -116.86245, -105.74489, -9.545369,
+                              37.81711,   104.732285, 120.29153,  868.00385};
+
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(matrix2d_8x8_, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  Add(result.w, ZerosLike(result.w));
+
+  ComputeAndCompareR1<float>(&builder, expected, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Test_Orthogonality_8x8) {
+  XlaBuilder builder(TestName());
+
+  float expected_vals = 1e-3;
+
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(matrix2d_8x8_, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  // np.sum(norm(eye(n) - matmul(conj(T(v)), v)) / n**2
+  GetAverageAbsoluteError(IdentityMatrix(&builder, F32, 8, 8),
+                          BatchDot(TransposeInMinorDims(result.v), result.v),
+                          &builder);
+
+  ComputeAndCompareR0<float>(&builder, expected_vals, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Wrong_Type_Int) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a;
+  auto a_data = CreateR2Parameter<int>(wrong_type_4x4_, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  EXPECT_FALSE(result.v.valid());
+  EXPECT_FALSE(result.w.valid());
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_8x8) {
+  XlaBuilder builder(TestName());
+  int size = 8;
+  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
+
+  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_16x16) {
+  XlaBuilder builder(TestName());
+  int size = 16;
+  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
+
+  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_32x32) {
+  XlaBuilder builder(TestName());
+  int size = 32;
+  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
+
+  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_256x256) {
+  XlaBuilder builder(TestName());
+  int size = 256;
+  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
+
+  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_512x512) {
+  XlaBuilder builder(TestName());
+  int size = 512;
+  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
+  XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
+  auto result = SelfAdjointEig(a);
+  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
+
+  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
+                             ErrorSpec(1e-3, 1e-3));
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.cc b/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.cc
deleted file mode 100644
index c2c8cae..0000000
--- a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.cc
+++ /dev/null
@@ -1,398 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h"
-
-#include <memory>
-#include <vector>
-
-#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
-#include "tensorflow/compiler/xla/client/lib/constants.h"
-#include "tensorflow/compiler/xla/client/lib/loops.h"
-#include "tensorflow/compiler/xla/client/lib/math.h"
-#include "tensorflow/compiler/xla/client/lib/matrix.h"
-#include "tensorflow/compiler/xla/client/lib/slicing.h"
-#include "tensorflow/compiler/xla/client/xla_builder.h"
-#include "tensorflow/compiler/xla/literal_util.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/status_macros.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/lib/core/errors.h"
-
-namespace xla {
-
-namespace {
-
-// Jacobi rotation (also known as Givens rotation):
-// G = [[ c, s],
-//      [-s, c]]
-// matmul(G_T, G) = I
-struct SymmetricSchurDecomposition {
-  XlaOp c;          // cosine.
-  XlaOp s;          // sine.
-  XlaOp reduction;  // Reduction in the off diagonal after applying G.
-};
-
-// JacobiUpdate holds the intermediate orthogonal matrix, Jacobi-rotated matrix
-// and the off-diagonal norm of the rotated matrix. After each Jacobi iteration,
-// off-diagonal norm is reduced.
-struct JacobiUpdate {
-  XlaOp v;
-  XlaOp w;
-  XlaOp off_diagonal_norm;
-};
-
-// Given an n-by-n symmetric A and integers p and q that satisfy 0 <= p < q < n,
-// it computes a rotation matrix G = [[c, s], [-s, c]], such that
-//                        G_T * A[[p, q], [p, q]] * G
-// is diagonalized.
-//
-//  def sym_schur2x2(A, p, q):
-//      if np.abs(A[p, q]) > 1e-6:
-//          tau = (A[q, q] - A[p, p]) / (2 * A[p, q])
-//          if tau >= 0:
-//              t = 1.0 / (tau + np.sqrt(1 + tau ** 2))
-//          else:
-//              t = -1.0 / (-tau + np.sqrt(1 + tau ** 2))
-//          c = 1.0 / np.sqrt(1.0 + t ** 2)
-//          s = t * c
-//      else:
-//          c = 1.0
-//          s = 0.0
-//      return c, s
-StatusOr<SymmetricSchurDecomposition> SymmetricShurDecomposition2x2(XlaOp a,
-                                                                    XlaOp p,
-                                                                    XlaOp q,
-                                                                    XlaOp tol) {
-  XlaBuilder* builder = a.builder();
-  TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
-
-  PrimitiveType type = a_shape.element_type();
-
-  const int64 num_dims = a_shape.rank();
-
-  auto zero = ScalarLike(a, 0.0);
-  auto one = ScalarLike(a, 1.0);
-  auto two = ScalarLike(a, 2.0);
-
-  auto pqs = DynamicSliceInMinorDims(a, {p, q}, {1, 1});
-
-  auto ps = DynamicSliceInMinorDims(a, {p, p}, {1, 1});
-  auto qs = DynamicSliceInMinorDims(a, {q, q}, {1, 1});
-
-  auto tau = (qs - ps) / (pqs * two);
-  auto t_pos = one / (tau + Sqrt(one + Square(tau)));
-  auto t_neg = -one / (-tau + Sqrt(one + Square(tau)));
-  auto t = Select(Ge(tau, zero), t_pos, t_neg);
-
-  auto c_temp = Rsqrt(one + Square(t));
-  auto s_temp = t * c_temp;
-
-  auto c = Select(Ge(Abs(pqs), tol), c_temp, ZerosLike(c_temp) + one);
-  auto s = Select(Ge(Abs(pqs), tol), s_temp, ZerosLike(s_temp));
-  // Renormalize c and s to compensate for low precision arithmetic, this step
-  // is redundant if high precision float is used, like float64.
-  auto rnorm = Rsqrt(Square(c) + Square(s));
-
-  SymmetricSchurDecomposition schur;
-
-  schur.c = c * rnorm;
-  schur.s = s * rnorm;
-  schur.reduction =
-      Reduce(two * Square(pqs), zero, CreateScalarAddComputation(type, builder),
-             {num_dims - 2, num_dims - 1});
-  return schur;
-}
-
-StatusOr<JacobiUpdate> Update(JacobiUpdate jacobi_update, XlaOp p, XlaOp q,
-                              XlaOp tol, int64 n) {
-  XlaBuilder* builder = jacobi_update.w.builder();
-  TF_ASSIGN_OR_RETURN(
-      SymmetricSchurDecomposition schur,
-      SymmetricShurDecomposition2x2(jacobi_update.w, p, q, tol));
-
-  TF_ASSIGN_OR_RETURN(Shape w_shape, builder->GetShape(jacobi_update.w));
-  const std::vector<int64> batch_dims(w_shape.dimensions().begin(),
-                                      w_shape.dimensions().end() - 2);
-  const int64 num_dims = w_shape.rank();
-
-  auto zero = ScalarLike(p, 0);
-
-  XlaOp c = schur.c;
-  XlaOp s = schur.s;
-
-  auto slice_p = DynamicSliceInMinorDims(jacobi_update.w, {p, zero}, {1, n});
-  auto slice_q = DynamicSliceInMinorDims(jacobi_update.w, {q, zero}, {1, n});
-
-  auto slice_p_new = c * slice_p - s * slice_q;
-  auto slice_q_new = s * slice_p + c * slice_q;
-
-  jacobi_update.w =
-      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_p_new, {p, zero});
-  jacobi_update.w =
-      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_q_new, {q, zero});
-
-  slice_p = DynamicSliceInMinorDims(jacobi_update.w, {zero, p}, {n, 1});
-  slice_q = DynamicSliceInMinorDims(jacobi_update.w, {zero, q}, {n, 1});
-
-  slice_p_new = c * slice_p - s * slice_q;
-  slice_q_new = s * slice_p + c * slice_q;
-
-  jacobi_update.w =
-      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_p_new, {zero, p});
-  jacobi_update.w =
-      DynamicUpdateSliceInMinorDims(jacobi_update.w, slice_q_new, {zero, q});
-
-  // Zero out a_{pq} explicitly.
-  std::vector<int64> pq_dims(batch_dims.begin(), batch_dims.end());
-  pq_dims.push_back(1);
-  pq_dims.push_back(1);
-  auto pq_zero = ScalarLike(jacobi_update.w, 0.0);
-  auto pq_zeros = Broadcast(pq_zero, pq_dims);
-  jacobi_update.w =
-      DynamicUpdateSliceInMinorDims(jacobi_update.w, pq_zeros, {p, q});
-  jacobi_update.w =
-      DynamicUpdateSliceInMinorDims(jacobi_update.w, pq_zeros, {q, p});
-
-  slice_p = DynamicSliceInMinorDims(jacobi_update.v, {zero, p}, {n, 1});
-  slice_q = DynamicSliceInMinorDims(jacobi_update.v, {zero, q}, {n, 1});
-
-  std::vector<int64> broadcast_dims(batch_dims.size());
-  std::iota(broadcast_dims.begin(), broadcast_dims.end(), 0);
-  broadcast_dims.push_back(num_dims - 1);
-
-  // Renormalize the p-th and q-th columns. This step is redundant if high
-  // precision floats are used, like 64-bit float. But for 32-bit float, it
-  // becomes necessary. This step will not increase the overall complexity.
-  slice_p_new = c * slice_p - s * slice_q;
-  slice_p_new = Mul(
-      slice_p_new,
-      Rsqrt(Reduce(Square(slice_p_new), pq_zero,
-                   CreateScalarAddComputation(w_shape.element_type(), builder),
-                   {num_dims - 2})),
-      broadcast_dims);
-  slice_q_new = s * slice_p + c * slice_q;
-  slice_q_new = Mul(
-      slice_q_new,
-      Rsqrt(Reduce(Square(slice_q_new), pq_zero,
-                   CreateScalarAddComputation(w_shape.element_type(), builder),
-                   {num_dims - 2})),
-      broadcast_dims);
-
-  jacobi_update.v =
-      DynamicUpdateSliceInMinorDims(jacobi_update.v, slice_p_new, {zero, p});
-  jacobi_update.v =
-      DynamicUpdateSliceInMinorDims(jacobi_update.v, slice_q_new, {zero, q});
-
-  jacobi_update.off_diagonal_norm = Sqrt(
-      Max(Square(jacobi_update.off_diagonal_norm) - schur.reduction, pq_zero));
-
-  return jacobi_update;
-}
-
-StatusOr<std::vector<XlaOp>> WhileLoopFn(
-    absl::Span<const XlaOp> initial_values,  //
-    int matrix_dimension,                    //
-    int max_sweep_updates,                   //
-    PrimitiveType index_type,                //
-    absl::string_view name,                  //
-    XlaBuilder* builder) {
-  auto while_cond_fn = [&](absl::Span<const XlaOp> values,
-                           XlaBuilder* cond_builder) -> StatusOr<XlaOp> {
-    auto k = values[0];
-    auto off_diagonal_norm = values[5];
-    // tol = frobenius_norm * epsilon.
-    auto tol = values[6] * values[7];
-
-    auto max_sweeps = ScalarLike(k, max_sweep_updates);
-
-    auto sweep_update_cond = Gt(max_sweeps, k);
-
-    auto tol_cond = ReduceAll(Lt(tol, off_diagonal_norm),
-                              xla::ConstantR0<bool>(cond_builder, false),
-                              CreateScalarOrComputation(PRED, cond_builder));
-    return And(tol_cond, sweep_update_cond);
-  };
-
-  auto while_body_fn =
-      [&](absl::Span<const XlaOp> values,
-          XlaBuilder* body_builder) -> StatusOr<std::vector<XlaOp>> {
-    auto zero = Zero(body_builder, index_type);
-    auto one = One(body_builder, index_type);
-    auto end_index = ScalarLike(one, matrix_dimension);
-
-    // Indexes.
-    XlaOp k = values[0];
-    XlaOp p = values[1];
-    XlaOp q = values[2];
-
-    JacobiUpdate jacobi_update;
-    jacobi_update.v = values[3];
-    jacobi_update.w = values[4];
-    jacobi_update.off_diagonal_norm = values[5];
-
-    XlaOp frobenius_norm = values[6];
-    XlaOp tol = values[7];
-
-    TF_ASSIGN_OR_RETURN(jacobi_update,
-                        Update(jacobi_update, p, q, tol, matrix_dimension));
-
-    std::vector<XlaOp> updated_values;
-    updated_values.reserve(values.size());
-
-    q = q + one;
-    p = Select(Eq(q, end_index), p + one, p);
-    k = Select(Eq(p, end_index - one), k + one, k);
-    p = Select(Eq(p, end_index - one), zero, p);
-    q = Select(Eq(q, end_index), p + one, q);
-
-    updated_values.push_back(k);
-    updated_values.push_back(p);
-    updated_values.push_back(q);
-
-    updated_values.push_back(jacobi_update.v);
-    updated_values.push_back(jacobi_update.w);
-    updated_values.push_back(jacobi_update.off_diagonal_norm);
-
-    updated_values.push_back(frobenius_norm);
-    updated_values.push_back(tol);
-
-    return updated_values;
-  };
-  std::vector<XlaOp> values;
-  TF_ASSIGN_OR_RETURN(values, WhileLoopHelper(while_cond_fn, while_body_fn,
-                                              initial_values, name, builder));
-
-  return values;
-}
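
The index arithmetic at the end of the loop body is easy to misread; the
following Python trace (my own reconstruction for illustration, assuming n = 4)
shows that it visits every pair p < q in row-major order and increments the
sweep counter k once per completed sweep.

n, k, p, q = 4, 0, 0, 1
for _ in range(2 * (n * (n - 1) // 2)):  # two full sweeps
    print(k, p, q)                       # the pair rotated in this iteration
    q += 1
    p = p + 1 if q == n else p
    k = k + 1 if p == n - 1 else k
    p = 0 if p == n - 1 else p
    q = p + 1 if q == n else q
# Prints (0,0,1) (0,0,2) (0,0,3) (0,1,2) (0,1,3) (0,2,3), then the same six
# pairs again with k == 1.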
-
-}  // namespace
-
-// This is the cyclic Jacobi iteration. Note that the eigenvalues are not
-// necessarily ordered.
-//
-//  def jacobi(A):
-//      n, _ = A.shape
-//      V = np.eye(n)
-//      nfrob = np.sum(A ** 2)
-//      ndiag = np.sum(np.diag(A) ** 2)
-//      off = nfrob - ndiag
-//      while off > 1e-6 * nfrob:
-//          for p in range(n - 1):
-//              for q in range(p + 1, n):
-//                  if off > 1e-6 * nfrob:
-//                      c, s = sym_schur2x2(A, p, q)
-//                      off = off - 2 * A[p, q] ** 2
-//                      A[[p, q], :] = np.matmul(np.array([[c, -s], [s, c]]),
-//                                               A[[p, q], :])
-//                      A[:, [p, q]] = np.matmul(A[:, [p, q]],
-//                                               np.array([[c, s], [-s, c]]))
-//                      V[:, [p, q]] = np.matmul(V[:, [p, q]],
-//                                               np.array([[c, s], [-s, c]]))
-//
-//      return A, V
-//
-// TODO(kuny): Implement parallel order Jacobi.
-//
-SelfAdjointEigenResult SelfAdjointEigen(XlaOp a, bool lower, int64 max_iter,
-                                        float epsilon) {
-  XlaBuilder* builder = a.builder();
-  auto return_error = [&](const Status& status) {
-    SelfAdjointEigenResult result;
-    result.v = builder->ReportError(status);
-    result.w = builder->ReportError(status);
-    return result;
-  };
-  auto shape_with_status = builder->GetShape(a);
-  if (!shape_with_status.status().ok()) {
-    return return_error(shape_with_status.status());
-  }
-  Shape a_shape = shape_with_status.ValueOrDie();
-  const int64 num_dims = a_shape.rank();
-  if (num_dims < 2) {
-    return return_error(InvalidArgument(
-        "Arguments to Eigen decomposition must have rank >= 2: got shape %s.",
-        a_shape.ToString()));
-  }
-  PrimitiveType type = a_shape.element_type();
-  if (!primitive_util::IsFloatingPointType(type)) {
-    return return_error(InvalidArgument(
-        "Type of the input matrix must be float: got %s.", a_shape.ToString()));
-  }
-
-  const int64 m = ShapeUtil::GetDimension(a_shape, -2);
-  const int64 n = ShapeUtil::GetDimension(a_shape, -1);
-
-  if (m != n) {
-    return return_error(InvalidArgument(
-        "Arguments to Eigen decomposition must be square matrices: got shape "
-        "(%d, %d).",
-        m, n));
-  }
-
-  const int64 num_batch_dims = num_dims - 2;
-  std::vector<int64> batch_dims(num_batch_dims);
-  for (int i = 0; i < num_batch_dims; ++i) {
-    batch_dims[i] = ShapeUtil::GetDimension(a_shape, i);
-  }
-
-  auto zero = ScalarLike(a, 0.0);
-  auto tol = ScalarLike(a, epsilon);
-
-  auto v_init = Broadcast(IdentityMatrix(builder, type, m, m), batch_dims);
-  auto w_init = Triangle(a, lower);
-  w_init = w_init + TransposeInMinorDims(w_init) - w_init * v_init;
-
-  auto frobenius_norm = Sqrt(Reduce(Square(w_init), zero,
-                                    CreateScalarAddComputation(type, builder),
-                                    {num_dims - 2, num_dims - 1}));
-  auto diag = GetMatrixDiagonal(w_init);
-  auto diag_square =
-      Reduce(Square(diag), zero, CreateScalarAddComputation(type, builder),
-             {num_dims - 2});
-
-  auto off_diagonal_init =
-      Sqrt(Max(Square(frobenius_norm) - diag_square, zero));
-
-  auto output_with_status = WhileLoopFn(
-      {
-          Zero(builder, S32),  // k
-          Zero(builder, S32),  // p
-          One(builder, S32),   // q
-          v_init,              //
-          w_init,              //
-          off_diagonal_init,   //
-          frobenius_norm,      //
-          tol,                 //
-      },                       //
-      n,                       //
-      max_iter,                //
-      S32,                     //
-      "CyclicJacobi",          //
-      builder);
-  if (!output_with_status.status().ok()) {
-    return return_error(output_with_status.status());
-  }
-
-  auto output = output_with_status.ValueOrDie();
-
-  SelfAdjointEigenResult result;
-  result.v = output[3];
-  result.w = GetMatrixDiagonal(output[4]);
-
-  return result;
-}
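
The initialization above symmetrizes one triangle of the input and seeds the
off-diagonal norm from the Frobenius norm minus the diagonal contribution. A
NumPy sketch of the same setup for the lower == true case (`init_state` is a
hypothetical helper, for illustration only):

import numpy as np

def init_state(a):
    tri = np.tril(a)
    w = tri + tri.T - np.diag(np.diag(tri))  # mirror the lower triangle
    frob = np.sqrt(np.sum(w ** 2))
    off = np.sqrt(max(frob ** 2 - np.sum(np.diag(w) ** 2), 0.0))
    v = np.eye(a.shape[0], dtype=a.dtype)
    return v, w, off, frob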
-
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h b/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h
deleted file mode 100644
index 49fc17a..0000000
--- a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIGEN_H_
-#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIGEN_H_
-
-#include "tensorflow/compiler/xla/client/xla_builder.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-
-namespace xla {
-
-// The eigenvalue decomposition of a symmetric matrix; the original matrix is
-// recovered as v * diag(w) * v_t.
-struct SelfAdjointEigenResult {
-  // The i-th column is the normalized eigenvector corresponding to the
-  // eigenvalue w[i]. Will return a matrix object if a is a matrix object.
-  XlaOp v;
-  // TODO(kuny): Sort the eigenvalues.
-  // The eigenvalues in ascending order, each repeated according to its
-  // multiplicity.
-  XlaOp w;
-};
-
-SelfAdjointEigenResult SelfAdjointEigen(XlaOp a, bool lower = true,
-                                        int64 max_iter = 100,
-                                        float epsilon = 1e-6);
-
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIGEN_H_
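
The NumPy analogue of this (now removed) API is numpy.linalg.eigh: modulo
eigenvalue ordering, the result obeys the same reconstruction and orthogonality
identities the comments describe. A sketch, for illustration:

import numpy as np

a = np.array([[4., 6.], [6., 45.]])
w, v = np.linalg.eigh(a)                       # eigenvalues w, eigenvectors v
assert np.allclose(v @ np.diag(w) @ v.T, a)    # a == v * diag(w) * v_t
assert np.allclose(v.T @ v, np.eye(2))         # columns of v are orthonormal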
diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen_test.cc b/tensorflow/compiler/xla/client/lib/self_adjoint_eigen_test.cc
deleted file mode 100644
index 720c49b..0000000
--- a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen_test.cc
+++ /dev/null
@@ -1,300 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h"
-
-#include "tensorflow/compiler/xla/array2d.h"
-#include "tensorflow/compiler/xla/array3d.h"
-#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
-#include "tensorflow/compiler/xla/client/lib/constants.h"
-#include "tensorflow/compiler/xla/client/lib/matrix.h"
-#include "tensorflow/compiler/xla/client/xla_builder.h"
-#include "tensorflow/compiler/xla/literal.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/compiler/xla/test.h"
-#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
-#include "tensorflow/compiler/xla/tests/literal_test_util.h"
-#include "tensorflow/compiler/xla/tests/test_macros.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-
-namespace xla {
-
-class SelfAdjointEigenTest : public ClientLibraryTestBase {
- protected:
-  void SetUp() override {
-    ClientLibraryTestBase::SetUp();
-    batch_3d_4x4_ = Array3D<float>{
-        {
-            {4, 6, 8, 10},
-            {6, 45, 54, 63},
-            {8, 54, 146, 166},
-            {10, 63, 166, 310},
-        },
-        {
-            {16, 24, 8, 12},
-            {24, 61, 82, 48},
-            {8, 82, 100, 6},
-            {12, 48, 6, 62},
-        },
-    };
-    matrix2d_8x8_ = Array2D<float>{
-        {14., 123., 49., 112., 115., 173., 182., 125.},
-        {123., 14., 60., 118., 150., 130., 91., 72.},
-        {49., 60., 138., 111., 106., 101., 115., 142.},
-        {112., 118., 111., 142., 91., 130., 25., 61.},
-        {115., 150., 106., 91., 116., 121., 128., 85.},
-        {173., 130., 101., 130., 121., 70., 151., 132.},
-        {182., 91., 115., 25., 128., 151., 66., 92.},
-        {125., 72., 142., 61., 85., 132., 92., 156.},
-    };
-    low_rank_4x4_ = Array2D<float>{
-        // x = [[1, 2, 3, 4], [1, -1, 1, -1]]
-        // matmul(x.T, x)
-        {2, 1, 4, 3},
-        {1, 5, 5, 9},
-        {4, 5, 10, 11},
-        {3, 9, 11, 17},
-    };
-  }
-  void TearDown() override { ClientLibraryTestBase::TearDown(); }
-
-  Array3D<float> get_unit_matrix_3d(const Array3D<float>& matrix) {
-    Array3D<float> result(matrix.n1(), matrix.n2(), matrix.n3(), 0.0);
-    for (int i = 0; i < matrix.n1(); ++i) {
-      for (int j = 0; j < matrix.n2(); ++j) {
-        result({i, j, j}) = 1.0;
-      }
-    }
-    return result;
-  }
-
-  Array3D<float> ExtractTriangularMatrix(const Array3D<float>& matrix,
-                                         bool lower) {
-    Array3D<float> result(matrix);
-    for (int i = 0; i < result.n1(); ++i) {
-      for (int j = 0; j < result.n2(); ++j) {
-        if (lower) {
-          for (int k = j + 1; k < result.n3(); ++k) {
-            result({i, j, k}) = 0.0;
-          }
-        } else {
-          for (int k = 0; k < j; ++k) {
-            result({i, j, k}) = 0.0;
-          }
-        }
-      }
-    }
-    return result;
-  }
-
-  XlaOp ComputeMatmulVWVt(SelfAdjointEigenResult result, XlaBuilder* builder) {
-    Shape shape = builder->GetShape(result.v).ValueOrDie();
-    std::vector<int64> out_dims = shape.dimensions();
-    std::vector<int64> broadcast_dims(shape.rank() - 1);
-    std::iota(broadcast_dims.begin(), broadcast_dims.end(), 0);
-
-    broadcast_dims[shape.rank() - 2] = shape.rank() - 1;
-    auto vw = Mul(result.v, BroadcastInDim(result.w, out_dims, broadcast_dims));
-    return BatchDot(vw, TransposeInMinorDims(result.v),
-                    PrecisionConfig::HIGHEST);
-  }
-
-  XlaOp GetAverageAbsoluteError(XlaOp m1, XlaOp m2, XlaBuilder* builder) {
-    Shape shape = builder->GetShape(m1).ValueOrDie();
-    int64 size = 1;
-    for (auto d : shape.dimensions()) {
-      size *= d;
-    }
-    return ReduceAll(Abs(m1 - m2), ConstantR0WithType(builder, F32, 0),
-                     CreateScalarAddComputation(F32, builder)) /
-           ConstantR0WithType(builder, F32, size);
-  }
-
-  Array2D<float> GenerateRandomSymmetricMatrix(int size) {
-    Array2D<float> result{size, size, 0.0};
-    result.FillRandom(10 /* stddev */, 2 /* mean */);
-    for (int i = 0; i < size; ++i) {
-      for (int j = 0; j < i; ++j) {
-        result({j, i}) = result({i, j});
-      }
-    }
-    return result;
-  }
-
-  Array3D<float> batch_3d_4x4_;
-  Array2D<float> matrix2d_8x8_;
-  Array2D<float> low_rank_4x4_;
-  Array2D<int> wrong_type_4x4_;
-};
-
-XLA_TEST_F(SelfAdjointEigenTest, Test_VWVt_EQ_A_2x4x4) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a;
-  auto a_data = CreateR3Parameter<float>(batch_3d_4x4_, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  ComputeMatmulVWVt(result, &builder);
-
-  ComputeAndCompareR3<float>(&builder, batch_3d_4x4_, {a_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Test_VWVt_EQ_A_Lower_2x4x4) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a;
-  auto a_data = CreateR3Parameter<float>(
-      ExtractTriangularMatrix(batch_3d_4x4_, true), 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  ComputeMatmulVWVt(result, &builder);
-
-  ComputeAndCompareR3<float>(&builder, batch_3d_4x4_, {a_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Test_VWVt_EQ_A_Upper_2x4x4) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a;
-  auto a_data = CreateR3Parameter<float>(
-      ExtractTriangularMatrix(batch_3d_4x4_, false), 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a, false);
-  ComputeMatmulVWVt(result, &builder);
-
-  ComputeAndCompareR3<float>(&builder, batch_3d_4x4_, {a_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Test_Orthogonality_2x4x4) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a;
-  auto a_data = CreateR3Parameter<float>(batch_3d_4x4_, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  BatchDot(result.v, TransposeInMinorDims(result.v), PrecisionConfig::HIGHEST);
-
-  ComputeAndCompareR3<float>(&builder, get_unit_matrix_3d(batch_3d_4x4_),
-                             {a_data.get()}, ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Test_VtWV_EQ_A_Rank_Deficient_4x4) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a;
-  auto a_data = CreateR2Parameter<float>(low_rank_4x4_, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  ComputeMatmulVWVt(result, &builder);
-
-  ComputeAndCompareR2<float>(&builder, low_rank_4x4_, {a_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Test_Eigen_8x8) {
-  XlaBuilder builder(TestName());
-
-  // This is computed by numpy.linalg.eigh with float32.
-  std::vector<float> expected{-182.69205, -116.86245, -105.74489, -9.545369,
-                              37.81711,   104.732285, 120.29153,  868.00385};
-
-  XlaOp a;
-  auto a_data = CreateR2Parameter<float>(matrix2d_8x8_, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  Sort(result.w);
-
-  ComputeAndCompareR1<float>(&builder, expected, {a_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Test_Orthogonality_8x8) {
-  XlaBuilder builder(TestName());
-
-  float expected_vals = 1e-3;
-
-  XlaOp a;
-  auto a_data = CreateR2Parameter<float>(matrix2d_8x8_, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  // np.sum(abs(eye(n) - matmul(conj(T(v)), v))) / n**2
-  GetAverageAbsoluteError(IdentityMatrix(&builder, F32, 8, 8),
-                          BatchDot(TransposeInMinorDims(result.v), result.v),
-                          &builder);
-
-  ComputeAndCompareR0<float>(&builder, expected_vals, {a_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Wrong_Type_Int) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a;
-  auto a_data = CreateR2Parameter<int>(wrong_type_4x4_, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  EXPECT_FALSE(result.v.valid());
-  EXPECT_FALSE(result.w.valid());
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_8x8) {
-  XlaBuilder builder(TestName());
-  int size = 8;
-  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
-  XlaOp a;
-  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
-
-  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_16x16) {
-  XlaBuilder builder(TestName());
-  int size = 16;
-  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
-  XlaOp a;
-  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
-
-  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_32x32) {
-  XlaBuilder builder(TestName());
-  int size = 32;
-  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
-  XlaOp a;
-  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
-
-  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_64x64) {
-  XlaBuilder builder(TestName());
-  int size = 64;
-  Array2D<float> a_val = GenerateRandomSymmetricMatrix(size);
-  XlaOp a;
-  auto a_data = CreateR2Parameter<float>(a_val, 0, "a", &builder, &a);
-  auto result = SelfAdjointEigen(a);
-  GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder);
-
-  ComputeAndCompareR0<float>(&builder, 1e-3, {a_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/client/lib/slicing.cc b/tensorflow/compiler/xla/client/lib/slicing.cc
index 77145ba..d7b33c5 100644
--- a/tensorflow/compiler/xla/client/lib/slicing.cc
+++ b/tensorflow/compiler/xla/client/lib/slicing.cc
@@ -134,4 +134,31 @@
   });
 }
 
+XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim) {
+  XlaBuilder* builder = input.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape index_shape, builder->GetShape(index));
+    ShapeUtil::AppendMajorDimension(1, &index_shape);
+    std::vector<XlaOp> to_concat;
+    TF_ASSIGN_OR_RETURN(Shape input_shape, builder->GetShape(input));
+    to_concat.reserve(input_shape.rank());
+    for (int64 i = 0; i < input_shape.rank(); ++i) {
+      if (i == dim) {
+        to_concat.push_back(Reshape(index, index_shape.dimensions()));
+      } else {
+        to_concat.push_back(Iota(builder, index_shape, i));
+      }
+    }
+    XlaOp gather_indices = ConcatInDim(builder, to_concat, input_shape.rank());
+    std::vector<int64> slice_sizes(input_shape.rank(), 1);
+    GatherDimensionNumbers gather_dnums;
+    gather_dnums.set_index_vector_dim(input_shape.rank());
+    for (int64 i = 0; i < input_shape.rank(); ++i) {
+      gather_dnums.add_collapsed_slice_dims(i);
+      gather_dnums.add_start_index_map(i);
+    }
+    return Gather(input, gather_indices, gather_dnums, slice_sizes);
+  });
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/client/lib/slicing.h b/tensorflow/compiler/xla/client/lib/slicing.h
index 6c482a3..69f98a6 100644
--- a/tensorflow/compiler/xla/client/lib/slicing.h
+++ b/tensorflow/compiler/xla/client/lib/slicing.h
@@ -43,6 +43,20 @@
 XlaOp DynamicUpdateSliceInMinorDims(XlaOp x, XlaOp update,
                                     absl::Span<const XlaOp> starts);
 
+// Gathers values along an axis specified by dim.
+//
+// For a 3-D tensor the output is specified by:
+//
+// out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
+// out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
+// out[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2
+//
+// If `input` is an n-dimensional tensor with size
+// [X0, X1, ..., Xi-1, Xi, Xi+1, ..., Xn-1] and dim = i, then `index` must be
+// an n-dimensional tensor with size [X0, X1, ..., Xi-1, Y, Xi+1, ..., Xn-1]
+// where Y >= 1, and `out` will have the same size as `index`.
+XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim);
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SLICING_H_
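
In NumPy terms, the TorchGather added above behaves like np.take_along_axis (a
comparison for illustration; the values below match the new test in
slicing_test.cc):

import numpy as np

inp = np.array([[1, 2], [3, 4]])
idx = np.array([[0, 0], [1, 0]])
out = np.take_along_axis(inp, idx, axis=1)  # out[i][j] = inp[i][idx[i][j]]
print(out)  # [[1 1]
            #  [4 3]]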
diff --git a/tensorflow/compiler/xla/client/lib/slicing_test.cc b/tensorflow/compiler/xla/client/lib/slicing_test.cc
index 8d36211..db6ebb9 100644
--- a/tensorflow/compiler/xla/client/lib/slicing_test.cc
+++ b/tensorflow/compiler/xla/client/lib/slicing_test.cc
@@ -102,5 +102,18 @@
       {a_data.get(), b_data.get(), x_data.get(), y_data.get()});
 }
 
+XLA_TEST_F(SlicingTest, TorchGather) {
+  xla::XlaBuilder builder(TestName());
+
+  xla::XlaOp input, index;
+  auto input_data =
+      CreateR2Parameter<int>({{1, 2}, {3, 4}}, 0, "input", &builder, &input);
+  auto index_data =
+      CreateR2Parameter<int>({{0, 0}, {1, 0}}, 1, "index", &builder, &index);
+  TorchGather(input, index, 1);
+
+  ComputeAndCompareR2<int>(&builder, {{1, 1}, {4, 3}},
+                           {input_data.get(), index_data.get()});
+}
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/client/lib/sorting.cc b/tensorflow/compiler/xla/client/lib/sorting.cc
index e8553a0..ddc39f4 100644
--- a/tensorflow/compiler/xla/client/lib/sorting.cc
+++ b/tensorflow/compiler/xla/client/lib/sorting.cc
@@ -14,6 +14,7 @@
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/client/lib/sorting.h"
+#include "tensorflow/compiler/xla/client/lib/comparators.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -30,7 +31,13 @@
         ShapeUtil::MakeShape(S32, AsInt64Slice(input_shape.dimensions()));
     XlaOp iota_s32 = Iota(builder, iota_shape, last_dim);
     auto input_dims = input_shape.dimensions();
-    XlaOp sort_result = Sort(Neg(input), {iota_s32});
+    // TODO(b/122298745): Get rid of Neg() and use CreateScalarGtComputation
+    // once the TPU backend supports the comparison computations.
+    XlaOp sort_result =
+        Sort({Neg(input), iota_s32},
+             CreateScalarLtComputation({input_shape.element_type(), S32},
+                                       iota_s32.builder()),
+             last_dim, /*is_stable=*/true);
     std::vector<int64> start_indices(input_shape.dimensions_size(), 0);
     std::vector<int64> limit_indices(input_dims.begin(), input_dims.end());
     limit_indices[last_dim] = k;
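
The change above implements top-k as a stable sort of the negated values with an
iota carried along as the second operand, so ties resolve to the smaller index.
A NumPy sketch of the same idea (`topk` is a hypothetical helper, not the XLA
code):

import numpy as np

def topk(x, k):
    # Stable argsort of -x gives descending order with ties broken by index,
    # mirroring the stable Sort of (Neg(input), iota) above.
    order = np.argsort(-x, kind='stable')[:k]
    return x[order], order

print(topk(np.array([1, 5, 5, 3]), 2))  # (array([5, 5]), array([1, 2]))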
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve.cc b/tensorflow/compiler/xla/client/lib/triangular_solve.cc
deleted file mode 100644
index 1515e9e..0000000
--- a/tensorflow/compiler/xla/client/lib/triangular_solve.cc
+++ /dev/null
@@ -1,439 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/client/lib/triangular_solve.h"
-
-#include <memory>
-#include <vector>
-
-#include "tensorflow/compiler/xla/client/lib/constants.h"
-#include "tensorflow/compiler/xla/client/lib/math.h"
-#include "tensorflow/compiler/xla/client/lib/matrix.h"
-#include "tensorflow/compiler/xla/client/lib/slicing.h"
-#include "tensorflow/compiler/xla/client/xla_builder.h"
-#include "tensorflow/compiler/xla/client/xla_computation.h"
-#include "tensorflow/compiler/xla/literal.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/status_macros.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/math/math_util.h"
-
-namespace xla {
-
-// Get the diagonal blocks of the coefficient matrix
-XlaOp DiagonalBlocks(XlaOp a, int64 block_size) {
-  XlaBuilder* builder = a.builder();
-  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
-    TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(a));
-    int ndims = shape.rank();
-    int64 n = ShapeUtil::GetDimension(shape, -1);
-    int64 num_blocks = n / block_size;
-
-    XlaOp diag_blocks;
-
-    // If the coefficient matrix is exactly the block size, we just add a
-    // singleton dimension i.e. [..., n, n] -> [..., 1, n, n]
-    if (n == block_size) {
-      std::vector<int64> permutation(ndims);
-      std::iota(permutation.begin(), permutation.end(), 1);
-      permutation.insert(permutation.end() - 2, 0);
-      return Transpose(Broadcast(a, /*broadcast_sizes=*/{1}), permutation);
-    }
-
-    // We can grab entire blocks using gather
-    if (n > block_size) {
-      // Construct the starting indices of the diagonal blocks
-      auto start_indices =
-          Transpose(Broadcast(Mul(Iota(builder, S32, num_blocks),
-                                  ConstantR0<int32>(builder, block_size)),
-                              /*broadcast_sizes=*/{2}),
-                    /*permutation=*/{1, 0});
-
-      PaddingConfig padding_config =
-          MakeEdgePaddingConfig({{0, 0}, {ndims - 2, 0}});
-      start_indices =
-          Pad(start_indices, ConstantR0<int32>(builder, 0), padding_config);
-
-      // Gather the diagonal blocks
-      std::vector<int64> slice_sizes(ndims);
-      GatherDimensionNumbers dim_numbers;
-      for (int i = 0; i < ndims - 2; ++i) {
-        dim_numbers.add_offset_dims(i);
-        dim_numbers.add_start_index_map(i);
-        slice_sizes[i] = ShapeUtil::GetDimension(shape, i);
-      }
-      slice_sizes[ndims - 2] = slice_sizes[ndims - 1] = block_size;
-      dim_numbers.add_offset_dims(ndims - 1);
-      dim_numbers.add_offset_dims(ndims);
-      dim_numbers.add_start_index_map(ndims - 2);
-      dim_numbers.add_start_index_map(ndims - 1);
-      dim_numbers.set_index_vector_dim(1);
-      diag_blocks = Gather(a, start_indices, dim_numbers, slice_sizes);
-    }
-
-    // The last block might be smaller than the block size,
-    // so we will need to pad it
-    if (n % block_size != 0) {
-      // Pad with zeros
-      auto last_blocks =
-          SliceInMinorDims(a, {n - n % block_size, n - n % block_size}, {n, n});
-      PaddingConfig config = MakeNoPaddingConfig(ndims);
-      int64 padding = block_size - n % block_size;
-      config.mutable_dimensions(ndims - 1)->set_edge_padding_high(padding);
-      config.mutable_dimensions(ndims - 2)->set_edge_padding_high(padding);
-      last_blocks =
-          Pad(last_blocks, Zero(builder, shape.element_type()), config);
-
-      // Add a singleton dimension
-      // i.e. [..., block_size, block_size] -> [..., 1, block_size, block_size]
-      TF_ASSIGN_OR_RETURN(Shape blocks_shape, builder->GetShape(last_blocks));
-      auto shape_dims = AsInt64Slice(blocks_shape.dimensions());
-      auto last_blocks_dims = std::vector<int64>(ndims);
-      std::copy(shape_dims.begin(), shape_dims.end(), last_blocks_dims.begin());
-      last_blocks_dims.insert(last_blocks_dims.end() - 2, 1);
-      last_blocks = Reshape(last_blocks, last_blocks_dims);
-
-      // Concatenate with the other blocks if necessary
-      if (n > block_size) {
-        diag_blocks =
-            ConcatInDim(builder, {diag_blocks, last_blocks}, ndims - 2);
-      } else {
-        diag_blocks = last_blocks;
-      }
-    }
-
-    return diag_blocks;
-  });
-}
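
Stripped of batching and the gather machinery, DiagonalBlocks amounts to the
following NumPy sketch (`diagonal_blocks` is my own illustrative helper):

import numpy as np

def diagonal_blocks(a, block_size):
    n = a.shape[-1]
    blocks = [a[i:i + block_size, i:i + block_size]
              for i in range(0, n, block_size)]
    # Zero-pad the last block up to block_size when n % block_size != 0.
    pad = block_size - blocks[-1].shape[0]
    if pad:
        blocks[-1] = np.pad(blocks[-1], ((0, pad), (0, pad)))
    return np.stack(blocks)  # shape: (num_blocks, block_size, block_size)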
-
-XlaOp InvertDiagonalBlocks(XlaOp diag_blocks, bool lower, bool transpose_a,
-                           bool conjugate_a,
-                           PrecisionConfig::Precision precision) {
-  XlaBuilder* builder = diag_blocks.builder();
-  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
-    // Input is a batch of square lower triangular matrices. Its shape is
-    // (..., size, size). We reshape this to (num_blocks, size, size).
-    TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(diag_blocks));
-    int64 block_size = ShapeUtil::GetDimension(shape, -1);
-    int64 num_blocks = ShapeUtil::ElementsIn(shape) /
-                       tensorflow::MathUtil::IPow(block_size, 2);
-    diag_blocks = Reshape(diag_blocks, {num_blocks, block_size, block_size});
-
-    // The input must be triangular because we rely on that when doing
-    // multiplications later on
-    diag_blocks = Triangle(diag_blocks, /*lower=*/lower);
-
-    // Rescale the blocks to be unit triangular, but avoid dividing by zero
-    // (which can happen if the last block was padded); otherwise NaNs would
-    // be introduced and would propagate.
-    auto diags = GetMatrixDiagonal(diag_blocks);
-    auto ones = FullLike(diags, 1);
-    diags = Select(Eq(diags, Zero(builder, shape.element_type())), ones, diags);
-    auto scaled_diag_blocks = Div(diag_blocks, diags, {0, 2});
-
-    // We can now use the fact that for a lower triangular matrix
-    // [[L11, 0], [L21, L22]], given the inverses L11' and L22', we have
-    // L21' = -L22' * L21 * L11'. In our case, L21 is a vector and our blocks
-    // have been rescaled to be unit triangular, so L22 = L22' = 1.
-
-    // Initialize the output matrix with -1s on the diagonal. We use -1 instead
-    // of 1 because we cannot do matrix-vector multiplies with variable shapes
-    // inside of a loop, or do irregularly shaped in-place updates. Hence,
-    // L21' <- -L22' * L21 * L11' cannot be done naively. Instead, we update
-    // the entire row, i.e., we calculate
-    // [L21 L22 0] <- -[L21 L22 0] @ diag_blocks([L11', -I, -I])
-    // which means [L21 L22 0] <- [-L21 * L11', L22, 0].
-    auto identity =
-        IdentityMatrix(builder, shape.element_type(), block_size, block_size);
-    auto neg_identity = -identity;
-
-    // The first or last diagonal element should be set to 1 instead of -1,
-    // though, since we never update it.
-    auto pos_one = Reshape(One(builder, shape.element_type()), {1, 1});
-    auto start_index = ConstantR0<int>(builder, (lower) ? 0 : block_size - 1);
-    auto output_block =
-        DynamicUpdateSlice(neg_identity, pos_one,
-                           /*start_indices=*/{start_index, start_index});
-
-    // Broadcast diag([1, -1, -1, ...]) to every block
-    XlaOp output = Broadcast(output_block,
-                             /*broadcast_sizes=*/{num_blocks});
-
-    // Now we construct a loop that performs matrix-vector multiplications
-    // inverting the blocks one row at a time
-    std::vector<Shape> tuple_shapes = {
-        // The loop iteration counter is a scalar, incremented each iteration.
-        ShapeUtil::MakeShape(S32, {}),
-        // The output has the shape of A, with one row updated each iteration.
-        ShapeUtil::MakeShape(shape.element_type(),
-                             {num_blocks, block_size, block_size}),
-        // The input is a loop invariant.
-        ShapeUtil::MakeShape(shape.element_type(),
-                             {num_blocks, block_size, block_size})};
-    Shape tuple_shape = ShapeUtil::MakeTupleShape(tuple_shapes);
-
-    auto init_i = One(builder, S32);
-    auto init = Tuple(builder, {init_i, output, scaled_diag_blocks});
-
-    // Construct the loop condition function.
-    std::unique_ptr<XlaBuilder> condb =
-        builder->CreateSubBuilder("InvertDiagCond");
-    {
-      auto i = GetTupleElement(
-          Parameter(condb.get(), 0, tuple_shape, "InvertDiagCondTuple"), 0);
-      Lt(i, ConstantR0<int32>(condb.get(), block_size));
-    }
-    TF_ASSIGN_OR_RETURN(auto cond, condb->Build());
-
-    // Construct the loop body function.
-    std::unique_ptr<XlaBuilder> bodyb =
-        builder->CreateSubBuilder("InvertDiagBody");
-    {
-      auto input_tuple =
-          Parameter(bodyb.get(), 0, tuple_shape, "InvertDiagBodyTuple");
-
-      auto i = GetTupleElement(input_tuple, 0);
-      auto body_out = GetTupleElement(input_tuple, 1);
-      auto body_input = GetTupleElement(input_tuple, 2);
-
-      auto zero = ConstantR0<int32>(bodyb.get(), 0);
-      auto j = (lower) ? i : ScalarLike(i, block_size - 1) - i;
-      auto input_row =
-          DynamicSlice(body_input, {zero, j, zero},
-                       /*slice_sizes=*/{num_blocks, 1, block_size});
-
-      // We want -L21 L11^{-1}
-      DotDimensionNumbers dnums;
-      dnums.add_lhs_batch_dimensions(0);
-      dnums.add_rhs_batch_dimensions(0);
-      dnums.add_lhs_contracting_dimensions(2);
-      dnums.add_rhs_contracting_dimensions(1);
-      PrecisionConfig precision_proto;
-      precision_proto.add_operand_precision(precision);
-      precision_proto.add_operand_precision(precision);
-      auto update = -DotGeneral(input_row, body_out, dnums, &precision_proto);
-
-      body_out = DynamicUpdateSlice(body_out, update, {zero, j, zero});
-
-      auto next_i = i + ScalarLike(i, 1);
-      Tuple(bodyb.get(), {next_i, body_out, body_input});
-    }
-    TF_ASSIGN_OR_RETURN(auto body, bodyb->Build());
-
-    // Construct the While loop and return the result,
-    // return while_loop(cond_fun, body_fun, init)[1]
-    auto invert_while = While(cond, body, init);
-    auto inv_diag_blocks = GetTupleElement(invert_while, 1);
-
-    // Undo the scaling
-    inv_diag_blocks = Div(inv_diag_blocks, diags,
-                          /*broadcast_dimensions=*/{0, 1});
-
-    // Reshape back to original batch major dimensions
-    return Reshape(inv_diag_blocks, AsInt64Slice(shape.dimensions()));
-  });
-}
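
The while loop above is row-by-row forward substitution for the inverse of a
unit triangular matrix. With the batching and the -1-diagonal trick removed,
the lower, non-transposed case reduces to this sketch (an illustration under
those assumptions; `invert_unit_lower` is a hypothetical helper):

import numpy as np

def invert_unit_lower(l):
    n = l.shape[0]
    x = np.eye(n)
    for i in range(1, n):
        # Row i of the inverse needs only the rows already computed:
        # mirrors L21' = -L22' * L21 * L11' with L22' == 1.
        x[i, :i] = -l[i, :i] @ x[:i, :i]
    return x

l = np.array([[1., 0.], [2., 1.]])
assert np.allclose(invert_unit_lower(l) @ l, np.eye(2))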
-
-XlaOp SolveWithInvertedDiagonalBlocks(XlaOp a, XlaOp b, XlaOp inv_diag_blocks,
-                                      bool left_side, bool lower,
-                                      bool transpose_a, bool conjugate_a,
-                                      PrecisionConfig::Precision precision) {
-  XlaBuilder* builder = a.builder();
-  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
-    TF_ASSIGN_OR_RETURN(Shape blocks_shape, builder->GetShape(inv_diag_blocks));
-    TF_ASSIGN_OR_RETURN(Shape b_shape, builder->GetShape(b));
-    int64 block_size = ShapeUtil::GetDimension(blocks_shape, -1);
-
-    TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
-    int64 ndims = a_shape.rank();
-    int64 n = ShapeUtil::GetDimension(a_shape, -1);
-    int64 num_blocks = n / block_size + (n % block_size != 0);
-    int64 m_dim = (left_side) ? -1 : -2;
-    int64 m = ShapeUtil::GetDimension(b_shape, m_dim);
-
-    // Initialize the solution
-    auto x = ZerosLike(b);
-
-    // This loop is unrolled for performance reasons, but it could be written
-    // as a rolled loop as well, since the matrices have the same size on each
-    // iteration.
-    for (int i = 0; i < num_blocks; i++) {
-      // High-level intuition: We have B[i] = L[i] @ X. Since L is lower
-      // triangular this means B[i] = L[i, :i + 1] @ X[:i + 1]. We can split
-      // this into two parts: B[i] = L[i, :i] @ X[:i] + L[i, i] @ X[i], which
-      // can be solved for X[i] as
-      // X[i] = inv(L[i, i]) @ (B[i] - L[i, :i] @ X[:i]).
-
-      // Decide whether we go from first block to last or vice versa
-      auto j = (left_side ^ lower ^ transpose_a) ? num_blocks - 1 - i : i;
-
-      // Get the size of the inverse blocks (the last one might be smaller)
-      int64 block = (n % block_size != 0 && j + 1 == num_blocks)
-                        ? n % block_size
-                        : block_size;
-      auto inv_block =
-          MaybeConjugate(Collapse(SliceInMinorDims(inv_diag_blocks, {j, 0, 0},
-                                                   {j + 1, block, block}),
-                                  /*dimensions=*/{ndims - 2, ndims - 1}),
-                         conjugate_a);
-
-      // Get the corresponding row of B
-      int64 k = std::min((j + 1) * block_size, n);
-      std::vector<int64> start = {j * block_size, 0};
-      std::vector<int64> end = {k, m};
-      if (!left_side) {
-        std::swap(start[0], start[1]);
-        std::swap(end[0], end[1]);
-      }
-      auto b_row = SliceInMinorDims(b, start, end);
-
-      XlaOp remainder;
-      if (i == 0) {
-        remainder = b_row;
-      } else {
-        // This matrix multiply involves a lot of multiplying with zero (namely,
-        // X[i * block_size:] = 0), but this is faster than slicing...
-        end = {k, n};
-        if (!left_side) {
-          std::swap(end[0], end[1]);
-        }
-        if (transpose_a) {
-          std::swap(start[0], start[1]);
-          std::swap(end[0], end[1]);
-        }
-        auto a_row =
-            MaybeConjugate(SliceInMinorDims(a, start, end), conjugate_a);
-        if (left_side) {
-          remainder =
-              b_row - BatchDot(MaybeTransposeInMinorDims(a_row, transpose_a), x,
-                               precision);
-        } else {
-          remainder =
-              b_row - BatchDot(x, MaybeTransposeInMinorDims(a_row, transpose_a),
-                               precision);
-        }
-      }
-
-      XlaOp x_update;
-      auto zero = Zero(builder, S32);
-      auto start_index = ConstantR0WithType(builder, S32, j * block_size);
-      std::vector<XlaOp> update_starts = {start_index, zero};
-      if (left_side) {
-        x_update = BatchDot(MaybeTransposeInMinorDims(inv_block, transpose_a),
-                            remainder, precision);
-      } else {
-        x_update = BatchDot(remainder,
-                            MaybeTransposeInMinorDims(inv_block, transpose_a),
-                            precision);
-        std::swap(update_starts[0], update_starts[1]);
-      }
-      x = DynamicUpdateSliceInMinorDims(x, x_update, /*starts=*/update_starts);
-    }
-
-    return x;
-  });
-}
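
For left_side == true, lower == true, transpose_a == false, the unrolled loop
above is blocked forward substitution. A NumPy sketch under the simplifying
assumption that block_size divides n (`blocked_solve` is my own illustrative
helper, not the XLA code):

import numpy as np

def blocked_solve(a, b, inv_blocks, bs):
    # Solve a @ x = b with a lower triangular; inv_blocks[i] holds the
    # inverse of the i-th diagonal block of a.
    n = a.shape[0]
    x = np.zeros_like(b)
    for i, j in enumerate(range(0, n, bs)):
        # Subtract the already-solved part, then apply the block inverse:
        # x[j:j+bs] = inv(a[j:j+bs, j:j+bs]) @ (b[j:j+bs] - a[j:j+bs, :j] @ x[:j])
        remainder = b[j:j + bs] - a[j:j + bs, :j] @ x[:j]
        x[j:j + bs] = inv_blocks[i] @ remainder
    return x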
-
-XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
-                      bool transpose_a, bool conjugate_a, bool unit_diagonal,
-                      int64 block_size, PrecisionConfig::Precision precision) {
-  XlaBuilder* builder = a.builder();
-  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
-    TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
-    TF_ASSIGN_OR_RETURN(Shape b_shape, builder->GetShape(b));
-    if (a_shape.rank() != b_shape.rank()) {
-      return InvalidArgument(
-          "Arguments to TriangularSolve have shapes with different ranks: "
-          "%s vs. %s",
-          ShapeUtil::HumanString(a_shape), ShapeUtil::HumanString(b_shape));
-    }
-    const int64 ndims = a_shape.rank();
-    if (ndims < 2) {
-      return InvalidArgument(
-          "Arguments to TriangularSolve was rank %d but must have rank >= 2.",
-          ndims);
-    }
-    // The batch dimensions must be equal.
-    std::vector<int64> batch_dimensions;
-    for (int i = 0; i < ndims - 2; ++i) {
-      int64 a_size = a_shape.dimensions(i);
-      int64 b_size = b_shape.dimensions(i);
-      if (a_size != b_size) {
-        return InvalidArgument(
-            "Batch dimensions of arguments to TriangularSolve must be equal; "
-            "shapes were %s and %s.",
-            ShapeUtil::HumanString(a_shape), ShapeUtil::HumanString(b_shape));
-      }
-      batch_dimensions.push_back(a_size);
-    }
-
-    if (ShapeUtil::GetDimension(a_shape, -1) !=
-        ShapeUtil::GetDimension(a_shape, -2)) {
-      return InvalidArgument(
-          "The 'a' argument to TriangularSolve must be a batched square matrix;"
-          " shape was: %s",
-          ShapeUtil::HumanString(a_shape));
-    }
-    const int64 m = ShapeUtil::GetDimension(b_shape, -2);
-    const int64 n = ShapeUtil::GetDimension(b_shape, -1);
-    if ((left_side ? m : n) != ShapeUtil::GetDimension(a_shape, -1)) {
-      return InvalidArgument(
-          "Arguments to TriangularSolve have incompatible matrix shapes %s and "
-          "%s",
-          ShapeUtil::HumanString(a_shape), ShapeUtil::HumanString(b_shape));
-    }
-
-    if (block_size < 1) {
-      return InvalidArgument(
-          "block_size argument to TriangularSolve must be >= 1; got %d",
-          block_size);
-    }
-
-    if (ShapeUtil::IsZeroElementArray(b_shape)) {
-      // The output has the same shape as 'b', and since the output has zero
-      // elements, any such array will do.
-      return b;
-    }
-
-    // TODO(phawkins): consider pushing triangle masking into
-    // InvertDiagonalBlocks.
-    if (unit_diagonal) {
-      // Mask everything but the subdiagonal/superdiagonal elements.
-      a = lower ? Select(TriangleMask(a, -1), a, ZerosLike(a))
-                : Select(TriangleMask(a, 0), ZerosLike(a), a);
-      int64 k = ShapeUtil::GetDimension(a_shape, -1);
-      a = xla::Add(a, IdentityMatrix(builder, a_shape.element_type(), k, k),
-                   /*broadcast_dimensions=*/{ndims - 2, ndims - 1});
-    } else {
-      // Mask off the ignored elements of the triangular matrix a.
-      a = Triangle(a, lower);
-    }
-
-    // We find the diagonal blocks of the coefficient matrix
-    auto diag_blocks = DiagonalBlocks(a, block_size);
-
-    // We invert these blocks in parallel using batched matrix-vector products
-    auto inv_diag_blocks = InvertDiagonalBlocks(diag_blocks, lower, transpose_a,
-                                                conjugate_a, precision);
-
-    // We now find the solution using GEMMs
-    auto x =
-        SolveWithInvertedDiagonalBlocks(a, b, inv_diag_blocks, left_side, lower,
-                                        transpose_a, conjugate_a, precision);
-
-    return x;
-  });
-}
-
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve.h b/tensorflow/compiler/xla/client/lib/triangular_solve.h
deleted file mode 100644
index b87ef72..0000000
--- a/tensorflow/compiler/xla/client/lib/triangular_solve.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_TRIANGULAR_SOLVE_H_
-#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_TRIANGULAR_SOLVE_H_
-
-#include "tensorflow/compiler/xla/client/xla_builder.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-
-namespace xla {
-
-// Solves systems of linear equations with lower or upper triangular coefficient
-// matrices by forward- or back-substitution. Broadcasting along leading
-// dimensions, this routine solves one of the matrix systems
-//   `op(a) * x = b`,  or `x * op(a) = b`,
-// for the variable `x` given `a` and `b`, where `op(a)` is either
-//   `op(a) = a`,  or `op(a) = transpose(a)`,  or `op(a) = conj(transpose(a))`.
-// That is, the innermost matrices in the output satisfy a scalar system
-// depending on the value of (left_side, transpose_a, conjugate_a)
-// according to:
-//   (F, F, F) => `output[..., i, k]  a[..., k, j] = b[..., i, j]`,
-//   (F, F, T) => `output[..., i, k] a*[..., k, j] = b[..., i, j]`,
-//   (F, T, F) => `output[..., i, k]  a[..., j, k] = b[..., i, j]`,
-//   (F, T, T) => `output[..., i, k] a*[..., j, k] = b[..., i, j]`,
-//   (T, F, F) => ` a[..., i, k] output[..., k, j] = b[..., i, j]`,
-//   (T, F, T) => `a*[..., i, k] output[..., k, j] = b[..., i, j]`,
-//   (T, T, F) => ` a[..., k, i] output[..., k, j] = b[..., i, j]`,
-//   (T, T, T) => `a*[..., k, i] output[..., k, j] = b[..., i, j]`,
-// where * denotes complex conjugation and where the index `k` is summed over.
-//
-// `a` is a tensor of shape `[..., M, M]` whose innermost 2 dimensions form
-// square matrices. If lower is true (false), then the strictly upper (lower)
-// triangular part of each innermost matrix in `a` is assumed to be zero and is
-// not accessed.
-// `b` is a tensor of shape `[..., M, K]` if left_side is true, otherwise a
-// tensor of shape `[..., K, M]`.
-// `left_side` is a boolean, indicating whether to solve a system of the form
-// op(a) * x = b (true) or x * op(a) = b (false).
-// `lower` is a boolean, indicating whether the argument `a` is lower-triangular
-// (true) or upper-triangular (false).
-// `transpose_a` is a boolean indicating whether the matrix `a` is transposed.
-// `conjugate_a` is a boolean indicating whether the entries of `a` are complex
-// conjugated (independently of whether they are transposed), so that when both
-// transpose_a and conjugate_a are true the effect is a Hermitian adjoint.
-// If `unit_diagonal` is true, the elements on the matrix diagonal are assumed
-// to be 1 and are not read by the triangular solve.
-//
-// Uses a blocked algorithm if `block_size` is > 1; if block_size == 1 then no
-// blocking is used.
-XlaOp TriangularSolve(
-    XlaOp a, XlaOp b, bool left_side, bool lower, bool transpose_a,
-    bool conjugate_a, bool unit_diagonal, int64 block_size = 128,
-    PrecisionConfig::Precision precision = PrecisionConfig::HIGHEST);
-
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_TRIANGULAR_SOLVE_H_
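
Two of the eight (left_side, transpose_a, conjugate_a) cases in the table above,
written as executable NumPy checks (an illustration of the contract, not the
XLA implementation):

import numpy as np

a = np.tril(np.random.rand(4, 4)) + 4.0 * np.eye(4)  # well-conditioned, lower
b = np.random.rand(4, 3)

x = np.linalg.solve(a, b)    # (T, F, F):  a[..., i, k] output[..., k, j] = b
assert np.allclose(a @ x, b)

x = np.linalg.solve(a.T, b)  # (T, T, F):  a[..., k, i] output[..., k, j] = b
assert np.allclose(a.T @ x, b)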
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc b/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
deleted file mode 100644
index b333ffa..0000000
--- a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
+++ /dev/null
@@ -1,516 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/client/lib/triangular_solve.h"
-
-#include <memory>
-#include <numeric>
-#include <vector>
-
-#include "tensorflow/compiler/xla/array2d.h"
-#include "tensorflow/compiler/xla/client/lib/math.h"
-#include "tensorflow/compiler/xla/client/lib/matrix.h"
-#include "tensorflow/compiler/xla/client/xla_builder.h"
-#include "tensorflow/compiler/xla/literal.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/compiler/xla/test.h"
-#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
-#include "tensorflow/compiler/xla/tests/literal_test_util.h"
-#include "tensorflow/compiler/xla/tests/test_macros.h"
-#include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-
-namespace xla {
-namespace {
-
-using TriangularSolveTest = ClientLibraryTestBase;
-using TriangularSolveLeftLookingTest = ClientLibraryTestBase;
-
-static constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
-
-Array2D<float> AValsLower() {
-  return {{2, kNan, kNan, kNan},
-          {3, 6, kNan, kNan},
-          {4, 7, 9, kNan},
-          {5, 8, 10, 11}};
-}
-
-Array2D<float> AValsUpper() {
-  return {{2, 3, 4, 5},
-          {kNan, 6, 7, 8},
-          {kNan, kNan, 9, 10},
-          {kNan, kNan, kNan, 11}};
-}
-
-Array2D<float> AValsLowerUnitDiagonal() {
-  return {{kNan, kNan, kNan, kNan},
-          {3, kNan, kNan, kNan},
-          {4, 7, kNan, kNan},
-          {5, 8, 10, kNan}};
-}
-
-Array2D<float> AValsUpperUnitDiagonal() {
-  return {{kNan, 3, 4, 5},
-          {kNan, kNan, 7, 8},
-          {kNan, kNan, kNan, 10},
-          {kNan, kNan, kNan, kNan}};
-}
-
-Array2D<float> BValsRight() {
-  return {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
-}
-
-Array2D<float> BValsLeft() {
-  return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};
-}
-
-static constexpr complex64 kNanC64 = complex64(kNan, kNan);
-
-Array2D<complex64> AValsLowerComplex() {
-  return {{2, kNanC64, kNanC64, kNanC64},
-          {complex64(3, 1), 6, kNanC64, kNanC64},
-          {4, complex64(7, 2), 9, kNanC64},
-          {5, 8, complex64(10, 3), 11}};
-}
-
-Array2D<complex64> AValsUpperComplex() {
-  return {{2, 3, complex64(4, 3), 5},
-          {kNanC64, 6, complex64(7, 2), 8},
-          {kNanC64, kNanC64, complex64(9, 1), 10},
-          {kNanC64, kNanC64, kNanC64, 11}};
-}
-
-Array2D<complex64> BValsRightComplex() {
-  return {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
-}
-
-Array2D<complex64> BValsLeftComplex() {
-  return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};
-}
-
-XLA_TEST_F(TriangularSolveTest, EmptyArrays) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data =
-      CreateR2Parameter<float>(Array2D<float>(0, 0), 0, "a", &builder, &a);
-  auto b_data =
-      CreateR2Parameter<float>(Array2D<float>(0, 10), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/true,
-                  /*transpose_a=*/true, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 10),
-                             {a_data.get(), b_data.get()});
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/false, /*lower=*/true,
-                  /*transpose_a=*/true, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {0.5, 0.08333334, 0.04629629, 0.03367003},
-      {2.5, -0.25, -0.1388889, -0.1010101},
-      {4.5, -0.58333331, -0.32407406, -0.23569024},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleRightLowerNotranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/false, /*lower=*/true,
-                  /*transpose_a=*/false, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {-0.16414141, -0.06902357, -0.07070707, 0.36363636},
-      {0.64393939, 0.06565657, -0.03030303, 0.72727273},
-      {1.4520202, 0.2003367, 0.01010101, 1.09090909},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleRightUpperTranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/false, /*lower=*/false,
-                  /*transpose_a=*/true, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {-0.16414141, -0.06902357, -0.07070707, 0.36363636},
-      {0.64393939, 0.06565657, -0.03030303, 0.72727273},
-      {1.4520202, 0.2003367, 0.01010101, 1.09090909},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleRightUpperNotranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/false, /*lower=*/false,
-                  /*transpose_a=*/false, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {0.5, 0.08333334, 0.04629629, 0.03367003},
-      {2.5, -0.25, -0.1388889, -0.1010101},
-      {4.5, -0.58333331, -0.32407406, -0.23569024},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerTranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/true,
-                  /*transpose_a=*/true, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {-0.89646465, -0.69444444, -0.49242424},
-      {-0.27441077, -0.24074074, -0.20707071},
-      {-0.23232323, -0.22222222, -0.21212121},
-      {0.90909091, 1., 1.09090909},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/true,
-                  /*transpose_a=*/false, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {0.5, 1.0, 1.5},
-      {0.41666667, 0.33333333, 0.25},
-      {0.23148148, 0.18518519, 0.13888889},
-      {0.16835017, 0.13468013, 0.1010101},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNoTransposeUnitDiagonal) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data =
-      CreateR2Parameter<float>(AValsLowerUnitDiagonal(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/true,
-                  /*transpose_a=*/false, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/true,
-                  /*block_size=*/2);
-
-  Array2D<float> expected(
-      {{1., 2., 3.}, {1., -1., -3.}, {-4., 7., 18.}, {37., -61., -159.}});
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotransposeIrregularblock) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/true,
-                  /*transpose_a=*/false, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/3);
-
-  Array2D<float> expected({
-      {0.5, 1.0, 1.5},
-      {0.41666667, 0.33333333, 0.25},
-      {0.23148148, 0.18518519, 0.13888889},
-      {0.16835017, 0.13468013, 0.1010101},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/false,
-                  /*transpose_a=*/true, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {0.5, 1.0, 1.5},
-      {0.41666667, 0.33333333, 0.25},
-      {0.23148148, 0.18518519, 0.13888889},
-      {0.16835017, 0.13468013, 0.1010101},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotranspose) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/false,
-                  /*transpose_a=*/false, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({
-      {-0.89646465, -0.69444444, -0.49242424},
-      {-0.27441077, -0.24074074, -0.20707071},
-      {-0.23232323, -0.22222222, -0.21212121},
-      {0.90909091, 1., 1.09090909},
-  });
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotransposeUnitDiagonal) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data =
-      CreateR2Parameter<float>(AValsUpperUnitDiagonal(), 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/false,
-                  /*transpose_a=*/false, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/true,
-                  /*block_size=*/2);
-
-  Array2D<float> expected({{-1402., -1538., -1674.},
-                           {575., 631., 687.},
-                           {-93., -102., -111.},
-                           {10., 11., 12.}});
-
-  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTransposeConjugate) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data =
-      CreateR2Parameter<complex64>(AValsLowerComplex(), 0, "a", &builder, &a);
-  auto b_data =
-      CreateR2Parameter<complex64>(BValsRightComplex(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/false, /*lower=*/true,
-                  /*transpose_a=*/true, /*conjugate_a=*/true,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<complex64> expected({
-      {0.5, complex64(0.08333333, 0.08333333),
-       complex64(0.02777778, -0.0462963), complex64(0.06313131, -0.01094276)},
-      {2.5, complex64(-0.25, 0.41666667), complex64(-0.23148148, -0.37962963),
-       complex64(0.08670034, -0.02104377)},
-      {4.5, complex64(-0.58333333, 0.75), complex64(-0.49074074, -0.71296296),
-       complex64(0.11026936, -0.03114478)},
-  });
-
-  ComputeAndCompareR2<complex64>(
-      &builder, expected, {a_data.get(), b_data.get()}, ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) {
-  XlaBuilder builder(TestName());
-
-  XlaOp a, b;
-  auto a_data =
-      CreateR2Parameter<complex64>(AValsUpperComplex(), 0, "a", &builder, &a);
-  auto b_data =
-      CreateR2Parameter<complex64>(BValsLeftComplex(), 1, "b", &builder, &b);
-  TriangularSolve(a, b,
-                  /*left_side=*/true, /*lower=*/false,
-                  /*transpose_a=*/true, /*conjugate_a=*/false,
-                  /*unit_diagonal=*/false,
-                  /*block_size=*/2);
-
-  Array2D<complex64> expected({
-      {0.5, 1., 1.5},
-      {0.41666667, 0.33333333, 0.25},
-      {complex64(0.20020325, -2.81504065e-01),
-       complex64(0.13821138, -4.22764228e-01),
-       complex64(0.07621951, -5.64024390e-01)},
-      {complex64(0.19678492, 2.55912786e-01),
-       complex64(0.17738359, 3.84331116e-01),
-       complex64(0.15798226, 5.12749446e-01)},
-  });
-
-  ComputeAndCompareR2<complex64>(
-      &builder, expected, {a_data.get(), b_data.get()}, ErrorSpec(1e-2, 1e-2));
-}
-
-XLA_TEST_F(TriangularSolveTest, BatchedLeftUpper) {
-  XlaBuilder builder(TestName());
-
-  Array3D<float> bvals(7, 5, 5);
-  bvals.FillIota(1.);
-
-  // Set avals to the upper triangle of bvals.
-  Array3D<float> avals = bvals;
-  avals.Each([](absl::Span<const int64> indices, float* value) {
-    if (indices[1] > indices[2]) {
-      *value = 0;
-    }
-  });
-
-  XlaOp a, b;
-  auto a_data = CreateR3Parameter<float>(avals, 0, "a", &builder, &a);
-  auto b_data = CreateR3Parameter<float>(bvals, 1, "b", &builder, &b);
-  BatchDot(ConstantR3FromArray3D(&builder, avals),
-           TriangularSolve(a, b,
-                           /*left_side=*/true, /*lower=*/false,
-                           /*transpose_a=*/false, /*conjugate_a=*/false,
-                           /*unit_diagonal=*/false,
-                           /*block_size=*/2));
-
-  ComputeAndCompareR3<float>(&builder, bvals, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-struct TriangularSolveTestSpec {
-  int m, n;  // A is mxm, B is mxn
-  bool left_side;
-  bool lower;
-  bool transpose_a;
-};
-
-class TriangularSolveParametricTest
-    : public ClientLibraryTestBase,
-      public ::testing::WithParamInterface<TriangularSolveTestSpec> {};
-
-XLA_TEST_P(TriangularSolveParametricTest, Random) {
-  TriangularSolveTestSpec spec = GetParam();
-
-  XlaBuilder builder(TestName());
-
-  Array2D<float> avals(spec.m, spec.m);
-  avals.FillRandom(1.0);
-  for (int i = 0; i < spec.m; ++i) {
-    avals(i, i) += 10;
-  }
-
-  std::pair<int, int> bdims = spec.left_side ? std::make_pair(spec.m, spec.n)
-                                             : std::make_pair(spec.n, spec.m);
-  Array2D<float> bvals(bdims.first, bdims.second);
-  bvals.FillRandom(1.0);
-
-  XlaOp a, b;
-  auto a_data = CreateR2Parameter<float>(avals, 0, "a", &builder, &a);
-  auto b_data = CreateR2Parameter<float>(bvals, 1, "b", &builder, &b);
-  auto x = TriangularSolve(a, b, spec.left_side, spec.lower, spec.transpose_a,
-                           /*conjugate_a=*/false, /*unit_diagonal=*/false,
-                           /*block_size=*/3);
-  auto a_tri = Triangle(a, spec.lower);
-  a_tri = MaybeTransposeInMinorDims(a_tri, spec.transpose_a);
-  if (spec.left_side) {
-    BatchDot(a_tri, x);
-  } else {
-    BatchDot(x, a_tri);
-  }
-
-  ComputeAndCompareR2<float>(&builder, bvals, {a_data.get(), b_data.get()},
-                             ErrorSpec(1e-2, 1e-2));
-}
-
-std::vector<TriangularSolveTestSpec> TriangularSolveTests() {
-  std::vector<TriangularSolveTestSpec> specs;
-  for (int m : {5, 10}) {
-    for (int n : {5, 10}) {
-      for (bool left_side : {false, true}) {
-        for (bool lower : {false, true}) {
-          for (bool transpose_a : {false, true}) {
-            specs.push_back({m, n, left_side, lower, transpose_a});
-          }
-        }
-      }
-    }
-  }
-  return specs;
-}
-
-INSTANTIATE_TEST_SUITE_P(TriangularSolveParametricTestInstantiation,
-                         TriangularSolveParametricTest,
-                         ::testing::ValuesIn(TriangularSolveTests()));
-
-}  // namespace
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index b64d352..9b7c01a 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -299,12 +299,17 @@
   return build_status.ConsumeValueOrDie();
 }
 
-StatusOr<XlaComputation> XlaBuilder::Build(bool remove_dynamic_dimensions) {
+Status XlaBuilder::GetCurrentStatus() const {
   if (!first_error_.ok()) {
     string backtrace;
     first_error_backtrace_.Dump(tensorflow::DebugWriteToString, &backtrace);
     return AppendStatus(first_error_, backtrace);
   }
+  return Status::OK();
+}
+
+StatusOr<XlaComputation> XlaBuilder::Build(bool remove_dynamic_dimensions) {
+  TF_RETURN_IF_ERROR(GetCurrentStatus());
   return Build(instructions_.back().id(), remove_dynamic_dimensions);
 }
 
@@ -318,11 +323,7 @@
 
 StatusOr<XlaComputation> XlaBuilder::Build(int64 root_id,
                                            bool remove_dynamic_dimensions) {
-  if (!first_error_.ok()) {
-    string backtrace;
-    first_error_backtrace_.Dump(tensorflow::DebugWriteToString, &backtrace);
-    return AppendStatus(first_error_, backtrace);
-  }
+  TF_RETURN_IF_ERROR(GetCurrentStatus());
 
   // TODO(b/121223198): XLA backend cannot handle dynamic dimensions yet, remove
   // all dynamic dimensions before building xla program until we have support in
@@ -1014,6 +1015,18 @@
     HloInstructionProto instr;
     TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, GetShape(lhs));
     TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, GetShape(rhs));
+    // If one operand is a scalar, just multiply the two operands.
+    if (ShapeUtil::IsScalar(lhs_shape) || ShapeUtil::IsScalar(rhs_shape)) {
+      if (dimension_numbers.rhs_batch_dimensions_size() != 0 ||
+          dimension_numbers.lhs_batch_dimensions_size() != 0 ||
+          dimension_numbers.rhs_contracting_dimensions_size() != 0 ||
+          dimension_numbers.lhs_contracting_dimensions_size() != 0) {
+        return InvalidArgument(
+            "Dots with scalar operands must have no contracting or batch "
+            "dimensions");
+      }
+      return xla::Mul(lhs, rhs);
+    }
     TF_ASSIGN_OR_RETURN(Shape shape,
                         ShapeInference::InferDotOpShape(lhs_shape, rhs_shape,
                                                         dimension_numbers));
@@ -1539,27 +1552,142 @@
   });
 }
 
+namespace {
+// Maps a floating-point value to an integer value in such a way that
+// comparing the integers gives the same result as comparing the floats for
+// normal values, while -NaN is treated as the smallest value and NaN as the
+// largest value.
+// If f is a float, and
+// x = bit_cast<int32>(f);
+// y = x < 0 ? numeric_limits<int32>::max() - x : x;
+// then y is ordered as an int32 such that finite values have the obvious order,
+// -0 is ordered before 0, and -NaN and NaN appear at the beginning and end of
+// the ordering.
+// Note that to avoid signed overflow when computing
+// numeric_limits<int32>::max() - x, we perform the subtraction in unsigned
+// arithmetic and then bitcast back to signed.
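+// Worked 32-bit example: bit_cast<int32>(1.0f) = 0x3f800000 is positive and
+// kept as-is; -1.0f (0xbf800000) is negative and maps to
+// 0x7fffffff - 0xbf800000 (mod 2^32) = 0xc07fffff, while -2.0f (0xc0000000)
+// maps to 0xbfffffff, so more-negative floats compare as smaller integers.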
+XlaOp BitcastConvertFloatingPointToIntegral(const XlaOp& value,
+                                            int64 bit_width) {
+  PrimitiveType signed_type;
+  PrimitiveType unsigned_type;
+  XlaOp max_value;
+  switch (bit_width) {
+    case 16:
+      max_value =
+          ConstantR0(value.builder(),
+                     static_cast<uint16>(std::numeric_limits<int16>::max()));
+      signed_type = S16;
+      unsigned_type = U16;
+      break;
+    case 32:
+      max_value =
+          ConstantR0(value.builder(),
+                     static_cast<uint32>(std::numeric_limits<int32>::max()));
+      signed_type = S32;
+      unsigned_type = U32;
+      break;
+    case 64:
+      max_value =
+          ConstantR0(value.builder(),
+                     static_cast<uint64>(std::numeric_limits<int64>::max()));
+      signed_type = S64;
+      unsigned_type = U64;
+      break;
+    default:
+      return value.builder()->ReportError(
+          InvalidArgument("Invalid bit width %lld for Comparator floating "
+                          "point parameter.",
+                          bit_width));
+  }
+  auto signed_value = BitcastConvertType(value, signed_type);
+  auto unsigned_value = BitcastConvertType(value, unsigned_type);
+  auto flipped_value =
+      BitcastConvertType(Sub(max_value, unsigned_value), signed_type);
+  auto is_negative =
+      Lt(signed_value,
+         ConstantLiteral(value.builder(), LiteralUtil::Zero(signed_type)));
+  return Select(is_negative, flipped_value, signed_value);
+}
+}  // namespace
+
 XlaOp XlaBuilder::Sort(const XlaOp& keys, absl::Span<const XlaOp> values,
                        int64 dimension) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    std::vector<XlaOp> operands{keys};
+    for (const XlaOp& value : values) {
+      operands.push_back(value);
+    }
+    // Build the default less-than comparator (copied from lib/comparators.cc).
+    // TODO(b/122298745): Remove the deprecated API method so that this code
+    // duplication can be deleted.
+    auto b = this->CreateSubBuilder("comparator");
+    std::vector<PrimitiveType> operand_types;
+    for (const XlaOp& operand : operands) {
+      TF_ASSIGN_OR_RETURN(auto operand_shape, GetShape(operand));
+      operand_types.push_back(operand_shape.element_type());
+    }
+
+    int64 parameter_count = 0;
+    XlaOp first_lhs_param;
+    XlaOp first_rhs_param;
+
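+    // Declare a (lhs, rhs) parameter pair for every operand: the comparator's
+    // signature must cover all 2 * n scalar inputs, even though only the
+    // first pair takes part in the Lt comparison below.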
+    for (auto operand_type : operand_types) {
+      auto scalar_shape = ShapeUtil::MakeShape(operand_type, {});
+      auto lhs_param =
+          b->Parameter(parameter_count * 2, scalar_shape,
+                       absl::StrCat("p.", parameter_count, ".lhs"));
+      auto rhs_param =
+          b->Parameter(parameter_count * 2 + 1, scalar_shape,
+                       absl::StrCat("p.", parameter_count, ".rhs"));
+      if (parameter_count == 0) {
+        first_lhs_param = lhs_param;
+        first_rhs_param = rhs_param;
+      }
+      ++parameter_count;
+    }
+    if (primitive_util::IsFloatingPointType(operand_types[0])) {
+      PrimitiveType compare_type = operand_types[0];
+      // Special-case handling for BF16. We currently do not support direct
+      // comparisons with BF16, so we convert to F32 and then use the F32
+      // comparison logic.
+      if (compare_type == BF16) {
+        compare_type = F32;
+        first_lhs_param = b->ConvertElementType(first_lhs_param, F32);
+        first_rhs_param = b->ConvertElementType(first_rhs_param, F32);
+      }
+      int64 bit_width = primitive_util::BitWidth(compare_type);
+      first_lhs_param =
+          BitcastConvertFloatingPointToIntegral(first_lhs_param, bit_width);
+      first_rhs_param =
+          BitcastConvertFloatingPointToIntegral(first_rhs_param, bit_width);
+    }
+    Lt(first_lhs_param, first_rhs_param);
+
+    TF_ASSIGN_OR_RETURN(auto comparator, b->Build());
+    return Sort(operands, comparator, dimension, /*is_stable=*/false);
+  });
+}
+
+XlaOp XlaBuilder::Sort(absl::Span<const XlaOp> operands,
+                       const XlaComputation& comparator, int64 dimension,
+                       bool is_stable) {
+  return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
+    instr.set_is_stable(is_stable);
     std::vector<const Shape*> operand_shape_ptrs;
-    TF_ASSIGN_OR_RETURN(const Shape& keys_shape, GetShape(keys));
-    operand_shape_ptrs.push_back(&keys_shape);
-    TF_ASSIGN_OR_RETURN(std::vector<Shape> values_shapes,
-                        GetOperandShapes(values));
-    absl::c_transform(values_shapes, std::back_inserter(operand_shape_ptrs),
+    TF_ASSIGN_OR_RETURN(std::vector<Shape> operand_shapes,
+                        GetOperandShapes(operands));
+    absl::c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs),
                       [](const Shape& shape) { return &shape; });
     TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferVariadicOpShape(
                                          HloOpcode::kSort, operand_shape_ptrs));
     *instr.mutable_shape() = shape.ToProto();
     if (dimension == -1) {
-      TF_ASSIGN_OR_RETURN(const Shape& keys_shape, GetShape(keys));
+      TF_ASSIGN_OR_RETURN(const Shape& keys_shape, GetShape(operands[0]));
       dimension = keys_shape.rank() - 1;
     }
     instr.add_dimensions(dimension);
-    std::vector<XlaOp> operands{keys};
-    operands.insert(operands.end(), values.begin(), values.end());
+    AddCalledComputation(comparator, &instr);
     return AddInstruction(std::move(instr), HloOpcode::kSort, operands);
   });
 }
@@ -2871,6 +2999,29 @@
   return operand.builder()->Fft(operand, fft_type, fft_length);
 }
 
+XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
+                      bool unit_diagonal,
+                      TriangularSolveOptions::Transpose transpose_a) {
+  XlaBuilder* builder = a.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    HloInstructionProto instr;
+    TF_ASSIGN_OR_RETURN(const Shape& a_shape, builder->GetShape(a));
+    TF_ASSIGN_OR_RETURN(const Shape& b_shape, builder->GetShape(b));
+    xla::TriangularSolveOptions& options =
+        *instr.mutable_triangular_solve_options();
+    options.set_left_side(left_side);
+    options.set_lower(lower);
+    options.set_unit_diagonal(unit_diagonal);
+    options.set_transpose_a(transpose_a);
+    TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferTriangularSolveShape(
+                                         a_shape, b_shape, options));
+    *instr.mutable_shape() = shape.ToProto();
+
+    return builder->AddInstruction(std::move(instr),
+                                   HloOpcode::kTriangularSolve, {a, b});
+  });
+}
+
 XlaOp Infeed(XlaBuilder* builder, const Shape& shape, const string& config) {
   return builder->Infeed(shape, config);
 }
@@ -3171,6 +3322,12 @@
   return keys.builder()->Sort(keys, values, dimension);
 }
 
+XlaOp Sort(absl::Span<const XlaOp> operands, const XlaComputation& comparator,
+           int64 dimension, bool is_stable) {
+  return operands[0].builder()->Sort(operands, comparator, dimension,
+                                     is_stable);
+}
+
 XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max) {
   return min.builder()->Clamp(min, operand, max);
 }
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index ea4dc18..379f0ff 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -56,6 +56,9 @@
   }
   ~XlaOp() = default;
 
+  XlaOp(const XlaOp& other) = default;
+  XlaOp& operator=(const XlaOp& other) = default;
+
   // Precondition: !IsUninitialized().
   //
   // It's very common to do foo.builder()->bar().  Without this precondition, if
@@ -235,6 +238,10 @@
   // See also set_die_immediately_on_error().
   Status first_error() const { return first_error_; }
 
+  // Returns the current status of the builder, complete with the stack trace
+  // information.
+  Status GetCurrentStatus() const;
+
   // Returns the shape of the given op.
   StatusOr<Shape> GetShape(const XlaOp& op) const;
 
@@ -426,7 +433,6 @@
       const Shape& shape_with_layout, const string& opaque,
       absl::optional<absl::Span<const Shape>> operand_shapes_with_layout);
 
-
   XlaOp Reduce(const XlaOp& operand, const XlaOp& init_value,
                const XlaComputation& computation,
                absl::Span<const int64> dimensions_to_reduce);
@@ -499,8 +505,11 @@
 
   XlaOp Rev(const XlaOp& operand, absl::Span<const int64> dimensions);
 
+  ABSL_DEPRECATED("Use form with comparator computation instead")
   XlaOp Sort(const XlaOp& keys, absl::Span<const XlaOp> values = {},
              int64 dimension = -1);
+  XlaOp Sort(absl::Span<const XlaOp> operands, const XlaComputation& comparator,
+             int64 dimension = -1, bool is_stable = false);
 
   XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max);
 
@@ -789,6 +798,9 @@
       const PrecisionConfig* precision_config);
   friend XlaOp Fft(const XlaOp& operand, FftType fft_type,
                    absl::Span<const int64> fft_length);
+  friend XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
+                               bool unit_diagonal,
+                               TriangularSolveOptions::Transpose transpose_a);
   friend XlaOp Infeed(XlaBuilder* builder, const Shape& shape,
                       const string& config);
   friend void Outfeed(const XlaOp& operand, const Shape& shape_with_layout,
@@ -914,6 +926,9 @@
   friend XlaOp Rev(const XlaOp& operand, absl::Span<const int64> dimensions);
   friend XlaOp Sort(const XlaOp& keys, absl::Span<const XlaOp> values,
                     int64 dimension);
+  friend XlaOp Sort(absl::Span<const XlaOp> operands,
+                    const XlaComputation& comparator, int64 dimension,
+                    bool is_stable);
   friend XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max);
   friend XlaOp Map(XlaBuilder* builder, absl::Span<const XlaOp> operands,
                    const XlaComputation& computation,
@@ -1309,6 +1324,32 @@
 XlaOp Fft(const XlaOp& operand, FftType fft_type,
           absl::Span<const int64> fft_length);
 
+// Solves systems of linear equations with lower or upper triangular coefficient
+// matrices by forward- or back-substitution. Broadcasting along leading
+// dimensions, this routine solves for x in one of the matrix systems
+//   `op(a) * x = b`,  or `x * op(a) = b`,
+// for the variable `x` given `a` and `b`, where `op(a)` is either
+//   `op(a) = a`,  or `op(a) = transpose(a)`,  or `op(a) = conj(transpose(a))`.
+//
+// * `a` is a tensor of shape `[..., M, M]` whose innermost 2 dimensions form
+//   square matrices. If `lower` is true (false), then the strictly upper
+//   (lower) triangular part of each innermost matrix in `a` is assumed to be
+//   zero and is not accessed.
+// * `b` is a tensor of shape `[..., M, K]` if `left_side` is true, otherwise a
+//   tensor of shape `[..., K, M]`.
+// * `left_side` is a boolean, indicating whether to solve a system of the form
+//   op(a) * x = b (true) or x * op(a) = b (false).
+// * `lower` is a boolean, indicating whether the argument `a` is
+//   lower-triangular (true) or upper-triangular (false).
+// * If `unit_diagonal` is true, the diagonal elements of `a` are assumed to be
+//   1 and not accessed.
+// * `transpose_a` indicates which function `op` we use to transform the tensor
+//   `a`: the identity function, transpose(a), or conjugate(transpose(a)).
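+//
+// Usage sketch (assumes TriangularSolveOptions::NO_TRANSPOSE names the
+// identity option): solve `a * x = b` for x with a lower-triangular `a`:
+//
+//   XlaOp x = TriangularSolve(a, b, /*left_side=*/true, /*lower=*/true,
+//                             /*unit_diagonal=*/false,
+//                             TriangularSolveOptions::NO_TRANSPOSE);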
+XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
+                      bool unit_diagonal,
+                      TriangularSolveOptions::Transpose transpose_a);
+
 // Enqueues an infeed instruction onto the computation, which writes data of
 // the given shape to the infeed buffer of the device.
 XlaOp Infeed(XlaBuilder* builder, const Shape& shape,
@@ -1644,7 +1685,7 @@
 // of keys, in ascending order.
 // * If the keys have higher rank, the keys are sorted along the provided
 // dimension. For example, for a rank-2 tensor (a matrix) of keys, a dimension
-// value of 0 will indepenently sort every column, and a dimension value of 1
+// value of 0 will independently sort every column, and a dimension value of 1
 // will independently sort each row. If no dimension number is provided, then
 // the last dimension is chosen by default.
 //
@@ -1654,9 +1695,37 @@
 // * The result is a tuple that consists of a sorted tensor of keys (along the
 // provided dimension, as above) as the first element, and tensors with their
 // corresponding values as the other elements.
+ABSL_DEPRECATED("Use form with comparator computation instead")
 XlaOp Sort(const XlaOp& keys, absl::Span<const XlaOp> values = {},
            int64 dimension = -1);
 
+// Enqueues a sort instruction onto the computation, using 'comparator' for
+// comparisons. 'comparator' must define a strict weak order. 'is_stable'
+// determines whether stable sorting should be used.
+// If only one operand is provided:
+// * If the operand is a rank-1 tensor (an array), the result is a sorted array.
+//   The resulting sorting order has the property that for all index positions
+//   i, j with i < j, either
+//   comparator(value[i], value[j]) = comparator(value[j], value[i]) = false or
+//   comparator(value[i], value[j]) = true.
+// * If the operand has higher rank, the operand is sorted along the provided
+//   dimension. For example, for a rank-2 tensor (a matrix), a dimension value
+//   of 0 will independently sort every column, and a dimension value of 1 will
+//   independently sort each row. If no dimension number is provided, then the
+//   last dimension is chosen by default. For the dimension which is sorted, the
+//   same sorting order applies as in the rank-1 case.
+//
+// If more than one operand is provided:
+// * All operands must be tensors with the same dimensions. The element types of
+//   the tensors may be different.
+// * The result is a tuple that consists of the operands in sorted order (along
+//   the provided dimension, as above). When comparing, 'comparator' is called
+//   with 2 * n scalar parameters, where parameter 2 * i and 2 * i + 1
+//   correspond to the value of operand i at two index positions.
+// Default comparator computations can be found in lib/comparators.h.
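+//
+// Usage sketch, assuming `comparator` was built elsewhere (e.g. with the
+// helpers declared in lib/comparators.h) to compare the scalar pairs
+// (f32, f32) and (s32, s32):
+//
+//   // keys : f32[10], values : s32[10]
+//   Sort({keys, values}, comparator, /*dimension=*/0, /*is_stable=*/true);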
+XlaOp Sort(absl::Span<const XlaOp> operands, const XlaComputation& comparator,
+           int64 dimension = -1, bool is_stable = false);
+
 // Enqueues a clamp instruction onto the computation.
 XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max);
 
diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc
index a9a9164..43d9ee0 100644
--- a/tensorflow/compiler/xla/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/debug_options_flags.cc
@@ -128,11 +128,6 @@
       tensorflow::Flag(
           "xla_hlo_graph_path", flag_values->mutable_xla_hlo_graph_path(),
           "With xla_generate_hlo_graph, dump the graphs into this path."),
-      tensorflow::Flag(
-          "xla_hlo_dump_as_graphdef",
-          bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_graphdef),
-          flag_values->xla_hlo_dump_as_graphdef(),
-          "Dump HLO graphs as TensorFlow GraphDefs."),
       tensorflow::Flag("xla_hlo_dump_as_html",
                        bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_html),
                        flag_values->xla_hlo_dump_as_html(),
@@ -145,13 +140,6 @@
           "Assign colors based on sharding assignments when generating the "
           "HLO graphs."),
       tensorflow::Flag(
-          "xla_hlo_tfgraph_device_scopes",
-          bool_setter_for(&DebugOptions::set_xla_hlo_tfgraph_device_scopes),
-          flag_values->xla_hlo_tfgraph_device_scopes(),
-          "When generating TensorFlow HLO graphs, if the HLO instructions "
-          "are assigned to a specific device, prefix the name scope with "
-          "\"devX\" with X being the device ordinal."),
-      tensorflow::Flag(
           "xla_log_hlo_text", flag_values->mutable_xla_log_hlo_text(),
           "HLO modules matching this regex will be dumped to LOG(INFO)."),
       tensorflow::Flag(
diff --git a/tensorflow/compiler/xla/executable_run_options.cc b/tensorflow/compiler/xla/executable_run_options.cc
index 0f9b591..230f3b2 100644
--- a/tensorflow/compiler/xla/executable_run_options.cc
+++ b/tensorflow/compiler/xla/executable_run_options.cc
@@ -77,7 +77,7 @@
 }
 
 ExecutableRunOptions& ExecutableRunOptions::set_device_assignment(
-    DeviceAssignment* device_assignment) {
+    const DeviceAssignment* device_assignment) {
   device_assignment_ = device_assignment;
   return *this;
 }
diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h
index 6f36d11..1e74495 100644
--- a/tensorflow/compiler/xla/executable_run_options.h
+++ b/tensorflow/compiler/xla/executable_run_options.h
@@ -74,7 +74,7 @@
   ExecutableRunOptions& set_execution_profile(ExecutionProfile* profile);
 
   ExecutableRunOptions& set_device_assignment(
-      DeviceAssignment* device_assignment);
+      const DeviceAssignment* device_assignment);
   const DeviceAssignment* device_assignment() const;
 
   ExecutableRunOptions& set_rng_seed(int rng_seed);
@@ -83,7 +83,7 @@
  private:
   DeviceMemoryAllocator* allocator_ = nullptr;
   int device_ordinal_ = -1;
-  DeviceAssignment* device_assignment_ = nullptr;
+  const DeviceAssignment* device_assignment_ = nullptr;
   stream_executor::Stream* stream_ = nullptr;
   const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr;
   ExecutionProfile* execution_profile_ = nullptr;
diff --git a/tensorflow/compiler/xla/g3doc/_book.yaml b/tensorflow/compiler/xla/g3doc/_book.yaml
index 267701e..d756cd7 100644
--- a/tensorflow/compiler/xla/g3doc/_book.yaml
+++ b/tensorflow/compiler/xla/g3doc/_book.yaml
@@ -25,6 +25,8 @@
         path: /xla/operation_semantics
       - title: Shapes and layout
         path: /xla/shapes
+      - title: Tiled layout
+        path: /xla/tiled_layout
       - title: Using AOT compilation
         path: /xla/tfcompile
       - heading: Tutorials
diff --git a/tensorflow/compiler/xla/g3doc/layout_with_tiling.md b/tensorflow/compiler/xla/g3doc/layout_with_tiling.md
deleted file mode 100644
index 5e99085..0000000
--- a/tensorflow/compiler/xla/g3doc/layout_with_tiling.md
+++ /dev/null
@@ -1,159 +0,0 @@
-# Tiled layout
-
-*Note: This doc describes how tiled layout is intended to work. Tiling is being
-implemented, but this is an early effort and it is currently not even guaranteed
-to get an Unimplemented error if one tries to use tiling - it may be just
-silently ignored.*
-
-<center> ![](images/xla_array_layout_figure1.png)
-
-Figure 1 </center>
-
-Figure 1 shows how an array F32[3,5] is laid out in memory with 2x2 tiling. A
-shape with this layout is written as F32[3,5]{1,0:(2,2)}, where 1,0 relates to
-the physical order of dimensions (minor_to_major field in Layout) while (2,2)
-after the colon indicates tiling of the physical dimensions by a 2x2 tile.
-
-Intuitively tiles are laid out to cover the shape and then within each tile,
-elements are then laid out without tiling, as in the example above, where the
-right part of the example shows the layout in memory, including the white
-padding elements that are added in order to have complete 2x2 tiles even though
-the original array bounds are not even.
-
-The extra elements in the padding are not required to contain any particular
-value.
-
-## Linear index formulas for tiling given a shape and a tile
-
-Without tiling, an element e=(e<sub>n</sub>, e<sub>n-1</sub>, ... ,
-e<sub>1</sub>) in an array with array bounds d=(d<sub>n</sub>, d<sub>n-1</sub>,
-... , d<sub>1</sub>) (d1 is the most minor dimension) is laid out by major to
-minor order at position:
-
-&nbsp;&nbsp; linear_index(e, d) \
-= linear_index((e<sub>n</sub>, e<sub>n-1</sub>, ... , e<sub>1</sub>),
-(d<sub>n</sub>, d<sub>n-1</sub>, ... , d<sub>1</sub>)) \
-= e<sub>n</sub>d<sub>n-1</sub>...d<sub>1</sub> +
-e<sub>n-1</sub>d<sub>n-2</sub>...d<sub>1</sub> + ... + e<sub>1</sub>
-
-For simplicity of notation in this document we assume a tile has the same number
-of dimensions as the array. In XLA's implementation of tiling, this is
-generalized to tilings with fewer dimensions by leaving the initial most-major
-dimensions unchanged and applying the tiling only to the most minor dimensions,
-so that the tiling that is specified mentions a suffix of the physical
-dimensions of the shape being tiled.
-
-When tiling of size (t<sub>n</sub>, t<sub>n-1</sub>, ... , t<sub>1</sub>) is
-used, an element in the array with indices (e<sub>n</sub>, e<sub>n-1</sub>, ...
-, e<sub>1</sub>) is mapped to this position in the final layout:
-
-&nbsp;&nbsp; linear_index_with_tile(e, d, t) \
-= linear_index((⌊e/t⌋, e mod t), (⌈d/t⌉, t)) &nbsp; &nbsp; (arithmetic is
-elementwise, (a,b) is concatenation) \
-= linear_index((⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... ,
-⌊e<sub>1</sub>/t<sub>1</sub>⌋, e<sub>n</sub> mod t<sub>n</sub>, ... ,
-e<sub>1</sub> mod t<sub>1</sub>), (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
-⌈d<sub>1</sub>/t<sub>1</sub>⌉, t<sub>n</sub>, t<sub>n-1</sub>, ... ,
-t<sub>1</sub>)) \
-= linear_index((⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... ,
-⌊e<sub>1</sub>/t<sub>1</sub>⌋), (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
-⌈d<sub>1</sub>/t<sub>1</sub>⌉))∙t<sub>n</sub>t<sub>n-1</sub>...t<sub>1</sub> +
-linear_index((e<sub>n</sub> mod t<sub>n</sub>, ... , e<sub>1</sub> mod
-t<sub>1</sub>), (t<sub>n</sub>, t<sub>n-1</sub>, ... , t<sub>1</sub>))
-
-The layout can be thought of as having two parts:
-(⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... , ⌊e<sub>1</sub>/t<sub>1</sub>⌋), which
-corresponds to a tile index in an array of tiles of size
-(⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... , ⌈d<sub>1</sub>/t<sub>1</sub>⌉), and
-(e<sub>n</sub> mod t<sub>n</sub>, ... , e<sub>1</sub> mod t<sub>1</sub>), which
-corresponds to a within-tile index. The ceil function appears in
-⌈d<sub>i</sub>/t<sub>i</sub>⌉ because if tiles overrun the bounds of the larger
-array, padding is inserted as in Figure 1. Both the tiles and elements within
-tiles are laid out recursively without tiling.
-
-For the example in Figure 1, element (2,3) has tile index (1,1), and within-tile
-index (0,1), for a combined coordinate vector of (1, 1, 0, 1). The tile indices
-have bounds (2, 3) and the tile itself is (2, 2) for a combined vector of (2, 3,
-2, 2). The linear index with tile for the element with index (2, 3) in the
-logical shape is then
-
-&nbsp;&nbsp; linear_index_with_tile((2,3), (3,5), (2,2)) \
-= linear_index((1,1,0,1), (2,3,2,2)) \
-= linear_index((1,1), (2,3)) ∙ 2 ∙ 2 + linear_index((0,1), (2,2)) \
-= (1 ∙ 3 + 1) ∙ 2 ∙ 2 + (0 ∙ 2 + 1) \
-= 17.
-
-# Tiling as pad-reshape-transpose
-
-Tiling-based layout operates as follows: \
-Consider an array of dimensions (d<sub>n</sub>, d<sub>n-1</sub>, ... , d1) (d1
-is the most minor dimension). When it’s laid out with tiling of size
-(t<sub>n</sub>, t<sub>n-1</sub>, ... , t<sub>1</sub>) (t<sub>1</sub> is the most
-minor dimension), that tiling can be described in terms of pad-reshape-transpose
-in the following way.
-
-1.  The array is padded to (⌈d<sub>n</sub>/t<sub>n</sub>⌉∙t<sub>n</sub>, ... ,
-    ⌈d<sub>1</sub>/t<sub>1</sub>⌉∙t<sub>1</sub>).
-2.  Each dimension i is broken into (⌈d<sub>i</sub>/t</sub>i</sub>⌉,
-    t<sub>i</sub>), i.e. the array is reshaped to \
-    &nbsp; &nbsp; (⌈d<sub>n</sub>/t<sub>n</sub>⌉, t<sub>n</sub>, ... ,
-    ⌈d<sub>1</sub>/t<sub>1</sub>⌉, t<sub>1</sub>). \
-    There is no physical layout change in this reshape by itself, so this
-    reshape is a bitcast. If one is not explicitly thinking of a tiling, this
-    reshape could express any shape with the same number of elements as the
-    padded shape - the example here is of how to express a tile in this way.
-3.  A transpose happens by moving t<sub>n</sub>, ... , t<sub>1</sub> to the most
-    minor dimensions while keeping their relative order, so that the order of
-    dimensions from most major to most minor becomes \
-    &nbsp; &nbsp; (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
-    ⌈d<sub>1</sub>/t<sub>1</sub>⌉, t<sub>n</sub>, ... , t<sub>1</sub>).
-
-The final shape has the prefix \
-&nbsp; &nbsp; (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
-⌈d<sub>1</sub>/t<sub>1</sub>⌉), which describes the number of tiles in each
-dimension. An element in the array (e<sub>n</sub>, ... , e<sub>1</sub>) is
-mapped to this element in the final shape: \
-&nbsp; &nbsp; (⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... ,
-⌊e<sub>0</sub>/t<sub>0</sub>⌋, e<sub>n</sub> mod t<sub>n</sub>, ... ,
-e<sub>1</sub> mod t<sub>1</sub>). It is easy to see that the linear index of the
-element follows the formula above as expected.
-
-# Repeated tiling
-
-XLA's tiling becomes even more flexible by applying it repeatedly.
-
-<center> ![](images/xla_array_layout_figure2.png)
-
-Figure 2 </center>
-
-Figure 2 shows how an array of size 4x8 is tiled by two levels of tiling (first
-2x4 then 2x1). We represent this repeated tiling as (2,4)(2,1). Each color
-indicates a 2x4 tile and each red border box is a 2x1 tile. The numbers
-indicates the linear index in memory of that element in the tiled format. This
-format matches the format used for BF16 on TPU, except that the initial tile is
-bigger, namely the tiling is (8,128)(2,1), where the purpose of the second
-tiling by 2x1 is to collect together two 16 bit values to form one 32 bit value
-in a way that aligns with the architecture of a TPU.
-
-Note that a second or later tile can refer to both the minor within-tile
-dimensions, which just rearranges data within the tile, as in this example with
-(8,128)(2,1), but can also refer to the major cross-tile dimensions from the
-prior tiling.
-
-# Combining dimensions using tiles
-
-XLA's tiling also supports combining dimensions. For example, it can combine
-dimensions in F32[2,7,8,11,10]{4,3,2,1,0} into F32[112,110]{1,0} first before
-tiling it with (2,3). The tile used is (&lowast;,&lowast;,2,&lowast;,3). Here an
-asterisk in a tile implies taking that dimension and combining it with the next
-more minor dimension. Multiple adjacent dimensions can be subsumed together into
-one dimension. A subsumed dimension is represented by a tile value of -1 in that
-dimension of the tile, which is not otherwise valid in a tile as a dimension
-size.
-
-More precisely, if dimension i of the shape is eliminated via an asterisk in the
-tile, then before the prior definition of tiling is applied, that dimension is
-removed from both the shape being tiled and the tile vector, and what was
-dimension i-1 of the shape has its array bound increased from d<sub>i-1</sub> to
-d<sub>i</sub>d<sub>i-1</sub>. This step is repeated for each asterisk in the
-tile vector.
diff --git a/tensorflow/compiler/xla/g3doc/tiled_layout.md b/tensorflow/compiler/xla/g3doc/tiled_layout.md
new file mode 100644
index 0000000..21e88ce
--- /dev/null
+++ b/tensorflow/compiler/xla/g3doc/tiled_layout.md
@@ -0,0 +1,157 @@
+# Tiled layout
+
+Caution: Tiled layout is *pre-release*; this document describes how it is
+intended to work. Attempts to use tiling may be silently ignored rather than
+raising an error.
+
+<center> ![](images/xla_array_layout_figure1.png)
+
+Figure 1 </center>
+
+Figure 1 shows how an array F32[3,5] is laid out in memory with 2x2 tiling. A
+shape with this layout is written as F32[3,5]{1,0:(2,2)}, where 1,0 relates to
+the physical order of dimensions (minor_to_major field in Layout) while (2,2)
+after the colon indicates tiling of the physical dimensions by a 2x2 tile.
+
+Intuitively, tiles are laid out to cover the shape, and within each tile the
+elements are laid out without tiling. In the example above, the right part of
+the figure shows the resulting layout in memory, including the white padding
+elements that are added in order to have complete 2x2 tiles even though the
+original array bounds are not even.
+
+The extra elements in the padding are not required to contain any particular
+value.
+
+## Linear index formulas for tiling given a shape and a tile
+
+Without tiling, an element e=(e<sub>n</sub>, e<sub>n-1</sub>, ... ,
+e<sub>1</sub>) in an array with array bounds d=(d<sub>n</sub>, d<sub>n-1</sub>,
+... , d<sub>1</sub>) (d<sub>1</sub> is the most minor dimension) is laid out by major to
+minor order at position:
+
+&nbsp;&nbsp; linear_index(e, d) \
+= linear_index((e<sub>n</sub>, e<sub>n-1</sub>, ... , e<sub>1</sub>),
+(d<sub>n</sub>, d<sub>n-1</sub>, ... , d<sub>1</sub>)) \
+= e<sub>n</sub>d<sub>n-1</sub>...d<sub>1</sub> +
+e<sub>n-1</sub>d<sub>n-2</sub>...d<sub>1</sub> + ... + e<sub>1</sub>
+
+For simplicity of notation in this document we assume a tile has the same number
+of dimensions as the array. In XLA's implementation of tiling, this is
+generalized to tilings with fewer dimensions by leaving the initial most-major
+dimensions unchanged and applying the tiling only to the most minor dimensions,
+so that the tiling that is specified mentions a suffix of the physical
+dimensions of the shape being tiled.
+
+When tiling of size (t<sub>n</sub>, t<sub>n-1</sub>, ... , t<sub>1</sub>) is
+used, an element in the array with indices (e<sub>n</sub>, e<sub>n-1</sub>, ...
+, e<sub>1</sub>) is mapped to this position in the final layout:
+
+&nbsp;&nbsp; linear_index_with_tile(e, d, t) \
+= linear_index((⌊e/t⌋, e mod t), (⌈d/t⌉, t)) &nbsp; &nbsp; (arithmetic is
+elementwise, (a,b) is concatenation) \
+= linear_index((⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... ,
+⌊e<sub>1</sub>/t<sub>1</sub>⌋, e<sub>n</sub> mod t<sub>n</sub>, ... ,
+e<sub>1</sub> mod t<sub>1</sub>), (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
+⌈d<sub>1</sub>/t<sub>1</sub>⌉, t<sub>n</sub>, t<sub>n-1</sub>, ... ,
+t<sub>1</sub>)) \
+= linear_index((⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... ,
+⌊e<sub>1</sub>/t<sub>1</sub>⌋), (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
+⌈d<sub>1</sub>/t<sub>1</sub>⌉))∙t<sub>n</sub>t<sub>n-1</sub>...t<sub>1</sub> +
+linear_index((e<sub>n</sub> mod t<sub>n</sub>, ... , e<sub>1</sub> mod
+t<sub>1</sub>), (t<sub>n</sub>, t<sub>n-1</sub>, ... , t<sub>1</sub>))
+
+The layout can be thought of as having two parts:
+(⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... , ⌊e<sub>1</sub>/t<sub>1</sub>⌋), which
+corresponds to a tile index in an array of tiles of size
+(⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... , ⌈d<sub>1</sub>/t<sub>1</sub>⌉), and
+(e<sub>n</sub> mod t<sub>n</sub>, ... , e<sub>1</sub> mod t<sub>1</sub>), which
+corresponds to a within-tile index. The ceil function appears in
+⌈d<sub>i</sub>/t<sub>i</sub>⌉ because if tiles overrun the bounds of the larger
+array, padding is inserted as in Figure 1. Both the tiles and elements within
+tiles are laid out recursively without tiling.
+
+For the example in Figure 1, element (2,3) has tile index (1,1), and within-tile
+index (0,1), for a combined coordinate vector of (1, 1, 0, 1). The tile indices
+have bounds (2, 3) and the tile itself is (2, 2) for a combined vector of (2, 3,
+2, 2). The linear index with tile for the element with index (2, 3) in the
+logical shape is then
+
+&nbsp;&nbsp; linear_index_with_tile((2,3), (3,5), (2,2)) \
+= linear_index((1,1,0,1), (2,3,2,2)) \
+= linear_index((1,1), (2,3)) ∙ 2 ∙ 2 + linear_index((0,1), (2,2)) \
+= (1 ∙ 3 + 1) ∙ 2 ∙ 2 + (0 ∙ 2 + 1) \
+= 17.
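+
+A minimal C++ sketch of these formulas (hypothetical helper functions, not
+part of XLA's API), assuming the tile has the same rank as the array:
+
+```c++
+#include <cstdint>
+#include <vector>
+
+// linear_index(e, d): e and d are ordered from most major to most minor.
+int64_t LinearIndex(const std::vector<int64_t>& e,
+                    const std::vector<int64_t>& d) {
+  int64_t index = 0;
+  for (size_t i = 0; i < e.size(); ++i) index = index * d[i] + e[i];
+  return index;
+}
+
+// linear_index_with_tile(e, d, t): the tile index is laid out first, then
+// the within-tile index, exactly as in the formula above.
+int64_t LinearIndexWithTile(const std::vector<int64_t>& e,
+                            const std::vector<int64_t>& d,
+                            const std::vector<int64_t>& t) {
+  std::vector<int64_t> tile_index, within_tile, tile_counts;
+  int64_t tile_size = 1;
+  for (size_t i = 0; i < e.size(); ++i) {
+    tile_index.push_back(e[i] / t[i]);
+    within_tile.push_back(e[i] % t[i]);
+    tile_counts.push_back((d[i] + t[i] - 1) / t[i]);  // ceil(d/t)
+    tile_size *= t[i];
+  }
+  return LinearIndex(tile_index, tile_counts) * tile_size +
+         LinearIndex(within_tile, t);
+}
+
+// LinearIndexWithTile({2, 3}, {3, 5}, {2, 2}) returns 17, matching Figure 1.
+```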
+
+## Tiling as pad-reshape-transpose
+
+Tiling-based layout operates as follows: \
+Consider an array of dimensions (d<sub>n</sub>, d<sub>n-1</sub>, ... ,
+d<sub>1</sub>) (d<sub>1</sub> is the most minor dimension). When it’s laid out with tiling of size
+(t<sub>n</sub>, t<sub>n-1</sub>, ... , t<sub>1</sub>) (t<sub>1</sub> is the most
+minor dimension), that tiling can be described in terms of pad-reshape-transpose
+in the following way.
+
+1.  The array is padded to (⌈d<sub>n</sub>/t<sub>n</sub>⌉∙t<sub>n</sub>, ... ,
+    ⌈d<sub>1</sub>/t<sub>1</sub>⌉∙t<sub>1</sub>).
+2.  Each dimension i is broken into (⌈d<sub>i</sub>/t<sub>i</sub>⌉,
+    t<sub>i</sub>), i.e. the array is reshaped to \
+    &nbsp; &nbsp; (⌈d<sub>n</sub>/t<sub>n</sub>⌉, t<sub>n</sub>, ... ,
+    ⌈d<sub>1</sub>/t<sub>1</sub>⌉, t<sub>1</sub>). \
+    There is no physical layout change in this reshape by itself, so this
+    reshape is a bitcast. If one is not explicitly thinking of a tiling, this
+    reshape could express any shape with the same number of elements as the
+    padded shape - the example here is of how to express a tile in this way.
+3.  A transpose happens by moving t<sub>n</sub>, ... , t<sub>1</sub> to the most
+    minor dimensions while keeping their relative order, so that the order of
+    dimensions from most major to most minor becomes \
+    &nbsp; &nbsp; (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
+    ⌈d<sub>1</sub>/t<sub>1</sub>⌉, t<sub>n</sub>, ... , t<sub>1</sub>).
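+
+For the Figure 1 example, F32[3,5] with (2,2) tiling: the array is padded to
+(4,6), reshaped to (2,2,3,2), and transposed to (2,3,2,2), so element (2,3)
+lands at (⌊2/2⌋, ⌊3/2⌋, 2 mod 2, 3 mod 2) = (1,1,0,1), as before.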
+
+The final shape has the prefix \
+&nbsp; &nbsp; (⌈d<sub>n</sub>/t<sub>n</sub>⌉, ... ,
+⌈d<sub>1</sub>/t<sub>1</sub>⌉), which describes the number of tiles in each
+dimension. An element in the array (e<sub>n</sub>, ... , e<sub>1</sub>) is
+mapped to this element in the final shape: \
+&nbsp; &nbsp; (⌊e<sub>n</sub>/t<sub>n</sub>⌋, ... ,
+⌊e<sub>1</sub>/t<sub>1</sub>⌋, e<sub>n</sub> mod t<sub>n</sub>, ... ,
+e<sub>1</sub> mod t<sub>1</sub>). It is easy to see that the linear index of the
+element follows the formula above as expected.
+
+## Repeated tiling
+
+XLA's tiling becomes even more flexible by applying it repeatedly.
+
+<center> ![](images/xla_array_layout_figure2.png)
+
+Figure 2 </center>
+
+Figure 2 shows how an array of size 4x8 is tiled by two levels of tiling (first
+2x4 then 2x1). We represent this repeated tiling as (2,4)(2,1). Each color
+indicates a 2x4 tile and each red border box is a 2x1 tile. The numbers
+indicate the linear index in memory of that element in the tiled format. This
+format matches the format used for BF16 on TPU, except that the initial tile
+is bigger, namely the tiling is (8,128)(2,1), where the purpose of the second
+tiling by 2x1 is to collect two 16-bit values into one 32-bit value in a way
+that aligns with the architecture of a TPU.
+
+Note that a second or later tile can refer both to the minor within-tile
+dimensions, which just rearranges data within the tile, as in this example
+with (8,128)(2,1), and to the major cross-tile dimensions from the prior
+tiling.
+
+## Combining dimensions using tiles
+
+XLA's tiling also supports combining dimensions. For example, it can combine
+dimensions in F32[2,7,8,11,10]{4,3,2,1,0} into F32[112,110]{1,0} before
+tiling it with (2,3). The tile used is (&lowast;,&lowast;,2,&lowast;,3). Here
+an asterisk in a tile means taking that dimension and combining it with the
+next more minor dimension. Multiple adjacent dimensions can be subsumed into
+one dimension. A subsumed dimension is represented by the sentinel tile value
+Tile::kCombineDimension (the minimum int64 value) in that dimension of the
+tile, which is not otherwise valid in a tile as a dimension size.
+
+More precisely, if dimension i of the shape is eliminated via an asterisk in the
+tile, then before the prior definition of tiling is applied, that dimension is
+removed from both the shape being tiled and the tile vector, and what was
+dimension i-1 of the shape has its array bound increased from d<sub>i-1</sub> to
+d<sub>i</sub>d<sub>i-1</sub>. This step is repeated for each asterisk in the
+tile vector.
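+
+Working through the example above: the two leading asterisks fold the 2 and
+the 7 into the 8 (2·7·8 = 112), and the remaining asterisk folds the 11 into
+the 10 (11·10 = 110), leaving F32[112,110]{1,0} to be tiled by (2,3).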
diff --git a/tensorflow/compiler/xla/layout.cc b/tensorflow/compiler/xla/layout.cc
index e3b5fcd..000c4fd 100644
--- a/tensorflow/compiler/xla/layout.cc
+++ b/tensorflow/compiler/xla/layout.cc
@@ -30,7 +30,19 @@
 }
 
 string Tile::ToString() const {
-  return absl::StrCat("(", absl::StrJoin(dimensions(), ","), ")");
+  std::vector<string> elements;
+  for (auto dim : dimensions()) {
+    if (dim >= 0) {
+      elements.push_back(std::to_string(dim));
+    } else if (dim == kCombineDimension) {
+      elements.push_back("*");
+    } else {
+      elements.push_back(absl::StrCat("Invalid value ", dim));
+    }
+  }
+  return absl::StrCat("(", absl::StrJoin(elements, ","), ")");
 }
 
 /* static */ Layout Layout::CreateFromProto(const LayoutProto& proto) {
@@ -64,23 +76,43 @@
 }
 
 string Layout::ToString() const {
-  // TODO(b/119839262): Emit tiles in string.
   if (format() == SPARSE) {
+    CHECK_EQ(tiles_size(), 0) << "Sparse layout should not be tiled.";
     return absl::StrCat("sparse{", max_sparse_elements(), "}");
   } else if (format() == DENSE) {
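+    // Dense layouts print as "{minor_to_major:T(...)(...)E(bits)}"; the
+    // portion after the colon is omitted when there are no tiles and
+    // element_size_in_bits() is zero, e.g. "{3,2,1,0:T(42,123)(4,5)}" vs
+    // plain "{4,5,6}".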
-    return absl::StrCat("{", absl::StrJoin(minor_to_major(), ","), "}");
+    string colon_string = tiles().empty() ? "" : "T";
+    for (const Tile& tile : tiles()) {
+      absl::StrAppend(&colon_string, tile.ToString());
+    }
+    if (element_size_in_bits() != 0) {
+      absl::StrAppend(&colon_string, "E(", element_size_in_bits(), ")");
+    }
+    return absl::StrCat("{", absl::StrJoin(minor_to_major(), ","),
+                        colon_string.empty() ? "" : ":", colon_string, "}");
   } else {
     CHECK_EQ(format(), INVALID_FORMAT);
     return "invalid{}";
   }
 }
 
+bool Layout::Equal::operator()(const Layout& lhs, const Layout& rhs) {
+  if (lhs.format() != rhs.format() ||
+      lhs.minor_to_major() != rhs.minor_to_major() ||
+      lhs.max_sparse_elements() != rhs.max_sparse_elements()) {
+    return false;
+  }
+  if (!ignore_tiles_ && lhs.tiles() != rhs.tiles()) {
+    return false;
+  }
+  if (!ignore_element_size_ &&
+      lhs.element_size_in_bits() != rhs.element_size_in_bits()) {
+    return false;
+  }
+  return true;
+}
+
 bool Layout::operator==(const Layout& other) const {
-  return (other.format() == format() &&
-          other.minor_to_major() == minor_to_major() &&
-          other.element_size_in_bits() == element_size_in_bits() &&
-          other.max_sparse_elements() == max_sparse_elements() &&
-          other.tiles() == tiles());
+  return Equal()(*this, other);
 }
 
 std::ostream& operator<<(std::ostream& out, const Tile& tile) {
diff --git a/tensorflow/compiler/xla/layout.h b/tensorflow/compiler/xla/layout.h
index 313368c..acc449b 100644
--- a/tensorflow/compiler/xla/layout.h
+++ b/tensorflow/compiler/xla/layout.h
@@ -55,6 +55,20 @@
   // Returns the dimensions of the tile.
   const std::vector<int64>& dimensions() const { return dimensions_; }
 
+  Tile& add_dimensions(int64 value) {
+    dimensions_.push_back(value);
+    return *this;
+  }
+
+  Tile& clear_dimensions() {
+    dimensions_.clear();
+    return *this;
+  }
+
+  // This dimension size means the corresponding dimension in the shape is
+  // combined with the next minor dimension before tiling is applied.
+  static constexpr int64 kCombineDimension = std::numeric_limits<int64>::min();
+
  private:
   // The bounds of the tile.
   std::vector<int64> dimensions_;
@@ -71,10 +85,12 @@
 
   // Constructs a dense tiled layout with the given minor-to-major order and
   // tiles.
-  Layout(absl::Span<const int64> minor_to_major, absl::Span<const Tile> tiles)
+  Layout(absl::Span<const int64> minor_to_major, absl::Span<const Tile> tiles,
+         int64 element_size_in_bits = 0)
       : format_(DENSE),
         minor_to_major_(minor_to_major.begin(), minor_to_major.end()),
-        tiles_(tiles.begin(), tiles.end()) {}
+        tiles_(tiles.begin(), tiles.end()),
+        element_size_in_bits_(element_size_in_bits) {}
 
   // Construct a shape from a LayoutProto.
   static Layout CreateFromProto(const LayoutProto& proto);
@@ -85,6 +101,37 @@
   // Returns a human-readable string that represents this layout.
   string ToString() const;
 
+  // Equal is a configurable functor to check the equality of two layouts.
+  //
+  // Examples:
+  //
+  // - Comparing two layouts ignoring their difference in tiles:
+  //   Equal().IgnoreTiles()(layout1, layout2);
+  //
+  // - Comparing two layouts ignoring their difference in tiles and element
+  //   size:
+  //   Equal().IgnoreTiles().IgnoreElementSize()(layout1, layout2);
+  class Equal {
+   public:
+    Equal() = default;
+
+    bool operator()(const Layout& lhs, const Layout& rhs);
+
+    Equal& IgnoreTiles() {
+      ignore_tiles_ = true;
+      return *this;
+    }
+
+    Equal& IgnoreElementSize() {
+      ignore_element_size_ = true;
+      return *this;
+    }
+
+   private:
+    bool ignore_tiles_ = false;
+    bool ignore_element_size_ = false;
+  };
+
   bool operator==(const Layout& other) const;
   bool operator!=(const Layout& other) const { return !(*this == other); }
 
@@ -159,7 +206,7 @@
     element_size_in_bits_ = 0;
   }
 
- public:
+ private:
   // The format of this layout.
   Format format_ = INVALID_FORMAT;
 
@@ -172,11 +219,11 @@
   // memory.  This field must be zero unless the format is SPARSE.
   int64 max_sparse_elements_ = 0;
 
-  // The number of bits used to store an individual array element.
-  int64 element_size_in_bits_ = 0;
-
   // The tiles used in tiling-based layout.
   std::vector<Tile> tiles_;
+
+  // The number of bits used to store an individual array element.
+  int64 element_size_in_bits_ = 0;
 };
 
 std::ostream& operator<<(std::ostream& out, const Tile& Tile);
diff --git a/tensorflow/compiler/xla/layout_test.cc b/tensorflow/compiler/xla/layout_test.cc
index fb6abd3..f5d71c5 100644
--- a/tensorflow/compiler/xla/layout_test.cc
+++ b/tensorflow/compiler/xla/layout_test.cc
@@ -38,10 +38,13 @@
             "sparse{123}");
   EXPECT_EQ(Layout({4, 5, 6}).ToString(), "{4,5,6}");
   EXPECT_EQ(Layout({3, 2, 1, 0}, {Tile({42, 123}), Tile({4, 5})}).ToString(),
-            "{3,2,1,0}");
+            "{3,2,1,0:T(42,123)(4,5)}");
   EXPECT_EQ(
       Layout({1, 0}, {Tile({2, 55})}).set_element_size_in_bits(42).ToString(),
-      "{1,0}");
+      "{1,0:T(2,55)E(42)}");
+  EXPECT_EQ(
+      Layout({1, 0}, {Tile({-2, 55})}).set_element_size_in_bits(42).ToString(),
+      "{1,0:T(Invalid value -2,55)E(42)}");
 }
 
 TEST_F(LayoutTest, StreamOut) {
@@ -84,6 +87,15 @@
             Layout().set_format(SPARSE).set_max_sparse_elements(42));
   EXPECT_NE(Layout().set_format(SPARSE).set_max_sparse_elements(42),
             Layout().set_format(SPARSE).set_max_sparse_elements(24));
+
+  EXPECT_FALSE(
+      Layout::Equal()(Layout({0, 1, 2}, {Tile({42, 44})}), Layout({0, 1, 2})));
+  EXPECT_TRUE(Layout::Equal().IgnoreTiles()(Layout({0, 1, 2}, {Tile({42, 44})}),
+                                            Layout({0, 1, 2})));
+  EXPECT_FALSE(
+      Layout::Equal()(Layout({0, 1, 2}, {}, 32), Layout({0, 1, 2}, {}, 1)));
+  EXPECT_TRUE(Layout::Equal().IgnoreElementSize()(Layout({0, 1, 2}, {}, 32),
+                                                  Layout({0, 1, 2}, {}, 1)));
 }
 
 TEST_F(LayoutTest, LayoutToFromProto) {
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 2fe9b56..6231411 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -54,12 +54,24 @@
 }  // namespace
 
 /* static */ Layout LayoutUtil::MakeLayout(
-    absl::Span<const int64> minor_to_major) {
+    absl::Span<const int64> minor_to_major, absl::Span<const Tile> tiles,
+    int64 element_size_in_bits) {
   Layout layout;
   layout.set_format(DENSE);
   for (int64 dimension_number : minor_to_major) {
     layout.add_minor_to_major(dimension_number);
   }
+  for (const Tile& tile : tiles) {
+    for (int64 dim : tile.dimensions()) {
+      if (dim < 0 && dim != Tile::kCombineDimension) {
+        LOG(FATAL) << "Tile dimension size needs to be the minimum int64 "
+                      "value if it is negative. Value is "
+                   << dim;
+      }
+    }
+    *layout.add_tiles() = tile;
+  }
+  layout.set_element_size_in_bits(element_size_in_bits);
   return layout;
 }
 
@@ -235,6 +247,10 @@
       }
       dimensions_in_layout[dim] = true;
     }
+  } else {
+    if (layout.tiles_size() != 0) {
+      return InvalidArgument("Only dense layouts can be tiled.");
+    }
   }
 
   return Status::OK();
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index 609dba6..9997aef 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -36,7 +36,9 @@
  public:
   // Creates a layout with the given minor-to-major dimension order. (This is a
   // convenience function for protobuf construction.)
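+  //
+  // For example (sketch):
+  //   LayoutUtil::MakeLayout({3, 2, 1, 0}, {Tile({42, 123}), Tile({4, 5})})
+  // yields a tiled layout that prints as "{3,2,1,0:T(42,123)(4,5)}".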
-  static Layout MakeLayout(absl::Span<const int64> minor_to_major);
+  static Layout MakeLayout(absl::Span<const int64> minor_to_major,
+                           absl::Span<const Tile> tiles = {},
+                           int64 element_size_in_bits = 0);
 
   // Similar to MakeLayout, but take indices in reverse order.
   static Layout MakeLayoutFromMajorToMinor(
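
A sketch of calling the extended MakeLayout declared above; the Tile brace syntax comes from layout_test.cc, and layout_util.cc LOG(FATAL)s on any negative tile dimension other than Tile::kCombineDimension:

    // Sketch: a 2D layout tiled into 8x128 blocks with 32-bit element
    // storage, using the new optional arguments of MakeLayout.
    #include "tensorflow/compiler/xla/layout_util.h"

    xla::Layout MakeTiledLayout() {
      return xla::LayoutUtil::MakeLayout(
          /*minor_to_major=*/{1, 0},
          /*tiles=*/{xla::Tile({8, 128})},
          /*element_size_in_bits=*/32);
    }
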
diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc
index 4cc94c27..12da214 100644
--- a/tensorflow/compiler/xla/layout_util_test.cc
+++ b/tensorflow/compiler/xla/layout_util_test.cc
@@ -317,6 +317,81 @@
                             ShapeUtil::MakeShape(F32, {10, 20, 30, 15, 25}))));
 }
 
+TEST_F(LayoutUtilTest, HumanStringWithTiling) {
+  Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {2, 3, 4}, {0, 1, 2});
+  Tile* tile;
+
+  // No tiling.
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape), "f32[2,3,4]{0,1,2}");
+
+  // 2D tile.
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(512);
+  tile->add_dimensions(1024);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "f32[2,3,4]{0,1,2:T(512,1024)}");
+
+  // 1D tile.
+  shape.mutable_layout()->clear_tiles();
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(512);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "f32[2,3,4]{0,1,2:T(512)}");
+
+  // 2 tiles.
+  shape = ShapeUtil::MakeShapeWithLayout(BF16, {2, 3, 4}, {1, 2, 0});
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(16);
+  tile->add_dimensions(256);
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(2);
+  tile->add_dimensions(1);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "bf16[2,3,4]{1,2,0:T(16,256)(2,1)}");
+
+  // PRED with element size of 8 bits.
+  shape = ShapeUtil::MakeShapeWithLayout(PRED, {8, 8, 8}, {0, 2, 1});
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(8);
+  tile->add_dimensions(128);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "pred[8,8,8]{0,2,1:T(8,128)}");
+
+  // PRED with element size of 32 bits.
+  shape.mutable_layout()->clear_tiles();
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(8);
+  tile->add_dimensions(128);
+  shape.mutable_layout()->set_element_size_in_bits(32);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "pred[8,8,8]{0,2,1:T(8,128)E(32)}");
+
+  // No tile. PRED with element size of 32 bits.
+  shape.mutable_layout()->clear_tiles();
+  shape.mutable_layout()->set_element_size_in_bits(32);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "pred[8,8,8]{0,2,1:E(32)}");
+
+  // Tile with negative dimension size for combining dimensions.
+  shape = ShapeUtil::MakeShapeWithLayout(BF16, {2, 3, 1004}, {2, 1, 0});
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(2);
+  tile->add_dimensions(Tile::kCombineDimension);
+  tile->add_dimensions(128);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "bf16[2,3,1004]{2,1,0:T(2,*,128)}");
+
+  // Tile with two negative dimensions.
+  shape = ShapeUtil::MakeShapeWithLayout(BF16, {8, 2, 3, 1004}, {3, 2, 1, 0});
+  tile = shape.mutable_layout()->add_tiles();
+  tile->add_dimensions(2);
+  tile->add_dimensions(Tile::kCombineDimension);
+  tile->add_dimensions(Tile::kCombineDimension);
+  tile->add_dimensions(128);
+  EXPECT_EQ(ShapeUtil::HumanStringWithLayout(shape),
+            "bf16[8,2,3,1004]{3,2,1,0:T(2,*,*,128)}");
+}
+
 TEST_F(LayoutUtilTest, ValidateLayout_ValidArrayLayout) {
   Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {0, 1});
   auto status =
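
The HumanStringWithTiling test above pins down the extended layout grammar: `{minor_to_major[:T(dims)(dims)...][E(bits)]}`, with `*` standing for Tile::kCombineDimension. A self-contained toy formatter that mirrors the expected strings rather than the real xla::Layout::ToString:

    // Toy re-implementation of the layout-string grammar checked above.
    // Any negative dimension is printed as "*" here; the real code only
    // accepts Tile::kCombineDimension as a negative value.
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    std::string LayoutString(const std::vector<int64_t>& minor_to_major,
                             const std::vector<std::vector<int64_t>>& tiles,
                             int64_t element_size_in_bits) {
      std::string s = "{";
      for (size_t i = 0; i < minor_to_major.size(); ++i) {
        if (i > 0) s += ",";
        s += std::to_string(minor_to_major[i]);
      }
      if (!tiles.empty() || element_size_in_bits != 0) s += ":";
      if (!tiles.empty()) {
        s += "T";
        for (const auto& tile : tiles) {
          s += "(";
          for (size_t i = 0; i < tile.size(); ++i) {
            if (i > 0) s += ",";
            s += tile[i] < 0 ? "*" : std::to_string(tile[i]);
          }
          s += ")";
        }
      }
      if (element_size_in_bits != 0)
        s += "E(" + std::to_string(element_size_in_bits) + ")";
      return s + "}";
    }

    int main() {
      // Prints "{0,2,1:T(8,128)E(32)}", matching the PRED test case above.
      std::cout << LayoutString({0, 2, 1}, {{8, 128}}, 32) << "\n";
    }
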
diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index bb9bca0..5cd738d 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -44,7 +44,6 @@
 namespace {
 
 using absl::StrCat;
-using absl::StrFormat;
 
 constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
 
diff --git a/tensorflow/compiler/xla/parse_flags_from_env.cc b/tensorflow/compiler/xla/parse_flags_from_env.cc
index 91e71f5..e1e22f7 100644
--- a/tensorflow/compiler/xla/parse_flags_from_env.cc
+++ b/tensorflow/compiler/xla/parse_flags_from_env.cc
@@ -186,6 +186,14 @@
   tensorflow::mutex_lock lock(env_argv_mu);
   auto* env_argv = &EnvArgvs()[string(envvar)];
   SetArgvFromEnv(envvar, env_argv);  // a no-op if already initialized
+
+  if (VLOG_IS_ON(1)) {
+    VLOG(1) << "For env var " << envvar << " found arguments:";
+    for (int i = 0; i < env_argv->argc; i++) {
+      VLOG(1) << "  argv[" << i << "] = " << env_argv->argv[i];
+    }
+  }
+
   bool result =
       tensorflow::Flags::Parse(&env_argv->argc, &env_argv->argv[0], flag_list);
 
diff --git a/tensorflow/compiler/xla/protobuf_util.h b/tensorflow/compiler/xla/protobuf_util.h
index f22fc8b..4a88a48 100644
--- a/tensorflow/compiler/xla/protobuf_util.h
+++ b/tensorflow/compiler/xla/protobuf_util.h
@@ -16,6 +16,8 @@
 #ifndef TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
 
+#include "google/protobuf/duration.pb.h"
+#include "absl/time/time.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -43,6 +45,20 @@
 // dirpath along as-is.
 void RegisterDirectoryExpander(const std::function<string(string)>& expander);
 
+// Converts an absl::Duration to a google::protobuf::Duration.
+inline google::protobuf::Duration ToDurationProto(absl::Duration duration) {
+  google::protobuf::Duration proto;
+  proto.set_seconds(absl::IDivDuration(duration, absl::Seconds(1), &duration));
+  proto.set_nanos(
+      absl::IDivDuration(duration, absl::Nanoseconds(1), &duration));
+  return proto;
+}
+
+// Converts a google::protobuf::Duration to an absl::Duration.
+inline absl::Duration FromDurationProto(google::protobuf::Duration proto) {
+  return absl::Seconds(proto.seconds()) + absl::Nanoseconds(proto.nanos());
+}
+
 }  // namespace protobuf_util
 }  // namespace xla
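
A round-trip sketch for the two helpers above. absl::IDivDuration returns the integral quotient and leaves the remainder in its last argument, so the first call extracts whole seconds and the second converts what is left into nanoseconds:

    // Sketch: seconds/nanos split and reassembly (assumes absl and the
    // generated duration proto are on the include path).
    #include "google/protobuf/duration.pb.h"
    #include "absl/time/time.h"
    #include "tensorflow/compiler/xla/protobuf_util.h"

    void RoundTrip() {
      absl::Duration d = absl::Seconds(3) + absl::Nanoseconds(250);
      google::protobuf::Duration proto = xla::protobuf_util::ToDurationProto(d);
      // proto.seconds() == 3, proto.nanos() == 250.
      absl::Duration back = xla::protobuf_util::FromDurationProto(proto);
      // back == d for durations representable at nanosecond precision.
    }
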
 
diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD
index 4afb21d..55eacc1 100644
--- a/tensorflow/compiler/xla/python/BUILD
+++ b/tensorflow/compiler/xla/python/BUILD
@@ -59,10 +59,6 @@
     srcs = ["local_computation_builder.cc"],
     hdrs = ["local_computation_builder.h"],
     deps = [
-        "//tensorflow/cc:cc_ops",
-        "//tensorflow/cc:client_session",
-        "//tensorflow/cc:ops",
-        "//tensorflow/cc:scope",
         "//tensorflow/compiler/xla:executable_run_options",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
@@ -77,15 +73,38 @@
         "//tensorflow/compiler/xla/client/lib:cholesky",
         "//tensorflow/compiler/xla/client/lib:math",
         "//tensorflow/compiler/xla/client/lib:qr",
-        "//tensorflow/compiler/xla/client/lib:triangular_solve",
+        "//tensorflow/compiler/xla/service:computation_placer",
+        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
         "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xla/service:shaped_buffer",
         "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry",
+        "//tensorflow/core:lib",
+        "//third_party/python_runtime:headers",  # buildcleaner: keep
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+cc_library(
+    name = "xrt",
+    srcs = ["xrt.cc"],
+    hdrs = ["xrt.h"],
+    deps = [
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:client_session",
+        "//tensorflow/cc:ops",
+        "//tensorflow/cc:scope",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo_proto",
+        "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xrt:xrt_proto",
         "//tensorflow/compiler/xrt/cc:xrt_ops",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//third_party/python_runtime:headers",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:span",
     ],
@@ -93,9 +112,12 @@
 
 tf_py_wrap_cc(
     name = "pywrap_xla",
-    srcs = ["xla.i"],
+    srcs = [
+        "xla.i",
+    ],
     swig_includes = [
         "local_computation_builder.i",
+        "xla_data.i",
         "//tensorflow/python:platform/base.i",
     ],
     version_script = select({
@@ -112,3 +134,27 @@
         "//tensorflow/compiler/xla/service:cpu_plugin",
     ] + xla_python_default_plugins(),
 )
+
+tf_py_wrap_cc(
+    name = "pywrap_xrt",
+    srcs = [
+        "xrt.i",
+    ],
+    swig_includes = [
+        "xla_data.i",
+        "//tensorflow/python:platform/base.i",
+    ],
+    version_script = select({
+        "//tensorflow:darwin": "pywrap_xla_exported_symbols.lds",
+        "//tensorflow:windows": None,
+        "//conditions:default": "pywrap_xla_version_script.lds",
+    }),
+    visibility = ["//visibility:public"],
+    deps = [
+        ":numpy_bridge",
+        ":xrt",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+    ],
+)
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index 0891380..c14a01a 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -20,29 +20,22 @@
 #include <vector>
 
 #include "absl/memory/memory.h"
-#include "tensorflow/cc/client/client_session.h"
-#include "tensorflow/cc/framework/ops.h"
-#include "tensorflow/cc/framework/scope.h"
-#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/xla/client/client_library.h"
 #include "tensorflow/compiler/xla/client/lib/cholesky.h"
 #include "tensorflow/compiler/xla/client/lib/math.h"
 #include "tensorflow/compiler/xla/client/lib/qr.h"
-#include "tensorflow/compiler/xla/client/lib/triangular_solve.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/client/xla_computation.h"
 #include "tensorflow/compiler/xla/executable_run_options.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/computation_placer.h"
 #include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h"
+#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
 #include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h"
-#include "tensorflow/compiler/xrt/cc/ops/xrt_execute_op.h"
-#include "tensorflow/compiler/xrt/cc/ops/xrt_state_ops.h"
-#include "tensorflow/compiler/xrt/xrt.pb.h"
-#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/thread_annotations.h"
@@ -51,77 +44,6 @@
 namespace xla {
 namespace swig {
 
-// TODO(b/118641336): Factor out XRT parts into a small c++ library of their
-// own.
-
-// TODO(b/34473877) Ideally XLA would support AllReduce among arbitrary sets of
-// device handles instead of needing to set the number of replicas at XLA
-// service initialization time.
-tensorflow::mutex g_local_client_mutex(tensorflow::LINKER_INITIALIZED);
-int g_replica_count GUARDED_BY(g_local_client_mutex) = 1;
-LocalClient* g_local_client GUARDED_BY(g_local_client_mutex) = nullptr;
-
-string* GetPlatformNameString() {
-  static string* platform_name_string PT_GUARDED_BY(g_local_client_mutex) =
-      new string("Host");
-  return platform_name_string;
-}
-
-Status InitializeReplicaCount(int replica_count) {
-  if (replica_count < 1) {
-    return InvalidArgument("Replica count must be >= 1; got %d.",
-                           replica_count);
-  }
-  tensorflow::mutex_lock lock(g_local_client_mutex);
-  if (g_local_client != nullptr) {
-    return FailedPrecondition(
-        "Attempted to set the replica count to %d, but a local XLA service was "
-        "previously created with a replica count of %d.",
-        replica_count, g_replica_count);
-  }
-  g_replica_count = replica_count;
-  return Status::OK();
-}
-
-Status InitializePlatformName(const string& platform_name) {
-  string* g_platform_name = GetPlatformNameString();
-  tensorflow::mutex_lock lock(g_local_client_mutex);
-  if (g_local_client != nullptr) {
-    return FailedPrecondition(
-        "Attempted to set the platform name to %s, but a local XLA service was "
-        "previously created with a platform name of %s.",
-        platform_name, *g_platform_name);
-  }
-  TF_ASSIGN_OR_RETURN(se::Platform * platform,
-                      PlatformUtil::GetPlatform(platform_name));
-  if (platform->VisibleDeviceCount() <= 0) {
-    return InvalidArgument("Platform %s has no visible devices.",
-                           platform_name);
-  }
-  *g_platform_name = platform_name;
-  return Status::OK();
-}
-
-int GetReplicaCount() {
-  tensorflow::mutex_lock lock(g_local_client_mutex);
-  return g_replica_count;
-}
-
-StatusOr<LocalClient*> GetOrCreateLocalClient() {
-  string* platform_name = GetPlatformNameString();
-  tensorflow::mutex_lock lock(g_local_client_mutex);
-  if (g_local_client != nullptr) {
-    return g_local_client;
-  }
-  LocalClientOptions options;
-  options.set_platform(PlatformUtil::GetPlatform(*platform_name).ValueOrDie());
-  options.set_number_of_replicas(g_replica_count);
-  TF_ASSIGN_OR_RETURN(g_local_client,
-                      ClientLibrary::GetOrCreateLocalClient(options));
-  CHECK(g_local_client != nullptr);
-  return g_local_client;
-}
-
 Status RegisterCpuCustomCallTarget(const string& fn_name, PyObject* capsule) {
   const char* name = "xla._CPU_CUSTOM_CALL_TARGET";
   if (!PyCapsule_IsValid(capsule, name)) {
@@ -136,62 +58,66 @@
   return Status::OK();
 }
 
-Status TransferToInfeedLocal(const Literal& literal) {
-  VLOG(1) << "Infeeding literal without replica number; shape: "
-          << literal.shape();
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
-  return client->TransferToInfeedLocal(literal, /*device_ordinal=*/0);
+LocalClient::LocalClient(xla::LocalClient* client) : client_(client) {}
+
+/* static */ StatusOr<LocalClient> LocalClient::Get(
+    const string& platform_name) {
+  TF_ASSIGN_OR_RETURN(se::Platform * platform,
+                      PlatformUtil::GetPlatform(platform_name));
+  if (platform->VisibleDeviceCount() <= 0) {
+    return InvalidArgument("Platform %s has no visible devices.",
+                           platform_name);
+  }
+  LocalClientOptions options;
+  options.set_platform(platform);
+  TF_ASSIGN_OR_RETURN(xla::LocalClient * client,
+                      ClientLibrary::GetOrCreateLocalClient(options));
+  CHECK(client != nullptr);
+  return LocalClient(client);
 }
 
-Status TransferToInfeedLocalReplica(const Literal& literal,
-                                    int replica_number) {
-  VLOG(1) << "Infeeding shape " << literal.shape()
-          << " to replica number: " << replica_number;
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
-  TF_ASSIGN_OR_RETURN(int device_ordinal,
-                      client->ReplicaNumberToDeviceOrdinal(replica_number));
-  return client->TransferToInfeedLocal(literal, device_ordinal);
+// Returns the number of devices known to the XLA client.
+int LocalClient::DeviceCount() const { return client_->device_count(); }
+
+Status LocalClient::TransferToInfeed(const Literal& literal,
+                                     int device_ordinal) {
+  VLOG(1) << "Infeeding literal to device " << device_ordinal
+          << "; shape: " << literal.shape();
+  return client_->TransferToInfeed(literal, device_ordinal);
 }
 
-StatusOr<Literal> TransferFromOutfeedLocalReplica(const Shape& shape,
-                                                  int replica_number) {
-  VLOG(1) << "Outfeeding literal from replica number: " << replica_number
-          << " shape: " << shape;
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
-  TF_ASSIGN_OR_RETURN(int device_ordinal,
-                      client->ReplicaNumberToDeviceOrdinal(replica_number));
-  return client->TransferFromOutfeedLocal(shape, device_ordinal);
-}
-
-static StatusOr<ScopedShapedBuffer> ToBuffer(LocalClient* client,
-                                             int device_ordinal,
-                                             const Literal& arg) {
-  return client->LiteralToShapedBuffer(arg, device_ordinal,
-                                       client->backend().memory_allocator());
+StatusOr<Literal> LocalClient::TransferFromOutfeed(const Shape& shape,
+                                                   int device_ordinal) {
+  VLOG(1) << "Outfeeding literal from device " << device_ordinal
+          << "; shape: " << shape;
+  return client_->TransferFromOutfeed(&shape, device_ordinal);
 }
 
 /* static */
 StatusOr<LocalShapedBuffer*> LocalShapedBuffer::FromLiteral(
     const Literal& argument, const absl::optional<Shape>& shape_with_layout,
-    int replica_number) {
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
-  TF_ASSIGN_OR_RETURN(int device_ordinal,
-                      client->ReplicaNumberToDeviceOrdinal(replica_number));
-  VLOG(1) << "Creating shaped buffer from literal on replica/ordinal: "
-          << replica_number << "/" << device_ordinal;
+    const LocalClient& client, int device_ordinal) {
+  VLOG(1) << "Creating shaped buffer from literal on device ordinal: "
+          << device_ordinal;
+  auto literal_to_buffer = [&](const Literal& arg) {
+    return client.client()->LiteralToShapedBuffer(
+        arg, device_ordinal, client.client()->backend().memory_allocator());
+  };
+
   StatusOr<ScopedShapedBuffer> buf = [&] {
     if (shape_with_layout) {
       Literal relaid = argument.Relayout(shape_with_layout.value());
-      return ToBuffer(client, device_ordinal, relaid);
+      return literal_to_buffer(relaid);
     }
-    return ToBuffer(client, device_ordinal, argument);
+    return literal_to_buffer(argument);
   }();
   TF_RETURN_IF_ERROR(buf.status());
-  return new LocalShapedBuffer(std::move(buf).ValueOrDie());
+  return new LocalShapedBuffer(std::move(buf).ValueOrDie(), client.client());
 }
 
-LocalShapedBuffer::LocalShapedBuffer(ScopedShapedBuffer shaped_buffer)
-    : shaped_buffer_(std::move(shaped_buffer)) {}
+LocalShapedBuffer::LocalShapedBuffer(ScopedShapedBuffer shaped_buffer,
+                                     xla::LocalClient* client)
+    : shaped_buffer_(std::move(shaped_buffer)), client_(client) {}
 
 const ScopedShapedBuffer* LocalShapedBuffer::shaped_buffer() const {
   return &shaped_buffer_;
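
The hunk above replaces the process-global client state (mutex, replica count, platform name) with an explicit LocalClient handle fetched per platform. A hedged usage sketch in the swig wrapper's terms, using "Host" (the old default platform name) as an example:

    // Sketch: obtain a client for a named platform and transfer a literal
    // to its first device; error plumbing via TF_ASSIGN_OR_RETURN as above.
    xla::Status InfeedOnHost(const xla::Literal& literal) {
      TF_ASSIGN_OR_RETURN(xla::swig::LocalClient client,
                          xla::swig::LocalClient::Get("Host"));
      VLOG(1) << "Devices visible: " << client.DeviceCount();
      return client.TransferToInfeed(literal, /*device_ordinal=*/0);
    }
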
@@ -204,8 +130,7 @@
 }
 
 StatusOr<Literal> LocalShapedBuffer::ToLiteral() const {
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
-  return client->ShapedBufferToLiteral(*shaped_buffer());
+  return client_->ShapedBufferToLiteral(*shaped_buffer());
 }
 
 LocalShapedBufferTuple::LocalShapedBufferTuple(
@@ -236,120 +161,77 @@
 
 int64 LocalShapedBufferTuple::size() const { return elements_.size(); }
 
-XrtAllocation::XrtAllocation(int64 handle, Shape shape,
-                             const string& session_target)
-    : handle_(handle), shape_(shape), session_target_(session_target) {}
+StatusOr<LocalShapedBufferTuple*> LocalShapedBuffer::DestructureTuple() {
+  const Shape tuple_shape = shape();
 
-XrtAllocation::~XrtAllocation() {
-  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-  auto allocation_handle =
-      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
-  auto release =
-      tensorflow::ops::XRTReleaseAllocationHandle(root, allocation_handle);
-  if (!root.status().ok()) {
-    LOG(ERROR) << root.status();
-    return;
+  if (!tuple_shape.IsTuple()) {
+    return InvalidArgument(
+        "Attemped to destructure a LocalShapedBuffer that did not have a tuple "
+        "shape; shape: %s",
+        ShapeUtil::HumanString(tuple_shape));
   }
 
-  tensorflow::ClientSession session(root, session_target_);
-  tensorflow::ClientSession::FeedType inputs;
-  inputs.insert({allocation_handle, handle()});
-  std::vector<tensorflow::Tensor> outputs;
-  auto status = session.Run(inputs, {}, {release}, &outputs);
-  if (!status.ok()) {
-    LOG(ERROR) << status;
-    return;
+  DeviceMemoryAllocator* allocator = shaped_buffer()->memory_allocator();
+  ShapedBuffer tuple_buffer = Release();
+
+  // Extract some metadata we use to construct scoped buffers.
+  const se::Platform* platform = tuple_buffer.platform();
+  int device_ordinal = tuple_buffer.device_ordinal();
+
+  ShapeTree<se::DeviceMemoryBase>& shape_tree = tuple_buffer.buffers();
+  std::vector<LocalShapedBuffer*> results;
+  for (int64 i = 0; i < ShapeUtil::TupleElementCount(tuple_shape); ++i) {
+    // Create a shaped buffer for this destructured tuple element.
+    const Shape& subshape = ShapeUtil::GetSubshape(tuple_shape, {i});
+    VLOG(3) << "Starting tuple element " << i << " subshape: " << subshape;
+    ShapedBuffer shaped_buffer(subshape, subshape, platform, device_ordinal);
+
+    ShapeUtil::ForEachSubshape(
+        subshape, [&](const Shape& s, const ShapeIndex& index) {
+          ShapeIndex original(index);
+          original.push_front(i);
+          se::DeviceMemoryBase* device_memory =
+              shape_tree.mutable_element(original);
+          shaped_buffer.set_buffer(*device_memory, index);
+          *device_memory = se::DeviceMemoryBase();
+        });
+
+    VLOG(3) << "Completed tuple element: " << i;
+    results.push_back(new LocalShapedBuffer(
+        ScopedShapedBuffer(std::move(shaped_buffer), allocator), client_));
   }
+  // Deallocate the root buffer.
+  se::DeviceMemoryBase root_buffer = tuple_buffer.root_buffer();
+  TF_RETURN_IF_ERROR(allocator->Deallocate(device_ordinal, root_buffer));
+  return new LocalShapedBufferTuple(std::move(results));
 }
 
-/* static */
-StatusOr<XrtAllocation*> XrtAllocation::FromLiteral(
-    const Literal& argument, const string& session_target) {
-  xrt::XLAAllocation alloc;
-  *alloc.mutable_value() = argument.ToProto();
+LocalExecutable::LocalExecutable(
+    std::unique_ptr<xla::LocalExecutable> executable,
+    xla::DeviceAssignment device_assignment, xla::LocalClient* client)
+    : executable_(std::move(executable)),
+      device_assignment_(std::move(device_assignment)),
+      client_(client) {}
 
-  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-  auto literal_string =
-      tensorflow::ops::Placeholder(root, tensorflow::DT_STRING);
-  auto literal_handle = tensorflow::ops::XRTAllocate(root, literal_string);
-  TF_RETURN_IF_ERROR(root.status());
-
-  tensorflow::ClientSession session(root, session_target);
-  tensorflow::ClientSession::FeedType inputs;
-  inputs.insert({literal_string, alloc.SerializeAsString()});
-  std::vector<tensorflow::Tensor> outputs;
-  TF_RETURN_IF_ERROR(session.Run(inputs, {literal_handle}, &outputs));
-
-  int64 handle = outputs[0].scalar<int64>()();
-  return new XrtAllocation(handle, argument.shape(), session_target);
-}
-
-const int64 XrtAllocation::handle() const { return handle_; }
-
-const Shape& XrtAllocation::shape() const { return shape_; }
-
-StatusOr<Literal> XrtAllocation::ToLiteral() const {
-  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-  auto allocation_handle =
-      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
-  auto read_literal = tensorflow::ops::XRTReadLiteral(root, allocation_handle);
-  TF_RETURN_IF_ERROR(root.status());
-
-  tensorflow::ClientSession session(root, session_target_);
-  tensorflow::ClientSession::FeedType inputs;
-  inputs.insert({allocation_handle, handle()});
-  std::vector<tensorflow::Tensor> outputs;
-  TF_RETURN_IF_ERROR(session.Run(inputs, {read_literal}, &outputs));
-
-  xla::LiteralProto response;
-  TF_RET_CHECK(response.ParseFromString(outputs[0].scalar<string>()()));
-  return Literal::CreateFromProto(response);
-}
-
-XrtAllocationTuple::XrtAllocationTuple(std::vector<XrtAllocation*> elements)
-    : elements_(std::move(elements)) {
-  for (auto* element : elements_) {
-    CHECK(element != nullptr);
+std::vector<int> LocalExecutable::DeviceOrdinals() const {
+  int num_replicas = device_assignment_.replica_count();
+  std::vector<int> device_ordinals;
+  device_ordinals.reserve(num_replicas);
+  for (int i = 0; i < num_replicas; ++i) {
+    device_ordinals.push_back(device_assignment_(i, 0));
   }
+  return device_ordinals;
 }
 
-XrtAllocationTuple::~XrtAllocationTuple() {
-  for (XrtAllocation* element : elements_) {
-    if (element != nullptr) {
-      delete element;
-    }
-  }
-}
-
-StatusOr<XrtAllocation*> XrtAllocationTuple::Release(int i) {
-  XrtAllocation* element = elements_[i];
-  if (element == nullptr) {
-    return InvalidArgument("Attempted to release already-released element %d.",
-                           i);
-  }
-  elements_[i] = nullptr;
-  return element;
-}
-
-int64 XrtAllocationTuple::size() const { return elements_.size(); }
-
-CompiledLocalComputation::CompiledLocalComputation(
-    std::unique_ptr<LocalExecutable> executable)
-    : executable_(std::move(executable)) {}
-
-StatusOr<LocalShapedBuffer*> CompiledLocalComputation::Execute(
+StatusOr<LocalShapedBuffer*> LocalExecutable::Execute(
     absl::Span<LocalShapedBuffer* const> argument_handles) {
   if (num_replicas() != 1) {
     return InvalidArgument(
         "Attempted to execute computation with %d replicas using Execute()",
         num_replicas());
   }
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
-  TF_ASSIGN_OR_RETURN(DeviceAssignment device_assignment,
-                      client->backend().computation_placer()->AssignDevices(
-                          1, /*computation_count=*/1));
   StatusOr<ScopedShapedBuffer> result_buffer_status;
-  const int device_ordinal = device_assignment(0, 0);
+  const int device_ordinal = device_assignment_(0, 0);
   VLOG(3) << "Replica 0 mapped to device ordinal for execution: "
           << device_ordinal;
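
DestructureTuple, moved onto LocalShapedBuffer above, relies on a steal-and-clear pattern: each element's device memory is copied into a fresh ShapedBuffer and the slot in the released tuple's ShapeTree is reset to an empty DeviceMemoryBase, so exactly one owner ever frees it. A self-contained toy of that pattern:

    // Toy sketch of the ownership transfer in DestructureTuple: move each
    // element to a new owner and null the old slot to prevent double-free.
    #include <iostream>
    #include <utility>
    #include <vector>

    struct Block { int* mem = nullptr; };  // stand-in for se::DeviceMemoryBase

    int main() {
      std::vector<Block> tuple = {{new int(0)}, {new int(1)}, {new int(2)}};
      std::vector<Block> elements;
      elements.reserve(tuple.size());
      for (Block& b : tuple) {
        // Mirrors: shaped_buffer.set_buffer(*device_memory, index);
        //          *device_memory = se::DeviceMemoryBase();
        elements.push_back(std::exchange(b, Block{}));
      }
      for (Block& b : elements) delete b.mem;  // each block freed exactly once
      std::cout << "tuple slot cleared: " << (tuple[0].mem == nullptr) << "\n";
    }
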
 
@@ -361,10 +243,10 @@
 
   ExecutableRunOptions options;
   options.set_device_ordinal(device_ordinal);
-  options.set_allocator(client->backend().memory_allocator());
+  options.set_allocator(client_->backend().memory_allocator());
   options.set_intra_op_thread_pool(
-      client->backend().eigen_intra_op_thread_pool_device());
-  options.set_device_assignment(&device_assignment);
+      client_->backend().eigen_intra_op_thread_pool_device());
+  options.set_device_assignment(&device_assignment_);
 
   result_buffer_status = executable_->Run(argument_buffers, options);
 
@@ -374,13 +256,13 @@
         "%s.",
         result_buffer_status.status().ToString());
   }
-  return new LocalShapedBuffer(std::move(result_buffer_status).ValueOrDie());
+  return new LocalShapedBuffer(std::move(result_buffer_status).ValueOrDie(),
+                               client_);
 }
 
-StatusOr<LocalShapedBufferTuple*> CompiledLocalComputation::ExecutePerReplica(
+StatusOr<LocalShapedBufferTuple*> LocalExecutable::ExecutePerReplica(
     absl::Span<const std::vector<LocalShapedBuffer*>> argument_handles) {
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
-  const int num_devices = client->device_count();
+  const int num_devices = client_->device_count();
 
   if (argument_handles.size() != num_replicas()) {
     return InvalidArgument(
@@ -395,14 +277,9 @@
 
   VLOG(1) << "Executing with " << num_replicas() << " replicas.";
 
-  TF_ASSIGN_OR_RETURN(DeviceAssignment device_assignment,
-                      client->backend().computation_placer()->AssignDevices(
-                          num_replicas(), /*computation_count=*/1));
-
   std::vector<StatusOr<ScopedShapedBuffer>> results(num_replicas());
-  auto execute = [this, client, &device_assignment, &argument_handles,
-                  &results](int replica) {
-    const int device_ordinal = device_assignment(replica, 0);
+  auto execute = [this, &argument_handles, &results](int replica) {
+    const int device_ordinal = device_assignment_(replica, 0);
     VLOG(3) << "Replica " << replica
             << " mapped to device ordinal for execution: " << device_ordinal;
 
@@ -414,10 +291,10 @@
 
     ExecutableRunOptions options;
     options.set_device_ordinal(device_ordinal);
-    options.set_allocator(client->backend().memory_allocator());
+    options.set_allocator(client_->backend().memory_allocator());
     options.set_intra_op_thread_pool(
-        client->backend().eigen_intra_op_thread_pool_device());
-    options.set_device_assignment(&device_assignment);
+        client_->backend().eigen_intra_op_thread_pool_device());
+    options.set_device_assignment(&device_assignment_);
     StatusOr<ScopedShapedBuffer> result_buffer_status =
         executable_->Run(argument_buffers, options);
 
@@ -449,151 +326,43 @@
           replica, statusor.status().ToString());
     }
     wrapped_results[replica] =
-        new LocalShapedBuffer(std::move(statusor).ValueOrDie());
+        new LocalShapedBuffer(std::move(statusor).ValueOrDie(), client_);
   }
 
   return new LocalShapedBufferTuple(std::move(wrapped_results));
 }
 
-static StatusOr<Shape> GetReturnValueShape(const XlaComputation& computation) {
-  TF_ASSIGN_OR_RETURN(ProgramShape program_shape,
-                      computation.GetProgramShape());
-  return std::move(*program_shape.mutable_result());
-}
-
-CompiledXrtComputation::CompiledXrtComputation(
-    const ProgramShape& program_shape, int64 handle,
-    const string& session_target)
-    : program_shape_(program_shape),
-      handle_(handle),
-      session_target_(session_target) {}
-
-CompiledXrtComputation::~CompiledXrtComputation() {
-  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-  auto computation_handle =
-      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
-  auto release =
-      tensorflow::ops::XRTReleaseCompilationHandle(root, computation_handle);
-  if (!root.status().ok()) {
-    LOG(ERROR) << root.status();
-    return;
-  }
-
-  tensorflow::ClientSession session(root, session_target_);
-  tensorflow::ClientSession::FeedType inputs;
-  inputs.insert({computation_handle, handle()});
-  std::vector<tensorflow::Tensor> outputs;
-  auto status = session.Run(inputs, {}, {release}, &outputs);
-  if (!status.ok()) {
-    LOG(ERROR) << status;
-    return;
-  }
-}
-
-StatusOr<XrtAllocation*> CompiledXrtComputation::Execute(
-    absl::Span<XrtAllocation* const> argument_handles) {
-  const int num_expected_arguments = program_shape().parameters().size();
-
-  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-  std::vector<tensorflow::Output> arguments;
-  arguments.reserve(num_expected_arguments);
-  for (int i = 0; i < num_expected_arguments; ++i) {
-    arguments.push_back(
-        tensorflow::ops::Placeholder(root, tensorflow::DT_INT64));
-  }
-  auto computation_handle =
-      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
-  auto execution_config =
-      tensorflow::ops::Placeholder(root, tensorflow::DT_STRING);
-  auto execute = tensorflow::ops::XRTExecute(root, computation_handle,
-                                             execution_config, arguments);
-  TF_RETURN_IF_ERROR(root.status());
-
-  TF_RET_CHECK(argument_handles.size() == arguments.size());
-
-  xrt::XRTExecutionConfig e;
-  e.set_release_input_handles(false);
-  e.set_release_compilation_handle(false);
-
-  tensorflow::ClientSession session(root, session_target_);
-  tensorflow::ClientSession::FeedType inputs;
-  for (int i = 0; i < arguments.size(); ++i) {
-    inputs.insert({arguments[i], argument_handles[i]->handle()});
-  }
-  inputs.insert({computation_handle, handle()});
-  inputs.insert({execution_config, e.SerializeAsString()});
-  std::vector<tensorflow::Tensor> outputs;
-  TF_RETURN_IF_ERROR(session.Run(inputs, {execute}, &outputs));
-
-  int64 output = outputs[0].scalar<int64>()();
-  return new XrtAllocation(output, program_shape().result(), session_target_);
-}
-
-const ProgramShape& CompiledXrtComputation::program_shape() const {
-  return program_shape_;
-}
-
-int64 CompiledXrtComputation::handle() const { return handle_; }
-
-LocalComputation::LocalComputation(XlaComputation computation)
+Computation::Computation(XlaComputation computation)
     : computation_(std::move(computation)) {}
 
-StatusOr<CompiledLocalComputation*> LocalComputation::Compile(
+StatusOr<LocalExecutable*> Computation::Compile(
     const std::vector<Shape>& argument_shapes,
-    const ExecutableBuildOptions* build_options) {
+    const ExecutableBuildOptions* build_options, const LocalClient& client) {
   std::vector<const Shape*> argument_shape_pointers;
   argument_shape_pointers.reserve(argument_shapes.size());
   for (auto& argument_shape : argument_shapes) {
     argument_shape_pointers.push_back(&argument_shape);
   }
 
-  TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient());
   ExecutableBuildOptions options;
   if (build_options != nullptr) {
     options = *build_options;
   }
   TF_ASSIGN_OR_RETURN(
       auto local_executable,
-      client->Compile(computation_, argument_shape_pointers, options));
-  return new CompiledLocalComputation(std::move(local_executable));
+      client.client()->Compile(computation_, argument_shape_pointers, options));
+  TF_ASSIGN_OR_RETURN(
+      DeviceAssignment device_assignment,
+      client.client()->backend().computation_placer()->AssignDevices(
+          options.num_replicas(), /*computation_count=*/1));
+
+  return new LocalExecutable(std::move(local_executable),
+                             std::move(device_assignment), client.client());
 }
 
-StatusOr<CompiledXrtComputation*> LocalComputation::CompileForXrt(
-    const std::vector<Shape>& argument_shapes, const string& session_target) {
-  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-  auto program = tensorflow::ops::Placeholder(root, tensorflow::DT_STRING);
-  auto compile = tensorflow::ops::XRTCompile(root, program);
-  TF_RETURN_IF_ERROR(root.status());
+const XlaComputation& Computation::computation() const { return computation_; }
 
-  xrt::XLAComputation c;
-  auto config = c.mutable_config();
-  ProgramShape shapes;
-  for (auto& shape : argument_shapes) {
-    *shapes.add_parameters() = shape;
-  }
-  TF_ASSIGN_OR_RETURN(*shapes.mutable_result(), GetReturnValueShape());
-  LayoutUtil::SetToDefaultLayout(&shapes);
-  *config->mutable_program_shape() = shapes.ToProto();
-  auto snapshot = computation().Snapshot().ValueOrDie();
-  *c.mutable_hlo_snapshot() = *snapshot;
-
-  tensorflow::ClientSession session(root, session_target);
-  tensorflow::ClientSession::FeedType inputs;
-  inputs.insert({program, c.SerializeAsString()});
-  std::vector<tensorflow::Tensor> outputs;
-  TF_RETURN_IF_ERROR(session.Run(inputs, {compile.handle}, &outputs));
-
-  TF_ASSIGN_OR_RETURN(ProgramShape program_shape,
-                      computation().GetProgramShape());
-  int64 handle = outputs[0].scalar<int64>()();
-  return new CompiledXrtComputation(program_shape, handle, session_target);
-}
-
-const XlaComputation& LocalComputation::computation() const {
-  return computation_;
-}
-
-string LocalComputation::GetSerializedProto() const {
+string Computation::GetSerializedProto() const {
   string result;
   if (!computation_.proto().SerializeToString(&result)) {
     LOG(ERROR) << "Failed to serialize the HloModuleProto.";
@@ -602,101 +371,129 @@
   return result;
 }
 
-StatusOr<Shape> LocalComputation::GetReturnValueShape() const {
-  return swig::GetReturnValueShape(computation_);
+StatusOr<string> Computation::GetHloText() const {
+  TF_ASSIGN_OR_RETURN(const HloModuleConfig module_config,
+                      HloModule::CreateModuleConfigFromProto(
+                          computation_.proto(), GetDebugOptionsFromFlags()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<HloModule> hlo_module,
+      HloModule::CreateFromProto(computation_.proto(), module_config));
+  HloPrintOptions options = HloPrintOptions::ShortParsable();
+  options.set_print_large_constants(false);
+  return hlo_module->ToString(options);
+}
+
+StatusOr<string> Computation::GetHloDotGraph() const {
+  TF_ASSIGN_OR_RETURN(const HloModuleConfig module_config,
+                      HloModule::CreateModuleConfigFromProto(
+                          computation_.proto(), GetDebugOptionsFromFlags()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<HloModule> hlo_module,
+      HloModule::CreateFromProto(computation_.proto(), module_config));
+  hlo_graph_dumper::DotGraphOptions options;
+  options.debug_options = &hlo_module->config().debug_options();
+  return hlo_graph_dumper::HloComputationToDotGraph(
+      *hlo_module->entry_computation(), options);
+}
+
+StatusOr<ProgramShape> Computation::GetProgramShape() const {
+  return computation_.GetProgramShape();
+}
+
+StatusOr<Shape> Computation::GetReturnValueShape() const {
+  TF_ASSIGN_OR_RETURN(ProgramShape shape, computation_.GetProgramShape());
+  return std::move(*shape.mutable_result());
 }
 
 LocalOp::LocalOp(const XlaOp& op) : op_(op) {}
 
 const XlaOp& LocalOp::op() const { return op_; }
 
-LocalComputationBuilder::LocalComputationBuilder(const string& computation_name)
+ComputationBuilder::ComputationBuilder(const string& computation_name)
     : builder_(computation_name) {}
 
-void LocalComputationBuilder::SetOpMetadata(const OpMetadata& metadata) {
+void ComputationBuilder::SetOpMetadata(const OpMetadata& metadata) {
   builder_.SetOpMetadata(metadata);
 }
 
-void LocalComputationBuilder::ClearOpMetadata() { builder_.ClearOpMetadata(); }
+void ComputationBuilder::ClearOpMetadata() { builder_.ClearOpMetadata(); }
 
-StatusOr<LocalComputation*> LocalComputationBuilder::Build() {
+StatusOr<Computation*> ComputationBuilder::Build() {
   TF_ASSIGN_OR_RETURN(XlaComputation computation, builder_.Build());
-  return new LocalComputation(std::move(computation));
+  return new Computation(std::move(computation));
 }
 
-LocalOp LocalComputationBuilder::Parameter(int64 parameter_number,
-                                           const Shape& shape,
-                                           const string& name) {
+LocalOp ComputationBuilder::Parameter(int64 parameter_number,
+                                      const Shape& shape, const string& name) {
   return xla::Parameter(&builder_, parameter_number, shape, name);
 }
 
-StatusOr<LocalComputation*> LocalComputationBuilder::BuildWithRoot(
-    const LocalOp& root) {
+StatusOr<Computation*> ComputationBuilder::BuildWithRoot(const LocalOp& root) {
   TF_ASSIGN_OR_RETURN(XlaComputation computation, builder_.Build(root.op()));
-  return new LocalComputation(std::move(computation));
+  return new Computation(std::move(computation));
 }
 
-StatusOr<Shape> LocalComputationBuilder::GetShape(const LocalOp& operand) {
+StatusOr<Shape> ComputationBuilder::GetShape(const LocalOp& operand) {
   return builder_.GetShape(operand.op());
 }
 
-StatusOr<Shape> LocalComputationBuilder::GetReturnValueShape() {
+StatusOr<Shape> ComputationBuilder::GetReturnValueShape() {
   TF_ASSIGN_OR_RETURN(ProgramShape program_shape, builder_.GetProgramShape());
   return program_shape.result();
 }
 
-LocalOp LocalComputationBuilder::Infeed(const Shape& shape) {
+LocalOp ComputationBuilder::Infeed(const Shape& shape) {
   return xla::Infeed(&builder_, shape);
 }
 
-void LocalComputationBuilder::Outfeed(const LocalOp& operand,
-                                      const Shape& shape,
-                                      const string& outfeed_config) {
+void ComputationBuilder::Outfeed(const LocalOp& operand, const Shape& shape,
+                                 const string& outfeed_config) {
   xla::Outfeed(operand.op(), shape, outfeed_config);
 }
 
-LocalOp LocalComputationBuilder::ConstantLiteral(const Literal& literal) {
+LocalOp ComputationBuilder::ConstantLiteral(const Literal& literal) {
   return xla::ConstantLiteral(&builder_, literal);
 }
 
-LocalOp LocalComputationBuilder::Iota(PrimitiveType element_type, int64 size) {
+LocalOp ComputationBuilder::Iota(PrimitiveType element_type, int64 size) {
   return xla::Iota(&builder_, element_type, size);
 }
 
-LocalOp LocalComputationBuilder::BroadcastedIota(const Shape& shape,
-                                                 int64 dimension) {
+LocalOp ComputationBuilder::BroadcastedIota(const Shape& shape,
+                                            int64 dimension) {
   return xla::Iota(&builder_, shape, dimension);
 }
 
-LocalOp LocalComputationBuilder::Broadcast(
-    const LocalOp& operand, absl::Span<const int64> broadcast_sizes) {
+LocalOp ComputationBuilder::Broadcast(const LocalOp& operand,
+                                      absl::Span<const int64> broadcast_sizes) {
   return xla::Broadcast(operand.op(), broadcast_sizes);
 }
 
-LocalOp LocalComputationBuilder::BroadcastInDim(
+LocalOp ComputationBuilder::BroadcastInDim(
     const LocalOp& operand, absl::Span<const int64> out_dim_sizes,
     absl::Span<const int64> broadcast_dimensions) {
   return xla::BroadcastInDim(operand.op(), out_dim_sizes, broadcast_dimensions);
 }
 
-LocalOp LocalComputationBuilder::Pad(const LocalOp& operand,
-                                     const LocalOp& padding_value,
-                                     const PaddingConfig& padding_config) {
+LocalOp ComputationBuilder::Pad(const LocalOp& operand,
+                                const LocalOp& padding_value,
+                                const PaddingConfig& padding_config) {
   return xla::Pad(operand.op(), padding_value.op(), padding_config);
 }
 
-LocalOp LocalComputationBuilder::Reshape(const LocalOp& operand,
-                                         absl::Span<const int64> dimensions,
-                                         absl::Span<const int64> new_sizes) {
+LocalOp ComputationBuilder::Reshape(const LocalOp& operand,
+                                    absl::Span<const int64> dimensions,
+                                    absl::Span<const int64> new_sizes) {
   return xla::Reshape(operand.op(), dimensions, new_sizes);
 }
 
-LocalOp LocalComputationBuilder::Collapse(const LocalOp& operand,
-                                          absl::Span<const int64> dimensions) {
+LocalOp ComputationBuilder::Collapse(const LocalOp& operand,
+                                     absl::Span<const int64> dimensions) {
   return xla::Collapse(operand.op(), dimensions);
 }
 
-LocalOp LocalComputationBuilder::AllToAll(
+LocalOp ComputationBuilder::AllToAll(
     const LocalOp& operand, int64 split_dimension, int64 concat_dimension,
     int64 split_count, absl::Span<const ReplicaGroup> replica_groups) {
   std::vector<ReplicaGroup> rg(replica_groups.size());
@@ -707,39 +504,38 @@
                        split_count, rg);
 }
 
-LocalOp LocalComputationBuilder::CrossReplicaSum(
+LocalOp ComputationBuilder::CrossReplicaSum(
     const LocalOp& operand, absl::Span<const ReplicaGroup> replica_groups) {
   return xla::CrossReplicaSum(operand.op(), replica_groups);
 }
 
-LocalOp LocalComputationBuilder::Slice(const LocalOp& operand,
-                                       absl::Span<const int64> start_indices,
-                                       absl::Span<const int64> limit_indices,
-                                       absl::Span<const int64> strides) {
+LocalOp ComputationBuilder::Slice(const LocalOp& operand,
+                                  absl::Span<const int64> start_indices,
+                                  absl::Span<const int64> limit_indices,
+                                  absl::Span<const int64> strides) {
   return xla::Slice(operand.op(), start_indices, limit_indices, strides);
 }
 
-LocalOp LocalComputationBuilder::SliceInDim(const LocalOp& operand,
-                                            int64 start_index,
-                                            int64 limit_index, int64 stride,
-                                            int64 dimno) {
+LocalOp ComputationBuilder::SliceInDim(const LocalOp& operand,
+                                       int64 start_index, int64 limit_index,
+                                       int64 stride, int64 dimno) {
   return xla::SliceInDim(operand.op(), start_index, limit_index, stride, dimno);
 }
 
-LocalOp LocalComputationBuilder::DynamicSlice(
-    const LocalOp& operand, const LocalOp& start_indices,
-    absl::Span<const int64> slice_sizes) {
+LocalOp ComputationBuilder::DynamicSlice(const LocalOp& operand,
+                                         const LocalOp& start_indices,
+                                         absl::Span<const int64> slice_sizes) {
   return xla::DynamicSlice(operand.op(), start_indices.op(), slice_sizes);
 }
 
-LocalOp LocalComputationBuilder::DynamicUpdateSlice(
-    const LocalOp& operand, const LocalOp& update,
-    const LocalOp& start_indices) {
+LocalOp ComputationBuilder::DynamicUpdateSlice(const LocalOp& operand,
+                                               const LocalOp& update,
+                                               const LocalOp& start_indices) {
   return xla::DynamicUpdateSlice(operand.op(), update.op(), start_indices.op());
 }
 
-LocalOp LocalComputationBuilder::ConcatInDim(absl::Span<const LocalOp> operands,
-                                             int64 dimension) {
+LocalOp ComputationBuilder::ConcatInDim(absl::Span<const LocalOp> operands,
+                                        int64 dimension) {
   std::vector<XlaOp> xla_ops;
   xla_ops.reserve(operands.size());
   for (const auto& op : operands) {
@@ -748,18 +544,18 @@
   return xla::ConcatInDim(&builder_, xla_ops, dimension);
 }
 
-LocalOp LocalComputationBuilder::SelectAndScatterWithGeneralPadding(
-    const LocalOp& operand, const LocalComputation& select,
+LocalOp ComputationBuilder::SelectAndScatterWithGeneralPadding(
+    const LocalOp& operand, const Computation& select,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
     absl::Span<const std::pair<int64, int64>> padding, const LocalOp& source,
-    const LocalOp& init_value, const LocalComputation& scatter) {
+    const LocalOp& init_value, const Computation& scatter) {
   return xla::SelectAndScatterWithGeneralPadding(
       operand.op(), select.computation(), window_dimensions, window_strides,
       padding, source.op(), init_value.op(), scatter.computation());
 }
 
-LocalOp LocalComputationBuilder::Tuple(absl::Span<const LocalOp> elements) {
+LocalOp ComputationBuilder::Tuple(absl::Span<const LocalOp> elements) {
   std::vector<XlaOp> xla_ops;
   xla_ops.reserve(elements.size());
   for (const auto& op : elements) {
@@ -769,22 +565,22 @@
   return xla::Tuple(&builder_, xla_ops);
 }
 
-LocalOp LocalComputationBuilder::GetTupleElement(const LocalOp& tuple_data,
-                                                 int64 index) {
+LocalOp ComputationBuilder::GetTupleElement(const LocalOp& tuple_data,
+                                            int64 index) {
   return xla::GetTupleElement(tuple_data.op(), index);
 }
 
-LocalOp LocalComputationBuilder::Dot(const LocalOp& lhs, const LocalOp& rhs) {
+LocalOp ComputationBuilder::Dot(const LocalOp& lhs, const LocalOp& rhs) {
   return xla::Dot(lhs.op(), rhs.op());
 }
 
-LocalOp LocalComputationBuilder::DotGeneral(
+LocalOp ComputationBuilder::DotGeneral(
     const LocalOp& lhs, const LocalOp& rhs,
     const DotDimensionNumbers& dimension_numbers) {
   return xla::DotGeneral(lhs.op(), rhs.op(), dimension_numbers);
 }
 
-LocalOp LocalComputationBuilder::ConvGeneralDilated(
+LocalOp ComputationBuilder::ConvGeneralDilated(
     const LocalOp& lhs, const LocalOp& rhs,
     absl::Span<const int64> window_strides,
     absl::Span<const std::pair<int64, int64>> padding,
@@ -796,18 +592,18 @@
                                  feature_group_count);
 }
 
-LocalOp LocalComputationBuilder::ConvertElementType(
-    const LocalOp& operand, PrimitiveType new_element_type) {
+LocalOp ComputationBuilder::ConvertElementType(const LocalOp& operand,
+                                               PrimitiveType new_element_type) {
   return xla::ConvertElementType(operand.op(), new_element_type);
 }
 
-LocalOp LocalComputationBuilder::BitcastConvertType(
-    const LocalOp& operand, PrimitiveType new_element_type) {
+LocalOp ComputationBuilder::BitcastConvertType(const LocalOp& operand,
+                                               PrimitiveType new_element_type) {
   return xla::BitcastConvertType(operand.op(), new_element_type);
 }
 
-LocalOp LocalComputationBuilder::Call(const LocalComputation& local_computation,
-                                      absl::Span<const LocalOp> operands) {
+LocalOp ComputationBuilder::Call(const Computation& local_computation,
+                                 absl::Span<const LocalOp> operands) {
   std::vector<XlaOp> xla_ops;
   xla_ops.reserve(operands.size());
   for (const auto& op : operands) {
@@ -816,7 +612,7 @@
   return xla::Call(&builder_, local_computation.computation(), xla_ops);
 }
 
-LocalOp LocalComputationBuilder::CustomCall(
+LocalOp ComputationBuilder::CustomCall(
     const string& call_target_name, absl::Span<const LocalOp> operands,
     const Shape& shape_with_layout,
     const std::vector<Shape>& operand_shapes_with_layout,
@@ -831,19 +627,19 @@
                                    operand_shapes_with_layout, opaque);
 }
 
-LocalOp LocalComputationBuilder::Transpose(
-    const LocalOp& operand, absl::Span<const int64> permutation) {
+LocalOp ComputationBuilder::Transpose(const LocalOp& operand,
+                                      absl::Span<const int64> permutation) {
   return xla::Transpose(operand.op(), permutation);
 }
 
-LocalOp LocalComputationBuilder::Rev(const LocalOp& operand,
-                                     absl::Span<const int64> dimensions) {
+LocalOp ComputationBuilder::Rev(const LocalOp& operand,
+                                absl::Span<const int64> dimensions) {
   return xla::Rev(operand.op(), dimensions);
 }
 
-LocalOp LocalComputationBuilder::Map(absl::Span<const LocalOp> operands,
-                                     const LocalComputation& local_computation,
-                                     absl::Span<const int64> dimensions) {
+LocalOp ComputationBuilder::Map(absl::Span<const LocalOp> operands,
+                                const Computation& local_computation,
+                                absl::Span<const int64> dimensions) {
   std::vector<XlaOp> xla_ops;
   xla_ops.reserve(operands.size());
   for (const auto& op : operands) {
@@ -854,17 +650,17 @@
                   dimensions);
 }
 
-LocalOp LocalComputationBuilder::Reduce(
+LocalOp ComputationBuilder::Reduce(
     const LocalOp& operand, const LocalOp& init_value,
-    const LocalComputation& local_computation,
+    const Computation& local_computation,
     absl::Span<const int64> dimensions_to_reduce) {
   return xla::Reduce(operand.op(), init_value.op(),
                      local_computation.computation(), dimensions_to_reduce);
 }
 
-LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding(
+LocalOp ComputationBuilder::ReduceWindowWithGeneralPadding(
     const LocalOp& operand, const LocalOp& init_value,
-    const LocalComputation& local_computation,
+    const Computation& local_computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
     absl::Span<const int64> base_dilations,
@@ -876,51 +672,50 @@
       padding);
 }
 
-LocalOp LocalComputationBuilder::RngNormal(const LocalOp& mu,
-                                           const LocalOp& sigma,
-                                           const Shape& shape) {
+LocalOp ComputationBuilder::RngNormal(const LocalOp& mu, const LocalOp& sigma,
+                                      const Shape& shape) {
   return xla::RngNormal(mu.op(), sigma.op(), shape);
 }
 
-LocalOp LocalComputationBuilder::RngUniform(const LocalOp& a, const LocalOp& b,
-                                            const Shape& shape) {
+LocalOp ComputationBuilder::RngUniform(const LocalOp& a, const LocalOp& b,
+                                       const Shape& shape) {
   return xla::RngUniform(a.op(), b.op(), shape);
 }
 
-LocalOp LocalComputationBuilder::While(const LocalComputation& condition,
-                                       const LocalComputation& body,
-                                       const LocalOp& init) {
+LocalOp ComputationBuilder::While(const Computation& condition,
+                                  const Computation& body,
+                                  const LocalOp& init) {
   return xla::While(condition.computation(), body.computation(), init.op());
 }
 
-LocalOp LocalComputationBuilder::Conditional(
-    const LocalOp& predicate, const LocalOp& true_operand,
-    const LocalComputation& true_computation, const LocalOp& false_operand,
-    const LocalComputation& false_computation) {
+LocalOp ComputationBuilder::Conditional(const LocalOp& predicate,
+                                        const LocalOp& true_operand,
+                                        const Computation& true_computation,
+                                        const LocalOp& false_operand,
+                                        const Computation& false_computation) {
   return xla::Conditional(predicate.op(), true_operand.op(),
                           true_computation.computation(), false_operand.op(),
                           false_computation.computation());
 }
 
-StatusOr<bool> LocalComputationBuilder::IsConstant(const LocalOp& operand) {
+StatusOr<bool> ComputationBuilder::IsConstant(const LocalOp& operand) {
   return builder_.IsConstant(operand.op());
 }
 
-LocalOp LocalComputationBuilder::Sort(const LocalOp& operand, int64 dimension) {
+LocalOp ComputationBuilder::Sort(const LocalOp& operand, int64 dimension) {
   return xla::Sort(operand.op(), {}, dimension);
 }
 
-LocalOp LocalComputationBuilder::SortKeyVal(const LocalOp& keys,
-                                            const LocalOp& values,
-                                            int64 dimension) {
+LocalOp ComputationBuilder::SortKeyVal(const LocalOp& keys,
+                                       const LocalOp& values, int64 dimension) {
   return xla::Sort(keys.op(), {values.op()}, dimension);
 }
 
-LocalOp LocalComputationBuilder::Cholesky(const LocalOp& a) {
+LocalOp ComputationBuilder::Cholesky(const LocalOp& a) {
   return xla::Cholesky(a.op());
 }
 
-LocalOp LocalComputationBuilder::QR(const LocalOp& a, bool full_matrices) {
+LocalOp ComputationBuilder::QR(const LocalOp& a, bool full_matrices) {
   XlaBuilder* builder = a.op().builder();
   return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     TF_ASSIGN_OR_RETURN(auto qr, xla::QRDecomposition(a.op(), full_matrices));
@@ -928,14 +723,16 @@
   });
 }
 
-LocalOp LocalComputationBuilder::TriangularSolve(
-    const LocalOp& a, const LocalOp& b, bool left_side, bool lower,
-    bool transpose_a, bool conjugate_a, bool unit_diagonal) {
-  return xla::TriangularSolve(a.op(), b.op(), left_side, lower, transpose_a,
-                              conjugate_a, unit_diagonal);
+LocalOp ComputationBuilder::TriangularSolve(const LocalOp& a, const LocalOp& b,
+                                            bool left_side, bool lower,
+                                            bool unit_diagonal,
+                                            int transpose_a) {
+  return xla::TriangularSolve(
+      a.op(), b.op(), left_side, lower, unit_diagonal,
+      xla::TriangularSolveOptions::Transpose(transpose_a));
 }
 
-LocalOp LocalComputationBuilder::Gather(
+LocalOp ComputationBuilder::Gather(
     const LocalOp& input, const LocalOp& start_indices,
     const GatherDimensionNumbers& dimension_numbers,
     absl::Span<const int64> slice_sizes) {
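
The TriangularSolve wrapper in the hunk above swaps the old transpose_a/conjugate_a bools for a single int cast to xla::TriangularSolveOptions::Transpose. A usage sketch; the enum constant name is assumed from xla_data.proto, which this diff does not show:

    // Sketch: solve a*x = b with a lower-triangular, untransposed `a`.
    // NO_TRANSPOSE is assumed to be a TriangularSolveOptions::Transpose value.
    xla::swig::LocalOp SolveLower(xla::swig::ComputationBuilder& builder,
                                  const xla::swig::LocalOp& a,
                                  const xla::swig::LocalOp& b) {
      return builder.TriangularSolve(
          a, b, /*left_side=*/true, /*lower=*/true, /*unit_diagonal=*/false,
          /*transpose_a=*/xla::TriangularSolveOptions::NO_TRANSPOSE);
    }
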
@@ -943,24 +740,24 @@
                      slice_sizes);
 }
 
-LocalOp LocalComputationBuilder::Scatter(
+LocalOp ComputationBuilder::Scatter(
     const LocalOp& input, const LocalOp& scatter_indices,
-    const LocalOp& updates, const LocalComputation& update_computation,
+    const LocalOp& updates, const Computation& update_computation,
     const ScatterDimensionNumbers& dimension_numbers) {
   return xla::Scatter(input.op(), scatter_indices.op(), updates.op(),
                       update_computation.computation(), dimension_numbers);
 }
 
-StatusOr<LocalComputation*> LocalComputationBuilder::BuildConstantSubGraph(
+StatusOr<Computation*> ComputationBuilder::BuildConstantSubGraph(
     const LocalOp& operand) {
   TF_ASSIGN_OR_RETURN(XlaComputation computation,
                       builder_.BuildConstantSubGraph(operand.op()));
-  return new LocalComputation(std::move(computation));
+  return new Computation(std::move(computation));
 }
 
-#define _FORWARD(method_name, return_sig, args_sig, args)    \
-  return_sig LocalComputationBuilder::method_name args_sig { \
-    return xla::method_name args;                            \
+#define _FORWARD(method_name, return_sig, args_sig, args) \
+  return_sig ComputationBuilder::method_name args_sig {   \
+    return xla::method_name args;                         \
   }
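// Editorial aside (not part of the diff): with this macro, each forwarded
// method simply unwraps its LocalOp arguments and delegates to the free
// function in namespace xla. For example, _FORWARD_UNOP(Clz) below expands
// to roughly:
//
//   LocalOp ComputationBuilder::Clz(const LocalOp& operand) {
//     return xla::Clz(operand.op());
//   }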
 
 #define _FORWARD_UNOP(method_name) \
@@ -1002,6 +799,7 @@
 _FORWARD_BINOP(Pow)
 _FORWARD_BINOP(Complex)
 _FORWARD_UNOP(Not)
+_FORWARD_UNOP(Clz)
 _FORWARD_UNOP(Abs)
 _FORWARD_UNOP(Exp)
 _FORWARD_UNOP(Expm1)
@@ -1047,108 +845,9 @@
   delete local_shaped_buffer;
 }
 
-void DeleteXrtAllocation(XrtAllocation* allocation) { delete allocation; }
+void DeleteLocalExecutable(LocalExecutable* computation) { delete computation; }
 
-void DeleteCompiledLocalComputation(CompiledLocalComputation* computation) {
-  delete computation;
-}
-
-void DeleteCompiledXrtComputation(CompiledXrtComputation* computation) {
-  delete computation;
-}
-
-void DeleteLocalComputation(LocalComputation* computation) {
-  delete computation;
-}
-
-StatusOr<LocalShapedBufferTuple*> DestructureLocalShapedBufferTuple(
-    LocalShapedBuffer* local_shaped_buffer) {
-  const Shape tuple_shape = local_shaped_buffer->shape();
-
-  if (!tuple_shape.IsTuple()) {
-    return InvalidArgument(
-        "Attemped to destructure a LocalShapedBuffer that did not have a tuple "
-        "shape; shape: %s",
-        ShapeUtil::HumanString(tuple_shape));
-  }
-
-  DeviceMemoryAllocator* allocator =
-      local_shaped_buffer->shaped_buffer()->memory_allocator();
-  ShapedBuffer tuple_buffer = local_shaped_buffer->Release();
-
-  // Extract some metadata we use to construct scoped buffers.
-  const se::Platform* platform = tuple_buffer.platform();
-  int device_ordinal = tuple_buffer.device_ordinal();
-
-  ShapeTree<se::DeviceMemoryBase>& shape_tree = tuple_buffer.buffers();
-  std::vector<LocalShapedBuffer*> results;
-  for (int64 i = 0; i < ShapeUtil::TupleElementCount(tuple_shape); ++i) {
-    // Create a shaped buffer for this destructured tuple element.
-    const Shape& subshape = ShapeUtil::GetSubshape(tuple_shape, {i});
-    VLOG(3) << "Starting tuple element " << i << " subshape: " << subshape;
-    ShapedBuffer shaped_buffer(subshape, subshape, platform, device_ordinal);
-
-    ShapeUtil::ForEachSubshape(
-        subshape, [&](const Shape& s, const ShapeIndex& index) {
-          ShapeIndex original(index);
-          original.push_front(i);
-          se::DeviceMemoryBase* device_memory =
-              shape_tree.mutable_element(original);
-          shaped_buffer.set_buffer(*device_memory, index);
-          *device_memory = se::DeviceMemoryBase();
-        });
-
-    VLOG(3) << "Completed tuple element: " << i;
-    results.push_back(new LocalShapedBuffer(
-        ScopedShapedBuffer(std::move(shaped_buffer), allocator)));
-  }
-  // Deallocate the root buffer.
-  se::DeviceMemoryBase root_buffer = tuple_buffer.root_buffer();
-  TF_RETURN_IF_ERROR(allocator->Deallocate(device_ordinal, root_buffer));
-  return new LocalShapedBufferTuple(std::move(results));
-}
-
-StatusOr<XrtAllocationTuple*> DestructureXrtAllocationTuple(
-    XrtAllocation* allocation, const string& session_target) {
-  const Shape& tuple_shape = allocation->shape();
-
-  if (!tuple_shape.IsTuple()) {
-    return InvalidArgument(
-        "Attemped to destructure a LocalShapedBuffer that did not have a tuple "
-        "shape; shape: %s",
-        ShapeUtil::HumanString(tuple_shape));
-  }
-
-  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-  auto base_handle = tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
-  auto shape_index = tensorflow::ops::Placeholder(root, tensorflow::DT_INT32);
-  auto subtuple = tensorflow::ops::XRTSubTuple(root, base_handle, shape_index);
-  TF_RETURN_IF_ERROR(root.status());
-
-  tensorflow::ClientSession session(root, session_target);
-  tensorflow::ClientSession::FeedType inputs;
-  std::vector<XrtAllocation*> results;
-  for (int32 i = 0; i < ShapeUtil::TupleElementCount(tuple_shape); ++i) {
-    inputs.clear();
-    inputs.insert({base_handle, allocation->handle()});
-    inputs.insert({shape_index, {i}});
-    std::vector<tensorflow::Tensor> outputs;
-    auto status = session.Run(inputs, {subtuple}, &outputs);
-    if (!status.ok()) {
-      // Clean up before returning non-ok status.
-      for (int j = 0; j < results.size(); ++j) {
-        delete results[j];
-      }
-      return status;
-    }
-    const int64 subtuple_handle = outputs[0].scalar<int64>()();
-    const Shape& subtuple_shape =
-        ShapeUtil::GetTupleElementShape(tuple_shape, i);
-    results.push_back(
-        new XrtAllocation(subtuple_handle, subtuple_shape, session_target));
-  }
-  return new XrtAllocationTuple(std::move(results));
-}
+void DeleteComputation(Computation* computation) { delete computation; }
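// Editorial aside (not part of the diff): these Delete* helpers are exposed
// as plain functions rather than relying on SWIG-owned destructors; per the
// comment in local_computation_builder.i, the xla_client Python wrappers
// call them from custom destructors to trigger deallocation on the C++ side.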
 
 }  // namespace swig
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index 5e65ecf..66b1cce 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -22,9 +22,6 @@
 #include <Python.h>
 
 #include "absl/types/span.h"
-#include "tensorflow/cc/framework/ops.h"
-#include "tensorflow/cc/framework/scope.h"
-#include "tensorflow/compiler/xla/client/client_library.h"
 #include "tensorflow/compiler/xla/client/executable_build_options.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
@@ -35,42 +32,42 @@
 namespace xla {
 namespace swig {
 
-// Initializes the number of replicas that XLA will be initialized with (when
-// first obtaining a handle to the local XLA service). If this is called after
-// the handle to the local XLA service has been established, then an error is
-// returned.
-Status InitializeReplicaCount(int replica_count);
-
-// Initializes the platform name that XLA will be initialized with (when
-// first obtaining a handle to the local XLA service). If this is called after
-// the handle to the local XLA service has been established, then an error is
-// returned.
-Status InitializePlatformName(const string& platform_name);
-
-// Returns the replica count that is currently set, regardless of whether the
-// local XLA service has been instantiated yet or not.
-int GetReplicaCount();
-
 // Registers a 'fn_capsule' as a CPU custom call target.
 // 'fn_capsule' is a void* pointer encapsulated in a PyCapsule object, with name
 // "xla._CPU_CUSTOM_CALL_TARGET".
 Status RegisterCpuCustomCallTarget(const string& name, PyObject* fn_capsule);
 
-// Wraps the local client's infeed-transfer function.
-//
-// The default device ordinal (0) is used.
-Status TransferToInfeedLocal(const Literal& literal);
+// Wrapper around an xla::LocalClient.
+class LocalClient {
+ public:
+  // Initializes a local XLA client for `platform_name`. Returns an error if no
+  // such platform exists, or if the platform has no visible devices.
+  static StatusOr<LocalClient> Get(const string& platform_name);
 
-// Transfers the given literal to the infeed of the given replica.
-//
-// The replica number is resolved to an appropriate device ordinal.
-Status TransferToInfeedLocalReplica(const Literal& literal, int replica_number);
+  // Copyable and movable; the class is just a wrapper around an
+  // xla::LocalClient pointer for convenient SWIG wrapping.
 
-// Transfers a literal of the given shape from the outfeed of the given replica.
-//
-// The replica number is resolved to an appropriate device ordinal.
-StatusOr<Literal> TransferFromOutfeedLocalReplica(const Shape& shape,
-                                                  int replica_number);
+  // Returns the number of devices known to the XLA client.
+  int DeviceCount() const;
+
+  // Wraps the local client's infeed-transfer function.
+  //
+  // Transfers the given literal to the infeed of the given device.
+  Status TransferToInfeed(const Literal& literal, int device_ordinal);
+
+  // Transfers a literal of the given shape from the outfeed of the given
+  // device.
+  StatusOr<Literal> TransferFromOutfeed(const Shape& shape, int device_ordinal);
+
+  xla::LocalClient* client() const { return client_; }
+
+ private:
+  LocalClient(xla::LocalClient* client);
+
+  xla::LocalClient* client_;
+};
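// Editorial usage sketch (not part of the diff; "Host" as the CPU platform
// name is an assumption -- any registered platform name works):
//
//   StatusOr<LocalClient> client_or = LocalClient::Get("Host");
//   if (client_or.ok()) {
//     LocalClient client = client_or.ValueOrDie();  // copyable by design
//     int device_count = client.DeviceCount();
//   }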
+
+class LocalShapedBufferTuple;
 
 // Represents a reference to literals that live in a device-allocated buffer via
 // XLA. Specifically, wraps a ScopedShapedBuffer produced by transferring a
@@ -79,9 +76,9 @@
  public:
   static StatusOr<LocalShapedBuffer*> FromLiteral(
       const Literal& argument, const absl::optional<Shape>& shape_with_layout,
-      int replica_number);
+      const LocalClient& client, int device_ordinal);
 
-  LocalShapedBuffer(ScopedShapedBuffer shaped_buffer);
+  LocalShapedBuffer(ScopedShapedBuffer shaped_buffer, xla::LocalClient* client);
   StatusOr<Literal> ToLiteral() const;
   const Shape& shape() const;
   const ScopedShapedBuffer* shaped_buffer() const;
@@ -90,8 +87,13 @@
   // analogous to std::unique_ptr::release().
   ShapedBuffer Release();
 
+  // Destructures a tuple-valued LocalShapedBuffer into its constituent
+  // elements in LocalShapedBufferTuple form.
+  StatusOr<LocalShapedBufferTuple*> DestructureTuple();
+
  private:
   ScopedShapedBuffer shaped_buffer_;
+  xla::LocalClient* client_;
 };
 
 // Result of a tuple destructuring operation on a LocalShapedBuffer -- this
@@ -117,73 +119,21 @@
   std::vector<LocalShapedBuffer*> elements_;
 };
 
-// Destructures a tuple-valued LocalShapedBuffer into its constitutent elements
-// in LocalShapedBufferTuple form.
-StatusOr<LocalShapedBufferTuple*> DestructureLocalShapedBufferTuple(
-    LocalShapedBuffer* local_shaped_buffer);
-
-// Represents a reference to literals that live in a device-allocated buffer via
-// XRT. Specifically, wraps an int64 handle produced by running the allocation
-// graph, and an XLA shape to track the referent's shape.
-class XrtAllocation {
- public:
-  // Accepts a `session_target` argument, used in constructing the
-  // `tensorflow::ClientSession` instance in which allocation and deallocation
-  // graphs are run.
-  static StatusOr<XrtAllocation*> FromLiteral(const Literal& argument,
-                                              const string& session_target);
-
-  XrtAllocation(int64 handle, Shape shape, const string& session_target);
-  ~XrtAllocation();
-  StatusOr<Literal> ToLiteral() const;
-  const Shape& shape() const;
-  const int64 handle() const;
-
- private:
-  const int64 handle_;
-  const Shape shape_;
-  const string session_target_;
-};
-
-// Result of a tuple destructuring operation on an XrtAllocation.
-class XrtAllocationTuple {
- public:
-  // Note: any XrtAllocation elements that are not Release()'d will be
-  // deallocated in the destructor.
-  explicit XrtAllocationTuple(std::vector<XrtAllocation*> elements);
-
-  ~XrtAllocationTuple();
-
-  // Releases the ith element to the caller. Further attempts to release the ith
-  // element will return an invalid argument error.
-  StatusOr<XrtAllocation*> Release(int i);
-
-  // Returns the number of elements in the destructured tuple.
-  int64 size() const;
-
- private:
-  std::vector<XrtAllocation*> elements_;
-};
-
-// Destructures a tuple-valued XrtAllocation into its constitutent elements
-// in XrtAllocationTuple form.
-//
-// Accepts a `session_target` argument, used in constructing the
-// `tensorflow::ClientSession` instance in which the sub-tupling graph is run,
-// and passed along in constructing each constituent XrtAllocation.
-StatusOr<XrtAllocationTuple*> DestructureXrtAllocationTuple(
-    XrtAllocation* allocation, const string& session_target);
-
 // Represents a compiled computation that can be executed given handles to
 // device-allocated literals. Specifically, wraps an XLA LocalExecutable.
-class CompiledLocalComputation {
+class LocalExecutable {
  public:
-  CompiledLocalComputation(std::unique_ptr<LocalExecutable> executable);
+  LocalExecutable(std::unique_ptr<xla::LocalExecutable> executable,
+                  xla::DeviceAssignment device_assignment,
+                  xla::LocalClient* client);
 
   int num_replicas() const {
     return executable_->build_options().num_replicas();
   }
 
+  // Returns the device ordinals to which each replica is assigned.
+  std::vector<int> DeviceOrdinals() const;
+
   StatusOr<LocalShapedBuffer*> Execute(
       absl::Span<LocalShapedBuffer* const> argument_handles);
 
@@ -194,47 +144,22 @@
       absl::Span<const std::vector<LocalShapedBuffer*> > argument_handles);
 
  private:
-  std::unique_ptr<LocalExecutable> executable_;
+  const std::unique_ptr<xla::LocalExecutable> executable_;
+  const xla::DeviceAssignment device_assignment_;
+  xla::LocalClient* const client_;
 };
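// Editorial aside (not part of the diff): DeviceOrdinals() can presumably be
// read straight off the stored DeviceAssignment; a sketch, assuming one
// computation per replica:
//
//   std::vector<int> LocalExecutable::DeviceOrdinals() const {
//     std::vector<int> ordinals;
//     for (int replica = 0; replica < device_assignment_.replica_count();
//          ++replica) {
//       ordinals.push_back(device_assignment_(replica, 0));
//     }
//     return ordinals;
//   }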
 
-// Represents a compiled computation that can be executed given handles to
-// device-allocated literals. Specifically, wraps an XRT computation handle.
-class CompiledXrtComputation {
- public:
-  // Accepts a `session_target` argument, used in constructing the
-  // `tensorflow::ClientSession` instance in which the execution graph is run.
-  CompiledXrtComputation(const ProgramShape& program_shape, int64 handle,
-                         const string& session_target);
-  ~CompiledXrtComputation();
-
-  StatusOr<XrtAllocation*> Execute(
-      absl::Span<XrtAllocation* const> argument_handles);
-
-  const ProgramShape& program_shape() const;
-  int64 handle() const;
-
- private:
-  const ProgramShape program_shape_;
-  const int64 handle_;
-  const string session_target_;
-};
-
-// Wraps a XlaComputation produced by a LocalComputationBuilder. The
+// Wraps an XlaComputation produced by a ComputationBuilder. The
 // Compile method compiles the computation to a (local) executable via
 // the client library's local client. This class is intended to be
 // made available to Python via SWIG.
-class LocalComputation {
+class Computation {
  public:
-  LocalComputation(XlaComputation computation);
+  Computation(XlaComputation computation);
 
-  StatusOr<CompiledLocalComputation*> Compile(
+  StatusOr<LocalExecutable*> Compile(
       const std::vector<Shape>& argument_shapes,
-      const ExecutableBuildOptions* build_options);
-
-  // Accepts a `session_target` argument, used in constructing the
-  // `tensorflow::ClientSession` instance in which the compilation graph is run.
-  StatusOr<CompiledXrtComputation*> CompileForXrt(
-      const std::vector<Shape>& argument_shapes, const string& session_target);
+      const ExecutableBuildOptions* build_options, const LocalClient& client);
 
   const XlaComputation& computation() const;
 
@@ -243,6 +168,15 @@
   // string on failure.
   string GetSerializedProto() const;
 
+  // Returns the computation in human-readable HLO text format.
+  StatusOr<string> GetHloText() const;
+
+  // Returns the computation in graphviz dot format.
+  StatusOr<string> GetHloDotGraph() const;
+
+  // Returns the program shape for this computation.
+  StatusOr<ProgramShape> GetProgramShape() const;
+
   // Returns the return-value shape for this computation.
   StatusOr<Shape> GetReturnValueShape() const;
 
@@ -250,7 +184,7 @@
   XlaComputation computation_;
 };
 
-// Wraps a XlaOp produced by a LocalComputationBuilder. This class is intended
+// Wraps an XlaOp produced by a ComputationBuilder. This class is intended
 // to be made available to Python via SWIG.
 class LocalOp {
  public:
@@ -267,20 +201,20 @@
 //   Python.
 // - Set up the underlying builder to use the client library's
 //   LocalClient.
-// - Wrap Computations in LocalComputations for Python access.
-// - Correspondingly unwrap incoming LocalComputations.
-class LocalComputationBuilder {
+// - Wrap XlaComputations in Computations for Python access.
+// - Correspondingly unwrap incoming Computations.
+class ComputationBuilder {
  public:
-  LocalComputationBuilder(const string& computation_name);
+  ComputationBuilder(const string& computation_name);
 
   void SetOpMetadata(const OpMetadata& metadata);
   void ClearOpMetadata();
 
-  // Returns an owned LocalComputation to the caller on success.
-  StatusOr<LocalComputation*> Build();
+  // Returns an owned Computation to the caller on success.
+  StatusOr<Computation*> Build();
 
-  // Returns an owned LocalComputation to the caller on success with given root.
-  StatusOr<LocalComputation*> BuildWithRoot(const LocalOp& root);
+  // Returns an owned Computation with the given root to the caller on success.
+  StatusOr<Computation*> BuildWithRoot(const LocalOp& root);
 
   LocalOp Parameter(int64 parameter_number, const Shape& shape,
                     const string& name);
@@ -339,11 +273,11 @@
   LocalOp ConcatInDim(absl::Span<const LocalOp> operands, int64 dimension);
 
   LocalOp SelectAndScatterWithGeneralPadding(
-      const LocalOp& operand, const LocalComputation& select,
+      const LocalOp& operand, const Computation& select,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
       absl::Span<const std::pair<int64, int64> > padding, const LocalOp& source,
-      const LocalOp& init_value, const LocalComputation& scatter);
+      const LocalOp& init_value, const Computation& scatter);
 
   LocalOp Tuple(absl::Span<const LocalOp> elements);
 
@@ -369,7 +303,7 @@
   LocalOp BitcastConvertType(const LocalOp& operand,
                              PrimitiveType new_element_type);
 
-  LocalOp Call(const LocalComputation& local_computation,
+  LocalOp Call(const Computation& local_computation,
                absl::Span<const LocalOp> operands);
 
   LocalOp CustomCall(const string& call_target_name,
@@ -384,16 +318,16 @@
   LocalOp Rev(const LocalOp& operand, absl::Span<const int64> dimensions);
 
   LocalOp Map(absl::Span<const LocalOp> operands,
-              const LocalComputation& local_computation,
+              const Computation& local_computation,
               absl::Span<const int64> dimensions);
 
   LocalOp Reduce(const LocalOp& operand, const LocalOp& init_value,
-                 const LocalComputation& local_computation,
+                 const Computation& local_computation,
                  absl::Span<const int64> dimensions_to_reduce);
 
   LocalOp ReduceWindowWithGeneralPadding(
       const LocalOp& operand, const LocalOp& init_value,
-      const LocalComputation& local_computation,
+      const Computation& local_computation,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
       absl::Span<const int64> base_dilations,
@@ -405,13 +339,13 @@
 
   LocalOp RngUniform(const LocalOp& a, const LocalOp& b, const Shape& shape);
 
-  LocalOp While(const LocalComputation& condition, const LocalComputation& body,
+  LocalOp While(const Computation& condition, const Computation& body,
                 const LocalOp& init);
 
   LocalOp Conditional(const LocalOp& predicate, const LocalOp& true_operand,
-                      const LocalComputation& true_computation,
+                      const Computation& true_computation,
                       const LocalOp& false_operand,
-                      const LocalComputation& false_computation);
+                      const Computation& false_computation);
 
   StatusOr<bool> IsConstant(const LocalOp& operand);
 
@@ -424,20 +358,21 @@
 
   LocalOp Cholesky(const LocalOp& a);
 
+  // `transpose_a` is the integer value of a TriangularSolveOptions::Transpose
+  // enum. We use an integer here so we don't have to teach SWIG about the
+  // enum.
   LocalOp TriangularSolve(const LocalOp& a, const LocalOp& b, bool left_side,
-                          bool lower, bool transpose_a, bool conjugate_a,
-                          bool unit_diagonal);
+                          bool lower, bool unit_diagonal, int transpose_a);
 
   LocalOp Gather(const LocalOp& input, const LocalOp& start_indices,
                  const GatherDimensionNumbers& dimension_numbers,
                  absl::Span<const int64> slice_sizes);
 
   LocalOp Scatter(const LocalOp& input, const LocalOp& scatter_indices,
-                  const LocalOp& updates,
-                  const LocalComputation& update_computation,
+                  const LocalOp& updates, const Computation& update_computation,
                   const ScatterDimensionNumbers& dimension_numbers);
 
-  StatusOr<LocalComputation*> BuildConstantSubGraph(const LocalOp& operand);
+  StatusOr<Computation*> BuildConstantSubGraph(const LocalOp& operand);
 
 #define _FORWARD(method_name, return_sig, args_sig) \
   return_sig method_name args_sig;
@@ -479,6 +414,7 @@
   _FORWARD_BINOP(Pow)
   _FORWARD_BINOP(Complex)
   _FORWARD_UNOP(Not)
+  _FORWARD_UNOP(Clz)
   _FORWARD_UNOP(Abs)
   _FORWARD_UNOP(Exp)
   _FORWARD_UNOP(Expm1)
@@ -526,10 +462,8 @@
 
 // Functions for freeing resources from the Python side.
 void DeleteLocalShapedBuffer(LocalShapedBuffer* local_shaped_buffer);
-void DeleteXrtAllocation(XrtAllocation* allocation);
-void DeleteCompiledLocalComputation(CompiledLocalComputation* computation);
-void DeleteCompiledXrtComputation(CompiledXrtComputation* computation);
-void DeleteLocalComputation(LocalComputation* computation);
+void DeleteLocalExecutable(LocalExecutable* computation);
+void DeleteComputation(Computation* computation);
 
 }  // namespace swig
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
index df2ab0b..7d7a860 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.i
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -23,11 +23,13 @@
 //    C++                                  Python
 // -------------------------------------+---------------------------------------
 //  Span<int64>                        <-  sequence of int
+//  vector<int>                        ->  sequence of int
 //  Span<LocalOp>                      <-  sequence of LocalOp
 //  Literal                            <-> (nested tuple of) numpy ndarray
 //  std::vector<Literal>               <-  sequence of (nested tuple of) ndarray
 //  Shape                               -> pair holding (dtype, dimensions)
 //                                     <-  object duck-typed as xla_client.Shape
+//  ProgramShape                       ->  pair of ([arg_shapes], ret_shape)
 //  std::vector<Shape>                 <-  sequence of xla_client.Shape objects
 //  PrimitiveType                      <-  int
 //  Span<pair<int64, int64>>           <-  sequence of int pairs
@@ -97,7 +99,7 @@
 // wrapped in a Python class (xla_client.Shape) so as not to expose
 // the raw pair externally.
 //
-// Other SWIG object wrappers (e.g. of LocalComputation) are further
+// Other SWIG object wrappers (e.g. of Computation) are further
 // wrapped by xla_client in order to set up a custom destructor that
 // triggers memory deallocation on the C++ side.
 
@@ -107,6 +109,7 @@
 %nothread;
 
 %include "tensorflow/python/platform/base.i"
+%include "tensorflow/compiler/xla/python/xla_data.i"
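// Editorial note (not part of the diff): the basic Status/StatusOr, Literal,
// Shape, and dimension-number typemaps deleted from this file below
// presumably now live in the shared xla_data.i included here.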
 
 %{
 // Must be included first
@@ -124,87 +127,6 @@
 using namespace xla;
 using namespace xla::swig;
 
-namespace xla {
-
-namespace swig {
-
-bool GetIntAttr(PyObject* o, const char* field, int64* result) {
-  PyObject* fo = PyObject_GetAttrString(o, field);
-  if (!fo) {
-    return false;
-  }
-  const int64 value = numpy::PyIntOrPyLongToLong(fo);
-  if (value == -1 && PyErr_Occurred()) {
-    Py_DECREF(fo);
-    return false;
-  }
-  Py_DECREF(fo);
-  *result = value;
-  return true;
-}
-
-// Returns "ok"; true if there is no error, false if there was an error.
-bool HandleStringAttribute(PyObject* o,
-                           const char* attr_name,
-                           std::function<void(string s)> f) {
-  if (!PyObject_HasAttrString(o, attr_name)) {
-    return true;  // It's ok for the object to not have the attribute.
-  }
-  PyObject* attr = PyObject_GetAttrString(o, attr_name);
-  if (attr == nullptr) {
-    return false;  // An error occurred getting the attribute.
-  }
-  if (attr == Py_None) {
-    Py_DECREF(attr);
-    return true;  // The attribute is None, which we consider ok.
-  }
-  if (!PyString_Check(attr)) {
-    string message = absl::StrFormat("%s must be a string or none; got %s",
-        attr_name, numpy::PyObjectCppRepr(attr));
-    PyErr_SetString(PyExc_TypeError, message.c_str());
-    Py_DECREF(attr);
-    return false;  // Type error, not ok.
-  }
-  f(PyString_AsString(attr));
-  Py_DECREF(attr);
-  return true;  // Handled string attribute, ok!
-}
-
-bool HandleRepeatedInt64Attribute(
-    PyObject* o, const char* attr_name,
-    tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>* field) {
-  PyObject* seq = PyObject_GetAttrString(o, attr_name);
-  if (!seq) {
-    return false;
-  }
-
-  int length = PySequence_Size(seq);
-  if (length == -1) {
-    Py_DECREF(seq);
-    return false;
-  }
-
-  for (int i = 0; i < length; ++i) {
-    PyObject* item = PySequence_GetItem(seq, i);
-    if (!item) {
-      Py_DECREF(seq);
-      return false;
-    }
-    const int64 dimension = numpy::PyIntOrPyLongToLong(item);
-    if (dimension == -1 && PyErr_Occurred()) {
-      Py_DECREF(item);
-      Py_DECREF(seq);
-      return false;
-    }
-    *field->Add() = dimension;
-    Py_DECREF(item);
-  }
-  Py_DECREF(seq);
-  return true;
-}
-
-}  // namespace swig
-}  // namespace xla
 %}
 
 // Required to use PyArray_* functions.
@@ -212,57 +134,6 @@
 tensorflow::ImportNumpy();
 %}
 
-// Basic types
-
-%typemap(out) StatusOr<bool> {
-  if ($1.ok()) {
-    $result = PyBool_FromLong($1.ConsumeValueOrDie());
-  } else {
-    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
-    SWIG_fail;
-  }
-}
-
-%typemap(out) Status {
-  if (!$1.ok()) {
-    PyErr_SetString(
-        PyExc_RuntimeError, $1.ToString().c_str());
-    SWIG_fail;
-  }
-  Py_INCREF(Py_None);
-  $result = Py_None;
-}
-
-%typemap(in) absl::Span<const int64>
-    (std::vector<int64> temps) {
-  if (!PySequence_Check($input)) {
-    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
-    SWIG_fail;
-  }
-  const int size = PySequence_Size($input);
-  temps.resize(size);
-  for (int i = 0; i < size; ++i) {
-    PyObject* o = PySequence_GetItem($input, i);
-    PyObject* py_int = numpy::PyNumberToPyInt(o);
-    if (!py_int) {
-      PyErr_SetString(
-          PyExc_TypeError,
-          "Argument sequence element cannot be converted to int");
-      Py_DECREF(o);
-      SWIG_fail;
-    }
-    temps[i] = numpy::PyIntOrPyLongToLong(py_int);
-    if (temps[i] == -1 && PyErr_Occurred()) {
-      Py_DECREF(py_int);
-      Py_DECREF(o);
-      SWIG_fail;
-    }
-    Py_DECREF(py_int);
-    Py_DECREF(o);
-  }
-  $1 = temps;
-}
-
 // Computation builder types
 
 %typemap(in) absl::Span<const xla::swig::LocalOp>(
@@ -287,12 +158,12 @@
 
 // Computation and buffer/allocation types
 
-%typemap(out) StatusOr<xla::swig::CompiledLocalComputation*> {
+%typemap(out) StatusOr<xla::swig::LocalClient> {
   if ($1.ok()) {
-    auto* value = $1.ValueOrDie();
+    xla::swig::LocalClient value = $1.ValueOrDie();
     {
-      auto* $1 = value;
-      $typemap(out, xla::swig::CompiledLocalComputation*)
+      auto $1 = value;
+      $typemap(out, xla::swig::LocalClient)
     }
   } else {
     PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
@@ -300,12 +171,12 @@
   }
 }
 
-%typemap(out) StatusOr<xla::swig::CompiledXrtComputation*> {
+%typemap(out) StatusOr<xla::swig::LocalExecutable*> {
   if ($1.ok()) {
     auto* value = $1.ValueOrDie();
     {
       auto* $1 = value;
-      $typemap(out, xla::swig::CompiledXrtComputation*)
+      $typemap(out, xla::swig::LocalExecutable*)
     }
   } else {
     PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
@@ -339,38 +210,12 @@
   }
 }
 
-%typemap(out) StatusOr<xla::swig::XrtAllocation*> {
+%typemap(out) StatusOr<xla::swig::Computation*> {
   if ($1.ok()) {
     auto* value = $1.ValueOrDie();
     {
       auto* $1 = value;
-      $typemap(out, xla::swig::XrtAllocation*)
-    }
-  } else {
-    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
-    SWIG_fail;
-  }
-}
-
-%typemap(out) StatusOr<xla::swig::XrtAllocationTuple*> {
-  if ($1.ok()) {
-    auto* value = $1.ValueOrDie();
-    {
-      auto* $1 = value;
-      $typemap(out, xla::swig::XrtAllocationTuple*)
-    }
-  } else {
-    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
-    SWIG_fail;
-  }
-}
-
-%typemap(out) StatusOr<xla::swig::LocalComputation*> {
-  if ($1.ok()) {
-    auto* value = $1.ValueOrDie();
-    {
-      auto* $1 = value;
-      $typemap(out, xla::swig::LocalComputation*)
+      $typemap(out, xla::swig::Computation*)
     }
   } else {
     PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
@@ -430,473 +275,6 @@
   $1 = temps;
 }
 
-%typemap(in) absl::Span<xla::swig::XrtAllocation* const>
-    (std::vector<XrtAllocation*> temps) {
-  if (!PySequence_Check($input)) {
-    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
-    SWIG_fail;
-  }
-  const int size = PySequence_Size($input);
-  temps.reserve(size);
-  for (int i = 0; i < size; ++i) {
-    PyObject* o = PySequence_GetItem($input, i);
-    XrtAllocation* xrta;
-    if ((SWIG_ConvertPtr(o, (void**) &xrta, $descriptor(xla::swig::XrtAllocation*),
-                         SWIG_POINTER_EXCEPTION)) == -1) {
-      SWIG_fail;
-    }
-    temps.push_back(xrta);
-    Py_DECREF(o);
-  }
-  $1 = temps;
-}
-
-// Literal
-
-%typemap(in) const Literal& (StatusOr<Literal> literal_status) {
-  literal_status = numpy::XlaLiteralFromPyObject($input);
-  if (!literal_status.ok()) {
-    PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str());
-    SWIG_fail;
-  }
-  $1 = &literal_status.ValueOrDie();
-}
-
-%typemap(out) Literal (StatusOr<numpy::Safe_PyObjectPtr> obj_status) {
-  obj_status = numpy::PyObjectFromXlaLiteral(*$1);
-  if (!obj_status.ok()) {
-    PyErr_SetString(PyExc_RuntimeError, obj_status.status().ToString().c_str());
-    SWIG_fail;
-  }
-  $result = obj_status.ValueOrDie().release();
-}
-
-%typemap(out) StatusOr<Literal> (StatusOr<numpy::Safe_PyObjectPtr> obj_status) {
-  if (!$1.ok()) {
-    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
-    SWIG_fail;
-  }
-  obj_status = numpy::PyObjectFromXlaLiteral($1.ValueOrDie());
-  if (!obj_status.ok()) {
-    PyErr_SetString(PyExc_RuntimeError, obj_status.status().ToString().c_str());
-    SWIG_fail;
-  }
-  $result = obj_status.ValueOrDie().release();
-}
-
-%typemap(in) const std::vector<Literal>& (std::vector<Literal> temps) {
-  if (!PySequence_Check($input)) {
-    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
-    SWIG_fail;
-  }
-  const int size = PySequence_Size($input);
-  for (int i = 0; i < size; ++i) {
-    PyObject* o = PySequence_GetItem($input, i);
-    StatusOr<Literal> literal_status = numpy::XlaLiteralFromPyObject(o);
-    if (!literal_status.ok()) {
-      PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str());
-      Py_DECREF(o);
-      SWIG_fail;
-    }
-    temps.push_back(literal_status.ConsumeValueOrDie());
-    Py_DECREF(o);
-  }
-  $1 = &temps;
-}
-
-// OpMetadata
-
-%typemap(in) const OpMetadata& (OpMetadata temp) {
-  StatusOr<OpMetadata> statusor = numpy::OpMetadataFromPyObject($input);
-  if (!statusor.ok()) {
-    PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
-    SWIG_fail;
-  }
-  temp = std::move(statusor).ValueOrDie();
-  $1 = &temp;
-}
-
-// Shape
-
-%typemap(out) const Shape& {
-  $result = numpy::PyShapeInfoFromXlaShape(*$1);
-}
-
-%typemap(out) StatusOr<Shape> {
-  if ($1.ok()) {
-    $result = numpy::PyShapeInfoFromXlaShape($1.ConsumeValueOrDie());
-  } else {
-    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
-    SWIG_fail;
-  }
-}
-
-%typemap(in) const Shape& (Shape temp) {
-  StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape($input);
-  if (!statusor.ok()) {
-    PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
-    SWIG_fail;
-  }
-  temp = std::move(statusor).ValueOrDie();
-  $1 = &temp;
-}
-
-%typemap(in) const absl::optional<Shape>& (
-    absl::optional<Shape> temp) {
-  if ($input == Py_None) {
-    temp = absl::nullopt;
-    $1 = &temp;
-  } else {
-    StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape($input);
-    if (!statusor.ok()) {
-      PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
-      SWIG_fail;
-    }
-    temp = std::move(statusor).ValueOrDie();
-    $1 = &temp;
-  }
-}
-
-%typemap(out) std::unique_ptr<Shape> {
-  $result = numpy::PyShapeInfoFromXlaShape(*$1);
-}
-
-%typemap(in) const std::vector<Shape>& (std::vector<Shape> temps) {
-  if (!PySequence_Check($input)) {
-    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
-    SWIG_fail;
-  }
-  const int size = PySequence_Size($input);
-  for (int i = 0; i < size; ++i) {
-    PyObject* o = PySequence_GetItem($input, i);
-    StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape(o);
-    Py_DECREF(o);
-    if (!statusor.ok()) {
-      PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
-      SWIG_fail;
-    }
-    temps.push_back(statusor.ConsumeValueOrDie());
-  }
-  $1 = &temps;
-}
-
-%typemap(in) const std::vector<absl::optional<Shape> >& (
-    std::vector<absl::optional<Shape> > temps) {
-  if (!PySequence_Check($input)) {
-    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
-    SWIG_fail;
-  }
-  const int size = PySequence_Size($input);
-  for (int i = 0; i < size; ++i) {
-    PyObject* o = PySequence_GetItem($input, i);
-    if (o == Py_None) {
-      temps.push_back(absl::nullopt);
-    } else {
-      StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape(o);
-      Py_DECREF(o);
-      if (!statusor.ok()) {
-        PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
-        SWIG_fail;
-      }
-      temps.push_back(statusor.ConsumeValueOrDie());
-    }
-  }
-  $1 = &temps;
-}
-
-// PrimitiveType
-
-%typemap(in) PrimitiveType {
-  PyObject* py_int = numpy::PyNumberToPyInt($input);
-  if (!py_int) {
-    PyErr_SetString(PyExc_TypeError, "Argument cannot be converted to int");
-    SWIG_fail;
-  }
-  const long value = numpy::PyIntOrPyLongToLong(py_int);
-  if (value == -1 && PyErr_Occurred()) {
-    Py_DECREF(py_int);
-    SWIG_fail;
-  }
-  if (!PrimitiveType_IsValid(value)) {
-    PyErr_SetString(
-        PyExc_TypeError, "Argument not valid for PrimitiveType enum");
-    Py_DECREF(py_int);
-    SWIG_fail;
-  }
-  $1 = static_cast<PrimitiveType>(value);
-}
-
-// Span<pair<int64, in64>>
-
-%typemap(in) absl::Span<const std::pair<int64, int64> >
-    (std::vector<std::pair<int64, int64> > temps) {
-  if (!PySequence_Check($input)) {
-    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
-    SWIG_fail;
-  }
-  const int size = PySequence_Size($input);
-  temps.reserve(size);
-  for (int i = 0; i < size; ++i) {
-    PyObject* o = PySequence_GetItem($input, i);
-    if (!o) {
-      SWIG_fail;
-    }
-    PyObject* first = PyTuple_GetItem(o, 0);
-    if (!first) {
-      Py_DECREF(o);
-      SWIG_fail;
-    }
-    PyObject* first_pyint = numpy::PyNumberToPyInt(first);
-    if (!first_pyint) {
-      PyErr_SetString(
-          PyExc_TypeError,
-          "First pair item cannot be converted to int");
-      Py_DECREF(o);
-      SWIG_fail;
-    }
-    PyObject* second = PyTuple_GetItem(o, 1);
-    if (!second) {
-      Py_DECREF(o);
-      Py_DECREF(first_pyint);
-      SWIG_fail;
-    }
-    PyObject* second_pyint = numpy::PyNumberToPyInt(second);
-    if (!second_pyint) {
-      PyErr_SetString(
-          PyExc_TypeError,
-          "Second pair item cannot be converted to int");
-      Py_DECREF(o);
-      Py_DECREF(first_pyint);
-      SWIG_fail;
-    }
-    const int64 first_value = numpy::PyIntOrPyLongToLong(first_pyint);
-    if (first_value == -1 && PyErr_Occurred()) {
-      Py_DECREF(o);
-      Py_DECREF(first_pyint);
-      Py_DECREF(second_pyint);
-      SWIG_fail;
-    }
-    const int64 second_value = numpy::PyIntOrPyLongToLong(second_pyint);
-    if (second_value == -1 && PyErr_Occurred()) {
-      Py_DECREF(o);
-      Py_DECREF(first_pyint);
-      Py_DECREF(second_pyint);
-      SWIG_fail;
-    }
-    temps.push_back(std::make_pair(first_value, second_value));
-    Py_DECREF(o);
-  }
-  $1 = temps;
-}
-
-// DotDimensionNumbers
-
-%typemap(in) const DotDimensionNumbers&
-    (DotDimensionNumbers dimension_numbers) {
-  if (!HandleRepeatedInt64Attribute(
-        $input, "lhs_contracting_dimensions",
-        dimension_numbers.mutable_lhs_contracting_dimensions())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "rhs_contracting_dimensions",
-        dimension_numbers.mutable_rhs_contracting_dimensions())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "lhs_batch_dimensions",
-        dimension_numbers.mutable_lhs_batch_dimensions())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "rhs_batch_dimensions",
-        dimension_numbers.mutable_rhs_batch_dimensions())) {
-    SWIG_fail;
-  }
-
-  $1 = &dimension_numbers;
-}
-
-// PaddingConfig
-
-%typemap(in) const PaddingConfig&
-    (PaddingConfig padding_config) {
-  PyObject* dimensions = PyObject_GetAttrString($input, "dimensions");
-  if (!dimensions) {
-    SWIG_fail;
-  }
-
-  int length = PySequence_Size(dimensions);
-  if (length == -1) {
-    Py_DECREF(dimensions);
-    SWIG_fail;
-  }
-
-  for (int i = 0; i < length; ++i) {
-    PyObject* item = PySequence_GetItem(dimensions, i);
-    if (!item) {
-      Py_DECREF(dimensions);
-      SWIG_fail;
-    }
-    int64 edge_padding_low, edge_padding_high, interior_padding;
-    if (!GetIntAttr(item, "edge_padding_low", &edge_padding_low)
-        || !GetIntAttr(item, "edge_padding_high", &edge_padding_high)
-        || !GetIntAttr(item, "interior_padding", &interior_padding)) {
-      Py_DECREF(item);
-      Py_DECREF(dimensions);
-      SWIG_fail;
-    }
-    Py_DECREF(item);
-
-    PaddingConfig::PaddingConfigDimension* dimension =
-        padding_config.add_dimensions();
-    dimension->set_edge_padding_low(edge_padding_low);
-    dimension->set_edge_padding_high(edge_padding_high);
-    dimension->set_interior_padding(interior_padding);
-  }
-  Py_DECREF(dimensions);
-
-  $1 = &padding_config;
-}
-
-// ConvolutionDimensionNumbers
-
-%typemap(in) const ConvolutionDimensionNumbers&
-    (ConvolutionDimensionNumbers dimension_numbers) {
-  int64 value;
-
-  if (!GetIntAttr($input, "input_batch_dimension", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_input_batch_dimension(value);
-
-  if (!GetIntAttr($input, "input_feature_dimension", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_input_feature_dimension(value);
-
-  if (!GetIntAttr($input, "output_batch_dimension", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_output_batch_dimension(value);
-
-  if (!GetIntAttr($input, "output_feature_dimension", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_output_feature_dimension(value);
-
-  if (!GetIntAttr($input, "kernel_output_feature_dimension", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_kernel_output_feature_dimension(value);
-
-  if (!GetIntAttr($input, "kernel_input_feature_dimension", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_kernel_input_feature_dimension(value);
-
-  if (!HandleRepeatedInt64Attribute(
-        $input, "input_spatial_dimensions",
-        dimension_numbers.mutable_input_spatial_dimensions())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "kernel_spatial_dimensions",
-        dimension_numbers.mutable_kernel_spatial_dimensions())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "output_spatial_dimensions",
-        dimension_numbers.mutable_output_spatial_dimensions())) {
-    SWIG_fail;
-  }
-
-  $1 = &dimension_numbers;
-}
-
-// GatherDimensionNumbers
-
-%typemap(in) const GatherDimensionNumbers&
-    (GatherDimensionNumbers dimension_numbers) {
-  if (!HandleRepeatedInt64Attribute(
-        $input, "offset_dims",
-        dimension_numbers.mutable_offset_dims())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "collapsed_slice_dims",
-        dimension_numbers.mutable_collapsed_slice_dims())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "start_index_map",
-        dimension_numbers.mutable_start_index_map())) {
-    SWIG_fail;
-  }
-
-  int64 value;
-  if (!GetIntAttr($input, "index_vector_dim", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_index_vector_dim(value);
-
-  $1 = &dimension_numbers;
-}
-
-// ScatterDimensionNumbers
-
-%typemap(in) const ScatterDimensionNumbers&
-    (ScatterDimensionNumbers dimension_numbers) {
-  if (!HandleRepeatedInt64Attribute(
-        $input, "update_window_dims",
-        dimension_numbers.mutable_update_window_dims())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "inserted_window_dims",
-        dimension_numbers.mutable_inserted_window_dims())) {
-    SWIG_fail;
-  }
-  if (!HandleRepeatedInt64Attribute(
-        $input, "scatter_dims_to_operand_dims",
-        dimension_numbers.mutable_scatter_dims_to_operand_dims())) {
-    SWIG_fail;
-  }
-
-  int64 value;
-  if (!GetIntAttr($input, "index_vector_dim", &value)) {
-    SWIG_fail;
-  }
-  dimension_numbers.set_index_vector_dim(value);
-
-  $1 = &dimension_numbers;
-}
-
-// Span<const ReplicaGroup>
-
-%typemap(in) absl::Span<const ReplicaGroup >
-    (std::vector<ReplicaGroup > temps) {
-  if (!PySequence_Check($input)) {
-    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
-    SWIG_fail;
-  }
-  const int size = PySequence_Size($input);
-  temps.reserve(size);
-  for (int i = 0; i < size; ++i) {
-    PyObject* o = PySequence_GetItem($input, i);
-    ReplicaGroup rgrp;
-    if (!HandleRepeatedInt64Attribute(
-            o, "replica_ids",
-            rgrp.mutable_replica_ids())) {
-        SWIG_fail;
-    }
-    temps.push_back(rgrp);
-    Py_DECREF(o);
-  }
-  $1 = temps;
-}
-
-
 // ExecutableBuildOptions
 
 %typemap(in) const ExecutableBuildOptions*
@@ -966,160 +344,151 @@
 %ignoreall
 %unignore xla;
 %unignore xla::swig;
-%unignore xla::swig::InitializeReplicaCount;
-%unignore xla::swig::InitializePlatformName;
-%unignore xla::swig::GetReplicaCount;
 %unignore xla::swig::RegisterCpuCustomCallTarget;
-%unignore xla::swig::TransferToInfeedLocal;
-%unignore xla::swig::TransferToInfeedLocalReplica;
-%unignore xla::swig::TransferFromOutfeedLocalReplica;
+%unignore xla::swig::LocalClient;
+%unignore xla::swig::LocalClient::Get;
+%unignore xla::swig::LocalClient::DeviceCount;
+%unignore xla::swig::LocalClient::TransferToInfeed;
+%unignore xla::swig::LocalClient::TransferFromOutfeed;
 %unignore xla::swig::LocalShapedBuffer;
 %unignore xla::swig::LocalShapedBuffer::FromLiteral;
 %unignore xla::swig::LocalShapedBuffer::ToLiteral;
 %unignore xla::swig::LocalShapedBuffer::shape;
+%unignore xla::swig::LocalShapedBuffer::DestructureTuple;
 %unignore xla::swig::LocalShapedBufferTuple;
 %unignore xla::swig::LocalShapedBufferTuple::Release;
 %unignore xla::swig::LocalShapedBufferTuple::size;
-%unignore xla::swig::XrtAllocation;
-%unignore xla::swig::XrtAllocation::FromLiteral;
-%unignore xla::swig::XrtAllocation::ToLiteral;
-%unignore xla::swig::XrtAllocation::shape;
-%unignore xla::swig::XrtAllocationTuple;
-%unignore xla::swig::XrtAllocationTuple::Release;
-%unignore xla::swig::XrtAllocationTuple::size;
-%unignore xla::swig::CompiledLocalComputation;
-%unignore xla::swig::CompiledLocalComputation::Execute;
-%unignore xla::swig::CompiledLocalComputation::ExecutePerReplica;
-%unignore xla::swig::CompiledXrtComputation;
-%unignore xla::swig::CompiledXrtComputation::Execute;
-%unignore xla::swig::LocalComputation;
-%unignore xla::swig::LocalComputation::Compile;
-%unignore xla::swig::LocalComputation::CompileForXrt;
-%unignore xla::swig::LocalComputation::GetReturnValueShape;
-%unignore xla::swig::LocalComputation::GetSerializedProto;
+%unignore xla::swig::LocalExecutable;
+%unignore xla::swig::LocalExecutable::DeviceOrdinals;
+%unignore xla::swig::LocalExecutable::Execute;
+%unignore xla::swig::LocalExecutable::ExecutePerReplica;
+%unignore xla::swig::Computation;
+%unignore xla::swig::Computation::Compile;
+%unignore xla::swig::Computation::GetProgramShape;
+%unignore xla::swig::Computation::GetReturnValueShape;
+%unignore xla::swig::Computation::GetSerializedProto;
+%unignore xla::swig::Computation::GetHloText;
+%unignore xla::swig::Computation::GetHloDotGraph;
 %unignore xla::swig::LocalOp;
-%unignore xla::swig::LocalComputationBuilder;
-%unignore xla::swig::LocalComputationBuilder::LocalComputationBuilder;
-%unignore xla::swig::LocalComputationBuilder::Build;
-%unignore xla::swig::LocalComputationBuilder::BuildWithRoot;
-%unignore xla::swig::LocalComputationBuilder::SetOpMetadata;
-%unignore xla::swig::LocalComputationBuilder::ClearOpMetadata;
-%unignore xla::swig::LocalComputationBuilder::Parameter;
-%unignore xla::swig::LocalComputationBuilder::GetShape;
-%unignore xla::swig::LocalComputationBuilder::GetReturnValueShape;
-%unignore xla::swig::LocalComputationBuilder::Infeed;
-%unignore xla::swig::LocalComputationBuilder::Outfeed;
-%unignore xla::swig::LocalComputationBuilder::ConstantLiteral;
-%unignore xla::swig::LocalComputationBuilder::ConstantR0;
-%unignore xla::swig::LocalComputationBuilder::Iota;
-%unignore xla::swig::LocalComputationBuilder::BroadcastedIota;
-%unignore xla::swig::LocalComputationBuilder::Broadcast;
-%unignore xla::swig::LocalComputationBuilder::BroadcastInDim;
-%unignore xla::swig::LocalComputationBuilder::Pad;
-%unignore xla::swig::LocalComputationBuilder::Reshape;
-%unignore xla::swig::LocalComputationBuilder::Collapse;
-%unignore xla::swig::LocalComputationBuilder::AllToAll;
-%unignore xla::swig::LocalComputationBuilder::CrossReplicaSum;
-%unignore xla::swig::LocalComputationBuilder::Slice;
-%unignore xla::swig::LocalComputationBuilder::SliceInDim;
-%unignore xla::swig::LocalComputationBuilder::DynamicSlice;
-%unignore xla::swig::LocalComputationBuilder::DynamicUpdateSlice;
-%unignore xla::swig::LocalComputationBuilder::ConcatInDim;
-%unignore xla::swig::LocalComputationBuilder::SelectAndScatterWithGeneralPadding;
-%unignore xla::swig::LocalComputationBuilder::Select;
-%unignore xla::swig::LocalComputationBuilder::Tuple;
-%unignore xla::swig::LocalComputationBuilder::GetTupleElement;
-%unignore xla::swig::LocalComputationBuilder::ConvertElementType;
-%unignore xla::swig::LocalComputationBuilder::BitcastConvertType;
-%unignore xla::swig::LocalComputationBuilder::Call;
-%unignore xla::swig::LocalComputationBuilder::Transpose;
-%unignore xla::swig::LocalComputationBuilder::Rev;
-%unignore xla::swig::LocalComputationBuilder::Clamp;
-%unignore xla::swig::LocalComputationBuilder::Map;
-%unignore xla::swig::LocalComputationBuilder::Reduce;
-%unignore xla::swig::LocalComputationBuilder::ReduceWindowWithGeneralPadding;
-%unignore xla::swig::LocalComputationBuilder::RngNormal;
-%unignore xla::swig::LocalComputationBuilder::RngUniform;
-%unignore xla::swig::LocalComputationBuilder::RngBernoulli;
-%unignore xla::swig::LocalComputationBuilder::While;
-%unignore xla::swig::LocalComputationBuilder::Conditional;
-%unignore xla::swig::LocalComputationBuilder::IsConstant;
-%unignore xla::swig::LocalComputationBuilder::Eq;
-%unignore xla::swig::LocalComputationBuilder::Ne;
-%unignore xla::swig::LocalComputationBuilder::Ge;
-%unignore xla::swig::LocalComputationBuilder::Gt;
-%unignore xla::swig::LocalComputationBuilder::Lt;
-%unignore xla::swig::LocalComputationBuilder::Le;
-%unignore xla::swig::LocalComputationBuilder::Dot;
-%unignore xla::swig::LocalComputationBuilder::DotGeneral;
-%unignore xla::swig::LocalComputationBuilder::ConvGeneralDilated;
-%unignore xla::swig::LocalComputationBuilder::Add;
-%unignore xla::swig::LocalComputationBuilder::Sub;
-%unignore xla::swig::LocalComputationBuilder::Mul;
-%unignore xla::swig::LocalComputationBuilder::Div;
-%unignore xla::swig::LocalComputationBuilder::Rem;
-%unignore xla::swig::LocalComputationBuilder::Max;
-%unignore xla::swig::LocalComputationBuilder::Min;
-%unignore xla::swig::LocalComputationBuilder::And;
-%unignore xla::swig::LocalComputationBuilder::Or;
-%unignore xla::swig::LocalComputationBuilder::Xor;
-%unignore xla::swig::LocalComputationBuilder::ShiftLeft;
-%unignore xla::swig::LocalComputationBuilder::ShiftRightArithmetic;
-%unignore xla::swig::LocalComputationBuilder::ShiftRightLogical;
-%unignore xla::swig::LocalComputationBuilder::Not;
-%unignore xla::swig::LocalComputationBuilder::Abs;
-%unignore xla::swig::LocalComputationBuilder::Exp;
-%unignore xla::swig::LocalComputationBuilder::Expm1;
-%unignore xla::swig::LocalComputationBuilder::Floor;
-%unignore xla::swig::LocalComputationBuilder::Ceil;
-%unignore xla::swig::LocalComputationBuilder::Round;
-%unignore xla::swig::LocalComputationBuilder::Log;
-%unignore xla::swig::LocalComputationBuilder::Log1p;
-%unignore xla::swig::LocalComputationBuilder::Sign;
-%unignore xla::swig::LocalComputationBuilder::Cos;
-%unignore xla::swig::LocalComputationBuilder::Sin;
-%unignore xla::swig::LocalComputationBuilder::Tanh;
-%unignore xla::swig::LocalComputationBuilder::Atan2;
-%unignore xla::swig::LocalComputationBuilder::IsFinite;
-%unignore xla::swig::LocalComputationBuilder::Pow;
-%unignore xla::swig::LocalComputationBuilder::Neg;
-%unignore xla::swig::LocalComputationBuilder::Sort;
-%unignore xla::swig::LocalComputationBuilder::SortKeyVal;
-%unignore xla::swig::LocalComputationBuilder::Sqrt;
-%unignore xla::swig::LocalComputationBuilder::Rsqrt;
-%unignore xla::swig::LocalComputationBuilder::Square;
-%unignore xla::swig::LocalComputationBuilder::Reciprocal;
-%unignore xla::swig::LocalComputationBuilder::Erfc;
-%unignore xla::swig::LocalComputationBuilder::Erf;
-%unignore xla::swig::LocalComputationBuilder::ErfInv;
-%unignore xla::swig::LocalComputationBuilder::Lgamma;
-%unignore xla::swig::LocalComputationBuilder::Digamma;
-%unignore xla::swig::LocalComputationBuilder::Acos;
-%unignore xla::swig::LocalComputationBuilder::Asin;
-%unignore xla::swig::LocalComputationBuilder::Atan;
-%unignore xla::swig::LocalComputationBuilder::Tan;
-%unignore xla::swig::LocalComputationBuilder::Acosh;
-%unignore xla::swig::LocalComputationBuilder::Asinh;
-%unignore xla::swig::LocalComputationBuilder::Atanh;
-%unignore xla::swig::LocalComputationBuilder::Cosh;
-%unignore xla::swig::LocalComputationBuilder::Sinh;
-%unignore xla::swig::LocalComputationBuilder::Real;
-%unignore xla::swig::LocalComputationBuilder::Imag;
-%unignore xla::swig::LocalComputationBuilder::Conj;
-%unignore xla::swig::LocalComputationBuilder::Complex;
-%unignore xla::swig::LocalComputationBuilder::Cholesky;
-%unignore xla::swig::LocalComputationBuilder::QR;
-%unignore xla::swig::LocalComputationBuilder::TriangularSolve;
-%unignore xla::swig::LocalComputationBuilder::CustomCall;
-%unignore xla::swig::LocalComputationBuilder::Gather;
-%unignore xla::swig::LocalComputationBuilder::Scatter;
-%unignore xla::swig::DeleteLocalComputation;
-%unignore xla::swig::DestructureLocalShapedBufferTuple;
-%unignore xla::swig::DestructureXrtAllocationTuple;
+%unignore xla::swig::ComputationBuilder;
+%unignore xla::swig::ComputationBuilder::ComputationBuilder;
+%unignore xla::swig::ComputationBuilder::Build;
+%unignore xla::swig::ComputationBuilder::BuildWithRoot;
+%unignore xla::swig::ComputationBuilder::SetOpMetadata;
+%unignore xla::swig::ComputationBuilder::ClearOpMetadata;
+%unignore xla::swig::ComputationBuilder::Parameter;
+%unignore xla::swig::ComputationBuilder::GetShape;
+%unignore xla::swig::ComputationBuilder::GetReturnValueShape;
+%unignore xla::swig::ComputationBuilder::Infeed;
+%unignore xla::swig::ComputationBuilder::Outfeed;
+%unignore xla::swig::ComputationBuilder::ConstantLiteral;
+%unignore xla::swig::ComputationBuilder::ConstantR0;
+%unignore xla::swig::ComputationBuilder::Iota;
+%unignore xla::swig::ComputationBuilder::BroadcastedIota;
+%unignore xla::swig::ComputationBuilder::Broadcast;
+%unignore xla::swig::ComputationBuilder::BroadcastInDim;
+%unignore xla::swig::ComputationBuilder::Pad;
+%unignore xla::swig::ComputationBuilder::Reshape;
+%unignore xla::swig::ComputationBuilder::Collapse;
+%unignore xla::swig::ComputationBuilder::AllToAll;
+%unignore xla::swig::ComputationBuilder::CrossReplicaSum;
+%unignore xla::swig::ComputationBuilder::Slice;
+%unignore xla::swig::ComputationBuilder::SliceInDim;
+%unignore xla::swig::ComputationBuilder::DynamicSlice;
+%unignore xla::swig::ComputationBuilder::DynamicUpdateSlice;
+%unignore xla::swig::ComputationBuilder::ConcatInDim;
+%unignore xla::swig::ComputationBuilder::SelectAndScatterWithGeneralPadding;
+%unignore xla::swig::ComputationBuilder::Select;
+%unignore xla::swig::ComputationBuilder::Tuple;
+%unignore xla::swig::ComputationBuilder::GetTupleElement;
+%unignore xla::swig::ComputationBuilder::ConvertElementType;
+%unignore xla::swig::ComputationBuilder::BitcastConvertType;
+%unignore xla::swig::ComputationBuilder::Call;
+%unignore xla::swig::ComputationBuilder::Transpose;
+%unignore xla::swig::ComputationBuilder::Rev;
+%unignore xla::swig::ComputationBuilder::Clamp;
+%unignore xla::swig::ComputationBuilder::Map;
+%unignore xla::swig::ComputationBuilder::Reduce;
+%unignore xla::swig::ComputationBuilder::ReduceWindowWithGeneralPadding;
+%unignore xla::swig::ComputationBuilder::RngNormal;
+%unignore xla::swig::ComputationBuilder::RngUniform;
+%unignore xla::swig::ComputationBuilder::RngBernoulli;
+%unignore xla::swig::ComputationBuilder::While;
+%unignore xla::swig::ComputationBuilder::Conditional;
+%unignore xla::swig::ComputationBuilder::IsConstant;
+%unignore xla::swig::ComputationBuilder::Eq;
+%unignore xla::swig::ComputationBuilder::Ne;
+%unignore xla::swig::ComputationBuilder::Ge;
+%unignore xla::swig::ComputationBuilder::Gt;
+%unignore xla::swig::ComputationBuilder::Lt;
+%unignore xla::swig::ComputationBuilder::Le;
+%unignore xla::swig::ComputationBuilder::Dot;
+%unignore xla::swig::ComputationBuilder::DotGeneral;
+%unignore xla::swig::ComputationBuilder::ConvGeneralDilated;
+%unignore xla::swig::ComputationBuilder::Add;
+%unignore xla::swig::ComputationBuilder::Sub;
+%unignore xla::swig::ComputationBuilder::Mul;
+%unignore xla::swig::ComputationBuilder::Div;
+%unignore xla::swig::ComputationBuilder::Rem;
+%unignore xla::swig::ComputationBuilder::Max;
+%unignore xla::swig::ComputationBuilder::Min;
+%unignore xla::swig::ComputationBuilder::And;
+%unignore xla::swig::ComputationBuilder::Or;
+%unignore xla::swig::ComputationBuilder::Xor;
+%unignore xla::swig::ComputationBuilder::ShiftLeft;
+%unignore xla::swig::ComputationBuilder::ShiftRightArithmetic;
+%unignore xla::swig::ComputationBuilder::ShiftRightLogical;
+%unignore xla::swig::ComputationBuilder::Not;
+%unignore xla::swig::ComputationBuilder::Clz;
+%unignore xla::swig::ComputationBuilder::Abs;
+%unignore xla::swig::ComputationBuilder::Exp;
+%unignore xla::swig::ComputationBuilder::Expm1;
+%unignore xla::swig::ComputationBuilder::Floor;
+%unignore xla::swig::ComputationBuilder::Ceil;
+%unignore xla::swig::ComputationBuilder::Round;
+%unignore xla::swig::ComputationBuilder::Log;
+%unignore xla::swig::ComputationBuilder::Log1p;
+%unignore xla::swig::ComputationBuilder::Sign;
+%unignore xla::swig::ComputationBuilder::Cos;
+%unignore xla::swig::ComputationBuilder::Sin;
+%unignore xla::swig::ComputationBuilder::Tanh;
+%unignore xla::swig::ComputationBuilder::Atan2;
+%unignore xla::swig::ComputationBuilder::IsFinite;
+%unignore xla::swig::ComputationBuilder::Pow;
+%unignore xla::swig::ComputationBuilder::Neg;
+%unignore xla::swig::ComputationBuilder::Sort;
+%unignore xla::swig::ComputationBuilder::SortKeyVal;
+%unignore xla::swig::ComputationBuilder::Sqrt;
+%unignore xla::swig::ComputationBuilder::Rsqrt;
+%unignore xla::swig::ComputationBuilder::Square;
+%unignore xla::swig::ComputationBuilder::Reciprocal;
+%unignore xla::swig::ComputationBuilder::Erfc;
+%unignore xla::swig::ComputationBuilder::Erf;
+%unignore xla::swig::ComputationBuilder::ErfInv;
+%unignore xla::swig::ComputationBuilder::Lgamma;
+%unignore xla::swig::ComputationBuilder::Digamma;
+%unignore xla::swig::ComputationBuilder::Acos;
+%unignore xla::swig::ComputationBuilder::Asin;
+%unignore xla::swig::ComputationBuilder::Atan;
+%unignore xla::swig::ComputationBuilder::Tan;
+%unignore xla::swig::ComputationBuilder::Acosh;
+%unignore xla::swig::ComputationBuilder::Asinh;
+%unignore xla::swig::ComputationBuilder::Atanh;
+%unignore xla::swig::ComputationBuilder::Cosh;
+%unignore xla::swig::ComputationBuilder::Sinh;
+%unignore xla::swig::ComputationBuilder::Real;
+%unignore xla::swig::ComputationBuilder::Imag;
+%unignore xla::swig::ComputationBuilder::Conj;
+%unignore xla::swig::ComputationBuilder::Complex;
+%unignore xla::swig::ComputationBuilder::Cholesky;
+%unignore xla::swig::ComputationBuilder::QR;
+%unignore xla::swig::ComputationBuilder::TriangularSolve;
+%unignore xla::swig::ComputationBuilder::CustomCall;
+%unignore xla::swig::ComputationBuilder::Gather;
+%unignore xla::swig::ComputationBuilder::Scatter;
+%unignore xla::swig::DeleteComputation;
 %unignore xla::swig::DeleteLocalShapedBuffer;
-%unignore xla::swig::DeleteXrtAllocation;
-%unignore xla::swig::DeleteCompiledLocalComputation;
-%unignore xla::swig::DeleteCompiledXrtComputation;
+%unignore xla::swig::DeleteLocalExecutable;
 
 %thread;
 %include "tensorflow/compiler/xla/python/local_computation_builder.h"
diff --git a/tensorflow/compiler/xla/python/numpy_bridge.cc b/tensorflow/compiler/xla/python/numpy_bridge.cc
index 8e056f9..74f45b7 100644
--- a/tensorflow/compiler/xla/python/numpy_bridge.cc
+++ b/tensorflow/compiler/xla/python/numpy_bridge.cc
@@ -127,28 +127,42 @@
   }
 }
 
-PyObject* PyShapeInfoFromXlaShape(const Shape& shape) {
+Safe_PyObjectPtr PyShapeInfoFromXlaShape(const Shape& shape) {
   int np_typenum = PrimitiveTypeToNumpyType(shape.element_type());
   PyArray_Descr* np_dtype = PyArray_DescrFromType(np_typenum);
 
-  PyObject* dimensions;
+  Safe_PyObjectPtr dimensions;
   if (shape.IsTuple()) {
     int num_elements = ShapeUtil::TupleElementCount(shape);
-    dimensions = PyTuple_New(ShapeUtil::TupleElementCount(shape));
+    dimensions = make_safe(PyTuple_New(ShapeUtil::TupleElementCount(shape)));
     for (int i = 0; i < num_elements; ++i) {
       PyTuple_SET_ITEM(
-          dimensions, i,
-          PyShapeInfoFromXlaShape(ShapeUtil::GetTupleElementShape(shape, i)));
+          dimensions.get(), i,
+          PyShapeInfoFromXlaShape(ShapeUtil::GetTupleElementShape(shape, i))
+              .release());
     }
   } else {
     int rank = shape.rank();
-    dimensions = PyTuple_New(rank);
+    dimensions = make_safe(PyTuple_New(rank));
     for (int i = 0; i < rank; ++i) {
-      PyTuple_SET_ITEM(dimensions, i,
+      PyTuple_SET_ITEM(dimensions.get(), i,
                        LongToPyIntOrPyLong(ShapeUtil::GetDimension(shape, i)));
     }
   }
-  return PyTuple_Pack(2, np_dtype, dimensions);
+  return make_safe(PyTuple_Pack(2, np_dtype, dimensions.release()));
+}
+
+Safe_PyObjectPtr PyProgramShapeInfoFromXlaProgramShape(
+    const ProgramShape& shape) {
+  Safe_PyObjectPtr arg_shapes = make_safe(PyTuple_New(shape.parameters_size()));
+  for (int i = 0; i < shape.parameters_size(); ++i) {
+    PyTuple_SET_ITEM(arg_shapes.get(), i,
+                     PyShapeInfoFromXlaShape(shape.parameters(i)).release());
+  }
+
+  Safe_PyObjectPtr result_shape = PyShapeInfoFromXlaShape(shape.result());
+  return make_safe(
+      PyTuple_Pack(2, arg_shapes.release(), result_shape.release()));
 }
 
 // Precondition: o->ob_type == &PyArrayDescr_Type
@@ -553,6 +567,92 @@
 
 }  // namespace numpy
 
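+// Reads the integer attribute `field` of Python object `o` into `*result`.
+// Returns true on success; returns false with a Python error set on failure.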
+bool GetIntAttr(PyObject* o, const char* field, int64* result) {
+  PyObject* fo = PyObject_GetAttrString(o, field);
+  if (!fo) {
+    return false;
+  }
+  const int64 value = numpy::PyIntOrPyLongToLong(fo);
+  if (value == -1 && PyErr_Occurred()) {
+    Py_DECREF(fo);
+    return false;
+  }
+  Py_DECREF(fo);
+  *result = value;
+  return true;
+}
+
+// Returns "ok"; true if there is no error, false if there was an error.
+bool HandleStringAttribute(PyObject* o, const char* attr_name,
+                           std::function<void(string s)> f) {
+  if (!PyObject_HasAttrString(o, attr_name)) {
+    return true;  // It's ok for the object to not have the attribute.
+  }
+  PyObject* attr = PyObject_GetAttrString(o, attr_name);
+  if (attr == nullptr) {
+    return false;  // An error occurred getting the attribute.
+  }
+  if (attr == Py_None) {
+    Py_DECREF(attr);
+    return true;  // The attribute is None, which we consider ok.
+  }
+#if PY_MAJOR_VERSION < 3
+  if (!PyString_Check(attr)) {
+    string message = absl::StrFormat("%s must be a string or none; got %s",
+                                     attr_name, numpy::PyObjectCppRepr(attr));
+    PyErr_SetString(PyExc_TypeError, message.c_str());
+    Py_DECREF(attr);
+    return false;  // Type error, not ok.
+  }
+  f(PyString_AsString(attr));
+#else
+  if (!PyBytes_Check(attr)) {
+    string message = absl::StrFormat("%s must be a string or none; got %s",
+                                     attr_name, numpy::PyObjectCppRepr(attr));
+    PyErr_SetString(PyExc_TypeError, message.c_str());
+    Py_DECREF(attr);
+    return false;  // Type error, not ok.
+  }
+  f(PyBytes_AsString(attr));
+#endif
+
+  Py_DECREF(attr);
+  return true;  // Handled string attribute, ok!
+}
+
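+// Copies the Python sequence-of-ints attribute `attr_name` of `o` into the
+// given repeated proto field. Returns true on success; returns false with a
+// Python error set on failure.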
+bool HandleRepeatedInt64Attribute(
+    PyObject* o, const char* attr_name,
+    tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>* field) {
+  PyObject* seq = PyObject_GetAttrString(o, attr_name);
+  if (!seq) {
+    return false;
+  }
+
+  int length = PySequence_Size(seq);
+  if (length == -1) {
+    Py_DECREF(seq);
+    return false;
+  }
+
+  for (int i = 0; i < length; ++i) {
+    PyObject* item = PySequence_GetItem(seq, i);
+    if (!item) {
+      Py_DECREF(seq);
+      return false;
+    }
+    const int64 dimension = numpy::PyIntOrPyLongToLong(item);
+    if (dimension == -1 && PyErr_Occurred()) {
+      Py_DECREF(item);
+      Py_DECREF(seq);
+      return false;
+    }
+    *field->Add() = dimension;
+    Py_DECREF(item);
+  }
+  Py_DECREF(seq);
+  return true;
+}
+
 }  // namespace swig
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/python/numpy_bridge.h b/tensorflow/compiler/xla/python/numpy_bridge.h
index 737fc4b..eff8cda 100644
--- a/tensorflow/compiler/xla/python/numpy_bridge.h
+++ b/tensorflow/compiler/xla/python/numpy_bridge.h
@@ -64,7 +64,13 @@
 // providing the array dimensions.
 //
 // The return value is a new reference.
-PyObject* PyShapeInfoFromXlaShape(const Shape& shape);
+Safe_PyObjectPtr PyShapeInfoFromXlaShape(const Shape& shape);
+
+// Returns a pair of (arg_shapes, result_shape), where arg_shapes is a tuple
+// of argument shapes and result_shape is the result shape. Each shape is as
+// described in PyShapeInfoFromXlaShape's comment.
+Safe_PyObjectPtr PyProgramShapeInfoFromXlaProgramShape(
+    const ProgramShape& shape);
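+//
+// For example, a program shape with parameters (f32[2,3], s32[]) and result
+// f32[2] is rendered as:
+//
+//   (((dtype('float32'), (2, 3)), (dtype('int32'), ())),
+//    (dtype('float32'), (2,)))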
 
 // Converts a Python object with a method interface matching that of
 // xla_client.Shape into an XLA Shape object.
@@ -130,6 +136,18 @@
 
 }  // namespace numpy
 
+// Miscellaneous swig helpers that don't have a better home.
+
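+// Reads the integer attribute `field` of `o` into `*result`; returns false
+// (with a Python error set) on failure.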
+bool GetIntAttr(PyObject* o, const char* field, int64* result);
+
+// Returns "ok"; true if there is no error, false if there was an error.
+bool HandleStringAttribute(PyObject* o, const char* attr_name,
+                           std::function<void(string s)> f);
+
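+// Copies the sequence-of-ints attribute `attr_name` of `o` into `field`;
+// returns false (with a Python error set) on failure.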
+bool HandleRepeatedInt64Attribute(
+    PyObject* o, const char* attr_name,
+    tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>* field);
+
 }  // namespace swig
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/python/pywrap_xla_exported_symbols.lds b/tensorflow/compiler/xla/python/pywrap_xla_exported_symbols.lds
index bce6c1a..ef77ed3 100644
--- a/tensorflow/compiler/xla/python/pywrap_xla_exported_symbols.lds
+++ b/tensorflow/compiler/xla/python/pywrap_xla_exported_symbols.lds
@@ -1 +1,2 @@
 _PyInit__pywrap_xla
+_init_pywrap_xla
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index 6a7a27d..9019a97 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -34,13 +34,32 @@
 from tensorflow.compiler.xla.python import pywrap_xla as c_api
 from tensorflow.compiler.xla.service import hlo_pb2
 
+# Import the XRT backend, if available.
+try:
+  # pylint: disable=g-import-not-at-top
+  from tensorflow.compiler.xla.python import pywrap_xrt as xrt_api
+except ImportError:
+  xrt_api = None
+
 
 # Most functions are snake_case for consistency with other modules, whereas
-# method names of ComputationBuilder and LocalComputation are CamelCase for
+# method names of ComputationBuilder and Computation are CamelCase for
 # consistency with XLA.
 # pylint: disable=invalid-name
 
 
+# Version of the XLA Python client.
+#
+# JAX packages the XLA python plugin as a binary pip module (jaxlib) that is
+# packaged separately from the Python code that consumes it (jax).
+#
+# We occasionally need to make backwards-incompatible changes to jaxlib, in
+# which case we need to be able to detect when incompatible versions are
+# installed.
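+#
+# For example (illustrative only), a consumer could guard against a stale
+# binary install with:
+#
+#   if xla_client.version() < (0, 1, 8):
+#     raise RuntimeError('jaxlib is too old; please upgrade')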
+def version():
+  return (0, 1, 8)
+
+
 _OP_METADATA_FIELDS = [
     'op_type',
     'op_name',
@@ -55,6 +74,10 @@
   """Abstract base class for XLA backends."""
 
   @abc.abstractmethod
+  def device_count(self):
+    """Returns the number of devices known to the backend."""
+
+  @abc.abstractmethod
   def buffer_from_pyval(self, pyval, device=0):
     """Allocates a fresh buffer and populates it with `pyval`."""
 
@@ -67,7 +90,8 @@
     """Destructures a tuple buffer into a sequence of buffers."""
 
   @abc.abstractmethod
-  def compile(self, computation, argument_shapes, compile_options):
+  def compile(self, computation, argument_shapes, result_shape,
+              compile_options):
     """Compiles a computation. Returns an executable."""
 
   @abc.abstractmethod
@@ -83,25 +107,41 @@
     """Runs an executable in a replicated manner."""
 
 
+def _maybe_encode_string(s):
+  if six.PY3:
+    return s.encode('utf-8')
+  else:
+    return s
+
+
 class XlaLocalBackend(Backend):
   """XLA backend implemented using the in-process xla::LocalClient API."""
 
+  def __init__(self, platform=None):
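+    # Platform names are those registered with XLA, e.g. 'Host' for the
+    # in-process CPU backend; other names (such as 'CUDA') select other
+    # registered platforms when available.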
+    platform = platform or _get_default_platform_name()
+    self.client = c_api.LocalClient.Get(_maybe_encode_string(platform))
+    self._delete_buffer = c_api.DeleteLocalShapedBuffer
+    self._delete_executable = c_api.DeleteLocalExecutable
+
+  def device_count(self):
+    return self.client.DeviceCount()
+
   def buffer_from_pyval(self, pyval, device=0):
-    return c_api.LocalShapedBuffer.FromLiteral(pyval, None, device)
+    return c_api.LocalShapedBuffer.FromLiteral(pyval, None, self.client, device)
 
   def delete_buffer(self, c_buffer):
-    c_api.DeleteLocalShapedBuffer(c_buffer)
+    self._delete_buffer(c_buffer)
 
   def destructure_tuple(self, c_buffer):
-    result = c_api.DestructureLocalShapedBufferTuple(c_buffer)
+    result = c_buffer.DestructureTuple()
     return [result.Release(i) for i in xrange(result.size())]
 
-  def compile(self, c_computation, argument_shapes, compile_options):
-    return c_computation.Compile(argument_shapes, compile_options)
+  def compile(self, c_computation, argument_shapes, result_shape,
+              compile_options):
+    return c_computation.Compile(argument_shapes, compile_options, self.client)
 
   def delete_executable(self, executable):
-    assert isinstance(executable, c_api.CompiledLocalComputation)
-    c_api.DeleteCompiledLocalComputation(executable)
+    self._delete_executable(executable)
 
   def execute(self, executable, args):
     return executable.Execute(args)
@@ -117,29 +157,35 @@
 
   def __init__(self, target):
     self.target = target
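+    # `target` is assumed to be a TensorFlow session target (for example a
+    # 'grpc://host:port' address) identifying the XRT server to talk to.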
+    self._delete_buffer = xrt_api.DeleteXrtAllocation
+    self._delete_executable = xrt_api.DeleteXrtExecutable
+
+  def device_count(self):
+    return 1  # Multidevice execution not implemented.
 
   def buffer_from_pyval(self, pyval, device=0):
     if device != 0:
       raise NotImplementedError(
           'Multi-replica execution is not yet supported via the XRT backend.')
-    return c_api.XrtAllocation.FromLiteral(pyval,
-                                           _maybe_encode_string(self.target))
+    return xrt_api.XrtAllocation.FromLiteral(pyval,
+                                             _maybe_encode_string(self.target))
 
   def delete_buffer(self, c_buffer):
-    c_api.DeleteXrtAllocation(c_buffer)
+    self._delete_buffer(c_buffer)
 
   def destructure_tuple(self, c_buffer):
-    result = c_api.DestructureXrtAllocationTuple(
+    result = xrt_api.DestructureXrtAllocationTuple(
         c_buffer, _maybe_encode_string(self.target))
     return [result.Release(i) for i in xrange(result.size())]
 
-  def compile(self, c_computation, argument_shapes, compile_options):
-    return c_computation.CompileForXrt(argument_shapes,
-                                       _maybe_encode_string(self.target))
+  def compile(self, c_computation, argument_shapes, result_shape,
+              compile_options):
+    return xrt_api.XrtExecutable.CompileForXrt(
+        c_computation.GetSerializedProto(), argument_shapes, result_shape,
+        _maybe_encode_string(self.target))
 
   def delete_executable(self, executable):
-    assert isinstance(executable, c_api.CompiledXrtComputation)
-    c_api.DeleteCompiledXrtComputation(executable)
+    self._delete_executable(executable)
 
   def execute(self, executable, args):
     return executable.Execute(args)
@@ -151,7 +197,20 @@
     return [executable.Execute(per_replica_args[0])]
 
 
-XLA_LOCAL_BACKEND = XlaLocalBackend()
+_default_platform_name = 'Host'
+_default_backend = None
+
+
+def _get_default_platform_name():
+  return _default_platform_name
+
+
+def _get_default_local_backend():
+  global _default_backend
+  global _default_platform_name
+  if _default_backend is None:
+    _default_backend = XlaLocalBackend(_default_platform_name)
+  return _default_backend
 
 
 class BackendType(enum.Enum):
@@ -162,7 +221,7 @@
 def BackendSpec(backend, target):
   """Compatibility wrapper to support older clients. Do not use in new code."""
   if backend == BackendType.XLA_LOCAL:
-    return XLA_LOCAL_BACKEND
+    return _get_default_local_backend()
   elif backend == BackendType.XRT:
     return XrtBackend(target)
   else:
@@ -189,13 +248,6 @@
       source_line=lineno)
 
 
-def _maybe_encode_string(s):
-  if six.PY3:
-    return s.encode('utf-8')
-  else:
-    return s
-
-
 class PaddingType(enum.Enum):
   VALID = 1
   SAME = 2
@@ -233,6 +285,7 @@
 
 _UNARY_OPS = [
     'Not',
+    'Clz',
     'Abs',
     'Exp',
     'Expm1',
@@ -334,22 +387,18 @@
   means the referent is in device memory.
   """
 
-  def __init__(self, c_buffer, backend, replica):
+  def __init__(self, c_buffer, backend, device):
     self.c_buffer = c_buffer
     self._backend = backend
-    self._replica = replica
+    self._device = device
 
   @staticmethod
-  def from_pyval(pyval, replica=0, backend=XLA_LOCAL_BACKEND):
+  def from_pyval(pyval, device=0, backend=None):
     """Allocate and copy to XLA the given python value."""
+    backend = backend or _get_default_local_backend()
     pyval = require_numpy_array_layout(pyval)
-    num_replicas = get_replica_count()
-    if not 0 <= replica < num_replicas:
-      raise ValueError(
-          'Attempt to place buffer on replica {} when the replica count is {}'
-          .format(replica, num_replicas))
-    cbuf = backend.buffer_from_pyval(pyval, replica)
-    return LocalBuffer(cbuf, backend, replica)
+    cbuf = backend.buffer_from_pyval(pyval, device)
+    return LocalBuffer(cbuf, backend, device)
 
   def to_py(self):
     return self.c_buffer.ToLiteral()
@@ -357,8 +406,8 @@
   def shape(self):
     return _wrap_shape(self.c_buffer.shape())
 
-  def replica(self):
-    return self._replica
+  def device(self):
+    return self._device
 
   def delete(self):
     if self.c_buffer is not None:
@@ -371,7 +420,7 @@
     result = self._backend.destructure_tuple(self.c_buffer)
     self.delete()
     return tuple(
-        LocalBuffer(sub_buffer, replica=self._replica, backend=self._backend)
+        LocalBuffer(sub_buffer, device=self._device, backend=self._backend)
         for sub_buffer in result)
 
   def is_deleted(self):
@@ -521,6 +570,16 @@
     updated._check_minor_to_major()  # pylint: disable=protected-access
     return updated
 
+  def with_major_to_minor_layout_if_absent(self):
+    """Returns a copy of a shape with missing layouts set to major-to-minor."""
+
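+    # For example, a rank-3 array leaf with no layout is given
+    # minor_to_major == (2, 1, 0), the default major-to-minor ordering;
+    # leaves that already have a layout are left unchanged.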
+    def f(a):
+      if a.minor_to_major():
+        return None
+      return a.update_minor_to_major(tuple(xrange(a.rank() - 1, -1, -1)))
+
+    return self.map_leaves(f)
+
   def serialize(self, proto):
     """Serializes 'shape' into proto."""
     if self.is_tuple():
@@ -536,6 +595,10 @@
         proto.layout.minor_to_major.extend(self.minor_to_major())
 
 
+ProgramShape = collections.namedtuple('ProgramShape',
+                                      ('parameter_shapes', 'result_shape'))
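+# ProgramShape mirrors the (arg_shapes, result_shape) pair produced by the
+# SWIG layer (see _wrap_program_shape below): parameter_shapes is a list of
+# wrapped Shape objects and result_shape is a single wrapped Shape.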
+
+
 def _wrap_shape(shape_info):
   dtype, dims = shape_info
   element_type = DTYPE_TO_XLA_ELEMENT_TYPE[str(dtype)]
@@ -546,6 +609,12 @@
     return Shape.array_shape(dtype, dims)
 
 
+def _wrap_program_shape(shape_info):
+  arg_shapes, result_shape = shape_info
+  return ProgramShape([_wrap_shape(arg) for arg in arg_shapes],
+                      _wrap_shape(result_shape))
+
+
 def require_numpy_array_layout(value):
   if isinstance(value, tuple):
     return tuple(require_numpy_array_layout(x) for x in value)
@@ -569,7 +638,7 @@
     self.num_replicas = get_replica_count()
 
 
-def transfer_to_infeed(value, replica_number=None):
+def transfer_to_infeed(value, device_ordinal=0):
   """Transfers the given value into the XLA infeed queue.
 
   XLA's infeed queue is a single queue that feeds the "XLA virtual machine" with
@@ -579,52 +648,50 @@
   Args:
     value: the value that the caller would like to enqueue into the XLA infeed
       queue
-    replica_number: the replica number to infeed the value to -- if not
-      provided, then the default replica (trivially replica 0) is used.
+    device_ordinal: the device to infeed the value to. Each device has a
+      distinct infeed queue.
   """
-  if replica_number is None:
-    c_api.TransferToInfeedLocal(require_numpy_array_layout(value))
-  else:
-    c_api.TransferToInfeedLocalReplica(
-        require_numpy_array_layout(value), replica_number)
+  # TODO(phawkins): support non-default backends.
+  backend = _get_default_local_backend()
+  backend.client.TransferToInfeed(
+      require_numpy_array_layout(value), device_ordinal)
 
 
-def transfer_from_outfeed(shape, replica_number=None):
-  """Transfers a literal of the given shape from replica_number's outfeed.
+def transfer_from_outfeed(shape, device_ordinal=0):
+  """Transfers a literal of the given shape from `device_ordinal`'s outfeed.
 
   Args:
     shape: The shape of the value to transfer from outfeed.
-    replica_number: The replica number ordinal to transfer the outfeed value
-      from. (Each replica has a distinct outfeed queue.)
+    device_ordinal: The device ordinal to transfer the outfeed value from. Each
+      device has a distinct outfeed queue.
 
   Returns:
     The literal value that is produced from the outfeed queue.
   """
-  return c_api.TransferFromOutfeedLocalReplica(shape, replica_number or 0)
+  # TODO(phawkins): support non-default backends.
+  backend = _get_default_local_backend()
+  return backend.client.TransferFromOutfeed(shape, device_ordinal)
 
 
-class LocalComputation(object):
-  """Python wrapper for a local XLA Computation.
+class Computation(object):
+  """Python wrapper for an XLA Computation.
 
-  A LocalComputation can be executed if it is compiled. Otherwise, it
-  can still be used as a Computation where required by the
-  ComputationBuilder methods.
+  A Computation can be compiled to form an Executable, or used as a
+  subcomputation in ComputationBuilder methods.
   """
 
-  def __init__(self, c_computation, is_compiled, backend=XLA_LOCAL_BACKEND):
+  def __init__(self, c_computation, backend=None):
     self._c_computation = c_computation
+    # The backend argument is deprecated. Pass a backend to Compile() instead.
     self._backend = backend
-    self._is_compiled = is_compiled
+    self._delete_computation = c_api.DeleteComputation
 
   @property
   def computation(self):
-    if self._is_compiled:
-      raise ValueError(
-          'Attempt to read the XLA computation of a compiled LocalComputation.')
     return self._c_computation
 
   def GetProto(self):
-    """Get the HloModuleProto proto object in this local computation.
+    """Get the HloModuleProto proto object in this computation.
 
     Returns:
        An HloModuleProto proto object that has the whole-graph information.
@@ -633,30 +700,41 @@
     proto = hlo_pb2.HloModuleProto.FromString(serialized)
     return proto
 
-  def Compile(self, argument_shapes=(), compile_options=None, layout_fn=None):
-    """Compiles an un-compiled local computation.
+  def GetHloText(self):
+    """Get the textual HLO representation of this computation.
 
-    Local computations are the result of a "LocalComputationBuild'ing" process
-    -- they start in uncompiled form, and via a call to Compile() turn into a
-    compiled local computation.
+    Returns:
+       A string containing the textual HLO.
+    """
+    return self.computation.GetHloText()
 
-    Raises:
-      ValueError: if this is already a compiled local computation.
+  def GetHloDotGraph(self):
+    """Get a Graphviz Dot representation of this computation.
+
+    Returns:
+       A string containing the graphviz dot graph.
+    """
+    return self.computation.GetHloDotGraph()
+
+  def Compile(self, argument_shapes=(), compile_options=None, layout_fn=None,
+              backend=None):
+    """Compiles a computation.
+
+    Computations are the result of a ComputationBuilder build process.
 
     Arguments:
       argument_shapes: parameter shapes -- they are first laid out by layout_fn
         if layout_fn is provided. Otherwise, the default layout for those shapes
         will be used.
-      compile_options: options to use for compilation, includes an optional
-        laid out result shape for the computation.
+      compile_options: options to use for compilation, includes an optional laid
+        out result shape for the computation.
       layout_fn: lambda that is used to lay out the argument/result shapes.
+      backend: a `Backend` for which an executable should be generated.
 
     Returns:
-      A newly *compiled* local computation instance.
+      An Executable instance.
     """
-    if self._is_compiled:
-      raise ValueError('Attempt to compile a compiled local XLA computation.')
-
+    backend = backend or self._backend or _get_default_local_backend()
     result_shape = _wrap_shape(self.computation.GetReturnValueShape())
 
     if layout_fn:
@@ -669,29 +747,52 @@
 
     compile_options = compile_options or CompileOptions()
     compile_options.result_shape = result_shape
-    c = self._backend.compile(self.computation, argument_shapes,
-                              compile_options)
-    return LocalComputation(c, is_compiled=True, backend=self._backend)
+    c = backend.compile(self.computation, argument_shapes, result_shape,
+                        compile_options)
+    return Executable(c, backend=backend)
 
   def CompileWithExampleArguments(self,
                                   arguments=(),
                                   compile_options=None,
-                                  layout_fn=None):
+                                  layout_fn=None,
+                                  backend=None):
     return self.Compile(
         argument_shapes=[Shape.from_pyval(arg) for arg in arguments],
         compile_options=compile_options,
-        layout_fn=layout_fn)
+        layout_fn=layout_fn,
+        backend=backend)
+
+  def GetProgramShape(self):
+    return _wrap_program_shape(self._c_computation.GetProgramShape())
 
   def GetReturnValueShape(self):
     return _wrap_shape(self._c_computation.GetReturnValueShape())
 
+  def __del__(self):
+    if self._c_computation:
+      self._delete_computation(self._c_computation)
+
+
+class Executable(object):
+  """Python wrapper for an XLA Executable."""
+
+  def __init__(self, c_executable, backend=None):
+    self._c_executable = c_executable
+    self._device_ordinals = c_executable.DeviceOrdinals()
+    self._backend = backend
+
+  def DeviceOrdinals(self):
+    """Returns a list containing the device ordinals for each replica."""
+    return self._device_ordinals
+
   def Execute(self, arguments=(), check_for_deleted_args=True):
     """Execute on one replica with LocalBuffer arguments and return value."""
     if check_for_deleted_args and any(arg.is_deleted() for arg in arguments):
       raise ValueError('Executing with deleted local buffer argument')
     raw_args = [arg.c_buffer for arg in arguments]
-    output_buffer = self._backend.execute(self._c_computation, raw_args)
-    return LocalBuffer(output_buffer, backend=self._backend, replica=0)
+    output_buffer = self._backend.execute(self._c_executable, raw_args)
+    return LocalBuffer(
+        output_buffer, backend=self._backend, device=self._device_ordinals[0])
 
   def ExecutePerReplica(self, arguments=None):
     """Execute on many replicas with LocalBuffer arguments and return value.
@@ -701,14 +802,12 @@
         sequence comprises the arguments for execution on the i'th replica.
 
     Returns:
-      A list of the computation's outputs on each replica, as a LocalBuffer. If
+      A list of the computation's outputs for each replica, as LocalBuffers. If
       a shallow sequence of arguments was passed in for `arguments`, then the
       sole, zero'th replica's output is returned instead, as a LocalBuffer.
     """
-    if not self._is_compiled:
-      raise ValueError('Cannot execute an uncompiled local XLA computation.')
     if arguments is None:
-      arguments = ((),) * get_replica_count()
+      arguments = ((),) * len(self._device_ordinals)
     else:
       arguments = [list(replica_args) for replica_args in arguments]
 
@@ -717,30 +816,35 @@
       for arg in replica_args:
         if arg.is_deleted():
           raise ValueError('Executing with deleted local buffer argument')
-        if arg.replica() != replica:
+        if arg.device() != self._device_ordinals[replica]:
           raise ValueError(
-              'Executing on replica {} with argument from replica {}'.format(
-                  replica, arg.replica()))
+              'Executing on device {} with argument from device {}'.format(
+                  self._device_ordinals[replica], arg.device()))
 
     # Pull out argument buffer handles
+    # pylint: disable=g-complex-comprehension
     stripped_args = [
         [arg.c_buffer for arg in replica_args] for replica_args in arguments
     ]
 
     # Execute
-    output_buffers = self._backend.execute_replicated(
-        self._c_computation, stripped_args)
+    output_buffers = self._backend.execute_replicated(self._c_executable,
+                                                      stripped_args)
 
     # Wrap output handles in LocalBuffer instances
     return tuple(
-        LocalBuffer(output_buffer, backend=self._backend, replica=replica)
+        LocalBuffer(
+            output_buffer,
+            backend=self._backend,
+            device=self._device_ordinals[replica])
         for replica, output_buffer in enumerate(output_buffers))
 
   def ExecuteWithPythonValues(self, arguments=()):
     """Execute on one replica with Python values as arguments and output."""
 
     def put(arg):
-      return LocalBuffer.from_pyval(arg, backend=self._backend)
+      return LocalBuffer.from_pyval(
+          arg, device=self._device_ordinals[0], backend=self._backend)
 
     arguments = [put(arg) for arg in arguments]
     return self.Execute(arguments).to_py()
@@ -748,21 +852,19 @@
   def ExecuteWithPythonValuesPerReplica(self, arguments):
     """Execute on many replicas with Python values as arguments and output."""
 
-    def put(arg, replica):
-      return LocalBuffer.from_pyval(arg, replica, backend=self._backend)
+    def put(arg, device):
+      return LocalBuffer.from_pyval(arg, device, backend=self._backend)
 
-    arguments = [[put(arg, replica)
-                  for arg in replica_args]
-                 for replica, replica_args in enumerate(arguments)]
+    # pylint: disable=g-complex-comprehension
+    arguments = [[
+        put(arg, self._device_ordinals[replica]) for arg in replica_args
+    ] for replica, replica_args in enumerate(arguments)]
     return [out.to_py() for out in self.ExecutePerReplica(arguments)]
 
   def __del__(self):
-    # Ensure a reference to C-based destructor for use in __del__.
-    if self._is_compiled:
-      self._backend.delete_executable(self._c_computation)
-    else:
-      assert isinstance(self._c_computation, c_api.LocalComputation)
-      c_api.DeleteLocalComputation(self._c_computation)
+    # Python may have freed c_api first.
+    if c_api and self._c_executable:
+      self._backend.delete_executable(self._c_executable)
 
 
 def _make_replica_group_proto(replica_group):
@@ -775,8 +877,8 @@
   """XLA computation builder.
 
   Enqueues XLA ops in sequence and in order to build a
-  LocalComputation, which in turn can be compiled into a
-  CompiledLocalComputation, which in turn can be locally executed.
+  Computation, which in turn can be compiled into an
+  Executable, which in turn can be locally executed.
   """
 
   # The methods of this class map 1-to-1 onto the XLA C++
@@ -787,16 +889,23 @@
   # pylint: disable=g-doc-args
 
   def __init__(self, name):
-    self._client = c_api.LocalComputationBuilder(name.encode('utf8'))
+    self._client = c_api.ComputationBuilder(name.encode('utf8'))
     self._parameter_numbering = itertools.count()
 
-  def Build(self, root=None, backend=XLA_LOCAL_BACKEND):
+  def Build(self, root=None, backend=None):
+    """Builds a `Computation` from the contents of the builder.
+
+    Args:
+      root: if not None, the operator containing the return value of the
+        computation.
+      backend: deprecated. Pass a `backend` to `Computation.Compile` instead.
+
+    Returns:
+      A `Computation`.
+    """
     if root is not None:
-      return LocalComputation(
-          self._client.BuildWithRoot(root), is_compiled=False, backend=backend)
+      return Computation(self._client.BuildWithRoot(root), backend=backend)
     else:
-      return LocalComputation(
-          self._client.Build(), is_compiled=False, backend=backend)
+      return Computation(self._client.Build(), backend=backend)
 
   def SetOpMetadata(self, op_metadata):
     """Set metadata for operations that are about to be enqueued."""
@@ -1448,7 +1557,7 @@
 
     Args:
       operand: a LocalOp to test.
-    Returns: a LocalComputation that is rooted on the given `operand` which is a
+    Returns: a Computation that is rooted on the given `operand`, which is a
       compile-time constant.
     """
     return self._client.BuildConstantSubGraph(operand)
@@ -1622,8 +1731,14 @@
                       conjugate_a=False,
                       unit_diagonal=False):
     """Enqueues a triangular-solve operation onto the computation."""
-    return self._client.TriangularSolve(a, b, left_side, lower, transpose_a,
-                                        conjugate_a, unit_diagonal)
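+    # The final argument maps (transpose_a, conjugate_a) onto the
+    # TriangularSolveOptions.Transpose enum: 1 = NO_TRANSPOSE, 2 = TRANSPOSE,
+    # 3 = ADJOINT. Conjugation without transposition has no enum value, so it
+    # is expressed by conjugating `a` explicitly.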
+    if not transpose_a:
+      transpose = 1
+      if conjugate_a:
+        a = self.Conj(a)
+    else:
+      transpose = 3 if conjugate_a else 2
+    return self._client.TriangularSolve(a, b, left_side, lower, unit_diagonal,
+                                        transpose)
 
   def Gather(self, a, start_indices, dimension_numbers, slice_sizes):
     """Enqueues a Gather operation onto the computation."""
@@ -1643,7 +1758,7 @@
 
   Set up methods, corresponding to unary and binary XLA operations,
   whose calls are forwarded in a boilerplate manner to the underlying
-  LocalComputationBuilder C-extension API.
+  ComputationBuilder C-extension API.
   """
 
   def forward_to_local_builder_with_handles(target_method, is_binop=False):
@@ -1663,13 +1778,13 @@
 
   for method_name in _UNARY_OPS:
     forward = forward_to_local_builder_with_handles(
-        getattr(c_api.LocalComputationBuilder, method_name))
+        getattr(c_api.ComputationBuilder, method_name))
     forward.__name__ = method_name
     setattr(ComputationBuilder, method_name, forward)
 
   for method_name in _BINARY_OPS:
     forward = forward_to_local_builder_with_handles(
-        getattr(c_api.LocalComputationBuilder, method_name), is_binop=True)
+        getattr(c_api.ComputationBuilder, method_name), is_binop=True)
     forward.__name__ = method_name
     setattr(ComputationBuilder, method_name, forward)
 
@@ -1677,8 +1792,14 @@
 _forward_methods_to_local_builder()
 
 
+_default_replica_count = 1
+
+
 def initialize_replica_count(replica_count):
-  """Initializes the desired replica count to use on XLA service init.
+  """Initializes the default replica count to use.
+
+  Deprecated; pass `num_replicas` as an option to `Computation.Compile()`
+  instead.
 
   Args:
     replica_count: number of replicas that are desired for set up during XLA
       initialization.
@@ -1687,31 +1808,30 @@
-  Raises:
-    A runtime exception if the XLA service has already been initialized.
   """
-  c_api.InitializeReplicaCount(replica_count)
-
-
-def initialize_platform_name(platform_name):
-  """Initializes the desired platform name to use on XLA service init.
-
-  Args:
-    platform_name: string name of platform.
-
-  Raises:
-    A runtime exception if the XLA service has already been initialized.
-    A runtime exception if the platform does not exist, or there are no devices
-    with that platform.
-  """
-  platform_name = _maybe_encode_string(platform_name)
-  c_api.InitializePlatformName(platform_name)
+  global _default_replica_count
+  _default_replica_count = replica_count
 
 
 def get_replica_count():
-  """Returns the current replica count used for the XLA service.
+  """Returns the default replica count.
 
-  Note: this will return a value whether the XLA service has been initialized
-  yet or not.
+  Deprecated; pass `num_replicas` as an option to `Computation.Compile()`
+  instead.
   """
-  return c_api.GetReplicaCount()
+  return _default_replica_count
+
+
+def initialize_platform_name(platform_name):
+  """Initializes the default platform name to use for XLA.
+
+  Args:
+    platform_name: string name of platform.
+  """
+  global _default_platform_name
+  _default_platform_name = platform_name
+
+  # Make sure the platform is valid by trying to instantiate it.
+  _get_default_local_backend()
 
 
 def register_cpu_custom_call_target(name, fn):
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
index aa38c06..51ef7d7 100644
--- a/tensorflow/compiler/xla/python/xla_client_test.py
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -29,7 +29,7 @@
 import unittest
 
 
-class LocalComputationTest(unittest.TestCase):
+class ComputationTest(unittest.TestCase):
   """Base class for running an XLA Computation through the local client."""
 
   def _NewComputation(self, name=None):
@@ -85,7 +85,27 @@
   return np.array(*args, dtype=np.bool, **kwargs)
 
 
-class ComputationsWithConstantsTest(LocalComputationTest):
+class ComputationPrinting(unittest.TestCase):
+
+  def ExampleComputation(self):
+    builder = xla_client.ComputationBuilder("acomputation")
+    p0 = builder.ParameterFromNumpy(np.float32(0))
+    p1 = builder.ParameterFromNumpy(np.zeros((4,), np.float32))
+    builder.Mul(p0, p1)
+    return builder.Build()
+
+  def testComputationToHloText(self):
+    computation = self.ExampleComputation()
+    hlo_text = computation.GetHloText()
+    self.assertTrue(hlo_text.startswith("HloModule acomputation"))
+
+  def testComputationToHloGraph(self):
+    computation = self.ExampleComputation()
+    hlo_dot_graph = computation.GetHloDotGraph()
+    self.assertTrue(hlo_dot_graph.startswith("digraph "))
+
+
+class ComputationsWithConstantsTest(ComputationTest):
   """Tests focusing on Constant ops."""
 
   def testConstantScalarSumS8(self):
@@ -304,7 +324,7 @@
     self._ExecuteAndCompareClose(c, expected=0.75)
 
 
-class ParametersTest(LocalComputationTest):
+class ParametersTest(ComputationTest):
   """Tests focusing on Parameter ops and argument-passing."""
 
   def setUp(self):
@@ -384,7 +404,7 @@
         expected=[-4.3, 1.3, -6.3, 3.3])
 
 
-class LocalBufferTest(LocalComputationTest):
+class LocalBufferTest(ComputationTest):
   """Tests focusing on execution with LocalBuffers."""
 
   def _Execute(self, c, arguments):
@@ -482,7 +502,7 @@
     self.assertEqual(np.dtype(xla_shape.element_type()), np.dtype(np.float32))
 
 
-class SingleOpTest(LocalComputationTest):
+class SingleOpTest(ComputationTest):
   """Tests for single ops.
 
   The goal here is smoke testing - to exercise the most basic functionality of
@@ -757,6 +777,12 @@
     c.Not(c.Constant(arr))
     self._ExecuteAndCompareClose(c, expected=~arr)
 
+  def testCountLeadingZeros(self):
+    c = self._NewComputation()
+    arr = NumpyArrayS32([0x7FFF, 0x12345678])
+    c.Clz(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=[17, 3])
+
   def testExp(self):
     c = self._NewComputation()
     arr = NumpyArrayF32([3.3, 12.1])
@@ -1175,7 +1201,7 @@
     np.testing.assert_allclose(g, expected, rtol=1e-4)
 
 
-class EmbeddedComputationsTest(LocalComputationTest):
+class EmbeddedComputationsTest(ComputationTest):
   """Tests for XLA graphs with embedded computations (such as maps)."""
 
   def _CreateConstantS32Computation(self):
@@ -1639,7 +1665,7 @@
     self._ExecuteAndCompareClose(c, expected=expected)
 
 
-class ErrorTest(LocalComputationTest):
+class ErrorTest(ComputationTest):
 
   def setUp(self):
     self.f32_scalar_2 = NumpyArrayF32(2.0)
@@ -1656,7 +1682,7 @@
         lambda: c.Build().CompileWithExampleArguments([self.f32_scalar_2]))
 
 
-class ComputationRootTest(LocalComputationTest):
+class ComputationRootTest(ComputationTest):
   """Tests related to setting the root of the computation."""
 
   def testComputationRootDifferentFromLastOp(self):
diff --git a/tensorflow/compiler/xla/python/xla_data.i b/tensorflow/compiler/xla/python/xla_data.i
new file mode 100644
index 0000000..974f314
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xla_data.i
@@ -0,0 +1,654 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// SWIG typemaps and declarations for building, compiling, and
+// executing XLA computations, wrapping most of what is declared in
+// xla_data.h.
+//
+// The typemaps below implement/assert the following correspondences
+// (with elaborations below):
+//
+//    C++                                  Python
+// -------------------------------------+---------------------------------------
+//  Span<int64>                        <-  sequence of int
+//  vector<int>                        ->  sequence of int
+//  Span<LocalOp>                      <-  sequence of LocalOp
+//  Literal                            <-> (nested tuple of) numpy ndarray
+//  std::vector<Literal>               <-  sequence of (nested tuple of) ndarray
+//  Shape                               -> pair holding (dtype, dimensions)
+//                                     <-  object duck-typed as xla_client.Shape
+//  ProgramShape                       ->  pair of ([arg_shapes], ret_shape)
+//  std::vector<Shape>                 <-  sequence of xla_client.Shape objects
+//  PrimitiveType                      <-  int
+//  Span<pair<int64, int64>>           <-  sequence of int pairs
+//  PaddingConfig proto                <-  corresponding Python proto
+//  ConvolutionDimensionNumbers proto  <-  corresponding Python proto
+//  DotDimensionNumbers proto          <-  corresponding Python proto
+//  GatherDimensionNumbers proto       <-  corresponding Python proto
+//  ScatterDimensionNumbers proto      <-  corresponding Python proto
+//  Span<ReplicaGroup proto>           <-  sequence of ReplicaGroup Python proto
+//
+// Arrows indicate whether a conversion only ever occurs in one
+// direction, or whether it is maintained bidirectionally.
+//
+// The Python objects corresponding to C++ Literals have the type:
+//
+//   T = ndarray | (T, ...)
+//
+// where a terminal numpy ndarray translates to a Literal with a
+// non-tuple Shape, an XLA primitive element type corresponding to the
+// ndarray's dtype. Meanwhile, a non-terminal "tuple of T" translates
+// to a tuple-shaped Literal whose tuple components are translated
+// recursively. For example, if x is a numpy ndarray in Python, with
+// shape (2, 3) and dtype of dtype('float32'), then x translates to a
+// Literal with rank 2, dimensions 2 and 3, and XLA primitive type
+// F32. Meanwhile,
+//
+//   (x, (x, x), (x,)),
+//
+// translates to a tuple-shaped XLA Literal, whose component subshapes
+// are a 2x3 F32-shaped literal followed by two tuple-shaped literals.
+//
+// Shapes output by C++ become Python objects with the type:
+//
+//   T            = (dtype, S)
+//   S            = DIMENSIONS | TUPLE_SHAPES
+//   DIMENSIONS   = (int, ...)
+//   TUPLE_SHAPES = (T, ...)
+//
+// In the pair described by the T rule, the terminal dtype determines
+// whether S expands as DIMENSIONS or TUPLE_SHAPES. Namely if it is
+// dtype('O'), numpy's object dtype, the structure represents a tuple
+// shape and the expansion of the non-terminal S is
+// TUPLE_SHAPES. Otherwise, dtype describes a primitive element type
+// and S expands into DIMENSIONS giving dimension sizes. For example:
+//
+//   (dtype('float32'), (3, 5, 7))
+//
+// describes a 3x5x7 array of F32s, and
+//
+//   (dtype('O'), ((dtype('float32'), (2, 3)),
+//                 (dtype('float64'), (4, 5))))
+//
+// describes a tuple shape with two subshapes: the first a 2x3 F32,
+// and the other a 4x5 F64.
+//
+// The Python int corresponding to a PrimitiveType enum must be valid
+// per xla_data.proto (e.g. xla_data.PRED, xla_data.F32).
+//
+// The SWIG object wrappers generated by this file are not intended
+// for end use, but rather for internal use in the Python XLA client,
+// xla_client.py.
+//
+// One central reason for the Python-side indirection is that the
+// Python-side objects produced by the typemaps in this file are
+// further packaged up by xla_client before being passed on. For
+// instance, the Python pair produced for a C++ Shape is further
+// wrapped in a Python class (xla_client.Shape) so as not to expose
+// the raw pair externally.
+//
+// Other SWIG object wrappers (e.g. of Computation) are further
+// wrapped by xla_client in order to set up a custom destructor that
+// triggers memory deallocation on the C++ side.
+
+%module(threads="1") xla_data
+
+// Keep the GIL except where explicitly specified.
+%nothread;
+
+%include "tensorflow/python/platform/base.i"
+
+%{
+// Must be included first
+#include "tensorflow/python/lib/core/numpy.h"
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/python/numpy_bridge.h"
+
+using namespace xla;
+using namespace xla::swig;
+
+%}
+
+// Basic types
+
+
+%typemap(out) std::vector<int> {
+  PyObject* out = PyList_New($1.size());
+  for (int i = 0; i < $1.size(); ++i) {
+    PyList_SET_ITEM(out, i, PyInt_FromLong($1[i]));
+  }
+  $result = out;
+}
+
+%typemap(out) StatusOr<bool> {
+  if ($1.ok()) {
+    $result = PyBool_FromLong($1.ConsumeValueOrDie());
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+}
+
+%typemap(out) StatusOr<string> {
+  if ($1.ok()) {
+    $result = PyString_FromString($1.ConsumeValueOrDie().c_str());
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+}
+
+%typemap(out) Status {
+  if (!$1.ok()) {
+    PyErr_SetString(
+        PyExc_RuntimeError, $1.ToString().c_str());
+    SWIG_fail;
+  }
+  Py_INCREF(Py_None);
+  $result = Py_None;
+}
+
+%typemap(in) absl::Span<const int64>
+    (std::vector<int64> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    SWIG_fail;
+  }
+  const int size = PySequence_Size($input);
+  temps.resize(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    PyObject* py_int = numpy::PyNumberToPyInt(o);
+    if (!py_int) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "Argument sequence element cannot be converted to int");
+      Py_DECREF(o);
+      SWIG_fail;
+    }
+    temps[i] = numpy::PyIntOrPyLongToLong(py_int);
+    if (temps[i] == -1 && PyErr_Occurred()) {
+      Py_DECREF(py_int);
+      Py_DECREF(o);
+      SWIG_fail;
+    }
+    Py_DECREF(py_int);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// Literal
+
+%typemap(in) const Literal& (StatusOr<Literal> literal_status) {
+  literal_status = numpy::XlaLiteralFromPyObject($input);
+  if (!literal_status.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str());
+    SWIG_fail;
+  }
+  $1 = &literal_status.ValueOrDie();
+}
+
+%typemap(out) Literal (StatusOr<numpy::Safe_PyObjectPtr> obj_status) {
+  obj_status = numpy::PyObjectFromXlaLiteral(*$1);
+  if (!obj_status.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, obj_status.status().ToString().c_str());
+    SWIG_fail;
+  }
+  $result = obj_status.ValueOrDie().release();
+}
+
+%typemap(out) StatusOr<Literal> (StatusOr<numpy::Safe_PyObjectPtr> obj_status) {
+  if (!$1.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+  obj_status = numpy::PyObjectFromXlaLiteral($1.ValueOrDie());
+  if (!obj_status.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, obj_status.status().ToString().c_str());
+    SWIG_fail;
+  }
+  $result = obj_status.ValueOrDie().release();
+}
+
+%typemap(in) const std::vector<Literal>& (std::vector<Literal> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    SWIG_fail;
+  }
+  const int size = PySequence_Size($input);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    StatusOr<Literal> literal_status = numpy::XlaLiteralFromPyObject(o);
+    if (!literal_status.ok()) {
+      PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str());
+      Py_DECREF(o);
+      SWIG_fail;
+    }
+    temps.push_back(literal_status.ConsumeValueOrDie());
+    Py_DECREF(o);
+  }
+  $1 = &temps;
+}
+
+// OpMetadata
+
+%typemap(in) const OpMetadata& (OpMetadata temp) {
+  StatusOr<OpMetadata> statusor = numpy::OpMetadataFromPyObject($input);
+  if (!statusor.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
+    SWIG_fail;
+  }
+  temp = std::move(statusor).ValueOrDie();
+  $1 = &temp;
+}
+
+// Shape
+
+%typemap(out) const Shape& {
+  $result = numpy::PyShapeInfoFromXlaShape(*$1).release();
+}
+
+%typemap(out) StatusOr<Shape> {
+  if ($1.ok()) {
+    $result = numpy::PyShapeInfoFromXlaShape($1.ConsumeValueOrDie()).release();
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+}
+
+
+%typemap(out) StatusOr<ProgramShape> {
+  if ($1.ok()) {
+    $result = numpy::PyProgramShapeInfoFromXlaProgramShape(
+        $1.ConsumeValueOrDie()).release();
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+}
+
+
+%typemap(in) const Shape& (Shape temp) {
+  StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape($input);
+  if (!statusor.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
+    SWIG_fail;
+  }
+  temp = std::move(statusor).ValueOrDie();
+  $1 = &temp;
+}
+
+%typemap(in) const absl::optional<Shape>& (
+    absl::optional<Shape> temp) {
+  if ($input == Py_None) {
+    temp = absl::nullopt;
+    $1 = &temp;
+  } else {
+    StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape($input);
+    if (!statusor.ok()) {
+      PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
+      SWIG_fail;
+    }
+    temp = std::move(statusor).ValueOrDie();
+    $1 = &temp;
+  }
+}
+
+%typemap(out) std::unique_ptr<Shape> {
+  $result = numpy::PyShapeInfoFromXlaShape(*$1).release();
+}
+
+%typemap(in) const std::vector<Shape>& (std::vector<Shape> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    SWIG_fail;
+  }
+  const int size = PySequence_Size($input);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape(o);
+    Py_DECREF(o);
+    if (!statusor.ok()) {
+      PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
+      SWIG_fail;
+    }
+    temps.push_back(statusor.ConsumeValueOrDie());
+  }
+  $1 = &temps;
+}
+
+%typemap(in) const std::vector<absl::optional<Shape> >& (
+    std::vector<absl::optional<Shape> > temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    SWIG_fail;
+  }
+  const int size = PySequence_Size($input);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    if (o == Py_None) {
+      temps.push_back(absl::nullopt);
+    } else {
+      StatusOr<Shape> statusor = numpy::XlaShapeFromPyShape(o);
+      Py_DECREF(o);
+      if (!statusor.ok()) {
+        PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
+        SWIG_fail;
+      }
+      temps.push_back(statusor.ConsumeValueOrDie());
+    }
+  }
+  $1 = &temps;
+}
+
+// PrimitiveType
+
+%typemap(in) PrimitiveType {
+  PyObject* py_int = numpy::PyNumberToPyInt($input);
+  if (!py_int) {
+    PyErr_SetString(PyExc_TypeError, "Argument cannot be converted to int");
+    SWIG_fail;
+  }
+  const long value = numpy::PyIntOrPyLongToLong(py_int);
+  if (value == -1 && PyErr_Occurred()) {
+    Py_DECREF(py_int);
+    SWIG_fail;
+  }
+  if (!PrimitiveType_IsValid(value)) {
+    PyErr_SetString(
+        PyExc_TypeError, "Argument not valid for PrimitiveType enum");
+    Py_DECREF(py_int);
+    SWIG_fail;
+  }
+  $1 = static_cast<PrimitiveType>(value);
+}
+
+// Span<pair<int64, int64>>
+
+%typemap(in) absl::Span<const std::pair<int64, int64> >
+    (std::vector<std::pair<int64, int64> > temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    SWIG_fail;
+  }
+  const int size = PySequence_Size($input);
+  temps.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    if (!o) {
+      SWIG_fail;
+    }
+    PyObject* first = PyTuple_GetItem(o, 0);
+    if (!first) {
+      Py_DECREF(o);
+      SWIG_fail;
+    }
+    PyObject* first_pyint = numpy::PyNumberToPyInt(first);
+    if (!first_pyint) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "First pair item cannot be converted to int");
+      Py_DECREF(o);
+      SWIG_fail;
+    }
+    PyObject* second = PyTuple_GetItem(o, 1);
+    if (!second) {
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      SWIG_fail;
+    }
+    PyObject* second_pyint = numpy::PyNumberToPyInt(second);
+    if (!second_pyint) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "Second pair item cannot be converted to int");
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      SWIG_fail;
+    }
+    const int64 first_value = numpy::PyIntOrPyLongToLong(first_pyint);
+    if (first_value == -1 && PyErr_Occurred()) {
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      Py_DECREF(second_pyint);
+      SWIG_fail;
+    }
+    const int64 second_value = numpy::PyIntOrPyLongToLong(second_pyint);
+    if (second_value == -1 && PyErr_Occurred()) {
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      Py_DECREF(second_pyint);
+      SWIG_fail;
+    }
+    temps.push_back(std::make_pair(first_value, second_value));
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// DotDimensionNumbers
+
+%typemap(in) const DotDimensionNumbers&
+    (DotDimensionNumbers dimension_numbers) {
+  if (!HandleRepeatedInt64Attribute(
+        $input, "lhs_contracting_dimensions",
+        dimension_numbers.mutable_lhs_contracting_dimensions())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "rhs_contracting_dimensions",
+        dimension_numbers.mutable_rhs_contracting_dimensions())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "lhs_batch_dimensions",
+        dimension_numbers.mutable_lhs_batch_dimensions())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "rhs_batch_dimensions",
+        dimension_numbers.mutable_rhs_batch_dimensions())) {
+    SWIG_fail;
+  }
+
+  $1 = &dimension_numbers;
+}
+
+// PaddingConfig
+
+%typemap(in) const PaddingConfig&
+    (PaddingConfig padding_config) {
+  PyObject* dimensions = PyObject_GetAttrString($input, "dimensions");
+  if (!dimensions) {
+    SWIG_fail;
+  }
+
+  int length = PySequence_Size(dimensions);
+  if (length == -1) {
+    Py_DECREF(dimensions);
+    SWIG_fail;
+  }
+
+  for (int i = 0; i < length; ++i) {
+    PyObject* item = PySequence_GetItem(dimensions, i);
+    if (!item) {
+      Py_DECREF(dimensions);
+      SWIG_fail;
+    }
+    int64 edge_padding_low, edge_padding_high, interior_padding;
+    if (!GetIntAttr(item, "edge_padding_low", &edge_padding_low)
+        || !GetIntAttr(item, "edge_padding_high", &edge_padding_high)
+        || !GetIntAttr(item, "interior_padding", &interior_padding)) {
+      Py_DECREF(item);
+      Py_DECREF(dimensions);
+      SWIG_fail;
+    }
+    Py_DECREF(item);
+
+    PaddingConfig::PaddingConfigDimension* dimension =
+        padding_config.add_dimensions();
+    dimension->set_edge_padding_low(edge_padding_low);
+    dimension->set_edge_padding_high(edge_padding_high);
+    dimension->set_interior_padding(interior_padding);
+  }
+  Py_DECREF(dimensions);
+
+  $1 = &padding_config;
+}
+
+// ConvolutionDimensionNumbers
+
+%typemap(in) const ConvolutionDimensionNumbers&
+    (ConvolutionDimensionNumbers dimension_numbers) {
+  int64 value;
+
+  if (!GetIntAttr($input, "input_batch_dimension", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_input_batch_dimension(value);
+
+  if (!GetIntAttr($input, "input_feature_dimension", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_input_feature_dimension(value);
+
+  if (!GetIntAttr($input, "output_batch_dimension", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_output_batch_dimension(value);
+
+  if (!GetIntAttr($input, "output_feature_dimension", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_output_feature_dimension(value);
+
+  if (!GetIntAttr($input, "kernel_output_feature_dimension", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_kernel_output_feature_dimension(value);
+
+  if (!GetIntAttr($input, "kernel_input_feature_dimension", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_kernel_input_feature_dimension(value);
+
+  if (!HandleRepeatedInt64Attribute(
+        $input, "input_spatial_dimensions",
+        dimension_numbers.mutable_input_spatial_dimensions())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "kernel_spatial_dimensions",
+        dimension_numbers.mutable_kernel_spatial_dimensions())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "output_spatial_dimensions",
+        dimension_numbers.mutable_output_spatial_dimensions())) {
+    SWIG_fail;
+  }
+
+  $1 = &dimension_numbers;
+}
+
+// GatherDimensionNumbers
+
+%typemap(in) const GatherDimensionNumbers&
+    (GatherDimensionNumbers dimension_numbers) {
+  if (!HandleRepeatedInt64Attribute(
+        $input, "offset_dims",
+        dimension_numbers.mutable_offset_dims())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "collapsed_slice_dims",
+        dimension_numbers.mutable_collapsed_slice_dims())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "start_index_map",
+        dimension_numbers.mutable_start_index_map())) {
+    SWIG_fail;
+  }
+
+  int64 value;
+  if (!GetIntAttr($input, "index_vector_dim", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_index_vector_dim(value);
+
+  $1 = &dimension_numbers;
+}
+
+// ScatterDimensionNumbers
+
+%typemap(in) const ScatterDimensionNumbers&
+    (ScatterDimensionNumbers dimension_numbers) {
+  if (!HandleRepeatedInt64Attribute(
+        $input, "update_window_dims",
+        dimension_numbers.mutable_update_window_dims())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "inserted_window_dims",
+        dimension_numbers.mutable_inserted_window_dims())) {
+    SWIG_fail;
+  }
+  if (!HandleRepeatedInt64Attribute(
+        $input, "scatter_dims_to_operand_dims",
+        dimension_numbers.mutable_scatter_dims_to_operand_dims())) {
+    SWIG_fail;
+  }
+
+  int64 value;
+  if (!GetIntAttr($input, "index_vector_dim", &value)) {
+    SWIG_fail;
+  }
+  dimension_numbers.set_index_vector_dim(value);
+
+  $1 = &dimension_numbers;
+}
+
+// Span<const ReplicaGroup>
+
+%typemap(in) absl::Span<const ReplicaGroup >
+    (std::vector<ReplicaGroup > temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    SWIG_fail;
+  }
+  const int size = PySequence_Size($input);
+  temps.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    ReplicaGroup rgrp;
+    if (!HandleRepeatedInt64Attribute(
+            o, "replica_ids",
+            rgrp.mutable_replica_ids())) {
+      Py_DECREF(o);
+      SWIG_fail;
+    }
+    temps.push_back(rgrp);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
diff --git a/tensorflow/compiler/xla/python/xrt.cc b/tensorflow/compiler/xla/python/xrt.cc
new file mode 100644
index 0000000..2c55abc
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xrt.cc
@@ -0,0 +1,297 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/python/xrt.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "tensorflow/cc/client/client_session.h"
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h"
+#include "tensorflow/compiler/xrt/cc/ops/xrt_execute_op.h"
+#include "tensorflow/compiler/xrt/cc/ops/xrt_state_ops.h"
+#include "tensorflow/compiler/xrt/xrt.pb.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace swig {
+
+XrtAllocation::XrtAllocation(int64 handle, Shape shape,
+                             const string& session_target)
+    : handle_(handle), shape_(shape), session_target_(session_target) {}
+
+XrtAllocation::~XrtAllocation() {
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  auto allocation_handle =
+      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
+  auto release =
+      tensorflow::ops::XRTReleaseAllocationHandle(root, allocation_handle);
+  if (!root.status().ok()) {
+    LOG(ERROR) << root.status();
+    return;
+  }
+
+  tensorflow::ClientSession session(root, session_target_);
+  tensorflow::ClientSession::FeedType inputs;
+  inputs.insert({allocation_handle, handle()});
+  std::vector<tensorflow::Tensor> outputs;
+  auto status = session.Run(inputs, {}, {release}, &outputs);
+  if (!status.ok()) {
+    LOG(ERROR) << status;
+    return;
+  }
+}
+
+/* static */
+StatusOr<XrtAllocation*> XrtAllocation::FromLiteral(
+    const Literal& argument, const string& session_target) {
+  xrt::XLAAllocation alloc;
+  *alloc.mutable_value() = argument.ToProto();
+
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  auto literal_string =
+      tensorflow::ops::Placeholder(root, tensorflow::DT_STRING);
+  auto literal_handle = tensorflow::ops::XRTAllocate(root, literal_string);
+  TF_RETURN_IF_ERROR(root.status());
+
+  tensorflow::ClientSession session(root, session_target);
+  tensorflow::ClientSession::FeedType inputs;
+  inputs.insert({literal_string, alloc.SerializeAsString()});
+  std::vector<tensorflow::Tensor> outputs;
+  TF_RETURN_IF_ERROR(session.Run(inputs, {literal_handle}, &outputs));
+
+  int64 handle = outputs[0].scalar<int64>()();
+  return new XrtAllocation(handle, argument.shape(), session_target);
+}
+
+int64 XrtAllocation::handle() const { return handle_; }
+
+const Shape& XrtAllocation::shape() const { return shape_; }
+
+StatusOr<Literal> XrtAllocation::ToLiteral() const {
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  auto allocation_handle =
+      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
+  auto read_literal = tensorflow::ops::XRTReadLiteral(root, allocation_handle);
+  TF_RETURN_IF_ERROR(root.status());
+
+  tensorflow::ClientSession session(root, session_target_);
+  tensorflow::ClientSession::FeedType inputs;
+  inputs.insert({allocation_handle, handle()});
+  std::vector<tensorflow::Tensor> outputs;
+  TF_RETURN_IF_ERROR(session.Run(inputs, {read_literal}, &outputs));
+
+  xla::LiteralProto response;
+  TF_RET_CHECK(response.ParseFromString(outputs[0].scalar<string>()()));
+  return Literal::CreateFromProto(response);
+}
+
+XrtAllocationTuple::XrtAllocationTuple(std::vector<XrtAllocation*> elements)
+    : elements_(std::move(elements)) {
+  for (auto* element : elements_) {
+    CHECK(element != nullptr);
+  }
+}
+
+XrtAllocationTuple::~XrtAllocationTuple() {
+  for (XrtAllocation* element : elements_) {
+    if (element != nullptr) {
+      delete element;
+    }
+  }
+}
+
+StatusOr<XrtAllocation*> XrtAllocationTuple::Release(int i) {
+  XrtAllocation* element = elements_[i];
+  if (element == nullptr) {
+    return InvalidArgument("Attempted to release already-released element %d.",
+                           i);
+  }
+  elements_[i] = nullptr;
+  return element;
+}
+
+int64 XrtAllocationTuple::size() const { return elements_.size(); }
+
+StatusOr<XrtExecutable*> XrtExecutable::CompileForXrt(
+    const string& hlo_module_proto, const std::vector<Shape>& argument_shapes,
+    const Shape& result_shape, const string& session_target) {
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  auto program = tensorflow::ops::Placeholder(root, tensorflow::DT_STRING);
+  auto compile = tensorflow::ops::XRTCompile(root, program);
+  TF_RETURN_IF_ERROR(root.status());
+
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  ProgramShape program_shape;
+  for (auto& shape : argument_shapes) {
+    *program_shape.add_parameters() = shape;
+  }
+  *program_shape.mutable_result() = result_shape;
+
+  LayoutUtil::SetToDefaultLayout(&program_shape);
+  *config->mutable_program_shape() = program_shape.ToProto();
+  c.mutable_hlo_snapshot()
+      ->mutable_hlo()
+      ->mutable_hlo_module()
+      ->ParsePartialFromString(hlo_module_proto);
+
+  tensorflow::ClientSession session(root, session_target);
+  tensorflow::ClientSession::FeedType inputs;
+  inputs.insert({program, c.SerializeAsString()});
+  std::vector<tensorflow::Tensor> outputs;
+  TF_RETURN_IF_ERROR(session.Run(inputs, {compile.handle}, &outputs));
+
+  int64 handle = outputs[0].scalar<int64>()();
+  return new XrtExecutable(program_shape, handle, session_target);
+}
+
+XrtExecutable::XrtExecutable(const ProgramShape& program_shape, int64 handle,
+                             const string& session_target)
+    : program_shape_(program_shape),
+      handle_(handle),
+      session_target_(session_target) {}
+
+XrtExecutable::~XrtExecutable() {
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  auto computation_handle =
+      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
+  auto release =
+      tensorflow::ops::XRTReleaseCompilationHandle(root, computation_handle);
+  if (!root.status().ok()) {
+    LOG(ERROR) << root.status();
+    return;
+  }
+
+  tensorflow::ClientSession session(root, session_target_);
+  tensorflow::ClientSession::FeedType inputs;
+  inputs.insert({computation_handle, handle()});
+  std::vector<tensorflow::Tensor> outputs;
+  auto status = session.Run(inputs, {}, {release}, &outputs);
+  if (!status.ok()) {
+    LOG(ERROR) << status;
+    return;
+  }
+}
+
+StatusOr<XrtAllocation*> XrtExecutable::Execute(
+    absl::Span<XrtAllocation* const> argument_handles) {
+  const int num_expected_arguments = program_shape().parameters().size();
+
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  std::vector<tensorflow::Output> arguments;
+  arguments.reserve(num_expected_arguments);
+  for (int i = 0; i < num_expected_arguments; ++i) {
+    arguments.push_back(
+        tensorflow::ops::Placeholder(root, tensorflow::DT_INT64));
+  }
+  auto computation_handle =
+      tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
+  auto execution_config =
+      tensorflow::ops::Placeholder(root, tensorflow::DT_STRING);
+  auto execute = tensorflow::ops::XRTExecute(root, computation_handle,
+                                             execution_config, arguments);
+  TF_RETURN_IF_ERROR(root.status());
+
+  TF_RET_CHECK(argument_handles.size() == arguments.size());
+
+  xrt::XRTExecutionConfig e;
+  e.set_release_input_handles(false);
+  e.set_release_compilation_handle(false);
+
+  tensorflow::ClientSession session(root, session_target_);
+  tensorflow::ClientSession::FeedType inputs;
+  for (int i = 0; i < arguments.size(); ++i) {
+    inputs.insert({arguments[i], argument_handles[i]->handle()});
+  }
+  inputs.insert({computation_handle, handle()});
+  inputs.insert({execution_config, e.SerializeAsString()});
+  std::vector<tensorflow::Tensor> outputs;
+  TF_RETURN_IF_ERROR(session.Run(inputs, {execute}, &outputs));
+
+  int64 output = outputs[0].scalar<int64>()();
+  return new XrtAllocation(output, program_shape().result(), session_target_);
+}
+
+const ProgramShape& XrtExecutable::program_shape() const {
+  return program_shape_;
+}
+
+int64 XrtExecutable::handle() const { return handle_; }
+
+void DeleteXrtAllocation(XrtAllocation* allocation) { delete allocation; }
+
+void DeleteXrtExecutable(XrtExecutable* computation) { delete computation; }
+
+StatusOr<XrtAllocationTuple*> DestructureXrtAllocationTuple(
+    XrtAllocation* allocation, const string& session_target) {
+  const Shape& tuple_shape = allocation->shape();
+
+  if (!tuple_shape.IsTuple()) {
+    return InvalidArgument(
+        "Attemped to destructure a LocalShapedBuffer that did not have a tuple "
+        "shape; shape: %s",
+        ShapeUtil::HumanString(tuple_shape));
+  }
+
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  auto base_handle = tensorflow::ops::Placeholder(root, tensorflow::DT_INT64);
+  auto shape_index = tensorflow::ops::Placeholder(root, tensorflow::DT_INT32);
+  auto subtuple = tensorflow::ops::XRTSubTuple(root, base_handle, shape_index);
+  TF_RETURN_IF_ERROR(root.status());
+
+  tensorflow::ClientSession session(root, session_target);
+  tensorflow::ClientSession::FeedType inputs;
+  std::vector<XrtAllocation*> results;
+  for (int32 i = 0; i < ShapeUtil::TupleElementCount(tuple_shape); ++i) {
+    inputs.clear();
+    inputs.insert({base_handle, allocation->handle()});
+    inputs.insert({shape_index, {i}});
+    std::vector<tensorflow::Tensor> outputs;
+    auto status = session.Run(inputs, {subtuple}, &outputs);
+    if (!status.ok()) {
+      // Clean up before returning non-ok status.
+      for (int j = 0; j < results.size(); ++j) {
+        delete results[j];
+      }
+      return status;
+    }
+    const int64 subtuple_handle = outputs[0].scalar<int64>()();
+    const Shape& subtuple_shape =
+        ShapeUtil::GetTupleElementShape(tuple_shape, i);
+    results.push_back(
+        new XrtAllocation(subtuple_handle, subtuple_shape, session_target));
+  }
+  return new XrtAllocationTuple(std::move(results));
+}
+
+}  // namespace swig
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/python/xrt.h b/tensorflow/compiler/xla/python/xrt.h
new file mode 100644
index 0000000..dd5bba6
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xrt.h
@@ -0,0 +1,118 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_XRT_H_
+#define TENSORFLOW_COMPILER_XLA_PYTHON_XRT_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/shape.h"
+
+namespace xla {
+namespace swig {
+
+// Represents a reference to literals that live in a device-allocated buffer via
+// XRT. Specifically, wraps an int64 handle produced by running the allocation
+// graph, and an XLA shape to track the referent's shape.
+class XrtAllocation {
+ public:
+  // Accepts a `session_target` argument, used in constructing the
+  // `tensorflow::ClientSession` instance in which allocation and deallocation
+  // graphs are run.
+  static StatusOr<XrtAllocation*> FromLiteral(const Literal& argument,
+                                              const string& session_target);
+
+  XrtAllocation(int64 handle, Shape shape, const string& session_target);
+  ~XrtAllocation();
+  StatusOr<Literal> ToLiteral() const;
+  const Shape& shape() const;
+  int64 handle() const;
+
+ private:
+  const int64 handle_;
+  const Shape shape_;
+  const string session_target_;
+};
+
+// Result of a tuple destructuring operation on an XrtAllocation.
+class XrtAllocationTuple {
+ public:
+  // Note: any XrtAllocation elements that are not Release()'d will be
+  // deallocated in the destructor.
+  explicit XrtAllocationTuple(std::vector<XrtAllocation*> elements);
+
+  ~XrtAllocationTuple();
+
+  // Releases the ith element to the caller. Further attempts to release the ith
+  // element will return an invalid argument error.
+  StatusOr<XrtAllocation*> Release(int i);
+
+  // Returns the number of elements in the destructured tuple.
+  int64 size() const;
+
+ private:
+  std::vector<XrtAllocation*> elements_;
+};
+
+// Destructures a tuple-valued XrtAllocation into its constituent elements
+// in XrtAllocationTuple form.
+//
+// Accepts a `session_target` argument, used in constructing the
+// `tensorflow::ClientSession` instance in which the sub-tupling graph is run,
+// and passed along in constructing each constituent XrtAllocation.
+StatusOr<XrtAllocationTuple*> DestructureXrtAllocationTuple(
+    XrtAllocation* allocation, const string& session_target);
+
+// Represents a compiled computation that can be executed given handles to
+// device-allocated literals. Specifically, wraps an XRT computation handle.
+class XrtExecutable {
+ public:
+  // Accepts a `session_target` argument, used in constructing the
+  // `tensorflow::ClientSession` instance in which the compilation graph is run.
+  static StatusOr<XrtExecutable*> CompileForXrt(
+      const string& hlo_module_proto, const std::vector<Shape>& argument_shapes,
+      const Shape& result_shape, const string& session_target);
+
+  // Accepts a `session_target` argument, used in constructing the
+  // `tensorflow::ClientSession` instance in which the execution graph is run.
+  XrtExecutable(const ProgramShape& program_shape, int64 handle,
+                const string& session_target);
+  ~XrtExecutable();
+
+  std::vector<int> DeviceOrdinals() const { return {0}; }
+
+  StatusOr<XrtAllocation*> Execute(
+      absl::Span<XrtAllocation* const> argument_handles);
+
+  const ProgramShape& program_shape() const;
+  int64 handle() const;
+
+ private:
+  const ProgramShape program_shape_;
+  const int64 handle_;
+  const string session_target_;
+};
+
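+// A minimal end-to-end usage sketch. The "local" session target and the
+// names arg, serialized_hlo_module_proto and result_shape are illustrative
+// placeholders, not part of this API:
+//
+//   Literal arg = ...;
+//   TF_ASSIGN_OR_RETURN(XrtAllocation* alloc,
+//                       XrtAllocation::FromLiteral(arg, "local"));
+//   TF_ASSIGN_OR_RETURN(
+//       XrtExecutable* exe,
+//       XrtExecutable::CompileForXrt(serialized_hlo_module_proto,
+//                                    {arg.shape()}, result_shape, "local"));
+//   TF_ASSIGN_OR_RETURN(XrtAllocation* result, exe->Execute({alloc}));
+//   TF_ASSIGN_OR_RETURN(Literal out, result->ToLiteral());
+//   DeleteXrtExecutable(exe);
+//   DeleteXrtAllocation(alloc);
+//   DeleteXrtAllocation(result);
+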
+// Functions for freeing resources from the Python side.
+void DeleteXrtAllocation(XrtAllocation* allocation);
+void DeleteXrtExecutable(XrtExecutable* computation);
+
+}  // namespace swig
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_PYTHON_XRT_H_
diff --git a/tensorflow/compiler/xla/python/xrt.i b/tensorflow/compiler/xla/python/xrt.i
new file mode 100644
index 0000000..456dd7b
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xrt.i
@@ -0,0 +1,124 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Wrappers for XRT ops.
+
+%module(threads="1") xrt
+
+// Keep the GIL except where explicitly specified.
+%nothread;
+
+%include "tensorflow/python/platform/base.i"
+%include "tensorflow/compiler/xla/python/xla_data.i"
+
+%{
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/python/xrt.h"
+
+using namespace xla;
+using namespace xla::swig;
+
+%}
+
+// Computation and buffer/allocation types
+
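+// Each StatusOr<T*> out-typemap below unwraps the StatusOr: on success the
+// owned pointer is forwarded to the plain T* typemap; on failure a Python
+// RuntimeError carrying the status message is raised.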
+%typemap(out) StatusOr<xla::swig::XrtExecutable*> {
+  if ($1.ok()) {
+    auto* value = $1.ValueOrDie();
+    {
+      auto* $1 = value;
+      $typemap(out, xla::swig::XrtExecutable*)
+    }
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+}
+
+%typemap(out) StatusOr<xla::swig::XrtAllocation*> {
+  if ($1.ok()) {
+    auto* value = $1.ValueOrDie();
+    {
+      auto* $1 = value;
+      $typemap(out, xla::swig::XrtAllocation*)
+    }
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+}
+
+%typemap(out) StatusOr<xla::swig::XrtAllocationTuple*> {
+  if ($1.ok()) {
+    auto* value = $1.ValueOrDie();
+    {
+      auto* $1 = value;
+      $typemap(out, xla::swig::XrtAllocationTuple*)
+    }
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    SWIG_fail;
+  }
+}
+
+
+%typemap(in) absl::Span<xla::swig::XrtAllocation* const>
+    (std::vector<XrtAllocation*> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    SWIG_fail;
+  }
+  const int size = PySequence_Size($input);
+  temps.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    XrtAllocation* xrta;
+    if ((SWIG_ConvertPtr(o, (void**) &xrta, $descriptor(xla::swig::XrtAllocation*),
+                         SWIG_POINTER_EXCEPTION)) == -1) {
+      Py_DECREF(o);
+      SWIG_fail;
+    }
+    temps.push_back(xrta);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+
+%ignoreall
+%unignore xla;
+%unignore xla::swig;
+%unignore xla::swig::XrtAllocation;
+%unignore xla::swig::XrtAllocation::FromLiteral;
+%unignore xla::swig::XrtAllocation::ToLiteral;
+%unignore xla::swig::XrtAllocation::shape;
+%unignore xla::swig::XrtAllocationTuple;
+%unignore xla::swig::XrtAllocationTuple::Release;
+%unignore xla::swig::XrtAllocationTuple::size;
+%unignore xla::swig::XrtExecutable;
+%unignore xla::swig::XrtExecutable::CompileForXrt;
+%unignore xla::swig::XrtExecutable::DeviceOrdinals;
+%unignore xla::swig::XrtExecutable::Execute;
+%unignore xla::swig::DestructureXrtAllocationTuple;
+%unignore xla::swig::DeleteXrtAllocation;
+%unignore xla::swig::DeleteXrtExecutable;
+
+%thread;
+%include "tensorflow/compiler/xla/python/xrt.h"
+%nothread;
+
+%unignoreall
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index a0cdf34..8d8394c 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -680,7 +680,6 @@
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
-        "//tensorflow/core:core_cpu_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "//third_party/eigen3",
@@ -1461,11 +1460,15 @@
     hdrs = ["hlo_creation_utils.h"],
     deps = [
         ":hlo",
+        ":hlo_module_config",
         ":shape_inference",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/client:xla_computation",
+        "//tensorflow/compiler/xla/client/lib:comparators",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -1511,6 +1514,20 @@
 )
 
 cc_library(
+    name = "op_expander_pass",
+    srcs = ["op_expander_pass.cc"],
+    hdrs = ["op_expander_pass.h"],
+    deps = [
+        ":hlo",
+        ":hlo_creation_utils",
+        ":hlo_pass",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:util",
+        "@com_google_absl//absl/algorithm:container",
+    ],
+)
+
+cc_library(
     name = "gather_expander",
     srcs = ["gather_expander.cc"],
     hdrs = ["gather_expander.h"],
@@ -1518,6 +1535,7 @@
         ":hlo",
         ":hlo_creation_utils",
         ":hlo_pass",
+        ":op_expander_pass",
         ":while_util",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:statusor",
@@ -1541,6 +1559,28 @@
     ],
 )
 
+cc_library(
+    name = "triangular_solve_expander",
+    srcs = ["triangular_solve_expander.cc"],
+    hdrs = ["triangular_solve_expander.h"],
+    deps = [
+        ":op_expander_pass",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/client:xla_computation",
+        "//tensorflow/compiler/xla/client/lib:constants",
+        "//tensorflow/compiler/xla/client/lib:math",
+        "//tensorflow/compiler/xla/client/lib:matrix",
+        "//tensorflow/compiler/xla/client/lib:slicing",
+        "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
+    ],
+)
+
 tf_cc_test(
     name = "batchnorm_expander_test",
     size = "small",
@@ -1603,7 +1643,6 @@
         ":hlo",
         ":hlo_casting_utils",
         ":hlo_creation_utils",
-        ":hlo_matchers",
         ":hlo_parser",
         ":hlo_pass",
         ":pattern_matcher",
@@ -2164,6 +2203,8 @@
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:test_helpers",
+        "//tensorflow/compiler/xla/service:hlo_matchers",
+        "//tensorflow/compiler/xla/service:hlo_parser",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "@com_google_absl//absl/container:flat_hash_map",
@@ -3193,33 +3234,6 @@
 )
 
 cc_library(
-    name = "hlo_tfgraph_builder",
-    srcs = ["hlo_tfgraph_builder.cc"],
-    hdrs = ["hlo_tfgraph_builder.h"],
-    deps = [
-        ":hlo",
-        "//tensorflow/compiler/xla:literal",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:xla_proto",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
-    ],
-)
-
-tf_cc_test(
-    name = "hlo_tfgraph_builder_test",
-    srcs = ["hlo_tfgraph_builder_test.cc"],
-    deps = [
-        ":hlo_tfgraph_builder",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-    ],
-)
-
-cc_library(
     name = "hlo_graph_dumper",
     srcs = [
         "hlo_graph_dumper.cc",
@@ -3230,7 +3244,6 @@
         ":hlo",
         ":hlo_casting_utils",
         ":hlo_execution_profile",
-        ":hlo_tfgraph_builder",
         ":pattern_matcher",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:shape_util",
@@ -3490,6 +3503,37 @@
 )
 
 cc_library(
+    name = "stable_sort_expander",
+    srcs = ["stable_sort_expander.cc"],
+    hdrs = ["stable_sort_expander.h"],
+    deps = [
+        ":hlo",
+        ":hlo_casting_utils",
+        ":hlo_pass",
+        ":op_expander_pass",
+        "//tensorflow/compiler/xla:statusor",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
+    ],
+)
+
+tf_cc_test(
+    name = "stable_sort_expander_test",
+    srcs = ["stable_sort_expander_test.cc"],
+    deps = [
+        ":algebraic_simplifier",
+        ":hlo_matchers",
+        ":hlo_parser",
+        ":pattern_matcher",
+        ":pattern_matcher_gmock",
+        ":stable_sort_expander",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/core:test",
+    ],
+)
+
+cc_library(
     name = "tuple_util",
     srcs = ["tuple_util.cc"],
     hdrs = ["tuple_util.h"],
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index acc2c28..bd17e96 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -280,15 +280,51 @@
         hlo));
   }
 
-  // Helper method to perform and add reduction in a single dimension.
-  HloInstruction* AddReduce(HloInstruction* hlo, int64 dim) {
+  // Converts the hlo to the given primitive type if it is not already of that
+  // type; otherwise returns the original hlo unchanged.
+  HloInstruction* AsType(HloInstruction* hlo,
+                         const PrimitiveType element_type) {
+    if (hlo->shape().element_type() == element_type) {
+      return hlo;
+    }
+    return computation_->AddInstruction(HloInstruction::CreateConvert(
+        ShapeUtil::ChangeElementType(hlo->shape(), element_type), hlo));
+  }
+
+  // Transposes a dot operand such that the batch dimensions are most major
+  // and the contracting dimensions are most minor.
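+  // For example (illustrative shapes): for a rank-4 operand with
+  // batch_dimensions={1} and contracting_dimensions={2}, the computed
+  // transpose permutation is {1, 0, 3, 2}.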
+  StatusOr<HloInstruction*> NormalizeDotOperandToBatchMajorAndContractingMinor(
+      HloInstruction* dot_operand, absl::Span<const int64> batch_dimensions,
+      absl::Span<const int64> contracting_dimensions) {
+    std::vector<int64> transpose_dimensions(batch_dimensions.begin(),
+                                            batch_dimensions.end());
+    for (int64 i = 0; i < dot_operand->shape().rank(); ++i) {
+      if (!(absl::c_linear_search(batch_dimensions, i) ||
+            absl::c_linear_search(contracting_dimensions, i))) {
+        transpose_dimensions.push_back(i);
+      }
+    }
+    transpose_dimensions.insert(transpose_dimensions.end(),
+                                contracting_dimensions.begin(),
+                                contracting_dimensions.end());
+    return MakeTransposeHlo(dot_operand, transpose_dimensions);
+  }
+
+  // Helper method to create an add-reduction over a list of dimensions and
+  // add it to the computation.
+  HloInstruction* AddReduce(HloInstruction* hlo, absl::Span<const int64> dims) {
     HloInstruction* zero =
         computation_->AddInstruction(HloInstruction::CreateConstant(
             LiteralUtil::Zero(hlo->shape().element_type()).Clone()));
     HloComputation* AddReduce_computation = GetOrCreateScalarAddComputation();
-    Shape shape = ShapeUtil::DeleteDimension(dim, hlo->shape());
+    Shape shape = ShapeUtil::FilterDimensions(
+        [&](int64 dim) { return !absl::c_linear_search(dims, dim); },
+        hlo->shape());
     return computation_->AddInstruction(HloInstruction::CreateReduce(
-        shape, hlo, zero, {dim}, AddReduce_computation));
+        shape, hlo, zero, dims, AddReduce_computation));
+  }
+
+  HloInstruction* AddReduce(HloInstruction* hlo, int64 dim) {
+    return AddReduce(hlo, std::vector<int64>{dim});
   }
 
   // Convenience method for replacing an instruction with a bitcast. If operand
@@ -1090,7 +1126,7 @@
   const int64 rhs_rank = rhs->shape().rank();
   const int64 lhs_rank = lhs->shape().rank();
   const auto& dnums = dot->dot_dimension_numbers();
-  if (dnums.rhs_contracting_dimensions_size() > 1) {
+  if (dnums.rhs_contracting_dimensions_size() != 1) {
     return false;
   }
   if (dot_rank > 2 && (lhs_rank != rhs_rank || lhs_rank != dot_rank)) {
@@ -1120,16 +1156,8 @@
     std::swap(rhs_collapsing_dim, rhs_kept_dim);
   }
 
-  auto as_type = [&](HloInstruction* hlo, const PrimitiveType element_type) {
-    if (hlo->shape().element_type() == element_type) {
-      return hlo;
-    }
-    return computation_->AddInstruction(HloInstruction::CreateConvert(
-        ShapeUtil::ChangeElementType(hlo->shape(), element_type), hlo));
-  };
-
   auto reshape_if_necessary = [&](HloInstruction* hlo) {
-    hlo = as_type(hlo, dot->shape().element_type());
+    hlo = AsType(hlo, dot->shape().element_type());
     if (!ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) {
       hlo = computation_->AddInstruction(
           HloInstruction::CreateReshape(dot->shape(), hlo));
@@ -1138,7 +1166,7 @@
   };
 
   auto add_reduce_in_f32 = [&](HloInstruction* hlo, const int64 dim) {
-    return AddReduce(as_type(hlo, F32), dim);
+    return AddReduce(AsType(hlo, F32), dim);
   };
 
   auto broadcast = [&](HloInstruction* hlo, const Shape& shape,
@@ -1247,8 +1275,8 @@
     return dims;
   };
 
-  // If the contracting dimension is 1, remove the degnerate dimnesions from the
-  // lhs and rhs, broadcast each to the result shape and multiply.
+  // If the contracting dimension is 1, remove the degenerate dimensions from
+  // the lhs and rhs, broadcast each to the result shape and multiply.
   if (lhs->shape().dimensions(lhs_collapsing_dim) == 1 &&
       (rhs_kept_dim == rhs_rank - 1 ||
        (rhs_collapsing_dim == rhs_rank - 1 && rhs_kept_dim == rhs_rank - 2))) {
@@ -1585,7 +1613,9 @@
 Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   HloInstruction *lhs, *rhs;
   CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs))));
-
+  if (options_.is_layout_sensitive()) {
+    return Status::OK();
+  }
   // Replace a zero element dot with a broadcast of the constant 0.
   if (ShapeUtil::IsZeroElementArray(dot->shape()) ||
       ShapeUtil::IsZeroElementArray(lhs->shape()) ||
@@ -1602,6 +1632,117 @@
       dot->shape().element_type() != BF16) {
     return Status::OK();
   }
+
+  // If there are no contracting dimensions, a dot can be rewritten as
+  // mul(broadcast(transpose(x)),broadcast(transpose(y)))
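+  // For example (illustrative): dot(x: f32[2], y: f32[3]) with empty batch
+  // and contracting dimensions produces f32[2,3], which is equivalent to
+  // mul(broadcast(x, dims={0}), broadcast(y, dims={1})).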
+  if (dot->dot_dimension_numbers().lhs_contracting_dimensions_size() == 0) {
+    TF_ASSIGN_OR_RETURN(
+        HloInstruction * new_lhs,
+        NormalizeDotOperandToBatchMajorAndContractingMinor(
+            lhs,
+            AsInt64Slice(dot->dot_dimension_numbers().lhs_batch_dimensions()),
+            AsInt64Slice(
+                dot->dot_dimension_numbers().lhs_contracting_dimensions())));
+    if (dot->shape().rank() != lhs->shape().rank()) {
+      std::vector<int64> lhs_broadcast_dims(lhs->shape().rank());
+      absl::c_iota(lhs_broadcast_dims, 0);
+      new_lhs = computation_->AddInstruction(HloInstruction::CreateBroadcast(
+          dot->shape(), new_lhs, lhs_broadcast_dims));
+    }
+    TF_ASSIGN_OR_RETURN(
+        HloInstruction * new_rhs,
+        NormalizeDotOperandToBatchMajorAndContractingMinor(
+            rhs,
+            AsInt64Slice(dot->dot_dimension_numbers().rhs_batch_dimensions()),
+            AsInt64Slice(
+                dot->dot_dimension_numbers().rhs_contracting_dimensions())));
+    if (dot->shape().rank() != rhs->shape().rank()) {
+      std::vector<int64> rhs_broadcast_dims(
+          dot->dot_dimension_numbers().lhs_batch_dimensions_size());
+      absl::c_iota(rhs_broadcast_dims, 0);
+      for (int64 i = lhs->shape().rank(); i < dot->shape().rank(); ++i) {
+        rhs_broadcast_dims.push_back(i);
+      }
+      new_rhs = computation_->AddInstruction(HloInstruction::CreateBroadcast(
+          dot->shape(), new_rhs, rhs_broadcast_dims));
+    }
+    return ReplaceWithNewInstruction(
+        dot, HloInstruction::CreateBinary(dot->shape(), HloOpcode::kMultiply,
+                                          new_lhs, new_rhs));
+  }
+
+  // If the lhs or rhs have only batch and contracting dimensions, a dot can be
+  // rewritten as reduce(mul(broadcast(transpose(x)),broadcast(transpose(y))))
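+  // That is, the dot becomes an explicit elementwise multiply of the
+  // broadcast operands followed by an add-reduce over the contracting
+  // dimensions.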
+  if ((dot->dot_dimension_numbers().lhs_batch_dimensions_size() +
+           dot->dot_dimension_numbers().lhs_contracting_dimensions_size() ==
+       lhs->shape().rank()) ||
+      (dot->dot_dimension_numbers().rhs_contracting_dimensions_size() +
+           dot->dot_dimension_numbers().rhs_batch_dimensions_size() ==
+       rhs->shape().rank())) {
+    TF_ASSIGN_OR_RETURN(
+        HloInstruction * new_lhs,
+        NormalizeDotOperandToBatchMajorAndContractingMinor(
+            lhs,
+            AsInt64Slice(dot->dot_dimension_numbers().lhs_batch_dimensions()),
+            AsInt64Slice(
+                dot->dot_dimension_numbers().lhs_contracting_dimensions())));
+    TF_ASSIGN_OR_RETURN(
+        HloInstruction * new_rhs,
+        NormalizeDotOperandToBatchMajorAndContractingMinor(
+            rhs,
+            AsInt64Slice(dot->dot_dimension_numbers().rhs_batch_dimensions()),
+            AsInt64Slice(
+                dot->dot_dimension_numbers().rhs_contracting_dimensions())));
+
+    int64 lhs_outer_dims =
+        lhs->shape().rank() -
+        (dot->dot_dimension_numbers().lhs_batch_dimensions_size() +
+         dot->dot_dimension_numbers().lhs_contracting_dimensions_size());
+    int64 rhs_outer_dims =
+        rhs->shape().rank() -
+        (dot->dot_dimension_numbers().rhs_batch_dimensions_size() +
+         dot->dot_dimension_numbers().rhs_contracting_dimensions_size());
+    CHECK(lhs_outer_dims == 0 || rhs_outer_dims == 0);
+    if (rhs_outer_dims > 0) {
+      std::vector<int64> lhs_broadcast_dims(
+          dot->dot_dimension_numbers().lhs_batch_dimensions_size());
+      absl::c_iota(lhs_broadcast_dims, 0);
+      lhs_broadcast_dims.resize(lhs->shape().rank());
+      std::iota(lhs_broadcast_dims.begin() +
+                    dot->dot_dimension_numbers().lhs_batch_dimensions_size(),
+                lhs_broadcast_dims.end(),
+                dot->dot_dimension_numbers().lhs_batch_dimensions_size() +
+                    rhs_outer_dims);
+      new_lhs = computation_->AddInstruction(HloInstruction::CreateBroadcast(
+          new_rhs->shape(), new_lhs, lhs_broadcast_dims));
+    } else if (lhs_outer_dims > 0) {
+      std::vector<int64> rhs_broadcast_dims(
+          dot->dot_dimension_numbers().rhs_batch_dimensions_size());
+      absl::c_iota(rhs_broadcast_dims, 0);
+      rhs_broadcast_dims.resize(rhs->shape().rank());
+      std::iota(rhs_broadcast_dims.begin() +
+                    dot->dot_dimension_numbers().rhs_batch_dimensions_size(),
+                rhs_broadcast_dims.end(),
+                dot->dot_dimension_numbers().rhs_batch_dimensions_size() +
+                    lhs_outer_dims);
+      new_rhs = computation_->AddInstruction(HloInstruction::CreateBroadcast(
+          new_lhs->shape(), new_rhs, rhs_broadcast_dims));
+    }
+
+    TF_ASSIGN_OR_RETURN(HloInstruction * new_dot,
+                        MakeBinaryHlo(HloOpcode::kMultiply, new_lhs, new_rhs));
+    std::vector<int64> reduce_dims(
+        dot->dot_dimension_numbers().lhs_contracting_dimensions_size());
+    new_dot = AsType(new_dot, F32);
+    const int64 outer_dims = std::max(rhs_outer_dims, lhs_outer_dims);
+    absl::c_iota(
+        reduce_dims,
+        outer_dims + dot->dot_dimension_numbers().lhs_batch_dimensions_size());
+    new_dot = AddReduce(new_dot, reduce_dims);
+    new_dot = AsType(new_dot, dot->shape().element_type());
+    return ReplaceInstruction(dot, new_dot);
+  }
+
   if (lhs->shape().rank() > 2 || rhs->shape().rank() > 2 ||
       dot->shape().rank() > 2) {
     if (options_.enable_dot_strength_reduction() &&
@@ -1640,7 +1781,11 @@
   }
 
   // Simplify dot(transpose(a), transpose(b)) to transpose(dot(b,a)).
-  if (lhs->IsRank2Transpose() && rhs->IsRank2Transpose()) {
+  if (dot->dot_dimension_numbers().lhs_batch_dimensions_size() == 0 &&
+      dot->dot_dimension_numbers().lhs_contracting_dimensions_size() == 1 &&
+      dot->dot_dimension_numbers().lhs_contracting_dimensions(0) == 1 &&
+      dot->dot_dimension_numbers().rhs_contracting_dimensions(0) == 0 &&
+      lhs->IsRank2Transpose() && rhs->IsRank2Transpose()) {
     DotDimensionNumbers dot_dimension_numbers;
     dot_dimension_numbers.add_lhs_contracting_dimensions(1);
     dot_dimension_numbers.add_rhs_contracting_dimensions(0);
@@ -2530,11 +2675,11 @@
         int64 start = slice->slice_starts(i);
         int64 low = padding_config.dimensions(i).edge_padding_low();
         int64 data = pad->operand(0)->shape().dimensions(i);
-        if (start >= low && start < low + data) {
-          return false;
+        if (start < low || start >= low + data) {
+          return true;
         }
       }
-      return true;
+      return false;
     }();
 
     if (in_padding) {
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index a908051..af03fcb 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -28,7 +28,6 @@
 #include "tensorflow/compiler/xla/service/hlo_creation_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
-#include "tensorflow/compiler/xla/service/hlo_matchers.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_fix.h"
@@ -47,7 +46,6 @@
 
 using ::testing::ElementsAre;
 namespace m = match;
-namespace op = xla::testing::opcode_matchers;
 
 class AlgebraicSimplifierTest : public HloTestBase {
  protected:
@@ -2755,8 +2753,9 @@
   Shape keys_shape = ShapeUtil::MakeShape(F32, {1});
   auto keys = builder.AddInstruction(
       HloInstruction::CreateParameter(0, keys_shape, "keys"));
-  TF_ASSERT_OK(
-      MakeSortHlo(keys_shape, {keys}, 0, &builder, module.get()).status());
+  TF_ASSERT_OK(MakeSortHlo(keys_shape, {keys}, 0, /*is_stable=*/false, &builder,
+                           module.get())
+                   .status());
   HloComputation* computation = module->AddEntryComputation(builder.Build());
   AlgebraicSimplifier simplifier(default_options_);
   ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
@@ -2777,7 +2776,8 @@
       HloInstruction::CreateParameter(2, values_shape, "values1"));
   TF_ASSERT_OK(MakeSortHlo(ShapeUtil::MakeTupleShape(
                                {keys_shape, values_shape, values_shape}),
-                           {keys, values0, values1}, 0, &builder, module.get())
+                           {keys, values0, values1}, 0, /*is_stable=*/false,
+                           &builder, module.get())
                    .status());
   HloComputation* computation = module->AddEntryComputation(builder.Build());
   AlgebraicSimplifier simplifier(default_options_);
@@ -3714,8 +3714,8 @@
   HloInstruction* y =
       builder.AddInstruction(HloInstruction::CreateParameter(1, r1f32, "y"));
   DotDimensionNumbers dot_dnums;
-  dot_dnums.add_lhs_contracting_dimensions(1);
-  dot_dnums.add_rhs_contracting_dimensions(0);
+  dot_dnums.add_lhs_batch_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(0);
   builder.AddInstruction(HloInstruction::CreateDot(r1f32, x, y, dot_dnums,
                                                    DefaultPrecisionConfig(2)));
   std::unique_ptr<HloComputation> dot_computation(builder.Build());
@@ -3960,7 +3960,7 @@
       param = f32[3,4] parameter(0)
       constant = f32[] constant(0.0)
       pad = f32[8,10] pad(f32[3,4] param, f32[] constant), padding=3_2x1_5
-      ROOT slice = f32[1,1] slice(f32[8,10] pad), slice={[5:6],[9:10]}
+      ROOT slice = f32[1,1] slice(f32[8,10] pad), slice={[5:6],[4:5]}
     }
   )";
   TF_ASSERT_OK_AND_ASSIGN(auto module,
@@ -3971,6 +3971,27 @@
   EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie());
 }
 
+TEST_F(AlgebraicSimplifierTest, SliceOfPadMidScalarConstant) {
+  const char* hlo_string = R"(
+    HloModule module
+
+    ENTRY test {
+      param = f32[3,4] parameter(0)
+      constant = f32[] constant(0.0)
+      pad = f32[8,10] pad(f32[3,4] param, f32[] constant), padding=3_2x1_5
+      ROOT slice = f32[1,1] slice(f32[8,10] pad), slice={[5:6],[9:10]}
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  AlgebraicSimplifierOptions options;
+  AlgebraicSimplifier simplifier(options);
+  EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::Reshape(m::Constant())));
+}
+
 TEST_F(AlgebraicSimplifierTest, SliceOfPadMidScalar) {
   const char* hlo_string = R"(
     HloModule module
@@ -3992,6 +4013,29 @@
   EXPECT_THAT(root, GmockMatch(m::Parameter()));
 }
 
+TEST_F(AlgebraicSimplifierTest, SliceOfPadSomeDimsInPadding) {
+  const char* hlo_string = R"(
+    HloModule module
+
+    ENTRY entry () -> f32[1]{0} {
+      constant.val = f32[] constant(4)
+      constant.pad = f32[] constant(-7)
+      reshape.1 = f32[1,1,1]{2,1,0} reshape(f32[] constant.val)
+      pad = f32[3,3,3]{2,1,0} pad(f32[1,1,1]{2,1,0} reshape.1, f32[] constant.pad), padding=0_2x0_2x2_0
+      slice = f32[1,1,1]{2,1,0} slice(f32[3,3,3]{2,1,0} pad), slice={[0:1], [0:1], [0:1]}
+      ROOT reshape.2 = f32[1]{0} reshape(f32[1,1,1]{2,1,0} slice)
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  AlgebraicSimplifierOptions options;
+  AlgebraicSimplifier simplifier(options);
+  EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::Reshape(m::ConstantScalar(-7.0))));
+}
+
 TEST_F(AlgebraicSimplifierTest, SliceOfConcatScalarInput) {
   const char* hlo_string = R"(
     HloModule module
@@ -4222,10 +4266,24 @@
   int m, k, n;
   PrimitiveType element_type;
   std::tie(m, k, n, element_type) = GetParam();
-
-  Shape dot_shape = ShapeUtil::MakeShape(element_type, {1, 3, 5, m, n});
-  Shape lhs_shape = ShapeUtil::MakeShape(element_type, {1, 3, 5, m, k});
-  Shape rhs_shape = ShapeUtil::MakeShape(element_type, {1, 3, 5, k, n});
+  std::vector<int64> lhs_dims = {1, 3, 5};
+  std::vector<int64> rhs_dims = lhs_dims;
+  std::vector<int64> output_dims = lhs_dims;
+  if (m > 0) {
+    lhs_dims.push_back(m);
+    output_dims.push_back(m);
+  }
+  if (k > 0) {
+    lhs_dims.push_back(k);
+    rhs_dims.push_back(k);
+  }
+  if (n > 0) {
+    rhs_dims.push_back(n);
+    output_dims.push_back(n);
+  }
+  Shape dot_shape = ShapeUtil::MakeShape(element_type, output_dims);
+  Shape lhs_shape = ShapeUtil::MakeShape(element_type, lhs_dims);
+  Shape rhs_shape = ShapeUtil::MakeShape(element_type, rhs_dims);
   HloComputation::Builder builder(TestName());
 
   auto lhs = builder.AddInstruction(
@@ -4239,16 +4297,18 @@
   dot_dnums.add_rhs_batch_dimensions(0);
   dot_dnums.add_rhs_batch_dimensions(1);
   dot_dnums.add_rhs_batch_dimensions(2);
-  dot_dnums.add_lhs_contracting_dimensions(4);
-  dot_dnums.add_rhs_contracting_dimensions(3);
+  if (k > 0) {
+    dot_dnums.add_lhs_contracting_dimensions(m > 0 ? 4 : 3);
+    dot_dnums.add_rhs_contracting_dimensions(3);
+  }
   builder.AddInstruction(HloInstruction::CreateDot(
       dot_shape, lhs, rhs, dot_dnums, DefaultPrecisionConfig(2)));
   auto computation = module->AddEntryComputation(builder.Build());
   AlgebraicSimplifier simplifier(default_options_);
   TF_ASSERT_OK_AND_ASSIGN(bool changed, simplifier.Run(module.get()));
-  const bool dot_should_be_transformed = m == 1 || k == 1 || n == 1;
-  const bool computation_should_be_modified = dot_should_be_transformed;
-  EXPECT_EQ(changed, computation_should_be_modified);
+  const bool dot_should_be_transformed =
+      m == 1 || k == 1 || n == 1 || m == -1 || k == -1 || n == -1;
+  EXPECT_EQ(changed, dot_should_be_transformed);
   bool has_no_dot = true;
   for (const auto& hlo : computation->instructions()) {
     if (hlo->opcode() == HloOpcode::kDot) {
@@ -4259,10 +4319,12 @@
   EXPECT_EQ(has_no_dot, dot_should_be_transformed);
 }
 
-INSTANTIATE_TEST_SUITE_P(
-    BatchDotStrengthReductionTestInstantiation, BatchDotStrengthReductionTest,
-    ::testing::Combine(::testing::Values(1, 2), ::testing::Values(1, 2),
-                       ::testing::Values(1, 2), ::testing::Values(F32, BF16)));
+INSTANTIATE_TEST_SUITE_P(BatchDotStrengthReductionTestInstantiation,
+                         BatchDotStrengthReductionTest,
+                         ::testing::Combine(::testing::Values(-1, 1, 2),
+                                            ::testing::Values(-1, 1, 2),
+                                            ::testing::Values(-1, 1, 2),
+                                            ::testing::Values(F32, BF16)));
 
 class DotStrengthReductionTest
     : public AlgebraicSimplifierTest,
@@ -4792,7 +4854,7 @@
   AlgebraicSimplifier simplifier(options);
   EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie());
   HloInstruction* root = module->entry_computation()->root_instruction();
-  EXPECT_THAT(root, op::Constant());
+  EXPECT_THAT(root, GmockMatch(m::Constant()));
 }
 
 TEST_F(AlgebraicSimplifierTest, DividedByConstantInstructionWithoutLayout) {
@@ -4814,7 +4876,7 @@
   AlgebraicSimplifier simplifier(options);
   EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie());
   HloInstruction* root = module->entry_computation()->root_instruction();
-  EXPECT_THAT(root, op::Multiply());
+  EXPECT_THAT(root, GmockMatch(m::Multiply()));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.cc b/tensorflow/compiler/xla/service/ar_crs_combiner.cc
index 99373dc..52d6982 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner.cc
@@ -32,15 +32,13 @@
 
 namespace xla {
 
-namespace {
-
 namespace m = match;
 
 // Checks if the argument instruction is an AllReduce, followed by a certain
 // sequence of instructions and then a CRS. It must be possible to move
 // the AR past each instruction in the sequence. Returns the CRS, which is the
 // last instruction in the sequence.
-absl::optional<HloInstruction*> MatchesArCrsPattern(
+absl::optional<ArCrsCombiner::ArCrsPair> ArCrsCombiner::MatchesArCrsPattern(
     HloInstruction* instruction) {
   auto can_ar_move_past_instruction = [](HloInstruction* instruction) -> bool {
     if (instruction->user_count() != 1) {
@@ -77,23 +75,23 @@
     return absl::nullopt;
   }
   auto next = instruction->users()[0];
+  int64 distance = 1;
   while (!next->IsCrossReplicaAllReduce()) {
     if (can_ar_move_past_instruction(next)) {
       next = next->users()[0];
     } else {
       return absl::nullopt;
     }
+    ++distance;
   }
   if (!Cast<HloAllReduceInstruction>(next)->IsNoop() &&
       computation_is_addition(next->called_computations()[0])) {
-    return absl::optional<HloInstruction*>(next);
+    return absl::optional<ArCrsPair>(ArCrsPair(instruction, next, distance));
   } else {
     return absl::nullopt;
   }
 }
 
-}  // namespace
-
 absl::optional<HloInstruction*> ArCrsCombiner::WhileFromBodyParameter(
     HloInstruction* instruction) {
   CHECK_EQ(HloOpcode::kParameter, instruction->opcode());
@@ -235,15 +233,55 @@
 }
 
 void ArCrsCombiner::GroupAllReducesById(HloModule* module) {
+  // Say that two or more ARs lead to the same CRS: (AR1, CRS), (AR2, CRS),
+  // ..., (ARn, CRS).
+  // If, as we traverse the HLO graph, we start tracking the pair (AR2, CRS)
+  // and later find that AR1's distance from the CRS is longer, we discard
+  // AR2 and start tracking AR1. We put the discarded ids in this set in
+  // order to skip reprocessing the shorter paths when we encounter the other
+  // ARs that have the same id as AR2.
+  absl::flat_hash_set<int64> discarded_ar_ids;
   for (HloComputation* computation : module->MakeNonfusionComputations()) {
     for (HloInstruction* instruction : computation->instructions()) {
-      auto maybe_crs = MatchesArCrsPattern(instruction);
-      if (maybe_crs) {
-        auto crs = *maybe_crs;
+      auto maybe_pair = MatchesArCrsPattern(instruction);
+      if (maybe_pair) {
+        auto pair = *maybe_pair;
         int64 ar_id = *(instruction->all_reduce_id());
-        if (crs_reserved_map_.find(crs) == crs_reserved_map_.end()) {
-          all_reduce_map_[ar_id].push_back(instruction);
-          crs_reserved_map_[crs] = ar_id;
+        if (discarded_ar_ids.find(ar_id) != discarded_ar_ids.end()) {
+          continue;
+        }
+        auto it = crs_reserved_map_.find(pair.crs);
+        if (it != crs_reserved_map_.end()) {
+          auto prev_ar_id = it->second;
+          // Since there is another AR paired with CRS,
+          // all_reduce_map_[prev_ar_id] should exist, but
+          // all_reduce_map_[ar_id] shouldn't.
+          CHECK(all_reduce_map_.find(ar_id) == all_reduce_map_.end());
+          CHECK_NE(prev_ar_id, ar_id);
+          auto prev_pair = all_reduce_map_[prev_ar_id].back();
+          int64 prev_distance = prev_pair.distance;
+          if (prev_distance < pair.distance) {
+            // The current AR's distance to CRS is longer than the previously
+            // tracked AR, so we discard the previous AR.
+            all_reduce_map_.erase(prev_ar_id);
+            discarded_ar_ids.insert(prev_ar_id);
+            all_reduce_map_[ar_id].push_back(pair);
+            crs_reserved_map_[pair.crs] = ar_id;
+          } else {
+            // Discard the current AR id because we are keeping the previously
+            // tracked AR.
+            discarded_ar_ids.insert(ar_id);
+          }
+        } else {
+          if (all_reduce_map_.find(ar_id) != all_reduce_map_.end()) {
+            int64 prev_distance = all_reduce_map_[ar_id].back().distance;
+            CHECK_EQ(prev_distance, pair.distance)
+                << "All ARs with the same AR ID must have the same distance "
+                   "from the corresponding CRSs. Found: "
+                << prev_distance << " and " << pair.distance;
+          }
+          all_reduce_map_[ar_id].push_back(pair);
+          crs_reserved_map_[pair.crs] = ar_id;
         }
       }
     }
@@ -253,11 +291,11 @@
 void ArCrsCombiner::KeepProvablyEqualInstructionGroups() {
   for (auto it : all_reduce_map_) {
     auto all_reduce_id = it.first;
-    auto instruction_vec = it.second;
-    CHECK_EQ(instruction_vec.size(), num_spatial_partitions_);
-    auto instr_0 = instruction_vec[0];
-    for (int i = 1; i < instruction_vec.size(); ++i) {
-      auto instr_i = instruction_vec[i];
+    auto pairs_vec = it.second;
+    CHECK_EQ(pairs_vec.size(), num_spatial_partitions_);
+    auto instr_0 = pairs_vec[0].ar;
+    for (int i = 1; i < pairs_vec.size(); ++i) {
+      auto instr_i = pairs_vec[i].ar;
       auto next_0 = instr_0->users()[0];
       auto next_i = instr_i->users()[0];
       absl::flat_hash_map<int64, int64> visited_pairs;
@@ -281,8 +319,9 @@
     return false;
   }
   for (auto it : all_reduce_map_) {
-    auto instruction_vec = it.second;
-    for (auto all_reduce : instruction_vec) {
+    auto pairs_vec = it.second;
+    for (auto pair : pairs_vec) {
+      auto all_reduce = pair.ar;
       auto parent_computation = all_reduce->parent();
       auto all_reduce_id = all_reduce->all_reduce_id();
       auto prev = all_reduce->mutable_operand(0);
@@ -303,16 +342,23 @@
                                      ? next->operands()[1]
                                      : next->operands()[0];
             // To move the AR past the addition/subtraction, we need to divide
-            // other_operand by the number of spatial partitions.
-            auto shape = other_operand->shape();
-            Literal lit(shape);
-            lit.PopulateWithValue<float>(num_spatial_partitions_);
-            auto divisor = parent_computation->AddInstruction(
-                HloInstruction::CreateConstant(lit.Clone()));
-            auto division =
-                parent_computation->AddInstruction(HloInstruction::CreateBinary(
-                    shape, HloOpcode::kDivide, other_operand, divisor));
-            TF_CHECK_OK(other_operand->ReplaceUseWith(next, division));
+            // other_operand by the number of spatial partitions, except if
+            // other_operand is a cross-module AR, which can be eliminated.
+            if (other_operand->IsCrossModuleAllReduce() &&
+                other_operand->user_count() == 1) {
+              TF_CHECK_OK(other_operand->ReplaceAllUsesWith(
+                  other_operand->mutable_operand(0)));
+            } else {
+              auto shape = other_operand->shape();
+              Literal lit(shape);
+              lit.PopulateWithValue<float>(num_spatial_partitions_);
+              auto divisor = parent_computation->AddInstruction(
+                  HloInstruction::CreateConstant(lit.Clone()));
+              auto division = parent_computation->AddInstruction(
+                  HloInstruction::CreateBinary(shape, HloOpcode::kDivide,
+                                               other_operand, divisor));
+              TF_CHECK_OK(other_operand->ReplaceUseWith(next, division));
+            }
             break;
           }
           default:
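Why the division above preserves semantics (a sketch, writing N for num_spatial_partitions_ and x_p for the AR input on partition p): before the rewrite each core computes CRS(AR(x) + y) with AR(x) = \sum_p x_p; after the rewrite each core feeds x_p + y/N into a single all-core AllReduce, and

$$\sum_{p=1}^{N}\Big(x_p + \tfrac{y}{N}\Big) \;=\; \sum_{p=1}^{N} x_p + y \;=\; \mathrm{AR}(x) + y,$$

so the all-core reduce reproduces the original CRS operand. The new early-out for a cross-module-AR other_operand relies on the same identity: replacing AR(z) with its per-partition operand z_p lets the all-core reduce re-sum it, since \sum_p (x_p + z_p) = AR(x) + AR(z).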
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.h b/tensorflow/compiler/xla/service/ar_crs_combiner.h
index e61ef5d..f503e1d 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner.h
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner.h
@@ -26,11 +26,47 @@
 namespace xla {
 
 // When the HLO graph contains a cross-module AllReduce, followed by some simple
-// linear operations, followed by a cross-replica AllReduce, we can combine the
-// CMAR and the CRAR, to use an efficient AllReduce implementation that fully
-// utilizes the interconnect bandwidth.
+// linear operations, followed by a cross-replica AllReduce (also known as
+// cross-replica sum, or CRS), we can combine the CMAR and the CRAR, to use an
+// efficient AllReduce implementation that fully utilizes the interconnect
+// bandwidth.
 // Such sequences appear in spatially partitioned models.
-// This pass must run right after spatial partitioning.
+// This pass must run right after spatial partitioning, when the code is still
+// in a single HLO module.
+//
+// The steps are:
+// 1) Find CMARs followed by simple ops followed by CRARs.
+// 2) Group CMARs by all_reduce_id. They must all be rewritten.
+// 3) Prove that the CMAR patterns in each core produce the same result.
+// 4) Eliminate the CMAR, and if it feeds an addition/subtraction, divide the
+//    other operand by the number of spatial partitions.
+// 5) Turn the CRAR into an all-core AllReduce.
+//
+// The pass also handles the case where multiple CMARs lead to the same CRAR,
+// and eliminates all CMARs. This graph:
+//
+//        Y
+//        |
+//  X   CMAR_2   Z
+//  |      \    /
+// CMAR_1     +
+//    \     /
+//       +
+//       |
+//     CRAR
+//
+// gets rewritten to:
+//
+//           Z   num_partitions
+//            \  /
+//       Y    div
+//        \   /
+//    X     +
+//     \   /
+//       +
+//       |
+//  all-core AR
+//
 class ArCrsCombiner : public HloModulePass {
  public:
   ArCrsCombiner(int num_spatial_partitions)
@@ -43,6 +79,28 @@
                                                HloInstruction* i2);
 
  private:
+  // We use this struct because multiple ARs can be paired with the same CRS.
+  // In that case, we want to select the AR that is furthest from the CRS,
+  // because that makes it easier to eliminate all ARs during RewriteGraph.
+  struct ArCrsPair {
+    HloInstruction* ar;
+    HloInstruction* crs;
+    // The length of the path from AR to CRS in the HLO graph.
+    int64 distance;
+
+    ArCrsPair(HloInstruction* all_reduce, HloInstruction* cross_replica_sum,
+              int64 dist)
+        : ar(all_reduce), crs(cross_replica_sum), distance(dist) {}
+
+    string ToString() {
+      return absl::StrCat("(AR: ", ar->name(), ", CRS: ", crs->name(),
+                          ", distance: ", distance, ")");
+    }
+  };
+
+  absl::optional<ArCrsCombiner::ArCrsPair> MatchesArCrsPattern(
+      HloInstruction* instruction);
+
   // If the passed instruction is a while parameter, and the while body is only
   // called by a single while instruction, return the while instruction.
   absl::optional<HloInstruction*> WhileFromBodyParameter(
@@ -80,8 +138,8 @@
 
   int num_spatial_partitions_;
 
-  // Map from all-reduce ids to the all reduce instructions.
-  absl::flat_hash_map<int64, std::vector<HloInstruction*>> all_reduce_map_;
+  // Map from all-reduce ids to the AR/CRS pairs.
+  absl::flat_hash_map<int64, std::vector<ArCrsPair>> all_reduce_map_;
 
   // Map from a CRS instruction to the all-reduce ID of the AR paired with the
   // CRS. Sometimes, several ARs in the code could be paired with the same CRS.
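A minimal sketch (a hypothetical helper, not the pass code) of the selection rule the ArCrsPair comment describes: when two candidate pairs reserve the same CRS, the pair whose AR is furthest from the CRS wins and the other AR id is discarded.

  // Illustration only; both pairs must target the same CRS.
  const ArCrsPair& SelectPair(const ArCrsPair& a, const ArCrsPair& b) {
    CHECK_EQ(a.crs, b.crs);
    return a.distance >= b.distance ? a : b;
  }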
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
index 5152f0d..9c9db74 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
@@ -1005,11 +1005,11 @@
               op::Tuple(op::AllReduce(op::Add(
                             op::Add(op::Parameter(),
                                     op::Divide(op::Constant(), op::Constant())),
-                            op::Divide(op::AllReduce(), op::Constant()))),
+                            op::Parameter())),
                         op::AllReduce(op::Add(
                             op::Add(op::Parameter(),
                                     op::Divide(op::Constant(), op::Constant())),
-                            op::Divide(op::AllReduce(), op::Constant())))));
+                            op::Parameter()))));
   auto crs_after =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
@@ -1093,15 +1093,17 @@
   ArCrsCombiner combiner(2);
   auto changed = combiner.Run(module.get()).ValueOrDie();
   EXPECT_TRUE(changed);
-  EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::AllReduce(op::Add(
-                            op::Parameter(),
-                            op::Divide(op::Add(op::AllReduce(), op::Constant()),
-                                       op::Constant()))),
-                        op::AllReduce(op::Add(
-                            op::Parameter(),
-                            op::Divide(op::Add(op::AllReduce(), op::Constant()),
-                                       op::Constant())))));
+  EXPECT_THAT(
+      module->entry_computation()->root_instruction(),
+      op::Tuple(op::AllReduce(op::Add(
+                    op::Parameter(),
+                    op::Add(op::Parameter(),
+                            op::Divide(op::Constant(), op::Constant())))),
+                op::AllReduce(op::Add(
+                    op::Parameter(),
+                    op::Add(op::Parameter(),
+                            op::Divide(op::Constant(), op::Constant()))))));
+
   auto crs_after =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc
index 215e8ce..d016d3e 100644
--- a/tensorflow/compiler/xla/service/backend.cc
+++ b/tensorflow/compiler/xla/service/backend.cc
@@ -29,7 +29,6 @@
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/platform/byte_order.h"
@@ -67,18 +66,38 @@
   return allowed_devices_;
 }
 
+namespace {
+
+class EigenThreadPoolWrapper : public Eigen::ThreadPoolInterface {
+ public:
+  explicit EigenThreadPoolWrapper(tensorflow::thread::ThreadPool* pool)
+      : pool_(pool) {}
+  ~EigenThreadPoolWrapper() override {}
+
+  void Schedule(std::function<void()> fn) override {
+    pool_->Schedule(std::move(fn));
+  }
+  int NumThreads() const override { return pool_->NumThreads(); }
+  int CurrentThreadId() const override { return pool_->CurrentThreadId(); }
+
+ private:
+  tensorflow::thread::ThreadPool* pool_ = nullptr;
+};
+
+}  // namespace
+
 // Define this in .cc file to avoid having to include eigen or forward declare
 // these types in the header.
-struct Backend::EigenThreadPoolWrapper {
-  explicit EigenThreadPoolWrapper(const int num_threads)
+struct Backend::IntraOpThreadPool {
+  explicit IntraOpThreadPool(const int num_threads)
       : pool(new tensorflow::thread::ThreadPool(tensorflow::Env::Default(),
                                                 "XLAEigen", num_threads)),
-        wrapper(new tensorflow::EigenThreadPoolWrapper(pool.get())),
+        wrapper(new EigenThreadPoolWrapper(pool.get())),
         device(new Eigen::ThreadPoolDevice(wrapper.get(),
                                            wrapper->NumThreads())) {}
 
   std::unique_ptr<tensorflow::thread::ThreadPool> pool;
-  std::unique_ptr<tensorflow::EigenThreadPoolWrapper> wrapper;
+  std::unique_ptr<EigenThreadPoolWrapper> wrapper;
   std::unique_ptr<Eigen::ThreadPoolDevice> device;
 };
 
@@ -146,8 +165,7 @@
     const int num_threads = intra_op_parallelism_threads > 0
                                 ? intra_op_parallelism_threads
                                 : tensorflow::port::NumSchedulableCPUs();
-    intra_op_thread_pool_wrapper_.reset(
-        new EigenThreadPoolWrapper(num_threads));
+    intra_op_thread_pool_.reset(new IntraOpThreadPool(num_threads));
   }
 }
 
@@ -159,17 +177,17 @@
 
 const Eigen::ThreadPoolDevice* Backend::eigen_intra_op_thread_pool_device()
     const {
-  if (intra_op_thread_pool_wrapper_ == nullptr) {
+  if (intra_op_thread_pool_ == nullptr) {
     return nullptr;
   }
-  return intra_op_thread_pool_wrapper_->device.get();
+  return intra_op_thread_pool_->device.get();
 }
 
 tensorflow::thread::ThreadPool* Backend::eigen_intra_op_thread_pool() const {
-  if (intra_op_thread_pool_wrapper_ == nullptr) {
+  if (intra_op_thread_pool_ == nullptr) {
     return nullptr;
   }
-  return intra_op_thread_pool_wrapper_->pool.get();
+  return intra_op_thread_pool_->pool.get();
 }
 
 StatusOr<se::StreamExecutor*> Backend::stream_executor(
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h
index c35f033..e7f29a0 100644
--- a/tensorflow/compiler/xla/service/backend.h
+++ b/tensorflow/compiler/xla/service/backend.h
@@ -156,7 +156,6 @@
   Status ResetDevices();
 
  private:
-  struct EigenThreadPoolWrapper;
   Backend(se::Platform* platform, Compiler* compiler,
           absl::Span<se::StreamExecutor* const> stream_executors,
           TransferManager* transfer_manager,
@@ -183,7 +182,8 @@
   std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_;
 
   // For the CPU backend, an Eigen threadpool device for use by Eigen code.
-  std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_;
+  struct IntraOpThreadPool;
+  std::unique_ptr<IntraOpThreadPool> intra_op_thread_pool_;
 };
 
 }  // namespace xla
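For context, a minimal usage sketch of the adapter defined above (assuming Eigen's unsupported Tensor module; EigenThreadPoolWrapper is file-local to backend.cc, so treat this as illustration rather than a public API):

  #include "unsupported/Eigen/CXX11/Tensor"
  #include "tensorflow/core/lib/core/threadpool.h"
  #include "tensorflow/core/platform/env.h"

  tensorflow::thread::ThreadPool pool(tensorflow::Env::Default(), "XLAEigen", 4);
  EigenThreadPoolWrapper wrapper(&pool);  // adapts ThreadPool to Eigen's interface
  Eigen::ThreadPoolDevice device(&wrapper, wrapper.NumThreads());

  Eigen::Tensor<float, 1> x(1024), y(1024);
  x.setRandom();
  y.device(device) = x * 2.0f;  // the multiply is scheduled on `pool`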
diff --git a/tensorflow/compiler/xla/service/batch_dot_simplification.cc b/tensorflow/compiler/xla/service/batch_dot_simplification.cc
index eda026a..dbabd82 100644
--- a/tensorflow/compiler/xla/service/batch_dot_simplification.cc
+++ b/tensorflow/compiler/xla/service/batch_dot_simplification.cc
@@ -28,6 +28,13 @@
                  *rhs = batch_dot->mutable_operand(1);
   const Shape& lhs_shape = lhs->shape();
 
+  // A dot with no contracting dims will be rewritten into a multiply by
+  // AlgebraicSimplifier. Dots with multiple contracting dims are currently
+  // unsupported.
+  if (dim_numbers.lhs_contracting_dimensions_size() != 1) {
+    return false;
+  }
+
   std::vector<int64> degenerate_dims;
   for (int64 batch_dim : dim_numbers.lhs_batch_dimensions()) {
     if (lhs_shape.dimensions(batch_dim) == 1) {
diff --git a/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc b/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc
index 52ec1a7..a81f394 100644
--- a/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc
+++ b/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc
@@ -169,5 +169,47 @@
                   /*lhs_contracting_dim=*/3, /*rhs_contracting_dim=*/2)));
 }
 
+TEST_F(BatchDotSimplificationTest,
+       ElideMultipleDegenerateBatchDotDimsNonContracting) {
+  const char* hlo_text = R"(
+HloModule BatchDot
+
+main {
+  a = f32[1,101] parameter(0)
+  b = f32[1,101] parameter(1)
+  ROOT dot = f32[1,101,101] dot(a,b), lhs_batch_dims={0},
+                                      lhs_contracting_dims={},
+                                      rhs_batch_dims={0},
+                                      rhs_contracting_dims={}
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> m,
+                          ParseAndReturnVerifiedModule(hlo_text));
+  BatchDotSimplification pass;
+  ASSERT_FALSE(pass.Run(m.get()).ValueOrDie());
+}
+
+TEST_F(BatchDotSimplificationTest,
+       ElideMultipleDegenerateBatchDotDimsMultipleContracting) {
+  const char* hlo_text = R"(
+HloModule BatchDot
+
+main {
+  lhs = f32[1,5,17,10,13] parameter(0)
+  rhs = f32[1,9,10,13,6,5] parameter(1)
+  ROOT dot = f32[10,1,17,9,6] dot(lhs,rhs), lhs_batch_dims={3,0},
+                                            rhs_batch_dims={2,0},
+                                            lhs_contracting_dims={1,4},
+                                            rhs_contracting_dims={5,3}
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> m,
+                          ParseAndReturnVerifiedModule(hlo_text));
+  BatchDotSimplification pass;
+  ASSERT_FALSE(pass.Run(m.get()).ValueOrDie());
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
index 2591ff6..2caa979 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
@@ -286,7 +286,8 @@
   TF_ASSERT_OK_AND_ASSIGN(
       auto* sort,
       MakeSortHlo(ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}),
-                  {key, value}, 0, &builder, module.get()));
+                  {key, value}, 0, /*is_stable=*/false, &builder,
+                  module.get()));
   HloInstruction* gte = builder.AddInstruction(
       HloInstruction::CreateGetTupleElement(bf16_shape, sort, 0));
 
@@ -314,7 +315,8 @@
   TF_ASSERT_OK_AND_ASSIGN(
       auto* sort,
       MakeSortHlo(ShapeUtil::MakeTupleShape({bf16_shape, f32_shape}),
-                  {key, value}, 0, &builder, module.get()));
+                  {key, value}, 0, /*is_stable=*/false, &builder,
+                  module.get()));
 
   auto computation = module->AddEntryComputation(builder.Build());
 
diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h
index c02ffda..57a636f 100644
--- a/tensorflow/compiler/xla/service/call_graph.h
+++ b/tensorflow/compiler/xla/service/call_graph.h
@@ -30,7 +30,7 @@
 
 // The context in which a computation is called by another computation.
 enum class CallContext {
-  // In a parallel contex the computation is applied to each element of the
+  // In a parallel context the computation is applied to each element of the
   // array argument(s). kMap and kReduce instructions call computations in
   // parallel context.
   kParallel,
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index d4535b2..42672bc 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -136,6 +136,7 @@
         "//tensorflow/compiler/xla/service:reshape_mover",
         "//tensorflow/compiler/xla/service:sort_simplifier",
         "//tensorflow/compiler/xla/service:transpose_folding",
+        "//tensorflow/compiler/xla/service:triangular_solve_expander",
         "//tensorflow/compiler/xla/service:tuple_simplifier",
         "//tensorflow/compiler/xla/service:while_loop_constant_sinking",
         "//tensorflow/compiler/xla/service:while_loop_invariant_code_motion",
diff --git a/tensorflow/compiler/xla/service/cpu/build_defs.bzl b/tensorflow/compiler/xla/service/cpu/build_defs.bzl
index e78330b..ffa1cd4 100644
--- a/tensorflow/compiler/xla/service/cpu/build_defs.bzl
+++ b/tensorflow/compiler/xla/service/cpu/build_defs.bzl
@@ -1,12 +1,11 @@
 """build_defs for service/cpu."""
 
-
 def runtime_copts():
-  """Returns copts used for CPU runtime libraries."""
-  return (["-DEIGEN_AVOID_STL_ARRAY"] + select({
-      "//tensorflow:android_arm": ["-mfpu=neon"],
-      "//conditions:default": []
-  }) + select({
-      "//tensorflow:android": ["-O2"],
-      "//conditions:default": []
-  }))
+    """Returns copts used for CPU runtime libraries."""
+    return (["-DEIGEN_AVOID_STL_ARRAY"] + select({
+        "//tensorflow:android_arm": ["-mfpu=neon"],
+        "//conditions:default": [],
+    }) + select({
+        "//tensorflow:android": ["-O2"],
+        "//conditions:default": [],
+    }))
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index eafda68..cc9489d 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -95,6 +95,7 @@
 #include "tensorflow/compiler/xla/service/scatter_expander.h"
 #include "tensorflow/compiler/xla/service/sort_simplifier.h"
 #include "tensorflow/compiler/xla/service/transpose_folding.h"
+#include "tensorflow/compiler/xla/service/triangular_solve_expander.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h"
 #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
@@ -255,6 +256,8 @@
 
   pipeline.AddPass<MapInliner>();
 
+  pipeline.AddPass<TriangularSolveExpander>();
+
   // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner
   // pass.
   pipeline.AddPass<CallInliner>();
@@ -670,9 +673,9 @@
   if (embed_ir_in_executable) {
     ir_module_string = llvm_ir::DumpModuleToString(*llvm_module);
   }
-  TF_RETURN_IF_ERROR(VerifyLlvmModule(*llvm_module));
 
   XLA_VLOG_LINES(2, "LLVM IR:\n" + llvm_ir::DumpModuleToString(*llvm_module));
+  TF_RETURN_IF_ERROR(VerifyLlvmModule(*llvm_module));
 
   // JIT compile the LLVM IR module to in-memory machine code.
   jit->AddModule(std::move(llvm_module));
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 0fecbaf..2bf22ec 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -963,8 +963,8 @@
   KernelSupportLibrary ksl(b);
 
   return ksl.ForWithStatus(
-      "bdot", /*start=*/0, /*end=*/batch_count, /*step=*/1,
-      [&](llvm::Value* indvar) {
+      llvm_ir::IrName(&dot, "bdot"), /*start=*/0, /*end=*/batch_count,
+      /*step=*/1, [&](llvm::Value* indvar) {
         DotDimensionNumbers adjusted_dim_numbers = dot.dot_dimension_numbers();
         adjusted_dim_numbers.clear_lhs_batch_dimensions();
         adjusted_dim_numbers.clear_rhs_batch_dimensions();
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index efdda85..9967cf2 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -77,7 +77,6 @@
 using llvm_ir::AsStringRef;
 using llvm_ir::IrName;
 using llvm_ir::SetToFirstInsertPoint;
-namespace gtl = tensorflow::gtl;
 }  // namespace
 
 namespace cpu {
@@ -517,6 +516,7 @@
     case U8:
     case S16:
     case U16:
+    case BF16:
     case F16:
     case S32:
     case U32:
@@ -577,72 +577,14 @@
     lower_dimensions *= normalized_keys_shape.dimensions(i);
   }
 
-  llvm::FunctionType* less_than_type = llvm::FunctionType::get(
-      b_.getInt1Ty(), {b_.getInt8PtrTy(), b_.getInt8PtrTy()},
-      /*isVarArg=*/false);
-  auto less_than_function = llvm_ir::CreateFunction(
-      less_than_type, llvm::GlobalValue::InternalLinkage,
-      /*enable_fast_math=*/false,
-      /*optimize_for_size=*/true, absl::StrCat(IrName(sort), "_comparator"),
-      module_);
-  // Emit the code for the less_than function.
-  {
-    llvm::IRBuilder<>::InsertPointGuard guard(b_);
-
-    auto* entry_bb =
-        llvm::BasicBlock::Create(b_.getContext(), "entry", less_than_function);
-
-    b_.SetInsertPoint(entry_bb);
-    auto keys_ir_type = llvm_ir::PrimitiveTypeToIrType(keys_type, module_);
-    CHECK_EQ(less_than_function->arg_size(), 2);
-    llvm::Value* keys_lhs_ptr = less_than_function->arg_begin();
-    keys_lhs_ptr = PointerCast(keys_lhs_ptr, keys_ir_type->getPointerTo());
-    llvm::Value* keys_rhs_ptr = less_than_function->arg_begin() + 1;
-    keys_rhs_ptr = PointerCast(keys_rhs_ptr, keys_ir_type->getPointerTo());
-
-    // TODO(b/122298745): Replace the custom compare logic with a call to the
-    // computation specified for the Sort op.
-    llvm::Value* keys_lhs = Load(keys_ir_type, keys_lhs_ptr);
-    llvm::Value* keys_rhs = Load(keys_ir_type, keys_rhs_ptr);
-    bool is_signed_comparison = true;
-    if (primitive_util::IsFloatingPointType(keys_type)) {
-      // We would like a total order of floating point numbers so that the
-      // sort has a predictable behavior in the presence of NaNs. Rather
-      // than using floating point comparison, we use the following trick:
-      // If f is a float, and
-      // x = bit_cast<int32>(f);
-      // y = x < 0 ? 0x7FFFFFFF - x : x;
-      // then y is ordered as an int32 such that finite values have the
-      // obvious order, -0 is ordered before 0, and -NaN and NaN appear at
-      // the beginning and end of the ordering.
-      auto k = b_.getInt(llvm::APInt::getSignedMaxValue(
-          keys_lhs->getType()->getPrimitiveSizeInBits()));
-      auto comparison_type = k->getType();
-      auto zero = llvm::ConstantInt::get(comparison_type, 0);
-      auto maybe_flip = [&](llvm::Value* v) {
-        return b_.CreateSelect(b_.CreateICmp(llvm::ICmpInst::ICMP_SLT, v, zero),
-                               b_.CreateSub(k, v), v);
-      };
-      keys_lhs = b_.CreateBitCast(keys_lhs, comparison_type);
-      keys_rhs = b_.CreateBitCast(keys_rhs, comparison_type);
-      keys_lhs = maybe_flip(keys_lhs);
-      keys_rhs = maybe_flip(keys_rhs);
-    } else if (!primitive_util::IsSignedIntegralType(keys_type)) {
-      is_signed_comparison = false;
-    }
-    llvm::Value* result =
-        b_.CreateICmp(is_signed_comparison ? llvm::ICmpInst::ICMP_SLT
-                                           : llvm::ICmpInst::ICMP_ULT,
-                      keys_lhs, keys_rhs);
-    llvm::ReturnInst::Create(b_.getContext(),
-                             /*retVal=*/result, entry_bb);
-  }
-
+  auto less_than_function = FindOrDie(emitted_functions_, sort->to_apply());
+  CHECK(absl::c_binary_search(thread_local_computations_, sort->to_apply()));
   llvm::FunctionType* key_value_sort_type = llvm::FunctionType::get(
       b_.getVoidTy(),
       {b_.getInt64Ty(), b_.getInt64Ty(), b_.getInt64Ty(),
        b_.getInt8PtrTy()->getPointerTo(), b_.getInt32Ty(),
-       b_.getInt32Ty()->getPointerTo(), less_than_function->getType()},
+       b_.getInt32Ty()->getPointerTo(), b_.getInt1Ty(), b_.getInt8PtrTy(),
+       b_.getInt64Ty()->getPointerTo(), less_than_function->getType()},
       /*isVarArg=*/false);
   auto* key_value_sort_func = llvm::dyn_cast<llvm::Function>(
       module_
@@ -673,7 +615,9 @@
   Call(key_value_sort_func,
        {b_.getInt64(higher_dimensions), b_.getInt64(sort_dimension_elements),
         b_.getInt64(lower_dimensions), values,
-        b_.getInt32(sort->operand_count()), sizes, less_than_function});
+        b_.getInt32(sort->operand_count()), sizes,
+        b_.getInt1(sort->is_stable()), GetExecutableRunOptionsArgument(),
+        GetProfileCountersArgument(), less_than_function});
 
   if (sort->values_count() > 0) {
     llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_,
@@ -1914,7 +1858,7 @@
 }
 
 Status IrEmitter::HandleReduce(HloInstruction* reduce) {
-  // TODO(b/112040122): Support variadic reduce.
+  // TODO(b/118333695): Support variadic reduce.
   if (!reduce->shape().IsArray()) {
     return Unimplemented("Variadic reduce is not supported on CPU");
   }
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
index a0667d0..70a6d0a 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
@@ -32,8 +32,9 @@
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort(
     int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes,
-    bool (*less_than)(char*, char*)) {
+    int32* values_primitive_type_size_in_bytes, bool is_stable,
+    char* run_options, int64* prof_counters,
+    void (*less_than)(char*, char*, char**, char**, tensorflow::int64*)) {
   // 'values' and 'values_primitive_type_size_in_bytes' are managed by the JIT
   // code, so msan can't tell they are initialized.
   TF_ANNOTATE_MEMORY_IS_INITIALIZED(values, values_count * sizeof(char*));
@@ -54,6 +55,7 @@
   int64 sort_dimension_offset = c;
 
   std::unique_ptr<int64[]> indices(new int64[sort_dimension_elements]);
+  std::unique_ptr<char*[]> comparison_values(new char*[2 * values_count]);
   std::iota(indices.get(), indices.get() + sort_dimension_elements, 0);
   std::unique_ptr<std::string[]> reordered_values(
       new std::string[sort_dimension_elements]);
@@ -67,16 +69,27 @@
     int64 base_offset =
         index % sort_dimension_offset +
         (index - index % sort_dimension_offset) * sort_dimension_elements;
-    std::stable_sort(
-        indices.get(), indices.get() + sort_dimension_elements,
-        [&](int64 a, int64 b) {
-          int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) *
-                                   values_primitive_type_size_in_bytes[0];
-          int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) *
-                                   values_primitive_type_size_in_bytes[0];
-          return less_than(values[0] + memory_index_lhs,
-                           values[0] + memory_index_rhs);
-        });
+    auto compare_function = [&](int64 a, int64 b) -> bool {
+      int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) *
+                               values_primitive_type_size_in_bytes[0];
+      int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) *
+                               values_primitive_type_size_in_bytes[0];
+      for (int32 i = 0; i < values_count; ++i) {
+        comparison_values[i * 2] = values[i] + memory_index_lhs;
+        comparison_values[i * 2 + 1] = values[i] + memory_index_rhs;
+      }
+      char result = 0;  // Overwritten by less_than.
+      less_than(&result, run_options, comparison_values.get(), nullptr,
+                prof_counters);
+      return result != 0u;
+    };
+    if (is_stable) {
+      std::stable_sort(indices.get(), indices.get() + sort_dimension_elements,
+                       compare_function);
+    } else {
+      std::sort(indices.get(), indices.get() + sort_dimension_elements,
+                compare_function);
+    }
 
     // Reorder the values according to the order defined by 'indices'.
     for (int32 idx = 0; idx < values_count; ++idx) {
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
index 5460af3..50c2911 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
@@ -22,18 +22,25 @@
 extern "C" {
 
 // Each entry in 'values' represents a 3-dimensional shape with dimensions
-// [a, b, c]. The 'b' dimension of the first shape is sorted into ascending
-// order according to the results of comparisons using the provided 'less_than'
+// [a, b, c]. The 'b' dimension of each shape is sorted into ascending order
+// according to the results of comparisons using the provided 'less_than'
 // function. 'values_count' must be > 0 and specifies the number of entries in
 // 'values' and 'values_primitive_type_size_in_bytes'. The size of the primitive
 // type of the i-th shape has exactly 'values_primitive_type_size_in_bytes[i]'
-// bytes. The elements in each 'values' shape are reordered in the same way
-// according to the comparisons using the first shape.
+// bytes. 'is_stable' specifies whether the sorting should be stable.
+// 'run_options' and 'prof_counters' are passed through to the less-than
+// function, which expects the following arguments:
+// - pointer to the return value buffer (char*)
+// - xla::ExecutableRunOptions = 'run_options' (char*)
+// - pointers to the parameter buffers (char**)
+// - pointers to the buffer tables = nullptr for thread local functions (char**)
+// - profile counters = 'prof_counters' (int64*)
 extern void __xla_cpu_runtime_KeyValueSort(
     tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
     char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes,
-    bool (*less_than)(char*, char*));
+    tensorflow::int32* values_primitive_type_size_in_bytes, bool is_stable,
+    char* run_options, tensorflow::int64* prof_counters,
+    void (*less_than)(char*, char*, char**, char**, tensorflow::int64*));
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_KEY_VALUE_SORT_H_
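A hedged sketch of a comparator matching the calling convention documented above (the name and element type are illustrative; in practice the comparator is the JIT-compiled Sort to_apply computation):

  void LessThanF32(char* result, char* run_options, char** params,
                   char** buffer_table, tensorflow::int64* prof_counters) {
    // params holds 2 * values_count pointers, interleaved as
    // {lhs_0, rhs_0, lhs_1, rhs_1, ...}; this sketch compares on shape 0 only.
    float lhs = *reinterpret_cast<float*>(params[0]);
    float rhs = *reinterpret_cast<float*>(params[1]);
    *result = lhs < rhs ? 1 : 0;  // written into the return-value buffer
  }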
diff --git a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc
index eb6c44b..9fc472f 100644
--- a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc
@@ -938,6 +938,53 @@
   });
 }
 
+llvm::Type* GetPointerToElementType(llvm::Type* pointer_type) {
+  llvm::Type* type =
+      llvm::cast<llvm::PointerType>(pointer_type)->getElementType();
+  while (auto* array_type = llvm::dyn_cast<llvm::ArrayType>(type)) {
+    type = array_type->getElementType();
+  }
+
+  return type->getPointerTo();
+}
+
+struct GemvBuffersWithCanonicalType {
+  llvm::Value* lhs_canonicalized;
+  llvm::Value* rhs_canonicalized;
+  llvm::Value* addend_canonicalized;
+  llvm::Value* result_canonicalized;
+};
+
+GemvBuffersWithCanonicalType GetGemvBuffersWithCanonicalType(
+    llvm::Value* lhs, llvm::Value* rhs, llvm::Value* addend,
+    llvm::Value* result, llvm::IRBuilder<>* b) {
+  // We characterize a GEMV operation via M and K, since N is implicitly 1.
+  // This means the GEMV that multiplies (say) [5,6] with [6,1] is implemented
+  // by the same GEMV that multiplies [5,6] with [1,6].  However, the
+  // `llvm::Types` for the inputs to the two GEMVs don't match (only in a
+  // trivial sense -- the in-memory representations are the same) since
+  // they're computed from the `xla::Shape`s.  Since we want to be able to
+  // call the same `llvm::Function` for the two GEMVs, we canonicalize the
+  // types of the GEMV inputs here into the same type.
+  GemvBuffersWithCanonicalType buffers_with_canonical_type;
+  llvm::Type* lhs_type = lhs->getType();
+  llvm::Type* rhs_type = rhs->getType();
+  llvm::Type* addend_type = addend ? addend->getType() : nullptr;
+  llvm::Type* result_type = result->getType();
+
+  buffers_with_canonical_type.lhs_canonicalized =
+      b->CreateBitCast(lhs, GetPointerToElementType(lhs_type));
+  buffers_with_canonical_type.rhs_canonicalized =
+      b->CreateBitCast(rhs, GetPointerToElementType(rhs_type));
+  buffers_with_canonical_type.addend_canonicalized =
+      addend ? b->CreateBitCast(addend, GetPointerToElementType(addend_type))
+             : nullptr;
+  buffers_with_canonical_type.result_canonicalized =
+      b->CreateBitCast(result, GetPointerToElementType(result_type));
+
+  return buffers_with_canonical_type;
+}
+
 }  // namespace
 
 void EmitRowMajorGemv(PrimitiveType scalar_type, int64 tile_rows,
@@ -950,12 +997,18 @@
       /*tile_rows=*/tile_rows, /*tile_cols=*/tile_cols,
       /*m=*/m, /*k=*/k, /*has_addend=*/addend != nullptr);
 
+  GemvBuffersWithCanonicalType canonical_inputs =
+      GetGemvBuffersWithCanonicalType(lhs, rhs, addend, result, b);
+
   KernelSupportLibrary::EmitAndCallOutlinedKernel(
       /*enable_fast_math=*/enable_fast_math,
-      /*optimize_for_size=*/optimize_for_size, b, config.GetCacheKey(), lhs,
-      rhs, addend, result,
-      [&](llvm::Value* lhs, llvm::Value* rhs, llvm::Value* addend,
-          llvm::Value* result) {
+      /*optimize_for_size=*/optimize_for_size, b, config.GetCacheKey(),
+      canonical_inputs.lhs_canonicalized, canonical_inputs.rhs_canonicalized,
+      canonical_inputs.addend_canonicalized,
+      canonical_inputs.result_canonicalized,
+      [&config, b, &canonical_inputs](llvm::Value* lhs, llvm::Value* rhs,
+                                      llvm::Value* addend,
+                                      llvm::Value* result) {
         RowMajorMatrixVectorProductEmitter emitter(config, lhs, rhs, addend,
                                                    result, b);
         emitter.Emit();
@@ -972,12 +1025,18 @@
       /*tile_rows=*/tile_rows, /*tile_cols=*/tile_cols,
       /*m=*/m, /*k=*/k, /*has_addend=*/addend != nullptr);
 
+  GemvBuffersWithCanonicalType canonical_inputs =
+      GetGemvBuffersWithCanonicalType(lhs, rhs, addend, result, b);
+
   KernelSupportLibrary::EmitAndCallOutlinedKernel(
       /*enable_fast_math=*/enable_fast_math,
-      /*optimize_for_size=*/optimize_for_size, b, config.GetCacheKey(), lhs,
-      rhs, addend, result,
-      [&](llvm::Value* lhs, llvm::Value* rhs, llvm::Value* addend,
-          llvm::Value* result) {
+      /*optimize_for_size=*/optimize_for_size, b, config.GetCacheKey(),
+      canonical_inputs.lhs_canonicalized, canonical_inputs.rhs_canonicalized,
+      canonical_inputs.addend_canonicalized,
+      canonical_inputs.result_canonicalized,
+      [&config, b, &canonical_inputs](llvm::Value* lhs, llvm::Value* rhs,
+                                      llvm::Value* addend,
+                                      llvm::Value* result) {
         ColumnMajorMatrixVectorProductEmitter emitter(config, lhs, rhs, addend,
                                                       result, b);
         emitter.Emit();
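A small sketch of what the canonicalization buys (assuming LLVM's typed pointers, as used here): both GEMV buffer types below collapse to float*, so one cached outlined kernel serves both shapes.

  llvm::LLVMContext context;
  llvm::Type* f32 = llvm::Type::getFloatTy(context);
  // [5 x [6 x float]]* and [6 x [1 x float]]*, as derived from xla::Shapes:
  llvm::Type* lhs_ty = llvm::ArrayType::get(llvm::ArrayType::get(f32, 6), 5)
                           ->getPointerTo();
  llvm::Type* rhs_ty = llvm::ArrayType::get(llvm::ArrayType::get(f32, 1), 6)
                           ->getPointerTo();
  // GetPointerToElementType maps both lhs_ty and rhs_ty to f32->getPointerTo().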
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index c54e954..af03977 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -105,6 +105,7 @@
   }
   virtual Status HandleConvolution(HloInstructionPtr hlo) = 0;
   virtual Status HandleFft(HloInstructionPtr fft) = 0;
+  virtual Status HandleTriangularSolve(HloInstructionPtr hlo) = 0;
   virtual Status HandleAllReduce(HloInstructionPtr hlo) = 0;
   virtual Status HandleAllToAll(HloInstructionPtr hlo) = 0;
   virtual Status HandleCollectivePermute(HloInstructionPtr hlo) = 0;
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
index 33b2cc3..341bb37 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
@@ -91,6 +91,9 @@
   Status HandleFft(HloInstructionPtr fft) override {
     return DefaultAction(fft);
   }
+  Status HandleTriangularSolve(HloInstructionPtr hlo) override {
+    return DefaultAction(hlo);
+  }
   Status HandleAllReduce(HloInstructionPtr crs) override {
     return DefaultAction(crs);
   }
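A minimal sketch of a concrete visitor using the new hook (a hypothetical class, for illustration; everything else falls through to DefaultAction):

  class TriangularSolveCounter : public DfsHloVisitorWithDefault {
   public:
    Status DefaultAction(HloInstruction* /*hlo*/) override {
      return Status::OK();
    }
    Status HandleTriangularSolve(HloInstruction* hlo) override {
      ++count_;
      return Status::OK();
    }
    int64 count() const { return count_; }

   private:
    int64 count_ = 0;
  };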
diff --git a/tensorflow/compiler/xla/service/dot_decomposer.cc b/tensorflow/compiler/xla/service/dot_decomposer.cc
index e8bc6d0..559b9c1 100644
--- a/tensorflow/compiler/xla/service/dot_decomposer.cc
+++ b/tensorflow/compiler/xla/service/dot_decomposer.cc
@@ -297,7 +297,7 @@
       const DotDimensionNumbers& dnums = instruction->dot_dimension_numbers();
       // A dot is not canonical if it does not have exactly one contracting
       // dimension.
-      if (dnums.lhs_contracting_dimensions_size() > 1) {
+      if (dnums.lhs_contracting_dimensions_size() != 1) {
         non_canonical_dots.push_back(instruction);
         continue;
       }
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index e868dc6..808929b 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -1367,26 +1367,69 @@
       llvm_ir::PrimitiveTypeToIrType(elem_prim_ty, module_);
   llvm::Type* raw_value_ty = raw_value->getType();
 
-  // Convert raw integer to float in range [0, 1) if the element is a float.
+  // If we're generating a floating-point value, convert the raw integer R (i.e.
+  // `raw_value`) to a float in the range [0, 1).
+  //
+  // The basic approach is to choose a significand and exponent such that the
+  // significand is uniformly distributed and the exponent is distributed, well,
+  // exponentially (it's more likely to be close to 0 than far from 0).
+  //
+  // An easy way to do this is to say that the significand is the first S bits
+  // of R, and the exponent is determined by the number of trailing zeroes in R,
+  // exp = 2^-(cttz(R) + 1).  (+1 because the largest exponent should be -1;
+  // this way the largest value we can return is 1.999... * 2^-1 = 1-ε.)
+  //
+  // This results in a small bias.  Namely, if R has enough trailing zeroes, the
+  // significand and exponent will "overlap".  As a concrete example, consider
+  //
+  //         20 X's                 12 zeroes
+  //   R = 0bXXXXXXXXXXXXXXXXXXXX000000000000
+  //
+  // Here the exponent is 2^-13 because R has 12 trailing zeroes.  The
+  // significand is made up of the first 23 most-significant bits of R, which
+  // we observe end in 3 zeroes.  This is biased because any random value with
+  // exponent 2^-13 will have a significand which ends in `000`.
+  //
+  // For f32s, this problem occurs only when there are more than 32-23 = 9
+  // trailing zeros, which happens with probability 0.5^10 = ~0.1%. Moreover the
+  // probability of a large bias (i.e. many trailing 0s in the significand) is
+  // exponentially low.  So we deem this acceptable.
   llvm::Value* elem_value = raw_value;
   if (elem_ir_ty->isFloatingPointTy()) {
-    unsigned raw_value_size_in_bits = raw_value_ty->getPrimitiveSizeInBits();
-    CHECK(raw_value_size_in_bits == 32 || raw_value_size_in_bits == 64);
-    // Perform the division using the float type with the same number of bits
-    // as the raw value to avoid overflow.
-    if (raw_value_size_in_bits == 32) {
-      elem_value = UIToFP(elem_value, b_->getFloatTy());
-      elem_value = FDiv(elem_value,
-                        llvm::ConstantFP::get(b_->getFloatTy(), std::exp2(32)));
-    } else {
-      elem_value = UIToFP(elem_value, b_->getDoubleTy());
-      elem_value = FDiv(
-          elem_value, llvm::ConstantFP::get(b_->getDoubleTy(), std::exp2(64)));
-    }
+    const auto& dest_flt_semantics = elem_ir_ty->getFltSemantics();
+    const int bits = raw_value_ty->getPrimitiveSizeInBits();
+    CHECK_GE(bits, llvm::APFloat::semanticsSizeInBits(dest_flt_semantics));
 
-    if (elem_ir_ty != elem_value->getType()) {
-      elem_value = FPTrunc(elem_value, elem_ir_ty);
-    }
+    // Subtract 1 because semanticsPrecision includes the "hidden bit", i.e. the
+    // implicit "1." at the beginning of the significand.
+    const int significand_bits =
+        llvm::APFloat::semanticsPrecision(dest_flt_semantics) - 1;
+
+    llvm::Value* cttz = llvm_ir::EmitCallToIntrinsic(
+        llvm::Intrinsic::cttz, {raw_value, /*is_zero_undef=*/b_->getFalse()},
+        {raw_value->getType()}, b_);
+    llvm::Value* significand = LShr(raw_value, bits - significand_bits);
+
+    // Exponent bias is -127 for f32, meaning that if the exponent is E and the
+    // significand is S, then the value of the number is 2^(E - 127) * (1.S).
+    //
+    // We want cttz == 0 to correspond to 2^-1, so our exponent is computed as
+    // E = 126 - cttz.
+    //
+    // For f64, this is all the same, except the bias is -1023.
+    //
+    // In IEEE floating point, the absolute value of the exponent bias equals
+    // the value of the largest possible exponent.
+    const int bias = -llvm::APFloat::semanticsMaxExponent(dest_flt_semantics);
+    llvm::Value* exponent =
+        Sub(llvm::ConstantInt::get(cttz->getType(), -bias - 1), cttz);
+
+    // Now just slot everything into place!  The `Trunc` is here because
+    // raw_value may be larger than our float destination.
+    elem_value =
+        BitCast(Trunc(Or(Shl(exponent, significand_bits), significand),
+                      b_->getIntNTy(elem_ir_ty->getPrimitiveSizeInBits())),
+                elem_ir_ty);
   }
 
   // Convert the value for the requested distribution.
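A scalar C++ model of the f32 bit trick emitted above (a sketch of the same computation, not the emitter itself; assumes C++20 for std::countr_zero):

  #include <bit>
  #include <cstdint>
  #include <cstring>

  float RawBitsToUnitInterval(uint32_t r) {
    const int kSignificandBits = 23;   // f32 precision minus the hidden bit
    int cttz = std::countr_zero(r);    // 32 when r == 0
    uint32_t significand = r >> (32 - kSignificandBits);
    // cttz == 0 maps to biased exponent 126, i.e. 2^-1, so the max is 1 - eps.
    uint32_t exponent = static_cast<uint32_t>(126 - cttz);
    uint32_t bits = (exponent << kSignificandBits) | significand;
    float out;
    std::memcpy(&out, &bits, sizeof(out));  // the BitCast in the IR
    return out;                             // in [0, 1)
  }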
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h
index d3e2aca..7d360fe 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h
@@ -216,8 +216,11 @@
   llvm_ir::ElementGenerator MakePhiloxRngElementGenerator(
       const HloInstruction* hlo,
       const HloToElementGeneratorMap& operand_to_generator);
+
   // Converts the raw value generated by a random number generation algorithm
   // to the distribution requested by the RNG HloInstruction.
+  //
+  // Precondition: raw_value has at least as many bits as hlo's element type.
   StatusOr<llvm::Value*> ConvertValueForDistribution(
       const HloInstruction* hlo,
       const ElementalIrEmitter::HloToElementGeneratorMap& operand_to_generator,
diff --git a/tensorflow/compiler/xla/service/gather_expander.cc b/tensorflow/compiler/xla/service/gather_expander.cc
index 590942c..a58ac39 100644
--- a/tensorflow/compiler/xla/service/gather_expander.cc
+++ b/tensorflow/compiler/xla/service/gather_expander.cc
@@ -296,7 +296,7 @@
 // [3,1] out of operand into an accumulator of shape [4,3,1].  We then
 // reshape this result to [2,2,3] and finally transpose it to [2,3,2].
 
-StatusOr<HloInstruction*> GatherExpander::ExpandGather(
+StatusOr<HloInstruction*> GatherExpander::ExpandInstruction(
     HloInstruction* gather_instr) {
   CHECK(!ShapeUtil::IsZeroElementArray(gather_instr->shape()));
 
@@ -361,25 +361,11 @@
                                    output_rank);
 }
 
-StatusOr<bool> GatherExpander::Run(HloModule* module) {
-  auto is_nontrivial_gather = [](HloInstruction* inst) {
-    return inst->opcode() == HloOpcode::kGather &&
-           // Avoid expanding gather ops that produce zero sized tensors,
-           // instead punt these to ZeroSizedHloElimination.
-           !ShapeUtil::IsZeroElementArray(inst->shape());
-  };
-
-  std::vector<HloInstruction*> gather_instrs;
-  for (HloComputation* computation : module->MakeNonfusionComputations()) {
-    absl::c_copy_if(computation->instructions(),
-                    std::back_inserter(gather_instrs), is_nontrivial_gather);
-  }
-
-  for (HloInstruction* inst : gather_instrs) {
-    TF_ASSIGN_OR_RETURN(HloInstruction * expanded_root, ExpandGather(inst));
-    TF_RETURN_IF_ERROR(inst->parent()->ReplaceInstruction(inst, expanded_root));
-  }
-
-  return !gather_instrs.empty();
+bool GatherExpander::InstructionMatchesPattern(HloInstruction* inst) {
+  return inst->opcode() == HloOpcode::kGather &&
+         // Avoid expanding gather ops that produce zero sized tensors,
+         // instead punt these to ZeroSizedHloElimination.
+         !ShapeUtil::IsZeroElementArray(inst->shape());
 }
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gather_expander.h b/tensorflow/compiler/xla/service/gather_expander.h
index 8af9c6b..5625a37 100644
--- a/tensorflow/compiler/xla/service/gather_expander.h
+++ b/tensorflow/compiler/xla/service/gather_expander.h
@@ -16,20 +16,22 @@
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GATHER_EXPANDER_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GATHER_EXPANDER_H_
 
-#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+#include "tensorflow/compiler/xla/service/op_expander_pass.h"
 
 namespace xla {
 
 // This pass rewrites gather operations into (roughly) while loops of dynamic
 // slices.  This lets backends that don't support gather directly to
 // nevertheless have a minimum level of support.
-class GatherExpander : public HloModulePass {
+class GatherExpander : public OpExpanderPass {
  public:
   absl::string_view name() const override { return "gather_expander"; }
-  StatusOr<bool> Run(HloModule* module) override;
 
  protected:
-  StatusOr<HloInstruction*> ExpandGather(HloInstruction* gather_instr);
+  bool InstructionMatchesPattern(HloInstruction* instruction) override;
+
+  StatusOr<HloInstruction*> ExpandInstruction(
+      HloInstruction* gather_inst) override;
 };
 
 }  // namespace xla
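For context, roughly how an OpExpanderPass-style driver presumably invokes the two overrides; this is a hedged sketch mirroring the Run method deleted above, and the real op_expander_pass.h may differ in details:

  StatusOr<bool> OpExpanderPass::Run(HloModule* module) {
    std::vector<HloInstruction*> matches;
    for (HloComputation* computation : module->MakeNonfusionComputations()) {
      absl::c_copy_if(computation->instructions(), std::back_inserter(matches),
                      [&](HloInstruction* inst) {
                        return InstructionMatchesPattern(inst);
                      });
    }
    for (HloInstruction* inst : matches) {
      TF_ASSIGN_OR_RETURN(HloInstruction * expanded, ExpandInstruction(inst));
      TF_RETURN_IF_ERROR(inst->parent()->ReplaceInstruction(inst, expanded));
    }
    return !matches.empty();
  }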
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 85fb2dd..25c4f70 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -305,6 +305,7 @@
         "sequential_thunk.cc",
         "thunk.cc",
         "thunk_schedule.cc",
+        "triangular_solve_thunk.cc",
         "tuple_thunk.cc",
         "while_thunk.cc",
     ],
@@ -324,6 +325,7 @@
         "sequential_thunk.h",
         "thunk.h",
         "thunk_schedule.h",
+        "triangular_solve_thunk.h",
         "tuple_thunk.h",
         "while_thunk.h",
     ],
@@ -364,6 +366,8 @@
         "//tensorflow/core/platform/default/build_config:cufft_plugin",
         "//tensorflow/core/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
         "//tensorflow/stream_executor",
+        "//tensorflow/stream_executor:blas",
+        "//tensorflow/stream_executor:device_memory",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
@@ -396,18 +400,21 @@
     srcs = ["cudnn_conv_algorithm_picker.cc"],
     hdrs = ["cudnn_conv_algorithm_picker.h"],
     deps = [
+        ":autotuning_proto",
         ":backend_configs",
         ":buffer_comparator",
         ":cudnn_conv_runner",
         ":gpu_executable",
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:protobuf_util",
         "//tensorflow/compiler/xla/service:compiler",
         "//tensorflow/compiler/xla/service:device_memory_allocator",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_casting_utils",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
+        "//tensorflow/core:logger",
         "//tensorflow/core:stream_executor_no_cuda",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -758,6 +765,7 @@
         "//tensorflow/compiler/xla/service:reduce_precision_insertion",
         "//tensorflow/compiler/xla/service:reshape_mover",
         "//tensorflow/compiler/xla/service:sort_simplifier",
+        "//tensorflow/compiler/xla/service:stable_sort_expander",
         "//tensorflow/compiler/xla/service:transpose_folding",
         "//tensorflow/compiler/xla/service:tuple_simplifier",
         "//tensorflow/compiler/xla/service:while_loop_constant_sinking",
@@ -1081,3 +1089,12 @@
         "//tensorflow/compiler/xla/tests:hlo_test_base",
     ],
 )
+
+xla_proto_library(
+    name = "autotuning_proto",
+    srcs = ["autotuning.proto"],
+    deps = [
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo_proto",
+    ],
+)
diff --git a/tensorflow/compiler/xla/service/gpu/autotuning.proto b/tensorflow/compiler/xla/service/gpu/autotuning.proto
new file mode 100644
index 0000000..b4a0896
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/autotuning.proto
@@ -0,0 +1,81 @@
+// This file defines protos that store the results of autotuning XLA:GPU
+// operations.
+//
+// They are in proto format because we want to log them in a structured way.
+// They offer tremendous statistical, testing, and debugging value.
+syntax = "proto3";
+
+package xla.gpu;
+
+import "google/protobuf/duration.proto";
+import "tensorflow/compiler/xla/xla_data.proto";
+import "tensorflow/compiler/xla/service/hlo.proto";
+
+message CudnnVersion {
+  int32 major = 1;
+  int32 minor = 2;
+  int32 patch = 3;
+}
+
+message ComputeCapability {
+  int32 major = 1;
+  int32 minor = 2;
+}
+
+message AutotuneResult {
+  message SuccessResult {
+    int64 scratch_bytes = 1;
+    google.protobuf.Duration run_time = 2;
+  }
+
+  message ConvKey {
+    int64 algorithm = 1;
+    bool tensor_ops_enabled = 2;
+  }
+
+  // If the conv runs successfully, success will be populated with the
+  // autotuning result. Otherwise, the error message is propagated.
+  oneof result {
+    SuccessResult success = 3;
+    string error_string = 4;
+  }
+
+  oneof key {
+    ConvKey conv = 5;
+  }
+
+  // Sometimes we run a correctness checker during autotuning. It compares the
+  // result buffer content between two algorithms, say, "reference" and "test"
+  // algorithms. The "test" algorithm is the one associated with this
+  // AutotuneResult.
+  //
+  // This field records the reference algorithm used. Notice that naming it
+  // "reference" doesn't mean it's always correct. However, empirically it's
+  // more correct, as it's "algo 0", less fancy than the compared one.
+  //
+  // Notice that the checker_failure may exist even in the success case.
+  // This is because the error string in `result` comes from the underlying
+  // implementation like cuDNN, which isn't aware that it produced an incorrect
+  // result. And even if the checker detects an incorrect result, we can still
+  // retrieve scratch_bytes and run_time.
+  oneof checker_failure {
+    ConvKey reference_conv = 6;
+  }
+}
+
+message AutotuneLog {
+  message Instruction {
+    xla.HloInstructionProto instruction = 1;
+    repeated xla.ShapeProto operand_shapes = 2;
+  }
+
+  oneof instr_oneof {
+    Instruction instr = 1;
+  }
+
+  // Records all auto-tuning results per algorithm.
+  repeated AutotuneResult results = 3;
+
+  CudnnVersion cudnn_version = 4;
+  ComputeCapability compute_capability = 5;
+}
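A hedged sketch of consuming these messages from C++, assuming the standard generated proto API (SummarizeLog is a hypothetical helper):

  void SummarizeLog(const xla::gpu::AutotuneLog& log) {
    for (const xla::gpu::AutotuneResult& r : log.results()) {
      if (r.has_success()) {
        LOG(INFO) << "algo " << r.conv().algorithm() << ": "
                  << r.success().scratch_bytes() << "B scratch";
      } else {
        LOG(INFO) << "algo " << r.conv().algorithm()
                  << " failed: " << r.error_string();
      }
    }
  }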
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc
index 309b0ac..603af5a 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc
@@ -16,14 +16,17 @@
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
+#include "absl/time/time.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/protobuf_util.h"
 #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/buffer_comparator.h"
 #include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/platform/mutex.h"
 
 namespace xla {
@@ -32,7 +35,6 @@
 
 using absl::optional;
 using se::DeviceMemoryBase;
-using se::dnn::AlgorithmConfig;
 using se::dnn::AlgorithmDesc;
 
 class ScratchAllocator : public se::ScratchAllocator {
@@ -132,6 +134,31 @@
   return tensorflow::mutex_lock{it->second};
 }
 
+xla::gpu::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
+  xla::gpu::CudnnVersion cudnn_version;
+  if (auto* dnn = stream_executor->AsDnn()) {
+    StatusOr<se::dnn::VersionInfo> version_or = dnn->GetVersion();
+    if (version_or.ok()) {
+      const auto& version = version_or.ValueOrDie();
+      cudnn_version.set_major(version.major_version());
+      cudnn_version.set_minor(version.minor_version());
+      cudnn_version.set_patch(version.patch());
+    }
+  }
+  return cudnn_version;
+}
+
+xla::gpu::ComputeCapability GetComputeCapability(
+    se::StreamExecutor* stream_executor) {
+  xla::gpu::ComputeCapability cc;
+  int cc_major, cc_minor;
+  stream_executor->GetDeviceDescription().cuda_compute_capability(&cc_major,
+                                                                  &cc_minor);
+  cc.set_major(cc_major);
+  cc.set_minor(cc_minor);
+  return cc;
+}
+
 }  // anonymous namespace
 
 // We could have caching here so that we don't redo this work for two identical
@@ -145,8 +172,7 @@
 // cache misses and doing extra work.  Overall, caching doesn't seem worth the
 // trouble, but we may want to revisit this if we ever find a model where
 // caching would speed up compilation a lot.
-StatusOr<CudnnConvAlgorithmPicker::AutotuneResult>
-CudnnConvAlgorithmPicker::PickBestAlgorithm(
+StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
     const HloCustomCallInstruction* instr) {
   // TODO(timshen): for now only check fp16. It can be expanded to other types,
   // with some work on the HLO routines.
@@ -233,8 +259,6 @@
           &stream, ShapeUtil::ByteSizeOf(instr->shape().tuple_shapes(0))));
   initialize_buffer(result_buffer);
 
-  se::dnn::ProfileResult best_result;
-  int64 best_result_bytes_used = 0;
   TF_ASSIGN_OR_RETURN(auto backend_config,
                       instr->backend_config<CudnnConvBackendConfig>());
 
@@ -244,6 +268,7 @@
   // this algorithm considered correct, though.
   optional<AlgorithmDesc> first_algorithm;
   TF_ASSIGN_OR_RETURN(CudnnConvKind kind, GetCudnnConvKind(instr));
+  std::vector<AutotuneResult> profile_results;
   for (const AlgorithmDesc& alg : GetAlgorithms(kind, stream_exec_)) {
     ScratchAllocator scratch_allocator(device_ordinal, allocator);
     se::dnn::ProfileResult profile_result;
@@ -254,73 +279,108 @@
     RunConvOptions options;
     options.profile_result = &profile_result;
     options.algo_override = alg;
-    bool launch_ok =
+    Status launch_status =
         RunCudnnConv(instr, absl::MakeSpan(operand_buffers), result_buffer,
-                     &scratch_allocator, &stream, options)
-            .ok();
+                     &scratch_allocator, &stream, options);
 
-    if (launch_ok && profile_result.is_valid()) {
-      const bool crash_on_checking_failure =
-          instr->GetModule()
-              ->config()
-              .debug_options()
-              .xla_gpu_crash_on_verification_failures();
-      if (comparator.has_value()) {
-        StatusOr<bool> result = comparator->CompareEqual(
-            se::DeviceMemory<Eigen::half>(result_buffer));
-        if (!result.ok()) {
-          LOG(ERROR) << "Unable to compare "
-                     << AlgorithmToString(*first_algorithm) << " against "
-                     << AlgorithmToString(alg) << " for " << instr->ToString()
-                     << ": " << result.status();
-          CHECK(!crash_on_checking_failure);
-        } else if (!result.ValueOrDie()) {
-          LOG(ERROR) << "Results mismatch between different convolution "
-                        "algorithms. This is likely a bug in convolution, or "
-                        "an excessive loss of precision in convolution. "
-                     << instr->ToString() << " for "
-                     << AlgorithmToString(*first_algorithm) << " vs "
-                     << AlgorithmToString(alg);
-          CHECK(!crash_on_checking_failure);
-        }
-      } else if (cross_check_enabled) {
-        auto comp = F16BufferComparator::Create(
-            se::DeviceMemory<Eigen::half>(result_buffer), compiler_, allocator,
-            &stream);
-        if (comp.ok()) {
-          comparator.emplace(comp.ConsumeValueOrDie());
-          first_algorithm.emplace(alg);
-        } else {
-          LOG(ERROR) << "Fail to initialize buffer comparator: "
-                     << comp.status() << ", instruction: " << instr->ToString();
-          CHECK(!crash_on_checking_failure);
-        }
+    profile_results.emplace_back();
+    AutotuneResult& result = profile_results.back();
+    result.mutable_conv()->set_algorithm(alg.algo_id());
+    result.mutable_conv()->set_tensor_ops_enabled(alg.tensor_ops_enabled());
+
+    if (!launch_status.ok()) {
+      result.set_error_string(launch_status.error_message());
+      continue;
+    }
+
+    if (!profile_result.is_valid()) {
+      result.set_error_string("Invalid profile result");
+      continue;
+    }
+
+    int64 scratch_bytes_used = scratch_allocator.TotalAllocatedBytes();
+    result.mutable_success()->set_scratch_bytes(scratch_bytes_used);
+    *result.mutable_success()->mutable_run_time() =
+        protobuf_util::ToDurationProto(
+            absl::Milliseconds(profile_result.elapsed_time_in_ms()));
+
+    const bool crash_on_checking_failure =
+        instr->GetModule()
+            ->config()
+            .debug_options()
+            .xla_gpu_crash_on_verification_failures();
+
+    if (comparator.has_value()) {
+      StatusOr<bool> compare_result = comparator->CompareEqual(
+          se::DeviceMemory<Eigen::half>(result_buffer));
+      if (!compare_result.ok()) {
+        LOG(ERROR) << "Unable to compare "
+                   << AlgorithmToString(*first_algorithm) << " against "
+                   << AlgorithmToString(alg) << " for " << instr->ToString()
+                   << ": " << compare_result.status();
+        CHECK(!crash_on_checking_failure);
+      } else if (!compare_result.ValueOrDie()) {
+        LOG(ERROR) << "Results mismatch between different convolution "
+                      "algorithms. This is likely a bug in convolution, or "
+                      "an excessive loss of precision in convolution. "
+                   << instr->ToString() << " for "
+                   << AlgorithmToString(*first_algorithm) << " vs "
+                   << AlgorithmToString(alg);
+        CHECK(!crash_on_checking_failure);
+        auto* failure = result.mutable_reference_conv();
+        failure->set_algorithm(first_algorithm->algo_id());
+        failure->set_tensor_ops_enabled(first_algorithm->tensor_ops_enabled());
       }
-      int64 scratch_bytes_used = scratch_allocator.TotalAllocatedBytes();
-      VLOG(3) << "Run of algorithm " << AlgorithmToString(alg)
-              << " succeeded, taking " << profile_result.elapsed_time_in_ms()
-              << "ms and using " << NumBytesToString(scratch_bytes_used)
-              << " of scratch (Best result: "
-              << best_result.elapsed_time_in_ms() << "ms, "
-              << NumBytesToString(best_result_bytes_used) << " of scratch)";
-      if (profile_result.elapsed_time_in_ms() <
-          best_result.elapsed_time_in_ms()) {
-        best_result = profile_result;
-        best_result_bytes_used = scratch_bytes_used;
+    } else if (cross_check_enabled) {
+      auto comp = F16BufferComparator::Create(
+          se::DeviceMemory<Eigen::half>(result_buffer), compiler_, allocator,
+          &stream);
+      if (comp.ok()) {
+        comparator.emplace(comp.ConsumeValueOrDie());
+        first_algorithm.emplace(alg);
+      } else {
+        LOG(ERROR) << "Fail to initialize buffer comparator: " << comp.status()
+                   << ", instruction: " << instr->ToString();
+        CHECK(!crash_on_checking_failure);
       }
-    } else {
-      VLOG(3) << "Run of algorithm " << AlgorithmToString(alg) << " failed.";
     }
   }
-  if (best_result.is_valid()) {
-    VLOG(2) << "Best algorithm for " << instr->ToString() << ": "
-            << AlgorithmToString(best_result.algorithm()) << ", takes "
-            << best_result.elapsed_time_in_ms() << "ms, and uses "
-            << best_result_bytes_used << "B of scratch memory.";
-    return AutotuneResult{best_result.algorithm().algo_id(),
-                          best_result.algorithm().tensor_ops_enabled(),
-                          best_result_bytes_used,
-                          absl::Milliseconds(best_result.elapsed_time_in_ms())};
+
+  // Log the autotuning result.
+  {
+    AutotuneLog log;
+    *log.mutable_instr()->mutable_instruction() = instr->ToProto();
+    for (const auto* op : instr->operands()) {
+      *log.mutable_instr()->add_operand_shapes() = op->shape().ToProto();
+    }
+    for (const auto& profile : profile_results) {
+      *log.add_results() = profile;
+    }
+    *log.mutable_compute_capability() = GetComputeCapability(stream_exec_);
+    *log.mutable_cudnn_version() = GetCudnnVersion(stream_exec_);
+    VLOG(2) << "Autotuning result:\n" << log.DebugString();
+    tensorflow::Logger::Singleton()->LogProto(log);
+  }
+
+  auto* profile_results_end = profile_results.data() + profile_results.size();
+
+  const AutotuneResult* best_result = std::min_element(
+      profile_results.data(), profile_results_end,
+      [](const AutotuneResult& lhs, const AutotuneResult& rhs) {
+        // The successful one should have a smaller key, since we are doing
+        // min_element. If they are both unsuccessful, keep the earlier one in
+        // the vector by comparing pointers.
+        return std::make_tuple(
+                   !lhs.has_success(),
+                   protobuf_util::FromDurationProto(lhs.success().run_time()),
+                   &lhs) < std::make_tuple(!rhs.has_success(),
+                                           protobuf_util::FromDurationProto(
+                                               rhs.success().run_time()),
+                                           &rhs);
+      });
+
+  if (best_result != profile_results_end && best_result->has_success()) {
+    return *best_result;
   }
 
   return InternalError(
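The comparator above encodes the whole selection policy in a tuple key: results without a success field sort after successful ones, faster runs sort first, and the element's address breaks ties so the earliest profiled algorithm wins. A minimal standalone sketch of the same idiom, using a plain struct in place of the AutotuneResult proto:

```c++
#include <algorithm>
#include <cstdio>
#include <tuple>
#include <vector>

struct Result {
  bool ok;             // stands in for AutotuneResult::has_success()
  double run_time_ms;  // stands in for the success.run_time duration
};

int main() {
  std::vector<Result> results = {{false, 0.0}, {true, 3.5}, {true, 2.1}};
  const Result* end = results.data() + results.size();
  const Result* best = std::min_element(
      results.data(), end, [](const Result& lhs, const Result& rhs) {
        // Failures sort last, faster runs sort first, addresses break ties.
        return std::make_tuple(!lhs.ok, lhs.run_time_ms, &lhs) <
               std::make_tuple(!rhs.ok, rhs.run_time_ms, &rhs);
      });
  if (best != end && best->ok) {
    std::printf("best: %.1f ms\n", best->run_time_ms);  // prints: best: 2.1 ms
  }
  return 0;
}
```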
@@ -341,22 +401,23 @@
   }
 
   auto best_algo = std::move(best_algo_or).ValueOrDie();
-  VLOG(1) << "Setting cudnn conv to use algorithm " << best_algo.algorithm
-          << " and " << NumBytesToString(best_algo.scratch_bytes)
+  VLOG(1) << "Setting cudnn conv to use algorithm "
+          << best_algo.conv().algorithm() << " and "
+          << NumBytesToString(best_algo.success().scratch_bytes())
           << " of scratch memory: " << instr->ToString()
-          << " tensor_ops_enabled: " << best_algo.tensor_ops_enabled;
+          << " tensor_ops_enabled: " << best_algo.conv().tensor_ops_enabled();
 
   // Replace instr with a new CustomCall which has the correct algorithm, and
   // whose output shape has the appropriate amount of scratch memory.
   HloComputation* computation = instr->parent();
   Shape new_call_shape = ShapeUtil::MakeTupleShape(
       {instr->shape().tuple_shapes(0),
-       ShapeUtil::MakeShape(U8, {best_algo.scratch_bytes})});
+       ShapeUtil::MakeShape(U8, {best_algo.success().scratch_bytes()})});
 
   TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
                       instr->backend_config<CudnnConvBackendConfig>());
-  backend_config.set_algorithm(best_algo.algorithm);
-  backend_config.set_tensor_ops_enabled(best_algo.tensor_ops_enabled);
+  backend_config.set_algorithm(best_algo.conv().algorithm());
+  backend_config.set_tensor_ops_enabled(best_algo.conv().tensor_ops_enabled());
 
   HloInstruction* new_call = computation->AddInstruction(
       instr->CloneWithNewOperands(new_call_shape, instr->operands()));
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h
index 4991db0..2e34ba9 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h
@@ -20,6 +20,7 @@
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/device_memory_allocator.h"
+#include "tensorflow/compiler/xla/service/gpu/autotuning.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
@@ -47,13 +48,6 @@
   StatusOr<bool> Run(HloModule* module) override;
 
  private:
-  struct AutotuneResult {
-    int64 algorithm;
-    bool tensor_ops_enabled;
-    int64 scratch_bytes;
-    absl::Duration runtime;
-  };
-
   StatusOr<bool> RunOnComputation(HloComputation* computation);
   StatusOr<bool> RunOnInstruction(HloInstruction* instr);
   StatusOr<AutotuneResult> PickBestAlgorithm(
diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
index ffd4214..80ddb3e 100644
--- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
@@ -448,7 +448,7 @@
         return Load(accum_ptr);
       };
     case HloOpcode::kReduce:
-      // TODO(b/112040122): This should be supported.
+      // TODO(b/118332391): This should be supported.
       CHECK_EQ(hlo->operand_count(), 2) << "Did not expect variadic reduce";
       return [=, &operand_to_generator](
                  const IrArray::Index& output_index) -> StatusOr<llvm::Value*> {
diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
index 86c9bc6..a7053e6 100644
--- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
@@ -428,7 +428,8 @@
         scratch_data = scratch_mem->device_memory();
       }
       const MatrixDescriptor scratch_descriptor(
-          scratch_data, false, output_num_cols, output_num_rows, batch_size);
+          scratch_data, false, output_matrix.num_rows, output_matrix.num_cols,
+          batch_size);
 
       StatusOr<se::blas::AlgorithmType> best_algorithm = GetGemmAutotuneFn(
           element_type)(lhs_matrix, rhs_matrix, scratch_descriptor, alpha_,
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h
index e9d7ba1..9f0de3f 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h
@@ -48,7 +48,7 @@
 
 // Whether instruction shapes are compatible for multi-output fusion, i.e.
 // whether the emitters support lowering the resulting fusion.
-// This function works for both, sibling and producer-conumser multi-output
+// This function works for both sibling and producer-consumer multi-output
 // fusion.
 // So far, multi-output fusion is supported for loop fusions and reduce
 // input fusions only. It is up to the caller to ensure the instructions
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 58bdd42..a6d80f0 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -240,6 +240,32 @@
         TF_RETURN_IF_ERROR(
             constraints->SetBufferLayout(keys_layout, *output_buffer));
       }
+    } else if (instruction->opcode() == HloOpcode::kTriangularSolve) {
+      // TODO(phawkins): Ideally we would relax this constraint. What we
+      // actually want is that:
+      // a) the batch dimensions are major, in no particular order.
+      // b) the two minor dimensions are in fortran (column-major) order,
+      // although for the 'a' argument we could potentially accept row-major
+      // order and fold the transpose into the operator.
+      auto set_fortran_layout = [](Shape* shape) {
+        LayoutUtil::SetToDefaultLayout(shape);
+        int n = shape->mutable_layout()->minor_to_major_size();
+        CHECK_GE(n, 2);
+        std::swap(shape->mutable_layout()->mutable_minor_to_major()->at(0),
+                  shape->mutable_layout()->mutable_minor_to_major()->at(1));
+      };
+      Shape op0_shape = instruction->operand(0)->shape();
+      Shape op1_shape = instruction->operand(1)->shape();
+      Shape output_shape = instruction->shape();
+      set_fortran_layout(&op0_shape);
+      set_fortran_layout(&op1_shape);
+      set_fortran_layout(&output_shape);
+      TF_RETURN_IF_ERROR(
+          constraints->SetOperandLayout(op0_shape, instruction, 0));
+      TF_RETURN_IF_ERROR(
+          constraints->SetOperandLayout(op1_shape, instruction, 1));
+      TF_RETURN_IF_ERROR(
+          constraints->SetInstructionLayout(output_shape, instruction));
     }
   }
   return Status::OK();
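For intuition: minor_to_major lists dimensions from fastest- to slowest-varying, and the default layout for rank n is {n-1, ..., 0} (row-major). Swapping its first two entries makes the two minor dimensions column-major while leaving the batch dimensions major, which is the Fortran convention the BLAS triangular-solve call expects. A small illustration with a plain vector rather than XLA's Layout:

```c++
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Default layout for a rank-3 shape [batch, m, n]: the last dimension
  // varies fastest (row-major), so minor_to_major is {2, 1, 0}.
  std::vector<int> minor_to_major = {2, 1, 0};
  // Swap the two fastest-varying entries: m now varies fastest, giving
  // column-major (Fortran) order over [m, n] while batch stays major.
  std::swap(minor_to_major[0], minor_to_major[1]);
  for (int d : minor_to_major) std::printf("%d ", d);  // prints: 1 2 0
  std::printf("\n");
  return 0;
}
```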
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index 0007a9a..8f010ab 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -492,8 +492,11 @@
       result = llvm::ConstantAggregateZero::get(lhs_array.GetElementLlvmType());
       result = InsertValue(result, value.first, {0});
       result = InsertValue(result, value.second, {1});
-    } else {
+    } else if (ShapeUtil::ElementIsFloating(lhs_shape)) {
       result = FMul(lhs_value, rhs_value);
+    } else {
+      TF_RET_CHECK(ShapeUtil::ElementIsIntegral(lhs_shape));
+      result = Mul(lhs_value, rhs_value);
     }
     target_array.EmitWriteArrayElement(/*index=*/element_index, result, &b_);
     return Status::OK();
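LLVM IR distinguishes floating-point from integer multiplies, so the dot emitter now picks FMul, Mul, or the complex expansion based on the element type instead of assuming floats. A toy dispatch mirroring that choice (the strings are illustrative, not real emitter output):

```c++
#include <cstdio>

enum class ElemType { kComplex, kFloat, kInt };

// Stand-in for choosing between LLVM's fmul and mul by element type.
const char* MulInstruction(ElemType t) {
  switch (t) {
    case ElemType::kComplex:
      return "fmul+fadd pairs";  // a complex product needs several float ops
    case ElemType::kFloat:
      return "fmul";  // floating-point multiply
    case ElemType::kInt:
      return "mul";  // integer multiply
  }
  return "unreachable";
}

int main() {
  std::printf("F32 dot uses %s, S32 dot uses %s\n",
              MulInstruction(ElemType::kFloat), MulInstruction(ElemType::kInt));
  return 0;
}
```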
@@ -583,9 +586,13 @@
     llvm::Value* accum_imag = Imag(accum, &b_);
     llvm::Value* imag_sum = FAdd(accum_imag, value.second);
     updated_accum = InsertValue(updated_accum, imag_sum, {1});
-  } else {
+  } else if (ShapeUtil::ElementIsFloating(lhs_shape)) {
     llvm::Value* product = FMul(lhs_element, rhs_element);
     updated_accum = FAdd(accum, product);
+  } else {
+    TF_RET_CHECK(ShapeUtil::ElementIsIntegral(lhs_shape));
+    llvm::Value* product = Mul(lhs_element, rhs_element);
+    updated_accum = Add(accum, product);
   }
   Store(updated_accum, accum_address);
 
@@ -647,7 +654,7 @@
 }
 
 Status IrEmitter::HandleReduce(HloInstruction* reduce) {
-  // TODO(b/112040122): Support variadic reduce.
+  // TODO(b/118332391): Support variadic reduce.
   if (!reduce->shape().IsArray()) {
     return Unimplemented("Variadic reduce is not supported on GPU");
   }
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 4fab959..0cc65eb 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -59,6 +59,7 @@
 #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/sequential_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/thunk.h"
+#include "tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/tuple_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/while_thunk.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
@@ -486,6 +487,41 @@
   return Status::OK();
 }
 
+Status IrEmitterUnnested::HandleTriangularSolve(HloInstruction* hlo) {
+  auto has_fortran_layout = [](const Layout& layout) {
+    int n = layout.minor_to_major_size();
+    return layout.minor_to_major(0) == n - 2 &&
+           layout.minor_to_major(1) == n - 1;
+  };
+  TF_RET_CHECK(has_fortran_layout(hlo->operand(0)->shape().layout()));
+  TF_RET_CHECK(has_fortran_layout(hlo->operand(1)->shape().layout()));
+  TF_RET_CHECK(has_fortran_layout(hlo->shape().layout()));
+
+  std::vector<std::unique_ptr<Thunk>> thunks;
+
+  // Triangular solve is in-place on 'b', so copy 'b' to the output if they
+  // aren't the same buffer.
+  auto operand_buffer = GetAllocationSlice(*hlo->operand(1));
+  auto destination_buffer = GetAllocationSlice(*hlo);
+  if (operand_buffer != destination_buffer) {
+    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
+        /*source_address=*/operand_buffer,
+        /*destination_buffer=*/destination_buffer,
+        /*mem_size=*/ShapeUtil::ByteSizeOf(hlo->operand(1)->shape()), hlo));
+  }
+
+  thunks.push_back(BuildTriangularSolveThunk(hlo));
+
+  // Elide the sequential thunk if there's no copy.
+  if (thunks.size() == 1) {
+    AddThunkToThunkSequence(std::move(thunks[0]));
+  } else {
+    AddThunkToThunkSequence(
+        absl::make_unique<SequentialThunk>(std::move(thunks), hlo));
+  }
+  return Status::OK();
+}
+
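BLAS trsm overwrites its right-hand side, hence the copy of 'b' into the output buffer whenever the two differ, with the sequential wrapper skipped when no copy is needed. A rough host-side sketch of the same plan construction, with a hypothetical Step type standing in for Thunk:

```c++
#include <functional>
#include <utility>
#include <vector>

// Hypothetical stand-in for Thunk: a deferred unit of GPU work.
using Step = std::function<void()>;

// Build the solve plan: copy 'b' into the output only when the buffers
// differ, then run trsm in place; skip the sequence wrapper for one step.
Step MakeTriangularSolvePlan(bool output_aliases_b) {
  std::vector<Step> steps;
  if (!output_aliases_b) {
    steps.push_back([] { /* device-to-device copy of 'b' */ });
  }
  steps.push_back([] { /* in-place trsm on the output buffer */ });
  if (steps.size() == 1) return std::move(steps[0]);  // elide the wrapper
  return [steps = std::move(steps)] {
    for (const auto& s : steps) s();
  };
}

int main() {
  Step plan = MakeTriangularSolvePlan(/*output_aliases_b=*/false);
  plan();  // runs the copy, then the solve
  return 0;
}
```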
 Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
   HloInstruction* root = fusion->fused_expression_root();
   if (HloInstruction::FusionKind::kInput == fusion->fusion_kind()) {
@@ -545,7 +581,7 @@
          // a 1D array. The specialized version requires an initializer thunk that
         // initializes the output array to the initial value of the reduce.
         if (root->opcode() == HloOpcode::kReduce && root->shape().IsTuple()) {
-          // TODO(b/112040122): Support variadic reduce.
+          // TODO(b/118332391): Support variadic reduce.
           return Unimplemented("Variadic reduce is not supported on GPU");
         }
         return EmitReductionToVector(fusion);
@@ -634,7 +670,7 @@
 }
 
 Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) {
-  // TODO(b/112040122): Support multi-output reduce.
+  // TODO(b/118332391): Support multi-output reduce.
   if (!reduce->shape().IsArray()) {
     return Unimplemented("Multi-output reduce is not supported on GPU");
   }
@@ -958,18 +994,18 @@
       BuildKernelThunk(scatter,
                        /*implements_whole_instruction=*/thunks.empty()));
 
-  TF_RETURN_IF_ERROR(
-      EmitScatter(thunks.back().get(), scatter,
-                  /*scatter_indices_gen=*/
-                  [=](const IrArray::Index& index) {
-                    return GetIrArray(*scatter_indices, *scatter)
-                        .EmitReadArrayElement(index, &b_, "scatter_index");
-                  },
-                  /*updates_gen=*/
-                  [=](const IrArray::Index& index) {
-                    return GetIrArray(*updates, *scatter)
-                        .EmitReadArrayElement(index, &b_, "update");
-                  }));
+  TF_RETURN_IF_ERROR(EmitScatter(
+      thunks.back().get(), scatter,
+      /*scatter_indices_gen=*/
+      [=](const IrArray::Index& index) {
+        return GetIrArray(*scatter_indices, *scatter)
+            .EmitReadArrayElement(index, &b_, "scatter_index");
+      },
+      /*updates_gen=*/
+      [=](const IrArray::Index& index) {
+        return GetIrArray(*updates, *scatter)
+            .EmitReadArrayElement(index, &b_, "update");
+      }));
 
   // Elide the sequential thunk if there's no copy.
   if (thunks.size() == 1) {
@@ -1117,17 +1153,7 @@
   std::vector<std::unique_ptr<Thunk>> thunks;
   Shape keys_shape = sort->operand(0)->shape();
   int64 dimension_to_sort = sort->dimensions(0);
-  // In case there is a 'values' parameter that is a iota, we take note and use
-  // it later to ensure a stable sort. Otherwise, we don't guarantee a stable
-  // sort.
-  int64 iota_values_parameter_index = -1;
   for (int64 i = 0; i < sort->operand_count(); ++i) {
-    if (i > 0 && sort->operand(i)->opcode() == HloOpcode::kIota &&
-        ShapeUtil::ElementIsIntegral(sort->operand(i)->shape()) &&
-        Cast<HloIotaInstruction>(sort->operand(i))->iota_dimension() ==
-            dimension_to_sort) {
-      iota_values_parameter_index = i;
-    }
     ShapeIndex shape_index =
         sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({});
     // We assume that the layout of all involved operands and outputs is the
@@ -1240,25 +1266,23 @@
                                              : standard_launch_dimensions;
     UpdateLaunchDimensions(launch_dimensions, thunks.back().get(),
                            ir_emitter_context_->llvm_module());
-    IrArray keys_array;
     std::vector<IrArray> values_arrays;
-    values_arrays.reserve(sort->operand_count() - 1);
+    values_arrays.reserve(sort->operand_count());
     for (int64 i = 0; i < sort->operand_count(); ++i) {
       ShapeIndex shape_index =
           sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({});
-      if (i == 0) {
-        keys_array = GetIrArray(*sort, *sort, shape_index);
-      } else {
-        values_arrays.push_back(GetIrArray(*sort, *sort, shape_index));
-      }
+      values_arrays.push_back(GetIrArray(*sort, *sort, shape_index));
     }
     return llvm_ir::EmitSortInPlace(
-        dimension_to_sort, keys_array, values_arrays,
-        iota_values_parameter_index, IrName(sort), xor_masks, &b_,
+        dimension_to_sort, values_arrays, IrName(sort), xor_masks, &b_,
         launch_dimensions,
         xor_masks.size() > 1 ? num_iterations_in_sort_dim
                              : standard_num_iterations_in_sort_dim,
-        kTileSize);
+        kTileSize,
+        [&](absl::Span<llvm::Value* const> operands, llvm::Value* output) {
+          return EmitCallToNestedComputation(*sort->to_apply(), operands,
+                                             output);
+        });
   };
   std::vector<int64> xor_masks;
   for (int64 stage = 0; stage < num_stages; ++stage) {
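The refactored EmitSortInPlace drops the special-cased keys array and the iota bookkeeping in favor of a comparator callback that dispatches into the sort's to_apply computation; the xor_masks loop here still drives a bitonic network. A host-side analog of such a network with a pluggable comparator (power-of-two input length assumed):

```c++
#include <cstdio>
#include <functional>
#include <utility>
#include <vector>

// For each stage and xor mask, compare-and-swap elements whose indices
// differ by that mask, delegating the comparison to a callback, much as
// EmitSortInPlace now delegates to the sort's to_apply computation.
void BitonicSort(std::vector<int>& v,
                 const std::function<bool(int, int)>& less) {
  const int n = static_cast<int>(v.size());  // must be a power of two
  for (int stage = 2; stage <= n; stage *= 2) {
    for (int mask = stage / 2; mask > 0; mask /= 2) {
      for (int i = 0; i < n; ++i) {
        const int partner = i ^ mask;
        if (partner > i) {
          const bool ascending = (i & stage) == 0;
          if (less(v[partner], v[i]) == ascending) std::swap(v[i], v[partner]);
        }
      }
    }
  }
}

int main() {
  std::vector<int> v = {4, 1, 3, 2};
  BitonicSort(v, [](int a, int b) { return a < b; });
  for (int x : v) std::printf("%d ", x);  // prints: 1 2 3 4
  std::printf("\n");
  return 0;
}
```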
@@ -1757,6 +1781,29 @@
       /*output_shape=*/inst->shape(), inst);
 }
 
+std::unique_ptr<Thunk> IrEmitterUnnested::BuildTriangularSolveThunk(
+    const HloInstruction* inst) {
+  const HloInstruction* a = inst->operand(0);
+  const HloInstruction* b = inst->operand(1);
+  int64 m = b->shape().dimensions(b->shape().rank() - 2);
+  int64 n = b->shape().dimensions(b->shape().rank() - 1);
+  int64 batch_size = std::accumulate(
+      b->shape().dimensions().begin(), b->shape().dimensions().end() - 2,
+      int64{1}, [](int64 a, int64 b) { return a * b; });
+  int64 elem_size =
+      ShapeUtil::ByteSizeOfPrimitiveType(inst->shape().element_type());
+  int64 a_batch_stride = inst->triangular_solve_options().left_side()
+                             ? m * m * elem_size
+                             : n * n * elem_size;
+  int64 b_batch_stride = m * n * elem_size;
+  return absl::make_unique<TriangularSolveThunk>(
+      inst->triangular_solve_options(),
+      /*a_input_buffer=*/GetAllocationSlice(*a),
+      /*b_input_buffer=*/GetAllocationSlice(*inst),
+      inst->shape().element_type(), batch_size, m, n, a_batch_stride,
+      b_batch_stride, inst);
+}
+
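The stride arithmetic folds every dimension except the two minor ones into a single batch count. A standalone check of the same computation for a hypothetical left-side solve with a = f32[2,3,4,4] and b = f32[2,3,4,5]:

```c++
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  std::vector<int64_t> b_dims = {2, 3, 4, 5};
  const int64_t m = b_dims[b_dims.size() - 2];  // 4
  const int64_t n = b_dims[b_dims.size() - 1];  // 5
  // Everything except the two minor dims is one flattened batch.
  const int64_t batch_size =
      std::accumulate(b_dims.begin(), b_dims.end() - 2, int64_t{1},
                      [](int64_t a, int64_t b) { return a * b; });
  const int64_t elem_size = 4;  // sizeof(float)
  const int64_t a_batch_stride = m * m * elem_size;  // left side: a is m x m
  const int64_t b_batch_stride = m * n * elem_size;
  assert(batch_size == 6);
  assert(a_batch_stride == 64 && b_batch_stride == 80);
  return 0;
}
```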
 StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildInitializerThunk(
     HloInstruction* hlo, const ShapeIndex& index) {
   bool fused = HloOpcode::kFusion == hlo->opcode();
@@ -2132,7 +2179,6 @@
   return param_arrays;
 }
 
-
 int IrEmitterUnnested::ConstructInputReducedShapeAndCastInputIrArrayToShape(
     const HloInstruction& hlo, const std::vector<IrArray>& param_arrays,
     const std::vector<llvm::Value*>& param_buffers,
@@ -2966,12 +3012,11 @@
   // since we touch threadIdx.x and blockIdx.x at the beginning of the kernel
   // *anyway*.
   if (!reduction_info && unnested_hlo->IsMultiOutputFusion()) {
-    KernelSupportLibrary{&b_}.If(
-        "emit_mof_tuple", IsBlock0Thread0(&b_), [&] {
-          llvm_ir::EmitTuple(GetIrArray(*unnested_hlo, *unnested_hlo),
-                             ConstructIrArrayForOutputs(*unnested_hlo), &b_,
-                             module_);
-        });
+    KernelSupportLibrary{&b_}.If("emit_mof_tuple", IsBlock0Thread0(&b_), [&] {
+      llvm_ir::EmitTuple(GetIrArray(*unnested_hlo, *unnested_hlo),
+                         ConstructIrArrayForOutputs(*unnested_hlo), &b_,
+                         module_);
+    });
   }
 
   // For each tiled parameter, cast its input IrArray to the corresponding
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index 21b842b..f85e18b 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -176,6 +176,7 @@
   Status HandleScatter(HloInstruction* scatter) override;
   Status HandleSelect(HloInstruction* select) override;
   Status HandleSort(HloInstruction* sort) override;
+  Status HandleTriangularSolve(HloInstruction* hlo) override;
   Status HandleTupleSelect(HloInstruction* tuple_select) override;
   Status HandleAllReduce(HloInstruction* crs) override;
   Status HandleAfterAll(HloInstruction* after_all) override;
@@ -319,6 +320,9 @@
   // Returns a FftThunk that calls cuFFT to implement `inst`.
   std::unique_ptr<Thunk> BuildFftThunk(const HloInstruction* inst);
 
+  // Returns a TriangularSolveThunk that calls cuBLAS to implement `inst`.
+  std::unique_ptr<Thunk> BuildTriangularSolveThunk(const HloInstruction* inst);
+
   // Returns a GemmThunk that calls gemm to implement `inst`. The caller needs
   // to make sure `inst` outlives the lifetime of the returned Thunk object.
   std::unique_ptr<Thunk> BuildGemmThunk(const HloInstruction* inst);
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 4bead3e..6e00e4b 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -82,6 +82,7 @@
 #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
 #include "tensorflow/compiler/xla/service/sort_simplifier.h"
+#include "tensorflow/compiler/xla/service/stable_sort_expander.h"
 #include "tensorflow/compiler/xla/service/transpose_folding.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h"
@@ -195,6 +196,8 @@
     pipeline.AddPass<ConvolutionGroupConverter>(
         cost_model,
         /*convert_batch_groups_only=*/true);
+    // Expand the sort op to support stable sorting if required.
+    pipeline.AddPass<StableSortExpander>();
     // Convert BF16 operations to F32 operations so that the GPU backend can
     // support BF16 operations without directly implementing a BF16 lowering for
     // most ops.
@@ -795,14 +798,19 @@
   std::unique_ptr<HloProfileIndexMap> profile_index_map;
   std::unique_ptr<HloProfilePrinterData> profile_printer;
 
-  if (module->config().hlo_profiling_enabled()) {
+  if (module->config().hlo_profiling_enabled() || VLOG_IS_ON(1)) {
     HloCostAnalysis cost_analysis(ShapeSizeBytesFunction());
     cost_analysis.set_bytes_per_second(
         stream_exec->GetDeviceDescription().memory_bandwidth());
     TF_RETURN_IF_ERROR(module->entry_computation()->Accept(&cost_analysis));
-    profile_index_map = absl::make_unique<HloProfileIndexMap>(*module);
-    profile_printer = CreateHloProfilePrinterData(
-        *profile_index_map, cost_analysis, entry_computation->name());
+    VLOG(1) << "HLO memory read+written: "
+            << tensorflow::strings::HumanReadableNumBytes(
+                   cost_analysis.bytes_accessed());
+    if (module->config().hlo_profiling_enabled()) {
+      profile_index_map = absl::make_unique<HloProfileIndexMap>(*module);
+      profile_printer = CreateHloProfilePrinterData(
+          *profile_index_map, cost_analysis, entry_computation->name());
+    }
   }
 
   auto* gpu_executable = new GpuExecutable(
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.cc b/tensorflow/compiler/xla/service/gpu/thunk.cc
index c78605c..a677617 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/thunk.cc
@@ -48,6 +48,8 @@
       return os << "kOutfeed";
     case Thunk::kSequential:
       return os << "kSequential";
+    case Thunk::kTriangularSolve:
+      return os << "kTriangularSolve";
     case Thunk::kTuple:
       return os << "kTuple";
     case Thunk::kWhile:
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h
index e68bee0..bc69af8 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/thunk.h
@@ -56,6 +56,7 @@
     kMemzero,
     kOutfeed,
     kSequential,
+    kTriangularSolve,
     kTuple,
     kWhile,
   };
diff --git a/tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.cc b/tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.cc
new file mode 100644
index 0000000..5200a2a
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.cc
@@ -0,0 +1,149 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.h"
+
+#include <string>
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/stream_executor/blas.h"
+#include "tensorflow/stream_executor/device_memory.h"
+
+namespace xla {
+namespace gpu {
+
+TriangularSolveThunk::TriangularSolveThunk(
+    const TriangularSolveOptions& options,
+    const BufferAllocation::Slice& a_buffer,
+    const BufferAllocation::Slice& b_buffer, PrimitiveType type,
+    int64 batch_size, int64 m, int64 n, int64 a_batch_stride,
+    int64 b_batch_stride, const HloInstruction* hlo)
+    : Thunk(Kind::kTriangularSolve, hlo),
+      uplo_(options.lower() ? se::blas::UpperLower::kLower
+                            : se::blas::UpperLower::kUpper),
+      side_(options.left_side() ? se::blas::Side::kLeft
+                                : se::blas::Side::kRight),
+      unit_diagonal_(options.unit_diagonal() ? se::blas::Diagonal::kUnit
+                                             : se::blas::Diagonal::kNonUnit),
+      a_buffer_(a_buffer),
+      b_buffer_(b_buffer),
+      type_(type),
+      batch_size_(batch_size),
+      m_(m),
+      n_(n),
+      a_batch_stride_(a_batch_stride),
+      b_batch_stride_(b_batch_stride) {
+  transpose_a_ = [&] {
+    switch (options.transpose_a()) {
+      case TriangularSolveOptions::NO_TRANSPOSE:
+        return se::blas::Transpose::kNoTranspose;
+      case TriangularSolveOptions::TRANSPOSE:
+        return se::blas::Transpose::kTranspose;
+      case TriangularSolveOptions::ADJOINT:
+        return se::blas::Transpose::kConjugateTranspose;
+      default:
+        LOG(ERROR) << "Invalid triangular solve transpose value "
+                   << options.transpose_a();
+        return se::blas::Transpose::kNoTranspose;
+    }
+  }();
+}
+
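Initializing transpose_a_ through an immediately invoked lambda keeps the switch statement out of the constructor's initializer list while still assigning the member exactly once. The idiom in isolation, with a hypothetical option value:

```c++
#include <cstdio>

enum class Transpose { kNoTranspose, kTranspose, kConjugateTranspose };

int main() {
  const int transpose_option = 2;  // pretend this came from the proto options
  // Immediately invoked lambda: run the switch, bind the result to a const.
  const Transpose transpose_a = [&] {
    switch (transpose_option) {
      case 0: return Transpose::kNoTranspose;
      case 1: return Transpose::kTranspose;
      case 2: return Transpose::kConjugateTranspose;
      default: return Transpose::kNoTranspose;  // mirrors the LOG(ERROR) path
    }
  }();
  std::printf("%d\n", static_cast<int>(transpose_a));  // prints: 2
  return 0;
}
```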
+Status TriangularSolveThunk::ExecuteOnStream(
+    const BufferAllocations& buffer_allocations, se::Stream* stream,
+    HloExecutionProfiler* profiler) {
+  VLOG(3) << "uplo=" << se::blas::UpperLowerString(uplo_)
+          << " side=" << se::blas::SideString(side_)
+          << " diagonal=" << se::blas::DiagonalString(unit_diagonal_)
+          << " batch_size=" << batch_size_ << " m=" << m_ << " n=" << n_
+          << " a_batch_stride=" << a_batch_stride_
+          << " b_batch_stride=" << b_batch_stride_;
+
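+  // 'a' is m x m for a left-side solve and n x n for a right-side solve;
+  // 'b' is m x n and stored column-major, so its leading dimension is m.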
+  const int lda = side_ == se::blas::Side::kLeft ? m_ : n_;
+  const int ldb = m_;
+
+  char* a_base = static_cast<char*>(
+      buffer_allocations.GetDeviceAddress(a_buffer_).opaque());
+  char* b_base = static_cast<char*>(
+      buffer_allocations.GetDeviceAddress(b_buffer_).opaque());
+  for (int64 i = 0; i < batch_size_; ++i) {
+    bool launch_ok;
+    se::DeviceMemoryBase a_data =
+        se::DeviceMemoryBase(a_base + i * a_batch_stride_, a_batch_stride_);
+    se::DeviceMemoryBase b_data =
+        se::DeviceMemoryBase(b_base + i * b_batch_stride_, b_batch_stride_);
+    switch (type_) {
+      case F32: {
+        se::DeviceMemory<float> b_data_typed(b_data);
+        launch_ok = stream
+                        ->ThenBlasTrsm(side_, uplo_, transpose_a_,
+                                       unit_diagonal_, m_, n_, /*alpha=*/1.0f,
+                                       se::DeviceMemory<float>(a_data), lda,
+                                       &b_data_typed, ldb)
+                        .ok();
+        break;
+      }
+      case F64: {
+        se::DeviceMemory<double> b_data_typed(b_data);
+        launch_ok = stream
+                        ->ThenBlasTrsm(side_, uplo_, transpose_a_,
+                                       unit_diagonal_, m_, n_, /*alpha=*/1.0,
+                                       se::DeviceMemory<double>(a_data), lda,
+                                       &b_data_typed, ldb)
+                        .ok();
+        break;
+      }
+      case C64: {
+        se::DeviceMemory<std::complex<float>> b_data_typed(b_data);
+        launch_ok =
+            stream
+                ->ThenBlasTrsm(side_, uplo_, transpose_a_, unit_diagonal_, m_,
+                               n_, /*alpha=*/1.0f,
+                               se::DeviceMemory<std::complex<float>>(a_data),
+                               lda, &b_data_typed, ldb)
+                .ok();
+        break;
+      }
+      case C128: {
+        se::DeviceMemory<std::complex<double>> b_data_typed(b_data);
+        launch_ok =
+            stream
+                ->ThenBlasTrsm(side_, uplo_, transpose_a_, unit_diagonal_, m_,
+                               n_, /*alpha=*/1.0,
+                               se::DeviceMemory<std::complex<double>>(a_data),
+                               lda, &b_data_typed, ldb)
+                .ok();
+        break;
+      }
+      default:
+        return InvalidArgument("Invalid type for triangular solve %d", type_);
+    }
+    if (!launch_ok) {
+      return InternalError("Unable to launch triangular solve for thunk %p",
+                           this);
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace gpu
+}  // namespace xla
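ExecuteOnStream issues one ThenBlasTrsm per batch element, stepping the a and b base pointers by their batch strides. A trivial check of the byte offsets this loop visits, reusing the strides from the earlier sketch (m = 4, n = 5, f32 elements):

```c++
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t b_batch_stride = 4 * 5 * 4;  // 80 bytes per right-hand side
  const int64_t batch_size = 6;
  for (int64_t i = 0; i < batch_size; ++i) {
    // Each iteration hands cuBLAS one contiguous column-major m x n matrix.
    std::printf("batch %lld starts at byte offset %lld\n",
                static_cast<long long>(i),
                static_cast<long long>(i * b_batch_stride));
  }
  return 0;
}
```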
diff --git a/tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.h b/tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.h
new file mode 100644
index 0000000..c947162
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/triangular_solve_thunk.h
@@ -0,0 +1,75 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_TRIANGULAR_SOLVE_THUNK_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_TRIANGULAR_SOLVE_THUNK_H_
+
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
+#include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
+#include "tensorflow/compiler/xla/service/gpu/thunk.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/stream_executor/blas.h"
+
+namespace xla {
+namespace gpu {
+
+// This class stores everything that StreamExecutor needs to launch a triangular
+// solve (BlasTrsm). It is generated by IrEmitter.
+//
+// Thread-compatible.
+class TriangularSolveThunk : public Thunk {
+ public:
+  TriangularSolveThunk(const TriangularSolveOptions& options,
+                       const BufferAllocation::Slice& a_buffer,
+                       const BufferAllocation::Slice& b_buffer,
+                       PrimitiveType type, int64 batch_size, int64 m, int64 n,
+                       int64 a_batch_stride, int64 b_batch_stride,
+                       const HloInstruction* hlo);
+
+  TriangularSolveThunk(const TriangularSolveThunk&) = delete;
+  TriangularSolveThunk& operator=(const TriangularSolveThunk&) = delete;
+
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         se::Stream* stream,
+                         HloExecutionProfiler* profiler) override;
+
+ private:
+  const se::blas::UpperLower uplo_;
+  const se::blas::Side side_;
+  const se::blas::Diagonal unit_diagonal_;
+  se::blas::Transpose transpose_a_;
+
+  const BufferAllocation::Slice a_buffer_;
+  const BufferAllocation::Slice b_buffer_;
+
+  const PrimitiveType type_;
+  const int64 batch_size_;
+  const int64 m_;
+  const int64 n_;
+  const int64 a_batch_stride_;
+  const int64 b_batch_stride_;
+};
+
+}  // namespace gpu
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_TRIANGULAR_SOLVE_THUNK_H_
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 263b42a..ae9e316 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -34,7 +34,7 @@
 option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
-// Next ID: 59
+// Next ID: 62
 message HloInstructionProto {
   reserved 10;
   reserved "parameter_name";
@@ -175,6 +175,9 @@
   // partners.
   bool is_host_transfer = 47;
 
+  // Whether this Sort instruction should be stable.
+  bool is_stable = 60;
+
   xla.ScatterDimensionNumbers scatter_dimension_numbers = 48;
 
   // Precision configuration for the instruction. Has backend-specific meaning.
@@ -193,6 +196,12 @@
   // operand.
   bool constrain_layout = 56;
   repeated xla.ShapeProto operand_shapes_with_layout = 57;
+
+  // Options for TriangularSolve
+  xla.TriangularSolveOptions triangular_solve_options = 59;
+
+  // Describes how parameters behave with regard to replicas.
+  xla.ParameterReplication parameter_replication = 61;
 }
 
 // Serialization of HloComputation.
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 40fe913..817e15f 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -296,7 +296,7 @@
 }  // namespace
 
 void HloComputation::ComputeInstructionPostOrder(
-    const HloComputation::ChannelDependencyMap& channel_dependency_map,
+    const HloComputation::ChannelDependencyGroup& channel_dependency_group,
     std::vector<HloInstruction*>* post_order, HloInstruction* root,
     absl::flat_hash_map<HloInstruction*, VisitState>* visited) const {
   std::vector<HloInstruction*> dfs_stack;
@@ -320,66 +320,75 @@
 
     visited->insert({current, kVisiting});
 
-    // Add the operands to the stack in reverse order so the first operand is
-    // processed first. This will produce a more natural ordering and a nicer
-    // result for things like HLO stringification.
-    const auto& operands = current->operands();
-    for (int64 i = operands.size() - 1; i >= 0; --i) {
-      dfs_stack.emplace_back(operands[i]);
-    }
-
-    for (HloInstruction* op : current->control_predecessors()) {
-      dfs_stack.emplace_back(op);
-    }
-
-    // Add inputs for send->recv_done dependencies and all-reduce
-    // dependencies.
-    switch (current->opcode()) {
-      case HloOpcode::kRecvDone: {
-        auto it = channel_dependency_map.find(current->channel_id());
-        if (it != channel_dependency_map.end()) {
-          for (HloInstruction* op : it->second) {
-            dfs_stack.emplace_back(op);
-          }
-        }
-        break;
+    const auto get_channel_id =
+        [](HloInstruction* inst) -> absl::optional<int64> {
+      switch (inst->opcode()) {
+        case HloOpcode::kRecvDone:
+          return inst->channel_id();
+        case HloOpcode::kAllReduce:
+          return inst->all_reduce_id();
+        default:
+          return absl::nullopt;
       }
-      case HloOpcode::kAllReduce: {
-        auto all_reduce_id = current->all_reduce_id();
-        if (all_reduce_id) {
-          auto it = channel_dependency_map.find(all_reduce_id.value());
-          if (it != channel_dependency_map.end()) {
-            for (HloInstruction* op : it->second) {
-              dfs_stack.emplace_back(op);
-            }
-          }
+    };
+
+    // When adding a predecessor to the dfs_stack, we need to also add its
+    // associated channel dependencies.
+    const auto add_dfs_stack = [&](HloInstruction* inst) {
+      auto channel_id = get_channel_id(inst);
+      if (channel_id && channel_dependency_group.count(*channel_id)) {
+        auto it = channel_dependency_group.find(*channel_id);
+        for (HloInstruction* cinst : it->second) {
+          dfs_stack.emplace_back(cinst);
         }
-        break;
+      } else {
+        dfs_stack.emplace_back(inst);
       }
-      default:
-        break;
+    };
+
+    const auto add_predecessors = [&](HloInstruction* inst) {
+      // Add the operands to the stack in reverse order so the first operand is
+      // processed first. This will produce a more natural ordering and a nicer
+      // result for things like HLO stringification.
+      const auto& operands = inst->operands();
+      for (int64 i = operands.size() - 1; i >= 0; --i) {
+        add_dfs_stack(operands[i]);
+      }
+
+      for (HloInstruction* op : inst->control_predecessors()) {
+        add_dfs_stack(op);
+      }
+    };
+
+    // If the current instruction is a channel instruction, add the dependencies
+    // from all associated instructions of the channel.
+    auto channel_id = get_channel_id(current);
+    if (channel_id && channel_dependency_group.count(*channel_id)) {
+      auto it = channel_dependency_group.find(*channel_id);
+      for (HloInstruction* cinst : it->second) {
+        add_predecessors(cinst);
+      }
+    } else {
+      add_predecessors(current);
     }
   }
 }
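The rewrite above replaces the per-opcode special cases with two helpers: get_channel_id unifies the RecvDone/AllReduce channel lookup, and any instruction in a channel group is expanded to the whole group, both when it is pushed onto the stack and when its predecessors are collected. A compact sketch of that grouped post-order on a toy graph (hypothetical node and group tables, acyclic input assumed, modeled on the all-reduce test added further down):

```c++
#include <cstdio>
#include <functional>
#include <map>
#include <vector>

int main() {
  // Node 0 = param, 1 = crs0, 2 = crs1 (1 and 2 share channel 7),
  // 3 = add(crs0), 4 = tuple(add, crs1).
  std::vector<std::vector<int>> preds = {{}, {0}, {0}, {1}, {3, 2}};
  std::map<int, std::vector<int>> groups = {{7, {1, 2}}};
  std::vector<int> node_to_group = {-1, 7, 7, -1, -1};

  std::vector<bool> visited(preds.size(), false);
  std::vector<int> post_order;
  std::function<void(int)> visit = [&](int n) {
    if (visited[n]) return;
    // Treat the whole channel group as one node: first visit the union of
    // all members' predecessors, then emit every member together.
    std::vector<int> members = {n};
    if (node_to_group[n] != -1) members = groups[node_to_group[n]];
    for (int m : members) visited[m] = true;
    for (int m : members)
      for (int p : preds[m]) visit(p);
    for (int m : members) post_order.push_back(m);
  };
  for (int n = 0; n < static_cast<int>(preds.size()); ++n) visit(n);
  for (int n : post_order) std::printf("%d ", n);  // prints: 0 1 2 3 4
  std::printf("\n");
  return 0;
}
```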
 
-HloComputation::ChannelDependencyMap
+HloComputation::ChannelDependencyGroup
 HloComputation::ComputeChannelDependencies() const {
-  ChannelDependencyMap channel_dependency_map;
+  ChannelDependencyGroup channel_dependency_group;
   for (const auto& instruction : instructions_) {
     switch (instruction->opcode()) {
-      case HloOpcode::kSend: {
-        channel_dependency_map[instruction->channel_id()].push_back(
+      case HloOpcode::kSend:
+      case HloOpcode::kRecvDone:
+        channel_dependency_group[instruction->channel_id()].push_back(
             instruction.get());
         break;
-      }
       case HloOpcode::kAllReduce: {
         auto all_reduce_id = instruction->all_reduce_id();
         if (all_reduce_id) {
-          auto& dependencies = channel_dependency_map[all_reduce_id.value()];
-          absl::c_copy(instruction->operands(),
-                       std::back_inserter(dependencies));
-          absl::c_copy(instruction->control_predecessors(),
-                       std::back_inserter(dependencies));
+          channel_dependency_group[all_reduce_id.value()].push_back(
+              instruction.get());
         }
         break;
       }
@@ -387,11 +396,11 @@
         break;
     }
   }
-  return channel_dependency_map;
+  return channel_dependency_group;
 }
 
 std::vector<HloInstruction*> HloComputation::MakeInstructionPostOrder() const {
-  auto channel_dependency_map = ComputeChannelDependencies();
+  auto channel_dependency_group = ComputeChannelDependencies();
   std::vector<HloInstruction*> post_order;
   post_order.reserve(instruction_count());
   std::vector<HloInstruction*> trace_instructions;
@@ -404,7 +413,7 @@
       // users).
       trace_instructions.push_back(instruction.get());
     } else if (instruction->users().empty()) {
-      ComputeInstructionPostOrder(channel_dependency_map, &post_order,
+      ComputeInstructionPostOrder(channel_dependency_group, &post_order,
                                   instruction.get(), &visited);
     }
   }
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 0cb9cad..212dfa1 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -369,13 +369,13 @@
   // channel complete).
   bool IsRemovable(const HloInstruction* instruction);
 
-  // Returns a map from channel-id to directed dependencies of the channel
-  // instructions. For send&recv pairs it means the send instruction and for
-  // all-reduce the union of the dependencies for all participating
-  // instructions.
-  using ChannelDependencyMap =
+  // Returns a map from channel-id to the group of instructions associated with
+  // the channel. These instructions will be considered as a single node for
+  // dependency purposes: Send and RecvDone pairs are grouped by channel id,
+  // and AllReduces are grouped by all_reduce_id.
+  using ChannelDependencyGroup =
       absl::flat_hash_map<int64, absl::InlinedVector<HloInstruction*, 1>>;
-  ChannelDependencyMap ComputeChannelDependencies() const;
+  ChannelDependencyGroup ComputeChannelDependencies() const;
 
   // Returns true if this computation has a side effect. A computation has a
   // side effect if it contains one or more instructions with a side effect.
@@ -391,6 +391,10 @@
     fusion_instruction_ = fusion_instruction;
   }
 
+  // Clear the unique ID of the computation so that it can be re-assigned, such
+  // as for the purpose of compacting the unique IDs.
+  void ClearUniqueIdInternal() { unique_id_ = -1; }
+
   // The id of this computation should be unique within the module.
   void SetUniqueId(int64 id) {
     CHECK_EQ(unique_id_, -1);
@@ -434,7 +438,7 @@
 
   enum VisitState { kVisiting, kVisited };
   void ComputeInstructionPostOrder(
-      const HloComputation::ChannelDependencyMap& channel_dependency_map,
+      const HloComputation::ChannelDependencyGroup& channel_dependency_group,
       std::vector<HloInstruction*>* post_order, HloInstruction* root,
       absl::flat_hash_map<HloInstruction*, VisitState>* visited) const;
 
diff --git a/tensorflow/compiler/xla/service/hlo_computation_test.cc b/tensorflow/compiler/xla/service/hlo_computation_test.cc
index 3b88e97..fe37ca6 100644
--- a/tensorflow/compiler/xla/service/hlo_computation_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation_test.cc
@@ -24,7 +24,9 @@
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/service/pattern_matcher.h"
 #include "tensorflow/compiler/xla/service/pattern_matcher_gmock.h"
 #include "tensorflow/compiler/xla/shape_util.h"
@@ -37,6 +39,7 @@
 namespace {
 
 namespace m = match;
+namespace op = xla::testing::opcode_matchers;
 using ::testing::ElementsAre;
 using ::testing::UnorderedElementsAre;
 
@@ -668,5 +671,34 @@
   EXPECT_FALSE(*computation_c == *computation_b);
 }
 
+// Tests that cross-module AllReduce instructions are ordered after all their
+// predecessors and before all their successors.
+TEST_F(HloComputationTest, InstructionPostOrderWithAllReduce) {
+  const char* const hlo_string = R"(
+HloModule Module
+
+add {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(lhs, rhs)
+}
+
+ENTRY entry {
+  param = f32[128] parameter(0), sharding={maximal device=0}
+  crs0 = f32[128] all-reduce(param),
+    replica_groups={{0}}, all_reduce_id=1, barrier="", to_apply=add,
+    sharding={maximal device=0}
+  crs1 = f32[128] all-reduce(param),
+    replica_groups={{0}}, all_reduce_id=1, barrier="", to_apply=add,
+    sharding={maximal device=1}
+  add = f32[128] add(crs0, crs0), sharding={maximal device=0}
+  ROOT t = (f32[128], f32[128]) tuple(add, crs1)
+})";
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(hlo_string));
+  EXPECT_THAT(module->entry_computation()->MakeInstructionPostOrder(),
+              ElementsAre(op::Parameter(), op::AllReduce(), op::AllReduce(),
+                          op::Add(), op::Tuple()));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index 1ee9581..29ac263 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -245,7 +245,7 @@
   for (auto dim : dnums.lhs_contracting_dimensions()) {
     reduction_width *= lhs_shape.dimensions(dim);
   }
-  // Each output elment requires reduction_widht FMA operations.
+  // Each output element requires reduction_width FMA operations.
   current_properties_[kFlopsKey] =
       kFmaFlops * ShapeUtil::ElementsIn(dot_shape) * reduction_width;
   return Status::OK();
@@ -546,6 +546,21 @@
   return Status::OK();
 }
 
+Status HloCostAnalysis::HandleTriangularSolve(const HloInstruction* hlo) {
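+  // Only about half of the triangular operand 'a' is read.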
+  float bytes_accessed = GetShapeSize(hlo->operand(0)->shape()) / 2.0f;
+  bytes_accessed += GetShapeSize(hlo->operand(1)->shape());
+  current_properties_[kBytesAccessedKey] = bytes_accessed;
+
+  const Shape& a_shape = hlo->operand(0)->shape();
+  const Shape& b_shape = hlo->operand(1)->shape();
+  // Estimate as batch * mn^2 / 2 flops.
+  int64 elems = a_shape.dimensions(a_shape.dimensions_size() - 1);
+  elems *= ShapeUtil::ElementsIn(b_shape);
+  // Each of these elements is charged one FMA.
+  current_properties_[kFlopsKey] = kFmaFlops * elems;
+  return Status::OK();
+}
+
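As a worked example of the estimate (assuming kFmaFlops counts an FMA as 2 flops, as elsewhere in HloCostAnalysis): for a left-side solve with a = f32[4,4] and b = f32[4,5], elems = 4 * 20 = 80, so 160 flops are recorded, versus roughly n*m^2/2 = 40 true FMAs (80 flops); an over-count of about 2x, which is acceptable for a coarse cost model.

```c++
#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical left-side solve: a = f32[4,4], b = f32[4,5].
  const int64_t kFmaFlops = 2;             // assumed cost of one multiply-add
  const int64_t a_last_dim = 4;            // minor dimension of 'a'
  const int64_t elements_in_b = 4 * 5;     // ShapeUtil::ElementsIn(b_shape)
  const int64_t elems = a_last_dim * elements_in_b;  // 80
  assert(kFmaFlops * elems == 160);        // recorded flop count
  return 0;
}
```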
 Status HloCostAnalysis::HandleAllReduce(const HloInstruction* crs) {
   // We assume 2 replicas, so that each output element is the sum of two input
   // elements.
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
index 421786e..96357de 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
@@ -71,6 +71,7 @@
   Status HandleDot(const HloInstruction* dot) override;
   Status HandleConvolution(const HloInstruction* convolution) override;
   Status HandleFft(const HloInstruction* fft) override;
+  Status HandleTriangularSolve(const HloInstruction* hlo) override;
   Status HandleAllReduce(const HloInstruction* crs) override;
   Status HandleAllToAll(const HloInstruction* hlo) override;
   Status HandleCollectivePermute(const HloInstruction* hlo) override;
diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc
index 5d3e11f..b5d9e8e 100644
--- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc
+++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc
@@ -17,10 +17,15 @@
 #include "absl/algorithm/container.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
+#include "tensorflow/compiler/xla/client/lib/comparators.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/client/xla_computation.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_clone_context.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/util.h"
 
@@ -270,37 +275,25 @@
 
 StatusOr<HloInstruction*> MakeSortHlo(
     const Shape& sort_shape, absl::Span<HloInstruction* const> operands,
-    int64 dimension_to_sort, HloComputation::Builder* builder,
+    int64 dimension_to_sort, bool is_stable, HloComputation::Builder* builder,
     HloModule* module) {
   CHECK(!operands.empty()) << "Sort Hlo requires at least one operand.";
   HloComputation* compare_computation;
-  {
-    auto b = HloComputation::Builder("Sort.Compare");
-    Shape key_scalar_shape =
-        ShapeUtil::MakeShape(operands[0]->shape().element_type(), {});
-    auto lhs = b.AddInstruction(
-        HloInstruction::CreateParameter(0, key_scalar_shape, "p.0.lhs"));
-    auto rhs = b.AddInstruction(
-        HloInstruction::CreateParameter(1, key_scalar_shape, "p.0.rhs"));
-    int parameter_count = 2;
-    for (const auto* operand : operands.subspan(1)) {
-      Shape scalar_shape =
-          ShapeUtil::MakeShape(operand->shape().element_type(), {});
-      b.AddInstruction(HloInstruction::CreateParameter(
-          parameter_count, scalar_shape,
-          StrCat("p.", parameter_count / 2, ".lhs")));
-      ++parameter_count;
-      b.AddInstruction(HloInstruction::CreateParameter(
-          parameter_count, scalar_shape,
-          StrCat("p.", parameter_count / 2, ".rhs")));
-      ++parameter_count;
-    }
-    b.AddInstruction(HloInstruction::CreateBinary(
-        ShapeUtil::MakeShape(PRED, {}), HloOpcode::kLt, lhs, rhs));
-    compare_computation = module->AddEmbeddedComputation(b.Build());
+  XlaBuilder b("Sort.Compare");
+  std::vector<PrimitiveType> operand_types(operands.size());
+  for (int64 i = 0; i < operands.size(); ++i) {
+    operand_types[i] = operands[i]->shape().element_type();
   }
+  XlaComputation comparator = CreateScalarLtComputation(operand_types, &b);
+  TF_ASSIGN_OR_RETURN(ProgramShape program_shape, comparator.GetProgramShape());
+  HloModuleConfig config(program_shape);
+  TF_ASSIGN_OR_RETURN(auto new_module,
+                      HloModule::CreateFromProto(comparator.proto(), config));
+  HloCloneContext context(module);
+  compare_computation =
+      module->DeepCloneComputation(new_module->entry_computation(), &context);
   return builder->AddInstruction(HloInstruction::CreateSort(
-      sort_shape, dimension_to_sort, operands, compare_computation));
+      sort_shape, dimension_to_sort, operands, compare_computation, is_stable));
 }
 
 StatusOr<HloInstruction*> CollapseFirstNDims(HloInstruction* operand, int64 n) {
diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h
index 80f58f6..17b7a2d 100644
--- a/tensorflow/compiler/xla/service/hlo_creation_utils.h
+++ b/tensorflow/compiler/xla/service/hlo_creation_utils.h
@@ -126,13 +126,10 @@
 // Creates a Sort HLO instruction and adds it to the computation containing the
 // operands. All operands must be in the same computation. Also creates a
 // default compare sub-computation which sorts the first operand into ascending
-// order.
-// Note that this default compare sub-computation does not have special handling
-// for floating point values and thus can result in undefined behavior in the
-// presence of NaN values.
+// order. 'is_stable' specifies whether the sorting should be stable.
 StatusOr<HloInstruction*> MakeSortHlo(
     const Shape& sort_shape, absl::Span<HloInstruction* const> operands,
-    int64 dimension_to_sort, HloComputation::Builder* builder,
+    int64 dimension_to_sort, bool is_stable, HloComputation::Builder* builder,
     HloModule* module);
 
 // Creates an R1 Constant HLO instruction of the given PrimitiveType with the
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index e3059e0..768e3af 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -2363,7 +2363,8 @@
   auto keys = builder.AddInstruction(
       HloInstruction::CreateParameter(0, keys_shape, "keys"));
   TF_ASSERT_OK_AND_ASSIGN(
-      auto* sort, MakeSortHlo(keys_shape, {keys}, -1, &builder, module_.get()));
+      auto* sort, MakeSortHlo(keys_shape, {keys}, -1, /*is_stable=*/false,
+                              &builder, module_.get()));
 
   computation_ = module_->AddEntryComputation(builder.Build());
   RunAnalysis();
@@ -2385,7 +2386,8 @@
   TF_ASSERT_OK_AND_ASSIGN(
       auto* sort,
       MakeSortHlo(ShapeUtil::MakeTupleShape({keys_shape, values_shape}),
-                  {keys, values}, 0, &builder, module_.get()));
+                  {keys, values}, 0, /*is_stable=*/false, &builder,
+                  module_.get()));
 
   computation_ = module_->AddEntryComputation(builder.Build());
   RunAnalysis();
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
index bff4677..7d6b860 100644
--- a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
@@ -146,14 +146,10 @@
           opcode == HloOpcode::kMap || opcode == HloOpcode::kReduce ||
           opcode == HloOpcode::kReduceWindow || opcode == HloOpcode::kScatter ||
           opcode == HloOpcode::kSelectAndScatter ||
-          opcode == HloOpcode::kConditional) {
+          opcode == HloOpcode::kSort || opcode == HloOpcode::kConditional) {
         continue;
       }
-      // TODO(b/122298745): Once we don't ignore called computations anymore,
-      // add kSort to the if statement above.
-      if (opcode != HloOpcode::kSort) {
-        TF_RET_CHECK(hlo->called_computations().empty()) << hlo->ToString();
-      }
+      TF_RET_CHECK(hlo->called_computations().empty()) << hlo->ToString();
 
       bool nullary = hlo->operands().empty();
       bool wrong_element_type = hlo->shape().element_type() == eliminate_type_;
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 56a1b6f..4d64877 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -33,6 +33,7 @@
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -1461,14 +1462,6 @@
   }
   Shape key_shape = sort->operand(0)->shape();
   auto rank = key_shape.rank();
-  PrimitiveType keys_type = key_shape.element_type();
-  if (keys_type != F64 && keys_type != U64 && keys_type != S64 &&
-      keys_type != F32 && keys_type != U32 && keys_type != S32 &&
-      keys_type != BF16 && keys_type != F16 && keys_type != U16 &&
-      keys_type != S16 && keys_type != U8 && keys_type != S8) {
-    return InvalidArgument("Unsupported type for Sort: %s",
-                           PrimitiveType_Name(keys_type));
-  }
   std::vector<Literal> result_literals;
   result_literals.reserve(sort->operand_count());
   for (int64 i = 0; i < sort->operand_count(); ++i) {
@@ -1479,6 +1472,7 @@
   int64 sort_dim = sort->dimensions(0);
   int64 sort_dim_elements = key_shape.dimensions(sort_dim);
   increment[sort_dim] = sort_dim_elements;
+  HloEvaluator embedded_evaluator(max_loop_iterations_);
   // Iterate through each dimension except 'sort_dim'.
   TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus(
       key_shape, zero_base, AsInt64Slice(key_shape.dimensions()), increment,
@@ -1499,78 +1493,51 @@
         }
         std::vector<int64> indices_to_sort(sort_dim_elements);
         std::iota(indices_to_sort.begin(), indices_to_sort.end(), 0);
-        std::stable_sort(
-            indices_to_sort.begin(), indices_to_sort.end(),
-            [keys_type, &literals_to_sort](int64 a, int64 b) {
-              switch (keys_type) {
-                case F64: {
-                  auto key_lhs = literals_to_sort[0].Get<double>({a});
-                  auto key_rhs = literals_to_sort[0].Get<double>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case U64: {
-                  auto key_lhs = literals_to_sort[0].Get<uint64>({a});
-                  auto key_rhs = literals_to_sort[0].Get<uint64>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case S64: {
-                  auto key_lhs = literals_to_sort[0].Get<int64>({a});
-                  auto key_rhs = literals_to_sort[0].Get<int64>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case F32: {
-                  auto key_lhs = literals_to_sort[0].Get<float>({a});
-                  auto key_rhs = literals_to_sort[0].Get<float>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case U32: {
-                  auto key_lhs = literals_to_sort[0].Get<uint32>({a});
-                  auto key_rhs = literals_to_sort[0].Get<uint32>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case S32: {
-                  auto key_lhs = literals_to_sort[0].Get<int32>({a});
-                  auto key_rhs = literals_to_sort[0].Get<int32>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case BF16: {
-                  auto key_lhs = literals_to_sort[0].Get<bfloat16>({a});
-                  auto key_rhs = literals_to_sort[0].Get<bfloat16>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case F16: {
-                  auto key_lhs = literals_to_sort[0].Get<Eigen::half>({a});
-                  auto key_rhs = literals_to_sort[0].Get<Eigen::half>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case U16: {
-                  auto key_lhs = literals_to_sort[0].Get<uint16>({a});
-                  auto key_rhs = literals_to_sort[0].Get<uint16>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case S16: {
-                  auto key_lhs = literals_to_sort[0].Get<int16>({a});
-                  auto key_rhs = literals_to_sort[0].Get<int16>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case U8: {
-                  auto key_lhs = literals_to_sort[0].Get<uint8>({a});
-                  auto key_rhs = literals_to_sort[0].Get<uint8>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                case S8: {
-                  auto key_lhs = literals_to_sort[0].Get<int8>({a});
-                  auto key_rhs = literals_to_sort[0].Get<int8>({b});
-                  return SafeLess(key_lhs, key_rhs);
-                }
-                default:
-                  // We should never reach here, because we checked earlier
-                  // that 'key_type' is one of the cases above.
-                  LOG(FATAL) << "Invalid key type in Sort: %s",
-                      PrimitiveType_Name(keys_type);
-                  return false;
-              }
-            });
+        Status compare_status = Status::OK();
+        auto comparator = [sort, &compare_status, &embedded_evaluator,
+                           &literals_to_sort](int64 a, int64 b) {
+          std::vector<Literal> literals;
+          literals.reserve(2 * sort->operand_count());
+          for (int64 i = 0; i < sort->operand_count(); ++i) {
+            auto lhs = ExtractFromIndexPositions(literals_to_sort[i], {a},
+                                                 /*extract_as_scalar=*/true);
+            if (!lhs.ok()) {
+              compare_status = lhs.status();
+              return false;
+            }
+            literals.push_back(std::move(lhs.ValueOrDie()));
+            auto rhs = ExtractFromIndexPositions(literals_to_sort[i], {b},
+                                                 /*extract_as_scalar=*/true);
+            if (!rhs.ok()) {
+              compare_status = rhs.status();
+              return false;
+            }
+            literals.push_back(std::move(rhs.ValueOrDie()));
+          }
+          std::vector<const Literal*> literal_ptrs;
+          absl::c_transform(literals, std::back_inserter(literal_ptrs),
+                            [](const Literal& literal) { return &literal; });
+
+          auto computed_result =
+              embedded_evaluator.Evaluate(*sort->to_apply(), literal_ptrs);
+          // Clear visit states so that we can use the evaluator again
+          // on the same computation.
+          embedded_evaluator.ResetVisitStates();
+          if (!computed_result.ok()) {
+            compare_status = computed_result.status();
+            return false;
+          }
+          return computed_result.ValueOrDie().Get<bool>({});
+        };
+        if (Cast<HloSortInstruction>(sort)->is_stable()) {
+          std::stable_sort(indices_to_sort.begin(), indices_to_sort.end(),
+                           comparator);
+        } else {
+          std::sort(indices_to_sort.begin(), indices_to_sort.end(), comparator);
+        }
+        if (!compare_status.ok()) {
+          return compare_status;
+        }
         std::vector<int64> slice_dimensions(rank, 1);
         slice_dimensions[sort_dim] = sort_dim_elements;
         std::vector<int64> start_indices(rank, 0);
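The comparator pattern above -- an embedded evaluator invoked from inside std::sort, with failures smuggled out through a captured Status -- generalizes beyond the evaluator. A minimal standalone sketch of the same shape, with a hypothetical always-succeeding Compare standing in for the embedded evaluation (Status is assumed to be the in-tree tensorflow Status):

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

#include "tensorflow/core/lib/core/status.h"

using tensorflow::Status;

// Stand-in for a comparison that can fail (e.g. an embedded evaluation).
Status Compare(int64_t a, int64_t b, bool* less) {
  *less = a < b;  // Hypothetical; a real comparator could return an error.
  return Status::OK();
}

Status SortIndices(int64_t n, bool is_stable, std::vector<int64_t>* indices) {
  indices->resize(n);
  std::iota(indices->begin(), indices->end(), 0);
  Status compare_status = Status::OK();
  auto comparator = [&compare_status](int64_t a, int64_t b) {
    bool less = false;
    Status s = Compare(a, b, &less);
    if (!s.ok()) {
      compare_status = s;  // Remember the failure...
      return false;        // ...and give std::sort a consistent answer.
    }
    return less;
  };
  if (is_stable) {
    std::stable_sort(indices->begin(), indices->end(), comparator);
  } else {
    std::sort(indices->begin(), indices->end(), comparator);
  }
  return compare_status;  // Surface any failure once the sort finishes.
}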
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 72ea40b..357975a 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -331,16 +331,16 @@
   std::vector<const Literal*> arg_literals_;
 
   // Max loop iterations to execute with no maximum if negative.
-  int64 max_loop_iterations_;
+  int64 max_loop_iterations_ = 0;
 
   // Module-level seed handle.
-  uint64 seed_;
+  uint64 seed_ = 0;
   // RNG engine.
   std::minstd_rand0 engine_;
 
   // DynamicDimensionInference is used to evaluate GetDimensionSize, which
   // returns the dynamic dimension size of its operand.
-  DynamicDimensionInference* dynamic_dimension_inference_;
+  DynamicDimensionInference* dynamic_dimension_inference_ = nullptr;
 
   // Optional handler for custom_call ops.
   std::function<StatusOr<Literal>(HloInstruction* custom_call,
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index fb8cd29..383921f 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -111,6 +111,24 @@
     EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
   }
 
+  void TestTernaryOp(HloOpcode opcode, Literal expected, Literal src0,
+                     Literal src1, Literal src2) {
+    HloComputation::Builder b(TestName());
+    auto operand0 =
+        b.AddInstruction(HloInstruction::CreateConstant(std::move(src0)));
+    auto operand1 =
+        b.AddInstruction(HloInstruction::CreateConstant(std::move(src1)));
+    auto operand2 =
+        b.AddInstruction(HloInstruction::CreateConstant(std::move(src2)));
+    b.AddInstruction(HloInstruction::CreateTernary(
+        expected.shape(), opcode, operand0, operand1, operand2));
+    m_->AddEntryComputation(b.Build());
+
+    Literal result = Evaluate();
+
+    EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
+  }
+
  protected:
   explicit HloEvaluatorTest(bool use_bfloat16) : use_bfloat16_(use_bfloat16) {}
   HloEvaluator evaluator_;
@@ -152,6 +170,33 @@
   EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
 }
 
+// Verifies that clamping of int64 does not cause loss of precision.
+TEST_P(HloEvaluatorBf16Test, DoesClampInt64) {
+  auto ones = [](int bits) { return (int64{1} << bits) - 1; };
+
+  auto low =
+      LiteralUtil::CreateR2<int64>({{0, ones(54)}, {ones(54), ones(58)}});
+  auto value = LiteralUtil::CreateR2<int64>({{0, ones(56)}, {0, ones(58)}});
+  auto high = LiteralUtil::CreateR2<int64>(
+      {{ones(54), ones(55)}, {ones(56), ones(58)}});
+
+  Shape shape = low.shape();
+  HloComputation::Builder b(TestName());
+  auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low)));
+  auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
+  auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
+  b.AddInstruction(
+      HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3));
+  m_->AddEntryComputation(b.Build());
+
+  Literal result = Evaluate();
+
+  auto expected =
+      LiteralUtil::CreateR2<int64>({{0, ones(55)}, {ones(54), ones(58)}});
+
+  EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
+}
+
 TEST_P(HloEvaluatorBf16Test, DISABLED_DoesClampSpecialBroadcast) {
   auto low = LiteralUtil::CreateR0<float>(0.f);
   auto value = LiteralUtil::CreateR2<float>({{-1.f, 0.f}, {1.f, 2.f}});
@@ -254,6 +299,20 @@
   TestBinaryOp(HloOpcode::kDivide, std::move(expected), std::move(lhs),
                std::move(rhs));
 }
+
+TEST_F(HloEvaluatorTest, DoesClampS64) {
+  auto low = LiteralUtil::CreateR1<int64>(
+      {-8616761059752331528LL, 6780561065411491190LL, -8616761059752331528LL});
+  auto value = LiteralUtil::CreateR1<int64>(
+      {-6780561065411491190LL, 6780561065411491180LL, 4241131823772864090LL});
+  auto high = LiteralUtil::CreateR1<int64>(
+      {-6780561065411491180LL, 8616761059752331528LL, 3832151243857508051LL});
+  auto expected = LiteralUtil::CreateR1<int64>(
+      {-6780561065411491190LL, 6780561065411491190LL, 3832151243857508051LL});
+  TestTernaryOp(HloOpcode::kClamp, std::move(expected), std::move(low),
+                std::move(value), std::move(high));
+}
+
 TEST_P(HloEvaluatorBf16Test, DoesDivideDouble) {
   auto lhs = LiteralUtil::CreateR2<double>({{1.0, 0.0}, {-100.0, 4.0}});
   auto rhs = LiteralUtil::CreateR2<double>({{2.2, 4.0}, {4.0, 4.0}});
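The new clamp tests pin down a real hazard: routing int64 clamping through std::fmin/std::fmax silently round-trips through double, whose 53-bit mantissa cannot represent every 64-bit integer. A small standalone demonstration of the precision loss:

#include <cstdint>
#include <iostream>

int main() {
  // 58 one-bits: representable as int64, but not exactly as a double.
  int64_t v = (int64_t{1} << 58) - 1;
  double d = static_cast<double>(v);  // Rounds up to 2^58.
  std::cout << "int64:  " << v << "\n";                       // ...711743
  std::cout << "double: " << static_cast<int64_t>(d) << "\n"; // ...711744
  // The two values differ; std::min/std::max on int64_t avoid the
  // round-trip and keep the clamp exact.
}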
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index 652042e..d516a62 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -17,6 +17,7 @@
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EVALUATOR_TYPED_VISITOR_H_
 
 #include <cmath>
+#include <type_traits>
 
 #include "absl/algorithm/container.h"
 #include "absl/base/casts.h"
@@ -43,46 +44,6 @@
 using is_complex_t =
     absl::disjunction<std::is_same<T, complex64>, std::is_same<T, complex128>>;
 
-// It's UB to use std::sort with std::less<float>, because of NaNs. Define
-// "safe" less functions which are actually strict weak orders. -NaN and NaN
-// should appear at the beginning and end of the ordering, and -0.0 should
-// appear before 0.0.
-template <
-    typename NativeT,
-    typename std::enable_if<std::is_integral<NativeT>::value>::type* = nullptr>
-bool SafeLess(const NativeT& a, const NativeT& b) {
-  return a < b;
-}
-
-template <typename NativeT, typename std::enable_if<std::is_floating_point<
-                                NativeT>::value>::type* = nullptr>
-bool SafeLess(const NativeT& a, const NativeT& b) {
-  bool lhs_is_negative = std::signbit(a);
-  bool rhs_is_negative = std::signbit(b);
-  // If the signs are different, we can just compare the signs.
-  if (lhs_is_negative != rhs_is_negative) {
-    return lhs_is_negative && !rhs_is_negative;
-  }
-  bool lhs_nan = std::isnan(a);
-  bool rhs_nan = std::isnan(b);
-  // Exactly one number is nan?
-  if (lhs_nan != rhs_nan) {
-    if (lhs_nan) {
-      return lhs_is_negative;
-    }
-    return !rhs_is_negative;
-  }
-  return a < b;
-}
-
-template <typename NativeT,
-          typename std::enable_if<
-              std::is_same<NativeT, bfloat16>::value ||
-              std::is_same<NativeT, Eigen::half>::value>::type* = nullptr>
-bool SafeLess(const NativeT& a, const NativeT& b) {
-  return SafeLess(static_cast<float>(a), static_cast<float>(b));
-}
-
 // ToArithmeticSafeType(T t):
 //  - converts `t` to the bitwise-equivalent `unsigned T` if T is a signed
 //    integer, and
@@ -933,9 +894,29 @@
     return HandleShiftRightLogical<ElementwiseT>(shrl);
   }
 
-  template <
-      typename NativeT,
-      typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
+  // Special case for integral type due to MSVC's std::isnan being unable to
+  // handle integral type.
+  template <typename NativeT,
+            typename std::enable_if<!is_complex_t<NativeT>::value &&
+                                    std::is_integral<NativeT>::value>::type* =
+                nullptr>
+  Status HandleClamp(HloInstruction* clamp) {
+    std::function<ElementwiseT(ElementwiseT, ElementwiseT, ElementwiseT)>
+        clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) {
+          return static_cast<ElementwiseT>(
+              std::min(high, std::max(value, low)));
+        };
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[clamp],
+        ElementwiseTernaryOp(clamp,
+                             std::move(ConvertTernaryFunction(clamp_op))));
+    return Status::OK();
+  }
+
+  template <typename NativeT,
+            typename std::enable_if<!is_complex_t<NativeT>::value &&
+                                    !std::is_integral<NativeT>::value>::type* =
+                nullptr>
   Status HandleClamp(HloInstruction* clamp) {
     std::function<ElementwiseT(ElementwiseT, ElementwiseT, ElementwiseT)>
         clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) {
@@ -943,7 +924,7 @@
             return static_cast<ElementwiseT>(NAN);
           }
           return static_cast<ElementwiseT>(
-              std::fmin(high, std::fmax(value, low)));
+              std::min<NativeT>(high, std::max<NativeT>(value, low)));
         };
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[clamp],
@@ -2710,12 +2691,25 @@
         const Literal& high =
             parent_->GetEvaluatedLiteralFor(random->operand(1));
 
-        std::uniform_real_distribution<NativeT> generator(
-            low.Get<NativeT>({}), high.Get<NativeT>({}));
-
+        // std::uniform_real_distribution(a, b) can sometimes return a value
+        // equal to b.  Unclear if this is a spec bug or an implementation bug
+        // or WAI [0] [1] [2].  Anyway for our purposes we want a half-open
+        // interval, so we have to re-sample if we get `b` out.
+        //
+        // [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63176
+        // [1] https://bugs.llvm.org/show_bug.cgi?id=18767
+        // [2] http://open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#2524
+        auto low_val = low.Get<NativeT>({});
+        auto high_val = high.Get<NativeT>({});
+        std::uniform_real_distribution<NativeT> generator(low_val, high_val);
         TF_RETURN_IF_ERROR(
             result.Populate<NativeT>([&](absl::Span<const int64> /*indexes*/) {
-              return generator(parent_->engine_);
+              while (true) {
+                NativeT v = generator(parent_->engine_);
+                if (v != high_val) {
+                  return v;
+                }
+              }
             }));
         break;
       }
@@ -2849,21 +2843,10 @@
       absl::Span<HloInstruction* const> start_indices,
       const Shape& result_shape) {
     std::vector<int64> start;
-    // TODO(b/118437727): Remove the R1 code-path. Note that to distinguish
-    // between the cases, this currently assumes there is at least 1 index. That
-    // is wrong in the general case, because for scalar indices, if the operand
-    // is scalar, then there are no indices. This problem with resolve itself.
-    const HloInstruction* first_index = start_indices[0];
-    if (first_index->shape().rank() == 1) {
-      auto start_indices_typed =
-          parent_->GetEvaluatedLiteralFor(first_index).data<IndexT>();
-      start = std::vector<int64>(start_indices_typed.begin(),
-                                 start_indices_typed.end());
-    } else {
-      for (HloInstruction* index : start_indices) {
-        start.push_back(
-            parent_->GetEvaluatedLiteralFor(index).GetFirstElement<IndexT>());
-      }
+
+    for (HloInstruction* index : start_indices) {
+      start.push_back(
+          parent_->GetEvaluatedLiteralFor(index).GetFirstElement<IndexT>());
     }
 
     // Clamp the start indices so the slice is in-bounds w.r.t the operand.
@@ -2896,22 +2879,11 @@
     auto result = operand_literal.Clone();
     const auto rank = result.shape().rank();
     std::vector<int64> start;
-    // TODO(b/118437727): Remove the R1 code-path. Note that to distinguish
-    // between the cases, this currently assumes there is at least 1 index. That
-    // is wrong in the general case, because for scalar indices, if the operand
-    // is scalar, then there are no indices. This problem with resolve itself.
-    const HloInstruction* first_index = start_indices[0];
-    if (first_index->shape().rank() == 1) {
-      auto start_indices_typed =
-          parent_->GetEvaluatedLiteralFor(first_index).data<IndexT>();
-      start = std::vector<int64>(start_indices_typed.begin(),
-                                 start_indices_typed.end());
-    } else {
-      for (HloInstruction* index : start_indices) {
-        start.push_back(
-            parent_->GetEvaluatedLiteralFor(index).GetFirstElement<IndexT>());
-      }
+    for (HloInstruction* index : start_indices) {
+      start.push_back(
+          parent_->GetEvaluatedLiteralFor(index).GetFirstElement<IndexT>());
     }
+
     // Clamp the update start indices so the slice is in-bounds w.r.t the
     // operand.
     for (int64 i = 0; i < rank; ++i) {
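The re-sampling loop for uniform_real_distribution is worth isolating, since the closed-vs-half-open ambiguity bites any code that promises [low, high). A minimal sketch of the same rejection step, assuming a std::mt19937 engine and a floating-point T:

#include <random>

// Draw from [low, high): some standard-library implementations of
// uniform_real_distribution can return exactly `high` (see the bug links
// above), so reject and redraw in that rare case.
template <typename T>
T UniformHalfOpen(std::mt19937& engine, T low, T high) {
  std::uniform_real_distribution<T> dist(low, high);
  while (true) {
    T v = dist(engine);
    if (v != high) {
      return v;
    }
  }
}

With low < high the loop terminates almost surely; the offending draw occurs with negligible probability, so the expected number of iterations is essentially one.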
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 46ee999..49300b3 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -38,7 +38,6 @@
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/compiler/xla/service/hlo_tfgraph_builder.h"
 #include "tensorflow/compiler/xla/service/pattern_matcher.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -536,7 +535,12 @@
     }
   }
 
-  return StrFormat(fmt, graph_label, StrJoin(edge_css_rules, "\n"));
+  // Browsers require that we URI-encode the contents of our data URI.  (It
+  // seems this was a relatively recent change?) In practice, this means that we
+  // need to escape '#'.
+  return StrFormat(
+      fmt, graph_label,
+      absl::StrReplaceAll(StrJoin(edge_css_rules, "\n"), {{"#", "%23"}}));
 }
 
 string HloDotDumper::Footer() { return StrCat(StrJoin(edges_, "\n"), "\n}"); }
@@ -1011,6 +1015,7 @@
     case HloOpcode::kConvolution:
     case HloOpcode::kDot:
     case HloOpcode::kFft:
+    case HloOpcode::kTriangularSolve:
       return kDarkBlue;
     case HloOpcode::kReducePrecision:
       return kRed;
@@ -1281,8 +1286,9 @@
 
 // Gets a NodeFilter that includes roughly all instructions whose distance from
 // root is <= radius.
-NodeFilter MakeNodeRadiusAroundFilter(const HloInstruction* root,
-                                      int64 radius) {
+NodeFilter MakeNodeRadiusAroundFilter(
+    const HloInstruction* root, int64 radius,
+    const absl::flat_hash_set<const HloInstruction*>& boundary) {
   // First, find the neighborhood of nodes with distance from root <= radius.
   // These nodes are our initial set of "normal" nodes.
   absl::flat_hash_map<const HloInstruction*, NodeFilterResult> nodes;
@@ -1298,6 +1304,9 @@
     if (depth == radius) {
       continue;
     }
+    if (boundary.contains(instr)) {
+      continue;
+    }
 
     // Traverse into instr's operands.
     //
@@ -1446,9 +1455,6 @@
     case GraphRendererInterface::DOT_GRAPH:
       file_extension = ".dot";
       break;
-    case GraphRendererInterface::TF_GRAPHDEF:
-      file_extension = ".pbtxt";
-      break;
   }
   string path = JoinPath(dest_path, StrCat("hlo_graph_", output_num++, "."));
   auto status = Status::OK();
@@ -1486,25 +1492,27 @@
 
 }  // namespace
 
+string HloComputationToDotGraph(const HloComputation& computation,
+                                const DotGraphOptions& options) {
+  DebugOptions default_debug_options;
+  return HloDotDumper(&computation, options.label,
+                      options.debug_options ? *options.debug_options
+                                            : default_debug_options,
+                      options.show_backend_config, options.profile,
+                      NodeFilter())
+      .Dump();
+}
+
 string DumpGraph(const HloComputation& computation, const string& label,
                  const DebugOptions& debug_options,
                  const HloExecutionProfile* hlo_execution_profile,
                  bool show_backend_config) {
   GraphRendererInterface::GraphKind graph_kind;
-  string graph;
-  if (debug_options.xla_hlo_dump_as_graphdef()) {
-    HloTfGraphBuilder builder(debug_options);
-    TF_CHECK_OK(builder.AddComputation(computation));
-    CHECK(tensorflow::protobuf::TextFormat::PrintToString(builder.GetGraphDef(),
-                                                          &graph));
-    graph_kind = GraphRendererInterface::TF_GRAPHDEF;
-  } else {
-    graph =
-        HloDotDumper(&computation, label, debug_options, show_backend_config,
-                     hlo_execution_profile, NodeFilter())
-            .Dump();
-    graph_kind = GraphRendererInterface::DOT_GRAPH;
-  }
+  string graph =
+      HloDotDumper(&computation, label, debug_options, show_backend_config,
+                   hlo_execution_profile, NodeFilter())
+          .Dump();
+  graph_kind = GraphRendererInterface::DOT_GRAPH;
 
   string graph_url = ExportGraph(graph, graph_kind, debug_options);
   LOG(INFO) << "computation " << computation.name() << " [" << label
@@ -1512,12 +1520,13 @@
   return graph_url;
 }
 
-string DumpNeighborhoodAround(const HloInstruction& node, int radius,
-                              bool show_backend_config) {
+string DumpNeighborhoodAround(
+    const HloInstruction& node, int radius, bool show_backend_config,
+    const absl::flat_hash_set<const HloInstruction*>& boundary) {
   auto debug_options = node.GetModule()->config().debug_options();
   string label =
       StrCat("Neighborhood of ", radius, " nodes around ", node.name());
-  NodeFilter filter = MakeNodeRadiusAroundFilter(&node, radius);
+  NodeFilter filter = MakeNodeRadiusAroundFilter(&node, radius, boundary);
   string graph =
       HloDotDumper(node.parent(), label, debug_options, show_backend_config,
                    /*profile=*/nullptr, filter)
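For the data-URI fix, the relevant detail is that '#' starts a URI fragment, so an unescaped '#rrggbb' color literal in embedded CSS truncates the URI. A hedged sketch of the escaping step with absl (the helper name is hypothetical):

#include <string>

#include "absl/strings/str_cat.h"
#include "absl/strings/str_replace.h"

// Wrap CSS in a data URI, percent-escaping '#' so color literals like
// "#ffcdd2" survive URI parsing instead of being read as a fragment.
std::string CssDataUri(const std::string& css) {
  return absl::StrCat("data:text/css,",
                      absl::StrReplaceAll(css, {{"#", "%23"}}));
}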
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h
index 8e51454..563cea4 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h
@@ -26,13 +26,23 @@
 namespace xla {
 namespace hlo_graph_dumper {
 
+// Converts an HLO computation to a DOT (graphviz) graph. Returns the dot
+// graph as a string.
+struct DotGraphOptions {
+  absl::string_view label;
+  const DebugOptions* debug_options = nullptr;
+  const HloExecutionProfile* profile = nullptr;
+  bool show_backend_config = false;
+};
+string HloComputationToDotGraph(const HloComputation& computation,
+                                const DotGraphOptions& options);
+
 // Abstract interface for classes that render HLO graphs (e.g. DOT graph,
-// tensorflow GraphDef).
+// tensorflow GraphDef) to files or services.
 class GraphRendererInterface {
  public:
   enum GraphKind {
     DOT_GRAPH,
-    TF_GRAPHDEF,
   };
 
   virtual ~GraphRendererInterface() = default;
@@ -63,8 +73,12 @@
 // The number of nodes dumped is controlled by the radius parameter, which
 // (roughly) corresponds to the max distance a node may be from the primary node
 // before it's omitted from the graph.
-string DumpNeighborhoodAround(const HloInstruction& node, int radius,
-                              bool show_backend_config = false);
+//
+// The optional boundary specifies a set of boundary nodes, beyond which nodes
+// will be omitted even if they are within the radius.
+string DumpNeighborhoodAround(
+    const HloInstruction& node, int radius, bool show_backend_config = false,
+    const absl::flat_hash_set<const HloInstruction*>& boundary = {});
 
 // Dumps nodes on any of the paths from `from` to `to`.  If there are more than
 // max_nodes on all paths, restricts to the max_nodes nodes on the shortest
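A plausible call site for the new boundary parameter (function and instruction names are hypothetical): stop the neighborhood walk at a while-loop's parameter so the dump does not spill into the surrounding computation.

#include "absl/container/flat_hash_set.h"
#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"

// Dump a small neighborhood around `suspect`, cut off at `while_param`.
std::string DumpAroundSuspect(const xla::HloInstruction& suspect,
                              const xla::HloInstruction* while_param) {
  absl::flat_hash_set<const xla::HloInstruction*> boundary = {while_param};
  return xla::hlo_graph_dumper::DumpNeighborhoodAround(
      suspect, /*radius=*/4, /*show_backend_config=*/false, boundary);
}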
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index fd9b8ea..33c2270 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -132,6 +132,14 @@
                               absl::Span<const int64>(fft_length));
       break;
     }
+    case HloOpcode::kTriangularSolve: {
+      TF_RET_CHECK(proto.operand_ids_size() == 2)
+          << "Triangular solve instruction should have 2 operands but sees "
+          << proto.operand_ids_size();
+      instruction = CreateTriangularSolve(shape, operands(0), operands(1),
+                                          proto.triangular_solve_options());
+      break;
+    }
     case HloOpcode::kSend:
       TF_RET_CHECK(proto.operand_ids_size() == 2)
           << "Send instruction should have 2 operand but sees "
@@ -201,16 +209,12 @@
           << proto.operand_ids_size();
       TF_RET_CHECK(proto.dimensions().size() == 1)
           << "Sort instruction should have 1 dimension";
+      TF_RET_CHECK(proto.called_computation_ids_size() == 1)
+          << "Sort instruction should one called computation but sees "
+          << proto.called_computation_ids_size();
       auto sort_operands = all_operands();
-      HloInstruction* keys = sort_operands[0];
-      if (proto.called_computation_ids_size() == 1) {
-        instruction = CreateSort(shape, proto.dimensions(0), all_operands(),
-                                 computations(0));
-      } else {
-        instruction = CreateSort(
-            shape, proto.dimensions(0), keys,
-            absl::Span<HloInstruction* const>(sort_operands).subspan(1));
-      }
+      instruction = CreateSort(shape, proto.dimensions(0), all_operands(),
+                               computations(0), proto.is_stable());
       break;
     }
     case HloOpcode::kTranspose:
@@ -300,6 +304,10 @@
     case HloOpcode::kParameter:
       instruction =
           CreateParameter(proto.parameter_number(), shape, proto.name());
+      if (!proto.parameter_replication().replicated_at_leaf_buffers().empty()) {
+        instruction->set_parameter_replicated_at_leaf_buffers(
+            proto.parameter_replication().replicated_at_leaf_buffers());
+      }
       break;
     case HloOpcode::kGetTupleElement:
       TF_RET_CHECK(proto.operand_ids_size() == 1)
@@ -795,6 +803,13 @@
                                               fft_length);
 }
 
+/* static */ std::unique_ptr<HloInstruction>
+HloInstruction::CreateTriangularSolve(const Shape& shape, HloInstruction* a,
+                                      HloInstruction* b,
+                                      const TriangularSolveOptions& options) {
+  return absl::make_unique<HloTriangularSolveInstruction>(shape, a, b, options);
+}
+
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateDot(
     const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
     const DotDimensionNumbers& dimension_numbers,
@@ -1158,16 +1173,11 @@
 }
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateSort(
-    const Shape& shape, int64 dimension, HloInstruction* keys,
-    absl::Span<HloInstruction* const> values) {
-  return absl::make_unique<HloSortInstruction>(shape, dimension, keys, values);
-}
-
-/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateSort(
     const Shape& shape, int64 dimension,
-    absl::Span<HloInstruction* const> operands, HloComputation* compare) {
+    absl::Span<HloInstruction* const> operands, HloComputation* compare,
+    bool is_stable) {
   return absl::make_unique<HloSortInstruction>(shape, dimension, operands,
-                                               compare);
+                                               compare, is_stable);
 }
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateFusion(
@@ -1359,6 +1369,7 @@
     case HloOpcode::kDot:
     case HloOpcode::kDomain:
     case HloOpcode::kGetDimensionSize:
+    case HloOpcode::kTriangularSolve:
       clone = CloneWithNewOperandsImpl(shape, new_operands, context);
       break;
     // Unary ops.
@@ -1814,6 +1825,7 @@
     case HloOpcode::kDot:
     case HloOpcode::kDomain:
     case HloOpcode::kGetDimensionSize:
+    case HloOpcode::kTriangularSolve:
       LOG(FATAL) << "Base class impl called for opcode with subclass: "
                  << opcode();
   }
@@ -2259,12 +2271,8 @@
                opcode() == HloOpcode::kAllReduce ||
                opcode() == HloOpcode::kScatter ||
                opcode() == HloOpcode::kSort) {
-      // TODO(b/122298745): Remove this check when Sort has a required
-      // sub-computation.
-      if (!called_computations().empty()) {
-        extra.push_back(
-            StrCat("to_apply=", PrintName(to_apply()->name(), options)));
-      }
+      extra.push_back(
+          StrCat("to_apply=", PrintName(to_apply()->name(), options)));
     } else if (!called_computations().empty()) {
       extra.push_back(StrCat(
           "calls=",
@@ -2300,12 +2308,8 @@
       case HloOpcode::kAllReduce:
       case HloOpcode::kScatter:
       case HloOpcode::kSort:
-        // TODO(b/122298745): Remove this check once sort has a required
-        // sub-computation.
-        if (to_apply() != nullptr) {
-          extra.push_back(
-              StrCat("to_apply=\n", to_apply()->ToString(new_options)));
-        }
+        extra.push_back(
+            StrCat("to_apply=\n", to_apply()->ToString(new_options)));
         break;
       default:
         if (!called_computations().empty()) {
@@ -2609,6 +2613,8 @@
       return visitor->HandleIota(this);
     case HloOpcode::kGetDimensionSize:
       return visitor->HandleGetDimensionSize(this);
+    case HloOpcode::kTriangularSolve:
+      return visitor->HandleTriangularSolve(this);
 
     // These opcodes are not handled here.
     case HloOpcode::kTrace:
@@ -3320,6 +3326,19 @@
   return Cast<HloParameterInstruction>(this)->parameter_number();
 }
 
+void HloInstruction::set_parameter_replicated_at_leaf_buffers(
+    absl::Span<const bool> parameter_replicated_at_leaf_buffers) {
+  return Cast<HloParameterInstruction>(this)
+      ->set_parameter_replicated_at_leaf_buffers(
+          parameter_replicated_at_leaf_buffers);
+}
+
+const absl::optional<std::vector<bool>>&
+HloInstruction::parameter_replicated_at_leaf_buffers() const {
+  return Cast<HloParameterInstruction>(this)
+      ->parameter_replicated_at_leaf_buffers();
+}
+
 int64 HloInstruction::tuple_index() const {
   return Cast<HloGetTupleElementInstruction>(this)->tuple_index();
 }
@@ -3476,4 +3495,8 @@
 const DomainMetadata& HloInstruction::user_side_metadata() const {
   return Cast<HloDomainInstruction>(this)->user_side_metadata();
 }
+
+const TriangularSolveOptions& HloInstruction::triangular_solve_options() const {
+  return Cast<HloTriangularSolveInstruction>(this)->triangular_solve_options();
+}
 }  // namespace xla
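For the new kTriangularSolve plumbing, a sketch of what a call site might look like (the wrapper name is hypothetical; field and enum names come from TriangularSolveOptions in xla_data.proto; result_shape, a, and b are assumed to exist):

std::unique_ptr<HloInstruction> MakeLowerLeftSolve(const Shape& result_shape,
                                                   HloInstruction* a,
                                                   HloInstruction* b) {
  TriangularSolveOptions options;
  options.set_left_side(true);       // Solve op(a) * x = b for x.
  options.set_lower(true);           // `a` is lower triangular.
  options.set_unit_diagonal(false);  // Use `a`'s actual diagonal.
  options.set_transpose_a(TriangularSolveOptions::NO_TRANSPOSE);
  return HloInstruction::CreateTriangularSolve(result_shape, a, b, options);
}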
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 79c6c85..8ac1636 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -47,6 +47,7 @@
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_sharding.h"
 #include "tensorflow/compiler/xla/service/name_uniquer.h"
+#include "tensorflow/compiler/xla/shape_tree.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
@@ -384,6 +385,14 @@
 
   // Creates a random number generation instruction that fills a shape with
   // random numbers from a given distribution.
+  //
+  // The parameters to the instruction are interpreted as follows:
+  //
+  //  - If `distribution` is RNG_UNIFORM, generates a number in range
+  //    [param0, param1).
+  //
+  //  - If `distribution` is RNG_NORMAL, generates a normally-distributed value
+  //    with mean `param0` and standard deviation `param1`.
   static std::unique_ptr<HloInstruction> CreateRng(
       const Shape& shape, RandomDistribution distribution,
       absl::Span<HloInstruction* const> parameters);
@@ -435,6 +444,10 @@
       const Shape& shape, HloInstruction* operand, FftType fft_type,
       absl::Span<const int64> fft_length);
 
+  static std::unique_ptr<HloInstruction> CreateTriangularSolve(
+      const Shape& shape, HloInstruction* a, HloInstruction* b,
+      const TriangularSolveOptions& options);
+
   // Creates a dot op with operands 'lhs' and 'rhs' with contracting and batch
   // dimensions specified in 'dimension_numbers'.
   static std::unique_ptr<HloInstruction> CreateDot(
@@ -489,7 +502,7 @@
   // Data is sent/received according to the (source_replica_id,
   // target_replica_id) pairs in `source_target_pairs`. If a replica id is not a
   // target_replica_id in any pair, the output on that replica is a tensor
-  // conssits of 0(s) in `shape`.
+  // consists of 0(s) in `shape`.
   static std::unique_ptr<HloInstruction> CreateCollectivePermute(
       const Shape& shape, HloInstruction* operand,
       const std::vector<std::pair<int64, int64>>& source_target_pairs);
@@ -598,7 +611,6 @@
   // f_2 = f(f_1.tuple_element(0), ..., f_1.tuple_element(N), input0.value1,
   // ..., inputN.value1)
   // ...
-  // TODO(b/112040122): Add support to this in HLO passes and in backends.
   static std::unique_ptr<HloInstruction> CreateReduce(
       const Shape& shape, absl::Span<HloInstruction* const> operands,
       absl::Span<HloInstruction* const> init_values,
@@ -671,19 +683,15 @@
       const Shape& shape, HloInstruction* operand,
       absl::Span<const int64> dimensions);
 
-  // Creates a sort op, with a keys operand, and optional values operands.
-  static std::unique_ptr<HloInstruction> CreateSort(
-      const Shape& shape, int64 dimension, HloInstruction* keys,
-      absl::Span<HloInstruction* const> values = {});
-
   // Creates a n-ary sort op with a 'compare' computation which is used for
   // comparisons in the sorting algorithm. 'compare' gets 2 * n parameters,
   // where parameters 2 * i and 2 * i + 1 are the values of the i-th operand at
   // specific index positions which should be compared, and should return a
-  // PRED.
+  // PRED. 'is_stable' specifies whether stable sorting is required.
   static std::unique_ptr<HloInstruction> CreateSort(
       const Shape& shape, int64 dimension,
-      absl::Span<HloInstruction* const> operands, HloComputation* compare);
+      absl::Span<HloInstruction* const> operands, HloComputation* compare,
+      bool is_stable);
 
   // Creates a while instruction, given a condition computation, a body
   // computation, and the initial value for the input of the computations. For
@@ -1251,6 +1259,10 @@
   // on the instruction's existing name.
   void UniquifyName(NameUniquer* name_uniquer);
 
+  // Clear the unique ID of the instruction so that it can be re-assigned, such
+  // as for the purpose of compacting the instruction unique IDs.
+  void ClearUniqueIdInternal() { unique_id_ = -1; }
+
   // Set the unique id for this instruction to "id"
   void SetUniqueId(int id) {
     CHECK_EQ(unique_id_, -1);  // Should not be assigned already
@@ -1284,6 +1296,9 @@
     backend_config_ = std::move(config_str);
   }
 
+  bool is_default_config() const { return is_default_config_; }
+  void set_default_config() { is_default_config_ = true; }
+
   // Returns a string representation of a proto in the format used by
   // raw_backend_config_string.
   //
@@ -1454,6 +1469,15 @@
   // Delegates to HloParameterInstruction::parameter_number.
   int64 parameter_number() const;
 
+  // Delegates to
+  // HloParameterInstruction::set_parameter_replicated_at_leaf_buffers.
+  void set_parameter_replicated_at_leaf_buffers(
+      absl::Span<const bool> parameter_replicated_at_leaf_buffers);
+
+  // Delegates to HloParameterInstruction::parameter_replicated_at_leaf_buffers.
+  const absl::optional<std::vector<bool>>&
+  parameter_replicated_at_leaf_buffers() const;
+
   // Delegates to HloGetTupleElementInstruction::tuple_index.
   int64 tuple_index() const;
 
@@ -1563,6 +1587,9 @@
   // Delegates to HloDomainInstruction::user_side_metadata().
   const DomainMetadata& user_side_metadata() const;
 
+  // Delegates to HloTriangularSolveInstruction::triangular_solve_options().
+  const TriangularSolveOptions& triangular_solve_options() const;
+
   // Old methods kept for smooth subclassing transition END.
 
  protected:
@@ -1729,6 +1756,10 @@
   // HLO. See the documentation on backend_config().
   string backend_config_;
 
+  // This field is assigned to true when backend_config_ is assigned to
+  // a default configuration.
+  bool is_default_config_ = false;
+
   // String identifier for instruction.
   string name_;
 
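Since the keys-and-values overload of CreateSort is gone, every caller now supplies a comparator computation and a stability flag explicitly. A hedged sketch of the new call shape (sort_shape, the operands, and the PRED-returning `compare` computation are assumed to exist):

// `compare` takes 2*n scalar parameters -- the lhs/rhs of each operand at
// the index positions being compared -- and returns PRED, per the comment
// on CreateSort above.
std::unique_ptr<HloInstruction> sort = HloInstruction::CreateSort(
    sort_shape, /*dimension=*/0, {keys, values}, compare,
    /*is_stable=*/true);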
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 8e40e54..905a6fe 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -201,6 +201,57 @@
                                               fft_length_);
 }
 
+HloTriangularSolveInstruction::HloTriangularSolveInstruction(
+    const Shape& shape, HloInstruction* a, HloInstruction* b,
+    const TriangularSolveOptions& options)
+    : HloInstruction(HloOpcode::kTriangularSolve, shape),
+      triangular_solve_options_(options) {
+  AppendOperand(a);
+  AppendOperand(b);
+}
+
+HloInstructionProto HloTriangularSolveInstruction::ToProto() const {
+  HloInstructionProto proto = HloInstruction::ToProto();
+  *proto.mutable_triangular_solve_options() = triangular_solve_options_;
+  return proto;
+}
+
+std::vector<string> HloTriangularSolveInstruction::ExtraAttributesToStringImpl(
+    const HloPrintOptions& options) const {
+  return {
+      StrCat("left_side=",
+             triangular_solve_options_.left_side() ? "true" : "false"),
+      StrCat("lower=", triangular_solve_options_.lower() ? "true" : "false"),
+      StrCat("unit_diagonal=",
+             triangular_solve_options_.unit_diagonal() ? "true" : "false"),
+      StrCat("transpose_a=", TriangularSolveOptions_Transpose_Name(
+                                 triangular_solve_options_.transpose_a()))};
+}
+
+bool HloTriangularSolveInstruction::IdenticalSlowPath(
+    const HloInstruction& other,
+    const std::function<bool(const HloComputation*, const HloComputation*)>&
+        eq_computations) const {
+  const auto& casted_other =
+      static_cast<const HloTriangularSolveInstruction&>(other);
+  const auto& options = triangular_solve_options();
+  const auto& other_options = casted_other.triangular_solve_options();
+
+  return options.left_side() == other_options.left_side() &&
+         options.lower() == other_options.lower() &&
+         options.unit_diagonal() == other_options.unit_diagonal() &&
+         options.transpose_a() == other_options.transpose_a();
+}
+
+std::unique_ptr<HloInstruction>
+HloTriangularSolveInstruction::CloneWithNewOperandsImpl(
+    const Shape& shape, absl::Span<HloInstruction* const> new_operands,
+    HloCloneContext* context) const {
+  CHECK_EQ(new_operands.size(), 2);
+  return absl::make_unique<HloTriangularSolveInstruction>(
+      shape, new_operands[0], new_operands[1], triangular_solve_options());
+}
+
 HloSendRecvInstruction::HloSendRecvInstruction(HloOpcode opcode,
                                                const Shape& shape,
                                                int64 channel_id,
@@ -609,20 +660,13 @@
                                                  dimensions(), to_apply());
 }
 
-HloSortInstruction::HloSortInstruction(const Shape& shape, int64 dimension,
-                                       HloInstruction* keys,
-                                       absl::Span<HloInstruction* const> values)
-    : HloInstruction(HloOpcode::kSort, shape), dimensions_({dimension}) {
-  AppendOperand(keys);
-  for (auto* value : values) {
-    AppendOperand(value);
-  }
-}
-
 HloSortInstruction::HloSortInstruction(
     const Shape& shape, int64 dimension,
-    absl::Span<HloInstruction* const> operands, HloComputation* compare)
-    : HloInstruction(HloOpcode::kSort, shape), dimensions_({dimension}) {
+    absl::Span<HloInstruction* const> operands, HloComputation* compare,
+    bool is_stable)
+    : HloInstruction(HloOpcode::kSort, shape),
+      dimensions_({dimension}),
+      is_stable_(is_stable) {
   for (auto* value : operands) {
     AppendOperand(value);
   }
@@ -634,12 +678,18 @@
   for (int64 dimension : dimensions_) {
     proto.add_dimensions(dimension);
   }
+  proto.set_is_stable(is_stable());
   return proto;
 }
 
 std::vector<string> HloSortInstruction::ExtraAttributesToStringImpl(
     const HloPrintOptions& options) const {
-  return {StrCat("dimensions={", StrJoin(dimensions(), ","), "}")};
+  std::vector<string> attrs;
+  attrs.push_back(StrCat("dimensions={", StrJoin(dimensions(), ","), "}"));
+  if (is_stable()) {
+    attrs.push_back("is_stable=true");
+  }
+  return attrs;
 }
 
 bool HloSortInstruction::IdenticalSlowPath(
@@ -650,10 +700,7 @@
   if (dimensions() != casted_other.dimensions()) {
     return false;
   }
-  if (called_computations().empty()) {
-    return other.called_computations().empty();
-  }
-  if (other.called_computations().empty()) {
+  if (is_stable() != casted_other.is_stable()) {
     return false;
   }
   return eq_computations(to_apply(), other.to_apply());
@@ -662,13 +709,8 @@
 std::unique_ptr<HloInstruction> HloSortInstruction::CloneWithNewOperandsImpl(
     const Shape& shape, absl::Span<HloInstruction* const> new_operands,
     HloCloneContext* context) const {
-  if (!called_computations().empty()) {
-    return absl::make_unique<HloSortInstruction>(shape, dimensions(0),
-                                                 new_operands, to_apply());
-  }
-  HloInstruction* keys = new_operands[0];
-  return absl::make_unique<HloSortInstruction>(shape, dimensions(0), keys,
-                                               new_operands.subspan(1));
+  return absl::make_unique<HloSortInstruction>(
+      shape, dimensions(0), new_operands, to_apply(), is_stable());
 }
 
 HloTransposeInstruction::HloTransposeInstruction(
@@ -1496,9 +1538,30 @@
 HloInstructionProto HloParameterInstruction::ToProto() const {
   HloInstructionProto proto = HloInstruction::ToProto();
   proto.set_parameter_number(parameter_number_);
+  if (parameter_replicated_at_leaf_buffers_) {
+    for (bool replicated : *parameter_replicated_at_leaf_buffers_) {
+      proto.mutable_parameter_replication()->add_replicated_at_leaf_buffers(
+          replicated);
+    }
+  }
   return proto;
 }
 
+std::vector<string> HloParameterInstruction::ExtraAttributesToStringImpl(
+    const HloPrintOptions& /*options*/) const {
+  std::vector<string> result;
+  if (!parameter_replicated_at_leaf_buffers_) {
+    return result;
+  }
+  std::vector<string> buffers_replicated_strs;
+  for (bool replicated : *parameter_replicated_at_leaf_buffers_) {
+    buffers_replicated_strs.push_back(replicated ? "true" : "false");
+  }
+  result.push_back(StrCat("parameter_replication={",
+                          StrJoin(buffers_replicated_strs, ","), "}"));
+  return result;
+}
+
 string HloParameterInstruction::OperandsToStringWithCanonicalNameMap(
     const HloPrintOptions& options,
     CanonicalNameMap* canonical_name_map) const {
@@ -1986,6 +2049,17 @@
   if (batch_group_count_ != casted_other.batch_group_count_) {
     return false;
   }
+  if (layout_constrained() != casted_other.layout_constrained()) {
+    return false;
+  }
+  if (layout_constrained()) {
+    for (int64 i = 0; i < operand_shapes_with_layout_.size(); ++i) {
+      if (!ShapeUtil::Equal(operand_shapes_with_layout_[i],
+                            casted_other.operand_shapes_with_layout_[i])) {
+        return false;
+      }
+    }
+  }
   return custom_call_target_ == casted_other.custom_call_target_ &&
          opaque_ == casted_other.opaque_;
 }
@@ -1996,6 +2070,10 @@
     HloCloneContext* context) const {
   auto cloned = absl::make_unique<HloCustomCallInstruction>(
       shape, new_operands, custom_call_target(), opaque());
+  if (layout_constrained()) {
+    cloned->layout_constrained_ = true;
+    cloned->operand_shapes_with_layout_ = operand_shapes_with_layout();
+  }
   if (window_ != nullptr) {
     cloned->set_window(*window_);
   }
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 6b6157a..4d23cb6 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -131,6 +131,34 @@
   std::vector<int64> fft_length_;
 };
 
+class HloTriangularSolveInstruction : public HloInstruction {
+ public:
+  explicit HloTriangularSolveInstruction(const Shape& shape, HloInstruction* a,
+                                         HloInstruction* b,
+                                         const TriangularSolveOptions& options);
+  const TriangularSolveOptions& triangular_solve_options() const {
+    return triangular_solve_options_;
+  }
+
+  // Returns a serialized representation of this instruction.
+  HloInstructionProto ToProto() const override;
+
+ private:
+  std::vector<string> ExtraAttributesToStringImpl(
+      const HloPrintOptions& options) const override;
+  bool IdenticalSlowPath(
+      const HloInstruction& other,
+      const std::function<bool(const HloComputation*, const HloComputation*)>&
+          eq_computations) const override;
+
+  // Implementation for non-common logic of CloneWithNewOperands.
+  std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl(
+      const Shape& shape, absl::Span<HloInstruction* const> new_operands,
+      HloCloneContext* context) const override;
+
+  TriangularSolveOptions triangular_solve_options_;
+};
+
 class HloSendRecvInstruction : public HloInstruction {
  public:
   // Returns the channel id associated with the instruction. The id is
@@ -418,11 +446,8 @@
 class HloSortInstruction : public HloInstruction {
  public:
   explicit HloSortInstruction(const Shape& shape, int64 dimension,
-                              HloInstruction* keys,
-                              absl::Span<HloInstruction* const> values = {});
-  explicit HloSortInstruction(const Shape& shape, int64 dimension,
                               absl::Span<HloInstruction* const> operands,
-                              HloComputation* compare);
+                              HloComputation* compare, bool is_stable);
   // Returns the dimension sizes or numbers associated with this instruction.
   const std::vector<int64>& dimensions() const override { return dimensions_; }
   int64 dimensions(int64 index) const override { return dimensions()[index]; }
@@ -435,6 +460,7 @@
   HloInstruction* mutable_keys() { return mutable_operand(0); }
   // Returns the number of value operands.
   int64 values_count() const { return operand_count() - 1; }
+  bool is_stable() const { return is_stable_; }
 
  private:
   std::vector<string> ExtraAttributesToStringImpl(
@@ -449,6 +475,7 @@
       HloCloneContext* context) const override;
 
   std::vector<int64> dimensions_;
+  bool is_stable_;
 };
 
 class HloTransposeInstruction : public HloInstruction {
@@ -790,10 +817,28 @@
   explicit HloParameterInstruction(int64 parameter_number, const Shape& shape,
                                    const string& name);
   int64 parameter_number() const { return parameter_number_; }
+
+  // Sets and gets whether all replicas will receive the same parameter data
+  // for each leaf buffer in data parallelism.
+  void set_parameter_replicated_at_leaf_buffers(
+      absl::Span<const bool> parameter_replicated_at_leaf_buffers) {
+    CHECK_EQ(ShapeUtil::GetLeafCount(shape()),
+             parameter_replicated_at_leaf_buffers.size());
+    parameter_replicated_at_leaf_buffers_.emplace(
+        parameter_replicated_at_leaf_buffers.begin(),
+        parameter_replicated_at_leaf_buffers.end());
+  }
+  const absl::optional<std::vector<bool>>&
+  parameter_replicated_at_leaf_buffers() const {
+    return parameter_replicated_at_leaf_buffers_;
+  }
+
   // Returns a serialized representation of this instruction.
   HloInstructionProto ToProto() const override;
 
  private:
+  std::vector<string> ExtraAttributesToStringImpl(
+      const HloPrintOptions& options) const override;
   bool IdenticalSlowPath(
       const HloInstruction& other,
       const std::function<bool(const HloComputation*, const HloComputation*)>&
@@ -807,6 +852,10 @@
       HloCloneContext* context) const override;
 
   int64 parameter_number_ = 0;
+
+  // Specifies whether each buffer has the same parameter value on all replicas
+  // in data parallelism.
+  absl::optional<std::vector<bool>> parameter_replicated_at_leaf_buffers_;
 };
 
 class HloGetTupleElementInstruction : public HloInstruction {
@@ -906,9 +955,7 @@
                                  HloInstruction* token_operand,
                                  absl::string_view outfeed_config);
   // Returns the shape for the Outfeed instruction.
-  const Shape& outfeed_shape() const {
-    return outfeed_shape_;
-  }
+  const Shape& outfeed_shape() const { return outfeed_shape_; }
   // Returns the config for the Outfeed instruction.
   const string& outfeed_config() const { return outfeed_config_; }
   // Returns a serialized representation of this instruction.
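A small usage sketch for the new parameter-replication field (`param` is a hypothetical HloParameterInstruction whose shape is a tuple with exactly two leaf buffers; the span length must match the leaf count, per the CHECK above):

// Mark the first leaf buffer as replicated across replicas, the second not.
param->set_parameter_replicated_at_leaf_buffers({true, false});
// ToString() then includes the attribute: parameter_replication={true,false}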
diff --git a/tensorflow/compiler/xla/service/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc
index c1a642d..2255383 100644
--- a/tensorflow/compiler/xla/service/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/service/hlo_lexer.cc
@@ -153,6 +153,8 @@
         return LexPercent();
       case ':':
         return TokKind::kColon;
+      case '*':
+        return TokKind::kAsterisk;
       case '[':
         return TokKind::kLsquare;
       case ']':
@@ -464,6 +466,8 @@
       return "kComma";
     case TokKind::kColon:
       return "kColon";
+    case TokKind::kAsterisk:
+      return "kAsterisk";
     case TokKind::kLsquare:
       return "kLsquare";
     case TokKind::kRsquare:
diff --git a/tensorflow/compiler/xla/service/hlo_lexer.h b/tensorflow/compiler/xla/service/hlo_lexer.h
index 16eed21..383fb4e 100644
--- a/tensorflow/compiler/xla/service/hlo_lexer.h
+++ b/tensorflow/compiler/xla/service/hlo_lexer.h
@@ -38,9 +38,10 @@
   kError,
 
   // Tokens with no info.
-  kEqual,  // =
-  kComma,  // ,
-  kColon,  // :
+  kEqual,     // =
+  kComma,     // ,
+  kColon,     // :
+  kAsterisk,  // *
   kLsquare,
   kRsquare,  // [  ]
   kLbrace,
@@ -108,7 +109,7 @@
         LOG(FATAL) << "This token does not have string value";
     }
   }
-  tensorflow::int64 GetInt64Val() const {
+  int64 GetInt64Val() const {
     CHECK(GetKind() == TokKind::kInt);
     return token_state_.int64_val;
   }
@@ -171,7 +172,7 @@
     const char* token_start = nullptr;
     TokKind current_kind;
     string str_val;
-    tensorflow::int64 int64_val;
+    int64 int64_val;
     double decimal_val;
     PrimitiveType primitive_type_val;
   };
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 759ce65..8322870 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -246,6 +246,34 @@
   return proto;
 }
 
+Status HloModule::CheckUniqueNamesAndIdsForComputationsAndInstructions() const {
+  absl::flat_hash_set<string> computation_names;
+  absl::flat_hash_set<int> computation_ids;
+  absl::flat_hash_set<string> instruction_names;
+  absl::flat_hash_set<int> instruction_ids;
+
+  for (const HloComputation* computation : computations()) {
+    TF_RET_CHECK(!ContainsKey(computation_names, computation->name()))
+        << "Computation name is not unique: " << computation->name();
+    computation_names.insert(computation->name());
+
+    TF_RET_CHECK(!ContainsKey(computation_ids, computation->unique_id()))
+        << "Computation id is not unique: " << computation->unique_id();
+    computation_ids.insert(computation->unique_id());
+
+    for (const HloInstruction* instruction : computation->instructions()) {
+      TF_RET_CHECK(!ContainsKey(instruction_names, instruction->name()))
+          << "Instruction name is not unique: " << instruction->name();
+      instruction_names.insert(instruction->name());
+
+      TF_RET_CHECK(!ContainsKey(instruction_ids, instruction->unique_id()))
+          << "Instruction id is not unique: " << instruction->unique_id();
+      instruction_ids.insert(instruction->unique_id());
+    }
+  }
+  return Status::OK();
+}
+
 /* static */
 StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
     const HloModuleProto& proto, const HloModuleConfig& module_config) {
@@ -329,28 +357,8 @@
                       DynamicParameterBinding::CreateFromProto(
                           proto.dynamic_parameter_binding()));
 
-  absl::flat_hash_set<string> computation_names;
-  absl::flat_hash_set<string> instruction_names;
-  absl::flat_hash_set<int> computation_ids;
-  absl::flat_hash_set<int> instruction_ids;
-  for (HloComputation* computation : module->computations()) {
-    TF_RET_CHECK(!ContainsKey(computation_names, computation->name()))
-        << "Computation name is not unique: " << computation->name();
-    computation_names.insert(computation->name());
-
-    TF_RET_CHECK(!ContainsKey(computation_ids, computation->unique_id()))
-        << "Computation id is not unique: " << computation->unique_id();
-    computation_ids.insert(computation->unique_id());
-    for (HloInstruction* instruction : computation->instructions()) {
-      TF_RET_CHECK(!ContainsKey(instruction_names, instruction->name()))
-          << "Instruction name is not unique: " << instruction->name();
-      instruction_names.insert(instruction->name());
-
-      TF_RET_CHECK(!ContainsKey(instruction_ids, instruction->unique_id()))
-          << "Instruction id is not unique: " << instruction->unique_id();
-      instruction_ids.insert(instruction->unique_id());
-    }
-  }
+  TF_RETURN_IF_ERROR(
+      module->CheckUniqueNamesAndIdsForComputationsAndInstructions());
 
   if (proto.has_schedule()) {
     TF_ASSIGN_OR_RETURN(
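
Since the uniqueness check is now a public member rather than inline code in CreateFromProto, other call sites can reuse it. A minimal usage sketch, assuming the usual XLA build environment (the helper name is hypothetical):

    #include "tensorflow/compiler/xla/service/hlo_module.h"

    // Hypothetical helper: fail fast if a module violates the invariant that
    // computation and instruction names/ids are unique module-wide.
    xla::Status ValidateUniqueness(const xla::HloModule& module) {
      return module.CheckUniqueNamesAndIdsForComputationsAndInstructions();
    }
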
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index f1310e4..b6fe6a5 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -187,6 +187,7 @@
   std::vector<HloComputation*> MakeNonfusionComputations() const;
 
   const HloModuleConfig& config() const { return config_; }
+  void set_config(HloModuleConfig& config) { config_ = config; }
 
   // Return a string representation of the module.
   //
@@ -264,6 +265,18 @@
   const HloSchedule& schedule() const { return *schedule_; }
   HloSchedule& schedule() { return *schedule_; }
 
+  HloComputation* AddComputationAndUnifyNamesAndIds(
+      std::unique_ptr<HloComputation> computation, bool is_entry) {
+    computation->ClearUniqueIdInternal();
+    for (auto* instruction : computation->instructions()) {
+      instruction->ClearUniqueIdInternal();
+    }
+    return AddComputationInternal(std::move(computation), is_entry,
+                                  /*uniquify_identifiers=*/true);
+  }
+
+  Status CheckUniqueNamesAndIdsForComputationsAndInstructions() const;
+
  private:
   HloComputation* AddComputationInternal(
       std::unique_ptr<HloComputation> computation, bool is_entry,
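
AddComputationAndUnifyNamesAndIds clears the stale unique ids first so that AddComputationInternal re-uniquifies both names and ids, which is what makes it safe to move a computation between modules. A hedged sketch of that use case (SpliceComputation is illustrative, not part of the patch):

    #include <memory>
    #include <utility>

    #include "tensorflow/compiler/xla/service/hlo_computation.h"
    #include "tensorflow/compiler/xla/service/hlo_module.h"

    // Move a computation (e.g., cloned from a donor module) into `target`,
    // letting `target` assign fresh names and ids so the invariant checked by
    // CheckUniqueNamesAndIdsForComputationsAndInstructions() still holds.
    xla::HloComputation* SpliceComputation(
        xla::HloModule* target, std::unique_ptr<xla::HloComputation> comp) {
      return target->AddComputationAndUnifyNamesAndIds(std::move(comp),
                                                       /*is_entry=*/false);
    }
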
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
index 47734bc..b877081 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
@@ -389,9 +389,10 @@
                instruction1->opcode() == HloOpcode::kCall);
   VLOG(2) << "adding as companions:" << instruction1->ToString() << " and "
           << instruction2->ToString();
-
-  if (!ContainsKey(companion_set_index_, instruction1) &&
-      !ContainsKey(companion_set_index_, instruction2)) {
+  if (instruction1 == instruction2) {
+    return Status::OK();
+  } else if (!ContainsKey(companion_set_index_, instruction1) &&
+             !ContainsKey(companion_set_index_, instruction2)) {
     companion_sets_.push_back(
         absl::make_unique<std::vector<HloInstruction*>>());
     auto companion_set = companion_sets_.back().get();
@@ -419,7 +420,10 @@
     for (HloInstruction* hlo : Companions(instruction2)) {
       companion_set_index_[hlo] = companion_set_index_[instruction1];
     }
-    companion_sets_.erase(companion_sets_.begin() + index_to_remove);
+    // We can't remove the set from the vector because companion_set_index_
+    // references sets by their index in this vector, so we reset to nullptr
+    // instead.
+    companion_sets_[index_to_remove].reset(nullptr);
   }
   return Status::OK();
 }
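
The reset-to-nullptr comment is the crux of this hunk: companion_set_index_ stores vector indices, so erasing an element would invalidate every index past the erased slot. A self-contained sketch of the same tombstone pattern:

    #include <cstddef>
    #include <map>
    #include <memory>
    #include <vector>

    // Tombstone pattern: `index_of` holds positions into `sets`, so a merged
    // set is reset to nullptr rather than erased, keeping other indices valid.
    struct CompanionRegistry {
      std::vector<std::unique_ptr<std::vector<int>>> sets;
      std::map<int, size_t> index_of;  // element -> index into `sets`

      void MergeInto(size_t dst, size_t src) {
        for (int e : *sets[src]) {
          sets[dst]->push_back(e);
          index_of[e] = dst;
        }
        sets[src].reset(nullptr);  // erase() would shift all later indices
      }
    };
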
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
index 3ed95c1..84f7f2f 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
@@ -173,7 +173,8 @@
   // Returns the number of modules for devices (excluding the host module).
   int64 GetDeviceModulesCount() const;
 
-  // Returns the companion instructions for the given instruction.
+  // Returns the companion set for the given instruction, including the
+  // instruction itself.
   //
   // Precondition: IsCompanionWhile(instruction) is true.
   const std::vector<HloInstruction*>& Companions(
diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h
index bf9b3c8..35626ba 100644
--- a/tensorflow/compiler/xla/service/hlo_opcode.h
+++ b/tensorflow/compiler/xla/service/hlo_opcode.h
@@ -137,6 +137,7 @@
   V(kTanh, "tanh")                                           \
   V(kTrace, "trace")                                         \
   V(kTranspose, "transpose")                                 \
+  V(kTriangularSolve, "triangular-solve")                    \
   V(kTuple, "tuple", kHloOpcodeIsVariadic)                   \
   V(kTupleSelect, "tuple-select")                            \
   V(kWhile, "while")                                         \
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index b3b24c1..f448571 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -63,8 +63,7 @@
 
 // Some functions accept either a linear index or a multi-dimensional index
 // (used for indexing into sparse literals).
-using LinearOrMultiIndex =
-    absl::variant<tensorflow::int64, absl::Span<const int64>>;
+using LinearOrMultiIndex = absl::variant<int64, absl::Span<const int64>>;
 
 // Parser for the HloModule::ToString() format text.
 class HloParser {
@@ -83,6 +82,7 @@
   // Stand alone parsing utils for various aggregate data types.
   StatusOr<Shape> ParseShapeOnly();
   StatusOr<HloSharding> ParseShardingOnly();
+  StatusOr<std::vector<bool>> ParseParameterReplicationOnly();
   StatusOr<Window> ParseWindowOnly();
   StatusOr<ConvolutionDimensionNumbers> ParseConvolutionDimensionNumbersOnly();
   StatusOr<PaddingConfig> ParsePaddingConfigOnly();
@@ -129,8 +129,8 @@
   // given value. If the literal is dense, it must have the default layout.
   //
   // `loc` should be the source location of the value.
-  bool SetValueInLiteral(LocTy loc, tensorflow::int64 value,
-                         LinearOrMultiIndex index, Literal* literal);
+  bool SetValueInLiteral(LocTy loc, int64 value, LinearOrMultiIndex index,
+                         Literal* literal);
   bool SetValueInLiteral(LocTy loc, double value, LinearOrMultiIndex index,
                          Literal* literal);
   bool SetValueInLiteral(LocTy loc, bool value, LinearOrMultiIndex index,
@@ -158,9 +158,9 @@
   // Describes the start, limit, and stride on every dimension of the operand
   // being sliced.
   struct SliceRanges {
-    std::vector<tensorflow::int64> starts;
-    std::vector<tensorflow::int64> limits;
-    std::vector<tensorflow::int64> strides;
+    std::vector<int64> starts;
+    std::vector<int64> limits;
+    std::vector<int64> strides;
   };
 
   // The data parsed for the kDomain instruction.
@@ -180,9 +180,11 @@
     kBracedInt64ListList,
     kHloComputation,
     kFftType,
+    kTriangularSolveTranspose,
     kWindow,
     kConvolutionDimensionNumbers,
     kSharding,
+    kParameterReplication,
     kInstructionList,
     kSliceRanges,
     kPaddingConfig,
@@ -247,21 +249,21 @@
   bool ParseMetadata(OpMetadata* metadata);
   bool ParseSharding(OpSharding* sharding);
   bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed);
+  bool ParseParameterReplication(ParameterReplication* parameter_replication);
 
   // Parses the metadata behind a kDomain instruction.
   bool ParseDomain(DomainData* domain);
 
   // Parses a sub-attribute of the window attribute, e.g., size=1x2x3.
-  bool ParseDxD(const string& name, std::vector<tensorflow::int64>* result);
+  bool ParseDxD(const string& name, std::vector<int64>* result);
   // Parses window's pad sub-attribute, e.g., pad=0_0x3x3.
-  bool ParseWindowPad(std::vector<std::vector<tensorflow::int64>>* pad);
+  bool ParseWindowPad(std::vector<std::vector<int64>>* pad);
 
   bool ParseSliceRanges(SliceRanges* result);
   bool ParsePrecisionList(std::vector<PrecisionConfig::Precision>* result);
   bool ParseShapeList(std::vector<Shape>* result);
   bool ParseInt64List(const TokKind start, const TokKind end,
-                      const TokKind delim,
-                      std::vector<tensorflow::int64>* result);
+                      const TokKind delim, std::vector<int64>* result);
   // 'parse_and_add_item' is a lambda to parse an element in the list and add
   // the parsed element to the result. It's supposed to capture the result.
   bool ParseList(const TokKind start, const TokKind end, const TokKind delim,
@@ -276,12 +278,14 @@
                            std::vector<bool>* dynamic_dimensions);
   bool ParseShape(Shape* result);
   bool ParseLayout(Layout* layout);
+  bool ParseTiles(std::vector<Tile>* tiles);
   bool ParseOpcode(HloOpcode* result);
   bool ParseFftType(FftType* result);
+  bool ParseTriangularSolveTranspose(TriangularSolveOptions::Transpose* result);
   bool ParseFusionKind(HloInstruction::FusionKind* result);
   bool ParseRandomDistribution(RandomDistribution* result);
   bool ParsePrecision(PrecisionConfig::Precision* result);
-  bool ParseInt64(tensorflow::int64* result);
+  bool ParseInt64(int64* result);
   bool ParseDouble(double* result);
   bool ParseComplex(std::complex<double>* result);
   bool ParseBool(bool* result);
@@ -643,6 +647,10 @@
   std::unordered_map<string, AttrConfig> attrs;
   optional<OpSharding> sharding;
   attrs["sharding"] = {/*required=*/false, AttrTy::kSharding, &sharding};
+  optional<ParameterReplication> parameter_replication;
+  attrs["parameter_replication"] = {/*required=*/false,
+                                    AttrTy::kParameterReplication,
+                                    &parameter_replication};
   optional<std::vector<HloInstruction*>> predecessors;
   attrs["control-predecessors"] = {/*required=*/false, AttrTy::kInstructionList,
                                    &predecessors};
@@ -656,7 +664,7 @@
   HloInstruction* instruction;
   switch (opcode) {
     case HloOpcode::kParameter: {
-      tensorflow::int64 parameter_number;
+      int64 parameter_number;
       if (!ParseToken(TokKind::kLparen,
                       "expects '(' before parameter number") ||
           !ParseInt64(&parameter_number)) {
@@ -688,7 +696,7 @@
       break;
     }
     case HloOpcode::kIota: {
-      optional<tensorflow::int64> iota_dimension;
+      optional<int64> iota_dimension;
       attrs["iota_dimension"] = {/*required=*/true, AttrTy::kInt64,
                                  &iota_dimension};
       if (!ParseOperands(&operands, /*expected_size=*/0) ||
@@ -891,26 +899,21 @@
       break;
     }
     case HloOpcode::kSort: {
-      optional<std::vector<tensorflow::int64>> dimensions;
+      optional<std::vector<int64>> dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions};
+      optional<bool> is_stable = false;
+      attrs["is_stable"] = {/*required=*/false, AttrTy::kBool, &is_stable};
       optional<HloComputation*> to_apply;
-      // TODO(b/122298745): Make this required.
-      attrs["to_apply"] = {/*required=*/false, AttrTy::kHloComputation,
+      attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation,
                            &to_apply};
       if (!ParseOperands(&operands) || !ParseAttributes(attrs) ||
           dimensions->size() != 1) {
         return false;
       }
-      if (to_apply.has_value()) {
-        instruction = builder->AddInstruction(HloInstruction::CreateSort(
-            shape, dimensions->at(0), operands, to_apply.value()));
-      } else {
-        instruction = builder->AddInstruction(HloInstruction::CreateSort(
-            shape, dimensions->at(0),
-            /*keys=*/operands[0],
-            /*values=*/absl::Span<HloInstruction* const>(operands).subspan(1)));
-      }
+      instruction = builder->AddInstruction(
+          HloInstruction::CreateSort(shape, dimensions->at(0), operands,
+                                     to_apply.value(), is_stable.value()));
       break;
     }
     case HloOpcode::kTuple: {
@@ -936,7 +939,7 @@
       break;
     }
     case HloOpcode::kRecv: {
-      optional<tensorflow::int64> channel_id;
+      optional<int64> channel_id;
       // If the is_host_transfer attribute is not present then default to false.
       optional<bool> is_host_transfer = false;
       attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id};
@@ -952,7 +955,7 @@
       break;
     }
     case HloOpcode::kRecvDone: {
-      optional<tensorflow::int64> channel_id;
+      optional<int64> channel_id;
       // If the is_host_transfer attribute is not present then default to false.
       optional<bool> is_host_transfer = false;
       attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id};
@@ -970,7 +973,7 @@
       break;
     }
     case HloOpcode::kSend: {
-      optional<tensorflow::int64> channel_id;
+      optional<int64> channel_id;
       // If the is_host_transfer attribute is not present then default to false.
       optional<bool> is_host_transfer = false;
       attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id};
@@ -985,7 +988,7 @@
       break;
     }
     case HloOpcode::kSendDone: {
-      optional<tensorflow::int64> channel_id;
+      optional<int64> channel_id;
       // If the is_host_transfer attribute is not present then default to false.
       optional<bool> is_host_transfer = false;
       attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id};
@@ -1003,7 +1006,7 @@
       break;
     }
     case HloOpcode::kGetTupleElement: {
-      optional<tensorflow::int64> index;
+      optional<int64> index;
       attrs["index"] = {/*required=*/true, AttrTy::kInt64, &index};
       if (!ParseOperands(&operands, /*expected_size=*/1) ||
           !ParseAttributes(attrs)) {
@@ -1086,7 +1089,7 @@
     }
     case HloOpcode::kFft: {
       optional<FftType> fft_type;
-      optional<std::vector<tensorflow::int64>> fft_length;
+      optional<std::vector<int64>> fft_length;
       attrs["fft_type"] = {/*required=*/true, AttrTy::kFftType, &fft_type};
       attrs["fft_length"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &fft_length};
@@ -1098,8 +1101,40 @@
           shape, operands[0], *fft_type, *fft_length));
       break;
     }
+    case HloOpcode::kTriangularSolve: {
+      optional<bool> left_side;
+      optional<bool> lower;
+      optional<bool> unit_diagonal;
+      optional<TriangularSolveOptions::Transpose> transpose_a;
+      attrs["left_side"] = {/*required=*/false, AttrTy::kBool, &left_side};
+      attrs["lower"] = {/*required=*/false, AttrTy::kBool, &lower};
+      attrs["unit_diagonal"] = {/*required=*/false, AttrTy::kBool,
+                                &unit_diagonal};
+      attrs["transpose_a"] = {/*required=*/false,
+                              AttrTy::kTriangularSolveTranspose, &transpose_a};
+      if (!ParseOperands(&operands, /*expected_size=*/2) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      TriangularSolveOptions options;
+      if (left_side) {
+        options.set_left_side(*left_side);
+      }
+      if (lower) {
+        options.set_lower(*lower);
+      }
+      if (unit_diagonal) {
+        options.set_unit_diagonal(*unit_diagonal);
+      }
+      options.set_transpose_a(
+          transpose_a ? *transpose_a : TriangularSolveOptions::NO_TRANSPOSE);
+      instruction =
+          builder->AddInstruction(HloInstruction::CreateTriangularSolve(
+              shape, operands[0], operands[1], options));
+      break;
+    }
     case HloOpcode::kBroadcast: {
-      optional<std::vector<tensorflow::int64>> broadcast_dimensions;
+      optional<std::vector<int64>> broadcast_dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &broadcast_dimensions};
       if (!ParseOperands(&operands, /*expected_size=*/1) ||
@@ -1111,7 +1146,7 @@
       break;
     }
     case HloOpcode::kConcatenate: {
-      optional<std::vector<tensorflow::int64>> dimensions;
+      optional<std::vector<int64>> dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions};
       if (!ParseOperands(&operands) || !ParseAttributes(attrs) ||
@@ -1126,7 +1161,7 @@
       optional<HloComputation*> to_apply;
       attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation,
                            &to_apply};
-      optional<std::vector<tensorflow::int64>> dimensions;
+      optional<std::vector<int64>> dimensions;
       attrs["dimensions"] = {/*required=*/false, AttrTy::kBracedInt64List,
                              &dimensions};
       if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
@@ -1142,7 +1177,7 @@
       optional<HloComputation*> reduce_computation;
       attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation,
                            &reduce_computation};
-      optional<std::vector<tensorflow::int64>> dimensions_to_reduce;
+      optional<std::vector<int64>> dimensions_to_reduce;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions_to_reduce};
       if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
@@ -1163,7 +1198,7 @@
       break;
     }
     case HloOpcode::kReverse: {
-      optional<std::vector<tensorflow::int64>> dimensions;
+      optional<std::vector<int64>> dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions};
       if (!ParseOperands(&operands, /*expected_size=*/1) ||
@@ -1207,7 +1242,7 @@
       break;
     }
     case HloOpcode::kDynamicSlice: {
-      optional<std::vector<tensorflow::int64>> dynamic_slice_sizes;
+      optional<std::vector<int64>> dynamic_slice_sizes;
       attrs["dynamic_slice_sizes"] = {
           /*required=*/true, AttrTy::kBracedInt64List, &dynamic_slice_sizes};
       LocTy loc = lexer_.GetLoc();
@@ -1246,7 +1281,7 @@
       break;
     }
     case HloOpcode::kTranspose: {
-      optional<std::vector<tensorflow::int64>> dimensions;
+      optional<std::vector<int64>> dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions};
       if (!ParseOperands(&operands, /*expected_size=*/1) ||
@@ -1260,7 +1295,7 @@
     case HloOpcode::kBatchNormTraining: {
       optional<float> epsilon;
       attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon};
-      optional<tensorflow::int64> feature_index;
+      optional<int64> feature_index;
       attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64,
                                 &feature_index};
       if (!ParseOperands(&operands, /*expected_size=*/3) ||
@@ -1276,7 +1311,7 @@
     case HloOpcode::kBatchNormInference: {
       optional<float> epsilon;
       attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon};
-      optional<tensorflow::int64> feature_index;
+      optional<int64> feature_index;
       attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64,
                                 &feature_index};
       if (!ParseOperands(&operands, /*expected_size=*/5) ||
@@ -1293,7 +1328,7 @@
     case HloOpcode::kBatchNormGrad: {
       optional<float> epsilon;
       attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon};
-      optional<tensorflow::int64> feature_index;
+      optional<int64> feature_index;
       attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64,
                                 &feature_index};
       if (!ParseOperands(&operands, /*expected_size=*/5) ||
@@ -1375,8 +1410,8 @@
       break;
     }
     case HloOpcode::kReducePrecision: {
-      optional<tensorflow::int64> exponent_bits;
-      optional<tensorflow::int64> mantissa_bits;
+      optional<int64> exponent_bits;
+      optional<int64> mantissa_bits;
       attrs["exponent_bits"] = {/*required=*/true, AttrTy::kInt64,
                                 &exponent_bits};
       attrs["mantissa_bits"] = {/*required=*/true, AttrTy::kInt64,
@@ -1487,16 +1522,16 @@
       break;
     }
     case HloOpcode::kDot: {
-      optional<std::vector<tensorflow::int64>> lhs_contracting_dims;
+      optional<std::vector<int64>> lhs_contracting_dims;
       attrs["lhs_contracting_dims"] = {
           /*required=*/false, AttrTy::kBracedInt64List, &lhs_contracting_dims};
-      optional<std::vector<tensorflow::int64>> rhs_contracting_dims;
+      optional<std::vector<int64>> rhs_contracting_dims;
       attrs["rhs_contracting_dims"] = {
           /*required=*/false, AttrTy::kBracedInt64List, &rhs_contracting_dims};
-      optional<std::vector<tensorflow::int64>> lhs_batch_dims;
+      optional<std::vector<int64>> lhs_batch_dims;
       attrs["lhs_batch_dims"] = {/*required=*/false, AttrTy::kBracedInt64List,
                                  &lhs_batch_dims};
-      optional<std::vector<tensorflow::int64>> rhs_batch_dims;
+      optional<std::vector<int64>> rhs_batch_dims;
       attrs["rhs_batch_dims"] = {/*required=*/false, AttrTy::kBracedInt64List,
                                  &rhs_batch_dims};
       optional<std::vector<PrecisionConfig::Precision>> operand_precision;
@@ -1540,19 +1575,19 @@
       break;
     }
     case HloOpcode::kGather: {
-      optional<std::vector<tensorflow::int64>> offset_dims;
+      optional<std::vector<int64>> offset_dims;
       attrs["offset_dims"] = {/*required=*/true, AttrTy::kBracedInt64List,
                               &offset_dims};
-      optional<std::vector<tensorflow::int64>> collapsed_slice_dims;
+      optional<std::vector<int64>> collapsed_slice_dims;
       attrs["collapsed_slice_dims"] = {
           /*required=*/true, AttrTy::kBracedInt64List, &collapsed_slice_dims};
-      optional<std::vector<tensorflow::int64>> start_index_map;
+      optional<std::vector<int64>> start_index_map;
       attrs["start_index_map"] = {/*required=*/true, AttrTy::kBracedInt64List,
                                   &start_index_map};
-      optional<tensorflow::int64> index_vector_dim;
+      optional<int64> index_vector_dim;
       attrs["index_vector_dim"] = {/*required=*/true, AttrTy::kInt64,
                                    &index_vector_dim};
-      optional<std::vector<tensorflow::int64>> slice_sizes;
+      optional<std::vector<int64>> slice_sizes;
       attrs["slice_sizes"] = {/*required=*/true, AttrTy::kBracedInt64List,
                               &slice_sizes};
 
@@ -1574,17 +1609,17 @@
       break;
     }
     case HloOpcode::kScatter: {
-      optional<std::vector<tensorflow::int64>> update_window_dims;
+      optional<std::vector<int64>> update_window_dims;
       attrs["update_window_dims"] = {
           /*required=*/true, AttrTy::kBracedInt64List, &update_window_dims};
-      optional<std::vector<tensorflow::int64>> inserted_window_dims;
+      optional<std::vector<int64>> inserted_window_dims;
       attrs["inserted_window_dims"] = {
           /*required=*/true, AttrTy::kBracedInt64List, &inserted_window_dims};
-      optional<std::vector<tensorflow::int64>> scatter_dims_to_operand_dims;
+      optional<std::vector<int64>> scatter_dims_to_operand_dims;
       attrs["scatter_dims_to_operand_dims"] = {/*required=*/true,
                                                AttrTy::kBracedInt64List,
                                                &scatter_dims_to_operand_dims};
-      optional<tensorflow::int64> index_vector_dim;
+      optional<int64> index_vector_dim;
       attrs["index_vector_dim"] = {/*required=*/true, AttrTy::kInt64,
                                    &index_vector_dim};
 
@@ -1625,7 +1660,7 @@
       return TokenError(StrCat("parsing not yet implemented for op: ",
                                HloOpcodeString(opcode)));
     case HloOpcode::kGetDimensionSize:
-      optional<std::vector<tensorflow::int64>> dimensions;
+      optional<std::vector<int64>> dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions};
       if (!ParseOperands(&operands, /*expected_size=*/1) ||
@@ -1650,6 +1685,18 @@
     instruction->set_sharding(
         HloSharding::FromProto(sharding.value()).ValueOrDie());
   }
+  if (parameter_replication) {
+    int leaf_count = ShapeUtil::GetLeafCount(instruction->shape());
+    const auto& replicated =
+        parameter_replication->replicated_at_leaf_buffers();
+    if (leaf_count != replicated.size()) {
+      return Error(lexer_.GetLoc(),
+                   StrCat("parameter has ", leaf_count,
+                          " leaf buffers, but parameter_replication has ",
+                          replicated.size(), " elements."));
+    }
+    instruction->set_parameter_replicated_at_leaf_buffers(replicated);
+  }
   if (predecessors) {
     for (auto* pre : *predecessors) {
       Status status = pre->AddControlDependencyTo(instruction);
@@ -1714,8 +1761,8 @@
   LocTy loc = lexer_.GetLoc();
   bool maximal = false;
   bool replicated = false;
-  std::vector<tensorflow::int64> devices;
-  std::vector<tensorflow::int64> tile_assignment_dimensions;
+  std::vector<int64> devices;
+  std::vector<int64> tile_assignment_dimensions;
   while (lexer_.GetKind() != TokKind::kRbrace) {
     switch (lexer_.GetKind()) {
       case TokKind::kw_maximal:
@@ -1741,7 +1788,7 @@
           }
 
           do {
-            tensorflow::int64 dim;
+            int64 dim;
             if (!ParseInt64(&dim)) {
               return false;
             }
@@ -1753,7 +1800,7 @@
             return false;
           }
           do {
-            tensorflow::int64 device;
+            int64 device;
             if (!ParseInt64(&device)) {
               return false;
             }
@@ -1797,10 +1844,10 @@
           "dimensions");
     }
     sharding->set_type(OpSharding::Type::OpSharding_Type_OTHER);
-    for (tensorflow::int64 dim : tile_assignment_dimensions) {
+    for (int64 dim : tile_assignment_dimensions) {
       sharding->add_tile_assignment_dimensions(dim);
     }
-    for (tensorflow::int64 device : devices) {
+    for (int64 device : devices) {
       sharding->add_tile_assignment_devices(device);
     }
   }
@@ -1809,6 +1856,32 @@
   return true;
 }
 
+// parameter_replication ::=
+//   '{' (('true' | 'false') (',' ('true' | 'false'))*)? '}'
+bool HloParser::ParseParameterReplication(
+    ParameterReplication* parameter_replication) {
+  if (!ParseToken(TokKind::kLbrace,
+                  "expected '{' to start parameter_replication attribute")) {
+    return false;
+  }
+
+  if (lexer_.GetKind() != TokKind::kRbrace) {
+    do {
+      if (lexer_.GetKind() == TokKind::kw_true) {
+        parameter_replication->add_replicated_at_leaf_buffers(true);
+      } else if (lexer_.GetKind() == TokKind::kw_false) {
+        parameter_replication->add_replicated_at_leaf_buffers(false);
+      } else {
+        return false;
+      }
+      lexer_.Lex();
+    } while (EatIfPresent(TokKind::kComma));
+  }
+
+  return ParseToken(TokKind::kRbrace,
+                    "expected '}' to end parameter_replication attribute");
+}
+
 // domain ::= '{' 'kind=' domain_kind ',' 'entry=' entry_sharding ','
 //            'exit=' exit_sharding '}'
 bool HloParser::ParseDomain(DomainData* domain) {
@@ -1861,22 +1934,18 @@
                     "expects '}' at the end of instruction name list");
 }
 
-bool HloParser::SetValueInLiteral(LocTy loc, tensorflow::int64 value,
+bool HloParser::SetValueInLiteral(LocTy loc, int64 value,
                                   LinearOrMultiIndex index, Literal* literal) {
   const Shape& shape = literal->shape();
   switch (shape.element_type()) {
     case S8:
-      return SetValueInLiteralHelper<tensorflow::int8>(loc, value, index,
-                                                       literal);
+      return SetValueInLiteralHelper<int8>(loc, value, index, literal);
     case S16:
-      return SetValueInLiteralHelper<tensorflow::int16>(loc, value, index,
-                                                        literal);
+      return SetValueInLiteralHelper<int16>(loc, value, index, literal);
     case S32:
-      return SetValueInLiteralHelper<tensorflow::int32>(loc, value, index,
-                                                        literal);
+      return SetValueInLiteralHelper<int32>(loc, value, index, literal);
     case S64:
-      return SetValueInLiteralHelper<tensorflow::int64>(loc, value, index,
-                                                        literal);
+      return SetValueInLiteralHelper<int64>(loc, value, index, literal);
     case U8:
       return SetValueInLiteralHelper<tensorflow::uint8>(loc, value, index,
                                                         literal);
@@ -1964,7 +2033,7 @@
   }
 
   // Check that the index is in range and assign into the literal
-  if (auto* linear_index = absl::get_if<tensorflow::int64>(&index)) {
+  if (auto* linear_index = absl::get_if<int64>(&index)) {
     if (*linear_index >= ShapeUtil::ElementsIn(literal->shape())) {
       return Error(loc, StrCat("trys to set value ", StringifyValue(value),
                                " to a literal in shape ",
@@ -2069,8 +2138,8 @@
   // Create a literal with the given shape in default layout.
   *literal = LiteralUtil::CreateFromDimensions(
       shape.element_type(), AsInt64Slice(shape.dimensions()));
-  tensorflow::int64 nest_level = 0;
-  tensorflow::int64 linear_index = 0;
+  int64 nest_level = 0;
+  int64 linear_index = 0;
   // elems_seen_per_dim[i] is how many elements or sub-arrays we have seen for
   // the dimension i. For example, to parse f32[2,3] {{1, 2, 3}, {4, 5, 6}},
   // when we are parsing the 2nd '{' (right before '1'), we are seeing a
@@ -2078,13 +2147,13 @@
   // the first '}' (right after '3'), it means the sub-array ends, and the
   // sub-array is supposed to contain exactly 3 elements, so check if
   // elems_seen_per_dim[1] is 3.
-  std::vector<tensorflow::int64> elems_seen_per_dim(rank);
+  std::vector<int64> elems_seen_per_dim(rank);
   auto get_index_str = [&elems_seen_per_dim](int dim) -> string {
-    std::vector<tensorflow::int64> elems_seen_until_dim(
-        elems_seen_per_dim.begin(), elems_seen_per_dim.begin() + dim);
+    std::vector<int64> elems_seen_until_dim(elems_seen_per_dim.begin(),
+                                            elems_seen_per_dim.begin() + dim);
     return StrCat("[",
                   StrJoin(elems_seen_until_dim, ",",
-                          [](string* out, const tensorflow::int64& num_elems) {
+                          [](string* out, const int64& num_elems) {
                             StrAppend(out, num_elems - 1);
                           }),
                   "]");
@@ -2190,7 +2259,7 @@
         } else if (primitive_util::IsIntegralType(shape.element_type()) ||
                    shape.element_type() == PRED) {
           LocTy loc = lexer_.GetLoc();
-          tensorflow::int64 value;
+          int64 value;
           if (!ParseInt64(&value)) {
             return Error(loc, StrCat("expects integer for primitive type: ",
                                      PrimitiveType_Name(shape.element_type())));
@@ -2235,9 +2304,9 @@
       break;
     }
 
-    std::vector<tensorflow::int64> index;
+    std::vector<int64> index;
     if (lexer_.GetKind() == TokKind::kInt) {
-      tensorflow::int64 single_index = lexer_.GetInt64Val();
+      int64 single_index = lexer_.GetInt64Val();
       lexer_.Lex();
       index.push_back(single_index);
     } else {
@@ -2261,7 +2330,7 @@
       }
       lexer_.Lex();
     } else if (primitive_util::IsIntegralType(shape.element_type())) {
-      tensorflow::int64 value;
+      int64 value;
       if (!ParseInt64(&value)) {
         return Error(value_loc,
                      StrCat("expects integer for primitive type: ",
@@ -2347,7 +2416,7 @@
         -std::numeric_limits<ParsedElemT>::infinity() == value))) {
     // Skip range checking for non-finite value.
   } else if (std::is_unsigned<LiteralNativeT>::value) {
-    CHECK((std::is_same<ParsedElemT, tensorflow::int64>::value ||
+    CHECK((std::is_same<ParsedElemT, int64>::value ||
            std::is_same<ParsedElemT, bool>::value))
         << "Unimplemented checking for ParsedElemT";
 
@@ -2573,24 +2642,23 @@
         return true;
       }
       case AttrTy::kInt64: {
-        tensorflow::int64 result;
+        int64 result;
         if (!ParseInt64(&result)) {
           return false;
         }
-        static_cast<optional<tensorflow::int64>*>(attr_out_ptr)
-            ->emplace(result);
+        static_cast<optional<int64>*>(attr_out_ptr)->emplace(result);
         return true;
       }
       case AttrTy::kInt32: {
-        tensorflow::int64 result;
+        int64 result;
         if (!ParseInt64(&result)) {
           return false;
         }
-        if (result != static_cast<tensorflow::int32>(result)) {
+        if (result != static_cast<int32>(result)) {
           return Error(attr_loc, "value out of range for int32");
         }
-        static_cast<optional<tensorflow::int32>*>(attr_out_ptr)
-            ->emplace(static_cast<tensorflow::int32>(result));
+        static_cast<optional<int32>*>(attr_out_ptr)
+            ->emplace(static_cast<int32>(result));
         return true;
       }
       case AttrTy::kFloat: {
@@ -2630,6 +2698,15 @@
         static_cast<optional<FftType>*>(attr_out_ptr)->emplace(result);
         return true;
       }
+      case AttrTy::kTriangularSolveTranspose: {
+        TriangularSolveOptions::Transpose result;
+        if (!ParseTriangularSolveTranspose(&result)) {
+          return false;
+        }
+        static_cast<optional<TriangularSolveOptions::Transpose>*>(attr_out_ptr)
+            ->emplace(result);
+        return true;
+      }
       case AttrTy::kWindow: {
         Window result;
         if (!ParseWindow(&result, /*expect_outer_curlies=*/true)) {
@@ -2655,6 +2732,15 @@
         static_cast<optional<OpSharding>*>(attr_out_ptr)->emplace(sharding);
         return true;
       }
+      case AttrTy::kParameterReplication: {
+        ParameterReplication parameter_replication;
+        if (!ParseParameterReplication(&parameter_replication)) {
+          return false;
+        }
+        static_cast<optional<ParameterReplication>*>(attr_out_ptr)
+            ->emplace(parameter_replication);
+        return true;
+      }
       case AttrTy::kInstructionList: {
         std::vector<HloInstruction*> result;
         if (!ParseInstructionNames(&result)) {
@@ -2674,19 +2760,19 @@
         return true;
       }
       case AttrTy::kBracedInt64List: {
-        std::vector<tensorflow::int64> result;
+        std::vector<int64> result;
         if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma,
                             &result)) {
           return false;
         }
-        static_cast<optional<std::vector<tensorflow::int64>>*>(attr_out_ptr)
+        static_cast<optional<std::vector<int64>>*>(attr_out_ptr)
             ->emplace(result);
         return true;
       }
       case AttrTy::kBracedInt64ListList: {
-        std::vector<std::vector<tensorflow::int64>> result;
+        std::vector<std::vector<int64>> result;
         auto parse_and_add_item = [&]() {
-          std::vector<tensorflow::int64> item;
+          std::vector<int64> item;
           if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace,
                               TokKind::kComma, &item)) {
             return false;
@@ -2698,8 +2784,7 @@
                        parse_and_add_item)) {
           return false;
         }
-        static_cast<optional<std::vector<std::vector<tensorflow::int64>>>*>(
-            attr_out_ptr)
+        static_cast<optional<std::vector<std::vector<int64>>>*>(attr_out_ptr)
             ->emplace(result);
         return true;
       }
@@ -2900,7 +2985,7 @@
   absl::string_view rhs = split2[0];
   absl::string_view out = split2[1];
 
-  const tensorflow::int64 rank = lhs.length();
+  const int64 rank = lhs.length();
   if (rank != rhs.length() || rank != out.length()) {
     return TokenError(
         "convolution lhs, rhs, and output must have the same rank");
@@ -3011,7 +3096,7 @@
   if (!ParseToken(TokKind::kLbrace, "expects '{' to start ranges")) {
     return false;
   }
-  std::vector<std::vector<tensorflow::int64>> ranges;
+  std::vector<std::vector<int64>> ranges;
   if (lexer_.GetKind() == TokKind::kRbrace) {
     // empty
     return ParseToken(TokKind::kRbrace, "expects '}' to end ranges");
@@ -3081,9 +3166,9 @@
 //   ::= int64_val (delim int64_val)*
 bool HloParser::ParseInt64List(const TokKind start, const TokKind end,
                                const TokKind delim,
-                               std::vector<tensorflow::int64>* result) {
+                               std::vector<int64>* result) {
   auto parse_and_add_item = [&]() {
-    tensorflow::int64 i;
+    int64 i;
     if (!ParseInt64(&i)) {
       return false;
     }
@@ -3159,7 +3244,7 @@
 bool HloParser::ParseDimensionSizes(std::vector<int64>* dimension_sizes,
                                     std::vector<bool>* dynamic_dimensions) {
   auto parse_and_add_item = [&]() {
-    tensorflow::int64 i;
+    int64 i;
     bool is_dynamic = false;
     if (lexer_.GetKind() == TokKind::kLeq) {
       is_dynamic = true;
@@ -3176,22 +3261,108 @@
                    parse_and_add_item);
 }
 
-// layout ::= '{' int64_list '}'
+// tiles
+//   ::= /*empty*/
+//   ::= 'T' '(' dim_list ')'
+// dim_list
+//   ::= /*empty*/
+//   ::= (int64 | '*') (',' (int64 | '*'))*
+bool HloParser::ParseTiles(std::vector<Tile>* tiles) {
+  auto parse_and_add_tile_dimension = [&]() {
+    int64 i;
+    if (ParseInt64(&i)) {
+      tiles->back().add_dimensions(i);
+      return true;
+    }
+    if (lexer_.GetKind() == TokKind::kAsterisk) {
+      tiles->back().add_dimensions(Tile::kCombineDimension);
+      lexer_.Lex();
+      return true;
+    }
+    return false;
+  };
+
+  do {
+    tiles->push_back(Tile());
+    if (!ParseList(TokKind::kLparen, TokKind::kRparen, TokKind::kComma,
+                   parse_and_add_tile_dimension)) {
+      return false;
+    }
+  } while (lexer_.GetKind() == TokKind::kLparen);
+  return true;
+}
+
+// layout ::= '{' int64_list (':' tiles element_size_in_bits)? '}'
+// element_size_in_bits
+//   ::= /*empty*/
+//   ::= 'E' '(' int64 ')'
 bool HloParser::ParseLayout(Layout* layout) {
   std::vector<int64> minor_to_major;
+  std::vector<Tile> tiles;
+  int64 element_size_in_bits = 0;
+
   auto parse_and_add_item = [&]() {
-    tensorflow::int64 i;
+    int64 i;
     if (!ParseInt64(&i)) {
       return false;
     }
     minor_to_major.push_back(i);
     return true;
   };
-  if (!ParseList(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma,
-                 parse_and_add_item)) {
+
+  if (!ParseToken(TokKind::kLbrace,
+                  StrCat("expects layout to start with ",
+                         TokKindToString(TokKind::kLbrace)))) {
     return false;
   }
-  *layout = LayoutUtil::MakeLayout(minor_to_major);
+  if (lexer_.GetKind() != TokKind::kRbrace) {
+    if (lexer_.GetKind() == TokKind::kInt) {
+      // Parse minor to major.
+      do {
+        if (!parse_and_add_item()) {
+          return false;
+        }
+      } while (EatIfPresent(TokKind::kComma));
+    }
+
+    if (lexer_.GetKind() == TokKind::kColon) {
+      lexer_.Lex();
+      if (lexer_.GetKind() == TokKind::kIdent && lexer_.GetStrVal() == "T") {
+        lexer_.Lex();
+        ParseTiles(&tiles);
+      }
+
+      if (lexer_.GetKind() == TokKind::kIdent && lexer_.GetStrVal() == "E") {
+        // Parse element size in bits.
+        lexer_.Lex();
+        if (!ParseToken(TokKind::kLparen,
+                        StrCat("expects element size in bits to start with ",
+                               TokKindToString(TokKind::kLparen)))) {
+          return false;
+        }
+        if (!ParseInt64(&element_size_in_bits)) {
+          return false;
+        }
+        if (!ParseToken(TokKind::kRparen,
+                        StrCat("expects element size in bits to end with ",
+                               TokKindToString(TokKind::kRparen)))) {
+          return false;
+        }
+      }
+    }
+  }
+  if (!ParseToken(TokKind::kRbrace,
+                  StrCat("expects layout to end with ",
+                         TokKindToString(TokKind::kRbrace)))) {
+    return false;
+  }
+
+  std::vector<Tile> vec_tiles(tiles.size());
+  for (int i = 0; i < tiles.size(); i++) {
+    vec_tiles[i] = Tile(tiles[i]);
+  }
+  *layout =
+      LayoutUtil::MakeLayout(minor_to_major, vec_tiles, element_size_in_bits);
   return true;
 }
 
@@ -3243,7 +3414,7 @@
     lexer_.Lex();
     const string message =
         "expects a brace-bracketed integer for sparse layout";
-    tensorflow::int64 max_sparse_elements;
+    int64 max_sparse_elements;
     if (!ParseToken(TokKind::kLbrace, message) ||
         !ParseInt64(&max_sparse_elements) ||
         !ParseToken(TokKind::kRbrace, message)) {
@@ -3263,13 +3434,20 @@
   //
   // The open brace could either be the start of a computation or the start of a
   // layout for the f32[123] shape. We consider it the start of a layout if the
-  // next token after the open brace is a integer
+  // next token after the open brace is an integer or a colon.
   if (lexer_.GetKind() == TokKind::kLbrace &&
-      lexer_.LookAhead() == TokKind::kInt) {
+      (lexer_.LookAhead() == TokKind::kInt ||
+       lexer_.LookAhead() == TokKind::kColon)) {
     Layout layout;
     if (!ParseLayout(&layout)) {
       return false;
     }
+    if (layout.minor_to_major_size() != result->rank()) {
+      return Error(
+          lexer_.GetLoc(),
+          StrFormat("Dimensions size is %ld, but minor to major size is %ld.",
+                    result->rank(), layout.minor_to_major_size()));
+    }
     *result->mutable_layout() = layout;
   }
   return true;
@@ -3312,15 +3490,14 @@
   return true;
 }
 
-bool HloParser::ParseDxD(const string& name,
-                         std::vector<tensorflow::int64>* result) {
+bool HloParser::ParseDxD(const string& name, std::vector<int64>* result) {
   LocTy loc = lexer_.GetLoc();
   if (!result->empty()) {
     return Error(loc, StrFormat("sub-attribute '%s=' already exists", name));
   }
   // 1D
   if (lexer_.GetKind() == TokKind::kInt) {
-    tensorflow::int64 number;
+    int64 number;
     if (!ParseInt64(&number)) {
       return Error(loc, StrFormat("expects sub-attribute '%s=i'", name));
     }
@@ -3339,8 +3516,7 @@
   return TokenError("expects token type kInt or kDxD");
 }
 
-bool HloParser::ParseWindowPad(
-    std::vector<std::vector<tensorflow::int64>>* pad) {
+bool HloParser::ParseWindowPad(std::vector<std::vector<int64>>* pad) {
   LocTy loc = lexer_.GetLoc();
   if (!pad->empty()) {
     return Error(loc, "sub-attribute 'pad=' already exists");
@@ -3350,7 +3526,7 @@
   }
   string str = lexer_.GetStrVal();
   for (const auto& padding_dim_str : absl::StrSplit(str, 'x')) {
-    std::vector<tensorflow::int64> low_high;
+    std::vector<int64> low_high;
     if (!SplitToInt64s(padding_dim_str, '_', &low_high) ||
         low_high.size() != 2) {
       return Error(loc,
@@ -3373,7 +3549,7 @@
   LocTy loc = lexer_.GetLoc();
   string str = lexer_.GetStrVal();
   for (const auto& padding_dim_str : absl::StrSplit(str, 'x')) {
-    std::vector<tensorflow::int64> padding_dim;
+    std::vector<int64> padding_dim;
     if (!SplitToInt64s(padding_dim_str, '_', &padding_dim) ||
         (padding_dim.size() != 2 && padding_dim.size() != 3)) {
       return Error(loc,
@@ -3395,7 +3571,7 @@
   optional<string> op_type;
   optional<string> op_name;
   optional<string> source_file;
-  optional<tensorflow::int32> source_line;
+  optional<int32> source_line;
   attrs["op_type"] = {/*required=*/false, AttrTy::kString, &op_type};
   attrs["op_name"] = {/*required=*/false, AttrTy::kString, &op_name};
   attrs["source_file"] = {/*required=*/false, AttrTy::kString, &source_file};
@@ -3447,6 +3623,22 @@
   return true;
 }
 
+bool HloParser::ParseTriangularSolveTranspose(
+    TriangularSolveOptions::Transpose* result) {
+  VLOG(1) << "ParseTriangularSolveTranspose";
+  if (lexer_.GetKind() != TokKind::kIdent) {
+    return TokenError("expects triangular solve transpose type");
+  }
+  string val = lexer_.GetStrVal();
+  if (!TriangularSolveOptions_Transpose_Parse(val, result) ||
+      !TriangularSolveOptions_Transpose_IsValid(*result)) {
+    return TokenError(
+        StrFormat("expects triangular solve transpose type but sees: %s", val));
+  }
+  lexer_.Lex();
+  return true;
+}
+
 bool HloParser::ParseFusionKind(HloInstruction::FusionKind* result) {
   VLOG(1) << "ParseFusionKind";
   if (lexer_.GetKind() != TokKind::kIdent) {
@@ -3498,7 +3690,7 @@
   return true;
 }
 
-bool HloParser::ParseInt64(tensorflow::int64* result) {
+bool HloParser::ParseInt64(int64* result) {
   VLOG(1) << "ParseInt64";
   if (lexer_.GetKind() != TokKind::kInt) {
     return TokenError("expects integer");
@@ -3650,6 +3842,21 @@
   return HloSharding::FromProto(op_sharding);
 }
 
+StatusOr<std::vector<bool>> HloParser::ParseParameterReplicationOnly() {
+  lexer_.Lex();
+  ParameterReplication parameter_replication;
+  if (!ParseParameterReplication(&parameter_replication)) {
+    return InvalidArgument("Syntax error:\n%s", GetError());
+  }
+  if (lexer_.GetKind() != TokKind::kEof) {
+    return InvalidArgument(
+        "Syntax error:\nExtra content after parameter replication");
+  }
+  return std::vector<bool>(
+      parameter_replication.replicated_at_leaf_buffers().begin(),
+      parameter_replication.replicated_at_leaf_buffers().end());
+}
+
 StatusOr<Window> HloParser::ParseWindowOnly() {
   lexer_.Lex();
   Window window;
@@ -3765,6 +3972,11 @@
   return parser.ParseShardingOnly();
 }
 
+StatusOr<std::vector<bool>> ParseParameterReplication(absl::string_view str) {
+  HloParser parser(str);
+  return parser.ParseParameterReplicationOnly();
+}
+
 StatusOr<Window> ParseWindow(absl::string_view str) {
   HloParser parser(str);
   return parser.ParseWindowOnly();
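
Putting the parser changes together, HLO text like the following now parses; the driver below is a hedged sketch (ParseHloString is the entry point declared in hlo_parser.h, and the shapes are chosen for left_side=false semantics):

    #include <iostream>

    #include "tensorflow/compiler/xla/service/hlo_parser.h"

    int main() {
      // Exercises the new triangular-solve opcode with its optional attributes.
      constexpr char kHloText[] = R"(
    HloModule triangular_solve_module

    ENTRY main {
      a = f32[4,4]{1,0} parameter(0)
      b = f32[3,4]{1,0} parameter(1)
      ROOT solve = f32[3,4]{1,0} triangular-solve(a, b), lower=true,
        transpose_a=NO_TRANSPOSE
    }
    )";
      auto module_or = xla::ParseHloString(kHloText);
      std::cout << (module_or.ok() ? "parsed OK" : "parse error") << std::endl;
    }
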
diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h
index 450a54c..a96260b 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.h
+++ b/tensorflow/compiler/xla/service/hlo_parser.h
@@ -44,11 +44,16 @@
 // creates a HloModule with default config.
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str);
 
-// ParseHloString sharding from str. str is supposed to contain the body of the
-// sharding, i.e. just the rhs of the "sharding={...}" attribute string,
-// e.g., "{replicated}".
+// Parses sharding from str. str is supposed to contain the body of the
+// sharding, i.e. just the rhs of the "sharding={...}" attribute string, e.g.,
+// "{replicated}".
 StatusOr<HloSharding> ParseSharding(absl::string_view str);
 
+// Parses parameter replication from str. str is supposed to contain the body of
+// the parameter replication, i.e. just the rhs of the
+// "parameter_replication={...}" attribute string, e.g., "{true, false}".
+StatusOr<std::vector<bool>> ParseParameterReplication(absl::string_view str);
+
 // Parses the result of window_util::ToString(const Window&).
 StatusOr<Window> ParseWindow(absl::string_view str);
 
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index cd7effe..8e3f1e4 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -65,6 +65,19 @@
 
 )"
 },
+// parameter replication
+{
+"ParamReplication",
+R"(HloModule param_replication_module
+
+ENTRY %param_replication (a: f32[], b: (f32[2,4], (f32[2,4]))) -> (f32[], (f32[2,4], (f32[2,4]))) {
+  %a = f32[] parameter(0), parameter_replication={true}
+  %b = (f32[2,4]{1,0}, (f32[2,4]{1,0})) parameter(1), parameter_replication={false,true}
+  ROOT %tuple = (f32[], (f32[2,4]{1,0}, (f32[2,4]{1,0}))) tuple(f32[] %a, (f32[2,4]{1,0}, (f32[2,4]{1,0})) %b)
+}
+
+)"
+},
 // pred constant
 {
 "ConstantPred",
@@ -1147,6 +1160,24 @@
 
 )"
 },
+// Sort (Key) is_stable=true
+{
+"SortKeyStable",
+R"(HloModule sort
+
+compare {
+  p.0.lhs = f32[] parameter(0)
+  p.0.rhs = f32[] parameter(1)
+  ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs)
+}
+
+ENTRY Sort {
+  x = f32[1024]{0} parameter(0)
+  ROOT sorted = f32[1024]{0} sort(x), dimensions={0}, is_stable=true, to_apply=compare
+}
+
+)"
+},
 // Conditional
 {
 "Conditional",
@@ -2574,6 +2605,60 @@
       << "actual:   " << ShapeUtil::HumanString(actual);
 }
 
+TEST_F(HloParserTest, ParseShapeStringWithTilingLayout) {
+  // One tile.
+  string shape_string = "f32[123,456]{0,1:T(2,128)}";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string));
+  Shape expected =
+      ShapeUtil::MakeShapeWithLayout(F32, {123, 456}, {0, 1}, {Tile({2, 128})});
+  EXPECT_EQ(expected, actual)
+      << "expected: " << ShapeUtil::HumanStringWithLayout(expected)
+      << "actual:   " << ShapeUtil::HumanStringWithLayout(actual);
+
+  // Tile with negative dimension size for combining dimensions.
+  shape_string = "f32[123,456,789]{0,1,2:T(2, * , 128)}";
+  TF_ASSERT_OK_AND_ASSIGN(actual, ParseShape(shape_string));
+  expected =
+      ShapeUtil::MakeShapeWithLayout(F32, {123, 456, 789}, {0, 1, 2},
+                                     {Tile({2, Tile::kCombineDimension, 128})});
+  EXPECT_EQ(expected, actual)
+      << "expected: " << ShapeUtil::HumanStringWithLayout(expected)
+      << "actual:   " << ShapeUtil::HumanStringWithLayout(actual);
+
+  // Two tiles.
+  shape_string = "bf16[123,456,789]{2,1,0:T(2,*,128)(2,1)}";
+  TF_ASSERT_OK_AND_ASSIGN(actual, ParseShape(shape_string));
+  expected = ShapeUtil::MakeShapeWithLayout(
+      BF16, {123, 456, 789}, {2, 1, 0},
+      {Tile({2, Tile::kCombineDimension, 128}), Tile({2, 1})});
+  EXPECT_EQ(expected, actual)
+      << "expected: " << ShapeUtil::HumanStringWithLayout(expected)
+      << "actual:   " << ShapeUtil::HumanStringWithLayout(actual);
+
+  // Tile with element size in bits.
+  shape_string = "pred[123,456]{1,0:T(2,128)E(1)}";
+  TF_ASSERT_OK_AND_ASSIGN(actual, ParseShape(shape_string));
+  expected = ShapeUtil::MakeShapeWithLayout(PRED, {123, 456}, {1, 0},
+                                            {Tile({2, 128})}, 1);
+  EXPECT_EQ(expected, actual)
+      << "expected: " << ShapeUtil::HumanStringWithLayout(expected)
+      << "actual:   " << ShapeUtil::HumanStringWithLayout(actual);
+
+  // Element size in bits without tile.
+  shape_string = "pred[123,456]{1,0:E(1)}";
+  TF_ASSERT_OK_AND_ASSIGN(actual, ParseShape(shape_string));
+  expected = ShapeUtil::MakeShapeWithLayout(PRED, {123, 456}, {1, 0}, {}, 1);
+  EXPECT_EQ(expected, actual)
+      << "expected: " << ShapeUtil::HumanStringWithLayout(expected)
+      << "actual:   " << ShapeUtil::HumanStringWithLayout(actual);
+
+  // Wrong minor_to_major.
+  shape_string = "f32[123,456,789]{1:T(2, * , 128)}";
+  auto result = ParseShape(shape_string);
+  ExpectHasSubstr(result.status().error_message(),
+                  "Dimensions size is 3, but minor to major size is 1.");
+}
+
 TEST_F(HloParserTest, ParseShapeStringWithSparseLayout) {
   string shape_string = "f32[123,456]sparse{10}";
   TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string));
@@ -2638,5 +2723,16 @@
               ::testing::HasSubstr("parameter number must be >= 0"));
 }
 
+TEST_F(HloParserTest, WrongNumberOfParameterLeafBuffersInReplication) {
+  const string hlo_string =
+      "par0 = (f32[3,5], f32[]) parameter(0), "
+      "parameter_replication={true,false,true}";
+  auto result = ParseHloString(hlo_string);
+  ASSERT_FALSE(result.status().ok());
+  EXPECT_THAT(result.status().error_message(),
+              ::testing::HasSubstr("parameter has 2 leaf buffers, but "
+                                   "parameter_replication has 3 elements"));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_reachability.cc b/tensorflow/compiler/xla/service/hlo_reachability.cc
index 0fced7f..b7f507b 100644
--- a/tensorflow/compiler/xla/service/hlo_reachability.cc
+++ b/tensorflow/compiler/xla/service/hlo_reachability.cc
@@ -77,28 +77,51 @@
     const HloComputation* computation) {
   const auto& all = computation->MakeInstructionPostOrder();
   auto result = absl::make_unique<HloReachabilityMap>(all);
-  auto channel_dependency_map = computation->ComputeChannelDependencies();
+  auto channel_group = computation->ComputeChannelDependencies();
 
-  std::vector<HloInstruction*> inputs;
   for (const HloInstruction* hlo : all) {
-    inputs.assign(hlo->operands().begin(), hlo->operands().end());
-    inputs.insert(inputs.end(), hlo->control_predecessors().begin(),
-                  hlo->control_predecessors().end());
+    std::vector<HloInstruction*> inputs;
+    const auto add_input = [&channel_group, &inputs](HloInstruction* input) {
+      inputs.push_back(input);
+      if (input->opcode() == HloOpcode::kAllReduce && input->all_reduce_id()) {
+        auto it = channel_group.find(*input->all_reduce_id());
+        if (it != channel_group.end()) {
+          inputs.insert(inputs.end(), it->second.begin(), it->second.end());
+        }
+      }
+    };
+
+    const auto add_dependencies = [&add_input](const HloInstruction* hlo) {
+      for (HloInstruction* operand : hlo->operands()) {
+        add_input(operand);
+      }
+      for (HloInstruction* predecessor : hlo->control_predecessors()) {
+        add_input(predecessor);
+      }
+    };
+
+    add_dependencies(hlo);
 
     switch (hlo->opcode()) {
       case HloOpcode::kRecvDone: {
-        auto it = channel_dependency_map.find(hlo->channel_id());
-        if (it != channel_dependency_map.end()) {
-          absl::c_copy(it->second, std::back_inserter(inputs));
+        auto it = channel_group.find(hlo->channel_id());
+        if (it != channel_group.end()) {
+          for (HloInstruction* channel : it->second) {
+            if (channel->opcode() == HloOpcode::kSend) {
+              add_input(channel);
+            }
+          }
         }
         break;
       }
       case HloOpcode::kAllReduce: {
         auto all_reduce_id = hlo->all_reduce_id();
         if (all_reduce_id) {
-          auto it = channel_dependency_map.find(all_reduce_id.value());
-          if (it != channel_dependency_map.end()) {
-            absl::c_copy(it->second, std::back_inserter(inputs));
+          auto it = channel_group.find(all_reduce_id.value());
+          if (it != channel_group.end()) {
+            for (HloInstruction* all_reduce : it->second) {
+              add_dependencies(all_reduce);
+            }
           }
         }
         break;
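
The refactoring funnels every dependency through a single add_input closure, so all-reduce companions are folded into the input set uniformly instead of only at the special-cased opcodes. A reduced sketch of that accumulation pattern, detached from HLO types:

    #include <map>
    #include <vector>

    using NodeId = int;

    // Reduced sketch: a node's inputs are its direct deps plus every member of
    // the channel group a dep belongs to, mirroring how all-reduce companions
    // become implicit inputs for reachability.
    std::vector<NodeId> GatherInputs(
        const std::vector<NodeId>& direct_deps,
        const std::map<NodeId, NodeId>& channel_of,            // node -> channel
        const std::map<NodeId, std::vector<NodeId>>& group) {  // channel -> nodes
      std::vector<NodeId> inputs;
      auto add_input = [&](NodeId n) {
        inputs.push_back(n);
        auto ch = channel_of.find(n);
        if (ch == channel_of.end()) return;
        auto it = group.find(ch->second);
        if (it != group.end()) {
          inputs.insert(inputs.end(), it->second.begin(), it->second.end());
        }
      };
      for (NodeId d : direct_deps) add_input(d);
      return inputs;
    }
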
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index d7d66ae..5a5401e 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -168,6 +168,35 @@
       /*profile=*/profile);
 }
 
+StatusOr<Literal> HloRunner::Execute(
+    std::unique_ptr<Executable> executable,
+    const absl::Span<const Literal* const> arguments,
+    ExecutionProfile* profile) {
+  TF_ASSIGN_OR_RETURN(std::vector<ScopedShapedBuffer> argument_buffers,
+                      TransferLiteralsToDevice(arguments));
+  TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result,
+                      ExecuteWithDeviceBuffers(
+                          /*executable=*/executable.get(),
+                          /*arguments=*/argument_buffers,
+                          /*profile=*/profile));
+  return TransferLiteralFromDevice(result);
+}
+
+StatusOr<Literal> HloRunner::Execute(std::unique_ptr<Executable> executable,
+                                     const absl::Span<const Literal> arguments,
+                                     ExecutionProfile* profile) {
+  // Construct a vector of plain pointers for the arguments.
+  std::vector<const Literal*> argument_pointers;
+  argument_pointers.reserve(arguments.size());
+  for (const auto& argument : arguments) {
+    argument_pointers.push_back(&argument);
+  }
+  return Execute(
+      /*executable=*/std::move(executable),
+      /*arguments=*/argument_pointers,
+      /*profile=*/profile);
+}
+
 StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers(
     std::unique_ptr<HloModule> module,
     const absl::Span<const ShapedBuffer* const> arguments, bool run_hlo_passes,
@@ -206,7 +235,7 @@
 }
 
 StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers(
-    std::unique_ptr<Executable> executable,
+    Executable* executable,
     const absl::Span<const ShapedBuffer* const> arguments,
     ExecutionProfile* profile) {
   // Get service run options.
@@ -225,7 +254,7 @@
 }
 
 StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers(
-    std::unique_ptr<Executable> executable,
+    Executable* executable,
     const absl::Span<const ScopedShapedBuffer> arguments,
     ExecutionProfile* profile) {
   std::vector<const ShapedBuffer*> argument_pointers;
diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h
index bb792cf..098989c 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.h
+++ b/tensorflow/compiler/xla/service/hlo_runner.h
@@ -60,7 +60,7 @@
     // The number of times the infeed literal should be fed to the HLO module.
     // For a clean exit, this should match the iterations-per-loop parameter
     // used when generating the HLO module proto (that is usually the main
-    // while bounary counter). A value higher then iterations-per-loop would
+    // while boundary counter). A value higher than iterations-per-loop would
     // lead to infeed threads feeding to a gone computation, while a lower
     // value would trigger a stuck ExecuteReplicated() call (the computation
     // will be trying to infeed data which will never come).
@@ -124,6 +124,14 @@
                             bool run_hlo_passes = true,
                             ExecutionProfile* profile = nullptr);
 
+  StatusOr<Literal> Execute(std::unique_ptr<Executable> executable,
+                            const absl::Span<const Literal* const> arguments,
+                            ExecutionProfile* profile = nullptr);
+
+  StatusOr<Literal> Execute(std::unique_ptr<Executable> executable,
+                            const absl::Span<const Literal> arguments,
+                            ExecutionProfile* profile = nullptr);
+
   // As Execute(), but accepts and returns device buffers instead of host
   // buffers.
   StatusOr<ScopedShapedBuffer> ExecuteWithDeviceBuffers(
@@ -136,13 +144,16 @@
       const absl::Span<const ScopedShapedBuffer> arguments,
       bool run_hlo_passes = true, ExecutionProfile* profile = nullptr);
 
+  // In the following two calls, "executable" is not a unique_ptr, to allow
+  // reuse of the Executable. These calls may update the profile information
+  // in *executable.
   StatusOr<ScopedShapedBuffer> ExecuteWithDeviceBuffers(
-      std::unique_ptr<Executable> executable,
+      Executable* executable,
       const absl::Span<const ShapedBuffer* const> arguments,
       ExecutionProfile* profile = nullptr);
 
   StatusOr<ScopedShapedBuffer> ExecuteWithDeviceBuffers(
-      std::unique_ptr<Executable> executable,
+      Executable* executable,
       const absl::Span<const ScopedShapedBuffer> arguments,
       ExecutionProfile* profile = nullptr);
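
A minimal usage sketch of the raw-pointer overloads, assuming HloRunner also
exposes a CreateExecutable(std::unique_ptr<HloModule>, bool) factory and that
`runner`, `module`, `argument_buffers`, `profile`, and `num_runs` are in scope
(adjust to the actual API if it differs):

    // Compile once, then reuse the same Executable across runs. This is why
    // ExecuteWithDeviceBuffers now takes Executable* rather than a
    // unique_ptr, which was consumed on every call.
    TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
                        runner.CreateExecutable(std::move(module),
                                                /*run_hlo_passes=*/true));
    for (int i = 0; i < num_runs; ++i) {
      TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result,
                          runner.ExecuteWithDeviceBuffers(
                              executable.get(), argument_buffers, &profile));
    }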
 
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index 37cc146..f1d7e60 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -96,13 +96,13 @@
 
   if (replicated_) {
     return "{replicated}";
-  } else if (maximal_) {
+  }
+  if (maximal_) {
     return StrCat(
         "{maximal device=", static_cast<int64>(*tile_assignment_.begin()), "}");
-  } else {
-    return StrCat("{devices=[", StrJoin(tile_assignment_.dimensions(), ","),
-                  "]", StrJoin(tile_assignment_, ","), "}");
   }
+  return StrCat("{devices=[", StrJoin(tile_assignment_.dimensions(), ","), "]",
+                StrJoin(tile_assignment_, ","), "}");
 }
 
 bool HloSharding::UsesDevice(int64 device) const {
@@ -328,8 +328,8 @@
             status = tensorflow::errors::InvalidArgument(
                 StrCat("core ", core, " is not unique in tile assignment"));
           }
+          seen_cores.insert(core);
         }
-        seen_cores.insert(core);
       });
   if (!status.ok()) {
     return status;
@@ -347,7 +347,7 @@
         ToString(), ", input_shape=", ShapeUtil::HumanString(shape));
   }
 
-  // The correct constructor have to be used to create tile maximal shardings.
+  // The correct constructor has to be used to create tile maximal shardings.
   if (tile_assignment_.num_elements() == 1) {
     return tensorflow::errors::InvalidArgument(
         "Tile assignment only contains a single device. If a replicated "
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h
index 5789ae0..dd57ea8 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.h
+++ b/tensorflow/compiler/xla/service/hlo_sharding.h
@@ -118,7 +118,7 @@
   // Returns true if the sharding defines an operation on the given device.
   bool UsesDevice(int64 device) const;
 
-  // Retrieves an histogram of the devices used by the sharding. The returned
+  // Retrieves a histogram of the devices used by the sharding. The returned
   // map has the device number as key, and the occurrence count as value.
   // If a sharding does not have a device, it will not be included in the
   // histogram. The count argument, if not nullptr, will receive the total
@@ -260,6 +260,19 @@
   bool replicated_;
   bool maximal_;
   bool tuple_;
+  // This field is only used if replicated_ is false. If maximal_ is true,
+  // then the field contains a rank 1 array with a single element, which is
+  // the device the HLO is assigned to. If maximal_ is false, the field
+  // contains an array with the same rank as the corresponding HLO. The
+  // dimension sizes of the array describe the number of ways the HLO is
+  // partitioned along each dimension. The values of the array specify which
+  // device each tile of the HLO is assigned to. The position of each value
+  // in the array determines which tile it takes.
+  // For example, {{{2, 3}}, {{5, 7}}} (whose ToString representation is
+  // "{devices=[2,1,2]2,3,5,7}") means that dimension 0 is split two ways and
+  // dimension 2 is split two ways. Core 5, whose (zero-based) index is
+  // [1, 0, 0], will take the tile that contains the 2nd half of dimension 0
+  // and the 1st half of dimension 2.
   Array<int64> tile_assignment_;
   // Only non-empty when tuple_ is true. If a tuple is empty then one entry is
   // present for the root. This is a flattened list of all the leaf shardings in
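
A short sketch of the example documented above, assuming the nested
initializer-list constructor of Array<int64> (illustrative only):

    Array<int64> assignment({{{2, 3}}, {{5, 7}}});  // dimensions [2, 1, 2]
    HloSharding sharding = HloSharding::Tile(assignment);
    // sharding.ToString() yields "{devices=[2,1,2]2,3,5,7}". Device 5 sits at
    // index [1, 0, 0]: the 2nd half of dimension 0 and the 1st half of
    // dimension 2.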
diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc
index 8063467..9e234e0 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc
@@ -84,7 +84,7 @@
   }
 
   {
-    // Test should fail because of more devices used then `num_device`.
+    // Test should fail because more devices are used than `num_devices`.
     HloSharding sharding = HloSharding::Tile(MakeArray({2, 2}, {0, 1, 2, 3}));
     EXPECT_IS_NOT_OK(sharding.Validate(ShapeUtil::MakeShape(U32, {4, 6}),
                                        /*num_devices=*/2));
diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
deleted file mode 100644
index 6925dc3..0000000
--- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
+++ /dev/null
@@ -1,242 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/hlo_tfgraph_builder.h"
-#include "absl/strings/str_cat.h"
-#include "absl/strings/str_join.h"
-#include "tensorflow/compiler/xla/layout_util.h"
-#include "tensorflow/compiler/xla/literal.h"
-#include "tensorflow/compiler/xla/service/hlo_opcode.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/tensor_shape.pb.h"
-
-namespace xla {
-namespace hlo_graph_dumper {
-namespace {
-
-using absl::StrAppend;
-using absl::StrCat;
-using tensorflow::GraphDef;
-using tensorflow::NodeDef;
-using tensorflow::TensorShapeProto;
-
-string GetOpDefName(const HloInstruction* instruction) {
-  string name = StrCat("hlo-", HloOpcodeString(instruction->opcode()));
-  tensorflow::str_util::TitlecaseString(&name, "-");  // non-absl ok
-  name.erase(std::remove(name.begin(), name.end(), '-'), name.end());
-
-  if (instruction->opcode() == HloOpcode::kFusion) {
-    string fusion_name = ToString(instruction->fusion_kind());
-    StrAppend(&name, absl::string_view(fusion_name).substr(1));
-  }
-  return name;
-}
-
-TensorShapeProto GetTensorShape(const HloInstruction* instruction) {
-  TensorShapeProto tensor_shape;
-  const Shape& shape = instruction->shape();
-  for (auto dim : shape.dimensions()) {
-    tensor_shape.add_dim()->set_size(dim);
-  }
-  return tensor_shape;
-}
-
-string GetDeviceName(int device) { return StrCat("/device/XLA:", device); }
-
-void CleanNodeName(string* name) {
-  name->erase(std::remove(name->begin(), name->end(), '%'), name->end());
-  const string chars_to_replace = "<>[]";
-  auto pred = [&](char c) {
-    return absl::c_linear_search(chars_to_replace, c);
-  };
-  std::replace_if(name->begin(), name->end(), pred, '_');
-}
-
-}  // namespace
-
-HloTfGraphBuilder::HloTfGraphBuilder(const DebugOptions& debug_options)
-    : debug_options_(debug_options) {}
-
-Status HloTfGraphBuilder::AddComputation(const HloComputation& computation) {
-  VLOG(2) << "Adding computation " << computation.name();
-  for (auto embedded : computation.MakeEmbeddedComputationsList()) {
-    for (auto* instruction : embedded->instructions()) {
-      TF_RETURN_IF_ERROR(AddInstruction(instruction));
-    }
-  }
-  for (auto* instruction : computation.instructions()) {
-    TF_RETURN_IF_ERROR(AddInstruction(instruction));
-  }
-  return Status::OK();
-}
-
-const GraphDef& HloTfGraphBuilder::GetGraphDef() const { return graph_def_; }
-
-const string& HloTfGraphBuilder::GetNodeNameForInstruction(
-    const HloInstruction* instruction) {
-  if (ContainsKey(instruction_to_node_name_, instruction)) {
-    return instruction_to_node_name_[instruction];
-  }
-  auto append = [](string* str, const string& other) {
-    if (str->empty()) {
-      *str = other;
-    } else if (!other.empty()) {
-      StrAppend(str, "/", other);
-    }
-  };
-  string node_name;
-  if (debug_options_.xla_hlo_tfgraph_device_scopes()) {
-    auto device = instruction->sharding_unique_device();
-    if (device) {
-      node_name = StrCat("dev", *device);
-    }
-  }
-  // If an instruction is fused, put it in the subgraph of the fusion;
-  // otherwise, put it in the computation subgraph.
-  const HloComputation* computation = instruction->parent();
-  if (computation->IsFusionComputation()) {
-    append(&node_name,
-           GetNodeNameForInstruction(computation->FusionInstruction()));
-  } else {
-    append(&node_name, computation->name());
-    if (!instruction->metadata().op_name().empty()) {
-      // Always make computations contain TF ops but not the other way around.
-      append(&node_name, instruction->metadata().op_name());
-    }
-  }
-  string instruction_name = instruction->name();
-  if (instruction->opcode() == HloOpcode::kParameter) {
-    StrAppend(&instruction_name, ".", instruction->parameter_number());
-  }
-  append(&node_name, instruction_name);
-  CleanNodeName(&node_name);
-  auto ret =
-      instruction_to_node_name_.insert(std::make_pair(instruction, node_name));
-  CHECK(ret.second);
-  return ret.first->second;
-}
-
-void HloTfGraphBuilder::SetNodeAttrs(const HloInstruction* instruction,
-                                     NodeDef* node_def) const {
-  auto& attrs = *node_def->mutable_attr();
-
-  // Set the number of arguments for instructions that have variadic operands.
-  if (HloOpcodeIsVariadic(instruction->opcode())) {
-    tensorflow::AttrValue attr_value;
-    attr_value.set_i(instruction->operands().size());
-    attrs["arg_num"] = attr_value;
-  }
-
-  // Set the node type.
-  attrs["type"].set_s(
-      xla::PrimitiveType_Name(instruction->shape().element_type()));
-
-  // Set the framework op (e.g. Tensorflow op) that generated this XLA op.
-  attrs["tf_op_type"].set_s(instruction->metadata().op_type());
-  attrs["tf_op_name"].set_s(instruction->metadata().op_name());
-
-  // Set the shape of the output tensor. "_output_shapes" is a special attribute
-  // name used by Tensorboard for shapes of output tensors.
-  tensorflow::AttrValue shapes;
-  *shapes.mutable_list()->add_shape() = GetTensorShape(instruction);
-  attrs["_output_shapes"] = shapes;
-
-  // Set the layout.
-  if (LayoutUtil::HasLayout(instruction->shape())) {
-    string layout_string;
-    if (instruction->shape().IsTuple()) {
-      // For tuples, emit the full shape because the layout of a tuple is not
-      // represented in a single Layout field.
-      layout_string = ShapeUtil::HumanStringWithLayout(instruction->shape());
-    } else if (instruction->shape().has_layout()) {
-      // For non-tuples, only emit the layout when the shape has a Layout.
-      // This extra check is required because LayoutUtil::HasLayout ignores
-      // token, opaque types etc.
-      layout_string = instruction->shape().layout().ToString();
-    }
-    attrs["layout"].set_s(layout_string);
-  }
-
-  // Set op-specific attributes.
-  switch (instruction->opcode()) {
-    case HloOpcode::kConcatenate:
-    case HloOpcode::kBroadcast:
-    case HloOpcode::kReduce:
-    case HloOpcode::kReverse:
-    case HloOpcode::kTranspose:
-      for (auto dim : instruction->dimensions()) {
-        attrs["dims"].mutable_list()->add_i(dim);
-      }
-      break;
-    case HloOpcode::kGetTupleElement:
-      attrs["index"].set_i(instruction->tuple_index());
-      break;
-    case HloOpcode::kRng:
-      attrs["dist"].set_s(
-          RandomDistribution_Name(instruction->random_distribution()));
-      break;
-    case HloOpcode::kConstant:
-      if (ShapeUtil::IsScalar(instruction->shape())) {
-        attrs["value"].set_s(instruction->literal().GetAsString({}));
-      }
-      break;
-    case HloOpcode::kCustomCall:
-      attrs["custom_call_target"].set_s(instruction->custom_call_target());
-      break;
-    case HloOpcode::kSend:
-    case HloOpcode::kRecv:
-      attrs["channel_id"].set_i(instruction->channel_id());
-      break;
-    default:
-      break;
-  }
-}
-
-Status HloTfGraphBuilder::AddInstruction(const HloInstruction* instruction) {
-  if (!visited_instructions_.insert(instruction).second) {
-    // Skip instructions that have already been added.
-    return Status::OK();
-  }
-
-  NodeDef* node_def = graph_def_.add_node();
-  node_def->set_name(GetNodeNameForInstruction(instruction));
-  node_def->set_op(GetOpDefName(instruction));
-
-  auto device = instruction->sharding_unique_device();
-  if (device) {
-    node_def->set_device(GetDeviceName(*device));
-  }
-  SetNodeAttrs(instruction, node_def);
-  if (instruction->opcode() == HloOpcode::kFusion) {
-    for (auto* fused_instruction : instruction->fused_instructions()) {
-      TF_RETURN_IF_ERROR(AddInstruction(fused_instruction));
-    }
-  }
-  // Add all edges including control edges.
-  for (unsigned i = 0; i < instruction->operands().size(); ++i) {
-    *node_def->add_input() = GetNodeNameForInstruction(instruction->operand(i));
-  }
-  // Called computations are control dependencies.
-  for (const auto* called_computation : instruction->called_computations()) {
-    *node_def->add_input() = StrCat(
-        "^", GetNodeNameForInstruction(called_computation->root_instruction()));
-  }
-  return Status::OK();
-}
-
-}  // namespace hlo_graph_dumper
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.h b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.h
deleted file mode 100644
index c4876b8..0000000
--- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TFGRAPH_BUILDER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TFGRAPH_BUILDER_H_
-
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/xla.pb.h"
-#include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/framework/node_def.pb.h"
-
-namespace xla {
-namespace hlo_graph_dumper {
-
-// This constructs a tensorflow graph for HLO computations.
-class HloTfGraphBuilder {
- public:
-  HloTfGraphBuilder(const DebugOptions& debug_options = DebugOptions());
-
-  // Adds a computation to the graph.
-  Status AddComputation(const HloComputation& computation);
-
-  const tensorflow::GraphDef& GetGraphDef() const;
-
- private:
-  // Gets the node name of an instruction. The node name is hierarchical. For
-  // example, if an instruction is fused, it will be put in a subgraph of the
-  // fusion instruction.
-  const string& GetNodeNameForInstruction(const HloInstruction* instruction);
-
-  void SetNodeAttrs(const HloInstruction* instruction,
-                    tensorflow::NodeDef* node_def) const;
-
-  Status AddInstruction(const HloInstruction* instruction);
-
-  DebugOptions debug_options_;
-  tensorflow::GraphDef graph_def_;
-  // This records instructions that have been visited.
-  std::unordered_set<const HloInstruction*> visited_instructions_;
-  // A cache that maps instruction to the node name.
-  std::unordered_map<const HloInstruction*, string> instruction_to_node_name_;
-};
-
-}  // namespace hlo_graph_dumper
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TFGRAPH_BUILDER_H_
diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder_test.cc b/tensorflow/compiler/xla/service/hlo_tfgraph_builder_test.cc
deleted file mode 100644
index 498abcf..0000000
--- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder_test.cc
+++ /dev/null
@@ -1,201 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/hlo_tfgraph_builder.h"
-#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/tensor_shape.pb.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-
-namespace xla {
-namespace hlo_graph_dumper {
-namespace {
-
-using ::tensorflow::GraphDef;
-
-class HloTfGraphBuilderTest : public HloTestBase {
- protected:
-  HloTfGraphBuilderTest() {}
-  HloTfGraphBuilder generator_;
-
-  // Create a computation which takes a scalar and returns its negation.
-  std::unique_ptr<HloComputation> CreateNegateComputation() {
-    auto builder = HloComputation::Builder("Negate");
-    auto param = builder.AddInstruction(
-        HloInstruction::CreateParameter(0, r0f32_, "param0"));
-    builder.AddInstruction(
-        HloInstruction::CreateUnary(r0f32_, HloOpcode::kNegate, param));
-    return builder.Build();
-  }
-
-  // Creates a computation which calls map with the given computation.
-  std::unique_ptr<HloComputation> CreateMapComputation(
-      HloComputation *map_computation) {
-    auto builder = HloComputation::Builder("Map");
-    auto param = builder.AddInstruction(
-        HloInstruction::CreateParameter(0, r0f32_, "param0"));
-    builder.AddInstruction(
-        HloInstruction::CreateMap(r0f32_, {param}, map_computation));
-    return builder.Build();
-  }
-  Shape r0f32_ = ShapeUtil::MakeShape(PrimitiveType::F32, {});
-};
-
-static const tensorflow::AttrValue &GetNodeAttr(const tensorflow::NodeDef &node,
-                                                const string &attr_name) {
-  auto attr = node.attr().find(attr_name);
-  CHECK(attr != node.attr().end());
-  return attr->second;
-}
-
-TEST_F(HloTfGraphBuilderTest, CheckConcatenateDimsAndShapes) {
-  auto builder = HloComputation::Builder("Concatenate");
-  Shape shape = ShapeUtil::MakeShape(PrimitiveType::F32, {2, 2});
-  auto param_1 = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, shape, "param0"));
-  auto param_2 = builder.AddInstruction(
-      HloInstruction::CreateParameter(1, shape, "param1"));
-  builder.AddInstruction(HloInstruction::CreateConcatenate(
-      ShapeUtil::MakeShape(PrimitiveType::F32, {2, 4}), {param_1, param_2}, 1));
-  TF_CHECK_OK(generator_.AddComputation(*builder.Build()));
-  GraphDef graph_def = generator_.GetGraphDef();
-  EXPECT_EQ(graph_def.node_size(), 3);
-  const auto &node = graph_def.node(2);
-  EXPECT_EQ(node.name(), "Concatenate/concatenate");
-
-  // Check dimensions.
-  auto dims_value = GetNodeAttr(node, "dims");
-  EXPECT_EQ(dims_value.list().i_size(), 1);
-  EXPECT_EQ(dims_value.list().i(0), 1);
-
-  // Check shapes.
-  auto shape_value = GetNodeAttr(node, "_output_shapes");
-  EXPECT_EQ(shape_value.list().shape_size(), 1);
-  EXPECT_EQ(shape_value.list().shape(0).dim_size(), 2);
-  EXPECT_EQ(shape_value.list().shape(0).dim(0).size(), 2);
-  EXPECT_EQ(shape_value.list().shape(0).dim(1).size(), 4);
-}
-
-TEST_F(HloTfGraphBuilderTest, CheckScalarValue) {
-  auto builder = HloComputation::Builder("Const");
-  HloInstruction *instruction = builder.AddInstruction(
-      HloInstruction::CreateConstant(LiteralUtil::CreateR0(123)));
-  OpMetadata metadata;
-  metadata.set_op_name("x");
-  metadata.set_op_type("y");
-  instruction->set_metadata(metadata);
-  TF_CHECK_OK(generator_.AddComputation(*builder.Build()));
-  GraphDef graph_def = generator_.GetGraphDef();
-  EXPECT_EQ(graph_def.node_size(), 1);
-  const auto &node = graph_def.node(0);
-  EXPECT_EQ(GetNodeAttr(node, "value").s(), "123");
-  EXPECT_EQ(GetNodeAttr(node, "type").s(), "S32");
-  EXPECT_EQ(GetNodeAttr(node, "tf_op_name").s(), "x");
-  EXPECT_EQ(GetNodeAttr(node, "tf_op_type").s(), "y");
-}
-
-TEST_F(HloTfGraphBuilderTest, SimpleNegateComputation) {
-  auto negate_computation = CreateNegateComputation();
-  TF_CHECK_OK(generator_.AddComputation(*negate_computation));
-  GraphDef graph_def = generator_.GetGraphDef();
-  EXPECT_EQ(graph_def.node_size(), 2);
-  EXPECT_EQ(graph_def.node(0).name(), "Negate/param0.0");
-  EXPECT_EQ(graph_def.node(0).op(), "HloParameter");
-  EXPECT_EQ(graph_def.node(1).name(), "Negate/negate");
-  EXPECT_EQ(graph_def.node(1).op(), "HloNegate");
-  EXPECT_EQ(graph_def.node(1).input_size(), 1);
-  EXPECT_EQ(graph_def.node(1).input(0), "Negate/param0.0");
-}
-
-TEST_F(HloTfGraphBuilderTest, GreaterThanOrEqualTo) {
-  auto builder = HloComputation::Builder("GE");
-  auto param_1 = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, r0f32_, "param0"));
-  auto param_2 = builder.AddInstruction(
-      HloInstruction::CreateParameter(1, r0f32_, "param1"));
-  builder.AddInstruction(
-      HloInstruction::CreateBinary(r0f32_, HloOpcode::kGe, param_1, param_2));
-  TF_CHECK_OK(generator_.AddComputation(*builder.Build()));
-  GraphDef graph_def = generator_.GetGraphDef();
-  EXPECT_EQ(graph_def.node_size(), 3);
-  EXPECT_EQ(graph_def.node(0).name(), "GE/param0.0");
-  EXPECT_EQ(graph_def.node(1).name(), "GE/param1.1");
-  EXPECT_EQ(graph_def.node(2).input_size(), 2);
-  EXPECT_EQ(graph_def.node(2).name(), "GE/greater-than-or-equal-to");
-  EXPECT_EQ(graph_def.node(2).op(), "HloGreaterThanOrEqualTo");
-}
-
-TEST_F(HloTfGraphBuilderTest, IncorparateTfOpsStructure) {
-  auto builder = HloComputation::Builder("GE");
-  auto param_1 = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, r0f32_, "param0"));
-  auto param_2 = builder.AddInstruction(
-      HloInstruction::CreateParameter(1, r0f32_, "param1"));
-  auto ge = builder.AddInstruction(
-      HloInstruction::CreateBinary(r0f32_, HloOpcode::kGe, param_1, param_2));
-  OpMetadata metadata;
-  metadata.set_op_name("x/y");
-  metadata.set_op_type("Y");
-  ge->set_metadata(metadata);
-  TF_CHECK_OK(generator_.AddComputation(*builder.Build()));
-  GraphDef graph_def = generator_.GetGraphDef();
-  EXPECT_EQ(graph_def.node_size(), 3);
-  EXPECT_EQ(graph_def.node(0).name(), "GE/param0.0");
-  EXPECT_EQ(graph_def.node(1).name(), "GE/param1.1");
-  EXPECT_EQ(graph_def.node(2).input_size(), 2);
-  EXPECT_EQ(graph_def.node(2).name(), "GE/x/y/greater-than-or-equal-to");
-  EXPECT_EQ(graph_def.node(2).op(), "HloGreaterThanOrEqualTo");
-}
-
-TEST_F(HloTfGraphBuilderTest, EmbeddedComputationsDiamond) {
-  // Create computations with a diamond-shaped callgraph.
-  auto negate_computation = CreateNegateComputation();
-  auto map1_computation = CreateMapComputation(negate_computation.get());
-  auto map2_computation = CreateMapComputation(negate_computation.get());
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, r0f32_, "param0"));
-  auto map1 = builder.AddInstruction(
-      HloInstruction::CreateMap(r0f32_, {param}, map1_computation.get()));
-  auto map2 = builder.AddInstruction(
-      HloInstruction::CreateMap(r0f32_, {param}, map2_computation.get()));
-  builder.AddInstruction(
-      HloInstruction::CreateBinary(r0f32_, HloOpcode::kAdd, map1, map2));
-  auto computation = builder.Build();
-  TF_CHECK_OK(generator_.AddComputation(*computation));
-  EXPECT_GT(generator_.GetGraphDef().node_size(), 0);
-}
-
-TEST_F(HloTfGraphBuilderTest, TokenHasNoLayout) {
-  auto builder = HloComputation::Builder("Token");
-  auto token = builder.AddInstruction(HloInstruction::CreateToken());
-  OpMetadata metadata;
-  metadata.set_op_name("x");
-  metadata.set_op_type("y");
-  token->set_metadata(metadata);
-  TF_ASSERT_OK(generator_.AddComputation(*builder.Build()));
-  GraphDef graph_def = generator_.GetGraphDef();
-  ASSERT_EQ(graph_def.node_size(), 1);
-  const auto &node = graph_def.node(0);
-  ASSERT_EQ(GetNodeAttr(node, "type").s(), "TOKEN");
-  ASSERT_EQ(GetNodeAttr(node, "layout").s(), "");
-  ASSERT_EQ(GetNodeAttr(node, "tf_op_name").s(), "x");
-  ASSERT_EQ(GetNodeAttr(node, "tf_op_type").s(), "y");
-}
-
-}  // namespace
-}  // namespace hlo_graph_dumper
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 9d94c5c..97e6ea9 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -168,6 +168,15 @@
   return CheckShape(fft, expected);
 }
 
+Status ShapeVerifier::HandleTriangularSolve(HloInstruction* hlo) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 2));
+  TF_ASSIGN_OR_RETURN(const Shape expected,
+                      ShapeInference::InferTriangularSolveShape(
+                          hlo->operand(0)->shape(), hlo->operand(1)->shape(),
+                          hlo->triangular_solve_options()));
+  return CheckShape(hlo, expected);
+}
+
 Status ShapeVerifier::HandleAllReduce(HloInstruction* crs) {
   std::vector<const Shape*> operand_shapes;
   for (const HloInstruction* operand : crs->operands()) {
@@ -328,13 +337,48 @@
     return InternalError("Expected at least 1 operand for %s instruction: %s",
                          HloOpcodeString(sort->opcode()), sort->ToString());
   }
+  HloComputation* compare = sort->to_apply();
+
+  // Check that the 'compare' computation returns a PRED.
+  Shape compare_shape = compare->root_instruction()->shape();
+  if (!ShapesSame(compare_shape, ShapeUtil::MakeShape(PRED, {}))) {
+    return InternalError(
+        "The Sort compare computation shape does not lead to a scalar "
+        "predicate shape: %s",
+        StringifyShape(compare_shape));
+  }
+
+  // Check that the number of parameters of the 'compare' computation is
+  // correct.
+  TF_RETURN_IF_ERROR(
+      CheckParameterCount(sort, compare, sort->operand_count() * 2));
+
+  // Verify that the parameters of the 'compare' computation have the correct
+  // scalar shapes.
+  for (int64 parameter_idx = 0; parameter_idx < compare->num_parameters();
+       ++parameter_idx) {
+    int64 operand_idx = parameter_idx / 2;
+    Shape expected_scalar_shape = ShapeUtil::MakeShape(
+        sort->operand(operand_idx)->shape().element_type(), {});
+    Shape actual_parameter_shape =
+        compare->parameter_instruction(parameter_idx)->shape();
+    if (!ShapeUtil::CompatibleIgnoringFpPrecision(expected_scalar_shape,
+                                                  actual_parameter_shape)) {
+      return InternalError(
+          "Expected the %lld-th parameter of the compare computation of sort "
+          "to have shape %s, but got %s",
+          parameter_idx, StringifyShape(expected_scalar_shape),
+          StringifyShape(actual_parameter_shape));
+    }
+  }
+
+  // Verify that all operand shapes have the same dimensions.
   for (int64 operand = 1; operand < sort->operand_count(); ++operand) {
     if (!ShapeUtil::SameDimensions(sort->operand(0)->shape(),
                                    sort->operand(operand)->shape())) {
       return InternalError(
-          "Expected sort to have to have the same dimensions for the keys "
-          "and the values. Keys shape is: %s\n, Values shape (operand index "
-          "%lld) is: %s",
+          "Expected sort to have to have the same dimensions for all operands. "
+          "First operand shape is: %s\n, shape (operand index %lld) is: %s",
           StringifyShape(sort->operand(0)->shape()), operand,
           StringifyShape(sort->operand(operand)->shape()));
     }
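
For reference, a comparator accepted by these checks for a single-operand sort
over f32 keys would look like the following HLO (text syntax approximate for
this revision):

    compare {
      p.0.lhs = f32[] parameter(0)
      p.0.rhs = f32[] parameter(1)
      ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs)
    }

    sorted = f32[1024] sort(f32[1024] x), dimensions={0}, to_apply=compare

With N sort operands, the comparator takes 2*N scalar parameters (one lhs/rhs
pair per operand, matching each operand's element type) and must return a
scalar pred[].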
@@ -1281,8 +1325,8 @@
   return Status::OK();
 }
 
-// Checks that the non-scalar operand shapes are compatible to the output
-// shape, i.e., that there are no implicit broadcasts of size-one dimensions.
+// Checks that the operand shapes are compatible with the output shape, i.e.,
+// that there are no implicit broadcasts.
 Status CheckElementwiseInstruction(HloInstruction* instruction) {
   const Shape& out_shape = instruction->shape();
   for (HloInstruction* operand : instruction->operands()) {
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index facb76a..a9b5e9a 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -52,6 +52,7 @@
   Status HandleDot(HloInstruction* dot) override;
   Status HandleConvolution(HloInstruction* convolution) override;
   Status HandleFft(HloInstruction* fft) override;
+  Status HandleTriangularSolve(HloInstruction* hlo) override;
   Status HandleAllReduce(HloInstruction* crs) override;
   Status HandleAllToAll(HloInstruction* hlo) override;
   Status HandleCollectivePermute(HloInstruction* hlo) override;
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
index 76bf488..c5d32a4 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
@@ -27,7 +27,6 @@
 #include "tensorflow/compiler/xla/util.h"
 
 namespace xla {
-namespace gtl = ::tensorflow::gtl;
 
 namespace {
 using Analysis = IndexedArrayAnalysis;
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index d4794ac..a576777 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -158,6 +158,7 @@
     case HloOpcode::kSort:
     case HloOpcode::kTanh:
     case HloOpcode::kTrace:
+    case HloOpcode::kTriangularSolve:
     case HloOpcode::kWhile:
     case HloOpcode::kGetDimensionSize:
       return true;
diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD
index a305c6e..8cd9362 100644
--- a/tensorflow/compiler/xla/service/interpreter/BUILD
+++ b/tensorflow/compiler/xla/service/interpreter/BUILD
@@ -50,6 +50,7 @@
         "//tensorflow/compiler/xla/service:map_inliner",
         "//tensorflow/compiler/xla/service:reduce_precision_insertion",
         "//tensorflow/compiler/xla/service:reshape_mover",
+        "//tensorflow/compiler/xla/service:triangular_solve_expander",
         "//tensorflow/compiler/xla/service:while_loop_simplifier",
         "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry",
         "//tensorflow/core:lib",
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 0827b1d..792773c 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -35,6 +35,7 @@
 #include "tensorflow/compiler/xla/service/map_inliner.h"
 #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
+#include "tensorflow/compiler/xla/service/triangular_solve_expander.h"
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -79,6 +80,7 @@
   HloPassPipeline pipeline("Interpreter");
 
   pipeline.AddPass<DynamicIndexSplitter>();
+  pipeline.AddPass<TriangularSolveExpander>();
   pipeline.AddPass<LayoutAssignment>(
       hlo_module->mutable_entry_computation_layout(),
       LayoutAssignment::InstructionCanChangeLayout);
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 9376a3c..2dd9e05 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -2070,6 +2070,7 @@
     case HloOpcode::kSort:
     case HloOpcode::kSubtract:
     case HloOpcode::kTanh:
+    case HloOpcode::kTriangularSolve:
     case HloOpcode::kTupleSelect:
     case HloOpcode::kWhile:
       return false;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
index 89b6a36..d71adde 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
@@ -24,6 +24,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Value.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
@@ -45,13 +46,14 @@
 namespace {
 
 // Adds the inner comparison loop body where we compare elements.
-void EmitCompareLoopBody(
-    int64 iteration_bound, PrimitiveType key_type, int64 num_values,
-    int64 iota_values_parameter_index, llvm::Value* element_pair_index,
+Status EmitCompareLoopBody(
+    int64 iteration_bound, int64 num_values, llvm::Value* element_pair_index,
     int64 xor_mask, llvm::Type* index_type,
-    std::function<llvm::Value*(int64 operand, llvm::Value* index)> read_element,
+    std::function<llvm::Value*(int64 operand, llvm::Value* index)>
+        element_address,
     std::function<void(int64 operand, llvm::Value* index, llvm::Value* value)>
         write_element,
+    const EmitCallToNestedComputationCallback& emit_compare_callback,
     llvm::IRBuilder<>* b, bool needs_bounds_checks = true) {
   auto index_typed_constant = [&](int64 value) {
     return llvm::ConstantInt::get(index_type, value);
@@ -108,74 +110,44 @@
 
   // if (is_smaller_index && index_is_inbounds)
   KernelSupportLibrary ksl(b);
-  ksl.If("smaller_comparison_index", do_comparison, [&]() {
-    auto key1 = read_element(0, current_keys_index);
-    auto key2 = read_element(0, compare_keys_index);
-    auto compare_key1 = key1;
-    auto compare_key2 = key2;
-    bool is_signed_comparison = true;
-    if (primitive_util::IsFloatingPointType(key_type)) {
-      // We would like a total order of floating point numbers so that the
-      // sort has a predictable behavior in the presence of NaNs. Rather
-      // than using floating point comparison, we use the following trick:
-      // If f is a float, and
-      // x = bit_cast<int32>(f);
-      // y = x < 0 ? 0x7FFFFFFF - x : x;
-      // then y is ordered as an int32 such that finite values have the
-      // obvious order, -0 is ordered before 0, and -NaN and NaN appear at
-      // the beginning and end of the ordering.
-      auto k = b->getInt(llvm::APInt::getSignedMaxValue(
-          key1->getType()->getPrimitiveSizeInBits()));
-      auto comparison_type = k->getType();
-      auto zero = llvm::ConstantInt::get(comparison_type, 0);
-      auto maybe_flip = [&](llvm::Value* v) {
-        return b->CreateSelect(b->CreateICmp(llvm::ICmpInst::ICMP_SLT, v, zero),
-                               b->CreateSub(k, v), v);
-      };
-      compare_key1 = b->CreateBitCast(key1, comparison_type);
-      compare_key2 = b->CreateBitCast(key2, comparison_type);
-      compare_key1 = maybe_flip(compare_key1);
-      compare_key2 = maybe_flip(compare_key2);
-    } else if (!primitive_util::IsSignedIntegralType(key_type)) {
-      is_signed_comparison = false;
+  return ksl.IfWithStatus("smaller_comparison_index", do_comparison, [&]() {
+    std::vector<llvm::Value*> values_to_compare;
+    for (int i = 0; i < num_values; ++i) {
+      values_to_compare.push_back(element_address(i, compare_keys_index));
+      values_to_compare.push_back(element_address(i, current_keys_index));
     }
-    // If key2 < key1
-    auto is_smaller_than =
-        b->CreateICmp(is_signed_comparison ? llvm::ICmpInst::ICMP_SLT
-                                           : llvm::ICmpInst::ICMP_ULT,
-                      compare_key2, compare_key1);
-    if (iota_values_parameter_index >= 0) {
-      auto keys_equal = b->CreateICmpEQ(compare_key1, compare_key2);
-      auto key_index1 =
-          read_element(iota_values_parameter_index, current_keys_index);
-      auto key_index2 =
-          read_element(iota_values_parameter_index, compare_keys_index);
-      auto index_is_smaller_than =
-          b->CreateICmp(llvm::ICmpInst::ICMP_ULT, key_index2, key_index1);
-      is_smaller_than = b->CreateOr(
-          is_smaller_than, b->CreateAnd(keys_equal, index_is_smaller_than));
-    }
+    llvm::Module* module = b->GetInsertBlock()->getParent()->getParent();
+    llvm::Value* compare_return_buffer = llvm_ir::EmitAllocaAtFunctionEntry(
+        llvm_ir::PrimitiveTypeToIrType(PRED, module), "compare_return_buffer",
+        b);
+    TF_RETURN_IF_ERROR(
+        emit_compare_callback(values_to_compare, compare_return_buffer));
+    llvm::Value* result = b->CreateLoad(compare_return_buffer);
+
+    // Check if the 'compare' function returns true.
+    llvm::Value* is_smaller_than =
+        b->CreateICmpNE(result, llvm::ConstantInt::get(result->getType(), 0),
+                        "boolean_predicate");
     ksl.If("is_smaller_than", is_smaller_than, [&]() {
-      // Swap key1 with key2.
-      write_element(0, current_keys_index, key2);
-      write_element(0, compare_keys_index, key1);
-      for (int64 i = 1; i <= num_values; ++i) {
-        // Also swap the values.
-        auto value1 = read_element(i, current_keys_index);
-        auto value2 = read_element(i, compare_keys_index);
-        write_element(i, current_keys_index, value2);
-        write_element(i, compare_keys_index, value1);
+      for (int64 i = 0; i < num_values; ++i) {
+        // Swap the values.
+        auto value1 = b->CreateLoad(values_to_compare[i * 2]);
+        auto value2 = b->CreateLoad(values_to_compare[i * 2 + 1]);
+        write_element(i, current_keys_index, value1);
+        write_element(i, compare_keys_index, value2);
       }
     });
+    return Status::OK();
   });
 }
 
-void EmitTiledCompareLoop(
+Status EmitTiledCompareLoop(
     const IrArray::Index& tiled_keys_index, int64 dimension_to_sort,
-    int64 dimension_to_sort_bound, PrimitiveType keys_type,
-    absl::Span<const int64> xor_masks, const std::vector<IrArray>& params,
-    const std::vector<llvm::Value*>& param_shmem_buffers,
-    int64 iota_values_parameter_index, int64 tile_size, llvm::IRBuilder<>* b) {
+    int64 dimension_to_sort_bound, absl::Span<const int64> xor_masks,
+    const std::vector<IrArray>& params,
+    const std::vector<llvm::Value*>& param_shmem_buffers, int64 tile_size,
+    const EmitCallToNestedComputationCallback& emit_compare_callback,
+    llvm::IRBuilder<>* b) {
   KernelSupportLibrary ksl(b);
   llvm::Value* thread_id = llvm_ir::EmitCallToIntrinsic(
       llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x, {}, {}, b);
@@ -200,7 +172,7 @@
             [&]() {
               auto cache_index = b->CreateShl(thread_id, value_one);
               read_or_write(cache_index, current_keys_index);
-              // Increment to go the next index position.
+              // Increment to go to the next index position.
               current_keys_index = b->CreateAdd(current_keys_index, value_one);
               // Here we check whether the next index position is within bounds.
               ksl.If("inner_smaller_keys_index",
@@ -230,10 +202,18 @@
   llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::nvvm_barrier0, {}, {}, b);
 
   // Now emit the bodies of the comparison loops.
-  auto read_element = [&](int64 operand, llvm::Value* index) {
-    return b->CreateLoad(
+  auto element_address = [&](int64 operand, llvm::Value* index) {
+    auto shared_memory_address =
         b->CreateGEP(param_shmem_buffers[operand],
-                     {tiled_keys_index.GetConstantWithIndexType(0), index}));
+                     {tiled_keys_index.GetConstantWithIndexType(0), index});
+    auto ptr_type = shared_memory_address->getType();
+    // We need a generic pointer with address space 0 instead of a pointer to
+    // shared memory (address space 3) so that we can pass it to the comparison
+    // computation.
+    return b->CreateAddrSpaceCast(
+        shared_memory_address,
+        llvm::PointerType::get(ptr_type->getPointerElementType(),
+                               /*AddressSpace=*/0));
   };
   auto write_element = [&](int64 operand, llvm::Value* index,
                            llvm::Value* value) {
@@ -252,7 +232,7 @@
     if (dimension_to_sort_bound % tile_size) {
       // Otherwise we need a bounds check for the last tile. The last tile has
       // size 'dimension_to_sort_bound' % 'tile_size'.
-      ksl.If(
+      TF_RETURN_IF_ERROR(ksl.IfWithStatus(
           "is_last_tile",
           b->CreateICmpUGE(
               b->CreateMul(tiled_keys_index[dimension_to_sort],
@@ -260,24 +240,24 @@
               tiled_keys_index.GetConstantWithIndexType(
                   RoundDownToNearest(dimension_to_sort_bound, tile_size))),
           [&]() {
-            EmitCompareLoopBody(dimension_to_sort_bound % tile_size, keys_type,
-                                params.size() - 1, iota_values_parameter_index,
-                                element_pair_index, xor_mask,
-                                tiled_keys_index.GetType(), read_element,
-                                write_element, b);
+            return EmitCompareLoopBody(
+                dimension_to_sort_bound % tile_size, params.size(),
+                element_pair_index, xor_mask, tiled_keys_index.GetType(),
+                element_address, write_element, emit_compare_callback, b);
           },
           [&]() {
-            EmitCompareLoopBody(tile_size, keys_type, params.size() - 1,
-                                iota_values_parameter_index, element_pair_index,
-                                xor_mask, tiled_keys_index.GetType(),
-                                read_element, write_element, b,
-                                /*needs_bounds_checks=*/false);
-          });
+            return EmitCompareLoopBody(
+                tile_size, params.size(), element_pair_index, xor_mask,
+                tiled_keys_index.GetType(), element_address, write_element,
+                emit_compare_callback, b,
+                /*needs_bounds_checks=*/false);
+          }));
     } else {
-      EmitCompareLoopBody(tile_size, keys_type, params.size() - 1,
-                          iota_values_parameter_index, element_pair_index,
-                          xor_mask, tiled_keys_index.GetType(), read_element,
-                          write_element, b, /*needs_bounds_checks=*/false);
+      TF_RETURN_IF_ERROR(EmitCompareLoopBody(
+          tile_size, params.size(), element_pair_index, xor_mask,
+          tiled_keys_index.GetType(), element_address, write_element,
+          emit_compare_callback, b,
+          /*needs_bounds_checks=*/false));
     }
     // Wait until all comparisons have happened.
     llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::nvvm_barrier0, {}, {}, b);
@@ -301,17 +281,16 @@
   // same location in shared memory because we have exactly tile_size / 2 many
   // threads, and the linear index calculated by ParallelLoopEmitter uses
   // linear_index = blockIdx.x * blockDim.x + threadIdx.x;
+  return Status::OK();
 }
 }  // namespace
 
-Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
-                       const std::vector<IrArray>& values_arrays,
-                       int64 iota_values_parameter_index,
-                       absl::string_view name,
-                       absl::Span<const int64> xor_masks, llvm::IRBuilder<>* b,
-                       const gpu::LaunchDimensions& launch_dimensions,
-                       int64 num_iterations_in_sort_dim,
-                       const int64 tile_size) {
+Status EmitSortInPlace(
+    int64 dimension_to_sort, const std::vector<IrArray>& values_arrays,
+    absl::string_view name, absl::Span<const int64> xor_masks,
+    llvm::IRBuilder<>* b, const gpu::LaunchDimensions& launch_dimensions,
+    int64 num_iterations_in_sort_dim, const int64 tile_size,
+    const EmitCallToNestedComputationCallback& emit_compare_callback) {
   // Iterate through the keys shape in physical order, but skip the dimension to
   // sort and make it the innermost loop which is the loop where the comparisons
   // happen. In the dimension to sort, if we use tiling, we iterate through it
@@ -321,7 +300,7 @@
   // within those 64 elements and are therefore independent of the other
   // comparisons).
 
-  const Shape& keys_shape = keys_array.GetShape();
+  const Shape& keys_shape = values_arrays[0].GetShape();
   int64 rank = keys_shape.rank();
   int64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort);
   std::vector<int64> dimensions_in_iteration_order(rank);
@@ -338,18 +317,16 @@
 
   Shape iteration_shape = ShapeUtil::MakeShape(keys_shape.element_type(),
                                                dimensions_in_iteration_order);
-  std::vector<IrArray> params(1, keys_array);
-  params.insert(params.end(), values_arrays.begin(), values_arrays.end());
 
   // Allocate shared memory for the tiled compare loop.
-  std::vector<llvm::Value*> param_shmem_buffers(params.size(), nullptr);
+  std::vector<llvm::Value*> param_shmem_buffers(values_arrays.size(), nullptr);
   if (xor_masks.size() > 1) {
     llvm::Module* module = b->GetInsertBlock()->getParent()->getParent();
-    for (int64 i = 0; i < params.size(); ++i) {
-      llvm::Type* tile_type =
-          llvm::ArrayType::get(llvm_ir::PrimitiveTypeToIrType(
-                                   params[i].GetShape().element_type(), module),
-                               tile_size);
+    for (int64 i = 0; i < values_arrays.size(); ++i) {
+      llvm::Type* tile_type = llvm::ArrayType::get(
+          llvm_ir::PrimitiveTypeToIrType(
+              values_arrays[i].GetShape().element_type(), module),
+          tile_size);
       param_shmem_buffers[i] = llvm_ir::AllocateSharedMemoryTile(
           module, tile_type, absl::StrCat(name, "_tile_param_", i));
     }
@@ -376,25 +353,24 @@
       keys_index[iteration_order_to_logical_order[i]] = tiles_index[i];
     }
     if (xor_masks.size() > 1) {
-      EmitTiledCompareLoop(keys_index, dimension_to_sort,
-                           dimension_to_sort_bound, keys_shape.element_type(),
-                           xor_masks, params, param_shmem_buffers,
-                           iota_values_parameter_index, tile_size, b);
+      TF_RETURN_IF_ERROR(EmitTiledCompareLoop(
+          keys_index, dimension_to_sort, dimension_to_sort_bound, xor_masks,
+          values_arrays, param_shmem_buffers, tile_size, emit_compare_callback,
+          b));
     } else {
-      auto read_element = [&](int64 operand, llvm::Value* index) {
+      auto element_address = [&](int64 operand, llvm::Value* index) {
         keys_index[dimension_to_sort] = index;
-        return params[operand].EmitReadArrayElement(keys_index, b);
+        return values_arrays[operand].EmitArrayElementAddress(keys_index, b);
       };
       auto write_element = [&](int64 operand, llvm::Value* index,
                                llvm::Value* value) {
         keys_index[dimension_to_sort] = index;
-        params[operand].EmitWriteArrayElement(keys_index, value, b);
+        values_arrays[operand].EmitWriteArrayElement(keys_index, value, b);
       };
-      EmitCompareLoopBody(dimension_to_sort_bound, keys_shape.element_type(),
-                          values_arrays.size(), iota_values_parameter_index,
-                          tiles_index[rank - 1], xor_masks[0],
-                          tiles_index.GetType(), read_element, write_element,
-                          b);
+      TF_RETURN_IF_ERROR(EmitCompareLoopBody(
+          dimension_to_sort_bound, values_arrays.size(), tiles_index[rank - 1],
+          xor_masks[0], tiles_index.GetType(), element_address, write_element,
+          emit_compare_callback, b));
     }
     return Status::OK();
   };
diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
index 685f938..b9341a3 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
@@ -28,19 +28,18 @@
 
 namespace xla {
 namespace llvm_ir {
+using EmitCallToNestedComputationCallback =
+    std::function<Status(absl::Span<llvm::Value* const>, llvm::Value*)>;
 // Emits llvm IR to do pairwise comparisons/swaps in the 'dimension_to_sort'
-// dimension of 'keys_array'. All other dimensions are kept as-is. This
-// implements the inner loop of BitonicSort. It is assumed that 'xor_masks'
-// contains only powers of 2, or values 2^k - 1 (k > 0). If
-// 'iota_values_parameter_index' is >= 0, it points at a 'values_arrays' operand
-// that is a iota and can be used to make the sorting stable.
-Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
-                       const std::vector<IrArray>& values_arrays,
-                       int64 iota_values_parameter_index,
-                       absl::string_view name,
-                       absl::Span<const int64> xor_masks, llvm::IRBuilder<>* b,
-                       const gpu::LaunchDimensions& launch_dimensions,
-                       int64 num_iterations_in_sort_dim, int64 tile_size);
+// dimension of each array in 'values_arrays'. All other dimensions are kept
+// as-is. This implements the inner loop of BitonicSort. It is assumed that
+// 'xor_masks' contains only powers of 2, or values 2^k - 1 (k > 0).
+Status EmitSortInPlace(
+    int64 dimension_to_sort, const std::vector<IrArray>& values_arrays,
+    absl::string_view name, absl::Span<const int64> xor_masks,
+    llvm::IRBuilder<>* b, const gpu::LaunchDimensions& launch_dimensions,
+    int64 num_iterations_in_sort_dim, int64 tile_size,
+    const EmitCallToNestedComputationCallback& emit_compare_callback);
 }  // namespace llvm_ir
 }  // namespace xla
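
A sketch of how a caller can wire the comparator callback, assuming a backend
helper for invoking nested computations (the helper name is illustrative):

    llvm_ir::EmitCallToNestedComputationCallback emit_compare_callback =
        [&](absl::Span<llvm::Value* const> operands,
            llvm::Value* output) -> Status {
      // Stands for whatever mechanism the backend uses to call
      // sort->to_apply() on scalar element addresses, writing the pred
      // result into `output`.
      return EmitCallToNestedComputation(*sort->to_apply(), operands, output);
    };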
 
diff --git a/tensorflow/compiler/xla/service/op_expander_pass.cc b/tensorflow/compiler/xla/service/op_expander_pass.cc
new file mode 100644
index 0000000..02c9d4b
--- /dev/null
+++ b/tensorflow/compiler/xla/service/op_expander_pass.cc
@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/op_expander_pass.h"
+
+#include <utility>
+
+#include "absl/algorithm/container.h"
+#include "tensorflow/compiler/xla/service/hlo_creation_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/util.h"
+
+namespace xla {
+
+StatusOr<bool> OpExpanderPass::Run(HloModule* module) {
+  std::vector<HloInstruction*> matching_instructions;
+  for (HloComputation* computation : module->MakeNonfusionComputations()) {
+    absl::c_copy_if(
+        computation->instructions(), std::back_inserter(matching_instructions),
+        [&](HloInstruction* inst) { return InstructionMatchesPattern(inst); });
+  }
+
+  for (HloInstruction* inst : matching_instructions) {
+    TF_ASSIGN_OR_RETURN(HloInstruction * expanded_root,
+                        ExpandInstruction(inst));
+    if (expanded_root == nullptr) {
+      continue;
+    }
+    TF_RETURN_IF_ERROR(inst->parent()->ReplaceInstruction(inst, expanded_root));
+  }
+
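+  // Note: a change is reported whenever any instruction matched, even if
+  // every ExpandInstruction() call returned nullptr (i.e. only a
+  // subcomputation such as to_apply was modified in place).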
+  return !matching_instructions.empty();
+}
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/op_expander_pass.h b/tensorflow/compiler/xla/service/op_expander_pass.h
new file mode 100644
index 0000000..276e3d7
--- /dev/null
+++ b/tensorflow/compiler/xla/service/op_expander_pass.h
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_OP_EXPANDER_PASS_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_OP_EXPANDER_PASS_H_
+
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// This pass is an abstract superclass for passes that replace operations that
+// match a pattern. It is intended to be subclassed, not used directly.
+//
+// This pass is useful for legalizing HLO instructions that a particular backend
+// does not support into other HLO instructions.
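+//
+// A minimal subclass sketch (illustrative only; `FooExpander`, `kFoo`, and
+// `BuildFooReplacement` are hypothetical names, not part of this change):
+//
+//   class FooExpander : public OpExpanderPass {
+//    public:
+//     absl::string_view name() const override { return "foo-expander"; }
+//
+//    protected:
+//     bool InstructionMatchesPattern(HloInstruction* instruction) override {
+//       return instruction->opcode() == HloOpcode::kFoo;
+//     }
+//     StatusOr<HloInstruction*> ExpandInstruction(
+//         HloInstruction* instruction) override {
+//       // Emit equivalent HLO and return its root, or nullptr if the
+//       // instruction was only modified in place.
+//       return BuildFooReplacement(instruction);
+//     }
+//   };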
+class OpExpanderPass : public HloModulePass {
+ public:
+  StatusOr<bool> Run(HloModule* module) override;
+
+ protected:
+  // Returns `true` if `instruction` should be expanded by this pass.
+  virtual bool InstructionMatchesPattern(HloInstruction* instruction) = 0;
+
+  // Returns a replacement for `instruction`, or nullptr if no replacement is
+  // needed (e.g. only the to_apply subcomputation of the instruction was
+  // modified).
+  virtual StatusOr<HloInstruction*> ExpandInstruction(
+      HloInstruction* instruction) = 0;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_OP_EXPANDER_PASS_H_
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index a403428..3f4456c 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -836,7 +836,8 @@
           ShapeUtil::HumanString(larger_shape));
     }
     if (small_is_dynamic != large_is_dynamic) {
-      if ((small_dimension_size == 1 && !small_is_dynamic) ||
+      if (small_dimension_size == large_dimension_size ||
+          (small_dimension_size == 1 && !small_is_dynamic) ||
           (large_dimension_size == 1 && !large_is_dynamic)) {
         // Do nothing. It's OK when the size-1 dimension is not static.
       } else {
@@ -1906,6 +1907,49 @@
 #undef RET_CHECK_RANK
 }
 
+/* static */ StatusOr<Shape> ShapeInference::InferTriangularSolveShape(
+    const Shape& a, const Shape& b, const TriangularSolveOptions& options) {
+  if (a.rank() < 2) {
+    return InvalidArgument(
+        "The 'a' argument to TriangularSolve must have rank >= 2, got shape %s",
+        a.ToString());
+  }
+  if (b.rank() != a.rank()) {
+    return InvalidArgument(
+        "Arguments to triangular solve must have equal rank; got %s and %s.",
+        b.ToString(), a.ToString());
+  }
+  if (a.dimensions(a.rank() - 2) != a.dimensions(a.rank() - 1)) {
+    return InvalidArgument(
+        "The two minor dimensions of 'a' must have equal size, got %s.",
+        a.ToString());
+  }
+  if (a.dimensions(a.rank() - 1) !=
+      b.dimensions(b.rank() - (options.left_side() ? 2 : 1))) {
+    return InvalidArgument(
+        "The shared dimension of 'a' and 'b' does not match, got shapes %s and "
+        "%s",
+        a.ToString(), b.ToString());
+  }
+  absl::Span<const int64> a_batch_dims(a.dimensions());
+  absl::Span<const int64> b_batch_dims(b.dimensions());
+  a_batch_dims.remove_suffix(2);
+  b_batch_dims.remove_suffix(2);
+  if (a_batch_dims != b_batch_dims) {
+    return InvalidArgument(
+        "The leading batch dimensions of the arguments to triangular solve "
+        "must be equal; got %s and %s.",
+        b.ToString(), a.ToString());
+  }
+  if (!TriangularSolveOptions_Transpose_IsValid(options.transpose_a()) ||
+      options.transpose_a() == TriangularSolveOptions::TRANSPOSE_INVALID) {
+    return InvalidArgument(
+        "Invalid transpose option value for triangular solve (%d).\n",
+        options.transpose_a());
+  }
+  return b;
+}
+
 /* static */ StatusOr<Shape> ShapeInference::InferAllReduceShape(
     absl::Span<const Shape* const> operand_shapes) {
   for (const Shape* operand_shape : operand_shapes) {
@@ -2352,8 +2396,8 @@
 
     if (operand_shape.rank() != number_of_indices) {
       return InvalidArgument(
-          "Dynamic update slice start number of dimensions %d must match rank "
-          "%d of slice input (%s).",
+          "Dynamic update slice start number of dimensions %d must match "
+          "rank %d of slice input (%s).",
           number_of_indices, operand_shape.rank(),
           ShapeUtil::HumanString(operand_shape));
     }
@@ -2589,7 +2633,7 @@
         operand_shape.dimensions(i) != 1) {
       return InvalidArgument(
           "Input dimension should be either 1 or equal to the output dimension "
-          "it's broadcasting into; the %lldth operand dimension is %lld, the "
+          "it is broadcasting into; the %lldth operand dimension is %lld, the "
           "%lldth output dimension is %lld.",
           i, operand_shape.dimensions(i), broadcast_dimensions[i],
           output_shape.dimensions(broadcast_dimensions[i]));
@@ -2657,11 +2701,7 @@
     const Shape& operand, absl::Span<const int64> dimensions) {
   TF_RETURN_IF_ERROR(ExpectArray(operand, "transpose"));
 
-  std::vector<int64> indices(operand.rank());
-  std::iota(indices.begin(), indices.end(), 0);
-  if (dimensions.size() != operand.rank() ||
-      !std::is_permutation(dimensions.begin(), dimensions.end(),
-                           indices.begin())) {
+  if (!IsPermutation(dimensions, operand.rank())) {
     return InvalidArgument(
         "Transpose dimensions [%s] are not a permutation of the operand "
         "dimensions (operand shape is %s).",
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index 7d39ef3..acb071a 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -116,6 +116,10 @@
   static StatusOr<Shape> InferFftShape(const Shape& in, FftType fft_type,
                                        absl::Span<const int64> fft_length);
 
+  // Infers the shape produced by the given triangular solve operation.
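+  // For example (illustrative): with options.left_side() set, 'a' of shape
+  // f32[10,4,4] and 'b' of shape f32[10,4,6] yield an output of shape
+  // f32[10,4,6]; the inferred shape is always the shape of 'b'.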
+  static StatusOr<Shape> InferTriangularSolveShape(
+      const Shape& a, const Shape& b, const TriangularSolveOptions& options);
+
   // Infers the shape produced by a cross replica sum with the given operand
   // shapes.
   static StatusOr<Shape> InferAllReduceShape(
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index eabc223..f400ef5 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -252,7 +252,7 @@
 
 TEST_F(ShapeInferenceTest, Complex) {
   auto complex_shape = [&](const Shape& lhs, const Shape& rhs,
-                           const absl::Span<const int64>& bcast) {
+                           absl::Span<const int64> bcast) {
     return ShapeInference::InferBinaryOpShape(HloOpcode::kComplex, lhs, rhs,
                                               bcast);
   };
@@ -1572,6 +1572,16 @@
                                     ShapeUtil::MakeShape(F32, {3, 4, 5, 2})));
 }
 
+TEST_F(ShapeInferenceTest, Rank1Transpose) {
+  Shape a_shape = ShapeUtil::MakeShape(F32, {5});
+  auto inferred_shape_and_status =
+      ShapeInference::InferTransposeShape(a_shape, {0});
+  EXPECT_IS_OK(inferred_shape_and_status);
+  Shape inferred_shape = inferred_shape_and_status.ValueOrDie();
+  EXPECT_TRUE(
+      ShapeUtil::Compatible(inferred_shape, ShapeUtil::MakeShape(F32, {5})));
+}
+
 TEST_F(ShapeInferenceTest, Conditional) {
   auto inferred_status0 = ShapeInference::InferConditionalShape(
       pred_, vector_32_, vector_64_,
diff --git a/tensorflow/compiler/xla/service/sort_simplifier.cc b/tensorflow/compiler/xla/service/sort_simplifier.cc
index 4a00e8d..122366a 100644
--- a/tensorflow/compiler/xla/service/sort_simplifier.cc
+++ b/tensorflow/compiler/xla/service/sort_simplifier.cc
@@ -14,12 +14,15 @@
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/sort_simplifier.h"
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/statusor.h"
+
+#include <memory>
+#include <vector>
 
 #include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/statusor.h"
 
 namespace xla {
 namespace {
@@ -39,8 +42,7 @@
     return false;
   }
 
-  // Index 0 is the sorting key used by the sort HLO itself.
-  absl::flat_hash_set<int64> used_indices{0};
+  absl::flat_hash_set<int64> used_indices;
   for (const HloInstruction* user : sort->users()) {
     if (user->opcode() != HloOpcode::kGetTupleElement) {
      // Can't analyse users other than get-tuple-element.
@@ -49,15 +51,25 @@
     used_indices.insert(user->tuple_index());
   }
 
+  // Also note which parameters are used by the comparator computation.
+  auto comparator = sort->to_apply();
+  for (int64 i = 0; i < sort->operand_count() * 2; ++i) {
+    if (comparator->parameter_instruction(i)->user_count() > 0) {
+      // Operand k of the sort corresponds to parameters 2 * k and 2 * k + 1
+      // of the computation, so parameter i belongs to operand i / 2.
+      used_indices.insert(i / 2);
+    }
+  }
+
   if (used_indices.size() == sort->operand_count()) {
     // All operands are used.
     return false;
   }
 
-  std::vector<HloInstruction*> operands{sort->mutable_operand(0)};
-  std::vector<Shape> new_shapes{sort->operand(0)->shape()};
-  for (int64 i = 1; i < sort->operand_count(); ++i) {
-    if (used_indices.count(i)) {
+  std::vector<HloInstruction*> operands;
+  std::vector<Shape> new_shapes;
+  for (int64 i = 0; i < sort->operand_count(); ++i) {
+    if (used_indices.contains(i)) {
       operands.push_back(sort->mutable_operand(i));
       new_shapes.push_back(sort->operand(i)->shape());
     }
@@ -68,6 +80,32 @@
                              : ShapeUtil::MakeTupleShape(new_shapes);
   HloInstruction* new_sort = computation->AddInstruction(
       sort->CloneWithNewOperands(new_sort_shape, operands));
+  absl::flat_hash_map<const HloInstruction*, std::unique_ptr<HloInstruction>>
+      replacements;
+  int64 parameter_number = 0;
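+  // Renumber the surviving comparator parameters contiguously. For example,
+  // if only operand 1 of a two-operand sort is still used, its parameters
+  // p.1.lhs/p.1.rhs (numbers 2 and 3) become p.0.lhs/p.0.rhs (numbers 0
+  // and 1).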
+  for (int64 i = 0; i < sort->operand_count(); ++i) {
+    auto* old_lhs_parameter = comparator->parameter_instruction(i * 2);
+    auto* old_rhs_parameter = comparator->parameter_instruction(i * 2 + 1);
+    if (used_indices.contains(i)) {
+      Shape scalar_shape =
+          ShapeUtil::MakeShape(sort->operand(i)->shape().element_type(), {});
+      replacements[old_lhs_parameter] = HloInstruction::CreateParameter(
+          parameter_number, scalar_shape,
+          absl::StrCat("p.", parameter_number / 2, ".lhs"));
+      ++parameter_number;
+      replacements[old_rhs_parameter] = HloInstruction::CreateParameter(
+          parameter_number, scalar_shape,
+          absl::StrCat("p.", parameter_number / 2, ".rhs"));
+      ++parameter_number;
+    } else {
+      replacements[old_lhs_parameter] = nullptr;
+      replacements[old_rhs_parameter] = nullptr;
+    }
+  }
+  HloModule* module = sort->GetModule();
+  HloComputation* new_compare = module->AddEmbeddedComputation(
+      comparator->CloneWithReplacements(std::move(replacements)));
+  new_sort->set_to_apply(new_compare);
 
   // Map from original get-tuple-element tuple index to new HLO instruction
   absl::flat_hash_map<int64, HloInstruction*> result_map;
@@ -83,7 +121,8 @@
       }
     }
   } else {
-    result_map[0] = new_sort;
+    CHECK_EQ(used_indices.size(), 1);
+    result_map[*used_indices.begin()] = new_sort;
   }
   std::vector<HloInstruction*> users(sort->users().begin(),
                                      sort->users().end());
diff --git a/tensorflow/compiler/xla/service/sort_simplifier_test.cc b/tensorflow/compiler/xla/service/sort_simplifier_test.cc
index a05bc79..696ac1b 100644
--- a/tensorflow/compiler/xla/service/sort_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/sort_simplifier_test.cc
@@ -124,5 +124,37 @@
   SortSimplifier simplifier;
   EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie());
 }
+
+TEST_F(SortSimplifierTest, RemoveUnusedFirstOperand) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = f32[] parameter(0)
+     p.0.rhs = f32[] parameter(1)
+     p.1.lhs = s32[] parameter(2)
+     p.1.rhs = s32[] parameter(3)
+     ROOT lt = pred[] less-than(p.1.lhs, p.1.rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = f32[64,8732]{1,0} parameter(0)
+     values = s32[64,8732]{1,0} parameter(1)
+     sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values),
+       dimensions={1}, to_apply=compare
+     ROOT gte = s32[64,8732]{1,0} get-tuple-element(sort), index=1
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  SortSimplifier simplifier;
+  uint64 num_executions = 0;
+  do {
+    num_executions++;
+  } while (simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_EQ(num_executions, 2);
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::Sort(m::Parameter(1))));
+}
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/stable_sort_expander.cc b/tensorflow/compiler/xla/service/stable_sort_expander.cc
new file mode 100644
index 0000000..1aa7e5f
--- /dev/null
+++ b/tensorflow/compiler/xla/service/stable_sort_expander.cc
@@ -0,0 +1,204 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/stable_sort_expander.h"
+
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_instructions.h"
+#include "tensorflow/compiler/xla/service/op_expander_pass.h"
+#include "tensorflow/compiler/xla/statusor.h"
+
+namespace xla {
+
+// Looks for an iota operand that can be used as a tie breaker in the
+// computation. If no matching iota operand is found, an iota operand is added
+// to Sort. The comparison computation is adjusted to break ties using the
+// values from the iota operand.
+StatusOr<HloInstruction*> StableSortExpander::ExpandInstruction(
+    HloInstruction* instruction) {
+  auto* sort = Cast<HloSortInstruction>(instruction);
+  HloComputation* computation = sort->parent();
+
+  HloInstruction* expanded_sort = nullptr;
+  absl::flat_hash_set<int64> used_indices;
+  int64 iota_index = -1;
+  for (const HloInstruction* operand : sort->operands()) {
+    // We can only use the iota operand if it has an iota dimension which is the
+    // same as the dimension to sort. Also it should have an integral type that
+    // is large enough for the number of elements in the sort dimension. For
+    // now, we only allow S32, because we expect to find an S32 iota operand
+    // for
+    // all Sort ops which are created by TopK.
+    // TODO(b/122298745): Also support other types.
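+    // (For example, an S32 iota operand with iota_dimension=1 qualifies for
+    // a sort along dimensions={1}, as exercised by the
+    // StabilizeSortReuseIotaOperand test.)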
+    if (operand->opcode() == HloOpcode::kIota &&
+        Cast<HloIotaInstruction>(operand)->iota_dimension() ==
+            sort->sort_dimension() &&
+        operand->shape().element_type() == S32) {
+      iota_index = sort->operand_index(operand);
+      break;
+    }
+  }
+
+  // If there is currently no iota operand which we could use for making the
+  // sort stable, we will have to add a new such operand.
+  if (iota_index == -1) {
+    Shape iota_shape = sort->operand(0)->shape();
+    // We might need to use S64 if the number of elements in the sort dimension
+    // is bigger than 2^31 - 1.
+    // TODO(b/122298745): Handle Sort ops where S32 is too small for the number
+    // of elements in the sort dimension.
+    if (iota_shape.dimensions(sort->sort_dimension()) >
+        std::numeric_limits<int32>::max()) {
+      return Unimplemented(
+          "Stable sorting of more than 2^31-1 elements is not implemented");
+    }
+    iota_shape.set_element_type(S32);
+    auto iota = computation->AddInstruction(
+        HloInstruction::CreateIota(iota_shape, sort->sort_dimension()));
+
+    // Create a new comparator.
+    auto comparator = sort->to_apply();
+    absl::flat_hash_map<const HloInstruction*, std::unique_ptr<HloInstruction>>
+        replacements;
+    std::vector<std::unique_ptr<HloInstruction>> extra_parameters;
+    std::vector<HloInstruction*> extra_parameter_ptrs;
+    Shape scalar_shape = ShapeUtil::MakeShape(S32, {});
+    extra_parameters.push_back(HloInstruction::CreateParameter(
+        sort->operand_count() * 2, scalar_shape,
+        absl::StrCat("p.", sort->operand_count(), ".lhs")));
+    extra_parameter_ptrs.push_back(extra_parameters.back().get());
+    extra_parameters.push_back(HloInstruction::CreateParameter(
+        sort->operand_count() * 2 + 1, scalar_shape,
+        absl::StrCat("p.", sort->operand_count(), ".rhs")));
+    extra_parameter_ptrs.push_back(extra_parameters.back().get());
+    sort->set_to_apply(sort->GetModule()->AddEmbeddedComputation(
+        comparator->CloneWithReplacements(std::move(replacements),
+                                          extra_parameter_ptrs)));
+
+    // Replace the original sort op.
+    std::vector<HloInstruction*> new_operands(sort->operands().begin(),
+                                              sort->operands().end());
+    new_operands.push_back(iota);
+    std::vector<Shape> new_shapes = sort->operand_count() == 1
+                                        ? std::vector<Shape>{sort->shape()}
+                                        : sort->shape().tuple_shapes();
+    new_shapes.push_back(iota_shape);
+    Shape new_sort_shape = ShapeUtil::MakeTupleShape(new_shapes);
+    HloInstruction* new_sort = computation->AddInstruction(
+        sort->CloneWithNewOperands(new_sort_shape, new_operands));
+
+    // Add a "wrapper" around the new sort op to make sure we have the same
+    // shape as before. For the rank 1 case, we only need a GetTupleElement,
+    // otherwise we create a Tuple consisting of GetTupleElements of the new
+    // sort.
+    std::vector<HloInstruction*> tuple_elements;
+    tuple_elements.reserve(sort->operand_count());
+    for (int64 i = 0; i < sort->operand_count(); ++i) {
+      tuple_elements.push_back(
+          computation->AddInstruction(HloInstruction::CreateGetTupleElement(
+              sort->operand(i)->shape(), new_sort, i)));
+    }
+    expanded_sort = tuple_elements[0];
+    if (tuple_elements.size() > 1) {
+      expanded_sort = computation->AddInstruction(
+          HloInstruction::CreateTuple(tuple_elements));
+    }
+    sort = Cast<HloSortInstruction>(new_sort);
+    iota_index = sort->operand_count() - 1;
+  }
+
+  // Modify the computation to break ties using the iota operand.
+  auto comparator = sort->to_apply();
+  std::vector<HloInstruction*> instructions_postorder =
+      comparator->MakeInstructionPostOrder();
+  absl::flat_hash_map<HloInstruction*, HloInstruction*> replacements;
+  // Look up instr in the replacements map, and return either the replacement,
+  // or instr, if the replacement isn't present.
+  auto replace = [&](HloInstruction* instr) {
+    auto it = replacements.find(instr);
+    if (it == replacements.end()) {
+      return instr;
+    }
+    return it->second;
+  };
+  HloInstruction* old_root = comparator->root_instruction();
+  // The comparison computation gets 2 * n parameters (n being the number of
+  // operands of Sort), where parameters 2 * i and 2 * i + 1 correspond to two
+  // different scalars of operand i of Sort which are to be compared. The
+  // comparison computation should induce a strict weak order, so if
+  // to_apply(p1.lhs, p1.rhs, ..., pn.lhs, pn.rhs) is equal to
+  // to_apply(p1.rhs, p1.lhs, ..., pn.rhs, pn.lhs), we can conclude that the
+  // values being compared are equivalent: in a strict weak order the two
+  // calls can never both return true, so equal results mean both are false.
+  // In that case we perform a tie-breaker comparison.
+  //
+  // We clone each instruction that has at least one operand, using as the new
+  // operands of the clone the replacements of the original operands.
+  // Parameter 2 * i is replaced by parameter 2 * i + 1 and vice versa. This
+  // ensures that the cloned root instruction gives the result of the
+  // comparison computation when called with each scalar pair reversed. If the
+  // two results are equal, the tie is broken below using the parameters
+  // corresponding to the iota operand.
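+  //
+  // Illustrative sketch (assuming one f32 sort key plus the iota operand):
+  //   original root: lt  = pred[] less-than(p.0.lhs, p.0.rhs)
+  //   cloned root:   lt' = pred[] less-than(p.0.rhs, p.0.lhs)
+  //   new root:      select(eq(lt, lt'), less-than(p.1.lhs, p.1.rhs), lt)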
+  for (int64 i = 0; i < comparator->num_parameters(); ++i) {
+    replacements[comparator->parameter_instruction(i)] =
+        comparator->parameter_instruction(i ^ 1);
+  }
+  HloInstruction* cloned_root = nullptr;
+  for (HloInstruction* inst : instructions_postorder) {
+    if (inst->operand_count() == 0) {
+      continue;
+    }
+    std::vector<HloInstruction*> new_operands;
+    new_operands.reserve(inst->operand_count());
+    for (HloInstruction* operand : inst->operands()) {
+      new_operands.push_back(replace(operand));
+    }
+    auto new_instruction =
+        inst->CloneWithNewOperands(inst->shape(), new_operands);
+    replacements[inst] = new_instruction.get();
+    if (inst == old_root) {
+      cloned_root = new_instruction.get();
+    }
+    comparator->AddInstruction(std::move(new_instruction));
+  }
+  CHECK_NE(cloned_root, nullptr);
+  Shape scalar_pred = ShapeUtil::MakeShape(PRED, {});
+  HloInstruction* same =
+      comparator->AddInstruction(HloInstruction::CreateBinary(
+          scalar_pred, HloOpcode::kEq, old_root, cloned_root));
+  HloInstruction* tie_breaker =
+      comparator->AddInstruction(HloInstruction::CreateBinary(
+          scalar_pred, HloOpcode::kLt,
+          comparator->parameter_instruction(2 * iota_index),
+          comparator->parameter_instruction(2 * iota_index + 1)));
+  HloInstruction* new_root =
+      comparator->AddInstruction(HloInstruction::CreateTernary(
+          ShapeUtil::MakeShape(PRED, {}), HloOpcode::kSelect, same, tie_breaker,
+          old_root));
+  comparator->set_root_instruction(new_root);
+
+  return expanded_sort;
+}
+
+bool StableSortExpander::InstructionMatchesPattern(
+    HloInstruction* instruction) {
+  return instruction->opcode() == HloOpcode::kSort &&
+         Cast<HloSortInstruction>(instruction)->is_stable();
+}
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/stable_sort_expander.h b/tensorflow/compiler/xla/service/stable_sort_expander.h
new file mode 100644
index 0000000..31b6fd9
--- /dev/null
+++ b/tensorflow/compiler/xla/service/stable_sort_expander.h
@@ -0,0 +1,42 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_STABLE_SORT_EXPANDER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_STABLE_SORT_EXPANDER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+#include "tensorflow/compiler/xla/service/op_expander_pass.h"
+#include "tensorflow/compiler/xla/statusor.h"
+
+namespace xla {
+
+// HLO pass which expands Sort ops that have the is_stable field set to true
+// into equivalent Sort ops which guarantee stable sorting without relying on
+// the is_stable field.
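+//
+// Typical usage (illustrative sketch; assumes the pass is run as part of an
+// HloPassPipeline):
+//
+//   HloPassPipeline pipeline("expand-stable-sorts");
+//   pipeline.AddPass<StableSortExpander>();
+//   TF_ASSIGN_OR_RETURN(bool changed, pipeline.Run(module));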
+class StableSortExpander : public OpExpanderPass {
+ public:
+  absl::string_view name() const override { return "stable-sort-expander"; }
+
+ private:
+  bool InstructionMatchesPattern(HloInstruction* instruction) override;
+  StatusOr<HloInstruction*> ExpandInstruction(
+      HloInstruction* instruction) override;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_STABLE_SORT_EXPANDER_H_
diff --git a/tensorflow/compiler/xla/service/stable_sort_expander_test.cc b/tensorflow/compiler/xla/service/stable_sort_expander_test.cc
new file mode 100644
index 0000000..a62d953
--- /dev/null
+++ b/tensorflow/compiler/xla/service/stable_sort_expander_test.cc
@@ -0,0 +1,358 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/stable_sort_expander.h"
+
+#include "tensorflow/compiler/xla/service/algebraic_simplifier.h"
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/service/pattern_matcher.h"
+#include "tensorflow/compiler/xla/service/pattern_matcher_gmock.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+namespace m = match;
+
+using StableSortExpanderTest = HloTestBase;
+
+// Checks whether 'a' and 'b' are roots of equivalent computations, except that
+// parameters 2 * i and 2 * i + 1 are switched.
+bool IsSameComputationExceptParams(const HloInstruction* a,
+                                   const HloInstruction* b) {
+  if (a->opcode() != b->opcode() || a->operand_count() != b->operand_count()) {
+    return false;
+  }
+  if (a->opcode() == HloOpcode::kParameter) {
+    // Check that parameters were switched.
+    return a->parameter_number() == (b->parameter_number() ^ 1);
+  }
+  // If the instruction has no operands (e.g. a constant), it must actually
+  // be the same instruction.
+  if (a->operand_count() == 0) {
+    return a == b;
+  }
+  // Otherwise recursively compare all operands.
+  for (int64 i = 0; i < a->operand_count(); ++i) {
+    if (!IsSameComputationExceptParams(a->operand(i), b->operand(i))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Check that the comparison computation has been modified to add a tie breaker
+// using 'iota_parameter'.
+void CheckComputationHasTieBreaker(const HloInstruction* root,
+                                   int64 iota_parameter) {
+  // With the tie breaker, the root instruction should be
+  //   Select(Eq(Comp(), CompReverse()), Lt(), Comp())
+  // with Comp() being the original comparison function, and CompReverse() being
+  // the copied comparison function where the parameters are reversed. Lt() is
+  // the tie breaker comparison using the Iota operand.
+  ASSERT_EQ(root->opcode(), HloOpcode::kSelect);
+  ASSERT_EQ(root->operand(0)->opcode(), HloOpcode::kEq);
+
+  // Check that the tie breaker instruction is correct.
+  EXPECT_THAT(root->operand(1),
+              GmockMatch(m::Lt(m::Parameter(iota_parameter * 2),
+                               m::Parameter(iota_parameter * 2 + 1))));
+  EXPECT_EQ(root->operand(2), root->operand(0)->operand(0));
+
+  // Check that Comp() and CompReverse() are equivalent except that
+  // CompReverse() has reversed parameters.
+  EXPECT_TRUE(IsSameComputationExceptParams(root->operand(0)->operand(0),
+                                            root->operand(0)->operand(1)));
+}
+
+TEST_F(StableSortExpanderTest, StabilizeSortReuseIotaOperand) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = f32[] parameter(0)
+     p.0.rhs = f32[] parameter(1)
+     p.1.lhs = s32[] parameter(2)
+     p.1.rhs = s32[] parameter(3)
+     ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = f32[64,8732]{1,0} parameter(0)
+     values = s32[64,8732]{1,0} iota(), iota_dimension=1
+     sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values),
+       dimensions={1}, to_apply=compare, is_stable=true
+     ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::GetTupleElement(
+                        m::Sort(m::Parameter(0), m::Iota()), 0)));
+  CheckComputationHasTieBreaker(
+      root->operand(0)->to_apply()->root_instruction(), /*iota_parameter=*/1);
+}
+
+TEST_F(StableSortExpanderTest,
+       StabilizeSortReuseIotaOperandComplicatedComparison) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = f32[] parameter(0)
+     p.0.rhs = f32[] parameter(1)
+     p.1.lhs = s32[] parameter(2)
+     p.1.rhs = s32[] parameter(3)
+     max = u32[] constant(2147483647)
+     zero = s32[] constant(0)
+     lhs.signed = s32[] bitcast-convert(p.0.lhs)
+     lhs.unsigned = u32[] bitcast-convert(p.0.lhs)
+     lhs.flipped = u32[] subtract(max, lhs.unsigned)
+     lhs.flipped.signed = s32[] bitcast-convert(lhs.flipped)
+     lhs.is_negative = pred[] less-than(lhs.flipped.signed, zero)
+     lhs.converted = s32[] select(lhs.is_negative, lhs.flipped.signed, lhs.signed)
+     rhs.signed = s32[] bitcast-convert(p.0.rhs)
+     rhs.unsigned = u32[] bitcast-convert(p.0.rhs)
+     rhs.flipped = u32[] subtract(max, rhs.unsigned)
+     rhs.flipped.signed = s32[] bitcast-convert(rhs.flipped)
+     rhs.is_negative = pred[] less-than(rhs.flipped.signed, zero)
+     rhs.converted = s32[] select(rhs.is_negative, rhs.flipped.signed, rhs.signed)
+     ROOT lt = pred[] less-than(lhs.converted, rhs.converted)
+   }
+
+   ENTRY sort_computation {
+     keys = f32[64,8732]{1,0} parameter(0)
+     values = s32[64,8732]{1,0} iota(), iota_dimension=1
+     sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values),
+       dimensions={1}, to_apply=compare, is_stable=true
+     ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::GetTupleElement(
+                        m::Sort(m::Parameter(0), m::Iota()), 0)));
+  CheckComputationHasTieBreaker(
+      root->operand(0)->to_apply()->root_instruction(), /*iota_parameter=*/1);
+}
+
+TEST_F(StableSortExpanderTest, StabilizeSortAddIotaOperandAndChangeRoot) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = f32[] parameter(0)
+     p.0.rhs = f32[] parameter(1)
+     p.1.lhs = s32[] parameter(2)
+     p.1.rhs = s32[] parameter(3)
+     ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = f32[64,8732]{1,0} parameter(0)
+     values = s32[64,8732]{1,0} parameter(1)
+     ROOT sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values),
+       dimensions={1}, to_apply=compare, is_stable=true
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(
+      root, GmockMatch(m::Tuple(
+                m::GetTupleElement(
+                    m::Sort(m::Parameter(0), m::Parameter(1), m::Iota()), 0),
+                m::GetTupleElement(
+                    m::Sort(m::Parameter(0), m::Parameter(1), m::Iota()), 1))));
+  CheckComputationHasTieBreaker(
+      root->operand(0)->operand(0)->to_apply()->root_instruction(),
+      /*iota_parameter=*/2);
+}
+
+TEST_F(StableSortExpanderTest, HonorIsStableFlag) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = f32[] parameter(0)
+     p.0.rhs = f32[] parameter(1)
+     p.1.lhs = s32[] parameter(2)
+     p.1.rhs = s32[] parameter(3)
+     ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = f32[64,8732]{1,0} parameter(0)
+     values = s32[64,8732]{1,0} iota(), iota_dimension=1
+     sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values),
+       dimensions={1}, to_apply=compare, is_stable=false
+     ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_FALSE(stabilizer.Run(module.get()).ValueOrDie());
+}
+
+TEST_F(StableSortExpanderTest,
+       StabilizeSortDontReuseIotaOperandWrongDimension) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = f32[] parameter(0)
+     p.0.rhs = f32[] parameter(1)
+     p.1.lhs = s32[] parameter(2)
+     p.1.rhs = s32[] parameter(3)
+     ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = f32[64,8732]{1,0} parameter(0)
+     values = s32[64,8732]{1,0} iota(), iota_dimension=0
+     sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values),
+       dimensions={1}, to_apply=compare, is_stable=true
+     ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie());
+  // Simplify away the "wrapper" tuple around the new sort.
+  AlgebraicSimplifier simplifier(AlgebraicSimplifierOptions(
+      [](const Shape&, const Shape&) { return false; }));
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::GetTupleElement(
+                        m::Sort(m::Parameter(0), m::Iota(), m::Iota()), 0)));
+  CheckComputationHasTieBreaker(
+      root->operand(0)->to_apply()->root_instruction(),
+      /*iota_parameter=*/2);
+}
+
+TEST_F(StableSortExpanderTest, StabilizeSortDontReuseIotaOperandWrongType) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = f32[] parameter(0)
+     p.0.rhs = f32[] parameter(1)
+     p.1.lhs = f32[] parameter(2)
+     p.1.rhs = f32[] parameter(3)
+     ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = f32[64,8732]{1,0} parameter(0)
+     values = f32[64,8732]{1,0} iota(), iota_dimension=1
+     sort = (f32[64,8732]{1,0}, f32[64,8732]{1,0}) sort(keys, values),
+       dimensions={1}, to_apply=compare, is_stable=true
+     ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie());
+  // Simplify away the "wrapper" tuple around the new sort.
+  AlgebraicSimplifier simplifier(AlgebraicSimplifierOptions(
+      [](const Shape&, const Shape&) { return false; }));
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::GetTupleElement(
+                        m::Sort(m::Parameter(0), m::Iota(), m::Iota()), 0)));
+  CheckComputationHasTieBreaker(
+      root->operand(0)->to_apply()->root_instruction(),
+      /*iota_parameter=*/2);
+}
+
+TEST_F(StableSortExpanderTest, StabilizeSortR1) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = s32[] parameter(0)
+     p.0.rhs = s32[] parameter(1)
+     mask = s32[] constant(65535)
+     lhs = s32[] and(p.0.lhs, mask)
+     rhs = s32[] and(p.0.rhs, mask)
+     ROOT lt = pred[] less-than(lhs, rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = s32[64,8732]{1,0} parameter(0)
+     ROOT sort = s32[64,8732]{1,0} sort(keys), dimensions={0}, to_apply=compare,
+       is_stable=true
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::GetTupleElement(
+                        m::Sort(m::Parameter(0), m::Iota()), 0)));
+  CheckComputationHasTieBreaker(
+      root->operand(0)->to_apply()->root_instruction(), /*iota_parameter=*/1);
+}
+
+TEST_F(StableSortExpanderTest, StabilizeSortR1NoRoot) {
+  const char* hlo_string = R"(
+   HloModule permutation_sort
+
+   compare {
+     p.0.lhs = s32[] parameter(0)
+     p.0.rhs = s32[] parameter(1)
+     mask = s32[] constant(65535)
+     lhs = s32[] and(p.0.lhs, mask)
+     rhs = s32[] and(p.0.rhs, mask)
+     ROOT lt = pred[] less-than(lhs, rhs)
+   }
+
+   ENTRY sort_computation {
+     keys = s32[64,8732]{1,0} parameter(0)
+     sort = s32[64,8732]{1,0} sort(keys), dimensions={0}, to_apply=compare,
+       is_stable=true
+     ROOT neg = s32[64,8732]{1,0} negate(sort)
+   })";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  StableSortExpander stabilizer;
+  EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::Negate(m::GetTupleElement(
+                        m::Sort(m::Parameter(0), m::Iota()), 0))));
+  CheckComputationHasTieBreaker(
+      root->operand(0)->operand(0)->to_apply()->root_instruction(),
+      /*iota_parameter=*/1);
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/triangular_solve_expander.cc b/tensorflow/compiler/xla/service/triangular_solve_expander.cc
new file mode 100644
index 0000000..b26cdc1d
--- /dev/null
+++ b/tensorflow/compiler/xla/service/triangular_solve_expander.cc
@@ -0,0 +1,504 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/triangular_solve_expander.h"
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/math.h"
+#include "tensorflow/compiler/xla/client/lib/matrix.h"
+#include "tensorflow/compiler/xla/client/lib/slicing.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/client/xla_computation.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/math/math_util.h"
+
+namespace xla {
+
+namespace {
+
+// Get the diagonal blocks of the coefficient matrix
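+// For illustration: a coefficient matrix of shape [..., 6, 6] with
+// block_size 2 yields diagonal blocks of shape [..., 3, 2, 2]; a ragged last
+// block is zero-padded up to block_size.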
+XlaOp DiagonalBlocks(XlaOp a, int64 block_size) {
+  XlaBuilder* builder = a.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(a));
+    int ndims = shape.rank();
+    int64 n = ShapeUtil::GetDimension(shape, -1);
+    int64 num_blocks = n / block_size;
+
+    XlaOp diag_blocks;
+
+    // If the coefficient matrix is exactly the block size, we just add a
+    // singleton dimension i.e. [..., n, n] -> [..., 1, n, n]
+    if (n == block_size) {
+      std::vector<int64> permutation(ndims);
+      std::iota(permutation.begin(), permutation.end(), 1);
+      permutation.insert(permutation.end() - 2, 0);
+      return Transpose(Broadcast(a, /*broadcast_sizes=*/{1}), permutation);
+    }
+
+    // We can grab entire blocks using gather
+    if (n > block_size) {
+      // Construct the starting indices of the diagonal blocks
+      auto start_indices =
+          Transpose(Broadcast(Mul(Iota(builder, S32, num_blocks),
+                                  ConstantR0<int32>(builder, block_size)),
+                              /*broadcast_sizes=*/{2}),
+                    /*permutation=*/{1, 0});
+
+      PaddingConfig padding_config =
+          MakeEdgePaddingConfig({{0, 0}, {ndims - 2, 0}});
+      start_indices =
+          Pad(start_indices, ConstantR0<int32>(builder, 0), padding_config);
+
+      // Gather the diagonal blocks
+      std::vector<int64> slice_sizes(ndims);
+      GatherDimensionNumbers dim_numbers;
+      for (int i = 0; i < ndims - 2; ++i) {
+        dim_numbers.add_offset_dims(i);
+        dim_numbers.add_start_index_map(i);
+        slice_sizes[i] = ShapeUtil::GetDimension(shape, i);
+      }
+      slice_sizes[ndims - 2] = slice_sizes[ndims - 1] = block_size;
+      dim_numbers.add_offset_dims(ndims - 1);
+      dim_numbers.add_offset_dims(ndims);
+      dim_numbers.add_start_index_map(ndims - 2);
+      dim_numbers.add_start_index_map(ndims - 1);
+      dim_numbers.set_index_vector_dim(1);
+      diag_blocks = Gather(a, start_indices, dim_numbers, slice_sizes);
+    }
+
+    // The last block might be smaller than the block size,
+    // so we will need to pad it
+    if (n % block_size != 0) {
+      // Pad with zeros
+      auto last_blocks =
+          SliceInMinorDims(a, {n - n % block_size, n - n % block_size}, {n, n});
+      PaddingConfig config = MakeNoPaddingConfig(ndims);
+      int64 padding = block_size - n % block_size;
+      config.mutable_dimensions(ndims - 1)->set_edge_padding_high(padding);
+      config.mutable_dimensions(ndims - 2)->set_edge_padding_high(padding);
+      last_blocks =
+          Pad(last_blocks, Zero(builder, shape.element_type()), config);
+
+      // Add a singleton dimension
+      // i.e. [..., block_size, block_size] -> [..., 1, block_size, block_size]
+      TF_ASSIGN_OR_RETURN(Shape blocks_shape, builder->GetShape(last_blocks));
+      auto shape_dims = AsInt64Slice(blocks_shape.dimensions());
+      auto last_blocks_dims = std::vector<int64>(ndims);
+      std::copy(shape_dims.begin(), shape_dims.end(), last_blocks_dims.begin());
+      last_blocks_dims.insert(last_blocks_dims.end() - 2, 1);
+      last_blocks = Reshape(last_blocks, last_blocks_dims);
+
+      // Concatenate with the other blocks if necessary
+      if (n > block_size) {
+        diag_blocks =
+            ConcatInDim(builder, {diag_blocks, last_blocks}, ndims - 2);
+      } else {
+        diag_blocks = last_blocks;
+      }
+    }
+
+    return diag_blocks;
+  });
+}
+
+XlaOp InvertDiagonalBlocks(XlaOp diag_blocks, bool lower, bool transpose_a,
+                           bool conjugate_a,
+                           PrecisionConfig::Precision precision) {
+  XlaBuilder* builder = diag_blocks.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    // Input is a batch of square lower triangular matrices. Its shape is
+    // (..., size, size). We reshape this to (num_blocks, size, size).
+    TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(diag_blocks));
+    int64 block_size = ShapeUtil::GetDimension(shape, -1);
+    int64 num_blocks = ShapeUtil::ElementsIn(shape) /
+                       tensorflow::MathUtil::IPow(block_size, 2);
+    diag_blocks = Reshape(diag_blocks, {num_blocks, block_size, block_size});
+
+    // The input must be triangular because we rely on that when doing
+    // multiplications later on
+    diag_blocks = Triangle(diag_blocks, /*lower=*/lower);
+
+    // Rescale blocks to be unit triangular, but avoid dividing by zero (which
+    // can happen if the last block was padded); otherwise this would
+    // introduce NaNs that propagate through the computation.
+    auto diags = GetMatrixDiagonal(diag_blocks);
+    auto ones = FullLike(diags, 1);
+    diags = Select(Eq(diags, Zero(builder, shape.element_type())), ones, diags);
+    auto scaled_diag_blocks = Div(diag_blocks, diags, {0, 2});
+
+    // We can now use the fact that for a lower triangular matrix
+    // [[L11, 0], [L21, L22]], given the inverses L11' and L22', we have
+    // L21' = -L22' * L21 * L11'. In our case, L21 is a vector and our blocks
+    // have been rescaled to be unit triangular, so L22 = L22' = 1.
+
+    // Initialize the output matrix with -1s on the diagonal. We use -1 instead
+    // of 1 because we cannot do matrix-vector multiplies with variable shapes
+    // inside of a loop, or do irregularly shaped in-place updates. Hence,
+    // L21 <- -L22' * L21 * L11' cannot be done naively. Instead, we update
+    // the
+    // entire row i.e. we calculate
+    // [L21 L22 0] <- -[L21 L22 0] @ diag_blocks([L11', -I, -I])
+    // which means [L21 L22 0] <- [-L21 * L11', L22, 0].
+    auto identity =
+        IdentityMatrix(builder, shape.element_type(), block_size, block_size);
+    auto neg_identity = -identity;
+
+    // The first or last diagonal element should be set to 1 instead of -1,
+    // though, since we never update it.
+    auto pos_one = Reshape(One(builder, shape.element_type()), {1, 1});
+    auto start_index = ConstantR0<int>(builder, (lower) ? 0 : block_size - 1);
+    auto output_block =
+        DynamicUpdateSlice(neg_identity, pos_one,
+                           /*start_indices=*/{start_index, start_index});
+
+    // Broadcast diag([1, -1, -1, ...]) to every block
+    XlaOp output = Broadcast(output_block,
+                             /*broadcast_sizes=*/{num_blocks});
+
+    // Now we construct a loop that performs matrix-vector multiplications
+    // inverting the blocks one row at a time
+    std::vector<Shape> tuple_shapes = {
+        // The loop iteration counter is a scalar, incremented each iteration.
+        ShapeUtil::MakeShape(S32, {}),
+        // The output has the shape of A, with one row updated each iteration.
+        ShapeUtil::MakeShape(shape.element_type(),
+                             {num_blocks, block_size, block_size}),
+        // The input is a loop invariant.
+        ShapeUtil::MakeShape(shape.element_type(),
+                             {num_blocks, block_size, block_size})};
+    Shape tuple_shape = ShapeUtil::MakeTupleShape(tuple_shapes);
+
+    auto init_i = One(builder, S32);
+    auto init = Tuple(builder, {init_i, output, scaled_diag_blocks});
+
+    // Construct the loop condition function.
+    std::unique_ptr<XlaBuilder> condb =
+        builder->CreateSubBuilder("InvertDiagCond");
+    {
+      auto i = GetTupleElement(
+          Parameter(condb.get(), 0, tuple_shape, "InvertDiagCondTuple"), 0);
+      Lt(i, ConstantR0<int32>(condb.get(), block_size));
+    }
+    TF_ASSIGN_OR_RETURN(auto cond, condb->Build());
+
+    // Construct the loop body function.
+    std::unique_ptr<XlaBuilder> bodyb =
+        builder->CreateSubBuilder("InvertDiagBody");
+    {
+      auto input_tuple =
+          Parameter(bodyb.get(), 0, tuple_shape, "InvertDiagBodyTuple");
+
+      auto i = GetTupleElement(input_tuple, 0);
+      auto body_out = GetTupleElement(input_tuple, 1);
+      auto body_input = GetTupleElement(input_tuple, 2);
+
+      auto zero = ConstantR0<int32>(bodyb.get(), 0);
+      auto j = (lower) ? i : ScalarLike(i, block_size - 1) - i;
+      auto input_row =
+          DynamicSlice(body_input, {zero, j, zero},
+                       /*slice_sizes=*/{num_blocks, 1, block_size});
+
+      // We want -L21 L11^{-1}
+      DotDimensionNumbers dnums;
+      dnums.add_lhs_batch_dimensions(0);
+      dnums.add_rhs_batch_dimensions(0);
+      dnums.add_lhs_contracting_dimensions(2);
+      dnums.add_rhs_contracting_dimensions(1);
+      PrecisionConfig precision_proto;
+      precision_proto.add_operand_precision(precision);
+      precision_proto.add_operand_precision(precision);
+      auto update = -DotGeneral(input_row, body_out, dnums, &precision_proto);
+
+      body_out = DynamicUpdateSlice(body_out, update, {zero, j, zero});
+
+      auto next_i = i + ScalarLike(i, 1);
+      Tuple(bodyb.get(), {next_i, body_out, body_input});
+    }
+    TF_ASSIGN_OR_RETURN(auto body, bodyb->Build());
+
+    // Construct the While loop and return the result,
+    // return while_loop(cond_fun, body_fun, init)[1]
+    auto invert_while = While(cond, body, init);
+    auto inv_diag_blocks = GetTupleElement(invert_while, 1);
+
+    // Undo the scaling
+    inv_diag_blocks = Div(inv_diag_blocks, diags,
+                          /*broadcast_dimensions=*/{0, 1});
+
+    // Reshape back to original batch major dimensions
+    return Reshape(inv_diag_blocks, AsInt64Slice(shape.dimensions()));
+  });
+}
+
+XlaOp SolveWithInvertedDiagonalBlocks(XlaOp a, XlaOp b, XlaOp inv_diag_blocks,
+                                      bool left_side, bool lower,
+                                      bool transpose_a, bool conjugate_a,
+                                      PrecisionConfig::Precision precision) {
+  XlaBuilder* builder = a.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape blocks_shape, builder->GetShape(inv_diag_blocks));
+    TF_ASSIGN_OR_RETURN(Shape b_shape, builder->GetShape(b));
+    int64 block_size = ShapeUtil::GetDimension(blocks_shape, -1);
+
+    TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
+    int64 ndims = a_shape.rank();
+    int64 n = ShapeUtil::GetDimension(a_shape, -1);
+    int64 num_blocks = n / block_size + (n % block_size != 0);
+    int64 m_dim = (left_side) ? -1 : -2;
+    int64 m = ShapeUtil::GetDimension(b_shape, m_dim);
+
+    // Initialize the solution
+    auto x = ZerosLike(b);
+
+    // This loop is unrolled for performance reasons, but it could be expressed
+    // rolled as well since the matrices are of the same size each iteration
+    for (int i = 0; i < num_blocks; i++) {
+      // High-level intuition: We have B[i] = L[i] @ X. Since L is lower
+      // triangular this means B[i] = L[i, :i + 1] @ X[:i + 1]. We can split
+      // this into two parts: B[i] = L[i, :i] @ X[:i] + L[i, i] @ X[i], which
+      // can be solved for X[i] as
+      // X[i] = inv(L[i, i]) @ (B[i] - L[i, :i] @ X[:i]).
+
+      // Decide whether we go from first block to last or vice versa
+      auto j = (left_side ^ lower ^ transpose_a) ? num_blocks - 1 - i : i;
+
+      // Get the size of the inverse blocks (the last one might be smaller)
+      int64 block = (n % block_size != 0 && j + 1 == num_blocks)
+                        ? n % block_size
+                        : block_size;
+      auto inv_block =
+          MaybeConjugate(Collapse(SliceInMinorDims(inv_diag_blocks, {j, 0, 0},
+                                                   {j + 1, block, block}),
+                                  /*dimensions=*/{ndims - 2, ndims - 1}),
+                         conjugate_a);
+
+      // Get the corresponding row of B
+      int64 k = std::min((j + 1) * block_size, n);
+      std::vector<int64> start = {j * block_size, 0};
+      std::vector<int64> end = {k, m};
+      if (!left_side) {
+        std::swap(start[0], start[1]);
+        std::swap(end[0], end[1]);
+      }
+      auto b_row = SliceInMinorDims(b, start, end);
+
+      XlaOp remainder;
+      if (i == 0) {
+        remainder = b_row;
+      } else {
+        // This matrix multiply involves a lot of multiplying with zero (namely,
+        // X[i * block_size:] = 0), but this is faster than slicing...
+        end = {k, n};
+        if (!left_side) {
+          std::swap(end[0], end[1]);
+        }
+        if (transpose_a) {
+          std::swap(start[0], start[1]);
+          std::swap(end[0], end[1]);
+        }
+        auto a_row =
+            MaybeConjugate(SliceInMinorDims(a, start, end), conjugate_a);
+        if (left_side) {
+          remainder =
+              b_row - BatchDot(MaybeTransposeInMinorDims(a_row, transpose_a), x,
+                               precision);
+        } else {
+          remainder =
+              b_row - BatchDot(x, MaybeTransposeInMinorDims(a_row, transpose_a),
+                               precision);
+        }
+      }
+
+      XlaOp x_update;
+      auto zero = Zero(builder, S32);
+      auto start_index = ConstantR0WithType(builder, S32, j * block_size);
+      std::vector<XlaOp> update_starts = {start_index, zero};
+      if (left_side) {
+        x_update = BatchDot(MaybeTransposeInMinorDims(inv_block, transpose_a),
+                            remainder, precision);
+      } else {
+        x_update = BatchDot(remainder,
+                            MaybeTransposeInMinorDims(inv_block, transpose_a),
+                            precision);
+        std::swap(update_starts[0], update_starts[1]);
+      }
+      x = DynamicUpdateSliceInMinorDims(x, x_update, /*starts=*/update_starts);
+    }
+
+    return x;
+  });
+}
+
+XlaOp BuildTriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
+                           bool transpose_a, bool conjugate_a,
+                           bool unit_diagonal, int64 block_size,
+                           PrecisionConfig::Precision precision) {
+  XlaBuilder* builder = a.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
+    TF_ASSIGN_OR_RETURN(Shape b_shape, builder->GetShape(b));
+    if (a_shape.rank() != b_shape.rank()) {
+      return InvalidArgument(
+          "Arguments to TriangularSolve have shapes with different ranks: "
+          "%s vs. %s",
+          ShapeUtil::HumanString(a_shape), ShapeUtil::HumanString(b_shape));
+    }
+    const int64 ndims = a_shape.rank();
+    if (ndims < 2) {
+      return InvalidArgument(
+          "Arguments to TriangularSolve was rank %d but must have rank >= 2.",
+          ndims);
+    }
+    // The batch dimensions must be equal.
+    std::vector<int64> batch_dimensions;
+    for (int i = 0; i < ndims - 2; ++i) {
+      int64 a_size = a_shape.dimensions(i);
+      int64 b_size = b_shape.dimensions(i);
+      if (a_size != b_size) {
+        return InvalidArgument(
+            "Batch dimensions of arguments to TriangularSolve must be equal; "
+            "shapes were %s and %s.",
+            ShapeUtil::HumanString(a_shape), ShapeUtil::HumanString(b_shape));
+      }
+      batch_dimensions.push_back(a_size);
+    }
+
+    if (ShapeUtil::GetDimension(a_shape, -1) !=
+        ShapeUtil::GetDimension(a_shape, -2)) {
+      return InvalidArgument(
+          "The 'a' argument to TriangularSolve must be a batched square matrix;"
+          " shape was: %s",
+          ShapeUtil::HumanString(a_shape));
+    }
+    const int64 m = ShapeUtil::GetDimension(b_shape, -2);
+    const int64 n = ShapeUtil::GetDimension(b_shape, -1);
+    if ((left_side ? m : n) != ShapeUtil::GetDimension(a_shape, -1)) {
+      return InvalidArgument(
+          "Arguments to TriangularSolve have incompatible matrix shapes %s and "
+          "%s",
+          ShapeUtil::HumanString(a_shape), ShapeUtil::HumanString(b_shape));
+    }
+
+    if (block_size < 1) {
+      return InvalidArgument(
+          "block_size argument to TriangularSolve must be >= 1; got %d",
+          block_size);
+    }
+
+    if (ShapeUtil::IsZeroElementArray(b_shape)) {
+      // The output has the same shape as 'b', and since the output has zero
+      // elements, any such array will do.
+      return b;
+    }
+
+    // TODO(phawkins): consider pushing triangle masking into
+    // InvertDiagonalBlocks.
+    if (unit_diagonal) {
+      // Mask off everything but the strictly lower/upper triangle, then add
+      // the identity so the matrix has an explicit unit diagonal.
+      a = lower ? Select(TriangleMask(a, -1), a, ZerosLike(a))
+                : Select(TriangleMask(a, 0), ZerosLike(a), a);
+      int64 k = ShapeUtil::GetDimension(a_shape, -1);
+      a = xla::Add(a, IdentityMatrix(builder, a_shape.element_type(), k, k),
+                   /*broadcast_dimensions=*/{ndims - 2, ndims - 1});
+    } else {
+      // Mask off the ignored elements of the triangular matrix a.
+      a = Triangle(a, lower);
+    }
+
+    // We find the diagonal blocks of the coefficient matrix.
+    auto diag_blocks = DiagonalBlocks(a, block_size);
+
+    // We invert these blocks in parallel using batched matrix-vector products.
+    auto inv_diag_blocks = InvertDiagonalBlocks(diag_blocks, lower, transpose_a,
+                                                conjugate_a, precision);
+
+    // We now find the solution using GEMMs.
+    auto x =
+        SolveWithInvertedDiagonalBlocks(a, b, inv_diag_blocks, left_side, lower,
+                                        transpose_a, conjugate_a, precision);
+
+    return x;
+  });
+}
+
+}  // namespace
+
+bool TriangularSolveExpander::InstructionMatchesPattern(
+    HloInstruction* instruction) {
+  return instruction->opcode() == HloOpcode::kTriangularSolve;
+}
+
+StatusOr<HloInstruction*> TriangularSolveExpander::ExpandInstruction(
+    HloInstruction* instruction) {
+  const TriangularSolveOptions& options =
+      instruction->triangular_solve_options();
+  const string name = absl::StrFormat(
+      "xla.triangular_solve_%s_%s_%s_%s_%s_%s",
+      instruction->operand(0)->shape().ToString(),
+      instruction->operand(1)->shape().ToString(),
+      options.left_side() ? "left" : "right",
+      options.lower() ? "lower" : "upper",
+      TriangularSolveOptions_Transpose_Name(options.transpose_a()),
+      options.unit_diagonal() ? "unit" : "nonunit");
+
+  HloModule* module = instruction->parent()->parent();
+
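+  // Fetch or create the cache slot for this op signature: emplace inserts a
+  // null placeholder if the signature is new, and the returned reference lets
+  // us fill that slot in below without a second lookup.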
+  HloComputation*& computation =
+      computation_cache_.emplace(name, nullptr).first->second;
+  if (!computation) {
+    // Builds a new expansion.
+    //
+    // We do something unusual here: we build the computation using the
+    // XlaBuilder API, which is nominally an XLA client API. We do this because
+    // the external APIs for building complicated computations (XlaBuilder)
+    // are much more ergonomic than the internal ones. As it turns out,
+    // XlaBuilder isn't really a client API: what it does is build an
+    // HloModuleProto protocol buffer, which we can then deserialize and clone
+    // into our HloModule. Ideally we would avoid the protocol buffer step;
+    // that is left as an exercise for future work.
+    XlaBuilder builder(name);
+    XlaOp a = Parameter(&builder, 0, instruction->operand(0)->shape(), "a");
+    XlaOp b = Parameter(&builder, 1, instruction->operand(1)->shape(), "b");
+    bool transpose_a =
+        options.transpose_a() != TriangularSolveOptions::NO_TRANSPOSE;
+    bool conjugate_a = options.transpose_a() == TriangularSolveOptions::ADJOINT;
+
+    BuildTriangularSolve(a, b, options.left_side(), options.lower(),
+                         transpose_a, conjugate_a, options.unit_diagonal(),
+                         /*block_size=*/128,
+                         /*precision=*/PrecisionConfig::HIGHEST);
+    TF_ASSIGN_OR_RETURN(XlaComputation xla_computation, builder.Build());
+
+    TF_ASSIGN_OR_RETURN(ProgramShape program_shape,
+                        xla_computation.GetProgramShape());
+    HloModuleConfig config(program_shape);
+    TF_ASSIGN_OR_RETURN(auto new_module, HloModule::CreateFromProto(
+                                             xla_computation.proto(), config));
+    HloCloneContext context(module);
+    computation =
+        module->DeepCloneComputation(new_module->entry_computation(), &context);
+  }
+
+  return instruction->parent()->AddInstruction(HloInstruction::CreateCall(
+      instruction->shape(), instruction->operands(), computation));
+}
+
+}  // namespace xla
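For intuition, the expansion above follows the same recurrence as an ordinary
blocked forward substitution. Below is a minimal standalone sketch in plain
C++ (not the XLA implementation; BlockedLowerSolve is an illustrative name)
for a lower-triangular system L x = b:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Solves L * x = b for x, where L is an n x n lower-triangular matrix in
    // row-major order with a nonzero diagonal. Rows are processed in blocks;
    // subtracting the contribution of the already-solved prefix plays the
    // role of the batched matrix multiply in the expander.
    std::vector<double> BlockedLowerSolve(const std::vector<double>& L,
                                          const std::vector<double>& b,
                                          int n, int block_size) {
      std::vector<double> x(n, 0.0);
      for (int j0 = 0; j0 < n; j0 += block_size) {
        const int j1 = std::min(j0 + block_size, n);
        for (int i = j0; i < j1; ++i) {
          double remainder = b[i];
          for (int k = 0; k < i; ++k) remainder -= L[i * n + k] * x[k];
          x[i] = remainder / L[i * n + i];
        }
      }
      return x;
    }

    int main() {
      // L = [[2, 0], [1, 3]], b = [4, 7]  =>  x = [2, 5/3].
      const std::vector<double> L = {2, 0, 1, 3}, b = {4, 7};
      for (double v : BlockedLowerSolve(L, b, /*n=*/2, /*block_size=*/1)) {
        std::printf("%g\n", v);
      }
      return 0;
    }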
diff --git a/tensorflow/compiler/xla/service/triangular_solve_expander.h b/tensorflow/compiler/xla/service/triangular_solve_expander.h
new file mode 100644
index 0000000..be2374e
--- /dev/null
+++ b/tensorflow/compiler/xla/service/triangular_solve_expander.h
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_TRIANGULAR_SOLVE_EXPANDER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_TRIANGULAR_SOLVE_EXPANDER_H_
+
+#include "absl/container/flat_hash_map.h"
+#include "tensorflow/compiler/xla/service/op_expander_pass.h"
+
+namespace xla {
+
+class TriangularSolveExpander : public OpExpanderPass {
+ public:
+  absl::string_view name() const override {
+    return "triangular_solve_expander";
+  }
+
+ protected:
+  bool InstructionMatchesPattern(HloInstruction* instruction) override;
+
+  StatusOr<HloInstruction*> ExpandInstruction(
+      HloInstruction* instruction) override;
+
+ private:
+  // Mapping from op signatures to existing computations.
+  absl::flat_hash_map<string, HloComputation*> computation_cache_;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_TRIANGULAR_SOLVE_EXPANDER_H_
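As with other op-expander passes, the intended use is to add the pass to a
backend's HLO pipeline so that kTriangularSolve ops are rewritten before code
generation. A minimal sketch, assuming the usual HloPassPipeline API and an
HloModule* named module (the pipeline name is arbitrary):

    HloPassPipeline pipeline("triangular-solve-expansion");
    pipeline.AddPass<TriangularSolveExpander>();
    TF_RETURN_IF_ERROR(pipeline.Run(module).status());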
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index 5e505aa..cc82e9b 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -699,6 +699,8 @@
 // (4) The 'user' of 'operand' is DynamicUpdateSlice or While at operand index
 //     0.
 // (5) The 'user' of 'operand' is Sort, and it is the only user.
+// (6) The 'user' of 'operand' is TriangularSolve, 'operand' is its second
+//     operand (the right-hand side), and 'user' is its only user.
 //
 // (2) and (3) can only be determined if points-to analysis is available.
 bool TuplePointsToAnalysis::CanShareOperandBufferWithUser(
@@ -779,6 +781,14 @@
     std::vector<int64> operand_indices = user->OperandIndices(operand);
     return operand_indices.size() == 1 && user_index[0] == operand_indices[0];
   }
+  if (user->opcode() == HloOpcode::kTriangularSolve) {
+    // Only valid if there are no other users.
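+    // The result of TriangularSolve has the same shape as its 'b' operand
+    // (operand 1), so that operand's buffer can be reused for the output when
+    // nothing else reads it; 'a' (operand 0) must remain live as an input.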
+    if (operand->users().size() != 1) {
+      return false;
+    }
+    std::vector<int64> operand_indices = user->OperandIndices(operand);
+    return operand_indices.size() == 1 && operand_indices[0] == 1;
+  }
   if (user->opcode() == HloOpcode::kCall) {
     // TODO(b/62548313): Remove when buffer assignment is module scoped and
     // does not assign buffers to calls.
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
index 5516026..6f61fc4 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
@@ -1072,7 +1072,8 @@
   auto keys = builder.AddInstruction(
       HloInstruction::CreateParameter(0, keys_shape, "keys"));
   TF_ASSERT_OK_AND_ASSIGN(
-      auto* sort, MakeSortHlo(keys_shape, {keys}, 0, &builder, module_.get()));
+      auto* sort, MakeSortHlo(keys_shape, {keys}, 0, /*is_stable=*/false,
+                              &builder, module_.get()));
 
   computation_ = module_->AddEntryComputation(builder.Build());
   RunAnalysis();
@@ -1094,7 +1095,8 @@
   TF_ASSERT_OK_AND_ASSIGN(
       auto* sort,
       MakeSortHlo(ShapeUtil::MakeTupleShape({keys_shape, values_shape}),
-                  {keys, values}, 0, &builder, module_.get()));
+                  {keys, values}, 0, /*is_stable=*/false, &builder,
+                  module_.get()));
 
   computation_ = module_->AddEntryComputation(builder.Build());
   RunAnalysis();
diff --git a/tensorflow/compiler/xla/shape.cc b/tensorflow/compiler/xla/shape.cc
index a36d354..9485404 100644
--- a/tensorflow/compiler/xla/shape.cc
+++ b/tensorflow/compiler/xla/shape.cc
@@ -34,8 +34,12 @@
   // instead of a constructor.
   if (shape_proto.dimensions_size() !=
       shape_proto.is_dynamic_dimension_size()) {
-    LOG(ERROR) << "Malformed shape proto: number of is_dynamic_dimension "
-                  "fields does not match number of dimension fields";
+    if (shape_proto.is_dynamic_dimension_size() != 0) {
+      LOG(ERROR) << "Malformed shape proto: number of is_dynamic_dimension "
+                    "fields does not match number of dimension fields";
+    } else {
+      LOG(WARNING) << "Malformed shape proto: is_dynamic_dimension is empty";
+    }
   }
   int64 num_dynamic_dimension_fields = std::min(
       shape_proto.dimensions_size(), shape_proto.is_dynamic_dimension_size());
@@ -143,28 +147,17 @@
       return false;
     }
     if (LayoutUtil::IsDenseArray(lhs)) {
-      if (!absl::c_equal(LayoutUtil::MinorToMajor(lhs),
-                         LayoutUtil::MinorToMajor(rhs))) {
+      Layout::Equal equal;
+      if (ignore_tiles_in_layout_) {
+        equal.IgnoreTiles();
+      }
+      if (ignore_element_size_in_layout_) {
+        equal.IgnoreElementSize();
+      }
+      if (!equal(lhs.layout(), rhs.layout())) {
         VLOG(3) << "CompareShapes: lhs layout != rhs layout";
         return false;
       }
-
-      const auto& lhs_tiles = lhs.layout().tiles();
-      const auto& rhs_tiles = rhs.layout().tiles();
-      if (lhs_tiles.size() != rhs_tiles.size()) {
-        return false;
-      }
-      for (int64 i = 0; i < lhs_tiles.size(); i++) {
-        if (!absl::c_equal(lhs_tiles[i].dimensions(),
-                           rhs_tiles[i].dimensions())) {
-          return false;
-        }
-      }
-
-      if (lhs.layout().element_size_in_bits() !=
-          rhs.layout().element_size_in_bits()) {
-        return false;
-      }
     }
   }
 
diff --git a/tensorflow/compiler/xla/shape.h b/tensorflow/compiler/xla/shape.h
index 1d59490..78cea83 100644
--- a/tensorflow/compiler/xla/shape.h
+++ b/tensorflow/compiler/xla/shape.h
@@ -146,10 +146,10 @@
   //
   // Examples:
   //
-  // - Comparing two shapes ignoring they layout difference:
+  // - Comparing two shapes ignoring their layout difference:
   //   Equal().IgnoreLayout()(shape1, shape2);
   //
-  // - Comparing two shapes ignoring they layout and element type difference:
+  // - Comparing two shapes ignoring their layout and element type difference:
   //   Equal().IgnoreLayout().IgnoreElementType()(shape1, shape2);
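+  //
+  // - Comparing two shapes ignoring tiling annotations in their layouts
+  //   (using the IgnoreTilesInLayout option added below):
+  //   Equal().IgnoreTilesInLayout()(shape1, shape2);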
   class Equal {
    public:
@@ -161,6 +161,14 @@
       ignore_layout_ = true;
       return *this;
     }
+    Equal& IgnoreTilesInLayout() {
+      ignore_tiles_in_layout_ = true;
+      return *this;
+    }
+    Equal& IgnoreElementSizeInLayout() {
+      ignore_element_size_in_layout_ = true;
+      return *this;
+    }
     Equal& IgnoreElementType() {
       ignore_element_type_ = true;
       return *this;
@@ -174,8 +182,10 @@
       return *this;
     }
 
-   public:
+   private:
     bool ignore_layout_ = false;
+    bool ignore_tiles_in_layout_ = false;
+    bool ignore_element_size_in_layout_ = false;
     bool ignore_element_type_ = false;
     bool ignore_fp_precision_ = false;
     bool ignore_dynamic_dimension_ = false;
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 1ada4bc..d045fc7 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -22,6 +22,7 @@
 #include <utility>
 #include <vector>
 
+#include "absl/container/inlined_vector.h"
 #include "absl/strings/ascii.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
@@ -89,7 +90,8 @@
 // its Layout.
 StatusOr<Shape> MakeShapeWithLayoutInternal(
     PrimitiveType element_type, absl::Span<const int64> dimensions,
-    absl::Span<const int64> minor_to_major) {
+    absl::Span<const int64> minor_to_major, absl::Span<const Tile> tiles,
+    int64 element_size_in_bits) {
   if (dimensions.size() != minor_to_major.size()) {
     return InvalidArgument("Dimensions size is %ld, but layout size is %ld.",
                            dimensions.size(), minor_to_major.size());
@@ -100,11 +102,8 @@
   }
   TF_ASSIGN_OR_RETURN(Shape shape,
                       ShapeUtil::MakeValidatedShape(element_type, dimensions));
-  auto min2maj = shape.mutable_layout()->mutable_minor_to_major();
-  min2maj->clear();
-  for (int64 value : minor_to_major) {
-    min2maj->push_back(value);
-  }
+  *shape.mutable_layout() =
+      LayoutUtil::MakeLayout(minor_to_major, tiles, element_size_in_bits);
   if (!shape.has_layout()) {
     return InvalidArgument("Shape has no layout.");
   }
@@ -189,8 +188,10 @@
 
 /* static */ Shape ShapeUtil::MakeShapeWithLayout(
     PrimitiveType element_type, absl::Span<const int64> dimensions,
-    absl::Span<const int64> minor_to_major) {
-  return MakeShapeWithLayoutInternal(element_type, dimensions, minor_to_major)
+    absl::Span<const int64> minor_to_major, absl::Span<const Tile> tiles,
+    int64 element_size_in_bits) {
+  return MakeShapeWithLayoutInternal(element_type, dimensions, minor_to_major,
+                                     tiles, element_size_in_bits)
       .ValueOrDie();
 }
 
@@ -1256,6 +1257,43 @@
     const Shape& input_shape, const Shape& output_shape) {
   CHECK(input_shape.IsArray());
   CHECK(output_shape.IsArray());
+  // Removing trivial dimensions from the shape simplifies the alignment
+  // algorithm since ones can go in any position.
+  if (HasDegenerateDimensions(input_shape) ||
+      HasDegenerateDimensions(output_shape)) {
+    auto simple_output_shape =
+        AlignLayouts(DropDegenerateDimensions(input_shape),
+                     DropDegenerateDimensions(output_shape));
+    if (!simple_output_shape) {
+      return absl::nullopt;
+    }
+
+    auto layout = simple_output_shape->layout().minor_to_major();
+    // Map each dimension number in the squeezed layout back to its position
+    // in the original output shape (i.e., skip over the size-1 dimensions).
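+    // For example, if the output shape is [2,1,3] with squeezed layout {1,0},
+    // dim_map is {0,2}, the remapped layout becomes {2,0}, and the trailing
+    // loop below appends the size-1 dimension to give {2,0,1}.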
+    absl::InlinedVector<int64, 8> dim_map;
+    dim_map.reserve(simple_output_shape->rank());
+    for (int64 i = 0; i < output_shape.rank(); ++i) {
+      if (output_shape.dimensions(i) != 1) {
+        dim_map.push_back(i);
+      }
+    }
+    for (int64& d : layout) {
+      d = dim_map[d];
+    }
+
+    // Add the ones in descending order to the layout. Descending layouts tend
+    // to reduce the number of copies inserted in layout assignment.
+    for (int64 i = output_shape.rank() - 1; i >= 0; --i) {
+      if (output_shape.dimensions(i) == 1) {
+        layout.push_back(i);
+      }
+    }
+    Shape output_shape_with_layout = output_shape;
+    *output_shape_with_layout.mutable_layout()->mutable_minor_to_major() =
+        layout;
+    return output_shape_with_layout;
+  }
 
   int64 input_rank = input_shape.rank();
   int64 output_rank = output_shape.rank();
@@ -1304,10 +1342,10 @@
   if (input_dimension_product != output_dimension_product) {
     return absl::nullopt;
   }
+
   // We also need to store an end element so that we know where the last
   // alignment part ends.
   alignment.push_back({input_rank, output_rank});
-
   // Now check if the physical layout can potentially be aligned to the output
   // shape by changing the physical layout of the output shape. We need to check
   // that all dimension numbers that belong to the same alignment part appear
@@ -1319,40 +1357,23 @@
   for (int64 i = 0; i < input_rank;) {
     int64 current_dimension_number = input_dimension_numbers[i];
 
-    // Skip trivial dimensions with a bound of 1.
-    if (input_shape.dimensions(current_dimension_number) == 1) {
-      ++i;
-      continue;
-    }
-
-    // Calculate the number of non-trivial dimension bounds in the input shape
-    // belonging to the current alignment part.
+    // Trivial dimensions are stripped.
+    CHECK_NE(input_shape.dimensions(current_dimension_number), 1);
     const int64 current_alignment_index =
         dimension_to_alignment_index[current_dimension_number];
     // Because of the special end element that we added, we can be sure that
     // 'current_alignment_index' is < alignment.size() - 1.
     CHECK_LT(current_alignment_index, alignment.size() - 1);
-    int64 num_non_trivial_dimensions_in_alignment_part = 0;
-    for (int64 j = alignment[current_alignment_index].first;
-         j < alignment[current_alignment_index + 1].first; ++j) {
-      if (input_shape.dimensions(j) != 1) {
-        ++num_non_trivial_dimensions_in_alignment_part;
-      }
-    }
 
     // Check that the dimension numbers belonging to the current alignment
     // part are in descending order (trivial dimensions were stripped above,
     // so none need to be skipped here).
-    for (int64 j = 0; j < num_non_trivial_dimensions_in_alignment_part;
+    for (int64 j = 0; j < alignment[current_alignment_index + 1].first -
+                              alignment[current_alignment_index].first;
          ++i, ++j) {
       if (i == input_rank) {
         return absl::nullopt;
       }
-      // Skip trivial dimensions with a bound of 1.
-      if (input_shape.dimensions(input_dimension_numbers[i]) == 1) {
-        --j;
-        continue;
-      }
       // If the current dimension number belongs to a different alignment part,
       // or the dimension numbers are not in descending order, we can return
       // early.
@@ -1363,22 +1384,11 @@
       }
       current_dimension_number = input_dimension_numbers[i];
     }
-
     // The output dimension numbers that belong to the current alignment part
-    // need to appear in the same descending order as in the input. Again, we
-    // can skip dimensions with a bound of 1.
+    // need to appear in the same descending order as in the input.
     for (int64 j = alignment[current_alignment_index + 1].second - 1;
          j >= alignment[current_alignment_index].second; --j) {
-      if (output_shape.dimensions(j) != 1) {
-        output_layout.push_back(j);
-      }
-    }
-  }
-  // Now add all the dimensions with dimension bound 1 at the end of
-  // 'output_layout'.
-  for (int64 i = 0; i < output_rank; ++i) {
-    if (output_shape.dimensions(i) == 1) {
-      output_layout.push_back(i);
+      output_layout.push_back(j);
     }
   }
   CHECK_EQ(output_layout.size(), output_rank);
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index fb6da74..7f610a6 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -398,7 +398,9 @@
   // Returns a value shape such that shape.has_layout().
   static Shape MakeShapeWithLayout(PrimitiveType element_type,
                                    absl::Span<const int64> dimensions,
-                                   absl::Span<const int64> minor_to_major);
+                                   absl::Span<const int64> minor_to_major,
+                                   absl::Span<const Tile> tiles = {},
+                                   int64 element_size_in_bits = 0);
 
   static Shape MakeShapeWithSparseLayout(PrimitiveType element_type,
                                          absl::Span<const int64> dimensions,
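Because the new parameters are defaulted, existing call sites are unchanged,
while a tiled layout can now be requested explicitly. A sketch of the extended
call, assuming Tile is constructible from its dimension list as in
xla/layout.h (the tile sizes here are illustrative):

    // A 2x3 F32 array, minor-to-major {1, 0}, tiled 8x128, with the default
    // element size.
    Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0},
                                                 {Tile({8, 128})},
                                                 /*element_size_in_bits=*/0);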
diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc
index 126ae58..020b062 100644
--- a/tensorflow/compiler/xla/shape_util_test.cc
+++ b/tensorflow/compiler/xla/shape_util_test.cc
@@ -761,8 +761,15 @@
   auto aligned_shape = ShapeUtil::AlignLayouts(
       input, ShapeUtil::MakeShape(xla::F32, {1, 4, 1, 3, 2, 7, 5, 11, 1}));
   EXPECT_TRUE(aligned_shape);
-  EXPECT_THAT(aligned_shape.value().layout().minor_to_major(),
-              ElementsAre(6, 5, 4, 3, 1, 7, 0, 2, 8));
+  EXPECT_TRUE(ShapeUtil::ReshapeIsBitcast(input, aligned_shape.value()));
+}
+
+TEST(AlignmentTest, AlignLayoutsWithAllTrivialDimensions) {
+  Shape input =
+      ShapeUtil::MakeShapeWithLayout(xla::F32, {1, 1, 1, 1}, {0, 1, 3, 2});
+  auto aligned_shape = ShapeUtil::AlignLayouts(
+      input, ShapeUtil::MakeShape(xla::F32, {1, 1, 1, 1, 1}));
+  EXPECT_TRUE(aligned_shape);
   EXPECT_TRUE(ShapeUtil::ReshapeIsBitcast(input, aligned_shape.value()));
 }
 
diff --git a/tensorflow/compiler/xla/status_macros.cc b/tensorflow/compiler/xla/status_macros.cc
index b88fe36..aa7238f 100644
--- a/tensorflow/compiler/xla/status_macros.cc
+++ b/tensorflow/compiler/xla/status_macros.cc
@@ -25,6 +25,13 @@
 namespace xla {
 namespace status_macros {
 
+ABSL_CONST_INIT const char kPossibleAutoJitAlternative[] =
+    "This error might be occurring with the use of xla.compile. If it is not "
+    "necessary that every Op be compiled with XLA, an alternative is to use "
+    "auto_jit with OptimizerOptions.global_jit_level = ON_2 or the environment "
+    "variable TF_XLA_FLAGS=\"tf_xla_auto_jit=2\" which will attempt to use xla "
+    "to compile as much of the graph as the compiler is able to.";
+
 static Status MakeStatus(tensorflow::error::Code code, const string& message) {
   return Status(code, message);
 }
diff --git a/tensorflow/compiler/xla/status_macros.h b/tensorflow/compiler/xla/status_macros.h
index e51dd64..315136a 100644
--- a/tensorflow/compiler/xla/status_macros.h
+++ b/tensorflow/compiler/xla/status_macros.h
@@ -30,6 +30,10 @@
 namespace xla {
 namespace status_macros {
 
+// This is a useful error message when encountering XLA Compiler errors that
+// could be handled with the non-strict AutoJit mode.
+extern const char kPossibleAutoJitAlternative[];
+
 // Stream object used to collect error messages in MAKE_ERROR macros
 // or append error messages with APPEND_ERROR.  It accepts any
 // arguments with operator<< to build an error string, and then has an
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index e8e779f..72c5ed5 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -317,6 +317,11 @@
     name = "conv_depthwise_backprop_filter_test",
     timeout = "long",
     srcs = ["conv_depthwise_backprop_filter_test.cc"],
+    # These backends do not natively handle batch group counts.
+    blacklisted_backends = [
+        "gpu",
+        "cpu",
+    ],
     shard_count = 6,
     deps = [
         "//tensorflow/compiler/xla:execution_options_util",
@@ -1141,7 +1146,7 @@
 xla_test(
     name = "reduce_test",
     srcs = ["reduce_test.cc"],
-    shard_count = 40,
+    shard_count = 31,
     tags = [
         "optonly",
     ],
@@ -1389,8 +1394,8 @@
 )
 
 xla_test(
-    name = "fmax_test",
-    srcs = ["fmax_test.cc"],
+    name = "fmax_fmin_test",
+    srcs = ["fmax_fmin_test.cc"],
     deps = [
         "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/client:xla_builder",
@@ -2155,3 +2160,26 @@
         "//tensorflow/compiler/xla:test",
     ],
 )
+
+xla_test(
+    name = "triangular_solve_test",
+    srcs = ["triangular_solve_test.cc"],
+    tags = [
+        "enable_for_xla_interpreter",
+        "noasan",  # sometimes times out, http://b/78650012
+    ],
+    deps = [
+        "//tensorflow/compiler/xla:array2d",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/client/lib:math",
+        "//tensorflow/compiler/xla/client/lib:matrix",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
diff --git a/tensorflow/compiler/xla/tests/conv_depthwise_backprop_filter_test.cc b/tensorflow/compiler/xla/tests/conv_depthwise_backprop_filter_test.cc
index 96f4aed..dfbf047 100644
--- a/tensorflow/compiler/xla/tests/conv_depthwise_backprop_filter_test.cc
+++ b/tensorflow/compiler/xla/tests/conv_depthwise_backprop_filter_test.cc
@@ -37,6 +37,8 @@
   std::vector<int64> activation_dims;
   std::vector<int64> kernel_dims;
   std::vector<int64> output_dims;
+  std::vector<int64> activation_and_kernel_layout;
+  std::vector<int64> output_layout;
 };
 
 class BatchGroupedConvolution2DTest
@@ -47,8 +49,9 @@
 static std::vector<BatchGroupedConvolution2DSpec> GetConv2DTestCases() {
   std::vector<BatchGroupedConvolution2DSpec> config_set;
   std::vector<std::vector<int64>> config_options = {
-      {8, 5, 3, 2},   {4, 5, 5, 2},   {8, 7, 4, 128},  {16, 20, 20, 256},
-      {256, 7, 5, 4}, {256, 6, 6, 4}, {256, 8, 8, 512}};
+      {8, 5, 3, 2},      {4, 5, 5, 2},    {8, 7, 4, 128},
+      {16, 20, 20, 256}, {256, 7, 5, 4},  {256, 6, 6, 4},
+      {256, 8, 8, 512},  {64, 7, 7, 960}, {64, 14, 14, 576}};
 
   for (auto option : config_options) {
     int64 feature = option[3];
@@ -68,8 +71,14 @@
     int64 output_space_size = 3 + activation_size - kernel_size;
     config.output_dims = {output_space_size, output_space_size, feature, 1};
 
+    config.activation_and_kernel_layout = {0, 3, 1, 2};
+    config.output_layout = {2, 3, 0, 1};
     config_set.push_back(config);
 
+    BatchGroupedConvolution2DSpec different_layout_config = config;
+    different_layout_config.activation_and_kernel_layout = {3, 0, 1, 2};
+    config_set.push_back(different_layout_config);
+
     // Add configurations for window dilation cases.
     if (activation_size % 2 == 0 && activation_size == kernel_size) {
       BatchGroupedConvolution2DSpec config;
@@ -79,11 +88,17 @@
       config.activation_dims = {batch, activation_size, activation_size,
                                 feature};
       config.kernel_dims = {batch, kernel_size / 2, kernel_size / 2, feature};
+      config.activation_and_kernel_layout = {0, 3, 1, 2};
+      config.output_layout = {2, 3, 0, 1};
 
       int64 output_space_size = 5;
       config.output_dims = {output_space_size, output_space_size, feature, 1};
 
       config_set.push_back(config);
+
+      BatchGroupedConvolution2DSpec different_layout_config = config;
+      different_layout_config.activation_and_kernel_layout = {3, 0, 1, 2};
+      config_set.push_back(different_layout_config);
     }
   }
 
@@ -97,8 +112,11 @@
   const string data_type = GetFloatDataType(::testing::get<1>(data.param));
   string str = absl::StrCat(
       "activation_dims_", absl::StrJoin(spec.activation_dims, "x"),
-      "_kernel_dims_", absl::StrJoin(spec.kernel_dims, "x"), "_output_dims_",
-      absl::StrJoin(spec.output_dims, "x"), data_type);
+      "_kernel_dims_", absl::StrJoin(spec.kernel_dims, "x"),
+      "_activation_layout_",
+      absl::StrJoin(spec.activation_and_kernel_layout, "_"), "_output_dims_",
+      absl::StrJoin(spec.output_dims, "x"), data_type, "_output_layout_",
+      absl::StrJoin(spec.output_layout, "_"));
 
   // Test names are not allowed to contain the '-' character.
   absl::c_replace(str, '-', 'n');
@@ -110,23 +128,28 @@
   const string data_type = GetFloatDataType(use_bfloat16);
   return absl::StrFormat(
       R"(
-    HloModule TensorFlowDepthwiseConv
+    HloModule TensorFlowDepthwiseConv, is_scheduled=true
 
     ENTRY main {
-      activation = %s[%s] parameter(0)
-      kernel = %s[%s] parameter(1)
-      ROOT conv = %s[%s] convolution(%s[%s] activation, %s[%s] kernel),
+      activation = %s[%s]{%s} parameter(0)
+      kernel = %s[%s]{%s} parameter(1)
+      ROOT conv = %s[%s]{%s} convolution(%s[%s]{%s} activation, %s[%s]{%s} kernel),
           window={size=%dx%d pad=1_%dx1_%d rhs_dilate=%dx%d}, dim_labels=f01b_i01o->01fb,
           batch_group_count=%d
     }
     )",
-      data_type, absl::StrJoin(spec.activation_dims, ","), data_type,
-      absl::StrJoin(spec.kernel_dims, ","), data_type,
-      absl::StrJoin(spec.output_dims, ","), data_type,
-      absl::StrJoin(spec.activation_dims, ","), data_type,
-      absl::StrJoin(spec.kernel_dims, ","), spec.window, spec.window,
-      spec.window_dilation, spec.window_dilation, spec.window_dilation,
-      spec.window_dilation, spec.output_batch);
+      data_type, absl::StrJoin(spec.activation_dims, ","),
+      absl::StrJoin(spec.activation_and_kernel_layout, ","), data_type,
+      absl::StrJoin(spec.kernel_dims, ","),
+      absl::StrJoin(spec.activation_and_kernel_layout, ","), data_type,
+      absl::StrJoin(spec.output_dims, ","),
+      absl::StrJoin(spec.output_layout, ","), data_type,
+      absl::StrJoin(spec.activation_dims, ","),
+      absl::StrJoin(spec.activation_and_kernel_layout, ","), data_type,
+      absl::StrJoin(spec.kernel_dims, ","),
+      absl::StrJoin(spec.activation_and_kernel_layout, ","), spec.window,
+      spec.window, spec.window_dilation, spec.window_dilation,
+      spec.window_dilation, spec.window_dilation, spec.output_batch);
 }
 
 XLA_TEST_P(BatchGroupedConvolution2DTest, DoIt) {
@@ -135,13 +158,13 @@
   const string hlo_text =
       BuildHloTextBatchGroupedConvolution2D(spec, use_bfloat16);
 
-  EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{0.01, 0.01},
-                            [](HloModule* module) -> Status {
-                              BFloat16MixedPrecisionRemoval remover;
-                              TF_RETURN_IF_ERROR(remover.Run(module).status());
-                              Despecializer despecializer;
-                              return despecializer.Run(module).status();
-                            }));
+  EXPECT_TRUE(RunAndCompareNoHloPasses(
+      hlo_text, ErrorSpec{0.01, 0.01}, [](HloModule* module) -> Status {
+        BFloat16MixedPrecisionRemoval remover;
+        TF_RETURN_IF_ERROR(remover.Run(module).status());
+        Despecializer despecializer;
+        return despecializer.Run(module).status();
+      }));
 }
 
 INSTANTIATE_TEST_CASE_P(
diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc
index cad43d1..4687ed6 100644
--- a/tensorflow/compiler/xla/tests/custom_call_test.cc
+++ b/tensorflow/compiler/xla/tests/custom_call_test.cc
@@ -172,8 +172,10 @@
 
   const Shape& r2f32_dim0_major =
       ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0});
-  b.AddInstruction(HloInstruction::CreateCustomCall(
+  auto custom_call = b.AddInstruction(HloInstruction::CreateCustomCall(
       r2f32_dim0_major, {input}, "Add1ToValues", {r2f32_dim0_major}));
+  b.AddInstruction(
+      custom_call->CloneWithNewOperands(r2f32_dim0_major, {custom_call}));
 
   module->AddEntryComputation(b.Build());
   ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0}));
@@ -182,7 +184,7 @@
   Literal argument = LiteralUtil::CreateR2<float>({{1.f, 2.f}, {3.f, 4.f}});
 
   Literal result = ExecuteAndTransfer(std::move(module), {&argument});
-  LiteralTestUtil::ExpectR2Equal<float>({{2.f, 3.f}, {4.f, 5.f}}, result);
+  LiteralTestUtil::ExpectR2Equal<float>({{3.f, 4.f}, {5.f, 6.f}}, result);
 }
 
 XLA_TEST_F(CustomCallTest, TupleOutput) {
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index f740f48..6ee2178 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -1188,6 +1188,16 @@
       p{v{8, 55, 11, 3}, v{55, 11, 3, 29}, "mkBC,kBCn->BCnm"},
       p{v{5, 6}, v{6, 7}, "ab,cd->dcba"},
       p{v{6}, v{6, 7}, "b,bc->c"},
+      p{v{5, 6, 7}, v{5, 6, 7}, "abc,abc->ab"},
+      p{v{5, 6, 7}, v{7, 6, 5}, "abc,cba->ca"},
+      p{v{77}, v{77}, "a,a->a"},
+      p{v{77}, v{77, 55}, "a,ab->ba"},
+      p{v{2, 3, 77}, v{77, 2, 3, 55}, "ija,aijb->baij"},
+      p{v{55}, v{}, "a,->a"},
+      p{v{11, 111}, v{11}, "ab,a->ab"},
+      p{v{16, 34}, v{16, 34}, "ab,ab->ab"},
+      p{v{16, 3, 34}, v{3, 16, 34}, "abc,bac->abc"},
+      p{v{5, 19}, v{}, "ab,->ab"},
   };
   return test_cases;
 }
@@ -1257,5 +1267,82 @@
   EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3}));
 }
 
+XLA_TEST_F(DotOperationTextTest, CpuTiledDotEmitterCachingBug_1) {
+  // Tests for a caching bug in the XLA CPU backend.
+  absl::string_view hlo_string =
+      R"(
+HloModule CpuTiledDotEmitterCachingBug
+
+ENTRY main {
+  lhs = f32[20,40] parameter(0)
+  rhs_0 = f32[40,1] parameter(2)
+  rhs_1 = f32[1,40] parameter(1)
+
+  dot_0 = f32[20,1] dot(lhs, rhs_0), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+  dot_1 = f32[20,1] dot(lhs, rhs_1), lhs_contracting_dims={1}, rhs_contracting_dims={1}
+
+  ROOT result = f32[20,1] divide(dot_0, dot_1)
+}
+)";
+
+  EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3}));
+}
+
+XLA_TEST_F(DotOperationTextTest, CpuTiledDotEmitterCachingBug_2) {
+  // Tests for a caching bug in the XLA CPU backend.
+  absl::string_view hlo_string =
+      R"(
+HloModule CpuTiledDotEmitterCachingBug
+
+ENTRY main {
+  lhs_0 = f32[20,40] parameter(0)
+  rhs_0 = f32[40,1] parameter(1)
+  lhs_1 = f32[1,40] parameter(2)
+  rhs_1 = f32[20,40] parameter(3)
+
+  dot_0 = f32[20,1] dot(lhs_0, rhs_0), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+  dot_1 = f32[1,20] dot(lhs_1, rhs_1), lhs_contracting_dims={1}, rhs_contracting_dims={1}
+
+  dot_0_reshaped = f32[20] reshape(dot_0)
+  dot_1_reshaped = f32[20] reshape(dot_1)
+
+  ROOT result = f32[20] divide(dot_0_reshaped, dot_1_reshaped)
+}
+)";
+
+  EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3}));
+}
+
+XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(GpuIntegerDotCodegen)) {
+  absl::string_view hlo_string =
+      R"(
+HloModule SmallIntegerDot
+
+ENTRY SmallIntegerDot {
+  arg0 = s32[1,2,2] parameter(0)
+  arg1 = s32[1,2,1] parameter(1)
+  ROOT dot = s32[1,2,1] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1}
+}
+)";
+
+  EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3}));
+}
+
+XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(GpuTransposeOutput)) {
+  absl::string_view hlo_string =
+      R"(
+HloModule TransposeOutput
+
+ENTRY TransposeOutput {
+  p0 = f32[32,32] parameter(0)
+  p1 = f32[32,64] parameter(1)
+  dot = f32[32,64] dot(p0, p1), lhs_contracting_dims={0}, rhs_contracting_dims={0}
+  ROOT tr = f32[64,32] transpose(dot), dimensions={1,0}
+}
+)";
+
+  EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3}));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/fmax_fmin_test.cc b/tensorflow/compiler/xla/tests/fmax_fmin_test.cc
new file mode 100644
index 0000000..7423ac0
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/fmax_fmin_test.cc
@@ -0,0 +1,88 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace {
+
+class FmaxSimpleTest : public ClientLibraryTestBase {};
+
+TEST_F(FmaxSimpleTest, FmaxTenValues) {
+  SetFastMathDisabled(true);
+  XlaBuilder builder(TestName());
+  auto x = ConstantR1<float>(
+      &builder, {-0.0, 1.0, 2.0, -3.0, -4.0, 5.0, 6.0, -7.0, -8.0, 9.0});
+  auto y = ConstantR1<float>(
+      &builder, {-0.0, -1.0, -2.0, 3.0, 4.0, -5.0, -6.0, 7.0, 8.0, -9.0});
+  Max(x, y);
+
+  std::vector<float> expected = {-0.0, 1.0, 2.0, 3.0, 4.0,
+                                 5.0,  6.0, 7.0, 8.0, 9.0};
+  ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
+}
+
+TEST_F(FmaxSimpleTest, FmaxEdgeCases) {
+  SetFastMathDisabled(true);
+  XlaBuilder builder(TestName());
+  XlaOp param0, param1;
+  std::unique_ptr<GlobalData> param0_data = CreateR1Parameter<float>(
+      {INFINITY, INFINITY, INFINITY, -INFINITY, INFINITY, -INFINITY, NAN,
+       INFINITY, -INFINITY, NAN},
+      /*parameter_number=*/0, /*name=*/"param0",
+      /*builder=*/&builder, /*data_handle=*/&param0);
+  std::unique_ptr<GlobalData> param1_data = CreateR1Parameter<float>(
+      {INFINITY, -INFINITY, NAN, NAN, -4.0, -5.0, -6.0, 7.0, 8.0, 9.0},
+      /*parameter_number=*/1, /*name=*/"param1",
+      /*builder=*/&builder, /*data_handle=*/&param1);
+
+  Max(param0, param1);
+  std::vector<float> expected = {INFINITY, INFINITY, NAN,      NAN, INFINITY,
+                                 -5,       NAN,      INFINITY, 8,   NAN};
+  ComputeAndCompareR1<float>(&builder, expected,
+                             {param0_data.get(), param1_data.get()},
+                             ErrorSpec(0.0001));
+}
+
+TEST_F(FmaxSimpleTest, FminEdgeCases) {
+  SetFastMathDisabled(true);
+  XlaBuilder builder(TestName());
+  XlaOp param0, param1;
+  std::unique_ptr<GlobalData> param0_data = CreateR1Parameter<float>(
+      {INFINITY, INFINITY, INFINITY, -INFINITY, INFINITY, -INFINITY, NAN,
+       INFINITY, -INFINITY, NAN},
+      /*parameter_number=*/0, /*name=*/"param0",
+      /*builder=*/&builder, /*data_handle=*/&param0);
+  std::unique_ptr<GlobalData> param1_data = CreateR1Parameter<float>(
+      {INFINITY, -INFINITY, NAN, NAN, -4.0, -5.0, -6.0, 7.0, 8.0, 9.0},
+      /*parameter_number=*/1, /*name=*/"param1",
+      /*builder=*/&builder, /*data_handle=*/&param1);
+
+  Min(param0, param1);
+  std::vector<float> expected = {INFINITY,  -INFINITY, NAN, NAN,       -4,
+                                 -INFINITY, NAN,       7,   -INFINITY, NAN};
+  ComputeAndCompareR1<float>(&builder, expected,
+                             {param0_data.get(), param1_data.get()},
+                             ErrorSpec(0.0001));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/fmax_test.cc b/tensorflow/compiler/xla/tests/fmax_test.cc
deleted file mode 100644
index c5bbbe7..0000000
--- a/tensorflow/compiler/xla/tests/fmax_test.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include <vector>
-
-#include "tensorflow/compiler/xla/client/local_client.h"
-#include "tensorflow/compiler/xla/client/xla_builder.h"
-#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
-#include "tensorflow/compiler/xla/tests/literal_test_util.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace xla {
-namespace {
-
-class FmaxSimpleTest : public ClientLibraryTestBase {};
-
-TEST_F(FmaxSimpleTest, FmaxTenValues) {
-  XlaBuilder builder(TestName());
-  auto x = ConstantR1<float>(
-      &builder, {-0.0, 1.0, 2.0, -3.0, -4.0, 5.0, 6.0, -7.0, -8.0, 9.0});
-  auto y = ConstantR1<float>(
-      &builder, {-0.0, -1.0, -2.0, 3.0, 4.0, -5.0, -6.0, 7.0, 8.0, -9.0});
-  Max(x, y);
-
-  std::vector<float> expected = {-0.0, 1.0, 2.0, 3.0, 4.0,
-                                 5.0,  6.0, 7.0, 8.0, 9.0};
-  ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
-}
-
-}  // namespace
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index 66f72ba..0151981 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -205,6 +205,17 @@
   return test_runner_.Execute(std::move(module), arguments).ValueOrDie();
 }
 
+StatusOr<std::vector<Literal>> HloTestBase::ExecuteReplicated(
+    std::unique_ptr<HloModule> module, absl::Span<Literal* const> arguments,
+    int64 num_replicas) {
+  HloRunner::ReplicatedExecuteOptions options;
+  options.num_replicas = num_replicas;
+  for (auto argument : arguments) {
+    options.arguments.push_back(argument);
+  }
+  return test_runner_.ExecuteReplicated(std::move(module), options);
+}
+
 StatusOr<std::unique_ptr<HloModule>> HloTestBase::MakeReferenceModule(
     const HloModule& test_module,
     const std::function<void(HloModule*)>& reference_preprocessor) {
@@ -313,7 +324,10 @@
                        reference_preprocessor);
 }
 
-::testing::AssertionResult HloTestBase::Run(string_view hlo_string) {
+::testing::AssertionResult HloTestBase::Run(string_view hlo_string,
+                                            bool run_hlo_passes,
+                                            ExecutionProfile* profile,
+                                            string backend_config) {
   auto module_or_status =
       HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
   if (!module_or_status.ok()) {
@@ -321,19 +335,108 @@
            << "Error while parsing HLO text format: "
            << module_or_status.status().ToString();
   }
+
+  std::unique_ptr<HloModule> module = std::move(module_or_status.ValueOrDie());
   const auto& fake_arguments =
-      MakeFakeArguments(module_or_status.ValueOrDie().get())
-          .ConsumeValueOrDie();
+      MakeFakeArguments(module.get()).ConsumeValueOrDie();
   std::vector<Literal*> fake_argument_ptrs;
   absl::c_transform(
       fake_arguments, std::back_inserter(fake_argument_ptrs),
       [](const Literal& literal) { return const_cast<Literal*>(&literal); });
-  return test_runner_
-                 .Execute(std::move(module_or_status.ValueOrDie()),
-                          fake_argument_ptrs, /*run_hlo_passes=*/true)
-                 .ok()
+
+  if (profile != nullptr) {
+    // We have to enable HLO profiling; otherwise the ExecutionProfile
+    // currently collected is not correct.
+    //
+    // TODO(b/119432044): Fix collection of the ExecutionProfile
+    // so that this is not necessary.
+    HloModuleConfig config = module->config();
+    DebugOptions debug_options = config.debug_options();
+    debug_options.set_xla_hlo_profile(true);
+    config.set_debug_options(debug_options);
+    module->set_config(config);
+  }
+
+  if (!backend_config.empty()) {
+    // Set backend configuration if it is given.
+    HloInstruction* instruction =
+        module->entry_computation()->root_instruction();
+    instruction->set_raw_backend_config_string(backend_config);
+  }
+
+  auto output = test_runner_.Execute(std::move(module), fake_argument_ptrs,
+                                     /*run_hlo_passes=*/run_hlo_passes,
+                                     /*profile=*/profile);
+
+  return output.ok()
              ? ::testing::AssertionSuccess()
-             : ::testing::AssertionFailure();
+             : ::testing::AssertionFailure() << output.status().error_message();
+}
+
+::testing::AssertionResult HloTestBase::RunMultipleTimes(
+    string_view hlo_string, bool run_hlo_passes,
+    std::vector<ExecutionProfile>* profiles, string backend_config) {
+  int n = profiles->size();
+  std::vector<std::vector<Literal*>> fake_argument_ptrs(n);
+  std::vector<std::vector<Literal>> fake_arguments(n);
+  std::vector<std::unique_ptr<Executable>> executables(n);
+
+  for (int i = 0; i < n; ++i) {
+    auto module_or_status =
+        HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
+    if (!module_or_status.ok()) {
+      return ::testing::AssertionFailure()
+             << "Error while parsing HLO text format: "
+             << module_or_status.status().ToString();
+    }
+    std::unique_ptr<HloModule> module =
+        std::move(module_or_status.ValueOrDie());
+
+    fake_arguments[i] = MakeFakeArguments(module.get()).ConsumeValueOrDie();
+    absl::c_transform(
+        fake_arguments[i], std::back_inserter(fake_argument_ptrs[i]),
+        [](const Literal& literal) { return const_cast<Literal*>(&literal); });
+
+    if (profiles != nullptr) {
+      // We have to enable HLO profiling; otherwise the ExecutionProfile
+      // currently collected is not correct.
+      //
+      // TODO(b/119432044): Fix collection of the ExecutionProfile
+      // so that this is not necessary.
+      HloModuleConfig config = module->config();
+      DebugOptions debug_options = config.debug_options();
+      debug_options.set_xla_hlo_profile(true);
+      config.set_debug_options(debug_options);
+      module->set_config(config);
+    }
+
+    if (!backend_config.empty()) {
+      // Set backend configuration if it is given.
+      HloInstruction* instruction =
+          module->entry_computation()->root_instruction();
+      instruction->set_raw_backend_config_string(backend_config);
+    }
+
+    auto executable =
+        test_runner_.CreateExecutable(std::move(module), run_hlo_passes);
+    if (!executable.ok()) {
+      return ::testing::AssertionFailure()
+             << executable.status().error_message();
+    }
+    executables[i] = std::move(executable.ValueOrDie());
+  }
+
+  for (int i = 0; i < n; ++i) {
+    auto output =
+        test_runner_.Execute(std::move(executables[i]), fake_argument_ptrs[i],
+                             /*profile=*/&((*profiles)[i]));
+    if (!output.ok()) {
+      return ::testing::AssertionFailure() << output.status().error_message();
+    }
+  }
+
+  return ::testing::AssertionSuccess();
 }
 
 ::testing::AssertionResult HloTestBase::RunAndCompareFromFile(
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h
index 69a4f96..3c2bcbb 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.h
@@ -173,6 +173,11 @@
   Literal ExecuteAndTransfer(std::unique_ptr<HloModule> module,
                              absl::Span<Literal* const> arguments);
 
+  // Executes the given module on multiple replicas.
+  StatusOr<std::vector<Literal>> ExecuteReplicated(
+      std::unique_ptr<HloModule> module, absl::Span<Literal* const> arguments,
+      int64 num_replicas);
+
   // Executes the given hlo module on two backends and compares results.
   //
   // 'arguments': the input of the hlo module.
@@ -221,8 +226,14 @@
       const absl::optional<ErrorSpec>& error,
       const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
       TF_MUST_USE_RESULT;
-  ::testing::AssertionResult Run(const absl::string_view hlo_string)
-      TF_MUST_USE_RESULT;
+  ::testing::AssertionResult Run(const absl::string_view hlo_string,
+                                 bool run_hlo_passes = true,
+                                 ExecutionProfile* profile = nullptr,
+                                 string backend_config = "") TF_MUST_USE_RESULT;
+  ::testing::AssertionResult RunMultipleTimes(
+      const absl::string_view hlo_string, bool run_hlo_passes,
+      std::vector<ExecutionProfile>* profiles,
+      string backend_config = "") TF_MUST_USE_RESULT;
   ::testing::AssertionResult RunAndCompareFromFile(
       const string& filename, const absl::optional<ErrorSpec>& error,
       const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
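Together these let a test execute the same HLO text several times and inspect
the collected profiles. A sketch of the intended use from inside an
HloTestBase subclass (hlo_text is a placeholder for the module under test):

    std::vector<ExecutionProfile> profiles(2);
    EXPECT_TRUE(RunMultipleTimes(hlo_text, /*run_hlo_passes=*/false, &profiles,
                                 /*backend_config=*/""));
    // profiles[i] now holds the ExecutionProfile from the i-th run.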
diff --git a/tensorflow/compiler/xla/tests/plugin.bzl b/tensorflow/compiler/xla/tests/plugin.bzl
index 8a5d913..107869f 100644
--- a/tensorflow/compiler/xla/tests/plugin.bzl
+++ b/tensorflow/compiler/xla/tests/plugin.bzl
@@ -33,4 +33,3 @@
 # }
 
 plugins = {}
-
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 95c89b0..67d2258 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -238,6 +238,79 @@
   return std::move(literal);
 }
 
+template <typename IntT>
+void PopulateWithRandomIntegralDataWithBounds(Literal* literal,
+                                              std::minstd_rand0* engine,
+                                              IntT min, IntT max) {
+  CHECK(engine != nullptr);
+  CHECK_EQ(literal->shape().element_type(),
+           primitive_util::NativeToPrimitiveType<IntT>());
+  std::uniform_int_distribution<IntT> generator(min, max);
+  for (IntT& value : literal->data<IntT>()) {
+    value = generator(*engine);
+  }
+}
+
+// Same as MakeFakeLiteralInternal but generates random numbers in the given
+// range [min, max]. Currently this works only for integral types.
+StatusOr<Literal> MakeFakeLiteralInternalWithBounds(const Shape& shape,
+                                                    std::minstd_rand0* engine,
+                                                    int64 min, int64 max) {
+  if (shape.IsTuple()) {
+    std::vector<Literal> elements;
+    for (const Shape& element_shape : shape.tuple_shapes()) {
+      TF_ASSIGN_OR_RETURN(
+          Literal element,
+          MakeFakeLiteralInternalWithBounds(element_shape, engine, min, max));
+      elements.push_back(std::move(element));
+    }
+    return LiteralUtil::MakeTupleOwned(std::move(elements));
+  }
+  if (engine == nullptr) {
+    return Literal::CreateFromShape(shape);
+  }
+  Literal literal(shape);
+  switch (shape.element_type()) {
+    case S8:
+      PopulateWithRandomIntegralDataWithBounds<int8>(
+          &literal, engine, static_cast<int8>(min), static_cast<int8>(max));
+      break;
+    case U8:
+      PopulateWithRandomIntegralDataWithBounds<uint8>(
+          &literal, engine, static_cast<uint8>(min), static_cast<uint8>(max));
+      break;
+    case S16:
+      PopulateWithRandomIntegralDataWithBounds<int16>(
+          &literal, engine, static_cast<int16>(min), static_cast<int16>(max));
+      break;
+    case U16:
+      PopulateWithRandomIntegralDataWithBounds<uint16>(
+          &literal, engine, static_cast<uint16>(min), static_cast<uint16>(max));
+      break;
+    case S32:
+      PopulateWithRandomIntegralDataWithBounds<int32>(
+          &literal, engine, static_cast<int32>(min), static_cast<int32>(max));
+      break;
+    case U32:
+      PopulateWithRandomIntegralDataWithBounds<uint32>(
+          &literal, engine, static_cast<uint32>(min), static_cast<uint32>(max));
+      break;
+    case S64:
+      PopulateWithRandomIntegralDataWithBounds<int64>(
+          &literal, engine, static_cast<int64>(min), static_cast<int64>(max));
+      break;
+    case U64:
+      PopulateWithRandomIntegralDataWithBounds<uint64>(
+          &literal, engine, static_cast<uint64>(min), static_cast<uint64>(max));
+      break;
+    default:
+      return Unimplemented(
+          "Unsupported type for fake random literal generation with bounds: %s",
+          ShapeUtil::HumanString(shape));
+  }
+  return std::move(literal);
+}
+
 enum class ConstantType { kUnknown, kZero, kOne };
 
 // Return the constant type required by this computation, if known.
@@ -297,6 +370,10 @@
       if ((opcode == HloOpcode::kDynamicSlice && op_num >= 1) ||
           (opcode == HloOpcode::kDynamicUpdateSlice && op_num >= 2)) {
         constrained_uses.push_back(instruction);
+      } else if ((opcode == HloOpcode::kGather ||
+                  opcode == HloOpcode::kScatter) &&
+                 op_num == 1) {
+        constrained_uses.push_back(instruction);
       } else if (opcode == HloOpcode::kFusion) {
         const HloInstruction* const to_analyze =
             instruction->fused_parameter(op_num);
@@ -356,6 +433,22 @@
         }
         break;
       }
+      case HloOpcode::kGather:
+      case HloOpcode::kScatter: {
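+        // Constrain the generated indices to be valid for the operand being
+        // gathered from / scattered into: take the tightest bound over the
+        // operand dimensions named by the index map.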
+        const Shape& operand_shape = use->operand(0)->shape();
+        if (use->operand(1) == &param) {
+          auto index_map =
+              use->opcode() == HloOpcode::kGather
+                  ? use->gather_dimension_numbers().start_index_map()
+                  : use->scatter_dimension_numbers()
+                        .scatter_dims_to_operand_dims();
+          for (const auto dim_in_operand : index_map) {
+            index_bound =
+                std::min(index_bound, operand_shape.dimensions(dim_in_operand));
+          }
+        }
+        break;
+      }
       case HloOpcode::kReduce:
       case HloOpcode::kReduceWindow:
         needs_constant = true;
@@ -385,8 +478,8 @@
     return Unimplemented("Conflicting operand generation constraints.");
   }
   if (index_bound != INT64_MAX) {
-    return MakeRandomIndex(index_bound, engine)
-        .Reshape(param.shape().dimensions());
+    return MakeFakeLiteralInternalWithBounds(param.shape(), engine, -1,
+                                             index_bound);
   } else if (needs_constant) {
     switch (constant_type) {
       case ConstantType::kZero:
diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc
index 321c3fb..f68ee04 100644
--- a/tensorflow/compiler/xla/tests/test_utils_test.cc
+++ b/tensorflow/compiler/xla/tests/test_utils_test.cc
@@ -92,12 +92,13 @@
                           MakeFakeArguments(module.get()));
   ASSERT_EQ(args.size(), 5);
 
-  EXPECT_EQ(args[0].Get<int32>({}), 0);
+  EXPECT_GE(args[0].Get<int32>({}), -1);
+  EXPECT_LE(args[0].Get<int32>({}), 1);
 
-  EXPECT_GE(args[1].Get<int32>({}), 0);
-  EXPECT_LE(args[0].Get<int32>({}), 2);
+  EXPECT_GE(args[1].Get<int32>({}), -1);
+  EXPECT_LE(args[1].Get<int32>({}), 2);
 
-  EXPECT_GE(args[2].Get<int32>({}), 0);
+  EXPECT_GE(args[2].Get<int32>({}), -1);
   EXPECT_LE(args[2].Get<int32>({}), 3);
 }
 
@@ -122,12 +123,13 @@
                           MakeFakeArguments(module.get()));
   ASSERT_EQ(args.size(), 7);
 
-  EXPECT_EQ(args[0].Get<int32>({}), 0);
+  EXPECT_GE(args[0].Get<int32>({}), -1);
+  EXPECT_LE(args[0].Get<int32>({}), 1);
 
-  EXPECT_GE(args[1].Get<int32>({}), 0);
-  EXPECT_LE(args[0].Get<int32>({}), 2);
+  EXPECT_GE(args[1].Get<int32>({}), -1);
+  EXPECT_LE(args[1].Get<int32>({}), 2);
 
-  EXPECT_GE(args[2].Get<int32>({}), 0);
+  EXPECT_GE(args[2].Get<int32>({}), -1);
   EXPECT_LE(args[2].Get<int32>({}), 3);
 }
 
@@ -252,5 +254,77 @@
       << ShapeUtil::HumanString(args[1].shape());
 }
 
+XLA_TEST_F(TestUtilsTest, MakeFakeArgumentsForGather) {
+  auto module = ParseHloString(R"(
+  HloModule Test
+
+ENTRY %module(parameter.0: f32[200,100,300], parameter.1: s32[10,2]) ->
+                                                          f32[10,300] {
+  %parameter.0 = f32[200,100,300] parameter(0)
+  %parameter.1 = s32[10,2] parameter(1)
+  ROOT gather = f32[10,300] gather(f32[200,100,300] %parameter.0,
+                                   s32[10,2] %parameter.1),
+      offset_dims={1},
+      collapsed_slice_dims={0,1},
+      start_index_map={0,1},
+      index_vector_dim=1,
+      slice_sizes={1,1,300}
+}
+)")
+                    .ValueOrDie();
+
+  TF_ASSERT_OK_AND_ASSIGN(std::vector<Literal> args,
+                          MakeFakeArguments(module.get()));
+  ASSERT_EQ(args.size(), 2);
+
+  const Shape& indices_shape = args[1].shape();
+  EXPECT_TRUE(
+      ShapeUtil::Equal(indices_shape, ShapeUtil::MakeShape(S32, {10, 2})))
+      << ShapeUtil::HumanString(indices_shape);
+  auto indices = args[1].data<int32>();
+  for (const auto index : indices) {
+    EXPECT_GE(index, -1);
+    EXPECT_LE(index, 100);
+  }
+}
+
+XLA_TEST_F(TestUtilsTest, MakeFakeArgumentsForScatter) {
+  auto module = ParseHloString(R"(
+  HloModule Test
+
+scatter_update (lhs: f32[], rhs: f32[]) -> f32[] {
+  lhs = f32[] parameter(0)
+  ROOT rhs = f32[] parameter(1)
+}
+
+ENTRY main {
+  operand = f32[200,100,300] parameter(0)
+  indices = s32[10,2] parameter(1)
+  updates = f32[10,300] parameter(2)
+  ROOT scatter = f32[200,100,300] scatter(operand, indices, updates),
+    to_apply=scatter_update,
+    update_window_dims={1},
+    inserted_window_dims={0,1},
+    scatter_dims_to_operand_dims={0,1},
+    index_vector_dim=1
+  }
+)")
+                    .ValueOrDie();
+
+  TF_ASSERT_OK_AND_ASSIGN(std::vector<Literal> args,
+                          MakeFakeArguments(module.get()));
+  ASSERT_EQ(args.size(), 3);
+
+  const Shape& indices_shape = args[1].shape();
+  EXPECT_TRUE(
+      ShapeUtil::Equal(indices_shape, ShapeUtil::MakeShape(S32, {10, 2})))
+      << ShapeUtil::HumanString(indices_shape);
+  auto indices = args[1].data<int32>();
+  for (const auto index : indices) {
+    EXPECT_GE(index, -1);
+    EXPECT_LE(index, 100);
+  }
+}
+
 }  // namespace
 }  // namespace xla
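
Both new tests use an f32[200,100,300] operand with the index map {0,1}, so the bound asserted above is min(200, 100) = 100. A one-line self-check in plain C++:

    #include <algorithm>
    #include <cassert>

    int main() {
      const long long dims[3] = {200, 100, 300};  // operand f32[200,100,300]
      // start_index_map / scatter_dims_to_operand_dims = {0, 1}
      assert(std::min(dims[0], dims[1]) == 100);  // matches EXPECT_LE(index, 100)
      return 0;
    }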
diff --git a/tensorflow/compiler/xla/tests/triangular_solve_test.cc b/tensorflow/compiler/xla/tests/triangular_solve_test.cc
new file mode 100644
index 0000000..24ab121
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/triangular_solve_test.cc
@@ -0,0 +1,502 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <limits>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/client/lib/math.h"
+#include "tensorflow/compiler/xla/client/lib/matrix.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using TriangularSolveTest = ClientLibraryTestBase;
+using TriangularSolveLeftLookingTest = ClientLibraryTestBase;
+
+static constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
+
+Array2D<float> AValsLower() {
+  return {{2, kNan, kNan, kNan},
+          {3, 6, kNan, kNan},
+          {4, 7, 9, kNan},
+          {5, 8, 10, 11}};
+}
+
+Array2D<float> AValsUpper() {
+  return {{2, 3, 4, 5},
+          {kNan, 6, 7, 8},
+          {kNan, kNan, 9, 10},
+          {kNan, kNan, kNan, 11}};
+}
+
+Array2D<float> AValsLowerUnitDiagonal() {
+  return {{kNan, kNan, kNan, kNan},
+          {3, kNan, kNan, kNan},
+          {4, 7, kNan, kNan},
+          {5, 8, 10, kNan}};
+}
+
+Array2D<float> AValsUpperUnitDiagonal() {
+  return {{kNan, 3, 4, 5},
+          {kNan, kNan, 7, 8},
+          {kNan, kNan, kNan, 10},
+          {kNan, kNan, kNan, kNan}};
+}
+
+Array2D<float> BValsRight() {
+  return {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
+}
+
+Array2D<float> BValsLeft() {
+  return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};
+}
+
+static constexpr complex64 kNanC64 = complex64(kNan, kNan);
+
+Array2D<complex64> AValsLowerComplex() {
+  return {{2, kNanC64, kNanC64, kNanC64},
+          {complex64(3, 1), 6, kNanC64, kNanC64},
+          {4, complex64(7, 2), 9, kNanC64},
+          {5, 8, complex64(10, 3), 11}};
+}
+
+Array2D<complex64> AValsUpperComplex() {
+  return {{2, 3, complex64(4, 3), 5},
+          {kNanC64, 6, complex64(7, 2), 8},
+          {kNanC64, kNanC64, complex64(9, 1), 10},
+          {kNanC64, kNanC64, kNanC64, 11}};
+}
+
+Array2D<complex64> BValsRightComplex() {
+  return {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
+}
+
+Array2D<complex64> BValsLeftComplex() {
+  return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};
+}
+
+XLA_TEST_F(TriangularSolveTest, EmptyArrays) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data =
+      CreateR2Parameter<float>(Array2D<float>(0, 0), 0, "a", &builder, &a);
+  auto b_data =
+      CreateR2Parameter<float>(Array2D<float>(0, 10), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/true,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::TRANSPOSE);
+
+  ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 10),
+                             {a_data.get(), b_data.get()});
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/false, /*lower=*/true,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::TRANSPOSE);
+
+  Array2D<float> expected({
+      {0.5, 0.08333334, 0.04629629, 0.03367003},
+      {2.5, -0.25, -0.1388889, -0.1010101},
+      {4.5, -0.58333331, -0.32407406, -0.23569024},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleRightLowerNotranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/false, /*lower=*/true,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE);
+
+  Array2D<float> expected({
+      {-0.16414141, -0.06902357, -0.07070707, 0.36363636},
+      {0.64393939, 0.06565657, -0.03030303, 0.72727273},
+      {1.4520202, 0.2003367, 0.01010101, 1.09090909},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleRightUpperTranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/false, /*lower=*/false,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::TRANSPOSE);
+
+  Array2D<float> expected({
+      {-0.16414141, -0.06902357, -0.07070707, 0.36363636},
+      {0.64393939, 0.06565657, -0.03030303, 0.72727273},
+      {1.4520202, 0.2003367, 0.01010101, 1.09090909},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleRightUpperNotranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/false, /*lower=*/false,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE);
+
+  Array2D<float> expected({
+      {0.5, 0.08333334, 0.04629629, 0.03367003},
+      {2.5, -0.25, -0.1388889, -0.1010101},
+      {4.5, -0.58333331, -0.32407406, -0.23569024},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerTranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/true,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::TRANSPOSE);
+
+  Array2D<float> expected({
+      {-0.89646465, -0.69444444, -0.49242424},
+      {-0.27441077, -0.24074074, -0.20707071},
+      {-0.23232323, -0.22222222, -0.21212121},
+      {0.90909091, 1., 1.09090909},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/true,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE);
+
+  Array2D<float> expected({
+      {0.5, 1.0, 1.5},
+      {0.41666667, 0.33333333, 0.25},
+      {0.23148148, 0.18518519, 0.13888889},
+      {0.16835017, 0.13468013, 0.1010101},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNoTransposeUnitDiagonal) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data =
+      CreateR2Parameter<float>(AValsLowerUnitDiagonal(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/true,
+                  /*unit_diagonal=*/true,
+                  /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE);
+
+  Array2D<float> expected(
+      {{1., 2., 3.}, {1., -1., -3.}, {-4., 7., 18.}, {37., -61., -159.}});
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotransposeIrregularblock) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/true,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE);
+
+  Array2D<float> expected({
+      {0.5, 1.0, 1.5},
+      {0.41666667, 0.33333333, 0.25},
+      {0.23148148, 0.18518519, 0.13888889},
+      {0.16835017, 0.13468013, 0.1010101},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/false,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::TRANSPOSE);
+
+  Array2D<float> expected({
+      {0.5, 1.0, 1.5},
+      {0.41666667, 0.33333333, 0.25},
+      {0.23148148, 0.18518519, 0.13888889},
+      {0.16835017, 0.13468013, 0.1010101},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotranspose) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/false,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE);
+
+  Array2D<float> expected({
+      {-0.89646465, -0.69444444, -0.49242424},
+      {-0.27441077, -0.24074074, -0.20707071},
+      {-0.23232323, -0.22222222, -0.21212121},
+      {0.90909091, 1., 1.09090909},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotransposeUnitDiagonal) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data =
+      CreateR2Parameter<float>(AValsUpperUnitDiagonal(), 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/false,
+                  /*unit_diagonal=*/true,
+                  /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE);
+
+  Array2D<float> expected({{-1402., -1538., -1674.},
+                           {575., 631., 687.},
+                           {-93., -102., -111.},
+                           {10., 11., 12.}});
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTransposeConjugate) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data =
+      CreateR2Parameter<complex64>(AValsLowerComplex(), 0, "a", &builder, &a);
+  auto b_data =
+      CreateR2Parameter<complex64>(BValsRightComplex(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/false, /*lower=*/true,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::ADJOINT);
+
+  Array2D<complex64> expected({
+      {0.5, complex64(0.08333333, 0.08333333),
+       complex64(0.02777778, -0.0462963), complex64(0.06313131, -0.01094276)},
+      {2.5, complex64(-0.25, 0.41666667), complex64(-0.23148148, -0.37962963),
+       complex64(0.08670034, -0.02104377)},
+      {4.5, complex64(-0.58333333, 0.75), complex64(-0.49074074, -0.71296296),
+       complex64(0.11026936, -0.03114478)},
+  });
+
+  ComputeAndCompareR2<complex64>(
+      &builder, expected, {a_data.get(), b_data.get()}, ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data =
+      CreateR2Parameter<complex64>(AValsUpperComplex(), 0, "a", &builder, &a);
+  auto b_data =
+      CreateR2Parameter<complex64>(BValsLeftComplex(), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/false,
+                  /*unit_diagonal=*/false,
+                  /*transpose_a=*/TriangularSolveOptions::TRANSPOSE);
+
+  Array2D<complex64> expected({
+      {0.5, 1., 1.5},
+      {0.41666667, 0.33333333, 0.25},
+      {complex64(0.20020325, -2.81504065e-01),
+       complex64(0.13821138, -4.22764228e-01),
+       complex64(0.07621951, -5.64024390e-01)},
+      {complex64(0.19678492, 2.55912786e-01),
+       complex64(0.17738359, 3.84331116e-01),
+       complex64(0.15798226, 5.12749446e-01)},
+  });
+
+  ComputeAndCompareR2<complex64>(
+      &builder, expected, {a_data.get(), b_data.get()}, ErrorSpec(1e-2, 1e-2));
+}
+
+XLA_TEST_F(TriangularSolveTest, BatchedLeftUpper) {
+  XlaBuilder builder(TestName());
+
+  Array3D<float> bvals(7, 5, 5);
+  bvals.FillIota(1.);
+
+  // Set avals to the upper triangle of bvals.
+  Array3D<float> avals = bvals;
+  avals.Each([](absl::Span<const int64> indices, float* value) {
+    if (indices[1] > indices[2]) {
+      *value = 0;
+    }
+  });
+
+  XlaOp a, b;
+  auto a_data = CreateR3Parameter<float>(avals, 0, "a", &builder, &a);
+  auto b_data = CreateR3Parameter<float>(bvals, 1, "b", &builder, &b);
+  BatchDot(
+      ConstantR3FromArray3D(&builder, avals),
+      TriangularSolve(a, b,
+                      /*left_side=*/true, /*lower=*/false,
+                      /*unit_diagonal=*/false,
+                      /*transpose_a=*/TriangularSolveOptions::NO_TRANSPOSE));
+
+  ComputeAndCompareR3<float>(&builder, bvals, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+struct TriangularSolveTestSpec {
+  int m, n;  // A is mxm, B is mxn
+  bool left_side;
+  bool lower;
+  TriangularSolveOptions::Transpose transpose_a;
+};
+
+class TriangularSolveParametricTest
+    : public ClientLibraryTestBase,
+      public ::testing::WithParamInterface<TriangularSolveTestSpec> {};
+
+XLA_TEST_P(TriangularSolveParametricTest, Random) {
+  TriangularSolveTestSpec spec = GetParam();
+
+  XlaBuilder builder(TestName());
+
+  Array2D<float> avals(spec.m, spec.m);
+  avals.FillRandom(1.0);
+  for (int i = 0; i < spec.m; ++i) {
+    avals(i, i) += 10;
+  }
+
+  std::pair<int, int> bdims = spec.left_side ? std::make_pair(spec.m, spec.n)
+                                             : std::make_pair(spec.n, spec.m);
+  Array2D<float> bvals(bdims.first, bdims.second);
+  bvals.FillRandom(1.0);
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(avals, 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(bvals, 1, "b", &builder, &b);
+  auto x = TriangularSolve(a, b, spec.left_side, spec.lower,
+                           /*unit_diagonal=*/false, spec.transpose_a);
+  auto a_tri = Triangle(a, spec.lower);
+  a_tri = MaybeTransposeInMinorDims(
+      a_tri, spec.transpose_a != TriangularSolveOptions::NO_TRANSPOSE);
+  if (spec.left_side) {
+    BatchDot(a_tri, x);
+  } else {
+    BatchDot(x, a_tri);
+  }
+
+  ComputeAndCompareR2<float>(&builder, bvals, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+std::vector<TriangularSolveTestSpec> TriangularSolveTests() {
+  std::vector<TriangularSolveTestSpec> specs;
+  for (int m : {5, 10}) {
+    for (int n : {5, 10}) {
+      for (bool left_side : {false, true}) {
+        for (bool lower : {false, true}) {
+          for (TriangularSolveOptions::Transpose transpose_a :
+               {TriangularSolveOptions::NO_TRANSPOSE,
+                TriangularSolveOptions::TRANSPOSE}) {
+            specs.push_back({m, n, left_side, lower, transpose_a});
+          }
+        }
+      }
+    }
+  }
+  return specs;
+}
+
+INSTANTIATE_TEST_SUITE_P(TriangularSolveParametricTestInstantiation,
+                         TriangularSolveParametricTest,
+                         ::testing::ValuesIn(TriangularSolveTests()));
+
+}  // namespace
+}  // namespace xla
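
As background for the fixtures above: with left_side=true, lower=true, and NO_TRANSPOSE, TriangularSolve computes x such that a*x = b, which plain forward substitution reproduces. A minimal reference implementation in standard C++, independent of XLA and intended only for checking expectations by hand:

    #include <vector>

    // Solves a*x = b for x, where a is an n x n lower-triangular matrix
    // (row-major) and b is n x m. Overwrites b with the solution x.
    void ForwardSubstitute(const std::vector<std::vector<float>>& a,
                           std::vector<std::vector<float>>* b) {
      const int n = a.size();
      const int m = (*b)[0].size();
      for (int j = 0; j < m; ++j) {
        for (int i = 0; i < n; ++i) {
          float s = (*b)[i][j];
          for (int k = 0; k < i; ++k) s -= a[i][k] * (*b)[k][j];
          // With unit_diagonal=true the solver assumes a[i][i] == 1 and
          // skips this division.
          (*b)[i][j] = s / a[i][i];
        }
      }
    }

Running this on AValsLower() and BValsLeft() reproduces the expected matrix in SimpleLeftLowerNotranspose (first row 1/2, 2/2, 3/2).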
diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
index c7337e8..7b7b8f5 100644
--- a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
+++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
@@ -40,8 +40,6 @@
 namespace xla {
 namespace {
 
-namespace gtl = ::tensorflow::gtl;
-
 class HloProfileTest : public ClientLibraryTestBase {};
 
 struct ParsedProfileOutputLine {
diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD
index 52fee47..ebd4bb1 100644
--- a/tensorflow/compiler/xla/tools/BUILD
+++ b/tensorflow/compiler/xla/tools/BUILD
@@ -178,26 +178,6 @@
 )
 
 tf_cc_binary(
-    name = "dumped_computation_to_tf_graphdef",
-    srcs = ["dumped_computation_to_tf_graphdef.cc"],
-    deps = [
-        "//tensorflow/compiler/xla:debug_options_flags",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla:types",
-        "//tensorflow/compiler/xla/client",
-        "//tensorflow/compiler/xla/client:client_library",
-        "//tensorflow/compiler/xla/client:local_client",
-        "//tensorflow/compiler/xla/client:xla_computation",
-        "//tensorflow/compiler/xla/service",
-        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
-        "//tensorflow/compiler/xla/service:hlo_proto",
-        "//tensorflow/compiler/xla/service:interpreter_plugin",
-        "//tensorflow/core:lib",
-        "@com_google_absl//absl/types:span",
-    ],
-)
-
-tf_cc_binary(
     name = "hlo_proto_to_json",
     srcs = ["hlo_proto_to_json.cc"],
     deps = [
diff --git a/tensorflow/compiler/xla/tools/dumped_computation_to_tf_graphdef.cc b/tensorflow/compiler/xla/tools/dumped_computation_to_tf_graphdef.cc
deleted file mode 100644
index f8bb9a6..0000000
--- a/tensorflow/compiler/xla/tools/dumped_computation_to_tf_graphdef.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// Usage: dumped_computation_to_tf_graph some_binary_snapshot_proto*
-//
-// Dumps a tensorflow GraphDef in text format for a snapshot computation. The
-// dumped graph is an HLO computation with HLO instructions as nodes and can be
-// visualized on Tensorboard. Upload the dumped files on Tensorboard.
-//
-// some_binary_snapshot_proto is obtained by serializing the SessionModule from
-// ServiceInterface::SnapshotComputation to disk.
-
-#include <stdio.h>
-#include <memory>
-#include <string>
-
-#include "absl/types/span.h"
-#include "tensorflow/compiler/xla/client/client.h"
-#include "tensorflow/compiler/xla/client/client_library.h"
-#include "tensorflow/compiler/xla/client/local_client.h"
-#include "tensorflow/compiler/xla/client/xla_computation.h"
-#include "tensorflow/compiler/xla/debug_options_flags.h"
-#include "tensorflow/compiler/xla/service/hlo.pb.h"
-#include "tensorflow/compiler/xla/service/service.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/init_main.h"
-#include "tensorflow/core/platform/logging.h"
-
-using tensorflow::Env;
-
-namespace xla {
-namespace tools {
-
-void RealMain(absl::Span<char* const> args) {
-  Client* client = ClientLibrary::LocalClientOrDie();
-  for (char* arg : args) {
-    HloSnapshot module;
-    TF_CHECK_OK(
-        tensorflow::ReadBinaryProto(tensorflow::Env::Default(), arg, &module));
-    XlaComputation computation =
-        client->LoadSnapshot(module).ConsumeValueOrDie();
-    DebugOptions debug_options = GetDebugOptionsFromFlags();
-    debug_options.set_xla_generate_hlo_graph(".*");
-    debug_options.set_xla_hlo_dump_as_graphdef(true);
-    ComputationStats stats =
-        client->GetComputationStats(computation, debug_options)
-            .ConsumeValueOrDie();
-    fprintf(stdout, ">>> %s :: %s\n", arg, stats.DebugString().c_str());
-  }
-}
-
-}  // namespace tools
-}  // namespace xla
-
-int main(int argc, char** argv) {
-  std::vector<tensorflow::Flag> flag_list;
-  xla::AppendDebugOptionsFlags(&flag_list);
-  xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
-  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
-  if (!parse_result) {
-    LOG(ERROR) << "\n" << usage;
-    return 2;
-  }
-
-  tensorflow::port::InitMain(argv[0], &argc, &argv);
-
-  absl::Span<char* const> args(argv, argc);
-  args.remove_prefix(1);  // Pop off the binary name, argv[0]
-  xla::tools::RealMain(args);
-  return 0;
-}
diff --git a/tensorflow/compiler/xla/tools/interactive_graphviz.cc b/tensorflow/compiler/xla/tools/interactive_graphviz.cc
index ac86570..0c7c078 100644
--- a/tensorflow/compiler/xla/tools/interactive_graphviz.cc
+++ b/tensorflow/compiler/xla/tools/interactive_graphviz.cc
@@ -139,9 +139,10 @@
 // Print a help message describing the various available commands.
 void DoHelpCommand() {
   std::cout << R"(Commands:
-  <instruction> [<width>]
-    Renders a neighborhood of <width> nodes around <instruction>.  If <width>
-    is not provided, the default value is )"
+  <instruction> [<width>] [/ <boundary_instruction>+]
+    Renders a neighborhood of <width> nodes around <instruction>, without going
+    beyond the optional boundary instructions.  If <width> is not provided,
+    the default value is )"
             << kDefaultWidth << R"(.
   allpaths <instruction> <instruction> [<n>]
     Renders a subset of all paths from one instruction to the other.  Either
@@ -457,12 +458,6 @@
 // Plot a given instruction neighborhood or computation with graphviz.
 void DoPlotCommand(const Options& opts, const HloModule& module,
                    const std::vector<string>& tokens) {
-  if (tokens.size() > 2) {
-    std::cerr << R"(Illegal input.  Enter e.g. "%fusion.1 42" or "%fusion.1".)"
-              << std::endl;
-    return;
-  }
-
   string node_name = tokens[0];
 
   // Find the node with the given name.
@@ -475,16 +470,43 @@
   }
 
   uint64 graph_width = kDefaultWidth;
-  if (tokens.size() == 2) {
+  absl::flat_hash_set<const HloInstruction*> boundary;
+  if (tokens.size() >= 2) {
     if (comp) {
       std::cerr << "Can only use graph-size parameter with instructions, but "
                 << node_name << " is a computation." << std::endl;
       return;
     }
-    if (!absl::SimpleAtoi(tokens[1], &graph_width)) {
-      std::cerr << "Can't parse '" << tokens[1] << "' as an integer."
-                << std::endl;
-      return;
+
+    int bound_index = 1;
+    // Get the <width> if present.
+    if (absl::SimpleAtoi(tokens[bound_index], &graph_width)) {
+      bound_index++;
+    } else {
+      // <width> not found, need to reset graph_width.
+      graph_width = kDefaultWidth;
+    }
+    // Get the '/'.
+    if (bound_index < tokens.size()) {
+      // This token must be a '/'.
+      if (tokens[bound_index] != "/") {
+        std::cerr << "Expect a /, but get a '" << tokens[bound_index] << "'."
+                  << std::endl;
+        return;
+      }
+      bound_index++;
+    }
+    // Get the boundary nodes.
+    while (bound_index < tokens.size()) {
+      string bnode_name = tokens[bound_index];
+      const HloInstruction* binstr = FindInstruction(module, bnode_name);
+      if (!binstr) {
+        std::cerr << "Couldn't find HloInstruction named " << bnode_name << "."
+                  << std::endl;
+        return;
+      }
+      boundary.insert(binstr);
+      bound_index++;
     }
   }
 
@@ -496,7 +518,9 @@
         /*show_backend_config=*/show_backend_config));
   } else {
     DisplayGraphHandle(opts, hlo_graph_dumper::DumpNeighborhoodAround(
-        *instr, graph_width, /*show_backend_config=*/show_backend_config));
+                                 *instr, graph_width,
+                                 /*show_backend_config=*/show_backend_config,
+                                 /*boundary=*/boundary));
   }
 }
 
@@ -515,7 +539,7 @@
                 << std::endl;
       continue;
     }
-    std::vector<string> tokens = absl::StrSplit(line, ' ');
+    std::vector<string> tokens = absl::StrSplit(line, ' ', absl::SkipEmpty());
     if (tokens[0] == "quit" || tokens[0] == "exit") {
       break;
     } else if (tokens[0] == "help") {
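
The revised DoPlotCommand accepts `<instruction> [<width>] [/ <boundary_instruction>+]`. A compact model of just the token handling (standard C++; a sketch, not the tool's code — note it uses std::stoull where the tool uses the stricter absl::SimpleAtoi):

    #include <cstdint>
    #include <string>
    #include <vector>

    // Returns false on a malformed command. On success, *width holds the
    // parsed width (or default_width when absent) and *boundary_names holds
    // the node names listed after the optional "/" separator.
    bool ParsePlotTokens(const std::vector<std::string>& tokens,
                         uint64_t default_width, uint64_t* width,
                         std::vector<std::string>* boundary_names) {
      *width = default_width;
      size_t i = 1;  // tokens[0] is the instruction name.
      if (i < tokens.size()) {
        try {
          *width = std::stoull(tokens[i]);
          ++i;
        } catch (...) {
          *width = default_width;  // no <width> given; keep the default
        }
      }
      if (i < tokens.size()) {
        if (tokens[i] != "/") return false;  // must be the '/' separator
        ++i;
      }
      for (; i < tokens.size(); ++i) boundary_names->push_back(tokens[i]);
      return true;
    }

For example, the input "%fusion.1 42 / %param.0 %param.1" yields width 42 and two boundary names.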
diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index 21217c2..d665613 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -102,8 +102,9 @@
     argument_layouts.push_back(Shape(param));
     argument_layout_ptrs.push_back(&argument_layouts.back());
   }
-  return client->Compile(computation, argument_layout_ptrs,
-                         ExecutableBuildOptions());
+  ExecutableBuildOptions exec_build_options;
+  *exec_build_options.mutable_debug_options() = GetDebugOptionsFromFlags();
+  return client->Compile(computation, argument_layout_ptrs, exec_build_options);
 }
 
 absl::optional<Shape> GetXfeedShape(bool is_infeed,
@@ -328,7 +329,10 @@
   fprintf(stderr, "%s: is not HloProto. Trying HLO text.\n", filename.c_str());
   string contents;
   TF_RETURN_IF_ERROR(tensorflow::ReadFileToString(env, filename, &contents));
-  StatusOr<std::unique_ptr<HloModule>> module = ParseHloString(contents);
+  HloModuleConfig config;
+  config.set_debug_options(GetDebugOptionsFromFlags());
+  StatusOr<std::unique_ptr<HloModule>> module =
+      ParseHloString(contents, config);
   if (module.ok()) {
     *snapshot.mutable_hlo()->mutable_hlo_module() =
         module.ValueOrDie()->ToProto();
diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc
index 34b73b5..bb8bbf5 100644
--- a/tensorflow/compiler/xla/util.cc
+++ b/tensorflow/compiler/xla/util.cc
@@ -18,6 +18,7 @@
 #include <stdarg.h>
 #include <numeric>
 
+#include "absl/container/inlined_vector.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -80,13 +81,9 @@
   if (rank != permutation.size()) {
     return false;
   }
-  std::vector<int64> output(permutation.size(), -1);
-  for (auto index : permutation) {
-    CHECK_GE(index, 0);
-    CHECK_LT(index, rank);
-    output[index] = 0;
-  }
-  return !absl::c_linear_search(output, -1);
+  absl::InlinedVector<int64, 8> trivial_permutation(rank);
+  absl::c_iota(trivial_permutation, 0);
+  return absl::c_is_permutation(permutation, trivial_permutation);
 }
 
 std::vector<int64> InversePermutation(
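
The rewritten IsPermutation reduces to a library call: build the identity permutation and ask whether the input rearranges it. The same idea in standard C++ (absl::c_iota and absl::c_is_permutation are container-based wrappers over these algorithms):

    #include <algorithm>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    bool IsPermutationOfIota(const std::vector<int64_t>& perm, int64_t rank) {
      if (static_cast<int64_t>(perm.size()) != rank) return false;
      std::vector<int64_t> identity(rank);
      std::iota(identity.begin(), identity.end(), 0);  // 0, 1, ..., rank-1
      return std::is_permutation(perm.begin(), perm.end(), identity.begin());
    }

Unlike the deleted version, this rejects out-of-range or repeated entries by returning false instead of CHECK-failing on them.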
diff --git a/tensorflow/compiler/xla/xla.bzl b/tensorflow/compiler/xla/xla.bzl
index 60adea5..cda2d7c 100644
--- a/tensorflow/compiler/xla/xla.bzl
+++ b/tensorflow/compiler/xla/xla.bzl
@@ -17,7 +17,9 @@
     cc_proto_library(
         name = name,
         srcs = srcs,
-        deps = deps,
+        # Append well-known proto dep. As far as I know this is the only way
+        # for xla_proto_library to access google.protobuf.{Any,Duration,...}.
+        deps = deps + ["@protobuf_archive//:cc_wkt_protos"],
         cc_libs = if_static(
             ["@protobuf_archive//:protobuf"],
             otherwise = ["@protobuf_archive//:protobuf_headers"],
@@ -28,6 +30,11 @@
         **kwargs
     )
 
+def xla_py_proto_library(**kwargs):
+    # Note: we don't currently define a proto library target for Python in OSS.
+    _ignore = kwargs
+    pass
+
 def xla_py_grpc_library(**kwargs):
     # Note: we don't currently define any special targets for Python GRPC in OSS.
     _ignore = kwargs
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index 92834db..925fcbf 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -15,11 +15,11 @@
 
 syntax = "proto3";
 
-import "tensorflow/compiler/xla/xla_data.proto";
-import "tensorflow/compiler/xla/service/hlo.proto";
-
 package xla;
 
+import "tensorflow/compiler/xla/service/hlo.proto";
+import "tensorflow/compiler/xla/xla_data.proto";
+
 // Options for the HLO insert-reduce-precision-operations pass.
 message HloReducePrecisionOptions {
   // Where and when the reduce-precision operations will be added.
@@ -72,8 +72,7 @@
   // Path to dump HLO graphs to.
   string xla_hlo_graph_path = 4;
 
-  // Dump HLO graphs as TensorFlow GraphDefs.
-  bool xla_hlo_dump_as_graphdef = 5;
+  reserved 5;  // Was xla_hlo_dump_as_graphdef
 
   // HLO modules matching this regex will be dumped to LOG(INFO). Set to ".*" to
   // dump *all* HLO modules.
@@ -171,9 +170,7 @@
   // HLO graph.
   bool xla_hlo_graph_sharding_color = 92;
 
-  // Prefix the name scopes of the TF graph exports with "devX" device
-  // assignments, if available.
-  bool xla_hlo_tfgraph_device_scopes = 93;
+  reserved 93;  // Was xla_hlo_tfgraph_device_scopes
 
   // If true, the GPU backend is free to use cudnn for HLO batch normalization
   // ops.
@@ -234,7 +231,23 @@
   // versions of DynamicSlice and DynamicUpdateSlice. Only used for testing.
   bool xla_allow_scalar_index_dynamic_ops = 107;
 
-  // Next id: 108
+  enum StepMarkerLocation {
+    // Generate step mark at each iteration of top level while loop, which
+    // is assumed to be a training loop. This is the default.
+    STEP_MARK_AT_ENTRY = 0;
+    // Generate step mark at program entry. This handles the case where each
+    // step are done by one or multiple programs execution. Only the first
+    // program will be tagged for generating step mark at program entry.
+    STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP = 1;
+    // No step mark.
+    STEP_MARK_NONE = 2;
+  }
+  // Option to emit a target-specific marker to indicate the start of a training
+  // step. The location of the marker (if any) is determined by the option
+  // value.
+  StepMarkerLocation xla_step_marker_location = 108;
+
+  // Next id: 109
 
   // Extra options to pass to the compilation backend (e.g. LLVM); specific
   // interpretation of these values is left to the backend.
@@ -306,8 +319,7 @@
   DeviceHandle device_handle = 3;
 }
 
-message TransferToInfeedResponse {
-}
+message TransferToInfeedResponse {}
 
 message TransferFromOutfeedRequest {
   // This optional field directs the service to return the literal in this
@@ -326,8 +338,7 @@
   DeviceHandle device_handle = 1;
 }
 
-message ResetDeviceResponse {
-}
+message ResetDeviceResponse {}
 
 message ComputationGraphStatsRequest {
   HloModuleProto computation = 1;
@@ -350,8 +361,7 @@
   repeated GlobalDataHandle data = 1;
 }
 
-message UnregisterResponse {
-}
+message UnregisterResponse {}
 
 message CompileRequest {
   // The graph to be compiled.
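
For completeness, the new field is toggled through the generated proto API like any other DebugOptions knob; a hedged sketch, assuming the usual C++ protobuf codegen for xla.proto (the header path is an assumption):

    #include "tensorflow/compiler/xla/xla.pb.h"  // generated header; path assumed

    void ConfigureStepMarkers(xla::DebugOptions* opts) {
      // Emit one marker per iteration of the top-level (training) while loop.
      opts->set_xla_step_marker_location(
          xla::DebugOptions::STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP);
    }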
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index a64e2f5..226299a 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -545,6 +545,26 @@
   // Next: 4
 }
 
+message TriangularSolveOptions {
+  // If true, solves ax = b. If false, solves xa = b.
+  bool left_side = 1;
+
+  // If true, 'a' is lower triangular. If false, 'a' is upper triangular.
+  bool lower = 2;
+
+  // If true, the diagonal elements of 'a' are assumed to be 1 and not accessed.
+  bool unit_diagonal = 3;
+
+  // Should we transpose or use the adjoint of 'a'?
+  enum Transpose {
+    TRANSPOSE_INVALID = 0;
+    NO_TRANSPOSE = 1;  // Don't transpose 'a'.
+    TRANSPOSE = 2;     // Transpose 'a'.
+    ADJOINT = 3;       // Complex conjugate and transpose 'a'.
+  };
+  Transpose transpose_a = 4;
+}
+
 message OpSharding {
   enum Type {
     // This sharding is replicated across all devices (implies maximal,
@@ -604,3 +624,15 @@
 
   // Next: 2
 }
+
+// Describes whether all data-parallelism replicas will receive the same
+// parameter data at each buffer.
+message ParameterReplication {
+  // A list of boolean values for the flattened leaf buffers. Each value
+  // indicates whether the corresponding leaf buffer is replicated.
+  //
+  // If this field is empty, it means no buffer is replicated. Otherwise, the
+  // number of elements in this field must match the number of leaf buffers in
+  // the HLO instruction's shape.
+  repeated bool replicated_at_leaf_buffers = 1;
+}
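
Populating the new TriangularSolveOptions message through the generated API looks like the following sketch (header path assumed; the setters are standard proto3 codegen):

    #include "tensorflow/compiler/xla/xla_data.pb.h"  // generated header; path assumed

    xla::TriangularSolveOptions MakeLeftLowerSolveOptions() {
      xla::TriangularSolveOptions opts;
      opts.set_left_side(true);       // solve a*x = b rather than x*a = b
      opts.set_lower(true);           // 'a' is lower triangular
      opts.set_unit_diagonal(false);  // read the diagonal of 'a'
      opts.set_transpose_a(xla::TriangularSolveOptions::NO_TRANSPOSE);
      return opts;
    }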
diff --git a/tensorflow/compiler/xrt/BUILD b/tensorflow/compiler/xrt/BUILD
index 2dae746..b2718c5 100644
--- a/tensorflow/compiler/xrt/BUILD
+++ b/tensorflow/compiler/xrt/BUILD
@@ -11,9 +11,15 @@
 
 load(
     "//tensorflow:tensorflow.bzl",
+    "tf_custom_op_py_library",
     "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
 )
 load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library")
+load(
+    "//tensorflow/core:platform/default/build_config.bzl",
+    "tf_proto_library_py",
+)
 
 xla_proto_library(
     name = "xrt_proto",
@@ -27,6 +33,12 @@
     ],
 )
 
+tf_proto_library_py(
+    name = "xrt_proto",  # bzl adds a _py suffix
+    srcs = ["xrt.proto"],
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "xrt_utils",
     srcs = [
@@ -78,6 +90,25 @@
     ],
 )
 
+tf_gen_op_wrapper_py(
+    name = "xrt_ops_wrapper_py",
+    out = "xrt_ops.py",
+    deps = [
+        ":xrt_compile_ops_op_lib",
+        ":xrt_execute_op_op_lib",
+        ":xrt_state_ops_op_lib",
+    ],
+)
+
+tf_custom_op_py_library(
+    name = "xrt_ops",
+    kernels = ["//tensorflow/compiler/xrt/kernels:xrt_ops"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":xrt_ops_wrapper_py",
+    ],
+)
+
 cc_library(
     name = "xrt_server",
     visibility = ["//visibility:public"],
diff --git a/tensorflow/compiler/xrt/kernels/BUILD b/tensorflow/compiler/xrt/kernels/BUILD
index dc02fd2..1e32519 100644
--- a/tensorflow/compiler/xrt/kernels/BUILD
+++ b/tensorflow/compiler/xrt/kernels/BUILD
@@ -51,7 +51,10 @@
         "//tensorflow/compiler/xla/service:compiler",
         "//tensorflow/compiler/xla/service:computation_placer",
         "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xrt:xrt_compile_ops_op_lib",
+        "//tensorflow/compiler/xrt:xrt_execute_op_op_lib",
         "//tensorflow/compiler/xrt:xrt_proto",
+        "//tensorflow/compiler/xrt:xrt_state_ops_op_lib",
         "//tensorflow/compiler/xrt:xrt_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc
index 6a7f106..343f43b 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc
@@ -122,6 +122,17 @@
                             .HostMemory("literal"),
                         XRTReadLiteralOp<true, XRTGenericDeviceAccessor>);
 
+REGISTER_KERNEL_BUILDER(Name("XRTReadToTensor")
+                            .Device(DEVICE_XLA_GPU)
+                            .HostMemory("handles")
+                            .HostMemory("tensors"),
+                        XRTReadToTensorOp<XRTGenericDeviceAccessor>);
+REGISTER_KERNEL_BUILDER(Name("XRTReadToTensor")
+                            .Device(DEVICE_XLA_CPU)
+                            .HostMemory("handles")
+                            .HostMemory("tensors"),
+                        XRTReadToTensorOp<XRTGenericDeviceAccessor>);
+
 REGISTER_KERNEL_BUILDER(Name("XRTReleaseAllocationHandle")
                             .Device(DEVICE_XLA_GPU)
                             .HostMemory("handle"),
diff --git a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
index e2c223b..6af73ec 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
+++ b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
@@ -25,6 +25,7 @@
 
 #include "tensorflow/compiler/tf2xla/literal_util.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/literal.h"
@@ -40,6 +41,7 @@
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -215,27 +217,29 @@
     OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &tf_shapes_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("make_tuple", &make_tuple));
+    std::vector<int64> minor_to_major;
     if (ctx->HasAttr("layouts")) {
-      OP_REQUIRES_OK(ctx, ctx->GetAttr("layouts", &minor_to_major_));
+      OP_REQUIRES_OK(ctx, ctx->GetAttr("layouts", &minor_to_major));
     }
     OP_REQUIRES(
         ctx, tf_shapes_.size() == dtypes_.size(),
         errors::InvalidArgument("shapes and dtypes must be the same length"));
     std::vector<xla::Shape> xla_shapes;
+    xla_shapes.reserve(tf_shapes_.size());
     for (int i = 0; i < tf_shapes_.size(); i++) {
       xla::Shape xla_shape;
       OP_REQUIRES_OK(
           ctx, TensorShapeToXLAShape(dtypes_[i], tf_shapes_[i], &xla_shape));
-      xla_shapes.push_back(xla_shape);
+      xla_shapes.push_back(std::move(xla_shape));
     }
     if (xla_shapes.size() > 1 || make_tuple) {
       shape_ = xla::ShapeUtil::MakeTupleShape(xla_shapes);
     } else {
       shape_.Swap(&xla_shapes.front());
     }
-    if (!minor_to_major_.empty()) {
+    if (!minor_to_major.empty()) {
       xla::Shape shape_with_layouts;
-      OP_REQUIRES_OK(ctx, GetShapeWithLayout(shape_, minor_to_major_,
+      OP_REQUIRES_OK(ctx, GetShapeWithLayout(shape_, minor_to_major,
                                              /*layout_func=*/nullptr,
                                              &shape_with_layouts));
       shape_.Swap(&shape_with_layouts);
@@ -304,7 +308,6 @@
  private:
   std::vector<TensorShape> tf_shapes_;
   DataTypeVector dtypes_;
-  std::vector<int64> minor_to_major_;
   xla::Shape shape_;
 };
 
@@ -487,7 +490,7 @@
     OP_REQUIRES_OK(ctx, DeviceAccessor::InitScopedRef(
                             ctx, allocation->device_ordinal(), &device_ref));
 
-    xla::Literal literal;
+    xla::Literal literal(allocation->on_host_shape());
     OP_REQUIRES_OK(
         ctx, allocation->ToLiteral(device_ref.backend(),
                                    device_ref.device_ordinal(), &literal));
@@ -499,6 +502,96 @@
   }
 };
 
+// Op that reads a device-resident tuple to host memory and returns it as a
+// literal.
+template <class DeviceAccessor>
+class XRTReadToTensorOp : public OpKernel {
+ public:
+  explicit XRTReadToTensorOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("release_handles", &discard_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_));
+  }
+  ~XRTReadToTensorOp() override = default;
+  XRTReadToTensorOp(const XRTReadToTensorOp&) = delete;
+  XRTReadToTensorOp& operator=(const XRTReadToTensorOp&) = delete;
+
+  void Compute(OpKernelContext* ctx) override {
+    VLOG(1) << "XRTReadToTensorOp::Compute";
+
+    const Tensor& handle_tensor = ctx->input(0);
+    // TODO(phawkins,dlibenzi): accept multiple handles (i.e., vectors, not
+    // just scalars.)
+    OP_REQUIRES(
+        ctx, TensorShapeUtils::IsScalar(handle_tensor.shape()),
+        errors::Internal("computation input should be an int64 scalar"));
+    int64 allocation_handle = handle_tensor.scalar<int64>()();
+
+    ResourceMgr* rm;
+    OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
+
+    XRTTupleAllocation* allocation;
+    OP_REQUIRES_OK(
+        ctx, XRTTupleAllocation::Lookup(rm, allocation_handle, &allocation));
+    core::ScopedUnref allocation_unref(allocation);
+
+    if (discard_) {
+      VLOG(2) << "Releasing handle " << allocation_handle;
+      OP_REQUIRES_OK(ctx, XRTTupleAllocation::DeleteFromResourceManager(
+                              rm, allocation_handle));
+    }
+
+    // We are guaranteed that the underlying device object won't be deleted out
+    // from under us, while the ScopedRef is live.
+    class DeviceAccessor::ScopedRef device_ref;
+    OP_REQUIRES_OK(ctx, DeviceAccessor::InitScopedRef(
+                            ctx, allocation->device_ordinal(), &device_ref));
+
+    xla::Shape shape = allocation->on_host_shape();
+    int output = 0;
+    Status status = xla::ShapeUtil::ForEachMutableSubshapeWithStatus(
+        &shape,
+        [&](xla::Shape* subshape, const xla::ShapeIndex& index) -> Status {
+          if (subshape->IsTuple()) return Status::OK();
+
+          xla::PrimitiveType xla_type;
+          TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(
+              ctx->expected_output_dtype(output), &xla_type));
+          if (xla_type != subshape->element_type()) {
+            return errors::InvalidArgument(
+                "Type mismatch between buffer type (", subshape->ToString(),
+                ") and tensor type (",
+                DataTypeString(ctx->expected_output_dtype(output)),
+                ") for output tensor ", output);
+          }
+
+          TensorShape output_shape;
+          TF_RETURN_IF_ERROR(XLAShapeToTensorShape(*subshape, &output_shape));
+
+          Tensor* output_tensor;
+          TF_RETURN_IF_ERROR(
+              ctx->allocate_output(output, output_shape, &output_tensor));
+
+          XRTTupleAllocation* sub;
+          TF_RETURN_IF_ERROR(XRTTupleAllocation::MakeSubBuffer(
+              allocation, index, &sub, /*alias_parent_allocation=*/true));
+          core::ScopedUnref sub_unref(sub);
+
+          xla::MutableBorrowingLiteral literal;
+          TF_RETURN_IF_ERROR(HostTensorToMutableBorrowingLiteral(
+              xla::LayoutUtil::GetWithDefaultLayout(*subshape), output_tensor,
+              &literal));
+          TF_RETURN_IF_ERROR(sub->ToLiteral(
+              device_ref.backend(), device_ref.device_ordinal(), &literal));
+
+          ++output;
+          return Status::OK();
+        });
+    OP_REQUIRES_OK(ctx, status);
+  }
+  bool discard_;
+  DataTypeVector dtypes_;
+};
+
 // Op that writes a new literal value into device-resident memory.
 template <class DeviceAccessor>
 class XRTWriteLiteralOp : public OpKernel {
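
XRTReadToTensorOp above allocates one output per leaf buffer, in the order produced by walking the tuple shape depth-first. A minimal stand-alone model of that ordering (plain C++; TupleNode is a hypothetical stand-in for xla::Shape and its subshapes):

    #include <functional>
    #include <vector>

    // Hypothetical stand-in for a tuple-shaped value: leaves carry a payload
    // id, interior nodes carry children.
    struct TupleNode {
      int leaf_id = -1;                 // meaningful only for leaves
      std::vector<TupleNode> children;  // non-empty for tuple nodes
    };

    // Visits leaves in the same order XRTReadToTensor assigns output slots.
    void ForEachLeaf(const TupleNode& node, const std::function<void(int)>& fn) {
      if (node.children.empty()) {
        fn(node.leaf_id);
        return;
      }
      for (const TupleNode& child : node.children) ForEachLeaf(child, fn);
    }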
diff --git a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc
index 2e743fe..8832270 100644
--- a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc
+++ b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc
@@ -151,6 +151,27 @@
 'literal' is a serialized xla::LiteralProto proto.
 )");
 
+REGISTER_OP("XRTReadToTensor")
+    .Input("handles: int64")
+    .Attr("release_handles: bool = False")
+    .Attr("dtypes: list(type)")
+    .Output("tensors: dtypes")
+    .SetShapeFn(tensorflow::shape_inference::UnknownShape)
+    .Doc(
+        R"(
+Copies allocated values from device memory and returns them as zero or more
+Tensors. If a handle refers to a non-tuple buffer, a single tensor is returned.
+In general, the tensors returned for a handle correspond to an in-order
+traversal of the tuple-tree value referenced by the handle.
+
+'handles' contains ids returned from Ops that produced on-device allocations.
+At present, only a single (scalar) handle is supported.
+'dtypes' are the expected types for each `Tensor` to be returned. If the
+expected and actual tensor types do not match, an error is returned.
+'release_handles': if True, `handles` are released.
+'tensors' are the output Tensors.
+)");
+
 REGISTER_OP("XRTReleaseAllocationHandle")
     .Input("handle: int64")
     .SetShapeFn(tensorflow::shape_inference::NoOutputs)
diff --git a/tensorflow/compiler/xrt/xrt_state.cc b/tensorflow/compiler/xrt/xrt_state.cc
index 78a1b6a..1b3bcbe 100644
--- a/tensorflow/compiler/xrt/xrt_state.cc
+++ b/tensorflow/compiler/xrt/xrt_state.cc
@@ -220,7 +220,7 @@
 }
 
 Status XRTTupleAllocation::ToLiteral(xla::Backend* backend, int device_ordinal,
-                                     xla::Literal* literal) {
+                                     xla::MutableLiteralBase* literal) {
   auto transfer_manager = backend->transfer_manager();
   TF_ASSIGN_OR_RETURN(auto stream, backend->BorrowStream(device_ordinal));
 
@@ -234,9 +234,8 @@
                                      " has been released");
     }
   }
-  TF_ASSIGN_OR_RETURN(*literal, transfer_manager->TransferLiteralFromDevice(
-                                    stream.get(), shaped_buffer));
-  return Status::OK();
+  return transfer_manager->TransferLiteralFromDevice(stream.get(),
+                                                     shaped_buffer, *literal);
 }
 
 Status XRTTupleAllocation::WriteLiteral(xla::Backend* backend,
diff --git a/tensorflow/compiler/xrt/xrt_state.h b/tensorflow/compiler/xrt/xrt_state.h
index ddf2656..6519da3 100644
--- a/tensorflow/compiler/xrt/xrt_state.h
+++ b/tensorflow/compiler/xrt/xrt_state.h
@@ -147,7 +147,7 @@
 
   // Copies the allocation from device to host and returns it in literal.
   Status ToLiteral(xla::Backend* backend, int device_ordinal,
-                   xla::Literal* literal);
+                   xla::MutableLiteralBase* literal);
 
   // Write a new literal value to the allocation.
   Status WriteLiteral(xla::Backend* backend, const xla::Literal& literal);
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 25f2640..0173b8b 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -218,7 +218,6 @@
         "//tensorflow/contrib/tensor_forest:stats_ops_op_lib",
         "//tensorflow/contrib/tensor_forest:tensor_forest_ops_op_lib",
         "//tensorflow/contrib/text:all_ops",
-        "//tensorflow/contrib/tpu:all_ops",
     ] + select({
         "//tensorflow:android": [],
         "//tensorflow:ios": [],
diff --git a/tensorflow/contrib/android/BUILD b/tensorflow/contrib/android/BUILD
index f0b1c92..5608e7d 100644
--- a/tensorflow/contrib/android/BUILD
+++ b/tensorflow/contrib/android/BUILD
@@ -73,8 +73,7 @@
         "-z defs",
         "-s",
         "-Wl,--gc-sections",
-        "-Wl,--version-script",  # This line must be directly followed by LINKER_SCRIPT.
-        "$(location {})".format(LINKER_SCRIPT),
+        "-Wl,--version-script,$(location {})".format(LINKER_SCRIPT),
     ]),
     linkshared = 1,
     linkstatic = 1,
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc b/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
index 6138d79..f063759 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
@@ -19,7 +19,6 @@
 #include "tensorflow/core/lib/core/threadpool.h"
 
 namespace tensorflow {
-
 namespace {
 
 class BigtableClientOp : public OpKernel {
@@ -341,8 +340,8 @@
   }
 
   template <typename T>
-  Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
+  Status ParseScalarArgument(OpKernelContext* ctx, StringPiece argument_name,
+                             T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
@@ -360,5 +359,4 @@
 
 }  // namespace
 }  // namespace data
-
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc
index e6fda9e..d9fce6e 100644
--- a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc
+++ b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc
@@ -335,6 +335,17 @@
   return grpc::Status(grpc::StatusCode::UNIMPLEMENTED,
                       "ReadModifyWriteRow not implemented.");
 }
+std::unique_ptr<grpc::ClientAsyncResponseReaderInterface<
+    google::bigtable::v2::ReadModifyWriteRowResponse>>
+BigtableTestClient::AsyncReadModifyWriteRow(
+    grpc::ClientContext* context,
+    google::bigtable::v2::ReadModifyWriteRowRequest const& request,
+    grpc::CompletionQueue* cq) {
+  LOG(WARNING) << "Call to AsyncReadModifyWriteRow:" << __func__
+               << "(); this will likely cause a crash!";
+  return nullptr;
+}
+
 std::unique_ptr<
     grpc::ClientReaderInterface<google::bigtable::v2::ReadRowsResponse>>
 BigtableTestClient::ReadRows(
diff --git a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h
index 8e1326f..63d59b3 100644
--- a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h
+++ b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h
@@ -46,6 +46,13 @@
       google::bigtable::v2::ReadModifyWriteRowRequest const& request,
       google::bigtable::v2::ReadModifyWriteRowResponse* response) override;
 
+  std::unique_ptr<grpc::ClientAsyncResponseReaderInterface<
+      google::bigtable::v2::ReadModifyWriteRowResponse>>
+  AsyncReadModifyWriteRow(
+      grpc::ClientContext* context,
+      google::bigtable::v2::ReadModifyWriteRowRequest const& request,
+      grpc::CompletionQueue* cq) override;
+
   std::unique_ptr<
       grpc::ClientReaderInterface<google::bigtable::v2::ReadRowsResponse>>
   ReadRows(grpc::ClientContext* context,
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py
index a178820..5ffbb90 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py
@@ -84,12 +84,10 @@
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
         dict by the key 'leaf_index'. It is a Tensor of rank 2 and its shape is
-        [batch_size, num_trees].
-        For example,
-        result_iter = classifier.predict(...)
-        for result_dict in result_iter:
-          # access leaf index list by result_dict["leaf_index"]
-          # which contains one leaf index per tree
+        [batch_size, num_trees]. For example:
+          result_iter = classifier.predict(...)
+          for result_dict in result_iter:
+            # access leaf index list by result_dict["leaf_index"],
+            # which contains one leaf index per tree
       override_global_step_value: If after the training is done, global step
         value must be reset to this value. This should be used to reset global
         step to a number > number of steps used to train the current ensemble.
@@ -179,8 +177,8 @@
         `[batch_size, label_dimension]`).
       num_trees: An int, number of trees to build.
       feature_columns: A list of feature columns.
-      label_name: String, name of the key in label dict. Can be null if label
-          is a tensor (single headed models).
+      label_name: String, name of the key in label dict. Can be null if label is
+        a tensor (single headed models).
       weight_column_name: Name of the column for weights, or None if not
         weighted.
       model_dir: Directory for model exports, etc.
@@ -195,11 +193,11 @@
         opposed to contrib) version of tensorflow.
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
-        dict by the key 'leaf_index'. For example,
-        result_dict = classifier.predict(...)
-        for example_prediction_result in result_dict:
-          # access leaf index list by example_prediction_result["leaf_index"]
-          # which contains one leaf index per tree
+        dict by the key 'leaf_index'. For example:
+          result_dict = classifier.predict(...)
+          for example_prediction_result in result_dict:
+            # access leaf index list by
+            # example_prediction_result["leaf_index"],
+            # which contains one leaf index per tree
       override_global_step_value: If after the training is done, global step
         value must be reset to this value. This should be used to reset global
         step to a number > number of steps used to train the current ensemble.
@@ -286,11 +284,11 @@
         opposed to contrib) version of tensorflow.
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
-        dict by the key 'leaf_index'. For example,
-        result_dict = classifier.predict(...)
-        for example_prediction_result in result_dict:
-          # access leaf index list by example_prediction_result["leaf_index"]
-          # which contains one leaf index per tree
+        dict by the key 'leaf_index'. For example:
+          result_dict = classifier.predict(...)
+          for example_prediction_result in result_dict:
+            # example_prediction_result["leaf_index"] contains one leaf index
+            # per tree.
       override_global_step_value: If after the training is done, global step
         value must be reset to this value. This should be used to reset global
         step to a number > number of steps used to train the current ensemble.
@@ -353,10 +351,9 @@
         layer. It can also be a function that computes the number of examples
         based on the depth of the layer that's being built.
       head: `Head` instance.
-      ranking_model_pair_keys: Keys to distinguish between features
-        for left and right part of the training pairs for ranking. For example,
-        for an Example with features "a.f1" and "b.f1", the keys would be
-        ("a", "b").
+      ranking_model_pair_keys: Keys to distinguish between features for left and
+        right part of the training pairs for ranking. For example, for an
+        Example with features "a.f1" and "b.f1", the keys would be ("a", "b").
       num_trees: An int, number of trees to build.
       feature_columns: A list of feature columns.
       weight_column_name: Name of the column for weights, or None if not
@@ -376,12 +373,10 @@
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
         dict by the key 'leaf_index'. It is a Tensor of rank 2 and its shape is
-        [batch_size, num_trees].
-        For example,
-        result_iter = classifier.predict(...)
-        for result_dict in result_iter:
-          # access leaf index list by result_dict["leaf_index"]
-          # which contains one leaf index per tree
+        [batch_size, num_trees]. For example:
+          result_iter = classifier.predict(...)
+          for result_dict in result_iter:
+            # result_dict["leaf_index"] contains one leaf index per tree.
       override_global_step_value: If after the training is done, global step
         value must be reset to this value. This should be used to reset global
         step to a number > number of steps used to train the current ensemble.
@@ -417,12 +412,12 @@
         config=config,
         feature_engineering_fn=feature_engineering_fn)
 
+
 # When using this estimator, make sure to regularize the hessian (at least l2,
 # min_node_weight)!
 # TODO(nponomareva): extend to take multiple quantiles in one go.
 class GradientBoostedDecisionTreeQuantileRegressor(estimator.Estimator):
-  """An estimator that does quantile regression and returns quantile estimates.
-  """
+  """An estimator that does quantile regression and returns quantile estimates."""
 
   def __init__(self,
                learner_config,
@@ -449,8 +444,8 @@
         layer. It can also be a function that computes the number of examples
         based on the depth of the layer that's being built.
       quantiles: a list of quantiles for the loss, each between 0 and 1.
-      label_dimension: Dimension of regression label. This is the size
-        of the last dimension of the labels `Tensor` (typically, this has shape
+      label_dimension: Dimension of regression label. This is the size of the
+        last dimension of the labels `Tensor` (typically, this has shape
         `[batch_size, label_dimension]`). When label_dimension>1, it is
         recommended to use multiclass strategy diagonal hessian or full hessian.
       num_trees: An int, number of trees to build.
@@ -469,11 +464,11 @@
         opposed to contrib) version of tensorflow.
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
-        dict by the key 'leaf_index'. For example,
-        result_dict = classifier.predict(...)
-        for example_prediction_result in result_dict:
-          # access leaf index list by example_prediction_result["leaf_index"]
-          # which contains one leaf index per tree
+        dict by the key 'leaf_index'. For example:
+          result_dict = classifier.predict(...)
+          for example_prediction_result in result_dict:
+            # example_prediction_result["leaf_index"] contains one leaf index
+            # per tree.
       override_global_step_value: If after the training is done, global step
         value must be reset to this value. This should be used to reset global
         step to a number > number of steps used to train the current ensemble.
@@ -519,6 +514,7 @@
         config=config,
         feature_engineering_fn=feature_engineering_fn)
 
+
 # ================== New Estimator interface===================================
 # The estimators below use new core Estimator interface and must be used with
 # new feature columns and heads.
@@ -534,10 +530,8 @@
 
   def loss_fn(labels, logits):
     result = losses.per_example_maxent_loss(
-        labels=labels,
-        logits=logits,
-        weights=weight_column,
-        num_classes=n_classes)
+        # Don't pass the weights: head already multiplies by them.
+        labels=labels, logits=logits, weights=None, num_classes=n_classes)
     return result[0]
 
   # pylint:disable=protected-access
@@ -564,7 +558,8 @@
     result = losses.per_example_quantile_regression_loss(
         labels=labels,
         predictions=logits,
-        weights=weight_column,
+        # Don't pass the weights: head already multiplies by them.
+        weights=None,
         quantile=quantiles)
     return result[0]
 
@@ -623,11 +618,11 @@
         the bias.
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
-        dict by the key 'leaf_index'. For example,
-        result_dict = classifier.predict(...)
-        for example_prediction_result in result_dict:
-          # access leaf index list by example_prediction_result["leaf_index"]
-          # which contains one leaf index per tree
+        dict by the key 'leaf_index'. For example:
+          result_dict = classifier.predict(...)
+          for example_prediction_result in result_dict:
+            # example_prediction_result["leaf_index"] contains one leaf index
+            # per tree.
       num_quantiles: Number of quantiles to build for numeric feature values.
     """
 
@@ -685,10 +680,9 @@
         layer. It can also be a function that computes the number of examples
         based on the depth of the layer that's being built.
       head: `Head` instance.
-      ranking_model_pair_keys: Keys to distinguish between features
-        for left and right part of the training pairs for ranking. For example,
-        for an Example with features "a.f1" and "b.f1", the keys would be
-        ("a", "b").
+      ranking_model_pair_keys: Keys to distinguish between features for left and
+        right part of the training pairs for ranking. For example, for an
+        Example with features "a.f1" and "b.f1", the keys would be ("a", "b").
       num_trees: An int, number of trees to build.
       feature_columns: A list of feature columns.
       weight_column_name: Name of the column for weights, or None if not
@@ -703,12 +697,10 @@
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
         dict by the key 'leaf_index'. It is a Tensor of rank 2 and its shape is
-        [batch_size, num_trees].
-        For example,
-        result_iter = classifier.predict(...)
-        for result_dict in result_iter:
-          # access leaf index list by result_dict["leaf_index"]
-          # which contains one leaf index per tree
+        [batch_size, num_trees]. For example:
+          result_iter = classifier.predict(...)
+          for result_dict in result_iter:
+            # result_dict["leaf_index"] contains one leaf index per tree.
       num_quantiles: Number of quantiles to build for numeric feature values.
 
     Raises:
@@ -748,8 +740,7 @@
 # TODO(nponomareva): extend to take multiple quantiles in one go.
 class CoreGradientBoostedDecisionTreeQuantileRegressor(
     core_estimator.Estimator):
-  """An estimator that does quantile regression and returns quantile estimates.
-  """
+  """An estimator that does quantile regression and returns quantile estimates."""
 
   def __init__(self,
                learner_config,
@@ -775,8 +766,8 @@
         layer. It can also be a function that computes the number of examples
         based on the depth of the layer that's being built.
       quantiles: a list of quantiles for the loss, each between 0 and 1.
-      label_dimension: Dimension of regression label. This is the size
-        of the last dimension of the labels `Tensor` (typically, this has shape
+      label_dimension: Dimension of regression label. This is the size of the
+        last dimension of the labels `Tensor` (typically, this has shape
         `[batch_size, label_dimension]`). When label_dimension>1, it is
         recommended to use multiclass strategy diagonal hessian or full hessian.
       num_trees: An int, number of trees to build.
@@ -795,11 +786,11 @@
         the bias.
       output_leaf_index: whether to output leaf indices along with predictions
         during inference. The leaf node indexes are available in predictions
-        dict by the key 'leaf_index'. For example,
-        result_dict = classifier.predict(...)
-        for example_prediction_result in result_dict:
-          # access leaf index list by example_prediction_result["leaf_index"]
-          # which contains one leaf index per tree
+        dict by the key 'leaf_index'. For example:
+          result_dict = classifier.predict(...)
+          for example_prediction_result in result_dict:
+            # example_prediction_result["leaf_index"] contains one leaf index
+            # per tree.
       num_quantiles: Number of quantiles to build for numeric feature values.
     """
     if len(quantiles) > 1:
@@ -814,7 +805,9 @@
           params={
               'head':
                   core_quantile_regression_head(
-                      quantiles[0], label_dimension=label_dimension),
+                      quantiles[0],
+                      label_dimension=label_dimension,
+                      weight_column=weight_column_name),
               'feature_columns':
                   feature_columns,
               'learner_config':
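A note on the reflowed docstrings above: the `output_leaf_index` flag they describe adds a per-tree leaf index to each prediction dict. A minimal usage sketch follows; the learner config, input functions, and sizes are illustrative placeholders, not values taken from this change.

```python
from tensorflow.contrib.boosted_trees.estimator_batch import estimator

# Assumed to exist elsewhere: learner_config, train_input_fn, test_input_fn.
classifier = estimator.GradientBoostedDecisionTreeClassifier(
    learner_config=learner_config,
    num_trees=10,
    examples_per_layer=1000,
    output_leaf_index=True)
classifier.fit(input_fn=train_input_fn, steps=100)

for result_dict in classifier.predict(input_fn=test_input_fn):
  # One leaf index per tree for this example; rank 2 over the whole batch.
  leaf_indices = result_dict["leaf_index"]
```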
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
index 47d910d..5a8b2ba 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
@@ -399,8 +399,8 @@
   def testQuantileRegression(self):
     learner_config = learner_pb2.LearnerConfig()
     learner_config.num_classes = 2
-    learner_config.constraints.max_tree_depth = 3
-    learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
+    learner_config.constraints.max_tree_depth = 6
+    learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
     learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
     learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
     learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
@@ -413,7 +413,7 @@
     model_upper = estimator.GradientBoostedDecisionTreeQuantileRegressor(
         quantiles=[0.95],
         learner_config=learner_config,
-        num_trees=100,
+        num_trees=12,
         examples_per_layer=_QUANTILE_REGRESSION_SIZE,
         center_bias=False)
 
@@ -428,31 +428,12 @@
     self.assertTrue(frac_below_upper >= 0.92)
     self.assertTrue(frac_below_upper <= 0.98)
 
-    train_input_fn, test_input_fn, _ = _quantile_regression_input_fns()
-    model_lower = estimator.GradientBoostedDecisionTreeQuantileRegressor(
-        quantiles=[0.05],
-        learner_config=learner_config,
-        num_trees=100,
-        examples_per_layer=_QUANTILE_REGRESSION_SIZE,
-        center_bias=False)
-
-    model_lower.fit(input_fn=train_input_fn, steps=1000)
-    result_iter = model_lower.predict(input_fn=test_input_fn)
-    lower = []
-    for prediction_dict in result_iter:
-      lower.append(prediction_dict["scores"])
-
-    frac_above_lower = round(1. * np.count_nonzero(lower < y) / len(y), 3)
-    # +/- 3%
-    self.assertTrue(frac_above_lower >= 0.92)
-    self.assertTrue(frac_above_lower <= 0.98)
-
   # Multi-dimensional quantile regression.
   def testQuantileRegressionMultiDimLabel(self):
     learner_config = learner_pb2.LearnerConfig()
     learner_config.num_classes = 2
-    learner_config.constraints.max_tree_depth = 3
-    learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
+    learner_config.constraints.max_tree_depth = 6
+    learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
     learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
     learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
     learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
@@ -467,7 +448,7 @@
         quantiles=[0.95],
         learner_config=learner_config,
         label_dimension=2,
-        num_trees=100,
+        num_trees=18,
         examples_per_layer=_QUANTILE_REGRESSION_SIZE,
         center_bias=False)
 
@@ -490,35 +471,6 @@
     self.assertTrue(frac_both_below_upper >= 0.91)
     self.assertTrue(frac_both_below_upper <= 0.99)
 
-    train_input_fn, test_input_fn, _ = _quantile_regression_input_fns(
-        two_dimension=True)
-    model_lower = estimator.GradientBoostedDecisionTreeQuantileRegressor(
-        quantiles=[0.05],
-        learner_config=learner_config,
-        label_dimension=2,
-        num_trees=100,
-        examples_per_layer=_QUANTILE_REGRESSION_SIZE,
-        center_bias=False)
-
-    model_lower.fit(input_fn=train_input_fn, steps=1000)
-    result_iter = model_lower.predict(input_fn=test_input_fn)
-    lower = []
-    for prediction_dict in result_iter:
-      lower.append(prediction_dict["scores"])
-
-    count_above_lower = np.count_nonzero(lower < y, axis=0)
-    count_both_aboce_lower = np.count_nonzero(np.prod(lower < y, axis=1))
-    frac_above_lower_0 = round(1. * count_above_lower[0] / len(y), 3)
-    frac_above_lower_1 = round(1. * count_above_lower[1] / len(y), 3)
-    frac_both_above_lower = round(1. * count_both_aboce_lower / len(y), 3)
-    # +/- 3%
-    self.assertTrue(frac_above_lower_0 >= 0.92)
-    self.assertTrue(frac_above_lower_0 <= 0.98)
-    self.assertTrue(frac_above_lower_1 >= 0.92)
-    self.assertTrue(frac_above_lower_1 <= 0.98)
-    self.assertTrue(frac_both_above_lower >= 0.91)
-    self.assertTrue(frac_both_above_lower <= 0.99)
-
 
 class CoreGradientBoostedDecisionTreeEstimators(test_util.TensorFlowTestCase):
 
@@ -712,11 +664,12 @@
     est.evaluate(input_fn=input_fn, steps=1)
     est.predict(input_fn=input_fn)
 
-  # One dimensional quantile regression.
-  def testQuantileRegression(self):
+  # Quantile regression in core is the same as in the non-core estimator, so
+  # we just check that it does not fail.
+  def testQuantileRegressionDoesNotThrowException(self):
     learner_config = learner_pb2.LearnerConfig()
     learner_config.num_classes = 2
-    learner_config.constraints.max_tree_depth = 3
+    learner_config.constraints.max_tree_depth = 1
     learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
     learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
     learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
@@ -731,112 +684,12 @@
     model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
         quantiles=[0.95],
         learner_config=learner_config,
-        num_trees=100,
+        num_trees=1,
         examples_per_layer=_QUANTILE_REGRESSION_SIZE,
         center_bias=False)
 
     model_upper.train(input_fn=train_input_fn, steps=1000)
     result_iter = model_upper.predict(input_fn=test_input_fn)
-    upper = []
-    for prediction_dict in result_iter:
-      upper.append(prediction_dict["predictions"])
-
-    frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
-    # +/- 3%
-    self.assertTrue(frac_below_upper >= 0.92)
-    self.assertTrue(frac_below_upper <= 0.98)
-
-    train_input_fn, test_input_fn, _ = _quantile_regression_input_fns()
-    model_lower = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
-        quantiles=[0.05],
-        learner_config=learner_config,
-        num_trees=100,
-        examples_per_layer=_QUANTILE_REGRESSION_SIZE,
-        center_bias=False)
-
-    model_lower.train(input_fn=train_input_fn, steps=1000)
-    result_iter = model_lower.predict(input_fn=test_input_fn)
-    lower = []
-    for prediction_dict in result_iter:
-      lower.append(prediction_dict["predictions"])
-
-    frac_above_lower = round(1. * np.count_nonzero(lower < y) / len(y), 3)
-    # +/- 3%
-    self.assertTrue(frac_above_lower >= 0.92)
-    self.assertTrue(frac_above_lower <= 0.98)
-
-  # Multi-dimensional quantile regression.
-  def testQuantileRegressionMultiDimLabel(self):
-    learner_config = learner_pb2.LearnerConfig()
-    learner_config.num_classes = 2
-    learner_config.constraints.max_tree_depth = 3
-    learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
-    learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
-    learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
-    learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
-    learner_config.regularization.tree_complexity = (
-        1.0 / _QUANTILE_REGRESSION_SIZE)
-
-    train_input_fn, test_input_fn, y = _quantile_regression_input_fns(
-        two_dimension=True)
-    y = y.reshape(_QUANTILE_REGRESSION_SIZE, 2)
-
-    # 95% percentile.
-    model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
-        quantiles=[0.95],
-        learner_config=learner_config,
-        num_trees=100,
-        label_dimension=2,
-        examples_per_layer=_QUANTILE_REGRESSION_SIZE,
-        center_bias=False)
-
-    model_upper.train(input_fn=train_input_fn, steps=1000)
-    result_iter = model_upper.predict(input_fn=test_input_fn)
-    upper = []
-    for prediction_dict in result_iter:
-      upper.append(prediction_dict["predictions"])
-
-    count_below_upper = np.count_nonzero(upper > y, axis=0)
-    count_both_below_upper = np.count_nonzero(np.prod(upper > y, axis=1))
-    frac_below_upper_0 = round(1. * count_below_upper[0] / len(y), 3)
-    frac_below_upper_1 = round(1. * count_below_upper[1] / len(y), 3)
-    frac_both_below_upper = round(1. * count_both_below_upper / len(y), 3)
-    # +/- 3%
-    self.assertTrue(frac_below_upper_0 >= 0.92)
-    self.assertTrue(frac_below_upper_0 <= 0.98)
-    self.assertTrue(frac_below_upper_1 >= 0.92)
-    self.assertTrue(frac_below_upper_1 <= 0.98)
-    self.assertTrue(frac_both_below_upper >= 0.91)
-    self.assertTrue(frac_both_below_upper <= 0.99)
-
-    train_input_fn, test_input_fn, _ = _quantile_regression_input_fns(
-        two_dimension=True)
-    model_lower = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
-        quantiles=[0.05],
-        learner_config=learner_config,
-        num_trees=100,
-        label_dimension=2,
-        examples_per_layer=_QUANTILE_REGRESSION_SIZE,
-        center_bias=False)
-
-    model_lower.train(input_fn=train_input_fn, steps=1000)
-    result_iter = model_lower.predict(input_fn=test_input_fn)
-    lower = []
-    for prediction_dict in result_iter:
-      lower.append(prediction_dict["predictions"])
-
-    count_above_lower = np.count_nonzero(lower < y, axis=0)
-    count_both_aboce_lower = np.count_nonzero(np.prod(lower < y, axis=1))
-    frac_above_lower_0 = round(1. * count_above_lower[0] / len(y), 3)
-    frac_above_lower_1 = round(1. * count_above_lower[1] / len(y), 3)
-    frac_both_above_lower = round(1. * count_both_aboce_lower / len(y), 3)
-    # +/- 3%
-    self.assertTrue(frac_above_lower_0 >= 0.92)
-    self.assertTrue(frac_above_lower_0 <= 0.98)
-    self.assertTrue(frac_above_lower_1 >= 0.92)
-    self.assertTrue(frac_above_lower_1 <= 0.98)
-    self.assertTrue(frac_both_above_lower >= 0.91)
-    self.assertTrue(frac_both_above_lower <= 0.99)
 
 
 if __name__ == "__main__":
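The assertions retained above implement an empirical coverage check: a model fit to the 0.95 quantile should place roughly 95% (92-98% with the test's +/- 3% slack) of true labels below its predictions. Stated standalone, with `upper` and `y` assumed to be gathered as in the test:

```python
import numpy as np

# upper: per-example predictions from the 0.95-quantile model; y: true labels.
upper = np.asarray(upper)
frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
assert 0.92 <= frac_below_upper <= 0.98  # ~95% coverage, +/- 3%
```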
diff --git a/tensorflow/contrib/boosted_trees/python/ops/model_ops.py b/tensorflow/contrib/boosted_trees/python/ops/model_ops.py
index c3685b5..ad6ff0a 100644
--- a/tensorflow/contrib/boosted_trees/python/ops/model_ops.py
+++ b/tensorflow/contrib/boosted_trees/python/ops/model_ops.py
@@ -33,7 +33,7 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import resources
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 
 ops.NotDifferentiable("TreeEnsembleVariable")
 ops.NotDifferentiable("TreeEnsembleSerialize")
diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
index 0c319cc..aff7105 100644
--- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
+++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
@@ -33,7 +33,7 @@
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import resources
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 
 # Pattern to remove all non alpha numeric from a string.
 _PATTERN = re.compile(r"[\W_]+")
diff --git a/tensorflow/contrib/boosted_trees/python/ops/stats_accumulator_ops.py b/tensorflow/contrib/boosted_trees/python/ops/stats_accumulator_ops.py
index ad1191d..2a0a206 100644
--- a/tensorflow/contrib/boosted_trees/python/ops/stats_accumulator_ops.py
+++ b/tensorflow/contrib/boosted_trees/python/ops/stats_accumulator_ops.py
@@ -26,7 +26,7 @@
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import resources
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 
 # Pattern to remove all non alpha numeric from a string.
 _PATTERN = re.compile(r"[\W_]+")
diff --git a/tensorflow/contrib/checkpoint/__init__.py b/tensorflow/contrib/checkpoint/__init__.py
index 99ed495..7b3df96 100644
--- a/tensorflow/contrib/checkpoint/__init__.py
+++ b/tensorflow/contrib/checkpoint/__init__.py
@@ -27,7 +27,7 @@
 @@NoDependency
 @@split_dependency
 
-Checkpointable data structures:
+Trackable data structures:
 @@List
 @@Mapping
 @@UniqueNameTracker
@@ -49,17 +49,16 @@
 from tensorflow.contrib.checkpoint.python.python_state import PythonStateWrapper
 from tensorflow.contrib.checkpoint.python.split_dependency import split_dependency
 from tensorflow.contrib.checkpoint.python.visualize import dot_graph_from_checkpoint
-from tensorflow.core.protobuf.checkpointable_object_graph_pb2 import CheckpointableObjectGraph
+from tensorflow.core.protobuf.trackable_object_graph_pb2 import TrackableObjectGraph as CheckpointableObjectGraph
 from tensorflow.python.training.checkpoint_management import CheckpointManager
-from tensorflow.python.training.checkpointable.base import Checkpointable as CheckpointableBase
-from tensorflow.python.training.checkpointable.data_structures import List
-from tensorflow.python.training.checkpointable.data_structures import Mapping
-from tensorflow.python.training.checkpointable.data_structures import NoDependency
-from tensorflow.python.training.checkpointable.tracking import AutoCheckpointable as Checkpointable
-from tensorflow.python.training.checkpointable.util import capture_dependencies
-from tensorflow.python.training.checkpointable.util import list_objects
-from tensorflow.python.training.checkpointable.util import object_metadata
-
+from tensorflow.python.training.tracking.base import Trackable as CheckpointableBase
+from tensorflow.python.training.tracking.data_structures import List
+from tensorflow.python.training.tracking.data_structures import Mapping
+from tensorflow.python.training.tracking.data_structures import NoDependency
+from tensorflow.python.training.tracking.tracking import AutoTrackable as Checkpointable
+from tensorflow.python.training.tracking.util import capture_dependencies
+from tensorflow.python.training.tracking.util import list_objects
+from tensorflow.python.training.tracking.util import object_metadata
 from tensorflow.python.util.all_util import remove_undocumented
 
 remove_undocumented(module_name=__name__)
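The import rewrites above are compatibility shims: the internal package moved from `checkpointable` to `tracking` and the classes from `Checkpointable*` to `Trackable*`, but `tf.contrib.checkpoint` keeps exporting the old names. A sketch of what that preserves for existing user code, assuming the aliases above:

```python
import tensorflow as tf

# Old contrib names still resolve; they now point at the renamed internals.
class SlotManager(tf.contrib.checkpoint.Checkpointable):  # now AutoTrackable
  pass

assert isinstance(SlotManager(), tf.contrib.checkpoint.CheckpointableBase)
```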
diff --git a/tensorflow/contrib/checkpoint/python/BUILD b/tensorflow/contrib/checkpoint/python/BUILD
index 4e52932..cd9c94c 100644
--- a/tensorflow/contrib/checkpoint/python/BUILD
+++ b/tensorflow/contrib/checkpoint/python/BUILD
@@ -12,7 +12,7 @@
         ":python_state",
         ":split_dependency",
         ":visualize",
-        "//tensorflow/python/training/checkpointable:data_structures",
+        "//tensorflow/python/training/tracking:data_structures",
     ],
 )
 
@@ -22,8 +22,8 @@
     srcs_version = "PY2AND3",
     visibility = ["//tensorflow:internal"],
     deps = [
-        "//tensorflow/python/training/checkpointable:base",
-        "//tensorflow/python/training/checkpointable:data_structures",
+        "//tensorflow/python/training/tracking:base",
+        "//tensorflow/python/training/tracking:data_structures",
     ],
 )
 
@@ -36,8 +36,8 @@
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python/training/checkpointable:base",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:base",
+        "//tensorflow/python/training/tracking:util",
     ],
 )
 
@@ -47,7 +47,7 @@
     srcs_version = "PY2AND3",
     visibility = ["//tensorflow:internal"],
     deps = [
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
@@ -64,7 +64,7 @@
         "//tensorflow/python:session",
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:test",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:util",
     ],
 )
 
@@ -76,7 +76,7 @@
     deps = [
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:training",
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
     ],
 )
 
@@ -89,8 +89,8 @@
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python/eager:test",
-        "//tensorflow/python/training/checkpointable:base",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:base",
+        "//tensorflow/python/training/tracking:util",
     ],
 )
 
@@ -101,8 +101,8 @@
     visibility = ["//tensorflow:internal"],
     deps = [
         "//tensorflow/python:pywrap_tensorflow",
-        "//tensorflow/python/training/checkpointable:base",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:base",
+        "//tensorflow/python/training/tracking:util",
     ],
 )
 
@@ -118,6 +118,7 @@
         "//tensorflow/python/eager:test",
         "//tensorflow/python/keras:engine",
         "//tensorflow/python/keras:layers",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:util",
     ],
+    tags = ["no_oss"],  # b/124472244
 )
diff --git a/tensorflow/contrib/checkpoint/python/containers.py b/tensorflow/contrib/checkpoint/python/containers.py
index 97936d9..a25d519 100644
--- a/tensorflow/contrib/checkpoint/python/containers.py
+++ b/tensorflow/contrib/checkpoint/python/containers.py
@@ -1,4 +1,4 @@
-"""Checkpointable data structures."""
+"""Trackable data structures."""
 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,12 +17,12 @@
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.training.checkpointable import base as checkpointable_lib
-from tensorflow.python.training.checkpointable import data_structures
+from tensorflow.python.training.tracking import base as trackable_lib
+from tensorflow.python.training.tracking import data_structures
 
 
-class UniqueNameTracker(data_structures.CheckpointableDataStructure):
-  """Adds dependencies on checkpointable objects with name hints.
+class UniqueNameTracker(data_structures.TrackableDataStructure):
+  """Adds dependencies on trackable objects with name hints.
 
   Useful for creating dependencies with locally unique names.
 
@@ -43,30 +43,30 @@
 
   def __init__(self):
     super(UniqueNameTracker, self).__init__()
-    self._maybe_initialize_checkpointable()
+    self._maybe_initialize_trackable()
     self._name_counts = {}
 
   @property
   def _values(self):
     return [dep.ref for dep in self._checkpoint_dependencies]
 
-  def track(self, checkpointable, base_name):
-    """Add a dependency on `checkpointable`.
+  def track(self, trackable, base_name):
+    """Add a dependency on `trackable`.
 
     Args:
-      checkpointable: An object to add a checkpoint dependency on.
+      trackable: An object to add a checkpoint dependency on.
       base_name: A name hint, which is uniquified to determine the dependency
         name.
     Returns:
-      `checkpointable`, for chaining.
+      `trackable`, for chaining.
     Raises:
-      ValueError: If `checkpointable` is not a checkpointable object.
+      ValueError: If `trackable` is not a trackable object.
     """
 
-    if not isinstance(checkpointable, checkpointable_lib.Checkpointable):
+    if not isinstance(trackable, trackable_lib.Trackable):
       raise ValueError(
-          ("Expected a checkpointable value, got %s which does not inherit "
-           "from CheckpointableBase.") % (checkpointable,))
+          ("Expected a trackable value, got %s which does not inherit "
+           "from tf.track.Trackable.") % (trackable,))
 
     def _format_name(prefix, number):
       if number > 0:
@@ -80,5 +80,5 @@
       count += 1
       candidate = _format_name(base_name, count)
     self._name_counts[base_name] = count + 1
-    self._track_value(checkpointable, name=candidate)
-    return checkpointable
+    self._track_value(trackable, name=candidate)
+    return trackable
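`UniqueNameTracker.track` keeps its contract through the rename: it accepts any `Trackable` plus a name hint and uniquifies collisions. A minimal sketch (the variable values are arbitrary):

```python
import tensorflow as tf
from tensorflow.contrib.checkpoint.python import containers

slots = containers.UniqueNameTracker()
first = slots.track(tf.Variable(1.0), "slot")   # dependency name "slot"
second = slots.track(tf.Variable(2.0), "slot")  # uniquified to "slot_1"
```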
diff --git a/tensorflow/contrib/checkpoint/python/containers_test.py b/tensorflow/contrib/checkpoint/python/containers_test.py
index a2d453e..bace219 100644
--- a/tensorflow/contrib/checkpoint/python/containers_test.py
+++ b/tensorflow/contrib/checkpoint/python/containers_test.py
@@ -26,9 +26,9 @@
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import data_structures
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import data_structures
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
 
 
 class UniqueNameTrackerTests(test.TestCase):
@@ -52,7 +52,7 @@
     save_root = util.Checkpoint(slots=slots)
     save_path = save_root.save(checkpoint_prefix)
 
-    restore_slots = tracking.AutoCheckpointable()
+    restore_slots = tracking.AutoTrackable()
     restore_root = util.Checkpoint(
         slots=restore_slots)
     status = restore_root.restore(save_path)
@@ -68,7 +68,7 @@
 
   @test_util.run_in_graph_and_eager_modes
   def testExample(self):
-    class SlotManager(tracking.AutoCheckpointable):
+    class SlotManager(tracking.AutoTrackable):
 
       def __init__(self):
         self.slotdeps = containers.UniqueNameTracker()
diff --git a/tensorflow/contrib/checkpoint/python/python_state.py b/tensorflow/contrib/checkpoint/python/python_state.py
index 969c90c..737a6c3 100644
--- a/tensorflow/contrib/checkpoint/python/python_state.py
+++ b/tensorflow/contrib/checkpoint/python/python_state.py
@@ -23,7 +23,7 @@
 
 import numpy
 
-from tensorflow.python.training.checkpointable import base
+from tensorflow.python.training.tracking import base
 
 # pylint: disable=g-import-not-at-top
 try:
@@ -34,8 +34,8 @@
 # pylint: enable=g-import-not-at-top
 
 
-class NumpyState(base.Checkpointable):
-  """A checkpointable object whose NumPy array attributes are saved/restored.
+class NumpyState(base.Trackable):
+  """A trackable object whose NumPy array attributes are saved/restored.
 
   Example usage:
 
@@ -72,7 +72,7 @@
     """Create placeholder NumPy arrays for to-be-restored attributes.
 
     Typically `_lookup_dependency` is used to check by name whether a dependency
-    exists. We cheat slightly by creating a checkpointable object for `name` if
+    exists. We cheat slightly by creating a trackable object for `name` if
     we don't already have one, giving us attribute re-creation behavior when
     loading a checkpoint.
 
@@ -85,7 +85,7 @@
     value = super(NumpyState, self)._lookup_dependency(name)
     if value is None:
       value = _NumpyWrapper(numpy.array([]))
-      new_reference = base.CheckpointableReference(name=name, ref=value)
+      new_reference = base.TrackableReference(name=name, ref=value)
       self._unconditional_checkpoint_dependencies.append(new_reference)
       self._unconditional_dependency_names[name] = value
       super(NumpyState, self).__setattr__(name, value)
@@ -101,7 +101,7 @@
   def __setattr__(self, name, value):
     """Automatically wrap NumPy arrays assigned to attributes."""
     # TODO(allenl): Consider supporting lists/tuples, either ad-hoc or by making
-    # ndarrays checkpointable natively and using standard checkpointable list
+    # ndarrays trackable natively and using standard trackable list
     # tracking.
     if isinstance(value, (numpy.ndarray, numpy.generic)):
       try:
@@ -110,19 +110,19 @@
         return
       except AttributeError:
         value = _NumpyWrapper(value)
-        self._track_checkpointable(value, name=name, overwrite=True)
+        self._track_trackable(value, name=name, overwrite=True)
     elif (name not in ("_setattr_tracking", "_update_uid")
           and getattr(self, "_setattr_tracking", True)):
-      # Mixing restore()-created attributes with user-added checkpointable
+      # Mixing restore()-created attributes with user-added trackable
       # objects is tricky, since we can't use the `_lookup_dependency` trick to
       # re-create attributes (we might accidentally steal the restoration for
-      # another checkpointable object). For now `NumpyState` objects must be
+      # another trackable object). For now `NumpyState` objects must be
       # leaf nodes. Theoretically we could add some extra arguments to
       # `_lookup_dependency` to figure out whether we should create a NumPy
       # array for the attribute or not.
       raise NotImplementedError(
           ("Assigned %s to the %s property of %s, which is not a NumPy array. "
-           "Currently mixing NumPy arrays and other checkpointable objects is "
+           "Currently mixing NumPy arrays and other trackable objects is "
            "not supported. File a feature request if this limitation bothers "
            "you.")
           % (value, name, self))
@@ -130,7 +130,7 @@
 
 
 @six.add_metaclass(abc.ABCMeta)
-class PythonStateWrapper(base.Checkpointable):
+class PythonStateWrapper(base.Trackable):
   """Wraps a Python object for storage in an object-based checkpoint."""
 
   @abc.abstractmethod
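`NumpyState` after the rename: plain NumPy array attributes ride along in an object-based checkpoint. A sketch assuming eager execution, so the restore applies immediately; the save prefix is a placeholder:

```python
import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

arrays = tf.contrib.checkpoint.NumpyState()
checkpoint = tf.train.Checkpoint(numpy_arrays=arrays)
arrays.x = np.zeros([3, 4])               # wrapped by _NumpyWrapper
save_path = checkpoint.save("/tmp/ckpt")  # placeholder prefix
arrays.x[1, 1] = 4.
checkpoint.restore(save_path)             # eager: restores in place
assert (arrays.x == np.zeros([3, 4])).all()
```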
diff --git a/tensorflow/contrib/checkpoint/python/python_state_test.py b/tensorflow/contrib/checkpoint/python/python_state_test.py
index 4549435..40d8fe8 100644
--- a/tensorflow/contrib/checkpoint/python/python_state_test.py
+++ b/tensorflow/contrib/checkpoint/python/python_state_test.py
@@ -26,7 +26,7 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import variables
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import util
 
 
 class NumpyStateTests(test.TestCase):
diff --git a/tensorflow/contrib/checkpoint/python/split_dependency.py b/tensorflow/contrib/checkpoint/python/split_dependency.py
index 3e9700a..d7b02b5 100644
--- a/tensorflow/contrib/checkpoint/python/split_dependency.py
+++ b/tensorflow/contrib/checkpoint/python/split_dependency.py
@@ -21,7 +21,7 @@
 
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 
 
 class _CallbackSaveable(saver_lib.BaseSaverBuilder.SaveableObject):
@@ -43,7 +43,7 @@
     return self._restore_callback(tensor)
 
 
-class _SplitDependency(checkpointable.Checkpointable):
+class _SplitDependency(trackable.Trackable):
   """Looks like a regular variable while synchronizing save/restores."""
 
   def __init__(self, save_buffer, restore_buffer, name, dtype, num_components,
@@ -81,9 +81,9 @@
       return control_flow_ops.no_op()
 
   def _gather_saveables_for_checkpoint(self):
-    """Looks to Checkpointable like a regular variable."""
+    """Looks to Trackable like a regular variable."""
     return {
-        checkpointable.VARIABLE_VALUE_KEY:
+        trackable.VARIABLE_VALUE_KEY:
         functools.partial(_CallbackSaveable,
                           dtype=self._dtype,
                           save_callback=self._save,
@@ -117,7 +117,7 @@
       may return `None`).
 
   Returns:
-    A dictionary mapping from names to Checkpointable objects. If one is
+    A dictionary mapping from names to Trackable objects. If one is
     reachable from an object as a dependency, the others should be too; adding
     dependencies on some but not all of the objects will result in errors.
   """
diff --git a/tensorflow/contrib/checkpoint/python/split_dependency_test.py b/tensorflow/contrib/checkpoint/python/split_dependency_test.py
index 664a4e7..9bc0105 100644
--- a/tensorflow/contrib/checkpoint/python/split_dependency_test.py
+++ b/tensorflow/contrib/checkpoint/python/split_dependency_test.py
@@ -23,9 +23,9 @@
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
 
 
 def _split_variable_closure(variable):
@@ -44,7 +44,7 @@
   return _consume_restore_buffer_fn
 
 
-class SaveTensorSlicesAsDeps(base.Checkpointable):
+class SaveTensorSlicesAsDeps(base.Trackable):
 
   def __init__(self):
     self.combined = resource_variable_ops.ResourceVariable([0., 0., 0., 0.])
@@ -56,17 +56,17 @@
         consume_restore_buffer_fn=_combine_variable_closure(
             self.combined))
     for name, dep in split_dependencies.items():
-      self._track_checkpointable(dep, name=name)
+      self._track_trackable(dep, name=name)
 
 
-class HasRegularDeps(tracking.AutoCheckpointable):
+class HasRegularDeps(tracking.AutoTrackable):
 
   def __init__(self):
     self.first_half = resource_variable_ops.ResourceVariable([0., 0.])
     self.second_half = resource_variable_ops.ResourceVariable([0., 0.])
 
 
-class OnlyOneDep(tracking.AutoCheckpointable):
+class OnlyOneDep(tracking.AutoTrackable):
 
   def __init__(self):
     self.first_half = resource_variable_ops.ResourceVariable([0., 0.])
diff --git a/tensorflow/contrib/checkpoint/python/visualize.py b/tensorflow/contrib/checkpoint/python/visualize.py
index bac071c..faf90f0 100644
--- a/tensorflow/contrib/checkpoint/python/visualize.py
+++ b/tensorflow/contrib/checkpoint/python/visualize.py
@@ -18,8 +18,8 @@
 from __future__ import print_function
 
 from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import base as trackable
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
 def dot_graph_from_checkpoint(save_path):
@@ -51,7 +51,7 @@
     A graph in DOT format as a string.
   """
   reader = pywrap_tensorflow.NewCheckpointReader(save_path)
-  object_graph = checkpointable_utils.object_metadata(save_path)
+  object_graph = trackable_utils.object_metadata(save_path)
   shape_map = reader.get_variable_to_shape_map()
   dtype_map = reader.get_variable_to_dtype_map()
   graph = 'digraph {\n'
@@ -63,7 +63,7 @@
       slot_ids.add(slot_reference.slot_variable_node_id)
   for node_id, node in enumerate(object_graph.nodes):
     if (len(node.attributes) == 1
-        and node.attributes[0].name == checkpointable.VARIABLE_VALUE_KEY):
+        and node.attributes[0].name == trackable.VARIABLE_VALUE_KEY):
       if node_id in slot_ids:
         color = 'orange'
         tooltip_prefix = 'Slot variable'
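`dot_graph_from_checkpoint`, updated above to the `trackable` utilities, still maps a checkpoint path to DOT source, coloring slot variables orange as in the snippet. A usage sketch; the paths are placeholders:

```python
import tensorflow as tf

# save_path would come from e.g. tf.train.Checkpoint(...).save(prefix).
dot_source = tf.contrib.checkpoint.dot_graph_from_checkpoint("/tmp/ckpt-1")
with open("/tmp/ckpt.dot", "w") as f:
  f.write(dot_source)
# Render offline with Graphviz: dot -Tsvg /tmp/ckpt.dot -o ckpt.svg
```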
diff --git a/tensorflow/contrib/checkpoint/python/visualize_test.py b/tensorflow/contrib/checkpoint/python/visualize_test.py
index 583e3bc..98a22d5 100644
--- a/tensorflow/contrib/checkpoint/python/visualize_test.py
+++ b/tensorflow/contrib/checkpoint/python/visualize_test.py
@@ -28,7 +28,7 @@
 from tensorflow.python.keras.layers import core
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.training import adam
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 
 try:
   import pydot  # pylint: disable=g-import-not-at-top
@@ -57,7 +57,7 @@
       model = MyModel()
       optimizer = adam.AdamOptimizer(0.001)
       optimizer_step = resource_variable_ops.ResourceVariable(12)
-      save_checkpoint = checkpointable_utils.Checkpoint(
+      save_checkpoint = trackable_utils.Checkpoint(
           optimizer=optimizer, model=model, optimizer_step=optimizer_step)
       optimizer.minimize(functools.partial(model, input_value))
       checkpoint_directory = self.get_temp_dir()
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
index e570c09..793a0f3 100644
--- a/tensorflow/contrib/cmake/external/grpc.cmake
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -17,7 +17,7 @@
 set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include)
 set(GRPC_URL https://github.com/grpc/grpc.git)
 set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc)
-set(GRPC_TAG 69b6c047bc767b4d80e7af4d00ccb7c45b683dae)
+set(GRPC_TAG 69b2d570428623d2d9097ec9e21b3e0a02268ab9)
 
 if(WIN32)
   # We use unsecure gRPC because boringssl does not build on windows
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index 21ae9a0..3d86ab9 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -13,6 +13,7 @@
 tensorflow/core/lib/core
 tensorflow/core/profiler
 tensorflow/core/protobuf
+tensorflow/core/protobuf/tpu
 tensorflow/core/util
 tensorflow/examples
 tensorflow/examples/tutorials
@@ -71,7 +72,7 @@
 tensorflow/python/tools/api
 tensorflow/python/tools/api/generator
 tensorflow/python/training
-tensorflow/python/training/checkpointable
+tensorflow/python/training/tracking
 tensorflow/python/user_ops
 tensorflow/python/util
 tensorflow/python/util/protobuf
@@ -437,7 +438,6 @@
 tensorflow/contrib/tpu
 tensorflow/contrib/tpu/ops
 tensorflow/contrib/tpu/profiler
-tensorflow/contrib/tpu/proto
 tensorflow/contrib/tpu/python
 tensorflow/contrib/tpu/python/ops
 tensorflow/contrib/tpu/python/profiler
diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt
index 013180c..b460320 100644
--- a/tensorflow/contrib/cmake/python_protos.txt
+++ b/tensorflow/contrib/cmake/python_protos.txt
@@ -1,6 +1,7 @@
 tensorflow/core
 tensorflow/core/kernels/boosted_trees
 tensorflow/core/profiler
+tensorflow/core/protobuf/tpu
 tensorflow/python
 tensorflow/contrib/boosted_trees/proto
 tensorflow/contrib/cloud/kernels
@@ -12,7 +13,6 @@
 tensorflow/contrib/session_bundle
 tensorflow/contrib/tensor_forest/proto
 tensorflow/contrib/tensorboard/plugins/projector
-tensorflow/contrib/tpu/proto
 tensorflow/contrib/tpu/profiler
 tensorflow/contrib/training/python/training
 tensorflow/contrib/verbs
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index d8d1cc3..cc263d7 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -125,9 +125,9 @@
 
 file(GLOB_RECURSE tf_protos_cc_srcs RELATIVE ${tensorflow_source_dir}
     "${tensorflow_source_dir}/tensorflow/core/*.proto"
+    "${tensorflow_source_dir}/tensorflow/core/protobuf/tpu/*.proto"
     "${tensorflow_source_dir}/tensorflow/compiler/xla/*.proto"
     "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tpu/proto/*.proto"
 )
 
 RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
index ca92c31..403f309 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
@@ -58,7 +58,7 @@
 from tensorflow.python.training import momentum
 from tensorflow.python.training import rmsprop
 from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
 CUDNN_LSTM = cudnn_rnn_ops.CUDNN_LSTM
@@ -709,7 +709,7 @@
     self._TestSaveRestoreHelper(CUDNN_RNN_RELU)
 
 
-class CudnnRNNTestSaveRestoreCheckpointable(test_util.TensorFlowTestCase):
+class CudnnRNNTestSaveRestoreTrackable(test_util.TensorFlowTestCase):
 
   def _VerifyCheckpoint(
       self, checkpoint_path, compatible_cell_fn, cudnn_cell_fn,
@@ -718,7 +718,7 @@
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     with ops.device("gpu:0"):
       cudnn_layer = cudnn_cell_fn()
-      cudnn_checkpoint = checkpointable_utils.Checkpoint(cell=cudnn_layer)
+      cudnn_checkpoint = trackable_utils.Checkpoint(cell=cudnn_layer)
       status = cudnn_checkpoint.restore(checkpoint_path)
       inputs = 3. * array_ops.ones([num_applications, num_layers, input_size],
                                    dtype=dtypes.float32)
@@ -726,7 +726,7 @@
       status.run_restore_ops()
     second_save_path = cudnn_checkpoint.save(checkpoint_prefix)
     restore_layer = compatible_cell_fn()
-    restore_layer_checkpoint = checkpointable_utils.Checkpoint(
+    restore_layer_checkpoint = trackable_utils.Checkpoint(
         cell=restore_layer)
     status = restore_layer_checkpoint.restore(second_save_path)
     current_state = restore_layer.zero_state(1, dtypes.float32)
@@ -742,7 +742,7 @@
     self.assertAllClose(self.evaluate(restore_layer_output),
                         self.evaluate(cudnn_output)[-1, -1:, ...])
 
-  def _CheckpointableSingleCellUnidirectionalTestTemplate(
+  def _TrackableSingleCellUnidirectionalTestTemplate(
       self, single_cell_fn, cudnn_cell_fn):
     # Single-layer cuDNN cells with object-based checkpointing should be
     # checkpoint compatible with either single CudnnCompatible cells or
@@ -759,7 +759,7 @@
       value = np.random.normal(size=variable.shape)
       expected_values.append(value)
       self.evaluate(variable.assign(value))
-    save_checkpoint = checkpointable_utils.Checkpoint(cell=save_cell_layer)
+    save_checkpoint = trackable_utils.Checkpoint(cell=save_cell_layer)
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     first_save_path = save_checkpoint.save(checkpoint_prefix)
@@ -775,10 +775,10 @@
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   @test_util.run_in_graph_and_eager_modes
-  def testLSTMCheckpointableSingleLayer(self):
+  def testLSTMTrackableSingleLayer(self):
     num_units = 2
     direction = CUDNN_RNN_UNIDIRECTION
-    self._CheckpointableSingleCellUnidirectionalTestTemplate(
+    self._TrackableSingleCellUnidirectionalTestTemplate(
         single_cell_fn=functools.partial(
             cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units),
         cudnn_cell_fn=functools.partial(
@@ -788,19 +788,19 @@
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   @test_util.run_in_graph_and_eager_modes
-  def testGRUCheckpointableSingleLayer(self):
+  def testGRUTrackableSingleLayer(self):
     num_units = 2
     direction = CUDNN_RNN_UNIDIRECTION
     with self.assertRaises(NotImplementedError):
       # TODO(allenl): Implement object-based saving for GRUs and other cells.
-      self._CheckpointableSingleCellUnidirectionalTestTemplate(
+      self._TrackableSingleCellUnidirectionalTestTemplate(
           single_cell_fn=functools.partial(
               cudnn_rnn_ops.CudnnCompatibleGRUCell, num_units=num_units),
           cudnn_cell_fn=functools.partial(
               cudnn_rnn.CudnnGRU, num_layers=1, num_units=num_units,
               direction=direction, name="awesome_gru"))
 
-  def _CheckpointableMultiLayerTestTemplate(
+  def _TrackableMultiLayerTestTemplate(
       self, single_cell_fn, cudnn_cell_fn, num_layers):
 
     def _MultiCellFn():
@@ -819,7 +819,7 @@
         value = np.random.normal(size=variable.shape)
         expected_values.append(value)
         self.evaluate(variable.assign(value))
-      save_checkpoint = checkpointable_utils.Checkpoint(cell=save_layer)
+      save_checkpoint = trackable_utils.Checkpoint(cell=save_layer)
       checkpoint_directory = self.get_temp_dir()
       checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
       first_save_path = save_checkpoint.save(checkpoint_prefix)
@@ -837,7 +837,7 @@
     num_units = 2
     num_layers = 3
     direction = CUDNN_RNN_UNIDIRECTION
-    self._CheckpointableMultiLayerTestTemplate(
+    self._TrackableMultiLayerTestTemplate(
         single_cell_fn=functools.partial(
             cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units),
         cudnn_cell_fn=functools.partial(
diff --git a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
index 86ad8ae..1cb4777 100644
--- a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
+++ b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
@@ -518,8 +518,8 @@
         direction=self.direction,
         scope=vs.get_variable_scope(),
         name="%s_saveable" % self.trainable_variables[0].name.split(":")[0])
-    self._saveable._add_checkpointable_dependencies(  # pylint: disable=protected-access
-        checkpointable=self, dtype=self._plain_dtype)
+    self._saveable._add_trackable_dependencies(  # pylint: disable=protected-access
+        trackable=self, dtype=self._plain_dtype)
     ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self._saveable)
 
 
diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
index f36e8d5..7d848e2 100644
--- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
+++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
@@ -33,7 +33,7 @@
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import tracking as checkpointable_lib
+from tensorflow.python.training.tracking import tracking as trackable_lib
 
 CUDNN_RNN_UNIDIRECTION = "unidirectional"
 CUDNN_RNN_BIDIRECTION = "bidirectional"
@@ -737,13 +737,13 @@
     return state_ops.assign(
         self._variables, opaque_params, validate_shape=False)
 
-  def _checkpointable_save(self, save_buffer):
+  def _trackable_save(self, save_buffer):
     weights, biases = self.format_converter.opaque_to_tf_canonical(
         self._variables)
     for name, tensor in zip(self._param_names, weights + biases):
       save_buffer[name] = array_ops.identity(tensor)
 
-  def _checkpointable_restore(self, restore_buffer):
+  def _trackable_restore(self, restore_buffer):
     tensors = [
         array_ops.identity(restore_buffer[name]) for name in self._param_names
     ]
@@ -752,26 +752,26 @@
         restored_shapes=None  # Unused
     )
 
-  def _add_checkpointable_dependencies(self, checkpointable, dtype):
-    """Add canonical weight dependencies to `checkpointable`.
+  def _add_trackable_dependencies(self, trackable, dtype):
+    """Add canonical weight dependencies to `trackable`.
 
     When saving or restoring, converts to or from the opaque buffer
     format. Weights are saved and loaded in the configuration expected by
     cuDNN-compatible cells.
 
     Args:
-      checkpointable: An object inheriting from `CheckpointableBase` to add
+      trackable: An object inheriting from `Trackable` to add
         dependencies too (typically the cuDNN `Layer`).
       dtype: The dtype for the canonical parameter Tensors.
     """
     split_dependencies = split_dependency.split_dependency(
         component_names=self._param_names,
         component_dtypes=(dtype,) * len(self._param_names),
-        fill_save_buffer_fn=self._checkpointable_save,
-        consume_restore_buffer_fn=self._checkpointable_restore)
-    self._checkpointable_track_params(checkpointable, split_dependencies)
+        fill_save_buffer_fn=self._trackable_save,
+        consume_restore_buffer_fn=self._trackable_restore)
+    self._trackable_track_params(trackable, split_dependencies)
 
-  def _checkpointable_track_params(self, checkpointable, params):
+  def _trackable_track_params(self, trackable, params):
     """Tracks parameters in a canonical configuration."""
     return  # NotImplementedError raised by the Layer.
 
@@ -819,7 +819,7 @@
     tf_weights_names.append(prefix + "/kernel")
     tf_bias_names.append(prefix + "/bias")
 
-  def _checkpointable_track_params(self, checkpointable, params):
+  def _trackable_track_params(self, trackable, params):
     """Track parameters for compatibility with CudnnCompatibleLSTMCell."""
     biases = []
     weights = []
@@ -833,12 +833,12 @@
       # wrapping.
       kernel, = weights  # pylint: disable=unbalanced-tuple-unpacking
       bias, = biases  # pylint: disable=unbalanced-tuple-unpacking
-      checkpointable._track_checkpointable(kernel, name="kernel")  # pylint: disable=protected-access
-      checkpointable._track_checkpointable(bias, name="bias")  # pylint: disable=protected-access
+      trackable._track_trackable(kernel, name="kernel")  # pylint: disable=protected-access
+      trackable._track_trackable(bias, name="bias")  # pylint: disable=protected-access
     assert len(biases) == len(weights)
     for cell_index, (bias, kernel) in enumerate(zip(biases, weights)):
-      cell = checkpointable_lib.AutoCheckpointable()
-      checkpointable._track_checkpointable(cell, name="cell-%d" % cell_index)  # pylint: disable=protected-access
+      cell = trackable_lib.AutoTrackable()
+      trackable._track_trackable(cell, name="cell-%d" % cell_index)  # pylint: disable=protected-access
       cell.bias = bias
       cell.kernel = kernel
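
Tracking works here because assigning an attribute on an AutoTrackable records a named dependency. A hedged mini-example using the renamed tracking modules (the checkpoint-key comment is illustrative, not verified against this revision):

    from tensorflow.python.training.tracking import tracking as trackable_lib
    from tensorflow.python.training.tracking import util as trackable_utils

    root = trackable_lib.AutoTrackable()
    cell = trackable_lib.AutoTrackable()
    root.cell = cell  # recorded as the dependency named 'cell'
    cell.kernel = trackable_utils.add_variable(cell, name='kernel', shape=[])
    # Saving `root` via trackable_utils.Checkpoint then writes a key like
    # 'cell/kernel/.ATTRIBUTES/VARIABLE_VALUE'.
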
 
diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index f4c9e00..6387996 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -136,8 +136,7 @@
     srcs = ["one_device_strategy_test.py"],
     additional_deps = [
         ":strategy_test_lib",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python/distribute:one_device_strategy",
+        ":combinations",
         "//tensorflow/python/eager:test",
     ],
 )
@@ -539,6 +538,7 @@
     srcs = [
         "keras_backward_compat_test.py",
         "keras_test.py",
+        "keras_utils_test.py",
     ],
     deps = [
         ":combinations",
@@ -571,6 +571,24 @@
     ],
 )
 
+distribute_py_test(
+    name = "keras_utils_test",
+    srcs = ["keras_utils_test.py"],
+    full_precision = True,
+    main = "keras_utils_test.py",
+    shard_count = 32,
+    tags = [
+        "multi_and_single_gpu",
+        "no_oss",  # TODO(b/117919883): Fix python error.
+        "no_windows_gpu",
+        "notsan",
+    ],
+    deps = [
+        ":keras_test",
+        ":keras_test_lib",
+    ],
+)
+
 # TODO(b/121200287): Remove this in 2.0
 distribute_py_test(
     name = "keras_backward_compat_test",
@@ -782,6 +800,6 @@
         ":tpu_strategy",
         "//tensorflow/compiler/tests:xla_test",
         "//tensorflow/python/eager:test",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:util",
     ],
 )
diff --git a/tensorflow/contrib/distribute/python/checkpointing_test.py b/tensorflow/contrib/distribute/python/checkpointing_test.py
index aa5b9f5..eadf723 100644
--- a/tensorflow/contrib/distribute/python/checkpointing_test.py
+++ b/tensorflow/contrib/distribute/python/checkpointing_test.py
@@ -30,15 +30,15 @@
 from tensorflow.python.training import adam as adam_v1
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
-class NonLayerCheckpointable(tracking.AutoCheckpointable):
+class NonLayerTrackable(tracking.AutoTrackable):
 
   def __init__(self):
-    super(NonLayerCheckpointable, self).__init__()
-    self.a_variable = checkpointable_utils.add_variable(
+    super(NonLayerTrackable, self).__init__()
+    self.a_variable = trackable_utils.add_variable(
         self, name="a_variable", shape=[])
 
 
@@ -49,8 +49,8 @@
     super(Subclassed, self).__init__()
     self._named_dense = core.Dense(1, use_bias=True)
     self._second = core.Dense(1, use_bias=False)
-    # We can still track Checkpointables which aren't Layers.
-    self._non_layer = NonLayerCheckpointable()
+    # We can still track Trackables which aren't Layers.
+    self._non_layer = NonLayerTrackable()
 
   def call(self, values):
     ret = self._second(self._named_dense(values))
@@ -76,7 +76,7 @@
       with strategy.scope():
         model = Subclassed()
         optimizer = adam_v1.AdamOptimizer(0.001)
-        root = checkpointable_utils.Checkpoint(
+        root = trackable_utils.Checkpoint(
             optimizer=optimizer, model=model,
             optimizer_step=training_util.get_or_create_global_step())
         root.restore(checkpoint_management.latest_checkpoint(
diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index acbe467..ee7640d 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -410,6 +410,7 @@
         num_gpus=num_gpus,
         use_core_strategy=use_core_strategy)
 
+  # TODO(b/124344198): Re-enable after fixing this flaky test.
   # TODO(yuefengz): Update how we use num_gpus and required_gpus
   @combinations.generate(
       combinations.combine(
@@ -418,7 +419,8 @@
           required_gpus=1,
           use_dataset=[True, False],
           use_core_strategy=[True, False]))
-  def testMakeInputFnIterator(self, num_gpus, use_dataset, use_core_strategy):
+  def DISABLED_testMakeInputFnIterator(self, num_gpus, use_dataset,
+                                       use_core_strategy):
     if context.num_gpus() < num_gpus:
       self.skipTest('Not enough GPUs')
     if use_dataset:
@@ -553,7 +555,7 @@
           required_gpus=2,
           use_dataset=[True, False],
           use_core_strategy=[True, False]))
-  def testMakeInputFnIterator(self, use_dataset, use_core_strategy):
+  def DISABLED_testMakeInputFnIterator(self, use_dataset, use_core_strategy):
     num_gpus = 2
     if use_dataset:
       fn = lambda: dataset_ops.Dataset.range(5 * num_gpus)
diff --git a/tensorflow/contrib/distribute/python/cross_device_ops_test.py b/tensorflow/contrib/distribute/python/cross_device_ops_test.py
index 54cce29..2d56c25 100644
--- a/tensorflow/contrib/distribute/python/cross_device_ops_test.py
+++ b/tensorflow/contrib/distribute/python/cross_device_ops_test.py
@@ -204,15 +204,15 @@
   reduction_to_one_combinations = combinations.combine(
       cross_device_ops=[
           combinations.NamedObject(
-              "DefaultReductionToOneDeviceCrossDeviceOps",
-              cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps()),
+              "DefaultReductionToOneDevice",
+              cross_device_ops_lib.ReductionToOneDevice()),
           combinations.NamedObject(
               "ReductionToCPUDeviceCrossDeviceOps",
-              cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps(
+              cross_device_ops_lib.ReductionToOneDevice(
                   reduce_to_device=_cpu_device)),
           combinations.NamedObject(
               "AccumulateNCrossDeviceOp",
-              cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps(
+              cross_device_ops_lib.ReductionToOneDevice(
                   accumulation_fn=math_ops.accumulate_n)),
       ],
       distribution=[
@@ -229,19 +229,22 @@
               "AllReduce",
               cross_device_ops_lib.AllReduceCrossDeviceOps("nccl", 1, 0, 0)),
           combinations.NamedObject(
-              "HierarchicalCopy",
-              cross_device_ops_lib.AllReduceCrossDeviceOps(
-                  "hierarchical_copy", 8, 0, 0)),
-          combinations.NamedObject(
               "AllReduceNoGradientRepacking",
               cross_device_ops_lib.AllReduceCrossDeviceOps("nccl", 0, 0, 0)),
+          combinations.NamedObject("NcclAllReduce",
+                                   cross_device_ops_lib.NcclAllReduce()),
+          combinations.NamedObject(
+              "HierarchicalCopy",
+              cross_device_ops_lib.HierarchicalCopyAllReduce(8)),
           combinations.NamedObject(
               "HierarchicalCopyAggregateSmallTensors",
               cross_device_ops_lib.AllReduceCrossDeviceOps(
                   "hierarchical_copy", 0, 100, 10))
       ],
-      distribution=[combinations.mirrored_strategy_with_two_gpus,
-                    combinations.core_mirrored_strategy_with_two_gpus],
+      distribution=[
+          combinations.mirrored_strategy_with_two_gpus,
+          combinations.core_mirrored_strategy_with_two_gpus
+      ],
       mode=["graph", "eager"])
 
   @combinations.generate(reduction_to_one_combinations + allreduce_combinations)
@@ -306,8 +309,8 @@
       combinations.combine(
           cross_device_ops_instance=[
               combinations.NamedObject(
-                  "ReductionToOneDeviceCrossDeviceOps",
-                  cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps()),
+                  "ReductionToOneDevice",
+                  cross_device_ops_lib.ReductionToOneDevice()),
               combinations.NamedObject(
                   "AllReduceCrossDeviceOps",
                   cross_device_ops_lib.AllReduceCrossDeviceOps())
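
For orientation, the renames exercised above track the cross-device-ops API at this revision: ReductionToOneDeviceCrossDeviceOps becomes ReductionToOneDevice, and the common NCCL and hierarchical-copy configurations gain dedicated classes. A hedged construction sketch (import path assumed from this test's conventions; arguments as used above):

    from tensorflow.python.distribute import cross_device_ops as cross_device_ops_lib

    reduce_to_one = cross_device_ops_lib.ReductionToOneDevice()
    nccl_all_reduce = cross_device_ops_lib.NcclAllReduce()
    hierarchical_copy = cross_device_ops_lib.HierarchicalCopyAllReduce(8)  # 8 gradient packs
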
diff --git a/tensorflow/contrib/distribute/python/input_lib_test.py b/tensorflow/contrib/distribute/python/input_lib_test.py
index 10a5831..204f52b 100644
--- a/tensorflow/contrib/distribute/python/input_lib_test.py
+++ b/tensorflow/contrib/distribute/python/input_lib_test.py
@@ -22,7 +22,6 @@
 
 from tensorflow.contrib.distribute.python import combinations
 from tensorflow.contrib.distribute.python import multi_worker_test_base
-from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import input_lib
@@ -214,33 +213,5 @@
                           expected_values, sess)
 
 
-class SplitDatasetBatchTest(test.TestCase):
-
-  def testBatchDataset(self):
-    dataset = dataset_ops.Dataset.range(100).batch(20)
-    split_batch_by = 2
-    result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by)
-    expected_values = [range(i, i+10) for i in range(0, 100, 10)]
-    result = [self.evaluate(el) for el in result_dataset]
-    self.assertAllEqual(expected_values, result)
-
-  def testMapAndBatchDataset(self):
-    dataset = dataset_ops.Dataset.range(100)
-    dataset = dataset.apply(batching.map_and_batch(lambda x: x, 20))
-    split_batch_by = 2
-    result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by)
-    expected_values = [range(i, i+10) for i in range(0, 100, 10)]
-    result = [self.evaluate(el) for el in result_dataset]
-    self.assertAllEqual(expected_values, result)
-
-  def testPrefetchDataset(self):
-    dataset = dataset_ops.Dataset.range(100).batch(20).prefetch(1)
-    split_batch_by = 2
-    result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by)
-    expected_values = [range(i, i+10) for i in range(0, 100, 10)]
-    result = [self.evaluate(el) for el in result_dataset]
-    self.assertAllEqual(expected_values, result)
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/distribute/python/keras_backward_compat_test.py b/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
index 9a581e7..c49b552 100644
--- a/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
+++ b/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
@@ -31,10 +31,10 @@
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import distributed_training_utils
 from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras
+from tensorflow.python.keras.utils.mode_keys import ModeKeys
 from tensorflow.python.ops.parsing_ops import gen_parsing_ops
 from tensorflow.python.training import gradient_descent
 from tensorflow.python.training import rmsprop
-from tensorflow.python.training.mode_keys import ModeKeys
 
 _RANDOM_SEED = 1337
 _TRAIN_SIZE = 200
diff --git a/tensorflow/contrib/distribute/python/keras_dnn_correctness_test.py b/tensorflow/contrib/distribute/python/keras_dnn_correctness_test.py
index dae3218..61202e3 100644
--- a/tensorflow/contrib/distribute/python/keras_dnn_correctness_test.py
+++ b/tensorflow/contrib/distribute/python/keras_dnn_correctness_test.py
@@ -47,7 +47,9 @@
       # We add a few non-linear layers to make it non-trivial.
       model = keras.Sequential()
       model.add(keras.layers.Dense(10, activation='relu', input_shape=(1,)))
-      model.add(keras.layers.Dense(10, activation='relu'))
+      model.add(keras.layers.Dense(
+          10, activation='relu',
+          kernel_regularizer=keras.regularizers.l2(1e-4)))
       model.add(keras.layers.Dense(10, activation='relu'))
       model.add(keras.layers.Dense(1))
 
diff --git a/tensorflow/contrib/distribute/python/keras_image_model_correctness_test.py b/tensorflow/contrib/distribute/python/keras_image_model_correctness_test.py
index f625664..3c29614 100644
--- a/tensorflow/contrib/distribute/python/keras_image_model_correctness_test.py
+++ b/tensorflow/contrib/distribute/python/keras_image_model_correctness_test.py
@@ -23,7 +23,7 @@
 from tensorflow.contrib.distribute.python import keras_correctness_test_base
 from tensorflow.python import keras
 from tensorflow.python.eager import test
-from tensorflow.python.training import gradient_descent
+from tensorflow.python.keras.optimizer_v2 import gradient_descent
 
 
 class DistributionStrategyCnnCorrectnessTest(
@@ -33,7 +33,8 @@
     with keras_correctness_test_base.MaybeDistributionScope(distribution):
       image = keras.layers.Input(shape=(28, 28, 3), name='image')
       c1 = keras.layers.Conv2D(
-          name='conv1', filters=16, kernel_size=(3, 3), strides=(4, 4))(
+          name='conv1', filters=16, kernel_size=(3, 3), strides=(4, 4),
+          kernel_regularizer=keras.regularizers.l2(1e-4))(
               image)
       if self.with_batch_norm:
         c1 = keras.layers.BatchNormalization(name='bn1')(c1)
@@ -47,7 +48,7 @@
         model.set_weights(initial_weights)
 
       model.compile(
-          optimizer=gradient_descent.GradientDescentOptimizer(
+          optimizer=gradient_descent.SGD(
               learning_rate=0.1),
           loss='sparse_categorical_crossentropy',
           metrics=['sparse_categorical_accuracy'])
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 6a15083..77e2419 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -17,9 +17,7 @@
 from __future__ import division
 from __future__ import print_function
 
-import collections
 import os
-import tempfile
 from absl.testing import parameterized
 import numpy as np
 
@@ -27,17 +25,17 @@
 from tensorflow.contrib.distribute.python import mirrored_strategy
 from tensorflow.contrib.distribute.python import tpu_strategy
 from tensorflow.python import keras
+from tensorflow.python.data.experimental.ops import cardinality
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import values
 from tensorflow.python.eager import test
 from tensorflow.python.estimator import keras as keras_lib
 from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import distributed_training_utils
 from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.parsing_ops import gen_parsing_ops
 from tensorflow.python.platform import gfile
 from tensorflow.python.summary.writer import writer_cache
@@ -72,6 +70,20 @@
   return model
 
 
+def simple_subclassed_model(num_labels=_NUM_CLASS):
+
+  class _SimpleMLP(keras.Model):
+
+    def __init__(self, num_labels):
+      super(_SimpleMLP, self).__init__()
+      self.dense = keras.layers.Dense(num_labels)
+
+    def call(self, inputs):
+      return self.dense(inputs)
+
+  return _SimpleMLP(num_labels)
+
+
 def simple_multi_inputs_multi_outputs_model():
   input_a = keras.layers.Input(shape=(16,), name='input_a')
   input_b = keras.layers.Input(shape=(16,), name='input_b')
@@ -216,6 +228,22 @@
   return dataset
 
 
+def convert_numpy_to_dataset_with_unknown_cardinality(inputs,
+                                                      targets=None):
+  if targets is not None:
+    input_slices = (inputs, targets)
+    dummy_op = (lambda inp, target: True)
+  else:
+    input_slices = inputs
+    dummy_op = (lambda inp: True)
+
+  original_dataset = dataset_ops.Dataset.from_tensor_slices(input_slices)
+  ds_with_unknown_cardinality = original_dataset.filter(dummy_op).batch(
+      10, drop_remainder=True)
+  return ds_with_unknown_cardinality
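
The always-true filter is what hides the element count: tf.data cannot statically see through an arbitrary predicate, so the resulting cardinality is reported as UNKNOWN even though no element is dropped. A small sketch with the same modules this file imports:

    import numpy as np

    from tensorflow.python.data.experimental.ops import cardinality
    from tensorflow.python.data.ops import dataset_ops

    ds = dataset_ops.Dataset.from_tensor_slices(np.zeros((100, 3), np.float32))
    # from_tensor_slices has a known cardinality (100); after the opaque
    # filter it becomes cardinality.UNKNOWN.
    ds_unknown = ds.filter(lambda x: True).batch(10, drop_remainder=True)
    unknown = cardinality.cardinality(ds_unknown)  # evaluates to cardinality.UNKNOWN
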
+
+
 def multi_input_output_model():
   a = keras.layers.Input(shape=(3,), name='input_a')
   b = keras.layers.Input(shape=(5,), name='input_b')
@@ -235,6 +263,7 @@
 strategies_minus_tpu = [
     combinations.default_strategy,
     combinations.one_device_strategy,
+    combinations.one_device_strategy_gpu,
     combinations.mirrored_strategy_with_gpu_and_cpu,
     combinations.mirrored_strategy_with_two_gpus,
     combinations.core_mirrored_strategy_with_gpu_and_cpu,
@@ -263,6 +292,7 @@
   strategy_minus_default_combinations = combinations.combine(
       distribution=[
           combinations.one_device_strategy,
+          combinations.one_device_strategy_gpu,
           combinations.mirrored_strategy_with_gpu_and_cpu,
           combinations.mirrored_strategy_with_two_gpus,
           combinations.core_mirrored_strategy_with_gpu_and_cpu,
@@ -286,12 +316,6 @@
       ]))
 
 
-def strategy_for_numpy_input_combinations():
-  one_gpu = combinations.one_device_strategy_gpu
-  return (all_strategy_combinations() +
-          combinations.combine(distribution=[one_gpu], mode=['graph', 'eager']))
-
-
 class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
                                         parameterized.TestCase):
 
@@ -447,7 +471,7 @@
 class TestDistributionStrategyWithNumpyArrays(test.TestCase,
                                               parameterized.TestCase):
 
-  @combinations.generate(strategy_for_numpy_input_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_calculating_input_params_no_steps_no_batch_size(self, distribution):
     # Calculate the per_replica_batch_size scaling factor for strategies
     # that use per_core_batch_size
@@ -478,7 +502,7 @@
         distributed_training_utils.get_input_params(
             distribution, input_63_samples, steps=None, batch_size=None)
 
-  @combinations.generate(strategy_for_numpy_input_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_calculating_input_params_with_steps_no_batch_size(self,
                                                              distribution):
     # Calculate the per_replica_batch_size scaling factor for strategies
@@ -524,7 +548,7 @@
           distributed_training_utils.get_input_params(
               distribution, input_63_samples, steps=1, batch_size=None)
 
-  @combinations.generate(strategy_for_numpy_input_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_calculating_input_params_no_steps_with_batch_size(self,
                                                              distribution):
     # Calculate the per_replica_batch_size scaling factor for strategies
@@ -558,7 +582,7 @@
         distributed_training_utils.get_input_params(
             distribution, input_64_samples, steps=None, batch_size=3)
 
-  @combinations.generate(strategy_for_numpy_input_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_calculating_input_params_with_steps_with_batch_size(self,
                                                                distribution):
     with self.cached_session():
@@ -575,7 +599,7 @@
         distributed_training_utils.get_input_params(
             distribution, input_64_samples, steps=10, batch_size=13)
 
-  @combinations.generate(strategy_for_numpy_input_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_calling_model_with_numpy_arrays(self, distribution):
     with self.cached_session():
       with distribution.scope():
@@ -606,7 +630,7 @@
         # with batch_size
         model.predict(inputs, batch_size=8)
 
-  @combinations.generate(strategy_for_numpy_input_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_calling_model_with_nested_numpy_arrays(self, distribution):
     with self.cached_session():
       with distribution.scope():
@@ -658,7 +682,7 @@
       model.fit(inputs, targets, sample_weight=sample_weights, epochs=1,
                 steps_per_epoch=2, verbose=1)
 
-  @combinations.generate(strategy_for_numpy_input_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_flatten_predict_outputs(self, distribution):
     with self.cached_session():
       with distribution.scope():
@@ -841,9 +865,7 @@
 
       model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1)
 
-  # TODO(b/122743976): Include TPUStrategy for this test as well once
-  # step inference is supported.
-  @combinations.generate(strategy_minus_tpu_combinations())
+  @combinations.generate(all_strategy_combinations())
   def test_fit_eval_and_predict_methods_on_dataset_without_steps(
       self, distribution):
     with self.cached_session():
@@ -864,7 +886,7 @@
       predict_with_numpy = model.predict(inputs, batch_size=10)
 
       dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.batch(10)
+      dataset = dataset.batch(10, drop_remainder=True)
       fit_with_ds = model.fit(dataset, epochs=1).history
       eval_with_ds = model.evaluate(dataset)
       predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
@@ -878,6 +900,61 @@
           predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4)
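
The switch to drop_remainder=True above keeps the batch dimension statically known, which is presumably what lets the TPU-inclusive combinations run this test; without it the final partial batch makes the shape dynamic. A quick illustration with the dataset module imported in this file:

    from tensorflow.python.data.ops import dataset_ops

    ds_dynamic = dataset_ops.Dataset.range(105).batch(10)
    ds_static = dataset_ops.Dataset.range(105).batch(10, drop_remainder=True)
    print(ds_dynamic.output_shapes)  # (?,): last batch may have fewer elements
    print(ds_static.output_shapes)   # (10,): every emitted batch has exactly 10
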
 
   @combinations.generate(all_strategy_combinations())
+  def test_on_dataset_with_unknown_cardinality_without_steps(
+      self, distribution):
+    with self.cached_session():
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+        model.compile(optimizer, loss, metrics=metrics)
+
+      inputs = np.zeros((1000, 3), dtype=np.float32)
+      targets = np.zeros((1000, 4), dtype=np.float32)
+      # steps/steps_per_epoch are calculated when using numpy arrays as
+      # input data.
+      fit_with_numpy = model.fit(inputs, targets, epochs=1,
+                                 batch_size=10).history
+      fit_with_numpy_multiple_epochs = model.fit(
+          inputs, targets, epochs=2, batch_size=10).history
+      eval_with_numpy = model.evaluate(inputs, targets, batch_size=10)
+      predict_with_numpy = model.predict(inputs, batch_size=10)
+
+      dataset = convert_numpy_to_dataset_with_unknown_cardinality(
+          inputs, targets)
+      predict_dataset = convert_numpy_to_dataset_with_unknown_cardinality(
+          inputs)
+
+      self.assertEqual(keras.backend.get_value(cardinality.cardinality(
+          dataset)), cardinality.UNKNOWN)
+      self.assertEqual(keras.backend.get_value(cardinality.cardinality(
+          predict_dataset)), cardinality.UNKNOWN)
+
+      eval_with_ds = model.evaluate(dataset)
+      predict_with_ds = model.predict(predict_dataset)
+      self.assertAllClose(
+          eval_with_numpy, eval_with_ds, atol=1e-4, rtol=1e-4)
+      self.assertAllClose(
+          predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4)
+
+      if (distributed_training_utils.is_tpu_strategy(distribution) and
+          distribution.extended.steps_per_run != 1):
+        with self.assertRaisesRegexp(ValueError, '`steps_per_epoch` '
+                                     'should be specified'):
+          fit_with_ds = model.fit(dataset, epochs=1)
+      else:
+        fit_with_ds = model.fit(dataset, epochs=1).history
+        fit_with_ds_multiple_epochs = model.fit(dataset, epochs=2).history
+        self.assertAllClose(
+            fit_with_numpy, fit_with_ds, atol=1e-4, rtol=1e-4)
+        self.assertAllClose(
+            fit_with_numpy_multiple_epochs,
+            fit_with_ds_multiple_epochs, atol=1e-4, rtol=1e-4)
+
+  @combinations.generate(all_strategy_combinations())
   def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
     with self.cached_session():
       with distribution.scope():
@@ -1123,397 +1200,126 @@
           atol=1e-4, rtol=1e-4)
 
 
-class Counter(keras.callbacks.Callback):
-  """Counts the number of times each callback method was run.
+class TestRegularizerLoss(test.TestCase, parameterized.TestCase):
+  class IdentityRegularizer(keras.regularizers.Regularizer):
 
-  Attributes:
-    method_counts: dict. Contains the counts of time  each callback method was
-      run.
-  """
+    def __call__(self, x):
+      return array_ops.identity(x)
 
-  def __init__(self):
-    self.method_counts = collections.defaultdict(int)
-    methods_to_count = [
-        'on_batch_begin', 'on_batch_end', 'on_epoch_begin', 'on_epoch_end',
-        'on_predict_batch_begin', 'on_predict_batch_end', 'on_predict_begin',
-        'on_predict_end', 'on_test_batch_begin', 'on_test_batch_end',
-        'on_test_begin', 'on_test_end', 'on_train_batch_begin',
-        'on_train_batch_end', 'on_train_begin', 'on_train_end'
-    ]
-    for method_name in methods_to_count:
-      setattr(self, method_name,
-              self.wrap_with_counts(method_name, getattr(self, method_name)))
+  class AddLayer(keras.layers.Layer):
 
-  def wrap_with_counts(self, method_name, method):
+    def build(self, _):
+      self.v = self.add_weight(
+          'v', (), initializer='ones',
+          regularizer=TestRegularizerLoss.IdentityRegularizer())
 
-    def _call_and_count(*args, **kwargs):
-      self.method_counts[method_name] += 1
-      return method(*args, **kwargs)
+    def call(self, inputs):
+      return inputs + self.v
 
-    return _call_and_count
+  @staticmethod
+  def loss_fn(_, y_pred):
+    return math_ops.reduce_mean(y_pred)
 
+  @combinations.generate(all_strategy_combinations_minus_default())
+  def test_regularizer_loss(self, distribution):
+    batch_size = 2
+    if not distributed_training_utils.global_batch_size_supported(distribution):
+      batch_size //= distribution.num_replicas_in_sync
 
-class TestDistributionStrategyWithCallbacks(test.TestCase,
-                                            parameterized.TestCase):
-
-  @combinations.generate(all_strategy_combinations())
-  def test_callbacks_in_fit(self, distribution):
+      # Given an input x, which is always 1, and variable v, this model computes
+      # Loss=x+v+regularizer_loss, where regularizer_loss=v and the variable is
+      # initialized to 1. Therefore, this model computes Loss=1+2v, and so the
+      # gradient dLoss/dv = 2. This gradient of 2 is averaged over all examples
+      # in a batch and then multiplied by the learning rate of 1. As a result,
+      # the model update for one batch should subtract 2 from v, resulting in v
+      # being -1. If the regularizer loss is not scaled correctly by number of
+      # replicas, the variable value will be incorrect when number of replicas
+      # >1. For example, it will be -2 if the number of replicas is 2.
     with distribution.scope():
-      model = get_model()
-      model.compile(optimizer='sgd', loss='mse', metrics=['mae'])
-
-    dataset = get_dataset(distribution)
-    counter = Counter()
-
-    epochs = 2
-    steps_per_epoch = 5
-    validation_steps = 3
-
-    model.fit(
-        dataset,
-        epochs=epochs,
-        steps_per_epoch=steps_per_epoch,
-        verbose=0,
-        validation_data=dataset,
-        validation_steps=validation_steps,
-        callbacks=[counter])
-
-    if isinstance(distribution, tpu_strategy.TPUStrategy):
-      # TPU Strategy can have multi step training, from extended.steps_per_run
-      # if steps_per_run = 1, then num_batch_call_per_epoch = steps_per_epoch
-      steps_per_run = distribution.extended.steps_per_run
-      num_batch_call_per_epoch = steps_per_epoch // steps_per_run
-      if steps_per_epoch % steps_per_run:
-        num_batch_call_per_epoch += 1
-    else:
-      num_batch_call_per_epoch = steps_per_epoch
-
-    self.assertDictEqual(
-        counter.method_counts, {
-            'on_batch_begin': epochs * num_batch_call_per_epoch,
-            'on_batch_end': epochs * num_batch_call_per_epoch,
-            'on_epoch_begin': epochs,
-            'on_epoch_end': epochs,
-            'on_test_batch_begin': epochs * validation_steps,
-            'on_test_batch_end': epochs * validation_steps,
-            'on_test_begin': epochs,
-            'on_test_end': epochs,
-            'on_train_batch_begin': epochs * num_batch_call_per_epoch,
-            'on_train_batch_end': epochs * num_batch_call_per_epoch,
-            'on_train_begin': 1,
-            'on_train_end': 1
-        })
-
-  @combinations.generate(all_strategy_combinations())
-  def test_callbacks_in_eval(self, distribution):
-    with distribution.scope():
-      model = get_model()
-      model.compile(optimizer='sgd', loss='mse', metrics=['mae'])
-
-    dataset = get_dataset(distribution)
-    counter = Counter()
-
-    model.evaluate(dataset, steps=5, callbacks=[counter])
-
-    self.assertDictEqual(
-        counter.method_counts, {
-            'on_test_batch_begin': 5,
-            'on_test_batch_end': 5,
-            'on_test_begin': 1,
-            'on_test_end': 1
-        })
-
-  @combinations.generate(all_strategy_combinations())
-  def test_callbacks_in_predict(self, distribution):
-    with distribution.scope():
-      model = get_model()
-      model.compile(optimizer='sgd', loss='mse', metrics=['mae'])
-
-    dataset = get_dataset(distribution)
-    counter = Counter()
-
-    model.predict(get_predict_dataset(dataset), steps=5, callbacks=[counter])
-
-    self.assertDictEqual(
-        counter.method_counts, {
-            'on_predict_batch_begin': 5,
-            'on_predict_batch_end': 5,
-            'on_predict_begin': 1,
-            'on_predict_end': 1
-        })
+      x = keras.layers.Input(shape=(), batch_size=batch_size)
+      y = TestRegularizerLoss.AddLayer()(x)
+      model = keras.models.Model(inputs=x, outputs=y)
+      opt = gradient_descent_keras.SGD(1.)
+      model.compile(opt, loss=TestRegularizerLoss.loss_fn)
+      model.fit(
+          x=np.array([[1.], [1.]], dtype=np.float32),
+          y=np.array([[1.], [1.]], dtype=np.float32),
+          batch_size=batch_size)
+      v = model.get_weights()[0]
+      self.assertEqual(-1.0, v)
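
A pure-Python check of the arithmetic laid out in the comment above: with x fixed at 1 and v initialized to 1, the loss is x + v + v, so dLoss/dv = 2, and one SGD step at learning rate 1 leaves v at -1:

    v = 1.0                 # initial variable value
    learning_rate = 1.0
    grad = 2.0              # d(x + v + v)/dv, independent of x and v
    v -= learning_rate * grad
    assert v == -1.0        # the value the test asserts on the model weight
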
 
 
-class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
-
-  @combinations.generate(combinations.combine(
-      distribution=[
-          combinations.mirrored_strategy_with_gpu_and_cpu,
-          combinations.core_mirrored_strategy_with_gpu_and_cpu],
-      mode=['graph', 'eager']))
-  def test_validating_dataset_input_tensors_with_shape_mismatch(self,
-                                                                distribution):
-    with self.cached_session():
-      a = constant_op.constant([1, 2], shape=(1, 2))
-      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
-      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
-      x = values.DistributedValues(device_map, (a, b))
-      y = values.DistributedValues(device_map, (a, a))
-      # Removed device and input tensor shape details from the error message
-      # since the order of the device and the corresponding input tensor shape
-      # is not deterministic over different runs.
-      with self.assertRaisesRegexp(ValueError,
-                                   'Input tensor shapes do not match for '
-                                   'distributed tensor inputs '
-                                   'DistributedValues:.+'):
-        with distribution.scope():
-          distributed_training_utils.validate_distributed_dataset_inputs(
-              distribution, x, y)
-
-  @combinations.generate(combinations.combine(
-      distribution=[
-          combinations.mirrored_strategy_with_gpu_and_cpu,
-          combinations.core_mirrored_strategy_with_gpu_and_cpu],
-      mode=['graph', 'eager']))
-  def test_validating_dataset_input_tensors_with_dtype_mismatch(self,
-                                                                distribution):
-    with self.cached_session():
-      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
-      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
-      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
-      x = values.DistributedValues(device_map, (a, b))
-      y = values.DistributedValues(device_map, (a, a))
-      # Removed device and input tensor dtype details from the error message
-      # since the order of the device and the corresponding input tensor dtype
-      # is not deterministic over different runs.
-      with self.assertRaisesRegexp(ValueError,
-                                   'Input tensor dtypes do not match for '
-                                   'distributed tensor inputs '
-                                   'DistributedValues:.+'):
-        with distribution.scope():
-          distributed_training_utils.validate_distributed_dataset_inputs(
-              distribution, x, y)
-
-  @combinations.generate(combinations.combine(
-      distribution=[
-          combinations.mirrored_strategy_with_gpu_and_cpu,
-          combinations.core_mirrored_strategy_with_gpu_and_cpu],
-      mode=['graph', 'eager']))
-  def test_unsupported_features(self, distribution):
-    with self.cached_session():
-      with distribution.scope():
-        model = get_model()
-        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-        loss = 'mse'
-        metrics = ['mae']
-        model.compile(optimizer, loss, metrics=metrics)
-
-      dataset = get_dataset(distribution)
-
-      # Test with validation split
-      with self.assertRaisesRegexp(
-          ValueError, '`validation_split` argument is not '
-                      'supported when input `x` is a dataset or a '
-                      'dataset iterator.+'):
-        model.fit(dataset,
-                  epochs=1, steps_per_epoch=2, verbose=0,
-                  validation_split=0.5, validation_steps=2)
-
-      # Test with sample weight.
-      sample_weight = np.random.random((10,))
-      with self.assertRaisesRegexp(
-          ValueError, '`sample_weight` argument is not supported when input '
-                      '`x` is a dataset or a dataset iterator.'):
-        model.fit(
-            dataset,
-            epochs=1,
-            steps_per_epoch=2,
-            verbose=0,
-            sample_weight=sample_weight)
-
-      # Test with not specifying the `steps` argument for dataset with infinite
-      # cardinality.
-      dataset = dataset.repeat()
-      with self.assertRaisesRegexp(ValueError, 'When passing an infinitely '
-                                   'repeating dataset, you must specify the '
-                                   '`steps_per_epoch` argument'):
-        model.fit(dataset, epochs=1, verbose=0)
-      with self.assertRaisesRegexp(ValueError, 'When passing an infinitely '
-                                   'repeating dataset, you must specify the '
-                                   '`steps` argument'):
-        model.evaluate(dataset, verbose=0)
-
-      with self.assertRaisesRegexp(ValueError, 'When passing an infinitely '
-                                   'repeating dataset, you must specify the '
-                                   '`steps` argument'):
-        model.predict(dataset, verbose=0)
-
-  @combinations.generate(combinations.combine(
-      distribution=[
-          combinations.mirrored_strategy_with_gpu_and_cpu,
-          combinations.core_mirrored_strategy_with_gpu_and_cpu],
-      mode=['graph', 'eager']))
-  def test_calling_with_unsupported_predefined_callbacks(self, distribution):
-    with self.cached_session():
-      with distribution.scope():
-        model = get_model()
-        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-        loss = 'mse'
-        metrics = ['mae']
-        model.compile(optimizer, loss, metrics=metrics)
-
-      dataset = get_dataset(distribution)
-
-      def schedule(_):
-        return 0.001
-      with self.assertRaisesRegexp(ValueError,
-                                   'You must specify a Keras Optimizer V2 when '
-                                   'using'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
-
-      with self.assertRaisesRegexp(ValueError,
-                                   'You must specify a Keras Optimizer V2 when '
-                                   'using'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.ReduceLROnPlateau()])
-
-
-class TestDistributionStrategyWithLossMasking(test.TestCase,
+class TestDistributionStrategyWithKerasModels(test.TestCase,
                                               parameterized.TestCase):
 
-  # TODO(priyag): Enable all strategies for this test. Currently it does not
-  # work for TPU due to some invalid datatype.
-  @combinations.generate(combinations.combine(
-      distribution=[
-          combinations.mirrored_strategy_with_gpu_and_cpu,
-          combinations.core_mirrored_strategy_with_gpu_and_cpu],
-      mode=['graph', 'eager']))
-  def test_masking(self, distribution):
-    with self.cached_session():
-      np.random.seed(1337)
-      x = np.array([[[1], [1]], [[0], [0]]])
-      with distribution.scope():
-        model = keras.models.Sequential()
-        model.add(keras.layers.Masking(mask_value=0, input_shape=(2, 1)))
-        model.add(
-            keras.layers.TimeDistributed(
-                keras.layers.Dense(1, kernel_initializer='one')))
-        model.compile(loss='mse',
-                      optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-      y = np.array([[[1], [1]], [[1], [1]]])
-      dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
-      dataset = dataset.repeat(100)
-      dataset = dataset.batch(10)
-      hist = model.fit(x=dataset, epochs=1, steps_per_epoch=2)
-      self.assertEqual(hist.history['loss'][0], 0)
+  @combinations.generate(all_strategy_combinations())
+  def test_distribution_strategy_on_sequential_model(self, distribution):
+    with distribution.scope():
+      model = simple_sequential_model()
+      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss)
 
+      inputs = np.zeros((20, 10), np.float32)
+      targets = np.zeros((20, 2), np.float32)
 
-class TestDistributionStrategyWithNormalizationLayer(
-    test.TestCase, parameterized.TestCase):
+    model.fit(inputs, targets, epochs=1, steps_per_epoch=2)
+    model.predict(inputs, steps=1)
+    model.evaluate(inputs, targets, steps=1)
 
-  @combinations.generate(combinations.times(
-      all_strategy_combinations(),
-      combinations.combine(fused=[True, False])))
-  def test_batchnorm_correctness(self, distribution, fused):
-    with self.cached_session():
-      with distribution.scope():
-        model = keras.models.Sequential()
-        norm = keras.layers.BatchNormalization(
-            input_shape=(10,), momentum=0.8, fused=fused)
-        model.add(norm)
-        model.compile(loss='mse',
-                      optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+  @combinations.generate(all_strategy_combinations())
+  def test_distribution_strategy_on_functional_model(self, distribution):
+    with distribution.scope():
+      model = get_model()
+      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss)
 
-      # centered on 5.0, variance 10.0
-      x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
-      x = x.astype('float32')
-      dataset = dataset_ops.Dataset.from_tensor_slices((x, x))
-      dataset = dataset.repeat(100)
-      dataset = batch_wrapper(dataset, 32, distribution)
+      inputs = np.zeros((64, 3), dtype=np.float32)
+      targets = np.zeros((64, 4), dtype=np.float32)
 
-      predict_dataset = dataset_ops.Dataset.from_tensor_slices(x)
-      predict_dataset = predict_dataset.repeat(100)
-      predict_dataset = batch_wrapper(predict_dataset, 32, distribution)
+    model.fit(inputs, targets, epochs=1, steps_per_epoch=2)
+    model.predict(inputs, steps=1)
+    model.evaluate(inputs, targets, steps=1)
 
-      model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10)
-      out = model.predict(predict_dataset, steps=2)
-      out -= keras.backend.eval(norm.beta)
-      out /= keras.backend.eval(norm.gamma)
-      np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
-      np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
+  # TODO(b/124377929): Remove error assertions once subclassed models
+  # are supported in DistributedStrategy.
+  @combinations.generate(all_strategy_combinations_minus_default())
+  def test_distribution_strategy_on_subclassed_model(self, distribution):
+    with distribution.scope():
+      model = simple_subclassed_model()
+      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss)
 
+      inputs = np.zeros((64, 3), dtype=np.float32)
+      targets = np.zeros((64, 2), dtype=np.float32)
 
-class TestDistributionStrategySaveLoadWeights(test.TestCase,
-                                              parameterized.TestCase):
+    with self.assertRaisesRegexp(AttributeError, 'has no attribute'):
+      model.fit(inputs, targets, epochs=1, steps_per_epoch=2)
+
+    with self.assertRaisesRegexp(AttributeError, 'has no attribute'):
+      model.predict(inputs, steps=1)
+
+    with self.assertRaisesRegexp(AttributeError, 'has no attribute'):
+      model.evaluate(inputs, targets, steps=1)
 
   @combinations.generate(all_strategy_combinations_minus_default())
-  def test_save_load_h5(self, distribution):
-    with self.cached_session():
-      dataset = get_dataset(distribution)
-      with distribution.scope():
-        model = get_model()
-        model.compile(gradient_descent_keras.SGD(0.01), 'mse')
-        model.fit(dataset, epochs=1, steps_per_epoch=1)
+  def test_distribution_strategy_one_dimensional(self, distribution):
+    with distribution.scope():
+      inp = keras.layers.Input(shape=(10,))
+      out = keras.layers.Dense(3, activation='softmax')(inp)
+      model = keras.Model(inputs=[inp], outputs=[out])
+      model.compile(
+          optimizer='rmsprop',
+          loss='sparse_categorical_crossentropy',
+          metrics=['sparse_categorical_accuracy'],
+      )
 
-        weights_file = tempfile.mktemp('.h5')
-        model.save_weights(weights_file)
+      x = np.random.random((64, 10)).astype('float32')
+      y = np.random.randint(3, size=64)
 
-        model_2 = get_model()
-        model_2.compile(gradient_descent_keras.SGD(0.01), 'mse')
-        model_2.load_weights(weights_file)
-        model_2.predict(get_predict_dataset(distribution), steps=2)
-        model_2.fit(dataset, epochs=1, steps_per_epoch=1)
-
-  @combinations.generate(all_strategy_combinations_minus_default())
-  def test_save_load_checkpointable(self, distribution):
-    # TODO(sourabhbajaj): Test fails with optimizer v2 without h5
-    with self.cached_session():
-      dataset = get_dataset(distribution)
-      with distribution.scope():
-        model = get_model()
-        model.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
-        model.fit(dataset, epochs=1, steps_per_epoch=1)
-
-        weights_file = tempfile.mktemp()
-        model.save_weights(weights_file)
-
-        model_2 = get_model()
-        model_2.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
-        model_2.load_weights(weights_file)
-        model_2.predict(get_predict_dataset(distribution), steps=2)
-        model_2.fit(dataset, epochs=1, steps_per_epoch=1)
-
-
-class TestDistributionStrategyValidation(test.TestCase,
-                                         parameterized.TestCase):
-
-  @combinations.generate(all_strategy_combinations_minus_default())
-  def test_layer_outside_scope(self, distribution):
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'was not created in the distribution strategy'):
-        x = keras.layers.Input(shape=(3,), name='input')
-        y = keras.layers.Dense(4, name='dense')(x)
-        with distribution.scope():
-          model = keras.Model(x, y)
-          optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-          loss = 'mse'
-          metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-          model.compile(optimizer, loss, metrics=metrics)
-
-  @combinations.generate(all_strategy_combinations_minus_default())
-  def test_model_outside_scope(self, distribution):
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'was not created in the distribution strategy'):
-        x = keras.layers.Input(shape=(3,), name='input')
-        y = keras.layers.Dense(4, name='dense')(x)
-        model = keras.Model(x, y)
-        with distribution.scope():
-          optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-          loss = 'mse'
-          metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-          model.compile(optimizer, loss, metrics=metrics)
+      model.fit(x, y, epochs=1, steps_per_epoch=2)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/distribute/python/keras_utils_test.py b/tensorflow/contrib/distribute/python/keras_utils_test.py
new file mode 100644
index 0000000..36eaee7
--- /dev/null
+++ b/tensorflow/contrib/distribute/python/keras_utils_test.py
@@ -0,0 +1,471 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tf.keras models with callbacks, checkpointing with dist strategy."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import tempfile
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.contrib.distribute.python import combinations
+from tensorflow.contrib.distribute.python import keras_test as keras_test_lib
+from tensorflow.contrib.distribute.python import tpu_strategy
+from tensorflow.python import keras
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.distribute import values
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.keras.engine import distributed_training_utils
+from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras
+from tensorflow.python.training import gradient_descent
+
+
+class Counter(keras.callbacks.Callback):
+  """Counts the number of times each callback method was run.
+
+  Attributes:
+    method_counts: dict. Contains the counts of times each callback method
+      was run.
+  """
+
+  def __init__(self):
+    self.method_counts = collections.defaultdict(int)
+    methods_to_count = [
+        'on_batch_begin', 'on_batch_end', 'on_epoch_begin', 'on_epoch_end',
+        'on_predict_batch_begin', 'on_predict_batch_end', 'on_predict_begin',
+        'on_predict_end', 'on_test_batch_begin', 'on_test_batch_end',
+        'on_test_begin', 'on_test_end', 'on_train_batch_begin',
+        'on_train_batch_end', 'on_train_begin', 'on_train_end'
+    ]
+    for method_name in methods_to_count:
+      setattr(self, method_name,
+              self.wrap_with_counts(method_name, getattr(self, method_name)))
+
+  def wrap_with_counts(self, method_name, method):
+
+    def _call_and_count(*args, **kwargs):
+      self.method_counts[method_name] += 1
+      return method(*args, **kwargs)
+
+    return _call_and_count
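
The setattr loop above swaps each callback method for a closure that increments a counter and then delegates. The same count-and-delegate pattern, self-contained and outside Keras (hypothetical method names):

    import collections

    class Counted(object):

      def __init__(self):
        self.calls = collections.defaultdict(int)
        for name in ('ping', 'pong'):
          setattr(self, name, self._wrap(name, getattr(self, name)))

      def _wrap(self, name, method):
        def call_and_count(*args, **kwargs):
          self.calls[name] += 1           # bump the per-method counter...
          return method(*args, **kwargs)  # ...then run the original method
        return call_and_count

      def ping(self):
        pass

      def pong(self):
        pass

    c = Counted()
    c.ping()
    c.ping()
    c.pong()
    assert dict(c.calls) == {'ping': 2, 'pong': 1}
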
+
+
+class TestDistributionStrategyWithCallbacks(test.TestCase,
+                                            parameterized.TestCase):
+
+  @combinations.generate(keras_test_lib.all_strategy_combinations())
+  def test_callbacks_in_fit(self, distribution):
+    with distribution.scope():
+      model = keras_test_lib.get_model()
+      model.compile(optimizer='sgd', loss='mse', metrics=['mae'])
+
+    dataset = keras_test_lib.get_dataset(distribution)
+    counter = Counter()
+
+    epochs = 2
+    steps_per_epoch = 5
+    validation_steps = 3
+
+    model.fit(
+        dataset,
+        epochs=epochs,
+        steps_per_epoch=steps_per_epoch,
+        verbose=0,
+        validation_data=dataset,
+        validation_steps=validation_steps,
+        callbacks=[counter])
+
+    if isinstance(distribution, tpu_strategy.TPUStrategy):
+      # TPUStrategy can run multiple steps per training call via
+      # extended.steps_per_run; if steps_per_run = 1, then
+      # num_batch_call_per_epoch = steps_per_epoch.
+      steps_per_run = distribution.extended.steps_per_run
+      num_batch_call_per_epoch = steps_per_epoch // steps_per_run
+      if steps_per_epoch % steps_per_run:
+        num_batch_call_per_epoch += 1
+    else:
+      num_batch_call_per_epoch = steps_per_epoch
+
+    self.assertDictEqual(
+        counter.method_counts, {
+            'on_batch_begin': epochs * num_batch_call_per_epoch,
+            'on_batch_end': epochs * num_batch_call_per_epoch,
+            'on_epoch_begin': epochs,
+            'on_epoch_end': epochs,
+            'on_test_batch_begin': epochs * validation_steps,
+            'on_test_batch_end': epochs * validation_steps,
+            'on_test_begin': epochs,
+            'on_test_end': epochs,
+            'on_train_batch_begin': epochs * num_batch_call_per_epoch,
+            'on_train_batch_end': epochs * num_batch_call_per_epoch,
+            'on_train_begin': 1,
+            'on_train_end': 1
+        })
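
The TPU branch above is a ceiling division of steps_per_epoch by steps_per_run; for example, 5 steps per epoch at 2 steps per run means three batch-level callback invocations (runs of 2, 2, and 1):

    steps_per_epoch, steps_per_run = 5, 2
    num_batch_call_per_epoch = steps_per_epoch // steps_per_run
    if steps_per_epoch % steps_per_run:
      num_batch_call_per_epoch += 1
    assert num_batch_call_per_epoch == 3  # ceil(5 / 2)
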
+
+  @combinations.generate(keras_test_lib.all_strategy_combinations())
+  def test_callbacks_in_eval(self, distribution):
+    with distribution.scope():
+      model = keras_test_lib.get_model()
+      model.compile(optimizer='sgd', loss='mse', metrics=['mae'])
+
+    dataset = keras_test_lib.get_dataset(distribution)
+    counter = Counter()
+
+    model.evaluate(dataset, steps=5, callbacks=[counter])
+
+    self.assertDictEqual(
+        counter.method_counts, {
+            'on_test_batch_begin': 5,
+            'on_test_batch_end': 5,
+            'on_test_begin': 1,
+            'on_test_end': 1
+        })
+
+  @combinations.generate(keras_test_lib.all_strategy_combinations())
+  def test_callbacks_in_predict(self, distribution):
+    with distribution.scope():
+      model = keras_test_lib.get_model()
+      model.compile(optimizer='sgd', loss='mse', metrics=['mae'])
+
+    dataset = keras_test_lib.get_dataset(distribution)
+    counter = Counter()
+
+    model.predict(
+        keras_test_lib.get_predict_dataset(dataset),
+        steps=5,
+        callbacks=[counter])
+
+    self.assertDictEqual(
+        counter.method_counts, {
+            'on_predict_batch_begin': 5,
+            'on_predict_batch_end': 5,
+            'on_predict_begin': 1,
+            'on_predict_end': 1
+        })
+
+
+class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
+
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              combinations.mirrored_strategy_with_gpu_and_cpu,
+              combinations.core_mirrored_strategy_with_gpu_and_cpu
+          ],
+          mode=['graph', 'eager']))
+  def test_validating_dataset_input_tensors_with_shape_mismatch(
+      self, distribution):
+    with self.cached_session():
+      a = constant_op.constant([1, 2], shape=(1, 2))
+      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
+      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
+      x = values.DistributedValues(device_map, (a, b))
+      y = values.DistributedValues(device_map, (a, a))
+      # Removed device and input tensor shape details from the error message
+      # since the order of the device and the corresponding input tensor shape
+      # is not deterministic over different runs.
+      with self.assertRaisesRegexp(
+          ValueError, 'Input tensor shapes do not match for '
+          'distributed tensor inputs '
+          'DistributedValues:.+'):
+        with distribution.scope():
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              distribution, x, y)
+
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              combinations.mirrored_strategy_with_gpu_and_cpu,
+              combinations.core_mirrored_strategy_with_gpu_and_cpu
+          ],
+          mode=['graph', 'eager']))
+  def test_validating_dataset_input_tensors_with_dtype_mismatch(
+      self, distribution):
+    with self.cached_session():
+      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
+      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
+      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
+      x = values.DistributedValues(device_map, (a, b))
+      y = values.DistributedValues(device_map, (a, a))
+      # Removed device and input tensor dtype details from the error message
+      # since the order of the device and the corresponding input tensor dtype
+      # is not deterministic over different runs.
+      with self.assertRaisesRegexp(
+          ValueError, 'Input tensor dtypes do not match for '
+          'distributed tensor inputs '
+          'DistributedValues:.+'):
+        with distribution.scope():
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              distribution, x, y)
+
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              combinations.mirrored_strategy_with_gpu_and_cpu,
+              combinations.core_mirrored_strategy_with_gpu_and_cpu
+          ],
+          mode=['graph', 'eager']))
+  def test_unsupported_features(self, distribution):
+    with self.cached_session():
+      with distribution.scope():
+        model = keras_test_lib.get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae']
+        model.compile(optimizer, loss, metrics=metrics)
+
+      dataset = keras_test_lib.get_dataset(distribution)
+
+      # Test with validation split
+      with self.assertRaisesRegexp(
+          ValueError, '`validation_split` argument is not '
+          'supported when input `x` is a dataset or a '
+          'dataset iterator.+'):
+        model.fit(
+            dataset,
+            epochs=1,
+            steps_per_epoch=2,
+            verbose=0,
+            validation_split=0.5,
+            validation_steps=2)
+
+      # Test with sample weight.
+      sample_weight = np.random.random((10,))
+      with self.assertRaisesRegexp(
+          ValueError, '`sample_weight` argument is not supported when input '
+          '`x` is a dataset or a dataset iterator.'):
+        model.fit(
+            dataset,
+            epochs=1,
+            steps_per_epoch=2,
+            verbose=0,
+            sample_weight=sample_weight)
+
+      # Test with not specifying the `steps` argument for dataset with infinite
+      # cardinality.
+      dataset = dataset.repeat()
+      with self.assertRaisesRegexp(
+          ValueError, 'When passing an infinitely '
+          'repeating dataset, you must specify the '
+          '`steps_per_epoch` argument'):
+        model.fit(dataset, epochs=1, verbose=0)
+      with self.assertRaisesRegexp(
+          ValueError, 'When passing an infinitely '
+          'repeating dataset, you must specify the '
+          '`steps` argument'):
+        model.evaluate(dataset, verbose=0)
+
+      with self.assertRaisesRegexp(
+          ValueError, 'When passing an infinitely '
+          'repeating dataset, you must specify the '
+          '`steps` argument'):
+        model.predict(dataset, verbose=0)
+
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              combinations.mirrored_strategy_with_gpu_and_cpu,
+              combinations.core_mirrored_strategy_with_gpu_and_cpu
+          ],
+          mode=['graph', 'eager']))
+  def test_calling_with_unsupported_predefined_callbacks(self, distribution):
+    with self.cached_session():
+      with distribution.scope():
+        model = keras_test_lib.get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae']
+        model.compile(optimizer, loss, metrics=metrics)
+
+      dataset = keras_test_lib.get_dataset(distribution)
+
+      def schedule(_):
+        return 0.001
+
+      with self.assertRaisesRegexp(
+          ValueError, 'You must specify a Keras Optimizer V2 when '
+          'using'):
+        model.fit(
+            dataset,
+            epochs=1,
+            steps_per_epoch=2,
+            verbose=0,
+            callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
+
+      with self.assertRaisesRegexp(
+          ValueError, 'You must specify a Keras Optimizer V2 when '
+          'using'):
+        model.fit(
+            dataset,
+            epochs=1,
+            steps_per_epoch=2,
+            verbose=0,
+            callbacks=[keras.callbacks.ReduceLROnPlateau()])
+
+
+class TestDistributionStrategyWithLossMasking(test.TestCase,
+                                              parameterized.TestCase):
+
+  # TODO(priyag): Enable all strategies for this test. Currently it does not
+  # work for TPU due to some invalid datatype.
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              combinations.mirrored_strategy_with_gpu_and_cpu,
+              combinations.core_mirrored_strategy_with_gpu_and_cpu
+          ],
+          mode=['graph', 'eager']))
+  def test_masking(self, distribution):
+    with self.cached_session():
+      np.random.seed(1337)
+      x = np.array([[[1], [1]], [[0], [0]]])
+      with distribution.scope():
+        model = keras.models.Sequential()
+        model.add(keras.layers.Masking(mask_value=0, input_shape=(2, 1)))
+        model.add(
+            keras.layers.TimeDistributed(
+                keras.layers.Dense(1, kernel_initializer='one')))
+        model.compile(
+            loss='mse',
+            optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+      y = np.array([[[1], [1]], [[1], [1]]])
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
+      dataset = dataset.repeat(100)
+      dataset = dataset.batch(10)
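+      # With a ones kernel and zero bias, every unmasked timestep (value 1)
+      # already predicts the target 1, and masked timesteps are excluded from
+      # the loss, so the MSE below should be exactly zero.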
+      hist = model.fit(x=dataset, epochs=1, steps_per_epoch=2)
+      self.assertEqual(hist.history['loss'][0], 0)
+
+
+class TestDistributionStrategyWithNormalizationLayer(test.TestCase,
+                                                     parameterized.TestCase):
+
+  @combinations.generate(
+      combinations.times(keras_test_lib.all_strategy_combinations(),
+                         combinations.combine(fused=[True, False])))
+  def test_batchnorm_correctness(self, distribution, fused):
+    with self.cached_session():
+      with distribution.scope():
+        model = keras.models.Sequential()
+        norm = keras.layers.BatchNormalization(
+            input_shape=(10,), momentum=0.8, fused=fused)
+        model.add(norm)
+        model.compile(
+            loss='mse',
+            optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+      # Centered at 5.0 with standard deviation (not variance) 10.0, since
+      # np.random.normal's `scale` argument is the standard deviation.
+      x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
+      x = x.astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, x))
+      dataset = dataset.repeat(100)
+      dataset = keras_test_lib.batch_wrapper(dataset, 32, distribution)
+
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(x)
+      predict_dataset = predict_dataset.repeat(100)
+      predict_dataset = keras_test_lib.batch_wrapper(predict_dataset, 32,
+                                                     distribution)
+
+      model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10)
+      out = model.predict(predict_dataset, steps=2)
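+      # Undo the layer's learned affine transform (gamma * x_hat + beta) so
+      # `out` recovers the normalized activations, which should be
+      # approximately zero-mean and unit-variance.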
+      out -= keras.backend.eval(norm.beta)
+      out /= keras.backend.eval(norm.gamma)
+      np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
+      np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
+
+
+class TestDistributionStrategySaveLoadWeights(test.TestCase,
+                                              parameterized.TestCase):
+
+  @combinations.generate(
+      keras_test_lib.all_strategy_combinations_minus_default())
+  def test_save_load_h5(self, distribution):
+    with self.cached_session():
+      dataset = keras_test_lib.get_dataset(distribution)
+      with distribution.scope():
+        model = keras_test_lib.get_model()
+        model.compile(gradient_descent_keras.SGD(0.01), 'mse')
+        model.fit(dataset, epochs=1, steps_per_epoch=1)
+
+        weights_file = tempfile.mktemp('.h5')
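+        # tempfile.mktemp only returns a candidate path; save_weights below
+        # is what actually creates the file.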
+        model.save_weights(weights_file)
+
+        model_2 = keras_test_lib.get_model()
+        model_2.compile(gradient_descent_keras.SGD(0.01), 'mse')
+        model_2.load_weights(weights_file)
+        model_2.predict(
+            keras_test_lib.get_predict_dataset(distribution), steps=2)
+        model_2.fit(dataset, epochs=1, steps_per_epoch=1)
+
+  @combinations.generate(
+      keras_test_lib.all_strategy_combinations_minus_default())
+  def test_save_load_trackable(self, distribution):
+    # TODO(sourabhbajaj): Test fails with optimizer v2 without h5
+    with self.cached_session():
+      dataset = keras_test_lib.get_dataset(distribution)
+      with distribution.scope():
+        model = keras_test_lib.get_model()
+        model.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
+        model.fit(dataset, epochs=1, steps_per_epoch=1)
+
+        weights_file = tempfile.mktemp()
+        model.save_weights(weights_file)
+
+        model_2 = keras_test_lib.get_model()
+        model_2.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
+        model_2.load_weights(weights_file)
+        model_2.predict(
+            keras_test_lib.get_predict_dataset(distribution), steps=2)
+        model_2.fit(dataset, epochs=1, steps_per_epoch=1)
+
+
+class TestDistributionStrategyValidation(test.TestCase, parameterized.TestCase):
+
+  @combinations.generate(
+      keras_test_lib.all_strategy_combinations_minus_default())
+  def test_layer_outside_scope(self, distribution):
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, 'was not created in the distribution strategy'):
+        x = keras.layers.Input(shape=(3,), name='input')
+        y = keras.layers.Dense(4, name='dense')(x)
+        with distribution.scope():
+          model = keras.Model(x, y)
+          optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+          loss = 'mse'
+          metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+          model.compile(optimizer, loss, metrics=metrics)
+
+  @combinations.generate(
+      keras_test_lib.all_strategy_combinations_minus_default())
+  def test_model_outside_scope(self, distribution):
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, 'was not created in the distribution strategy'):
+        x = keras.layers.Input(shape=(3,), name='input')
+        y = keras.layers.Dense(4, name='dense')(x)
+        model = keras.Model(x, y)
+        with distribution.scope():
+          optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+          loss = 'mse'
+          metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+          model.compile(optimizer, loss, metrics=metrics)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index bc0572b..5ce7318 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -116,7 +116,8 @@
     self._test_input_fn_iterator(iterator, distribution.extended.worker_devices,
                                  expected_values)
 
-  def testMakeInputFnIteratorWithCallable(self, distribution):
+  # TODO(b/124344198): Re-enable after fixing this flaky test.
+  def DISABLED_testMakeInputFnIteratorWithCallable(self, distribution):
     def fn():
       dataset = dataset_ops.Dataset.range(2).interleave(
           (lambda _: dataset_ops.Dataset.range(10)), cycle_length=2)
@@ -1455,7 +1456,7 @@
       self._test_input_fn_iterator(
           iterator, distribution.extended.worker_devices, expected_values, sess)
 
-  def testMakeInputFnIteratorWithCallable(self, distribution):
+  def DISABLED_testMakeInputFnIteratorWithCallable(self, distribution):
     self._configure_distribution_strategy(distribution)
     def fn():
       dataset = dataset_ops.Dataset.range(100)
diff --git a/tensorflow/contrib/distribute/python/one_device_strategy_test.py b/tensorflow/contrib/distribute/python/one_device_strategy_test.py
index 93c2447..0e56f66 100644
--- a/tensorflow/contrib/distribute/python/one_device_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/one_device_strategy_test.py
@@ -18,42 +18,35 @@
 from __future__ import division
 from __future__ import print_function
 
-import sys
-
+from tensorflow.contrib.distribute.python import combinations
 from tensorflow.contrib.distribute.python import strategy_test_lib
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import one_device_strategy
+from tensorflow.python.eager import context
 from tensorflow.python.eager import test
-from tensorflow.python.framework import test_util
 
 
+@combinations.generate(combinations.combine(
+    distribution=[
+        combinations.one_device_strategy,
+        combinations.one_device_strategy_gpu],
+    mode=["eager", "graph"]))
 class OneDeviceStrategyTest(
     strategy_test_lib.DistributionTestBase,
     strategy_test_lib.OneDeviceDistributionTestBase):
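+  # The class-level combinations.generate decorator above runs each test once
+  # per (distribution, mode) pair and injects `distribution` as an argument.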
 
-  # TODO(josh11b): Switch to using the combinations library.
-  def _get_distribution_strategy(self):
-    if "test_gpu" in sys.argv[0]:
-      return one_device_strategy.OneDeviceStrategy("/device:GPU:0")
+  def testMinimizeLoss(self, distribution):
+    if context.executing_eagerly():
+      self._test_minimize_loss_eager(distribution)
     else:
-      return one_device_strategy.OneDeviceStrategy("/device:CPU:0")
+      self._test_minimize_loss_graph(distribution)
 
-  def testMinimizeLossEager(self):
-    self._test_minimize_loss_eager(self._get_distribution_strategy())
+  def testReplicaId(self, distribution):
+    self._test_replica_id(distribution)
 
-  def testMinimizeLossGraph(self):
-    self._test_minimize_loss_graph(self._get_distribution_strategy())
+  def testCallAndMergeExceptions(self, distribution):
+    self._test_call_and_merge_exceptions(distribution)
 
-  def testReplicaId(self):
-    self._test_replica_id(self._get_distribution_strategy())
-
-  @test_util.run_in_graph_and_eager_modes
-  def testCallAndMergeExceptions(self):
-    self._test_call_and_merge_exceptions(self._get_distribution_strategy())
-
-  @test_util.run_in_graph_and_eager_modes
-  def testMakeInputFnIteratorWithDataset(self):
-    d = one_device_strategy.OneDeviceStrategy("/device:CPU:0")
+  def testMakeInputFnIteratorWithDataset(self, distribution):
     dataset_fn = lambda: dataset_ops.Dataset.range(10)
     expected_values = [[i] for i in range(10)]
     input_fn = self._input_fn_to_test_input_context(
@@ -61,13 +54,11 @@
         expected_num_replicas_in_sync=1,
         expected_num_input_pipelines=1,
         expected_input_pipeline_id=0)
-    iterator = d.make_input_fn_iterator(input_fn)
+    iterator = distribution.make_input_fn_iterator(input_fn)
     self._test_input_fn_iterator(
-        iterator, d.extended.worker_devices, expected_values)
+        iterator, distribution.extended.worker_devices, expected_values)
 
-  @test_util.run_in_graph_and_eager_modes
-  def testMakeInputFnIteratorWithCallable(self):
-    d = one_device_strategy.OneDeviceStrategy("/device:CPU:0")
+  def testMakeInputFnIteratorWithCallable(self, distribution):
     def fn():
       dataset = dataset_ops.Dataset.range(10)
       it = dataset.make_one_shot_iterator()
@@ -78,32 +69,31 @@
         expected_num_replicas_in_sync=1,
         expected_num_input_pipelines=1,
         expected_input_pipeline_id=0)
-    iterator = d.make_input_fn_iterator(input_fn)
+    iterator = distribution.make_input_fn_iterator(input_fn)
     self._test_input_fn_iterator(
-        iterator, d.extended.worker_devices, expected_values,
+        iterator, distribution.extended.worker_devices, expected_values,
         test_reinitialize=False)
 
-  @test_util.run_in_graph_and_eager_modes
-  def testNumpyIterator(self):
-    self._test_numpy_iterator(self._get_distribution_strategy())
+  def testNumpyIterator(self, distribution):
+    self._test_numpy_iterator(distribution)
 
-  def testAllReduceSum(self):
-    self._test_all_reduce_sum(self._get_distribution_strategy())
+  def testAllReduceSum(self, distribution):
+    self._test_all_reduce_sum(distribution)
 
-  def testAllReduceSumGradients(self):
-    self._test_all_reduce_sum_gradients(self._get_distribution_strategy())
+  def testAllReduceSumGradients(self, distribution):
+    self._test_all_reduce_sum_gradients(distribution)
 
-  def testAllReduceSumGradientTape(self):
-    self._test_all_reduce_sum_gradient_tape(self._get_distribution_strategy())
+  def testAllReduceSumGradientTape(self, distribution):
+    self._test_all_reduce_sum_gradient_tape(distribution)
 
-  def testAllReduceMean(self):
-    self._test_all_reduce_mean(self._get_distribution_strategy())
+  def testAllReduceMean(self, distribution):
+    self._test_all_reduce_mean(distribution)
 
-  def testAllReduceMeanGradients(self):
-    self._test_all_reduce_mean_gradients(self._get_distribution_strategy())
+  def testAllReduceMeanGradients(self, distribution):
+    self._test_all_reduce_mean_gradients(distribution)
 
-  def testAllReduceMeanGradientTape(self):
-    self._test_all_reduce_mean_gradient_tape(self._get_distribution_strategy())
+  def testAllReduceMeanGradientTape(self, distribution):
+    self._test_all_reduce_mean_gradient_tape(distribution)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index fede253..3de2041 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -696,6 +696,7 @@
   def testMinimizeLossGraphLocal(self, num_gpus, use_core_strategy):
     self._test_minimize_loss_graph(None, None, num_gpus, use_core_strategy)
 
+  # TODO(b/124344198): Re-enable after fixing this flaky test.
   # TODO(priyag): Refactor this and other multi worker tests.
   @combinations.generate(
       combinations.combine(
@@ -704,8 +705,8 @@
           required_gpus=1,
           use_core_strategy=[True, False],
           use_dataset=[True, False]))
-  def testMakeInputFnIteratorDistributed(self, num_gpus, use_core_strategy,
-                                         use_dataset):
+  def DISABLED_testMakeInputFnIteratorDistributed(
+      self, num_gpus, use_core_strategy, use_dataset):
     if context.num_gpus() < num_gpus:
       self.skipTest('Not enough GPUs')
     if use_dataset:
@@ -732,6 +733,7 @@
         test_reinitialize=use_dataset,
         use_core_strategy=use_core_strategy)
 
+  # TODO(b/124344198): Re-enable after fixing this flaky test.
   @combinations.generate(
       combinations.combine(
           mode=['graph'],
@@ -739,8 +741,8 @@
           required_gpus=1,
           use_core_strategy=[True, False],
           use_dataset=[True, False]))
-  def testMakeInputFnIteratorLocal(self, num_gpus, use_core_strategy,
-                                   use_dataset):
+  def DISABLED_testMakeInputFnIteratorLocal(self, num_gpus, use_core_strategy,
+                                            use_dataset):
     if context.num_gpus() < num_gpus:
       self.skipTest('Not enough GPUs')
     if use_dataset:
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 341d9ae..2d9d221 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -26,6 +26,7 @@
 
 from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import device_assignment as device_assignment_lib
+from tensorflow.contrib.tpu.python.tpu import functional as tpu_functional_ops
 from tensorflow.contrib.tpu.python.tpu import topology
 from tensorflow.contrib.tpu.python.tpu import tpu
 from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
@@ -41,6 +42,7 @@
 from tensorflow.python.distribute import values
 from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import device as tf_device
@@ -52,6 +54,7 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 
 
@@ -69,11 +72,36 @@
   master = cluster_resolver.master()
 
   logging.info("Initializing the TPU system.")
-  session_config = config_pb2.ConfigProto(allow_soft_placement=True)
 
-  with ops.Graph().as_default():
-    with session_lib.Session(config=session_config, target=master) as sess:
-      serialized_topology = sess.run(tpu.initialize_system())
+  if context.executing_eagerly():
+    # This function is written the way it is for the following non-intuitive
+    # reasons. tpu.initialize_system creates a dummy op whose sole purpose is
+    # to trigger DistributedTPURewritePass; that pass adds the real ops that
+    # initialize the TPU system. Thus, we can't simply run
+    # tpu.initialize_system eagerly. We need to wrap it in a defun and trigger
+    # the rewrite passes on it. The easiest way to trigger a rewrite is to run
+    # the function with TPUPartitionedCallOp.
+    @function.defun
+    def _tpu_init_fn():
+      return tpu.initialize_system()
+
+    # We can't call _tpu_init_fn normally (because it contains just a dummy
+    # op, see above), but we do need to define it so that it is added to the
+    # eager context and assigned a name.
+    # pylint: disable=protected-access
+    graph_func = _tpu_init_fn._get_concrete_function_internal()
+    func_name = compat.as_str(graph_func._inference_function.name)
+    # pylint: enable=protected-access
+
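+    # TPUPartitionedCall triggers the rewrite passes on the function and runs
+    # the real initialization ops; the result is the serialized TopologyProto.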
+    output = tpu_functional_ops.TPUPartitionedCall(
+        args=[], device_ordinal=0, Tout=[dtypes.string], f=func_name)
+    serialized_topology = output[0].numpy()
+  else:
+    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    with ops.Graph().as_default():
+      with session_lib.Session(config=session_config, target=master) as sess:
+        serialized_topology = sess.run(tpu.initialize_system())
+
   logging.info("Finished initializing TPU system.")
   return topology.Topology(serialized=serialized_topology)
 
@@ -133,7 +161,7 @@
         strategy, device_map, value_list, aggregation,
         logical_device=logical_device)
 
-  if not context.executing_eagerly():
+  if not (context.executing_eagerly() or ops.inside_function()):
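+    # We only reach here when building a plain graph (neither eager nor
+    # inside a tf.function), where collection bookkeeping applies.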
     g = ops.get_default_graph()
     # If "trainable" is True, next_creator() will add the member variables
     # to the TRAINABLE_VARIABLES collection, so we manually remove
@@ -183,8 +211,9 @@
   # This implementation runs a single step. It does not use infeed or outfeed.
   def experimental_run(self, fn, input_iterator=None):
     """See base class."""
-    if context.executing_eagerly():
-      raise NotImplementedError("Eager mode not supported in TPUStrategy.")
+    if context.executing_eagerly() and not ops.inside_function():
+      raise NotImplementedError(
+          "Eager mode not supported in TPUStrategy outside TF functions.")
 
     if input_iterator is None:
       inputs = []
@@ -192,13 +221,13 @@
       inputs = input_iterator.get_next()
 
     result = [None]
-    def replicated_fn(replica_id, inputs):
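+    # The second argument is named `replica_input` to avoid shadowing the
+    # `inputs` computed from the iterator above.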
+    def replicated_fn(replica_id, replica_input):
       """Wraps user function to provide replica ID and `Tensor` inputs."""
       with _TPUReplicaContext(self, replica_id_in_sync_group=replica_id):
         if input_iterator is None:
           result[0] = fn()
         else:
-          result[0] = fn(inputs)
+          result[0] = fn(replica_input)
       return result[0]
 
     replicate_inputs = []  # By replica.
@@ -507,9 +536,10 @@
             # name as the absolute name of the variable.
             kwargs["name"] = "%s/replica_%d/" % (var0name, i)
             # Initialize replicas with the same value:
-            if context.executing_eagerly():
-              kwargs["initial_value"] = array_ops.identity(
-                  value_list[0].value())
+            if context.executing_eagerly() or ops.inside_function():
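+              # init_scope lifts the initializer out of any in-progress
+              # function trace so each replica copies replica 0's value at
+              # creation time.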
+              with ops.init_scope():
+                kwargs["initial_value"] = array_ops.identity(
+                    value_list[0].value())
             else:
               def initial_value_fn(device=d):
                 with ops.device(device):
diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 8966a9b..d441e47 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -144,7 +144,7 @@
         "//tensorflow/python:variable_scope",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:function",
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
     ],
 )
 
diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py
index 78ab155..48925b1 100644
--- a/tensorflow/contrib/eager/python/datasets_test.py
+++ b/tensorflow/contrib/eager/python/datasets_test.py
@@ -37,7 +37,7 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
 class IteratorTest(test.TestCase):
@@ -238,7 +238,7 @@
     dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
     dataset = dataset.map(math_ops.square).batch(2)
     iterator = datasets.Iterator(dataset)
-    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
+    checkpoint = trackable_utils.Checkpoint(iterator=iterator)
     self.assertAllEqual([1, 4], iterator.get_next().numpy())
     save_path = checkpoint.save(checkpoint_prefix)
     self.assertAllEqual([9, 16], iterator.get_next().numpy())
@@ -257,7 +257,7 @@
     dataset_2 = Dataset.range(10)
     iterator_3 = datasets.Iterator(dataset_2)
 
-    checkpoint = checkpointable_utils.Checkpoint(
+    checkpoint = trackable_utils.Checkpoint(
         iterator_1=iterator_1, iterator_2=iterator_2, iterator_3=iterator_3)
     self.assertAllEqual([1, 4], iterator_1.get_next().numpy())
     self.assertEqual(0, iterator_3.get_next().numpy())
@@ -279,7 +279,7 @@
     dataset = Dataset.range(3)
     iterator = datasets.Iterator(dataset)
 
-    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
+    checkpoint = trackable_utils.Checkpoint(iterator=iterator)
     self.assertEqual(0, iterator.get_next().numpy())
     self.assertEqual(1, iterator.get_next().numpy())
     save_path = checkpoint.save(checkpoint_prefix)
@@ -293,7 +293,7 @@
     dataset = Dataset.range(10)
     for i in range(5):
       iterator = datasets.Iterator(dataset)
-      checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
+      checkpoint = trackable_utils.Checkpoint(iterator=iterator)
       checkpoint.restore(checkpoint_management.latest_checkpoint(
           checkpoint_directory))
       for j in range(2):
diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
index d18a097..3143270 100644
--- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
+++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
@@ -37,7 +37,7 @@
 from tensorflow.python.eager import test
 from tensorflow.python.framework import test_util
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 # pylint: enable=g-bad-import-order
 
 
@@ -421,7 +421,7 @@
 
     # 5. Verify that checkpoints exist and contains all the expected variables.
     self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*")))
-    object_graph = checkpointable_utils.object_metadata(
+    object_graph = trackable_utils.object_metadata(
         checkpoint_management.latest_checkpoint(config.logdir))
     ckpt_variable_names = set()
     for node in object_graph.nodes:
diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py
index c8d9266..b32501c 100644
--- a/tensorflow/contrib/eager/python/metrics_impl.py
+++ b/tensorflow/contrib/eager/python/metrics_impl.py
@@ -32,12 +32,12 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import summary_ops_v2 as summary_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 
 _to_replace = re.compile("[^A-Za-z0-9.]")
 
 
-class Metric(checkpointable.Checkpointable):
+class Metric(trackable.Trackable):
   """A metric holds state for aggregating statistics over an evaluation run.
 
   Example use with eager execution:
@@ -269,7 +269,7 @@
       else:
         collections = [ops.GraphKeys.LOCAL_VARIABLES]
       collections += [ops.GraphKeys.METRIC_VARIABLES]
-    # Variables are Checkpointable dependencies of Metrics regardless of the
+    # Variables are Trackable dependencies of Metrics regardless of the
     # global/local distinction. Users can avoid saving variables by not adding a
     # dependency on the Metric.
     v = self._add_variable_with_custom_getter(
@@ -282,7 +282,7 @@
         use_resource=True,
         getter=variable_scope.get_variable,
         # Raise duplicate variable exceptions from get_variable rather than
-        # Checkpointable.
+        # Trackable.
         overwrite=True)
     self._vars.append(v)
     if context.executing_eagerly():
diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py
index 39e5957..c56d195 100644
--- a/tensorflow/contrib/eager/python/metrics_test.py
+++ b/tensorflow/contrib/eager/python/metrics_test.py
@@ -35,7 +35,7 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import summary_ops_v2 as summary_ops
 from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
 class MetricsTest(test.TestCase):
@@ -314,7 +314,7 @@
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     mean = metrics.Mean()
-    checkpoint = checkpointable_utils.Checkpoint(mean=mean)
+    checkpoint = trackable_utils.Checkpoint(mean=mean)
     mean.build()
     mean._built = True
     self.evaluate(mean.init_variables())
@@ -327,7 +327,7 @@
     self.assertAllEqual(200., self.evaluate(mean.value()))
 
     restore_mean = metrics.Mean()
-    restore_checkpoint = checkpointable_utils.Checkpoint(mean=restore_mean)
+    restore_checkpoint = trackable_utils.Checkpoint(mean=restore_mean)
     status = restore_checkpoint.restore(save_path)
     restore_update = restore_mean(300.)
     status.assert_consumed().run_restore_ops()
diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py
index 240f213..b3e8dad 100644
--- a/tensorflow/contrib/eager/python/network_test.py
+++ b/tensorflow/contrib/eager/python/network_test.py
@@ -31,7 +31,7 @@
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
 # pylint: disable=not-callable
@@ -65,7 +65,7 @@
 
   def test_checkpointing_not_implemented(self):
     checkpoint_directory = self.get_temp_dir()
-    checkpoint = checkpointable_utils.Checkpoint(net=MyNetwork())
+    checkpoint = trackable_utils.Checkpoint(net=MyNetwork())
     with self.assertRaises(NotImplementedError):
       checkpoint.save(checkpoint_directory)
 
diff --git a/tensorflow/contrib/eager/python/parameter_server.py b/tensorflow/contrib/eager/python/parameter_server.py
index 7803a67..258f0a1 100644
--- a/tensorflow/contrib/eager/python/parameter_server.py
+++ b/tensorflow/contrib/eager/python/parameter_server.py
@@ -30,7 +30,7 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 
 
 def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
@@ -129,8 +129,8 @@
     if constraint is not None and not callable(constraint):
       raise ValueError("The `constraint` argument must be a callable.")
 
-    if isinstance(initial_value, checkpointable.CheckpointInitialValue):
-      self._maybe_initialize_checkpointable()
+    if isinstance(initial_value, trackable.CheckpointInitialValue):
+      self._maybe_initialize_trackable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
       initial_value = initial_value.wrapped_value
 
diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py
index 12bbdc0..df5b059 100644
--- a/tensorflow/contrib/eager/python/tfe.py
+++ b/tensorflow/contrib/eager/python/tfe.py
@@ -137,8 +137,8 @@
 from tensorflow.python.ops.variable_scope import EagerVariableStore
 from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import template
-from tensorflow.python.training.checkpointable.tracking import AutoCheckpointable as Checkpointable
-from tensorflow.python.training.checkpointable.util import Checkpoint
+from tensorflow.python.training.tracking.tracking import AutoTrackable as Checkpointable
+from tensorflow.python.training.tracking.util import Checkpoint
 from tensorflow.python.util.all_util import remove_undocumented
 
 py_func = script_ops.eager_py_func
diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index 48a6ef4..da2479a 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -203,10 +203,7 @@
     srcs = ["python/ops/kmeans_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = [
-        "nomac",  # b/73741358
-        "notsan",  # b/67512932
-    ],
+    tags = ["notsan"],
     deps = [
         ":factorization_py",
         ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO",
diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD
index 8fc5f1c..0a9199d 100644
--- a/tensorflow/contrib/feature_column/BUILD
+++ b/tensorflow/contrib/feature_column/BUILD
@@ -14,7 +14,6 @@
     srcs_version = "PY2AND3",
     deps = [
         ":sequence_feature_column",
-        ":sequence_feature_column_v2",
         "//tensorflow/python:util",
     ],
 )
@@ -72,60 +71,3 @@
     ],
     tags = ["no_pip"],
 )
-
-py_library(
-    name = "sequence_feature_column_v2",
-    srcs = ["python/feature_column/sequence_feature_column_v2.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/feature_column",
-        "//tensorflow/python/feature_column:feature_column_py",
-    ],
-)
-
-tf_py_test(
-    name = "sequence_feature_column_v2_test",
-    srcs = ["python/feature_column/sequence_feature_column_v2_test.py"],
-    additional_deps = [
-        ":sequence_feature_column_v2",
-        "@absl_py//absl/testing:parameterized",
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
-        "//tensorflow/python/feature_column:feature_column_py",
-        "//tensorflow/python/feature_column:feature_column_v2_test",
-    ],
-    tags = ["no_pip"],
-)
-
-py_test(
-    name = "sequence_feature_column_v2_integration_test",
-    srcs = ["python/feature_column/sequence_feature_column_v2_integration_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":sequence_feature_column_v2",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:util",
-        "//tensorflow/python/feature_column:feature_column_py",
-        "//tensorflow/python/keras:layers",
-    ],
-)
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2.py
deleted file mode 100644
index 2f4bda1..0000000
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2.py
+++ /dev/null
@@ -1,582 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""This API defines FeatureColumn for sequential input.
-
-NOTE: This API is a work in progress and will likely be changing frequently.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-import collections
-
-
-from tensorflow.python.feature_column import feature_column as fc_old
-from tensorflow.python.feature_column import feature_column_lib as fc
-from tensorflow.python.feature_column import feature_column_v2 as fc_v2
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import sparse_ops
-
-# pylint: disable=protected-access
-
-
-class SequenceFeatures(fc_v2._BaseFeaturesLayer):
-  """A layer for sequence input.
-
-    All `feature_columns` must be sequence dense columns with the same
-    `sequence_length`. The output of this method can be fed into sequence
-    networks, such as RNN.
-
-    The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
-    `T` is the maximum sequence length for this batch, which could differ from
-    batch to batch.
-
-    If multiple `feature_columns` are given with `Di` `num_elements` each, their
-    outputs are concatenated. So, the final `Tensor` has shape
-    `[batch_size, T, D0 + D1 + ... + Dn]`.
-
-    Example:
-
-    ```python
-    rating = sequence_numeric_column('rating')
-    watches = sequence_categorical_column_with_identity(
-        'watches', num_buckets=1000)
-    watches_embedding = embedding_column(watches, dimension=10)
-    columns = [rating, watches]
-
-    features = tf.parse_example(..., features=make_parse_example_spec(columns))
-    sequence_input_layer = SequenceFeatures(columns)
-    sequence_input, sequence_length = sequence_input_layer(features)
-    sequence_length_mask = tf.sequence_mask(sequence_length)
-
-    rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
-    rnn_layer = tf.keras.layers.RNN(rnn_cell)
-    outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
-    ```
-  """
-
-  def __init__(
-      self,
-      feature_columns,
-      trainable=True,
-      name=None,
-      **kwargs):
-    """"Constructs a SequenceFeatures layer.
-
-    Args:
-      feature_columns: An iterable of dense sequence columns. Valid columns are
-        - `embedding_column` that wraps a `sequence_categorical_column_with_*`
-        - `sequence_numeric_column`.
-      trainable: Boolean, whether the layer's variables will be updated via
-        gradient descent during training.
-      name: Name to give to the SequenceFeatures.
-      **kwargs: Keyword arguments to construct a layer.
-
-    Raises:
-      ValueError: If any of the `feature_columns` is not a
-        `SequenceDenseColumn`.
-    """
-    super(SequenceFeatures, self).__init__(
-        feature_columns=feature_columns,
-        trainable=trainable,
-        name=name,
-        expected_column_type=fc_v2.SequenceDenseColumn,
-        **kwargs)
-
-  def _target_shape(self, input_shape, total_elements):
-    return (input_shape[0], input_shape[1], total_elements)
-
-  def call(self, features):
-    """Returns sequence input corresponding to the `feature_columns`.
-
-    Args:
-      features: A dict mapping keys to tensors.
-
-    Returns:
-      An `(input_layer, sequence_length)` tuple where:
-      - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
-          `T` is the maximum sequence length for this batch, which could differ
-          from batch to batch. `D` is the sum of `num_elements` for all
-          `feature_columns`.
-      - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
-          length for each example.
-
-    Raises:
-      ValueError: If features are not a dictionary.
-    """
-    if not isinstance(features, dict):
-      raise ValueError('We expected a dictionary here. Instead we got: ',
-                       features)
-    transformation_cache = fc.FeatureTransformationCache(features)
-    output_tensors = []
-    sequence_lengths = []
-
-    for column in self._feature_columns:
-      with ops.name_scope(column.name):
-        dense_tensor, sequence_length = column.get_sequence_dense_tensor(
-            transformation_cache, self._state_manager)
-        # Flattens the final dimension to produce a 3D Tensor.
-        output_tensors.append(self._process_dense_tensor(column, dense_tensor))
-        sequence_lengths.append(sequence_length)
-
-    # Check and process sequence lengths.
-    fc_v2._verify_static_batch_size_equality(sequence_lengths,
-                                             self._feature_columns)
-    sequence_length = _assert_all_equal_and_return(sequence_lengths)
-
-    return self._verify_and_concat_tensors(output_tensors), sequence_length
-
-
-def concatenate_context_input(context_input, sequence_input):
-  """Replicates `context_input` across all timesteps of `sequence_input`.
-
-  Expands dimension 1 of `context_input` then tiles it `sequence_length` times.
-  This value is appended to `sequence_input` on dimension 2 and the result is
-  returned.
-
-  Args:
-    context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
-    sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size,
-      padded_length, d0]`.
-
-  Returns:
-    A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
-    d0 + d1]`.
-
-  Raises:
-    ValueError: If `sequence_input` does not have rank 3 or `context_input` does
-      not have rank 2.
-  """
-  seq_rank_check = check_ops.assert_rank(
-      sequence_input,
-      3,
-      message='sequence_input must have rank 3',
-      data=[array_ops.shape(sequence_input)])
-  seq_type_check = check_ops.assert_type(
-      sequence_input,
-      dtypes.float32,
-      message='sequence_input must have dtype float32; got {}.'.format(
-          sequence_input.dtype))
-  ctx_rank_check = check_ops.assert_rank(
-      context_input,
-      2,
-      message='context_input must have rank 2',
-      data=[array_ops.shape(context_input)])
-  ctx_type_check = check_ops.assert_type(
-      context_input,
-      dtypes.float32,
-      message='context_input must have dtype float32; got {}.'.format(
-          context_input.dtype))
-  with ops.control_dependencies(
-      [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
-    padded_length = array_ops.shape(sequence_input)[1]
-    tiled_context_input = array_ops.tile(
-        array_ops.expand_dims(context_input, 1),
-        array_ops.concat([[1], [padded_length], [1]], 0))
-  return array_ops.concat([sequence_input, tiled_context_input], 2)
-
-
-def sequence_categorical_column_with_identity(
-    key, num_buckets, default_value=None):
-  """Returns a feature column that represents sequences of integers.
-
-  Pass this to `embedding_column` or `indicator_column` to convert sequence
-  categorical data into dense representation for input to sequence NN, such as
-  RNN.
-
-  Example:
-
-  ```python
-  watches = sequence_categorical_column_with_identity(
-      'watches', num_buckets=1000)
-  watches_embedding = embedding_column(watches, dimension=10)
-  columns = [watches_embedding]
-
-  features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  sequence_feature_layer = SequenceFeatures(columns)
-  sequence_input, sequence_length = sequence_feature_layer(features)
-  sequence_length_mask = tf.sequence_mask(sequence_length)
-
-  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
-  rnn_layer = tf.keras.layers.RNN(rnn_cell)
-  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
-  ```
-
-  Args:
-    key: A unique string identifying the input feature.
-    num_buckets: Range of inputs. Namely, inputs are expected to be in the
-      range `[0, num_buckets)`.
-    default_value: If `None`, this column's graph operations will fail for
-      out-of-range inputs. Otherwise, this value must be in the range
-      `[0, num_buckets)`, and will replace out-of-range inputs.
-
-  Returns:
-    A `SequenceCategoricalColumn`.
-
-  Raises:
-    ValueError: if `num_buckets` is less than one.
-    ValueError: if `default_value` is not in range `[0, num_buckets)`.
-  """
-  return fc.SequenceCategoricalColumn(
-      fc.categorical_column_with_identity(
-          key=key,
-          num_buckets=num_buckets,
-          default_value=default_value))
-
-
-def sequence_categorical_column_with_hash_bucket(
-    key, hash_bucket_size, dtype=dtypes.string):
-  """A sequence of categorical terms where ids are set by hashing.
-
-  Pass this to `embedding_column` or `indicator_column` to convert sequence
-  categorical data into dense representation for input to sequence NN, such as
-  RNN.
-
-  Example:
-
-  ```python
-  tokens = sequence_categorical_column_with_hash_bucket(
-      'tokens', hash_bucket_size=1000)
-  tokens_embedding = embedding_column(tokens, dimension=10)
-  columns = [tokens_embedding]
-
-  features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  sequence_feature_layer = SequenceFeatures(columns)
-  sequence_input, sequence_length = sequence_feature_layer(features)
-  sequence_length_mask = tf.sequence_mask(sequence_length)
-
-  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
-  rnn_layer = tf.keras.layers.RNN(rnn_cell)
-  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
-  ```
-
-  Args:
-    key: A unique string identifying the input feature.
-    hash_bucket_size: An int > 1. The number of buckets.
-    dtype: The type of features. Only string and integer types are supported.
-
-  Returns:
-    A `SequenceCategoricalColumn`.
-
-  Raises:
-    ValueError: `hash_bucket_size` is not greater than 1.
-    ValueError: `dtype` is neither string nor integer.
-  """
-  return fc.SequenceCategoricalColumn(
-      fc.categorical_column_with_hash_bucket(
-          key=key,
-          hash_bucket_size=hash_bucket_size,
-          dtype=dtype))
-
-
-def sequence_categorical_column_with_vocabulary_file(
-    key, vocabulary_file, vocabulary_size=None, num_oov_buckets=0,
-    default_value=None, dtype=dtypes.string):
-  """A sequence of categorical terms where ids use a vocabulary file.
-
-  Pass this to `embedding_column` or `indicator_column` to convert sequence
-  categorical data into dense representation for input to sequence NN, such as
-  RNN.
-
-  Example:
-
-  ```python
-  states = sequence_categorical_column_with_vocabulary_file(
-      key='states', vocabulary_file='/us/states.txt', vocabulary_size=50,
-      num_oov_buckets=5)
-  states_embedding = embedding_column(states, dimension=10)
-  columns = [states_embedding]
-
-  features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  sequence_feature_layer = SequenceFeatures(columns)
-  sequence_input, sequence_length = sequence_feature_layer(features)
-  sequence_length_mask = tf.sequence_mask(sequence_length)
-
-  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
-  rnn_layer = tf.keras.layers.RNN(rnn_cell)
-  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
-  ```
-
-  Args:
-    key: A unique string identifying the input feature.
-    vocabulary_file: The vocabulary file name.
-    vocabulary_size: Number of the elements in the vocabulary. This must be no
-      greater than length of `vocabulary_file`, if less than length, later
-      values are ignored. If None, it is set to the length of `vocabulary_file`.
-    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
-      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
-      `[vocabulary_size, vocabulary_size+num_oov_buckets)` based on a hash of
-      the input value. A positive `num_oov_buckets` can not be specified with
-      `default_value`.
-    default_value: The integer ID value to return for out-of-vocabulary feature
-      values, defaults to `-1`. This can not be specified with a positive
-      `num_oov_buckets`.
-    dtype: The type of features. Only string and integer types are supported.
-
-  Returns:
-    A `SequenceCategoricalColumn`.
-
-  Raises:
-    ValueError: `vocabulary_file` is missing or cannot be opened.
-    ValueError: `vocabulary_size` is missing or < 1.
-    ValueError: `num_oov_buckets` is a negative integer.
-    ValueError: `num_oov_buckets` and `default_value` are both specified.
-    ValueError: `dtype` is neither string nor integer.
-  """
-  return fc.SequenceCategoricalColumn(
-      fc.categorical_column_with_vocabulary_file(
-          key=key,
-          vocabulary_file=vocabulary_file,
-          vocabulary_size=vocabulary_size,
-          num_oov_buckets=num_oov_buckets,
-          default_value=default_value,
-          dtype=dtype))
-
-
-def sequence_categorical_column_with_vocabulary_list(
-    key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0):
-  """A sequence of categorical terms where ids use an in-memory list.
-
-  Pass this to `embedding_column` or `indicator_column` to convert sequence
-  categorical data into dense representation for input to sequence NN, such as
-  RNN.
-
-  Example:
-
-  ```python
-  colors = sequence_categorical_column_with_vocabulary_list(
-      key='colors', vocabulary_list=('R', 'G', 'B', 'Y'),
-      num_oov_buckets=2)
-  colors_embedding = embedding_column(colors, dimension=3)
-  columns = [colors_embedding]
-
-  features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  sequence_feature_layer = SequenceFeatures(columns)
-  sequence_input, sequence_length = sequence_feature_layer(features)
-  sequence_length_mask = tf.sequence_mask(sequence_length)
-
-  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
-  rnn_layer = tf.keras.layers.RNN(rnn_cell)
-  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
-  ```
-
-  Args:
-    key: A unique string identifying the input feature.
-    vocabulary_list: An ordered iterable defining the vocabulary. Each feature
-      is mapped to the index of its value (if present) in `vocabulary_list`.
-      Must be castable to `dtype`.
-    dtype: The type of features. Only string and integer types are supported.
-      If `None`, it will be inferred from `vocabulary_list`.
-    default_value: The integer ID value to return for out-of-vocabulary feature
-      values, defaults to `-1`. This can not be specified with a positive
-      `num_oov_buckets`.
-    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
-      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
-      `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a
-      hash of the input value. A positive `num_oov_buckets` can not be specified
-      with `default_value`.
-
-  Returns:
-    A `SequenceCategoricalColumn`.
-
-  Raises:
-    ValueError: if `vocabulary_list` is empty, or contains duplicate keys.
-    ValueError: `num_oov_buckets` is a negative integer.
-    ValueError: `num_oov_buckets` and `default_value` are both specified.
-    ValueError: if `dtype` is not integer or string.
-  """
-  return fc.SequenceCategoricalColumn(
-      fc.categorical_column_with_vocabulary_list(
-          key=key,
-          vocabulary_list=vocabulary_list,
-          dtype=dtype,
-          default_value=default_value,
-          num_oov_buckets=num_oov_buckets))
-
-
-def sequence_numeric_column(
-    key,
-    shape=(1,),
-    default_value=0.,
-    dtype=dtypes.float32,
-    normalizer_fn=None):
-  """Returns a feature column that represents sequences of numeric data.
-
-  Example:
-
-  ```python
-  temperature = sequence_numeric_column('temperature')
-  columns = [temperature]
-
-  features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  sequence_feature_layer = SequenceFeatures(columns)
-  sequence_input, sequence_length = sequence_feature_layer(features)
-  sequence_length_mask = tf.sequence_mask(sequence_length)
-
-  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
-  rnn_layer = tf.keras.layers.RNN(rnn_cell)
-  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
-  ```
-
-  Args:
-    key: A unique string identifying the input features.
-    shape: The shape of the input data per sequence id. E.g. if `shape=(2,)`,
-      each example must contain `2 * sequence_length` values.
-    default_value: A single value compatible with `dtype` that is used for
-      padding the sparse data into a dense `Tensor`.
-    dtype: The type of values.
-    normalizer_fn: If not `None`, a function that can be used to normalize the
-      value of the tensor after `default_value` is applied for parsing.
-      Normalizer function takes the input `Tensor` as its argument, and returns
-      the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that
-      even though the most common use case of this function is normalization, it
-      can be used for any kind of Tensorflow transformations.
-
-  Returns:
-    A `SequenceNumericColumn`.
-
-  Raises:
-    TypeError: if any dimension in shape is not an int.
-    ValueError: if any dimension in shape is not a positive integer.
-    ValueError: if `dtype` is not convertible to `tf.float32`.
-  """
-  shape = fc_v2._check_shape(shape=shape, key=key)
-  if not (dtype.is_integer or dtype.is_floating):
-    raise ValueError('dtype must be convertible to float. '
-                     'dtype: {}, key: {}'.format(dtype, key))
-  if normalizer_fn is not None and not callable(normalizer_fn):
-    raise TypeError(
-        'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn))
-
-  return SequenceNumericColumn(
-      key,
-      shape=shape,
-      default_value=default_value,
-      dtype=dtype,
-      normalizer_fn=normalizer_fn)
-
-
-def _assert_all_equal_and_return(tensors, name=None):
-  """Asserts that all tensors are equal and returns the first one."""
-  with ops.name_scope(name, 'assert_all_equal', values=tensors):
-    if len(tensors) == 1:
-      return tensors[0]
-    assert_equal_ops = []
-    for t in tensors[1:]:
-      assert_equal_ops.append(check_ops.assert_equal(tensors[0], t))
-    with ops.control_dependencies(assert_equal_ops):
-      return array_ops.identity(tensors[0])
-
-
-class SequenceNumericColumn(
-    fc.SequenceDenseColumn,
-    collections.namedtuple(
-        'SequenceNumericColumn',
-        ('key', 'shape', 'default_value', 'dtype', 'normalizer_fn'))):
-  """Represents sequences of numeric data."""
-
-  @property
-  def _is_v2_column(self):
-    return True
-
-  @property
-  def name(self):
-    """See `FeatureColumn` base class."""
-    return self.key
-
-  @property
-  def parse_example_spec(self):
-    """See `FeatureColumn` base class."""
-    return {self.key: parsing_ops.VarLenFeature(self.dtype)}
-
-  def transform_feature(self, transformation_cache, state_manager):
-    """See `FeatureColumn` base class.
-
-    In this case, we apply the `normalizer_fn` to the input tensor.
-
-    Args:
-      transformation_cache: A `FeatureTransformationCache` object to access
-        features.
-      state_manager: A `StateManager` to create / access resources such as
-        lookup tables.
-
-    Returns:
-      Normalized input tensor.
-    """
-    input_tensor = transformation_cache.get(self.key, state_manager)
-    if self.normalizer_fn is not None:
-      input_tensor = self.normalizer_fn(input_tensor)
-    return input_tensor
-
-  @property
-  def variable_shape(self):
-    """Returns a `TensorShape` representing the shape of sequence input."""
-    return tensor_shape.TensorShape(self.shape)
-
-  def get_sequence_dense_tensor(self, transformation_cache, state_manager):
-    """Returns a `TensorSequenceLengthPair`.
-
-    Args:
-      transformation_cache: A `FeatureTransformationCache` object to access
-        features.
-      state_manager: A `StateManager` to create / access resources such as
-        lookup tables.
-    """
-    sp_tensor = transformation_cache.get(self, state_manager)
-    dense_tensor = sparse_ops.sparse_tensor_to_dense(
-        sp_tensor, default_value=self.default_value)
-    # Reshape into [batch_size, T, variable_shape].
-    dense_shape = array_ops.concat(
-        [array_ops.shape(dense_tensor)[:1], [-1], self.variable_shape],
-        axis=0)
-    dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)
-
-    # Get the number of timesteps per example. For the 2D case, the raw
-    # values are grouped according to num_elements; for the 3D case, the
-    # grouping happens in the third dimension, and sequence length is not
-    # affected.
-    if sp_tensor.shape.ndims == 2:
-      num_elements = self.variable_shape.num_elements()
-    else:
-      num_elements = 1
-    seq_length = fc_old._sequence_length_from_sparse_tensor(
-        sp_tensor, num_elements=num_elements)
-
-    return fc.SequenceDenseColumn.TensorSequenceLengthPair(
-        dense_tensor=dense_tensor, sequence_length=seq_length)
-
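To make the reshape and timestep computation above concrete, here is a worked sketch whose numbers mirror the 2D multi-dimensional numeric-column test removed below (a column of shape (2, 2), so every four raw values form one timestep):

    import tensorflow as tf

    sp = tf.SparseTensor(
        indices=[[0, i] for i in range(8)] + [[1, i] for i in range(4)],
        values=[0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.],
        dense_shape=[2, 8])
    dense = tf.sparse.to_dense(sp)            # [2, 8]; row 1 is zero-padded
    dense = tf.reshape(dense, [2, -1, 2, 2])  # [batch, T, 2, 2] -> [2, 2, 2, 2]
    # Row 0 carries 8 raw values -> 8 / 4 = 2 timesteps; row 1 carries 4 -> 1,
    # so the computed sequence_length is [2, 1].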
-  # TODO(b/119409767): Implement parents, _{get,from}_config.
-  @property
-  def parents(self):
-    """See 'FeatureColumn` base class."""
-    raise NotImplementedError()
-
-  def _get_config(self):
-    """See 'FeatureColumn` base class."""
-    raise NotImplementedError()
-
-  @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
-    """See 'FeatureColumn` base class."""
-    raise NotImplementedError()
-
-# pylint: enable=protected-access
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_integration_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_integration_test.py
deleted file mode 100644
index 1b165a6..0000000
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_integration_test.py
+++ /dev/null
@@ -1,283 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Integration test for sequence feature columns with SequenceExamples."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import string
-import tempfile
-
-from google.protobuf import text_format
-
-from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column_v2 as sfc
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.feature_column import feature_column_v2 as fc
-from tensorflow.python.keras.layers import recurrent
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-from tensorflow.python.util import compat
-
-
-class SequenceFeatureColumnIntegrationTest(test.TestCase):
-
-  def _make_sequence_example(self):
-    example = example_pb2.SequenceExample()
-    example.context.feature['int_ctx'].int64_list.value.extend([5])
-    example.context.feature['float_ctx'].float_list.value.extend([123.6])
-    for val in range(0, 10, 2):
-      feat = feature_pb2.Feature()
-      feat.int64_list.value.extend([val] * val)
-      example.feature_lists.feature_list['int_list'].feature.extend([feat])
-    for val in range(1, 11, 2):
-      feat = feature_pb2.Feature()
-      feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val)
-      example.feature_lists.feature_list['str_list'].feature.extend([feat])
-
-    return example
-
-  def _build_feature_columns(self):
-    col = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
-    ctx_cols = [
-        fc.embedding_column(col, dimension=10),
-        fc.numeric_column('float_ctx')
-    ]
-
-    identity_col = sfc.sequence_categorical_column_with_identity(
-        'int_list', num_buckets=10)
-    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
-        'bytes_list', hash_bucket_size=100)
-    seq_cols = [
-        fc.embedding_column(identity_col, dimension=10),
-        fc.embedding_column(bucket_col, dimension=20)
-    ]
-
-    return ctx_cols, seq_cols
-
-  def test_sequence_example_into_input_layer(self):
-    examples = [_make_sequence_example().SerializeToString()] * 100
-    ctx_cols, seq_cols = self._build_feature_columns()
-
-    def _parse_example(example):
-      ctx, seq = parsing_ops.parse_single_sequence_example(
-          example,
-          context_features=fc.make_parse_example_spec_v2(ctx_cols),
-          sequence_features=fc.make_parse_example_spec_v2(seq_cols))
-      ctx.update(seq)
-      return ctx
-
-    ds = dataset_ops.Dataset.from_tensor_slices(examples)
-    ds = ds.map(_parse_example)
-    ds = ds.batch(20)
-
-    # Test on a single batch
-    features = ds.make_one_shot_iterator().get_next()
-
-    # Tile the context features across the sequence features
-    sequence_input_layer = sfc.SequenceFeatures(seq_cols)
-    seq_layer, _ = sequence_input_layer(features)
-    input_layer = fc.DenseFeatures(ctx_cols)
-    ctx_layer = input_layer(features)
-    input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)
-
-    rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
-    output = rnn_layer(input_layer)
-
-    with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      features_r = sess.run(features)
-      self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
-
-      output_r = sess.run(output)
-      self.assertAllEqual(output_r.shape, [20, 10])
-
-
-class SequenceExampleParsingTest(test.TestCase):
-
-  def test_seq_ex_in_sequence_categorical_column_with_identity(self):
-    self._test_parsed_sequence_example(
-        'int_list', sfc.sequence_categorical_column_with_identity,
-        10, [3, 6], [2, 4, 6])
-
-  def test_seq_ex_in_sequence_categorical_column_with_hash_bucket(self):
-    self._test_parsed_sequence_example(
-        'bytes_list', sfc.sequence_categorical_column_with_hash_bucket,
-        10, [3, 4], [compat.as_bytes(x) for x in 'acg'])
-
-  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_list(self):
-    self._test_parsed_sequence_example(
-        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_list,
-        list(string.ascii_lowercase), [3, 4],
-        [compat.as_bytes(x) for x in 'acg'])
-
-  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_file(self):
-    _, fname = tempfile.mkstemp()
-    with open(fname, 'w') as f:
-      f.write(string.ascii_lowercase)
-    self._test_parsed_sequence_example(
-        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_file,
-        fname, [3, 4], [compat.as_bytes(x) for x in 'acg'])
-
-  def _test_parsed_sequence_example(
-      self, col_name, col_fn, col_arg, shape, values):
-    """Helper function to check that each FeatureColumn parses correctly.
-
-    Args:
-      col_name: string, name to give to the feature column. Should match
-        the name that the column will parse out of the features dict.
-      col_fn: function used to create the feature column. For example,
-        sequence_numeric_column.
-      col_arg: second arg that the target feature column is expecting.
-      shape: the expected dense_shape of the feature after parsing into
-        a SparseTensor.
-      values: the expected values at index [0, 2, 6] of the feature
-        after parsing into a SparseTensor.
-    """
-    example = _make_sequence_example()
-    columns = [
-        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
-        fc.numeric_column('float_ctx'),
-        col_fn(col_name, col_arg)
-    ]
-    context, seq_features = parsing_ops.parse_single_sequence_example(
-        example.SerializeToString(),
-        context_features=fc.make_parse_example_spec_v2(columns[:2]),
-        sequence_features=fc.make_parse_example_spec_v2(columns[2:]))
-
-    with self.cached_session() as sess:
-      ctx_result, seq_result = sess.run([context, seq_features])
-      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
-      self.assertEqual(
-          list(seq_result[col_name].values[[0, 2, 6]]), values)
-      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
-      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
-      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
-      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
-
-
-_SEQ_EX_PROTO = """
-context {
-  feature {
-    key: "float_ctx"
-    value {
-      float_list {
-        value: 123.6
-      }
-    }
-  }
-  feature {
-    key: "int_ctx"
-    value {
-      int64_list {
-        value: 5
-      }
-    }
-  }
-}
-feature_lists {
-  feature_list {
-    key: "bytes_list"
-    value {
-      feature {
-        bytes_list {
-          value: "a"
-        }
-      }
-      feature {
-        bytes_list {
-          value: "b"
-          value: "c"
-        }
-      }
-      feature {
-        bytes_list {
-          value: "d"
-          value: "e"
-          value: "f"
-          value: "g"
-        }
-      }
-    }
-  }
-  feature_list {
-    key: "float_list"
-    value {
-      feature {
-        float_list {
-          value: 1.0
-        }
-      }
-      feature {
-        float_list {
-          value: 3.0
-          value: 3.0
-          value: 3.0
-        }
-      }
-      feature {
-        float_list {
-          value: 5.0
-          value: 5.0
-          value: 5.0
-          value: 5.0
-          value: 5.0
-        }
-      }
-    }
-  }
-  feature_list {
-    key: "int_list"
-    value {
-      feature {
-        int64_list {
-          value: 2
-          value: 2
-        }
-      }
-      feature {
-        int64_list {
-          value: 4
-          value: 4
-          value: 4
-          value: 4
-        }
-      }
-      feature {
-        int64_list {
-          value: 6
-          value: 6
-          value: 6
-          value: 6
-          value: 6
-          value: 6
-        }
-      }
-    }
-  }
-}
-"""
-
-
-def _make_sequence_example():
-  example = example_pb2.SequenceExample()
-  return text_format.Parse(_SEQ_EX_PROTO, example)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_test.py
deleted file mode 100644
index a1feadd..0000000
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_test.py
+++ /dev/null
@@ -1,1525 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for sequential_feature_column."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column_v2 as sfc
-from tensorflow.python.feature_column import feature_column_lib as fc
-from tensorflow.python.feature_column.feature_column_v2_test import _TestStateManager
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import test
-from tensorflow.python.training import monitored_session
-
-
-class SequenceFeaturesTest(test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args_a': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2, 0, 1),
-           'dense_shape': (2, 2)},
-       'sparse_input_args_b': {
-           # example 0, ids [1]
-           # example 1, ids [2, 0]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (1, 2, 0),
-           'dense_shape': (2, 2)},
-       'expected_input_layer': [
-           # example 0, ids_a [2], ids_b [1]
-           [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
-           # example 1, ids_a [0, 1], ids_b [2, 0]
-           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
-       'expected_sequence_length': [1, 2]},
-      {'testcase_name': '3D',
-       'sparse_input_args_a': {
-           # feature 0, ids [[2], [0, 1]]
-           # feature 1, ids [[0, 0], [1]]
-           'indices': (
-               (0, 0, 0), (0, 1, 0), (0, 1, 1),
-               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2, 0, 1, 0, 0, 1),
-           'dense_shape': (2, 2, 2)},
-       'sparse_input_args_b': {
-           # feature 0, ids [[1, 1], [1]]
-           # feature 1, ids [[2], [0]]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           'values': (1, 1, 1, 2, 0),
-           'dense_shape': (2, 2, 2)},
-       'expected_input_layer': [
-           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
-           [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
-           # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -]
-           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]],
-       'expected_sequence_length': [2, 2]},
-      )
-  def test_embedding_column(
-      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
-      expected_sequence_length):
-
-    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
-    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
-    vocabulary_size = 3
-    embedding_dimension_a = 2
-    embedding_values_a = (
-        (1., 2.),  # id 0
-        (3., 4.),  # id 1
-        (5., 6.)  # id 2
-    )
-    embedding_dimension_b = 3
-    embedding_values_b = (
-        (11., 12., 13.),  # id 0
-        (14., 15., 16.),  # id 1
-        (17., 18., 19.)  # id 2
-    )
-    def _get_initializer(embedding_dimension, embedding_values):
-      def _initializer(shape, dtype, partition_info):
-        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-        self.assertEqual(dtypes.float32, dtype)
-        self.assertIsNone(partition_info)
-        return embedding_values
-      return _initializer
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column_a = fc.embedding_column(
-        categorical_column_a,
-        dimension=embedding_dimension_a,
-        initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    embedding_column_b = fc.embedding_column(
-        categorical_column_b,
-        dimension=embedding_dimension_b,
-        initializer=_get_initializer(embedding_dimension_b, embedding_values_b))
-
-    # Test that columns are reordered alphabetically.
-    sequence_input_layer = sfc.SequenceFeatures(
-        [embedding_column_b, embedding_column_a])
-    input_layer, sequence_length = sequence_input_layer({
-        'aaa': sparse_input_a, 'bbb': sparse_input_b,})
-
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertCountEqual(
-        ('sequence_features/aaa_embedding/embedding_weights:0',
-         'sequence_features/bbb_embedding/embedding_weights:0'),
-        tuple([v.name for v in global_vars]))
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess))
-      self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess))
-      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-  def test_embedding_column_with_non_sequence_categorical(self):
-    """Tests that error is raised for non-sequence embedding column."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column_a = fc.embedding_column(
-        categorical_column_a, dimension=2)
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In embedding_column: aaa_embedding\. categorical_column must be of '
-        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
-      sequence_input_layer = sfc.SequenceFeatures([embedding_column_a])
-      _, _ = sequence_input_layer({'aaa': sparse_input})
-
-  def test_shared_embedding_column(self):
-    vocabulary_size = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [1]
-        # example 1, ids [2, 0]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(1, 2, 0),
-        dense_shape=(2, 2))
-
-    embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 4.),  # id 1
-        (5., 6.)  # id 2
-    )
-
-    def _get_initializer(embedding_dimension, embedding_values):
-
-      def _initializer(shape, dtype, partition_info):
-        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-        self.assertEqual(dtypes.float32, dtype)
-        self.assertIsNone(partition_info)
-        return embedding_values
-
-      return _initializer
-
-    expected_input_layer = [
-        # example 0, ids_a [2], ids_b [1]
-        [[5., 6., 3., 4.], [0., 0., 0., 0.]],
-        # example 1, ids_a [0, 1], ids_b [2, 0]
-        [[1., 2., 5., 6.], [3., 4., 1., 2.]],
-    ]
-    expected_sequence_length = [1, 2]
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    # Test that columns are reordered alphabetically.
-    shared_embedding_columns = fc.shared_embedding_columns_v2(
-        [categorical_column_b, categorical_column_a],
-        dimension=embedding_dimension,
-        initializer=_get_initializer(embedding_dimension, embedding_values))
-
-    sequence_input_layer = sfc.SequenceFeatures(shared_embedding_columns)
-    input_layer, sequence_length = sequence_input_layer({
-        'aaa': sparse_input_a, 'bbb': sparse_input_b})
-
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertCountEqual(
-        ('aaa_bbb_shared_embedding:0',),
-        tuple([v.name for v in global_vars]))
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
-      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-  def test_shared_embedding_column_with_non_sequence_categorical(self):
-    """Tests that error is raised for non-sequence shared embedding column."""
-    vocabulary_size = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    categorical_column_b = fc.categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    shared_embedding_columns = fc.shared_embedding_columns_v2(
-        [categorical_column_a, categorical_column_b], dimension=2)
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In embedding_column: aaa_shared_embedding\. categorical_column must '
-        r'be of type SequenceCategoricalColumn to use SequenceFeatures\.'):
-      sequence_input_layer = sfc.SequenceFeatures(shared_embedding_columns)
-      _, _ = sequence_input_layer({'aaa': sparse_input_a,
-                                   'bbb': sparse_input_b})
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args_a': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2, 0, 1),
-           'dense_shape': (2, 2)},
-       'sparse_input_args_b': {
-           # example 0, ids [1]
-           # example 1, ids [1, 0]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (1, 1, 0),
-           'dense_shape': (2, 2)},
-       'expected_input_layer': [
-           # example 0, ids_a [2], ids_b [1]
-           [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
-           # example 1, ids_a [0, 1], ids_b [1, 0]
-           [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
-       'expected_sequence_length': [1, 2]},
-      {'testcase_name': '3D',
-       'sparse_input_args_a': {
-           # feature 0, ids [[2], [0, 1]]
-           # feature 1, ids [[0, 0], [1]]
-           'indices': (
-               (0, 0, 0), (0, 1, 0), (0, 1, 1),
-               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2, 0, 1, 0, 0, 1),
-           'dense_shape': (2, 2, 2)},
-       'sparse_input_args_b': {
-           # feature 0, ids [[1, 1], [1]]
-           # feature 1, ids [[1], [0]]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           'values': (1, 1, 1, 1, 0),
-           'dense_shape': (2, 2, 2)},
-       'expected_input_layer': [
-           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
-           [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
-           # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -]
-           [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
-       'expected_sequence_length': [2, 2]},
-      )
-  def test_indicator_column(
-      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
-      expected_sequence_length):
-    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
-    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
-
-    vocabulary_size_a = 3
-    vocabulary_size_b = 2
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size_a)
-    indicator_column_a = fc.indicator_column(categorical_column_a)
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size_b)
-    indicator_column_b = fc.indicator_column(categorical_column_b)
-    # Test that columns are reordered alphabetically.
-    sequence_input_layer = sfc.SequenceFeatures(
-        [indicator_column_b, indicator_column_a])
-    input_layer, sequence_length = sequence_input_layer({
-        'aaa': sparse_input_a, 'bbb': sparse_input_b})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-  def test_indicator_column_with_non_sequence_categorical(self):
-    """Tests that error is raised for non-sequence categorical column."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    indicator_column_a = fc.indicator_column(categorical_column_a)
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In indicator_column: aaa_indicator\. categorical_column must be of '
-        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
-      sequence_input_layer = sfc.SequenceFeatures([indicator_column_a])
-      _, _ = sequence_input_layer({'aaa': sparse_input})
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, values [0., 1.]
-           # example 1, values [10.]
-           'indices': ((0, 0), (0, 1), (1, 0)),
-           'values': (0., 1., 10.),
-           'dense_shape': (2, 2)},
-       'expected_input_layer': [
-           [[0.], [1.]],
-           [[10.], [0.]]],
-       'expected_sequence_length': [2, 1]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # feature 0, ids [[20, 3], [5]]
-           # feature 1, ids [[3], [8]]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           'values': (20., 3., 5., 3., 8.),
-           'dense_shape': (2, 2, 2)},
-       'expected_input_layer': [
-           [[20.], [3.], [5.], [0.]],
-           [[3.], [0.], [8.], [0.]]],
-       'expected_sequence_length': [2, 2]},
-      )
-  def test_numeric_column(
-      self, sparse_input_args, expected_input_layer, expected_sequence_length):
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-
-    numeric_column = sfc.sequence_numeric_column('aaa')
-
-    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
-    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.]
-           # example 1, values [10., 11., 12., 13.]
-           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 8)},
-       'expected_input_layer': [
-           # The output of numeric_column._get_dense_tensor should be flattened.
-           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
-           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
-       'expected_sequence_length': [2, 1]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # example 0, values [[0., 1., 2., 3.], [4., 5., 6., 7.]]
-           # example 1, values [[10., 11., 12., 13.], []]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
-                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 2, 4)},
-       'expected_input_layer': [
-           # The output of numeric_column._get_dense_tensor should be flattened.
-           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
-           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
-       'expected_sequence_length': [2, 1]},
-      )
-  def test_numeric_column_multi_dim(
-      self, sparse_input_args, expected_input_layer, expected_sequence_length):
-    """Tests SequenceFeatures for multi-dimensional numeric_column."""
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
-
-    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
-    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-  def test_sequence_length_not_equal(self):
-    """Tests that an error is raised when sequence lengths are not equal."""
-    # Input a with sequence_length = [2, 1]
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    # Input b with sequence_length = [1, 1]
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0)),
-        values=(1., 10.),
-        dense_shape=(2, 2))
-    numeric_column_a = sfc.sequence_numeric_column('aaa')
-    numeric_column_b = sfc.sequence_numeric_column('bbb')
-
-    sequence_input_layer = sfc.SequenceFeatures(
-        [numeric_column_a, numeric_column_b])
-    _, sequence_length = sequence_input_layer({
-        'aaa': sparse_input_a, 'bbb': sparse_input_b})
-
-    with monitored_session.MonitoredSession() as sess:
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[Condition x == y did not hold element-wise:\] '
-          r'\[x \(sequence_features/aaa/sequence_length:0\) = \] \[2 1\] '
-          r'\[y \(sequence_features/bbb/sequence_length:0\) = \] \[1 1\]'):
-        sess.run(sequence_length)
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
-           # example 1, values [[[10., 11.], [12., 13.]]]
-           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 8)},
-       'expected_shape': [2, 2, 4]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # example 0, values [[0., 1., 2., 3.], [4., 5., 6., 7.]]
-           # example 1, values [[10., 11., 12., 13.], []]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
-                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 2, 4)},
-       'expected_shape': [2, 2, 4]},
-      )
-  def test_static_shape_from_tensors_numeric(
-      self, sparse_input_args, expected_shape):
-    """Tests that we return a known static shape when we have one."""
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
-
-    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
-    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
-    shape = input_layer.get_shape()
-    self.assertEqual(shape, expected_shape)
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           # example 2, ids []
-           # example 3, ids [1]
-           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
-           'values': (2, 0, 1, 1),
-           'dense_shape': (4, 2)},
-       'expected_shape': [4, 2, 3]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # example 0, ids [[2]]
-           # example 1, ids [[0, 1], [2]]
-           # example 2, ids []
-           # example 3, ids [[1], [0, 2]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           'values': (2, 0, 1, 2, 1, 0, 2),
-           'dense_shape': (4, 2, 2)},
-       'expected_shape': [4, 2, 3]}
-      )
-  def test_static_shape_from_tensors_indicator(
-      self, sparse_input_args, expected_shape):
-    """Tests that we return a known static shape when we have one."""
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    indicator_column = fc.indicator_column(categorical_column)
-
-    sequence_input_layer = sfc.SequenceFeatures([indicator_column])
-    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
-    shape = input_layer.get_shape()
-    self.assertEqual(shape, expected_shape)
-
-  def test_compute_output_shape(self):
-    price1 = sfc.sequence_numeric_column('price1', shape=2)
-    price2 = sfc.sequence_numeric_column('price2')
-    with ops.Graph().as_default():
-      features = {
-          'price1': sparse_tensor.SparseTensor(
-              indices=[[0, 0, 0], [0, 0, 1],
-                       [0, 1, 0], [0, 1, 1],
-                       [1, 0, 0], [1, 0, 1],
-                       [2, 0, 0], [2, 0, 1],
-                       [3, 0, 0], [3, 0, 1]],
-              values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.],
-              dense_shape=(4, 3, 2)),
-          'price2': sparse_tensor.SparseTensor(
-              indices=[[0, 0],
-                       [0, 1],
-                       [1, 0],
-                       [2, 0],
-                       [3, 0]],
-              values=[10., 11., 20., 30., 40.],
-              dense_shape=(4, 3))}
-      sequence_features = sfc.SequenceFeatures([price1, price2])
-      seq_input, seq_len = sequence_features(features)
-      self.assertEqual(
-          sequence_features.compute_output_shape((None, None)),
-          (None, None, 3))
-      self.evaluate(variables_lib.global_variables_initializer())
-      self.evaluate(lookup_ops.tables_initializer())
-
-      self.assertAllClose([[[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
-                           [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
-                           [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
-                           [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]]],
-                          self.evaluate(seq_input))
-      self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))
-
-
-class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
-  """Tests the utility fn concatenate_context_input."""
-
-  def test_concatenate_context_input(self):
-    seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2))
-    context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5))
-    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
-    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
-    input_layer = sfc.concatenate_context_input(context_input, seq_input)
-
-    expected = np.array([
-        [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]],
-        [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]]
-    ], dtype=np.float32)
-    with monitored_session.MonitoredSession() as sess:
-      output = sess.run(input_layer)
-      self.assertAllEqual(expected, output)
-
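The expected array above pins down what the utility computes: the context row is tiled across every timestep and concatenated to the sequence input along the last axis. A minimal sketch reproducing the same result by hand, using the test's shapes and values:

    import numpy as np
    import tensorflow as tf

    seq = tf.constant(np.arange(12).reshape(2, 3, 2), dtype=tf.float32)
    ctx = tf.constant(np.arange(10).reshape(2, 5), dtype=tf.float32)
    # Tile the context over the 3 timesteps, then concatenate feature-wise.
    tiled_ctx = tf.tile(tf.expand_dims(ctx, 1), [1, 3, 1])  # [2, 3, 5]
    combined = tf.concat([seq, tiled_ctx], axis=-1)         # [2, 3, 7]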
-  @parameterized.named_parameters(
-      {'testcase_name': 'rank_lt_3',
-       'seq_input_arg': np.arange(100).reshape(10, 10)},
-      {'testcase_name': 'rank_gt_3',
-       'seq_input_arg': np.arange(100).reshape(5, 5, 2, 2)}
-      )
-  def test_sequence_input_throws_error(self, seq_input_arg):
-    seq_input = ops.convert_to_tensor(seq_input_arg)
-    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
-    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
-    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
-    with self.assertRaisesRegexp(ValueError, 'sequence_input must have rank 3'):
-      sfc.concatenate_context_input(context_input, seq_input)
-
-  @parameterized.named_parameters(
-      {'testcase_name': 'rank_lt_2',
-       'context_input_arg': np.arange(100)},
-      {'testcase_name': 'rank_gt_2',
-       'context_input_arg': np.arange(100).reshape(5, 5, 4)}
-      )
-  def test_context_input_throws_error(self, context_input_arg):
-    context_input = ops.convert_to_tensor(context_input_arg)
-    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
-    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
-    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
-    with self.assertRaisesRegexp(ValueError, 'context_input must have rank 2'):
-      sfc.concatenate_context_input(context_input, seq_input)
-
-  def test_integer_seq_input_throws_error(self):
-    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
-    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
-    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
-    with self.assertRaisesRegexp(
-        TypeError, 'sequence_input must have dtype float32'):
-      sfc.concatenate_context_input(context_input, seq_input)
-
-  def test_integer_context_input_throws_error(self):
-    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
-    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
-    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
-    with self.assertRaisesRegexp(
-        TypeError, 'context_input must have dtype float32'):
-      sfc.concatenate_context_input(context_input, seq_input)
-
-
-class DenseFeaturesTest(test.TestCase):
-  """Tests DenseFeatures with sequence feature columns."""
-
-  def test_embedding_column(self):
-    """Tests that error is raised for sequence embedding column."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column_a = fc.embedding_column(
-        categorical_column_a, dimension=2)
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In embedding_column: aaa_embedding\. categorical_column must not be '
-        r'of type SequenceCategoricalColumn\.'):
-      input_layer = fc.DenseFeatures([embedding_column_a])
-      _ = input_layer({'aaa': sparse_input})
-
-  def test_indicator_column(self):
-    """Tests that error is raised for sequence indicator column."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    indicator_column_a = fc.indicator_column(categorical_column_a)
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In indicator_column: aaa_indicator\. categorical_column must not be '
-        r'of type SequenceCategoricalColumn\.'):
-      input_layer = fc.DenseFeatures([indicator_column_a])
-      _ = input_layer({'aaa': sparse_input})
-
-
-def _assert_sparse_tensor_value(test_case, expected, actual):
-  _assert_sparse_tensor_indices_shape(test_case, expected, actual)
-
-  test_case.assertEqual(
-      np.array(expected.values).dtype, np.array(actual.values).dtype)
-  test_case.assertAllEqual(expected.values, actual.values)
-
-
-def _assert_sparse_tensor_indices_shape(test_case, expected, actual):
-  test_case.assertEqual(np.int64, np.array(actual.indices).dtype)
-  test_case.assertAllEqual(expected.indices, actual.indices)
-
-  test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype)
-  test_case.assertAllEqual(expected.dense_shape, actual.dense_shape)
-
-
-def _get_sequence_dense_tensor(column, features):
-  return column.get_sequence_dense_tensor(
-      fc.FeatureTransformationCache(features), None)
-
-
-def _get_sequence_dense_tensor_state(column, features):
-  state_manager = _TestStateManager()
-  column.create_state(state_manager)
-  return column.get_sequence_dense_tensor(
-      fc.FeatureTransformationCache(features), state_manager)
-
-
-def _get_sparse_tensors(column, features):
-  return column.get_sparse_tensors(
-      fc.FeatureTransformationCache(features), None)
-
-
-class SequenceCategoricalColumnWithIdentityTest(
-    test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (1, 2, 0),
-           'dense_shape': (2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           'values': np.array((1, 2, 0), dtype=np.int64),
-           'dense_shape': (2, 2, 1)}},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           'values': (6, 7, 8),
-           'dense_shape': (2, 2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           'values': (6, 7, 8),
-           'dense_shape': (2, 2, 2)}}
-      )
-  def test_get_sparse_tensors(self, inputs_args, expected_args):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    expected = sparse_tensor.SparseTensorValue(**expected_args)
-    column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
-
-    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
-
-    self.assertIsNone(id_weight_pair.weight_tensor)
-    with monitored_session.MonitoredSession() as sess:
-      _assert_sparse_tensor_value(
-          self, expected, id_weight_pair.id_tensor.eval(session=sess))
-
-
-class SequenceCategoricalColumnWithHashBucketTest(
-    test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': ('omar', 'stringer', 'marlo'),
-           'dense_shape': (2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           # Ignored to avoid hash dependence in test.
-           'values': np.array((0, 0, 0), dtype=np.int64),
-           'dense_shape': (2, 2, 1)}},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           'values': ('omar', 'stringer', 'marlo'),
-           'dense_shape': (2, 2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           # Ignored to avoid hash dependence in test.
-           'values': np.array((0, 0, 0), dtype=np.int64),
-           'dense_shape': (2, 2, 2)}}
-      )
-  def test_get_sparse_tensors(self, inputs_args, expected_args):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    expected = sparse_tensor.SparseTensorValue(**expected_args)
-    column = sfc.sequence_categorical_column_with_hash_bucket(
-        'aaa', hash_bucket_size=10)
-
-    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
-
-    self.assertIsNone(id_weight_pair.weight_tensor)
-    with monitored_session.MonitoredSession() as sess:
-      _assert_sparse_tensor_indices_shape(
-          self, expected, id_weight_pair.id_tensor.eval(session=sess))
-
-
-class SequenceCategoricalColumnWithVocabularyFileTest(
-    test.TestCase, parameterized.TestCase):
-
-  def _write_vocab(self, vocab_strings, file_name):
-    vocab_file = os.path.join(self.get_temp_dir(), file_name)
-    with open(vocab_file, 'w') as f:
-      f.write('\n'.join(vocab_strings))
-    return vocab_file
-
-  def setUp(self):
-    super(SequenceCategoricalColumnWithVocabularyFileTest, self).setUp()
-
-    vocab_strings = ['omar', 'stringer', 'marlo']
-    self._wire_vocabulary_file_name = self._write_vocab(vocab_strings,
-                                                        'wire_vocabulary.txt')
-    self._wire_vocabulary_size = 3
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': ('marlo', 'skywalker', 'omar'),
-           'dense_shape': (2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           'values': np.array((2, -1, 0), dtype=np.int64),
-           'dense_shape': (2, 2, 1)}},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           'values': ('omar', 'skywalker', 'marlo'),
-           'dense_shape': (2, 2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           'values': np.array((0, -1, 2), dtype=np.int64),
-           'dense_shape': (2, 2, 2)}}
-      )
-  def test_get_sparse_tensors(self, inputs_args, expected_args):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    expected = sparse_tensor.SparseTensorValue(**expected_args)
-    column = sfc.sequence_categorical_column_with_vocabulary_file(
-        key='aaa',
-        vocabulary_file=self._wire_vocabulary_file_name,
-        vocabulary_size=self._wire_vocabulary_size)
-
-    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
-
-    self.assertIsNone(id_weight_pair.weight_tensor)
-    with monitored_session.MonitoredSession() as sess:
-      _assert_sparse_tensor_value(
-          self, expected, id_weight_pair.id_tensor.eval(session=sess))
-
-  def test_get_sparse_tensors_dynamic_zero_length(self):
-    """Tests _get_sparse_tensors with a dynamic sequence length."""
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=np.zeros((0, 2)), values=[], dense_shape=(2, 0))
-    expected = sparse_tensor.SparseTensorValue(
-        indices=np.zeros((0, 3)),
-        values=np.array((), dtype=np.int64),
-        dense_shape=(2, 0, 1))
-    column = sfc.sequence_categorical_column_with_vocabulary_file(
-        key='aaa',
-        vocabulary_file=self._wire_vocabulary_file_name,
-        vocabulary_size=self._wire_vocabulary_size)
-    input_placeholder_shape = list(inputs.dense_shape)
-    # Make second dimension (sequence length) dynamic.
-    input_placeholder_shape[1] = None
-    input_placeholder = array_ops.sparse_placeholder(
-        dtypes.string, shape=input_placeholder_shape)
-    id_weight_pair = _get_sparse_tensors(column, {'aaa': input_placeholder})
-
-    self.assertIsNone(id_weight_pair.weight_tensor)
-    with monitored_session.MonitoredSession() as sess:
-      result = id_weight_pair.id_tensor.eval(
-          session=sess, feed_dict={input_placeholder: inputs})
-      _assert_sparse_tensor_value(
-          self, expected, result)
-
-
-class SequenceCategoricalColumnWithVocabularyListTest(
-    test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': ('marlo', 'skywalker', 'omar'),
-           'dense_shape': (2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           'values': np.array((2, -1, 0), dtype=np.int64),
-           'dense_shape': (2, 2, 1)}},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           'values': ('omar', 'skywalker', 'marlo'),
-           'dense_shape': (2, 2, 2)},
-       'expected_args': {
-           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           'values': np.array((0, -1, 2), dtype=np.int64),
-           'dense_shape': (2, 2, 2)}}
-      )
-  def test_get_sparse_tensors(self, inputs_args, expected_args):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    expected = sparse_tensor.SparseTensorValue(**expected_args)
-    column = sfc.sequence_categorical_column_with_vocabulary_list(
-        key='aaa',
-        vocabulary_list=('omar', 'stringer', 'marlo'))
-
-    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
-
-    self.assertIsNone(id_weight_pair.weight_tensor)
-    with monitored_session.MonitoredSession() as sess:
-      _assert_sparse_tensor_value(
-          self, expected, id_weight_pair.id_tensor.eval(session=sess))
-
-
-class SequenceEmbeddingColumnTest(
-    test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           # example 2, ids []
-           # example 3, ids [1]
-           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
-           'values': (2, 0, 1, 1),
-           'dense_shape': (4, 2)},
-       'expected': [
-           # example 0, ids [2]
-           [[7., 11.], [0., 0.]],
-           # example 1, ids [0, 1]
-           [[1., 2.], [3., 5.]],
-           # example 2, ids []
-           [[0., 0.], [0., 0.]],
-           # example 3, ids [1]
-           [[3., 5.], [0., 0.]]]},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           # example 0, ids [[2]]
-           # example 1, ids [[0, 1], [2]]
-           # example 2, ids []
-           # example 3, ids [[1], [0, 2]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           'values': (2, 0, 1, 2, 1, 0, 2),
-           'dense_shape': (4, 2, 2)},
-       'expected': [
-           # example 0, ids [[2]]
-           [[7., 11.], [0., 0.]],
-           # example 1, ids [[0, 1], [2]]
-           [[2, 3.5], [7., 11.]],
-           # example 2, ids []
-           [[0., 0.], [0., 0.]],
-           # example 3, ids [[1], [0, 2]]
-           [[3., 5.], [4., 6.5]]]}
-      )
-  def test_get_sequence_dense_tensor(self, inputs_args, expected):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    vocabulary_size = 3
-    embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return embedding_values
-
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc.embedding_column(
-        categorical_column, dimension=embedding_dimension,
-        initializer=_initializer)
-
-    embedding_lookup, _ = _get_sequence_dense_tensor_state(
-        embedding_column, {'aaa': inputs})
-
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertCountEqual(
-        ('embedding_weights:0',), tuple([v.name for v in global_vars]))
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
-      self.assertAllEqual(expected, embedding_lookup.eval(session=sess))
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2, 0, 1),
-           'dense_shape': (2, 2)},
-       'expected_sequence_length': [1, 2]},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           # example 0, ids [[2]]
-           # example 1, ids [[0, 1], [2]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2, 0, 1, 2),
-           'dense_shape': (2, 2, 2)},
-       'expected_sequence_length': [1, 2]}
-      )
-  def test_sequence_length(self, inputs_args, expected_sequence_length):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    vocabulary_size = 3
-
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc.embedding_column(
-        categorical_column, dimension=2)
-
-    _, sequence_length = _get_sequence_dense_tensor_state(
-        embedding_column, {'aaa': inputs})
-
-    with monitored_session.MonitoredSession() as sess:
-      sequence_length = sess.run(sequence_length)
-      self.assertAllEqual(expected_sequence_length, sequence_length)
-      self.assertEqual(np.int64, sequence_length.dtype)
-
-  def test_sequence_length_with_empty_rows(self):
-    """Tests _sequence_length when some examples do not have ids."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids []
-        # example 1, ids [2]
-        # example 2, ids [0, 1]
-        # example 3, ids []
-        # example 4, ids [1]
-        # example 5, ids []
-        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(6, 2))
-    expected_sequence_length = [0, 1, 2, 0, 1, 0]
-
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc.embedding_column(
-        categorical_column, dimension=2)
-
-    _, sequence_length = _get_sequence_dense_tensor_state(
-        embedding_column, {'aaa': sparse_input})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-
-class SequenceSharedEmbeddingColumnTest(test.TestCase):
-
-  def test_get_sequence_dense_tensor(self):
-    vocabulary_size = 3
-    embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
-
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return embedding_values
-
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 2))
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [1]
-        # example 1, ids [0, 2]
-        # example 2, ids [0]
-        # example 3, ids []
-        indices=((0, 0), (1, 0), (1, 1), (2, 0)),
-        values=(1, 0, 2, 0),
-        dense_shape=(4, 2))
-
-    expected_lookups_a = [
-        # example 0, ids [2]
-        [[7., 11.], [0., 0.]],
-        # example 1, ids [0, 1]
-        [[1., 2.], [3., 5.]],
-        # example 2, ids []
-        [[0., 0.], [0., 0.]],
-        # example 3, ids [1]
-        [[3., 5.], [0., 0.]],
-    ]
-
-    expected_lookups_b = [
-        # example 0, ids [1]
-        [[3., 5.], [0., 0.]],
-        # example 1, ids [0, 2]
-        [[1., 2.], [7., 11.]],
-        # example 2, ids [0]
-        [[1., 2.], [0., 0.]],
-        # example 3, ids []
-        [[0., 0.], [0., 0.]],
-    ]
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    shared_embedding_columns = fc.shared_embedding_columns_v2(
-        [categorical_column_a, categorical_column_b],
-        dimension=embedding_dimension,
-        initializer=_initializer)
-
-    embedding_lookup_a = _get_sequence_dense_tensor(
-        shared_embedding_columns[0], {'aaa': sparse_input_a})[0]
-    embedding_lookup_b = _get_sequence_dense_tensor(
-        shared_embedding_columns[1], {'bbb': sparse_input_b})[0]
-
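-    # Both columns share a single embedding variable whose name is derived
-    # from the two keys ('aaa' and 'bbb').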
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('aaa_bbb_shared_embedding:0',),
-                          tuple([v.name for v in global_vars]))
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
-      self.assertAllEqual(
-          expected_lookups_a, embedding_lookup_a.eval(session=sess))
-      self.assertAllEqual(
-          expected_lookups_b, embedding_lookup_b.eval(session=sess))
-
-  def test_sequence_length(self):
-    vocabulary_size = 3
-
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    expected_sequence_length_a = [1, 2]
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [0, 2]
-        # example 1, ids [1]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0, 2, 1),
-        dense_shape=(2, 2))
-    expected_sequence_length_b = [2, 1]
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    shared_embedding_columns = fc.shared_embedding_columns_v2(
-        [categorical_column_a, categorical_column_b], dimension=2)
-
-    sequence_length_a = _get_sequence_dense_tensor(
-        shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
-    sequence_length_b = _get_sequence_dense_tensor(
-        shared_embedding_columns[1], {'bbb': sparse_input_b})[1]
-
-    with monitored_session.MonitoredSession() as sess:
-      sequence_length_a = sess.run(sequence_length_a)
-      self.assertAllEqual(expected_sequence_length_a, sequence_length_a)
-      self.assertEqual(np.int64, sequence_length_a.dtype)
-      sequence_length_b = sess.run(sequence_length_b)
-      self.assertAllEqual(expected_sequence_length_b, sequence_length_b)
-      self.assertEqual(np.int64, sequence_length_b.dtype)
-
-  def test_sequence_length_with_empty_rows(self):
-    """Tests _sequence_length when some examples do not have ids."""
-    vocabulary_size = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids []
-        # example 1, ids [2]
-        # example 2, ids [0, 1]
-        # example 3, ids []
-        # example 4, ids [1]
-        # example 5, ids []
-        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(6, 2))
-    expected_sequence_length_a = [0, 1, 2, 0, 1, 0]
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids []
-        # example 2, ids []
-        # example 3, ids []
-        # example 4, ids [1]
-        # example 5, ids [0, 1]
-        indices=((0, 0), (4, 0), (5, 0), (5, 1)),
-        values=(2, 1, 0, 1),
-        dense_shape=(6, 2))
-    expected_sequence_length_b = [1, 0, 0, 0, 1, 2]
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-
-    shared_embedding_columns = fc.shared_embedding_columns_v2(
-        [categorical_column_a, categorical_column_b], dimension=2)
-
-    sequence_length_a = _get_sequence_dense_tensor(
-        shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
-    sequence_length_b = _get_sequence_dense_tensor(
-        shared_embedding_columns[1], {'bbb': sparse_input_b})[1]
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_sequence_length_a, sequence_length_a.eval(session=sess))
-      self.assertAllEqual(
-          expected_sequence_length_b, sequence_length_b.eval(session=sess))
-
-
-class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           # example 2, ids []
-           # example 3, ids [1]
-           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
-           'values': (2, 0, 1, 1),
-           'dense_shape': (4, 2)},
-       'expected': [
-           # example 0, ids [2]
-           [[0., 0., 1.], [0., 0., 0.]],
-           # example 1, ids [0, 1]
-           [[1., 0., 0.], [0., 1., 0.]],
-           # example 2, ids []
-           [[0., 0., 0.], [0., 0., 0.]],
-           # example 3, ids [1]
-           [[0., 1., 0.], [0., 0., 0.]]]},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           # example 0, ids [[2]]
-           # example 1, ids [[0, 1], [2]]
-           # example 2, ids []
-           # example 3, ids [[1], [2, 2]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           'values': (2, 0, 1, 2, 1, 2, 2),
-           'dense_shape': (4, 2, 2)},
-       'expected': [
-           # example 0, ids [[2]]
-           [[0., 0., 1.], [0., 0., 0.]],
-           # example 1, ids [[0, 1], [2]]
-           [[1., 1., 0.], [0., 0., 1.]],
-           # example 2, ids []
-           [[0., 0., 0.], [0., 0., 0.]],
-           # example 3, ids [[1], [2, 2]]
-           [[0., 1., 0.], [0., 0., 2.]]]}
-      )
-  def test_get_sequence_dense_tensor(self, inputs_args, expected):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    vocabulary_size = 3
-
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    indicator_column = fc.indicator_column(categorical_column)
-
-    indicator_tensor, _ = _get_sequence_dense_tensor(
-        indicator_column, {'aaa': inputs})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(expected, indicator_tensor.eval(session=sess))
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2, 0, 1),
-           'dense_shape': (2, 2)},
-       'expected_sequence_length': [1, 2]},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           # example 0, ids [[2]]
-           # example 1, ids [[0, 1], [2]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2, 0, 1, 2),
-           'dense_shape': (2, 2, 2)},
-       'expected_sequence_length': [1, 2]}
-      )
-  def test_sequence_length(self, inputs_args, expected_sequence_length):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    vocabulary_size = 3
-
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    indicator_column = fc.indicator_column(categorical_column)
-
-    _, sequence_length = _get_sequence_dense_tensor(
-        indicator_column, {'aaa': inputs})
-
-    with monitored_session.MonitoredSession() as sess:
-      sequence_length = sess.run(sequence_length)
-      self.assertAllEqual(expected_sequence_length, sequence_length)
-      self.assertEqual(np.int64, sequence_length.dtype)
-
-  def test_sequence_length_with_empty_rows(self):
-    """Tests _sequence_length when some examples do not have ids."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids []
-        # example 1, ids [2]
-        # example 2, ids [0, 1]
-        # example 3, ids []
-        # example 4, ids [1]
-        # example 5, ids []
-        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(6, 2))
-    expected_sequence_length = [0, 1, 2, 0, 1, 0]
-
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    indicator_column = fc.indicator_column(categorical_column)
-
-    _, sequence_length = _get_sequence_dense_tensor(
-        indicator_column, {'aaa': sparse_input})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-
-class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
-
-  def test_defaults(self):
-    a = sfc.sequence_numeric_column('aaa')
-    self.assertEqual('aaa', a.key)
-    self.assertEqual('aaa', a.name)
-    self.assertEqual((1,), a.shape)
-    self.assertEqual(0., a.default_value)
-    self.assertEqual(dtypes.float32, a.dtype)
-    self.assertIsNone(a.normalizer_fn)
-
-  def test_shape_saved_as_tuple(self):
-    a = sfc.sequence_numeric_column('aaa', shape=[1, 2])
-    self.assertEqual((1, 2), a.shape)
-
-  def test_shape_must_be_positive_integer(self):
-    with self.assertRaisesRegexp(TypeError, 'shape dimensions must be integer'):
-      sfc.sequence_numeric_column('aaa', shape=[1.0])
-
-    with self.assertRaisesRegexp(
-        ValueError, 'shape dimensions must be greater than 0'):
-      sfc.sequence_numeric_column('aaa', shape=[0])
-
-  def test_dtype_is_convertible_to_float(self):
-    with self.assertRaisesRegexp(
-        ValueError, 'dtype must be convertible to float'):
-      sfc.sequence_numeric_column('aaa', dtype=dtypes.string)
-
-  def test_normalizer_fn_must_be_callable(self):
-    with self.assertRaisesRegexp(TypeError, 'must be a callable'):
-      sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           # example 0, values [0., 1.]
-           # example 1, values [10.]
-           'indices': ((0, 0), (0, 1), (1, 0)),
-           'values': (0., 1., 10.),
-           'dense_shape': (2, 2)},
-       'expected': [
-           [[0.], [1.]],
-           [[10.], [0.]]]},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           # example 0, values [[20., 3.], [5.]]
-           # example 1, values [[3.], [8.]]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           'values': (20, 3, 5., 3., 8.),
-           'dense_shape': (2, 2, 2)},
-       'expected': [
-           [[20.], [3.], [5.], [0.]],
-           [[3.], [0.], [8.], [0.]]]},
-      )
-  def test_get_sequence_dense_tensor(self, inputs_args, expected):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    numeric_column = sfc.sequence_numeric_column('aaa')
-
-    dense_tensor, _ = _get_sequence_dense_tensor(
-        numeric_column, {'aaa': inputs})
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(expected, dense_tensor.eval(session=sess))
-
-  def test_get_sequence_dense_tensor_with_normalizer_fn(self):
-
-    def _increment_two(input_sparse_tensor):
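-      # sparse_add adds 2.0 at positions (0, 0) and (1, 1), creating a new
-      # entry at (1, 1) where the input has no value.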
-      return sparse_ops.sparse_add(
-          input_sparse_tensor,
-          sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2))
-      )
-
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0.], [1.]]
-        # example 1, values [[10.]]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-
-    # Before _increment_two:
-    #   [[0.], [1.]],
-    #   [[10.], [0.]],
-    # After _increment_two:
-    #   [[2.], [1.]],
-    #   [[10.], [2.]],
-    expected_dense_tensor = [
-        [[2.], [1.]],
-        [[10.], [2.]],
-    ]
-    numeric_column = sfc.sequence_numeric_column(
-        'aaa', normalizer_fn=_increment_two)
-
-    dense_tensor, _ = _get_sequence_dense_tensor(
-        numeric_column, {'aaa': sparse_input})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_dense_tensor, dense_tensor.eval(session=sess))
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
-           # example 1, [[[10., 11.],  [12., 13.]]]
-           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 8)},
-       'expected_dense_tensor': [
-           [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
-           [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           'indices': ((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
-                       (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
-                       (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 2, 8)},
-       'expected_dense_tensor': [
-           [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
-            [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
-           [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]],
-            [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]},
-      )
-  def test_get_dense_tensor_multi_dim(
-      self, sparse_input_args, expected_dense_tensor):
-    """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
-
-    dense_tensor, _ = _get_sequence_dense_tensor(
-        numeric_column, {'aaa': sparse_input})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_dense_tensor, dense_tensor.eval(session=sess))
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'inputs_args': {
-           # example 0, values [2.]
-           # example 1, values [0., 1.]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2., 0., 1.),
-           'dense_shape': (2, 2)},
-       'expected_sequence_length': [1, 2],
-       'shape': (1,)},
-      {'testcase_name': '3D',
-       'inputs_args': {
-           # example 0, values [[2.]]
-           # example 1, values [[0., 1.], [2.]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2., 0., 1., 2.),
-           'dense_shape': (2, 2, 2)},
-       'expected_sequence_length': [1, 2],
-       'shape': (1,)},
-      {'testcase_name': '2D_with_shape',
-       'inputs_args': {
-           # example 0, values [2.]
-           # example 1, values [0., 1.]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2., 0., 1.),
-           'dense_shape': (2, 2)},
-       'expected_sequence_length': [1, 1],
-       'shape': (2,)},
-      {'testcase_name': '3D_with_shape',
-       'inputs_args': {
-           # example 0, values [[2.]]
-           # example 1, values [[0., 1.], [2.]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2., 0., 1., 2.),
-           'dense_shape': (2, 2, 2)},
-       'expected_sequence_length': [1, 2],
-       'shape': (2,)},
-      )
-  def test_sequence_length(self, inputs_args, expected_sequence_length, shape):
-    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=shape)
-
-    _, sequence_length = _get_sequence_dense_tensor(
-        numeric_column, {'aaa': inputs})
-
-    with monitored_session.MonitoredSession() as sess:
-      sequence_length = sess.run(sequence_length)
-      self.assertAllEqual(expected_sequence_length, sequence_length)
-      self.assertEqual(np.int64, sequence_length.dtype)
-
-  def test_sequence_length_with_empty_rows(self):
-    """Tests _sequence_length when some examples do not have ids."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values []
-        # example 1, values [[0.], [1.]]
-        # example 2, values [[2.]]
-        # example 3, values []
-        # example 4, values [[3.]]
-        # example 5, values []
-        indices=((1, 0), (1, 1), (2, 0), (4, 0)),
-        values=(0., 1., 2., 3.),
-        dense_shape=(6, 2))
-    expected_sequence_length = [0, 2, 1, 0, 1, 0]
-    numeric_column = sfc.sequence_numeric_column('aaa')
-
-    _, sequence_length = _get_sequence_dense_tensor(
-        numeric_column, {'aaa': sparse_input})
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD
index c99f847..8fd2b5f 100644
--- a/tensorflow/contrib/framework/BUILD
+++ b/tensorflow/contrib/framework/BUILD
@@ -32,7 +32,6 @@
         "python/ops/arg_scope.py",
         "python/ops/audio_ops.py",
         "python/ops/checkpoint_ops.py",
-        "python/ops/critical_section_ops.py",
         "python/ops/ops.py",
         "python/ops/prettyprint_ops.py",
         "python/ops/script_ops.py",
@@ -172,27 +171,6 @@
     ],
 )
 
-cuda_py_test(
-    name = "critical_section_test",
-    size = "medium",
-    srcs = ["python/ops/critical_section_test.py"],
-    additional_deps = [
-        "//tensorflow/python:client_testlib",
-        ":framework_py",
-        "//tensorflow/python/data/experimental/ops:prefetching_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:tensor_array_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/eager:context",
-    ],
-)
-
 py_test(
     name = "ops_test",
     size = "small",
diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py
index fc2334d..063717f 100644
--- a/tensorflow/contrib/framework/__init__.py
+++ b/tensorflow/contrib/framework/__init__.py
@@ -94,8 +94,6 @@
 @@smart_constant_value
 @@smart_case
 
-@@CriticalSection
-
 @@BoundedTensorSpec
 @@TensorSpec
 
@@ -129,6 +127,7 @@
 _allowed_symbols = ['nest']
 _nest_allowed_symbols = [
     'assert_same_structure',
+    'is_nested',
     'is_sequence',
     'is_sequence_or_composite',
     'flatten',
diff --git a/tensorflow/contrib/framework/python/ops/__init__.py b/tensorflow/contrib/framework/python/ops/__init__.py
index c497649..8113bf7 100644
--- a/tensorflow/contrib/framework/python/ops/__init__.py
+++ b/tensorflow/contrib/framework/python/ops/__init__.py
@@ -22,7 +22,6 @@
 # pylint: disable=wildcard-import
 from tensorflow.contrib.framework.python.ops.arg_scope import *
 from tensorflow.contrib.framework.python.ops.checkpoint_ops import *
-from tensorflow.contrib.framework.python.ops.critical_section_ops import *
 from tensorflow.contrib.framework.python.ops.ops import *
 from tensorflow.contrib.framework.python.ops.prettyprint_ops import *
 from tensorflow.contrib.framework.python.ops.script_ops import *
diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py
deleted file mode 100644
index 71ab755..0000000
--- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py
+++ /dev/null
@@ -1,449 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Critical Section object and execution logic."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-# TODO(ebrevdo): Re-enable once CriticalSection is in core.
-# from tensorflow.core.protobuf import critical_section_pb2
-
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gen_resource_variable_ops
-from tensorflow.python.ops import tensor_array_ops
-from tensorflow.python.util import nest
-
-
-# Graph Keys
-CRITICAL_SECTIONS = "critical_sections"
-CRITICAL_SECTION_EXECUTIONS = "critical_section_executions"
-
-
-class _ExecutionSignature(
-    collections.namedtuple("_ExecutionSignature",
-                           ("op", "handle",
-                            "resources", "exclusive_resource_access"))):
-  """A class storing an `ExecuteInCriticalResource` op and associated attrs."""
-  pass
-
-
-def _identity(x):
-  """Identity op that recognizes `TensorArray`, `Operation`, and `Tensor`."""
-  if isinstance(x, tensor_array_ops.TensorArray):
-    return x.identity()
-  elif isinstance(x, ops.Operation):
-    return control_flow_ops.group(x)
-  elif context.executing_eagerly() and x is None:
-    return None
-  else:
-    return array_ops.identity(x)
-
-
-def _get_colocation(op):
-  """Get colocation symbol from op, if any."""
-  try:
-    return op.get_attr("_class")
-  except ValueError:
-    return None
-
-
-class CriticalSection(object):
-  """Critical section.
-
-  A `CriticalSection` object is a resource in the graph which executes subgraphs
-  in **serial** order.  A common example of a subgraph one may wish to run
-  exclusively is the one given by the following function:
-
-  ```python
-  v = resource_variable_ops.ResourceVariable(0.0, name="v")
-
-  def count():
-    value = v.read_value()
-    with tf.control_dependencies([value]):
-      with tf.control_dependencies([v.assign_add(1)]):
-        return tf.identity(value)
-  ```
-
-  Here, a snapshot of `v` is captured in `value`; and then `v` is updated.
-  The snapshot value is returned.
-
-  If multiple workers or threads all execute `count` in parallel, there is no
-  guarantee that access to the variable `v` is atomic at any point within
-  any thread's calculation of `count`.  In fact, even implementing an atomic
-  counter that guarantees that the user will see each value `0, 1, ...` is
-  currently impossible.
-
-  The solution is to ensure any access to the underlying resource `v` is
-  only processed through a critical section:
-
-  ```python
-  cs = CriticalSection()
-  f1 = cs.execute(count)
-  f2 = cs.execute(count)
-  output = f1 + f2
-  session.run(output)
-  ```
-  The functions `f1` and `f2` will be executed serially, and updates to `v`
-  will be atomic.
-
-  **NOTES**
-
-  All resource objects, including the critical section and any captured
-  variables of functions executed on that critical section, will be
-  colocated on the same device (host and cpu/gpu).
-
-  When using multiple critical sections on the same resources, there is no
-  guarantee of exclusive access to those resources.  This behavior is disallowed
-  by default (but see the kwarg `exclusive_resource_access`).
-
-  For example, running the same function in two separate critical sections
-  will not ensure serial execution:
-
-  ```python
-  v = tf.get_variable("v", initializer=0.0, use_resource=True)
-  def accumulate(up):
-    x = v.read_value()
-    with tf.control_dependencies([x]):
-      with tf.control_dependencies([v.assign_add(up)]):
-        return tf.identity(x)
-  ex1 = CriticalSection().execute(
-    accumulate, 1.0, exclusive_resource_access=False)
-  ex2 = CriticalSection().execute(
-    accumulate, 1.0, exclusive_resource_access=False)
-  bad_sum = ex1 + ex2
-  sess.run(v.initializer)
-  sess.run(bad_sum)  # May return 0.0
-  ```
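-
-  To guarantee serial execution, route every execution through the same
-  `CriticalSection` object (as with `f1` and `f2` above); critical sections
-  created with the same non-empty `shared_name` share the underlying mutex.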
-  """
-
-  def __init__(self, name=None, shared_name=None,
-               critical_section_def=None, import_scope=None):
-    """Creates a critical section."""
-    if critical_section_def and name is not None:
-      raise ValueError("critical_section_def and shared_name are "
-                       "mutually exclusive.")
-    if critical_section_def:
-      self._init_from_proto(critical_section_def, import_scope=import_scope)
-    else:
-      self._init_from_args(name, shared_name)
-
-  def _init_from_proto(self, critical_section_def, import_scope):  # pylint: disable=invalid-name
-    raise NotImplementedError("Not yet implemented")
-    # TODO(ebrevdo): Re-enable once CriticalSection is in core.
-    # assert isinstance(
-    #     critical_section_def, critical_section_pb2.CriticalSectionDef)
-    # # Create from critical_section_def.
-    # g = ops.get_default_graph()
-    # self._handle = g.as_graph_element(
-    #     ops.prepend_name_scope(
-    #         critical_section_def.critical_section_name,
-    #         import_scope=import_scope))
-
-  def _init_from_args(self, name, shared_name):  # pylint: disable=invalid-name
-    """Initialize the CriticalSection from constructor arguments."""
-    with ops.name_scope(name, "CriticalSection", []) as name:
-      with ops.init_scope():
-        # pylint: disable=protected-access
-        container = ops.get_default_graph()._container
-        # pylint: enable=protected-access
-        if shared_name is None:
-          shared_name = name
-        if container is None:
-          container = ""
-        self._handle = gen_resource_variable_ops.mutex_v2(
-            shared_name=shared_name, container=container, name=name)
-
-    if not context.executing_eagerly():
-      ops.add_to_collections(CRITICAL_SECTIONS, self)
-
-  @property
-  def name(self):
-    return self._handle.op.name
-
-  def execute(self, fn, *args, **kwargs):
-    """Execute function `fn(*args, **kwargs)` inside the CriticalSection.
-
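-    For example (an illustrative sketch; `add_one` is a hypothetical callable,
-    not part of this API):
-
-    ```python
-    cs = CriticalSection()
-    add_one = lambda x: x + 1.0  # add_one is a hypothetical example fn.
-    # Both calls lock the same mutex, so the two executions run serially.
-    r1 = cs.execute(add_one, 1.0)
-    r2 = cs.execute(add_one, 2.0, name="second_execute")
-    ```
-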
-    Args:
-      fn: The function to execute.  Must return at least one tensor.
-      *args: Additional positional arguments to `fn`.
-      **kwargs: Additional keyword arguments to `fn`.
-        Several keywords are reserved for `execute`.  These are:
-
-        - name: The name to use when creating the execute operation.
-        - exclusive_resource_access: Whether the resources required by
-          `fn` should be exclusive to this `CriticalSection`.  Default: `True`.
-          You may want to set this to `False` if you will be accessing a
-          resource in read-only mode in two different CriticalSections.
-
-    Returns:
-      The tensors returned from `fn(*args, **kwargs)`.
-
-    Raises:
-      ValueError: If `fn` attempts to lock this `CriticalSection` in any nested
-        or lazy way that may cause a deadlock.
-      ValueError: If `exclusive_resource_access` is `True` (the default) and
-        another `CriticalSection` has an execution requesting the same
-        resources as in `*args`, `**kwargs`, and any additionally captured
-        inputs in `fn`.  Note, even if `exclusive_resource_access` is `True`,
-        if another execution in another `CriticalSection` was created without
-        `exclusive_resource_access=True`, a `ValueError` will be raised.
-    """
-    name = kwargs.pop("name", None)
-    exclusive_resource_access = kwargs.pop("exclusive_resource_access", True)
-
-    with ops.name_scope(name, "critical_section_execute", []):
-
-      # Ensure that mutex locking only happens *after* all args and
-      # kwargs have been executed.  This avoids certain types of deadlocks.
-      lock = gen_resource_variable_ops.mutex_lock(self._handle)
-
-      if not context.executing_eagerly():
-        # NOTE(ebrevdo): This is to ensure we don't pick up spurious
-        # Operations created by other threads.
-        with ops.get_default_graph()._lock:  # pylint: disable=protected-access
-          existing_ops = ops.get_default_graph().get_operations()
-          with ops.control_dependencies([lock]):
-            r = fn(*args, **kwargs)
-          # TODO(ebrevdo): If creating critical sections in a python loop, this
-          # makes graph creation time quadratic.  Revisit if this
-          # becomes a problem.
-          created_ops = (set(ops.get_default_graph().get_operations())
-                         .difference(existing_ops))
-      else:
-        with ops.control_dependencies([lock]):
-          r = fn(*args, **kwargs)
-
-      if not context.executing_eagerly():
-        self._add_control_dependencies_to_lock(created_ops, lock.op)
-
-        # captured_resources is a list of resources that are directly
-        # accessed only by ops created during fn(), not by any
-        # ancestors of those ops in the graph.
-        captured_resources = set([
-            input_ for op in created_ops
-            for input_ in op.inputs
-            if input_.dtype == dtypes.resource
-        ])
-
-        # NOTE(ebrevdo): The only time self._is_self_handle() is True
-        # in this call is if one of the recently created ops, within
-        # the execute(), themselves attempt to access the
-        # CriticalSection.  This will cause a deadlock.
-        if any(self._is_self_handle(x) for x in captured_resources):
-          raise ValueError("The function fn attempts to directly access the "
-                           "CriticalSection in which it would be running.  "
-                           "This is illegal and would cause deadlocks.")
-
-        self._check_multiple_access_to_resources(
-            captured_resources, exclusive_resource_access)
-
-      r_flat = [_identity(x) for x in nest.flatten(r)]
-
-      with ops.control_dependencies(r_flat):
-        # The identity must run on the same machine as self._handle
-        with ops.colocate_with(self._handle):
-          # Do not use array_ops.identity as there are special
-          # optimizations within TensorFlow which seem to elide it
-          # even when optimizations are disabled(!).
-          ensure_lock_exists = gen_resource_variable_ops.consume_mutex_lock(
-              lock)
-
-        # Make sure that if any element of r is accessed, all of
-        # them are executed together.
-        r = nest.pack_sequence_as(r, control_flow_ops.tuple(nest.flatten(r)))
-
-      with ops.control_dependencies([ensure_lock_exists]):
-        outputs = nest.map_structure(_identity, r)
-
-      if not context.executing_eagerly():
-        signature = _ExecutionSignature(
-            op=lock.op,
-            handle=self._handle,
-            resources=list(captured_resources),
-            exclusive_resource_access=exclusive_resource_access)
-        ops.add_to_collections(
-            CRITICAL_SECTION_EXECUTIONS, signature)
-
-      return outputs
-
-  def _add_control_dependencies_to_lock(self, created_ops, lock_op):
-    """To avoid deadlocks, all args must be executed before lock_op."""
-    # Get all arguments (explicit and captured) of all ops created by fn().
-    all_args = set([input_.op for op in created_ops for input_ in op.inputs])
-    all_args.update(
-        input_op for op in created_ops for input_op in op.control_inputs)
-    # Unfortunately, we can't use sets throughout because TF seems to
-    # create new Operation objects for the same op sometimes, and we
-    # can't rely on id(op).
-
-    # pylint: disable=protected-access
-    all_args_dict = dict((op._id, op) for op in all_args)
-
-    # Remove ops created within fn, or that lock_op already has a
-    # control dependency on.  Also remove a possible self-loop.
-    for op in created_ops:
-      all_args_dict.pop(op._id, None)
-    for op in lock_op.control_inputs:
-      all_args_dict.pop(op._id, None)
-    for input_ in lock_op.inputs:
-      all_args_dict.pop(input_.op._id, None)
-    all_args_dict.pop(lock_op._id, None)
-
-    all_args = all_args_dict.values()
-
-    if not all_args:
-      # No control dependencies to add; return early.
-      return
-
-    # This group is important: it ensures that any ops in all_args
-    # outside the control context of the lock_op (and this fn, which
-    # runs in the same context) are added to this context before
-    # being added to the control dependencies of lock_op.
-    all_args = control_flow_ops.group(*all_args)
-
-    lock_op._add_control_input(all_args)
-    # pylint: enable=protected-access
-
-  def _is_self_handle(self, x):
-    """Check if the tensor `x` is the same Mutex as `self._handle`."""
-    if isinstance(x, ops.EagerTensor):
-      return x is self._handle
-    return (x.op.type == "MutexV2"
-            # blank shared_name means the op will create a unique one.
-            and x.op.get_attr("shared_name")
-            and (x.op.get_attr("shared_name") ==
-                 self._handle.op.get_attr("shared_name"))
-            and (x.op.device == self._handle.op.device
-                 or _get_colocation(x.op) == _get_colocation(self._handle.op)))
-
-  def _check_multiple_access_to_resources(
-      self, captured_resources, exclusive_resource_access):
-    """Raise if captured_resources are accessed by another CriticalSection.
-
-    Args:
-      captured_resources: Set of tensors of type resource.
-      exclusive_resource_access: Whether this execution requires exclusive
-        resource access.
-
-    Raises:
-      ValueError: If any tensors in `captured_resources` are also accessed
-        by another `CriticalSection`, and at least one of them requires
-        exclusive resource access.
-    """
-    # Collections and op introspection do not work in eager
-    # mode.  This is generally ok, since eager mode (as of
-    # writing) executes sequentially anyway.
-    for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS):
-      if self._is_self_handle(sg.handle):
-        # Other executions in the same critical section are allowed.
-        continue
-      if not (exclusive_resource_access or sg.exclusive_resource_access):
-        # Neither execution requested exclusive access.
-        continue
-      resource_intersection = captured_resources.intersection(sg.resources)
-      if resource_intersection:
-        raise ValueError(
-            "This execution would access resources: %s.  Either this "
-            "lock (CriticalSection: %s) or lock '%s' "
-            "(CriticalSection: %s) requested exclusive resource access "
-            "of this resource.  Did you mean to call execute with keyword "
-            "argument exclusive_resource_access=False?" %
-            (list(resource_intersection), self._handle, sg, sg.handle))
-
-  # TODO(ebrevdo): Re-enable once CriticalSection is in core.
-
-  # def to_proto(self, export_scope=None):
-  #   """Converts a `CriticalSection` to a `CriticalSectoinDef` protocol buffer.
-
-  #   Args:
-  #     export_scope: Optional `string`. Name scope to remove.
-
-  #   Returns:
-  #     A `CriticalSectionDef` protocol buffer, or `None` if the
-  #     `CriticalSection` is not in the specified name scope.
-  #   """
-  #   if export_scope is None or self.handle.name.startswith(export_scope):
-  #     cs_def = critical_section_pb2.CriticalSectionDef()
-  #     cs_def.critical_section_name = ops.strip_name_scope(
-  #         self._handle.name, export_scope)
-  #     return cs_def
-  #   else:
-  #     return None
-
-  # @staticmethod
-  # def from_proto(critical_section_def, import_scope=None):
-  #   return CriticalSection(
-  #       critical_section_def=critical_section_def, import_scope=import_scope)
-
-
-# TODO(ebrevdo): Re-enable once CriticalSection is in core.
-
-# def _execution_to_proto_fn(execution_signature, export_scope=None):
-#   """Converts `_ExecutionSignature` to a `CriticalSectionExecutionDef`.
-#   # TODO(ebrevdo): Update for _ExecutionSignature storing resource list.
-
-#   Args:
-#     execution_signature: Instance of `_ExecutionSignature`.
-#     export_scope: The export scope, if any.
-
-#   Returns:
-#     An instance of `CriticalSectionExecutionDef`.
-#   """
-#   if (export_scope is None
-#       or execution_signature.op.name.startswith(export_scope)):
-#     op_def = critical_section_pb2.CriticalSectionExecutionDef()
-#     op_def.execute_in_critical_section_name = ops.strip_name_scope(
-#         execution_signature.op.name, export_scope)
-#     op_def.exclusive_resource_access = (
-#         execution_signature.exclusive_resource_access)
-#     return op_def
-#   else:
-#     return None
-
-
-# def _execution_from_proto_fn(op_def, import_scope=None):
-#   """Converts a `CriticalSectionExecutionDef` to a `_ExecutionSignature`."""
-#   # TODO(ebrevdo): Update for _ExecutionSignature storing resource list.
-#   assert isinstance(
-#       op_def, critical_section_pb2.CriticalSectionExecutionDef)
-
-#   # Create from op_def.
-#   g = ops.get_default_graph()
-#   execution_op = g.as_graph_element(
-#       ops.prepend_name_scope(
-#           op_def.execute_in_critical_section_name,
-#           import_scope=import_scope))
-#   return _ExecutionSignature(
-#       op=execution_op,
-#       exclusive_resource_access=op_def.exclusive_resource_access)
-
-# ops.register_proto_function(
-#     CRITICAL_SECTIONS,
-#     proto_type=critical_section_pb2.CriticalSectionDef,
-#     to_proto=CriticalSection.to_proto,
-#     from_proto=CriticalSection.from_proto)
-
-# ops.register_proto_function(
-#     CRITICAL_SECTION_EXECUTIONS,
-#     proto_type=critical_section_pb2.CriticalSectionExecutionDef,
-#     to_proto=_execution_to_proto_fn,
-#     from_proto=_execution_from_proto_fn)
diff --git a/tensorflow/contrib/framework/python/ops/critical_section_test.py b/tensorflow/contrib/framework/python/ops/critical_section_test.py
deleted file mode 100644
index d2bb4f4..0000000
--- a/tensorflow/contrib/framework/python/ops/critical_section_test.py
+++ /dev/null
@@ -1,392 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""critical section tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.framework.python.ops import critical_section_ops
-from tensorflow.python.data.experimental.ops import prefetching_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import context
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging as logging
-# TODO(ebrevdo): Re-enable once CriticalSection is in core.
-# from tensorflow.python.training import saver as saver_lib
-
-
-class CriticalSectionTest(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  def testCreateCriticalSection(self):
-    cs = critical_section_ops.CriticalSection(shared_name="cs")
-    v = resource_variable_ops.ResourceVariable(0.0, name="v")
-
-    def fn(a, b):
-      c = v.value()
-      with ops.control_dependencies([c]):
-        nv = v.assign_add(a * b)
-        with ops.control_dependencies([nv]):
-          return array_ops.identity(c)
-
-    num_concurrent = 100
-    r = [cs.execute(fn, 1.0, 2.0) for _ in range(num_concurrent)]
-    self.evaluate(v.initializer)
-    r_value = self.evaluate(r)
-    self.assertAllClose([2.0 * i for i in range(num_concurrent)],
-                        sorted(r_value))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testCriticalSectionWithControlFlow(self):
-    for outer_cond in [False, True]:
-      for inner_cond in [False, True]:
-        cs = critical_section_ops.CriticalSection(shared_name="cs")
-        v = resource_variable_ops.ResourceVariable(0.0, name="v")
-        num_concurrent = 100
-
-        # pylint: disable=cell-var-from-loop
-        def fn(a, b):
-          c = v.read_value()
-          def true_fn():
-            with ops.control_dependencies([c]):
-              nv = v.assign_add(a * b)
-              with ops.control_dependencies([nv]):
-                return array_ops.identity(c)
-          return control_flow_ops.cond(
-              array_ops.identity(inner_cond), true_fn, lambda: c)
-
-        def execute():
-          return cs.execute(fn, 1.0, 2.0)
-
-        r = [
-            control_flow_ops.cond(array_ops.identity(outer_cond),
-                                  execute,
-                                  v.read_value)
-            for _ in range(num_concurrent)
-        ]
-        # pylint: enable=cell-var-from-loop
-
-        self.evaluate(v.initializer)
-        r_value = self.evaluate(r)
-        if inner_cond and outer_cond:
-          self.assertAllClose([2.0 * i for i in range(num_concurrent)],
-                              sorted(r_value))
-        else:
-          self.assertAllClose([0] * num_concurrent, r_value)
-
-  def testCriticalSectionInParallelDoesntDeadlockOnError(self):
-    # No eager-mode variant of this test: eager does not run fn() in
-    # parallel, and parallel execution (graph mode) is where the
-    # deadlock could potentially occur.
-    cs = critical_section_ops.CriticalSection(shared_name="cs")
-    v = resource_variable_ops.ResourceVariable(0.0, name="v")
-
-    def fn(i):
-      error = control_flow_ops.Assert((i % 2) == 1, ["Error"])
-      with ops.control_dependencies([error]):
-        return v.read_value()
-    num_concurrent = 2
-    r = [cs.execute(fn, i) for i in range(num_concurrent)]
-    self.evaluate(v.initializer)
-    for _ in range(100):
-      with self.assertRaisesOpError("Error"):
-        self.evaluate(r)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testCreateCriticalSectionFnReturnsOp(self):
-    cs = critical_section_ops.CriticalSection(shared_name="cs")
-    v = resource_variable_ops.ResourceVariable(0.0, name="v")
-
-    def fn_return_op(a, b):
-      c = v.read_value()
-      with ops.control_dependencies([c]):
-        nv = v.assign_add(a * b)
-        with ops.control_dependencies([nv]):
-          return control_flow_ops.no_op()
-
-    num_concurrent = 100
-    r = [cs.execute(fn_return_op, 1.0, 2.0) for _ in range(num_concurrent)]
-    self.evaluate(v.initializer)
-    self.evaluate(r)
-    final_v = self.evaluate(v)
-    self.assertAllClose(2.0 * num_concurrent, final_v)
-
-  def testCollection(self):
-    cs = critical_section_ops.CriticalSection(shared_name="cs")
-    self.assertIn(
-        cs, ops.get_collection(critical_section_ops.CRITICAL_SECTIONS))
-    execute = cs.execute(lambda x: x + 1, 1.0, name="my_execute")
-    execute_op = [
-        x for x in execute.graph.get_operations()
-        if "my_execute" in x.name and "MutexLock" in x.type
-    ][0]
-    self.assertIn(
-        execute_op,
-        [signature.op for signature in
-         ops.get_collection(critical_section_ops.CRITICAL_SECTION_EXECUTIONS)])
-
-  def testRecursiveCriticalSectionAccessIsIllegal(self):
-    # This does not work properly in eager mode.  Eager users will
-    # just hit a deadlock if they do this.  In graph mode, at least,
-    # the resulting error is easier to debug.
-    cs = critical_section_ops.CriticalSection()
-    def fn(x):
-      return cs.execute(lambda y: y + 1, x)
-    with self.assertRaisesRegexp(
-        ValueError,
-        r"attempts to directly access the CriticalSection in which it "
-        r"would be running"):
-      cs.execute(fn, 1.0)
-
-  def testRecursiveCriticalSectionAccessViaCapturedTensorIsProtected(self):
-    # This one is subtle, and we're being overly cautious here.  The
-    # deadlock we are ensuring we catch is:
-    #
-    # to_capture = CS[lambda x: x + 1](1.0)
-    # deadlocked = CS[lambda x: x + to_capture](1.0)
-    #
-    # This would have caused a deadlock because executing `deadlocked` will
-    # lock the mutex on CS; but then due to dependencies, will attempt
-    # to compute `to_capture`.  This computation requires locking CS,
-    # but that is not possible now because CS is already locked by
-    # `deadlocked`.
-    #
-    # We check that CriticalSection.execute properly inserts new
-    # control dependencies to its lock to ensure all captured
-    # operations are finished before anything runs within the critical section.
-    cs = critical_section_ops.CriticalSection(shared_name="cs")
-    fn = array_ops.identity
-    to_capture = cs.execute(fn, 1.0)
-    fn_captures = lambda x: x + to_capture
-    to_capture_too = array_ops.identity(to_capture)
-
-    ex_0 = cs.execute(fn_captures, 1.0)
-
-    with ops.control_dependencies([to_capture]):
-      # This is OK because to_capture will execute before this next call
-      ex_1 = cs.execute(fn_captures, 1.0)
-
-    dependency = array_ops.identity(to_capture)
-
-    fn_captures_dependency = lambda x: x + dependency
-
-    ex_2 = cs.execute(fn_captures_dependency, 1.0)
-
-    with ops.control_dependencies([to_capture_too]):
-      ex_3 = cs.execute(fn_captures_dependency, 1.0)
-
-    # Ensure there's no actual deadlock on to_execute.
-    self.assertEquals(2.0, self.evaluate(ex_0))
-    self.assertEquals(2.0, self.evaluate(ex_1))
-    self.assertEquals(2.0, self.evaluate(ex_2))
-    self.assertEquals(2.0, self.evaluate(ex_3))
-
-  def testRecursiveCriticalSectionAccessWithinLoopIsProtected(self):
-    cs = critical_section_ops.CriticalSection(shared_name="cs")
-
-    def body_implicit_capture(i, j):
-      # This would have caused a deadlock if not for logic in execute
-      # that inserts additional control dependencies onto the lock op:
-      #   * Loop body argument j is captured by fn()
-      #   * i is running in parallel to move forward the execution
-      #   * j is not being checked by the predicate function
-      #   * output of cs.execute() is returned as next j.
-      fn = lambda: j + 1
-      return (i + 1, cs.execute(fn))
-
-    (i_n, j_n) = control_flow_ops.while_loop(
-        lambda i, _: i < 1000,
-        body_implicit_capture,
-        [0, 0],
-        parallel_iterations=25)
-    logging.warn(
-        "\n==============\nRunning "
-        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
-        "body_implicit_capture'\n"
-        "==============\n")
-    self.assertEquals((1000, 1000), self.evaluate((i_n, j_n)))
-    logging.warn(
-        "\n==============\nSuccessfully finished running "
-        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
-        "body_implicit_capture'\n"
-        "==============\n")
-
-    def body_implicit_capture_protected(i, j):
-      # This version is ok because we manually add a control
-      # dependency on j, which is an argument to the while_loop body
-      # and captured by fn.
-      fn = lambda: j + 1
-      with ops.control_dependencies([j]):
-        return (i + 1, cs.execute(fn))
-
-    (i_n, j_n) = control_flow_ops.while_loop(
-        lambda i, _: i < 1000,
-        body_implicit_capture_protected,
-        [0, 0],
-        parallel_iterations=25)
-    logging.warn(
-        "\n==============\nRunning "
-        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
-        "body_implicit_capture_protected'\n"
-        "==============\n")
-    self.assertEquals((1000, 1000), self.evaluate((i_n, j_n)))
-    logging.warn(
-        "\n==============\nSuccessfully finished running "
-        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
-        "body_implicit_capture_protected'\n"
-        "==============\n")
-
-    def body_args_capture(i, j):
-      # This version is ok because j is an argument to fn and we can
-      # ensure there's a control dependency on j.
-      fn = lambda x: x + 1
-      return (i + 1, cs.execute(fn, j))
-
-    (i_n, j_n) = control_flow_ops.while_loop(
-        lambda i, _: i < 1000,
-        body_args_capture,
-        [0, 0],
-        parallel_iterations=25)
-    logging.warn(
-        "\n==============\nRunning "
-        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
-        "body_args_capture'\n"
-        "==============\n")
-    self.assertEquals((1000, 1000), self.evaluate((i_n, j_n)))
-    logging.warn(
-        "\n==============\nSuccessfully finished running "
-        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
-        "body_args_capture'\n"
-        "==============\n")
-
-  def testRecursiveCriticalSectionAccessIsIllegalSameSharedName(self):
-    # This does not work properly in eager mode.  Eager users will
-    # just hit a deadlock if they do this.  In graph mode, at least,
-    # the resulting error is easier to debug.
-    cs = critical_section_ops.CriticalSection(shared_name="cs")
-    cs_same = critical_section_ops.CriticalSection(shared_name="cs")
-    def fn(x):
-      return cs_same.execute(lambda x: x+1, x)
-    with self.assertRaisesRegexp(
-        ValueError,
-        r"attempts to directly access the CriticalSection in which it "
-        r"would be running"):
-      cs.execute(fn, 1.0)
-
-  def testMultipleCSExecutionsRequestSameResource(self):
-    cs0 = critical_section_ops.CriticalSection()
-    cs1 = critical_section_ops.CriticalSection()
-    v = resource_variable_ops.ResourceVariable(0.0, name="v")
-    cs0.execute(lambda: v + 1)
-    # It's OK for the same CriticalSection to access this resource.
-    cs0.execute(lambda: v - 1)
-    # It's *not* OK for a different CriticalSection to access it by
-    # default.
-    with self.assertRaisesRegexp(
-        ValueError, "requested exclusive resource access"):
-      cs1.execute(lambda: v + 1)
-    # It's not even OK if the second call doesn't request exclusive access.
-    with self.assertRaisesRegexp(
-        ValueError, "requested exclusive resource access"):
-      cs1.execute(lambda: v + 1, exclusive_resource_access=False)
-
-    v2 = resource_variable_ops.ResourceVariable(0.0, name="v2")
-    cs0.execute(lambda: v2 + 1, exclusive_resource_access=False)
-    # It's OK if neither requests exclusive resource access.
-    cs1.execute(lambda: v2 + 1, exclusive_resource_access=False)
-
-    # It's not OK if the second request requires exclusive resource
-    # access.
-    with self.assertRaisesRegexp(
-        ValueError, "requested exclusive resource access"):
-      cs1.execute(lambda: v2 + 1)
-
-  def testControlDependencyFromOutsideWhileLoopMixedWithInsideLoop(self):
-    cs = critical_section_ops.CriticalSection()
-    v = resource_variable_ops.ResourceVariable(0, name="v")
-    # Make sure that the control dependencies on v do not cause issues
-    # in the lock_op's automatic control dependency adder.
-    #
-    # Note, here v must be a resource variable (or something similar),
-    # otherwise it gets hoisted into the while_loop by the time we add
-    # control dependencies to the lock_op.
-    out = control_flow_ops.while_loop(
-        lambda i: i < 10, lambda i: cs.execute(lambda j: v + j + 1, i), [0])
-    self.evaluate(v.initializer)
-    self.assertEqual(10, self.evaluate(out))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testInsideFunction(self):
-    if test_util.is_gpu_available():
-      self.skipTest(
-          "b/123899495: Colocation errors for critical sections in map on GPU")
-    cs = critical_section_ops.CriticalSection()
-    with ops.device("/gpu:0" if test_util.is_gpu_available() else "/cpu:0"):
-      v = resource_variable_ops.ResourceVariable(1)
-    def fn():
-      return v.read_value()
-
-    # map() creates a TensorFlow function.
-    ds = dataset_ops.Dataset.range(1)
-    if test_util.is_gpu_available():
-      ds = (ds.apply(prefetching_ops.copy_to_device("/gpu:0"))
-            .apply(prefetching_ops.map_on_gpu(lambda _: cs.execute(fn))))
-    else:
-      ds = ds.map(lambda _: cs.execute(fn))
-
-    def get_first():
-      if context.executing_eagerly():
-        return self.evaluate(ds.make_one_shot_iterator().get_next())
-      itr = ds.make_initializable_iterator()
-      self.evaluate([v.initializer, itr.initializer])
-      return self.evaluate(itr.get_next())
-
-    self.assertEqual(1, get_first())
-
-  # TODO(ebrevdo): Re-enable once CriticalSection is in core.
-  #
-  # def testCriticalSectionAndExecuteOpSaverRoundTrip(self):
-  #   cs = critical_section_ops.CriticalSection()
-  #   r = cs.execute(lambda x: x + 1, 1.0)
-  #   graph = ops.get_default_graph()
-  #   meta_graph = saver_lib.export_meta_graph(
-  #       graph=graph, collection_list=graph.get_all_collection_keys())
-  #   graph_copy = ops.Graph()
-  #   with graph_copy.as_default():
-  #     _ = saver_lib.import_meta_graph(meta_graph, import_scope="imported")
-  #     restored_cs = ops.get_collection(critical_section_ops.CRITICAL_SECTIONS)
-  #     restored_exec = ops.get_collection(
-  #         critical_section_ops.CRITICAL_SECTION_EXECUTIONS)
-  #     self.assertEqual(1, len(restored_cs))
-  #     self.assertEqual(1, len(restored_exec))
-  #     self.assertEqual(restored_cs[0].name, "imported/%s" % cs.name)
-  #     self.assertEqual(restored_exec[0].op.name, "imported/%s" % r.op.name)
-
-  # def testToProto(self):
-  #   cs = critical_section_ops.CriticalSection(shared_name="cs")
-  #   proto = cs.to_proto()
-  #   self.assertEqual(proto.critical_section_name, cs._handle.name)
-  #   cs_copy = critical_section_ops.CriticalSection.from_proto(proto)
-  #   self.assertEqual(cs_copy._handle, cs._handle)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index b6b75ff..f13a667 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -565,6 +565,20 @@
         fused_conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(
             stream->parent()),
         &algorithms));
+    if (activation_mode == ActivationMode::NONE) {
+      // Only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM is supported for
+      // identity activation; other algorithms appear to silently apply Relu.
+      // See
+      // https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBiasActivationForward
+      algorithms.erase(
+          std::remove_if(
+              algorithms.begin(), algorithms.end(),
+              [](dnn::AlgorithmDesc alg) {
+                return alg.algo_id() !=
+                       CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
+              }),
+          algorithms.end());
+    }
     dnn::ProfileResult best_result;
     dnn::ProfileResult best_result_no_scratch;
     for (auto profile_algorithm : algorithms) {
diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD
index db0868f..386e4cf 100644
--- a/tensorflow/contrib/gan/BUILD
+++ b/tensorflow/contrib/gan/BUILD
@@ -377,7 +377,10 @@
     name = "classifier_metrics_test",
     srcs = ["python/eval/python/classifier_metrics_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_windows"],  # TODO: needs investigation on Windows
+    tags = [
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":classifier_metrics",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
index bd17571..bc7c105 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
@@ -365,7 +365,7 @@
     unused_image = array_ops.zeros([2, 299, 299, 3])
     incscore = _run_with_mock(classifier_metrics.inception_score, unused_image)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       incscore_np = sess.run(incscore, {'concat:0': logits})
 
     self.assertAllClose(_expected_inception_score(logits), incscore_np)
@@ -473,7 +473,7 @@
         classifier_fn=lambda x: x,
         max_block_size=600)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       actual_kid, actual_std = sess.run(kid_op)
 
     expected_kid, expected_std = _expected_kid_and_std(test_pool_real_a,
@@ -500,7 +500,7 @@
         max_block_size=max_block_size)
 
     for block_size in [50, 512, 1000]:
-      with self.test_session() as sess:
+      with self.cached_session() as sess:
         actual_kid, actual_std = sess.run(kid_op, {max_block_size: block_size})
 
       expected_kid, expected_std = _expected_kid_and_std(
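
For context on the test_session -> cached_session migration above:
cached_session returns a single session that is reused across a test method
rather than constructing a fresh one per call. A minimal sketch, assuming
only tf.test.TestCase:

import tensorflow as tf

class CachedSessionDemoTest(tf.test.TestCase):

  def test_session_is_reused(self):
    op = tf.constant(3.0)
    with self.cached_session() as sess:
      self.assertEqual(3.0, sess.run(op))
    with self.cached_session() as sess:
      # Same underlying session object as in the first block.
      self.assertEqual(3.0, sess.run(op))

if __name__ == '__main__':
  tf.test.main()
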
diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py
index f36a5d3..9bff809 100644
--- a/tensorflow/contrib/gan/python/train.py
+++ b/tensorflow/contrib/gan/python/train.py
@@ -757,7 +757,9 @@
 
   return namedtuples.CycleGANLoss(loss_x2y, loss_y2x)
 
-
+# Begin google-internal
+# The four major parts can be found here: http://screen/tMRMBAohDYG.
+# End google-internal
 def stargan_loss(
     model,
     generator_loss_fn=tfgan_losses.stargan_generator_loss_wrapper(
@@ -776,8 +778,6 @@
     add_summaries=True):
   """StarGAN Loss.
 
-  The four major part can be found here: http://screen/tMRMBAohDYG.
-
   Args:
     model: (StarGAN) Model output of the stargan_model() function call.
     generator_loss_fn: The loss function on the generator. Takes a
diff --git a/tensorflow/contrib/gdr/gdr_collective_executor_mgr.cc b/tensorflow/contrib/gdr/gdr_collective_executor_mgr.cc
index b84710d..755cbdf 100644
--- a/tensorflow/contrib/gdr/gdr_collective_executor_mgr.cc
+++ b/tensorflow/contrib/gdr/gdr_collective_executor_mgr.cc
@@ -100,8 +100,7 @@
 
     // Logic to be executed on the RecvBufAsync callback.
     auto recv_buf_callback = [this, state, peer_task, to_device, to_alloc_attr,
-                              to_device_ctx, to_tensor, dev_to_dev_stream_index,
-                              done](const Status& s) {
+                              to_device_ctx, to_tensor, done](const Status& s) {
       if (s.ok()) {
         remote_memory_manager_->TensorFromTransportOptions(
             to_tensor, state->call->resp_.transport_options(), to_device,
diff --git a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
index 5f8c300..1124dff 100644
--- a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
+++ b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
@@ -167,8 +167,11 @@
 
     // RendezvousMgr already aborted, shouldn't send RPC call any more
     if (!call->status().ok()) {
-      done(call->status(), Args(), Args(), Tensor(), false);
+      // NOTE: `*session()` can potentially be deleted before we return from
+      // `call->done()(...)`, so we must release the worker before calling the
+      // callback.
       session()->worker_cache->ReleaseWorker(src_worker, rwi);
+      done(call->status(), Args(), Args(), Tensor(), false);
       delete call;
       return;
     }
@@ -181,8 +184,11 @@
       // If StartAbort was called prior to DeregisterCall, then the
       // current status should be bad.
       Status s = call->status();
-      done(s, Args(), call->recv_args(), call->tensor(), call->is_dead());
+      // NOTE: `*session()` can potentially be deleted before we return from
+      // `call->done()(...)`, so we must release the worker before calling the
+      // callback.
       session()->worker_cache->ReleaseWorker(src_worker, rwi);
+      done(s, Args(), call->recv_args(), call->tensor(), call->is_dead());
       delete call;
       Unref();
     });
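
Both reorderings above follow the same rule: the completion callback may
destroy the session that owns the worker cache, so the borrowed worker has
to be returned first. A minimal Python sketch of the hazard, with
hypothetical names:

# Hypothetical sketch: `done` may drop the last reference to `session`,
# so anything borrowed from it must be returned before the callback runs.
def finish_recv(call, session, src_worker, rwi, done):
  status = call.status
  session.worker_cache.release_worker(src_worker, rwi)  # return first
  done(status)  # may tear down `session` and its worker cache
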
diff --git a/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD b/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD
index d319aa7..92016e6 100644
--- a/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD
+++ b/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD
@@ -19,16 +19,25 @@
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:candidate_sampling_ops_op_lib",
         "//tensorflow/core:control_flow_ops_op_lib",
+        "//tensorflow/core:data_flow_ops_op_lib",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:functional_ops_op_lib",
+        "//tensorflow/core:io_ops_op_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:list_ops_op_lib",
+        "//tensorflow/core:logging_ops_op_lib",
+        "//tensorflow/core:lookup_ops_op_lib",
         "//tensorflow/core:manip_ops_op_lib",
         "//tensorflow/core:math_ops_op_lib",
         "//tensorflow/core:nn_ops_op_lib",
+        "//tensorflow/core:no_op_op_lib",
+        "//tensorflow/core:parsing_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:random_ops_op_lib",
         "//tensorflow/core:remote_fused_graph_ops_op_lib",
+        "//tensorflow/core:sendrecv_ops_op_lib",
+        "//tensorflow/core:sparse_ops_op_lib",
+        "//tensorflow/core:state_ops_op_lib",
         "//tensorflow/core:string_ops_op_lib",
         "//tensorflow/core:training_ops_op_lib",
         "//tensorflow/core:user_ops_op_lib",
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 403b522..9d9524e 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -2308,7 +2308,7 @@
           initializer=init_ops.ones_initializer(),
           collections=gamma_collections,
           trainable=trainable)
-    # Calculate the moments on the last axis (layer activations).
+    # By default, compute the moments across all dimensions except the
+    # first (index 0), i.e. the batch dimension.
     norm_axes = list(range(begin_norm_axis, inputs_rank))
     mean, variance = nn.moments(inputs, norm_axes, keep_dims=True)
     # Compute layer normalization using the batch_normalization function.
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
index a283949..8fda828 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
@@ -36,7 +36,7 @@
 from tensorflow.python.util import deprecation
 
 
-# TODO(rohanj): This should subclass Checkpointable and implement
+# TODO(rohanj): This should subclass Trackable and implement
 # _gather_saveables_for_checkpoint.
 class ShardedMutableDenseHashTable(object):
   """A sharded version of MutableDenseHashTable.
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py
index 591eabc..9fe8daf 100644
--- a/tensorflow/contrib/lookup/lookup_ops_test.py
+++ b/tensorflow/contrib/lookup/lookup_ops_test.py
@@ -1483,3 +1483,4 @@
 
 if __name__ == "__main__":
   test.main()
+
diff --git a/tensorflow/contrib/losses/BUILD b/tensorflow/contrib/losses/BUILD
index 728f75f..f4ebbde 100644
--- a/tensorflow/contrib/losses/BUILD
+++ b/tensorflow/contrib/losses/BUILD
@@ -82,10 +82,11 @@
 
 py_test(
     name = "metric_loss_ops_test",
-    size = "large",
+    size = "medium",
     srcs = [
         "python/metric_learning/metric_loss_ops_test.py",
     ],
+    shard_count = 4,
     srcs_version = "PY2AND3",
     deps = [
         ":metric_learning_py",
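
Bazel implements shard_count by invoking the test binary once per shard
with TEST_TOTAL_SHARDS and TEST_SHARD_INDEX set, and the Python test runner
partitions test methods across shards. A hedged sketch of the usual
round-robin rule:

import os

def runs_in_this_shard(test_index):
  # Bazel test sharding protocol: every shard sees the same test list and
  # claims each total-th test starting at its own shard index.
  total = int(os.environ.get('TEST_TOTAL_SHARDS', '1'))
  index = int(os.environ.get('TEST_SHARD_INDEX', '0'))
  return test_index % total == index
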
diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt
index 9ea94c7..0a0ba36 100644
--- a/tensorflow/contrib/makefile/proto_text_cc_files.txt
+++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt
@@ -40,7 +40,6 @@
 tensorflow/core/platform/cpu_info.cc
 tensorflow/core/platform/default/logging.cc
 tensorflow/core/platform/default/mutex.cc
-tensorflow/core/platform/default/protobuf.cc
 tensorflow/core/platform/default/tracing.cc
 tensorflow/core/platform/denormal.cc
 tensorflow/core/platform/env.cc
@@ -53,6 +52,7 @@
 tensorflow/core/platform/posix/load_library.cc
 tensorflow/core/platform/posix/port.cc
 tensorflow/core/platform/posix/posix_file_system.cc
+tensorflow/core/platform/protobuf.cc
 tensorflow/core/platform/protobuf_util.cc
 tensorflow/core/platform/setround.cc
 tensorflow/core/platform/tensor_coding.cc
diff --git a/tensorflow/contrib/memory_stats/BUILD b/tensorflow/contrib/memory_stats/BUILD
index 63843b9..9370124 100644
--- a/tensorflow/contrib/memory_stats/BUILD
+++ b/tensorflow/contrib/memory_stats/BUILD
@@ -10,6 +10,7 @@
 load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
 load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
 load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_cc")
 load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
@@ -45,6 +46,28 @@
     deps = [":memory_stats_ops_op_lib"],
 )
 
+tf_gen_op_wrapper_cc(
+    name = "memory_stats_ops",
+    out_ops_file = "memory_stats_ops",
+)
+
+cc_library(
+    name = "memory_stats_cc",
+    srcs = ["memory_stats_ops.cc"],
+    hdrs = ["memory_stats_ops.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":memory_stats_kernels",
+        ":memory_stats_ops_op_lib",
+        "//tensorflow/cc:const_op",
+        "//tensorflow/cc:ops",
+        "//tensorflow/cc:scope",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+    alwayslink = 1,
+)
+
 tf_custom_op_py_library(
     name = "memory_stats_py",
     srcs = [
diff --git a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
index 974fb53..7ae1dbe 100644
--- a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
+++ b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
@@ -24,13 +24,15 @@
   void Compute(OpKernelContext* context) override {
     Allocator* allocator =
         context->device()->GetAllocator(AllocatorAttributes());
-    AllocatorStats allocator_stats;
-    allocator->GetStats(&allocator_stats);
+    absl::optional<AllocatorStats> allocator_stats = allocator->GetStats();
+    if (!allocator_stats) {
+      // Fall back to default-constructed (zeroed) stats; dereferencing the
+      // empty optional here would be undefined behavior.
+      allocator_stats = AllocatorStats();
+    }
 
     Tensor* output_tensor = nullptr;
     OP_REQUIRES_OK(
         context, context->allocate_output(0, TensorShape({}), &output_tensor));
-    output_tensor->scalar<int64>()() = ExtractAllocatorStats(allocator_stats);
+    output_tensor->scalar<int64>()() = ExtractAllocatorStats(*allocator_stats);
   }
 
  protected:
@@ -71,7 +73,7 @@
  private:
   int64 ExtractAllocatorStats(
       const AllocatorStats& allocator_stats) const override {
-    return allocator_stats.bytes_limit;
+    return allocator_stats.bytes_limit ? *allocator_stats.bytes_limit : -1;
   }
 };
 
@@ -93,7 +95,7 @@
  private:
   int64 ExtractAllocatorStats(
       const AllocatorStats& allocator_stats) const override {
-    return allocator_stats.max_bytes_in_use;
+    return allocator_stats.peak_bytes_in_use;
   }
 };
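
A hedged usage sketch for the contrib memory-stats ops touched above
(assumes a GPU build; with the optional-returning GetStats, BytesLimit now
reports -1 when the allocator declares no limit):

import tensorflow as tf
from tensorflow.contrib.memory_stats import (BytesInUse, BytesLimit,
                                             MaxBytesInUse)

with tf.device('/gpu:0'):
  a = tf.random_normal([1024, 1024])
  b = tf.matmul(a, a)
  stats = [BytesInUse(), BytesLimit(), MaxBytesInUse()]

with tf.Session() as sess:
  _, (in_use, limit, peak) = sess.run([b, stats])
  print('in_use=%d limit=%d peak=%d' % (in_use, limit, peak))
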
 
diff --git a/tensorflow/contrib/mpi_collectives/ring.h b/tensorflow/contrib/mpi_collectives/ring.h
index cae57ce..9b5d52e 100644
--- a/tensorflow/contrib/mpi_collectives/ring.h
+++ b/tensorflow/contrib/mpi_collectives/ring.h
@@ -129,7 +129,7 @@
  *  has the fully accumulated Segment 1; and so on. The scatter-reduce is
  * complete.
  *
- *  Next, the allgather distributes these fully accumululated chunks across all
+ *  Next, the allgather distributes these fully accumulated chunks across all
  * nodes. Communication proceeds in the same ring, once again in N-1 steps. At
  * the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i).
  * For example, at the first iteration, the following transfers will occur:
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 12320d9..f30643c 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -413,8 +413,9 @@
 
 py_test(
     name = "shampoo_test",
-    size = "large",
+    size = "medium",
     srcs = ["python/training/shampoo_test.py"],
+    shard_count = 4,
     srcs_version = "PY2AND3",
     deps = [
         ":opt_py",
diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
index b5de726..b2ea3da 100644
--- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
+++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
@@ -44,15 +44,15 @@
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import saver as core_saver
 from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import graph_view
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import graph_view
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
 
 
-class NonLayerCheckpointable(tracking.AutoCheckpointable):
+class NonLayerTrackable(tracking.AutoTrackable):
 
   def __init__(self):
-    super(NonLayerCheckpointable, self).__init__()
+    super(NonLayerTrackable, self).__init__()
     self.a_variable = util.add_variable(
         self, name="a_variable", shape=[])
 
@@ -65,8 +65,8 @@
     super(MyModel, self).__init__()
     self._named_dense = core.Dense(1, use_bias=True)
     self._second = core.Dense(1, use_bias=False)
-    # We can still track Checkpointables which aren't Layers.
-    self._non_layer = NonLayerCheckpointable()
+    # We can still track Trackables which aren't Layers.
+    self._non_layer = NonLayerTrackable()
 
   def call(self, values):
     ret = self._second(self._named_dense(values))
@@ -101,7 +101,7 @@
     other_model = MyModel()
     optimizer = adam.AdamOptimizer(0.001)
     optimizer_step = training_util.get_or_create_global_step()
-    root_checkpointable = util.Checkpoint(
+    root_trackable = util.Checkpoint(
         optimizer=optimizer, model=model, optimizer_step=optimizer_step)
     if context.executing_eagerly():
       optimizer.minimize(
@@ -117,10 +117,10 @@
           other_model(input_value),
           global_step=optimizer_step)
       self.evaluate(util.gather_initializers(
-          root_checkpointable))
+          root_trackable))
       self.evaluate(train_op)
     named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
-        root_checkpointable).serialize_object_graph()
+        root_trackable).serialize_object_graph()
     expected_checkpoint_names = (
         # Created in the root node, so no prefix.
         "optimizer_step",
@@ -208,7 +208,7 @@
   def testSaveRestore(self):
     model = MyModel()
     optimizer = adam.AdamOptimizer(0.001)
-    root_checkpointable = util.Checkpoint(
+    root_trackable = util.Checkpoint(
         optimizer=optimizer, model=model)
     input_value = constant_op.constant([[3.]])
     if context.executing_eagerly():
@@ -217,24 +217,24 @@
     else:
       train_op = optimizer.minimize(model(input_value))
       # TODO(allenl): Make initialization more pleasant when graph building.
-      root_checkpointable.save_counter  # pylint: disable=pointless-statement
+      root_trackable.save_counter  # pylint: disable=pointless-statement
       self.evaluate(util.gather_initializers(
-          root_checkpointable))
+          root_trackable))
       self.evaluate(train_op)
     prefix = os.path.join(self.get_temp_dir(), "ckpt")
     self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
     m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
     self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
-    save_path = root_checkpointable.save(file_prefix=prefix)
+    save_path = root_trackable.save(file_prefix=prefix)
     self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
-    self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
+    self.evaluate(state_ops.assign(root_trackable.save_counter, 3))
     optimizer_variables = self.evaluate(optimizer.variables())
     self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
     # Immediate restoration
-    status = root_checkpointable.restore(save_path=save_path).assert_consumed()
+    status = root_trackable.restore(save_path=save_path).assert_consumed()
     status.run_restore_ops()
     self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
-    self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
+    self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
     self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
     if not context.executing_eagerly():
       return  # Restore-on-create is only supported when executing eagerly
@@ -542,11 +542,11 @@
       first_session = session_lib.Session(graph=first_graph)
       with first_graph.as_default(), first_session.as_default():
         first_variable = resource_variable_ops.ResourceVariable([1.])
-        first_root_checkpointable = util.Checkpoint(
+        first_root_trackable = util.Checkpoint(
             optimizer=optimizer, variable=first_variable)
         train_op = optimizer.minimize(first_variable.read_value)
         self.evaluate(util.gather_initializers(
-            first_root_checkpointable))
+            first_root_trackable))
         self.evaluate(train_op)
         self.evaluate(first_variable.assign([1.]))
         self.evaluate(optimizer.get_slot(
@@ -558,23 +558,23 @@
       second_graph = ops.Graph()
       with second_graph.as_default(), session_lib.Session(graph=second_graph):
         second_variable = resource_variable_ops.ResourceVariable([1.])
-        second_root_checkpointable = util.Checkpoint(
+        second_root_trackable = util.Checkpoint(
             optimizer=optimizer, variable=second_variable)
         train_op = optimizer.minimize(second_variable.read_value)
-        second_root_checkpointable.restore(None).initialize_or_restore()
+        second_root_trackable.restore(None).initialize_or_restore()
         self.evaluate(train_op)
         self.evaluate(second_variable.assign([4.]))
         self.evaluate(optimizer.get_slot(
             var=second_variable, name="m").assign([5.]))
         beta_1_power, _ = optimizer._get_beta_accumulators()
         self.evaluate(beta_1_power.assign(6.))
-        save_path = second_root_checkpointable.save(checkpoint_prefix)
+        save_path = second_root_trackable.save(checkpoint_prefix)
         self.evaluate(second_variable.assign([7.]))
         self.evaluate(optimizer.get_slot(
             var=second_variable, name="m").assign([8.]))
         beta_1_power, _ = optimizer._get_beta_accumulators()
         self.assertAllEqual(6., self.evaluate(beta_1_power))
-        status = second_root_checkpointable.restore(save_path)
+        status = second_root_trackable.restore(save_path)
         status.assert_consumed().run_restore_ops()
         self.assertAllEqual([4.], self.evaluate(second_variable))
         self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
@@ -594,7 +594,7 @@
 class TemplateTests(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
-  def test_checkpointable_save_restore(self):
+  def test_trackable_save_restore(self):
 
     def _templated():
       v = variable_scope.get_variable(
@@ -641,13 +641,13 @@
     model = MyModel()
     optimizer = adam.AdamOptimizer(0.001)
     optimizer_step = training_util.get_or_create_global_step()
-    root_checkpointable = util.Checkpoint(
+    root_trackable = util.Checkpoint(
         optimizer=optimizer, model=model, optimizer_step=optimizer_step)
     train_op = optimizer.minimize(
         functools.partial(model, input_value),
         global_step=optimizer_step)
     self.evaluate(util.gather_initializers(
-        root_checkpointable))
+        root_trackable))
     self.evaluate(train_op)
     # A regular variable, a slot variable, and a non-slot Optimizer variable
     # with known values to check when loading.
@@ -656,24 +656,24 @@
         var=model._named_dense.bias, name="m").assign([2.]))
     beta_1_power, _ = optimizer._get_beta_accumulators()
     self.evaluate(beta_1_power.assign(3.))
-    return root_checkpointable
+    return root_trackable
 
-  def _set_sentinels(self, root_checkpointable):
-    self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
+  def _set_sentinels(self, root_trackable):
+    self.evaluate(root_trackable.model._named_dense.bias.assign([101.]))
     self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, name="m")
         .assign([102.]))
-    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    beta_1_power, _ = root_trackable.optimizer._get_beta_accumulators()
     self.evaluate(beta_1_power.assign(103.))
 
-  def _check_sentinels(self, root_checkpointable):
+  def _check_sentinels(self, root_trackable):
     self.assertAllEqual(
-        [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
+        [1.], self.evaluate(root_trackable.model._named_dense.bias))
     self.assertAllEqual([2.], self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")))
-    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, name="m")))
+    beta_1_power, _ = root_trackable.optimizer._get_beta_accumulators()
     self.assertAllEqual(3., self.evaluate(beta_1_power))
 
   def _write_name_based_checkpoint(self):
@@ -698,7 +698,7 @@
       self._set_sentinels(root)
       with self.assertRaises(AssertionError):
         self._check_sentinels(root)
-      object_saver = util.CheckpointableSaver(graph_view.ObjectGraphView(root))
+      object_saver = util.TrackableSaver(graph_view.ObjectGraphView(root))
       self._set_sentinels(root)
       status = object_saver.restore(save_path)
       if context.executing_eagerly():
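
The renamed tracking API keeps the object-based checkpointing workflow
exercised above. A minimal eager-mode round trip (the checkpoint prefix is
an assumption):

import tensorflow as tf
tf.enable_eager_execution()

var = tf.Variable([1.0])
opt = tf.train.AdamOptimizer(0.001)
opt.minimize(lambda: tf.reduce_sum(var * var))

root = tf.train.Checkpoint(optimizer=opt, variable=var)
path = root.save('/tmp/trackable_demo')  # assumed writable prefix

var.assign([42.0])
root.restore(path)  # in eager mode, values are restored immediately
assert float(var.numpy()[0]) != 42.0
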
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index a49149e..a7f9786 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -38,7 +38,7 @@
 from tensorflow.python.ops import variables
 from tensorflow.python.training import optimizer as optimizer_v1
 from tensorflow.python.training import slot_creator
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 
 
@@ -223,7 +223,7 @@
       }
     self._slots = {}
     self._non_slot_dict = {}
-    # Extra state to help Optimizers implement Checkpointable. Holds information
+    # Extra state to help Optimizers implement Trackable. Holds information
     # about variables which will be restored as soon as they're created.
     self._deferred_dependencies = {}  # Non-slot variables
     self._deferred_slot_restorations = {}  # Slot variables
@@ -366,8 +366,8 @@
     slot variable needs to be restored).
 
     Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
+      slot_variable_position: A `trackable._CheckpointPosition` object
+        indicating the slot variable `Trackable` object to be restored.
       slot_name: The name of this `Optimizer`'s slot to restore into.
       variable: The variable object this slot is being created for.
       optional_op_name: Name to use when scoping the Variable that needs to be
@@ -385,7 +385,7 @@
         # (aside from double initialization), and makes variable creator scopes
         # behave the same way they do when graph building.
         and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
-      initializer = checkpointable.CheckpointInitialValue(
+      initializer = trackable.CheckpointInitialValue(
           checkpoint_position=slot_variable_position)
       slot_variable = self.create_slot(
           var=variable,
@@ -1259,10 +1259,10 @@
     return self._per_graph_state.get(var._graph_key, None)
 
   # --------------
-  # Overridden methods from Checkpointable.
+  # Overridden methods from Trackable.
   # --------------
 
-  def _track_checkpointable(self, *args, **kwargs):
+  def _track_trackable(self, *args, **kwargs):
     """Optimizers may not track dependencies. Raises an error."""
     raise NotImplementedError(
         "Optimizers may not have dependencies. File a feature request if this "
@@ -1270,7 +1270,7 @@
 
   @property
   def _checkpoint_dependencies(self):
-    """From Checkpointable. Gather graph-specific non-slot variables to save."""
+    """From Trackable. Gather graph-specific non-slot variables to save."""
     current_graph_non_slot_variables = []
     state = self._get_per_graph_state()
     if state is not None:
@@ -1279,14 +1279,14 @@
           # Avoid comparing variables
           key=lambda item: item[0]):
         current_graph_non_slot_variables.append(
-            checkpointable.CheckpointableReference(
+            trackable.TrackableReference(
                 name=name, ref=variable_object))
     # Note: ignores super(); Optimizers may not have any dependencies outside of
     # state objects.
     return current_graph_non_slot_variables
 
   def _lookup_dependency(self, name):
-    """From Checkpointable. Find a non-slot variable in the current graph."""
+    """From Trackable. Find a non-slot variable in the current graph."""
     state = self._get_per_graph_state()
     if state is None:
       return None
@@ -1295,10 +1295,10 @@
 
   @property
   def _deferred_dependencies(self):
-    """Lets Checkpointable know where non-slot variables are created.
+    """Lets Trackable know where non-slot variables are created.
 
     If necessary, creates a new state object for the current default graph.
-    Checkpointable will then add entries to that state's deferred dependency
+    Trackable will then add entries to that state's deferred dependency
     dictionary. The state object will check that dictionary when creating
     non-slot variables, restoring their value if an entry is found.
 
@@ -1311,14 +1311,14 @@
 
   def _create_or_restore_slot_variable(self, slot_variable_position, slot_name,
                                        variable):
-    """Checkpointable: Restore a slot variable's value, possibly creating it.
+    """Trackable: Restore a slot variable's value, possibly creating it.
 
     Called when a variable which has an associated slot variable is created or
     restored.
 
     Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
+      slot_variable_position: A `trackable._CheckpointPosition` object
+        indicating the slot variable `Trackable` object to be restored.
       slot_name: The name of this `Optimizer`'s slot to restore into.
       variable: The variable object this slot is being created for.
     """
diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD
index d65d80d..24fa740 100644
--- a/tensorflow/contrib/rnn/BUILD
+++ b/tensorflow/contrib/rnn/BUILD
@@ -103,26 +103,6 @@
 )
 
 cuda_py_tests(
-    name = "core_rnn_cell_test",
-    size = "medium",
-    srcs = ["python/kernel_tests/core_rnn_cell_test.py"],
-    additional_deps = [
-        ":rnn_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:rnn",
-        "//tensorflow/python:rnn_cell",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-cuda_py_tests(
     name = "rnn_test",
     size = "medium",
     srcs = ["python/kernel_tests/rnn_test.py"],
@@ -144,32 +124,6 @@
     ],
 )
 
-cuda_py_tests(
-    name = "core_rnn_test",
-    size = "medium",
-    srcs = ["python/kernel_tests/core_rnn_test.py"],
-    additional_deps = [
-        ":rnn_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:rnn",
-        "//tensorflow/python:tensor_array_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/eager:context",
-    ],
-    shard_count = 10,
-)
-
 tf_py_test(
     name = "fused_rnn_cell_test",
     size = "medium",
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
deleted file mode 100644
index a70e806..0000000
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ /dev/null
@@ -1,1241 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for RNN cells."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.contrib import rnn as contrib_rnn
-from tensorflow.contrib.rnn.python.ops import core_rnn_cell
-from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras import layers as keras_layers
-from tensorflow.python.layers import base as base_layer
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import rnn
-from tensorflow.python.ops import rnn_cell_impl
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
-
-# pylint: enable=protected-access
-Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
-
-
-class RNNCellTest(test.TestCase, parameterized.TestCase):
-
-  def testLinear(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(1.0)):
-        x = array_ops.zeros([1, 2])
-        l = Linear([x], 2, False)([x])
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([l], {x.name: np.array([[1., 2.]])})
-        self.assertAllClose(res[0], [[3.0, 3.0]])
-
-        # Checks prevent you from accidentally creating a shared function.
-        with self.assertRaises(ValueError):
-          l1 = Linear([x], 2, False)([x])
-
-        # But you can create a new one in a new scope and share the variables.
-        with variable_scope.variable_scope("l1") as new_scope:
-          l1 = Linear([x], 2, False)([x])
-        with variable_scope.variable_scope(new_scope, reuse=True):
-          Linear([l1], 2, False)([l1])
-        self.assertEqual(len(variables_lib.trainable_variables()), 2)
-
-  def testBasicRNNCell(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 2])
-        cell = rnn_cell_impl.BasicRNNCell(2)
-        g, _ = cell(x, m)
-        self.assertEqual([
-            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._BIAS_VARIABLE_NAME
-        ], [v.name for v in cell.trainable_variables])
-        self.assertFalse(cell.non_trainable_variables)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        self.assertEqual(res[0].shape, (1, 2))
-
-  def testBasicRNNCellNotTrainable(self):
-    with self.cached_session() as sess:
-
-      def not_trainable_getter(getter, *args, **kwargs):
-        kwargs["trainable"] = False
-        return getter(*args, **kwargs)
-
-      with variable_scope.variable_scope(
-          "root",
-          initializer=init_ops.constant_initializer(0.5),
-          custom_getter=not_trainable_getter):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 2])
-        cell = rnn_cell_impl.BasicRNNCell(2)
-        g, _ = cell(x, m)
-        self.assertFalse(cell.trainable_variables)
-        self.assertEqual([
-            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._BIAS_VARIABLE_NAME
-        ], [v.name for v in cell.non_trainable_variables])
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        self.assertEqual(res[0].shape, (1, 2))
-
-  def testIndRNNCell(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 2])
-        cell = contrib_rnn_cell.IndRNNCell(2)
-        g, _ = cell(x, m)
-        self.assertEqual([
-            "root/ind_rnn_cell/%s_w:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-            "root/ind_rnn_cell/%s_u:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-            "root/ind_rnn_cell/%s:0" % rnn_cell_impl._BIAS_VARIABLE_NAME
-        ], [v.name for v in cell.trainable_variables])
-        self.assertFalse(cell.non_trainable_variables)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        self.assertEqual(res[0].shape, (1, 2))
-
-  def testGRUCell(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 2])
-        g, _ = rnn_cell_impl.GRUCell(2)(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        # Smoke test
-        self.assertAllClose(res[0], [[0.175991, 0.175991]])
-      with variable_scope.variable_scope(
-          "other", initializer=init_ops.constant_initializer(0.5)):
-        # Test GRUCell with input_size != num_units.
-        x = array_ops.zeros([1, 3])
-        m = array_ops.zeros([1, 2])
-        g, _ = rnn_cell_impl.GRUCell(2)(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        # Smoke test
-        self.assertAllClose(res[0], [[0.156736, 0.156736]])
-
-  def testIndyGRUCell(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 2])
-        g, _ = contrib_rnn_cell.IndyGRUCell(2)(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        # Smoke test
-        self.assertAllClose(res[0], [[0.185265, 0.17704]])
-      with variable_scope.variable_scope(
-          "other", initializer=init_ops.constant_initializer(0.5)):
-        # Test IndyGRUCell with input_size != num_units.
-        x = array_ops.zeros([1, 3])
-        m = array_ops.zeros([1, 2])
-        g, _ = contrib_rnn_cell.IndyGRUCell(2)(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        # Smoke test
-        self.assertAllClose(res[0], [[0.155127, 0.157328]])
-
-  def testSRUCell(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 2])
-        g, _ = contrib_rnn_cell.SRUCell(2)(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        # Smoke test
-        self.assertAllClose(res[0], [[0.509682, 0.509682]])
-
-  def testSRUCellKerasRNN(self):
-    """Tests that SRUCell works with keras RNN layer."""
-    cell = contrib_rnn_cell.SRUCell(10)
-    seq_input = ops.convert_to_tensor(
-        np.random.rand(2, 3, 5), name="seq_input", dtype=dtypes.float32)
-    rnn_layer = keras_layers.RNN(cell=cell)
-    rnn_outputs_keras = rnn_layer(seq_input)
-    with self.cached_session() as sess:
-      sess.run([variables_lib.global_variables_initializer()])
-      self.assertEqual(sess.run(rnn_outputs_keras).shape, (2, 10))
-
-  def testSRUCellBiasType(self):
-    """Tests that the bias' dtype is properly set."""
-    cell = contrib_rnn_cell.SRUCell(10)
-    cell.build((2, 3, 5))
-    self.assertEqual(cell._bias.dtype, dtypes.float32_ref)
-
-    cell = contrib_rnn_cell.SRUCell(10, dtype=dtypes.int32)
-    cell.build((2, 3, 5))
-    self.assertEqual(cell._bias.dtype, dtypes.int32_ref)
-
-    cell_input = ops.convert_to_tensor(
-        np.random.rand(2, 5), name="cell_input", dtype=dtypes.float16)
-    cell_state = ops.convert_to_tensor(
-        np.random.rand(2, 10), name="cell_state", dtype=dtypes.float16)
-    cell = contrib_rnn_cell.SRUCell(10)
-    cell(cell_input, [cell_state])
-    self.assertEqual(cell._bias.dtype, dtypes.float16_ref)
-
-  def testSRUCellWithDiffSize(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 3])
-        m = array_ops.zeros([1, 2])
-        g, _ = contrib_rnn_cell.SRUCell(2)(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g], {
-            x.name: np.array([[1., 1., 1.]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        # Smoke test
-        self.assertAllClose(res[0], [[0.55255556, 0.55255556]])
-
-  def testBasicLSTMCell(self):
-    for dtype in [dtypes.float16, dtypes.float32]:
-      np_dtype = dtype.as_numpy_dtype
-      with self.session(graph=ops.Graph()) as sess:
-        with variable_scope.variable_scope(
-            "root", initializer=init_ops.constant_initializer(0.5)):
-          x = array_ops.zeros([1, 2], dtype=dtype)
-          m = array_ops.zeros([1, 8], dtype=dtype)
-          cell = rnn_cell_impl.MultiRNNCell(
-              [
-                  rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
-                  for _ in range(2)
-              ],
-              state_is_tuple=False)
-          self.assertEqual(cell.dtype, None)
-          self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name)
-          self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name)
-          cell.get_config()  # Should not throw an error
-          g, out_m = cell(x, m)
-          # Layer infers the input type.
-          self.assertEqual(cell.dtype, dtype.name)
-          expected_variable_names = [
-              "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
-              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
-              rnn_cell_impl._BIAS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
-              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
-              rnn_cell_impl._BIAS_VARIABLE_NAME
-          ]
-          self.assertEqual(expected_variable_names,
-                           [v.name for v in cell.trainable_variables])
-          self.assertFalse(cell.non_trainable_variables)
-          sess.run([variables_lib.global_variables_initializer()])
-          res = sess.run([g, out_m], {
-              x.name: np.array([[1., 1.]]),
-              m.name: 0.1 * np.ones([1, 8])
-          })
-          self.assertEqual(len(res), 2)
-          variables = variables_lib.global_variables()
-          self.assertEqual(expected_variable_names, [v.name for v in variables])
-          # The numbers in results were not calculated, this is just a
-          # smoke test.
-          self.assertAllClose(res[0], np.array(
-              [[0.240, 0.240]], dtype=np_dtype), 1e-2)
-          expected_mem = np.array(
-              [[0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240]],
-              dtype=np_dtype)
-          self.assertAllClose(res[1], expected_mem, 1e-2)
-        with variable_scope.variable_scope(
-            "other", initializer=init_ops.constant_initializer(0.5)):
-          # Test BasicLSTMCell with input_size != num_units.
-          x = array_ops.zeros([1, 3], dtype=dtype)
-          m = array_ops.zeros([1, 4], dtype=dtype)
-          g, out_m = rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)(x, m)
-          sess.run([variables_lib.global_variables_initializer()])
-          res = sess.run(
-              [g, out_m], {
-                  x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
-                  m.name: 0.1 * np.ones([1, 4], dtype=np_dtype)
-              })
-          self.assertEqual(len(res), 2)
-
-  def testBasicLSTMCellDimension0Error(self):
-    """Tests that dimension 0 in both(x and m) shape must be equal."""
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        num_units = 2
-        state_size = num_units * 2
-        batch_size = 3
-        input_size = 4
-        x = array_ops.zeros([batch_size, input_size])
-        m = array_ops.zeros([batch_size - 1, state_size])
-        with self.assertRaises(ValueError):
-          g, out_m = rnn_cell_impl.BasicLSTMCell(
-              num_units, state_is_tuple=False)(x, m)
-          sess.run([variables_lib.global_variables_initializer()])
-          sess.run(
-              [g, out_m], {
-                  x.name: 1 * np.ones([batch_size, input_size]),
-                  m.name: 0.1 * np.ones([batch_size - 1, state_size])
-              })
-
-  def testBasicLSTMCellStateSizeError(self):
-    """Tests that state_size must be num_units * 2."""
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        num_units = 2
-        state_size = num_units * 3  # state_size must be num_units * 2
-        batch_size = 3
-        input_size = 4
-        x = array_ops.zeros([batch_size, input_size])
-        m = array_ops.zeros([batch_size, state_size])
-        with self.assertRaises(ValueError):
-          g, out_m = rnn_cell_impl.BasicLSTMCell(
-              num_units, state_is_tuple=False)(x, m)
-          sess.run([variables_lib.global_variables_initializer()])
-          sess.run(
-              [g, out_m], {
-                  x.name: 1 * np.ones([batch_size, input_size]),
-                  m.name: 0.1 * np.ones([batch_size, state_size])
-              })
-
-  def testBasicLSTMCellStateTupleType(self):
-    with self.cached_session():
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m0 = (array_ops.zeros([1, 2]),) * 2
-        m1 = (array_ops.zeros([1, 2]),) * 2
-        cell = rnn_cell_impl.MultiRNNCell(
-            [rnn_cell_impl.BasicLSTMCell(2) for _ in range(2)],
-            state_is_tuple=True)
-        self.assertTrue(isinstance(cell.state_size, tuple))
-        self.assertTrue(
-            isinstance(cell.state_size[0], rnn_cell_impl.LSTMStateTuple))
-        self.assertTrue(
-            isinstance(cell.state_size[1], rnn_cell_impl.LSTMStateTuple))
-
-        # Pass in regular tuples
-        _, (out_m0, out_m1) = cell(x, (m0, m1))
-        self.assertTrue(isinstance(out_m0, rnn_cell_impl.LSTMStateTuple))
-        self.assertTrue(isinstance(out_m1, rnn_cell_impl.LSTMStateTuple))
-
-        # Pass in LSTMStateTuples
-        variable_scope.get_variable_scope().reuse_variables()
-        zero_state = cell.zero_state(1, dtypes.float32)
-        self.assertTrue(isinstance(zero_state, tuple))
-        self.assertTrue(isinstance(zero_state[0], rnn_cell_impl.LSTMStateTuple))
-        self.assertTrue(isinstance(zero_state[1], rnn_cell_impl.LSTMStateTuple))
-        _, (out_m0, out_m1) = cell(x, zero_state)
-        self.assertTrue(isinstance(out_m0, rnn_cell_impl.LSTMStateTuple))
-        self.assertTrue(isinstance(out_m1, rnn_cell_impl.LSTMStateTuple))
-
-  def testBasicLSTMCellWithStateTuple(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m0 = array_ops.zeros([1, 4])
-        m1 = array_ops.zeros([1, 4])
-        cell = rnn_cell_impl.MultiRNNCell(
-            [
-                rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
-                for _ in range(2)
-            ],
-            state_is_tuple=True)
-        g, (out_m0, out_m1) = cell(x, (m0, m1))
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(
-            [g, out_m0, out_m1], {
-                x.name: np.array([[1., 1.]]),
-                m0.name: 0.1 * np.ones([1, 4]),
-                m1.name: 0.1 * np.ones([1, 4])
-            })
-        self.assertEqual(len(res), 3)
-        # The numbers in results were not calculated, this is just a smoke test.
-        # Note, however, these values should match the original
-        # version having state_is_tuple=False.
-        self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
-        expected_mem0 = np.array(
-            [[0.68967271, 0.68967271, 0.44848421, 0.44848421]])
-        expected_mem1 = np.array(
-            [[0.39897051, 0.39897051, 0.24024698, 0.24024698]])
-        self.assertAllClose(res[1], expected_mem0)
-        self.assertAllClose(res[2], expected_mem1)
-
-  def testIndyLSTMCell(self):
-    for dtype in [dtypes.float16, dtypes.float32]:
-      np_dtype = dtype.as_numpy_dtype
-      with self.session(graph=ops.Graph()) as sess:
-        with variable_scope.variable_scope(
-            "root", initializer=init_ops.constant_initializer(0.5)):
-          x = array_ops.zeros([1, 2], dtype=dtype)
-          state_0 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
-          state_1 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
-          cell = rnn_cell_impl.MultiRNNCell(
-              [contrib_rnn_cell.IndyLSTMCell(2) for _ in range(2)])
-          self.assertEqual(cell.dtype, None)
-          self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name)
-          self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name)
-          cell.get_config()  # Should not throw an error
-          g, (out_state_0, out_state_1) = cell(x, (state_0, state_1))
-          # Layer infers the input type.
-          self.assertEqual(cell.dtype, dtype.name)
-          expected_variable_names = [
-              "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_w:0" %
-              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_u:0" %
-              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s:0" %
-              rnn_cell_impl._BIAS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_w:0" %
-              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_u:0" %
-              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-              "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s:0" %
-              rnn_cell_impl._BIAS_VARIABLE_NAME
-          ]
-          self.assertEqual(expected_variable_names,
-                           [v.name for v in cell.trainable_variables])
-          self.assertFalse(cell.non_trainable_variables)
-          sess.run([variables_lib.global_variables_initializer()])
-          res = sess.run(
-              [g, out_state_0, out_state_1], {
-                  x.name: np.array([[1., 1.]]),
-                  state_0[0].name: 0.1 * np.ones([1, 2]),
-                  state_0[1].name: 0.1 * np.ones([1, 2]),
-                  state_1[0].name: 0.1 * np.ones([1, 2]),
-                  state_1[1].name: 0.1 * np.ones([1, 2]),
-              })
-          self.assertEqual(len(res), 3)
-          variables = variables_lib.global_variables()
-          self.assertEqual(expected_variable_names, [v.name for v in variables])
-          # Only check the range of outputs as this is just a smoke test.
-          self.assertAllInRange(res[0], -1.0, 1.0)
-          self.assertAllInRange(res[1], -1.0, 1.0)
-          self.assertAllInRange(res[2], -1.0, 1.0)
-        with variable_scope.variable_scope(
-            "other", initializer=init_ops.constant_initializer(0.5)):
-          # Test IndyLSTMCell with input_size != num_units.
-          x = array_ops.zeros([1, 3], dtype=dtype)
-          state = (array_ops.zeros([1, 2], dtype=dtype),) * 2
-          g, out_state = contrib_rnn_cell.IndyLSTMCell(2)(x, state)
-          sess.run([variables_lib.global_variables_initializer()])
-          res = sess.run(
-              [g, out_state], {
-                  x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
-                  state[0].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
-                  state[1].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
-              })
-          self.assertEqual(len(res), 2)
-
-  def testLSTMCell(self):
-    with self.cached_session() as sess:
-      num_units = 8
-      num_proj = 6
-      state_size = num_units + num_proj
-      batch_size = 3
-      input_size = 2
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([batch_size, input_size])
-        m = array_ops.zeros([batch_size, state_size])
-        cell = rnn_cell_impl.LSTMCell(
-            num_units=num_units,
-            num_proj=num_proj,
-            forget_bias=1.0,
-            state_is_tuple=False)
-        output, state = cell(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(
-            [output, state], {
-                x.name: np.array([[1., 1.], [2., 2.], [3., 3.]]),
-                m.name: 0.1 * np.ones((batch_size, state_size))
-            })
-        self.assertEqual(len(res), 2)
-        # The numbers in results were not calculated, this is mostly just a
-        # smoke test.
-        self.assertEqual(res[0].shape, (batch_size, num_proj))
-        self.assertEqual(res[1].shape, (batch_size, state_size))
-        # Different inputs so different outputs and states
-        for i in range(1, batch_size):
-          self.assertTrue(
-              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) > 1e-6)
-          self.assertTrue(
-              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) > 1e-6)
-
-  def testLSTMCellVariables(self):
-    with self.cached_session():
-      num_units = 8
-      num_proj = 6
-      state_size = num_units + num_proj
-      batch_size = 3
-      input_size = 2
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([batch_size, input_size])
-        m = array_ops.zeros([batch_size, state_size])
-        cell = rnn_cell_impl.LSTMCell(
-            num_units=num_units,
-            num_proj=num_proj,
-            forget_bias=1.0,
-            state_is_tuple=False)
-        cell(x, m)  # Execute to create variables
-      variables = variables_lib.global_variables()
-      self.assertEquals(variables[0].op.name, "root/lstm_cell/kernel")
-      self.assertEquals(variables[1].op.name, "root/lstm_cell/bias")
-      self.assertEquals(variables[2].op.name,
-                        "root/lstm_cell/projection/kernel")
-
-  def testLSTMCellLayerNorm(self):
-    with self.cached_session() as sess:
-      num_units = 2
-      num_proj = 3
-      batch_size = 1
-      input_size = 4
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([batch_size, input_size])
-        c = array_ops.zeros([batch_size, num_units])
-        h = array_ops.zeros([batch_size, num_proj])
-        state = rnn_cell_impl.LSTMStateTuple(c, h)
-        cell = contrib_rnn_cell.LayerNormLSTMCell(
-            num_units=num_units,
-            num_proj=num_proj,
-            forget_bias=1.0,
-            layer_norm=True,
-            norm_gain=1.0,
-            norm_shift=0.0)
-        g, out_m = cell(x, state)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(
-            [g, out_m], {
-                x.name: np.ones((batch_size, input_size)),
-                c.name: 0.1 * np.ones((batch_size, num_units)),
-                h.name: 0.1 * np.ones((batch_size, num_proj))
-            })
-        self.assertEqual(len(res), 2)
-        # The numbers in results were not calculated, this is mostly just a
-        # smoke test.
-        self.assertEqual(res[0].shape, (batch_size, num_proj))
-        self.assertEqual(res[1][0].shape, (batch_size, num_units))
-        self.assertEqual(res[1][1].shape, (batch_size, num_proj))
-        # Different inputs so different outputs and states
-        for i in range(1, batch_size):
-          self.assertTrue(
-              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
-          self.assertTrue(
-              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testWrapperCheckpointing(self):
-    for wrapper_type in [
-        rnn_cell_impl.DropoutWrapper,
-        rnn_cell_impl.ResidualWrapper,
-        lambda cell: rnn_cell_impl.MultiRNNCell([cell])]:
-      cell = rnn_cell_impl.BasicRNNCell(1)
-      wrapper = wrapper_type(cell)
-      wrapper(array_ops.ones([1, 1]),
-              state=wrapper.zero_state(batch_size=1, dtype=dtypes.float32))
-      self.evaluate([v.initializer for v in cell.variables])
-      checkpoint = checkpointable_utils.Checkpoint(wrapper=wrapper)
-      prefix = os.path.join(self.get_temp_dir(), "ckpt")
-      self.evaluate(cell._bias.assign([40.]))
-      save_path = checkpoint.save(prefix)
-      self.evaluate(cell._bias.assign([0.]))
-      checkpoint.restore(save_path).assert_consumed().run_restore_ops()
-      self.assertAllEqual([40.], self.evaluate(cell._bias))
-
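The wrapper checkpointing contract above (the wrapped cell is tracked as a dependency named "cell") can be seen end to end in a standalone sketch. It assumes TF 1.x eager execution and the public tf.train.Checkpoint, with a temporary save path:

    import os
    import tempfile

    import tensorflow as tf  # assumes TensorFlow 1.x

    tf.enable_eager_execution()

    cell = tf.nn.rnn_cell.BasicRNNCell(1)
    wrapper = tf.nn.rnn_cell.ResidualWrapper(cell)
    wrapper(tf.ones([1, 1]), wrapper.zero_state(1, tf.float32))  # create variables

    ckpt = tf.train.Checkpoint(wrapper=wrapper)
    cell._bias.assign([40.])  # protected member, mirroring the test above
    path = ckpt.save(os.path.join(tempfile.mkdtemp(), "ckpt"))
    cell._bias.assign([0.])
    ckpt.restore(path).assert_consumed().run_restore_ops()
    print(cell._bias.numpy())  # [40.]: the wrapped cell's state round-trips
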
-  def testOutputProjectionWrapper(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 3])
-        m = array_ops.zeros([1, 3])
-        cell = contrib_rnn.OutputProjectionWrapper(rnn_cell_impl.GRUCell(3), 2)
-        g, new_m = cell(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g, new_m], {
-            x.name: np.array([[1., 1., 1.]]),
-            m.name: np.array([[0.1, 0.1, 0.1]])
-        })
-        self.assertEqual(res[1].shape, (1, 3))
-        # The numbers in results were not calculated; this is just a smoke test.
-        self.assertAllClose(res[0], [[0.231907, 0.231907]])
-
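OutputProjectionWrapper appends a learned linear map to whatever the wrapped cell emits and leaves the state alone. A sketch with the contrib-era API and illustrative sizes:

    import tensorflow as tf  # assumes TensorFlow 1.x with tf.contrib

    cell = tf.contrib.rnn.OutputProjectionWrapper(tf.nn.rnn_cell.GRUCell(3), 2)
    g, new_m = cell(tf.zeros([1, 3]), tf.zeros([1, 3]))
    print(g.shape)      # (1, 2): GRU output of size 3, projected down to 2
    print(new_m.shape)  # (1, 3): the recurrent state is not projected
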
-  def testInputProjectionWrapper(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 3])
-        cell = contrib_rnn.InputProjectionWrapper(
-            rnn_cell_impl.GRUCell(3), num_proj=3)
-        g, new_m = cell(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g, new_m], {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1, 0.1]])
-        })
-        self.assertEqual(res[1].shape, (1, 3))
-        # The numbers in results were not calculated; this is just a smoke test.
-        self.assertAllClose(res[0], [[0.154605, 0.154605, 0.154605]])
-
-  @parameterized.parameters(
-      [rnn_cell_impl.ResidualWrapper, rnn_cell_impl.ResidualWrapperV2])
-  @test_util.run_in_graph_and_eager_modes
-  def testResidualWrapper(self, wrapper_type):
-    x = ops.convert_to_tensor(np.array([[1., 1., 1.]]))
-    m = ops.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]))
-    base_cell = rnn_cell_impl.GRUCell(
-        3, kernel_initializer=init_ops.constant_initializer(0.5),
-        bias_initializer=init_ops.constant_initializer(0.5))
-    g, m_new = base_cell(x, m)
-    wrapper_object = wrapper_type(base_cell)
-    (name, dep), = wrapper_object._checkpoint_dependencies
-    wrapper_object.get_config()  # Should not throw an error
-    self.assertIs(dep, base_cell)
-    self.assertEqual("cell", name)
-
-    g_res, m_new_res = wrapper_object(x, m)
-    self.evaluate([variables_lib.global_variables_initializer()])
-    res = self.evaluate([g, g_res, m_new, m_new_res])
-    # Residual connections
-    self.assertAllClose(res[1], res[0] + [1., 1., 1.])
-    # States are left untouched
-    self.assertAllClose(res[2], res[3])
-
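The residual check above reduces to output = cell(input) + input. A sketch with the public wrapper; the second call reuses the GRU's variables because it is the same cell object:

    import numpy as np
    import tensorflow as tf  # assumes TensorFlow 1.x

    base = tf.nn.rnn_cell.GRUCell(3)
    res_cell = tf.nn.rnn_cell.ResidualWrapper(base)
    x = tf.constant(np.ones((1, 3), np.float32))
    m = tf.constant(0.1 * np.ones((1, 3), np.float32))
    g, _ = base(x, m)          # builds the GRU's variables
    g_res, _ = res_cell(x, m)  # reuses them; the wrapper adds the input back

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out, out_res = sess.run([g, g_res])
        print(np.allclose(out_res, out + 1.0))  # True: the all-ones input is added
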
-  @parameterized.parameters(
-      [rnn_cell_impl.ResidualWrapper, rnn_cell_impl.ResidualWrapperV2])
-  @test_util.run_in_graph_and_eager_modes
-  def testResidualWrapperWithSlice(self, wrapper_type):
-    x = ops.convert_to_tensor(np.array([[1., 1., 1., 1., 1.]]))
-    m = ops.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]))
-    base_cell = rnn_cell_impl.GRUCell(
-        3, kernel_initializer=init_ops.constant_initializer(0.5),
-        bias_initializer=init_ops.constant_initializer(0.5))
-    g, m_new = base_cell(x, m)
-
-    def residual_with_slice_fn(inp, out):
-      inp_sliced = array_ops.slice(inp, [0, 0], [-1, 3])
-      return inp_sliced + out
-
-    g_res, m_new_res = wrapper_type(
-        base_cell, residual_with_slice_fn)(x, m)
-    self.evaluate([variables_lib.global_variables_initializer()])
-    res_g, res_g_res, res_m_new, res_m_new_res = self.evaluate(
-        [g, g_res, m_new, m_new_res])
-    # Residual connections
-    self.assertAllClose(res_g_res, res_g + [1., 1., 1.])
-    # States are left untouched
-    self.assertAllClose(res_m_new, res_m_new_res)
-
-  def testDeviceWrapper(self):
-    with variable_scope.variable_scope(
-        "root", initializer=init_ops.constant_initializer(0.5)):
-      x = array_ops.zeros([1, 3])
-      m = array_ops.zeros([1, 3])
-      wrapped = rnn_cell_impl.GRUCell(3)
-      cell = rnn_cell_impl.DeviceWrapper(wrapped, "/cpu:14159")
-      (name, dep), = cell._checkpoint_dependencies
-      cell.get_config()  # Should not throw an error
-      self.assertIs(dep, wrapped)
-      self.assertEqual("cell", name)
-
-      outputs, _ = cell(x, m)
-      self.assertTrue("cpu:14159" in outputs.device.lower())
-
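DeviceWrapper pins every op the wrapped cell creates to one device, which is all the assertion above checks (the bogus /cpu:14159 simply shows the device string propagates). A sketch with a real device:

    import tensorflow as tf  # assumes TensorFlow 1.x

    cell = tf.nn.rnn_cell.DeviceWrapper(tf.nn.rnn_cell.GRUCell(3), "/cpu:0")
    outputs, _ = cell(tf.zeros([1, 3]), tf.zeros([1, 3]))
    print(outputs.device)  # the cell's ops are pinned to /cpu:0
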
-  def _retrieve_cpu_gpu_stats(self, run_metadata):
-    cpu_stats = None
-    gpu_stats = None
-    step_stats = run_metadata.step_stats
-    for ds in step_stats.dev_stats:
-      if "cpu:0" in ds.device[-5:].lower():
-        cpu_stats = ds.node_stats
-      if "gpu:0" == ds.device[-5:].lower():
-        gpu_stats = ds.node_stats
-    return cpu_stats, gpu_stats
-
-  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
-    if not test.is_gpu_available():
-      # Can't perform this test w/o a GPU
-      return
-
-    gpu_dev = test.gpu_device_name()
-    with self.session(use_gpu=True) as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 1, 3])
-        cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev)
-        with ops.device("/cpu:0"):
-          outputs, _ = rnn.dynamic_rnn(
-              cell=cell, inputs=x, dtype=dtypes.float32)
-        run_metadata = config_pb2.RunMetadata()
-        opts = config_pb2.RunOptions(
-            trace_level=config_pb2.RunOptions.FULL_TRACE)
-
-        sess.run([variables_lib.global_variables_initializer()])
-        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)
-
-      cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
-      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
-      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
-
-  def testEmbeddingWrapper(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 1], dtype=dtypes.int32)
-        m = array_ops.zeros([1, 2])
-        embedding_cell = contrib_rnn.EmbeddingWrapper(
-            rnn_cell_impl.GRUCell(2), embedding_classes=3, embedding_size=2)
-        self.assertEqual(embedding_cell.output_size, 2)
-        g, new_m = embedding_cell(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g, new_m], {
-            x.name: np.array([[1]]),
-            m.name: np.array([[0.1, 0.1]])
-        })
-        self.assertEqual(res[1].shape, (1, 2))
-        # The numbers in results were not calculated; this is just a smoke test.
-        self.assertAllClose(res[0], [[0.17139, 0.17139]])
-
-  def testEmbeddingWrapperWithDynamicRnn(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope("root"):
-        inputs = ops.convert_to_tensor([[[0], [0]]], dtype=dtypes.int64)
-        input_lengths = ops.convert_to_tensor([2], dtype=dtypes.int64)
-        embedding_cell = contrib_rnn.EmbeddingWrapper(
-            rnn_cell_impl.BasicLSTMCell(1, state_is_tuple=True),
-            embedding_classes=1,
-            embedding_size=2)
-        outputs, _ = rnn.dynamic_rnn(
-            cell=embedding_cell,
-            inputs=inputs,
-            sequence_length=input_lengths,
-            dtype=dtypes.float32)
-        sess.run([variables_lib.global_variables_initializer()])
-        # This will fail if output's dtype is inferred from input's.
-        sess.run(outputs)
-
-  def testMultiRNNCell(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 4])
-        multi_rnn_cell = rnn_cell_impl.MultiRNNCell(
-            [rnn_cell_impl.GRUCell(2) for _ in range(2)],
-            state_is_tuple=False)
-        _, ml = multi_rnn_cell(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(ml, {
-            x.name: np.array([[1., 1.]]),
-            m.name: np.array([[0.1, 0.1, 0.1, 0.1]])
-        })
-        # The numbers in results were not calculated; this is just a smoke test.
-        self.assertAllClose(res, [[0.175991, 0.175991, 0.13248, 0.13248]])
-        self.assertEqual(len(multi_rnn_cell.weights), 2 * 4)
-        self.assertTrue(
-            all(x.dtype == dtypes.float32 for x in multi_rnn_cell.weights))
-
-  def testMultiRNNCellWithStateTuple(self):
-    with self.cached_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m_bad = array_ops.zeros([1, 4])
-        m_good = (array_ops.zeros([1, 2]), array_ops.zeros([1, 2]))
-
-        # Test incorrectness of state
-        with self.assertRaisesRegexp(ValueError, "Expected state .* a tuple"):
-          rnn_cell_impl.MultiRNNCell(
-              [rnn_cell_impl.GRUCell(2) for _ in range(2)],
-              state_is_tuple=True)(x, m_bad)
-
-        _, ml = rnn_cell_impl.MultiRNNCell(
-            [rnn_cell_impl.GRUCell(2) for _ in range(2)],
-            state_is_tuple=True)(x, m_good)
-
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(
-            ml, {
-                x.name: np.array([[1., 1.]]),
-                m_good[0].name: np.array([[0.1, 0.1]]),
-                m_good[1].name: np.array([[0.1, 0.1]])
-            })
-
-        # The numbers in results were not calculated; this is just a
-        # smoke test.  However, these numbers should match those of
-        # the test testMultiRNNCell.
-        self.assertAllClose(res[0], [[0.175991, 0.175991]])
-        self.assertAllClose(res[1], [[0.13248, 0.13248]])
-
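With state_is_tuple=True (the default), MultiRNNCell keeps one state entry per layer instead of concatenating them, which is why the two tests above agree value for value. A sketch:

    import tensorflow as tf  # assumes TensorFlow 1.x

    stack = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.GRUCell(2) for _ in range(2)])
    state = stack.zero_state(1, tf.float32)  # one [1, 2] tensor per layer
    _, new_state = stack(tf.zeros([1, 2]), state)
    print(len(new_state))      # 2: a per-layer tuple, not a concatenation
    print(new_state[0].shape)  # (1, 2)
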
-  @parameterized.parameters(
-      [[rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2],
-       [rnn_cell_impl.ResidualWrapper, rnn_cell_impl.ResidualWrapperV2]])
-  @test_util.run_in_graph_and_eager_modes
-  def testWrapperKerasStyle(self, wrapper, wrapper_v2):
-    """Tests if wrapper cell is instantiated in keras style scope."""
-    wrapped_cell_v2 = wrapper_v2(rnn_cell_impl.BasicRNNCell(1))
-    self.assertTrue(wrapped_cell_v2._keras_style)
-
-    wrapped_cell = wrapper(rnn_cell_impl.BasicRNNCell(1))
-    self.assertFalse(wrapped_cell._keras_style)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
-  @test_util.run_in_graph_and_eager_modes
-  def testWrapperV2VariableNames(self, wrapper):
-    """Tests that variables names do not depend on wrapper in RNN layer."""
-
-    def _rnn_input(apply_wrapper, name):
-      """Creates a RNN layer with/without wrapper and returns built rnn cell."""
-      with base_layer.keras_style_scope():
-        base_cell = rnn_cell_impl.MultiRNNCell(
-            [rnn_cell_impl.BasicRNNCell(1, name="basic_rnn_cell")
-             for _ in range(2)])
-      if apply_wrapper:
-        rnn_cell = wrapper(base_cell)
-      else:
-        rnn_cell = base_cell
-      rnn_layer = keras_layers.RNN(rnn_cell, name=name)
-      inputs = ops.convert_to_tensor([[[1]]], dtype=dtypes.float32)
-      _ = rnn_layer(inputs)
-      return base_cell._cells[0]
-
-    rnn_1 = _rnn_input(True, name="rnn_0")
-    rnn_2 = _rnn_input(False, name="rnn_1")
-
-    for i, cell in enumerate([rnn_1, rnn_2]):
-      var_prefix = "rnn_{}/cell_0/basic_rnn_cell/".format(i)
-      self.assertCountEqual([v.name for v in cell.weights],
-                            (var_prefix + "kernel:0", var_prefix + "bias:0"))
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
-  @test_util.run_in_graph_and_eager_modes
-  def testWrapperWeights(self, wrapper):
-    """Tests that wrapper weights contain wrapped cells weights."""
-
-    with base_layer.keras_style_scope():
-      base_cell = rnn_cell_impl.BasicRNNCell(1, name="basic_rnn_cell")
-    rnn_cell = wrapper(base_cell)
-    rnn_layer = keras_layers.RNN(rnn_cell)
-    inputs = ops.convert_to_tensor([[[1]]], dtype=dtypes.float32)
-    rnn_layer(inputs)
-
-    expected_weights = ["rnn/" + var for var in ("kernel:0", "bias:0")]
-    self.assertEqual(len(rnn_cell.weights), 2)
-    self.assertCountEqual([v.name for v in rnn_cell.weights], expected_weights)
-    self.assertCountEqual([v.name for v in rnn_cell.trainable_variables],
-                          expected_weights)
-    self.assertCountEqual([v.name for v in rnn_cell.non_trainable_variables],
-                          [])
-    self.assertCountEqual([v.name for v in rnn_cell._cell.weights],
-                          expected_weights)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
-  @test_util.run_in_graph_and_eager_modes
-  def testWrapperV2Caller(self, wrapper):
-    """Tests that wrapper V2 is using the LayerRNNCell's caller."""
-
-    with base_layer.keras_style_scope():
-      base_cell = rnn_cell_impl.MultiRNNCell(
-          [rnn_cell_impl.BasicRNNCell(1) for _ in range(2)])
-    rnn_cell = wrapper(base_cell)
-    inputs = ops.convert_to_tensor([[1]], dtype=dtypes.float32)
-    state = ops.convert_to_tensor([[1]], dtype=dtypes.float32)
-    _ = rnn_cell(inputs, [state, state])
-    weights = base_cell._cells[0].weights
-    self.assertLen(weights, expected_len=2)
-    self.assertTrue(all(["_wrapper" in v.name for v in weights]))
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
-  @test_util.run_in_graph_and_eager_modes
-  def testWrapperV2Build(self, wrapper):
-    cell = rnn_cell_impl.LSTMCell(10)
-    wrapper = wrapper(cell)
-    wrapper.build((1,))
-    self.assertTrue(cell.built)
-
-
-@test_util.run_all_in_graph_and_eager_modes
-class DropoutWrapperTest(test.TestCase, parameterized.TestCase):
-
-  def _testDropoutWrapper(self,
-                          batch_size=None,
-                          time_steps=None,
-                          parallel_iterations=None,
-                          wrapper_type=None,
-                          scope="root",
-                          **kwargs):
-    if batch_size is None and time_steps is None:
-      # 2 time steps, batch size 1, depth 3
-      batch_size = 1
-      time_steps = 2
-      x = constant_op.constant(
-          [[[2., 2., 2.]], [[1., 1., 1.]]], dtype=dtypes.float32)
-      m = rnn_cell_impl.LSTMStateTuple(
-          *[constant_op.constant([[0.1, 0.1, 0.1]], dtype=dtypes.float32)] * 2)
-    else:
-      x = constant_op.constant(
-          np.random.randn(time_steps, batch_size, 3).astype(np.float32))
-      m = rnn_cell_impl.LSTMStateTuple(*[
-          constant_op.constant(
-              [[0.1, 0.1, 0.1]] * batch_size, dtype=dtypes.float32)] * 2)
-    outputs, final_state = rnn.dynamic_rnn(
-        cell=wrapper_type(
-            rnn_cell_impl.LSTMCell(
-                3, initializer=init_ops.constant_initializer(0.5)),
-            dtype=x.dtype, **kwargs),
-        time_major=True,
-        parallel_iterations=parallel_iterations,
-        inputs=x,
-        initial_state=m,
-        scope=scope)
-    self.evaluate([variables_lib.global_variables_initializer()])
-    res = self.evaluate([outputs, final_state])
-    self.assertEqual(res[0].shape, (time_steps, batch_size, 3))
-    self.assertEqual(res[1].c.shape, (batch_size, 3))
-    self.assertEqual(res[1].h.shape, (batch_size, 3))
-    return res
-
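The helper above drives DropoutWrapper through dynamic_rnn with the three independent keep probabilities it exposes. In isolation, a sketch of the same wiring (public TF 1.x API, illustrative shapes):

    import tensorflow as tf  # assumes TensorFlow 1.x

    cell = tf.nn.rnn_cell.DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(3),
        input_keep_prob=0.9,   # dropout applied to the cell's inputs
        output_keep_prob=0.9,  # dropout applied to the emitted outputs
        state_keep_prob=0.9)   # dropout applied to the recurrent h state
    outputs, state = tf.nn.dynamic_rnn(
        cell, tf.zeros([2, 7, 3]), dtype=tf.float32)  # batch=2, time=7, depth=3
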
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperProperties(self, wrapper_type):
-    cell = rnn_cell_impl.BasicRNNCell(10)
-    wrapper = wrapper_type(cell)
-    # GitHub issue 15810
-    self.assertEqual(wrapper.wrapped_cell, cell)
-    self.assertEqual(wrapper.state_size, 10)
-    self.assertEqual(wrapper.output_size, 10)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperZeroState(self, wrapper_type):
-    class _Cell(rnn_cell_impl.BasicRNNCell):
-
-      def zero_state(self, batch_size=None, dtype=None):
-        return "wrapped_cell_zero_state"
-    wrapper = wrapper_type(_Cell(10))
-    self.assertEqual(wrapper.zero_state(10, dtypes.float32),
-                     "wrapped_cell_zero_state")
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperKeepAllConstantInput(self, wrapper_type):
-    keep = array_ops.ones([])
-    res = self._testDropoutWrapper(
-        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep,
-        wrapper_type=wrapper_type)
-    true_full_output = np.array(
-        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
-        dtype=np.float32)
-    true_full_final_c = np.array(
-        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
-    self.assertAllClose(true_full_output, res[0])
-    self.assertAllClose(true_full_output[1], res[1].h)
-    self.assertAllClose(true_full_final_c, res[1].c)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperKeepAll(self, wrapper_type):
-    keep = variable_scope.get_variable("all", initializer=1.0)
-    res = self._testDropoutWrapper(
-        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep,
-        wrapper_type=wrapper_type)
-    true_full_output = np.array(
-        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
-        dtype=np.float32)
-    true_full_final_c = np.array(
-        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
-    self.assertAllClose(true_full_output, res[0])
-    self.assertAllClose(true_full_output[1], res[1].h)
-    self.assertAllClose(true_full_final_c, res[1].c)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperWithSeed(self, wrapper_type):
-    keep_some = 0.5
-    random_seed.set_random_seed(2)
-    ## Use parallel_iterations = 1 in both calls to
-    ## _testDropoutWrapper to ensure the (per-time step) dropout is
-    ## consistent across both calls.  Otherwise the seed may not end
-    ## up being munged consistently across both graphs.
-    res_standard_1 = self._testDropoutWrapper(
-        input_keep_prob=keep_some,
-        output_keep_prob=keep_some,
-        state_keep_prob=keep_some,
-        seed=10,
-        parallel_iterations=1,
-        wrapper_type=wrapper_type,
-        scope="root_1")
-    random_seed.set_random_seed(2)
-    res_standard_2 = self._testDropoutWrapper(
-        input_keep_prob=keep_some,
-        output_keep_prob=keep_some,
-        state_keep_prob=keep_some,
-        seed=10,
-        parallel_iterations=1,
-        wrapper_type=wrapper_type,
-        scope="root_2")
-    self.assertAllClose(res_standard_1[0], res_standard_2[0])
-    self.assertAllClose(res_standard_1[1].c, res_standard_2[1].c)
-    self.assertAllClose(res_standard_1[1].h, res_standard_2[1].h)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperKeepNoOutput(self, wrapper_type):
-    keep_all = variable_scope.get_variable("all", initializer=1.0)
-    keep_none = variable_scope.get_variable("none", initializer=1e-6)
-    res = self._testDropoutWrapper(
-        input_keep_prob=keep_all,
-        output_keep_prob=keep_none,
-        state_keep_prob=keep_all,
-        wrapper_type=wrapper_type)
-    true_full_output = np.array(
-        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
-        dtype=np.float32)
-    true_full_final_c = np.array(
-        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
-    self.assertAllClose(np.zeros(res[0].shape), res[0])
-    self.assertAllClose(true_full_output[1], res[1].h)
-    self.assertAllClose(true_full_final_c, res[1].c)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperKeepNoStateExceptLSTMCellMemory(self, wrapper_type):
-    keep_all = variable_scope.get_variable("all", initializer=1.0)
-    keep_none = variable_scope.get_variable("none", initializer=1e-6)
-    # Even though we drop out the state, by default DropoutWrapper never
-    # drops out the memory ("c") term of an LSTMStateTuple.
-    res = self._testDropoutWrapper(
-        input_keep_prob=keep_all,
-        output_keep_prob=keep_all,
-        state_keep_prob=keep_none,
-        wrapper_type=wrapper_type)
-    true_c_state = np.array([[1.713925, 1.713925, 1.713925]], dtype=np.float32)
-    true_full_output = np.array(
-        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
-        dtype=np.float32)
-    self.assertAllClose(true_full_output[0], res[0][0])
-    # The second output is modified by the zeroed input state
-    self.assertGreater(np.linalg.norm(true_full_output[1] - res[0][1]), 1e-4)
-    # h state has been set to zero
-    self.assertAllClose(np.zeros(res[1].h.shape), res[1].h)
-    # c state of an LSTMStateTuple is NEVER modified.
-    self.assertAllClose(true_c_state, res[1].c)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperKeepNoInput(self, wrapper_type):
-    keep_all = variable_scope.get_variable("all", initializer=1.0)
-    keep_none = variable_scope.get_variable("none", initializer=1e-6)
-    true_full_output = np.array(
-        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
-        dtype=np.float32)
-    true_full_final_c = np.array(
-        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
-    # All outputs are different because inputs are zeroed out
-    res = self._testDropoutWrapper(
-        input_keep_prob=keep_none,
-        output_keep_prob=keep_all,
-        state_keep_prob=keep_all,
-        wrapper_type=wrapper_type)
-    self.assertGreater(np.linalg.norm(res[0] - true_full_output), 1e-4)
-    self.assertGreater(np.linalg.norm(res[1].h - true_full_output[1]), 1e-4)
-    self.assertGreater(np.linalg.norm(res[1].c - true_full_final_c), 1e-4)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperRecurrentOutput(self, wrapper_type):
-    keep_some = 0.8
-    keep_all = variable_scope.get_variable("all", initializer=1.0)
-    res = self._testDropoutWrapper(
-        input_keep_prob=keep_all,
-        output_keep_prob=keep_some,
-        state_keep_prob=keep_all,
-        variational_recurrent=True,
-        wrapper_type=wrapper_type,
-        input_size=3,
-        batch_size=5,
-        time_steps=7)
-    # Ensure the same dropout pattern for all time steps
-    output_mask = np.abs(res[0]) > 1e-6
-    for m in output_mask[1:]:
-      self.assertAllClose(output_mask[0], m)
-
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperRecurrentStateInputAndOutput(self, wrapper_type):
-    keep_some = 0.9
-    res = self._testDropoutWrapper(
-        input_keep_prob=keep_some,
-        output_keep_prob=keep_some,
-        state_keep_prob=keep_some,
-        variational_recurrent=True,
-        wrapper_type=wrapper_type,
-        input_size=3,
-        batch_size=5,
-        time_steps=7)
-
-    # Smoke test for the state/input masks.
-    output_mask = np.abs(res[0]) > 1e-6
-    for time_step in output_mask:
-      # Ensure the same dropout output pattern for all time steps
-      self.assertAllClose(output_mask[0], time_step)
-      for batch_entry in time_step:
-        # Assert all batch entries get the same mask
-        self.assertAllClose(batch_entry, time_step[0])
-
-    # For state, ensure all batch entries have the same mask
-    state_c_mask = np.abs(res[1].c) > 1e-6
-    state_h_mask = np.abs(res[1].h) > 1e-6
-    for batch_entry in state_c_mask:
-      self.assertAllClose(batch_entry, state_c_mask[0])
-    for batch_entry in state_h_mask:
-      self.assertAllClose(batch_entry, state_h_mask[0])
-
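Variational recurrent dropout samples one mask per sequence and reuses it at every step, which is exactly the constant-across-time pattern the two tests above assert. Enabling it requires declaring the input depth and dtype up front so the masks can be built once; a sketch:

    import tensorflow as tf  # assumes TensorFlow 1.x

    cell = tf.nn.rnn_cell.DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(3),
        input_keep_prob=0.9,
        output_keep_prob=0.9,
        state_keep_prob=0.9,
        variational_recurrent=True,  # one mask per sequence, reused per step
        input_size=3,                # required when variational_recurrent is set
        dtype=tf.float32)            # likewise required, to build the masks
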
-  @parameterized.parameters(
-      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
-  def testDropoutWrapperRecurrentStateInputAndOutputWithSeed(
-      self, wrapper_type):
-    keep_some = 0.9
-    random_seed.set_random_seed(2347)
-    np.random.seed(23487)
-    res0 = self._testDropoutWrapper(
-        input_keep_prob=keep_some,
-        output_keep_prob=keep_some,
-        state_keep_prob=keep_some,
-        variational_recurrent=True,
-        wrapper_type=wrapper_type,
-        input_size=3,
-        batch_size=5,
-        time_steps=7,
-        seed=-234987,
-        scope="root_0")
-    random_seed.set_random_seed(2347)
-    np.random.seed(23487)
-    res1 = self._testDropoutWrapper(
-        input_keep_prob=keep_some,
-        output_keep_prob=keep_some,
-        state_keep_prob=keep_some,
-        variational_recurrent=True,
-        wrapper_type=wrapper_type,
-        input_size=3,
-        batch_size=5,
-        time_steps=7,
-        seed=-234987,
-        scope="root_1")
-
-    output_mask = np.abs(res0[0]) > 1e-6
-    for time_step in output_mask:
-      # Ensure the same dropout output pattern for all time steps
-      self.assertAllClose(output_mask[0], time_step)
-      for batch_entry in time_step:
-        # Assert all batch entries get the same mask
-        self.assertAllClose(batch_entry, time_step[0])
-
-    # For state, ensure all batch entries have the same mask
-    state_c_mask = np.abs(res0[1].c) > 1e-6
-    state_h_mask = np.abs(res0[1].h) > 1e-6
-    for batch_entry in state_c_mask:
-      self.assertAllClose(batch_entry, state_c_mask[0])
-    for batch_entry in state_h_mask:
-      self.assertAllClose(batch_entry, state_h_mask[0])
-
-    # Ensure seeded calculation is identical.
-    self.assertAllClose(res0[0], res1[0])
-    self.assertAllClose(res0[1].c, res1[1].c)
-    self.assertAllClose(res0[1].h, res1[1].h)
-
-
-def basic_rnn_cell(inputs, state, num_units, scope=None):
-  if state is None:
-    if inputs is not None:
-      batch_size = inputs.get_shape()[0]
-      dtype = inputs.dtype
-    else:
-      batch_size = 0
-      dtype = dtypes.float32
-    init_output = array_ops.zeros(
-        array_ops.stack([batch_size, num_units]), dtype=dtype)
-    init_state = array_ops.zeros(
-        array_ops.stack([batch_size, num_units]), dtype=dtype)
-    init_output.set_shape([batch_size, num_units])
-    init_state.set_shape([batch_size, num_units])
-    return init_output, init_state
-  else:
-    with variable_scope.variable_scope(scope, "basic_rnn_cell",
-                                       [inputs, state]):
-      output = math_ops.tanh(
-          Linear([inputs, state], num_units, True)([inputs, state]))
-    return output, output
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
deleted file mode 100644
index ef372b9..0000000
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
+++ /dev/null
@@ -1,2447 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for rnn module."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import itertools
-
-import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.contrib import rnn as rnn_lib
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops as ops_lib
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gradients_impl
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import rnn
-from tensorflow.python.ops import rnn_cell
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import tensor_array_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging
-from tensorflow.python.util import nest
-
-
-class Plus1RNNCell(rnn_lib.RNNCell):
-  """RNN Cell generating (output, new_state) = (input + 1, state + 1)."""
-
-  @property
-  def output_size(self):
-    return 5
-
-  @property
-  def state_size(self):
-    return 5
-
-  def __call__(self, input_, state, scope=None):
-    return (input_ + 1, state + 1)
-
-
-class DummyMultiDimensionalLSTM(rnn_lib.RNNCell):
-  """LSTM Cell generating (output, new_state) = (input + 1, state + 1).
-
-  The input to this cell may have an arbitrary number of dimensions that follow
-  the preceding 'Time' and 'Batch' dimensions.
-  """
-
-  def __init__(self, dims):
-    """Initialize the Multi-dimensional LSTM cell.
-
-    Args:
-      dims: tuple that contains the dimensions of the output of the cell,
-      without including 'Time' or 'Batch' dimensions.
-    """
-    if not isinstance(dims, tuple):
-      raise TypeError("The dimensions passed to DummyMultiDimensionalLSTM "
-                      "should be a tuple of ints.")
-    self._dims = dims
-    self._output_size = tensor_shape.TensorShape(self._dims)
-    self._state_size = (tensor_shape.TensorShape(self._dims),
-                        tensor_shape.TensorShape(self._dims))
-
-  @property
-  def output_size(self):
-    return self._output_size
-
-  @property
-  def state_size(self):
-    return self._state_size
-
-  def __call__(self, input_, state, scope=None):
-    h, c = state
-    return (input_ + 1, (h + 1, c + 1))
-
-
-class NestedRNNCell(rnn_lib.RNNCell):
-  """RNN Cell generating (output, new_state) = (input + 1, state + 1).
-
-  The input, output, and state of this cell are each a tuple of two tensors.
-  """
-
-  @property
-  def output_size(self):
-    return (5, 5)
-
-  @property
-  def state_size(self):
-    return (6, 6)
-
-  def __call__(self, input_, state, scope=None):
-    h, c = state
-    x, y = input_
-    return ((x + 1, y + 1), (h + 1, c + 1))
-
-
-class TestStateSaver(object):
-
-  def __init__(self, batch_size, state_size):
-    self._batch_size = batch_size
-    self._state_size = state_size
-    self.saved_state = {}
-
-  def state(self, name):
-    if isinstance(self._state_size, dict):
-      state_size = self._state_size[name]
-    else:
-      state_size = self._state_size
-    if isinstance(state_size, int):
-      state_size = (state_size,)
-    elif isinstance(state_size, tuple):
-      pass
-    else:
-      raise TypeError("state_size should either be an int or a tuple")
-
-    return array_ops.zeros((self._batch_size,) + state_size)
-
-  def save_state(self, name, state):
-    self.saved_state[name] = state
-    return array_ops.identity(state)
-
-  @property
-  def batch_size(self):
-    return self._batch_size
-
-  @property
-  def state_size(self):
-    return self._state_size
-
-
-class TestStateSaverWithCounters(TestStateSaver):
-  """Class wrapper around TestStateSaver.
-
-  A dummy class used for testing static_state_saving_rnn. It helps test
-  whether the save_state and state functions are called the same number of
-  times when we evaluate the RNN cell's output and state together or
-  separately. It inherits from TestStateSaver and adds counters for those
-  calls.
-  """
-
-  def __init__(self, batch_size, state_size):
-    super(TestStateSaverWithCounters, self).__init__(batch_size, state_size)
-    self._num_state_calls = variables_lib.VariableV1(0)
-    self._num_save_state_calls = variables_lib.VariableV1(0)
-
-  def state(self, name):
-    with ops_lib.control_dependencies(
-        [state_ops.assign_add(self._num_state_calls, 1)]):
-      return super(TestStateSaverWithCounters, self).state(name)
-
-  def save_state(self, name, state):
-    with ops_lib.control_dependencies([state_ops.assign_add(
-        self._num_save_state_calls, 1)]):
-      return super(TestStateSaverWithCounters, self).save_state(name, state)
-
-  @property
-  def num_state_calls(self):
-    return self._num_state_calls
-
-  @property
-  def num_save_state_calls(self):
-    return self._num_save_state_calls
-
-
-class RNNTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def testInvalidSequenceLengthShape(self):
-    cell = Plus1RNNCell()
-    inputs = [array_ops.placeholder(dtypes.float32, shape=(3, 4))]
-    with self.assertRaisesRegexp(ValueError, "must be a vector"):
-      rnn.static_rnn(cell, inputs, dtype=dtypes.float32, sequence_length=4)
-
-  def testRNN(self):
-    cell = Plus1RNNCell()
-    batch_size = 2
-    input_size = 5
-    max_length = 8  # unrolled up to this length
-    inputs = max_length * [
-        array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-    ]
-    outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-    self.assertEqual(len(outputs), len(inputs))
-    for out, inp in zip(outputs, inputs):
-      self.assertEqual(out.get_shape(), inp.get_shape())
-      self.assertEqual(out.dtype, inp.dtype)
-
-    with self.session(use_gpu=True) as sess:
-      input_value = np.random.randn(batch_size, input_size)
-      values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
-
-      # Outputs
-      for v in values[:-1]:
-        self.assertAllClose(v, input_value + 1.0)
-
-      # Final state
-      self.assertAllClose(values[-1],
-                          max_length * np.ones(
-                              (batch_size, input_size), dtype=np.float32))
-
-  def testDropout(self):
-    cell = Plus1RNNCell()
-    full_dropout_cell = rnn_cell.DropoutWrapper(
-        cell, input_keep_prob=1e-6, seed=0)
-    (name, dep), = full_dropout_cell._checkpoint_dependencies
-    self.assertIs(dep, cell)
-    self.assertEqual("cell", name)
-    batch_size = 2
-    input_size = 5
-    max_length = 8
-    inputs = max_length * [
-        array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-    ]
-    with variable_scope.variable_scope("share_scope"):
-      outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-    with variable_scope.variable_scope("drop_scope"):
-      dropped_outputs, _ = rnn.static_rnn(
-          full_dropout_cell, inputs, dtype=dtypes.float32)
-    self.assertEqual(len(outputs), len(inputs))
-    for out, inp in zip(outputs, inputs):
-      self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
-      self.assertEqual(out.dtype, inp.dtype)
-
-    with self.session(use_gpu=True) as sess:
-      input_value = np.random.randn(batch_size, input_size)
-      values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
-      full_dropout_values = sess.run(
-          dropped_outputs, feed_dict={
-              inputs[0]: input_value
-          })
-
-      for v in values[:-1]:
-        self.assertAllClose(v, input_value + 1.0)
-      for d_v in full_dropout_values[:-1]:  # Add 1.0 to dropped_out (all zeros)
-        self.assertAllClose(d_v, np.ones_like(input_value))
-
-  def testDynamicCalculation(self):
-    cell = Plus1RNNCell()
-    sequence_length = array_ops.placeholder(dtypes.int64)
-    batch_size = 2
-    input_size = 5
-    max_length = 8
-    inputs = max_length * [
-        array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-    ]
-    with variable_scope.variable_scope("drop_scope"):
-      dynamic_outputs, dynamic_state = rnn.static_rnn(
-          cell, inputs, sequence_length=sequence_length, dtype=dtypes.float32)
-    self.assertEqual(len(dynamic_outputs), len(inputs))
-
-    with self.session(use_gpu=True) as sess:
-      input_value = np.random.randn(batch_size, input_size)
-      dynamic_values = sess.run(
-          dynamic_outputs,
-          feed_dict={
-              inputs[0]: input_value,
-              sequence_length: [2, 3]
-          })
-      dynamic_state_value = sess.run(
-          [dynamic_state],
-          feed_dict={
-              inputs[0]: input_value,
-              sequence_length: [2, 3]
-          })
-
-      # outputs are fully calculated for t = 0, 1
-      for v in dynamic_values[:2]:
-        self.assertAllClose(v, input_value + 1.0)
-
-      # outputs at t = 2 are zero for entry 0, calculated for entry 1
-      self.assertAllClose(dynamic_values[2],
-                          np.vstack((np.zeros((input_size)),
-                                     1.0 + input_value[1, :])))
-
-      # outputs at t = 3+ are zero
-      for v in dynamic_values[3:]:
-        self.assertAllEqual(v, np.zeros_like(input_value))
-
-      # the final states are:
-      #  entry 0: the values from the calculation at t=1
-      #  entry 1: the values from the calculation at t=2
-      self.assertAllEqual(dynamic_state_value[0],
-                          np.vstack((1.0 * (1 + 1) * np.ones((input_size)),
-                                     1.0 * (2 + 1) * np.ones((input_size)))))
-
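The semantics checked above come from sequence_length: past each entry's length the RNN emits zeros and stops updating that entry's state. The same contract holds for dynamic_rnn; a sketch:

    import numpy as np
    import tensorflow as tf  # assumes TensorFlow 1.x

    x = tf.constant(np.ones((2, 4, 3), np.float32))  # batch=2, time=4, depth=3
    outputs, state = tf.nn.dynamic_rnn(
        tf.nn.rnn_cell.BasicRNNCell(3), x,
        sequence_length=[2, 4], dtype=tf.float32)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(outputs)[0, 2:])  # all zeros: entry 0 runs only 2 steps
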
-  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
-    with self.session(use_gpu=True, graph=ops_lib.Graph()):
-      if use_outer_scope:
-        with variable_scope.variable_scope(prefix) as scope:
-          factory(scope)
-      else:
-        factory(prefix)
-
-      # Check that all the variable names start with the proper scope.
-      variables_lib.global_variables_initializer()
-      all_vars = variables_lib.global_variables()
-      prefix = prefix or "rnn"
-      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
-      tf_logging.info("RNN with scope: %s (%s)" %
-                      (prefix, "scope" if use_outer_scope else "str"))
-      for v in scope_vars:
-        tf_logging.info(v.name)
-      self.assertEqual(len(scope_vars), len(all_vars))
-
-  def testScope(self):
-
-    def factory(scope):
-      cell = Plus1RNNCell()
-      batch_size = 2
-      input_size = 5
-      max_length = 8  # unrolled up to this length
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-      ]
-      return rnn.static_rnn(cell, inputs, dtype=dtypes.float32, scope=scope)
-
-    self._testScope(factory, use_outer_scope=True)
-    self._testScope(factory, use_outer_scope=False)
-    self._testScope(factory, prefix=None, use_outer_scope=False)
-
-
-class LSTMTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def testDType(self):
-    # Test case for GitHub issue 16228
-    # Not passing dtype in constructor results in default float32
-    lstm = rnn_cell.LSTMCell(10)
-    input_tensor = array_ops.ones([10, 50])
-    lstm.build(input_tensor.get_shape())
-    self.assertEqual(lstm._bias.dtype, dtypes.float32_ref)
-
-    # Explicitly pass dtype in constructor
-    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
-      lstm = rnn_cell.LSTMCell(10, dtype=dtype)
-      input_tensor = array_ops.ones([10, 50])
-      lstm.build(input_tensor.get_shape())
-      self.assertEqual(lstm._bias.dtype, dtype._as_ref)
-
-  def testNoProjNoSharding(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      cell = rnn_cell.LSTMCell(
-          num_units, initializer=initializer, state_is_tuple=False)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-      ]
-      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-      self.assertEqual(len(outputs), len(inputs))
-      for out in outputs:
-        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      sess.run(outputs, feed_dict={inputs[0]: input_value})
-
-  def testCellClipping(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          cell_clip=0.0,
-          initializer=initializer,
-          state_is_tuple=False)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-      ]
-      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-      self.assertEqual(len(outputs), len(inputs))
-      for out in outputs:
-        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      values = sess.run(outputs, feed_dict={inputs[0]: input_value})
-
-    for value in values:
-      # if cell c is clipped to 0, tanh(c) = 0 => m==0
-      self.assertAllEqual(value, np.zeros((batch_size, num_units)))
-
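The degenerate cell_clip=0.0 above forces c to 0 and hence tanh(c) = 0 at every step, so all outputs are zero. In practice the clip bounds the memory cell to a symmetric range; a one-line sketch of a typical setting:

    import tensorflow as tf  # assumes TensorFlow 1.x

    cell = tf.nn.rnn_cell.LSTMCell(3, use_peepholes=True, cell_clip=1.0)  # c in [-1, 1]
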
-  def testNoProjNoShardingSimpleStateSaver(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      state_saver = TestStateSaver(batch_size, 2 * num_units)
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=False,
-          initializer=initializer,
-          state_is_tuple=False)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-      ]
-      with variable_scope.variable_scope("share_scope"):
-        outputs, state = rnn.static_state_saving_rnn(
-            cell, inputs, state_saver=state_saver, state_name="save_lstm")
-      self.assertEqual(len(outputs), len(inputs))
-      for out in outputs:
-        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      (last_state_value, saved_state_value) = sess.run(
-          [state, state_saver.saved_state["save_lstm"]],
-          feed_dict={
-              inputs[0]: input_value
-          })
-      self.assertAllEqual(last_state_value, saved_state_value)
-
-  def testNoProjNoShardingTupleStateSaver(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    max_length = 8
-    with self.session(graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      state_saver = TestStateSaver(batch_size, num_units)
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=False,
-          initializer=initializer,
-          state_is_tuple=True)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-      ]
-      with variable_scope.variable_scope("share_scope"):
-        outputs, state = rnn.static_state_saving_rnn(
-            cell, inputs, state_saver=state_saver, state_name=("c", "m"))
-      self.assertEqual(len(outputs), len(inputs))
-      for out in outputs:
-        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      last_and_saved_states = sess.run(
-          state + (state_saver.saved_state["c"], state_saver.saved_state["m"]),
-          feed_dict={
-              inputs[0]: input_value
-          })
-      self.assertEqual(4, len(last_and_saved_states))
-      self.assertAllEqual(last_and_saved_states[:2], last_and_saved_states[2:])
-
-  def testNoProjNoShardingNestedTupleStateSaver(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    max_length = 8
-    with self.session(graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      state_saver = TestStateSaver(
-          batch_size, {
-              "c0": num_units,
-              "m0": num_units,
-              "c1": num_units + 1,
-              "m1": num_units + 1,
-              "c2": num_units + 2,
-              "m2": num_units + 2,
-              "c3": num_units + 3,
-              "m3": num_units + 3
-          })
-
-      def _cell(i):
-        return rnn_cell.LSTMCell(
-            num_units + i,
-            use_peepholes=False,
-            initializer=initializer,
-            state_is_tuple=True)
-
-      # This creates a state tuple which has 4 sub-tuples of length 2 each.
-      cell = rnn_cell.MultiRNNCell(
-          [_cell(i) for i in range(4)], state_is_tuple=True)
-
-      self.assertEqual(len(cell.state_size), 4)
-      for i in range(4):
-        self.assertEqual(len(cell.state_size[i]), 2)
-
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
-      ]
-
-      state_names = (("c0", "m0"), ("c1", "m1"), ("c2", "m2"), ("c3", "m3"))
-      with variable_scope.variable_scope("share_scope"):
-        outputs, state = rnn.static_state_saving_rnn(
-            cell, inputs, state_saver=state_saver, state_name=state_names)
-      self.assertEqual(len(outputs), len(inputs))
-
-      # Final output comes from _cell(3) which has state size num_units + 3
-      for out in outputs:
-        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units + 3])
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      last_states = sess.run(
-          list(nest.flatten(state)), feed_dict={
-              inputs[0]: input_value
-          })
-      saved_states = sess.run(
-          list(state_saver.saved_state.values()),
-          feed_dict={
-              inputs[0]: input_value
-          })
-      self.assertEqual(8, len(last_states))
-      self.assertEqual(8, len(saved_states))
-      flat_state_names = nest.flatten(state_names)
-      named_saved_states = dict(
-          zip(state_saver.saved_state.keys(), saved_states))
-
-      for i in range(8):
-        self.assertAllEqual(last_states[i],
-                            named_saved_states[flat_state_names[i]])
-
-  def testProjNoSharding(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-      ]
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          initializer=initializer,
-          state_is_tuple=False)
-      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-      self.assertEqual(len(outputs), len(inputs))
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      sess.run(outputs, feed_dict={inputs[0]: input_value})
-
-  def _testStateTupleWithProjAndSequenceLength(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    max_length = 8
-    sequence_length = [4, 6]
-    with self.session(graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-      ]
-      cell_notuple = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          initializer=initializer,
-          state_is_tuple=False)
-      cell_tuple = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          initializer=initializer,
-          state_is_tuple=True)
-      with variable_scope.variable_scope("root") as scope:
-        outputs_notuple, state_notuple = rnn.static_rnn(
-            cell_notuple,
-            inputs,
-            dtype=dtypes.float32,
-            sequence_length=sequence_length,
-            scope=scope)
-        scope.reuse_variables()
-        # TODO(ebrevdo): For this test, we ensure values are identical and
-        # therefore the weights here are tied.  In the future, we may consider
-        # making the state_is_tuple property mutable so we can avoid
-        # having to do this - especially if users ever need to reuse
-        # the parameters from different RNNCell instances.  Right now,
-        # this seems an unrealistic use case except for testing.
-        cell_tuple._scope = cell_notuple._scope  # pylint: disable=protected-access
-        outputs_tuple, state_tuple = rnn.static_rnn(
-            cell_tuple,
-            inputs,
-            dtype=dtypes.float32,
-            sequence_length=sequence_length,
-            scope=scope)
-      self.assertEqual(len(outputs_notuple), len(inputs))
-      self.assertEqual(len(outputs_tuple), len(inputs))
-      self.assertTrue(isinstance(state_tuple, tuple))
-      self.assertTrue(isinstance(state_notuple, ops_lib.Tensor))
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      outputs_notuple_v = sess.run(
-          outputs_notuple, feed_dict={
-              inputs[0]: input_value
-          })
-      outputs_tuple_v = sess.run(
-          outputs_tuple, feed_dict={
-              inputs[0]: input_value
-          })
-      self.assertAllEqual(outputs_notuple_v, outputs_tuple_v)
-
-      (state_notuple_v,) = sess.run(
-          (state_notuple,), feed_dict={
-              inputs[0]: input_value
-          })
-      state_tuple_v = sess.run(state_tuple, feed_dict={inputs[0]: input_value})
-      self.assertAllEqual(state_notuple_v, np.hstack(state_tuple_v))
-
-  def testProjSharding(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    num_proj_shards = 3
-    num_unit_shards = 2
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-      ]
-
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          num_unit_shards=num_unit_shards,
-          num_proj_shards=num_proj_shards,
-          initializer=initializer,
-          state_is_tuple=False)
-
-      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-
-      self.assertEqual(len(outputs), len(inputs))
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      sess.run(outputs, feed_dict={inputs[0]: input_value})
-
-  def testDoubleInput(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    num_proj_shards = 3
-    num_unit_shards = 2
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float64, shape=(None, input_size))
-      ]
-
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          num_unit_shards=num_unit_shards,
-          num_proj_shards=num_proj_shards,
-          initializer=initializer,
-          state_is_tuple=False)
-
-      outputs, _ = rnn.static_rnn(
-          cell,
-          inputs,
-          initial_state=cell.zero_state(batch_size, dtypes.float64))
-
-      self.assertEqual(len(outputs), len(inputs))
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.asarray(
-          np.random.randn(batch_size, input_size), dtype=np.float64)
-      values = sess.run(outputs, feed_dict={inputs[0]: input_value})
-      self.assertEqual(values[0].dtype, input_value.dtype)
-
-  def testShardNoShardEquivalentOutput(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    num_proj_shards = 3
-    num_unit_shards = 2
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-      ]
-      initializer = init_ops.constant_initializer(0.001)
-
-      cell_noshard = rnn_cell.LSTMCell(
-          num_units,
-          num_proj=num_proj,
-          use_peepholes=True,
-          initializer=initializer,
-          num_unit_shards=num_unit_shards,
-          num_proj_shards=num_proj_shards,
-          state_is_tuple=False)
-
-      cell_shard = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          initializer=initializer,
-          num_proj=num_proj,
-          state_is_tuple=False)
-
-      with variable_scope.variable_scope("noshard_scope"):
-        outputs_noshard, state_noshard = rnn.static_rnn(
-            cell_noshard, inputs, dtype=dtypes.float32)
-      with variable_scope.variable_scope("shard_scope"):
-        outputs_shard, state_shard = rnn.static_rnn(
-            cell_shard, inputs, dtype=dtypes.float32)
-
-      self.assertEqual(len(outputs_noshard), len(inputs))
-      self.assertEqual(len(outputs_noshard), len(outputs_shard))
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      feeds = dict((x, input_value) for x in inputs)
-      values_noshard = sess.run(outputs_noshard, feed_dict=feeds)
-      values_shard = sess.run(outputs_shard, feed_dict=feeds)
-      state_values_noshard = sess.run([state_noshard], feed_dict=feeds)
-      state_values_shard = sess.run([state_shard], feed_dict=feeds)
-      self.assertEqual(len(values_noshard), len(values_shard))
-      self.assertEqual(len(state_values_noshard), len(state_values_shard))
-      for (v_noshard, v_shard) in zip(values_noshard, values_shard):
-        self.assertAllClose(v_noshard, v_shard, atol=1e-3)
-      for (s_noshard, s_shard) in zip(state_values_noshard, state_values_shard):
-        self.assertAllClose(s_noshard, s_shard, atol=1e-3)
-
-  def testDoubleInputWithDropoutAndDynamicCalculation(self):
-    """Smoke test for using LSTM with doubles, dropout, dynamic calculation."""
-
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    num_proj_shards = 3
-    num_unit_shards = 2
-    max_length = 8
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      sequence_length = array_ops.placeholder(dtypes.int64)
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float64, shape=(None, input_size))
-      ]
-
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          num_unit_shards=num_unit_shards,
-          num_proj_shards=num_proj_shards,
-          initializer=initializer,
-          state_is_tuple=False)
-      dropout_cell = rnn_cell.DropoutWrapper(cell, 0.5, seed=0)
-
-      outputs, state = rnn.static_rnn(
-          dropout_cell,
-          inputs,
-          sequence_length=sequence_length,
-          initial_state=cell.zero_state(batch_size, dtypes.float64))
-
-      self.assertEqual(len(outputs), len(inputs))
-
-      variables_lib.global_variables_initializer().run(feed_dict={
-          sequence_length: [2, 3]
-      })
-      input_value = np.asarray(
-          np.random.randn(batch_size, input_size), dtype=np.float64)
-      values = sess.run(
-          outputs, feed_dict={
-              inputs[0]: input_value,
-              sequence_length: [2, 3]
-          })
-      state_value = sess.run(
-          [state], feed_dict={
-              inputs[0]: input_value,
-              sequence_length: [2, 3]
-          })
-      self.assertEqual(values[0].dtype, input_value.dtype)
-      self.assertEqual(state_value[0].dtype, input_value.dtype)
-
-  def testSharingWeightsWithReuse(self):
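-    """Reusing a variable scope shares weights; a fresh scope does not."""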
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    max_length = 8
-    with self.session(graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed)
-      initializer_d = init_ops.random_uniform_initializer(
-          -1, 1, seed=self._seed + 1)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-      ]
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          initializer=initializer,
-          state_is_tuple=False)
-      cell_d = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          initializer=initializer_d,
-          state_is_tuple=False)
-
-      with variable_scope.variable_scope("share_scope"):
-        outputs0, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-      with variable_scope.variable_scope("share_scope", reuse=True):
-        outputs1, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-      with variable_scope.variable_scope("diff_scope"):
-        outputs2, _ = rnn.static_rnn(cell_d, inputs, dtype=dtypes.float32)
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      output_values = sess.run(
-          outputs0 + outputs1 + outputs2, feed_dict={
-              inputs[0]: input_value
-          })
-      outputs0_values = output_values[:max_length]
-      outputs1_values = output_values[max_length:2 * max_length]
-      outputs2_values = output_values[2 * max_length:]
-      self.assertEqual(len(outputs0_values), len(outputs1_values))
-      self.assertEqual(len(outputs0_values), len(outputs2_values))
-      for o1, o2, o3 in zip(outputs0_values, outputs1_values, outputs2_values):
-        # Same weights used by both RNNs so outputs should be the same.
-        self.assertAllEqual(o1, o2)
-        # Different weights used so outputs should be different.
-        self.assertTrue(np.linalg.norm(o1 - o3) > 1e-6)
-
-  def testSharingWeightsWithDifferentNamescope(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    max_length = 8
-    with self.session(graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed)
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-      ]
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          initializer=initializer,
-          state_is_tuple=False)
-
-      with ops_lib.name_scope("scope0"):
-        with variable_scope.variable_scope("share_scope"):
-          outputs0, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-      with ops_lib.name_scope("scope1"):
-        with variable_scope.variable_scope("share_scope", reuse=True):
-          outputs1, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
-
-      variables_lib.global_variables_initializer().run()
-      input_value = np.random.randn(batch_size, input_size)
-      output_values = sess.run(
-          outputs0 + outputs1, feed_dict={
-              inputs[0]: input_value
-          })
-      outputs0_values = output_values[:max_length]
-      outputs1_values = output_values[max_length:]
-      self.assertEqual(len(outputs0_values), len(outputs1_values))
-      for out0, out1 in zip(outputs0_values, outputs1_values):
-        self.assertAllEqual(out0, out1)
-
-  def testDynamicRNNAllowsUnknownTimeDimension(self):
-    inputs = array_ops.placeholder(dtypes.float32, shape=[1, None, 20])
-    cell = rnn_cell.GRUCell(30)
-    # Smoke test: this should not raise an error.
-    rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDynamicRNNWithTupleStates(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    max_length = 8
-    sequence_length = [4, 6]
-    in_graph_mode = not context.executing_eagerly()
-    with self.session(graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      if in_graph_mode:
-        inputs = max_length * [
-            array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-        ]
-      else:
-        inputs = max_length * [
-            constant_op.constant(
-                np.random.randn(batch_size, input_size).astype(np.float32))
-        ]
-      inputs_c = array_ops.stack(inputs)
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          num_proj=num_proj,
-          initializer=initializer,
-          state_is_tuple=True)
-      with variable_scope.variable_scope("root") as scope:
-        outputs_static, state_static = rnn.static_rnn(
-            cell,
-            inputs,
-            dtype=dtypes.float32,
-            sequence_length=sequence_length,
-            scope=scope)
-        scope.reuse_variables()
-        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
-            cell,
-            inputs_c,
-            dtype=dtypes.float32,
-            time_major=True,
-            sequence_length=sequence_length,
-            scope=scope)
-      self.assertTrue(isinstance(state_static, rnn_cell.LSTMStateTuple))
-      self.assertTrue(isinstance(state_dynamic, rnn_cell.LSTMStateTuple))
-      self.assertEqual(state_static[0], state_static.c)
-      self.assertEqual(state_static[1], state_static.h)
-      self.assertEqual(state_dynamic[0], state_dynamic.c)
-      self.assertEqual(state_dynamic[1], state_dynamic.h)
-
-      if in_graph_mode:
-        variables_lib.global_variables_initializer().run()
-        input_value = np.random.randn(batch_size, input_size)
-        outputs_static = sess.run(
-            outputs_static, feed_dict={
-                inputs[0]: input_value
-            })
-        outputs_dynamic = sess.run(
-            outputs_dynamic, feed_dict={
-                inputs[0]: input_value
-            })
-        state_static = sess.run(
-            state_static, feed_dict={
-                inputs[0]: input_value
-            })
-        state_dynamic = sess.run(
-            state_dynamic, feed_dict={
-                inputs[0]: input_value
-            })
-
-      if in_graph_mode:
-        self.assertAllEqual(outputs_static, outputs_dynamic)
-      else:
-        self.assertAllEqual(array_ops.stack(outputs_static), outputs_dynamic)
-      self.assertAllEqual(np.hstack(state_static), np.hstack(state_dynamic))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDynamicRNNWithNestedTupleStates(self):
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    num_proj = 4
-    max_length = 8
-    sequence_length = [4, 6]
-    in_graph_mode = not context.executing_eagerly()
-    with self.session(graph=ops_lib.Graph()) as sess:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      if in_graph_mode:
-        inputs = max_length * [
-            array_ops.placeholder(dtypes.float32, shape=(None, input_size))
-        ]
-      else:
-        inputs = max_length * [
-            constant_op.constant(
-                np.random.randn(batch_size, input_size).astype(np.float32))
-        ]
-      inputs_c = array_ops.stack(inputs)
-
-      def _cell(i):
-        return rnn_cell.LSTMCell(
-            num_units + i,
-            use_peepholes=True,
-            num_proj=num_proj + i,
-            initializer=initializer,
-            state_is_tuple=True)
-
-      # This creates a state tuple which has 4 sub-tuples of length 2 each.
-      cell = rnn_cell.MultiRNNCell(
-          [_cell(i) for i in range(4)], state_is_tuple=True)
-
-      self.assertEqual(len(cell.state_size), 4)
-      for i in range(4):
-        self.assertEqual(len(cell.state_size[i]), 2)
-
-      test_zero = cell.zero_state(1, dtypes.float32)
-      self.assertEqual(len(test_zero), 4)
-      for i in range(4):
-        self.assertEqual(test_zero[i][0].get_shape()[1], cell.state_size[i][0])
-        self.assertEqual(test_zero[i][1].get_shape()[1], cell.state_size[i][1])
-
-      with variable_scope.variable_scope("root") as scope:
-        outputs_static, state_static = rnn.static_rnn(
-            cell,
-            inputs,
-            dtype=dtypes.float32,
-            sequence_length=sequence_length,
-            scope=scope)
-        scope.reuse_variables()
-        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
-            cell,
-            inputs_c,
-            dtype=dtypes.float32,
-            time_major=True,
-            sequence_length=sequence_length,
-            scope=scope)
-
-      if in_graph_mode:
-        input_value = np.random.randn(batch_size, input_size)
-        variables_lib.global_variables_initializer().run()
-        outputs_static = sess.run(
-            outputs_static, feed_dict={
-                inputs[0]: input_value
-            })
-        outputs_dynamic = sess.run(
-            outputs_dynamic, feed_dict={
-                inputs[0]: input_value
-            })
-        state_static = sess.run(
-            nest.flatten(state_static), feed_dict={
-                inputs[0]: input_value
-            })
-        state_dynamic = sess.run(
-            nest.flatten(state_dynamic), feed_dict={
-                inputs[0]: input_value
-            })
-
-      if in_graph_mode:
-        self.assertAllEqual(outputs_static, outputs_dynamic)
-      else:
-        self.assertAllEqual(array_ops.stack(outputs_static), outputs_dynamic)
-        state_static = nest.flatten(state_static)
-        state_dynamic = nest.flatten(state_dynamic)
-      self.assertAllEqual(np.hstack(state_static), np.hstack(state_dynamic))
-
-  def _testDynamicEquivalentToStaticRNN(self, use_sequence_length):
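-    """Checks that dynamic_rnn and static_rnn produce identical results.
-
-    Builds the same seeded LSTM once with static_rnn and once with
-    dynamic_rnn, then compares forward outputs, final states, and gradients
-    w.r.t. both the inputs and the trainable variables.
-    """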
-    time_steps = 8
-    num_units = 3
-    num_proj = 4
-    input_size = 5
-    batch_size = 2
-
-    input_values = np.random.randn(time_steps, batch_size, input_size).astype(
-        np.float32)
-
-    if use_sequence_length:
-      sequence_length = np.random.randint(0, time_steps, size=batch_size)
-    else:
-      sequence_length = None
-
-    in_graph_mode = not context.executing_eagerly()
-
-    # TODO(b/68017812): Eager ignores operation seeds, so we need to create a
-    # single cell and reuse it across the static and dynamic RNNs. Remove this
-    # special case once b/68017812 is fixed.
-    if not in_graph_mode:
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-      cell = rnn_cell.LSTMCell(
-          num_units,
-          use_peepholes=True,
-          initializer=initializer,
-          num_proj=num_proj,
-          state_is_tuple=False)
-
-    ########## Step 1: Run static graph and generate readouts
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      if in_graph_mode:
-        concat_inputs = array_ops.placeholder(
-            dtypes.float32, shape=(time_steps, batch_size, input_size))
-      else:
-        concat_inputs = constant_op.constant(input_values)
-      inputs = array_ops.unstack(concat_inputs)
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-
-      # TODO(akshayka): Remove special case once b/68017812 is fixed.
-      if in_graph_mode:
-        cell = rnn_cell.LSTMCell(
-            num_units,
-            use_peepholes=True,
-            initializer=initializer,
-            num_proj=num_proj,
-            state_is_tuple=False)
-
-      with variable_scope.variable_scope("dynamic_scope"):
-        outputs_static, state_static = rnn.static_rnn(
-            cell, inputs, sequence_length=sequence_length, dtype=dtypes.float32)
-
-      if in_graph_mode:
-        # Generate gradients and run sessions to obtain outputs
-        feeds = {concat_inputs: input_values}
-        # Initialize
-        variables_lib.global_variables_initializer().run(feed_dict=feeds)
-        # Generate gradients of sum of outputs w.r.t. inputs
-        static_gradients = gradients_impl.gradients(
-            outputs_static + [state_static], [concat_inputs])
-        # Generate gradients of individual outputs w.r.t. inputs
-        static_individual_gradients = nest.flatten([
-            gradients_impl.gradients(y, [concat_inputs])
-            for y in [outputs_static[0], outputs_static[-1], state_static]
-        ])
-        # Generate gradients of individual outputs w.r.t. trainable variables
-        trainable_variables = ops_lib.get_collection(
-            ops_lib.GraphKeys.TRAINABLE_VARIABLES)
-        assert len(trainable_variables) > 1, (
-            "Count of trainable variables: %d" % len(trainable_variables))
-        # pylint: disable=bad-builtin
-        static_individual_variable_gradients = nest.flatten([
-            gradients_impl.gradients(y, trainable_variables)
-            for y in [outputs_static[0], outputs_static[-1], state_static]
-        ])
-        # Test forward pass
-        values_static = sess.run(outputs_static, feed_dict=feeds)
-        (state_value_static,) = sess.run((state_static,), feed_dict=feeds)
-
-        # Evaluate gradients of outputs and final state w.r.t. inputs and
-        # variables
-        static_grad_values = sess.run(static_gradients, feed_dict=feeds)
-
-        static_individual_grad_values = sess.run(
-            static_individual_gradients, feed_dict=feeds)
-
-        static_individual_var_grad_values = sess.run(
-            static_individual_variable_gradients, feed_dict=feeds)
-
-    ########## Step 2: Run dynamic graph and generate readouts
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      if in_graph_mode:
-        concat_inputs = array_ops.placeholder(
-            dtypes.float32, shape=(time_steps, batch_size, input_size))
-      else:
-        concat_inputs = constant_op.constant(input_values)
-      initializer = init_ops.random_uniform_initializer(
-          -0.01, 0.01, seed=self._seed)
-
-      # TODO(akshayka): Remove this special case once b/68017812 is
-      # fixed.
-      if in_graph_mode:
-        cell = rnn_cell.LSTMCell(
-            num_units,
-            use_peepholes=True,
-            initializer=initializer,
-            num_proj=num_proj,
-            state_is_tuple=False)
-
-      with variable_scope.variable_scope("dynamic_scope"):
-        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
-            cell,
-            inputs=concat_inputs,
-            sequence_length=sequence_length,
-            time_major=True,
-            dtype=dtypes.float32)
-        split_outputs_dynamic = array_ops.unstack(outputs_dynamic, time_steps)
-
-      if in_graph_mode:
-        feeds = {concat_inputs: input_values}
-
-        # Initialize
-        variables_lib.global_variables_initializer().run(feed_dict=feeds)
-
-        # Generate gradients of sum of outputs w.r.t. inputs
-        dynamic_gradients = gradients_impl.gradients(
-            split_outputs_dynamic + [state_dynamic], [concat_inputs])
-
-        # Generate gradients of several individual outputs w.r.t. inputs
-        dynamic_individual_gradients = nest.flatten([
-            gradients_impl.gradients(y, [concat_inputs])
-            for y in [
-                split_outputs_dynamic[0], split_outputs_dynamic[-1],
-                state_dynamic
-            ]
-        ])
-
-        # Generate gradients of individual outputs w.r.t. trainable variables
-        trainable_variables = ops_lib.get_collection(
-            ops_lib.GraphKeys.TRAINABLE_VARIABLES)
-        assert len(trainable_variables) > 1, (
-            "Count of trainable variables: %d" % len(trainable_variables))
-        dynamic_individual_variable_gradients = nest.flatten([
-            gradients_impl.gradients(y, trainable_variables)
-            for y in [
-                split_outputs_dynamic[0], split_outputs_dynamic[-1],
-                state_dynamic
-            ]
-        ])
-
-        # Test forward pass
-        values_dynamic = sess.run(split_outputs_dynamic, feed_dict=feeds)
-        (state_value_dynamic,) = sess.run((state_dynamic,), feed_dict=feeds)
-
-        # Evaluate gradients of outputs and final state w.r.t. inputs and
-        # variables
-        dynamic_grad_values = sess.run(dynamic_gradients, feed_dict=feeds)
-
-        dynamic_individual_grad_values = sess.run(
-            dynamic_individual_gradients, feed_dict=feeds)
-
-        dynamic_individual_var_grad_values = sess.run(
-            dynamic_individual_variable_gradients, feed_dict=feeds)
-
-    ########## Step 3: Comparisons
-    if not in_graph_mode:
-      values_static = outputs_static
-      values_dynamic = split_outputs_dynamic
-      state_value_static = state_static
-      state_value_dynamic = state_dynamic
-
-    self.assertEqual(len(values_static), len(values_dynamic))
-    for (value_static, value_dynamic) in zip(values_static, values_dynamic):
-      self.assertAllEqual(value_static, value_dynamic)
-    self.assertAllEqual(state_value_static, state_value_dynamic)
-
-    if in_graph_mode:
-      self.assertAllEqual(static_grad_values, dynamic_grad_values)
-
-      self.assertEqual(
-          len(static_individual_grad_values),
-          len(dynamic_individual_grad_values))
-      self.assertEqual(
-          len(static_individual_var_grad_values),
-          len(dynamic_individual_var_grad_values))
-
-      for i, (a, b) in enumerate(
-          zip(static_individual_grad_values, dynamic_individual_grad_values)):
-        tf_logging.info("Comparing individual gradients iteration %d" % i)
-        self.assertAllEqual(a, b)
-
-      for i, (a, b) in enumerate(
-          zip(static_individual_var_grad_values,
-              dynamic_individual_var_grad_values)):
-        tf_logging.info(
-            "Comparing individual variable gradients iteration %d" % i)
-        self.assertAllEqual(a, b)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDynamicEquivalentToStaticRNN(self):
-    self._testDynamicEquivalentToStaticRNN(use_sequence_length=False)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDynamicEquivalentToStaticRNNWithSequenceLength(self):
-    self._testDynamicEquivalentToStaticRNN(use_sequence_length=True)
-
-
-class BidirectionalRNNTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def _createBidirectionalRNN(self, use_shape, use_sequence_length, scope=None):
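-    """Builds a static bidirectional RNN over a list of placeholder inputs.
-
-    Returns (input_value, inputs, outputs, state_fw, state_bw,
-    sequence_length) for use by the calling test.
-    """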
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    max_length = 8
-
-    initializer = init_ops.random_uniform_initializer(
-        -0.01, 0.01, seed=self._seed)
-    sequence_length = array_ops.placeholder(
-        dtypes.int64) if use_sequence_length else None
-    cell_fw = rnn_cell.LSTMCell(
-        num_units, input_size, initializer=initializer, state_is_tuple=False)
-    cell_bw = rnn_cell.LSTMCell(
-        num_units, input_size, initializer=initializer, state_is_tuple=False)
-    inputs = max_length * [
-        array_ops.placeholder(
-            dtypes.float32,
-            shape=(batch_size, input_size) if use_shape else (None, input_size))
-    ]
-    outputs, state_fw, state_bw = rnn.static_bidirectional_rnn(
-        cell_fw,
-        cell_bw,
-        inputs,
-        dtype=dtypes.float32,
-        sequence_length=sequence_length,
-        scope=scope)
-    self.assertEqual(len(outputs), len(inputs))
-    for out in outputs:
-      self.assertEqual(out.get_shape().as_list(),
-                       [batch_size if use_shape else None, 2 * num_units])
-
-    input_value = np.random.randn(batch_size, input_size)
-    outputs = array_ops.stack(outputs)
-
-    return input_value, inputs, outputs, state_fw, state_bw, sequence_length
-
-  def _testBidirectionalRNN(self, use_shape):
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
-          self._createBidirectionalRNN(use_shape, True))
-      variables_lib.global_variables_initializer().run()
-      # Run with pre-specified sequence length of 2, 3
-      out, s_fw, s_bw = sess.run(
-          [outputs, state_fw, state_bw],
-          feed_dict={
-              inputs[0]: input_value,
-              sequence_length: [2, 3]
-          })
-
-      # Since the forward and backward LSTM cells were initialized with the
-      # same parameters, the forward and backward outputs have to be the
-      # same, but reversed in time. The format is output[time][batch][depth],
-      # and due to depth concatenation (as num_units=3 for both RNNs):
-      # - forward output:  out[][][depth] for 0 <= depth < 3
-      # - backward output: out[][][depth] for 3 <= depth < 6
-      #
-      # First sequence in batch is length=2
-      # Check that the time=0 forward output is equal to time=1 backward output
-      self.assertEqual(out[0][0][0], out[1][0][3])
-      self.assertEqual(out[0][0][1], out[1][0][4])
-      self.assertEqual(out[0][0][2], out[1][0][5])
-      # Check that the time=1 forward output is equal to time=0 backward output
-      self.assertEqual(out[1][0][0], out[0][0][3])
-      self.assertEqual(out[1][0][1], out[0][0][4])
-      self.assertEqual(out[1][0][2], out[0][0][5])
-
-      # Second sequence in batch is length=3
-      # Check that the time=0 forward output is equal to time=2 backward output
-      self.assertEqual(out[0][1][0], out[2][1][3])
-      self.assertEqual(out[0][1][1], out[2][1][4])
-      self.assertEqual(out[0][1][2], out[2][1][5])
-      # Check that the time=1 forward output is equal to time=1 backward output
-      self.assertEqual(out[1][1][0], out[1][1][3])
-      self.assertEqual(out[1][1][1], out[1][1][4])
-      self.assertEqual(out[1][1][2], out[1][1][5])
-      # Check that the time=2 forward output is equal to time=0 backward output
-      self.assertEqual(out[2][1][0], out[0][1][3])
-      self.assertEqual(out[2][1][1], out[0][1][4])
-      self.assertEqual(out[2][1][2], out[0][1][5])
-      # Via the reasoning above, the forward and backward final state should be
-      # exactly the same
-      self.assertAllClose(s_fw, s_bw)
-
-  def _testBidirectionalRNNWithoutSequenceLength(self, use_shape):
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      input_value, inputs, outputs, state_fw, state_bw, _ = (
-          self._createBidirectionalRNN(use_shape, False))
-      variables_lib.global_variables_initializer().run()
-      out, s_fw, s_bw = sess.run(
-          [outputs, state_fw, state_bw], feed_dict={
-              inputs[0]: input_value
-          })
-
-      # Since the forward and backward LSTM cells were initialized with the
-      # same parameters, the forward and backward outputs have to be the
-      # same, but reversed in time. The format is output[time][batch][depth],
-      # and due to depth concatenation (as num_units=3 for both RNNs):
-      # - forward output:  out[][][depth] for 0 <= depth < 3
-      # - backward output: out[][][depth] for 3 <= depth < 6
-      #
-      # Both sequences in batch are length=8.  Check that the time=i
-      # forward output is equal to time=8-1-i backward output
-      for i in xrange(8):
-        self.assertEqual(out[i][0][0], out[8 - 1 - i][0][3])
-        self.assertEqual(out[i][0][1], out[8 - 1 - i][0][4])
-        self.assertEqual(out[i][0][2], out[8 - 1 - i][0][5])
-      for i in xrange(8):
-        self.assertEqual(out[i][1][0], out[8 - 1 - i][1][3])
-        self.assertEqual(out[i][1][1], out[8 - 1 - i][1][4])
-        self.assertEqual(out[i][1][2], out[8 - 1 - i][1][5])
-      # Via the reasoning above, the forward and backward final state should be
-      # exactly the same
-      self.assertAllClose(s_fw, s_bw)
-
-  def testBidirectionalRNN(self):
-    self._testBidirectionalRNN(use_shape=False)
-    self._testBidirectionalRNN(use_shape=True)
-
-  def testBidirectionalRNNWithoutSequenceLength(self):
-    self._testBidirectionalRNNWithoutSequenceLength(use_shape=False)
-    self._testBidirectionalRNNWithoutSequenceLength(use_shape=True)
-
-  def _createBidirectionalDynamicRNN(self,
-                                     use_shape,
-                                     use_state_tuple,
-                                     use_time_major,
-                                     use_sequence_length,
-                                     scope=None):
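-    """Builds a bidirectional_dynamic_rnn over stacked placeholder inputs.
-
-    Returns (input_value, inputs, outputs, state_fw, state_bw,
-    sequence_length) for use by the calling test.
-    """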
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    max_length = 8
-
-    initializer = init_ops.random_uniform_initializer(
-        -0.01, 0.01, seed=self._seed)
-    sequence_length = (
-        array_ops.placeholder(dtypes.int64) if use_sequence_length else None)
-    cell_fw = rnn_cell.LSTMCell(
-        num_units, initializer=initializer, state_is_tuple=use_state_tuple)
-    cell_bw = rnn_cell.LSTMCell(
-        num_units, initializer=initializer, state_is_tuple=use_state_tuple)
-    inputs = max_length * [
-        array_ops.placeholder(
-            dtypes.float32,
-            shape=(batch_size if use_shape else None, input_size))
-    ]
-    inputs_c = array_ops.stack(inputs)
-    if not use_time_major:
-      inputs_c = array_ops.transpose(inputs_c, [1, 0, 2])
-    outputs, states = rnn.bidirectional_dynamic_rnn(
-        cell_fw,
-        cell_bw,
-        inputs_c,
-        sequence_length,
-        dtype=dtypes.float32,
-        time_major=use_time_major,
-        scope=scope)
-    outputs = array_ops.concat(outputs, 2)
-    state_fw, state_bw = states
-    outputs_shape = [None, max_length, 2 * num_units]
-    if use_shape:
-      outputs_shape[0] = batch_size
-    if use_time_major:
-      outputs_shape[0], outputs_shape[1] = outputs_shape[1], outputs_shape[0]
-    self.assertEqual(outputs.get_shape().as_list(), outputs_shape)
-
-    input_value = np.random.randn(batch_size, input_size)
-
-    return input_value, inputs, outputs, state_fw, state_bw, sequence_length
-
-  def _testBidirectionalDynamicRNN(self, use_shape, use_state_tuple,
-                                   use_time_major, use_sequence_length):
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
-          self._createBidirectionalDynamicRNN(
-              use_shape, use_state_tuple, use_time_major, use_sequence_length))
-      variables_lib.global_variables_initializer().run()
-      # Run with pre-specified sequence length of 2, 3
-      feed_dict = ({sequence_length: [2, 3]} if use_sequence_length else {})
-      feed_dict.update({inputs[0]: input_value})
-      if use_state_tuple:
-        out, c_fw, m_fw, c_bw, m_bw = sess.run(
-            [outputs, state_fw[0], state_fw[1], state_bw[0], state_bw[1]],
-            feed_dict=feed_dict)
-        s_fw = (c_fw, m_fw)
-        s_bw = (c_bw, m_bw)
-      else:
-        out, s_fw, s_bw = sess.run(
-            [outputs, state_fw, state_bw], feed_dict=feed_dict)
-
-      # Since the forward and backward LSTM cells were initialized with the
-      # same parameters, the forward and backward outputs have to be the
-      # same, but reversed in time. The format is output[time][batch][depth],
-      # and due to depth concatenation (as num_units=3 for both RNNs):
-      # - forward output:  out[][][depth] for 0 <= depth < 3
-      # - backward output: out[][][depth] for 3 <= depth < 6
-      #
-      if not use_time_major:
-        out = np.swapaxes(out, 0, 1)
-
-      if use_sequence_length:
-        # First sequence in batch is length=2
-        # Check that the t=0 forward output is equal to t=1 backward output
-        self.assertEqual(out[0][0][0], out[1][0][3])
-        self.assertEqual(out[0][0][1], out[1][0][4])
-        self.assertEqual(out[0][0][2], out[1][0][5])
-        # Check that the t=1 forward output is equal to t=0 backward output
-        self.assertEqual(out[1][0][0], out[0][0][3])
-        self.assertEqual(out[1][0][1], out[0][0][4])
-        self.assertEqual(out[1][0][2], out[0][0][5])
-
-        # Second sequence in batch is length=3
-        # Check that the t=0 forward output is equal to t=2 backward output
-        self.assertEqual(out[0][1][0], out[2][1][3])
-        self.assertEqual(out[0][1][1], out[2][1][4])
-        self.assertEqual(out[0][1][2], out[2][1][5])
-        # Check that the t=1 forward output is equal to t=1 backward output
-        self.assertEqual(out[1][1][0], out[1][1][3])
-        self.assertEqual(out[1][1][1], out[1][1][4])
-        self.assertEqual(out[1][1][2], out[1][1][5])
-        # Check that the t=2 forward output is equal to t=0 backward output
-        self.assertEqual(out[2][1][0], out[0][1][3])
-        self.assertEqual(out[2][1][1], out[0][1][4])
-        self.assertEqual(out[2][1][2], out[0][1][5])
-        # Via the reasoning above, the forward and backward final state should
-        # be exactly the same
-        self.assertAllClose(s_fw, s_bw)
-      else:  # not use_sequence_length
-        max_length = 8  # from _createBidirectionalDynamicRNN
-        for t in range(max_length):
-          self.assertAllEqual(out[t, :, 0:3], out[max_length - t - 1, :, 3:6])
-        self.assertAllClose(s_fw, s_bw)
-
-  def testBidirectionalDynamicRNN(self):
-    # Generate all 2^4 option combinations,
-    # from (True, True, True, True) to (False, False, False, False)
-    options = itertools.product([True, False], repeat=4)
-    for option in options:
-      self._testBidirectionalDynamicRNN(
-          use_shape=option[0],
-          use_state_tuple=option[1],
-          use_time_major=option[2],
-          use_sequence_length=option[3])
-
-  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
-    # REMARKS: factory(scope) is a function accepting a scope
-    #          as an argument; the scope can be None, a string,
-    #          or a VariableScope instance.
-    with self.session(use_gpu=True, graph=ops_lib.Graph()):
-      if use_outer_scope:
-        with variable_scope.variable_scope(prefix) as scope:
-          factory(scope)
-      else:
-        factory(prefix)
-
-      # Check that all the variable names start
-      # with the proper scope.
-      variables_lib.global_variables_initializer()
-      all_vars = variables_lib.global_variables()
-      prefix = prefix or "bidirectional_rnn"
-      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
-      tf_logging.info("BiRNN with scope: %s (%s)" %
-                      (prefix, "scope" if use_outer_scope else "str"))
-      for v in scope_vars:
-        tf_logging.info(v.name)
-      self.assertEqual(len(scope_vars), len(all_vars))
-
-  def testBidirectionalRNNScope(self):
-
-    def factory(scope):
-      return self._createBidirectionalRNN(
-          use_shape=True, use_sequence_length=True, scope=scope)
-
-    self._testScope(factory, use_outer_scope=True)
-    self._testScope(factory, use_outer_scope=False)
-    self._testScope(factory, prefix=None, use_outer_scope=False)
-
-  def testBidirectionalDynamicRNNScope(self):
-
-    def get_factory(use_time_major):
-
-      def factory(scope):
-        return self._createBidirectionalDynamicRNN(
-            use_shape=True,
-            use_state_tuple=True,
-            use_sequence_length=True,
-            use_time_major=use_time_major,
-            scope=scope)
-
-      return factory
-
-    self._testScope(get_factory(True), use_outer_scope=True)
-    self._testScope(get_factory(True), use_outer_scope=False)
-    self._testScope(get_factory(True), prefix=None, use_outer_scope=False)
-    self._testScope(get_factory(False), use_outer_scope=True)
-    self._testScope(get_factory(False), use_outer_scope=False)
-    self._testScope(get_factory(False), prefix=None, use_outer_scope=False)
-
-
-class MultiDimensionalLSTMTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def testMultiDimensionalLSTMAllRNNContainers(self):
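-    """Runs a multi-dimensional cell through all RNN containers.
-
-    Checks that static, dynamic, bidirectional, and state-saving RNNs
-    agree on outputs and states.
-    """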
-    feature_dims = (3, 4, 5)
-    input_size = feature_dims
-    batch_size = 2
-    max_length = 8
-    sequence_length = [4, 6]
-    with self.session(graph=ops_lib.Graph()) as sess:
-      inputs = max_length * [
-          array_ops.placeholder(dtypes.float32, shape=(None,) + input_size)
-      ]
-      inputs_using_dim = max_length * [
-          array_ops.placeholder(
-              dtypes.float32, shape=(batch_size,) + input_size)
-      ]
-      inputs_c = array_ops.stack(inputs)
-      # Create a cell for the whole test. This is fine because the cell has no
-      # variables.
-      cell = DummyMultiDimensionalLSTM(feature_dims)
-      state_saver = TestStateSaver(batch_size, input_size)
-      outputs_static, state_static = rnn.static_rnn(
-          cell, inputs, dtype=dtypes.float32, sequence_length=sequence_length)
-      outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
-          cell,
-          inputs_c,
-          dtype=dtypes.float32,
-          time_major=True,
-          sequence_length=sequence_length)
-      outputs_bid, state_fw, state_bw = rnn.static_bidirectional_rnn(
-          cell,
-          cell,
-          inputs_using_dim,
-          dtype=dtypes.float32,
-          sequence_length=sequence_length)
-      outputs_sav, state_sav = rnn.static_state_saving_rnn(
-          cell,
-          inputs_using_dim,
-          sequence_length=sequence_length,
-          state_saver=state_saver,
-          state_name=("h", "c"))
-
-      self.assertEqual(outputs_dynamic.get_shape().as_list(),
-                       inputs_c.get_shape().as_list())
-      for out, inp in zip(outputs_static, inputs):
-        self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
-      for out, inp in zip(outputs_bid, inputs_using_dim):
-        input_shape_list = inp.get_shape().as_list()
-        # fwd and bwd activations are concatenated along the second dim.
-        input_shape_list[1] *= 2
-        self.assertEqual(out.get_shape().as_list(), input_shape_list)
-
-      variables_lib.global_variables_initializer().run()
-
-      input_total_size = (batch_size,) + input_size
-      input_value = np.random.randn(*input_total_size)
-      outputs_static_v = sess.run(
-          outputs_static, feed_dict={
-              inputs[0]: input_value
-          })
-      outputs_dynamic_v = sess.run(
-          outputs_dynamic, feed_dict={
-              inputs[0]: input_value
-          })
-      outputs_bid_v = sess.run(
-          outputs_bid, feed_dict={
-              inputs_using_dim[0]: input_value
-          })
-      outputs_sav_v = sess.run(
-          outputs_sav, feed_dict={
-              inputs_using_dim[0]: input_value
-          })
-
-      self.assertAllEqual(outputs_static_v, outputs_dynamic_v)
-      self.assertAllEqual(outputs_static_v, outputs_sav_v)
-      outputs_static_array = np.array(outputs_static_v)
-      outputs_static_array_double = np.concatenate(
-          (outputs_static_array, outputs_static_array), axis=2)
-      outputs_bid_array = np.array(outputs_bid_v)
-      self.assertAllEqual(outputs_static_array_double, outputs_bid_array)
-
-      state_static_v = sess.run(
-          state_static, feed_dict={
-              inputs[0]: input_value
-          })
-      state_dynamic_v = sess.run(
-          state_dynamic, feed_dict={
-              inputs[0]: input_value
-          })
-      state_bid_fw_v = sess.run(
-          state_fw, feed_dict={
-              inputs_using_dim[0]: input_value
-          })
-      state_bid_bw_v = sess.run(
-          state_bw, feed_dict={
-              inputs_using_dim[0]: input_value
-          })
-      state_sav_v = sess.run(
-          state_sav, feed_dict={
-              inputs_using_dim[0]: input_value
-          })
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_dynamic_v))
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_sav_v))
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_fw_v))
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_bw_v))
-
-
-class NestedLSTMTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def testNestedIOLSTMAllRNNContainers(self):
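-    """Runs a cell with nested (tuple) inputs through all RNN containers.
-
-    Checks that static, dynamic, bidirectional, and state-saving RNNs
-    agree on outputs and states.
-    """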
-    input_size = 5
-    batch_size = 2
-    state_size = 6
-    max_length = 8
-    sequence_length = [4, 6]
-    with self.session(graph=ops_lib.Graph()) as sess:
-      state_saver = TestStateSaver(batch_size, state_size)
-      single_input = (array_ops.placeholder(
-          dtypes.float32, shape=(None, input_size)),
-                      array_ops.placeholder(
-                          dtypes.float32, shape=(None, input_size)))
-      inputs = max_length * [single_input]
-      inputs_c = (array_ops.stack([input_[0] for input_ in inputs]),
-                  array_ops.stack([input_[1] for input_ in inputs]))
-      single_input_using_dim = (array_ops.placeholder(
-          dtypes.float32, shape=(batch_size, input_size)),
-                                array_ops.placeholder(
-                                    dtypes.float32,
-                                    shape=(batch_size, input_size)))
-      inputs_using_dim = max_length * [single_input_using_dim]
-
-      # Create a cell for the whole test. This is fine because the cell has no
-      # variables.
-      cell = NestedRNNCell()
-      outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
-          cell,
-          inputs_c,
-          dtype=dtypes.float32,
-          time_major=True,
-          sequence_length=sequence_length)
-      outputs_static, state_static = rnn.static_rnn(
-          cell, inputs, dtype=dtypes.float32, sequence_length=sequence_length)
-      outputs_bid, state_fw, state_bw = rnn.static_bidirectional_rnn(
-          cell,
-          cell,
-          inputs_using_dim,
-          dtype=dtypes.float32,
-          sequence_length=sequence_length)
-      outputs_sav, state_sav = rnn.static_state_saving_rnn(
-          cell,
-          inputs_using_dim,
-          sequence_length=sequence_length,
-          state_saver=state_saver,
-          state_name=("h", "c"))
-
-      def _assert_same_shape(input1, input2, double=False):
-        flat_input1 = nest.flatten(input1)
-        flat_input2 = nest.flatten(input2)
-        for inp1, inp2 in zip(flat_input1, flat_input2):
-          input_shape = inp1.get_shape().as_list()
-          if double:
-            input_shape[1] *= 2
-          self.assertEqual(input_shape, inp2.get_shape().as_list())
-
-      _assert_same_shape(inputs_c, outputs_dynamic)
-      _assert_same_shape(inputs, outputs_static)
-      _assert_same_shape(inputs_using_dim, outputs_sav)
-      _assert_same_shape(inputs_using_dim, outputs_bid, double=True)
-
-      variables_lib.global_variables_initializer().run()
-
-      input_total_size = (batch_size, input_size)
-      input_value = (np.random.randn(*input_total_size),
-                     np.random.randn(*input_total_size))
-      outputs_dynamic_v = sess.run(
-          outputs_dynamic, feed_dict={
-              single_input: input_value
-          })
-      outputs_static_v = sess.run(
-          outputs_static, feed_dict={
-              single_input: input_value
-          })
-      outputs_sav_v = sess.run(
-          outputs_sav, feed_dict={
-              single_input_using_dim: input_value
-          })
-      outputs_bid_v = sess.run(
-          outputs_bid, feed_dict={
-              single_input_using_dim: input_value
-          })
-
-      self.assertAllEqual(outputs_static_v,
-                          np.transpose(outputs_dynamic_v, (1, 0, 2, 3)))
-      self.assertAllEqual(outputs_static_v, outputs_sav_v)
-      outputs_static_array = np.array(outputs_static_v)
-      outputs_static_array_double = np.concatenate(
-          (outputs_static_array, outputs_static_array), axis=3)
-      outputs_bid_array = np.array(outputs_bid_v)
-      self.assertAllEqual(outputs_static_array_double, outputs_bid_array)
-
-      state_dynamic_v = sess.run(
-          state_dynamic, feed_dict={
-              single_input: input_value
-          })
-      state_static_v = sess.run(
-          state_static, feed_dict={
-              single_input: input_value
-          })
-      state_bid_fw_v = sess.run(
-          state_fw, feed_dict={
-              single_input_using_dim: input_value
-          })
-      state_bid_bw_v = sess.run(
-          state_bw, feed_dict={
-              single_input_using_dim: input_value
-          })
-      state_sav_v = sess.run(
-          state_sav, feed_dict={
-              single_input_using_dim: input_value
-          })
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_dynamic_v))
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_sav_v))
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_fw_v))
-      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_bw_v))
-
-
-class StateSaverRNNTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def _factory(self, scope, state_saver):
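-    """Builds a static_state_saving_rnn; returns (out, state, state_saver)."""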
-    num_units = state_saver.state_size // 2
-    batch_size = state_saver.batch_size
-    input_size = 5
-    max_length = 8
-    initializer = init_ops.random_uniform_initializer(
-        -0.01, 0.01, seed=self._seed)
-    cell = rnn_cell.LSTMCell(
-        num_units,
-        use_peepholes=False,
-        initializer=initializer,
-        state_is_tuple=False)
-    inputs = max_length * [
-        array_ops.zeros(dtype=dtypes.float32, shape=(batch_size, input_size))
-    ]
-    out, state = rnn.static_state_saving_rnn(
-        cell,
-        inputs,
-        state_saver=state_saver,
-        state_name="save_lstm",
-        scope=scope)
-    return out, state, state_saver
-
-  def _testScope(self, prefix="prefix", use_outer_scope=True):
-    num_units = 3
-    batch_size = 2
-    state_saver = TestStateSaver(batch_size, 2 * num_units)
-
-    with self.session(use_gpu=True, graph=ops_lib.Graph()):
-      if use_outer_scope:
-        with variable_scope.variable_scope(prefix) as scope:
-          self._factory(scope=scope, state_saver=state_saver)
-      else:
-        self._factory(scope=prefix, state_saver=state_saver)
-        variables_lib.global_variables_initializer()
-
-      # Check that all the variable names start
-      # with the proper scope.
-      all_vars = variables_lib.global_variables()
-      prefix = prefix or "rnn"
-      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
-      tf_logging.info("RNN with scope: %s (%s)" %
-                      (prefix, "scope" if use_outer_scope else "str"))
-      for v in scope_vars:
-        tf_logging.info(v.name)
-      self.assertEqual(len(scope_vars), len(all_vars))
-
-  def testStateSaverRNNScope(self):
-    self._testScope(use_outer_scope=True)
-    self._testScope(use_outer_scope=False)
-    self._testScope(prefix=None, use_outer_scope=False)
-
-  def testStateSaverCallsSaveState(self):
-    """Test that number of calls to state and save_state is equal.
-
-    Test if the order of actual evaluating or skipping evaluation of out,
-    state tensors, which are the output tensors from static_state_saving_rnn,
-    have influence on number of calls to save_state and state methods of
-    state_saver object (the number of calls should be same.)
-    """
-
-    num_units = 3
-    batch_size = 2
-    state_saver = TestStateSaverWithCounters(batch_size, 2 * num_units)
-    out, state, state_saver = self._factory(scope=None, state_saver=state_saver)
-
-    with self.cached_session() as sess:
-      sess.run(variables_lib.global_variables_initializer())
-      sess.run(variables_lib.local_variables_initializer())
-
-      _, _, num_state_calls, num_save_state_calls = sess.run([
-          out,
-          state,
-          state_saver.num_state_calls,
-          state_saver.num_save_state_calls])
-      self.assertEqual(num_state_calls, num_save_state_calls)
-
-      _, num_state_calls, num_save_state_calls = sess.run([
-          out,
-          state_saver.num_state_calls,
-          state_saver.num_save_state_calls])
-      self.assertEqual(num_state_calls, num_save_state_calls)
-
-      _, num_state_calls, num_save_state_calls = sess.run([
-          state,
-          state_saver.num_state_calls,
-          state_saver.num_save_state_calls])
-      self.assertEqual(num_state_calls, num_save_state_calls)
-
-
-class GRUTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def testDynamic(self):
-    time_steps = 8
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-
-    input_values = np.random.randn(time_steps, batch_size, input_size)
-
-    sequence_length = np.random.randint(0, time_steps, size=batch_size)
-
-    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
-      concat_inputs = array_ops.placeholder(
-          dtypes.float32, shape=(time_steps, batch_size, input_size))
-
-      cell = rnn_cell.GRUCell(num_units=num_units)
-
-      with variable_scope.variable_scope("dynamic_scope"):
-        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
-            cell,
-            inputs=concat_inputs,
-            sequence_length=sequence_length,
-            time_major=True,
-            dtype=dtypes.float32)
-
-      feeds = {concat_inputs: input_values}
-
-      # Initialize
-      variables_lib.global_variables_initializer().run(feed_dict=feeds)
-
-      sess.run([outputs_dynamic, state_dynamic], feed_dict=feeds)
-
-  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
-    with self.session(use_gpu=True, graph=ops_lib.Graph()):
-      if use_outer_scope:
-        with variable_scope.variable_scope(prefix) as scope:
-          factory(scope)
-      else:
-        factory(prefix)
-        variables_lib.global_variables_initializer()
-
-      # Check that all the variable names start
-      # with the proper scope.
-      all_vars = variables_lib.global_variables()
-      prefix = prefix or "rnn"
-      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
-      tf_logging.info("RNN with scope: %s (%s)" %
-                      (prefix, "scope" if use_outer_scope else "str"))
-      for v in scope_vars:
-        tf_logging.info(v.name)
-      self.assertEqual(len(scope_vars), len(all_vars))
-
-  def testDynamicScope(self):
-    time_steps = 8
-    num_units = 3
-    input_size = 5
-    batch_size = 2
-    sequence_length = np.random.randint(0, time_steps, size=batch_size)
-
-    def factory(scope):
-      concat_inputs = array_ops.placeholder(
-          dtypes.float32, shape=(time_steps, batch_size, input_size))
-      cell = rnn_cell.GRUCell(num_units=num_units)
-      return rnn.dynamic_rnn(
-          cell,
-          inputs=concat_inputs,
-          sequence_length=sequence_length,
-          time_major=True,
-          dtype=dtypes.float32,
-          scope=scope)
-
-    self._testScope(factory, use_outer_scope=True)
-    self._testScope(factory, use_outer_scope=False)
-    self._testScope(factory, prefix=None, use_outer_scope=False)
-
-
-class RawRNNTest(test.TestCase):
-
-  def setUp(self):
-    self._seed = 23489
-    np.random.seed(self._seed)
-
-  def _testRawRNN(self, max_time):
-    with self.session(graph=ops_lib.Graph()) as sess:
-      batch_size = 16
-      input_depth = 4
-      num_units = 3
-
-      inputs = array_ops.placeholder(
-          shape=(max_time, batch_size, input_depth), dtype=dtypes.float32)
-      sequence_length = array_ops.placeholder(
-          shape=(batch_size,), dtype=dtypes.int32)
-      inputs_ta = tensor_array_ops.TensorArray(
-          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
-      inputs_ta = inputs_ta.unstack(inputs)
-
-      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-
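-      # raw_rnn calls loop_fn once before the first time step (cell_output
-      # is None) to obtain the initial input and state, then once per step;
-      # it must return (elements_finished, next_input, next_cell_state,
-      # emit_output, loop_state).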
-      def loop_fn(time_, cell_output, cell_state, unused_loop_state):
-        emit_output = cell_output  # == None for time == 0
-        if cell_output is None:  # time == 0
-          next_state = cell.zero_state(batch_size, dtypes.float32)
-        else:
-          next_state = cell_state  # copy state through
-        elements_finished = (time_ >= sequence_length)
-        finished = math_ops.reduce_all(elements_finished)
-        # For the very final iteration, we must emit a dummy input
-        next_input = control_flow_ops.cond(
-            finished,
-            lambda: array_ops.zeros([batch_size, input_depth], dtype=dtypes.float32),
-            lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, next_state, emit_output, None)
-
-      reuse_scope = variable_scope.get_variable_scope()
-
-      outputs_ta, final_state, _ = rnn.raw_rnn(cell, loop_fn, scope=reuse_scope)
-      outputs = outputs_ta.stack()
-
-      reuse_scope.reuse_variables()
-      outputs_dynamic_rnn, final_state_dynamic_rnn = rnn.dynamic_rnn(
-          cell,
-          inputs,
-          time_major=True,
-          dtype=dtypes.float32,
-          sequence_length=sequence_length,
-          scope=reuse_scope)
-
-      variables = variables_lib.trainable_variables()
-      gradients = gradients_impl.gradients([outputs, final_state],
-                                           [inputs] + variables)
-      gradients_dynamic_rnn = gradients_impl.gradients(
-          [outputs_dynamic_rnn, final_state_dynamic_rnn], [inputs] + variables)
-
-      variables_lib.global_variables_initializer().run()
-
-      rand_input = np.random.randn(max_time, batch_size, input_depth)
-      if max_time == 0:
-        rand_seq_len = np.zeros(batch_size)
-      else:
-        rand_seq_len = np.random.randint(max_time, size=batch_size)
-
-      # To ensure same output lengths for dynamic_rnn and raw_rnn
-      rand_seq_len[0] = max_time
-
-      (outputs_val, outputs_dynamic_rnn_val, final_state_val,
-       final_state_dynamic_rnn_val) = sess.run(
-           [outputs, outputs_dynamic_rnn, final_state, final_state_dynamic_rnn],
-           feed_dict={
-               inputs: rand_input,
-               sequence_length: rand_seq_len
-           })
-
-      self.assertAllClose(outputs_dynamic_rnn_val, outputs_val)
-      self.assertAllClose(final_state_dynamic_rnn_val, final_state_val)
-
-      # NOTE: With 0 time steps, raw_rnn has no shape information about the
-      # input, so the gradient evaluation would fail and the gradient
-      # comparison is impossible.  This case therefore skips the gradients
-      # test.
-      if max_time > 0:
-        self.assertEqual(len(gradients), len(gradients_dynamic_rnn))
-        gradients_val = sess.run(
-            gradients,
-            feed_dict={
-                inputs: rand_input,
-                sequence_length: rand_seq_len
-            })
-        gradients_dynamic_rnn_val = sess.run(
-            gradients_dynamic_rnn,
-            feed_dict={
-                inputs: rand_input,
-                sequence_length: rand_seq_len
-            })
-        self.assertEqual(len(gradients_val), len(gradients_dynamic_rnn_val))
-        input_gradients_val = gradients_val[0]
-        input_gradients_dynamic_rnn_val = gradients_dynamic_rnn_val[0]
-        self.assertAllClose(input_gradients_val,
-                            input_gradients_dynamic_rnn_val)
-        for i in range(1, len(gradients_val)):
-          self.assertAllClose(gradients_dynamic_rnn_val[i], gradients_val[i])
-
-  def testRawRNNZeroLength(self):
-    # NOTE: With 0 time steps, raw_rnn has no shape information about the
-    # input, so the gradient evaluation would fail and the gradient
-    # comparison is impossible.  This case therefore skips the gradients
-    # test.
-    self._testRawRNN(max_time=0)
-
-  def testRawRNN(self):
-    self._testRawRNN(max_time=10)
-
-  def testLoopState(self):
-    with self.session(graph=ops_lib.Graph()):
-      max_time = 10
-      batch_size = 16
-      input_depth = 4
-      num_units = 3
-
-      inputs = np.random.randn(max_time, batch_size, input_depth)
-      inputs_ta = tensor_array_ops.TensorArray(
-          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
-      inputs_ta = inputs_ta.unstack(inputs)
-
-      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-
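-      # loop_state threads a counter through raw_rnn: it starts as [0] on
-      # the initialization call and is incremented once per subsequent step,
-      # so the final loop state equals [max_time].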
-      def loop_fn(time_, cell_output, cell_state, loop_state):
-        if cell_output is None:
-          loop_state = constant_op.constant([0])
-          next_state = cell.zero_state(batch_size, dtypes.float32)
-        else:
-          loop_state = array_ops.stack([array_ops.squeeze(loop_state) + 1])
-          next_state = cell_state
-        emit_output = cell_output  # == None for time == 0
-        elements_finished = array_ops.tile([time_ >= max_time], [batch_size])
-        finished = math_ops.reduce_all(elements_finished)
-        # For the very final iteration, we must emit a dummy input
-        next_input = control_flow_ops.cond(
-            finished,
-            lambda: array_ops.zeros([batch_size, input_depth], dtype=dtypes.float32),
-            lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, next_state, emit_output,
-                loop_state)
-
-      r = rnn.raw_rnn(cell, loop_fn)
-      loop_state = r[-1]
-      self.assertEqual([10], loop_state.eval())
-
-  def testLoopStateWithTensorArray(self):
-    with self.session(graph=ops_lib.Graph()):
-      max_time = 4
-      batch_size = 16
-      input_depth = 4
-      num_units = 3
-
-      inputs = np.random.randn(max_time, batch_size, input_depth)
-      inputs_ta = tensor_array_ops.TensorArray(
-          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
-      inputs_ta = inputs_ta.unstack(inputs)
-
-      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-
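-      # Here loop_state is a TensorArray accumulating a running sum: entry 0
-      # is 1 and entry t is entry (t - 1) + t, giving [1, 2, 4, 7, 11] for
-      # max_time = 4.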
-      def loop_fn(time_, cell_output, cell_state, loop_state):
-        if cell_output is None:
-          loop_state = tensor_array_ops.TensorArray(
-              dynamic_size=True,
-              size=0,
-              dtype=dtypes.int32,
-              clear_after_read=False)
-          loop_state = loop_state.write(0, 1)
-          next_state = cell.zero_state(batch_size, dtypes.float32)
-        else:
-          loop_state = loop_state.write(time_,
-                                        loop_state.read(time_ - 1) + time_)
-          next_state = cell_state
-        emit_output = cell_output  # == None for time == 0
-        elements_finished = array_ops.tile([time_ >= max_time], [batch_size])
-        finished = math_ops.reduce_all(elements_finished)
-        # For the very final iteration, we must emit a dummy input
-        next_input = control_flow_ops.cond(
-            finished,
-            lambda: array_ops.zeros([batch_size, input_depth], dtype=dtypes.float32),
-            lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, next_state, emit_output,
-                loop_state)
-
-      r = rnn.raw_rnn(cell, loop_fn)
-      loop_state = r[-1]
-      loop_state = loop_state.stack()
-      self.assertAllEqual([1, 2, 2 + 2, 4 + 3, 7 + 4], loop_state.eval())
-
-  def testEmitDifferentStructureThanCellOutput(self):
-    with self.session(graph=ops_lib.Graph()) as sess:
-      max_time = 10
-      batch_size = 16
-      input_depth = 4
-      num_units = 3
-
-      inputs = np.random.randn(max_time, batch_size, input_depth)
-      inputs_ta = tensor_array_ops.TensorArray(
-          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
-      inputs_ta = inputs_ta.unstack(inputs)
-      # Verify emit shapes may be unknown by feeding a placeholder that
-      # determines an emit shape.
-      unknown_dim = array_ops.placeholder(dtype=dtypes.int32)
-
-      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-
-      def loop_fn(time_, cell_output, cell_state, _):
-        if cell_output is None:
-          emit_output = (array_ops.zeros([2, 3], dtype=dtypes.int32),
-                         array_ops.zeros([unknown_dim], dtype=dtypes.int64))
-          next_state = cell.zero_state(batch_size, dtypes.float32)
-        else:
-          emit_output = (array_ops.ones([batch_size, 2, 3], dtype=dtypes.int32),
-                         array_ops.ones(
-                             [batch_size, unknown_dim], dtype=dtypes.int64))
-          next_state = cell_state
-        elements_finished = array_ops.tile([time_ >= max_time], [batch_size])
-        finished = math_ops.reduce_all(elements_finished)
-        # For the very final iteration, we must emit a dummy input
-        next_input = control_flow_ops.cond(
-            finished,
-            lambda: array_ops.zeros([batch_size, input_depth], dtype=dtypes.float32),
-            lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, next_state, emit_output, None)
-
-      r = rnn.raw_rnn(cell, loop_fn)
-      output_ta = r[0]
-      self.assertEqual(2, len(output_ta))
-      self.assertEqual([dtypes.int32, dtypes.int64],
-                       [ta.dtype for ta in output_ta])
-      output = [ta.stack() for ta in output_ta]
-      output_vals = sess.run(output, feed_dict={unknown_dim: 1})
-      self.assertAllEqual(
-          np.ones((max_time, batch_size, 2, 3), np.int32), output_vals[0])
-      self.assertAllEqual(
-          np.ones((max_time, batch_size, 1), np.int64), output_vals[1])
-
-  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
-    with self.session(use_gpu=True, graph=ops_lib.Graph()):
-      if use_outer_scope:
-        with variable_scope.variable_scope(prefix) as scope:
-          factory(scope)
-      else:
-        factory(prefix)
-        variables_lib.global_variables_initializer()
-
-      # Check that all the variable names start with the proper scope.
-      all_vars = variables_lib.global_variables()
-      prefix = prefix or "rnn"
-      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
-      tf_logging.info("RNN with scope: %s (%s)" %
-                      (prefix, "scope" if use_outer_scope else "str"))
-      for v in scope_vars:
-        tf_logging.info(v.name)
-      self.assertEqual(len(scope_vars), len(all_vars))
-
-  def testRawRNNScope(self):
-    max_time = 10
-    batch_size = 16
-    input_depth = 4
-    num_units = 3
-
-    def factory(scope):
-      inputs = array_ops.placeholder(
-          shape=(max_time, batch_size, input_depth), dtype=dtypes.float32)
-      sequence_length = array_ops.placeholder(
-          shape=(batch_size,), dtype=dtypes.int32)
-      inputs_ta = tensor_array_ops.TensorArray(
-          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
-      inputs_ta = inputs_ta.unstack(inputs)
-
-      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-
-      def loop_fn(time_, cell_output, cell_state, unused_loop_state):
-        emit_output = cell_output  # == None for time == 0
-        if cell_output is None:  # time == 0
-          next_state = cell.zero_state(batch_size, dtypes.float32)
-        else:
-          next_state = cell_state
-
-        elements_finished = (time_ >= sequence_length)
-        finished = math_ops.reduce_all(elements_finished)
-        # For the very final iteration, we must emit a dummy input
-        next_input = control_flow_ops.cond(
-            finished,
-            lambda: array_ops.zeros([batch_size, input_depth], dtype=dtypes.float32),
-            lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, next_state, emit_output, None)
-
-      return rnn.raw_rnn(cell, loop_fn, scope=scope)
-
-    self._testScope(factory, use_outer_scope=True)
-    self._testScope(factory, use_outer_scope=False)
-    self._testScope(factory, prefix=None, use_outer_scope=False)
-
-
-class DeviceWrapperCell(rnn_cell.RNNCell):
-  """Class to ensure cell calculation happens on a specific device."""
-
-  def __init__(self, cell, device):
-    self._cell = cell
-    self._device = device
-
-  @property
-  def output_size(self):
-    return self._cell.output_size
-
-  @property
-  def state_size(self):
-    return self._cell.state_size
-
-  def __call__(self, input_, state, scope=None):
-    if self._device is not None:
-      with ops_lib.device(self._device):
-        return self._cell(input_, state, scope=scope)
-    else:
-      return self._cell(input_, state, scope=scope)
-
-
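`DeviceWrapperCell` only overrides `__call__` to run the wrapped cell under a device scope, which is what lets the tests below check that the cell's compute lands on one device while the surrounding TensorArray ops land on another. A hedged usage sketch, assuming TF 1.x and an available GPU:

```python
import tensorflow as tf

cell = tf.nn.rnn_cell.LSTMCell(10, use_peepholes=True)
gpu_cell = DeviceWrapperCell(cell, device=tf.test.gpu_device_name())

inputs = tf.random_normal([3, 7, 5])  # [batch, time, depth]
with tf.device("/cpu:0"):
  # The RNN loop and its TensorArrays stay on the CPU; only the cell's
  # per-step computation runs on the GPU.
  outputs, _ = tf.nn.dynamic_rnn(gpu_cell, inputs, dtype=tf.float32)
```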
-class TensorArrayOnCorrectDeviceTest(test.TestCase):
-
-  def _execute_rnn_on(self,
-                      rnn_device=None,
-                      cell_device=None,
-                      input_device=None):
-    batch_size = 3
-    time_steps = 7
-    input_size = 5
-    num_units = 10
-
-    cell = rnn_cell.LSTMCell(num_units, use_peepholes=True)
-    gpu_cell = DeviceWrapperCell(cell, cell_device)
-    inputs = np.random.randn(batch_size, time_steps, input_size).astype(
-        np.float32)
-    sequence_length = np.random.randint(0, time_steps, size=batch_size)
-
-    if input_device is not None:
-      with ops_lib.device(input_device):
-        inputs = constant_op.constant(inputs)
-
-    if rnn_device is not None:
-      with ops_lib.device(rnn_device):
-        outputs, _ = rnn.dynamic_rnn(
-            gpu_cell,
-            inputs,
-            sequence_length=sequence_length,
-            dtype=dtypes.float32)
-    else:
-      outputs, _ = rnn.dynamic_rnn(
-          gpu_cell,
-          inputs,
-          sequence_length=sequence_length,
-          dtype=dtypes.float32)
-
-    with self.session(use_gpu=True) as sess:
-      opts = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
-      run_metadata = config_pb2.RunMetadata()
-      variables_lib.global_variables_initializer().run()
-      sess.run(outputs, options=opts, run_metadata=run_metadata)
-
-    return run_metadata
-
-  def _retrieve_cpu_gpu_stats(self, run_metadata):
-    cpu_stats = None
-    gpu_stats = None
-    step_stats = run_metadata.step_stats
-    for ds in step_stats.dev_stats:
-      if "cpu:0" in ds.device[-5:].lower():
-        cpu_stats = ds.node_stats
-      if "gpu:0" == ds.device[-5:].lower():
-        gpu_stats = ds.node_stats
-    return cpu_stats, gpu_stats
-
-  def testRNNOnCPUCellOnGPU(self):
-    if not test.is_gpu_available():
-      return  # Test requires access to a GPU
-
-    gpu_dev = test.gpu_device_name()
-    run_metadata = self._execute_rnn_on(
-        rnn_device="/cpu:0", cell_device=gpu_dev)
-    cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
-
-    def _assert_in(op_str, in_stats, out_stats):
-      self.assertTrue(any(op_str in s.node_name for s in in_stats))
-      self.assertFalse(any(op_str in s.node_name for s in out_stats))
-
-    # Writes happen at output of RNN cell
-    _assert_in("TensorArrayWrite", gpu_stats, cpu_stats)
-    # Gather happens on final TensorArray
-    _assert_in("TensorArrayGather", gpu_stats, cpu_stats)
-    # Reads happen at input to RNN cell
-    _assert_in("TensorArrayRead", cpu_stats, gpu_stats)
-    # Scatters happen to get initial input into TensorArray
-    _assert_in("TensorArrayScatter", cpu_stats, gpu_stats)
-
-  def testRNNOnCPUCellOnCPU(self):
-    if not test.is_gpu_available():
-      return  # Test requires access to a GPU
-
-    gpu_dev = test.gpu_device_name()
-    run_metadata = self._execute_rnn_on(
-        rnn_device="/cpu:0", cell_device="/cpu:0", input_device=gpu_dev)
-    cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
-
-    def _assert_in(op_str, in_stats, out_stats):
-      self.assertTrue(any(op_str in s.node_name for s in in_stats))
-      self.assertFalse(any(op_str in s.node_name for s in out_stats))
-
-    # All TensorArray operations happen on CPU
-    _assert_in("TensorArray", cpu_stats, gpu_stats)
-
-  def testInputOnGPUCellNotDeclared(self):
-    if not test.is_gpu_available():
-      return  # Test requires access to a GPU
-
-    gpu_dev = test.gpu_device_name()
-    run_metadata = self._execute_rnn_on(input_device=gpu_dev)
-    cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
-
-    def _assert_in(op_str, in_stats, out_stats):
-      self.assertTrue(any(op_str in s.node_name for s in in_stats))
-      self.assertFalse(any(op_str in s.node_name for s in out_stats))
-
-    # Everything happens on GPU
-    _assert_in("TensorArray", gpu_stats, cpu_stats)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index d7ee7fb..dfac2df 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -22,6 +22,7 @@
 
 import numpy as np
 
+from tensorflow.contrib.rnn.python.ops import core_rnn_cell as legacy_rnn_cell
 from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
@@ -53,6 +54,294 @@
 
 class RNNCellTest(test.TestCase):
 
+  def testIndRNNCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.IndRNNCell(2)
+        g, _ = cell(x, m)
+        self.assertEqual([
+            "root/ind_rnn_cell/%s_w:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+            "root/ind_rnn_cell/%s_u:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+            "root/ind_rnn_cell/%s:0" % rnn_cell_impl._BIAS_VARIABLE_NAME
+        ], [v.name for v in cell.trainable_variables])
+        self.assertFalse(cell.non_trainable_variables)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        self.assertEqual(res[0].shape, (1, 2))
+
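The `…_w`/`…_u` variable names asserted above reflect what makes IndRNN "independent": the recurrent weight is an elementwise vector `u` rather than a full matrix, i.e. `h_t = act(x_t W + u * h_{t-1} + b)`. A numpy sketch of a single step under that definition (tanh is assumed as the activation, matching the contrib cell's default):

```python
import numpy as np

num_units = input_depth = 2
w = np.full((input_depth, num_units), 0.5)  # input kernel (the "..._w" variable)
u = np.full((num_units,), 0.5)              # recurrent vector (the "..._u" variable)
b = np.zeros(num_units)                     # bias

x_t = np.array([[1.0, 1.0]])                # current input, shape (1, 2)
h_prev = np.array([[0.1, 0.1]])             # previous state, shape (1, 2)

# Elementwise (not matmul) recurrence: each unit only sees its own history.
h_t = np.tanh(x_t @ w + u * h_prev + b)
print(h_t.shape)  # (1, 2), matching the assertion in the test
```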
+  def testIndyGRUCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        g, _ = contrib_rnn_cell.IndyGRUCell(2)(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.185265, 0.17704]])
+      with variable_scope.variable_scope(
+          "other", initializer=init_ops.constant_initializer(0.5)):
+        # Test IndyGRUCell with input_size != num_units.
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 2])
+        g, _ = contrib_rnn_cell.IndyGRUCell(2)(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.155127, 0.157328]])
+
+  def testIndyLSTMCell(self):
+    for dtype in [dtypes.float16, dtypes.float32]:
+      np_dtype = dtype.as_numpy_dtype
+      with self.session(graph=ops.Graph()) as sess:
+        with variable_scope.variable_scope(
+            "root", initializer=init_ops.constant_initializer(0.5)):
+          x = array_ops.zeros([1, 2], dtype=dtype)
+          state_0 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
+          state_1 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
+          cell = rnn_cell_impl.MultiRNNCell(
+              [contrib_rnn_cell.IndyLSTMCell(2) for _ in range(2)])
+          self.assertEqual(cell.dtype, None)
+          self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name)
+          self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name)
+          cell.get_config()  # Should not throw an error
+          g, (out_state_0, out_state_1) = cell(x, (state_0, state_1))
+          # Layer infers the input type.
+          self.assertEqual(cell.dtype, dtype.name)
+          expected_variable_names = [
+              "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_w:0" %
+              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_u:0" %
+              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s:0" %
+              rnn_cell_impl._BIAS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_w:0" %
+              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_u:0" %
+              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s:0" %
+              rnn_cell_impl._BIAS_VARIABLE_NAME
+          ]
+          self.assertEqual(expected_variable_names,
+                           [v.name for v in cell.trainable_variables])
+          self.assertFalse(cell.non_trainable_variables)
+          sess.run([variables.global_variables_initializer()])
+          res = sess.run(
+              [g, out_state_0, out_state_1], {
+                  x.name: np.array([[1., 1.]]),
+                  state_0[0].name: 0.1 * np.ones([1, 2]),
+                  state_0[1].name: 0.1 * np.ones([1, 2]),
+                  state_1[0].name: 0.1 * np.ones([1, 2]),
+                  state_1[1].name: 0.1 * np.ones([1, 2]),
+              })
+          self.assertEqual(len(res), 3)
+          global_variables = variables.global_variables()
+          self.assertEqual(expected_variable_names,
+                           [v.name for v in global_variables])
+          # Only check the range of outputs as this is just a smoke test.
+          self.assertAllInRange(res[0], -1.0, 1.0)
+          self.assertAllInRange(res[1], -1.0, 1.0)
+          self.assertAllInRange(res[2], -1.0, 1.0)
+        with variable_scope.variable_scope(
+            "other", initializer=init_ops.constant_initializer(0.5)):
+          # Test IndyLSTMCell with input_size != num_units.
+          x = array_ops.zeros([1, 3], dtype=dtype)
+          state = (array_ops.zeros([1, 2], dtype=dtype),) * 2
+          g, out_state = contrib_rnn_cell.IndyLSTMCell(2)(x, state)
+          sess.run([variables.global_variables_initializer()])
+          res = sess.run(
+              [g, out_state], {
+                  x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
+                  state[0].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
+                  state[1].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
+              })
+          self.assertEqual(len(res), 2)
+
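`IndyLSTMCell` applies the same idea to the LSTM: the input still goes through a full kernel (`…_w`), but the recurrent contribution to every gate is a diagonal, elementwise product with `…_u`. A numpy sketch of one step, under the assumptions that the gates follow TF's usual `i, j, f, o` ordering and that the recurrent vector is stacked per gate; both are illustrative rather than guaranteed to match the contrib implementation bit for bit:

```python
import numpy as np

def sigmoid(z):
  return 1.0 / (1.0 + np.exp(-z))

num_units, input_depth, forget_bias = 2, 3, 1.0
w = np.full((input_depth, 4 * num_units), 0.5)  # input kernel (..._w)
u = np.full((4 * num_units,), 0.5)              # diagonal recurrent kernel (..._u)
b = np.zeros(4 * num_units)                     # bias

x_t = np.ones((1, input_depth))
h_prev = 0.1 * np.ones((1, num_units))
c_prev = 0.1 * np.ones((1, num_units))

# Every gate sees u * h_prev elementwise instead of a full matmul.
gates = x_t @ w + u * np.tile(h_prev, 4) + b
i, j, f, o = np.split(gates, 4, axis=1)
c_t = sigmoid(f + forget_bias) * c_prev + sigmoid(i) * np.tanh(j)
h_t = sigmoid(o) * np.tanh(c_t)
print(h_t.shape, c_t.shape)  # (1, 2) (1, 2)
```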
+  def testLSTMCellLayerNorm(self):
+    with self.cached_session() as sess:
+      num_units = 2
+      num_proj = 3
+      batch_size = 1
+      input_size = 4
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([batch_size, input_size])
+        c = array_ops.zeros([batch_size, num_units])
+        h = array_ops.zeros([batch_size, num_proj])
+        state = rnn_cell_impl.LSTMStateTuple(c, h)
+        cell = contrib_rnn_cell.LayerNormLSTMCell(
+            num_units=num_units,
+            num_proj=num_proj,
+            forget_bias=1.0,
+            layer_norm=True,
+            norm_gain=1.0,
+            norm_shift=0.0)
+        g, out_m = cell(x, state)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run(
+            [g, out_m], {
+                x.name: np.ones((batch_size, input_size)),
+                c.name: 0.1 * np.ones((batch_size, num_units)),
+                h.name: 0.1 * np.ones((batch_size, num_proj))
+            })
+        self.assertEqual(len(res), 2)
+        # The numbers were not hand-verified; this is mostly just a
+        # smoke test.
+        self.assertEqual(res[0].shape, (batch_size, num_proj))
+        self.assertEqual(res[1][0].shape, (batch_size, num_units))
+        self.assertEqual(res[1][1].shape, (batch_size, num_proj))
+        # Identical inputs across the batch should yield identical outputs
+        # and states (the loop below is vacuous for batch_size == 1).
+        for i in range(1, batch_size):
+          self.assertTrue(
+              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
+          self.assertTrue(
+              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
+
+  def testOutputProjectionWrapper(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 3])
+        cell = legacy_rnn_cell.OutputProjectionWrapper(
+            rnn_cell_impl.GRUCell(3), 2)
+        g, new_m = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g, new_m], {
+            x.name: np.array([[1., 1., 1.]]),
+            m.name: np.array([[0.1, 0.1, 0.1]])
+        })
+        self.assertEqual(res[1].shape, (1, 3))
+        # The numbers were not hand-verified; this is just a smoke test.
+        self.assertAllClose(res[0], [[0.231907, 0.231907]])
+
+  def testInputProjectionWrapper(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 3])
+        cell = legacy_rnn_cell.InputProjectionWrapper(
+            rnn_cell_impl.GRUCell(3), num_proj=3)
+        g, new_m = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g, new_m], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1, 0.1]])
+        })
+        self.assertEqual(res[1].shape, (1, 3))
+        # The numbers were not hand-verified; this is just a smoke test.
+        self.assertAllClose(res[0], [[0.154605, 0.154605, 0.154605]])
+
+  def testEmbeddingWrapper(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 1], dtype=dtypes.int32)
+        m = array_ops.zeros([1, 2])
+        embedding_cell = legacy_rnn_cell.EmbeddingWrapper(
+            rnn_cell_impl.GRUCell(2), embedding_classes=3, embedding_size=2)
+        self.assertEqual(embedding_cell.output_size, 2)
+        g, new_m = embedding_cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g, new_m], {
+            x.name: np.array([[1]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        self.assertEqual(res[1].shape, (1, 2))
+        # The numbers were not hand-verified; this is just a smoke test.
+        self.assertAllClose(res[0], [[0.17139, 0.17139]])
+
+  def testEmbeddingWrapperWithDynamicRnn(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope("root"):
+        inputs = ops.convert_to_tensor([[[0], [0]]], dtype=dtypes.int64)
+        input_lengths = ops.convert_to_tensor([2], dtype=dtypes.int64)
+        embedding_cell = legacy_rnn_cell.EmbeddingWrapper(
+            rnn_cell_impl.BasicLSTMCell(1, state_is_tuple=True),
+            embedding_classes=1,
+            embedding_size=2)
+        outputs, _ = rnn.dynamic_rnn(
+            cell=embedding_cell,
+            inputs=inputs,
+            sequence_length=input_lengths,
+            dtype=dtypes.float32)
+        sess.run([variables.global_variables_initializer()])
+        # This will fail if output's dtype is inferred from input's.
+        sess.run(outputs)
+
+  def testSRUCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        g, _ = contrib_rnn_cell.SRUCell(2)(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.509682, 0.509682]])
+
+  def testSRUCellKerasRNN(self):
+    """Tests that SRUCell works with keras RNN layer."""
+    cell = contrib_rnn_cell.SRUCell(10)
+    seq_input = ops.convert_to_tensor(
+        np.random.rand(2, 3, 5), name="seq_input", dtype=dtypes.float32)
+    rnn_layer = keras_layers.RNN(cell=cell)
+    rnn_outputs_keras = rnn_layer(seq_input)
+    with self.cached_session() as sess:
+      sess.run([variables.global_variables_initializer()])
+      self.assertEqual(sess.run(rnn_outputs_keras).shape, (2, 10))
+
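Because `SRUCell` implements the cell interface Keras expects (`state_size`, `output_size`, and a call taking `(inputs, states)`), it can be dropped into `keras.layers.RNN` like any built-in cell, which is all `testSRUCellKerasRNN` above checks. A sketch of the same wiring outside the test harness, assuming the TF 1.x contrib layout used in this file:

```python
import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell

cell = contrib_rnn_cell.SRUCell(10)
layer = tf.keras.layers.RNN(cell)  # unrolls the cell over the time axis

seq = tf.constant(np.random.rand(2, 3, 5), dtype=tf.float32)
outputs = layer(seq)               # last output only: shape (2, 10)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(outputs).shape)   # (2, 10)
```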
+  def testSRUCellBiasType(self):
+    """Tests that the bias' dtype is properly set."""
+    cell = contrib_rnn_cell.SRUCell(10)
+    cell.build((2, 3, 5))
+    self.assertEqual(cell._bias.dtype, dtypes.float32_ref)
+
+    cell = contrib_rnn_cell.SRUCell(10, dtype=dtypes.int32)
+    cell.build((2, 3, 5))
+    self.assertEqual(cell._bias.dtype, dtypes.int32_ref)
+
+    cell_input = ops.convert_to_tensor(
+        np.random.rand(2, 5), name="cell_input", dtype=dtypes.float16)
+    cell_state = ops.convert_to_tensor(
+        np.random.rand(2, 10), name="cell_state", dtype=dtypes.float16)
+    cell = contrib_rnn_cell.SRUCell(10)
+    cell(cell_input, [cell_state])
+    self.assertEqual(cell._bias.dtype, dtypes.float16_ref)
+
+  def testSRUCellWithDiffSize(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 2])
+        g, _ = contrib_rnn_cell.SRUCell(2)(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.55255556, 0.55255556]])
+
   def testCoupledInputForgetGateLSTMCell(self):
     with self.cached_session() as sess:
       num_units = 2
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
index d815f81..1a5692f 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
@@ -358,7 +358,7 @@
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.00597103),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.6))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.4))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -387,7 +387,7 @@
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.0052615386),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.3333333333))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.4))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -454,7 +454,7 @@
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.0052615386),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.3333333333333333))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.4))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -696,7 +696,7 @@
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.0025896581),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.6))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.73333333))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -707,12 +707,12 @@
             shape=(5, 6), dtype=dtype('float32'), mean=-0.00069823361),
         time=3,
         alignments=ResultSummary(
-            shape=(5, 8), dtype=dtype('float32'), mean=0.028698336),
+            shape=(5, 8), dtype=dtype('float32'), mean=0.029914695),
         attention_state=ResultSummary(
-            shape=(5, 8), dtype=dtype('float32'), mean=0.028698336),
+            shape=(5, 8), dtype=dtype('float32'), mean=0.029914695),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
-        shape=(3, 5, 8), dtype=dtype('float32'), mean=0.04865776002407074)
+        shape=(3, 5, 8), dtype=dtype('float32'), mean=0.0465225502849)
 
     self._testWithAttention(
         create_attention_mechanism,
@@ -921,9 +921,9 @@
 
     expected_final_output = BasicDecoderOutput(
         rnn_output=ResultSummary(
-            shape=(5, 3, 20), dtype=dtype('float32'), mean=0.11723966),
+            shape=(5, 3, 20), dtype=dtype('float32'), mean=0.115853324533),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=7.266666666666667))
+            shape=(5, 3), dtype=dtype('int32'), mean=8.6))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -931,7 +931,7 @@
             h=ResultSummary(
                 shape=(5, 9), dtype=dtype('float32'), mean=-0.0018327223)),
         attention=ResultSummary(
-            shape=(5, 20), dtype=dtype('float32'), mean=0.11601614207),
+            shape=(5, 20), dtype=dtype('float32'), mean=0.11462739855),
         time=3,
         alignments=(ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_v2_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_v2_test.py
index 7ff04e1..5ee01f6 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_v2_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_v2_test.py
@@ -17,14 +17,23 @@
 from __future__ import division
 from __future__ import print_function
 
+import collections
+
 from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.contrib.seq2seq.python.ops import attention_wrapper as wrapper
-from tensorflow.python.framework import ops
+from tensorflow.contrib.seq2seq.python.ops import basic_decoder
+from tensorflow.contrib.seq2seq.python.ops import sampler as sampler_py
+from tensorflow.python import keras
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import initializers
+from tensorflow.python.ops import rnn_cell
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
+from tensorflow.python.util import nest
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -37,13 +46,10 @@
     self.memory_size = 6
     self.units = 8
 
-    self.memory = ops.convert_to_tensor(
-        np.random.random((self.batch, self.timestep, self.memory_size)),
-        dtype=np.float32)
-    self.query = ops.convert_to_tensor(
-        np.random.random((self.batch, self.units)), dtype=np.float32)
-    self.state = ops.convert_to_tensor(
-        np.random.random((self.batch, self.timestep)), dtype=np.float32)
+    self.memory = np.random.randn(self.batch, self.timestep,
+                                  self.memory_size).astype(np.float32)
+    self.query = np.random.randn(self.batch, self.units).astype(np.float32)
+    self.state = np.random.randn(self.batch, self.timestep).astype(np.float32)
 
   @parameterized.named_parameters(
       ("luong", wrapper.LuongAttentionV2),
@@ -52,8 +58,8 @@
       ("bahdanau_monotonic", wrapper.BahdanauMonotonicAttentionV2),
   )
   def test_attention_shape_inference(self, attention_cls):
-    attention = attention_cls(self.units)
-    attention_score = attention([self.query, self.state, self.memory])
+    attention = attention_cls(self.units, self.memory)
+    attention_score = attention([self.query, self.state])
     self.assertLen(attention_score, 2)
     self.assertEqual(attention_score[0].shape, (self.batch, self.timestep))
     self.assertEqual(attention_score[1].shape, (self.batch, self.timestep))
@@ -65,7 +71,7 @@
       ("bahdanau_monotonic", wrapper.BahdanauMonotonicAttentionV2),
   )
   def test_get_config(self, attention_cls):
-    attention = attention_cls(self.units)
+    attention = attention_cls(self.units, self.memory)
     config = attention.get_config()
 
     attention_from_config = attention_cls.from_config(config)
@@ -80,9 +86,8 @@
       ("bahdanau_monotonic", wrapper.BahdanauMonotonicAttentionV2),
   )
   def test_layer_output(self, attention_cls):
-    attention = attention_cls(self.units)
-
-    score = attention([self.query, self.state, self.memory])
+    attention = attention_cls(self.units, self.memory)
+    score = attention([self.query, self.state])
     self.evaluate(variables.variables_initializer(attention.variables))
 
     score_val = self.evaluate(score)
@@ -90,5 +95,651 @@
     self.assertEqual(score_val[0].shape, (self.batch, self.timestep))
     self.assertEqual(score_val[1].shape, (self.batch, self.timestep))
 
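The edits in this hunk track an API change in the V2 attention mechanisms: `memory` now goes to the constructor, where it is processed once, and the layer call receives only `[query, state]`. A sketch of the new calling convention with the shapes from `setUp`, using `LuongAttentionV2` as a representative mechanism:

```python
import numpy as np
from tensorflow.contrib.seq2seq.python.ops import attention_wrapper as wrapper

batch, timestep, memory_size, units = 2, 3, 6, 8
memory = np.random.randn(batch, timestep, memory_size).astype(np.float32)
query = np.random.randn(batch, units).astype(np.float32)
state = np.random.randn(batch, timestep).astype(np.float32)

# Memory is bound at construction time and processed once...
attention = wrapper.LuongAttentionV2(units, memory)
# ...so each call only needs the per-step query and previous alignments.
alignments, next_state = attention([query, state])
```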
+  @parameterized.named_parameters(
+      ("luong", wrapper.LuongAttentionV2),
+      ("luong_monotonic", wrapper.LuongMonotonicAttentionV2),
+      ("bahdanau", wrapper.BahdanauAttentionV2),
+      ("bahdanau_monotonic", wrapper.BahdanauMonotonicAttentionV2),
+  )
+  def test_passing_memory_from_call(self, attention_cls):
+    attention = attention_cls(self.units, self.memory)
+    weights_before_query = attention.get_weights()
+    ref_score = attention([self.query, self.state])
+
+    self.evaluate(variables.global_variables_initializer())
+    ref_score_val = self.evaluate(ref_score)
+
+    all_weights = attention.get_weights()
+    config = attention.get_config()
+    # Simulate the layer being invoked twice (memory setup, then query).
+    attention_from_config = attention_cls.from_config(config)
+    attention_from_config.build(self.memory.shape)
+    attention_from_config.set_weights(weights_before_query)
+    attention_from_config(self.memory, setup_memory=True)
+    attention_from_config.build([self.query.shape, self.state.shape])
+    attention_from_config.set_weights(all_weights)
+    score = attention_from_config([self.query, self.state])
+
+    score_val = self.evaluate(score)
+    self.assertAllClose(ref_score_val, score_val)
+
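Deserialization is the one place where memory still arrives through a call: `from_config` restores the layer without its bound memory, so the memory has to be re-attached with `setup_memory=True` before normal `[query, state]` calls, which is the sequence `test_passing_memory_from_call` steps through. Reusing the names from the sketch above:

```python
config = attention.get_config()
restored = wrapper.LuongAttentionV2.from_config(config)
restored(memory, setup_memory=True)                # re-bind the memory first
alignments, next_state = restored([query, state])  # then query as usual
```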
+  @parameterized.named_parameters(
+      ("luong", wrapper.LuongAttentionV2),
+      ("luong_monotonic", wrapper.LuongMonotonicAttentionV2),
+      ("bahdanau", wrapper.BahdanauAttentionV2),
+      ("bahdanau_monotonic", wrapper.BahdanauMonotonicAttentionV2),
+  )
+  def test_save_load_layer(self, attention_cls):
+    vocab = 20
+    embedding_dim = 6
+    inputs = keras.layers.Input(shape=[self.timestep])
+    encoder_input = keras.layers.Embedding(
+        vocab, embedding_dim, mask_zero=True)(
+            inputs)
+    encoder_output = keras.layers.UnifiedLSTM(
+        self.memory_size, return_sequences=True)(
+            encoder_input)
+
+    attention = attention_cls(self.units, encoder_output)
+    query = keras.layers.Input(shape=[self.units])
+    state = keras.layers.Input(shape=[self.timestep])
+
+    score = attention([query, state])
+
+    x = np.random.randint(vocab, size=(self.batch, self.timestep))
+    x_test = np.random.randint(vocab, size=(self.batch, self.timestep))
+    y = np.random.randn(self.batch, self.timestep)
+    model = keras.models.Model([inputs, query, state], score)
+    model.compile("rmsprop", "mse")
+    model.fit([x, self.query, self.state], (y, y))
+    y_ref = model.predict_on_batch([x_test, self.query, self.state])
+
+    config = model.get_config()
+    weights = model.get_weights()
+    loaded_model = keras.models.Model.from_config(
+        config, custom_objects={attention_cls.__name__: attention_cls})
+    loaded_model.set_weights(weights)
+
+    y = loaded_model.predict_on_batch([x_test, self.query, self.state])
+
+    self.assertAllClose(y_ref, y)
+
+  # TODO(scottzhu): Add tests for model.compile(run_eagerly=True)
+
+
+class ResultSummary(
+    collections.namedtuple("ResultSummary", ("shape", "dtype", "mean"))):
+  pass
+
+
+def get_result_summary(x):
+  if isinstance(x, np.ndarray):
+    return ResultSummary(x.shape, x.dtype, x.mean())
+  return x
+
+
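`ResultSummary` lets the expected values in the tests below be written as compact `(shape, dtype, mean)` triples rather than full tensors, and `get_result_summary` maps arbitrary nests of arrays onto them via `nest.map_structure`. For instance:

```python
x = np.ones((5, 3), dtype=np.float32)
print(get_result_summary(x))
# ResultSummary(shape=(5, 3), dtype=dtype('float32'), mean=1.0)
```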
+@test_util.run_all_in_graph_and_eager_modes
+class AttentionWrapperV2Test(test.TestCase, parameterized.TestCase):
+
+  def assertAllCloseOrEqual(self, x, y, **kwargs):
+    if isinstance(x, np.ndarray) or isinstance(x, float):
+      return super(AttentionWrapperV2Test, self).assertAllClose(
+          x, y, atol=1e-3, **kwargs)
+    else:
+      self.assertAllEqual(x, y, **kwargs)
+
+  def setUp(self):
+    super(AttentionWrapperV2Test, self).setUp()
+    self.batch = 64
+    self.units = 128
+    self.encoder_timestep = 10
+    self.encoder_dim = 256
+    self.decoder_timestep = 12
+    self.encoder_outputs = np.random.randn(self.batch, self.encoder_timestep,
+                                           self.encoder_dim)
+    self.encoder_sequence_length = np.random.randint(
+        self.encoder_timestep, size=(self.batch,)).astype(np.int32)
+    self.decoder_inputs = np.random.randn(self.batch, self.decoder_timestep,
+                                          self.units)
+    self.decoder_sequence_length = np.random.randint(
+        self.decoder_timestep, size=(self.batch,)).astype(np.int32)
+
+  def _testWithAttention(self,
+                         create_attention_mechanism,
+                         expected_final_output,
+                         expected_final_state,
+                         attention_mechanism_depth=3,
+                         alignment_history=False,
+                         expected_final_alignment_history=None,
+                         attention_layer_size=6,
+                         attention_layer=None,
+                         create_query_layer=False,
+                         create_memory_layer=True,
+                         create_attention_kwargs=None):
+    attention_layer_sizes = ([attention_layer_size]
+                             if attention_layer_size is not None else None)
+    attention_layers = ([attention_layer]
+                        if attention_layer is not None else None)
+    self._testWithMaybeMultiAttention(
+        is_multi=False,
+        create_attention_mechanisms=[create_attention_mechanism],
+        expected_final_output=expected_final_output,
+        expected_final_state=expected_final_state,
+        attention_mechanism_depths=[attention_mechanism_depth],
+        alignment_history=alignment_history,
+        expected_final_alignment_history=expected_final_alignment_history,
+        attention_layer_sizes=attention_layer_sizes,
+        attention_layers=attention_layers,
+        create_query_layer=create_query_layer,
+        create_memory_layer=create_memory_layer,
+        create_attention_kwargs=create_attention_kwargs)
+
+  def _testWithMaybeMultiAttention(self,
+                                   is_multi,
+                                   create_attention_mechanisms,
+                                   expected_final_output,
+                                   expected_final_state,
+                                   attention_mechanism_depths,
+                                   alignment_history=False,
+                                   expected_final_alignment_history=None,
+                                   attention_layer_sizes=None,
+                                   attention_layers=None,
+                                   create_query_layer=False,
+                                   create_memory_layer=True,
+                                   create_attention_kwargs=None):
+    # Allow is_multi to be True with a single mechanism so that passing a
+    # single mechanism in a list can be tested.
+    assert len(create_attention_mechanisms) == 1 or is_multi
+    encoder_sequence_length = [3, 2, 3, 1, 1]
+    decoder_sequence_length = [2, 0, 1, 2, 3]
+    batch_size = 5
+    encoder_max_time = 8
+    decoder_max_time = 4
+    input_depth = 7
+    encoder_output_depth = 10
+    cell_depth = 9
+    create_attention_kwargs = create_attention_kwargs or {}
+
+    if attention_layer_sizes is not None:
+      # Compute sum of attention_layer_sizes. Use encoder_output_depth if None.
+      attention_depth = sum(attention_layer_size or encoder_output_depth
+                            for attention_layer_size in attention_layer_sizes)
+    elif attention_layers is not None:
+      # Compute sum of attention_layers output depth.
+      attention_depth = sum(
+          attention_layer.compute_output_shape(
+              [batch_size, cell_depth + encoder_output_depth]).dims[-1].value
+          for attention_layer in attention_layers)
+    else:
+      attention_depth = encoder_output_depth * len(create_attention_mechanisms)
+
+    decoder_inputs = np.random.randn(batch_size, decoder_max_time,
+                                     input_depth).astype(np.float32)
+    encoder_outputs = np.random.randn(batch_size, encoder_max_time,
+                                      encoder_output_depth).astype(np.float32)
+
+    attention_mechanisms = []
+    for creator, depth in zip(create_attention_mechanisms,
+                              attention_mechanism_depths):
+      # Create query/memory layers with a deterministic initializer to avoid
+      # randomness differing between graph and eager execution.
+      if create_query_layer:
+        create_attention_kwargs["query_layer"] = keras.layers.Dense(
+            depth, kernel_initializer="ones", use_bias=False)
+      if create_memory_layer:
+        create_attention_kwargs["memory_layer"] = keras.layers.Dense(
+            depth, kernel_initializer="ones", use_bias=False)
+
+      attention_mechanisms.append(
+          creator(
+              units=depth,
+              memory=encoder_outputs,
+              memory_sequence_length=encoder_sequence_length,
+              **create_attention_kwargs))
+
+    with self.cached_session(use_gpu=True):
+      attention_layer_size = attention_layer_sizes
+      attention_layer = attention_layers
+      if not is_multi:
+        if attention_layer_size is not None:
+          attention_layer_size = attention_layer_size[0]
+        if attention_layer is not None:
+          attention_layer = attention_layer[0]
+      cell = rnn_cell.LSTMCell(cell_depth, initializer="ones")
+      cell = wrapper.AttentionWrapper(
+          cell,
+          attention_mechanisms if is_multi else attention_mechanisms[0],
+          attention_layer_size=attention_layer_size,
+          alignment_history=alignment_history,
+          attention_layer=attention_layer)
+      # Give the attention_layer within AttentionWrapper a deterministic
+      # kernel initializer, for testing purposes.
+      if cell._attention_layers is not None:
+        for layer in cell._attention_layers:
+          if getattr(layer, "kernel_initializer") is None:
+            layer.kernel_initializer = initializers.ones()
+
+      sampler = sampler_py.TrainingSampler()
+      my_decoder = basic_decoder.BasicDecoderV2(cell=cell, sampler=sampler)
+      initial_state = cell.zero_state(
+          dtype=dtypes.float32, batch_size=batch_size)
+      final_outputs, final_state, _ = my_decoder(
+          decoder_inputs,
+          initial_state=initial_state,
+          sequence_length=decoder_sequence_length)
+
+      self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
+      self.assertIsInstance(final_state, wrapper.AttentionWrapperState)
+      self.assertIsInstance(final_state.cell_state, rnn_cell.LSTMStateTuple)
+
+      expected_time = (
+          expected_final_state.time if context.executing_eagerly() else None)
+      self.assertEqual((batch_size, expected_time, attention_depth),
+                       tuple(final_outputs.rnn_output.get_shape().as_list()))
+      self.assertEqual((batch_size, expected_time),
+                       tuple(final_outputs.sample_id.get_shape().as_list()))
+
+      self.assertEqual((batch_size, attention_depth),
+                       tuple(final_state.attention.get_shape().as_list()))
+      self.assertEqual((batch_size, cell_depth),
+                       tuple(final_state.cell_state.c.get_shape().as_list()))
+      self.assertEqual((batch_size, cell_depth),
+                       tuple(final_state.cell_state.h.get_shape().as_list()))
+
+      if alignment_history:
+        if is_multi:
+          state_alignment_history = []
+          for history_array in final_state.alignment_history:
+            history = history_array.stack()
+            self.assertEqual((expected_time, batch_size, encoder_max_time),
+                             tuple(history.get_shape().as_list()))
+            state_alignment_history.append(history)
+          state_alignment_history = tuple(state_alignment_history)
+        else:
+          state_alignment_history = final_state.alignment_history.stack()
+          self.assertEqual((expected_time, batch_size, encoder_max_time),
+                           tuple(state_alignment_history.get_shape().as_list()))
+        nest.assert_same_structure(cell.state_size,
+                                   cell.zero_state(batch_size, dtypes.float32))
+        # Remove the history from final_state for the remainder of the tests.
+        final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
+      else:
+        state_alignment_history = ()
+
+      self.evaluate(variables.global_variables_initializer())
+      eval_result = self.evaluate({
+          "final_outputs": final_outputs,
+          "final_state": final_state,
+          "state_alignment_history": state_alignment_history,
+      })
+
+      final_output_info = nest.map_structure(get_result_summary,
+                                             eval_result["final_outputs"])
+      final_state_info = nest.map_structure(get_result_summary,
+                                            eval_result["final_state"])
+      print("final_output_info: ", final_output_info)
+      print("final_state_info: ", final_state_info)
+
+      nest.map_structure(self.assertAllCloseOrEqual, expected_final_output,
+                         final_output_info)
+      nest.map_structure(self.assertAllCloseOrEqual, expected_final_state,
+                         final_state_info)
+      if alignment_history:  # by default, the wrapper emits attention as output
+        final_alignment_history_info = nest.map_structure(
+            get_result_summary, eval_result["state_alignment_history"])
+        print("final_alignment_history_info: ", final_alignment_history_info)
+        nest.map_structure(
+            self.assertAllCloseOrEqual,
+            # outputs are batch major but the stacked TensorArray is time major
+            expected_final_alignment_history,
+            final_alignment_history_info)
+
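Stripped of its bookkeeping, the helper above wires up the standard V2 decoding pipeline: an attention mechanism bound to the encoder outputs, an `AttentionWrapper` around the cell, and a `BasicDecoderV2` driven by a `TrainingSampler`. A bare-bones sketch of that wiring with the helper's dimensions, using the modules already imported in this file (note that Luong attention needs `units` equal to the query depth, i.e. the cell size):

```python
import numpy as np
from tensorflow.contrib.seq2seq.python.ops import attention_wrapper as wrapper
from tensorflow.contrib.seq2seq.python.ops import basic_decoder
from tensorflow.contrib.seq2seq.python.ops import sampler as sampler_py
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import rnn_cell

batch_size, encoder_max_time, encoder_depth, cell_depth = 5, 8, 10, 9
encoder_outputs = np.random.randn(
    batch_size, encoder_max_time, encoder_depth).astype(np.float32)
decoder_inputs = np.random.randn(batch_size, 4, 7).astype(np.float32)

mechanism = wrapper.LuongAttentionV2(
    units=cell_depth, memory=encoder_outputs,
    memory_sequence_length=[3, 2, 3, 1, 1])
cell = wrapper.AttentionWrapper(
    rnn_cell.LSTMCell(cell_depth), mechanism, attention_layer_size=6)
decoder = basic_decoder.BasicDecoderV2(
    cell=cell, sampler=sampler_py.TrainingSampler())

final_outputs, final_state, _ = decoder(
    decoder_inputs,
    initial_state=cell.zero_state(batch_size, dtypes.float32),
    sequence_length=[2, 0, 1, 2, 3])
```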
+  @parameterized.parameters([np.float16, np.float32, np.float64])
+  def _testBahdanauNormalizedDType(self, dtype):
+    encoder_outputs = self.encoder_outputs.astype(dtype)
+    decoder_inputs = self.decoder_inputs.astype(dtype)
+    attention_mechanism = wrapper.BahdanauAttentionV2(
+        units=self.units,
+        memory=encoder_outputs,
+        memory_sequence_length=self.encoder_sequence_length,
+        normalize=True,
+        dtype=dtype)
+    cell = rnn_cell.LSTMCell(self.units)
+    cell = wrapper.AttentionWrapper(cell, attention_mechanism)
+
+    sampler = sampler_py.TrainingSampler()
+    my_decoder = basic_decoder.BasicDecoderV2(cell=cell, sampler=sampler)
+
+    final_outputs, final_state, _ = my_decoder(
+        decoder_inputs,
+        initial_state=cell.zero_state(dtype=dtype, batch_size=self.batch),
+        sequence_length=self.decoder_sequence_length)
+    self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
+    self.assertEqual(final_outputs.rnn_output.dtype, dtype)
+    self.assertIsInstance(final_state, wrapper.AttentionWrapperState)
+    self.assertIsInstance(final_state.cell_state, rnn_cell.LSTMStateTuple)
+
+  @parameterized.parameters([np.float16, np.float32, np.float64])
+  def testLuongScaledDType(self, dtype):
+    # Test case for GitHub issue 18099
+    encoder_outputs = self.encoder_outputs.astype(dtype)
+    decoder_inputs = self.decoder_inputs.astype(dtype)
+    attention_mechanism = wrapper.LuongAttentionV2(
+        units=self.units,
+        memory=encoder_outputs,
+        memory_sequence_length=self.encoder_sequence_length,
+        scale=True,
+        dtype=dtype,
+    )
+    cell = rnn_cell.LSTMCell(self.units)
+    cell = wrapper.AttentionWrapper(cell, attention_mechanism)
+
+    sampler = sampler_py.TrainingSampler()
+    my_decoder = basic_decoder.BasicDecoderV2(cell=cell, sampler=sampler)
+
+    final_outputs, final_state, _ = my_decoder(
+        decoder_inputs,
+        initial_state=cell.zero_state(dtype=dtype, batch_size=self.batch),
+        sequence_length=self.decoder_sequence_length)
+    self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
+    self.assertEqual(final_outputs.rnn_output.dtype, dtype)
+    self.assertIsInstance(final_state, wrapper.AttentionWrapperState)
+    self.assertIsInstance(final_state.cell_state, rnn_cell.LSTMStateTuple)
+
+  def testBahdanauNotNormalized(self):
+    create_attention_mechanism = wrapper.BahdanauAttentionV2
+    create_attention_kwargs = {"kernel_initializer": "ones"}
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype(np.float32), mean=4.8290324),
+        sample_id=ResultSummary(shape=(5, 3), dtype=np.dtype(np.int32), mean=0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype(np.float32), mean=1.6432636),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype(np.float32), mean=0.75866824)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype(np.float32), mean=6.7445569),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype(np.float32), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype(np.float32), mean=0.125),
+        alignment_history=())
+    expected_final_alignment_history = ResultSummary(
+        shape=(3, 5, 8), dtype=np.dtype(np.float32), mean=0.125)
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        alignment_history=True,
+        create_query_layer=True,
+        expected_final_alignment_history=expected_final_alignment_history,
+        create_attention_kwargs=create_attention_kwargs)
+
+  def testBahdanauNormalized(self):
+    create_attention_mechanism = wrapper.BahdanauAttentionV2
+    create_attention_kwargs = {"kernel_initializer": "ones", "normalize": True}
+
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=3.9548259),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=1.4652209),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.70997983)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype("float32"), mean=6.3075728),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        alignment_history=())
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        create_query_layer=True,
+        create_attention_kwargs=create_attention_kwargs)
+
+  def testLuongNotNormalized(self):
+    create_attention_mechanism = wrapper.LuongAttentionV2
+
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=2.6605489),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.88403547),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.37819088)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype("float32"), mean=4.084631),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        alignment_history=())
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        attention_mechanism_depth=9)
+
+  def testLuongScaled(self):
+    create_attention_mechanism = wrapper.LuongAttentionV2
+    create_attention_kwargs = {"scale": True}
+
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=2.6605489),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.88403547),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.37819088)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype("float32"), mean=4.0846314),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        alignment_history=())
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        attention_mechanism_depth=9,
+        create_attention_kwargs=create_attention_kwargs)
+
+  def testNotUseAttentionLayer(self):
+    create_attention_mechanism = wrapper.BahdanauAttentionV2
+    create_attention_kwargs = {"kernel_initializer": "ones"}
+
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 10), dtype=np.dtype("float32"), mean=0.072406612),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=3.86666666))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=1.032002),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.61177742)),
+        attention=ResultSummary(
+            shape=(5, 10), dtype=np.dtype("float32"), mean=0.011346335),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
+        alignment_history=())
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        attention_layer_size=None,
+        create_query_layer=True,
+        create_attention_kwargs=create_attention_kwargs)
+
+  def testBahdanauMonotonicNotNormalized(self):
+    create_attention_mechanism = wrapper.BahdanauMonotonicAttentionV2
+    create_attention_kwargs = {"kernel_initializer": "ones"}
+
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=5.9850435),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=1.6752492),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.76052248)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype("float32"), mean=8.361186),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.10989678),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.10989678),
+        alignment_history=())
+    expected_final_alignment_history = ResultSummary(
+        shape=(3, 5, 8), dtype=np.dtype("float32"), mean=0.117412611)
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        alignment_history=True,
+        expected_final_alignment_history=expected_final_alignment_history,
+        create_query_layer=True,
+        create_attention_kwargs=create_attention_kwargs)
+
+  def testBahdanauMonotonicNormalized(self):
+    create_attention_mechanism = wrapper.BahdanauMonotonicAttentionV2
+    create_attention_kwargs = {"kernel_initializer": "ones",
+                               "normalize": True}
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=4.5706983),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=1.6005473),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.77863038)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype("float32"), mean=7.3326721),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.12258384),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.12258384),
+        alignment_history=())
+    expected_final_alignment_history = ResultSummary(
+        shape=(3, 5, 8), dtype=np.dtype("float32"), mean=0.12258384)
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        alignment_history=True,
+        expected_final_alignment_history=expected_final_alignment_history,
+        create_query_layer=True,
+        create_attention_kwargs=create_attention_kwargs)
+
+  def testLuongMonotonicNotNormalized(self):
+    create_attention_mechanism = wrapper.LuongMonotonicAttentionV2
+
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=3.159497),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=1.072384),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.50331038)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype("float32"), mean=5.3079605),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.11467695),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.11467695),
+        alignment_history=())
+    expected_final_alignment_history = ResultSummary(
+        shape=(3, 5, 8), dtype=np.dtype("float32"), mean=0.11899644)
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        attention_mechanism_depth=9,
+        alignment_history=True,
+        expected_final_alignment_history=expected_final_alignment_history)
+
+  def testLuongMonotonicScaled(self):
+    create_attention_mechanism = wrapper.LuongMonotonicAttentionV2
+    create_attention_kwargs = {"scale": True}
+
+    expected_final_output = basic_decoder.BasicDecoderOutput(
+        rnn_output=ResultSummary(
+            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=3.159497),
+        sample_id=ResultSummary(
+            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
+    expected_final_state = wrapper.AttentionWrapperState(
+        cell_state=rnn_cell.LSTMStateTuple(
+            c=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=1.072384),
+            h=ResultSummary(
+                shape=(5, 9), dtype=np.dtype("float32"), mean=0.50331038)),
+        attention=ResultSummary(
+            shape=(5, 6), dtype=np.dtype("float32"), mean=5.3079605),
+        time=3,
+        alignments=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.11467695),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=np.dtype("float32"), mean=0.11467695),
+        alignment_history=())
+    expected_final_alignment_history = ResultSummary(
+        shape=(3, 5, 8), dtype=np.dtype("float32"), mean=0.11899644)
+
+    self._testWithAttention(
+        create_attention_mechanism,
+        expected_final_output,
+        expected_final_state,
+        attention_mechanism_depth=9,
+        alignment_history=True,
+        expected_final_alignment_history=expected_final_alignment_history,
+        create_attention_kwargs=create_attention_kwargs)
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_v2_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_v2_test.py
index 15fb688..2341ebb 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_v2_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_v2_test.py
@@ -127,7 +127,7 @@
           np.argmax(eval_result["step_outputs"].rnn_output, -1),
           eval_result["step_outputs"].sample_id)
 
-  def testStepWithGreedyEmbeddingHelper(self):
+  def DISABLED_testStepWithGreedyEmbeddingHelper(self):
     batch_size = 5
     vocabulary_size = 7
     cell_depth = vocabulary_size  # cell's logits must match vocabulary size
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py
index 5e28e65..56f2a0a 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py
@@ -25,10 +25,13 @@
 from tensorflow.contrib.seq2seq.python.ops import beam_search_decoder
 from tensorflow.contrib.seq2seq.python.ops import beam_search_ops
 from tensorflow.contrib.seq2seq.python.ops import decoder
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import layers
 from tensorflow.python.layers import core as layers_core
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import nn_ops
@@ -530,11 +533,10 @@
           return (shape[1], shape[0]) + shape[2:]
         return shape
 
-      self.assertTrue(
-          isinstance(final_outputs,
-                     beam_search_decoder.FinalBeamSearchDecoderOutput))
-      self.assertTrue(
-          isinstance(final_state, beam_search_decoder.BeamSearchDecoderState))
+      self.assertIsInstance(
+          final_outputs, beam_search_decoder.FinalBeamSearchDecoderOutput)
+      self.assertIsInstance(
+          final_state, beam_search_decoder.BeamSearchDecoderState)
 
       beam_search_decoder_output = final_outputs.beam_search_decoder_output
       self.assertEqual(
@@ -574,5 +576,119 @@
         with_alignment_history=True)
 
 
+@test_util.run_all_in_graph_and_eager_modes
+class BeamSearchDecoderV2Test(test.TestCase):
+
+  def _testDynamicDecodeRNN(self, time_major, has_attention,
+                            with_alignment_history=False):
+    encoder_sequence_length = np.array([3, 2, 3, 1, 1])
+    decoder_sequence_length = np.array([2, 0, 1, 2, 3])
+    batch_size = 5
+    decoder_max_time = 4
+    input_depth = 7
+    cell_depth = 9
+    attention_depth = 6
+    vocab_size = 20
+    end_token = vocab_size - 1
+    start_token = 0
+    embedding_dim = 50
+    max_out = max(decoder_sequence_length)
+    output_layer = layers.Dense(vocab_size, use_bias=True, activation=None)
+    beam_width = 3
+
+    with self.cached_session():
+      batch_size_tensor = constant_op.constant(batch_size)
+      embedding = np.random.randn(vocab_size, embedding_dim).astype(np.float32)
+      cell = rnn_cell.LSTMCell(cell_depth)
+      initial_state = cell.zero_state(batch_size, dtypes.float32)
+      coverage_penalty_weight = 0.0
+      if has_attention:
+        coverage_penalty_weight = 0.2
+        inputs = array_ops.placeholder_with_default(
+            np.random.randn(batch_size, decoder_max_time, input_depth).astype(
+                np.float32),
+            shape=(None, None, input_depth))
+        tiled_inputs = beam_search_decoder.tile_batch(
+            inputs, multiplier=beam_width)
+        tiled_sequence_length = beam_search_decoder.tile_batch(
+            encoder_sequence_length, multiplier=beam_width)
+        attention_mechanism = attention_wrapper.BahdanauAttention(
+            num_units=attention_depth,
+            memory=tiled_inputs,
+            memory_sequence_length=tiled_sequence_length)
+        initial_state = beam_search_decoder.tile_batch(
+            initial_state, multiplier=beam_width)
+        cell = attention_wrapper.AttentionWrapper(
+            cell=cell,
+            attention_mechanism=attention_mechanism,
+            attention_layer_size=attention_depth,
+            alignment_history=with_alignment_history)
+      cell_state = cell.zero_state(
+          dtype=dtypes.float32, batch_size=batch_size_tensor * beam_width)
+      if has_attention:
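+        # The wrapper's zero_state covers batch_size * beam_width entries;
+        # only the inner cell_state is replaced by the tiled encoder state.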
+        cell_state = cell_state.clone(cell_state=initial_state)
+      bsd = beam_search_decoder.BeamSearchDecoderV2(
+          cell=cell,
+          beam_width=beam_width,
+          output_layer=output_layer,
+          length_penalty_weight=0.0,
+          coverage_penalty_weight=coverage_penalty_weight,
+          output_time_major=time_major,
+          maximum_iterations=max_out)
+
+      final_outputs, final_state, final_sequence_lengths = bsd(
+          embedding,
+          start_tokens=array_ops.fill([batch_size_tensor], start_token),
+          end_token=end_token,
+          initial_state=cell_state)
+
+      def _t(shape):
+        if time_major:
+          return (shape[1], shape[0]) + shape[2:]
+        return shape
+
+      self.assertIsInstance(
+          final_outputs, beam_search_decoder.FinalBeamSearchDecoderOutput)
+      self.assertIsInstance(
+          final_state, beam_search_decoder.BeamSearchDecoderState)
+
+      beam_search_decoder_output = final_outputs.beam_search_decoder_output
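+      # In eager mode the decode loop has already run, so the sequence-length
+      # dimension is statically known (maximum_iterations is 3 here); in graph
+      # mode it is still unknown at this point.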
+      expected_seq_length = 3 if context.executing_eagerly() else None
+      self.assertEqual(
+          _t((batch_size, expected_seq_length, beam_width)),
+          tuple(beam_search_decoder_output.scores.get_shape().as_list()))
+      self.assertEqual(
+          _t((batch_size, expected_seq_length, beam_width)),
+          tuple(final_outputs.predicted_ids.get_shape().as_list()))
+
+      self.evaluate(variables.global_variables_initializer())
+      eval_results = self.evaluate({
+          'final_outputs': final_outputs,
+          'final_sequence_lengths': final_sequence_lengths
+      })
+
+      max_sequence_length = np.max(eval_results['final_sequence_lengths'])
+
+      # A smoke test
+      self.assertEqual(
+          _t((batch_size, max_sequence_length, beam_width)),
+          eval_results['final_outputs'].beam_search_decoder_output.scores.shape)
+      self.assertEqual(
+          _t((batch_size, max_sequence_length, beam_width)), eval_results[
+              'final_outputs'].beam_search_decoder_output.predicted_ids.shape)
+
+  def testDynamicDecodeRNNBatchMajorNoAttention(self):
+    self._testDynamicDecodeRNN(time_major=False, has_attention=False)
+
+  def testDynamicDecodeRNNBatchMajorYesAttention(self):
+    self._testDynamicDecodeRNN(time_major=False, has_attention=True)
+
+  def testDynamicDecodeRNNBatchMajorYesAttentionWithAlignmentHistory(self):
+    self._testDynamicDecodeRNN(
+        time_major=False,
+        has_attention=True,
+        with_alignment_history=True)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index ae3e7f1..79c2ac2 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -25,10 +25,13 @@
 import numpy as np
 
 from tensorflow.contrib.framework.python.framework import tensor_util
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.keras import initializers
 from tensorflow.python.keras import layers
+from tensorflow.python.keras.engine import base_layer_utils
 from tensorflow.python.layers import base as layers_base
 from tensorflow.python.layers import core as layers_core
 from tensorflow.python.ops import array_ops
@@ -225,18 +228,39 @@
     1. Storing the query and memory layers.
     2. Preprocessing and storing the memory.
 
-  Note that this layer only support Keras functional API since it takes multiple
-  input tensors, which is not available in sequential model.
+  Note that this layer takes memory as an init parameter, which is an
+  anti-pattern for the Keras API; we have to keep the memory as an init
+  parameter for performance and dependency reasons. Under the hood, during
+  `__init__()`, it will invoke
+  `base_layer.__call__(memory, setup_memory=True)`. This lets Keras keep
+  track of the memory tensor as an input of this layer. Once `__init__()` is
+  done, the user can query the attention via
+  `score = att_obj([query, state])` and use it as a normal Keras layer.
+
+  Special attention is needed when using this class as the base layer for a
+  new attention mechanism:
+    1. build() could be invoked more than once, so please make sure weights
+       are not duplicated.
+    2. Layer.get_weights() might return a different set of weights if the
+       instance has a `query_layer`. The query_layer weights are not
+       initialized until the memory is configured.
+
+  Also note that this layer does not work with a Keras model when
+  `model.compile(run_eagerly=True)` is used, because this layer is stateful.
+  Support for that will be added in a future version.
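+
+  A minimal usage sketch (assuming a concrete subclass such as
+  `LuongAttentionV2`; the variable names are illustrative):
+
+  ```
+  attention = LuongAttentionV2(
+      units=attention_depth,
+      memory=encoder_outputs,                      # [batch_size, max_time, ...]
+      memory_sequence_length=encoder_seq_lengths)  # memory is set up in init
+  # Afterwards, query it like any other Keras layer:
+  alignments, next_attention_state = attention([query, state])
+  ```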
   """
 
   def __init__(self,
+               memory,
                probability_fn,
                query_layer=None,
                memory_layer=None,
+               memory_sequence_length=None,
                **kwargs):
     """Construct base AttentionMechanism class.
 
     Args:
+      memory: The memory to query; usually the output of an RNN encoder.  This
+        tensor should be shaped `[batch_size, max_time, ...]`.
       probability_fn: A `callable`. Converts the score and previous alignments
         to probabilities. Its signature should be:
         `probabilities = probability_fn(score, state)`.
@@ -247,6 +271,9 @@
         depth must match the depth of `query_layer`.
         If `memory_layer` is not provided, the shape of `memory` must match
         that of `query_layer`.
+      memory_sequence_length (optional): Sequence lengths for the batch entries
+        in memory. If provided, the memory tensor rows are masked with zeros
+        for values past the respective sequence lengths.
       **kwargs: Dictionary that contains other common arguments for layer
         creation.
     """
@@ -273,20 +300,127 @@
     self.batch_size = None
     self._memory_initialized = False
     self._check_inner_dims_defined = True
+    self.supports_masking = True
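+    # Masked score positions are filled with the score_mask_value (-inf) so
+    # that the probability function (e.g. softmax) gives them ~zero weight.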
+    self.score_mask_value = dtypes.as_dtype(self.dtype).as_numpy_dtype(-np.inf)
+
+    if memory is not None:
+      # Setup the memory by self.__call__() with memory and memory_seq_length.
+      # This will make the attention follow the keras convention which takes
+      # all the tensor inputs via __call__().
+      if memory_sequence_length is None:
+        inputs = memory
+      else:
+        inputs = [memory, memory_sequence_length]
+
+      self.values = super(_BaseAttentionMechanismV2, self).__call__(
+          inputs, setup_memory=True)
 
   def build(self, input_shape):
-    # The layer suppose to take 3 inputs, [query, state, memory].
-    query_input_shape, _, memory_input_shape = input_shape
-    if self.query_layer is not None:
-      self.query_layer.build(query_input_shape)
-    if self.memory_layer is not None:
-      self.memory_layer.build(memory_input_shape)
-    # dtype of the layer is known at this moment, create the score_mask_value if
-    # needed.
-    self.score_mask_value = dtypes.as_dtype(self.dtype).as_numpy_dtype(-np.inf)
-    self.built = True
+    if not self._memory_initialized:
+      # This is for setting up the memory, where the input contains the
+      # memory and an optional memory_sequence_length. Build the memory_layer
+      # with the memory shape.
+      if self.memory_layer is not None and not self.memory_layer.built:
+        if isinstance(input_shape, list):
+          self.memory_layer.build(input_shape[0])
+        else:
+          self.memory_layer.build(input_shape)
+    else:
+      # The input_shape should be query.shape and state.shape. Use the query
+      # to init the query layer.
+      if self.query_layer is not None and not self.query_layer.built:
+        self.query_layer.build(input_shape[0])
 
-  def _setup_memory(self, memory, memory_mask=None):
+  def __call__(self, inputs, **kwargs):
+    """Preprocess the inputs before calling `base_layer.__call__()`.
+
+    Note that there are two situations here: one for setting up the memory,
+    and one with the actual query and state.
+    1. When the memory has not been configured, we just pass all the
+    parameters to base_layer.__call__(), which will then invoke self.call()
+    with the proper inputs, allowing this class to set up the memory.
+    2. When the memory has already been set up, the input should contain the
+    query and state, and optionally the processed memory. If the processed
+    memory is not included in the input, we have to append it to the inputs
+    and pass it to base_layer.__call__(). The processed memory is the output
+    of the first invocation of self.__call__(). If we don't add it here, then
+    from the Keras perspective the graph is disconnected, since the output
+    from the previous call is never used.
+
+    Args:
+      inputs: the input tensors.
+      **kwargs: dict, other keyword arguments for `__call__()`.
+    """
+    if self._memory_initialized:
+      if len(inputs) not in (2, 3):
+        raise ValueError("Expect the inputs to have 2 or 3 tensors, got %d" %
+                         len(inputs))
+      if len(inputs) == 2:
+        # We append the calculated memory here so that the graph will be
+        # connected.
+        inputs.append(self.values)
+    return super(_BaseAttentionMechanismV2, self).__call__(inputs, **kwargs)
+
+  def call(self, inputs, mask=None, setup_memory=False, **kwargs):
+    """Setup the memory or query the attention.
+
+    There are two case here, one for setup memory, and the second is query the
+    attention score. `setup_memory` is the flag to indicate which mode it is.
+    The input list will be treated differently based on that flag.
+
+    Args:
+      inputs: a list of tensors that could either be `query` and `state`, or
+        `memory` and `memory_sequence_length`.
+        `query` is the tensor of dtype matching `memory` and shape
+        `[batch_size, query_depth]`.
+        `state` is the tensor of dtype matching `memory` and shape
+        `[batch_size, alignments_size]`. (`alignments_size` is memory's
+        `max_time`).
+        `memory` is the memory to query; usually the output of an RNN encoder.
+        The tensor should be shaped `[batch_size, max_time, ...]`.
+        `memory_sequence_length` (optional) is the sequence lengths for the
+        batch entries in memory. If provided, the memory tensor rows are
+        masked with zeros for values past the respective sequence lengths.
+      mask: optional bool tensor with shape `[batch, max_time]` for the mask
+        of memory. If it is not None, memory items corresponding to False
+        values are filtered out during calculation.
+      setup_memory: boolean, whether the input is for setting up the memory,
+        or for querying the attention.
+      **kwargs: Dict, other keyword arguments for the call method.
+    Returns:
+      Either the processed memory or the attention score, based on
+      `setup_memory`.
+    """
+    if setup_memory:
+      if isinstance(inputs, list):
+        if len(inputs) not in (1, 2):
+          raise ValueError("Expect inputs to have 1 or 2 tensors, got %d" %
+                           len(inputs))
+        memory = inputs[0]
+        memory_sequence_length = inputs[1] if len(inputs) == 2 else None
+        memory_mask = mask
+      else:
+        memory, memory_sequence_length = inputs, None
+        memory_mask = mask
+      self._setup_memory(memory, memory_sequence_length, memory_mask)
+      # We force self.built to False here since only the memory is
+      # initialized; call() has not been invoked with the real query/state
+      # yet. The layer should be built and called again.
+      self.built = False
+      # Return the processed memory in order to create the Keras connectivity
+      # data for it.
+      return self.values
+    else:
+      if not self._memory_initialized:
+        raise ValueError("Cannot query the attention before the setup of "
+                         "memory")
+      if len(inputs) not in (2, 3):
+        raise ValueError("Expect the inputs to have query, state, and optional "
+                         "processed memory, got %d items" % len(inputs))
+      # Ignore the rest of the inputs and only care about the query and state
+      query, state = inputs[0], inputs[1]
+      return self._calculate_attention(query, state)
+
+  def _setup_memory(self, memory, memory_sequence_length=None,
+                    memory_mask=None):
     """Pre-process the memory before actually query the memory.
 
     This should only be called once at the first invocation of call().
@@ -294,17 +428,30 @@
     Args:
       memory: The memory to query; usually the output of an RNN encoder. This
         tensor should be shaped `[batch_size, max_time, ...]`.
-      memory_mask: The boolean tensor with shape `[batch_size, max_time]`. For
-        any value equal to False, the corresponding value in memory should be
-        ignored.
+      memory_sequence_length (optional): Sequence lengths for the batch entries
+        in memory. If provided, the memory tensor rows are masked with zeros for
+        values past the respective sequence lengths.
+      memory_mask: (Optional) The boolean tensor with shape `[batch_size,
+        max_time]`. For any value equal to False, the corresponding value in
+        memory should be ignored.
     """
     if self._memory_initialized:
       raise ValueError("The memory for the attention has already been setup.")
+    if memory_sequence_length is not None and memory_mask is not None:
+      raise ValueError("memory_sequence_length and memory_mask cannot be "
+                       "used at same time for attention.")
     with ops.name_scope(
         self.name, "BaseAttentionMechanismInit", nest.flatten(memory)):
       self.values = _prepare_memory(
-          memory, memory_mask=memory_mask,
+          memory,
+          memory_sequence_length=memory_sequence_length,
+          memory_mask=memory_mask,
           check_inner_dims_defined=self._check_inner_dims_defined)
+      # Mark the value as checked since the memory and memory mask might not
+      # be passed from __call__(), in which case they lack proper Keras
+      # metadata.
+      # TODO(omalleyt): Remove this hack once the mask has proper Keras
+      # history.
+      base_layer_utils.mark_checked(self.values)
       if self.memory_layer is not None:
         self.keys = self.memory_layer(self.values)
       else:
@@ -315,36 +462,25 @@
       self._alignments_size = (tensor_shape.dimension_value(self.keys.shape[1])
                                or array_ops.shape(self.keys)[1])
       if memory_mask is not None:
-        self.probability_fn = lambda score, prev: (  # pylint:disable=g-long-lambda
-            self.probability_fn(_maybe_mask_score(
-                score, self.score_mask_value, memory_mask=memory_mask), prev))
+        unwrapped_probability_fn = self.probability_fn
+        def _mask_probability_fn(score, prev):
+          return unwrapped_probability_fn(
+              _maybe_mask_score(
+                  score,
+                  memory_mask=memory_mask,
+                  memory_sequence_length=memory_sequence_length,
+                  score_mask_value=self.score_mask_value), prev)
+        self.probability_fn = _mask_probability_fn
     self._memory_initialized = True
 
-  def call(self, inputs, mask=None, **kwargs):
-    """Base method to calculate the attention score.
-
-    Args:
-      inputs: a list of tensor that contains `query`, `state`, and `memory`.
-        `query` is the tensor of dtype matching `memory` and shape
-        `[batch_size, query_depth]`.
-        `state` is the tensor of dtype matching `memory` and shape
-        `[batch_size, alignments_size]`. (`alignments_size` is memory's
-        `max_time`).
-        `memory` is the memory to query; usually the output of an RNN encoder.
-        This tensor should be shaped `[batch_size, max_time, feature]`.
-      mask: optional bool tensor with shape `[batch, max_time]` for the mask of
-        memory. If it is not None, the corresponding item of the memory should
-        be filtered out during calculation.
-      **kwargs: Dict, other keyword arguments for the call method.
-    """
-    query, state, memory, memory_mask = self._process_inputs(inputs, mask)
-    if not self._memory_initialized:
-      self._setup_memory(memory, memory_mask=memory_mask)
-    return self.calculate_attention(query, state)
-
-  def calculate_attention(self, query, state):
+  def _calculate_attention(self, query, state):
     raise NotImplementedError(
-        "calculate_attention need to be implemented by subclasses.")
+        "_calculate_attention need to be implemented by subclasses.")
+
+  def compute_mask(self, inputs, mask=None):
+    # The real inputs of the attention are query and state, and the memory
+    # layer mask shouldn't be passed down. Return None for all output masks
+    # here.
+    return None, None
 
   def get_config(self):
     config = {}
@@ -361,16 +497,12 @@
           "class_name": self.memory_layer.__class__.__name__,
           "config": self.memory_layer.get_config(),
       }
+    # memory is a required init parameter and it's a tensor. It cannot be
+    # serialized to config, so we put a placeholder for it.
+    config["memory"] = None
     base_config = super(_BaseAttentionMechanismV2, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
 
-  def _process_inputs(self, inputs, mask):
-    if len(inputs) != 3:
-      raise ValueError(
-          "Expect to have 3 inputs for attention, got %d" % len(inputs))
-    query, state, memory = inputs
-    return query, state, memory, mask
-
   def _process_probability_fn(self, func_name):
     """Helper method to retrieve the probably function by string input."""
     valid_probability_fns = {
@@ -418,6 +550,46 @@
   def alignments_size(self):
     return self._alignments_size
 
+  @property
+  def state_size(self):
+    return self._alignments_size
+
+  def initial_alignments(self, batch_size, dtype):
+    """Creates the initial alignment values for the `AttentionWrapper` class.
+
+    This is important for AttentionMechanisms that use the previous alignment
+    to calculate the alignment at the next time step (e.g. monotonic attention).
+
+    The default behavior is to return a tensor of all zeros.
+
+    Args:
+      batch_size: `int32` scalar, the batch_size.
+      dtype: The `dtype`.
+
+    Returns:
+      A `dtype` tensor shaped `[batch_size, alignments_size]`
+      (`alignments_size` is the values' `max_time`).
+    """
+    max_time = self._alignments_size
+    return _zero_state_tensors(max_time, batch_size, dtype)
+
+  def initial_state(self, batch_size, dtype):
+    """Creates the initial state values for the `AttentionWrapper` class.
+
+    This is important for AttentionMechanisms that use the previous alignment
+    to calculate the alignment at the next time step (e.g. monotonic attention).
+
+    The default behavior is to return the same output as initial_alignments.
+
+    Args:
+      batch_size: `int32` scalar, the batch_size.
+      dtype: The `dtype`.
+
+    Returns:
+      A structure of all-zero tensors with shapes as described by `state_size`.
+    """
+    return self.initial_alignments(batch_size, dtype)
+
 
 def _luong_score(query, keys, scale):
   """Implements Luong-style (multiplicative) scoring function.
@@ -587,6 +759,8 @@
 
   def __init__(self,
                units,
+               memory,
+               memory_sequence_length=None,
                scale=False,
                probability_fn="softmax",
                dtype=None,
@@ -596,6 +770,11 @@
 
     Args:
       units: The depth of the attention mechanism.
+      memory: The memory to query; usually the output of an RNN encoder.  This
+        tensor should be shaped `[batch_size, max_time, ...]`.
+      memory_sequence_length (optional): Sequence lengths for the batch entries
+        in memory.  If provided, the memory tensor rows are masked with zeros
+        for values past the respective sequence lengths.
       scale: Python boolean. Whether to scale the energy term.
       probability_fn: (optional) string, the name of function to convert the
         attention score to probabilities. The default is `softmax` which is
@@ -618,26 +797,27 @@
     if not memory_layer:
       memory_layer = layers.Dense(
           units, name="memory_layer", use_bias=False, dtype=dtype)
+    self.units = units
+    self.scale = scale
+    self.scale_weight = None
     super(LuongAttentionV2, self).__init__(
+        memory=memory,
+        memory_sequence_length=memory_sequence_length,
         query_layer=None,
         memory_layer=memory_layer,
         probability_fn=wrapped_probability_fn,
         name=name,
         dtype=dtype,
         **kwargs)
-    self.units = units
-    self.scale = scale
 
   def build(self, input_shape):
     super(LuongAttentionV2, self).build(input_shape)
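+    # build() can be invoked more than once for this layer (see the note in
+    # _BaseAttentionMechanismV2), so only create the scale weight on the
+    # first pass to avoid duplicating it.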
-    if self.scale:
+    if self.scale and self.scale_weight is None:
       self.scale_weight = self.add_weight(
           "attention_g", initializer=init_ops.ones_initializer, shape=())
-    else:
-      self.scale_weight = None
     self.built = True
 
-  def calculate_attention(self, query, state):
+  def _calculate_attention(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
@@ -851,8 +1031,11 @@
 
   def __init__(self,
                units,
+               memory,
+               memory_sequence_length=None,
                normalize=False,
                probability_fn="softmax",
+               kernel_initializer="glorot_uniform",
                dtype=None,
                name="BahdanauAttention",
                **kwargs):
@@ -860,12 +1043,19 @@
 
     Args:
       units: The depth of the query mechanism.
+      memory: The memory to query; usually the output of an RNN encoder.  This
+        tensor should be shaped `[batch_size, max_time, ...]`.
+      memory_sequence_length (optional): Sequence lengths for the batch entries
+        in memory.  If provided, the memory tensor rows are masked with zeros
+        for values past the respective sequence lengths.
       normalize: Python boolean.  Whether to normalize the energy term.
       probability_fn: (optional) string, the name of function to convert the
         attention score to probabilities. The default is `softmax` which is
         `tf.nn.softmax`. Another option is `hardmax`, which is hardmax() within
         this module. Any other value will result in a validation error. Defaults
         to `softmax`.
+      kernel_initializer: (optional) The name of the initializer for the
+        attention kernel.
       dtype: The data type for the query and memory layers of the attention
         mechanism.
       name: Name to use when creating ops.
@@ -885,33 +1075,39 @@
     if not memory_layer:
       memory_layer = layers.Dense(
           units, name="memory_layer", use_bias=False, dtype=dtype)
+    self.units = units
+    self.normalize = normalize
+    self.kernel_initializer = initializers.get(kernel_initializer)
+    self.attention_v = None
+    self.attention_g = None
+    self.attention_b = None
     super(BahdanauAttentionV2, self).__init__(
+        memory=memory,
+        memory_sequence_length=memory_sequence_length,
         query_layer=query_layer,
         memory_layer=memory_layer,
         probability_fn=wrapped_probability_fn,
         name=name,
         dtype=dtype,
         **kwargs)
-    self.units = units
-    self.normalize = normalize
 
   def build(self, input_shape):
     super(BahdanauAttentionV2, self).build(input_shape)
-    self.attention_v = self.add_weight(
-        "attention_v", [self.units], dtype=self.dtype)
-    if self.normalize:
+    if self.attention_v is None:
+      self.attention_v = self.add_weight(
+          "attention_v", [self.units],
+          dtype=self.dtype,
+          initializer=self.kernel_initializer)
+    if self.normalize and self.attention_g is None and self.attention_b is None:
       self.attention_g = self.add_weight(
           "attention_g", initializer=init_ops.constant_initializer(
               math.sqrt((1. / self.units))), shape=())
       self.attention_b = self.add_weight(
           "attention_b", shape=[self.units],
           initializer=init_ops.zeros_initializer())
-    else:
-      self.attention_g = None
-      self.attention_b = None
     self.built = True
 
-  def calculate_attention(self, query, state):
+  def _calculate_attention(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
@@ -940,6 +1136,7 @@
         "units": self.units,
         "normalize": self.normalize,
         "probability_fn": self.probability_fn_name,
+        "kernel_initializer": initializers.serialize(self.kernel_initializer)
     }
     base_config = super(BahdanauAttentionV2, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -1299,11 +1496,14 @@
 
   def __init__(self,
                units,
+               memory,
+               memory_sequence_length=None,
                normalize=False,
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
                score_bias_init=0.,
                mode="parallel",
+               kernel_initializer="glorot_uniform",
                dtype=None,
                name="BahdanauMonotonicAttention",
                **kwargs):
@@ -1311,6 +1511,11 @@
 
     Args:
       units: The depth of the query mechanism.
+      memory: The memory to query; usually the output of an RNN encoder.  This
+        tensor should be shaped `[batch_size, max_time, ...]`.
+      memory_sequence_length (optional): Sequence lengths for the batch entries
+        in memory.  If provided, the memory tensor rows are masked with zeros
+        for values past the respective sequence lengths.
       normalize: Python boolean. Whether to normalize the energy term.
       sigmoid_noise: Standard deviation of pre-sigmoid noise. See the docstring
         for `_monotonic_probability_fn` for more information.
@@ -1321,6 +1526,8 @@
       mode: How to compute the attention distribution. Must be one of
         'recursive', 'parallel', or 'hard'. See the docstring for
         `tf.contrib.seq2seq.monotonic_attention` for more information.
+      kernel_initializer: (optional) The name of the initializer for the
+        attention kernel.
       dtype: The data type for the query and memory layers of the attention
         mechanism.
       name: Name to use when creating ops.
@@ -1341,32 +1548,39 @@
     if not memory_layer:
       memory_layer = layers.Dense(
           units, name="memory_layer", use_bias=False, dtype=dtype)
-    super(BahdanauMonotonicAttentionV2, self).__init__(
-        query_layer=query_layer,
-        memory_layer=memory_layer,
-        probability_fn=wrapped_probability_fn,
-        name=name,
-        dtype=dtype,
-        **kwargs)
     self.units = units
     self.normalize = normalize
     self.sigmoid_noise = sigmoid_noise
     self.sigmoid_noise_seed = sigmoid_noise_seed
     self.score_bias_init = score_bias_init
     self.mode = mode
+    self.kernel_initializer = initializers.get(kernel_initializer)
+    self.attention_v = None
+    self.attention_score_bias = None
+    self.attention_g = None
+    self.attention_b = None
+    super(BahdanauMonotonicAttentionV2, self).__init__(
+        memory=memory,
+        memory_sequence_length=memory_sequence_length,
+        query_layer=query_layer,
+        memory_layer=memory_layer,
+        probability_fn=wrapped_probability_fn,
+        name=name,
+        dtype=dtype,
+        **kwargs)
 
   def build(self, input_shape):
     super(BahdanauMonotonicAttentionV2, self).build(input_shape)
-    self.attention_v = self.add_weight(
-        "attention_v", [self.units], dtype=self.dtype)
-    self.attention_score_bias = self.add_weight(
-        "attention_score_bias", shape=(), dtype=self.dtype,
-        initializer=init_ops.constant_initializer(
-            self.score_bias_init, dtype=self.dtype))
-    if not self.normalize:
-      self.attention_g = None
-      self.attention_b = None
-    else:
+    if self.attention_v is None:
+      self.attention_v = self.add_weight(
+          "attention_v", [self.units], dtype=self.dtype,
+          initializer=self.kernel_initializer)
+    if self.attention_score_bias is None:
+      self.attention_score_bias = self.add_weight(
+          "attention_score_bias", shape=(), dtype=self.dtype,
+          initializer=init_ops.constant_initializer(
+              self.score_bias_init, dtype=self.dtype))
+    if self.normalize and self.attention_g is None and self.attention_b is None:
       self.attention_g = self.add_weight(
           "attention_g", dtype=self.dtype,
           initializer=init_ops.constant_initializer(
@@ -1377,7 +1591,7 @@
           initializer=init_ops.zeros_initializer())
     self.built = True
 
-  def calculate_attention(self, query, state):
+  def _calculate_attention(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
@@ -1409,6 +1623,7 @@
         "sigmoid_noise_seed": self.sigmoid_noise_seed,
         "score_bias_init": self.score_bias_init,
         "mode": self.mode,
+        "kernel_initializer": initializers.serialize(self.kernel_initializer),
     }
     base_config = super(BahdanauMonotonicAttentionV2, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -1542,6 +1757,8 @@
 
   def __init__(self,
                units,
+               memory,
+               memory_sequence_length=None,
                scale=False,
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
@@ -1554,6 +1771,11 @@
 
     Args:
       units: The depth of the query mechanism.
+      memory: The memory to query; usually the output of an RNN encoder.  This
+        tensor should be shaped `[batch_size, max_time, ...]`.
+      memory_sequence_length (optional): Sequence lengths for the batch entries
+        in memory.  If provided, the memory tensor rows are masked with zeros
+        for values past the respective sequence lengths.
       scale: Python boolean.  Whether to scale the energy term.
       sigmoid_noise: Standard deviation of pre-sigmoid noise.  See the docstring
         for `_monotonic_probability_fn` for more information.
@@ -1580,34 +1802,37 @@
     if not memory_layer:
       memory_layer = layers.Dense(
           units, name="memory_layer", use_bias=False, dtype=dtype)
-    super(LuongMonotonicAttentionV2, self).__init__(
-        query_layer=None,
-        memory_layer=memory_layer,
-        probability_fn=wrapped_probability_fn,
-        name=name,
-        dtype=dtype,
-        **kwargs)
     self.units = units
     self.scale = scale
     self.sigmoid_noise = sigmoid_noise
     self.sigmoid_noise_seed = sigmoid_noise_seed
     self.score_bias_init = score_bias_init
     self.mode = mode
+    self.attention_g = None
+    self.attention_score_bias = None
+    super(LuongMonotonicAttentionV2, self).__init__(
+        memory=memory,
+        memory_sequence_length=memory_sequence_length,
+        query_layer=None,
+        memory_layer=memory_layer,
+        probability_fn=wrapped_probability_fn,
+        name=name,
+        dtype=dtype,
+        **kwargs)
 
   def build(self, input_shape):
     super(LuongMonotonicAttentionV2, self).build(input_shape)
-    if self.scale:
+    if self.scale and self.attention_g is None:
       self.attention_g = self.add_weight(
           "attention_g", initializer=init_ops.ones_initializer, shape=())
-    else:
-      self.attention_g = None
-    self.attention_score_bias = self.add_weight(
-        "attention_score_bias", shape=(),
-        initializer=init_ops.constant_initializer(
-            self.score_bias_init, dtype=self.dtype))
+    if self.attention_score_bias is None:
+      self.attention_score_bias = self.add_weight(
+          "attention_score_bias", shape=(),
+          initializer=init_ops.constant_initializer(
+              self.score_bias_init, dtype=self.dtype))
     self.built = True
 
-  def calculate_attention(self, query, state):
+  def _calculate_attention(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
@@ -1695,7 +1920,15 @@
     def with_same_shape(old, new):
       """Check and set new tensor's shape."""
       if isinstance(old, ops.Tensor) and isinstance(new, ops.Tensor):
-        return tensor_util.with_same_shape(old, new)
+        if not context.executing_eagerly():
+          return tensor_util.with_same_shape(old, new)
+        else:
+          if old.shape.as_list() != new.shape.as_list():
+            raise ValueError("The shape of the AttentionWrapperState is "
+                             "expected to be same as the one to clone. "
+                             "self.shape: %s, input.shape: %s" %
+                             (old.shape, new.shape))
+          return new
       return new
 
     return nest.map_structure(
@@ -1739,41 +1972,26 @@
                          "but saw shape: %s" % (m.name, m.get_shape()))
     nest.map_structure(_check_dims, memory)
   if memory_sequence_length is None and memory_mask is None:
-    seq_len_mask = None
-    seq_len_batch_size = None
+    return memory
   elif memory_sequence_length is not None:
     seq_len_mask = array_ops.sequence_mask(
         memory_sequence_length,
         maxlen=array_ops.shape(nest.flatten(memory)[0])[1],
         dtype=nest.flatten(memory)[0].dtype)
-    seq_len_batch_size = (
-        tensor_shape.dimension_value(memory_sequence_length.shape[0])
-        or array_ops.shape(memory_sequence_length)[0])
   else:
     # For the case when memory_mask is not None
-    seq_len_mask = memory_mask
-    seq_len_batch_size = (
-        tensor_shape.dimension_value(memory_mask.shape[0])
-        or array_ops.shape(memory_mask)[0])
+    seq_len_mask = math_ops.cast(
+        memory_mask, dtype=nest.flatten(memory)[0].dtype)
   def _maybe_mask(m, seq_len_mask):
     """Mask the memory based on the memory mask."""
     rank = m.get_shape().ndims
     rank = rank if rank is not None else array_ops.rank(m)
     extra_ones = array_ops.ones(rank - 2, dtype=dtypes.int32)
-    m_batch_size = tensor_shape.dimension_value(
-        m.shape[0]) or array_ops.shape(m)[0]
-    if seq_len_batch_size is not None:
-      message = ("memory_sequence_length and memory tensor batch sizes do not "
-                 "match.")
-      with ops.control_dependencies([
-          check_ops.assert_equal(
-              seq_len_batch_size, m_batch_size, message=message)]):
-        seq_len_mask = array_ops.reshape(
-            seq_len_mask,
-            array_ops.concat((array_ops.shape(seq_len_mask), extra_ones), 0))
-        return m * seq_len_mask
-    else:
-      return m
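+    # Reshape the [batch_size, max_time] mask to [batch_size, max_time, 1, ...]
+    # so it broadcasts across any trailing feature dimensions of m.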
+    seq_len_mask = array_ops.reshape(
+        seq_len_mask,
+        array_ops.concat((array_ops.shape(seq_len_mask), extra_ones), 0))
+    return m * seq_len_mask
+
   return nest.map_structure(lambda m: _maybe_mask(m, seq_len_mask), memory)
 
 
@@ -1819,8 +2037,14 @@
 def _compute_attention(attention_mechanism, cell_output, attention_state,
                        attention_layer):
   """Computes the attention and alignments for a given attention_mechanism."""
-  alignments, next_attention_state = attention_mechanism(
-      cell_output, state=attention_state)
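+  # V2 attention mechanisms follow the Keras convention and take a single
+  # inputs list of [query, state] via __call__().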
+  if isinstance(attention_mechanism, _BaseAttentionMechanismV2):
+    alignments, next_attention_state = attention_mechanism(
+        [cell_output, attention_state])
+  else:
+    # For other classes, assume they follow _BaseAttentionMechanism, which
+    # takes query and state as separate parameters.
+    alignments, next_attention_state = attention_mechanism(
+        cell_output, state=attention_state)
 
   # Reshape from [batch_size, memory_time] to [batch_size, 1, memory_time]
   expanded_alignments = array_ops.expand_dims(alignments, 1)
@@ -1833,13 +2057,13 @@
   # the batched matmul is over memory_time, so the output shape is
   #   [batch_size, 1, memory_size].
   # we then squeeze out the singleton dim.
-  context = math_ops.matmul(expanded_alignments, attention_mechanism.values)
-  context = array_ops.squeeze(context, [1])
+  context_ = math_ops.matmul(expanded_alignments, attention_mechanism.values)
+  context_ = array_ops.squeeze(context_, [1])
 
   if attention_layer is not None:
-    attention = attention_layer(array_ops.concat([cell_output, context], 1))
+    attention = attention_layer(array_ops.concat([cell_output, context_], 1))
   else:
-    attention = context
+    attention = context_
 
   return attention, alignments, next_attention_state
 
diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
index 8f8f057..1d773a4 100644
--- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
+++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
@@ -24,11 +24,12 @@
 from tensorflow.contrib.seq2seq.python.ops import attention_wrapper
 from tensorflow.contrib.seq2seq.python.ops import beam_search_ops
 from tensorflow.contrib.seq2seq.python.ops import decoder
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
-from tensorflow.python.layers import base as layers_base
+from tensorflow.python.keras import layers
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import embedding_ops
@@ -182,11 +183,12 @@
   return ordered
 
 
-def _check_maybe(t):
+def _check_ndims(t):
   if t.shape.ndims is None:
     raise ValueError(
         "Expected tensor (%s) to have known rank, but ndims == None." % t)
 
+
 def _check_static_batch_beam_maybe(shape, batch_size, beam_width):
   """Raises an exception if dimensions are known statically and can not be
   reshaped to [batch_size, beam_size, -1].
@@ -205,6 +207,7 @@
     return False
   return True
 
+
 def _check_batch_beam(t, batch_size, beam_width):
   """Returns an Assert operation checking that the elements of the stacked
   TensorArray can be reshaped to [batch_size, beam_size, -1]. At this point,
@@ -229,70 +232,30 @@
   return control_flow_ops.Assert(condition, [error_message])
 
 
+class BeamSearchDecoderMixin(object):
+  """BeamSearchDecoderMixin contains the common methods for BeamSearchDecoder.
 
-class BeamSearchDecoder(decoder.Decoder):
-  """BeamSearch sampling decoder.
-
-    **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in
-    `AttentionWrapper`, then you must ensure that:
-
-    - The encoder output has been tiled to `beam_width` via
-      `tf.contrib.seq2seq.tile_batch` (NOT `tf.tile`).
-    - The `batch_size` argument passed to the `zero_state` method of this
-      wrapper is equal to `true_batch_size * beam_width`.
-    - The initial state created with `zero_state` above contains a
-      `cell_state` value containing properly tiled final state from the
-      encoder.
-
-    An example:
-
-    ```
-    tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
-        encoder_outputs, multiplier=beam_width)
-    tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
-        encoder_final_state, multiplier=beam_width)
-    tiled_sequence_length = tf.contrib.seq2seq.tile_batch(
-        sequence_length, multiplier=beam_width)
-    attention_mechanism = MyFavoriteAttentionMechanism(
-        num_units=attention_depth,
-        memory=tiled_inputs,
-        memory_sequence_length=tiled_sequence_length)
-    attention_cell = AttentionWrapper(cell, attention_mechanism, ...)
-    decoder_initial_state = attention_cell.zero_state(
-        dtype, batch_size=true_batch_size * beam_width)
-    decoder_initial_state = decoder_initial_state.clone(
-        cell_state=tiled_encoder_final_state)
-    ```
-
-    Meanwhile, with `AttentionWrapper`, coverage penalty is suggested to use
-    when computing scores(https://arxiv.org/pdf/1609.08144.pdf). It encourages
-    the translation to cover all inputs.
+  It is expected to be used as a base class for a concrete BeamSearchDecoder.
+  Since this is a mixin class, it is expected to be used together with another
+  class as a base.
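+
+  For example, `BeamSearchDecoder` below is declared as
+  `class BeamSearchDecoder(BeamSearchDecoderMixin, decoder.Decoder)`, with the
+  mixin listed first so that `super().__init__()` reaches both parents.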
   """
 
   def __init__(self,
                cell,
-               embedding,
-               start_tokens,
-               end_token,
-               initial_state,
                beam_width,
                output_layer=None,
                length_penalty_weight=0.0,
                coverage_penalty_weight=0.0,
-               reorder_tensor_arrays=True):
-    """Initialize the BeamSearchDecoder.
+               reorder_tensor_arrays=True,
+               **kwargs):
+    """Initialize the BeamSearchDecoderMixin.
 
     Args:
       cell: An `RNNCell` instance.
-      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
-        or the `params` argument for `embedding_lookup`.
-      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
-      end_token: `int32` scalar, the token that marks end of decoding.
-      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
       beam_width:  Python integer, the number of beams.
-      output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
-        `tf.layers.Dense`.  Optional layer to apply to the RNN output prior
-        to storing the result or sampling.
+      output_layer: (Optional) An instance of `tf.keras.layers.Layer`, i.e.,
+        `tf.keras.layers.Dense`.  Optional layer to apply to the RNN output
+        prior to storing the result or sampling.
       length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
       coverage_penalty_weight: Float weight to penalize the coverage of source
         sentence. Disabled with 0.0.
@@ -302,59 +265,35 @@
         Otherwise, the `TensorArray` will be returned as is. Set this flag to
         `False` if the cell state contains `TensorArray`s that are not amenable
         to reordering.
+      **kwargs: Dict, other keyword arguments for the parent class.
 
     Raises:
       TypeError: if `cell` is not an instance of `RNNCell`,
-        or `output_layer` is not an instance of `tf.layers.Layer`.
-      ValueError: If `start_tokens` is not a vector or
-        `end_token` is not a scalar.
+        or `output_layer` is not an instance of `tf.keras.layers.Layer`.
     """
     rnn_cell_impl.assert_like_rnncell("cell", cell)  # pylint: disable=protected-access
     if (output_layer is not None and
-        not isinstance(output_layer, layers_base.Layer)):
+        not isinstance(output_layer, layers.Layer)):
       raise TypeError(
           "output_layer must be a Layer, received: %s" % type(output_layer))
     self._cell = cell
     self._output_layer = output_layer
     self._reorder_tensor_arrays = reorder_tensor_arrays
 
-    if callable(embedding):
-      self._embedding_fn = embedding
-    else:
-      self._embedding_fn = (
-          lambda ids: embedding_ops.embedding_lookup(embedding, ids))
-
-    self._start_tokens = ops.convert_to_tensor(
-        start_tokens, dtype=dtypes.int32, name="start_tokens")
-    if self._start_tokens.get_shape().ndims != 1:
-      raise ValueError("start_tokens must be a vector")
-    self._end_token = ops.convert_to_tensor(
-        end_token, dtype=dtypes.int32, name="end_token")
-    if self._end_token.get_shape().ndims != 0:
-      raise ValueError("end_token must be a scalar")
-
-    self._batch_size = array_ops.size(start_tokens)
+    self._start_tokens = None
+    self._end_token = None
+    self._batch_size = None
     self._beam_width = beam_width
     self._length_penalty_weight = length_penalty_weight
     self._coverage_penalty_weight = coverage_penalty_weight
-    self._initial_cell_state = nest.map_structure(
-        self._maybe_split_batch_beams, initial_state, self._cell.state_size)
-    self._start_tokens = array_ops.tile(
-        array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width])
-    self._start_inputs = self._embedding_fn(self._start_tokens)
-
-    self._finished = array_ops.one_hot(
-        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
-        depth=self._beam_width,
-        on_value=False,
-        off_value=True,
-        dtype=dtypes.bool)
+    super(BeamSearchDecoderMixin, self).__init__(**kwargs)
 
   @property
   def batch_size(self):
     return self._batch_size
 
   def _rnn_output_size(self):
+    """Get the output shape from the RNN layer."""
     size = self._cell.output_size
     if self._output_layer is None:
       return size
@@ -393,50 +332,6 @@
         predicted_ids=tensor_shape.TensorShape([self._beam_width]),
         parent_ids=tensor_shape.TensorShape([self._beam_width]))
 
-  @property
-  def output_dtype(self):
-    # Assume the dtype of the cell is the output_size structure
-    # containing the input_state's first component's dtype.
-    # Return that structure and int32 (the id)
-    dtype = nest.flatten(self._initial_cell_state)[0].dtype
-    return BeamSearchDecoderOutput(
-        scores=nest.map_structure(lambda _: dtype, self._rnn_output_size()),
-        predicted_ids=dtypes.int32,
-        parent_ids=dtypes.int32)
-
-  def initialize(self, name=None):
-    """Initialize the decoder.
-
-    Args:
-      name: Name scope for any created operations.
-
-    Returns:
-      `(finished, start_inputs, initial_state)`.
-    """
-    finished, start_inputs = self._finished, self._start_inputs
-
-    dtype = nest.flatten(self._initial_cell_state)[0].dtype
-    log_probs = array_ops.one_hot(  # shape(batch_sz, beam_sz)
-        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
-        depth=self._beam_width,
-        on_value=ops.convert_to_tensor(0.0, dtype=dtype),
-        off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype),
-        dtype=dtype)
-    init_attention_probs = get_attention_probs(
-        self._initial_cell_state, self._coverage_penalty_weight)
-    if init_attention_probs is None:
-      init_attention_probs = ()
-
-    initial_state = BeamSearchDecoderState(
-        cell_state=self._initial_cell_state,
-        log_probs=log_probs,
-        finished=finished,
-        lengths=array_ops.zeros(
-            [self._batch_size, self._beam_width], dtype=dtypes.int64),
-        accumulated_attention_probs=init_attention_probs)
-
-    return (finished, start_inputs, initial_state)
-
   def finalize(self, outputs, final_state, sequence_lengths):
     """Finalize and return the predicted_ids.
 
@@ -562,7 +457,7 @@
     """
     if isinstance(t, tensor_array_ops.TensorArray):
       return t
-    _check_maybe(t)
+    _check_ndims(t)
     if t.shape.ndims >= 1:
       return self._split_batch_beams(t, s)
     else:
@@ -586,7 +481,7 @@
     """
     if isinstance(t, tensor_array_ops.TensorArray):
       return t
-    _check_maybe(t)
+    _check_ndims(t)
     if t.shape.ndims >= 2:
       return self._merge_batch_beams(t, s)
     else:
@@ -609,11 +504,18 @@
     if not isinstance(t, tensor_array_ops.TensorArray):
       return t
     # pylint: disable=protected-access
-    if (not t._infer_shape or not t._element_shape
-        or t._element_shape[0].ndims is None
-        or t._element_shape[0].ndims < 1):
+    # This is a bad hack due to implementation details of the eager/graph TA.
+    # TODO(b/124374427): Update this to use a public property of TensorArray.
+    if context.executing_eagerly():
+      element_shape = t._element_shape
+    else:
+      element_shape = t._element_shape[0]
+    if (not t._infer_shape
+        or not t._element_shape
+        or element_shape.ndims is None
+        or element_shape.ndims < 1):
       shape = (
-          t._element_shape[0] if t._infer_shape and t._element_shape
+          element_shape if t._infer_shape and t._element_shape
           else tensor_shape.TensorShape(None))
       tf_logging.warn("The TensorArray %s in the cell state is not amenable to "
                       "sorting based on the beam search result. For a "
@@ -621,10 +523,10 @@
                       "defined and have at least a rank of 1, but saw shape: %s"
                       % (t.handle.name, shape))
       return t
-    shape = t._element_shape[0]
     # pylint: enable=protected-access
     if not _check_static_batch_beam_maybe(
-        shape, tensor_util.constant_value(self._batch_size), self._beam_width):
+        element_shape, tensor_util.constant_value(self._batch_size),
+        self._beam_width):
       return t
     t = t.stack()
     with ops.control_dependencies(
@@ -684,6 +586,359 @@
     return (beam_search_output, beam_search_state, next_inputs, finished)
 
 
+class BeamSearchDecoder(BeamSearchDecoderMixin, decoder.Decoder):
+  # Note that the inheritance hierarchy is important here. The Mixin has to be
+  # the first parent class since we will use super().__init__(), and the
+  # Mixin, which derives from object, will properly invoke the __init__ method
+  # of the other parent class.
+  """BeamSearch sampling decoder.
+
+    **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in
+    `AttentionWrapper`, then you must ensure that:
+
+    - The encoder output has been tiled to `beam_width` via
+      `tf.contrib.seq2seq.tile_batch` (NOT `tf.tile`).
+    - The `batch_size` argument passed to the `zero_state` method of this
+      wrapper is equal to `true_batch_size * beam_width`.
+    - The initial state created with `zero_state` above contains a
+      `cell_state` value containing properly tiled final state from the
+      encoder.
+
+    An example:
+
+    ```
+    tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
+        encoder_outputs, multiplier=beam_width)
+    tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
+        encoder_final_state, multiplier=beam_width)
+    tiled_sequence_length = tf.contrib.seq2seq.tile_batch(
+        sequence_length, multiplier=beam_width)
+    attention_mechanism = MyFavoriteAttentionMechanism(
+        num_units=attention_depth,
+        memory=tiled_encoder_outputs,
+        memory_sequence_length=tiled_sequence_length)
+    attention_cell = AttentionWrapper(cell, attention_mechanism, ...)
+    decoder_initial_state = attention_cell.zero_state(
+        dtype, batch_size=true_batch_size * beam_width)
+    decoder_initial_state = decoder_initial_state.clone(
+        cell_state=tiled_encoder_final_state)
+    ```
+
+    Meanwhile, when using `AttentionWrapper`, a coverage penalty is suggested
+    when computing scores (https://arxiv.org/pdf/1609.08144.pdf), as it
+    encourages the decoder to cover all inputs.
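+
+    Once constructed, the decoder is typically driven by
+    `tf.contrib.seq2seq.dynamic_decode`. Below is a minimal sketch, assuming
+    `embedding_matrix`, `start_tokens`, `end_token`, and `maximum_iterations`
+    are defined, and reusing `attention_cell`, `beam_width`, and
+    `decoder_initial_state` from the example above:
+
+    ```
+    decoder = BeamSearchDecoder(
+        cell=attention_cell,
+        embedding=embedding_matrix,
+        start_tokens=start_tokens,
+        end_token=end_token,
+        initial_state=decoder_initial_state,
+        beam_width=beam_width,
+        length_penalty_weight=0.6)
+    # outputs.predicted_ids has shape [batch_size, max_time, beam_width].
+    outputs, final_state, lengths = tf.contrib.seq2seq.dynamic_decode(
+        decoder, maximum_iterations=maximum_iterations)
+    ```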
+  """
+
+  def __init__(self,
+               cell,
+               embedding,
+               start_tokens,
+               end_token,
+               initial_state,
+               beam_width,
+               output_layer=None,
+               length_penalty_weight=0.0,
+               coverage_penalty_weight=0.0,
+               reorder_tensor_arrays=True):
+    """Initialize the BeamSearchDecoder.
+
+    Args:
+      cell: An `RNNCell` instance.
+      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
+        or the `params` argument for `embedding_lookup`.
+      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
+      end_token: `int32` scalar, the token that marks end of decoding.
+      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
+      beam_width:  Python integer, the number of beams.
+      output_layer: (Optional) An instance of `tf.keras.layers.Layer`, i.e.,
+        `tf.keras.layers.Dense`.  Optional layer to apply to the RNN output
+        prior to storing the result or sampling.
+      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
+      coverage_penalty_weight: Float weight to penalize the coverage of source
+        sentence. Disabled with 0.0.
+      reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell
+        state will be reordered according to the beam search path. If the
+        `TensorArray` can be reordered, the stacked form will be returned.
+        Otherwise, the `TensorArray` will be returned as is. Set this flag to
+        `False` if the cell state contains `TensorArray`s that are not amenable
+        to reordering.
+
+    Raises:
+      TypeError: if `cell` is not an instance of `RNNCell`,
+        or `output_layer` is not an instance of `tf.keras.layers.Layer`.
+      ValueError: If `start_tokens` is not a vector or
+        `end_token` is not a scalar.
+    """
+    super(BeamSearchDecoder, self).__init__(
+        cell,
+        beam_width,
+        output_layer=output_layer,
+        length_penalty_weight=length_penalty_weight,
+        coverage_penalty_weight=coverage_penalty_weight,
+        reorder_tensor_arrays=reorder_tensor_arrays)
+
+    if callable(embedding):
+      self._embedding_fn = embedding
+    else:
+      self._embedding_fn = (
+          lambda ids: embedding_ops.embedding_lookup(embedding, ids))
+
+    self._start_tokens = ops.convert_to_tensor(
+        start_tokens, dtype=dtypes.int32, name="start_tokens")
+    if self._start_tokens.get_shape().ndims != 1:
+      raise ValueError("start_tokens must be a vector")
+    self._end_token = ops.convert_to_tensor(
+        end_token, dtype=dtypes.int32, name="end_token")
+    if self._end_token.get_shape().ndims != 0:
+      raise ValueError("end_token must be a scalar")
+
+    self._batch_size = array_ops.size(start_tokens)
+    self._initial_cell_state = nest.map_structure(
+        self._maybe_split_batch_beams, initial_state, self._cell.state_size)
+    self._start_tokens = array_ops.tile(
+        array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width])
+    self._start_inputs = self._embedding_fn(self._start_tokens)
+
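+    # Initially mark only the first beam as unfinished; the remaining beams,
+    # which start as exact duplicates of it, are treated as finished so they
+    # are not expanded on the first decoding step.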
+    self._finished = array_ops.one_hot(
+        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
+        depth=self._beam_width,
+        on_value=False,
+        off_value=True,
+        dtype=dtypes.bool)
+
+  def initialize(self, name=None):
+    """Initialize the decoder.
+
+    Args:
+      name: Name scope for any created operations.
+
+    Returns:
+      `(finished, start_inputs, initial_state)`.
+    """
+    finished, start_inputs = self._finished, self._start_inputs
+
+    dtype = nest.flatten(self._initial_cell_state)[0].dtype
+    log_probs = array_ops.one_hot(  # shape(batch_sz, beam_sz)
+        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
+        depth=self._beam_width,
+        on_value=ops.convert_to_tensor(0.0, dtype=dtype),
+        off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype),
+        dtype=dtype)
+    init_attention_probs = get_attention_probs(
+        self._initial_cell_state, self._coverage_penalty_weight)
+    if init_attention_probs is None:
+      init_attention_probs = ()
+
+    initial_state = BeamSearchDecoderState(
+        cell_state=self._initial_cell_state,
+        log_probs=log_probs,
+        finished=finished,
+        lengths=array_ops.zeros(
+            [self._batch_size, self._beam_width], dtype=dtypes.int64),
+        accumulated_attention_probs=init_attention_probs)
+
+    return (finished, start_inputs, initial_state)
+
+  @property
+  def output_dtype(self):
+    # Assume the dtype of the cell output matches the dtype of the first
+    # component of the initial cell state; return that dtype for the scores
+    # structure, and int32 for the predicted and parent ids.
+    dtype = nest.flatten(self._initial_cell_state)[0].dtype
+    return BeamSearchDecoderOutput(
+        scores=nest.map_structure(lambda _: dtype, self._rnn_output_size()),
+        predicted_ids=dtypes.int32,
+        parent_ids=dtypes.int32)
+
+
+class BeamSearchDecoderV2(BeamSearchDecoderMixin, decoder.BaseDecoder):
+  # Note that the inheritance hierarchy is important here. The Mixin has to be
+  # the first parent class since we will use super().__init__(), and the
+  # Mixin, which derives from object, will properly invoke the __init__ method
+  # of the other parent class.
+  """BeamSearch sampling decoder.
+
+    **NOTE** If you are using the `BeamSearchDecoderV2` with a cell wrapped in
+    `AttentionWrapper`, then you must ensure that:
+
+    - The encoder output has been tiled to `beam_width` via
+      `tf.contrib.seq2seq.tile_batch` (NOT `tf.tile`).
+    - The `batch_size` argument passed to the `zero_state` method of this
+      wrapper is equal to `true_batch_size * beam_width`.
+    - The initial state created with `zero_state` above contains a
+      `cell_state` value containing properly tiled final state from the
+      encoder.
+
+    An example:
+
+    ```
+    tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
+        encoder_outputs, multiplier=beam_width)
+    tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
+        encoder_final_state, multiplier=beam_width)
+    tiled_sequence_length = tf.contrib.seq2seq.tile_batch(
+        sequence_length, multiplier=beam_width)
+    attention_mechanism = MyFavoriteAttentionMechanism(
+        num_units=attention_depth,
+        memory=tiled_encoder_outputs,
+        memory_sequence_length=tiled_sequence_length)
+    attention_cell = AttentionWrapper(cell, attention_mechanism, ...)
+    decoder_initial_state = attention_cell.zero_state(
+        dtype, batch_size=true_batch_size * beam_width)
+    decoder_initial_state = decoder_initial_state.clone(
+        cell_state=tiled_encoder_final_state)
+    ```
+
+    Meanwhile, when using `AttentionWrapper`, a coverage penalty is suggested
+    when computing scores (https://arxiv.org/pdf/1609.08144.pdf), as it
+    encourages the decoder to cover all inputs.
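+
+    Unlike `BeamSearchDecoder`, this decoder takes only the cell and beam
+    configuration at construction time; the embedding, start/end tokens, and
+    initial state are supplied when the decoder instance is called. A minimal
+    sketch, reusing the names from the example above and assuming
+    `embedding_matrix`, `start_tokens`, `end_token`, and `maximum_iterations`
+    are defined (`maximum_iterations` is assumed to be forwarded through
+    `**kwargs` to the underlying decoding loop):
+
+    ```
+    decoder = BeamSearchDecoderV2(
+        cell=attention_cell,
+        beam_width=beam_width,
+        maximum_iterations=maximum_iterations)
+    # Calling the decoder runs dynamic_decode internally.
+    outputs, final_state, lengths = decoder(
+        embedding_matrix, start_tokens, end_token, decoder_initial_state)
+    ```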
+  """
+
+  def __init__(self,
+               cell,
+               beam_width,
+               embedding_fn=None,
+               output_layer=None,
+               length_penalty_weight=0.0,
+               coverage_penalty_weight=0.0,
+               reorder_tensor_arrays=True,
+               **kwargs):
+    """Initialize the BeamSearchDecoderV2.
+
+    Args:
+      cell: An `RNNCell` instance.
+      beam_width:  Python integer, the number of beams.
+      embedding_fn: A callable that takes a vector tensor of `ids` (argmax ids).
+      output_layer: (Optional) An instance of `tf.keras.layers.Layer`, i.e.,
+        `tf.keras.layers.Dense`.  Optional layer to apply to the RNN output
+        prior to storing the result or sampling.
+      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
+      coverage_penalty_weight: Float weight to penalize the coverage of source
+        sentence. Disabled with 0.0.
+      reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell
+        state will be reordered according to the beam search path. If the
+        `TensorArray` can be reordered, the stacked form will be returned.
+        Otherwise, the `TensorArray` will be returned as is. Set this flag to
+        `False` if the cell state contains `TensorArray`s that are not amenable
+        to reordering.
+      **kwargs: Dict, other keyword arguments for initialization.
+
+    Raises:
+      TypeError: if `cell` is not an instance of `RNNCell`,
+        or `output_layer` is not an instance of `tf.keras.layers.Layer`.
+      ValueError: if `embedding_fn` is provided but is not callable.
+    """
+    super(BeamSearchDecoderV2, self).__init__(
+        cell,
+        beam_width,
+        output_layer=output_layer,
+        length_penalty_weight=length_penalty_weight,
+        coverage_penalty_weight=coverage_penalty_weight,
+        reorder_tensor_arrays=reorder_tensor_arrays,
+        **kwargs)
+
+    if embedding_fn is None or callable(embedding_fn):
+      self._embedding_fn = embedding_fn
+    else:
+      raise ValueError("embedding_fn is expected to be a callable, got %s" %
+                       type(embedding_fn))
+
+  def initialize(self,
+                 embedding,
+                 start_tokens,
+                 end_token,
+                 initial_state):
+    """Initialize the decoder.
+
+    Args:
+      embedding: A tensor containing the embedding weights, used as the
+        `params` argument for `embedding_lookup`.
+      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
+      end_token: `int32` scalar, the token that marks end of decoding.
+      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
+
+    Returns:
+      `(finished, start_inputs, initial_state)`.
+
+    Raises:
+      ValueError: If `start_tokens` is not a vector or `end_token` is not a
+        scalar.
+    """
+    if embedding is not None and self._embedding_fn is not None:
+      raise ValueError(
+          "embedding and embedding_fn cannot be provided at the same time")
+    elif embedding is not None:
+      self._embedding_fn = (
+          lambda ids: embedding_ops.embedding_lookup(embedding, ids))
+
+    self._start_tokens = ops.convert_to_tensor(
+        start_tokens, dtype=dtypes.int32, name="start_tokens")
+    if self._start_tokens.get_shape().ndims != 1:
+      raise ValueError("start_tokens must be a vector")
+    self._end_token = ops.convert_to_tensor(
+        end_token, dtype=dtypes.int32, name="end_token")
+    if self._end_token.get_shape().ndims != 0:
+      raise ValueError("end_token must be a scalar")
+
+    self._batch_size = array_ops.size(start_tokens)
+    self._initial_cell_state = nest.map_structure(
+        self._maybe_split_batch_beams, initial_state, self._cell.state_size)
+    self._start_tokens = array_ops.tile(
+        array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width])
+    self._start_inputs = self._embedding_fn(self._start_tokens)
+
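+    # As in `BeamSearchDecoder`, initially mark only the first beam as
+    # unfinished; its duplicates are treated as finished so they are not
+    # expanded on the first decoding step.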
+    self._finished = array_ops.one_hot(
+        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
+        depth=self._beam_width,
+        on_value=False,
+        off_value=True,
+        dtype=dtypes.bool)
+
+    finished, start_inputs = self._finished, self._start_inputs
+
+    dtype = nest.flatten(self._initial_cell_state)[0].dtype
+    log_probs = array_ops.one_hot(  # shape(batch_sz, beam_sz)
+        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
+        depth=self._beam_width,
+        on_value=ops.convert_to_tensor(0.0, dtype=dtype),
+        off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype),
+        dtype=dtype)
+    init_attention_probs = get_attention_probs(
+        self._initial_cell_state, self._coverage_penalty_weight)
+    if init_attention_probs is None:
+      init_attention_probs = ()
+
+    initial_state = BeamSearchDecoderState(
+        cell_state=self._initial_cell_state,
+        log_probs=log_probs,
+        finished=finished,
+        lengths=array_ops.zeros(
+            [self._batch_size, self._beam_width], dtype=dtypes.int64),
+        accumulated_attention_probs=init_attention_probs)
+
+    return (finished, start_inputs, initial_state)
+
+  @property
+  def output_dtype(self):
+    # Assume the dtype of the cell output matches the dtype of the first
+    # component of the initial cell state; return that dtype for the scores
+    # structure, and int32 for the predicted and parent ids.
+    dtype = nest.flatten(self._initial_cell_state)[0].dtype
+    return BeamSearchDecoderOutput(
+        scores=nest.map_structure(lambda _: dtype, self._rnn_output_size()),
+        predicted_ids=dtypes.int32,
+        parent_ids=dtypes.int32)
+
+  def call(self, embedding, start_tokens, end_token, initial_state, **kwargs):
+    init_kwargs = kwargs
+    init_kwargs["start_tokens"] = start_tokens
+    init_kwargs["end_token"] = end_token
+    init_kwargs["initial_state"] = initial_state
+    return decoder.dynamic_decode(self,
+                                  output_time_major=self.output_time_major,
+                                  impute_finished=self.impute_finished,
+                                  maximum_iterations=self.maximum_iterations,
+                                  parallel_iterations=self.parallel_iterations,
+                                  swap_memory=self.swap_memory,
+                                  decoder_init_input=embedding,
+                                  decoder_init_kwargs=init_kwargs)
+
+
 def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                       beam_width, end_token, length_penalty_weight,
                       coverage_penalty_weight):
@@ -1068,7 +1323,7 @@
   """
   if isinstance(gather_from, tensor_array_ops.TensorArray):
     return gather_from
-  _check_maybe(gather_from)
+  _check_ndims(gather_from)
   if gather_from.shape.ndims >= len(gather_shape):
     return _tensor_gather_helper(
         gather_indices=gather_indices,
diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index f88b03e..7dd52df 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -4,17 +4,14 @@
     "LICENSE",
 ])
 
-load(
-    "//tensorflow:tensorflow.bzl",
-    "py_test",
-    "tf_gen_op_wrapper_py",
-)
+load("//tensorflow:tensorflow.bzl", "py_test")
 
 py_test(
     name = "summary_ops_test",
     srcs = ["summary_ops_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":summary",
         ":summary_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:errors",
@@ -22,7 +19,6 @@
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform",
         "//tensorflow/python:state_ops",
-        "//tensorflow/python:summary_ops_v2",
         "//tensorflow/python:training",
         "//tensorflow/python/eager:function",
         "//tensorflow/python/eager:test",
@@ -35,6 +31,7 @@
     srcs = ["summary_ops_graph_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":summary",
         ":summary_test_util",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
@@ -43,7 +40,6 @@
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:summary_ops_v2",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
         "@six_archive//:six",
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 807741e..8e13f7f 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -22,6 +22,7 @@
 
 import six
 
+from tensorflow.contrib.summary import summary as summary_ops
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
@@ -32,7 +33,6 @@
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import summary_ops_v2 as summary_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index 10e4556..27bfdeb 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -25,6 +25,7 @@
 import numpy as np
 import six
 
+from tensorflow.contrib.summary import summary as summary_ops
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
@@ -36,7 +37,6 @@
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import summary_ops_v2 as summary_ops
 from tensorflow.python.platform import gfile
 from tensorflow.python.training import training_util
 
diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD
index 398ac31..583bbf9 100644
--- a/tensorflow/contrib/tensor_forest/BUILD
+++ b/tensorflow/contrib/tensor_forest/BUILD
@@ -537,8 +537,9 @@
 
 py_test(
     name = "random_forest_test",
-    size = "large",
+    size = "medium",
     srcs = ["client/random_forest_test.py"],
+    shard_count = 6,
     srcs_version = "PY2AND3",
     tags = [
         "noasan",
diff --git a/tensorflow/contrib/tensor_forest/kernels/model_ops.cc b/tensorflow/contrib/tensor_forest/kernels/model_ops.cc
index b9aad36..76b1d2b 100644
--- a/tensorflow/contrib/tensor_forest/kernels/model_ops.cc
+++ b/tensorflow/contrib/tensor_forest/kernels/model_ops.cc
@@ -304,7 +304,7 @@
     auto worker_threads = context->device()->tensorflow_cpu_worker_threads();
     int num_threads = worker_threads->num_threads;
     const int64 costPerTraverse = 500;
-    auto traverse = [this, &set_leaf_ids, &data_set, decision_tree_resource,
+    auto traverse = [&set_leaf_ids, &data_set, decision_tree_resource,
                      num_data](int64 start, int64 end) {
       CHECK(start <= end);
       CHECK(end <= num_data);
diff --git a/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc b/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc
index fe2c91c..0243f10 100644
--- a/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc
+++ b/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc
@@ -307,7 +307,7 @@
     // from a digits run on local desktop.  Heuristics might be necessary
     // if it really matters that much.
     const int64 costPerUpdate = 1000;
-    auto update = [this, &target, &leaf_ids_tensor, &num_targets, &data_set,
+    auto update = [&target, &leaf_ids_tensor, &num_targets, &data_set,
                    fertile_stats_resource, &locks, &set_lock, &ready_to_split,
                    num_data](int64 start, int64 end) {
       CHECK(start <= end);
@@ -317,7 +317,7 @@
                   static_cast<int32>(end), &ready_to_split);
     };
 
-    auto update_collated = [this, &target, &num_targets, fertile_stats_resource,
+    auto update_collated = [&target, &num_targets, fertile_stats_resource,
                             tree_resource, &leaf_examples, &set_lock,
                             &ready_to_split, &data_set,
                             num_leaves](int64 start, int64 end) {
diff --git a/tensorflow/contrib/tensor_forest/python/ops/model_ops.py b/tensorflow/contrib/tensor_forest/python/ops/model_ops.py
index 290c16f..40bf708 100644
--- a/tensorflow/contrib/tensor_forest/python/ops/model_ops.py
+++ b/tensorflow/contrib/tensor_forest/python/ops/model_ops.py
@@ -35,7 +35,7 @@
 from tensorflow.python.ops import resources
 from tensorflow.python.platform import resource_loader
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 
 
 _model_ops = loader.load_op_library(
diff --git a/tensorflow/contrib/tensor_forest/python/ops/stats_ops.py b/tensorflow/contrib/tensor_forest/python/ops/stats_ops.py
index 9184198..80afcfb 100644
--- a/tensorflow/contrib/tensor_forest/python/ops/stats_ops.py
+++ b/tensorflow/contrib/tensor_forest/python/ops/stats_ops.py
@@ -32,7 +32,7 @@
 from tensorflow.python.ops import resources
 from tensorflow.python.platform import resource_loader
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 
 
 _stats_ops = loader.load_op_library(
diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD
index d1be31d..4ba814b 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD
@@ -161,7 +161,10 @@
     ],
     shard_count = 10,
     srcs_version = "PY2AND3",
-    tags = ["no_pip_gpu"],  # b/63391119
+    tags = [
+        "no_pip_gpu",  # b/63391119
+        "notap",  # b/124520733
+    ],
     deps = [
         ":estimators",
         ":feature_keys",
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 1859dee..7c1661d 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -23,17 +23,12 @@
     ],
 )
 
-cc_library(
-    name = "all_ops",
+py_library(
+    name = "tpu_py",
+    srcs = ["python/ops/tpu_ops.py"],
+    srcs_version = "PY2AND3",
     deps = [
-        ":cross_replica_ops_op_lib",
-        ":heartbeat_ops_op_lib",
-        ":host_compute_ops_op_lib",
-        ":infeed_ops_op_lib",
-        ":outfeed_ops_op_lib",
-        ":replication_ops_op_lib",
-        ":tpu_configuration_ops_op_lib",
-        ":tpu_embedding_ops_op_lib",
+        "//tensorflow/python/tpu:tpu_py",
     ],
 )
 
@@ -42,19 +37,7 @@
     srcs = ["python/tpu/async_checkpoint.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:summary",
-        "//tensorflow/python:summary_ops_v2",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/tpu:async_checkpoint",
     ],
 )
 
@@ -75,145 +58,20 @@
         ":functional",
         ":tpu_embedding",
         ":tpu_lib",
-        ":tpu_ordinal_selector_py",
         "//tensorflow/contrib/training:training_py",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:function",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:session",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:summary",
-        "//tensorflow/python:summary_ops_v2",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/estimator:estimator_py",
-        "//tensorflow/python/estimator:util",
-        "@six_archive//:six",
+        "//tensorflow/python/tpu:tpu_estimator",
     ],
 )
 
-tf_gen_op_libs(
-    op_lib_names = [
-        "cross_replica_ops",
-        "heartbeat_ops",
-        "host_compute_ops",
-        "infeed_ops",
-        "outfeed_ops",
-        "replication_ops",
-        "tpu_configuration_ops",
-        "tpu_embedding_ops",
-        "tpu_ordinal_selector_op",
-        "functional_ops",
-    ],
-    deps = [
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
-        "//tensorflow/contrib/tpu/utils:tpu_embedding_optimization_parameters_utils",
-        "//tensorflow/contrib/tpu/utils:tpu_embedding_output_layout_utils",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_proto_parsing",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
-tf_custom_op_library(
-    name = "python/ops/_tpu_ops.so",
-    srcs = [
-        "ops/cross_replica_ops.cc",
-        "ops/heartbeat_ops.cc",
-        "ops/host_compute_ops.cc",
-        "ops/infeed_ops.cc",
-        "ops/outfeed_ops.cc",
-        "ops/replication_ops.cc",
-        "ops/tpu_configuration_ops.cc",
-        "ops/tpu_embedding_ops.cc",
-    ],
-    deps = [
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
-        "//tensorflow/contrib/tpu/utils:tpu_embedding_optimization_parameters_utils",
-        "//tensorflow/contrib/tpu/utils:tpu_embedding_output_layout_utils",
-        "//tensorflow/core:lib_proto_parsing",
-    ],
-)
-
-tf_gen_op_wrapper_py(
-    name = "tpu_ops",
-    hidden = [
-        "SendTPUEmbeddingGradients",
-        "EnqueueTPUEmbeddingIntegerBatch",
-        "EnqueueTPUEmbeddingSparseBatch",
-        "EnqueueTPUEmbeddingSparseTensorBatch",
-    ],
-    deps = [
-        ":cross_replica_ops_op_lib",
-        ":heartbeat_ops_op_lib",
-        ":host_compute_ops_op_lib",
-        ":infeed_ops_op_lib",
-        ":outfeed_ops_op_lib",
-        ":replication_ops_op_lib",
-        ":tpu_configuration_ops_op_lib",
-        ":tpu_embedding_ops_op_lib",
-    ],
-)
-
-tf_custom_op_library(
-    name = "python/ops/_tpu_ordinal_selector_op.so",
-    srcs = ["ops/tpu_ordinal_selector_op.cc"],
-)
-
-tf_custom_op_py_library(
-    name = "tpu_ordinal_selector_py",
-    srcs = ["python/ops/tpu_ordinal_selector_op.py"],
-    dso = [":python/ops/_tpu_ordinal_selector_op.so"],
-    kernels = [
-        ":tpu_ordinal_selector_op_op_lib",
-    ],
-    srcs_version = "PY2AND3",
-    visibility = ["//visibility:public"],
-    deps = [
-        ":tpu_ordinal_selector_op",
-    ],
-)
-
-tf_gen_op_wrapper_py(
-    name = "tpu_ordinal_selector_op",
-    deps = [
-        ":tpu_ordinal_selector_op_op_lib",
-    ],
-)
-
-tf_custom_op_library(
-    name = "python/ops/_functional_ops.so",
-    srcs = ["ops/functional_ops.cc"],
-)
-
-tf_gen_op_wrapper_py(
-    name = "gen_functional_ops",
-    out = "python/tpu/gen_functional_ops.py",
-    hidden = [
-        "TPUPartitionedCall",
-    ],
-    deps = [":functional_ops_op_lib"],
-)
-
-tf_custom_op_py_library(
+py_library(
     name = "functional",
     srcs = ["python/tpu/functional.py"],
-    dso = [":python/ops/_functional_ops.so"],
-    kernels = [
-        ":functional_ops_op_lib",
-    ],
     srcs_version = "PY2AND3",
     visibility = [
         "//visibility:public",
     ],
     deps = [
-        ":gen_functional_ops",
+        "//tensorflow/python/tpu:functional",
     ],
 )
 
@@ -222,30 +80,7 @@
     srcs = ["python/profiler/__init__.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/tpu/profiler:tpu_profiler_analysis_pb2_grpc",
-        "//tensorflow/contrib/tpu/profiler:tpu_profiler_analysis_proto_py",
-        "//tensorflow/contrib/tpu/profiler:trace_events_proto_py",
-        "//tensorflow/python:util",
-    ],
-)
-
-tf_custom_op_py_library(
-    name = "tpu_py",
-    srcs = ["python/ops/tpu_ops.py"],
-    dso = [":python/ops/_tpu_ops.so"],
-    kernels = [
-        ":all_ops",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":profiler",
-        ":tpu_ops",
-        "//tensorflow/contrib/compiler:xla",
-        "//tensorflow/contrib/util:util_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:util",
+        "//tensorflow/python/tpu/profiler",
     ],
 )
 
@@ -262,6 +97,7 @@
         ":tpu_embedding",
         ":tpu_estimator",
         ":tpu_lib",
+        "//tensorflow/python/tpu",
     ],
 )
 
@@ -284,8 +120,8 @@
         "//tensorflow/contrib/cluster_resolver:cluster_resolver_py",
         "//tensorflow/contrib/distribute",
         "//tensorflow/contrib/framework:framework_py",
-        "//tensorflow/contrib/tpu/proto:compilation_result_proto_py",
         "//tensorflow/core:protos_all_py",
+        "//tensorflow/core/protobuf/tpu:compilation_result_proto_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
@@ -325,30 +161,12 @@
     srcs_version = "PY2AND3",
     deps = [
         ":datasets",
+        ":functional",
         ":profiler",
         ":tpu_py",
-        "//tensorflow/compiler/xla/experimental/xla_sharding",
-        "//tensorflow/compiler/xla/python_api:xla_shape",
         "//tensorflow/contrib/cluster_resolver:cluster_resolver_py",
         "//tensorflow/contrib/compiler:xla",
-        "//tensorflow/contrib/tpu/proto:compilation_result_proto_py",
-        "//tensorflow/contrib/tpu/proto:dynamic_padding_proto_py",
-        "//tensorflow/contrib/tpu/proto:optimization_parameters_proto_py",
-        "//tensorflow/contrib/tpu/proto:topology_proto_py",
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_py",
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_output_layout_proto_py",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:control_flow_util",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:training",
-        "//tensorflow/python:util",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/ops/losses",
+        "//tensorflow/python/tpu:tpu_lib",
     ],
 )
 
@@ -359,125 +177,20 @@
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
-
-tf_py_test(
-    name = "datasets_test",
-    size = "medium",
-    srcs = ["python/tpu/datasets_test.py"],
-    additional_deps = [
-        "//tensorflow/python:client_testlib",
-        ":datasets",
-    ],
-    grpc_enabled = True,
-    shard_count = 4,
-    tags = ["no_oss"],
-)
-
-tf_py_test(
-    name = "tpu_test",
-    size = "small",
-    srcs = ["python/tpu/tpu_test.py"],
-    additional_deps = [
-        ":tpu",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:layers",
-    ],
-    tags = ["no_windows"],  # TODO: needs investigation on Windows
-)
-
-tf_py_test(
-    name = "tpu_sharding_test",
-    size = "small",
-    srcs = ["python/tpu/tpu_sharding_test.py"],
-    additional_deps = [
-        ":tpu",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-    ],
-)
-
-tf_py_test(
-    name = "bfloat16_test",
-    size = "small",
-    srcs = ["python/tpu/bfloat16_test.py"],
-    additional_deps = [
-        ":tpu",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-    ],
-)
-
-tf_py_test(
-    name = "tpu_infeed_test",
-    size = "small",
-    srcs = ["python/tpu/tpu_infeed_test.py"],
-    additional_deps = [
-        ":tpu",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_test_lib",
-    ],
-)
-
-tf_py_test(
-    name = "tpu_config_test",
-    size = "small",
-    srcs = ["python/tpu/tpu_config_test.py"],
-    additional_deps = [
-        ":tpu_estimator",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_test_lib",
-    ],
-)
-
-tf_py_test(
-    name = "tpu_estimator_signals_test",
-    size = "small",
-    srcs = ["python/tpu/tpu_estimator_signals_test.py"],
-    additional_deps = [
-        ":tpu_estimator",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_test_lib",
-    ],
-)
-
-tf_py_test(
-    name = "topology_test",
-    size = "medium",
-    srcs = ["python/tpu/topology_test.py"],
-    additional_deps = [
-        ":tpu",
-        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python/tpu:datasets",
     ],
 )
 
 py_library(
     name = "tpu_embedding",
-    srcs = ["python/tpu/tpu_embedding.py"],
+    srcs = [
+        "python/tpu/tpu_embedding.py",
+        "python/tpu/tpu_embedding_gradient.py",
+    ],
     srcs_version = "PY2AND3",
     deps = [
         ":tpu_lib",
-        ":tpu_ops",
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:partitioned_variables",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "@six_archive//:six",
+        "//tensorflow/python/tpu:tpu_embedding",
     ],
 )
 
@@ -486,31 +199,6 @@
     srcs = ["python/tpu/feature_column.py"],
     deps = [
         ":tpu_lib",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/feature_column",
-        "//tensorflow/python/feature_column:feature_column_py",
+        "//tensorflow/python/tpu:feature_column",
     ],
 )
-
-tf_py_test(
-    name = "feature_column_test",
-    srcs = [
-        "python/tpu/feature_column_test.py",
-    ],
-    additional_deps = [
-        ":feature_column",
-        "//third_party/py/numpy",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:lookup_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/feature_column",
-        "//tensorflow/python/feature_column:feature_column_py",
-    ],
-    main = "python/tpu/feature_column_test.py",
-)
diff --git a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
deleted file mode 100644
index 87e3a59..0000000
--- a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-namespace tensorflow {
-using shape_inference::DimensionHandle;
-using shape_inference::InferenceContext;
-using shape_inference::ShapeHandle;
-
-REGISTER_OP("AllToAll")
-    .Input("input: T")
-    .Input("group_assignment: int32")
-    .Output("output: T")
-    .Attr("T: {bfloat16, float}")
-    .Attr("concat_dimension: int")
-    .Attr("split_dimension: int")
-    .Attr("split_count: int")
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input = c->input(0);
-      int64 rank;
-      if (c->RankKnown(input)) {
-        rank = c->Rank(input);
-      } else {
-        return errors::InvalidArgument("input's rank is unknown.");
-      }
-      int concat_dimension;
-      int split_dimension;
-
-      TF_RETURN_IF_ERROR(c->GetAttr("concat_dimension", &concat_dimension));
-
-      if (concat_dimension < 0 || concat_dimension >= rank) {
-        return errors::InvalidArgument("concat_dimension ", concat_dimension,
-                                       " is out of range of input rank ", rank);
-      }
-
-      TF_RETURN_IF_ERROR(c->GetAttr("split_dimension", &split_dimension));
-      if (split_dimension < 0 || split_dimension >= rank) {
-        return errors::InvalidArgument("split_dimension ", split_dimension,
-                                       " is out of range of input rank ", rank);
-      }
-
-      std::vector<DimensionHandle> dims;
-      dims.resize(rank);
-
-      for (int32 i = 0; i < rank; ++i) {
-        int64 in_idx = i;
-        if (i == concat_dimension) {
-          in_idx = split_dimension;
-        } else if (i == split_dimension) {
-          in_idx = concat_dimension;
-        }
-
-        dims[i] = c->Dim(input, in_idx);
-      }
-
-      c->set_output(0, c->MakeShape(dims));
-      return Status::OK();
-    })
-    .Doc(R"doc(
-An Op to exchange data across TPU replicas. On each replica, the input is
-split into `split_count` blocks along `split_dimension` and send to the other
-replicas given group_assignment. After receiving `split_count` - 1 blocks from
-other replicas, we concatenate the blocks along `concat_dimension` as the
-output.
-
-For example, suppose there are 2 TPU replicas:
-replica 0 receives input: `[[A, B]]`
-replica 1 receives input: `[[C, D]]`
-
-group_assignment=`[[0, 1]]`
-concat_dimension=0
-split_dimension=1
-split_count=2
-
-replica 0's output: `[[A], [C]]`
-replica 1's output: `[[B], [D]]`
-
-input: The local input to the sum.
-group_assignment: An int32 tensor with shape
-  [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the
-  replica ids in the ith subgroup.
-concat_dimension: The dimension number to concatenate.
-split_dimension: The dimension number to split.
-split_count: The number of splits, this number must equal to the sub-group
-  size(group_assignment.get_shape()[1])
-output: The exchanged result.
-T: The type of elements to be exchanged.
-)doc");
-
-REGISTER_OP("CrossReplicaSum")
-    .Input("input: T")
-    .Input("group_assignment: int32")
-    .Output("output: T")
-    .Attr("T: {bfloat16, float}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-An Op to sum inputs across replicated TPU instances. Each instance supplies its
-own input.
-
-For example, suppose there are 8 TPU instances: `[A, B, C, D, E, F, G, H]`.
-Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0,
-and `B, D, F, H` as group 1. Thus we get the outputs:
-`[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`.
-
-input: The local input to the sum.
-group_assignment: An int32 tensor with shape
-  [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the
-  replica ids in the ith subgroup.
-output: The sum of all the distributed inputs.
-T: The type of elements to be summed.
-)doc");
-
-REGISTER_OP("CollectivePermute")
-    .Input("input: T")
-    .Input("source_target_pairs: int32")
-    .Output("output: T")
-    .Attr("T: numbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-An Op to permute tensors across replicated TPU instances. Each instance
-supplies its own input.
-
-For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing
-source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs:
-`[D, A, B, C]`.
-
-input: The local input to be permuted. Currently only supports float and
-  bfloat16.
-source_target_pairs: A tensor with shape [num_pairs, 2].
-output: The permuted input.
-T: The type of elements to be exchanged.
-)doc");
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/heartbeat_ops.cc b/tensorflow/contrib/tpu/ops/heartbeat_ops.cc
deleted file mode 100644
index ca0f5bc..0000000
--- a/tensorflow/contrib/tpu/ops/heartbeat_ops.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/lib/core/status.h"
-
-namespace tensorflow {
-
-REGISTER_OP("WorkerHeartbeat")
-    .Input("request: string")
-    .Output("response: string")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Worker heartbeat op.
-
-Heartbeats may be sent periodically to indicate the coordinator is still active,
-to retrieve the current worker status and to expedite shutdown when necessary.
-
-request: A string tensor containing a serialized WorkerHeartbeatRequest
-response: A string tensor containing a serialized WorkerHeartbeatResponse
-)doc");
-
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/infeed_ops.cc b/tensorflow/contrib/tpu/ops/infeed_ops.cc
deleted file mode 100644
index 2ed16c2..0000000
--- a/tensorflow/contrib/tpu/ops/infeed_ops.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-namespace tensorflow {
-
-using shape_inference::InferenceContext;
-using shape_inference::ShapeHandle;
-
-REGISTER_OP("InfeedDequeue")
-    .Output("output: dtype")
-    .Attr("dtype: type")
-    .Attr("shape: shape")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::ExplicitShape)
-    .Doc(R"doc(
-A placeholder op for a value that will be fed into the computation.
-
-output: A tensor that will be provided using the infeed mechanism.
-dtype: The type of elements in the tensor.
-shape: The shape of the tensor.
-)doc");
-
-REGISTER_OP("InfeedEnqueue")
-    .Input("input: dtype")
-    .Attr("dtype: type")
-    .Attr("shape: shape = {}")
-    .Attr("layout: list(int) = []")
-    .Attr("device_ordinal: int = -1")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .SetIsStateful()
-    .Doc(R"doc(
-An op which feeds a single Tensor value into the computation.
-
-input: A tensor that will be provided using the infeed mechanism.
-dtype: The type of elements in the tensor.
-shape: The shape of the tensor.
-layout: A vector holding the requested layout in minor-to-major sequence.
-If a layout attribute is passed, but its values are all -1, the layout will
-be computed by the infeed operation.
-device_ordinal: The TPU device to use. This should be -1 when the Op
-is running on a TPU device, and >= 0 when the Op is running on the CPU
-device.
-)doc");
-
-REGISTER_OP("InfeedEnqueueTuple")
-    .Input("inputs: dtypes")
-    .Attr("dtypes: list(type)")
-    .Attr("shapes: list(shape)")
-    .Attr("layouts: list(int) = []")
-    .Attr("device_ordinal: int = -1")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .SetIsStateful()
-    .Doc(R"doc(
-An op which feeds multiple Tensor values into the computation as an XLA tuple.
-
-inputs: A list of tensors that will be provided using the infeed mechanism.
-dtypes: The element types of each element in `inputs`.
-shapes: The shapes of each tensor in `inputs`.
-layouts: A vector holding the requested layout in minor-to-major sequence for
-all the tuple shapes, in the order the shapes appear in the "shapes" input.
-The layout elements for a sub-shape can be set to -1, in which case the
-corresponding layout will be computed by the infeed operation.
-device_ordinal: The TPU device to use. This should be -1 when the Op
-is running on a TPU device, and >= 0 when the Op is running on the CPU
-device.
-)doc");
-
-REGISTER_OP("InfeedDequeueTuple")
-    .Output("outputs: dtypes")
-    .Attr("dtypes: list(type)")
-    .Attr("shapes: list(shape)")
-    .SetIsStateful()
-    .SetShapeFn([](InferenceContext* c) {
-      std::vector<PartialTensorShape> shapes;
-      TF_RETURN_IF_ERROR(c->GetAttr("shapes", &shapes));
-      for (int i = 0; i < shapes.size(); ++i) {
-        ShapeHandle out;
-        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shapes[i], &out));
-        c->set_output(i, out);
-      }
-      return Status::OK();
-    })
-    .Doc(R"doc(
-A placeholder op for multiple values that will be fed into the computation
-simultaneously as an XLA tuple.
-
-outputs: A list of tensors that will be provided using the infeed mechanism.
-dtypes: The element types of each element in `outputs`.
-shapes: The shapes of each tensor in `outputs`.
-)doc");
-
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/outfeed_ops.cc b/tensorflow/contrib/tpu/ops/outfeed_ops.cc
deleted file mode 100644
index b05c76c..0000000
--- a/tensorflow/contrib/tpu/ops/outfeed_ops.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-namespace tensorflow {
-
-using shape_inference::InferenceContext;
-using shape_inference::ShapeHandle;
-
-REGISTER_OP("OutfeedEnqueue")
-    .Input("input: dtype")
-    .Attr("dtype: type")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-An op which emits a single Tensor value from an XLA computation.
-
-input: A tensor that will be inserted into the outfeed queue.
-)doc");
-
-REGISTER_OP("OutfeedEnqueueTuple")
-    .Input("inputs: dtypes")
-    .Attr("dtypes: list(type)")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-An op which emits multiple Tensor values from an XLA computation.
-
-inputs: A list of tensors that will be inserted into the outfeed queue as an
-XLA tuple.
-)doc");
-
-REGISTER_OP("OutfeedDequeue")
-    .Output("output: dtype")
-    .Attr("dtype: type")
-    .Attr("shape: shape")
-    .Attr("device_ordinal: int = -1")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::ExplicitShape)
-    .Doc(R"doc(
-Retrieves a single tensor from the computation outfeed.  This operation will
-block indefinitely until data is available.
-
-output: A tensor that will be read from the device outfeed.
-dtype: The type of elements in the tensor.
-shape: The shape of the tensor.
-device_ordinal: The TPU device to use. This should be -1 when the Op
-is running on a TPU device, and >= 0 when the Op is running on the CPU
-device.
-)doc");
-
-REGISTER_OP("OutfeedDequeueTuple")
-    .Output("outputs: dtypes")
-    .Attr("dtypes: list(type)")
-    .Attr("shapes: list(shape)")
-    .Attr("device_ordinal: int = -1")
-    .SetIsStateful()
-    .SetShapeFn([](InferenceContext* c) {
-      std::vector<PartialTensorShape> shapes;
-      std::vector<DataType> dtypes;
-      TF_RETURN_IF_ERROR(c->GetAttr("shapes", &shapes));
-      TF_RETURN_IF_ERROR(c->GetAttr("dtypes", &dtypes));
-      if (shapes.size() != dtypes.size()) {
-        return errors::InvalidArgument(
-            "Incorrect number of output shapes specified");
-      }
-      for (int i = 0; i < shapes.size(); ++i) {
-        ShapeHandle out;
-        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shapes[i], &out));
-        c->set_output(i, out);
-      }
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Retrieve multiple values that will be emitted by the computation as an XLA
-tuple.  This operations will block indefinitely until data is available.
-Output `i` corresponds to XLA tuple element `i`.
-
-outputs: A list of tensors that will be read from the outfeed.
-dtypes: The element types of each element in `outputs`.
-shapes: The shapes of each tensor in `outputs`.
-device_ordinal: The TPU device to use. This should be -1 when the Op
-is running on a TPU device, and >= 0 when the Op is running on the CPU
-device.
-)doc");
-
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc
deleted file mode 100644
index d4180d1..0000000
--- a/tensorflow/contrib/tpu/ops/replication_ops.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-namespace tensorflow {
-
-using shape_inference::InferenceContext;
-using shape_inference::ShapeHandle;
-
-REGISTER_OP("TPUReplicateMetadata")
-    .Attr("num_replicas: int >= 0")
-    .Attr("num_cores_per_replica: int = 1")
-    .Attr("topology: string = \"\"")
-    .Attr("use_tpu: bool = true")
-    .Attr("device_assignment: list(int) = []")
-    // Deprecated. Use num_cores_per_replica instead.
-    .Attr("computation_shape: list(int) = []")
-    .Attr("host_compute_core: list(string) = []")
-    .Attr("padding_map: list(string) = []")
-    .SetShapeFn(shape_inference::UnknownShape);
-
-REGISTER_OP("TPUReplicatedInput")
-    .Input("inputs: N * T")
-    .Output("output: T")
-    .Attr("N: int >= 1")
-    .Attr("T: type")
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle cur = c->input(c->num_inputs() - 1);
-      for (int i = c->num_inputs() - 2; i >= 0; --i) {
-        TF_RETURN_WITH_CONTEXT_IF_ERROR(c->Merge(c->input(i), cur, &cur),
-                                        "From merging shape ", i,
-                                        " with other shapes.");
-      }
-      c->set_output(0, cur);
-
-      // If this is a resource, unify the resource shapes.
-      DataType dtype;
-      TF_RETURN_IF_ERROR(c->GetAttr("T", &dtype));
-      if (dtype == DT_RESOURCE) {
-        const std::vector<shape_inference::ShapeAndType>* shapes_and_types =
-            nullptr;
-        for (int i = c->num_inputs() - 1; i >= 0; --i) {
-          if (shapes_and_types) {
-            // The return value of MergeInputHandleShapesAndTypes indicates
-            // the shape was refined, not that there was an error.
-            // TODO(phawkins): there seems to be no way to discover errors.
-            (void)c->MergeInputHandleShapesAndTypes(i, *shapes_and_types);
-          } else {
-            shapes_and_types = c->input_handle_shapes_and_types(i);
-          }
-        }
-        if (shapes_and_types) {
-          c->set_output_handle_shapes_and_types(0, *shapes_and_types);
-        }
-      }
-      return Status::OK();
-    })
-    .Doc(
-        "Operator that connects N unreplicated inputs to an N-way "
-        "replicated TPU computation.");
-
-REGISTER_OP("TPUReplicatedOutput")
-    .Input("input: T")
-    .Output("outputs: num_replicas * T")
-    .Attr("num_replicas: int >= 1")
-    .Attr("T: type")
-    .SetShapeFn([](InferenceContext* c) {
-      for (int i = 0; i < c->num_outputs(); ++i) {
-        c->set_output(i, c->input(0));
-      }
-      return Status::OK();
-    })
-    .Doc(
-        "Operator that connects the output of an N-way replicated TPU "
-        "computation to N separate outputs.");
-
-REGISTER_OP("TPUCompilationResult")
-    .Output("output: string")
-    .SetShapeFn(shape_inference::ScalarShape);
-
-REGISTER_OP("TPUReplicate")
-    .Attr("computation: func")
-    .Attr("num_replicas: int >= 1")
-    .Attr("num_cores_per_replica: int = 1")
-    .Attr("topology: string = \"\"")
-    .Attr("use_tpu: bool = true")
-    .Attr("device_assignment: list(int) = []")
-    .Attr("host_compute_core: list(string) = []")
-    .Attr("Tinputs: list(type) >= 0")
-    .Attr("Tbroadcast_inputs: list(type) >= 0")
-    .Attr("NumVariables: int >= 0")
-    .Attr("Tguaranteed_constants: list(type) >= 0")
-    .Attr("output_types: list(type) >= 0")
-    .Attr("padding_map: list(string) = []")
-    .Input("inputs: Tinputs")
-    .Input("broadcast_inputs: Tbroadcast_inputs")
-    .Input("variables: NumVariables * resource")
-    .Input("guaranteed_constants: Tguaranteed_constants")
-    .Output("outputs: output_types")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Runs replicated computations on a distributed TPU system.
-
-computation: a function containing the computation to run.
-num_replicas: the number of replicas of the computation to run.
-num_cores_per_replica: the number of logical cores in each replica.
-topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
-topology.
-use_tpu: a bool indicating if this computation will run on TPU or CPU/GPU.
-Currently, only the default placement is supported (the computation is placed
-on GPU if one is available, and on CPU otherwise).
-device_assignment: a flattened array with shape
-  [replica, num_cores_per_replica, mesh_dimension] that maps the coordinates
-  of logical cores in each replica of a computation to physical coordinates in
-  the TPU topology.
-Tinputs: the types of the arguments to 'computation'.
-inputs: the inputs to 'computation', flattened, in replica-major order.
-Tbroadcast_inputs: the types of the additional arguments to broadcast to all
-  replicas.
-Tguaranteed_constants: the types of the arguments to 'guaranteed_constants'.
-broadcast_inputs: additional arguments to broadcast to all replicas. The
-  broadcast inputs are appended to the per-replica inputs when calling
-  computation.
-guaranteed_constants: arguments which have been guaranteed to not
-change their values during the session lifetime. These contain tensors marked as
-constant using the GuaranteeConstOp.
-output_types: the types of the outputs of 'computation'.
-outputs: the outputs of 'computation'.
-)doc");
-
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
deleted file mode 100644
index d5600ee..0000000
--- a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
+++ /dev/null
@@ -1,217 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/lib/core/status.h"
-
-namespace tensorflow {
-
-using shape_inference::InferenceContext;
-using shape_inference::ShapeHandle;
-
-// Configuring a distributed TPU system is achieved by running
-// the following Ops:
-//
-// 1 Run _DisconnectHostFromDistributedTPUSystem on the TPU_SYSTEM of each
-// host. This is needed in case the system had previously been configured. It
-// returns, for each host, the number of TPU chips on the host.
-//
-// 2 Run _ConfigureDistributedTPU on TPU_SYSTEM of worker 0. Takes as input the
-// number of chips on each host. Validates that all hosts have the same number
-// of chips, and that the chips are consistent with the topology set by
-// flags. Has a single output which is a proto describing the requested system
-// configuration, which is sent to all hosts.
-//
-// 3 Run _InitializeHostForDistributedTPU on the TPU_SYSTEM of each host, taking
-// as input the output from _ConfigureDistributedTPU. Has a single Tensor output
-// which is a vector of int32 indicating, for each TPU on the host, what its
-// global TPU system id is.
-//
-// 4 Run _WaitForDistributedTPU on TPU_SYSTEM, taking as input the
-// outputs from all the _InitializeHostForDistributedTPU Ops. These
-// per-host outputs are combined in the Op to construct a mapping from
-// full TPU device specs to global TPU ids. Has a single Tensor output
-// which is a matrix of int32 indicating, for each host (outer dimension)
-// and for each TPU on the host (inner dimension) what that TPU's global
-// id is. _WaitForDistributedTPU also waits for the TPU distributed
-// system to initialize fully, which may take several minutes for a
-// large system.
-//
-// 5 Run _SetGlobalTPUArray on the TPU_SYSTEM of each host, taking as input the
-// output from _WaitForDistributedTPU. This Op tells each host the global Id of
-// every TPU on every host.
-//
-// Most user code works by placing the ConfigureDistributedTPU Op on the desired
-// TPU_SYSTEM device, and a graph rewrite replaces it by the subgraph described
-// above.
-//
-//
-// A distributed TPU system can be cleanly shut down by running the following
-// Ops:
-//
-// 1 Run _DisconnectHostFromDistributedTPUSystem on the TPU_SYSTEM of each host.
-//
-// 2 Run _ShutdownDistributedTPU on the TPU_SYSTEM where
-// _ConfigureDistributedTPU was run. The Op will return an error if no system is
-// configured.
-//
-//
-// Most user code works by placing the ShutdownDistributedTPU Op on the desired
-// TPU_SYSTEM device, and a graph rewrite replaces it by the subgraph described
-// above.
-
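To make steps 1 and 2 above concrete, here is a hedged sketch of the NodeDefs the graph rewrite would construct; the node names, device strings, and host count are invented for illustration and are not taken from the rewrite itself:

#include <string>
#include <vector>

#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/status.h"

// Sketch only: step 1 disconnects each host's TPU_SYSTEM device; step 2
// feeds the per-host chip counts into _ConfigureDistributedTPU on worker 0.
// NodeDefBuilder infers the N attr from the size of the input list.
void BuildBringUpNodes(int num_hosts, std::vector<tensorflow::NodeDef>* nodes) {
  std::vector<tensorflow::NodeDefBuilder::NodeOut> chip_counts;
  for (int host = 0; host < num_hosts; ++host) {
    tensorflow::NodeDef disconnect;
    TF_CHECK_OK(tensorflow::NodeDefBuilder(
                    "disconnect_" + std::to_string(host),
                    "_DisconnectHostFromDistributedTPUSystem")
                    .Device("/job:worker/task:" + std::to_string(host) +
                            "/device:TPU_SYSTEM:0")
                    .Finalize(&disconnect));
    chip_counts.emplace_back(disconnect.name(), 0, tensorflow::DT_INT32);
    nodes->push_back(disconnect);
  }
  tensorflow::NodeDef configure;
  TF_CHECK_OK(
      tensorflow::NodeDefBuilder("configure", "_ConfigureDistributedTPU")
          .Input(chip_counts)
          .Device("/job:worker/task:0/device:TPU_SYSTEM:0")
          .Finalize(&configure));
  nodes->push_back(configure);
}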
-REGISTER_OP("_ConfigureDistributedTPU")
-    .Input("inputs: N * int32")
-    .Output("output: string")
-    .Attr("N: int >= 1")
-    .SetIsStateful()
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input;
-      // Validate that all the inputs are scalars.
-      for (int i = 0; i < c->num_inputs(); ++i) {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &input));
-      }
-      c->set_output(0, c->Scalar());
-      return Status::OK();
-    })
-    .Doc(R"doc(
-An op that sets up the centralized structures for a distributed TPU
-system.
-
-inputs: A scalar tensor for each host indicating how many TPU chips
-there are on the host.
-output: A tensor containing a TPUHostConfiguration proto serialized to
-a string, containing the information necessary to initialize the chips
-in a host.
-)doc");
-
-REGISTER_OP("_WaitForDistributedTPU")
-    .Input("inputs: N * int32")
-    .Output("topology: string")
-    .Attr("startup_timeout_sec: int = 20")
-    .Attr("N: int")
-    .SetIsStateful()
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input;
-      // Validate that all the inputs are vectors (rank 1).
-      for (int i = 0; i < c->num_inputs(); ++i) {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &input));
-      }
-      c->set_output(0, c->Scalar());
-      return ::tensorflow::Status::OK();
-    })
-    .Doc(R"doc(
-An op that blocks execution until a distributed TPU system has
-started up. This Op must be run on the same TPU_SYSTEM device as
-_ConfigureDistributedTPU, and takes as inputs the outputs from the
-_InitializeHostForDistributedTPU Ops.
-
-inputs: For each initialized host, a vector giving the global TPU id
-of each TPU on the host.
-topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
-topology.
-startup_timeout_sec: The number of seconds to wait for the TPU system
-to stabilize.
-)doc");
-
-REGISTER_OP("_SetGlobalTPUArray")
-    .Input("topology: string")
-    .SetIsStateful()
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input));
-      return ::tensorflow::Status::OK();
-    })
-    .Doc(R"doc(
-An op that informs a host of the global ids of all of the TPUs in the
-system.
-
-topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
-topology.
-)doc");
-
-REGISTER_OP("_ShutdownDistributedTPU")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-An op that shuts down a running distributed TPU system. The Op returns
-an error if no system is running. This Op must be run on the same
-TPU_SYSTEM device as the corresponding _ConfigureDistributedTPU was run
-to start the system, and must be run only after
-_DisconnectHostFromDistributedTPUSystem has completed on every host in
-the system.
-)doc");
-
-REGISTER_OP("_InitializeHostForDistributedTPU")
-    .Input("input: string")
-    .Output("tpu_ids: int32")
-    .SetIsStateful()
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input));
-      c->set_output(0, c->Vector(c->UnknownDim()));
-      return ::tensorflow::Status::OK();
-    })
-    .Doc(R"doc(
-An op that connects each chip on the host to a centralized UberDriver to allow
-them to operate as a distributed system with chips in other hosts.
-
-input: A string containing the address of the UberDriver to connect to.
-tpu_ids: A vector containing the global TPU id of each TPU on the host.
-)doc");
-
-REGISTER_OP("_DisconnectHostFromDistributedTPUSystem")
-    .Output("number_of_tpu_chips: int32")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-An op that disconnects the TPUs on a host from a running distributed
-TPU system.
-
-number_of_tpu_chips: A scalar tensor containing the number of TPU
-chips on the host.
-)doc");
-
-REGISTER_OP("ConfigureDistributedTPU")
-    .Output("topology: string")
-    .Attr("embedding_config: string = ''")
-    .Attr("tpu_embedding_config: string = ''")
-    .Attr("is_global_init: bool = false")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-An op that sets up the centralized structures for a distributed TPU
-system.
-
-topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
-topology.
-tpu_embedding_config: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that
-describes the embedding lookups of the program.
-embedding_config: Reserved. Do not use.
-is_global_init: Reserved. Do not use.
-)doc");
-
-REGISTER_OP("ShutdownDistributedTPU")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-An op that shuts down a running distributed TPU system. The Op returns
-an error if no system is running.
-)doc");
-
-}  // end namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
deleted file mode 100644
index 676aed0..0000000
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ /dev/null
@@ -1,603 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/contrib/tpu/proto/tpu_embedding_configuration.pb.h"
-#include "tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h"
-#include "tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h"
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/lib/strings/stringprintf.h"
-
-namespace tensorflow {
-
-// TPUs use a specialized mechanism for performing embedding lookups,
-// necessitating differences in TF Graphs that use embeddings on TPUs relative
-// to CPUs. Embedding lookups on TPU systems are achieved by including the
-// following in the TF Graph.
-//
-// 0. Construct a TPUEmbeddingConfiguration, specifying the embedding tables
-//    in the model, the size of the TPU system to be used, and the optimizer to
-//    be used for each table. Some of this information is redundant with other
-//    pieces of the TF Graph.
-// 1. Pass this TPUEmbeddingConfiguration to tpu.initialize_system() as the
-//    tpu_embedding_config parameter.
-// 2. Use the LoadTPUEmbedding Ops to initialize the embedding tables in TPU
-//    memories, sharded across the memories attached to each Host.
-// 3. Use EnqueueTPUEmbeddingSparseBatch to provide the TPU with embedding
-//    indices and aggregation weights.
-// 4. RecvTPUEmbeddingActivations returns a list of Tensors, containing the
-//    activations from each table specified in the configuration.
-// 5. TPUEmbeddingActivations, when used with appropriate Python libraries,
-//    enables the automatic differentiation of models that use embeddings.
-// 6. SendTPUEmbeddingGradients takes a list of Tensors (of the same shapes
-//    as those returned by RecvTPUEmbeddingActivations) containing gradients
-//    to use in updating the embedding tables.
-// 7. Before saving a checkpoint, use the RetrieveTPUEmbedding Ops to update
-//    the Graph's embedding table Variables from the updated tables in the
-//    TPU memories.
-//
-// TPU Embeddings use dedicated ops to enforce Host/TPU consistency in the
-// state of embedding table variables. Before beginning training or inference,
-// the model must Load the optimizer parameters into the TPU memories. Before
-// saving a checkpoint, the model must Retrieve the parameters back into the
-// host CPU memory.
-
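A hedged NodeDefBuilder sketch of steps 4 and 6 follows; the node names, the two-table count, and serialized_config are placeholders, and the gradient inputs are wired to the activations themselves purely so the example is self-contained:

#include <string>
#include <vector>

#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/status.h"

// Sketch only: one activation tensor per table flows out of
// RecvTPUEmbeddingActivations (step 4); matching gradients flow back in
// through SendTPUEmbeddingGradients (step 6). NN defaults to 0, so the
// learning_rates list is left empty for constant learning rates.
void BuildEmbeddingPair(const std::string& serialized_config,
                        tensorflow::NodeDef* recv, tensorflow::NodeDef* send) {
  const int num_tables = 2;  // Placeholder table count.
  TF_CHECK_OK(tensorflow::NodeDefBuilder("recv_activations",
                                         "RecvTPUEmbeddingActivations")
                  .Attr("num_outputs", num_tables)
                  .Attr("config", serialized_config)
                  .Finalize(recv));
  std::vector<tensorflow::NodeDefBuilder::NodeOut> grads;
  for (int i = 0; i < num_tables; ++i) {
    // In a real graph these would be the gradients of the loss with respect
    // to each activation tensor; the activations stand in as placeholders.
    grads.emplace_back(recv->name(), i, tensorflow::DT_FLOAT);
  }
  std::vector<tensorflow::NodeDefBuilder::NodeOut> learning_rates;  // Empty.
  TF_CHECK_OK(tensorflow::NodeDefBuilder("send_gradients",
                                         "SendTPUEmbeddingGradients")
                  .Input(grads)
                  .Input(learning_rates)
                  .Attr("config", serialized_config)
                  .Finalize(send));
}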
-namespace {
-
-void RegisterPerTableLoadAndRetrieveOps();
-
-class RegisterPerTableLoadAndRetrieveOpsOnConstruction {
- public:
-  RegisterPerTableLoadAndRetrieveOpsOnConstruction() {
-    RegisterPerTableLoadAndRetrieveOps();
-  }
-};
-
-// Object whose constructor does registrations.
-RegisterPerTableLoadAndRetrieveOpsOnConstruction
-    register_per_table_load_and_retrieve_ops_var;
-
-Status RegisterPerTableLoadOpsForAlgorithmBody(
-    tpu::OptimizationAlgorithm alg, bool is_debug_op,
-    OpRegistrationData* op_reg_data) {
-  tpu::GradientAccumulationSupport grad_accum_support;
-  TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
-
-  std::vector<tpu::StateVariableSpecification> state_variable_specs;
-  TF_CHECK_OK(GetOptimizationAlgorithmStateVariables(
-      alg,
-      grad_accum_support == tpu::GradientAccumulationSupport::kSupported &&
-          is_debug_op,
-      &state_variable_specs));
-  auto* op_def = &op_reg_data->op_def;
-  op_def->set_name(
-      strings::StrCat("LoadTPUEmbedding", GetOptimizationAlgorithmName(alg),
-                      "Parameters", (is_debug_op ? "GradAccumDebug" : "")));
-  // It is important for the order of the inputs to the op defined here
-  // to match the order in input_names because the indexes are used in
-  // the combining transformation.
-  for (const auto& parameter : state_variable_specs) {
-    if (parameter.has_user_defined() || is_debug_op) {
-      auto* arg = op_def->add_input_arg();
-      arg->set_name(parameter.name());
-      arg->set_description(
-          strings::StrCat("Value of ", parameter.name(), " used in the ",
-                          GetOptimizationAlgorithmFriendlyName(alg),
-                          " optimization algorithm."));
-      arg->set_type(DT_FLOAT);
-    }
-  }
-  {
-    auto* table_id_attr = op_def->add_attr();
-    table_id_attr->set_name("table_id");
-    table_id_attr->set_type("int");
-    table_id_attr->set_has_minimum(true);
-    table_id_attr->set_minimum(-1);
-    table_id_attr->mutable_default_value()->set_i(-1);
-  }
-  {
-    auto* table_name_attr = op_def->add_attr();
-    table_name_attr->set_name("table_name");
-    table_name_attr->set_type("string");
-    table_name_attr->mutable_default_value()->set_s("");
-  }
-  {
-    auto* num_shards_attr = op_def->add_attr();
-    num_shards_attr->set_name("num_shards");
-    num_shards_attr->set_type("int");
-  }
-  {
-    auto* shard_id_attr = op_def->add_attr();
-    shard_id_attr->set_name("shard_id");
-    shard_id_attr->set_type("int");
-  }
-  op_def->set_summary("Load embedding parameters for a single table.");
-  string parameter_descriptions;
-  for (const auto& parameter : state_variable_specs) {
-    if (parameter.has_user_defined() || is_debug_op) {
-      strings::Appendf(&parameter_descriptions,
-                       R"(
-%s: A tensor containing the initial embedding table %s to use in embedding
-lookups using the %s optimization algorithm.)",
-                       parameter.name().c_str(), parameter.name().c_str(),
-                       GetOptimizationAlgorithmFriendlyName(alg).c_str());
-    }
-  }
-  op_def->set_description(strings::Printf(R"doc(
-An op that loads optimization parameters into HBM for embedding. Must be
-preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
-embedding table configuration. For example, this op is used to install
-parameters that are loaded from a checkpoint before a training loop is
-executed.
-%s
-table_name: Name of this table; must match a name in the
-  TPUEmbeddingConfiguration proto (overrides table_id).
-num_shards: Number of shards into which the embedding tables are divided.
-shard_id: Identifier of shard for this operation.
-table_id: Index of this table in the EmbeddingLayerConfiguration proto
-  (deprecated).
-)doc",
-                                          parameter_descriptions.c_str()));
-  op_def->set_is_commutative(false);
-  op_def->set_is_aggregate(false);
-  op_def->set_is_stateful(true);
-  auto shape_inference_function =
-      [state_variable_specs,
-       is_debug_op](shape_inference::InferenceContext* c) -> Status {
-    int table_id;
-    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
-    string table_name;
-    TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    // Exactly one must be non-default.
-    if ((table_id >= 0) == (!table_name.empty())) {
-      return errors::InvalidArgument(
-          "exactly one of table_id or table_name must be non-default");
-    }
-    int num_shards;
-    TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
-    int shard_id;
-    TF_RETURN_IF_ERROR(c->GetAttr("shard_id", &shard_id));
-    const int user_param_count =
-        std::count_if(state_variable_specs.begin(), state_variable_specs.end(),
-                      [&](const tpu::StateVariableSpecification& sv) {
-                        return sv.has_user_defined() || is_debug_op;
-                      });
-    std::vector<shape_inference::ShapeHandle> inputs(user_param_count);
-    int input_index = 0;
-    for (int i = 0; i < state_variable_specs.size(); ++i) {
-      if (state_variable_specs[i].has_user_defined() || is_debug_op) {
-        std::vector<shape_inference::ShapeHandle> input_temp;
-        TF_RETURN_IF_ERROR(
-            c->input(state_variable_specs[i].name(), &input_temp));
-        if (input_temp.size() != 1) {
-          return errors::InvalidArgument("each input to be rank 1");
-        }
-        inputs[input_index] = input_temp[0];
-        ++input_index;
-      }
-    }
-    // Verify shapes have rank 2 and are compatible when they are
-    // required to be valid.
-    shape_inference::ShapeHandle parameter_shape;
-    TF_RETURN_IF_ERROR(c->WithRank(inputs[0], 2, &parameter_shape));
-    for (int j = 1; j < user_param_count; ++j) {
-      shape_inference::ShapeHandle accumulator_j_shape;
-      TF_RETURN_IF_ERROR(c->WithRank(inputs[j], 2, &accumulator_j_shape));
-      shape_inference::ShapeHandle merged;
-      TF_RETURN_IF_ERROR(
-          c->Merge(parameter_shape, accumulator_j_shape, &merged));
-    }
-    return Status::OK();
-  };
-  op_reg_data->shape_inference_fn = shape_inference_function;
-  return Status::OK();
-}
-
-Status RegisterPerTableRetrieveOpsForAlgorithmBody(
-    tpu::OptimizationAlgorithm alg, bool is_debug_op,
-    OpRegistrationData* op_reg_data) {
-  tpu::GradientAccumulationSupport grad_accum_support;
-  TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
-
-  std::vector<tpu::StateVariableSpecification> state_variable_specs;
-  TF_CHECK_OK(GetOptimizationAlgorithmStateVariables(
-      alg,
-      grad_accum_support == tpu::GradientAccumulationSupport::kSupported &&
-          is_debug_op,
-      &state_variable_specs));
-
-  auto* op_def = &op_reg_data->op_def;
-  op_def->set_name(strings::StrCat(
-      "RetrieveTPUEmbedding", tpu::GetOptimizationAlgorithmName(alg),
-      "Parameters", (is_debug_op ? "GradAccumDebug" : "")));
-  // It is important for the order of the outputs of the op defined here
-  // to match the order in output_names because the indexes are used in
-  // the combining transformation.
-  for (const auto& parameter : state_variable_specs) {
-    if (parameter.has_user_defined() || is_debug_op) {
-      auto* arg = op_def->add_output_arg();
-      arg->set_name(parameter.name());
-      arg->set_description(
-          strings::StrCat("Parameter ", parameter.name(), " updated by the ",
-                          tpu::GetOptimizationAlgorithmFriendlyName(alg),
-                          " optimization algorithm."));
-      arg->set_type(DT_FLOAT);
-    }
-  }
-  {
-    auto* table_id_attr = op_def->add_attr();
-    table_id_attr->set_name("table_id");
-    table_id_attr->set_type("int");
-    table_id_attr->set_has_minimum(true);
-    table_id_attr->set_minimum(-1);
-    table_id_attr->mutable_default_value()->set_i(-1);
-  }
-  {
-    auto* table_name_attr = op_def->add_attr();
-    table_name_attr->set_name("table_name");
-    table_name_attr->set_type("string");
-    table_name_attr->mutable_default_value()->set_s("");
-  }
-  {
-    auto* num_shards_attr = op_def->add_attr();
-    num_shards_attr->set_name("num_shards");
-    num_shards_attr->set_type("int");
-  }
-  {
-    auto* shard_id_attr = op_def->add_attr();
-    shard_id_attr->set_name("shard_id");
-    shard_id_attr->set_type("int");
-  }
-  op_def->set_summary("Retrieve embedding parameters for a single table.");
-  string parameter_descriptions;
-  for (const auto& param : state_variable_specs) {
-    if (param.has_user_defined() || is_debug_op) {
-      strings::Appendf(&parameter_descriptions,
-                       R"(
-%s: A tensor containing the embedding table parameter %s, as updated by
-the %s optimization algorithm.)",
-                       param.name().c_str(), param.name().c_str(),
-                       tpu::GetOptimizationAlgorithmFriendlyName(alg).c_str());
-    }
-  }
-  op_def->set_description(strings::Printf(R"doc(
-An op that retrieves optimization parameters from embedding to host
-memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
-the correct embedding table configuration. For example, this op is
-used to retrieve updated parameters before saving a checkpoint.
-%s
-table_name: Name of this table; must match a name in the
-  TPUEmbeddingConfiguration proto (overrides table_id).
-num_shards: Number of shards into which the embedding tables are divided.
-shard_id: Identifier of shard for this operation.
-table_id: Index of this table in the EmbeddingLayerConfiguration proto
-  (deprecated).
-)doc",
-                                          parameter_descriptions.c_str()));
-  op_def->set_is_commutative(false);
-  op_def->set_is_aggregate(false);
-  op_def->set_is_stateful(true);
-  auto shape_inference_function =
-      [state_variable_specs,
-       is_debug_op](shape_inference::InferenceContext* c) -> Status {
-    int table_id;
-    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
-    string table_name;
-    TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    // Exactly one must be non-default.
-    if ((table_id >= 0) == (!table_name.empty())) {
-      return errors::InvalidArgument(
-          "exactly one of table_id or table_name must be non-default");
-    }
-    int num_shards;
-    TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
-    int shard_id;
-    TF_RETURN_IF_ERROR(c->GetAttr("shard_id", &shard_id));
-    for (int j = 0; j < state_variable_specs.size(); ++j) {
-      if (state_variable_specs[j].has_user_defined() || is_debug_op) {
-        auto shape = c->MakeShape(
-            std::vector<shape_inference::DimensionHandle>(2, c->UnknownDim()));
-        TF_RETURN_IF_ERROR(
-            c->set_output(state_variable_specs[j].name(),
-                          std::vector<shape_inference::ShapeHandle>(1, shape)));
-      }
-    }
-    return Status::OK();
-  };
-  op_reg_data->shape_inference_fn = shape_inference_function;
-  return Status::OK();
-}
-
-void RegisterPerTableLoadAndRetrieveOps() {
-  // Load ops
-  for (tpu::OptimizationAlgorithm alg : tpu::GetOptimizationAlgorithms()) {
-    OpRegistry::Global()->Register(
-        [alg](OpRegistrationData* op_reg_data) -> Status {
-          return RegisterPerTableLoadOpsForAlgorithmBody(alg, false,
-                                                         op_reg_data);
-        });
-    tpu::GradientAccumulationSupport grad_accum_support;
-    TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
-    if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      OpRegistry::Global()->Register(
-          [alg](OpRegistrationData* op_reg_data) -> Status {
-            return RegisterPerTableLoadOpsForAlgorithmBody(alg, true,
-                                                           op_reg_data);
-          });
-    }
-  }
-  // Retrieve ops
-  for (tpu::OptimizationAlgorithm alg : tpu::GetOptimizationAlgorithms()) {
-    OpRegistry::Global()->Register(
-        [alg](OpRegistrationData* op_reg_data) -> Status {
-          return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, false,
-                                                             op_reg_data);
-        });
-    tpu::GradientAccumulationSupport grad_accum_support;
-    TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
-    if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      OpRegistry::Global()->Register(
-          [alg](OpRegistrationData* op_reg_data) -> Status {
-            return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true,
-                                                               op_reg_data);
-          });
-    }
-  }
-}
-
-}  // namespace
-
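Because the Load/Retrieve ops above are registered dynamically at static-initialization time rather than through REGISTER_OP, their generated signatures are only visible through the registry. A minimal sketch, assuming Adagrad is among tpu::GetOptimizationAlgorithms() so that LoadTPUEmbeddingAdagradParameters exists:

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_def.pb.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"

// Sketch only: fetch one dynamically registered per-table op and print its
// generated OpDef (one float input per state variable, plus the
// table_id/table_name/num_shards/shard_id attrs added above).
void DumpLoadOpSignature() {
  const tensorflow::OpDef* op_def = nullptr;
  TF_CHECK_OK(tensorflow::OpRegistry::Global()->LookUpOpDef(
      "LoadTPUEmbeddingAdagradParameters", &op_def));
  LOG(INFO) << op_def->DebugString();
}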
-REGISTER_OP("RecvTPUEmbeddingActivations")
-    .Output("outputs: num_outputs * float32")
-    .Attr("num_outputs: int >= 1")
-    .Attr("config: string")
-    .SetIsStateful()
-    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
-      string config_string;
-      TF_RETURN_IF_ERROR(c->GetAttr("config", &config_string));
-      tpu::TPUEmbeddingConfiguration config;
-      if (!config.ParseFromString(config_string)) {
-        return errors::InvalidArgument("Malformed tpu_embedding_config.");
-      }
-      tpu::AddDefaultEmbeddingOutputLayoutIfNeeded(&config);
-      std::vector<TensorShapeProto> output_shapes;
-      TF_RETURN_IF_ERROR(ComputeOutputTensorShapes(config, &output_shapes));
-      if (c->num_outputs() != output_shapes.size()) {
-        return errors::InvalidArgument("num outputs != size of output shapes");
-      }
-      for (int i = 0; i < c->num_outputs(); ++i) {
-        shape_inference::ShapeHandle output_shape;
-        TF_RETURN_IF_ERROR(
-            c->MakeShapeFromShapeProto(output_shapes[i], &output_shape));
-        c->set_output(i, output_shape);
-      }
-      return Status::OK();
-    })
-    .Doc(R"doc(
-An op that receives embedding activations on the TPU.
-
-The TPU system performs the embedding lookups and aggregations specified by
-the arguments to EnqueueTPUEmbedding(Integer/Sparse/SparseTensor)Batch. The
-results of these aggregations are visible to the Tensorflow Graph as the
-outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing
-one Tensor of activations per table specified in the model. There can be at
-most one RecvTPUEmbeddingActivations op in the TPU graph.
-
-outputs: A TensorList of embedding activations containing one Tensor per
-    embedding table in the model.
-num_outputs: The number of output activation tensors, equal to the number of
-    embedding tables in the model.
-config: Serialized TPUEmbeddingConfiguration proto.
-)doc");
-
-REGISTER_OP("TPUEmbeddingActivations")
-    .Input("embedding_variable: float32")
-    .Input("sliced_activations: float32")
-    .Output("output: float32")
-    .Attr("table_id: int >= 0")
-    .Attr("lookup_id: int >= 0")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      c->set_output(0, c->input(1));
-      return Status::OK();
-    })
-    .Doc(R"doc(
-An op enabling differentiation of TPU Embeddings.
-
-This op simply returns its second input, which is assumed to have been sliced
-from the Tensors returned by RecvTPUEmbeddingActivations. The presence of this
-op, and its first argument being a trainable Variable, enables automatic
-differentiation of graphs containing embeddings via the TPU Embedding Python
-libraries.
-
-embedding_variable: A trainable variable, enabling optimizers to find this op.
-sliced_activations: The embedding activations Tensor to return.
-table_id: The id of the table in the embedding layer configuration from which
-    these activations were computed.
-lookup_id: Identifier of the set of embedding indices which produced these
-    activations.
-)doc");
-
-REGISTER_OP("SendTPUEmbeddingGradients")
-    .Input("inputs: N * float32")
-    .Input("learning_rates: NN * float32")
-    .Attr("N: int >= 1")
-    .Attr("NN: int >= 0 = 0")
-    .Attr("config: string")
-    .SetIsStateful()
-    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
-      int nn;
-      TF_RETURN_IF_ERROR(c->GetAttr("NN", &nn));
-      std::vector<shape_inference::ShapeHandle> learning_rates;
-      TF_RETURN_IF_ERROR(c->input("learning_rates", &learning_rates));
-      for (int i = 0; i < nn; ++i) {
-        // Verify that each learning_rates element is scalar
-        shape_inference::ShapeHandle learning_rates_shape;
-        TF_RETURN_IF_ERROR(
-            c->WithRank(learning_rates[i], 0, &learning_rates_shape));
-      }
-
-      return Status::OK();
-    })
-    .Doc(R"doc(
-An op that performs gradient updates of embedding tables using the specified
-learning rates.
-
-inputs: A TensorList of gradients with which to update embedding tables.
-    This argument has the same length and shapes as the return value of
-    RecvTPUEmbeddingActivations, but contains gradients of the model's loss
-    with respect to the embedding activations. The embedding tables are updated
-    from these gradients via the optimizer specified in the TPU embedding
-    configuration given to tpu.initialize_system.
-learning_rates: A TensorList of float32 scalars, one for each dynamic learning
-    rate tag: see the comments in
-    //third_party/tensorflow/contrib/tpu/proto/optimization_parameters.proto.
-    Multiple tables can share the same dynamic learning rate tag as specified
-    in the configuration. If the learning rates for all tables are constant,
-    this list should be empty.
-config: Serialized TPUEmbeddingConfiguration proto.
-)doc");
-
-REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
-    .Input("batch: N * int32")
-    .Input("mode_override: string")
-    .Attr("N: int >= 1")
-    .Attr("device_ordinal: int = -1")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-An op that enqueues a list of input batch tensors to TPUEmbedding.
-
-batch: A list of 1D tensors, one for each embedding table, containing the
-    indices into the tables.
-mode_override: A string input that overrides the mode specified in the
-    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
-    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
-    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
-device_ordinal: The TPU device to use. Should be >= 0 and less than the number
-    of TPU cores in the task on which the node is placed.
-)doc");
-
-REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
-    .Input("sample_indices: N * int32")
-    .Input("embedding_indices: N * int32")
-    .Input("aggregation_weights: N * float32")
-    .Input("mode_override: string")
-    .Attr("N: int >= 1")
-    .Attr("device_ordinal: int = -1")
-    .Attr("combiners: list(string) = []")
-    .SetIsStateful()
-    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
-      std::vector<string> combiners;
-      TF_RETURN_IF_ERROR(c->GetAttr("combiners", &combiners));
-      int n;
-      TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
-      if (!combiners.empty() && combiners.size() != n) {
-        return errors::InvalidArgument("Invalid length of combiners. Have ",
-                                       combiners.size(), " but expected 0 or ",
-                                       n);
-      }
-
-      return Status::OK();
-    })
-    .Doc(R"doc(
-An op that enqueues TPUEmbedding input indices from a SparseTensor.
-
-This Op eases the porting of code that uses embedding_lookup_sparse(),
-although some Python preprocessing of the SparseTensor arguments to
-embedding_lookup_sparse() is required to produce the arguments to this Op,
-since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training
-step.
-
-The tensors at corresponding positions in the three input lists
-must have the same shape, i.e. rank 1 with dim_size() equal to the total
-number of lookups into the table described by the corresponding table_id.
-
-sample_indices: A list of rank 1 Tensors specifying the training example and
-    feature to which the corresponding embedding_indices and aggregation_weights
-    values belong. sample_indices[i] must equal b * nf + f, where nf is the
-    number of features from the corresponding table, f is in [0, nf), and
-    b is in [0, batch size).
-embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
-aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per
-    (training example, feature) -- aggregation weights.
-mode_override: A string input that overrides the mode specified in the
-    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
-    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
-    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
-device_ordinal: The TPU device to use. Should be >= 0 and less than the number
-    of TPU cores in the task on which the node is placed.
-combiners: A list of string scalars, one for each embedding table, specifying
-    how to normalize the embedding activations after weighted summation.
-    Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
-    the sum of the weights be 0 for 'mean' or the sum of the squared weights be
-    0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
-    all tables.
-)doc");
-
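A small sketch of the sample_indices arithmetic described above, for the simple case of exactly one lookup per (example, feature) pair; batch_size and nf are illustrative parameters, not values from this file:

#include <cstdint>
#include <vector>

// Sketch only: entry (b, f) lands at b * nf + f, as the doc above requires;
// e.g. with nf = 3, example b = 1 and feature f = 2 give index 5.
std::vector<int32_t> FlattenSampleIndices(int batch_size, int nf) {
  std::vector<int32_t> sample_indices;
  for (int b = 0; b < batch_size; ++b) {
    for (int f = 0; f < nf; ++f) {
      sample_indices.push_back(b * nf + f);
    }
  }
  return sample_indices;
}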
-REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
-    .Input("sample_indices: N * int32")
-    .Input("embedding_indices: N * int32")
-    .Input("aggregation_weights: N * float32")
-    .Input("mode_override: string")
-    .Attr("N: int >= 1")
-    .Attr("device_ordinal: int = -1")
-    .Attr("combiners: list(string) = []")
-    .Attr("table_ids: list(int)")
-    .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse().
-
-sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
-to the ith feature. table_ids[i] indicates which embedding table to look up
-for the ith feature.
-
-The tensors at corresponding positions in the three input lists (sample_indices,
-embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
-with dim_size() equal to the total number of lookups into the table described by
-the corresponding feature.
-
-sample_indices: A list of rank 1 Tensors specifying the training example to
-    which the corresponding embedding_indices and aggregation_weights values
-    belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse().
-embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
-    It corresponds to sp_ids.values in embedding_lookup_sparse().
-aggregation_weights: A list of rank 1 Tensors containing per training example
-    aggregation weights. It corresponds to sp_weights.values in
-    embedding_lookup_sparse().
-mode_override: A string input that overrides the mode specified in the
-    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
-    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
-    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
-device_ordinal: The TPU device to use. Should be >= 0 and less than the number
-    of TPU cores in the task on which the node is placed.
-combiners: A list of string scalars, one for each embedding table, specifying
-    how to normalize the embedding activations after weighted summation.
-    Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
-    the sum of the weights be 0 for 'mean' or the sum of the squared weights be
-    0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
-    all tables.
-table_ids: A list of integers specifying the identifier of the embedding table
-    (offset of TableDescriptor in the TPUEmbeddingConfiguration) to look up the
-    corresponding input. The ith input is looked up using table_ids[i]. The size
-    of the table_ids list must be equal to that of sample_indices,
-    embedding_indices and aggregation_weights.
-)doc");
-
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc b/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc
deleted file mode 100644
index 54e6b20..0000000
--- a/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-namespace tensorflow {
-
-REGISTER_OP("TPUOrdinalSelector")
-    .Output("device_ordinals: int32")
-    .SetIsStateful()
-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
-      c->set_output(0,
-                    c->Vector(shape_inference::InferenceContext::kUnknownDim));
-      return Status::OK();
-    })
-    .Doc(R"doc(
-A TPU core selector Op.
-
-This Op produces a set of TPU cores (for warm-up) or a single TPU core
-(for regular inference) to execute the TPU program on. The output is
-consumed by TPUPartitionedCall.
-
-device_ordinals: A vector of 1 or more TPU cores.
-)doc");
-
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD
index 541fbf3..2eff4f2 100644
--- a/tensorflow/contrib/tpu/profiler/BUILD
+++ b/tensorflow/contrib/tpu/profiler/BUILD
@@ -3,32 +3,20 @@
 load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
-load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library_cc")
 load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos")
 
-tf_proto_library(
-    name = "tpu_profiler_proto",
-    srcs = ["tpu_profiler.proto"],
-    has_services = 1,
-    cc_api_version = 2,
-    cc_grpc_version = 1,
-    protodeps = [":op_profile_proto"] + tf_additional_all_protos(),
-    visibility = ["//visibility:public"],
-)
-
 cc_library(
     name = "dump_tpu_profile",
     srcs = ["dump_tpu_profile.cc"],
     hdrs = ["dump_tpu_profile.h"],
     visibility = ["//visibility:public"],
     deps = [
-        ":op_profile_proto_cc",
-        ":tpu_profiler_proto_cc",
-        ":trace_events_proto_cc",
         ":trace_events_to_json",
         "//tensorflow/core:framework",
+        "//tensorflow/core:grpc_services",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/profiler:protos_all_cc",
     ],
 )
 
@@ -45,32 +33,20 @@
     ],
     visibility = ["//visibility:public"],
     deps = [
-        ":dump_tpu_profile",
-        ":tpu_profiler_analysis_proto_cc",
-        ":tpu_profiler_proto_cc",
         ":version",
-        "//tensorflow:grpc++",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
-        "//tensorflow/core/distributed_runtime/rpc:grpc_util",
-        "//tensorflow/core/platform/cloud:gcs_file_system",
+        "//tensorflow/core/profiler/rpc/client:capture_profile",
     ],
 )
 
-tf_proto_library(
-    name = "trace_events_proto",
-    srcs = ["trace_events.proto"],
-    cc_api_version = 2,
-    visibility = ["//visibility:public"],
-)
-
 cc_library(
     name = "trace_events_to_json",
     srcs = ["trace_events_to_json.cc"],
     hdrs = ["trace_events_to_json.h"],
     deps = [
-        ":trace_events_proto_cc",
         "//tensorflow/core:lib",
+        "//tensorflow/core/profiler:protos_all_cc",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
@@ -83,31 +59,7 @@
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "//tensorflow/core/profiler:protos_all_cc",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
-
-tf_proto_library(
-    name = "op_profile_proto",
-    srcs = ["op_profile.proto"],
-    cc_api_version = 2,
-    visibility = ["//visibility:public"],
-)
-
-tf_proto_library(
-    name = "tpu_profiler_analysis_proto",
-    srcs = ["tpu_profiler_analysis.proto"],
-    has_services = 1,
-    cc_api_version = 2,
-    cc_grpc_version = 1,
-    protodeps = [":tpu_profiler_proto"] + tf_additional_all_protos(),
-    visibility = ["//visibility:public"],
-)
-
-py_library(
-    name = "tpu_profiler_analysis_pb2_grpc",
-    srcs = ["tpu_profiler_analysis_pb2_grpc.py"],
-    srcs_version = "PY2AND3",
-    visibility = ["//visibility:public"],
-    deps = [":tpu_profiler_analysis_proto_py"],
-)
diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
index 1c5ea2d..508b929 100644
--- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
@@ -18,235 +18,11 @@
 // Initiates TPU profiling on the TPUProfiler service at service_addr,
 // receives and dumps the profile data to a tensorboard log directory.
 
-#include "grpcpp/grpcpp.h"
-
-#include <cstdio>
-#include <ctime>
-#include <vector>
-
-#include "tensorflow/contrib/tpu/profiler/dump_tpu_profile.h"
-#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"
-#include "tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.grpc.pb.h"
 #include "tensorflow/contrib/tpu/profiler/version.h"
-#include "tensorflow/core/distributed_runtime/rpc/grpc_util.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/lib/strings/numbers.h"
-#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/profiler/rpc/client/capture_profile.h"
 #include "tensorflow/core/util/command_line_flags.h"
 
-namespace tensorflow {
-namespace tpu {
-namespace {
-
-using ::tensorflow::TPUProfileAnalysis;
-using ::tensorflow::TPUProfiler;
-
-constexpr uint64 kMaxEvents = 1000000;
-
-string GetCurrentTimeStampAsString() {
-  char s[128];
-  std::time_t t = std::time(nullptr);
-  CHECK_NE(std::strftime(s, sizeof(s), "%F_%T", std::localtime(&t)), 0);
-  return s;
-}
-
-Status ValidateHostPortPair(const string& host_port) {
-  uint32 port;
-  std::vector<string> parts = str_util::Split(host_port, ':');
-  // Must be host:port, port must be a number, host must not contain a '/',
-  // host also must not be empty.
-  if (parts.size() != 2 || !strings::safe_strtou32(parts[1], &port) ||
-      parts[0].find("/") != string::npos || parts[0].empty()) {
-    return errors::InvalidArgument("Could not interpret \"", host_port,
-                                   "\" as a host-port pair.");
-  }
-  return Status::OK();
-}
-
-ProfileRequest PopulateProfileRequest(int duration_ms,
-                                      const string& repository_root,
-                                      const string& session_id,
-                                      const ProfileOptions& opts) {
-  ProfileRequest request;
-  request.set_duration_ms(duration_ms);
-  request.set_max_events(kMaxEvents);
-  if (tensorflow::str_util::StartsWith(repository_root, "gs://")) {
-    // For backward compatibility, only generate the trace table etc. when
-    // the user provides a GCS path for the model directory.
-    request.set_repository_root(repository_root);
-    request.set_session_id(session_id);
-  }
-  request.add_tools("op_profile");
-  request.add_tools("input_pipeline");
-  request.add_tools("memory_viewer");
-  request.add_tools("overview_page");
-  *request.mutable_opts() = opts;
-  return request;
-}
-
-// Returns whether the returned trace is empty.
-// Failures are handled by CHECK, i.e. abort().
-bool Profile(const string& service_addr, const string& logdir, int duration_ms,
-             const string& repository_root, const string& session_id,
-             const ProfileOptions& opts) {
-  ProfileRequest request =
-      PopulateProfileRequest(duration_ms, repository_root, session_id, opts);
-
-  ::grpc::ClientContext context;
-  ::grpc::ChannelArguments channel_args;
-  // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their
-  // `ValidateHostPortPair` checks for empty host string case.
-  channel_args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH,
-                      std::numeric_limits<int32>::max());
-  std::unique_ptr<TPUProfiler::Stub> stub =
-      TPUProfiler::NewStub(::grpc::CreateCustomChannel(
-          "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(),
-          channel_args));
-  ProfileResponse response;
-  TF_QCHECK_OK(FromGrpcStatus(stub->Profile(&context, request, &response)));
-
-  if (!response.encoded_trace().empty()) {
-    TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile(
-        logdir, session_id, "", response, &std::cout));
-    // Print this at the end so that it's not buried in irrelevant LOG messages.
-    std::cout
-        << "NOTE: using the trace duration " << duration_ms << "ms."
-        << std::endl
-        << "Set an appropriate duration (with --duration_ms) if you "
-           "don't see a full step in your trace or the captured trace is too "
-           "large."
-        << std::endl;
-  }
-
-  return response.encoded_trace().empty();
-}
-
-// Starts a new profiling session that includes all the hosts listed in
-// hostnames, for the time interval of duration_ms. Possibly saves the
-// profiling result in the directory specified by repository_root and
-// session_id.
-bool NewSession(const string& service_addr,
-                const std::vector<tensorflow::string>& hostnames,
-                int duration_ms, const string& repository_root,
-                const string& session_id, const ProfileOptions& opts) {
-  NewProfileSessionRequest new_session_request;
-  *new_session_request.mutable_request() =
-      PopulateProfileRequest(duration_ms, repository_root, session_id, opts);
-  new_session_request.set_repository_root(repository_root);
-  new_session_request.set_session_id(session_id);
-  for (const auto& hostname : hostnames) {
-    new_session_request.add_hosts(hostname);
-  }
-
-  ::grpc::ClientContext context;
-  ::grpc::ChannelArguments channel_args;
-  // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their
-  // `ValidateHostPortPair` checks for empty host string case.
-  channel_args.SetMaxReceiveMessageSize(std::numeric_limits<int32>::max());
-  // TODO(jiesun): GRPC supports the following relevant naming schemes:
-  // 1. dns:///host:port
-  // 2. ipv4:host:port or ipv6:[host]:port
-  // We might need to change the prefix which depends on what TPU name resolver
-  // will give us.
-  std::unique_ptr<TPUProfileAnalysis::Stub> stub =
-      TPUProfileAnalysis::NewStub(::grpc::CreateCustomChannel(
-          "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(),
-          channel_args));
-  NewProfileSessionResponse new_session_response;
-  TF_QCHECK_OK(FromGrpcStatus(
-      stub->NewSession(&context, new_session_request, &new_session_response)));
-
-  std::cout << "Profile session succeed for host(s):"
-            << str_util::Join(hostnames, ",") << std::endl;
-  return new_session_response.empty_trace();
-}
-
-// Starts tracing on a single or multiple TPU hosts and saves the result in the
-// given logdir. If no trace was collected, retries tracing up to
-// num_tracing_attempts times.
-void StartTracing(const tensorflow::string& service_addr,
-                  const tensorflow::string& logdir,
-                  const tensorflow::string& workers_list,
-                  bool include_dataset_ops, int duration_ms,
-                  int num_tracing_attempts) {
-  // Use the current timestamp as the run name.
-  tensorflow::string session_id = GetCurrentTimeStampAsString();
-  constexpr char kProfilePluginDirectory[] = "plugins/profile/";
-  tensorflow::string repository_root =
-      io::JoinPath(logdir, kProfilePluginDirectory);
-  std::vector<tensorflow::string> hostnames =
-      tensorflow::str_util::Split(workers_list, ",");
-
-  bool empty_trace = false;
-  int remaining_attempts = num_tracing_attempts;
-  tensorflow::ProfileOptions opts;
-  opts.set_include_dataset_ops(include_dataset_ops);
-  while (true) {
-    std::cout << "Starting to profile TPU traces for " << duration_ms << " ms. "
-              << "Remaining attempt(s): " << remaining_attempts-- << std::endl;
-    if (hostnames.empty()) {
-      empty_trace = tensorflow::tpu::Profile(service_addr, logdir, duration_ms,
-                                             repository_root, session_id, opts);
-    } else {
-      tensorflow::string tpu_master = service_addr;
-      empty_trace =
-          tensorflow::tpu::NewSession(tpu_master, hostnames, duration_ms,
-                                      repository_root, session_id, opts);
-    }
-    if (remaining_attempts <= 0 || !empty_trace) break;
-    std::cout << "No trace event is collected. Automatically retrying."
-              << std::endl
-              << std::endl;
-  }
-
-  if (empty_trace) {
-    std::cout << "No trace event is collected after " << num_tracing_attempts
-              << " attempt(s). "
-              << "Perhaps, you want to try again (with more attempts?)."
-              << std::endl
-              << "Tip: increase number of attempts with --num_tracing_attempts."
-              << std::endl;
-  }
-}
-
-MonitorRequest PopulateMonitorRequest(int duration_ms, int monitoring_level) {
-  MonitorRequest request;
-  request.set_duration_ms(duration_ms);
-  request.set_monitoring_level(monitoring_level);
-  return request;
-}
-
-// Repeatedly collects profiles and shows user-friendly metrics for
-// 'num_queries' time(s).
-void StartMonitoring(const tensorflow::string& service_addr, int duration_ms,
-                     int monitoring_level, int num_queries) {
-  for (int query = 0; query < num_queries; ++query) {
-    MonitorRequest request =
-        PopulateMonitorRequest(duration_ms, monitoring_level);
-
-    ::grpc::ClientContext context;
-    ::grpc::ChannelArguments channel_args;
-    channel_args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH,
-                        std::numeric_limits<int32>::max());
-    std::unique_ptr<TPUProfiler::Stub> stub =
-        TPUProfiler::NewStub(::grpc::CreateCustomChannel(
-            "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(),
-            channel_args));
-    MonitorResponse response;
-    TF_QCHECK_OK(FromGrpcStatus(stub->Monitor(&context, request, &response)));
-
-    std::cout << "Cloud TPU Monitoring Results (Sample " << query + 1
-              << "):\n\n"
-              << response.data() << std::flush;
-  }
-}
-
-}  // namespace
-}  // namespace tpu
-}  // namespace tensorflow
-
 int main(int argc, char** argv) {
   tensorflow::string FLAGS_service_addr;
   tensorflow::string FLAGS_logdir;
@@ -301,7 +77,7 @@
     return 2;
   }
   tensorflow::Status status =
-      tensorflow::tpu::ValidateHostPortPair(FLAGS_service_addr);
+      tensorflow::profiler::client::ValidateHostPortPair(FLAGS_service_addr);
   if (!status.ok()) {
     std::cout << status.error_message() << std::endl;
     std::cout << usage.c_str() << std::endl;
@@ -324,12 +100,12 @@
               << FLAGS_service_addr << " for " << duration_ms
               << "ms and show metrics for " << num_queries << " time(s)."
               << std::endl;
-    tensorflow::tpu::StartMonitoring(FLAGS_service_addr, duration_ms,
-                                     FLAGS_monitoring_level, num_queries);
+    tensorflow::profiler::client::StartMonitoring(
+        FLAGS_service_addr, duration_ms, FLAGS_monitoring_level, num_queries);
   } else {
-    tensorflow::tpu::StartTracing(FLAGS_service_addr, FLAGS_logdir,
-                                  FLAGS_workers_list, FLAGS_include_dataset_ops,
-                                  duration_ms, num_tracing_attempts);
+    tensorflow::profiler::client::StartTracing(
+        FLAGS_service_addr, FLAGS_logdir, FLAGS_workers_list,
+        FLAGS_include_dataset_ops, duration_ms, num_tracing_attempts);
   }
   return 0;
 }
diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index b4b06a4..d138c1f 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -19,8 +19,6 @@
 #include <ctime>
 #include <vector>
 
-#include "tensorflow/contrib/tpu/profiler/op_profile.pb.h"
-#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
 #include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/compression.h"
@@ -29,6 +27,8 @@
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/profiler/op_profile.pb.h"
+#include "tensorflow/core/profiler/trace_events.pb.h"
 #include "tensorflow/core/util/events_writer.h"
 
 namespace tensorflow {
@@ -72,7 +72,7 @@
   LOG(INFO) << "Dumped raw-proto trace data to " << proto_path;
 
   string json_path = JoinPath(run_dir, StrCat(host_prefix, kJsonTraceFileName));
-  Trace trace;
+  profiler::Trace trace;
   trace.ParseFromString(encoded_trace);
   if (os) {
     *os << "Trace contains " << trace.trace_events_size() << " events."
@@ -88,7 +88,7 @@
 
 Status DumpOpProfileToLogDirectory(StringPiece run_dir,
                                    const string& host_prefix,
-                                   const tpu::op_profile::Profile& profile,
+                                   const profiler::op_profile::Profile& profile,
                                    std::ostream* os) {
   string path = JoinPath(run_dir, StrCat(host_prefix, kJsonOpProfileFileName));
   string json;
diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h
index ecf21b1..7ddd7b1 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h
@@ -16,8 +16,8 @@
 #ifndef TENSORFLOW_CONTRIB_TPU_PROFILER_DUMP_TPU_PROFILE_H_
 #define TENSORFLOW_CONTRIB_TPU_PROFILER_DUMP_TPU_PROFILE_H_
 
-#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/grpc_services.h"
 
 namespace tensorflow {
 namespace tpu {
diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto
deleted file mode 100644
index 292108f..0000000
--- a/tensorflow/contrib/tpu/profiler/op_profile.proto
+++ /dev/null
@@ -1,76 +0,0 @@
-syntax = "proto3";
-
-package tensorflow.tpu.op_profile;
-
-// Profile is the top-level data that summarizes a program.
-message Profile {
-  reserved 2;
-  reserved "by_program_structure";
-  reserved 3;
-  reserved "per_program";
-  // Root of a profile broken down by instruction category.
-  Node by_category = 1;
-  // Root of a profile broken down by program.
-  Node by_program = 4;
-}
-
-// An entry in the profile tree. (An instruction, or set of instructions).
-message Node {
-  string name = 1;      // Semantics depend on contents.
-  Metrics metrics = 2;  // May be omitted e.g. for fused instructions.
-  repeated Node children = 3;  // Subject to pruning.
-
-  // Details about what this node represents.
-  oneof contents {
-    InstructionCategory category = 4;
-    XLAInstruction xla = 5;
-  }
-
-  int32 num_children = 6;  // Total number of children before pruning.
-  // A category of XLA instructions.
-  // name is a descriptive string, like "data formatting".
-  message InstructionCategory {
-  }
-  // A single XLA instruction.
-  // name is the unique instruction id, like "%multiply.5".
-  message XLAInstruction {
-    string op = 1;          // Opcode like %multiply
-    string expression = 2;  // %multiply = [shape]multiply(operand1, operand2)
-    string provenance = 3;  // Typically the TensorFlow operation name.
-    string category = 4;
-    // Describes the physical memory layout of the instruction's primary input.
-    // e.g. for a convolution, this analyzes the image and ignores the kernel.
-    LayoutAnalysis layout = 5;
-    message LayoutAnalysis {
-      // The physical data layout, from most-minor to most-major dimensions.
-      repeated Dimension dimensions = 1;
-      message Dimension {
-        int32 size = 1;       // Size of the data in this dimension.
-        int32 alignment = 2;  // Data must be padded to a multiple of alignment.
-        string semantics = 3;  // What the dimension represents, e.g. "spatial".
-      }
-    }
-  }
-}
-
-// Measurements of an operation (or aggregated set of operations).
-// Metrics are always "total" rather than "self".
-message Metrics {
-  // Core-time taken by this operation, as a fraction of all operations.
-  double time = 1;
-  // Floating point computations performed by this operation, as a fraction of
-  // peak core FLOPS * program time. This representation has useful properties:
-  //  - it is proportional to the number of floating point operations performed
-  //  - utilization is flops/time
-  //  - wasted potential flops is proportional to time - flops
-  //  - it does not reveal the peak core FLOPS of the hardware
-  double flops = 2;
-
-  // The memory bandwidth used to load operands, as a fraction of
-  // theoretical memory bandwidth on the specific hardware.
-  double memory_bandwidth = 3;
-
-  double raw_time = 11;   // Elapsed core-time in picoseconds.
-  double raw_flops = 12;  // Total floating-point operations performed.
-  double raw_bytes_accessed = 13;  // Total bytes accessed (read and write).
-}
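
Note that op_profile.proto is not dropped here, only moved under tensorflow/core/profiler. The Metrics comments above define useful identities; a minimal sketch checking them with a plain dataclass (a stand-in, not the generated proto class):

from dataclasses import dataclass

@dataclass
class Metrics:
    time: float   # Core-time fraction of all operations.
    flops: float  # Fraction of peak core FLOPS * program time.

def utilization(m):
    # As documented above: utilization is flops/time.
    return m.flops / m.time

def wasted_potential(m):
    # Wasted potential flops is proportional to time - flops.
    return m.time - m.flops

m = Metrics(time=0.25, flops=0.10)
print(utilization(m))       # 0.4: this op ran at 40% of peak FLOPS.
print(wasted_potential(m))  # 0.15
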
diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
deleted file mode 100644
index da4a95e..0000000
--- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
+++ /dev/null
@@ -1,132 +0,0 @@
-syntax = "proto3";
-package tensorflow;
-
-import "tensorflow/core/framework/graph.proto";
-import "tensorflow/core/protobuf/config.proto";
-import "tensorflow/contrib/tpu/profiler/op_profile.proto";
-
-// The TPUProfiler service retrieves performance information about
-// the programs running on connected TPUs over a period of time.
-service TPUProfiler {
-  // Starts a profiling session, blocks until it completes, and returns data.
-  rpc Profile(ProfileRequest) returns (ProfileResponse) {
-  }
-  // Collects profiling data and returns user-friendly metrics.
-  rpc Monitor(MonitorRequest) returns (MonitorResponse) {
-  }
-}
-
-message ProfileOptions {
-  // We don't collect the dataset ops by default for better trace-viewer
-  // scalability. The caller can manually set this field to include the ops.
-  bool include_dataset_ops = 1;
-
-  // next-field: 2
-}
-
-message ToolRequestOptions {
-  // Required formats for the tool; should be one of "json", "proto", "raw",
-  // etc. If not specified (for backward compatibility), the default format is
-  // used; most tools use the json format.
-  string output_formats = 2;
-
-  // Whether to save the result directly to the repository or pass it back to
-  // the caller. Defaults to false for backward compatibility.
-  bool save_to_repo = 3;
-}
-
-message ProfileRequest {
-  // In the future, the caller will be able to customize when profiling starts
-  // stops. For now, it collects `duration_ms` milliseconds worth of data.
-  uint64 duration_ms = 1;
-
-  // The maximum number of events to return. By default (value 0), return all
-  // events.
-  uint64 max_events = 2;
-
-  // Names of the required profiling tools, such as "input_pipeline_analyzer".
-  repeated string tools = 3;
-
-  // Specifies the requirements for each tool.
-  map<string, ToolRequestOptions> tool_options = 8;
-
-  // Optional profiling options that control how a TF session will be profiled.
-  ProfileOptions opts = 4;
-
-  // The place where we will dump profile data. We will normally use
-  // MODEL_DIR/plugin/profile/ as our repository root.
-  string repository_root = 5;
-
-  // The user provided profile session identifier.
-  string session_id = 6;
-
-  // The hostname of the system where the profile should happen.
-  // We use it as an identifier in part of our output filename.
-  string host_name = 7;
-
-  // In the future, the caller will indicate which TF session is profiled, and
-  // only data relating to that program will be returned. For now, we assume
-  // all activity during the profiling period is relevant.
-  // next-field: 9
-}
-
-message ProfileToolData {
-  // The file name with which this data is associated (e.g. "input_pipeline.json",
-  // "cluster_xxx.memory_viewer.json").
-  string name = 1;
-
-  // The data payload (likely json) for the specific tool.
-  bytes data = 2;
-}
-
-message ProfileResponse {
-  reserved 1;  // was uint64 placeholder for returning something meaningful.
-  // Graphs of programs executed on TPUs during the profiling period.
-  repeated GraphDef computation_graph = 2;
-
-  // Performance profile that can be used to annotate HLO operations in the
-  // computation graph.
-  RunMetadata hlo_metadata = 5;
-
-  // Encoded Trace proto message that contains metadata about the trace captured
-  // during the profiling period. Describes the devices and resources that
-  // 'trace_events' refers to.
-  bytes encoded_trace = 3;
-
-  // Assembles a hierarchical performance profile based on HLOs in trace events.
-  // If the trace covers multiple programs, the longest-running one is analyzed.
-  // See op_profile.proto for the detailed semantics of the returned profile.
-  tpu.op_profile.Profile op_profile = 4;
-
-  // Data payload for each required tool.
-  repeated ProfileToolData tool_data = 6;
-
-  // When we write profiling data directly to repository directory, we need a
-  // way to figure out whether the captured trace is empty (due to idle TPU).
-  bool empty_trace = 7;
-
-  // next-field: 8
-}
-
-message MonitorRequest {
-  // Duration for which to profile between each update.
-  uint64 duration_ms = 1;
-
-  // Indicates the level at which we want to monitor. Currently, two levels are
-  // supported:
-  // Level 1: An ultra-lightweight mode that captures only some utilization
-  // metrics.
-  // Level 2: More verbose than level 1. Collects utilization metrics, device
-  // information, step time information, etc. Do not use this option if the TPU
-  // host is being very heavily used.
-  int32 monitoring_level = 2;
-
-  // next-field: 3
-}
-
-message MonitorResponse {
-  // Properly formatted string data that can be directly returned back to user.
-  string data = 1;
-
-  // next-field: 2
-}
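
These messages likewise move under tensorflow/core/profiler rather than disappear. A hedged sketch of how a caller fills the two request shapes, using stand-in dataclasses because the generated classes' new import path is not shown in this diff:

from dataclasses import dataclass, field

@dataclass
class MonitorRequest:
    duration_ms: int = 0
    monitoring_level: int = 0  # 1 = lightweight metrics, 2 = verbose.

@dataclass
class ProfileRequest:
    duration_ms: int = 0
    max_events: int = 0  # 0 means return all events.
    tools: list = field(default_factory=list)
    repository_root: str = ""

# Level 2 is more verbose; avoid it when the TPU host is heavily loaded.
monitor = MonitorRequest(duration_ms=1000, monitoring_level=1)
profile = ProfileRequest(duration_ms=2000,
                         tools=["input_pipeline_analyzer"],
                         repository_root="MODEL_DIR/plugin/profile")
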
diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto
deleted file mode 100644
index d3c34bf..0000000
--- a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto
+++ /dev/null
@@ -1,78 +0,0 @@
-syntax = "proto3";
-package tensorflow;
-
-import "tensorflow/contrib/tpu/profiler/tpu_profiler.proto";
-
-message NewProfileSessionRequest {
-  ProfileRequest request = 1;
-  string repository_root = 2;
-  repeated string hosts = 3;
-  string session_id = 4;
-}
-
-message NewProfileSessionResponse {
-  // Auxiliary error_message.
-  string error_message = 1;
-
-  // Whether all hosts returned an empty trace.
-  bool empty_trace = 2;
-}
-
-message EnumProfileSessionsAndToolsRequest {
-  string repository_root = 1;
-}
-
-message ProfileSessionInfo {
-  string session_id = 1;
-  // Which tool data is available for consumption.
-  repeated string available_tools = 2;
-}
-
-message EnumProfileSessionsAndToolsResponse {
-  // Auxiliary error_message.
-  string error_message = 1;
-  // If success, the returned sessions information are stored here.
-  repeated ProfileSessionInfo sessions = 2;
-}
-
-message ProfileSessionDataRequest {
-  string repository_root = 1;
-  string session_id = 2;
-  // Which host the data is associated with. If empty, data from all hosts is
-  // aggregated.
-  string host_name = 5;
-  // Which tool's data is requested.
-  string tool_name = 3;
-  // Tool-specific parameters, e.g. TraceViewer's viewport, etc.
-  map<string, string> parameters = 4;
-}
-
-message ProfileSessionDataResponse {
-  // Auxiliary error_message.
-  string error_message = 1;
-
-  // Output format. e.g. "json" or "proto" or "blob"
-  string output_format = 2;
-
-  // TODO(jiesun): figure out whether to put bytes or oneof tool specific proto.
-  bytes output = 3;
-}
-////////////////////////////////////////////////////////////////////////////////
-// The TPUProfileAnalysis service provides an entry point for profiling TPUs
-// and for serving profiled data to TensorBoard through gRPC.
-////////////////////////////////////////////////////////////////////////////////
-service TPUProfileAnalysis {
-  // Starts a profiling session and blocks until it completes.
-  // The TPUProfileAnalysis service delegates this to the TPUProfiler service.
-  // Populates the profiled data in the repository, then returns status.
-  rpc NewSession(NewProfileSessionRequest) returns (NewProfileSessionResponse) {
-  }
-  // Enumerates existing sessions and returns the available profile tools.
-  rpc EnumSessions(EnumProfileSessionsAndToolsRequest)
-      returns (EnumProfileSessionsAndToolsResponse) {
-  }
-  // Retrieves a specific tool's data for a specific session.
-  rpc GetSessionToolData(ProfileSessionDataRequest)
-      returns (ProfileSessionDataResponse) {
-  }
-}
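
The analysis service above is a three-RPC surface: NewSession, EnumSessions, GetSessionToolData. A sketch of the intended client flow, with an in-memory fake standing in for the generated gRPC stub:

class FakeAnalysisService:
    # In-memory stand-in for a TPUProfileAnalysis client stub.
    def __init__(self, sessions):
        self._sessions = sessions  # {session_id: {tool_name: bytes}}

    def enum_sessions(self, repository_root):
        return [(sid, sorted(tools)) for sid, tools in self._sessions.items()]

    def get_session_tool_data(self, session_id, tool_name):
        return self._sessions[session_id][tool_name]

svc = FakeAnalysisService({"session_0": {"trace_viewer": b"{}"}})
for session_id, tools in svc.enum_sessions("MODEL_DIR/plugin/profile"):
    for tool in tools:
        # Enumerate sessions first, then fetch each available tool's payload.
        data = svc.get_session_tool_data(session_id, tool)
        print(session_id, tool, len(data), "bytes")
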
diff --git a/tensorflow/contrib/tpu/profiler/trace_events.proto b/tensorflow/contrib/tpu/profiler/trace_events.proto
deleted file mode 100644
index 96c4784..0000000
--- a/tensorflow/contrib/tpu/profiler/trace_events.proto
+++ /dev/null
@@ -1,62 +0,0 @@
-syntax = "proto3";
-
-package tensorflow.tpu;
-
-// A 'Trace' contains metadata for the individual traces of a system.
-message Trace {
-  // The devices that this trace has information about. Maps from device_id to
-  // more data about the specific device.
-  map<uint32, Device> devices = 1;
-
-  // All trace events captured in the profiling period.
-  repeated TraceEvent trace_events = 4;
-}
-
-// A 'device' is a physical entity in the system and comprises several
-// resources.
-message Device {
-  // The name of the device.
-  string name = 1;
-
-  // The id of this device, unique in a single trace.
-  uint32 device_id = 2;
-
-  // The resources on this device, keyed by resource_id.
-  map<uint32, Resource> resources = 3;
-}
-
-// A 'resource' generally is a specific computation component on a device. These
-// can range from threads on CPUs to specific arithmetic units on hardware
-// devices.
-message Resource {
-  // The name of the resource.
-  string name = 1;
-
-  // The id of the resource. Unique within a device.
-  uint32 resource_id = 2;
-}
-
-message TraceEvent {
-  // The id of the device that this event occurred on. The full dataset should
-  // have this device present in the Trace object.
-  uint32 device_id = 1;
-
-  // The id of the resource that this event occurred on. The full dataset should
-  // have this resource present in the Device object of the Trace object. A
-  // resource_id is unique on a specific device, but not necessarily within the
-  // trace.
-  uint32 resource_id = 2;
-
-  // The name of this trace event.
-  string name = 3;
-
-  // The timestamp that this event occurred at (in picos since tracing started).
-  uint64 timestamp_ps = 9;
-
-  // The duration of the event in picoseconds if applicable.
-  // Events without duration are called instant events.
-  uint64 duration_ps = 10;
-
-  // Extra arguments that will be displayed in trace view.
-  map<string, string> args = 11;
-}
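
Trace and its companions also just change proto package (tensorflow.tpu to tensorflow.profiler). A plain-dict mirror of the schema above, of the shape the JSON converter below walks after parsing:

trace = {
    "devices": {  # device_id -> device
        1: {"name": "TPU:0", "device_id": 1,
            "resources": {0: {"name": "step", "resource_id": 0}}},
    },
    "trace_events": [
        {"device_id": 1, "resource_id": 0, "name": "fusion.3",
         "timestamp_ps": 1_000_000, "duration_ps": 500_000, "args": {}},
    ],
}
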
diff --git a/tensorflow/contrib/tpu/profiler/trace_events_to_json.cc b/tensorflow/contrib/tpu/profiler/trace_events_to_json.cc
index 3f7e67d..96d1094 100644
--- a/tensorflow/contrib/tpu/profiler/trace_events_to_json.cc
+++ b/tensorflow/contrib/tpu/profiler/trace_events_to_json.cc
@@ -17,6 +17,7 @@
 #include "include/json/json.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/profiler/trace_events.pb.h"
 
 namespace tensorflow {
 namespace tpu {
@@ -32,12 +33,13 @@
 }
 
 // Adds resource events for a single device.
-void AddResourceMetadata(uint32 device_id,
-                         const std::map<uint32, const Resource *> &resources,
-                         string *json) {
+void AddResourceMetadata(
+    uint32 device_id,
+    const std::map<uint32, const profiler::Resource *> &resources,
+    string *json) {
   for (const auto &pair : resources) {
     uint32 resource_id = pair.first;
-    const Resource &resource = *pair.second;
+    const profiler::Resource &resource = *pair.second;
     if (!resource.name().empty()) {
       Appendf(json,
               R"({"ph":"M","pid":%u,"tid":%u,)"
@@ -53,11 +55,11 @@
   }
 }
 
-void AddDeviceMetadata(const std::map<uint32, const Device *> &devices,
-                       string *json) {
+void AddDeviceMetadata(
+    const std::map<uint32, const profiler::Device *> &devices, string *json) {
   for (const auto &pair : devices) {
     uint32 device_id = pair.first;
-    const Device &device = *pair.second;
+    const profiler::Device &device = *pair.second;
     if (!device.name().empty()) {
       Appendf(json,
               R"({"ph":"M","pid":%u,"name":"process_name",)"
@@ -71,7 +73,7 @@
             R"("args":{"sort_index":%u}},)",
             device_id, device_id);
     // Convert to a std::map so that devices are sorted by the device id.
-    std::map<uint32, const Resource *> sorted_resources;
+    std::map<uint32, const profiler::Resource *> sorted_resources;
     for (const auto &pair : device.resources()) {
       sorted_resources[pair.first] = &pair.second;
     }
@@ -79,7 +81,7 @@
   }
 }
 
-inline void AddTraceEvent(const TraceEvent &event, string *json) {
+inline void AddTraceEvent(const profiler::TraceEvent &event, string *json) {
   Appendf(json, R"({"pid":%u,"tid":%u,"ts":%.5f,)", event.device_id(),
           event.resource_id(), event.timestamp_ps() / kPicosPerMicro);
   AppendEscapedName(json, event.name());
@@ -94,19 +96,19 @@
 
 }  // namespace
 
-string TraceEventsToJson(const Trace &trace) {
+string TraceEventsToJson(const profiler::Trace &trace) {
   string json;
   Appendf(&json,
           R"({"displayTimeUnit":"ns","metadata":{"highres-ticks":true},)");
   Appendf(&json,
           R"("traceEvents":[)");
   // Convert to a std::map so that devices are sorted by the device id.
-  std::map<uint32, const Device *> sorted_devices;
+  std::map<uint32, const profiler::Device *> sorted_devices;
   for (const auto &pair : trace.devices()) {
     sorted_devices[pair.first] = &pair.second;
   }
   AddDeviceMetadata(sorted_devices, &json);
-  for (const TraceEvent &event : trace.trace_events()) {
+  for (const profiler::TraceEvent &event : trace.trace_events()) {
     AddTraceEvent(event, &json);
   }
   // Add one fake event to avoid dealing with no-trailing-comma rule.
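
For reference, a Python rendering of the Catapult records the converter above emits: "M"-phase metadata naming each device and resource, then one record per event with timestamps converted from picoseconds to microseconds. Field spellings beyond those visible in the hunk (e.g. "thread_name") are assumptions:

import json

PICOS_PER_MICRO = 1e6

def trace_to_json(devices, events):
    out = []
    for device_id, device in sorted(devices.items()):
        # Metadata record naming the process, as in AddDeviceMetadata.
        out.append({"ph": "M", "pid": device_id, "name": "process_name",
                    "args": {"name": device["name"]}})
        for resource_id, resource in sorted(device["resources"].items()):
            out.append({"ph": "M", "pid": device_id, "tid": resource_id,
                        "name": "thread_name",  # Assumed spelling.
                        "args": {"name": resource["name"]}})
    for e in events:
        out.append({"pid": e["device_id"], "tid": e["resource_id"],
                    "name": e["name"],
                    "ts": e["timestamp_ps"] / PICOS_PER_MICRO})
    return json.dumps({"displayTimeUnit": "ns",
                       "metadata": {"highres-ticks": True},
                       "traceEvents": out})

devices = {1: {"name": "TPU:0", "resources": {0: {"name": "step"}}}}
events = [{"device_id": 1, "resource_id": 0, "name": "fusion.3",
           "timestamp_ps": 1_000_000}]
print(trace_to_json(devices, events))
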
diff --git a/tensorflow/contrib/tpu/profiler/trace_events_to_json.h b/tensorflow/contrib/tpu/profiler/trace_events_to_json.h
index 3bd76dd..554d6ff 100644
--- a/tensorflow/contrib/tpu/profiler/trace_events_to_json.h
+++ b/tensorflow/contrib/tpu/profiler/trace_events_to_json.h
@@ -16,15 +16,15 @@
 #ifndef TENSORFLOW_CONTRIB_TPU_PROFILER_TRACE_EVENTS_TO_JSON_H_
 #define TENSORFLOW_CONTRIB_TPU_PROFILER_TRACE_EVENTS_TO_JSON_H_
 
-#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/profiler/trace_events.pb.h"
 
 namespace tensorflow {
 namespace tpu {
 
 // Converts trace events in the trace proto to a JSON string that can be
 // consumed by catapult trace viewer.
-string TraceEventsToJson(const Trace &trace);
+string TraceEventsToJson(const profiler::Trace &trace);
 
 }  // namespace tpu
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/profiler/trace_events_to_json_test.cc b/tensorflow/contrib/tpu/profiler/trace_events_to_json_test.cc
index e97989c..8b39fc5 100644
--- a/tensorflow/contrib/tpu/profiler/trace_events_to_json_test.cc
+++ b/tensorflow/contrib/tpu/profiler/trace_events_to_json_test.cc
@@ -17,13 +17,14 @@
 #include "include/json/json.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/profiler/trace_events.pb.h"
 
 namespace tensorflow {
 namespace tpu {
 namespace {
 
 string ConvertTextFormattedTraceToJson(const string& trace_str) {
-  Trace trace;
+  profiler::Trace trace;
   ::tensorflow::protobuf::TextFormat::ParseFromString(trace_str, &trace);
   return TraceEventsToJson(trace);
 }
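
A text-formatted Trace of the kind this helper parses, written against the schema deleted above (field names taken verbatim from it; map entries use the standard key/value text-format syntax):

TRACE_TEXTPB = """
devices {
  key: 1
  value {
    name: "TPU:0"
    device_id: 1
    resources { key: 0 value { name: "step" resource_id: 0 } }
  }
}
trace_events {
  device_id: 1
  resource_id: 0
  name: "fusion.3"
  timestamp_ps: 1000000
  duration_ps: 500000
}
"""
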
diff --git a/tensorflow/contrib/tpu/proto/tpu_embedding_configuration.proto b/tensorflow/contrib/tpu/proto/tpu_embedding_configuration.proto
deleted file mode 100644
index da19b13..0000000
--- a/tensorflow/contrib/tpu/proto/tpu_embedding_configuration.proto
+++ /dev/null
@@ -1,95 +0,0 @@
-syntax = "proto3";
-
-package tensorflow.tpu;
-
-import "tensorflow/contrib/tpu/proto/optimization_parameters.proto";
-import "tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto";
-
-message TPUEmbeddingConfiguration {
-  // Description of the various embedding tables.
-  message TableDescriptor {
-    // Name of the table.
-    string name = 1;
-    // Size of the vocabulary (i.e., number of rows) in the table.
-    int32 vocabulary_size = 2;
-    // The embedding dimension (i.e., the width of the embedding table).
-    int32 dimension = 3;
-    // Number of features mapped to this table.
-    int32 num_features = 4;
-    // Details of the learning algorithm used to update the embedding
-    // parameters.
-    OptimizationParameters optimization_parameters = 5;
-  }
-  repeated TableDescriptor table_descriptor = 1;
-
-  // Mode. Should the embedding layer program be run for inference (just forward
-  // pass), training (both forward and backward pass) or just the backward_pass.
-  enum Mode {
-    UNSPECIFIED = 0;
-    INFERENCE = 1;
-    TRAINING = 2;
-    BACKWARD_PASS_ONLY = 3;
-  }
-  Mode mode = 2;
-
-  // Number of samples in each batch of embedding layer activations sent to
-  // the TensorCore.
-  int32 batch_size_per_tensor_core = 3;
-
-  // Number of TPU hosts used for inference/training.
-  int32 num_hosts = 4;
-
-  // Number of TensorCore used for inference/training.
-  int32 num_tensor_cores = 5;
-
-  // Sharding strategy of the embedding tables among the hosts.
-  // If the sharding_strategy is "mod", each id is assigned to host
-  // "id % num_hosts". For instance, 13 ids are split across 5 hosts as:
-  // [[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]].
-  // If the sharding_strategy is "div", ids are assigned to hosts in a
-  // contiguous manner. In this case, 13 ids are split across 5 hosts as:
-  // [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]].
-  // In both strategies, if the number of ids does not divide evenly among the
-  // hosts, each of the first "table_descriptor.num_ids % num_hosts" hosts
-  // will be assigned one more id.
-  // This partitioning strategy exactly follows that in the embedding_lookup
-  // TensorFlow function at tensorflow/python/ops/embedding_ops.py.
-  enum ShardingStrategy {
-    DIV_DEFAULT = 0;
-    MOD = 1;
-  }
-  ShardingStrategy sharding_strategy = 6;
-
-  // This parameter determines if the execution of the sparse core will be
-  // pipelined with that of the TensorCore. This parameter only affects results
-  // when mode=TRAINING. If mode=INFERENCE or BACKWARD_PASS_ONLY, this parameter
-  // does not affect execution and hence, is a don't care value.
-  //
-  // false: The execution of the sparse core is not pipelined with that of the
-  // TensorCore. The forward pass of every step on the sparse core is executed
-  // only after the backward pass of the previous step is complete. And the
-  // backward pass on the sparse core is executed only after the embedding
-  // gradients have been computed on the TensorCore on every step. This ensures
-  // that the activations on every step observe the gradient updates from the
-  // previous step on both the sparse core and the TensorCore.
-  //
-  // true: The execution of the sparse core is pipelined with that of the
-  // TensorCore. The forward pass of every step on the sparse core can be
-  // executed after the forward pass of the previous step is complete without
-  // waiting for the backward pass. This improves the utilization of the sparse
-  // core allowing it to process step N+1 while the embedding gradients for step
-  // N are computed on the TensorCore. The backward pass of every step on the
-  // sparse core is executed directly after the forward pass for the next step
-  // is complete. The drawback is that embedding activations for step N+1 do not
-  // observe the embedding gradient updates from step N. This could affect model
-  // quality if step N and N+1 involve the same set of embedding IDs. However,
-  // since the embedding updates are sparse, this is generally not considered a
-  // problem.
-  bool pipeline_execution_with_tensor_core = 7;
-
-  // Extended output layout information; if not provided, a compatibility mode
-  // will use defaults that match the old layout. Providing a value for this
-  // field is EXPERIMENTAL and most ways of filling it will probably break. Do
-  // not set it unless you know what you are doing.
-  TPUEmbeddingOutputLayout output_layout = 8;
-}
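
The mod/div sharding comment above is concrete enough to verify. A quick sketch reproducing the documented 13-ids-across-5-hosts split:

def mod_shard(num_ids, num_hosts):
    # id i is assigned to host i % num_hosts.
    return [[i for i in range(num_ids) if i % num_hosts == h]
            for h in range(num_hosts)]

def div_shard(num_ids, num_hosts):
    # Contiguous ranges; the first (num_ids % num_hosts) hosts get one extra.
    base, extra = divmod(num_ids, num_hosts)
    shards, start = [], 0
    for h in range(num_hosts):
        size = base + (1 if h < extra else 0)
        shards.append(list(range(start, start + size)))
        start += size
    return shards

print(mod_shard(13, 5))  # [[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]]
print(div_shard(13, 5))  # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]
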
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
index 500dd2c..8605bae 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
@@ -1,394 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Operations for TPUs."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import platform
-
-from tensorflow.contrib.tpu.python.tpu import tpu_function
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import tf_logging as logging
-
-if platform.system() != "Windows":
-  # pylint: disable=wildcard-import,unused-import,g-import-not-at-top
-  from tensorflow.contrib.tpu.ops import gen_tpu_ops
-  from tensorflow.contrib.tpu.ops.gen_tpu_ops import *
-
-  from tensorflow.contrib.util import loader
-  from tensorflow.python.platform import resource_loader
-  # pylint: enable=wildcard-import,unused-import,g-import-not-at-top
-
-  _tpu_ops = loader.load_op_library(
-      resource_loader.get_path_to_datafile("_tpu_ops.so"))
-
-  def _create_default_group_assignment():
-    num_shards = tpu_function.get_tpu_context().number_of_shards
-    if num_shards is None:
-      logging.warning(
-          "cross_replica_sum should be used within a tpu_shard_context, but "
-          "got unset number_of_shards. Assuming 1.")
-      num_shards = 1
-    group_assignment = [list(range(num_shards))]
-    return group_assignment
-
-  def all_to_all(x,
-                 concat_dimension,
-                 split_dimension,
-                 split_count,
-                 group_assignment=None,
-                 name=None):
-    """Exchange data across TPU replicas.
-
-    Args:
-      x: The local tensor.
-      concat_dimension: The dimension number to concatenate.
-      split_dimension: The dimension number to split.
-      split_count: The number of splits; this number must equal the sub-group
-        size (group_assignment.get_shape()[1]).
-      group_assignment: Optional 2d int32 lists with shape [num_groups,
-        num_replicas_per_group]. `group_assignment[i]` represents the replica
-        ids in the ith subgroup.
-      name: Optional op name.
-
-    Returns:
-      A `Tensor` which is concatenated by data from different replicas.
-    """
-    if group_assignment is None:
-      group_assignment = _create_default_group_assignment()
-    return gen_tpu_ops.all_to_all(
-        x,
-        group_assignment,
-        concat_dimension=concat_dimension,
-        split_dimension=split_dimension,
-        split_count=split_count,
-        name=name)
-
-  @ops.RegisterGradient("AllToAll")
-  def _all_to_all_grad(op, grad):
-    # The gradient of an all-to-all is also an all-to-all, but with the
-    # split_dimension and concat_dimension swapped.
-    # The gradient with respect to group_assignment is None.
-    return [
-        gen_tpu_ops.all_to_all(
-            grad,
-            op.inputs[1],
-            concat_dimension=op.get_attr("split_dimension"),
-            split_dimension=op.get_attr("concat_dimension"),
-            split_count=op.get_attr("split_count")), None
-    ]
-
-  def cross_replica_sum(x, group_assignment=None, name=None):
-    """Sum the input tensor across replicas according to group_assignment.
-
-    Args:
-      x: The local tensor to sum.
-      group_assignment: Optional 2d int32 lists with shape [num_groups,
-        num_replicas_per_group]. `group_assignment[i]` represents the replica
-        ids in the ith subgroup.
-      name: Optional op name.
-
-    Returns:
-      A `Tensor` which is summed across replicas.
-    """
-    if group_assignment is None:
-      group_assignment = _create_default_group_assignment()
-
-    return gen_tpu_ops.cross_replica_sum(x, group_assignment, name=name)
-
-  def collective_permute(x, source_target_pairs, name=None):
-    """Permute the input tensor across replicas given source_target_pairs.
-
-    For each source_target_pair <a, b>, we send replica a's input to replica b.
-    Each replica id may appear at most once in the source column and at most
-    once in the target column. For any replica id not in the target column,
-    this op returns a zero tensor with the same shape and dtype as the
-    input x.
-
-    For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing
-    source_target_pairs=`[[0,1],[1,2],[2,3]]` gets the outputs:
-    `[0, A, B, C]`.
-
-    Args:
-      x: The local tensor to be permuted.
-      source_target_pairs: 2d int lists with shape [num_pairs, 2].
-        source_target_pairs[i][0] represents the source replica id and
-        source_target_pairs[i][1] represents the target replica id.
-      name: Optional op name.
-
-    Returns:
-      A `Tensor` which is permuted.
-    """
-    return gen_tpu_ops.collective_permute(x, source_target_pairs, name=name)
-
-  @ops.RegisterGradient("CollectivePermute")
-  def _collective_permute_grad(op, grad):
-    # The gradient of a collective permute operation is also a collective
-    # permute, but with source/target pairs reversed. The gradient with respect
-    # to input argument `source_target_pairs` is `None`.
-    source_target_pairs = op.inputs[1][:, ::-1]
-    return [gen_tpu_ops.collective_permute(grad, source_target_pairs), None]
-
-  @ops.RegisterGradient("CrossReplicaSum")
-  def _cross_replica_sum_grad(op, grad):
-    # The gradient of a cross replica sum is also a cross-replica sum.
-    # The gradient with respect to group_assignment is None.
-    return [gen_tpu_ops.cross_replica_sum(grad, op.inputs[1]), None]
-
-  # This extra type checking exists to give a more helpful error message in
-  # the common case that uint8 and int64 values are infed. Remove when both
-  # types are supported.
-
-  _SUPPORTED_INFEED_DTYPES = set([
-      dtypes.bool, dtypes.int32, dtypes.int64, dtypes.bfloat16, dtypes.float32,
-      dtypes.complex64, dtypes.uint32
-  ])
-
-  def infeed_dequeue(dtype, shape, name=None):
-    """A placeholder op for a value that will be fed into the computation.
-
-    Args:
-      dtype: A `tf.DType`. The type of elements in the tensor.
-      shape: A `tf.TensorShape` or list of `ints`. The shape of the tensor.
-      name: A name for the operation (optional).
-
-    Returns:
-      A `Tensor` of type `dtype`.
-      A tensor that will be provided using the infeed mechanism.
-
-    Raises:
-      TypeError: If 'dtype` is not a supported infeed type.
-    """
-    if dtype not in _SUPPORTED_INFEED_DTYPES:
-      raise TypeError(
-          "{} is not a supported TPU infeed type. Supported types are: "
-          "{}".format(dtype, list(_SUPPORTED_INFEED_DTYPES)))
-
-    return gen_tpu_ops.infeed_dequeue(dtype, shape, name=name)
-
-  # pylint: disable=redefined-outer-name
-  def infeed_dequeue_tuple(dtypes, shapes, name=None):
-    """A placeholder op for values fed into the TPU simultaneously as a tuple.
-
-    Args:
-      dtypes: A list of `tf.DType`s that has length `>= 1`.
-        The element types of each element in `outputs`.
-      shapes: A list of shapes (each a `tf.TensorShape` or list of `ints`).
-        The shapes of each tensor in `outputs`.
-      name: A name for the operation (optional).
-
-    Returns:
-      A list of `Tensor` objects of type `dtypes`.
-      A list of tensors that will be provided using the infeed mechanism.
-
-    Raises:
-      TypeError: If a type in 'dtypes` is not a supported infeed type.
-    """
-    for dtype in dtypes:
-      if dtype not in _SUPPORTED_INFEED_DTYPES:
-        raise TypeError(
-            "{} is not a supported TPU infeed type. Supported types are: "
-            "{}".format(dtype, list(_SUPPORTED_INFEED_DTYPES)))
-    return gen_tpu_ops.infeed_dequeue_tuple(dtypes, shapes, name=name)
-  # pylint: enable=redefined-outer-name
-
-  # pylint: disable=protected-access
-  def send_tpu_embedding_gradients(inputs,
-                                   config,
-                                   learning_rates=None,
-                                   name=None):
-    """A placeholder op for feeding per-sample gradients to the embedding layer.
-
-    Args:
-      inputs: A TensorList of gradients with which to update embedding tables.
-          This argument has the same length and shapes as the return value of
-          RecvTPUEmbeddingActivations, but contains gradients of the model's
-          loss with respect to the embedding activations. The embedding tables
-          are updated from these gradients via the optimizers specified in the
-          TPU embedding configuration given to tpu.initialize_system.
-      config: Serialized TPUEmbeddingConfiguration proto.
-      learning_rates: A TensorList of float32 scalars, one for each dynamic
-          learning rate tag: see the comments in
-          //third_party/tensorflow/contrib/tpu/proto/
-          optimization_parameters.proto.
-          Multiple tables can share the same dynamic learning rate tag as
-          specified in the configuration. If the learning rates for all tables
-          are constant, this list should be empty.
-      name: A name for the operation (optional).
-
-    Returns:
-      A SendTPUEmbeddingGradients operation.
-    """
-    if learning_rates is None:
-      learning_rates = []
-    return gen_tpu_ops._send_tpu_embedding_gradients(
-        inputs=inputs, learning_rates=learning_rates, config=config, name=name)
-
-
-  send_tpu_embedding_gradients.__doc__ = (
-      gen_tpu_ops._send_tpu_embedding_gradients.__doc__)
-
-  # pylint: disable=protected-access
-  def enqueue_tpu_embedding_integer_batch(batch,
-                                          device_ordinal,
-                                          mode_override=None,
-                                          name=None):
-    """A placeholder op for enqueueing embedding IDs to the TPU.
-
-    Args:
-      batch: A list of 1D tensors, one for each embedding table, containing the
-        indices into the tables.
-      device_ordinal: The TPU device to use. Should be >= 0 and less than the
-        number of TPU cores in the task on which the node is placed.
-      mode_override: A string input that overrides the mode specified in the
-        TPUEmbeddingConfiguration. Supported values are {'unspecified',
-        'inference', 'training', 'backward_pass_only'}. When set to
-        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
-        otherwise mode_override is used (optional).
-      name: A name for the operation (optional).
-
-    Returns:
-      An EnqueueTPUEmbeddingIntegerBatch operation.
-    """
-    if mode_override is None:
-      mode_override = "unspecified"
-    return gen_tpu_ops._enqueue_tpu_embedding_integer_batch(
-        batch=batch,
-        device_ordinal=device_ordinal,
-        mode_override=mode_override,
-        name=name)
-
-  enqueue_tpu_embedding_integer_batch.__doc__ = (
-      gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__)
-
-  # pylint: disable=protected-access
-  def enqueue_tpu_embedding_sparse_batch(sample_indices,
-                                         embedding_indices,
-                                         aggregation_weights,
-                                         device_ordinal,
-                                         combiners=None,
-                                         mode_override=None,
-                                         name=None):
-    """A placeholder op for enqueueing embedding IDs to the TPU.
-
-    Args:
-      sample_indices: A list of rank 1 Tensors specifying the training example
-        and feature to which the corresponding embedding_indices and
-        aggregation_weights values belong. sample_indices[i] must equal b * nf +
-        f, where nf is the number of features from the corresponding table, f is
-        in [0, nf), and b is in [0, batch size).
-      embedding_indices: A list of rank 1 Tensors, indices into the embedding
-        tables.
-      aggregation_weights: A list of rank 1 Tensors containing per sample --
-        i.e. per (training example, feature) -- aggregation weights.
-      device_ordinal: The TPU device to use. Should be >= 0 and less than the
-        number of TPU cores in the task on which the node is placed.
-      combiners: A list of string scalars, one for each embedding table,
-        specifying how to normalize the embedding activations after weighted
-        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
-        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
-        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
-        is to use 'sum' for all tables (optional).
-      mode_override: A string input that overrides the mode specified in the
-        TPUEmbeddingConfiguration. Supported values are {'unspecified',
-        'inference', 'training', 'backward_pass_only'}. When set to
-        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
-        otherwise mode_override is used (optional).
-      name: A name for the operation (optional).
-
-    Returns:
-      An EnqueueTPUEmbeddingSparseBatch operation.
-    """
-    if mode_override is None:
-      mode_override = "unspecified"
-    return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch(
-        sample_indices=sample_indices,
-        embedding_indices=embedding_indices,
-        aggregation_weights=aggregation_weights,
-        device_ordinal=device_ordinal,
-        combiners=combiners,
-        mode_override=mode_override,
-        name=name)
-
-  enqueue_tpu_embedding_sparse_batch.__doc__ = (
-      gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__)
-
-  # pylint: disable=protected-access
-  def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices,
-                                                embedding_indices,
-                                                aggregation_weights,
-                                                table_ids,
-                                                device_ordinal,
-                                                combiners=None,
-                                                mode_override=None,
-                                                name=None):
-    """A placeholder op for enqueueing embedding IDs to the TPU.
-
-    Args:
-      sample_indices: A list of rank 1 Tensors specifying the training example
-        to which the corresponding embedding_indices and aggregation_weights
-        values belong. It corresponds to sp_ids.indices[:,0] in
-        embedding_lookup_sparse().
-      embedding_indices: A list of rank 1 Tensors, indices into the embedding
-        tables. It corresponds to sp_ids.values in embedding_lookup_sparse().
-      aggregation_weights: A list of rank 1 Tensors containing per training
-        example aggregation weights. It corresponds to sp_weights.values in
-        embedding_lookup_sparse().
-      table_ids: A list of integers specifying the identifier of the embedding
-        table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to
-        lookup the corresponding input. The ith input is looked up using
-        table_ids[i]. The size of the table_ids list must be equal to that of
-        sample_indices, embedding_indices and aggregation_weights.
-      device_ordinal: The TPU device to use. Should be >= 0 and less than the
-        number of TPU cores in the task on which the node is placed.
-      combiners: A list of string scalars, one for each embedding table,
-        specifying how to normalize the embedding activations after weighted
-        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
-        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
-        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
-        is to use 'sum' for all tables (optional).
-      mode_override: A string input that overrides the mode specified in the
-        TPUEmbeddingConfiguration. Supported values are {'unspecified',
-        'inference', 'training', 'backward_pass_only'}. When set to
-        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
-        otherwise mode_override is used (optional).
-      name: A name for the operation (optional).
-
-    Returns:
-      An EnqueueTPUEmbeddingSparseTensorBatch operation.
-    """
-    if mode_override is None:
-      mode_override = "unspecified"
-    return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch(
-        sample_indices=sample_indices,
-        embedding_indices=embedding_indices,
-        aggregation_weights=aggregation_weights,
-        table_ids=table_ids,
-        device_ordinal=device_ordinal,
-        combiners=combiners,
-        mode_override=mode_override,
-        name=name)
-
-  enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = (
-      gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__)
-
-else:
-  # We have already built the appropriate libraries into the binary via CMake
-  # if we have built contrib, so we don't need this
-  pass
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.ops.tpu_ops import *
+# pylint: enable=wildcard-import,unused-import
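
The wrappers deleted above live on in tensorflow.python.tpu.ops.tpu_ops; the stub keeps old import paths working. Their semantics are easy to state in pure Python; a sketch of collective_permute's documented behavior (replicas absent from the target column receive zeros):

def collective_permute(values, source_target_pairs):
    # values[i] is replica i's input; each pair <a, b> sends replica a's
    # value to replica b. Untargeted replicas get a zero placeholder.
    out = [0] * len(values)
    for src, dst in source_target_pairs:
        out[dst] = values[src]
    return out

print(collective_permute(["A", "B", "C", "D"], [[0, 1], [1, 2], [2, 3]]))
# -> [0, 'A', 'B', 'C'], matching the docstring example above.
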
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py b/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py
index 5ca38cd..788e1fe 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py
@@ -1,38 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Operations to select TPU core to run."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import platform
-
-if platform.system() != "Windows":
-  # pylint: disable=wildcard-import,unused-import,g-import-not-at-top
-  from tensorflow.contrib.tpu.ops.gen_tpu_ordinal_selector_op import *
-
-  from tensorflow.contrib.util import loader
-  from tensorflow.python.platform import resource_loader
-  # pylint: enable=wildcard-import,unused-import,g-import-not-at-top
-
-  _tpu_ordinal_selector_op = loader.load_op_library(
-      resource_loader.get_path_to_datafile("_tpu_ordinal_selector_op.so"))
-
-else:
-  # We have already built the appropriate libraries into the binary via CMake
-  # if we have built contrib, so we don't need this
-  pass
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.ops.tpu_ordinal_selector_op import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/profiler/__init__.py b/tensorflow/contrib/tpu/python/profiler/__init__.py
index 15ce6ac..aeb061d 100644
--- a/tensorflow/contrib/tpu/python/profiler/__init__.py
+++ b/tensorflow/contrib/tpu/python/profiler/__init__.py
@@ -1,31 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Classes for TPU trace events."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=wildcard-import,unused-import
-from tensorflow.contrib.tpu.profiler.tpu_profiler_analysis_pb2 import *
-from tensorflow.contrib.tpu.profiler.trace_events_pb2 import *
+from tensorflow.python.tpu.profiler import *
 # pylint: enable=wildcard-import,unused-import
-
-from tensorflow.python.util.all_util import remove_undocumented
-
-_allowed_symbols = ['Trace', 'Resource', 'Device', 'TraceEvent']
-
-remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/tpu/python/tpu/__init__.py b/tensorflow/contrib/tpu/python/tpu/__init__.py
index 0dffd70..82d4f68 100644
--- a/tensorflow/contrib/tpu/python/tpu/__init__.py
+++ b/tensorflow/contrib/tpu/python/tpu/__init__.py
@@ -1,20 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Ops related to Tensor Processing Units."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py b/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py
index 6ce96e5..41aa4d2 100644
--- a/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py
+++ b/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py
@@ -1,273 +1,23 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-"""Tooling for support TPU embedding in TPUEstimator."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-
-from tensorflow.contrib.tpu.python.tpu import feature_column as tpu_fc
-from tensorflow.contrib.tpu.python.tpu import tpu_embedding
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.feature_column import feature_column as core_fc
-from tensorflow.python.feature_column import feature_column_lib as core_fc_lib
-
-# pylint: disable=protected-access
-_TPU_EMBEDDING_COLUMN_CLASSES = (tpu_fc._TPUEmbeddingColumn,
-                                 tpu_fc._TPUSharedEmbeddingColumn)
-_EMBEDDING_COLUMN_CLASSES = (core_fc._EmbeddingColumn,
-                             core_fc_lib.EmbeddingColumn,
-                             core_fc._SharedEmbeddingColumn)
-_SUPPORTED_FEATURE_COLUMNS = (core_fc._NumericColumn, core_fc_lib.NumericColumn)
-
-# pylint: enable=protected-access
-
-
-def get_tpu_embedding_config_from_feature_columns(feature_columns):
-  """Create configs for TPUEmbedding from a list of feature columns.
-
-  This function will place one embedding tensor per table and the return is
-  intended to be used as input to TPUEmbedding.
-
-  Args:
-    feature_columns: a list of supported feature columns.
-
-  Returns:
-    A pair of dicts, the first maps tables to their config, the second maps
-    features to tables.
-  """
-
-  allowed = (tpu_fc._TPUEmbeddingColumn, tpu_fc._TPUSharedEmbeddingColumn)  # pylint: disable=protected-access
-
-  for column in feature_columns:
-    if not isinstance(column, allowed):
-      raise TypeError(
-          'Unsupported feature column {}. Supported types are {}.'.format(
-              type(column), allowed))
-
-  table_to_config = {}
-  feature_to_table = {}
-  for column in feature_columns:
-    feature_name = column.get_feature_key_name()
-    table_name = 'tbl_{}'.format(column.get_embedding_var_name())
-    if feature_name in feature_to_table:
-      raise ValueError(
-          'Feature column {} is used with multiple embeddings and this is '
-          'not supported.'.format(feature_name))
-    feature_to_table[feature_name] = table_name
-    vocabulary_size, dimension = column.get_embedding_table_size()
-    table_to_config[table_name] = tpu_embedding.TableConfig(
-        vocabulary_size=vocabulary_size,
-        dimension=dimension,
-        initializer=column.get_initializer(),
-        combiner=column.get_combiner())
-
-  return table_to_config, feature_to_table
-
-
-def _get_tpu_embedding_optimization_parameters(embedding_config_spec):
-  """Get tpu_embedding._OptimizationParameters from EmbeddingConfigSpec."""
-  if embedding_config_spec.optimizer_type == 'adagrad':
-    return tpu_embedding.AdagradParameters(
-        embedding_config_spec.learning_rate,
-        embedding_config_spec.adagrad_initial_accumulator,
-        embedding_config_spec.use_gradient_accumulation)
-  elif embedding_config_spec.optimizer_type == 'sgd':
-    return tpu_embedding.StochasticGradientDescentParameters(
-        embedding_config_spec.learning_rate,
-        embedding_config_spec.use_gradient_accumulation)
-  elif embedding_config_spec.optimizer_type == 'adam':
-    return tpu_embedding.AdamParameters(
-        embedding_config_spec.learning_rate,
-        embedding_config_spec.adam_parameters.beta1,
-        embedding_config_spec.adam_parameters.beta2,
-        embedding_config_spec.adam_parameters.epsilon,
-        use_gradient_accumulation=embedding_config_spec
-        .use_gradient_accumulation)
-  else:
-    raise ValueError('optimizer_type must be adagrad or sgd or adam for now.')
-
-
-AdamParameters = collections.namedtuple('AdamParameters',
-                                        ['beta1', 'beta2', 'epsilon'])
-
-
-# TODO(shizhiw): Improve the API to support more optimizer parameters.
-class EmbeddingConfigSpec(
-    collections.namedtuple('EmbeddingConfigSpec', [
-        'feature_columns', 'learning_rate', 'optimizer_type',
-        'adagrad_initial_accumulator', 'clipping_limit',
-        'use_gradient_accumulation', 'adam_parameters'
-    ])):
-  """Class to keep track of embedding config specification."""
-
-  def __new__(cls,
-              feature_columns,
-              learning_rate,
-              optimizer_type='adagrad',
-              adagrad_initial_accumulator=None,
-              clipping_limit=None,
-              use_gradient_accumulation=False,
-              adam_parameters=None):
-    """Creates an EmbeddingConfigSpec instance.
-
-    Args:
-      feature_columns: All `FeatureColumn`s used by model.
-      learning_rate: embedding optimizer learning rate.
-      optimizer_type: (String) Name of the optimizer for embedding gradients
-        updates. Must be either 'adagrad' ( `tf.train.AdagradOptimizer`, default
-        value), 'sgd' (`tf.train.GradientDescentOptimizer`), or 'adam'
-        (`tf.contrib.opt.LazyAdamOptimizer`) for lazy Adam. This optimizer will
-        be applied to all embedding variables specified by `feature_columns`.
-      adagrad_initial_accumulator: Initial accumulator for Adagrad. Used when
-        optimizer_type is 'adagrad'. Default is `0.1`.
-      clipping_limit: (Optional) Clipping limit (absolute value).
-      use_gradient_accumulation: (Experimental) Whether to accumulate the
-        gradients across TPU embedding mini-batches. Gradient accumulation does
-        not affect SGD and therefore this is applicable only for Adagrad.
-      adam_parameters: AdamParameters. Used when optimizer_type is 'adam'.
-        Default is 0.9 for beta1, 0.999 for beta2 and 1e-8 for epsilon.
-
-    Returns:
-      An EmbeddingConfigSpec instance.
-
-    Raises:
-      ValueError: If the feature_columns are not specified.
-      TypeError: If the feature columns are not of the correct type (one of
-        _SUPPORTED_FEATURE_COLUMNS, _TPU_EMBEDDING_COLUMN_CLASSES or
-        _EMBEDDING_COLUMN_CLASSES).
-      ValueError: If use_gradient_accumulation is True for SGD.
-      ValueError: If `optimizer_type` is not one of "adagrad" or "sgd" or
-        "adam".
-    """
-    if not feature_columns:
-      raise ValueError('`feature_columns` cannot be `None` or empty.')
-
-    # It is unknown at this point whether the TPUEstimator is running in CPU
-    # or TPU mode, so allow non-TPU embedding columns as well.
-    supported_classes = tuple(
-        list(_SUPPORTED_FEATURE_COLUMNS) + list(_TPU_EMBEDDING_COLUMN_CLASSES) +
-        list(_EMBEDDING_COLUMN_CLASSES))
-
-    for column in feature_columns:
-      if not isinstance(column, supported_classes):
-        raise TypeError(
-            'All feature columns must be supported types in {}. Got {}'.format(
-                supported_classes, type(column)))
-
-    if optimizer_type == 'adagrad':
-      if adagrad_initial_accumulator is None:
-        adagrad_initial_accumulator = 0.1
-      if adagrad_initial_accumulator <= 0:
-        raise ValueError('Adagrad initial_accumulator must be positive')
-    elif optimizer_type == 'sgd':
-      if use_gradient_accumulation:
-        raise ValueError('Gradient accumulation makes sense for Adagrad only.')
-    elif optimizer_type == 'adam':
-      if adam_parameters is None:
-        adam_parameters = AdamParameters(0.9, 0.999, 1e-8)
-      if adam_parameters.beta1 < 0. or adam_parameters.beta1 >= 1.:
-        raise ValueError('beta1 must be between 0. and 1; got {}.'.format(
-            adam_parameters.beta1))
-      if adam_parameters.beta2 < 0. or adam_parameters.beta2 >= 1.:
-        raise ValueError('beta2 must be between 0. and 1; got {}.'.format(
-            adam_parameters.beta2))
-      if adam_parameters.epsilon <= 0.:
-        raise ValueError('epsilon must be positive; got {}.'.format(
-            adam_parameters.epsilon))
-    else:
-      raise ValueError('optimizer_type must be adagrad or sgd or adam for now.')
-
-    return super(EmbeddingConfigSpec, cls).__new__(
-        cls,
-        feature_columns=feature_columns,
-        learning_rate=learning_rate,
-        optimizer_type=optimizer_type,
-        adagrad_initial_accumulator=adagrad_initial_accumulator,
-        clipping_limit=clipping_limit,
-        use_gradient_accumulation=use_gradient_accumulation,
-        adam_parameters=adam_parameters)
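-
-# A quick worked check of the Adam validation above (a sketch; `AdamParameters`
-# is the namedtuple this module already references). Beta values must lie in
-# [0, 1) and epsilon must be positive; these are the defaults filled in when
-# `adam_parameters` is None.
-_default_adam = AdamParameters(0.9, 0.999, 1e-8)
-assert 0. <= _default_adam.beta1 < 1.
-assert 0. <= _default_adam.beta2 < 1.
-assert _default_adam.epsilon > 0.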
-
-
-class EmbeddingConfig(object):
-  """This is the internal immutable object for embedding config.
-
-  `EmbeddingConfig` is responsible for translating the user-provided
-  `EmbeddingConfigSpec` into internal data structures, mostly constructor
-  arguments of `TPUEmbedding`.
-  """
-
-  def __init__(self, embedding_config_spec, train_batch_size, eval_batch_size,
-               num_hosts, num_cores, master):
-    self._embedding_config_spec = embedding_config_spec
-    self._train_batch_size = train_batch_size
-    self._eval_batch_size = eval_batch_size
-    self._num_hosts = num_hosts
-    self._num_cores = num_cores
-    self._master = master
-
-    self._table_to_config_dict, self._feature_to_table_dict = (
-        get_tpu_embedding_config_from_feature_columns(
-            embedding_config_spec.feature_columns))
-    self._optimization_parameters = _get_tpu_embedding_optimization_parameters(
-        self._embedding_config_spec)
-    self._mode_to_tpu_embedding_dict = {}
-
-  def has_embedding_tables(self):
-    return bool(self._table_to_config_dict)
-
-  def _create_tpu_embedding(self, mode):
-    """Create tpu_embedding.TPUEmbedding based on mode."""
-    if mode == model_fn_lib.ModeKeys.TRAIN:
-      batch_size = self._train_batch_size
-      tpu_embedding_mode = tpu_embedding.TRAINING
-    elif mode in (model_fn_lib.ModeKeys.EVAL, model_fn_lib.ModeKeys.PREDICT):
-      batch_size = self._eval_batch_size
-      tpu_embedding_mode = tpu_embedding.INFERENCE
-    else:
-      raise ValueError('Mode {} is not supported.'.format(mode))
-
-    tpu_embedding_ = tpu_embedding.TPUEmbedding(
-        self._table_to_config_dict,
-        self._feature_to_table_dict,
-        batch_size,
-        tpu_embedding_mode,
-        self._master,
-        self._optimization_parameters,
-    )
-    return tpu_embedding_
-
-  def get_tpu_embedding(self, mode):
-    if mode not in self._mode_to_tpu_embedding_dict:
-      self._mode_to_tpu_embedding_dict[mode] = (
-          self._create_tpu_embedding(mode))
-    return self._mode_to_tpu_embedding_dict[mode]
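-
-# A plain-Python sketch of the per-mode caching in `get_tpu_embedding` above:
-# the first lookup for a mode builds the object, later lookups return the
-# cached instance.
-_cache = {}
-
-def _get_cached(mode, factory=object):
-  if mode not in _cache:
-    _cache[mode] = factory()
-  return _cache[mode]
-
-assert _get_cached('train') is _get_cached('train')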
-
-
-def split_inputs(ctx, features, labels):
-  """Splits the dense and sparse tensors inside the features and labels."""
-  sparse_features = collections.OrderedDict()
-  if ctx.embedding_config:
-    tpu_embedding_ = ctx.embedding_config.tpu_embedding
-    for feature_key in tpu_embedding_.feature_to_table_dict:
-      sparse_features[feature_key] = features.pop(feature_key)
-
-  return features, labels, sparse_features
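-
-# A plain-Python sketch of the split performed above: features registered with
-# the TPU embedding are popped out of the feature dict, leaving only the dense
-# tensors behind.
-features = {'dense_x': 1.0, 'sparse_id': [3, 7]}
-sparse_features = {'sparse_id': features.pop('sparse_id')}
-assert 'sparse_id' not in features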
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu._tpu_estimator_embedding import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
index 1b09ce1..5eb8034 100644
--- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
+++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
@@ -1,212 +1,23 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
-# Licensed under the Apache License, Version 2.0 (the 'License');
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an 'AS IS' BASIS,
+# distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ======================================
-"""Hook for asynchronous checkpointing.
-
-This hook dispatches checkpoint writing operations in a separate thread to
-allow execution to continue on the main thread.
-"""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
-import threading
-import time
-
-from tensorflow.core.util.event_pb2 import SessionLog
-from tensorflow.python.framework import meta_graph
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import training_util
-from tensorflow.python.training.session_run_hook import SessionRunArgs
-from tensorflow.python.training.summary_io import SummaryWriterCache
-
-
-class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
-  """Saves checkpoints every N steps or seconds."""
-
-  def __init__(self,
-               checkpoint_dir,
-               save_secs=None,
-               save_steps=None,
-               saver=None,
-               checkpoint_basename="model.ckpt",
-               scaffold=None,
-               listeners=None):
-    """Initializes a `CheckpointSaverHook`.
-
-    Args:
-      checkpoint_dir: `str`, base directory for the checkpoint files.
-      save_secs: `int`, save every N secs.
-      save_steps: `int`, save every N steps.
-      saver: `Saver` object, used for saving.
-      checkpoint_basename: `str`, base name for the checkpoint files.
-      scaffold: `Scaffold`, use to get saver object.
-      listeners: List of `CheckpointSaverListener` subclass instances. Used for
-        callbacks that run immediately before or after this hook saves the
-        checkpoint.
-
-    Raises:
-      ValueError: If neither `save_secs` nor `save_steps` is set.
-      ValueError: If both `saver` and `scaffold` are set.
-    """
-    logging.info("Create AsyncCheckpointSaverHook.")
-    if saver is not None and scaffold is not None:
-      raise ValueError("You cannot provide both saver and scaffold.")
-    self._saver = saver
-    self._save_thread = None
-    self._write_graph_thread = None
-    self._checkpoint_dir = checkpoint_dir
-    self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
-    self._scaffold = scaffold
-    self._timer = basic_session_run_hooks.SecondOrStepTimer(
-        every_secs=save_secs, every_steps=save_steps)
-    self._listeners = listeners or []
-    self._steps_per_run = 1
-    self._summary_writer = None
-    self._global_step_tensor = None
-
-    self._last_checkpoint_step = None
-
-  def _set_steps_per_run(self, steps_per_run):
-    self._steps_per_run = steps_per_run
-
-  def begin(self):
-    self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
-    self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
-    if self._global_step_tensor is None:
-      raise RuntimeError(
-          "Global step should be created to use CheckpointSaverHook.")
-    for l in self._listeners:
-      l.begin()
-
-  def after_create_session(self, session, coord):
-    global_step = session.run(self._global_step_tensor)
-
-    # We write the graph and saver_def in the first call of
-    # after_create_session. We cannot do this in begin, since we let other
-    # hooks change the graph and add variables there; the graph is finalized
-    # after all begin calls.
-    def _write_graph_fn(self):
-      training_util.write_graph(
-          ops.get_default_graph().as_graph_def(add_shapes=True),
-          self._checkpoint_dir, "graph.pbtxt")
-    self._write_graph_thread = threading.Thread(target=_write_graph_fn,
-                                                args=[self])
-    self._write_graph_thread.start()
-
-    saver_def = self._get_saver().saver_def if self._get_saver() else None
-    graph = ops.get_default_graph()
-    meta_graph_def = meta_graph.create_meta_graph_def(
-        graph_def=graph.as_graph_def(add_shapes=True), saver_def=saver_def)
-    self._summary_writer.add_graph(graph)
-    self._summary_writer.add_meta_graph(meta_graph_def)
-    # The checkpoint saved here is the state at step "global_step".
-    self._save(session, global_step)
-    self._timer.update_last_triggered_step(global_step)
-
-  def before_run(self, run_context):  # pylint: disable=unused-argument
-    return SessionRunArgs(self._global_step_tensor)
-
-  def after_run(self, run_context, run_values):
-    global_step = run_context.session.run(self._global_step_tensor)
-    if self._timer.should_trigger_for_step(global_step):
-      self._timer.update_last_triggered_step(global_step)
-      logging.info("Triggering checkpoint. %s", global_step)
-      if self._save(run_context.session, global_step):
-        run_context.request_stop()
-
-  def end(self, session):
-    if self._save_thread:
-      logging.info("Waiting for any pending checkpoints to finish.")
-      self._save_thread.join()
-    if self._write_graph_thread:
-      logging.info("Waiting for any pending write_graph to finish.")
-      self._write_graph_thread.join()
-
-    last_step = session.run(self._global_step_tensor)
-
-    if self._last_checkpoint_step != last_step:
-      self._save(session, last_step, asynchronous=False)
-
-    for l in self._listeners:
-      l.end(session, last_step)
-
-  def _save(self, session, step, asynchronous=True):
-    """Saves the latest checkpoint, returns should_stop."""
-
-    # Skip saving on step 0
-    if step == 0:
-      return
-
-    def _save_fn():
-      """Run the saver process."""
-      logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
-
-      start_time = time.time()
-      for l in self._listeners:
-        l.before_save(session, step)
-
-      self._get_saver().save(session, self._save_path, global_step=step)
-      self._summary_writer.add_session_log(
-          SessionLog(
-              status=SessionLog.CHECKPOINT, checkpoint_path=self._save_path),
-          step)
-
-      for l in self._listeners:
-        l.after_save(session, step)
-
-      end_time = time.time()
-      logging.info("Checkpoint actual writing time: (%.3f sec)",
-                   end_time - start_time)
-      logging.info("Checkpoint finished for %d into %s.", step, self._save_path)
-
-    if not asynchronous:
-      self._last_checkpoint_step = step
-      _save_fn()
-      return
-
-    if self._save_thread is not None:
-      self._save_thread.join(timeout=0.1)
-      if self._save_thread.is_alive():
-        logging.info("Saver thread still in progress, skipping checkpoint.")
-        return
-
-    self._last_checkpoint_step = step
-    self._save_thread = threading.Thread(target=_save_fn)
-    self._save_thread.start()
-
-  def _get_saver(self):
-    if self._saver is not None:
-      return self._saver
-    elif self._scaffold is not None:
-      return self._scaffold.saver
-
-    # Get saver from the SAVERS collection if present.
-    collection_key = ops.GraphKeys.SAVERS
-    savers = ops.get_collection(collection_key)
-    if not savers:
-      raise RuntimeError(
-          "No items in collection {}. Please add a saver to the collection "
-          "or provide a saver or scaffold.".format(collection_key))
-    elif len(savers) > 1:
-      raise RuntimeError(
-          "More than one item in collection {}. "
-          "Please indicate which one to use by passing it to the constructor."
-          .format(collection_key))
-
-    self._saver = savers[0]
-    return savers[0]
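-
-# A plain-Python sketch of the dispatch policy in `_save` above: a new
-# checkpoint thread starts only if the previous one has finished; otherwise
-# the checkpoint is skipped rather than queued.
-import threading
-import time
-
-_save_thread = None
-
-def _maybe_save_async(write_fn):
-  global _save_thread
-  if _save_thread is not None:
-    _save_thread.join(timeout=0.1)
-    if _save_thread.is_alive():
-      return False  # Previous checkpoint still writing; skip this one.
-  _save_thread = threading.Thread(target=write_fn)
-  _save_thread.start()
-  return True
-
-assert _maybe_save_async(lambda: time.sleep(1.0))      # Starts a save.
-assert not _maybe_save_async(lambda: time.sleep(1.0))  # Skipped: still busy.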
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.async_checkpoint import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/bfloat16.py b/tensorflow/contrib/tpu/python/tpu/bfloat16.py
index fa74f65..f3d392a 100644
--- a/tensorflow/contrib/tpu/python/tpu/bfloat16.py
+++ b/tensorflow/contrib/tpu/python/tpu/bfloat16.py
@@ -1,77 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Helper context for running models with bfloat16."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.util import tf_contextlib
-
-
-def _get_custom_getter():
-  """Returns a custom getter that this class's methods must be called under.
-
-  All methods of this class must be called under a variable scope that was
-  passed this custom getter. Example:
-
-  ```python
-  network = ConvNetBuilder(...)
-  with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
-    network.conv(...)
-    # Call more methods of network here
-  ```
-
-  Currently, this custom getter only does anything if self.use_tf_layers is
-  True. In that case, it causes variables to be stored as dtype
-  self.variable_type, then casted to the requested dtype, instead of directly
-  storing the variable as the requested dtype.
-  """
-
-  def inner_custom_getter(getter, *args, **kwargs):
-    """Custom getter that forces variables to have type self.variable_type."""
-    cast_to_bfloat16 = False
-    requested_dtype = kwargs['dtype']
-    if requested_dtype == dtypes.bfloat16:
-      # Only change the variable dtype if doing so does not decrease variable
-      # precision.
-      kwargs['dtype'] = dtypes.float32
-      cast_to_bfloat16 = True
-    var = getter(*args, **kwargs)
-    # This if statement is needed to guard the cast, because batch norm
-    # assigns directly to the return value of this custom getter. The cast
-    # makes the return value not a variable so it cannot be assigned. Batch
-    # norm variables are always in fp32 so this if statement is never
-    # triggered for them.
-    if cast_to_bfloat16:
-      var = math_ops.cast(var, dtypes.bfloat16)
-    return var
-
-  return inner_custom_getter
-
-
-@tf_contextlib.contextmanager
-def bfloat16_scope():
-  """Scope class for bfloat16 variables so that the model uses custom getter.
-
-  This enables variables to be read as bfloat16 type when using get_variable.
-  """
-  with variable_scope.variable_scope(
-      '', custom_getter=_get_custom_getter()) as varscope:
-    yield varscope
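-
-# Usage sketch (mirrors the removed bfloat16 test): inside the scope, a
-# variable requested as bfloat16 is stored as float32 and cast on read.
-with bfloat16_scope():
-  v = variable_scope.get_variable('v', [], dtype=dtypes.bfloat16)
-  # `v` reads as bfloat16 here; the underlying variable is float32.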
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.bfloat16 import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/bfloat16_test.py b/tensorflow/contrib/tpu/python/tpu/bfloat16_test.py
deleted file mode 100644
index 26fd3768..0000000
--- a/tensorflow/contrib/tpu/python/tpu/bfloat16_test.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-"""Tests for bfloat16 helper."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.tpu.python.tpu import bfloat16
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import variable_scope
-
-from tensorflow.python.platform import test
-
-
-class BFloat16ScopeTest(test.TestCase):
-
-  def testScopeName(self):
-    """Test if name for the variable scope is propogated correctly.
-    """
-    with bfloat16.bfloat16_scope() as bf:
-      self.assertEqual(bf.name, "")
-
-  def testRequestedDType(self):
-    """Test if requested dtype is honored in the getter.
-    """
-    with bfloat16.bfloat16_scope() as scope:
-      v1 = variable_scope.get_variable("v1", [])
-      self.assertEqual(v1.dtype.base_dtype, dtypes.float32)
-      v2 = variable_scope.get_variable("v2", [], dtype=dtypes.bfloat16)
-      self.assertEqual(v2.dtype.base_dtype, dtypes.bfloat16)
-      self.assertEqual([dtypes.float32, dtypes.float32],
-                       [v.dtype.base_dtype for v in scope.global_variables()])
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py
index bc0cd41..c20aac7 100644
--- a/tensorflow/contrib/tpu/python/tpu/datasets.py
+++ b/tensorflow/contrib/tpu/python/tpu/datasets.py
@@ -1,191 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ======================================
-"""Library of Cloud TPU helper functions for data loading."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.ops import batching
-from tensorflow.python.data.experimental.ops import interleave_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.ops import readers
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import functional_ops
-
-
-def _TextLineDataset(filename):
-  buffer_size = 8 * 1024 * 1024  # 8 MiB per file
-  dataset = readers.TextLineDataset(filename, buffer_size=buffer_size)
-  return dataset
-
-
-def _TFRecordDataset(filename):
-  buffer_size = 8 * 1024 * 1024  # 8 MiB per file
-  dataset = readers.TFRecordDataset(filename, buffer_size=buffer_size)
-  return dataset
-
-
-_FILETYPE_MAP = {
-    'tfrecord': _TFRecordDataset,
-    'textline': _TextLineDataset,
-    'text': _TextLineDataset,
-}
-
-
-def StreamingFilesDataset(files,
-                          filetype=None,
-                          file_reader_job=None,
-                          worker_job=None,
-                          num_epochs=None,
-                          filename_shuffle_buffer_size=None,
-                          num_parallel_reads=None,
-                          batch_transfer_size=None,
-                          sloppy=None):
-  """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM).
-
-  Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read
-  files local to your GCE VM. In order to train using files stored on your local
-  VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset
-  helper to generate a dataset to feed your Cloud TPU with files from your GCE
-  VM.
-
-  The resulting dataset may raise an `OutOfRangeError` if there are no files
-  found as a result of the fileglob expansion.
-
-  Note: StreamingFilesDataset assumes that the session is using a
-  TPUClusterResolver and therefore has a worker and a coordinator job. File
-  loading is done on the coordinator job.
-
-  Args:
-    files: A string glob to match files, or a `tf.data.Dataset` generating file
-      names.
-    filetype: A string (one of 'tfrecord', 'textline', or 'text') or a
-      single-argument TensorFlow function that, when given a filename, returns
-      a dataset.
-    file_reader_job: An optional string that corresponds to the job that should
-      perform the file reads.
-    worker_job: An optional string that corresponds to the job that should
-      process the tensors (i.e. your GPU or TPU worker).
-    num_epochs: The number of epochs through the training set that should be
-      generated. By default, it will repeat infinitely.
-    filename_shuffle_buffer_size: An optional integer whose value controls the
-      shuffling of the file names. If you would like to read from the files in
-      the same order, set to 0 or False.
-    num_parallel_reads: An optional integer controlling the number of files to
-      read from concurrently. (Set to 1 for no parallelism.)
-    batch_transfer_size: An optional integer controlling the batching used to
-      amortize the remote function invocation overhead. Set to a very large
-      number to increase throughput. Set to a very small number to reduce memory
-      consumption. Set to False to skip batching.
-    sloppy: (Optional.) If `False`, read input data while maintaining a
-      deterministic order, which may have a significant performance impact.
-      Defaults to `True`.
-  Returns:
-    A `tf.data.Dataset` with an infinite stream of elements generated by a
-    parallel interleaving of the set of files matched (or generated) by
-    `files`, whose element type is the output of the dataset specified by
-    `filetype`.
-
-  Raises:
-    ValueError: if any argument is not of the expected type.
-  """
-  if filetype is None:
-    filetype = 'tfrecord'
-
-  if isinstance(filetype, str):
-    if filetype not in _FILETYPE_MAP:
-      raise ValueError('Unexpected filetype: %s' % filetype)
-    reader_fn = _FILETYPE_MAP[filetype]
-  elif callable(filetype):
-    reader_fn = filetype
-  else:
-    raise ValueError('filetype should be a string or a callable')
-
-  file_reader_job = file_reader_job or 'coordinator'
-
-  worker_job = worker_job or 'worker'
-
-  if filename_shuffle_buffer_size is None:
-    filename_shuffle_buffer_size = 4096
-
-  num_parallel_reads = num_parallel_reads or 8
-
-  if batch_transfer_size is None:
-    batch_transfer_size = 256
-
-  if sloppy is None:
-    sloppy = True
-
-  with ops.device('/job:%s' % file_reader_job):
-    if isinstance(files, str):
-      source_dataset = dataset_ops.Dataset.list_files(files)
-    elif isinstance(files, dataset_ops.DatasetV2):
-      source_dataset = files
-    else:
-      raise ValueError('files was not a string or a dataset: %s' % files)
-
-    if filename_shuffle_buffer_size:
-      source_dataset = source_dataset.shuffle(
-          buffer_size=filename_shuffle_buffer_size)
-
-    source_dataset = source_dataset.apply(
-        interleave_ops.parallel_interleave(
-            reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy))
-
-    source_dataset = source_dataset.repeat(num_epochs)
-
-    if batch_transfer_size:
-      source_dataset = source_dataset.batch(batch_transfer_size)
-
-    source_dataset = source_dataset.prefetch(1)
-
-    source_iterator = dataset_ops.make_one_shot_iterator(source_dataset)
-    source_handle = source_iterator.string_handle()
-
-  @function.Defun(dtypes.string)
-  def LoadingFunc(h):
-    remote_iterator = iterator_ops.Iterator.from_string_handle(
-        h, source_dataset.output_types, source_dataset.output_shapes)
-    return remote_iterator.get_next()
-
-  def MapFn(unused_input):
-    if isinstance(source_dataset.output_types, dtypes.DType):
-      output_types = [source_dataset.output_types]
-    elif isinstance(source_dataset.output_types, (list, tuple)):
-      output_types = source_dataset.output_types
-    else:
-      raise ValueError('source dataset has invalid output types')
-    remote_calls = functional_ops.remote_call(
-        args=[source_handle],
-        Tout=output_types,
-        f=LoadingFunc,
-        target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)
-    if len(remote_calls) == 1:
-      return remote_calls[0]
-    else:
-      return remote_calls
-
-  with ops.device('/job:%s' % worker_job):
-    output_dataset = dataset_ops.Dataset.range(2).repeat().map(
-        MapFn, num_parallel_calls=4 if sloppy else None)
-    output_dataset = output_dataset.prefetch(1)
-
-    if batch_transfer_size:
-      # Undo the batching used during the transfer.
-      output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1)
-
-  return output_dataset
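-
-# Usage sketch (mirrors the removed datasets_test; the glob is hypothetical):
-# stream TFRecord files, read on the coordinator job, into the worker job.
-dataset = StreamingFilesDataset('/data/train-*.tfrecord', filetype='tfrecord')
-iterator = dataset_ops.make_initializable_iterator(dataset)
-get_next = iterator.get_next()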
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.datasets import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py
deleted file mode 100644
index 8a94f52..0000000
--- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py
+++ /dev/null
@@ -1,214 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""TPU datasets tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from tensorflow.contrib.tpu.python.tpu import datasets
-from tensorflow.core.protobuf import cluster_pb2
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.client import session
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import readers
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.lib.io import python_io
-from tensorflow.python.platform import test
-from tensorflow.python.training import server_lib
-from tensorflow.python.util import compat
-
-_NUM_FILES = 10
-_NUM_ENTRIES = 20
-
-
-class DatasetsTest(test.TestCase):
-
-  def setUp(self):
-    super(DatasetsTest, self).setUp()
-    self._coord = server_lib.Server.create_local_server()
-    self._worker = server_lib.Server.create_local_server()
-
-    self._cluster_def = cluster_pb2.ClusterDef()
-    worker_job = self._cluster_def.job.add()
-    worker_job.name = 'worker'
-    worker_job.tasks[0] = self._worker.target[len('grpc://'):]
-    coord_job = self._cluster_def.job.add()
-    coord_job.name = 'coordinator'
-    coord_job.tasks[0] = self._coord.target[len('grpc://'):]
-
-    session_config = config_pb2.ConfigProto(cluster_def=self._cluster_def)
-
-    self._sess = session.Session(self._worker.target, config=session_config)
-    self._worker_device = '/job:' + worker_job.name
-
-  def testTextLineDataset(self):
-    all_contents = []
-    for i in range(_NUM_FILES):
-      filename = os.path.join(self.get_temp_dir(), 'text_line.%d.txt' % i)
-      contents = []
-      for j in range(_NUM_ENTRIES):
-        contents.append(compat.as_bytes('%d: %d' % (i, j)))
-      with open(filename, 'wb') as f:
-        f.write(b'\n'.join(contents))
-      all_contents.extend(contents)
-
-    dataset = datasets.StreamingFilesDataset(
-        os.path.join(self.get_temp_dir(), 'text_line.*.txt'), filetype='text')
-
-    with ops.device(self._worker_device):
-      iterator = dataset_ops.make_initializable_iterator(dataset)
-    self._sess.run(iterator.initializer)
-    get_next = iterator.get_next()
-
-    retrieved_values = []
-    for _ in range(4 * len(all_contents)):
-      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
-
-    self.assertEqual(set(all_contents), set(retrieved_values))
-
-  def testTFRecordDataset(self):
-    all_contents = []
-    for i in range(_NUM_FILES):
-      filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i)
-      writer = python_io.TFRecordWriter(filename)
-      for j in range(_NUM_ENTRIES):
-        record = compat.as_bytes('Record %d of file %d' % (j, i))
-        writer.write(record)
-        all_contents.append(record)
-      writer.close()
-
-    dataset = datasets.StreamingFilesDataset(
-        os.path.join(self.get_temp_dir(), 'tf_record*'), filetype='tfrecord')
-
-    with ops.device(self._worker_device):
-      iterator = dataset_ops.make_initializable_iterator(dataset)
-    self._sess.run(iterator.initializer)
-    get_next = iterator.get_next()
-
-    retrieved_values = []
-    for _ in range(4 * len(all_contents)):
-      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
-
-    self.assertEqual(set(all_contents), set(retrieved_values))
-
-  def testTFRecordDatasetFromDataset(self):
-    filenames = []
-    all_contents = []
-    for i in range(_NUM_FILES):
-      filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i)
-      filenames.append(filename)
-      writer = python_io.TFRecordWriter(filename)
-      for j in range(_NUM_ENTRIES):
-        record = compat.as_bytes('Record %d of file %d' % (j, i))
-        writer.write(record)
-        all_contents.append(record)
-      writer.close()
-
-    filenames = dataset_ops.Dataset.from_tensor_slices(filenames)
-
-    dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord')
-
-    with ops.device(self._worker_device):
-      iterator = dataset_ops.make_initializable_iterator(dataset)
-    self._sess.run(iterator.initializer)
-    get_next = iterator.get_next()
-
-    retrieved_values = []
-    for _ in range(4 * len(all_contents)):
-      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
-
-    self.assertEqual(set(all_contents), set(retrieved_values))
-
-  def testArbitraryReaderFunc(self):
-
-    def MakeRecord(i, j):
-      return compat.as_bytes('%04d-%04d' % (i, j))
-
-    record_bytes = len(MakeRecord(10, 200))
-
-    all_contents = []
-    for i in range(_NUM_FILES):
-      filename = os.path.join(self.get_temp_dir(), 'fixed_length.%d' % i)
-      with open(filename, 'wb') as f:
-        for j in range(_NUM_ENTRIES):
-          record = MakeRecord(i, j)
-          f.write(record)
-          all_contents.append(record)
-
-    def FixedLengthFile(filename):
-      return readers.FixedLengthRecordDataset(filename, record_bytes)
-
-    dataset = datasets.StreamingFilesDataset(
-        os.path.join(self.get_temp_dir(), 'fixed_length*'),
-        filetype=FixedLengthFile)
-
-    with ops.device(self._worker_device):
-      iterator = dataset_ops.make_initializable_iterator(dataset)
-    self._sess.run(iterator.initializer)
-    get_next = iterator.get_next()
-
-    retrieved_values = []
-    for _ in range(4 * len(all_contents)):
-      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
-
-    self.assertEqual(set(all_contents), set(retrieved_values))
-
-  def testArbitraryReaderFuncFromDatasetGenerator(self):
-
-    def my_generator():
-      yield (1, [1] * 10)
-
-    def gen_dataset(dummy):
-      return dataset_ops.Dataset.from_generator(
-          my_generator, (dtypes.int64, dtypes.int64),
-          (tensor_shape.TensorShape([]), tensor_shape.TensorShape([10])))
-
-    dataset = datasets.StreamingFilesDataset(
-        dataset_ops.Dataset.range(10), filetype=gen_dataset)
-
-    with ops.device(self._worker_device):
-      iterator = dataset_ops.make_initializable_iterator(dataset)
-    self._sess.run(iterator.initializer)
-    get_next = iterator.get_next()
-
-    retrieved_values = self._sess.run(get_next)
-
-    self.assertIsInstance(retrieved_values, (list, tuple))
-    self.assertEqual(len(retrieved_values), 2)
-    self.assertEqual(retrieved_values[0], 1)
-    self.assertItemsEqual(retrieved_values[1], [1] * 10)
-
-  def testUnexpectedFiletypeString(self):
-    with self.assertRaises(ValueError):
-      datasets.StreamingFilesDataset(
-          os.path.join(self.get_temp_dir(), '*'), filetype='foo')
-
-  def testUnexpectedFiletypeType(self):
-    with self.assertRaises(ValueError):
-      datasets.StreamingFilesDataset(
-          os.path.join(self.get_temp_dir(), '*'), filetype=3)
-
-  def testUnexpectedFilesType(self):
-    with self.assertRaises(ValueError):
-      datasets.StreamingFilesDataset(123, filetype='tfrecord')
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/device_assignment.py b/tensorflow/contrib/tpu/python/tpu/device_assignment.py
index 3313dc7..05dffef 100644
--- a/tensorflow/contrib/tpu/python/tpu/device_assignment.py
+++ b/tensorflow/contrib/tpu/python/tpu/device_assignment.py
@@ -1,313 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ======================================
-"""Library of TPU helper functions."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.contrib.tpu.python.tpu.topology import Topology
-
-
-SINGLE_CORE_ASSIGNMENT = [[[0, 0, 0]]]
-
-
-def _compute_task_and_cores_to_replicas(core_assignment, topology):
-  """Computes a nested dict which maps task and logical core to replicas."""
-  task_and_cores_to_replicas = {}
-  for replica in xrange(core_assignment.shape[0]):
-    for logical_core in xrange(core_assignment.shape[1]):
-      coordinates = core_assignment[replica, logical_core, :]
-      task_id = topology.task_ordinal_at_coordinates(coordinates)
-      if task_id not in task_and_cores_to_replicas:
-        task_and_cores_to_replicas[task_id] = {}
-      if logical_core not in task_and_cores_to_replicas[task_id]:
-        task_and_cores_to_replicas[task_id][logical_core] = set()
-
-      task_and_cores_to_replicas[task_id][logical_core].add(replica)
-
-  task_to_sorted_replica_id = {}
-
-  for task, core_to_replicas in task_and_cores_to_replicas.items():
-    core_to_sorted_replicas = {}
-    for core, replicas in core_to_replicas.items():
-      core_to_sorted_replicas[core] = sorted(replicas)
-
-    task_to_sorted_replica_id[task] = core_to_sorted_replicas
-  return task_to_sorted_replica_id
-
-
-class DeviceAssignment(object):
-  """Mapping from logical cores in a computation to the physical TPU topology.
-
-  Prefer to use the `device_assignment()` helper to construct a
-  `DeviceAssignment`; it is easier, if less flexible, than constructing a
-  `DeviceAssignment` directly.
-  """
-
-  def __init__(self, topology, core_assignment):
-    """Constructs a `DeviceAssignment` object.
-
-    Args:
-      topology: A `Topology` object that describes the physical TPU topology.
-      core_assignment: A logical to physical core mapping, represented as a
-        rank 3 numpy array. See the description of the `core_assignment`
-        property for more details.
-
-    Raises:
-      ValueError: If `topology` is not a `Topology` object.
-      ValueError: If `core_assignment` is not a rank 3 numpy array.
-    """
-    if not isinstance(topology, Topology):
-      raise ValueError("topology must be a Topology object, got {}".format(
-          type(topology)))
-    core_assignment = np.asarray(core_assignment, dtype=np.int32)
-
-    self._topology = topology
-
-    if core_assignment.ndim != 3:
-      raise ValueError("core_assignment must be a rank 3 numpy array, "
-                       "got shape {}".format(core_assignment.shape))
-
-    self._num_replicas = core_assignment.shape[0]
-    self._num_cores_per_replica = core_assignment.shape[1]
-
-    if core_assignment.shape[-1] != topology.mesh_rank:
-      raise ValueError(
-          "minor dimension of core_assignment must have size equal to topology "
-          "rank ({}), got shape {}".format(topology.mesh_rank,
-                                           core_assignment.shape))
-
-    self._core_assignment = core_assignment
-    self._task_and_cores_to_replicas = _compute_task_and_cores_to_replicas(
-        self._core_assignment, topology)
-
-  @property
-  def topology(self):
-    """A `Topology` that describes the TPU topology."""
-    return self._topology
-
-  @property
-  def num_cores_per_replica(self):
-    """The number of cores per replica."""
-    return self._num_cores_per_replica
-
-  @property
-  def num_replicas(self):
-    """The number of replicas of the computation."""
-    return self._num_replicas
-
-  @property
-  def core_assignment(self):
-    """The logical to physical core mapping.
-
-    Returns:
-      An integer numpy array of rank 3, with shape
-      `[num_replicas, num_cores_per_replica, topology_rank]`. Maps
-      (replica, logical core) pairs to physical topology coordinates.
-    """
-    return self._core_assignment
-
-  def _coordinates(self, replica, logical_core):
-    """Returns the physical topology coordinates of a logical core."""
-    return tuple(self.core_assignment[replica, logical_core, :])
-
-  def lookup_replicas(self, task_id, logical_core):
-    """Lookup replica ids by task number and logical core.
-
-    Args:
-      task_id: TensorFlow task number.
-      logical_core: An integer, identifying a logical core.
-    Returns:
-      A sorted list of the replicas that are attached to that task and
-      logical_core.
-    Raises:
-      ValueError: If no replica exists in the task which contains the logical
-        core.
-    """
-    try:
-      return self._task_and_cores_to_replicas[task_id][logical_core]
-    except KeyError:
-      raise ValueError(
-          "Can not find any replica in task: {} contains logical_core: {} ".
-          format(task_id, logical_core))
-
-  def tpu_ordinal(self, replica=0, logical_core=0):
-    """Returns the ordinal of the TPU device assigned to a logical core."""
-    coordinates = self._coordinates(replica, logical_core)
-    return self._topology.tpu_device_ordinal_at_coordinates(coordinates)
-
-  def host_device(self, replica=0, logical_core=0, job=None):
-    """Returns the CPU device attached to a logical core."""
-    coordinates = self._coordinates(replica, logical_core)
-    return self._topology.cpu_device_name_at_coordinates(coordinates, job=job)
-
-  def tpu_device(self, replica=0, logical_core=0, job=None):
-    """Returns the name of the TPU device assigned to a logical core."""
-    coordinates = self._coordinates(replica, logical_core)
-    return self._topology.tpu_device_name_at_coordinates(coordinates, job=job)
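-
-# Query sketch (assumes `da` is a DeviceAssignment, e.g. built by the
-# `device_assignment()` helper below): map replica 0's logical core 0 to its
-# physical devices.
-#
-#   tpu_name = da.tpu_device(replica=0, logical_core=0)
-#   host_name = da.host_device(replica=0, logical_core=0)
-#   ordinal = da.tpu_ordinal(replica=0, logical_core=0)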
-
-
-def device_assignment(topology,
-                      computation_shape=None,
-                      computation_stride=None,
-                      num_replicas=1):
-  """Computes a device_assignment of a computation across a TPU topology.
-
-  Attempts to choose a compact grid of cores for locality.
-
-  Returns a `DeviceAssignment` that describes the cores in the topology
-  assigned to each logical core of each replica.
-
-  `computation_shape` and `computation_stride` values should be powers of 2 for
-  optimal packing.
-
-  Args:
-    topology: A `Topology` object that describes the TPU cluster topology.
-      To obtain a TPU topology, evaluate the `Tensor` returned by
-      `initialize_system` using `Session.run`. Either a serialized
-      `TopologyProto` or a `Topology` object may be passed. Note: you must
-      evaluate the `Tensor` first; you cannot pass an unevaluated `Tensor` here.
-    computation_shape: A rank 1 int32 numpy array with size equal to the
-      topology rank, describing the shape of the computation's block of cores.
-      If None, the `computation_shape` is `[1] * topology_rank`.
-    computation_stride: A rank 1 int32 numpy array of size `topology_rank`,
-      describing the inter-core spacing of the `computation_shape` cores in the
-      TPU topology. If None, the `computation_stride` is `[1] * topology_rank`.
-    num_replicas: The number of computation replicas to run. The replicas will
-      be packed into the free spaces of the topology.
-
-  Returns:
-    A DeviceAssignment object, which describes the mapping between the logical
-    cores in each computation replica and the physical cores in the TPU
-    topology.
-
-  Raises:
-    ValueError: If `topology` is not a valid `Topology` object.
-    ValueError: If `computation_shape` or `computation_stride` are not 1D int32
-      numpy arrays with shape [3] where all values are positive.
-    ValueError: If computation's replicas cannot fit into the TPU topology.
-  """
-  # Deserialize the Topology proto, if it is a string.
-  if isinstance(topology, bytes):
-    topology = Topology(serialized=topology)
-
-  if not isinstance(topology, Topology):
-    raise ValueError("`topology` is not a Topology object; got {}".format(
-        type(topology)))
-
-  topology_rank = len(topology.mesh_shape)
-  mesh_shape = topology.mesh_shape
-  if computation_shape is None:
-    computation_shape = np.array([1] * topology_rank, dtype=np.int32)
-  else:
-    computation_shape = np.asarray(computation_shape, dtype=np.int32)
-
-  if computation_stride is None:
-    computation_stride = np.array([1] * topology_rank, dtype=np.int32)
-  else:
-    computation_stride = np.asarray(computation_stride, dtype=np.int32)
-
-  if computation_shape.shape != (topology_rank,):
-    raise ValueError("computation_shape must have shape [{}]; got {}".format(
-        topology_rank, computation_shape.shape))
-  if computation_stride.shape != (topology_rank,):
-    raise ValueError("computation_stride must have shape [{}]; got {}".format(
-        topology_rank, computation_stride.shape))
-
-  if any(computation_shape < 1):
-    raise ValueError(
-        "computation_shape must be positive; got computation_shape={}".format(
-            computation_shape))
-  if any(computation_stride < 1):
-    raise ValueError(
-        "computation_stride must be positive; got computation_stride={}".format(
-            computation_stride))
-
-  # Computes the physical size of one computation instance.
-  computation_footprint = computation_shape * computation_stride
-  if any(computation_footprint > mesh_shape):
-    raise ValueError(
-        "computation footprint {} does not fit in TPU topology shape {}".format(
-            computation_footprint, mesh_shape))
-
-  # Computes how many copies of the computation footprint fit in the mesh.
-  block_counts = mesh_shape // computation_footprint
-
-  replica_counts = block_counts * computation_stride
-  max_replicas = np.prod(replica_counts)
-  if num_replicas > max_replicas:
-    raise ValueError(
-        "requested {} replicas but only {} replicas with shape {} and "
-        "computation_stride {} fit in a TPU mesh of shape {}".format(
-            num_replicas, max_replicas, computation_shape, computation_stride,
-            mesh_shape))
-
-  def ceil_of_ratio(n, m):
-    return (n + m - 1) // m
-
-  replica_shape = [0] * topology_rank
-  if num_replicas > 0:
-    remaining_replicas = num_replicas
-    remaining_dims = topology_rank
-
-    # Choose dimensions as close to an equal cube as possible, in order of
-    # increasing dimension size. By visiting dimensions in increasing size, we
-    # assign the most constrained dimension first, so we won't make infeasible
-    # choices.
-    #
-    # As a secondary sort order, visit the dimensions in reverse order. This
-    # means we try to use both cores on the same chip in preference to two cores
-    # on different chips.
-    for x, ni in sorted(((x, -i) for (i, x) in enumerate(replica_counts))):
-      i = -ni
-      target_size = int(math.ceil(remaining_replicas**(1.0 / remaining_dims)))
-      replica_shape[i] = min(target_size, x)
-      remaining_replicas = ceil_of_ratio(remaining_replicas, replica_shape[i])
-      remaining_dims -= 1
-
-    assert remaining_replicas == 1 and remaining_dims == 0
-
-  # Assigns an offset to each replica such that no two replicas overlap.
-  replica_offsets = np.full([num_replicas, topology_rank], -1, dtype=np.int32)
-  for replica in xrange(num_replicas):
-    # Chooses a replica number in each axis.
-    t = replica
-    pos = []
-    for dim in replica_shape[::-1]:
-      pos.append(t % dim)
-      t //= dim
-    replica_pos = np.array(pos[::-1], dtype=np.int32)
-
-    # Determines where that replica starts in each axis.
-    outer = replica_pos // computation_stride
-    inner = replica_pos % computation_stride
-    replica_offsets[replica, :] = outer * computation_footprint + inner
-
-  # Computes a complete logical core -> physical core mapping for each replica.
-  indices = [
-      np.arange(0, computation_shape[i] * computation_stride[i],
-                computation_stride[i]) for i in xrange(topology_rank)
-  ]
-  indices = np.concatenate(
-      [i[..., np.newaxis] for i in np.meshgrid(*indices, indexing="ij")],
-      axis=-1)
-  indices = indices.reshape((-1, topology_rank))
-  assignment = indices + replica_offsets[:, np.newaxis, :]
-  return DeviceAssignment(topology, core_assignment=assignment)
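-
-# Worked example of the capacity computation above: on a mesh of shape
-# [4, 4, 2] with computation_shape [2, 2, 1] and computation_stride [1, 1, 1],
-# the footprint is [2, 2, 1], block_counts is [2, 2, 2], and at most
-# 2 * 2 * 2 = 8 replicas fit.
-import numpy as np
-
-_mesh = np.array([4, 4, 2])
-_shape = np.array([2, 2, 1])
-_stride = np.array([1, 1, 1])
-_footprint = _shape * _stride
-assert np.prod((_mesh // _footprint) * _stride) == 8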
+# pylint: disable=wildcard-import,unused-import,redefined-builtin
+from tensorflow.python.tpu.device_assignment import *
+# pylint: enable=wildcard-import,unused-import,redefined-builtin
diff --git a/tensorflow/contrib/tpu/python/tpu/error_handling.py b/tensorflow/contrib/tpu/python/tpu/error_handling.py
index 52e1ea4..1b1328b 100644
--- a/tensorflow/contrib/tpu/python/tpu/error_handling.py
+++ b/tensorflow/contrib/tpu/python/tpu/error_handling.py
@@ -1,132 +1,23 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-"""ErrorRendezvous handler for collecting errors from multiple threads."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import contextlib
-import sys
-import threading
-import time
-
-import six
-
-from tensorflow.python.framework import errors
-from tensorflow.python.platform import tf_logging as logging
-
-_UNINTERESTING_ERRORS = (errors.CancelledError,)
-
-
-class ErrorRendezvous(object):
-  """Resolve errors from multiple threads during TPU execution.
-
-  TPU errors can occur on the infeed or outfeed threads as well as the main
-  training thread.
-
-  Depending on which thread "wins" and receives the session error first, we may
-  end up showing users a confusing and non-actionable error message (session
-  cancelled) instead of a root cause (e.g. a bad filename).
-
-  The rendezvous object provides a location to capture these errors until all
-  threads terminate.  At that point we can choose the most informative error
-  to report.
-  """
-
-  def __init__(self, num_sources):
-    # string -> (message, traceback)
-    self._errors = {}
-    self._num_sources = num_sources
-    self._session_cancel_timer = None
-
-  def record_error(self, source, exc_info, session=None):
-    """Report an exception from the given source.
-
-    If a session is passed, a timer will be registered to close it after a few
-    seconds.  This is necessary to ensure the main training loop does not hang
-    if an infeed/outfeed error occurs.  We sleep a few seconds to allow a more
-    interesting error from another thread to propagate.
-
-    Args:
-      source: string, source of the error
-      exc_info: Output from `sys.exc_info` (type, value, traceback)
-      session: Session to close after delay.
-    """
-    _, value, _ = exc_info
-    self._errors[source] = exc_info
-    logging.info('Error recorded from %s: %s', source, value)
-
-    if session is not None and self._session_cancel_timer is None:
-
-      def _cancel_session():
-        time.sleep(5)
-        try:
-          session.close()
-        except:  # pylint: disable=bare-except
-          pass
-
-      self._session_cancel_timer = threading.Thread(target=_cancel_session)
-      self._session_cancel_timer.daemon = True
-      self._session_cancel_timer.start()
-
-  def record_done(self, source):
-    """Mark execution source `source` as done.
-
-    If an error was originally reported from `source` it is left intact.
-
-    Args:
-      source: `str`, source being recorded
-    """
-    logging.info('%s marked as finished', source)
-    if source not in self._errors:
-      self._errors[source] = None
-
-  @contextlib.contextmanager
-  def catch_errors(self, source, session=None):
-    """Context manager to report any errors within a block."""
-    try:
-      yield
-    except Exception:  # pylint: disable=broad-except
-      self.record_error(source, sys.exc_info(), session)
-
-  def raise_errors(self, timeout_sec=0):
-    """Wait for up to `timeout` seconds for all error sources to finish.
-
-    Preferentially raise "interesting" errors (errors not in the
-    _UNINTERESTING_ERRORS) set.
-
-    Args:
-      timeout_sec: Seconds to wait for other error sources.
-    """
-    for _ in range(timeout_sec):
-      if len(self._errors) == self._num_sources:
-        break
-      time.sleep(1)
-
-    kept_errors = [(k, v) for (k, v) in self._errors.items() if v is not None]
-
-    # First check for any interesting errors, then fall back on the session
-    # cancelled errors etc.
-    for k, (typ, value, traceback) in kept_errors:
-      if isinstance(value, _UNINTERESTING_ERRORS):
-        continue
-      else:
-        logging.warn('Reraising captured error')
-        six.reraise(typ, value, traceback)
-
-    for k, (typ, value, traceback) in kept_errors:
-      logging.warn('Reraising captured error')
-      six.reraise(typ, value, traceback)
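-
-# Usage sketch: one rendezvous shared across the infeed, outfeed, and main
-# threads (`run_infeed_loop` is a hypothetical per-thread body).
-rendezvous = ErrorRendezvous(num_sources=3)
-
-def run_infeed_loop():
-  pass  # Hypothetical work; a real loop would enqueue infeed ops here.
-
-with rendezvous.catch_errors(source='infeed'):
-  run_infeed_loop()  # Any exception raised here is captured, not propagated.
-rendezvous.record_done('infeed')
-# Once all sources finish (or the timeout elapses), re-raise the most
-# informative captured error, preferring non-cancellation errors.
-rendezvous.raise_errors(timeout_sec=5)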
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.error_handling import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/feature_column.py b/tensorflow/contrib/tpu/python/tpu/feature_column.py
index 68bcdb5..ded75e9 100644
--- a/tensorflow/contrib/tpu/python/tpu/feature_column.py
+++ b/tensorflow/contrib/tpu/python/tpu/feature_column.py
@@ -1,429 +1,30 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-"""TPU Feature Column Library."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.contrib.tpu.python.tpu import tpu_function
-from tensorflow.python.feature_column import feature_column as fc
-from tensorflow.python.feature_column import feature_column_lib as fc_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import variable_scope
-# pylint: disable=protected-access
-
-
-_TPU_FC_TO_SCOPE = '_tpu_feature_column_scope'
-_SUPPORTED_CATEGORICAL_COLUMNS = (fc._IdentityCategoricalColumn,
-                                  fc._VocabularyFileCategoricalColumn,
-                                  fc._VocabularyListCategoricalColumn,
-                                  fc._WeightedCategoricalColumn,
-                                  fc_lib.IdentityCategoricalColumn,
-                                  fc_lib.VocabularyFileCategoricalColumn,
-                                  fc_lib.VocabularyListCategoricalColumn,
-                                  fc_lib.WeightedCategoricalColumn)
-
-
-def embedding_column(categorical_column,
-                     dimension,
-                     combiner='mean',
-                     initializer=None):
-  """TPU embedding_column for `tf.feature_column.embedding_column`.
-
-  Note that the interface for TPU embedding_column is different from the
-  non-TPU version. The following args available for the non-TPU version are
-  NOT supported: ckpt_to_load_from, tensor_name_in_ckpt, max_norm and
-  trainable.
-
-  Args:
-    categorical_column: A categorical_column returned from
-        categorical_column_with_identity, weighted_categorical_column,
-        categorical_column_with_vocabulary_list or
-        categorical_column_with_vocabulary_file.
-    dimension: An integer specifying dimension of the embedding, must be > 0.
-    combiner: A string specifying how to reduce if there are multiple entries
-      in a single row. For more information, see
-      `tf.feature_column.embedding_column`.
-    initializer: A variable initializer function to be used in embedding
-      variable initialization. If not specified, defaults to
-      `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
-      `1/sqrt(dimension)`.
-
-  Returns:
-    A `_TPUEmbeddingColumn`.
-
-  Raises:
-    ValueError: if `dimension` is not > 0.
-    ValueError: if `initializer` is specified but not callable.
-  """
-  if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
-    raise TypeError(
-        'categorical_column for tpu '
-        'embedding_column must be type %s, got %s.' % (' or '.join([
-            cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
-        ]), type(categorical_column)))
-  if (dimension is None) or (dimension < 1):
-    raise ValueError('Invalid dimension {}.'.format(dimension))
-
-  if (initializer is not None) and (not callable(initializer)):
-    raise ValueError('initializer must be callable if specified. '
-                     'Embedding of column_name: {}'.format(
-                         categorical_column.name))
-  if initializer is None:
-    initializer = init_ops.truncated_normal_initializer(
-        mean=0.0, stddev=1 / math.sqrt(dimension))
-
-  embedding_shape = categorical_column._num_buckets, dimension  # pylint: disable=protected-access
-
-  def _creator(weight_collections, scope):
-    embedding_column_layer = fc._EmbeddingColumnLayer(
-        embedding_shape=embedding_shape,
-        initializer=initializer,
-        weight_collections=weight_collections,
-        trainable=True,
-        name='embedding_column_layer')
-    return embedding_column_layer(None, scope=scope)  # pylint: disable=not-callable
-
-  column = _TPUEmbeddingColumn(
-      categorical_column=categorical_column,
-      dimension=dimension,
-      combiner=combiner,
-      layer_creator=_creator,
-      ckpt_to_load_from=None,
-      tensor_name_in_ckpt=None,
-      max_norm=None,
-      trainable=True)
-  # For Embedding column, the initializer is hidden inside the creator Fn, which
-  # is not accessible later. So, we attach it to a special field. Also note
-  # that non-TPU Embedding column and non-TPU shared Embedding column handle the
-  # initializer differently. See shared_embedding_columns for details.
-  column._tpu_initializer = initializer
-  return column
-
-
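For orientation, a minimal sketch of how this API was typically called before the move (TF 1.x graph mode; the feature name and sizes are illustrative, not part of this change):

    import tensorflow as tf
    from tensorflow.contrib.tpu.python.tpu import feature_column as tpu_fc

    # A categorical feature with 1000 buckets, embedded into 16 dimensions.
    video_id = tf.feature_column.categorical_column_with_identity(
        key='video_id', num_buckets=1000)
    video_emb = tpu_fc.embedding_column(video_id, dimension=16)
    # The column is then consumed like any other dense column, e.g. via
    # tf.feature_column.input_layer(features, [video_emb]) inside a
    # TPUEstimator model_fn.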
-def shared_embedding_columns(categorical_columns,
-                             dimension,
-                             combiner='mean',
-                             initializer=None,
-                             shared_embedding_collection_name=None):
-  """List of dense columns that convert from sparse, categorical input."""
-  for categorical_column in categorical_columns:
-    if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
-      raise TypeError(
-          'categorical_column for tpu '
-          'shared_embedding_columns must be type %s, got %s.' % (' or '.join([
-              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
-          ]), type(categorical_column)))
-  columns = fc_lib.shared_embedding_columns(
-      categorical_columns,
-      dimension,
-      combiner=combiner,
-      initializer=initializer,
-      shared_embedding_collection_name=shared_embedding_collection_name,
-      ckpt_to_load_from=None,
-      tensor_name_in_ckpt=None,
-      max_norm=None,
-      trainable=True)
-
-  # Use the initializer and shared_embedding_collection_name to create TPU
-  # version
-  initializer = columns[0].initializer
-  shared_embedding_collection_name = columns[0].shared_embedding_collection_name
-  tpu_columns = []
-
-  # Create the state (_SharedEmbeddingColumnLayer) here.
-  for categorical_column in categorical_columns:
-    column = _TPUSharedEmbeddingColumn(
-        categorical_column=categorical_column,
-        dimension=dimension,
-        combiner=combiner,
-        initializer=initializer,
-        shared_embedding_collection_name=shared_embedding_collection_name,
-        ckpt_to_load_from=None,
-        tensor_name_in_ckpt=None,
-        max_norm=None,
-        trainable=True)
-    tpu_columns.append(column)
-
-  return tpu_columns
-
-
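Continuing the sketch above, the shared variant returns one TPU column per input categorical column, all backed by a single embedding table (illustrative; the columns must share a vocabulary size):

    watched = tf.feature_column.categorical_column_with_identity(
        key='watched_video_id', num_buckets=1000)
    impression = tf.feature_column.categorical_column_with_identity(
        key='impression_video_id', num_buckets=1000)
    watched_emb, impression_emb = tpu_fc.shared_embedding_columns(
        [watched, impression], dimension=16)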
-class _TPUBaseEmbeddingColumn(object):
-  """Base class for TPU Embedding Column."""
-
-  def __init__(self, categorical_column):
-    self._tpu_categorical_column = categorical_column
-
-  def get_combiner(self):
-    """Returns the embedding combiner."""
-    raise NotImplementedError('not implemented')
-
-  def get_embedding_table_size(self):
-    """Returns the embedding table size, tuple of vocab size and dimension."""
-    raise NotImplementedError('not implemented')
-
-  def get_feature_key_name(self):
-    """Returns the feature key name in the features dict."""
-    raise NotImplementedError('not implemented')
-
-  def get_weight_key_name(self):
-    """Return the key name for weights."""
-    raise NotImplementedError('not implemented')
-
-  def get_embedding_var_name(self):
-    """Returns the embedding variable name.
-
-    Feature key name and embedding variable name are usually one-to-one mapping.
-    But for shared embedding columns, it is many-to-one mapping.
-    """
-    raise NotImplementedError('not implemented')
-
-  def get_initializer(self):
-    """Returns the initializer."""
-    raise NotImplementedError('not implemented')
-
-  def is_categorical_column_weighted(self):
-    """Check if the categorical column of the embedding column is weighted."""
-    raise NotImplementedError('not implemented')
-
-
-class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
-  """Core Embedding Column."""
-
-  def __new__(cls,
-              categorical_column,
-              dimension,
-              combiner='mean',
-              layer_creator=None,
-              ckpt_to_load_from=None,
-              tensor_name_in_ckpt=None,
-              max_norm=None,
-              trainable=True):
-    # Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable
-    # are not supported on TPU. They are solely for matching the signature of
-    # __new__ of parent class fc._EmbeddingColumn.
-    return fc._EmbeddingColumn.__new__(
-        cls,
-        categorical_column,
-        dimension,
-        combiner=combiner,
-        layer_creator=layer_creator,
-        ckpt_to_load_from=ckpt_to_load_from,
-        tensor_name_in_ckpt=tensor_name_in_ckpt,
-        max_norm=max_norm,
-        trainable=trainable)
-
-  def __init__(self,
-               categorical_column,
-               dimension,
-               combiner='mean',
-               layer_creator=None,
-               ckpt_to_load_from=None,
-               tensor_name_in_ckpt=None,
-               max_norm=None,
-               trainable=True):
-    _TPUBaseEmbeddingColumn.__init__(self, categorical_column)
-    self._key = None
-
-  def get_combiner(self):
-    return self.combiner
-
-  def get_embedding_table_size(self):
-    """Returns num_ids and width."""
-    return (self.categorical_column._num_buckets, self.dimension)
-
-  def get_feature_key_name(self):
-    """get_feature_key_name."""
-    if self.is_categorical_column_weighted():
-      return self.categorical_column.categorical_column.name
-    return self.categorical_column.name
-
-  def get_weight_key_name(self):
-    """get_weight_key_name."""
-    if self.is_categorical_column_weighted():
-      return self.categorical_column.weight_feature_key
-    return None
-
-  def get_embedding_var_name(self):
-    """get_embedding_var_name."""
-    return self.categorical_column.name
-
-  def get_initializer(self):
-    return self._tpu_initializer
-
-  def is_categorical_column_weighted(self):
-    """Check if the categorical column of the embedding column is weighted."""
-    if isinstance(
-        self.categorical_column,
-        (
-            fc._WeightedCategoricalColumn,  # pylint: disable=protected-access
-            fc_lib.WeightedCategoricalColumn)):
-      return True
-    return False
-
-  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
-    if tpu.under_tpu_inference_context():
-      def host_computation():
-        return fc._EmbeddingColumn._get_dense_tensor(
-            self, inputs, weight_collections, trainable)
-      return tpu.outside_compilation(host_computation)
-
-    if _is_running_on_cpu():
-      return fc._EmbeddingColumn._get_dense_tensor(
-          self, inputs, weight_collections, trainable)
-
-    # TPU mode
-    # Get the embeddings from the LazyBuilder.
-    tensor = inputs.get(self.get_feature_key_name())
-
-    # Add to collection for _create_tpu_embedding_variables_and_ops
-    _record_variable_scope_and_name(self.get_embedding_var_name(),
-                                    'embedding_weights')
-
-    return tensor
-
-
-class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
-                                fc._SharedEmbeddingColumn):
-  """Core Shared Embedding Column."""
-
-  def __new__(cls,
-              categorical_column,
-              dimension,
-              combiner='mean',
-              initializer=None,
-              shared_embedding_collection_name=None,
-              ckpt_to_load_from=None,
-              tensor_name_in_ckpt=None,
-              max_norm=None,
-              trainable=True):
-    return fc._SharedEmbeddingColumn.__new__(
-        cls,
-        categorical_column,
-        dimension,
-        combiner=combiner,
-        initializer=initializer,
-        shared_embedding_collection_name=shared_embedding_collection_name,
-        ckpt_to_load_from=ckpt_to_load_from,
-        tensor_name_in_ckpt=tensor_name_in_ckpt,
-        max_norm=max_norm,
-        trainable=trainable)
-
-  def __init__(self,
-               categorical_column,
-               dimension,
-               combiner='mean',
-               initializer=None,
-               shared_embedding_collection_name=None,
-               ckpt_to_load_from=None,
-               tensor_name_in_ckpt=None,
-               max_norm=None,
-               trainable=True):
-
-    _TPUBaseEmbeddingColumn.__init__(self, categorical_column)
-    self._key = None
-
-  def get_combiner(self):
-    return self.combiner
-
-  def get_embedding_table_size(self):
-    """Returns num_ids and width."""
-    return (self.categorical_column._num_buckets, self.dimension)
-
-  def get_feature_key_name(self):
-    """get_feature_key_name."""
-    if self.is_categorical_column_weighted():
-      return self.categorical_column.categorical_column.name
-    return self.categorical_column.name
-
-  def get_weight_key_name(self):
-    """get_weight_key_name."""
-    if self.is_categorical_column_weighted():
-      return self.categorical_column.weight_feature_key
-    return None
-
-  def get_embedding_var_name(self):
-    """get_embedding_var_name."""
-    return self.shared_embedding_collection_name
-
-  def get_initializer(self):
-    return self.initializer
-
-  def is_categorical_column_weighted(self):
-    """Check if the categorical column of the embedding column is weighted."""
-    if isinstance(
-        self.categorical_column,
-        (
-            fc._WeightedCategoricalColumn,  # pylint: disable=protected-access
-            fc_lib.WeightedCategoricalColumn)):
-      return True
-    return False
-
-  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
-    if tpu.under_tpu_inference_context():
-      def host_computation():
-        return fc._SharedEmbeddingColumn._get_dense_tensor(
-            self, inputs, weight_collections, trainable)
-      return tpu.outside_compilation(host_computation)
-
-    if _is_running_on_cpu():
-      return fc._SharedEmbeddingColumn._get_dense_tensor(
-          self, inputs, weight_collections, trainable)
-
-    # TPU mode
-    # Get the embeddings from the LazyBuilder.
-    tensor = inputs.get(self.get_feature_key_name())
-
-    # Add to collection for _create_tpu_embedding_variables_and_ops
-    _record_variable_scope_and_name(
-        self.get_embedding_var_name(),
-        'embedding_weights',
-        is_shared_embedding=True)
-    return tensor
-
-
-def _record_variable_scope_and_name(embedding_var_name,
-                                    embedding_var_name_in_fc,
-                                    is_shared_embedding=False):
-  """Add embedding variable name and scope to collection."""
-  g = ops.get_default_graph()
-  collection = g.get_collection_ref(_TPU_FC_TO_SCOPE)
-  if not collection:
-    collection.append({})
-
-  var_def_dict = collection[0]
-
-  captured_scope = None
-
-  if is_shared_embedding and (embedding_var_name in var_def_dict):
-    if var_def_dict[embedding_var_name][1] != embedding_var_name_in_fc:
-      raise ValueError(
-          'For embedding var name {}, the shared embedding name is different, '
-          'got {}; expected {}'.format(embedding_var_name,
-                                       embedding_var_name_in_fc,
-                                       var_def_dict[embedding_var_name][1]))
-  else:
-    # scope contains var_scope_name.
-    captured_scope = variable_scope.get_variable_scope()
-    var_def_dict[embedding_var_name] = (captured_scope.name,
-                                        embedding_var_name_in_fc)
-
-
-def _is_running_on_cpu():
-  """Returns True if the current context is CPU model."""
-  return tpu_function.get_tpu_context().number_of_shards is None
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.feature_column import *
+# used by tests
+from tensorflow.python.tpu.feature_column import _is_running_on_cpu
+from tensorflow.python.tpu.feature_column import _record_variable_scope_and_name
+from tensorflow.python.tpu.feature_column import _TPU_FC_TO_SCOPE
+from tensorflow.python.tpu.feature_column import _TPUBaseEmbeddingColumn
+from tensorflow.python.tpu.feature_column import _TPUEmbeddingColumn
+from tensorflow.python.tpu.feature_column import _TPUSharedEmbeddingColumn
+# pylint: enable=wildcard-import,unused-import
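The net effect of the stub is that the old contrib import path keeps resolving to the implementation that now lives under tensorflow.python.tpu. A quick, illustrative sanity check of the aliasing:

    from tensorflow.contrib.tpu.python.tpu import feature_column as contrib_fc
    from tensorflow.python.tpu import feature_column as core_fc

    # The wildcard and explicit imports in the stub make these the same objects.
    assert contrib_fc.embedding_column is core_fc.embedding_column
    assert contrib_fc._TPUEmbeddingColumn is core_fc._TPUEmbeddingColumn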
diff --git a/tensorflow/contrib/tpu/python/tpu/feature_column_test.py b/tensorflow/contrib/tpu/python/tpu/feature_column_test.py
deleted file mode 100644
index 75164cc..0000000
--- a/tensorflow/contrib/tpu/python/tpu/feature_column_test.py
+++ /dev/null
@@ -1,286 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ===================================================================
-"""Tests for contrib.tpu.python.tpu.feature_column."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.tpu.python.tpu import feature_column as tpu_fc
-from tensorflow.python.client import session
-from tensorflow.python.feature_column import feature_column as fc
-from tensorflow.python.feature_column import feature_column_lib as fc_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import test
-
-
-def _initialized_session():
-  sess = session.Session()
-  sess.run(variables_lib.global_variables_initializer())
-  sess.run(lookup_ops.tables_initializer())
-  return sess
-
-
-class EmbeddingColumnTest(test.TestCase):
-
-  def test_defaults(self):
-    categorical_column = fc_lib.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column = tpu_fc.embedding_column(
-        categorical_column, dimension=embedding_dimension)
-    self.assertIs(categorical_column, embedding_column.categorical_column)
-    self.assertEqual(embedding_dimension, embedding_column.dimension)
-    self.assertEqual('mean', embedding_column.combiner)
-    self.assertEqual('aaa_embedding', embedding_column.name)
-    self.assertEqual('aaa_embedding', embedding_column._var_scope_name)
-    self.assertEqual((embedding_dimension,), embedding_column._variable_shape)
-    self.assertEqual({
-        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
-    }, embedding_column._parse_example_spec)
-
-  def test_all_constructor_args(self):
-    categorical_column = fc_lib.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column = tpu_fc.embedding_column(
-        categorical_column,
-        dimension=embedding_dimension,
-        combiner='my_combiner',
-        initializer=lambda: 'my_initializer')
-    self.assertIs(categorical_column, embedding_column.categorical_column)
-    self.assertEqual(embedding_dimension, embedding_column.dimension)
-    self.assertEqual('my_combiner', embedding_column.combiner)
-    self.assertEqual('aaa_embedding', embedding_column.name)
-    self.assertEqual('aaa_embedding', embedding_column._var_scope_name)
-    self.assertEqual((embedding_dimension,), embedding_column._variable_shape)
-    self.assertEqual({
-        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
-    }, embedding_column._parse_example_spec)
-
-  def test_get_dense_tensor(self):
-    # Inputs.
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 5))
-
-    # Embedding variable.
-    embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
-
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return embedding_values
-
-    # Expected lookup result, using combiner='mean'.
-    expected_lookups = (
-        # example 0, ids [2], embedding = [7, 11]
-        (7., 11.),
-        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
-        (2., 3.5),
-        # example 2, ids [], embedding = [0, 0]
-        (0., 0.),
-        # example 3, ids [1], embedding = [3, 5]
-        (3., 5.),
-    )
-
-    # Build columns.
-    categorical_column = fc_lib.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column = tpu_fc.embedding_column(
-        categorical_column,
-        dimension=embedding_dimension,
-        initializer=_initializer)
-
-    # Provide sparse input and get dense result.
-    embedding_lookup = embedding_column._get_dense_tensor(
-        fc._LazyBuilder({
-            'aaa': sparse_input
-        }))
-
-    # Assert expected embedding variable and lookups.
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('embedding_weights:0',),
-                          tuple([v.name for v in global_vars]))
-    with _initialized_session():
-      self.assertAllEqual(embedding_values, global_vars[0].eval())
-      self.assertAllEqual(expected_lookups, embedding_lookup.eval())
-
-
-class SharedEmbeddingColumnTest(test.TestCase):
-
-  def test_defaults(self):
-    categorical_column_a = fc_lib.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    categorical_column_b = fc_lib.categorical_column_with_identity(
-        key='bbb', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column_b, embedding_column_a = tpu_fc.shared_embedding_columns(
-        [categorical_column_b, categorical_column_a],
-        dimension=embedding_dimension)
-    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
-    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
-    self.assertEqual(embedding_dimension, embedding_column_a.dimension)
-    self.assertEqual(embedding_dimension, embedding_column_b.dimension)
-    self.assertEqual('mean', embedding_column_a.combiner)
-    self.assertEqual('mean', embedding_column_b.combiner)
-    self.assertIsNotNone(embedding_column_a.initializer)
-    self.assertIsNotNone(embedding_column_b.initializer)
-    self.assertEqual('aaa_bbb_shared_embedding',
-                     embedding_column_a.shared_embedding_collection_name)
-    self.assertEqual('aaa_bbb_shared_embedding',
-                     embedding_column_b.shared_embedding_collection_name)
-    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
-    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
-    self.assertEqual('aaa_bbb_shared_embedding',
-                     embedding_column_a._var_scope_name)
-    self.assertEqual('aaa_bbb_shared_embedding',
-                     embedding_column_b._var_scope_name)
-    self.assertEqual((embedding_dimension,), embedding_column_a._variable_shape)
-    self.assertEqual((embedding_dimension,), embedding_column_b._variable_shape)
-    self.assertEqual({
-        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
-    }, embedding_column_a._parse_example_spec)
-    self.assertEqual({
-        'bbb': parsing_ops.VarLenFeature(dtypes.int64)
-    }, embedding_column_b._parse_example_spec)
-
-  def test_all_constructor_args(self):
-    categorical_column_a = fc_lib.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    categorical_column_b = fc_lib.categorical_column_with_identity(
-        key='bbb', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns(
-        [categorical_column_a, categorical_column_b],
-        dimension=embedding_dimension,
-        combiner='my_combiner',
-        initializer=lambda: 'my_initializer',
-        shared_embedding_collection_name='var_scope_name')
-    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
-    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
-    self.assertEqual(embedding_dimension, embedding_column_a.dimension)
-    self.assertEqual(embedding_dimension, embedding_column_b.dimension)
-    self.assertEqual('my_combiner', embedding_column_a.combiner)
-    self.assertEqual('my_combiner', embedding_column_b.combiner)
-    self.assertEqual('my_initializer', embedding_column_a.initializer())
-    self.assertEqual('my_initializer', embedding_column_b.initializer())
-    self.assertEqual('var_scope_name',
-                     embedding_column_a.shared_embedding_collection_name)
-    self.assertEqual('var_scope_name',
-                     embedding_column_b.shared_embedding_collection_name)
-    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
-    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
-    self.assertEqual('var_scope_name', embedding_column_a._var_scope_name)
-    self.assertEqual('var_scope_name', embedding_column_b._var_scope_name)
-    self.assertEqual((embedding_dimension,), embedding_column_a._variable_shape)
-    self.assertEqual((embedding_dimension,), embedding_column_b._variable_shape)
-    self.assertEqual({
-        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
-    }, embedding_column_a._parse_example_spec)
-    self.assertEqual({
-        'bbb': parsing_ops.VarLenFeature(dtypes.int64)
-    }, embedding_column_b._parse_example_spec)
-
-  def test_get_dense_tensor(self):
-    # Inputs.
-    vocabulary_size = 3
-    # -1 values are ignored.
-    input_a = np.array([
-        [2, -1, -1],  # example 0, ids [2]
-        [0, 1, -1]
-    ])  # example 1, ids [0, 1]
-    input_b = np.array([
-        [0, -1, -1],  # example 0, ids [0]
-        [-1, -1, -1]
-    ])  # example 1, ids []
-    input_features = {'aaa': input_a, 'bbb': input_b}
-
-    # Embedding variable.
-    embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
-
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return embedding_values
-
-    # Expected lookup result, using combiner='mean'.
-    expected_lookups_a = (
-        # example 0:
-        (7., 11.),  # ids [2], embedding = [7, 11]
-        # example 1:
-        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
-    )
-    expected_lookups_b = (
-        # example 0:
-        (1., 2.),  # ids [0], embedding = [1, 2]
-        # example 1:
-        (0., 0.),  # ids [], embedding = [0, 0]
-    )
-
-    # Build columns.
-    categorical_column_a = fc_lib.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    categorical_column_b = fc_lib.categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns(
-        [categorical_column_a, categorical_column_b],
-        dimension=embedding_dimension,
-        initializer=_initializer)
-
-    # Provide sparse input and get dense result.
-    embedding_lookup_a = embedding_column_a._get_dense_tensor(
-        fc._LazyBuilder(input_features))
-    embedding_lookup_b = embedding_column_b._get_dense_tensor(
-        fc._LazyBuilder(input_features))
-
-    # Assert expected embedding variable and lookups.
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('embedding_weights:0',),
-                          tuple([v.name for v in global_vars]))
-    embedding_var = global_vars[0]
-    with _initialized_session():
-      self.assertAllEqual(embedding_values, embedding_var.eval())
-      self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
-      self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/functional.py b/tensorflow/contrib/tpu/python/tpu/functional.py
index 24c8515..9a57592 100644
--- a/tensorflow/contrib/tpu/python/tpu/functional.py
+++ b/tensorflow/contrib/tpu/python/tpu/functional.py
@@ -1,39 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-"""Functional operations."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import platform
-
-from tensorflow.contrib.tpu.python.tpu import gen_functional_ops
-
-
-TPUPartitionedCall = gen_functional_ops._tpu_partitioned_call  # pylint: disable=invalid-name,protected-access
-
-
-if platform.system() != "Windows":
-  # pylint: disable=wildcard-import,unused-import,g-import-not-at-top
-  from tensorflow.contrib.tpu.ops.gen_tpu_ordinal_selector_op import *
-
-  from tensorflow.contrib.util import loader
-  from tensorflow.python.platform import resource_loader
-  # pylint: enable=wildcard-import,unused-import,g-import-not-at-top
-
-  _tpu_partitioned_call_op = loader.load_op_library(
-      resource_loader.get_path_to_datafile("../ops/_functional_ops.so")
-  )
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.functional import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 37fe9af..6ad4e45 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -55,8 +55,6 @@
 import six
 
 from tensorflow.contrib.cluster_resolver.python.training import tpu_cluster_resolver as tpu_cluster_resolver_lib
-from tensorflow.contrib.framework.python.framework import experimental
-from tensorflow.contrib.tpu.proto import compilation_result_pb2 as tpu_compilation_result
 from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import keras_tpu_variables
 from tensorflow.contrib.tpu.python.tpu import tpu
@@ -64,6 +62,7 @@
 from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
 from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf.tpu import compilation_result_pb2 as tpu_compilation_result
 from tensorflow.python.client import session as tf_session
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
@@ -94,6 +93,7 @@
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util.deprecation import deprecated
 
 
 # TODO(b/114775106): temporary shim to optionally initialize the TPU
@@ -2172,7 +2172,10 @@
 # pylint: enable=bad-continuation
 
 
-@experimental
+@deprecated(
+    '2019-02-20', 'Switch to tf.contrib.distribute.TPUStrategy. '
+    'https://www.tensorflow.org/api_docs/python/tf/contrib/distribute/DistributionStrategy'
+)
 def tpu_model(model, strategy=None):
   """Copy `model` along with weights to the TPU.
 
diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py
index 5cb2ca6..ed8f952 100644
--- a/tensorflow/contrib/tpu/python/tpu/session_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/session_support.py
@@ -1,438 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
-# Licensed under the Apache License, Version 2.0 (the 'License');
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an 'AS IS' BASIS,
+# distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ======================================
-"""Operations for handling session logging and shutdown notifications."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import threading
-import time
-
-from google.protobuf import text_format
-
-from tensorflow.contrib.tpu.python.ops import tpu_ops
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.core.util import event_pb2
-from tensorflow.python.client import session as session_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training_util
-
-_WATCHDOG = None
-
-
-class CoordinatorShutdownException(Exception):
-  """Raised when the coordinator needs to shutdown."""
-  pass
-
-
-def _clone_session(session, graph=None):
-  return session_lib.Session(
-      target=session.sess_str,
-      config=session._config,  # pylint: disable=protected-access
-      graph=graph if graph else session.graph)
-
-
-def _make_heartbeat_op(session, device, request_ph):
-  """Return a heartbeat op or None if heartbeats are not supported by device."""
-  try:
-    # Test if we can connect in an isolated graph + session
-    with ops.Graph().as_default():
-      with _clone_session(session) as temp_session:
-        with ops.device(device):
-          heartbeat_op = tpu_ops.worker_heartbeat('')
-          options = config_pb2.RunOptions(timeout_in_ms=5000)
-          temp_session.run(heartbeat_op, options=options)
-  except errors.InvalidArgumentError as _:
-    logging.warning('Error running heartbeat on %s', device)
-    return None
-  except errors.DeadlineExceededError as _:
-    logging.warning('Timeout connecting to %s when testing heartbeat', device)
-    return None
-
-  # If we successfully connected and pinged the worker, go ahead and construct
-  # the operation.
-  with ops.device(device):
-    return tpu_ops.worker_heartbeat(request_ph)
-
-
-class WorkerHeartbeatManager(object):
-  """Manages the status/heartbeat monitor for a set of workers."""
-
-  def __init__(self, session, devices, heartbeat_ops, request_placeholder):
-    """Construct a new WorkerHeartbeatManager.
-
-    (Prefer using `WorkerHeartbeatManager.from_devices` when possible.)
-
-    Args:
-      session: `tf.Session`, session to use for heartbeat operations.
-      devices: `list[string]` Set of devices to connect to.
-      heartbeat_ops: `list[tf.Operation]` Heartbeat operations.
-      request_placeholder: `tf.Placeholder[String]` Placeholder used to specify
-        the WorkerHeartbeatRequest protocol buffer.
-    """
-    self._session = session
-    self._devices = devices
-    self._ops = heartbeat_ops
-    self._request_placeholder = request_placeholder
-
-  @staticmethod
-  def from_devices(session, devices):
-    """Construct a heartbeat manager for the given devices."""
-    if not devices:
-      logging.error('Trying to create heartbeat manager with no devices?')
-
-    logging.info('Creating heartbeat manager for %s', devices)
-    request_placeholder = array_ops.placeholder(
-        name='worker_heartbeat_request', dtype=dtypes.string)
-
-    heartbeat_ops = []
-    kept_devices = []
-    for device in devices:
-      heartbeat_op = _make_heartbeat_op(session, device, request_placeholder)
-      if heartbeat_op is not None:
-        kept_devices.append(device)
-        heartbeat_ops.append(heartbeat_op)
-      else:
-        logging.warning('Heartbeat support not available for %s', device)
-
-    return WorkerHeartbeatManager(session, kept_devices, heartbeat_ops,
-                                  request_placeholder)
-
-  def num_workers(self):
-    return len(self._devices)
-
-  def configure(self, message):
-    """Configure heartbeat manager for all devices.
-
-    Args:
-      message: `event_pb2.WorkerHeartbeatRequest`
-
-    Returns:
-      `None`
-    """
-    logging.info('Configuring worker heartbeat: %s',
-                 text_format.MessageToString(message))
-    self._session.run(self._ops,
-                      {self._request_placeholder: message.SerializeToString()})
-
-  def ping(self, request=None, timeout_in_ms=5000):
-    """Ping all workers, returning the parsed status results."""
-    if request is None:
-      request = event_pb2.WorkerHeartbeatRequest()
-
-    options = config_pb2.RunOptions(timeout_in_ms=timeout_in_ms)
-    results = self._session.run(
-        self._ops,
-        feed_dict={self._request_placeholder: request.SerializeToString()},
-        options=options)
-    parsed_results = [
-        event_pb2.WorkerHeartbeatResponse.FromString(res_pb)
-        for res_pb in results
-    ]
-    logging.debug('Ping results: %s', parsed_results)
-    return parsed_results
-
-  def lame_workers(self):
-    """Ping all workers, returning manager containing lame workers (or None)."""
-    ping_results = self.ping()
-    lame_workers = []
-
-    for ping_response, device, op in zip(ping_results, self._devices,
-                                         self._ops):
-      if ping_response.health_status != event_pb2.OK:
-        lame_workers.append((device, op))
-
-    if not lame_workers:
-      return None
-
-    bad_devices, bad_ops = zip(*lame_workers)
-    return WorkerHeartbeatManager(self._session, bad_devices, bad_ops,
-                                  self._request_placeholder)
-
-  def __repr__(self):
-    return 'HeartbeatManager(%s)' % ','.join(self._devices)
-
-  def shutdown(self, timeout_ms=10000):
-    """Shutdown all workers after `shutdown_timeout_secs`."""
-    logging.info('Shutting down %s.', self)
-    req = event_pb2.WorkerHeartbeatRequest(
-        watchdog_config=event_pb2.WatchdogConfig(timeout_ms=timeout_ms),
-        shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR)
-    self.configure(req)
-
-    # Wait for workers to shut down.  This isn't strictly required
-    # but it avoids triggering multiple checkpoints with the same lame worker.
-    logging.info('Waiting %dms for worker shutdown.', timeout_ms)
-    time.sleep(timeout_ms / 1000)
-
-
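As a usage sketch (illustrative; `sess` is assumed to be an existing tf.Session connected to the cluster), the manager is normally built from a live session and then polled:

    devices = all_worker_devices(sess)  # worker CPU devices only
    manager = WorkerHeartbeatManager.from_devices(sess, devices)
    lame = manager.lame_workers()
    if lame is not None:
      lame.shutdown(timeout_ms=10000)  # shut down just the unhealthy workers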
-def all_worker_devices(session):
-  """Return a list of devices for each worker in the system."""
-  devices = session.list_devices()
-  return [
-      device.name
-      for device in devices
-      if ':CPU:' in device.name and 'coordinator' not in device.name
-  ]
-
-
-class WatchdogManager(threading.Thread):
-  """Configures worker watchdog timer and handles periodic pings.
-
-  Usage:
-    # Ping workers every minute, shutting down workers if they haven't received
-    # a ping after 1 hour.
-    watchdog_manager = WatchdogManager(
-      ping_interval=60, shutdown_timeout=3600
-    )
-
-    # Use as a context manager, resetting watchdog on context exit:
-    with watchdog_manager:
-      session.run(...)
-
-    # Or setup globally; watchdog will remain active until program exit.
-    watchdog_manager.configure_and_run()
-  """
-
-  def __init__(self,
-               session,
-               devices=None,
-               ping_interval=60,
-               shutdown_timeout=3600):
-    """Initialize a watchdog manager.
-
-    Args:
-      session: Session connected to worker devices.  A cloned session and graph
-        will be created for managing worker pings.
-      devices: Set of devices to monitor.  If None, all workers will be
-        monitored.
-      ping_interval: Time, in seconds, between watchdog pings.
-      shutdown_timeout: Time, in seconds, before watchdog timeout.
-    """
-    threading.Thread.__init__(self)
-    self.ping_interval = ping_interval
-    self.shutdown_timeout = shutdown_timeout
-    self.daemon = True
-    self._config = session._config  # pylint: disable=protected-access
-    self._target = session.sess_str
-    self._running = False
-    self._devices = devices
-
-    self._graph = None
-    self._session = None
-    self._worker_manager = None
-
-  def _reset_manager(self):
-    """Reset the graph, session and worker manager."""
-    self._graph = ops.Graph()
-    self._session = session_lib.Session(
-        target=self._target,
-        graph=self._graph,
-        config=self._config,
-    )
-
-    if self._devices is None:
-      self._devices = all_worker_devices(self._session)
-
-    with self._graph.as_default():
-      self._worker_manager = WorkerHeartbeatManager.from_devices(
-          self._session, self._devices)
-
-    self._worker_manager.configure(
-        event_pb2.WorkerHeartbeatRequest(
-            watchdog_config=event_pb2.WatchdogConfig(
-                timeout_ms=self.shutdown_timeout * 1000,),
-            shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR))
-
-  def configure_and_run(self):
-    logging.info(
-        'Enabling watchdog timer with %d second timeout '
-        'and %d second ping interval.', self.shutdown_timeout,
-        self.ping_interval)
-    self._reset_manager()
-    self._running = True
-    self.start()
-
-  def stop(self):
-    logging.info('Stopping worker watchdog.')
-    self._worker_manager.configure(
-        event_pb2.WorkerHeartbeatRequest(
-            watchdog_config=event_pb2.WatchdogConfig(timeout_ms=-1,),
-            shutdown_mode=event_pb2.NOT_CONFIGURED))
-    self._running = False
-    self.join()
-
-  def __enter__(self):
-    self.configure_and_run()
-
-  def __exit__(self, exc_type, exc_val, exc_tb):
-    self.stop()
-
-  def run(self):
-    # Don't fetch logs or adjust timing: just ping the watchdog.
-    #
-    # If we hit an exception, reset our session as it is likely broken.
-    while self._running:
-      try:
-        self._worker_manager.ping(request=None)
-        time.sleep(self.ping_interval)
-      except errors.OpError as e:
-        # Catch any TF errors that occur so we don't stop sending heartbeats
-        logging.debug('Caught error while sending heartbeat: %s', e)
-        self._reset_manager()
-
-
-def start_worker_watchdog(session,
-                          devices=None,
-                          ping_interval=60,
-                          shutdown_timeout=3600):
-  """Start global worker watchdog to shutdown workers on coordinator exit."""
-  global _WATCHDOG
-  if _WATCHDOG is None:
-    # Ensure we can send a few pings before we timeout!
-    ping_interval = min(shutdown_timeout / 10., ping_interval)
-    _WATCHDOG = WatchdogManager(session, devices, ping_interval,
-                                shutdown_timeout)
-    _WATCHDOG.configure_and_run()
-
-
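A short usage sketch of the global variant (illustrative): once a training session exists, the coordinator arms the watchdog and it stays active until process exit:

    # Ping every 60s; workers self-terminate if silent for an hour.
    start_worker_watchdog(sess, ping_interval=60, shutdown_timeout=3600)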
-class GracefulShutdownHook(session_run_hook.SessionRunHook):
-  """Session hook that watches for shutdown events.
-
-  If a shutdown is indicated, `saver.save(checkpoint_prefix)` is executed, and
-  a CoordinatorShutdownException is raised to terminate the main session.  If
-  `saver` is None the `SAVERS` collection will be read to find a saver.
-
-  `on_shutdown_hooks` is an optional list of functions that should be called
-  after checkpointing.  The function is called with (`run_context`,
-  `all_workers`, `lame_workers`).
-
-  If `heartbeat_group` is not specified, it will default to all CPU workers
-  in the system.
-  """
-
-  def __init__(self, checkpoint_prefix, saver=None, on_shutdown_hooks=None):
-    self._saver = saver
-    self._checkpoint_prefix = checkpoint_prefix
-    self._on_shutdown_hooks = on_shutdown_hooks if on_shutdown_hooks else []
-
-    # Worker heartbeats are managed independently of the main training graph.
-    self._graph = ops.Graph()
-    self._workers = None
-    self._session = None
-    self._heartbeat_supported = False
-
-  def after_create_session(self, training_session, coord):  # pylint: disable=unused-argument
-    # N.B. We have to pull the global step here to avoid it being unavailable
-    # at checkpoint time; the graph has been frozen at that point.
-    if training_util.get_global_step() is None and self.saver() is not None:
-      raise ValueError(
-          'Saver defined but no global step.  Run `get_or_create_global_step()`'
-          ' in your model definition to allow checkpointing.')
-
-    with self._graph.as_default():
-      logging.info('Installing graceful shutdown hook.')
-      self._session = _clone_session(training_session, self._graph)
-      self._workers = WorkerHeartbeatManager.from_devices(
-          self._session, all_worker_devices(self._session))
-      self._heartbeat_supported = self._workers.num_workers() > 0
-      if self._heartbeat_supported:
-        self._workers.configure(
-            event_pb2.WorkerHeartbeatRequest(
-                shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR))
-      else:
-        logging.warn(
-            'No workers support heartbeats. Failure handling will be disabled.')
-
-  def saver(self):
-    if self._saver:
-      return self._saver
-
-    savers = ops.get_collection(ops.GraphKeys.SAVERS)
-    if not savers:
-      return None
-
-    if not isinstance(savers, list):
-      return savers
-
-    if len(savers) > 1:
-      logging.error(
-          'Multiple savers in the SAVERS collection.  On-demand checkpointing '
-          'will be disabled. Pass an explicit `saver` to the constructor to '
-          'override this behavior.')
-      return None
-
-    return savers[0]
-
-  def after_run(self, run_context, run_values):
-    del run_values
-
-    if not self._heartbeat_supported:
-      return
-
-    lame_workers = self._workers.lame_workers()
-    if lame_workers:
-      logging.info('ShutdownHook: lame workers found: %s', lame_workers)
-
-      if self.saver():
-        logging.info('ShutdownHook: saving checkpoint to %s',
-                     self._checkpoint_prefix)
-        self.saver().save(
-            run_context.session,
-            self._checkpoint_prefix,
-            global_step=training_util.get_global_step(),
-            write_state=True,
-        )
-      else:
-        logging.info('ShutdownHook: no Saver defined.')
-
-      for fn in self._on_shutdown_hooks:
-        fn(run_context, self._workers, lame_workers)
-
-
-class RestartComputation(object):
-  """Restart the entire computation.
-
-  This hook shuts down all workers and returns control to the top-level by
-  throwing a CoordinatorShutdownException.
-  """
-
-  def __init__(self, timeout_ms=10000):
-    self.timeout_ms = timeout_ms
-
-  def __call__(self, run_context, all_workers, lame_workers):
-    del run_context, lame_workers
-    all_workers.shutdown(timeout_ms=self.timeout_ms)
-
-    logging.info('Terminating coordinator.')
-    raise CoordinatorShutdownException()
-
-
-class ShutdownLameWorkers(object):
-  """Shutdown lamed workers.
-
-  Processing will continue normally (typically by waiting for the down
-  workers to be restarted).
-  """
-
-  def __init__(self, timeout_ms=10000):
-    self.timeout_in_ms = timeout_ms
-
-  def __call__(self, run_context, all_workers, lame_workers):
-    lame_workers.shutdown(timeout_ms=self.timeout_in_ms)
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.session_support import *
+# pylint: enable=wildcard-import,unused-import
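For reference, these hooks were typically composed with a MonitoredSession; a hedged sketch (TF 1.x; `train_op` and the checkpoint path are illustrative, and a saver plus global step must exist for checkpointing to occur):

    import tensorflow as tf
    from tensorflow.contrib.tpu.python.tpu import session_support  # via this stub

    hook = session_support.GracefulShutdownHook(
        checkpoint_prefix='/tmp/model/ckpt',
        on_shutdown_hooks=[session_support.RestartComputation(timeout_ms=10000)])
    with tf.train.MonitoredTrainingSession(hooks=[hook]) as sess:
      while not sess.should_stop():
        sess.run(train_op)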
diff --git a/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py b/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py
index 2c5ea65..73db253 100644
--- a/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py
+++ b/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py
@@ -1,1481 +1,23 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ========================================================================
-"""A utility to trace tensor values on TPU."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
-import os.path
-import re
-import sys
-
-from tensorflow.contrib.tpu.python.ops import tpu_ops
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import control_flow_util
-from tensorflow.python.ops import gen_math_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import linalg_ops
-from tensorflow.python.ops import logging_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-
-_TRACER_LOG_PREFIX = ' [>>>TT>>>]'
-_DEVICE_TYPE_TPU = 'tpu'
-_DEVICE_TYPE_CPU = 'cpu'
-_TRACE_MODE_NAN_INF = 'nan-inf'
-_TRACE_MODE_PART_TENSOR = 'part-tensor'
-_TRACE_MODE_PART_TENSOR_SIZE = 3
-_TRACE_MODE_FULL_TENSOR = 'full-tensor'
-_TRACE_MODE_NORM = 'norm'
-_TRACE_MODE_MAX_ABS = 'max-abs'
-_SUBMODE_BRIEF = 'brief'
-_SUBMODE_DETAILED = 'detailed'
-_REASON_OUTSIDE_OP_RANGE = 'not-traced-outside-op-range'
-_REASON_UNSAFE_OP = 'not-traced-unsafe-op'
-_REASON_UNSAFE_SCALAR = 'not-traced-unsafe-scalar'
-_REASON_LESS_INTERESTING_OP = 'not-traced-less-interesting-op'
-_REASON_DEVICE_MISMATCH = 'not-traced-device-mismatch'
-_REASON_DYNAMIC_SHAPE = 'not-traced-dynamic-shape'
-_REASON_SCALAR_GET_TRACED = 'traced-scalar'
-_REASON_TENSOR_GET_TRACED = 'traced-tensor'
-_REASON_USER_INCLUDED = 'traced-user-included'
-_REASON_USER_EXCLUDED = 'not-traced-user-excluded'
-_REASON_NOT_EXECUTED = 'not-traced-not-in-exec-path'
-_REASON_NON_NUMERIC_TENSOR = 'not-traced-non-numeric-tensor'
-_MARKER_SECTION_BEGIN = '!!!!!!! section-begin:'
-_MARKER_SECTION_END = '!!!!!!! section-end:'
-_SECTION_NAME_CONFIG = 'configuration'
-_SECTION_NAME_REASON = 'reason'
-_SECTION_NAME_OP_LIST = 'op-list'
-_SECTION_NAME_TENSOR_LIST = 'tensor-list'
-_SECTION_NAME_CACHE_INDEX_MAP = 'cache-index-map'
-_SECTION_NAME_GRAPH = 'graph'
-_FIELD_NAME_VERSION = 'version:'
-_FIELD_NAME_DEVICE = 'device:'
-_FIELD_NAME_TRACE_MODE = 'trace-mode:'
-_FIELD_NAME_SUBMODE = 'submode:'
-_FIELD_NAME_NUM_REPLICAS = 'num-replicas:'
-_FIELD_NAME_NUM_REPLICAS_PER_HOST = 'num-replicas-per-host:'
-_FIELD_NAME_NUM_HOSTS = 'num-hosts:'
-_FIELD_NAME_NUM_OPS = 'number-of-ops:'
-_FIELD_NAME_NUM_TENSORS = 'number-of-tensors:'
-_FIELD_NAME_NUM_CACHE_INDICES = 'number-of-indices:'
-_FIELD_NAME_TOPOLOGICAL_SORT_SUCCEED = 'topological-sort-succeed:'
-_FLAGS_ENV_VAR = 'TENSOR_TRACER_FLAGS'
-_FLAG_SINGLE_QUOTE_PAT = re.compile(r"\s*--([^=]+)='([^']*)'")
-_FLAG_DOUBLE_QUOTE_PAT = re.compile(r'\s*--([^=]+)="([^"]*)"')
-_FLAG_NO_QUOTE_PAT = re.compile(r'\s*--([^=]+)=(\S*)')
-_FLAG_NO_EQUAL_PAT = re.compile(r'\s*--([^=]+)\s*')
-_FLAG_NAME_ENABLE = 'enable'
-_FLAG_NAME_TRACE_MODE = 'trace_mode'
-_FLAG_NAME_USE_COMPACT_TRACE = 'compact_trace'
-_FLAG_NAME_SUBMODE = 'submode'
-_FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS = 'include_less_interesting_ops'
-_FLAG_NAME_EXCLUDED_OPNAMES = 'excluded_opnames'
-_FLAG_NAME_EXCLUDED_OPTYPES = 'excluded_optypes'
-_FLAG_NAME_INCLUDED_OPNAMES = 'included_opnames'
-_FLAG_NAME_INCLUDED_OPTYPES = 'included_optypes'
-_FLAG_NAME_TRACE_DIR = 'trace_dir'
-_FLAG_NAME_REPORT_FILE = 'report_file'
-_FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR = 'use_test_undeclared_outputs_dir'
-_FLAG_NAME_OP_RANGE = 'op_range'
-_OP_RANGE_PAT = re.compile(r'(\d+):(\d+)')
-_OUTPUT_STREAM_ESCAPE = 'file://'
-_TEST_UNDECLARED_OUTPUTS_DIR_ENV_VAR = 'TEST_UNDECLARED_OUTPUTS_DIR'
-_TENSOR_TRACER_COLLECTION = 'tensor_tracer_variables'
-_TENSOR_TRACER_CHECKPOINT = 'tensor_tracer_checkpoint'
-_TRACE_FILE_NAME = 'trace.all'
-_COMPACT_TRACE_FILE_PREFIX = 'compact_trace.'
-_COMPACT_TRACE_ENTRY_INIT_VALUE = -1.0
-_TENSOR_TRACER_STORAGE = 'tensor_tracer_storage'
-_TENSOR_VALUES_CACHE = 'tensor_values_cache'
-_REPLICA_ID_TAG = '#replica-id: '
-
-def tensor_tracepoint(tensor, checkpoint_name):
-  """Adds a checkpoint with the given checkpoint name for the given tensor.
-
-  The tensor will be added to the list of tensors that will be traced by the
-  tensor tracer.
-
-  Args:
-     tensor: the tensor object for which the tracing is requested.
-     checkpoint_name: a string name for the checkpoint. This name has to be
-     unique if used within model comparison. Tensors that have the same
-     checkpoint identifier are compared in model comparison.
-  Returns:
-    The provided tensor.
-  """
-
-  tensor.graph.get_collection(_TENSOR_TRACER_COLLECTION)
-  tensor.graph.add_to_collection(_TENSOR_TRACER_COLLECTION,
-                                 (tensor, checkpoint_name))
-  return tensor
-
-
-def keras_layer_tracepoint(layer, checkpoint_name):
-  """An interface for adding the tensor outputs of a keras layer.
-
-  Encapsulates tensor_tracepoint.
-
-  Args:
-     layer: A keras layer.
-     checkpoint_name: a string name for the checkpoint. This name has to be
-     unique if used within model comparison. Tensors that have the same
-     checkpoint identifier are compared in model comparison.
-
-  Returns:
-    The provided layer.
-  """
-  try:
-    outputs = layer.output
-    if tensor_util.is_tensor(outputs):
-      tensor_tracepoint(outputs, '%s' % (checkpoint_name))
-    else:
-      idx = 0
-      for output_tensor in outputs:
-        if tensor_util.is_tensor(output_tensor):
-          tensor_tracepoint(output_tensor, '%s_%d' % (checkpoint_name, idx))
-        idx += 1
-  except AttributeError:
-    pass
-  except RuntimeError:
-    pass
-  return layer
-
-
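A brief usage sketch for the two tracepoint helpers above (illustrative; assumes the tracer is enabled via its environment flags and runs in TF 1.x graph mode, with tensors `hidden`, `weights`, and `x` defined elsewhere):

    import tensorflow as tf

    logits = tensor_tracepoint(tf.matmul(hidden, weights), 'logits')

    dense = tf.keras.layers.Dense(10, name='head')
    y = dense(x)
    keras_layer_tracepoint(dense, 'head')  # traces the layer's output tensors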
-def _trace_files_need_precreated(output_dir):
-  """Return True if trace files must be pre-created by users."""
-
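-  # Note: taken together, the checks below test for a '/cns/' path prefix.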
-  if not output_dir.startswith('/'):
-    return False
-  if len(output_dir) < 5:
-    return False
-  if output_dir[2] != 'n':
-    return False
-  if output_dir[3] != 's':
-    return False
-  if output_dir[1] != 'c':
-    return False
-  if output_dir[4] != '/':
-    return False
-  return True
-
-
-def _get_tensor_values_cache(graph=None):
-  """Returns the variable that implements tensor-value caching."""
-
-  graph = graph or ops.get_default_graph()
-  collection = graph.get_collection(_TENSOR_TRACER_STORAGE)
-  if len(collection) == 1:
-    return collection[0]
-  elif not collection:
-    raise RuntimeError('%s has not been created' % _TENSOR_VALUES_CACHE)
-  else:
-    raise RuntimeError('Multiple %s created' % _TENSOR_VALUES_CACHE)
-
-
-def _create_tensor_values_cache(graph, num_tensors):
-  """Creates a variable as the cache to store intermediate tensor values."""
-
-  graph = graph or ops.get_default_graph()
-  # Create in proper graph and base name_scope.
-  with graph.as_default() as g, g.name_scope(None):
-    return variable_scope.get_variable(
-        _TENSOR_VALUES_CACHE,
-        shape=[num_tensors],
-        dtype=dtypes.float32,
-        initializer=init_ops.constant_initializer(
-            _COMPACT_TRACE_ENTRY_INIT_VALUE),
-        trainable=False,
-        use_resource=True,
-        collections=[_TENSOR_TRACER_STORAGE, ops.GraphKeys.GLOBAL_VARIABLES])
-
-
-def _set_fetches(result_tensor, train_op):
-  """Sets the fetches from the result tensor and training op."""
-
-  fetches = []
-  if result_tensor is not None:
-    fetches.append(result_tensor)
-  if train_op is not None:
-    fetches.append(train_op)
-  if not fetches:
-    return None
-  return fetches
-
-
-class TensorTracer(object):
-  """A software construct for tracing tensor values in a TF graph on TPU.
-
-  This utility is disabled by default. It can be enabled by setting
-  the TENSOR_TRACER_FLAGS env variable as:
-    export TENSOR_TRACER_FLAGS="--enable=1"
-  If it is enabled, it will trace the output tensor values of
-  selected Ops in the graph. It has two outputs: (1) the traces and (2)
-  a report. The traces are dumped to a specified local file on the TPU
-  host. The report is printed to the log.info of the TPU job.
-  By passing options via the env variable, users can change:
-     (1) the trace mode (e.g., detecting NaN/Inf, printing partial or
-         full tensor values)
-     (2) which Ops to be traced (via op.name or op.type)
-     (3) output trace file path.
-  """
-
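Concretely, enabling the tracer with NaN/Inf detection and a trace directory can be done before graph construction (flag names come from the constants above; the directory is illustrative):

    import os
    os.environ['TENSOR_TRACER_FLAGS'] = (
        "--enable=1 --trace_mode=nan-inf --trace_dir='/tmp/tt_traces'")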
-  @staticmethod
-  def _match_next_flag(flags, pos):
-    """Returns the match for the next TensorTracer flag.
-
-    Args:
-       flags: a string that contains the flags.
-       pos: where in flags to start the search.
-
-    Returns:
-       A pair where the first element is the regular-expression
-       match found and the second element indicates if the match
-       has a value.
-    """
-
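-    # A flag may appear as --flag="value", --flag='value', --flag=value, or as
-    # a bare --flag with no value; the four patterns below are assumed to
-    # match these forms, in that order, judging by their names (the patterns
-    # themselves are defined elsewhere in this file).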
-    match = _FLAG_DOUBLE_QUOTE_PAT.match(flags, pos)
-    if match:
-      return match, True
-    match = _FLAG_SINGLE_QUOTE_PAT.match(flags, pos)
-    if match:
-      return match, True
-    match = _FLAG_NO_QUOTE_PAT.match(flags, pos)
-    if match:
-      return match, True
-    match = _FLAG_NO_EQUAL_PAT.match(flags, pos)
-    if match:
-      # The flag is found but is not given a value.
-      return match, False
-    # The flag is not found.
-    return None, False
-
-  @staticmethod
-  def validate_flag_names():
-    """Validates if the TensorTrace flags passed are valid."""
-    valid_flag_names = [_FLAG_NAME_ENABLE, _FLAG_NAME_TRACE_MODE,
-                        _FLAG_NAME_USE_COMPACT_TRACE,
-                        _FLAG_NAME_SUBMODE,
-                        _FLAG_NAME_EXCLUDED_OPNAMES,
-                        _FLAG_NAME_EXCLUDED_OPTYPES,
-                        _FLAG_NAME_INCLUDED_OPNAMES,
-                        _FLAG_NAME_INCLUDED_OPTYPES,
-                        _FLAG_NAME_TRACE_DIR,
-                        _FLAG_NAME_REPORT_FILE,
-                        _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR,
-                        _FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS,
-                        _FLAG_NAME_OP_RANGE]
-    tensor_tracer_flags = os.environ.get(_FLAGS_ENV_VAR)
-    if not tensor_tracer_flags:
-      return
-    pos = 0
-    while True:
-      match, _ = TensorTracer._match_next_flag(tensor_tracer_flags, pos)
-      if not match:
-        break
-      flag_name = match.group(1)
-      if flag_name not in valid_flag_names:
-        raise ValueError(
-            'The flag name "%s" passed via the environment variable "%s" '
-            'is invalid. Valid flag names are:'
-            '\n%s'%(flag_name, _FLAGS_ENV_VAR, valid_flag_names))
-      pos = match.end()
-
-  @staticmethod
-  def print_flag_values():
-    """Prints all TensorTracer flags passed via environment variables."""
-
-    tensor_tracer_flags = os.environ.get(_FLAGS_ENV_VAR)
-    if not tensor_tracer_flags:
-      return 'Env variable "%s" is not set'%_FLAGS_ENV_VAR
-    result = 'Env variable "%s" is set to "%s"\n'%(_FLAGS_ENV_VAR,
-                                                   tensor_tracer_flags)
-    result += 'Individual flag values:\n'
-    pos = 0
-    while True:
-      match, has_value = TensorTracer._match_next_flag(
-          tensor_tracer_flags, pos)
-      if not match:
-        break
-      flag_name = match.group(1)
-      if has_value:
-        flag_value = match.group(2)
-      else:
-        flag_value = None
-      result += '  %s: %s\n'%(flag_name, flag_value)
-      pos = match.end()
-    result += '\n'
-    return result
-
-  @staticmethod
-  def get_flag_value(wanted_flag_name):
-    """Returns the value of a TensorTracer flags.
-
-    Args:
-      wanted_flag_name: the name the the flag we are looking for.
-
-    Returns:
-      A pair where the first element indicates if the flag is
-      found and the second element is the value of the flag.
-    """
-
-    tensor_tracer_flags = os.getenv(_FLAGS_ENV_VAR)
-    if not tensor_tracer_flags:
-      return False, None
-    pos = 0
-    while True:
-      match, has_value = TensorTracer._match_next_flag(
-          tensor_tracer_flags, pos)
-      if not match:
-        return False, None
-      flag_name = match.group(1)
-      if has_value:
-        flag_value = match.group(2)
-      else:
-        flag_value = None
-      if flag_name == wanted_flag_name:
-        return True, flag_value
-      pos = match.end()
-
-  @staticmethod
-  def flag_value_to_re_list(flag_name):
-    """Converts list of strings to compiled RE."""
-
-    re_list = []
-    found, flag_value = TensorTracer.get_flag_value(flag_name)
-    if not found or not flag_value:
-      return re_list
-    list_of_values = flag_value.split()
-    for v in list_of_values:
-      r = re.compile(v)
-      re_list.append(r)
-    return re_list
-
-  @staticmethod
-  def _is_flag_on(flag_name):
-    """Returns True if the given flag is on."""
-
-    found, flag_value = TensorTracer.get_flag_value(flag_name)
-    if not found:
-      return False
-    if flag_value is None:
-      return True
-    # Depends on the flag value.
-    flag_value = flag_value.lower()
-    enabled = flag_value in ['1', 't', 'true', 'y', 'yes']
-    return enabled
-
-  @staticmethod
-  def is_enabled():
-    """Returns True if TensorTracer is enabled."""
-
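-    # Typical usage is to guard tracing at the call site (a sketch only; the
-    # exact integration depends on the surrounding model code):
-    #   if TensorTracer.is_enabled():
-    #     tt = TensorTracer()
-    #     result, _ = tt.trace_tpu(graph, result, train_op, num_replicas=...)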
-    return TensorTracer._is_flag_on(_FLAG_NAME_ENABLE)
-
-  @staticmethod
-  def use_test_undeclared_outputs_dir():
-    """Decides the output directory of the report and trace files.
-
-    Args:
-       None.
-
-    Returns:
-       True if the output files should be written to the
-       test-undeclared-outputs-directory defined via an
-       env variable.
-    """
-
-    return TensorTracer._is_flag_on(
-        _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR)
-
-  @staticmethod
-  def use_compact_trace():
-    """Returns True if the compact trace format should be used."""
-    return TensorTracer._is_flag_on(
-        _FLAG_NAME_USE_COMPACT_TRACE)
-
-  @staticmethod
-  def check_device_type(device_type):
-    """Checks if the given device type is valid."""
-
-    if device_type not in [_DEVICE_TYPE_TPU, _DEVICE_TYPE_CPU]:
-      raise ValueError('Invalid device_type "%s"'%device_type)
-
-  @staticmethod
-  def check_trace_mode(trace_mode):
-    """Checks if the given trace mode is valid."""
-
-    valid_trace_modes = [_TRACE_MODE_NAN_INF, _TRACE_MODE_PART_TENSOR,
-                         _TRACE_MODE_FULL_TENSOR, _TRACE_MODE_NORM,
-                         _TRACE_MODE_MAX_ABS]
-    if trace_mode not in valid_trace_modes:
-      raise ValueError('Invalid trace mode "%s" given to TensorTracer. '
-                       'Valid trace modes are: %s'%(trace_mode,
-                                                    valid_trace_modes))
-
-  @staticmethod
-  def check_submode(submode):
-    """Checks if the given submode is valid."""
-
-    if not submode:
-      return
-    valid_submodes = [_SUBMODE_DETAILED, _SUBMODE_BRIEF]
-    if submode not in valid_submodes:
-      raise ValueError('Invalid submode "%s" given to TensorTracer. '
-                       'Valid submodes are: %s'%(submode,
-                                                 valid_submodes))
-
-  @staticmethod
-  def unsafe_op(op):
-    """Returns True if this op is not safe to be traced."""
-
-    if control_flow_util.IsInCond(op):
-      return True
-    # Reasons for not including the following op types:
-    #    Assign: causes incorrect results with CPU tracing.
-    if op.type in ['Assign']:
-      return True
-    return False
-
-  @staticmethod
-  def device_mismatch(device_type, op):
-    """Returns True if the op does not run on the traced device type."""
-    if device_type == _DEVICE_TYPE_TPU:
-      # Ops rewritten for the TPU carry the _TPU_REPLICATE_ATTR attribute;
-      # an op without it will not run on the TPU.
-      # pylint: disable=protected-access
-      return tpu._TPU_REPLICATE_ATTR not in op.node_def.attr
-      # pylint: enable=protected-access
-    return False
-
-  @staticmethod
-  def unsafe_scalar_trace(op):
-    """Return true if scalar output tensor from Op is not safe to be traced."""
-
-    # Tracing the following causes cycle in the graph on TPU.
-    if op.type in ['LoopCond', 'Enter', 'Merge', 'Const',
-                   'Switch', 'Less', 'ReadVariableOp']:
-      return True
-    # Tracing the following will cause casting issues
-    # with the norm trace mode, or other compilation issues on CPU.
-    if op.type in ['VarHandleOp', 'IteratorToStringHandle',
-                   'IteratorGetNext', 'OneShotIterator',
-                   'IteratorV2', 'MakeIterator',
-                   'BatchDatasetV2', 'MapDataset',
-                   'FixedLengthRecordDataset', 'TakeDataset', 'ZipDataset',
-                   'Placeholder', 'PlaceholderWithDefault', 'StridedSlice']:
-      return True
-    return False
-
-  @staticmethod
-  def less_interesting_op(op):
-    """Returns True if the given Op is not an interesting one to be traced."""
-
-    found, _ = TensorTracer.get_flag_value(
-        _FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS)
-    if found:
-      # The user forced the inclusion of all ops.
-      return False
-    # The following op types are highly unlikely to cause bugs.
-    return op.type in ['Const', 'Identity', 'Cast', 'Shape']
-
-  @staticmethod
-  def reason(op_idx, details):
-    """Returns reason why the Op at op_idx is traced or not."""
-
-    return '%d %s'%(op_idx, details)
-
-  @staticmethod
-  def topological_sort(g):
-    """Performs topological sort on the given graph.
-
-    Args:
-       g: the graph.
-
-    Returns:
-       A pair where the first element indicates if the topological
-       sort succeeded (True if there is no cycle found; False if a
-       cycle is found) and the second element is either the sorted
-       list of nodes or the cycle of nodes found.
-    """
-
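-    # This is the standard depth-first topological sort with temporary and
-    # permanent marks (three-color DFS). Note that the recursion in visit()
-    # can hit Python's recursion limit on very deep graphs.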
-    def visit(op, cycle, permanently_marked_ops,
-              temporarily_marked_ops, sorted_ops):
-      """Recursively visits all Ops in a graph.
-
-      Args:
-         op: the current Op being visited.
-         cycle: a set that is filled with the Ops on a cycle, if one is found.
-         permanently_marked_ops: the set of Ops that were already visited.
-         temporarily_marked_ops: the set of Ops that we have visited during
-                                 the current descent.
-         sorted_ops: the list of Ops sorted in topological order.
-      """
-
-      if cycle:
-        return
-      if op in permanently_marked_ops:
-        return
-      if op in temporarily_marked_ops:
-        # Record the cycle by mutating the shared set; rebinding the local
-        # name would not be visible to the caller.
-        cycle.update(temporarily_marked_ops)
-        return
-      temporarily_marked_ops.add(op)
-      for out_tensor in op.outputs:
-        for consumer_op in out_tensor.consumers():
-          visit(consumer_op, cycle, permanently_marked_ops,
-                temporarily_marked_ops, sorted_ops)
-      # pylint: disable=protected-access
-      for ctrl_output_op in op._control_outputs:
-        # pylint: enable=protected-access
-        visit(ctrl_output_op, cycle, permanently_marked_ops,
-              temporarily_marked_ops, sorted_ops)
-      temporarily_marked_ops.remove(op)
-      permanently_marked_ops.add(op)
-      sorted_ops.insert(0, op)
-
-    graph_cycle = set()
-    sorted_ops = []
-    permanently_marked_ops = set()
-    temporarily_marked_ops = set()
-    unsorted_ops = g.get_operations()
-    for op in unsorted_ops:
-      visit(op, graph_cycle, permanently_marked_ops,
-            temporarily_marked_ops, sorted_ops)
-    if graph_cycle:
-      return (False, graph_cycle)
-    else:
-      assert len(unsorted_ops) == len(sorted_ops)
-      return (True, sorted_ops)
-
-  @staticmethod
-  def _make_op_and_tensor_maps(op_list):
-    """Creates various maps and lists from op_list.
-
-    Args:
-       op_list: a list of Ops
-
-    Returns:
-       opname_idx_map: a map from Op's name to its index in op_list.
-       tensor_list: a list of output tensors of the Ops in op_list.
-       tensorname_idx_map: a map from output tensor name to its index
-                           in tensor_list.
-    """
-
-    opname_idx_map = {}
-    tensor_list = []
-    tensorname_idx_map = {}
-    for op_id, op in enumerate(op_list):
-      if op.name in opname_idx_map:
-        raise ValueError('Duplicated Op name: %s'%op.name)
-      opname_idx_map[op.name] = op_id
-      for output_tensor in op.outputs:
-        if output_tensor.name not in tensorname_idx_map:
-          tensor_list.append(output_tensor)
-          tensorname_idx_map[output_tensor.name] = len(tensor_list)-1
-    return (opname_idx_map, tensor_list, tensorname_idx_map)
-
-  def __init__(self):
-    """Initializes a TensorTracer.
-
-    Sets the various member fields from the flags (if given) or the defaults.
-    """
-    self._version = 'use-outside-compilation'
-    self._device_type = None
-    TensorTracer.validate_flag_names()
-    found, self._trace_mode = TensorTracer.get_flag_value(_FLAG_NAME_TRACE_MODE)
-    if not found or not self._trace_mode:
-      self._trace_mode = _TRACE_MODE_NAN_INF
-    TensorTracer.check_trace_mode(self._trace_mode)
-    found, self._submode = TensorTracer.get_flag_value(_FLAG_NAME_SUBMODE)
-    if not found or not self._submode:
-      self._submode = _SUBMODE_DETAILED
-    TensorTracer.check_submode(self._submode)
-    self._part_tensor_size = _TRACE_MODE_PART_TENSOR_SIZE
-    self._instrument_records = {}
-    self._set_trace_dir()
-    self._set_report_file()
-    self._set_op_range()
-    self._set_excluded_opnames()
-    self._set_excluded_optypes()
-    self._set_included_opnames()
-    self._set_included_optypes()
-    self._num_replicas = None
-    self._num_replicas_per_host = None
-    self._num_hosts = None
-    self._replica_id = None
-
-  def _add_replica_id_to_graph(self, result_tensor):
-    """Adds nodes for computing the replica ID to the graph."""
-
-    if not self._num_replicas:
-      self._replica_id = 'unknown'
-      return result_tensor
-
-    with ops.control_dependencies(None):
-      # Uses None as dependency to run outside of TPU graph rewrites.
-      self._replica_id = tpu_ops.tpu_replicated_input(
-          list(range(self._num_replicas)),
-          name='tt_replica_id')
-    use_replica_id = array_ops.identity(self._replica_id).op
-    with ops.control_dependencies([use_replica_id]):
-      # Adds a control dependency from the result_tensor to
-      # the replica_id to ensure that replica_id will be added to the graph.
-      return array_ops.identity(result_tensor)
-
-  def _set_trace_dir(self):
-    found, self._trace_dir = TensorTracer.get_flag_value(_FLAG_NAME_TRACE_DIR)
-    if (found and self._trace_dir and
-        TensorTracer.use_test_undeclared_outputs_dir()):
-      raise ValueError('Cannot use --%s and --%s at the same time'
-                       %(_FLAG_NAME_TRACE_DIR,
-                         _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR))
-    if TensorTracer.use_test_undeclared_outputs_dir():
-      self._trace_dir = os.environ.get(_TEST_UNDECLARED_OUTPUTS_DIR_ENV_VAR)
-
-  def _set_report_file(self):
-    """Sets the path of the output report file."""
-
-    found, self._report_file_path = TensorTracer.get_flag_value(
-        _FLAG_NAME_REPORT_FILE)
-    if (found and self._report_file_path and
-        TensorTracer.use_test_undeclared_outputs_dir()):
-      if os.path.isabs(self._report_file_path):
-        raise ValueError('If use_test_undeclared_outputs_dir is set, '
-                         'report_file_path cannot be an absolute path (%s)'
-                         %self._report_file_path)
-      outputs_dir = os.environ.get(_TEST_UNDECLARED_OUTPUTS_DIR_ENV_VAR)
-      self._report_file_path = os.path.join(outputs_dir,
-                                            self._report_file_path)
-    if not self._report_file_path:
-      self._report_file = None
-      return
-    self._report_file = gfile.Open(self._report_file_path, 'w')
-
-  def _close_report_file(self):
-    if self._report_file:
-      self._report_file.close()
-
-  def _set_op_range(self):
-    """Sets the index range of the Ops that we will consider tracing."""
-
-    found, op_range = TensorTracer.get_flag_value(_FLAG_NAME_OP_RANGE)
-    if not found or not op_range:
-      self._op_range = (-1, -1)  # this means including all ops.
-      return
-    match = _OP_RANGE_PAT.match(op_range)
-    if not match:
-      self._op_range = (-1, -1)  # this means including all ops.
-      return
-    self._op_range = (int(match.group(1)), int(match.group(2)))
-
-  def _inside_op_range(self, idx):
-    """Return True if the given index is inside the selected range."""
-
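-    # For example, with self._op_range == (10, 20), indices 10..20 (inclusive)
-    # are inside the range; with the default (-1, -1), every index is.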
-    if idx < self._op_range[0]:
-      return False
-    return self._op_range[1] < 0 or idx <= self._op_range[1]
-
-  def _set_excluded_opnames(self):
-    self._excluded_opname_re_list = TensorTracer.flag_value_to_re_list(
-        _FLAG_NAME_EXCLUDED_OPNAMES)
-
-  def _set_excluded_optypes(self):
-    self._excluded_optype_re_list = TensorTracer.flag_value_to_re_list(
-        _FLAG_NAME_EXCLUDED_OPTYPES)
-
-  def _set_included_opnames(self):
-    self._included_opname_re_list = TensorTracer.flag_value_to_re_list(
-        _FLAG_NAME_INCLUDED_OPNAMES)
-
-  def _set_included_optypes(self):
-    self._included_optype_re_list = TensorTracer.flag_value_to_re_list(
-        _FLAG_NAME_INCLUDED_OPTYPES)
-
-  def _is_user_included_op(self, op):
-    for opname_re in self._included_opname_re_list:
-      if opname_re.match(op.name):
-        return True
-    for optype_re in self._included_optype_re_list:
-      if optype_re.match(op.type):
-        return True
-    return False
-
-  def _is_user_excluded_op(self, op):
-    for opname_re in self._excluded_opname_re_list:
-      if opname_re.match(op.name):
-        return True
-    for optype_re in self._excluded_optype_re_list:
-      if optype_re.match(op.type):
-        return True
-    return False
-
-  def _use_tensor_values_cache(self):
-    """Returns True if immediate tensors should be first saved to a cache."""
-
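-    # The cache is the float32 variable created by _create_tensor_values_cache
-    # (one scalar slot per traced tensor); it is written out later by
-    # _flush_tensor_values_cache.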
-    if self._trace_mode not in set([_TRACE_MODE_NAN_INF,
-                                    _TRACE_MODE_NORM, _TRACE_MODE_MAX_ABS]):
-      return False
-    if self._trace_dir and _trace_files_need_precreated(self._trace_dir):
-      return True
-    if TensorTracer.use_compact_trace():
-      return True
-    return False
-
-  def _save_tensor_value_to_cache_op(self, graph, cache_idx, updates):
-    """Returns an Op that will save the given updates to an entry in the cache."""
-
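-    # Writes the single-element update into slot cache_idx of the cache
-    # variable; the [cache_idx] index tensor matches the [1]-shaped update.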
-    cache = _get_tensor_values_cache(graph)
-    indices = constant_op.constant([cache_idx])
-    return state_ops.scatter_update(cache, indices, updates).op
-
-  def _write_report(self, content):
-    """Writes the given content to the report."""
-
-    line = '%s %s'%(_TRACER_LOG_PREFIX, content)
-    if self._report_file:
-      self._report_file.write(line)
-    else:
-      logging.info(line)
-
-  def _write_config_section(self):
-    """Writes the config section of the report."""
-
-    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_CONFIG))
-    self._write_report('%s %s\n'%(_FIELD_NAME_VERSION, self._version))
-    self._write_report('%s %s\n'%(_FIELD_NAME_DEVICE, self._device_type))
-    self._write_report('%s %s\n'%(_FIELD_NAME_TRACE_MODE, self._trace_mode))
-    self._write_report('%s %s\n'%(_FIELD_NAME_SUBMODE, self._submode))
-    self._write_report('%s %s\n'%(_FIELD_NAME_NUM_REPLICAS, self._num_replicas))
-    self._write_report('%s %s\n'%(_FIELD_NAME_NUM_REPLICAS_PER_HOST,
-                                  self._num_replicas_per_host))
-    self._write_report('%s %s\n'%(_FIELD_NAME_NUM_HOSTS, self._num_hosts))
-    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_CONFIG))
-
-  def _write_reason_section(self):
-    """Writes the reason section of the report."""
-
-    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_REASON))
-    for key in sorted(self._instrument_records):
-      self._write_report('"%s" %s\n'%(key, self._instrument_records[key]))
-    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_REASON))
-
-  def _write_op_list_section(self, op_list):
-    """Writes the Op-list section of the report."""
-
-    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_OP_LIST))
-    self._write_report('%s %d\n'%(_FIELD_NAME_NUM_OPS, len(op_list)))
-    for i, op in enumerate(op_list):
-      line = '%d "%s" %s'%(i, op.name, op.type)
-      for out_tensor in op.outputs:
-        if out_tensor.name not in self._tensorname_idx_map:
-          raise ValueError(
-              'out_tensor %s is not in tensorname_idx_map'%out_tensor.name)
-        line += ' %d'%self._tensorname_idx_map[out_tensor.name]
-      line += '\n'
-      self._write_report(line)
-    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_OP_LIST))
-
-  def _write_tensor_list_section(self, tensor_list, opname_idx_map):
-    """Writes the tensor-list section of the report."""
-
-    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
-                                  _SECTION_NAME_TENSOR_LIST))
-    self._write_report('%s %d\n'%(_FIELD_NAME_NUM_TENSORS, len(tensor_list)))
-    for i, tensor in enumerate(tensor_list):
-      line = '%d "%s"'%(i, tensor.name)
-      for consumer_op in tensor.consumers():
-        if consumer_op.name not in opname_idx_map:
-          raise ValueError(
-              'consumer_op %s is not in opname_idx_map'%consumer_op.name)
-        line += ' %d'%opname_idx_map[consumer_op.name]
-      line += '\n'
-      self._write_report(line)
-    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
-                                  _SECTION_NAME_TENSOR_LIST))
-
-  def _write_cache_index_map_section(self):
-    """Writes the mapping from cache index to tensor index to the report."""
-
-    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
-                                  _SECTION_NAME_CACHE_INDEX_MAP))
-    self._write_report('%s %d\n'%(_FIELD_NAME_NUM_CACHE_INDICES,
-                                  len(self._cache_idx_to_tensor_idx)))
-    for cache_idx, tensor_idx in enumerate(self._cache_idx_to_tensor_idx):
-      line = '%d %d\n'%(cache_idx, tensor_idx)
-      self._write_report(line)
-    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
-                                  _SECTION_NAME_CACHE_INDEX_MAP))
-
-  def _write_graph_section(self, succeed, sorted_or_cycle):
-    """Writes the graph section of the report."""
-
-    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_GRAPH))
-    self._write_report('%s %s\n'%(_FIELD_NAME_TOPOLOGICAL_SORT_SUCCEED,
-                                  succeed))
-    for i, node in enumerate(sorted_or_cycle):
-      self._write_report('%d "%s"\n'%(i, node.name))
-    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_GRAPH))
-
-  def _preprocess_traced_tensor(self, tensor):
-    """Computes NAN/Norm/Max on TPUs before sending to CPU.
-
-    Args:
-      tensor: The tensor to be traced.
-    Returns:
-      A tensor that should be input to the trace_function.
-    Raises:
-      RuntimeError: If the trace mode is invalid.
-    """
-
-    def _detect_nan_inf(tensor):
-      """Trace function for detecting any NaN/Inf in the tensor."""
-
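-      # Reduces the whole tensor to one scalar: 1.0 if any element is NaN or
-      # Inf, 0.0 otherwise, so only a single float leaves the device.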
-      if tensor.dtype.is_floating:
-        mask = math_ops.reduce_any(
-            gen_math_ops.logical_or(
-                gen_math_ops.is_nan(tensor), gen_math_ops.is_inf(tensor)))
-        output_tensor = control_flow_ops.cond(mask,
-                                              lambda: constant_op.constant(1.0),
-                                              lambda: constant_op.constant(0.0))
-      else:
-        output_tensor = constant_op.constant(0.0)
-      # The output shape must be [1]; reshape in case shape info is missing.
-      output_tensor = array_ops.reshape(output_tensor, [1])
-      return output_tensor
-
-    def _show_norm(tensor):
-      tensor = math_ops.cast(tensor, dtypes.float32)
-      output_tensor = linalg_ops.norm(tensor)
-      # The output shape must be [1]; reshape in case shape info is missing.
-      output_tensor = array_ops.reshape(output_tensor, [1])
-      return output_tensor
-
-    def _show_max_abs(tensor):
-      tensor = math_ops.cast(tensor, dtypes.float32)
-      output_tensor = math_ops.reduce_max(math_ops.abs(tensor))
-      zero = constant_op.constant(0, dtypes.float32)
-      output_tensor = gen_math_ops.maximum(zero, output_tensor)
-      # The output shape must be [1]; reshape in case shape info is missing.
-      output_tensor = array_ops.reshape(output_tensor, [1])
-      return output_tensor
-
-    if self._trace_mode == _TRACE_MODE_NAN_INF:
-      return _detect_nan_inf(tensor)
-    if self._trace_mode == _TRACE_MODE_PART_TENSOR:
-      return tensor
-    if self._trace_mode == _TRACE_MODE_FULL_TENSOR:
-      return tensor
-    if self._trace_mode == _TRACE_MODE_NORM:
-      return _show_norm(tensor)
-    if self._trace_mode == _TRACE_MODE_MAX_ABS:
-      return _show_max_abs(tensor)
-    raise RuntimeError(
-        'Tensor trace fun for %s is not yet implemented' % self._trace_mode)
-
-  def _make_tensor_trace_fun(self, tensor_name):
-    """Makes the tensor tracing function called by outside compilation.
-
-    Args:
-      tensor_name: name of the tensor being traced.
-
-    Returns:
-      A function to be passed as the first argument to outside compilation.
-
-    Raises:
-      RuntimeError: If the trace mode is invalid.
-    """
-
-    def _print_tensor(tensor_name, num_elements, tensor, output_tensor):
-      """Prints a tensor value to a file.
-
-      Args:
-        tensor_name: name of the tensor being traced.
-        num_elements: number of elements to print (-1 means print all).
-        tensor: the tensor to be returned.
-        output_tensor: the tensor to be printed.
-
-      Returns:
-        The same tensor passed via the "tensor" argument.
-
-      Raises:
-        ValueError: If tensor_name is not already in
-                    self._tensorname_idx_map.
-      """
-
-      if self._submode == _SUBMODE_BRIEF:
-        if tensor_name not in self._tensorname_idx_map:
-          raise ValueError(
-              'Tensor name %s is not in the tensorname_idx_map'%tensor_name)
-        msg = '%d'%self._tensorname_idx_map[tensor_name]
-      else:
-        msg = '"%s"'%tensor_name
-
-      if self._trace_dir:
-        output_path = os.path.join(self._trace_dir, _TRACE_FILE_NAME)
-        output_stream = _OUTPUT_STREAM_ESCAPE + output_path
-      else:
-        output_stream = sys.stderr
-      print_op = logging_ops.print_v2(msg, array_ops.shape(output_tensor),
-                                      '@', self._replica_id,
-                                      '\n', output_tensor, '\n',
-                                      summarize=num_elements,
-                                      output_stream=output_stream)
-      with ops.control_dependencies([print_op]):
-        return array_ops.identity(tensor).op
-
-
-    def _show_part_tensor(tensor):
-      """Trace function for printing part of the tensor."""
-
-      return _print_tensor(tensor_name, self._part_tensor_size,
-                           tensor, tensor)
-
-    def _show_full_tensor(tensor):
-      """Trace function for printing the entire tensor."""
-
-      return _print_tensor(tensor_name, -1, tensor, tensor)
-
-    if self._trace_mode == _TRACE_MODE_PART_TENSOR:
-      return _show_part_tensor
-    # The input tensor has a shape of "[1]" for _TRACE_MODE_NAN_INF,
-    # _TRACE_MODE_NORM, and _TRACE_MODE_MAX_ABS, as related computations are
-    # performed within TPUs and only their results are transferred to CPU.
-    # Simply print the full tensor for these trace modes.
-    if self._trace_mode in [
-        _TRACE_MODE_NAN_INF, _TRACE_MODE_NORM, _TRACE_MODE_FULL_TENSOR,
-        _TRACE_MODE_MAX_ABS
-    ]:
-      return _show_full_tensor
-
-    raise RuntimeError('Tensor trace fun for %s is not yet implemented'
-                       %self._trace_mode)
-
-  def _skip_op(self, op_id, op, user_included, user_excluded,
-               in_exec_path=True):
-    """Returns True if we should not trace Op."""
-
-    if user_included:
-      self._instrument_records[op.name] = TensorTracer.reason(
-          op_id, _REASON_USER_INCLUDED)
-      return False
-    if user_excluded:
-      self._instrument_records[op.name] = TensorTracer.reason(
-          op_id, _REASON_USER_EXCLUDED)
-      return True
-    if not in_exec_path:
-      self._instrument_records[op.name] = TensorTracer.reason(
-          op_id, _REASON_NOT_EXECUTED)
-      return True
-    if not self._inside_op_range(op_id):
-      self._instrument_records[op.name] = TensorTracer.reason(
-          op_id, _REASON_OUTSIDE_OP_RANGE)
-      return True
-    if TensorTracer.unsafe_op(op):
-      self._instrument_records[op.name] = TensorTracer.reason(
-          op_id, _REASON_UNSAFE_OP)
-      return True
-    if TensorTracer.device_mismatch(self._device_type, op):
-      self._instrument_records[op.name] = TensorTracer.reason(
-          op_id, _REASON_DEVICE_MISMATCH)
-      return True
-    if TensorTracer.less_interesting_op(op):
-      self._instrument_records[op.name] = TensorTracer.reason(
-          op_id, _REASON_LESS_INTERESTING_OP)
-      return True
-    return False
-
-  def _skip_tensor(self, op_id, out_tensor, user_included,
-                   user_excluded):
-    """Returns True if we should not trace out_tensor."""
-
-    # Skips a tensor if the tensor has a non-numeric type.
-    #   Note: we cannot use check_ops.is_numeric_tensor(out_tensor)
-    #         because it also excludes tensors with dtypes bool and
-    #         float32_ref, which we actually want to trace.
-    non_numeric_tensor_types = set([dtypes.variant, dtypes.resource,
-                                    dtypes.string])
-    if out_tensor.dtype in non_numeric_tensor_types:
-      self._instrument_records[out_tensor.name] = TensorTracer.reason(
-          op_id, _REASON_NON_NUMERIC_TENSOR)
-      return True
-
-    if user_included:
-      self._instrument_records[out_tensor.name] = TensorTracer.reason(
-          op_id, _REASON_USER_INCLUDED)
-      return False
-    if user_excluded:
-      self._instrument_records[out_tensor.name] = TensorTracer.reason(
-          op_id, _REASON_USER_EXCLUDED)
-      return True
-    if not out_tensor.get_shape().is_fully_defined():
-      # If trace mode is nan-inf, norm or max, then the tensor will be reduced
-      # to a scalar before the outside compilation call.
-      if self._trace_mode in [
-          _TRACE_MODE_NAN_INF, _TRACE_MODE_NORM, _TRACE_MODE_MAX_ABS
-      ]:
-        self._instrument_records[out_tensor.name] = TensorTracer.reason(
-            op_id, _REASON_TENSOR_GET_TRACED)
-        return False
-      else:
-        self._instrument_records[out_tensor.name] = TensorTracer.reason(
-            op_id, _REASON_DYNAMIC_SHAPE)
-        return True
-    rank = len(out_tensor.shape)
-    if rank < 1:
-      # scalar
-      if TensorTracer.unsafe_scalar_trace(out_tensor.op):
-        self._instrument_records[out_tensor.name] = TensorTracer.reason(
-            op_id, _REASON_UNSAFE_SCALAR)
-        return True
-      else:
-        self._instrument_records[out_tensor.name] = TensorTracer.reason(
-            op_id, _REASON_SCALAR_GET_TRACED)
-        return False
-    else:
-      # tensor
-      self._instrument_records[out_tensor.name] = TensorTracer.reason(
-          op_id, _REASON_TENSOR_GET_TRACED)
-      return False
-
-  def _filter_execution_path_operations(self, operations, fetches):
-    """Returns the set of ops in the execution path to compute given fetches."""
-
-    # If no fetch provided, then return all operations.
-    if fetches is None:
-      return set(operations)
-    # Convert to list, if a single element is provided.
-    if not isinstance(fetches, (list, tuple)):
-      fetches = [fetches]
-    # If a tensor is given as fetch, convert it to op.
-    op_fetches = []
-    for fetch in fetches:
-      if isinstance(fetch, ops.Operation):
-        op_fetches.append(fetch)
-      elif isinstance(fetch, ops.Tensor):
-        op_fetches.append(fetch.op)
-      else:
-        raise RuntimeError('Given fetch: %s is neither a tensor nor an op.'
-                           %fetch)
-
-    execution_path_operations = set(op_fetches)
-    traverse_stack = list(op_fetches)
-    while traverse_stack:
-      head_op = traverse_stack.pop()
-      input_ops = [tensor_input.op for tensor_input in head_op.inputs]
-      input_ops.extend(head_op.control_inputs)
-
-      for input_op in input_ops:
-        if input_op not in execution_path_operations:
-          execution_path_operations.add(input_op)
-          traverse_stack.append(input_op)
-    return execution_path_operations
-
-  def _determine_traced_tensors(self, graph, fetches):
-    """Determines the tensors that will be traced."""
-
-    self._traced_tensorname_to_cache_idx_map = {}
-    self._cache_idx_to_tensor_idx = []
-    operations = graph.get_operations()
-    # Filter out the operations that won't be executed.
-    # if fetches=None, then ops_in_exec_path = set(operations)
-    ops_in_exec_path = self._filter_execution_path_operations(operations,
-                                                              fetches)
-    checkpoint_operations = self._get_checkpoints(graph)
-    for op_id, op in enumerate(operations):
-      if checkpoint_operations and op.name not in checkpoint_operations:
-        continue
-      user_included = self._is_user_included_op(op)
-      user_excluded = self._is_user_excluded_op(op)
-      in_exec_path = op in ops_in_exec_path
-      if self._skip_op(op_id, op, user_included, user_excluded, in_exec_path):
-        continue
-      for out_tensor in op.outputs:
-        if self._skip_tensor(op_id, out_tensor, user_included,
-                             user_excluded):
-          continue
-        tensor_name = out_tensor.name
-        if tensor_name in self._traced_tensorname_to_cache_idx_map:
-          raise ValueError(
-              'Tensor name %s should not be already in '
-              'traced_tensorname_to_cache_idx_map'%tensor_name)
-        if tensor_name not in self._tensorname_idx_map:
-          raise ValueError(
-              'Tensor name %s is not in the tensorname_idx_map'%tensor_name)
-        tensor_idx = self._tensorname_idx_map[tensor_name]
-        cache_idx = len(self._traced_tensorname_to_cache_idx_map)
-        self._traced_tensorname_to_cache_idx_map[tensor_name] = cache_idx
-        self._cache_idx_to_tensor_idx.append(tensor_idx)
-        if len(self._traced_tensorname_to_cache_idx_map) != len(
-            self._cache_idx_to_tensor_idx):
-          raise RuntimeError('len(self._traced_tensorname_to_cache_idx_map) != '
-                             'len(self._cache_idx_to_tensor_idx)')
-
-  def _check_trace_files(self):
-    """Checks if any requirements for trace files are satisfied."""
-
-    if not self._trace_dir:
-      # traces will be written to stderr. No need to check trace files.
-      return
-    if _trace_files_need_precreated(self._trace_dir):
-      for replica_id in range(0, self._num_replicas):
-        trace_file_path = os.path.join(
-            self._trace_dir,
-            _COMPACT_TRACE_FILE_PREFIX) + '%d'%replica_id
-        if not gfile.Exists(trace_file_path):
-          raise RuntimeError(
-              '%s must be pre-created with the '
-              'appropriate properties.'%trace_file_path)
-    else:
-      if not gfile.Exists(self._trace_dir):
-        gfile.MkDir(self._trace_dir)
-        if not gfile.Exists(self._trace_dir):
-          raise RuntimeError('Failed to create %s'%self._trace_dir)
-
-  def _pre_tracing(self, graph, fetches):
-    """Work needs to be done prior to TPU or CPU tracing."""
-
-    self._check_trace_files()
-    operations = graph.get_operations()
-    (opname_idx_map, tensor_list, self._tensorname_idx_map) = (
-        TensorTracer._make_op_and_tensor_maps(operations))
-    self._write_config_section()
-    self._write_op_list_section(operations)
-    self._write_tensor_list_section(tensor_list, opname_idx_map)
-    self._determine_traced_tensors(graph, fetches)
-    self._write_cache_index_map_section()
-    # Does the topological sort before adding any nodes to the graph.
-    (succeed, sorted_or_cycle) = TensorTracer.topological_sort(graph)
-    if self._use_tensor_values_cache():
-      _create_tensor_values_cache(graph,
-                                  len(self._cache_idx_to_tensor_idx))
-    return (operations, succeed, sorted_or_cycle)
-
-  def _post_tracing(self, succeed, sorted_or_cycle):
-    """Work needs to be done after TPU or CPU tracing."""
-
-    self._write_reason_section()
-    self._write_graph_section(succeed, sorted_or_cycle)
-    self._close_report_file()
-
-  def _get_checkpoints(self, graph):
-    """Returns the list of Ops that produce the tensors traced with API.
-
-    Args:
-      graph: the graph of Ops.
-
-    Returns:
-      A set of operation names which should be traced.
-    """
-
-    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
-                                  _TENSOR_TRACER_CHECKPOINT))
-    checkpoint_operations = set()
-    tensor_tracer_variables = graph.get_collection(_TENSOR_TRACER_COLLECTION)
-    for (tensor, checkpoint_name) in tensor_tracer_variables:
-      self._write_report('%s %s\n'%(tensor.name, checkpoint_name))
-      checkpoint_operations.add(tensor.op.name)
-    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
-                                  _TENSOR_TRACER_CHECKPOINT))
-    return checkpoint_operations
-
-  def _generate_flush_cache_op(self, graph, start_replica, on_tpu):
-    """Generates an Op that will flush the cache to file.
-
-    Args:
-      graph: the graph of Ops
-      start_replica: the ID of the first replica being flushed by this Op.
-      on_tpu: if the graph is executed on TPU.
-
-    Returns:
-      The Op to flush the cache to file.
-    """
-    def _make_flush_fun(replica_id):
-      """Makes a function for flushing the cache for the given replica."""
-
-      def _fun():
-        """A function that flushes the cache to a file."""
-
-        def _flush_fun(cache):
-          """Flushes the cache to a file."""
-
-          if isinstance(replica_id, str):
-            replica_id_str = replica_id
-          else:
-            replica_id_str = '%d'%replica_id
-          output_path = os.path.join(
-              self._trace_dir, _COMPACT_TRACE_FILE_PREFIX) + replica_id_str
-          output_stream = _OUTPUT_STREAM_ESCAPE + output_path
-          new_step_line = _REPLICA_ID_TAG + replica_id_str
-          print_op = logging_ops.print_v2(
-              new_step_line, '\n',
-              cache, '\n',
-              summarize=-1,
-              output_stream=output_stream)
-          with ops.control_dependencies([print_op]):
-            return constant_op.constant(0).op
-
-        cache = _get_tensor_values_cache(graph)
-        if on_tpu:
-          flush_op = tpu.outside_compilation(_flush_fun, cache.value())
-        else:
-          flush_op = _flush_fun(cache.value())
-        with ops.control_dependencies([flush_op]):
-          reset_value = constant_op.constant(_COMPACT_TRACE_ENTRY_INIT_VALUE,
-                                             dtype=cache.dtype,
-                                             shape=cache.shape)
-          assign_op = state_ops.assign(cache, reset_value).op
-          with ops.control_dependencies([assign_op]):
-            return flush_op.outputs[0]
-
-      return _fun
-
-    def _f(replica_id):
-      return _make_flush_fun(replica_id)
-    def _eq(x):
-      return math_ops.equal(x, self._replica_id)
-    def _do_nothing():
-      return constant_op.constant(0)
-
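-    # Builds one branch per replica on this host; the fixed count of eight
-    # branches matches the num_replicas_per_host <= 8 check in trace_tpu().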
-    return control_flow_ops.case(
-        {_eq(start_replica + i): _f(start_replica + i) for i in range(8)},
-        default=_do_nothing,
-        exclusive=True).op
-
-  def _flush_tensor_values_cache(self, graph, result_tensor, train_op, on_tpu):
-    """Flushes the intermediate tensor values in the graph to the cache.
-
-    Args:
-      graph: the graph of Ops
-      result_tensor: a result tensor of evaluating the graph.
-      train_op: the training op.
-      on_tpu: if the graph is executed on TPU.
-
-    Returns:
-      An identical copy of result tensor.
-    """
-
-    train_op_list = []
-    if train_op is not None:
-      train_op_list.append(train_op)
-    with ops.control_dependencies(train_op_list):
-      flush_cache_op_list = []
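-      # One flush op per host; each op covers the (up to 8) replicas whose
-      # IDs start at host * 8.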
-      for host in range(self._num_hosts):
-        start_replica = host * 8
-        flush_op = self._generate_flush_cache_op(graph, start_replica, on_tpu)
-        flush_cache_op_list.append(flush_op)
-      with ops.control_dependencies(flush_cache_op_list):
-        return array_ops.identity(result_tensor)
-
-  def trace_tpu(self, graph,
-                result_tensor,
-                train_op,
-                num_replicas=None,
-                num_replicas_per_host=None,
-                num_hosts=None):
-    """Traces the tensors generated by TPU Ops in a TF graph.
-
-    Args:
-      graph: the graph of Ops executed on the TPU.
-      result_tensor: a result tensor of evaluating the graph.
-      train_op: the training op.
-      num_replicas: number of replicas used on the TPU.
-      num_replicas_per_host: number of replicas per TPU host.
-      num_hosts: total number of TPU hosts.
-
-    Returns:
-      A tuple (result_tensor_copy, tracing_ops), where:
-        result_tensor_copy: an exact copy of result_tensor
-        tracing_ops: a list of tracing ops. If this list
-                     is non-empty, the caller of this function
-                     should place control dependencies upon these
-                     Ops so that they will be executed when the
-                     graph is evaluated.
-
-    Raises:
-      RuntimeError: If num_replicas_per_host > 8.
-    """
-
-    def _cast_unsupported_dtypes(tensor):
-      """Casts tensor to a supported type."""
-
-      if tensor.dtype == dtypes.int64:
-        # outside-compilation doesn't support int64 input yet.
-        return math_ops.cast(tensor, dtypes.int32)
-      if tensor.dtype in (dtypes.bfloat16, dtypes.float16):
-        # Since host can't handle bf16, convert tensor to f32.
-        return math_ops.cast(tensor, dtypes.float32)
-      return tensor
-
-    self._device_type = _DEVICE_TYPE_TPU
-    self._num_replicas = num_replicas
-    self._num_replicas_per_host = num_replicas_per_host
-    self._num_hosts = num_hosts
-    if self._num_replicas_per_host > 8:
-      # Checks for the assumption in _generate_flush_cache_op().
-      raise RuntimeError(
-          'num_replicas_per_host (%d) is '
-          'greater than 8'%self._num_replicas_per_host)
-
-    TensorTracer.check_device_type(self._device_type)
-    result_tensor_copy = self._add_replica_id_to_graph(result_tensor)
-    fetches = _set_fetches(result_tensor, train_op)
-    (operations, succeed, sorted_or_cycle) = self._pre_tracing(graph, fetches)
-
-    tracing_ops = []
-    for op in operations:
-      for out_tensor in op.outputs:
-        tensor_name = out_tensor.name
-        if tensor_name not in self._traced_tensorname_to_cache_idx_map:
-          continue
-        # Create the list of consumers before calling
-        # _preprocess_traced_tensor; otherwise adding the control input below
-        # would introduce a cycle in the graph.
-        consumers = out_tensor.consumers()
-        if not consumers:
-          continue
-        processed_out_tensor = self._preprocess_traced_tensor(out_tensor)
-        processed_out_tensor = _cast_unsupported_dtypes(processed_out_tensor)
-        if self._use_tensor_values_cache():
-          cache_idx = self._traced_tensorname_to_cache_idx_map[tensor_name]
-          trace_op = self._save_tensor_value_to_cache_op(graph,
-                                                         cache_idx,
-                                                         processed_out_tensor)
-        else:
-          trace_op = tpu.outside_compilation(
-              self._make_tensor_trace_fun(tensor_name), processed_out_tensor)
-        for consumer_op in consumers:
-          # pylint: disable=protected-access
-          consumer_op._add_control_input(trace_op)
-          # pylint: enable=protected-access
-    if self._use_tensor_values_cache():
-      result_tensor_final = self._flush_tensor_values_cache(graph,
-                                                            result_tensor_copy,
-                                                            train_op,
-                                                            on_tpu=True)
-    else:
-      result_tensor_final = result_tensor_copy
-    self._post_tracing(succeed, sorted_or_cycle)
-    return (result_tensor_final, tracing_ops)
-
-  def _generate_cpu_result(self, result_tensor, train_op, graph):
-    """Generates the final CPU result."""
-
-    if self._use_tensor_values_cache():
-      result_tensor_final = self._flush_tensor_values_cache(graph,
-                                                            result_tensor,
-                                                            train_op,
-                                                            on_tpu=False)
-    else:
-      result_tensor_final = array_ops.identity(result_tensor)
-    return result_tensor_final
-
-  def trace_cpu(self, graph, result_tensor, train_op):
-    """Traces the tensors generated by CPU Ops in a TF graph.
-
-    Args:
-      graph: the graph of Ops executed on the CPU.
-      result_tensor: a result tensor of evaluating the graph.
-      train_op: the training op.
-
-    Returns:
-      A pair (final_result_tensor, tracing_calls) where:
-         final_result_tensor: an identical copy of result_tensor.
-         tracing_calls: a map from keys to trace calls.
-                     A key is constructed from an Op's name.
-                     A trace call consists of a function and a tensor (
-                     the function will be invoked with the tensor).
-    """
-
-    if result_tensor is None:
-      raise ValueError(
-          'The result_tensor passed to trace_cpu should not be None')
-
-    self._device_type = _DEVICE_TYPE_CPU
-    TensorTracer.check_device_type(self._device_type)
-    self._num_replicas = 1
-    self._num_replicas_per_host = 1
-    self._num_hosts = 1
-    self._replica_id = 0
-    fetches = _set_fetches(result_tensor, train_op)
-    (operations, succeed, sorted_or_cycle) = self._pre_tracing(graph, fetches)
-
-    tracing_calls = {}
-    for op in operations:
-      for i, out_tensor in enumerate(op.outputs):
-        tensor_name = out_tensor.name
-        if tensor_name not in self._traced_tensorname_to_cache_idx_map:
-          continue
-        # Create the list of consumers before calling
-        # _preprocess_traced_tensor; otherwise adding the control input below
-        # would introduce a cycle in the graph.
-        consumers = out_tensor.consumers()
-        if not consumers:
-          continue
-        processed_out_tensor = self._preprocess_traced_tensor(out_tensor)
-        if self._use_tensor_values_cache():
-          cache_idx = self._traced_tensorname_to_cache_idx_map[tensor_name]
-          trace_op = self._save_tensor_value_to_cache_op(graph,
-                                                         cache_idx,
-                                                         processed_out_tensor)
-          for consumer_op in consumers:
-            # pylint: disable=protected-access
-            consumer_op._add_control_input(trace_op)
-            # pylint: enable=protected-access
-        else:
-          trace_fun = self._make_tensor_trace_fun(tensor_name)
-          trace_call = (trace_fun, [processed_out_tensor])
-          trace_call_key = 'tensor_tracing_cpu-%s:%d'%(op.name, i)
-          tracing_calls[trace_call_key] = trace_call
-
-    self._post_tracing(succeed, sorted_or_cycle)
-    final_result_tensor = self._generate_cpu_result(result_tensor,
-                                                    train_op,
-                                                    graph)
-    return (final_result_tensor, tracing_calls)
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tensor_tracer import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/topology.py b/tensorflow/contrib/tpu/python/tpu/topology.py
index 6ae718c..5bf8057 100644
--- a/tensorflow/contrib/tpu/python/tpu/topology.py
+++ b/tensorflow/contrib/tpu/python/tpu/topology.py
@@ -1,220 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ======================================
-"""Defines the `Topology` class, that describes a TPU fabric topology."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.contrib.tpu.proto import topology_pb2
-
-
-def _tpu_device_name(job, task, device):
-  """Returns the device name for the TPU `device` on `task` of `job`."""
-  if job is None:
-    return "/task:%d/device:TPU:%d" % (task, device)
-  else:
-    return "/job:%s/task:%d/device:TPU:%d" % (job, task, device)
-
-
-def _tpu_host_device_name(job, task):
-  """Returns the device name for the CPU device on `task` of `job`."""
-  if job is None:
-    return "/task:%d/device:CPU:0" % task
-  else:
-    return "/job:%s/task:%d/device:CPU:0" % (job, task)
-
-
-class Topology(object):
-  """Describes a set of TPU devices.
-
-  Represents both the shape of the physical mesh, and the mapping between
-  TensorFlow TPU devices to physical mesh coordinates.
-  """
-
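-  # For example (see topology_test.py below), a 1x1x2 mesh with a single task
-  # and two TPU devices is described by mesh_shape=[1, 1, 2] and
-  # device_coordinates=[[[0, 0, 0], [0, 0, 1]]].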
-  def __init__(self, serialized=None, mesh_shape=None, device_coordinates=None):
-    """Builds a Topology object.
-
-    If `serialized` is not `None`, the topology is parsed from `serialized` and
-    the other arguments are ignored. Otherwise, the topology is computed from
-    `mesh_shape` and `device_coordinates`.
-
-    Args:
-      serialized: A serialized `TopologyProto`, or `None`. If not `None`, the
-        serialized proto is parsed to discover the topology.
-      mesh_shape: A sequence of 3 positive integers, or `None`. If not `None`,
-        the shape of the TPU topology, in number of cores. Ignored if
-        `serialized` is not `None`.
-      device_coordinates: A rank 3 numpy array that describes the mapping from
-        TensorFlow TPU devices to TPU fabric coordinates, or `None`. Ignored
-        if `serialized` is not `None`.
-
-    Raises:
-      ValueError: If `serialized` does not describe a well-formed topology.
-      ValueError: If `serialized` is `None` and `mesh_shape` is not a sequence
-        of 3 positive integers.
-      ValueError: If `serialized` is `None` and `device_coordinates` is not a
-        rank 3 numpy int32 array that describes a valid coordinate mapping.
-    """
-
-    self._serialized = serialized
-
-    if serialized:
-      self._parse_topology(serialized)
-    else:
-      self._mesh_shape = np.asarray(mesh_shape, dtype=np.int32)
-      self._device_coordinates = np.asarray(device_coordinates, np.int32)
-      if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1):
-        raise ValueError("`mesh_shape` must be a sequence of 3 positive "
-                         "entries; got {}".format(self._mesh_shape))
-
-      if (len(self._device_coordinates.shape) != 3 or
-          self._device_coordinates.shape[2] != len(self._mesh_shape)):
-        raise ValueError("`device_coordinates` must be a rank 3 int32 array "
-                         "with minor dimension equal to the mesh shape rank")
-
-    self._topology_tasks, self._topology_devices = self._invert_topology()
-
-  def _parse_topology(self, serialized):
-    """Parses a serialized `TopologyProto` into `self`."""
-    proto = topology_pb2.TopologyProto()
-    proto.ParseFromString(serialized)
-
-    self._mesh_shape = np.array(proto.mesh_shape, dtype=np.int32)
-    if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1):
-      raise ValueError("`mesh_shape` must be a vector of size 3 with positive "
-                       "entries; got {}".format(self._mesh_shape))
-
-    if proto.num_tasks < 0:
-      raise ValueError("`num_tasks` must be >= 0; got {}".format(
-          proto.num_tasks))
-    if proto.num_tpu_devices_per_task < 0:
-      raise ValueError("`num_tpu_devices_per_task` must be >= 0; got {}".format(
-          proto.num_tpu_devices_per_task))
-
-    expected_coordinates_size = (
-        proto.num_tasks * proto.num_tpu_devices_per_task * len(
-            proto.mesh_shape))
-    if len(proto.device_coordinates) != expected_coordinates_size:
-      raise ValueError("`device_coordinates` must have shape num_tasks ({}) * "
-                       "num_tpu_devices_per_task ({}) * len(mesh_shape) ({}); "
-                       "got shape {}".format(proto.num_tasks,
-                                             proto.num_tpu_devices_per_task,
-                                             proto.mesh_shape,
-                                             len(proto.device_coordinates)))
-
-    coords = np.array(proto.device_coordinates, dtype=np.int32)
-    if any(coords < 0):
-      raise ValueError("`device_coordinates` must be >= 0")
-    coords = coords.reshape((proto.num_tasks, proto.num_tpu_devices_per_task,
-                             len(proto.mesh_shape)))
-    self._device_coordinates = coords
-
-  def _invert_topology(self):
-    """Inverts a [task,device,axis] topology to [x,y,z] -> task/device maps."""
-    tasks = np.full(list(self.mesh_shape), -1, dtype=np.int32)
-    devices = np.full(list(self.mesh_shape), -1, dtype=np.int32)
-    for task in xrange(self.device_coordinates.shape[0]):
-      for device in xrange(self.device_coordinates.shape[1]):
-        x, y, z = self.device_coordinates[task, device, :]
-        tasks[x, y, z] = task
-        devices[x, y, z] = device
-    return tasks, devices
-
-  @property
-  def mesh_shape(self):
-    """A rank 1 int32 array describing the shape of the TPU topology."""
-    return self._mesh_shape
-
-  @property
-  def mesh_rank(self):
-    """Returns the number of dimensions in the mesh."""
-    return len(self._mesh_shape)
-
-  @property
-  def device_coordinates(self):
-    """Describes the mapping from TPU devices to topology coordinates.
-
-    Returns:
-      A rank 3 int32 array with shape `[tasks, devices, axis]`.
-      `tasks` is the number of tasks in the TPU cluster, `devices` is the number
-      of TPU devices per task, and `axis` is the number of axes in the TPU
-      cluster topology. Each entry gives the `axis`-th coordinate in the
-      topology of a task/device pair. TPU topologies are 3-dimensional, with
-      dimensions `(x, y, core number)`.
-    """
-    return self._device_coordinates
-
-  def task_ordinal_at_coordinates(self, device_coordinates):
-    """Returns the TensorFlow task number attached to `device_coordinates`.
-
-    Args:
-      device_coordinates: An integer sequence describing a device's physical
-        coordinates in the TPU fabric.
-
-    Returns:
-      The TensorFlow task number that contains the TPU device with those
-      physical coordinates.
-    """
-    return self._topology_tasks[tuple(device_coordinates)]
-
-  def tpu_device_ordinal_at_coordinates(self, device_coordinates):
-    """Returns the TensorFlow device number at `device_coordinates`.
-
-    Args:
-      device_coordinates: An integer sequence describing a device's physical
-        coordinates in the TPU fabric.
-
-    Returns:
-      The TensorFlow device number within the task that is attached to the
-      device with those physical coordinates.
-    """
-    return self._topology_devices[tuple(device_coordinates)]
-
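# A minimal sketch of the coordinate lookups above, mirroring the topology
# built by the (deleted) unit test later in this diff; the import uses the
# new canonical path introduced by this PR:
from tensorflow.python.tpu.topology import Topology

topo = Topology(mesh_shape=[1, 1, 2],
                device_coordinates=[[[0, 0, 0], [0, 0, 1]]])
assert topo.task_ordinal_at_coordinates([0, 0, 1]) == 0
assert topo.tpu_device_ordinal_at_coordinates([0, 0, 1]) == 1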
-  def cpu_device_name_at_coordinates(self, device_coordinates, job=None):
-    """Returns the CPU device attached to a logical core."""
-    return _tpu_host_device_name(
-        job, self._topology_tasks[tuple(device_coordinates)])
-
-  def tpu_device_name_at_coordinates(self, device_coordinates, job=None):
-    """Returns the name of the TPU device assigned to a logical core."""
-    return _tpu_device_name(job,
-                            self._topology_tasks[tuple(device_coordinates)],
-                            self._topology_devices[tuple(device_coordinates)])
-
-  @property
-  def num_tasks(self):
-    """Returns the number of TensorFlow tasks in the TPU slice."""
-    return self._device_coordinates.shape[0]
-
-  @property
-  def num_tpus_per_task(self):
-    """Returns the number of TPU devices per task in the TPU slice."""
-    return self._device_coordinates.shape[1]
-
-  def serialized(self):
-    """Returns the serialized form of the topology."""
-    if self._serialized is None:
-      proto = topology_pb2.TopologyProto()
-      proto.mesh_shape[:] = list(self._mesh_shape)
-      proto.num_tasks = self._device_coordinates.shape[0]
-      proto.num_tpu_devices_per_task = self._device_coordinates.shape[1]
-      proto.device_coordinates.extend(list(self._device_coordinates.flatten()))
-      self._serialized = proto.SerializeToString()
-
-    return self._serialized
+# pylint: disable=wildcard-import,unused-import,redefined-builtin
+from tensorflow.python.tpu.topology import *
+# pylint: enable=wildcard-import,unused-import,redefined-builtin
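# With the stub above in place, the contrib path simply re-exports the core
# implementation, so both import paths resolve to the same class (a sketch of
# the compatibility guarantee this stub provides):
from tensorflow.contrib.tpu.python.tpu import topology as contrib_topology
from tensorflow.python.tpu import topology as core_topology
assert contrib_topology.Topology is core_topology.Topology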
diff --git a/tensorflow/contrib/tpu/python/tpu/topology_test.py b/tensorflow/contrib/tpu/python/tpu/topology_test.py
deleted file mode 100644
index fafe325..0000000
--- a/tensorflow/contrib/tpu/python/tpu/topology_test.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-"""Tests for topology.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.tpu.python.tpu import topology
-
-from tensorflow.python.platform import test
-
-
-class TopologyTest(test.TestCase):
-
-  def testSerialization(self):
-    """Tests if the class is able to generate serialized strings."""
-    original_topology = topology.Topology(
-        mesh_shape=[1, 1, 2],
-        device_coordinates=[[[0, 0, 0], [0, 0, 1]]],
-    )
-    serialized_str = original_topology.serialized()
-    new_topology = topology.Topology(serialized=serialized_str)
-
-    # Make sure the topology recovered from serialized str is same as the
-    # original topology.
-    self.assertAllEqual(
-        original_topology.mesh_shape, new_topology.mesh_shape)
-    self.assertAllEqual(
-        original_topology.device_coordinates, new_topology.device_coordinates)
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index de2bfd4..5364b20 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -1,1566 +1,25 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ======================================
-
-"""Library of TPU helper functions."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.contrib.compiler import xla
-from tensorflow.contrib.framework.python.framework import experimental
-from tensorflow.contrib.tpu.proto import dynamic_padding_pb2 as dynamic_padding
-from tensorflow.contrib.tpu.python.ops import tpu_ops
-from tensorflow.contrib.tpu.python.tpu import tpu_function
-
-from tensorflow.core.framework import attr_value_pb2
-from tensorflow.python.compat import compat as api_compat
-from tensorflow.python.framework import device as pydev
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util import compat
-from tensorflow.python.util import nest
-
-
-# Operations that indicate some error in the users graph, e.g. a placeholder
-# that's introduced outside of the infeed.
-_BLACKLISTED_OPS = set([
-    "Placeholder",
-])
-
-# XLA doesn't currently support reading of intermediate tensors, thus some ops
-# are not supported.
-_UNSUPPORTED_OPS = set([
-    "AudioSummary",
-    "AudioSummaryV2",
-    "HistogramSummary",
-    "ImageSummary",
-    "MergeSummary",
-    "Print",
-    "ScalarSummary",
-    "TensorSummary",
-    "TensorSummaryV2",
-    ])
-
-_MAX_WARNING_LINES = 5
-
-_TPU_REPLICATE_ATTR = "_tpu_replicate"
-_TPU_COMPILATION_STATUS_ATTR = "_tpu_compilation_status"
-_OUTSIDE_COMPILATION_ATTR = "_xla_outside_compilation"
-
-
-def _tpu_system_device_name(job):
-  """Returns the device name for the TPU_SYSTEM device of `job`."""
-  if job is None:
-    return "/device:TPU_SYSTEM:0"
-  else:
-    return "/job:%s/device:TPU_SYSTEM:0" % job
-
-
-def initialize_system(embedding_config=None, job=None):
-  """Initializes a distributed TPU system for use with TensorFlow.
-
-  Args:
-    embedding_config: If not None, a `TPUEmbeddingConfiguration` proto
-      describing the desired configuration of the hardware embedding lookup
-      tables. If embedding_config is None, no hardware embeddings can be used.
-    job: The job (the XXX in TensorFlow device specification /job:XXX) that
-      contains the TPU devices that will be initialized. If job=None, it is
-      assumed there is only one job in the TensorFlow cluster, and an error
-      will be returned if this assumption does not hold.
-  Returns:
-    A serialized `TopologyProto` that describes the TPU system. Note:
-      the topology must be evaluated using `Session.run` before it can be used.
-  """
-  config_string = ("" if embedding_config is None else
-                   embedding_config.SerializeToString())
-  with ops.device(_tpu_system_device_name(job)):
-    return tpu_ops.configure_distributed_tpu(embedding_config=config_string)
-
-
-def shutdown_system(job=None):
-  """Shuts down a running a distributed TPU system."""
-  with ops.device(_tpu_system_device_name(job)):
-    shutdown_distributed_tpu = tpu_ops.shutdown_distributed_tpu()
-  return shutdown_distributed_tpu
-
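# A typical graph-mode lifecycle for the two functions above (a sketch only:
# it assumes a reachable TPU system; `tpu_master` is a hypothetical address
# string and session setup is elided):
#
#   topology_proto = initialize_system()
#   with tf.Session(tpu_master) as sess:
#     serialized = sess.run(topology_proto)  # must be evaluated before use
#     ...                                    # run TPU computations here
#     sess.run(shutdown_system())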
-
-def core(num):
-  """Returns the device name for a core in a replicated TPU computation.
-
-  Args:
-    num: the virtual core number within each replica to which operators should
-      be assigned.
-  Returns:
-    A device name, suitable for passing to `tf.device()`.
-  """
-  return "device:TPU_REPLICATED_CORE:{}".format(num)
-
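# `core` only formats a device string; a sketch of pinning ops to replicated
# core 0 inside a replicated computation:
#
#   with ops.device(core(0)):   # "device:TPU_REPLICATED_CORE:0"
#     y = x + 1.0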
-
-class TPUReplicateContext(control_flow_ops.XLAControlFlowContext):
-  """A `ControlFlowContext` for nodes inside a TPU computation.
-
-  The primary role of `TPUReplicateContext` is to mark operators inside a
-  tpu.replicate() computation with the attribute "_tpu_replicate=XYZ", where XYZ
-  is a unique name.
-
-  We use a `ControlFlowContext` to perform the annotation since it integrates
-  with TensorFlow constructs like ResourceVariables. For example, if a
-  `ResourceVariable` is constructed inside a tpu.replicate() block, the
-  `ResourceVariable` implementation can use
-  `with ops.control_dependencies(None)` to build the variable's definition
-  outside the replicated computation.
-  """
-
-  def __init__(self, name, num_replicas, pivot):
-    """Builds a new TPUReplicateContext.
-
-    Args:
-      name: a unique name for the context, used to populate the `_tpu_replicate`
-        attribute.
-      num_replicas: an integer that gives the number of replicas for the
-        computation.
-      pivot: a pivot node. Nodes in the TPUReplicateContext that do not have any
-        inputs will have a control dependency on the pivot node. This ensures
-        that nodes are correctly included in any enclosing control flow
-        contexts.
-    """
-    super(TPUReplicateContext, self).__init__()
-    self._num_replicas = num_replicas
-    self._outer_device_function_stack = None
-    self._oc_dev_fn_stack = None
-    self._outside_compilation_cluster = None
-    self._outside_compilation_counter = 0
-    self._in_gradient_colocation = None
-    self._gradient_colocation_stack = []
-    self._host_compute_core = []
-    self._name = name
-    self._name_as_bytes = compat.as_bytes(name)
-    self._unsupported_ops = []
-    self._pivot = pivot
-    self._replicated_vars = {}
-
-  def get_replicated_var_handle(self, name, vars_):
-    """Returns a variable handle for replicated TPU variable 'var'.
-
-    This is a method used by an experimental replicated variable implementation
-    and is not intended as a public API.
-
-    Args:
-      name: The common name of the variable.
-      vars_: The replicated TPU variables.
-
-    Returns:
-      The handle of the TPU replicated input node.
-    """
-    handle = self._replicated_vars.get(name)
-    if handle is not None:
-      return handle
-
-    # Builds a TPUReplicatedInput node for the variable, if one does not already
-    # exist. The TPUReplicatedInput node must belong to the enclosing
-    # control-flow scope of the TPUReplicateContext.
-    # TODO(phawkins): consider changing the contract of the TPU encapsulation
-    # so the TPUReplicatedInput nodes go inside the TPUReplicateContext scope
-    # instead.
-
-    # pylint: disable=protected-access
-    graph = ops.get_default_graph()
-    saved_context = graph._get_control_flow_context()
-    graph._set_control_flow_context(self.outer_context)
-    handle = tpu_ops.tpu_replicated_input(
-        [v.handle for v in vars_], name=name + "/handle")
-    graph._set_control_flow_context(saved_context)
-    # pylint: enable=protected-access
-    self._replicated_vars[name] = handle
-    return handle
-
-  def report_unsupported_operations(self):
-    if self._unsupported_ops:
-      op_str = "\n".join(["  %s (%s)" % (op.type, op.name)
-                          for op in self._unsupported_ops[:_MAX_WARNING_LINES]])
-      logging.warning("%d unsupported operations found: \n%s",
-                      len(self._unsupported_ops), op_str)
-      if len(self._unsupported_ops) > _MAX_WARNING_LINES:
-        logging.warning("... and %d more" %
-                        (len(self._unsupported_ops) - _MAX_WARNING_LINES))
-
-  def EnterGradientColocation(self, op, gradient_uid):
-    if op is not None:
-      self._gradient_colocation_stack.append(op)
-      if not self._outside_compilation_cluster:
-        try:
-          outside_attr = op.get_attr(_OUTSIDE_COMPILATION_ATTR)
-          if self._in_gradient_colocation:
-            raise NotImplementedError(
-                "Cannot nest gradient colocation operations outside compilation"
-            )
-          if gradient_uid == "__unsupported__":
-            raise NotImplementedError(
-                "No gradient_uid calling gradient within outside_compilation")
-          # When we take the gradient of an op X in an outside_compilation
-          # cluster C in a forward computation we would like to put the ops
-          # corresponding to the gradient of X into a new outside_compilation
-          # cluster C'. However, if we take the gradient of X twice, the second
-          # one should get yet another new outside_compilation cluster C''.
-          #
-          # The mechanism we adopt is to use a 'root_cluster' which is the
-          # cluster that X was in before we took gradients, and a 'gradient_uid'
-          # which is different for every invocation of gradients, and put the
-          # gradient of X in cluster 'root_cluster.gradient_uid'.
-          #
-          # When taking a gradient of a gradient, some ops will be colocated
-          # with Op in the forward pass (e.g., cluster root_cluster) and some in
-          # the backward pass (e.g., cluster root_cluster.initial_gradient_uid).
-          # We need all of the grad-of-grad ops to be in the same cluster to
-          # avoid cyclic dependencies between clusters. We adopt a heuristic
-          # that puts any op clustered with root_cluster.<xxx> in
-          # root_cluster.gradient_uid, even if xxx was initial_gradient_uid.
-          self._in_gradient_colocation = op
-          parts = outside_attr.split(".")
-          cluster = parts[0] + "." + gradient_uid
-          self._EnterOutsideCompilationScope(cluster=cluster)
-        except ValueError:
-          # The attr was not present: do nothing.
-          pass
-
-  def ExitGradientColocation(self, op, gradient_uid):
-    if op is not None:
-      if not self._gradient_colocation_stack:
-        raise errors.InternalError(
-            op.node_def, op,
-            "Badly nested gradient colocation: empty stack when popping Op " +
-            op.name)
-      last_op = self._gradient_colocation_stack.pop()
-      if op is last_op:
-        if op is self._in_gradient_colocation:
-          self._in_gradient_colocation = None
-          self._ExitOutsideCompilationScope()
-      else:
-        raise errors.InternalError(
-            op.node_def, op, "Badly nested gradient colocation, expected " +
-            last_op + ", got " + op.name)
-
-  def _EnterOutsideCompilationScope(self, cluster=None):
-
-    class FakeOp(object):
-      """A helper class to determine the current device.
-
-      Supports only the type and device set/get methods needed to run the
-      graph's _apply_device_function method.
-      """
-
-      def __init__(self):
-        self._device = ""
-
-      @property
-      def type(self):
-        return "FakeOp"
-
-      @property
-      def device(self):
-        return self._device
-
-      def _set_device(self, device):
-        if isinstance(device, pydev.DeviceSpec):
-          self._device = device.to_string()
-        else:
-          self._device = device
-
-    if self._outside_compilation_cluster:
-      raise NotImplementedError("Cannot nest outside_compilation clusters")
-    if cluster:
-      self._outside_compilation_cluster = cluster
-    else:
-      self._outside_compilation_cluster = str(self._outside_compilation_counter)
-      self._outside_compilation_counter += 1
-    graph = ops.get_default_graph()
-    fake_op = FakeOp()
-    graph._apply_device_functions(fake_op)  # pylint: disable=protected-access
-    device = pydev.DeviceSpec.from_string(fake_op.device)
-    if (device.device_type == "TPU_REPLICATED_CORE" and
-        device.device_index is not None):
-      self._host_compute_core.append(self._outside_compilation_cluster + ":" +
-                                     str(device.device_index))
-    self._oc_dev_fn_stack = graph._device_function_stack  # pylint: disable=protected-access
-    graph._device_function_stack = self._outer_device_function_stack  # pylint: disable=protected-access
-
-  def _ExitOutsideCompilationScope(self):
-    if not self._outside_compilation_cluster:
-      raise NotImplementedError(
-          "Attempted to exit outside_compilation scope when not in scope")
-    self._outside_compilation_cluster = None
-    graph = ops.get_default_graph()
-    graph._device_function_stack = self._oc_dev_fn_stack  # pylint: disable=protected-access
-
-  def Enter(self):
-    if not self._outer_device_function_stack:
-      # Capture the device function stack at the time of first entry
-      # since that is the stack that will be used outside_compilation.
-      graph = ops.get_default_graph()
-      # pylint: disable=protected-access
-      self._outer_device_function_stack = graph._device_function_stack.copy()
-      # pylint: enable=protected-access
-    super(TPUReplicateContext, self).Enter()
-
-  def HostComputeCore(self):
-    return self._host_compute_core
-
-  def _RemoveExternalControlEdges(self, op):
-    """Remove any external control dependency on this op."""
-    internal_control_inputs = []
-    external_control_inputs = []
-    for x in op.control_inputs:
-      # pylint: disable=protected-access
-      is_internal_op = False
-      ctxt = x._get_control_flow_context()
-      while ctxt is not None:
-        if ctxt == self:
-          is_internal_op = True
-          break
-        ctxt = ctxt._outer_context
-      if is_internal_op:
-        internal_control_inputs.append(x)
-      else:
-        external_control_inputs.append(x)
-      # pylint: enable=protected-access
-    # pylint: disable=protected-access
-    op._remove_all_control_inputs()
-    op._add_control_inputs(internal_control_inputs)
-    # pylint: enable=protected-access
-    return internal_control_inputs, external_control_inputs
-
-  def AddOp(self, op):
-    # pylint: disable=protected-access
-    if op.type in _BLACKLISTED_OPS:
-      logging.error("Operation of type %s (%s) is not supported on the TPU. "
-                    "Execution will fail if this op is used in the graph. " %
-                    (op.type, op.name))
-
-    if op.type in _UNSUPPORTED_OPS:
-      self._unsupported_ops.append(op)
-
-    if any(x.dtype._is_ref_dtype for x in op.inputs):
-      raise NotImplementedError(
-          "Non-resource Variables are not supported inside TPU computations "
-          "(operator name: %s)" % op.name)
-    if _TPU_REPLICATE_ATTR in op.node_def.attr:
-      raise ValueError("TPU computations cannot be nested")
-    op._set_attr(_TPU_REPLICATE_ATTR,
-                 attr_value_pb2.AttrValue(s=self._name_as_bytes))
-    if self._outside_compilation_cluster:
-      op._set_attr(
-          _OUTSIDE_COMPILATION_ATTR,
-          attr_value_pb2.AttrValue(
-              s=compat.as_bytes(self._outside_compilation_cluster)))
-    if self._num_replicas > 1 or not self._outside_compilation_cluster:
-      # Prevent feeding or fetching anything that is being compiled,
-      # and any replicated outside_compilation Op.
-      op.graph.prevent_feeding(op)
-      op.graph.prevent_fetching(op)
-
-    # Remove any control edges from outer control flow contexts. These may cause
-    # mismatched frame errors.
-    (internal_control_inputs,
-     external_control_inputs) = self._RemoveExternalControlEdges(op)
-
-    if not op.inputs:
-      # Add a control edge from the control pivot to this op.
-      if not internal_control_inputs:
-        # pylint: disable=protected-access
-        op._add_control_input(self.GetControlPivot())
-        # pylint: enable=protected-access
-    else:
-      for index in xrange(len(op.inputs)):
-        x = op.inputs[index]
-        real_x = self.AddValue(x)
-        if real_x != x:
-          op._update_input(index, real_x)  # pylint: disable=protected-access
-
-    if external_control_inputs:
-      # Use an identity to pull control inputs as data inputs. Note that we
-      # ignore ops which don't have outputs. TODO(phawkins): fix that.
-      with ops.control_dependencies(None):
-        self.Enter()
-        external_control_inputs = [
-            array_ops.identity(x.outputs[0]).op
-            for x in external_control_inputs
-            if x.outputs
-        ]
-        self.Exit()
-      # pylint: disable=protected-access
-      op._add_control_inputs(external_control_inputs)
-      # pylint: enable=protected-access
-
-    # Mark op's outputs as seen by this context and any outer contexts.
-    output_names = [x.name for x in op.outputs]
-    context = self
-    while context is not None:
-      # pylint: disable=protected-access
-      context._values.update(output_names)
-      context = context._outer_context
-      # pylint: enable=protected-access
-
-    if self._outer_context:
-      self._outer_context.AddInnerOp(op)
-
-  def AddValue(self, val):
-    """Add `val` to the current context and its outer context recursively."""
-    if val.name in self._values:
-      # Use the real value if it comes from outer context.
-      result = self._external_values.get(val.name)
-      return val if result is None else result
-
-    result = val
-    self._values.add(val.name)
-    if self._outer_context:
-      result = self._outer_context.AddValue(val)
-      self._values.add(result.name)
-
-    self._external_values[val.name] = result
-
-    return result
-
-  def AddInnerOp(self, op):
-    self.AddOp(op)
-    if self._outer_context:
-      self._outer_context.AddInnerOp(op)
-
-  @property
-  def grad_state(self):
-    # Define the gradient loop state associated with the TPUReplicateContext to
-    # be None as the TPUReplicateContext does not get nested nor does the
-    # grad_state outside the TPUReplicateContext affect the graph inside so the
-    # grad_state should be as if this is the top-level gradient state.
-    return None
-
-  @property
-  def back_prop(self):
-    """Forwards to the enclosing while context, if any."""
-    if self.GetWhileContext():
-      return self.GetWhileContext().back_prop
-    return False
-
-  def GetControlPivot(self):
-    return self._pivot
-
-
-def outside_compilation(computation, *args, **kwargs):
-  """Builds part of a computation outside any current TPU replicate scope.
-
-  Args:
-    computation: A Python function that builds the computation to
-      place on the host.
-    *args: the positional arguments for the computation.
-    **kwargs: the keyword arguments for the computation.
-
-  Returns:
-    The Tensors returned by computation.
-  """
-  args = [] if args is None else args
-  graph = ops.get_default_graph()
-
-  # If we are in a TPUReplicateContext, signal that we are now
-  # outside_compilation
-  initial_context = graph._get_control_flow_context()  # pylint: disable=protected-access
-  context = initial_context
-  while context:
-    if isinstance(context, TPUReplicateContext):
-      context._EnterOutsideCompilationScope()  # pylint: disable=protected-access
-    context = context.outer_context
-
-  retval = computation(*args, **kwargs)
-
-  # If we are in a TPUReplicateContext, signal that we are no longer
-  # outside_compilation
-  final_context = graph._get_control_flow_context()  # pylint: disable=protected-access
-  if initial_context is not final_context:
-    raise NotImplementedError(
-        "Control-flow context cannot be different at start and end of an "
-        "outside_compilation scope")
-  context = initial_context
-  while context:
-    if isinstance(context, TPUReplicateContext):
-      context._ExitOutsideCompilationScope()  # pylint: disable=protected-access
-    context = context.outer_context
-
-  return retval
-
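# A sketch of escaping to the host from inside a replicated computation, e.g.
# for an op XLA cannot compile (Print is on the unsupported-op list above);
# `host_log` is an illustrative name, not part of this file:
#
#   def host_log(x):
#     return logging_ops.Print(x, [x], "on host: ")
#
#   def computation(x):
#     x = x * 2.0
#     x = outside_compilation(host_log, x)  # these ops run on the host CPU
#     return x + 1.0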
-
-def replicate(computation,
-              inputs=None,
-              infeed_queue=None,
-              device_assignment=None,
-              name=None,
-              maximum_shapes=None):
-  """Builds a graph operator that runs a replicated TPU computation.
-
-  Args:
-    computation: A Python function that builds the computation to replicate.
-    inputs: A list of lists of input tensors or `None` (equivalent to
-      `[[]]`), indexed by `[replica_num][input_num]`. All replicas must
-      have the same number of inputs. Each input can be a nested structure
-      containing values that are convertible to tensors. Note that passing an
-      N-dimension list of compatible values will result in an N-dimension list
-      of scalar tensors rather than a single rank-N tensor. If you need
-      different behavior, convert part of inputs to tensors with
-      `tf.convert_to_tensor`.
-    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
-      of arguments as inputs to computation.
-    device_assignment: If not `None`, a `DeviceAssignment` describing the
-      mapping between logical cores in the computation with physical cores in
-      the TPU topology. Uses a default device assignment if `None`. The
-      `DeviceAssignment` may be omitted if each replica of the computation uses
-      only one core, and there is either only one replica, or the number of
-      replicas is equal to the number of cores in the TPU system.
-    name: (Deprecated) Does nothing.
-    maximum_shapes: A nested structure of tf.TensorShape representing the shape
-      to which the respective component of each input element in each replica
-      should be padded. Any unknown dimensions (e.g. tf.Dimension(None) in a
-      tf.TensorShape or -1 in a tensor-like object) will be padded to the
-      maximum size of that dimension over all replicas. Note that if the input
-      dimension is already static, we won't do padding on it and we require the
-      maximum_shapes to have the same value or None on that dimension. The
-      structure of `maximum_shapes` needs to be the same as `inputs[0]`.
-  Returns:
-    A list of outputs, indexed by `[replica_num]`. Each output can be a nested
-    structure the same as what computation() returns, with a few exceptions.
-
-    Exceptions include:
-      1) None output: a NoOp would be returned which control-depends on
-         computation.
-      2) Single value output: A tuple containing the value would be returned.
-      3) Operation-only outputs: a NoOp would be returned which
-         control-depends on computation.
-      TODO(b/121383831): Investigate into removing these special cases.
-
-  Raises:
-    ValueError: If all replicas do not have equal numbers of input tensors.
-    ValueError: If the number of inputs per replica does not match
-      the number of formal parameters to `computation`.
-    ValueError: If the static `inputs` dimensions don't match with the values
-      given in `maximum_shapes`.
-    ValueError: If the structure of inputs per replica does not match
-      the structure of `maximum_shapes`.
-  """
-  return split_compile_and_replicate(
-      computation,
-      inputs,
-      infeed_queue,
-      device_assignment,
-      name,
-      maximum_shapes=maximum_shapes)[1]
-
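# A minimal two-replica call to `replicate` (a sketch; assumes an initialized
# TPU system, and `tf.constant` stands in for real per-replica inputs):
#
#   def computation(x):
#     return x * x
#
#   # One inner list per replica; every replica must get the same arity.
#   outputs = replicate(computation,
#                       inputs=[[tf.constant(2.0)], [tf.constant(3.0)]])
#   # outputs[0] / outputs[1] hold replica 0's and replica 1's results.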
-
-def _pad_all_input(inputs, padded_shapes):
-  """Pad all input tensors given padded_shapes.
-
-  The real shape tensors will be concatenated with the padded original inputs.
-
-  Args:
-    inputs: The original inputs.
-    padded_shapes: A list of padded shapes for each input.
-
-  Returns:
-    The padded inputs and a PaddingMap list which maps the padded input
-    dimension to the real shape argument index.
-  """
-  input_shape_tensors = []
-  for core_idx, inputs_per_core in enumerate(inputs):
-    for idx, input_tensor in enumerate(inputs_per_core):
-      if core_idx == 0:
-        input_shape_tensors.append([])
-      input_shape_tensors[idx].append(array_ops.shape(input_tensor))
-
-  maximum_shapes = []
-  for shapes_per_input in input_shape_tensors:
-    maximum_shapes.append(
-        math_ops.reduce_max(array_ops.stack(shapes_per_input), axis=0))
-
-  padded_inputs = []
-  real_shapes = []
-  padding_maps = []
-  for core_idx, inputs_per_core in enumerate(inputs):
-    padded_inputs.append([])
-    real_shapes.append([])
-    real_shape_idx = len(inputs_per_core) - 1
-    for idx, input_tensor in enumerate(inputs_per_core):
-      input_shape_tensor = input_shape_tensors[idx][core_idx]
-      input_shape = input_tensor.get_shape()
-      padded_shape = padded_shapes[idx]
-
-      # The static shape of inputs should be compatible with the given padded
-      # shapes.
-      input_shape.assert_is_compatible_with(padded_shape)
-
-      if input_shape.is_fully_defined():
-        # Do nothing if the shape of the whole tensor is already static.
-        padded_inputs[core_idx].append(input_tensor)
-      else:
-        # Only pad the non static shape dimension.
-        for i, s in enumerate(input_shape):
-          if s.value is None:
-            if core_idx == 0:
-              real_shape_idx += 1
-              padding_map = dynamic_padding.PaddingMap()
-              padding_map.arg_index = idx
-              padding_map.shape_index = i
-              padding_map.padding_arg_index = real_shape_idx
-              padding_maps.append(padding_map)
-            real_shapes[core_idx].append(
-                math_ops.cast(input_shape_tensor[i], dtypes.uint32))
-
-        paddings = []
-        for i, s in enumerate(padded_shape):
-          if input_shape[i].value:
-            # Don't pad if input shape is already static.
-            padding = [0, 0]
-          else:
-            if s.value:
-              # Pad to the given maximum value.
-              padding = [0, s.value - input_shape_tensor[i]]
-            else:
-              # If maximum value is not given, then pad to the maximum dimension
-              # among all the cores.
-              padding = [0, maximum_shapes[idx][i] - input_shape_tensor[i]]
-          paddings.append(padding)
-
-        padded_input = array_ops.pad(input_tensor, paddings)
-        padded_inputs[core_idx].append(padded_input)
-
-  num_replicas = len(padded_inputs)
-  for i in range(num_replicas):
-    padded_inputs[i].extend(real_shapes[i])
-
-  return padded_inputs, padding_maps
-
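# The padding rule above, restated as a small self-contained numpy sketch
# (illustrative only; the real helper also emits PaddingMap protos and
# appends the real-shape tensors to each replica's inputs):
import numpy as np

shapes = np.array([[2, 5], [3, 4]])   # dynamic input shapes, one row per core
maximum = shapes.max(axis=0)          # every core pads up to [3, 5]
paddings = [[(0, int(m - d)) for d, m in zip(s, maximum)] for s in shapes]
# paddings == [[(0, 1), (0, 0)], [(0, 0), (0, 1)]], i.e. pad at the end only.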
-
-def split_compile_and_replicate(computation,
-                                inputs=None,
-                                infeed_queue=None,
-                                device_assignment=None,
-                                name=None,
-                                use_tpu=True,
-                                maximum_shapes=None):
-  """Builds graph operators that runs compilation and replicated computation.
-
-  This is a lower level interface than replicate that returns a separate compile
-  and execute output tensor. In the generated graph the compile op feeds into
-  the execute op and no additional compilation is incurred when running the
-  compile op before the execute op. The compile op returns additional
-  information about the compilation but does not return the compiled program.
-
-  Args:
-    computation: A Python function that builds the computation to replicate.
-    inputs: A list of lists of input tensors or `None` (equivalent to
-      `[[]]`), indexed by `[replica_num][input_num]`. All replicas must
-      have the same number of inputs. Each input can be a nested structure
-      containing values that are convertible to tensors. Note that passing an
-      N-dimension list of compatible values will result in an N-dimension list
-      of scalar tensors rather than a single rank-N tensor. If you need
-      different behavior, convert part of inputs to tensors with
-      `tf.convert_to_tensor`.
-    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
-      of arguments as inputs to computation.
-    device_assignment: If not `None`, a `DeviceAssignment` describing the
-      mapping between logical cores in the computation with physical cores in
-      the TPU topology. Uses a default device assignment if `None`. The
-      `DeviceAssignment` may be omitted if each replica of the computation uses
-      only one core, and there is either only one replica, or the number of
-      replicas is equal to the number of cores in the TPU system.
-    name: (Deprecated) Does nothing.
-    use_tpu: When false, the input `computation` is executed on the XLA CPU/GPU
-      backends. Currently, only supports a default placement (computation is
-      placed on GPU if one is available, and on CPU if not).
-    maximum_shapes: A nested structure of tf.TensorShape representing the shape
-      to which the respective component of each input element in each replica
-      should be padded. Any unknown dimensions (e.g. tf.Dimension(None) in a
-      tf.TensorShape or -1 in a tensor-like object) will be padded to the
-      maximum size of that dimension over all replicas. Note that if the input
-      dimension is already static, we won't do padding on it and we require the
-      maximum_shapes to have the same value or None on that dimension. The
-      structure of `maximum_shapes` needs to be the same as `inputs[0]`.
-
-  Returns:
-    A list of lists with the first list corresponding to the compile op and the
-    second a list of output tensors, indexed by `[replica_num][output_num]`.
-  Raises:
-    ValueError: If all replicas do not have equal numbers of input tensors.
-    ValueError: If the number of inputs per replica does not match
-      the number of formal parameters to `computation`.
-    ValueError: If the static `inputs` dimensions don't match with the values
-      given in `maximum_shapes`.
-    ValueError: If the structure of inputs per replica does not match
-      the structure of `maximum_shapes`.
-  """
-  del name
-  inputs = [[]] if inputs is None else inputs
-
-  metadata_kwargs = {}
-  if device_assignment is not None:
-    # Turn the Numpy array into a flattened list so we can pass it as an
-    # operator attribute.
-    metadata_kwargs = {
-        "topology":
-            device_assignment.topology.serialized(),
-        "device_assignment":
-            device_assignment.core_assignment.flatten().tolist()
-    }
-    # TODO(phawkins): remove this case after the forward compatibility window
-    # expires on 2018-10-5.
-    if api_compat.forward_compatible(2018, 10, 5):
-      metadata_kwargs["num_cores_per_replica"] = (
-          device_assignment.num_cores_per_replica)
-    else:
-      metadata_kwargs["computation_shape"] = [
-          device_assignment.num_cores_per_replica
-      ]
-
-  if ((not isinstance(inputs, list)) or
-      any(not isinstance(inp, (list, tuple)) for inp in inputs)):
-    raise TypeError("tpu.replicate() inputs must be a list of lists/tuples")
-
-  num_replicas = len(inputs)
-
-  # No replicas? Nothing to do.
-  if num_replicas == 0:
-    return []
-
-  # Checks all replicas have the same structure.
-  for i in xrange(1, num_replicas):
-    nest.assert_same_structure(inputs[0], inputs[i])
-
-  # Flatten inputs.
-  flat_inputs = [
-      nest.flatten(per_replica_input) for per_replica_input in inputs
-  ]
-  # Converts inputs to Tensors.
-  flat_inputs = [[ops.convert_to_tensor(x) for x in inp] for inp in flat_inputs]
-
-  # Verifies that all replicas have matching numbers and types of inputs
-  flat_input_types = [x.dtype for x in flat_inputs[0]]
-  input_arity = len(inputs[0])
-  flat_input_arity = len(flat_input_types)
-  for i in range(num_replicas):
-    if len(inputs[i]) != input_arity:
-      raise ValueError("Replicas must have the same number of inputs. "
-                       "Replica 0 had {} inputs, replica {} had {} "
-                       "inputs.".format(input_arity, i, len(inputs[i])))
-
-    types = [x.dtype for x in flat_inputs[i]]
-    if types != flat_input_types:
-      raise ValueError("Replicas must have matching input types. Replica 0 had "
-                       "input types {}, replica {} had input types {}".format(
-                           flat_input_types, i, types))
-
-  arg_error = xla.check_function_argument_count(
-      computation, input_arity, infeed_queue)
-  if arg_error is not None:
-    if infeed_queue is None:
-      raise TypeError(
-          "Supplied computation cannot be called with the specified inputs. "
-          "You specified %d inputs: %s, but the computation needs %s" % (
-              input_arity, str([i.name for i in inputs[0]]), arg_error))
-    else:
-      raise TypeError(
-          "Supplied computation cannot be called with the specified inputs. "
-          "You specified %d inputs: %s and %d additional inputs from infeed,"
-          " but the computation needs %s" % (input_arity, str(
-              [i.name
-               for i in inputs[0]]), infeed_queue.number_of_tuple_elements,
-                                             arg_error))
-
-  if maximum_shapes:
-    if infeed_queue:
-      raise ValueError(
-          "Dynamic input shapes are not supported with infeed queues")
-
-    # Make sure maximum_shapes has the same structure as inputs.
-    nest.assert_same_structure(inputs[0], maximum_shapes, check_types=False)
-
-    # Flatten padded shapes.
-    flat_maximum_shapes = nest.flatten(maximum_shapes)
-    flat_maximum_shapes = [
-        tensor_shape.TensorShape(s) for s in flat_maximum_shapes
-    ]
-
-    flat_inputs, padding_maps = _pad_all_input(flat_inputs, flat_maximum_shapes)
-
-    serialized_padding_maps = []
-    for padding_map in padding_maps:
-      serialized_padding_maps.append(padding_map.SerializeToString())
-    metadata_kwargs["padding_map"] = serialized_padding_maps
-
-  graph = ops.get_default_graph()
-
-  # Fan-in: Builds a TPUReplicatedInput node for each input.
-  flat_replicated_inputs = []
-  for i in range(0, len(flat_inputs[0])):
-    replicas = [flat_inputs[replica][i] for replica in xrange(num_replicas)]
-    flat_replicated_inputs.append(
-        tpu_ops.tpu_replicated_input(replicas, name="input{}".format(i)))
-
-  cluster_name = graph.unique_name("cluster")
-  pivot = control_flow_ops.no_op(name=cluster_name + "/pivot")
-  context = TPUReplicateContext(
-      name=cluster_name, num_replicas=num_replicas, pivot=pivot)
-  try:
-    context.Enter()
-
-    metadata = tpu_ops.tpu_replicate_metadata(
-        num_replicas=num_replicas, use_tpu=use_tpu, **metadata_kwargs)
-
-    with tpu_function.tpu_shard_context(
-        num_replicas), ops.control_dependencies([metadata]):
-
-      # Add identity ops so even unused inputs are "consumed" by the
-      # computation. This is to avoid orphaned TPUReplicatedInput nodes.
-      # TODO(phawkins): consider instead pruning unused TPUReplicatedInput
-      # and eliding trivial TPUReplicatedInput/TPUReplicatedOutput pairs.
-      flat_replicated_inputs = [
-          array_ops.identity(x, name="replicated_input_{}".format(i))
-          for i, x in enumerate(flat_replicated_inputs)
-      ]
-      for i in flat_replicated_inputs:
-        # pylint: disable=protected-access
-        # Add an attribute to the identity node so that it can be removed in
-        # the encapsulate-TPU-computations pass if unused. However, we don't
-        # remove inputs when dynamic padding is enabled.
-        # TODO(rxsang): Use other ways except argument index in padding_map so
-        # outside compilation can work with dynamic padding correctly.
-        if maximum_shapes is None:
-          i.op._set_attr("_tpu_input_identity",
-                         attr_value_pb2.AttrValue(b=True))
-        # pylint: enable=protected-access
-
-      # Unflatten the computation inputs to match original input structure.
-      computation_inputs = nest.pack_sequence_as(
-          structure=inputs[0],
-          flat_sequence=flat_replicated_inputs[:flat_input_arity])
-
-      # If there is an infeed queue, adds the dequeued values to the
-      # computation's inputs.
-      if infeed_queue is not None:
-        infeed_queue.set_number_of_shards(num_replicas)
-        for t in infeed_queue.generate_dequeue_op():
-          computation_inputs.append(t)
-
-      # Only resource variables work inside a TPU computation, so turn on
-      # resource variables for the computation.
-      # TODO(phawkins): consider removing this code. It will
-      # be less confusing to clients if they knowingly choose to use resource
-      # variables.
-      # Partitioned variables is not supported (b/112311320).
-      vscope = variable_scope.get_variable_scope()
-      saved_use_resource = vscope.use_resource
-      saved_custom_getter = vscope.custom_getter
-
-      def custom_getter(getter, name, *args, **kwargs):
-        """Variables on TPU have a few restrictions."""
-        partitioner = kwargs["partitioner"]
-        if partitioner is not None:
-          kwargs["partitioner"] = None
-          logging.warning(
-              "Partitioned variables are not supported on TPU. Got "
-              "`partitioner` that is {} for variable {}. "
-              "Setting `partitioner` to `None`."
-              .format(partitioner, name))
-        if saved_custom_getter is None:
-          return getter(name, *args, **kwargs)
-        else:
-          return saved_custom_getter(getter, name, *args, **kwargs)
-
-      vscope.set_use_resource(True)
-      vscope.set_custom_getter(custom_getter)
-
-      outputs = computation(*computation_inputs)
-
-      vscope.set_use_resource(saved_use_resource)
-      vscope.set_custom_getter(saved_custom_getter)
-
-    outputs_is_flat = xla.is_flat(outputs)
-    if outputs_is_flat:
-      output_tensors, control_deps = _postprocess_flat_outputs(outputs)
-    else:
-      output_tensors, control_deps = _postprocess_non_flat_outputs(outputs)
-
-    context.ExitResult(output_tensors)
-  finally:
-    context.report_unsupported_operations()
-    context.Exit()
-    host_compute_core = context.HostComputeCore()
-
-  if host_compute_core:
-    attr_value = attr_value_pb2.AttrValue()
-    attr_value.list.s.extend([compat.as_bytes(x) for x in host_compute_core])
-    metadata._set_attr("host_compute_core", attr_value)  # pylint: disable=protected-access
-
-  with ops.control_dependencies([metadata]):
-    if use_tpu:
-      compile_status = tpu_ops.tpu_compilation_result()
-      op = compile_status.op
-      attr_value = attr_value_pb2.AttrValue(s=compat.as_bytes(cluster_name))
-      op._set_attr(_TPU_COMPILATION_STATUS_ATTR, attr_value)  # pylint: disable=protected-access
-    else:
-      compile_status = control_flow_ops.no_op(name="compilation_status")
-
-  if not output_tensors:
-    # Returns a list of NoOps dependent on the replication Op, indexed by
-    # [replica_num].
-    return [
-        compile_status,
-        [
-            control_flow_ops.group(control_deps, name="shard_%d" % i)
-            for i in range(num_replicas)
-        ]
-    ]
-
-  # Fan-out: Builds a TPUReplicatedOutput node for each output.
-  replicated_outputs = [[] for i in xrange(num_replicas)]
-  for i, t in enumerate(output_tensors):
-    ys = tpu_ops.tpu_replicated_output(
-        t, num_replicas, name="output{}".format(i))
-
-    # Wraps the outputs in identity operators so the names of any possible
-    # `fetch` nodes are preserved by the replication rewrite.
-    with ops.control_dependencies(control_deps):
-      for replica in xrange(num_replicas):
-        replicated_outputs[replica].append(
-            array_ops.identity(
-                ys[replica], name="output_%d_shard_%d" % (i, replica)))
-
-  if not outputs_is_flat:
-    replicated_outputs = [
-        nest.pack_sequence_as(outputs, replica_outs)
-        for replica_outs in replicated_outputs
-    ]
-
-  return [compile_status, replicated_outputs]
-
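# A sketch of the lower-level flow: run the compile op first so compilation
# errors surface early, then run the replica outputs (no recompilation):
#
#   compile_op, outputs = split_compile_and_replicate(
#       computation, inputs=[[tf.constant(2.0)], [tf.constant(3.0)]])
#   with tf.Session() as sess:
#     sess.run(compile_op)
#     sess.run(outputs)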
-
-def _postprocess_flat_outputs(outputs):
-  """Validates non-flat outputs, add backs device assignments and other attrs.
-
-  Args:
-    outputs: Output from `computation` inside `tpu.rewrite`.
-
-  Returns:
-    Tensors and Operations extracted from outputs.
-  """
-  # Following code segment is to preserve legacy behavior. Previously we only
-  # supported flat outputs and thus for consistency it was nice to convert even
-  # single element into a tuple. But now that we support arbitrary output
-  # structure, this is no longer necessary.
-  # TODO(b/121383831): Migrate all legacy use cases and delete this special
-  # case.
-  # If the computation returns `None`, make it an empty tuple.
-  if outputs is None:
-    outputs = tuple()
-  # If the computation only returned one value, makes it a tuple.
-  if not isinstance(outputs, collections.Sequence):
-    outputs = (outputs,)
-
-  # Append `no_op` here so that fetching any return value of this function
-  # will trigger TPUExecute node.
-  outputs += (control_flow_ops.no_op(),)
-  try:
-    with ops.device(core(0)):
-      outputs = [
-          o if isinstance(o, ops.Operation) else ops.convert_to_tensor(o)
-          for o in outputs
-      ]
-  except Exception as e:
-    raise ValueError(
-        "TPU function return values must all either be Operations or "
-        "convertible to Tensors. Got '%s'" % str(e))
-
-  # Separates the returned Operations and Tensors.
-  output_operations = [o for o in outputs if isinstance(o, ops.Operation)]
-  output_tensors = [o for o in outputs if not isinstance(o, ops.Operation)]
-
-  if outputs != output_tensors + output_operations:
-    raise ValueError(
-        "TPU functions must return zero-or more Tensor values followed by "
-        "zero or more Operations.")
-
-  # Wraps outputs in Identity ops. Otherwise a replicated input copied
-  # straight to an output would bypass the replicate(). This would be bad
-  # because the TPUReplicatedInput/TPUReplicatedOutput operator would not
-  # be rewritten away, leading to a runtime error.
-  # TODO(phawkins): extend the rewrite to elide these nodes instead.
-  new_output_tensors = []
-  for t in output_tensors:
-    with ops.device(t.device if t.device else core(0)):
-      o = array_ops.identity(t)
-      # pylint: disable=protected-access
-      o.op._set_attr("_tpu_output_identity", attr_value_pb2.AttrValue(b=True))
-      # pylint: enable=protected-access
-      new_output_tensors.append(o)
-  return new_output_tensors, output_operations
-
-
-def _postprocess_non_flat_outputs(outputs):
-  """Validates non-flat outputs, add backs device assignments and other attrs.
-
-  Args:
-    outputs: Output from `computation` inside `tpu.rewrite`.
-
-  Returns:
-    Tensors extracted from outputs and an empty list, because Operations are
-    not allowed in non-flat outputs.
-  """
-
-  # Flatten output items.
-  flat_outputs = nest.flatten(outputs)
-
-  # Convert all non-Operation outputs to Tensors.
-  for i, o in enumerate(flat_outputs):
-    if isinstance(o, ops.Operation):
-      raise ValueError(
-          "tpu.rewrite does not support Operation as return value in non-flat "
-          "output structure. You can set returned Operations as control "
-          "dependencies of returned Tensors so Operations are triggered when "
-          'Tensors are evaluated. Operation found: "%s"' % o.name)
-
-    try:
-      o = ops.convert_to_tensor(o)
-    except Exception as e:
-      raise ValueError(
-          "TPU function return values must all either be Operations or "
-          'convertible to Tensors. Got error: "%s"' % str(e))
-
-    # Wraps outputs in Identity ops. Otherwise a replicated input copied
-    # straight to an output would bypass the replicate(). This would be bad
-    # because the TPUReplicatedInput/TPUReplicatedOutput operator would not
-    # be rewritten away, leading to a runtime error.
-    # TODO(phawkins): extend the rewrite to elide these nodes instead.
-    with ops.device(core(0)):
-      o = array_ops.identity(o)
-      # pylint: disable=protected-access
-      o.op._set_attr("_tpu_output_identity", attr_value_pb2.AttrValue(b=True))
-      # pylint: enable=protected-access
-      flat_outputs[i] = array_ops.identity(o)
-
-  # All flat_outputs are Tensors, and no Operations.
-  return flat_outputs, []
-
-
-def split_compile_and_shard(computation,
-                            inputs=None,
-                            num_shards=1,
-                            input_shard_axes=None,
-                            outputs_from_all_shards=True,
-                            output_shard_axes=None,
-                            infeed_queue=None,
-                            device_assignment=None,
-                            name=None):
-  """Shards `computation` for parallel execution.
-
-  `inputs` must be a list of Tensors or None (equivalent to an empty list), each
-  of which has a corresponding split axis (from `input_shard_axes`). Each input
-  is split into `num_shards` pieces along the corresponding axis, and
-  computation is applied to each shard in parallel.
-
-  Tensors are broadcast to all shards if they are lexically captured by
-  `computation`. e.g.,
-
-  x = tf.constant(7)
-  def computation():
-    return x + 3
-  ... = shard(computation, ...)
-
-  If `outputs_from_all_shards` is true, the outputs from all shards of
-  `computation` are concatenated back together along their `output_shard_axes`.
-  Otherwise, each output is taken from an arbitrary shard.
-
-  Inputs and outputs of the computation must be at least rank-1 Tensors.
-
-  Args:
-    computation: A Python function that builds a computation to apply to each
-      shard of the input.
-    inputs: A list of input tensors or None (equivalent to an empty list). Each
-      input tensor has a corresponding shard axis, given by `input_shard_axes`;
-      the size of that dimension must be divisible by `num_shards`.
-    num_shards: The number of shards.
-    input_shard_axes: A list of dimensions along which to shard `inputs`, or
-      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
-      there must be one dimension per input.
-    outputs_from_all_shards: Boolean or list of boolean. For each output, if
-      `True`, outputs from all shards are concatenated along the corresponding
-      `output_shard_axes` entry. Otherwise, each output is taken
-      from an arbitrary shard. If the argument is a boolean, the argument's
-      value is used for each output.
-    output_shard_axes: A list of dimensions along which to concatenate the
-      outputs of `computation`, or `None`. `None` means "concatenate all outputs
-      along dimension 0". If not `None`, there must be one dimension per output.
-      Ignored if `outputs_from_all_shards` is False.
-    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
-      of `computation`.
-    device_assignment: If not `None`, a `DeviceAssignment` describing the
-      mapping between logical cores in the computation with physical cores in
-      the TPU topology. Uses a default device assignment if `None`. The
-      `DeviceAssignment` may be omitted if each shard of the computation uses
-      only one core, and there is either only one shard, or the number of shards
-      is equal to the number of cores in the TPU system.
-    name: (Deprecated) Does nothing.
-  Returns:
-    A tuple of (compile op, [output tensors]).
-  Raises:
-    ValueError: If num_shards <= 0
-    ValueError: If len(input_shard_axes) != len(inputs)
-    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
-  """
-  # TODO(phawkins): consider adding support for broadcasting Tensors passed as
-  # inputs.
-
-  if num_shards <= 0:
-    raise ValueError("num_shards must be a positive integer.")
-
-  inputs = [] if inputs is None else inputs
-  if not isinstance(inputs, list):
-    raise TypeError("tpu.shard()'s inputs must be a list of Tensors or None.")
-
-  # Converts inputs to Tensors.
-  inputs = [ops.convert_to_tensor(x) for x in inputs]
-
-  if input_shard_axes is None:
-    input_shard_axes = [0] * len(inputs)
-  if len(inputs) != len(input_shard_axes):
-    raise ValueError("Length of input_shard_axes must be equal to the number "
-                     "of inputs.")
-
-  if inputs:
-    # Splits the `inputs` along the corresponding `input_shard_axes`, giving
-    # lists with layout [input][shard]
-    split_inputs = [
-        array_ops.split(x, num_shards, axis=axis)
-        for (axis, x) in zip(input_shard_axes, inputs)]
-
-    # Transposes the input lists to have layout [shard][input]
-    transposed_inputs = [list(i) for i in zip(*split_inputs)]
-  else:
-    transposed_inputs = [[]] * num_shards
-
-  compile_op, outputs = split_compile_and_replicate(
-      computation,
-      transposed_inputs,
-      infeed_queue=infeed_queue,
-      device_assignment=device_assignment,
-      name=name)
-
-  # There must be at least one shard since num_shards > 0.
-  # TODO(b/36647078) remove disable when pylint bug is fixed.
-  # pylint: disable=indexing-exception
-  if isinstance(outputs[0], ops.Operation):
-    # pylint: enable=indexing-exception
-    # There were no outputs from the computation and replicate returned a list
-    # of NoOps with control dependencies on the computation. Return the first
-    # one so it can be used as a control dependency or fetch node.
-    # TODO(b/36647078) remove disable when pylint bug is fixed.
-    # pylint: disable=indexing-exception
-    return compile_op, [outputs[0]]
-    # pylint: enable=indexing-exception
-
-  # TODO(b/36647078) remove disable when pylint bug is fixed.
-  # pylint: disable=indexing-exception
-  num_outputs = len(outputs[0])
-  # pylint: enable=indexing-exception
-
-  if output_shard_axes is None:
-    output_shard_axes = [0] * num_outputs
-  if num_outputs != len(output_shard_axes):
-    raise ValueError("Length of output_shard_axes must be equal to the number "
-                     "of outputs.")
-
-  if isinstance(outputs_from_all_shards, bool):
-    outputs_from_all_shards = [outputs_from_all_shards] * num_outputs
-
-  if num_outputs != len(outputs_from_all_shards):
-    raise ValueError("Length of outputs_from_all_shards must be equal to the "
-                     "number of outputs.")
-
-  results = []
-  for (axis, all_shards, x) in zip(output_shard_axes, outputs_from_all_shards,
-                                   zip(*outputs)):
-    if all_shards:
-      # Concatenate all of the outputs together (use stack for scalars).
-      shape = x[0].shape
-      is_scalar = shape is not None and (shape.ndims == 0)
-      results.append((array_ops.stack(list(x)) if is_scalar
-                      else array_ops.concat(list(x), axis=axis)))
-    else:
-      # TODO(phawkins): use a smarter policy, e.g., round-robin across shards.
-      results.append(x[0])
-
-  return compile_op, results
-
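# A sketch of sharding with the function above: a rank-1 input of size 4 is
# split into two size-2 shards along axis 0, and the per-shard results are
# concatenated back together because outputs_from_all_shards defaults to True:
#
#   compile_op, results = split_compile_and_shard(
#       lambda x: (x * 2.0,),
#       inputs=[tf.constant([1.0, 2.0, 3.0, 4.0])],
#       num_shards=2)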
-
-def shard(computation,
-          inputs=None,
-          num_shards=1,
-          input_shard_axes=None,
-          outputs_from_all_shards=True,
-          output_shard_axes=None,
-          infeed_queue=None,
-          device_assignment=None,
-          name=None):
-  """Shards `computation` for parallel execution.
-
-  `inputs` must be a list of Tensors or None (equivalent to an empty list), each
-  of which has a corresponding split axis (from `input_shard_axes`). Each input
-  is split into `num_shards` pieces along the corresponding axis, and
-  computation is applied to each shard in parallel.
-
-  Tensors are broadcast to all shards if they are lexically captured by
-  `computation`. e.g.,
-
-  x = tf.constant(7)
-  def computation():
-    return x + 3
-  ... = shard(computation, ...)
-
-  TODO(phawkins): consider adding support for broadcasting Tensors passed
-  as inputs.
-
-  If `outputs_from_all_shards` is true, the outputs from all shards of
-  `computation` are concatenated back together along their `output_shard_axes`.
-  Otherwise, each output is taken from an arbitrary shard.
-
-  Inputs and outputs of the computation must be at least rank-1 Tensors.
-
-  Args:
-    computation: A Python function that builds a computation to apply to each
-      shard of the input.
-    inputs: A list of input tensors or None (equivalent to an empty list). Each
-      input tensor has a corresponding shard axis, given by `input_shard_axes`;
-      the size of that dimension must be divisible by `num_shards`.
-    num_shards: The number of shards.
-    input_shard_axes: A list of dimensions along which to shard `inputs`, or
-      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
-      there must be one dimension per input.
-    outputs_from_all_shards: Boolean or list of boolean. For each output, if
-      `True`, outputs from all shards are concatenated along the corresponding
-      `output_shard_axes` entry. Otherwise, each output is taken
-      from an arbitrary shard. If the argument is a boolean, the argument's
-      value is used for each output.
-    output_shard_axes: A list of dimensions along which to concatenate the
-      outputs of `computation`, or `None`. `None` means "concatenate all outputs
-      along dimension 0". If not `None`, there must be one dimension per output.
-      Ignored if `outputs_from_all_shards` is False.
-    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
-      of `computation`.
-    device_assignment: If not `None`, a `DeviceAssignment` describing the
-      mapping between logical cores in the computation with physical cores in
-      the TPU topology. Uses a default device assignment if `None`. The
-      `DeviceAssignment` may be omitted if each shard of the computation uses
-      only one core, and there is either only one shard, or the number of shards
-      is equal to the number of cores in the TPU system.
-    name: (Deprecated) Does nothing.
-  Returns:
-    A list of output tensors.
-  Raises:
-    ValueError: If num_shards <= 0
-    ValueError: If len(input_shard_axes) != len(inputs)
-    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
-  """
-  return split_compile_and_shard(
-      computation,
-      inputs=inputs,
-      num_shards=num_shards,
-      input_shard_axes=input_shard_axes,
-      outputs_from_all_shards=outputs_from_all_shards,
-      output_shard_axes=output_shard_axes,
-      infeed_queue=infeed_queue,
-      device_assignment=device_assignment,
-      name=name)[1]
-
-
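A minimal usage sketch for `shard()` follows; it assumes a TF 1.x build where this module is exposed as `tf.contrib.tpu` and a TPU system available at run time.

```python
# Hypothetical usage sketch for shard(); assumes tf.contrib.tpu and a TPU
# runtime are available.
import tensorflow as tf
from tensorflow.contrib import tpu

def computation(x):
  # Applied independently to each shard of the input.
  return tf.reduce_sum(x, axis=1, keepdims=True)

inputs = tf.random_uniform([8, 4])  # dim 0 must be divisible by num_shards
outputs = tpu.shard(computation, inputs=[inputs], num_shards=2)
```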
-def batch_parallel(computation,
-                   inputs=None,
-                   num_shards=1,
-                   infeed_queue=None,
-                   device_assignment=None,
-                   name=None):
-  """Shards `computation` along the batch dimension for parallel execution.
-
-  Convenience wrapper around shard().
-
-  `inputs` must be a list of Tensors or None (equivalent to an empty list).
-  Each input is split into `num_shards` pieces along the 0-th dimension, and
-  computation is applied to each shard in parallel.
-
-  Tensors are broadcast to all shards if they are lexically captured by
-  `computation`. e.g.,
-
-  x = tf.constant(7)
-  def computation():
-    return x + 3
-  ... = batch_parallel(computation, ...)
-
-  The outputs from all shards are concatenated back together along their 0-th
-  dimension.
-
-  Inputs and outputs of the computation must be at least rank-1 Tensors.
-
-  Args:
-    computation: A Python function that builds a computation to apply to each
-      shard of the input.
-    inputs: A list of input tensors or None (equivalent to an empty list). The
-      0-th dimension of each Tensor must have size divisible by `num_shards`.
-    num_shards: The number of shards.
-    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
-      of arguments as inputs to `computation`.
-    device_assignment: If not `None`, a `DeviceAssignment` describing the
-      mapping between logical cores in the computation with physical cores in
-      the TPU topology. Uses a default device assignment if `None`. The
-      `DeviceAssignment` may be omitted if each shard of the computation uses
-      only one core, and there is either only one shard, or the number of shards
-      is equal to the number of cores in the TPU system.
-    name: (Deprecated) Does nothing.
-  Returns:
-    A list of output tensors.
-  Raises:
-    ValueError: If `num_shards <= 0`
-  """
-  return shard(
-      computation,
-      inputs,
-      num_shards=num_shards,
-      infeed_queue=infeed_queue,
-      device_assignment=device_assignment,
-      name=name)
-
-
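A matching sketch for `batch_parallel()`, under the same assumptions as the `shard()` example above:

```python
# Hypothetical sketch for batch_parallel(): inputs are split along dim 0 and
# the per-shard outputs are concatenated back along dim 0.
import tensorflow as tf
from tensorflow.contrib import tpu

def model(features):
  return tf.layers.dense(features, 10)

features = tf.random_uniform([32, 128])  # batch divisible by num_shards
logits = tpu.batch_parallel(model, inputs=[features], num_shards=8)
```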
-def rewrite(computation,
-            inputs=None,
-            infeed_queue=None,
-            device_assignment=None,
-            name=None):
-  """Rewrites `computation` for execution on a TPU system.
-
-  Args:
-    computation: A Python function that builds a computation to apply to the
-      input. If the function takes n inputs, 'inputs' should be a list of n
-      tensors.
-
-      `computation` may return a list of operations and tensors. Tensors must
-      come before operations in the returned list.  The return value of
-      `rewrite` is a list of tensors corresponding to the tensors from the
-      output of `computation`.
-
-      All `Operation`s constructed during `computation` will be executed when
-      evaluating any of the returned output tensors, not just the ones returned.
-    inputs: A list of input tensors or `None` (equivalent to an empty list).
-      Each input can be a nested structure containing values that are
-      convertible to tensors. Note that passing an N-dimension list of
-      compatible values will result in an N-dimension list of scalar tensors
-      rather than a single rank-N tensor. If you need different behavior,
-      convert part of inputs to tensors with `tf.convert_to_tensor`.
-    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
-      of arguments as inputs to `computation`.
-    device_assignment: if not `None`, a `DeviceAssignment` describing the
-      mapping between logical cores in the computation with physical cores in
-      the TPU topology. May be omitted for a single-core computation, in which
-      case the core attached to task 0, TPU device 0 is used.
-    name: (Deprecated) Does nothing.
-  Returns:
-    Same data structure as if computation(*inputs) is called directly with some
-    exceptions for correctness. Exceptions include:
-      1) None output: a NoOp would be returned which control-depends on
-         computation.
-      2) Single value output: A tuple containing the value would be returned.
-      3) Operation-only outputs: a NoOp would be returned which
-         control-depends on computation.
-      TODO(b/121383831): Investigate into removing these special cases.
-  """
-  # TODO(b/36647078) remove disable when pylint bug is fixed.
-  # pylint: disable=indexing-exception
-  return replicate(
-      computation,
-      None if inputs is None else [inputs],
-      infeed_queue=infeed_queue,
-      device_assignment=device_assignment,
-      name=name)[0]
-  # pylint: enable=indexing-exception
-
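For `rewrite()`, a minimal sketch under the same assumptions as above; note the single tensor output comes back wrapped in a tuple, per special case 2 in the docstring:

```python
# Hypothetical sketch for rewrite(); single-value outputs are returned
# wrapped in a tuple (special case 2 above).
import tensorflow as tf
from tensorflow.contrib import tpu

def computation(a, b):
  return tf.matmul(a, b)

a = tf.random_uniform([4, 8])
b = tf.random_uniform([8, 2])
(product,) = tpu.rewrite(computation, inputs=[a, b])
```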
-# Operations that indicate some error in the user's inference graph.
-_BLACKLISTED_INFERENCE_OPS = set([
-    "ReadVariableOp",
-    "AssignVariableOp",
-    "AssignAddVariableOp",
-    "AssignSubVariableOp",
-    "VarHandleOp",
-    "Variable",
-    "VariableV2",
-])
-
-
-def under_tpu_inference_context():
-  """Check if it is currently under `tpu.rewrite_for_inference()`."""
-  graph = ops.get_default_graph()
-
-  context = graph._get_control_flow_context()  # pylint: disable=protected-access
-  while context:
-    if isinstance(context, _TPUInferenceContext):
-      return True
-    context = context.outer_context
-
-  return False
-
-
-class _TPUInferenceContext(control_flow_ops.XLAControlFlowContext):
-  """A `ControlFlowContext` for nodes inside a TPU inference computation.
-
-  The primary role of `_TPUInferenceContext` is to sanity check operators inside
-  a tpu.rewrite_for_inference() computation.
-  """
-
-  def __init__(self, name):
-    super(_TPUInferenceContext, self).__init__()
-    self._name = name
-
-  def AddOp(self, op):
-    self._AddOpInternal(op)
-
-  def _AddOpInternal(self, op):
-    # pylint: disable=protected-access
-    if op.type in _BLACKLISTED_INFERENCE_OPS:
-      raise NotImplementedError(
-          "Operation of type %s (%s) is not supported on the TPU for inference."
-          " Execution will fail if this op is used in the graph. Make sure your"
-          " variables are using variable_scope." % (op.type, op.name))
-    if self._outer_context:
-      self._outer_context.AddInnerOp(op)
-
-  def AddValue(self, val):
-    result = val
-    if self._outer_context:
-      result = self._outer_context.AddValue(val)
-    return result
-
-  def AddInnerOp(self, op):
-    self._AddOpInternal(op)
-
-  @property
-  def grad_state(self):
-    return None
-
-
-@experimental
-def validate_inference_rewrite_for_variables(graph):
-  """Validates whether rewrite_for_inference() 'worked' for variables.
-
-     The rewrite_for_inference() method is supposed to append GuaranteeConstOps
-     after ReadVariableOps, but this mechanism works only if you are using
-     tf.get_variable() to create and access variables in your tpu computation.
-     This validation method can be called immediately after calling
-     tpu.rewrite_for_inference() to check whether GuaranteeConstOps were added
-     to the graph.
-
-     Typical usages:
-       tpu.validate_inference_rewrite_for_variables(tf.get_default_graph())
-
-       tpu.validate_inference_rewrite_for_variables(sess.graph)
-
-  Args:
-    graph: The graph which needs to be validated.
-  Raises:
-    RuntimeError: if validation failed.
-  """
-  if not any(x.type == "GuaranteeConst" for x in graph.get_operations()):
-    raise RuntimeError(
-        "No GuaranteeConst ops found in the graph after running "
-        "tpu.rewrite_for_inference(...). Please check that you are using "
-        "tf.get_variable() to create and access variables in your tpu "
-        "computation.")
-
-
-@experimental
-def rewrite_for_inference(computation,
-                          inputs=None,
-                          infeed_queue=None,
-                          device_assignment=None,
-                          name=None):
-  """Rewrites `computation` for inference on a TPU system.
-
-     Other than 'rewriting' the computation to run on a TPU, if using variables
-     in your computation, it moves the ReadVariableOps outside the TPU
-     computation, and adds GuaranteeConst ops just after the ReadVariableOps.
-     This mechanism works only if you are using tf.get_variable() to create and
-     access variables in your tpu computation. You can validate whether this
-     worked by calling the validate_inference_rewrite_for_variables() method
-     immediately after this method to check whether GuaranteeConstOps were
-     added to the graph.
-
-  Args:
-    computation: A Python function that builds a computation to apply to the
-      input. If the function takes n inputs, 'inputs' should be a list of n
-      tensors. If the function returns m outputs, rewrite will return a list of
-      m tensors.
-    inputs: A list of input tensors or `None` (equivalent to an empty list).
-    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
-      of arguments as inputs to `computation`.
-    device_assignment: if not `None`, a `DeviceAssignment` describing the
-      mapping between logical cores in the computation with physical cores in
-      the TPU topology. May be omitted for a single-core computation, in which
-      case the core attached to task 0, TPU device 0 is used.
-    name: The name of the operator.
-  Returns:
-    A list of output tensors.
-  """
-
-  def guarantee_const_getter(getter, name, *args, **kwargs):
-    with ops.control_dependencies(None):
-      return array_ops.guarantee_const(
-          getter(name, *args, **kwargs), name=name + "/GuaranteeConst")
-
-  def wrapped_computation(*args, **kwargs):
-    """Execute computation under `_TPUInferenceContext`."""
-    context = _TPUInferenceContext(
-        name=ops.get_default_graph().unique_name("rewrite_for_inference"))
-    try:
-      context.Enter()
-
-      vscope = variable_scope.get_variable_scope()
-      prev_custom_getter = vscope.custom_getter
-      prev_caching_device = vscope.caching_device
-      vscope.set_custom_getter(guarantee_const_getter)
-      vscope.set_caching_device(lambda op: op.device)
-
-      result = computation(*args, **kwargs)
-
-      vscope.set_custom_getter(prev_custom_getter)
-      vscope.set_caching_device(prev_caching_device)
-    finally:
-      context.Exit()
-    return result
-
-  # pylint: disable=undefined-variable
-  return rewrite(
-      wrapped_computation,
-      inputs=inputs,
-      infeed_queue=infeed_queue,
-      device_assignment=device_assignment,
-      name=name)
-  # pylint: enable=undefined-variable
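An end-to-end sketch tying `rewrite_for_inference()` to the validation helper above; it assumes both functions are exported via `tf.contrib.tpu` and that variables are created with `tf.get_variable()` as the docstrings require:

```python
# Hypothetical inference-rewrite sketch; both calls are assumed to be
# exported under tf.contrib.tpu.
import tensorflow as tf
from tensorflow.contrib import tpu

def inference_fn(images):
  w = tf.get_variable('w', [784, 10])  # must use tf.get_variable
  return tf.matmul(images, w)

images = tf.placeholder(tf.float32, [None, 784])
logits = tpu.rewrite_for_inference(inference_fn, inputs=[images])
tpu.validate_inference_rewrite_for_variables(tf.get_default_graph())
```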
+# pylint: disable=wildcard-import,unused-import,redefined-builtin
+from tensorflow.python.tpu.tpu import *
+# used by tests
+from tensorflow.python.tpu.tpu import _TPU_REPLICATE_ATTR
+# pylint: enable=wildcard-import,unused-import,redefined-builtin
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
index 9f8d147..c36aaa3 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
@@ -1,275 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-
-"""A RunConfig subclass with TPU support."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import json
-import os
-
-from tensorflow.contrib.tpu.python.tpu import util as util_lib
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.platform import tf_logging as logging
-
-# pylint: disable=protected-access
-_TF_CONFIG_ENV = run_config_lib._TF_CONFIG_ENV
-_SERVICE_KEY = run_config_lib._SERVICE_KEY
-_TPU_WORKER_JOB_NAME = 'tpu_worker_job_name'
-# pylint: enable=protected-access
-
-
-class InputPipelineConfig(object):
-  r"""Please see the definition of these values in TPUConfig."""
-  PER_SHARD_V1 = 1
-  PER_HOST_V1 = 2
-  PER_HOST_V2 = 3
-  BROADCAST = 4
-
-
-class TPUConfig(
-    collections.namedtuple('TPUConfig', [
-        'iterations_per_loop',
-        'num_shards',
-        'num_cores_per_replica',
-        'per_host_input_for_training',
-        'tpu_job_name',
-        'initial_infeed_sleep_secs',
-        'input_partition_dims',
-    ])):
-  r"""TPU related configuration required by `TPUEstimator`.
-
-  Args:
-    iterations_per_loop: This is the number of train steps running in the TPU
-      system before returning to the CPU host for each `Session.run`. This
-      means the global step is increased `iterations_per_loop` times in one
-      `Session.run`. It is recommended to set this to the number of global
-      steps until the next checkpoint.
-    num_shards: (Deprecated, ignored by TPUEstimator).
-      The number of model replicas in the system. For non-model-parallelism
-      case, this number equals the total number of TPU cores. For
-      model-parallelism, the total number of TPU cores equals
-      num_cores_per_replica * num_shards.
-    num_cores_per_replica: Defaults to `None`, which disables model parallelism.
-      An integer which describes the number of TPU cores per model replica. This
-      is required by model-parallelism which enables partitioning
-      the model to multiple cores. Currently num_cores_per_replica must be
-      1, 2, 4, 8, or 16.
-    per_host_input_for_training: If `True`, `PER_HOST_V1`, or `PER_HOST_V2`,
-      `input_fn` is invoked once on each host. With the per-core input pipeline
-      configuration, it is invoked once for each core.
-      With a global batch size `train_batch_size` in `TPUEstimator` constructor,
-      the batch size for each shard is `train_batch_size` // #hosts in the
-      `True` or `PER_HOST_V1` mode. In `PER_HOST_V2` mode, it is
-      `train_batch_size` // #cores. In `BROADCAST` mode, `input_fn` is only
-      invoked once on host 0 and the tensors are broadcast to all other
-      replicas. The batch size equals `train_batch_size`. With the per-core
-      input pipeline configuration, the shard batch size is also
-      `train_batch_size` // #cores.
-      Note: per_host_input_for_training==PER_SHARD_V1 only supports mode.TRAIN.
-    tpu_job_name: The name of the TPU job. Typically, this name is auto-inferred
-      within TPUEstimator, however when using ClusterSpec propagation in more
-      esoteric cluster configurations, you may need to specify the job name as a
-      string.
-    initial_infeed_sleep_secs: The number of seconds the infeed thread should
-      wait before enqueueing the first batch. This helps avoid timeouts for
-      models that require a long compilation time.
-    input_partition_dims: A nested list to describe the partition dims
-      for all the tensors from input_fn(). The structure of
-      input_partition_dims must match the structure of `features` and
-      `labels` from input_fn(). The total number of partitions must match
-      `num_cores_per_replica`. For example, if input_fn() returns two tensors:
-      images with shape [N, H, W, C] and labels with shape [N], then
-      input_partition_dims = [[1, 2, 2, 1], None] will split the images into 4
-      pieces and feed them to 4 TPU cores. The labels tensor is broadcast
-      directly to all the TPU cores since its partition dims entry is `None`.
-      Current limitations: This feature is only supported with the PER_HOST_V2
-      input mode.
-
-  Raises:
-    ValueError: If `num_cores_per_replica` is not 1, 2, 4, 8, or 16.
-  """
-
-  def __new__(cls,
-              iterations_per_loop=2,
-              num_shards=None,
-              num_cores_per_replica=None,
-              per_host_input_for_training=True,
-              tpu_job_name=None,
-              initial_infeed_sleep_secs=None,
-              input_partition_dims=None):
-
-    # Check iterations_per_loop.
-    util_lib.check_positive_integer(iterations_per_loop,
-                                    'TPUConfig iterations_per_loop')
-
-    # Check num_shards.
-    if num_shards is not None:
-      util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')
-
-    if input_partition_dims is not None:
-      if len(input_partition_dims) != 1 and len(input_partition_dims) != 2:
-        raise ValueError(
-            'input_partition_dims must be a list/tuple with one or two'
-            ' elements.')
-
-      if per_host_input_for_training is not InputPipelineConfig.PER_HOST_V2:
-        raise ValueError(
-            'input_partition_dims is only supported in PER_HOST_V2 mode.')
-
-      if num_cores_per_replica is None:
-        raise ValueError(
-            'input_partition_dims requires setting num_cores_per_replica.')
-
-    # Check num_cores_per_replica
-    if num_cores_per_replica is not None:
-      if num_cores_per_replica not in [1, 2, 4, 8, 16]:
-        raise ValueError(
-            'num_cores_per_replica must be 1, 2, 4, 8, or 16; got {}'.format(
-                str(num_cores_per_replica)))
-
-    # per_host_input_for_training may be True, False, or an integer in [1..4].
-    # Map legacy values (True, False) to numeric values.
-    if per_host_input_for_training is False:
-      per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
-    elif per_host_input_for_training is True:
-      per_host_input_for_training = InputPipelineConfig.PER_HOST_V1
-
-    # Check initial_infeed_sleep_secs.
-    if initial_infeed_sleep_secs:
-      util_lib.check_positive_integer(initial_infeed_sleep_secs,
-                                      'TPUConfig initial_infeed_sleep_secs')
-
-    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()
-
-    return super(TPUConfig, cls).__new__(
-        cls,
-        iterations_per_loop=iterations_per_loop,
-        num_shards=num_shards,
-        num_cores_per_replica=num_cores_per_replica,
-        per_host_input_for_training=per_host_input_for_training,
-        tpu_job_name=tpu_job_name,
-        initial_infeed_sleep_secs=initial_infeed_sleep_secs,
-        input_partition_dims=input_partition_dims)
-
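The `input_partition_dims` example in the docstring above, written out (a sketch only; `tpu_config_lib` stands for this module imported as in the deleted tests further below):

```python
# Sketch of the docstring's partitioning example: images split 4 ways,
# labels broadcast (partition dims entry None). Values are illustrative.
config = tpu_config_lib.TPUConfig(
    iterations_per_loop=100,
    num_cores_per_replica=4,  # 1 * 2 * 2 * 1 partitions == 4 cores
    per_host_input_for_training=tpu_config_lib.InputPipelineConfig.PER_HOST_V2,
    input_partition_dims=[[1, 2, 2, 1], None])
```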
-
-class RunConfig(run_config_lib.RunConfig):
-  """RunConfig with TPU support."""
-
-  def __init__(self,
-               tpu_config=None,
-               evaluation_master=None,
-               master=None,
-               cluster=None,
-               **kwargs):
-    """Constructs a RunConfig.
-
-    Args:
-      tpu_config: the TPUConfig that specifies TPU-specific configuration.
-      evaluation_master: a string. The address of the master to use for eval.
-        Defaults to master if not set.
-      master: a string. The address of the master to use for training.
-      cluster: a ClusterResolver
-      **kwargs: keyword config parameters.
-
-    Raises:
-      ValueError: if cluster is not None and the provided session_config has a
-        cluster_def already.
-    """
-    super(RunConfig, self).__init__(**kwargs)
-    self._tpu_config = tpu_config or TPUConfig()
-    self._cluster = cluster
-
-    # If user sets master and/or evaluation_master explicitly, including empty
-    # string '', take it. Otherwise, take the values set by parent class.
-    if master is not None:
-      if cluster is not None:
-        raise ValueError('Both master and cluster are set.')
-      self._master = master
-    else:
-      if cluster:
-        self._master = cluster.master()
-
-    if evaluation_master is not None:
-      self._evaluation_master = evaluation_master
-    elif (not self._evaluation_master and
-          self.task_type != run_config_lib.TaskType.EVALUATOR):
-      # If the task type is EVALUATOR, it means some cluster manager sets the
-      # TF_CONFIG. In that case, we respect the configuration in TF_CONFIG.
-      #
-      # Otherwise, it means user executes the code without external cluster
-      # manager. For that, we optimize the user experience by setting
-      # evaluation_master to master, unless user overwrites it.
-      self._evaluation_master = self._master
-
-    # Set the ClusterSpec to use
-    if cluster:
-      self._cluster_spec = cluster.cluster_spec()
-
-      # Merge the cluster_def into the ConfigProto.
-      if self._session_config is None:  # pylint: disable=access-member-before-definition
-        self._session_config = config_pb2.ConfigProto(allow_soft_placement=True)
-      if self._session_config.HasField('cluster_def'):
-        raise ValueError(
-            'You cannot provide a ClusterResolver and '
-            'session_config.cluster_def.')
-      if self._cluster_spec:
-        self._session_config.cluster_def.CopyFrom(
-            self._cluster_spec.as_cluster_def())
-
-  def _maybe_overwrite_session_config_for_distributed_training(self):
-    # Overrides the parent class's session_config overwrite for between-graph
-    # replication. TPU runs with in-graph replication, which should not have a
-    # device filter. Doing nothing ("pass") disables the overwrite.
-    pass
-
-  @property
-  def evaluation_master(self):
-    return self._evaluation_master
-
-  @property
-  def master(self):
-    return self._master
-
-  @property
-  def tpu_config(self):
-    return self._tpu_config
-
-  @property
-  def cluster(self):
-    return self._cluster
-
-  def replace(self, **kwargs):
-    if 'tpu_config' not in kwargs:
-      return super(RunConfig, self).replace(**kwargs)
-
-    tpu_config = kwargs.pop('tpu_config')
-    new_instance = super(RunConfig, self).replace(**kwargs)
-    new_instance._tpu_config = tpu_config  # pylint: disable=protected-access
-    return new_instance
-
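For completeness, a sketch combining `TPUConfig` with this `RunConfig` subclass, mirroring the deleted tests further below (the master address is illustrative):

```python
# Sketch: TPU RunConfig wiring; evaluation_master defaults to master when
# not set explicitly (see __init__ above).
run_config = RunConfig(
    master='grpc://10.0.0.2:8470',
    tpu_config=TPUConfig(iterations_per_loop=100, num_shards=8))
assert run_config.evaluation_master == run_config.master
```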
-
-def _get_tpu_job_name_from_tf_config():
-  """Extracts the TPU job name from TF_CONFIG env variable."""
-  # TODO(xiejw): Extends this to support both TF_CONFIG env variable and cluster
-  # spec propagation.
-  tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
-  tpu_job_name = tf_config.get(_SERVICE_KEY, {}).get(_TPU_WORKER_JOB_NAME)
-  if tpu_job_name:
-    logging.info('Load TPU job name from TF_CONFIG: %s', tpu_job_name)
-  return tpu_job_name
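The TF_CONFIG shape this helper reads looks as follows (a sketch matching the deleted `TPUJobNameTest` below):

```python
# Sketch: TF_CONFIG layout consumed by _get_tpu_job_name_from_tf_config().
import json
import os

os.environ['TF_CONFIG'] = json.dumps(
    {'service': {'tpu_worker_job_name': 'my_tpu_worker'}})
```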
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tpu_config import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py
deleted file mode 100644
index b2fe0a6..0000000
--- a/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py
+++ /dev/null
@@ -1,181 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""TPU RunConfig tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import json
-
-from tensorflow.contrib.tpu.python.tpu import tpu_config as tpu_config_lib
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.platform import test
-
-
-def _set_tf_config_env_variable(tf_config):
-  return test.mock.patch.dict('os.environ', {
-      'TF_CONFIG': json.dumps(tf_config)
-  })
-
-
-class TPURunConfigTest(test.TestCase):
-
-  def test_no_session_config_set_in_local_case(self):
-    run_config = tpu_config_lib.RunConfig()
-    self.assertIsNone(run_config.session_config)
-
-  def test_no_session_config_overwrite_in_local_case(self):
-    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
-    run_config = tpu_config_lib.RunConfig(session_config=session_config)
-    self.assertEqual(session_config, run_config.session_config)
-
-  def test_no_session_config_set_with_cluster_spec(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3'],
-            run_config_lib.TaskType.WORKER: ['host3:4']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    with _set_tf_config_env_variable(tf_config):
-      run_config = tpu_config_lib.RunConfig()
-      self.assertIsNone(run_config.session_config)
-
-  def test_no_session_config_overwrite_with_cluster_spec(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3'],
-            run_config_lib.TaskType.WORKER: ['host3:4']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    with _set_tf_config_env_variable(tf_config):
-      session_config = config_pb2.ConfigProto(allow_soft_placement=True)
-      run_config = tpu_config_lib.RunConfig(session_config=session_config)
-      self.assertEqual(session_config, run_config.session_config)
-
-  def test_fail_with_invalid_num_shards(self):
-    with self.assertRaisesRegexp(ValueError, 'must be positive'):
-      tpu_config_lib.RunConfig(
-          tpu_config=tpu_config_lib.TPUConfig(num_shards=0))
-
-  def test_fail_with_iterations_per_loop(self):
-    with self.assertRaisesRegexp(ValueError, 'must be positive'):
-      tpu_config_lib.RunConfig(
-          tpu_config=tpu_config_lib.TPUConfig(iterations_per_loop=0))
-
-  def test_fail_with_invalid_num_cores_per_replica(self):
-    with self.assertRaisesRegexp(
-        ValueError, 'num_cores_per_replica must be 1, 2, 4, 8, or 16;'
-        ' got 7'):
-      tpu_config_lib.TPUConfig(num_cores_per_replica=7)
-
-
-class TPURunConfigMasterTest(test.TestCase):
-
-  def test_default_values(self):
-    run_config = tpu_config_lib.RunConfig()
-    self.assertEqual('', run_config.master)
-    self.assertEqual('', run_config.evaluation_master)
-
-  def test_user_provided_master_and_evaluation_master(self):
-    run_config = tpu_config_lib.RunConfig(
-        master='_master_123', evaluation_master='_eval_master_123')
-    self.assertEqual('_master_123', run_config.master)
-    self.assertEqual('_eval_master_123', run_config.evaluation_master)
-
-  def test_evaluation_master_defaults_to_master(self):
-    run_config = tpu_config_lib.RunConfig(master='_master_123')
-    self.assertEqual('_master_123', run_config.master)
-    self.assertEqual('_master_123', run_config.evaluation_master)
-
-  def test_tf_config(self):
-    tf_config = {
-        'session_master': '_master_123',
-        'eval_session_master': '_eval_master_123'
-    }
-    with _set_tf_config_env_variable(tf_config):
-      run_config = tpu_config_lib.RunConfig()
-      self.assertEqual('_master_123', run_config.master)
-      self.assertEqual('_eval_master_123', run_config.evaluation_master)
-
-  def test_evaluation_master_defaults_to_master_in_tf_config(self):
-    tf_config = {
-        'session_master': '_master_123',
-    }
-    with _set_tf_config_env_variable(tf_config):
-      run_config = tpu_config_lib.RunConfig()
-      self.assertEqual('_master_123', run_config.master)
-      self.assertEqual('_master_123', run_config.evaluation_master)
-
-  def test_respect_evaluation_master_in_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.EVALUATOR,
-            'index': 0
-        },
-    }
-    with _set_tf_config_env_variable(tf_config):
-      run_config = tpu_config_lib.RunConfig(master='_something')
-      self.assertEqual('', run_config.evaluation_master)
-
-  def test_user_overwrites_tf_config(self):
-    tf_config = {
-        'session_master': '_master_123',
-        'eval_session_master': '_eval_master_123'
-    }
-    with _set_tf_config_env_variable(tf_config):
-      run_config = tpu_config_lib.RunConfig(
-          master='_new_master_123', evaluation_master='_new_eval_master_123')
-      self.assertEqual('_new_master_123', run_config.master)
-      self.assertEqual('_new_eval_master_123', run_config.evaluation_master)
-
-  def test_user_overwrites_master_in_tf_config(self):
-    tf_config = {
-        'session_master': '_master_123',
-        'eval_session_master': '_eval_master_123'
-    }
-    with _set_tf_config_env_variable(tf_config):
-      run_config = tpu_config_lib.RunConfig(master='_new_master_123')
-      self.assertEqual('_new_master_123', run_config.master)
-      self.assertEqual('_eval_master_123', run_config.evaluation_master)
-
-
-class TPUJobNameTest(test.TestCase):
-
-  def test_default_name(self):
-    config = tpu_config_lib.RunConfig()
-    self.assertIsNone(config.tpu_config.tpu_job_name)
-
-  def test_with_tf_config(self):
-    tf_config = {'service': {'tpu_worker_job_name': '_my_new_name',}}
-    with _set_tf_config_env_variable(tf_config):
-      config = tpu_config_lib.RunConfig()
-      self.assertEqual('_my_new_name', config.tpu_config.tpu_job_name)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index ed1e0f0..b77b010 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -1,763 +1,23 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-"""TPU system metadata and associated tooling."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from contextlib import contextmanager
-import copy
-
-from tensorflow.contrib.tpu.python.tpu import _tpu_estimator_embedding
-from tensorflow.contrib.tpu.python.tpu import device_assignment as tpu_device_assignment
-from tensorflow.contrib.tpu.python.tpu import tpu_config
-from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.platform import tf_logging as logging
-
-
-_DEFAULT_JOB_NAME = 'tpu_worker'
-_DEFAULT_COORDINATOR_JOB_NAME = 'coordinator'
-_LOCAL_MASTERS = ('', 'local')
-_NUM_CORES_TO_COMPUTATION_SHAPE = {
-    1: [1, 1, 1],
-    2: [1, 1, 2],
-    4: [1, 2, 2],
-    8: [2, 2, 2],
-    16: [4, 2, 2],
-}
-
-
-class TPUContext(object):
-  """A context that holds the current configuration of the TPU computation."""
-
-  def __init__(self,
-               internal_ctx,
-               input_device=None,
-               invocation_index=None,
-               call_from_input_fn=True):
-    self._internal_ctx = internal_ctx
-    self._input_device = input_device
-    self._invocation_index = invocation_index
-    self._call_from_input_fn = call_from_input_fn
-
-  def current_input_fn_deployment(self):
-    """The configuration of the current input_fn invocation.
-
-    The configuration depends on `TPUConfig.per_host_input_for_training`. See
-    `TPUConfig` for details.
-
-    Only set in the params dict of input_fn.
-
-    Returns:
-      A tuple of
-        1. Device spec string: String, is the current CPU host where the
-           input_fn is invoked.
-        2. Current invocation index: Int, 0-based index of the input_fn
-           invocation. See next item for details.
-        3. Total invocation count: Int, the total number of times to invoke the
-           input_fn on all CPU hosts. Each invocation will be passed a new
-           `TPUContext` instance with the current invocation index set properly.
-        4. Total number of replicas consumed by the current invocation: Int, the
-           number of replicas fed by the data returned by current input_fn. For
-           example, for per_core input pipeline deployment
-           and non-model-parallelism, total invocation count is equal to
-           the number of cores in the system and num replicas consumed by
-           current invocation is 1. For per-host v2 input pipeline deployment,
-           total invocation count is equal to the number of hosts in the system
-           and num replicas consumed by current invocation is equal to number of
-           cores per host.
-
-    Raises:
-      RuntimeError: If this method is called from model_fn rather than
-        input_fn.
-    """
-    if not self._call_from_input_fn:
-      raise RuntimeError('This TPUContext instance must not be called from'
-                         ' model_fn.')
-
-    if self._internal_ctx.is_input_sharded_per_core():
-      total_invocation_count = (self._internal_ctx.num_hosts
-                                * self._internal_ctx.num_of_replicas_per_host)
-      replicas_consumed = 1
-    elif self._internal_ctx.is_input_broadcast_with_iterators():
-      total_invocation_count = 1
-      replicas_consumed = self._internal_ctx.num_replicas
-    else:
-      total_invocation_count = self._internal_ctx.num_hosts
-      replicas_consumed = self._internal_ctx.num_of_replicas_per_host
-    return (self._input_device, self._invocation_index,
-            total_invocation_count, replicas_consumed)
-
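A sketch of how an `input_fn` might consume this method; `ALL_FILES` and `make_dataset` are hypothetical placeholders, and the `params['context']` key is assumed to be the `TPUContext` that `TPUEstimator` passes to `input_fn`:

```python
# Hypothetical input_fn sketch; ALL_FILES and make_dataset are placeholders.
def input_fn(params):
  ctx = params['context']  # TPUContext supplied by TPUEstimator (assumption)
  _, invocation_index, total_invocations, _ = (
      ctx.current_input_fn_deployment())
  # Shard a file list across the input_fn invocations.
  files = ALL_FILES[invocation_index::total_invocations]
  return make_dataset(files, batch_size=params['batch_size'])
```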
-  @property
-  def num_replicas(self):
-    """The total number of replicas.
-
-    For non-model-parallelism, num_replicas should be the total num of TPU
-    cores in the system.
-
-    Returns:
-      The number of replicas.
-    """
-    return self._internal_ctx.num_replicas
-
-  @property
-  def num_hosts(self):
-    """The number of hosts for the TPU system."""
-    return self._internal_ctx.num_hosts
-
-  @property
-  def current_host(self):
-    """The current host index for the TPU system."""
-    return self._invocation_index
-
-  @property
-  def num_of_replicas_per_host(self):
-    """The number of replicas for each host."""
-    if self._internal_ctx.model_parallelism_enabled:
-      raise ValueError(
-          'num_of_replicas_per_host is not supported for model_parallelism')
-    return self._internal_ctx.num_of_replicas_per_host
-
-  @property
-  def device_assignment(self):
-    """Returns device_assignment object."""
-    if self._call_from_input_fn:
-      raise RuntimeError('This TPUContext instance must not be called from'
-                         ' input_fn.')
-    return self._internal_ctx.device_assignment
-
-  def device_for_replica(self, replica_id):
-    """Returns the tuple of (CPU device and device ordinal) for replica.
-
-    This should be used for full replication in the non-model-parallelism case.
-
-    Args:
-       replica_id: Int, the replica index.
-
-    Returns:
-       A tuple of device spec for CPU device and int device ordinal.
-    """
-    # Note: for non-model-parallelism, the mapping could be a random
-    # permutation. The order should not matter in most cases, as long as the
-    # model is replicated to all cores in the system.
-    return self._internal_ctx.device_for_replica(replica_id)
-
-  @property
-  def tpu_host_placement_function(self):
-    """Returns the TPU host place function.
-
-    The placement function takes host_id as input and returns the TF device
-    for the corresponding host.
-    """
-
-    def _placement_function(host_id):
-      """Return the host device given host_id."""
-      return self._internal_ctx.tpu_host_placement_function(host_id=host_id)
-
-    return _placement_function
-
-
-class _InternalTPUContext(object):
-  """A context holds immutable states of TPU computation.
-
-  This immutable object holds TPUEstimator config, train/eval batch size, and
-  `TPUEstimator.use_tpu`, which is expected to be passed around. It also
-  provides utility functions, based on the current state, to determine other
-  information commonly required by TPU computation, such as TPU device names,
-  TPU hosts, shard batch size, etc.
-
-  If eval_on_tpu is False, execution of eval on TPU is disabled.
-  If eval_on_tpu is True but use_tpu is False, a warning is issued
-  and TPU execution is disabled for all modes.
-
-  N.B. As `mode` is not immutable state in Estimator, but essential to
-  distinguish between TPU training and evaluation, a common usage for
-  _InternalTPUContext with `mode` is as follows:
-  ```
-  with _ctx.with_mode(mode) as ctx:
-    if ctx.is_running_on_cpu():
-       ...
-  ```
-  """
-
-  def __init__(self,
-               config,
-               train_batch_size,
-               eval_batch_size,
-               predict_batch_size,
-               use_tpu,
-               eval_on_tpu=True,
-               embedding_config_spec=None):
-    self._config = config
-    self._train_batch_size = train_batch_size
-    self._eval_batch_size = eval_batch_size
-    self._predict_batch_size = predict_batch_size
-    self._use_tpu = use_tpu
-    logging.info('_TPUContext: eval_on_tpu %s', eval_on_tpu)
-    if not use_tpu and eval_on_tpu:
-      logging.warning('eval_on_tpu ignored because use_tpu is False.')
-
-    self._eval_on_tpu = eval_on_tpu
-    self._model_parallelism_enabled = (
-        use_tpu and config.tpu_config.num_cores_per_replica)
-    self._mode = None
-    num_cores_per_replica = config.tpu_config.num_cores_per_replica
-    if self._model_parallelism_enabled:
-      self._computation_shape = _NUM_CORES_TO_COMPUTATION_SHAPE[
-          num_cores_per_replica]
-    else:
-      self._computation_shape = None
-    self._lazy_tpu_system_metadata_dict = {}  # key by master address
-    self._lazy_device_assignment_dict = {}  # key by master address
-    self._lazy_validation_dict = {}  # key by ModeKeys
-    self._embedding_config_spec = embedding_config_spec
-    self._lazy_embedding_config_dict = {}  # key by master address
-
-  def _assert_mode(self):
-    if self._mode is None:
-      raise RuntimeError(
-          '`mode` needs to be set via contextmanager `with_mode`.')
-    return self._mode
-
-  @contextmanager
-  def with_mode(self, mode):
-    # NOTE(xiejw): Shallow copy is enough. It will share the lazy dictionaries,
-    # such as _lazy_tpu_system_metadata_dict, between the new copy and the
-    # original one. Note that all lazy states stored in _lazy_foo properties
-    # are effectively immutable, as they should stay the same for the process
-    # lifetime.
-    new_ctx = copy.copy(self)
-    new_ctx._mode = mode  # pylint: disable=protected-access
-    yield new_ctx
-
-  @property
-  def mode(self):
-    return self._assert_mode()
-
-  def _get_master_address(self):
-    mode = self._assert_mode()
-    config = self._config
-    master = (
-        config.master
-        if mode != model_fn_lib.ModeKeys.EVAL else config.evaluation_master)
-    return master
-
-  def _get_tpu_system_metadata(self):
-    """Gets the (maybe cached) TPU system metadata."""
-    master = self._get_master_address()
-    tpu_system_metadata = self._lazy_tpu_system_metadata_dict.get(master)
-    if tpu_system_metadata is not None:
-      return tpu_system_metadata
-
-    cluster_def = None
-    if (self._config.session_config and
-        self._config.session_config.cluster_def.job):
-      cluster_def = self._config.session_config.cluster_def
-
-    # pylint: disable=protected-access
-    tpu_system_metadata = (
-        tpu_system_metadata_lib._query_tpu_system_metadata(
-            master,
-            cluster_def=cluster_def,
-            query_topology=self.model_parallelism_enabled))
-
-    self._lazy_tpu_system_metadata_dict[master] = tpu_system_metadata
-    return tpu_system_metadata
-
-  def _get_device_assignment(self):
-    """Gets the (maybe cached) TPU device assignment."""
-    master = self._get_master_address()
-    device_assignment = self._lazy_device_assignment_dict.get(master)
-    if device_assignment is not None:
-      return device_assignment
-
-    tpu_system_metadata = self._get_tpu_system_metadata()
-
-    device_assignment = tpu_device_assignment.device_assignment(
-        tpu_system_metadata.topology,
-        computation_shape=self._computation_shape,
-        num_replicas=self.num_replicas)
-
-    logging.info('num_cores_per_replica: %s',
-                 str(self._config.tpu_config.num_cores_per_replica))
-    logging.info('computation_shape: %s', str(self._computation_shape))
-    logging.info('num_replicas: %d', self.num_replicas)
-    logging.info('device_assignment.topology.device_coordinates: %s',
-                 str(device_assignment.topology.device_coordinates))
-    logging.info('device_assignment.core_assignment: %s',
-                 str(device_assignment.core_assignment))
-
-    self._lazy_device_assignment_dict[master] = device_assignment
-    return device_assignment
-
-  @property
-  def embedding_config(self):
-    """Returns the embedding config based on current mode."""
-    master = self._get_master_address()
-    if master in self._lazy_embedding_config_dict:
-      embedding_config = self._lazy_embedding_config_dict[master]
-    else:
-      embedding_config = None
-      if self._use_tpu and self._embedding_config_spec:
-        embedding_config = _tpu_estimator_embedding.EmbeddingConfig(
-            self._embedding_config_spec, self._train_batch_size,
-            self._eval_batch_size, self.num_hosts, self.num_cores, master)
-        if not embedding_config.has_embedding_tables():
-          embedding_config = None
-      self._lazy_embedding_config_dict[master] = embedding_config
-
-    if embedding_config is not None:
-      mode = self._assert_mode()
-      # Dynamically attach tpu_embedding based on mode. With
-      # this, we could keep embedding_config immutable but call site always
-      # accesses the unified API '.tpu_embedding'.
-      embedding_config.tpu_embedding = embedding_config.get_tpu_embedding(mode)
-    return embedding_config
-
-  @property
-  def model_parallelism_enabled(self):
-    return self._model_parallelism_enabled
-
-  @property
-  def input_partition_dims(self):
-    return self._config.tpu_config.input_partition_dims
-
-  @property
-  def device_assignment(self):
-    return (self._get_device_assignment()
-            if self._model_parallelism_enabled else None)
-
-  @property
-  def num_of_cores_per_host(self):
-    metadata = self._get_tpu_system_metadata()
-    return metadata.num_of_cores_per_host
-
-  @property
-  def num_cores(self):
-    metadata = self._get_tpu_system_metadata()
-    return metadata.num_cores
-
-  @property
-  def num_of_replicas_per_host(self):
-    """Return the number of replicas per host."""
-    if self.model_parallelism_enabled:
-      return self.num_replicas // self.num_hosts
-    else:
-      return self.num_of_cores_per_host
-
-  @property
-  def num_replicas(self):
-    num_cores_in_system = self.num_cores
-
-    if self.model_parallelism_enabled:
-      num_cores_per_replica = self._config.tpu_config.num_cores_per_replica
-      if num_cores_per_replica > num_cores_in_system:
-        raise ValueError(
-            'The num of cores required by the model parallelism, specified by '
-            'TPUConfig.num_cores_per_replica, is larger than the total num of '
-            'TPU cores in the system. num_cores_per_replica: {}, num cores '
-            'in the system: {}'.format(num_cores_per_replica,
-                                       num_cores_in_system))
-
-      if num_cores_in_system % num_cores_per_replica != 0:
-        raise RuntimeError(
-            'The num of cores in the system ({}) is not divisible by the num '
-            'of cores ({}) required by the model parallelism, specified by '
-            'TPUConfig.num_cores_per_replica. This should never happen!'.format(
-                num_cores_in_system, num_cores_per_replica))
-
-      return num_cores_in_system // num_cores_per_replica
-    else:
-      return num_cores_in_system
-
-  @property
-  def num_hosts(self):
-    metadata = self._get_tpu_system_metadata()
-    return metadata.num_hosts
-
-  @property
-  def config(self):
-    return self._config
-
-  def is_input_sharded_per_core(self):
-    """Return true if input_fn is invoked per-core (other than per-host)."""
-    mode = self._assert_mode()
-    return (mode == model_fn_lib.ModeKeys.TRAIN and
-            (self._config.tpu_config.per_host_input_for_training is
-             tpu_config.InputPipelineConfig.PER_SHARD_V1))
-
-  def is_input_per_host_with_iterators(self):
-    """Return true if input_fn should be run in the per-host v2 config."""
-    return (self._config.tpu_config.per_host_input_for_training is
-            tpu_config.InputPipelineConfig.PER_HOST_V2)
-
-  def is_input_broadcast_with_iterators(self):
-    """Return true if input_fn should be run in the full_replicae config."""
-    return (self._config.tpu_config.per_host_input_for_training is
-            tpu_config.InputPipelineConfig.BROADCAST)
-
-  def is_running_on_cpu(self, is_export_mode=False):
-    """Determines whether the input_fn and model_fn should be invoked on CPU.
-
-    This API also validates user-provided configuration, such as batch size,
-    against the lazily initialized TPU system metadata.
-
-    Args:
-      is_export_mode: Indicates whether the current mode is for exporting the
-        model, when mode == PREDICT. Only with this bool can we
-        tell whether the user is calling Estimator.predict or
-        Estimator.export_savedmodel, which run on TPU and CPU
-        respectively. The parent class Estimator does not distinguish the two.
-
-    Returns:
-      bool, whether current input_fn or model_fn should be running on CPU.
-
-    Raises:
-      ValueError: any configuration is invalid.
-    """
-
-    is_running_on_cpu = self._is_running_on_cpu(is_export_mode)
-    if not is_running_on_cpu:
-      self._validate_tpu_configuration()
-    return is_running_on_cpu
-
-  def _is_running_on_cpu(self, is_export_mode):
-    """Determines whether the input_fn and model_fn should be invoked on CPU."""
-    mode = self._assert_mode()
-
-    if not self._use_tpu:
-      return True
-
-    if mode == model_fn_lib.ModeKeys.EVAL and not self._eval_on_tpu:
-      logging.info('_is_running_on_cpu: eval_on_tpu disabled')
-      return True
-
-    if is_export_mode:
-      return True
-
-    return False
-
-  @property
-  def global_batch_size(self):
-    mode = self._assert_mode()
-    if mode == model_fn_lib.ModeKeys.TRAIN:
-      return self._train_batch_size
-    elif mode == model_fn_lib.ModeKeys.EVAL:
-      return self._eval_batch_size
-    elif mode == model_fn_lib.ModeKeys.PREDICT:
-      return self._predict_batch_size
-    else:
-      return None
-
-  @property
-  def batch_size_for_input_fn(self):
-    """Returns the shard batch size for `input_fn`."""
-    global_batch_size = self.global_batch_size
-
-    if (self.is_running_on_cpu() or self.is_input_broadcast_with_iterators()):
-      return global_batch_size
-
-    # On TPU
-    if self.is_input_sharded_per_core() or (
-        self.is_input_per_host_with_iterators()):
-      return global_batch_size // self.num_replicas
-    else:
-      return global_batch_size // self.num_hosts
-
-  @property
-  def batch_size_for_model_fn(self):
-    """Returns the shard batch size for `model_fn`."""
-    global_batch_size = self.global_batch_size
-
-    if (self.is_running_on_cpu() or self.is_input_broadcast_with_iterators()):
-      return global_batch_size
-
-    # On TPU, the batch is always sharded per replica.
-    return global_batch_size // self.num_replicas
-
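A worked example of the two batch-size properties above, with illustrative numbers:

```python
# Illustrative arithmetic only (4 hosts x 8 cores, no model parallelism,
# so num_replicas == 32; global train batch == 1024):
#   PER_HOST_V1: batch_size_for_input_fn == 1024 // 4  == 256 (per host)
#   PER_HOST_V2: batch_size_for_input_fn == 1024 // 32 == 32  (per replica)
#   model_fn:    batch_size_for_model_fn == 1024 // 32 == 32  (per replica)
```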
-  @property
-  def master_job(self):
-    """Returns the job name to use to place TPU computations on.
-
-    Returns:
-      A string containing the job name, or None if no job should be specified.
-
-    Raises:
-      ValueError: If the user needs to specify a tpu_job_name, because we are
-        unable to infer the job name automatically, or if the user-specified job
-        names are inappropriate.
-    """
-    run_config = self._config
-    # If the user specifies the tpu_job_name, use that.
-    if run_config.tpu_config.tpu_job_name:
-      return run_config.tpu_config.tpu_job_name
-
-    # The tpu job is determined by the run_config. Right now, this method is
-    # required as tpu_config is not part of the RunConfig.
-    mode = self._assert_mode()
-    master = (
-        run_config.evaluation_master
-        if mode == model_fn_lib.ModeKeys.EVAL else run_config.master)
-    if master in _LOCAL_MASTERS:
-      return None
-
-    if (not run_config.session_config or
-        not run_config.session_config.cluster_def.job):
-      return _DEFAULT_JOB_NAME
-    cluster_def = run_config.session_config.cluster_def
-    job_names = set([job.name for job in cluster_def.job])
-    if _DEFAULT_JOB_NAME in job_names:
-      # b/37868888 tracks allowing ClusterSpec propagation to reuse job names.
-      raise ValueError('Currently, tpu_worker is not an allowed job name.')
-    if len(job_names) == 1:
-      return cluster_def.job[0].name
-    if len(job_names) == 2:
-      if _DEFAULT_COORDINATOR_JOB_NAME in job_names:
-        job_names.remove(_DEFAULT_COORDINATOR_JOB_NAME)
-        return job_names.pop()
-      # TODO(b/67716447): Include more sophisticated heuristics.
-    raise ValueError(
-        'Could not infer TPU job name. Please specify a tpu_job_name as part '
-        'of your TPUConfig.')
-
-  @property
-  def tpu_host_placement_function(self):
-    """Returns the TPU host place function."""
-
-    master = self.master_job
-
-    def _placement_function(_sentinal=None, replica_id=None, host_id=None):  # pylint: disable=invalid-name
-      """Return the host device given replica_id or host_id."""
-      assert _sentinal is None
-      if replica_id is not None and host_id is not None:
-        raise RuntimeError(
-            'replica_id and host_id can have only one non-None value.')
-
-      if master is None:
-        return '/replica:0/task:0/device:CPU:0'
-      else:
-        if replica_id is not None:
-          if self.model_parallelism_enabled:
-            return self.device_assignment.host_device(
-                replica=replica_id, job=master)
-          else:
-            host_id = replica_id // self.num_of_cores_per_host
-
-        return '/job:%s/task:%d/device:CPU:0' % (master, host_id)
-
-    return _placement_function
-
-  @property
-  def tpu_device_placement_function(self):
-    """Returns a TPU device placement Fn."""
-    master = self.master_job
-    job_device = '' if master is None else ('/job:%s' % master)
-
-    def _placement_function(i):
-      if self.model_parallelism_enabled:
-        return self.device_assignment.tpu_device(replica=i, job=master)
-      else:
-        num_of_cores_per_host = self.num_of_cores_per_host
-        host_id = i // num_of_cores_per_host
-        ordinal_id = i % num_of_cores_per_host
-        return '%s/task:%d/device:TPU:%d' % (job_device, host_id, ordinal_id)
-
-    return _placement_function
-
-  def tpu_ordinal_function(self, host_id):
-    """Returns the TPU ordinal fn."""
-
-    def _tpu_ordinal_function(shard_index_in_host):
-      """Return the TPU ordinal associated with a shard.
-
-      Required because the enqueue ops are placed on CPU.
-
-      Args:
-        shard_index_in_host: the shard index
-
-      Returns:
-        The ordinal of the TPU device the shard's infeed should be placed on.
-      """
-      if self.model_parallelism_enabled:
-        # We put both enqueue/dequeue ops at tpu.core(0) in each replica.
-        replica = self.device_assignment.lookup_replicas(host_id,
-                                                         0)[shard_index_in_host]
-        return self.device_assignment.tpu_ordinal(replica=replica)
-      else:
-        return shard_index_in_host % self.num_of_cores_per_host
-
-    return _tpu_ordinal_function
-
-  def _validate_tpu_configuration(self):
-    """Validates the configuration based on the TPU system metadata."""
-    mode = self._assert_mode()
-    if self._lazy_validation_dict.get(mode):
-      return
-
-    # All following information is obtained from TPU system metadata.
-    num_cores = self.num_cores
-    num_replicas = self.num_replicas
-    num_hosts = self.num_hosts
-
-    if not num_cores:
-      tpu_system_metadata = self._get_tpu_system_metadata()
-      raise RuntimeError(
-          'Cannot find any TPU cores in the system. Please double check '
-          'Tensorflow master address and TPU worker(s). Available devices '
-          'are {}.'.format(tpu_system_metadata.devices))
-
-    if self._config.tpu_config.num_shards:
-      user_provided_num_replicas = self._config.tpu_config.num_shards
-      if user_provided_num_replicas != num_replicas:
-        message = (
-            'TPUConfig.num_shards is not set correctly. According to TPU '
-            'system metadata for Tensorflow master ({}): num_replicas should '
-            'be ({}), got ({}). For non-model-parallelism, num_replicas should '
-            'be the total num of TPU cores in the system. For '
-            'model-parallelism, the total number of TPU cores should be '
-            'num_cores_per_replica * num_replicas. Please set it '
-            'accordingly or leave it as `None`'.format(
-                self._get_master_address(), num_replicas,
-                user_provided_num_replicas))
-
-        raise ValueError(message)
-
-    if self._config.tpu_config.num_cores_per_replica:
-      num_cores_per_replica = self._config.tpu_config.num_cores_per_replica
-      num_cores_per_host = self._get_tpu_system_metadata().num_of_cores_per_host
-      if num_cores_per_replica > num_cores_per_host:
-        raise ValueError(
-            'The number of cores required by model parallelism, specified by '
-            'TPUConfig.num_cores_per_replica, is larger than '
-            'num_cores_per_host. num_cores_per_replica: {}, '
-            'num_cores_per_host: {}'.format(num_cores_per_replica,
-                                            num_cores_per_host))
-
-    if mode == model_fn_lib.ModeKeys.TRAIN:
-      if (self._train_batch_size % num_replicas != 0 and
-          not self.is_input_broadcast_with_iterators()):
-        raise ValueError(
-            'train batch size {} must be divisible by number of replicas {}'
-            .format(self._train_batch_size, num_replicas))
-
-    elif mode == model_fn_lib.ModeKeys.EVAL:
-      if self._eval_batch_size is None:
-        raise ValueError(
-            'eval_batch_size in TPUEstimator constructor cannot be `None` '
-            'if .evaluate is running on TPU.')
-      if (self._eval_batch_size % num_replicas != 0 and
-          not self.is_input_broadcast_with_iterators()):
-        raise ValueError(
-            'eval batch size {} must be divisible by number of replicas {}'
-            .format(self._eval_batch_size, num_replicas))
-      if num_hosts > 1 and not self.is_input_broadcast_with_iterators():
-        raise ValueError(
-            'TPUEstimator.evaluate should be running on a single TPU worker'
-            ' instead of a Pod.')
-    else:
-      assert mode == model_fn_lib.ModeKeys.PREDICT
-      if self._predict_batch_size is None:
-        raise ValueError(
-            'predict_batch_size in TPUEstimator constructor should not be '
-            '`None` if .predict is running on TPU.')
-      if (self._predict_batch_size % num_replicas != 0 and
-          not self.is_input_broadcast_with_iterators()):
-        raise ValueError(
-            'predict batch size {} must be divisible by number of replicas {}'
-            .format(self._predict_batch_size, num_replicas))
-      if num_hosts > 1 and not self.is_input_broadcast_with_iterators():
-        raise ValueError(
-            'TPUEstimator.predict should be running on a single TPU worker. '
-            'Got {} hosts.'.format(num_hosts))
-
-    # Record the state "validated" into lazy dictionary.
-    self._lazy_validation_dict[mode] = True
-
-  def device_for_replica(self, replica_id):
-    """Returns the tuple of (CPU device and device ordinal) for replica.
-
-    This should be used for full replicate for non-model-parallelism.
-
-    Args:
-       replica_id: Int, the replica index.
-
-    Returns:
-       A tuple of device spec for CPU device and int device ordinal.
-    """
-    master = self.master_job
-
-    if self.model_parallelism_enabled:
-      return (self.device_assignment.host_device(
-          replica=replica_id, job=master),
-              self.device_assignment.tpu_ordinal(replica=replica_id))
-
-    job_device = '' if master is None else ('/job:%s' % master)
-
-    num_of_replicas_per_host = self.num_of_replicas_per_host
-    host_id = replica_id // num_of_replicas_per_host
-    ordinal_id = replica_id % num_of_replicas_per_host
-
-    host_device = '%s/task:%d/device:CPU:0' % (job_device, host_id)
-    return (host_device, ordinal_id)
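-    # Illustrative example (not part of the original code, assuming a
-    # hypothetical job name 'worker' and 8 replicas per host):
-    # device_for_replica(10) returns ('/job:worker/task:1/device:CPU:0', 2).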
-
-
-class _OneCoreTPUContext(_InternalTPUContext):
-  """Special _InternalTPUContext for one core usage."""
-
-  def __init__(self, config, train_batch_size, eval_batch_size,
-               predict_batch_size, use_tpu):
-
-    super(_OneCoreTPUContext, self).__init__(
-        config, train_batch_size, eval_batch_size,
-        predict_batch_size, use_tpu)
-
-  def _get_tpu_system_metadata(self):
-    """Gets the (maybe cached) TPU system metadata."""
-    master = self._get_master_address()
-    tpu_system_metadata = self._lazy_tpu_system_metadata_dict.get(master)
-    if tpu_system_metadata is not None:
-      return tpu_system_metadata
-
-    tpu_system_metadata = (
-        tpu_system_metadata_lib._TPUSystemMetadata(  # pylint: disable=protected-access
-            num_cores=1,
-            num_hosts=1,
-            num_of_cores_per_host=1,
-            topology=None,
-            devices=[]))
-
-    self._lazy_tpu_system_metadata_dict[master] = tpu_system_metadata
-    return tpu_system_metadata
-
-
-def _get_tpu_context(config, train_batch_size, eval_batch_size,
-                     predict_batch_size, use_tpu, eval_on_tpu,
-                     embedding_config_spec):
-  """Returns an instance of `_InternalTPUContext`."""
-
-  if (config.tpu_config.num_shards == 1 and
-      config.tpu_config.num_cores_per_replica is None):
-    if embedding_config_spec is not None:
-      raise ValueError('Setting TPUConfig.num_shards==1 is unsupported '
-                       'when embedding_config_spec is not None.')
-    logging.warning(
-        'Setting TPUConfig.num_shards==1 is an unsupported behavior. '
-        'Please fix it as soon as possible (e.g. by leaving num_shards as '
-        'None).')
-    return _OneCoreTPUContext(config, train_batch_size, eval_batch_size,
-                              predict_batch_size, use_tpu)
-
-  return _InternalTPUContext(config, train_batch_size, eval_batch_size,
-                             predict_batch_size, use_tpu, eval_on_tpu,
-                             embedding_config_spec)
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tpu_context import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
index 1689126..cb38a8f 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
@@ -1,10 +1,10 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -12,1082 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TPU embedding APIs."""
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import copy
-import math
-import re
-import six
-
-from tensorflow.contrib.framework.python.framework import experimental
-from tensorflow.contrib.tpu.ops import gen_tpu_ops
-from tensorflow.contrib.tpu.proto import optimization_parameters_pb2
-from tensorflow.contrib.tpu.proto import tpu_embedding_configuration_pb2 as elc
-from tensorflow.contrib.tpu.python.ops import tpu_ops
-from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-
-TRAINING = elc.TPUEmbeddingConfiguration.TRAINING
-INFERENCE = elc.TPUEmbeddingConfiguration.INFERENCE
-
-
-class TableConfig(
-    collections.namedtuple(
-        'TableConfig',
-        ['vocabulary_size', 'dimension', 'initializer', 'combiner'])):
-  """Embedding table configuration."""
-
-  @experimental
-  def __new__(cls,
-              vocabulary_size,
-              dimension,
-              initializer=None,
-              combiner='mean'):
-    """Embedding table configuration.
-
-    Args:
-      vocabulary_size: Number of vocabulary entries (rows) in the table.
-      dimension: The embedding dimension.
-      initializer: A variable initializer function to be used in embedding
-        variable initialization. If not specified, defaults to
-        `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
-        `1/sqrt(dimension)`.
-      combiner: A string specifying how to reduce if there are multiple entries
-        in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with
-        'mean' the default. 'sqrtn' often achieves good accuracy, in particular
-        with bag-of-words columns. For more information, see
-        `tf.nn.embedding_lookup_sparse`.
-
-    Returns:
-      `TableConfig`.
-
-    Raises:
-      ValueError: if `vocabulary_size` is not a positive integer.
-      ValueError: if `dimension` is not a positive integer.
-      ValueError: if `initializer` is specified and is not callable.
-      ValueError: if `combiner` is not supported.
-    """
-    if not isinstance(vocabulary_size, int) or vocabulary_size < 1:
-      raise ValueError('Invalid vocabulary_size {}.'.format(vocabulary_size))
-
-    if not isinstance(dimension, int) or dimension < 1:
-      raise ValueError('Invalid dimension {}.'.format(dimension))
-
-    if (initializer is not None) and (not callable(initializer)):
-      raise ValueError('initializer must be callable if specified.')
-    if initializer is None:
-      initializer = init_ops.truncated_normal_initializer(
-          mean=0.0, stddev=1 / math.sqrt(dimension))
-
-    if combiner not in ('mean', 'sum', 'sqrtn'):
-      raise ValueError('Invalid combiner {}'.format(combiner))
-
-    return super(TableConfig, cls).__new__(cls, vocabulary_size, dimension,
-                                           initializer, combiner)
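-
-  # Illustrative usage (assumption, not from the original docstring): a
-  # 100000-row, 64-dimensional table with the default truncated-normal
-  # initializer and the 'sum' combiner:
-  #   video_table = TableConfig(vocabulary_size=100000, dimension=64,
-  #                             combiner='sum')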
-
-
-AdamSlotVariableNames = collections.namedtuple(
-    'AdamSlotVariableNames', ['m', 'v'])
-
-AdagradSlotVariableName = collections.namedtuple(
-    'AdagradSlotVariableName', ['accumulator'])
-
-AdamSlotVariables = collections.namedtuple(
-    'AdamSlotVariables', ['m', 'v'])
-
-AdagradSlotVariable = collections.namedtuple(
-    'AdagradSlotVariable', ['accumulator'])
-
-VariablesAndOps = collections.namedtuple(
-    'VariablesAndOps',
-    ['embedding_variables_by_table', 'slot_variables_by_table',
-     'load_ops', 'retrieve_ops']
-)
-
-
-# TODO(shizhiw): Factor `use_gradient_accumulation` and
-# `pipeline_execution_with_tensor_core` out of `_OptimizationParameters`.
-class _OptimizationParameters(object):
-  """Parameters common to all optimizations."""
-
-  def __init__(self, learning_rate, use_gradient_accumulation,
-               pipeline_execution_with_tensor_core):
-    self.learning_rate = learning_rate
-    self.use_gradient_accumulation = use_gradient_accumulation
-    self.pipeline_execution_with_tensor_core = (
-        pipeline_execution_with_tensor_core)
-
-
-class AdagradParameters(_OptimizationParameters):
-  """Optimization parameters for Adagrad."""
-
-  def __init__(self, learning_rate, initial_accumulator,
-               use_gradient_accumulation=False,
-               pipeline_execution_with_tensor_core=True):
-    """Optimization parameters for Adagrad.
-
-    Args:
-      learning_rate: used for updating embedding table.
-      initial_accumulator: initial accumulator for Adagrad.
-      use_gradient_accumulation: setting this to `True` makes embedding
-        gradients calculation more accurate but slower. Please see
-        `optimization_parameters.proto` for details.
-      pipeline_execution_with_tensor_core: setting this to `True` makes
-        training faster, but the trained model will be different if step N and
-        step N+1 involve the same set of embedding IDs. Please see
-        `tpu_embedding_configuration.proto` for details.
-    """
-    super(AdagradParameters, self).__init__(learning_rate,
-                                            use_gradient_accumulation,
-                                            pipeline_execution_with_tensor_core)
-    self.initial_accumulator = initial_accumulator
-
-
-class AdamParameters(_OptimizationParameters):
-  """Optimization parameters for Adam."""
-
-  def __init__(self, learning_rate,
-               beta1=0.9,
-               beta2=0.999,
-               epsilon=1e-08,
-               lazy_adam=True,
-               sum_inside_sqrt=True,
-               use_gradient_accumulation=False,
-               pipeline_execution_with_tensor_core=True):
-    """Optimization parameters for Adam.
-
-    Args:
-      learning_rate: a floating point value. The learning rate.
-      beta1: A float value.
-        The exponential decay rate for the 1st moment estimates.
-      beta2: A float value.
-        The exponential decay rate for the 2nd moment estimates.
-      epsilon: A small constant for numerical stability.
-      lazy_adam: Use lazy Adam instead of Adam. Lazy Adam trains faster.
-        Please see `optimization_parameters.proto` for details.
-      sum_inside_sqrt: This improves training speed. Please see
-        `optimization_parameters.proto` for details.
-      use_gradient_accumulation: setting this to `True` makes embedding
-        gradients calculation more accurate but slower. Please see
-        `optimization_parameters.proto` for details.
-      pipeline_execution_with_tensor_core: setting this to `True` makes
-        training faster, but the trained model will be different if step N and
-        step N+1 involve the same set of embedding IDs. Please see
-        `tpu_embedding_configuration.proto` for details.
-    """
-    super(AdamParameters, self).__init__(learning_rate,
-                                         use_gradient_accumulation,
-                                         pipeline_execution_with_tensor_core)
-    self.beta1 = beta1
-    self.beta2 = beta2
-    self.epsilon = epsilon
-    self.lazy_adam = lazy_adam
-    self.sum_inside_sqrt = sum_inside_sqrt
-
-
-class StochasticGradientDescentParameters(_OptimizationParameters):
-  """Optimization parameters for stochastic gradient descent.
-
-  Args:
-    learning_rate: a floating point value. The learning rate.
-    use_gradient_accumulation: setting this to `True` makes embedding
-      gradients calculation more accurate but slower. Please see
-      `optimization_parameters.proto` for details.
-    pipeline_execution_with_tensor_core: setting this to `True` makes training
-      faster, but the trained model will be different if step N and step N+1
-      involve the same set of embedding IDs. Please see
-      `tpu_embedding_configuration.proto` for details.
-  """
-
-  def __init__(self, learning_rate, use_gradient_accumulation=False,
-               pipeline_execution_with_tensor_core=True):
-    super(StochasticGradientDescentParameters, self).__init__(
-        learning_rate, use_gradient_accumulation,
-        pipeline_execution_with_tensor_core)
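-
-# Illustrative usage (assumption, mirroring the parameter docstrings above):
-#   opt = AdagradParameters(learning_rate=0.1, initial_accumulator=0.1)
-# The chosen object is passed to `TPUEmbedding` as `optimization_parameters`.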
-
-
-class TPUEmbedding(object):
-  """API for using TPU for embedding.
-
-    Example:
-    ```
-    table_config_video = tpu_embedding.TableConfig(
-        vocabulary_size=8, dimension=2,
-        initializer=initializer, combiner='mean')
-    table_config_user = tpu_embedding.TableConfig(
-        vocabulary_size=4, dimension=2,
-        initializer=initializer, combiner='mean')
-    table_to_config_dict = {'video': table_config_video,
-                            'user': table_config_user}
-    feature_to_table_dict = {'watched': 'video',
-                             'favorited': 'video',
-                             'friends': 'user'}
-    batch_size = 4
-    master = ''
-    optimization_parameters = tpu_embedding.AdagradParameters(1., 1.)
-    mode = tpu_embedding.TRAINING
-    embedding = tpu_embedding.TPUEmbedding(
-        table_to_config_dict, feature_to_table_dict,
-        batch_size, mode, master, optimization_parameters)
-
-    batch_size_per_core = embedding.batch_size_per_core
-    sparse_features_list = []
-    for host in embedding.hosts:
-      with ops.device(host):
-        for _ in range(embedding.num_cores_per_host):
-          sparse_features = {}
-          sparse_features['watched'] = sparse_tensor.SparseTensor(...)
-          sparse_features['favorited'] = sparse_tensor.SparseTensor(...)
-          sparse_features['friends'] = sparse_tensor.SparseTensor(...)
-          sparse_features_list.append(sparse_features)
-
-    enqueue_ops = embedding.generate_enqueue_ops(sparse_features_list)
-    embedding_variables_and_ops = embedding.create_variables_and_ops()
-
-    def computation():
-      activations = embedding.get_activations()
-      loss = compute_loss(activations)
-
-      base_optimizer = gradient_descent.GradientDescentOptimizer(
-          learning_rate=1)
-      cross_shard_optimizer = tpu_optimizer.CrossShardOptimizer(
-          base_optimizer)
-
-      train_op = cross_shard_optimizer.minimize(loss)
-      # `train_op` and `send_gradients_op` must happen in order.
-      with ops.control_dependencies([train_op]):
-        send_gradients_op = embedding.generate_send_gradients_op()
-      with ops.control_dependencies([send_gradients_op]):
-        loss = array_ops.identity(loss)
-
-    loss = tpu.shard(computation,
-                     num_shards=embedding.num_cores)
-
-    with self.test_session() as sess:
-      sess.run(tpu.initialize_system(embedding_config=
-                                     embedding.config_proto))
-      sess.run(variables.global_variables_initializer())
-      sess.run(embedding.init_ops)
-      sess.run(embedding_variables_and_ops.load_ops)
-      sess.run(enqueue_ops)
-      loss_val = sess.run(loss)
-    ```
-  """
-
-  # TODO(shizhiw): Instead of `feature_to_table_dict` which maps to table
-  # name, consider `feature_to_config_dict` which maps to `FeatureConfig`.
-  # `FeatureConfig` could have fields other than table name. For example, it
-  # could have a field to indicate that the feature should not be used to
-  # update embedding table (cr/204852758, cr/204940540). Also, this can support
-  # different combiners for different features within the same table.
-  # TODO(shizhiw, b/118512626): Remove `batch_size` from `__init__` and move it
-  # to `FeatureConfig`?
-
-  # TODO(shizhiw): will it be cleaner to make `table_to_config_dict` and
-  # `feature_to_table_dict` lists of `TableSpec` and `FeatureSpec` respectively?
-
-  # TODO(shizhiw): Consider adding `input_fn` as an option to remove boilerplate
-  # for-loops around construction of inputs.
-
-  # `optimization_parameter` applies to all tables. If the need arises,
-  # we can add `optimization_parameters` to `TableConfig` to override this
-  # global setting.
-  @experimental
-  def __init__(self,
-               table_to_config_dict,
-               feature_to_table_dict,
-               batch_size,
-               mode,
-               master,
-               optimization_parameters=None):
-    """API for using TPU for embedding lookups.
-
-    Args:
-      table_to_config_dict: A dictionary mapping from string of table name to
-        `TableConfig`. Table refers to an embedding table, e.g. `params`
-        argument to `tf.nn.embedding_lookup_sparse()`.
-      feature_to_table_dict: A dictionary mapping from string of feature name
-        to string of table name. Feature refers to ids to lookup in embedding
-        table, e.g. `sp_ids` argument to `tf.nn.embedding_lookup_sparse()`.
-      batch_size: An `int` representing the global batch size.
-      mode: `TRAINING` or `INFERENCE`.
-      master: A `string` representing the TensorFlow master to use.
-      optimization_parameters: `AdagradParameters`, `AdamParameters`,
-        `StochasticGradientDescentParameters`. Must be set in training and must
-        be `None` in inference.
-
-    Raises:
-      ValueError: if any input is invalid.
-    """
-    _validate_table_to_config_dict(table_to_config_dict)
-    # Avoid nondeterminism from `Dict` iteration order by using `OrderedDict`.
-    self._table_to_config_dict = _create_ordered_dict(table_to_config_dict)
-    self._combiners = _create_combiners(self._table_to_config_dict)
-
-    _validate_feature_to_table_dict(table_to_config_dict, feature_to_table_dict)
-    self._feature_to_table_dict = _create_ordered_dict(feature_to_table_dict)
-    self._table_to_features_dict = _create_table_to_features_dict(
-        self._feature_to_table_dict)
-
-    self._batch_size = batch_size
-
-    self._master = master
-    self._tpu_system_metadata = (
-        tpu_system_metadata_lib._query_tpu_system_metadata(self._master))  # pylint: disable=protected-access
-    if self._tpu_system_metadata.num_cores == 0:
-      raise ValueError('TPUEmbedding needs TPUs, but master {} does not have '
-                       'TPUs.'.format(self._master))
-    self._num_hosts = self._tpu_system_metadata.num_hosts
-    self._hosts = [device.name for device in self._tpu_system_metadata.devices
-                   if 'device:CPU:' in device.name]
-    self._num_cores_per_host = self._tpu_system_metadata.num_of_cores_per_host
-    self._num_cores = self._tpu_system_metadata.num_cores
-
-    _validate_batch_size(self._batch_size, self._num_cores)
-    self._batch_size_per_core = self._batch_size // self._num_cores
-
-    self._init_ops = []
-
-    # TODO(shizhiw): remove `mode`?
-    if mode == TRAINING:
-      _validate_optimization_parameters(optimization_parameters)
-      self._optimization_parameters = optimization_parameters
-    elif mode == INFERENCE:
-      if optimization_parameters is not None:
-        raise ValueError('`optimization_parameters` should be `None` '
-                         'for inference mode.')
-      self._optimization_parameters = (
-          StochasticGradientDescentParameters(1.))
-    else:
-      raise ValueError('`mode` only supports {} and {}; got {}.'
-                       .format(TRAINING, INFERENCE, mode))
-    self._mode = mode
-
-    # TODO(shizhiw): move `optimization_parameters` into `_optimizer_handler`
-    # and create special handler for inference that inherits from
-    # StochasticGradientDescentHandler with more user-friendly error message
-    # on get_slot().
-    self._optimizer_handler = _get_optimization_handler(
-        self._optimization_parameters)
-
-    dummy_table_variables_init_op = self._create_dummy_table_variables()
-    self._init_ops.append(dummy_table_variables_init_op)
-
-    self._config_proto = self._create_config_proto()
-
-  @property
-  def hosts(self):
-    """A list of device names for CPU hosts.
-
-    Returns:
-      A list of device names for CPU hosts.
-    """
-    return copy.copy(self._hosts)
-
-  # TODO(shizhiw): change to num_tensor_cores_per_host to be more explicit and
-  # to be consistent with `tpu_embedding_configuration.proto`.
-  @property
-  def num_cores_per_host(self):
-    """Number of TPU cores on a CPU host.
-
-    Returns:
-      Number of TPU cores on a CPU host.
-    """
-    return self._num_cores_per_host
-
-  @property
-  def num_cores(self):
-    """Total number of TPU cores on all hosts.
-
-    Returns:
-      Total number of TPU cores on all hosts.
-    """
-    return self._num_cores
-
-  @property
-  def batch_size_per_core(self):
-    """Batch size for each TPU core.
-
-    The sparse tensors in the `sparse_features_list` passed to
-    `generate_enqueue_ops` must have a batch dimension equal to this.
-
-    Returns:
-      Batch size for each TPU core.
-    """
-    return self._batch_size_per_core
-
-  @property
-  def config_proto(self):
-    """Create embedding config proto for `tpu.initialize_system()`.
-
-    Returns:
-      A `TPUEmbeddingConfiguration` proto describing the desired configuration
-      of the hardware embedding lookup tables, which is passed to
-      `tpu.initialize_system()`.
-    """
-    return self._config_proto
-
-  @property
-  def init_ops(self):
-    """Initialization ops for TPU embedding.
-
-    These ops must be run after all global variables have been initialized,
-    i.e. after `global_variables_initializer()`, as they load the embedding
-    tables onto the TPU.
-
-    Returns:
-      A list of ops.
-    """
-    return self._init_ops
-
-  @property
-  def feature_to_table_dict(self):
-    return copy.copy(self._feature_to_table_dict)
-
-  def _create_config_proto(self):
-    """Create `TPUEmbeddingConfiguration`."""
-    config_proto = elc.TPUEmbeddingConfiguration()
-    for table in self._table_to_config_dict:
-      table_descriptor = config_proto.table_descriptor.add()
-      table_descriptor.name = table
-
-      table_config = self._table_to_config_dict[table]
-      table_descriptor.vocabulary_size = table_config.vocabulary_size
-      table_descriptor.dimension = table_config.dimension
-
-      features_for_table = self._table_to_features_dict[table]
-      table_descriptor.num_features = len(features_for_table)
-
-      table_descriptor.optimization_parameters.learning_rate.constant = (
-          self._optimization_parameters.learning_rate)
-      table_descriptor.optimization_parameters.gradient_accumulation_status = (
-          optimization_parameters_pb2.GradientAccumulationStatus.ENABLED
-          if self._optimization_parameters.use_gradient_accumulation else
-          optimization_parameters_pb2.GradientAccumulationStatus.DISABLED)
-      # For compatibility with old TPU workers.
-      table_descriptor.optimization_parameters.use_gradient_accumulation = (
-          self._optimization_parameters.use_gradient_accumulation)
-      self._optimizer_handler.set_optimization_parameters(table_descriptor)
-
-    config_proto.mode = self._mode
-    config_proto.batch_size_per_tensor_core = self._batch_size_per_core
-    config_proto.num_hosts = self._num_hosts
-    config_proto.num_tensor_cores = self._num_cores
-    config_proto.sharding_strategy = elc.TPUEmbeddingConfiguration.DIV_DEFAULT
-    config_proto.pipeline_execution_with_tensor_core = (
-        self._optimization_parameters.pipeline_execution_with_tensor_core)
-
-    return config_proto
-
-  def create_variables_and_ops(self, embedding_variable_name_by_table=None,
-                               slot_variable_names_by_table=None):
-    """Create embedding and slot variables, with ops to load and retrieve them.
-
-    Args:
-      embedding_variable_name_by_table: A dictionary mapping from string of
-        table name to string of embedding variable name. If `None`,
-        the table name will be used as the embedding variable name.
-      slot_variable_names_by_table: A dictionary mapping from string of table
-        name to `AdamSlotVariableNames`, `AdagradSlotVariableNames` etc. If
-        `None`, defaults from `get_default_slot_variable_names()` will be used.
-
-    Returns:
-      `tpu_embedding.VariablesAndOps` with:
-        A dictionary mapping from string of table name to embedding variables,
-        A dictionary mapping from string of table name to AdagradSlotVariable,
-         AdamSlotVariables etc with slot variables,
-        A list of ops to load embedding and slot variables on CPU to TPU,
-        A list of ops to retrieve embedding and slot variables from TPU to CPU.
-    """
-    embedding_variables_by_table = {}
-    slot_variables_by_table = {}
-    load_ops = []
-    retrieve_ops = []
-    for table in self._table_to_config_dict:
-      if embedding_variable_name_by_table:
-        embedding_variable_name = embedding_variable_name_by_table[table]
-      else:
-        embedding_variable_name = table
-      if slot_variable_names_by_table:
-        slot_variable_names = slot_variable_names_by_table[table]
-      else:
-        slot_variable_names = (
-            self._optimizer_handler.get_default_slot_variable_names(table))
-
-      device_fn = _create_device_fn(self._hosts)
-      with ops.device(device_fn):
-        table_variables = _create_partitioned_variables(
-            name=embedding_variable_name,
-            num_hosts=self._num_hosts,
-            vocabulary_size=self._table_to_config_dict[table].vocabulary_size,
-            embedding_dimension=self._table_to_config_dict[table].dimension,
-            initializer=self._table_to_config_dict[table].initializer,
-            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
-        embedding_variables_by_table[table] = table_variables
-
-        slot_variables_for_table, load_ops_for_table, retrieve_ops_for_table = (
-            self._optimizer_handler.create_variables_and_ops(
-                table, slot_variable_names, self._num_hosts,
-                self._table_to_config_dict[table], table_variables)
-        )
-        slot_variables_by_table[table] = slot_variables_for_table
-        load_ops.extend(load_ops_for_table)
-        retrieve_ops.extend(retrieve_ops_for_table)
-    return VariablesAndOps(embedding_variables_by_table,
-                           slot_variables_by_table,
-                           load_ops, retrieve_ops)
-
-  def _create_dummy_table_variables(self):
-    """Create dummy embedding table variables.
-
-    The sole purpose of these dummy variables is to trigger gradient
-    calculation w.r.t. them so that the gradients w.r.t. activations can be
-    captured and later sent to TPU embedding.
-
-    Returns:
-      Initializer for these variables.
-
-    Raises:
-      RuntimeError: if the collection to store gradients already exists and
-        is not empty.
-    """
-    self._dummy_table_variables = []
-    # TODO(shizhiw): remove table id.
-    for table_id, table in enumerate(self._table_to_features_dict):
-      self._dummy_table_variables.append(
-          variable_scope.get_variable(
-              'tpu_embedding_dummy_table_variable_%s' % table,
-              dtype=dtypes.float32,
-              shape=[1],
-              use_resource=True,
-              trainable=True,
-              # TODO(shizhiw): Remove these dummy variables as
-              # tensorflow optimizer creates slot variable for them which
-              # is undesirable.
-              # e.g. tpu_embedding_dummy_table_variable_mlp_user/Adam{_1}.
-              # Explicitly specifying collections prevents this variable from
-              # being added to the GLOBAL_VARIABLES collection, so that Saver()
-              # ignores it.
-              collections=['tpu_embedding_dummy_table_variables']))
-
-      g = ops.get_default_graph()
-      table_gradients = g.get_collection_ref(
-          'tpu_embedding_gradients_table_%d' % table_id)
-      if table_gradients:
-        raise RuntimeError(
-            'tpu_embedding_gradients_table_%d is not empty.' % table_id)
-      table_gradients.extend([None] * len(self._table_to_features_dict[table]))
-
-    return variables.variables_initializer(
-        self._dummy_table_variables,
-        name='tpu_embedding_dummy_table_variables_init')
-
-  def generate_enqueue_ops(self, sparse_features_list):
-    """Generate enqueue ops.
-
-    Args:
-      sparse_features_list: a list of dictionaries mapping from strings
-        of feature names to sparse tensors. Each dictionary is for one
-        TPU core. Dictionaries for cores on the same host should be
-        contiguous in the list.
-
-    Returns:
-      Ops to enqueue to TPU for embedding.
-    """
-    self._validate_generate_enqueue_ops_sparse_features_list(
-        sparse_features_list)
-    return [
-        self._generate_enqueue_op(
-            sparse_features, device_ordinal=i % self._num_cores_per_host)
-        for i, sparse_features in enumerate(sparse_features_list)
-    ]
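-
-  # Example for `generate_enqueue_ops` (illustrative, not from the original
-  # code): with 2 hosts and 2 cores per host, `sparse_features_list` must be
-  # ordered [host0_core0, host0_core1, host1_core0, host1_core1]; entry i is
-  # enqueued with device_ordinal = i % num_cores_per_host.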
-
-  def _validate_generate_enqueue_ops_sparse_features_list(
-      self, sparse_features_list):
-    """Validate `sparse_features_list`."""
-    if len(sparse_features_list) != self._num_cores:
-      raise ValueError('Length of `sparse_features_list` should match the '
-                       'number of cores; '
-                       '`len(sparse_features_list)` is {}, '
-                       'number of cores is {}.'.format(
-                           len(sparse_features_list), self._num_cores))
-
-    feature_set = set(self._feature_to_table_dict.keys())
-    contiguous_device = None
-    for i, sparse_features in enumerate(sparse_features_list):
-      used_feature_set = set(sparse_features.keys())
-
-      # Check features are valid.
-      missing_feature_set = feature_set - used_feature_set
-      if missing_feature_set:
-        raise ValueError('`sparse_features_list[{}]` is missing a feature '
-                         'that is in `feature_to_table_dict`: {}.'.format(
-                             i, missing_feature_set))
-
-      extra_feature_set = used_feature_set - feature_set
-      if extra_feature_set:
-        raise ValueError('`sparse_features_list[{}]` has a feature that is '
-                         'not in `feature_to_table_dict`: {}.'.format(
-                             i, extra_feature_set))
-
-      device = None
-      device_feature = None
-      for feature, tensor in six.iteritems(sparse_features):
-        if not isinstance(tensor, sparse_tensor.SparseTensor):
-          raise ValueError('`sparse_features_list[{}]` has a feature that is '
-                           'not mapped to `SparseTensor`. '
-                           '`feature`: {}, type: {}'.format(
-                               i, feature, type(tensor)))
-
-        # Check all features are on the same device.
-        if device is None:
-          device = tensor.op.device
-          device_feature = feature
-        else:
-          if device != tensor.op.device:
-            raise ValueError('Devices are different between features in '
-                             '`sparse_features_list[{}]`; '
-                             'devices: {}, {}; features: {}, {}.'.format(
-                                 i, device, tensor.op.device, feature,
-                                 device_feature))
-
-      if i % self._num_cores_per_host:
-        if device != contiguous_device:
-          raise ValueError('We expect the `sparse_features` which are on the '
-                           'same host to be contiguous in '
-                           '`sparse_features_list`, '
-                           '`sparse_features_list[{}]` is on device {}, '
-                           'but is expected to be on device {}.'.format(
-                               i, device, contiguous_device))
-      else:
-        contiguous_device = device
-
-  def _generate_enqueue_op(self, sparse_features, device_ordinal):
-    with ops.colocate_with(list(sparse_features.values())[0]):
-      sample_idcs, embedding_idcs, aggregation_weights = (
-          self._format_for_tpu_embedding_sparse_batch(sparse_features))
-      return tpu_ops.enqueue_tpu_embedding_sparse_batch(
-          sample_idcs,
-          embedding_idcs,
-          aggregation_weights,
-          combiners=self._combiners,
-          device_ordinal=device_ordinal)
-
-  def _format_for_tpu_embedding_sparse_batch(self, sparse_features):
-    """Format sparse features for `enqueue_tpu_embedding_sparse_batch()`.
-
-    Args:
-      sparse_features: a `Dict` of `SparseTensor`s for embedding.
-
-    Returns:
-      Arguments for `enqueue_tpu_embedding_sparse_batch()`.
-    """
-
-    sample_idcs, embedding_idcs, aggregation_weights = list(), list(), list()
-    for table in self._table_to_features_dict:
-      sample_t, indices_t, weights_t = list(), list(), list()
-
-      features = self._table_to_features_dict[table]
-      for i, feature in enumerate(features):
-        tensor = sparse_features[feature]
-        sample_indices = tensor.indices[:, 0]
-        embedding_indices = tensor.values
-        weights = array_ops.ones_like(embedding_indices)
-        sample_t.append(i * self._batch_size_per_core + sample_indices)
-        indices_t.append(embedding_indices)
-        weights_t.append(weights)
-
-      sample_idcs.append(
-          math_ops.cast(array_ops.concat(sample_t, axis=0), dtype=dtypes.int32))
-      embedding_idcs.append(
-          math_ops.cast(
-              array_ops.concat(indices_t, axis=0), dtype=dtypes.int32))
-      aggregation_weights.append(
-          math_ops.cast(
-              array_ops.concat(weights_t, axis=0), dtype=dtypes.float32))
-
-    return sample_idcs, embedding_idcs, aggregation_weights
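-
-  # Worked example (illustrative, not from the original code): with
-  # batch_size_per_core=8 and a table serving features
-  # ['favorited', 'watched'], the sample indices of 'watched' (i=1) are
-  # offset by 1 * 8, so both features share a single enqueue batch of
-  # 2 * 8 = 16 samples.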
-
-  def get_activations(self):
-    """Get activations for features.
-
-    This should be called within `computation` that is passed to
-      `tpu.replicate` and friends.
-
-    Returns:
-      A dictionary mapping from `String` of feature name to `Tensor`
-        of activation.
-    """
-    recv_activations = tpu_ops.recv_tpu_embedding_activations(
-        num_outputs=len(self._table_to_config_dict),
-        config=self._config_proto.SerializeToString())
-
-    activations = collections.OrderedDict()
-    for table_id, table in enumerate(self._table_to_features_dict):
-      features = self._table_to_features_dict[table]
-      for lookup_id, feature in enumerate(features):
-        start_row = lookup_id * self._batch_size_per_core
-        end_row = start_row + self._batch_size_per_core
-        activations[feature] = gen_tpu_ops.tpu_embedding_activations(
-            self._dummy_table_variables[table_id],
-            recv_activations[table_id][start_row:end_row, :],
-            table_id=table_id,
-            lookup_id=lookup_id)
-    return activations
-
-  # TODO(shizhiw): Make `gradient_multiplier` per feature. Setting it to 0 would
-  # have the effect of `tf.stop_gradients()`.
-  # TODO(shizhiw): Consider alternative ways to capture gradients wrt embedding
-  # layer outputs to remove `_dummy_table_variables`,
-  # `_embedding_activation_grad` and `tpu_embedding_gradients_table_%d'.
-  def generate_send_gradients_op(self, gradient_multipliers=None):
-    """Retrieve gradients from collections and send them to TPU embedding.
-
-    Args:
-      gradient_multipliers: None, or dict mapping table names to gradient
-        multiplier Tensors.
-
-    Returns:
-      SendTPUEmbeddingGradients Op.
-
-    Raises:
-      ValueError: If required gradients have not been defined.
-      RuntimeError: If `mode` is not `TRAINING`.
-    """
-    if self._mode != TRAINING:
-      raise RuntimeError('Gradients need to be sent to TPU embedding only in '
-                         'training mode; got mode {}.'.format(self._mode))
-
-    g = ops.get_default_graph()
-    gradients = list()
-    for table_id, table in enumerate(self._table_to_config_dict):
-      table_gradients = g.get_collection(
-          'tpu_embedding_gradients_table_%d' % table_id)
-      if any(gradient is None for gradient in table_gradients):
-        raise ValueError(
-            'Table {}/{} has undefined gradients: this is probably because the '
-            'model asked TPUEmbedding to compute activations that were not '
-            'used.'.format(table_id, table))
-      concat_table_grads = array_ops.concat(table_gradients, axis=0)
-      if gradient_multipliers is not None:
-        concat_table_grads *= gradient_multipliers[table]
-      gradients.append(concat_table_grads)
-
-    return tpu_ops.send_tpu_embedding_gradients(
-        inputs=gradients, config=self.config_proto.SerializeToString())
-
-
-def _validate_table_to_config_dict(table_to_config_dict):
-  """Validate `table_to_config_dict`."""
-  for k, v in six.iteritems(table_to_config_dict):
-    if not isinstance(v, TableConfig):
-      raise ValueError('Value of `table_to_config_dict` must be of type '
-                       '`TableConfig`, got {} for {}.'.format(type(v), k))
-
-
-def _validate_feature_to_table_dict(table_to_config_dict,
-                                    feature_to_table_dict):
-  """Validate `feature_to_table_dict`."""
-  used_table_set = set(feature_to_table_dict.values())
-  table_set = set(table_to_config_dict.keys())
-
-  unused_table_set = table_set - used_table_set
-  if unused_table_set:
-    raise ValueError('`table_to_config_dict` specifies table that is not '
-                     'used in `feature_to_table_dict`: {}.'
-                     .format(unused_table_set))
-
-  extra_table_set = used_table_set - table_set
-  if extra_table_set:
-    raise ValueError('`feature_to_table_dict` refers to a table that is not '
-                     'specified in `table_to_config_dict`: {}.'
-                     .format(extra_table_set))
-
-
-def _validate_batch_size(batch_size, num_cores):
-  if batch_size % num_cores:
-    raise ValueError('`batch_size` is not a multiple of number of '
-                     'cores. `batch_size`={}, `_num_cores`={}.'.format(
-                         batch_size, num_cores))
-
-
-def _validate_optimization_parameters(optimization_parameters):
-  if not isinstance(optimization_parameters, _OptimizationParameters):
-    raise ValueError('`optimization_parameters` must inherit from '
-                     '`_OptimizationParameters`. '
-                     '`type(optimization_parameters)`={}'.format(
-                         type(optimization_parameters)))
-
-
-class _OptimizerHandler(object):
-  """Interface class for handling optimizer specific logic."""
-
-  def __init__(self, optimization_parameters):
-    self._optimization_parameters = optimization_parameters
-
-  def set_optimization_parameters(self, table_descriptor):
-    raise NotImplementedError()
-
-  def get_default_slot_variable_names(self, table):
-    raise NotImplementedError()
-
-  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
-                               table_config, table_variables):
-    raise NotImplementedError()
-
-
-class _AdagradHandler(_OptimizerHandler):
-  """Handles Adagrad specific logic."""
-
-  def __init__(self, optimization_parameters):
-    super(_AdagradHandler, self).__init__(optimization_parameters)
-    self._table_to_accumulator_variables_dict = {}
-
-  def set_optimization_parameters(self, table_descriptor):
-    table_descriptor.optimization_parameters.adagrad.SetInParent()
-
-  def get_default_slot_variable_names(self, table):
-    return AdagradSlotVariableName('{}/{}'.format(table, 'Adagrad'))
-
-  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
-                               table_config, table_variables):
-    accumulator_initializer = init_ops.constant_initializer(
-        self._optimization_parameters.initial_accumulator)
-    accumulator_variables = _create_partitioned_variables(
-        name=slot_variable_names.accumulator,
-        num_hosts=num_hosts,
-        vocabulary_size=table_config.vocabulary_size,
-        embedding_dimension=table_config.dimension,
-        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
-        initializer=accumulator_initializer)
-    slot_variables = AdagradSlotVariable(accumulator_variables)
-
-    load_ops = []
-    retrieve_ops = []
-    for host_id, table_variable, accumulator_variable in (zip(
-        range(num_hosts), table_variables, accumulator_variables)):
-      with ops.colocate_with(table_variable):
-        load_parameters_op = (
-            tpu_ops.load_tpu_embedding_adagrad_parameters(
-                parameters=table_variable,
-                accumulators=accumulator_variable,
-                table_name=table,
-                num_shards=num_hosts,
-                shard_id=host_id))
-        retrieved_table, retrieved_accumulator = (
-            tpu_ops.retrieve_tpu_embedding_adagrad_parameters(
-                table_name=table,
-                num_shards=num_hosts,
-                shard_id=host_id))
-        retrieve_parameters_op = control_flow_ops.group(
-            state_ops.assign(table_variable, retrieved_table),
-            state_ops.assign(accumulator_variable, retrieved_accumulator))
-
-      load_ops.append(load_parameters_op)
-      retrieve_ops.append(retrieve_parameters_op)
-    return slot_variables, load_ops, retrieve_ops
-
-
-class _AdamHandler(_OptimizerHandler):
-  """Handles Adam specific logic."""
-
-  def __init__(self, optimization_parameters):
-    super(_AdamHandler, self).__init__(optimization_parameters)
-    self._table_to_m_variables_dict = {}
-    self._table_to_v_variables_dict = {}
-
-  def set_optimization_parameters(self, table_descriptor):
-    table_descriptor.optimization_parameters.adam.beta1 = (
-        self._optimization_parameters.beta1)
-    table_descriptor.optimization_parameters.adam.beta2 = (
-        self._optimization_parameters.beta2)
-    table_descriptor.optimization_parameters.adam.epsilon = (
-        self._optimization_parameters.epsilon)
-    table_descriptor.optimization_parameters.adam.use_non_lazy_adam = (
-        not self._optimization_parameters.lazy_adam)
-    table_descriptor.optimization_parameters.adam.use_sum_inside_sqrt = (
-        self._optimization_parameters.sum_inside_sqrt)
-
-  def get_default_slot_variable_names(self, table):
-    return AdamSlotVariableNames('{}/{}/m'.format(table, 'Adam'),
-                                 '{}/{}/v'.format(table, 'Adam'))
-
-  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
-                               table_config, table_variables):
-    m_initializer = init_ops.zeros_initializer()
-    m_variables = _create_partitioned_variables(
-        name=slot_variable_names.m,
-        num_hosts=num_hosts,
-        vocabulary_size=table_config.vocabulary_size,
-        embedding_dimension=table_config.dimension,
-        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
-        initializer=m_initializer)
-    v_initializer = init_ops.zeros_initializer()
-    v_variables = _create_partitioned_variables(
-        name=slot_variable_names.v,
-        num_hosts=num_hosts,
-        vocabulary_size=table_config.vocabulary_size,
-        embedding_dimension=table_config.dimension,
-        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
-        initializer=v_initializer)
-    slot_variables = AdamSlotVariables(m_variables, v_variables)
-
-    load_ops = []
-    retrieve_ops = []
-    for host_id, table_variable, m_variable, v_variable in (zip(
-        range(num_hosts), table_variables,
-        m_variables, v_variables)):
-      with ops.colocate_with(table_variable):
-        load_parameters_op = (
-            tpu_ops.load_tpu_embedding_adam_parameters(
-                parameters=table_variable,
-                momenta=m_variable,
-                velocities=v_variable,
-                table_name=table,
-                num_shards=num_hosts,
-                shard_id=host_id))
-        retrieved_table, retrieved_m, retrieved_v = (
-            tpu_ops.retrieve_tpu_embedding_adam_parameters(
-                table_name=table,
-                num_shards=num_hosts,
-                shard_id=host_id))
-        retrieve_parameters_op = control_flow_ops.group(
-            state_ops.assign(table_variable, retrieved_table),
-            state_ops.assign(m_variable, retrieved_m),
-            state_ops.assign(v_variable, retrieved_v))
-
-      load_ops.append(load_parameters_op)
-      retrieve_ops.append(retrieve_parameters_op)
-    return slot_variables, load_ops, retrieve_ops
-
-
-class _StochasticGradientDescentHandler(_OptimizerHandler):
-  """Handles stochastic gradient descent specific logic."""
-
-  def set_optimization_parameters(self, table_descriptor):
-    (table_descriptor.optimization_parameters.stochastic_gradient_descent
-     .SetInParent())
-
-  def get_default_slot_variable_names(self, table):
-    return None
-
-  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
-                               table_config, table_variables):
-    del table_config
-
-    load_ops = []
-    retrieve_ops = []
-    for host_id, table_variable in (zip(
-        range(num_hosts), table_variables)):
-      with ops.colocate_with(table_variable):
-        load_parameters_op = (
-            tpu_ops
-            .load_tpu_embedding_stochastic_gradient_descent_parameters(
-                parameters=table_variable,
-                table_name=table,
-                num_shards=num_hosts,
-                shard_id=host_id))
-        retrieved_table = (
-            tpu_ops
-            .retrieve_tpu_embedding_stochastic_gradient_descent_parameters(
-                table_name=table,
-                num_shards=num_hosts,
-                shard_id=host_id))
-        retrieve_parameters_op = control_flow_ops.group(
-            state_ops.assign(table_variable, retrieved_table))
-
-      load_ops.append(load_parameters_op)
-      retrieve_ops.append(retrieve_parameters_op)
-    return None, load_ops, retrieve_ops
-
-
-def _get_optimization_handler(optimization_parameters):
-  if isinstance(optimization_parameters, AdagradParameters):
-    return _AdagradHandler(optimization_parameters)
-  elif isinstance(optimization_parameters, AdamParameters):
-    return _AdamHandler(optimization_parameters)
-  elif isinstance(optimization_parameters, StochasticGradientDescentParameters):
-    return _StochasticGradientDescentHandler(optimization_parameters)
-  else:
-    raise NotImplementedError()
-
-
-def _create_ordered_dict(d):
-  """Create an OrderedDict from Dict."""
-  return collections.OrderedDict((k, d[k]) for k in sorted(d))
-
-
-def _create_combiners(table_to_config_dict):
-  return [table_to_config_dict[t].combiner for t in table_to_config_dict]
-
-
-def _create_table_to_features_dict(feature_to_table_dict):
-  """Create mapping from table to a list of its features."""
-  table_to_features_dict_tmp = {}
-  for feature, table in six.iteritems(feature_to_table_dict):
-    if table in table_to_features_dict_tmp:
-      table_to_features_dict_tmp[table].append(feature)
-    else:
-      table_to_features_dict_tmp[table] = [feature]
-
-  table_to_features_dict = collections.OrderedDict()
-  for table in sorted(table_to_features_dict_tmp):
-    table_to_features_dict[table] = sorted(table_to_features_dict_tmp[table])
-  return table_to_features_dict
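-
-# Example (illustrative, not from the original code):
-#   _create_table_to_features_dict(
-#       {'watched': 'video', 'favorited': 'video', 'friends': 'user'})
-#   => OrderedDict([('user', ['friends']),
-#                   ('video', ['favorited', 'watched'])])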
-
-
-def _create_device_fn(hosts):
-  """Create device_fn() to use with _create_partitioned_variables()."""
-
-  def device_fn(op):
-    """Returns the `device` for `op`."""
-    part_match = re.match(r'.*/part_(\d+)(/|$)', op.name)
-
-    if part_match:
-      idx = int(part_match.group(1))
-    else:
-      raise RuntimeError('Internal Error: '
-                         'Expected %s to contain /part_*.' % op.name)
-
-    device = hosts[idx]
-    return device
-
-  return device_fn
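-
-# Example (illustrative, not from the original code): with at least four
-# entries in `hosts`, an op named 'video/part_3/Initializer/...' is placed
-# on hosts[3] by the returned device_fn.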
-
-
-def _create_partitioned_variables(name,
-                                  num_hosts,
-                                  vocabulary_size,
-                                  embedding_dimension,
-                                  initializer,
-                                  collections=None):  # pylint: disable=redefined-outer-name
-  """Creates ParitionedVariables based on `num_hosts` for `table`."""
-  # TODO(shizhiw): automatically place embedding lookup elsewhere?
-  if vocabulary_size < num_hosts:
-    raise ValueError('`vocabulary_size`({}) is smaller than `num_hosts`({}). '
-                     'As TPU embedding is not optimized for small tables, '
-                     'please consider other ways for this embedding lookup.'
-                     .format(vocabulary_size, num_hosts))
-
-  return list(variable_scope.get_variable(
-      name,
-      shape=(vocabulary_size, embedding_dimension),
-      partitioner=partitioned_variables.fixed_size_partitioner(num_hosts),
-      dtype=dtypes.float32,
-      initializer=initializer,
-      collections=collections,
-      trainable=False))
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tpu_embedding import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding_gradient.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding_gradient.py
new file mode 100644
index 0000000..308adc7
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding_gradient.py
@@ -0,0 +1,23 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tpu_embedding_gradient import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 14dca03..71d1d0f 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -1,3737 +1,30 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-"""TPUEstimator class."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import copy
-import os
-import signal
-import sys
-import threading
-import time
-
-import numpy as np
-import six
-from six.moves import queue as Queue  # pylint: disable=redefined-builtin
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.contrib.tpu.proto import compilation_result_pb2 as tpu_compilation_result
-from tensorflow.contrib.tpu.python.ops import tpu_ops
-from tensorflow.contrib.tpu.python.ops import tpu_ordinal_selector_op
-from tensorflow.contrib.tpu.python.tpu import _tpu_estimator_embedding
-from tensorflow.contrib.tpu.python.tpu import error_handling
-from tensorflow.contrib.tpu.python.tpu import functional as tpu_functional
-from tensorflow.contrib.tpu.python.tpu import session_support
-from tensorflow.contrib.tpu.python.tpu import tensor_tracer
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.contrib.tpu.python.tpu import tpu_config
-from tensorflow.contrib.tpu.python.tpu import tpu_context
-from tensorflow.contrib.tpu.python.tpu import tpu_feed
-from tensorflow.contrib.tpu.python.tpu import training_loop
-from tensorflow.contrib.tpu.python.tpu import util as util_lib
-from tensorflow.contrib.tpu.python.tpu._tpu_estimator_embedding import AdamParameters  # pylint: disable=unused-import
-from tensorflow.contrib.tpu.python.tpu._tpu_estimator_embedding import EmbeddingConfigSpec  # pylint: disable=unused-import
-from tensorflow.contrib.training.python.training import hparam
-from tensorflow.core.framework import variable_pb2
-from tensorflow.core.framework.summary_pb2 import Summary
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest as data_nest
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.export import export_output as export_output_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import summary_ops_v2 as contrib_summary
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.saved_model import tag_constants
-from tensorflow.python.summary import summary
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import evaluation
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training
-from tensorflow.python.training import training_util
-from tensorflow.python.util import function_utils
-from tensorflow.python.util import nest
-from tensorflow.python.util import tf_inspect
-
-_INITIAL_LOSS = 1e7
-_ZERO_LOSS = 0.
-_TPU_ESTIMATOR = 'tpu_estimator'
-_ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop'
-_BATCH_SIZE_KEY = 'batch_size'
-_CTX_KEY = 'context'
-_USE_TPU_KEY = 'use_tpu'
-_CROSS_REPLICA_SUM_OP = 'CrossReplicaSum'
-_ONE_GIGABYTE = 1024 * 1024 * 1024
-_TPU_ENQUEUE_OPS = '_tpu_enqueue_ops'
-_TPU_TRAIN_OP = '_tpu_train_op'
-_REWRITE_FOR_INFERENCE_MODE = '_rewrite_for_inference'
-_KEY_WHEN_PREDICTIONS_IS_A_TENSOR = '_key_when_predictions_is_a_tensor'
-
-# Ideally _USE_TPU_KEY should be reserved as well. However there are already
-# models that make use of this key, so it cannot be reserved now without
-# breaking them. In the long run, we would like to mitigate this by migrating
-# models off of using _USE_TPU_KEY.
-_RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY, _CTX_KEY]
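-# (In practice this means a user model_fn receives params['batch_size']
-# injected by TPUEstimator, and user-supplied params must not set the
-# reserved keys themselves.)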
-
-# TODO(b/65703635): Flip the value and remove all dead code. Currently, this is
-# only used for per-core based deployments. For per-host based pipelines, if a
-# user returns a Dataset instance it will be automatically wrapped in a
-# tf.while_loop (This can be disabled by returning features and labels
-# explicitly).
-_WRAP_INPUT_FN_INTO_WHILE_LOOP = False
-
-ops.register_proto_function(
-    '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR),
-    proto_type=variable_pb2.VariableDef,
-    to_proto=resource_variable_ops._to_proto_fn,  # pylint: disable=protected-access
-    from_proto=resource_variable_ops._from_proto_fn)  # pylint: disable=protected-access
-
-
-def _is_iterable(obj):
-  """A Python 2 and 3 compatible util to check whether `obj` is iterable."""
-  try:
-    iter(obj)
-    return True
-  except TypeError:
-    return False
-
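-# For example, _is_iterable([1, 2]) and _is_iterable('ab') return True, while
-# _is_iterable(42) returns False; unlike an isinstance check, this also covers
-# user-defined classes that only implement __iter__.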
-
-class CatchInvalidHostcallFunctions(control_flow_ops.XLAControlFlowContext):
-
-  def AddOp(self, op):
-    if op.type in [
-        'AudioSummary', 'AudioSummaryV2', 'HistogramSummary', 'ImageSummary',
-        'MergeSummary', 'ScalarSummary', 'TensorSummary', 'TensorSummaryV2'
-    ]:
-      raise ValueError('Use tf.contrib.summary inside of host_calls.')
-
-
-def _create_global_step(graph):
-  graph = graph or ops.get_default_graph()
-  if training.get_global_step(graph) is not None:
-    raise ValueError('"global_step" already exists.')
-  # Create in proper graph and base name_scope.
-  with graph.as_default() as g, g.name_scope(None):
-    return variable_scope.get_variable(
-        ops.GraphKeys.GLOBAL_STEP,
-        shape=[],
-        dtype=dtypes.int64,
-        initializer=init_ops.zeros_initializer(),
-        trainable=False,
-        use_resource=True,
-        collections=[ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.GLOBAL_STEP])
-
-
-def _create_or_get_iterations_per_loop():
-  """Creates or gets the iterations_per_loop variable.
-
-  In TPUEstimator, the user provided computation, the model_fn, is wrapped
-  inside a tf.while_loop for peak performance. The iterations of the loop are
-  specified by this variable, which adjusts its value on the CPU after each TPU
-  program execution and before the next TPU execution.
-
-  The purpose of using a variable, rather than a constant, is to allow
-  TPUEstimator to adapt the TPU training iterations to the final steps
-  specified by users. For example, if the user sets the iterations_per_loop as 4
-  in TPUConfig and steps as 10 in TPUEstimator.train(), the iterations_per_loop
-  variable will have the following value before each TPU training.
-
-      - 1st TPU execution: iterations_per_loop = 4
-      - 2nd TPU execution: iterations_per_loop = 4
-      - 3rd TPU execution: iterations_per_loop = 2
-
-  As model_fn increases the global step once per train_op invocation, the global
-  step is 10 after all TPU executions, matching the steps=10 inputs passed in by
-  users.
-
-  Returns:
-    A TF non-trainable resource variable.
-
-  Raises:
-    RuntimeError: If multiple iterations_per_loop variables are found.
-  """
-  graph = ops.get_default_graph()
-  collection_name = '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR)
-  iter_vars = graph.get_collection(collection_name)
-  if len(iter_vars) == 1:
-    return iter_vars[0]
-  elif len(iter_vars) > 1:
-    raise RuntimeError('Multiple iterations_per_loop_var in collection.')
-
-  with ops.colocate_with(training_util.get_global_step()):
-    with variable_scope.variable_scope(
-        _TPU_ESTIMATOR, reuse=variable_scope.AUTO_REUSE):
-      return variable_scope.get_variable(
-          _ITERATIONS_PER_LOOP_VAR,
-          initializer=init_ops.zeros_initializer(),
-          shape=[],
-          dtype=dtypes.int32,
-          trainable=False,
-          collections=[collection_name, ops.GraphKeys.LOCAL_VARIABLES],
-          use_resource=True)
-
-
-def _sync_variables_ops(ctx):
-  """Create varriables synchronization ops.
-
-  Gets the variables back from TPU nodes. This means the variables updated
-  by TPU will now be *synced* to host memory.
-  In BROADCAST mode, we skip this sync since the variables are usually too
-  big to transmit via RPC.
-
-  Args:
-    ctx: A `_InternalTPUContext` instance with mode.
-
-  Returns:
-    A list of sync ops.
-  """
-
-  if not ctx.is_input_broadcast_with_iterators():
-    return [
-        array_ops.check_numerics(v.read_value(),
-                                 'Gradient for %s is NaN' % v.name).op
-        for v in variables.trainable_variables()
-    ]
-  else:
-    return [control_flow_ops.no_op()]
-
-
-def _increase_eval_step_op(iterations_per_loop):
-  """Returns an op to increase the eval step for TPU evaluation.
-
-  Args:
-    iterations_per_loop: Tensor. The number of eval steps running in TPU system
-      before returning to CPU host for each `Session.run`.
-
-  Returns:
-    An operation
-  """
-  eval_step = evaluation._get_or_create_eval_step()  # pylint: disable=protected-access
-  # Estimator's evaluation loop increases the eval step by 1 per Session.run,
-  # so we add the remaining difference here (e.g. with iterations_per_loop=4,
-  # this op adds 3).
-  return state_ops.assign_add(
-      eval_step,
-      math_ops.cast(iterations_per_loop - 1, dtype=eval_step.dtype),
-      use_locking=True)
-
-
-def _extract_key_names(tensor_or_dict):
-  if isinstance(tensor_or_dict, dict):
-    return sorted(tensor_or_dict.keys())
-  return []
-
-
-class _SIGNAL(object):
-  """Signal used to control the thread of infeed/outfeed.
-
-  All reserved signals must be negative numbers. Positive numbers are used to
-  indicate the number of iterations for the next training/evaluation loop.
-  """
-  NEXT_BATCH = -1
-  STOP = -2
-
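-# For example, putting 16 on the queue asks the worker thread to run 16
-# enqueue/dequeue iterations, while putting _SIGNAL.STOP (-2) shuts it down;
-# hence the requirement that all control signals be negative.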
-
-class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
-  """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.
-
-  See `EstimatorSpec` for `mode`, `predictions`, `loss`, `train_op`, and
-  `export_outputs`.
-
-  For evaluation, `eval_metrics` is a tuple of `metric_fn` and `tensors`, where
-  `metric_fn` runs on CPU to generate metrics and `tensors` represents the
-  `Tensor`s transferred from TPU system to CPU host and passed to `metric_fn`.
-  To be precise, TPU evaluation expects a slightly different signature from the
-  `tf.estimator.Estimator`. While `EstimatorSpec.eval_metric_ops` expects a
-  dict, `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`.
-  The `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. The
-  `tensors` usually specify the model logits, which are transferred back from
-  TPU system to CPU host. All tensors must be batch-major, i.e., the batch
-  size is the first dimension. Once all tensors are available at CPU host from
-  all shards, they are concatenated (on CPU) and passed as positional arguments
-  to the `metric_fn` if `tensors` is list or keyword arguments if `tensors` is
-  a dict. `metric_fn` takes the `tensors` and returns a dict from metric string
-  name to the result of calling a metric function, namely a `(metric_tensor,
-  update_op)` tuple. See `TPUEstimator` for MNIST example how to specify the
-  `eval_metrics`.
-
-  `scaffold_fn` is a function running on CPU to generate the `Scaffold`. This
-  function should not capture any Tensors in `model_fn`.
-
-  `host_call` is a tuple of a `function` and a list or dictionary of `tensors`
-  to pass to that function; the function returns a list of Tensors.
-  `host_call` currently works for train() and evaluate(). The function is
-  executed on the CPU on every step, so there is communication overhead when
-  sending tensors from TPU to CPU. To reduce the overhead, try reducing the
-  size of the tensors. The `tensors` are concatenated along their major (batch)
-  dimension, and so must be >= rank 1. The `host_call` is useful for writing
-  summaries with `tf.contrib.summary.create_file_writer`.
-  """
-
-  def __new__(cls,
-              mode,
-              predictions=None,
-              loss=None,
-              train_op=None,
-              eval_metrics=None,
-              export_outputs=None,
-              scaffold_fn=None,
-              host_call=None,
-              training_hooks=None,
-              evaluation_hooks=None,
-              prediction_hooks=None):
-    """Creates a validated `TPUEstimatorSpec` instance."""
-    host_calls = {}
-    if eval_metrics is not None:
-      host_calls['eval_metrics'] = eval_metrics
-    if host_call is not None:
-      host_calls['host_call'] = host_call
-    _OutfeedHostCall.validate(host_calls)
-
-    training_hooks = tuple(training_hooks or [])
-    evaluation_hooks = tuple(evaluation_hooks or [])
-    prediction_hooks = tuple(prediction_hooks or [])
-
-    for hook in training_hooks + evaluation_hooks + prediction_hooks:
-      if not isinstance(hook, session_run_hook.SessionRunHook):
-        raise TypeError('All hooks must be SessionRunHook instances, given: {}'
-                        .format(hook))
-
-    return super(TPUEstimatorSpec, cls).__new__(
-        cls,
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metrics=eval_metrics,
-        export_outputs=export_outputs,
-        scaffold_fn=scaffold_fn,
-        host_call=host_call,
-        training_hooks=training_hooks,
-        evaluation_hooks=evaluation_hooks,
-        prediction_hooks=prediction_hooks)
-
-  def as_estimator_spec(self):
-    """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
-    host_calls = {}
-    if self.eval_metrics is not None:
-      host_calls['eval_metrics'] = self.eval_metrics
-    if self.host_call is not None:
-      host_calls['host_call'] = self.host_call
-    host_call_ret = _OutfeedHostCall.create_cpu_hostcall(host_calls)
-    eval_metric_ops = None
-    if self.eval_metrics is not None:
-      eval_metric_ops = host_call_ret['eval_metrics']
-    hooks = None
-    if self.host_call is not None:
-      hooks = [_OutfeedHostCallHook(host_call_ret['host_call'])]
-    loss = self.loss
-    if tensor_tracer.TensorTracer.is_enabled() \
-       and self.train_op is not None:
-      tt = tensor_tracer.TensorTracer()
-      (loss, tracing_calls) = tt.trace_cpu(ops.get_default_graph(),
-                                           loss, self.train_op)
-      tracing_call_ret = _OutfeedHostCall.create_cpu_hostcall(tracing_calls)
-      tracing_functions = tracing_call_ret.values()
-      if tracing_functions:
-        if hooks:
-          hooks.extend([_OutfeedHostCallHook(tracing_functions)])
-        else:
-          hooks = [_OutfeedHostCallHook(tracing_functions)]
-    hooks = tuple(hooks or [])
-    scaffold = self.scaffold_fn() if self.scaffold_fn else None
-    return model_fn_lib.EstimatorSpec(
-        mode=self.mode,
-        predictions=self.predictions,
-        loss=loss,
-        train_op=self.train_op,
-        eval_metric_ops=eval_metric_ops,
-        export_outputs=self.export_outputs,
-        scaffold=scaffold,
-        training_hooks=self.training_hooks + hooks,
-        evaluation_hooks=self.evaluation_hooks + hooks,
-        prediction_hooks=self.prediction_hooks + hooks)
-
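-# A minimal sketch of the eval_metrics contract described above; metric_fn,
-# labels and logits are hypothetical names used only for illustration:
-#
-#   def metric_fn(labels, logits):
-#     accuracy = tf.metrics.accuracy(labels, tf.argmax(logits, 1))
-#     return {'accuracy': accuracy}
-#
-#   spec = TPUEstimatorSpec(mode, loss=loss,
-#                           eval_metrics=(metric_fn, [labels, logits]))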
-
-class _OpQueueContext(object):
-  """Manages work queue and thread for a infeed/outfeed thread."""
-
-  def __init__(self, name, target, args):
-    self._name = name
-    self._queue = Queue.Queue()
-    args = (self,) + args
-    self._thread = threading.Thread(name=name, target=target, args=args)
-    self._thread.daemon = True
-    self._thread.start()
-
-  def stop(self):
-    self._queue.put(_SIGNAL.STOP)
-
-  def send_next_batch_signal(self, iterations):
-    self._queue.put(iterations)
-
-  def read_iteration_counts(self):
-    while True:
-      iterations = self._queue.get(block=True)
-      logging.debug('%s read iterations %s', self._name, iterations)
-      if iterations == _SIGNAL.STOP:
-        logging.info('%s received shutdown signal, stopping.', self._name)
-        return
-      yield iterations
-
-  def join(self):
-    logging.info('Shutting down %s thread.', self._name)
-    self.stop()
-    self._thread.join()
-
-
-class _OpSignalOnceQueueContext(_OpQueueContext):
-  """Manages work queue and thread for a infeed/outfeed thread.
-
-  This subclass only signals once.
-  """
-
-  def __init__(self, name, target, args):
-    super(_OpSignalOnceQueueContext, self).__init__(name, target, args)
-    self._has_signaled = False
-
-  def send_next_batch_signal(self, iterations):
-    if not self._has_signaled:
-      self._queue.put(iterations)
-      self._has_signaled = True
-
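-# _OpSignalOnceQueueContext is used by the prediction hook further below:
-# prediction runs until a stopping signal arrives rather than for a fixed
-# iteration count, so the infeed thread only needs a single start signal.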
-
-class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
-  """A Session hook setting up the TPU initialization, infeed, and outfeed.
-
-  This hook does two major things:
-  1. initializes and shuts down the TPU system.
-  2. launches and joins the threads for infeed enqueue and (optional) outfeed
-     dequeue.
-  """
-
-  def __init__(self,
-               ctx,
-               enqueue_ops,
-               dequeue_ops,
-               tpu_compile_op,
-               run_infeed_loop_on_coordinator=True,
-               rendezvous=None,
-               master=None,
-               session_config=None,
-               tpu_init_ops=None):
-    self._master_job = ctx.master_job
-    self._enqueue_ops = enqueue_ops
-    self._dequeue_ops = dequeue_ops
-    self._rendezvous = rendezvous
-    self._master = master
-    self._session_config = session_config
-    self._init_ops = list(tpu_init_ops or [])
-    if ctx.embedding_config is None:
-      self._embedding_layer_config = None
-    else:
-      self._embedding_layer_config = (
-          ctx.embedding_config.tpu_embedding.config_proto)
-    self._run_infeed_loop_on_coordinator = run_infeed_loop_on_coordinator
-    self._initial_infeed_sleep_secs = (
-        ctx.config.tpu_config.initial_infeed_sleep_secs)
-
-    self._feed_error = None
-    self._finished = False
-    # When using model parallelism, the TPU is pre-initialized at startup to
-    # fetch mesh information.  We skip re-initializing it here to avoid
-    # suspected issues due to the mesh layout changing on the second
-    # initialization.
-    self._should_initialize_tpu = not ctx.model_parallelism_enabled
-    self._tpu_compile_op = tpu_compile_op
-
-  def begin(self):
-    logging.info('TPU job name %s', self._master_job)
-    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
-    if self._should_initialize_tpu:
-      self._finalize_ops = [tpu.shutdown_system(job=self._master_job)]
-    else:
-      self._finalize_ops = []
-
-    summary_writer_init_ops = contrib_summary.summary_writer_initializer_op()
-    self._init_ops.extend(summary_writer_init_ops)
-    # Get all the writer resources from the initializer, so we know what to
-    # flush.
-    for op in summary_writer_init_ops:
-      self._finalize_ops.append(contrib_summary.flush(writer=op.inputs[0]))
-
-  def _run_infeed(self, queue_ctx, session):
-    logging.info('Starting infeed thread controller.')
-    if self._initial_infeed_sleep_secs:
-      logging.info('Infeed thread sleeping for %d seconds.',
-                   self._initial_infeed_sleep_secs)
-      time.sleep(self._initial_infeed_sleep_secs)
-      logging.info('Infeed thread starting after sleep')
-
-    with self._rendezvous.catch_errors(source='infeed', session=session):
-      if self._run_infeed_loop_on_coordinator:
-        for count, steps in enumerate(queue_ctx.read_iteration_counts()):
-          for i in xrange(steps):
-            logging.debug('Infeed enqueue for iteration (%d, %d)', count, i)
-            session.run(self._enqueue_ops)
-      else:
-        for _ in queue_ctx.read_iteration_counts():
-          session.run(self._enqueue_ops)
-      logging.info('Infeed thread finished, shutting down.')
-
-  def _run_outfeed(self, queue_ctx, session):
-    logging.info('Starting outfeed thread controller.')
-    with self._rendezvous.catch_errors(source='outfeed', session=session):
-      for count, steps in enumerate(queue_ctx.read_iteration_counts()):
-        for i in xrange(steps):
-          logging.debug('Outfeed dequeue for iteration (%d, %d)', count, i)
-          session.run(self._dequeue_ops)
-      logging.info('Outfeed thread finished, shutting down.')
-
-  def _create_infeed_controller(self, name, target, args):
-    return _OpQueueContext(name=name, target=target, args=args)
-
-  def _assertCompilationSucceeded(self, result, coord):
-    proto = tpu_compilation_result.CompilationResultProto()
-    proto.ParseFromString(result)
-    if proto.status_error_message:
-      logging.error('Compilation failed: {}'.format(proto.status_error_message))
-      coord.request_stop()
-    else:
-      logging.info('Compilation succeeded')
-
-  def after_create_session(self, session, coord):
-    if self._should_initialize_tpu:
-      logging.info('Init TPU system')
-      start = time.time()
-      with ops.Graph().as_default():
-        with tf_session.Session(
-            self._master, config=self._session_config) as sess:
-          sess.run(
-              tpu.initialize_system(
-                  job=self._master_job,
-                  embedding_config=self._embedding_layer_config))
-      logging.info('Initialized TPU in %d seconds', time.time() - start)
-
-    session.run(self._init_ops,
-                options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000))
-
-    if os.environ.get('TPU_SPLIT_COMPILE_AND_EXECUTE', '') == '1':
-      logging.info('Compiling user program: this may take a while...')
-      self._assertCompilationSucceeded(session.run(self._tpu_compile_op), coord)
-
-    self._infeed_controller = self._create_infeed_controller(
-        name='InfeedController', target=self._run_infeed, args=(session,))
-
-    self._outfeed_controller = _OpQueueContext(
-        name='OutfeedController', target=self._run_outfeed, args=(session,))
-
-    # Enable the worker watchdog to terminate workers on coordinator exit.
-    watchdog_timeout = int(os.environ.get('TF_TPU_WATCHDOG_TIMEOUT', '0'))
-    if watchdog_timeout > 0:
-      session_support.start_worker_watchdog(session,
-                                            shutdown_timeout=watchdog_timeout)
-
-  def before_run(self, run_context):
-    self._feed_error = None
-
-    iterations = run_context.session.run(self._iterations_per_loop_var)
-
-    logging.info('Enqueue next (%d) batch(es) of data to infeed.', iterations)
-    self._infeed_controller.send_next_batch_signal(iterations)
-
-    logging.info('Dequeue next (%d) batch(es) of data from outfeed.',
-                 iterations)
-    self._outfeed_controller.send_next_batch_signal(iterations)
-
-  def end(self, session):
-    self._finished = True
-    logging.info('Stop infeed thread controller')
-    self._infeed_controller.join()
-    self._rendezvous.record_done('infeed')
-
-    logging.info('Stop outfeed thread controller')
-    self._outfeed_controller.join()
-    self._rendezvous.record_done('outfeed')
-
-    logging.info('Shutdown TPU system.')
-    session.run(self._finalize_ops)
-
-
-class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook):
-
-  def __init__(self, ctx, enqueue_ops, dequeue_ops, tpu_compile_op,
-               rendezvous=None, master=None, session_config=None):
-    super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__(
-        ctx,
-        enqueue_ops,
-        dequeue_ops,
-        tpu_compile_op=tpu_compile_op,
-        run_infeed_loop_on_coordinator=False,
-        rendezvous=rendezvous,
-        master=master,
-        session_config=session_config)
-
-  def _create_infeed_controller(self, name, target, args):
-    return _OpSignalOnceQueueContext(name=name, target=target, args=args)
-
-
-class _TPUStopAtStepHook(session_run_hook.SessionRunHook):
-  """Hook that requests stop at a specified step.
-
-  This hook is similar to the `session_run_hook._StopAfterNEvalsHook` with
-  the following differences for TPU training:
-
-  1. This hook sets the variable for iterations_per_loop, which is used by
-     `TPUInfeedOutfeedSessionHook` to control the iterations for infeed/outfeed.
-     As the hook execution order is not guaranteed, the variable update is
-     handled in `after_create_session` and `after_run`, because
-     `TPUInfeedOutfeedSessionHook` reads the variable value in `before_run`.
-
-  2. For each training loop (session.run), the global step could be increased
-     multiple times on TPU. The global step tensor value will be explicitly read
-     again in `after_run` to ensure the latest value is retrieved, avoiding a
-     race condition.
-  """
-
-  def __init__(self, iterations, num_steps=None, last_step=None):
-    """Initializes a `StopAtStepHook`.
-
-    Args:
-      iterations: The number of iterations to run optimizer per training loop.
-      num_steps: Number of steps to execute.
-      last_step: Step after which to stop.
-
-    Raises:
-      ValueError: If one of the arguments is invalid.
-    """
-    if num_steps is None and last_step is None:
-      raise ValueError('One of num_steps or last_step must be specified.')
-    if num_steps is not None and last_step is not None:
-      raise ValueError('Only one of num_steps or last_step can be specified.')
-    self._num_steps = num_steps
-    self._last_step = last_step
-    self._iterations = iterations
-
-  def _next_iterations(self, global_step, last_step):
-    gap = last_step - global_step
-    return min(gap, self._iterations)
-
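-  # For example, with iterations=4 and last_step=10: at global_step=8 the next
-  # loop runs min(10 - 8, 4) = 2 iterations, matching the schedule described
-  # in _create_or_get_iterations_per_loop.
-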
-  def begin(self):
-    self._global_step_tensor = training_util.get_global_step()
-    if self._global_step_tensor is None:
-      raise RuntimeError('Global step should be created.')
-
-    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
-
-  def after_create_session(self, session, coord):
-    global_step = session.run(self._global_step_tensor)
-    if self._last_step is None:
-      self._last_step = global_step + self._num_steps
-
-    iterations = self._next_iterations(global_step, self._last_step)
-
-    self._iterations_per_loop_var.load(iterations, session=session)
-
-  def after_run(self, run_context, run_values):
-    # Global step cannot be retrieved via SessionRunArgs and before_run due to
-    # a race condition.
-    global_step = run_context.session.run(self._global_step_tensor)
-    if global_step >= self._last_step:
-      run_context.request_stop()
-    else:
-      iterations = self._next_iterations(global_step, self._last_step)
-      self._iterations_per_loop_var.load(
-          iterations, session=run_context.session)
-
-
-class _SetEvalIterationsHook(session_run_hook.SessionRunHook):
-  """Hook that requests stop at a specified step."""
-
-  def __init__(self, num_steps):
-    """Initializes a `_SetEvalIterationsHook`.
-
-    Args:
-      num_steps: Number of steps to execute.
-    """
-    self._num_steps = num_steps
-
-  def begin(self):
-    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
-
-  def after_create_session(self, session, coord):
-    self._iterations_per_loop_var.load(self._num_steps, session=session)
-
-
-class _StoppingPredictHook(session_run_hook.SessionRunHook):
-  """Hook that requests stop according to the stopping signal in prediction."""
-
-  def __init__(self, scalar_stopping_signal):
-    self._scalar_stopping_signal = scalar_stopping_signal
-
-  def begin(self):
-    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
-
-  def after_create_session(self, session, coord):
-    # This is not strictly necessary, as we do not run infeed enqueue and
-    # outfeed dequeue in side threads for the prediction model, but it makes
-    # TPUInfeedOutfeedSessionHook print a nicer message.
-    self._iterations_per_loop_var.load(1, session=session)
-
-  def before_run(self, run_context):
-    return session_run_hook.SessionRunArgs(self._scalar_stopping_signal)
-
-  def after_run(self, run_context, run_values):
-    _ = run_context
-    scalar_stopping_signal = run_values.results
-    if _StopSignals.should_stop(scalar_stopping_signal):
-      # NOTE(xiejw): In prediction, stopping signals are inserted for each
-      # batch. And we append one more batch to signal the system it should stop.
-      # The data flow might look like
-      #
-      #  batch   0: images, labels, stop = 0  (user provided)
-      #  batch   1: images, labels, stop = 0  (user provided)
-      #  ...
-      #  batch  99: images, labels, stop = 0  (user provided)
-      #  batch 100: images, labels, stop = 1  (TPUEstimator appended)
-      #
-      # where the final batch (id = 100) is appended by TPUEstimator, so we
-      # should drop it before returning the predictions to user.
-      # To achieve that, we throw the OutOfRangeError in after_run. Once
-      # Monitored Session sees this error in SessionRunHook.after_run, the
-      # "current" prediction, i.e., batch with id=100, will be discarded
-      # immediately.
-      raise errors.OutOfRangeError(None, None, 'Stopped by stopping signal.')
-
-
-def generate_per_core_enqueue_ops_fn_for_host(
-    ctx, input_fn, inputs_structure_recorder, host_device, host_id):
-  """Generates infeed enqueue ops for per-core input_fn on a single host."""
-  captured_infeed_queue = _CapturedObject()
-  tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
-
-  def enqueue_ops_fn():
-    """A fn returns enqueue_ops."""
-    num_cores_per_host = ctx.num_of_cores_per_host
-    per_host_sharded_inputs = []
-    for core_ordinal in range(num_cores_per_host):
-      with ops.name_scope('ordinal_%d' % (core_ordinal)):
-        user_context = tpu_context.TPUContext(
-            internal_ctx=ctx,
-            input_device=host_device,
-            invocation_index=host_id * ctx.num_of_cores_per_host + core_ordinal)
-        inputs = _Inputs.from_input_fn(input_fn(user_context))
-        if inputs.is_dataset:
-          raise TypeError(
-              '`input_fn` returning `Dataset` is not yet supported in '
-              'per-Core input pipeline deployment. Please set '
-              'TPUConfig.per_host_input_for_training to True or return '
-              '`features` and `labels` from `input_fn`.')
-        features, labels = inputs.features_and_labels()
-
-        inputs_structure_recorder.validate_and_record_structure(
-            features, labels)
-        flattened_inputs = (
-            inputs_structure_recorder.flatten_features_and_labels(
-                features, labels))
-        per_host_sharded_inputs.append(flattened_inputs)
-
-    infeed_queue = tpu_feed.InfeedQueue(
-        number_of_tuple_elements=len(per_host_sharded_inputs[0]))
-    captured_infeed_queue.capture(infeed_queue)
-
-    per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
-        per_host_sharded_inputs, tpu_ordinal_function=tpu_ordinal_function_impl)
-    return per_host_enqueue_ops
-
-  return enqueue_ops_fn, captured_infeed_queue
-
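-# In the per-core deployment above, input_fn runs once per core and each
-# (features, labels) tuple feeds exactly one core; the per-host variants below
-# call input_fn once per host and split or replicate the batch across that
-# host's replicas instead.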
-
-def generate_per_host_enqueue_ops_fn_for_host(
-    ctx, input_fn, inputs_structure_recorder, batch_axis, device, host_id):
-  """Generates infeed enqueue ops for per-host input_fn on a single host."""
-  captured_infeed_queue = _CapturedObject()
-
-  dataset_initializer = None
-
-  with ops.device(device):
-    user_context = tpu_context.TPUContext(
-        internal_ctx=ctx, input_device=device, invocation_index=host_id)
-    inputs = _Inputs.from_input_fn(input_fn(user_context))
-
-    is_dataset = inputs.is_dataset
-    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
-      if not is_dataset:
-        raise TypeError(
-            'For mode PREDICT, `input_fn` must return `Dataset` instead of '
-            '`features` and `labels`.')
-      if batch_axis is not None:
-        raise TypeError('For mode PREDICT, batch_axis is not supported yet.')
-      inputs = _InputsWithStoppingSignals(
-          dataset=inputs.dataset,
-          batch_size=ctx.batch_size_for_input_fn,
-          add_padding=True)
-
-    if is_dataset:
-      dataset_initializer = inputs.dataset_initializer()
-
-    tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
-
-  def enqueue_ops_fn():
-    """A Fn returning the TPU infeed enqueue ops.
-
-    By providing this as a Fn, it can be invoked inside the tf.while_loop such
-    the input pipeline for multiple iterations can be executed by one
-    Session.run call.
-
-    Returns:
-      A list of enqueue ops, or a dict of enqueue ops and stopping signals.
-    """
-    with ops.device(device):
-      num_of_replicas_per_host = ctx.num_of_replicas_per_host
-      # Convert user input to features and labels.  If the user returns a
-      # dataset, it is initialized and the features and labels extracted via
-      # `dataset.iterator.get_next()`
-      features, labels = inputs.features_and_labels()
-      signals = inputs.signals()
-
-      inputs_structure_recorder.validate_and_record_structure(features, labels)
-      unsharded_tensor_list = (
-          inputs_structure_recorder.flatten_features_and_labels(
-              features, labels, signals))
-
-      infeed_queue = tpu_feed.InfeedQueue(
-          tuple_types=[t.dtype for t in unsharded_tensor_list],
-          tuple_shapes=[t.shape for t in unsharded_tensor_list],
-          shard_dimensions=batch_axis)
-      captured_infeed_queue.capture(infeed_queue)
-      infeed_queue.set_number_of_shards(num_of_replicas_per_host)
-      per_host_enqueue_ops = (
-          infeed_queue.split_inputs_and_generate_enqueue_ops(
-              unsharded_tensor_list,
-              placement_function=lambda x: device,
-              tpu_ordinal_function=tpu_ordinal_function_impl))
-      if signals is None:
-        return per_host_enqueue_ops
-      else:
-        return {
-            'ops': per_host_enqueue_ops,
-            'signals': signals,
-        }
-
-  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
-
-
-def generate_per_host_v2_enqueue_ops_fn_for_host(
-    ctx, input_fn, inputs_structure_recorder, device, host_id):
-  """Generates infeed enqueue ops for per-host input_fn on a single host."""
-  captured_infeed_queue = _CapturedObject()
-  dataset_initializer = None
-
-  with ops.device(device):
-    user_context = tpu_context.TPUContext(
-        internal_ctx=ctx, input_device=device, invocation_index=host_id)
-    inputs = _Inputs.from_input_fn(input_fn(user_context))
-
-    is_dataset = inputs.is_dataset
-    if not is_dataset:
-      raise TypeError('`input_fn` must return a `Dataset` for the PER_HOST_V2 '
-                      'input pipeline configuration.')
-
-    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
-      inputs = _InputsWithStoppingSignals(
-          dataset=inputs.dataset,
-          batch_size=ctx.batch_size_for_input_fn,
-          add_padding=True,
-          num_invocations_per_step=ctx.num_of_replicas_per_host)
-
-    dataset_initializer = inputs.dataset_initializer()
-    tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
-
-  def enqueue_ops_fn():
-    """Generates the per_host enqueue ops."""
-    control_deps = []
-    per_host_sharded_inputs = []
-    sparse_features_list = []
-    num_replicas_per_host = ctx.num_of_replicas_per_host
-    cached_signals = None
-    with ops.device(device):
-      if not inputs.is_dataset:
-        raise TypeError('`input_fn` must return a `Dataset` for this mode.')
-      for _ in range(num_replicas_per_host):
-        # Use control dependencies to ensure a deterministic ordering.
-        with ops.control_dependencies(control_deps):
-          features, labels = inputs.features_and_labels()  # Calls get_next()
-          signals = inputs.signals()
-
-          # All the replicas share replica 0's stopping signal.
-          # This avoids inconsistent state among different model replicas.
-          if cached_signals:
-            signals['stopping'] = cached_signals['stopping']
-          else:
-            cached_signals = signals
-
-        features, labels, sparse_features = (
-            _tpu_estimator_embedding.split_inputs(ctx, features, labels))
-        sparse_features_list.append(sparse_features)
-
-        inputs_structure_recorder.validate_and_record_structure(
-            features, labels)
-        flattened_inputs = (
-            inputs_structure_recorder.flatten_features_and_labels(
-                features, labels, signals))
-        control_deps.extend(flattened_inputs)
-        per_host_sharded_inputs.append(flattened_inputs)
-
-      if inputs_structure_recorder.flattened_input_dims:
-        input_partition_dims = inputs_structure_recorder.flattened_input_dims
-        if signals:
-          input_partition_dims += [None] * len(signals)
-        # pylint: disable=protected-access
-        infeed_queue = tpu_feed._PartitionedInfeedQueue(
-            number_of_tuple_elements=len(per_host_sharded_inputs[0]),
-            host_id=host_id,
-            input_partition_dims=input_partition_dims,
-            device_assignment=ctx.device_assignment)
-        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
-            per_host_sharded_inputs)
-      else:
-        infeed_queue = tpu_feed.InfeedQueue(
-            number_of_tuple_elements=len(per_host_sharded_inputs[0]))
-        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
-            per_host_sharded_inputs,
-            tpu_ordinal_function=tpu_ordinal_function_impl)
-      captured_infeed_queue.capture(infeed_queue)
-
-    if ctx.embedding_config:
-      per_host_enqueue_ops.extend(
-          ctx.embedding_config.tpu_embedding.generate_enqueue_ops(
-              sparse_features_list))
-
-    if signals is None:
-      return per_host_enqueue_ops
-    else:
-      return {
-          'ops': per_host_enqueue_ops,
-          'signals': signals,
-      }
-
-  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
-
-
-def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
-                                      num_hosts):
-  """Generates infeed enqueue ops for one input_fn on all the hosts."""
-  captured_infeed_queue = _CapturedObject()
-  dataset_initializer = None
-  device_0 = ctx.tpu_host_placement_function(host_id=0)
-  with ops.device(device_0):
-    user_context = tpu_context.TPUContext(
-        internal_ctx=ctx, input_device=device_0, invocation_index=0)
-    inputs = _Inputs.from_input_fn(input_fn(user_context))
-
-    is_dataset = inputs.is_dataset
-    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
-      if not is_dataset:
-        raise TypeError(
-            'For mode PREDICT, `input_fn` must return `Dataset` instead of '
-            '`features` and `labels`.')
-
-      inputs = _InputsWithStoppingSignals(
-          dataset=inputs.dataset,
-          batch_size=ctx.batch_size_for_input_fn,
-          add_padding=True)
-
-    if is_dataset:
-      dataset_initializer = inputs.dataset_initializer()
-    num_replicas_per_host = ctx.num_of_replicas_per_host
-
-  def tpu_ordinal_function_impl(replica_id):
-    if ctx.device_assignment:
-      return ctx.device_assignment.tpu_ordinal(replica=replica_id)
-    else:
-      return replica_id % num_replicas_per_host
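-
-  # E.g. without a device_assignment and with 8 replicas per host, replica 11
-  # maps to TPU ordinal 11 % 8 = 3 on its host.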
-
-  def device_function_impl(replica_id):
-    return ctx.tpu_host_placement_function(replica_id=replica_id)
-
-  def enqueue_ops_fn():
-    """Generates enqueue ops for all the hosts."""
-    broadcasted_inputs = []
-    flattened_inputs = None  # Cache result from input_fn.
-    signals = None
-    for host_id in xrange(num_hosts):
-      with ops.device(ctx.tpu_host_placement_function(host_id=host_id)):
-        for _ in xrange(ctx.num_of_replicas_per_host):
-          # Note: input_fn is only called once at host 0 for the first replica.
-          # The features and labels returned from that invocation are
-          # broadcasted to other replicas (including the replicas on other
-          # hosts).
-          if flattened_inputs is None:
-            features, labels = inputs.features_and_labels()  # Calls get_next()
-            signals = inputs.signals()
-
-            inputs_structure_recorder.validate_and_record_structure(
-                features, labels)
-            flattened_inputs = (
-                inputs_structure_recorder.flatten_features_and_labels(
-                    features, labels, signals))
-          broadcasted_inputs.append(flattened_inputs)
-
-    infeed_queue = tpu_feed.InfeedQueue(
-        number_of_tuple_elements=len(broadcasted_inputs[0]))
-    captured_infeed_queue.capture(infeed_queue)
-    enqueue_ops = infeed_queue.generate_enqueue_ops(
-        broadcasted_inputs,
-        tpu_ordinal_function=tpu_ordinal_function_impl,
-        placement_function=device_function_impl)
-
-    if signals is None:
-      return enqueue_ops
-    else:
-      return {
-          'ops': enqueue_ops,
-          'signals': signals,
-      }
-
-  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
-
-
-class _InputPipeline(object):
-  """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue.
-
-  `_InputPipeline` abstracts the per-core/per-host `input_fn` invocation from
-  the call site.  To be precise, based on the configuration in
-  `_InternalTPUContext`, it invokes `input_fn` for all cores (usually
-  multi-host TPU training) or for one host (usually for single-host TPU
-  evaluation), and sends all `features` and `labels` returned by `input_fn` to
-  TPU infeed. For per-core invocation, `features` and `labels` are piped to
-  infeed directly, one tuple for each core. For per-host invocation,  `features`
-  and `labels` are split at host (with respect to `batch_axis`) and piped to all
-  cores accordingly.
-
-  In addition, `_InputPipeline` also handles flattening/unflattening.  Model
-  inputs returned by the `input_fn` can have one of the following forms:
-  1. features
-  2. (features, labels)
-  3. ((arbitrarily nested structure of features), labels)
-
-  Internally, form 1 is reformed to `(features, None)` as features and labels
-  are passed separately to underlying methods. For TPU training, TPUEstimator
-  may expect multiple `features` and `labels` tuples, one for each core.
-
-  TPUEstimator allows various structures for inputs (namely `features` and
-  `labels`).  Both `features` and `labels` can be any nested structure
-  supported by TF nest (namely, dicts, tuples, namedtuples or any nested
-  structure of such Tensors).  `labels` could be `None` as well.
-
-  These are flattened before they are passed to the infeed/outfeed library,
-  as that expects flattened lists.
-  """
-
-  class InputsStructureRecorder(object):
-    """The recorder to record inputs structure."""
-
-    def __init__(self, input_partition_dims=None):
-      # Holds the structure of inputs
-      self._feature_structure = {}
-      self._flattened_input_dims = None
-
-      if input_partition_dims:
-        # This should have been validated in TPUConfig.
-        assert len(input_partition_dims) <= 2, 'must have 1 or 2 elements.'
-        if len(input_partition_dims) == 2:
-          self._feature_dims, self._label_dims = input_partition_dims
-        else:
-          self._feature_dims = input_partition_dims[0]
-          self._label_dims = None
-
-        assert self._feature_dims is not None, ('input_partition_dims[0] must '
-                                                'not be None')
-      else:
-        self._feature_dims = None
-        self._label_dims = None
-
-      # Internal state.
-      self._initialized = False
-
-    @property
-    def flattened_input_dims(self):
-      assert self._initialized, 'InputsStructureRecorder is not initialized.'
-      return self._flattened_input_dims
-
-    def has_labels(self):
-      return 'labels' in self._feature_structure
-
-    def _flatten_input_dims(self, feature_dims, feature_dims_names, label_dims,
-                            label_dims_names, label_names, has_labels):
-      """Flatten input dims with the same order as flattened input tensors."""
-      flattened_input_dims = []
-      if feature_dims_names:
-        # We need a fixed ordering for matching the tensors in features.
-        flattened_input_dims.extend(
-            [feature_dims[name] for name in feature_dims_names])
-      else:
-        flattened_input_dims.append(feature_dims)
-
-      if label_dims_names:
-        # We need a fixed ordering for matching the tensors in labels.
-        flattened_input_dims.extend(
-            [label_dims[name] for name in label_dims_names])
-      else:
-        if label_names:
-          num_tensors_in_label = len(label_names)
-        else:
-          num_tensors_in_label = int(has_labels)
-        # Setting `None` in input_partition_dims[1] will apply `None` to
-        # all the tensors in labels, regardless of internal structure.
-        flattened_input_dims.extend([label_dims] * num_tensors_in_label)
-
-      return flattened_input_dims
-
-    def validate_and_record_structure(self, features, labels):
-      """Validates and records the structure of `features` and `labels`."""
-      # Extract structure.
-      has_labels = labels is not None
-      feature_names = _extract_key_names(features)
-      label_names = _extract_key_names(labels)
-
-      if not self._initialized:
-        # Record structure.
-        self._initialized = True
-        if self._feature_dims is not None:
-          feature_dims_names = _extract_key_names(self._feature_dims)
-          if feature_dims_names != feature_names:
-            raise ValueError(
-                'TPUConfig.input_partition_dims[0] mismatched feature'
-                ' keys. Expected {}, got {}'.format(feature_names,
-                                                    feature_dims_names))
-
-          label_dims_names = _extract_key_names(self._label_dims)
-          if self._label_dims is not None and label_dims_names != label_names:
-            raise ValueError(
-                'TPUConfig.input_partition_dims[1] mismatched label'
-                ' keys. Expected {}, got {}'.format(label_names,
-                                                    label_dims_names))
-
-          self._flattened_input_dims = self._flatten_input_dims(
-              self._feature_dims, feature_dims_names, self._label_dims,
-              label_dims_names, label_names, has_labels)
-
-    def flatten_features_and_labels(self, features, labels, signals=None):
-      """Flattens the `features` and `labels` to a single tensor list."""
-      self._feature_structure['features'] = features
-      if labels is not None:
-        self._feature_structure['labels'] = labels
-      if signals is not None:
-        self._feature_structure['signals'] = signals
-      return data_nest.flatten(self._feature_structure)
-
-    def unflatten_features_and_labels(self, flattened_inputs):
-      """Restores the flattened inputs to original features and labels form.
-
-      Args:
-        flattened_inputs: Flattened inputs for each shard.
-
-      Returns:
-        A tuple of (`features`, `labels`), where `labels` could be None.
-        Each one, if present, should have identical structure (single tensor vs
-        dict) as the one returned by input_fn.
-
-      Raises:
-        ValueError: If the number of expected tensors from `flattened_inputs`
-          mismatches the recorded structure.
-      """
-
-      unflattened_inputs = data_nest.pack_sequence_as(self._feature_structure,
-                                                      flattened_inputs)
-      return _Inputs(
-          unflattened_inputs['features'],
-          unflattened_inputs.get('labels'),
-          signals=unflattened_inputs.get('signals'))
-
-  def __init__(self, input_fn, batch_axis, ctx):
-    """Constructor.
-
-    Args:
-      input_fn: input fn for train or eval.
-      batch_axis: A python tuple of int values describing how each tensor
-        produced by the Estimator `input_fn` should be split across the TPU
-        compute shards.
-      ctx: A `_InternalTPUContext` instance with mode.
-
-    Raises:
-      ValueError: If both `sharded_features` and `num_cores` are `None`.
-    """
-    self._inputs_structure_recorder = _InputPipeline.InputsStructureRecorder(
-        ctx.input_partition_dims)
-
-    self._sharded_per_core = ctx.is_input_sharded_per_core()
-    self._input_fn = input_fn
-    self._infeed_queue = None
-    self._ctx = ctx
-    self._batch_axis = batch_axis
-
-  def generate_infeed_enqueue_ops_and_dequeue_fn(self):
-    """Generates infeed enqueue ops and dequeue_fn."""
-    # When tf.while_loop is built, the body function, which invokes the
-    # `enqueue_fn` passed in, is called to construct the graph, so the
-    # input_fn structure is recorded.
-    enqueue_ops, all_hooks, run_infeed_loop_on_coordinator = (
-        self._invoke_input_fn_and_record_structure())
-
-    self._validate_input_pipeline()
-
-    def dequeue_fn():
-      """dequeue_fn is used by TPU to retrieve the tensors."""
-      # In the model-parallel case, both the host-side and device-side
-      # computations must agree on the core on which infeed takes place. We
-      # choose to perform infeed on logical core 0 of each replica.
-      values = self._infeed_queue.generate_dequeue_op(tpu_device=0)
-      # The unflatten process uses the structure information recorded above.
-      return self._inputs_structure_recorder.unflatten_features_and_labels(
-          values)
-
-    return (enqueue_ops, dequeue_fn, all_hooks, run_infeed_loop_on_coordinator)
-
-  def _invoke_input_fn_and_record_structure(self):
-    """Deploys the input pipeline and record input structure."""
-    enqueue_ops = []
-    infeed_queues = []
-    all_dataset_initializers = []
-    num_hosts = self._ctx.num_hosts
-    tpu_host_placement_fn = self._ctx.tpu_host_placement_function
-
-    run_infeed_loop_on_coordinator = True
-
-    if self._sharded_per_core:
-      # Per-Core input pipeline deployment.
-      # Invoke input pipeline for each core and placed on the corresponding
-      # host.
-      for host_id in range(num_hosts):
-        host_device = tpu_host_placement_fn(host_id=host_id)
-        with ops.device(host_device):
-          with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            enqueue_ops_fn, captured_infeed_queue = (
-                generate_per_core_enqueue_ops_fn_for_host(
-                    self._ctx, self._input_fn, self._inputs_structure_recorder,
-                    host_device, host_id))
-
-            if _WRAP_INPUT_FN_INTO_WHILE_LOOP:
-              run_infeed_loop_on_coordinator = False
-              enqueue_ops.append(
-                  _wrap_computation_in_while_loop(
-                      device=host_device, op_fn=enqueue_ops_fn))
-            else:
-              enqueue_ops.append(enqueue_ops_fn())
-            # Infeed_queue_getter must be called after enqueue_ops_fn is called.
-            infeed_queues.append(captured_infeed_queue.get())
-
-    elif self._ctx.is_input_broadcast_with_iterators():
-      # Only calls input_fn in host 0.
-      host_device = tpu_host_placement_fn(host_id=0)
-      enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
-          generate_broadcast_enqueue_ops_fn(self._ctx, self._input_fn,
-                                            self._inputs_structure_recorder,
-                                            num_hosts))
-      if dataset_initializer:
-        all_dataset_initializers.append(dataset_initializer)
-        run_infeed_loop_on_coordinator = False
-        wrap_fn = (
-            _wrap_computation_in_while_loop
-            if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else
-            _wrap_computation_in_while_loop_with_stopping_signals)
-        enqueue_ops.append(wrap_fn(device=host_device, op_fn=enqueue_ops_fn))
-      else:
-        enqueue_ops.append(enqueue_ops_fn())
-      infeed_queues.append(captured_infeed_queue.get())
-    else:
-      for host_id in range(num_hosts):
-        host_device = tpu_host_placement_fn(host_id=host_id)
-        with ops.device(host_device):
-          with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            if self._ctx.is_input_per_host_with_iterators():
-              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
-                  generate_per_host_v2_enqueue_ops_fn_for_host(
-                      self._ctx, self._input_fn,
-                      self._inputs_structure_recorder, host_device, host_id))
-            else:
-              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
-                  generate_per_host_enqueue_ops_fn_for_host(
-                      self._ctx, self._input_fn,
-                      self._inputs_structure_recorder, self._batch_axis,
-                      host_device, host_id))
-
-            # NOTE(xiejw): We dispatch here based on the return type of the
-            # user's `input_fn`.
-            #
-            # 1. If input_fn returns a Dataset instance, we initialize the
-            # iterator outside of tf.while_loop, and call the iterator.get_next
-            # inside tf.while_loop.  This should always be safe.
-            #
-            # 2. If input_fn returns (features, labels), it is too late to wrap
-            # them inside tf.while_loop, as resource initialization cannot be
-            # handled in TF control flow properly. In this case, we will use
-            # python loop to enqueue the data into TPU system.  This may be
-            # slow compared to the previous case.
-            if dataset_initializer:
-              all_dataset_initializers.append(dataset_initializer)
-              run_infeed_loop_on_coordinator = False
-              wrap_fn = (
-                  _wrap_computation_in_while_loop
-                  if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else
-                  _wrap_computation_in_while_loop_with_stopping_signals)
-              enqueue_ops.append(
-                  wrap_fn(device=host_device, op_fn=enqueue_ops_fn))
-            else:
-              enqueue_ops.append(enqueue_ops_fn())
-            infeed_queues.append(captured_infeed_queue.get())
-    # infeed_queue is used to generate dequeue ops. The only thing it uses for
-    # dequeue is dtypes and shapes, so any one of the queues can be used.
-    # Here, grab the first one.
-    self._infeed_queue = infeed_queues[0]
-    return enqueue_ops, [
-        util_lib.MultiHostDatasetInitializerHook(all_dataset_initializers)
-    ], run_infeed_loop_on_coordinator
-
-  def _validate_input_pipeline(self):
-    """Validates the input pipeline.
-
-    Performs some sanity checks to log user-friendly information. Ideally we
-    would error out to give users a better error message, but if
-    _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break
-    user code, so we log a warning instead.
-
-    Raises:
-      RuntimeError: If the validation failed.
-    """
-    if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS):
-      err_msg = ('Input pipeline contains one or more QueueRunners. '
-                 'It could be slow and not scalable. Please consider '
-                 'converting your input pipeline to use `tf.data` instead (see '
-                 'https://www.tensorflow.org/guide/datasets for '
-                 'instructions).')
-      if _WRAP_INPUT_FN_INTO_WHILE_LOOP:
-        raise RuntimeError(err_msg)
-      else:
-        logging.warn(err_msg)
-
-
-def call_computation(computation,
-                     experimental_exported_model_uses_all_cores=True):
-  """Call computation.
-
-  The computation builds a single-core TPU inference graph. If
-  `experimental_exported_model_uses_all_cores` is `True`, this function
-  round-robins the computation among all TPU cores visible to the host;
-  otherwise, it uses a single core.
-
-  Args:
-    computation: A Python function that takes no inputs and builds computation
-      graph. If `computation` returns m outputs, this function will return a
-      list of m Tensors.
-    experimental_exported_model_uses_all_cores: Whether to round-robin among all
-      cores visible to the host, or to use a single core.
-
-  Returns:
-    A list of output tensors.
-  """
-  if experimental_exported_model_uses_all_cores:
-    # Using `TPUPartitionedCall` makes it possible to target a different
-    # TPU core with every `Session.run()` call. Note that the entire inference
-    # graph executes on a single core, and that invocations of this graph
-    # will round-robin among the cores attached to a host.
-    @function.Defun()
-    def tpu_subgraph():
-      return computation()
-
-    return tpu_functional.TPUPartitionedCall(
-        args=tpu_subgraph.captured_inputs,
-        device_ordinal=tpu_ordinal_selector_op.tpu_ordinal_selector(),
-        Tout=[o.type for o in tpu_subgraph.definition.signature.output_arg],
-        f=tpu_subgraph)
-  else:
-    return computation()
-
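-# Hypothetical usage sketch (my_inference_graph is an invented name): wrap a
-# no-argument graph builder so that successive Session.run() calls round-robin
-# across the host's TPU cores.
-#
-#   outputs = call_computation(
-#       lambda: my_inference_graph(),
-#       experimental_exported_model_uses_all_cores=True)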
-
-class _ModelFnWrapper(object):
-  """A `model_fn` wrapper.
-
-  This makes calling model_fn on CPU and TPU easier and more consistent, and
-  it performs the necessary checks and mutations required by TPU training and
-  evaluation.
-
-  In addition, this wrapper manages converting the `model_fn` to a single TPU
-  train and eval step.
-  """
-
-  def __init__(self, model_fn, config, params, ctx):
-    self._model_fn = model_fn
-    self._config = config
-    self._params = params
-    self._ctx = ctx
-
-  def call_without_tpu(self, features, labels, is_export_mode):
-    return self._call_model_fn(features, labels, is_export_mode=is_export_mode)
-
-  def _add_embedding_features(self, features):
-    if self._ctx.embedding_config:
-      tpu_embedding_ = self._ctx.embedding_config.tpu_embedding
-      embedding_activations = tpu_embedding_.get_activations()
-      features.update(embedding_activations)
-
-  def convert_to_single_tpu_train_step(self, dequeue_fn):
-    """Converts user provided model_fn` as a single train step on TPU.
-
-    The user-provided `model_fn` takes the input tuple
-    (features, labels) and produces the EstimatorSpec with train_op and loss for
-    train `mode`. This usually represents a single train computation on CPU.
-
-    For TPU training, a train (computation) step is first wrapped in a
-    tf.while_loop control flow to repeat many times and then replicated to
-    all TPU shards. In addition, the input should be taken from the TPU infeed
-    rather than from the input pipeline (input_fn) directly. To fit this loop
-    and replicate pattern, the original train computation is reformed into the
-    returned `train_step`.
-
-    Args:
-      dequeue_fn: The function to retrieve inputs, features and labels, from TPU
-        infeed dequeue channel.
-
-    Returns:
-      A tuple of train_step, host_call, captured scaffold_fn, and the captured
-      training hooks. The train_step represents the train step for TPU.
-    """
-
-    host_call = _OutfeedHostCall(self._ctx)
-    captured_scaffold_fn = _CapturedObject()
-    captured_training_hooks = _CapturedObject()
-
-    def train_step(loss):
-      """Training step function for use inside a while loop."""
-      del loss  # unused; required in function signature.
-      inputs = dequeue_fn()
-      features, labels = inputs.features_and_labels()
-      self._add_embedding_features(features)
-
-      estimator_spec = self._verify_estimator_spec(
-          self._call_model_fn(features, labels))
-      loss, train_op = estimator_spec.loss, estimator_spec.train_op
-
-      if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
-        captured_scaffold_fn.capture(estimator_spec.scaffold_fn)
-      else:
-        captured_scaffold_fn.capture(None)
-
-      captured_training_hooks.capture(estimator_spec.training_hooks)
-
-      tracing_ops = []
-      if tensor_tracer.TensorTracer.is_enabled():
-        tt = tensor_tracer.TensorTracer()
-        loss, tracing_ops = tt.trace_tpu(ops.get_default_graph(),
-                                         loss, train_op,
-                                         self._ctx.num_replicas,
-                                         self._ctx.num_of_replicas_per_host,
-                                         self._ctx.num_hosts)
-
-      if self._ctx.embedding_config is None:
-        apply_sparse_grads = []
-      else:
-        tpu_embedding_ = self._ctx.embedding_config.tpu_embedding
-        apply_sparse_grads = [tpu_embedding_.generate_send_gradients_op()]
-
-      # We must run train_op to update the variables prior to running the
-      # outfeed.
-      with ops.control_dependencies([train_op] + tracing_ops +
-                                    apply_sparse_grads):
-        host_call_outfeed_ops = []
-        if (isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)  # pylint: disable=protected-access
-            and estimator_spec.host_call is not None):
-          host_call.record({'host_call': estimator_spec.host_call})
-          host_call_outfeed_ops = host_call.create_enqueue_op()
-        with ops.control_dependencies(host_call_outfeed_ops):
-          return array_ops.identity(loss)
-
-    return (train_step, host_call, captured_scaffold_fn,
-            captured_training_hooks)
-
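-  # Illustrative aside (a sketch, not executed here): the returned
-  # `train_step` is meant to be repeated inside a TPU while loop, roughly
-  #
-  #   loss = training_loop.repeat(iterations_per_loop, train_step,
-  #                               [_INITIAL_LOSS])
-  #
-  # where `training_loop.repeat` builds the tf.while_loop that runs on TPU.
-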
-  def convert_to_single_tpu_eval_step(self, dequeue_fn):
-    """Converts the user-provided `model_fn` into a single eval step on TPU.
-
-    Similar to training, the user provided `model_fn` takes input tuple
-    (features, labels) and produces the TPUEstimatorSpec with eval_metrics for
-    eval `mode`. This usually represents a single evaluation computation on CPU.
-
-    For TPU evaluation, an eval (computation) step is first wrapped in a
-    tf.while_loop control flow so that it repeats many times, and is then
-    replicated to all TPU shards. In addition, the input and output are
-    handled slightly differently. The input, features and labels, must be
-    taken from the TPU infeed rather than from the input pipeline (input_fn)
-    directly. The output is managed in two stages. First, the model outputs,
-    usually model logits produced by the evaluation computation, are
-    transferred from the TPU system to the CPU. Then, all model outputs are
-    concatenated on the CPU and sent to the metric_fn for metrics computation.
-    To fit this TPU evaluation pattern, the original eval computation is
-    reshaped into the returned `eval_step`.
-
-    Args:
-      dequeue_fn: The function to retrieve inputs, features and labels, from TPU
-        infeed dequeue channel.
-
-    Returns:
-      A tuple of (eval_fn, host_calls, captured scaffold_fn, captured eval
-      hooks), where eval_fn represents the eval step for TPU.
-    """
-    host_calls = _OutfeedHostCall(self._ctx)
-    captured_scaffold_fn = _CapturedObject()
-    captured_eval_hooks = _CapturedObject()
-
-    def eval_step(total_loss):
-      """Evaluation step function for use inside a while loop."""
-      inputs = dequeue_fn()
-      features, labels = inputs.features_and_labels()
-      self._add_embedding_features(features)
-
-      tpu_estimator_spec = self._call_model_fn(features, labels)
-      if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
-        raise RuntimeError(
-            'estimator_spec used by TPU evaluation must have type '
-            '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec)))
-
-      loss = tpu_estimator_spec.loss
-      captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn)
-      captured_eval_hooks.capture(tpu_estimator_spec.evaluation_hooks)
-
-      to_record = {}
-      if tpu_estimator_spec.eval_metrics:
-        to_record['eval_metrics'] = tpu_estimator_spec.eval_metrics
-      if tpu_estimator_spec.host_call is not None:
-        # We assume that evaluate won't update global step, so we don't wrap
-        # this host_call.
-        to_record['host_call'] = tpu_estimator_spec.host_call
-      host_calls.record(to_record)
-
-      with ops.control_dependencies(host_calls.create_enqueue_op()):
-        return math_ops.add(total_loss, loss)
-
-    return eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks
-
-  def convert_to_single_tpu_predict_step(self, dequeue_fn):
-    """Converts the user-provided `model_fn` into a single predict step on TPU.
-
-    Args:
-      dequeue_fn: The function to retrieve inputs, features and labels, from TPU
-        infeed dequeue channel.
-
-    Returns:
-      A tuple of (predict_fn, host_calls, captured scaffold_fn, captured
-      prediction hooks), where predict_fn represents the predict step for TPU.
-    """
-    host_calls = _OutfeedHostCall(self._ctx)
-    captured_scaffold_fn = _CapturedObject()
-    captured_predict_hooks = _CapturedObject()
-
-    def predict_step(unused_scalar_stopping_signal):
-      """Prediction step function for use inside a while loop."""
-      inputs = dequeue_fn()
-      features, labels = inputs.features_and_labels()
-      stopping_signals = inputs.signals()
-
-      assert stopping_signals is not None, (
-          'Internal Error: `signals` is missing.')
-
-      tpu_estimator_spec = self._call_model_fn(
-          features, labels, is_export_mode=False)
-      if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
-        raise RuntimeError(
-            'estimator_spec used by TPU prediction must have type '
-            '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec)))
-
-      self._verify_tpu_spec_predictions(tpu_estimator_spec.predictions)
-
-      captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn)
-      captured_predict_hooks.capture(tpu_estimator_spec.prediction_hooks)
-      to_record = {}
-      identity_fn = lambda **kwargs: kwargs
-      to_record['predictions'] = [identity_fn, tpu_estimator_spec.predictions]
-      to_record['signals'] = [identity_fn, stopping_signals]
-      if tpu_estimator_spec.host_call is not None:
-        to_record['host_call'] = tpu_estimator_spec.host_call
-      host_calls.record(to_record)
-
-      with ops.control_dependencies(host_calls.create_enqueue_op()):
-        return _StopSignals.as_scalar_stopping_signal(stopping_signals)
-
-    return (predict_step, host_calls, captured_scaffold_fn,
-            captured_predict_hooks)
-
-  def _verify_tpu_spec_predictions(self, predictions):
-    """Validates TPUEstimatorSpec.predictions dict."""
-    # TODO(xiejw): Add validation for the prediction dictionary.
-    # TODO(xiejw): Add support for a single tensor as predictions.
-    if not isinstance(predictions, dict):
-      raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.')
-
-    for (key, tensor) in predictions.items():
-      if tensor.shape.dims[0].value is None:
-        raise ValueError(
-            'The tensor with key ({}) in TPUEstimatorSpec.predictions has '
-            'dynamic shape (should be static). Tensor: {}'.format(key, tensor))
-    return predictions
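-
-    # Illustrative note: the static batch dimension required above is commonly
-    # obtained by dropping the remainder when batching in input_fn, e.g.
-    #
-    #   dataset = dataset.batch(params['batch_size'], drop_remainder=True)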
-
-  def _validate_model_features_and_labels(self, features, labels,
-                                          is_export_mode):
-    """Validates that the features and labels for the model function are valid.
-
-    A valid features/labels object is the one with:
-    - Type: A tensor or any nested structure of tensors supported by TF nest,
-        namely nested dictionary, tuple, namedtuple, or sequence of tensors.
-    - Static shape if is_export_mode is False.
-
-    Args:
-      features: the features that would be input to the model function.
-      labels: the labels that would be input to the model function.
-      is_export_mode: boolean value specifying if in export mode.
-
-    Raises:
-      TypeError: If features/labels are not of the correct type.
-      ValueError: If features/labels have dynamic shape.
-    """
-
-    def validate(obj, obj_name):
-      """Helper validate function."""
-      if is_export_mode or self._ctx.is_running_on_cpu(is_export_mode):
-        return
-      if isinstance(obj, ops.Tensor):
-        if not obj.get_shape().is_fully_defined():
-          raise ValueError(
-              'The {} to the model returned by input_fn must have static shape.'
-              ' Tensor: {}'.format(obj_name, obj))
-      else:
-        for tensor in data_nest.flatten(obj):
-          if not tensor.get_shape().is_fully_defined():
-            raise ValueError(
-                ('The {} to the model returned by input_fn must have static '
-                 'shape. Tensor: {}').format(obj_name, tensor))
-
-    validate(features, 'features')
-    if labels is not None:
-      validate(labels, 'labels')
-
-  def _call_model_fn(self, features, labels, is_export_mode=False):
-    """Calls the model_fn with required parameters."""
-    self._validate_model_features_and_labels(features, labels, is_export_mode)
-    model_fn_args = function_utils.fn_args(self._model_fn)
-    kwargs = {}
-
-    # Make deep copies of `config` and `params` in case the user mutates them.
-    config = copy.deepcopy(self._config)
-    params = copy.deepcopy(self._params)
-
-    if 'labels' in model_fn_args:
-      kwargs['labels'] = labels
-    elif labels is not None:
-      raise ValueError(
-          'model_fn does not take labels, but input_fn returns labels.')
-    if 'mode' in model_fn_args:
-      kwargs['mode'] = self._ctx.mode
-    if 'config' in model_fn_args:
-      kwargs['config'] = config
-    if 'params' in model_fn_args:
-      kwargs['params'] = params
-
-    if 'params' not in model_fn_args:
-      raise ValueError('model_fn ({}) does not include params argument, '
-                       'required by TPUEstimator to pass batch size as '
-                       'params[\'batch_size\']'.format(self._model_fn))
-
-    if is_export_mode:
-      batch_size_for_model_fn = None
-    else:
-      batch_size_for_model_fn = self._ctx.batch_size_for_model_fn
-
-    if batch_size_for_model_fn is not None:
-      _add_item_to_params(params, _BATCH_SIZE_KEY, batch_size_for_model_fn)
-
-    running_on_cpu = self._ctx.is_running_on_cpu(is_export_mode)
-    _add_item_to_params(params, _USE_TPU_KEY, not running_on_cpu)
-
-    if not running_on_cpu:
-      user_context = tpu_context.TPUContext(
-          internal_ctx=self._ctx, call_from_input_fn=False)
-      _add_item_to_params(params, _CTX_KEY, user_context)
-
-    estimator_spec = self._model_fn(features=features, **kwargs)
-    if (running_on_cpu and
-        isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)):  # pylint: disable=protected-access
-      # The estimator_spec will be passed to `Estimator` directly, which expects
-      # type `EstimatorSpec`.
-      return estimator_spec.as_estimator_spec()
-    else:
-      return estimator_spec
-
-  def _verify_estimator_spec(self, estimator_spec):
-    """Validates the estimator_spec."""
-    if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
-      return estimator_spec
-
-    err_msg = '{} returned by EstimatorSpec is not supported in TPUEstimator.'
-    if estimator_spec.training_chief_hooks:
-      raise ValueError(
-          err_msg.format('training_chief_hooks') + 'If you want' +
-          ' to pass training hooks, please pass via training_hooks.')
-
-    if estimator_spec.scaffold:
-      logging.warning('EstimatorSpec.Scaffold is ignored by TPU train/eval. '
-                      'Please use TPUEstimatorSpec.')
-    return estimator_spec
-
-
-class _OutfeedHostCall(object):
-  """Support for `eval_metrics` and `host_call` in TPUEstimatorSpec."""
-
-  def __init__(self, ctx):
-    self._ctx = ctx
-    self._names = []
-    # All of these are dictionaries of lists keyed on the name.
-    self._host_fns = {}
-    self._tensor_keys = collections.defaultdict(list)
-    self._tensors = collections.defaultdict(list)
-    self._tensor_dtypes = collections.defaultdict(list)
-    self._tensor_shapes = collections.defaultdict(list)
-
-  @staticmethod
-  def validate(host_calls):
-    """Validates the `eval_metrics` and `host_call` in `TPUEstimatorSpec`."""
-
-    for name, host_call in host_calls.items():
-      if not isinstance(host_call, (tuple, list)):
-        raise ValueError('{} should be a tuple or list.'.format(name))
-      if len(host_call) != 2:
-        raise ValueError('{} should have two elements.'.format(name))
-      if not callable(host_call[0]):
-        raise TypeError('{}[0] should be callable.'.format(name))
-      if not isinstance(host_call[1], (tuple, list, dict)):
-        raise ValueError('{}[1] should be a tuple, list, or dict.'.format(name))
-
-      if isinstance(host_call[1], (tuple, list)):
-        fullargspec = tf_inspect.getfullargspec(host_call[0])
-        fn_args = function_utils.fn_args(host_call[0])
-        # wrapped_hostcall_with_global_step uses varargs, so we allow that.
-        if fullargspec.varargs is None and len(host_call[1]) != len(fn_args):
-          raise RuntimeError(
-              'In TPUEstimatorSpec.{}, length of tensors {} does not match '
-              'method args of the function, which takes {}.'.format(
-                  name, len(host_call[1]), len(fn_args)))
-
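-  # For illustration, a valid entry in `host_calls` (names are hypothetical):
-  #
-  #   def host_fn(global_step, loss):
-  #     ...  # e.g. write summaries on the host
-  #
-  #   host_calls = {'host_call': (host_fn, [global_step_tensor, loss_tensor])}
-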
-  @staticmethod
-  def create_cpu_hostcall(host_calls):
-    """Runs the host_call on CPU instead of TPU when use_tpu=False."""
-
-    _OutfeedHostCall.validate(host_calls)
-    ret = {}
-    for name, host_call in host_calls.items():
-      host_fn, tensors = host_call
-      if isinstance(tensors, (tuple, list)):
-        ret[name] = host_fn(*tensors)
-      else:
-        # Must be dict.
-        try:
-          ret[name] = host_fn(**tensors)
-        except TypeError as e:
-          logging.warning(
-              'Exception while calling %s: %s. It is likely the tensors '
-              '(%s[1]) do not match the '
-              'function\'s arguments', name, e, name)
-          raise
-    return ret
-
-  def record(self, host_calls):
-    """Records the host_call structure."""
-
-    for name, host_call in host_calls.items():
-      host_fn, tensor_list_or_dict = host_call
-      self._names.append(name)
-      self._host_fns[name] = host_fn
-
-      if isinstance(tensor_list_or_dict, dict):
-        for (key, tensor) in six.iteritems(tensor_list_or_dict):
-          self._tensor_keys[name].append(key)
-          self._tensors[name].append(tensor)
-          self._tensor_dtypes[name].append(tensor.dtype)
-          self._tensor_shapes[name].append(tensor.shape)
-      else:
-        # List or tuple.
-        self._tensor_keys[name] = None
-        for tensor in tensor_list_or_dict:
-          self._tensors[name].append(tensor)
-          self._tensor_dtypes[name].append(tensor.dtype)
-          self._tensor_shapes[name].append(tensor.shape)
-
-  def create_enqueue_op(self):
-    """Create the op to enqueue the recorded host_calls.
-
-    Returns:
-      A list of enqueue ops, which is empty if there are no host calls.
-    """
-    if not self._names:
-      return []
-
-    tensors = []
-    # TODO(jhseu): Consider deduping tensors.
-    for name in self._names:
-      tensors.extend(self._tensors[name])
-
-    with ops.device(tpu.core(0)):
-      return [tpu_ops.outfeed_enqueue_tuple(tensors)]
-
-  def create_tpu_hostcall(self):
-    """Sends the tensors through outfeed and runs the host_fn on CPU.
-
-    The tensors are concatenated along dimension 0 to form a global tensor
-    across all shards. The concatenated tensors are passed to the host_fn,
-    which is executed on the first host.
-
-    Returns:
-      A dictionary mapping name to the return type of the host_call by that
-      name.
-
-    Raises:
-      RuntimeError: If outfeed tensor is scalar.
-    """
-    if not self._names:
-      return {}
-
-    ret = {}
-    # For each i, dequeue_ops[i] is a list containing the tensors from all
-    # shards. This list is concatenated later.
-    dequeue_ops = []
-    tensor_dtypes = []
-    tensor_shapes = []
-    for name in self._names:
-      for _ in self._tensors[name]:
-        dequeue_ops.append([])
-      for dtype in self._tensor_dtypes[name]:
-        tensor_dtypes.append(dtype)
-      for shape in self._tensor_shapes[name]:
-        tensor_shapes.append(shape)
-
-    # Outfeed ops execute on each replica's first logical core. Note: we must
-    # constrain this such that we have at most one outfeed dequeue and enqueue
-    # per replica.
-    for i in xrange(self._ctx.num_replicas):
-      host_device, ordinal_id = self._ctx.device_for_replica(i)
-      with ops.device(host_device):
-        outfeed_tensors = tpu_ops.outfeed_dequeue_tuple(
-            dtypes=tensor_dtypes,
-            shapes=tensor_shapes,
-            device_ordinal=ordinal_id)
-        for j, item in enumerate(outfeed_tensors):
-          dequeue_ops[j].append(item)
-
-    # Deconstruct dequeue ops.
-    flat_dequeue_ops = []
-    for l in dequeue_ops:
-      flat_dequeue_ops.extend(l)
-
-    dequeue_ops_by_name = {}
-    pos = 0
-    for name in self._names:
-      dequeue_ops_by_name[name] = dequeue_ops[pos:pos +
-                                              len(self._tensors[name])]
-      pos += len(self._tensors[name])
-
-    def _call_host_fn(fn, *args, **kw):
-      context = CatchInvalidHostcallFunctions()
-      context.Enter()
-      result = fn(*args, **kw)
-      context.Exit()
-      context.ExitResult(result)
-      return result
-
-    # It is assumed that evaluation always happens on a single-host TPU
-    # system, so place all ops on the TPU host if possible.
-    #
-    # TODO(jhseu): Evaluate whether this is right for summaries.
-    with ops.device(self._ctx.tpu_host_placement_function(replica_id=0)):
-      for name in self._names:
-        dequeue_ops = dequeue_ops_by_name[name]
-        for i, item in enumerate(dequeue_ops):
-          if dequeue_ops[i][0].shape.ndims == 0:
-            raise RuntimeError(
-                'All tensors outfed from TPU should preserve batch size '
-                'dimension, but got scalar {}'.format(dequeue_ops[i][0]))
-          # TODO(xiejw): Make the specification of the outfeed combination
-          # function more explicit and well-documented.  We may want to give the
-          # user the option of concatenating along any axis.
-          if (self._ctx.config.tpu_config.per_host_input_for_training is
-              tpu_config.InputPipelineConfig.BROADCAST):
-            # If the infeed is in BROADCAST mode (each core receiving the same
-            # input), then we assume that the cores also produce identical
-            # copies of the same output, and we simply take the output from
-            # the first core.  This mode is used by Mesh-TensorFlow.
-            with ops.control_dependencies(dequeue_ops[i]):
-              dequeue_ops[i] = array_ops.identity(dequeue_ops[i][0])
-          else:
-            # Assume that the input has been batch-split and that axis 0 of the
-            # output tensors represents the batch size.  Concatenate along
-            # the axis 0 to re-combine the batch.
-            dequeue_ops[i] = array_ops.concat(dequeue_ops[i], axis=0)
-
-        if self._tensor_keys[name] is not None:
-          # The user-provided eval_metrics[1] is a dict.
-          dequeue_ops = dict(zip(self._tensor_keys[name], dequeue_ops))
-          try:
-            ret[name] = _call_host_fn(self._host_fns[name], **dequeue_ops)
-          except TypeError as e:
-            logging.warning(
-                'Exception while calling %s: %s. It is likely the tensors '
-                '(%s[1]) do not match the '
-                'function\'s arguments', name, e, name)
-            raise
-        else:
-          ret[name] = _call_host_fn(self._host_fns[name], *dequeue_ops)
-
-    # Force all dequeue operations to run even if they are not consumed by the
-    # host calls.
-    ret['__force_dequeue'] = control_flow_ops.group(*flat_dequeue_ops)
-    return ret
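-
-  # Shape note (an illustrative aside): with N replicas each outfeeding a
-  # tensor of shape [B, ...], the dequeued pieces are concatenated along
-  # axis 0, so each host_fn argument has shape [N * B, ...] (except in
-  # BROADCAST mode, where the first replica's copy is taken as-is).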
-
-
-class _OutfeedHostCallHook(session_run_hook.SessionRunHook):
-  """Hook to run host calls when use_tpu=False."""
-
-  def __init__(self, tensors):
-    self._tensors = tensors
-
-  def begin(self):
-    # We duplicate this code from the TPUInfeedOutfeedSessionHook rather than
-    # create a separate hook to guarantee execution order, because summaries
-    # need to be initialized before the outfeed thread starts.
-    # TODO(jhseu): Make a wrapper hook instead?
-    self._init_ops = contrib_summary.summary_writer_initializer_op()
-    # Get all the writer resources from the initializer, so we know what to
-    # flush.
-    self._finalize_ops = []
-    for op in self._init_ops:
-      self._finalize_ops.append(contrib_summary.flush(writer=op.inputs[0]))
-
-  def after_create_session(self, session, coord):
-    session.run(self._init_ops)
-
-  def before_run(self, run_context):
-    return basic_session_run_hooks.SessionRunArgs(self._tensors)
-
-  def end(self, session):
-    session.run(self._finalize_ops)
-
-
-class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
-  """Calculate and report global_step/sec and examples/sec during runtime."""
-
-  def __init__(self,
-               batch_size,
-               every_n_steps=100,
-               every_n_secs=None,
-               output_dir=None,
-               summary_writer=None):
-    self._batch_size = batch_size
-    super(ExamplesPerSecondHook, self).__init__(
-        every_n_steps=every_n_steps,
-        every_n_secs=every_n_secs,
-        output_dir=output_dir,
-        summary_writer=summary_writer)
-
-  def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
-    global_step_per_sec = elapsed_steps / elapsed_time
-    examples_per_sec = self._batch_size * global_step_per_sec
-    if self._summary_writer is not None:
-      global_step_summary = Summary(value=[
-          Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec)
-      ])
-      example_summary = Summary(value=[
-          Summary.Value(tag='examples/sec', simple_value=examples_per_sec)
-      ])
-      self._summary_writer.add_summary(global_step_summary, global_step)
-      self._summary_writer.add_summary(example_summary, global_step)
-    logging.info('global_step/sec: %g', global_step_per_sec)
-    logging.info('examples/sec: %g', examples_per_sec)
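-
-    # Worked example (illustrative numbers): with batch_size=1024 and 100
-    # steps elapsing in 12.5 seconds, global_step/sec = 100 / 12.5 = 8 and
-    # examples/sec = 1024 * 8 = 8192.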
-
-
-class InstallSignalHandlerHook(session_run_hook.SessionRunHook):
-  """Change SIGINT (CTRL^C) handler to force quit the process.
-
-  The default behavior often results in hanging processes.
-  The original handler is restored after training/evaluation.
-  """
-
-  def __init__(self):
-    self._signal_fn = signal.getsignal(signal.SIGINT)
-
-  def before_run(self, run_context):
-    signal.signal(signal.SIGINT, signal.SIG_DFL)
-
-  def end(self, session):
-    signal.signal(signal.SIGINT, self._signal_fn)
-
-
-class TPUEstimator(estimator_lib.Estimator):
-  """Estimator with TPU support.
-
-  TPUEstimator also supports training on CPU and GPU. You don't need to define
-  a separate `tf.estimator.Estimator`.
-
-  TPUEstimator handles many of the details of running on TPU devices, such as
-  replicating inputs and models for each core, and returning to host
-  periodically to run hooks.
-
-  TPUEstimator transforms a global batch size in params to a per-shard batch
-  size when calling the `input_fn` and `model_fn`. Users should specify the
-  global batch size in the constructor, and then get the batch size for each
-  shard in `input_fn` and `model_fn` via `params['batch_size']`.
-
-  - For training, `model_fn` gets per-core batch size; `input_fn` may get
-    per-core or per-host batch size depending on `per_host_input_for_training`
-    in `TPUConfig` (See docstring for TPUConfig for details).
-
-  - For evaluation and prediction, `model_fn` gets per-core batch size and
-    `input_fn` gets per-host batch size.
-
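-  For example (illustrative numbers): with `train_batch_size=1024` on a TPU
-  system with 8 shards, `model_fn` receives `params['batch_size'] == 128`.
-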
-  Evaluation
-  ==========
-
-  `model_fn` should return `TPUEstimatorSpec`, which expects the `eval_metrics`
-  for TPU evaluation. However, if eval_on_tpu is False, `model_fn` must return
-  `EstimatorSpec` and the evaluation will execute on CPU or GPU; in this case
-  the following discussion on TPU evaluation does not apply.
-
-  `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`, where
-  `tensors` could be a list of any nested structure of `Tensor`s (See
-  `TPUEstimatorSpec` for details).  `metric_fn` takes the `tensors` and returns
-  a dict from metric string name to the result of calling a metric function,
-  namely a `(metric_tensor, update_op)` tuple.
-
-  One can set `use_tpu` to `False` for testing. All training, evaluation, and
-  predict will be executed on CPU. `input_fn` and `model_fn` will receive
-  `train_batch_size` or `eval_batch_size` unmodified as `params['batch_size']`.
-
-  Current limitations:
-  --------------------
-
-  1. TPU evaluation only works on a single host (one TPU worker) except
-     BROADCAST mode.
-
-  2. `input_fn` for evaluation should **NOT** raise an end-of-input exception
-     (`OutOfRangeError` or `StopIteration`). And all evaluation steps and all
-     batches should have the same size.
-
-  Example (MNIST):
-  ----------------
-
-  ```
-  # The metric fn, which runs on CPU.
-  def metric_fn(labels, logits):
-    predictions = tf.argmax(logits, 1)
-    return {
-      'accuracy': tf.metrics.accuracy(
-          labels=labels, predictions=predictions),
-    }
-
-  # Your model fn, which runs on TPU (eval_metrics is a list in this example).
-  def model_fn(features, labels, mode, config, params):
-    ...
-    logits = ...
-
-    if mode == tf.estimator.ModeKeys.EVAL:
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=loss,
-          eval_metrics=(metric_fn, [labels, logits]))
-
-  # Or specify the eval_metrics tensors as a dict.
-  def model_fn(features, labels, mode, config, params):
-    ...
-    final_layer_output = ...
-
-    if mode == tf.estimator.ModeKeys.EVAL:
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=loss,
-          eval_metrics=(metric_fn, {
-              'labels': labels,
-              'logits': final_layer_output,
-          }))
-  ```
-
-  Prediction
-  ==========
-
-  Prediction on TPU is an experimental feature to support large-batch
-  inference. It is not designed for latency-critical systems. In addition,
-  due to some usability issues, for prediction with a small dataset, CPU
-  `.predict`, i.e., creating a new `TPUEstimator` instance with
-  `use_tpu=False`, might be more convenient.
-
-  Note: In contrast to TPU training/evaluation, the `input_fn` for prediction
-  *should* raise an end-of-input exception (`OutOfRangeError` or
-  `StopIteration`), which serves as the stopping signal to `TPUEstimator`. To be
-  precise, the ops created by `input_fn` produce one batch of the data.
-  The `predict()` API processes one batch at a time. When reaching the end of
-  the data source, an end-of-input exception should be raised by one of these
-  operations. The user usually does not need to do this manually. As long as the
-  dataset is not repeated forever, the `tf.data` API will raise an end-of-input
-  exception automatically after the last batch has been produced.
-
-  Note: Estimator.predict returns a Python generator. Please consume all the
-  data from the generator so that TPUEstimator can shut down the TPU system
-  properly for the user.
-
-  Current limitations:
-  --------------------
-  1. TPU prediction only works on a single host (one TPU worker).
-
-  2. `input_fn` must return a `Dataset` instance rather than `features`. In
-  fact, .train() and .evaluate() also support Dataset as a return value.
-
-  Example (MNIST):
-  ----------------
-  ```
-  height = 32
-  width = 32
-  total_examples = 100
-
-  def predict_input_fn(params):
-    batch_size = params['batch_size']
-
-    images = tf.random_uniform(
-        [total_examples, height, width, 3], minval=-1, maxval=1)
-
-    dataset = tf.data.Dataset.from_tensor_slices(images)
-    dataset = dataset.map(lambda images: {'image': images})
-
-    dataset = dataset.batch(batch_size)
-    return dataset
-
-  def model_fn(features, labels, params, mode):
-    # Generate predictions, called 'output', from features['image'].
-
-    if mode == tf.estimator.ModeKeys.PREDICT:
-      return tf.contrib.tpu.TPUEstimatorSpec(
-          mode=mode,
-          predictions={
-              'predictions': output,
-              'is_padding': features['is_padding']
-          })
-
-  tpu_est = TPUEstimator(
-      model_fn=model_fn,
-      ...,
-      predict_batch_size=16)
-
-  # Fully consume the generator so that TPUEstimator can shutdown the TPU
-  # system.
-  for item in tpu_est.predict(input_fn=predict_input_fn):
-    # Filter out item if the `is_padding` is 1.
-    # Process the 'predictions'
-  ```
-
-  Exporting
-  =========
-
-  `export_savedmodel` exports 2 metagraphs, one with `tag_constants.SERVING`,
-  and another with `tag_constants.SERVING` and `tag_constants.TPU`.
-  At serving time, these tags are used to select the metagraph to load.
-
-  Before running the graph on TPU, the TPU system needs to be initialized. If
-  the TensorFlow Serving model-server is used, this is done automatically. If
-  not, please call `session.run(tpu.initialize_system())`.
-
-  `tpu.outside_compilation` can be used to wrap TPU incompatible ops in
-  `model_fn`.
-
-  Example:
-  ----------------
-
-  ```
-  def model_fn(features, labels, mode, config, params):
-    ...
-    logits = ...
-    export_outputs = {
-      'logits': export_output_lib.PredictOutput(
-        {'logits': logits})
-    }
-
-    def host_call(logits):
-      class_ids = math_ops.argmax(logits)
-      classes = string_ops.as_string(class_ids)
-      export_outputs['classes'] =
-        export_output_lib.ClassificationOutput(classes=classes)
-
-    tpu.outside_compilation(host_call, logits)
-
-    ...
-  ```
-
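-  To load the TPU metagraph at serving time, select it by tags (a sketch;
-  `export_dir` is a hypothetical path):
-
-  ```
-  with tf.Session(graph=tf.Graph()) as sess:
-    tf.saved_model.loader.load(
-        sess, [tag_constants.SERVING, tag_constants.TPU], export_dir)
-  ```
-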
-  """
-
-  def __init__(self,
-               model_fn=None,
-               model_dir=None,
-               config=None,
-               params=None,
-               use_tpu=True,
-               train_batch_size=None,
-               eval_batch_size=None,
-               predict_batch_size=None,
-               batch_axis=None,
-               eval_on_tpu=True,
-               export_to_tpu=True,
-               export_to_cpu=True,
-               warm_start_from=None,
-               experimental_exported_model_uses_all_cores=False,
-               experimental_export_device_assignment=False,
-               experimental_embedding_config_spec=None):
-    """Constructs a `TPUEstimator` instance.
-
-    Args:
-      model_fn: Model function as required by `Estimator` which returns
-        EstimatorSpec or TPUEstimatorSpec. `training_hooks`, `evaluation_hooks`,
-        and `prediction_hooks` must not capture any TPU Tensor inside the
-        model_fn.
-      model_dir: Directory to save model parameters, graph, etc. This can
-        also be used to load checkpoints from the directory into an estimator
-        to continue training a previously saved model. If `None`, the
-        model_dir in `config` will be used if set. If both are set, they must
-        be the same. If both are `None`, a temporary directory will be used.
-      config: A `tpu_config.RunConfig` configuration object. Cannot be `None`.
-      params: An optional `dict` of hyper parameters that will be passed into
-        `input_fn` and `model_fn`.  Keys are names of parameters, values are
-        basic python types. There are reserved keys for `TPUEstimator`,
-        including 'batch_size'.
-      use_tpu: A bool indicating whether TPU support is enabled. Currently:
-        - TPU training and evaluation respect this bit, but eval_on_tpu can
-          override execution of eval (see below).
-        - Predict still happens on CPU.
-      train_batch_size: An int representing the global training batch size.
-        TPUEstimator transforms this global batch size to a per-shard batch
-        size, as params['batch_size'], when calling `input_fn` and `model_fn`.
-        Cannot be `None` if `use_tpu` is `True`. Must be divisible by total
-        number of replicas.
-      eval_batch_size: An int representing evaluation batch size. Must be
-        divisible by total number of replicas.
-      predict_batch_size: An int representing the prediction batch size. Must be
-        divisible by total number of replicas.
-      batch_axis: A python tuple of int values describing how each tensor
-        produced by the Estimator `input_fn` should be split across the TPU
-        compute shards. For example, if your input_fn produced (images, labels)
-        where the images tensor is in `HWCN` format, your shard dimensions would
-        be [3, 0], where 3 corresponds to the `N` dimension of your images
-        Tensor, and 0 corresponds to the dimension along which to split the
-        labels to match up with the corresponding images. If None is supplied,
-        and per_host_input_for_training is True, batches will be sharded based
-        on the major dimension. If tpu_config.per_host_input_for_training is
-        False or `PER_HOST_V2`, batch_axis is ignored.
-      eval_on_tpu: If False, evaluation runs on CPU or GPU. In this case, the
-        model_fn must return `EstimatorSpec` when called with `mode` as `EVAL`.
-      export_to_tpu: If True, `export_savedmodel()` exports a metagraph for
-        serving on TPU. Note that unsupported export modes such as EVAL will be
-        ignored. For those modes, only a CPU model will be exported.
-        Currently, export_to_tpu only supports PREDICT.
-      export_to_cpu: If True, `export_savedmodel()` exports a metagraph for
-        serving on CPU.
-      warm_start_from: Optional string filepath to a checkpoint or SavedModel to
-        warm-start from, or a `tf.estimator.WarmStartSettings` object to fully
-        configure warm-starting.  If the string filepath is provided instead of
-        a `WarmStartSettings`, then all variables are warm-started, and it is
-        assumed that vocabularies and Tensor names are unchanged.
-      experimental_exported_model_uses_all_cores: Whether to round-robin among
-        all cores visible to the host which is serving the saved model, or to
-        use a single core. This is a temporary flag to enable using all TPU
-        cores for inference with TPUPartitionedCall(). Once outside compilation
-        is supported in TPUPartitionedCall(), this flag will be enabled by
-        default.
-      experimental_export_device_assignment: Whether to include the device
-        assignment in the exported model. Doing so is useful in case of model
-        parallel inference but will tie the exported model to the TPU topology
-        used to export the model.
-      experimental_embedding_config_spec: Optional EmbeddingConfigSpec instance
-        to support using TPU embedding. IT IS STILL WORK IN PROGRESS, SO PLEASE
-        DO NOT USE.
-
-    Raises:
-      ValueError: `params` has reserved keys already.
-    """
-    if config is None or not isinstance(config, tpu_config.RunConfig):
-      raise ValueError(
-          '`config` must be provided with type `tpu_config.RunConfig`')
-
-    if params is not None and any(k in params for k in _RESERVED_PARAMS_KEYS):
-      raise ValueError('{} are reserved keys but existed in params {}.'.format(
-          _RESERVED_PARAMS_KEYS, params))
-
-    if use_tpu:
-      # Perform some very basic validations. More validations will be found in
-      # _InternalTPUContext.
-      if train_batch_size is None:
-        raise ValueError('`train_batch_size` cannot be `None`')
-      util_lib.check_positive_integer(train_batch_size, 'train_batch_size')
-
-      if (config.tpu_config.per_host_input_for_training is
-          tpu_config.InputPipelineConfig.PER_SHARD_V1 and
-          config.tpu_config.num_cores_per_replica):
-        raise ValueError(
-            'Model parallelism only supports per host input for training. '
-            'Please adjust TPURunconfig.per_host_input_for_training.')
-
-      if eval_batch_size is not None:
-        util_lib.check_positive_integer(eval_batch_size, 'eval_batch_size')
-
-      if predict_batch_size is not None:
-        util_lib.check_positive_integer(predict_batch_size,
-                                        'predict_batch_size')
-
-    # Verifies the model_fn signature according to Estimator framework.
-    estimator_lib._verify_model_fn_args(model_fn, params)  # pylint: disable=protected-access
-    # We cannot store config and params in this constructor as parent
-    # constructor might change them, such as assigning a temp dir for
-    # config.model_dir.
-    model_function = self._augment_model_fn(model_fn, batch_axis)
-
-    # Overwrite log_step_count_steps to disable TensorLoggingHook and
-    # StepCounterHook from being created in Estimator. TPUEstimator already
-    # added equivalent hooks in _augment_model_fn above.
-    self._log_every_n_steps = config.log_step_count_steps
-    config = config.replace(log_step_count_steps=None)
-
-    # Pass non-None params, since the wrapped model_fn expects them.
-    params = params or {}
-    super(TPUEstimator, self).__init__(
-        model_fn=model_function,
-        model_dir=model_dir,
-        config=config,
-        params=params,
-        warm_start_from=warm_start_from)
-    self._iterations_per_training_loop = (
-        self._config.tpu_config.iterations_per_loop)
-
-    # All properties passed to _InternalTPUContext are immutable.
-    # pylint: disable=protected-access
-    self._ctx = tpu_context._get_tpu_context(
-        self._config, train_batch_size, eval_batch_size, predict_batch_size,
-        use_tpu, eval_on_tpu, experimental_embedding_config_spec)
-
-    self._export_to_cpu = export_to_cpu
-    self._export_to_tpu = export_to_tpu
-    self._experimental_exported_model_uses_all_cores = (
-        experimental_exported_model_uses_all_cores)
-    self._experimental_export_device_assignment = (
-        experimental_export_device_assignment)
-    if (experimental_exported_model_uses_all_cores and
-        experimental_export_device_assignment):
-      raise ValueError('experimental_exported_model_uses_all_cores and '
                       'experimental_export_device_assignment are not '
                       'supported at the same time.')
-
-    self._is_input_fn_invoked = None
-    self._rendezvous = {}
-
-  def _add_meta_graph_for_mode(self,
-                               builder,
-                               input_receiver_fn_map,
-                               checkpoint_path,
-                               save_variables=True,
-                               mode=model_fn_lib.ModeKeys.PREDICT,
-                               export_tags=None,
-                               check_variables=True):
-    if self._export_to_tpu and mode != model_fn_lib.ModeKeys.PREDICT:
-      logging.warning('TPUEstimator only handles mode PREDICT for exporting '
-                      'when `export_to_tpu` is `True`; Mode {} will be ignored '
-                      'for TPU.'.format(mode))
-
-    if not self._export_to_cpu and not self._export_to_tpu:
-      raise ValueError('One of export_to_cpu and export_to_tpu must be true.')
-
-    if self._export_to_cpu:
-      (super(TPUEstimator, self)._add_meta_graph_for_mode(
-          builder,
-          input_receiver_fn_map,
-          checkpoint_path,
-          save_variables,
-          mode=mode,
-          export_tags=export_tags,
-          check_variables=check_variables))
-
-    if self._export_to_tpu and mode == model_fn_lib.ModeKeys.PREDICT:
-      input_receiver_fn_map = {
-          _REWRITE_FOR_INFERENCE_MODE: input_receiver_fn_map[mode]
-      }
-      export_tags = [tag_constants.SERVING, tag_constants.TPU]
-      mode = _REWRITE_FOR_INFERENCE_MODE
-
-      # See b/110052256 for why `check_variables` is `False`.
-      if not self._export_to_cpu:
-        check_variables = save_variables = True
-      else:
-        check_variables = save_variables = False
-      (super(TPUEstimator, self)._add_meta_graph_for_mode(
-          builder,
-          input_receiver_fn_map,
-          checkpoint_path,
-          save_variables=save_variables,
-          mode=mode,
-          export_tags=export_tags,
-          check_variables=check_variables))
-
-  def _call_model_fn(self, features, labels, mode, config):
-    if mode == _REWRITE_FOR_INFERENCE_MODE:
-      return self._call_model_fn_for_inference(features, labels, mode, config)
-    else:
-      return super(TPUEstimator, self)._call_model_fn(features, labels, mode,
-                                                      config)
-
-  def _call_model_fn_for_inference(self, features, labels, mode, config):
-    """Wraps `_call_model_fn` for `export_savedmodel`."""
-    if mode != _REWRITE_FOR_INFERENCE_MODE:
-      raise ValueError('mode must be {}; '
-                       'got {}.'.format(_REWRITE_FOR_INFERENCE_MODE, mode))
-
-    computation, capture = self._build_computation_for_inference(
-        features, labels, mode, config)
-    tensors = call_computation(
-        computation,
-        experimental_exported_model_uses_all_cores=self
-        ._experimental_exported_model_uses_all_cores)
-    estimator_spec, export_outputs_dict, predictions_dict, none_indices = (
-        capture.get())
-    predictions_list = tensors[:len(predictions_dict)]
-    export_outputs_list_without_none = tensors[len(predictions_dict):]
-
-    # Reinsert `None`s which we've taken out in
-    # `_build_computation_for_inference()`.
-    export_outputs_list = []
-    while none_indices or export_outputs_list_without_none:
-      if none_indices and none_indices[0] == len(export_outputs_list):
-        export_outputs_list.append(None)
-        none_indices.pop(0)
-      else:
-        export_outputs_list.append(export_outputs_list_without_none.pop(0))
-
-    # Reconstruct `export_outputs` with updated tensors.
-    new_export_outputs_dict = nest.pack_sequence_as(export_outputs_dict,
-                                                    export_outputs_list)
-    export_outputs = estimator_spec.export_outputs
-    new_export_outputs = collections.OrderedDict(
-        (k, _clone_export_output_with_tensors(export_outputs[k], v))
-        for k, v in six.iteritems(new_export_outputs_dict))
-    # Reconstruct `predictions` with updated tensors.
-    new_predictions = nest.pack_sequence_as(predictions_dict, predictions_list)
-    if (len(new_predictions) == 1 and
-        _KEY_WHEN_PREDICTIONS_IS_A_TENSOR in new_predictions):
-      new_predictions = new_predictions[_KEY_WHEN_PREDICTIONS_IS_A_TENSOR]
-
-    return estimator_spec._replace(
-        export_outputs=new_export_outputs, predictions=new_predictions)
-
-  def _build_computation_for_inference(self, features, labels, mode, config):
-    capture = _CapturedObject()
-
-    def computation():
-      """Computation to be passed to `TPUPartitionedCall()`."""
-      tpu_computation, tpu_capture = self._build_tpu_computation_for_inference(
-          features, labels, mode, config)
-
-      if self._experimental_export_device_assignment:
-        # Export the device assignment as part of the model. This is useful for
-        # model-parallel use cases where the model relies on the mapping between
-        # logical and physical devices.
-        with self._ctx.with_mode(mode) as ctx:
-          device_assignment = ctx.device_assignment
-      else:
-        device_assignment = None
-      tensors_on_cpu = tpu.rewrite_for_inference(
-          tpu_computation, device_assignment=device_assignment)
-      (estimator_spec, export_outputs_dict, export_outputs_list,
-       predictions_dict) = (
-           tpu_capture.get())
-      predictions_list = tensors_on_cpu[:len(predictions_dict)]
-      export_outputs_tpu_on_cpu_list = tensors_on_cpu[len(predictions_dict):]
-
-      # Reconstruct tensors used in export_outputs, with TPU tensors replaced
-      # with their CPU counterpart returned from `rewrite_for_inference()`.
-      # `function.Defun()` does not like `None`s in return values, so we leave
-      # `None`s out but record their positions for later reconstruction.
-      export_outputs_list_without_none = []
-      none_indices = []
-      for i, t in enumerate(export_outputs_list):
-        if t is None:
-          none_indices.append(i)
-        else:
-          export_outputs_list_without_none.append(
-              export_outputs_tpu_on_cpu_list.pop(0))
-
-      capture.capture((estimator_spec, export_outputs_dict, predictions_dict,
-                       none_indices))
-      return predictions_list + export_outputs_list_without_none
-
-    return computation, capture
-
-  def _build_tpu_computation_for_inference(self, features, labels, mode,
-                                           config):
-    capture = _CapturedObject()
-
-    def computation():
-      """Computes TPU tensors used in export_outputs.
-
-      Passed to rewrite_for_inference so that model_fn will be called under
-      the rewriting contexts. Only TPU tensors are returned, but export_outputs
-      and scaffold are captured.
-
-      Returns:
-         A list of Tensors used in export_outputs and not marked for
-         outside_compilation.
-      """
-      # We should only call model_fn once, and it should be inside `computation`
-      # so that building the graph will happen under `rewrite_for_inference`.
-      mode = model_fn_lib.ModeKeys.PREDICT
-      estimator_spec = self._call_model_fn(features, labels, mode, config)
-
-      # We pick the TPU tensors out from `export_output` and later return them
-      # from `computation` for rewriting.
-      export_outputs_dict = collections.OrderedDict(
-          (k, _export_output_to_tensors(v))
-          for k, v in six.iteritems(estimator_spec.export_outputs))
-      export_outputs_list = nest.flatten(export_outputs_dict)
-      export_outputs_tpu_list = [
-          t for t in export_outputs_list if t is not None
-      ]
-
-      if isinstance(estimator_spec.predictions, dict):
-        predictions_dict = collections.OrderedDict(
-            (k, v) for k, v in six.iteritems(estimator_spec.predictions))
-      else:
-        predictions_dict = {
-            _KEY_WHEN_PREDICTIONS_IS_A_TENSOR: estimator_spec.predictions
-        }
-      predictions_list = nest.flatten(predictions_dict)
-
-      # We cannot return everything we want through the return values, so
-      # capture the rest here for later use.
-      capture.capture((estimator_spec, export_outputs_dict, export_outputs_list,
-                       predictions_dict))
-      return predictions_list + export_outputs_tpu_list
-
-    return computation, capture
-
-  def _create_global_step(self, graph):
-    """Creates a global step suitable for TPUs.
-
-    Args:
-      graph: The graph in which to create the global step.
-
-    Returns:
-      A global step `Tensor`.
-
-    Raises:
-      ValueError: if the global step tensor is already defined.
-    """
-    return _create_global_step(graph)
-
-  def _convert_train_steps_to_hooks(self, steps, max_steps):
-    with self._ctx.with_mode(model_fn_lib.ModeKeys.TRAIN) as ctx:
-      if ctx.is_running_on_cpu():
-        return super(TPUEstimator, self)._convert_train_steps_to_hooks(
-            steps, max_steps)
-
-    # On TPU.
-    if steps is None and max_steps is None:
-      raise ValueError(
-          'For TPU training, one of `steps` or `max_steps` must be set. '
-          'Cannot be both `None`.')
-
-    # Estimator.train has explicit positiveness check.
-    if steps is not None:
-      util_lib.check_positive_integer(steps, 'Train steps')
-    if max_steps is not None:
-      util_lib.check_positive_integer(max_steps, 'Train max_steps')
-
-    return [
-        _TPUStopAtStepHook(self._iterations_per_training_loop, steps, max_steps)
-    ]
-
-  def _convert_eval_steps_to_hooks(self, steps):
-    with self._ctx.with_mode(model_fn_lib.ModeKeys.EVAL) as ctx:
-      if ctx.is_running_on_cpu():
-        return super(TPUEstimator, self)._convert_eval_steps_to_hooks(steps)
-
-    if steps is None:
-      raise ValueError('Evaluate `steps` must be set on TPU. Cannot be `None`.')
-
-    util_lib.check_positive_integer(steps, 'Eval steps')
-
-    return [
-        evaluation._StopAfterNEvalsHook(  # pylint: disable=protected-access
-            num_evals=steps),
-        _SetEvalIterationsHook(steps)
-    ]
-
-  def _call_input_fn(self, input_fn, mode):
-    """Calls the input function.
-
-    Args:
-      input_fn: The input function.
-      mode: ModeKeys
-
-    Returns:
-      In TPU mode, returns an input_fn to be called later in model_fn.
-      Otherwise, calls the input_fn and returns either features or
-      (features, labels).
-
-    Raises:
-      ValueError: if input_fn takes invalid arguments or does not have `params`.
-    """
-    input_fn_args = function_utils.fn_args(input_fn)
-    config = self.config  # a deep copy.
-    kwargs = {}
-    if 'params' in input_fn_args:
-      kwargs['params'] = self.params  # a deep copy.
-    else:
-      raise ValueError('input_fn ({}) does not include params argument, '
-                       'required by TPUEstimator to pass batch size as '
-                       'params["batch_size"]'.format(input_fn))
-    if 'config' in input_fn_args:
-      kwargs['config'] = config
-
-    if 'mode' in input_fn_args:
-      kwargs['mode'] = mode
-
-    # Records the fact input_fn has been invoked.
-    self._is_input_fn_invoked = True
-
-    with self._ctx.with_mode(mode) as ctx:
-      # Set the batch size in params first. This lets the user keep the same
-      # input_fn for use_tpu=True/False.
-      batch_size_for_input_fn = ctx.batch_size_for_input_fn
-      if batch_size_for_input_fn is not None:
-        _add_item_to_params(kwargs['params'], _BATCH_SIZE_KEY,
-                            batch_size_for_input_fn)
-
-      # For export_savedmodel, input_fn is never passed to Estimator. So,
-      # `is_export_mode` must be False.
-      if ctx.is_running_on_cpu(is_export_mode=False):
-        with ops.device('/device:CPU:0'):
-          return input_fn(**kwargs)
-
-      # For TPU computation, input_fn should be invoked in a tf.while_loop for
-      # performance. While constructing the tf.while_loop, the structure of
-      # inputs returned by the `input_fn` needs to be recorded. The structure
-      # includes whether features or labels is dict or single Tensor, dict keys,
-      # tensor shapes, and dtypes. The recorded structure is used to create the
-      # infeed dequeue ops, which must be wrapped and passed as a Fn, called
-      # inside the TPU computation, as the TPU computation is wrapped inside a
-      # tf.while_loop also. So, we either pass input_fn to model_fn or pass
-      # dequeue_fn to model_fn. Here, `input_fn` is passed directly as
-      # `features` in `model_fn` signature.
-      def _input_fn(ctx):
-        _add_item_to_params(kwargs['params'], _CTX_KEY, ctx)
-        return input_fn(**kwargs)
-
-      return _input_fn
-
-  def _validate_features_in_predict_input(self, result):
-    """Skip the validation.
-
-    For TPUEstimator, we do not need to check the result type. `_InputPipeline`
-    has a stronger check. The parent class's check generates a confusing
-    warning message.
-
-    Args:
-      result: `features` returned by input_fn.
-    """
-    pass
-
-  def train(self,
-            input_fn,
-            hooks=None,
-            steps=None,
-            max_steps=None,
-            saving_listeners=None):
-    rendezvous = error_handling.ErrorRendezvous(num_sources=3)
-    self._rendezvous[model_fn_lib.ModeKeys.TRAIN] = rendezvous
-    try:
-      return super(TPUEstimator, self).train(
-          input_fn=input_fn,
-          hooks=hooks,
-          steps=steps,
-          max_steps=max_steps,
-          saving_listeners=saving_listeners)
-    except Exception:  # pylint: disable=broad-except
-      rendezvous.record_error('training_loop', sys.exc_info())
-    finally:
-      rendezvous.record_done('training_loop')
-      rendezvous.raise_errors()
-
-  def evaluate(self,
-               input_fn,
-               steps=None,
-               hooks=None,
-               checkpoint_path=None,
-               name=None):
-    rendezvous = error_handling.ErrorRendezvous(num_sources=3)
-    self._rendezvous[model_fn_lib.ModeKeys.EVAL] = rendezvous
-    try:
-      return super(TPUEstimator, self).evaluate(
-          input_fn,
-          steps=steps,
-          hooks=hooks,
-          checkpoint_path=checkpoint_path,
-          name=name)
-    except Exception:  # pylint: disable=broad-except
-      rendezvous.record_error('evaluation_loop', sys.exc_info())
-    finally:
-      rendezvous.record_done('evaluation_loop')
-      rendezvous.raise_errors()
-
-  def predict(self,
-              input_fn,
-              predict_keys=None,
-              hooks=None,
-              checkpoint_path=None,
-              yield_single_examples=True):
-    rendezvous = error_handling.ErrorRendezvous(num_sources=3)
-    self._rendezvous[model_fn_lib.ModeKeys.PREDICT] = rendezvous
-    try:
-      for result in super(TPUEstimator, self).predict(
-          input_fn=input_fn,
-          predict_keys=predict_keys,
-          hooks=hooks,
-          checkpoint_path=checkpoint_path,
-          yield_single_examples=yield_single_examples):
-        yield result
-    except Exception:  # pylint: disable=broad-except
-      rendezvous.record_error('prediction_loop', sys.exc_info())
-    finally:
-      rendezvous.record_done('prediction_loop')
-      rendezvous.raise_errors()
-
-  def _augment_model_fn(self, model_fn, batch_axis):
-    """Returns a new model_fn, which wraps the TPU support."""
-
-    def _model_fn(features, labels, mode, config, params):
-      """A Estimator `model_fn` for TPUEstimator."""
-      with self._ctx.with_mode(mode) as ctx:
-        model_fn_wrapper = _ModelFnWrapper(model_fn, config, params, ctx)
-
-        # `input_fn` is called in `train()`, `evaluate()`, and `predict()`,
-        # but not in `export_savedmodel()`.
-        if self._is_input_fn_invoked:
-          is_export_mode = False
-        else:
-          is_export_mode = True
-
-        # Clear the bit.
-        self._is_input_fn_invoked = None
-
-        # examples_hook is added to training_hooks for both CPU and TPU
-        # execution.
-        if self._log_every_n_steps is not None:
-          examples_hook = ExamplesPerSecondHook(
-              ctx.global_batch_size,
-              # pylint:disable=g-long-ternary
-              output_dir=(self.model_dir
-                          if not config or config.save_summary_steps
-                          else None),
-              # pylint:enable=g-long-ternary
-              every_n_steps=self._log_every_n_steps)
-
-        if ctx.is_running_on_cpu(is_export_mode=is_export_mode):
-          logging.info('Running %s on CPU', mode)
-          estimator_spec = model_fn_wrapper.call_without_tpu(
-              features, labels, is_export_mode=is_export_mode)
-          if self._log_every_n_steps is not None:
-            estimator_spec = estimator_spec._replace(
-                training_hooks=estimator_spec.training_hooks + (examples_hook,))
-          return estimator_spec
-
-        assert labels is None, '`labels` passed to `model_fn` must be `None`.'
-        # TPUEstimator._call_input_fn passes `input_fn` as features to here.
-        assert callable(features), '`input_fn` is not callable.'
-        input_fn = features
-
-        tpu_init_ops = []
-        if ctx.embedding_config:
-          tpu_init_ops.extend(ctx.embedding_config.tpu_embedding.init_ops)
-          embedding_variables_and_ops = (
-              ctx.embedding_config.tpu_embedding.create_variables_and_ops())
-          tpu_init_ops.extend(embedding_variables_and_ops.load_ops)
-
-        input_holders = _InputPipeline(input_fn, batch_axis, ctx)
-        enqueue_ops, dequeue_fn, input_hooks, run_infeed_loop_on_coordinator = (
-            input_holders.generate_infeed_enqueue_ops_and_dequeue_fn())
-
-        graph = ops.get_default_graph()
-        for enqueue_op in enqueue_ops:
-          if isinstance(enqueue_op, list):
-            graph.get_collection_ref(_TPU_ENQUEUE_OPS).extend(enqueue_op)
-          else:
-            graph.add_to_collection(_TPU_ENQUEUE_OPS, enqueue_op)
-
-        if mode == model_fn_lib.ModeKeys.TRAIN:
-          compile_op, loss, host_call, scaffold, training_hooks = (
-              _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
-          host_ops = host_call.create_tpu_hostcall()
-          if host_ops is None:
-            host_ops = []
-
-          shutdown_hooks = []
-          shutdown_mode = os.environ.get('TF_TPU_GRACEFUL_SHUTDOWN_MODE',
-                                         'shutdown_worker')
-          if shutdown_mode:
-            if shutdown_mode == 'shutdown_worker':
-              finalizer_hooks = [
-                  session_support.ShutdownLameWorkers(timeout_ms=60 * 1000),
-              ]
-            elif shutdown_mode == 'shutdown_computation':
-              finalizer_hooks = [
-                  session_support.RestartComputation(timeout_ms=60 * 1000),
-              ]
-            else:
-              raise ValueError(
-                  'Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE "%s"' % shutdown_mode)
-
-            shutdown_hooks.append(
-                session_support.GracefulShutdownHook(
-                    checkpoint_prefix=self.model_dir + '/model.ckpt',
-                    on_shutdown_hooks=finalizer_hooks))
-
-          with ops.control_dependencies([loss]):
-            global_step = array_ops.identity(training.get_global_step())
-          hooks = input_hooks + shutdown_hooks
-          hooks.extend([
-              TPUInfeedOutfeedSessionHook(
-                  ctx,
-                  enqueue_ops,
-                  host_ops,
-                  tpu_compile_op=compile_op,
-                  run_infeed_loop_on_coordinator=(
-                      run_infeed_loop_on_coordinator),
-                  rendezvous=self._rendezvous[mode],
-                  master=self._config.master,
-                  session_config=self._session_config,
-                  tpu_init_ops=tpu_init_ops),
-              InstallSignalHandlerHook()
-          ])
-          if self._log_every_n_steps is not None:
-            logging_hook_frequency = (  # Divide and round up
-                (self._log_every_n_steps +
-                 self._config.tpu_config.iterations_per_loop - 1) //
-                self._config.tpu_config.iterations_per_loop)
-            hooks.append(
-                training.LoggingTensorHook(
-                    {
-                        'loss': array_ops.identity(loss),
-                        'step': global_step,
-                    },
-                    every_n_iter=logging_hook_frequency))
-            examples_hook._set_steps_per_run(  # pylint: disable=protected-access
-                self._config.tpu_config.iterations_per_loop)
-            hooks.append(examples_hook)
-
-          if training_hooks:
-            hooks.extend(training_hooks)
-
-          chief_hooks = []
-          if (self._config.save_checkpoints_secs or
-              self._config.save_checkpoints_steps):
-            checkpoint_hook = training.CheckpointSaverHook(
-                self.model_dir,
-                save_secs=self._config.save_checkpoints_secs,
-                save_steps=self._config.save_checkpoints_steps,
-                scaffold=scaffold)
-            checkpoint_hook._set_steps_per_run(  # pylint: disable=protected-access
-                self._config.tpu_config.iterations_per_loop)
-            chief_hooks.append(checkpoint_hook)
-
-          summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss)
-          with ops.control_dependencies([loss]):
-            update_ops = _sync_variables_ops(ctx)
-
-          if ctx.embedding_config:
-            update_ops.extend(embedding_variables_and_ops.retrieve_ops)
-
-          # Validate the TPU training graph to catch basic errors
-          _validate_tpu_training_graph()
-
-          train_op = control_flow_ops.group(*update_ops)
-          graph.add_to_collection(_TPU_TRAIN_OP, train_op)
-
-          return model_fn_lib.EstimatorSpec(
-              mode,
-              loss=loss,
-              training_chief_hooks=chief_hooks,
-              training_hooks=hooks,
-              train_op=train_op,
-              scaffold=scaffold)
-
-        if mode == model_fn_lib.ModeKeys.EVAL:
-          compile_op, total_loss, host_calls, scaffold, eval_hooks = (
-              _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
-          iterations_per_loop_var = _create_or_get_iterations_per_loop()
-          mean_loss = math_ops.div(
-              total_loss,
-              math_ops.cast(iterations_per_loop_var, dtype=total_loss.dtype))
-
-          with ops.control_dependencies([mean_loss]):
-            # After the TPU evaluation computation is done (the mean_loss
-            # tensor), read all variables back from the TPU and update the
-            # eval step counter.
-            internal_ops_to_run = _sync_variables_ops(ctx)
-            internal_ops_to_run.append(
-                _increase_eval_step_op(iterations_per_loop_var))
-
-          host_call_ret = host_calls.create_tpu_hostcall()
-          eval_metric_ops = {}
-          eval_update_ops = []
-
-          eval_metrics = host_call_ret.get('eval_metrics', {})
-          if eval_metrics:
-            # Creates a dummy metric update_op for all metrics. Estimator
-            # expects every metric in `eval_metric_ops` to have an update_op
-            # and calls them one by one. The real metric update_ops are
-            # invoked in a separate thread. So, here we give Estimator a
-            # dummy op for all metrics.
-            with ops.control_dependencies(internal_ops_to_run):
-              dummy_update_op = control_flow_ops.no_op()
-
-            for k, v in eval_metrics.items():
-              eval_metric_ops[k] = (v[0], dummy_update_op)
-              eval_update_ops.append(v[1])
-          else:
-            # If no eval metrics are passed, create an identity node for the
-            # loss and add `internal_ops_to_run` to its dependencies, so that
-            # `internal_ops_to_run` is executed.
-            with ops.control_dependencies(internal_ops_to_run):
-              mean_loss = array_ops.identity(mean_loss)
-
-          host_ops = host_call_ret.get('host_call', [])
-          hooks = [
-              TPUInfeedOutfeedSessionHook(
-                  ctx,
-                  enqueue_ops,
-                  eval_update_ops + host_ops,
-                  tpu_compile_op=compile_op,
-                  run_infeed_loop_on_coordinator=(
-                      run_infeed_loop_on_coordinator),
-                  rendezvous=self._rendezvous[mode],
-                  master=self._config.evaluation_master,
-                  session_config=self._session_config,
-                  tpu_init_ops=tpu_init_ops)
-          ] + input_hooks
-
-          if eval_hooks:
-            hooks.extend(eval_hooks)
-
-          return model_fn_lib.EstimatorSpec(
-              mode,
-              loss=mean_loss,
-              evaluation_hooks=hooks,
-              eval_metric_ops=eval_metric_ops,
-              scaffold=scaffold)
-
-        # Predict
-        assert mode == model_fn_lib.ModeKeys.PREDICT
-
-        (compile_op, dummy_predict_op, host_calls,
-         scaffold, prediction_hooks) = _predict_on_tpu_system(
-             ctx, model_fn_wrapper, dequeue_fn)
-        with ops.control_dependencies([dummy_predict_op]):
-          internal_ops_to_run = _sync_variables_ops(ctx)
-          with ops.control_dependencies(internal_ops_to_run):
-            dummy_predict_op = control_flow_ops.no_op()
-
-        # In train and evaluation, the main TPU program is passed to monitored
-        # training session to run. Infeed enqueue and outfeed dequeue are
-        # executed in side threads. This is not the configuration for
-        # prediction mode.
-        #
-        # For prediction, the Estimator executes the EstimatorSpec.predictions
-        # directly and yields each element (via a generator) to the call site.
-        # So, the outfeed-based prediction must be passed to MonitoredSession
-        # directly.
-        # Other parts of the TPU execution are organized as follows.
-        #
-        # 1. All outfeed-based Tensors must be grouped with the predictions
-        #    Tensors to form a single invocation. This avoids incorrectly
-        #    triggering multiple outfeeds. To achieve this, `host_call` is
-        #    placed in control_dependencies of `stopping_signals`, and
-        #    `stopping_signals` is passed into _StoppingPredictHook, which sets
-        #    the `stopping_signals` as SessionRunArgs. MonitoredSession merges
-        #    all SessionRunArgs with the fetch in session.run together.
-        #
-        # 2. The TPU program (dummy_predict_op) and enqueue_ops (infeed Enqueue)
-        #    are grouped together. They will be launched once and only once in
-        #    side threads and they quit naturally according to the SAME stopping
-        #    condition.
-        enqueue_ops.append(dummy_predict_op)
-
-        host_call_ret = host_calls.create_tpu_hostcall()
-        host_ops = host_call_ret.get('host_call', [])
-
-        predictions = host_call_ret['predictions']
-        _verify_cross_hosts_transfer_size(
-            predictions,
-            message=(
-                'The estimated size for TPUEstimatorSpec.predictions is too '
-                'large.'))
-        signals = host_call_ret['signals']
-
-        with ops.control_dependencies(host_ops):
-          host_ops = []  # Empty; we do not need it anymore.
-          scalar_stopping_signal = _StopSignals.as_scalar_stopping_signal(
-              signals)
-          predictions = _PaddingSignals.slice_tensor_or_dict(
-              predictions, signals)
-
-        hooks = [
-            _StoppingPredictHook(scalar_stopping_signal),
-            TPUInfeedOutfeedSessionHookForPrediction(
-                ctx, enqueue_ops, host_ops, rendezvous=self._rendezvous[mode],
-                tpu_compile_op=compile_op,
-                master=self._config.master,
-                session_config=self._session_config),
-        ] + input_hooks
-
-        if prediction_hooks:
-          hooks.extend(prediction_hooks)
-
-        return model_fn_lib.EstimatorSpec(
-            mode,
-            prediction_hooks=hooks,
-            predictions=predictions,
-            scaffold=scaffold)
-
-    return _model_fn
-
-
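The closure-returning structure of `_augment_model_fn` above is the standard way to layer behavior onto an Estimator model_fn. A stripped-down sketch of the same pattern (extra_hooks is hypothetical; EstimatorSpec is a namedtuple, so _replace works):

def _wrap_model_fn(model_fn, extra_hooks):
  """Sketch: returns a model_fn that appends extra training hooks."""

  def _model_fn(features, labels, mode, config, params):
    spec = model_fn(features, labels, mode, config, params)
    return spec._replace(
        training_hooks=tuple(spec.training_hooks) + tuple(extra_hooks))

  return _model_fn
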
-def _export_output_to_tensors(export_output):
-  """Get a list of `Tensors` used in `export_output`.
-
-  Args:
-    export_output: an `ExportOutput` object such as `ClassificationOutput`,
-      `RegressionOutput`, or `PredictOutput`.
-
-  Returns:
-    a list of tensors used in export_output.
-
-  Raises:
-    ValueError: if `export_output` is not one of `ClassificationOutput`,
-        `RegressionOutput`, or `PredictOutput`.
-  """
-  if isinstance(export_output, export_output_lib.ClassificationOutput):
-    return [export_output.scores, export_output.classes]
-  elif isinstance(export_output, export_output_lib.RegressionOutput):
-    return [export_output.value]
-  elif isinstance(export_output, export_output_lib.PredictOutput):
-    return list(export_output.outputs.values())
-  else:
-    raise ValueError(
-        '`export_output` must have type `ClassificationOutput`, '
-        '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output))
-
-
-def _clone_export_output_with_tensors(export_output, tensors):
-  """Clones `export_output` but with new `tensors`.
-
-  Args:
-    export_output: an `ExportOutput` object such as `ClassificationOutput`,
-      `RegressionOutput`, or `PredictOutput`.
-    tensors: a list of `Tensors` used to construct a new `export_output`.
-
-  Returns:
-    A dict similar to `export_output` but with `tensors`.
-
-  Raises:
-    ValueError: if `export_output` is not one of `ClassificationOutput`,
-        `RegressionOutput`, or `PredictOutput`.
-  """
-  if isinstance(export_output, export_output_lib.ClassificationOutput):
-    if len(tensors) != 2:
-      raise ValueError('tensors must be of length 2; '
-                       'got {}.'.format(len(tensors)))
-    return export_output_lib.ClassificationOutput(*tensors)
-  elif isinstance(export_output, export_output_lib.RegressionOutput):
-    if len(tensors) != 1:
-      raise ValueError('tensors must be of length 1; '
-                       'got {}'.format(len(tensors)))
-    return export_output_lib.RegressionOutput(*tensors)
-  elif isinstance(export_output, export_output_lib.PredictOutput):
-    return export_output_lib.PredictOutput(
-        dict(zip(export_output.outputs.keys(), tensors)))
-  else:
-    raise ValueError(
-        '`export_output` must have type `ClassificationOutput`, '
-        '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output))
-
-
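A hedged round-trip sketch of the two helpers above, assuming the standard PredictOutput constructor that takes a dict of named tensors (probs is a hypothetical Tensor):

outputs = export_output_lib.PredictOutput({'probabilities': probs})
tensors = _export_output_to_tensors(outputs)  # -> [probs]
rebuilt = _clone_export_output_with_tensors(outputs, tensors)
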
-def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
-  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
-  iterations_per_loop_var = _create_or_get_iterations_per_loop()
-
-  (single_tpu_eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks
-  ) = model_fn_wrapper.convert_to_single_tpu_eval_step(dequeue_fn)
-
-  def multi_tpu_eval_steps_on_single_shard():
-    return training_loop.repeat(iterations_per_loop_var, single_tpu_eval_step,
-                                [_ZERO_LOSS])
-
-  (compile_op, loss,) = tpu.split_compile_and_shard(
-      multi_tpu_eval_steps_on_single_shard,
-      inputs=[],
-      num_shards=ctx.num_replicas,
-      outputs_from_all_shards=False,
-      device_assignment=ctx.device_assignment)
-
-  loss = loss[0]
-  scaffold = _get_scaffold(captured_scaffold_fn)
-  return compile_op, loss, host_calls, scaffold, captured_eval_hooks.get()
-
-
-def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
-  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
-  iterations_per_loop_var = _create_or_get_iterations_per_loop()
-
-  (single_tpu_train_step, host_call, captured_scaffold_fn,
-   captured_training_hooks) = (
-       model_fn_wrapper.convert_to_single_tpu_train_step(dequeue_fn))
-
-  def multi_tpu_train_steps_on_single_shard():
-    return training_loop.repeat(iterations_per_loop_var, single_tpu_train_step,
-                                [_INITIAL_LOSS])
-
-  (compile_op, loss,) = tpu.split_compile_and_shard(
-      multi_tpu_train_steps_on_single_shard,
-      inputs=[],
-      num_shards=ctx.num_replicas,
-      outputs_from_all_shards=False,
-      device_assignment=ctx.device_assignment)
-
-  loss = loss[0]
-  scaffold = _get_scaffold(captured_scaffold_fn)
-  return compile_op, loss, host_call, scaffold, captured_training_hooks.get()
-
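The repeat-based loop bodies in `_train_on_tpu_system` and `_eval_on_tpu_system` can be read through this plain-Python analogue (a sketch, not TPU code; it assumes the step function returns a one-element list, as the real single-step functions above do with [loss]; iterations_per_loop, single_train_step, and initial_loss are hypothetical):

def repeat(n, body, inputs):
  """Runs `body` n times, threading its list of outputs back in as inputs."""
  outputs = list(inputs)
  for _ in range(n):
    outputs = body(*outputs)
  return outputs

# E.g., threading a running loss through the step function:
final_loss, = repeat(iterations_per_loop, single_train_step, [initial_loss])
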
-
-def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
-  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
-  (single_tpu_predict_step, host_calls, captured_scaffold_fn,
-   captured_predict_hooks
-  ) = model_fn_wrapper.convert_to_single_tpu_predict_step(dequeue_fn)
-
-  def multi_tpu_predict_steps_on_single_shard():
-
-    def cond(scalar_stopping_signal):
-      return math_ops.logical_not(
-          _StopSignals.should_stop(scalar_stopping_signal))
-
-    inputs = [_StopSignals.NON_STOPPING_SIGNAL]
-    outputs = training_loop.while_loop(
-        cond, single_tpu_predict_step, inputs=inputs, name=b'loop')
-    return outputs
-
-  (compile_op, dummy_predict_op,) = tpu.split_compile_and_shard(
-      multi_tpu_predict_steps_on_single_shard,
-      inputs=[],
-      num_shards=ctx.num_replicas,
-      outputs_from_all_shards=False,
-      device_assignment=ctx.device_assignment)
-
-  dummy_predict_op = dummy_predict_op[0]
-  scaffold = _get_scaffold(captured_scaffold_fn)
-  return (compile_op, dummy_predict_op, host_calls, scaffold,
-          captured_predict_hooks.get())
-
-
-def _wrap_computation_in_while_loop(device, op_fn):
-  """Wraps the ops generated by `op_fn` in tf.while_loop."""
-
-  def computation(i):
-    with ops.control_dependencies(op_fn()):
-      return i + 1
-
-  iterations_per_loop_var = _create_or_get_iterations_per_loop()
-  # By setting parallel_iterations=1, the parallel execution in while_loop is
-  # basically turned off.
-  with ops.device(device):
-    iterations = array_ops.identity(iterations_per_loop_var)
-    return control_flow_ops.while_loop(
-        lambda i: i < iterations,
-        computation, [constant_op.constant(0)],
-        parallel_iterations=1)
-
-
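A self-contained analogue of the wrapper above, using the module's control_flow_ops and constant_op imports: thread a counter through a while_loop with parallel_iterations=1 so the iterations run strictly serially.

counter = control_flow_ops.while_loop(
    lambda i: i < 10,
    lambda i: i + 1,
    [constant_op.constant(0)],
    parallel_iterations=1)  # serialize the iterations, as in the wrapper
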
-def _wrap_computation_in_while_loop_with_stopping_signals(device, op_fn):
-  """Wraps the ops generated by `op_fn` in tf.while_loop."""
-
-  def cond(scalar_stopping_signal):
-    return math_ops.logical_not(
-        _StopSignals.should_stop(scalar_stopping_signal))
-
-  def computation(unused_scalar_stopping_signal):
-    return_value = op_fn()
-    execute_ops = return_value['ops']
-    signals = return_value['signals']
-    with ops.control_dependencies(execute_ops):
-      return _StopSignals.as_scalar_stopping_signal(signals)
-
-  # By setting parallel_iterations=1, the parallel execution in while_loop is
-  # basically turned off.
-  with ops.device(device):
-    return control_flow_ops.while_loop(
-        cond,
-        computation, [_StopSignals.NON_STOPPING_SIGNAL],
-        parallel_iterations=1)
-
-
-def _validate_tpu_training_graph():
-  """Validate graph before running distributed training.
-
-  Raises:
-    ValueError: If the graph seems invalid for running on device
-  """
-  operations = ops.get_default_graph().get_operations()
-
-  # Check that there is at least one CrossReplicaSum operation in the graph.
-  # This should be introduced by using the CrossShardOptimizer wrapper.
-  cross_replica_sum_ops = [
-      o for o in operations if o.type == _CROSS_REPLICA_SUM_OP
-  ]
-  if not cross_replica_sum_ops:
-    raise ValueError(
-        'CrossShardOptimizer must be used for model training on TPUs.')
-
-
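This check passes when gradients are aggregated with the cross-shard wrapper; a sketch, assuming the sibling tpu_optimizer module from this package and a loss Tensor built by the model:

optimizer = tpu_optimizer.CrossShardOptimizer(
    training.GradientDescentOptimizer(learning_rate=0.01))
train_op = optimizer.minimize(loss)  # emits CrossReplicaSum ops for gradients
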
-class _CapturedObject(object):
-  """A placeholder to capture an object.
-
-  This is useful when we need to capture a Python object in the TensorFlow
-  control flow body function and use it outside the control flow.
-  """
-
-  def __init__(self):
-    self._object = None
-    self._captured = False
-
-  def capture(self, o):
-    if self._captured:
-      raise RuntimeError(
-          'InternalError: Object can be captured only once. Please file a '
-          'bug.')
-
-    self._captured = True
-    self._object = o
-
-  def get(self):
-    if not self._captured:
-      raise RuntimeError(
-          'InternalError: Object is not captured properly before `get`. '
-          'Please file a bug.')
-    return self._object
-
-
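Typical use of _CapturedObject, sketched: record a Python object created while tracing a control-flow body, then read it back outside the traced code.

captured = _CapturedObject()

def build_body():
  captured.capture('built-inside-body')  # allowed exactly once

build_body()
assert captured.get() == 'built-inside-body'
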
-def _get_scaffold(captured_scaffold_fn):
-  """Retrieves the Scaffold from `captured_scaffold_fn`."""
-  with _CapturingContext(message='Inside scaffold_fn'):
-    scaffold_fn = captured_scaffold_fn.get()
-    if scaffold_fn:
-      scaffold = scaffold_fn()
-      if scaffold is None:
-        raise ValueError(
-            'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed')
-    else:
-      scaffold = None
-
-  if scaffold:
-    wrapped_finalize = scaffold.finalize
-
-    def _finalize():
-      with _CapturingContext('Inside Scaffold.finalize'):
-        wrapped_finalize()
-
-    scaffold.finalize = _finalize
-  return scaffold
-
-
-class _CapturingContext(control_flow_ops.ControlFlowContext):
-  """Tracks references to Tensors defined in TPU replication."""
-
-  def __init__(self, message):
-    control_flow_ops.ControlFlowContext.__init__(self)
-    self._message = message
-
-  def to_control_flow_context_def(self, context_def, export_scope=None):
-    # pylint: disable=useless-super-delegation
-    # NOTE(slebedev): the method is required by `ControlFlowContext`.
-    super(_CapturingContext, self).to_control_flow_context_def(
-        context_def, export_scope)
-
-  def AddOp(self, op):  # pylint: disable=invalid-name
-    for c in op.inputs:
-      if tpu._TPU_REPLICATE_ATTR in c.op.node_def.attr:  # pylint: disable=protected-access
-        raise ValueError('{}: Op {} depends on TPU computation {}, '
-                         'which is not allowed.'.format(self._message, op, c))
-
-  def __enter__(self):
-    # pylint: disable=protected-access
-    self._g = ops.get_default_graph()
-    self._old = self._g._get_control_flow_context()
-    self._g._set_control_flow_context(self)
-    # pylint: enable=protected-access
-
-  def __exit__(self, _, __, ___):  # pylint: disable=invalid-name
-    self._g._set_control_flow_context(self._old)  # pylint: disable=protected-access
-
-
-class _Inputs(object):
-  """A data structure representing the input_fn returned values.
-
-  This also supports the returned value from input_fn as `Dataset`.
-  """
-
-  def __init__(self, features=None, labels=None, dataset=None, signals=None):
-    if dataset is not None and (features is not None or labels is not None or
-                                signals is not None):
-      raise RuntimeError('Internal Error: Either (features and labels) or '
-                         'dataset should be provided, not both. Please file '
-                         'a bug.')
-
-    self._features = features
-    self._labels = labels
-    self._signals = signals
-
-    self._dataset = dataset
-    self._iterator = None
-
-  @staticmethod
-  def from_input_fn(return_values):
-    """Returns an `_Inputs` instance according to `input_fn` return value."""
-    if isinstance(return_values, dataset_ops.DatasetV2):
-      dataset = return_values
-      return _Inputs(dataset=dataset)
-
-    features, labels = _Inputs._parse_inputs(return_values)
-    return _Inputs(features, labels)
-
-  @staticmethod
-  def _parse_inputs(return_values):
-    if isinstance(return_values, tuple):
-      features, labels = return_values
-    else:
-      features, labels = return_values, None
-    return features, labels
-
-  @property
-  def is_dataset(self):
-    """Returns True if the return value from input_fn is Dataset."""
-    return self._dataset is not None
-
-  def dataset_initializer(self):
-    """Returns the dataset's initializer.
-
-    The initializer must be run before calling `features_and_labels`.
-    """
-    self._iterator = dataset_ops.make_initializable_iterator(self._dataset)
-    return self._iterator.initializer
-
-  def features_and_labels(self):
-    """Gets `features` and `labels`."""
-    if self.is_dataset:
-      if self._iterator is None:
-        raise RuntimeError('Internal error: Must run dataset_initializer '
-                           'before calling features_and_labels(). Please file '
-                           'a bug!')
-      return _Inputs._parse_inputs(self._iterator.get_next())
-
-    return (self._features, self._labels)
-
-  def signals(self):
-    return self._signals
-
-  @property
-  def dataset(self):
-    return self._dataset
-
-
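Sketch of how _Inputs normalizes the two supported input_fn return styles; input_fn and params are hypothetical, and the return value may be a (features, labels) tuple or a tf.data Dataset:

inputs = _Inputs.from_input_fn(input_fn(params))
if inputs.is_dataset:
  initializer = inputs.dataset_initializer()  # must run before the next call
features, labels = inputs.features_and_labels()
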
-class _InputsWithStoppingSignals(_Inputs):
-  """Inputs with `_StopSignals` inserted into the dataset."""
-
-  def __init__(self,
-               dataset,
-               batch_size,
-               add_padding=False,
-               num_invocations_per_step=1):
-
-    assert dataset is not None
-    user_provided_dataset = dataset.map(
-        _InputsWithStoppingSignals.insert_stopping_signal(
-            stop=False, batch_size=batch_size, add_padding=add_padding))
-    if num_invocations_per_step == 1:
-      final_batch_dataset = dataset.take(1).map(
-          _InputsWithStoppingSignals.insert_stopping_signal(
-              stop=True, batch_size=batch_size, add_padding=add_padding))
-    else:
-      # We append (2 * num_invocations_per_step - 1) batches to exhaust the
-      # user_provided_dataset and stop properly.
-      # For example, if num_invocations_per_step is 2, we append 3 additional
-      # padding batches: b1, b2, b3.
-      # If user_provided_dataset contains two batches: a1, a2
-      # Step 1: [a1, a2]
-      # Step 2: [b1, b2] -> STOP
-      # If user_provided_dataset contains three batches: a1, a2, a3.
-      # The training loops:
-      # Step 1: [a1, a2]
-      # Step 2: [a3, b1]
-      # Step 3: [b2, b3] -> STOP.
-      final_batch_dataset = dataset.take(1).map(
-          _InputsWithStoppingSignals.insert_stopping_signal(
-              stop=True, batch_size=batch_size, add_padding=add_padding))
-      final_batch_dataset = final_batch_dataset.repeat(
-          2 * num_invocations_per_step - 1)
-
-      def _set_mask(data_dict):
-        signals = data_dict['signals']
-        signals['padding_mask'] = array_ops.ones_like(signals['padding_mask'])
-        data_dict['signals'] = signals
-        return data_dict
-
-      # Mask out the extra batch.
-      final_batch_dataset = final_batch_dataset.map(_set_mask)
-
-    dataset = user_provided_dataset.concatenate(final_batch_dataset).prefetch(2)
-
-    super(_InputsWithStoppingSignals, self).__init__(dataset=dataset)
-    self._current_inputs = None
-
-  def features_and_labels(self):
-    if self._current_inputs is not None:
-      raise RuntimeError(
-          'Internal Error: The previous inputs have not been properly '
-          'consumed. First call features_and_labels, then call signals.')
-
-    inputs_with_signals = self._iterator.get_next()
-    features = inputs_with_signals['features']
-    labels = inputs_with_signals.get('labels')
-
-    self._current_inputs = inputs_with_signals
-    return features, labels
-
-  def signals(self):
-    """Returns the `Signals` from `_Inputs`."""
-    if self._current_inputs is None:
-      raise RuntimeError(
-          'Internal Error: The current inputs have not been properly '
-          'generated. First call features_and_labels, then call signals.')
-    signals = self._current_inputs['signals']
-    self._current_inputs = None
-    return signals
-
-  @staticmethod
-  def insert_stopping_signal(stop, batch_size, add_padding=False):
-    """Inserts stopping_signal into dataset via _map_fn.
-
-    Here we change the data structure of the dataset elements: each element
-    becomes a dictionary in which `features`, `labels`, and `signals` are
-    three distinct keys. This structure makes it easier to decompose the
-    inputs later (see `features_and_labels`).
-
-    Args:
-      stop: bool, state of current stopping signals.
-      batch_size: int, batch size.
-      add_padding: bool, whether to pad the tensor to full batch size.
-
-    Returns:
-      A map_fn passed to dataset.map API.
-    """
-
-    def _map_fn(*args):
-      """The map fn to insert signals."""
-      if len(args) == 1:
-        # Unpack the single Tensor/dict argument as features. This is required
-        # when the input_fn returns no labels.
-        args = args[0]
-      features, labels = _Inputs._parse_inputs(args)
-      new_input_dict = {}
-
-      if add_padding:
-        padding_mask, features, labels = (
-            _PaddingSignals.pad_features_and_labels(features, labels,
-                                                    batch_size))
-
-        new_input_dict['features'] = features
-        if labels is not None:
-          new_input_dict['labels'] = labels
-
-      else:
-        new_input_dict['features'] = features
-        if labels is not None:
-          new_input_dict['labels'] = labels
-        padding_mask = None
-
-      new_input_dict['signals'] = _StopSignals(
-          stop=stop, batch_size=batch_size,
-          padding_mask=padding_mask).as_dict()
-
-      return new_input_dict
-
-    return _map_fn
-
-
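After mapping insert_stopping_signal over a dataset, each element is a dict of the shape sketched below; labels appears only when the input_fn provides it, and padding_mask only when add_padding=True:

dataset = dataset.map(
    _InputsWithStoppingSignals.insert_stopping_signal(
        stop=False, batch_size=8, add_padding=True))
# element == {'features': ...,
#             'labels': ...,
#             'signals': {'stopping': <bool [8, 1]>,
#                         'padding_mask': <int32 [8]>}}
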
-class _StopSignals(object):
-  """Signals class holding all logic to handle TPU stopping condition."""
-
-  NON_STOPPING_SIGNAL = False
-  STOPPING_SIGNAL = True
-
-  def __init__(self, stop, batch_size, padding_mask=None):
-    self._stop = stop
-    self._batch_size = batch_size
-    self._padding_mask = padding_mask
-
-  def as_dict(self):
-    """Returns the signals as Python dict."""
-    shape = [self._batch_size, 1]
-    dtype = dtypes.bool
-
-    if self._stop:
-      stopping = array_ops.ones(shape=shape, dtype=dtype)
-    else:
-      stopping = array_ops.zeros(shape=shape, dtype=dtype)
-
-    signals = {'stopping': stopping}
-    if self._padding_mask is not None:
-      signals['padding_mask'] = self._padding_mask
-    return signals
-
-  @staticmethod
-  def as_scalar_stopping_signal(signals):
-    return array_ops.identity(signals['stopping'][0][0])
-
-  @staticmethod
-  def should_stop(scalar_stopping_signal):
-    """Detects whether scalar_stopping_signal indicates stopping."""
-    if isinstance(scalar_stopping_signal, ops.Tensor):
-      # STOPPING_SIGNAL is a constant True. Here, the logical_and is just the
-      # TF way to check whether scalar_stopping_signal is True.
-      return math_ops.logical_and(scalar_stopping_signal,
-                                  _StopSignals.STOPPING_SIGNAL)
-    else:
-      # In the non-Tensor case, this is called from a SessionRunHook, so we
-      # cannot modify the graph anymore; here, we use pure Python.
-      return bool(scalar_stopping_signal)
-
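should_stop is deliberately polymorphic, as sketched here (signals is a hypothetical signals dict of the shape built by as_dict above):

scalar = _StopSignals.as_scalar_stopping_signal(signals)
_StopSignals.should_stop(scalar)  # graph path: returns a bool Tensor
_StopSignals.should_stop(True)    # Python path (inside hooks): returns True
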
-
-class _PaddingSignals(object):
-  """Signals class holding all logic to handle padding."""
-
-  @staticmethod
-  def pad_features_and_labels(features, labels, batch_size):
-    """Pads out the batch dimension of features and labels."""
-    real_batch_size = array_ops.shape(
-        _PaddingSignals._find_any_tensor(features))[0]
-
-    batch_size_tensor = constant_op.constant(batch_size, dtypes.int32)
-
-    check_greater = check_ops.assert_greater_equal(
-        batch_size_tensor,
-        real_batch_size,
-        data=(batch_size_tensor, real_batch_size),
-        message='The real batch size should not be greater than batch_size.')
-
-    with ops.control_dependencies([check_greater]):
-      missing_count = batch_size_tensor - real_batch_size
-
-    def pad_single_tensor(tensor):
-      """Pads out the batch dimension of a tensor to the complete batch_size."""
-      rank = len(tensor.shape)
-      assert rank > 0
-      padding = array_ops.stack([[0, missing_count]] + [[0, 0]] * (rank - 1))
-      padded_shape = (batch_size,) + tuple(tensor.shape[1:])
-      padded_tensor = array_ops.pad(tensor, padding)
-      padded_tensor.set_shape(padded_shape)
-      return padded_tensor
-
-    def nest_pad(tensor_or_dict):
-      return nest.map_structure(pad_single_tensor, tensor_or_dict)
-
-    features = nest_pad(features)
-    if labels is not None:
-      labels = nest_pad(labels)
-
-    padding_mask = _PaddingSignals._padding_mask(real_batch_size, missing_count,
-                                                 batch_size)
-
-    return padding_mask, features, labels
-
-  @staticmethod
-  def slice_tensor_or_dict(tensor_or_dict, signals):
-    """Slice the real Tensors according to padding mask in signals."""
-
-    padding_mask = signals['padding_mask']
-    batch_size = array_ops.shape(padding_mask)[0]
-
-    def verify_batch_size(tensor):
-      check_batch_size = math_ops.equal(batch_size, tensor.shape[0])
-      with ops.control_dependencies([check_batch_size]):
-        return array_ops.identity(tensor)
-
-    def slice_single_tensor(tensor):
-      rank = len(tensor.shape)
-      assert rank > 0
-      real_batch_size = batch_size - math_ops.reduce_sum(padding_mask)
-      return verify_batch_size(tensor)[0:real_batch_size]
-
-    # As we split the Tensors across all TPU cores and concat them back, it is
-    # important to ensure that the real data is placed before the padded data,
-    # i.e., order is preserved. Given that, the sliced padding mask should be
-    # all 0's. If this assertion fails, the slice logic here would not hold.
-    sliced_padding_mask = slice_single_tensor(padding_mask)
-    assert_padding_mask = math_ops.equal(
-        math_ops.reduce_sum(sliced_padding_mask), 0)
-
-    with ops.control_dependencies([assert_padding_mask]):
-      should_stop = _StopSignals.should_stop(
-          _StopSignals.as_scalar_stopping_signal(signals))
-
-    is_full_batch = math_ops.equal(math_ops.reduce_sum(padding_mask), 0)
-
-    def slice_fn(tensor):
-      # If the current batch is full batch or part of stopping signals, we do
-      # not need to slice to save performance.
-      return control_flow_ops.cond(
-          math_ops.logical_or(should_stop, is_full_batch),
-          (lambda: verify_batch_size(tensor)),
-          (lambda: slice_single_tensor(tensor)))
-
-    return nest.map_structure(slice_fn, tensor_or_dict)
-
-  @staticmethod
-  def _find_any_tensor(batch_features):
-    tensors = [
-        x for x in nest.flatten(batch_features) if isinstance(x, ops.Tensor)
-    ]
-    if not tensors:
-      raise ValueError('Cannot find any Tensor in features dict.')
-    return tensors[0]
-
-  @staticmethod
-  def _padding_mask(real_batch_size, missing_count, batch_size):
-    padding_mask = array_ops.concat(
-        [
-            array_ops.zeros((real_batch_size,), dtype=dtypes.int32),
-            array_ops.ones((missing_count,), dtype=dtypes.int32)
-        ],
-        axis=0)
-    padding_mask.set_shape((batch_size,))
-    return padding_mask
-
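A worked numeric example of the mask layout above (plain numpy, a sketch): with batch_size=4 and a final real batch of 3 examples, one padded row is marked and later sliced away.

import numpy as np

batch_size, real_batch_size = 4, 3
mask = np.concatenate([np.zeros(real_batch_size, np.int32),
                       np.ones(batch_size - real_batch_size, np.int32)])
# mask == [0, 0, 0, 1]; slicing keeps rows [0:batch_size - mask.sum()] == [0:3]
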
-
-def _verify_cross_hosts_transfer_size(tensor_dict, message):
-  total_size = 0
-  tensor_structure = {}
-  for key, tensor in tensor_dict.items():
-    shape = tensor.shape
-    size = np.product(shape) * tensor.dtype.size
-    tensor_structure[key] = shape
-    total_size += size
-  if total_size >= _ONE_GIGABYTE:
-    raise ValueError(
-        '{} The transfer size is larger than the protobuf limit. Please '
-        'consider using Tensors with smaller shapes or reducing the batch '
-        'size. Given:\n'
-        '{}'.format(
-            message, '\n'.join([
-                ' -- Key: {}, Shape: {}'.format(k, v)
-                for k, v in tensor_structure.items()
-            ])))
-
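The size accounting above is bytes = prod(shape) * dtype.size; for example, a float32 tensor of shape (1024, 1024), using the module's np and dtypes imports:

size_bytes = np.product((1024, 1024)) * dtypes.float32.size  # 4 MiB
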
-
-def _add_item_to_params(params, key, value):
-  """Adds a new item into `params`."""
-  if isinstance(params, hparam.HParams):
-    # For HParams, we need to use a special API.
-    if key in params:
-      params.set_hparam(key, value)
-    else:
-      params.add_hparam(key, value)
-  else:
-    # Otherwise, params is a plain Python dict.
-    params[key] = value
-
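Usage sketch of the two branches above:

params = {'batch_size': 128}
_add_item_to_params(params, 'use_tpu', True)  # dict path: plain assignment
# With an hparam.HParams instance, the same call dispatches to set_hparam
# (existing key) or add_hparam (new key) instead.
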
-
-def export_estimator_savedmodel(estimator,
-                                export_dir_base,
-                                serving_input_receiver_fn,
-                                assets_extra=None,
-                                as_text=False,
-                                checkpoint_path=None,
-                                strip_default_attrs=False):
-  """Export `Estimator` trained model for TPU inference.
-
-  Args:
-    estimator: `Estimator` with which model has been trained.
-    export_dir_base: A string containing a directory in which to create
-      timestamped subdirectories containing exported SavedModels.
-    serving_input_receiver_fn: A function that takes no argument and returns a
-      `ServingInputReceiver` or `TensorServingInputReceiver`.
-    assets_extra: A dict specifying how to populate the assets.extra directory
-      within the exported SavedModel, or `None` if no extra assets are needed.
-    as_text: whether to write the SavedModel proto in text format.
-    checkpoint_path: The checkpoint path to export.  If `None` (the default),
-      the most recent checkpoint found within the model directory is chosen.
-    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
-      removed from the NodeDefs.
-
-  Returns:
-    The string path to the exported directory.
-  """
-  # `TPUEstimator` requires `tpu_config.RunConfig`, so we cannot use
-  # `estimator.config`.
-  config = tpu_config.RunConfig(model_dir=estimator.model_dir)
-  est = TPUEstimator(
-      estimator._model_fn,  # pylint: disable=protected-access
-      config=config,
-      params=estimator.params,
-      use_tpu=True,
-      train_batch_size=2048,  # Does not matter.
-      eval_batch_size=2048,  # Does not matter.
-  )
-  return est.export_savedmodel(export_dir_base, serving_input_receiver_fn,
-                               assets_extra, as_text, checkpoint_path,
-                               strip_default_attrs)
+# pylint: disable=wildcard-import,unused-import,redefined-builtin
+from tensorflow.python.tpu.tpu_estimator import *
+# used by tests
+from tensorflow.python.tpu.tpu_estimator import _clone_export_output_with_tensors
+from tensorflow.python.tpu.tpu_estimator import _create_global_step
+from tensorflow.python.tpu.tpu_estimator import _export_output_to_tensors
+from tensorflow.python.tpu.tpu_estimator import _get_scaffold
+from tensorflow.python.tpu.tpu_estimator import _Inputs
+from tensorflow.python.tpu.tpu_estimator import _TPU_ESTIMATOR
+# pylint: enable=wildcard-import,unused-import,redefined-builtin
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
deleted file mode 100644
index e3ea983..0000000
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""TPU Estimator Signalling Tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.tpu.python.tpu import tpu_estimator
-from tensorflow.python.client import session
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import test
-
-
-def make_input_fn(num_samples):
-  a = np.linspace(0, 100.0, num=num_samples)
-  b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1))
-
-  def input_fn(params):
-    batch_size = params['batch_size']
-    da1 = dataset_ops.Dataset.from_tensor_slices(a)
-    da2 = dataset_ops.Dataset.from_tensor_slices(b)
-
-    dataset = dataset_ops.Dataset.zip((da1, da2))
-    dataset = dataset.map(lambda fa, fb: {'a': fa, 'b': fb})
-    dataset = dataset.batch(batch_size)
-    return dataset
-  return input_fn, (a, b)
-
-
-def make_input_fn_with_labels(num_samples):
-  a = np.linspace(0, 100.0, num=num_samples)
-  b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1))
-
-  def input_fn(params):
-    batch_size = params['batch_size']
-    da1 = dataset_ops.Dataset.from_tensor_slices(a)
-    da2 = dataset_ops.Dataset.from_tensor_slices(b)
-
-    dataset = dataset_ops.Dataset.zip((da1, da2))
-    dataset = dataset.map(lambda fa, fb: ({'a': fa}, fb))
-    dataset = dataset.batch(batch_size)
-    return dataset
-  return input_fn, (a, b)
-
-
-class TPUEstimatorStoppingSignalsTest(test.TestCase):
-
-  def test_normal_output_without_signals(self):
-    num_samples = 4
-    batch_size = 2
-
-    params = {'batch_size': batch_size}
-    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
-
-    with ops.Graph().as_default():
-      dataset = input_fn(params)
-      features = dataset_ops.make_one_shot_iterator(dataset).get_next()
-
-      # With tf.data.Dataset.batch, the batch dimension is None, i.e., a
-      # dynamic shape.
-      self.assertIsNone(features['a'].shape.as_list()[0])
-
-      with session.Session() as sess:
-        result = sess.run(features)
-        self.assertAllEqual(a[:batch_size], result['a'])
-        self.assertAllEqual(b[:batch_size], result['b'])
-
-        # This run should work as num_samples / batch_size = 2.
-        result = sess.run(features)
-        self.assertAllEqual(a[batch_size:num_samples], result['a'])
-        self.assertAllEqual(b[batch_size:num_samples], result['b'])
-
-        with self.assertRaises(errors.OutOfRangeError):
-          # Given num_samples and batch_size, this run should fail.
-          sess.run(features)
-
-  def test_output_with_stopping_signals(self):
-    num_samples = 4
-    batch_size = 2
-
-    params = {'batch_size': batch_size}
-    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
-
-    with ops.Graph().as_default():
-      dataset = input_fn(params)
-      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size)
-      dataset_initializer = inputs.dataset_initializer()
-      features, _ = inputs.features_and_labels()
-      signals = inputs.signals()
-
-      # With tf.data.Dataset.batch, the batch dimension is None, i.e., a
-      # dynamic shape.
-      self.assertIsNone(features['a'].shape.as_list()[0])
-
-      with session.Session() as sess:
-        sess.run(dataset_initializer)
-
-        result, evaluated_signals = sess.run([features, signals])
-        self.assertAllEqual(a[:batch_size], result['a'])
-        self.assertAllEqual(b[:batch_size], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-
-        # This run should work as num_samples / batch_size = 2.
-        result, evaluated_signals = sess.run([features, signals])
-        self.assertAllEqual(a[batch_size:num_samples], result['a'])
-        self.assertAllEqual(b[batch_size:num_samples], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-
-        # This run should still work, *but* the signals now read STOP ('1').
-        _, evaluated_signals = sess.run([features, signals])
-        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
-
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(features)
-
-
-class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
-
-  def test_num_samples_divisible_by_batch_size(self):
-    num_samples = 4
-    batch_size = 2
-
-    params = {'batch_size': batch_size}
-    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
-
-    with ops.Graph().as_default():
-      dataset = input_fn(params)
-      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
-                                                        add_padding=True)
-      dataset_initializer = inputs.dataset_initializer()
-      features, _ = inputs.features_and_labels()
-      signals = inputs.signals()
-
-      # With padding, all shapes are static now.
-      self.assertEqual(batch_size, features['a'].shape.as_list()[0])
-
-      with session.Session() as sess:
-        sess.run(dataset_initializer)
-
-        result, evaluated_signals = sess.run([features, signals])
-        self.assertAllEqual(a[:batch_size], result['a'])
-        self.assertAllEqual(b[:batch_size], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-        self.assertAllEqual([0.] * batch_size,
-                            evaluated_signals['padding_mask'])
-
-        # This run should work as num_samples / batch_size = 2.
-        result, evaluated_signals = sess.run([features, signals])
-        self.assertAllEqual(a[batch_size:num_samples], result['a'])
-        self.assertAllEqual(b[batch_size:num_samples], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-        self.assertAllEqual([0.] * batch_size,
-                            evaluated_signals['padding_mask'])
-
-        # This run should still work, *but* the signals now read STOP ('1').
-        _, evaluated_signals = sess.run([features, signals])
-        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
-
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(features)
-
-  def test_num_samples_not_divisible_by_batch_size(self):
-    num_samples = 5
-    batch_size = 2
-
-    params = {'batch_size': batch_size}
-    input_fn, (a, b) = make_input_fn_with_labels(num_samples=num_samples)
-
-    with ops.Graph().as_default():
-      dataset = input_fn(params)
-      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
-                                                        add_padding=True)
-      dataset_initializer = inputs.dataset_initializer()
-      features, labels = inputs.features_and_labels()
-      signals = inputs.signals()
-
-      # With padding, all shapes are static.
-      self.assertEqual(batch_size, features['a'].shape.as_list()[0])
-
-      with session.Session() as sess:
-        sess.run(dataset_initializer)
-
-        evaluated_features, evaluated_labels, evaluated_signals = (
-            sess.run([features, labels, signals]))
-        self.assertAllEqual(a[:batch_size], evaluated_features['a'])
-        self.assertAllEqual(b[:batch_size], evaluated_labels)
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-        self.assertAllEqual([0.] * batch_size,
-                            evaluated_signals['padding_mask'])
-
-        # This run should work as num_samples / batch_size >= 2.
-        evaluated_features, evaluated_labels, evaluated_signals = (
-            sess.run([features, labels, signals]))
-        self.assertAllEqual(a[batch_size:2*batch_size], evaluated_features['a'])
-        self.assertAllEqual(b[batch_size:2*batch_size], evaluated_labels)
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-        self.assertAllEqual([0.] * batch_size,
-                            evaluated_signals['padding_mask'])
-
-        # This is the final partial batch.
-        evaluated_features, evaluated_labels, evaluated_signals = (
-            sess.run([features, labels, signals]))
-        real_batch_size = num_samples % batch_size
-
-        # Assert the real part.
-        self.assertAllEqual(a[2*batch_size:num_samples],
-                            evaluated_features['a'][:real_batch_size])
-        self.assertAllEqual(b[2*batch_size:num_samples],
-                            evaluated_labels[:real_batch_size])
-        # Assert the padded part.
-        self.assertAllEqual([0.0] * (batch_size - real_batch_size),
-                            evaluated_features['a'][real_batch_size:])
-        self.assertAllEqual([[0.0]] * (batch_size - real_batch_size),
-                            evaluated_labels[real_batch_size:])
-
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-
-        padding = ([0.] * real_batch_size
-                   + [1.] * (batch_size - real_batch_size))
-        self.assertAllEqual(padding, evaluated_signals['padding_mask'])
-
-        # This run should still work, *but* the signals now read STOP ('1').
-        _, evaluated_signals = sess.run([features, signals])
-        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
-
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(features)
-
-  def test_slice(self):
-    num_samples = 3
-    batch_size = 2
-
-    params = {'batch_size': batch_size}
-    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
-
-    with ops.Graph().as_default():
-      dataset = input_fn(params)
-      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
-                                                        add_padding=True)
-      dataset_initializer = inputs.dataset_initializer()
-      features, _ = inputs.features_and_labels()
-      signals = inputs.signals()
-
-      sliced_features = (
-          tpu_estimator._PaddingSignals.slice_tensor_or_dict(
-              features, signals))
-
-      with session.Session() as sess:
-        sess.run(dataset_initializer)
-
-        result, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertAllEqual(a[:batch_size], result['a'])
-        self.assertAllEqual(b[:batch_size], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-
-        # This is the final partial batch.
-        result, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertEqual(1, len(result['a']))
-        self.assertAllEqual(a[batch_size:num_samples], result['a'])
-        self.assertAllEqual(b[batch_size:num_samples], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-
-        # This run should still work, *but* the signals now read STOP ('1').
-        _, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
-
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(sliced_features)
-
-  def test_slice_with_multi_invocations_per_step(self):
-    num_samples = 3
-    batch_size = 2
-
-    params = {'batch_size': batch_size}
-    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
-
-    with ops.Graph().as_default():
-      dataset = input_fn(params)
-      inputs = tpu_estimator._InputsWithStoppingSignals(
-          dataset, batch_size, add_padding=True, num_invocations_per_step=2)
-      dataset_initializer = inputs.dataset_initializer()
-      features, _ = inputs.features_and_labels()
-      signals = inputs.signals()
-
-      sliced_features = (
-          tpu_estimator._PaddingSignals.slice_tensor_or_dict(features, signals))
-
-      with session.Session() as sess:
-        sess.run(dataset_initializer)
-
-        result, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertAllEqual(a[:batch_size], result['a'])
-        self.assertAllEqual(b[:batch_size], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-
-        # This is the final partial batch.
-        result, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertEqual(1, len(result['a']))
-        self.assertAllEqual(a[batch_size:num_samples], result['a'])
-        self.assertAllEqual(b[batch_size:num_samples], result['b'])
-        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
-
-        # We should see 3 consecutive batches with STOP ('1') as the signal,
-        # all of which have padding mask 1.
-        _, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
-        self.assertAllEqual([1.] * batch_size,
-                            evaluated_signals['padding_mask'])
-
-        _, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
-        self.assertAllEqual([1.] * batch_size,
-                            evaluated_signals['padding_mask'])
-
-        _, evaluated_signals = sess.run([sliced_features, signals])
-        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
-        self.assertAllEqual([1.] * batch_size,
-                            evaluated_signals['padding_mask'])
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(sliced_features)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
index d5957b7..af2542e 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
@@ -1,898 +1,25 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-
-"""Helper library for handling infeed between hosts and TPUs.
-"""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import itertools
-
-import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
-from tensorflow.contrib.tpu.python.ops import tpu_ops
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.contrib.tpu.python.tpu import tpu_sharding
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.util import nest
-
-
-class InfeedQueue(object):
-  """A helper object to build a device infeed queue.
-
-  The InfeedQueue builds the host-side and device-side Ops to enqueue and
-  dequeue elements, respectively, and ensures that their types and
-  shapes match.
-  """
-
-  def __init__(self,
-               number_of_tuple_elements=None,
-               tuple_types=None,
-               tuple_shapes=None,
-               shard_dimensions=None,
-               name=None):
-    """Creates a new InfeedQueue with the given configuration.
-
-    The configuration need not be fully specified at creation since it
-    can be modified subsequently by methods that set the values
-    explicitly or infer them from the shapes of inputs.
-
-    Args:
-      number_of_tuple_elements: the number of Tensors fed atomically through the
-        queue, must be present unless it can be inferred from other arguments.
-      tuple_types: if not None, a list of types of the elements of the queue.
-      tuple_shapes: if not None, a list of shapes of the elements of the queue.
-      shard_dimensions: if not None, a list of dimensions on which the
-        elements of the queue should be sharded during automatic
-        parallelization.
-      name: the name of the queue.
-
-    Raises:
-      ValueError: if number_of_tuple_elements <= 0; or
-        number_of_tuple_arguments, tuple_types, tuple_shapes, and
-        shard_dimensions are all None; or the length of tuple_types,
-        tuple_shapes, or shard_dimensions is not equal to
-        number_of_tuple_elements; or any element of shard_dimensions
-        can't be converted to a Dimension.
-      TypeError: if any element of tuple_types or tuple_shapes can't
-        be converted to a dtype or TensorShape, respectively.
-    """
-    self._frozen = False
-    self._generated_enqueue_ops = False
-    self._generated_dequeue_op = False
-    self._name = "InfeedQueue" if name is None else name
-    if number_of_tuple_elements is None:
-      if tuple_types is not None:
-        number_of_tuple_elements = len(tuple_types)
-      elif tuple_shapes is not None:
-        number_of_tuple_elements = len(tuple_shapes)
-      elif shard_dimensions is not None:
-        number_of_tuple_elements = len(shard_dimensions)
-      else:
-        raise ValueError(
-            "number of tuple elements cannot be inferred from InfeedQueue "
-            "constructor")
-    if number_of_tuple_elements <= 0:
-      raise ValueError("number_of_tuple_elements %d must be > 0" %
-                       number_of_tuple_elements)
-    # Make an empty sharding policy for each tuple element.
-    self._sharding_policies = [
-        tpu_sharding.ShardingPolicy()
-        for _ in xrange(number_of_tuple_elements)
-    ]
-    if tuple_types is not None:
-      self.set_tuple_types(tuple_types)
-    else:
-      self._tuple_types = None
-    if tuple_shapes is not None:
-      self.set_tuple_shapes(tuple_shapes)
-    else:
-      self._tuple_shapes = None
-    if shard_dimensions is not None:
-      self.set_shard_dimensions(shard_dimensions)
-    self._validate()
-
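
For orientation, a minimal construction sketch (mirroring the tpu_infeed_test.py deleted later in this change); the element count may be given explicitly or inferred from tuple_types, tuple_shapes, or shard_dimensions:

from tensorflow.contrib.tpu.python.tpu import tpu_feed
from tensorflow.python.framework import dtypes

# Element count inferred from tuple_types; shapes remain unset for now.
q = tpu_feed.InfeedQueue(tuple_types=[dtypes.float32, dtypes.int32])
assert q.number_of_tuple_elements == 2
assert q.tuple_shapes is None
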
-  def _validate(self):
-    """Checks that the configuration is self-consistent.
-
-    Raises:
-      ValueError: if the shapes and sharding policies don't match.
-    """
-    if self.tuple_shapes is not None:
-      for (policy, shape) in zip(self._sharding_policies, self._tuple_shapes):
-        # Raise an error if the policy is incompatible with the shape.
-        _ = policy.get_sharded_shape(shape)
-
-  @property
-  def number_of_tuple_elements(self):
-    """Returns the number of InfeedQueue tuple elements."""
-    return len(self._sharding_policies)
-
-  @property
-  def tuple_types(self):
-    """Returns the types of the InfeedQueue tuple elements."""
-    return self._tuple_types
-
-  def set_tuple_types(self, tuple_types):
-    """Sets the type of each element of the queue.
-
-    tuple_types must be a list of length
-    self.number_of_tuple_elements, and each element must be
-    convertible to a dtype.
-
-    Args:
-      tuple_types: the types of each queue element.
-
-    Raises:
-      ValueError: if tuple_types is not of length
-        self.number_of_tuple_elements.
-      TypeError: if an element of tuple_types cannot be converted to a
-        dtype.
-    """
-    if len(tuple_types) != self.number_of_tuple_elements:
-      raise ValueError("tuple_types is %s, but must be a list of length %d" %
-                       (str(tuple_types), self.number_of_tuple_elements))
-    if self._frozen:
-      for (frozen, updated) in zip(self._tuple_types, tuple_types):
-        if frozen != updated:
-          raise ValueError(
-              "Trying to update InfeedQueue with frozen configuration with an "
-              "incompatible type. Frozen types are %s, updated types are %s" % (
-                  str(self._tuple_types), str(tuple_types)))
-    else:
-      try:
-        self._tuple_types = [dtypes.as_dtype(t) for t in tuple_types]
-      except (TypeError) as e:
-        raise TypeError(
-            "tuple_types is %s, but must be a list of elements each "
-            "convertible to dtype: got error %s" % (str(tuple_types), str(e)))
-
-  @property
-  def tuple_shapes(self):
-    """Returns the shapes of the InfeedQueue tuple elements."""
-    return self._tuple_shapes
-
-  def set_tuple_shapes(self, tuple_shapes):
-    """Sets the shape of each element of the queue.
-
-    tuple_shapes must be a list of length
-    self.number_of_tuple_elements, and each element must be
-    convertible to a TensorShape.
-
-    Args:
-      tuple_shapes: the shapes of each queue element.
-
-    Raises:
-      ValueError: if tuple_shapes is not of length
-        self.number_of_tuple_elements.
-      TypeError: if an element of tuple_shapes cannot be converted to
-        a TensorShape.
-    """
-    if len(tuple_shapes) != self.number_of_tuple_elements:
-      raise ValueError("tuple_shapes is %s, but must be a list of length %d" %
-                       (str(tuple_shapes), self.number_of_tuple_elements))
-    try:
-      tuple_shapes = [tensor_shape.as_shape(shape) for shape in tuple_shapes]
-    except (ValueError, TypeError) as e:
-      raise TypeError(
-          "tuple_shapes is %s, but must be a list of elements each "
-          "convertible to TensorShape: got error %s" % (str(tuple_shapes),
-                                                        str(e)))
-    if self._frozen:
-      for (frozen, updated) in zip(self._tuple_shapes, tuple_shapes):
-        if frozen != updated:
-          raise ValueError(
-              "Trying to update InfeedQueue with frozen configuration with an "
-              "incompatible shape. Frozen shapes are %s, updated shapes are %s"
-              % (str(self._tuple_shapes), str(tuple_shapes)))
-    else:
-      self._tuple_shapes = tuple_shapes
-    self._validate()
-
-  @property
-  def sharding_policies(self):
-    """Returns the sharding policies of the InfeedQueue tuple elements."""
-    return self._sharding_policies
-
-  @property
-  def shard_dimensions(self):
-    """Gets the shard dimension of each tuple element.
-
-    Returns:
-      A list of length number_of_tuple_elements, where each list entry
-      is the shard dimension of that tuple element or None if the
-      shard dimension has not been set.
-    """
-    return [policy.shard_dimension for policy in self._sharding_policies]
-
-  def set_shard_dimensions(self, shard_dimensions):
-    """Sets the shard_dimension of each element of the queue.
-
-    shard_dimensions must be a list of length
-    self.number_of_tuple_elements, and each element must be
-    convertible to a Dimension compatible with self.tuple_shapes.
-
-    Args:
-      shard_dimensions: the dimensions of each queue element.
-
-    Raises:
-      ValueError: if shard_dimensions is not of length
-        self.number_of_tuple_elements; or an element of
-        shard_dimensions cannot be converted to a Dimension; or an
-        element of shard_dimensions is a Dimension that is out of
-        range for the corresponding tuple element shape.
-    """
-    if len(shard_dimensions) != self.number_of_tuple_elements:
-      raise ValueError("shard_dimensions is %s, but must be a list of length %d"
-                       % (str(shard_dimensions),
-                          self.number_of_tuple_elements))
-    for (policy, dimension) in zip(self._sharding_policies, shard_dimensions):
-      policy.set_shard_dimension(dimension)
-    self._validate()
-
-  @property
-  def number_of_shards(self):
-    """Gets the number of shards to use for the InfeedQueue.
-
-    Returns:
-      Number of shards or None if the number of shards has not been set.
-    """
-    # The number of shards is always the same for all the policies.
-    return self._sharding_policies[0].number_of_shards
-
-  def set_number_of_shards(self, number_of_shards):
-    """Sets the number of shards to use for the InfeedQueue.
-
-    Args:
-      number_of_shards: number of ways to shard the InfeedQueue.
-
-    Raises:
-      ValueError: if number_of_shards is not > 0; or the policies have
-        been frozen and number_of_shards was already set to something
-        else.
-    """
-    for policy in self._sharding_policies:
-      policy.set_number_of_shards(number_of_shards)
-    self._validate()
-
-  def set_configuration_from_input_tensors(self, input_tensors):
-    """Sets the shapes and types of the queue tuple elements.
-
-    input_tensors is a list of Tensors whose types and shapes are used
-    to set the queue configuration.
-
-    Args:
-      input_tensors: list of Tensors of the same types and shapes as
-        the desired queue Tuple.
-
-    Raises:
-      ValueError: if input_tensors is not a list of length
-        self.number_of_tuple_elements
-    """
-    if len(input_tensors) != self.number_of_tuple_elements:
-      raise ValueError("input_tensors is %s, but should be a list of %d Tensors"
-                       % (str(input_tensors), self.number_of_tuple_elements))
-    self.set_tuple_shapes([t.shape for t in input_tensors])
-    self.set_tuple_types([t.dtype for t in input_tensors])
-
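
A short sketch of this method, grounded in the behaviour exercised by the deleted test file below; the queue simply adopts the shapes and dtypes of the given tensors:

from tensorflow.contrib.tpu.python.tpu import tpu_feed
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes

t1 = constant_op.constant(1, dtypes.int32, shape=[6])
t2 = constant_op.constant(2.0, dtypes.float32, shape=[3, 18])
q = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
q.set_configuration_from_input_tensors([t1, t2])
# q.tuple_shapes == [[6], [3, 18]]; q.tuple_types == [int32, float32]
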
-  def set_configuration_from_sharded_input_tensors(self, input_tensors):
-    """Sets the shapes and types of the queue tuple elements.
-
-    input_tensors is a list of lists of Tensors whose types and shapes are used
-    to set the queue configuration. The length of the outer list is the number
-    of shards required, and each inner list is the tuple of Tensors to use to
-    determine the types and shapes of the corresponding shard. This method
-    depends on the shard dimension, and calling it freezes the shard policy.
-
-    Args:
-      input_tensors: list of lists of Tensors. The outer list length corresponds
-        to the desired number of shards, and each inner list supplies the types
-        and shapes of the corresponding shard's tuple.
-
-    Raises:
-      ValueError: if any inner list is not a list of length
-        self.number_of_tuple_elements; or the inner lists do not combine to
-        form a consistent unsharded shape.
-      TypeError: if the types of the Tensors in the inner lists do not match.
-    """
-    if not self._frozen:
-      # Unset the tuple shapes in case the configuration becomes
-      # transiently inconsistent.
-      self._tuple_shapes = None
-    number_of_shards = len(input_tensors)
-    self.set_number_of_shards(number_of_shards)
-    for t in input_tensors:
-      if len(t) != self.number_of_tuple_elements:
-        raise ValueError(
-            "input_tensors is %s but must be a list of lists, where each inner"
-            " list has length number_of_tuple_elements=%d" % (
-                str(input_tensors), self.number_of_tuple_elements))
-    # Transpose the inputs to make a list of shard shapes for each tuple
-    # element.
-    sharded_shapes = [[t[i].shape for t in input_tensors]
-                      for i in xrange(self.number_of_tuple_elements)]
-    # For each tuple, get the unsharded shape using that tuple's policy.
-    unsharded_shapes = [
-        policy.get_unsharded_shape(s)
-        for (policy, s) in zip(self._sharding_policies, sharded_shapes)
-    ]
-    self.set_tuple_shapes(unsharded_shapes)
-    for i in xrange(1, self.number_of_shards):
-      for (t1, t2) in zip(input_tensors[0], input_tensors[i]):
-        if t1.dtype != t2.dtype:
-          raise TypeError(
-              "types of the tuple elements of input_tensors %s are not "
-              "consistent" % str(input_tensors))
-    self.set_tuple_types([t.dtype for t in input_tensors[0]])
-
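
Continuing the sketch above with two shards, each supplying (t2, t1): the unsharded shapes are recovered by combining each element's shard shapes along its shard dimension (dimension 0 by default):

q = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
q.set_configuration_from_sharded_input_tensors([[t2, t1], [t2, t1]])
# q.number_of_shards == 2
# q.tuple_shapes == [[6, 18], [12]]  (two [3, 18] and two [6] shards)
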
-  def freeze(self):
-    """Freezes the InfeedQueue so it can no longer be modified.
-
-    The configuration is implicitly frozen before any host-side or
-    device-side Ops are generated. The configuration cannot be frozen
-    until the types and shapes of the tuple elements have been set.
-
-    Raises:
-      ValueError: if the types or shapes of the tuple elements have not been
-        set.
-    """
-    self._frozen = True
-    if self._tuple_types is None:
-      raise ValueError(
-          "Can't freeze an InfeedQueue without setting all tuple types.")
-    if self._tuple_shapes is None:
-      raise ValueError(
-          "Can't freeze an InfeedQueue without setting all tuple shapes.")
-    for shape in self._tuple_shapes:
-      if shape.dims is None:
-        raise ValueError(
-            "Can't freeze an InfeedQueue without setting all tuple shapes.")
-    for policy in self._sharding_policies:
-      policy.freeze()
-    self._validate()
-
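
Once frozen, re-setting a value to the identical frozen value is accepted while any change raises, as the deleted testFreezing below demonstrates:

q.freeze()
q.set_number_of_shards(2)      # fine: matches the frozen value
# q.set_number_of_shards(1)    # would raise ValueError
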
-  def generate_dequeue_op(self, tpu_device=0):
-    """Generates the device-side Op to dequeue a tuple from the queue.
-
-    Implicitly freezes the queue configuration if it is not already
-    frozen, which will raise errors if the shapes and types have not
-    been fully specified.
-
-    Args:
-      tpu_device: The TPU device ordinal where the infeed instruction should be
-        placed. If None, no explicit placement will be performed, and it is up
-        to the user to call this API from within a proper TPU device scope.
-        The XLA code will fail if the TPU dequeue instruction is not bound to
-        any device.
-
-    Returns:
-      A list of Outputs corresponding to a shard of infeed dequeued
-      into XLA, suitable for use within a replicated block.
-
-    Raises:
-      ValueError: if the types or shapes of the tuple elements have not been
-        set; or if a dequeue op has already been generated.
-    """
-    self.freeze()
-    if self._generated_dequeue_op:
-      raise ValueError("Can't generate two dequeue Ops from the same queue")
-    self._generated_dequeue_op = True
-    full_name = "%s/dequeue" % self._name
-    sharded_shapes = [
-        policy.get_sharded_shape(shape)
-        for (shape, policy) in zip(self._tuple_shapes, self._sharding_policies)
-    ]
-    if tpu_device is not None:
-      with ops.device(tpu.core(tpu_device)):
-        return tpu_ops.infeed_dequeue_tuple(
-            dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name)
-    else:
-      return tpu_ops.infeed_dequeue_tuple(
-          dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name)
-
-  def _generate_enqueue_op(self,
-                           inputs,
-                           name_prefix,
-                           index,
-                           device=None,
-                           tpu_ordinal=-1):
-    """Generate a host-side Op to enqueue a tuple to the queue.
-
-    If device is None the inputs are all required to have the same
-    device specification, and the enqueue Op is colocated with
-    inputs[0]. Otherwise the enqueue Op is placed on 'device'.
-
-    Args:
-      inputs: a list of Tensors with the types and shapes of the tuple elements.
-      name_prefix: the base name for the Op.
-      index: the shard index, used to uniquify the Op name.
-      device: device to place the Op on, or None if it should be
-        colocated with the inputs.
-      tpu_ordinal: ordinal of the TPU device on the host to use for
-        infeed if device is a CPU device. Should be set to -1 if device
-        is a TPU device.
-
-    Returns:
-      An Op corresponding to a shard of infeed enqueued at the host,
-      suitable for use within a replicated block.
-
-    Raises:
-      ValueError: if device is None and inputs do not all have the
-        same device specification.
-    """
-    full_name = "%s/%d" % (name_prefix, index)
-    shapes = [t.shape for t in inputs]
-    if device is None:
-      devices = [t.device for t in inputs]
-      for i in xrange(1, self.number_of_tuple_elements):
-        if devices[0] != devices[i]:
-          raise ValueError(
-              "input devices for shard %d are %s, but should all be the same" %
-              (index, str(devices)))
-      with ops.colocate_with(inputs[0]):
-        return tpu_ops.infeed_enqueue_tuple(
-            inputs=inputs,
-            shapes=shapes,
-            name=full_name,
-            device_ordinal=tpu_ordinal)
-    else:
-      with ops.device(device):
-        return tpu_ops.infeed_enqueue_tuple(
-            inputs=inputs,
-            shapes=shapes,
-            name=full_name,
-            device_ordinal=tpu_ordinal)
-
-  def generate_enqueue_ops(self,
-                           sharded_inputs,
-                           tpu_ordinal_function=None,
-                           placement_function=None):
-    """Generates the host-side Ops to enqueue the shards of a tuple.
-
-    sharded_inputs is a list, one for each shard, of lists of
-    Tensors. sharded_inputs[0] is the tuple of Tensors to use to feed
-    shard 0 of the queue. Returns the host-side Ops that must be run to
-    enqueue the sharded tuple. The Op for shard i is colocated with the inputs
-    for shard i.
-
-    Implicitly freezes the queue configuration if it is not already
-    frozen. If the configuration has already been frozen, and is not
-    compatible with the types and shapes of sharded_inputs, an error
-    will be raised.
-
-    Args:
-      sharded_inputs: a list of lists of Tensors. The length of the outer list
-        determines the number of shards. Each inner list indicates the types
-        and shapes of the tuples in the corresponding shard.
-      tpu_ordinal_function: if not None, a function that takes the
-        shard index as input and returns the ordinal of the TPU device
-        the shard's infeed should be placed on. tpu_ordinal_function must be
-        set if the inputs are placed on CPU devices.
-      placement_function: if not None, a function that takes the shard index as
-        input and returns the host device on which the enqueue op should be
-        placed.
-
-    Returns:
-      A list of host-side Ops, one for each shard, that when executed together
-      will enqueue a full-size element of infeed.
-
-    Raises:
-      ValueError: if the queue configuration has previously been frozen and the
-        shapes of the elements of sharded_inputs are not compatible with the
-        frozen configuration; or if the shapes of the elements of sharded_inputs
-        don't form a consistent unsharded tuple; or if the elements of a tuple
-        have different device constraints.
-      TypeError: if the queue configuration has previously been frozen and the
-        types of the elements of sharded_inputs are not compatible with the
-        frozen configuration; or if the types of the elements of sharded_inputs
-        don't form a consistent unsharded tuple.
-    """
-    self.set_configuration_from_sharded_input_tensors(sharded_inputs)
-    self.freeze()
-    if self._generated_enqueue_ops:
-      raise ValueError("Can't generate two enqueue Ops from the same queue")
-    self._generated_enqueue_ops = True
-    if tpu_ordinal_function is None:
-      tpu_ordinal_function = lambda index: -1
-    name_prefix = "%s/enqueue" % self._name
-    return [
-        self._generate_enqueue_op(
-            shard,
-            name_prefix,
-            index,
-            tpu_ordinal=tpu_ordinal_function(index),
-            device=placement_function(index) if placement_function else None)
-        for (shard, index) in zip(sharded_inputs, xrange(self.number_of_shards))
-    ]
-
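
A hypothetical end-to-end sketch (t1/t2 as above): with CPU-placed inputs, tpu_ordinal_function must map each shard index to a TPU ordinal:

q = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
enqueue_ops = q.generate_enqueue_ops(
    [[t2, t1], [t2, t1]],                      # two shards
    tpu_ordinal_function=lambda index: index)  # shard i -> TPU ordinal i
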
-  # TODO(misard) Generalize this to the case of systems that don't
-  # have 8 devices per host, and figure out what to do with
-  # model-parallelism.
-  def _default_placement_function(self, index):
-    return "/task:%d/device:CPU:0" % (index / 8)
-
-  def _default_ordinal_function(self, index):
-    return index % 8
-
-  # TODO(b/36470756) remove this from tutorials once we have a better story
-  # for automatic placement of input pipelines.
-  def split_inputs_and_generate_enqueue_ops(self,
-                                            inputs,
-                                            device_assignment=None,
-                                            placement_function=None,
-                                            tpu_ordinal_function=None):
-    """POORLY-PERFORMING ON MULTI-HOST SYSTEMS.
-
-    Generates the host-side Ops to enqueue a tuple.
-
-    This method performs poorly because it takes an entire input on a single
-    host, splits it, and distributes it to all of the cores. It is present only
-    to simplify tutorial examples.
-
-    inputs is a list of Tensors to use to feed the queue. Each input is split
-    into self.number_of_shards shards. Returns an Op for each shard to enqueue
-    the shard. The Op for shard i is placed on device placement_function(i).
-
-    Implicitly freezes the queue configuration if it is not already
-    frozen. If the configuration has already been frozen, and is not
-    compatible with the types and shapes of inputs, an error
-    will be raised.
-
-    Args:
-      inputs: a list of Tensors which indicates the types and shapes of the
-        queue tuple.
-      device_assignment: if not `None`, a TPU `DeviceAssignment`. If
-        device_assignment is not `None`, but `placement_function` and
-        `tpu_ordinal_function` are None, then `device_assignment` will be used to
-        place infeeds on the first k TPU shards, where k is the number of shards
-        in the queue. If all three are `None`, then default placement and
-        ordinal functions are used.
-      placement_function: if not None, a function that takes the shard
-        index as input and returns a device string indicating which
-        device the shard's infeed should be placed on. If placement_function
-        and tpu_ordinal_function are None, inputs are sharded round-robin
-        across the devices in the system.
-      tpu_ordinal_function: if not None, a function that takes the
-        shard index as input and returns the ordinal of the TPU device
-        the shard's infeed should be placed on. If placement_function
-        and tpu_ordinal_function are None, inputs are sharded round-robin
-        across the devices in the system.
-
-    Returns:
-      A list of host-side Ops, one for each shard, that when executed together
-      will enqueue a full-size element of infeed.
-
-    Raises:
-      ValueError: if the queue configuration has previously been frozen and the
-        shapes of the elements of inputs are not compatible with the frozen
-        configuration.
-      TypeError: if the queue configuration has previously been frozen and the
-        types of the elements of inputs are not compatible with the frozen
-        configuration.
-    """
-    if device_assignment is None:
-      if placement_function is None:
-        placement_function = self._default_placement_function
-      if tpu_ordinal_function is None:
-        tpu_ordinal_function = self._default_ordinal_function
-    else:
-
-      def _placement_function_from_map(index):
-        return device_assignment.host_device(replica=index)
-
-      def _ordinal_function_from_map(index):
-        return device_assignment.tpu_ordinal(replica=index)
-
-      if placement_function is None:
-        placement_function = _placement_function_from_map
-      if tpu_ordinal_function is None:
-        tpu_ordinal_function = _ordinal_function_from_map
-    self.set_configuration_from_input_tensors(inputs)
-    self.freeze()
-    if self._generated_enqueue_ops:
-      raise ValueError("Can't generate two enqueue Ops from the same queue")
-    self._generated_enqueue_ops = True
-    split_name_prefix = "%s/split" % self._name
-    if self.number_of_shards == 1:
-      transposed_sharded_inputs = [[inp] for inp in inputs]
-    else:
-
-      def split_fn(inp, num_shards, axis, name):
-        with ops.colocate_with(inp):
-          return array_ops.split(inp, num_shards, axis=axis, name=name)
-
-      transposed_sharded_inputs = [
-          split_fn(
-              inp,
-              self.number_of_shards,
-              axis=policy.shard_dimension,
-              name="%s/%d" % (split_name_prefix, index))
-          for (inp, policy, index) in zip(inputs, self._sharding_policies,
-                                          xrange(self.number_of_tuple_elements))
-      ]
-    sharded_inputs = [[shard[i] for shard in transposed_sharded_inputs]
-                      for i in xrange(self.number_of_shards)]
-    name_prefix = "%s/enqueue" % self._name
-    return [
-        self._generate_enqueue_op(
-            shard,
-            name_prefix,
-            index,
-            device=placement_function(index),
-            tpu_ordinal=tpu_ordinal_function(index))
-        for (shard, index) in zip(sharded_inputs, xrange(self.number_of_shards))
-    ]
-
-
-class _PartitionedInfeedQueue(InfeedQueue):
-  """A helper object to build a device infeed queue with input partition.
-
-  Args:
-    number_of_tuple_elements: the number of Tensors fed atomically through the
-      queue; must be present unless it can be inferred from other arguments.
-    device_assignment: A TPU `DeviceAssignment` which is used to place all the
-      partitions to different TPU infeed queues.
-    host_id: The id of the host machine.
-    input_partition_dims: A nested list/tuple of integers. Each inner
-      list/tuple describes how to partition the corresponding input tensor.
-    tuple_types: If not None, a list of types of the elements of the queue.
-    tuple_shapes: If not None, a list of shapes of the elements of the queue.
-    name: The name of the queue.
-  """
-
-  def __init__(self,
-               number_of_tuple_elements,
-               device_assignment,
-               host_id,
-               input_partition_dims=None,
-               tuple_types=None,
-               tuple_shapes=None,
-               name=None):
-    super(_PartitionedInfeedQueue, self).__init__(
-        number_of_tuple_elements=number_of_tuple_elements,
-        tuple_types=tuple_types,
-        tuple_shapes=None,
-        shard_dimensions=None,
-        name="PartitionedInfeedQueue" if name is None else name)
-    self._input_partition_dims = input_partition_dims
-    self._host_id = host_id
-    self._device_assignment = device_assignment
-
-  def generate_dequeue_op(self, tpu_device=0):
-    """Generate TPU dequeue ops.
-
-    Args:
-      tpu_device: The TPU device ordinal where the infeed instruction should be
-        placed.
-
-    Returns:
-      A list of Outputs corresponding to a partition of infeed dequeued
-      into XLA, suitable for use within a replicated block.
-
-    Raises:
-      ValueError: if the types or shapes of the tuple elements have not been
-        set; or if a dequeue op has already been generated.
-    """
-    self.freeze()
-    if self._generated_dequeue_op:
-      raise ValueError("Can't generate two dequeue Ops from the same queue")
-    self._generated_dequeue_op = True
-    full_name = "%s/dequeue" % self._name
-    sharded_shapes = [
-        policy.get_sharded_shape(shape)
-        for (shape, policy) in zip(self._tuple_shapes, self._sharding_policies)
-    ]
-    with ops.device(tpu.core(tpu_device)):
-      values = tpu_ops.infeed_dequeue_tuple(
-          dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name)
-    return self._tag_sharding_attribute_for_dequeued_tensors(
-        values, self._input_partition_dims)
-
-  def generate_enqueue_ops(self, per_host_sharded_inputs):
-    """Generates the host-side Ops to enqueue the partitioned inputs.
-
-    per_host_sharded_inputs is a list, one for each replica, of lists of
-    Tensors. per_host_sharded_inputs[i] is the tuple of Tensors to use to
-    feed replica i.
-    per_host_sharded_inputs[i][j] is partitioned by
-    self._input_partition_dims[j].
-
-    For example, if per_host_sharded_inputs[i][j] is a 2-D Tensor:
-    [[A, B, C, D],
-     [E, F, G, H]]
-    and self._input_partition_dims[j] is [2, 4], then
-    per_host_sharded_inputs[i][j] will be partitioned and flattened into:
-    [A, B, C, D, E, F, G, H] and fed into the logical core ids:
-    [0, 1, 2, 3, 4, 5, 6, 7] respectively.
-
-    Args:
-      per_host_sharded_inputs: a list of lists of Tensors. The length of the
-        outer list determines the number of replicas on this host. Each inner
-        list indicates the types and shapes of the tuple fed to the
-        corresponding replica.
-
-    Returns:
-      A list of host-side Ops, one for each shard, that when executed together
-      will enqueue a full-size element of infeed.
-
-    Raises:
-      ValueError: if the queue configuration has previously been frozen and the
-        shapes of the elements of per_host_sharded_inputs are not compatible
-        with the frozen configuration; or if the shapes of the elements of
-        per_host_sharded_inputs don't form a consistent unsharded tuple; or if
-        the elements of a tuple have different device constraints; or if the
-        partition dims are invalid.
-      TypeError: if the queue configuration has previously been frozen and the
-        types of the elements of per_host_sharded_inputs are not compatible
-        with the frozen configuration; or if the types of the elements of
-        per_host_sharded_inputs don't form a consistent unsharded tuple.
-    """
-    self.set_configuration_from_sharded_input_tensors(per_host_sharded_inputs)
-    number_of_replicas_per_host = len(per_host_sharded_inputs)
-    number_of_tuple_elements = len(per_host_sharded_inputs[0])
-
-    assert len(self._input_partition_dims) == number_of_tuple_elements
-    per_host_enqueue_ops = []
-
-    for replica_index in range(number_of_replicas_per_host):
-      flattened_inputs = per_host_sharded_inputs[replica_index]
-      inputs_part_dims_flat = nest.flatten_up_to(flattened_inputs,
-                                                 self._input_partition_dims)
-      inputs_parted_iters = [
-          iter(self._partition_or_replicate_on_host(x, dims)) for x, dims in
-          zip(per_host_sharded_inputs[replica_index], inputs_part_dims_flat)
-      ]
-
-      for logical_core in xrange(self._device_assignment.num_cores_per_replica):
-        # Place different partitions on different logical cores.
-        replica_id = self._device_assignment.lookup_replicas(
-            self._host_id, logical_core)[replica_index]
-        ordinal = self._device_assignment.tpu_ordinal(
-            replica=replica_id, logical_core=logical_core)
-        infeed_inputs = []
-        for it in inputs_parted_iters:
-          input_for_device = next(it, None)
-          if input_for_device is not None:
-            infeed_inputs.append(input_for_device)
-
-        if infeed_inputs:
-          per_host_enqueue_ops.append(
-              tpu_ops.infeed_enqueue_tuple(
-                  inputs=infeed_inputs,
-                  shapes=[x.shape for x in infeed_inputs],
-                  name="enqueue/replica_{0}/input_{1}".format(
-                      replica_index, logical_core),
-                  device_ordinal=ordinal))
-    return per_host_enqueue_ops
-
-  def _check_input_partition_dims(self, tensor, dims):
-    """Checks that input partition dims are valid for the `Tensor`.
-
-    Args:
-      tensor: Input tensor for partitioning.
-      dims: 1-D np.array of integers describing how to partition the
-        input tensor.
-
-    Raises:
-      ValueError: If the tensor can't be partitioned by dims or the
-        num_cores_per_replica doesn't match the number of
-        partitions (dims.prod()).
-    """
-    if (dims < 1).any():
-      raise ValueError("All input partition dims must be >= 1.")
-
-    # No partitioning, so don't perform further checks.
-    if dims.prod() == 1:
-      return
-
-    if dims.prod() != self._device_assignment.num_cores_per_replica:
-      raise ValueError(
-          "The product of each input parition dim should equal to "
-          "num_cores_per_replica. (dim = {}, num_cores_per_replica "
-          "= {})".format(dims, self._device_assignment.num_cores_per_replica))
-    if dims.shape[0] != tensor.shape.ndims:
-      raise ValueError(
-          "Input partition dims must have the same number of dimensions "
-          "as the `Tensor` to be partitioned. (tensor shape = {}, input "
-          "partition dims = {}).".format(tensor.shape.as_list(), dims))
-
-    tensor.shape.assert_is_fully_defined()
-
-  def _partition_or_replicate_on_host(self, tensor, dims):
-    """Partitions or replicates the input tensor.
-
-    The ops inside this function are placed on the host side.
-
-    Args:
-      tensor: The input tensor which will be partitioned or replicated.
-      dims: A list of integers describing how to partition the input tensor.
-
-    Returns:
-      An iterator of `Tensor`s or a list of partitioned tensors.
-    """
-    if dims is None:
-      return itertools.repeat(tensor)
-    dims = np.array(dims)
-    self._check_input_partition_dims(tensor, dims)
-    output = [tensor]
-    shape_list = np.array(tensor.shape.as_list())
-    quotients, remainders = np.divmod(shape_list, dims)
-    for axis, (quotient, remainder, dim, original_size) in enumerate(
-        zip(quotients, remainders, dims, shape_list)):
-      if dim <= 1:
-        continue
-      if remainder > 0:
-        # When a dimension cannot be evenly partitioned, XLA assumes tensors
-        # are partitioned greedily: each shard takes ceil_ratio(size / dim)
-        # elements first. E.g. for a 2-D tensor with shape (5, 14) and dims
-        # (2, 4): since 5 % 2 = 1 and 14 % 4 = 2, [5, 14] =>
-        # [[(3, 4), (3, 4), (2, 4), (2, 2)],
-        # [(2, 4), (2, 4), (2, 4), (2, 2)]]
-        ceil_ratio = quotient + 1
-        num_full_slots, left_over = np.divmod(original_size, ceil_ratio)
-        num_or_size_splits = [ceil_ratio] * num_full_slots + [left_over]
-        if len(num_or_size_splits) < dim:
-          num_or_size_splits += [0] * (dim - len(num_or_size_splits))
-        new_output = []
-        for x in output:
-          new_output.append(
-              array_ops.split(
-                  x, num_or_size_splits=num_or_size_splits, axis=axis))
-        output = new_output
-      else:
-        output = [array_ops.split(x, dim, axis=axis) for x in output]
-      output = nest.flatten(output)
-    return output
-
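
The greedy split arithmetic from the comment above, worked through for the size-14 axis with dim = 4 (the size-5 axis with dim = 2 works the same way and yields [3, 2]):

size, dim = 14, 4
quotient, remainder = divmod(size, dim)          # 3, 2 -> uneven split
ceil_ratio = quotient + 1                        # 4
num_full, left_over = divmod(size, ceil_ratio)   # 3 full slots, 2 left over
num_or_size_splits = [ceil_ratio] * num_full + [left_over]
# num_or_size_splits == [4, 4, 4, 2], matching the column widths above
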
-  def _tag_sharding_attribute_for_dequeued_tensor(self, tensor, dims):
-    """Tags appropriate XLA sharding attribute to the dequeued tensor.
-
-    Args:
-      tensor: The dequeued tensor on TPU.
-      dims: A list of integers describing how the tensor is partitioned.
-
-    Returns:
-      The same tensor with the xla_sharding attribute.
-    """
-    if dims is None:
-      return xla_sharding.replicate(tensor)
-    elif np.prod(dims) == 1:
-      return xla_sharding.assign_device(tensor, 0)
-    else:
-      tile_assignment = np.arange(np.prod(dims)).reshape(dims)
-      return xla_sharding.tile(
-          tensor=tensor,
-          tile_assignment=tile_assignment)
-
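
For example, with dims = [2, 4] the tile assignment enumerates the eight logical cores over the partition grid, matching the core ids in the enqueue example earlier:

import numpy as np

dims = [2, 4]
tile_assignment = np.arange(np.prod(dims)).reshape(dims)
# array([[0, 1, 2, 3],
#        [4, 5, 6, 7]])
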
-  def _tag_sharding_attribute_for_dequeued_tensors(self, dequeues, dims):
-    """Tags appropriate XLA sharding attribute to the dequeued tensors.
-
-    Args:
-      dequeues: A list of dequeued tensors on TPU.
-      dims: A list of integers describing how the tensors are partitioned.
-
-    Returns:
-      The same dequeues with appropriate xla_sharding attribute.
-    """
-    nest.assert_shallow_structure(dequeues, dims)
-    return nest.map_structure_up_to(
-        dequeues, self._tag_sharding_attribute_for_dequeued_tensor, dequeues,
-        dims)
+# pylint: disable=wildcard-import,unused-import,redefined-builtin
+from tensorflow.python.tpu.tpu_feed import *
+# used by tests
+from tensorflow.python.tpu.tpu_feed import _PartitionedInfeedQueue
+# pylint: enable=wildcard-import,unused-import,redefined-builtin
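
A quick sanity check of the stub pattern, assuming a build where both module paths are importable; the contrib path should now resolve to the very same objects as the core path:

from tensorflow.contrib.tpu.python.tpu import tpu_feed as contrib_tpu_feed
from tensorflow.python.tpu import tpu_feed as core_tpu_feed

assert contrib_tpu_feed.InfeedQueue is core_tpu_feed.InfeedQueue
assert (contrib_tpu_feed._PartitionedInfeedQueue is
        core_tpu_feed._PartitionedInfeedQueue)
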
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
index 84d5967..f2755c6 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
@@ -1,57 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Helper library for functions used during TPU compilation."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import contextlib
-
-
-class TpuContext(object):
-  """A context object holding state about the TPU computation being built."""
-
-  def __init__(self):
-    """Creates a new TpuContext."""
-    self._number_of_shards = None
-
-  @property
-  def number_of_shards(self):
-    return self._number_of_shards
-
-  def set_number_of_shards(self, number_of_shards):
-    self._number_of_shards = number_of_shards
-
-
-# The TPU context holds the number of shards when a sharded computation is
-# being built, or None if no computation is being built.
-_current_tpu_context = TpuContext()
-
-
-@contextlib.contextmanager
-def tpu_shard_context(number_of_shards):
-  if _current_tpu_context.number_of_shards is not None:
-    raise NotImplementedError("tpu_shard_context cannot be nested.")
-  try:
-    _current_tpu_context.set_number_of_shards(number_of_shards)
-    yield
-  finally:
-    _current_tpu_context.set_number_of_shards(None)
-
-
-def get_tpu_context():
-  return _current_tpu_context
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tpu_function import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_infeed_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_infeed_test.py
deleted file mode 100644
index a41ff60..0000000
--- a/tensorflow/contrib/tpu/python/tpu/tpu_infeed_test.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-"""Tests for TPU InfeedQueue methods."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.tpu.python.tpu import tpu_feed
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.platform import test
-
-
-class InfeedTest(test.TestCase):
-
-  def testConstructor(self):
-    """Tests that the constructor can be called with different arguments."""
-    i = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
-    self.assertEqual(i.number_of_tuple_elements, 2)
-    self.assertEqual(i.tuple_types, None)
-    self.assertEqual(i.tuple_shapes, None)
-    self.assertEqual(i.number_of_shards, None)
-    i = tpu_feed.InfeedQueue(
-        tuple_types=[dtypes.float32, dtypes.int32, dtypes.int32])
-    self.assertEqual(i.number_of_tuple_elements, 3)
-    self.assertEqual(i.tuple_types,
-                     [dtypes.float32, dtypes.int32, dtypes.int32])
-    self.assertEqual(i.tuple_shapes, None)
-    self.assertEqual(i.number_of_shards, None)
-    i = tpu_feed.InfeedQueue(tuple_shapes=[[1], [2, 3]])
-    self.assertEqual(i.number_of_tuple_elements, 2)
-    self.assertEqual(i.tuple_types, None)
-    self.assertEqual(i.tuple_shapes, [[1], [2, 3]])
-    self.assertEqual(i.number_of_shards, None)
-    i = tpu_feed.InfeedQueue(shard_dimensions=[1, 0, 7])
-    self.assertEqual(i.number_of_tuple_elements, 3)
-    self.assertEqual(i.tuple_types, None)
-    self.assertEqual(i.tuple_shapes, None)
-    self.assertEqual([p.shard_dimension
-                      for p in i.sharding_policies], [1, 0, 7])
-    with self.assertRaises(ValueError):
-      i = tpu_feed.InfeedQueue()
-    with self.assertRaises(ValueError):
-      i = tpu_feed.InfeedQueue(
-          number_of_tuple_elements=2, tuple_types=[dtypes.float32])
-    with self.assertRaises(ValueError):
-      i = tpu_feed.InfeedQueue(number_of_tuple_elements=2, tuple_shapes=[[1]])
-    with self.assertRaises(ValueError):
-      i = tpu_feed.InfeedQueue(number_of_tuple_elements=2, shard_dimensions=[1])
-    with self.assertRaises(ValueError):
-      i = tpu_feed.InfeedQueue(tuple_shapes=[[1], [2, 3]], shard_dimensions=[1])
-
-  def testModification(self):
-    """Tests modification of the queue post-construction."""
-    i = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
-    i.set_tuple_types([dtypes.float32, dtypes.int32])
-    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
-    i.set_tuple_types([dtypes.float32, dtypes.float32])
-    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.float32])
-    with self.assertRaises(ValueError):
-      i.set_tuple_types([dtypes.float32])
-    i.set_tuple_shapes([[1], [2, 3]])
-    self.assertEqual(i.tuple_shapes, [[1], [2, 3]])
-    i.set_tuple_shapes([[1, 2], [3, 4]])
-    self.assertEqual(i.tuple_shapes, [[1, 2], [3, 4]])
-    with self.assertRaises(ValueError):
-      i.set_tuple_shapes([[1, 2]])
-    i.set_number_of_shards(2)
-    self.assertEqual(i.number_of_shards, 2)
-    i.set_number_of_shards(3)
-    self.assertEqual(i.number_of_shards, 3)
-    t1 = constant_op.constant(1, dtypes.int32, shape=[6])
-    t2 = constant_op.constant(2.0, dtypes.float32, shape=[3, 18])
-    i.set_configuration_from_input_tensors([t1, t2])
-    self.assertEqual(i.tuple_shapes, [[6], [3, 18]])
-    self.assertEqual(i.tuple_types, [dtypes.int32, dtypes.float32])
-    i.set_configuration_from_sharded_input_tensors([[t2, t1], [t2, t1]])
-    self.assertEqual(i.number_of_shards, 2)
-    self.assertEqual(i.tuple_shapes, [[6, 18], [12]])
-    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
-    i.set_shard_dimensions([1, 0])
-    i.set_number_of_shards(3)
-    with self.assertRaises(ValueError):
-      i.set_number_of_shards(4)
-
-  def testFreezing(self):
-    """Tests freezing the queue."""
-    i = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
-    t1 = constant_op.constant(1, dtypes.int32, shape=[2])
-    t2 = constant_op.constant(2.0, dtypes.float32, shape=[2, 4])
-    i.set_configuration_from_sharded_input_tensors([[t2, t1], [t2, t1]])
-    self.assertEqual(i.number_of_shards, 2)
-    self.assertEqual(i.tuple_shapes, [[4, 4], [4]])
-    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
-    self.assertEqual(i.shard_dimensions, [0, 0])
-    i.freeze()
-    i.set_number_of_shards(2)
-    i.set_tuple_shapes([[4, 4], [4]])
-    i.set_tuple_types([dtypes.float32, dtypes.int32])
-    i.set_shard_dimensions([0, 0])
-    with self.assertRaises(ValueError):
-      i.set_number_of_shards(1)
-    with self.assertRaises(ValueError):
-      i.set_tuple_shapes([[8, 8], [8]])
-    with self.assertRaises(ValueError):
-      i.set_tuple_types([dtypes.int32, dtypes.float32])
-    with self.assertRaises(ValueError):
-      i.set_shard_dimensions([1, 0])
-    self.assertEqual(i.number_of_shards, 2)
-    self.assertEqual(i.tuple_shapes, [[4, 4], [4]])
-    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
-    self.assertEqual(i.shard_dimensions, [0, 0])
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py b/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
index 1e11de6..ca58e78 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
@@ -1,203 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Optimizer that implements cross-shard gradient reduction for TPU."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-
-from tensorflow.contrib.tpu.python.ops import tpu_ops
-from tensorflow.contrib.tpu.python.tpu import tpu_function
-from tensorflow.python.framework import ops
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import optimizer
-
-
-class CrossShardOptimizer(optimizer.Optimizer):
-  """An optimizer that averages gradients across TPU shards."""
-
-  def __init__(self,
-               opt,
-               reduction=losses.Reduction.MEAN,
-               name="CrossShardOptimizer",
-               group_assignment=None):
-    """Construct a new cross-shard optimizer.
-
-    Args:
-      opt: An existing `Optimizer` to encapsulate.
-      reduction: The reduction to apply to the shard losses.
-      name: Optional name prefix for the operations created when applying
-        gradients. Defaults to "CrossShardOptimizer".
-      group_assignment: Optional 2d int32 lists with shape
-        [num_groups, num_replicas_per_group] which describes how to apply the
-        optimizer to subgroups.
-
-    Raises:
-      ValueError: If reduction is not a valid cross-shard reduction.
-    """
-    if reduction not in (losses.Reduction.SUM, losses.Reduction.MEAN):
-      raise ValueError("Unsupported reduction: %s." % reduction)
-
-    super(CrossShardOptimizer, self).__init__(False, name)
-    self._opt = opt
-    self._reduction = reduction
-    self._group_assignment = group_assignment
-
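
A minimal usage sketch, assuming a standard TF 1.x optimizer to wrap:

from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
from tensorflow.python.training import gradient_descent

opt = tpu_optimizer.CrossShardOptimizer(
    gradient_descent.GradientDescentOptimizer(learning_rate=0.1))
# opt.minimize(loss) now averages gradients across TPU shards.
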
-  def _verify_and_get_subgroup_size(self, group_assignment, num_shards):
-    """Verify group_assignment and get the subgroup size".
-
-    Args:
-      group_assignment: list of group ids for applying the optimizer
-        to subgroups.
-      num_shards: The number of TPU shards.
-
-    Returns:
-      The size of one subgroup in group_assignment.
-
-    Raises:
-      ValueError: If group_assignment is invalid.
-    """
-    if not group_assignment:
-      return None
-    if not (isinstance(group_assignment, list) and
-            all(isinstance(i, list) for i in group_assignment)):
-      raise ValueError("group_assignment must be a list of list. Got {}".format(
-          group_assignment))
-
-    replica_ids = set()
-    for g in group_assignment:
-      for i in g:
-        replica_ids.add(i)
-
-    if set(range(num_shards)) != replica_ids:
-      raise ValueError("group_assignment must be a permutation of range({0})."
-                       " Got group_assignment={1}".format(
-                           num_shards, group_assignment))
-
-    subgroup_size_list = [len(group) for group in group_assignment]
-    if all(subgroup_size_list[0] == size for size in subgroup_size_list):
-      return subgroup_size_list[0]
-    else:
-      raise ValueError("The size of each subgroup in group_assignment must "
-                       "be equal. Got group_assignment={}".format(
-                           self._group_assignment))
-
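
For instance, with num_shards == 4 the following assignment covers every replica id exactly once in two equally sized subgroups, so the verified subgroup size is 2:

group_assignment = [[0, 1], [2, 3]]   # valid: union == range(4), equal sizes
# [[0, 1, 2], [3]]   would raise: subgroup sizes differ
# [[0, 1], [1, 2]]   would raise: ids must cover range(num_shards) exactly
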
-  def compute_gradients(self, loss, var_list=None, **kwargs):
-    """Compute gradients of "loss" for the variables in "var_list".
-
-    This simply wraps compute_gradients() from the real optimizer. The
-    gradients are aggregated in apply_gradients() so that the user can first
-    modify them if needed, e.g. by clipping with the per-replica global norm.
-    The global norm computed over already-aggregated gradients can be
-    misleading, as one replica's huge gradients can swamp those of the others.
-
-    Args:
-      loss: A Tensor containing the value to minimize.
-      var_list: Optional list or tuple of `tf.Variable` to update to minimize
-        `loss`.  Defaults to the list of variables collected in the graph
-        under the key `GraphKeys.TRAINABLE_VARIABLES`.
-      **kwargs: Keyword arguments for compute_gradients().
-
-    Returns:
-      A list of (gradient, variable) pairs.
-
-    Raises:
-      ValueError: If not within a tpu_shard_context or group_assignment is
-        invalid.
-    """
-    num_shards = tpu_function.get_tpu_context().number_of_shards
-    if num_shards is None:
-      logging.warning(
-          "CrossShardOptimizer should be used within a tpu_shard_context, but "
-          "got unset number_of_shards. Assuming 1.")
-      num_shards = 1
-
-    subgroup_size = self._verify_and_get_subgroup_size(self._group_assignment,
-                                                       num_shards)
-
-    if num_shards > 1 and self._reduction == losses.Reduction.MEAN:
-      if self._group_assignment:
-        scale = 1.0 / subgroup_size
-      else:
-        scale = 1.0 / num_shards
-      loss *= scale
-
-    return self._opt.compute_gradients(loss, var_list=var_list, **kwargs)
-
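
Concretely, a MEAN reduction over 8 shards scales each replica's loss here so that the cross-replica sum performed later in apply_gradients() yields averaged gradients:

num_shards = 8
scale = 1.0 / num_shards   # 0.125 when no group_assignment is set
# cross_replica_sum(grad * scale) over 8 replicas == the mean gradient
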
-  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
-    """Apply gradients to variables.
-
-    Calls tpu_ops.cross_replica_sum() to sum gradient contributions across
-    replicas, and then applies the real optimizer.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs as returned by
-        compute_gradients().
-      global_step: Optional Variable to increment by one after the
-        variables have been updated.
-      name: Optional name for the returned operation. Defaults to the
-        name passed to the Optimizer constructor.
-
-    Returns:
-      An `Operation` that applies the gradients. If `global_step` was not None,
-      that operation also increments `global_step`.
-
-    Raises:
-      ValueError: If the grads_and_vars is malformed.
-    """
-    summed_grads_and_vars = []
-    for (grad, var) in grads_and_vars:
-      if grad is None:
-        summed_grads_and_vars.append((grad, var))
-      else:
-        with ops.colocate_with(grad):
-          summed_grads_and_vars.append((tpu_ops.cross_replica_sum(
-              grad, self._group_assignment), var))
-    return self._opt.apply_gradients(summed_grads_and_vars, global_step, name)
-
-  def get_slot(self, *args, **kwargs):
-    """Return a slot named "name" created for "var" by the Optimizer.
-
-    This simply wraps the get_slot() from the actual optimizer.
-
-    Args:
-      *args: Arguments for get_slot().
-      **kwargs: Keyword arguments for get_slot().
-
-    Returns:
-      The `Variable` for the slot if it was created, `None` otherwise.
-    """
-    return self._opt.get_slot(*args, **kwargs)
-
-  def get_slot_names(self, *args, **kwargs):
-    """Return a list of the names of slots created by the `Optimizer`.
-
-    This simply wraps the get_slot_names() from the actual optimizer.
-
-    Args:
-      *args: Arguments for get_slot_names().
-      **kwargs: Keyword arguments for get_slot_names().
-
-    Returns:
-      A list of strings.
-    """
-    return self._opt.get_slot_names(*args, **kwargs)
-
-  def variables(self):
-    """Forwarding the variables from the underlying optimizer."""
-    return self._opt.variables()
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tpu_optimizer import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_sharding.py b/tensorflow/contrib/tpu/python/tpu/tpu_sharding.py
index f5af03f..93c5233 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_sharding.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_sharding.py
@@ -1,253 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Helper library for sharding during TPU compilation."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.python.framework import tensor_shape
-
-_DEFAULT_NUMBER_OF_SHARDS = 1
-_DEFAULT_SHARD_DIMENSION = 0
-
-
-# TODO(b/36777903) change other parts of tpu.py to use this class.
-class ShardingPolicy(object):
-  """An object use to hold the sharding policy for a Tensor.
-  """
-
-  def __init__(self):
-    self._number_of_shards = None
-    self._shard_dimension = None
-    self._frozen = False
-
-  def __str__(self):
-    if self.number_of_shards is None or self.shard_dimension is None:
-      return "ShardingPolicy(unset)"
-    else:
-      return ("ShardingPolicy(%d shards dimension %d)" %
-              (self.number_of_shards, self.shard_dimension))
-
-  def _fill_default_values(self):
-    if self._number_of_shards is None:
-      self._number_of_shards = _DEFAULT_NUMBER_OF_SHARDS
-    if self._shard_dimension is None:
-      self._shard_dimension = tensor_shape.as_dimension(
-          _DEFAULT_SHARD_DIMENSION)
-
-  def freeze(self):
-    """Prevents further modification to the sharding policy.
-
-    Any values that have not been set when freeze is called are set to
-    defaults. If the ShardingPolicy is already frozen, this is a NoOp.
-    """
-    if not self._frozen:
-      self._fill_default_values()
-      self._frozen = True
-
-  @property
-  def number_of_shards(self):
-    """Returns the number of shards in the policy or None if unspecified."""
-    return self._number_of_shards
-
-  def set_number_of_shards(self, number_of_shards):
-    """Sets the number of shards for the current policy.
-
-    If the policy has been frozen then number_of_shards must match the
-    existing setting.
-
-    Args:
-      number_of_shards: The number of shards to use in the policy.
-
-    Raises:
-      ValueError: If the policy has been frozen and number_of_shards
-        differs from the frozen value; or number_of_shards <= 0.
-    """
-    if self._frozen:
-      if self._number_of_shards != number_of_shards:
-        raise ValueError(
-            "Can't set sharding policy to use %d shards since it has been "
-            "frozen to use %d." % (number_of_shards, self._number_of_shards))
-    else:
-      if number_of_shards > 0:
-        self._number_of_shards = number_of_shards
-      else:
-        raise ValueError(
-            "Can't set sharding policy to use %s shards; value must be >0",
-            str(number_of_shards))
-
-  @property
-  def shard_dimension(self):
-    """Returns the shard dimension of the policy or None if unspecified."""
-    return self._shard_dimension
-
-  def set_shard_dimension(self, shard_dimension):
-    """Sets the shard dimension for the current policy.
-
-    If the policy has been frozen then shard_dimension must match the
-    existing setting.
-
-    Args:
-      shard_dimension: The shard dimension to use in the policy.
-
-    Raises:
-      ValueError: If the policy has been frozen and shard_dimension
-        differs from the frozen value, or shard_dimension can't be
-        interpreted as a Dimension.
-    """
-    if self._frozen:
-      if self._shard_dimension != shard_dimension:
-        raise ValueError(
-            "Can't set shard dimension to %d since it has been frozen to "
-            "use %d." % (shard_dimension, self._shard_dimension))
-    else:
-      self._shard_dimension = tensor_shape.as_dimension(shard_dimension)
-
-  def merge(self, other):
-    """Merges the policy of another policy into the current policy.
-
-    Args:
-      other: The policy to merge into this one.
-
-    Raises:
-      ValueError: If this policy has been frozen and the merge conflicts with
-        the frozen policy.
-    """
-    if other.number_of_shards is not None:
-      self.set_number_of_shards(other.number_of_shards)
-    if other.shard_dimension is not None:
-      self.set_shard_dimension(other.shard_dimension)
-
-  def get_sharded_shape(self, shape, shard_index=None):
-    """Returns the shape of a shard of a full Tensor.
-
-    When given the shape of a 'full-size' Tensor, returns the shape of
-    the sub-Tensor after it has been sharded. Freezes the policy if it
-    has not yet been frozen.
-
-    Args:
-      shape: The shape of the full-size Tensor to be sharded.
-      shard_index: The index of the shard whose shape should be returned.
-        shard_index can be None for sharding policies that use the same
-        shape for every shard.
-
-    Returns:
-      The shape of the sharded version of the Tensor.
-
-    Raises:
-      ValueError: If shard_index is None when shards are of different
-        shapes; or shard_index is not None and
-        !(0<=shard_index<number_of_shards); or shape does not have at
-        least self.shard_dimension+1 dimensions; or the value of
-        shape's shard dimension is not a multiple of
-        self.number_of_shards
-    """
-    if self._shard_dimension is None or self._number_of_shards is None:
-      # Don't raise an error if the config is unset.
-      return None
-    if shard_index is not None:
-      if shard_index < 0 or shard_index >= self.number_of_shards:
-        raise ValueError("shard_index %d, but must be in [0,%d)." %
-                         (shard_index, self._number_of_shards))
-    shape = tensor_shape.as_shape(shape)
-    if self._number_of_shards == 1:
-      # Don't do anything when there's only one shard.
-      return shape
-    ndims = shape.ndims
-    if ndims is None:
-      raise ValueError("shape must be a specified shape not Unknown")
-    if ndims <= self._shard_dimension:
-      raise ValueError("shape %s does not contain shard_dimension %d" %
-                       (shape.as_list(), self._shard_dimension))
-    dims = shape.as_list()
-    if dims[self._shard_dimension] is None:
-      raise ValueError("shape %s must have a fixed size for dimension %d "
-                       "that is known at graph construction time." %
-                       (shape.as_list(), self._shard_dimension))
-    if (dims[self._shard_dimension] % self._number_of_shards) != 0:
-      raise ValueError("shape %s cannot be sharded %d ways along dimension %d" %
-                       (shape.as_list(), self._number_of_shards,
-                        self._shard_dimension))
-    dims[self._shard_dimension] //= self._number_of_shards
-    return tensor_shape.as_shape(dims)
-
-  def _unshard_shape(self, shape):
-    """Return the unsharded shape that would generate a given sharded shape.
-
-    Args:
-      shape: the sharded shape to unshard
-
-    Returns:
-      The unsharded shape.
-
-    Raises:
-      ValueError: if shape is unknown or does not contain
-        self.shard_dimension
-      TypeError: if shape is not convertible to a TensorShape
-    """
-    shape = tensor_shape.as_shape(shape)
-    if self._number_of_shards == 1:
-      # Don't do anything when there's only one shard.
-      return shape
-    ndims = shape.ndims
-    if ndims is None:
-      raise ValueError("shape must be a specified shape not Unknown")
-    if ndims <= self._shard_dimension:
-      raise ValueError("shape %s does not contain shard_dimension %d" %
-                       (shape.as_list(), self._shard_dimension))
-    dims = shape.as_list()
-    dims[self._shard_dimension] *= self._number_of_shards
-    return tensor_shape.as_shape(dims)
-
-  def get_unsharded_shape(self, shapes):
-    """Returns the shape of an unsharded Tensor given a list of shards.
-
-    When given a list of shapes of shards, returns the shape of the
-    unsharded Tensor that would generate the shards. Sets defaults for the
-    policy if number_of_shards or shard_dimension is None.
-
-    Args:
-      shapes: The shapes of the Tensor shards to be combined.
-
-    Returns:
-      The shape of the unsharded version of the Tensor.
-
-    Raises:
-      ValueError: if shapes is not a list of length
-        self.number_of_shards; or any element of shapes is not a valid
-        shape consistent with the sharding policy; or the list of
-        shapes is not a valid sharding of a full shape.
-      TypeError: if an element of shapes is not convertible to a
-        TensorShape
-    """
-    self._fill_default_values()
-    if len(shapes) != self.number_of_shards:
-      raise ValueError(
-          "shapes is %s but must be a list of length number_of_shards=%d" % (
-              str(shapes), self.number_of_shards))
-    unsharded_shapes = [self._unshard_shape(s) for s in shapes]
-    for i in xrange(self.number_of_shards - 1):
-      if not unsharded_shapes[i].is_compatible_with(
-          unsharded_shapes[self.number_of_shards - 1]):
-        raise ValueError(
-            "sharded shapes %s are not consistent shards of a full shape "
-            "sharded %d ways along dimension %d" % (
-                str(shapes), self.number_of_shards, self.shard_dimension))
-    return unsharded_shapes[0]
+# pylint: disable=wildcard-import,unused-import,redefined-builtin
+from tensorflow.python.tpu.tpu_sharding import *
+# pylint: enable=wildcard-import,unused-import,redefined-builtin
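The sharding logic now living in tensorflow.python.tpu.tpu_sharding is plain
shape arithmetic: get_sharded_shape divides the shard dimension by the shard
count, and _unshard_shape multiplies to invert it. A minimal standalone sketch
of that contract, using plain Python lists in place of TensorShape
(hypothetical helper names, not part of the API):

def sharded_shape(shape, number_of_shards, shard_dimension):
  # Mirrors get_sharded_shape: the shard dimension must divide evenly.
  dims = list(shape)
  if dims[shard_dimension] % number_of_shards != 0:
    raise ValueError("shape %s cannot be sharded %d ways along dimension %d"
                     % (shape, number_of_shards, shard_dimension))
  dims[shard_dimension] //= number_of_shards
  return dims

def unsharded_shape(shape, number_of_shards, shard_dimension):
  # Mirrors _unshard_shape: the exact inverse of sharded_shape.
  dims = list(shape)
  dims[shard_dimension] *= number_of_shards
  return dims

assert sharded_shape([4, 9], 3, 1) == [4, 3]
assert unsharded_shape([4, 3], 3, 1) == [4, 9]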
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_sharding_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_sharding_test.py
deleted file mode 100644
index b0a5511..0000000
--- a/tensorflow/contrib/tpu/python/tpu/tpu_sharding_test.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-"""Tests for tpu_function helpers."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.tpu.python.tpu import tpu_sharding
-
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.platform import test
-
-
-class ShardingTest(test.TestCase):
-
-  def testFreeze(self):
-    """Tests that freezing a policy applies default values."""
-    p1 = tpu_sharding.ShardingPolicy()
-    p1.freeze()
-    self.assertEqual(p1.number_of_shards,
-                     tpu_sharding._DEFAULT_NUMBER_OF_SHARDS)
-    self.assertEqual(p1.shard_dimension, tpu_sharding._DEFAULT_SHARD_DIMENSION)
-    p2 = tpu_sharding.ShardingPolicy()
-    p2.set_number_of_shards(17)
-    p2.set_shard_dimension(23)
-    p2.freeze()
-    self.assertEqual(p2.number_of_shards, 17)
-    self.assertEqual(p2.shard_dimension, 23)
-
-  def testFrozen(self):
-    """Tests that frozen policies can't be changed."""
-    p1 = tpu_sharding.ShardingPolicy()
-    p1.freeze()
-    with self.assertRaises(ValueError):
-      p1.set_number_of_shards(17)
-    with self.assertRaises(ValueError):
-      p1.set_shard_dimension(22)
-
-  def testStr(self):
-    """Tests the string representation."""
-    p1 = tpu_sharding.ShardingPolicy()
-    self.assertEqual(str(p1), "ShardingPolicy(unset)")
-    p1.set_number_of_shards(17)
-    self.assertEqual(str(p1), "ShardingPolicy(unset)")
-    p1.set_shard_dimension(8)
-    self.assertEqual(str(p1), "ShardingPolicy(17 shards dimension 8)")
-
-  def testMerge(self):
-    """Tests that merging works."""
-    p1 = tpu_sharding.ShardingPolicy()
-    p1.set_number_of_shards(17)
-    p1.set_shard_dimension(23)
-    p2 = tpu_sharding.ShardingPolicy()
-    p2.merge(p1)
-    self.assertEqual(p2.number_of_shards, 17)
-    self.assertEqual(p2.shard_dimension, 23)
-    p1 = tpu_sharding.ShardingPolicy()
-    p1.set_shard_dimension(12)
-    p2.merge(p1)
-    self.assertEqual(p2.number_of_shards, 17)
-    self.assertEqual(p2.shard_dimension, 12)
-    p2.freeze()
-    p2.merge(p1)
-    self.assertEqual(p2.number_of_shards, 17)
-    self.assertEqual(p2.shard_dimension, 12)
-    p1.set_number_of_shards(1)
-    with self.assertRaises(ValueError):
-      p2.merge(p1)
-    p1 = tpu_sharding.ShardingPolicy()
-    p1.set_number_of_shards(17)
-    p2.merge(p1)
-    p1.set_shard_dimension(2)
-    with self.assertRaises(ValueError):
-      p2.merge(p1)
-
-  def testGetShardedShape(self):
-    """Tests getting a sharded shape."""
-    p = tpu_sharding.ShardingPolicy()
-    p.set_number_of_shards(3)
-    p.set_shard_dimension(1)
-    self.assertEqual(p.get_sharded_shape([4, 9]), [4, 3])
-    p.freeze()
-    with self.assertRaises(ValueError):
-      p.set_shard_dimension(0)
-    with self.assertRaises(ValueError):
-      _ = p.get_sharded_shape([4, 9], shard_index=4)
-    with self.assertRaises(ValueError):
-      _ = p.get_sharded_shape([4, 9], shard_index=-1)
-    with self.assertRaises(TypeError):
-      _ = p.get_sharded_shape("not_a_shape")
-    with self.assertRaises(ValueError):
-      _ = p.get_sharded_shape(tensor_shape.TensorShape(None))
-    with self.assertRaises(ValueError):
-      _ = p.get_sharded_shape([4, 10], shard_index=-1)
-
-  def testGetUnshardedShape(self):
-    """Tests getting an unsharded shape."""
-    p = tpu_sharding.ShardingPolicy()
-    p.set_number_of_shards(2)
-    p.set_shard_dimension(1)
-    self.assertEqual(p.get_unsharded_shape([[4, 3], [4, 3]]), [4, 6])
-    with self.assertRaises(ValueError):
-      _ = p.get_unsharded_shape([[4, 3]])
-    with self.assertRaises(ValueError):
-      _ = p.get_unsharded_shape([[4, 3], [4, 3], [4, 3]])
-    with self.assertRaises(ValueError):
-      _ = p.get_unsharded_shape([[4, 3], [4, 2]])
-    with self.assertRaises(TypeError):
-      _ = p.get_unsharded_shape([[4, 3], "not_a_shape"])
-    with self.assertRaises(ValueError):
-      _ = p.get_unsharded_shape([None, [4, 3]])
-    with self.assertRaises(ValueError):
-      _ = p.get_unsharded_shape([[2], [4, 3]])
-
-  def testScalar(self):
-    """Tests sharding and unsharding scalars."""
-    p = tpu_sharding.ShardingPolicy()
-    p.freeze()
-    self.assertEqual(p.get_sharded_shape([]), [])
-    self.assertEqual(p.get_unsharded_shape([[]]), [])
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py
index d66ecfc..258d34d 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py
@@ -1,156 +1,25 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-"""TPU system metadata and associated tooling."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import re
-
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.client import session as session_lib
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import tf_logging as logging
-
-_PINGING_MASTER_TIMEOUT_IN_MS = 60 * 1000  # 1 min
-_RETRY_TIMES = 120
-_INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS = 300 * 1000  # 5 mins
-
-_TPU_DEVICE_REG = re.compile(r'.*task:(\d+)/.*device:TPU:(\d+)$')
-
-# _TPUSystemMetadata is used by TPUEstimator to hold TPU configuration,
-# including num_cores and num_hosts.
-_TPUSystemMetadata = collections.namedtuple('_TPUSystemMetadata', [
-    'num_cores',
-    'num_hosts',
-    'num_of_cores_per_host',
-    'topology',
-    'devices',
-])
-
-
-def _query_tpu_system_metadata(master_address, cluster_def=None,
-                               query_topology=False):
-  """Automatically detects the TPU system metadata in the system."""
-  tpu_core_count = 0
-  devices = []
-  device_dict = collections.defaultdict(list)
-
-  # TODO(b/120564445): Replace with standard library for retries.
-  retry_count = 1
-  while True:
-    logging.info('Querying TensorFlow master (%s) for TPU system metadata.',
-                 master_address)
-    try:
-      with ops.Graph().as_default():
-        with session_lib.Session(
-            master_address,
-            config=get_session_config_with_timeout(
-                _PINGING_MASTER_TIMEOUT_IN_MS,
-                cluster_def)) as sess:
-          devices = sess.list_devices()
-          for device in devices:
-            match = _TPU_DEVICE_REG.match(device.name)
-            if match:
-              host_id = match.group(1)
-              core_id = match.group(2)
-              device_dict[host_id].append(core_id)
-              tpu_core_count += 1
-          break
-    except errors.DeadlineExceededError:
-      msg = ('Failed to connect to the TensorFlow master. The TPU worker may '
-             'not be ready (still scheduling) or the TensorFlow master address '
-             'is incorrect: got (%s).' %
-             (master_address))
-
-      # TODO(xiejw): For local or grpc master we might not need retry logic
-      # here.
-      if retry_count <= _RETRY_TIMES:
-        logging.warning('%s', msg)
-        logging.warning('Retrying (%d/%d).', retry_count, _RETRY_TIMES)
-        retry_count += 1
-      else:
-        raise ValueError(msg)
-
-  num_of_cores_per_host = 0
-  if tpu_core_count:
-    num_cores_per_host_set = set(
-        [len(core_ids) for core_ids in device_dict.values()])
-    if len(num_cores_per_host_set) != 1:
-      raise RuntimeError(
-          'The number of TPU cores on each host is not the same. This '
-          'should not happen. devices: {}'.format(devices))
-    num_of_cores_per_host = num_cores_per_host_set.pop()
-
-  topology = None
-  if query_topology:
-    if not tpu_core_count:
-      raise RuntimeError(
-          'Cannot find any TPU cores in the system (master address {}). '
-          'This usually means the master address is incorrect or the '
-          'TPU worker has some problems. Available devices: {}'.format(
-              master_address, devices))
-
-    topology = _obtain_topology(master_address, cluster_def)
-
-  metadata = _TPUSystemMetadata(
-      num_cores=tpu_core_count,
-      num_hosts=len(device_dict),
-      num_of_cores_per_host=num_of_cores_per_host,
-      topology=topology,
-      devices=devices)
-
-  if tpu_core_count:
-    logging.info('Found TPU system:')
-    logging.info('*** Num TPU Cores: %d', metadata.num_cores)
-    logging.info('*** Num TPU Workers: %d', metadata.num_hosts)
-    logging.info('*** Num TPU Cores Per Worker: %d',
-                 metadata.num_of_cores_per_host)
-    for device in metadata.devices:
-      logging.info('*** Available Device: %s', device)
-  else:
-    logging.info('Failed to find TPU: %s', metadata)
-  return metadata
-
-
-def _obtain_topology(master_address, cluster_def):
-  """Obtains TPU fabric topology."""
-  try:
-    logging.info('Initializing TPU system (master: %s) to fetch topology '
-                 'for model parallelism. This might take a while.',
-                 master_address)
-    with ops.Graph().as_default():
-      session_config = get_session_config_with_timeout(
-          _INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS, cluster_def)
-      with session_lib.Session(
-          master_address, config=session_config) as sess:
-        topology = sess.run(tpu.initialize_system())
-        return topology
-  except errors.DeadlineExceededError:
-    raise ValueError(
-        'Failed to initialize the TPU system with master (%s). '
-        'Please double-check that the TPU system is functional.' % (
-            master_address))
-
-
-def get_session_config_with_timeout(timeout_in_ms, cluster_def):
-  """Returns a session config proto given a timeout and a cluster config."""
-  config = config_pb2.ConfigProto(
-      operation_timeout_in_ms=timeout_in_ms, cluster_def=cluster_def)
-  return config
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.tpu_system_metadata import *
+# used by tests
+from tensorflow.python.tpu.tpu_system_metadata import _query_tpu_system_metadata
+# pylint: enable=wildcard-import,unused-import
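For reference, the _TPU_DEVICE_REG pattern in the relocated code maps a fully
qualified device name to a (host, core) pair; _query_tpu_system_metadata uses
it to count cores per host. A short sketch with illustrative device names:

import collections
import re

_TPU_DEVICE_REG = re.compile(r'.*task:(\d+)/.*device:TPU:(\d+)$')

device_dict = collections.defaultdict(list)
for name in ['/job:worker/replica:0/task:0/device:TPU:0',
             '/job:worker/replica:0/task:0/device:TPU:1',
             '/job:worker/replica:0/task:1/device:TPU:0']:
  match = _TPU_DEVICE_REG.match(name)
  if match:
    device_dict[match.group(1)].append(match.group(2))

assert len(device_dict) == 2       # num_hosts
assert len(device_dict['0']) == 2  # num_of_cores_per_host for host 0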
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_test.py
deleted file mode 100644
index 6bdaa52..0000000
--- a/tensorflow/contrib/tpu/python/tpu/tpu_test.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-"""Tests for tpu_function helpers."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.contrib.tpu.python.tpu import tpu_feed
-from tensorflow.contrib.tpu.python.tpu import training_loop
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.layers import convolutional
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import control_flow_util
-from tensorflow.python.ops import math_ops
-
-from tensorflow.python.platform import test
-
-
-class TPUContextTest(test.TestCase):
-
-  def testIsInContext(self):
-    """Test that control_flow_util can check that we're in a TPU context."""
-    z1 = array_ops.identity(1)
-    pivot = control_flow_ops.no_op()
-    context = tpu.TPUReplicateContext(b"context", 1, pivot=pivot)
-    context.Enter()
-    z2 = array_ops.identity(1)
-    context.Exit()
-    self.assertFalse(control_flow_util.IsInXLAContext(z1.op))
-    self.assertTrue(control_flow_util.IsInXLAContext(z2.op))
-
-
-class TPULayerRewriteTest(test.TestCase):
-
-  def testUsingInfeedQueueWithRegularizer(self):
-    """Test that Layer regularizers can reference data created in loops."""
-
-    def make_regularizer(scale):
-      return lambda inputs: scale * math_ops.reduce_sum(math_ops.square(inputs))
-
-    def training_step(inputs, scale):
-      outputs = convolutional.conv2d(
-          inputs,
-          filters=16,
-          kernel_size=(3, 3),
-          data_format="channels_first",
-          kernel_regularizer=make_regularizer(scale))
-      loss = math_ops.reduce_mean(math_ops.square(outputs))
-      return loss.op
-
-    inputs = array_ops.zeros(shape=(128, 32, 32, 16))
-    scale = array_ops.ones(shape=())
-    infeed = tpu_feed.InfeedQueue(
-        tuple_types=[dtypes.float32, dtypes.float32],
-        tuple_shapes=[inputs.shape, scale.shape])
-
-    def loop():
-      return training_loop.repeat(5, training_step, infeed_queue=infeed)
-
-    # This should not throw an error.
-    tpu.rewrite(loop)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/training_loop.py b/tensorflow/contrib/tpu/python/tpu/training_loop.py
index 0187b4b..673359b 100644
--- a/tensorflow/contrib/tpu/python/tpu/training_loop.py
+++ b/tensorflow/contrib/tpu/python/tpu/training_loop.py
@@ -1,214 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# =============================================================================
-
-"""Library for constructing a training loop, suitable for TPUs."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.compiler import xla
-from tensorflow.contrib.tpu.python.tpu import tpu_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-
-
-def while_loop(condition, body, inputs=None, infeed_queue=None, name=None):
-  """Builds a training loop for TPUs.
-
-  The set of loop-carried tensors corresponds to `inputs`.  Both
-  `condition` and `body` take the current value of the loop-carried
-  tensors. `body` additionally takes a tuple of infeed from
-  infeed_queue if infeed_queue is not None. `condition` must return a
-  single boolean value that determines whether iteration
-  continues. `body` must return an updated list of values for the
-  loop-carried tensors.
-
-  Args:
-    condition: a Python function that builds the loop condition.
-    body: a Python function that builds the loop body.
-    inputs: a list of initial values passed into the training loop, or
-      None (equivalent to an empty list).
-    infeed_queue: if not None, the infeed queue from which to append a tuple
-      of arguments as inputs to `body`; infeed is not passed to `condition`.
-    name: (Deprecated) Does nothing.
-
-  Returns:
-    The final values of the loop-carried tensors.
-
-  Raises:
-    TypeError: if body or condition has the wrong signature.
-  """
-  del name
-  # Converts inputs to Tensors.
-  inputs = [] if inputs is None else [ops.convert_to_tensor(x) for
-                                      x in inputs]
-  input_types = [x.dtype for x in inputs]
-  input_arity = len(inputs)
-
-  body_arg_error = xla.check_function_argument_count(
-      body, input_arity, infeed_queue)
-  if body_arg_error is not None:
-    if infeed_queue is None:
-      raise TypeError(
-          "Supplied loop body function cannot be called with the specified "
-          "inputs. You specified %d inputs: %s, but the loop body needs %s" % (
-              input_arity, str([i.name for i in inputs]), body_arg_error))
-    else:
-      raise TypeError(
-          "Supplied loop body function cannot be called with the specified "
-          "inputs. You specified %d inputs: %s and %d additional inputs from "
-          "infeed, but the computation needs %s" % (input_arity, str(
-              [i.name for i in inputs]), infeed_queue.number_of_tuple_elements,
-                                                    body_arg_error))
-  condition_arg_error = xla.check_function_argument_count(
-      condition, input_arity, None)
-  if condition_arg_error is not None:
-    if infeed_queue is None:
-      raise TypeError(
-          "Supplied loop condition function cannot be called with the "
-          "specified inputs. You specified %d inputs: %s, but the loop "
-          "condition needs %s" % (input_arity, str([i.name for i in inputs]),
-                                  condition_arg_error))
-    else:
-      raise TypeError(
-          "Supplied loop condition function cannot be called with the "
-          "specified inputs. You specified %d inputs: %s, but the loop "
-          "condition needs %s. Note that infeed is not passed to the loop "
-          "condition." % (input_arity, str([i.name for i in inputs]),
-                          condition_arg_error))
-
-  def condition_wrapper(*inputs):
-    # Discards the dummy output added for arity-0 loops.
-    if input_arity == 0:
-      inputs = []
-    return condition(*inputs)
-
-  def body_wrapper(*inputs):
-    """Wrapper around `body` that handles infeed queues and control deps."""
-    inputs = list(inputs)
-
-    # Discards the dummy output added for arity-0 loops.
-    if input_arity == 0:
-      inputs = []
-
-    # Runs `body` with the dequeue_ops appended.
-    if infeed_queue:
-      number_of_shards = tpu_function.get_tpu_context().number_of_shards
-      if number_of_shards is None:
-        raise ValueError("Can't build training loop with infeed when there is "
-                         "no tpu_shard_context. Are you building a loop or "
-                         "graph directly rather than from inside tpu.rewrite, "
-                         "tpu.batch_parallel, tpu.shard, or tpu.replicate?")
-      infeed_queue.set_number_of_shards(number_of_shards)
-      dequeue_ops = [d for d in infeed_queue.generate_dequeue_op()]
-    else:
-      dequeue_ops = []
-    outputs = body(*(inputs + dequeue_ops))
-
-    # If the computation only returned one value, make it a tuple.
-    if not isinstance(outputs, (list, tuple)):
-      outputs = (outputs,)
-
-    outputs = [
-        o if isinstance(o, ops.Operation) else ops.convert_to_tensor(o)
-        for o in outputs
-    ]
-
-    # Separates the returned Operations and Tensors.
-    output_operations = [o for o in outputs if isinstance(o, ops.Operation)]
-    output_tensors = [o for o in outputs
-                      if not isinstance(o, ops.Operation)]
-
-    if outputs != output_tensors + output_operations:
-      raise ValueError(
-          "TPU training loop body must return zero or more Tensor values "
-          "followed by zero or more Operations.")
-
-    output_types = [op.dtype for op in output_tensors]
-    if input_types != output_types:
-      raise TypeError(
-          "Mismatch between input types and output types for training loop "
-          "body: {} vs {}".format(input_types, output_types))
-
-    # Add the dequeue operations to output_operations to ensure they are run
-    # by the loop, even if the programmer's loop body does not use them.
-    output_operations += dequeue_ops
-
-    # Add a dummy output, if needed.
-    if not output_tensors:
-      output_tensors = array_ops.constant(0)
-
-    if output_operations:
-      # TODO(phawkins): in principle this is too restrictive since it serializes
-      # the training loop steps. In practice it does not matter since this loop
-      # will be compiled by XLA.
-      return control_flow_ops.tuple(output_tensors,
-                                    control_inputs=output_operations)
-    else:
-      return output_tensors
-
-  # If the body has arity 0, add a dummy loop-carried value to which we can add
-  # control dependencies from any side-effecting operations.
-  if input_arity == 0:
-    inputs = [array_ops.constant(0)]
-  return control_flow_ops.while_loop(
-      condition_wrapper, body_wrapper, inputs, name="", parallel_iterations=1)
-
-
-def repeat(n, body, inputs=None, infeed_queue=None, name=None):
-  """Builds a training loop that executes a fixed number of iterations.
-
-  The set of loop-carried tensors corresponds to `inputs`.
-  `body` must be a function that takes and returns the values of the
-  loop-carried tensors.
-
-  Args:
-    n: the number of loop iterations
-    body: a Python function that builds the loop body.
-    inputs: a list of initial values passed into the training loop or
-      None (equivalent to an empty list).
-    infeed_queue: if not None, the infeed queue from which to append a tuple
-      of arguments as inputs to `body`.
-    name: (Deprecated) Does nothing.
-  Returns:
-    The final values of the loop-carried tensors.
-  Raises:
-    ValueError: if there is a type error.
-  """
-  def _convert_to_list(xs):
-    if not isinstance(xs, (list, tuple)):
-      return [xs]
-    else:
-      return list(xs)
-
-  def cond(i, *args):
-    del args
-    return i < n
-
-  def body_wrapper(i, *args):
-    return [i + 1] + _convert_to_list(body(*args))
-
-  inputs = [0] if inputs is None else [0] + _convert_to_list(inputs)
-  outputs = while_loop(
-      cond, body_wrapper, inputs=inputs, infeed_queue=infeed_queue, name=name)
-  outputs = _convert_to_list(outputs)
-  if len(outputs) == 1:
-    # Returns the Op rather than an empty list.
-    return outputs[0].op
-  else:
-    return outputs[1:]
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.training_loop import *
+# pylint: enable=wildcard-import,unused-import
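Stripped of graph construction, the contract enforced by the relocated
repeat/while_loop is that body consumes and returns the loop-carried values.
A pure-Python analogue of repeat, assuming no infeed queue (a sketch, not the
TF API):

def py_repeat(n, body, inputs=None):
  # Iterate body n times over the loop-carried values, threading each
  # iteration's outputs back in as the next iteration's inputs.
  values = [] if inputs is None else list(inputs)
  for _ in range(n):
    outputs = body(*values)
    values = list(outputs) if isinstance(outputs, (list, tuple)) else [outputs]
  return values

# Doubling a single carried value five times: 1 -> 32.
assert py_repeat(5, lambda x: x * 2, [1]) == [32]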
diff --git a/tensorflow/contrib/tpu/python/tpu/util.py b/tensorflow/contrib/tpu/python/tpu/util.py
index dfb8ce1..8d9b70d 100644
--- a/tensorflow/contrib/tpu/python/tpu/util.py
+++ b/tensorflow/contrib/tpu/python/tpu/util.py
@@ -1,51 +1,23 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===================================================================
-
-"""Utilities for the functionalities."""
+# ==============================================================================
+"""Stub file to maintain backwards compatibility."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import time
-import six
-
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import training
-
-def check_positive_integer(value, name):
-  """Checks whether `value` is a positive integer."""
-  if not isinstance(value, six.integer_types):
-    raise TypeError('{} must be int, got {}'.format(name, type(value)))
-
-  if value <= 0:
-    raise ValueError('{} must be positive, got {}'.format(name, value))
-
-
-# TODO(b/118302029) Remove this copy of MultiHostDatasetInitializerHook after we
-# release a tensorflow_estimator with MultiHostDatasetInitializerHook in
-# python/estimator/util.py.
-class MultiHostDatasetInitializerHook(training.SessionRunHook):
-  """Creates a SessionRunHook that initializes all passed iterators."""
-
-  def __init__(self, dataset_initializers):
-    self._initializers = dataset_initializers
-
-  def after_create_session(self, session, coord):
-    del coord
-    start = time.time()
-    session.run(self._initializers)
-    logging.info('Initialized dataset iterators in %d seconds',
-                 time.time() - start)
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.tpu.util import *
+# pylint: enable=wildcard-import,unused-import
diff --git a/tensorflow/contrib/tpu/utils/BUILD b/tensorflow/contrib/tpu/utils/BUILD
deleted file mode 100644
index c27b737..0000000
--- a/tensorflow/contrib/tpu/utils/BUILD
+++ /dev/null
@@ -1,30 +0,0 @@
-# Description: Utilities for TPU Operations
-
-licenses(["notice"])  # Apache 2.0
-
-cc_library(
-    name = "tpu_embedding_optimization_parameters_utils",
-    srcs = ["tpu_embedding_optimization_parameters_utils.cc"],
-    hdrs = ["tpu_embedding_optimization_parameters_utils.h"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "//tensorflow/contrib/tpu/proto:optimization_parameters_proto_cc",
-        "//tensorflow/core:framework_headers_lib",
-        "//tensorflow/core:lib_proto_parsing",
-        "@com_google_absl//absl/base",
-    ],
-)
-
-cc_library(
-    name = "tpu_embedding_output_layout_utils",
-    srcs = ["tpu_embedding_output_layout_utils.cc"],
-    hdrs = ["tpu_embedding_output_layout_utils.h"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
-        "//tensorflow/contrib/tpu/proto:tpu_embedding_output_layout_proto_cc",
-        "//tensorflow/core:framework_headers_lib",
-        "//tensorflow/core:lib_proto_parsing",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc b/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc
deleted file mode 100644
index d98e0b7..0000000
--- a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc
+++ /dev/null
@@ -1,264 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h"
-#include "tensorflow/core/lib/core/errors.h"
-
-namespace tensorflow {
-namespace tpu {
-
-string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) {
-  switch (alg) {
-    case OptimizationAlgorithm::kAdagrad:
-      return "Adagrad";
-    case OptimizationAlgorithm::kStochasticGradientDescent:
-      return "StochasticGradientDescent";
-    case OptimizationAlgorithm::kFtrl:
-      return "FTRL";
-    case OptimizationAlgorithm::kAdam:
-      return "ADAM";
-    case OptimizationAlgorithm::kMomentum:
-      return "Momentum";
-    case OptimizationAlgorithm::kRmsProp:
-      return "RMSProp";
-    case OptimizationAlgorithm::kCenteredRmsProp:
-      return "CenteredRMSProp";
-    case OptimizationAlgorithm::kMdlAdagradLight:
-      return "MDLAdagradLight";
-    case OptimizationAlgorithm::kAdadelta:
-      return "Adadelta";
-    case OptimizationAlgorithm::kProximalAdagrad:
-      return "ProximalAdagrad";
-    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
-      return "*** Not set ***";
-  }
-}
-
-string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg) {
-  switch (alg) {
-    case OptimizationAlgorithm::kAdagrad:
-      return "Adagrad";
-    case OptimizationAlgorithm::kStochasticGradientDescent:
-      return "stochastic gradient descent";
-    case OptimizationAlgorithm::kFtrl:
-      return "FTRL";
-    case OptimizationAlgorithm::kAdam:
-      return "ADAM";
-    case OptimizationAlgorithm::kMomentum:
-      return "Momentum";
-    case OptimizationAlgorithm::kRmsProp:
-      return "RMSProp";
-    case OptimizationAlgorithm::kCenteredRmsProp:
-      return "centered RMSProp";
-    case OptimizationAlgorithm::kMdlAdagradLight:
-      return "MDL Adagrad Light";
-    case OptimizationAlgorithm::kAdadelta:
-      return "Adadelta";
-    case OptimizationAlgorithm::kProximalAdagrad:
-      return "proximal Adagrad";
-    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
-      return "unknown (not specified)";
-  }
-}
-
-// Returns the number of optimization parameter vectors used by the optimization
-// algorithm, excluding the weights themselves and assuming no gradient
-// accumulation.
-Status GetBaseAuxiliaryParameterCount(OptimizationAlgorithm alg, int* count) {
-  switch (alg) {
-    case OptimizationAlgorithm::kAdagrad:
-      *count = 1;
-      return Status::OK();
-    case OptimizationAlgorithm::kStochasticGradientDescent:
-      *count = 0;
-      return Status::OK();
-    case OptimizationAlgorithm::kFtrl:
-      *count = 2;
-      return Status::OK();
-    case OptimizationAlgorithm::kAdam:
-      *count = 2;
-      return Status::OK();
-    case OptimizationAlgorithm::kMomentum:
-      *count = 1;
-      return Status::OK();
-    case OptimizationAlgorithm::kRmsProp:
-      *count = 2;
-      return Status::OK();
-    case OptimizationAlgorithm::kCenteredRmsProp:
-      *count = 3;
-      return Status::OK();
-    case OptimizationAlgorithm::kMdlAdagradLight:
-      *count = 3;
-      return Status::OK();
-    case OptimizationAlgorithm::kAdadelta:
-      *count = 2;
-      return Status::OK();
-    case OptimizationAlgorithm::kProximalAdagrad:
-      *count = 1;
-      return Status::OK();
-    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
-      return errors::InvalidArgument("No optimization algorithm specified");
-  }
-}
-
-Status GetGradientAccumulationSupport(OptimizationAlgorithm alg,
-                                      GradientAccumulationSupport* support) {
-  switch (alg) {
-    case OptimizationAlgorithm::kAdagrad:
-      *support = GradientAccumulationSupport::kSupported;
-      return Status::OK();
-    case OptimizationAlgorithm::kStochasticGradientDescent:
-      *support = GradientAccumulationSupport::kUnnecessary;
-      return Status::OK();
-    default: {
-      int auxiliary_parameter_count;
-      TF_RETURN_IF_ERROR(
-          GetBaseAuxiliaryParameterCount(alg, &auxiliary_parameter_count));
-      *support = auxiliary_parameter_count + 1 <= kMaxAuxiliaryParameterCount
-                     ? GradientAccumulationSupport::kSupported
-                     : GradientAccumulationSupport::kNotSupported;
-      return Status::OK();
-    }
-  }
-}
-namespace {
-// Make a normal state variable specification. Please refer to
-// //third_party/tensorflow/contrib/tpu/proto/optimization_parameters.proto
-// (StateVariableSpecification message) for instructions on how to set the
-// padding_initial_value field.
-StateVariableSpecification MakeStandardStateVariableSpecification(
-    const string& name, double padding_initial_value) {
-  StateVariableSpecification result;
-  result.set_name(name);
-  result.mutable_user_defined()->set_padding_initial_value(
-      padding_initial_value);
-  return result;
-}
-}  // namespace
-
-Status GetOptimizationAlgorithmStateVariables(
-    OptimizationAlgorithm alg, bool use_gradient_accumulation,
-    std::vector<StateVariableSpecification>* state_variables) {
-  // The first parameter set is always the weights themselves.
-  state_variables->push_back(
-      MakeStandardStateVariableSpecification("parameters", 0.0));
-  // The order of the returned parameters needs to match the offsets used by
-  // the algorithm implementations in test_util.cc and
-  // address_handler_program_creator.cc.
-  switch (alg) {
-    case OptimizationAlgorithm::kAdagrad: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("accumulators", 0.1));
-      break;
-    }
-    case OptimizationAlgorithm::kStochasticGradientDescent: {
-      // None.
-      break;
-    }
-    case OptimizationAlgorithm::kFtrl: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("accumulators", 0.1));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("linears", 0.0));
-      break;
-    }
-    case OptimizationAlgorithm::kAdam: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("momenta", 0.0));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("velocities", 0.0));
-      break;
-    }
-    case OptimizationAlgorithm::kMomentum: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("momenta", 0.0));
-      break;
-    }
-    case OptimizationAlgorithm::kRmsProp: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("ms", 1.0));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("mom", 0.0));
-      break;
-    }
-    case OptimizationAlgorithm::kCenteredRmsProp: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("ms", 1.0));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("mom", 0.0));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("mg", 0.0));
-      break;
-    }
-    case OptimizationAlgorithm::kMdlAdagradLight: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("accumulators", 0.1));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("weights", 0.0));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("benefits", 0.0));
-      break;
-    }
-    case OptimizationAlgorithm::kAdadelta: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("accumulators", 0.0));
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("updates", 0.0));
-      break;
-    }
-    case OptimizationAlgorithm::kProximalAdagrad: {
-      state_variables->push_back(
-          MakeStandardStateVariableSpecification("accumulators", 0.1));
-      break;
-    }
-    case OptimizationAlgorithm::PARAMETERS_NOT_SET: {
-      return errors::InvalidArgument("No optimization algorithm specified");
-    }
-  }
-  // This needs to be last so that the save/restore ops do not need to know
-  // about gradient accumulation.
-  if (use_gradient_accumulation) {
-    StateVariableSpecification gradient_acc;
-    gradient_acc.set_name("gradient_accumulators");
-    gradient_acc.mutable_fill_with_constant()->set_initial_value(
-        kGradientAccumulatorInitialValue);
-    state_variables->push_back(std::move(gradient_acc));
-  }
-  if (state_variables->size() > kMaxAuxiliaryParameterCount + 1) {
-    return errors::InvalidArgument(
-        "Optimization algorithm", GetOptimizationAlgorithmName(alg),
-        "does not support gradient accumulation because it "
-        "already has too many other accumulators");
-  }
-  return Status::OK();
-}
-
-std::vector<OptimizationAlgorithm> GetOptimizationAlgorithms() {
-  return {
-      OptimizationAlgorithm::kAdagrad,
-      OptimizationAlgorithm::kStochasticGradientDescent,
-      OptimizationAlgorithm::kFtrl,
-      OptimizationAlgorithm::kAdam,
-      OptimizationAlgorithm::kMomentum,
-      OptimizationAlgorithm::kRmsProp,
-      OptimizationAlgorithm::kCenteredRmsProp,
-      OptimizationAlgorithm::kMdlAdagradLight,
-      OptimizationAlgorithm::kAdadelta,
-      OptimizationAlgorithm::kProximalAdagrad,
-  };
-}
-
-}  // namespace tpu
-}  // namespace tensorflow
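The switch in the deleted GetBaseAuxiliaryParameterCount encodes a fixed
table: the number of auxiliary parameter vectors per optimizer, excluding the
weights themselves and any gradient accumulator. Restated for quick reference
(keys as returned by GetOptimizationAlgorithmName, comments naming the state
variables from GetOptimizationAlgorithmStateVariables):

BASE_AUXILIARY_PARAMETER_COUNT = {
    'Adagrad': 1,                    # accumulators
    'StochasticGradientDescent': 0,  # none
    'FTRL': 2,                       # accumulators, linears
    'ADAM': 2,                       # momenta, velocities
    'Momentum': 1,                   # momenta
    'RMSProp': 2,                    # ms, mom
    'CenteredRMSProp': 3,            # ms, mom, mg
    'MDLAdagradLight': 3,            # accumulators, weights, benefits
    'Adadelta': 2,                   # accumulators, updates
    'ProximalAdagrad': 1,            # accumulators
}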
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h b/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h
deleted file mode 100644
index 81d5026..0000000
--- a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
-#define TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
-
-#include <string>
-#include "absl/base/casts.h"
-#include "tensorflow/contrib/tpu/proto/optimization_parameters.pb.h"
-#include "tensorflow/core/lib/core/status.h"
-
-namespace tensorflow {
-namespace tpu {
-
-using OptimizationAlgorithm = OptimizationParameters::ParametersCase;
-
-// Returns the name of the optimization algorithm.
-string GetOptimizationAlgorithmName(OptimizationAlgorithm alg);
-
-// Returns a user-friendly name for the optimization algorithm.
-string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg);
-
-// Returns all supported optimization algorithms.
-std::vector<OptimizationAlgorithm> GetOptimizationAlgorithms();
-
-enum class GradientAccumulationSupport {
-  // Accumulation cannot be used with this optimizer.
-  kNotSupported,
-
-  // Accumulation is unnecessary because optimizer application is commutative.
-  kUnnecessary,
-
-  // Accumulation is allowed and changes optimizer behavior.
-  kSupported,
-};
-
-// Returns the number of optimization parameter vectors used by the optimization
-// algorithm, excluding the weights themselves and assuming no gradient
-// accumulation.
-Status GetBaseAuxiliaryParameterCount(OptimizationAlgorithm alg, int *count);
-
-// Returns whether (and how) an optimization algorithm supports gradient
-// accumulation.
-Status GetGradientAccumulationSupport(OptimizationAlgorithm alg,
-                                      GradientAccumulationSupport *support);
-
-// Returns the parameter specifications for the optimization algorithm (the main
-// parameters first, followed by any auxiliary parameters such as Adagrad
-// accumulators).
-Status GetOptimizationAlgorithmStateVariables(
-    OptimizationAlgorithm alg, bool use_gradient_accumulation,
-    std::vector<StateVariableSpecification> *state_variables);
-
-// Maximum value of auxiliary_parameter_count for any optimization algorithm.
-static constexpr int kMaxAuxiliaryParameterCount = 3;
-
-// Fill value for gradient accumulators. This is a denormal so that it will be
-// flushed to zero on the current TPU platforms and needs to continue to have
-// the following properties in the future:
-//
-// 1. Does not have the same bit pattern as a zero and can be distinguished from
-// it using integer operations.
-// 2. Treated as zero by floating-point arithmetic operations (at least addition
-// and subtraction).
-// 3. Cannot be produced by any floating-point arithmetic operation, including
-// those involving itself.
-//
-// It does not need to compare equal or not equal to zero in floating point. We
-// need to use a non-zero value here because some optimization algorithms are
-// not no-ops on zero gradients, so we need to distinguish an accumulated
-// gradient of zero from one that has been cleared after its gradients have
-// already been applied to the parameters and accumulators.
-const float kGradientAccumulatorInitialValue = absl::bit_cast<float, uint32>(1);
-
-}  // namespace tpu
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
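kGradientAccumulatorInitialValue is the float whose bit pattern is the
integer 1, i.e. the smallest positive denormal (about 1.4e-45), which is what
gives it the three properties listed above on flush-to-zero hardware. A quick
check of the bit cast in Python:

import struct

# Reinterpret the 32-bit integer 1 as an IEEE-754 float, matching
# absl::bit_cast<float, uint32>(1).
sentinel = struct.unpack('<f', struct.pack('<I', 1))[0]
assert 0.0 < sentinel < 1e-44  # smallest positive denormal, ~1.4e-45
assert struct.unpack('<I', struct.pack('<f', sentinel))[0] == 1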
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc b/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc
deleted file mode 100644
index 8480ec4..0000000
--- a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h"
-#include "tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.pb.h"
-#include "tensorflow/core/lib/core/errors.h"
-
-namespace tensorflow {
-namespace tpu {
-
-void AddDefaultEmbeddingOutputLayoutIfNeeded(
-    TPUEmbeddingConfiguration* config) {
-  if (config->has_output_layout()) {
-    // Model or previous step has already filled this in.
-    return;
-  }
-
-  TPUEmbeddingOutputLayout* layout = config->mutable_output_layout();
-  // Create output tensors.
-  for (const auto& table : config->table_descriptor()) {
-    TPUEmbeddingOutputLayout::EmbeddingOutputTensor* output =
-        layout->add_output();
-    TPUEmbeddingOutputLayout::TwoDOutputTensor* two_d = output->mutable_two_d();
-    two_d->set_dim1_size(table.dimension());
-    two_d->set_dim0_size_per_sample(table.num_features());
-  }
-
-  // Create table output locations.
-  for (int table_id = 0; table_id < config->table_descriptor_size();
-       ++table_id) {
-    TPUEmbeddingOutputLayout::TableDescriptor* output_table =
-        layout->add_table();
-    const auto& table = config->table_descriptor(table_id);
-    for (int feature_index = 0; feature_index < table.num_features();
-         ++feature_index) {
-      TPUEmbeddingOutputLayout::FeatureDescriptor* output_feature =
-          output_table->add_feature();
-      TPUEmbeddingOutputLayout::OutputLocation* output_location =
-          output_feature->add_output_location();
-      output_location->set_tensor_index(table_id);
-      output_location->set_dim0_offset(feature_index);
-      output_location->set_dim1_offset(0);
-    }
-  }
-}
-
-Status ComputeOutputTensorShapes(const TPUEmbeddingConfiguration& config,
-                                 std::vector<TensorShapeProto>* shapes) {
-  if (!config.has_output_layout()) {
-    return errors::InvalidArgument(
-        "TPUEmbeddingConfiguration is missing output layout.");
-  }
-  const TPUEmbeddingOutputLayout& layout = config.output_layout();
-  int batch_size = config.batch_size_per_tensor_core();
-
-  for (int i = 0; i < layout.output_size(); ++i) {
-    const auto& output = layout.output(i);
-    TensorShapeProto shape;
-    switch (output.output_format_case()) {
-      case TPUEmbeddingOutputLayout::EmbeddingOutputTensor::OutputFormatCase::
-          kTwoD: {
-        auto* dim0 = shape.add_dim();
-        dim0->set_size(output.two_d().dim0_size_per_sample() * batch_size);
-        auto* dim1 = shape.add_dim();
-        dim1->set_size(output.two_d().dim1_size());
-        break;
-      }
-      case TPUEmbeddingOutputLayout::EmbeddingOutputTensor::OutputFormatCase::
-          OUTPUT_FORMAT_NOT_SET: {
-        return errors::InvalidArgument(
-            "Output layout in TPUEmbeddingConfiguration has unset embedding "
-            "output tensor format.");
-      }
-      default: {
-        return errors::InvalidArgument(
-            "Output layout in TPUEmbeddingConfiguration has invalid or "
-            "unhandled embedding output tensor format.");
-      }
-    }
-    shapes->push_back(shape);
-  }
-  return Status::OK();
-}
-
-}  // namespace tpu
-}  // namespace tensorflow
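The shape rule in the deleted ComputeOutputTensorShapes is, for each two-D
output: dim0 = dim0_size_per_sample * batch_size_per_tensor_core and
dim1 = dim1_size. A minimal sketch of that arithmetic with a plain dict
standing in for the proto (hypothetical helper, illustration only):

def two_d_output_shape(two_d, batch_size_per_tensor_core):
  # Mirrors the kTwoD case: the batch size scales the per-sample dim0.
  return [two_d['dim0_size_per_sample'] * batch_size_per_tensor_core,
          two_d['dim1_size']]

# A table with embedding dimension 64 and 2 features per sample, batch 128:
assert two_d_output_shape(
    {'dim0_size_per_sample': 2, 'dim1_size': 64}, 128) == [256, 64]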
diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h b/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h
deleted file mode 100644
index c10fbee..0000000
--- a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
-#define TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
-
-#include "tensorflow/contrib/tpu/proto/tpu_embedding_configuration.pb.h"
-#include "tensorflow/core/framework/tensor_shape.pb.h"
-#include "tensorflow/core/lib/core/status.h"
-
-namespace tensorflow {
-namespace tpu {
-
-// Creates a default output layout for compatibility if none was provided by the
-// model.
-void AddDefaultEmbeddingOutputLayoutIfNeeded(TPUEmbeddingConfiguration* config);
-
-// Computes the shape of the output tensors from an output layout.
-Status ComputeOutputTensorShapes(
-    const TPUEmbeddingConfiguration& config,
-    std::vector<tensorflow::TensorShapeProto>* shapes);
-
-}  // namespace tpu
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
diff --git a/tensorflow/contrib/util/BUILD b/tensorflow/contrib/util/BUILD
index 07dbd5c..ada08f9 100644
--- a/tensorflow/contrib/util/BUILD
+++ b/tensorflow/contrib/util/BUILD
@@ -22,7 +22,9 @@
         "//tensorflow/core:functional_ops_op_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:nn_ops_op_lib",
+        "//tensorflow/core:no_op_op_lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:sendrecv_ops_op_lib",
         "//tensorflow/core:tensorflow",
         "//tensorflow/core/kernels:immutable_constant_op",
     ],
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 4cf37a6..906e869 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -128,7 +128,6 @@
     "tf_additional_libdevice_srcs",
     "tf_additional_minimal_lib_srcs",
     "tf_additional_mpi_lib_defines",
-    "tf_additional_proto_compiler_hdrs",
     "tf_additional_proto_hdrs",
     "tf_additional_proto_srcs",
     "tf_additional_test_deps",
@@ -147,6 +146,7 @@
     "tf_protos_grappler",
     "tf_protos_grappler_impl",
     "tf_pyclif_proto_library",
+    "tf_grpc_service_all",
 )
 load(
     "//tensorflow/core:platform/default/build_config_root.bzl",
@@ -229,7 +229,7 @@
 # ones with individual proto_library targets.
 ADDITIONAL_CORE_PROTO_SRCS = [
     "example/example_parser_configuration.proto",
-    "protobuf/checkpointable_object_graph.proto",
+    "protobuf/trackable_object_graph.proto",
     "protobuf/control_flow.proto",
     # TODO(ebrevdo): Re-enable once CriticalSection is in core.
     # "protobuf/critical_section.proto",
@@ -418,9 +418,8 @@
     name = "platform_protobuf",
     srcs = tf_platform_hdrs([
         "protobuf.h",
-    ]) + tf_platform_srcs([
-        "protobuf.cc",
     ]) + [
+        "platform/protobuf.cc",
         "platform/protobuf_util.cc",
         "lib/core/status.h",
     ],
@@ -440,6 +439,17 @@
 )
 
 cc_library(
+    name = "grpc_services",
+    srcs = [],
+    hdrs = [
+        "platform/grpc_services.h",
+    ],
+    copts = tf_copts(),
+    visibility = ["//visibility:public"],
+    deps = tf_grpc_service_all(),
+)
+
+cc_library(
     name = "human_readable_json",
     srcs = tf_platform_srcs(["human_readable_json.cc"]),
     hdrs = ["platform/human_readable_json.h"],
@@ -457,10 +467,7 @@
     hdrs = ["platform/logger.h"],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
-    deps = [
-        ":lib_proto_parsing",
-        "@protobuf_archive//:protobuf",
-    ],
+    deps = [":lib_proto_parsing"],
 )
 
 filegroup(
@@ -667,7 +674,7 @@
     name = "lib_proto_compiler",
     hdrs = [
         "platform/protobuf_compiler.h",
-    ] + tf_additional_proto_compiler_hdrs(),
+    ],
     copts = tf_copts(),
     deps = tf_lib_proto_compiler_deps() + [
         ":lib_proto_parsing",
@@ -1052,13 +1059,13 @@
         "platform/default/integral_types.h",
         "platform/default/logging.h",
         "platform/default/mutex.h",
-        "platform/default/protobuf.h",
         "platform/default/thread_annotations.h",
         "platform/dynamic_annotations.h",
         "platform/macros.h",
         "platform/mutex.h",
         "platform/platform.h",
         "platform/prefetch.h",
+        "platform/protobuf.h",
         "platform/thread_annotations.h",
         "platform/types.h",
         "platform/cpu_info.h",
@@ -1146,6 +1153,13 @@
 
 tf_gen_op_libs(
     op_lib_names = [
+        "mkl_array_ops",
+    ],
+    deps = [":protos_all_cc"],
+)
+
+tf_gen_op_libs(
+    op_lib_names = [
         "audio_ops",
     ],
     deps = [":lib"],
@@ -1164,6 +1178,29 @@
     deps = [":lib"],
 )
 
+tf_gen_op_libs(
+    op_lib_names = [
+        "tpu_configuration_ops",
+        "tpu_cross_replica_ops",
+        "tpu_embedding_ops",
+        "tpu_functional_ops",
+        "tpu_heartbeat_ops",
+        "tpu_host_compute_ops",
+        "tpu_infeed_ops",
+        "tpu_outfeed_ops",
+        "tpu_ordinal_selector_ops",
+        "tpu_replication_ops",
+    ],
+    deps = [
+        ":lib",
+        ":lib_proto_parsing",
+        ":protos_all_cc",
+        "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_cc",
+        "//tensorflow/core/tpu:tpu_embedding_optimization_parameters_utils",
+        "//tensorflow/core/tpu:tpu_embedding_output_layout_utils",
+    ],
+)
+
 # And one for all user ops
 cc_library(
     name = "user_ops_op_lib",
@@ -1280,10 +1317,23 @@
         ":state_ops_op_lib",
         ":stateless_random_ops_op_lib",
         ":string_ops_op_lib",
+        ":tpu_configuration_ops_op_lib",
+        ":tpu_cross_replica_ops_op_lib",
+        ":tpu_embedding_ops_op_lib",
+        ":tpu_functional_ops_op_lib",
+        ":tpu_heartbeat_ops_op_lib",
+        ":tpu_host_compute_ops_op_lib",
+        ":tpu_infeed_ops_op_lib",
+        ":tpu_outfeed_ops_op_lib",
+        ":tpu_ordinal_selector_ops_op_lib",
+        ":tpu_replication_ops_op_lib",
         ":training_ops_op_lib",
         ":user_ops_op_lib",
         ":word2vec_ops",
-    ] + if_mkl([":mkl_nn_ops_op_lib"]) + tf_additional_cloud_op_deps(),
+    ] + if_mkl([
+        ":mkl_array_ops_op_lib",
+        ":mkl_nn_ops_op_lib",
+    ]) + tf_additional_cloud_op_deps(),
     alwayslink = 1,
 )
 
@@ -1385,9 +1435,8 @@
 # This includes implementations of all kernels built into TensorFlow.
 cc_library(
     name = "all_kernels_impl",
-    visibility = ["//visibility:private"],
+    visibility = ["//tensorflow/core:__subpackages__"],
     deps = [
-        "//tensorflow/c/kernels:bitcast_op",
         "//tensorflow/core/kernels:array",
         "//tensorflow/core/kernels:audio",
         "//tensorflow/core/kernels:batch_kernels",
@@ -1456,6 +1505,7 @@
         "//tensorflow/core/kernels:mkl_identity_op",
         "//tensorflow/core/kernels:mkl_input_conversion_op",
         "//tensorflow/core/kernels:mkl_lrn_op",
+        "//tensorflow/core/kernels:mkl_requantize_ops",
         "//tensorflow/core/kernels:mkl_pooling_ops",
         "//tensorflow/core/kernels:mkl_relu_op",
         "//tensorflow/core/kernels:mkl_reshape_op",
@@ -1544,6 +1594,7 @@
         ":framework_internal",
         ":lib",
         ":lib_internal",
+        ":ops",
         ":protos_all_cc",
         ":shape_inference_testutil",
         ":tensor_testutil",
@@ -1890,6 +1941,7 @@
             "**/*testutil*",
             "**/*testlib*",
             "**/*main.cc",
+            "**/tpu_*",
         ],
     ),
     visibility = ["//visibility:public"],
@@ -2275,6 +2327,7 @@
             "platform/**/logging.cc",
             "platform/**/human_readable_json.cc",
             "platform/abi.cc",
+            "platform/protobuf.cc",
         ],
     ) + tf_additional_lib_srcs(
         exclude = [
@@ -2951,6 +3004,7 @@
     "common_runtime/lower_if_while.h",
     "common_runtime/lower_while_op.h",
     "common_runtime/memory_types.h",
+    "common_runtime/metrics.h",
     "common_runtime/mkl_cpu_allocator.h",
     "common_runtime/optimization_registry.h",
     "common_runtime/pending_counts.h",
@@ -2962,6 +3016,8 @@
     "common_runtime/rendezvous_mgr.h",
     "common_runtime/rendezvous_util.h",
     "common_runtime/ring_reducer.h",
+    "common_runtime/ring_alg.h",
+    "common_runtime/ring_gatherer.h",
     "common_runtime/session_factory.h",
     "common_runtime/single_threaded_cpu_device.h",
     "common_runtime/stats_publisher_interface.h",
@@ -2986,6 +3042,8 @@
         "common_runtime/collective_param_resolver_local.cc",
         "common_runtime/collective_rma_local.cc",
         "common_runtime/collective_util.cc",
+        "common_runtime/colocation_graph.cc",
+        "common_runtime/colocation_graph.h",
         "common_runtime/constant_folding.cc",
         "common_runtime/copy_tensor.cc",
         "common_runtime/costmodel_manager.cc",
@@ -3006,6 +3064,7 @@
         "common_runtime/lower_if_while.cc",
         "common_runtime/lower_while_op.cc",
         "common_runtime/memory_types.cc",
+        "common_runtime/metrics.cc",
         "common_runtime/mkl_cpu_allocator.cc",
         "common_runtime/optimization_registry.cc",
         "common_runtime/parallel_concat_optimizer.cc",
@@ -3018,6 +3077,8 @@
         "common_runtime/renamed_device.cc",
         "common_runtime/rendezvous_mgr.cc",
         "common_runtime/rendezvous_util.cc",
+        "common_runtime/ring_alg.cc",
+        "common_runtime/ring_gatherer.cc",
         "common_runtime/ring_reducer.cc",
         "common_runtime/session.cc",
         "common_runtime/session_factory.cc",
@@ -3076,7 +3137,6 @@
         ":framework",
         ":graph",
         ":lib",
-        ":metrics",
         ":proto_text",
         ":protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
@@ -3108,15 +3168,6 @@
 )
 
 tf_cuda_library(
-    name = "metrics",
-    srcs = ["common_runtime/metrics.cc"],
-    hdrs = ["common_runtime/metrics.h"],
-    deps = [
-        ":lib",
-    ],
-)
-
-tf_cuda_library(
     name = "direct_session_internal",
     srcs = ["common_runtime/direct_session.cc"],
     hdrs = [
@@ -3132,7 +3183,6 @@
         ":graph",
         ":lib",
         ":lib_internal",
-        ":metrics",
         ":proto_text",
         ":protos_all_cc",
         "//tensorflow/core/debug:debug_graph_utils",
@@ -3499,6 +3549,7 @@
         "platform/vmodule_benchmark_test.cc",
     ],
     deps = [
+        ":core_cpu_internal",
         ":lib",
         ":lib_internal",
         ":lib_test_internal",
@@ -3929,7 +3980,6 @@
         "ops/cudnn_rnn_ops_test.cc",
     ],
     deps = [
-        ":cudnn_rnn_ops",
         "//tensorflow/core",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -3990,6 +4040,35 @@
 )
 
 tf_cc_tests_gpu(
+    name = "ring_gatherer_test",
+    size = "medium",
+    srcs = [
+        "common_runtime/ring_gatherer_test.cc",
+    ],
+    linkstatic = tf_kernel_tests_linkstatic(),
+    tags = tf_cuda_tests_tags(),
+    deps = [
+        ":all_kernels",
+        ":core",
+        ":core_cpu",
+        ":core_cpu_internal",
+        ":direct_session_internal",
+        ":framework",
+        ":framework_internal",
+        ":gpu_runtime",
+        ":lib",
+        ":lib_internal",
+        ":ops",
+        ":protos_all_cc",
+        ":protos_test_cc",
+        ":test",
+        ":test_main",
+        ":testlib",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
+tf_cc_tests_gpu(
     name = "hierarchical_tree_broadcaster_test",
     size = "medium",
     srcs = [
@@ -4527,7 +4606,7 @@
         "//tensorflow/cc:scope",
         "//tensorflow/core/kernels:cwise_op",
         "//third_party/eigen3",
-    ],
+    ] + if_mkl([":mkl_array_ops_op_lib"]),
 )
 
 tf_cc_test(
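Editor's note: the BUILD edits above lean on the `tf_gen_op_libs` convention that each entry in `op_lib_names` produces a `<name>_op_lib` target, which is why the new TPU dependencies appear as `:tpu_*_ops_op_lib`. A minimal sketch of that naming rule (an illustration of the assumed convention, not the macro itself):

```python
# Hedged illustration: tf_gen_op_libs(op_lib_names = [...]) is assumed to
# register one "<name>_op_lib" cc_library per entry.
op_lib_names = ["tpu_infeed_ops", "tpu_outfeed_ops", "tpu_replication_ops"]
targets = [":%s_op_lib" % name for name in op_lib_names]
print(targets)  # [':tpu_infeed_ops_op_lib', ':tpu_outfeed_ops_op_lib', ...]
```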
diff --git a/tensorflow/core/api_def/base_api/api_def_AllToAll.pbtxt b/tensorflow/core/api_def/base_api/api_def_AllToAll.pbtxt
new file mode 100644
index 0000000..d6f28bd
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_AllToAll.pbtxt
@@ -0,0 +1,67 @@
+op {
+  graph_op_name: "AllToAll"
+  in_arg {
+    name: "input"
+    description: <<END
+The local input to the all-to-all exchange.
+END
+  }
+  in_arg {
+    name: "group_assignment"
+    description: <<END
+An int32 tensor with shape
+[num_groups, num_replicas_per_group]. `group_assignment[i]` represents the
+replica ids in the ith subgroup.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+The exchanged result.
+END
+  }
+  attr {
+    name: "T"
+    description: <<END
+The type of elements to be exchanged.
+END
+  }
+  attr {
+    name: "concat_dimension"
+    description: <<END
+The dimension number to concatenate.
+END
+  }
+  attr {
+    name: "split_dimension"
+    description: <<END
+The dimension number to split.
+END
+  }
+  attr {
+    name: "split_count"
+    description: <<END
+The number of splits; this number must equal the sub-group size
+(group_assignment.get_shape()[1]).
+END
+  }
+  summary: "An Op to exchange data across TPU replicas."
+  description: <<END
+On each replica, the input is split into `split_count` blocks along
+`split_dimension` and sent to the other replicas given by `group_assignment`.
+After receiving `split_count - 1` blocks from the other replicas, we
+concatenate the blocks along `concat_dimension` to produce the output.
+
+For example, suppose there are 2 TPU replicas:
+replica 0 receives input: `[[A, B]]`
+replica 1 receives input: `[[C, D]]`
+
+group_assignment=`[[0, 1]]`
+concat_dimension=0
+split_dimension=1
+split_count=2
+
+replica 0's output: `[[A], [C]]`
+replica 1's output: `[[B], [D]]`
+END
+}
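Editor's note: a minimal numpy sketch of the split/send/concat semantics described above, simulating all replicas of one group in a single process (an illustration only, not the TPU op):

```python
import numpy as np

def all_to_all(inputs, concat_dimension, split_dimension, split_count):
    # inputs: one array per replica in a single group.
    blocks = [np.split(x, split_count, axis=split_dimension) for x in inputs]
    # Replica r's output concatenates block r from every replica, in order.
    return [np.concatenate([blocks[s][r] for s in range(len(inputs))],
                           axis=concat_dimension)
            for r in range(len(inputs))]

r0, r1 = np.array([["A", "B"]]), np.array([["C", "D"]])
out = all_to_all([r0, r1], concat_dimension=0, split_dimension=1, split_count=2)
# out[0] == [["A"], ["C"]]; out[1] == [["B"], ["D"]], matching the example.
```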
diff --git a/tensorflow/core/api_def/base_api/api_def_Case.pbtxt b/tensorflow/core/api_def/base_api/api_def_Case.pbtxt
new file mode 100644
index 0000000..56fef3a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_Case.pbtxt
@@ -0,0 +1,43 @@
+op {
+  graph_op_name: "Case"
+  in_arg {
+    name: "branch_index"
+    description: "The branch selector, an int32 Tensor."
+  }
+  in_arg {
+    name: "input"
+    description: "A list of input tensors passed to the branch function."
+  }
+  out_arg {
+    name: "output"
+    description: "A list of return values."
+  }
+  attr { name: "Tin"  description: "A list of input types." }
+  attr { name: "Tout"  description: "A list of output types." }
+  attr {
+    name: "branches"
+    description: <<END
+      A list of functions, each of which takes 'inputs' and returns a list of
+      tensors whose types match those returned by every other branch.
+END
+  }
+  summary: "An n-way switch statement which calls a single branch function."
+  description: <<END
+    An n-way switch statement, implementing the following:
+    ```
+    switch (branch_index) {
+      case 0:
+        output = branches[0](input);
+        break;
+      case 1:
+        output = branches[1](input);
+        break;
+      ...
+      case [[nbranches-1]]:
+      default:
+        output = branches[nbranches-1](input);
+        break;
+    }
+    ```
+END
+}
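Editor's note: a minimal Python sketch of the dispatch semantics in the pseudocode above, including the fall-through to the last branch for an out-of-range selector (an illustration, not the runtime kernel):

```python
def case(branch_index, branches, inputs):
    # Out-of-range selectors take the final branch, mirroring `default:`.
    if not 0 <= branch_index < len(branches):
        branch_index = len(branches) - 1
    return branches[branch_index](inputs)

print(case(1, [lambda x: [x + 1], lambda x: [x * 2]], 10))  # [20]
print(case(7, [lambda x: [x + 1], lambda x: [x * 2]], 10))  # [20] (default)
```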
diff --git a/tensorflow/core/api_def/base_api/api_def_CollectiveGather.pbtxt b/tensorflow/core/api_def/base_api/api_def_CollectiveGather.pbtxt
new file mode 100644
index 0000000..3cd833b
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_CollectiveGather.pbtxt
@@ -0,0 +1,5 @@
+op {
+  graph_op_name: "CollectiveGather"
+  summary: "Mutually accumulates multiple tensors of identical type and shape."
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_CollectivePermute.pbtxt b/tensorflow/core/api_def/base_api/api_def_CollectivePermute.pbtxt
new file mode 100644
index 0000000..aec724a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_CollectivePermute.pbtxt
@@ -0,0 +1,36 @@
+op {
+  graph_op_name: "CollectivePermute"
+  in_arg {
+    name: "input"
+    description: <<END
+The local input to be permuted. Currently only supports float and
+bfloat16.
+END
+  }
+  in_arg {
+    name: "source_target_pairs"
+    description: <<END
+A tensor with shape [num_pairs, 2].
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+The permuted input.
+END
+  }
+  attr {
+    name: "T"
+    description: <<END
+The type of elements to be exchanged.
+END
+  }
+  summary: "An Op to permute tensors across replicated TPU instances."
+  description: <<END
+Each instance supplies its own input.
+
+For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing
+source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs:
+`[D, A, B, C]`.
+END
+}
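Editor's note: the source/target pairing can be simulated directly; a minimal sketch of the permutation described above, assuming every replica appears exactly once as a target:

```python
def collective_permute(inputs, source_target_pairs):
    outputs = [None] * len(inputs)
    for source, target in source_target_pairs:
        outputs[target] = inputs[source]  # `target` receives from `source`
    return outputs

# [A, B, C, D] with pairs [[0,1],[1,2],[2,3],[3,0]] -> [D, A, B, C]
assert collective_permute(list("ABCD"),
                          [[0, 1], [1, 2], [2, 3], [3, 0]]) == list("DABC")
```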
diff --git a/tensorflow/core/api_def/base_api/api_def_ConfigureDistributedTPU.pbtxt b/tensorflow/core/api_def/base_api/api_def_ConfigureDistributedTPU.pbtxt
new file mode 100644
index 0000000..a710f60
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ConfigureDistributedTPU.pbtxt
@@ -0,0 +1,30 @@
+op {
+  graph_op_name: "ConfigureDistributedTPU"
+  out_arg {
+    name: "topology"
+    description: <<END
+A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
+END
+  }
+  attr {
+    name: "embedding_config"
+    description: <<END
+Reserved. Do not use.
+END
+  }
+  attr {
+    name: "tpu_embedding_config"
+    description: <<END
+Serialized tensorflow.tpu.TPUEmbeddingConfiguration that
+describes the embedding lookups of the program.
+END
+  }
+  attr {
+    name: "is_global_init"
+    description: <<END
+Reserved. Do not use.
+END
+  }
+  summary: "Sets up the centralized structures for a distributed TPU system."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_CrossReplicaSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_CrossReplicaSum.pbtxt
new file mode 100644
index 0000000..fd4c343
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_CrossReplicaSum.pbtxt
@@ -0,0 +1,38 @@
+op {
+  graph_op_name: "CrossReplicaSum"
+  in_arg {
+    name: "input"
+    description: <<END
+The local input to the sum.
+END
+  }
+  in_arg {
+    name: "group_assignment"
+    description: <<END
+An int32 tensor with shape
+[num_groups, num_replicas_per_group]. `group_assignment[i]` represents the
+replica ids in the ith subgroup.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+The sum of all the distributed inputs.
+END
+  }
+  attr {
+    name: "T"
+    description: <<END
+The type of elements to be summed.
+END
+  }
+  summary: "An Op to sum inputs across replicated TPU instances."
+  description: <<END
+Each instance supplies its own input.
+
+For example, suppose there are 8 TPU instances: `[A, B, C, D, E, F, G, H]`.
+Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0,
+and `B, D, F, H` as group 1. Thus we get the outputs:
+`[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`.
+END
+}
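Editor's note: a minimal sketch of the grouped summation, simulating all replicas at once (an illustration only, not the TPU op):

```python
def cross_replica_sum(inputs, group_assignment):
    # Every replica in a group receives the same sum over that group.
    outputs = [None] * len(inputs)
    for group in group_assignment:
        total = sum(inputs[r] for r in group)
        for r in group:
            outputs[r] = total
    return outputs

vals = [1, 10, 2, 20, 3, 30, 4, 40]
print(cross_replica_sum(vals, [[0, 2, 4, 6], [1, 3, 5, 7]]))
# -> [10, 100, 10, 100, 10, 100, 10, 100]
```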
diff --git a/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingIntegerBatch.pbtxt b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingIntegerBatch.pbtxt
new file mode 100644
index 0000000..0317c4e
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingIntegerBatch.pbtxt
@@ -0,0 +1,27 @@
+op {
+  graph_op_name: "EnqueueTPUEmbeddingIntegerBatch"
+  in_arg {
+    name: "batch"
+    description: <<END
+A list of 1D tensors, one for each embedding table, containing the
+indices into the tables.
+END
+  }
+  in_arg {
+    name: "mode_override"
+    description: <<END
+A string input that overrides the mode specified in the
+TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
+END
+  }
+  attr {
+    name: "device_ordinal"
+    description: <<END
+The TPU device to use. Should be >= 0 and less than the number
+of TPU cores in the task on which the node is placed.
+END
+  }
+  summary: "An op that enqueues a list of input batch tensors to TPUEmbedding."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseBatch.pbtxt b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseBatch.pbtxt
new file mode 100644
index 0000000..bb476ce
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseBatch.pbtxt
@@ -0,0 +1,65 @@
+op {
+  graph_op_name: "EnqueueTPUEmbeddingSparseBatch"
+  in_arg {
+    name: "sample_indices"
+    description: <<END
+A list of rank 1 Tensors specifying the training example and
+feature to which the corresponding embedding_indices and aggregation_weights
+values belong. sample_indices[i] must equal b * nf + f, where nf is the
+number of features from the corresponding table, f is in [0, nf), and
+b is in [0, batch size).
+END
+  }
+  in_arg {
+    name: "embedding_indices"
+    description: <<END
+A list of rank 1 Tensors, indices into the embedding tables.
+END
+  }
+  in_arg {
+    name: "aggregation_weights"
+    description: <<END
+A list of rank 1 Tensors containing per sample -- i.e. per
+(training example, feature) -- aggregation weights.
+END
+  }
+  in_arg {
+    name: "mode_override"
+    description: <<END
+A string input that overrides the mode specified in the
+TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
+END
+  }
+  attr {
+    name: "device_ordinal"
+    description: <<END
+The TPU device to use. Should be >= 0 and less than the number
+of TPU cores in the task on which the node is placed.
+END
+  }
+  attr {
+    name: "combiners"
+    description: <<END
+A list of string scalars, one for each embedding table, specifying
+how to normalize the embedding activations after weighted summation.
+Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
+the sum of the weights be 0 for 'mean' or the sum of the squared weights be
+0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
+all tables.
+END
+  }
+  summary: "An op that enqueues TPUEmbedding input indices from a SparseTensor."
+  description: <<END
+This Op eases the porting of code that uses embedding_lookup_sparse(),
+although some Python preprocessing of the SparseTensor arguments to
+embedding_lookup_sparse() is required to produce the arguments to this Op,
+since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training
+step.
+
+The tensors at corresponding positions in the three input lists
+must have the same shape, i.e. rank 1 with dim_size() equal to the total
+number of lookups into the table described by the corresponding table_id.
+END
+}
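Editor's note: the `sample_indices` layout `b * nf + f` flattens (example, feature) pairs into a single index; a small worked example with hypothetical sizes:

```python
# With nf = 3 features per table and a batch of B = 2 examples,
# the index for (example b, feature f) is b * nf + f.
nf, B = 3, 2
sample_indices = [b * nf + f for b in range(B) for f in range(nf)]
print(sample_indices)  # [0, 1, 2, 3, 4, 5]; e.g. (b=1, f=2) -> 5
```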
diff --git a/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseTensorBatch.pbtxt b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseTensorBatch.pbtxt
new file mode 100644
index 0000000..8cef870
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseTensorBatch.pbtxt
@@ -0,0 +1,74 @@
+op {
+  graph_op_name: "EnqueueTPUEmbeddingSparseTensorBatch"
+  in_arg {
+    name: "sample_indices"
+    description: <<END
+A list of rank 1 Tensors specifying the training example to
+which the corresponding embedding_indices and aggregation_weights values
+belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse().
+END
+  }
+  in_arg {
+    name: "embedding_indices"
+    description: <<END
+A list of rank 1 Tensors, indices into the embedding tables.
+It corresponds to sp_ids.values in embedding_lookup_sparse().
+END
+  }
+  in_arg {
+    name: "aggregation_weights"
+    description: <<END
+A list of rank 1 Tensors containing per training example
+aggregation weights. It corresponds to sp_weights.values in
+embedding_lookup_sparse().
+END
+  }
+  in_arg {
+    name: "mode_override"
+    description: <<END
+A string input that overrides the mode specified in the
+TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
+END
+  }
+  attr {
+    name: "device_ordinal"
+    description: <<END
+The TPU device to use. Should be >= 0 and less than the number
+of TPU cores in the task on which the node is placed.
+END
+  }
+  attr {
+    name: "combiners"
+    description: <<END
+A list of string scalars, one for each embedding table, specifying
+how to normalize the embedding activations after weighted summation.
+Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
+the sum of the weights be 0 for 'mean' or the sum of the squared weights be
+0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
+all tables.
+END
+  }
+  attr {
+    name: "table_ids"
+    description: <<END
+A list of integers specifying the identifier of the embedding table
+(offset of TableDescriptor in the TPUEmbeddingConfiguration) to look up the
+corresponding input. The ith input is looked up using table_ids[i]. The size
+of the table_ids list must be equal to that of sample_indices,
+embedding_indices and aggregation_weights.
+END
+  }
+  summary: "Eases the porting of code that uses tf.nn.embedding_lookup_sparse()."
+  description: <<END
+sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
+to the ith feature. table_ids[i] indicates which embedding table to look up the
+ith feature in.
+
+The tensors at corresponding positions in the three input lists (sample_indices,
+embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
+with dim_size() equal to the total number of lookups into the table described by
+the corresponding feature.
+END
+}
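Editor's note: the correspondence to `embedding_lookup_sparse()` arguments, spelled out in code; a minimal sketch with a hypothetical 2-D `sp_ids` (an illustration only):

```python
import numpy as np

# Hypothetical SparseTensor sp_ids whose values are embedding ids.
indices = np.array([[0, 0], [0, 1], [2, 0]])  # (example, within-example slot)
values = np.array([7, 3, 9])                  # embedding ids

sample_indices = indices[:, 0]      # sp_ids.indices[:, 0] -> [0, 0, 2]
embedding_indices = values          # sp_ids.values        -> [7, 3, 9]
aggregation_weights = np.ones(len(values), np.float32)  # unweighted lookup
```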
diff --git a/tensorflow/core/api_def/base_api/api_def_EuclideanNorm.pbtxt b/tensorflow/core/api_def/base_api/api_def_EuclideanNorm.pbtxt
new file mode 100644
index 0000000..7d815b8
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_EuclideanNorm.pbtxt
@@ -0,0 +1,39 @@
+op {
+  graph_op_name: "EuclideanNorm"
+  endpoint {
+    name: "EuclideanNorm"
+  }
+  in_arg {
+    name: "input"
+    description: <<END
+The tensor to reduce.
+END
+  }
+  in_arg {
+    name: "reduction_indices"
+    rename_to: "axis"
+    description: <<END
+The dimensions to reduce. Must be in the range
+`[-rank(input), rank(input))`.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+The reduced tensor.
+END
+  }
+  attr {
+    name: "keep_dims"
+    description: <<END
+If true, retain reduced dimensions with length 1.
+END
+  }
+  summary: "Computes the euclidean norm of elements across dimensions of a tensor."
+  description: <<END
+Reduces `input` along the dimensions given in `reduction_indices`. Unless
+`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
+retained with length 1.
+END
+}
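Editor's note: the reduction is the square root of the sum of squares along the given axes; a small numpy sketch showing both `keep_dims` settings:

```python
import numpy as np

x = np.array([[3.0, 0.0], [4.0, 0.0]])
print(np.sqrt(np.sum(np.square(x), axis=0)))                 # [5. 0.]
print(np.sqrt(np.sum(np.square(x), axis=0, keepdims=True)))  # [[5. 0.]]
```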
diff --git a/tensorflow/core/api_def/base_api/api_def_InfeedDequeue.pbtxt b/tensorflow/core/api_def/base_api/api_def_InfeedDequeue.pbtxt
new file mode 100644
index 0000000..99ca55a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_InfeedDequeue.pbtxt
@@ -0,0 +1,22 @@
+op {
+  graph_op_name: "InfeedDequeue"
+  out_arg {
+    name: "output"
+    description: <<END
+A tensor that will be provided using the infeed mechanism.
+END
+  }
+  attr {
+    name: "dtype"
+    description: <<END
+The type of elements in the tensor.
+END
+  }
+  attr {
+    name: "shape"
+    description: <<END
+The shape of the tensor.
+END
+  }
+  summary: "A placeholder op for a value that will be fed into the computation."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_InfeedDequeueTuple.pbtxt b/tensorflow/core/api_def/base_api/api_def_InfeedDequeueTuple.pbtxt
new file mode 100644
index 0000000..61b6ded
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_InfeedDequeueTuple.pbtxt
@@ -0,0 +1,22 @@
+op {
+  graph_op_name: "InfeedDequeueTuple"
+  out_arg {
+    name: "outputs"
+    description: <<END
+A list of tensors that will be provided using the infeed mechanism.
+END
+  }
+  attr {
+    name: "dtypes"
+    description: <<END
+The element types of each element in `outputs`.
+END
+  }
+  attr {
+    name: "shapes"
+    description: <<END
+The shapes of each tensor in `outputs`.
+END
+  }
+  summary: "Fetches multiple values from infeed as an XLA tuple."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_InfeedEnqueue.pbtxt b/tensorflow/core/api_def/base_api/api_def_InfeedEnqueue.pbtxt
new file mode 100644
index 0000000..e08f4e6
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_InfeedEnqueue.pbtxt
@@ -0,0 +1,38 @@
+op {
+  graph_op_name: "InfeedEnqueue"
+  in_arg {
+    name: "input"
+    description: <<END
+A tensor that will be provided using the infeed mechanism.
+END
+  }
+  attr {
+    name: "dtype"
+    description: <<END
+The type of elements in the tensor.
+END
+  }
+  attr {
+    name: "shape"
+    description: <<END
+The shape of the tensor.
+END
+  }
+  attr {
+    name: "layout"
+    description: <<END
+A vector holding the requested layout in minor-to-major sequence.
+If a layout attribute is passed, but its values are all -1, the layout will
+be computed by the infeed operation.
+END
+  }
+  attr {
+    name: "device_ordinal"
+    description: <<END
+The TPU device to use. This should be -1 when the Op
+is running on a TPU device, and >= 0 when the Op is running on the CPU
+device.
+END
+  }
+  summary: "An op which feeds a single Tensor value into the computation."
+}
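Editor's note: for the `layout` attribute, minor-to-major order lists dimensions from fastest-varying to slowest; a numpy illustration of the convention (not an infeed call):

```python
import numpy as np

x = np.arange(6).reshape(2, 3)  # logical shape [2, 3]
print(x.flatten(order="C"))     # layout [1, 0] (row-major):    [0 1 2 3 4 5]
print(x.flatten(order="F"))     # layout [0, 1] (column-major): [0 3 1 4 2 5]
# Passing all -1s instead lets the infeed operation pick the layout.
```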
diff --git a/tensorflow/core/api_def/base_api/api_def_InfeedEnqueueTuple.pbtxt b/tensorflow/core/api_def/base_api/api_def_InfeedEnqueueTuple.pbtxt
new file mode 100644
index 0000000..f87d6d7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_InfeedEnqueueTuple.pbtxt
@@ -0,0 +1,39 @@
+op {
+  graph_op_name: "InfeedEnqueueTuple"
+  in_arg {
+    name: "inputs"
+    description: <<END
+A list of tensors that will be provided using the infeed mechanism.
+END
+  }
+  attr {
+    name: "dtypes"
+    description: <<END
+The element types of each element in `inputs`.
+END
+  }
+  attr {
+    name: "shapes"
+    description: <<END
+The shapes of each tensor in `inputs`.
+END
+  }
+  attr {
+    name: "layouts"
+    description: <<END
+A vector holding the requested layout in minor-to-major sequence for
+all the tuple shapes, in the order the shapes appear in the "shapes" input.
+The layout elements for a sub-shape can be set to -1, in which case the
+corresponding layout will be computed by the infeed operation.
+END
+  }
+  attr {
+    name: "device_ordinal"
+    description: <<END
+The TPU device to use. This should be -1 when the Op
+is running on a TPU device, and >= 0 when the Op is running on the CPU
+device.
+END
+  }
+  summary: "Feeds multiple Tensor values into the computation as an XLA tuple."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParameters.pbtxt
new file mode 100644
index 0000000..43901e1
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParameters.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingADAMParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the ADAM optimization algorithm.
+END
+  }
+  in_arg {
+    name: "momenta"
+    description: <<END
+Value of momenta used in the ADAM optimization algorithm.
+END
+  }
+  in_arg {
+    name: "velocities"
+    description: <<END
+Value of velocities used in the ADAM optimization algorithm.
+END
+  }
+  summary: "Load ADAM embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..aaa52f6
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParametersGradAccumDebug.pbtxt
@@ -0,0 +1,35 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingADAMParametersGradAccumDebug"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the ADAM optimization algorithm.
+END
+  }
+  in_arg {
+    name: "momenta"
+    description: <<END
+Value of momenta used in the ADAM optimization algorithm.
+END
+  }
+  in_arg {
+    name: "velocities"
+    description: <<END
+Value of velocities used in the ADAM optimization algorithm.
+END
+  }
+  in_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Value of gradient_accumulators used in the ADAM optimization algorithm.
+END
+  }
+  summary: "Load ADAM embedding parameters with debug support."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParameters.pbtxt
new file mode 100644
index 0000000..e306329
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParameters.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingAdadeltaParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the Adadelta optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the Adadelta optimization algorithm.
+END
+  }
+  in_arg {
+    name: "updates"
+    description: <<END
+Value of updates used in the Adadelta optimization algorithm.
+END
+  }
+  summary: "Load Adadelta embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..dd2e3b9
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt
@@ -0,0 +1,35 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the Adadelta optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the Adadelta optimization algorithm.
+END
+  }
+  in_arg {
+    name: "updates"
+    description: <<END
+Value of updates used in the Adadelta optimization algorithm.
+END
+  }
+  in_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Value of gradient_accumulators used in the Adadelta optimization algorithm.
+END
+  }
+  summary: "Load Adadelta parameters with debug support."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParameters.pbtxt
new file mode 100644
index 0000000..94db7df
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParameters.pbtxt
@@ -0,0 +1,23 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingAdagradParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the Adagrad optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the Adagrad optimization algorithm.
+END
+  }
+  summary: "Load Adagrad embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..e2d1044
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingAdagradParametersGradAccumDebug"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the Adagrad optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the Adagrad optimization algorithm.
+END
+  }
+  in_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Value of gradient_accumulators used in the Adagrad optimization algorithm.
+END
+  }
+  summary: "Load Adagrad embedding parameters with debug support."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt
new file mode 100644
index 0000000..43cf884
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt
@@ -0,0 +1,35 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingCenteredRMSPropParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the centered RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "ms"
+    description: <<END
+Value of ms used in the centered RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "mom"
+    description: <<END
+Value of mom used in the centered RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "mg"
+    description: <<END
+Value of mg used in the centered RMSProp optimization algorithm.
+END
+  }
+  summary: "Load centered RMSProp embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParameters.pbtxt
new file mode 100644
index 0000000..d351688
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParameters.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingFTRLParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the FTRL optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the FTRL optimization algorithm.
+END
+  }
+  in_arg {
+    name: "linears"
+    description: <<END
+Value of linears used in the FTRL optimization algorithm.
+END
+  }
+  summary: "Load FTRL embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..d6c80f6
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt
@@ -0,0 +1,35 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingFTRLParametersGradAccumDebug"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the FTRL optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the FTRL optimization algorithm.
+END
+  }
+  in_arg {
+    name: "linears"
+    description: <<END
+Value of linears used in the FTRL optimization algorithm.
+END
+  }
+  in_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Value of gradient_accumulators used in the FTRL optimization algorithm.
+END
+  }
+  summary: "Load FTRL embedding parameters with debug support."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt
new file mode 100644
index 0000000..24b3343
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt
@@ -0,0 +1,35 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingMDLAdagradLightParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the MDL Adagrad Light optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the MDL Adagrad Light optimization algorithm.
+END
+  }
+  in_arg {
+    name: "weights"
+    description: <<END
+Value of weights used in the MDL Adagrad Light optimization algorithm.
+END
+  }
+  in_arg {
+    name: "benefits"
+    description: <<END
+Value of benefits used in the MDL Adagrad Light optimization algorithm.
+END
+  }
+  summary: "Load MDL Adagrad Light embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParameters.pbtxt
new file mode 100644
index 0000000..e98956e
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParameters.pbtxt
@@ -0,0 +1,23 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingMomentumParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the Momentum optimization algorithm.
+END
+  }
+  in_arg {
+    name: "momenta"
+    description: <<END
+Value of momenta used in the Momentum optimization algorithm.
+END
+  }
+  summary: "Load Momentum embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..fff35ad
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingMomentumParametersGradAccumDebug"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the Momentum optimization algorithm.
+END
+  }
+  in_arg {
+    name: "momenta"
+    description: <<END
+Value of momenta used in the Momentum optimization algorithm.
+END
+  }
+  in_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Value of gradient_accumulators used in the Momentum optimization algorithm.
+END
+  }
+  summary: "Load Momentum embedding parameters with debug support."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParameters.pbtxt
new file mode 100644
index 0000000..da049a7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParameters.pbtxt
@@ -0,0 +1,23 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingProximalAdagradParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the proximal Adagrad optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the proximal Adagrad optimization algorithm.
+END
+  }
+  summary: "Load proximal Adagrad embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..5e0f275
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the proximal Adagrad optimization algorithm.
+END
+  }
+  in_arg {
+    name: "accumulators"
+    description: <<END
+Value of accumulators used in the proximal Adagrad optimization algorithm.
+END
+  }
+  in_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Value of gradient_accumulators used in the proximal Adagrad optimization algorithm.
+END
+  }
+  summary: "Load proximal Adagrad embedding parameters with debug support."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParameters.pbtxt
new file mode 100644
index 0000000..e79a3a4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParameters.pbtxt
@@ -0,0 +1,29 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingRMSPropParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "ms"
+    description: <<END
+Value of ms used in the RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "mom"
+    description: <<END
+Value of mom used in the RMSProp optimization algorithm.
+END
+  }
+  summary: "Load RMSProp embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..233e5af
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt
@@ -0,0 +1,35 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "ms"
+    description: <<END
+Value of ms used in the RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "mom"
+    description: <<END
+Value of mom used in the RMSProp optimization algorithm.
+END
+  }
+  in_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Value of gradient_accumulators used in the RMSProp optimization algorithm.
+END
+  }
+  summary: "Load RMSProp embedding parameters with debug support."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt
new file mode 100644
index 0000000..37d0dcc
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt
@@ -0,0 +1,17 @@
+op {
+  graph_op_name: "LoadTPUEmbeddingStochasticGradientDescentParameters"
+  in_arg {
+    name: "parameters"
+    description: <<END
+Value of parameters used in the stochastic gradient descent optimization algorithm.
+END
+  }
+  summary: "Load SGD embedding parameters."
+  description: <<END
+An op that loads optimization parameters into HBM for embedding. Must be
+preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+embedding table configuration. For example, this op is used to install
+parameters that are loaded from a checkpoint before a training loop is
+executed.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_OutfeedDequeue.pbtxt b/tensorflow/core/api_def/base_api/api_def_OutfeedDequeue.pbtxt
new file mode 100644
index 0000000..f262bdc
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_OutfeedDequeue.pbtxt
@@ -0,0 +1,33 @@
+op {
+  graph_op_name: "OutfeedDequeue"
+  out_arg {
+    name: "output"
+    description: <<END
+A tensor that will be read from the device outfeed.
+END
+  }
+  attr {
+    name: "dtype"
+    description: <<END
+The type of elements in the tensor.
+END
+  }
+  attr {
+    name: "shape"
+    description: <<END
+The shape of the tensor.
+END
+  }
+  attr {
+    name: "device_ordinal"
+    description: <<END
+The TPU device to use. This should be -1 when the Op
+is running on a TPU device, and >= 0 when the Op is running on the CPU
+device.
+END
+  }
+  summary: "Retrieves a single tensor from the computation outfeed."
+  description: <<END
+This operation will block indefinitely until data is available.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_OutfeedDequeueTuple.pbtxt b/tensorflow/core/api_def/base_api/api_def_OutfeedDequeueTuple.pbtxt
new file mode 100644
index 0000000..457e495
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_OutfeedDequeueTuple.pbtxt
@@ -0,0 +1,34 @@
+op {
+  graph_op_name: "OutfeedDequeueTuple"
+  out_arg {
+    name: "outputs"
+    description: <<END
+A list of tensors that will be read from the outfeed.
+END
+  }
+  attr {
+    name: "dtypes"
+    description: <<END
+The element types of each element in `outputs`.
+END
+  }
+  attr {
+    name: "shapes"
+    description: <<END
+The shapes of each tensor in `outputs`.
+END
+  }
+  attr {
+    name: "device_ordinal"
+    description: <<END
+The TPU device to use. This should be -1 when the Op
+is running on a TPU device, and >= 0 when the Op is running on the CPU
+device.
+END
+  }
+  summary: "Retrieve multiple values from the computation outfeed."
+  description: <<END
+This operation will block indefinitely until data is available. Output `i`
+corresponds to XLA tuple element `i`.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_OutfeedEnqueue.pbtxt b/tensorflow/core/api_def/base_api/api_def_OutfeedEnqueue.pbtxt
new file mode 100644
index 0000000..fa6cb96
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_OutfeedEnqueue.pbtxt
@@ -0,0 +1,10 @@
+op {
+  graph_op_name: "OutfeedEnqueue"
+  in_arg {
+    name: "input"
+    description: <<END
+A tensor that will be inserted into the outfeed queue.
+END
+  }
+  summary: "Enqueue a Tensor on the computation outfeed."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_OutfeedEnqueueTuple.pbtxt b/tensorflow/core/api_def/base_api/api_def_OutfeedEnqueueTuple.pbtxt
new file mode 100644
index 0000000..fb1ab3d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_OutfeedEnqueueTuple.pbtxt
@@ -0,0 +1,11 @@
+op {
+  graph_op_name: "OutfeedEnqueueTuple"
+  in_arg {
+    name: "inputs"
+    description: <<END
+A list of tensors that will be inserted into the outfeed queue as an
+XLA tuple.
+END
+  }
+  summary: "Enqueue multiple Tensor values on the computation outfeed."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RecvTPUEmbeddingActivations.pbtxt b/tensorflow/core/api_def/base_api/api_def_RecvTPUEmbeddingActivations.pbtxt
new file mode 100644
index 0000000..d1921fd
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RecvTPUEmbeddingActivations.pbtxt
@@ -0,0 +1,32 @@
+op {
+  graph_op_name: "RecvTPUEmbeddingActivations"
+  out_arg {
+    name: "outputs"
+    description: <<END
+A TensorList of embedding activations containing one Tensor per
+embedding table in the model.
+END
+  }
+  attr {
+    name: "num_outputs"
+    description: <<END
+The number of output activation tensors, equal to the number of
+embedding tables in the model.
+END
+  }
+  attr {
+    name: "config"
+    description: <<END
+Serialized TPUEmbeddingConfiguration proto.
+END
+  }
+  summary: "An op that receives embedding activations on the TPU."
+  description: <<END
+The TPU system performs the embedding lookups and aggregations specified by
+the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The
+results of these aggregations are visible to the TensorFlow graph as the
+outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing
+one Tensor of activations per table specified in the model. There can be at
+most one RecvTPUEmbeddingActivations op in the TPU graph.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt
new file mode 100644
index 0000000..58cf122
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt
@@ -0,0 +1,48 @@
+op {
+  graph_op_name: "RequantizationRangePerChannel"
+  visibility: HIDDEN
+  in_arg {
+    name: "input"
+    description: <<END
+The original input tensor.
+END
+  }
+  in_arg {
+    name: "input_min"
+    description: <<END
+The minimum value of the input tensor.
+END
+  }
+  in_arg {
+    name: "input_max"
+    description: <<END
+The maximum value of the input tensor.
+END
+  }
+  out_arg {
+    name: "output_min"
+    description: <<END
+The minimum value of the final output tensor.
+END
+  }
+  out_arg {
+    name: "output_max"
+    description: <<END
+The maximum value of the final output tensor.
+END
+  }
+  attr {
+    name: "T"
+    description: <<END
+The quantized type of the input tensor that needs to be converted.
+END
+  }
+  attr {
+    name: "clip_value_max"
+    description: <<END
+The maximum value to which the output is clipped.
+Example: set this to 6 for Relu6.
+END
+  }
+  summary: "Computes requantization range per channel."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt
new file mode 100644
index 0000000..bce6c3d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt
@@ -0,0 +1,65 @@
+op {
+  graph_op_name: "RequantizePerChannel"
+  visibility: HIDDEN
+  in_arg {
+    name: "input"
+    description: <<END
+The original input tensor.
+END
+  }
+  in_arg {
+    name: "input_min"
+    description: <<END
+The minimum value of the input tensor.
+END
+  }
+  in_arg {
+    name: "input_max"
+    description: <<END
+The maximum value of the input tensor.
+END
+  }
+  in_arg {
+    name: "requested_output_min"
+    description: <<END
+The minimum value of the output tensor requested.
+END
+  }
+  in_arg {
+    name: "requested_output_max"
+    description: <<END
+The maximum value of the output tensor requested.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Output tensor.
+END
+  }
+  out_arg {
+    name: "output_min"
+    description: <<END
+The minimum value of the final output tensor.
+END
+  }
+  out_arg {
+    name: "output_max"
+    description: <<END
+The maximum value of the final output tensor.
+END
+  }
+  attr {
+    name: "T"
+    description: <<END
+The quantized type of the input tensor that needs to be converted.
+END
+  }
+  attr {
+    name: "out_type"
+    description: <<END
+The quantized type of output tensor that needs to be converted.
+END
+  }
+  summary: "Requantizes input with min and max values known per channel."
+}
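Editor's note: a heavily hedged sketch of the dequantize-then-requantize arithmetic this op performs, assuming symmetric per-channel input ranges on the last axis; the MKL kernel's exact scaling and rounding may differ:

```python
import numpy as np

def requantize_per_channel(x_q32, input_min, input_max, out_min, out_max):
    # Dequantize each channel with its own qint32 scale (assumed symmetric)...
    in_scale = np.maximum(np.abs(input_min), np.abs(input_max)) / float(2 ** 31)
    real = x_q32.astype(np.float64) * in_scale
    # ...then quantize everything into the single requested qint8 range.
    out_scale = max(abs(out_min), abs(out_max)) / 127.0
    return np.clip(np.round(real / out_scale), -128, 127).astype(np.int8)
```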
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParameters.pbtxt
new file mode 100644
index 0000000..3de7ad9
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParameters.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingADAMParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the ADAM optimization algorithm.
+END
+  }
+  out_arg {
+    name: "momenta"
+    description: <<END
+Parameter momenta updated by the ADAM optimization algorithm.
+END
+  }
+  out_arg {
+    name: "velocities"
+    description: <<END
+Parameter velocities updated by the ADAM optimization algorithm.
+END
+  }
+  summary: "Retrieve ADAM embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..4ecfd08
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParametersGradAccumDebug.pbtxt
@@ -0,0 +1,34 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the ADAM optimization algorithm.
+END
+  }
+  out_arg {
+    name: "momenta"
+    description: <<END
+Parameter momenta updated by the ADAM optimization algorithm.
+END
+  }
+  out_arg {
+    name: "velocities"
+    description: <<END
+Parameter velocities updated by the ADAM optimization algorithm.
+END
+  }
+  out_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Parameter gradient_accumulators updated by the ADAM optimization algorithm.
+END
+  }
+  summary: "Retrieve ADAM embedding parameters with debug support."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParameters.pbtxt
new file mode 100644
index 0000000..3c7e688
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParameters.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingAdadeltaParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the Adadelta optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the Adadelta optimization algorithm.
+END
+  }
+  out_arg {
+    name: "updates"
+    description: <<END
+Parameter updates updated by the Adadelta optimization algorithm.
+END
+  }
+  summary: "Retrieve Adadelta embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..5ee8cda
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt
@@ -0,0 +1,34 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the Adadelta optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the Adadelta optimization algorithm.
+END
+  }
+  out_arg {
+    name: "updates"
+    description: <<END
+Parameter updates updated by the Adadelta optimization algorithm.
+END
+  }
+  out_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Parameter gradient_accumulators updated by the Adadelta optimization algorithm.
+END
+  }
+  summary: "Retrieve Adadelta embedding parameters with debug support."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParameters.pbtxt
new file mode 100644
index 0000000..6f070f5
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParameters.pbtxt
@@ -0,0 +1,22 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingAdagradParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the Adagrad optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the Adagrad optimization algorithm.
+END
+  }
+  summary: "Retrieve Adagrad embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..078cda0
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the Adagrad optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the Adagrad optimization algorithm.
+END
+  }
+  out_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Parameter gradient_accumulators updated by the Adagrad optimization algorithm.
+END
+  }
+  summary: "Retrieve Adagrad embedding parameters with debug support."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt
new file mode 100644
index 0000000..2a4cc4e
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt
@@ -0,0 +1,34 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingCenteredRMSPropParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the centered RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "ms"
+    description: <<END
+Parameter ms updated by the centered RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "mom"
+    description: <<END
+Parameter mom updated by the centered RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "mg"
+    description: <<END
+Parameter mg updated by the centered RMSProp optimization algorithm.
+END
+  }
+  summary: "Retrieve centered RMSProp embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParameters.pbtxt
new file mode 100644
index 0000000..daf87de
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParameters.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingFTRLParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the FTRL optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the FTRL optimization algorithm.
+END
+  }
+  out_arg {
+    name: "linears"
+    description: <<END
+Parameter linears updated by the FTRL optimization algorithm.
+END
+  }
+  summary: "Retrieve FTRL embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..2f72d1d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt
@@ -0,0 +1,34 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the FTRL optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the FTRL optimization algorithm.
+END
+  }
+  out_arg {
+    name: "linears"
+    description: <<END
+Parameter linears updated by the FTRL optimization algorithm.
+END
+  }
+  out_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Parameter gradient_accumulators updated by the FTRL optimization algorithm.
+END
+  }
+  summary: "Retrieve FTRL embedding parameters with debug support."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt
new file mode 100644
index 0000000..478fbda
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt
@@ -0,0 +1,34 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingMDLAdagradLightParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the MDL Adagrad Light optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the MDL Adagrad Light optimization algorithm.
+END
+  }
+  out_arg {
+    name: "weights"
+    description: <<END
+Parameter weights updated by the MDL Adagrad Light optimization algorithm.
+END
+  }
+  out_arg {
+    name: "benefits"
+    description: <<END
+Parameter benefits updated by the MDL Adagrad Light optimization algorithm.
+END
+  }
+  summary: "Retrieve MDL Adagrad Light embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParameters.pbtxt
new file mode 100644
index 0000000..f71e620
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParameters.pbtxt
@@ -0,0 +1,22 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingMomentumParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the Momentum optimization algorithm.
+END
+  }
+  out_arg {
+    name: "momenta"
+    description: <<END
+Parameter momenta updated by the Momentum optimization algorithm.
+END
+  }
+  summary: "Retrieve Momentum embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..0f00680
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the Momentum optimization algorithm.
+END
+  }
+  out_arg {
+    name: "momenta"
+    description: <<END
+Parameter momenta updated by the Momentum optimization algorithm.
+END
+  }
+  out_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Parameter gradient_accumulators updated by the Momentum optimization algorithm.
+END
+  }
+  summary: "Retrieve Momentum embedding parameters with debug support."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt
new file mode 100644
index 0000000..e279ca4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt
@@ -0,0 +1,22 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingProximalAdagradParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the proximal Adagrad optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the proximal Adagrad optimization algorithm.
+END
+  }
+  summary: "Retrieve proximal Adagrad embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..37ae5db
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the proximal Adagrad optimization algorithm.
+END
+  }
+  out_arg {
+    name: "accumulators"
+    description: <<END
+Parameter accumulators updated by the proximal Adagrad optimization algorithm.
+END
+  }
+  out_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Parameter gradient_accumulators updated by the proximal Adagrad optimization algorithm.
+END
+  }
+  summary: "Retrieve proximal Adagrad embedding parameters with debug support."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParameters.pbtxt
new file mode 100644
index 0000000..aad8ca8
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParameters.pbtxt
@@ -0,0 +1,28 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingRMSPropParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "ms"
+    description: <<END
+Parameter ms updated by the RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "mom"
+    description: <<END
+Parameter mom updated by the RMSProp optimization algorithm.
+END
+  }
+  summary: "Retrieve RMSProp embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt
new file mode 100644
index 0000000..1d9bd6659
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt
@@ -0,0 +1,34 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "ms"
+    description: <<END
+Parameter ms updated by the RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "mom"
+    description: <<END
+Parameter mom updated by the RMSProp optimization algorithm.
+END
+  }
+  out_arg {
+    name: "gradient_accumulators"
+    description: <<END
+Parameter gradient_accumulators updated by the RMSProp optimization algorithm.
+END
+  }
+  summary: "Retrieve RMSProp embedding parameters with debug support."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt
new file mode 100644
index 0000000..33ca8a7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt
@@ -0,0 +1,16 @@
+op {
+  graph_op_name: "RetrieveTPUEmbeddingStochasticGradientDescentParameters"
+  out_arg {
+    name: "parameters"
+    description: <<END
+Parameter parameters updated by the stochastic gradient descent optimization algorithm.
+END
+  }
+  summary: "Retrieve SGD embedding parameters."
+  description: <<END
+An op that retrieves optimization parameters from embedding to host
+memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+the correct embedding table configuration. For example, this op is
+used to retrieve updated parameters before saving a checkpoint.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_SendTPUEmbeddingGradients.pbtxt b/tensorflow/core/api_def/base_api/api_def_SendTPUEmbeddingGradients.pbtxt
new file mode 100644
index 0000000..9a3be3d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SendTPUEmbeddingGradients.pbtxt
@@ -0,0 +1,32 @@
+op {
+  graph_op_name: "SendTPUEmbeddingGradients"
+  in_arg {
+    name: "inputs"
+    description: <<END
+A TensorList of gradients with which to update embedding tables.
+This argument has the same length and shapes as the return value of
+RecvTPUEmbeddingActivations, but contains gradients of the model's loss
+with respect to the embedding activations. The embedding tables are updated
+from these gradients via the optimizer specified in the TPU embedding
+configuration given to tpu.initialize_system.
+END
+  }
+  in_arg {
+    name: "learning_rates"
+    description: <<END
+A TensorList of float32 scalars, one for each dynamic learning
+rate tag: see the comments in
+//third_party/tensorflow/core/protobuf/tpu/optimization_parameters.proto.
+Multiple tables can share the same dynamic learning rate tag as specified
+in the configuration. If the learning rates for all tables are constant,
+this list should be empty.
+END
+  }
+  attr {
+    name: "config"
+    description: <<END
+Serialized TPUEmbeddingConfiguration proto.
+END
+  }
+  summary: "Performs gradient updates of embedding tables."
+}
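The intended pairing with RecvTPUEmbeddingActivations is: activations flow out, and gradients of the loss with respect to exactly those activations flow back, in the same order. A hedged sketch (wrapper names per the generated snake_case convention; `config_str`, `num_tables`, and `model_fn` are placeholders assumed to be defined elsewhere):

    # config_str: serialized TPUEmbeddingConfiguration proto; num_tables and
    # model_fn are placeholders for this sketch.
    activations = tpu_ops.recv_tpu_embedding_activations(
        num_outputs=num_tables, config=config_str)
    loss = model_fn(activations)
    grads = tf.gradients(loss, activations)  # same length/shapes as activations
    send_op = tpu_ops.send_tpu_embedding_gradients(
        inputs=grads, learning_rates=[], config=config_str)
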
diff --git a/tensorflow/core/api_def/base_api/api_def_ShutdownDistributedTPU.pbtxt b/tensorflow/core/api_def/base_api/api_def_ShutdownDistributedTPU.pbtxt
new file mode 100644
index 0000000..87d4e8d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ShutdownDistributedTPU.pbtxt
@@ -0,0 +1,7 @@
+op {
+  graph_op_name: "ShutdownDistributedTPU"
+  summary: "Shuts down a running distributed TPU system."
+  description: <<END
+The op returns an error if no system is running.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_StatefulStandardNormalV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_StatefulStandardNormalV2.pbtxt
new file mode 100644
index 0000000..8c145e0
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StatefulStandardNormalV2.pbtxt
@@ -0,0 +1,37 @@
+op {
+  graph_op_name: "StatefulStandardNormalV2"
+  in_arg {
+    name: "resource"
+    description: <<END
+The handle of the resource variable that stores the state of the RNG.
+END
+  }
+  in_arg {
+    name: "algorithm"
+    description: <<END
+The RNG algorithm.
+END
+  }
+  in_arg {
+    name: "shape"
+    description: <<END
+The shape of the output tensor.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A tensor of the specified shape filled with random normal values.
+END
+  }
+  attr {
+    name: "dtype"
+    description: <<END
+The type of the output.
+END
+  }
+  summary: "Outputs random values from a normal distribution."
+  description: <<END
+The generated values will have mean 0 and standard deviation 1.
+END
+}
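The "stateful" part is the point: each call reads the RNG state out of the resource variable, draws, and writes an advanced state back, so successive calls yield fresh values. A toy numpy model of that contract (TF actually uses a counter-based Philox generator; none of these names are TF APIs):

    import numpy as np

    class StatefulNormal(object):
        """Toy stand-in: RNG state lives in a mutable field, playing the
        role of the op's resource variable."""

        def __init__(self, seed):
            self.state = seed  # the 'resource' holding RNG state

        def normal(self, shape):
            draws = np.random.RandomState(self.state).standard_normal(shape)
            self.state += int(np.prod(shape))  # advance state by samples drawn
            return draws

    g = StatefulNormal(seed=42)
    a = g.normal((2, 3))
    b = g.normal((2, 3))  # differs from a because the state advanced
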
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUCompilationResult.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUCompilationResult.pbtxt
new file mode 100644
index 0000000..98643e2
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUCompilationResult.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TPUCompilationResult"
+  summary: "CompilationResultProto indicating the status of the TPU compilation."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUEmbeddingActivations.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUEmbeddingActivations.pbtxt
new file mode 100644
index 0000000..0763a26
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUEmbeddingActivations.pbtxt
@@ -0,0 +1,37 @@
+op {
+  graph_op_name: "TPUEmbeddingActivations"
+  in_arg {
+    name: "embedding_variable"
+    description: <<END
+A trainable variable, enabling optimizers to find this op.
+END
+  }
+  in_arg {
+    name: "sliced_activations"
+    description: <<END
+The embedding activations Tensor to return.
+END
+  }
+  attr {
+    name: "table_id"
+    description: <<END
+The id of the table in the embedding layer configuration from which
+these activations were computed.
+END
+  }
+  attr {
+    name: "lookup_id"
+    description: <<END
+Identifier of the set of embedding indices which produced these
+activations.
+END
+  }
+  summary: "An op enabling differentiation of TPU Embeddings."
+  description: <<END
+This op simply returns its first input, which is assumed to have been sliced
+from the Tensors returned by TPUEmbeddingDequeueActivations. The presence of
+this op, and its first argument being a trainable Variable, enables automatic
+differentiation of graphs containing embeddings via the TPU Embedding Python
+libraries.
+END
+}
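The differentiation trick described above can be mimicked with a custom-gradient identity: forward the activations unchanged, but attribute the incoming gradient to a trainable variable so the optimizer has something to chase. A hedged sketch of that pattern (an analogy, not the op's implementation; for simplicity the variable and the activations are assumed to share a shape):

    import tensorflow as tf

    @tf.custom_gradient
    def embedding_activations(dummy_var, activations):
        def grad(dy):
            # Route the activation gradient to the variable, so optimizers
            # that watch the variable can pick it up; no gradient flows to
            # the activations themselves.
            return dy, None
        return tf.identity(activations), grad
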
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUOrdinalSelector.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUOrdinalSelector.pbtxt
new file mode 100644
index 0000000..3c72abc
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUOrdinalSelector.pbtxt
@@ -0,0 +1,15 @@
+op {
+  graph_op_name: "TPUOrdinalSelector"
+  out_arg {
+    name: "device_ordinals"
+    description: <<END
+A vector of 1 or more TPU cores.
+END
+  }
+  summary: "A TPU core selector Op."
+  description: <<END
+This Op produces a set of TPU cores (for warm-up) or a single TPU core
+(for regular inference) to execute the TPU program on. The output is
+consumed by TPUPartitionedCall.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUPartitionedCall.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUPartitionedCall.pbtxt
new file mode 100644
index 0000000..4f384c2
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUPartitionedCall.pbtxt
@@ -0,0 +1,40 @@
+op {
+  graph_op_name: "TPUPartitionedCall"
+  in_arg {
+    name: "args"
+    description: <<END
+The arguments to the function.
+END
+  }
+  in_arg {
+    name: "device_ordinal"
+    description: <<END
+The TPU device ordinal to run the function on.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+The output of the function call.
+END
+  }
+  attr {
+    name: "Tin"
+    description: <<END
+The types of the arguments to the function.
+END
+  }
+  attr {
+    name: "Tout"
+    description: <<END
+The types of the outputs of the function.
+END
+  }
+  attr {
+    name: "f"
+    description: <<END
+The function to call.
+END
+  }
+  summary: "Calls a function placed on a specified TPU device."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUReplicate.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUReplicate.pbtxt
new file mode 100644
index 0000000..5664dfb
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUReplicate.pbtxt
@@ -0,0 +1,99 @@
+op {
+  graph_op_name: "TPUReplicate"
+  in_arg {
+    name: "inputs"
+    description: <<END
+the inputs to 'computation', flattened, in replica-major order.
+END
+  }
+  in_arg {
+    name: "broadcast_inputs"
+    description: <<END
+additional arguments to broadcast to all replicas. The
+broadcast inputs are appended to the per-replica inputs when calling
+computation.
+END
+  }
+  in_arg {
+    name: "guaranteed_constants"
+    description: <<END
+arguments which have been guaranteed to not
+change their values during the session lifetime. These contain tensors marked as
+constant using the GuaranteeConstOp.
+END
+  }
+  out_arg {
+    name: "outputs"
+    description: <<END
+the outputs of 'computation'.
+END
+  }
+  attr {
+    name: "computation"
+    description: <<END
+a function containing the computation to run.
+END
+  }
+  attr {
+    name: "num_replicas"
+    description: <<END
+the number of replicas of the computation to run.
+END
+  }
+  attr {
+    name: "num_cores_per_replica"
+    description: <<END
+the number of logical cores in each replica.
+END
+  }
+  attr {
+    name: "topology"
+    description: <<END
+A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
+END
+  }
+  attr {
+    name: "use_tpu"
+    description: <<END
+a bool indicating if this computation will run on TPU or CPU/GPU.
+Currently, only a default placement is supported (the computation is placed
+on GPU if one is available, and on CPU if not).
+END
+  }
+  attr {
+    name: "device_assignment"
+    description: <<END
+a flattened array with shape
+[replica, num_cores_per_replica, mesh_dimension] that maps the coordinates
+of logical cores in each replica of a computation to physical coordinates in
+the TPU topology.
+END
+  }
+  attr {
+    name: "Tinputs"
+    description: <<END
+the types of the arguments to 'computation'.
+END
+  }
+  attr {
+    name: "Tbroadcast_inputs"
+    description: <<END
+the types of the additional arguments to broadcast to all
+replicas.
+END
+  }
+  attr {
+    name: "Tguaranteed_constants"
+    description: <<END
+the types of the arguments to 'guaranteed_constants'.
+END
+  }
+  attr {
+    name: "output_types"
+    description: <<END
+the types of the outputs of 'computation'.
+END
+  }
+  summary: "Runs replicated computations on a distributed TPU system."
+}
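"Replica-major order" for the flattened inputs means: all of replica 0's arguments first, then all of replica 1's, and so on. A two-line illustration in plain Python:

    per_replica = [["r0_a", "r0_b"], ["r1_a", "r1_b"]]  # 2 replicas, 2 args each
    flattened = [arg for replica in per_replica for arg in replica]
    # -> ['r0_a', 'r0_b', 'r1_a', 'r1_b']
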
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUReplicateMetadata.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUReplicateMetadata.pbtxt
new file mode 100644
index 0000000..b30d1b6
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUReplicateMetadata.pbtxt
@@ -0,0 +1,46 @@
+op {
+  graph_op_name: "TPUReplicateMetadata"
+  attr {
+    name: "num_replicas"
+    description: <<END
+Number of replicas of the computation
+END
+  }
+  attr {
+    name: "num_cores_per_replica"
+    description: <<END
+Number of cores per replica. Used for model parallelism.
+END
+  }
+  attr {
+    name: "topology"
+    description: <<END
+TopologyProto indicating the topology of the TPU pod slice.
+END
+  }
+  attr {
+    name: "use_tpu"
+    description: <<END
+Whether to place the computation on the TPU.
+END
+  }
+  attr {
+    name: "device_assignment"
+    description: <<END
+The assignment of devices for the computation.
+END
+  }
+  attr {
+    name: "computation_shape"
+    description: <<END
+DEPRECATED. Use num_cores_per_replica instead.
+END
+  }
+  attr {
+    name: "host_compute_core"
+  }
+  attr {
+    name: "padding_map"
+  }
+  summary: "Metadata indicating how the TPU computation should be replicated."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt
new file mode 100644
index 0000000..1bba92e
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TPUReplicatedInput"
+  summary: "Connects N inputs to an N-way replicated TPU computation."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TPUReplicatedOutput.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUReplicatedOutput.pbtxt
new file mode 100644
index 0000000..cab78c7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TPUReplicatedOutput.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TPUReplicatedOutput"
+  summary: "Connects outputs of an N-way replicated computation to N outputs."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListScatterIntoExistingList.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorListScatterIntoExistingList.pbtxt
new file mode 100644
index 0000000..23da422
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TensorListScatterIntoExistingList.pbtxt
@@ -0,0 +1,13 @@
+op {
+  graph_op_name: "TensorListScatterIntoExistingList"
+  summary: "Scatters tensor at indices in an input list."
+  description: <<END
+Each member of the TensorList corresponds to one row of the input tensor,
+specified by the given index (see `tf.gather`).
+
+input_handle: The list to scatter into.
+tensor: The input tensor.
+indices: The indices used to index into the list.
+output_handle: The TensorList.
+END
+}
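In plain terms: row i of `tensor` overwrites the list element at `indices[i]`, leaving the other elements alone. A numpy model of that semantics (the TensorList is modeled here as a plain Python list of rows):

    import numpy as np

    existing = [np.zeros(3) for _ in range(4)]        # the input list
    tensor = np.array([[1., 1., 1.], [2., 2., 2.]])   # rows to scatter
    indices = [3, 1]
    for row, i in zip(tensor, indices):
        existing[i] = row
    # existing[1] and existing[3] are overwritten; 0 and 2 are untouched.
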
diff --git a/tensorflow/core/api_def/base_api/api_def_TridiagonalSolve.pbtxt b/tensorflow/core/api_def/base_api/api_def_TridiagonalSolve.pbtxt
new file mode 100644
index 0000000..80f3675
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TridiagonalSolve.pbtxt
@@ -0,0 +1,33 @@
+op {
+  graph_op_name: "TridiagonalSolve"
+  visibility: HIDDEN
+  in_arg {
+    name: "diagonals"
+    description: <<END
+Shape is `[..., 3, M]`.
+END
+  }
+  in_arg {
+    name: "rhs"
+    description: <<END
+Shape is `[..., M, K]`.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Shape is `[..., M, K]`.
+END
+  }
+
+  summary: "Solves tridiagonal systems of equations."
+  description: <<END
+`diagonals` is a tensor of shape `[..., 3, M]` whose innermost 2 dimensions
+represent tridiagonal matrices, with the three rows being the superdiagonal,
+the main diagonal, and the subdiagonal, in that order. The last element of
+the superdiagonal and the first element of the subdiagonal are ignored.
+`rhs` is a tensor of shape `[..., M, K]`, representing K right-hand sides for
+each left-hand side.
+The output is a tensor of shape `[..., M, K]` containing the solutions.
+END
+}
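The `[3, M]` compact layout and the solve itself are easy to demonstrate: the classic Thomas algorithm runs forward elimination and then back substitution over the three stored diagonals. A numpy sketch for a single system (illustrative only: no pivoting, so it assumes a well-conditioned matrix; the actual kernel handles batching and may use a different method):

    import numpy as np

    def tridiag_solve(diagonals, rhs):
        # diagonals: [3, M], rows = (superdiag, main diag, subdiag); rhs: [M, K].
        c = diagonals[0].astype(float)  # superdiagonal; last entry unused
        b = diagonals[1].astype(float)  # main diagonal
        a = diagonals[2].astype(float)  # subdiagonal; first entry unused
        d = rhs.astype(float).copy()
        m = b.shape[0]
        for i in range(1, m):           # forward elimination
            w = a[i] / b[i - 1]
            b[i] -= w * c[i - 1]
            d[i] -= w * d[i - 1]
        x = np.empty_like(d)            # back substitution
        x[-1] = d[-1] / b[-1]
        for i in range(m - 2, -1, -1):
            x[i] = (d[i] - c[i] * x[i + 1]) / b[i]
        return x

    diags = np.array([[2., 2., 0.],   # superdiagonal (last entry ignored)
                      [5., 5., 5.],   # main diagonal
                      [0., 1., 1.]])  # subdiagonal (first entry ignored)
    x = tridiag_solve(diags, np.array([[1.], [2.], [3.]]))
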
diff --git a/tensorflow/core/api_def/base_api/api_def_WorkerHeartbeat.pbtxt b/tensorflow/core/api_def/base_api/api_def_WorkerHeartbeat.pbtxt
new file mode 100644
index 0000000..e886b04
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_WorkerHeartbeat.pbtxt
@@ -0,0 +1,20 @@
+op {
+  graph_op_name: "WorkerHeartbeat"
+  in_arg {
+    name: "request"
+    description: <<END
+A string tensor containing a serialized WorkerHeartbeatRequest
+END
+  }
+  out_arg {
+    name: "response"
+    description: <<END
+A string tensor containing a serialized WorkerHeartbeatResponse
+END
+  }
+  summary: "Worker heartbeat op."
+  description: <<END
+Heartbeats may be sent periodically to indicate that the coordinator is still
+active, to retrieve the current worker status, and to expedite shutdown when necessary.
+END
+}
diff --git a/tensorflow/core/api_def/excluded_ops.cc b/tensorflow/core/api_def/excluded_ops.cc
index 02026e9..65d2102 100644
--- a/tensorflow/core/api_def/excluded_ops.cc
+++ b/tensorflow/core/api_def/excluded_ops.cc
@@ -24,9 +24,9 @@
            "GcsConfigureBlockCache", "GcsConfigureCredentials",
 #ifdef INTEL_MKL
            // QuantizedFusedOps for Intel CPU
-           "QuantizedConv2DAndRequantize", "QuantizedConv2DWithBias",
-           "QuantizedConv2DWithBiasAndRequantize", "QuantizedConv2DAndRelu",
-           "QuantizedConv2DAndReluAndRequantize",
+           "QuantizedConcatV2", "QuantizedConv2DAndRequantize",
+           "QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize",
+           "QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize",
            "QuantizedConv2DWithBiasAndRelu",
            "QuantizedConv2DWithBiasAndReluAndRequantize",
            "QuantizedConv2DWithBiasSumAndRelu",
diff --git a/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt
index cc16523..72c281d 100644
--- a/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "AvgPool3D"
-  endpoint {
-    name: "nn.avg_pool3d"
-  }
+  visibility: HIDDEN
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Case.pbtxt b/tensorflow/core/api_def/python_api/api_def_Case.pbtxt
new file mode 100644
index 0000000..a4c8193
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_Case.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Case"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt
index 590b37c..edbcba2 100644
--- a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt
@@ -2,6 +2,7 @@
   graph_op_name: "Conv3DBackpropFilterV2"
   endpoint {
     name: "nn.conv3d_backprop_filter"
+    deprecation_version: 2
   }
   endpoint {
     name: "nn.conv3d_backprop_filter_v2"
diff --git a/tensorflow/core/api_def/python_api/api_def_EuclideanNorm.pbtxt b/tensorflow/core/api_def/python_api/api_def_EuclideanNorm.pbtxt
new file mode 100644
index 0000000..a3ea885
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_EuclideanNorm.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "EuclideanNorm"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StatefulStandardNormalV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatefulStandardNormalV2.pbtxt
new file mode 100644
index 0000000..a1816dd
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatefulStandardNormalV2.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatefulStandardNormalV2"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListScatterIntoExistingList.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListScatterIntoExistingList.pbtxt
new file mode 100644
index 0000000..20d9a43
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListScatterIntoExistingList.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListScatterIntoExistingList"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc
index 03c0e9c..c9e3cf4 100644
--- a/tensorflow/core/common_runtime/base_collective_executor.cc
+++ b/tensorflow/core/common_runtime/base_collective_executor.cc
@@ -63,7 +63,7 @@
       (chunk_bytes < EIGEN_MAX_ALIGN_BYTES)
           ? (EIGEN_MAX_ALIGN_BYTES - chunk_bytes)
           : (EIGEN_MAX_ALIGN_BYTES - (chunk_bytes % EIGEN_MAX_ALIGN_BYTES));
-  CHECK_EQ(0, diff % elt_bytes);
+  DCHECK_EQ(0, diff % elt_bytes);
   base_chunk_elts += (diff / elt_bytes);
   DCHECK_EQ(0, ((base_chunk_elts * elt_bytes) % EIGEN_MAX_ALIGN_BYTES))
       << "total_elts=" << total_elts << " num_chunks=" << num_chunks
@@ -78,17 +78,23 @@
  public:
   // Takes ownership of output and prepares to properly alias its chunks.
   // Ownership is taken because the shape may temporarily change.
-  CollectiveAdapterImpl(Tensor* output, int64 num_chunks, Allocator* allocator)
+  CollectiveAdapterImpl(Tensor* output, int64 num_chunks, Allocator* allocator,
+                        bool align_chunks)
       : output_(std::move(*output)),
         dt_(output_.dtype()),
         old_shape_(output_.shape()),
         num_chunks_(num_chunks),
         allocator_(allocator),
         total_elts_(output_.NumElements()),
-        chunk_elts_(AlignedChunkElts(sizeof(T), total_elts_, num_chunks_)),
+        chunk_elts_(align_chunks
+                        ? AlignedChunkElts(sizeof(T), total_elts_, num_chunks_)
+                        : total_elts_ / num_chunks_),
         data_start_(reinterpret_cast<T*>(DMAHelper::base(&output_))),
         data_end_(data_start_ + total_elts_) {
-    CHECK_GT(chunk_elts_, 0);
+    if (!align_chunks) {
+      DCHECK_EQ(total_elts_, num_chunks_ * chunk_elts_);
+    }
+    DCHECK_GT(chunk_elts_, 0);
     Flatten();
   }
 
@@ -176,19 +182,24 @@
 }  // namespace
 
 CollectiveAdapter* MakeCollectiveAdapter(Tensor* output, int num_chunks,
-                                         Allocator* allocator) {
+                                         Allocator* allocator,
+                                         bool align_chunks) {
   switch (output->dtype()) {
     case DT_FLOAT:
-      return new CollectiveAdapterImpl<float>(output, num_chunks, allocator);
+      return new CollectiveAdapterImpl<float>(output, num_chunks, allocator,
+                                              align_chunks);
       break;
     case DT_DOUBLE:
-      return new CollectiveAdapterImpl<double>(output, num_chunks, allocator);
+      return new CollectiveAdapterImpl<double>(output, num_chunks, allocator,
+                                               align_chunks);
       break;
     case DT_INT32:
-      return new CollectiveAdapterImpl<int32>(output, num_chunks, allocator);
+      return new CollectiveAdapterImpl<int32>(output, num_chunks, allocator,
+                                              align_chunks);
       break;
     case DT_INT64:
-      return new CollectiveAdapterImpl<int64>(output, num_chunks, allocator);
+      return new CollectiveAdapterImpl<int64>(output, num_chunks, allocator,
+                                              align_chunks);
       break;
     default:
       LOG(FATAL) << "Unsupported type " << output->dtype()
@@ -227,6 +238,7 @@
 
   Tensor* output = ctx->mutable_output(0);
   const Tensor* input = (col_params.instance.type == REDUCTION_COLLECTIVE ||
+                         col_params.instance.type == GATHER_COLLECTIVE ||
                          (col_params.instance.type == BROADCAST_COLLECTIVE &&
                           col_params.is_source))
                             ? &ctx->input(0)
@@ -301,6 +313,8 @@
   for (int32 instance : col_params.instance.impl_details.dependencies) {
     auto find_iter = launched_.find(instance);
     if (find_iter == launched_.end() || find_iter->second != 0) {
+      VLOG(1) << "Collective " << col_params.ToString()
+              << " blocked by instance " << instance;
       return false;
     }
   }
@@ -313,6 +327,7 @@
   while (!CheckDependencies(col_params)) {
     launch_cv_.wait(l);
   }
+  VLOG(1) << "Unblocking collective " << col_params.ToString();
 }
 
 void BaseCollectiveExecutor::Launched(const CollectiveParams& col_params) {
@@ -325,6 +340,8 @@
     launched_[col_params.instance.instance_key] = num_devices;
   }
   if (--launched_[col_params.instance.instance_key] == 0) {
+    VLOG(1) << "Unblocking dependencies for collective instance "
+            << col_params.instance.instance_key;
     launch_cv_.notify_all();
   }
 }
diff --git a/tensorflow/core/common_runtime/base_collective_executor.h b/tensorflow/core/common_runtime/base_collective_executor.h
index b711aa6..bc85b5a 100644
--- a/tensorflow/core/common_runtime/base_collective_executor.h
+++ b/tensorflow/core/common_runtime/base_collective_executor.h
@@ -78,9 +78,15 @@
 };
 
 // Create a CollectiveAdaptor wrapping 'output', specialized to its
-// data-type and shape.
+// data-type and shape.  If align_chunks == true then chunk size may
+// be larger than output->NumElements() / num_chunks and one or more
+// of the suffix chunks may be empty.  Chunks will be arranged to start
+// and end on alignment boundaries.  If align_chunks == false then
+// output->NumElements() % num_chunks must be 0 and all chunks will
+// have exactly the same size, ignoring alignment issues.
 CollectiveAdapter* MakeCollectiveAdapter(Tensor* output, int num_chunks,
-                                         Allocator* allocator);
+                                         Allocator* allocator,
+                                         bool align_chunks = true);
 
 // Default implementation of CollectiveExecutor.  Delegates the actual
 // work of moving data to a class specialized for the operation type,
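A small Python rendering of the aligned-chunk sizing described in the comment above may help; it mirrors the AlignedChunkElts logic shown earlier in this diff. The base ceil-split is an assumption (that part of the function is not in the hunk), and EIGEN_MAX_ALIGN_BYTES is taken as 64:

    ALIGN = 64  # stand-in for EIGEN_MAX_ALIGN_BYTES

    def aligned_chunk_elts(elt_bytes, total_elts, num_chunks):
        base = -(-total_elts // num_chunks)  # ceil division (assumed base split)
        chunk_bytes = base * elt_bytes
        diff = (ALIGN - chunk_bytes if chunk_bytes < ALIGN
                else ALIGN - chunk_bytes % ALIGN)
        assert diff % elt_bytes == 0  # holds when ALIGN % elt_bytes == 0
        return base + diff // elt_bytes

    # 10 float32 elements in 3 chunks: each chunk is padded out to 16 elements
    # (64 bytes), so chunk 0 holds everything and the suffix chunks are empty.
    print(aligned_chunk_elts(4, 10, 3))  # -> 16
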
diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc
index c7e535c..0e4ddb1 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/bfc_allocator.cc
@@ -335,10 +335,10 @@
         // Update stats.
         ++stats_.num_allocs;
         stats_.bytes_in_use += chunk->size;
-        stats_.max_bytes_in_use =
-            std::max(stats_.max_bytes_in_use, stats_.bytes_in_use);
-        stats_.max_alloc_size =
-            std::max<std::size_t>(stats_.max_alloc_size, chunk->size);
+        stats_.peak_bytes_in_use =
+            std::max(stats_.peak_bytes_in_use, stats_.bytes_in_use);
+        stats_.largest_alloc_size =
+            std::max<std::size_t>(stats_.largest_alloc_size, chunk->size);
 
         VLOG(4) << "Returning: " << chunk->ptr;
         if (VLOG_IS_ON(4)) {
@@ -391,6 +391,7 @@
 }
 
 void BFCAllocator::DeallocateRaw(void* ptr) {
+  VLOG(1) << "DeallocateRaw " << Name() << " " << RequestedSize(ptr);
   DeallocateRawInternal(ptr);
   retry_helper_.NotifyDealloc();
 }
@@ -685,16 +686,16 @@
   LOG(INFO) << "Stats: \n" << stats_.DebugString();
 }
 
-void BFCAllocator::GetStats(AllocatorStats* stats) {
+absl::optional<AllocatorStats> BFCAllocator::GetStats() {
   mutex_lock l(lock_);
-  *stats = stats_;
+  return stats_;
 }
 
 void BFCAllocator::ClearStats() {
   mutex_lock l(lock_);
   stats_.num_allocs = 0;
-  stats_.max_bytes_in_use = stats_.bytes_in_use;
-  stats_.max_alloc_size = 0;
+  stats_.peak_bytes_in_use = stats_.bytes_in_use;
+  stats_.largest_alloc_size = 0;
 }
 
 std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index 261bacb..b0fd0d8 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -69,7 +69,7 @@
 
   int64 AllocationId(const void* ptr) override;
 
-  void GetStats(AllocatorStats* stats) override;
+  absl::optional<AllocatorStats> GetStats() override;
 
   void ClearStats() override;
 
diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.cc b/tensorflow/core/common_runtime/collective_param_resolver_local.cc
index 5bb2a7d..a767083 100644
--- a/tensorflow/core/common_runtime/collective_param_resolver_local.cc
+++ b/tensorflow/core/common_runtime/collective_param_resolver_local.cc
@@ -584,7 +584,7 @@
 void CollectiveParamResolverLocal::CompleteParamsAsync(
     const string& device, CollectiveParams* cp, CancellationManager* cancel_mgr,
     const StatusCallback& done) {
-  VLOG(1) << "CompleteParams " << device << " for " << cp << ": "
+  VLOG(1) << "CompleteParams local " << device << " for " << cp << ": "
           << cp->ToString();
   CompleteGroupLocal(
       device, cp,
@@ -618,6 +618,8 @@
     } else {
       cp->instance.impl_details.collective_name = "RingReduce";
     }
+  } else if (cp->instance.type == GATHER_COLLECTIVE) {
+    cp->instance.impl_details.collective_name = "RingGather";
   } else {
     cp->instance.impl_details.collective_name = "undef";
   }
diff --git a/tensorflow/core/common_runtime/colocation_graph.cc b/tensorflow/core/common_runtime/colocation_graph.cc
new file mode 100644
index 0000000..046f00c
--- /dev/null
+++ b/tensorflow/core/common_runtime/colocation_graph.cc
@@ -0,0 +1,994 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/colocation_graph.h"
+
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/str_join.h"
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/device_set.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/device_attributes.pb.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/util/device_name_utils.h"
+#include "tensorflow/core/util/dump_graph.h"
+#include "tensorflow/core/util/port.h"
+
+namespace tensorflow {
+
+namespace {
+
+// We hoist the conversion from C-style string literal to StringPiece here,
+// so that we can avoid the many repeated calls to strlen().
+const StringPiece kColocationAttrNameStringPiece(kColocationAttrName);
+const StringPiece kColocationGroupPrefixStringPiece(kColocationGroupPrefix);
+
+// Returns a list of devices having type in supported_device_types.  The
+// returned list is sorted by preferred type (higher numeric type is preferred).
+std::vector<Device*> FilterSupportedDevices(
+    const std::vector<Device*>& devices,
+    const PrioritizedDeviceTypeVector& supported_device_types,
+    const Device* default_device) {
+  Device* filtered_default_device = nullptr;
+  std::vector<std::pair<Device*, int32>> prioritized_filtered_devices;
+  for (const auto& supported_device_type : supported_device_types) {
+    for (Device* device : devices) {
+      if (DeviceType(device->attributes().device_type()) ==
+          supported_device_type.first) {
+        if (device == default_device) {
+          filtered_default_device = device;
+        } else {
+          prioritized_filtered_devices.emplace_back(
+              device, supported_device_type.second);
+        }
+      }
+    }
+  }
+
+  auto device_sort = [](const std::pair<Device*, int32>& a,
+                        const std::pair<Device*, int32>& b) {
+    if (a.second != b.second) {
+      return a.second > b.second;
+    }
+
+    auto a_priority =
+        DeviceSet::DeviceTypeOrder(DeviceType(a.first->device_type()));
+    auto b_priority =
+        DeviceSet::DeviceTypeOrder(DeviceType(b.first->device_type()));
+    // First sort by prioritized device type (higher is preferred) and
+    // then by device name (lexicographically).
+    if (a_priority != b_priority) {
+      return a_priority > b_priority;
+    }
+    return StringPiece(a.first->name()) < StringPiece(b.first->name());
+  };
+  std::sort(prioritized_filtered_devices.begin(),
+            prioritized_filtered_devices.end(), device_sort);
+
+  std::vector<Device*> filtered_devices;
+  if (filtered_default_device != nullptr) {
+    filtered_devices.emplace_back(filtered_default_device);
+  }
+  for (const auto& prioritized_filtered_device : prioritized_filtered_devices) {
+    filtered_devices.push_back(prioritized_filtered_device.first);
+  }
+  return filtered_devices;
+}
+
+// Using absl::StrJoin with lambda does not work in tf-lite builds.
+std::vector<string> DevicesToString(const std::vector<Device*> devices) {
+  std::vector<string> v;
+  v.reserve(devices.size());
+  for (Device* d : devices) {
+    v.push_back(d->name());
+  }
+  return v;
+}
+
+// Using absl::StrJoin with lambda does not work in tf-lite builds.
+std::vector<string> DeviceTypeAndPriorityToString(
+    const PrioritizedDeviceTypeVector& devices) {
+  std::vector<string> v;
+  v.reserve(devices.size());
+  for (const std::pair<DeviceType, int32>& device_and_type : devices) {
+    v.push_back(DeviceTypeString(device_and_type.first));
+  }
+  return v;
+}
+
+// While Placer can override requested device on ops processing
+// resources, i.e. node that take (and potentially return) a resource,
+// it must not override requested device on ops generating a resource,
+// e.g. VarHandleOp, _Arg. Such ops are currently no-input, single resource/ref
+// output nodes.
+bool IsResourceGeneratorNode(const Node& node) {
+  return node.num_inputs() == 0 && node.num_outputs() == 1 &&
+         (IsRefType(node.output_type(0)) || node.output_type(0) == DT_RESOURCE);
+}
+
+bool IsExemptFromResourceInputColocation(const Node* node) {
+  // Note: Partitioned function calls, which place and partition their
+  // function bodies, are exempt from this check: they forward resource and
+  // ref inputs to operations that are appropriately placed, instead of
+  // dereferencing them.
+  const string& op_type = node->op_def().name();
+  return op_type == "PartitionedCall" || op_type == "StatefulPartitionedCall";
+}
+
+bool HasPriorities(const PrioritizedDeviceTypeVector& device_types) {
+  for (const auto& prioritized_device_type : device_types) {
+    if (prioritized_device_type.second != 0) return true;
+  }
+  return false;
+}
+
+bool ArePrioritiesSame(const PrioritizedDeviceTypeVector& a_types,
+                       const PrioritizedDeviceTypeVector& b_types) {
+  if (a_types.size() != b_types.size()) {
+    return false;
+  }
+  for (int i = 0; i < a_types.size(); ++i) {
+    if (a_types[i].first != b_types[i].first) {
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+Status Member::SetParentAndSupportedDevices(
+    const Node& node, const std::vector<DeviceType>& types) {
+  int id = node.id();
+  if (id < 0) {
+    return errors::Internal("Placer should not be creating a Member for node: ",
+                            node.DebugString());
+  }
+  parent_ = id;
+  return SupportedDeviceTypesForNode(types, node.def(),
+                                     &supported_device_types_);
+}
+
+Status Member::SetAssignedDeviceName(const string& device_name) {
+  if (DeviceNameUtils::HasSomeDetails(requested_device_name_)) {
+    return errors::Internal(
+        "Setting assigned device name when there is a requested device set "
+        "is unsupported");
+  }
+  if (!DeviceNameUtils::ParseFullName(device_name, &assigned_device_name_)) {
+    return errors::Internal("Malformed assigned device '", device_name, "'");
+  }
+  // Set requested device to assigned_device to maintain the invariant that
+  // requested is a specialization of assigned.
+  requested_device_name_ = assigned_device_name_;
+  return Status::OK();
+}
+
+Status Member::SetRequestedDeviceName(const Node& node) {
+  if (!DeviceNameUtils::ParseFullName(node.requested_device(),
+                                      &requested_device_name_)) {
+    return errors::InvalidArgument("Malformed device specification '",
+                                   node.requested_device(),
+                                   "' in node: ", node.DebugString());
+  }
+  if (DeviceNameUtils::HasSomeDetails(assigned_device_name_)) {
+    return errors::Internal(
+        "Setting requested device name when there is an assigned device set "
+        "is unsupported");
+  }
+  return Status::OK();
+}
+
+Status Member::EnsureCompatibilityAcrossResourceEdge(
+    const Node& src, const Member& src_root,
+    const Node& dst, /*dst_root is this*/
+    bool log_device_placement) {
+  if (!DeviceNameUtils::AreCompatibleDevNames(src_root.assigned_device_name_,
+                                              assigned_device_name_)) {
+    return errors::InvalidArgument(
+        "Cannot place the graph because a reference or resource edge "
+        "connects colocation groups with incompatible assigned devices: ",
+        DeviceNameUtils::ParsedNameToString(src_root.assigned_device_name_),
+        " vs ", DeviceNameUtils::ParsedNameToString(assigned_device_name_));
+  }
+
+  if (DeviceNameUtils::AreCompatibleDevNames(src_root.requested_device_name_,
+                                             requested_device_name_)) {
+    return Status::OK();
+  }
+
+  // If we are here, assigned devices are compatible but requested ones are
+  // not. We will be overriding the requested device for destination node, but
+  // need to preserve the invariant that it will be a specialization of
+  // the assigned device.
+  if (log_device_placement) {
+    LOG(INFO) << "Ignoring device specification "
+              << DeviceNameUtils::ParsedNameToString(requested_device_name_)
+              << " for node '" << dst.name()
+              << "' because the input edge from '" << src.name()
+              << "' is a reference connection and already has a device "
+                 "field set to "
+              << DeviceNameUtils::ParsedNameToString(
+                     src_root.requested_device_name_);
+  }
+  requested_device_name_ = src_root.requested_device_name_;
+  DeviceNameUtils::EnsureSpecification(&requested_device_name_,
+                                       assigned_device_name_);
+  return Status::OK();
+}
+
+void Member::Merge(std::vector<Member>* tree, int x_root, int y_root,
+                   Member** new_root, Member** old_root, bool dry_run) {
+  Member& x_root_member = (*tree)[x_root];
+  Member& y_root_member = (*tree)[y_root];
+
+  // Merge the sets by setting the parent pointer of the smaller tree's root
+  // node to point to the root of the larger tree. Together with path
+  // compression in ColocationGraph::FindRoot, this ensures that we do not
+  // experience pathological performance on graphs such as chains.
+  int new_root_id, old_root_id;
+  if (x_root_member.rank_ < y_root_member.rank_) {
+    // The tree rooted at x_root is shallower, so connect it to
+    // y_root. The rank of y_root is unchanged because its new
+    // child has strictly less rank.
+    if (!dry_run) {
+      x_root_member.parent_ = y_root;
+    }
+    new_root_id = y_root;
+    old_root_id = x_root;
+  } else if (x_root_member.rank_ > y_root_member.rank_) {
+    // The tree rooted at y_root is shallower, so connect it to
+    // x_root. The rank of x_root is unchanged because its new
+    // child has strictly less rank.
+    if (!dry_run) {
+      y_root_member.parent_ = x_root;
+    }
+    new_root_id = x_root;
+    old_root_id = y_root;
+  } else {
+    if (!dry_run) {
+      // Both trees have the same rank, so break the tie by choosing
+      // x_root as the new root.
+      y_root_member.parent_ = x_root;
+      // Increment the rank of the tree rooted at x_root, because it
+      // is now strictly deeper than before.
+      ++x_root_member.rank_;
+    }
+    new_root_id = x_root;
+    old_root_id = y_root;
+  }
+
+  *new_root = &(*tree)[new_root_id];
+  *old_root = &(*tree)[old_root_id];
+}
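+
+// For illustration (a sketch; `a` and `b` are hypothetical root ids in
+// `tree`): merging two singleton roots, both of rank 0, ties on rank, so
+// `a` becomes the new root:
+//
+//   Member *new_root, *old_root;
+//   Member::Merge(&tree, a, b, &new_root, &old_root, /*dry_run=*/false);
+//   // Now tree[b].parent_ == a, tree[a].rank_ == 1, and
+//   // new_root == &tree[a], old_root == &tree[b].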
+
+// tree is non-const because we can change some `parent` pointers in some
+// members for more efficient future lookups. The vector itself is not
+// changed.
+int Member::FindRoot(std::vector<Member>* tree, int node_id) {
+  Member& member = (*tree)[node_id];
+  if (member.parent_ == node_id) {
+    // member.parent is the root of this disjoint tree.  Do nothing.
+  } else {
+    member.parent_ = FindRoot(tree, member.parent_);
+  }
+  // Now it is guaranteed that member.parent is the root of this disjoint
+  // tree.
+  return member.parent_;
+}
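+
+// For illustration: given a hypothetical chain where tree[3].parent_ == 2,
+// tree[2].parent_ == 1, and tree[1].parent_ == 0 (the root),
+// FindRoot(&tree, 3) returns 0 and, via path compression, repoints
+// tree[3].parent_ and tree[2].parent_ directly at 0, so subsequent lookups
+// for those nodes take a single step.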
+
+Status Member::MergeDeviceNames(const Member& other,
+                                bool allow_soft_placement) {
+  // Assuming the "requested is a specialization of assigned" invariant holds
+  // for this and `other`, it will hold after the two merges below.
+  DeviceNameUtils::ParsedName assigned_device_name_copy = assigned_device_name_;
+  TF_RETURN_IF_ERROR(DeviceNameUtils::MergeDevNames(
+      &assigned_device_name_copy, other.assigned_device_name_));
+
+  DeviceNameUtils::ParsedName requested_device_name_copy =
+      requested_device_name_;
+  TF_RETURN_IF_ERROR(DeviceNameUtils::MergeDevNames(
+      &requested_device_name_copy, other.requested_device_name_,
+      allow_soft_placement));
+
+  // We checked for all errors, now change the devices.
+  assigned_device_name_ = assigned_device_name_copy;
+  requested_device_name_ = requested_device_name_copy;
+  return Status::OK();
+}
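+
+// For illustration (a sketch of DeviceNameUtils::MergeDevNames semantics):
+// merging a member requesting "/job:worker" with one requesting
+// "/device:GPU:0" yields "/job:worker/device:GPU:0", while merging
+// "/device:GPU:0" with "/device:CPU:0" is a conflict that fails unless
+// allow_soft_placement relaxes it; see device_name_utils for the exact rules.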
+
+// Updates this to contain the intersection of the device types in
+// this and "other".
+bool Member::MergeSupportedDevices(const Member& other) {
+  // Generate intersection with priorities.
+  // Each vector contains the same device types but with different priorities.
+  // The priorities are taken from the corresponding source vector.
+  PrioritizedDeviceTypeVector target_intersection;
+  PrioritizedDeviceTypeVector other_intersection;
+  for (const auto& prioritized_device_type : supported_device_types_) {
+    bool found = false;
+    for (const auto& other_prioritized_device_type :
+         other.supported_device_types_) {
+      if (prioritized_device_type.first ==
+          other_prioritized_device_type.first) {
+        found = true;
+        other_intersection.push_back(other_prioritized_device_type);
+        break;
+      }
+    }
+    if (found) {
+      target_intersection.push_back(prioritized_device_type);
+    }
+  }
+
+  // Sort the devices by priority order.
+  auto device_sort = [](const std::pair<DeviceType, int32>& a,
+                        const std::pair<DeviceType, int32>& b) {
+    // First look at set priorities.
+    if (a.second != b.second) {
+      return a.second > b.second;
+    }
+    // Then fallback to default priorities.
+    auto a_priority = DeviceSet::DeviceTypeOrder(a.first);
+    auto b_priority = DeviceSet::DeviceTypeOrder(b.first);
+    if (a_priority != b_priority) {
+      return a_priority > b_priority;
+    }
+    // Finally just look at the Device type strings.
+    return a.first.type_string() < b.first.type_string();
+  };
+
+  std::sort(target_intersection.begin(), target_intersection.end(),
+            device_sort);
+  std::sort(other_intersection.begin(), other_intersection.end(), device_sort);
+
+  PrioritizedDeviceTypeVector result;
+
+  bool is_target_prioritized = HasPriorities(target_intersection);
+  bool is_other_prioritized = HasPriorities(other_intersection);
+  if (!is_target_prioritized && !is_other_prioritized) {
+    // If neither side is prioritized, we just return the original (i.e. the
+    // target) prioritization.
+    result = target_intersection;
+  } else if (is_target_prioritized && !is_other_prioritized) {
+    // If only one is prioritized, then we respect priorities of that in the
+    // intersection.
+    result = target_intersection;
+  } else if (!is_target_prioritized && is_other_prioritized) {
+    result = other_intersection;
+  } else {
+    // If both have priorities and they agree, we go with that. If the
+    // prioritization orders differ, we fall back to the default, i.e. what
+    // DeviceTypeOrder suggests. In that case, we also set the merged
+    // priorities to 0, so that downstream merges work correctly as well.
+    if (ArePrioritiesSame(target_intersection, other_intersection)) {
+      result = target_intersection;
+    } else {
+      for (const auto& prioritized_device : target_intersection) {
+        result.push_back(std::make_pair(prioritized_device.first, 0));
+      }
+      std::sort(result.begin(), result.end(), device_sort);
+    }
+  }
+
+  if (result.empty()) {
+    return false;
+  }
+  supported_device_types_ = result;
+  return true;
+}
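+
+// For illustration: intersecting supported types [(GPU, 0), (CPU, 0)] with
+// [(CPU, 2), (TPU, 1)] keeps only CPU; since only the second operand carries
+// explicit (nonzero) priorities, the merged result adopts them: [(CPU, 2)].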
+
+Status Member::AssignDevice(const Node& node, bool allow_soft_placement) {
+  if (node.assigned_device_name_index() == assigned_device_name_index_) {
+    return Status::OK();
+  }
+
+  DeviceNameUtils::ParsedName parsed;
+  DeviceNameUtils::ParseFullName(node.assigned_device_name(), &parsed);
+  Status s = DeviceNameUtils::MergeDevNames(&assigned_device_name_, parsed,
+                                            allow_soft_placement);
+  if (!s.ok()) {
+    return errors::Internal(
+        "Constraining by assigned device should not cause an error. Original "
+        "root's assigned device name: \"",
+        DeviceNameUtils::ParsedNameToString(assigned_device_name_),
+        "\", node's assigned device name \"", node.assigned_device_name(),
+        "\". Error: ", s.error_message());
+  }
+  s = DeviceNameUtils::MergeDevNames(&requested_device_name_, parsed,
+                                     allow_soft_placement);
+  if (!s.ok()) {
+    return errors::Internal(
+        "Constraining by assigned device should not cause an error. Original "
+        "root's requested device name: \"",
+        DeviceNameUtils::ParsedNameToString(requested_device_name_),
+        "\", node's assigned device name \"", node.assigned_device_name(),
+        "\". Error: ", s.error_message());
+  }
+
+  assigned_device_name_index_ = node.assigned_device_name_index();
+  // Clear cached possible_devices, if any.
+  possible_devices_.clear();
+  return Status::OK();
+}
+
+string Member::DebugString() {
+  return absl::StrCat(
+      "Member(assigned_device_name_index_=", assigned_device_name_index_,
+      " requested_device_name_=",
+      DeviceNameUtils::ParsedNameToString(requested_device_name_),
+      " assigned_device_name_=",
+      DeviceNameUtils::ParsedNameToString(assigned_device_name_),
+      " supported_device_types_=[",
+      absl::StrJoin(DeviceTypeAndPriorityToString(supported_device_types_),
+                    ", "),
+      "] possible_devices_=[",
+      absl::StrJoin(DevicesToString(possible_devices_), ", "), "]");
+}
+
+ColocationGraph::ColocationGraph(const Graph* graph,
+                                 const DeviceSet* device_set,
+                                 const Device* default_device,
+                                 bool allow_soft_placement,
+                                 bool log_device_placement)
+    : graph_(graph),
+      device_set_(device_set),
+      device_types_(device_set->PrioritizedDeviceTypeList()),
+      default_device_(default_device),
+      allow_soft_placement_(allow_soft_placement),
+      log_device_placement_(log_device_placement) {
+  members_.resize(graph->num_node_ids());
+}
+
+// Adds each node of the Graph to this ColocationGraph as a singleton.
+//
+// NOTE: The implementation assumes that the ids of nodes passed to
+// this method are dense and zero-based; the memory used will be linear in
+// the largest node ID.
+// NOTE: If this method returns an error, *this is left in an undefined
+// state.
+Status ColocationGraph::ColocateAllNodes() {
+  // This maps from a colocation group identifier to the 'root' of that
+  // colocation group.  Note that the keys in this map are StringPiece; the
+  // actual strings are stored under the NodeDef.  The lifetime of this map
+  // is limited to this ColocateAllNodes() method, and no part of the
+  // NodeDef trees are changed during the lifetime of this method, so using
+  // StringPiece as a key is safe.
+  //
+  // Also, as a further optimization, we remove the "loc:@" prefix from
+  // "class" attribute values, when they are used as keys in this table.
+  // This allows us to use StringPiece values that refer to substrings of
+  // 'string' values stored in NodeDef attribute lists, as well as StringPiece
+  // values that refer to 'string' values from NodeDef::name(), without
+  // performing any string allocations.
+  std::unordered_map<StringPiece, const Node*, StringPieceHasher>
+      colocation_group_root;
+
+  for (const Node* node : graph_->op_nodes()) {
+    // When adding the node, identify whether it is part of a colocation
+    // group.
+
+    // This code is effectively the equivalent of GetNodeAttr() for a string
+    // array, but it avoids all internal allocations (the allocation of the
+    // backing store of the std::vector<string> as well as the copies of the
+    // strings within it).  Instead, we combine the query of the colocation
+    // attribute with the calls to ColocateNodeToGroup.
+    bool found_spec = false;
+    const AttrValue* attr_value =
+        node->attrs().Find(kColocationAttrNameStringPiece);
+    if (attr_value != nullptr && attr_value->has_list()) {
+      for (const string& class_spec : attr_value->list().s()) {
+        StringPiece spec(class_spec);
+        if (str_util::ConsumePrefix(&spec, kColocationGroupPrefixStringPiece)) {
+          found_spec = true;
+          TF_RETURN_IF_ERROR(
+              ColocateNodeToGroup(&colocation_group_root, node, spec));
+        }
+      }
+    }
+
+    // TODO(iga): Even when the node has a spec, we need to colocate the
+    // node to its "name group" because other nodes can still use
+    // "loc:@<this_node_name>" in their colocation specs.
+    if (!found_spec) {
+      // If the node does not specify a colocation group, then use the
+      // name of this node as the colocation group.
+      TF_RETURN_IF_ERROR(
+          ColocateNodeToGroup(&colocation_group_root, node, node->name()));
+    }
+  }
+
+  return Status::OK();
+}
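+
+// For illustration: a node carrying the attribute
+//   attr { key: "_class" value { list { s: "loc:@v" } } }
+// is unioned with the group rooted at the node named "v", while a node
+// without a "_class" attribute forms (or joins) the group keyed by its own
+// name.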
+
+Status ColocationGraph::ColocateResourceOrRefEdge(Node* src, Node* dst) {
+  // Colocate `src` and `dst` to maintain the invariant that nodes
+  // connected by reference edges are colocated.
+  int src_root_id = FindRoot(src->id());
+  int dst_root_id = FindRoot(dst->id());
+  auto& src_root = members_[src_root_id];
+  auto& dst_root = members_[dst_root_id];
+
+  TF_RETURN_IF_ERROR(dst_root.EnsureCompatibilityAcrossResourceEdge(
+      *src, src_root, *dst, log_device_placement_));
+  Status status = ColocateNodes(*src, src_root_id, *dst, dst_root_id);
+  if (!status.ok()) {
+    return AttachDef(
+        errors::InvalidArgument("Nodes were connected by a "
+                                "reference connection (requiring them to "
+                                "be on the same device), but the two nodes "
+                                "were assigned two different devices: ",
+                                status.error_message()),
+        *dst);
+  }
+  return Status::OK();
+}
+
+Status ColocationGraph::ColocateResourceAndRefEdges() {
+  // Enumerate the constraint edges, and use them to update the disjoint
+  // node set.
+  // If `node` has an input edge with reference type, add an edge from the
+  // source of that edge to `node`.
+  for (const Edge* edge : graph_->edges()) {
+    if (edge->IsControlEdge()) {
+      continue;
+    }
+    Node* src = edge->src();
+    Node* dst = edge->dst();
+    DataType input_type = dst->input_type(edge->dst_input());
+    if ((input_type == DT_RESOURCE || IsRefType(input_type)) &&
+        !IsExemptFromResourceInputColocation(dst)) {
+      TF_RETURN_IF_ERROR(ColocateResourceOrRefEdge(src, dst));
+    }
+  }
+  return Status::OK();
+}
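+
+// For illustration: an Assign node consuming the ref-typed output of a
+// VariableV2 is colocated with the variable here, as is a ReadVariableOp
+// consuming the DT_RESOURCE output of a VarHandleOp, unless the consumer is
+// exempt (e.g. an op that merely forwards the resource handle to a function).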
+
+Status ColocationGraph::Initialize() {
+  TF_RETURN_IF_ERROR(InitializeMembers());
+  TF_RETURN_IF_ERROR(ColocateResourceAndRefEdges());
+  TF_RETURN_IF_ERROR(ColocateAllNodes());
+  return Status::OK();
+}
+
+Status ColocationGraph::ColocateNodeToGroup(
+    std::unordered_map<StringPiece, const Node*, StringPieceHasher>*
+        colocation_group_root,
+    const Node* node, StringPiece colocation_group) {
+  const Node*& root_node = (*colocation_group_root)[colocation_group];
+  if (root_node == nullptr) {
+    // This is the first node of the colocation group, so
+    // designate this node as the 'root' of that colocation group.
+    root_node = node;
+  } else {
+    // Try to colocate the node with the root.  If there is an
+    // error, return it.
+    Status s = ColocateNodes(*node, *root_node);
+    if (!s.ok()) {
+      if (!allow_soft_placement_) {
+        return AttachDef(s, *node);
+      }
+      if (log_device_placement_) {
+        LOG(INFO) << "Ignoring request to colocate node '" << node->name()
+                  << "' with nodes in colocation group '" << colocation_group
+                  << "' because soft placement is on and an attempt at doing "
+                     "so resulted in the following error: "
+                  << AttachDef(s, *node).ToString();
+      }
+    }
+  }
+  return Status::OK();
+}
+
+// Merge the (possibly disjoint) sets containing nodes "x" and
+// "y". Returns OK if all the nodes in the union of these sets can
+// be placed on the same device type.
+//
+// If this method returns an error, *this is unchanged.
+Status ColocationGraph::ColocateNodes(const Node& x, const Node& y) {
+  int x_root = FindRoot(x.id());
+  int y_root = FindRoot(y.id());
+  return ColocateNodes(x, x_root, y, y_root);
+}
+
+// This overload of ColocateNodes() allows a caller to provide the root node
+// ids for the two nodes. For large graphs, this noticeably reduces the
+// graph load time.
+Status ColocationGraph::ColocateNodes(const Node& x, int x_root, const Node& y,
+                                      int y_root) {
+  if (x_root == y_root) {
+    return Status::OK();
+  }
+
+  Member* new_root_member;
+  Member* old_root_member;
+  Member::Merge(&members_, x_root, y_root, &new_root_member, &old_root_member,
+                /*dry_run=*/true);
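+  // The dry run computes which root would win the merge without mutating the
+  // forest, so the checks below can fail cleanly; the union is only made
+  // permanent once both checks pass.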
+
+  // Merge the partial device specifications, and ensure that they are
+  // compatible. If there is an error, nothing is modified.
+  // TODO(mrry): Consider enriching the error message by pointing
+  // out which nodes have the explicit partial device
+  // specifications that caused this conflict.
+  Status s = new_root_member->MergeDeviceNames(*old_root_member,
+                                               allow_soft_placement_);
+  if (!s.ok()) {
+    return errors::InvalidArgument(
+        "Cannot colocate nodes ",
+        errors::FormatColocationNodeForError(x.name()), " and ",
+        errors::FormatColocationNodeForError(y.name()), ": ",
+        s.error_message());
+  }
+
+  // Ensure that the common root has at least one supported device
+  // type, by computing the intersection of
+  // new_root_member.supported_device_types and
+  // old_root_member.supported_device_types.
+  if (!new_root_member->MergeSupportedDevices(*old_root_member)) {
+    return errors::InvalidArgument(
+        "Cannot colocate nodes ",
+        errors::FormatColocationNodeForError(x.name()), " and ",
+        errors::FormatColocationNodeForError(y.name()),
+        " because no device type supports both of those nodes and the "
+        "other nodes colocated with them.",
+        DebugInfo(x_root), DebugInfo(y_root));
+  }
+
+  // All error checks are done, merge the colocation graphs.
+  Member::Merge(&members_, x_root, y_root, &new_root_member, &old_root_member,
+                /*dry_run=*/false);
+  return Status::OK();
+}
+
+// Limits the possible devices of `node`'s colocation group to the device
+// to which `node` is assigned. This makes sure that all nodes in this
+// colocation group will be assigned to the same device. Without this
+// explicit restriction, heuristics can choose a different possible device
+// for other nodes in the group.
+Status ColocationGraph::LimitToAssignedDevice(const Node& node) {
+  if (node.assigned_device_name_index() < 0) {
+    return errors::Internal(
+        "Expected an assigned node as argument to LimitToAssignedDevice but "
+        "got: ",
+        node.DebugString());
+  }
+  int root = FindRoot(node.id());
+  Member& root_member = members_[root];
+  return root_member.AssignDevice(node, allow_soft_placement_);
+}
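+
+// For illustration: once any node in a group has been assigned, say,
+// "/job:worker/replica:0/task:0/device:GPU:0", the group's root is narrowed
+// to that one device, so placement heuristics cannot scatter the remaining
+// members across other matching devices.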
+
+// For the given node, subject to the constraints previously given
+// to this ColocationGraph, set its assigned_device_name. Returns OK
+// if a satisfying device can be found, otherwise an error.
+//
+// Note: This method returns a pointer to a field within members_.
+// The caller must not use the returned pointer after there is any possibility
+// that the members_[i].possible_devices field has been modified.
+Status ColocationGraph::GetDevicesForNode(
+    Node* node, const std::vector<Device*>** possible_devices) {
+  *possible_devices = nullptr;
+  const int node_root = FindRoot(node->id());
+  if (!members_[node_root].possible_devices().empty()) {
+    *possible_devices = &members_[node_root].possible_devices();
+    return Status::OK();
+  }
+
+  // We have not yet computed the possible devices for the
+  // colocated node set containing 'node', so we do so now using the
+  // constraints on the root node.
+
+  // "devices" will contain the set of feasible placements for the
+  // colocated node set containing 'node'.
+  std::vector<Device*> devices;
+  if (DeviceNameUtils::HasSomeDetails(
+          members_[node_root].requested_device_name())) {
+    // The root node has a (possibly partial) device
+    // specification, so enumerate the physical devices that
+    // conform to it.
+    device_set_->FindMatchingDevices(
+        members_[node_root].requested_device_name(), &devices);
+
+    if (!devices.empty()) {
+      // Filter devices into those that are compatible with the root
+      // node (and its children).
+      devices = FilterSupportedDevices(
+          devices, members_[node_root].supported_device_types(),
+          default_device_);
+    }
+
+    // Perform soft placement if allow_soft_placement_ is set.
+    if (devices.empty() && allow_soft_placement_) {
+      // The soft_device_name is the same as the node's device name
+      // without specifying the device type or ID.
+      DeviceNameUtils::ParsedName soft_device_name =
+          members_[node_root].requested_device_name();
+      soft_device_name.type.clear();
+      soft_device_name.has_type = false;
+      soft_device_name.has_id = false;
+      device_set_->FindMatchingDevices(soft_device_name, &devices);
+      if (!devices.empty()) {
+        devices = FilterSupportedDevices(
+            devices, members_[node_root].supported_device_types(),
+            default_device_);
+      }
+    }
+
+    if (devices.empty()) {
+      // Return an error when a physical device that matches an explicit
+      // device specification is not found. This ensures that we don't
+      // assign a node to GPU when the user wanted to force it on CPU.
+      string debug_info = DebugInfo(node_root);
+
+      DeviceNameUtils::ParsedName specified_device_name;
+      if (DeviceNameUtils::ParseFullName(node->requested_device(),
+                                         &specified_device_name) &&
+          specified_device_name ==
+              members_[node_root].requested_device_name()) {
+        // The specified device and merged set device match, and
+        // will appear in the GraphDef (for debugging), so just
+        // print the specified device.
+        std::vector<Device*> devices_matching_nodedef;
+        device_set_->FindMatchingDevices(specified_device_name,
+                                         &devices_matching_nodedef);
+        if (devices_matching_nodedef.empty()) {
+          // Sometimes it is almost impossible to understand the problem
+          // without a list of available devices.
+          std::vector<string> device_names;
+          for (const Device* device : device_set_->devices()) {
+            device_names.push_back(device->name());
+          }
+          std::sort(device_names.begin(), device_names.end());
+
+          string gpu_msg = "";
+          if (!IsGoogleCudaEnabled() &&
+              str_util::Lowercase(specified_device_name.type) == "gpu") {
+            gpu_msg =
+                " The requested device appears to be a GPU, but CUDA is not "
+                "enabled.";
+          }
+
+          return errors::InvalidArgument(
+              errors::FormatNodeNameForError(node->name()),
+              "was explicitly assigned to ", node->requested_device(),
+              " but available devices are [ ",
+              str_util::Join(device_names, ", "), " ]. Make sure ",
+              "the device specification refers to a valid device.", gpu_msg);
+        } else if (specified_device_name.has_type) {
+          return errors::InvalidArgument(
+              "Could not satisfy explicit device specification '",
+              node->requested_device(), "' because no supported kernel for ",
+              specified_device_name.type, " devices is available.", debug_info,
+              "\nRegistered kernels:\n",
+              KernelsRegisteredForOp(node->type_string()));
+        } else {
+          return errors::InvalidArgument(
+              "Could not satisfy explicit device specification '",
+              node->requested_device(), "'", debug_info);
+        }
+      } else {
+        // The specified device may be a valid device but the
+        // merged set device is different, so print both.
+        return errors::InvalidArgument(
+            "Could not satisfy explicit device specification '",
+            node->requested_device(), "' because the node ",
+            errors::FormatColocationNodeForError(node->name()),
+            " was colocated with a group of nodes that ",
+            "required incompatible device '",
+            DeviceNameUtils::ParsedNameToString(
+                members_[node_root].requested_device_name()),
+            "'", debug_info);
+      }
+    }
+  } else {
+    // The device is completely unspecified, so enumerate the devices that
+    // support all of the nodes in the set.
+    if (device_set_->devices().empty()) {
+      return errors::Internal("No devices are registered");
+    }
+    devices = FilterSupportedDevices(
+        device_set_->devices(), members_[node_root].supported_device_types(),
+        default_device_);
+
+    if (devices.empty()) {
+      return errors::InvalidArgument(
+          "Node had no OpKernel registered to support this operation: ",
+          "Operation was ", node->type_string(), " and inputs were ",
+          DataTypeVectorString(node->input_types()), DebugInfo(node_root));
+    }
+  }
+
+  // Cache the result of the possible devices for this node group.
+  members_[node_root].set_possible_devices(std::move(devices));
+  *possible_devices = &members_[node_root].possible_devices();
+  return Status::OK();
+}
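+
+// For illustration: with allow_soft_placement_ set, a group that requested
+// "/device:GPU:0" on a machine with no visible GPU reaches the soft placement
+// branch above; the device type and id are cleared, leaving (for example) the
+// local CPU devices as candidates instead of failing outright.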
+
+Status ColocationGraph::InitializeMembers() {
+  for (Node* node : graph_->op_nodes()) {
+    Status status = InitializeMember(*node, &members_[node->id()]);
+    if (!status.ok()) {
+      return AttachDef(status, *node);
+    }
+  }
+  return Status::OK();
+}
+
+string ColocationGraph::DebugString() {
+  std::unordered_set<int> roots;
+  std::vector<string> root_strings;
+  for (const Node* node : graph_->nodes()) {
+    if (!node->IsOp()) {
+      continue;
+    }
+    int node_root = FindRoot(node->id());
+    if (roots.count(node_root) == 0) {
+      root_strings.push_back(DebugInfo(node_root));
+      roots.insert(node_root);
+    }
+  }
+  return absl::StrJoin(root_strings, "\n");
+}
+
+// Returns debugging info for the node referred to by 'node_root'.
+string ColocationGraph::DebugInfo(const int node_root) {
+  string text(
+      "\nColocation Debug Info:\n"
+      "Colocation group had the following types and devices: ");
+
+  // If this node is part of a colocation group, then we want to
+  // collect the mapping of ops to supported devices, so that
+  // the user can see why an unsatisfiable placement occurred.
+
+  std::unordered_map<string, string> type_to_devices;
+  std::vector<const Node*> colocation_nodes;
+  int num_nodes_found = 0;
+
+  for (const Node* node : graph_->nodes()) {
+    if (!node->IsOp()) {
+      continue;
+    }
+    int id = node->id();
+    if (FindRoot(id) != node_root) {
+      continue;
+    }
+    ++num_nodes_found;
+    colocation_nodes.push_back(node);
+    const string& op_type = node->type_string();
+    string devices_registered;
+    for (const auto& device_type : members_[id].supported_device_types()) {
+      strings::StrAppend(&devices_registered,
+                         DeviceTypeString(device_type.first), " ");
+    }
+
+    type_to_devices[op_type] = std::move(devices_registered);
+  }
+
+  for (const auto& td : type_to_devices) {
+    strings::StrAppend(&text, "\n", td.first, ": ", td.second);
+  }
+  strings::StrAppend(&text,
+                     "\n\nColocation members and user-requested devices:");
+  for (const Node* node : colocation_nodes) {
+    strings::StrAppend(&text, "\n  ", node->name(), " (", node->type_string(),
+                       ") ", node->requested_device());
+  }
+  strings::StrAppend(&text, "\n");
+
+  if (num_nodes_found <= 0) {
+    text.clear();
+  }
+  return text;
+}
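+
+// A hypothetical rendering of the text built above:
+//
+//   Colocation Debug Info:
+//   Colocation group had the following types and devices:
+//   Assign: CPU
+//   VariableV2: CPU GPU
+//
+//   Colocation members and user-requested devices:
+//     v (VariableV2) /device:GPU:0
+//     v/Assign (Assign)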
+
+Status ColocationGraph::InitializeMemberWithAssignedDevice(
+    const string& assigned_device_name, const string& node_type,
+    bool must_be_full_name, Member* member) {
+  // This node has already been assigned to a device, so we
+  // respect this placement, after sanity-checking it.
+  // NOTE: Since any assignment must have been performed by
+  // the TensorFlow runtime, we consider errors in this branch to
+  // be INTERNAL.
+  TF_RETURN_IF_ERROR(member->SetAssignedDeviceName(assigned_device_name));
+  if (!must_be_full_name) {
+    return Status::OK();
+  }
+  // Since assigned device must be a full specification, do extra checks.
+  const Device* assigned_device =
+      device_set_->FindDeviceByName(assigned_device_name);
+  if (assigned_device == nullptr) {
+    return errors::Internal("Assigned device '", assigned_device_name,
+                            "' does not match any device");
+  }
+
+  for (const auto& d : member->supported_device_types()) {
+    if (DeviceType(assigned_device->attributes().device_type()) == d.first) {
+      return Status::OK();
+    }
+  }
+
+  return errors::Internal("Assigned device '", assigned_device_name,
+                          "' does not have registered OpKernel support "
+                          "for ",
+                          node_type);
+}
+
+Status ColocationGraph::InitializeMember(const Node& node, Member* member) {
+  TF_RETURN_IF_ERROR(member->SetParentAndSupportedDevices(node, device_types_));
+
+  if (node.has_assigned_device_name()) {
+    TF_RETURN_IF_ERROR(InitializeMemberWithAssignedDevice(
+        node.assigned_device_name(), node.type_string(), true, member));
+  } else {
+    // This node has not yet been assigned to a device, so we
+    // calculate any constraints due to the set of registered
+    // kernels and any (partial) user-provided device specification
+    // in the NodeDef.
+
+    // If no kernels are registered for this op type, fail with an error.
+    if (member->supported_device_types().empty()) {
+      std::set<string> registered_device_types;
+      for (Device* d : device_set_->devices()) {
+        registered_device_types.insert(d->device_type());
+      }
+      std::vector<string> attr_key_vals;
+      for (const auto& it : node.attrs()) {
+        const string& name = it.first;
+        const AttrValue& attr_value = it.second;
+        attr_key_vals.push_back(
+            strings::StrCat(name, "=", SummarizeAttrValue(attr_value)));
+      }
+      return errors::InvalidArgument(
+          "No OpKernel was registered to support Op '", node.type_string(),
+          "' used by ", errors::FormatNodeNameForError(node.name()),
+          "with these attrs: [", str_util::Join(attr_key_vals, ", "),
+          "]\n"
+          "Registered devices: [",
+          str_util::Join(registered_device_types, ", "), "]\n",
+          "Registered kernels:\n", KernelsRegisteredForOp(node.type_string()));
+    }
+
+    // If the NodeDef contains a device, then we interpret it as a
+    // (partial) device specification.
+    if (!node.requested_device().empty()) {
+      if (IsResourceGeneratorNode(node)) {
+        // Treat requested device on resource generating nodes as assigned
+        // device so that we don't override it.
+        TF_RETURN_IF_ERROR(InitializeMemberWithAssignedDevice(
+            node.requested_device(), node.type_string(), false, member));
+      } else {
+        // The user has specified a device in the NodeDef, try to find a
+        // valid device matching their specification in the set of
+        // devices.
+        // NOTE: The full name may specify a device that is not in
+        // n.supported_device_types(), but we check that in AssignDevice().
+        TF_RETURN_IF_ERROR(member->SetRequestedDeviceName(node));
+      }
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/colocation_graph.h b/tensorflow/core/common_runtime/colocation_graph.h
new file mode 100644
index 0000000..1261149
--- /dev/null
+++ b/tensorflow/core/common_runtime/colocation_graph.h
@@ -0,0 +1,253 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_COLOCATION_GRAPH_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_COLOCATION_GRAPH_H_
+
+#include <unordered_map>
+#include <vector>
+
+#include "absl/strings/str_join.h"
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/util/device_name_utils.h"
+#include "tensorflow/core/util/port.h"
+
+namespace tensorflow {
+
+// Represents a node in the disjoint node forest and the
+// accumulated constraints on the device used by that node.
+class Member {
+ public:
+  Member() = default;
+
+  Status SetParentAndSupportedDevices(const Node& node,
+                                      const std::vector<DeviceType>& types);
+
+  const DeviceNameUtils::ParsedName& requested_device_name() const {
+    return requested_device_name_;
+  }
+
+  Status SetAssignedDeviceName(const string& device_name);
+
+  Status SetRequestedDeviceName(const Node& node);
+
+  Status EnsureCompatibilityAcrossResourceEdge(
+      const Node& src, const Member& src_root,
+      const Node& dst, /*dst_root is this*/
+      bool log_device_placement);
+
+  const PrioritizedDeviceTypeVector& supported_device_types() const {
+    return supported_device_types_;
+  }
+
+  // If `dry_run` is true, just sets `new_root` and `old_root` and does not
+  // actually modify anything in the `tree`.
+  static void Merge(std::vector<Member>* tree, int x_root, int y_root,
+                    Member** new_root, Member** old_root, bool dry_run);
+
+  // tree is non-const because we can change some `parent` pointers in some
+  // members for more efficient future lookups. The vector itself is not
+  // changed.
+  static int FindRoot(std::vector<Member>* tree, int node_id);
+
+  Status MergeDeviceNames(const Member& other, bool allow_soft_placement);
+
+  // Updates this to contain the intersection of the device types in
+  // this and "other". If the intersection is empty, returns false and does
+  // not update this. Else returns true and updates this.
+  bool MergeSupportedDevices(const Member& other);
+
+  Status AssignDevice(const Node& node, bool allow_soft_placement);
+
+  void set_possible_devices(std::vector<Device*>&& devices) {
+    possible_devices_ = std::move(devices);
+  }
+  const std::vector<Device*>& possible_devices() { return possible_devices_; }
+
+  string DebugString();
+
+ private:
+  // The id of the node that is the parent of this one, or its own
+  // id if it is a root. A negative parent (the default) indicates that this
+  // member is invalid.
+  int parent_ = -1;
+
+  // A proxy for the depth of the tree that is used to prefer
+  // connecting smaller trees to larger trees when merging disjoint
+  // sets.
+  int rank_ = 0;
+
+  // Once colocation groups have been formed, the Placer starts actually
+  // choosing devices. All nodes in a group must be assigned to the same
+  // device. Once we assigned the first device to some node in this group,
+  // we set assigned_device_name_index to this device name's index in the
+  // graph.
+  // The `*_device_name_` fields will contain the parsed name of this device
+  // and `possible_devices`, if computed, will contain just this device.
+  // `assigned_device_name_index` is an optimization to avoid parsing and
+  // comparing device names. The value of -1 signals that a single device
+  // has not been chosen yet.
+  int assigned_device_name_index_ = -1;
+
+  // The merged form of the device requested for this node, with those of all
+  // of its children. requested_device_name_ is always kept a specialization
+  // (i.e. DeviceNameUtils::IsSpecialization) of assigned_device_name_. When
+  // no device is requested, this field is set to assigned_device_name_. As a
+  // specialization of assigned_device_name_, requested_device_name_
+  // represents the most specific form of all assigned and requested devices
+  // of this node and its children, if this node is a root.
+  // requested_device_name_ is used to finally select devices for nodes. We
+  // can override requested devices due to resource colocation constraints,
+  // but not assigned devices (unless soft placement is on).
+  DeviceNameUtils::ParsedName requested_device_name_;
+
+  // The merged form of the device assigned for this node, with
+  // those of all of its children.
+  // This field is used to raise errors due to unsatisfiable constraints.
+  // Can be a partial specification.
+  // INVARIANT: requested_device_name_ is always a
+  // DeviceNameUtils::IsSpecialization of assigned_device_name_.
+  DeviceNameUtils::ParsedName assigned_device_name_;
+
+  // The intersection of all device types supported by this node,
+  // and those of all of its children, in priority order
+  // of the preferred device.
+  PrioritizedDeviceTypeVector supported_device_types_;
+
+  // If this node is a root, stores the list of Devices to which this node
+  // and all of its children can be assigned. This list is empty if it has
+  // not yet been computed.
+  std::vector<Device*> possible_devices_;
+};
+
+// This class maintains the connected components of a colocation
+// constraint graph, and uses this information to assign a satisfying
+// device placement to the nodes of the graph.
+//
+// The typical usage pattern is:
+//
+//   Graph graph = ...;
+//   DeviceSet device_set = ...;
+//   ColocationGraph colocation_graph(&graph, &device_set, default_device,
+//                                    allow_soft_placement,
+//                                    log_device_placement);
+//
+//   // Add all the nodes of `graph` to `colocation_graph` and apply the
+//   // constraints implied by colocation groups and resource/reference edges.
+//   TF_RETURN_IF_ERROR(colocation_graph.Initialize());
+//
+//   // Add one or more additional colocation constraints.
+//   Node node_1 = *graph.FindNodeId(...);
+//   Node node_2 = *graph.FindNodeId(...);
+//   TF_RETURN_IF_ERROR(colocation_graph.ColocateNodes(node_1, node_2));
+//
+//   // Compute the feasible devices for each node, subject to the
+//   // accumulated constraints.
+//   for (Node* node : graph.nodes()) {
+//     const std::vector<Device*>* devices;
+//     TF_RETURN_IF_ERROR(colocation_graph.GetDevicesForNode(node, &devices));
+//   }
+//
+// This implementation uses the Union-Find algorithm to efficiently maintain
+// the connected components and incrementally adds edges via
+// ColocationGraph::ColocateNodes() invocations.
+//
+// ColocationGraph does not assign any devices to graph nodes. The
+// `log_device_placement` argument is used to log messages when a requested
+// device is ignored.
+class ColocationGraph {
+ public:
+  ColocationGraph(const Graph* graph, const DeviceSet* device_set,
+                  const Device* default_device, bool allow_soft_placement,
+                  bool log_device_placement);
+
+  // Adds each node of the Graph to this ColocationGraph as a singleton.
+  //
+  // NOTE: The implementation assumes that the ids of nodes passed to
+  // this method are dense and zero-based; the memory used will be linear in
+  // the largest node ID.
+  // NOTE: If this method returns an error, *this is left in an undefined
+  // state.
+  Status ColocateAllNodes();
+
+  Status ColocateResourceOrRefEdge(Node* src, Node* dst);
+
+  Status ColocateResourceAndRefEdges();
+
+  Status Initialize();
+
+  Status ColocateNodeToGroup(
+      std::unordered_map<StringPiece, const Node*, StringPieceHasher>*
+          colocation_group_root,
+      const Node* node, StringPiece colocation_group);
+
+  // Merge the (possibly disjoint) sets containing nodes "x" and
+  // "y". Returns OK if the all nodes in the union of these sets can
+  // be placed on the same device type.
+  //
+  // If this method returns an error, *this is unchanged.
+  Status ColocateNodes(const Node& x, const Node& y);
+
+  // This overload of ColocateNodes() allows a caller to provide the root node
+  // ids for the two nodes. For large graphs, this noticeably reduces the
+  // graph load time.
+  // If this method returns an error, *this is unchanged.
+  Status ColocateNodes(const Node& x, int x_root, const Node& y, int y_root);
+
+  // Limits the possible devices of `node`'s colocation group to the device
+  // to which `node` is assigned. This makes sure that all nodes in this
+  // colocation group will be assigned to the same device. Without this
+  // explicit restriction, heuristics can choose a different possible device
+  // for other nodes in the group.
+  Status LimitToAssignedDevice(const Node& node);
+
+  // For the given node, subject to the constraints previously given
+  // to this ColocationGraph, set its assigned_device_name. Returns OK
+  // if a satisfying device can be found, otherwise an error.
+  //
+  // Note: This method returns a pointer to a field within members_.
+  // The caller must not use the returned pointer after there is any possibility
+  // that the members_[i].possible_devices field has been modified.
+  Status GetDevicesForNode(Node* node,
+                           const std::vector<Device*>** possible_devices);
+
+  Status InitializeMembers();
+
+  string DebugString();
+
+  // Returns debugging info for the node referred to by 'node_root'.
+  string DebugInfo(const int node_root);
+
+  Status InitializeMemberWithAssignedDevice(const string& assigned_device_name,
+                                            const string& node_type,
+                                            bool must_be_full_name,
+                                            Member* member);
+
+  Status InitializeMember(const Node& node, Member* member);
+
+  // Returns the root node of the disjoint tree to which the node with the
+  // given id is connected.
+  int FindRoot(int node_id) { return Member::FindRoot(&members_, node_id); }
+
+  const Graph* const graph_;  // Not owned.
+  std::vector<Member> members_;
+  const DeviceSet* device_set_;  // Not owned.
+  const std::vector<DeviceType> device_types_;
+  const Device* default_device_;
+  const bool allow_soft_placement_;
+  const bool log_device_placement_;
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_COLOCATION_GRAPH_H_
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h
index aa64b5f..1b3fbcb 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.h
+++ b/tensorflow/core/common_runtime/eager/attr_builder.h
@@ -54,10 +54,6 @@
 Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name,
                       TF_AttrType* out, unsigned char* is_list);
 
-// Looks for 'attr_name' in 'm' and sets 'out' and 'is_list'.
-Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name,
-                      TF_AttrType* out, unsigned char* is_list);
-
 // KernelAndDevice::Init needs a NodeDef only to pass the attribute map through.
 // An AttrBuilder is a convenience class to help with that - providing a smaller
 // interface than NodeDefBuilder and avoiding expensive (unnecessary?) sanity
diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index 12e6483..cdd5632 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -351,14 +351,27 @@
   gtl::InsertOrUpdate(&kernel_cache_, cache_key, kernel);
 }
 
-bool EagerContext::ShouldStoreMetadata() {
+bool EagerContext::ShouldStoreGraphs() {
   mutex_lock ml(metadata_mu_);
-  return should_store_metadata_.load() || metadata_listener_ != nullptr;
+  return should_store_graphs_.load() || metadata_listener_ != nullptr;
 }
 
-void EagerContext::SetShouldStoreMetadata(bool value) {
+bool EagerContext::ShouldStoreStepStats() {
   mutex_lock ml(metadata_mu_);
-  should_store_metadata_.store(value);
+  return should_store_step_stats_.load() || metadata_listener_ != nullptr;
+}
+
+void EagerContext::SetShouldStoreGraphs(bool value) {
+  mutex_lock ml(metadata_mu_);
+  should_store_graphs_.store(value);
+  if (!value || metadata_listener_ != nullptr) {
+    run_metadata_.Clear();
+  }
+}
+
+void EagerContext::SetShouldStoreStepStats(bool value) {
+  mutex_lock ml(metadata_mu_);
+  should_store_step_stats_.store(value);
   if (!value || metadata_listener_ != nullptr) {
     run_metadata_.Clear();
   }
diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h
index 1f24109..330936e 100644
--- a/tensorflow/core/common_runtime/eager/context.h
+++ b/tensorflow/core/common_runtime/eager/context.h
@@ -183,8 +183,10 @@
 
   // TODO(apassos) clean up RunMetadata storage.
   mutex* MetadataMu() LOCK_RETURNED(metadata_mu_) { return &metadata_mu_; }
-  bool ShouldStoreMetadata() LOCKS_EXCLUDED(metadata_mu_);
-  void SetShouldStoreMetadata(bool value);
+  bool ShouldStoreStepStats() LOCKS_EXCLUDED(metadata_mu_);
+  void SetShouldStoreStepStats(bool value);
+  bool ShouldStoreGraphs() LOCKS_EXCLUDED(metadata_mu_);
+  void SetShouldStoreGraphs(bool value);
   RunMetadata* RunMetadataProto() { return &run_metadata_; }
   void ClearRunMetadata() EXCLUSIVE_LOCKS_REQUIRED(metadata_mu_);
 
@@ -284,7 +286,8 @@
       GUARDED_BY(cache_mu_);
 
   // Whether we should compute RunMetadata.
-  std::atomic<bool> should_store_metadata_{false};
+  std::atomic<bool> should_store_step_stats_{false};
+  std::atomic<bool> should_store_graphs_{false};
   mutex metadata_mu_;
   RunMetadata run_metadata_ GUARDED_BY(metadata_mu_);
   RunMetadataListener* metadata_listener_ GUARDED_BY(metadata_mu_) = nullptr;
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index c6e8573..f2af626 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -400,7 +400,7 @@
           device->name());
     }
     GraphCollector* graph_collector = nullptr;
-    if (ctx->ShouldStoreMetadata()) {
+    if (ctx->ShouldStoreGraphs()) {
       graph_collector = ctx->GetGraphCollector();
     }
     // Treat the function as multi_device only when we are not compiling
@@ -450,13 +450,15 @@
   }
   status = ValidateInputTypeAndPlacement(
       ctx, device, op, kernel,
-      ctx->ShouldStoreMetadata() ? ctx->RunMetadataProto() : nullptr);
+      ctx->ShouldStoreStepStats() ? ctx->RunMetadataProto() : nullptr);
   if (!status.ok()) return status;
   std::unique_ptr<NodeExecStats> maybe_stats;
   StepStats* maybe_step_stats = nullptr;
   GraphCollector* graph_collector = nullptr;
-  if (ctx->ShouldStoreMetadata()) {
+  if (ctx->ShouldStoreGraphs()) {
     graph_collector = ctx->GetGraphCollector();
+  }
+  if (ctx->ShouldStoreStepStats()) {
     maybe_step_stats = ctx->RunMetadataProto()->mutable_step_stats();
     int64 now_nanos = Env::Default()->NowNanos();
     maybe_stats.reset(new NodeExecStats);
@@ -919,6 +921,31 @@
                                    maybe_stats, maybe_step_stats,
                                    graph_collector));
   }
+  if (graph_collector != nullptr) {
+    mutex_lock ml(*ctx->MetadataMu());
+    {
+      GraphCollector* collector = ctx->GetGraphCollector();
+      mutex_lock mll(collector->mu);
+
+      // Adding to partition graphs for backward compatibility.
+      for (const auto& graph : collector->partitioned_graphs) {
+        *ctx->RunMetadataProto()->add_partition_graphs() = graph;
+      }
+
+      if (collector->dirty) {
+        auto* function_graphs = ctx->RunMetadataProto()->add_function_graphs();
+        *function_graphs->mutable_post_optimization_graph() =
+            collector->optimized_graph;
+        *function_graphs->mutable_pre_optimization_graph() =
+            collector->raw_graph;
+        for (const auto& graph : collector->partitioned_graphs) {
+          *function_graphs->add_partition_graphs() = graph;
+        }
+      }
+
+      collector->ClearGraphs();
+    }
+  }
   if (maybe_stats != nullptr) {
     int64 nanos = Env::Default()->NowNanos();
     maybe_stats->set_op_end_rel_micros(nanos / EnvTime::kMicrosToNanos -
@@ -927,34 +954,28 @@
     maybe_stats->set_all_end_rel_micros(nanos / EnvTime::kMicrosToNanos -
                                         maybe_stats->all_start_micros());
     maybe_stats->set_all_end_rel_nanos(nanos - maybe_stats->all_start_nanos());
-    if (ctx->ShouldStoreMetadata()) {
+    if (ctx->ShouldStoreStepStats()) {
       mutex_lock ml(*ctx->MetadataMu());
       {
-        GraphCollector* collector = ctx->GetGraphCollector();
-        mutex_lock mll(collector->mu);
-        for (const auto& graph : collector->graphs) {
-          *ctx->RunMetadataProto()->add_partition_graphs() = graph;
+        auto* step_stats = ctx->RunMetadataProto()->mutable_step_stats();
+        // Lazily initialize the RunMetadata with information about all devices
+        // if this is the first call.
+        while (step_stats->dev_stats_size() < ctx->devices()->size()) {
+          step_stats->add_dev_stats();
         }
-        collector->graphs.clear();
-      }
-      auto* step_stats = ctx->RunMetadataProto()->mutable_step_stats();
-      // Lazily initialize the RunMetadata with information about all devices if
-      // this is the first call.
-      while (step_stats->dev_stats_size() < ctx->devices()->size()) {
-        step_stats->add_dev_stats();
-      }
-      // Find the current device's index.
-      int device_idx = 0;
-      for (int i = 0; i < ctx->devices()->size(); ++i) {
-        if (ctx->devices()->at(i) == device) {
-          device_idx = i;
-          break;
+        // Find the current device's index.
+        int device_idx = 0;
+        for (int i = 0; i < ctx->devices()->size(); ++i) {
+          if (ctx->devices()->at(i) == device) {
+            device_idx = i;
+            break;
+          }
         }
+        // Populate the device stats for this device.
+        auto* dev_stats = step_stats->mutable_dev_stats(device_idx);
+        dev_stats->set_device(device->name());
+        *dev_stats->add_node_stats() = *maybe_stats;
       }
-      // Populate the device stats for this device.
-      auto* dev_stats = step_stats->mutable_dev_stats(device_idx);
-      dev_stats->set_device(device->name());
-      *dev_stats->add_node_stats() = *maybe_stats;
     }
   }
   DCHECK_EQ(num_retvals, outputs.size());
diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc
index 41b4608..60b8075 100644
--- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc
+++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc
@@ -111,9 +111,9 @@
 
     options.optimize_graph_fn = std::bind(
         grappler::OptimizeGraph, std::placeholders::_1, std::placeholders::_2,
-        std::placeholders::_3, std::placeholders::_4, config_proto,
-        function_def->signature().name(), optimization_options,
-        std::placeholders::_5);
+        std::placeholders::_3, std::placeholders::_4, std::placeholders::_5,
+        config_proto, function_def->signature().name(), optimization_options,
+        std::placeholders::_6);
   }
 #endif
   options.graph_collector = graph_collector;
@@ -147,9 +147,11 @@
     memory->set_peak_bytes(std::get<1>(sizes));
     memory->set_live_bytes(std::get<2>(sizes));
 
-    AllocatorStats allocator_stats;
-    allocator_pair.first->GetStats(&allocator_stats);
-    memory->set_allocator_bytes_in_use(allocator_stats.bytes_in_use);
+    absl::optional<AllocatorStats> allocator_stats =
+        allocator_pair.first->GetStats();
+    if (allocator_stats) {
+      memory->set_allocator_bytes_in_use(allocator_stats->bytes_in_use);
+    }
     allocator_pair.second->GetRecordsAndUnRef();
   }
   auto* ms = stats->mutable_memory_stats();
@@ -194,6 +196,7 @@
   params.slice_reader_cache = &slice_reader_cache_;
   params.rendezvous = rendez_;
   params.cancellation_manager = &cm_;
+  cm_.Reset();
   params.log_memory = log_memory_;
   std::unique_ptr<StepStatsCollector> step_stats_collector;
   if (stats != nullptr) {
@@ -258,6 +261,7 @@
   opts.rendezvous = nullptr;
   opts.create_rendezvous = true;
   opts.cancellation_manager = &cm_;
+  cm_.Reset();
   // eager runtime does not yet support collective ops.
   opts.collective_executor = nullptr;
   opts.allow_dead_tensors = true;
diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h
index e4ccb11..ac99fdb 100644
--- a/tensorflow/core/common_runtime/eager/tensor_handle.h
+++ b/tensorflow/core/common_runtime/eager/tensor_handle.h
@@ -133,7 +133,7 @@
 
  private:
   // If the contents of the Tensor pointed to by this handle is yet to be
-  // computed by a EagerNode, this function will block till that compuatation is
+  // computed by an EagerNode, this function will block till that computation
+  // is
   // done and the handle is "ready".
   Status WaitReady();
   Status WaitForNode(uint64 node_id, bool return_if_is_ready);
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 05f3e85..7e2a85b 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -46,6 +46,7 @@
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -55,6 +56,7 @@
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/context.h"
+#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -1358,6 +1360,9 @@
 
   // Clean up when this executor is done.
   void Finish();
+  // Schedule Finish() on a separate thread if it needs to wait for deferred
+  // async ops to complete; otherwise run it on the current thread.
+  void ScheduleFinish();
 
   // A standalone routine for this expression so that we can express
   // that we don't want thread safety analysis on this reference (it's
@@ -1778,7 +1783,7 @@
           const bool completed =
               NodeDone(s, state->item->node, ready, stats, nullptr);
           delete state;
-          if (completed) Finish();
+          if (completed) ScheduleFinish();
         };
         nodestats::SetOpStart(stats);
         device->ComputeAsync(async, &state->ctx, done);
@@ -1865,7 +1870,7 @@
   }  // while !inline_ready.empty()
 
   // This thread of computation is done if completed = true.
-  if (completed) Finish();
+  if (completed) ScheduleFinish();
 }
 
 Status ExecutorState::PrepareInputs(const NodeItem& item, Entry* first_input,
@@ -2421,6 +2426,25 @@
   }
 }
 
+void ExecutorState::ScheduleFinish() {
+  int num_deferred_ops;
+  {
+    mutex_lock lock(num_deferred_ops_mu_);
+    num_deferred_ops = num_deferred_ops_;
+  }
+  if (num_deferred_ops > 0) {
+    // Finish() may be blocked waiting for deferred async ops to complete. The
+    // execution of deferred async ops may be waiting for non-enqueued ops of
+    // other executors to complete. So running Finish() on the current thread
+    // (inter-op threadpool thread) may lead to a deadlock due to threadpool
+    // exhaustion. Instead, we run it on a separate thread to unblock the
+    // threadpool thread.
+    Env::Default()->SchedClosure([this]() { Finish(); });
+  } else {
+    Finish();
+  }
+}
+
 void ExecutorState::Finish() {
   mu_.lock();
   auto status = status_;
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 82bbb7e..e602b5d 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -803,12 +803,24 @@
 
 namespace {
 // Removes all stateless nodes that do not contribute to a return
-// value from the function body.  Unlike `RemoveDeadNodes()`, which is
+// value from the function body. Unlike `RemoveDeadNodes()`, which is
 // triggered by `OptimizerOptions.do_function_inlining`, this pass
 // ignores the SINK node, from which (by definition) all nodes are
-// reverse reachable.
-void PruneFunctionBody(Graph* g) {
-  VLOG(2) << "Pruning function body";
+// reverse reachable, and preserves all nodes that are reachable from
+// control output nodes.
+//
+// TODO(ezhulenev, skyewm): Function body should not have special treatment of
+// stateful ops, graph should encode nodes that must execute with `control_ret`
+// and `control_output`.
+void PruneFunctionBody(const FunctionDef& fdef, Graph* g) {
+  VLOG(2) << "Pruning function body: function_name=" << fdef.signature().name();
+
+  // `control_ret` nodes must always be executed.
+  std::unordered_set<StringPiece, StringPieceHasher> control_ret_nodes;
+  for (const auto& control_ret : fdef.control_ret()) {
+    control_ret_nodes.insert(control_ret.second);
+  }
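+  // For example, a FunctionDef carrying
+  //   control_ret { key: "must_execute" value: "add" }
+  // forces the "add" node to survive pruning below even when no data output
+  // depends on it (see DoNotPruneControlOutputsFromBody in function_test.cc).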
+
   std::unordered_set<const Node*> nodes;
   for (auto n : g->nodes()) {
     // NOTE(mrry): "_Retval" nodes are stateful, and so will be added
@@ -818,8 +830,9 @@
     // TODO(mrry): Investigate whether the `n->IsControlFlow()` test is
     // still needed. It would be preferable to prune entire loops and/or
     // conditionals if they are not used in the graph.
-    if (n->IsControlFlow() || n->IsDataset() ||
-        (n->op_def().is_stateful() && n->type_string() != kArgOp)) {
+    if (n->IsControlFlow() ||
+        (n->op_def().is_stateful() && n->type_string() != kArgOp) ||
+        (control_ret_nodes.find(n->name()) != control_ret_nodes.end())) {
       nodes.insert(n);
     }
   }
@@ -846,7 +859,7 @@
   std::unique_ptr<Graph> g(new Graph(lib_def));
   CopyGraph(*fbody->graph, g.get());
 
-  PruneFunctionBody(g.get());
+  PruneFunctionBody(fbody->fdef, g.get());
   optimizer_.Optimize(this, env(), device(), &g, /*shape_map=*/nullptr);
   TF_RETURN_IF_ERROR(EnsureMemoryTypes(DeviceType(device()->device_type()),
                                        device()->name(), g.get()));
@@ -1392,6 +1405,12 @@
   if (static_cast<size_t>(node->num_outputs()) != fbody->ret_nodes.size()) {
     return false;
   }
+  // TODO(ezhulenev): Currently common_runtime function inlining can't guarantee
+  // that all side-effectful ops will be executed after inlining. See Grappler
+// function_optimizer for details. Unify all function inlining mechanisms.
+  if (!fbody->control_ret_nodes.empty()) {
+    return false;
+  }
   for (int i = 0; i < node->num_inputs(); ++i) {
     if (node->input_type(i) != fbody->arg_types[i]) return false;
   }
@@ -1424,6 +1443,7 @@
     if (e->IsControlEdge()) {
       if (input_control_node == nullptr) {
         input_control_node = AddNoOp(g);
+        input_control_node->set_requested_device(caller->def().device());
       }
       g->AddControlEdge(e->src(), input_control_node);
     } else {
@@ -1684,6 +1704,7 @@
       graph(g),
       arg_types(arg_t.begin(), arg_t.end()),
       ret_types(ret_t.begin(), ret_t.end()) {
+  // 1. Find regular Arg/Ret nodes.
   this->arg_nodes.resize(arg_types.size());
   this->ret_nodes.resize(ret_types.size());
   for (Node* n : this->graph->op_nodes()) {
@@ -1701,6 +1722,17 @@
     CHECK_LT(index, node_vec->size());
     (*node_vec)[index] = n;
   }
+  // 2. Find ControlRet nodes that must always be executed.
+  std::unordered_set<StringPiece, StringPieceHasher> control_ret_node_names;
+  for (const auto& control_ret : fdef.control_ret()) {
+    control_ret_node_names.insert(control_ret.second);
+  }
+  this->control_ret_nodes.reserve(control_ret_node_names.size());
+  for (Node* n : this->graph->op_nodes()) {
+    if (control_ret_node_names.count(n->name()) > 0) {
+      this->control_ret_nodes.push_back(n);
+    }
+  }
 }
 
 FunctionBody::~FunctionBody() { delete this->graph; }
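For reference, the keep-set seeding that the pruning hunk adds can be shown in isolation. A rough sketch with toy Graph/Node stand-ins (not the TF classes); the reverse-reachability pruning itself is unchanged:

#include <string>
#include <unordered_set>
#include <vector>

// Toy stand-in for a graph node, for illustration only.
struct Node {
  std::string name;
  bool stateful = false;
};

// Seeds the keep-set the way the hunk above does: stateful nodes plus every
// node named as a control_ret target survive pruning.
std::unordered_set<const Node*> SeedKeepSet(
    const std::vector<Node>& nodes,
    const std::unordered_set<std::string>& control_ret_targets) {
  std::unordered_set<const Node*> keep;
  for (const Node& n : nodes) {
    if (n.stateful || control_ret_targets.count(n.name) > 0) {
      keep.insert(&n);
    }
  }
  return keep;
}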
diff --git a/tensorflow/core/common_runtime/function.h b/tensorflow/core/common_runtime/function.h
index 94b6bee..37df90f 100644
--- a/tensorflow/core/common_runtime/function.h
+++ b/tensorflow/core/common_runtime/function.h
@@ -79,6 +79,7 @@
   DataTypeVector ret_types;
   gtl::InlinedVector<Node*, 4> arg_nodes;
   gtl::InlinedVector<Node*, 4> ret_nodes;
+  gtl::InlinedVector<Node*, 4> control_ret_nodes;
 
   FunctionBody() {}
   FunctionBody(const FunctionDef& f, DataTypeSlice arg_types,
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index 97e46f4..83694d2 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -945,6 +945,48 @@
   EXPECT_EQ(expected_node_names, executed_node_names);
 }
 
+TEST_F(FunctionLibraryRuntimeTest, DoNotPruneControlOutputsFromBody) {
+  // `add` node is not required to compute regular output `o`, but it must
+  // execute because it is in `control_ret`.
+  const FunctionDef func =
+      FDH::Create("FunctionWithControlOutputs", {"i: float"}, {"o: float"}, {},
+                  {
+                      {{"add"}, "Add", {"i", "i"}, {{"T", DT_FLOAT}}},
+                      {{"ret"}, "Mul", {"i", "i"}, {{"T", DT_FLOAT}}},
+                  },
+                  /*ret_def=*/{{"o", "ret:z:0"}},
+                  /*control_ret_def=*/{{"must_execute", "add"}});
+
+  Init({func});
+
+  auto x = test::AsTensor<float>({1.25});
+  Tensor z;
+
+  FunctionLibraryRuntime::Handle handle;
+  TF_CHECK_OK(Instantiate(flr1_, "FunctionWithControlOutputs", {}, &handle));
+
+  StepStats stats;
+  StepStatsCollector stats_collector(&stats);
+  FunctionLibraryRuntime::Options opts;
+  opts.stats_collector = &stats_collector;
+  TF_CHECK_OK(Run(flr1_, handle, opts, {x}, {&z}));
+  TF_CHECK_OK(flr1_->ReleaseHandle(handle));
+
+  TF_CHECK_OK(
+      InstantiateAndRun(flr1_, "FunctionWithControlOutputs", {}, {x}, {&z}));
+  test::ExpectTensorEqual<float>(z, test::AsTensor<float>({1.25 * 1.25}));
+
+  stats_collector.FinalizeAndSwap(&stats);
+
+  std::set<string> expected_node_names(
+      {"_SOURCE", "i", "add", "ret", "o_RetVal"});
+  std::set<string> executed_node_names;
+  for (const auto& node_stats : stats.dev_stats()[0].node_stats()) {
+    executed_node_names.insert(node_stats.node_name());
+  }
+  EXPECT_EQ(expected_node_names, executed_node_names);
+}
+
 // Constant folding generates names using a global counter.
 // This function invokes constant folding and parses the counter
 // from the generated node name.
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 60e82ed..9c0abd9 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -36,14 +36,17 @@
 namespace {
 
 static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
-                       int64 max_bytes_in_use, int64 max_alloc_size) {
-  AllocatorStats stats;
-  a->GetStats(&stats);
-  LOG(INFO) << "Alloc stats: " << std::endl << stats.DebugString();
-  EXPECT_EQ(stats.bytes_in_use, bytes_in_use);
-  EXPECT_EQ(stats.max_bytes_in_use, max_bytes_in_use);
-  EXPECT_EQ(stats.num_allocs, num_allocs);
-  EXPECT_EQ(stats.max_alloc_size, max_alloc_size);
+                       int64 peak_bytes_in_use, int64 largest_alloc_size) {
+  absl::optional<AllocatorStats> stats = a->GetStats();
+  EXPECT_TRUE(stats);
+  if (!stats) {
+    return;
+  }
+  LOG(INFO) << "Alloc stats: " << std::endl << stats->DebugString();
+  EXPECT_EQ(stats->bytes_in_use, bytes_in_use);
+  EXPECT_EQ(stats->peak_bytes_in_use, peak_bytes_in_use);
+  EXPECT_EQ(stats->num_allocs, num_allocs);
+  EXPECT_EQ(stats->largest_alloc_size, largest_alloc_size);
 }
 
 TEST(GPUBFCAllocatorTest, NoDups) {
@@ -291,9 +294,10 @@
     a.DeallocateRaw(existing_ptrs[i]);
   }
 
-  AllocatorStats stats;
-  a.GetStats(&stats);
-  LOG(INFO) << "Alloc stats: \n" << stats.DebugString();
+  absl::optional<AllocatorStats> stats = a.GetStats();
+  if (stats) {
+    LOG(INFO) << "Alloc stats: \n" << stats->DebugString();
+  }
 }
 
 TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
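The new GetStats() contract is: return absl::nullopt when no statistics are tracked, and a populated AllocatorStats otherwise. A caller-side sketch with an abbreviated AllocatorStats (fields reduced for illustration; not the full TF header):

#include <cstdint>
#include <iostream>

#include "absl/types/optional.h"

// Abbreviated stand-in for tensorflow::AllocatorStats.
struct AllocatorStats {
  int64_t num_allocs = 0;
  int64_t bytes_in_use = 0;
  int64_t peak_bytes_in_use = 0;
};

class Allocator {
 public:
  virtual ~Allocator() = default;
  // Returns absl::nullopt when the allocator keeps no statistics.
  virtual absl::optional<AllocatorStats> GetStats() { return absl::nullopt; }
};

void LogStats(Allocator* a) {
  if (absl::optional<AllocatorStats> stats = a->GetStats()) {
    std::cout << "in use: " << stats->bytes_in_use
              << ", peak: " << stats->peak_bytes_in_use << "\n";
  }  // Allocators without stats are skipped silently.
}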
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index c22bfce..0727196 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -132,8 +132,8 @@
                                        MASK_BYTES);
 }
 
-void GPUDebugAllocator::GetStats(AllocatorStats* stats) {
-  base_allocator_->GetStats(stats);
+absl::optional<AllocatorStats> GPUDebugAllocator::GetStats() {
+  return base_allocator_->GetStats();
 }
 
 void GPUDebugAllocator::ClearStats() { base_allocator_->ClearStats(); }
@@ -208,8 +208,8 @@
   return base_allocator_->AllocatedSize(ptr);
 }
 
-void GPUNanResetAllocator::GetStats(AllocatorStats* stats) {
-  base_allocator_->GetStats(stats);
+absl::optional<AllocatorStats> GPUNanResetAllocator::GetStats() {
+  return base_allocator_->GetStats();
 }
 
 void GPUNanResetAllocator::ClearStats() { base_allocator_->ClearStats(); }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 17757a1..fa0394c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -43,7 +43,7 @@
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
   int64 AllocationId(const void* ptr) override;
-  void GetStats(AllocatorStats* stats) override;
+  absl::optional<AllocatorStats> GetStats() override;
   void ClearStats() override;
 
   // For testing.
@@ -71,7 +71,7 @@
   void DeallocateRaw(void* ptr) override;
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
-  void GetStats(AllocatorStats* stats) override;
+  absl::optional<AllocatorStats> GetStats() override;
   void ClearStats() override;
 
  private:
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 80d221a..607193a 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -280,23 +280,19 @@
   timestamped_allocator_ =
       options.config.gpu_options().experimental().timestamped_allocator();
   if (timestamped_allocator_ || pending_cap_ > 0) {
-    std::unique_ptr<SharedCounter> timing_counter;
+    SharedCounter* timing_counter = nullptr;
     if (timestamped_allocator_) {
       // In this case the SharedCounter was already created and set in the
-      // associated Allocator, with ownership by GPUProcessState.  Here we take
-      // over ownership of that SharedAllocator to transfer it to the
-      // GPUKernelTracker.
+      // associated Allocator, with ownership by GPUProcessState.
+      // The GPUKernelTracker will use this SharedCounter, instead of
+      // owning its own.
       timing_counter =
-          GPUProcessState::singleton()->ReleaseGPUAllocatorCounter(tf_gpu_id);
-      DCHECK(timing_counter.get());
+          GPUProcessState::singleton()->GPUAllocatorCounter(tf_gpu_id);
+      DCHECK(timing_counter);
     } else {
       DCHECK_GT(pending_cap_, 0);
-      // In this case we need a SharedCounter to be owned by GPUKernelTracker
-      // but one was not created for use by the Allocator, so we create one.
-      timing_counter.reset(new SharedCounter);
     }
-    kernel_tracker_.reset(
-        new GPUKernelTracker(Env::Default(), std::move(timing_counter)));
+    kernel_tracker_.reset(new GPUKernelTracker(Env::Default(), timing_counter));
   }
 }
 
@@ -1134,21 +1130,24 @@
                             tf_gpu_id.value(), " with ", memory_limit,
                             " bytes of memory.");
   }
-  AllocatorStats stats;
-  gpu_allocator->GetStats(&stats);
+  absl::optional<AllocatorStats> stats = gpu_allocator->GetStats();
+  if (!stats) {
+    return errors::Internal("No allocator statistics");
+  }
   // 'memory_limit' is the required memory size, but if the allocator with given
   // tf_gpu_id was created before, we'll use it instead of creating a new one
   // (as TF gpu device is a shared resource), in which case the actual memory
   // limit represented by 'stats.bytes_limit' used by that allocator may be
   // different (which should be an error).
   //
-  // TODO(laigd): report error if memory_limit doesn't match stats.bytes_limit.
+  // TODO(laigd): report error if memory_limit doesn't match stats->bytes_limit.
+  int64 bytes_limit = stats->bytes_limit ? *stats->bytes_limit : 0;
   std::unique_ptr<BaseGPUDevice> gpu_device = CreateGPUDevice(
-      options, device_name, static_cast<Bytes>(stats.bytes_limit), dev_locality,
+      options, device_name, static_cast<Bytes>(bytes_limit), dev_locality,
       tf_gpu_id, GetShortDeviceDescription(platform_gpu_id, desc),
       gpu_allocator, ProcessState::singleton()->GetCPUAllocator(numa_node));
   LOG(INFO) << "Created TensorFlow device (" << device_name << " with "
-            << (stats.bytes_limit >> 20) << " MB memory) -> physical GPU ("
+            << (bytes_limit >> 20) << " MB memory) -> physical GPU ("
             << GetShortDeviceDescription(platform_gpu_id, desc) << ")";
   TF_RETURN_IF_ERROR(gpu_device->Init(options));
   devices->push_back(std::move(gpu_device));
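Since bytes_limit is itself an optional field inside AllocatorStats, the default-to-zero above could equivalently use value_or. A one-line sketch, assuming the field is absl::optional<int64>:

#include <cstdint>

#include "absl/types/optional.h"

// Hypothetical helper: 0 when no byte limit was recorded.
int64_t BytesLimitOrZero(const absl::optional<int64_t>& bytes_limit) {
  return bytes_limit.value_or(0);
}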
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 33f0585..f8f2a2e 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -184,11 +184,18 @@
 // time of the most recently terminated kernel.
 class GPUKernelTracker {
  public:
-  explicit GPUKernelTracker(Env* env,
-                            std::unique_ptr<SharedCounter> timing_counter)
-      : env_(env),
-        timing_counter_(std::move(timing_counter)),
-        pending_kernels_(64) {}
+  // If a SharedCounter is shared with an allocator, the allocator side owns
+  // it: allocators are initialized once per process, whereas devices are
+  // created per-session.
+  explicit GPUKernelTracker(Env* env, SharedCounter* timing_counter)
+      : env_(env), timing_counter_(timing_counter), pending_kernels_(64) {
+    if (!timing_counter_) {
+      // There is no preexisting counter owned by GPUProcessState, i.e.
+      // pending_cap > 0 but timestamped_allocator == false.
+      owned_counter_.reset(new SharedCounter);
+      timing_counter_ = owned_counter_.get();
+    }
+  }
 
   // Record that a GPU kernel has just been enqueued on the compute stream.
   // Inserts a new timing counter value in a new PendingKernel record appended
@@ -222,7 +229,8 @@
 
  private:
   Env* env_;
-  std::unique_ptr<SharedCounter> timing_counter_;
+  SharedCounter* timing_counter_;
+  std::unique_ptr<SharedCounter> owned_counter_;
 
   // Records when a kernel was queued for execution.  Kernel launches are
   // identified by a unique count value from a per-GPU device timing counter.
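The borrow-or-own arrangement in the new constructor generalizes to a small pattern: accept a non-owning pointer, and lazily create an owned instance only when none is supplied. A minimal sketch with SharedCounter reduced to an atomic (illustrative only, not the TF type):

#include <atomic>
#include <cstdint>
#include <memory>

using SharedCounter = std::atomic<int64_t>;

class Tracker {
 public:
  // A null `counter` means no process-owned counter exists; the tracker then
  // creates and owns one. Otherwise it borrows the caller's counter, whose
  // lifetime must outlive the tracker.
  explicit Tracker(SharedCounter* counter) : counter_(counter) {
    if (counter_ == nullptr) {
      owned_ = std::make_unique<SharedCounter>(0);
      counter_ = owned_.get();
    }
  }
  int64_t Next() { return counter_->fetch_add(1) + 1; }

 private:
  SharedCounter* counter_;                // Always valid after construction.
  std::unique_ptr<SharedCounter> owned_;  // Set only in the owning case.
};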
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
index fba937a..2628cd4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -280,14 +280,13 @@
 class GPUKernelTrackerTest : public ::testing::Test {
  protected:
   void SetUp() {
-    std::unique_ptr<SharedCounter> counter(new SharedCounter);
-    timing_counter_ = counter.get();
+    timing_counter_.reset(new SharedCounter);
     kernel_tracker_.reset(
-        new GPUKernelTracker(Env::Default(), std::move(counter)));
+        new GPUKernelTracker(Env::Default(), timing_counter_.get()));
   }
 
   std::unique_ptr<GPUKernelTracker> kernel_tracker_;
-  SharedCounter* timing_counter_ = nullptr;
+  std::unique_ptr<SharedCounter> timing_counter_;
 };
 
 TEST_F(GPUKernelTrackerTest, basic) {
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
index 7804596..39883d3 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@@ -160,8 +160,7 @@
 #endif  // GOOGLE_CUDA
 }
 
-std::unique_ptr<SharedCounter> GPUProcessState::ReleaseGPUAllocatorCounter(
-    TfGpuId tf_gpu_id) {
+SharedCounter* GPUProcessState::GPUAllocatorCounter(TfGpuId tf_gpu_id) {
   DCHECK(process_state_);
 #if GOOGLE_CUDA
   GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
@@ -171,7 +170,7 @@
   }
 
   AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()];
-  return std::move(allocator_parts.counter);
+  return allocator_parts.counter.get();
 #else
   return nullptr;
 #endif
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
index c7c9f3a..861157c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
@@ -110,7 +110,7 @@
   // Returns bus_id for the given GPU id.
   virtual int BusIdForGPU(TfGpuId tf_gpu_id);
 
-  std::unique_ptr<SharedCounter> ReleaseGPUAllocatorCounter(TfGpuId tf_gpu_id);
+  SharedCounter* GPUAllocatorCounter(TfGpuId tf_gpu_id);
 
  protected:
   // GPUProcessState is a singleton that should not normally be deleted except
diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc
index f1fcca1..2a6d6f5 100644
--- a/tensorflow/core/common_runtime/local_device.cc
+++ b/tensorflow/core/common_runtime/local_device.cc
@@ -19,6 +19,7 @@
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/eigen_thread_pool.h"
 #include "tensorflow/core/common_runtime/process_state.h"
+#include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/platform/byte_order.h"
 #include "tensorflow/core/platform/cpu_feature_guard.h"
@@ -53,15 +54,22 @@
 
   explicit EigenThreadPoolInfo(const SessionOptions& options, int numa_node,
                                Allocator* allocator) {
+    // Use session setting if specified.
     int32 intra_op_parallelism_threads =
         options.config.intra_op_parallelism_threads();
+    // If no session setting, use environment setting.
     if (intra_op_parallelism_threads == 0) {
-      intra_op_parallelism_threads = port::NumSchedulableCPUs();
-      if (numa_node != port::kNUMANoAffinity) {
-        // Assume that CPUs are equally distributed over available NUMA nodes.
-        // This may not be true, but there isn't currently a better way of
-        // determining the number of CPUs specific to the requested node.
-        intra_op_parallelism_threads /= port::NUMANumNodes();
+      static int env_num_threads = NumIntraOpThreadsFromEnvironment();
+      intra_op_parallelism_threads = env_num_threads;
+      // If no session setting or environment, compute a reasonable default.
+      if (intra_op_parallelism_threads == 0) {
+        intra_op_parallelism_threads = port::NumSchedulableCPUs();
+        if (numa_node != port::kNUMANoAffinity) {
+          // Assume that CPUs are equally distributed over available NUMA nodes.
+          // This may not be true, but there isn't currently a better way of
+          // determining the number of CPUs specific to the requested node.
+          intra_op_parallelism_threads /= port::NUMANumNodes();
+        }
       }
     }
     ThreadOptions thread_opts;
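The resolution order introduced above is: session config first, then an environment-derived value cached in a function-local static, then a CPU-count heuristic divided across NUMA nodes. A standalone sketch of that cascade; the env-var name and helper below are stand-ins, not the TF functions:

#include <cstdlib>
#include <thread>

// Stand-in for an env-derived setting; the variable name is hypothetical.
int NumThreadsFromEnv() {
  const char* v = std::getenv("EXAMPLE_INTRA_OP_THREADS");
  return v ? std::atoi(v) : 0;
}

int ResolveIntraOpThreads(int session_setting, int numa_nodes) {
  if (session_setting > 0) return session_setting;  // 1. Session config wins.
  static int env_threads = NumThreadsFromEnv();     // 2. Cached env setting.
  if (env_threads > 0) return env_threads;
  // 3. Heuristic default: assume CPUs are spread evenly over NUMA nodes.
  int n = static_cast<int>(std::thread::hardware_concurrency());
  if (numa_nodes > 1) n /= numa_nodes;
  return n > 0 ? n : 1;
}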
diff --git a/tensorflow/core/common_runtime/lower_while_op.cc b/tensorflow/core/common_runtime/lower_while_op.cc
index 7552838..f1c3bbd 100644
--- a/tensorflow/core/common_runtime/lower_while_op.cc
+++ b/tensorflow/core/common_runtime/lower_while_op.cc
@@ -200,6 +200,7 @@
                            .Input(NodeOut(edge->src(), edge->src_output()))
                            .Attr("frame_name", name_)
                            .Attr("parallel_iterations", parallel_iterations_)
+                           .Device(while_op_->requested_device())
                            .Finalize(graph_, &enter_node));
     enter_nodes_[edge->dst_input()] = enter_node;
   }
@@ -216,6 +217,7 @@
     TF_RETURN_IF_ERROR(NodeBuilder(NewName("LoopControlInputs"), "NoOp",
                                    graph_->op_registry(), &debug_info_)
                            .ControlInputs(control_inputs)
+                           .Device(while_op_->requested_device())
                            .Finalize(graph_, &incoming_control_node));
     for (Node* n : enter_nodes_) {
       graph_->AddControlEdge(incoming_control_node, n);
@@ -231,6 +233,7 @@
         NodeBuilder(NewName("merge"), "Merge", graph_->op_registry(),
                     &debug_info_)
             .Input({NodeOut(enter_node, 0), NodeOut(enter_node, 0)})
+            .Device(while_op_->requested_device())
             .Finalize(graph_, &merge_node));
     merge_nodes_.emplace_back(merge_node);
   }
@@ -241,6 +244,7 @@
   for (Node* merge_node : merge_nodes_) {
     cond_call_builder_.Input(NodeOut(merge_node, 0));
   }
+  cond_call_builder_.Device(while_op_->requested_device());
   TF_RETURN_IF_ERROR(cond_call_builder_.Finalize(graph_, &cond_call_node_));
   // Add a control edge to make sure the Const nodes in the cond function
   // are in the same frame as the rest of the function, otherwise
@@ -249,6 +253,7 @@
   TF_RETURN_IF_ERROR(NodeBuilder(NewName("LoopCond"), "LoopCond",
                                  graph_->op_registry(), &debug_info_)
                          .Input(NodeOut(cond_call_node_, 0))
+                         .Device(while_op_->requested_device())
                          .Finalize(graph_, &loop_cond_node_));
   return Status::OK();
 }
@@ -270,6 +275,7 @@
                                    graph_->op_registry(), &debug_info_)
                            .Input(NodeOut(merge_nodes_[i], 0))
                            .Input(NodeOut(loop_cond_node_, 0))
+                           .Device(while_op_->requested_device())
                            .Finalize(graph_, &switch_node));
     switch_nodes_.emplace_back(switch_node);
   }
@@ -280,6 +286,7 @@
   for (Node* switch_node : switch_nodes_) {
     body_call_builder_.Input(NodeOut(switch_node, 1));
   }
+  body_call_builder_.Device(while_op_->requested_device());
   TF_RETURN_IF_ERROR(body_call_builder_.Finalize(graph_, &body_call_node_));
   // Add a control edge to make sure the Const nodes in the body function
   // are in the same frame as the rest of the function, otherwise
@@ -296,6 +303,7 @@
   TF_RETURN_IF_ERROR(NodeBuilder(NewName("loop_body_control"), op_type,
                                  graph_->op_registry(), &debug_info_)
                          .Input(NodeOut(switch_nodes_[0], 1))
+                         .Device(while_op_->requested_device())
                          .Finalize(graph_, &body_control_node_));
   graph_->AddControlEdge(body_control_node_, body_call_node_);
   return Status::OK();
@@ -309,6 +317,7 @@
     TF_RETURN_IF_ERROR(NodeBuilder(NewName("exit"), "Exit",
                                    graph_->op_registry(), &debug_info_)
                            .Input(NodeOut(switch_node, 0))
+                           .Device(while_op_->requested_device())
                            .Finalize(graph_, &exit_node));
     exit_nodes_.emplace_back(exit_node);
     outputs.emplace_back(NodeOut(exit_node, 0));
@@ -320,6 +329,7 @@
   // 2. Fetching the output of the While node by name in calls to sess.run.
   NodeBuilder ib(name_, "IdentityN", OpRegistry::Global(), &debug_info_);
   ib.Input(outputs);
+  ib.Device(while_op_->requested_device());
   TF_RETURN_IF_ERROR(ib.Finalize(graph_, &lowered_while_output_));
   return Status::OK();
 }
@@ -330,6 +340,7 @@
     TF_RETURN_IF_ERROR(NodeBuilder(NewName("next_iteration"), "NextIteration",
                                    graph_->op_registry(), &debug_info_)
                            .Input(NodeOut(body_call_node_, i))
+                           .Device(while_op_->requested_device())
                            .Finalize(graph_, &next_iteration));
     next_iterations_nodes_.emplace_back(next_iteration);
   }
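Each lowered node now inherits the While op's requested device via the builder chain, keeping the expanded control-flow graph on the placement the user asked for. A toy builder illustrating the shape of those calls (ToyNodeBuilder is invented; the real NodeBuilder API differs):

#include <string>
#include <utility>
#include <vector>

// Invented stand-in for tensorflow::NodeBuilder, for illustration only.
class ToyNodeBuilder {
 public:
  explicit ToyNodeBuilder(std::string name) : name_(std::move(name)) {}
  ToyNodeBuilder& Device(const std::string& d) {
    device_ = d;
    return *this;
  }
  std::string Finalize() const { return name_ + "@" + device_; }

 private:
  std::string name_;
  std::string device_;
};

// Mirrors the hunks above: every node created during lowering is pinned to
// the While op's requested device.
std::vector<std::string> LowerWhileNodes(const std::string& while_device) {
  std::vector<std::string> lowered;
  for (const char* op : {"Enter", "Merge", "Switch", "Exit", "NextIteration"}) {
    lowered.push_back(ToyNodeBuilder(op).Device(while_device).Finalize());
  }
  return lowered;
}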
diff --git a/tensorflow/core/common_runtime/metrics.cc b/tensorflow/core/common_runtime/metrics.cc
index a34a580..fcdab26 100644
--- a/tensorflow/core/common_runtime/metrics.cc
+++ b/tensorflow/core/common_runtime/metrics.cc
@@ -32,6 +32,10 @@
 auto* tf_data_autotune_counter = monitoring::Counter<1>::New(
     "/tensorflow/data/autotune", "tf.data autotuning", "name");
 
+auto* tf_data_bytes_read_counter = monitoring::Counter<1>::New(
+    "/tensorflow/data/bytes_read",
+    "The number of bytes read by tf.data Dataset sources.", "name");
+
 auto* tf_data_elements_counter = monitoring::Counter<1>::New(
     "/tensorflow/data/elements", "tf.data elements", "name");
 
@@ -61,6 +65,10 @@
   tf_data_autotune_counter->GetCell(name)->IncrementBy(1);
 }
 
+void RecordTFDataBytesRead(const string& name, int64 num_bytes) {
+  tf_data_bytes_read_counter->GetCell(name)->IncrementBy(num_bytes);
+}
+
 void RecordTFDataElements(const string& name, int64 num_elements) {
   tf_data_elements_counter->GetCell(name)->IncrementBy(num_elements);
 }
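A tf.data source would bump the new counter once per record read. A hedged usage sketch: the declaration mirrors the header below, while EmitRecords and its loop are invented for illustration:

#include <cstdint>
#include <string>
#include <vector>

namespace metrics {
// Declaration matching the new header entry.
void RecordTFDataBytesRead(const std::string& name, int64_t num_bytes);
}  // namespace metrics

// Invented example source: records bytes as each serialized record is read.
void EmitRecords(const std::vector<std::string>& records) {
  for (const std::string& rec : records) {
    metrics::RecordTFDataBytesRead("TFRecordDataset",
                                   static_cast<int64_t>(rec.size()));
  }
}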
diff --git a/tensorflow/core/common_runtime/metrics.h b/tensorflow/core/common_runtime/metrics.h
index 49dbddd..bc73da4 100644
--- a/tensorflow/core/common_runtime/metrics.h
+++ b/tensorflow/core/common_runtime/metrics.h
@@ -21,14 +21,20 @@
 namespace tensorflow {
 namespace metrics {
 
-// Records that a tf.data dataset op executed by the program used autotuning.
+// Records that a tf.data.Dataset executed by the program used autotuning.
 //
-// The `name` argument identifies the dataset (e.g. "ParallelMap").
+// The `name` argument identifies the Dataset type (e.g. "ParallelMap").
 void RecordTFDataAutotune(const string& name);
 
-// Records the number of elements produced by a tf.data dataset.
+// Records the number of bytes read from the filesystem by a tf.data.Dataset
+// source.
 //
-// The `name` argument identifies the dataset (e.g. "Batch" or "Map").
+// The `name` argument identifies the Dataset type (e.g. "TFRecordDataset").
+void RecordTFDataBytesRead(const string& name, int64 num_bytes);
+
+// Records the number of elements produced by a tf.data.Dataset.
+//
+// The `name` argument identifies the Dataset type (e.g. "Batch" or "Map").
 void RecordTFDataElements(const string& name, int64 num_elements);
 
 // Records the number of independent graph changes resulting from the application
diff --git a/tensorflow/core/common_runtime/optimization_registry.cc b/tensorflow/core/common_runtime/optimization_registry.cc
index 8120a20..e7db3ae 100644
--- a/tensorflow/core/common_runtime/optimization_registry.cc
+++ b/tensorflow/core/common_runtime/optimization_registry.cc
@@ -41,11 +41,13 @@
         Status s = pass->Run(options);
         if (!s.ok()) return s;
         if (VLOG_IS_ON(1)) {
-          DumpGraphToFile(
-              strings::StrCat(
-                  "after_phase_", phase.first, "_", pass->name(), "_",
-                  reinterpret_cast<uintptr_t>((*options.graph).get())),
-              **options.graph);
+          if (options.graph) {
+            DumpGraphToFile(
+                strings::StrCat(
+                    "after_phase_", phase.first, "_", pass->name(), "_",
+                    reinterpret_cast<uintptr_t>((*options.graph).get())),
+                **options.graph);
+          }
           if (options.partition_graphs) {
             for (auto& part : *options.partition_graphs) {
               DumpGraphToFile(
diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc
index 72e5efc..b2f4f1a 100644
--- a/tensorflow/core/common_runtime/placer.cc
+++ b/tensorflow/core/common_runtime/placer.cc
@@ -20,6 +20,8 @@
 #include <utility>
 #include <vector>
 
+#include "absl/strings/str_join.h"
+#include "tensorflow/core/common_runtime/colocation_graph.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
@@ -33,69 +35,13 @@
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/util/device_name_utils.h"
+#include "tensorflow/core/util/dump_graph.h"
 #include "tensorflow/core/util/port.h"
 
 namespace tensorflow {
 
 namespace {
 
-// We hoist the conversion from C-style string literal to StringPiece here,
-// so that we can avoid the many repeated calls to strlen().
-const StringPiece kColocationAttrNameStringPiece(kColocationAttrName);
-const StringPiece kColocationGroupPrefixStringPiece(kColocationGroupPrefix);
-
-// Returns a list of devices having type in supported_device_types.  The
-// returned list is sorted by preferred type (higher numeric type is preferred).
-std::vector<Device*> FilterSupportedDevices(
-    const std::vector<Device*>& devices,
-    const PrioritizedDeviceTypeVector& supported_device_types,
-    const Device* default_device) {
-  Device* filtered_default_device = nullptr;
-  std::vector<std::pair<Device*, int32>> prioritized_filtered_devices;
-  for (const auto& supported_device_type : supported_device_types) {
-    for (Device* device : devices) {
-      if (DeviceType(device->attributes().device_type()) ==
-          supported_device_type.first) {
-        if (device == default_device) {
-          filtered_default_device = device;
-        } else {
-          prioritized_filtered_devices.emplace_back(
-              device, supported_device_type.second);
-        }
-      }
-    }
-  }
-
-  auto device_sort = [](const std::pair<Device*, int32>& a,
-                        const std::pair<Device*, int32>& b) {
-    if (a.second != b.second) {
-      return a.second > b.second;
-    }
-
-    auto a_priority =
-        DeviceSet::DeviceTypeOrder(DeviceType(a.first->device_type()));
-    auto b_priority =
-        DeviceSet::DeviceTypeOrder(DeviceType(b.first->device_type()));
-    // First sort by prioritized device type (higher is preferred) and
-    // then by device name (lexicographically).
-    if (a_priority != b_priority) {
-      return a_priority > b_priority;
-    }
-    return StringPiece(a.first->name()) < StringPiece(b.first->name());
-  };
-  std::sort(prioritized_filtered_devices.begin(),
-            prioritized_filtered_devices.end(), device_sort);
-
-  std::vector<Device*> filtered_devices;
-  if (filtered_default_device != nullptr) {
-    filtered_devices.emplace_back(filtered_default_device);
-  }
-  for (const auto& prioritized_filtered_device : prioritized_filtered_devices) {
-    filtered_devices.push_back(prioritized_filtered_device.first);
-  }
-  return filtered_devices;
-}
-
 // Returns true if the node has no inputs and produces outputs
 // that are consumed by a single node.
 //
@@ -107,974 +53,6 @@
          !IsRefType(node->output_type(0));
 }
 
-bool IsExemptFromResourceInputColocation(const Node* node) {
-  // Note: Partitioned function calls, which place and partition their
-  // function bodies, are exempt from this check: they forward resource and
-  // ref inputs to operations that are appropriately placed, instead of
-  // dereferencing them.
-  const string& op_type = node->op_def().name();
-  return op_type == "PartitionedCall" || op_type == "StatefulPartitionedCall";
-}
-
-bool HasPriorities(const PrioritizedDeviceTypeVector& device_types) {
-  for (const auto& prioritized_device_type : device_types) {
-    if (prioritized_device_type.second != 0) return true;
-  }
-  return false;
-}
-
-bool ArePrioritiesSame(const PrioritizedDeviceTypeVector& a_types,
-                       const PrioritizedDeviceTypeVector& b_types) {
-  if (a_types.size() != b_types.size()) {
-    return false;
-  }
-  for (int i = 0; i < a_types.size(); ++i) {
-    if (a_types[i].first != b_types[i].first) {
-      return false;
-    }
-  }
-  return true;
-}
-
-// Represents a node in the disjoint node set forest, and the
-// accumulated constraints on the device used by that node.
-class Member {
- public:
-  Member() = default;
-
-  Status SetParentAndSupportedDevices(const Node& node,
-                                      const std::vector<DeviceType>& types) {
-    int id = node.id();
-    if (id < 0) {
-      return errors::Internal(
-          "Placer should not be creating a Member for node: ",
-          node.DebugString());
-    }
-    parent_ = id;
-    return SupportedDeviceTypesForNode(types, node.def(),
-                                       &supported_device_types_);
-  }
-
-  const DeviceNameUtils::ParsedName& device_name() const {
-    return device_name_;
-  }
-
-  Status SetDeviceName(const string& device_name) {
-    if (!DeviceNameUtils::ParseFullName(device_name, &device_name_)) {
-      return errors::Internal("Malformed assigned device '", device_name, "'");
-    }
-    return Status::OK();
-  }
-  void SetDeviceName(const DeviceNameUtils::ParsedName& device_name) {
-    device_name_ = device_name;
-  }
-
-  const PrioritizedDeviceTypeVector& supported_device_types() const {
-    return supported_device_types_;
-  }
-
-  static void Merge(std::vector<Member>* tree, int x_root, int y_root,
-                    Member** new_root, Member** old_root) {
-    Member& x_root_member = (*tree)[x_root];
-    Member& y_root_member = (*tree)[y_root];
-
-    // Merge the sets by setting the parent pointer of the smaller tree's root
-    // node to point to the root of the larger tree. Together with path
-    // compression in ColocationGraph::FindRoot, this ensures that we do not
-    // experience pathological performance on graphs such as chains.
-    int new_root_id, old_root_id;
-    if (x_root_member.rank_ < y_root_member.rank_) {
-      // The tree rooted at x_root is shallower, so connect it to
-      // y_root. The rank of y_root is unchanged because its new
-      // child has strictly less rank.
-      x_root_member.parent_ = y_root;
-      new_root_id = y_root;
-      old_root_id = x_root;
-    } else if (x_root_member.rank_ > y_root_member.rank_) {
-      // The tree rooted at y_root is shallower, so connect it to
-      // x_root. The rank of x_root is unchanged because its new
-      // child has strictly less rank.
-      y_root_member.parent_ = x_root;
-      new_root_id = x_root;
-      old_root_id = y_root;
-    } else {
-      // Both trees have the same rank, so break the tie by choosing
-      // x_root as the new root.
-      y_root_member.parent_ = x_root;
-      // Increment the rank of the tree rooted at x_root, because it
-      // is now strictly deeper than before.
-      ++x_root_member.rank_;
-      new_root_id = x_root;
-      old_root_id = y_root;
-    }
-
-    *new_root = &(*tree)[new_root_id];
-    *old_root = &(*tree)[old_root_id];
-  }
-
-  // tree is non-const because we can change some `parent` pointers in some
-  // members for more efficient future lookups. The vector itself is not
-  // changed.
-  static int FindRoot(std::vector<Member>* tree, int node_id) {
-    Member& member = (*tree)[node_id];
-    if (member.parent_ == node_id) {
-      // member.parent is the root of this disjoint tree.  Do nothing.
-    } else {
-      member.parent_ = FindRoot(tree, member.parent_);
-    }
-    // Now it is guaranteed that member.parent is the root of this disjoint
-    // tree.
-    return member.parent_;
-  }
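-  // The two methods above are a textbook disjoint-set forest: Merge() does
-  // union-by-rank and FindRoot() does path compression. A self-contained
-  // sketch of the same structure, independent of Member (illustrative only):
-  //
-  //   #include <numeric>
-  //   #include <utility>
-  //   #include <vector>
-  //
-  //   class DisjointSets {
-  //    public:
-  //     explicit DisjointSets(int n) : parent_(n), rank_(n, 0) {
-  //       std::iota(parent_.begin(), parent_.end(), 0);
-  //     }
-  //     int Find(int x) {  // Path compression: reparent to the root.
-  //       if (parent_[x] != x) parent_[x] = Find(parent_[x]);
-  //       return parent_[x];
-  //     }
-  //     void Union(int x, int y) {  // Union by rank: shallow under deep.
-  //       x = Find(x);
-  //       y = Find(y);
-  //       if (x == y) return;
-  //       if (rank_[x] < rank_[y]) std::swap(x, y);
-  //       parent_[y] = x;
-  //       if (rank_[x] == rank_[y]) ++rank_[x];
-  //     }
-  //
-  //    private:
-  //     std::vector<int> parent_;
-  //     std::vector<int> rank_;
-  //   };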
-
-  Status MergeDeviceNames(const Member& other, bool allow_soft_placement) {
-    return DeviceNameUtils::MergeDevNames(&device_name_, other.device_name_,
-                                          allow_soft_placement);
-  }
-  Status MergeDeviceNames(const string& dev_name, bool allow_soft_placement) {
-    DeviceNameUtils::ParsedName parsed;
-    DeviceNameUtils::ParseFullName(dev_name, &parsed);
-    return DeviceNameUtils::MergeDevNames(&device_name_, parsed,
-                                          allow_soft_placement);
-  }
-
-  // Updates this to contain the intersection of the device types in
-  // this and "other".
-  void MergeSupportedDevices(const Member& other) {
-    PrioritizedDeviceTypeVector temp = supported_device_types_;
-    supported_device_types_.clear();
-
-    // Generate intersection with priorities.
-    PrioritizedDeviceTypeVector target_intersection;
-    PrioritizedDeviceTypeVector other_intersection;
-    for (const auto& prioritized_device_type : temp) {
-      bool found = false;
-      for (const auto& other_prioritized_device_type :
-           other.supported_device_types_) {
-        if (prioritized_device_type.first ==
-            other_prioritized_device_type.first) {
-          found = true;
-          other_intersection.push_back(other_prioritized_device_type);
-          break;
-        }
-      }
-      if (found) {
-        target_intersection.push_back(prioritized_device_type);
-      }
-    }
-
-    // Sort the devices by priority order.
-    auto device_sort = [](const std::pair<DeviceType, int32>& a,
-                          const std::pair<DeviceType, int32>& b) {
-      // First look at set priorities.
-      if (a.second != b.second) {
-        return a.second > b.second;
-      }
-      // Then fallback to default priorities.
-      auto a_priority = DeviceSet::DeviceTypeOrder(a.first);
-      auto b_priority = DeviceSet::DeviceTypeOrder(b.first);
-      if (a_priority != b_priority) {
-        return a_priority > b_priority;
-      }
-      // Finally just look at the Device type strings.
-      return a.first.type_string() < b.first.type_string();
-    };
-
-    std::sort(target_intersection.begin(), target_intersection.end(),
-              device_sort);
-    std::sort(other_intersection.begin(), other_intersection.end(),
-              device_sort);
-
-    bool is_target_prioritized = HasPriorities(target_intersection);
-    bool is_other_prioritized = HasPriorities(other_intersection);
-    // If neither are prioritized then we just return the original i.e. target
-    // prioritization.
-    if (!is_target_prioritized && !is_other_prioritized) {
-      supported_device_types_ = target_intersection;
-    }
-    // If only one is prioritized, then we respect priorities of that in the
-    // intersection.
-    if (is_target_prioritized && !is_other_prioritized) {
-      supported_device_types_ = target_intersection;
-    }
-    if (!is_target_prioritized && is_other_prioritized) {
-      supported_device_types_ = other_intersection;
-    }
-    // If both have priorities and agree then we go with that. If the
-    // prioritization order is different, then we just fallback to the default
-    // i.e. what the DeviceTypeOrder suggests. In that case, we also set the
-    // merged priorities to 0, so that downstream merges work correctly as well.
-    if (is_target_prioritized && is_other_prioritized) {
-      bool priorities_agree =
-          ArePrioritiesSame(target_intersection, other_intersection);
-      if (priorities_agree) {
-        supported_device_types_ = target_intersection;
-      } else {
-        for (const auto& prioritized_device : target_intersection) {
-          supported_device_types_.push_back(
-              std::make_pair(prioritized_device.first, 0));
-        }
-        std::sort(supported_device_types_.begin(),
-                  supported_device_types_.end(), device_sort);
-      }
-    }
-  }
-
-  Status AssignDevice(const Node& node, bool allow_soft_placement) {
-    if (node.assigned_device_name_index() == assigned_device_name_index_) {
-      return Status::OK();
-    }
-    Status s =
-        MergeDeviceNames(node.assigned_device_name(), allow_soft_placement);
-    if (!s.ok()) {
-      return errors::Internal(
-          "Constraining by assigned device should not cause an error. Original "
-          "root device name: ",
-          DeviceNameUtils::ParsedNameToString(device_name_),
-          " assigned device name \"", node.assigned_device_name(),
-          ". Error: ", s.error_message());
-    }
-
-    assigned_device_name_index_ = node.assigned_device_name_index();
-    // Clear cached possible_devices, if any.
-    possible_devices_.clear();
-    return Status::OK();
-  }
-
-  void set_possible_devices(std::vector<Device*>&& devices) {
-    possible_devices_ = devices;
-  }
-  const std::vector<Device*>& possible_devices() { return possible_devices_; }
-
- private:
-  // The id of the node that is the parent of this one, or its own
-  // id if it is a root. parent <= 0 indicates that this member is invalid.
-  int parent_ = -1;
-
-  // A proxy for the depth of the tree that is used to prefer
-  // connecting smaller trees to larger trees when merging disjoint
-  // sets.
-  int rank_ = 0;
-
-  // Once colocation groups have been formed and we assigned at least
-  // one node in this group to a device, assigned_device_name_index will
-  // contain this device name's index in the graph. The `device_name` will
-  // contain the parsed name of this device and `possible_devices`, if
-  // computed, will contain just this device.
-  // `assigned_device_name_index` is an optimization to avoid parsing and
-  // comparing device names. The value of -1 signals that a single device
-  // has not been chosen yet.
-  int assigned_device_name_index_ = -1;
-
-  // The merged form of the device requested for this node, with
-  // those of all of its children.
-  DeviceNameUtils::ParsedName device_name_;
-
-  // The intersection of all device types supported by this node,
-  // and those of all of its children, in priority order
-  // of the preferred device.
-  PrioritizedDeviceTypeVector supported_device_types_;
-
-  // If this node is a root, stores a list of Devices to which this node
-  // and all of its children have been assigned, or nullptr if this
-  // has not yet been computed.
-  std::vector<Device*> possible_devices_;
-};
-
-// This class maintains the connected components of a colocation
-// constraint graph, and uses this information to assign a satisfying
-// device placement to the nodes of the graph.
-//
-// The typical usage pattern is:
-//
-//   Graph graph = ...;
-//   DeviceSet device_set = ...;
-//   ColocationGraph colocation_graph(graph, device_set);
-//
-//   // Add all the nodes of the `graph` to the `colocation_graph`.
-//   for (Node* node : graph.nodes()) {
-//     TF_RETURN_IF_ERROR(colocation_graph.AddNode(*node));
-//   }
-//
-//   // Add one or more colocation constraints.
-//   Node node_1 = *graph.FindNodeId(...);
-//   Node node_2 = *graph.FindNodeId(...);
-//   TF_RETURN_IF_ERROR(colocation_graph.ColocateNodes(node_1, node_2));
-//
-//   // Assign devices based on the accumulated constraints.
-//   for (Node* node : graph.nodes()) {
-//     TF_RETURN_IF_ERROR(colocation_graph.AssignDevice(node));
-//   }
-//
-// This implementation uses the Union-Find algorithm to efficiently maintain the
-// connected components and incrementally adds edges via
-// ColocationGraph::ColocateNodes() invocations.
-//
-// ColocationGraph does not assign any devices to graph nodes. The
-// `log_device_placement` argument is used to log messages when a requested
-// device is ignored.
-class ColocationGraph {
- public:
-  ColocationGraph(const Graph* graph, const DeviceSet* device_set,
-                  const Device* default_device, bool allow_soft_placement,
-                  bool log_device_placement)
-      : graph_(graph),
-        device_set_(device_set),
-        device_types_(device_set->PrioritizedDeviceTypeList()),
-        default_device_(default_device),
-        allow_soft_placement_(allow_soft_placement),
-        log_device_placement_(log_device_placement) {
-    members_.resize(graph->num_node_ids());
-  }
-
-  // Adds each node of the Graph to this ColocationGraph as a singleton.
-  //
-  // NOTE: The implementation assumes that the ids of nodes passed to
-  // this method are dense and zero-based; the memory used will be linear in
-  // the largest node ID.
-  // NOTE: If this method returns an error, *this is left in an undefined
-  // state.
-  Status ColocateAllNodes() {
-    // This maps from a colocation group identifier to the 'root' of that
-    // colocation group.  Note that the keys in this map are StringPiece; the
-    // actual strings are stored under the NodeDef.  The lifetime of this map
-    // is limited to this ColocateAllNodes() method, and no part of the
-    // NodeDef trees are changed during the lifetime of this method, so using
-    // StringPiece as a key is safe.
-    //
-    // Also, as a further optimization, we remove the "loc:@" prefix from
-    // "class" attribute values, when they are used as keys in this table.
-    // This allows us to use StringPiece values that refer to substrings of
-    // 'string' values stored in NodeDef attribute lists, as well as StringPiece
-    // values that refer to 'string' values from NodeDef::name(), without
-    // performing any string allocations.
-    std::unordered_map<StringPiece, const Node*, StringPieceHasher>
-        colocation_group_root;
-
-    for (const Node* node : graph_->op_nodes()) {
-      // When adding the node, identify whether it is part of a colocation
-      // group.
-
-      // This code is effectively the equivalent of GetNodeAttr() for a string
-      // array, but it avoids all internal allocations (the allocation of the
-      // backing store of the std::vector<string> as well as the copies of the
-      // strings within it).  Instead, we combine the query of the colocation
-      // attribute with the calls to ColocateNodeToGroup.
-      bool found_spec = false;
-      const AttrValue* attr_value =
-          node->attrs().Find(kColocationAttrNameStringPiece);
-      if (attr_value != nullptr && attr_value->has_list()) {
-        for (const string& class_spec : attr_value->list().s()) {
-          StringPiece spec(class_spec);
-          if (str_util::ConsumePrefix(&spec,
-                                      kColocationGroupPrefixStringPiece)) {
-            found_spec = true;
-            TF_RETURN_IF_ERROR(
-                ColocateNodeToGroup(&colocation_group_root, node, spec));
-          }
-        }
-      }
-
-      if (!found_spec) {
-        // If the node does not specify a colocation group, then use the
-        // name of this node as the colocation group.
-        TF_RETURN_IF_ERROR(
-            ColocateNodeToGroup(&colocation_group_root, node, node->name()));
-      }
-    }
-
-    return Status::OK();
-  }
-
-  Status ColocateResourceOrRefEdge(Node* src, Node* dst) {
-    // Colocate `src` and `dst` to maintain the invariant that nodes
-    // connected by reference edges are colocated.
-    int src_root_id = FindRoot(src->id());
-    int dst_root_id = FindRoot(dst->id());
-    auto& src_root = members_[src_root_id];
-    auto& dst_root = members_[dst_root_id];
-    // If both the source node and this node have partially
-    // specified a device, then 'dst's device should be
-    // cleared: the reference edge forces 'node' to be on the
-    // same device as the source node.
-    const auto& source_parsed_name = src_root.device_name();
-    const auto& dest_parsed_name = dst_root.device_name();
-    if (DeviceNameUtils::HasSomeDetails(source_parsed_name) &&
-        DeviceNameUtils::HasSomeDetails(dest_parsed_name)) {
-      // Ignore a specified device for 'dst' if the two names were
-      // incompatible.
-      if (!DeviceNameUtils::AreCompatibleDevNames(source_parsed_name,
-                                                  dest_parsed_name)) {
-        TF_RETURN_IF_ERROR(VerifyResourceAndRefInputsCanBeColocated(
-            dst, src, source_parsed_name));
-        if (log_device_placement_) {
-          LOG(INFO) << "Ignoring device specification "
-                    << DeviceNameUtils::ParsedNameToString(dest_parsed_name)
-                    << " for node '" << dst->name()
-                    << "' because the input edge from '" << src->name()
-                    << "' is a reference connection and already has a device "
-                       "field set to "
-                    << DeviceNameUtils::ParsedNameToString(source_parsed_name);
-        }
-
-        // Make 'dst' colocated with the source
-        dst_root.SetDeviceName(source_parsed_name);
-      }
-    }
-    Status status = ColocateNodes(*src, src_root_id, *dst, dst_root_id);
-    if (!status.ok()) {
-      return AttachDef(
-          errors::InvalidArgument("Nodes were connected by a "
-                                  "reference connection (requiring them to "
-                                  "be on the same device), but the two nodes "
-                                  "were assigned two different devices: ",
-                                  status.error_message()),
-          *dst);
-    }
-    return Status::OK();
-  }
-
-  Status ColocateResourceAndRefEdges() {
-    // Enumerate the constraint edges, and use them to update the disjoint
-    // node set.
-    // If `node` has an input edge with reference type, add an edge from the
-    // source of that edge to `node`.
-    for (const Edge* edge : graph_->edges()) {
-      if (edge->IsControlEdge()) {
-        continue;
-      }
-      Node* src = edge->src();
-      Node* dst = edge->dst();
-      DataType input_type = dst->input_type(edge->dst_input());
-      if ((input_type == DT_RESOURCE || IsRefType(input_type)) &&
-          !IsExemptFromResourceInputColocation(dst)) {
-        TF_RETURN_IF_ERROR(ColocateResourceOrRefEdge(src, dst));
-      }
-    }
-    return Status::OK();
-  }
-
-  Status Initialize() {
-    TF_RETURN_IF_ERROR(InitializeMembers());
-    TF_RETURN_IF_ERROR(ColocateAllNodes());
-    return ColocateResourceAndRefEdges();
-  }
-
-  Status ColocateNodeToGroup(
-      std::unordered_map<StringPiece, const Node*, StringPieceHasher>*
-          colocation_group_root,
-      const Node* node, StringPiece colocation_group) {
-    const Node*& root_node = (*colocation_group_root)[colocation_group];
-    if (root_node == nullptr) {
-      // This is the first node of the colocation group, so
-      // designate this node as the 'root' of that colocation group.
-      root_node = node;
-    } else {
-      // Try to colocate the node with the root.  If there is an
-      // error, return it.
-      Status s = ColocateNodes(*node, *root_node);
-      if (!s.ok()) {
-        return AttachDef(s, *node);
-      }
-    }
-    return Status::OK();
-  }
-
-  // Merge the (possibly disjoint) sets containing nodes "x" and
-  // "y". Returns OK if the all nodes in the union of these sets can
-  // be placed on the same device type.
-  //
-  // NOTE: If this method returns an error, *this is left in an undefined
-  // state.
-  Status ColocateNodes(const Node& x, const Node& y) {
-    int x_root = FindRoot(x.id());
-    int y_root = FindRoot(y.id());
-    return ColocateNodes(x, x_root, y, y_root);
-  }
-
-  // This overload of ColocateNodes() allows a caller to provide the root node
-  // ids for the two nodes. For large graphs, this noticeably reduces the
-  // graph load time.
-  Status ColocateNodes(const Node& x, int x_root, const Node& y, int y_root) {
-    if (x_root == y_root) {
-      return Status::OK();
-    }
-
-    DCHECK_EQ(x_root, FindRoot(x.id()));
-    DCHECK_EQ(y_root, FindRoot(y.id()));
-
-    Member* new_root_member;
-    Member* old_root_member;
-    Member::Merge(&members_, x_root, y_root, &new_root_member,
-                  &old_root_member);
-
-    // Merge the partial device specifications, and ensure that they are
-    // compatible. NULL options_ is treated as allowing soft placement.
-    // TODO(mrry): Consider enriching the error message by pointing
-    // out which nodes have the explicit partial device
-    // specifications that caused this conflict.
-    Status s = new_root_member->MergeDeviceNames(*old_root_member,
-                                                 allow_soft_placement_);
-    if (!s.ok()) {
-      return errors::InvalidArgument(
-          "Cannot colocate nodes ",
-          errors::FormatColocationNodeForError(x.name()), " and ",
-          errors::FormatColocationNodeForError(y.name()), ": ",
-          s.error_message());
-    }
-
-    // Ensure that the common root has at least one supported device
-    // type, by computing the intersection of
-    // new_root_member.supported_device_types and
-    // old_root_member.supported_device_types.
-    new_root_member->MergeSupportedDevices(*old_root_member);
-    if (new_root_member->supported_device_types().empty()) {
-      return errors::InvalidArgument(
-          "Cannot colocate nodes ",
-          errors::FormatColocationNodeForError(x.name()), " and ",
-          errors::FormatColocationNodeForError(y.name()),
-          " because no device type supports both of those nodes and the "
-          "other nodes colocated with them.",
-          DebugInfo(x_root), DebugInfo(y_root));
-    }
-
-    return Status::OK();
-  }
-
-  // Limits the possible devices of `node`'s colocation group to the device
-  // to which `node` is assigned. This makes sure that all nodes in this
-  // colocation group will be assigned to the same device. Without this
-  // explicit restriction, heuristics can choose a different possible device
-  // for other nodes in the group.
-  Status LimitToAssignedDevice(const Node& node) {
-    if (node.assigned_device_name_index() < 0) {
-      return errors::Internal(
-          "Expected an assigned node as argument to LimitToAssignedDevice but "
-          "got: ",
-          node.DebugString());
-    }
-    int root = FindRoot(node.id());
-    Member& root_member = members_[root];
-    return root_member.AssignDevice(node, allow_soft_placement_);
-  }
-
-  // For the given node, subject to the constraints previously given
-  // to this ColocationGraph, set its assigned_device_name. Returns OK
-  // if a satisfying device can be found, otherwise an error.
-  //
-  // Note: This method returns a pointer to a field within members_.
-  // The caller must not use the returned pointer after there is any possibility
-  // that the members_[i].possible_devices field has been modified.
-  Status GetDevicesForNode(Node* node,
-                           const std::vector<Device*>** possible_devices) {
-    *possible_devices = nullptr;
-    const int node_root = FindRoot(node->id());
-    if (!members_[node_root].possible_devices().empty()) {
-      *possible_devices = &members_[node_root].possible_devices();
-      return Status::OK();
-    }
-
-    // We have not yet computed the possible devices for the
-    // colocated node set containing 'node', so we do so now using the
-    // constraints on the root node.
-
-    // "devices" will contain the set of feasible placements for the
-    // colocated node set containing 'node'.
-    std::vector<Device*> devices;
-    if (DeviceNameUtils::HasSomeDetails(members_[node_root].device_name())) {
-      // The root node has a (possibly partial) device
-      // specification, so enumerate the physical devices that
-      // conform to it.
-      device_set_->FindMatchingDevices(members_[node_root].device_name(),
-                                       &devices);
-
-      if (!devices.empty()) {
-        // Filter devices into those that are compatible with the root
-        // node (and its children).
-        devices = FilterSupportedDevices(
-            devices, members_[node_root].supported_device_types(),
-            default_device_);
-      }
-
-      // Perform soft placement if allow_soft_placement_ is set.
-      if (devices.empty() && allow_soft_placement_) {
-        // The soft_device_name is the same as the node's device name
-        // without specifying the device type or ID.
-        DeviceNameUtils::ParsedName soft_device_name =
-            members_[node_root].device_name();
-        soft_device_name.type.clear();
-        soft_device_name.has_type = false;
-        soft_device_name.has_id = false;
-        device_set_->FindMatchingDevices(soft_device_name, &devices);
-        if (!devices.empty()) {
-          devices = FilterSupportedDevices(
-              devices, members_[node_root].supported_device_types(),
-              default_device_);
-        }
-      }
-
-      if (devices.empty()) {
-        // Return an error when a physical device that matches an explicit
-        // device specification is not found. This ensures that we don't
-        // assign a node to GPU when the user wanted to force it on CPU.
-        string debug_info = DebugInfo(node_root);
-
-        DeviceNameUtils::ParsedName specified_device_name;
-        if (DeviceNameUtils::ParseFullName(node->requested_device(),
-                                           &specified_device_name) &&
-            specified_device_name == members_[node_root].device_name()) {
-          // The specified device and merged set device match, and
-          // will appear in the GraphDef (for debugging), so just
-          // print the specified device.
-          std::vector<Device*> devices_matching_nodedef;
-          device_set_->FindMatchingDevices(specified_device_name,
-                                           &devices_matching_nodedef);
-          if (devices_matching_nodedef.empty()) {
-            // Sometimes it is almost impossible to understand the problem
-            // without a list of available devices.
-            std::vector<string> device_names;
-            for (const Device* device : device_set_->devices()) {
-              device_names.push_back(device->name());
-            }
-            std::sort(device_names.begin(), device_names.end());
-
-            string gpu_msg = "";
-            if (!IsGoogleCudaEnabled() &&
-                str_util::Lowercase(specified_device_name.type) == "gpu") {
-              gpu_msg =
-                  " The requested device appears to be a GPU, but CUDA is not "
-                  "enabled.";
-            }
-
-            return errors::InvalidArgument(
-                errors::FormatNodeNameForError(node->name()),
-                "was explicitly assigned to ", node->requested_device(),
-                " but available devices are [ ",
-                str_util::Join(device_names, ", "), " ]. Make sure ",
-                "the device specification refers to a valid device.", gpu_msg);
-          } else if (specified_device_name.has_type) {
-            return errors::InvalidArgument(
-                "Could not satisfy explicit device specification '",
-                node->requested_device(), "' because no supported kernel for ",
-                specified_device_name.type, " devices is available.",
-                debug_info, "\nRegistered kernels:\n",
-                KernelsRegisteredForOp(node->type_string()));
-          } else {
-            return errors::InvalidArgument(
-                "Could not satisfy explicit device specification '",
-                node->requested_device(), debug_info);
-          }
-        } else {
-          // The specified device may be a valid device but the
-          // merged set device is different, so print both.
-          return errors::InvalidArgument(
-              "Could not satisfy explicit device specification '",
-              node->requested_device(), "' because the node ",
-              errors::FormatColocationNodeForError(node->name()),
-              " was colocated with a group of nodes that ",
-              "required incompatible device '",
-              DeviceNameUtils::ParsedNameToString(
-                  members_[node_root].device_name()),
-              "'", debug_info);
-        }
-      }
-    } else {
-      // The device is completely unspecified, so enumerate the devices that
-      // support all of the nodes in the set.
-      if (device_set_->devices().empty()) {
-        return errors::Internal("No devices are registered");
-      }
-      devices = FilterSupportedDevices(
-          device_set_->devices(), members_[node_root].supported_device_types(),
-          default_device_);
-
-      if (devices.empty()) {
-        return errors::InvalidArgument(
-            "Node had no OpKernel registered to support this operation: ",
-            "Operation was ", node->type_string(), " and inputs were ",
-            DataTypeVectorString(node->input_types()), DebugInfo(node_root));
-      }
-    }
-
-    // Cache the result of the possible devices for this node group.
-    members_[node_root].set_possible_devices(std::move(devices));
-    *possible_devices = &members_[node_root].possible_devices();
-    return Status::OK();
-  }
-
-  Status InitializeMembers() {
-    for (Node* node : graph_->nodes()) {
-      if (!node->IsOp()) {
-        continue;
-      }
-      Status status = InitializeMember(*node, &members_[node->id()]);
-      if (!status.ok()) {
-        return AttachDef(status, *node);
-      }
-    }
-    return Status::OK();
-  }
-
-  // Returns debugging info for the node referred to by 'node_root'.
-  string DebugInfo(const int node_root) {
-    string text(
-        "\nColocation Debug Info:\n"
-        "Colocation group had the following types and devices: ");
-
-    // If this node is part of a colocation group, then we want to
-    // collect the mapping of ops to supported devices, so that
-    // the user can see why an unsatisfiable placement occurred.
-
-    std::unordered_map<string, string> type_to_devices;
-    std::vector<const Node*> colocation_nodes;
-    int num_nodes_found = 0;
-
-    for (const Node* node : graph_->nodes()) {
-      if (!node->IsOp()) {
-        continue;
-      }
-      int id = node->id();
-      if (FindRoot(id) != node_root) {
-        continue;
-      }
-      ++num_nodes_found;
-      colocation_nodes.push_back(node);
-      const string& op_type = node->type_string();
-      string devices_registered;
-      for (const auto& device_type : members_[id].supported_device_types()) {
-        strings::StrAppend(&devices_registered,
-                           DeviceTypeString(device_type.first), " ");
-      }
-
-      type_to_devices[op_type] = std::move(devices_registered);
-    }
-
-    for (const auto& td : type_to_devices) {
-      strings::StrAppend(&text, "\n", td.first, ": ", td.second);
-    }
-    strings::StrAppend(&text,
-                       "\n\nColocation members and user-requested devices:");
-    for (const Node* node : colocation_nodes) {
-      strings::StrAppend(&text, "\n  ", node->name(), " (", node->type_string(),
-                         ") ", node->requested_device());
-    }
-    strings::StrAppend(&text, "\n");
-
-    if (num_nodes_found <= 1) {
-      text.clear();
-    }
-    return text;
-  }
-
-  Status InitializeMember(const Node& node, Member* member) {
-    TF_RETURN_IF_ERROR(
-        member->SetParentAndSupportedDevices(node, device_types_));
-
-    if (node.has_assigned_device_name()) {
-      // This node has already been assigned to a device, so we
-      // respect this placement, after sanity-checking it.  The
-      // device_name and supported_device_types for this node reflect
-      // the assigned device, so any nodes colocated with this node
-      // will be assigned to the same device (assuming this is
-      // possible).
-      // NOTE: Since any assignment must have been performed by
-      // the TensorFlow runtime, we consider errors in this branch to
-      // be INTERNAL.
-      const string& assigned_device_name = node.assigned_device_name();
-      TF_RETURN_IF_ERROR(member->SetDeviceName(assigned_device_name));
-      const Device* assigned_device =
-          device_set_->FindDeviceByName(assigned_device_name);
-      if (assigned_device == nullptr) {
-        return errors::Internal("Assigned device '", assigned_device_name,
-                                "' does not match any device");
-      }
-
-      for (const auto& d : member->supported_device_types()) {
-        if (DeviceType(assigned_device->attributes().device_type()) ==
-            d.first) {
-          return Status::OK();
-        }
-      }
-
-      return errors::Internal("Assigned device '", assigned_device_name,
-                              "' does not have registered OpKernel support "
-                              "for ",
-                              node.type_string());
-    } else {
-      // This node has not yet been assigned to a device, so we
-      // calculate any constraints due to the set of registered
-      // kernels and any (partial) user-provided device specification
-      // in the NodeDef.
-
-      // If no kernels are registered for this op type, fail with an error.
-      if (member->supported_device_types().empty()) {
-        std::set<string> registered_device_types;
-        for (Device* d : device_set_->devices()) {
-          registered_device_types.insert(d->device_type());
-        }
-        std::vector<string> attr_key_vals;
-        for (const auto& it : node.attrs()) {
-          const string& name = it.first;
-          const AttrValue& attr_value = it.second;
-          attr_key_vals.push_back(
-              strings::StrCat(name, "=", SummarizeAttrValue(attr_value)));
-        }
-        return errors::InvalidArgument(
-            "No OpKernel was registered to support Op '", node.type_string(),
-            "' used by ", errors::FormatNodeNameForError(node.name()),
-            "with these attrs: [", str_util::Join(attr_key_vals, ", "),
-            "]\n"
-            "Registered devices: [",
-            str_util::Join(registered_device_types, ", "), "]\n",
-            "Registered kernels:\n",
-            KernelsRegisteredForOp(node.type_string()));
-      }
-
-      // If the NodeDef contains a device, then we interpret it as a
-      // (partial) device specification.
-      if (!node.requested_device().empty()) {
-        // The user has specified a device in the NodeDef, try to find a
-        // valid device matching their specification in the set of
-        // devices.
-        // NOTE: The full name may specify a device that is not in
-        // n.supported_device_types(), but we check that in AssignDevice().
-        if (!member->SetDeviceName(node.requested_device()).ok()) {
-          return errors::InvalidArgument("Malformed device specification '",
-                                         node.requested_device(),
-                                         "' in node: ", node.DebugString());
-        }
-      }
-    }
-    return Status::OK();
-  }
-
-  // Updates target to contain the intersection of the device types in
-  // "target" and "other".
-  static void MergeSupportedDevices(PrioritizedDeviceTypeVector* target,
-                                    const PrioritizedDeviceTypeVector& other) {
-    PrioritizedDeviceTypeVector temp = *target;
-    target->clear();
-
-    // Generate intersection with priorities.
-    PrioritizedDeviceTypeVector target_intersection;
-    PrioritizedDeviceTypeVector other_intersection;
-    for (const auto& prioritized_device_type : temp) {
-      bool found = false;
-      for (const auto& other_prioritized_device_type : other) {
-        if (prioritized_device_type.first ==
-            other_prioritized_device_type.first) {
-          found = true;
-          other_intersection.push_back(other_prioritized_device_type);
-          break;
-        }
-      }
-      if (found) {
-        target_intersection.push_back(prioritized_device_type);
-      }
-    }
-
-    // Sort the devices by priority order.
-    auto device_sort = [](const std::pair<DeviceType, int32>& a,
-                          const std::pair<DeviceType, int32>& b) {
-      // First look at set priorities.
-      if (a.second != b.second) {
-        return a.second > b.second;
-      }
-      // Then fallback to default priorities.
-      auto a_priority = DeviceSet::DeviceTypeOrder(a.first);
-      auto b_priority = DeviceSet::DeviceTypeOrder(b.first);
-      if (a_priority != b_priority) {
-        return a_priority > b_priority;
-      }
-      // Finally just look at the Device type strings.
-      return a.first.type_string() < b.first.type_string();
-    };
-
-    std::sort(target_intersection.begin(), target_intersection.end(),
-              device_sort);
-    std::sort(other_intersection.begin(), other_intersection.end(),
-              device_sort);
-
-    bool is_target_prioritized = HasPriorities(target_intersection);
-    bool is_other_prioritized = HasPriorities(other_intersection);
-    // If neither are prioritized then we just return the original i.e. target
-    // prioritization.
-    if (!is_target_prioritized && !is_other_prioritized) {
-      *target = target_intersection;
-    }
-    // If only one is prioritized, then we respect priorities of that in the
-    // intersection.
-    if (is_target_prioritized && !is_other_prioritized) {
-      *target = target_intersection;
-    }
-    if (!is_target_prioritized && is_other_prioritized) {
-      *target = other_intersection;
-    }
-    // If both have priorities and agree then we go with that. If the
-    // prioritization order is different, then we just fallback to the default
-    // i.e. what the DeviceTypeOrder suggests. In that case, we also set the
-    // merged priorities to 0, so that downstream merges work correctly as well.
-    if (is_target_prioritized && is_other_prioritized) {
-      bool priorities_agree =
-          ArePrioritiesSame(target_intersection, other_intersection);
-      if (priorities_agree) {
-        *target = target_intersection;
-      } else {
-        for (const auto& prioritized_device : target_intersection) {
-          target->push_back(std::make_pair(prioritized_device.first, 0));
-        }
-        std::sort(target->begin(), target->end(), device_sort);
-      }
-    }
-  }
-
-  // Returns the root node of the disjoint tree to which the node with the
-  // given id is connected.
-  int FindRoot(int node_id) { return Member::FindRoot(&members_, node_id); }
-
-  // Ensures that the devices of 'dst's resource and reference match the device
-  // specified for 'src', which is an input of 'dst' with a partially or fully
-  // specified device.
-  Status VerifyResourceAndRefInputsCanBeColocated(
-      const Node* dst, const Node* src,
-      const DeviceNameUtils::ParsedName& src_parsed_name) {
-    std::vector<const Edge*> edges;
-    TF_RETURN_IF_ERROR(dst->input_edges(&edges));
-    for (const Edge* edge : edges) {
-      DataType input_type = dst->input_type(edge->dst_input());
-      if (input_type == DT_RESOURCE || IsRefType(input_type)) {
-        const Node* input_node = edge->src();
-        if (input_node == src) {
-          continue;
-        }
-        const auto& input_root = members_[FindRoot(input_node->id())];
-        const auto& input_parsed_name = input_root.device_name();
-        if (DeviceNameUtils::HasSomeDetails(input_parsed_name) &&
-            !DeviceNameUtils::AreCompatibleDevNames(input_parsed_name,
-                                                    src_parsed_name)) {
-          return AttachDef(
-              errors::InvalidArgument(
-                  "Could not colocate node with its "
-                  "resource and reference inputs; devices ",
-                  DeviceNameUtils::ParsedNameToString(input_parsed_name),
-                  " and ", DeviceNameUtils::ParsedNameToString(src_parsed_name),
-                  " are not compatible."),
-              *dst);
-        }
-      }
-    }
-    return Status::OK();
-  }
-
-  const Graph* const graph_;  // Not owned.
-  std::vector<Member> members_;
-  const DeviceSet* device_set_;  // Not owned.
-  const std::vector<DeviceType> device_types_;
-  const Device* default_device_;
-  const bool allow_soft_placement_;
-  const bool log_device_placement_;
-};
-
 void LogDeviceAssignment(const Node* node, bool log_device_placement) {
   // Log placement if log_device_placement is set.
   if (log_device_placement) {
@@ -1119,6 +97,15 @@
     return errors::FailedPrecondition("No devices are registered");
   }
 
+  if (VLOG_IS_ON(3)) {
+    DumpGraphToFile("placer_input", *graph_, nullptr, "/tmp");
+    for (const Node* node : graph_->op_nodes()) {
+      VLOG(3) << "    " << node->name() << ": requested: '"
+              << node->requested_device() << "' assigned: '"
+              << node->assigned_device_name() << "'";
+    }
+  }
+
   ColocationGraph colocation_graph(
       graph_, devices_, default_device_,
       options_ == nullptr || options_->config.allow_soft_placement(),
@@ -1126,14 +113,15 @@
 
   TF_RETURN_IF_ERROR(colocation_graph.Initialize());
 
-  // For each node, assign a device based on the constraints in the
-  // disjoint node set.
+  // For each node, assign a device based on the constraints in the disjoint
+  // node set.
   std::vector<Node*> second_pass;
   for (Node* node : graph_->op_nodes()) {
     // The graph may have come pre-populated by the framework with assigned
     // devices (e.g., for stateful placements), so the placer should not try to
     // place nodes that are already placed.
     if (node->has_assigned_device_name()) {
+      TF_RETURN_IF_ERROR(colocation_graph.LimitToAssignedDevice(*node));
       LogDeviceAssignment(node, log_device_placement_);
       continue;
     }
@@ -1234,6 +222,9 @@
                                     log_device_placement_));
   }
 
+  if (VLOG_IS_ON(3)) {
+    DumpGraphToFile("placer_output", *graph_, nullptr, "/tmp");
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/core/common_runtime/placer_test.cc b/tensorflow/core/common_runtime/placer_test.cc
index 04e77e5..ece4fe0 100644
--- a/tensorflow/core/common_runtime/placer_test.cc
+++ b/tensorflow/core/common_runtime/placer_test.cc
@@ -17,6 +17,7 @@
 
 #include <memory>
 #include <string>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -24,11 +25,15 @@
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/graph_def_builder_util.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
@@ -40,6 +45,16 @@
 
 namespace tensorflow {
 
+using ::tensorflow::test::function::GDef;
+using ::tensorflow::test::function::NDef;
+using FDH = ::tensorflow::FunctionDefHelper;
+
+constexpr char kCPU[] = "/device:fakecpu:0";
+constexpr char kGPU[] = "/device:fakegpu:0";
+
+constexpr char kFullCPU[] = "/job:a/replica:0/task:0/device:fakecpu:0";
+constexpr char kFullGPU[] = "/job:a/replica:0/task:0/device:fakegpu:0";
+
 namespace {
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -210,6 +225,16 @@
     return Status::OK();
   }
 
+  Status BuildGraph(const GraphDef& graph_def, Graph* out_graph) {
+    GraphConstructorOptions opts;
+    TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(opts, graph_def, out_graph));
+    nodes_by_name_.clear();
+    for (Node* node : out_graph->nodes()) {
+      nodes_by_name_[node->name()] = node->id();
+    }
+    return Status::OK();
+  }
+
   // Invokes the Placer on "graph". If no DeviceSet is specified, the
   // placement will use the default DeviceSet (of 10 CPU and 10 GPU devices).
   //
@@ -248,6 +273,16 @@
                              const DeviceType& expected_device_type);
 };
 
+// Fixture that adds a parameter for allow_soft_placement.
+// Test cases that want to test behavior with and without soft placement
+// can use this fixture instead of PlacerTest.
+class SoftPlacementPlacerTest : public PlacerTest,
+                                public ::testing::WithParamInterface<bool> {};
+
+INSTANTIATE_TEST_SUITE_P(, SoftPlacementPlacerTest,
+                         ::testing::Values(false, true),
+                         ::testing::PrintToStringParamName());
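+// (With ::testing::PrintToStringParamName, the generated cases are suffixed
+// with "true" or "false" according to the allow_soft_placement parameter.)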
+
 #define EXPECT_COLOCATED(g, name_a, name_b)                         \
   do {                                                              \
     Graph& g_ = (g);                                                \
@@ -866,7 +901,7 @@
 }
 
 TEST_F(PlacerTest, TestResourceHandlesOnDifferentDevicesFails) {
-  auto handle_test = [this](bool allow_soft_placement) {
+  auto handle_test = [this](bool allow_soft_placement, bool set_assigned) {
     Graph g(OpRegistry::Global());
     {  // Scope for temporary variables used to construct g.
       GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
@@ -878,27 +913,41 @@
                     b.opts().WithName("two_handles_in"));
       TF_EXPECT_OK(BuildGraph(b, &g));
 
-      GetNodeByName(g, "var_cpu")
-          ->set_assigned_device_name(
-              "/job:a/replica:0/task:0/device:fakecpu:0");
-      GetNodeByName(g, "var_gpu")
-          ->set_assigned_device_name(
-              "/job:a/replica:0/task:0/device:fakegpu:0");
+      if (set_assigned) {
+        GetNodeByName(g, "var_cpu")
+            ->set_assigned_device_name(
+                "/job:a/replica:0/task:0/device:fakecpu:0");
+        GetNodeByName(g, "var_gpu")
+            ->set_assigned_device_name(
+                "/job:a/replica:0/task:0/device:fakegpu:0");
+      } else {
+        GetNodeByName(g, "var_cpu")
+            ->set_requested_device("/job:a/replica:0/task:0/device:fakecpu:0");
+        GetNodeByName(g, "var_gpu")
+            ->set_requested_device("/job:a/replica:0/task:0/device:fakegpu:0");
+      }
     }
 
     SessionOptions options;
     options.config.set_allow_soft_placement(allow_soft_placement);
     options.config.set_log_device_placement(true);
     Status s = Place(&g, &options);
-    EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
+    EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
     EXPECT_TRUE(str_util::StrContains(
         s.error_message(),
-        "Could not colocate node with its resource and reference inputs"));
+        "Cannot place the graph because a reference or resource edge "
+        "connects "
+        "colocation groups with incompatible assigned devices: "
+        "/job:a/replica:0/task:0/device:fakegpu:0 vs "
+        "/job:a/replica:0/task:0/device:fakecpu:0"));
+
     return Status::OK();
   };
 
-  TF_EXPECT_OK(handle_test(false));
-  TF_EXPECT_OK(handle_test(true));
+  TF_EXPECT_OK(handle_test(false, false));
+  TF_EXPECT_OK(handle_test(false, true));
+  TF_EXPECT_OK(handle_test(true, false));
+  TF_EXPECT_OK(handle_test(true, true));
 }
 
 // Test that an assignment of an operator to the wrong device
@@ -1034,7 +1083,7 @@
   EXPECT_COLOCATED(g, "in", "foo");
 }
 
-TEST_F(PlacerTest, TestInvalidMultipleColocationGroups) {
+TEST_P(SoftPlacementPlacerTest, TestInvalidMultipleColocationGroups) {
   Graph g(OpRegistry::Global());
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
@@ -1051,12 +1100,24 @@
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
-  Status s = Place(&g);
-  EXPECT_TRUE(str_util::StrContains(
-      s.error_message(),
-      "Cannot colocate nodes {{colocation_node foo}} and "
-      "{{colocation_node in}} because no device type supports both of those "
-      "nodes and the other nodes colocated with them"));
+  bool allow_soft_placement = GetParam();
+  SessionOptions options;
+  options.config.set_allow_soft_placement(allow_soft_placement);
+  options.config.set_log_device_placement(true);
+  Status s = Place(&g, &options);
+  if (allow_soft_placement) {
+    EXPECT_EQ(error::OK, s.code()) << s.ToString();
+    EXPECT_DEVICE_TYPE(g, "in", "FakeCPU");
+    EXPECT_DEVICE_TYPE(g, "colocated_1", "FakeCPU");
+    EXPECT_DEVICE_TYPE(g, "foo", "FakeGPU");
+  } else {
+    EXPECT_TRUE(str_util::StrContains(
+        s.error_message(),
+        "Cannot colocate nodes {{colocation_node foo}} and "
+        "{{colocation_node in}} because no device type supports both of those "
+        "nodes and the other nodes colocated with them"))
+        << s.ToString();
+  }
 }
 
 TEST_F(PlacerTest, TestColocationGroupWithReferenceConnections) {
@@ -1086,7 +1147,8 @@
   EXPECT_COLOCATED(g, "var2", "assign1");
 }
 
-TEST_F(PlacerTest, TestColocationGroupWithUnsatisfiableReferenceConnections) {
+TEST_P(SoftPlacementPlacerTest,
+       TestColocationGroupWithUnsatisfiableReferenceConnections) {
   Graph g(OpRegistry::Global());
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
@@ -1116,12 +1178,22 @@
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
-  Status s = Place(&g);
-  EXPECT_TRUE(str_util::StrContains(
-      s.error_message(),
-      "Cannot colocate nodes {{colocation_node var3}} and {{colocation_node "
-      "assign3}} because no device type supports both of those nodes and the "
-      "other nodes colocated with them."));
+  bool allow_soft_placement = GetParam();
+  SessionOptions options;
+  options.config.set_allow_soft_placement(allow_soft_placement);
+  options.config.set_log_device_placement(true);
+  Status s = Place(&g, &options);
+  if (allow_soft_placement) {
+    EXPECT_EQ(error::OK, s.code()) << s.ToString();
+  } else {
+    EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
+    EXPECT_TRUE(str_util::StrContains(
+        s.error_message(),
+        "Cannot colocate nodes {{colocation_node assign3}} and "
+        "{{colocation_node var2}} because no device type supports both of "
+        "those nodes and the other nodes colocated with them."))
+        << s.ToString();
+  }
 }
 
 TEST_F(PlacerTest, TestColocationAndReferenceConnections) {
@@ -1617,5 +1689,160 @@
   EXPECT_DEVICE_TYPE(g, "in", "FakeGPU");
 }
 
+REGISTER_KERNEL_BUILDER(Name("_Arg").Device("FakeCPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("_Arg").Device("FakeGPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("_Retval").Device("FakeCPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("_Retval").Device("FakeGPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Identity").Device("FakeCPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Identity").Device("FakeGPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Const").Device("FakeCPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Const").Device("FakeGPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Mul").Device("FakeCPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Mul").Device("FakeGPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Add").Device("FakeCPU"), DummyOp);
+REGISTER_KERNEL_BUILDER(Name("Add").Device("FakeGPU"), DummyOp);
+
+TEST_P(SoftPlacementPlacerTest,
+       RequestedDeviceOnResourceGeneratorIsTreatedAsAssigned) {
+  /*
+   *    a:RES:GPU  b:RES:CPU
+   *       |         |
+   *       |         |
+   *       v         v
+   *      id1       id2
+   *     @loc:id2
+   */
+  FunctionDef func = test::function::ResourceOutput();
+  GraphDef graph = GDef(
+      {
+          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
+          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
+          NDef("id1", "Identity", {"a"},
+               {{"T", DT_RESOURCE},
+                {"_class", gtl::ArraySlice<string>({"loc:@id2"})}}),
+          NDef("id2", "Identity", {"b"}, {{"T", DT_RESOURCE}}),
+      },
+      // FunctionLib
+      {func});
+
+  Graph g(OpRegistry::Global());
+  TF_ASSERT_OK(BuildGraph(graph, &g));
+
+  bool allow_soft_placement = GetParam();
+  SessionOptions options;
+  options.config.set_allow_soft_placement(allow_soft_placement);
+  options.config.set_log_device_placement(true);
+  Status s = Place(&g, &options);
+  if (allow_soft_placement) {
+    EXPECT_EQ(error::OK, s.code()) << s.ToString();
+    EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
+    EXPECT_DEVICE_TYPE(g, "id1", "FakeGPU");
+    EXPECT_DEVICE_TYPE(g, "b", "FakeCPU");
+    EXPECT_DEVICE_TYPE(g, "id2", "FakeCPU");
+  } else {
+    EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
+    EXPECT_TRUE(str_util::StrContains(
+        s.error_message(),
+        "Cannot colocate nodes {{colocation_node id2}} and {{colocation_node "
+        "id1}}: Cannot merge devices with incompatible types: "
+        "'/device:fakecpu:0' and '/device:fakegpu:0'"))
+        << s.ToString();
+  }
+}
+
+TEST_F(PlacerTest, RequestedDeviceCanBeOverridden) {
+  /*
+   *     a:RES      b:RES
+   *       |         |
+   *     id_a:GPU   id_b:CPU
+   *       |         |
+   *       v         v
+   *      id1       id2
+   *     @loc:id2
+   */
+  FunctionDef func = test::function::ResourceOutput();
+  GraphDef graph = GDef(
+      {
+          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
+          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}),
+          NDef("id_a", "Identity", {"a"}, {{"T", DT_RESOURCE}}, kGPU),
+          NDef("id_b", "Identity", {"b"}, {{"T", DT_RESOURCE}}, kCPU),
+          NDef("id1", "Identity", {"id_a"},
+               {{"T", DT_RESOURCE},
+                {"_class", gtl::ArraySlice<string>({"loc:@id2"})}}),
+          NDef("id2", "Identity", {"id_b"}, {{"T", DT_RESOURCE}}),
+      },
+      // FunctionLib
+      {func});
+
+  Graph g(OpRegistry::Global());
+  TF_ASSERT_OK(BuildGraph(graph, &g));
+  TF_ASSERT_OK(Place(&g));
+
+  // All should be colocated
+  EXPECT_COLOCATED(g, "a", "b");
+  EXPECT_COLOCATED(g, "id_a", "id_b");
+  EXPECT_COLOCATED(g, "id1", "id2");
+  EXPECT_COLOCATED(g, "a", "id_a");
+  EXPECT_COLOCATED(g, "a", "id1");
+}
+
+TEST_P(SoftPlacementPlacerTest,
+       AssignedDevicesAreNotOverriddenDueToResourcesAndColocation) {
+  /*
+   *     a:RES      b:RES
+   *       |         |
+   *     id_a:GPU   id_b:CPU
+   *       |         |
+   *       v         v
+   *      id1       id2
+   *     @loc:id2
+   */
+  FunctionDef func = test::function::ResourceOutput();
+  GraphDef graph = GDef(
+      {
+          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
+          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}),
+          NDef("id_a", "Identity", {"a"}, {{"T", DT_RESOURCE}}),
+          NDef("id_b", "Identity", {"b"}, {{"T", DT_RESOURCE}}),
+          NDef("id1", "Identity", {"id_a"},
+               {{"T", DT_RESOURCE},
+                {"_class", gtl::ArraySlice<string>({"loc:@id2"})}}),
+          NDef("id2", "Identity", {"id_b"}, {{"T", DT_RESOURCE}}),
+      },
+      // FunctionLib
+      {func});
+
+  Graph g(OpRegistry::Global());
+  TF_ASSERT_OK(BuildGraph(graph, &g));
+  std::unordered_map<string, Node*> nodes = g.BuildNodeNameIndex();
+  GetNodeByName(g, "id_a")->set_assigned_device_name(kFullGPU);
+  GetNodeByName(g, "id_b")->set_assigned_device_name(kFullCPU);
+
+  bool allow_soft_placement = GetParam();
+
+  SessionOptions options;
+  options.config.set_allow_soft_placement(allow_soft_placement);
+  Status s = Place(&g, &options);
+  if (allow_soft_placement) {
+    EXPECT_EQ(error::OK, s.code()) << s.ToString();
+    EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
+    EXPECT_DEVICE_TYPE(g, "id_a", "FakeGPU");
+    EXPECT_DEVICE_TYPE(g, "id1", "FakeGPU");
+    EXPECT_DEVICE_TYPE(g, "b", "FakeCPU");
+    EXPECT_DEVICE_TYPE(g, "id_b", "FakeCPU");
+    EXPECT_DEVICE_TYPE(g, "id2", "FakeCPU");
+  } else {
+    EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
+    EXPECT_TRUE(str_util::StrContains(
+        s.error_message(),
+        "Cannot colocate nodes {{colocation_node id2}} and {{colocation_node "
+        "id1}}: Cannot merge devices with incompatible types: "
+        "'/job:a/replica:0/task:0/device:fakecpu:0' and "
+        "'/job:a/replica:0/task:0/device:fakegpu:0'"))
+        << s.ToString();
+  }
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h
index 8be9c7b..603e28b 100644
--- a/tensorflow/core/common_runtime/pool_allocator.h
+++ b/tensorflow/core/common_runtime/pool_allocator.h
@@ -99,8 +99,6 @@
     return pool_size_limit_;
   }
 
-  void GetStats(AllocatorStats* stats) override { stats->Clear(); }
-
  private:
   struct PtrRecord {
     void* ptr;
diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc
index 950a936..6db1fae 100644
--- a/tensorflow/core/common_runtime/process_function_library_runtime.cc
+++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc
@@ -464,7 +464,8 @@
                        const FunctionDef* fdef,
                        const FunctionLibraryDefinition* lib_def,
                        std::unique_ptr<Graph>* graph,
-                       std::vector<string>* ret_node_names) {
+                       std::vector<string>* ret_node_names,
+                       std::vector<string>* control_ret_node_names) {
   auto get_func_sig = [lib_def](const string& op, const OpDef** sig) {
     return lib_def->LookUpOpDef(op, sig);
   };
@@ -484,6 +485,10 @@
   for (const Node* node : fbody->ret_nodes) {
     ret_node_names->push_back(node->name());
   }
+  control_ret_node_names->reserve(fbody->control_ret_nodes.size());
+  for (const Node* node : fbody->control_ret_nodes) {
+    control_ret_node_names->push_back(node->name());
+  }
   return Status::OK();
 }
 
@@ -522,9 +527,18 @@
 
   std::unique_ptr<Graph> graph;
   std::vector<string> ret_node_names;
+  std::vector<string> control_ret_node_names;
 
   TF_RETURN_IF_ERROR(GetGraphAndRets(function_name, attrs, fdef, lib_def,
-                                     &graph, &ret_node_names));
+                                     &graph, &ret_node_names,
+                                     &control_ret_node_names));
+
+  if (options.graph_collector != nullptr) {
+    GraphDef def;
+    graph->ToGraphDef(&def);
+    *def.mutable_library() = lib_def->ReachableDefinitions(def).ToProto();
+    options.graph_collector->CollectRawGraph(def);
+  }
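+  // (With a graph_collector attached, the function graph is reported three
+  // times: raw here, optimized after the rewrite passes below, and once per
+  // device partition after partitioning.)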
 
   DeviceSet device_set;
   for (auto d : device_mgr_->ListDevices()) {
@@ -577,9 +591,9 @@
 
   if (options.optimize_graph_fn) {
     DumpGraph("Before running graph optimization fn", graph.get());
-    Status status = options.optimize_graph_fn(std::move(ret_node_names),
-                                              &data->overlay_lib_, device_set,
-                                              cpu_device, &graph);
+    Status status = options.optimize_graph_fn(
+        std::move(ret_node_names), std::move(control_ret_node_names),
+        &data->overlay_lib_, device_set, cpu_device, &graph);
     if (!status.ok()) {
       LOG(WARNING) << "Ignoring multi-device function optimization failure: "
                    << status.ToString();
@@ -592,6 +606,13 @@
       OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, optimization_options));
   DumpGraph("After all optimization passes", graph.get());
 
+  if (options.graph_collector != nullptr) {
+    GraphDef def;
+    graph->ToGraphDef(&def);
+    *def.mutable_library() = lib_def->ReachableDefinitions(def).ToProto();
+    options.graph_collector->CollectOptimizedGraph(def);
+  }
+
   std::unordered_map<string, std::unique_ptr<Graph>> subgraphs;
   TF_RETURN_IF_ERROR(
       PartitionFunctionGraph(device_set, std::move(graph), &subgraphs));
@@ -600,7 +621,8 @@
     for (const auto& pair : subgraphs) {
       GraphDef def;
       pair.second->ToGraphDef(&def);
-      options.graph_collector->CollectGraph(def);
+      *def.mutable_library() = lib_def->ReachableDefinitions(def).ToProto();
+      options.graph_collector->CollectPartitionedGraph(def);
     }
   }
 
diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h
index f30e440..bc877db 100644
--- a/tensorflow/core/common_runtime/process_state.h
+++ b/tensorflow/core/common_runtime/process_state.h
@@ -130,7 +130,7 @@
   bool TracksAllocationSizes() override { return a_->TracksAllocationSizes(); }
   size_t RequestedSize(const void* p) override { return a_->RequestedSize(p); }
   size_t AllocatedSize(const void* p) override { return a_->AllocatedSize(p); }
-  void GetStats(AllocatorStats* stats) override { a_->GetStats(stats); }
+  absl::optional<AllocatorStats> GetStats() override { return a_->GetStats(); }
   void ClearStats() override { a_->ClearStats(); }
   ProcessState::MDMap* mm_;  // not owned
   Allocator* a_;             // not owned
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index e1dc08d..d42b8d5 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -34,14 +34,23 @@
 
 namespace {
 
+int32 DefaultNumInterOpThreads() {
+  // Use environment setting if specified (init once)
+  static int env_num_threads = NumInterOpThreadsFromEnvironment();
+  if (env_num_threads > 0) {
+    return env_num_threads;
+  }
+
+  // Default to using the number of cores available in the process.
+  return port::NumSchedulableCPUs();
+}
+
 static thread::ThreadPool* InitComputePool(const SessionOptions& options) {
   int32 inter_op_parallelism_threads =
       options.config.inter_op_parallelism_threads();
   if (inter_op_parallelism_threads == 0) {
-    // Default to using the number of cores available in the process.
-    inter_op_parallelism_threads = port::NumSchedulableCPUs();
+    inter_op_parallelism_threads = DefaultNumInterOpThreads();
   }
-
   return new thread::ThreadPool(Env::Default(), "Compute",
                                 inter_op_parallelism_threads);
 }
@@ -53,6 +62,18 @@
   return compute_pool;
 }
 
+int32 NumInterOpThreadsFromEnvironment() {
+  int32 num;
+  const char* val = std::getenv("TF_NUM_INTEROP_THREADS");
+  return (val && strings::safe_strto32(val, &num)) ? num : 0;
+}
+
+int32 NumIntraOpThreadsFromEnvironment() {
+  int32 num;
+  const char* val = std::getenv("TF_NUM_INTRAOP_THREADS");
+  return (val && strings::safe_strto32(val, &num)) ? num : 0;
+}
+
 int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
@@ -67,7 +88,7 @@
 #endif  // _OPENMP
     DCHECK_GE(mkl_intra_op, 1);
     const int32 mkl_inter_op = std::max(
-        (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+        (DefaultNumInterOpThreads() + mkl_intra_op - 1) / mkl_intra_op, 2);
     VLOG(0)
         << "Creating new thread pool with default inter op setting: "
         << mkl_inter_op
@@ -75,8 +96,7 @@
     return mkl_inter_op;
   }
 #endif  // INTEL_MKL
-  // Default to using the number of cores available in the process.
-  return port::NumSchedulableCPUs();
+  return DefaultNumInterOpThreads();
 }
 
 thread::ThreadPool* NewThreadPoolFromSessionOptions(
diff --git a/tensorflow/core/common_runtime/process_util.h b/tensorflow/core/common_runtime/process_util.h
index 5d92666..7ad658b 100644
--- a/tensorflow/core/common_runtime/process_util.h
+++ b/tensorflow/core/common_runtime/process_util.h
@@ -30,7 +30,18 @@
 // using 'options'.  Caller does not take ownership over threadpool.
 thread::ThreadPool* ComputePool(const SessionOptions& options);
 
-// Returns number of inter op threads.
+// Returns the TF_NUM_INTEROP_THREADS environment value, or 0 if not specified.
+int32 NumInterOpThreadsFromEnvironment();
+
+// Returns the TF_NUM_INTRAOP_THREADS environment value, or 0 if not specified.
+int32 NumIntraOpThreadsFromEnvironment();
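+//
+// Example: running a binary with TF_NUM_INTEROP_THREADS=4 set in the
+// environment makes NumInterOpThreadsFromEnvironment() return 4; when the
+// variable is unset or unparsable these functions return 0, and callers fall
+// back to a default based on the number of schedulable CPUs.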
+
+// Returns the number of inter op threads specified in `options` or a default.
+// If no value is specified in the provided options, then the function returns
+// the value defined in the TF_NUM_INTEROP_THREADS environment variable.
+// If no value is specified in either the options or the environment,
+// this function will return a reasonable default value based on the number
+// of schedulable CPUs and any MKL and OpenMP configuration.
 int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options);
 
 // Creates a thread pool with number of inter op threads.
diff --git a/tensorflow/core/common_runtime/ring_alg.cc b/tensorflow/core/common_runtime/ring_alg.cc
new file mode 100644
index 0000000..c20cc74
--- /dev/null
+++ b/tensorflow/core/common_runtime/ring_alg.cc
@@ -0,0 +1,430 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/common_runtime/ring_alg.h"
+
+#include <stdlib.h>
+#include <atomic>
+#include <functional>
+#include <utility>
+
+#include "tensorflow/core/common_runtime/collective_rma_local.h"
+#include "tensorflow/core/common_runtime/collective_util.h"
+#include "tensorflow/core/common_runtime/copy_tensor.h"
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/common_runtime/dma_helper.h"
+#include "tensorflow/core/common_runtime/process_util.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/device_base.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/types.h"
+
+// Set to true for more intelligible debug-mode log messages.
+#define READABLE_KEYS false
+// A ring algorithm exchanges chunks of a tensor between devices.  The chunk
+// size depends on the number of subdivisions specified in the algorithm.  If
+// the user does not specify the number of subdivisions, we may infer the
+// number dynamically so that the resulting chunk size does not exceed
+// kMaxChunkSizeBytes, empirically set at 4 MiB.
+constexpr size_t kMaxChunkSizeBytes = (4 * 1024 * 1024);
+// kMaxSubdivsPerDevice is used to give an upper bound on the number of
+// subdivisions dynamically generated.  A reasonable value would be a small
+// multiple of the number of NICs adjacent to each device.
+constexpr int kMaxSubdivsPerDevice = 2;
+
+namespace tensorflow {
+namespace {
+// Each CollectiveOp implementation is free to define its own
+// BufRendezvous key format.  This function produces the key used by
+// RingAlg instances.  Note that the exec_key already differentiates between
+// different instances, so we don't need to further differentiate between
+// subclasses of RingAlg.
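+// For example (illustrative values only): with name "Reduce", exec_key
+// "exec_17", pass 1, section 3, and source rank 2, READABLE_KEYS yields
+// "Reduce(exec_17):pass(1):section(3):srcrank(2)", while the default dense
+// form is "exec_17:1:3:2".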
+string RingAlgBufKey(const string& name, const string& exec_key, int pass,
+                     int section, int source_rank) {
+  if (READABLE_KEYS) {
+    return strings::StrCat(name, "(", exec_key, "):pass(", pass, "):section(",
+                           section, "):srcrank(", source_rank, ")");
+  } else {
+    // TODO(b/78352018): Try out some kind of denser encoding, e.g. 128 bit
+    // hash.
+    return strings::StrCat(exec_key, ":", pass, ":", section, ":", source_rank);
+  }
+}
+
+}  // namespace
+
+void RingAlg::PCQueue::Enqueue(RingField* rf) {
+  mutex_lock l(pcq_mu_);
+  deque_.push_back(rf);
+  if (waiter_count_ > 0) {
+    cv_.notify_one();
+  }
+}
+
+RingAlg::RingField* RingAlg::PCQueue::Dequeue() {
+  mutex_lock l(pcq_mu_);
+  if (deque_.empty()) {
+    ++waiter_count_;
+    while (deque_.empty()) {
+      cv_.wait(l);
+    }
+    --waiter_count_;
+  }
+  RingField* rf = deque_.front();
+  deque_.pop_front();
+  return rf;
+}
+
+RingAlg::RingAlg(CollectiveType type, const string& name)
+    : type_(type),
+      name_(name),
+      col_ctx_(nullptr),
+      col_params_(nullptr),
+      done_(nullptr),
+      group_size_(-1),
+      num_subdivs_(-1) {}
+
+namespace {
+Status GenerateSubdivsInCollectiveParams(CollectiveParams* col_params) {
+  if (col_params->instance.shape.num_elements() == 0) {
+    return errors::Internal("shape in CollectiveParams should be non-empty");
+  }
+  const int kAvgDevPerTask =
+      col_params->group.group_size / col_params->group.num_tasks;
+  const int kMaxNumSubdivs = kMaxSubdivsPerDevice * kAvgDevPerTask;
+  if (kMaxNumSubdivs <= 0) {
+    return errors::Internal("Unexpected kMaxNumSubdivs ", kMaxNumSubdivs,
+                            " in ",
+                            col_params->instance.impl_details.collective_name);
+  }
+  // NOTE(ayushd): If no subdiv_offsets have been specified, dynamically add
+  // as many offsets as needed so that the size of tensor chunks <=
+  // kMaxChunkSizeBytes.  Empirically, chunks that are too small or too large
+  // lead to worse performance.
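+  // Illustrative example (hypothetical sizes): a 64 MiB tensor on a group of
+  // 4 devices gives chunk sizes of 16 MiB, 8 MiB, ~5.3 MiB, and 4 MiB for
+  // num_subdivs = 1..4, so the loop below stops at num_subdivs = 4, the first
+  // value for which chunk_size <= kMaxChunkSizeBytes.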
+  int num_subdivs = 0;
+  const size_t tensor_size = col_params->instance.shape.num_elements() *
+                             DataTypeSize(col_params->instance.data_type);
+  size_t chunk_size;
+  do {
+    ++num_subdivs;
+    int num_chunks = col_params->group.group_size * num_subdivs;
+    chunk_size = tensor_size / num_chunks;
+    VLOG(2) << "num_subdivs " << num_subdivs << " num_chunks " << num_chunks
+            << " chunk_size " << chunk_size;
+  } while (chunk_size > kMaxChunkSizeBytes && num_subdivs < kMaxNumSubdivs);
+  if (num_subdivs <= 0) {
+    return errors::Internal("Unexpected num_subdivs ", num_subdivs, " in ",
+                            col_params->instance.impl_details.collective_name);
+  }
+
+  int subdiv_stride = kAvgDevPerTask / num_subdivs;
+  if (subdiv_stride == 0) subdiv_stride = 1;
+  col_params->instance.impl_details.subdiv_offsets.reserve(num_subdivs);
+  for (int sdi = 0; sdi < num_subdivs; ++sdi) {
+    int subdiv_offset = subdiv_stride * sdi;
+    if (sdi % 2 == 1) subdiv_offset *= -1;
+    col_params->instance.impl_details.subdiv_offsets.push_back(subdiv_offset);
+  }
+
+  if (VLOG_IS_ON(2)) {
+    string subdiv_buf;
+    for (const int subdiv_offset :
+         col_params->instance.impl_details.subdiv_offsets) {
+      strings::StrAppend(&subdiv_buf, " ", subdiv_offset);
+    }
+    VLOG(2) << "Dynamically generated " << num_subdivs
+            << " subdiv_offsets:" << subdiv_buf << " tensor_size "
+            << tensor_size << " chunk_size " << chunk_size;
+  }
+
+  return Status::OK();
+}
+}  // namespace
+
+Status RingAlg::InitializeCollectiveParams(CollectiveParams* col_params) {
+  const string& device_name =
+      col_params->instance.device_names[col_params->default_rank];
+  // Each subdiv permutation is a ring formed by rotating each
+  // single-task subsequence of devices by an offset.  This makes most
+  // sense when each task has the same number of devices, but we can't
+  // depend on that being the case, so we compute something that
+  // works in any case.
+
+  // Start by counting the devices in each task.
+  // Precondition: device_names must be sorted so that all devices in
+  // the same task are adjacent.
+  VLOG(2) << "Sorted task names: "
+          << str_util::Join(col_params->instance.task_names, ", ");
+  std::vector<int> dev_per_task;
+  const string* prior_task_name = &col_params->instance.task_names[0];
+  int dev_count = 1;
+  for (int di = 1; di < col_params->group.group_size; ++di) {
+    if (col_params->instance.task_names[di] != *prior_task_name) {
+      dev_per_task.push_back(dev_count);
+      dev_count = 1;
+      prior_task_name = &col_params->instance.task_names[di];
+    } else {
+      ++dev_count;
+    }
+  }
+  dev_per_task.push_back(dev_count);
+  DCHECK_EQ(col_params->group.num_tasks, dev_per_task.size());
+
+  if (col_params->instance.impl_details.subdiv_offsets.empty()) {
+    TF_RETURN_IF_ERROR(GenerateSubdivsInCollectiveParams(col_params));
+  }
+
+  // Generate a ring permutation for each requested offset.
+  VLOG(2) << "Setting up perms for col_params " << col_params
+          << " subdiv_permutations "
+          << &col_params->instance.impl_details.subdiv_permutations;
+  col_params->instance.impl_details.subdiv_permutations.resize(
+      col_params->instance.impl_details.subdiv_offsets.size());
+  col_params->subdiv_rank.resize(
+      col_params->instance.impl_details.subdiv_offsets.size(), -1);
+  for (int sdi = 0;
+       sdi < col_params->instance.impl_details.subdiv_offsets.size(); ++sdi) {
+    std::vector<int>& perm =
+        col_params->instance.impl_details.subdiv_permutations[sdi];
+    DCHECK_EQ(perm.size(), 0);
+    int offset = col_params->instance.impl_details.subdiv_offsets[sdi];
+    // A negative subdivision offset is interpreted as follows:
+    //  1. Reverse the local device ordering.
+    //  2. Begin the subdivision at abs(offset) in the reversed ordering.
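+    // For example (illustrative): with 4 devices in a task, offset -1
+    // reverses the local order [0 1 2 3] and rotates it by abs(-1) = 1,
+    // producing the per-task ordering [2 1 0 3].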
+    bool reverse = false;
+    if (offset < 0) {
+      offset = abs(offset);
+      reverse = true;
+    }
+    int prior_dev_count = 0;  // sum over prior worker device counts
+    for (int ti = 0; ti < col_params->group.num_tasks; ++ti) {
+      for (int di = 0; di < dev_per_task[ti]; ++di) {
+        int di_offset = (di + offset) % dev_per_task[ti];
+        int offset_di =
+            reverse ? (dev_per_task[ti] - (di_offset + 1)) : di_offset;
+        // Device index in global subdivision permutation.
+        int permuted_di = prior_dev_count + offset_di;
+        int rank = static_cast<int>(perm.size());
+        perm.push_back(permuted_di);
+        if (col_params->instance.device_names[permuted_di] == device_name) {
+          DCHECK_EQ(permuted_di, col_params->default_rank);
+          col_params->subdiv_rank[sdi] = rank;
+        }
+      }
+      prior_dev_count += dev_per_task[ti];
+    }
+    DCHECK_EQ(col_params->group.group_size, perm.size());
+  }
+
+  VLOG(2) << collective_util::SubdivPermDebugString(*col_params);
+  return Status::OK();
+}
+
+Status RingAlg::InitializeCollectiveContext(CollectiveContext* col_ctx) {
+  DCHECK(col_ctx->dev_mgr);
+  col_ctx_ = col_ctx;
+  col_params_ = &col_ctx->col_params;
+  return collective_util::InitializeDeviceAndLocality(
+      col_ctx->dev_mgr, col_ctx->device_name, &col_ctx->device,
+      &col_ctx->device_locality);
+}
+
+string RingAlg::TensorDebugString(const Tensor& tensor) {
+  const DeviceBase::GpuDeviceInfo* gpu_device_info =
+      col_ctx_->op_ctx->device()->tensorflow_gpu_device_info();
+  if (gpu_device_info) {
+    Tensor cpu_tensor(tensor.dtype(), tensor.shape());
+    Notification note;
+    gpu_device_info->default_context->CopyDeviceTensorToCPU(
+        &tensor, "" /*tensor_name*/, col_ctx_->device, &cpu_tensor,
+        [&note](const Status& s) {
+          DCHECK(s.ok());
+          note.Notify();
+        });
+    note.WaitForNotification();
+    return cpu_tensor.SummarizeValue(64);
+  } else {
+    return tensor.SummarizeValue(64);
+  }
+}
+
+void RingAlg::StartAbort(const Status& s) {
+  // In abort mode we stop issuing additional ProvideBuf
+  // and ConsumeBuf calls, but we need to wait for all of the
+  // outstanding callbacks to be invoked before quitting.
+  bool abort_started = false;
+  {
+    mutex_lock l(status_mu_);
+    if (status_.ok()) {
+      LOG(ERROR) << "Aborting Ring" << name_ << " with " << s;
+      abort_started = true;
+      status_.Update(s);
+    }
+  }
+  // If this is the initial entry to abort mode then invoke StartAbort
+  // on the CollectiveExecutor that invoked us.  That should start
+  // cancellation on all of the outstanding CollectiveRemoteAccess
+  // actions.
+  if (abort_started) {
+    col_ctx_->col_exec->StartAbort(s);
+  }
+}
+
+void RingAlg::Finish(bool ok) {
+  if (ok) {
+    // Recover the output from the adaptor.
+    ca_->ConsumeFinalValue(col_ctx_->output);
+  }
+  Status s;
+  {
+    mutex_lock l(status_mu_);
+    s = status_;
+  }
+  rfv_.clear();  // Give up Refs on output tensor.
+  done_(s);
+}
+
+// At the beginning of the algorithm, initialize a RingField struct for
+// every independent field of the tensor.
+void RingAlg::InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
+                            int field_idx) {
+  // Note on field indexing: There are group_size_ devices in the
+  // instance, implying the same number of chunks per tensor, where a
+  // chunk is the unit of data transferred in a time step.  However, if
+  // a device can simultaneously send data by 2 or more independent
+  // channels we can speed up the transfer by subdividing chunks and
+  // processing multiple subdivisions at once.  So the actual number
+  // of RingFields is group_size_ * num_subdivs_.
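+  // For example (illustrative): group_size_ = 4 with num_subdivs_ = 2 yields
+  // 8 RingFields, and (chunk_idx = 2, subdiv_idx = 1) maps to
+  // field_idx = 2 * 2 + 1 = 5.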
+  DCHECK_EQ(field_idx, (chunk_idx * num_subdivs_) + subdiv_idx);
+  rf->chunk_idx = chunk_idx;
+  rf->subdiv_idx = subdiv_idx;
+  rf->sc_idx = field_idx;
+  rf->rank = col_params_->subdiv_rank[subdiv_idx];
+  rf->second_pass = false;
+  rf->action = RF_INIT;
+  // Recv from the device with preceding rank within the subdivision.
+  int recv_from_rank = (rf->rank + (group_size_ - 1)) % group_size_;
+  int send_to_rank = (rf->rank + 1) % group_size_;
+  rf->recv_dev_idx = col_params_->instance.impl_details
+                         .subdiv_permutations[subdiv_idx][recv_from_rank];
+  int send_dev_idx = col_params_->instance.impl_details
+                         .subdiv_permutations[subdiv_idx][send_to_rank];
+  rf->recv_is_remote = !col_params_->task.is_local[rf->recv_dev_idx];
+  rf->send_is_remote = !col_params_->task.is_local[send_dev_idx];
+  if (ca_->ChunkBytes(rf->sc_idx) > 0) {
+    // In pass 0 we skip Recv when rank = chunk_idx
+    rf->do_recv = (rf->chunk_idx != rf->rank);
+    // In pass 0 we skip Send when rank = chunk_idx-1
+    rf->do_send =
+        (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_));
+  }
+  rf->is_final =
+      (rf->rank == ((rf->chunk_idx + (group_size_ - 1)) % group_size_));
+  if (rf->do_send || rf->do_recv) {
+    rf->chunk = ca_->ChunkAlias(rf->sc_idx);
+  }
+  VLOG(2) << this << " InitRingField " << rf->DebugString() << " chunk "
+          << ca_->TBounds(rf->chunk);
+}
+
+// When a RingField transitions from the first pass to the second, recompute
+// the do_send and do_recv values.
+void RingAlg::AdvanceToSecondPass(RingField* rf) {
+  VLOG(3) << "IncrRingField old value " << rf->DebugString();
+  DCHECK(!rf->second_pass);
+  rf->second_pass = true;
+  rf->action = RF_INIT;
+  if (ca_->ChunkBytes(rf->sc_idx) > 0) {
+    // In pass 1 the send/no-send boundary moves down 1 place.
+    rf->do_recv =
+        (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_));
+    rf->do_send =
+        (rf->rank != ((rf->chunk_idx + (group_size_ - 2)) % group_size_));
+  }
+  rf->is_final =
+      (rf->rank == ((rf->chunk_idx + (group_size_ - 2)) % group_size_));
+  VLOG(3) << "IncrRingField new value " << rf->DebugString();
+}
+
+string RingAlg::RingField::DebugString() const {
+  string rv = strings::StrCat("RingField rank=", rank, " chunk_idx=", chunk_idx,
+                              " subdiv=", subdiv_idx, " sc_idx=", sc_idx,
+                              " action=", action);
+  strings::StrAppend(&rv, " pass=", second_pass);
+  strings::StrAppend(&rv, " do_send=", do_send, " do_recv=", do_recv,
+                     " is_final=", is_final, " recv_is_remote=", recv_is_remote,
+                     " recv_dev_idx=", recv_dev_idx, " sc_idx=", sc_idx);
+  return rv;
+}
+
+void RingAlg::DispatchSend(RingField* rf, const StatusCallback& done) {
+  DCHECK(rf->do_send);
+  string send_buf_key = RingAlgBufKey(name_, col_ctx_->exec_key,
+                                      rf->second_pass, rf->sc_idx, rf->rank);
+  VLOG(3) << "DispatchSend rank=" << col_params_->default_rank << " send key "
+          << send_buf_key << " chunk " << ca_->TBounds(rf->chunk) << " sc_idx "
+          << rf->sc_idx;
+  int send_to_rank = (rf->rank + 1) % group_size_;
+  int send_to_dev_idx = col_params_->instance.impl_details
+                            .subdiv_permutations[rf->subdiv_idx][send_to_rank];
+  col_ctx_->col_exec->PostToPeer(
+      col_params_->instance.device_names[send_to_dev_idx],
+      col_params_->instance.task_names[send_to_dev_idx], send_buf_key,
+      col_ctx_->device, col_ctx_->op_ctx->op_device_context(),
+      col_ctx_->op_ctx->output_alloc_attr(0), &rf->chunk,
+      col_ctx_->device_locality, done);
+}
+
+void RingAlg::DispatchRecv(RingField* rf, const StatusCallback& done) {
+  DCHECK(rf->do_recv);
+  string recv_buf_key =
+      RingAlgBufKey(name_, col_ctx_->exec_key, rf->second_pass, rf->sc_idx,
+                    (rf->rank + (group_size_ - 1)) % group_size_);
+  VLOG(3) << "DispatchRecv rank=" << col_params_->default_rank << " recv key "
+          << recv_buf_key << " chunk " << ca_->TBounds(rf->chunk) << " into "
+          << ((col_params_->merge_op != nullptr) ? "tmp_chunk" : "chunk");
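+  // On the first pass of a reducing collective (merge_op set), receive into
+  // tmp_chunk so the incoming data can later be merged with the local chunk;
+  // otherwise receive directly into the output alias.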
+  Tensor* dst_tensor = (!rf->second_pass && (col_params_->merge_op != nullptr))
+                           ? &rf->tmp_chunk
+                           : &rf->chunk;
+  col_ctx_->col_exec->RecvFromPeer(
+      col_params_->instance.device_names[rf->recv_dev_idx],
+      col_params_->instance.task_names[rf->recv_dev_idx],
+      col_params_->task.is_local[rf->recv_dev_idx], recv_buf_key,
+      col_ctx_->device, col_ctx_->op_ctx->op_device_context(),
+      col_ctx_->op_ctx->output_alloc_attr(0), dst_tensor,
+      col_ctx_->device_locality, rf->subdiv_idx, done);
+}
+
+string RingAlg::FieldState() {
+  string s = strings::StrCat(
+      "Ring", name_, " ", strings::Hex(reinterpret_cast<uint64>(this)),
+      " exec ", col_ctx_->exec_key, " step_id=", col_ctx_->step_id,
+      " state of all ", rfv_.size(), " fields:");
+  for (int i = 0; i < rfv_.size(); ++i) {
+    s.append("\n");
+    s.append(rfv_[i].DebugString());
+  }
+  return s;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/ring_alg.h b/tensorflow/core/common_runtime/ring_alg.h
new file mode 100644
index 0000000..dc07618
--- /dev/null
+++ b/tensorflow/core/common_runtime/ring_alg.h
@@ -0,0 +1,124 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_RING_ALG_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_RING_ALG_H_
+
+#include <deque>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/common_runtime/base_collective_executor.h"
+#include "tensorflow/core/framework/collective.h"
+
+namespace tensorflow {
+class Device;
+
+// Basic ring-algorithm implementation to be further specialized
+// for specific collective functions.
+class RingAlg : public CollectiveImplementationInterface {
+ public:
+  explicit RingAlg(CollectiveType type, const string& name);
+  ~RingAlg() override {}
+
+  // Establishes the requested number of subdivision permutations based on the
+  // ring order implicit in the device order.
+  Status InitializeCollectiveParams(CollectiveParams* col_params) override;
+
+  // Initializes members of CollectiveContext not yet initialized, i.e. device
+  // and device_locality.  Also saves the CollectiveContext in this object.
+  Status InitializeCollectiveContext(CollectiveContext* col_ctx) override;
+
+  // No-op for ring alg.
+  Status InitializeInstanceBeforeGroupDiscovery(CollectiveParams*) override {
+    return Status::OK();
+  }
+
+ protected:
+  // Called when an error status is received, implying that execution should
+  // terminate and the error be returned to the caller.
+  void StartAbort(const Status& s);
+  void Finish(bool ok);
+
+  // Current status of a RingField
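+  // Within a pass a RingField moves through these states in declaration
+  // order, skipping any that do not apply; e.g. RingGatherer performs no
+  // accumulation, so it falls through RF_REDUCE and RF_FINALIZE.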
+  enum RingFieldAction {
+    RF_INIT = 0,    // Just initialized for a pass
+    RF_RECV,        // Recv pending
+    RF_REDUCE,      // Reduce pending
+    RF_FINALIZE,    // FinalOp pending
+    RF_SEND_READY,  // Ready to send
+    RF_SEND,        // Send pending
+    RF_DONE,        // No more work
+  };
+
+  // Tracks progress of actions on a single subfield of the entire tensor.
+  struct RingField {
+    int16 chunk_idx;     // major division index
+    int16 subdiv_idx;    // minor division index
+    int16 sc_idx;        // subchunk index
+    int16 rank;          // rank within subdiv permutation
+    int16 recv_dev_idx;  // dev from which value should be recv'd
+    RingFieldAction action;
+    bool second_pass;
+    bool recv_is_remote = false;
+    bool send_is_remote = false;
+    bool do_send = false;   // is the value sent in this pass?
+    bool do_recv = false;   // is the value recv'd in this pass?
+    bool is_final = false;  // is this the last field in the pass for this rank?
+    Tensor chunk;           // alias to field values
+    Tensor tmp_chunk;
+    Status status;
+    string DebugString() const;
+  };
+  virtual void InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
+                             int field_idx);
+  void AdvanceToSecondPass(RingField* rf);
+  void DispatchSend(RingField* rf, const StatusCallback& done);
+  void DispatchRecv(RingField* rf, const StatusCallback& done);
+
+  // For constructing log messages for debugging.
+  string FieldState();
+  string TensorDebugString(const Tensor& tensor);
+
+  // Producer/Consumer Queue of RingField structs.
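+  // Enqueue wakes a single waiter if any are blocked; Dequeue blocks until
+  // an entry is available.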
+  class PCQueue {
+   public:
+    void Enqueue(RingField* rf);
+    RingField* Dequeue();
+
+   private:
+    mutex pcq_mu_;
+    condition_variable cv_;
+    int waiter_count_ GUARDED_BY(pcq_mu_) = 0;
+    std::deque<RingField*> deque_ GUARDED_BY(pcq_mu_);
+  };
+
+  const CollectiveType type_;
+  const string name_;
+  CollectiveContext* col_ctx_;          // Not owned
+  const CollectiveParams* col_params_;  // Not owned
+  StatusCallback done_;
+  int group_size_;
+  int num_subdivs_;
+  Tensor group_size_tensor_;
+  Notification group_size_tensor_ready_;
+  std::unique_ptr<CollectiveAdapter> ca_;
+  mutex status_mu_;
+  Status status_ GUARDED_BY(status_mu_);
+  std::vector<RingField> rfv_;
+};
+
+}  // namespace tensorflow
+#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_RING_ALG_H_
diff --git a/tensorflow/core/common_runtime/ring_gatherer.cc b/tensorflow/core/common_runtime/ring_gatherer.cc
new file mode 100644
index 0000000..58251fc
--- /dev/null
+++ b/tensorflow/core/common_runtime/ring_gatherer.cc
@@ -0,0 +1,266 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/common_runtime/ring_gatherer.h"
+
+#include <stdlib.h>
+#include <atomic>
+#include <functional>
+#include <utility>
+
+#include "tensorflow/core/common_runtime/collective_rma_local.h"
+#include "tensorflow/core/common_runtime/collective_util.h"
+#include "tensorflow/core/common_runtime/copy_tensor.h"
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/common_runtime/dma_helper.h"
+#include "tensorflow/core/common_runtime/process_util.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/device_base.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+Status RingGatherer::InitializeCollectiveParams(CollectiveParams* col_params) {
+  DCHECK_EQ(col_params->instance.type, GATHER_COLLECTIVE);
+  DCHECK_EQ(col_params->instance.impl_details.collective_name, "RingGather");
+  // TODO(tucker): Maybe add subdiv support.  It's only useful with
+  // multiple NICs, and gather performance may not be important enough to
+  // justify it.  For now, there must always be exactly one subdiv, at
+  // offset 0.
+  if (!col_params->instance.impl_details.subdiv_offsets.empty() &&
+      (col_params->instance.impl_details.subdiv_offsets.size() > 1 ||
+       col_params->instance.impl_details.subdiv_offsets[0] != 0)) {
+    return errors::InvalidArgument(
+        "RingGather cannot take any subdiv offset other than 0.");
+  }
+  if (col_params->instance.impl_details.subdiv_offsets.empty()) {
+    col_params->instance.impl_details.subdiv_offsets.push_back(0);
+  }
+  return RingAlg::InitializeCollectiveParams(col_params);
+}
+
+void RingGatherer::Run(StatusCallback done) {
+  DCHECK(col_ctx_);
+  DCHECK(col_params_);
+  done_ = std::move(done);
+  group_size_ = col_params_->group.group_size;
+  num_subdivs_ = static_cast<int>(
+      col_params_->instance.impl_details.subdiv_permutations.size());
+  DCHECK_GT(num_subdivs_, 0);
+
+  if (VLOG_IS_ON(1)) {
+    string buf;
+    for (int r = 0; r < col_params_->instance.device_names.size(); ++r) {
+      strings::StrAppend(&buf, "dev ", r, " : ",
+                         col_params_->instance.device_names[r], "\n");
+    }
+    for (int sd = 0;
+         sd < col_params_->instance.impl_details.subdiv_permutations.size();
+         ++sd) {
+      strings::StrAppend(&buf, "\nsubdiv ", sd, " perm: ");
+      for (auto x :
+           col_params_->instance.impl_details.subdiv_permutations[sd]) {
+        strings::StrAppend(&buf, x, ", ");
+      }
+    }
+    VLOG(1) << "RingGatherer::Run for device " << col_ctx_->device_name
+            << " default_rank " << col_params_->default_rank << "\n"
+            << buf;
+  }
+
+  // Prepare to alias fields within the output.
+  AllocatorAttributes attr = col_ctx_->op_ctx->output_alloc_attr(0);
+  ca_.reset(MakeCollectiveAdapter(col_ctx_->output, group_size_ * num_subdivs_,
+                                  col_ctx_->device->GetAllocator(attr),
+                                  false /*align_chunks*/));
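+  // The adapter views the output tensor as group_size_ * num_subdivs_
+  // contiguous chunks; ChunkAlias(i) below yields a Tensor aliasing chunk i
+  // rather than a copy of it.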
+
+  // Start by copying input to the rank-specific offset of output.
+  // We are running in a blockable thread and the callback can't block, so
+  // just wait here on the copy.
+  Notification note;
+  Status status;
+  Tensor alias_chunk(ca_->ChunkAlias(col_params_->subdiv_rank[0]));
+  CollectiveRemoteAccessLocal::MemCpyAsync(
+      col_ctx_->op_ctx->input_device_context(0),
+      col_ctx_->op_ctx->op_device_context(), col_ctx_->device, col_ctx_->device,
+      col_ctx_->op_ctx->input_alloc_attr(0),
+      col_ctx_->op_ctx->output_alloc_attr(0), col_ctx_->input, &alias_chunk,
+      0 /*dev_to_dev_stream_index*/, [&note, &status](const Status& s) {
+        status.Update(s);
+        note.Notify();
+      });
+  note.WaitForNotification();
+  if (!status.ok()) {
+    done_(status);
+    return;
+  }
+  Finish(RunAsyncParts());
+}
+
+bool RingGatherer::RunAsyncParts() {
+  // This function orchestrates RingGatherer actions on behalf of a
+  // single device. It is entered by a blockable thread that
+  // loops within it until all actions assigned to that device
+  // complete. Hence function local variables are accessible only by that
+  // one thread and do not require an explicit mutex.
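+  // In an all-gather each chunk originates on exactly one rank and makes
+  // group_size_ - 1 hops around the ring: at each step a rank receives the
+  // chunk from its predecessor and forwards it to its successor, so a single
+  // pass leaves every rank holding every chunk.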
+  rfv_.clear();
+  rfv_.resize(group_size_ * num_subdivs_);
+  PCQueue ready_queue;
+  for (int chunk_idx = 0; chunk_idx < group_size_; ++chunk_idx) {
+    for (int subdiv_idx = 0; subdiv_idx < num_subdivs_; ++subdiv_idx) {
+      int rf_index = (chunk_idx * num_subdivs_) + subdiv_idx;
+      InitRingField(&rfv_[rf_index], chunk_idx, subdiv_idx, rf_index);
+      ready_queue.Enqueue(&rfv_[rf_index]);
+    }
+  }
+  const DeviceBase::GpuDeviceInfo* gpu_info =
+      col_ctx_->device->tensorflow_gpu_device_info();
+  if (gpu_info) {
+    // Wait for all currently queued events on the device's compute stream to
+    // complete before proceeding.  The previous InitRingField calls allocated
+    // temp memory buffers that are not guaranteed to be valid (e.g. for RDMA
+    // write) unless we do.
+    Notification note;
+    Status s = gpu_info->default_context->ThenExecute(
+        col_ctx_->device, gpu_info->stream, [&note]() { note.Notify(); });
+    if (s.ok()) {
+      note.WaitForNotification();
+    } else {
+      mutex_lock l(status_mu_);
+      status_ =
+          errors::Internal("Failed to dispatch ThenExecute in RingGatherer");
+      return false;
+    }
+  }
+
+  int field_done_count = 0;
+  int send_pending_count = 0;
+  int recv_pending_count = 0;
+  std::atomic<bool> aborted(false);
+
+  // Loop until all RingFields have advanced to completion.
+  while (field_done_count < rfv_.size()) {
+    VLOG(4) << FieldState();
+    // Wait for a RingField to appear in the ready_queue.
+    RingField* rf = ready_queue.Dequeue();
+    // Advance the RingField to its next action and execute, repeating
+    // until either an async action has been started or the RingField
+    // is done.
+    bool dispatched = false;  // true if async action was initiated
+    do {
+      if (aborted) {
+        // Requeue this RingField to be counted off below.
+        ready_queue.Enqueue(rf);
+        break;
+      }
+      switch (rf->action) {
+        case RF_INIT:
+          if (rf->do_recv) {
+            rf->action = RF_RECV;
+            auto requeue = [this, rf, &ready_queue, &aborted](Status s) {
+              if (!s.ok()) {
+                aborted = true;
+                StartAbort(s);
+              }
+              ready_queue.Enqueue(rf);
+            };
+            DispatchRecv(rf, requeue);
+            dispatched = true;
+            ++recv_pending_count;
+          } else {
+            rf->action = RF_SEND_READY;
+          }
+          break;
+        case RF_RECV:
+          DCHECK_GT(recv_pending_count, 0);
+          --recv_pending_count;
+          rf->action = RF_SEND_READY;
+          break;
+        case RF_REDUCE:
+          // Never used for Gather, so just fall through.
+          TF_FALLTHROUGH_INTENDED;
+        case RF_FINALIZE:
+          // Never used for Gather, so just fall through.
+          TF_FALLTHROUGH_INTENDED;
+        case RF_SEND_READY:
+          if (rf->do_send) {
+            rf->action = RF_SEND;
+            auto send_complete = [this, rf, &ready_queue, &aborted](Status s) {
+              if (!s.ok()) {
+                aborted = true;
+                StartAbort(s);
+              }
+              ready_queue.Enqueue(rf);
+            };
+            DispatchSend(rf, send_complete);
+            dispatched = true;
+            ++send_pending_count;
+          } else {
+            rf->action = RF_DONE;
+          }
+          break;
+        case RF_SEND:
+          DCHECK_GT(send_pending_count, 0);
+          --send_pending_count;
+          rf->action = RF_DONE;
+          break;
+        case RF_DONE:
+          break;
+      }
+      if (rf->action == RF_DONE) {
+        // There's only one pass.
+        ++field_done_count;
+        break;  // from do while(!dispatched)
+      }
+    } while (!dispatched);
+    if (aborted) break;
+  }  // while (field_done_count < number of fields)
+
+  if (aborted) {
+    // All of the pending data actions should be aborted; field the
+    // callbacks and clear the queue before quitting.
+    while ((send_pending_count > 0) || (recv_pending_count > 0)) {
+      RingField* rf = ready_queue.Dequeue();
+      switch (rf->action) {
+        case RF_RECV:
+          --recv_pending_count;
+          break;
+        case RF_SEND:
+          --send_pending_count;
+          break;
+        default: {
+        }  // Ignore any other actions
+      }
+    }
+  }
+
+  DCHECK_EQ(send_pending_count, 0);
+  DCHECK_EQ(recv_pending_count, 0);
+
+  VLOG(2) << this << " device=" << col_ctx_->device_name << " finish;"
+          << " final value " << TensorDebugString(ca_->Value());
+  return !aborted;
+}
+
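+// Registers this implementation under the name matched against
+// impl_details.collective_name in InitializeCollectiveParams above.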
+REGISTER_COLLECTIVE(RingGather, RingGatherer);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/ring_gatherer.h b/tensorflow/core/common_runtime/ring_gatherer.h
new file mode 100644
index 0000000..ee96348
--- /dev/null
+++ b/tensorflow/core/common_runtime/ring_gatherer.h
@@ -0,0 +1,51 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_RING_GATHERER_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_RING_GATHERER_H_
+
+#include <deque>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/common_runtime/base_collective_executor.h"
+#include "tensorflow/core/common_runtime/ring_alg.h"
+#include "tensorflow/core/framework/collective.h"
+
+namespace tensorflow {
+class Device;
+
+// Ring-algorithm implementation of collective all-gather.
+class RingGatherer : public RingAlg {
+ public:
+  RingGatherer() : RingAlg(GATHER_COLLECTIVE, "Gather") {}
+  ~RingGatherer() override {}
+
+  Status InitializeCollectiveParams(CollectiveParams* col_params) override;
+
+  // Begins async execution of the ring gather algorithm.
+  // Must be called in a blockable thread.
+  // TODO(b/80529858): remove the previous warning when we have a dedicated
+  // collective threadpool.
+  void Run(StatusCallback done) override;
+
+ private:
+  bool RunAsyncParts();
+
+  friend class RingGathererTest;
+};
+
+}  // namespace tensorflow
+#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_RING_GATHERER_H_
diff --git a/tensorflow/core/common_runtime/ring_gatherer_test.cc b/tensorflow/core/common_runtime/ring_gatherer_test.cc
new file mode 100644
index 0000000..97ff7b5
--- /dev/null
+++ b/tensorflow/core/common_runtime/ring_gatherer_test.cc
@@ -0,0 +1,651 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/common_runtime/ring_gatherer.h"
+
+#include <algorithm>
+#include "absl/memory/memory.h"
+#include "tensorflow/core/common_runtime/base_collective_executor.h"
+#include "tensorflow/core/common_runtime/collective_rma_local.h"
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/common_runtime/device_resolver_local.h"
+#include "tensorflow/core/common_runtime/dma_helper.h"
+#include "tensorflow/core/common_runtime/process_util.h"
+#include "tensorflow/core/common_runtime/test_collective_executor_mgr.h"
+#include "tensorflow/core/common_runtime/threadpool_device.h"
+#include "tensorflow/core/framework/collective.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/public/version.h"
+
+namespace tensorflow {
+
+// Wraps CollectiveRemoteAccessLocal with the ability to return an
+// error status to the N'th action.
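+// A positive fail_after value N makes the N'th RecvFromPeer or PostToPeer
+// call invoke its callback with errors::Internal("Deliberate failure");
+// fail_after <= 0 disables the injection.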
+class FailTestRMA : public CollectiveRemoteAccessLocal {
+ public:
+  FailTestRMA(const DeviceMgr* dev_mgr, DeviceResolverInterface* dev_resolver,
+              int64 step_id, int fail_after)
+      : CollectiveRemoteAccessLocal(dev_mgr, dev_resolver, step_id),
+        fail_after_(fail_after) {}
+
+  bool MaybeFail(const StatusCallback& done) {
+    bool fail_now = false;
+    {
+      mutex_lock l(mu_);
+      if (fail_after_ > 0) {
+        fail_now = (--fail_after_ == 0);
+      }
+    }
+    if (fail_now) {
+      done(errors::Internal("Deliberate failure"));
+      return true;
+    }
+    return false;
+  }
+
+  void RecvFromPeer(const string& peer_device, const string& peer_task,
+                    bool peer_is_local, const string& key, Device* to_device,
+                    DeviceContext* to_device_ctx,
+                    const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor,
+                    const DeviceLocality& client_locality,
+                    int dev_to_dev_stream_index,
+                    const StatusCallback& done) override {
+    if (MaybeFail(done)) return;
+    CollectiveRemoteAccessLocal::RecvFromPeer(
+        peer_device, peer_task, peer_is_local, key, to_device, to_device_ctx,
+        to_alloc_attr, to_tensor, client_locality, dev_to_dev_stream_index,
+        done);
+  }
+
+  void PostToPeer(const string& peer_device, const string& peer_task,
+                  const string& key, Device* from_device,
+                  DeviceContext* from_device_ctx,
+                  const AllocatorAttributes& from_alloc_attr,
+                  const Tensor* from_tensor,
+                  const DeviceLocality& client_locality,
+                  const StatusCallback& done) override {
+    if (MaybeFail(done)) return;
+    CollectiveRemoteAccessLocal::PostToPeer(
+        peer_device, peer_task, key, from_device, from_device_ctx,
+        from_alloc_attr, from_tensor, client_locality, done);
+  }
+
+  mutex mu_;
+  int fail_after_ GUARDED_BY(mu_);
+};
+
+std::unique_ptr<OpKernel> GetKernel(const NodeDef& node,
+                                    const DeviceType& device_type,
+                                    DeviceBase* device) {
+  Status status;
+  std::unique_ptr<OpKernel> k = CreateOpKernel(
+      device_type, device, device->GetAllocator(AllocatorAttributes()), node,
+      TF_GRAPH_DEF_VERSION, &status);
+  if (!status.ok()) {
+    LOG(FATAL) << status;
+  }
+  return k;
+}
+
+static int64 kStepId = 123;
+
+class RingGathererTest : public ::testing::Test {
+ protected:
+  RingGathererTest() : device_type_(DEVICE_CPU) {}
+
+#ifdef GOOGLE_CUDA
+  void InitGPUDevices() {
+    auto device_factory = DeviceFactory::GetFactory("GPU");
+    CHECK(device_factory);
+    SessionOptions options;
+    Status s = device_factory->CreateDevices(
+        options, "/job:worker/replica:0/task:0", &gpu_devices_);
+    CHECK(s.ok());
+  }
+#endif
+
+  ~RingGathererTest() override {
+    stop_ = true;
+    for (auto i : instances_) delete i;
+    if (col_exec_) col_exec_->Unref();
+  }
+
+  void Init(int num_workers, int num_devices, DataType dtype,
+            const DeviceType& device_type, int num_subdivs, int fail_after) {
+#ifdef GOOGLE_CUDA
+    InitGPUDevices();
+#endif
+    device_type_ = device_type;
+    std::vector<std::unique_ptr<Device>> local_devices;
+    SessionOptions sess_opts;
+    sess_opts.env = Env::Default();
+    Bytes mem_limit(4 << 20);
+    DeviceLocality dev_locality;
+    for (int wi = 0; wi < num_workers; ++wi) {
+      for (int di = 0; di < num_devices; ++di) {
+        if (device_type == DEVICE_CPU) {
+          string dev_name =
+              strings::StrCat("/job:worker/replica:0/task:", wi, "/cpu:", di);
+          local_devices.push_back(absl::make_unique<ThreadPoolDevice>(
+              sess_opts, dev_name, mem_limit, dev_locality, cpu_allocator()));
+        } else if (device_type == DEVICE_GPU && !gpu_devices_.empty()) {
+          int dev_idx = (wi * num_devices) + di;
+          if (dev_idx >= static_cast<int>(gpu_devices_.size())) {
+            LOG(INFO) << "dev_mgr has access to limited GPUs, reusing for more "
+                         "than one ring node.";
+          } else {
+            local_devices.push_back(std::move(gpu_devices_[dev_idx]));
+          }
+        } else {
+          LOG(FATAL) << "Unsupported device_type " << device_type;
+        }
+      }
+    }
+    if (!dev_mgr_ || device_type == DEVICE_CPU) {
+      LOG(ERROR) << "resetting dev_mgr for " << local_devices.size()
+                 << " devices: ";
+      dev_mgr_.reset(new DeviceMgr(std::move(local_devices)));
+    }
+    if (!gpu_ring_order_) gpu_ring_order_.reset(new string());
+    dev_resolver_.reset(new DeviceResolverLocal(dev_mgr_.get()));
+    rma_ = new FailTestRMA(dev_mgr_.get(), dev_resolver_.get(), kStepId,
+                           fail_after);
+    col_exec_ = new BaseCollectiveExecutor(
+        &col_exec_mgr_, rma_, kStepId, dev_mgr_.get(), gpu_ring_order_.get());
+    col_params_.name = "test_collective";
+    static const int kGroupKey = 5;
+    col_params_.group.group_key = kGroupKey;
+    col_params_.group.device_type = device_type;
+    col_params_.group.group_size = num_workers * num_devices;
+    static const int kInstanceKey = 17;
+    col_params_.instance.instance_key = kInstanceKey;
+    col_params_.instance.impl_details.subdiv_offsets.clear();
+    col_params_.instance.type = GATHER_COLLECTIVE;
+    col_params_.instance.impl_details.collective_name = "RingGather";
+    col_params_.instance.data_type = dtype;
+    col_params_.instance.impl_details.subdiv_permutations.resize(num_subdivs);
+    col_params_.subdiv_rank.resize(num_subdivs);
+    int subdiv_stride = num_devices / num_subdivs;
+    for (int sdi = 0; sdi < num_subdivs; ++sdi) {
+      col_params_.instance.impl_details.subdiv_offsets.push_back(sdi *
+                                                                 subdiv_stride);
+      col_params_.subdiv_rank[sdi] = sdi * subdiv_stride;
+    }
+
+    // Set up a local device ring order that's not just 0,1,2...
+    std::vector<int> local_ring_order;
+    for (int di = 0; di < num_devices; ++di) {
+      local_ring_order.push_back(di);
+    }
+    for (int di = 0; di < num_devices; ++di) {
+      bool is_odd = ((di % 2) == 1);
+      int other = (di + (is_odd ? 7 : 3)) % num_devices;
+      if (di == other) continue;
+      iter_swap(local_ring_order.begin() + di,
+                local_ring_order.begin() + other);
+    }
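+    // For example, with num_devices == 4 the swaps above turn {0, 1, 2, 3}
+    // into {1, 2, 0, 3}.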
+    string lro_buf;
+    for (auto d : local_ring_order) strings::StrAppend(&lro_buf, d, ", ");
+    VLOG(1) << "local_ring_order " << lro_buf;
+
+    // Set up all of the fake device contexts.
+    for (int wi = 0; wi < num_workers; ++wi) {
+      for (int di = 0; di < num_devices; ++di) {
+        string task_name = strings::StrCat("/job:worker/replica:0/task:", wi);
+        string dev_name = strings::StrCat(task_name, "/cpu:", di);
+        if (device_type == DEVICE_GPU) {
+          dev_name =
+              strings::StrCat(task_name, "/gpu:", di % gpu_devices_.size());
+        }
+        col_params_.instance.device_names.push_back(dev_name);
+        col_params_.instance.task_names.push_back(task_name);
+        // Normally each device would set is_local to its own perspective but
+        // this test runs in a single process so is_local is always true.
+        col_params_.task.is_local.push_back(true);
+        for (int sdi = 0; sdi < num_subdivs; ++sdi) {
+          int rotated_di =
+              (di + col_params_.instance.impl_details.subdiv_offsets[sdi]) %
+              num_devices;
+          col_params_.instance.impl_details.subdiv_permutations[sdi].push_back(
+              wi * num_devices + local_ring_order[rotated_di]);
+        }
+      }
+    }
+    for (int wi = 0; wi < num_workers; ++wi) {
+      for (int di = 0; di < num_devices; ++di) {
+        int rank = wi * num_devices + di;
+        instances_.push_back(new DeviceInstance(
+            rank, col_params_.instance.device_names[rank], device_type_, this));
+      }
+    }
+  }
+
+  void Gather(int fail_after) {
+    std::atomic<int> done(0);
+    for (auto di : instances_) {
+      SchedClosure([di, &done] {
+        di->DoGather();
+        ++done;
+      });
+      if (fail_after > 0) {
+        // Stagger the op execution starts.
+        Env::Default()->SleepForMicroseconds(100);
+      }
+    }
+    while (done < static_cast<int>(instances_.size())) {
+      if (stop_) break;
+      Env::Default()->SleepForMicroseconds(1000);
+    }
+  }
+
+  template <typename T>
+  void RunTest(DataType dtype, const DeviceType& device_type, int num_workers,
+               int num_devices, int num_subdivs, int tensor_len,
+               int fail_after) {
+    Init(num_workers, num_devices, dtype, device_type, num_subdivs, fail_after);
+    int32 output_len = tensor_len * num_workers * num_devices;
+    std::vector<T> expected(output_len, 0.0);
+    for (int di = 0; di < static_cast<int>(instances_.size()); ++di) {
+      DeviceInstance* instance = instances_[di];
+      int32 instance_offset = di * tensor_len;
+      instance->InitTensor(dtype, TensorShape({tensor_len}),
+                           [instance_offset, &expected, dtype, di](Tensor* t) {
+                             for (size_t i = 0; i < t->NumElements(); ++i) {
+                               // The cast is necessary to prevent clang-tidy
+                               // from insisting that a faster non-open source
+                               // function be substituted.
+                               float value =
+                                   pow(10, static_cast<double>(di)) * i;
+                               if (dtype == DT_INT32 || dtype == DT_INT64) {
+                                 value = di * 10 + i;
+                               }
+                               t->flat<T>()(i) = static_cast<T>(value);
+                               expected[instance_offset + i] = value;
+                             }
+                           });
+    }
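+    // For instance, with DT_FLOAT and tensor_len == 2, device 0 contributes
+    // {0, 1} and device 1 contributes {0, 10}, so every device should end up
+    // with the gathered output {0, 1, 0, 10}.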
+    Gather(fail_after);
+    if (fail_after > 0) {
+      // Confirm that every device terminated with the expected error status.
+      for (int di = 0; di < static_cast<int>(instances_.size()); ++di) {
+        EXPECT_EQ("Deliberate failure",
+                  instances_[di]->status_.error_message());
+      }
+    } else {
+      // Confirm that every device gathered the same set of correct
+      // values.
+      for (int di = 0; di < static_cast<int>(instances_.size()); ++di) {
+        TF_EXPECT_OK(instances_[di]->status_);
+        Tensor* inst = &instances_[di]->output_tensor_;
+        CHECK(inst);
+        Tensor actual(dtype, TensorShape({output_len}));
+        if (device_type_ == DEVICE_CPU) {
+          CHECK(actual.CopyFrom(*inst, inst->shape()));
+          VLOG(1) << "actual " << actual.SummarizeValue(100);
+        } else if (device_type_ == DEVICE_GPU) {
+          Notification note;
+          Device* dev = instances_[di]->device_;
+          auto* dev_info = dev->tensorflow_gpu_device_info();
+          CHECK(dev_info);
+          dev_info->default_context->CopyDeviceTensorToCPU(
+              inst, "" /*tensor_name*/, dev, &actual, [&note](const Status& s) {
+                CHECK(s.ok());
+                note.Notify();
+              });
+          note.WaitForNotification();
+        }
+
+        auto alias = actual.template unaligned_flat<T>();
+        for (int i = 0; i < output_len; ++i) {
+          switch (dtype) {
+            case DT_FLOAT:
+              EXPECT_FLOAT_EQ(expected[i], alias(i))
+                  << "Mismatch at device " << di << " index " << i;
+              break;
+            case DT_DOUBLE:
+              EXPECT_DOUBLE_EQ(expected[i], alias(i))
+                  << "Mismatch at device " << di << " index " << i;
+              break;
+            case DT_INT32:
+            case DT_INT64:
+              EXPECT_EQ(expected[i], alias(i))
+                  << "Mismatch at device " << di << " index " << i;
+              break;
+            default:
+              LOG(FATAL) << "unimplemented";
+          }
+        }
+      }
+    }
+  }
+
+  std::unique_ptr<OpKernel> GetCollectiveGather(const CollectiveParams& params,
+                                                Tensor* input,
+                                                const DeviceType& device_type,
+                                                DeviceBase* device) {
+    mutex_lock l(mu_);
+    NodeDef node_def;
+    NodeDefBuilder builder(
+        strings::StrCat("collective_gather_", gather_counter_++),
+        "CollectiveGather");
+    TF_CHECK_OK(builder.Attr("T", params.instance.data_type)
+                    .Attr("group_size", params.group.group_size)
+                    .Attr("group_key", params.group.group_key)
+                    .Attr("instance_key", params.instance.instance_key)
+                    .Attr("shape", params.instance.shape)
+                    .Input(FakeInput(params.instance.data_type))
+                    .Finalize(&node_def));
+    return GetKernel(node_def, device_type, device);
+  }
+
+  void RunSubdivPermsTest(
+      CollectiveParams* cp,
+      const std::vector<std::vector<int>>& expected_subdiv_perms,
+      const std::vector<int>& expected_subdiv_rank) {
+    col_exec_ = nullptr;
+    cp->instance.impl_details.subdiv_permutations.clear();
+    cp->subdiv_rank.clear();
+    // Create a stub ring gatherer only for testing param initialization.
+    RingGatherer gatherer;
+    TF_CHECK_OK(gatherer.InitializeCollectiveParams(cp));
+    EXPECT_EQ(expected_subdiv_perms,
+              cp->instance.impl_details.subdiv_permutations);
+    EXPECT_EQ(expected_subdiv_rank, cp->subdiv_rank);
+  }
+
+  class DeviceInstance {
+   public:
+    DeviceInstance(int rank, const string& dev_name,
+                   const DeviceType& device_type, RingGathererTest* parent)
+        : parent_(parent),
+          dev_name_(dev_name),
+          device_type_(device_type),
+          rank_(rank) {
+      TF_CHECK_OK(parent_->dev_mgr_->LookupDevice(dev_name, &device_))
+          << "Couldn't find device " << dev_name
+          << " existing devices: " << parent_->dev_mgr_->DebugString();
+      col_params_.name = parent_->col_params_.name;
+      col_params_.group.group_key = parent_->col_params_.group.group_key;
+      col_params_.group.device_type = parent_->col_params_.group.device_type;
+      col_params_.group.group_size = parent_->col_params_.group.group_size;
+      col_params_.instance = parent->col_params_.instance;
+      col_params_.task.is_local = parent_->col_params_.task.is_local;
+      col_params_.subdiv_rank = parent_->col_params_.subdiv_rank;
+
+      int num_subdivs = static_cast<int>(col_params_.subdiv_rank.size());
+      int group_size = col_params_.group.group_size;
+      CHECK_EQ(group_size,
+               static_cast<int>(col_params_.instance.device_names.size()));
+      // The id of this device appears at the rank position in the first
+      // subdiv permutation.
+      int my_device_id =
+          col_params_.instance.impl_details.subdiv_permutations[0][rank];
+      col_params_.default_rank = my_device_id;
+      // Set rank for all other subdivs by finding that device_id.
+      for (int sdi = 0; sdi < num_subdivs; ++sdi) {
+        for (int r = 0; r < static_cast<int>(col_params_.instance.impl_details
+                                                 .subdiv_permutations[sdi]
+                                                 .size());
+             ++r) {
+          if (my_device_id ==
+              col_params_.instance.impl_details.subdiv_permutations[sdi][r]) {
+            col_params_.subdiv_rank[sdi] = r;
+            break;
+          }
+        }
+      }
+    }
+
+    void InitTensor(DataType dtype, const TensorShape& shape,
+                    const std::function<void(Tensor*)>& init_f) {
+      input_tensor_ =
+          Tensor(device_->GetAllocator(AllocatorAttributes()), dtype, shape);
+      if (device_type_ == DEVICE_CPU) {
+        init_f(&input_tensor_);
+      } else if (device_type_ == DEVICE_GPU) {
+        Tensor cpu_tensor(dtype, shape);
+        init_f(&cpu_tensor);
+        auto* dev_info = device_->tensorflow_gpu_device_info();
+        CHECK(dev_info);
+        Notification note;
+        dev_info->default_context->CopyCPUTensorToDevice(
+            &cpu_tensor, device_, &input_tensor_, [&note](const Status& s) {
+              CHECK(s.ok());
+              note.Notify();
+            });
+        note.WaitForNotification();
+      } else {
+        LOG(FATAL) << "Unsupported device_type " << device_type_;
+      }
+    }
+
+    void DoGather() {
+      // Prepare an OpKernelContext.
+      OpKernelContext::Params op_params;
+      op_params.step_id = kStepId;
+      op_params.device = device_;
+      gtl::InlinedVector<TensorValue, 4> inputs;
+      inputs.push_back(TensorValue(&input_tensor_));
+      op_params.inputs = &inputs;
+      gtl::InlinedVector<AllocatorAttributes, 4> input_aa(
+          {AllocatorAttributes()});
+      op_params.input_alloc_attrs = &input_aa;
+      gtl::InlinedVector<DeviceContext*, 4> input_dc;
+      DeviceContext* dev_ctx = nullptr;
+      auto* dev_info = device_->tensorflow_gpu_device_info();
+      if (dev_info) {
+        dev_ctx = dev_info->default_context;
+        dev_ctx->Ref();
+      } else {
+        dev_ctx = new DeviceContext;
+      }
+      input_dc.push_back(dev_ctx);
+      op_params.input_device_contexts = &input_dc;
+      op_params.op_device_context = dev_ctx;
+      AllocatorAttributes generic_alloc_attr;
+      op_params.output_attr_array = &generic_alloc_attr;
+      std::unique_ptr<OpKernel> op = parent_->GetCollectiveGather(
+          col_params_, &input_tensor_, DEVICE_CPU, device_);
+      op_params.op_kernel = op.get();
+      OpKernelContext ctx(&op_params, 1);
+
+      // We never actually execute the kernel, so we need to do the output
+      // allocation that the kernel would otherwise have done.
+      Tensor* output_tensor_ptr = nullptr;
+      TensorShape output_shape({static_cast<int64>(
+          parent_->instances_.size() * input_tensor_.shape().num_elements())});
+      TF_CHECK_OK(ctx.forward_input_or_allocate_output({0}, 0, output_shape,
+                                                       &output_tensor_ptr));
+      CHECK_EQ(output_tensor_ptr, ctx.mutable_output(0));
+      // Prepare a RingGatherer instance.
+      string exec_key =
+          strings::StrCat(col_params_.instance.instance_key, ":0:0");
+      RingGatherer gatherer;
+      CollectiveContext col_ctx(parent_->col_exec_, parent_->dev_mgr_.get(),
+                                &ctx, &op_params, col_params_, exec_key,
+                                kStepId, &input_tensor_, output_tensor_ptr);
+      TF_CHECK_OK(gatherer.InitializeCollectiveContext(&col_ctx));
+
+      // Run the all-gather.
+      gatherer.Run([this](Status s) { status_ = s; });
+      if (status_.ok()) {
+        CHECK(output_tensor_.CopyFrom(*ctx.mutable_output(0),
+                                      ctx.mutable_output(0)->shape()));
+      }
+
+      dev_ctx->Unref();
+    }
+
+    const Tensor& input_tensor() { return input_tensor_; }
+    const Tensor& output_tensor() { return output_tensor_; }
+
+    RingGathererTest* parent_;
+    string dev_name_;
+    DeviceType device_type_;
+    int rank_;
+    Tensor input_tensor_;
+    Tensor output_tensor_;
+    Device* device_;
+    CollectiveParams col_params_;
+    std::unique_ptr<CollectiveAdapter> ca_;
+    std::unique_ptr<OpKernelContext> ctx_;
+    Status status_;
+  };
+
+  bool stop_ = false;
+  DeviceType device_type_;
+  TestCollectiveExecutorMgr col_exec_mgr_;
+  CollectiveExecutor* col_exec_;
+  CollectiveRemoteAccessLocal* rma_;
+  std::unique_ptr<DeviceResolverLocal> dev_resolver_;
+  std::vector<DeviceInstance*> instances_;
+  CollectiveParams col_params_;
+  std::vector<std::unique_ptr<tensorflow::Device>> gpu_devices_;
+  std::unique_ptr<tensorflow::DeviceMgr> dev_mgr_;
+  std::unique_ptr<string> gpu_ring_order_;
+  mutex mu_;
+  int32 gather_counter_ GUARDED_BY(mu_) = 0;
+};
+
+CollectiveParams SetUpCollectiveParams(const int num_devs_per_task,
+                                       const int num_tasks) {
+  CollectiveParams cp;
+  const int kNumDevs = num_devs_per_task * num_tasks;
+  cp.group.group_key = 1;
+  cp.group.group_size = kNumDevs;
+  cp.group.device_type = DeviceType("GPU");
+  cp.group.num_tasks = num_tasks;
+  cp.instance.instance_key = 3;
+  cp.instance.type = GATHER_COLLECTIVE;
+  cp.instance.data_type = DataType(DT_FLOAT);
+  cp.instance.shape = TensorShape({kNumDevs * kNumDevs});
+  cp.instance.impl_details.collective_name = "RingGather";
+  cp.instance.impl_details.subdiv_offsets.push_back(0);
+  cp.is_source = false;
+  for (int i = 0; i < kNumDevs; ++i) {
+    int task_id = i / num_devs_per_task;
+    int dev_id = i % num_devs_per_task;
+    string task_name = strings::StrCat("/job:worker/replica:0/task:", task_id);
+    string device_name = strings::StrCat(task_name, "/device:GPU:", dev_id);
+    cp.instance.task_names.push_back(task_name);
+    cp.instance.device_names.push_back(device_name);
+  }
+  return cp;
+}
+
+TEST_F(RingGathererTest, InitializeParams) {
+  const int kNumDevsPerTask = 8;
+  const int kNumTasks = 3;
+  CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks);
+
+  cp.default_rank = 0;
+  cp.instance.impl_details.subdiv_offsets = {};
+  RunSubdivPermsTest(&cp, {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
+                     {0});
+
+  cp.instance.impl_details.subdiv_offsets = {0};
+  RunSubdivPermsTest(&cp, {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
+                     {0});
+
+  cp.default_rank = 3;
+  cp.instance.impl_details.subdiv_offsets = {};
+  RunSubdivPermsTest(&cp, {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
+                     {3});
+}
+
+// TODO(b/113171733): change to use TEST_P.
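+// DEF_TEST parameter key, mirroring the RunTest arguments: B = data type,
+// T = device type, W = number of workers, D = devices per worker,
+// S = number of subdivs, L = tensor length, A = abort after the A'th
+// collective action (0 means no induced failure).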
+#define DEF_TEST(B, T, W, D, S, L, A)                                         \
+  TEST_F(RingGathererTest,                                                    \
+         DaTy##B##_DevTy##T##_Wkr##W##_Dev##D##_Sdiv##S##_Len##L##_Abrt##A) { \
+    DataType dtype = DT_##B;                                                  \
+    switch (dtype) {                                                          \
+      case DT_FLOAT: {                                                        \
+        RunTest<float>(dtype, DEVICE_##T, W, D, S, L, A);                     \
+      } break;                                                                \
+      case DT_DOUBLE: {                                                       \
+        RunTest<double>(dtype, DEVICE_##T, W, D, S, L, A);                    \
+      } break;                                                                \
+      case DT_INT32: {                                                        \
+        RunTest<int32>(dtype, DEVICE_##T, W, D, S, L, A);                     \
+      } break;                                                                \
+      case DT_INT64: {                                                        \
+        RunTest<int64>(dtype, DEVICE_##T, W, D, S, L, A);                     \
+      } break;                                                                \
+      default:                                                                \
+        LOG(FATAL) << "Unimplemented";                                        \
+    }                                                                         \
+  }
+
+#ifndef GOOGLE_CUDA
+// Success tests
+DEF_TEST(FLOAT, CPU, 1, 2, 1, 1, 0)
+DEF_TEST(FLOAT, CPU, 1, 2, 1, 2, 0)
+DEF_TEST(FLOAT, CPU, 1, 2, 1, 8, 0)
+DEF_TEST(FLOAT, CPU, 1, 2, 1, 16, 0)
+DEF_TEST(FLOAT, CPU, 1, 2, 1, 1001, 0)
+DEF_TEST(FLOAT, CPU, 2, 4, 1, 128, 0)
+DEF_TEST(FLOAT, CPU, 2, 8, 1, 1001, 0)
+DEF_TEST(FLOAT, CPU, 2, 8, 1, 4096, 0)
+DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 0)
+DEF_TEST(FLOAT, CPU, 4, 4, 1, 32768, 0)
+DEF_TEST(DOUBLE, CPU, 1, 2, 1, 1001, 0)
+DEF_TEST(DOUBLE, CPU, 2, 8, 1, 4095, 0)
+DEF_TEST(INT32, CPU, 1, 2, 1, 1001, 0)
+DEF_TEST(INT32, CPU, 2, 8, 1, 4095, 0)
+DEF_TEST(INT64, CPU, 1, 2, 1, 1001, 0)
+DEF_TEST(INT64, CPU, 2, 8, 1, 4095, 0)
+
+// Failure tests
+DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 1)
+DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 7)
+DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 11)
+#endif
+
+#ifdef GOOGLE_CUDA
+// GPU tests.  So long as the device names are all in a single task, we
+// bypass inter-worker routing code and can fake multiple GPUs with a single
+// GPU, from the perspective of the RingGatherer logic.  So these tests
+// are all single-worker.
+DEF_TEST(FLOAT, GPU, 1, 2, 1, 1, 0)
+DEF_TEST(FLOAT, GPU, 1, 2, 1, 2, 0)
+DEF_TEST(FLOAT, GPU, 1, 2, 1, 8, 0)
+DEF_TEST(FLOAT, GPU, 1, 2, 1, 16, 0)
+DEF_TEST(FLOAT, GPU, 1, 2, 1, 1001, 0)
+DEF_TEST(FLOAT, GPU, 1, 8, 1, 1001, 0)
+DEF_TEST(FLOAT, GPU, 1, 8, 1, 4096, 0)
+DEF_TEST(FLOAT, GPU, 1, 8, 1, 4095, 0)
+DEF_TEST(FLOAT, GPU, 1, 8, 1, 32768, 0)
+DEF_TEST(FLOAT, GPU, 1, 4, 1, 32768, 0)
+DEF_TEST(DOUBLE, GPU, 1, 2, 1, 1001, 0)
+// INT32 values are never on the GPU.
+// DEF_TEST(INT32, GPU, 1, 1, 1, 1001, 0)
+DEF_TEST(INT64, GPU, 1, 2, 1, 1001, 0)
+
+// Failure tests
+DEF_TEST(FLOAT, GPU, 1, 8, 1, 9408, 2)
+DEF_TEST(FLOAT, GPU, 1, 8, 1, 9408, 5)
+#endif
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/ring_reducer.cc b/tensorflow/core/common_runtime/ring_reducer.cc
index 8ed2fc2..3328804 100644
--- a/tensorflow/core/common_runtime/ring_reducer.cc
+++ b/tensorflow/core/common_runtime/ring_reducer.cc
@@ -39,212 +39,15 @@
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/types.h"
 
-// Set true for greater intelligibility of debug mode log messages.
-#define READABLE_KEYS false
-// RingReduce algorithm exchanges chunks of tensor between devices.  The chunk
-// size depends on the number of subdivisions specified in the algorithm.  If
-// the user does not specify the number of subdivisions, we infer the number
-// dynamically so that the resulting chunk size does not exceed
-// kMaxChunkSizeBytes, empirically set at 4 MiB.
-constexpr size_t kMaxChunkSizeBytes = (4 * 1024 * 1024);
-// kMaxSubdivsPerDev is used to give an upper bound on the number of
-// subdivisions dynamically generated.  A reasonable value would be a small
-// multiple of the number of NICs adjacent to each device.
-constexpr int kMaxSubdivsPerDevice = 2;
-
 namespace tensorflow {
-namespace {
-// Each CollectiveOp implementation is free to define its own
-// BufRendezvous key format.  This function produces the key used by
-// RingReducer.
-string RingReduceBufKey(const string& exec_key, int pass, int section,
-                        int source_rank) {
-  if (READABLE_KEYS) {
-    return strings::StrCat("rred(", exec_key, "):pass(", pass, "):section(",
-                           section, "):srcrank(", source_rank, ")");
-  } else {
-    // TODO(b/78352018): Try out some kind of denser encoding, e.g. 128 bit
-    // hash.
-    return strings::StrCat(exec_key, ":", pass, ":", section, ":", source_rank);
-  }
-}
-
-}  // namespace
-
-void RingReducer::PCQueue::Enqueue(RingField* rf) {
-  mutex_lock l(pcq_mu_);
-  deque_.push_back(rf);
-  if (waiter_count_ > 0) {
-    cv_.notify_one();
-  }
-}
-
-RingReducer::RingField* RingReducer::PCQueue::Dequeue() {
-  mutex_lock l(pcq_mu_);
-  if (deque_.empty()) {
-    ++waiter_count_;
-    while (deque_.empty()) {
-      cv_.wait(l);
-    }
-    --waiter_count_;
-  }
-  RingField* rf = deque_.front();
-  deque_.pop_front();
-  return rf;
-}
-
-RingReducer::RingReducer()
-    : col_ctx_(nullptr),
-      col_params_(nullptr),
-      done_(nullptr),
-      group_size_(-1),
-      num_subdivs_(-1) {}
 
 RingReducer::~RingReducer() { group_size_tensor_ready_.WaitForNotification(); }
 
-Status GenerateSubdivsInCollectiveParams(CollectiveParams* col_params) {
-  if (col_params->instance.shape.num_elements() == 0) {
-    return errors::Internal("shape in CollectiveParams should be non-empty");
-  }
-  const int kAvgDevPerTask =
-      col_params->group.group_size / col_params->group.num_tasks;
-  const int kMaxNumSubdivs = kMaxSubdivsPerDevice * kAvgDevPerTask;
-  if (kMaxNumSubdivs <= 0) {
-    return errors::Internal("Unexpected kMaxNumSubdivs ", kMaxNumSubdivs,
-                            " in RingReducer");
-  }
-  // NOTE(ayushd): If no subdiv_offsets have been specified, dynamically add
-  // as many offsets as needed so that the size of tensor chunks <=
-  // kMaxChunkSizeBytes.  Empirically, chunks that are too small or too large
-  // lead to worse performance.
-  int num_subdivs = 0;
-  const size_t tensor_size = col_params->instance.shape.num_elements() *
-                             DataTypeSize(col_params->instance.data_type);
-  size_t chunk_size;
-  do {
-    ++num_subdivs;
-    int num_chunks = col_params->group.group_size * num_subdivs;
-    chunk_size = tensor_size / num_chunks;
-    VLOG(2) << "num_subdivs " << num_subdivs << " num_chunks " << num_chunks
-            << " chunk_size " << chunk_size;
-  } while (chunk_size > kMaxChunkSizeBytes && num_subdivs < kMaxNumSubdivs);
-  if (num_subdivs <= 0) {
-    return errors::Internal("Unexpected num_subdivs ", num_subdivs,
-                            " in RingReducer");
-  }
-
-  int subdiv_stride = kAvgDevPerTask / num_subdivs;
-  if (subdiv_stride == 0) subdiv_stride = 1;
-  col_params->instance.impl_details.subdiv_offsets.reserve(num_subdivs);
-  for (int sdi = 0; sdi < num_subdivs; ++sdi) {
-    int subdiv_offset = subdiv_stride * sdi;
-    if (sdi % 2 == 1) subdiv_offset *= -1;
-    col_params->instance.impl_details.subdiv_offsets.push_back(subdiv_offset);
-  }
-
-  if (VLOG_IS_ON(2)) {
-    string subdiv_buf;
-    for (const int subdiv_offset :
-         col_params->instance.impl_details.subdiv_offsets) {
-      strings::StrAppend(&subdiv_buf, " ", subdiv_offset);
-    }
-    VLOG(2) << "Dynamically generated " << num_subdivs
-            << " subdiv_offsets:" << subdiv_buf << " tensor_size "
-            << tensor_size << " chunk_size " << chunk_size;
-  }
-
-  return Status::OK();
-}
-
 Status RingReducer::InitializeCollectiveParams(CollectiveParams* col_params) {
   // TODO(b/113171733): change CHECKs to return errors.
   CHECK_EQ(col_params->instance.type, REDUCTION_COLLECTIVE);
   CHECK_EQ(col_params->instance.impl_details.collective_name, "RingReduce");
-  const string& device_name =
-      col_params->instance.device_names[col_params->default_rank];
-  // Each subdiv permutation is a ring formed by rotating each
-  // single-task subsequence of devices by an offset.  This makes most
-  // sense when each task has the same number of devices but we can't
-  // depend on that being the case so we'll compute something that
-  // works in any case.
-
-  // Start by counting the devices in each task.
-  // Precondition: device_names must be sorted so that all devices in
-  // the same task are adjacent.
-  VLOG(2) << "Sorted task names: "
-          << str_util::Join(col_params->instance.task_names, ", ");
-  std::vector<int> dev_per_task;
-  const string* prior_task_name = &col_params->instance.task_names[0];
-  int dev_count = 1;
-  for (int di = 1; di < col_params->group.group_size; ++di) {
-    if (col_params->instance.task_names[di] != *prior_task_name) {
-      dev_per_task.push_back(dev_count);
-      dev_count = 1;
-      prior_task_name = &col_params->instance.task_names[di];
-    } else {
-      ++dev_count;
-    }
-  }
-  dev_per_task.push_back(dev_count);
-  CHECK_EQ(col_params->group.num_tasks, dev_per_task.size());
-
-  if (col_params->instance.impl_details.subdiv_offsets.empty()) {
-    TF_RETURN_IF_ERROR(GenerateSubdivsInCollectiveParams(col_params));
-  }
-
-  // Generate a ring permutation for requested offset.
-  VLOG(2) << "Setting up perms for col_params " << col_params
-          << " subdiv_permutations "
-          << &col_params->instance.impl_details.subdiv_permutations;
-  col_params->instance.impl_details.subdiv_permutations.resize(
-      col_params->instance.impl_details.subdiv_offsets.size());
-  col_params->subdiv_rank.resize(
-      col_params->instance.impl_details.subdiv_offsets.size(), -1);
-  for (int sdi = 0;
-       sdi < col_params->instance.impl_details.subdiv_offsets.size(); ++sdi) {
-    std::vector<int>& perm =
-        col_params->instance.impl_details.subdiv_permutations[sdi];
-    CHECK_EQ(perm.size(), 0);
-    int offset = col_params->instance.impl_details.subdiv_offsets[sdi];
-    // A negative subdivision offset is interpreted as follows:
-    //  1. Reverse the local device ordering.
-    //  2. Begin the subdivision at abs(offset) in the reversed ordering.
-    bool reverse = false;
-    if (offset < 0) {
-      offset = abs(offset);
-      reverse = true;
-    }
-    int prior_dev_count = 0;  // sum over prior worker device counts
-    for (int ti = 0; ti < col_params->group.num_tasks; ++ti) {
-      for (int di = 0; di < dev_per_task[ti]; ++di) {
-        int di_offset = (di + offset) % dev_per_task[ti];
-        int offset_di =
-            reverse ? (dev_per_task[ti] - (di_offset + 1)) : di_offset;
-        // Device index in global subdivision permutation.
-        int permuted_di = prior_dev_count + offset_di;
-        int rank = static_cast<int>(perm.size());
-        perm.push_back(permuted_di);
-        if (col_params->instance.device_names[permuted_di] == device_name) {
-          CHECK_EQ(permuted_di, col_params->default_rank);
-          col_params->subdiv_rank[sdi] = rank;
-        }
-      }
-      prior_dev_count += dev_per_task[ti];
-    }
-    CHECK_EQ(col_params->group.group_size, perm.size());
-  }
-
-  VLOG(2) << collective_util::SubdivPermDebugString(*col_params);
-  return Status::OK();
-}
-
-Status RingReducer::InitializeCollectiveContext(CollectiveContext* col_ctx) {
-  CHECK(col_ctx->dev_mgr);
-  col_ctx_ = col_ctx;
-  col_params_ = &col_ctx->col_params;
-  return collective_util::InitializeDeviceAndLocality(
-      col_ctx->dev_mgr, col_ctx->device_name, &col_ctx->device,
-      &col_ctx->device_locality);
+  return RingAlg::InitializeCollectiveParams(col_params);
 }
 
 void RingReducer::Run(StatusCallback done) {
@@ -303,25 +106,6 @@
   ContinueAfterInputCopy();
 }
 
-string RingReducer::TensorDebugString(const Tensor& tensor) {
-  const DeviceBase::GpuDeviceInfo* gpu_device_info =
-      col_ctx_->op_ctx->device()->tensorflow_gpu_device_info();
-  if (gpu_device_info) {
-    Tensor cpu_tensor(tensor.dtype(), tensor.shape());
-    Notification note;
-    gpu_device_info->default_context->CopyDeviceTensorToCPU(
-        &tensor, "" /*tensor_name*/, col_ctx_->device, &cpu_tensor,
-        [&note](const Status& s) {
-          CHECK(s.ok());
-          note.Notify();
-        });
-    note.WaitForNotification();
-    return cpu_tensor.SummarizeValue(64);
-  } else {
-    return tensor.SummarizeValue(64);
-  }
-}
-
 // Note that this function is blocking and must not run in any thread
 // which cannot be blocked.
 void RingReducer::ContinueAfterInputCopy() {
@@ -358,170 +142,16 @@
   Finish(RunAsyncParts());
 }
 
-void RingReducer::StartAbort(const Status& s) {
-  // In abort mode we stop issuing additional ProvideBuf
-  // and ConsumeBuf calls, but we need to wait for all of the
-  // outstanding callbacks to be invoked before quitting.
-  bool abort_started = false;
-  {
-    mutex_lock l(status_mu_);
-    if (status_.ok()) {
-      LOG(ERROR) << "Aborting RingReduce with " << s;
-      abort_started = true;
-      status_.Update(s);
-    }
+void RingReducer::InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
+                                int field_idx) {
+  RingAlg::InitRingField(rf, chunk_idx, subdiv_idx, field_idx);
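+  // Unlike gather, reduce needs a scratch buffer: DispatchRecv targets
+  // tmp_chunk when a merge_op is set, so incoming data can be combined with
+  // the chunk already held locally.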
+  if (rf->do_recv) {
+    rf->tmp_chunk = ca_->TempChunk(rf->sc_idx);
   }
-  // If this is the initial entry to abort mode then invoke StartAbort
-  // on the CollectiveExecutor that invoked us.  That should start
-  // cancellation on all of the outstanding CollectiveRemoteAccess
-  // actions.
-  if (abort_started) {
-    col_ctx_->col_exec->StartAbort(s);
-  }
-}
-
-void RingReducer::Finish(bool ok) {
-  if (ok) {
-    // Recover the output from the adaptor.
-    ca_->ConsumeFinalValue(col_ctx_->output);
-  }
-  Status s;
-  {
-    mutex_lock l(status_mu_);
-    s = status_;
-  }
-  rfv_.clear();  // Give up Refs on output tensor.
-  done_(s);
 }
 
 // At the beginning of the algorithm initialize a RingField struct for
 // every independent field of the tensor.
-void RingReducer::InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
-                                int field_idx) {
-  // Note on field indexing: There are group_size_ devices in the
-  // instance, implying the same number of chunks per tensor, where a
-  // chunk is the unit of data transferred in a time step.  However, if
-  // a device can simultaneously send data by 2 or more independent
-  // channels we can speed up the transfer by subdividing chunks and
-  // processing multiple subdivisions at once.  So the actual number
-  // of RingFields is group_size_ * num_subdivs_.
-  DCHECK_EQ(field_idx, (chunk_idx * num_subdivs_) + subdiv_idx);
-  rf->chunk_idx = chunk_idx;
-  rf->subdiv_idx = subdiv_idx;
-  rf->sc_idx = field_idx;
-  rf->rank = col_params_->subdiv_rank[subdiv_idx];
-  rf->second_pass = false;
-  rf->action = RF_INIT;
-  // Recv from the device with preceding rank within the subdivision.
-  int recv_from_rank = (rf->rank + (group_size_ - 1)) % group_size_;
-  int send_to_rank = (rf->rank + 1) % group_size_;
-  rf->recv_dev_idx = col_params_->instance.impl_details
-                         .subdiv_permutations[subdiv_idx][recv_from_rank];
-  int send_dev_idx = col_params_->instance.impl_details
-                         .subdiv_permutations[subdiv_idx][send_to_rank];
-  rf->recv_is_remote = !col_params_->task.is_local[rf->recv_dev_idx];
-  rf->send_is_remote = !col_params_->task.is_local[send_dev_idx];
-  if (ca_->ChunkBytes(rf->sc_idx) > 0) {
-    // In pass 0 we skip Recv when rank = chunk_idx
-    rf->do_recv = (rf->chunk_idx != rf->rank);
-    // In pass 0 we skip Send when rank = chunk_idx-1
-    rf->do_send =
-        (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_));
-  }
-  rf->is_final =
-      (rf->rank == ((rf->chunk_idx + (group_size_ - 1)) % group_size_));
-  if (rf->do_send || rf->do_recv) {
-    rf->chunk = ca_->ChunkAlias(rf->sc_idx);
-    CHECK(rf->chunk.IsAligned()) << rf->DebugString();
-  }
-  if (rf->do_recv) {
-    rf->tmp_chunk = ca_->TempChunk(rf->sc_idx);
-    CHECK(rf->tmp_chunk.IsAligned()) << rf->DebugString();
-  }
-  VLOG(2) << this << " InitRingField " << rf->DebugString() << " chunk "
-          << ca_->TBounds(rf->chunk);
-}
-
-// When a RingField transitions from first to second recompute the
-// do_send and do_recv values.
-void RingReducer::AdvanceToSecondPass(RingField* rf) {
-  VLOG(3) << "IncrRingField old value " << rf->DebugString();
-  CHECK(!rf->second_pass);
-  rf->second_pass = true;
-  rf->action = RF_INIT;
-  if (ca_->ChunkBytes(rf->sc_idx) > 0) {
-    // In pass 1 the send/no-send boundary moves down 1 place.
-    rf->do_recv =
-        (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_));
-    rf->do_send =
-        (rf->rank != ((rf->chunk_idx + (group_size_ - 2)) % group_size_));
-  }
-  rf->is_final =
-      (rf->rank == ((rf->chunk_idx + (group_size_ - 2)) % group_size_));
-  VLOG(3) << "IncrRingField new value " << rf->DebugString();
-}
-
-string RingReducer::RingField::DebugString() const {
-  string rv = strings::StrCat("RingField rank=", rank, " chunk_idx=", chunk_idx,
-                              " subdiv=", subdiv_idx, " sc_idx=", sc_idx,
-                              " action=", action);
-  strings::StrAppend(&rv, " pass=", second_pass);
-  strings::StrAppend(&rv, " do_send=", do_send, " do_recv=", do_recv,
-                     " is_final=", is_final, " recv_is_remote=", recv_is_remote,
-                     " recv_dev_idx=", recv_dev_idx, " sc_idx=", sc_idx);
-  return rv;
-}
-
-void RingReducer::DispatchSend(RingField* rf, const StatusCallback& done) {
-  CHECK(rf->do_send);
-  string send_buf_key = RingReduceBufKey(col_ctx_->exec_key, rf->second_pass,
-                                         rf->sc_idx, rf->rank);
-  VLOG(3) << "DispatchSend rank=" << col_params_->default_rank << " send key "
-          << send_buf_key << " chunk " << ca_->TBounds(rf->chunk) << " sc_idx "
-          << rf->sc_idx;
-  int send_to_rank = (rf->rank + 1) % group_size_;
-  int send_to_dev_idx = col_params_->instance.impl_details
-                            .subdiv_permutations[rf->subdiv_idx][send_to_rank];
-  col_ctx_->col_exec->PostToPeer(
-      col_params_->instance.device_names[send_to_dev_idx],
-      col_params_->instance.task_names[send_to_dev_idx], send_buf_key,
-      col_ctx_->device, col_ctx_->op_ctx->op_device_context(),
-      col_ctx_->op_ctx->output_alloc_attr(0), &rf->chunk,
-      col_ctx_->device_locality, done);
-}
-
-void RingReducer::DispatchRecv(RingField* rf, const StatusCallback& done) {
-  CHECK(rf->do_recv);
-  string recv_buf_key =
-      RingReduceBufKey(col_ctx_->exec_key, rf->second_pass, rf->sc_idx,
-                       (rf->rank + (group_size_ - 1)) % group_size_);
-  VLOG(3) << "DispatchRecv rank=" << col_params_->default_rank << " recv key "
-          << recv_buf_key << " chunk " << ca_->TBounds(rf->chunk) << " into "
-          << ((col_params_->merge_op != nullptr) ? "tmp_chunk" : "chunk");
-  Tensor* dst_tensor = (!rf->second_pass && (col_params_->merge_op != nullptr))
-                           ? &rf->tmp_chunk
-                           : &rf->chunk;
-  col_ctx_->col_exec->RecvFromPeer(
-      col_params_->instance.device_names[rf->recv_dev_idx],
-      col_params_->instance.task_names[rf->recv_dev_idx],
-      col_params_->task.is_local[rf->recv_dev_idx], recv_buf_key,
-      col_ctx_->device, col_ctx_->op_ctx->op_device_context(),
-      col_ctx_->op_ctx->output_alloc_attr(0), dst_tensor,
-      col_ctx_->device_locality, rf->subdiv_idx, done);
-}
-
-string RingReducer::FieldState() {
-  string s = strings::StrCat(
-      "RingReducer ", strings::Hex(reinterpret_cast<uint64>(this)), " exec ",
-      col_ctx_->exec_key, " step_id=", col_ctx_->step_id, " state of all ",
-      rfv_.size(), " fields:");
-  for (int i = 0; i < rfv_.size(); ++i) {
-    s.append("\n");
-    s.append(rfv_[i].DebugString());
-  }
-  return s;
-}
-
 bool RingReducer::RunAsyncParts() {
   // This function orchestrates RingReduce actions on behalf of a
   // single device. It is entered by a blockable thread that
diff --git a/tensorflow/core/common_runtime/ring_reducer.h b/tensorflow/core/common_runtime/ring_reducer.h
index a5aa8fa..a681fab 100644
--- a/tensorflow/core/common_runtime/ring_reducer.h
+++ b/tensorflow/core/common_runtime/ring_reducer.h
@@ -21,108 +21,36 @@
 #include <vector>
 
 #include "tensorflow/core/common_runtime/base_collective_executor.h"
+#include "tensorflow/core/common_runtime/ring_alg.h"
 #include "tensorflow/core/framework/collective.h"
 
 namespace tensorflow {
 class Device;
 
 // Ring-algorithm implementation of collective all-reduce.
-class RingReducer : public CollectiveImplementationInterface {
+class RingReducer : public RingAlg {
  public:
-  RingReducer();
+  RingReducer() : RingAlg(REDUCTION_COLLECTIVE, "Reduce") {}
   ~RingReducer() override;
 
-  // Establishes the requested number of subdivision permutations based on the
-  // ring order implicit in the device order.
-  Status InitializeCollectiveParams(CollectiveParams* col_params) override;
-
-  // Initializes members of CollectiveContext not yet initialized, i.e. device
-  // and device_locality.  Also saves the CollectiveContext in this object.
-  Status InitializeCollectiveContext(CollectiveContext* col_ctx) override;
-
-  // No-op for ring reducer.
-  Status InitializeInstanceBeforeGroupDiscovery(CollectiveParams*) override {
-    return Status::OK();
-  }
-
   // Begins async execution of the ring reduce algorithm.
   // Must be called in a blockable thread.
   // TODO(b/80529858): remove the previous warning when we have a dedicated
   // collective threadpool.
   void Run(StatusCallback done) override;
 
+  Status InitializeCollectiveParams(CollectiveParams* col_params) override;
+
+ protected:
+  void InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
+                     int field_idx) override;
+
  private:
-  // Called when a bad status is received that implies we should terminate
-  // execution and return a bad status.
-  void StartAbort(const Status& s);
   void ContinueAfterInputCopy();
-  void Finish(bool ok);
   bool RunAsyncParts();
 
-  // Current status of a RingField
-  enum RingFieldAction {
-    RF_INIT = 0,    // Just initialized for a pass
-    RF_RECV,        // Recv pending
-    RF_REDUCE,      // Reduce pending
-    RF_FINALIZE,    // FinalOp pending
-    RF_SEND_READY,  // Ready to send
-    RF_SEND,        // Send pending
-    RF_DONE,        // No more work
-  };
-
-  // Tracks progress of actions on a single subfield of the entire tensor.
-  struct RingField {
-    int16 chunk_idx;     // major division index
-    int16 subdiv_idx;    // minor division index
-    int16 sc_idx;        // subchunk index
-    int16 rank;          // rank within subdiv permutation
-    int16 recv_dev_idx;  // dev from which value should be recv'd
-    RingFieldAction action;
-    bool second_pass;
-    bool recv_is_remote = false;
-    bool send_is_remote = false;
-    bool do_send = false;   // is the value sent in this pass?
-    bool do_recv = false;   // is the value recv'd in this pass?
-    bool is_final = false;  // is the last field in the pass for this rank
-    Tensor chunk;           // alias to field values
-    Tensor tmp_chunk;
-    Status status;
-    string DebugString() const;
-  };
-  void AdvanceToSecondPass(RingField* rf);
-  void InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
-                     int field_idx);
-  void DispatchSend(RingField* rf, const StatusCallback& done);
-  void DispatchRecv(RingField* rf, const StatusCallback& done);
-
-  // For constructing log messages for debugging.
-  string FieldState();
-  string TensorDebugString(const Tensor& tensor);
-
-  // Producer/Consumer Queue of RingField structs.
-  class PCQueue {
-   public:
-    void Enqueue(RingField* rf);
-    RingField* Dequeue();
-
-   private:
-    mutex pcq_mu_;
-    condition_variable cv_;
-    int waiter_count_ GUARDED_BY(pcq_mu_) = 0;
-    std::deque<RingField*> deque_ GUARDED_BY(pcq_mu_);
-  };
-
-  CollectiveContext* col_ctx_;          // Not owned
-  const CollectiveParams* col_params_;  // Not owned
-  StatusCallback done_;
-  int group_size_;
-  int num_subdivs_;
   Tensor group_size_tensor_;
   Notification group_size_tensor_ready_;
-  std::unique_ptr<CollectiveAdapter> ca_;
-  mutex status_mu_;
-  Status status_ GUARDED_BY(status_mu_);
-  std::vector<RingField> rfv_;
 
   friend class RingReducerTest;
 };
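
For reviewers skimming the refactor: RingReducer is now a thin subclass that overrides a single hook. Below is a std-only sketch of that template-method shape; all names are simplified stand-ins, not the real RingAlg/RingReducer API.

```c++
// Standalone sketch (std-only; RingField, chunk bookkeeping, and the real
// RingAlg/RingReducer interfaces are simplified stand-ins).
#include <iostream>

struct RingField {
  int sc_idx = 0;
  bool do_recv = false;
  bool has_tmp_chunk = false;  // stands in for rf->tmp_chunk
};

class RingAlg {
 public:
  virtual ~RingAlg() = default;
  void Run() {
    RingField rf;
    InitRingField(&rf, /*chunk_idx=*/0, /*subdiv_idx=*/0, /*field_idx=*/0);
    std::cout << "do_recv=" << rf.do_recv
              << " has_tmp_chunk=" << rf.has_tmp_chunk << "\n";
  }

 protected:
  // Shared initialization; subclasses extend it rather than replace it.
  virtual void InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
                             int field_idx) {
    rf->sc_idx = field_idx;
    rf->do_recv = true;  // placeholder for the real rank/chunk logic
  }
};

class RingReducer : public RingAlg {
 protected:
  // Mirrors the diff: delegate to the base initializer, then allocate the
  // reducer-specific temporary chunk only when a recv is pending.
  void InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
                     int field_idx) override {
    RingAlg::InitRingField(rf, chunk_idx, subdiv_idx, field_idx);
    if (rf->do_recv) rf->has_tmp_chunk = true;
  }
};

int main() {
  RingReducer reducer;
  reducer.Run();  // prints do_recv=1 has_tmp_chunk=1
}
```

This is why the header below can drop most of its members: everything not specific to reduction moved into the shared base.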
diff --git a/tensorflow/core/common_runtime/ring_reducer_test.cc b/tensorflow/core/common_runtime/ring_reducer_test.cc
index 7feb29a..7f18cdb 100644
--- a/tensorflow/core/common_runtime/ring_reducer_test.cc
+++ b/tensorflow/core/common_runtime/ring_reducer_test.cc
@@ -335,19 +335,20 @@
           note.WaitForNotification();
         }
 
+        auto alias = actual.template unaligned_flat<T>();
         for (int i = 0; i < tensor_len; ++i) {
           switch (dtype) {
             case DT_FLOAT:
-              EXPECT_FLOAT_EQ(expected[i], actual.template flat<T>()(i))
+              EXPECT_FLOAT_EQ(expected[i], alias(i))
                   << "Mismatch at device " << di << " index " << i;
               break;
             case DT_DOUBLE:
-              EXPECT_DOUBLE_EQ(expected[i], actual.template flat<T>()(i))
+              EXPECT_DOUBLE_EQ(expected[i], alias(i))
                   << "Mismatch at device " << di << " index " << i;
               break;
             case DT_INT32:
             case DT_INT64:
-              EXPECT_EQ(expected[i], actual.template flat<T>()(i))
+              EXPECT_EQ(expected[i], alias(i))
                   << "Mismatch at device " << di << " index " << i;
               break;
             default:
diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc
index 1bdccf5..318cfec 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.cc
+++ b/tensorflow/core/common_runtime/step_stats_collector.cc
@@ -176,9 +176,10 @@
   memory->set_peak_bytes(std::get<1>(sizes));
   memory->set_live_bytes(std::get<2>(sizes));
 
-  AllocatorStats stats;
-  allocator->GetStats(&stats);
-  memory->set_allocator_bytes_in_use(stats.bytes_in_use);
+  absl::optional<AllocatorStats> stats = allocator->GetStats();
+  if (stats) {
+    memory->set_allocator_bytes_in_use(stats->bytes_in_use);
+  }
   allocations_.push_back(std::make_pair(memory, tracking_allocator));
 }
 
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index da88f9c..9196d91 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -17,7 +17,6 @@
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_cuda_library")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
-load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_tests")
 load("//tensorflow:tensorflow.bzl", "tf_copts")
 
 # For platform specific build config
@@ -298,6 +297,7 @@
     deps = [
         ":call_options",
         ":message_wrappers",
+        ":request_id",
         "//tensorflow/core:lib",
         "//tensorflow/core:master_proto_cc",
     ],
@@ -311,6 +311,7 @@
         ":call_options",
         ":master_env",
         ":master_session",
+        ":recent_request_ids",
         ":remote_device",
         ":worker_cache",
         ":worker_interface",
@@ -425,7 +426,6 @@
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:metrics",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:worker_proto_cc",
         "//tensorflow/core/debug",
@@ -765,6 +765,7 @@
     srcs = ["recent_request_ids.cc"],
     hdrs = ["recent_request_ids.h"],
     deps = [
+        ":message_wrappers",
         "//tensorflow/core:lib",
         "//tensorflow/core:worker_proto_cc",
     ],
diff --git a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
index de6e4b4..a642313 100644
--- a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
@@ -293,8 +293,11 @@
                                      const Rendezvous::Args& recv_args,
                                      DoneCallback done) {
   VLOG(1) << "RemoteRendezvous Recv " << this << " " << parsed.FullKey();
-  CHECK(is_initialized()) << "RecvAsync called when uninitialized.";
   Status s = ValidateDevices(parsed, false /*!is_src*/);
+  if (s.ok() && !is_initialized()) {
+    s.Update(errors::Internal(
+        "RecvAsync called when uninitialized (key:", parsed.FullKey(), ")."));
+  }
   if (!s.ok()) {
     done(s, Args(), recv_args, Tensor(), false);
     return;
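
The change above swaps a process-killing CHECK for a Status delivered through the done callback. A std-only sketch of the pattern; `Status` and the callback type here are simplified stand-ins, not the TensorFlow classes:

```c++
// Sketch: an unmet precondition surfaces as an error through the callback
// instead of crashing the process.
#include <functional>
#include <iostream>
#include <string>

struct Status {
  bool ok = true;
  std::string message;
};

void RecvAsync(bool initialized, const std::string& key,
               const std::function<void(const Status&)>& done) {
  if (!initialized) {
    // Previously a CHECK; now the caller gets a descriptive error instead.
    done({false, "RecvAsync called when uninitialized (key:" + key + ")."});
    return;
  }
  done({});  // normal receive path would run here
}

int main() {
  RecvAsync(false, "tensor_a", [](const Status& s) {
    std::cout << (s.ok ? "ok" : s.message) << "\n";
  });
}
```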
diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc
index df735b1..443759a 100644
--- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc
+++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc
@@ -86,11 +86,17 @@
       worker_cache_(worker_cache),
       group_leader_(task_name == config.experimental().collective_group_leader()
                         ? ""
-                        : config.experimental().collective_group_leader()) {}
+                        : config.experimental().collective_group_leader()) {
+  VLOG(1) << "CompleteParamResolverDistributed ctor task={" << task_name
+          << "} config.collective_group_leader={"
+          << config.experimental().collective_group_leader() << "}";
+}
 
 void CollectiveParamResolverDistributed::CompleteParamsAsync(
     const string& device, CollectiveParams* cp, CancellationManager* cancel_mgr,
     const StatusCallback& done) {
+  VLOG(1) << "CompleteParams distributed " << device << " for " << cp << ": "
+          << cp->ToString();
   CompleteGroupDistributed(device, cp, cancel_mgr,
                            [this, device, cp, cancel_mgr, done](
                                const Status& s, const GroupRec* gr) {
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index 1065f02..144113a 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -356,6 +356,12 @@
   Rendezvous* rendezvous = worker_env_->rendezvous_mgr->Find(step_id);
   Status s = RecvOutputsFromRendezvous(rendezvous, out, Rendezvous::Args());
   rendezvous->Unref();
+  if (!s.ok()) {
+    // Failing to fetch the outputs should not be possible, so rewrite the error
+    // status to an INTERNAL error.
+    s = errors::Internal("Failed to fetch outputs for step ", step_id,
+                         ". (Original error message: ", s.ToString(), ")");
+  }
   return s;
 }
 
diff --git a/tensorflow/core/distributed_runtime/master.cc b/tensorflow/core/distributed_runtime/master.cc
index 269f620..fc8d287 100644
--- a/tensorflow/core/distributed_runtime/master.cc
+++ b/tensorflow/core/distributed_runtime/master.cc
@@ -65,7 +65,8 @@
     : env_(env),
       last_1000_steps_(1000),
       step_count_(0),
-      session_gc_seconds_(session_gc_seconds) {
+      session_gc_seconds_(session_gc_seconds),
+      recent_request_ids_(10000) {
   // Right now, a master service must be co-located with a device.
   // Otherwise, fetches do not work.
   CHECK(!env->local_devices.empty());
@@ -510,6 +511,12 @@
 
 void Master::PartialRunSetup(const PartialRunSetupRequest* req,
                              PartialRunSetupResponse* resp, MyClosure done) {
+  Status s = recent_request_ids_.TrackUnique(req->request_id(),
+                                             "PartialRunSetup (Master)", *req);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
   auto session = FindMasterSession(req->session_handle());
   if (session == nullptr) {
     done(errors::Aborted("Session ", req->session_handle(), " is not found."));
@@ -525,6 +532,12 @@
 
 void Master::RunStep(CallOptions* opts, const RunStepRequestWrapper* req,
                      MutableRunStepResponseWrapper* resp, MyClosure done) {
+  Status s = recent_request_ids_.TrackUnique(req->request_id(),
+                                             "RunStep (Master)", req);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
   auto start_time = env_->env->NowMicros();
   auto session = FindMasterSession(req->session_handle());
   if (session == nullptr) {
@@ -664,6 +677,12 @@
 
 void Master::MakeCallable(const MakeCallableRequest* req,
                           MakeCallableResponse* resp, MyClosure done) {
+  Status s = recent_request_ids_.TrackUnique(req->request_id(),
+                                             "MakeCallable (Master)", *req);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
   auto session = FindMasterSession(req->session_handle());
   if (session == nullptr) {
     done(errors::Aborted("Session ", req->session_handle(), " is not found."));
@@ -681,6 +700,12 @@
 
 void Master::RunCallable(CallOptions* opts, const RunCallableRequest* req,
                          RunCallableResponse* resp, MyClosure done) {
+  Status s = recent_request_ids_.TrackUnique(req->request_id(),
+                                             "RunCallable (Master)", *req);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
   auto session = FindMasterSession(req->session_handle());
   if (session == nullptr) {
     done(errors::Aborted("Session ", req->session_handle(), " is not found."));
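
Each handler above follows the same guard shape: track the request id first and fail the callback early on a duplicate. A std-only sketch of that shape; the bounded RecentRequestIds is reduced to a plain set here, and the handler signature is simplified:

```c++
// Sketch of the early-return duplicate guard added to each Master handler.
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>

using DoneCallback = std::function<void(const std::string& error)>;

std::unordered_set<int64_t> seen_request_ids;  // stand-in for RecentRequestIds

bool TrackUnique(int64_t request_id) {
  if (request_id == 0) return true;  // id 0 is always allowed
  return seen_request_ids.insert(request_id).second;
}

void RunStep(int64_t request_id, const DoneCallback& done) {
  // Reject duplicates before touching any session state, mirroring the diff.
  if (!TrackUnique(request_id)) {
    done("The same RunStep request was received twice.");
    return;
  }
  done("");  // success path
}

int main() {
  auto report = [](const std::string& e) {
    std::cout << (e.empty() ? "ok" : e) << "\n";
  };
  RunStep(42, report);  // ok
  RunStep(42, report);  // duplicate -> error
}
```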
diff --git a/tensorflow/core/distributed_runtime/master.h b/tensorflow/core/distributed_runtime/master.h
index dbb337f..0524582 100644
--- a/tensorflow/core/distributed_runtime/master.h
+++ b/tensorflow/core/distributed_runtime/master.h
@@ -22,6 +22,7 @@
 #include "tensorflow/core/distributed_runtime/call_options.h"
 #include "tensorflow/core/distributed_runtime/master_env.h"
 #include "tensorflow/core/distributed_runtime/master_session.h"
+#include "tensorflow/core/distributed_runtime/recent_request_ids.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/macros.h"
@@ -95,6 +96,9 @@
   // closed automatically.
   const double session_gc_seconds_;
 
+  // Used to track ids for incoming requests so we can detect duplicates.
+  RecentRequestIds recent_request_ids_;
+
   // Call CleanupAll on all workers.
   void CleanupWorkers(const ResetRequest& reset);
 
diff --git a/tensorflow/core/distributed_runtime/master_interface.h b/tensorflow/core/distributed_runtime/master_interface.h
index a8ae3cb..cde47fb 100644
--- a/tensorflow/core/distributed_runtime/master_interface.h
+++ b/tensorflow/core/distributed_runtime/master_interface.h
@@ -18,6 +18,7 @@
 
 #include "tensorflow/core/distributed_runtime/call_options.h"
 #include "tensorflow/core/distributed_runtime/message_wrappers.h"
+#include "tensorflow/core/distributed_runtime/request_id.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/protobuf/master.pb.h"
@@ -66,7 +67,9 @@
   // The message returned from this method must only be used in a
   // `RunStep()` call on the same `MasterInterface` instance.
   virtual MutableRunStepRequestWrapper* CreateRunStepRequest() {
-    return new MutableProtoRunStepRequest;
+    MutableProtoRunStepRequest* ret = new MutableProtoRunStepRequest;
+    ret->request_.set_request_id(GetUniqueRequestId());
+    return ret;
   }
 
   // Returns a response object for use in calls to
diff --git a/tensorflow/core/distributed_runtime/message_wrappers.cc b/tensorflow/core/distributed_runtime/message_wrappers.cc
index 40bf564..c9bc558 100644
--- a/tensorflow/core/distributed_runtime/message_wrappers.cc
+++ b/tensorflow/core/distributed_runtime/message_wrappers.cc
@@ -97,6 +97,10 @@
   return store_errors_in_response_body_;
 }
 
+int64 InMemoryRunStepRequest::request_id() const {
+  return 0;  // No need to track request ids for the local version.
+}
+
 void InMemoryRunStepRequest::set_store_errors_in_response_body(
     bool store_errors) {
   store_errors_in_response_body_ = store_errors;
@@ -210,6 +214,10 @@
   request_.set_store_errors_in_response_body(store_errors);
 }
 
+int64 MutableProtoRunStepRequest::request_id() const {
+  return request_.request_id();
+}
+
 string MutableProtoRunStepRequest::DebugString() const {
   return request_.DebugString();
 }
@@ -272,6 +280,8 @@
   return request_->store_errors_in_response_body();
 }
 
+int64 ProtoRunStepRequest::request_id() const { return request_->request_id(); }
+
 string ProtoRunStepRequest::DebugString() const {
   return request_->DebugString();
 }
diff --git a/tensorflow/core/distributed_runtime/message_wrappers.h b/tensorflow/core/distributed_runtime/message_wrappers.h
index 474ac0e..2cdbd1b 100644
--- a/tensorflow/core/distributed_runtime/message_wrappers.h
+++ b/tensorflow/core/distributed_runtime/message_wrappers.h
@@ -87,6 +87,8 @@
   // truncate long metadata messages.
   virtual bool store_errors_in_response_body() const = 0;
 
+  virtual int64 request_id() const = 0;
+
   // Returns a human-readable representation of this message for debugging.
   virtual string DebugString() const = 0;
 
@@ -127,6 +129,7 @@
   string DebugString() const override;
   const RunStepRequest& ToProto() const override;
   bool store_errors_in_response_body() const override;
+  int64 request_id() const override;
 
   // MutableRunStepRequestWrapper methods.
   void set_session_handle(const string& handle) override;
@@ -177,6 +180,7 @@
   string DebugString() const override;
   const RunStepRequest& ToProto() const override;
   bool store_errors_in_response_body() const override;
+  int64 request_id() const override;
 
   // MutableRunStepRequestWrapper methods.
   void set_session_handle(const string& handle) override;
@@ -189,6 +193,7 @@
 
  private:
   RunStepRequest request_;
+  friend class MasterInterface;
 };
 
 // Wrapper for immutable RunStep requests that use a non-owned
@@ -216,6 +221,7 @@
   string DebugString() const override;
   const RunStepRequest& ToProto() const override;
   bool store_errors_in_response_body() const override;
+  int64 request_id() const override;
 
  private:
   const RunStepRequest* const request_;  // Not owned.
@@ -234,7 +240,7 @@
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-// Abstract interface for an immutable RunStepRequest message.
+// Abstract interface for an immutable RunGraphRequest message.
 //
 // This interface is typically used by server-side components in the
 // TensorFlow worker.
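
For context on the wrapper additions above, a std-only sketch of the pattern: one virtual request_id() whose in-memory implementation opts out with 0 while the proto-backed one forwards to the message. All types are simplified stand-ins:

```c++
#include <cstdint>
#include <iostream>

struct RunStepRequestProto {  // stand-in for the RunStepRequest proto
  int64_t request_id = 0;
};

class RunStepRequestWrapper {
 public:
  virtual ~RunStepRequestWrapper() = default;
  virtual int64_t request_id() const = 0;
};

class InMemoryRunStepRequest : public RunStepRequestWrapper {
 public:
  // Local calls never cross the network, so duplicate detection is
  // unnecessary; id 0 is the "don't track" sentinel.
  int64_t request_id() const override { return 0; }
};

class ProtoRunStepRequest : public RunStepRequestWrapper {
 public:
  explicit ProtoRunStepRequest(const RunStepRequestProto* p) : request_(p) {}
  int64_t request_id() const override { return request_->request_id; }

 private:
  const RunStepRequestProto* const request_;  // not owned
};

int main() {
  RunStepRequestProto proto{42};
  InMemoryRunStepRequest local;
  ProtoRunStepRequest remote(&proto);
  std::cout << local.request_id() << " " << remote.request_id() << "\n";  // 0 42
}
```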
diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.cc b/tensorflow/core/distributed_runtime/recent_request_ids.cc
index 4f6866c..2c953e1 100644
--- a/tensorflow/core/distributed_runtime/recent_request_ids.cc
+++ b/tensorflow/core/distributed_runtime/recent_request_ids.cc
@@ -28,12 +28,10 @@
   set_.reserve(num_tracked_request_ids);
 }
 
-Status RecentRequestIds::TrackUnique(int64 request_id,
-                                     const string& method_name,
-                                     const protobuf::Message& request) {
+bool RecentRequestIds::Insert(int64 request_id) {
   if (request_id == 0) {
     // For backwards compatibility, allow all requests with request_id 0.
-    return Status::OK();
+    return true;
   }
 
   mutex_lock l(mu_);
@@ -43,9 +41,7 @@
     // request_id's age in the circular_buffer_ if it's tracked again. Strict
     // LRU is not useful here because returning this error will close the
     // current Session.
-    return errors::Aborted("The same ", method_name,
-                           " request was received twice. ",
-                           request.ShortDebugString());
+    return false;
   }
 
   // Remove the oldest request_id from the set_. circular_buffer_ is
@@ -54,7 +50,30 @@
   set_.erase(circular_buffer_[next_index_]);
   circular_buffer_[next_index_] = request_id;
   next_index_ = (next_index_ + 1) % circular_buffer_.size();
-  return Status::OK();
+  return true;
+}
+
+Status RecentRequestIds::TrackUnique(int64 request_id,
+                                     const string& method_name,
+                                     const protobuf::Message& request) {
+  if (Insert(request_id)) {
+    return Status::OK();
+  } else {
+    return errors::Aborted("The same ", method_name,
+                           " request was received twice. ",
+                           request.ShortDebugString());
+  }
+}
+
+Status RecentRequestIds::TrackUnique(int64 request_id,
+                                     const string& method_name,
+                                     const RunStepRequestWrapper* wrapper) {
+  if (Insert(request_id)) {
+    return Status::OK();
+  } else {
+    return errors::Aborted("The same ", method_name,
+                           " request was received twice. ",
+                           wrapper->ToProto().ShortDebugString());
+  }
 }
 
 }  // namespace tensorflow
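
A std-only sketch of the deduplication structure behind Insert(): a fixed-size circular buffer evicts the oldest id while a set provides O(1) membership tests. Locking and proto plumbing are omitted, and the class name is a stand-in:

```c++
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

class RecentIds {
 public:
  explicit RecentIds(int capacity) : buffer_(capacity, 0) {}

  // Returns false if `id` was seen within the last `capacity` inserts.
  bool Insert(int64_t id) {
    if (id == 0) return true;                   // id 0 is exempt, as in the diff
    if (!set_.insert(id).second) return false;  // duplicate
    set_.erase(buffer_[next_]);                 // evict the oldest tracked id
    buffer_[next_] = id;
    next_ = (next_ + 1) % buffer_.size();
    return true;
  }

 private:
  std::unordered_set<int64_t> set_;
  std::vector<int64_t> buffer_;
  size_t next_ = 0;
};

int main() {
  RecentIds ids(/*capacity=*/2);
  std::cout << ids.Insert(1) << "\n";  // 1: new id
  std::cout << ids.Insert(2) << "\n";  // 1: new id
  std::cout << ids.Insert(1) << "\n";  // 0: duplicate within the window
  std::cout << ids.Insert(3) << "\n";  // 1: new id, evicts 1
  std::cout << ids.Insert(1) << "\n";  // 1: 1 fell out of the window
}
```

As the header comment notes, ids are not refreshed on re-use; strict LRU is unnecessary because a detected duplicate closes the session anyway.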
diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.h b/tensorflow/core/distributed_runtime/recent_request_ids.h
index 11cf937..4094fcb 100644
--- a/tensorflow/core/distributed_runtime/recent_request_ids.h
+++ b/tensorflow/core/distributed_runtime/recent_request_ids.h
@@ -20,6 +20,7 @@
 #include <unordered_set>
 #include <vector>
 
+#include "tensorflow/core/distributed_runtime/message_wrappers.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -58,8 +59,13 @@
   // ShortDebugString are added to returned errors.
   Status TrackUnique(int64 request_id, const string& method_name,
                      const protobuf::Message& request);
+  // Overloaded version of the above function for wrapped protos.
+  Status TrackUnique(int64 request_id, const string& method_name,
+                     const RunStepRequestWrapper* wrapper);
 
  private:
+  bool Insert(int64 request_id);
+
   mutex mu_;
   // next_index_ indexes into circular_buffer_, and points to the next storage
   // space to use. When the buffer is full, next_index_ points at the oldest
diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD
index 6079634..dd22e74 100644
--- a/tensorflow/core/distributed_runtime/rpc/BUILD
+++ b/tensorflow/core/distributed_runtime/rpc/BUILD
@@ -46,6 +46,7 @@
         "//tensorflow/core:lib",
         # Required to be able to overload TensorResponse parsing.
         "//tensorflow/core/distributed_runtime:tensor_coding",
+        "//tensorflow/core:lib_internal",
     ],
 )
 
@@ -166,7 +167,6 @@
     deps = [
         ":grpc_util",
         "//tensorflow/core:lib",
-        "@com_google_absl//absl/time",
     ],
 )
 
@@ -196,7 +196,6 @@
         "//tensorflow/core/distributed_runtime:worker_env",
         "//tensorflow/core/distributed_runtime:worker_session",
         "@com_google_absl//absl/container:flat_hash_map",
-        "@com_google_absl//absl/time",
     ],
 )
 
@@ -408,6 +407,7 @@
         "//tensorflow/core/distributed_runtime:local_master",
         "//tensorflow/core/distributed_runtime:master_interface",
         "//tensorflow/core/distributed_runtime:message_wrappers",
+        "//tensorflow/core/distributed_runtime:request_id",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
index b832a21..a845590 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
@@ -24,6 +24,7 @@
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/protobuf/master.pb.h"
 
@@ -43,104 +44,139 @@
   Status CreateSession(CallOptions* call_options,
                        const CreateSessionRequest* request,
                        CreateSessionResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::CreateSession);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::CreateSession);
   }
 
   Status ExtendSession(CallOptions* call_options,
                        const ExtendSessionRequest* request,
                        ExtendSessionResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::ExtendSession);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::ExtendSession);
   }
 
   Status PartialRunSetup(CallOptions* call_options,
                          const PartialRunSetupRequest* request,
                          PartialRunSetupResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::PartialRunSetup);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::PartialRunSetup);
   }
 
   Status RunStep(CallOptions* call_options, RunStepRequestWrapper* request,
                  MutableRunStepResponseWrapper* response) override {
-    ::grpc::ClientContext ctx;
-    auto trace = TraceRpc("RunStep/Client", &ctx);
-    return Call(&ctx, call_options, &request->ToProto(),
-                get_proto_from_wrapper(response), &MasterServiceStub::RunStep);
+    return CallWithRetry(call_options, &request->ToProto(),
+                         get_proto_from_wrapper(response),
+                         &MasterServiceStub::RunStep, "RunStep/Client");
   }
 
   Status CloseSession(CallOptions* call_options,
                       const CloseSessionRequest* request,
                       CloseSessionResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::CloseSession);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::CloseSession);
   }
 
   Status ListDevices(CallOptions* call_options,
                      const ListDevicesRequest* request,
                      ListDevicesResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::ListDevices);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::ListDevices);
   }
 
   Status Reset(CallOptions* call_options, const ResetRequest* request,
                ResetResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::Reset);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::Reset);
   }
 
   Status MakeCallable(CallOptions* call_options,
                       const MakeCallableRequest* request,
                       MakeCallableResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::MakeCallable);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::MakeCallable);
   }
   Status RunCallable(CallOptions* call_options,
                      const RunCallableRequest* request,
                      RunCallableResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::RunCallable);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::RunCallable);
   }
   Status ReleaseCallable(CallOptions* call_options,
                          const ReleaseCallableRequest* request,
                          ReleaseCallableResponse* response) override {
-    ::grpc::ClientContext ctx;
-    return Call(&ctx, call_options, request, response,
-                &MasterServiceStub::ReleaseCallable);
+    return CallWithRetry(call_options, request, response,
+                         &MasterServiceStub::ReleaseCallable);
   }
 
  private:
   // Start tracing, attaching a unique ID to both the trace and the RPC.
-  tracing::ScopedActivity TraceRpc(StringPiece name,
-                                   ::grpc::ClientContext* ctx) {
+  tracing::ScopedActivity* NewTraceRpc(StringPiece name,
+                                       ::grpc::ClientContext* ctx) {
     string trace_id = strings::StrCat(tracing::GetUniqueArg());
     ctx->AddMetadata(GrpcIdKey(), trace_id);
-    return tracing::ScopedActivity(name, trace_id);
-  }
-
-  void SetDeadline(::grpc::ClientContext* ctx, int64 time_in_ms) {
-    if (time_in_ms > 0) {
-      ctx->set_deadline(gpr_time_from_millis(time_in_ms, GPR_TIMESPAN));
-    }
+    return new tracing::ScopedActivity(name, trace_id);
   }
 
   template <typename Request, typename Response>
-  Status Call(::grpc::ClientContext* ctx, CallOptions* call_options,
-              const Request* request, Response* response,
-              ::grpc::Status (MasterServiceStub::*pfunc)(::grpc::ClientContext*,
-                                                         const Request&,
-                                                         Response*)) {
-    ctx->set_fail_fast(false);
-    SetDeadline(ctx, call_options->GetTimeout());
-    return FromGrpcStatus((stub_.get()->*pfunc)(ctx, *request, response));
+  Status CallWithRetry(CallOptions* call_options, const Request* request,
+                       Response* response,
+                       ::grpc::Status (MasterServiceStub::*pfunc)(
+                           ::grpc::ClientContext*, const Request&, Response*),
+                       string trace_string = {}) {
+    int64 timeout_in_ms = call_options->GetTimeout();
+    int64 expired_time_micros = Env::Default()->NowMicros();
+    if (timeout_in_ms > 0) {
+      expired_time_micros += timeout_in_ms * 1000;  // ms -> us
+    }
+    Status s;
+    for (int num_retries = 0;; ++num_retries) {
+      ::grpc::ClientContext ctx;
+      std::unique_ptr<tracing::ScopedActivity> trace;
+      if (!trace_string.empty()) {
+        trace.reset(NewTraceRpc(trace_string, &ctx));
+      }
+      ctx.set_fail_fast(false);
+      if (timeout_in_ms > 0) {
+        // We do not modify the timeout here to match legacy behavior. However,
+        // this could violate the contract of tensorflow::Session. If we retry
+        // an RPC just before the deadline is exceeded, we will still set the
+        // timeout to the original value. This leads to the overall timeout
+        // being double what was expected.
+        // TODO(b/117162170): investigate fixing this behavior for legacy and
+        // gRPC RPC layers.
+        ctx.set_deadline(gpr_time_from_millis(timeout_in_ms, GPR_TIMESPAN));
+      }
+      s = FromGrpcStatus((stub_.get()->*pfunc)(&ctx, *request, response));
+      if (!errors::IsUnavailable(s)) {
+        return s;
+      }
+      // TODO(b/117162170): we may want to make this configurable.
+      constexpr int kMaxRetries = 10;
+      LOG(WARNING) << "RPC failed with status = \"" << s
+                   << "\" and grpc_error_string = \""
+                   << ctx.debug_error_string() << "\", maybe retrying the RPC";
+      if (num_retries >= kMaxRetries) {
+        LOG(WARNING) << "Too many retries, returning last status: " << s;
+        return s;
+      }
+      const int64 now_micros = Env::Default()->NowMicros();
+      const int64 deadline_with_backoff_micros =
+          now_micros + ComputeBackoffMicroseconds(num_retries);
+      // Wait for a short period of time before retrying the RPC.  If our
+      // backoff would put us past the RPC deadline, we truncate it to ensure
+      // our RPC starts before the deadline.
+      const auto backoff_until =
+          (timeout_in_ms <= 0 ||
+           expired_time_micros > deadline_with_backoff_micros)
+              ? deadline_with_backoff_micros
+              : expired_time_micros;
+      Env::Default()->SleepForMicroseconds(backoff_until - now_micros);
+      if (Env::Default()->NowMicros() > expired_time_micros &&
+          timeout_in_ms > 0) {
+        // If timeout_in_ms is set, exit the retry loop on timeout.
+        return errors::DeadlineExceeded(ctx.debug_error_string());
+      }
+    }
   }
 
   std::unique_ptr<MasterServiceStub> stub_;
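
The control flow of CallWithRetry, reduced to a std-only sketch: retry only UNAVAILABLE errors, cap the attempt count, and sleep with growing backoff truncated at the overall deadline. gRPC, Env, and Status are stand-ins here, and a simple shift-based backoff replaces ComputeBackoffMicroseconds:

```c++
#include <algorithm>
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>

enum class Code { kOk, kUnavailable, kDeadlineExceeded };

Code CallWithRetry(const std::function<Code()>& rpc,
                   std::chrono::milliseconds timeout) {
  using Clock = std::chrono::steady_clock;
  const auto deadline = Clock::now() + timeout;
  constexpr int kMaxRetries = 10;
  for (int retries = 0;; ++retries) {
    Code c = rpc();
    if (c != Code::kUnavailable) return c;  // only retry transient errors
    if (retries >= kMaxRetries) return c;   // give up after too many tries
    // Backoff grows with the attempt count (the real code adds jitter too),
    // truncated so the next attempt starts before the deadline.
    auto backoff = std::chrono::milliseconds(1 << retries);
    auto wake = std::min(Clock::now() + backoff, deadline);
    std::this_thread::sleep_until(wake);
    if (Clock::now() >= deadline) return Code::kDeadlineExceeded;
  }
}

int main() {
  int failures = 3;
  auto flaky = [&] { return failures-- > 0 ? Code::kUnavailable : Code::kOk; };
  std::cout << (CallWithRetry(flaky, std::chrono::seconds(5)) == Code::kOk)
            << "\n";  // prints 1 after three retried attempts
}
```

Note the caveat in the diff's own comment: the per-attempt gRPC deadline is not shrunk on retry, so the observed total can exceed the requested timeout.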
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
index 32063fe..c14bfd2 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
@@ -21,6 +21,7 @@
 #include "tensorflow/core/distributed_runtime/call_options.h"
 #include "tensorflow/core/distributed_runtime/local_master.h"
 #include "tensorflow/core/distributed_runtime/master_interface.h"
+#include "tensorflow/core/distributed_runtime/request_id.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_remote_master.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
@@ -312,6 +313,7 @@
   for (const string& target : target_nodes) {
     req.add_target(target);
   }
+  req.set_request_id(GetUniqueRequestId());
   call_options.SetTimeout(options_.config.operation_timeout_in_ms());
   TF_RETURN_IF_ERROR(master_->PartialRunSetup(&call_options, &req, &resp));
   *handle = resp.partial_run_handle();
@@ -408,6 +410,7 @@
   MakeCallableRequest req;
   TF_RETURN_IF_ERROR(Handle(req.mutable_session_handle()));
   *req.mutable_options() = callable_options;
+  req.set_request_id(GetUniqueRequestId());
   MakeCallableResponse resp;
   CallOptions call_options;
   call_options.SetTimeout(options_.config.operation_timeout_in_ms());
@@ -423,6 +426,7 @@
   RunCallableRequest req;
   TF_RETURN_IF_ERROR(Handle(req.mutable_session_handle()));
   req.set_handle(handle);
+  req.set_request_id(GetUniqueRequestId());
   for (const Tensor& feed : feed_tensors) {
     feed.AsProtoTensorContent(req.mutable_feed()->Add());
   }
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_util.cc b/tensorflow/core/distributed_runtime/rpc/grpc_util.cc
index e211c33..471e2c1 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_util.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_util.cc
@@ -15,9 +15,61 @@
 
 #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h"
 #include "tensorflow/core/distributed_runtime/tensor_coding.h"
+#include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
 
+namespace {
+
+double GenerateUniformRandomNumber() {
+  return random::New64() * (1.0 / std::numeric_limits<uint64>::max());
+}
+
+double GenerateUniformRandomNumberBetween(double a, double b) {
+  if (a == b) return a;
+  DCHECK_LT(a, b);
+  return a + GenerateUniformRandomNumber() * (b - a);
+}
+
+}  // namespace
+
+int64 ComputeBackoffMicroseconds(int current_retry_attempt, int64 min_delay,
+                                 int64 max_delay) {
+  DCHECK_GE(current_retry_attempt, 0);
+
+  // This function with the constants below is calculating:
+  //
+  // (0.4 * min_delay) + (random[0.6,1.0] * min_delay * 1.3^retries)
+  //
+  // Note that there is an extra truncation that occurs and is documented in
+  // comments below.
+  constexpr double kBackoffBase = 1.3;
+  constexpr double kBackoffRandMult = 0.4;
+
+  // This first term does not vary with current_retry_attempt or a random
+  // number. It exists to help ensure the returned delay is >= min_delay.
+  const double first_term = kBackoffRandMult * min_delay;
+
+  // This is calculating min_delay * 1.3^retries
+  double uncapped_second_term = min_delay;
+  while (current_retry_attempt > 0 &&
+         uncapped_second_term < max_delay - first_term) {
+    current_retry_attempt--;
+    uncapped_second_term *= kBackoffBase;
+  }
+  // Note that first_term + uncapped_second_term can exceed max_delay here
+  // because of the final multiply by kBackoffBase.  We fix that problem with
+  // the min() below.
+  double second_term = std::min(uncapped_second_term, max_delay - first_term);
+
+  // This supplies the random jitter to ensure that retries don't cause a
+  // thundering herd problem.
+  second_term *=
+      GenerateUniformRandomNumberBetween(1.0 - kBackoffRandMult, 1.0);
+
+  return std::max(static_cast<int64>(first_term + second_term), min_delay);
+}
+
 ::grpc::Status GrpcMaybeUnparseProto(const protobuf::Message& src,
                                      grpc::ByteBuffer* dst) {
   bool own_buffer;
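
To make the backoff formula above concrete, here is a std-only replica with the random multiplier pinned at its upper bound so the outputs are deterministic (the real function draws it from [0.6, 1.0]); constant names are shortened:

```c++
#include <algorithm>
#include <cstdint>
#include <iostream>

int64_t BackoffMicros(int attempt, int64_t min_delay = 1000,
                      int64_t max_delay = 10000000) {
  constexpr double kBase = 1.3;       // kBackoffBase in the diff
  constexpr double kRandMult = 0.4;   // kBackoffRandMult in the diff
  const double first_term = kRandMult * min_delay;  // 0.4 * min_delay
  double second = min_delay;                        // min_delay * 1.3^attempt
  while (attempt > 0 && second < max_delay - first_term) {
    --attempt;
    second *= kBase;
  }
  second = std::min(second, max_delay - first_term);
  // Jitter multiplier pinned to 1.0 here; the real code randomizes it.
  return std::max(static_cast<int64_t>(first_term + second), min_delay);
}

int main() {
  // e.g. attempt 0 -> 1400 us, attempt 1 -> 1700 us, then growing by ~1.3x
  // per attempt until the max_delay cap kicks in.
  for (int n : {0, 1, 5, 10, 20})
    std::cout << "attempt " << n << " -> " << BackoffMicros(n) << " us\n";
}
```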
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_util.h b/tensorflow/core/distributed_runtime/rpc/grpc_util.h
index 45259aa..976f3e6 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_util.h
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_util.h
@@ -29,6 +29,15 @@
 
 namespace tensorflow {
 
+// Given the total number of RPC retries attempted, return a randomized
+// amount of time to delay before retrying the request.
+//
+// The average computed backoff increases with the number of RPCs attempted.
+// See implementation for details on the calculations.
+int64 ComputeBackoffMicroseconds(int current_retry_attempt,
+                                 int64 min_delay = 1000,
+                                 int64 max_delay = 10000000);
+
 // Thin wrapper around ::grpc::ProtoBufferReader to give TensorResponse an
 // efficient byte reader from which to decode a RecvTensorResponse.
 class GrpcByteSource : public TensorResponse::Source {
diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
index 9fb9204..ee561e1 100644
--- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
@@ -71,9 +71,12 @@
     req_.set_request_id(GetUniqueRequestId());
   }
 
-  void Reset(WorkerCacheInterface* wc) {
-    wc->ReleaseWorker(src_worker_, wi_);
-    wi_ = nullptr;
+  void Reset() {
+    // The RpcRemoteRendezvous using this object is responsible for calling
+    // ReleaseWorker() before Reset().
+    DCHECK_EQ(static_cast<WorkerInterface*>(nullptr), wi_)
+        << "Leaking WorkerInterface in RpcRecvTensorCall::Reset().";
+
     alloc_attrs_ = AllocatorAttributes();
     dst_device_ = nullptr;
     // We don't clear opts_ and assume that Init will set up the state for
@@ -89,9 +92,8 @@
 
   ~RpcRecvTensorCall() override {
     // Since only the RpcRecvTensorFreeList will delete an
-    // RpcRecvTensorCall, and it always sets this->wi_ to null when
-    // a call object is released to it, we can assert that this->wi_ is
-    // always null at the point of deletion.
+    // RpcRecvTensorCall, we require that ReleaseWorker() has been called before
+    // the user releases a Call object to the free list.
     CHECK_EQ(static_cast<WorkerInterface*>(nullptr), wi_)
         << "Leaking WorkerInterface in RpcRecvTensorCall destructor.";
   }
@@ -113,6 +115,13 @@
     return status_;
   }
 
+  void ReleaseWorker(WorkerCacheInterface* worker_cache) {
+    DCHECK_NE(static_cast<WorkerInterface*>(nullptr), wi_)
+        << "RpcRecvTensorCall::ReleaseWorker() called twice.";
+    worker_cache->ReleaseWorker(src_worker_, wi_);
+    wi_ = nullptr;
+  }
+
   const Tensor& tensor() const { return resp_.tensor(); }
 
   bool is_dead() const { return resp_.metadata().is_dead(); }
@@ -144,7 +153,7 @@
 
   string src_worker_;
   string src_rel_device_;
-  WorkerInterface* wi_;
+  WorkerInterface* wi_;  // Not owned.
   AllocatorAttributes alloc_attrs_;
   Device* dst_device_;
   CallOptions opts_;
@@ -180,8 +189,8 @@
     return new RpcRecvTensorCall;
   }
 
-  void Release(RpcRecvTensorCall* obj, WorkerCacheInterface* wc) {
-    obj->Reset(wc);
+  void Release(RpcRecvTensorCall* obj) {
+    obj->Reset();
     {
       mutex_lock l(mu_);
       if (objects_.size() < kMaxObjects) {
@@ -220,6 +229,9 @@
                          " is invalid remote source device.");
   }
   WorkerSession* sess = session();
+  // The worker will be released in a subsequent call to
+  // `sess->worker_cache->ReleaseWorker()` (if the call has not yet been
+  // initialized) or `call->ReleaseWorker()` (if it has been initialized).
   WorkerInterface* rwi = sess->worker_cache->CreateWorker(call->src_worker_);
   if (s.ok() && rwi == nullptr) {
     s = errors::Internal("No worker known as ", call->src_worker_);
@@ -233,7 +245,7 @@
     if (rwi != nullptr) {
       sess->worker_cache->ReleaseWorker(call->src_worker_, rwi);
     }
-    get_call_freelist()->Release(call, sess->worker_cache.get());
+    get_call_freelist()->Release(call);
     done(s, Args(), recv_args, Tensor{}, false);
     return;
   }
@@ -246,10 +258,12 @@
 
   // RendezvousMgr already aborted, shouldn't send RPC call any more
   if (!call->status().ok()) {
+    // NOTE: `*sess` can potentially be deleted before we return from
+    // `call->done()(...)`, so we must release the worker before calling the
+    // callback.
+    call->ReleaseWorker(sess->worker_cache.get());
     call->done()(call->status(), Args(), Args(), Tensor(), false);
-    session()->worker_cache->ReleaseWorker(call->src_worker_, call->wi_);
-    call->wi_ = nullptr;
-    get_call_freelist()->Release(call, session()->worker_cache.get());
+    get_call_freelist()->Release(call);
     return;
   }
 
@@ -261,10 +275,12 @@
     // If StartAbort was called prior to DeregisterCall, then the
     // current status should be bad.
     Status s = call->status();
+    // NOTE: `*session()` can potentially be deleted before we return from
+    // `call->done()(...)`, so we must release the worker before calling the
+    // callback.
+    call->ReleaseWorker(session()->worker_cache.get());
     call->done()(s, Args(), call->recv_args(), call->tensor(), call->is_dead());
-    session()->worker_cache->ReleaseWorker(call->src_worker_, call->wi_);
-    call->wi_ = nullptr;
-    get_call_freelist()->Release(call, session()->worker_cache.get());
+    get_call_freelist()->Release(call);
     Unref();
   });
 }
diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc
index e942191..f21f76f 100644
--- a/tensorflow/core/framework/allocator.cc
+++ b/tensorflow/core/framework/allocator.cc
@@ -26,14 +26,6 @@
 
 namespace tensorflow {
 
-void AllocatorStats::Clear() {
-  this->num_allocs = 0;
-  this->bytes_in_use = 0;
-  this->max_bytes_in_use = 0;
-  this->max_alloc_size = 0;
-  this->bytes_limit = 0;
-}
-
 string AllocatorStats::DebugString() const {
   return strings::Printf(
       "Limit:        %20lld\n"
@@ -41,8 +33,8 @@
       "MaxInUse:     %20lld\n"
       "NumAllocs:    %20lld\n"
       "MaxAllocSize: %20lld\n",
-      this->bytes_limit, this->bytes_in_use, this->max_bytes_in_use,
-      this->num_allocs, this->max_alloc_size);
+      this->bytes_limit ? *this->bytes_limit : 0, this->bytes_in_use,
+      this->peak_bytes_in_use, this->num_allocs, this->largest_alloc_size);
 }
 
 constexpr size_t Allocator::kAllocatorAlignment;
@@ -132,10 +124,10 @@
       mutex_lock l(mu_);
       ++stats_.num_allocs;
       stats_.bytes_in_use += alloc_size;
-      stats_.max_bytes_in_use =
-          std::max<int64>(stats_.max_bytes_in_use, stats_.bytes_in_use);
-      stats_.max_alloc_size =
-          std::max<int64>(stats_.max_alloc_size, alloc_size);
+      stats_.peak_bytes_in_use =
+          std::max<int64>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
+      stats_.largest_alloc_size =
+          std::max<int64>(stats_.largest_alloc_size, alloc_size);
 
       if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
           total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
@@ -158,16 +150,16 @@
     port::AlignedFree(ptr);
   }
 
-  void GetStats(AllocatorStats* stats) override {
+  absl::optional<AllocatorStats> GetStats() override {
     mutex_lock l(mu_);
-    *stats = stats_;
+    return stats_;
   }
 
   void ClearStats() override {
     mutex_lock l(mu_);
     stats_.num_allocs = 0;
-    stats_.max_bytes_in_use = stats_.bytes_in_use;
-    stats_.max_alloc_size = 0;
+    stats_.peak_bytes_in_use = stats_.bytes_in_use;
+    stats_.largest_alloc_size = 0;
   }
 
   size_t AllocatedSizeSlow(const void* ptr) override {
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 4d0c6d4..4dc5eaf 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -20,6 +20,8 @@
 
 #include <limits>
 
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/framework/resource_handle.h"
 #include "tensorflow/core/framework/type_traits.h"
@@ -52,21 +54,25 @@
   std::function<uint64()> freed_by_func = nullptr;
 };
 
-// Runtime statistics collected by an allocator.
+// Runtime statistics collected by an allocator. Exactly the same as
+// stream_executor::AllocatorStats, but independently defined to preserve the
+// mutual independence of StreamExecutor and TensorFlow.
 struct AllocatorStats {
-  int64 num_allocs;        // Number of allocations.
-  int64 bytes_in_use;      // Number of bytes in use.
-  int64 max_bytes_in_use;  // The maximum bytes in use.
-  int64 max_alloc_size;    // The max single allocation seen.
+  int64 num_allocs;          // Number of allocations.
+  int64 bytes_in_use;        // Number of bytes in use.
+  int64 peak_bytes_in_use;   // The peak bytes in use.
+  int64 largest_alloc_size;  // The largest single allocation seen.
 
-  // The upper limit what the allocator can allocate, if such a limit
-  // is known. Certain allocator may return 0 to indicate the limit is
-  // unknown.
-  int64 bytes_limit;
+  // The upper limit of user-allocatable device memory in bytes, if such a
+  // limit is known.
+  absl::optional<int64> bytes_limit;
 
-  AllocatorStats() { Clear(); }
+  AllocatorStats()
+      : num_allocs(0),
+        bytes_in_use(0),
+        peak_bytes_in_use(0),
+        largest_alloc_size(0) {}
 
-  void Clear();
   string DebugString() const;
 };
 
@@ -198,7 +204,7 @@
   }
 
-  // Fills in 'stats' with statistics collected by this allocator.
-  virtual void GetStats(AllocatorStats* stats) { stats->Clear(); }
+  // Returns statistics collected by this allocator, or nullopt if this
+  // allocator does not collect statistics.
+  virtual absl::optional<AllocatorStats> GetStats() { return absl::nullopt; }
 
   // Clears the internal stats except for the `in_use` field.
   virtual void ClearStats() {}
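
A std-only sketch of consuming the new optional-returning API (as step_stats_collector.cc now does), with std::optional standing in for absl::optional and a trimmed-down AllocatorStats:

```c++
#include <cstdint>
#include <iostream>
#include <optional>

struct AllocatorStats {
  int64_t bytes_in_use = 0;
};

struct Allocator {
  // Allocators that don't track stats return nullopt instead of zeros,
  // so callers can distinguish "no data" from "zero bytes in use".
  virtual std::optional<AllocatorStats> GetStats() { return std::nullopt; }
  virtual ~Allocator() = default;
};

struct TrackingAllocator : Allocator {
  std::optional<AllocatorStats> GetStats() override {
    return AllocatorStats{1 << 20};
  }
};

void Report(Allocator& a) {
  if (auto stats = a.GetStats()) {
    std::cout << "bytes_in_use=" << stats->bytes_in_use << "\n";
  } else {
    std::cout << "allocator does not track stats\n";
  }
}

int main() {
  Allocator plain;
  TrackingAllocator tracking;
  Report(plain);     // allocator does not track stats
  Report(tracking);  // bytes_in_use=1048576
}
```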
diff --git a/tensorflow/core/framework/allocator_registry.h b/tensorflow/core/framework/allocator_registry.h
index 9dc7434..d9f3280 100644
--- a/tensorflow/core/framework/allocator_registry.h
+++ b/tensorflow/core/framework/allocator_registry.h
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/numa.h"
 
diff --git a/tensorflow/core/framework/allocator_test.cc b/tensorflow/core/framework/allocator_test.cc
index a409cb2..85e8ba6 100644
--- a/tensorflow/core/framework/allocator_test.cc
+++ b/tensorflow/core/framework/allocator_test.cc
@@ -25,20 +25,23 @@
 namespace tensorflow {
 
 static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
-                       int64 max_bytes_in_use, int64 max_alloc_size) {
-  AllocatorStats stats;
-  a->GetStats(&stats);
-  LOG(INFO) << "Alloc stats: \n" << stats.DebugString();
+                       int64 peak_bytes_in_use, int64 largest_alloc_size) {
+  absl::optional<AllocatorStats> stats = a->GetStats();
+  EXPECT_TRUE(stats);
+  if (!stats) {
+    return;
+  }
+  LOG(INFO) << "Alloc stats: \n" << stats->DebugString();
 #if defined(PLATFORM_GOOGLE) && defined(NDEBUG)
   // NOTE: allocator stats expectation depends on the system malloc,
   // and can vary as that changes.
   static const int64 kSlop = 5 * 1024;
-  EXPECT_GT(stats.bytes_in_use, bytes_in_use - kSlop);
-  EXPECT_LT(stats.bytes_in_use, bytes_in_use + kSlop);
-  EXPECT_GT(stats.max_bytes_in_use, max_bytes_in_use - kSlop);
-  EXPECT_LT(stats.max_bytes_in_use, max_bytes_in_use + kSlop);
-  EXPECT_EQ(stats.num_allocs, num_allocs);
-  EXPECT_EQ(stats.max_alloc_size, max_alloc_size);
+  EXPECT_GT(stats->bytes_in_use, bytes_in_use - kSlop);
+  EXPECT_LT(stats->bytes_in_use, bytes_in_use + kSlop);
+  EXPECT_GT(stats->peak_bytes_in_use, peak_bytes_in_use - kSlop);
+  EXPECT_LT(stats->peak_bytes_in_use, peak_bytes_in_use + kSlop);
+  EXPECT_EQ(stats->num_allocs, num_allocs);
+  EXPECT_EQ(stats->largest_alloc_size, largest_alloc_size);
 #endif
 }
 
diff --git a/tensorflow/core/framework/cancellation.cc b/tensorflow/core/framework/cancellation.cc
index af59500..7f639b5 100644
--- a/tensorflow/core/framework/cancellation.cc
+++ b/tensorflow/core/framework/cancellation.cc
@@ -27,6 +27,12 @@
       is_cancelled_(false),
       next_cancellation_token_(0) {}
 
+void CancellationManager::Reset() {
+  mutex_lock l(mu_);
+  is_cancelling_ = false;
+  is_cancelled_.store(false);
+}
+
 void CancellationManager::StartCancel() {
   gtl::FlatMap<CancellationToken, CancelCallback> callbacks_to_run;
   {
diff --git a/tensorflow/core/framework/cancellation.h b/tensorflow/core/framework/cancellation.h
index 7a5d942..51b2004 100644
--- a/tensorflow/core/framework/cancellation.h
+++ b/tensorflow/core/framework/cancellation.h
@@ -56,6 +56,9 @@
   // Returns true iff StartCancel() has been called.
   bool IsCancelled() { return is_cancelled_.load(std::memory_order_acquire); }
 
+  // Resets the cancellation manager to its original pre-cancelled state.
+  void Reset();
+
   // Returns a token that must be used in calls to RegisterCallback
   // and DeregisterCallback.
   CancellationToken get_cancellation_token();
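
Why Reset() helps, in a std-only sketch: one cancellation object can be reused across runs instead of being reallocated each step. This simplified flag omits the token/callback machinery of the real CancellationManager:

```c++
#include <atomic>
#include <iostream>

class CancellationFlag {
 public:
  void StartCancel() { cancelled_.store(true, std::memory_order_release); }
  bool IsCancelled() const {
    return cancelled_.load(std::memory_order_acquire);
  }
  // Returns the flag to its pre-cancelled state so it can be reused.
  void Reset() { cancelled_.store(false); }

 private:
  std::atomic<bool> cancelled_{false};
};

int main() {
  CancellationFlag flag;
  flag.StartCancel();
  std::cout << flag.IsCancelled() << "\n";  // 1: first run was cancelled
  flag.Reset();
  std::cout << flag.IsCancelled() << "\n";  // 0: ready for the next run
}
```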
diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h
index 546e393..e00cc17 100644
--- a/tensorflow/core/framework/collective.h
+++ b/tensorflow/core/framework/collective.h
@@ -42,6 +42,7 @@
 enum CollectiveType {
   REDUCTION_COLLECTIVE = 0,
   BROADCAST_COLLECTIVE,
+  GATHER_COLLECTIVE,
   UNDEFINED_COLLECTIVE,
 };
 
diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 83bc950..5c974a7 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -1303,6 +1303,12 @@
                            c->num_inputs() - 1 /* dim_index */);
 }
 
+Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat) {
+  return ConcatShapeHelper(c, 0 /* start_value_index */,
+                           num_inputs_to_concat /* end_value_index */,
+                           num_inputs_to_concat /* dim_index */);
+}
+
 Status BroadcastBinaryOpOutputShapeFnHelper(InferenceContext* c,
                                             ShapeHandle shape_x,
                                             ShapeHandle shape_y,
diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h
index 14b9688..d421844 100644
--- a/tensorflow/core/framework/common_shape_fns.h
+++ b/tensorflow/core/framework/common_shape_fns.h
@@ -279,6 +279,8 @@
 // Shape function for concat operations.
 Status ConcatV2Shape(shape_inference::InferenceContext* c);
 
+// Shape function for QuantizedConcatV2 operations.
+Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat);
+
 // Shape function for binary operators that broadcast their inputs
 // and with output to output_index.
 // Note: out cannot be NULL.
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index b7adfd0..35d04eb 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -569,6 +569,9 @@
   for (const auto& n : fdef.node_def()) {
     strings::StrAppend(&out, "  ", Print(n), "\n");
   }
+  for (const auto& cr : fdef.control_ret()) {
+    strings::StrAppend(&out, "  @return ", cr.first, " = ", cr.second, "\n");
+  }
   for (const auto& r : fdef.ret()) {
     strings::StrAppend(&out, "  return ", r.first, " = ", r.second, "\n");
   }
@@ -825,6 +828,12 @@
   std::map<string, string> ret2(f2.ret().begin(), f2.ret().end());
   if (ret1 != ret2) return false;
 
+  std::map<string, string> control_ret1(f1.control_ret().begin(),
+                                        f1.control_ret().end());
+  std::map<string, string> control_ret2(f2.control_ret().begin(),
+                                        f2.control_ret().end());
+  if (control_ret1 != control_ret2) return false;
+
   return true;
 }
 
@@ -849,6 +858,14 @@
     h = Hash64(p.second.data(), p.second.size(), h);
   }
 
+  // control output names
+  std::map<string, string> control_ret(fdef.control_ret().begin(),
+                                       fdef.control_ret().end());
+  for (const auto& p : control_ret) {
+    h = Hash64(p.first.data(), p.first.size(), h);
+    h = Hash64(p.second.data(), p.second.size(), h);
+  }
+
   return h;
 }
 
@@ -1339,7 +1356,7 @@
 
 namespace {
 
-constexpr char kExperimentalApiImplements[] = "experimental_api_implements";
+constexpr char kApiImplements[] = "api_implements";
 
 absl::flat_hash_set<string> ReachableFunctions(
     const FunctionLibraryDefinition& flib,
@@ -1347,10 +1364,10 @@
   // Functions that are reachable from the graph.
   absl::flat_hash_set<string> reachable_funcs;
 
-  // For any functions, if it has attribute "experimental_api_implements" =
+  // For any function, if it has attribute "api_implements" =
   // "some_interface" and it is reachable, then it means any other
   // function with same attribute name and value could also be potentially
-  // reachable, eg via experimental_implementation_selector swapping the
+  // reachable, e.g. via implementation_selector swapping the
   // nodedef.
   absl::flat_hash_set<string> reachable_api_interface;
 
@@ -1400,7 +1417,7 @@
     const string& func_name = func->signature().name();
     reachable_funcs.insert(func_name);
 
-    const auto attr_it = func->attr().find(kExperimentalApiImplements);
+    const auto attr_it = func->attr().find(kApiImplements);
     if (attr_it != func->attr().end()) {
       reachable_api_interface.insert(attr_it->second.s());
     }
@@ -1416,7 +1433,7 @@
 
   for (const auto& func_name : flib.ListFunctionNames()) {
     const auto& func_def = flib.Find(func_name);
-    const auto attr_it = func_def->attr().find(kExperimentalApiImplements);
+    const auto attr_it = func_def->attr().find(kApiImplements);
     if (attr_it != func_def->attr().end()) {
       if (reachable_api_interface.contains(attr_it->second.s())) {
         reachable_funcs.insert(func_name);
@@ -1512,7 +1529,8 @@
     const string& function_name, gtl::ArraySlice<string> in_def,
     gtl::ArraySlice<string> out_def, gtl::ArraySlice<string> attr_def,
     gtl::ArraySlice<Node> node_def,
-    gtl::ArraySlice<std::pair<string, string>> ret_def) {
+    gtl::ArraySlice<std::pair<string, string>> ret_def,
+    gtl::ArraySlice<std::pair<string, string>> control_ret_def) {
   FunctionDef fdef;
 
   // Signature
@@ -1520,6 +1538,7 @@
   for (const auto& i : in_def) b.Input(i);
   for (const auto& o : out_def) b.Output(o);
   for (const auto& a : attr_def) b.Attr(a);
+  for (const auto& c : control_ret_def) b.ControlOutput(c.first);
 
   OpRegistrationData op_reg_data;
   TF_CHECK_OK(b.Finalize(&op_reg_data));
@@ -1535,6 +1554,11 @@
     fdef.mutable_ret()->insert({r.first, r.second});
   }
 
+  // Control returns
+  for (const auto& cr : control_ret_def) {
+    fdef.mutable_control_ret()->insert({cr.first, cr.second});
+  }
+
   auto* op_def_registry = OpRegistry::Global();
   // Check if any op is stateful.
   for (const auto& n : node_def) {
@@ -1551,6 +1575,16 @@
 }
 
 /* static */
+FunctionDef FunctionDefHelper::Create(
+    const string& function_name, gtl::ArraySlice<string> in_def,
+    gtl::ArraySlice<string> out_def, gtl::ArraySlice<string> attr_def,
+    gtl::ArraySlice<Node> node_def,
+    gtl::ArraySlice<std::pair<string, string>> ret_def) {
+  return Create(function_name, in_def, out_def, attr_def, node_def, ret_def,
+                /*control_ret_def=*/{});
+}
+
+/* static */
 FunctionDef FunctionDefHelper::Define(const string& name,
                                       gtl::ArraySlice<string> arg_def,
                                       gtl::ArraySlice<string> ret_def,
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 79755f5..4f0d595 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -122,9 +122,23 @@
     NodeDef ToNodeDef() const;
   };
 
-  // The Create() function uses the new NodeDef field.  `ret_def`
-  // holds a mapping from the function output names from `out_def` to
-  // the node outputs from `node_def`.
+  // Creates a FunctionDef from the given parameters. Node inputs must use
+  // function encoding (node_name:output_name[:output_index]).
+  // - `ret_def` holds a mapping from the function output names from `out_def`
+  //   to the node outputs from `node_def`.
+  // - `control_ret_def` holds a mapping from the function control
+  //   output names to the nodes from `node_def`.
+  static FunctionDef Create(
+      const string& function_name, gtl::ArraySlice<string> in_def,
+      gtl::ArraySlice<string> out_def, gtl::ArraySlice<string> attr_def,
+      gtl::ArraySlice<Node> node_def,
+      gtl::ArraySlice<std::pair<string, string>> ret_def,
+      gtl::ArraySlice<std::pair<string, string>> control_ret_def);
+
+  // Creates a FunctionDef from the given parameters. Node inputs must use
+  // function encoding (node_name:output_name[:output_index]).
+  // - `ret_def` holds a mapping from the function output names from `out_def`
+  //   to the node outputs from `node_def`.
   static FunctionDef Create(const string& function_name,
                             gtl::ArraySlice<string> in_def,
                             gtl::ArraySlice<string> out_def,
@@ -132,7 +146,6 @@
                             gtl::ArraySlice<Node> node_def,
                             gtl::ArraySlice<std::pair<string, string>> ret_def);
 
-  // The two Define() functions use the old FunctionDef::Node field.
   // TODO(josh11b): Get rid of these and transition to the one above.
   static FunctionDef Define(const string& function_name,
                             gtl::ArraySlice<string> arg_def,
@@ -554,6 +567,7 @@
     // If provided, this optimization function will be invoked before
     // the placer for multi-device functions.
     std::function<Status(std::vector<string> /*ret_node_names*/,
+                         std::vector<string> /*keep_node_names*/,
                          FunctionLibraryDefinition*, const DeviceSet&,
                          Device* /*cpu_device*/, std::unique_ptr<Graph>*)>
         optimize_graph_fn;
diff --git a/tensorflow/core/framework/function.proto b/tensorflow/core/framework/function.proto
index e69d393..64f406b 100644
--- a/tensorflow/core/framework/function.proto
+++ b/tensorflow/core/framework/function.proto
@@ -76,6 +76,10 @@
   // A mapping from the output arg names from `signature` to the
   // outputs from `node_def` that should be returned by the function.
   map<string, string> ret = 4;
+
+  // A mapping from control output names from `signature` to node names in
+  // `node_def` which should be control outputs of this function.
+  map<string, string> control_ret = 6;
 }
 
 // GradientDef defines the gradient function of a function defined in
diff --git a/tensorflow/core/framework/function_test.cc b/tensorflow/core/framework/function_test.cc
index 6a828e9..6fbbabf 100644
--- a/tensorflow/core/framework/function_test.cc
+++ b/tensorflow/core/framework/function_test.cc
@@ -156,6 +156,48 @@
   EXPECT_EQ(DebugString(result.nodes), e2);
 }
 
+TEST(TFunc, ControlRet) {
+  auto fdef = FDH::Create(
+      // Name
+      "ControlRet",
+      // Inputs
+      {"x: int32"},
+      // Outputs
+      {"y: int32"},
+      // Attrs
+      {},
+      // Nodes
+      {
+          {{"a"}, "Identity", {"x"}, {{"T", DT_INT32}}},
+      },
+      // Returns
+      {{"y", "a:output:0"}},
+      // Control returns
+      {{"must_execute", "a"}});
+
+  const char* e = R"P(
+ControlRet(x:int32) -> (y:int32) {
+  a = Identity[T=int32](x)
+  @return must_execute = a
+  return y = a:output:0
+}
+)P";
+  EXPECT_EQ(DebugString(fdef), e);
+
+  // Instantiate one with T=float
+  InstantiationResult result;
+  TF_ASSERT_OK(
+      InstantiateFunction(fdef, Attrs({{"T", DT_FLOAT}}), GetOpSig, &result));
+  const char* e2 = R"P(
+(x:int32) -> (a:int32) {
+  a = Identity[T=int32](x)
+}
+)P";
+  EXPECT_EQ(result.arg_types, DataTypeVector({DT_INT32}));
+  EXPECT_EQ(result.ret_types, DataTypeVector({DT_INT32}));
+  EXPECT_EQ(DebugString(result.nodes), e2);
+}
+
 REGISTER_OP("HasDefaultType")
     .Output("out: T")
     .Attr("T: {float, double, int32, int64} = DT_FLOAT");
@@ -1320,7 +1362,7 @@
 
     if (!interface_name.empty()) {
       auto* attr = func_def.mutable_attr();
-      (*attr)["experimental_api_implements"].set_s(interface_name);
+      (*attr)["api_implements"].set_s(interface_name);
     }
     return func_def;
   };
diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc
index e369e88..fee5237 100644
--- a/tensorflow/core/framework/node_def_util.cc
+++ b/tensorflow/core/framework/node_def_util.cc
@@ -515,10 +515,13 @@
           ". (Check whether your GraphDef-interpreting binary is up to date "
           "with your GraphDef-generating binary.).");
     }
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(
-        ValidateAttrValue(attr.second, *iter->second),
-        "; NodeDef: ", FormatNodeDefForError(node_def), "; ",
-        SummarizeOpDef(op_def));
+    // If the attr value is a placeholder, do not check it.
+    if (attr.second.placeholder().empty()) {
+      TF_RETURN_WITH_CONTEXT_IF_ERROR(
+          ValidateAttrValue(attr.second, *iter->second),
+          "; NodeDef: ", FormatNodeDefForError(node_def), "; ",
+          SummarizeOpDef(op_def));
+    }
     // Keep track of which attr names have (not) been found in the NodeDef.
     op_attrs.erase(iter);
   }
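For context, a placeholder attr value is what a node inside a FunctionDef carries before instantiation binds the real value; such nodes previously failed ValidateAttrValue. A minimal sketch of a NodeDef that now validates:

    NodeDef node_def;
    node_def.set_name("x");
    node_def.set_op("Identity");
    AttrValue v;
    v.set_placeholder("T");  // bound later, at function instantiation
    (*node_def.mutable_attr())["T"] = v;
    // ValidateNodeDef(node_def, op_def) now skips the value check for "T".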
diff --git a/tensorflow/core/framework/op.cc b/tensorflow/core/framework/op.cc
index b8309ea..b29d7ae 100644
--- a/tensorflow/core/framework/op.cc
+++ b/tensorflow/core/framework/op.cc
@@ -60,6 +60,21 @@
 
 Status OpRegistry::LookUp(const string& op_type_name,
                           const OpRegistrationData** op_reg_data) const {
+  {
+    tf_shared_lock l(mu_);
+    if (initialized_) {
+      if (const OpRegistrationData* res =
+              gtl::FindWithDefault(registry_, op_type_name, nullptr)) {
+        *op_reg_data = res;
+        return Status::OK();
+      }
+    }
+  }
+  return LookUpSlow(op_type_name, op_reg_data);
+}
+
+Status OpRegistry::LookUpSlow(const string& op_type_name,
+                              const OpRegistrationData** op_reg_data) const {
   *op_reg_data = nullptr;
   const OpRegistrationData* res = nullptr;
 
diff --git a/tensorflow/core/framework/op.h b/tensorflow/core/framework/op.h
index 81ed5f9..538ce04 100644
--- a/tensorflow/core/framework/op.h
+++ b/tensorflow/core/framework/op.h
@@ -144,6 +144,9 @@
   Status RegisterAlreadyLocked(const OpRegistrationDataFactory& op_data_factory)
       const EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
+  Status LookUpSlow(const string& op_type_name,
+                    const OpRegistrationData** op_reg_data) const;
+
   mutable mutex mu_;
   // Functions in deferred_ may only be called with mu_ held.
   mutable std::vector<OpRegistrationDataFactory> deferred_ GUARDED_BY(mu_);
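The split gives LookUp a double-checked fast path: once the registry is initialized, a shared lock plus one hash probe resolves the common case, and only misses (or an uninitialized registry) fall through to LookUpSlow, which keeps the deferred-registration handling. Callers are unchanged; a sketch:

    const OpRegistrationData* op_reg_data = nullptr;
    Status s = OpRegistry::Global()->LookUp("MatMul", &op_reg_data);
    if (s.ok()) {
      VLOG(1) << "MatMul has " << op_reg_data->op_def.input_arg_size()
              << " input args";
    }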
diff --git a/tensorflow/core/framework/op_def.proto b/tensorflow/core/framework/op_def.proto
index aea2d2b..e44ecc9 100644
--- a/tensorflow/core/framework/op_def.proto
+++ b/tensorflow/core/framework/op_def.proto
@@ -54,6 +54,10 @@
   // Description of the output(s).
   repeated ArgDef output_arg = 3;
 
+  // Named control outputs for this operation. Useful only for composite
+  // operations (i.e. functions) which want to name different control outputs.
+  repeated string control_output = 20;
+
   // Description of the graph-construction-time configuration of this
   // Op.  That is to say, this describes the attr fields that will
   // be specified in the NodeDef.
diff --git a/tensorflow/core/framework/op_def_builder.cc b/tensorflow/core/framework/op_def_builder.cc
index 8a9bb63..0a62a2e 100644
--- a/tensorflow/core/framework/op_def_builder.cc
+++ b/tensorflow/core/framework/op_def_builder.cc
@@ -316,6 +316,14 @@
       .GetResult(sp, out);
 }
 
+bool ConsumeControlOutName(StringPiece* sp, StringPiece* out) {
+  return Scanner(*sp)
+      .One(Scanner::LETTER)
+      .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
+      .StopCapture()
+      .GetResult(sp, out);
+}
+
 #define VERIFY(expr, ...)                                             \
   do {                                                                \
     if (!(expr)) {                                                    \
@@ -409,6 +417,25 @@
 
 #undef VERIFY
 
+string ControlOutError(StringPiece orig, const string& op_name) {
+  return strings::StrCat(" from ControlOutput(\"", orig, "\") for Op ",
+                         op_name);
+}
+
+void FinalizeControlOutput(StringPiece name, OpDef* op_def,
+                           std::vector<string>* errors) {
+  StringPiece orig(name);
+
+  // Parse control output name.
+  StringPiece tmp_name;
+  if (!ConsumeControlOutName(&orig, &tmp_name)) {
+    errors->push_back(strings::StrCat("Trouble parsing 'name:'",
+                                      ControlOutError(orig, op_def->name())));
+  }
+
+  *op_def->add_control_output() = string(tmp_name.data(), tmp_name.size());
+}
+
 int num_leading_spaces(StringPiece s) {
   size_t i = 0;
   while (i < s.size() && s[i] == ' ') {
@@ -545,6 +572,11 @@
   return *this;
 }
 
+OpDefBuilder& OpDefBuilder::ControlOutput(string name) {
+  control_outputs_.push_back(std::move(name));
+  return *this;
+}
+
 #ifndef TF_LEAN_BINARY
 OpDefBuilder& OpDefBuilder::Doc(string text) {
   if (!doc_.empty()) {
@@ -614,6 +646,9 @@
   for (StringPiece output : outputs_) {
     FinalizeInputOrOutput(output, true, op_def, &errors);
   }
+  for (StringPiece control_output : control_outputs_) {
+    FinalizeControlOutput(control_output, op_def, &errors);
+  }
   FinalizeDoc(doc_, op_def, &errors);
 
   if (errors.empty()) return Status::OK();
diff --git a/tensorflow/core/framework/op_def_builder.h b/tensorflow/core/framework/op_def_builder.h
index 8077b20..38d3f5c 100644
--- a/tensorflow/core/framework/op_def_builder.h
+++ b/tensorflow/core/framework/op_def_builder.h
@@ -28,6 +28,8 @@
 
 namespace tensorflow {
 
+class FunctionDefHelper;
+
 namespace shape_inference {
 class InferenceContext;
 }
@@ -150,12 +152,20 @@
   Status Finalize(OpRegistrationData* op_reg_data) const;
 
  private:
+  friend class FunctionDefHelper;
+
+  // Adds a control output to this OpDefBuilder (and returns *this).
+  // The <name> must be a valid node name (matches regexp
+  // [a-zA-Z][a-zA-Z0-9_]*). Named control outputs can only exist for functions.
+  OpDefBuilder& ControlOutput(string name);
+
   OpDef* op_def() { return &op_reg_data_.op_def; }
 
   OpRegistrationData op_reg_data_;
   std::vector<string> attrs_;
   std::vector<string> inputs_;
   std::vector<string> outputs_;
+  std::vector<string> control_outputs_;
   string doc_;
   std::vector<string> errors_;
 };
diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc
index d629719..080db8f 100644
--- a/tensorflow/core/framework/op_def_util.cc
+++ b/tensorflow/core/framework/op_def_util.cc
@@ -114,6 +114,8 @@
         length = attr_value.list().shape_size();
       } else if (attr.type() == "list(tensor)") {
         length = attr_value.list().tensor_size();
+      } else if (attr.type() == "list(func)") {
+        length = attr_value.list().func_size();
       }
       if (length < attr.minimum()) {
         return errors::InvalidArgument(
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 789f0fd..16ca40c 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -20,6 +20,9 @@
 #include <utility>
 #include <vector>
 
+#include <cstdlib>
+#include <cstring>
+
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/framework/graph.pb_text.h"
@@ -998,6 +1001,12 @@
 
 void LoadDynamicKernelsInternal() {
   Env* env = Env::Default();
+
+  // Override to allow loading unsafe packages for development.
+  // DO NOT USE UNLESS YOU KNOW WHAT ABI ISSUES YOU CAN ENCOUNTER.
+  const char* load_unsafe = getenv("TF_REALLY_LOAD_UNSAFE_PACKAGES");
+  bool override_abi_check =
+      load_unsafe != nullptr && strcmp(load_unsafe, "1") == 0;
+
   string bazel_kernel_dir = io::JoinPath(env->GetRunfilesDir(),
                                          "tensorflow",
                                          "core",
@@ -1010,7 +1019,12 @@
       string fullpath = io::JoinPath(bazel_kernel_dir, file);
       if (env->MatchPath(fullpath, dll_spec)) {
         Status s = IsProbablySafeToLoad(fullpath);
-        if (s.ok()) {
+        if (!s.ok() && override_abi_check) {
+          LOG(WARNING) << "Loading UNSAFE library " << fullpath
+                       << " because ABI check override is set: "
+                       << s.error_message();
+        }
+        if (s.ok() || override_abi_check) {
           // TODO(gunan): Store the handles to the opened files.
           void* unused_filehandle;
           TF_CHECK_OK(env->LoadLibrary(fullpath.c_str(), &unused_filehandle));
@@ -1074,6 +1088,11 @@
   delete kernel_def;
 }
 
+OpKernel* OpKernelRegistrar::PtrOpKernelFactory::Create(
+    OpKernelConstruction* context) {
+  return (*create_func_)(context);
+}
+
 }  // namespace kernel_factory
 
 namespace {
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 06b9096..ff0b446 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -525,11 +525,42 @@
 // Used to store partitioned graphs from function-calling ops.
 struct GraphCollector {
   mutex mu;
-  std::vector<GraphDef> graphs GUARDED_BY(mu);
+  std::vector<GraphDef> partitioned_graphs GUARDED_BY(mu);
+  GraphDef raw_graph GUARDED_BY(mu);
+  GraphDef optimized_graph GUARDED_BY(mu);
 
-  void CollectGraph(const GraphDef& graph) {
+  bool dirty GUARDED_BY(mu);
+
+  GraphCollector() : dirty(false) {}
+
+  void CollectRawGraph(const GraphDef& graph) {
     mutex_lock ml(mu);
-    graphs.push_back(graph);
+    raw_graph.MergeFrom(graph);
+    dirty = true;
+  }
+
+  void CollectOptimizedGraph(const GraphDef& graph) {
+    mutex_lock ml(mu);
+    optimized_graph.MergeFrom(graph);
+    dirty = true;
+  }
+
+  void CollectPartitionedGraph(const GraphDef& graph) {
+    mutex_lock ml(mu);
+    partitioned_graphs.push_back(graph);
+    dirty = true;
+  }
+
+  void ClearGraphs() EXCLUSIVE_LOCKS_REQUIRED(mu) {
+    raw_graph.Clear();
+    optimized_graph.Clear();
+    partitioned_graphs.clear();
+    dirty = false;
+  }
+
+  bool HasUpdatedGraphs() {
+    mutex_lock ml(mu);
+    return dirty;
   }
 };
 
@@ -1436,23 +1467,21 @@
     // Perform the check in the header to allow compile-time optimization
     // to a no-op, allowing the linker to remove the kernel symbols.
     if (kernel_def != nullptr) {
-      struct PtrOpKernelFactory : public OpKernelFactory {
-        explicit PtrOpKernelFactory(
-            OpKernel* (*create_func)(OpKernelConstruction*))
-            : create_func_(create_func) {}
-
-        OpKernel* Create(OpKernelConstruction* context) override {
-          return (*create_func_)(context);
-        }
-
-        OpKernel* (*create_func_)(OpKernelConstruction*);
-      };
       InitInternal(kernel_def, kernel_class_name,
                    absl::make_unique<PtrOpKernelFactory>(create_fn));
     }
   }
 
  private:
+  struct PtrOpKernelFactory : public OpKernelFactory {
+    explicit PtrOpKernelFactory(OpKernel* (*create_func)(OpKernelConstruction*))
+        : create_func_(create_func) {}
+
+    OpKernel* Create(OpKernelConstruction* context) override;
+
+    OpKernel* (*create_func_)(OpKernelConstruction*);
+  };
+
   void InitInternal(const KernelDef* kernel_def, StringPiece kernel_class_name,
                     std::unique_ptr<OpKernelFactory> factory);
 };
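A usage sketch for the expanded GraphCollector, with DumpAll as a hypothetical consumer; ClearGraphs() is annotated EXCLUSIVE_LOCKS_REQUIRED(mu), so the caller holds the lock around reading and clearing:

    GraphCollector collector;
    collector.CollectRawGraph(raw_graph_def);        // before optimization
    collector.CollectOptimizedGraph(optimized_def);  // after optimization
    collector.CollectPartitionedGraph(partition_def);
    if (collector.HasUpdatedGraphs()) {
      mutex_lock l(collector.mu);
      DumpAll(collector.raw_graph, collector.optimized_graph,
              collector.partitioned_graphs);
      collector.ClearGraphs();  // ok: mu is held
    }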
diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h
index 9c381e7..da547d5 100644
--- a/tensorflow/core/framework/resource_mgr.h
+++ b/tensorflow/core/framework/resource_mgr.h
@@ -89,9 +89,17 @@
   // step_id: the unique ID of this step. Doesn't have to be sequential, just
   // has to be unique.
   // cleanup: callback to delete a container of this name.
+  // prefix: optional string prefix to disambiguate step containers.
   ScopedStepContainer(const int64 step_id,
                       std::function<void(const string&)> cleanup)
       : name_(strings::StrCat("__per_step_", step_id)), cleanup_(cleanup) {}
+
+  ScopedStepContainer(const int64 step_id,
+                      std::function<void(const string&)> cleanup,
+                      const string& prefix)
+      : name_(strings::StrCat("__", prefix, "_per_step_", step_id)),
+        cleanup_(cleanup) {}
+
   ~ScopedStepContainer() { cleanup_(name_); }
 
   const string& name() const { return name_; }
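A sketch of the new prefixed constructor, which keeps two executors that share a step_id from colliding on the same container name; rm is a hypothetical ResourceMgr*:

    ScopedStepContainer container(
        step_id, [rm](const string& name) { rm->Cleanup(name).IgnoreError(); },
        /*prefix=*/"eager");
    // container.name() == "__eager_per_step_<step_id>"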
@@ -124,14 +132,14 @@
   //
   // REQUIRES: std::is_base_of<ResourceBase, T>
   // REQUIRES: resource != nullptr
-  template <typename T>
+  template <typename T, bool use_dynamic_cast = false>
   Status Lookup(const string& container, const string& name,
                 T** resource) const TF_MUST_USE_RESULT;
 
   // Similar to Lookup, but looks up multiple resources at once, with only a
   // single lock acquisition.  If containers_and_names[i] is uninitialized
   // then this function does not modify resources[i].
-  template <typename T>
+  template <typename T, bool use_dynamic_cast = false>
   Status LookupMany(absl::Span<std::pair<const string*, const string*> const>
                         containers_and_names,
                     std::vector<std::unique_ptr<T, core::RefCountDeleter>>*
@@ -147,7 +155,7 @@
   //
   // REQUIRES: std::is_base_of<ResourceBase, T>
   // REQUIRES: resource != nullptr
-  template <typename T>
+  template <typename T, bool use_dynamic_cast = false>
   Status LookupOrCreate(const string& container, const string& name,
                         T** resource,
                         std::function<Status(T**)> creator) TF_MUST_USE_RESULT;
@@ -188,7 +196,7 @@
   mutable mutex mu_;
   std::unordered_map<string, Container*> containers_ GUARDED_BY(mu_);
 
-  template <typename T>
+  template <typename T, bool use_dynamic_cast = false>
   Status LookupInternal(const string& container, const string& name,
                         T** resource) const
       SHARED_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT;
@@ -259,7 +267,7 @@
 //
 // If the lookup is successful, the caller takes the ownership of one ref on
 // `*value`, and must call its `Unref()` method when it has finished using it.
-template <typename T>
+template <typename T, bool use_dynamic_cast = false>
 Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p, T** value);
 
 // Looks up multiple resources pointed by a sequence of resource handles.  If
@@ -429,15 +437,15 @@
   return DoCreate(container, MakeTypeIndex<T>(), name, resource);
 }
 
-template <typename T>
+template <typename T, bool use_dynamic_cast>
 Status ResourceMgr::Lookup(const string& container, const string& name,
                            T** resource) const {
   CheckDeriveFromResourceBase<T>();
   tf_shared_lock l(mu_);
-  return LookupInternal(container, name, resource);
+  return LookupInternal<T, use_dynamic_cast>(container, name, resource);
 }
 
-template <typename T>
+template <typename T, bool use_dynamic_cast>
 Status ResourceMgr::LookupMany(
     absl::Span<std::pair<const string*, const string*> const>
         containers_and_names,
@@ -447,8 +455,9 @@
   resources->resize(containers_and_names.size());
   for (size_t i = 0; i < containers_and_names.size(); ++i) {
     T* resource;
-    Status s = LookupInternal(*containers_and_names[i].first,
-                              *containers_and_names[i].second, &resource);
+    Status s = LookupInternal<T, use_dynamic_cast>(
+        *containers_and_names[i].first, *containers_and_names[i].second,
+        &resource);
     if (s.ok()) {
       (*resources)[i].reset(resource);
     }
@@ -456,7 +465,18 @@
   return Status::OK();
 }
 
+// Simple wrapper to allow conditional dynamic / static casts.
+template <typename T, bool use_dynamic_cast>
+struct TypeCastFunctor {
+  static T* Cast(ResourceBase* r) { return static_cast<T*>(r); }
+};
+
 template <typename T>
+struct TypeCastFunctor<T, true> {
+  static T* Cast(ResourceBase* r) { return dynamic_cast<T*>(r); }
+};
+
+template <typename T, bool use_dynamic_cast>
 Status ResourceMgr::LookupInternal(const string& container, const string& name,
                                    T** resource) const {
   ResourceBase* found = nullptr;
@@ -464,12 +484,12 @@
   if (s.ok()) {
     // It's safe to down cast 'found' to T* since
     // typeid(T).hash_code() is part of the map key.
-    *resource = static_cast<T*>(found);
+    *resource = TypeCastFunctor<T, use_dynamic_cast>::Cast(found);
   }
   return s;
 }
 
-template <typename T>
+template <typename T, bool use_dynamic_cast>
 Status ResourceMgr::LookupOrCreate(const string& container, const string& name,
                                    T** resource,
                                    std::function<Status(T**)> creator) {
@@ -478,11 +498,11 @@
   Status s;
   {
     tf_shared_lock l(mu_);
-    s = LookupInternal(container, name, resource);
+    s = LookupInternal<T, use_dynamic_cast>(container, name, resource);
     if (s.ok()) return s;
   }
   mutex_lock l(mu_);
-  s = LookupInternal(container, name, resource);
+  s = LookupInternal<T, use_dynamic_cast>(container, name, resource);
   if (s.ok()) return s;
   TF_RETURN_IF_ERROR(creator(resource));
   s = DoCreate(container, MakeTypeIndex<T>(), name, *resource);
@@ -558,11 +578,12 @@
   return ctx->resource_manager()->Create(p.container(), p.name(), value);
 }
 
-template <typename T>
+template <typename T, bool use_dynamic_cast>
 Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p,
                       T** value) {
   TF_RETURN_IF_ERROR(internal::ValidateDeviceAndType<T>(ctx, p));
-  return ctx->resource_manager()->Lookup(p.container(), p.name(), value);
+  return ctx->resource_manager()->Lookup<T, use_dynamic_cast>(p.container(),
+                                                              p.name(), value);
 }
 
 template <typename T>
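The flag threads through Lookup, LookupMany, LookupOrCreate and LookupInternal via TypeCastFunctor, so a caller that only knows a base class can opt into dynamic_cast. A sketch, with MyVarBase standing in for a hypothetical ResourceBase hierarchy (the extra parentheses keep the comma in the template arguments from splitting the macro argument):

    MyVarBase* var = nullptr;
    TF_RETURN_IF_ERROR((LookupResource<MyVarBase, /*use_dynamic_cast=*/true>(
        ctx, HandleFromInput(ctx, 0), &var)));
    core::ScopedUnref unref(var);  // the lookup returned one owned ref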
diff --git a/tensorflow/core/framework/tensor_util.cc b/tensorflow/core/framework/tensor_util.cc
index 65f6dc1..d6e2224 100644
--- a/tensorflow/core/framework/tensor_util.cc
+++ b/tensorflow/core/framework/tensor_util.cc
@@ -37,10 +37,10 @@
              other_data.size());
     }
   } else if (other.dtype() == DT_STRING) {
-    tmp.flat<string>() = other.flat<string>();
+    tmp.unaligned_flat<string>() = other.unaligned_flat<string>();
   } else {
     CHECK_EQ(DT_VARIANT, other.dtype());
-    tmp.flat<Variant>() = other.flat<Variant>();
+    tmp.unaligned_flat<Variant>() = other.unaligned_flat<Variant>();
   }
   return tmp;
 }
diff --git a/tensorflow/core/framework/tensor_util.h b/tensorflow/core/framework/tensor_util.h
index a7cf600..82a05e4 100644
--- a/tensorflow/core/framework/tensor_util.h
+++ b/tensorflow/core/framework/tensor_util.h
@@ -16,11 +16,14 @@
 #ifndef TENSORFLOW_CORE_FRAMEWORK_TENSOR_UTIL_H_
 #define TENSORFLOW_CORE_FRAMEWORK_TENSOR_UTIL_H_
 
+#include <algorithm>
+#include <vector>
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/types.h"
 
-#include <vector>
 namespace tensorflow {
 namespace tensor {
 
@@ -61,12 +64,59 @@
                          TensorShapeProto* shape_proto);
 
 // Defines value type dependent methods to manipulate `TensorProto`.
-// Class specializations has to define following methods:
+// Class specializations have to define the following methods:
 //   static DataType GetDataType()
 //   static void AddValue(Type value, TensorProto* proto)
+//   template <typename IterType>
+//   static void AddValues(IterType begin, IterType end, TensorProto* proto)
+
 template <typename Type>
 class TensorProtoHelper : public std::false_type {};
 
+#define DEFINE_PROTO_HELPER(TYPE, TF_TYPE, FIELDTYPE)                         \
+  template <>                                                                 \
+  class TensorProtoHelper<TYPE> : public std::true_type {                     \
+   public:                                                                    \
+    static DataType GetDataType() { return DataType::TF_TYPE; }               \
+    static void AddValue(const TYPE& value, TensorProto* proto) {             \
+      proto->mutable_##FIELDTYPE##_val()->Add(value);                         \
+    }                                                                         \
+    template <typename IterType>                                              \
+    static void AddValues(IterType begin, IterType end, TensorProto* proto) { \
+      using SrcType = typename std::iterator_traits<IterType>::value_type;    \
+      size_t n = std::distance(begin, end);                                   \
+      FIELDTYPE* dst_ptr = AppendUninitialized(n, proto);                     \
+      if (std::is_same<SrcType, FIELDTYPE>::value) {                          \
+        std::copy(begin, end, dst_ptr);                                       \
+      } else {                                                                \
+        std::transform(begin, end, dst_ptr, [](SrcType x) -> FIELDTYPE {      \
+          return static_cast<FIELDTYPE>(x);                                   \
+        });                                                                   \
+      }                                                                       \
+    }                                                                         \
+                                                                              \
+   private:                                                                   \
+    static FIELDTYPE* AppendUninitialized(size_t n, TensorProto* proto) {     \
+      auto* field = proto->mutable_##FIELDTYPE##_val();                       \
+      field->Reserve(field->size() + n);                                      \
+      return reinterpret_cast<FIELDTYPE*>(field->AddNAlreadyReserved(n));     \
+    }                                                                         \
+  }
+
+DEFINE_PROTO_HELPER(float, DT_FLOAT, float);
+DEFINE_PROTO_HELPER(double, DT_DOUBLE, double);
+DEFINE_PROTO_HELPER(int8, DT_INT8, int);
+DEFINE_PROTO_HELPER(uint8, DT_UINT8, int);
+DEFINE_PROTO_HELPER(int16, DT_INT16, int);
+DEFINE_PROTO_HELPER(uint16, DT_UINT16, int);
+DEFINE_PROTO_HELPER(int32, DT_INT32, int);
+DEFINE_PROTO_HELPER(uint32, DT_UINT32, uint32);
+DEFINE_PROTO_HELPER(int64, DT_INT64, int64);
+DEFINE_PROTO_HELPER(uint64, DT_UINT64, uint64);
+DEFINE_PROTO_HELPER(bool, DT_BOOL, bool);
+
+#undef DEFINE_PROTO_HELPER
+
 template <>
 class TensorProtoHelper<string> : public std::true_type {
  public:
@@ -74,70 +124,14 @@
   static void AddValue(const string& value, TensorProto* proto) {
     *proto->mutable_string_val()->Add() = value;
   }
-};
-
-template <>
-class TensorProtoHelper<int32> : public std::true_type {
- public:
-  static DataType GetDataType() { return DataType::DT_INT32; }
-  static void AddValue(int32 value, TensorProto* proto) {
-    proto->mutable_int_val()->Add(value);
+  template <typename IterType>
+  static void AddValues(IterType begin, IterType end, TensorProto* proto) {
+    for (IterType it = begin; it != end; ++it) {
+      AddValue(*it, proto);
+    }
   }
 };
 
-template <>
-class TensorProtoHelper<int64> : public std::true_type {
- public:
-  static DataType GetDataType() { return DataType::DT_INT64; }
-  static void AddValue(int64 value, TensorProto* proto) {
-    proto->mutable_int64_val()->Add(value);
-  }
-};
-
-template <>
-class TensorProtoHelper<uint32> : public std::true_type {
- public:
-  static DataType GetDataType() { return DataType::DT_UINT32; }
-  static void AddValue(uint32 value, TensorProto* proto) {
-    proto->mutable_uint32_val()->Add(value);
-  }
-};
-
-template <>
-class TensorProtoHelper<uint64> : public std::true_type {
- public:
-  static DataType GetDataType() { return DataType::DT_UINT64; }
-  static void AddValue(uint64 value, TensorProto* proto) {
-    proto->mutable_uint64_val()->Add(value);
-  }
-};
-
-template <>
-class TensorProtoHelper<float> : public std::true_type {
- public:
-  static DataType GetDataType() { return DataType::DT_FLOAT; }
-  static void AddValue(float value, TensorProto* proto) {
-    proto->mutable_float_val()->Add(value);
-  }
-};
-
-template <>
-class TensorProtoHelper<double> : public std::true_type {
- public:
-  static DataType GetDataType() { return DataType::DT_DOUBLE; }
-  static void AddValue(double value, TensorProto* proto) {
-    proto->mutable_double_val()->Add(value);
-  }
-};
-
-template <>
-class TensorProtoHelper<bool> : public std::true_type {
- public:
-  static DataType GetDataType() { return DataType::DT_BOOL; }
-  static void AddValue(bool value, TensorProto* proto) {
-    proto->mutable_bool_val()->Add(value);
-  }
-};
 }  // namespace internal
 
 // Creates a 'TensorProto' with specified shape and values.
@@ -152,9 +146,7 @@
   using TypeHelper = internal::TensorProtoHelper<Type>;
   tensor.set_dtype(TypeHelper::GetDataType());
   internal::SetTensorProtoShape(shape, tensor.mutable_tensor_shape());
-  for (const auto& value : values) {
-    TypeHelper::AddValue(value, &tensor);
-  }
+  TypeHelper::AddValues(values.begin(), values.end(), &tensor);
   return tensor;
 }
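CreateTensorProto above now appends all values in one bulk AddValues() call (a reserve followed by a copy or transform) instead of one repeated-field Add() per element. Using the helper directly, as a sketch:

    TensorProto proto;
    using Helper = tensorflow::tensor::internal::TensorProtoHelper<float>;
    proto.set_dtype(Helper::GetDataType());  // DT_FLOAT
    std::vector<float> values = {1.0f, 2.0f, 3.0f, 4.0f};
    Helper::AddValues(values.begin(), values.end(), &proto);
    // proto.float_val_size() == 4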
 
diff --git a/tensorflow/core/framework/tensor_util_test.cc b/tensorflow/core/framework/tensor_util_test.cc
index 2b4e1ca..c8545bc 100644
--- a/tensorflow/core/framework/tensor_util_test.cc
+++ b/tensorflow/core/framework/tensor_util_test.cc
@@ -18,6 +18,9 @@
 #include <vector>
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
+#include "tensorflow/core/framework/variant_tensor_data.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -145,6 +148,68 @@
   }
 }
 
+TEST(TensorUtil, DeepCopySliceString) {
+  Tensor x(DT_STRING, TensorShape({10}));
+  x.flat<string>().setConstant("hello");
+
+  // Slice 'x' -- y still refers to the same buffer.
+  Tensor y = x.Slice(3, 7);
+
+  // Do a deep copy of y, which is a slice.
+  Tensor z = tensor::DeepCopy(y);
+
+  // Set x to be different.
+  x.flat<string>().setConstant("goodbye");
+
+  EXPECT_EQ(TensorShape({10}), x.shape());
+  EXPECT_EQ(TensorShape({4}), y.shape());
+  EXPECT_EQ(TensorShape({4}), z.shape());
+  EXPECT_EQ(DT_STRING, x.dtype());
+  EXPECT_EQ(DT_STRING, y.dtype());
+  EXPECT_EQ(DT_STRING, z.dtype());
+
+  // x and y should now all be 'goodbye', but z should be 'hello'.
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_EQ("goodbye", x.flat<string>()(i));
+  }
+  for (int i = 0; i < 4; ++i) {
+    EXPECT_EQ("goodbye", y.unaligned_flat<string>()(i));
+    EXPECT_EQ("hello", z.flat<string>()(i));
+  }
+}
+
+TEST(TensorUtil, DeepCopySliceVariant) {
+  Tensor x(DT_VARIANT, TensorShape({10}));
+  x.flat<Variant>().setConstant(Tensor(42.0f));
+
+  // Slice 'x' -- y still refers to the same buffer.
+  Tensor y = x.Slice(3, 7);
+
+  // Do a deep copy of y, which is a slice.
+  Tensor z = tensor::DeepCopy(y);
+
+  // Set x to be different.
+  x.flat<Variant>().setConstant(Tensor("foo"));
+
+  EXPECT_EQ(TensorShape({10}), x.shape());
+  EXPECT_EQ(TensorShape({4}), y.shape());
+  EXPECT_EQ(TensorShape({4}), z.shape());
+  EXPECT_EQ(DT_VARIANT, x.dtype());
+  EXPECT_EQ(DT_VARIANT, y.dtype());
+  EXPECT_EQ(DT_VARIANT, z.dtype());
+
+  // Each element of x and y should now be a DT_STRING Tensor containing "foo",
+  // but each element of z should be a DT_FLOAT tensor containing 42.0.
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_EQ("foo", x.flat<Variant>()(i).get<Tensor>()->scalar<string>()());
+  }
+  for (int i = 0; i < 4; ++i) {
+    EXPECT_EQ("foo",
+              y.unaligned_flat<Variant>()(i).get<Tensor>()->scalar<string>()());
+    EXPECT_EQ(42.0, z.flat<Variant>()(i).get<Tensor>()->scalar<float>()());
+  }
+}
+
 TEST(TensorUtil, Concat) {
   std::vector<int64> sizes = {1, 4, 5};
   std::vector<Tensor> to_concat;
diff --git a/tensorflow/core/framework/tracking_allocator.cc b/tensorflow/core/framework/tracking_allocator.cc
index 2df4025..ff454f5 100644
--- a/tensorflow/core/framework/tracking_allocator.cc
+++ b/tensorflow/core/framework/tracking_allocator.cc
@@ -152,8 +152,8 @@
   }
 }
 
-void TrackingAllocator::GetStats(AllocatorStats* stats) {
-  allocator_->GetStats(stats);
+absl::optional<AllocatorStats> TrackingAllocator::GetStats() {
+  return allocator_->GetStats();
 }
 
 void TrackingAllocator::ClearStats() { allocator_->ClearStats(); }
diff --git a/tensorflow/core/framework/tracking_allocator.h b/tensorflow/core/framework/tracking_allocator.h
index 5eafce6..3b45d1c 100644
--- a/tensorflow/core/framework/tracking_allocator.h
+++ b/tensorflow/core/framework/tracking_allocator.h
@@ -66,7 +66,7 @@
   size_t RequestedSize(const void* ptr) override;
   size_t AllocatedSize(const void* ptr) override;
   int64 AllocationId(const void* ptr) override;
-  void GetStats(AllocatorStats* stats) override;
+  absl::optional<AllocatorStats> GetStats() override;
   void ClearStats() override;
 
   // If the underlying allocator tracks allocation sizes, this returns
diff --git a/tensorflow/core/framework/tracking_allocator_test.cc b/tensorflow/core/framework/tracking_allocator_test.cc
index 2cdc7ed..554af60 100644
--- a/tensorflow/core/framework/tracking_allocator_test.cc
+++ b/tensorflow/core/framework/tracking_allocator_test.cc
@@ -44,7 +44,7 @@
     EXPECT_NE(size_map_.end(), iter);
     return iter->second;
   }
-  void GetStats(AllocatorStats* stats) override { stats->Clear(); }
+  absl::optional<AllocatorStats> GetStats() override { return absl::nullopt; }
 
  private:
   std::unordered_map<const void*, size_t> size_map_;
@@ -58,7 +58,7 @@
   }
   void DeallocateRaw(void* ptr) override {}
   bool TracksAllocationSizes() override { return true; }
-  void GetStats(AllocatorStats* stats) override { stats->Clear(); }
+  absl::optional<AllocatorStats> GetStats() override { return absl::nullopt; }
 };
 
 TEST(TrackingAllocatorTest, SimpleNoTracking) {
diff --git a/tensorflow/core/framework/variant_tensor_data.cc b/tensorflow/core/framework/variant_tensor_data.cc
index c169e86..993a898 100644
--- a/tensorflow/core/framework/variant_tensor_data.cc
+++ b/tensorflow/core/framework/variant_tensor_data.cc
@@ -20,14 +20,10 @@
 
 namespace tensorflow {
 
-VariantTensorData::VariantTensorData() {}
-
 VariantTensorData::VariantTensorData(VariantTensorDataProto proto) {
   FromProto(std::move(proto));
 }
 
-VariantTensorData::~VariantTensorData() {}
-
 int VariantTensorData::tensors_size() const { return tensors_.size(); }
 
 const Tensor& VariantTensorData::tensors(int index) const {
diff --git a/tensorflow/core/framework/variant_tensor_data.h b/tensorflow/core/framework/variant_tensor_data.h
index ca99e83..d98cf6b 100644
--- a/tensorflow/core/framework/variant_tensor_data.h
+++ b/tensorflow/core/framework/variant_tensor_data.h
@@ -37,11 +37,11 @@
 // separate so that kernels do not need to depend on protos.
 class VariantTensorData {
  public:
-  VariantTensorData();
+  VariantTensorData() = default;
+
   // TODO(b/118823936): This silently returns if the proto is invalid.
   // Consider calling FromProto explicitly instead.
   VariantTensorData(VariantTensorDataProto proto);
-  ~VariantTensorData();
 
   // Name of the type of objects being serialized.
   const string& type_name() const { return type_name_; }
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 00d3549..3ea222c 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -85,10 +85,6 @@
         {"CollectiveBcastSend", NC_COLLECTIVE},
         {"CollectiveBcastRecv", NC_COLLECTIVE},
         {"FakeParam", NC_FAKE_PARAM},
-        {"IteratorGetNext", NC_DATASET},
-        {"IteratorGetNextSync", NC_DATASET},
-        {"DatasetToSingleElement", NC_DATASET},
-        {"ReduceDataset", NC_DATASET},
     });
 
 #undef REF_CLASS
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index f65e4b9..289a3d2 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -174,8 +174,6 @@
   bool IsMetadata() const { return class_ == NC_METADATA; }
   bool IsFakeParam() const { return class_ == NC_FAKE_PARAM; }
 
-  bool IsDataset() const { return class_ == NC_DATASET; }
-
   template <typename T>
   void AddAttr(const string& name, const T& val) {
     SetAttrValue(val, AddAttrHelper(name));
@@ -256,7 +254,6 @@
     NC_SCOPED_ALLOCATOR,
     NC_COLLECTIVE,
     NC_FAKE_PARAM,
-    NC_DATASET,
     NC_OTHER  // Not a special kind of node
   };
 
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index e934978..d5dcd16 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -291,10 +291,6 @@
     csinfo_.pad = "Pad";
     csinfo_.pad_with_conv2d = "__MklDummyPadWithConv2D";
     csinfo_.pad_with_fused_conv2d = "__MklDummyPadWithFusedConv2D";
-// Temporarily don't convert quantized operators into MKL versions for now.
-// TODO(Intel-tf) Once all the relevant PRs have been merged then remove
-// the ifdef.
-#ifdef INTEL_MKL_QUANTIZED
     csinfo_.quantized_avg_pool = "QuantizedAvgPool";
     csinfo_.quantized_concatv2 = "QuantizedConcatV2";
     csinfo_.quantized_conv2d = "QuantizedConv2D";
@@ -316,14 +312,11 @@
         "QuantizedConv2DWithBiasSumAndReluAndRequantize";
     csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize =
         "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize";
-#endif
     csinfo_.relu = "Relu";
     csinfo_.relu_grad = "ReluGrad";
     csinfo_.relu6 = "Relu6";
     csinfo_.relu6_grad = "Relu6Grad";
-#ifdef INTEL_MKL_QUANTIZED
     csinfo_.requantize = "Requantize";
-#endif
     csinfo_.tanh = "Tanh";
     csinfo_.tanh_grad = "TanhGrad";
     csinfo_.reshape = "Reshape";
@@ -443,7 +436,6 @@
     rinfo_.push_back({csinfo_.pad_with_fused_conv2d,
                       csinfo_.mkl_pad_with_fused_conv2d,
                       CopyAttrsPadWithFusedConv2D, AlwaysRewrite});
-#ifdef INTEL_MKL_QUANTIZED
     rinfo_.push_back({csinfo_.quantized_avg_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_avg_pool),
                       CopyAttrsQuantizedPooling, AlwaysRewrite});
@@ -499,7 +491,6 @@
          mkl_op_registry::GetMklOpName(
              csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize),
          CopyAttrsQuantizedConv2D, AlwaysRewrite});
-#endif
     rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
                       CopyAttrsDataType, AlwaysRewrite});
     rinfo_.push_back({csinfo_.relu_grad,
@@ -511,11 +502,9 @@
     rinfo_.push_back({csinfo_.relu6_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.relu6_grad),
                       CopyAttrsDataType, AlwaysRewrite});
-#ifdef INTEL_MKL_QUANTIZED
     rinfo_.push_back({csinfo_.requantize,
                       mkl_op_registry::GetMklOpName(csinfo_.requantize),
                       CopyAttrsRequantize, AlwaysRewrite});
-#endif
     /*
     rinfo_.push_back({csinfo_.tanh,
                       mkl_op_registry::GetMklOpName(csinfo_.tanh),
@@ -3187,9 +3176,7 @@
   // Set the Mkl layer label for this op.
   if (DataTypeIsQuantized(orig_node->input_type(0)) ||
       DataTypeIsQuantized(orig_node->output_type(0))) {
-#ifdef INTEL_MKL_QUANTIZED
     nb.Attr("_kernel", mkl_op_registry::kMklQuantizedOpLabel);
-#endif
   } else {
     nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
   }
@@ -3243,7 +3230,6 @@
 // Current implementation reflects only QuantizedConv2D and its fused Ops.
 const MklLayoutRewritePass::RewriteInfo*
 MklLayoutRewritePass::CheckForQuantizedNodeRewrite(const Node* n) const {
-#ifdef INTEL_MKL_QUANTIZED
   DataType Tinput, Tfilter;
   if (!(GetNodeAttr(n->def(), "Tinput", &Tinput).ok() &&
         GetNodeAttr(n->def(), "Tfilter", &Tfilter).ok())) {
@@ -3257,7 +3243,6 @@
       }
     }
   }
-#endif
   return nullptr;
 }
 
diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 9fe6993..7730770 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -1,7 +1,6 @@
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
-load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
+load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_cuda_library")
 
 cc_library(
     name = "op_types",
@@ -45,6 +44,7 @@
         "//tensorflow/core:tensor_testutil",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -71,7 +71,6 @@
     deps = [
         ":graph_view",
         "//tensorflow/core:graph",
-        "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc
index e4f6bf7..e7c72b8 100644
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@@ -227,14 +227,14 @@
 
   device_peak_memory->clear();
   for (Device* device : devices) {
-    AllocatorStats stats;
     auto* allocator = device->GetAllocator(AllocatorAttributes());
     if (!allocator->TracksAllocationSizes()) {
       return Status(error::INVALID_ARGUMENT,
                     "Tracking allocation is not enabled.");
     }
-    allocator->GetStats(&stats);
-    (*device_peak_memory)[device->name()] = stats.max_bytes_in_use;
+    absl::optional<AllocatorStats> stats = allocator->GetStats();
+    (*device_peak_memory)[device->name()] =
+        (stats ? stats->peak_bytes_in_use : 0);
   }
 
   return Status::OK();
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 8ec558b..6907988 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -481,6 +481,7 @@
           DT_QINT8,
           DT_QUINT8,
           DT_QINT16,
+          DT_QUINT16,
           DT_QINT32,
           // Bool.
           DT_BOOL,
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index 1e2e160..11877d8 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -64,6 +64,7 @@
 constexpr char kAvgPoolGrad[] = "AvgPoolGrad";
 constexpr char kFusedBatchNorm[] = "FusedBatchNorm";
 constexpr char kFusedBatchNormGrad[] = "FusedBatchNormGrad";
+constexpr char kQuantizedMatMul[] = "QuantizedMatMul";
 constexpr char kQuantizedMatMulV2[] = "QuantizedMatMulV2";
 // Persistent ops.
 constexpr char kConst[] = "Const";
@@ -243,6 +244,7 @@
       {kMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)},
       {kSparseMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)},
       {kBatchMatMul, wrap(&OpLevelCostEstimator::PredictBatchMatMul)},
+      {kQuantizedMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)},
       {kQuantizedMatMulV2, wrap(&OpLevelCostEstimator::PredictMatMul)},
 
       {kNoOp, wrap(&OpLevelCostEstimator::PredictNoOp)},
diff --git a/tensorflow/core/grappler/mutable_graph_view.cc b/tensorflow/core/grappler/mutable_graph_view.cc
index 64098e7..6a5e60e 100644
--- a/tensorflow/core/grappler/mutable_graph_view.cc
+++ b/tensorflow/core/grappler/mutable_graph_view.cc
@@ -106,6 +106,146 @@
   return false;
 }
 
+using FanoutsMap =
+    absl::flat_hash_map<MutableGraphView::OutputPort,
+                        absl::flat_hash_set<MutableGraphView::InputPort>>;
+
+void SwapControlledFanoutInputs(const MutableGraphView& graph,
+                                const FanoutsMap::iterator& control_fanouts,
+                                absl::string_view to_node_name) {
+  absl::string_view from_node_name(control_fanouts->first.node->name());
+  string control = TensorIdToString({to_node_name, Graph::kControlSlot});
+  for (const auto& control_fanout : control_fanouts->second) {
+    const int start = graph.NumFanins(*control_fanout.node,
+                                      /*include_controlling_nodes=*/false);
+    for (int i = start; i < control_fanout.node->input_size(); ++i) {
+      TensorId tensor_id = ParseTensorName(control_fanout.node->input(i));
+      if (tensor_id.node() == from_node_name) {
+        control_fanout.node->set_input(i, control);
+        break;
+      }
+    }
+  }
+}
+
+void SwapRegularFanoutInputs(FanoutsMap* fanouts, NodeDef* from_node,
+                             absl::string_view to_node_name, int max_port) {
+  MutableGraphView::OutputPort port;
+  port.node = from_node;
+  for (int i = 0; i <= max_port; ++i) {
+    port.port_id = i;
+    auto it = fanouts->find(port);
+    if (it == fanouts->end()) {
+      continue;
+    }
+    string input = TensorIdToString({to_node_name, i});
+    for (const auto& fanout : it->second) {
+      fanout.node->set_input(fanout.port_id, input);
+    }
+  }
+}
+
+using MaxOutputPortsMap = absl::flat_hash_map<const NodeDef*, int>;
+
+void SwapFanoutInputs(const MutableGraphView& graph, FanoutsMap* fanouts,
+                      MaxOutputPortsMap* max_output_ports, NodeDef* from_node,
+                      NodeDef* to_node) {
+  auto from_control_fanouts = fanouts->find({from_node, Graph::kControlSlot});
+  if (from_control_fanouts != fanouts->end()) {
+    SwapControlledFanoutInputs(graph, from_control_fanouts, to_node->name());
+  }
+  auto to_control_fanouts = fanouts->find({to_node, Graph::kControlSlot});
+  if (to_control_fanouts != fanouts->end()) {
+    SwapControlledFanoutInputs(graph, to_control_fanouts, from_node->name());
+  }
+  auto from_max_port = max_output_ports->find(from_node);
+  if (from_max_port != max_output_ports->end()) {
+    SwapRegularFanoutInputs(fanouts, from_node, to_node->name(),
+                            from_max_port->second);
+  }
+  auto to_max_port = max_output_ports->find(to_node);
+  if (to_max_port != max_output_ports->end()) {
+    SwapRegularFanoutInputs(fanouts, to_node, from_node->name(),
+                            to_max_port->second);
+  }
+}
+
+void SwapFanoutsMapValues(FanoutsMap* fanouts,
+                          const MutableGraphView::OutputPort& from_port,
+                          const FanoutsMap::iterator& from_fanouts,
+                          const MutableGraphView::OutputPort& to_port,
+                          const FanoutsMap::iterator& to_fanouts) {
+  const bool from_exists = from_fanouts != fanouts->end();
+  const bool to_exists = to_fanouts != fanouts->end();
+
+  if (from_exists && to_exists) {
+    std::swap(from_fanouts->second, to_fanouts->second);
+  } else if (from_exists) {
+    fanouts->emplace(to_port, std::move(from_fanouts->second));
+    fanouts->erase(from_port);
+  } else if (to_exists) {
+    fanouts->emplace(from_port, std::move(to_fanouts->second));
+    fanouts->erase(to_port);
+  }
+}
+
+void SwapRegularFanoutsAndMaxPortValues(FanoutsMap* fanouts,
+                                        MaxOutputPortsMap* max_output_ports,
+                                        NodeDef* from_node, NodeDef* to_node) {
+  auto from_max_port = max_output_ports->find(from_node);
+  auto to_max_port = max_output_ports->find(to_node);
+  bool from_exists = from_max_port != max_output_ports->end();
+  bool to_exists = to_max_port != max_output_ports->end();
+
+  auto forward_fanouts = [fanouts](NodeDef* from, NodeDef* to, int start,
+                                   int end) {
+    for (int i = start; i <= end; ++i) {
+      MutableGraphView::OutputPort from_port(from, i);
+      auto from_fanouts = fanouts->find(from_port);
+      if (from_fanouts != fanouts->end()) {
+        MutableGraphView::OutputPort to_port(to, i);
+        fanouts->emplace(to_port, std::move(from_fanouts->second));
+        fanouts->erase(from_port);
+      }
+    }
+  };
+
+  if (from_exists && to_exists) {
+    const int from = from_max_port->second;
+    const int to = to_max_port->second;
+    const int shared = std::min(from, to);
+    for (int i = 0; i <= shared; ++i) {
+      MutableGraphView::OutputPort from_port(from_node, i);
+      auto from_fanouts = fanouts->find(from_port);
+      MutableGraphView::OutputPort to_port(to_node, i);
+      auto to_fanouts = fanouts->find(to_port);
+      SwapFanoutsMapValues(fanouts, from_port, from_fanouts, to_port,
+                           to_fanouts);
+    }
+    if (to > from) {
+      forward_fanouts(to_node, from_node, shared + 1, to);
+    } else if (from > to) {
+      forward_fanouts(from_node, to_node, shared + 1, from);
+    }
+
+    std::swap(from_max_port->second, to_max_port->second);
+  } else if (from_exists) {
+    forward_fanouts(from_node, to_node, 0, from_max_port->second);
+
+    max_output_ports->emplace(to_node, from_max_port->second);
+    max_output_ports->erase(from_node);
+  } else if (to_exists) {
+    forward_fanouts(to_node, from_node, 0, to_max_port->second);
+
+    max_output_ports->emplace(from_node, to_max_port->second);
+    max_output_ports->erase(to_node);
+  }
+}
+
+bool HasFanoutValue(const FanoutsMap& fanouts, const FanoutsMap::iterator& it) {
+  return it != fanouts.end() && !it->second.empty();
+}
+
 Status MutationError(absl::string_view function_name, absl::string_view params,
                      absl::string_view msg) {
   return errors::InvalidArgument(absl::Substitute(
@@ -180,6 +320,12 @@
   return Status::OK();
 }
 
+string SwapNodeNamesSwitchControlErrorMsg(absl::string_view node_name) {
+  return absl::Substitute(
+      "can't swap node name '$0' as it will become a Switch control dependency",
+      node_name);
+}
+
 string GeneratedNameForIdentityConsumingSwitch(
     const MutableGraphView::OutputPort& fanin) {
   return AddPrefixToNodeName(
@@ -398,6 +544,182 @@
   return Status::OK();
 }
 
+Status MutableGraphView::UpdateNodeName(absl::string_view from_node_name,
+                                        absl::string_view to_node_name,
+                                        bool update_fanouts) {
+  auto error_status = [from_node_name, to_node_name,
+                       update_fanouts](absl::string_view msg) {
+    string params = absl::Substitute(
+        "from_node_name='$0', to_node_name='$1', update_fanouts=$2",
+        from_node_name, to_node_name, update_fanouts);
+    return MutationError("UpdateNodeName", params, msg);
+  };
+
+  NodeDef* node = GetNode(from_node_name);
+  TF_RETURN_IF_ERROR(CheckNodeExists(from_node_name, node, error_status));
+
+  if (node->name() == to_node_name) {
+    return Status::OK();
+  }
+  if (HasNode(to_node_name)) {
+    return error_status(
+        "can't update node name because new node name is in use");
+  }
+  auto max_output_port = max_regular_output_port().find(node);
+  const bool has_max_output_port =
+      max_output_port != max_regular_output_port().end();
+  auto control_fanouts = fanouts().find({node, Graph::kControlSlot});
+
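+  // If `update_fanouts` is set, rewrite the inputs of all regular and
+  // controlled fanouts to use the new name; otherwise renaming a node that
+  // still has fanouts is an error.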
+  if (update_fanouts) {
+    SwapControlledFanoutInputs(*this, control_fanouts, to_node_name);
+    if (has_max_output_port) {
+      SwapRegularFanoutInputs(&fanouts(), node, to_node_name,
+                              max_output_port->second);
+    }
+  } else if (has_max_output_port ||
+             HasFanoutValue(fanouts(), control_fanouts)) {
+    return error_status("can't update node name because node has fanouts");
+  }
+
+  nodes().erase(node->name());
+  node->set_name(string(to_node_name));
+  nodes().emplace(node->name(), node);
+  return Status::OK();
+}
+
+Status MutableGraphView::SwapNodeNames(absl::string_view from_node_name,
+                                       absl::string_view to_node_name,
+                                       bool update_fanouts) {
+  auto error_status = [from_node_name, to_node_name,
+                       update_fanouts](absl::string_view msg) {
+    string params = absl::Substitute(
+        "from_node_name='$0', to_node_name='$1', update_fanouts=$2",
+        from_node_name, to_node_name, update_fanouts);
+    return MutationError("SwapNodeNames", params, msg);
+  };
+
+  NodeDef* from_node = GetNode(from_node_name);
+  TF_RETURN_IF_ERROR(CheckNodeExists(from_node_name, from_node, error_status));
+  if (from_node_name == to_node_name) {
+    return Status::OK();
+  }
+  NodeDef* to_node = GetNode(to_node_name);
+  TF_RETURN_IF_ERROR(CheckNodeExists(to_node_name, to_node, error_status));
+
+  auto swap_names = [this, from_node, to_node]() {
+    nodes().erase(from_node->name());
+    nodes().erase(to_node->name());
+    std::swap(*from_node->mutable_name(), *to_node->mutable_name());
+    nodes().emplace(from_node->name(), from_node);
+    nodes().emplace(to_node->name(), to_node);
+  };
+
+  if (update_fanouts) {
+    SwapFanoutInputs(*this, &fanouts(), &max_regular_output_port(), from_node,
+                     to_node);
+    swap_names();
+    return Status::OK();
+  }
+
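+  // If one node is a Switch and the other node has controlled fanouts,
+  // swapping names would turn those control dependencies into Switch control
+  // dependencies, which are not allowed; bail out before mutating anything.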
+  bool from_is_switch = IsSwitch(*from_node);
+  MutableGraphView::OutputPort to_control(to_node, Graph::kControlSlot);
+  auto to_control_fanouts = fanouts().find(to_control);
+  if (from_is_switch && HasFanoutValue(fanouts(), to_control_fanouts)) {
+    return error_status(SwapNodeNamesSwitchControlErrorMsg(from_node_name));
+  }
+
+  bool to_is_switch = IsSwitch(*to_node);
+  MutableGraphView::OutputPort from_control(from_node, Graph::kControlSlot);
+  auto from_control_fanouts = fanouts().find(from_control);
+  if (to_is_switch && HasFanoutValue(fanouts(), from_control_fanouts)) {
+    return error_status(SwapNodeNamesSwitchControlErrorMsg(to_node_name));
+  }
+
+  // Swap node names.
+  swap_names();
+
+  // Swap controlling fanouts.
+  SwapFanoutsMapValues(&fanouts(), from_control, from_control_fanouts,
+                       to_control, to_control_fanouts);
+
+  // Swap regular fanouts.
+  SwapRegularFanoutsAndMaxPortValues(&fanouts(), &max_regular_output_port(),
+                                     from_node, to_node);
+
+  // Update fanins to remove self loops.
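+  // After the swap, an input of a node that references the node's own (new)
+  // name is a self loop; such inputs are rewritten to reference the node's
+  // pre-swap name, which is now held by the other node.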
+  auto update_fanins = [this](NodeDef* node, absl::string_view old_node_name) {
+    for (int i = 0; i < node->input_size(); ++i) {
+      TensorId tensor_id = ParseTensorName(node->input(i));
+      if (tensor_id.node() == node->name()) {
+        const int idx = tensor_id.index();
+        const int node_idx =
+            IsTensorIdControlling(tensor_id) ? Graph::kControlSlot : i;
+
+        MutableGraphView::OutputPort from_fanin(node, idx);
+        absl::flat_hash_set<InputPort>* from_fanouts = &fanouts()[from_fanin];
+        from_fanouts->erase({node, node_idx});
+        UpdateMaxRegularOutputPortForRemovedFanin(from_fanin, *from_fanouts);
+
+        MutableGraphView::OutputPort to_fanin(nodes().at(old_node_name), idx);
+        fanouts()[to_fanin].insert({node, node_idx});
+        UpdateMaxRegularOutputPortForAddedFanin(to_fanin);
+        node->set_input(i, TensorIdToString({old_node_name, idx}));
+      }
+    }
+  };
+  update_fanins(from_node, to_node->name());
+  update_fanins(to_node, from_node->name());
+
+  // Dedup control dependencies.
+  auto dedup_control_fanouts =
+      [this](NodeDef* node, const FanoutsMap::iterator& control_fanouts) {
+        if (CanDedupControlWithRegularInput(*this, *node) &&
+            control_fanouts != fanouts().end()) {
+          for (const auto& control_fanout : control_fanouts->second) {
+            if (HasRegularFaninNode(*this, *control_fanout.node,
+                                    node->name())) {
+              RemoveControllingFaninInternal(control_fanout.node, node);
+            }
+          }
+        }
+      };
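+  // When a node takes over a name that previously belonged to a Switch, its
+  // regular fanouts may have been anchoring Switch control dependencies that
+  // are now redundant, so dedup the controlling fanouts of each such fanout.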
+  auto dedup_switch_control = [this, dedup_control_fanouts](NodeDef* node) {
+    OutputPort port;
+    port.node = node;
+    const int max_port =
+        gtl::FindWithDefault(max_regular_output_port(), node, -1);
+    for (int i = 0; i <= max_port; ++i) {
+      port.port_id = i;
+      auto it = fanouts().find(port);
+      if (it == fanouts().end()) {
+        continue;
+      }
+      for (const auto& fanout : it->second) {
+        auto fanout_controls =
+            fanouts().find({fanout.node, Graph::kControlSlot});
+        dedup_control_fanouts(fanout.node, fanout_controls);
+      }
+    }
+  };
+
+  if (!from_is_switch) {
+    if (to_is_switch) {
+      dedup_switch_control(from_node);
+    } else {
+      dedup_control_fanouts(from_node, from_control_fanouts);
+    }
+  }
+  if (!to_is_switch) {
+    if (from_is_switch) {
+      dedup_switch_control(to_node);
+    } else {
+      dedup_control_fanouts(to_node, to_control_fanouts);
+    }
+  }
+
+  return Status::OK();
+}
+
 Status MutableGraphView::UpdateFanouts(absl::string_view from_node_name,
                                        absl::string_view to_node_name) {
   NodeDef* from_node = GetNode(from_node_name);
@@ -625,9 +947,9 @@
   if (!IsSwitch(*fanin.node)) {
     return fanin.node;
   } else {
-    TensorId tensor_id(fanin.node->name(), fanin.port_id);
     if (IsOutputPortControlling(fanin)) {
       // Can't add a Switch node control dependency.
+      TensorId tensor_id(fanin.node->name(), fanin.port_id);
       *error_msg = absl::Substitute(
           "can't add fanin '$0' as it will become a Switch control dependency",
           tensor_id.ToString());
@@ -639,18 +961,15 @@
     // dependency is only triggered when the corresponding output is triggered.
     // We start by looking for an identity node connected to the output of the
     // switch node, and use it to anchor the control dependency.
-    auto fanouts = GetFanouts(*fanin.node, /*include_controlled_nodes=*/false);
-    for (auto fanout : fanouts) {
+    for (const auto& fanout : GetFanout(fanin)) {
       if (IsIdentity(*fanout.node) || IsIdentityNSingleInput(*fanout.node)) {
-        if (ParseTensorName(fanout.node->input(0)) == tensor_id) {
-          if (fanout.node->name() == node_name) {
-            *error_msg =
-                absl::Substitute("can't add found fanin '$0' to self",
-                                 AsControlDependency(fanout.node->name()));
-            return nullptr;
-          }
-          return fanout.node;
+        if (fanout.node->name() == node_name) {
+          *error_msg =
+              absl::Substitute("can't add found fanin '$0' to self",
+                               AsControlDependency(fanout.node->name()));
+          return nullptr;
         }
+        return fanout.node;
       }
     }
 
diff --git a/tensorflow/core/grappler/mutable_graph_view.h b/tensorflow/core/grappler/mutable_graph_view.h
index 08a1cd1..a09c147 100644
--- a/tensorflow/core/grappler/mutable_graph_view.h
+++ b/tensorflow/core/grappler/mutable_graph_view.h
@@ -84,6 +84,40 @@
                     absl::string_view device,
                     absl::Span<const std::pair<string, AttrValue>> attrs);
 
+  // Updates the name of node `from_node_name` to `to_node_name`. If
+  // `to_node_name` is in use, node `from_node_name` does not exist, or node
+  // `from_node_name` has fanouts and `update_fanouts` is set to false, an
+  // error will be returned and nothing will be modified in the graph.
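+  //
+  // A minimal usage sketch (node names here are hypothetical):
+  //   MutableGraphView graph(&graph_def);
+  //   TF_RETURN_IF_ERROR(
+  //       graph.UpdateNodeName("old_name", "new_name", /*update_fanouts=*/true));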
+  Status UpdateNodeName(absl::string_view from_node_name,
+                        absl::string_view to_node_name, bool update_fanouts);
+
+  // Swaps node names `from_node_name` and `to_node_name`. Self loops of one
+  // node are removed by updating the inputs introducing self loops to use the
+  // other node's name. Setting `update_fanouts` to false will exclude other
+  // fanouts from having their inputs updated, but inputs introducing self
+  // loops will always be updated regardless of `update_fanouts`.
+  //
+  // Example:
+  //   1. foo(other:3, bar:2, ^bar)
+  //   2. bar(foo:3, other:1, foo:1, ^foo)
+  //   3. other(foo:5, bar:6)
+  //
+  // After calling SwapNodeNames("foo", "bar", false):
+  //   1. bar(other:3, foo:2, ^foo)
+  //   2. foo(bar:3, other:1, bar:1, ^bar)
+  //   3. other(foo:5, bar:6)
+  //
+  // After calling SwapNodeNames("foo", "bar", true):
+  //   1. bar(other:3, foo:2, ^foo)
+  //   2. foo(bar:3, other:1, bar:1, ^bar)
+  //   3. other(bar:5, foo:6)
+  //
+  // If it is not possible to swap node names (i.e. a node does not exist or a
+  // Switch control dependency may be introduced), an error will be returned
+  // and nothing will be modified in the graph.
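+  //
+  // A minimal usage sketch (node names here are hypothetical):
+  //   MutableGraphView graph(&graph_def);
+  //   TF_RETURN_IF_ERROR(
+  //       graph.SwapNodeNames("foo", "bar", /*update_fanouts=*/false));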
+  Status SwapNodeNames(absl::string_view from_node_name,
+                       absl::string_view to_node_name, bool update_fanouts);
+
   // Updates all fanouts (input ports fetching output tensors) from
   // `from_node_name` to the `to_node_name`, including control dependencies.
   //
@@ -92,7 +126,7 @@
   //   2. foo2(bar:1, other:1)
   //   3. foo3(other:2, ^bar)
   //
-  // After calling ForwardOutputs(bar, new_bar):
+  // After calling UpdateFanouts(bar, new_bar):
   //   1. foo1(new_bar:0, new_bar:1, other:0)
   //   2. foo2(new_bar:1, other:1)
   //   3. foo3(other:2, ^new_bar)
@@ -240,7 +274,7 @@
   //   2. foo2(bar:1, other:1)
   //   3. foo3(other:2, ^bar)
   //
-  // After calling ForwardOutputs(bar, new_bar):
+  // After calling UpdateFanouts(bar, new_bar):
   //   1. foo1(new_bar:0, new_bar:1, other:0)
   //   2. foo2(new_bar:1, other:1)
   //   3. foo3(other:2, ^new_bar)
diff --git a/tensorflow/core/grappler/mutable_graph_view_test.cc b/tensorflow/core/grappler/mutable_graph_view_test.cc
index 06333d3..07818d1 100644
--- a/tensorflow/core/grappler/mutable_graph_view_test.cc
+++ b/tensorflow/core/grappler/mutable_graph_view_test.cc
@@ -305,6 +305,450 @@
   CheckGraph(graph);
 }
 
+absl::flat_hash_map<string, std::vector<string>> GetNodeInputsFromGraph(
+    const GraphDef& graph, absl::string_view node_to_exclude) {
+  absl::flat_hash_map<string, std::vector<string>> node_inputs;
+  for (const auto& node : graph.node()) {
+    if (node.name() == node_to_exclude) {
+      continue;
+    }
+    node_inputs[node.name()] =
+        std::vector<string>(node.input().begin(), node.input().end());
+  }
+  return node_inputs;
+}
+
+void CheckUnmodifiedNodeFanins(
+    const GraphDef& graph, absl::string_view node_to_exclude,
+    const absl::flat_hash_map<string, std::vector<string>>&
+        unmodified_node_inputs) {
+  for (const auto& node : graph.node()) {
+    if (node.name() == node_to_exclude) {
+      continue;
+    }
+    auto it = unmodified_node_inputs.find(node.name());
+    ASSERT_NE(it, unmodified_node_inputs.end());
+    ASSERT_EQ(it->second.size(), node.input_size());
+    for (int i = 0; i < node.input_size(); ++i) {
+      EXPECT_EQ(node.input(i), it->second[i]);
+    }
+  }
+}
+
+void TestUpdateNodeName(absl::string_view from_node_name, bool node_exists,
+                        absl::string_view to_node_name, bool update_fanouts,
+                        bool success, const string& error_msg,
+                        absl::Span<const string> expected_fanins) {
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("b", "NotImportant", {"a"}),
+       NDef("c", "NotImportant", {}, {})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+
+  NodeDef* node = graph.GetNode(from_node_name);
+  if (node_exists) {
+    EXPECT_NE(node, nullptr);
+  } else {
+    EXPECT_EQ(node, nullptr);
+  }
+
+  absl::flat_hash_map<string, std::vector<string>> unmodified_node_inputs =
+      GetNodeInputsFromGraph(graph_def, from_node_name);
+
+  Status s = graph.UpdateNodeName(from_node_name, to_node_name, update_fanouts);
+  EXPECT_EQ(s.ok(), success);
+  string updated_node_name;
+  if (success) {
+    updated_node_name = string(to_node_name);
+  } else {
+    updated_node_name = string(from_node_name);
+    EXPECT_EQ(s.error_message(), error_msg);
+  }
+  if (node_exists) {
+    EXPECT_EQ(node->name(), updated_node_name);
+    CompareNodeFanins(graph, node, expected_fanins);
+  }
+
+  CheckUnmodifiedNodeFanins(graph_def, updated_node_name,
+                            unmodified_node_inputs);
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphViewTest, UpdateNodeName) {
+  string error_msg;
+  // Node has no fanouts.
+  TestUpdateNodeName("b", /*node_exists=*/true, "d", /*update_fanouts=*/false,
+                     /*success=*/true, error_msg, {"a"});
+  // Node has no fanouts and is renamed to itself.
+  TestUpdateNodeName("b", /*node_exists=*/true, "b", /*update_fanouts=*/false,
+                     /*success=*/true, error_msg, {"a"});
+  // Node has fanouts and is renamed to itself.
+  TestUpdateNodeName("a", /*node_exists=*/true, "a", /*update_fanouts=*/false,
+                     /*success=*/true, error_msg, {});
+
+  // New node name is in use.
+  error_msg =
+      "MutableGraphView::UpdateNodeName(from_node_name='c', to_node_name='b', "
+      "update_fanouts=false) error: can't update node name because new node "
+      "name is in use.";
+  TestUpdateNodeName("c", /*node_exists=*/true, "b", /*update_fanouts=*/false,
+                     /*success=*/false, error_msg, {});
+  error_msg =
+      "MutableGraphView::UpdateNodeName(from_node_name='a', to_node_name='b', "
+      "update_fanouts=true) error: can't update node name because new node "
+      "name is in use.";
+  TestUpdateNodeName("a", /*node_exists=*/true, "b", /*update_fanouts=*/true,
+                     /*success=*/false, error_msg, {});
+  // Node has fanouts.
+  error_msg =
+      "MutableGraphView::UpdateNodeName(from_node_name='a', to_node_name='d', "
+      "update_fanouts=false) error: can't update node name because node has "
+      "fanouts.";
+  TestUpdateNodeName("a", /*node_exists=*/true, "d", /*update_fanouts=*/false,
+                     /*success=*/false, error_msg, {});
+  // Node does not exist.
+  error_msg =
+      "MutableGraphView::UpdateNodeName(from_node_name='d', to_node_name='e', "
+      "update_fanouts=false) error: node 'd' was not found.";
+  TestUpdateNodeName("d", /*node_exists=*/false, "e", /*update_fanouts=*/false,
+                     /*success=*/false, error_msg, {});
+  error_msg =
+      "MutableGraphView::UpdateNodeName(from_node_name='d', to_node_name='e', "
+      "update_fanouts=true) error: node 'd' was not found.";
+  TestUpdateNodeName("d", /*node_exists=*/false, "e", /*update_fanouts=*/true,
+                     /*success=*/false, error_msg, {});
+}
+
+TEST(MutableGraphViewTest, UpdateNodeNameWithFanouts) {
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("b", "NotImportant", {"a:2"}),
+       NDef("c", "NotImportant", {"b", "^a"}),
+       NDef("d", "NotImportant", {"^b", "^a"}),
+       NDef("e", "NotImportant", {"b:2", "c:4", "b:1", "^a"})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.UpdateNodeName("b", "f", /*update_fanouts=*/true));
+
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"f", "^c", "^d", "^e"});
+  CheckNode(graph, "f", "NotImportant", "", {}, {"a:2"},
+            {"c", "^d", "e", "e:2"});
+  CheckNode(graph, "c", "NotImportant", "", {}, {"f", "^a"}, {"e:1"});
+  CheckNode(graph, "d", "NotImportant", "", {}, {"^f", "^a"}, {});
+  CheckNode(graph, "e", "NotImportant", "", {}, {"f:2", "c:4", "f:1", "^a"},
+            {});
+
+  CheckGraph(graph);
+}
+
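+// Builds a graph with two parallel Switch -> Identity -> consumer chains
+// (switch_1/identity_1/foo_1 and switch_2/identity_2/foo_2); this is the
+// common fixture for the SwapNodeNames tests below.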
+GraphDef SimpleSwapNodeNamesMutationGraph() {
+  return test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("switch_1", "Switch", {"a"}),
+       NDef("identity_1", "Identity", {"switch_1:1"}),
+       NDef("b", "NotImportant", {}, {}), NDef("switch_2", "Switch", {"b"}),
+       NDef("identity_2", "Identity", {"switch_2:0"}),
+       NDef("foo_1", "NotImportant", {"identity_1", "^identity_1"}),
+       NDef("foo_2", "NotImportant", {"identity_2", "^identity_2"})},
+      /*funcs=*/{});
+}
+
+void TestSwapNodeNames(bool update_fanouts) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.SwapNodeNames("foo_1", "foo_2", update_fanouts));
+
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"switch_1"});
+  CheckNode(graph, "switch_1", "Switch", "", {}, {"a"}, {"identity_1"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_1:1"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {}, {"switch_2"});
+  CheckNode(graph, "switch_2", "Switch", "", {}, {"b"}, {"identity_2"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_2:0"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNames) {
+  TestSwapNodeNames(/*update_fanouts=*/false);
+  TestSwapNodeNames(/*update_fanouts=*/true);
+}
+
+void TestSwapNodeNamesWithSameNames(bool update_fanouts) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.SwapNodeNames("identity_1", "identity_1", update_fanouts));
+
+  // No changes to graph.
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"switch_1"});
+  CheckNode(graph, "switch_1", "Switch", "", {}, {"a"}, {"identity_1"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_1:1"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {}, {"switch_2"});
+  CheckNode(graph, "switch_2", "Switch", "", {}, {"b"}, {"identity_2"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_2:0"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNamesSameName) {
+  TestSwapNodeNamesWithSameNames(/*update_fanouts=*/false);
+  TestSwapNodeNamesWithSameNames(/*update_fanouts=*/true);
+}
+
+TEST(MutableGraphView, SwapNodeNamesBetweenSwitches) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(
+      graph.SwapNodeNames("switch_1", "switch_2", /*update_fanouts=*/false));
+
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"switch_2"});
+  CheckNode(graph, "switch_2", "Switch", "", {}, {"a"}, {"identity_2"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_1:1"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {}, {"switch_1"});
+  CheckNode(graph, "switch_1", "Switch", "", {}, {"b"}, {"identity_1"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_2:0"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNamesBetweenSwitchesAndUpdateFanouts) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(
+      graph.SwapNodeNames("switch_1", "switch_2", /*update_fanouts=*/true));
+
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"switch_2"});
+  CheckNode(graph, "switch_2", "Switch", "", {}, {"a"}, {"identity_1"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_2:1"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {}, {"switch_1"});
+  CheckNode(graph, "switch_1", "Switch", "", {}, {"b"}, {"identity_2"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_1:0"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNamesSwitchAndNonSwitch) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.SwapNodeNames("a", "switch_1", /*update_fanouts=*/false));
+
+  // Dedup controls and fix self loop.
+  CheckNode(graph, "switch_1", "NotImportant", "", {}, {}, {"a", "identity_1"});
+  CheckNode(graph, "a", "Switch", "", {}, {"switch_1"}, {});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_1:1"}, {"foo_1"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {}, {"switch_2"});
+  CheckNode(graph, "switch_2", "Switch", "", {}, {"b"}, {"identity_2"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_2:0"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {}, {"identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNamesSwitchAndNonSwitchAndUpdateFanouts) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.SwapNodeNames("a", "switch_1", /*update_fanouts=*/true));
+
+  CheckNode(graph, "switch_1", "NotImportant", "", {}, {}, {"a"});
+  CheckNode(graph, "a", "Switch", "", {}, {"switch_1"}, {"identity_1"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"a:1"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {}, {"switch_2"});
+  CheckNode(graph, "switch_2", "Switch", "", {}, {"b"}, {"identity_2"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_2:0"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNamesNonSwitchAndSwitch) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.SwapNodeNames("switch_2", "b", /*update_fanouts=*/false));
+
+  // Dedup controls and fix self loop.
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"switch_1"});
+  CheckNode(graph, "switch_1", "Switch", "", {}, {"a"}, {"identity_1"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_1:1"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "switch_2", "NotImportant", "", {}, {}, {"b", "identity_2"});
+  CheckNode(graph, "b", "Switch", "", {}, {"switch_2"}, {});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_2:0"}, {"foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {}, {"identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNamesNonSwitchAndSwitchAndUpdateFanouts) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.SwapNodeNames("switch_2", "b", /*update_fanouts=*/true));
+
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"switch_1"});
+  CheckNode(graph, "switch_1", "Switch", "", {}, {"a"}, {"identity_1"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_1:1"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "switch_2", "NotImportant", "", {}, {}, {"b"});
+  CheckNode(graph, "b", "Switch", "", {}, {"switch_2"}, {"identity_2"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"b:0"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+void TestSwapNodeNamesSimpleSelfLoop(bool update_fanouts) {
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {"b:7"}), NDef("b", "NotImportant", {"a:10"})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+
+  TF_EXPECT_OK(graph.SwapNodeNames("a", "b", update_fanouts));
+
+  // No self loops.
+  CheckNode(graph, "a", "NotImportant", "", {}, {"b:10"}, {"b:0"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {"a:7"}, {"a:0"});
+
+  CheckGraph(graph);
+}
+
+TEST(MutableGraphView, SwapNodeNamesSelfLoops) {
+  TestSwapNodeNamesSimpleSelfLoop(/*update_fanouts=*/false);
+  TestSwapNodeNamesSimpleSelfLoop(/*update_fanouts=*/true);
+}
+
+void TestSwapNodeNamesError(absl::string_view from_node_name,
+                            absl::string_view to_node_name, bool update_fanouts,
+                            const string& error_msg) {
+  GraphDef graph_def = SimpleSwapNodeNamesMutationGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  Status s = graph.SwapNodeNames(from_node_name, to_node_name, update_fanouts);
+  EXPECT_EQ(s.ok(), false);
+  EXPECT_EQ(s.error_message(), error_msg);
+
+  // No changes to graph.
+  CheckNode(graph, "a", "NotImportant", "", {}, {}, {"switch_1"});
+  CheckNode(graph, "switch_1", "Switch", "", {}, {"a"}, {"identity_1"});
+  CheckNode(graph, "identity_1", "Identity", "", {}, {"switch_1:1"},
+            {"foo_1", "^foo_1"});
+  CheckNode(graph, "b", "NotImportant", "", {}, {}, {"switch_2"});
+  CheckNode(graph, "switch_2", "Switch", "", {}, {"b"}, {"identity_2"});
+  CheckNode(graph, "identity_2", "Identity", "", {}, {"switch_2:0"},
+            {"foo_2", "^foo_2"});
+  CheckNode(graph, "foo_1", "NotImportant", "", {},
+            {"identity_1", "^identity_1"}, {});
+  CheckNode(graph, "foo_2", "NotImportant", "", {},
+            {"identity_2", "^identity_2"}, {});
+
+  CheckGraph(graph);
+}
+
+// TODO(lyandy): add tests with update_fanouts == true.
+TEST(MutableGraphView, SwapNodeNamesError) {
+  string error_msg;
+  // Missing nodes.
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='foo_3', "
+      "to_node_name='foo_2', update_fanouts=false) error: node 'foo_3' was not "
+      "found.";
+  TestSwapNodeNamesError("foo_3", "foo_2", /*update_fanouts=*/false, error_msg);
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='foo_3', "
+      "to_node_name='foo_2', update_fanouts=true) error: node 'foo_3' was not "
+      "found.";
+  TestSwapNodeNamesError("foo_3", "foo_2", /*update_fanouts=*/true, error_msg);
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='foo_1', "
+      "to_node_name='foo_4', update_fanouts=false) error: node 'foo_4' was not "
+      "found.";
+  TestSwapNodeNamesError("foo_1", "foo_4", /*update_fanouts=*/false, error_msg);
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='foo_1', "
+      "to_node_name='foo_4', update_fanouts=true) error: node 'foo_4' was not "
+      "found.";
+  TestSwapNodeNamesError("foo_1", "foo_4", /*update_fanouts=*/true, error_msg);
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='foo_5', "
+      "to_node_name='foo_6', update_fanouts=false) error: node 'foo_5' was not "
+      "found.";
+  TestSwapNodeNamesError("foo_5", "foo_6", /*update_fanouts=*/false, error_msg);
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='foo_5', "
+      "to_node_name='foo_6', update_fanouts=true) error: node 'foo_5' was not "
+      "found.";
+  TestSwapNodeNamesError("foo_5", "foo_6", /*update_fanouts=*/true, error_msg);
+
+  // Switch control dependencies.
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='switch_2', "
+      "to_node_name='identity_1', update_fanouts=false) error: can't swap node "
+      "name 'switch_2' as it will become a Switch control dependency.";
+  TestSwapNodeNamesError("switch_2", "identity_1", /*update_fanouts=*/false,
+                         error_msg);
+  error_msg =
+      "MutableGraphView::SwapNodeNames(from_node_name='identity_2', "
+      "to_node_name='switch_1', update_fanouts=false) error: can't swap node "
+      "name 'switch_1' as it will become a Switch control dependency.";
+  TestSwapNodeNamesError("identity_2", "switch_1", /*update_fanouts=*/false,
+                         error_msg);
+}
+
 TEST(MutableGraphViewTest, AddAndUpdateFanouts) {
   // Actual node.op() is not important in this test.
   GraphDef graph_def = test::function::GDef(
@@ -461,36 +905,6 @@
   return graph_def;
 }
 
-absl::flat_hash_map<string, std::vector<string>> GetNodeInputsFromGraph(
-    const GraphDef& graph, absl::string_view node_to_exclude) {
-  absl::flat_hash_map<string, std::vector<string>> node_inputs;
-  for (const auto& node : graph.node()) {
-    if (node.name() == node_to_exclude) {
-      continue;
-    }
-    node_inputs[node.name()] =
-        std::vector<string>(node.input().begin(), node.input().end());
-  }
-  return node_inputs;
-}
-
-void CheckUnmodifiedNodeFanins(
-    const GraphDef& graph, absl::string_view node_to_exclude,
-    const absl::flat_hash_map<string, std::vector<string>>&
-        unmodified_node_inputs) {
-  for (const auto& node : graph.node()) {
-    if (node.name() == node_to_exclude) {
-      continue;
-    }
-    auto it = unmodified_node_inputs.find(node.name());
-    ASSERT_NE(it, unmodified_node_inputs.end());
-    ASSERT_EQ(it->second.size(), node.input_size());
-    for (int i = 0; i < node.input_size(); ++i) {
-      EXPECT_EQ(node.input(i), it->second[i]);
-    }
-  }
-}
-
 void TestAddRegularFanin(absl::string_view node_name, bool node_exists,
                          const TensorId& fanin_to_add, bool success,
                          const string& error_msg,
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index ee83fe0..59400dc 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -47,6 +47,12 @@
          node.op() == "FloorDiv" || node.op() == "TruncateDiv";
 }
 
+bool IsAnyMaxPool(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "MaxPool" || op == "MaxPoolV2" || op == "MaxPool3D" ||
+         op == "MaxPoolWithArgmax" || op == "FractionalMaxPool";
+}
+
 bool IsApproximateEqual(const NodeDef& node) {
   return node.op() == "ApproximateEqual";
 }
@@ -170,8 +176,7 @@
           "Sign",  "Sinh", "Softsign", "Softplus", "Sqrt",  "Tanh",
       }));
   static const gtl::FlatSet<string>* const kMonotonicNonIncreasingOps =
-      CHECK_NOTNULL((new gtl::FlatSet<string>{"Acos", "Erfc", "Inv", "Neg",
-                                              "Reciprocal", "Rsqrt"}));
+      CHECK_NOTNULL((new gtl::FlatSet<string>{"Acos", "Erfc", "Neg", "Rsqrt"}));
   if (kMonotonicNonDecreasingOps->count(node.op()) > 0) {
     if (is_non_decreasing) {
       *is_non_decreasing = true;
@@ -274,8 +279,8 @@
 
 bool IsMatMul(const NodeDef& node) {
   const auto& op = node.op();
-  return op == "MatMul" || op == "BatchMatMul" || op == "QuantizedMatMul" ||
-         op == "SparseMatMul";
+  return op == "MatMul" || op == "BatchMatMul" || op == "SparseMatMul" ||
+         IsQuantizedMatMul(node);
 }
 
 bool IsMax(const NodeDef& node) { return node.op() == "Max"; }
@@ -345,6 +350,10 @@
 
 bool IsProd(const NodeDef& node) { return node.op() == "Prod"; }
 
+bool IsQuantizedMatMul(const NodeDef& node) {
+  return node.op() == "QuantizedMatMul" || node.op() == "QuantizedMatMulV2";
+}
+
 bool IsQueue(const NodeDef& node) {
   return str_util::EndsWith(node.op(), "QueueV2");
 }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 7395151..bc1bb33 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -28,6 +28,7 @@
 bool IsAngle(const NodeDef& node);
 bool IsAny(const NodeDef& node);
 bool IsAnyDiv(const NodeDef& node);
+bool IsAnyMaxPool(const NodeDef& node);
 bool IsApproximateEqual(const NodeDef& node);
 bool IsAvgPoolGrad(const NodeDef& node);
 bool IsAssert(const NodeDef& node);
@@ -105,6 +106,7 @@
 bool IsPad(const NodeDef& node);
 bool IsPack(const NodeDef& node);
 bool IsPartitionedCall(const NodeDef& node);
+bool IsQuantizedMatMul(const NodeDef& node);
 bool IsNeg(const NodeDef& node);
 bool IsNoOp(const NodeDef& node);
 bool IsNotEqual(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index cbf0d68..cdf6180 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -1,9 +1,7 @@
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_kernel_library")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
-load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
-load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
 
 # Platform specific build config
 load(
@@ -111,6 +109,9 @@
     name = "constant_folding_test",
     srcs = ["constant_folding_test.cc"],
     shard_count = 5,
+    # Running cuda on cpu will trigger tests guarded by GOOGLE_CUDA, but NCHW
+    # won't be available, which results in test failures. So disable that.
+    tags = ["no_cuda_on_cpu_tap"],
     deps = [
         ":constant_folding",
         ":dependency_optimizer",
@@ -271,13 +272,29 @@
     ],
 )
 
+cc_library(
+    name = "arithmetic_optimizer_test_utils",
+    testonly = 1,
+    hdrs = [
+        "arithmetic_optimizer_test_utils.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":arithmetic_optimizer",
+        ":constant_folding",
+        ":model_pruner",
+        "//tensorflow/core:test",
+        "//tensorflow/core/grappler/utils:grappler_test",
+    ],
+)
+
 tf_cuda_cc_test(
     name = "arithmetic_optimizer_test",
     size = "small",
     srcs = ["arithmetic_optimizer_test.cc"],
     deps = [
         ":arithmetic_optimizer",
-        ":constant_folding",
+        ":arithmetic_optimizer_test_utils",
         ":model_pruner",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:cc_ops_internal",
@@ -292,7 +309,6 @@
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder",
-        "//tensorflow/core/grappler/utils:grappler_test",
     ],
 )
 
@@ -307,14 +323,19 @@
         ":constant_folding",
         ":graph_optimizer",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:topological_sort",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -518,9 +539,9 @@
         ":custom_graph_optimizer_registry",
         ":debug_stripper",
         ":dependency_optimizer",
-        ":experimental_implementation_selector",
         ":function_optimizer",
         ":graph_optimizer",
+        ":implementation_selector",
         ":layout_optimizer",
         ":loop_optimizer",
         ":memory_optimizer",
@@ -866,9 +887,9 @@
 )
 
 cc_library(
-    name = "experimental_implementation_selector",
-    srcs = ["experimental_implementation_selector.cc"],
-    hdrs = ["experimental_implementation_selector.h"],
+    name = "implementation_selector",
+    srcs = ["implementation_selector.cc"],
+    hdrs = ["implementation_selector.h"],
     deps = [
         ":custom_graph_optimizer",
         ":custom_graph_optimizer_registry",
@@ -884,14 +905,14 @@
 )
 
 tf_cc_test(
-    name = "experimental_implementation_selector_test",
+    name = "implementation_selector_test",
     size = "small",
-    srcs = ["experimental_implementation_selector_test.cc"],
+    srcs = ["implementation_selector_test.cc"],
     deps = [
         ":custom_graph_optimizer",
         ":custom_graph_optimizer_registry",
-        ":experimental_implementation_selector",
         ":function_api_info",
+        ":implementation_selector",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index db72571..902cb3f 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -2721,7 +2721,7 @@
   ~OptimizeMaxOrMinOfMonotonicStage() override = default;
 
   bool IsSupported(const NodeDef* node) const override {
-    return IsMax(*node) || IsMin(*node);
+    return IsMax(*node) || IsMin(*node) || IsAnyMaxPool(*node);
   }
 
   Status TrySimplify(NodeDef* reduction_node,
@@ -2735,10 +2735,13 @@
     // 0. inner_function is not in the preserve set,
     // 1. inner_function's Op is element-wise monotonic
     // 2. inner_function's output is not being consumed elsewhere.
+    // 3. inner_function is monotonically increasing if reduction_node is a
+    //    pooling operation, since we don't have MinPool operations.
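+    //
+    // For example, with a non-decreasing inner function, Max(Sqrt(x)) can be
+    // rewritten as Sqrt(Max(x)). A non-increasing function would require
+    // flipping Max to Min (e.g. Max(Neg(x)) == Neg(Min(x))), which is not
+    // possible for pooling since there is no MinPool operation.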
     bool is_non_decreasing = false;
     if (!IsInPreserveSet(*inner_function) &&
         IsElementWiseMonotonic(*inner_function, &is_non_decreasing) &&
-        ctx().node_map->GetOutputs(inner_function->name()).size() == 1) {
+        ctx().node_map->GetOutputs(inner_function->name()).size() == 1 &&
+        (is_non_decreasing || !IsAnyMaxPool(*reduction_node))) {
       // Swap the first inputs of the inner function Op & the reduction Op.
       NodeDef* inner_input;
       TF_RETURN_IF_ERROR(GetInputNode(inner_function->input(0), &inner_input));
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 94c59c6..2778334 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -20,10 +20,9 @@
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
-#include "tensorflow/core/grappler/optimizers/constant_folding.h"
+#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h"
 #include "tensorflow/core/grappler/optimizers/model_pruner.h"
 #include "tensorflow/core/grappler/utils.h"
-#include "tensorflow/core/grappler/utils/grappler_test.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -92,211 +91,6 @@
 }
 }  // namespace
 
-class ArithmeticOptimizerTest : public GrapplerTest {
- protected:
-  // Optimize a graph using ArithmeticOptimizer and prune all the nodes that no
-  // longer have any output consumers.
-  void OptimizeAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item,
-                        GraphDef* output) {
-    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
-    item->graph.Swap(output);
-    output->Clear();
-    TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output));
-  }
-
-  // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent.
-  void OptimizeTwice(ArithmeticOptimizer* optimizer, GrapplerItem* item,
-                     GraphDef* output) {
-    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
-    item->graph.Swap(output);
-    output->Clear();
-    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
-  }
-
-  // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent.
-  // Optionally run a constant folding pass before pruning.
-  void OptimizeTwiceAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item,
-                             GraphDef* output, bool const_folding = false) {
-    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
-
-    item->graph.Swap(output);
-    output->Clear();
-    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
-
-    if (const_folding) {
-      item->graph.Swap(output);
-      output->Clear();
-      TF_EXPECT_OK(ConstantFolding(/*cpu_device=*/nullptr)
-                       .Optimize(nullptr, *item, output));
-    }
-
-    item->graph.Swap(output);
-    output->Clear();
-    TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output));
-  }
-
-  // TODO(ezhulenev): Make private. After migration to stages each test
-  // should explicitly enable required optimization for tests isolation
-  void DisableAllStages(ArithmeticOptimizer* optimizer) {
-    ArithmeticOptimizer::ArithmeticOptimizerOptions options;
-    options.dedup_computations = false;
-    options.combine_add_to_addn = false;
-    options.convert_sqrt_div_to_rsqrt_mul = false;
-    options.convert_pow = false;
-    options.convert_log1p = false;
-    options.optimize_max_or_min_of_monotonic = false;
-    options.fold_conjugate_into_transpose = false;
-    options.fold_multiply_into_conv = false;
-    options.fold_transpose_into_matmul = false;
-    options.hoist_common_factor_out_of_aggregation = false;
-    options.hoist_cwise_unary_chains = false;
-    options.minimize_broadcasts = false;
-    options.remove_identity_transpose = false;
-    options.remove_involution = false;
-    options.remove_idempotent = false;
-    options.remove_redundant_bitcast = false;
-    options.remove_redundant_cast = false;
-    options.remove_redundant_reshape = false;
-    options.remove_negation = false;
-    options.remove_logical_not = false;
-    options.reorder_cast_like_and_value_preserving = false;
-    options.replace_mul_with_square = false;
-    options.simplify_aggregation = false;
-    options.unary_ops_composition = false;
-    optimizer->options_ = options;
-  }
-
-  void DisableAddToAddNCombining(ArithmeticOptimizer* optimizer) {
-    optimizer->options_.combine_add_to_addn = false;
-  }
-
-  void EnableOnlyAddToAddNCombining(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.combine_add_to_addn = true;
-  }
-
-  void EnableOnlyFoldConjugateIntoTranspose(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.fold_conjugate_into_transpose = true;
-  }
-
-  void EnableOnlyFoldMultipleIntoConv(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.fold_multiply_into_conv = true;
-  }
-
-  void EnableOnlyFoldTransposeIntoMatMul(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.fold_transpose_into_matmul = true;
-  }
-
-  void EnableOnlyHoistCommonFactor(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.hoist_common_factor_out_of_aggregation = true;
-  }
-
-  void EnableOnlyMinimizeBroadcasts(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.minimize_broadcasts = true;
-  }
-
-  void EnableOnlyRemoveIdentityTranspose(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_identity_transpose = true;
-  }
-
-  void EnableOnlyRemoveInvolution(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_involution = true;
-  }
-
-  void EnableOnlyRemoveRedundantBitcast(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_redundant_bitcast = true;
-  }
-
-  void EnableOnlyRemoveRedundantCast(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_redundant_cast = true;
-  }
-
-  void EnableOnlyRemoveRedundantReshape(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_redundant_reshape = true;
-  }
-
-  void EnableOnlyRemoveNegation(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_negation = true;
-  }
-
-  void EnableOnlyReorderCastAndTranspose(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.reorder_cast_like_and_value_preserving = true;
-  }
-
-  void EnableOnlyReplaceMulWithSquare(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.replace_mul_with_square = true;
-  }
-
-  void EnableOnlyHoistCWiseUnaryChains(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.hoist_cwise_unary_chains = true;
-  }
-
-  void EnableOnlySqrtDivToRsqrtMul(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.convert_sqrt_div_to_rsqrt_mul = true;
-  }
-
-  void EnableOnlyConvertPow(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.convert_pow = true;
-  }
-
-  void EnableOnlyRemoveIdempotent(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_idempotent = true;
-  }
-
-  void EnableOnlyRemoveLogicalNot(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_logical_not = true;
-  }
-
-  void EnableOnlySimplifyAggregation(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.simplify_aggregation = true;
-  }
-
-  void EnableOnlyLog1p(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.convert_log1p = true;
-  }
-
-  void EnableOnlyOptimizeMaxOrMinOfMonotonic(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.optimize_max_or_min_of_monotonic = true;
-  }
-
-  void EnableOnlyExpm1(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.convert_expm1 = true;
-  }
-
-  void EnableOnlyUnaryOpsComposition(ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.unary_ops_composition = true;
-  }
-
-  void EnableOnlyRemoveStackStridedSliceSameAxis(
-      ArithmeticOptimizer* optimizer) {
-    DisableAllStages(optimizer);
-    optimizer->options_.remove_stack_strided_slice_same_axis = true;
-  }
-};
-
 TEST_F(ArithmeticOptimizerTest, NoOp) {
   // This trivial graph is so basic there's nothing to optimize.
   TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
@@ -3561,6 +3355,75 @@
   EXPECT_EQ(2, required_node_count);
 }
 
+TEST_F(ArithmeticOptimizerTest,
+       OptimizeMaxOrMinOfMonotonicElementWiseNonIncreasingDoNotChangeMaxPool) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto x = ops::Const(s.WithOpName("x"), 1.5f, {3, 3, 3, 1});
+  Output neg = ops::Neg(s.WithOpName("neg"), x);
+  Output max_pool = ops::MaxPool(s.WithOpName("max_pool"), neg, {1, 2, 2, 1},
+                                 {1, 2, 2, 1}, "VALID");
+
+  GrapplerItem item;
+  item.fetch = {"max_pool"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+  ASSERT_EQ(1, tensors_expected.size());
+
+  GraphDef output;
+  ArithmeticOptimizer optimizer;
+  EnableOnlyOptimizeMaxOrMinOfMonotonic(&optimizer);
+  OptimizeTwice(&optimizer, &item, &output);
+
+  // Should be a no-op.
+  VerifyGraphsMatch(item.graph, output, __LINE__);
+
+  auto tensors = EvaluateNodes(output, item.fetch);
+  ASSERT_EQ(1, tensors.size());
+  test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-6);
+}
+
+TEST_F(ArithmeticOptimizerTest, OptimizeMaxOrMinOfMonotonicElementWiseMaxPool) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto x = ops::Const(s.WithOpName("x"), 1.5f, {3, 3, 3, 1});
+  Output sqrt = ops::Sqrt(s.WithOpName("sqrt"), x);
+  Output max_pool = ops::MaxPool(s.WithOpName("max_pool"), sqrt, {1, 2, 2, 1},
+                                 {1, 2, 2, 1}, "VALID");
+  Output final_out = ops::Identity(s.WithOpName("final_out"), max_pool);
+
+  GrapplerItem item;
+  item.fetch = {"final_out"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+  EXPECT_EQ(1, tensors_expected.size());
+
+  GraphDef output;
+  ArithmeticOptimizer optimizer;
+  EnableOnlyOptimizeMaxOrMinOfMonotonic(&optimizer);
+  OptimizeAndPrune(&optimizer, &item, &output);
+  auto tensors = EvaluateNodes(output, item.fetch);
+  EXPECT_EQ(1, tensors.size());
+
+  test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-6);
+  EXPECT_EQ(item.graph.node_size(), output.node_size());
+  // Check that the inputs were switched.
+  int required_node_count = 0;
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    if (node.name() == "sqrt") {
+      EXPECT_EQ("Sqrt", node.op());
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ("max_pool", node.input(0));
+      ++required_node_count;
+    } else if (node.name() == "max_pool") {
+      EXPECT_EQ("MaxPool", node.op());
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      ++required_node_count;
+    }
+  }
+  EXPECT_EQ(2, required_node_count);
+}
+
 TEST_F(ArithmeticOptimizerTest, UnaryOpsComposition) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h
new file mode 100644
index 0000000..94d0adc
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h
@@ -0,0 +1,236 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_TEST_UTILS_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_TEST_UTILS_H_
+
+#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h"
+#include "tensorflow/core/grappler/optimizers/constant_folding.h"
+#include "tensorflow/core/grappler/optimizers/model_pruner.h"
+#include "tensorflow/core/grappler/utils/grappler_test.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace tensorflow {
+namespace grappler {
+
+class ArithmeticOptimizerTest : public GrapplerTest {
+ protected:
+  // Optimize a graph using ArithmeticOptimizer and prune all the nodes that no
+  // longer have any output consumers.
+  void OptimizeAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item,
+                        GraphDef* output) {
+    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
+    item->graph.Swap(output);
+    output->Clear();
+    TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output));
+  }
+
+  // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent.
+  void OptimizeTwice(ArithmeticOptimizer* optimizer, GrapplerItem* item,
+                     GraphDef* output) {
+    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
+    item->graph.Swap(output);
+    output->Clear();
+    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
+  }
+
+  // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent.
+  // Optionally run a constant folding pass before pruning.
+  void OptimizeTwiceAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item,
+                             GraphDef* output, bool const_folding = false) {
+    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
+
+    item->graph.Swap(output);
+    output->Clear();
+    TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output));
+
+    if (const_folding) {
+      item->graph.Swap(output);
+      output->Clear();
+      TF_EXPECT_OK(ConstantFolding(/*cpu_device=*/nullptr)
+                       .Optimize(nullptr, *item, output));
+    }
+
+    item->graph.Swap(output);
+    output->Clear();
+    TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output));
+  }
+
+  // TODO(ezhulenev): Make private. After migration to stages each test
+  // should explicitly enable required optimization for tests isolation
+  void DisableAllStages(ArithmeticOptimizer* optimizer) {
+    ArithmeticOptimizer::ArithmeticOptimizerOptions options;
+    options.dedup_computations = false;
+    options.combine_add_to_addn = false;
+    options.convert_sqrt_div_to_rsqrt_mul = false;
+    options.convert_pow = false;
+    options.convert_log1p = false;
+    options.optimize_max_or_min_of_monotonic = false;
+    options.fold_conjugate_into_transpose = false;
+    options.fold_multiply_into_conv = false;
+    options.fold_transpose_into_matmul = false;
+    options.hoist_common_factor_out_of_aggregation = false;
+    options.hoist_cwise_unary_chains = false;
+    options.minimize_broadcasts = false;
+    options.remove_identity_transpose = false;
+    options.remove_involution = false;
+    options.remove_idempotent = false;
+    options.remove_redundant_bitcast = false;
+    options.remove_redundant_cast = false;
+    options.remove_redundant_reshape = false;
+    options.remove_negation = false;
+    options.remove_logical_not = false;
+    options.reorder_cast_like_and_value_preserving = false;
+    options.replace_mul_with_square = false;
+    options.simplify_aggregation = false;
+    options.unary_ops_composition = false;
+    optimizer->options_ = options;
+  }
+
+  void DisableAddToAddNCombining(ArithmeticOptimizer* optimizer) {
+    optimizer->options_.combine_add_to_addn = false;
+  }
+
+  void EnableOnlyAddToAddNCombining(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.combine_add_to_addn = true;
+  }
+
+  void EnableOnlyFoldConjugateIntoTranspose(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.fold_conjugate_into_transpose = true;
+  }
+
+  void EnableOnlyFoldMultipleIntoConv(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.fold_multiply_into_conv = true;
+  }
+
+  void EnableOnlyFoldTransposeIntoMatMul(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.fold_transpose_into_matmul = true;
+  }
+
+  void EnableOnlyHoistCommonFactor(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.hoist_common_factor_out_of_aggregation = true;
+  }
+
+  void EnableOnlyMinimizeBroadcasts(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.minimize_broadcasts = true;
+  }
+
+  void EnableOnlyRemoveIdentityTranspose(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_identity_transpose = true;
+  }
+
+  void EnableOnlyRemoveInvolution(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_involution = true;
+  }
+
+  void EnableOnlyRemoveRedundantBitcast(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_redundant_bitcast = true;
+  }
+
+  void EnableOnlyRemoveRedundantCast(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_redundant_cast = true;
+  }
+
+  void EnableOnlyRemoveRedundantReshape(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_redundant_reshape = true;
+  }
+
+  void EnableOnlyRemoveNegation(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_negation = true;
+  }
+
+  void EnableOnlyReorderCastAndTranspose(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.reorder_cast_like_and_value_preserving = true;
+  }
+
+  void EnableOnlyReplaceMulWithSquare(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.replace_mul_with_square = true;
+  }
+
+  void EnableOnlyHoistCWiseUnaryChains(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.hoist_cwise_unary_chains = true;
+  }
+
+  void EnableOnlySqrtDivToRsqrtMul(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.convert_sqrt_div_to_rsqrt_mul = true;
+  }
+
+  void EnableOnlyConvertPow(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.convert_pow = true;
+  }
+
+  void EnableOnlyRemoveIdempotent(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_idempotent = true;
+  }
+
+  void EnableOnlyRemoveLogicalNot(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_logical_not = true;
+  }
+
+  void EnableOnlySimplifyAggregation(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.simplify_aggregation = true;
+  }
+
+  void EnableOnlyLog1p(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.convert_log1p = true;
+  }
+
+  void EnableOnlyOptimizeMaxOrMinOfMonotonic(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.optimize_max_or_min_of_monotonic = true;
+  }
+
+  void EnableOnlyExpm1(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.convert_expm1 = true;
+  }
+
+  void EnableOnlyUnaryOpsComposition(ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.unary_ops_composition = true;
+  }
+
+  void EnableOnlyRemoveStackStridedSliceSameAxis(
+      ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_stack_strided_slice_same_axis = true;
+  }
+};
+
+}  // end namespace grappler
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_TEST_UTILS_H_
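
Editor's note: a minimal usage sketch of the EnableOnly* pattern above. This
is a hypothetical test body, assuming the fixture is named
ArithmeticOptimizerTest and exposes an optimize-and-prune helper like the one
whose tail appears at the top of this hunk (the helper's exact name is not
visible in this excerpt). Disabling every stage and re-enabling exactly one
lets a failure be attributed to a single rewrite:

    TEST_F(ArithmeticOptimizerTest, ReplaceMulWithSquareSketch) {
      tensorflow::Scope s = tensorflow::Scope::NewRootScope();
      Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
      Output mul = ops::Mul(s.WithOpName("mul"), x, x);  // x * x
      GrapplerItem item;
      item.fetch = {"mul"};
      TF_CHECK_OK(s.ToGraphDef(&item.graph));

      ArithmeticOptimizer optimizer;
      EnableOnlyReplaceMulWithSquare(&optimizer);  // isolate one stage
      GraphDef output;
      OptimizeAndPrune(&optimizer, &item, &output);  // name assumed
      // Expect the Mul to have been rewritten to Square(x).
    }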
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index f883f89..37fa7d9 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -17,7 +17,10 @@
 
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
 
+#include <cmath>
+
 #include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -35,6 +38,7 @@
 #include "tensorflow/core/grappler/optimizers/evaluation_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/symbolic_shapes.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -168,6 +172,55 @@
   return false;
 }
 
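+// Compares packed tensor values. The default relies on operator!=; the
+// float/double specializations below compare raw bit patterns so that
+// bitwise-distinct values (signed zeros, denormals, NaN payloads) are never
+// collapsed by the packed TensorProto encoding.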
+template <typename T>
+bool PackedValuesNotEqual(T a, T b) {
+  return a != b;
+}
+
+template <>
+bool PackedValuesNotEqual(float a, float b) {
+  return reinterpret_cast<int32_t&>(a) != reinterpret_cast<int32_t&>(b);
+}
+
+template <>
+bool PackedValuesNotEqual(double a, double b) {
+  return reinterpret_cast<int64_t&>(a) != reinterpret_cast<int64_t&>(b);
+}
+
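Editor's note: a standalone sketch of the same bitwise comparison using
std::memcpy (the well-defined spelling of the type pun; names here are
illustrative, not part of this change):

    #include <cstdint>
    #include <cstring>

    bool BitsDiffer(float a, float b) {
      int32_t ia, ib;
      std::memcpy(&ia, &a, sizeof(ia));  // reinterpret the float's bits
      std::memcpy(&ib, &b, sizeof(ib));
      return ia != ib;
    }
    // BitsDiffer(0.0f, -0.0f) is true even though 0.0f == -0.0f, while two
    // NaNs with identical payloads are not different, so runs of NaNs still
    // compress (numeric NaN != NaN would break every run).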
+float QuantizedTypeMinAsFloat(DataType data_type) {
+  switch (data_type) {
+    case DT_QINT8:
+      return Eigen::NumTraits<qint8>::lowest();
+    case DT_QUINT8:
+      return Eigen::NumTraits<quint8>::lowest();
+    case DT_QINT16:
+      return Eigen::NumTraits<qint16>::lowest();
+    case DT_QUINT16:
+      return Eigen::NumTraits<quint16>::lowest();
+    case DT_QINT32:
+      return Eigen::NumTraits<qint32>::lowest();
+    default:
+      return 0.0f;
+  }
+}
+
+float QuantizedTypeMaxAsFloat(DataType data_type) {
+  switch (data_type) {
+    case DT_QINT8:
+      return Eigen::NumTraits<qint8>::highest();
+    case DT_QUINT8:
+      return Eigen::NumTraits<quint8>::highest();
+    case DT_QINT16:
+      return Eigen::NumTraits<qint16>::highest();
+    case DT_QUINT16:
+      return Eigen::NumTraits<quint16>::highest();
+    case DT_QINT32:
+      return Eigen::NumTraits<qint32>::highest();
+    default:
+      return 0.0f;
+  }
+}
+
 }  // namespace
 
 ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level,
@@ -928,6 +981,11 @@
       SET_TENSOR_VAL_CASE(DT_UINT16, int32, int);
       SET_TENSOR_VAL_CASE(DT_INT8, int32, int);
       SET_TENSOR_VAL_CASE(DT_UINT8, int32, int);
+      SET_TENSOR_VAL_CASE(DT_QINT32, int32, int);
+      SET_TENSOR_VAL_CASE(DT_QINT16, int32, int);
+      SET_TENSOR_VAL_CASE(DT_QUINT16, int32, int);
+      SET_TENSOR_VAL_CASE(DT_QINT8, int32, int);
+      SET_TENSOR_VAL_CASE(DT_QUINT8, int32, int);
       SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool);
     default:
       return errors::InvalidArgument("Unsupported type: ", type);
@@ -955,6 +1013,42 @@
   return dtype;
 }
 
+// Checks whether the shape of the const input of the Mul op is valid to perform
+// the MulConvPushDown optimization.
+bool IsValidConstShapeForMulConvPushDown(
+    const string& data_format, const TensorShapeProto& filter_shape,
+    const TensorShapeProto& mul_const_input_shape) {
+  // If the const is a scalar, or it has no more dimensions than the filter
+  // and holds only a single element, the optimization should work.
+  if (mul_const_input_shape.dim_size() <= data_format.size() &&
+      TensorShape(mul_const_input_shape).num_elements() == 1) {
+    return true;
+  }
+
+  // Otherwise, check the eligibility according to data format.
+  if (data_format == "NHWC" || data_format == "NDHWC") {
+    TensorShapeProto new_filter_shape;
+    if (!ShapeAfterBroadcast(filter_shape, mul_const_input_shape,
+                             &new_filter_shape)) {
+      return false;
+    }
+    if (!ShapesSymbolicallyEqual(filter_shape, new_filter_shape)) {
+      return false;
+    }
+    // Only the last dimension (the output channel) may be larger than one:
+    // broadcasting the const over any other filter dimension would result in
+    // an invalid filter.
+    for (int i = 0; i < mul_const_input_shape.dim_size() - 1; ++i) {
+      if (mul_const_input_shape.dim(i).size() > 1) return false;
+    }
+    return true;
+  } else if (data_format == "NCHW" || data_format == "NCDHW") {
+    // TODO(laigd): support NCHW and NCDHW (b/111214513).
+    return false;
+  }
+  return false;
+}
+
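Editor's note, for intuition: with an NHWC Conv2D whose filter shape is
[2, 2, 3, 5] (the last dimension being the output-channel count), const
shapes such as [], [1], [5], and [1, 1, 1, 5] pass this check, since they
either hold a single element or only scale per output channel. Shapes like
[3, 1] (a non-channel dimension larger than one) or [1, 1, 1, 1, 1] (rank
exceeding the filter's) are rejected, matching the ShapeMismatch test added
later in this change.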
 }  // namespace
 
 // static
@@ -975,29 +1069,28 @@
   // Use the packed representation whenever possible to avoid generating large
   // graphdefs. Moreover, avoid repeating the last values if they're equal.
   if (tensor->NumElements() > 4) {
-#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME)                  \
-  {                                                                   \
-    const TYPE* val_ptr = tensor->flat<TYPE>().data();                \
-    TYPE last = *val_ptr;                                             \
-    int64 last_index = 0;                                             \
-    for (int64 i = 0; i < tensor->NumElements(); ++i) {               \
-      TYPE cur = *val_ptr++;                                          \
-      if (cur != last) {                                              \
-        last = cur;                                                   \
-        last_index = i;                                               \
-      }                                                               \
-    }                                                                 \
-    if (last_index < kint32max) {                                     \
-      optimized = true;                                               \
-      encoded_size = (last_index + 1) * sizeof(NAME);                 \
-      t->mutable_##NAME##_val()->Reserve(last_index + 1);             \
-      t->mutable_##NAME##_val()->AddNAlreadyReserved(last_index + 1); \
-      val_ptr = tensor->flat<TYPE>().data();                          \
-      for (int64 i = 0; i <= last_index; ++i) {                       \
-        t->set_##NAME##_val(i, *val_ptr++);                           \
-      }                                                               \
-    }                                                                 \
-  }                                                                   \
+#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME)                      \
+  {                                                                       \
+    const auto* val_ptr = tensor->flat<TYPE>().data();                    \
+    auto last = *val_ptr;                                                 \
+    int64 last_index = 0;                                                 \
+    for (int64 i = 0; i < tensor->NumElements(); ++i) {                   \
+      TYPE cur = *val_ptr++;                                              \
+      if (PackedValuesNotEqual(cur, last)) {                              \
+        last = cur;                                                       \
+        last_index = i;                                                   \
+      }                                                                   \
+    }                                                                     \
+    if (last_index < kint32max) {                                         \
+      optimized = true;                                                   \
+      encoded_size = (last_index + 1) * sizeof(NAME);                     \
+      t->mutable_##NAME##_val()->Reserve(last_index + 1);                 \
+      const auto* src_ptr = tensor->flat<TYPE>().data();                  \
+      auto* dst_ptr =                                                     \
+          t->mutable_##NAME##_val()->AddNAlreadyReserved(last_index + 1); \
+      std::copy(src_ptr, src_ptr + last_index + 1, dst_ptr);              \
+    }                                                                     \
+  }                                                                       \
   break
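Editor's note: the packed TensorProto representation only needs to store
values up to the last position where the value changes; the decoder repeats
the final stored value to fill the remaining elements. A 1000-element
DT_INT32 tensor [5, 5, ..., 5] therefore packs as a single int_val, and
[1, 2, 3, 3, ..., 3] packs as just {1, 2, 3}.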
 
     switch (tensor->dtype()) {
@@ -1033,6 +1126,8 @@
     t->set_dtype(tensor->dtype());
     tensor->shape().AsProto(t->mutable_tensor_shape());
   } else {
+    // DT_HALF, DT_BFLOAT16, DT_QINT32, DT_QINT16, DT_QUINT16, DT_QINT8,
+    // DT_QUINT8
     tensor->AsProtoTensorContent(t);
     encoded_size = t->tensor_content().size();
   }
@@ -1481,6 +1576,11 @@
     IS_ONES_CASE(DT_INT16);
     IS_ONES_CASE(DT_INT32);
     IS_ONES_CASE(DT_INT64);
+    IS_ONES_CASE(DT_QINT32);
+    IS_ONES_CASE(DT_QINT16);
+    IS_ONES_CASE(DT_QUINT16);
+    IS_ONES_CASE(DT_QINT8);
+    IS_ONES_CASE(DT_QUINT8);
     default:
       VLOG(1) << "Unsupported type " << DataTypeString(dtype);
       return false;
@@ -1515,6 +1615,11 @@
     IS_ZEROS_CASE(DT_INT16);
     IS_ZEROS_CASE(DT_INT32);
     IS_ZEROS_CASE(DT_INT64);
+    IS_ZEROS_CASE(DT_QINT32);
+    IS_ZEROS_CASE(DT_QINT16);
+    IS_ZEROS_CASE(DT_QUINT16);
+    IS_ZEROS_CASE(DT_QINT8);
+    IS_ZEROS_CASE(DT_QUINT8);
     default:
       VLOG(1) << "Unsupported type " << DataTypeString(dtype);
       return false;
@@ -2524,6 +2629,7 @@
   *success = false;
   const bool is_mul = IsMul(*node) || IsLogicalAnd(*node);
   const bool is_matmul = IsMatMul(*node);
+  const bool is_quantized_matmul = IsQuantizedMatMul(*node);
   const bool is_add = IsAdd(*node) || IsBiasAdd(*node) || IsLogicalOr(*node);
   const bool is_sub = IsSub(*node);
   const bool is_any_div = IsAnyDiv(*node);
@@ -2618,6 +2724,10 @@
         if (!replace_op_status.ok()) {
           return replace_op_status;
         } else if (replace_succeed) {
+          if (is_quantized_matmul) {
+            TF_RETURN_IF_ERROR(
+                AddQuantizedMatMulMinMaxOutConstNodes(node, optimized_graph));
+          }
           *success = true;
           return Status::OK();
         }
@@ -2781,118 +2891,110 @@
   //                 X  C1                       C1  C2
   //
   // where C1 and C2 are constants and X is non-constant.
-  if (IsMul(*node) && NumNonControlInputs(*node) == 2) {
-    NodeDef* mul_left_child = node_map_->GetNode(node->input(0));
-    NodeDef* mul_right_child = node_map_->GetNode(node->input(1));
-    // One child must be constant, and the second must be Conv op.
-    const bool left_child_is_constant = IsReallyConstant(*mul_left_child);
-    const bool right_child_is_constant = IsReallyConstant(*mul_right_child);
-    if (!left_child_is_constant && !right_child_is_constant) {
-      return false;
-    }
-    NodeDef* conv_node =
-        left_child_is_constant ? mul_right_child : mul_left_child;
-    if (!IsConv2D(*conv_node) && !IsConv3D(*conv_node)) {
-      return false;
-    }
-    if (node->device() != mul_left_child->device() ||
-        node->device() != mul_right_child->device()) {
-      return false;
-    }
+  if (!IsMul(*node) || NumNonControlInputs(*node) != 2) return false;
 
-    // Make sure that it is safe to change the value of the convolution
-    // output.
-    if (conv_node->input_size() < 2 ||
-        NumNonControlOutputs(*conv_node, *node_map_) > 1 ||
-        nodes_to_preserve_.find(conv_node->name()) !=
-            nodes_to_preserve_.end()) {
-      return false;
-    }
-
-    // Identify the nodes to swap.
-    NodeDef* conv_left_child = node_map_->GetNode(conv_node->input(0));
-    NodeDef* conv_right_child = node_map_->GetNode(conv_node->input(1));
-    const bool conv_left_is_constant = IsReallyConstant(*conv_left_child);
-    const bool conv_right_is_constant = IsReallyConstant(*conv_right_child);
-    if (!conv_left_is_constant && !conv_right_is_constant) {
-      // At least one of the convolution inputs should be constant.
-      return false;
-    }
-    if (conv_left_is_constant && conv_right_is_constant) {
-      // Leverage regular constant folding to handle this.
-      return false;
-    }
-    const auto& mul_props = properties.GetOutputProperties(node->name());
-    const auto& conv_props = properties.GetOutputProperties(conv_node->name());
-    if (mul_props.empty() || conv_props.empty()) {
-      return false;
-    }
-    const auto& mul_shape = mul_props[0].shape();
-    const auto& conv_shape = conv_props[0].shape();
-    if (!ShapesSymbolicallyEqual(mul_shape, conv_shape)) {
-      return false;
-    }
-
-    const auto& input_props = properties.GetInputProperties(conv_node->name());
-    if (input_props.size() < 2) {
-      return false;
-    }
-    const auto& filter_shape = input_props[1].shape();
-
-    NodeDef* const_node =
-        left_child_is_constant ? mul_left_child : mul_right_child;
-    const auto& const_props =
-        properties.GetOutputProperties(const_node->name());
-    if (const_props.empty()) {
-      return false;
-    }
-    const auto& const_shape = const_props[0].shape();
-
-    TensorShapeProto new_filter_shape;
-    if (!ShapeAfterBroadcast(filter_shape, const_shape, &new_filter_shape)) {
-      return false;
-    }
-    if (!ShapesSymbolicallyEqual(filter_shape, new_filter_shape)) {
-      return false;
-    }
-
-    string mul_new_name =
-        AddPrefixToNodeName("merged_input", conv_node->name());
-    if (node_map_->NodeExists(mul_new_name)) {
-      return false;
-    }
-    // Make sure we don't introduce loops in the graph by removing control
-    // dependencies from the conv2d node to c2.
-    string conv_const_input =
-        conv_left_is_constant ? conv_node->input(0) : conv_node->input(1);
-    if (MaybeRemoveControlInput(conv_node->name(), const_node, optimized_graph,
-                                node_map_.get())) {
-      // Add a control dep from c1 to c2 to ensure c2 is in the right frame
-      MaybeAddControlInput(conv_const_input, const_node, optimized_graph,
-                           node_map_.get());
-    }
-
-    conv_node->set_name(node->name());
-    node->set_name(mul_new_name);
-    if (conv_left_is_constant) {
-      node_map_->UpdateInput(conv_node->name(), node->input(0), mul_new_name);
-      conv_node->set_input(0, mul_new_name);
-    } else {
-      node_map_->UpdateInput(conv_node->name(), node->input(1), mul_new_name);
-      conv_node->set_input(1, mul_new_name);
-    }
-    NodeDef* conv_const_node =
-        conv_left_is_constant ? conv_left_child : conv_right_child;
-    if (left_child_is_constant) {
-      node->set_input(1, conv_const_node->name());
-    } else {
-      node->set_input(0, conv_const_node->name());
-    }
-    node_map_->AddNode(mul_new_name, node);
-
-    return true;
+  NodeDef* mul_left_child = node_map_->GetNode(node->input(0));
+  NodeDef* mul_right_child = node_map_->GetNode(node->input(1));
+  // One child must be constant, and the other must be a Conv op.
+  const bool left_child_is_constant = IsReallyConstant(*mul_left_child);
+  const bool right_child_is_constant = IsReallyConstant(*mul_right_child);
+  if (!left_child_is_constant && !right_child_is_constant) {
+    return false;
   }
-  return false;
+  NodeDef* conv_node =
+      left_child_is_constant ? mul_right_child : mul_left_child;
+  if (!IsConv2D(*conv_node) && !IsConv3D(*conv_node)) {
+    return false;
+  }
+  if (node->device() != mul_left_child->device() ||
+      node->device() != mul_right_child->device()) {
+    return false;
+  }
+
+  // Make sure that it is safe to change the value of the convolution
+  // output.
+  if (conv_node->input_size() < 2 ||
+      NumNonControlOutputs(*conv_node, *node_map_) > 1 ||
+      nodes_to_preserve_.find(conv_node->name()) != nodes_to_preserve_.end()) {
+    return false;
+  }
+
+  // Identify the nodes to swap.
+  NodeDef* conv_left_child = node_map_->GetNode(conv_node->input(0));
+  NodeDef* conv_right_child = node_map_->GetNode(conv_node->input(1));
+  const bool conv_left_is_constant = IsReallyConstant(*conv_left_child);
+  const bool conv_right_is_constant = IsReallyConstant(*conv_right_child);
+  if (!conv_left_is_constant && !conv_right_is_constant) {
+    // At least one of the convolution inputs should be constant.
+    return false;
+  }
+  if (conv_left_is_constant && conv_right_is_constant) {
+    // Leverage regular constant folding to handle this.
+    return false;
+  }
+  const auto& mul_props = properties.GetOutputProperties(node->name());
+  const auto& conv_props = properties.GetOutputProperties(conv_node->name());
+  if (mul_props.empty() || conv_props.empty()) {
+    return false;
+  }
+  const auto& mul_shape = mul_props[0].shape();
+  const auto& conv_shape = conv_props[0].shape();
+  if (!ShapesSymbolicallyEqual(mul_shape, conv_shape)) {
+    return false;
+  }
+
+  const auto& input_props = properties.GetInputProperties(conv_node->name());
+  if (input_props.size() < 2) {
+    return false;
+  }
+  const auto& filter_shape = input_props[1].shape();
+
+  NodeDef* const_node =
+      left_child_is_constant ? mul_left_child : mul_right_child;
+  const auto& const_props = properties.GetOutputProperties(const_node->name());
+  if (const_props.empty()) {
+    return false;
+  }
+  const auto& const_shape = const_props[0].shape();
+  if (!IsValidConstShapeForMulConvPushDown(
+          conv_node->attr().at("data_format").s(), filter_shape, const_shape)) {
+    return false;
+  }
+
+  string mul_new_name = AddPrefixToNodeName("merged_input", conv_node->name());
+  if (node_map_->NodeExists(mul_new_name)) {
+    return false;
+  }
+  // Make sure we don't introduce loops in the graph by removing control
+  // dependencies from the conv2d node to c2.
+  string conv_const_input =
+      conv_left_is_constant ? conv_node->input(0) : conv_node->input(1);
+  if (MaybeRemoveControlInput(conv_node->name(), const_node, optimized_graph,
+                              node_map_.get())) {
+    // Add a control dep from c1 to c2 to ensure c2 stays in the right frame.
+    MaybeAddControlInput(conv_const_input, const_node, optimized_graph,
+                         node_map_.get());
+  }
+
+  conv_node->set_name(node->name());
+  node->set_name(mul_new_name);
+  if (conv_left_is_constant) {
+    node_map_->UpdateInput(conv_node->name(), node->input(0), mul_new_name);
+    conv_node->set_input(0, mul_new_name);
+  } else {
+    node_map_->UpdateInput(conv_node->name(), node->input(1), mul_new_name);
+    conv_node->set_input(1, mul_new_name);
+  }
+  NodeDef* conv_const_node =
+      conv_left_is_constant ? conv_left_child : conv_right_child;
+  if (left_child_is_constant) {
+    node->set_input(1, conv_const_node->name());
+  } else {
+    node->set_input(0, conv_const_node->name());
+  }
+  node_map_->AddNode(mul_new_name, node);
+
+  return true;
 }
 
 bool ConstantFolding::PartialConstPropThroughIdentityN(NodeDef* node) {
@@ -3193,6 +3295,65 @@
   return true;
 }
 
+Status ConstantFolding::AddQuantizedMatMulMinMaxOutConstNodes(
+    NodeDef* node, GraphDef* optimized_graph) {
+  auto add_quantized_out = [this, node, optimized_graph](
+                               const string& out_const_name, int index) {
+    NodeDef* out_node = optimized_graph->add_node();
+    Tensor value(DT_FLOAT, TensorShape({}));
+    const bool is_min = index == 1;
+    const DataType type_attr = node->attr().at("dtype").type();
+
+    value.flat<float>()(0) = is_min ? QuantizedTypeMinAsFloat(type_attr)
+                                    : QuantizedTypeMaxAsFloat(type_attr);
+    TF_RETURN_IF_ERROR(
+        CreateNodeDef(out_const_name, TensorValue(&value), out_node));
+    node_map_->AddNode(out_const_name, out_node);
+    out_node->set_device(node->device());
+
+    // Copy all inputs from node.
+    out_node->mutable_input()->CopyFrom(node->input());
+    for (const string& input : out_node->input()) {
+      node_map_->AddOutput(NodeName(input), out_const_name);
+    }
+
+    // Update output nodes consuming node:index to use the new const node.
+    string old_input = absl::StrCat(node->name(), ":", index);
+    int old_node_count = 0;
+    auto outputs = node_map_->GetOutputs(node->name());
+    for (const auto& output : outputs) {
+      for (int i = 0; i < output->input_size(); ++i) {
+        if (output->input(i) == old_input) {
+          output->set_input(i, out_const_name);
+          node_map_->AddOutput(out_const_name, output->name());
+        } else if (NodeName(output->input(i)) == node->name()) {
+          ++old_node_count;
+        }
+      }
+      if (old_node_count == 0) {
+        node_map_->RemoveOutput(node->name(), output->name());
+      }
+    }
+
+    return Status::OK();
+  };
+  const string min_out_const_name =
+      OptimizedNodeName(*node, "-quantized_matmul_min_out");
+  const string max_out_const_name =
+      OptimizedNodeName(*node, "-quantized_matmul_max_out");
+  if (node_map_->GetNode(min_out_const_name) == nullptr &&
+      node_map_->GetNode(max_out_const_name) == nullptr) {
+    TF_RETURN_IF_ERROR(add_quantized_out(min_out_const_name, 1));
+    TF_RETURN_IF_ERROR(add_quantized_out(max_out_const_name, 2));
+  } else {
+    return errors::Internal(absl::Substitute(
+        "Can't create Const for QuantizedMatMul min_out/max_out of "
+        "node '$0' because of node name conflict",
+        node->name()));
+  }
+  return Status::OK();
+}
+
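Editor's note, with concrete values: if the folded QuantizedMatMul's "dtype"
attr is DT_QINT32, the two new scalar float consts hold -2147483648.0f and
2147483647.0f (the qint32 range); for DT_QUINT8 they would hold 0.0f and
255.0f. Any consumer reading output 1 or 2 of the original node is rewired
to read the matching const instead.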
 Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
                                             const GrapplerItem& item,
                                             GraphDef* optimized_graph) {
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index 7cf01b4..418176c 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -236,6 +236,9 @@
   bool MergeConcat(const GraphProperties& properties, bool use_shape_info,
                    GraphDef* optimized_graph, NodeDef* node);
 
+  Status AddQuantizedMatMulMinMaxOutConstNodes(NodeDef* node,
+                                               GraphDef* optimized_graph);
+
   // Points to an externally provided device or to owned_device_;
   RewriterConfig::Toggle opt_level_;
   DeviceBase* cpu_device_;
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 81d00fa..76e149d 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -119,6 +119,100 @@
       }
     }
   }
+
+  void MulConvPushDownTest(const TensorShape& input_shape,
+                           const TensorShape& filter_shape,
+                           const TensorShape& mul_const_input_shape,
+                           const bool use_3d_conv, const char* padding,
+                           const char* data_format, const bool expect_folded) {
+    // Tests if the following rewrite is performed:
+    //
+    //         *                       Conv2D
+    //        / \                       / \
+    //       c  Conv2D        -->      x  (c * filter)
+    //           / \
+    //          x  filter
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+
+    Tensor filter_values(DT_FLOAT, filter_shape);
+    for (int i = 0; i < filter_values.NumElements(); ++i) {
+      filter_values.flat<float>()(i) = std::sqrt(static_cast<float>(i));
+    }
+    Output filter =
+        ops::Const(s.WithOpName("filter"), Input::Initializer(filter_values));
+
+    Output input = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
+                                    ops::Placeholder::Shape(input_shape));
+
+    Output conv;
+    if (use_3d_conv) {
+      conv = ops::Conv3D(s.WithOpName("conv"), input, filter, {1, 1, 1, 1, 1},
+                         padding, ops::Conv3D::DataFormat(data_format));
+    } else {
+      conv = ops::Conv2D(s.WithOpName("conv"), input, filter, {1, 1, 1, 1},
+                         padding, ops::Conv2D::DataFormat(data_format));
+    }
+    Tensor mul_const_input(DT_FLOAT, mul_const_input_shape);
+    for (int i = 0; i < mul_const_input.NumElements(); ++i) {
+      mul_const_input.flat<float>()(i) = static_cast<float>(i + 3);
+    }
+    Output c =
+        ops::Const(s.WithOpName("c"), Input::Initializer(mul_const_input));
+    Output mul = ops::Mul(s.WithOpName("mul"), c, conv);
+
+    GrapplerItem item;
+    TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+    ConstantFolding optimizer(/*cpu_device=*/nullptr);
+    GraphDef output;
+    Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output);
+    TF_EXPECT_OK(status);
+
+    EXPECT_EQ(5, output.node_size());
+    int found = 0;
+    if (expect_folded) {
+      for (const auto& node : output.node()) {
+        if (node.name() == "mul") {
+          found++;
+          EXPECT_EQ(use_3d_conv ? "Conv3D" : "Conv2D", node.op());
+          EXPECT_EQ(2, node.input_size());
+          EXPECT_EQ("x", node.input(0));
+          EXPECT_EQ("conv/merged_input", node.input(1));
+        } else if (node.name() == "conv/merged_input") {
+          found++;
+          EXPECT_EQ("Const", node.op());
+          EXPECT_EQ(0, node.input_size());
+        }
+      }
+    } else {
+      for (const auto& node : output.node()) {
+        if (node.name() == "mul") {
+          found++;
+          EXPECT_EQ("Mul", node.op());
+          EXPECT_EQ(2, node.input_size());
+          EXPECT_EQ("c", node.input(0));
+          EXPECT_EQ("conv", node.input(1));
+        } else if (node.name() == "conv") {
+          found++;
+          EXPECT_EQ(use_3d_conv ? "Conv3D" : "Conv2D", node.op());
+          EXPECT_EQ(2, node.input_size());
+          EXPECT_EQ("x", node.input(0));
+          EXPECT_EQ("filter", node.input(1));
+        }
+      }
+    }
+    EXPECT_EQ(2, found);
+
+    // Check that const folded multiplication node has the expected value.
+    std::vector<string> fetch = {"mul"};
+    Tensor value(DT_FLOAT, input_shape);
+    for (int i = 0; i < value.NumElements(); ++i) {
+      value.flat<float>()(i) = i;
+    }
+    auto actual = EvaluateNodes(output, fetch, {{"x", value}});
+    auto expected = EvaluateNodes(item.graph, fetch, {{"x", value}});
+    test::ExpectTensorEqual<float>(expected[0], actual[0]);
+  }
 };
 
 TEST_F(ConstantFoldingTest, SimpleFolding) {
@@ -242,73 +336,147 @@
   }
 }
 
-TEST_F(ConstantFoldingTest, ConvPushDownTest) {
-  // Tests if the following rewrite is performed:
-  //
-  //         *                       Conv2D
-  //        / \                       / \
-  //       c  Conv2D        -->      x  (c * filter)
-  //           / \
-  //          x  filter
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-
-  int input_depth = 3;
-  int filter_count = 5;
-  int filter_size = 2;
-  TensorShape filter_shape(
-      {filter_size, filter_size, input_depth, filter_count});
-  Tensor filter_values(DT_FLOAT, filter_shape);
-  for (int i = 0; i < filter_values.NumElements(); ++i) {
-    filter_values.flat<float>()(i) = std::sqrt(static_cast<float>(i));
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_ScalarConst) {
+  for (string data_format : {
+         "NHWC",
+#if GOOGLE_CUDA
+             "NCHW"
+#endif  // GOOGLE_CUDA
+       }) {
+    MulConvPushDownTest(
+        /*input_shape=*/data_format == "NHWC" ? TensorShape{4, 10, 10, 3}
+                                              : TensorShape{4, 3, 10, 10},
+        /*filter_shape=*/{2, 2, 3, 5},
+        /*mul_const_input_shape=*/{},
+        /*use_3d_conv=*/false,
+        /*padding=*/"VALID", data_format.c_str(),
+        /*expect_folded=*/true);
   }
-  Output filter =
-      ops::Const(s.WithOpName("filter"), Input::Initializer(filter_values));
+}
 
-  int batch_size = 4;
-  int input_dim = 10;
-  TensorShape input_shape({batch_size, input_dim, input_dim, input_depth});
-  Output input = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
-                                  ops::Placeholder::Shape(input_shape));
-
-  Output conv =
-      ops::Conv2D(s.WithOpName("conv"), input, filter, {1, 1, 1, 1}, "VALID");
-  Output c = ops::Const(s.WithOpName("c"), 3.0f, {1});
-  Output mul = ops::Mul(s.WithOpName("mul"), c, conv);
-
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-
-  ConstantFolding optimizer(/*cpu_device=*/nullptr);
-  GraphDef output;
-  Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output);
-  TF_EXPECT_OK(status);
-
-  EXPECT_EQ(5, output.node_size());
-  int found = 0;
-  for (const auto& node : output.node()) {
-    if (node.name() == "mul") {
-      found++;
-      EXPECT_EQ("Conv2D", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("x", node.input(0));
-      EXPECT_EQ("conv/merged_input", node.input(1));
-    } else if (node.name() == "conv/merged_input") {
-      found++;
-      EXPECT_EQ("Const", node.op());
-      EXPECT_EQ(0, node.input_size());
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_SingletonConst) {
+  for (string data_format : {
+         "NHWC",
+#if GOOGLE_CUDA
+             "NCHW"
+#endif  // GOOGLE_CUDA
+       }) {
+    for (auto mul_const_input_shape :
+         {TensorShape{1}, TensorShape{1, 1, 1, 1}}) {
+      MulConvPushDownTest(
+          /*input_shape=*/data_format == "NHWC" ? TensorShape{4, 10, 10, 3}
+                                                : TensorShape{4, 3, 10, 10},
+          /*filter_shape=*/{2, 2, 3, 5}, mul_const_input_shape,
+          /*use_3d_conv=*/false,
+          /*padding=*/"VALID", data_format.c_str(),
+          /*expect_folded=*/true);
     }
   }
-  EXPECT_EQ(2, found);
+}
 
-  // Check that const folded multiplication node has the expected value.
-  std::vector<string> fetch = {"mul"};
-  Tensor value(DT_FLOAT, input_shape);
-  for (int i = 0; i < value.NumElements(); ++i) {
-    value.flat<float>()(i) = i;
+TEST_F(ConstantFoldingTest,
+       MulConvPushDownTest_Conv2D_SingletonConst_ShapeMismatch) {
+  for (string data_format : {
+         "NHWC",
+#if GOOGLE_CUDA
+             "NCHW"
+#endif  // GOOGLE_CUDA
+       }) {
+    MulConvPushDownTest(
+        /*input_shape=*/data_format == "NHWC" ? TensorShape{4, 10, 10, 3}
+                                              : TensorShape{4, 3, 10, 10},
+        /*filter_shape=*/{2, 2, 3, 5},
+        /*mul_const_input_shape=*/{1, 1, 1, 1, 1},
+        /*use_3d_conv=*/false,
+        /*padding=*/"VALID", data_format.c_str(),
+        /*expect_folded=*/false);
   }
-  auto actual = EvaluateNodes(output, fetch, {{"x", value}});
-  auto expected = EvaluateNodes(item.graph, fetch, {{"x", value}});
-  test::ExpectTensorEqual<float>(expected[0], actual[0]);
+}
+
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_3x1x3Const) {
+  for (auto data_format : {
+         "NHWC",
+#if GOOGLE_CUDA
+             "NCHW"
+#endif  // GOOGLE_CUDA
+       }) {
+    MulConvPushDownTest(
+        /*input_shape=*/{3, 3, 3, 3},
+        /*filter_shape=*/{3, 3, 3, 3},
+        /*mul_const_input_shape=*/{3, 1, 3},
+        /*use_3d_conv=*/false,
+        /*padding=*/"SAME", data_format,
+        /*expect_folded=*/false);
+  }
+}
+
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_NHWC_VectorLikeConst) {
+  for (auto mul_const_input_shape :
+       {TensorShape{3}, TensorShape{1, 3}, TensorShape{1, 1, 1, 3}}) {
+    MulConvPushDownTest(
+        /*input_shape=*/{3, 3, 3, 3},
+        /*filter_shape=*/{3, 3, 3, 3}, mul_const_input_shape,
+        /*use_3d_conv=*/false,
+        /*padding=*/"SAME",
+        /*data_format=*/"NHWC",
+        /*expect_folded=*/true);
+  }
+}
+
+#if GOOGLE_CUDA
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_NCHW_VectorLikeConst) {
+  for (auto mul_const_input_shape :
+       {TensorShape{3}, TensorShape{3, 1, 1}, TensorShape{1, 3, 1, 1}}) {
+    MulConvPushDownTest(
+        /*input_shape=*/{3, 3, 3, 3},
+        /*filter_shape=*/{3, 3, 3, 3}, mul_const_input_shape,
+        /*use_3d_conv=*/false,
+        /*padding=*/"SAME",
+        /*data_format=*/"NCHW",
+        // TODO(laigd): optimization should happen in this case.
+        /*expect_folded=*/false);
+  }
+}
+#endif  // GOOGLE_CUDA
+
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_3x1Const) {
+  for (auto data_format : {
+         "NHWC",
+#if GOOGLE_CUDA
+             "NCHW"
+#endif  // GOOGLE_CUDA
+       }) {
+    MulConvPushDownTest(
+        /*input_shape=*/{3, 3, 3, 3},
+        /*filter_shape=*/{3, 3, 3, 3},
+        /*mul_const_input_shape=*/{3, 1},
+        /*use_3d_conv=*/false,
+        /*padding=*/"SAME", data_format,
+        /*expect_folded=*/false);
+  }
+}
+
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv3D_NDHWC_1x1x3Const) {
+  MulConvPushDownTest(
+      /*input_shape=*/{3, 3, 3, 3, 3},
+      /*filter_shape=*/{3, 3, 3, 3, 3},
+      /*mul_const_input_shape=*/{1, 1, 3},
+      /*use_3d_conv=*/true,
+      /*padding=*/"SAME",
+      /*data_format=*/"NDHWC",
+      /*expect_folded=*/true);
+}
+
+TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv3D_NCDHW_3x1x1x1Const) {
+  MulConvPushDownTest(
+      /*input_shape=*/{3, 3, 3, 3, 3},
+      /*filter_shape=*/{3, 3, 3, 3, 3},
+      /*mul_const_input_shape=*/{3, 1, 1, 1},
+      /*use_3d_conv=*/true,
+      /*padding=*/"SAME",
+      /*data_format=*/"NDHWC",
+      // TODO(laigd): optimization should happen in this case.
+      /*expect_folded=*/false);
 }
 
 TEST_F(ConstantFoldingTest, NeutralElement) {
@@ -3515,6 +3683,39 @@
   }
 }
 
+TEST_F(ConstantFoldingTest, BitcastDenormalFloats) {
+  tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
+
+  Tensor x_t(DT_INT64, TensorShape({2, 2}));
+  x_t.flat<int64>()(0) = 9223372036854775807L;
+  x_t.flat<int64>()(1) = 1L;
+  x_t.flat<int64>()(2) = 9223372036854775807L;
+  x_t.flat<int64>()(3) = 1L;
+  Output x = ops::Const(scope.WithOpName("x"), x_t);
+  Output y = ops::Bitcast(scope.WithOpName("y"), x, DT_FLOAT);
+  Output z = ops::Bitcast(scope.WithOpName("z"), y, DT_INT64);
+
+  GrapplerItem item;
+  TF_CHECK_OK(scope.ToGraphDef(&item.graph));
+  item.fetch = {"z"};
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch, {});
+
+  ConstantFolding optimizer(/*cpu_device=*/nullptr);
+  GraphDef output;
+  Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  ASSERT_EQ(output.node_size(), 1);
+  const NodeDef& node = output.node(0);
+  EXPECT_EQ(node.name(), "z");
+  EXPECT_EQ(node.op(), "Const");
+
+  auto tensors = EvaluateNodes(output, item.fetch, {});
+  ASSERT_EQ(tensors.size(), 1);
+  ASSERT_EQ(tensors_expected.size(), 1);
+  test::ExpectTensorEqual<int64>(tensors[0], tensors_expected[0]);
+}
+
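Editor's note: this test guards the PackedValuesNotEqual change above.
Bitcasting 0x7FFFFFFFFFFFFFFF to floats yields two NaN bit patterns, and
bitcasting 1 yields a denormal next to +0.0. Under numeric comparison some
bitwise-distinct values compare equal (0.0 vs -0.0, or denormals under
flush-to-zero), letting the packed encoding drop them and corrupting the
bit-exact round trip back to int64; the bitwise comparison keeps every
distinct pattern.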
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index ef02962..5f06078 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -547,11 +547,15 @@
     hdrs = ["rebatch.h"],
     deps = [
         ":graph_utils",
+        ":optimizer_base",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "//tensorflow/core:framework",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:mutable_graph_view",
-        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
-    ],
+        "//tensorflow/core/grappler/utils:functions",
+        "//tensorflow/core:lib",
+    ] + tf_protos_all(),
     alwayslink = 1,
 )
 
@@ -596,7 +600,7 @@
         "optimizer_base.h",
     ],
     deps = [
-        "//tensorflow/core:metrics",
+        "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
     ],
 )
diff --git a/tensorflow/core/grappler/optimizers/data/rebatch.cc b/tensorflow/core/grappler/optimizers/data/rebatch.cc
index 187e1a6..14c3931 100644
--- a/tensorflow/core/grappler/optimizers/data/rebatch.cc
+++ b/tensorflow/core/grappler/optimizers/data/rebatch.cc
@@ -15,10 +15,17 @@
 
 #include "tensorflow/core/grappler/optimizers/data/rebatch.h"
 
+#include "absl/container/flat_hash_map.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/grappler/grappler_item.h"
-#include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+#include "tensorflow/core/grappler/utils/functions.h"
+#include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -35,7 +42,58 @@
 
 constexpr char kCastOp[] = "Cast";
 constexpr char kRealDivOp[] = "RealDiv";
-constexpr char kBatchDatasetOp[] = "BatchDatasetV2";
+constexpr char kConstOp[] = "Const";
+
+constexpr std::array<const char*, 5> kBatchDatasetOps = {
+    "BatchDataset",
+    "BatchDatasetV2",
+    "ExperimentalMapAndBatchDataset",
+    "PaddedBatchDataset",
+    "PaddedBatchDatasetV2"
+};
+
+constexpr std::array<const char*, 2> kMultipleInputsDatasetOps = {
+    "ConcatenateDataset",
+    "ZipDataset"
+};
+
+constexpr std::array<const char*, 17> kPassThroughOps = {
+    "CacheDataset",
+    "FilterDataset",
+    "FilterByLastComponentDataset",
+    "Identity",
+    "MapDataset",
+    "ModelDataset",
+    "OptimizeDataset",
+    "ParallelMapDataset",
+    "PrefetchDataset",
+    "ReduceDataset",
+    "RepeatDataset",
+    "ShardDataset",
+    "ShuffleAndRepeatDataset",
+    "ShuffleDataset",
+    "SkipDataset",
+    "TakeDataset",
+    "WindowDataset"
+};
+
+constexpr std::array<const char*, 3> kFuncDatasetOps = {
+    "FlatMapDataset",
+    "InterleaveDataset",
+    "ParallelInterleaveDatasetV2"
+};
+
+constexpr std::array<const char*, 9> kSourceDatasetOps = {
+    "FixedLengthRecordDataset",
+    "FixedLengthRecordDatasetV2",
+    "GeneratorDataset",
+    "RangeDataset",
+    "SparseTensorsSliceDataset",
+    "TensorDataset",
+    "TensorSliceDataset",
+    "TextLineDataset",
+    "TFRecordDataset"
+};
 
 NodeDef* AddCastNode(const string& input, DataType src_t, DataType dst_t,
                      MutableGraphView* graph) {
@@ -68,40 +126,180 @@
   return AddBinaryNode(input_x, input_y, kRealDivOp, DT_FLOAT, graph);
 }
 
-}  // anonymous namespace
+template <std::size_t SIZE>
+bool IsDatasetNodeOfType(const NodeDef& node,
+                         const std::array<const char*, SIZE>& arr) {
+  for (const auto& dataset_op_name : arr) {
+    if (node.op() == dataset_op_name) return true;
+  }
+  return false;
+}
 
-Status RebatchOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
-                                  GraphDef* output) {
+// Given a "batch" dataset node, modifies the batch_size input to divide the
+// current batch size by num_workers.
+Status MutateBatchSize(const NodeDef& node, int64 num_workers,
+                       MutableGraphView* graph) {
+  // TODO(rohanj): Fix up the output_shapes attribute as well, for this
+  // dataset and for all downstream datasets.
+  // For all the batching datasets, the batch_size is input number 1.
+  NodeDef* batch_size_node = graph_utils::GetInputNode(node, *graph, 1);
+  // By the time this optimization is run, the batch_size is computed and
+  // is a constant.
+  if (batch_size_node->op() != kConstOp) {
+    return errors::Internal("Batch size node should be a Const. Obtained: ",
+                            batch_size_node->op(), " instead.");
+  }
+  Tensor batch_size_tensor;
+  TF_RETURN_IF_ERROR(
+      GetNodeAttr(*batch_size_node, "value", &batch_size_tensor));
+  if (!TensorShapeUtils::IsScalar(batch_size_tensor.shape())) {
+    return errors::Internal("Batch size node shape should be scalar");
+  }
+  int64 batch_size = batch_size_tensor.scalar<int64>()();
+  if (batch_size % num_workers != 0) {
+    return errors::InvalidArgument(
+        "Batch size: ", batch_size,
+        " is not divisible by num_workers: ", num_workers);
+  }
+  batch_size /= num_workers;
+  NodeDef* new_batch_size_node =
+      graph_utils::AddScalarConstNode<int64>(batch_size, graph);
+  // We don't call UpdateFanouts here because CSE elimination might lead to
+  // multiple nodes sharing the same batch size constant node, which is also
+  // why we don't delete batch_size_node.
+  TF_RETURN_IF_ERROR(graph->UpdateRegularFaninByPort(
+      node.name(), 1, {new_batch_size_node->name(), 0}));
+  return Status::OK();
+}
+
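Editor's note, a worked example: with a global batch size of 64 and
num_workers = 8, the Const feeding input 1 of the batch dataset is replaced
by a new scalar Const of 8. A batch size of 10 with 4 workers fails with
InvalidArgument, since 10 is not evenly divisible.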
+// At least one sink node is added to the end of the graph. We find that node
+// and return it. There may be multiple Identity ops between the final Dataset
+// op and that sink node, but the recursive graph traversal handles that.
+Status FindSinkNode(const GraphDef& graph_def, NodeDef* sink_node) {
+  absl::flat_hash_map<string, int> all_node_names;
+  absl::flat_hash_map<string, int> node_input_map;
+  for (int i = 0; i < graph_def.node_size(); ++i) {
+    all_node_names.insert_or_assign(graph_def.node(i).name(), i);
+    node_input_map.insert_or_assign(graph_def.node(i).name(), 0);
+  }
+  // Count how many other graph nodes each node is an input to. Candidate
+  // sink nodes are the ones that feed zero other nodes.
+  for (const NodeDef& node : graph_def.node()) {
+    for (const string& input_name : node.input()) {
+      node_input_map[input_name]++;
+    }
+  }
+  for (const auto& it : node_input_map) {
+    if (it.second == 0) {
+      const NodeDef& sink_graph_node = graph_def.node(all_node_names[it.first]);
+      // In function graphs, the search can surface Arg nodes, which have no
+      // inputs. This check rejects those.
+      if (sink_graph_node.input_size() == 0) {
+        continue;
+      }
+      *sink_node = sink_graph_node;
+      return Status::OK();
+    }
+  }
+  return errors::InvalidArgument("Failed to find a sink node");
+}
+
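Editor's note: for a pipeline such as RangeDataset -> BatchDatasetV2 ->
PrefetchDataset -> Identity, only the trailing Identity feeds no other node,
so it is returned as the sink and the traversal below starts from it.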
+Status OptimizeGraph(const GrapplerItem& item, int64 num_workers,
+                     GraphDef* output);
+
+// Helper function that starts from a node in the graph and recurses into its
+// inputs trying to find a BatchDataset type operation to modify. During the
+// recursion it handles four kinds of cases.
+// 1. BatchDataset type ops: Mutates the batch_size input node and stops.
+// 2. Zip / Concatenate dataset ops: Recurses into all inputs to these ops
+//      as they are datasets themselves.
+// 3. Core dataset ops + Identity op: Recurses into first input parameter.
+// 4. FlatMap type mapping dataset ops: Recurses into the function definition.
+Status RecursivelyHandleOp(const NodeDef& node, int64 num_workers,
+                           FunctionLibraryDefinition* flib,
+                           MutableGraphView* graph) {
+  if (IsDatasetNodeOfType(node, kBatchDatasetOps)) {
+    return MutateBatchSize(node, num_workers, graph);
+  } else if (IsDatasetNodeOfType(node, kMultipleInputsDatasetOps)) {
+    // For all multiple input datasets, all inputs are datasets themselves.
+    for (int i = 0; i < node.input_size(); ++i) {
+      NodeDef* input_node = graph_utils::GetInputNode(node, *graph, i);
+      TF_RETURN_IF_ERROR(
+          RecursivelyHandleOp(*input_node, num_workers, flib, graph));
+    }
+  } else if (IsDatasetNodeOfType(node, kPassThroughOps)) {
+    // For all the dataset ops that are pass through, the input dataset is
+    // input 0.
+    NodeDef* input_node = graph_utils::GetInputNode(node, *graph, 0);
+    TF_RETURN_IF_ERROR(
+        RecursivelyHandleOp(*input_node, num_workers, flib, graph));
+  } else if (IsDatasetNodeOfType(node, kFuncDatasetOps)) {
+    const string func_name = node.attr().at("f").func().name();
+    const FunctionDef* fdef = flib->Find(func_name);
+    GrapplerFunctionItem f_item;
+    TF_RETURN_IF_ERROR(MakeGrapplerFunctionItem(
+        *fdef, *flib, graph->graph()->versions().producer(), &f_item));
+    GraphDef optimized_func_graph;
+    Status s = OptimizeGraph(f_item, num_workers, &optimized_func_graph);
+    if (s.ok()) {
+      // Function body optimization might have created new specialized
+      // functions for each instantiation context. Add them to the library.
+      for (const FunctionDef& func_def :
+           optimized_func_graph.library().function()) {
+        if (flib->Find(func_def.signature().name()) == nullptr) {
+          TF_RETURN_IF_ERROR(flib->AddFunctionDef(func_def));
+        }
+      }
+
+      // Convert optimized graph back to FunctionDef.
+      FunctionDef optimized_func;
+      f_item.SwapFunctionBody(std::move(optimized_func_graph));
+      TF_RETURN_IF_ERROR(MakeFunctionDef(f_item, *flib, &optimized_func));
+
+      // Replace optimized function with a new FunctionDef.
+      TF_RETURN_IF_ERROR(flib->ReplaceFunction(func_name, optimized_func));
+    }
+  } else if (IsDatasetNodeOfType(node, kSourceDatasetOps)) {
+    return errors::InvalidArgument(
+        "Reached a source dataset: ", node.op(),
+        " without encountering a batch transformation.");
+  } else {
+    return errors::InvalidArgument("Encountered an unsupported op: ",
+                                   node.op());
+  }
+  return Status::OK();
+}
+
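Editor's note, walking an example pipeline TFRecordDataset -> MapDataset ->
BatchDatasetV2 -> PrefetchDataset with num_workers = 4: starting from the
sink, Prefetch is a pass-through (case 3), the recursion then reaches the
BatchDatasetV2 (case 1), its batch size is divided by 4, and the walk stops
without visiting the Map or TFRecord stages. A pipeline with no batch
transformation instead bottoms out at the source dataset and returns
InvalidArgument.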
+// Helper function that, given a GrapplerItem, generates a mutated GraphDef
+// with the batch size changed. The GrapplerItem may be generated from the
+// main graph or from a function graph.
+Status OptimizeGraph(const GrapplerItem& item, int64 num_workers,
+                     GraphDef* output) {
   *output = item.graph;
   MutableGraphView graph(output);
 
-  absl::flat_hash_set<string> nodes_to_delete;
-  for (const NodeDef& node : item.graph.node()) {
-    if (node.op() == kBatchDatasetOp) {
-      NodeDef* batch_size_node = graph_utils::GetInputNode(node, graph, 1);
-      NodeDef tmp_node;
-      tmp_node = *batch_size_node;
-      graph_utils::SetUniqueGraphNodeName(tmp_node.op(), graph.graph(),
-                                          &tmp_node);
-      NodeDef* copy_batch_size_node = graph.AddNode(std::move(tmp_node));
-      NodeDef* float_copy_batch_size_node =
-          AddCastNode(copy_batch_size_node->name(), DT_INT64, DT_FLOAT, &graph);
-      NodeDef* num_worker_node =
-          graph_utils::AddScalarConstNode<int64>(num_workers_, &graph);
-      NodeDef* float_num_worker_node =
-          AddCastNode(num_worker_node->name(), DT_INT64, DT_FLOAT, &graph);
-      NodeDef* divided_batch_size_node =
-          AddFloatDivNode(float_copy_batch_size_node->name(),
-                          float_num_worker_node->name(), &graph);
-      NodeDef* cast_new_batch_size_node = AddCastNode(
-          divided_batch_size_node->name(), DT_FLOAT, DT_INT64, &graph);
-      TF_RETURN_IF_ERROR(graph.UpdateFanouts(batch_size_node->name(),
-                                             cast_new_batch_size_node->name()));
-      nodes_to_delete.insert(batch_size_node->name());
-      break;
-    }
-  }
-  TF_RETURN_IF_ERROR(graph.DeleteNodes(nodes_to_delete));
+  FunctionLibraryDefinition flib(OpRegistry::Global(), item.graph.library());
+
+  NodeDef sink_node;
+  TF_RETURN_IF_ERROR(FindSinkNode(item.graph, &sink_node));
+  TF_RETURN_IF_ERROR(
+      RecursivelyHandleOp(sink_node, num_workers, &flib, &graph));
+  *output->mutable_library() = flib.ToProto();
+  return Status::OK();
+}
+
+}  // anonymous namespace
+
+Status RebatchOptimizer::OptimizeAndCollectStats(Cluster* cluster,
+                                                 const GrapplerItem& item,
+                                                 GraphDef* output,
+                                                 OptimizationStats* stats) {
+  *output = item.graph;
+  MutableGraphView graph(output);
+
+  TF_RETURN_IF_ERROR(OptimizeGraph(item, num_workers_, output));
+  stats->num_changes++;
   return Status::OK();
 }
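Editor's note: a hedged invocation sketch, assuming (as the num_workers_
member suggests) that Init reads a "num_workers" parameter from the custom
optimizer config; the exact parameter key is not shown in this diff, and
`item` is a GrapplerItem assumed to be in scope:

    RebatchOptimizer optimizer;
    tensorflow::RewriterConfig_CustomGraphOptimizer config;
    (*config.mutable_parameter_map())["num_workers"].set_i(4);
    TF_CHECK_OK(optimizer.Init(&config));

    GraphDef output;
    // Optimize() is inherited from TFDataOptimizerBase, which forwards to
    // OptimizeAndCollectStats and records the collected stats.
    TF_CHECK_OK(optimizer.Optimize(/*cluster=*/nullptr, item, &output));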
 
diff --git a/tensorflow/core/grappler/optimizers/data/rebatch.h b/tensorflow/core/grappler/optimizers/data/rebatch.h
index f7aa69f..29a6100 100644
--- a/tensorflow/core/grappler/optimizers/data/rebatch.h
+++ b/tensorflow/core/grappler/optimizers/data/rebatch.h
@@ -15,7 +15,9 @@
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_REBATCH_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_REBATCH_H_
 
-#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/optimizers/data/optimizer_base.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -23,10 +25,7 @@
 // This optimizer changes the batch size of the output dataset by dividing the
 // current batch size by parameter `num_workers`. Currently, this works only
 // for very simple pipelines with a single BatchDatasetV2 transformation.
-//
-// TODO(rohanj): Extend this logic to correctly handle any input pipeline that
-// uses core tf.data APIs + MapAndBatch.
-class RebatchOptimizer : public CustomGraphOptimizer {
+class RebatchOptimizer : public TFDataOptimizerBase {
  public:
   RebatchOptimizer() = default;
   ~RebatchOptimizer() override = default;
@@ -36,8 +35,9 @@
   Status Init(
       const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override;
 
-  Status Optimize(Cluster* cluster, const GrapplerItem& item,
-                  GraphDef* output) override;
+  Status OptimizeAndCollectStats(Cluster* cluster, const GrapplerItem& item,
+                                 GraphDef* output,
+                                 OptimizationStats* stats) override;
 
   void Feedback(Cluster* cluster, const GrapplerItem& item,
                 const GraphDef& optimize_output, double result) override;
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index c57a7b1..1969ff0 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -415,6 +415,10 @@
 // NodeBuilder
 Status Vectorization::StackTensor(WrappedTensor* unstacked,
                                   TensorDesc* result) {
+  if (unstacked->node->output_type(unstacked->output_index) == DT_VARIANT) {
+    // TODO(b/124069171): "ExpandDims" doesn't work with Variant tensors.
+    return errors::Unimplemented("Cannot stack tensor with Variant type.");
+  }
   // Note that all these nodes are necessary as the size of the batch may not be
   // constant.
   if (unstacked->stacked) {
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 8b81cb2..2dfa5e9 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -15,13 +15,13 @@
 
 #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h"
 
-#include <unordered_map>
-#include <unordered_set>
-
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
 #include "tensorflow/core/grappler/utils.h"
@@ -38,20 +38,15 @@
 
 namespace {
 
-bool RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
-  bool removed_input = false;
-  int pos = 0;
-  while (pos < node->input_size()) {
-    if (node->input(pos) == input) {
-      node->mutable_input()->SwapElements(pos, node->input_size() - 1);
-      node->mutable_input()->RemoveLast();
-      node_map->RemoveOutput(NodeName(input), node->name());
-      removed_input = true;
-    } else {
-      ++pos;
-    }
+// Builds a map from &graph->node(i) to i.
+absl::flat_hash_map<const NodeDef*, int> BuildNodeToIdx(const GraphDef& graph) {
+  // Set up &node -> index map.
+  absl::flat_hash_map<const NodeDef*, int> node_to_idx;
+  for (int i = 0; i < graph.node_size(); ++i) {
+    const NodeDef& node = graph.node(i);
+    node_to_idx[&node] = i;
   }
-  return removed_input;
+  return node_to_idx;
 }
 
 }  // namespace
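Editor's note: the pointer-keyed map gives O(1) node -> index lookups when
walking fanout sets that hold NodeDef pointers, e.g. (names illustrative):

    const auto node_to_idx = BuildNodeToIdx(*optimized_graph);
    const int idx = node_to_idx.at(&optimized_graph->node(3));  // idx == 3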
@@ -68,7 +63,9 @@
     // The output values of this node may be needed.
     return false;
   }
-  const NodeDef* input = node_map_->GetNode(NodeName(node.input(0)));
+  MutableGraphView::OutputPort port = graph_view_->GetRegularFanin(
+      MutableGraphView::InputPort(const_cast<NodeDef*>(&node), 0));
+  NodeDef* input = port.node;
   CHECK(input != nullptr) << "node = " << node.name()
                           << " input = " << node.input(0);
   // Don't remove Identity nodes corresponding to Variable reads or following
@@ -77,22 +74,28 @@
     return false;
   } else if (IsSwitch(*input)) {
     // Don't turn Identity nodes following Switch into NoOp or remove them
-    // if it requires anchoring a control dependencies the Switch node, which
+    // if it requires anchoring a control dependencies to the Switch node, which
     // is not valid.
-    if (str_util::StartsWith(node.name(), kConstantFoldingCtrl)) {
-      // TODO(rmlarsen): Try to remove this artificial contraint.
+    MutableGraphView::OutputPort control_port(const_cast<NodeDef*>(&node),
+                                              Graph::kControlSlot);
+    auto control_fanouts = graph_view_->GetFanout(control_port);
+    if (!control_fanouts.empty()) {
       return false;
     }
   }
-  for (auto consumer : node_map_->GetOutputs(node.name())) {
-    if (node.input_size() > 1 && IsMerge(*consumer)) {
+  bool node_has_multiple_inputs =
+      graph_view_->NumFanins(node, /*include_controlling_nodes=*/true) > 1;
+
+  auto fanouts =
+      graph_view_->GetFanouts(node, /*include_controlled_nodes=*/true);
+  for (auto fanout : fanouts) {
+    if (node_has_multiple_inputs && IsMerge(*fanout.node)) {
       return false;
     }
     if (IsSwitch(*input)) {
-      for (const string& consumer_input : consumer->input()) {
-        if (consumer_input == AsControlDependency(node.name())) {
-          return false;
-        }
+      if (graph_view_->HasFanin(*fanout.node,
+                                {node.name(), Graph::kControlSlot})) {
+        return false;
       }
     }
   }
@@ -116,7 +119,7 @@
   if (!status.ok() || op_def->output_arg_size() == 0) {
     return false;
   }
-  const std::unordered_set<string> do_not_rewrite_ops{
+  const absl::flat_hash_set<string> do_not_rewrite_ops{
       "Assert",     "CheckNumerics",         "_Retval",
       "_Arg",       "_ParallelConcatUpdate", "TPUExecute",
       "TPUCompile", "ControlTrigger"};
@@ -126,7 +129,7 @@
   if (!SafeToRemoveIdentity(node)) {
     return false;
   }
-  if (NumNonControlOutputs(node, *node_map_) > 0) {
+  if (graph_view_->NumFanouts(node, /*include_controlled_nodes=*/false) > 0) {
     // The output values of this node may be needed.
     return false;
   }
@@ -134,61 +137,61 @@
 }
 
 int DependencyOptimizer::NumEdgesIfBypassed(
-    const NodeDef& node, const std::vector<NodeDef*>& output_nodes) const {
+    const NodeDef& node, int num_controlling_fanins,
+    const absl::flat_hash_set<MutableGraphView::Edge>& fanin_edges,
+    const absl::flat_hash_set<MutableGraphView::Edge>& fanout_edges,
+    int num_unique_fanout_nodes) const {
   const bool is_multi_input_identity_n =
       IsIdentityN(node) && !IsIdentityNSingleInput(node);
-  const int num_outputs = output_nodes.size();
-  const int num_inputs = node.input_size();
+  const int num_fanins = fanin_edges.size();
 
   if (is_multi_input_identity_n) {
     // A multi-input IdentityN with input/output control dependencies will
     // likely increase the number of edges after optimization.
-    int num_edges_if_bypassed(0);
-    for (string input_node_name : node.input()) {
-      if (IsControlInput(input_node_name)) {
-        num_edges_if_bypassed += num_outputs;
+    int num_edges_if_bypassed = 0;
+    int num_non_controlling_fanins = num_fanins - num_controlling_fanins;
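+    // Each regular fanin contributes one forwarded data edge, while each
+    // controlling fanin has to be rewired to every unique fanout node.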
+    num_edges_if_bypassed += num_non_controlling_fanins;
+    num_edges_if_bypassed += num_controlling_fanins * num_unique_fanout_nodes;
+
+    for (const auto& fanout : fanout_edges) {
+      if (fanout.dst.port_id == Graph::kControlSlot) {
+        num_edges_if_bypassed += num_fanins;
       } else {
         ++num_edges_if_bypassed;
       }
     }
-
-    for (auto consumer : output_nodes) {
-      for (int j = 0; j < consumer->input_size(); ++j) {
-        const TensorId consumer_input = ParseTensorName(consumer->input(j));
-        if (consumer_input.node() == node.name()) {
-          if (IsControlInput(consumer_input)) {
-            num_edges_if_bypassed += num_inputs;
-          } else {
-            ++num_edges_if_bypassed;
-          }
-        }
-      }
-    }
     return num_edges_if_bypassed;
   } else {
-    return num_inputs * num_outputs;
+    return num_fanins * num_unique_fanout_nodes;
   }
 }
 
 bool DependencyOptimizer::BypassingNodeIsBeneficial(
-    const NodeDef& node, const std::vector<NodeDef*>& input_nodes,
-    const std::vector<NodeDef*>& output_nodes) const {
+    const NodeDef& node, int num_controlling_fanins,
+    const absl::flat_hash_set<MutableGraphView::Edge>& fanin_edges,
+    const absl::flat_hash_set<MutableGraphView::Edge>& fanout_edges) const {
   const bool is_identity = IsIdentity(node) || IsIdentityNSingleInput(node);
   const bool is_multi_input_identity_n =
       IsIdentityN(node) && !IsIdentityNSingleInput(node);
-  const int num_outputs = output_nodes.size();
-  const int num_inputs = node.input_size();
+  const int num_fanins = fanin_edges.size();
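+  // Several fanout edges may target the same consumer node, so dedupe them to
+  // count each consumer only once.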
+  absl::flat_hash_set<NodeDef*> unique_fanout_nodes;
+  for (const auto& fanout_edge : fanout_edges) {
+    unique_fanout_nodes.insert(fanout_edge.dst.node);
+  }
+  const int num_unique_fanout_nodes = unique_fanout_nodes.size();
 
-  if (NumEdgesIfBypassed(node, output_nodes) > num_inputs + num_outputs) {
+  if (NumEdgesIfBypassed(node, num_controlling_fanins, fanin_edges,
+                         fanout_edges, num_unique_fanout_nodes) >
+      num_fanins + num_unique_fanout_nodes) {
     return false;
   }
 
   // Make sure that we don't increase the number of edges that cross
   // device boundaries.
-  if ((num_inputs == 1 && num_outputs > 1 &&
-       input_nodes[0]->device() != node.device()) ||
-      (num_inputs > 1 && num_outputs == 1 &&
-       output_nodes[0]->device() != node.device())) {
+  if ((num_fanins == 1 && num_unique_fanout_nodes > 1 &&
+       fanin_edges.begin()->src.node->device() != node.device()) ||
+      (num_fanins > 1 && num_unique_fanout_nodes == 1 &&
+       fanout_edges.begin()->dst.node->device() != node.device())) {
     return false;
   }
 
@@ -197,21 +200,21 @@
   // cost before and after.
   const string& node_dev = node.device();
   int num_cross_in = 0;
-  for (NodeDef* input_node : input_nodes) {
-    num_cross_in += static_cast<int>(input_node->device() != node_dev);
+  for (const auto& fanin : fanin_edges) {
+    num_cross_in += static_cast<int>(fanin.src.node->device() != node_dev);
   }
   int num_cross_out = 0;
-  for (NodeDef* output_node : output_nodes) {
-    num_cross_out += static_cast<int>(output_node->device() != node_dev);
+  for (const auto& fanout : unique_fanout_nodes) {
+    num_cross_out += static_cast<int>(fanout->device() != node_dev);
   }
 
   // Make sure we do not increase the number of device crossings.
   const int num_cross_before = num_cross_in + num_cross_out;
   int num_cross_after = 0;
-  for (NodeDef* input_node : input_nodes) {
-    for (NodeDef* output_node : output_nodes) {
+  for (const auto& fanin : fanin_edges) {
+    for (const auto& fanout : unique_fanout_nodes) {
       num_cross_after +=
-          static_cast<int>(input_node->device() != output_node->device());
+          static_cast<int>(fanin.src.node->device() != fanout->device());
     }
   }
   if (num_cross_after > num_cross_before) {
@@ -229,83 +232,58 @@
   return true;
 }
 
-void DependencyOptimizer::OptimizeNode(int node_idx,
-                                       SetVector<int>* nodes_to_simplify,
-                                       std::set<int>* nodes_to_delete) {
-  NodeDef* node = optimized_graph_->mutable_node(node_idx);
+Status DependencyOptimizer::OptimizeNode(
+    const string& node_name, SetVector<string>* nodes_to_simplify,
+    absl::flat_hash_set<string>* nodes_to_delete) {
+  NodeDef* node = graph_view_->GetNode(node_name);
   const bool is_noop = IsNoOp(*node);
   const bool is_identity = IsIdentity(*node) || IsIdentityNSingleInput(*node);
   const bool is_multi_input_identity =
       IsIdentityN(*node) && !IsIdentityNSingleInput(*node);
-  const string node_name = node->name();
-  // Constant nodes with no input control dependency are always executed early,
-  // so we can prune all their output control dependencies.
-  if (IsConstant(*node) && node->input_size() == 0) {
-    const std::set<NodeDef*> output_nodes = node_map_->GetOutputs(node_name);
-    for (NodeDef* fanout : output_nodes) {
-      bool optimize_fanout = false;
-      bool data_connection = false;
-      for (int i = fanout->input_size() - 1; i >= 0; --i) {
-        const TensorId input_tensor = ParseTensorName(fanout->input(i));
-        if (input_tensor.node() == node_name) {
-          if (input_tensor.index() < 0) {
-            fanout->mutable_input()->SwapElements(i, fanout->input_size() - 1);
-            fanout->mutable_input()->RemoveLast();
-            optimize_fanout = true;
-          } else {
-            data_connection = true;
-          }
-        }
-      }
-      if (optimize_fanout) {
-        nodes_to_simplify->PushBack(node_to_idx_[fanout]);
-        if (!data_connection) {
-          node_map_->RemoveOutput(node_name, fanout->name());
-        }
-      }
+  // WARNING: This relies on the strong assumption that the executor always
+  // executes constant nodes with no input control dependencies early. Given
+  // that, we can prune all of their output control dependencies.
+  if (IsConstant(*node) &&
+      graph_view_->NumFanins(*node, /*include_controlling_nodes=*/true) == 0) {
+    MutableGraphView::OutputPort control_port(node, Graph::kControlSlot);
+    auto control_fanouts = graph_view_->GetFanout(control_port);
+    for (const auto& fanout : control_fanouts) {
+      TF_RETURN_IF_ERROR(
+          graph_view_->RemoveControllingFanin(fanout.node->name(), node_name));
+      nodes_to_simplify->PushBack(fanout.node->name());
     }
-    if (node_map_->GetOutputs(node_name).empty() && fetch_nodes_known_ &&
+
+    if (graph_view_->NumFanouts(*node, /*include_controlled_nodes=*/true) ==
+            0 &&
+        fetch_nodes_known_ &&
         nodes_to_preserve_.find(node_name) == nodes_to_preserve_.end()) {
       // Mark the node for deletion.
-      nodes_to_delete->insert(node_to_idx_[node]);
+      nodes_to_delete->insert(node_name);
     }
-    return;
+    return Status::OK();
   }
 
   // Change ops that only have control dependencies as outputs to NoOps.
   if (!is_noop && SafeToConvertToNoOp(*node)) {
-    VLOG(1) << "***** Replacing  " << node_name << " (" << node->op()
+    VLOG(1) << "***** Replacing " << node_name << " (" << node->op()
             << ") with NoOp.";
     // The outputs of this node are not consumed. Replace its inputs with
     // control dependencies and replace the op itself with the NoOp op.
-    std::unordered_set<string> ctrl_inputs;
-    int pos = 0;
-    while (pos < node->input_size()) {
-      const string old_input = node->input(pos);
-      if (IsControlInput(old_input)) {
-        if (!ctrl_inputs.insert(old_input).second) {
-          // We found a duplicate control input. Remove it.
-          node->mutable_input()->SwapElements(pos, node->input_size() - 1);
-          node->mutable_input()->RemoveLast();
-        } else {
-          ++pos;
-        }
-        continue;
-      }
-      // Replace a normal input with a control input.
-      const string ctrl_input = ConstantFolding::AddControlDependency(
-          old_input, optimized_graph_, node_map_.get());
-      ctrl_inputs.insert(ctrl_input);
-      node->set_input(pos, ctrl_input);
-      node_map_->UpdateInput(node_name, old_input, ctrl_input);
-      const NodeDef* old_input_node = node_map_->GetNode(old_input);
-      nodes_to_simplify->PushBack(node_to_idx_[old_input_node]);
-      ++pos;
+    const int num_regular_fanins =
+        graph_view_->NumFanins(*node, /*include_controlling_nodes=*/false);
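+    // Record the producer names of the regular fanins before they are
+    // converted to controlling fanins, so those producers can be revisited.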
+    absl::flat_hash_set<string> regular_fanin_names;
+    for (int i = 0; i < num_regular_fanins; ++i) {
+      regular_fanin_names.emplace(ParseTensorName(node->input(i)).node());
     }
-    node->set_op("NoOp");
-    node->clear_attr();
-    nodes_to_simplify->PushBack(node_to_idx_[node]);
-    return;
+    TF_RETURN_IF_ERROR(
+        graph_view_->UpdateAllRegularFaninsToControlling(node_name));
+    TF_RETURN_IF_ERROR(
+        graph_view_->UpdateNode(node_name, "NoOp", node->device(), {}));
+    for (const string& regular_fanin_name : regular_fanin_names) {
+      nodes_to_simplify->PushBack(regular_fanin_name);
+    }
+    nodes_to_simplify->PushBack(node_name);
+    return Status::OK();
   }
 
   // Remove NoOp nodes if the product of their fan-in and fan-out is less than
@@ -358,154 +336,131 @@
 
   if (is_noop || ((is_identity || is_multi_input_identity) &&
                   SafeToRemoveIdentity(*node))) {
-    const auto& output_node_set = node_map_->GetOutputs(node_name);
-    const std::vector<NodeDef*> output_nodes(output_node_set.begin(),
-                                             output_node_set.end());
-    const int num_inputs = node->input_size();
-    std::vector<NodeDef*> input_nodes;
-    for (int i = 0; i < num_inputs; ++i) {
-      NodeDef* input_node = node_map_->GetNode(node->input(i));
-      if (input_node == nullptr) {
-        LOG(ERROR) << "Invalid input " << node->input(i);
-        return;
+    auto fanin_edges =
+        graph_view_->GetFaninEdges(*node, /*include_controlling_edges=*/true);
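+    // Collect the controlling fanins separately; if the node is bypassed,
+    // each of them must be forwarded as a control edge to every consumer.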
+    std::vector<NodeDef*> controlling_fanins;
+    controlling_fanins.reserve(fanin_edges.size());
+    for (const auto& fanin_edge : fanin_edges) {
+      if (fanin_edge.src.port_id == Graph::kControlSlot) {
+        controlling_fanins.push_back(fanin_edge.src.node);
       }
-      input_nodes.push_back(input_node);
     }
-
-    if (!BypassingNodeIsBeneficial(*node, input_nodes, output_nodes)) {
-      return;
+    auto fanout_edges =
+        graph_view_->GetFanoutEdges(*node, /*include_controlled_edges=*/true);
+    if (!BypassingNodeIsBeneficial(*node, controlling_fanins.size(),
+                                   fanin_edges, fanout_edges)) {
+      return Status::OK();
     }
 
     VLOG(1) << "***** Rerouting input around\n" << node->DebugString();
-    // Now remove the node and re-wire its inputs to its outputs.
-    for (auto consumer : output_nodes) {
-      bool updated_consumer = false;
-      VLOG(1) << "consumer before:\n" << consumer->DebugString();
-      for (int i = 0; i < num_inputs; ++i) {
-        const NodeDef* input = input_nodes[i];
-        // Forward dependency from input to consumer if it doesn't already
-        // depend on it.
-        if ((is_identity && i == 0) ||
-            (is_multi_input_identity && !IsControlInput(node->input(i)))) {
-          // Replace regular input from Identity node.
-          string new_input;
-          const string& input_to_forward = node->input(i);
-          CHECK(!IsControlInput(input_to_forward));
-          for (int j = 0; j < consumer->input_size(); ++j) {
-            const TensorId old_input = ParseTensorName(consumer->input(j));
-            if (old_input.node() == node_name) {
-              if (old_input.index() == i) {
-                // Regular input
-                new_input = input_to_forward;
-                node_map_->UpdateInput(consumer->name(), old_input.ToString(),
-                                       new_input);
-                consumer->set_input(j, new_input);
-              } else if (old_input.index() == -1) {
-                // Control dependency
-                new_input = AsControlDependency(NodeName(input_to_forward));
-                node_map_->UpdateInput(consumer->name(), old_input.ToString(),
-                                       new_input);
-                consumer->set_input(j, new_input);
-              }
-            }
-          }
-          updated_consumer = true;
-        } else {
-          // Forward dependency from input to consumer if it doesn't already
-          // depend on it.
-          if (node_map_->GetOutputs(input->name()).count(consumer) == 0) {
-            consumer->add_input(AsControlDependency(input->name()));
-            node_map_->AddOutput(input->name(), consumer->name());
-            nodes_to_simplify->PushBack(node_to_idx_[input]);
-            updated_consumer = true;
-          }
-        }
+
+    absl::flat_hash_set<NodeDef*> processed_nodes;
+    for (const auto& fanout_edge : fanout_edges) {
+      NodeDef* consumer = fanout_edge.dst.node;
+      const int src_port = fanout_edge.src.port_id;
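+      // src_port > Graph::kControlSlot (-1) means this edge originates from a
+      // regular output of the bypassed node rather than from a control edge.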
+      if ((is_identity && src_port == 0) ||
+          (is_multi_input_identity && src_port > Graph::kControlSlot)) {
+        // Identity regular fanins.
+        const string& input_to_forward = node->input(src_port);
+        TF_RETURN_IF_ERROR(graph_view_->UpdateRegularFaninByPort(
+            consumer->name(), fanout_edge.dst.port_id,
+            ParseTensorName(input_to_forward)));
+      } else if (is_identity || is_multi_input_identity) {
+        // Identity control dependency.
+        // TODO(lyandy): Handle IdentityN properly here by adding all regular
+        // fanins as controlling fanins.
+        const string& node_first_input = node->input(0);
+        TF_RETURN_IF_ERROR(graph_view_->UpdateFanin(
+            consumer->name(), {node_name, Graph::kControlSlot},
+            {ParseTensorName(node_first_input).node(), Graph::kControlSlot}));
+      } else {
+        // NoOp.
+        TF_RETURN_IF_ERROR(
+            graph_view_->RemoveControllingFanin(consumer->name(), node_name));
       }
-      // Remove dependency on node from consumer.
-      updated_consumer |= RemoveInput(consumer, AsControlDependency(node_name),
-                                      node_map_.get());
-      if (updated_consumer) {
-        nodes_to_simplify->PushBack(node_to_idx_[consumer]);
-      }
-      VLOG(1) << "consumer after:\n" << consumer->DebugString();
+      processed_nodes.insert(consumer);
+      nodes_to_simplify->PushBack(consumer->name());
     }
-    node_map_->RemoveOutputs(node_name);
+    for (const auto& processed_node : processed_nodes) {
+      // Forward the dependency on each controlling fanin of the bypassed node
+      // to the consumer.
+      for (const auto& controlling_fanin : controlling_fanins) {
+        TF_RETURN_IF_ERROR(graph_view_->AddControllingFanin(
+            processed_node->name(),
+            {controlling_fanin->name(), Graph::kControlSlot}));
+        nodes_to_simplify->PushBack(controlling_fanin->name());
+      }
+    }
+
     if (fetch_nodes_known_ &&
         nodes_to_preserve_.find(node_name) == nodes_to_preserve_.end()) {
-      // Mark the node for deletion.
-      nodes_to_delete->insert(node_idx);
-
       // Disconnect the node from its inputs to enable further optimizations.
-      node_map_->RemoveInputs(node_name);
-      node->clear_input();
+      TF_RETURN_IF_ERROR(graph_view_->RemoveAllFanins(
+          node_name, /*keep_controlling_fanins=*/false));
+      // Mark the node for deletion.
+      nodes_to_delete->insert(node_name);
     }
   }
-}
-
-void DependencyOptimizer::CleanControlInputs() {
-  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    DedupControlInputs(optimized_graph_->mutable_node(i));
-  }
+  return Status::OK();
 }
 
 Status DependencyOptimizer::OptimizeDependencies() {
-  SetVector<int> nodes_to_simplify;
-  std::set<int> nodes_to_delete;
-  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    const NodeDef& node = optimized_graph_->node(i);
+  SetVector<string> nodes_to_simplify;
+  absl::flat_hash_set<string> nodes_to_delete;
+  for (int i = 0; i < graph_view_->graph()->node_size(); ++i) {
+    const NodeDef& node = graph_view_->graph()->node(i);
     if (IsNoOp(node) || IsIdentity(node) || IsIdentityN(node) ||
         IsConstant(node) || SafeToConvertToNoOp(node)) {
-      nodes_to_simplify.PushBack(i);
+      nodes_to_simplify.PushBack(node.name());
     }
   }
   while (!nodes_to_simplify.Empty()) {
-    int node_to_simplify = nodes_to_simplify.PopBack();
+    string node_to_simplify = nodes_to_simplify.PopBack();
     // Discard nodes that were marked for deletion already.
     while (nodes_to_delete.find(node_to_simplify) != nodes_to_delete.end()) {
       node_to_simplify = nodes_to_simplify.PopBack();
     }
-    OptimizeNode(node_to_simplify, &nodes_to_simplify, &nodes_to_delete);
+    TF_RETURN_IF_ERROR(
+        OptimizeNode(node_to_simplify, &nodes_to_simplify, &nodes_to_delete));
   }
 
   if (fetch_nodes_known_) {
     VLOG(1) << "Deleted " << nodes_to_delete.size() << " out of "
-            << optimized_graph_->node_size() << " nodes.";
-    EraseNodesFromGraph(nodes_to_delete, optimized_graph_);
-    node_map_.reset(new NodeMap(optimized_graph_));
-    BuildNodeToIdx();
+            << graph_view_->graph()->node_size() << " nodes.";
+    TF_RETURN_IF_ERROR(graph_view_->DeleteNodes(nodes_to_delete));
   }
   return Status::OK();
 }
 
 Status DependencyOptimizer::TransitiveReduction() {
   // PRECONDITION: the graph must be sorted topologically.
-  const int num_nodes = optimized_graph_->node_size();
+  GraphDef* graph = graph_view_->graph();
+  auto node_to_idx = BuildNodeToIdx(*graph);
+  const int num_nodes = graph->node_size();
   // Set up a compressed version of the graph to save a constant factor in the
   // expensive algorithm below. Also cache the set of control outputs and the
   // highest index of a target of any control output from each node.
   int num_controls = 0;
   std::vector<gtl::InlinedVector<int, 4>> inputs(num_nodes);
-  std::vector<gtl::InlinedVector<std::pair<int, int>, 2>> control_outputs(
-      num_nodes);
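+  // Only the index of each control output's target needs to be tracked now;
+  // control edges are removed by fanin name below rather than by input slot.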
+  std::vector<gtl::InlinedVector<int, 2>> control_outputs(num_nodes);
   for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
-    const NodeDef& node = optimized_graph_->node(node_idx);
+    const NodeDef& node = graph->node(node_idx);
     if (ModifiesFrameInfo(node) || !HasOpDef(node)) {
       // Ignore function nodes and nodes that modify frame info.
       continue;
     }
-    for (int input_slot = 0; input_slot < node.input_size(); ++input_slot) {
-      const string& input = node.input(input_slot);
-      const NodeDef* input_node = node_map_->GetNode(input);
+    for (const string& input : node.input()) {
+      const NodeDef* input_node = graph_view_->GetNode(NodeName(input));
       if (ModifiesFrameInfo(*input_node) || IsMerge(*input_node)) {
         // Ignore edges from nodes that modify frame info and from Merge nodes,
         // because we cannot know which of its input paths executes.
         continue;
       }
-      const int input_node_idx = node_to_idx_[input_node];
+      const int input_node_idx = node_to_idx[input_node];
       inputs[node_idx].push_back(input_node_idx);
       if (IsControlInput(input)) {
         ++num_controls;
-        control_outputs[input_node_idx].emplace_back(node_idx, input_slot);
+        control_outputs[input_node_idx].emplace_back(node_idx);
       }
     }
   }
@@ -520,14 +475,12 @@
   // such that when we swap them out we don't clobber the
   // node(target).input() repeated field.
   typedef std::pair<int, int> InputSlotAndSource;
-  std::unordered_map<
-      int, std::set<InputSlotAndSource, std::greater<InputSlotAndSource>>>
-      control_edges_to_remove;
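+  // Sources are grouped per target node. RemoveControllingFanin removes edges
+  // by name, so the former descending input-slot ordering is unnecessary.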
+  absl::flat_hash_map<int, absl::flat_hash_set<int>> control_edges_to_remove;
   for (int source = 0; source < num_nodes; ++source) {
     int highest_control_target = -1;
     for (const auto& control_output : control_outputs[source]) {
-      if (control_output.first > highest_control_target) {
-        highest_control_target = control_output.first;
+      if (control_output > highest_control_target) {
+        highest_control_target = control_output;
       }
     }
     if (highest_control_target <= source) {
@@ -557,26 +510,21 @@
     // longer than 1, there exists an alternate path, and we can eliminate the
     // redundant direct control dependency.
     for (const auto& control_output : control_outputs[source]) {
-      const int target = control_output.first;
+      const int target = control_output;
       if (longest_distance[target] > 1) {
-        const int input_slot = control_output.second;
-        control_edges_to_remove[target].emplace(input_slot, source);
+        control_edges_to_remove[target].emplace(source);
       }
     }
   }
 
   for (const auto& it : control_edges_to_remove) {
     const int target = it.first;
-    NodeDef* target_node = optimized_graph_->mutable_node(target);
-    for (const InputSlotAndSource& slot_and_source : it.second) {
-      const int input_slot = slot_and_source.first;
-      const int source = slot_and_source.second;
-      const NodeDef& source_node = optimized_graph_->node(source);
-      CHECK_LT(input_slot, target_node->input_size());
-      target_node->mutable_input()->SwapElements(input_slot,
-                                                 target_node->input_size() - 1);
-      node_map_->RemoveOutput(source_node.name(), target_node->name());
-      target_node->mutable_input()->RemoveLast();
+    const NodeDef& target_node = graph->node(target);
+    const string target_node_name = target_node.name();
+    for (int source : it.second) {
+      const NodeDef& source_node = graph->node(source);
+      TF_RETURN_IF_ERROR(graph_view_->RemoveControllingFanin(
+          target_node_name, source_node.name()));
       ++num_controls_removed;
     }
   }
@@ -585,26 +533,17 @@
   return Status::OK();
 }
 
-void DependencyOptimizer::BuildNodeToIdx() {
-  // Set up &node -> index map.
-  node_to_idx_.clear();
-  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    const NodeDef& node = optimized_graph_->node(i);
-    node_to_idx_[&node] = i;
-  }
-}
-
 // Suppose there are cross-device control inputs to node C from multiple nodes
 // that are located on another device, e.g., we have control edges:
 // A->C, B->C
 // where A and B are on device X and C is on device Y.
 // We can reduce cross-device communication by introducing an intermediate
 // NoOp node C' on device X and rewriting the control edges to:
-// A->C', B->C', C' -> C
-void DependencyOptimizer::GroupCrossDeviceControlEdges() {
-  const int num_nodes = optimized_graph_->node_size();
+// A->C', B->C', C'->C
+Status DependencyOptimizer::GroupCrossDeviceControlEdges() {
+  const int num_nodes = graph_view_->graph()->node_size();
   for (int i = 0; i < num_nodes; ++i) {
-    NodeDef* node = optimized_graph_->mutable_node(i);
+    NodeDef* node = graph_view_->graph()->mutable_node(i);
     if (node->device().empty()) continue;
 
     // Creates new noop nodes for devices on which multiple control inputs are
@@ -615,88 +554,71 @@
     // that device.
     std::map<string, NodeDef*> noops;
     int num_noops = 0;
-    for (int j = 0; j < node->input_size(); ++j) {
-      if (IsControlInput(node->input(j))) {
-        const NodeDef* input = node_map_->GetNode(node->input(j));
-        if (input != nullptr && !input->device().empty() &&
-            input->device() != node->device()) {
-          auto emplace_result = noops.emplace(input->device(), nullptr);
-          if (!emplace_result.second &&
-              emplace_result.first->second == nullptr) {
-            // This is the second cross-device control input from the same
-            // device. Creates an intermediate noop node on that device.
-            string group_name;
-            NodeDef* noop;
-            // Creates a fresh node name; there may be conflicting names from
-            // a previous iteration of the optimizer.
-            do {
-              group_name = AddPrefixToNodeName(
-                  node->name(),
-                  strings::StrCat("GroupCrossDeviceControlEdges_", num_noops));
-              noop = node_map_->GetNode(group_name);
-              ++num_noops;
-            } while (noop != nullptr);
-            noop = optimized_graph_->add_node();
-            noop->set_name(group_name);
-            noop->set_device(input->device());
-            noop->set_op("NoOp");
-            node_map_->AddNode(noop->name(), noop);
-            emplace_result.first->second = noop;
-          }
+    auto controlling_fanins = graph_view_->GetFanin(
+        MutableGraphView::InputPort(node, Graph::kControlSlot));
+    for (const auto& controlling_fanin : controlling_fanins) {
+      const NodeDef* fanin_node = controlling_fanin.node;
+      if (!fanin_node->device().empty() &&
+          fanin_node->device() != node->device()) {
+        auto emplace_result = noops.emplace(fanin_node->device(), nullptr);
+        if (!emplace_result.second && emplace_result.first->second == nullptr) {
+          // This is the second cross-device control input from the same
+          // device. Creates an intermediate noop node on that device.
+          string group_name;
+          NodeDef* noop;
+          // Creates a fresh node name; there may be conflicting names from
+          // a previous iteration of the optimizer.
+          do {
+            group_name = AddPrefixToNodeName(
+                node->name(),
+                strings::StrCat("GroupCrossDeviceControlEdges_", num_noops));
+            noop = graph_view_->GetNode(group_name);
+            ++num_noops;
+          } while (noop != nullptr);
+          NodeDef new_node;
+          new_node.set_name(group_name);
+          new_node.set_device(fanin_node->device());
+          new_node.set_op("NoOp");
+          emplace_result.first->second =
+              graph_view_->AddNode(std::move(new_node));
         }
       }
     }
 
     // Reroute existing control edges to go via the newly introduced NoOp nodes.
-    int pos = 0;
-    while (pos < node->input_size()) {
-      const string& input_name = node->input(pos);
-      if (IsControlInput(input_name)) {
-        NodeDef* input = node_map_->GetNode(input_name);
-        if (input == nullptr) {
-          ++pos;
-        } else {
-          auto it = noops.find(input->device());
-          if (it == noops.end() || it->second == nullptr) {
-            ++pos;
-          } else {
-            node->mutable_input()->SwapElements(pos, node->input_size() - 1);
-            node->mutable_input()->RemoveLast();
-            it->second->add_input(AsControlDependency(*input));
-            node_map_->UpdateOutput(input_name, node->name(),
-                                    it->second->name());
-          }
-        }
-      } else {
-        ++pos;
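+    // For every cross-device controlling fanin that received a grouping NoOp,
+    // drop the direct control edge and attach the fanin to the NoOp instead.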
+    for (const auto& controlling_fanin : controlling_fanins) {
+      auto it = noops.find(controlling_fanin.node->device());
+      if (it != noops.end() && it->second != nullptr) {
+        TF_RETURN_IF_ERROR(graph_view_->RemoveControllingFanin(
+            node->name(), controlling_fanin.node->name()));
+        TF_RETURN_IF_ERROR(graph_view_->AddControllingFanin(
+            it->second->name(),
+            {controlling_fanin.node->name(), Graph::kControlSlot}));
       }
     }
     for (const auto& entry : noops) {
       if (entry.second) {
-        node->add_input(AsControlDependency(*entry.second));
-        node_map_->AddOutput(entry.second->name(), node->name());
+        TF_RETURN_IF_ERROR(graph_view_->AddControllingFanin(
+            node->name(), {entry.second->name(), Graph::kControlSlot}));
       }
     }
   }
+  return Status::OK();
 }
 
 Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                      GraphDef* optimized_graph) {
-  optimized_graph_ = optimized_graph;
-  *optimized_graph_ = item.graph;
+  *optimized_graph = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
   fetch_nodes_known_ = !item.fetch.empty();
-  CleanControlInputs();
+  graph_view_.reset(new MutableGraphView(optimized_graph));
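+  // All subsequent mutations go through the MutableGraphView so that its
+  // fanin/fanout bookkeeping stays consistent with the underlying graph.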
 
   const int num_iterations = 2;
   for (int iteration = 0; iteration < num_iterations; ++iteration) {
     GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     Status topo_sort_status;
     // Perform topological sort to prepare the graph for transitive reduction.
-    topo_sort_status = TopologicalSort(optimized_graph_);
-    // Set up index-based graph datastructures to speed up analysis steps below.
-    node_map_.reset(new NodeMap(optimized_graph_));
-    BuildNodeToIdx();
+    topo_sort_status = TopologicalSort(optimized_graph);
 
     if (topo_sort_status.ok()) {
       // Remove redundant control dependencies.
@@ -710,10 +632,7 @@
     // nodes.
     TF_RETURN_IF_ERROR(OptimizeDependencies());
 
-    // Dedup control inputs.
-    CleanControlInputs();
-
-    GroupCrossDeviceControlEdges();
+    TF_RETURN_IF_ERROR(GroupCrossDeviceControlEdges());
   }
 
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
index 99021b9..a60e7a3 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
@@ -17,6 +17,10 @@
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DEPENDENCY_OPTIMIZER_H_
 
 #include <unordered_set>
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/string_view.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
@@ -45,24 +49,25 @@
   // Returns true if bypassing the node does not increase the number of edges or
   // number of edges crossing a device boundary.
   bool BypassingNodeIsBeneficial(
-      const NodeDef& node, const std::vector<NodeDef*>& input_nodes,
-      const std::vector<NodeDef*>& output_nodes) const;
-  int NumEdgesIfBypassed(const NodeDef& node,
-                         const std::vector<NodeDef*>& output_nodes) const;
+      const NodeDef& node, int num_controlling_fanins,
+      const absl::flat_hash_set<MutableGraphView::Edge>& fanin_edges,
+      const absl::flat_hash_set<MutableGraphView::Edge>& fanout_edges) const;
+  int NumEdgesIfBypassed(
+      const NodeDef& node, int num_controlling_fanins,
+      const absl::flat_hash_set<MutableGraphView::Edge>& fanin_edges,
+      const absl::flat_hash_set<MutableGraphView::Edge>& fanout_edges,
+      int num_unique_fanout_nodes) const;
   // Returns true if node is not an Identity node or if it is an Identity
   // that is safe to remove.
   bool SafeToRemoveIdentity(const NodeDef& node) const;
   // Returns true if it is safe to convert node to NoOp.
   bool SafeToConvertToNoOp(const NodeDef& node) const;
-  // Removes all duplicate control dependencies.
-  void CleanControlInputs();
-  // Builds a map from the &optimized_graph_->node(i) to i.
-  void BuildNodeToIdx();
-  // Tries to optimize the node with the given index, possibly additional
+  // Tries to optimize the node with the given node name, possibly queueing additional
   // optimizations by inserting nodes in nodes_to_simplify, and pruning nodes by
   // inserting them in nodes_to_delete.
-  void OptimizeNode(int node_idx, SetVector<int>* nodes_to_simplify,
-                    std::set<int>* nodes_to_delete);
+  Status OptimizeNode(const string& node_name,
+                      SetVector<string>* nodes_to_simplify,
+                      absl::flat_hash_set<string>* nodes_to_delete);
   // Eliminates redundant control dependencies by computing the transitive
   // reduction of the graph.
   Status TransitiveReduction();
@@ -70,13 +75,11 @@
   Status OptimizeDependencies();
   // Replaces multiple cross-device control edges from the same device with a
   // single control edge.
-  void GroupCrossDeviceControlEdges();
+  Status GroupCrossDeviceControlEdges();
 
   bool fetch_nodes_known_;
   std::unordered_set<string> nodes_to_preserve_;
-  std::unique_ptr<NodeMap> node_map_;
-  std::unordered_map<const NodeDef*, int> node_to_idx_;
-  GraphDef* optimized_graph_;  // Not owned.
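+  // Wraps the GraphDef being optimized; the graph itself is not owned.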
+  std::unique_ptr<MutableGraphView> graph_view_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
index 5883fcb..80a0189 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
@@ -41,13 +41,32 @@
     const NodeDef& optimized = optimized_graph.node(i);
     EXPECT_EQ(original.name(), optimized.name()) << func;
     EXPECT_EQ(original.op(), optimized.op()) << func;
-    EXPECT_EQ(original.input_size(), optimized.input_size()) << func;
+    ASSERT_EQ(original.input_size(), optimized.input_size()) << func;
     for (int j = 0; j < original.input_size(); ++j) {
       EXPECT_EQ(original.input(j), optimized.input(j)) << func;
     }
   }
 }
 
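+// Returns true if the control inputs of `node` are exactly the strings in
+// `expected` (including the leading '^'), ignoring order.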
+bool NodeHasControllingFanins(const NodeDef& node,
+                              const absl::flat_hash_set<string>& expected) {
+  absl::flat_hash_set<string> actual;
+  for (const string& fanin : node.input()) {
+    if (IsControlInput(fanin)) {
+      actual.insert(fanin);
+    }
+  }
+  if (actual.size() != expected.size()) {
+    return false;
+  }
+  for (const auto& expected_fanin : expected) {
+    if (!actual.contains(expected_fanin)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 TEST_F(DependencyOptimizerTest, NoOp) {
   // This trivial graph is so basic there's nothing to optimize.
   TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
@@ -89,12 +108,12 @@
   TF_EXPECT_OK(status);
 
   // The 'z' node should have been optimized away leaving only 5 nodes.
-  EXPECT_EQ(5, output.node_size());
+  EXPECT_EQ(output.node_size(), 5);
 
-  for (const NodeDef& node : item.graph.node()) {
+  for (const NodeDef& node : output.node()) {
     if (node.name() == "id1" || node.name() == "id2") {
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("add", node.input(0));
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "add");
     }
   }
 }
@@ -123,30 +142,30 @@
   status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size(), output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size());
   int found = 0;
-  for (int i = 0; i < item.graph.node_size(); ++i) {
-    const NodeDef& node = item.graph.node(i);
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
     // "add" should get turned into a NoOp and removed.
-    EXPECT_NE("add", node.name());
+    EXPECT_NE(node.name(), "add");
     if (node.name() == "id1") {
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("x", node.input(0));
-      EXPECT_EQ("^y", node.input(1));
+      EXPECT_EQ(node.op(), "Identity");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "x");
+      EXPECT_EQ(node.input(1), "^y");
       ++found;
     } else if (node.name() == "id2") {
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("y", node.input(0));
-      EXPECT_EQ("^x", node.input(1));
+      EXPECT_EQ(node.op(), "Identity");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "y");
+      EXPECT_EQ(node.input(1), "^x");
       ++found;
     }
   }
-  EXPECT_EQ(2, found);
+  EXPECT_EQ(found, 2);
 }
 
-TEST_F(DependencyOptimizerTest, ChangeToNoop_RepeatedInput) {
+TEST_F(DependencyOptimizerTest, ChangeToNoopRepeatedInput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
   Output add = ops::Add(s.WithOpName("add"), x, x);
@@ -164,25 +183,24 @@
   item.graph.Swap(&output);
   status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-  LOG(INFO) << output.DebugString();
 
-  EXPECT_EQ(item.graph.node_size(), output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size());
   int found = 0;
   for (int i = 0; i < item.graph.node_size(); ++i) {
     const NodeDef& node = item.graph.node(i);
     // "add" should get turned into a NoOp and removed.
-    EXPECT_NE("add", node.name());
+    EXPECT_NE(node.name(), "add");
     if (node.name() == "id1") {
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ(node.op(), "Identity");
+      EXPECT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "x");
       ++found;
     }
   }
-  EXPECT_EQ(1, found);
+  EXPECT_EQ(found, 1);
 }
 
-TEST_F(DependencyOptimizerTest, ChangeToNoop_SwitchIdentity) {
+TEST_F(DependencyOptimizerTest, ChangeToNoopSwitchIdentity) {
   // This tests that we don't try to repeatedly add Identity nodes
   // with names like "ConstantFoldingCtrl/foo/bar/switch_$port" when
   // multiple nodes reading the same output of a Switch node get
@@ -220,23 +238,23 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size() - 1, output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size() - 1);
   for (int i = 0; i < output.node_size(); ++i) {
     const NodeDef& node = output.node(i);
     // "neg" should be eliminated.
-    EXPECT_NE("neg", node.name());
+    EXPECT_NE(node.name(), "neg");
     // A control dep from "^ConstantFoldingCtrl/switch_1"
     // should be attached to "c1".
     if (node.name() == "c1") {
-      EXPECT_EQ("Const", node.op());
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("^ConstantFoldingCtrl/switch_1", node.input(0));
+      EXPECT_EQ(node.op(), "Const");
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "^ConstantFoldingCtrl/switch_1");
     }
   }
 }
 
 // TODO(rmlarsen): Add test to make sure we skip Switch and Merge.
-TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
+TEST_F(DependencyOptimizerTest, ChangeToNoopNoFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
   Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
@@ -258,7 +276,7 @@
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(DependencyOptimizerTest, RemoveNoOps_EmptyInputOrOutput) {
+TEST_F(DependencyOptimizerTest, RemoveNoOpsEmptyInputOrOutput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s, {1, 2}, DT_FLOAT);
   auto noop1 = ops::NoOp(s);
@@ -278,18 +296,18 @@
   status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size(), output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size());
   for (const NodeDef& node : output.node()) {
     if (node.name() == "NoOp" || node.name() == "NoOp_1") {
       EXPECT_EQ(node.input_size(), 0);
     } else if (node.name() == "Identity") {
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("RandomUniform", node.input(0));
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "RandomUniform");
     }
   }
 }
 
-TEST_F(DependencyOptimizerTest, RemoveNoOps_DeviceBoundaries) {
+TEST_F(DependencyOptimizerTest, RemoveNoOpsDeviceBoundaries) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s.WithOpName("x").WithDevice("/CPU:0"), {1, 2},
                                 DT_FLOAT);
@@ -324,7 +342,7 @@
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(DependencyOptimizerTest, RemoveIdentityOps_DeviceBoundaries) {
+TEST_F(DependencyOptimizerTest, RemoveIdentityOpsDeviceBoundaries) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s.WithOpName("x").WithDevice("/CPU:0"), {1, 2},
                                 DT_FLOAT);
@@ -356,7 +374,7 @@
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(DependencyOptimizerTest, RemoveIdentityOps_IdenticalDevices) {
+TEST_F(DependencyOptimizerTest, RemoveIdentityOpsIdenticalDevices) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s.WithOpName("x").WithDevice("/CPU:0"), {1, 2},
                                 DT_FLOAT);
@@ -373,16 +391,17 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size() - 1, output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size() - 1);
   for (const NodeDef& node : output.node()) {
     EXPECT_NE(node.name(), "id_a");
     if (node.name() == "Identity") {
+      ASSERT_EQ(node.input_size(), 1);
       EXPECT_EQ(node.input(0), "x");
     }
   }
 }
 
-TEST_F(DependencyOptimizerTest, RemoveNoOps_SingleInputOrOutput) {
+TEST_F(DependencyOptimizerTest, RemoveNoOpsSingleInputOrOutput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
   Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
@@ -409,15 +428,17 @@
   status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size(), output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size());
   for (const NodeDef& node : output.node()) {
     if (node.name() == "NoOp" || node.name() == "NoOp_1") {
-      EXPECT_EQ(0, node.input_size());
+      EXPECT_EQ(node.input_size(), 0);
     } else if (node.name() == "Identity") {
-      EXPECT_EQ("x", node.input(0));
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "x");
     } else if (node.name() == "Identity_1") {
-      EXPECT_EQ("y", node.input(0));
-      EXPECT_EQ("^x", node.input(1));
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "y");
+      EXPECT_EQ(node.input(1), "^x");
     }
   }
 }
@@ -462,48 +483,46 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size() - 3, output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size() - 3);
   int found = 0;
   for (const NodeDef& node : output.node()) {
-    EXPECT_NE("id_a", node.name());
-    EXPECT_NE("id_b", node.name());
-    EXPECT_NE("id_c", node.name());
+    EXPECT_NE(node.name(), "id_a");
+    EXPECT_NE(node.name(), "id_b");
+    EXPECT_NE(node.name(), "id_c");
     if (node.name() == "a_a" || node.name() == "a_b") {
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("x", node.input(0));
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "x");
       ++found;
     }
     if (node.name() == "a_c" || node.name() == "a_d") {
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("z", node.input(0));
-      EXPECT_EQ("^x", node.input(1));
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "z");
+      EXPECT_EQ(node.input(1), "^x");
       ++found;
     }
     if (node.name() == "b_a") {
-      EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("x", node.input(0));
-      EXPECT_EQ("^y", node.input(1));
-      EXPECT_EQ("^z", node.input(2));
+      ASSERT_EQ(node.input_size(), 3);
+      EXPECT_EQ(node.input(0), "x");
+      EXPECT_TRUE(NodeHasControllingFanins(node, {"^y", "^z"}));
       ++found;
     }
     if (node.name() == "c_a") {
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("x", node.input(0));
-      EXPECT_EQ("^y", node.input(1));
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "x");
+      EXPECT_EQ(node.input(1), "^y");
       ++found;
     }
     if (node.name() == "c_b") {
-      EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("z", node.input(0));
-      EXPECT_EQ("^x", node.input(1));
-      EXPECT_EQ("^y", node.input(2));
+      ASSERT_EQ(node.input_size(), 3);
+      EXPECT_EQ(node.input(0), "z");
+      EXPECT_TRUE(NodeHasControllingFanins(node, {"^x", "^y"}));
       ++found;
     }
   }
   EXPECT_EQ(found, 7);
 }
 
-TEST_F(DependencyOptimizerTest, RemoveIdentity_RepeatedInputs) {
+TEST_F(DependencyOptimizerTest, RemoveIdentityRepeatedInputs) {
   // Corner cases with repeated inputs.
   tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
   ops::Variable x(scope.WithOpName("x"), {}, DT_BOOL);
@@ -529,35 +548,35 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size() - 1, output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size() - 1);
   int found = 0;
   for (const NodeDef& node : output.node()) {
-    EXPECT_NE("id0", node.name());
+    EXPECT_NE(node.name(), "id0");
     if (node.name() == "or0") {
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("switch:1", node.input(0));
-      EXPECT_EQ("switch:1", node.input(1));
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "switch:1");
+      EXPECT_EQ(node.input(1), "switch:1");
       ++found;
     }
     if (node.name() == "or1") {
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("switch:1", node.input(0));
-      EXPECT_EQ("y", node.input(1));
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "switch:1");
+      EXPECT_EQ(node.input(1), "y");
       ++found;
     }
     if (node.name() == "or2") {
       // or2 should be unchanged.
-      EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("y", node.input(0));
-      EXPECT_EQ("y", node.input(1));
-      EXPECT_EQ("^id1", node.input(2));
+      ASSERT_EQ(node.input_size(), 3);
+      EXPECT_EQ(node.input(0), "y");
+      EXPECT_EQ(node.input(1), "y");
+      EXPECT_EQ(node.input(2), "^id1");
       ++found;
     }
   }
   EXPECT_EQ(found, 3);
 }
 
-TEST_F(DependencyOptimizerTest, Transitive_Reduction_Simple) {
+TEST_F(DependencyOptimizerTest, TransitiveReductionSimple) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output c = ops::Const(s.WithOpName("c"), {1.0f, 2.0f}, {1, 2});
   Output x = ops::Square(s.WithOpName("x"), c);
@@ -572,13 +591,13 @@
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-  EXPECT_EQ(4, output.node_size());
-  EXPECT_EQ("neg2", output.node(3).name());
-  EXPECT_EQ(1, output.node(3).input_size());
-  EXPECT_EQ("neg1", output.node(3).input(0));
+  ASSERT_EQ(output.node_size(), 4);
+  EXPECT_EQ(output.node(3).name(), "neg2");
+  ASSERT_EQ(output.node(3).input_size(), 1);
+  EXPECT_EQ(output.node(3).input(0), "neg1");
 }
 
-TEST_F(DependencyOptimizerTest, ChangeToNoop_Identity) {
+TEST_F(DependencyOptimizerTest, ChangeToNoopIdentity) {
   tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
   ops::Variable v_in(scope.WithOpName("v_in"), {3}, DT_FLOAT);
   Output id_after_var = ops::Identity(scope.WithOpName("id_after_var"), v_in);
@@ -609,18 +628,18 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(item.graph.node_size() - 2, output.node_size());
+  EXPECT_EQ(output.node_size(), item.graph.node_size() - 2);
   bool found = false;
   for (int i = 0; i < output.node_size(); ++i) {
     const NodeDef& node = output.node(i);
     // "id0" and "id1" but neither "ConstantFoldingCtrl/switch_1",
     // "id_after_var, nor "id2"" should be eliminated.
-    EXPECT_NE("id0", node.name());
-    EXPECT_NE("id1", node.name());
+    EXPECT_NE(node.name(), "id0");
+    EXPECT_NE(node.name(), "id1");
     if (node.name() == "c1") {
-      EXPECT_EQ("Const", node.op());
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("^ConstantFoldingCtrl/switch_1", node.input(0));
+      EXPECT_EQ(node.op(), "Const");
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "^ConstantFoldingCtrl/switch_1");
       found = true;
     }
   }
@@ -650,17 +669,17 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(6, output.node_size());
-  EXPECT_EQ("out1", output.node(4).name());
-  EXPECT_EQ(1, output.node(4).input_size());
-  EXPECT_EQ("s", output.node(4).input(0));
+  ASSERT_EQ(output.node_size(), 6);
+  EXPECT_EQ(output.node(4).name(), "out2");
+  ASSERT_EQ(output.node(4).input_size(), 1);
+  EXPECT_EQ(output.node(4).input(0), "s:1");
 
-  EXPECT_EQ("out2", output.node(5).name());
-  EXPECT_EQ(1, output.node(5).input_size());
-  EXPECT_EQ("s:1", output.node(5).input(0));
+  EXPECT_EQ(output.node(5).name(), "out1");
+  ASSERT_EQ(output.node(5).input_size(), 1);
+  EXPECT_EQ(output.node(5).input(0), "s");
 }
 
-TEST_F(DependencyOptimizerTest, RemoveIdentityN_SwitchInput) {
+TEST_F(DependencyOptimizerTest, RemoveIdentityNSwitchInput) {
   tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
   Output b = ops::Placeholder(scope.WithOpName("b"), DT_BOOL);
   Output x = ops::RandomUniform(scope.WithOpName("x"), {1, 2}, DT_FLOAT);
@@ -687,27 +706,27 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(8, output.node_size());
+  ASSERT_EQ(output.node_size(), 8);
 
   auto out1_node = output.node(7);
-  EXPECT_EQ("out1", out1_node.name());
-  EXPECT_EQ(1, out1_node.input_size());
-  EXPECT_EQ("s", out1_node.input(0));
+  EXPECT_EQ(out1_node.name(), "out1");
+  ASSERT_EQ(out1_node.input_size(), 1);
+  EXPECT_EQ(out1_node.input(0), "s");
 
-  auto out2_node = output.node(4);
-  EXPECT_EQ("out2", out2_node.name());
-  EXPECT_EQ(1, out2_node.input_size());
-  EXPECT_EQ("s:1", out2_node.input(0));
+  auto out2_node = output.node(6);
+  EXPECT_EQ(out2_node.name(), "out2");
+  ASSERT_EQ(out2_node.input_size(), 1);
+  EXPECT_EQ(out2_node.input(0), "s:1");
 
   auto out3_node = output.node(5);
-  EXPECT_EQ("out3", out3_node.name());
-  EXPECT_EQ(1, out3_node.input_size());
-  EXPECT_EQ("s", out3_node.input(0));
+  EXPECT_EQ(out3_node.name(), "out3");
+  ASSERT_EQ(out3_node.input_size(), 1);
+  EXPECT_EQ(out3_node.input(0), "s");
 
-  auto out4_node = output.node(6);
-  EXPECT_EQ("out4", out4_node.name());
-  EXPECT_EQ(1, out4_node.input_size());
-  EXPECT_EQ("s:1", out4_node.input(0));
+  auto out4_node = output.node(4);
+  EXPECT_EQ(out4_node.name(), "out4");
+  ASSERT_EQ(out4_node.input_size(), 1);
+  EXPECT_EQ(out4_node.input(0), "s:1");
 }
 
 TEST_F(DependencyOptimizerTest, DoNotRemoveIdentityNWithControlDependency) {
@@ -730,11 +749,11 @@
   Status status = optimizer.Optimize(nullptr, item, &optimized_graph_def);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(6, optimized_graph_def.node_size());
+  EXPECT_EQ(optimized_graph_def.node_size(), 6);
 }
 
 TEST_F(DependencyOptimizerTest,
-       Identity_DeviceCrossing_ConsumerOnDifferentDevice) {
+       IdentityDeviceCrossingConsumerOnDifferentDevice) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x_on_1 =
       ops::Const(s.WithOpName("x_on_1").WithDevice("/gpu:1"), {1.0f}, {});
@@ -756,7 +775,7 @@
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(DependencyOptimizerTest, Identity_DeviceCrossing_ConsumerOnSameDevice) {
+TEST_F(DependencyOptimizerTest, IdentityDeviceCrossingConsumerOnSameDevice) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x_on_1 =
       ops::Const(s.WithOpName("x_on_1").WithDevice("/gpu:1"), {1.0f}, {});
@@ -774,12 +793,14 @@
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-  LOG(INFO) << output.DebugString();
-  EXPECT_EQ(3, output.node_size());
+
+  EXPECT_EQ(output.node_size(), 3);
   for (const auto& node : output.node()) {
-    EXPECT_NE("x_on_2", node.name());
+    EXPECT_NE(node.name(), "x_on_2");
     if (node.name() == "result") {
-      EXPECT_EQ("x_on_1", node.input(0));
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "x_on_1");
+      EXPECT_EQ(node.input(1), "one_on_2");
     }
   }
 }
@@ -807,25 +828,25 @@
   for (const NodeDef& node : output.node()) {
     if (node.name() == "x") {
       count++;
-      EXPECT_EQ("Placeholder", node.op());
-      EXPECT_EQ(0, node.input_size());
+      EXPECT_EQ(node.op(), "Placeholder");
+      EXPECT_EQ(node.input_size(), 0);
     } else if (node.name() == "y") {
       count++;
-      EXPECT_EQ("Placeholder", node.op());
-      EXPECT_EQ(0, node.input_size());
+      EXPECT_EQ(node.op(), "Placeholder");
+      EXPECT_EQ(node.input_size(), 0);
     } else if (node.name() == "GreaterEqual") {
       count++;
     } else if (node.name() == "NoOp") {
       count++;
     } else if (node.name() == "z") {
       count++;
-      EXPECT_EQ("Add", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("x", node.input(0));
-      EXPECT_EQ("y", node.input(1));
+      EXPECT_EQ(node.op(), "Add");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "x");
+      EXPECT_EQ(node.input(1), "y");
     }
   }
-  EXPECT_EQ(3, count);
+  EXPECT_EQ(count, 3);
 }
 
 TEST_F(DependencyOptimizerTest, GroupCrossDeviceControlDeps) {
diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc b/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc
deleted file mode 100644
index 75ad8bf..0000000
--- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc
+++ /dev/null
@@ -1,195 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h"
-
-#include <string>
-
-#include "absl/strings/numbers.h"
-#include "absl/strings/str_split.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/grappler/costs/graph_properties.h"
-#include "tensorflow/core/grappler/grappler_item.h"
-#include "tensorflow/core/grappler/op_types.h"
-#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
-#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
-#include "tensorflow/core/grappler/optimizers/function_api_info.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/util/device_name_utils.h"
-
-namespace tensorflow {
-namespace grappler {
-
-Status UpdateNodeDef(NodeDef* node_def, const string& funcName,
-                     const FunctionApiInfo& apiInfo) {
-  VLOG(3) << "Node def before swap is: " << node_def->DebugString();
-  auto tin = node_def->mutable_attr()->find("Tin");
-  tin->second.mutable_list()->clear_type();
-  for (const auto& tin_dtype : apiInfo.input_arg_dtypes()) {
-    tin->second.mutable_list()->add_type(tin_dtype);
-  }
-
-  auto tout = node_def->mutable_attr()->find("Tout");
-  tout->second.mutable_list()->clear_type();
-  for (const auto& tout_dtype : apiInfo.output_arg_dtypes()) {
-    tout->second.mutable_list()->add_type(tout_dtype);
-  }
-
-  if (apiInfo.function_type() == FunctionApiInfo::BACKWARD) {
-    // Update the inputs since for backward function, it might have different
-    // number of inputs due the different number output from forward function.
-    // The output of forward function are composed by two parts:
-    //   1. Real output tensors from defun.
-    //   2. Internal states that will be used for gradient calculation.
-    // Part 1 will be static, and part 2 could be different based on the
-    // different implementation.
-
-    const int prev_input_size = node_def->input_size();
-    const int diff = prev_input_size - apiInfo.input_arg_dtypes().size();
-    if (diff >= 0) {
-      for (int i = 0; i < diff; ++i) node_def->mutable_input()->RemoveLast();
-    } else {
-      // Adding new inputs for internal states, the name of the internal states
-      // should be in format "{forward_node_name}:{index}", where the newly
-      // added index should start from last index of the state.
-      // Eg:
-      // {
-      //   input: "gradients/unified_lstm/strided_slice_1_grad/StridedSliceGrad"
-      //   input: "gradients/zeros_like_1"
-      //   input: "gradients/zeros_like_2"
-      //   input: "unified_lstm/StatefulPartitionedCall:3"
-      //   input: "unified_lstm/StatefulPartitionedCall:4"
-      //   # New input should be "unified_lstm/StatefulPartitionedCall:5"
-      // }
-      const string last_input = node_def->input(prev_input_size - 1);
-      const std::vector<string> name_index = ::absl::StrSplit(last_input, ':');
-      if (name_index.size() != 2) {
-        return errors::InvalidArgument(
-            "Invalid format of input node name: ", last_input,
-            " Expected: {forward_node_name}:{index}");
-      }
-      const absl::string_view node_name = name_index[0];
-      int last_index;
-      if (!::absl::SimpleAtoi(name_index[1], &last_index)) {
-        return errors::InvalidArgument(
-            "The index of input node is expected to be number, got: ",
-            name_index[1]);
-      }
-      for (int i = 1; i <= -diff; ++i)
-        node_def->add_input(strings::StrCat(node_name, ":", i + last_index));
-    }
-  }
-
-  node_def->mutable_attr()->find("f")->second.mutable_func()->set_name(
-      funcName);
-
-  VLOG(3) << "Node def after swap is: " << node_def->DebugString();
-  return Status::OK();
-}
-
-Status ExperimentalImplementationSelector::LoadFunctions(
-    const GraphDef& graph) {
-  lib_info_.reset(new FunctionLibraryApiInfo);
-  TF_RETURN_IF_ERROR(lib_info_->Init(graph.library()));
-  return Status::OK();
-}
-
-Status ExperimentalImplementationSelector::MaybeOptimizeFunctionCall(
-    NodeDef* node_def) const {
-  // There are two ways of calling functions:
-  //  1. By specifying an op name as a function name, or
-  //  2. Via the @defun functional interface, where the real function call
-  //     happens with partitionedcall op, and the function name appear as the
-  //     attribute with name "f" and type func. In this use case, there are more
-  //     attributes need to be taken care, like Tin and Tout which take care of
-  //     the DTYPE of input/output.
-  std::vector<string> function_attribute_names;
-  for (const auto& attr : node_def->attr()) {
-    if (attr.second.has_func() &&
-        lib_info_->GetApiInfo(attr.second.func().name()) != nullptr) {
-      function_attribute_names.emplace_back(attr.first);
-    }
-  }
-
-  if (function_attribute_names.empty() &&
-      lib_info_->GetApiInfo(node_def->op()) == nullptr) {
-    // A regular op, or a function which has no interface.
-    return Status::OK();
-  }
-
-  string task, device;
-  if (!DeviceNameUtils::SplitDeviceName(node_def->device(), &task, &device)) {
-    return errors::Internal("Could not split device name:", node_def->device());
-  }
-  VLOG(2) << "Op " << node_def->name() << " runs on " << node_def->device()
-          << " = (" << task << ", " << device << ")";
-  DeviceNameUtils::ParsedName parsed_name;
-  DeviceNameUtils::ParseLocalName(device, &parsed_name);
-
-  for (const auto& attr_name : function_attribute_names) {
-    string function_name = node_def->attr().at(attr_name).func().name();
-    std::vector<string> equiv_func_names;
-    TF_RETURN_IF_ERROR(lib_info_->GetEquivalentImplementations(
-        function_name, &equiv_func_names));
-    for (const auto& func_name : equiv_func_names) {
-      const auto& func_api_info = lib_info_->GetApiInfo(func_name);
-      if (func_api_info->preferred_device() == parsed_name.type) {
-        VLOG(2) << "Swapping: " << function_name << " TO: " << func_name;
-        TF_RETURN_IF_ERROR(UpdateNodeDef(node_def, func_name, *func_api_info));
-        break;
-      }
-    }
-  }
-
-  if (lib_info_->GetApiInfo(node_def->op()) != nullptr) {
-    std::vector<string> equiv_func_names;
-    TF_RETURN_IF_ERROR(lib_info_->GetEquivalentImplementations(
-        node_def->op(), &equiv_func_names));
-    for (const string& func_name : equiv_func_names) {
-      const auto func_api_info = lib_info_->GetApiInfo(func_name);
-      if (func_api_info->preferred_device() == parsed_name.type) {
-        node_def->set_op(func_name);
-        break;
-      }
-    }
-  }
-  return Status::OK();
-}
-
-Status ExperimentalImplementationSelector::SelectImplementation(
-    GraphDef* graph) const {
-  if (!graph->has_library()) {
-    VLOG(2) << "Skipping graph since it does not have function def";
-    return Status::OK();
-  }
-
-  for (int k = 0; k < graph->node_size(); ++k)
-    TF_RETURN_IF_ERROR(MaybeOptimizeFunctionCall(graph->mutable_node(k)));
-
-  return Status::OK();
-}
-
-Status ExperimentalImplementationSelector::Optimize(Cluster* cluster,
-                                                    const GrapplerItem& item,
-                                                    GraphDef* optimized_graph) {
-  *optimized_graph = item.graph;
-  TF_RETURN_IF_ERROR(LoadFunctions(*optimized_graph));
-  return SelectImplementation(optimized_graph);
-}
-
-}  // end namespace grappler
-}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.h b/tensorflow/core/grappler/optimizers/experimental_implementation_selector.h
deleted file mode 100644
index 82f7473..0000000
--- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_EXPERIMENTAL_IMPLEMENTATION_SELECTOR_H_
-#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_EXPERIMENTAL_IMPLEMENTATION_SELECTOR_H_
-
-#include <string>
-
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/grappler/costs/graph_properties.h"
-#include "tensorflow/core/grappler/grappler_item.h"
-#include "tensorflow/core/grappler/op_types.h"
-#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
-#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
-#include "tensorflow/core/grappler/optimizers/function_api_info.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/util/device_name_utils.h"
-
-namespace tensorflow {
-namespace grappler {
-
-// -- EXPERIMENTAL --
-// This transformation replaces function calls by the appropriate function
-// definition based on properties of the runtime system. For instance,
-// we may choose one implementation over another if we have a GPU with
-// enough memory available.
-//
-// It is a way for the programmer to specify alternative implementations
-// of the same functionality in the graph, and let TensorFlow pick the
-// most appropriate one at runtime.
-//
-// For instance, the python code might specify:
-// @Defun(tf.float32,
-//        experimental_api_implements='plus_one',
-//        experimental_api_preferred_device='GPU')
-// def plus_one_gpu(x): return x + 1.0
-//
-// @Defun(tf.float32,
-//        experimental_api_implements='plus_one')
-// def plus_one_reference_implementation(x): return x + 1.0
-// input = tf.constant(2.0, dtype=tf.float32)
-//
-// z = plus_one_reference_implementation(input)
-// z = plus_one_gpu(input)
-// print(sess.run(z))
-//
-// At runtime, we will trim either `plus_one_gpu` or
-// `plus_one_reference_implementation` based on the availability of the GPU.
-//
-// Available annotations:
-//  - experimental_api_implements(string): all functions mapping to the same
-//    string can be interchanged. For now, all functions must have the same
-//    signature and overloads are not allowed. Defuns within defuns are
-//    allowed.
-//  - experimental_api_preferred_device(string): sets which device is preferred.
-class ExperimentalImplementationSelector : public CustomGraphOptimizer {
- public:
-  ExperimentalImplementationSelector() = default;
-  ~ExperimentalImplementationSelector() override = default;
-  Status Init(
-      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
-    return Status::OK();
-  }
-  string name() const override {
-    return "experimental_implementation_selector";
-  }
-
-  // This call is not thread-safe.
-  Status Optimize(Cluster* cluster, const GrapplerItem& item,
-                  GraphDef* optimized_graph) override;
-
-  // Does not take any feedback.
-  void Feedback(Cluster* cluster, const GrapplerItem& item,
-                const GraphDef& optimized_graph, double result) override {}
-
- private:
-  Status LoadFunctions(const GraphDef& graph);
-  Status MaybeOptimizeFunctionCall(NodeDef* node_def) const;
-
-  // Finds all call sites for functions, then replace with the appropriate
-  // implementation.
-  // There are two ways of calling functions:
-  //  1. By specifying an op name as a function name, and
-  //  2. Via the functional interface, where the function name appears as an
-  //  Attr.
-  //
-  // There may be multiple call sites for a given function. The function body
-  // may call into another function, so a function might have to be duplicated.
-  // For simplicity, we do not change function bodies. Also, we do not change
-  // gradients.
-  Status SelectImplementation(GraphDef* graph) const;
-
-  std::unique_ptr<FunctionLibraryApiInfo> lib_info_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(ExperimentalImplementationSelector);
-};
-
-}  // namespace grappler
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_EXPERIMENTAL_IMPLEMENTATION_SELECTOR_H_
diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc b/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc
deleted file mode 100644
index e330835..0000000
--- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc
+++ /dev/null
@@ -1,233 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h"
-
-#include <algorithm>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "tensorflow/core/framework/function.h"
-#include "tensorflow/core/framework/function_testlib.h"
-#include "tensorflow/core/framework/tensor_testutil.h"
-#include "tensorflow/core/grappler/grappler_item.h"
-#include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
-#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
-#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
-#include "tensorflow/core/grappler/utils/grappler_test.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace grappler {
-namespace {
-
-constexpr char CpuDevice[] = "/device:CPU:0";
-constexpr char GpuDevice[] = "/device:GPU:0";
-
-class ExperimentalImplementationSelectorTest : public GrapplerTest {};
-
-TEST_F(ExperimentalImplementationSelectorTest, NoUpdate) {
-  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {CpuDevice});
-  GrapplerItem item;
-  CHECK(fake_input.NextItem(&item));
-
-  std::unique_ptr<CustomGraphOptimizer> optimizer(
-      new ExperimentalImplementationSelector);
-  ASSERT_NE(nullptr, optimizer);
-  TF_ASSERT_OK(optimizer->Init());
-
-  GraphDef output;
-  const Status status = optimizer->Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
-
-  // This is a trivial graph so there is nothing to update.
-  EXPECT_EQ(item.graph.node_size(), output.node_size());
-}
-
-TEST_F(ExperimentalImplementationSelectorTest, SwapImplementation) {
-  using test::function::NDef;
-  auto cpu_def = test::function::XTimesTwo();
-  auto* func_attr = cpu_def.mutable_attr();
-  (*func_attr)["experimental_api_implements"].set_s("times_two");
-  (*func_attr)["experimental_api_preferred_device"].set_s("CPU");
-
-  auto gpu_def = test::function::XAddX();
-  auto* func2_attr = gpu_def.mutable_attr();
-  (*func2_attr)["experimental_api_implements"].set_s("times_two");
-  (*func2_attr)["experimental_api_preferred_device"].set_s("GPU");
-
-  ExperimentalImplementationSelector optimizer;
-  GraphDef output;
-  GrapplerItem item;
-  item.graph = test::function::GDef(
-      {NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, GpuDevice),
-       NDef("y1", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}}, GpuDevice),
-       NDef("z1", "Identity", {"y1"}, {{"T", DT_FLOAT}}, GpuDevice),
-       NDef("y2", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}}, CpuDevice),
-       NDef("z2", "Identity", {"y2"}, {{"T", DT_FLOAT}}, CpuDevice)},
-      // FunctionLib
-      {cpu_def, gpu_def});
-
-  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
-
-  EXPECT_EQ(output.node_size(), 5);
-  for (const NodeDef& node : output.node()) {
-    if (node.name() == "y1") {
-      // Make sure the implementation has been swapped to use the GPU version.
-      EXPECT_EQ("XAddX", node.op());
-    } else if (node.name() == "y2") {
-      // Make sure the implementation is not changed.
-      EXPECT_EQ("XTimesTwo", node.op());
-    }
-  }
-}
-
-TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationEval) {
-  using test::function::NDef;
-  auto cpu_def = test::function::XTimesTwo();
-  auto* func_attr = cpu_def.mutable_attr();
-  (*func_attr)["experimental_api_implements"].set_s("random_boost");
-  (*func_attr)["experimental_api_preferred_device"].set_s("CPU");
-
-  auto gpu_def = test::function::XTimesFour();
-  auto* func2_attr = gpu_def.mutable_attr();
-  (*func2_attr)["experimental_api_implements"].set_s("random_boost");
-  (*func2_attr)["experimental_api_preferred_device"].set_s("GPU");
-
-  ExperimentalImplementationSelector optimizer;
-  GraphDef output;
-  GrapplerItem item;
-  item.graph = test::function::GDef(
-      {NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, CpuDevice),
-       NDef("y", "XTimesFour", {"x"}, {{"T", DT_FLOAT}}, CpuDevice),
-       NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, CpuDevice)},
-      // FunctionLib
-      {cpu_def, gpu_def});
-
-  const Tensor input = test::AsScalar<float>(1.0f);
-  item.fetch = {"z"};
-  item.feed.emplace_back("x", input);
-
-  const auto four_times_boosted_tensor = EvaluateFetchNodes(item);
-  test::ExpectTensorEqual<float>(four_times_boosted_tensor[0],
-                                 test::AsScalar<float>(4.0f));
-
-  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
-  GrapplerItem optimized = item.WithGraph(std::move(output));
-  const auto twice_boosted_tensor = EvaluateFetchNodes(optimized);
-  test::ExpectTensorEqual<float>(twice_boosted_tensor[0],
-                                 test::AsScalar<float>(2.0f));
-}
-
-TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationWithGradient) {
-  using test::function::NDef;
-  using FDH = FunctionDefHelper;
-  // boost_1 returns the doubled input and a const as the internal state, the
-  // state will be feed to gradient function to mimic the behavior of backward
-  // function of defun that use internal states as extra inputs.
-  FunctionDef boost_1 = FDH::Create(
-      "Boost1", {"x:float"}, {"z:float", "s:float"}, {},
-      {{{"boost"}, "Add", {"x", "x"}, {{"T", DT_FLOAT}}},
-       FDH::Const("one", 1.0f)},
-      /* Mapping between function returns and function node outputs. */
-      {{"z", "boost:z:0"}, {"s", "one:output:0"}});
-  auto* boost_1_attr = boost_1.mutable_attr();
-  (*boost_1_attr)["experimental_api_implements"].set_s("random_boost");
-  (*boost_1_attr)["experimental_api_preferred_device"].set_s("CPU");
-  (*boost_1_attr)["backward_function_name"].set_s("BoostCpuGradient");
-
-  FunctionDef boost_1_gradient = FDH::Create(
-      "Boost1Gradient", {"x:float", "s:float"}, {"dx:float"}, {},
-      {FDH::Const("two", 2.0f),
-       {{"grad"}, "Mul", {"x", "two:output:0"}, {{"T", DT_FLOAT}}}},
-      /* Mapping between function returns and function node outputs. */
-      {{"dx", "grad:z:0"}});
-  auto* boost_1_grad_attr = boost_1_gradient.mutable_attr();
-  (*boost_1_grad_attr)["experimental_api_implements"].set_s("random_boost");
-  (*boost_1_grad_attr)["experimental_api_preferred_device"].set_s("CPU");
-  (*boost_1_grad_attr)["forward_function_name"].set_s("BoostCpu");
-
-  // boost_2 return the input * 4, and with two extra internal states.
-  FunctionDef boost_2_func = FDH::Create(
-      "Boost2", {"x:float"}, {"z:float", "s1:float", "s2:float"}, {},
-      {FDH::Const("four", 4.0f),
-       {{"boost"}, "Mul", {"x", "four:output:0"}, {{"T", DT_FLOAT}}},
-       FDH::Const("one", 1.0f),
-       FDH::Const("two", 2.0f)},
-      /* Mapping between function returns and function node outputs. */
-      {{"z", "boost:z:0"}, {"s1", "one:output:0"}, {"s2", "two:output:0"}});
-  auto* boost_2_attr = boost_2_func.mutable_attr();
-  (*boost_2_attr)["experimental_api_implements"].set_s("random_boost");
-  (*boost_2_attr)["experimental_api_preferred_device"].set_s("GPU");
-  (*boost_2_attr)["backward_function_name"].set_s("BoostGpuGradient");
-
-  FunctionDef boost_2_gradient = FDH::Create(
-      "Boost2Gradient", {"x:float", "s1:float", "s2:float"}, {"dx:float"}, {},
-      {FDH::Const("four", 4.0f),
-       {{"grad"}, "Mul", {"x", "four:output:0"}, {{"T", DT_FLOAT}}}},
-      /* Mapping between function returns and function node outputs. */
-      {{"dx", "grad:z:0"}});
-  auto* boost_2_grad_attr = boost_2_gradient.mutable_attr();
-  (*boost_2_grad_attr)["experimental_api_implements"].set_s("random_boost");
-  (*boost_2_grad_attr)["experimental_api_preferred_device"].set_s("GPU");
-  (*boost_2_grad_attr)["forward_function_name"].set_s("BoostGpu");
-
-  // Define the forward function with f = boost2 function but with CPU device.
-  // Expect the grappler plugin to swap f and attributes to use the boost1.
-  const auto forward =
-      NDef("lstm/StatefulPartitionedCall", "StatefulPartitionedCall", {"input"},
-           {{"Tin", DataTypeSlice{DT_FLOAT}},
-            {"Tout", DataTypeSlice{DT_FLOAT, DT_FLOAT, DT_FLOAT}},
-            {"f", FDH::FunctionRef("Boost2")}},
-           CpuDevice);
-  const auto backward =
-      NDef("gradient/lstm/StatefulPartitionedCall", "StatefulPartitionedCall",
-           {"input", "lstm/StatefulPartitionedCall:1",
-            "lstm/StatefulPartitionedCall:2"},
-           {{"Tin", DataTypeSlice{DT_FLOAT, DT_FLOAT, DT_FLOAT}},
-            {"Tout", DataTypeSlice{DT_FLOAT}},
-            {"f", FDH::FunctionRef("Boost2Gradient")}},
-           CpuDevice);
-
-  ExperimentalImplementationSelector optimizer;
-  GraphDef output;
-  GrapplerItem item;
-  item.graph = test::function::GDef(
-      {NDef("input", "Placeholder", {}, {{"dtype", DT_FLOAT}}, CpuDevice),
-       forward, backward,
-       NDef("output", "Identity", {"lstm/StatefulPartitionedCall:0"},
-            {{"T", DT_FLOAT}}, CpuDevice)},
-      // FunctionLib
-      {boost_1, boost_1_gradient, boost_2_func, boost_2_gradient});
-
-  const Tensor input = test::AsScalar<float>(1.0f);
-  item.fetch = {"output"};
-  item.feed.emplace_back("input", input);
-
-  const auto four_times_boosted_tensor = EvaluateFetchNodes(item);
-  test::ExpectTensorEqual<float>(four_times_boosted_tensor[0],
-                                 test::AsScalar<float>(4.0f));
-
-  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
-  GrapplerItem optimized = item.WithGraph(std::move(output));
-  const auto twice_boosted_tensor = EvaluateFetchNodes(optimized);
-  test::ExpectTensorEqual<float>(twice_boosted_tensor[0],
-                                 test::AsScalar<float>(2.0f));
-}
-}  // namespace
-}  // namespace grappler
-}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/function_api_info.cc b/tensorflow/core/grappler/optimizers/function_api_info.cc
index 497ad60..9f6352f 100644
--- a/tensorflow/core/grappler/optimizers/function_api_info.cc
+++ b/tensorflow/core/grappler/optimizers/function_api_info.cc
@@ -29,10 +29,10 @@
 Status FunctionApiInfo::Init(const FunctionDef& function_def) {
   function_type_ = FunctionApiInfo::FunctionType::INFERENCE;
   for (const auto& attr : function_def.attr()) {
-    if (attr.first == "experimental_api_preferred_device") {
+    if (attr.first == "api_preferred_device") {
       preferred_device_ = attr.second.s();
     }
-    if (attr.first == "experimental_api_implements") {
+    if (attr.first == "api_implements") {
       interface_name_ = attr.second.s();
     }
     if (attr.first == "forward_function_name") {
diff --git a/tensorflow/core/grappler/optimizers/function_api_info.h b/tensorflow/core/grappler/optimizers/function_api_info.h
index 9a5f548..ffa53a7 100644
--- a/tensorflow/core/grappler/optimizers/function_api_info.h
+++ b/tensorflow/core/grappler/optimizers/function_api_info.h
@@ -80,6 +80,8 @@
       const string& function_name, std::vector<string>* other_functions) const;
 
   const FunctionApiInfo* GetApiInfo(const string& function_name) const;
+  bool empty() const { return func_info_.empty(); }
+  std::size_t size() const { return func_info_.size(); }
 
  private:
   // Map between function name to function details.
diff --git a/tensorflow/core/grappler/optimizers/function_api_info_test.cc b/tensorflow/core/grappler/optimizers/function_api_info_test.cc
index b683d26..9bb517f 100644
--- a/tensorflow/core/grappler/optimizers/function_api_info_test.cc
+++ b/tensorflow/core/grappler/optimizers/function_api_info_test.cc
@@ -58,9 +58,9 @@
 
   auto* func_attr = func_def->mutable_attr();
   if (!api_interface_name.empty())
-    (*func_attr)["experimental_api_implements"].set_s(api_interface_name);
+    (*func_attr)["api_implements"].set_s(api_interface_name);
   if (!preferred_device.empty())
-    (*func_attr)["experimental_api_preferred_device"].set_s(preferred_device);
+    (*func_attr)["api_preferred_device"].set_s(preferred_device);
   if (!forward_function_name.empty())
     (*func_attr)["forward_function_name"].set_s(forward_function_name);
   if (!backward_function_name.empty())
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
index c737398..2787d9d 100644
--- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
@@ -1378,7 +1378,7 @@
 
   // All `PartitionedCall` nodes in the optimized graph must be inlined, and
   // `If` node must be lowered to `Switch` and `Merge` nodes.
-  EXPECT_EQ(count_nodes_with_op("PartitionedCallOp"), 0);
+  EXPECT_EQ(count_nodes_with_op("PartitionedCall"), 0);
   EXPECT_EQ(count_nodes_with_op("If"), 0);
   EXPECT_EQ(count_nodes_with_op("Switch"), 3);
   EXPECT_EQ(count_nodes_with_op("Merge"), 1);
diff --git a/tensorflow/core/grappler/optimizers/implementation_selector.cc b/tensorflow/core/grappler/optimizers/implementation_selector.cc
new file mode 100644
index 0000000..a370bf9
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/implementation_selector.cc
@@ -0,0 +1,199 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/implementation_selector.h"
+
+#include <string>
+
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_split.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/function_api_info.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/util/device_name_utils.h"
+
+namespace tensorflow {
+namespace grappler {
+
+Status UpdateNodeDef(NodeDef* node_def, const string& funcName,
+                     const FunctionApiInfo& apiInfo) {
+  VLOG(3) << "Node def before swap is: " << node_def->DebugString();
+  auto tin = node_def->mutable_attr()->find("Tin");
+  tin->second.mutable_list()->clear_type();
+  for (const auto& tin_dtype : apiInfo.input_arg_dtypes()) {
+    tin->second.mutable_list()->add_type(tin_dtype);
+  }
+
+  auto tout = node_def->mutable_attr()->find("Tout");
+  tout->second.mutable_list()->clear_type();
+  for (const auto& tout_dtype : apiInfo.output_arg_dtypes()) {
+    tout->second.mutable_list()->add_type(tout_dtype);
+  }
+
+  if (apiInfo.function_type() == FunctionApiInfo::BACKWARD) {
+    // Update the inputs, since the backward function may have a different
+    // number of inputs due to a different number of outputs from the forward
+    // function. The outputs of the forward function are composed of two parts:
+    //   1. Real output tensors from the defun.
+    //   2. Internal states that will be used for gradient calculation.
+    // Part 1 is static, while part 2 can differ between implementations.
+
+    const int prev_input_size = node_def->input_size();
+    const int diff = prev_input_size - apiInfo.input_arg_dtypes().size();
+    if (diff >= 0) {
+      for (int i = 0; i < diff; ++i) node_def->mutable_input()->RemoveLast();
+    } else {
+      // Add new inputs for the internal states. The names of the internal
+      // states should be in the format "{forward_node_name}:{index}", where
+      // newly added indices continue from the last index of the state.
+      // E.g.:
+      // {
+      //   input: "gradients/unified_lstm/strided_slice_1_grad/StridedSliceGrad"
+      //   input: "gradients/zeros_like_1"
+      //   input: "gradients/zeros_like_2"
+      //   input: "unified_lstm/StatefulPartitionedCall:3"
+      //   input: "unified_lstm/StatefulPartitionedCall:4"
+      //   # New input should be "unified_lstm/StatefulPartitionedCall:5"
+      // }
+      const string last_input = node_def->input(prev_input_size - 1);
+      const std::vector<string> name_index = ::absl::StrSplit(last_input, ':');
+      if (name_index.size() != 2) {
+        return errors::InvalidArgument(
+            "Invalid format of input node name: ", last_input,
+            " Expected: {forward_node_name}:{index}");
+      }
+      const absl::string_view node_name = name_index[0];
+      int last_index;
+      if (!::absl::SimpleAtoi(name_index[1], &last_index)) {
+        return errors::InvalidArgument(
+            "The index of input node is expected to be number, got: ",
+            name_index[1]);
+      }
+      for (int i = 1; i <= -diff; ++i)
+        node_def->add_input(strings::StrCat(node_name, ":", i + last_index));
+    }
+  }
+
+  node_def->mutable_attr()->find("f")->second.mutable_func()->set_name(
+      funcName);
+
+  VLOG(3) << "Node def after swap is: " << node_def->DebugString();
+  return Status::OK();
+}
+
+Status ImplementationSelector::LoadFunctions(const GraphDef& graph) {
+  lib_info_.reset(new FunctionLibraryApiInfo);
+  TF_RETURN_IF_ERROR(lib_info_->Init(graph.library()));
+  return Status::OK();
+}
+
+Status ImplementationSelector::MaybeOptimizeFunctionCall(
+    NodeDef* node_def) const {
+  // There are two ways of calling functions:
+  //  1. By specifying an op name as a function name, or
+  //  2. Via the @defun functional interface, where the real function call
+  //     happens through a PartitionedCall op and the function name appears
+  //     as the attribute named "f" of type func. In this case, additional
+  //     attributes such as Tin and Tout, which describe the input/output
+  //     dtypes, need to be taken care of as well.
+  std::vector<string> function_attribute_names;
+  for (const auto& attr : node_def->attr()) {
+    if (attr.second.has_func() &&
+        lib_info_->GetApiInfo(attr.second.func().name()) != nullptr) {
+      function_attribute_names.emplace_back(attr.first);
+    }
+  }
+
+  if (function_attribute_names.empty() &&
+      lib_info_->GetApiInfo(node_def->op()) == nullptr) {
+    // A regular op, or a function which has no interface.
+    return Status::OK();
+  }
+
+  string task, device;
+  if (!DeviceNameUtils::SplitDeviceName(node_def->device(), &task, &device)) {
+    return errors::Internal("Could not split device name:", node_def->device());
+  }
+  VLOG(2) << "Op " << node_def->name() << " runs on " << node_def->device()
+          << " = (" << task << ", " << device << ")";
+  DeviceNameUtils::ParsedName parsed_name;
+  DeviceNameUtils::ParseLocalName(device, &parsed_name);
+
+  for (const auto& attr_name : function_attribute_names) {
+    string function_name = node_def->attr().at(attr_name).func().name();
+    std::vector<string> equiv_func_names;
+    TF_RETURN_IF_ERROR(lib_info_->GetEquivalentImplementations(
+        function_name, &equiv_func_names));
+    for (const auto& func_name : equiv_func_names) {
+      const auto& func_api_info = lib_info_->GetApiInfo(func_name);
+      if (func_api_info->preferred_device() == parsed_name.type) {
+        VLOG(2) << "Swapping: " << function_name << " TO: " << func_name;
+        TF_RETURN_IF_ERROR(UpdateNodeDef(node_def, func_name, *func_api_info));
+        break;
+      }
+    }
+  }
+
+  if (lib_info_->GetApiInfo(node_def->op()) != nullptr) {
+    std::vector<string> equiv_func_names;
+    TF_RETURN_IF_ERROR(lib_info_->GetEquivalentImplementations(
+        node_def->op(), &equiv_func_names));
+    for (const string& func_name : equiv_func_names) {
+      const auto func_api_info = lib_info_->GetApiInfo(func_name);
+      if (func_api_info->preferred_device() == parsed_name.type) {
+        node_def->set_op(func_name);
+        break;
+      }
+    }
+  }
+  return Status::OK();
+}
+
+Status ImplementationSelector::SelectImplementation(GraphDef* graph) const {
+  if (!graph->has_library()) {
+    VLOG(2) << "Skipping graph since it does not have function def";
+    return Status::OK();
+  }
+  if (lib_info_->empty()) {
+    VLOG(2) << "Skipping optimization since lib_info is empty";
+    return Status::OK();
+  }
+
+  for (int k = 0; k < graph->node_size(); ++k)
+    TF_RETURN_IF_ERROR(MaybeOptimizeFunctionCall(graph->mutable_node(k)));
+
+  return Status::OK();
+}
+
+Status ImplementationSelector::Optimize(Cluster* cluster,
+                                        const GrapplerItem& item,
+                                        GraphDef* optimized_graph) {
+  *optimized_graph = item.graph;
+  TF_RETURN_IF_ERROR(LoadFunctions(*optimized_graph));
+  return SelectImplementation(optimized_graph);
+}
+
+}  // end namespace grappler
+}  // end namespace tensorflow
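
The trickiest part of the new pass is the input renumbering in UpdateNodeDef when a swapped backward function expects more internal-state inputs than the call site currently has. A self-contained sketch of that arithmetic (illustrative names; std containers in place of NodeDef):

#include <iostream>
#include <string>
#include <vector>

#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"

int main() {
  // The call site currently ends with "fwd:4"; the replacement backward
  // function expects two more inputs, so diff = prev_input_size - expected
  // input count = -2.
  std::vector<std::string> inputs = {"gradients/zeros_like_1", "fwd:3",
                                     "fwd:4"};
  const int diff = -2;
  const std::vector<std::string> name_index =
      absl::StrSplit(inputs.back(), ':');
  int last_index = 0;
  if (name_index.size() == 2 && absl::SimpleAtoi(name_index[1], &last_index)) {
    // Continue numbering from the last state index: append fwd:5 and fwd:6.
    for (int i = 1; i <= -diff; ++i)
      inputs.push_back(name_index[0] + ":" + std::to_string(i + last_index));
  }
  for (const auto& in : inputs) std::cout << in << "\n";
}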
diff --git a/tensorflow/core/grappler/optimizers/implementation_selector.h b/tensorflow/core/grappler/optimizers/implementation_selector.h
new file mode 100644
index 0000000..c206d21
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/implementation_selector.h
@@ -0,0 +1,114 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_IMPLEMENTATION_SELECTOR_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_IMPLEMENTATION_SELECTOR_H_
+
+#include <string>
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/function_api_info.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/util/device_name_utils.h"
+
+namespace tensorflow {
+namespace grappler {
+
+// This transformation replaces function calls by the appropriate function
+// definition based on properties of the runtime system. For instance,
+// we may choose one implementation over another if we have a GPU with
+// enough memory available.
+//
+// It is a way for the programmer to specify alternative implementations
+// of the same functionality in the graph, and let TensorFlow pick the
+// most appropriate one at runtime.
+//
+// For instance, the python code might specify:
+// @Defun(tf.float32,
+//        api_implements='plus_one',
+//        api_preferred_device='GPU')
+// def plus_one_gpu(x): return x + 1.0
+//
+// @Defun(tf.float32,
+//        api_implements='plus_one')
+// def plus_one_reference_implementation(x): return x + 1.0
+// input = tf.constant(2.0, dtype=tf.float32)
+//
+// z = plus_one_reference_implementation(input)
+// z = plus_one_gpu(input)
+// print(sess.run(z))
+//
+// At runtime, we will trim either `plus_one_gpu` or
+// `plus_one_reference_implementation` based on the availability of the GPU.
+//
+// Available annotations:
+//  - api_implements(string): all functions mapping to the same
+//    string can be interchanged. For now, all functions must have the same
+//    signature and overloads are not allowed. Defuns within defuns are
+//    allowed.
+//  - api_preferred_device(string): sets which device is preferred.
+class ImplementationSelector : public CustomGraphOptimizer {
+ public:
+  ImplementationSelector() = default;
+  ~ImplementationSelector() override = default;
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+  string name() const override { return "implementation_selector"; }
+
+  // This call is not thread-safe.
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override;
+
+  // Does not take any feedback.
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override {}
+
+ private:
+  Status LoadFunctions(const GraphDef& graph);
+  Status MaybeOptimizeFunctionCall(NodeDef* node_def) const;
+
+  // Finds all call sites for functions, then replaces them with the
+  // appropriate implementation.
+  // There are two ways of calling functions:
+  //  1. By specifying an op name as a function name, and
+  //  2. Via the functional interface, where the function name appears as an
+  //  Attr.
+  //
+  // There may be multiple call sites for a given function. The function body
+  // may call into another function, so a function might have to be duplicated.
+  // For simplicity, we do not change function bodies. Also, we do not change
+  // gradients.
+  Status SelectImplementation(GraphDef* graph) const;
+
+  std::unique_ptr<FunctionLibraryApiInfo> lib_info_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(ImplementationSelector);
+};
+
+}  // namespace grappler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_IMPLEMENTATION_SELECTOR_H_
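
At its core, the selection policy documented above is first-match on the call site's device type across the interchangeable implementations. A simplified sketch of that policy (hypothetical pair-based registry in place of FunctionLibraryApiInfo):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Each candidate pairs a function name with its api_preferred_device value.
using Candidates = std::vector<std::pair<std::string, std::string>>;

std::string Select(const std::string& device_type, const Candidates& impls) {
  for (const auto& impl : impls)
    if (impl.second == device_type) return impl.first;  // first match wins
  return "";  // empty: keep the original function unchanged
}

int main() {
  const Candidates impls = {{"plus_one_reference_implementation", "CPU"},
                            {"plus_one_gpu", "GPU"}};
  std::cout << Select("GPU", impls) << "\n";  // plus_one_gpu
  std::cout << Select("CPU", impls) << "\n";  // plus_one_reference_implementation
}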
diff --git a/tensorflow/core/grappler/optimizers/implementation_selector_test.cc b/tensorflow/core/grappler/optimizers/implementation_selector_test.cc
new file mode 100644
index 0000000..e2f5896
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/implementation_selector_test.cc
@@ -0,0 +1,232 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/grappler/optimizers/implementation_selector.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/utils/grappler_test.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+constexpr char CpuDevice[] = "/device:CPU:0";
+constexpr char GpuDevice[] = "/device:GPU:0";
+
+class ImplementationSelectorTest : public GrapplerTest {};
+
+TEST_F(ImplementationSelectorTest, NoUpdate) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {CpuDevice});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  std::unique_ptr<CustomGraphOptimizer> optimizer(new ImplementationSelector);
+  ASSERT_NE(nullptr, optimizer);
+  TF_ASSERT_OK(optimizer->Init());
+
+  GraphDef output;
+  const Status status = optimizer->Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  // This is a trivial graph so there is nothing to update.
+  EXPECT_EQ(item.graph.node_size(), output.node_size());
+}
+
+TEST_F(ImplementationSelectorTest, SwapImplementation) {
+  using test::function::NDef;
+  auto cpu_def = test::function::XTimesTwo();
+  auto* func_attr = cpu_def.mutable_attr();
+  (*func_attr)["api_implements"].set_s("times_two");
+  (*func_attr)["api_preferred_device"].set_s("CPU");
+
+  auto gpu_def = test::function::XAddX();
+  auto* func2_attr = gpu_def.mutable_attr();
+  (*func2_attr)["api_implements"].set_s("times_two");
+  (*func2_attr)["api_preferred_device"].set_s("GPU");
+
+  ImplementationSelector optimizer;
+  GraphDef output;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, GpuDevice),
+       NDef("y1", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}}, GpuDevice),
+       NDef("z1", "Identity", {"y1"}, {{"T", DT_FLOAT}}, GpuDevice),
+       NDef("y2", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}}, CpuDevice),
+       NDef("z2", "Identity", {"y2"}, {{"T", DT_FLOAT}}, CpuDevice)},
+      // FunctionLib
+      {cpu_def, gpu_def});
+
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  EXPECT_EQ(output.node_size(), 5);
+  for (const NodeDef& node : output.node()) {
+    if (node.name() == "y1") {
+      // Make sure the implementation has been swapped to use the GPU version.
+      EXPECT_EQ("XAddX", node.op());
+    } else if (node.name() == "y2") {
+      // Make sure the implementation is not changed.
+      EXPECT_EQ("XTimesTwo", node.op());
+    }
+  }
+}
+
+TEST_F(ImplementationSelectorTest, SwapImplementationEval) {
+  using test::function::NDef;
+  auto cpu_def = test::function::XTimesTwo();
+  auto* func_attr = cpu_def.mutable_attr();
+  (*func_attr)["api_implements"].set_s("random_boost");
+  (*func_attr)["api_preferred_device"].set_s("CPU");
+
+  auto gpu_def = test::function::XTimesFour();
+  auto* func2_attr = gpu_def.mutable_attr();
+  (*func2_attr)["api_implements"].set_s("random_boost");
+  (*func2_attr)["api_preferred_device"].set_s("GPU");
+
+  ImplementationSelector optimizer;
+  GraphDef output;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, CpuDevice),
+       NDef("y", "XTimesFour", {"x"}, {{"T", DT_FLOAT}}, CpuDevice),
+       NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, CpuDevice)},
+      // FunctionLib
+      {cpu_def, gpu_def});
+
+  const Tensor input = test::AsScalar<float>(1.0f);
+  item.fetch = {"z"};
+  item.feed.emplace_back("x", input);
+
+  const auto four_times_boosted_tensor = EvaluateFetchNodes(item);
+  test::ExpectTensorEqual<float>(four_times_boosted_tensor[0],
+                                 test::AsScalar<float>(4.0f));
+
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+  GrapplerItem optimized = item.WithGraph(std::move(output));
+  const auto twice_boosted_tensor = EvaluateFetchNodes(optimized);
+  test::ExpectTensorEqual<float>(twice_boosted_tensor[0],
+                                 test::AsScalar<float>(2.0f));
+}
+
+TEST_F(ImplementationSelectorTest, SwapImplementationWithGradient) {
+  using test::function::NDef;
+  using FDH = FunctionDefHelper;
+  // boost_1 returns the doubled input and a const as the internal state. The
+  // state will be fed to the gradient function to mimic the behavior of a
+  // defun backward function that uses internal states as extra inputs.
+  FunctionDef boost_1 = FDH::Create(
+      "Boost1", {"x:float"}, {"z:float", "s:float"}, {},
+      {{{"boost"}, "Add", {"x", "x"}, {{"T", DT_FLOAT}}},
+       FDH::Const("one", 1.0f)},
+      /* Mapping between function returns and function node outputs. */
+      {{"z", "boost:z:0"}, {"s", "one:output:0"}});
+  auto* boost_1_attr = boost_1.mutable_attr();
+  (*boost_1_attr)["api_implements"].set_s("random_boost");
+  (*boost_1_attr)["api_preferred_device"].set_s("CPU");
+  (*boost_1_attr)["backward_function_name"].set_s("BoostCpuGradient");
+
+  FunctionDef boost_1_gradient = FDH::Create(
+      "Boost1Gradient", {"x:float", "s:float"}, {"dx:float"}, {},
+      {FDH::Const("two", 2.0f),
+       {{"grad"}, "Mul", {"x", "two:output:0"}, {{"T", DT_FLOAT}}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"dx", "grad:z:0"}});
+  auto* boost_1_grad_attr = boost_1_gradient.mutable_attr();
+  (*boost_1_grad_attr)["api_implements"].set_s("random_boost");
+  (*boost_1_grad_attr)["api_preferred_device"].set_s("CPU");
+  (*boost_1_grad_attr)["forward_function_name"].set_s("BoostCpu");
+
+  // boost_2 returns the input * 4, with two extra internal states.
+  FunctionDef boost_2_func = FDH::Create(
+      "Boost2", {"x:float"}, {"z:float", "s1:float", "s2:float"}, {},
+      {FDH::Const("four", 4.0f),
+       {{"boost"}, "Mul", {"x", "four:output:0"}, {{"T", DT_FLOAT}}},
+       FDH::Const("one", 1.0f),
+       FDH::Const("two", 2.0f)},
+      /* Mapping between function returns and function node outputs. */
+      {{"z", "boost:z:0"}, {"s1", "one:output:0"}, {"s2", "two:output:0"}});
+  auto* boost_2_attr = boost_2_func.mutable_attr();
+  (*boost_2_attr)["api_implements"].set_s("random_boost");
+  (*boost_2_attr)["api_preferred_device"].set_s("GPU");
+  (*boost_2_attr)["backward_function_name"].set_s("BoostGpuGradient");
+
+  FunctionDef boost_2_gradient = FDH::Create(
+      "Boost2Gradient", {"x:float", "s1:float", "s2:float"}, {"dx:float"}, {},
+      {FDH::Const("four", 4.0f),
+       {{"grad"}, "Mul", {"x", "four:output:0"}, {{"T", DT_FLOAT}}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"dx", "grad:z:0"}});
+  auto* boost_2_grad_attr = boost_2_gradient.mutable_attr();
+  (*boost_2_grad_attr)["api_implements"].set_s("random_boost");
+  (*boost_2_grad_attr)["api_preferred_device"].set_s("GPU");
+  (*boost_2_grad_attr)["forward_function_name"].set_s("BoostGpu");
+
+  // Define the forward function with f = the Boost2 function, but on the CPU
+  // device. Expect the grappler pass to swap f and its attributes to use
+  // Boost1.
+  const auto forward =
+      NDef("lstm/StatefulPartitionedCall", "StatefulPartitionedCall", {"input"},
+           {{"Tin", DataTypeSlice{DT_FLOAT}},
+            {"Tout", DataTypeSlice{DT_FLOAT, DT_FLOAT, DT_FLOAT}},
+            {"f", FDH::FunctionRef("Boost2")}},
+           CpuDevice);
+  const auto backward =
+      NDef("gradient/lstm/StatefulPartitionedCall", "StatefulPartitionedCall",
+           {"input", "lstm/StatefulPartitionedCall:1",
+            "lstm/StatefulPartitionedCall:2"},
+           {{"Tin", DataTypeSlice{DT_FLOAT, DT_FLOAT, DT_FLOAT}},
+            {"Tout", DataTypeSlice{DT_FLOAT}},
+            {"f", FDH::FunctionRef("Boost2Gradient")}},
+           CpuDevice);
+
+  ImplementationSelector optimizer;
+  GraphDef output;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("input", "Placeholder", {}, {{"dtype", DT_FLOAT}}, CpuDevice),
+       forward, backward,
+       NDef("output", "Identity", {"lstm/StatefulPartitionedCall:0"},
+            {{"T", DT_FLOAT}}, CpuDevice)},
+      // FunctionLib
+      {boost_1, boost_1_gradient, boost_2_func, boost_2_gradient});
+
+  const Tensor input = test::AsScalar<float>(1.0f);
+  item.fetch = {"output"};
+  item.feed.emplace_back("input", input);
+
+  const auto four_times_boosted_tensor = EvaluateFetchNodes(item);
+  test::ExpectTensorEqual<float>(four_times_boosted_tensor[0],
+                                 test::AsScalar<float>(4.0f));
+
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+  GrapplerItem optimized = item.WithGraph(std::move(output));
+  const auto twice_boosted_tensor = EvaluateFetchNodes(optimized);
+  test::ExpectTensorEqual<float>(twice_boosted_tensor[0],
+                                 test::AsScalar<float>(2.0f));
+}
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc
index cf5e4db..54776e7 100644
--- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc
@@ -581,8 +581,19 @@
   return Status::OK();
 }
 
+// TODO(lyandy): Consolidate with ConstantFolding implementation.
+bool IsReallyConstant(const NodeDef& node,
+                      const absl::flat_hash_set<string>& feed_nodes) {
+  if (!IsConstant(node)) {
+    return false;
+  }
+  // If the node is fed, it is no longer constant.
+  return feed_nodes.find(node.name()) == feed_nodes.end();
+}
+
 Status CheckForDeadFanout(const MutableGraphView& view,
                           const NodeDef& switch_node, const NodeMap& node_map,
+                          const absl::flat_hash_set<string>& feed_nodes,
                           DeviceBase* cpu_device, ResourceMgr* resource_mgr,
                           bool* has_dead_fanout, int* dead_fanout) {
   *has_dead_fanout = false;
@@ -591,7 +602,7 @@
       view.GetRegularFanin(switch_loopcond_port).node;
 
   // CASE 1: Control is a constant.
-  if (IsConstant(*switch_predicate)) {
+  if (IsReallyConstant(*switch_predicate, feed_nodes)) {
     Tensor selector;
     CHECK(selector.FromProto(switch_predicate->attr().at("value").tensor()));
     *has_dead_fanout = true;
@@ -630,7 +641,7 @@
     if (IsMerge(*node)) {
       merge_node = node;
     }
-    if (IsConstant(*node)) {
+    if (IsReallyConstant(*node, feed_nodes)) {
       constant_ctrl_input = node;
       constant_index = i;
     }
@@ -646,7 +657,7 @@
     if (IsEnter(*node)) {
       enter_node = node;
     }
-    if (IsConstant(*node)) {
+    if (IsReallyConstant(*node, feed_nodes)) {
       constant_init_node = node;
     }
   }
@@ -654,7 +665,7 @@
     if (constant_init_node != nullptr) return Status::OK();
     for (const auto& input : enter_node->input()) {
       NodeDef* node = node_map.GetNode(input);
-      if (IsConstant(*node)) {
+      if (IsReallyConstant(*node, feed_nodes)) {
         constant_init_node = node;
       }
     }
@@ -710,8 +721,12 @@
     // TODO(srjoglekar): Figure out if we can optimize NodeMap creations across
     // optimizer passes.
     NodeMap node_map(optimized_graph);
-    TF_RETURN_IF_ERROR(
-        RemoveDeadBranches(item.NodesToPreserve(), node_map, optimized_graph));
+    absl::flat_hash_set<string> feed_nodes;
+    for (const auto& feed : item.feed) {
+      feed_nodes.insert(NodeName(feed.first));
+    }
+    TF_RETURN_IF_ERROR(RemoveDeadBranches(item.NodesToPreserve(), node_map,
+                                          feed_nodes, optimized_graph));
   }
 
   return Status::OK();
@@ -719,7 +734,8 @@
 
 Status LoopOptimizer::RemoveDeadBranches(
     const std::unordered_set<string>& nodes_to_preserve,
-    const NodeMap& node_map, GraphDef* optimized_graph) {
+    const NodeMap& node_map, const absl::flat_hash_set<string>& feed_nodes,
+    GraphDef* optimized_graph) {
   std::unordered_set<const NodeDef*> dead_nodes;
   std::unordered_map<NodeDef*, std::set<int>> dead_merge_inputs;
   // TODO(bsteiner): also rewrite switches as identity. For now we just record
@@ -737,9 +753,9 @@
 
     int dead_fanout;
     bool has_dead_fanout;
-    TF_RETURN_IF_ERROR(CheckForDeadFanout(view, node, node_map, cpu_device_,
-                                          resource_mgr_.get(), &has_dead_fanout,
-                                          &dead_fanout));
+    TF_RETURN_IF_ERROR(CheckForDeadFanout(view, node, node_map, feed_nodes,
+                                          cpu_device_, resource_mgr_.get(),
+                                          &has_dead_fanout, &dead_fanout));
     if (!has_dead_fanout) {
       continue;
     }
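
The feed-aware constancy check threaded through these hunks prevents dead-branch removal from treating a fed Const as a fixed value: a node in item.feed can take a different value on every Session::Run. A standalone sketch of the check (NodeDef and absl::flat_hash_set simplified to plain std types):

#include <iostream>
#include <string>
#include <unordered_set>

bool IsReallyConstant(const std::string& op, const std::string& name,
                      const std::unordered_set<std::string>& feed_nodes) {
  if (op != "Const") return false;
  // If the node is fed, its value may change between runs, so it must not
  // drive the removal of a Switch branch.
  return feed_nodes.find(name) == feed_nodes.end();
}

int main() {
  const std::unordered_set<std::string> feeds = {"predicate"};
  std::cout << IsReallyConstant("Const", "predicate", feeds) << "\n";   // 0
  std::cout << IsReallyConstant("Const", "loop_limit", feeds) << "\n";  // 1
}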
diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.h b/tensorflow/core/grappler/optimizers/loop_optimizer.h
index d467237..7fa1976 100644
--- a/tensorflow/core/grappler/optimizers/loop_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/loop_optimizer.h
@@ -60,7 +60,9 @@
   };
 
   Status RemoveDeadBranches(const std::unordered_set<string>& nodes_to_preserve,
-                            const NodeMap& node_map, GraphDef* optimized_graph);
+                            const NodeMap& node_map,
+                            const absl::flat_hash_set<string>& feed_nodes,
+                            GraphDef* optimized_graph);
 
   RewriterConfig::Toggle opt_level_;
   DeviceBase* cpu_device_;
diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc
index 587767c..db4494d 100644
--- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc
@@ -504,11 +504,11 @@
   for (int i = 0; i < original_graph.node_size(); ++i) {
     const NodeDef& original = original_graph.node(i);
     const NodeDef& optimized = optimized_graph.node(i);
-    EXPECT_EQ(original.name(), optimized.name()) << func;
-    EXPECT_EQ(original.op(), optimized.op()) << func;
-    EXPECT_EQ(original.input_size(), optimized.input_size()) << func;
+    EXPECT_EQ(optimized.name(), original.name()) << func;
+    EXPECT_EQ(optimized.op(), original.op()) << func;
+    ASSERT_EQ(optimized.input_size(), original.input_size()) << func;
     for (int j = 0; j < original.input_size(); ++j) {
-      EXPECT_EQ(original.input(j), optimized.input(j)) << func;
+      EXPECT_EQ(optimized.input(j), original.input(j)) << func;
     }
   }
 }
@@ -528,7 +528,7 @@
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(LoopOptimizerTest, RemovePush_NoOp) {
+TEST_F(LoopOptimizerTest, RemovePushNoOp) {
   GrapplerItem item;
   GraphDef& graph = item.graph;
   AddSimpleNode("c", "Const", {}, &graph);
@@ -557,7 +557,7 @@
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(LoopOptimizerTest, RemovePush_NoPopButStackLives) {
+TEST_F(LoopOptimizerTest, RemovePushNoPopButStackLives) {
   GrapplerItem item;
   GraphDef& graph = item.graph;
   AddSimpleNode("c", "Const", {}, &graph);
@@ -609,32 +609,32 @@
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
-  EXPECT_EQ(13, output.node_size());
+  EXPECT_EQ(output.node_size(), 13);
   for (int i = 0; i < output.node_size(); ++i) {
     const NodeDef& node = output.node(i);
     if (node.name() == "push1") {
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("c", node.input(0));
-      EXPECT_EQ("^stack1", node.input(1));
+      EXPECT_EQ(node.op(), "Identity");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "c");
+      EXPECT_EQ(node.input(1), "^stack1");
     } else if (node.name() == "push2") {
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("enter_c", node.input(0));
-      EXPECT_EQ("^enter_stack2", node.input(1));
+      EXPECT_EQ(node.op(), "Identity");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "enter_c");
+      EXPECT_EQ(node.input(1), "^enter_stack2");
     } else if (node.name() == "push3") {
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("c", node.input(0));
-      EXPECT_EQ("^stack3", node.input(1));
+      EXPECT_EQ(node.op(), "Identity");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "c");
+      EXPECT_EQ(node.input(1), "^stack3");
     } else {
       const NodeDef& orig_node = item.graph.node(i);
-      EXPECT_EQ(orig_node.ShortDebugString(), node.ShortDebugString());
+      EXPECT_EQ(node.ShortDebugString(), orig_node.ShortDebugString());
     }
   }
 }
 
-TEST_F(LoopOptimizerTest, RemoveDeadBranches_ConstantCondition) {
+TEST_F(LoopOptimizerTest, RemoveDeadBranchesConstantCondition) {
   Scope scope = Scope::NewRootScope();
   Output v_in = ops::Variable(scope.WithOpName("v_in"), {3}, DT_FLOAT);
 
@@ -691,57 +691,57 @@
 
   for (const NodeDef& node : output.node()) {
     // These nodes should have been pruned
-    EXPECT_NE("Square1", node.name());
-    EXPECT_NE("Sqrt2", node.name());
-    EXPECT_NE("m5", node.name());
-    EXPECT_NE("m7", node.name());
+    EXPECT_NE(node.name(), "Square1");
+    EXPECT_NE(node.name(), "Sqrt2");
+    EXPECT_NE(node.name(), "m5");
+    EXPECT_NE(node.name(), "m7");
 
     if (node.name() == "m1") {
       // sqrt1 is dead
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("square1", node.input(0));
+      EXPECT_EQ(node.op(), "Identity");
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "square1");
     } else if (node.name() == "m2") {
       // both inputs are alive
-      EXPECT_EQ("Merge", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("v_in", node.input(0));
-      EXPECT_EQ("square1", node.input(1));
+      EXPECT_EQ(node.op(), "Merge");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "v_in");
+      EXPECT_EQ(node.input(1), "square1");
     } else if (node.name() == "m3") {
       // sqrt1 is dead
-      EXPECT_EQ("Identity", node.op());
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("v_in", node.input(0));
+      EXPECT_EQ(node.op(), "Identity");
+      ASSERT_EQ(node.input_size(), 1);
+      EXPECT_EQ(node.input(0), "v_in");
     } else if (node.name() == "m4") {
       // both inputs are alive
-      EXPECT_EQ("Merge", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("square1", node.input(0));
-      EXPECT_EQ("sqrt2", node.input(1));
+      EXPECT_EQ(node.op(), "Merge");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "square1");
+      EXPECT_EQ(node.input(1), "sqrt2");
     } else if (node.name() == "m6") {
       // both inputs are alive and the control dependency can get triggered
-      EXPECT_EQ("Merge", node.op());
-      EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("v_in", node.input(0));
-      EXPECT_EQ("square1", node.input(1));
-      EXPECT_EQ("^sqrt2", node.input(2));
+      EXPECT_EQ(node.op(), "Merge");
+      ASSERT_EQ(node.input_size(), 3);
+      EXPECT_EQ(node.input(0), "v_in");
+      EXPECT_EQ(node.input(1), "square1");
+      EXPECT_EQ(node.input(2), "^sqrt2");
     } else if (node.name() == "m8") {
       // The node is to be preserved because of a fetch
-      EXPECT_EQ("Merge", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("id1", node.input(0));
-      EXPECT_EQ("id2", node.input(1));
+      EXPECT_EQ(node.op(), "Merge");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "id1");
+      EXPECT_EQ(node.input(1), "id2");
     } else if (node.name() == "m9") {
       // The node is to be preserved because of a fetch
-      EXPECT_EQ("Merge", node.op());
-      EXPECT_EQ(2, node.input_size());
-      EXPECT_EQ("id3", node.input(0));
-      EXPECT_EQ("id4", node.input(1));
+      EXPECT_EQ(node.op(), "Merge");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "id3");
+      EXPECT_EQ(node.input(1), "id4");
     }
   }
 }
 
-TEST_F(LoopOptimizerTest, RemoveDeadBranches_FullyRemoveDeadBranches) {
+TEST_F(LoopOptimizerTest, RemoveDeadBranchesFullyRemoveDeadBranches) {
   const string gdef_ascii = R"EOF(
 node {
   name: "episodicreplaybuffer_add_readvariableop_resource"
@@ -1153,7 +1153,7 @@
       << "Merge node was deleted, but it shouldn't have been.";
 }
 
-TEST_F(LoopOptimizerTest, RemoveDeadBranches_ZeroIterWhile) {
+TEST_F(LoopOptimizerTest, RemoveDeadBranchesZeroIterWhile) {
   const string gdef_ascii = R"EOF(
 node {
   name: "Const"
@@ -1358,15 +1358,15 @@
   CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &item.graph));
   item.fetch = {"while/Exit"};
   auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
-  EXPECT_EQ(1, tensors_expected.size());
+  ASSERT_EQ(tensors_expected.size(), 1);
 
   LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE, nullptr);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_CHECK_OK(status);
   auto tensors_got = EvaluateNodes(output, item.fetch);
-  EXPECT_EQ(1, tensors_got.size());
-  test::ExpectTensorEqual<int32>(tensors_expected[0], tensors_got[0]);
+  ASSERT_EQ(tensors_got.size(), 1);
+  test::ExpectTensorEqual<int32>(tensors_got[0], tensors_expected[0]);
 
   int nodes_present = 0;
   for (const NodeDef& node : output.node()) {
@@ -1382,7 +1382,200 @@
     }
     ++nodes_present;
   }
-  EXPECT_EQ(8, nodes_present);
+  EXPECT_EQ(nodes_present, 8);
+}
+
+TEST_F(LoopOptimizerTest, RemoveDeadBranchesConstantFeed) {
+  const string gdef_ascii = R"EOF(
+node {
+  name: "Const"
+  op: "Const"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        string_val: "I\'m a value!"
+      }
+    }
+  }
+}
+node {
+  name: "cond/Switch_1"
+  op: "Switch"
+  input: "Const"
+  input: "Const_1"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@Const"
+      }
+    }
+  }
+}
+node {
+  name: "Const_1"
+  op: "Const"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: true
+      }
+    }
+  }
+}
+node {
+  name: "cond/Switch"
+  op: "Switch"
+  input: "Const_1"
+  input: "Const_1"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+}
+node {
+  name: "cond/switch_t"
+  op: "Identity"
+  input: "cond/Switch:1"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+}
+node {
+  name: "cond/Const"
+  op: "Const"
+  input: "^cond/switch_t"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "cond/Merge"
+  op: "Merge"
+  input: "cond/Switch_1"
+  input: "cond/Const"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+}
+node {
+  name: "Identity"
+  op: "Identity"
+  input: "cond/Merge"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+}
+library {
+}
+versions {
+  producer: 27
+}
+  )EOF";
+
+  GrapplerItem item;
+  CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &item.graph));
+  item.fetch = {"Identity"};
+  Tensor feed_tensor(DT_BOOL, {});
+  feed_tensor.flat<bool>()(0) = false;
+  item.feed.push_back({"Const_1", feed_tensor});
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+  ASSERT_EQ(tensors_expected.size(), 1);
+
+  LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE, nullptr);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_CHECK_OK(status);
+  auto tensors_got = EvaluateNodes(output, item.fetch);
+  ASSERT_EQ(tensors_got.size(), 1);
+  test::ExpectTensorEqual<string>(tensors_got[0], tensors_expected[0]);
+
+  EXPECT_EQ(output.node_size(), 8);
+
+  // No rewrite because the branch has a constant feed node.
+  bool found = false;
+  for (const NodeDef& node : output.node()) {
+    if (node.name() == "cond/Merge") {
+      EXPECT_EQ(node.op(), "Merge");
+      ASSERT_EQ(node.input_size(), 2);
+      EXPECT_EQ(node.input(0), "cond/Switch_1");
+      EXPECT_EQ(node.input(1), "cond/Const");
+      found = true;
+      break;
+    }
+  }
+  EXPECT_TRUE(found);
 }
 
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 33942a5..36d68a7 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -26,8 +26,8 @@
 #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
 #include "tensorflow/core/grappler/optimizers/debug_stripper.h"
 #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h"
-#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h"
 #include "tensorflow/core/grappler/optimizers/function_optimizer.h"
+#include "tensorflow/core/grappler/optimizers/implementation_selector.h"
 #include "tensorflow/core/grappler/optimizers/layout_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/loop_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/memory_optimizer.h"
@@ -148,6 +148,9 @@
   if (!cfg_.disable_model_pruning()) {
     optimizers->push_back(MakeUnique<ModelPruner>());
   }
+  if (cfg_.implementation_selector() != RewriterConfig::OFF) {
+    optimizers->push_back(MakeUnique<ImplementationSelector>());
+  }
   if (cfg_.function_optimization() != RewriterConfig::OFF) {
     optimizers->push_back(
         MakeUnique<FunctionOptimizer>(cfg_.function_optimization()));
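
With the rename from `ExperimentalImplementationSelector` to `ImplementationSelector`, the pass is gated by a `RewriterConfig` toggle like every other built-in optimizer, rather than through the custom-optimizer special case removed in the next hunk. A minimal sketch of flipping the new toggle from a `ConfigProto`; the field name comes from this diff, the surrounding setup is assumed:

```c++
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/protobuf/rewriter_config.pb.h"

tensorflow::ConfigProto config;
tensorflow::RewriterConfig* rewriter =
    config.mutable_graph_options()->mutable_rewrite_options();
// OFF skips the pass; any other value keeps it in the default optimizer list.
rewriter->set_implementation_selector(tensorflow::RewriterConfig::OFF);
```
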
@@ -241,18 +244,10 @@
         pre_initialized_optimizers.end()) {
       continue;
     }
-    // Initialize the ExperimentalImplementationSelector here instead of
-    // CustomizeOptimizer registry, due the static link issue in TensorRT for
-    // double registry.
-    // TODO(laigd): Remove this hack and change it back to use the registry once
-    // the duplicate static import issue is fixed.
-    std::unique_ptr<CustomGraphOptimizer> custom_optimizer;
-    if (optimizer_config.name() == "ExperimentalImplementationSelector") {
-      custom_optimizer.reset(new ExperimentalImplementationSelector());
-    } else {
-      custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
-          optimizer_config.name());
-    }
+
+    auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
+        optimizer_config.name());
+
     if (custom_optimizer) {
       VLOG(2) << "Registered custom configurable graph optimizer: "
               << optimizer_config.name();
@@ -701,9 +696,10 @@
 }
 
 Status OptimizeGraph(
-    std::vector<string> ret_node_names, FunctionLibraryDefinition* flib,
-    const DeviceSet& device_set, Device* cpu_device,
-    const ConfigProto& config_proto, const string& grappler_item_id,
+    std::vector<string> ret_node_names, std::vector<string> keep_node_names,
+    FunctionLibraryDefinition* flib, const DeviceSet& device_set,
+    Device* cpu_device, const ConfigProto& config_proto,
+    const string& grappler_item_id,
     const GrapplerItem::OptimizationOptions& optimization_options,
     std::unique_ptr<tensorflow::Graph>* g) {
   if (!tensorflow::grappler::MetaOptimizerEnabled(config_proto)) {
@@ -723,6 +719,9 @@
   // Add fetches so that the graph can be pruned.
   item.fetch.swap(ret_node_names);
 
+  // Add nodes that can't be removed from the graph.
+  item.keep_ops = std::move(keep_node_names);
+
   (*g)->ToGraphDef(&item.graph);
 
   if (flib) {
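
`keep_node_names` complements the fetch list: fetches mark the tensors a caller consumes, while `keep_ops` marks nodes (typically a function's control outputs) that produce nothing fetched but still must not be pruned. A hypothetical call site under the new signature; `flib`, `device_set`, and the remaining arguments are assumed to be in scope:

```c++
// Hypothetical call under the signature introduced in this diff. "update" is
// an illustrative side-effecting node that no fetched tensor depends on.
std::vector<string> ret_node_names = {"output"};
std::vector<string> keep_node_names = {"update"};
TF_RETURN_IF_ERROR(tensorflow::grappler::OptimizeGraph(
    std::move(ret_node_names), std::move(keep_node_names), flib, device_set,
    cpu_device, config_proto, grappler_item_id, optimization_options, &graph));
```
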
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h
index 751a9e5..b8f0c8e 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h
@@ -129,9 +129,10 @@
 // complete copy. Therefore, the caller should not keep any references
 // to nodes *g.
 Status OptimizeGraph(
-    std::vector<string> ret_node_names, FunctionLibraryDefinition* lib,
-    const DeviceSet& device_set, Device* cpu_device,
-    const ConfigProto& config_proto, const string& grappler_item_id,
+    std::vector<string> ret_node_names, std::vector<string> keep_node_names,
+    FunctionLibraryDefinition* lib, const DeviceSet& device_set,
+    Device* cpu_device, const ConfigProto& config_proto,
+    const string& grappler_item_id,
     const GrapplerItem::OptimizationOptions& optimization_options,
     std::unique_ptr<tensorflow::Graph>* g);
 
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 375c3e5..7d4dfb0 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -40,7 +40,7 @@
 namespace grappler {
 namespace {
 template <typename T>
-bool SafeSetScalarTensorValue(double value, Tensor* tensor) {
+bool SafeSetDoubleScalarTensorValue(double value, Tensor* tensor) {
   using RealType = typename Eigen::NumTraits<T>::Real;
   if (value > static_cast<double>(Eigen::NumTraits<RealType>::highest()) ||
       value < static_cast<double>(Eigen::NumTraits<RealType>::lowest())) {
@@ -50,6 +50,17 @@
   return true;
 }
 
+template <typename T>
+bool SafeSetIntScalarTensorValue(int value, Tensor* tensor) {
+  using RealType = typename Eigen::NumTraits<T>::Real;
+  if (value > static_cast<int>(Eigen::NumTraits<RealType>::highest()) ||
+      value < static_cast<int>(Eigen::NumTraits<RealType>::lowest())) {
+    return false;
+  }
+  tensor->flat<T>()(0) = static_cast<T>(value);
+  return true;
+}
+
 // Is 'node' an operator that consumes only the shape of its input, not the
 // data itself?
 // TODO(ezhulenev): move to op_types.h. Requires breaking a circular dependency.
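
`SafeSetIntScalarTensorValue` parallels the renamed `SafeSetDoubleScalarTensorValue` but keeps the value as `int`, so the quantized integer types are not pushed through a lossy round trip via `double`; the `Eigen::NumTraits` bounds check rejects anything that cannot be represented. A behavior sketch (the helper sits in an anonymous namespace, so this is illustrative only):

```c++
using tensorflow::DT_QINT8;
using tensorflow::Tensor;
using tensorflow::TensorShape;
using tensorflow::qint8;

Tensor t(DT_QINT8, TensorShape({}));
bool ok = SafeSetIntScalarTensorValue<qint8>(127, &t);            // true: in range
bool overflow = SafeSetIntScalarTensorValue<qint8>(1 << 20, &t);  // false: too big
```
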
@@ -410,35 +421,50 @@
   EraseNodesFromGraphImpl(nodes_idx_to_delete, graph);
 }
 
-#define HANDLE_CASE(DTYPE)                                          \
-  case DTYPE:                                                       \
-    if (!SafeSetScalarTensorValue<EnumToDataType<DTYPE>::Type>(     \
-            static_cast<double>(value), tensor)) {                  \
-      return errors::InvalidArgument("Cannot store value ", value,  \
-                                     " in tensor of type " #DTYPE); \
-    }                                                               \
+#define HANDLE_DOUBLE_CASE(DTYPE)                                     \
+  case DTYPE:                                                         \
+    if (!SafeSetDoubleScalarTensorValue<EnumToDataType<DTYPE>::Type>( \
+            static_cast<double>(value), tensor)) {                    \
+      return errors::InvalidArgument("Cannot store value ", value,    \
+                                     " in tensor of type " #DTYPE);   \
+    }                                                                 \
+    break
+
+#define HANDLE_INT_CASE(DTYPE)                                               \
+  case DTYPE:                                                                \
+    if (!SafeSetIntScalarTensorValue<EnumToDataType<DTYPE>::Type>(value,     \
+                                                                  tensor)) { \
+      return errors::InvalidArgument("Cannot store value ", value,           \
+                                     " in tensor of type " #DTYPE);          \
+    }                                                                        \
     break
 
 Status SetTensorValue(DataType dtype, int value, Tensor* tensor) {
   // TODO(rmlarsen): Support more general shapes.
+  // TODO(lyandy): Change `value` to be int64 once int64 -> qint32 is supported.
   if (tensor->NumElements() != 1) {
     return errors::InvalidArgument(
         "Expected scalar tensor, got num_elements = ", tensor->NumElements());
   }
   switch (dtype) {
-    HANDLE_CASE(DT_HALF);
-    HANDLE_CASE(DT_BFLOAT16);
-    HANDLE_CASE(DT_BOOL);
-    HANDLE_CASE(DT_FLOAT);
-    HANDLE_CASE(DT_DOUBLE);
-    HANDLE_CASE(DT_UINT8);
-    HANDLE_CASE(DT_INT8);
-    HANDLE_CASE(DT_UINT16);
-    HANDLE_CASE(DT_INT16);
-    HANDLE_CASE(DT_INT32);
-    HANDLE_CASE(DT_INT64);
-    HANDLE_CASE(DT_COMPLEX64);
-    HANDLE_CASE(DT_COMPLEX128);
+    HANDLE_DOUBLE_CASE(DT_HALF);
+    HANDLE_DOUBLE_CASE(DT_BFLOAT16);
+    HANDLE_DOUBLE_CASE(DT_BOOL);
+    HANDLE_DOUBLE_CASE(DT_FLOAT);
+    HANDLE_DOUBLE_CASE(DT_DOUBLE);
+    HANDLE_DOUBLE_CASE(DT_UINT8);
+    HANDLE_DOUBLE_CASE(DT_INT8);
+    HANDLE_DOUBLE_CASE(DT_UINT16);
+    HANDLE_DOUBLE_CASE(DT_INT16);
+    HANDLE_DOUBLE_CASE(DT_INT32);
+    HANDLE_DOUBLE_CASE(DT_INT64);
+    HANDLE_DOUBLE_CASE(DT_COMPLEX64);
+    HANDLE_DOUBLE_CASE(DT_COMPLEX128);
+    HANDLE_INT_CASE(DT_QINT8);
+    HANDLE_INT_CASE(DT_QUINT8);
+    HANDLE_INT_CASE(DT_QINT16);
+    HANDLE_INT_CASE(DT_QUINT16);
+    HANDLE_INT_CASE(DT_QINT32);
     default:
       return errors::InvalidArgument("Unsupported type ",
                                      DataTypeString(dtype));
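
After this change `SetTensorValue` routes real and complex types through the double path and the five quantized types through the int path, behind the same interface callers already use; out-of-range values surface as `InvalidArgument`. A usage sketch matching the error text asserted by the new test in `utils_test.cc`:

```c++
tensorflow::Tensor t(tensorflow::DT_QINT32, tensorflow::TensorShape({}));
TF_CHECK_OK(tensorflow::grappler::SetTensorValue(tensorflow::DT_QINT32, 42, &t));

tensorflow::Tensor u(tensorflow::DT_QUINT8, tensorflow::TensorShape({}));
tensorflow::Status s =
    tensorflow::grappler::SetTensorValue(tensorflow::DT_QUINT8, -8, &u);
// s.error_message(): "Cannot store value -8 in tensor of type DT_QUINT8"
```
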
diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc
index 35979d4..2ec9794 100644
--- a/tensorflow/core/grappler/utils/functions.cc
+++ b/tensorflow/core/grappler/utils/functions.cc
@@ -311,15 +311,14 @@
     string func_name, string description, AttrSlice func_attr,
     std::vector<InputArgExpansion> input_arg_expansions,
     std::vector<OutputArgExpansion> output_arg_expansions,
-    std::vector<string> keep_nodes, const int graph_def_version,
+    std::vector<ControlOutput> control_outputs, const int graph_def_version,
     const bool is_stateful, GraphDef&& function_body)
     : description_(std::move(description)),
       func_attr_(func_attr),
       input_arg_expansions_(std::move(input_arg_expansions)),
       output_arg_expansions_(std::move(output_arg_expansions)),
+      control_outputs_(std::move(control_outputs)),
       is_stateful_(is_stateful) {
-  // Move assign GrapplerItem members.
-  keep_ops = std::move(keep_nodes);
   id = std::move(func_name);
   graph = std::move(function_body);
 
@@ -336,6 +335,10 @@
       fetch.push_back(output_node);
     }
   }
+  // We must keep all control output nodes.
+  for (const ControlOutput& control_output : control_outputs_) {
+    keep_ops.push_back(control_output.node_name);
+  }
 
   // TensorFlow function execution semantics differ from those of the main
   // graph, and we need to preserve them when we do graph optimizations.
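
Appending control output nodes to `keep_ops` is what gives a function's side effects fetch-like liveness: passes that prune from the fetch set also treat `keep_ops` entries as roots. Roughly, and this is an illustration rather than the exact pruner code:

```c++
#include "absl/container/flat_hash_set.h"

// Liveness roots are the fetched tensors' nodes plus every keep_ops entry;
// nodes unreachable from a root along input edges are candidates for pruning.
absl::flat_hash_set<std::string> roots;
for (const std::string& fetch : item.fetch) {
  roots.insert(tensorflow::grappler::NodeName(fetch));
}
roots.insert(item.keep_ops.begin(), item.keep_ops.end());
```
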
@@ -368,6 +371,15 @@
   return output_arg_expansions_.size();
 }
 
+const std::vector<ControlOutput>& GrapplerFunctionItem::control_outputs()
+    const {
+  return control_outputs_;
+}
+
+const std::size_t GrapplerFunctionItem::control_output_size() const {
+  return control_outputs_.size();
+}
+
 const AttrSlice& GrapplerFunctionItem::func_attr() const { return func_attr_; }
 
 const GraphDef& GrapplerFunctionItem::function_body() const { return graph; }
@@ -624,15 +636,20 @@
     outputs.push_back(std::move(output));
   }
 
-  std::vector<string> keep_ops;
-  bool is_stateful = signature.is_stateful();
+  // Control outputs ensure that all side-effectful nodes in the function body
+  // will execute, even if they are not required to compute regular output args.
+  std::vector<ControlOutput> control_outputs;
+  control_outputs.reserve(func.control_ret_size());
+  for (const auto& control_ret : func.control_ret()) {
+    control_outputs.push_back({control_ret.first, control_ret.second});
+  }
 
   *item = GrapplerFunctionItem(
       /*func_name=*/signature.name(),
       /*description=*/signature.description(),
       /*func_attr=*/AttrSlice(&func.attr()), std::move(inputs),
-      std::move(outputs), std::move(keep_ops), graph_def_version, is_stateful,
-      std::move(function_body));
+      std::move(outputs), std::move(control_outputs), graph_def_version,
+      signature.is_stateful(), std::move(function_body));
   return Status::OK();
 }
 
@@ -844,6 +861,13 @@
         &(*func->mutable_ret())[output_arg.output_name]));
   }
 
+  // Add function control outputs.
+  for (const ControlOutput& control_out : item.control_outputs()) {
+    func->mutable_control_ret()->insert(
+        {control_out.output_name, control_out.node_name});
+    *func->mutable_signature()->add_control_output() = control_out.output_name;
+  }
+
   // Copy function definition specific attributes.
   for (const auto& attr : item.func_attr()) {
     const auto& attr_name = attr.first;
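
Writing each control output into both `control_ret` (exported name to body node) and the signature's `control_output` list is what makes the FunctionDef to GrapplerFunctionItem to FunctionDef round trip lossless for side effects, which the reworked `FunctionDefGrapplerFunctionItemRoundTrip` test below exercises. A read-back sketch, assuming the `MakeFunctionDef` helper in this file and the names used by the new tests:

```c++
tensorflow::FunctionDef func;
TF_CHECK_OK(tensorflow::grappler::MakeFunctionDef(item, flib, &func));
// control_ret maps the exported name to the body node that must execute.
EXPECT_EQ(func.control_ret().at("side_effects"), "update");
EXPECT_EQ(func.signature().control_output(0), "side_effects");
```
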
diff --git a/tensorflow/core/grappler/utils/functions.h b/tensorflow/core/grappler/utils/functions.h
index d5a41e7..d450f6a 100644
--- a/tensorflow/core/grappler/utils/functions.h
+++ b/tensorflow/core/grappler/utils/functions.h
@@ -74,6 +74,12 @@
   absl::InlinedVector<string, 1> output_nodes;
 };
 
+// Maps a control output name to a node name in the function body graph.
+struct ControlOutput {
+  string output_name;
+  string node_name;
+};
+
 // FunctionDef uses a different connectivity encoding for the function body nodes
 // than a GraphDef (see function.proto for details). Input name in FunctionDef
 // can potentially represent a sequence of tensors (instead of just one tensor in
@@ -161,6 +167,9 @@
   const OutputArgExpansion& output(int i) const;
   const std::size_t output_size() const;
 
+  const std::vector<ControlOutput>& control_outputs() const;
+  const std::size_t control_output_size() const;
+
   const AttrSlice& func_attr() const;
   const GraphDef& function_body() const;
   GraphDef& mutable_function_body();
@@ -183,8 +192,9 @@
                        AttrSlice func_attr,
                        std::vector<InputArgExpansion> input_arg_expansions,
                        std::vector<OutputArgExpansion> output_arg_expansions,
-                       std::vector<string> keep_nodes, int graph_def_version,
-                       bool is_stateful, GraphDef&& function_body);
+                       std::vector<ControlOutput> control_outputs,
+                       int graph_def_version, bool is_stateful,
+                       GraphDef&& function_body);
 
   string description_;
   AttrSlice func_attr_;  // Attributes specific to function definition that
@@ -192,6 +202,7 @@
 
   std::vector<InputArgExpansion> input_arg_expansions_;
   std::vector<OutputArgExpansion> output_arg_expansions_;
+  std::vector<ControlOutput> control_outputs_;
 
   bool is_stateful_ = false;
 };
@@ -241,7 +252,7 @@
                              GrapplerFunctionItem* item,
                              std::vector<std::pair<int, int>>* output_mapping);
 
-// TODO(ezhulennev, b/120103818): Add RemoveFunctionInputs.
+// TODO(ezhulenev, b/120103818): Add RemoveFunctionInputs.
 
 // Make a GrapplerFunctionItem from the function definition and function
 // instantiation attributes (caller node attributes). Returns error if the given
diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc
index 30b6195..813e6a3 100644
--- a/tensorflow/core/grappler/utils/functions_test.cc
+++ b/tensorflow/core/grappler/utils/functions_test.cc
@@ -646,6 +646,38 @@
   EXPECT_FALSE(opts.allow_pruning_stateful_and_dataset_ops);
 }
 
+TEST_F(FunctionsTest, FromFunctionDefWithControlOutputs) {
+  const Tensor kOne = test::AsScalar<float>(1.0);
+  FunctionDef func = FunctionDefHelper::Create(
+      "WithControlOutputs", /*in_def=*/{"x: Ref(float)"}, /*out_def=*/{}, {},
+      {
+          {{"one"}, "Const", {}, {{"value", kOne}, {"dtype", DT_FLOAT}}},
+          {{"update"}, "AssignAdd", {"x", "one:output:0"}, {{"T", DT_FLOAT}}},
+      },
+      {}, {{"side_effects", "update"}});
+
+  protobuf::Map<string, AttrValue> func_instantiation_attr;
+  FunctionLibraryDefinition flib(OpRegistry::Global(), FunctionDefLibrary());
+
+  GrapplerFunctionItem item;
+  TF_EXPECT_OK(MakeGrapplerFunctionItem(func,
+                                        AttrSlice(&func_instantiation_attr),
+                                        flib, TF_GRAPH_DEF_VERSION, &item));
+
+  EXPECT_EQ("WithControlOutputs", item.id);
+  EXPECT_EQ(3, item.function_body().node_size());
+  EXPECT_EQ(1, item.input_size());
+  EXPECT_EQ(0, item.output_size());
+
+  ASSERT_EQ(1, item.keep_ops.size());
+  EXPECT_EQ("update", item.keep_ops[0]);
+
+  ASSERT_EQ(1, item.control_output_size());
+  const ControlOutput& ctrl = item.control_outputs()[0];
+  EXPECT_EQ("side_effects", ctrl.output_name);
+  EXPECT_EQ("update", ctrl.node_name);
+}
+
 TEST_F(FunctionsTest, MakeFunctionDef) {
   const Tensor kTwo = test::AsScalar<int64>(2);
   FunctionDef func = FunctionDefHelper::Define(
@@ -827,17 +859,14 @@
 }
 
 TEST_F(FunctionsTest, FunctionDefGrapplerFunctionItemRoundTrip) {
-  FunctionDef func = FunctionDefHelper::Define(
-      // Name
-      "DoNothing",
-      // Args
-      {"i: int32"},
-      // Return values
-      {"o: int32"},
-      // Attr def
-      {},
-      // Nodes
-      {{{"o"}, "Identity", {"i"}, {{"T", DT_INT32}}}});
+  FunctionDef func = FunctionDefHelper::Create(
+      "DoNothing", /*in_def=*/{"i: int32"}, /*out_def*/ {"o: int32"},
+      /*attr_def*/ {},
+      {
+          {{"id"}, "Identity", {"i"}, {{"T", DT_INT32}}},
+      },
+      /*ret_def=*/{{"o", "id:output:0"}},
+      /*control_ret_def=*/{{"must_execute", "id"}});
 
   constexpr char description[] = "This is a helpful description.";
   func.mutable_signature()->set_description(description);
diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc
index 1b4b9f9..3a0eec6 100644
--- a/tensorflow/core/grappler/utils/grappler_test.cc
+++ b/tensorflow/core/grappler/utils/grappler_test.cc
@@ -81,6 +81,7 @@
   cfg->set_debug_stripper(RewriterConfig::OFF);
   cfg->set_dependency_optimization(RewriterConfig::OFF);
   cfg->set_function_optimization(RewriterConfig::OFF);
+  cfg->set_implementation_selector(RewriterConfig::OFF);
   cfg->set_layout_optimizer(RewriterConfig::OFF);
   cfg->set_loop_optimization(RewriterConfig::OFF);
   cfg->set_pin_to_host_optimization(RewriterConfig::OFF);
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index f5ae398..e30b1c5 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -18,6 +18,8 @@
 #include <unistd.h>
 #include <limits>
 #include <memory>
+
+#include "absl/strings/substitute.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -124,56 +126,56 @@
 };
 
 TEST_F(UtilsTest, NodeName) {
-  EXPECT_EQ("abc", NodeName("abc"));
-  EXPECT_EQ("abc", NodeName("^abc"));
-  EXPECT_EQ("abc", NodeName("abc:0"));
-  EXPECT_EQ("abc", NodeName("^abc:0"));
+  EXPECT_EQ(NodeName("abc"), "abc");
+  EXPECT_EQ(NodeName("^abc"), "abc");
+  EXPECT_EQ(NodeName("abc:0"), "abc");
+  EXPECT_EQ(NodeName("^abc:0"), "abc");
 
-  EXPECT_EQ("abc/def", NodeName("abc/def"));
-  EXPECT_EQ("abc/def", NodeName("^abc/def"));
-  EXPECT_EQ("abc/def", NodeName("abc/def:1"));
-  EXPECT_EQ("abc/def", NodeName("^abc/def:1"));
+  EXPECT_EQ(NodeName("abc/def"), "abc/def");
+  EXPECT_EQ(NodeName("^abc/def"), "abc/def");
+  EXPECT_EQ(NodeName("abc/def:1"), "abc/def");
+  EXPECT_EQ(NodeName("^abc/def:1"), "abc/def");
 
-  EXPECT_EQ("abc/def0", NodeName("abc/def0"));
-  EXPECT_EQ("abc/def0", NodeName("^abc/def0"));
-  EXPECT_EQ("abc/def0", NodeName("abc/def0:0"));
-  EXPECT_EQ("abc/def0", NodeName("^abc/def0:0"));
+  EXPECT_EQ(NodeName("abc/def0"), "abc/def0");
+  EXPECT_EQ(NodeName("^abc/def0"), "abc/def0");
+  EXPECT_EQ(NodeName("abc/def0:0"), "abc/def0");
+  EXPECT_EQ(NodeName("^abc/def0:0"), "abc/def0");
 
-  EXPECT_EQ("abc/def_0", NodeName("abc/def_0"));
-  EXPECT_EQ("abc/def_0", NodeName("^abc/def_0"));
-  EXPECT_EQ("abc/def_0", NodeName("abc/def_0:3"));
-  EXPECT_EQ("abc/def_0", NodeName("^abc/def_0:3"));
+  EXPECT_EQ(NodeName("abc/def_0"), "abc/def_0");
+  EXPECT_EQ(NodeName("^abc/def_0"), "abc/def_0");
+  EXPECT_EQ(NodeName("abc/def_0:3"), "abc/def_0");
+  EXPECT_EQ(NodeName("^abc/def_0:3"), "abc/def_0");
 
-  EXPECT_EQ("abc/def_0", NodeName("^abc/def_0:3214"));
+  EXPECT_EQ(NodeName("^abc/def_0:3214"), "abc/def_0");
 }
 
 TEST_F(UtilsTest, NodePosition) {
-  EXPECT_EQ(2, NodePosition("abc:2"));
-  EXPECT_EQ(123, NodePosition("abc:123"));
-  EXPECT_EQ(-1, NodePosition("^abc:123"));
-  EXPECT_EQ(-1, NodePosition("^abc"));
-  EXPECT_EQ(0, NodePosition(""));
+  EXPECT_EQ(NodePosition("abc:2"), 2);
+  EXPECT_EQ(NodePosition("abc:123"), 123);
+  EXPECT_EQ(NodePosition("^abc:123"), -1);
+  EXPECT_EQ(NodePosition("^abc"), -1);
+  EXPECT_EQ(NodePosition(""), 0);
 }
 
 TEST_F(UtilsTest, NodePositionIfSameNode) {
-  EXPECT_EQ(-2, NodePositionIfSameNode(":123", ""));
-  EXPECT_EQ(-2, NodePositionIfSameNode(":", ""));
-  EXPECT_EQ(-2, NodePositionIfSameNode("", ""));
-  EXPECT_EQ(123, NodePositionIfSameNode("abc:123", "abc"));
-  EXPECT_EQ(-1, NodePositionIfSameNode("^abc", "abc"));
-  EXPECT_EQ(-1, NodePositionIfSameNode("^abc:123", "abc"));
-  EXPECT_EQ(-2, NodePositionIfSameNode("abc", "xyz"));
-  EXPECT_EQ(-2, NodePositionIfSameNode("abc", "abc/xyz"));
-  EXPECT_EQ(-2, NodePositionIfSameNode("abc/xyz", "abc"));
-  EXPECT_EQ(-2, NodePositionIfSameNode("abc:123", "xyz"));
-  EXPECT_EQ(-2, NodePositionIfSameNode("^abc", "xyz"));
-  EXPECT_EQ(-2, NodePositionIfSameNode("^abc:123", "xyz"));
+  EXPECT_EQ(NodePositionIfSameNode(":123", ""), -2);
+  EXPECT_EQ(NodePositionIfSameNode(":", ""), -2);
+  EXPECT_EQ(NodePositionIfSameNode("", ""), -2);
+  EXPECT_EQ(NodePositionIfSameNode("abc:123", "abc"), 123);
+  EXPECT_EQ(NodePositionIfSameNode("^abc", "abc"), -1);
+  EXPECT_EQ(NodePositionIfSameNode("^abc:123", "abc"), -1);
+  EXPECT_EQ(NodePositionIfSameNode("abc", "xyz"), -2);
+  EXPECT_EQ(NodePositionIfSameNode("abc", "abc/xyz"), -2);
+  EXPECT_EQ(NodePositionIfSameNode("abc/xyz", "abc"), -2);
+  EXPECT_EQ(NodePositionIfSameNode("abc:123", "xyz"), -2);
+  EXPECT_EQ(NodePositionIfSameNode("^abc", "xyz"), -2);
+  EXPECT_EQ(NodePositionIfSameNode("^abc:123", "xyz"), -2);
 }
 
 TEST_F(UtilsTest, AddNodeNamePrefix) {
-  EXPECT_EQ("OPTIMIZED/abc", AddPrefixToNodeName("abc", "OPTIMIZED"));
-  EXPECT_EQ("^OPTIMIZED/abc", AddPrefixToNodeName("^abc", "OPTIMIZED"));
-  EXPECT_EQ("OPTIMIZED/", AddPrefixToNodeName("", "OPTIMIZED"));
+  EXPECT_EQ(AddPrefixToNodeName("abc", "OPTIMIZED"), "OPTIMIZED/abc");
+  EXPECT_EQ(AddPrefixToNodeName("^abc", "OPTIMIZED"), "^OPTIMIZED/abc");
+  EXPECT_EQ(AddPrefixToNodeName("", "OPTIMIZED"), "OPTIMIZED/");
 }
 
 TEST_F(UtilsTest, ExecuteWithTimeout) {
@@ -204,17 +206,17 @@
 
 TEST_F(UtilsTest, NumOutputs) {
   GraphDef graph;
-  EXPECT_EQ(2, NumOutputs(CreateConcatOffsetNode(), &graph));
-  EXPECT_EQ(5, NumOutputs(CreateFusedBatchNormNode(), &graph));
-  EXPECT_EQ(1, NumOutputs(CreateDequeueNode(), &graph));
+  EXPECT_EQ(NumOutputs(CreateConcatOffsetNode(), &graph), 2);
+  EXPECT_EQ(NumOutputs(CreateFusedBatchNormNode(), &graph), 5);
+  EXPECT_EQ(NumOutputs(CreateDequeueNode(), &graph), 1);
 }
 
 TEST_F(UtilsTest, AsControlDependency) {
   NodeDef node;
   node.set_name("foo");
-  EXPECT_EQ("^foo", AsControlDependency(node));
-  EXPECT_EQ("^foo", AsControlDependency(node.name()));
-  EXPECT_EQ("^foo", AsControlDependency("^foo"));
+  EXPECT_EQ(AsControlDependency(node), "^foo");
+  EXPECT_EQ(AsControlDependency(node.name()), "^foo");
+  EXPECT_EQ(AsControlDependency("^foo"), "^foo");
 }
 
 TEST_F(UtilsTest, GetTailOfChain) {
@@ -233,22 +235,23 @@
   GraphDef graph;
   TF_CHECK_OK(s.ToGraphDef(&graph));
 
-  ASSERT_EQ("c0", graph.node(0).name());
-  ASSERT_EQ("c1", graph.node(1).name());
-  ASSERT_EQ("neg0", graph.node(2).name());
-  ASSERT_EQ("neg1", graph.node(3).name());
-  ASSERT_EQ("neg2", graph.node(4).name());
-  ASSERT_EQ("id1", graph.node(5).name());
-  ASSERT_EQ("id2", graph.node(6).name());
-  ASSERT_EQ("noop", graph.node(7).name());
+  ASSERT_EQ(graph.node_size(), 8);
+  ASSERT_EQ(graph.node(0).name(), "c0");
+  ASSERT_EQ(graph.node(1).name(), "c1");
+  ASSERT_EQ(graph.node(2).name(), "neg0");
+  ASSERT_EQ(graph.node(3).name(), "neg1");
+  ASSERT_EQ(graph.node(4).name(), "neg2");
+  ASSERT_EQ(graph.node(5).name(), "id1");
+  ASSERT_EQ(graph.node(6).name(), "id2");
+  ASSERT_EQ(graph.node(7).name(), "noop");
 
   NodeMap node_map(&graph);
   auto is_neg = [&](const NodeDef& node) { return node.op() == "Neg"; };
   // We walk backwards, starting from "id1", so tail should be "neg1".
   NodeDef* tail = GetTailOfChain(graph.node(5), node_map,
                                  /*follow_control_input=*/false, is_neg);
-  EXPECT_NE(tail, nullptr);
-  EXPECT_EQ("neg1", tail->name());
+  ASSERT_NE(tail, nullptr);
+  EXPECT_EQ(tail->name(), "neg1");
 
   // We stop at branching nodes, so tail should be "neg2".
   auto is_neg_and_non_branching = [&](const NodeDef& node) {
@@ -257,22 +260,22 @@
   tail =
       GetTailOfChain(graph.node(5), node_map,
                      /*follow_control_input=*/false, is_neg_and_non_branching);
-  EXPECT_NE(tail, nullptr);
-  EXPECT_EQ("neg2", tail->name());
+  ASSERT_NE(tail, nullptr);
+  EXPECT_EQ(tail->name(), "neg2");
 
   // We walk backwards, starting from "noop", also following control inputs,
   // so tail should be "neg0".
   tail = GetTailOfChain(graph.node(7), node_map,
                         /*follow_control_input=*/true, is_neg);
-  EXPECT_NE(tail, nullptr);
-  EXPECT_EQ("neg0", tail->name());
+  ASSERT_NE(tail, nullptr);
+  EXPECT_EQ(tail->name(), "neg0");
 
   // We walk backwards, starting from "noop", not following control inputs,
   // so tail should be "noop" itself.
   tail = GetTailOfChain(graph.node(7), node_map,
                         /*follow_control_input=*/false, is_neg);
-  EXPECT_NE(tail, nullptr);
-  EXPECT_EQ("noop", tail->name());
+  ASSERT_NE(tail, nullptr);
+  EXPECT_EQ(tail->name(), "noop");
 }
 
 TEST_F(UtilsTest, DedupControlInputs) {
@@ -280,40 +283,40 @@
   foo.set_name("foo");
   foo.add_input("bar");
   DedupControlInputs(&foo);
-  EXPECT_EQ(1, foo.input_size());
-  EXPECT_EQ("bar", foo.input(0));
+  ASSERT_EQ(foo.input_size(), 1);
+  EXPECT_EQ(foo.input(0), "bar");
 
   foo.set_input(0, "^bar");
   DedupControlInputs(&foo);
-  EXPECT_EQ(1, foo.input_size());
-  EXPECT_EQ("^bar", foo.input(0));
+  ASSERT_EQ(foo.input_size(), 1);
+  EXPECT_EQ(foo.input(0), "^bar");
 
   foo.set_input(0, "bar");
   foo.add_input("bar");
   DedupControlInputs(&foo);
-  EXPECT_EQ(2, foo.input_size());
-  EXPECT_EQ("bar", foo.input(0));
-  EXPECT_EQ("bar", foo.input(1));
+  ASSERT_EQ(foo.input_size(), 2);
+  EXPECT_EQ(foo.input(0), "bar");
+  EXPECT_EQ(foo.input(1), "bar");
 
   foo.set_input(1, "^bar");
   DedupControlInputs(&foo);
-  EXPECT_EQ(1, foo.input_size());
-  EXPECT_EQ("bar", foo.input(0));
+  ASSERT_EQ(foo.input_size(), 1);
+  EXPECT_EQ(foo.input(0), "bar");
 
   foo.set_input(0, "^bar");
   foo.add_input("^bar");
   DedupControlInputs(&foo);
-  EXPECT_EQ(1, foo.input_size());
-  EXPECT_EQ("^bar", foo.input(0));
+  ASSERT_EQ(foo.input_size(), 1);
+  EXPECT_EQ(foo.input(0), "^bar");
 
   foo.set_input(0, "bar");
   foo.add_input("gnu");
   foo.add_input("^bar");
   foo.add_input("^gnu");
   DedupControlInputs(&foo);
-  EXPECT_EQ(2, foo.input_size());
-  EXPECT_EQ("bar", foo.input(0));
-  EXPECT_EQ("gnu", foo.input(1));
+  ASSERT_EQ(foo.input_size(), 2);
+  EXPECT_EQ(foo.input(0), "bar");
+  EXPECT_EQ(foo.input(1), "gnu");
 }
 
 TEST_F(UtilsTest, NumNonControlOutputs) {
@@ -347,14 +350,14 @@
   NodeMap node_map(&graph);
 
   const NodeDef* add_node = node_map.GetNode("add");
-  ASSERT_TRUE(add_node != nullptr);
+  ASSERT_NE(add_node, nullptr);
 
   // [a, b] are the only non-control inputs
-  EXPECT_EQ(2, NumNonControlInputs(*add_node));
+  EXPECT_EQ(NumNonControlInputs(*add_node), 2);
   // [sqrt, shape] are non-control outputs
-  EXPECT_EQ(2, NumNonControlOutputs(*add_node, node_map));
+  EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2);
   // sqrt is the only data output
-  EXPECT_EQ(1, NumNonControlDataOutputs(*add_node, node_map));
+  EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1);
 }
 
 TEST(CheckAttrExists, All) {
@@ -465,10 +468,104 @@
 }
 
 TEST_F(UtilsTest, TensorIdToString) {
-  EXPECT_EQ("^foo", TensorIdToString({"foo", -1}));
-  EXPECT_EQ("foo", TensorIdToString({"foo", 0}));
-  EXPECT_EQ("foo:1", TensorIdToString({"foo", 1}));
-  EXPECT_EQ("foo:2", TensorIdToString({"foo", 2}));
+  EXPECT_EQ(TensorIdToString({"foo", -1}), "^foo");
+  EXPECT_EQ(TensorIdToString({"foo", 0}), "foo");
+  EXPECT_EQ(TensorIdToString({"foo", 1}), "foo:1");
+  EXPECT_EQ(TensorIdToString({"foo", 2}), "foo:2");
+}
+
+template <typename T>
+void TestSetTensorValue(DataType type, int val, bool success,
+                        absl::string_view error_msg) {
+  Tensor t(type, TensorShape({}));
+  Status s = SetTensorValue(t.dtype(), val, &t);
+  EXPECT_EQ(s.ok(), success);
+  if (s.ok()) {
+    test::ExpectTensorEqual<T>(Tensor(static_cast<T>(val)), t);
+  } else {
+    EXPECT_EQ(s.error_message(), error_msg);
+  }
+}
+
+TEST(SetTensorValueTest, Quantized) {
+  auto int_min_error = [](DataType type) {
+    return absl::Substitute(
+        "Cannot store value -2147483648 in tensor of type $0",
+        DataType_Name(type));
+  };
+  auto int_max_error = [](DataType type) {
+    return absl::Substitute(
+        "Cannot store value 2147483647 in tensor of type $0",
+        DataType_Name(type));
+  };
+  const int kMinInt = std::numeric_limits<int>::min();
+  const int kMaxInt = std::numeric_limits<int>::max();
+
+  TestSetTensorValue<qint8>(DT_QINT8, -8, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint8>(DT_QINT8, 0, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint8>(DT_QINT8, 8, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint8>(DT_QINT8, std::numeric_limits<qint8>::min(),
+                            /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint8>(DT_QINT8, std::numeric_limits<qint8>::max(),
+                            /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint8>(DT_QINT8, kMinInt, /*success=*/false,
+                            int_min_error(DT_QINT8));
+  TestSetTensorValue<qint8>(DT_QINT8, kMaxInt, /*success=*/false,
+                            int_max_error(DT_QINT8));
+
+  TestSetTensorValue<quint8>(
+      DT_QUINT8, -8, /*success=*/false,
+      /*error_msg=*/"Cannot store value -8 in tensor of type DT_QUINT8");
+  TestSetTensorValue<quint8>(DT_QUINT8, 0, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<quint8>(DT_QUINT8, 8, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<quint8>(DT_QUINT8, std::numeric_limits<quint8>::min(),
+                             /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<quint8>(DT_QUINT8, std::numeric_limits<quint8>::max(),
+                             /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<quint8>(DT_QUINT8, kMinInt, /*success=*/false,
+                             int_min_error(DT_QUINT8));
+  TestSetTensorValue<quint8>(DT_QUINT8, kMaxInt, /*success=*/false,
+                             int_max_error(DT_QUINT8));
+
+  TestSetTensorValue<qint16>(DT_QINT16, -8, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint16>(DT_QINT16, 0, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint16>(DT_QINT16, 8, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint16>(DT_QINT16, std::numeric_limits<qint16>::min(),
+                             /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint16>(DT_QINT16, std::numeric_limits<qint16>::max(),
+                             /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint16>(DT_QINT16, kMinInt, /*success=*/false,
+                             int_min_error(DT_QINT16));
+  TestSetTensorValue<qint16>(DT_QINT16, kMaxInt, /*success=*/false,
+                             int_max_error(DT_QINT16));
+
+  TestSetTensorValue<quint16>(
+      DT_QUINT16, -8, /*success=*/false,
+      /*error_msg=*/"Cannot store value -8 in tensor of type DT_QUINT16");
+  TestSetTensorValue<quint16>(DT_QUINT16, 0, /*success=*/true,
+                              /*error_msg=*/"");
+  TestSetTensorValue<quint16>(DT_QUINT16, 8, /*success=*/true,
+                              /*error_msg=*/"");
+  TestSetTensorValue<quint16>(DT_QUINT16, std::numeric_limits<quint16>::min(),
+                              /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<quint16>(DT_QUINT16, std::numeric_limits<quint16>::max(),
+                              /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<quint16>(DT_QUINT16, kMinInt, /*success=*/false,
+                              int_min_error(DT_QUINT16));
+  TestSetTensorValue<quint16>(DT_QUINT16, kMaxInt, /*success=*/false,
+                              int_max_error(DT_QUINT16));
+
+  TestSetTensorValue<qint32>(DT_QINT32, -8, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint32>(DT_QINT32, 0, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint32>(DT_QINT32, 8, /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint32>(DT_QINT32, std::numeric_limits<qint32>::min(),
+                             /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint32>(DT_QINT32, std::numeric_limits<qint32>::max(),
+                             /*success=*/true, /*error_msg=*/"");
+  TestSetTensorValue<qint32>(DT_QINT32, kMinInt, /*success=*/true,
+                             /*error_msg=*/"");
+  TestSetTensorValue<qint32>(DT_QINT32, kMaxInt, /*success=*/true,
+                             /*error_msg=*/"");
 }
 
 }  // namespace
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index ae3bb94..1ac147c 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -160,7 +160,6 @@
     name = "clustering_ops",
     prefix = "clustering_ops",
     deps = [
-        "//tensorflow/core:clustering_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib",
@@ -191,7 +190,6 @@
     ]),
     prefix = "collective_ops",
     deps = [
-        "//tensorflow/core:collective_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -276,7 +274,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:nn_ops_op_lib",
         "//third_party/eigen3",
     ],
     alwayslink = 1,
@@ -367,7 +364,6 @@
         "//tensorflow/core/nccl:nccl_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:gpu_headers_lib",
-        "//tensorflow/core:nccl_ops_op_lib",
     ]),
 )
 
@@ -569,7 +565,6 @@
         ":concat_lib_hdrs",
         ":ops_util_hdrs",
         ":split_lib_hdrs",
-        "//tensorflow/core:batch_ops_op_lib",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels/batching_util:periodic_function_dynamic",
@@ -790,7 +785,6 @@
     ":ops_util",
     ":transpose_functor",
     "//tensorflow/core:array_grad",
-    "//tensorflow/core:array_ops_op_lib",
     "//tensorflow/core:core_cpu",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
@@ -819,7 +813,6 @@
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib",
-        "//tensorflow/core:set_ops_op_lib",
         "//third_party/eigen3",
     ],
 )
@@ -838,6 +831,7 @@
     deps = [
         ":batch_space_ops",
         ":bcast_ops",
+        ":bitcast_op",
         ":broadcast_to_op",
         ":concat_op",
         ":constant_op",
@@ -888,6 +882,12 @@
 )
 
 tf_kernel_library(
+    name = "bitcast_op",
+    prefix = "bitcast_op",
+    deps = ARRAY_DEPS,
+)
+
+tf_kernel_library(
     name = "broadcast_to_op",
     prefix = "broadcast_to_op",
     deps = ARRAY_DEPS,
@@ -1185,7 +1185,6 @@
     srcs = ["ragged_gather_op.cc"],
     deps = [
         "//tensorflow/core:framework",
-        "//tensorflow/core:ragged_array_ops_op_lib",
     ],
 )
 
@@ -1197,7 +1196,6 @@
         ":ops_testutil",
         ":ragged_gather_op",
         "//tensorflow/core:framework",
-        "//tensorflow/core:ragged_array_ops_op_lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
@@ -1209,7 +1207,6 @@
     srcs = ["ragged_range_op.cc"],
     deps = [
         "//tensorflow/core:framework",
-        "//tensorflow/core:ragged_math_ops_op_lib",
     ],
 )
 
@@ -1220,7 +1217,6 @@
         ":ops_testutil",
         ":ragged_range_op",
         "//tensorflow/core:framework",
-        "//tensorflow/core:ragged_math_ops_op_lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
@@ -1232,7 +1228,6 @@
     srcs = ["ragged_tensor_to_sparse_kernel.cc"],
     deps = [
         "//tensorflow/core:framework",
-        "//tensorflow/core:ragged_conversion_ops_op_lib",
     ],
 )
 
@@ -1245,7 +1240,6 @@
         ":ragged_tensor_to_sparse_kernel",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:ragged_conversion_ops_op_lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
@@ -1259,7 +1253,6 @@
     deps = [
         ":bounds_check_lib",
         ":gpu_util_hdrs",
-        "//tensorflow/core:cudnn_rnn_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -1335,7 +1328,6 @@
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:math_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
@@ -1860,7 +1852,6 @@
     prefix = "candidate_sampler_ops",
     deps = [
         ":range_sampler",
-        "//tensorflow/core:candidate_sampling_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
     ],
@@ -1894,7 +1885,6 @@
     name = "control_flow_ops",
     prefix = "control_flow_ops",
     deps = [
-        "//tensorflow/core:control_flow_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
     ],
@@ -1906,7 +1896,6 @@
     deps = [
         ":bounds_check",
         ":ops_util",
-        "//tensorflow/core:ctc_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/util/ctc:ctc_beam_search_lib",
@@ -1987,7 +1976,6 @@
     ":typed_queue",
     "//third_party/eigen3",
     "//tensorflow/core:core_cpu",
-    "//tensorflow/core:data_flow_ops_op_lib",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
@@ -2052,7 +2040,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:scoped_allocator_ops_op_lib",
     ],
 )
 
@@ -2071,7 +2058,6 @@
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:math_ops_op_lib",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
@@ -2119,7 +2105,6 @@
 DYNAMIC_DEPS = [
     ":bounds_check",
     "//tensorflow/core:core_cpu",
-    "//tensorflow/core:data_flow_ops_op_lib",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
@@ -2152,7 +2137,6 @@
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
-    "//tensorflow/core:lookup_ops_op_lib",
 ]
 
 tf_kernel_library(
@@ -2191,7 +2175,6 @@
     deps = [
         ":lookup_table_init_op",
         ":lookup_table_op",
-        "//tensorflow/core:checkpoint_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//third_party/eigen3",
@@ -2202,7 +2185,6 @@
     name = "load_and_remap_matrix_op",
     srcs = ["load_and_remap_matrix_op.cc"],
     deps = [
-        "//tensorflow/core:checkpoint_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -2347,7 +2329,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:resource_variable_ops_op_lib",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2365,7 +2346,6 @@
         ":fill_functor",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:list_ops_op_lib",
         "//third_party/eigen3",
     ],
 )
@@ -2376,7 +2356,6 @@
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:user_ops_op_lib",
     ],
 )
 
@@ -2399,7 +2378,6 @@
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
-        "//tensorflow/core:functional_ops_op_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//third_party/eigen3",
@@ -2412,7 +2390,6 @@
     deps = [
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
-        "//tensorflow/core:functional_ops_op_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
@@ -2457,7 +2434,6 @@
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:gif_internal",
-    "//tensorflow/core:image_ops_op_lib",
     "//tensorflow/core:jpeg_internal",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
@@ -2798,7 +2774,6 @@
 IO_DEPS = [
     ":ops_util",
     "//tensorflow/core:framework",
-    "//tensorflow/core:io_ops_op_lib",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
     "//tensorflow/core:protos_all_cc",
@@ -2842,7 +2817,6 @@
     ":bounds_check_lib",
     ":save_restore_tensor",
     "//tensorflow/core:framework",
-    "//tensorflow/core:io_ops_op_lib",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
     "//tensorflow/core:protos_all_cc",
@@ -2930,6 +2904,7 @@
         ":self_adjoint_eig_op",
         ":self_adjoint_eig_v2_op",
         ":svd_op",
+        ":tridiagonal_solve_op",
     ],
 )
 
@@ -2961,7 +2936,6 @@
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
-    "//tensorflow/core:linalg_ops_op_lib",
 ] + if_cuda([
     ":cuda_solvers",
     ":transpose_functor",
@@ -3049,6 +3023,12 @@
 )
 
 tf_kernel_library(
+    name = "tridiagonal_solve_op",
+    srcs = ["tridiagonal_solve_op.cc"],
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
     name = "qr_op",
     prefix = "qr_op",
     deps = LINALG_DEPS + if_cuda([
@@ -3105,7 +3085,6 @@
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
-    "//tensorflow/core:logging_ops_op_lib",
     "//tensorflow/core:protos_all_cc",
 ]
 
@@ -3177,7 +3156,6 @@
         ":bounds_check",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:manip_ops_op_lib",
         "//third_party/eigen3",
     ],
 )
@@ -3209,7 +3187,6 @@
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
     "//tensorflow/core:math_grad",
-    "//tensorflow/core:math_ops_op_lib",
     "//third_party/eigen3",
 ]
 
@@ -3320,7 +3297,7 @@
 tf_kernel_library(
     name = "cwise_op",
     prefix = "cwise_op",
-    deps = MATH_DEPS + ["//tensorflow/core:bitwise_ops_op_lib"],
+    deps = MATH_DEPS,
 )
 
 tf_kernel_library(
@@ -3339,7 +3316,6 @@
     name = "fft_ops",
     prefix = "fft_ops",
     deps = MATH_DEPS + [
-        "//tensorflow/core:spectral_ops_op_lib",
     ] + if_cuda([
         "//tensorflow/core/platform/default/build_config:cufft_plugin",
     ]),
@@ -3546,10 +3522,7 @@
         ":quantized_ops",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:client_session",
-        "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:framework",
-        "//tensorflow/core:math_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
@@ -3781,12 +3754,12 @@
         ":image_resizer_state",
         ":fill_functor",
         ":ops_util",
+        "@com_google_absl//absl/base:dynamic_annotations",
         "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:nn_ops_op_lib",
     ] + select({
         ":xsmm_convolutions": [
             "@libxsmm_archive//:xsmm_avx",
@@ -3816,7 +3789,6 @@
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:nn_ops_op_lib",
     ] + if_cuda([
         "@cub_archive//:cub",
         "@local_config_cuda//cuda:cudnn_header",
@@ -3836,7 +3808,6 @@
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:nn_ops_op_lib",
     ] + if_cuda([
         "@local_config_cuda//cuda:cudnn_header",
     ]),
@@ -3883,9 +3854,8 @@
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
     "//tensorflow/core:nn_grad",
-    "//tensorflow/core:nn_ops_op_lib",
     "//third_party/eigen3",
-] + if_mkl(["//tensorflow/core:mkl_nn_ops_op_lib"])
+]
 
 tf_kernel_library(
     name = "batch_norm_op",
@@ -4021,7 +3991,6 @@
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:nn_grad",
-        "//tensorflow/core:nn_ops_op_lib",
     ] + if_cuda(["@cub_archive//:cub"]),
 )
 
@@ -4130,7 +4099,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core:stream_executor",
         "//third_party/eigen3",
     ],
@@ -4174,7 +4142,6 @@
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:nn_ops_op_lib",
         "//third_party/eigen3",
     ],
 )
@@ -4256,7 +4223,6 @@
 PARSING_DEPS = [
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
-    "//tensorflow/core:parsing_ops_op_lib",
     "//tensorflow/core:proto_text",
     "//tensorflow/core:protos_all_cc",
 ]
@@ -4325,7 +4291,6 @@
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
-    "//tensorflow/core:random_ops_op_lib",
 ]
 
 tf_kernel_library(
@@ -4389,7 +4354,6 @@
         ":random_op",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:stateless_random_ops_op_lib",
     ],
 )
 
@@ -4404,8 +4368,6 @@
 REQUIRED_DEPS = [
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
-    "//tensorflow/core:no_op_op_lib",
-    "//tensorflow/core:sendrecv_ops_op_lib",
 ]
 
 tf_kernel_library(
@@ -4466,7 +4428,6 @@
 SPARSE_DEPS = [
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
-    "//tensorflow/core:sparse_ops_op_lib",
 ]
 
 tf_kernel_library(
@@ -4714,7 +4675,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:sdca_ops_op_lib",
         "//third_party/eigen3",
         "@farmhash_archive//:farmhash",
     ],
@@ -4754,7 +4714,6 @@
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
-    "//tensorflow/core:state_ops_op_lib",
 ] + if_sycl(["//tensorflow/core:sycl_runtime"])
 
 tf_kernel_library(
@@ -4892,7 +4851,6 @@
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
-    "//tensorflow/core:string_ops_op_lib",
 ]
 
 tf_kernel_library(
@@ -5043,7 +5001,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:string_ops_op_lib",
         "//third_party/eigen3",
         "//third_party/icu/data:conversion_data",
         "@icu//:common",
@@ -5065,7 +5022,6 @@
         ":variable_ops",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:training_ops_op_lib",
         "//third_party/eigen3",
     ],
 )
@@ -5127,7 +5083,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:random_ops_op_lib",
     ],
 )
 
@@ -5155,7 +5110,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:random_ops_op_lib",
     ],
 )
 
@@ -6063,12 +6017,9 @@
         ":ops_util",
         ":pooling_ops",
         ":quantization_utils",
-        "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:math_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
         "//third_party/eigen3",
         "@gemmlowp",
     ],
@@ -6554,6 +6505,30 @@
     ],
 )
 
+tf_cc_test_mkl(
+    name = "mkl_quantized_concat_op_test",
+    size = "small",
+    srcs = ["mkl_quantized_concat_op_test.cc"],
+    deps = [
+        ":mkl_concat_op",
+        ":ops_testutil",
+        ":ops_util",
+        ":quantization_utils",
+        ":quantized_ops",
+        "//tensorflow/core:array_ops_op_lib",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:math_ops_op_lib",
+        "//tensorflow/core:mkl_array_ops_op_lib",
+        "//tensorflow/core:nn_ops_op_lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cc_test(
     name = "quantized_batch_norm_op_test",
     size = "small",
@@ -6638,7 +6613,6 @@
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:remote_fused_graph_ops_op_lib",
     ],
 )
 
@@ -6793,8 +6767,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:mkl_nn_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
     ] + mkl_deps(),
 )
 
@@ -6802,6 +6774,7 @@
     name = "mkl_conv_ops_test",
     size = "small",
     srcs = ["mkl_conv_ops_test.cc"],
+    linkstatic = 1,  # Fixes dyld error on macOS.
     deps = [
         ":ops_testutil",
         ":ops_util",
@@ -6844,8 +6817,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:mkl_nn_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
     ] + mkl_deps(),
 )
 
@@ -6864,8 +6835,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:mkl_nn_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
     ] + mkl_deps(),
 )
 
@@ -6879,8 +6848,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:mkl_nn_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
         "//third_party/eigen3",
     ] + mkl_deps(),
 )
@@ -6895,8 +6862,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:mkl_nn_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
         "//third_party/eigen3",
     ] + mkl_deps(),
 )
@@ -6954,6 +6919,68 @@
     deps = NN_DEPS + mkl_deps() + [":cwise_op"],
 )
 
+tf_mkl_kernel_library(
+    name = "mkl_requantize_ops",
+    srcs = [
+        "mkl_requantization_range_per_channel_op.cc",
+        "mkl_requantize_per_channel_op.cc",
+    ],
+    hdrs = [
+        "meta_support.h",
+        "no_op.h",
+        "reference_gemm.h",
+    ],
+    deps = if_mkl(
+        [
+            ":concat_lib_hdrs",
+            ":conv_ops",
+            ":cwise_op",
+            ":eigen_helpers",
+            ":image_resizer_state",
+            ":ops_util",
+            ":pooling_ops",
+            ":quantization_utils",
+            ":transpose_functor",
+            "//third_party/eigen3",
+            "@gemmlowp",
+            "@mkl_dnn",
+            "//tensorflow/core:array_ops_op_lib",
+            "//tensorflow/core:core_cpu",
+            "//tensorflow/core:framework",
+            "//tensorflow/core:lib",
+            "//tensorflow/core:math_ops_op_lib",
+            "//tensorflow/core:nn_ops_op_lib",
+            "//third_party/mkl:intel_binary_blob",
+        ],
+    ),
+)
+
+tf_cc_test_mkl(
+    name = "mkl_requantize_ops_test",
+    size = "small",
+    srcs = ["mkl_requantize_ops_test.cc"],
+    linkstatic = 1,  # Fixes dyld error on macOS.
+    deps = [
+        ":mkl_requantize_ops",
+        ":ops_testutil",
+        ":ops_util",
+        ":quantization_utils",
+        ":quantized_ops",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:array_ops_op_lib",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:math_ops_op_lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:tensorflow",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cc_test_mkl(
     name = "mkl_fused_ops_test",
     size = "small",
@@ -7037,7 +7064,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:summary_ops_op_lib",
         "//tensorflow/core/lib/db:sqlite",
         "//tensorflow/core/summary:schema",
         "//tensorflow/core/summary:summary_db_writer",
@@ -7051,7 +7077,6 @@
         "decode_proto_op.cc",
     ],
     deps = [
-        "//tensorflow/core:decode_proto_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/util/proto:decode",
@@ -7066,7 +7091,6 @@
     name = "encode_proto_op",
     srcs = ["encode_proto_op.cc"],
     deps = [
-        "//tensorflow/core:encode_proto_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/util/proto:descriptors",
@@ -7084,7 +7108,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:rpc_ops_op_lib",
         "//tensorflow/core/util/rpc:call_container",
         "//tensorflow/core/util/rpc:rpc_factory",
         "//tensorflow/core/util/rpc:rpc_factory_registry",
@@ -7097,7 +7120,6 @@
     srcs = ["unicode_script_op.cc"],
     deps = [
         "//tensorflow/core:framework",
-        "//tensorflow/core:string_ops_op_lib",
         "@icu//:common",
     ],
 )
@@ -7140,3 +7162,12 @@
         ":cwise_lib",
     ],
 )
+
+# Library to link with when compiling the quantize and dequantize kernels directly,
+# e.g., for selective registration.
+cc_header_only_library(
+    name = "quantize_and_dequantize_op_hdrs",
+    deps = [
+        ":quantize_and_dequantize_op",
+    ],
+)
diff --git a/tensorflow/core/kernels/adjust_saturation_op.cc b/tensorflow/core/kernels/adjust_saturation_op.cc
index 87d34fc..98264c4 100644
--- a/tensorflow/core/kernels/adjust_saturation_op.cc
+++ b/tensorflow/core/kernels/adjust_saturation_op.cc
@@ -193,8 +193,8 @@
         *context->device()->tensorflow_cpu_worker_threads();
     Shard(worker_threads.num_threads, worker_threads.workers, channel_count,
           kCostPerChannel,
-          [channel_count, &input_data, &output_data, scale_h](
-              int64 start_channel, int64 end_channel) {
+          [&input_data, &output_data, scale_h](int64 start_channel,
+                                               int64 end_channel) {
             const float* p = input_data.data() + start_channel * kChannelSize;
             float* q = output_data.data() + start_channel * kChannelSize;
             for (int i = start_channel; i < end_channel; i++) {
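Context for the capture trim above: the sharded worker only ever touches its half-open [start_channel, end_channel) slice, so channel_count never needs to be captured. A minimal standalone sketch of the same range-sharding pattern, using plain std::thread rather than TensorFlow's Shard (which additionally splits work by a per-unit cost estimate):

    #include <algorithm>
    #include <cstdint>
    #include <functional>
    #include <thread>
    #include <vector>

    // Splits [0, total) into contiguous blocks and runs work(start, end)
    // for each block on its own thread.
    void ShardRange(int num_threads, int64_t total,
                    const std::function<void(int64_t, int64_t)>& work) {
      std::vector<std::thread> threads;
      const int64_t block = (total + num_threads - 1) / num_threads;
      for (int64_t start = 0; start < total; start += block) {
        threads.emplace_back(work, start, std::min(start + block, total));
      }
      for (auto& t : threads) t.join();
    }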
diff --git a/tensorflow/core/kernels/barrier_ops.cc b/tensorflow/core/kernels/barrier_ops.cc
index d5bd36b..89d742c 100644
--- a/tensorflow/core/kernels/barrier_ops.cc
+++ b/tensorflow/core/kernels/barrier_ops.cc
@@ -247,7 +247,6 @@
           keys = t[1];
           values.insert(values.begin(), t.begin() + 2, t.end());
           callback(indices, keys, values);
-          return;
         });
   }
 
@@ -509,7 +508,7 @@
     Barrier* barrier = nullptr;
     OP_REQUIRES_OK_ASYNC(ctx, GetResourceFromContext(ctx, "handle", &barrier),
                          callback);
-    ComputeAsync(ctx, barrier, [this, callback, barrier]() {
+    ComputeAsync(ctx, barrier, [callback, barrier]() {
       barrier->Unref();
       callback();
     });
@@ -618,7 +617,6 @@
             values_output.set(i, values[i]);
           }
           callback();
-          return;
         });
   }
 
diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc
index 5ba461a..338f61f 100644
--- a/tensorflow/core/kernels/batch_kernels.cc
+++ b/tensorflow/core/kernels/batch_kernels.cc
@@ -720,8 +720,7 @@
 
   void ComputeAsync(OpKernelContext* c, DoneCallback done) final {
     BatchResource* br;
-    std::function<Status(BatchResource * *r)> creator = [this,
-                                                         c](BatchResource** r) {
+    std::function<Status(BatchResource**)> creator = [this](BatchResource** r) {
       std::unique_ptr<BatchResource> new_resource;
       TF_RETURN_IF_ERROR(
           BatchResource::Create(num_batch_threads_, max_batch_size_,
@@ -801,16 +800,15 @@
 
   void ComputeAsync(OpKernelContext* c, DoneCallback done) final {
     BatchResource* br;
-    std::function<Status(BatchResource * *r)> creator =
-        [this](BatchResource** r) {
-          std::unique_ptr<BatchResource> new_resource;
-          TF_RETURN_IF_ERROR(BatchResource::Create(
-              num_batch_threads_, max_batch_size_, batch_timeout_micros_,
-              max_enqueued_batches_, allowed_batch_sizes_, kInvalidHandle,
-              &new_resource));
-          *r = new_resource.release();
-          return Status::OK();
-        };
+    std::function<Status(BatchResource**)> creator = [this](BatchResource** r) {
+      std::unique_ptr<BatchResource> new_resource;
+      TF_RETURN_IF_ERROR(BatchResource::Create(
+          num_batch_threads_, max_batch_size_, batch_timeout_micros_,
+          max_enqueued_batches_, allowed_batch_sizes_, kInvalidHandle,
+          &new_resource));
+      *r = new_resource.release();
+      return Status::OK();
+    };
     OP_REQUIRES_OK_ASYNC(c,
                          c->resource_manager()->LookupOrCreate(
                              container_, shared_name_, &br, creator),
@@ -1066,7 +1064,7 @@
 
   void ComputeAsync(OpKernelContext* c, DoneCallback done) final {
     UnbatchResource* ubr;
-    std::function<Status(UnbatchResource * *r)> creator =
+    std::function<Status(UnbatchResource**)> creator =
         [this](UnbatchResource** r) {
           *r = new UnbatchResource(timeout_micros_);
           return Status::OK();
@@ -1252,8 +1250,8 @@
 
   void ComputeAsync(OpKernelContext* c, DoneCallback done) final {
     UnbatchGradResource* ubr;
-    std::function<Status(UnbatchGradResource * *r)> creator =
-        [this](UnbatchGradResource** r) {
+    std::function<Status(UnbatchGradResource**)> creator =
+        [](UnbatchGradResource** r) {
           *r = new UnbatchGradResource();
           return Status::OK();
         };
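The creator lambdas above all feed ResourceManager::LookupOrCreate, which invokes the creator only on a cache miss; that is why the captures can be pared down to what construction itself needs (an empty capture list for UnbatchGradResource). A rough standalone sketch of the idiom, with hypothetical names and none of ResourceManager's ref-counting or containers:

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>

    template <typename T>
    T* LookupOrCreate(std::map<std::string, std::unique_ptr<T>>& store,
                      const std::string& key,
                      const std::function<std::unique_ptr<T>()>& creator) {
      auto it = store.find(key);
      if (it == store.end()) {
        it = store.emplace(key, creator()).first;  // miss: build the resource
      }
      return it->second.get();
    }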
diff --git a/tensorflow/core/kernels/bitcast_op.cc b/tensorflow/core/kernels/bitcast_op.cc
new file mode 100644
index 0000000..02c8808
--- /dev/null
+++ b/tensorflow/core/kernels/bitcast_op.cc
@@ -0,0 +1,82 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+
+class BitcastOp : public OpKernel {
+ public:
+  explicit BitcastOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("T", &input_data_type_));
+    OP_REQUIRES_OK(context, context->GetAttr("type", &output_data_type_));
+    in_size_ = DataTypeSize(input_data_type_);
+    out_size_ = DataTypeSize(output_data_type_);
+    int check_size =
+        std::max(in_size_, out_size_) % std::min(in_size_, out_size_);
+    OP_REQUIRES(
+        context, check_size == 0,
+        errors::InvalidArgument("cannot convert between datatype ",
+                                input_data_type_, " and ", output_data_type_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = context->input(0);
+
+    TensorShape adjusted_shape = input_tensor.shape();
+    OP_REQUIRES(context,
+                in_size_ >= out_size_ ||
+                    (input_tensor.dims() > 0 &&
+                     input_tensor.dim_size(input_tensor.dims() - 1) ==
+                         out_size_ / in_size_),
+                errors::InvalidArgument(
+                    "Cannot bitcast from ", DataTypeString(input_data_type_),
+                    " to ", DataTypeString(output_data_type_), ": shape ",
+                    input_tensor.shape().DebugString()));
+
+    if (out_size_ < in_size_) {
+      adjusted_shape.AddDim(in_size_ / out_size_);
+    } else if (out_size_ > in_size_) {
+      adjusted_shape.RemoveDim(input_tensor.dims() - 1);
+    }
+    Tensor output_tensor;
+
+    OP_REQUIRES_OK(context,
+                   output_tensor.BitcastFrom(input_tensor, output_data_type_,
+                                             adjusted_shape));
+    context->set_output(0, output_tensor);
+  }
+
+  bool IsExpensive() override { return false; }
+
+ private:
+  DataType input_data_type_;
+  DataType output_data_type_;
+  int in_size_;
+  int out_size_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("Bitcast").Device(DEVICE_CPU), BitcastOp);
+
+#if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(Name("Bitcast").Device(DEVICE_GPU), BitcastOp);
+#endif  // GOOGLE_CUDA
+
+}  // end namespace tensorflow
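The shape arithmetic in Compute above trades the element width against the innermost dimension. A standalone sketch of the same rule (sizes in bytes; names illustrative):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<int64_t> BitcastShape(std::vector<int64_t> shape,
                                      int in_size, int out_size) {
      if (out_size < in_size) {
        shape.push_back(in_size / out_size);  // e.g. float (4B) -> uint8: append 4
      } else if (out_size > in_size) {
        assert(!shape.empty() && shape.back() == out_size / in_size);
        shape.pop_back();                     // e.g. uint8 [..., 4] -> float: drop the 4
      }
      return shape;
    }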
diff --git a/tensorflow/core/kernels/bitcast_op.h b/tensorflow/core/kernels/bitcast_op.h
new file mode 100644
index 0000000..1f3659f
--- /dev/null
+++ b/tensorflow/core/kernels/bitcast_op.h
@@ -0,0 +1,29 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+
+#ifndef TENSORFLOW_CORE_KERNELS_BITCAST_OP_H_
+#define TENSORFLOW_CORE_KERNELS_BITCAST_OP_H_
+
+#include <string.h>  // for memcpy
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+
+#endif  // TENSORFLOW_CORE_KERNELS_BITCAST_OP_H_
diff --git a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto
index 4e9bab3..3aa3bb8 100644
--- a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto
+++ b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto
@@ -52,6 +52,16 @@
   // the rule feature <= threshold.
   int32 feature_id = 1;
   int32 threshold = 2;
+  // If the feature column is multivalent, this holds the index of the
+  // dimension for the split. Defaults to 0.
+  int32 dimension_id = 5;
+  enum DefaultDirection {
+    // Left is the default direction.
+    DEFAULT_LEFT = 0;
+    DEFAULT_RIGHT = 1;
+  }
+  // Default direction for missing values.
+  DefaultDirection default_direction = 6;
 
   // Node children indexing into a contiguous
   // vector of nodes starting from the root.
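Sketch of how a traversal would consume the two new fields; the real routing lives in the boosted-trees resource, and the names below are hypothetical:

    #include <cstdint>

    enum class DefaultDirection { kLeft, kRight };

    // Missing values follow default_direction; present values follow the
    // rule feature <= threshold.
    int NextNodeId(bool feature_missing, int32_t feature_value, int32_t threshold,
                   int left_id, int right_id, DefaultDirection default_direction) {
      if (feature_missing) {
        return default_direction == DefaultDirection::kLeft ? left_id : right_id;
      }
      return feature_value <= threshold ? left_id : right_id;
    }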
diff --git a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
index 4ae26fb..04d3359 100644
--- a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
@@ -113,8 +113,7 @@
       output_tree_ids.setConstant(latest_tree);
       auto do_work = [&resource, &batch_bucketized_features, &cached_tree_ids,
                       &cached_node_ids, &output_partial_logits,
-                      &output_node_ids, batch_size,
-                      latest_tree](int32 start, int32 end) {
+                      &output_node_ids, latest_tree](int32 start, int32 end) {
         for (int32 i = start; i < end; ++i) {
           int32 tree_id = cached_tree_ids(i);
           int32 node_id = cached_node_ids(i);
@@ -129,7 +128,9 @@
             // Logic in the loop adds the cached node value again if it is a
             // leaf. If it is not a leaf anymore we need to subtract the old
             // node's value. The following logic handles both of these cases.
-            partial_tree_logit -= resource->node_value(tree_id, node_id);
+            const auto& node_logits = resource->node_value(tree_id, node_id);
+            DCHECK_EQ(node_logits.size(), 1);
+            partial_tree_logit -= node_logits[0];
           } else {
             // No cache exists, start from the very first node.
             node_id = 0;
@@ -137,7 +138,9 @@
           float partial_all_logit = 0.0;
           while (true) {
             if (resource->is_leaf(tree_id, node_id)) {
-              partial_tree_logit += resource->node_value(tree_id, node_id);
+              const auto& leaf_logits = resource->node_value(tree_id, node_id);
+              DCHECK_EQ(leaf_logits.size(), 1);
+              partial_tree_logit += leaf_logits[0];
 
               // Tree is done
               partial_all_logit +=
@@ -187,9 +190,6 @@
                                              &num_bucketized_features_));
     OP_REQUIRES_OK(context,
                    context->GetAttr("logits_dimension", &logits_dimension_));
-    OP_REQUIRES(context, logits_dimension_ == 1,
-                errors::InvalidArgument(
-                    "Currently only one dimensional outputs are supported."));
   }
 
   void Compute(OpKernelContext* const context) override {
@@ -225,18 +225,20 @@
     }
 
     const int32 last_tree = resource->num_trees() - 1;
-
     auto do_work = [&resource, &batch_bucketized_features, &output_logits,
-                    batch_size, last_tree](int32 start, int32 end) {
+                    last_tree, this](int32 start, int32 end) {
       for (int32 i = start; i < end; ++i) {
-        float tree_logit = 0.0;
+        std::vector<float> tree_logits(logits_dimension_, 0.0);
         int32 tree_id = 0;
         int32 node_id = 0;
         while (true) {
           if (resource->is_leaf(tree_id, node_id)) {
-            tree_logit += resource->GetTreeWeight(tree_id) *
-                          resource->node_value(tree_id, node_id);
-
+            const float tree_weight = resource->GetTreeWeight(tree_id);
+            const auto& leaf_logits = resource->node_value(tree_id, node_id);
+            DCHECK_EQ(leaf_logits.size(), logits_dimension_);
+            for (int32 j = 0; j < logits_dimension_; ++j) {
+              tree_logits[j] += tree_weight * leaf_logits[j];
+            }
             // Stop if it was the last tree.
             if (tree_id == last_tree) {
               break;
@@ -249,7 +251,9 @@
                                           batch_bucketized_features);
           }
         }
-        output_logits(i, 0) = tree_logit;
+        for (int32 j = 0; j < logits_dimension_; ++j) {
+          output_logits(i, j) = tree_logits[j];
+        }
       }
     };
     // 10 is the magic number. The actual number might depend on (the number of
@@ -329,13 +333,14 @@
     // path. Note: feature_ids has one less value than logits_path because the
     // first value of each logit path will be the bias.
     auto do_work = [&resource, &batch_bucketized_features, &output_debug_info,
-                    batch_size, last_tree](int32 start, int32 end) {
+                    last_tree](int32 start, int32 end) {
       for (int32 i = start; i < end; ++i) {
         // Proto to store debug outputs, per example.
         boosted_trees::DebugOutput example_debug_info;
         // Initial bias prediction. E.g., prediction based off training mean.
-        float tree_logit =
-            resource->GetTreeWeight(0) * resource->node_value(0, 0);
+        const auto& tree_logits = resource->node_value(0, 0);
+        DCHECK_EQ(tree_logits.size(), 1);
+        float tree_logit = resource->GetTreeWeight(0) * tree_logits[0];
         example_debug_info.add_logits_path(tree_logit);
         int32 node_id = 0;
         int32 tree_id = 0;
@@ -358,8 +363,9 @@
             // Get logit after split.
             node_id = resource->next_node(tree_id, node_id, i,
                                           batch_bucketized_features);
-            tree_logit = resource->GetTreeWeight(tree_id) *
-                         resource->node_value(tree_id, node_id);
+            const auto& tree_logits = resource->node_value(tree_id, node_id);
+            DCHECK_EQ(tree_logits.size(), 1);
+            tree_logit = resource->GetTreeWeight(tree_id) * tree_logits[0];
             // Output logit incorporates sum of leaf logits from prior trees.
             example_debug_info.add_logits_path(tree_logit + past_trees_logit);
           }
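The change above generalizes the scalar tree_logit to one accumulator per class. In miniature (standalone sketch, illustrative names):

    #include <vector>

    std::vector<float> AccumulateLogits(
        const std::vector<std::vector<float>>& leaf_values,  // one leaf per tree
        const std::vector<float>& tree_weights, int logits_dimension) {
      std::vector<float> logits(logits_dimension, 0.0f);
      for (int t = 0; t < static_cast<int>(leaf_values.size()); ++t) {
        for (int j = 0; j < logits_dimension; ++j) {
          logits[j] += tree_weights[t] * leaf_values[t][j];
        }
      }
      return logits;
    }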
diff --git a/tensorflow/core/kernels/boosted_trees/resources.cc b/tensorflow/core/kernels/boosted_trees/resources.cc
index 42df484..5ab9f97 100644
--- a/tensorflow/core/kernels/boosted_trees/resources.cc
+++ b/tensorflow/core/kernels/boosted_trees/resources.cc
@@ -82,15 +82,38 @@
   return -1;
 }
 
-float BoostedTreesEnsembleResource::node_value(const int32 tree_id,
-                                               const int32 node_id) const {
+std::vector<float> BoostedTreesEnsembleResource::node_value(
+    const int32 tree_id, const int32 node_id) const {
   DCHECK_LT(tree_id, tree_ensemble_->trees_size());
   DCHECK_LT(node_id, tree_ensemble_->trees(tree_id).nodes_size());
   const auto& node = tree_ensemble_->trees(tree_id).nodes(node_id);
   if (node.node_case() == boosted_trees::Node::kLeaf) {
-    return node.leaf().scalar();
+    // TODO(crawles): only use vector leaf even if # logits=1.
+    if (node.leaf().has_vector()) {
+      std::vector<float> leaf_values;
+      const auto& leaf_value_vector = node.leaf().vector();
+      const int size = leaf_value_vector.value_size();
+      leaf_values.reserve(size);
+      for (int i = 0; i < size; ++i) {
+        leaf_values.push_back(leaf_value_vector.value(i));
+      }
+      return leaf_values;
+    } else {
+      return {node.leaf().scalar()};
+    }
   } else {
-    return node.metadata().original_leaf().scalar();
+    if (node.metadata().original_leaf().has_vector()) {
+      std::vector<float> node_values;
+      const auto& leaf_value_vector = node.metadata().original_leaf().vector();
+      const int size = leaf_value_vector.value_size();
+      node_values.reserve(size);
+      for (int i = 0; i < size; ++i) {
+        node_values.push_back(leaf_value_vector.value(i));
+      }
+      return node_values;
+    } else {
+      return {node.metadata().original_leaf().scalar()};
+    }
   }
 }
 
@@ -452,15 +475,18 @@
 
     // Change node back into leaf.
     *node->mutable_leaf() = node_metadata.original_leaf();
-    const float parent_value = node_value(tree_id, node_id);
+    const auto& parent_values = node_value(tree_id, node_id);
+    DCHECK_EQ(parent_values.size(), 1);
+    const float parent_value = parent_values[0];
 
     // Save the old values of weights of children.
     (*nodes_meta)[left_id].first = node_id;
-    (*nodes_meta)[left_id].second = parent_value - node_value(tree_id, left_id);
+    (*nodes_meta)[left_id].second =
+        parent_value - node_value(tree_id, left_id)[0];
 
     (*nodes_meta)[right_id].first = node_id;
     (*nodes_meta)[right_id].second =
-        parent_value - node_value(tree_id, right_id);
+        parent_value - node_value(tree_id, right_id)[0];
 
     // Clear gain for leaf node.
     node->clear_metadata();
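The two copy loops in node_value() above are identical except for the leaf they read. A hypothetical helper (not part of this patch) that would fold them, assuming the generated proto accessors shown in the hunk:

    #include <vector>

    std::vector<float> LeafValues(const boosted_trees::Leaf& leaf) {
      if (leaf.has_vector()) {
        const auto& values = leaf.vector().value();  // repeated float
        return std::vector<float>(values.begin(), values.end());
      }
      return {leaf.scalar()};  // scalar leaf: one logit
    }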
diff --git a/tensorflow/core/kernels/boosted_trees/resources.h b/tensorflow/core/kernels/boosted_trees/resources.h
index 3c7b2df..34a35f1 100644
--- a/tensorflow/core/kernels/boosted_trees/resources.h
+++ b/tensorflow/core/kernels/boosted_trees/resources.h
@@ -68,7 +68,7 @@
       const int32 tree_id, const int32 node_id, const int32 index_in_batch,
       const std::vector<TTypes<int32>::ConstVec>& bucketized_features) const;
 
-  float node_value(const int32 tree_id, const int32 node_id) const;
+  std::vector<float> node_value(const int32 tree_id, const int32 node_id) const;
 
   void set_node_value(const int32 tree_id, const int32 node_id,
                       const float logits);
diff --git a/tensorflow/core/kernels/boosted_trees/training_ops.cc b/tensorflow/core/kernels/boosted_trees/training_ops.cc
index 973cdec..7c025b3 100644
--- a/tensorflow/core/kernels/boosted_trees/training_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/training_ops.cc
@@ -288,7 +288,9 @@
       ensemble_resource->AddNewTreeWithLogits(kLayerByLayerTreeWeight, logits);
       current_bias = logits;
     } else {
-      current_bias = ensemble_resource->node_value(0, 0);
+      const auto& current_biases = ensemble_resource->node_value(0, 0);
+      DCHECK_EQ(current_biases.size(), 1);
+      current_bias = current_biases[0];
       continue_centering =
           std::abs(logits / current_bias) > kMinDeltaForCenterBias;
       current_bias += logits;
diff --git a/tensorflow/core/kernels/broadcast_to_op.cc b/tensorflow/core/kernels/broadcast_to_op.cc
index 2810925..8c43413 100644
--- a/tensorflow/core/kernels/broadcast_to_op.cc
+++ b/tensorflow/core/kernels/broadcast_to_op.cc
@@ -15,13 +15,17 @@
 
 #define EIGEN_USE_THREADS
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
 
+#include "tensorflow/core/kernels/broadcast_to_op.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/broadcast_to_op.h"
+#include "tensorflow/core/util/bcast.h"
 
 namespace tensorflow {
 
@@ -43,12 +47,42 @@
     OP_REQUIRES_OK(ctx,
                    ctx->op_kernel().MakeShape(shape_tensor, &output_shape));
 
+    // Handle copy.
+    if (output_shape == input_shape) {
+      ctx->set_output(0, input_tensor);
+      return;
+    }
+
+    OP_REQUIRES(ctx, input_shape.dims() <= output_shape.dims(),
+                errors::InvalidArgument(
+                    "Rank of input (", input_shape.dims(),
+                    ") must be no greater than rank of output shape (",
+                    output_shape.dims(), ")."));
+
     Tensor* output_tensor = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_tensor));
+    // Handle empty case.
+    if (output_shape.num_elements() == 0) {
+      return;
+    }
 
-    const Device& d = ctx->eigen_device<Device>();
-    functor::BroadcastTo<Device, T>()(d, ctx, *output_tensor, output_shape,
-                                      input_tensor, input_shape);
+    // Handle broadcast from Scalar.
+    const Device& device = ctx->eigen_device<Device>();
+    if (input_shape.dims() == 0) {
+      functor::FillFunctor<Device, T>()(device, output_tensor->flat<T>(),
+                                        input_tensor.scalar<T>());
+      return;
+    }
+
+    BCast bcast(BCast::FromShape(input_shape), BCast::FromShape(output_shape),
+                /*fewer_dims_optimization=*/true);
+    OP_REQUIRES(ctx, bcast.IsValid(),
+                errors::InvalidArgument(
+                    "Incompatible shapes: ", input_shape.DebugString(), " vs. ",
+                    output_shape.DebugString()));
+
+    functor::BroadcastTo<Device, T>()(device, ctx, *output_tensor, output_shape,
+                                      input_tensor, input_shape, bcast);
   }
 };
 
@@ -65,12 +99,12 @@
 #if GOOGLE_CUDA
 
 namespace functor {
-#define DECLARE_GPU_TEMPLATE(Type)                              \
-  template <>                                                   \
-  void BroadcastTo<GPUDevice, Type>::operator()(                \
-      const GPUDevice& d, OpKernelContext* ctx, Tensor& output, \
-      const TensorShape& output_shape, const Tensor& input,     \
-      const TensorShape& input_shape);                          \
+#define DECLARE_GPU_TEMPLATE(Type)                               \
+  template <>                                                    \
+  void BroadcastTo<GPUDevice, Type>::operator()(                 \
+      const GPUDevice& d, OpKernelContext* ctx, Tensor& output,  \
+      const TensorShape& output_shape, const Tensor& input,      \
+      const TensorShape& input_shape, const BCast& bcast) const; \
   extern template struct BroadcastTo<GPUDevice, Type>;
 
 TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_TEMPLATE);
diff --git a/tensorflow/core/kernels/broadcast_to_op.h b/tensorflow/core/kernels/broadcast_to_op.h
index bc11c5f..6ae860c 100644
--- a/tensorflow/core/kernels/broadcast_to_op.h
+++ b/tensorflow/core/kernels/broadcast_to_op.h
@@ -23,196 +23,81 @@
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/util/bcast.h"
 
 namespace tensorflow {
 
 namespace functor {
 
-#define BROADCAST_SHAPE(NDIMS, input_shape, output_shape)                 \
-  auto reshape = AsEigenDSizesWithPrefix<NDIMS>(input_shape);             \
-  auto broadcast = output_shape.AsEigenDSizes<NDIMS>();                   \
-  auto reshape_32bit = AsEigenDSizesWithPrefix<NDIMS, int>(input_shape);  \
-  auto broadcast_32bit = output_shape.AsEigenDSizes<NDIMS, int>();        \
-  if (input_shape.dims() > 0) {                                           \
-    for (int i = 0; i < NDIMS; i++) {                                     \
-      if (reshape[i] != broadcast[i]) {                                   \
-        OP_REQUIRES(                                                      \
-            ctx, ((reshape[i] != 0) && (broadcast[i] % reshape[i] == 0)), \
-            errors::InvalidArgument("invalid shape to broadcast from ",   \
-                                    input_shape.DebugString(), " to ",    \
-                                    output_shape.DebugString()));         \
-        broadcast[i] = broadcast[i] / reshape[i];                         \
-      } else {                                                            \
-        broadcast[i] = 1;                                                 \
-      }                                                                   \
-      if (can_use_32bit) {                                                \
-        broadcast_32bit[i] = static_cast<int>(broadcast[i]);              \
-      }                                                                   \
-    }                                                                     \
-  }
-
-#define HANDLE_BROADCAST_FROM_SCALAR()                              \
-  if (std::is_same<Eigen::GpuDevice, Device>::value) {              \
-    FillFunctor<Device, T>()(d, output_tensor.flat<T>(),            \
-                             input_tensor.scalar<T>());             \
-  } else {                                                          \
-    output.device(d) = output.constant(input_tensor.scalar<T>()()); \
-  }
-
-#define HANDLE_BROADCAST_CASE(dim_i)                                        \
-  case dim_i: {                                                             \
-    if (can_use_32bit) {                                                    \
-      auto input = input_tensor.tensor<T, dim_i>();                         \
-      To32Bit(output).device(d) =                                           \
-          To32Bit(input).reshape(reshape_32bit).broadcast(broadcast_32bit); \
-    } else {                                                                \
-      auto input = input_tensor.tensor<T, dim_i>();                         \
-      output.device(d) = input.reshape(reshape).broadcast(broadcast);       \
-    }                                                                       \
-  } break
-
 template <typename Device, typename T>
 struct BroadcastTo {
-  void operator()(const Device &d, OpKernelContext *ctx, Tensor &output_tensor,
-                  const TensorShape &output_shape, const Tensor &input_tensor,
-                  const TensorShape &input_shape) {
-    if (output_shape.num_elements() == 0) {
-      return;
-    }
-    if (output_shape == input_shape) {
-      output_tensor.flat<T>().device(d) = input_tensor.flat<T>();
-      return;
-    }
+  template <int NDIMS>
+  void DoBCast32Bit(const Device &device, typename TTypes<T, NDIMS>::Tensor out,
+                    typename TTypes<T, NDIMS>::ConstTensor in,
+                    const typename Eigen::array<int, NDIMS> &bcast) const {
+    To32Bit(out).device(device) = To32Bit(in).broadcast(bcast);
+  }
 
+  template <int NDIMS>
+  void DoBCast(
+      const Device &device, typename TTypes<T, NDIMS>::Tensor out,
+      typename TTypes<T, NDIMS>::ConstTensor in,
+      const typename Eigen::array<Eigen::DenseIndex, NDIMS> &bcast) const {
+    out.device(device) = in.broadcast(bcast);
+  }
+
+  template <int NDIMS>
+  void ReshapeAndBCast(const Device &device, Tensor &output_tensor,
+                       const Tensor &input_tensor, const BCast &bcast) const {
     const bool can_use_32bit = std::is_same<Eigen::GpuDevice, Device>::value &&
                                output_tensor.NumElements() < kint32max &&
                                input_tensor.NumElements() < kint32max;
-
-    switch (output_shape.dims()) {
-      case 0: {
-        if (input_shape.dims() > 0) {
-          ctx->CtxFailure(errors::InvalidArgument(
-              "invalid shape to broadcast from ", input_shape.DebugString(),
-              " to ", output_shape.DebugString()));
-          break;
-        }
-        output_tensor.scalar<T>().device(d) = input_tensor.scalar<T>();
-        break;
-      }
-      case 1: {
-        BROADCAST_SHAPE(1, input_shape, output_shape);
-
-        auto output = output_tensor.tensor<T, 1>();
-        switch (input_shape.dims()) {
-          case 0: {
-            HANDLE_BROADCAST_FROM_SCALAR();
-          } break;
-            HANDLE_BROADCAST_CASE(1);
-          default:
-            ctx->CtxFailure(errors::InvalidArgument(
-                "invalid shape to broadcast from ", input_shape.DebugString(),
-                " to ", output_shape.DebugString()));
-            break;
-        }
-      } break;
-      case 2: {
-        BROADCAST_SHAPE(2, input_shape, output_shape);
-        auto output = output_tensor.tensor<T, 2>();
-        switch (input_shape.dims()) {
-          case 0: {
-            HANDLE_BROADCAST_FROM_SCALAR();
-          } break;
-            HANDLE_BROADCAST_CASE(1);
-            HANDLE_BROADCAST_CASE(2);
-          default:
-            ctx->CtxFailure(errors::InvalidArgument(
-                "invalid shape to broadcast from ", input_shape.DebugString(),
-                " to ", output_shape.DebugString()));
-            break;
-        }
-      } break;
-      case 3: {
-        BROADCAST_SHAPE(3, input_shape, output_shape);
-        auto output = output_tensor.tensor<T, 3>();
-        switch (input_shape.dims()) {
-          case 0: {
-            HANDLE_BROADCAST_FROM_SCALAR();
-          } break;
-            HANDLE_BROADCAST_CASE(1);
-            HANDLE_BROADCAST_CASE(2);
-            HANDLE_BROADCAST_CASE(3);
-          default:
-            ctx->CtxFailure(errors::InvalidArgument(
-                "invalid shape to broadcast from ", input_shape.DebugString(),
-                " to ", output_shape.DebugString()));
-            break;
-        }
-      } break;
-      case 4: {
-        BROADCAST_SHAPE(4, input_shape, output_shape);
-        auto output = output_tensor.tensor<T, 4>();
-        switch (input_shape.dims()) {
-          case 0: {
-            HANDLE_BROADCAST_FROM_SCALAR();
-          } break;
-            HANDLE_BROADCAST_CASE(1);
-            HANDLE_BROADCAST_CASE(2);
-            HANDLE_BROADCAST_CASE(3);
-            HANDLE_BROADCAST_CASE(4);
-          default:
-            ctx->CtxFailure(errors::InvalidArgument(
-                "invalid shape to broadcast from ", input_shape.DebugString(),
-                " to ", output_shape.DebugString()));
-            break;
-        }
-      } break;
-      case 5: {
-        BROADCAST_SHAPE(5, input_shape, output_shape);
-        auto output = output_tensor.tensor<T, 5>();
-        switch (input_shape.dims()) {
-          case 0: {
-            HANDLE_BROADCAST_FROM_SCALAR();
-          } break;
-            HANDLE_BROADCAST_CASE(1);
-            HANDLE_BROADCAST_CASE(2);
-            HANDLE_BROADCAST_CASE(3);
-            HANDLE_BROADCAST_CASE(4);
-            HANDLE_BROADCAST_CASE(5);
-          default:
-            ctx->CtxFailure(errors::InvalidArgument(
-                "invalid shape to broadcast from ", input_shape.DebugString(),
-                " to ", output_shape.DebugString()));
-            break;
-        }
-      } break;
-      default:
-        ctx->CtxFailure(errors::InvalidArgument(
-            "invalid shape to broadcast from ", input_shape.DebugString(),
-            " to ", output_shape.DebugString()));
-        break;
+    if (can_use_32bit) {
+      DoBCast32Bit<NDIMS>(
+          device, output_tensor.template shaped<T, NDIMS>(bcast.result_shape()),
+          input_tensor.template shaped<T, NDIMS>(bcast.x_reshape()),
+          BCast::ToIndexArrayType<int, NDIMS>(bcast.x_bcast()));
+    } else {
+      DoBCast<NDIMS>(
+          device, output_tensor.template shaped<T, NDIMS>(bcast.result_shape()),
+          input_tensor.template shaped<T, NDIMS>(bcast.x_reshape()),
+          BCast::ToIndexArrayType<Eigen::DenseIndex, NDIMS>(bcast.x_bcast()));
     }
   }
 
- private:
-  template <int NDIMS, typename DimType = Eigen::DenseIndex>
-  Eigen::DSizes<DimType, NDIMS> AsEigenDSizesWithPrefix(
-      const TensorShape &shape) const {
-    Eigen::DSizes<DimType, NDIMS> dsizes;
-    for (int d = 0; d < NDIMS - shape.dims(); d++) {
-      dsizes[d] = 1;
+  // PRECONDITION: rank(input_shape) > 0 &&
+  //               rank(input_shape) <= rank(output_shape) &&
+  //               output_shape.num_elements() > 0.
+  void operator()(const Device &device, OpKernelContext *ctx,
+                  Tensor &output_tensor, const TensorShape &output_shape,
+                  const Tensor &input_tensor, const TensorShape &input_shape,
+                  const BCast &bcast) const {
+    const int ndims = bcast.y_reshape().size();
+    switch (ndims) {
+      case 1:
+        ReshapeAndBCast<1>(device, output_tensor, input_tensor, bcast);
+        break;
+      case 2:
+        ReshapeAndBCast<2>(device, output_tensor, input_tensor, bcast);
+        break;
+      case 3:
+        ReshapeAndBCast<3>(device, output_tensor, input_tensor, bcast);
+        break;
+      case 4:
+        ReshapeAndBCast<4>(device, output_tensor, input_tensor, bcast);
+        break;
+      case 5:
+        ReshapeAndBCast<5>(device, output_tensor, input_tensor, bcast);
+        break;
+      default:
+        ctx->SetStatus(errors::Unimplemented(
+            "Broadcast between ", input_shape.DebugString(), " and ",
+            output_shape.DebugString(), " is not supported yet."));
+        break;
     }
-    for (int d = NDIMS - shape.dims(); d < NDIMS; d++) {
-      dsizes[d] =
-          static_cast<DimType>(shape.dim_size(d - (NDIMS - shape.dims())));
-    }
-    return dsizes;
   }
 };
 
-#undef BROADCAST_SHAPE
-#undef HANDLE_BROADCAST_FROM_SCALAR
-#undef HANDLE_BROADCAST_CASE
-
 }  // namespace functor
 }  // namespace tensorflow
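The rewrite above replaces the per-rank macro ladder with BCast, which reduces any broadcast to a reshape of the input plus per-dimension replication counts (x_reshape / x_bcast), collapsing adjacent dimensions when fewer_dims_optimization is on. A minimal Eigen-only sketch of the broadcast step itself, assuming Eigen's unsupported Tensor module is available; for x = [2, 1] against y = [2, 3], BCast would describe this roughly as x_reshape = [2, 1], x_bcast = [1, 3]:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<float, 2> in(2, 1);
      in.setValues({{1.f}, {2.f}});
      Eigen::array<Eigen::DenseIndex, 2> bcast{{1, 3}};   // replication per dim
      Eigen::Tensor<float, 2> out = in.broadcast(bcast);  // shape [2, 3]
      std::cout << out << "\n";
    }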
 
diff --git a/tensorflow/core/kernels/collective_nccl_reducer.cc b/tensorflow/core/kernels/collective_nccl_reducer.cc
index 113f148..c5e6f06 100644
--- a/tensorflow/core/kernels/collective_nccl_reducer.cc
+++ b/tensorflow/core/kernels/collective_nccl_reducer.cc
@@ -149,7 +149,9 @@
           << col_params_->group.num_tasks << " current task "
           << col_params_->instance.task_names[col_params_->default_rank]
           << " num local devices " << num_local_devices
-          << " num global devices " << num_global_devices;
+          << " num global devices " << num_global_devices << " device "
+          << col_ctx_->device_name << " instance "
+          << col_params_->instance.instance_key;
   NcclManager::instance()->AddToAllReduce(
       std::move(participant),
       {nccl_collective_key, num_local_devices, num_global_devices,
diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc
index 04e37e8..2335628 100644
--- a/tensorflow/core/kernels/collective_ops.cc
+++ b/tensorflow/core/kernels/collective_ops.cc
@@ -43,6 +43,10 @@
       // Call in a blockable thread because it's not guaranteed that
       // this call cannot block.
       c->env()->SchedClosure([this, c, done, col_exec]() {
+        VLOG(1) << "CollectiveOpKernel CompleteParams for collective "
+                << col_params_.name << " device " << c->device()->name()
+                << " group " << col_params_.group.group_key << " instance "
+                << col_params_.instance.instance_key;
         col_exec->CompleteParamsAsync(
             c->device()->name(), &col_params_, c->cancellation_manager(),
             [this, c, done](const Status& s) {
@@ -64,6 +68,57 @@
   std::vector<int32> dependencies_;
 };
 
+class CollectiveGatherOpKernel : public CollectiveOpKernel {
+ public:
+  explicit CollectiveGatherOpKernel(OpKernelConstruction* c)
+      : CollectiveOpKernel(c) {
+    col_params_.instance.type = GATHER_COLLECTIVE;
+    OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size));
+    OP_REQUIRES_OK(c, c->GetAttr("group_key", &col_params_.group.group_key));
+    OP_REQUIRES_OK(
+        c, c->GetAttr("instance_key", &col_params_.instance.instance_key));
+    OP_REQUIRES_OK(c, c->GetAttr("T", &col_params_.instance.data_type));
+    OP_REQUIRES_OK(c, c->GetAttr("shape", &col_params_.instance.shape));
+    const NodeDef& real_node = c->def();
+    col_params_.name = strings::StrCat(real_node.name(), ": Gather");
+    col_params_.group.device_type = c->device_type();
+  }
+
+  void ComputeAsync(OpKernelContext* c, DoneCallback done) override {
+    CollectiveExecutor* col_exec = c->collective_executor();
+    OP_REQUIRES_ASYNC(
+        c, col_exec,
+        errors::Internal(
+            "Failed to get CollectiveExecutor from OpKernelContext for Op ",
+            col_params_.name),
+        done);
+    // Allocate output on the first pass through this function.  This must be
+    // done immediately, while we're still in the executor thread.  Otherwise
+    // the memory is not guaranteed to be unused by any concurrently executing
+    // GPU kernel.
+    if (c->mutable_output(0) == nullptr) {
+      // Allocate the output tensor.
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK_ASYNC(
+          c, c->allocate_output(0, col_params_.instance.shape, &output), done);
+    }
+    if (!CanProceedWithCompute(c, col_exec, done)) return;
+    auto actual_done = [c, done](const Status& s) {
+      OP_REQUIRES_OK_ASYNC(c, s, done);
+      done();
+    };
+    col_exec->ExecuteAsync(c, col_params_, GetCollectiveKey(c), actual_done);
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(CollectiveGatherOpKernel);
+};
+
+REGISTER_KERNEL_BUILDER(Name("CollectiveGather").Device(DEVICE_CPU),
+                        CollectiveGatherOpKernel);
+REGISTER_KERNEL_BUILDER(Name("CollectiveGather").Device(DEVICE_GPU),
+                        CollectiveGatherOpKernel);
+
 class CollectiveReduceOpKernel : public CollectiveOpKernel {
  public:
   explicit CollectiveReduceOpKernel(OpKernelConstruction* c)
@@ -149,10 +204,18 @@
       col_params_.instance.shape = c->input(0).shape();
     }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
-    auto actual_done = [c, col_exec, done](const Status& s) {
+
+    int32 instance_key = col_params_.instance.instance_key;
+    auto actual_done = [c, instance_key, done](const Status& s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
       done();
+      VLOG(1) << "CollectiveReduceKernel ExecuteAsync done for device "
+              << c->device()->name() << " instance " << instance_key;
     };
+    VLOG(1) << "CollectiveReduceKernel ExecuteAsync start for collective "
+            << col_params_.name << " device " << c->device()->name()
+            << " group " << col_params_.group.group_key << " instance "
+            << instance_key;
     col_exec->ExecuteAsync(c, col_params_, GetCollectiveKey(c), actual_done);
   }
 
@@ -211,10 +274,17 @@
                          " does not match shape of input"),
         done);
 
-    auto actual_done = [c, col_exec, done](const Status& s) {
+    int32 instance_key = col_params_.instance.instance_key;
+    auto actual_done = [c, instance_key, done](const Status& s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
       done();
+      VLOG(1) << "CollectiveBcastSendOpKernel ExecuteAsync done for device "
+              << c->device()->name() << " instance " << instance_key;
     };
+    VLOG(1) << "CollectiveBcastSendOpKernel ExecuteAsync start for collective "
+            << col_params_.name << " device " << c->device()->name()
+            << " group " << col_params_.group.group_key << " instance "
+            << instance_key;
     col_exec->ExecuteAsync(c, col_params_, GetCollectiveKey(c), actual_done);
   }
 
@@ -266,10 +336,17 @@
     }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
 
-    auto actual_done = [c, col_exec, done](const Status& s) {
+    int32 instance_key = col_params_.instance.instance_key;
+    auto actual_done = [c, instance_key, done](const Status& s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
       done();
+      VLOG(1) << "CollectiveBcastRecvOpKernel ExecuteAsync done for device "
+              << c->device()->name() << " instance " << instance_key;
     };
+    VLOG(1) << "CollectiveBcastRecvOpKernel ExecuteAsync start for collective "
+            << col_params_.name << " device " << c->device()->name()
+            << " group " << col_params_.group.group_key << " instance "
+            << instance_key;
     col_exec->ExecuteAsync(c, col_params_, GetCollectiveKey(c), actual_done);
   }
 
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index 5520cf7..a988f63 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -23,6 +23,7 @@
 #include <algorithm>
 #include <vector>
 
+#include "absl/base/dynamic_annotations.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -184,6 +185,80 @@
 };
 #endif
 
+template <typename T>
+struct Conv2DCustomBackpropInputMatMulFunctor {
+  using MatrixMap = Eigen::Map<
+      Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
+  using ConstMatrixMap = Eigen::Map<
+      const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
+
+  void operator()(OpKernelContext* ctx, const T* out_data, const T* filter_data,
+                  const int filter_total_size, const int output_image_size,
+                  const int dims_out_depth, T* im2col_buf) {
+    // Compute gradient into 'im2col_buf'.
+    MatrixMap C(im2col_buf, output_image_size, filter_total_size);
+
+    ConstMatrixMap A(out_data, output_image_size, dims_out_depth);
+    ConstMatrixMap B(filter_data, filter_total_size, dims_out_depth);
+
+    C.noalias() = A * B.transpose();
+  }
+};
+
+#if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
+template <>
+struct Conv2DCustomBackpropInputMatMulFunctor<float> {
+  using T = float;
+
+  void operator()(OpKernelContext* ctx, const T* out_data, const T* filter_data,
+                  const int filter_total_size, const int output_image_size,
+                  const int dims_out_depth, T* im2col_buf) {
+    // Inputs are in RowMajor order; we "cheat" by swapping the LHS and RHS:
+    //   RowMajor: C   = A   * B
+    //   ColMajor: C^T = B^T * A^T
+    //
+    // Dimension names:
+    //   out_image_size    -> ois
+    //   filter_total_size -> fts
+    //   dims_out_depth    -> dod
+    //
+    // RowMajor:
+    //   im2col      = out_data    * filter_data^T
+    //   [ois x fts] = [ois x dod] * [fts x dod]^T
+    //
+    // ColMajor:
+    //   im2col^T    = filter_data *  out_data^T
+    //   [fts x ois] = [fts x dod] * [dod x ois]
+
+    const int m = filter_total_size;
+    const int n = output_image_size;
+    const int k = dims_out_depth;  // contraction dim
+
+    const char transposeA = 'T';  // sgemm(A) == filter_data
+    const char transposeB = 'N';  // sgemm(B) == out_data
+
+    const int ldA = dims_out_depth;
+    const int ldB = dims_out_depth;
+    const int ldC = filter_total_size;
+
+    const float alpha = 1.0;
+    const float beta = 0.0;
+
+    // mkldnn_sgemm code can't be instrumented with MSan.
+    ANNOTATE_MEMORY_IS_INITIALIZED(
+        im2col_buf, filter_total_size * output_image_size * sizeof(T));
+
+    mkldnn_status_t st =
+        mkldnn_sgemm(&transposeA, &transposeB, &m, &n, &k, &alpha, filter_data,
+                     &ldA, out_data, &ldB, &beta, im2col_buf, &ldC);
+
+    OP_REQUIRES(
+        ctx, st == 0,
+        errors::Internal("Failed to call mkldnn_sgemm. Error code: ", st));
+  }
+};
+#endif
+
 // Based on implementation written by Yangqing Jia (jiayq).
 template <typename Device, class T>
 class Conv2DCustomBackpropInputOp : public OpKernel {
@@ -417,21 +492,14 @@
         input_backprop_data += input_offset;
       }
     } else {
-      typedef Eigen::Map<
-          Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
-          MatrixMap;
-      typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic,
-                                             Eigen::RowMajor>>
-          ConstMatrixMap;
-
       for (int image_id = 0; image_id < dims.batch_size;
            image_id += shard_size) {
         const int shard_limit =
             std::min(static_cast<int>(shard_size),
                      static_cast<int>(dims.batch_size) - image_id);
 
-        auto shard = [&dims, &pad_top, &pad_left, &pad_bottom, &pad_right,
-                      &output_image_size, &filter_total_size,
+        auto shard = [&context, &dims, &pad_top, &pad_left, &pad_bottom,
+                      &pad_right, &output_image_size, &filter_total_size,
                       &input_backprop_data, &col_buffer_data,
                       &out_backprop_data, &filter_data, &input_offset,
                       &output_offset, &size_C](int64 start, int64 limit) {
@@ -440,13 +508,9 @@
             T* input_data = input_backprop_data + shard_id * input_offset;
             const T* out_data = out_backprop_data + shard_id * output_offset;
 
-            // Compute gradient into 'im2col_buf'.
-            MatrixMap C(im2col_buf, output_image_size, filter_total_size);
-
-            ConstMatrixMap A(out_data, output_image_size, dims.out_depth);
-            ConstMatrixMap B(filter_data, filter_total_size, dims.out_depth);
-
-            C.noalias() = A * B.transpose();
+            Conv2DCustomBackpropInputMatMulFunctor<T>()(
+                context, out_data, filter_data, filter_total_size,
+                output_image_size, dims.out_depth, im2col_buf);
 
             Col2im<T>(im2col_buf, dims.in_depth,
                       dims.spatial_dims[0].input_size,
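The RowMajor/ColMajor swap documented in the new functor can be checked in isolation: a row-major buffer read column-major is the transpose of the same matrix, so computing C = A * B^T row-major is the same as asking a column-major sgemm for C^T = B * A^T. A standalone Eigen sketch of that identity (assumes Eigen/Dense is available):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      using RowMajor =
          Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
      using ColMajor = Eigen::MatrixXf;  // column-major by default
      RowMajor A = RowMajor::Random(3, 2), B = RowMajor::Random(4, 2);
      RowMajor C = A * B.transpose();  // [3 x 4], what the functor computes
      // Reinterpret the same buffers column-major: shapes transpose for free.
      Eigen::Map<ColMajor> At(A.data(), 2, 3), Bt(B.data(), 2, 4), Ct(C.data(), 4, 3);
      std::cout << (Ct - Bt.transpose() * At).norm() << "\n";  // ~0
    }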
diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h
index 105a4b1..ccd24fc 100644
--- a/tensorflow/core/kernels/conv_ops.h
+++ b/tensorflow/core/kernels/conv_ops.h
@@ -110,7 +110,7 @@
 
 // Computes and validates convolutions dimensions from Conv2D parameters. If
 // parameters are valid, dimensions will be updated with derived convolution
-// dimensions, otherwise error will be returned.
+// dimensions, otherwise an error will be returned.
 Status ComputeConv2DDimension(const Conv2DParameters& params,
                               const Tensor& input, const Tensor& filter,
                               Conv2DDimensions* dimensions);
diff --git a/tensorflow/core/kernels/crop_and_resize_op_test.cc b/tensorflow/core/kernels/crop_and_resize_op_test.cc
index 6921020..0eadf4c 100644
--- a/tensorflow/core/kernels/crop_and_resize_op_test.cc
+++ b/tensorflow/core/kernels/crop_and_resize_op_test.cc
@@ -423,7 +423,7 @@
   //  ... (altogether 999 lines)
   //  0, 1, 2, ..., 998
   AddInput<float>(TensorShape({1, kLength, kLength, 1}),
-                  [kLength](int i) -> float { return i % kLength; });
+                  [](int i) -> float { return i % kLength; });
   AddInputFromArray<float>(TensorShape({2, 4}),
                            {0, 0, 0.5, 0.5, 0.5, 0.5, 1, 1});
   AddInputFromArray<int32>(TensorShape({2}), {0, 0});
@@ -437,15 +437,15 @@
   //  ... (altogether 500 lines)
   //  0, 1, 2, ..., 499
   Tensor result1(allocator(), DT_FLOAT, TensorShape({1, kHalf, kHalf, 1}));
-  test::FillFn<float>(&result1, [kHalf](int i) -> float { return i % kHalf; });
+  test::FillFn<float>(&result1, [](int i) -> float { return i % kHalf; });
 
   // Result 2:
   //  499, 500, 501, ..., 998
   //  ... (altogether 500 lines)
   //  499, 500, 501, ..., 998
   Tensor result2(allocator(), DT_FLOAT, TensorShape({1, kHalf, kHalf, 1}));
-  test::FillFn<float>(
-      &result2, [kHalf](int i) -> float { return i % kHalf + kHalf - 1; });
+  test::FillFn<float>(&result2,
+                      [](int i) -> float { return i % kHalf + kHalf - 1; });
 
   // Expected result is the concat of the two tensors.
   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, kHalf, kHalf, 1}));
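
The capture-list changes in this file lean on a C++ rule worth noting: reading the value of a constexpr variable inside a lambda does not odr-use it, so no capture is required, and the `[kLength]` and `[kHalf]` captures were redundant (the constants evidently satisfy the constant-expression rules, or the new empty capture lists would not compile). A minimal illustration:

    #include <cstdio>

    constexpr int kHalf = 500;

    int main() {
      // Reading kHalf's value inside the lambda is not an odr-use of it,
      // so no capture is needed (some compilers warn if one is given).
      auto fill = [](int i) -> float { return i % kHalf; };
      std::printf("%g\n", fill(503));  // prints 3
    }
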
diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc
index 39d0a99..82d9238 100644
--- a/tensorflow/core/kernels/cuda_solvers.cc
+++ b/tensorflow/core/kernels/cuda_solvers.cc
@@ -643,6 +643,50 @@
 
 TF_CALL_LAPACK_TYPES_NO_COMPLEX(GESVD_INSTANCE);
 
+template <typename Scalar, typename BufSizeFnT, typename SolverFnT>
+static inline Status GesvdjBatchedImpl(BufSizeFnT bufsize, SolverFnT solver,
+                                       CudaSolver* cuda_solver,
+                                       OpKernelContext* context,
+                                       cusolverDnHandle_t cusolver_dn_handle,
+                                       cusolverEigMode_t jobz, int m, int n,
+                                       Scalar* A, int lda, Scalar* S, Scalar* U,
+                                       int ldu, Scalar* V, int ldv,
+                                       int* dev_lapack_info, int batch_size) {
+  mutex_lock lock(handle_map_mutex);
+  /* Get amount of workspace memory required. */
+  int lwork;
+  /* Default parameters for gesvdj and gesvdjBatched. */
+  gesvdjInfo_t svdj_info;
+  TF_RETURN_IF_CUSOLVER_ERROR(cusolverDnCreateGesvdjInfo(&svdj_info));
+  TF_RETURN_IF_CUSOLVER_ERROR(bufsize(
+      cusolver_dn_handle, jobz, m, n, CUDAComplex(A), lda, S, CUDAComplex(U),
+      ldu, CUDAComplex(V), ldv, &lwork, svdj_info, batch_size));
+  /* Allocate device memory for workspace. */
+  auto dev_workspace =
+      cuda_solver->GetScratchSpace<Scalar>(lwork, "", /* on_host */ false);
+  TF_RETURN_IF_CUSOLVER_ERROR(solver(
+      cusolver_dn_handle, jobz, m, n, CUDAComplex(A), lda, S, CUDAComplex(U),
+      ldu, CUDAComplex(V), ldv, CUDAComplex(dev_workspace.mutable_data()),
+      lwork, dev_lapack_info, svdj_info, batch_size));
+  TF_RETURN_IF_CUSOLVER_ERROR(cusolverDnDestroyGesvdjInfo(svdj_info));
+  return Status::OK();
+}
+
+#define GESVDJBATCHED_INSTANCE(Scalar, type_prefix)                            \
+  template <>                                                                  \
+  Status CudaSolver::GesvdjBatched<Scalar>(                                    \
+      cusolverEigMode_t jobz, int m, int n, Scalar* dev_A, int lda,            \
+      Scalar* dev_S, Scalar* dev_U, int ldu, Scalar* dev_V, int ldv,           \
+      int* dev_lapack_info, int batch_size) {                                  \
+    return GesvdjBatchedImpl(DN_BUFSIZE_FN(gesvdjBatched, type_prefix),        \
+                             DN_SOLVER_FN(gesvdjBatched, type_prefix), this,   \
+                             context_, cusolver_dn_handle_, jobz, m, n, dev_A, \
+                             lda, dev_S, dev_U, ldu, dev_V, ldv,               \
+                             dev_lapack_info, batch_size);                     \
+  }
+
+TF_CALL_LAPACK_TYPES_NO_COMPLEX(GESVDJBATCHED_INSTANCE);
+
 //=============================================================================
 // Wrappers of cuBlas computational methods begin here.
 //
diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/kernels/cuda_solvers.h
index 1fc3447..fa8b4e2 100644
--- a/tensorflow/core/kernels/cuda_solvers.h
+++ b/tensorflow/core/kernels/cuda_solvers.h
@@ -312,6 +312,11 @@
   Status Gesvd(signed char jobu, signed char jobvt, int m, int n, Scalar* dev_A,
                int lda, Scalar* dev_S, Scalar* dev_U, int ldu, Scalar* dev_VT,
                int ldvt, int* dev_lapack_info) TF_MUST_USE_RESULT;
+  template <typename Scalar>
+  Status GesvdjBatched(cusolverEigMode_t jobz, int m, int n, Scalar* dev_A,
+                       int lda, Scalar* dev_S, Scalar* dev_U, int ldu,
+                       Scalar* dev_V, int ldv, int* dev_lapack_info,
+                       int batch_size);
 
  private:
   OpKernelContext* context_;  // not owned.
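
The new GesvdjBatched entry point mirrors the existing Gesvd wrapper: the caller supplies device-resident buffers plus one lapack_info slot per batch entry, and the Jacobi variant (gesvdj) is the path cuSOLVER intends for many small matrices. A hedged sketch of a call site, assuming the device buffers were allocated elsewhere and error handling is elided:

    // Hypothetical helper: batched Jacobi SVD of batch_size m-by-n matrices
    // stored contiguously in dev_A. dev_lapack_info must hold batch_size
    // ints; entry i reports convergence for matrix i.
    Status RunBatchedSvd(CudaSolver* solver, int m, int n, float* dev_A,
                         float* dev_S, float* dev_U, float* dev_V,
                         int* dev_lapack_info, int batch_size) {
      // CUSOLVER_EIG_MODE_VECTOR requests singular vectors as well as values.
      return solver->GesvdjBatched<float>(
          CUSOLVER_EIG_MODE_VECTOR, m, n, dev_A, /*lda=*/m, dev_S, dev_U,
          /*ldu=*/m, dev_V, /*ldv=*/n, dev_lapack_info, batch_size);
    }
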
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index b03ee93..9657e37 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -158,13 +158,20 @@
       return 0;
     }
   }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet
+  packetOp(const Packet& a, const Packet& b) const {
+    const Packet mask = pcmp_eq(b, pzero(b));
+    const Packet quotient = scalar_quotient_op<T>().packetOp(a, b);
+    return pandnot(quotient, mask);
+  }
 };
 
 template <typename T>
 struct functor_traits<div_no_nan_op<T>> {
   enum {
     Cost = functor_traits<scalar_quotient_op<T>>::Cost + NumTraits<T>::AddCost,
-    PacketAccess = false,
+    PacketAccess = true,
   };
 };
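
The new packetOp vectorizes div_no_nan with a standard mask trick: pcmp_eq(b, pzero(b)) produces all-one bits in every lane where b == 0, the quotient is computed unconditionally (yielding Inf or NaN in those lanes), and pandnot(quotient, mask) clears exactly those lanes back to zero. A scalar model of the same bit manipulation, assuming IEEE-754 floats:

    #include <cstdint>
    #include <cstring>

    // Scalar model of the packet path: build an all-ones mask where b == 0,
    // divide unconditionally, then AND-NOT the mask so 0/0 and x/0 become 0.
    float DivNoNan(float a, float b) {
      uint32_t mask = (b == 0.0f) ? 0xFFFFFFFFu : 0u;  // pcmp_eq(b, pzero(b))
      float q = a / b;  // Inf or NaN when b == 0 (defined under IEEE-754)
      uint32_t bits;
      std::memcpy(&bits, &q, sizeof(bits));
      bits &= ~mask;    // pandnot(quotient, mask)
      std::memcpy(&q, &bits, sizeof(q));
      return q;         // all-zero bits decode as +0.0f
    }
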
 
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 56c8399..f2a77af 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -263,7 +263,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:metrics",
         "//tensorflow/core:protos_all_cc",
     ],
 )
@@ -321,7 +320,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:metrics",
     ],
 )
 
@@ -355,7 +353,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:metrics",
         "//tensorflow/core:protos_all_cc",
     ],
 )
@@ -489,6 +486,7 @@
     name = "reader_dataset_ops",
     srcs = ["reader_dataset_ops.cc"],
     deps = [
+        "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -564,6 +562,7 @@
     srcs = ["graph_rewrite_dataset.cc"],
     hdrs = ["graph_rewrite_dataset.h"],
     deps = [
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
@@ -600,7 +599,6 @@
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:metrics",
         "@com_google_absl//absl/memory",
     ],
 )
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index 5a6c8ba..9def8c9 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -231,5 +231,35 @@
   return Status::OK();
 }
 
+Status AddToFunctionLibrary(FunctionLibraryDefinition* base,
+                            const FunctionLibraryDefinition& to_add) {
+  for (const auto& fn : to_add.ListFunctionNames()) {
+    if (auto found = base->Find(fn)) {
+      if (!OpDefEqual(found->signature(), to_add.Find(fn)->signature())) {
+        return errors::InvalidArgument("Cannot add function '", fn,
+                                       "' because a different function with "
+                                       "the same name already exists.");
+      }
+      TF_RETURN_IF_ERROR(base->RemoveFunction(fn));
+    }
+  }
+  return base->AddLibrary(to_add);
+}
+
+Status AddToFunctionLibrary(FunctionLibraryDefinition* base,
+                            const FunctionDefLibrary& to_add) {
+  for (const auto& fd : to_add.function()) {
+    if (auto found = base->Find(fd.signature().name())) {
+      if (!OpDefEqual(found->signature(), fd.signature())) {
+        return errors::InvalidArgument("Cannot add function '",
+                                       fd.signature().name(),
+                                       "' because a different function with "
+                                       "the same name already exists.");
+      }
+      TF_RETURN_IF_ERROR(base->RemoveFunction(fd.signature().name()));
+    }
+  }
+  return base->AddLibrary(to_add);
+}
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 0754d1d..d85e87c 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -98,6 +98,13 @@
   std::vector<string> keys_;
 };
 
+// Adds the functions in `to_add` to `base`. If a function with a matching
+// signature already exists in `base`, replaces it with the function from
+// `to_add`.
+Status AddToFunctionLibrary(FunctionLibraryDefinition* base,
+                            const FunctionLibraryDefinition& to_add);
+Status AddToFunctionLibrary(FunctionLibraryDefinition* base,
+                            const FunctionDefLibrary& to_add);
 }  // namespace data
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc
index 290399b..bddd2d4 100644
--- a/tensorflow/core/kernels/data/dataset_utils_test.cc
+++ b/tensorflow/core/kernels/data/dataset_utils_test.cc
@@ -14,6 +14,8 @@
 ==============================================================================*/
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/variant.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
@@ -77,6 +79,90 @@
             reader.ReadTensor("NonExistentKey", &val_tensor).code());
 }
 
+TEST(DatasetUtilsTest, AddToFunctionLibrary) {
+  auto make_fn_a = [](const string& fn_name) {
+    return FunctionDefHelper::Create(
+        /*function_name=*/fn_name,
+        /*in_def=*/{"arg: int64"},
+        /*out_def=*/{"ret: int64"},
+        /*attr_def=*/{},
+        /*node_def=*/{{{"node"}, "Identity", {"arg"}, {{"T", DT_INT64}}}},
+        /*ret_def=*/{{"ret", "node:output:0"}});
+  };
+
+  auto make_fn_b = [](const string& fn_name) {
+    return FunctionDefHelper::Create(
+        /*function_name=*/fn_name,
+        /*in_def=*/{"arg: int64"},
+        /*out_def=*/{"ret: int64"},
+        /*attr_def=*/{},
+        /*node_def=*/
+        {{{"node"}, "Identity", {"arg"}, {{"T", DT_INT64}}},
+         {{"node2"}, "Identity", {"node:output:0"}, {{"T", DT_INT64}}}},
+        /*ret_def=*/{{"ret", "node2:output:0"}});
+  };
+
+  FunctionDefLibrary fdef_base;
+  *fdef_base.add_function() = make_fn_a("0");
+  *fdef_base.add_function() = make_fn_a("1");
+  *fdef_base.add_function() = make_fn_a("2");
+
+  FunctionDefLibrary fdef_to_add;
+  *fdef_to_add.add_function() = make_fn_b("0");  // Override
+  *fdef_to_add.add_function() = make_fn_a("1");  // Do nothing
+  *fdef_to_add.add_function() = make_fn_b("3");  // Add new function
+
+  FunctionLibraryDefinition flib_0(OpRegistry::Global(), fdef_base);
+  TF_ASSERT_OK(AddToFunctionLibrary(&flib_0, fdef_to_add));
+
+  FunctionLibraryDefinition flib_1(OpRegistry::Global(), fdef_base);
+  FunctionLibraryDefinition flib_to_add(OpRegistry::Global(), fdef_to_add);
+  TF_ASSERT_OK(AddToFunctionLibrary(&flib_1, flib_to_add));
+
+  for (const auto& flib : {flib_0, flib_1}) {
+    EXPECT_TRUE(FunctionDefsEqual(*flib.Find("0"), make_fn_b("0")));
+    EXPECT_TRUE(FunctionDefsEqual(*flib.Find("1"), make_fn_a("1")));
+    EXPECT_TRUE(FunctionDefsEqual(*flib.Find("2"), make_fn_a("2")));
+    EXPECT_TRUE(FunctionDefsEqual(*flib.Find("3"), make_fn_b("3")));
+  }
+}
+
+TEST(DatasetUtilsTest, AddToFunctionLibraryWithConflictingSignatures) {
+  FunctionDefLibrary fdef_base;
+  *fdef_base.add_function() = FunctionDefHelper::Create(
+      /*function_name=*/"0",
+      /*in_def=*/{"arg: int64"},
+      /*out_def=*/{"ret: int64"},
+      /*attr_def=*/{},
+      /*node_def=*/{},
+      /*ret_def=*/{{"ret", "arg"}});
+
+  FunctionDefLibrary fdef_to_add;
+  *fdef_to_add.add_function() = FunctionDefHelper::Create(
+      /*function_name=*/"0",
+      /*in_def=*/{"arg: int64"},
+      /*out_def=*/{"ret: int64", "ret2: int64"},
+      /*attr_def=*/{},
+      /*node_def=*/{},
+      /*ret_def=*/{{"ret", "arg"}, {"ret2", "arg"}});
+
+  FunctionLibraryDefinition flib_0(OpRegistry::Global(), fdef_base);
+  Status s = AddToFunctionLibrary(&flib_0, fdef_to_add);
+  EXPECT_EQ(error::Code::INVALID_ARGUMENT, s.code());
+  EXPECT_EQ(
+      "Cannot add function '0' because a different function with the same "
+      "signature already exists.",
+      s.error_message());
+
+  FunctionLibraryDefinition flib_1(OpRegistry::Global(), fdef_base);
+  FunctionLibraryDefinition flib_to_add(OpRegistry::Global(), fdef_to_add);
+  s = AddToFunctionLibrary(&flib_1, flib_to_add);
+  EXPECT_EQ(error::Code::INVALID_ARGUMENT, s.code());
+  EXPECT_EQ(
+      "Cannot add function '0' because a different function with the same "
+      "signature already exists.",
+      s.error_message());
+}
 }  // namespace
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD
index 9171b91..4f59e63 100644
--- a/tensorflow/core/kernels/data/experimental/BUILD
+++ b/tensorflow/core/kernels/data/experimental/BUILD
@@ -125,7 +125,6 @@
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "//tensorflow/core:metrics",
         "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core/kernels:inplace_ops",
         "//tensorflow/core/kernels/data:captured_function",
diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
index 0ef5691..77c1749 100644
--- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
@@ -450,7 +450,7 @@
               result->UpdateStatus(allocate_status, offset);
             } else {
               for (size_t i = 0; i < return_values->size(); ++i) {
-                const Tensor& tensor = return_values->at(i);
+                Tensor& tensor = return_values->at(i);
                 Tensor* batch = &(result->output)[i];
                 if (tensor.NumElements() !=
                     (batch->NumElements() / batch->dim_size(0))) {
@@ -468,8 +468,8 @@
                 // TODO(mrry): Add a version of DoParallelConcat that allows us
                 // to move `tensor` where possible, to speed up string tensor
                 // batching.
-                Status copy_status =
-                    batch_util::CopyElementToSlice(tensor, batch, offset);
+                Status copy_status = batch_util::CopyElementToSlice(
+                    std::move(tensor), batch, offset);
                 if (!copy_status.ok()) {
                   result->UpdateStatus(copy_status, offset);
                   break;
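
The two hunks above work together: taking `tensor` by non-const reference is what makes the std::move legal, and moving it into CopyElementToSlice lets variable-length payloads (notably strings) be stolen rather than deep-copied once per element. A miniature of the idea, assuming a by-value element parameter like the one the call relies on (the real batch_util signature may differ):

    #include <string>
    #include <utility>
    #include <vector>

    // Miniature of the batching hot loop: a by-value parameter accepts
    // either a copy or, as here, a moved-in element with its buffer intact.
    void CopyElementToSlice(std::string element,
                            std::vector<std::string>* batch, size_t offset) {
      (*batch)[offset] = std::move(element);
    }

    void FillBatch(std::vector<std::string>& return_values,
                   std::vector<std::string>* batch) {
      // Assumes batch->size() >= return_values.size(); demo only.
      for (size_t i = 0; i < return_values.size(); ++i) {
        std::string& element = return_values[i];  // non-const, so movable
        CopyElementToSlice(std::move(element), batch, i);
      }
    }
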
diff --git a/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc
index a95773a..0397ca0 100644
--- a/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc
@@ -35,9 +35,9 @@
                    DatasetBase** output) override {
     int64 num_workers;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_workers", &num_workers));
-    OP_REQUIRES(ctx, num_workers > 0,
-                errors::InvalidArgument(
-                    "num_parallel_calls must be greater than zero."));
+    OP_REQUIRES(
+        ctx, num_workers > 0,
+        errors::InvalidArgument("num_workers must be greater than zero."));
 
     Dataset* dataset =
         new Dataset(ctx, input, num_workers, output_types_, output_shapes_);
@@ -62,8 +62,17 @@
     string DebugString() const override { return "RebatchDatasetOp::Dataset"; }
 
    private:
+    bool ShouldOptimizeFunctions() override {
+      // We only want to optimize functions for particular datasets, such as
+      // FlatMapDataset and InterleaveDataset, so we disable generalized
+      // function optimization and handle function modifications for those
+      // datasets explicitly in the rewrite.
+      return false;
+    }
+
     RewriterConfig CreateGrapplerRewriteConfig() override {
       RewriterConfig rewriter_config;
+      rewriter_config.set_fail_on_optimizer_errors(true);
       rewriter_config.add_optimizers(kOptimizerName);
       rewriter_config.set_meta_optimizer_iterations(
           RewriterConfig_NumIterationsType_ONE);
diff --git a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
index 7a16cda..e8fd051 100644
--- a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
@@ -98,8 +98,9 @@
                                   EXCLUSIVE_LOCKS_REQUIRED(mu_) {
                                     *ret = new ThreadPoolResource(
                                         ctx->env(), {}, display_name_,
-                                        num_threads_, max_intra_op_parallelism_,
-                                        false /* low_latency_hint */);
+                                        num_threads_,
+                                        /*low_latency_hint=*/false,
+                                        max_intra_op_parallelism_);
                                     return Status::OK();
                                   }));
       initialized_ = true;
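
The hunk above appears to be an argument-order fix: the boolean hint and the integer parallelism limit were passed in swapped positions, which compiles silently because bool and int convert to each other. The /*low_latency_hint=*/ style adopted in the replacement makes such call sites checkable; a minimal illustration of the failure mode (Pool is a stand-in type, not the actual resource class):

    // A bool and an int in adjacent positions convert implicitly, so a
    // swapped call compiles cleanly. Argument comments of the form
    // /*name=*/value make the intent visible, and verifiable by tools such
    // as clang-tidy's bugprone-argument-comment check.
    struct Pool {
      Pool(int num_threads, bool low_latency_hint,
           int max_intra_op_parallelism) {}
    };

    void Make() {
      // Pool bad(8, 4, false);   // hint and limit swapped; still compiles
      Pool good(8, /*low_latency_hint=*/false, /*max_intra_op_parallelism=*/4);
    }
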
diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc
index 5dff2be..3469743 100644
--- a/tensorflow/core/kernels/data/generator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/generator_dataset_op.cc
@@ -71,7 +71,7 @@
         : DatasetIterator<Dataset>(params) {}
 
     ~Iterator() override {
-      if (!finalized_) {
+      if (!finalized_ && initialized_) {
         std::vector<Tensor> ignored;
         Status s =
             instantiated_finalize_func_->RunInstantiated(state_, &ignored);
diff --git a/tensorflow/core/kernels/data/graph_rewrite_dataset.cc b/tensorflow/core/kernels/data/graph_rewrite_dataset.cc
index bc4bb46..cd80266 100644
--- a/tensorflow/core/kernels/data/graph_rewrite_dataset.cc
+++ b/tensorflow/core/kernels/data/graph_rewrite_dataset.cc
@@ -14,6 +14,8 @@
 ==============================================================================*/
 #include "tensorflow/core/kernels/data/graph_rewrite_dataset.h"
 
+#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
@@ -57,14 +59,9 @@
 
   // Some functions may have been modified without having their names
   // changed (for example, nested dataset graphs from FlatMap or
-  // Interleave). To avoid name conflicts, we remove these functions from
-  // flib_def_ before adding the optimized function library.
-  for (const FunctionDef& fd : graph_def.library().function()) {
-    if (flib_def_->Find(fd.signature().name()) != nullptr) {
-      TF_RETURN_IF_ERROR(flib_def_->RemoveFunction(fd.signature().name()));
-    }
-  }
-  TF_RETURN_IF_ERROR(flib_def_->AddLibrary(graph_def.library()));
+  // Interleave).
+  TF_RETURN_IF_ERROR(
+      AddToFunctionLibrary(flib_def_.get(), graph_def.library()));
 
   Graph graph(OpRegistry::Global());
   TF_RETURN_IF_ERROR(ImportGraphDef({}, graph_def, &graph, nullptr));
@@ -82,9 +79,21 @@
 Status GraphRewriteDataset::AsGraphDefInternal(SerializationContext* ctx,
                                                DatasetGraphDefBuilder* b,
                                                Node** output) const {
+  SerializationContext::Params params;
+  // The optimized input needs access to the newly optimized functions when
+  // it is serialized. Here, we use the optimized function library for
+  // serialization, which is the union of the function library from the
+  // OpKernelContext at dataset creation time and newly optimized functions.
+  // This includes all functions that optimized_input_ may use.
+  params.flib_def = flib_def_.get();
+  params.input_list = ctx->input_list();
+  params.optimization_only = ctx->optimization_only();
+  SerializationContext optimized_ctx(params);
+
   // We only serialize the optimized dataset to avoid re-running
   // optimizations when the input pipeline is restored from a checkpoint.
-  TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, optimized_input_, output));
+  TF_RETURN_IF_ERROR(
+      b->AddInputDataset(&optimized_ctx, optimized_input_, output));
   return Status::OK();
 }
 
@@ -161,6 +170,8 @@
   std::unique_ptr<tensorflow::grappler::GrapplerItem> grappler_item =
       tensorflow::grappler::GrapplerItemFromMetaGraphDef(
           "graph", meta_graph_def, item_config);
+  grappler_item->optimization_options().optimize_function_library =
+      ShouldOptimizeFunctions();
   std::unordered_map<string, tensorflow::DeviceProperties> device_map;
   tensorflow::grappler::VirtualCluster cluster(device_map);
 
diff --git a/tensorflow/core/kernels/data/graph_rewrite_dataset.h b/tensorflow/core/kernels/data/graph_rewrite_dataset.h
index dedbdce..856fcd3 100644
--- a/tensorflow/core/kernels/data/graph_rewrite_dataset.h
+++ b/tensorflow/core/kernels/data/graph_rewrite_dataset.h
@@ -73,6 +73,9 @@
   // optimizations to be run by the Grappler Meta Optimizer.
   virtual RewriterConfig CreateGrapplerRewriteConfig() = 0;
 
+  // Option specifying whether we want to optimize the function library as well.
+  virtual bool ShouldOptimizeFunctions() { return true; }
+
   Status ApplyOptimizations(OpKernelContext* ctx, GraphDef* graph_def,
                             string* output_node);
 
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 808f834..7e23ca5 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -99,7 +99,17 @@
       captured_state = iterator_state_;
     }
     if (captured_state) {
-      return captured_state->iterator->Save(ctx, writer);
+      SerializationContext::Params params;
+      // The iterator state may contain functions that are not present
+      // in ctx's function library. Namely, an iterator may be restored from
+      // a serialized iterator with a modified function library (for example, as
+      // a result of OptimizeDataset). These modified functions are needed
+      // to serialize the iterator again.
+      params.flib_def = captured_state->flib_def.get();
+      params.input_list = ctx->input_list();
+      params.optimization_only = ctx->optimization_only();
+      SerializationContext ctx_with_functions(params);
+      return captured_state->iterator->Save(&ctx_with_functions, writer);
     } else {
       return errors::FailedPrecondition(
           "Save() failed because the iterator has not been initialized. "
@@ -134,7 +144,14 @@
     std::unique_ptr<FunctionLibraryDefinition> flib_def(nullptr);
     std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(nullptr);
     TF_RETURN_IF_ERROR(ctx->function_library()->Clone(&flib_def, &pflr, &lib));
-    TF_RETURN_IF_ERROR(flib_def->AddLibrary(graph_def.library()));
+
+    // Some function names may be duplicated (for example, if the serialized
+    // graph has an optimized function that retains its original name). We
+    // override functions in flib_def in the event of conflict. It is
+    // safe to assume that any node in the serialized graph is referring to the
+    // serialized function when there is a conflict.
+    TF_RETURN_IF_ERROR(
+        AddToFunctionLibrary(flib_def.get(), graph_def.library()));
     std::unique_ptr<State> new_state = absl::make_unique<State>(
         std::move(flib_def), std::move(pflr), lib, nullptr /* iterator */);
 
@@ -567,7 +584,7 @@
       // NOTE(jsimsa): We must destroy the iterator before calling `done()`, to
       // avoid destruction races.
       IteratorBase* raw_iterator = iterator.release();
-      auto cleanup = gtl::MakeCleanup([ctx, raw_iterator, done] {
+      auto cleanup = gtl::MakeCleanup([raw_iterator, done] {
         delete raw_iterator;
         done();
       });
@@ -967,78 +984,58 @@
   }
 }
 
-namespace {
+void IteratorGetNextAsOptionalOp::ComputeAsync(OpKernelContext* ctx,
+                                               DoneCallback done) {
+  IteratorResource* iterator;
+  OP_REQUIRES_OK_ASYNC(
+      ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator), done);
+  // The call to `iterator->GetNext()` may block and depend on an
+  // inter-op thread pool thread, so we issue the call from the
+  // owned thread pool.
+  background_worker_.Schedule(std::bind(
+      [this, ctx, iterator](DoneCallback done) {
+        std::vector<Tensor> components;
+        bool end_of_sequence = false;
 
-class IteratorGetNextAsOptionalOp : public AsyncOpKernel {
- public:
-  explicit IteratorGetNextAsOptionalOp(OpKernelConstruction* ctx)
-      : AsyncOpKernel(ctx),
-        background_worker_(ctx->env(),
-                           "tf_data_iterator_get_next_as_optional") {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-  }
+        Status s = iterator->GetNext(IteratorContext(ctx), &components,
+                                     &end_of_sequence);
+        // NOTE(mrry): We must unref the iterator before calling `done()`, to
+        // avoid destruction races.
+        iterator->Unref();
 
-  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
-    IteratorResource* iterator;
-    OP_REQUIRES_OK_ASYNC(
-        ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator), done);
-    // The call to `iterator->GetNext()` may block and depend on an
-    // inter-op thread pool thread, so we issue the call from the
-    // owned thread pool.
-    background_worker_.Schedule(std::bind(
-        [this, ctx, iterator](DoneCallback done) {
-          std::vector<Tensor> components;
-          bool end_of_sequence = false;
-
-          Status s = iterator->GetNext(IteratorContext(ctx), &components,
-                                       &end_of_sequence);
-          // NOTE(mrry): We must unref the iterator before calling `done()`, to
-          // avoid destruction races.
-          iterator->Unref();
-
-          if (!s.ok()) {
-            ctx->SetStatus(s);
-          } else if (end_of_sequence) {
-            OP_REQUIRES_OK_ASYNC(ctx, WriteOptionalNoneToOutput(ctx, 0), done);
-          } else {
-            for (int i = 0; i < components.size(); ++i) {
-              OP_REQUIRES_ASYNC(
-                  ctx, components[i].dtype() == output_types_[i],
-                  errors::InvalidArgument(
-                      "The given optional does not match the expected type for "
-                      "component ",
-                      i, ". Expected: ", DataTypeString(output_types_[i]),
-                      ". Actual: ", DataTypeString(components[i].dtype()), "."),
-                  done);
-              OP_REQUIRES_ASYNC(
-                  ctx,
-                  output_shapes_[i].IsCompatibleWith(components[i].shape()),
-                  errors::InvalidArgument(
-                      "The given optional does not match the expected shape "
-                      "for component ",
-                      i, ". Expected: ", output_shapes_[i].DebugString(),
-                      ". Actual: ", components[i].shape().DebugString(), "."),
-                  done);
-            }
-
-            OP_REQUIRES_OK_ASYNC(
-                ctx,
-                WriteOptionalWithValueToOutput(ctx, 0, std::move(components)),
+        if (!s.ok()) {
+          ctx->SetStatus(s);
+        } else if (end_of_sequence) {
+          OP_REQUIRES_OK_ASYNC(ctx, WriteOptionalNoneToOutput(ctx, 0), done);
+        } else {
+          for (int i = 0; i < components.size(); ++i) {
+            OP_REQUIRES_ASYNC(
+                ctx, components[i].dtype() == output_types_[i],
+                errors::InvalidArgument(
+                    "The given optional does not match the expected type for "
+                    "component ",
+                    i, ". Expected: ", DataTypeString(output_types_[i]),
+                    ". Actual: ", DataTypeString(components[i].dtype()), "."),
+                done);
+            OP_REQUIRES_ASYNC(
+                ctx, output_shapes_[i].IsCompatibleWith(components[i].shape()),
+                errors::InvalidArgument(
+                    "The given optional does not match the expected shape "
+                    "for component ",
+                    i, ". Expected: ", output_shapes_[i].DebugString(),
+                    ". Actual: ", components[i].shape().DebugString(), "."),
                 done);
           }
-          done();
-        },
-        std::move(done)));
-  }
 
- private:
-  BackgroundWorker background_worker_;
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-};
-
-}  // namespace
+          OP_REQUIRES_OK_ASYNC(
+              ctx,
+              WriteOptionalWithValueToOutput(ctx, 0, std::move(components)),
+              done);
+        }
+        done();
+      },
+      std::move(done)));
+}
 
 void IteratorToStringHandleOp::Compute(OpKernelContext* ctx) {
   const Tensor& resource_handle_t = ctx->input(0);
diff --git a/tensorflow/core/kernels/data/iterator_ops.h b/tensorflow/core/kernels/data/iterator_ops.h
index cd72269..7d769d3 100644
--- a/tensorflow/core/kernels/data/iterator_ops.h
+++ b/tensorflow/core/kernels/data/iterator_ops.h
@@ -19,6 +19,8 @@
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/ops_util.h"
 
 namespace tensorflow {
@@ -115,6 +117,24 @@
   BackgroundWorker background_worker_;
 };
 
+class IteratorGetNextAsOptionalOp : public AsyncOpKernel {
+ public:
+  explicit IteratorGetNextAsOptionalOp(OpKernelConstruction* ctx)
+      : AsyncOpKernel(ctx),
+        background_worker_(ctx->env(),
+                           "tf_data_iterator_get_next_as_optional") {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+  }
+
+  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override;
+
+ private:
+  BackgroundWorker background_worker_;
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+};
+
 class IteratorGetNextSyncOp : public OpKernel {
  public:
   explicit IteratorGetNextSyncOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
diff --git a/tensorflow/core/kernels/data/map_defun_op.cc b/tensorflow/core/kernels/data/map_defun_op.cc
index 8122048..f1be942 100644
--- a/tensorflow/core/kernels/data/map_defun_op.cc
+++ b/tensorflow/core/kernels/data/map_defun_op.cc
@@ -258,6 +258,7 @@
             "output: ",
             index);
       }
+      Tensor* out;
       {  // Locking scope
         mutex_lock l(compute_opts_->mu);
         if (!compute_opts_->output_shapes.at(index).IsCompatibleWith(
@@ -272,15 +273,15 @@
           // this index. Store the shape and allocate the output accordingly.
           compute_opts_->output_shapes.at(index) = val.shape();
 
-          Tensor* out = nullptr;
           TensorShape actual_shape = val.shape();
           actual_shape.InsertDim(0, compute_opts_->batch_size);
           TF_RETURN_IF_ERROR(
               compute_opts_->output.allocate(index, actual_shape, &out));
+        } else {
+          out = (compute_opts_->output)[index];
         }
-        return batch_util::CopyElementToSlice(
-            val, (compute_opts_->output)[index], iter_);
       }
+      return batch_util::CopyElementToSlice(val, out, iter_);
     }
 
    private:
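
The restructuring above narrows the critical section: only the shared shape bookkeeping and the one-time output allocation need `compute_opts_->mu`, while the per-element CopyElementToSlice writes to a slice no other caller touches and can run unlocked. The general shape of the pattern, as a self-contained sketch:

    #include <mutex>

    // Pattern sketch: resolve the destination under the lock, then do the
    // expensive write outside it. Safe when each writer owns a disjoint
    // slot, so only the bookkeeping is actually shared state.
    struct Output {
      std::mutex mu;
      int* slots[16];  // guarded by mu
    };

    void Write(Output* out, int index, int value) {
      int* dest;
      {
        std::lock_guard<std::mutex> l(out->mu);  // short critical section
        dest = out->slots[index];
      }
      *dest = value;  // disjoint per-index destination; no lock needed
    }
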
diff --git a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
index f0c6f58..d78ed60 100644
--- a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
+++ b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
@@ -415,14 +415,10 @@
           resource = new MultiDeviceIterator(
               output_types_, output_shapes_, devices_, std::move(flib_def),
               std::move(pflr), lib, std::move(function_handle_cache));
+          // NOTE: `mgr->Create()` transfers the one reference on `resource` to
+          // `mgr`.
           OP_REQUIRES_OK(context, mgr->Create<MultiDeviceIterator>(
                                       container_name, unique_name, resource));
-          Status s = VerifyResource(resource);
-          if (TF_PREDICT_FALSE(!s.ok())) {
-            resource->Unref();
-            context->SetStatus(s);
-            return;
-          }
         } else {
           unique_name = cinfo_.name();
           container_name = cinfo_.container();
diff --git a/tensorflow/core/kernels/data/optional_ops.cc b/tensorflow/core/kernels/data/optional_ops.cc
index 6590c7e..473dbeb 100644
--- a/tensorflow/core/kernels/data/optional_ops.cc
+++ b/tensorflow/core/kernels/data/optional_ops.cc
@@ -23,133 +23,6 @@
 namespace data {
 namespace {
 
-class OptionalNoneOp : public OpKernel {
- public:
-  explicit OptionalNoneOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    OP_REQUIRES_OK(ctx, WriteOptionalNoneToOutput(ctx, 0));
-  }
-};
-
-class OptionalFromValueOp : public OpKernel {
- public:
-  explicit OptionalFromValueOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    OpInputList components_input;
-    OP_REQUIRES_OK(ctx, ctx->input_list("components", &components_input));
-    std::vector<Tensor> components(components_input.begin(),
-                                   components_input.end());
-    OP_REQUIRES_OK(
-        ctx, WriteOptionalWithValueToOutput(ctx, 0, std::move(components)));
-  }
-};
-
-class OptionalHasValueOp : public OpKernel {
- public:
-  explicit OptionalHasValueOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor* optional_input;
-    OP_REQUIRES_OK(ctx, ctx->input("optional", &optional_input));
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(optional_input->shape()),
-                errors::InvalidArgument(
-                    "Input to OptionalHasValue must be a scalar tensor "
-                    "containing an OptionalVariant object."));
-    const OptionalVariant* optional =
-        optional_input->scalar<Variant>()().get<OptionalVariant>();
-    OP_REQUIRES(
-        ctx, optional != nullptr,
-        errors::InvalidArgument(
-            "Input to OptionalHasValue must be an OptionalVariant object."));
-    Tensor* result;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {}, &result));
-    result->scalar<bool>()() = optional->has_value();
-  }
-};
-
-class OptionalGetValueOp : public OpKernel {
- public:
-  explicit OptionalGetValueOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
-    OP_REQUIRES(
-        ctx, output_shapes_.size() == output_types_.size(),
-        errors::InvalidArgument(
-            "output_types and output_shapes must be same length, got:\n",
-            "output_types: ", output_types_.size(), "\n",
-            "output_shapes: ", output_shapes_.size()));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor* optional_input;
-    OP_REQUIRES_OK(ctx, ctx->input("optional", &optional_input));
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(optional_input->shape()),
-                errors::InvalidArgument(
-                    "Input to OptionalHasValue must be a scalar tensor "
-                    "containing an OptionalVariant object."));
-    const OptionalVariant* optional =
-        optional_input->scalar<Variant>()().get<OptionalVariant>();
-    OP_REQUIRES(
-        ctx, optional != nullptr,
-        errors::InvalidArgument(
-            "Input to OptionalHasValue must be an OptionalVariant object."));
-    OP_REQUIRES(
-        ctx, optional->has_value(),
-        errors::InvalidArgument("The given optional does not have a value."));
-    const auto& components = optional->get_values();
-    OP_REQUIRES(ctx, components.size() == output_types_.size(),
-                errors::InvalidArgument(
-                    "The given optional has ", components.size(),
-                    " components, expected ", output_types_.size()));
-    for (int i = 0; i < components.size(); ++i) {
-      OP_REQUIRES(
-          ctx, components[i].dtype() == output_types_[i],
-          errors::InvalidArgument(
-              "The given optional does not match the expected type for "
-              "component ",
-              i, ". Expected: ", DataTypeString(output_types_[i]),
-              ". Actual: ", DataTypeString(components[i].dtype()), "."));
-      OP_REQUIRES(ctx,
-                  output_shapes_[i].IsCompatibleWith(components[i].shape()),
-                  errors::InvalidArgument(
-                      "The given optional does not match the expected shape "
-                      "for component ",
-                      i, ". Expected: ", output_shapes_[i].DebugString(),
-                      ". Actual: ", components[i].shape().DebugString(), "."));
-      ctx->set_output(i, components[i]);
-    }
-  }
-
- private:
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-};
-
-REGISTER_KERNEL_BUILDER(Name("OptionalNone").Device(DEVICE_CPU).Priority(2),
-                        OptionalNoneOp);
-REGISTER_KERNEL_BUILDER(Name("OptionalNone").Device(DEVICE_GPU).Priority(1),
-                        OptionalNoneOp);
-REGISTER_KERNEL_BUILDER(
-    Name("OptionalFromValue").Device(DEVICE_CPU).Priority(2),
-    OptionalFromValueOp);
-REGISTER_KERNEL_BUILDER(
-    Name("OptionalFromValue").Device(DEVICE_GPU).Priority(1),
-    OptionalFromValueOp);
-
-REGISTER_KERNEL_BUILDER(Name("OptionalHasValue").Device(DEVICE_CPU).Priority(2),
-                        OptionalHasValueOp);
-REGISTER_KERNEL_BUILDER(Name("OptionalHasValue")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("has_value")
-                            .Priority(1),
-                        OptionalHasValueOp);
-REGISTER_KERNEL_BUILDER(Name("OptionalGetValue").Device(DEVICE_CPU).Priority(2),
-                        OptionalGetValueOp);
-REGISTER_KERNEL_BUILDER(Name("OptionalGetValue").Device(DEVICE_GPU).Priority(1),
-                        OptionalGetValueOp);
-
 static Status OptionalDeviceCopy(
     const OptionalVariant& from, OptionalVariant* to,
     const UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn& copy) {
@@ -190,6 +63,75 @@
 
 }  // namespace
 
+void OptionalNoneOp::Compute(OpKernelContext* ctx) {
+  OP_REQUIRES_OK(ctx, WriteOptionalNoneToOutput(ctx, 0));
+}
+
+void OptionalFromValueOp::Compute(OpKernelContext* ctx) {
+  OpInputList components_input;
+  OP_REQUIRES_OK(ctx, ctx->input_list("components", &components_input));
+  std::vector<Tensor> components(components_input.begin(),
+                                 components_input.end());
+  OP_REQUIRES_OK(ctx,
+                 WriteOptionalWithValueToOutput(ctx, 0, std::move(components)));
+}
+
+void OptionalHasValueOp::Compute(OpKernelContext* ctx) {
+  const Tensor* optional_input;
+  OP_REQUIRES_OK(ctx, ctx->input("optional", &optional_input));
+  OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(optional_input->shape()),
+              errors::InvalidArgument(
+                  "Input to OptionalHasValue must be a scalar tensor "
+                  "containing an OptionalVariant object."));
+  const OptionalVariant* optional =
+      optional_input->scalar<Variant>()().get<OptionalVariant>();
+  OP_REQUIRES(
+      ctx, optional != nullptr,
+      errors::InvalidArgument(
+          "Input to OptionalHasValue must be an OptionalVariant object."));
+  Tensor* result;
+  OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {}, &result));
+  result->scalar<bool>()() = optional->has_value();
+}
+
+void OptionalGetValueOp::Compute(OpKernelContext* ctx) {
+  const Tensor* optional_input;
+  OP_REQUIRES_OK(ctx, ctx->input("optional", &optional_input));
+  OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(optional_input->shape()),
+              errors::InvalidArgument(
+                  "Input to OptionalHasValue must be a scalar tensor "
+                  "containing an OptionalVariant object."));
+  const OptionalVariant* optional =
+      optional_input->scalar<Variant>()().get<OptionalVariant>();
+  OP_REQUIRES(
+      ctx, optional != nullptr,
+      errors::InvalidArgument(
+          "Input to OptionalHasValue must be an OptionalVariant object."));
+  OP_REQUIRES(
+      ctx, optional->has_value(),
+      errors::InvalidArgument("The given optional does not have a value."));
+  const auto& components = optional->get_values();
+  OP_REQUIRES(
+      ctx, components.size() == output_types_.size(),
+      errors::InvalidArgument("The given optional has ", components.size(),
+                              " components, expected ", output_types_.size()));
+  for (int i = 0; i < components.size(); ++i) {
+    OP_REQUIRES(ctx, components[i].dtype() == output_types_[i],
+                errors::InvalidArgument(
+                    "The given optional does not match the expected type for "
+                    "component ",
+                    i, ". Expected: ", DataTypeString(output_types_[i]),
+                    ". Actual: ", DataTypeString(components[i].dtype()), "."));
+    OP_REQUIRES(ctx, output_shapes_[i].IsCompatibleWith(components[i].shape()),
+                errors::InvalidArgument(
+                    "The given optional does not match the expected shape "
+                    "for component ",
+                    i, ". Expected: ", output_shapes_[i].DebugString(),
+                    ". Actual: ", components[i].shape().DebugString(), "."));
+    ctx->set_output(i, components[i]);
+  }
+}
+
 Status WriteOptionalWithValueToOutput(OpKernelContext* ctx, int output_index,
                                       std::vector<Tensor> value) {
   OptionalVariant v(std::move(value));
@@ -213,6 +155,33 @@
   return Status::OK();
 }
 
+namespace {
+
+REGISTER_KERNEL_BUILDER(Name("OptionalNone").Device(DEVICE_CPU).Priority(2),
+                        OptionalNoneOp);
+REGISTER_KERNEL_BUILDER(Name("OptionalNone").Device(DEVICE_GPU).Priority(1),
+                        OptionalNoneOp);
+REGISTER_KERNEL_BUILDER(
+    Name("OptionalFromValue").Device(DEVICE_CPU).Priority(2),
+    OptionalFromValueOp);
+REGISTER_KERNEL_BUILDER(
+    Name("OptionalFromValue").Device(DEVICE_GPU).Priority(1),
+    OptionalFromValueOp);
+
+REGISTER_KERNEL_BUILDER(Name("OptionalHasValue").Device(DEVICE_CPU).Priority(2),
+                        OptionalHasValueOp);
+REGISTER_KERNEL_BUILDER(Name("OptionalHasValue")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("has_value")
+                            .Priority(1),
+                        OptionalHasValueOp);
+REGISTER_KERNEL_BUILDER(Name("OptionalGetValue").Device(DEVICE_CPU).Priority(2),
+                        OptionalGetValueOp);
+REGISTER_KERNEL_BUILDER(Name("OptionalGetValue").Device(DEVICE_GPU).Priority(1),
+                        OptionalGetValueOp);
+
+}  // namespace
+
 REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP,
                                          DEVICE_CPU, OptionalVariant,
                                          OptionalZerosLike<CPUDevice>);
diff --git a/tensorflow/core/kernels/data/optional_ops.h b/tensorflow/core/kernels/data/optional_ops.h
index 7089a42..24eb1b8 100644
--- a/tensorflow/core/kernels/data/optional_ops.h
+++ b/tensorflow/core/kernels/data/optional_ops.h
@@ -152,6 +152,47 @@
   return Status::OK();
 }
 
+class OptionalNoneOp : public OpKernel {
+ public:
+  explicit OptionalNoneOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override;
+};
+
+class OptionalFromValueOp : public OpKernel {
+ public:
+  explicit OptionalFromValueOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override;
+};
+
+class OptionalHasValueOp : public OpKernel {
+ public:
+  explicit OptionalHasValueOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override;
+};
+
+class OptionalGetValueOp : public OpKernel {
+ public:
+  explicit OptionalGetValueOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES(
+        ctx, output_shapes_.size() == output_types_.size(),
+        errors::InvalidArgument(
+            "output_types and output_shapes must be same length, got:\n",
+            "output_types: ", output_types_.size(), "\n",
+            "output_shapes: ", output_shapes_.size()));
+  }
+
+  void Compute(OpKernelContext* ctx) override;
+
+ private:
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+};
+
 }  // namespace data
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/data/reader_dataset_ops.cc b/tensorflow/core/kernels/data/reader_dataset_ops.cc
index 789f9c8..cbc987d 100644
--- a/tensorflow/core/kernels/data/reader_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/reader_dataset_ops.cc
@@ -12,6 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/core/common_runtime/metrics.h"
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -29,6 +30,8 @@
 // See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following ops.
 
+constexpr char kTextLineDatasetName[] = "TextLine";
+
 class TextLineDatasetOp : public DatasetOpKernel {
  public:
   using DatasetOpKernel::DatasetOpKernel;
@@ -91,8 +94,8 @@
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return absl::make_unique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::TextLine")});
+      return absl::make_unique<Iterator>(Iterator::Params{
+          this, strings::StrCat(prefix, "::", kTextLineDatasetName)});
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -142,6 +145,8 @@
 
             if (s.ok()) {
               // Produce the line as output.
+              metrics::RecordTFDataBytesRead(kTextLineDatasetName,
+                                             line_contents.size());
               out_tensors->emplace_back(ctx->allocator({}), DT_STRING,
                                         TensorShape({}));
               out_tensors->back().scalar<string>()() = std::move(line_contents);
@@ -268,9 +273,12 @@
 REGISTER_KERNEL_BUILDER(Name("TextLineDataset").Device(DEVICE_CPU),
                         TextLineDatasetOp);
 
+constexpr char kFixedLengthRecordDatasetName[] = "FixedLengthRecord";
+
 class FixedLengthRecordDatasetOp : public DatasetOpKernel {
  public:
   using DatasetOpKernel::DatasetOpKernel;
+
   explicit FixedLengthRecordDatasetOp(OpKernelConstruction* ctx)
       : DatasetOpKernel(ctx),
         op_version_(ctx->def().op() == "FixedLengthRecordDataset" ? 1 : 2) {}
@@ -346,10 +354,12 @@
       if (compression_type_.empty()) {
         return absl::make_unique<UncompressedIterator>(
             UncompressedIterator::Params{
-                this, strings::StrCat(prefix, "::FixedLengthRecord")});
+                this,
+                strings::StrCat(prefix, "::", kFixedLengthRecordDatasetName)});
       } else {
         return absl::make_unique<CompressedIterator>(CompressedIterator::Params{
-            this, strings::StrCat(prefix, "::FixedLengthRecord")});
+            this,
+            strings::StrCat(prefix, "::", kFixedLengthRecordDatasetName)});
       }
     }
 
@@ -411,6 +421,9 @@
               string record;
               TF_RETURN_IF_ERROR(
                   input_buffer_->ReadNBytes(dataset()->record_bytes_, &record));
+              metrics::RecordTFDataBytesRead(kFixedLengthRecordDatasetName,
+                                             dataset()->record_bytes_);
+
               // Produce the record as output.
               Tensor record_tensor(ctx->allocator({}), DT_STRING, {});
               record_tensor.scalar<string>()() = record;
@@ -532,6 +545,9 @@
                 string record;
                 TF_RETURN_IF_ERROR(buffered_input_stream_->ReadNBytes(
                     dataset()->record_bytes_, &record));
+                metrics::RecordTFDataBytesRead(kFixedLengthRecordDatasetName,
+                                               dataset()->record_bytes_);
+
                 // Produce the record as output.
                 Tensor record_tensor(ctx->allocator({}), DT_STRING, {});
                 record_tensor.scalar<string>()() = std::move(record);
@@ -544,6 +560,8 @@
               Status s = buffered_input_stream_->ReadNBytes(
                   dataset()->record_bytes_, &record);
               if (s.ok()) {
+                metrics::RecordTFDataBytesRead(kFixedLengthRecordDatasetName,
+                                               dataset()->record_bytes_);
                 lookahead_cache_.append(record);
                 record = lookahead_cache_.substr(0, dataset()->record_bytes_);
                 lookahead_cache_ =
@@ -717,6 +735,8 @@
 REGISTER_KERNEL_BUILDER(Name("FixedLengthRecordDatasetV2").Device(DEVICE_CPU),
                         FixedLengthRecordDatasetOp);
 
+constexpr char kTFRecordDatasetName[] = "TFRecord";
+
 class TFRecordDatasetOp : public DatasetOpKernel {
  public:
   using DatasetOpKernel::DatasetOpKernel;
@@ -766,8 +786,8 @@
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return absl::make_unique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::TFRecord")});
+      return absl::make_unique<Iterator>(Iterator::Params{
+          this, strings::StrCat(prefix, "::", kTFRecordDatasetName)});
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -816,6 +836,9 @@
             Status s =
                 reader_->ReadRecord(&out_tensors->back().scalar<string>()());
             if (s.ok()) {
+              metrics::RecordTFDataBytesRead(
+                  kTFRecordDatasetName,
+                  out_tensors->back().scalar<string>()().size());
               *end_of_sequence = false;
               return Status::OK();
             }
diff --git a/tensorflow/core/kernels/decode_csv_op.cc b/tensorflow/core/kernels/decode_csv_op.cc
index 6bfb5bd..ba63695 100644
--- a/tensorflow/core/kernels/decode_csv_op.cc
+++ b/tensorflow/core/kernels/decode_csv_op.cc
@@ -145,7 +145,7 @@
               output[f]->flat<float>()(i) = record_defaults[f].flat<float>()(0);
             } else {
               float value;
-              OP_REQUIRES(ctx, strings::safe_strtof(fields[f].c_str(), &value),
+              OP_REQUIRES(ctx, strings::safe_strtof(fields[f], &value),
                           errors::InvalidArgument(
                               "Field ", f, " in record ", i,
                               " is not a valid float: ", fields[f]));
@@ -165,7 +165,7 @@
                   record_defaults[f].flat<double>()(0);
             } else {
               double value;
-              OP_REQUIRES(ctx, strings::safe_strtod(fields[f].c_str(), &value),
+              OP_REQUIRES(ctx, strings::safe_strtod(fields[f], &value),
                           errors::InvalidArgument(
                               "Field ", f, " in record ", i,
                               " is not a valid double: ", fields[f]));
diff --git a/tensorflow/core/kernels/decode_raw_op.cc b/tensorflow/core/kernels/decode_raw_op.cc
index eaef5a6..3dd019c 100644
--- a/tensorflow/core/kernels/decode_raw_op.cc
+++ b/tensorflow/core/kernels/decode_raw_op.cc
@@ -110,6 +110,8 @@
 REGISTER(int16);
 REGISTER(int8);
 REGISTER(int64);
+REGISTER(complex64);
+REGISTER(complex128);
 
 #undef REGISTER
 
diff --git a/tensorflow/core/kernels/functional_ops.cc b/tensorflow/core/kernels/functional_ops.cc
index a3e7b53..246a6ce 100644
--- a/tensorflow/core/kernels/functional_ops.cc
+++ b/tensorflow/core/kernels/functional_ops.cc
@@ -23,6 +23,7 @@
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 
 namespace tensorflow {
@@ -211,6 +212,98 @@
   };
 };
 
+class CaseOp : public AsyncOpKernel {
+ public:
+  explicit CaseOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) {
+    auto lib = ctx->function_library();
+    OP_REQUIRES(ctx, lib != nullptr, errors::Internal("No function library"));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("branches", &branch_funcs_));
+  }
+
+  ~CaseOp() override {}
+
+  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
+    auto lib = ctx->function_library();
+    OP_REQUIRES_ASYNC(ctx, lib != nullptr,
+                      errors::Internal("No function library"), done);
+
+    // TODO(b/37549631): Because this op has `SetIsStateful()` in its op
+    // registration, this kernel may be shared by multiple subgraphs, which have
+    // different associated `FunctionLibraryRuntime` objects and hence different
+    // `FHandle` namespaces. So we must call Instantiate() to make sure we get
+    // the correct function handles with respect to `lib`. Note the underlying
+    // `lib->Instantiate()` caches the created function handles, so calling
+    // `Instantiate()` repeatedly on the same `lib` and function is cheap.
+    std::vector<FHandle> branch_handles(branch_funcs_.size());
+    for (int i = 0; i < branch_funcs_.size(); i++) {
+      OP_REQUIRES_OK_ASYNC(
+          ctx, Instantiate(lib, branch_funcs_[i], &branch_handles[i]), done);
+    }
+
+    const Tensor& branch_index = ctx->input(0);
+    OP_REQUIRES_ASYNC(ctx, TensorShapeUtils::IsScalar(branch_index.shape()),
+                      errors::InvalidArgument("branch_index must be scalar"),
+                      done);
+    int32 branch = branch_index.scalar<int32>()();
+    (new State(this, ctx, branch, branch_handles, done))->Start();
+  }
+
+ private:
+  std::vector<NameAttrList> branch_funcs_;
+
+  class State {
+   public:
+    State(CaseOp* kernel, OpKernelContext* ctx, int branch,
+          std::vector<FHandle> branch_handles, DoneCallback done)
+        : kernel_(kernel),
+          ctx_(ctx),
+          branch_(branch),
+          branch_handles_(branch_handles),
+          done_(std::move(done)),
+          lib_(CHECK_NOTNULL(ctx_->function_library())) {
+      SetRunOptions(ctx_, &opts_, true /* always_collect_stats */);
+      for (int i = 1; i < ctx_->num_inputs(); ++i) {
+        args_.push_back(ctx_->input(i));
+      }
+    }
+
+    ~State() {}
+
+    void Start() {
+      int branch = branch_;
+      // The last branch is the default branch.
+      if (branch < 0 || branch >= branch_handles_.size()) {
+        branch = branch_handles_.size() - 1;
+      }
+      rets_.clear();
+      lib_->Run(
+          // Evaluate one of the branches.
+          opts_, branch_handles_[branch], args_, &rets_,
+          // Done callback
+          [this](Status s) {
+            if (s.ok()) {
+              s = SetOutputs(kernel_, ctx_, rets_);
+            }
+            ctx_->SetStatus(s);
+            DoneCallback captured_done(std::move(done_));
+            delete this;
+            captured_done();
+          });
+    }
+
+   private:
+    CaseOp* const kernel_;
+    OpKernelContext* const ctx_;
+    const int branch_;
+    std::vector<FHandle> branch_handles_;
+    DoneCallback done_;
+    FunctionLibraryRuntime* const lib_;
+    FunctionLibraryRuntime::Options opts_;
+    TensorVec args_;
+    TensorVec rets_;
+  };
+};
+
 // TODO(drpng): remove this.
 REGISTER_KERNEL_BUILDER(Name("_If").Device(DEVICE_CPU), IfOp);
 REGISTER_KERNEL_BUILDER(Name("_If").Device(DEVICE_GPU).HostMemory("cond"),
@@ -219,6 +312,10 @@
 REGISTER_KERNEL_BUILDER(Name("If").Device(DEVICE_CPU), IfOp);
 REGISTER_KERNEL_BUILDER(Name("If").Device(DEVICE_GPU).HostMemory("cond"), IfOp);
 
+REGISTER_KERNEL_BUILDER(Name("Case").Device(DEVICE_CPU), CaseOp);
+REGISTER_KERNEL_BUILDER(
+    Name("Case").Device(DEVICE_GPU).HostMemory("branch_index"), CaseOp);
+
 REGISTER_KERNEL_BUILDER(Name("StatelessIf").Device(DEVICE_CPU), IfOp);
 REGISTER_KERNEL_BUILDER(
     Name("StatelessIf").Device(DEVICE_GPU).HostMemory("cond"), IfOp);
diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc
index 1286381..9f09052 100644
--- a/tensorflow/core/kernels/list_kernels.cc
+++ b/tensorflow/core/kernels/list_kernels.cc
@@ -79,12 +79,10 @@
   to->max_num_elements = from.max_num_elements;
   to->tensors.reserve(from.tensors.size());
   for (const Tensor& t : from.tensors) {
-    Tensor tmp(t.dtype());
-    // Do not copy uninitialized tensors.
+    to->tensors.emplace_back(t.dtype());
     if (t.dtype() != DT_INVALID) {
-      TF_RETURN_IF_ERROR(copy(t, &tmp));
+      TF_RETURN_IF_ERROR(copy(t, &to->tensors.back()));
     }
-    to->tensors.push_back(tmp);
   }
   return Status::OK();
 }
@@ -178,6 +176,46 @@
   return Status::OK();
 }
 
+Status GetInputList(OpKernelContext* c, int index, const TensorList** list) {
+  if (!TensorShapeUtils::IsScalar(c->input(index).shape())) {
+    return errors::InvalidArgument("Input list must be a scalar saw: ",
+                                   c->input(index).shape().DebugString());
+  }
+  const TensorList* l = c->input(index).scalar<Variant>()().get<TensorList>();
+  if (l == nullptr) {
+    return errors::InvalidArgument(
+        "Input handle is not a list. Saw: '",
+        c->input(index).scalar<Variant>()().DebugString(), "'");
+  }
+  *list = l;
+  return Status::OK();
+}
+
+Status ForwardInputOrCreateNewList(OpKernelContext* c, int32 input_index,
+                                   int32 output_index,
+                                   const TensorList& input_list,
+                                   TensorList** output_list) {
+  // Attempt to forward the input tensor to the output if possible.
+  AllocatorAttributes attr;
+  attr.set_on_host(true);
+  std::unique_ptr<Tensor> maybe_output =
+      c->forward_input(input_index, output_index, DT_VARIANT, TensorShape{},
+                       c->input_memory_type(input_index), attr);
+  Tensor* output_tensor;
+  if (maybe_output != nullptr) {
+    // Forwarding succeeded.
+    output_tensor = maybe_output.get();
+  } else {
+    // If forwarding is not possible, allocate a new output tensor and copy
+    // the `input_list` to it.
+    TF_RETURN_IF_ERROR(
+        c->allocate_output(output_index, {}, &output_tensor, attr));
+    output_tensor->scalar<Variant>()() = input_list;
+  }
+  *output_list = output_tensor->scalar<Variant>()().get<TensorList>();
+  return Status::OK();
+}
+
 class EmptyTensorList : public OpKernel {
  public:
   explicit EmptyTensorList(OpKernelConstruction* ctx) : OpKernel(ctx) {
@@ -239,11 +277,8 @@
                                         " but tried to append ",
                                         DataTypeString(input.dtype())));
 
-    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, l != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* l = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &l));
     OP_REQUIRES(c, l->element_shape.IsCompatibleWith(input.shape()),
                 errors::InvalidArgument(
                     "Tried to append a tensor with incompatible shape to a "
@@ -264,21 +299,9 @@
                                   " max_num_elements: ", l->max_num_elements));
     }
 
-    AllocatorAttributes attr;
-    attr.set_on_host(true);
-    std::unique_ptr<Tensor> maybe_result = c->forward_input(
-        0, 0, DT_VARIANT, TensorShape{}, c->input_memory_type(0), attr);
-    if (maybe_result != nullptr) {
-      maybe_result->scalar<Variant>()().get<TensorList>()->tensors.push_back(
-          input);
-    } else {
-      Tensor* result;
-      OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr));
-      TensorList output;
-      output = *l;
-      output.tensors.push_back(input);
-      result->scalar<Variant>()() = std::move(output);
-    }
+    TensorList* output_list = nullptr;
+    OP_REQUIRES_OK(c, ForwardInputOrCreateNewList(c, 0, 0, *l, &output_list));
+    output_list->tensors.push_back(input);
   }
 
  private:
@@ -301,12 +324,8 @@
   ~TensorListLength() override {}
 
   void Compute(OpKernelContext* c) override {
-    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(
-        c, l != nullptr,
-        errors::InvalidArgument(
-            "TensorListLength received a variant which is not a list. Saw: '",
-            c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* l = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &l));
     Tensor* result;
     OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result));
     result->scalar<int32>()() = l->tensors.size();
@@ -329,15 +348,8 @@
   explicit TensorListElementShape(OpKernelConstruction* c) : OpKernel(c) {}
 
   void Compute(OpKernelContext* c) override {
-    OP_REQUIRES(
-        c, c->input(0).shape().num_elements() == 1,
-        errors::InvalidArgument("List tensors are supposed to be scalars."));
-    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, l != nullptr,
-                errors::InvalidArgument(
-                    "TensorListElementShape received a variant which is not a "
-                    "list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* l = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &l));
     Tensor* result;
     if (l->element_shape.unknown_rank()) {
       OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape({}), &result));
@@ -405,12 +417,8 @@
   explicit TensorListResize(OpKernelConstruction* c) : OpKernel(c) {}
 
   void Compute(OpKernelContext* c) override {
-    const TensorList* input_list =
-        c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, input_list != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* input_list = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &input_list));
     int32 size = c->input(1).scalar<int32>()();
     OP_REQUIRES(
         c, size >= 0,
@@ -466,11 +474,8 @@
   }
 
   void Compute(OpKernelContext* c) override {
-    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, l != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* l = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &l));
     OP_REQUIRES(c, element_dtype_ == l->element_dtype,
                 errors::InvalidArgument("Invalid data types; op elements ",
                                         DataTypeString(element_dtype_),
@@ -488,21 +493,9 @@
                     "list index. Item element shape: ",
                     value.shape().DebugString(),
                     " list shape: ", l->element_shape.DebugString()));
-    AllocatorAttributes attr;
-    attr.set_on_host(true);
-    std::unique_ptr<Tensor> maybe_result = c->forward_input(
-        0, 0, DT_VARIANT, TensorShape{}, c->input_memory_type(0), attr);
-    if (maybe_result != nullptr) {
-      maybe_result->scalar<Variant>()().get<TensorList>()->tensors[index] =
-          value;
-    } else {
-      TensorList output;
-      output = *l;
-      output.tensors[index] = value;
-      Tensor* result;
-      OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr));
-      result->scalar<Variant>()() = std::move(output);
-    }
+    TensorList* output_list = nullptr;
+    OP_REQUIRES_OK(c, ForwardInputOrCreateNewList(c, 0, 0, *l, &output_list));
+    output_list->tensors[index] = value;
   }
 
  private:
@@ -624,50 +617,54 @@
 
 #endif  // GOOGLE_CUDA
 
-#define REGISTER_TENSOR_LIST_OPS_CPU(T)                           \
-  REGISTER_KERNEL_BUILDER(Name("TensorListStack")                 \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListStack<CPUDevice, T>)          \
-  REGISTER_KERNEL_BUILDER(Name("TensorListGather")                \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListGather<CPUDevice, T>)         \
-  REGISTER_KERNEL_BUILDER(Name("TensorListConcat")                \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListConcat<CPUDevice, T>)         \
-  REGISTER_KERNEL_BUILDER(Name("TensorListConcatV2")              \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListConcat<CPUDevice, T>)         \
-  REGISTER_KERNEL_BUILDER(Name("TensorListGetItem")               \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListGetItem<CPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListPopBack")               \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListPopBack<CPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListFromTensor")            \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListFromTensor<CPUDevice, T>)     \
-  REGISTER_KERNEL_BUILDER(Name("TensorListScatter")               \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListScatter<CPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListScatterV2")             \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListScatter<CPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListSplit")                 \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
-                          TensorListSplit<CPUDevice, T>)          \
-  REGISTER_KERNEL_BUILDER(Name("TensorListPushBackBatch")         \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_CPU),                \
+#define REGISTER_TENSOR_LIST_OPS_CPU(T)                                    \
+  REGISTER_KERNEL_BUILDER(Name("TensorListStack")                          \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListStack<CPUDevice, T>)                   \
+  REGISTER_KERNEL_BUILDER(Name("TensorListGather")                         \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListGather<CPUDevice, T>)                  \
+  REGISTER_KERNEL_BUILDER(Name("TensorListConcat")                         \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListConcat<CPUDevice, T>)                  \
+  REGISTER_KERNEL_BUILDER(Name("TensorListConcatV2")                       \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListConcat<CPUDevice, T>)                  \
+  REGISTER_KERNEL_BUILDER(Name("TensorListGetItem")                        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListGetItem<CPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListPopBack")                        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListPopBack<CPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListFromTensor")                     \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListFromTensor<CPUDevice, T>)              \
+  REGISTER_KERNEL_BUILDER(Name("TensorListScatter")                        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListScatter<CPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListScatterV2")                      \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListScatter<CPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListScatterIntoExistingList")        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListScatterIntoExistingList<CPUDevice, T>) \
+  REGISTER_KERNEL_BUILDER(Name("TensorListSplit")                          \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
+                          TensorListSplit<CPUDevice, T>)                   \
+  REGISTER_KERNEL_BUILDER(Name("TensorListPushBackBatch")                  \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_CPU),                         \
                           TensorListPushBackBatch<CPUDevice, T>)
 
 TF_CALL_POD_STRING_TYPES(REGISTER_TENSOR_LIST_OPS_CPU);
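
GetInputList and ForwardInputOrCreateNewList factor out a pattern the old kernels repeated inline: try to reuse (forward) the input Variant buffer for the output and mutate it in place, falling back to a copy of the list only when forwarding fails. A self-contained sketch of that forward-or-copy contract, with TensorListLike standing in for TensorList (all names here are illustrative):

    #include <cassert>
    #include <vector>

    struct TensorListLike {
      std::vector<int> tensors;
    };

    // If the runtime handed us a forwarded buffer, mutate it directly
    // (zero-copy); otherwise copy the input into fresh storage first.
    TensorListLike* ForwardOrCopy(TensorListLike* forwarded,
                                  const TensorListLike& input,
                                  TensorListLike* fresh_output) {
      if (forwarded != nullptr) return forwarded;  // zero-copy path
      *fresh_output = input;                       // copy-on-write path
      return fresh_output;
    }

    int main() {
      TensorListLike input{{1, 2}};
      TensorListLike storage;
      TensorListLike* out = ForwardOrCopy(nullptr, input, &storage);
      out->tensors.push_back(3);  // the original list is left untouched
      assert(input.tensors.size() == 2 && out->tensors.size() == 3);
    }
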
diff --git a/tensorflow/core/kernels/list_kernels.cu.cc b/tensorflow/core/kernels/list_kernels.cu.cc
index 652ca2b..9922a92 100644
--- a/tensorflow/core/kernels/list_kernels.cu.cc
+++ b/tensorflow/core/kernels/list_kernels.cu.cc
@@ -36,68 +36,73 @@
 
 typedef Eigen::GpuDevice GPUDevice;
 
-#define REGISTER_TENSOR_LIST_OPS_GPU(T)                           \
-  REGISTER_KERNEL_BUILDER(Name("TensorListStack")                 \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("element_shape"),       \
-                          TensorListStack<GPUDevice, T>)          \
-  REGISTER_KERNEL_BUILDER(Name("TensorListGather")                \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("indices")              \
-                              .HostMemory("element_shape"),       \
-                          TensorListGather<GPUDevice, T>)         \
-  REGISTER_KERNEL_BUILDER(Name("TensorListGetItem")               \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("index")                \
-                              .HostMemory("element_shape"),       \
-                          TensorListGetItem<GPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListPopBack")               \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("element_shape"),       \
-                          TensorListPopBack<GPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListConcat")                \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("lengths"),             \
-                          TensorListConcat<GPUDevice, T>)         \
-  REGISTER_KERNEL_BUILDER(Name("TensorListConcatV2")              \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("leading_dims")         \
-                              .HostMemory("element_shape")        \
-                              .HostMemory("lengths"),             \
-                          TensorListConcat<GPUDevice, T>)         \
-  REGISTER_KERNEL_BUILDER(Name("TensorListPushBackBatch")         \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU),                \
-                          TensorListPushBackBatch<GPUDevice, T>)  \
-  REGISTER_KERNEL_BUILDER(Name("TensorListFromTensor")            \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("element_shape"),       \
-                          TensorListFromTensor<GPUDevice, T>)     \
-  REGISTER_KERNEL_BUILDER(Name("TensorListScatter")               \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("element_shape")        \
-                              .HostMemory("indices"),             \
-                          TensorListScatter<GPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListScatterV2")             \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("element_shape")        \
-                              .HostMemory("num_elements")         \
-                              .HostMemory("indices"),             \
-                          TensorListScatter<GPUDevice, T>)        \
-  REGISTER_KERNEL_BUILDER(Name("TensorListSplit")                 \
-                              .TypeConstraint<T>("element_dtype") \
-                              .Device(DEVICE_GPU)                 \
-                              .HostMemory("element_shape")        \
-                              .HostMemory("lengths"),             \
+#define REGISTER_TENSOR_LIST_OPS_GPU(T)                                    \
+  REGISTER_KERNEL_BUILDER(Name("TensorListStack")                          \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("element_shape"),                \
+                          TensorListStack<GPUDevice, T>)                   \
+  REGISTER_KERNEL_BUILDER(Name("TensorListGather")                         \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("indices")                       \
+                              .HostMemory("element_shape"),                \
+                          TensorListGather<GPUDevice, T>)                  \
+  REGISTER_KERNEL_BUILDER(Name("TensorListGetItem")                        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("index")                         \
+                              .HostMemory("element_shape"),                \
+                          TensorListGetItem<GPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListPopBack")                        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("element_shape"),                \
+                          TensorListPopBack<GPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListConcat")                         \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("lengths"),                      \
+                          TensorListConcat<GPUDevice, T>)                  \
+  REGISTER_KERNEL_BUILDER(Name("TensorListConcatV2")                       \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("leading_dims")                  \
+                              .HostMemory("element_shape")                 \
+                              .HostMemory("lengths"),                      \
+                          TensorListConcat<GPUDevice, T>)                  \
+  REGISTER_KERNEL_BUILDER(Name("TensorListPushBackBatch")                  \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU),                         \
+                          TensorListPushBackBatch<GPUDevice, T>)           \
+  REGISTER_KERNEL_BUILDER(Name("TensorListFromTensor")                     \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("element_shape"),                \
+                          TensorListFromTensor<GPUDevice, T>)              \
+  REGISTER_KERNEL_BUILDER(Name("TensorListScatter")                        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("element_shape")                 \
+                              .HostMemory("indices"),                      \
+                          TensorListScatter<GPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListScatterV2")                      \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("element_shape")                 \
+                              .HostMemory("num_elements")                  \
+                              .HostMemory("indices"),                      \
+                          TensorListScatter<GPUDevice, T>)                 \
+  REGISTER_KERNEL_BUILDER(Name("TensorListScatterIntoExistingList")        \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("indices"),                      \
+                          TensorListScatterIntoExistingList<GPUDevice, T>) \
+  REGISTER_KERNEL_BUILDER(Name("TensorListSplit")                          \
+                              .TypeConstraint<T>("element_dtype")          \
+                              .Device(DEVICE_GPU)                          \
+                              .HostMemory("element_shape")                 \
+                              .HostMemory("lengths"),                      \
                           TensorListSplit<GPUDevice, T>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_TENSOR_LIST_OPS_GPU);
diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h
index c25e9ce..682ea15 100644
--- a/tensorflow/core/kernels/list_kernels.h
+++ b/tensorflow/core/kernels/list_kernels.h
@@ -70,6 +70,13 @@
                                 const TensorList& tensor_list, int index,
                                 PartialTensorShape* element_shape);
 
+Status GetInputList(OpKernelContext* c, int index, const TensorList** list);
+
+Status ForwardInputOrCreateNewList(OpKernelContext* c, int32 input_index,
+                                   int32 output_index,
+                                   const TensorList& input_list,
+                                   TensorList** output_list);
+
 template <typename Device, typename T>
 class TensorListStack : public OpKernel {
  public:
@@ -81,12 +88,8 @@
   }
 
   void Compute(OpKernelContext* c) override {
-    const TensorList* tensor_list =
-        c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, tensor_list != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* tensor_list = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &tensor_list));
     OP_REQUIRES(
         c, element_dtype_ == tensor_list->element_dtype,
         errors::InvalidArgument(
@@ -184,14 +187,8 @@
   }
 
   void Compute(OpKernelContext* c) override {
-    OP_REQUIRES(
-        c, c->input(0).shape().num_elements() == 1,
-        errors::InvalidArgument("List tensors are supposed to be scalars."));
-    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, l != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* l = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &l));
     OP_REQUIRES(c, element_dtype_ == l->element_dtype,
                 errors::InvalidArgument("Invalid data types; op elements ",
                                         DataTypeString(element_dtype_),
@@ -255,11 +252,8 @@
   }
 
   void Compute(OpKernelContext* c) override {
-    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, l != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* l = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &l));
     OP_REQUIRES(c, element_dtype_ == l->element_dtype,
                 errors::InvalidArgument("Invalid data types; op elements ",
                                         DataTypeString(element_dtype_),
@@ -291,20 +285,10 @@
       functor::SetZeroFunctor<Device, T>()(c->eigen_device<Device>(),
                                            result->flat<T>());
     }
-    AllocatorAttributes attr;
-    attr.set_on_host(true);
-    std::unique_ptr<Tensor> maybe_result = c->forward_input(
-        0, 0, DT_VARIANT, TensorShape{}, c->input_memory_type(0), attr);
-    if (maybe_result != nullptr) {
-      maybe_result->scalar<Variant>()().get<TensorList>()->tensors.pop_back();
-    } else {
-      TensorList output;
-      output = *l;
-      output.tensors.pop_back();
-      Tensor* result;
-      OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr));
-      result->scalar<Variant>()() = std::move(output);
-    }
+
+    TensorList* output_list = nullptr;
+    OP_REQUIRES_OK(c, ForwardInputOrCreateNewList(c, 0, 0, *l, &output_list));
+    output_list->tensors.pop_back();
   }
 
  private:
@@ -334,12 +318,8 @@
   void Compute(OpKernelContext* c) override {
     // Check that the input Variant tensor is indeed a TensorList and has the
     // correct element type.
-    const TensorList* tensor_list =
-        c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, tensor_list != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* tensor_list = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &tensor_list));
     OP_REQUIRES(
         c, element_dtype_ == tensor_list->element_dtype,
         errors::InvalidArgument(
@@ -597,12 +577,8 @@
   }
 
   void Compute(OpKernelContext* c) override {
-    const TensorList* tensor_list =
-        c->input(0).scalar<Variant>()().get<TensorList>();
-    OP_REQUIRES(c, tensor_list != nullptr,
-                errors::InvalidArgument(
-                    "Input handle is not a list. Saw: '",
-                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    const TensorList* tensor_list = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &tensor_list));
     OP_REQUIRES(
         c, element_dtype_ == tensor_list->element_dtype,
         errors::InvalidArgument(
@@ -736,6 +712,81 @@
   }
 };
 
+// Scatters values in `value` into `list`. Assumes that `indices` are valid.
+template <typename Device, typename T>
+Status Scatter(OpKernelContext* c, const Tensor& value, const Tensor& indices,
+               TensorList* list) {
+  for (int index = 0; index < indices.NumElements(); ++index) {
+    const int i = indices.flat<int32>()(index);
+    Tensor tmp = value.Slice(index, index + 1);
+    TensorShape tmp_shape = tmp.shape();
+    tmp_shape.RemoveDim(0);
+    if (!tmp.CopyFrom(tmp, tmp_shape)) {
+      return errors::Unknown("Unexpected shape error.");
+    }
+    // TODO(apassos) maybe not always align; but weird compiler bugs seem to
+    // prevent this.
+    Tensor aligned;
+    TF_RETURN_IF_ERROR(c->allocate_temp(tmp.dtype(), tmp.shape(), &aligned));
+    // TODO(apassos) do all slices in a single kernel invocation instead of
+    // many small ones.
+    aligned.flat<T>().device(c->eigen_device<Device>()) =
+        tmp.unaligned_flat<T>();
+    std::swap(list->tensors[i], aligned);
+  }
+  return Status::OK();
+}
+
+template <typename Device, typename T>
+class TensorListScatterIntoExistingList : public OpKernel {
+ public:
+  explicit TensorListScatterIntoExistingList(OpKernelConstruction* c)
+      : OpKernel(c) {}
+
+  void Compute(OpKernelContext* c) override {
+    const TensorList* l = nullptr;
+    OP_REQUIRES_OK(c, GetInputList(c, 0, &l));
+    const Tensor& input_tensor = c->input(1);
+    const Tensor& indices = c->input(2);
+
+    // Check that inputs are valid.
+    OP_REQUIRES(c, input_tensor.dtype() == l->element_dtype,
+                errors::InvalidArgument(
+                    "Invalid data types; input tensor type: ",
+                    DataTypeString(input_tensor.dtype()),
+                    " list element_type: ", DataTypeString(l->element_dtype)));
+    OP_REQUIRES(c, TensorShapeUtils::IsVectorOrHigher(input_tensor.shape()),
+                errors::InvalidArgument(
+                    "Tensor must be at least a vector, but saw shape: ",
+                    input_tensor.shape().DebugString()));
+    OP_REQUIRES(c, TensorShapeUtils::IsVector(indices.shape()),
+                errors::InvalidArgument(
+                    "Expected indices to be a vector, but received shape: ",
+                    indices.shape().DebugString()));
+    OP_REQUIRES(
+        c, indices.NumElements() == input_tensor.shape().dim_size(0),
+        errors::InvalidArgument(
+            "Expected len(indices) == tensor.shape[0], but saw: ",
+            indices.NumElements(), " vs. ", input_tensor.shape().dim_size(0)));
+
+    // Resize the list if needed to accommodate all indices.
+    TensorList* output_list = nullptr;
+    OP_REQUIRES_OK(c, ForwardInputOrCreateNewList(c, 0, 0, *l, &output_list));
+    const auto indices_vec = indices.vec<int32>();
+    int32 max_index =
+        (indices.NumElements() == 0)
+            ? -1
+            : *std::max_element(indices_vec.data(),
+                                indices_vec.data() + indices.NumElements());
+    if (max_index + 1 > output_list->tensors.size()) {
+      output_list->tensors.resize(max_index + 1);
+    }
+
+    // Scatter the values.
+    OP_REQUIRES_OK(c,
+                   Scatter<Device, T>(c, input_tensor, indices, output_list));
+  }
+};
+
 template <typename Device, typename T>
 class TensorListScatter : public OpKernel {
  public:
@@ -798,23 +849,8 @@
                                  Tensor(DT_INVALID));
     }
 
-    for (int index = 0; index < indices.NumElements(); ++index) {
-      const int i = indices.flat<int32>()(index);
-      Tensor tmp = input_tensor.Slice(index, index + 1);
-      TensorShape tmp_shape = tmp.shape();
-      tmp_shape.RemoveDim(0);
-      OP_REQUIRES(c, tmp.CopyFrom(tmp, tmp_shape),
-                  errors::Unknown("Unexpected shape error."));
-      // TODO(apassos) maybe not always align; but weird compiler bugs seem to
-      // prevent this.
-      Tensor aligned;
-      OP_REQUIRES_OK(c, c->allocate_temp(tmp.dtype(), tmp.shape(), &aligned));
-      // TODO(apassos) do all slices in a single kernel invocation instead of
-      // many small ondes.
-      aligned.flat<T>().device(c->eigen_device<Device>()) =
-          tmp.unaligned_flat<T>();
-      std::swap(output_list.tensors[i], aligned);
-    }
+    OP_REQUIRES_OK(c,
+                   Scatter<Device, T>(c, input_tensor, indices, &output_list));
     output_tensor->scalar<Variant>()() = std::move(output_list);
   }
 };
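
TensorListScatterIntoExistingList above validates its inputs, grows the destination list to max(indices) + 1 when needed, and then delegates to the shared Scatter helper. Its resize-then-scatter semantics are captured by this standalone sketch (a plain vector of ints stands in for the list of tensors; names are illustrative):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Row i of `values` lands at position indices[i]; the list is resized
    // to cover the largest index first, mirroring the kernel above.
    void ScatterIntoList(std::vector<int>* list, const std::vector<int>& values,
                         const std::vector<int>& indices) {
      const int max_index =
          indices.empty() ? -1
                          : *std::max_element(indices.begin(), indices.end());
      if (max_index + 1 > static_cast<int>(list->size())) {
        list->resize(max_index + 1);
      }
      for (std::size_t i = 0; i < indices.size(); ++i) {
        (*list)[indices[i]] = values[i];
      }
    }
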
diff --git a/tensorflow/core/kernels/lookup_tables/BUILD b/tensorflow/core/kernels/lookup_tables/BUILD
index f4a41a5..a25660e 100644
--- a/tensorflow/core/kernels/lookup_tables/BUILD
+++ b/tensorflow/core/kernels/lookup_tables/BUILD
@@ -13,48 +13,77 @@
 load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
 
 cc_library(
-    name = "lookup_table_interface",
-    hdrs = ["lookup_table_interface.h"],
+    name = "resource_interface_templates",
+    hdrs = ["resource_interface_templates.h"],
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/types:optional",
+    ],
+)
+
+cc_library(
+    name = "op_kernel_templates",
+    hdrs = ["op_kernel_templates.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/kernels:tensor_flag_utils",
+        "//third_party/eigen3",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/meta:type_traits",
         "@com_google_absl//absl/types:span",
     ],
 )
 
-cc_library(
-    name = "table_resource_utils",
-    hdrs = ["table_resource_utils.h"],
-    deps = [
-        ":lookup_table_interface",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
+tf_kernel_library(
+    name = "fingerprint64_map_op_kernels",
+    srcs = [
+        "fingerprint64_map_op_kernels.cc",
     ],
-)
-
-cc_library(
-    name = "table_op_utils",
-    hdrs = ["table_op_utils.h"],
     deps = [
+        ":op_kernel_templates",
+        ":resource_interface_templates",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/strings",
     ],
 )
 
 tf_kernel_library(
-    name = "fingerprint64_map_ops",
+    name = "flat_hash_map_op_kernels",
     srcs = [
-        "fingerprint64_map_ops.cc",
+        "flat_hash_map_op_kernels.cc",
     ],
     deps = [
-        ":table_op_utils",
-        ":table_resource_utils",
+        ":op_kernel_templates",
+        ":resource_interface_templates",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/kernels:tensor_flag_utils",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+tf_kernel_library(
+    name = "generic_table_op_kernels",
+    srcs = [
+        "generic_table_op_kernels.cc",
+    ],
+    deps = [
+        ":op_kernel_templates",
+        ":resource_interface_templates",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:string_view_variant_wrapper",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/kernels/lookup_tables/fingerprint64_map_op_kernels.cc b/tensorflow/core/kernels/lookup_tables/fingerprint64_map_op_kernels.cc
new file mode 100644
index 0000000..36274bc
--- /dev/null
+++ b/tensorflow/core/kernels/lookup_tables/fingerprint64_map_op_kernels.cc
@@ -0,0 +1,124 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "absl/strings/string_view.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/kernels/lookup_tables/op_kernel_templates.h"
+#include "tensorflow/core/kernels/lookup_tables/resource_interface_templates.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/fingerprint.h"
+#include "tensorflow/core/platform/macros.h"
+
+namespace tensorflow {
+namespace tables {
+
+// Map x -> (Fingerprint64(x) % num_oov_buckets) + offset.
+// num_oov_buckets and offset are node attributes provided at construction
+// time.
+template <typename KeyType, typename ValueType>
+class Fingerprint64Map final
+    : public virtual LookupInterface<ValueType*, const KeyType&>,
+      public virtual LookupWithPrefetchInterface<absl::Span<ValueType>,
+                                                 absl::Span<const KeyType>> {
+ public:
+  using key_type = KeyType;
+
+  Fingerprint64Map(int64 num_oov_buckets, int64 offset)
+      : num_oov_buckets_(num_oov_buckets), offset_(offset) {}
+
+  Status Lookup(const KeyType& key_to_find, ValueType* value) const override {
+    *value = LookupHelper(key_to_find);
+    return Status::OK();
+  }
+
+  Status Lookup(absl::Span<const KeyType> keys, absl::Span<ValueType> values,
+                int64 prefetch_lookahead) const override {
+    if (ABSL_PREDICT_FALSE(keys.size() != values.size())) {
+      return errors::InvalidArgument(
+          "keys and values do not have the same number of elements (found ",
+          keys.size(), " vs ", values.size(), ").");
+    }
+    for (size_t i = 0; i < keys.size(); ++i) {
+      values[i] = LookupHelper(keys[i]);
+    }
+    return Status::OK();
+  }
+
+  mutex* GetMutex() const override { return nullptr; }
+
+  string DebugString() const override { return __PRETTY_FUNCTION__; }
+
+ private:
+  ABSL_ATTRIBUTE_ALWAYS_INLINE ValueType
+  LookupHelper(const KeyType& key_to_find) const {
+    // Note: the static_cast may narrow the value.
+    return static_cast<ValueType>(Fingerprint64(key_to_find) %
+                                  num_oov_buckets_) +
+           offset_;
+  }
+
+  const int64 num_oov_buckets_;
+  const int64 offset_;
+  TF_DISALLOW_COPY_AND_ASSIGN(Fingerprint64Map);
+};
+
+template <typename Fingerprint64Map>
+struct Fingerprint64MapFactory {
+  struct Functor {
+    using resource_type = Fingerprint64Map;
+
+    static Status AllocateContainer(OpKernelContext* ctx, OpKernel* kernel,
+                                    Fingerprint64Map** container) {
+      int64 num_oov_buckets;
+      int64 offset;
+      TF_RETURN_IF_ERROR(
+          GetNodeAttr(kernel->def(), "num_oov_buckets", &num_oov_buckets));
+      TF_RETURN_IF_ERROR(GetNodeAttr(kernel->def(), "offset", &offset));
+      *container = new Fingerprint64Map(num_oov_buckets, offset);
+      return Status::OK();
+    }
+  };
+};
+
+template <typename KeyType, typename ValueType>
+using ResourceOp = ResourceConstructionOp<
+    typename Fingerprint64MapFactory<
+        Fingerprint64Map<KeyType, ValueType>>::Functor,
+    // These are the aliases.
+    LookupInterface<ValueType*, const KeyType&>,
+    LookupWithPrefetchInterface<absl::Span<ValueType>,
+                                absl::Span<const KeyType>>>;
+
+#define REGISTER_STRING_KERNEL(ValueType)                     \
+  REGISTER_KERNEL_BUILDER(                                    \
+      Name("Fingerprint64Map")                                \
+          .Device(DEVICE_CPU)                                 \
+          .TypeConstraint<Variant>("heterogeneous_key_dtype") \
+          .TypeConstraint<ValueType>("table_value_dtype"),    \
+      ResourceOp<absl::string_view, ValueType>);              \
+  REGISTER_KERNEL_BUILDER(                                    \
+      Name("Fingerprint64Map")                                \
+          .Device(DEVICE_CPU)                                 \
+          .TypeConstraint<string>("heterogeneous_key_dtype")  \
+          .TypeConstraint<ValueType>("table_value_dtype"),    \
+      ResourceOp<string, ValueType>);
+
+REGISTER_STRING_KERNEL(int32);
+REGISTER_STRING_KERNEL(int64);
+
+#undef REGISTER_STRING_KERNEL
+
+}  // namespace tables
+}  // namespace tensorflow
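
The table added above is stateless arithmetic: every key maps to (Fingerprint64(key) % num_oov_buckets) + offset, so no storage is needed at all. A self-contained sketch of the mapping, with std::hash standing in for tensorflow::Fingerprint64 (an assumption made purely for illustration):

    #include <cstdint>
    #include <functional>
    #include <string>

    // Bucket the key's 64-bit fingerprint into num_oov_buckets buckets,
    // then shift the result by offset.
    int64_t Fingerprint64MapLookup(const std::string& key,
                                   int64_t num_oov_buckets, int64_t offset) {
      const uint64_t fp = std::hash<std::string>{}(key);
      return static_cast<int64_t>(fp % num_oov_buckets) + offset;
    }
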
diff --git a/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc b/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc
deleted file mode 100644
index a000828..0000000
--- a/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "absl/strings/string_view.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/kernels/lookup_tables/table_op_utils.h"
-#include "tensorflow/core/kernels/lookup_tables/table_resource_utils.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/platform/fingerprint.h"
-#include "tensorflow/core/platform/macros.h"
-
-namespace tensorflow {
-namespace tables {
-
-// Map x -> (Fingerprint64(x) % num_oov_buckets) + offset.
-// num_oov_buckets and offset are node attributes provided at construction
-// time.
-template <class HeterogeneousKeyType, class ValueType>
-class Fingerprint64Map final
-    : public LookupTableInterface<HeterogeneousKeyType, ValueType> {
- public:
-  Fingerprint64Map(int64 num_oov_buckets, int64 offset)
-      : num_oov_buckets_(num_oov_buckets), offset_(offset) {}
-
-  mutex* GetMutex() const override { return nullptr; }
-
-  bool UnsafeInsertOrAssign(const HeterogeneousKeyType& key,
-                            const ValueType& value) override {
-    return true;
-  }
-
-  Status TableUnbatchedInsertStatus() const override {
-    return errors::Unimplemented("Fingerprint64Map does not support inserts.");
-  }
-
-  Status BatchInsertOrAssign(absl::Span<const HeterogeneousKeyType> keys,
-                             absl::Span<const ValueType> values) override {
-    return errors::Unimplemented("Fingerprint64Map does not support inserts.");
-  }
-
-  ValueType UnsafeLookupKey(
-      const HeterogeneousKeyType& key_to_find) const override {
-    // This can cause a downcast.
-    return static_cast<ValueType>(Fingerprint64(key_to_find) %
-                                  num_oov_buckets_) +
-           offset_;
-  }
-
-  Status TableUnbatchedLookupStatus() const override { return Status::OK(); }
-
-  Status BatchLookup(absl::Span<const HeterogeneousKeyType> keys,
-                     absl::Span<ValueType> values,
-                     int64 prefetch_lookahead) const override {
-    if (ABSL_PREDICT_FALSE(keys.size() != values.size())) {
-      return errors::InvalidArgument(
-          "keys and values do not have the same number of elements (found ",
-          keys.size(), " vs ", values.size(), ").");
-    }
-    for (size_t i = 0; i < keys.size(); ++i) {
-      values[i] = Fingerprint64Map::UnsafeLookupKey(keys[i]);
-    }
-    return Status::OK();
-  }
-
-  const absl::optional<const ValueType> DefaultValue() const override {
-    return {};
-  }
-
-  void UnsafePrefetchKey(
-      const HeterogeneousKeyType& key_to_find) const override {}
-
-  size_t UnsafeSize() const override { return 0; }
-
-  Status SizeStatus() const override {
-    return errors::Unimplemented(
-        "Fingerprint64Map does not have a concept of size.");
-  }
-
-  bool UnsafeContainsKey(
-      const HeterogeneousKeyType& key_to_find) const override {
-    return true;
-  }
-
- private:
-  const int64 num_oov_buckets_;
-  const int64 offset_;
-  TF_DISALLOW_COPY_AND_ASSIGN(Fingerprint64Map);
-};
-
-template <typename Fingerprint64Map>
-struct Fingerprint64MapFactory {
-  struct Functor {
-    template <typename ContainerBase>
-    static Status AllocateContainer(OpKernelContext* ctx, OpKernel* kernel,
-                                    ContainerBase** container) {
-      int64 num_oov_buckets;
-      int64 offset;
-      TF_RETURN_IF_ERROR(
-          GetNodeAttr(kernel->def(), "num_oov_buckets", &num_oov_buckets));
-      TF_RETURN_IF_ERROR(GetNodeAttr(kernel->def(), "offset", &offset));
-      *container = new Fingerprint64Map(num_oov_buckets, offset);
-      return Status::OK();
-    }
-  };
-};
-
-#define REGISTER_STRING_KERNEL(table_value_dtype)                             \
-  REGISTER_KERNEL_BUILDER(                                                    \
-      Name("Fingerprint64Map")                                                \
-          .Device(DEVICE_CPU)                                                 \
-          .TypeConstraint<Variant>("heterogeneous_key_dtype")                 \
-          .TypeConstraint<table_value_dtype>("table_value_dtype"),            \
-      ResourceConstructionOp<                                                 \
-          LookupTableInterface<absl::string_view, table_value_dtype>,         \
-          Fingerprint64MapFactory<Fingerprint64Map<                           \
-              absl::string_view, table_value_dtype>>::Functor>);              \
-  REGISTER_KERNEL_BUILDER(                                                    \
-      Name("Fingerprint64Map")                                                \
-          .Device(DEVICE_CPU)                                                 \
-          .TypeConstraint<string>("heterogeneous_key_dtype")                  \
-          .TypeConstraint<table_value_dtype>("table_value_dtype"),            \
-      ResourceConstructionOp<LookupTableInterface<string, table_value_dtype>, \
-                             Fingerprint64MapFactory<Fingerprint64Map<        \
-                                 string, table_value_dtype>>::Functor>);
-
-REGISTER_STRING_KERNEL(int32);
-REGISTER_STRING_KERNEL(int64);
-
-#undef REGISTER_STRING_KERNEL
-
-}  // namespace tables
-}  // namespace tensorflow
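
The file deleted above bundled every capability (insert, lookup, prefetch, size, default value) into one monolithic LookupTableInterface; the files that replace it define small per-capability interfaces that a concrete table composes via virtual inheritance, so kernels can depend on exactly the capabilities they need. A minimal sketch of that composition pattern under simplified signatures (interface and class names here are stand-ins, not the actual headers):

    #include <cstdint>
    #include <string>

    struct Status {};  // stand-in for tensorflow::Status

    template <typename Value, typename Key>
    struct LookupIface {
      virtual Status Lookup(Key key, Value value) const = 0;
      virtual ~LookupIface() = default;
    };

    struct SizeIface {
      virtual uint64_t Size() const = 0;
      virtual ~SizeIface() = default;
    };

    // A concrete table picks up only the capabilities it supports.
    class DemoTable final
        : public virtual LookupIface<int*, const std::string&>,
          public virtual SizeIface {
     public:
      Status Lookup(const std::string& key, int* value) const override {
        *value = static_cast<int>(key.size());  // stand-in lookup rule
        return Status{};
      }
      uint64_t Size() const override { return 0; }
    };
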
diff --git a/tensorflow/core/kernels/lookup_tables/flat_hash_map_op_kernels.cc b/tensorflow/core/kernels/lookup_tables/flat_hash_map_op_kernels.cc
new file mode 100644
index 0000000..9c37ca8
--- /dev/null
+++ b/tensorflow/core/kernels/lookup_tables/flat_hash_map_op_kernels.cc
@@ -0,0 +1,275 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <memory>
+#include <type_traits>
+#include "absl/base/attributes.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/lookup_tables/op_kernel_templates.h"
+#include "tensorflow/core/kernels/lookup_tables/resource_interface_templates.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/platform/fingerprint.h"
+
+namespace tensorflow {
+namespace tables {
+
+using errors::InvalidArgument;
+
+// absl::flat_hash_map<string, ValueType>-backed table with an inline fallback
+// to x -> (Fingerprint64(x) % num_oov_buckets) + Size() for keys that are not
+// in the flat_hash_map. Inlining the fallback turns out to be considerably
+// more efficient than virtual dispatch for the fallback lookup.
+template <typename ValueType>
+class StaticStringFlatHashMap final
+    : public virtual LookupInterface<ValueType*, const absl::string_view&>,
+      public virtual LookupInterface<ValueType*, const string&>,
+      public virtual LookupWithPrefetchInterface<
+          absl::Span<ValueType>, absl::Span<const absl::string_view>>,
+      public virtual LookupWithPrefetchInterface<absl::Span<ValueType>,
+                                                 absl::Span<const string>>,
+      public virtual KeyValueTableInitializerInterface<
+          absl::Span<const ValueType>, absl::Span<const absl::string_view>>,
+      public virtual KeyValueTableInitializerInterface<
+          absl::Span<const ValueType>, absl::Span<const string>>,
+      public virtual SizeInterface {
+ public:
+  using value_type = ValueType;
+
+  StaticStringFlatHashMap(bool enable_synchronization, int64 num_oov_buckets)
+      : num_oov_buckets_(num_oov_buckets) {
+    if (enable_synchronization) {
+      mutex_ = absl::make_unique<mutex>();
+    }
+  }
+
+  Status Initialize(absl::Span<const absl::string_view> keys,
+                    absl::Span<const ValueType> values) override {
+    if (ABSL_PREDICT_FALSE(keys.size() != values.size())) {
+      return errors::InvalidArgument(
+          "keys and values do not have the same number of elements (found ",
+          keys.size(), " vs ", values.size(), ").");
+    }
+
+    table_.reserve(table_.size() + keys.size());
+    for (size_t i = 0; i < keys.size(); ++i) {
+      table_.insert_or_assign(string(keys[i]), values[i]);
+    }
+    return Status::OK();
+  }
+
+  Status Initialize(absl::Span<const string> keys,
+                    absl::Span<const ValueType> values) override {
+    if (ABSL_PREDICT_FALSE(keys.size() != values.size())) {
+      return errors::InvalidArgument(
+          "keys and values do not have the same number of elements (found ",
+          keys.size(), " vs ", values.size(), ").");
+    }
+
+    table_.reserve(table_.size() + keys.size());
+    for (size_t i = 0; i < keys.size(); ++i) {
+      table_.insert_or_assign(keys[i], values[i]);
+    }
+    return Status::OK();
+  }
+
+  Status Lookup(const absl::string_view& key, ValueType* value) const override {
+    *value = LookupHelper(key);
+    return Status::OK();
+  }
+
+  Status Lookup(const string& key, ValueType* value) const override {
+    *value = LookupHelper(key);
+    return Status::OK();
+  }
+
+  // keys and values are guaranteed to have the same size by convention.
+  Status Lookup(absl::Span<const absl::string_view> keys,
+                absl::Span<ValueType> values,
+                int64 prefetch_lookahead) const override {
+    const auto keys_size = keys.size();
+    if (prefetch_lookahead <= 0 || prefetch_lookahead >= keys_size) {
+      for (size_t i = 0; i < keys_size; ++i) {
+        values[i] = LookupHelper(keys[i]);
+      }
+    } else {
+      for (size_t i = 0; i < keys_size; ++i) {
+        if (i + prefetch_lookahead < keys.size()) {
+          table_.prefetch(keys[i + prefetch_lookahead]);
+        }
+        values[i] = LookupHelper(keys[i]);
+      }
+    }
+    return Status::OK();
+  }
+
+  // keys and values are guaranteed to have the same size by convention.
+  Status Lookup(absl::Span<const string> keys, absl::Span<ValueType> values,
+                int64 prefetch_lookahead) const override {
+    const auto keys_size = keys.size();
+    if (prefetch_lookahead <= 0 || prefetch_lookahead >= keys_size) {
+      for (size_t i = 0; i < keys_size; ++i) {
+        values[i] = LookupHelper(keys[i]);
+      }
+    } else {
+      for (size_t i = 0; i < keys_size; ++i) {
+        if (i + prefetch_lookahead < keys.size()) {
+          table_.prefetch(keys[i + prefetch_lookahead]);
+        }
+        values[i] = LookupHelper(keys[i]);
+      }
+    }
+    return Status::OK();
+  }
+
+  uint64 Size() const override { return table_.size(); }
+
+  mutex* GetMutex() const override { return mutex_.get(); }
+
+  string DebugString() const override { return __PRETTY_FUNCTION__; }
+
+ private:
+  template <typename T>
+  ABSL_ATTRIBUTE_ALWAYS_INLINE ValueType
+  LookupHelper(const T& key_to_find) const {
+    auto it = table_.find(key_to_find);
+    if (it != table_.end()) {
+      return it->second;
+    } else {
+      return static_cast<ValueType>(Fingerprint64(key_to_find) %
+                                    num_oov_buckets_) +
+             StaticStringFlatHashMap::Size();
+    }
+  }
+
+  const int64 num_oov_buckets_;
+  std::unique_ptr<mutex> mutex_;
+  // The underlying table.
+  absl::flat_hash_map<string, ValueType> table_;
+  TF_DISALLOW_COPY_AND_ASSIGN(StaticStringFlatHashMap);
+};
+
+// Used to allocate StaticStringFlatHashMap objects via the AllocateContainer
+// method.
+template <typename StaticStringFlatHashMap>
+struct StaticStringFlatHashMapFactory {
+  struct Functor {
+    using resource_type = StaticStringFlatHashMap;
+
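+    // Expected op inputs, per the checks below (illustrative summary):
+    //   input 0: enable_synchronization (int64 scalar)
+    //   input 1: num_oov_buckets (int64 scalar)
+    //   input 2: keys (DT_STRING, or DT_VARIANT wrapping absl::string_view)
+    //   input 3: values (tensor of StaticStringFlatHashMap::value_type)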
+    template <typename StaticStringFlatHashMapBase>
+    static Status AllocateContainer(OpKernelContext* ctx, OpKernel* kernel,
+                                    StaticStringFlatHashMapBase** container) {
+      OpInputList table_int64_args;
+      TF_RETURN_IF_ERROR(
+          ctx->input_list("table_int64_args", &table_int64_args));
+      const size_t variadic_arg_size = table_int64_args.size();
+      if (ABSL_PREDICT_FALSE(variadic_arg_size != 2)) {
+        return errors::InvalidArgument(
+            "table_int64_args should have 2 elements (found ",
+            variadic_arg_size,
+            "). Set the first element to 1 to enable synchronized table use "
+            "and to 0 otherwise. The second element should be "
+            "num_oov_buckets.");
+      }
+
+      const bool enable_synchronization = ctx->input(0).scalar<int64>()() != 0;
+      const int64 num_oov_buckets = ctx->input(1).scalar<int64>()();
+      if (ABSL_PREDICT_FALSE(num_oov_buckets <= 0)) {
+        return errors::InvalidArgument(
+            "num_oov_buckets must be positive. Found: ", num_oov_buckets);
+      }
+      auto* non_virtual_container =
+          new StaticStringFlatHashMap(enable_synchronization, num_oov_buckets);
+      *container = non_virtual_container;
+      const Tensor& keys = ctx->input(table_int64_args.size());
+      const Tensor& values = ctx->input(table_int64_args.size() + 1);
+      if (keys.NumElements() == 0) {
+        return Status::OK();
+      } else if (keys.dtype() == DT_STRING) {
+        return Functor::Initialize(
+            keys.flat<string>(),
+            values.flat<typename StaticStringFlatHashMap::value_type>(),
+            non_virtual_container);
+      } else if (keys.dtype() == DT_VARIANT) {
+        auto keys_flat = keys.flat<Variant>();
+        if (keys_flat(0).get<absl::string_view>() == nullptr) {
+          return errors::InvalidArgument(
+              "Variant keys tensor must have subtype absl::string_view.");
+        }
+        return Functor::Initialize(
+            keys.flat<Variant>(),
+            values.flat<typename StaticStringFlatHashMap::value_type>(),
+            non_virtual_container);
+      }
+      return errors::InvalidArgument(
+          "keys tensor must have type DT_STRING or type DT_VARIANT with "
+          "subtype absl::string_view.");
+    }
+
+    static Status Initialize(
+        const absl::Span<const string> keys,
+        const absl::Span<const typename StaticStringFlatHashMap::value_type>
+            values,
+        StaticStringFlatHashMap* container) {
+      return container->Initialize(keys, values);
+    }
+
+    static Status Initialize(
+        const absl::Span<const Variant> keys,
+        const absl::Span<const typename StaticStringFlatHashMap::value_type>
+            values,
+        StaticStringFlatHashMap* container) {
+      std::vector<absl::string_view> keys_vec;
+      keys_vec.reserve(keys.size());
+      for (size_t i = 0; i < keys.size(); ++i) {
+        keys_vec.push_back(*keys[i].get<absl::string_view>());
+      }
+      return container->Initialize(keys_vec, values);
+    }
+  };
+};
+
+template <typename ValueType>
+using ResourceOp = ResourceConstructionOp<
+    typename StaticStringFlatHashMapFactory<
+        StaticStringFlatHashMap<ValueType>>::Functor,
+    // Aliases under which the same table is additionally registered.
+    LookupInterface<ValueType*, const absl::string_view&>,
+    LookupWithPrefetchInterface<absl::Span<ValueType>,
+                                absl::Span<const absl::string_view>>,
+    LookupInterface<ValueType*, const string&>,
+    LookupWithPrefetchInterface<absl::Span<ValueType>,
+                                absl::Span<const string>>,
+    SizeInterface>;
+
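+// The kernel registers the concrete table under each alias above, so
+// downstream kernels (e.g. a find or size op) can look up the same resource
+// through whichever interface they were templated on.
+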
+#define REGISTER_STRING_KERNEL(table_value_dtype)                  \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("StaticStringFlatHashMap")                              \
+          .Device(DEVICE_CPU)                                      \
+          .TypeConstraint<Variant>("heterogeneous_key_dtype")      \
+          .TypeConstraint<table_value_dtype>("table_value_dtype"), \
+      ResourceOp<table_value_dtype>);
+
+REGISTER_STRING_KERNEL(int32);
+REGISTER_STRING_KERNEL(int64);
+
+#undef REGISTER_STRING_KERNEL
+
+}  // namespace tables
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/lookup_tables/generic_table_op_kernels.cc b/tensorflow/core/kernels/lookup_tables/generic_table_op_kernels.cc
new file mode 100644
index 0000000..9bb29af
--- /dev/null
+++ b/tensorflow/core/kernels/lookup_tables/generic_table_op_kernels.cc
@@ -0,0 +1,227 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <type_traits>
+#include "absl/strings/string_view.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/kernels/lookup_tables/op_kernel_templates.h"
+#include "tensorflow/core/kernels/lookup_tables/resource_interface_templates.h"
+#include "tensorflow/core/kernels/string_view_variant_wrapper.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tables {
+
+template <typename KeyType, typename ValueType>
+struct TensorInsertFactory {
+  class Functor {
+   public:
+    // If KeyType is not a 'valid' (natively supported) TF type, use the type
+    // it wraps as the table key type.
+    using resource_type = InsertOrAssignInterface<
+        absl::Span<const ValueType>,
+        absl::conditional_t<
+            IsValidDataType<KeyType>::value, absl::Span<const KeyType>,
+            absl::Span<const typename KeyType::value_type>>>;
+
+    static Status TensorInsert(const Tensor& keys, const Tensor& values,
+                               resource_type* table) {
+      if (keys.NumElements() != values.NumElements()) {
+        return errors::InvalidArgument(
+            "OpKernel tried to map keys vector of size ", keys.NumElements(),
+            " to values vector of size ", values.NumElements());
+      }
+      return TensorInsertHelper(keys, values, table);
+    }
+
+   private:
+    // The keys and values arguments to TensorInsertHelper are guaranteed by
+    // TensorInsert above to have the same number of elements.
+
+    // 'Simple' types below are types which are natively supported in TF.
+    // This overload handles a non-variant KeyType which is the same as
+    // Container::key_type, so no static_cast is needed.
+    template <typename SfinaeArg = KeyType>
+    static absl::enable_if_t<IsValidDataType<SfinaeArg>::value, Status>
+    TensorInsertHelper(const Tensor& keys, const Tensor& values,
+                       resource_type* table) {
+      return table->InsertOrAssign(keys.flat<KeyType>(),
+                                   values.flat<ValueType>());
+    }
+
+    // Variant KeyType; the wrapped type is convertible to
+    // Container::key_type.
+    template <typename VariantSubType = KeyType>
+    static absl::enable_if_t<!IsValidDataType<VariantSubType>::value, Status>
+    TensorInsertHelper(const Tensor& keys, const Tensor& values,
+                       resource_type* table) {
+      const auto keys_flat = keys.flat<Variant>();
+      std::vector<typename VariantSubType::value_type> keys_vec;
+      keys_vec.reserve(keys_flat.size());
+      for (size_t i = 0; i < keys_flat.size(); ++i) {
+        keys_vec.emplace_back(
+            *keys_flat(i).get<typename VariantSubType::value_type>());
+      }
+      return table->InsertOrAssign(keys_vec, values.flat<ValueType>());
+    }
+  };
+};
+
+template <typename KeyType, typename ValueType>
+using InsertOp = LookupTableInsertOp<
+    typename TensorInsertFactory<KeyType, ValueType>::Functor>;
+
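+// For instance (sketch), InsertOp<string, int64> resolves to a
+// LookupTableInsertOp whose resource_type is
+// InsertOrAssignInterface<absl::Span<const int64>, absl::Span<const string>>,
+// while InsertOp<StringViewVariantWrapper, int64> keys the same interface by
+// absl::Span<const absl::string_view> instead.
+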
+template <typename KeyType, typename ValueType>
+struct TensorLookupFactory {
+  class Functor {
+   public:
+    // If KeyType is not a 'valid' (natively supported) TF type, use the type
+    // it wraps as the table key type.
+    using resource_type = LookupWithPrefetchInterface<
+        absl::Span<ValueType>,
+        absl::conditional_t<
+            IsValidDataType<KeyType>::value, absl::Span<const KeyType>,
+            absl::Span<const typename KeyType::value_type>>>;
+
+    static Status TensorLookup(const resource_type& table, const Tensor& keys,
+                               const int64 prefetch_lookahead,
+                               const int64 num_keys_per_thread,
+                               thread::ThreadPool* threadpool, Tensor* values) {
+      if (keys.NumElements() != values->NumElements()) {
+        return errors::InvalidArgument(
+            "OpKernel tried to map keys vector of size ", keys.NumElements(),
+            " to values vector of size ", values->NumElements());
+      }
+      return TensorLookupHelper(table, keys, prefetch_lookahead,
+                                num_keys_per_thread, threadpool, values);
+    }
+
+   private:
+    // The keys and *values arguments to TensorLookupHelper are guaranteed by
+    // TensorLookup above to have the same number of elements.
+
+    // 'Simple' types below are types which are natively supported in TF.
+    template <typename SfinaeArg = KeyType>
+    static absl::enable_if_t<IsValidDataType<SfinaeArg>::value, Status>
+    TensorLookupHelper(const resource_type& table, const Tensor& keys,
+                       const int64 prefetch_lookahead,
+                       const int64 num_keys_per_thread,
+                       thread::ThreadPool* threadpool, Tensor* values) {
+      const auto keys_flat = keys.flat<KeyType>();
+      auto key_span = absl::MakeSpan(keys_flat);
+      auto value_span = absl::MakeSpan(values->flat<ValueType>().data(),
+                                       values->NumElements());
+      return MultithreadedTensorLookup(table, prefetch_lookahead,
+                                       num_keys_per_thread, key_span,
+                                       value_span, threadpool);
+    }
+
+    // Non-simple KeyType. We'll try an implicit conversion to
+    // Container::key_type.
+    template <typename VariantSubType = KeyType>
+    static absl::enable_if_t<!IsValidDataType<VariantSubType>::value, Status>
+    TensorLookupHelper(const resource_type& table, const Tensor& keys,
+                       const int64 prefetch_lookahead,
+                       const int64 num_keys_per_thread,
+                       thread::ThreadPool* threadpool, Tensor* values) {
+      const auto keys_flat = keys.flat<Variant>();
+      std::vector<typename VariantSubType::value_type> keys_vec;
+      const auto keys_size = keys_flat.size();
+      keys_vec.reserve(keys_size);
+      for (size_t i = 0; i < keys_size; ++i) {
+        keys_vec.emplace_back(*keys_flat(i).get<VariantSubType>()->get());
+      }
+      absl::Span<const typename VariantSubType::value_type> key_span(keys_vec);
+      auto value_span = absl::MakeSpan(values->flat<ValueType>().data(),
+                                       values->NumElements());
+      return MultithreadedTensorLookup(table, prefetch_lookahead,
+                                       num_keys_per_thread, key_span,
+                                       value_span, threadpool);
+    }
+
+    // Wrapper around table.Lookup which shards the lookups across cores.
+    template <typename K, typename V>
+    static Status MultithreadedTensorLookup(const resource_type& table,
+                                            int64 prefetch_lookahead,
+                                            int64 num_keys_per_thread, K keys,
+                                            V values,
+                                            thread::ThreadPool* threadpool) {
+      mutex temp_mutex;  // Protect status.
+      Status status;
+      auto lookup_keys = [&](int64 begin, int64 end) {
+        auto temp_status = table.Lookup(keys.subspan(begin, end - begin),
+                                        values.subspan(begin, end - begin),
+                                        prefetch_lookahead);
+        if (ABSL_PREDICT_FALSE(!temp_status.ok())) {
+          mutex_lock lock(temp_mutex);
+          status.Update(temp_status);
+        }
+      };
+      threadpool->TransformRangeConcurrently(
+          num_keys_per_thread /* block_size */, keys.size(), lookup_keys);
+      return status;
+    }
+  };
+};
+
+template <typename KeyType, typename ValueType>
+using LookupOp = LookupTableFindOp<
+    typename TensorLookupFactory<KeyType, ValueType>::Functor>;
+
+struct TableSizeFunctor {
+  using resource_type = SizeInterface;
+
+  static Status Size(const SizeInterface& table, uint64* size) {
+    *size = table.Size();
+    return Status::OK();
+  }
+};
+
+#define REGISTER_STRING_KERNEL(table_value_dtype)                     \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("LookupTableInsertOrAssignOp")                             \
+          .Device(DEVICE_CPU)                                         \
+          .TypeConstraint<string>("insert_key_tensor_dtype")          \
+          .TypeConstraint<table_value_dtype>("table_value_dtype"),    \
+      InsertOp<string, table_value_dtype>);                           \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("LookupTableInsertOrAssignOp")                             \
+          .Device(DEVICE_CPU)                                         \
+          .TypeConstraint<Variant>("insert_key_tensor_dtype")         \
+          .TypeConstraint<table_value_dtype>("table_value_dtype"),    \
+      InsertOp<StringViewVariantWrapper, table_value_dtype>);         \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("LookupTableFindOp")                                       \
+          .Device(DEVICE_CPU)                                         \
+          .TypeConstraint<string>("lookup_key_tensor_dtype")          \
+          .TypeConstraint<table_value_dtype>("table_value_dtype"),    \
+      LookupOp<string, table_value_dtype>);                           \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("LookupTableFindOp")                                       \
+          .Device(DEVICE_CPU)                                         \
+          .TypeConstraint<Variant>("lookup_key_tensor_dtype")         \
+          .TypeConstraint<table_value_dtype>("table_value_dtype"),    \
+      LookupOp<StringViewVariantWrapper, table_value_dtype>);
+
+// Registered once, outside the macro: registering ContainerSizeOp per value
+// dtype would create duplicate kernels for the same (op, device) signature.
+REGISTER_KERNEL_BUILDER(Name("ContainerSizeOp").Device(DEVICE_CPU),
+                        ContainerSizeOp<TableSizeFunctor>);
+
+REGISTER_STRING_KERNEL(int32);
+REGISTER_STRING_KERNEL(int64);
+
+#undef REGISTER_STRING_KERNEL
+
+}  // namespace tables
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h b/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h
deleted file mode 100644
index 0cfe44e..0000000
--- a/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_
-#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_
-
-#include <cstddef>
-#include <string>
-
-#include "absl/types/optional.h"
-#include "absl/types/span.h"
-#include "tensorflow/core/framework/resource_mgr.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/mutex.h"
-
-namespace tensorflow {
-namespace tables {
-
-// Interface for key-value pair lookups with support for heterogeneous keys.
-// This class contains two main kinds of methods: methods which operate on
-// a batch of inputs and methods which do not. The latter have the prefix
-// 'Unsafe'. Clients must call the corresponding status methods to determine
-// whether they are safe to call within a code block.
-// Implementations must guarantee thread-safety when GetMutex is used to
-// synchronize method access.
-template <typename HeterogeneousKeyType, typename ValueType>
-class LookupTableInterface : public ResourceBase {
- public:
-  using heterogeneous_key_type = HeterogeneousKeyType;
-  using value_type = ValueType;
-  using key_type = heterogeneous_key_type;
-
-  // Return value should be used to synchronize read/write access to
-  // all public methods. If null, no synchronization is needed.
-  virtual mutex* GetMutex() const = 0;
-
-  // Insert the KV pair into the underlying table. If a key equivalent to key
-  // already exists in the underlying table, its corresponding value is
-  // overridden. Returns true only if the key was inserted for the first time.
-  // Undefined if TableUnbatchedInsertStatus() != OK.
-  virtual bool UnsafeInsertOrAssign(const HeterogeneousKeyType& key,
-                                    const ValueType& value) = 0;
-
-  // Returns OK if it is safe to call InsertOrAssign.
-  // Once OK is returned, it is safe to call InsertOrAssign for the rest of the
-  // program.
-  virtual Status TableUnbatchedInsertStatus() const TF_MUST_USE_RESULT = 0;
-
-  // Stores each KV pair {keys[i], values[i]} in the underlying map, overriding
-  // pre-existing pairs which have equivalent keys.
-  // keys and values should have the same size.
-  virtual Status BatchInsertOrAssign(
-      absl::Span<const HeterogeneousKeyType> keys,
-      absl::Span<const ValueType> values) = 0;
-
-  // Prefetch key_to_find into implementation defined data caches.
-  // Implementations are free to leave this a no-op.
-  // Undefined if TableUnbatchedLookupStatus() != OK.
-  virtual void UnsafePrefetchKey(
-      const HeterogeneousKeyType& key_to_find) const {}
-
-  // Returns true if and only if the table contains key_to_find.
-  // Undefined if TableUnbatchedLookupStatus() != OK.
-  virtual bool UnsafeContainsKey(
-      const HeterogeneousKeyType& key_to_find) const = 0;
-
-  // Lookup the value for key_to_find. This value must always be well-defined,
-  // even when ContainsKey(key_to_find) == false. When
-  // dv = DefaultValue() != absl::nullopt and ContainsKey(key_to_find) == false,
-  // dv is returned.
-  // Undefined if TableUnbatchedLookupStatus() != OK.
-  virtual ValueType UnsafeLookupKey(
-      const HeterogeneousKeyType& key_to_find) const = 0;
-
-  // Returns OK if it is safe to call PrefetchKey, ContainsKey, and
-  // UnsafeLookupKey.
-  // If OK is returned, it is safe to call these methods until the next
-  // non-const method of this class is called.
-  virtual Status TableUnbatchedLookupStatus() const TF_MUST_USE_RESULT = 0;
-
-  // Lookup the values for keys and store them in values.
-  // prefetch_lookahead is used to prefetch the key at index
-  // i + prefetch_lookahead at the ith iteration of the implemented loop.
-  // keys and values must have the same size.
-  virtual Status BatchLookup(absl::Span<const HeterogeneousKeyType> keys,
-                             absl::Span<ValueType> values,
-                             int64 prefetch_lookahead) const = 0;
-
-  // Returns the number of elements in the table.
-  // Undefined if SizeStatus() != OK.
-  virtual size_t UnsafeSize() const = 0;
-
-  // Returns OK if the return value of UnsafeSize() is always well-defined.
-  virtual Status SizeStatus() const TF_MUST_USE_RESULT = 0;
-
-  // If non-null value is returned, LookupKey returns that value only for keys
-  // which satisfy ContainsKey(key_to_find) == false.
-  virtual const absl::optional<const ValueType> DefaultValue() const = 0;
-
-  string DebugString() const override { return "A lookup table"; }
-
-  ~LookupTableInterface() override = default;
-};
-
-}  // namespace tables
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_
diff --git a/tensorflow/core/kernels/lookup_tables/op_kernel_templates.h b/tensorflow/core/kernels/lookup_tables/op_kernel_templates.h
new file mode 100644
index 0000000..d830062
--- /dev/null
+++ b/tensorflow/core/kernels/lookup_tables/op_kernel_templates.h
@@ -0,0 +1,448 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_OP_KERNEL_TEMPLATES_H_
+#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_OP_KERNEL_TEMPLATES_H_
+
+#include <cstddef>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "absl/base/thread_annotations.h"
+#include "absl/meta/type_traits.h"
+#include "absl/types/span.h"
+#include "tensorflow/core/framework/device_base.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/kernels/tensor_flag_utils.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/refcount.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace tables {
+
+// Create resources of type ResourceType and AliasesToRegister using
+// Functor::AllocateContainer(OpKernelContext*, OpKernel*,
+// ResourceType**), where ResourceType = Functor::resource_type.
+// No-op for resources which have already been created.
+template <typename Functor, typename... AliasesToRegister>
+class ResourceConstructionOp : public OpKernel {
+ public:
+  explicit ResourceConstructionOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx), table_handle_set_(false) {
+    OP_REQUIRES_OK(
+        ctx, ctx->GetAttr("use_node_name_sharing", &use_node_name_sharing_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    mutex_lock l(mu_);
+
+    if (!table_handle_set_) {
+      OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
+                                      use_node_name_sharing_));
+    }
+
+    auto creator = [ctx,
+                    this](ResourceType** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      ResourceType* resource = nullptr;
+      auto status = Functor::AllocateContainer(ctx, this, &resource);
+      if (ABSL_PREDICT_FALSE(!status.ok())) {
+        // AllocateContainer should leave resource null on failure, but
+        // Unref defensively in case it set the pointer anyway.
+        if (resource != nullptr) {
+          resource->Unref();
+        }
+        return status;
+      }
+      if (ctx->track_allocations()) {
+        ctx->record_persistent_memory_allocation(resource->MemoryUsed());
+      }
+      *ret = resource;
+      return Status::OK();
+    };
+
+    // Register the ResourceType alias.
+    ResourceType* resource = nullptr;
+    OP_REQUIRES_OK(
+        ctx,
+        cinfo_.resource_manager()->template LookupOrCreate<ResourceType, true>(
+            cinfo_.container(), cinfo_.name(), &resource, creator));
+    // ScopedUnref snapshots the pointer at construction, so it must be
+    // created only after LookupOrCreate has populated the pointer.
+    core::ScopedUnref unref_me(resource);
+
+    // Put a handle to resource in the output tensor (the other aliases will
+    // have the same handle).
+    Tensor* handle;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &handle));
+    handle->scalar<ResourceHandle>()() = MakeResourceHandle<ResourceType>(
+        ctx, cinfo_.container(), cinfo_.name());
+    table_handle_set_ = true;
+
+    // Create other alias resources.
+    Status status;
+    char dummy[sizeof...(AliasesToRegister)] = {
+        (status.Update(RegisterAlias<AliasesToRegister>(resource)), 0)...};
+    (void)dummy;
+    OP_REQUIRES_OK(ctx, status);
+  }
+
+  ~ResourceConstructionOp() override {
+    // If the table object was not shared, delete it.
+    if (table_handle_set_ && cinfo_.resource_is_private_to_kernel()) {
+      if (!cinfo_.resource_manager()
+               ->template Delete<ResourceType>(cinfo_.container(),
+                                               cinfo_.name())
+               .ok()) {
+        // Do nothing; the resource may have been deleted by session resets.
+      }
+      // Attempt to delete other resource aliases.
+      Status dummy_status;
+      char dummy[sizeof...(AliasesToRegister)] = {
+          (dummy_status.Update(DeleteAlias<AliasesToRegister>()), 0)...};
+      (void)dummy;
+    }
+  }
+
+ private:
+  using ResourceType = typename Functor::resource_type;
+  template <typename T>
+  Status RegisterAlias(ResourceType* resource) {
+    auto creator = [resource](T** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      // The resource manager takes ownership of one ref when it stores the
+      // alias, so hand it a fresh one.
+      resource->Ref();
+      *ret = resource;
+      return Status::OK();
+    };
+
+    T* alias_resource = nullptr;
+    Status status =
+        cinfo_.resource_manager()->template LookupOrCreate<T, true>(
+            cinfo_.container(), cinfo_.name(), &alias_resource, creator);
+    // ScopedUnref snapshots the pointer at construction, so release the
+    // caller's ref only after LookupOrCreate has populated it.
+    core::ScopedUnref unref_me(alias_resource);
+    return status;
+  }
+
+  template <typename T>
+  Status DeleteAlias() {
+    return cinfo_.resource_manager()->template Delete<T>(cinfo_.container(),
+                                                         cinfo_.name());
+  }
+
+  mutex mu_;
+  bool table_handle_set_ GUARDED_BY(mu_);
+  ContainerInfo cinfo_;
+  bool use_node_name_sharing_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(ResourceConstructionOp);
+};
+
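+// A minimal Functor sketch (hypothetical names; MyTable stands for any
+// ResourceBase subclass):
+//
+//   struct MyTableFunctor {
+//     using resource_type = MyTable;
+//     static Status AllocateContainer(OpKernelContext* ctx, OpKernel* kernel,
+//                                     MyTable** container) {
+//       *container = new MyTable;
+//       return Status::OK();
+//     }
+//   };
+//
+//   // ResourceConstructionOp<MyTableFunctor, SizeInterface> creates the
+//   // table and also registers it under the SizeInterface alias.
+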
+// Create resources of type ResourceType (= Functor::resource_type) using the
+// static method Functor::AllocateContainer(OpKernelContext*, OpKernel*,
+// FallbackTableBaseType*, ResourceType**).
+// If the resource has already been created it will be looked up.
+// The container must decrease the reference count of the
+// FallbackTableBaseType* constructor argument before its destructor completes.
+template <typename Functor, typename... AliasesToRegister>
+class TableWithFallbackConstructionOp : public OpKernel {
+ public:
+  explicit TableWithFallbackConstructionOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx), table_handle_set_(false) {
+    OP_REQUIRES_OK(
+        ctx, ctx->GetAttr("use_node_name_sharing", &use_node_name_sharing_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    OpInputList table_int64_args;
+    OP_REQUIRES_OK(ctx, ctx->input_list("table_int64_args", &table_int64_args));
+    if (ctx->num_inputs() == table_int64_args.size()) {
+      ctx->SetStatus(errors::InvalidArgument(
+          "Expected op to have a resource input after the table_int64_args "
+          "input but no such input found."));
+      return;
+    }
+
+    // Look up the fallback table.
+    FallbackTableBaseType* fallback_table = nullptr;
+    {
+      const Tensor& table_handle = ctx->input(table_int64_args.size());
+      ResourceHandle handle(table_handle.scalar<ResourceHandle>()());
+      OP_REQUIRES_OK(
+          ctx, ctx->resource_manager()->Lookup<FallbackTableBaseType, true>(
+                   handle.container(), handle.name(), &fallback_table));
+    }
+    mutex_lock l(mu_);
+
+    if (!table_handle_set_) {
+      OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
+                                      use_node_name_sharing_));
+    }
+
+    bool fallback_consumed = false;
+    auto creator = [ctx, this, fallback_table, &fallback_consumed](
+                       ResourceType** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      // Ownership of the fallback table ref transfers to the new container.
+      fallback_consumed = true;
+      // The container construction logic can't be merged with
+      // ResourceConstructionOp because the container constructor requires an
+      // input which can only be constructed while the resource manager's
+      // internal lock is not held.
+      ResourceType* resource = nullptr;
+      auto status =
+          Functor::AllocateContainer(ctx, this, fallback_table, &resource);
+      if (ABSL_PREDICT_FALSE(!status.ok())) {
+        // AllocateContainer should leave resource null on failure, but
+        // Unref defensively in case it set the pointer anyway.
+        if (resource != nullptr) {
+          resource->Unref();
+        }
+        return status;
+      }
+      if (ctx->track_allocations()) {
+        ctx->record_persistent_memory_allocation(resource->MemoryUsed());
+      }
+      *ret = resource;
+      return Status::OK();
+    };
+
+    // Register the ResourceType alias.
+    ResourceType* table = nullptr;
+    OP_REQUIRES_OK(
+        ctx,
+        cinfo_.resource_manager()->template LookupOrCreate<ResourceType, true>(
+            cinfo_.container(), cinfo_.name(), &table, creator));
+    // ScopedUnref snapshots the pointer at construction, so it must be
+    // created only after LookupOrCreate has populated the pointer.
+    core::ScopedUnref unref_me(table);
+    // If the table already existed, creator never ran and ownership of the
+    // fallback table ref was not transferred; release it here.
+    if (!fallback_consumed) fallback_table->Unref();
+
+    // Put a handle to resource in the output tensor (the other aliases will
+    // have the same handle).
+    Tensor* handle;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &handle));
+    handle->scalar<ResourceHandle>()() = MakeResourceHandle<ResourceType>(
+        ctx, cinfo_.container(), cinfo_.name());
+    table_handle_set_ = true;
+
+    // Create other alias resources.
+    Status status;
+    char dummy[sizeof...(AliasesToRegister)] = {
+        (status.Update(RegisterAlias<AliasesToRegister>(table)), 0)...};
+    (void)dummy;
+    OP_REQUIRES_OK(ctx, status);
+  }
+
+  ~TableWithFallbackConstructionOp() override {
+    // If the table object was not shared, delete it.
+    if (table_handle_set_ && cinfo_.resource_is_private_to_kernel()) {
+      if (!cinfo_.resource_manager()
+               ->template Delete<ResourceType>(cinfo_.container(),
+                                               cinfo_.name())
+               .ok()) {
+        // Do nothing; the resource may have been deleted by session resets.
+      }
+      // Attempt to delete other resource aliases.
+      Status dummy_status;
+      char dummy[sizeof...(AliasesToRegister)] = {
+          (dummy_status.Update(DeleteAlias<AliasesToRegister>()), 0)...};
+      (void)dummy;
+    }
+  }
+
+ private:
+  using ResourceType = typename Functor::resource_type;
+  using FallbackTableBaseType = typename Functor::fallback_table_type;
+
+  template <typename T>
+  Status RegisterAlias(ResourceType* resource) {
+    auto creator = [resource](T** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      // The resource manager takes ownership of one ref when it stores the
+      // alias, so hand it a fresh one.
+      resource->Ref();
+      *ret = resource;
+      return Status::OK();
+    };
+
+    T* alias_resource = nullptr;
+    Status status =
+        cinfo_.resource_manager()->template LookupOrCreate<T, true>(
+            cinfo_.container(), cinfo_.name(), &alias_resource, creator);
+    // ScopedUnref snapshots the pointer at construction, so release the
+    // caller's ref only after LookupOrCreate has populated it.
+    core::ScopedUnref unref_me(alias_resource);
+    return status;
+  }
+
+  template <typename T>
+  Status DeleteAlias() {
+    return cinfo_.resource_manager()->template Delete<T>(cinfo_.container(),
+                                                         cinfo_.name());
+  }
+
+  mutex mu_;
+  bool table_handle_set_ GUARDED_BY(mu_);
+  ContainerInfo cinfo_;
+  bool use_node_name_sharing_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TableWithFallbackConstructionOp);
+};
+
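+// Ownership sketch for the fallback contract (hypothetical container): the
+// constructor receives an already-Ref'd fallback table and the destructor
+// must Unref it.
+//
+//   class MyFallbackTable : public MyTable {
+//    public:
+//     explicit MyFallbackTable(MyTable* fallback) : fallback_(fallback) {}
+//     ~MyFallbackTable() override {
+//       if (fallback_ != nullptr) fallback_->Unref();
+//     }
+//
+//    private:
+//     MyTable* fallback_;
+//   };
+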
+// Look up a table of type ResourceAlias and insert the passed-in keys and
+// values tensors using Functor::TensorInsert(keys, values, table).
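+// Expected input layout (N = number of table_int64_args inputs):
+//   inputs [0, N): table_int64_args
+//   input N:       table resource handle
+//   input N + 1:   keys tensor
+//   input N + 2:   values tensor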
+template <typename Functor,
+          typename ResourceAlias = typename Functor::resource_type>
+class LookupTableInsertOp : public OpKernel {
+ public:
+  explicit LookupTableInsertOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    OpInputList table_int64_args;
+    OP_REQUIRES_OK(ctx, ctx->input_list("table_int64_args", &table_int64_args));
+    const size_t tensor_index_offset = table_int64_args.size();
+    // Business logic for checking tensor shapes, etc., is delegated to the
+    // Functor.
+    const Tensor& keys = ctx->input(tensor_index_offset + 1);
+    const Tensor& values = ctx->input(tensor_index_offset + 2);
+
+    const Tensor& table_handle = ctx->input(tensor_index_offset);
+    ResourceHandle handle(table_handle.scalar<ResourceHandle>()());
+    ResourceAlias* table = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup<ResourceAlias, true>(
+                            handle.container(), handle.name(), &table));
+    // ScopedUnref snapshots the pointer at construction, so it must be
+    // created only after the lookup has populated the pointer.
+    core::ScopedUnref unref_me(table);
+
+    int memory_used_before = 0;
+    if (ctx->track_allocations()) {
+      memory_used_before = table->MemoryUsed();
+    }
+    auto* mutex = table->GetMutex();
+    if (mutex != nullptr) {
+      mutex_lock lock(*mutex);
+      OP_REQUIRES_OK(ctx, Functor::TensorInsert(keys, values, table));
+    } else {
+      OP_REQUIRES_OK(ctx, Functor::TensorInsert(keys, values, table));
+    }
+    if (ctx->track_allocations()) {
+      ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+                                               memory_used_before);
+    }
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(LookupTableInsertOp);
+};
+
+// Look up a table of type ResourceAlias and look up the passed-in keys using
+// Functor::TensorLookup(
+//     table, keys, prefetch_lookahead, num_keys_per_thread, threadpool, out).
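+// Expected input layout (N = number of table_int64_args inputs):
+//   input 0:     prefetch_lookahead (one of the table_int64_args)
+//   input N:     table resource handle
+//   input N + 1: keys tensor
+//   input N + 2: num_threads scalar, or a matrix whose rows map a key count
+//                to a thread count (see Compute below)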
+template <typename Functor,
+          typename ResourceAlias = typename Functor::resource_type>
+class LookupTableFindOp : public OpKernel {
+ public:
+  explicit LookupTableFindOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    OpInputList table_int64_args;
+    {
+      auto status = ctx->input_list("table_int64_args", &table_int64_args);
+      if (ABSL_PREDICT_FALSE(!status.ok())) {
+        ctx->SetStatus(status);
+        return;
+      }
+    }
+    // We look up tensors by positional index because that is more efficient
+    // than looking them up by string name.
+    const Tensor& prefetch_lookahead_t = ctx->input(0);
+    const size_t tensor_index_offset = table_int64_args.size();
+    const Tensor& keys = ctx->input(tensor_index_offset + 1);
+    const Tensor& num_threads = ctx->input(tensor_index_offset + 2);
+
+    TensorShape output_shape = keys.shape();
+    Tensor* out;
+    {
+      auto status = ctx->allocate_output(0, output_shape, &out);
+      if (ABSL_PREDICT_FALSE(!status.ok())) {
+        ctx->SetStatus(status);
+        return;
+      }
+    }
+
+    int64 num_threads_scalar;
+    if (TensorShapeUtils::IsScalar(num_threads.shape())) {
+      num_threads_scalar = num_threads.template scalar<int64>()();
+    } else {
+      // Scans through the rows of num_threads and returns the second entry of
+      // the first row whose first entry is <= the number of keys to process.
+      // This allows the user to control parallelism as a function of the
+      // number of keys to look up.
+      num_threads_scalar = tensor_flag_utils::FindConfigValueForKey<int64, int>(
+          num_threads.template matrix<int64>(), keys.dim_size(0));
+    }
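+    // Worked example (illustrative): with num_threads rows
+    // [[100000, 4], [1000, 2]] and 50000 keys, the first row whose first
+    // entry is <= 50000 is [1000, 2], so num_threads_scalar = 2 and
+    // num_keys_per_thread below becomes max(1, 50000 / 2) = 25000.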
+    const int64 num_keys_per_thread =
+        num_threads_scalar > 0
+            ? std::max(int64{1}, keys.dim_size(0) / num_threads_scalar)
+            : keys.dim_size(0);
+
+    const int64 prefetch_lookahead = prefetch_lookahead_t.scalar<int64>()();
+
+    const Tensor& table_handle = ctx->input(tensor_index_offset);
+    ResourceHandle handle(table_handle.scalar<ResourceHandle>()());
+    ResourceAlias* table = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup<ResourceAlias, true>(
+                            handle.container(), handle.name(), &table));
+    // ScopedUnref snapshots the pointer at construction, so it must be
+    // created only after the lookup has populated the pointer.
+    core::ScopedUnref unref_me(table);
+
+    auto* mutex = table->GetMutex();
+    auto* threadpool = ctx->device()->tensorflow_cpu_worker_threads()->workers;
+    if (mutex != nullptr) {
+      // There are many subtle problems with using reader locks so we opt for a
+      // writer lock here.
+      mutex_lock lock(*mutex);
+      OP_REQUIRES_OK(
+          ctx, Functor::TensorLookup(*table, keys, prefetch_lookahead,
+                                     num_keys_per_thread, threadpool, out));
+    } else {
+      OP_REQUIRES_OK(
+          ctx, Functor::TensorLookup(*table, keys, prefetch_lookahead,
+                                     num_keys_per_thread, threadpool, out));
+    }
+  }
+};
+
+// Look up a container of type ResourceAlias and return its size using
+// Functor::Size(container, &size).
+template <typename Functor,
+          typename ResourceAlias = typename Functor::resource_type>
+class ContainerSizeOp : public OpKernel {
+ public:
+  explicit ContainerSizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& container_handle = ctx->input(0);
+    ResourceHandle handle(container_handle.scalar<ResourceHandle>()());
+    ResourceAlias* container = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup<ResourceAlias, true>(
+                            handle.container(), handle.name(), &container));
+    // ScopedUnref snapshots the pointer at construction, so it must be
+    // created only after the lookup has populated the pointer.
+    core::ScopedUnref unref_me(container);
+
+    Tensor* out;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out));
+
+    auto* mutex = container->GetMutex();
+    if (mutex != nullptr) {
+      tf_shared_lock lock(*mutex);
+      OP_REQUIRES_OK(ctx, Functor::Size(*container, &out->scalar<uint64>()()));
+    } else {
+      OP_REQUIRES_OK(ctx, Functor::Size(*container, &out->scalar<uint64>()()));
+    }
+  }
+};
+
+}  // namespace tables
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_OP_KERNEL_TEMPLATES_H_
diff --git a/tensorflow/core/kernels/lookup_tables/resource_interface_templates.h b/tensorflow/core/kernels/lookup_tables/resource_interface_templates.h
new file mode 100644
index 0000000..7331fb4
--- /dev/null
+++ b/tensorflow/core/kernels/lookup_tables/resource_interface_templates.h
@@ -0,0 +1,99 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_RESOURCE_INTERFACE_TEMPLATES_H_
+#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_RESOURCE_INTERFACE_TEMPLATES_H_
+
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/mutex.h"
+
+namespace tensorflow {
+namespace tables {
+
+// Interface for resources with mutable state.
+class SynchronizedInterface : public virtual ResourceBase {
+ public:
+  // Return value should be used to synchronize read/write access to
+  // all public methods. If null, no synchronization is needed.
+  virtual mutex* GetMutex() const = 0;
+};
+
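+// Caller pattern sketch (this mirrors usage in op_kernel_templates.h): lock
+// only when the resource exports a mutex.
+//
+//   auto* mu = resource->GetMutex();
+//   if (mu != nullptr) {
+//     mutex_lock lock(*mu);
+//     // ... access the resource ...
+//   } else {
+//     // ... access the resource ...
+//   }
+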
+// Interface for containers which support batch insertion or assignment.
+template <typename ValueType, typename... KeyContext>
+class InsertOrAssignInterface : public virtual SynchronizedInterface {
+ public:
+  using value_type = ValueType;
+
+  // Stores each KV pair {keys[i], values[i]} in the underlying map, overriding
+  // pre-existing pairs which have equivalent keys.
+  // keys and values should have the same size.
+  virtual Status InsertOrAssign(KeyContext... key_context,
+                                ValueType values) = 0;
+};
+
+// Interface for containers which support lookups.
+template <typename ValueType, typename... KeyContext>
+class LookupInterface : public virtual SynchronizedInterface {
+ public:
+  using value_type = ValueType;
+
+  // Look up the values for keys and store them in values.
+  // keys and values must have the same size.
+  virtual Status Lookup(KeyContext... key_context, ValueType values) const = 0;
+};
+
+// Interface for containers which support lookups with prefetching.
+template <typename ValueType, typename... KeyContext>
+class LookupWithPrefetchInterface : public virtual SynchronizedInterface {
+ public:
+  using value_type = ValueType;
+
+  // Look up the values for keys and store them in values.
+  // prefetch_lookahead is used to prefetch the key at index
+  // i + prefetch_lookahead at the ith iteration of the implemented loop.
+  // keys and values must have the same size.
+  virtual Status Lookup(KeyContext... key_context, ValueType values,
+                        int64 prefetch_lookahead) const = 0;
+};
+
+// Interface for containers which report their size.
+// Implementations must guarantee thread-safety when GetMutex is used to
+// synchronize method access.
+class SizeInterface : public virtual SynchronizedInterface {
+ public:
+  // Returns the number of elements in the container.
+  virtual uint64 Size() const = 0;
+};
+
+// Interface for tables which can be initialized from key and value arguments.
+template <typename ValueType, typename... KeyContext>
+class KeyValueTableInitializerInterface : public virtual SynchronizedInterface {
+ public:
+  using value_type = ValueType;
+
+  // Initialize the table from the given keys and values.
+  // keys and values must have the same size.
+  virtual Status Initialize(KeyContext... key_context, ValueType values) = 0;
+};
+
+}  // namespace tables
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_RESOURCE_INTERFACE_TEMPLATES_H_
diff --git a/tensorflow/core/kernels/lookup_tables/table_op_utils.h b/tensorflow/core/kernels/lookup_tables/table_op_utils.h
deleted file mode 100644
index 4bc18c7..0000000
--- a/tensorflow/core/kernels/lookup_tables/table_op_utils.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_OP_UTILS_H_
-#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_OP_UTILS_H_
-
-#include <cstddef>
-#include <string>
-#include <type_traits>
-
-#include "absl/base/thread_annotations.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/resource_mgr.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace tables {
-
-// Create resources of type ContainerBase using the static method
-// Functor::AllocateContainer(OpKernelConstruction*, OpKernel*,
-// ContainerBase**)
-// If the resource has already been created it will be looked up.
-template <class ContainerBase, typename Functor>
-class ResourceConstructionOp : public OpKernel {
- public:
-  explicit ResourceConstructionOp(OpKernelConstruction* ctx)
-      : OpKernel(ctx), table_handle_set_(false) {
-    OP_REQUIRES_OK(
-        ctx, ctx->GetAttr("use_node_name_sharing", &use_node_name_sharing_));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    mutex_lock l(mu_);
-
-    if (!table_handle_set_) {
-      OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
-                                      use_node_name_sharing_));
-    }
-
-    auto creator = [ctx,
-                    this](ContainerBase** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-      ContainerBase* container;
-      auto status = Functor::AllocateContainer(ctx, this, &container);
-      if (TF_PREDICT_FALSE(!status.ok())) {
-        container->Unref();
-        return status;
-      }
-      if (ctx->track_allocations()) {
-        ctx->record_persistent_memory_allocation(container->MemoryUsed());
-      }
-      *ret = container;
-      return Status::OK();
-    };
-
-    ContainerBase* container_base = nullptr;
-    OP_REQUIRES_OK(
-        ctx, cinfo_.resource_manager()->template LookupOrCreate<ContainerBase>(
-                 cinfo_.container(), cinfo_.name(), &container_base, creator));
-    core::ScopedUnref unref_me(container_base);
-
-    Tensor* handle;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &handle));
-    handle->scalar<ResourceHandle>()() = MakeResourceHandle<ContainerBase>(
-        ctx, cinfo_.container(), cinfo_.name());
-    table_handle_set_ = true;
-  }
-
-  ~ResourceConstructionOp() override {
-    // If the table object was not shared, delete it.
-    if (table_handle_set_ && cinfo_.resource_is_private_to_kernel()) {
-      if (!cinfo_.resource_manager()
-               ->template Delete<ContainerBase>(cinfo_.container(),
-                                                cinfo_.name())
-               .ok()) {
-        // Do nothing; the resource may have been deleted by session resets.
-      }
-    }
-  }
-
- private:
-  mutex mu_;
-  bool table_handle_set_ GUARDED_BY(mu_);
-  ContainerInfo cinfo_;
-  bool use_node_name_sharing_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(ResourceConstructionOp);
-};
-
-// Create resources of type ContainerBase using the static method
-// Functor::AllocateContainer(OpKernelConstruction*, OpKernel*,
-// FallbackTableBaseType*, ContainerBase**)
-// If the resource has already been created it will be looked up.
-// Container must decrease the reference count of the FallbackTableBaseType*
-// constructor argument before its destructor completes.
-template <class ContainerBase, class Functor,
-          class FallbackTableBaseType = ContainerBase>
-class TableWithFallbackConstructionOp : public OpKernel {
- public:
-  explicit TableWithFallbackConstructionOp(OpKernelConstruction* ctx)
-      : OpKernel(ctx), table_handle_set_(false) {
-    OP_REQUIRES_OK(
-        ctx, ctx->GetAttr("use_node_name_sharing", &use_node_name_sharing_));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    OpInputList table_int64_args;
-    OP_REQUIRES_OK(ctx, ctx->input_list("table_int64_args", &table_int64_args));
-    if (ctx->num_inputs() == table_int64_args.size()) {
-      ctx->SetStatus(errors::InvalidArgument(
-          "Expected op to have a resource input after the table_int64_args "
-          "input but no such input found."));
-      return;
-    }
-
-    FallbackTableBaseType* fallback_table = nullptr;
-    {
-      const Tensor& table_handle = ctx->input(table_int64_args.size());
-      ResourceHandle handle(table_handle.scalar<ResourceHandle>()());
-      OP_REQUIRES_OK(
-          ctx, ctx->resource_manager()->Lookup(handle.container(),
-                                               handle.name(), &fallback_table));
-    }
-    mutex_lock l(mu_);
-
-    if (!table_handle_set_) {
-      OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
-                                      use_node_name_sharing_));
-    }
-
-    auto creator = [ctx, this, fallback_table](
-                       ContainerBase** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-      // container construction logic can't be merged with
-      // ResourceConstructionOp because Container constructor requires an
-      // input which can only be constructed if the resource manager
-      // internal lock is not already held.
-      ContainerBase* container;
-      auto status =
-          Functor::AllocateContainer(ctx, this, fallback_table, &container);
-      if (TF_PREDICT_FALSE(!status.ok())) {
-        container->Unref();
-        return status;
-      }
-      if (ctx->track_allocations()) {
-        ctx->record_persistent_memory_allocation(container->MemoryUsed());
-      }
-      *ret = container;
-      return Status::OK();
-    };
-
-    ContainerBase* table = nullptr;
-    OP_REQUIRES_OK(
-        ctx, cinfo_.resource_manager()->template LookupOrCreate<ContainerBase>(
-                 cinfo_.container(), cinfo_.name(), &table, creator));
-    core::ScopedUnref unref_me(table);
-
-    Tensor* handle;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &handle));
-    handle->scalar<ResourceHandle>()() = MakeResourceHandle<ContainerBase>(
-        ctx, cinfo_.container(), cinfo_.name());
-    table_handle_set_ = true;
-  }
-
-  ~TableWithFallbackConstructionOp() override {
-    // If the table object was not shared, delete it.
-    if (table_handle_set_ && cinfo_.resource_is_private_to_kernel()) {
-      if (!cinfo_.resource_manager()
-               ->template Delete<ContainerBase>(cinfo_.container(),
-                                                cinfo_.name())
-               .ok()) {
-        // Do nothing; the resource may have been deleted by session resets.
-      }
-    }
-  }
-
- private:
-  mutex mu_;
-  bool table_handle_set_ GUARDED_BY(mu_);
-  ContainerInfo cinfo_;
-  bool use_node_name_sharing_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(TableWithFallbackConstructionOp);
-};
-
-// Op that returns the size of a container.
-template <class Container>
-class ContainerSizeOp : public OpKernel {
- public:
-  explicit ContainerSizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor& container_handle = ctx->input(0);
-    ResourceHandle handle(container_handle.scalar<ResourceHandle>()());
-    Container* container;
-    OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup(
-                            handle.container(), handle.name(), &container));
-    core::ScopedUnref unref_me(container);
-    OP_REQUIRES_OK(ctx, container->SizeStatus());
-
-    Tensor* out;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out));
-
-    auto* mutex = container->GetMutex();
-    if (mutex != nullptr) {
-      tf_shared_lock lock(*mutex);
-      out->scalar<int64>()() = container->UnsafeSize();
-    } else {
-      out->scalar<int64>()() = container->UnsafeSize();
-    }
-  }
-};
-
-}  // namespace tables
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_OP_UTILS_H_
diff --git a/tensorflow/core/kernels/lookup_tables/table_resource_utils.h b/tensorflow/core/kernels/lookup_tables/table_resource_utils.h
deleted file mode 100644
index 2065904..0000000
--- a/tensorflow/core/kernels/lookup_tables/table_resource_utils.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_RESOURCE_UTILS_H_
-#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_RESOURCE_UTILS_H_
-
-#include <memory>
-
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/lookup_tables/lookup_table_interface.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/platform/mutex.h"
-
-namespace tensorflow {
-namespace tables {
-
-// Parent class for tables with support for multithreaded synchronization.
-template <typename HeterogeneousKeyType, typename ValueType>
-class LookupTableWithSynchronization
-    : public LookupTableInterface<HeterogeneousKeyType, ValueType> {
- public:
-  // By convention, it is assumed that the OpKernel which creates this
-  // resource is bound to an op whose first input is an in64 tensor list with a
-  // first element whose boolean value indicates whether a mutex should be
-  // exported to synchronize access to state. If the tensor list is empty,
-  // the status in ctx is set to InvalidArgument and this object is in an
-  // undefined state.
-  LookupTableWithSynchronization(OpKernelContext* ctx, OpKernel* kernel) {
-    OpInputList table_int64_args;
-    OP_REQUIRES_OK(ctx, ctx->input_list("table_int64_args", &table_int64_args));
-    if (table_int64_args.size() == 0) {
-      ctx->SetStatus(errors::InvalidArgument(
-          "table_int64_args should not be empty. Set the first element "
-          "to 1 to enable synchronized table use and to 0 otherwise."));
-      return;
-    }
-    if (ctx->input(0).scalar<int64>()() != 0) {
-      mutex_ = absl::make_unique<mutex>();
-    }
-  }
-
-  // Mutex for synchronizing access to unsynchronized methods.
-  mutex* GetMutex() const override { return mutex_.get(); }
-
- private:
-  // Use this for locking.
-  mutable std::unique_ptr<mutex> mutex_;
-};
-
-// Parent class for tables which can be constructed with arbitrary
-// lookup fallbacks.
-// Since LookupTableInterface::LookupKey assumes that all keys can be mapped
-// to values, LookupTableWithFallbackInterface allows clients to implement
-// two-stage lookups. If the first key lookup fails, clients can choose
-// to perform a fallback lookup using an externally supplied table.
-template <typename HeterogeneousKeyType, typename ValueType,
-          typename FallbackTableRegisteredType =
-              LookupTableInterface<HeterogeneousKeyType, ValueType>>
-class LookupTableWithFallbackInterface
-    : public LookupTableWithSynchronization<HeterogeneousKeyType, ValueType> {
- public:
-  LookupTableWithFallbackInterface(OpKernelContext* ctx, OpKernel* kernel,
-                                   FallbackTableRegisteredType* fallback_table)
-      : LookupTableWithSynchronization<HeterogeneousKeyType, ValueType>(ctx,
-                                                                        kernel),
-        fallback_table_(fallback_table) {}
-
-  // Clients are required to fail when ctx is set to a not-OK status in
-  // the constructor so this dereference is safe.
-  const FallbackTableRegisteredType& fallback_table() const {
-    return *fallback_table_;
-  }
-
-  ~LookupTableWithFallbackInterface() override {
-    if (fallback_table_ != nullptr) {
-      fallback_table_->Unref();
-    }
-  }
-
- private:
-  FallbackTableRegisteredType* fallback_table_;
-};
-
-}  // namespace tables
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_RESOURCE_UTILS_H_
diff --git a/tensorflow/core/kernels/lrn_op_test.cc b/tensorflow/core/kernels/lrn_op_test.cc
index 5d8c5c2..496c697 100644
--- a/tensorflow/core/kernels/lrn_op_test.cc
+++ b/tensorflow/core/kernels/lrn_op_test.cc
@@ -102,7 +102,7 @@
                    .Finalize(node_def()));
   TF_ASSERT_OK(InitOp());
   AddInput<float>(TensorShape({1, 1, 1, 96}),
-                  [this](int i) -> float { return i + 1; });
+                  [](int i) -> float { return i + 1; });
   TF_ASSERT_OK(RunOpKernel());
   auto actual = GetOutput(0)->tensor<float, 4>();
 
@@ -138,7 +138,7 @@
                    .Finalize(node_def()));
   TF_ASSERT_OK(InitOp());
   AddInput<float>(TensorShape({1, 1, 1, 16}),
-                  [this](int i) -> float { return i + 1; });
+                  [](int i) -> float { return i + 1; });
   TF_ASSERT_OK(RunOpKernel());
   auto actual = GetOutput(0)->tensor<float, 4>();
 
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index 3a5c874..b95bbca 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -25,12 +25,14 @@
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/kernels/concat_lib_cpu.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/mkl_util.h"
 
 using mkldnn::concat;
 using mkldnn::stream;
-#include "tensorflow/core/util/mkl_util.h"
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -226,8 +228,50 @@
       // format and avoid calling eigen version.
       if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;
 
+      OpInputList input_mins, input_maxes;
+      if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
+        // MKL-DNN concat does not support input tensors that have different
+        // ranges. Check whether the ranges of all input tensors are the same;
+        // if not, forward the op to the Eigen implementation.
+
+        OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins));
+        OP_REQUIRES(context, (input_mins.size() == N),
+                    errors::InvalidArgument(
+                        "QuantizedConcatOp : Expected mins input list length ",
+                        input_mins.size(), " to equal values length ", N));
+
+        OP_REQUIRES_OK(context,
+                       context->input_list("input_maxes", &input_maxes));
+        OP_REQUIRES(context, (input_maxes.size() == N),
+                    errors::InvalidArgument(
+                        "QuantizedConcatOp : Expected maxes input list length ",
+                        input_maxes.size(), " to equal values length ", N));
+        float input_min = input_mins[0].flat<float>()(0);
+        float input_max = input_maxes[0].flat<float>()(0);
+        const float eps = 1.0e-6;
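+        // A mismatch larger than eps between any two ranges routes the op to
+        // the Eigen path below, which currently rejects quantized inputs.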
+        for (int i = 1; i < N; ++i) {
+          float min = input_mins[i].flat<float>()(0);
+          float max = input_maxes[i].flat<float>()(0);
+
+          if (fabs(input_min - min) > eps || fabs(input_max - max) > eps) {
+            invoke_eigen = true;
+            break;
+          }
+        }
+      }
+
       // Call Eigen library
       if (invoke_eigen) {
+        // MKL-DNN quantized concat does not support input tensors with
+        // different ranges.
+        // TODO(mabuzain): Add a quantized version of CallEigen() to support
+        // this case.
+        OP_REQUIRES(
+            context,
+            (!std::is_same<T, qint8>::value && !std::is_same<T, quint8>::value),
+            errors::Unimplemented("MKL DNN quantized concat does not "
+                                  "support input tensors that have "
+                                  "different ranges"));
         CallEigenVersion(context, input_tensors, mkl_input_shapes);
         return;
       }
@@ -374,6 +418,23 @@
       std::vector<primitive> net;
       net.push_back(concat_op);
       stream(stream::kind::eager).submit(net).wait();
+
+      // For quantized concat, min and max outputs are also computed.
+      if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
+        Tensor* output_min = nullptr;
+        Tensor* output_max = nullptr;
+        MklDnnShape output_min_mkl_shape, output_max_mkl_shape;
+        output_min_mkl_shape.SetMklTensor(false);
+        output_max_mkl_shape.SetMklTensor(false);
+        AllocateOutputSetMklShape(context, 1, &output_min, {},
+                                  output_min_mkl_shape);
+        AllocateOutputSetMklShape(context, 2, &output_max, {},
+                                  output_max_mkl_shape);
+        // All input tensors have the same range (within eps, checked above),
+        // so just use the first one.
+        output_min->flat<float>()(0) = input_mins[0].flat<float>()(0);
+        output_max->flat<float>()(0) = input_maxes[0].flat<float>()(0);
+      }
     } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) +
                          ", message: " + string(e.message) + ", in file " +
@@ -490,6 +551,20 @@
 
 TF_CALL_float(REGISTER_MKL_CPU);
 
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("T")
+                            .HostMemory("axis")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklConcatOp<CPUDevice, quint8, NAME_IS_AXIS>)
+
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("T")
+                            .HostMemory("axis")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklConcatOp<CPUDevice, qint8, NAME_IS_AXIS>)
+
 #undef REGISTER_CONCAT_MKL
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index b96cc23..c12a4ff 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -392,14 +392,24 @@
     int64 pad_D1, pad_D2;
 
     if (is_conv2d) {
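+      // When a Pad op has been fused into this conv (pad_enabled), the
+      // explicit pads forwarded through *pad_l/*pad_r are used via
+      // Padding::EXPLICIT for the output-size computation.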
+      Padding padding_type;
+      if (pad_enabled) {
+        padding_type = Padding::EXPLICIT;
+        pad_top = static_cast<int64>((*pad_l)[0]);
+        pad_left = static_cast<int64>((*pad_l)[1]);
+        pad_bottom = static_cast<int64>((*pad_r)[0]);
+        pad_right = static_cast<int64>((*pad_r)[1]);
+      } else {
+        padding_type = padding_;
+      }
       OP_REQUIRES_OK(context_,
                      GetWindowedOutputSizeVerboseV2(
                          input_rows, filter_rows, dilation_rows, stride_rows,
-                         padding_, &out_rows, &pad_top, &pad_bottom));
+                         padding_type, &out_rows, &pad_top, &pad_bottom));
       OP_REQUIRES_OK(context_,
                      GetWindowedOutputSizeVerboseV2(
                          input_cols, filter_cols, dilation_cols, stride_cols,
-                         padding_, &out_cols, &pad_left, &pad_right));
+                         padding_type, &out_cols, &pad_left, &pad_right));
     } else {
       OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose(
                                    input_planes, filter_planes, stride_planes,
@@ -413,25 +423,11 @@
     }
 
     if (is_conv2d) {
-      // Conv + pad fusion is enabled only for 2D
+      // Conv + pad fusion is enabled only for 2D.
       // If pad_enabled, i.e., pad and conv op are fused, then
       // all pads are already passed from pad op through
-      // *pad_l and *pad_r
-      if (pad_enabled) {
-        pad_top = static_cast<int64>((*pad_l)[0]);
-        pad_left = static_cast<int64>((*pad_l)[1]);
-        pad_bottom = static_cast<int64>((*pad_r)[0]);
-        pad_right = static_cast<int64>((*pad_r)[1]);
-        // update the out_rows and out_cols based on all
-        // sides of the pads coming from pad op.
-        out_rows = out_rows + (pad_top + pad_bottom) / stride_rows;
-        out_cols = out_cols + (pad_left + pad_right) / stride_cols;
-      }
-      // Handle padding. MKL-DNN uses asymetric padding.
-      // But, if pad_enabled, i.e., pad and conv op are fused,
-      // then, *pad_l and *pad_r are already set from pad op.
-      // In that case they need not set here.
-      else {
+      // *pad_l and *pad_r and they don't need to be set here.
+      if (!pad_enabled) {
         *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
         *pad_r = {static_cast<int>(pad_bottom), static_cast<int>(pad_right)};
       }
diff --git a/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc b/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc
new file mode 100644
index 0000000..fc68480
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc
@@ -0,0 +1,234 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
+
+#define EIGEN_USE_THREADS
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+
+using test::graph::Constant;
+
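+// Dummy MKL-layout metadata: fed as the companion "second" inputs to mark
+// the data inputs as plain TF (non-MKL-layout) tensors.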
+static const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0};
+static const TensorShape dummy_shape({8});
+
+// Helper class for converting MKL tensors to TF tensors and comparing to
+// expected values.
+class ConvMklToTF : public OpsTestBase {
+ public:
+  template <typename T>
+  void ConvertMKL2TF(DataType dtype, const Tensor& first, const Tensor& second,
+                     Tensor& output) {
+    // Create an MKL to TF conversion node and execute it
+    TF_EXPECT_OK(NodeDefBuilder("mkl_to_tf_op", "_MklToTf")
+                     .Input(FakeInput(dtype))     // Input
+                     .Input(FakeInput(DT_UINT8))  // MKL second tensor
+                     .Attr("T", dtype)
+                     .Attr("_kernel", "MklOp")
+                     .Finalize(node_def()));
+    TF_EXPECT_OK(InitOp());
+    AddInputFromArray<T>(first.shape(), first.flat<T>());
+    AddInputFromArray<uint8>(second.shape(), second.flat<uint8>());
+    TF_ASSERT_OK(RunOpKernel());
+
+    output = *GetOutput(0);
+  }
+  void TestBody() {}
+};
+
+class QuantizedConcatTest : public OpsTestBase {
+ protected:
+  QuantizedConcatTest() {}
+
+  void TestSmall8Bit(float first_min, float first_max, float second_min,
+                     float second_max);
+  void TestSecondDim8Bit(float first_min, float first_max, float second_min,
+                         float second_max);
+};
+
+TEST_F(QuantizedConcatTest, Small8BitSameRange) {
+  // Range for both is the same, so the implementation can use memcpy.
+  TestSmall8Bit(0.0f, 255.0f, 0.0f, 255.0f);
+}
+
+void QuantizedConcatTest::TestSmall8Bit(float first_min, float first_max,
+                                        float second_min, float second_max) {
+  TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "_MklQuantizedConcatV2")
+                   .Input(FakeInput(2, DT_QUINT8))
+                   .Input(FakeInput(DT_INT32))
+                   .Input(FakeInput(2, DT_FLOAT))
+                   .Input(FakeInput(2, DT_FLOAT))
+                   .Input(FakeInput(2, DT_UINT8))  // MKL second tensor
+                   .Input(FakeInput(DT_UINT8))     // MKL second tensor
+                   .Input(FakeInput(2, DT_UINT8))  // MKL second tensor
+                   .Input(FakeInput(2, DT_UINT8))  // MKL second tensor
+                   .Attr("N", 2)
+                   .Attr("T", DataTypeToEnum<quint8>::v())
+                   .Attr("Tidx", DT_INT32)
+                   .Attr("_kernel", "QuantizedMklOp")
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+  const int first_batch = 2;
+  const int first_height = 2;
+  const int first_width = 3;
+  const int first_depth = 1;
+  Tensor first_float(DT_FLOAT,
+                     {first_batch, first_height, first_width, first_depth});
+  test::FillValues<float>(&first_float,
+                          {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+  Tensor first_quantized =
+      FloatTensorToQuantized<quint8>(first_float, first_min, first_max);
+
+  const int second_batch = 2;
+  const int second_height = 2;
+  const int second_width = 3;
+  const int second_depth = 1;
+  Tensor second_float(
+      DT_FLOAT, {second_batch, second_height, second_width, second_depth});
+  test::FillValues<float>(&second_float,
+                          {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});
+  Tensor second_quantized =
+      FloatTensorToQuantized<quint8>(second_float, second_min, second_max);
+
+  const int expected_batch = first_batch + second_batch;
+  Tensor expected_float(
+      DT_FLOAT, {expected_batch, first_height, first_width, first_depth});
+  test::FillValues<float>(&expected_float,
+                          {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+                           13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});
+
+  AddInputFromArray<quint8>(first_quantized.shape(),
+                            first_quantized.flat<quint8>());
+  AddInputFromArray<quint8>(second_quantized.shape(),
+                            second_quantized.flat<quint8>());
+  AddInputFromArray<int32>(TensorShape({}), {0});
+  AddInputFromArray<float>(TensorShape({}), {first_min});
+  AddInputFromArray<float>(TensorShape({}), {second_min});
+  AddInputFromArray<float>(TensorShape({}), {first_max});
+  AddInputFromArray<float>(TensorShape({}), {second_max});
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  TF_ASSERT_OK(RunOpKernel());
+  const Tensor& output_quantized = *GetOutput(0);
+  const float output_min = GetOutput(1)->flat<float>()(0);
+  const float output_max = GetOutput(2)->flat<float>()(0);
+  Tensor output_float =
+      QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max);
+  test::ExpectTensorNear<float>(expected_float, output_float, 0.2);
+}
+
+TEST_F(QuantizedConcatTest, SecondDim8BitSameRange) {
+  TestSecondDim8Bit(-10.0f, 150.0f, -10.0f, 150.0f);
+}
+
+void QuantizedConcatTest::TestSecondDim8Bit(float first_min, float first_max,
+                                            float second_min,
+                                            float second_max) {
+  TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "_MklQuantizedConcatV2")
+                   .Input(FakeInput(2, DT_QUINT8))
+                   .Input(FakeInput(DT_INT32))
+                   .Input(FakeInput(2, DT_FLOAT))
+                   .Input(FakeInput(2, DT_FLOAT))
+                   .Input(FakeInput(2, DT_UINT8))  // MKL second tensor
+                   .Input(FakeInput(DT_UINT8))     // MKL second tensor
+                   .Input(FakeInput(2, DT_UINT8))  // MKL second tensor
+                   .Input(FakeInput(2, DT_UINT8))  // MKL second tensor
+                   .Attr("N", 2)
+                   .Attr("T", DataTypeToEnum<quint8>::v())
+                   .Attr("Tidx", DT_INT32)
+                   .Attr("_kernel", "QuantizedMklOp")
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+  const int first_batch = 2;
+  const int first_height = 2;
+  const int first_width = 3;
+  const int first_depth = 1;
+  Tensor first_float(DT_FLOAT,
+                     {first_batch, first_height, first_width, first_depth});
+  test::FillValues<float>(&first_float,
+                          {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+  Tensor first_quantized =
+      FloatTensorToQuantized<quint8>(first_float, first_min, first_max);
+
+  const int second_batch = 2;
+  const int second_height = 2;
+  const int second_width = 3;
+  const int second_depth = 1;
+
+  Tensor second_float(
+      DT_FLOAT, {second_batch, second_height, second_width, second_depth});
+  test::FillValues<float>(&second_float,
+                          {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});
+  Tensor second_quantized =
+      FloatTensorToQuantized<quint8>(second_float, second_min, second_max);
+
+  const int expected_height = first_height + second_height;
+  Tensor expected_float(
+      DT_FLOAT, {first_batch, expected_height, first_width, first_depth});
+  test::FillValues<float>(&expected_float,
+                          {1, 2, 3, 4,  5,  6,  13, 14, 15, 16, 17, 18,
+                           7, 8, 9, 10, 11, 12, 19, 20, 21, 22, 23, 24});
+
+  AddInputFromArray<quint8>(first_quantized.shape(),
+                            first_quantized.flat<quint8>());
+  AddInputFromArray<quint8>(second_quantized.shape(),
+                            second_quantized.flat<quint8>());
+  AddInputFromArray<int32>(TensorShape({}), {1});
+  AddInputFromArray<float>(TensorShape({}), {first_min});
+  AddInputFromArray<float>(TensorShape({}), {second_min});
+  AddInputFromArray<float>(TensorShape({}), {first_max});
+  AddInputFromArray<float>(TensorShape({}), {second_max});
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  TF_ASSERT_OK(RunOpKernel());
+  const Tensor& output_quantized = *GetOutput(0);
+  const float output_min = GetOutput(1)->flat<float>()(0);
+  const float output_max = GetOutput(2)->flat<float>()(0);
+  Tensor output_float =
+      QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max);
+  // Use the same error tolerance as in the Eigen QuantizedConcat test.
+  test::ExpectTensorNear<float>(expected_float, output_float, 1.0);
+}
+
+}  // namespace tensorflow
+
+#endif  // INTEL_MKL && ENABLE_MKL
diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h
index 10825f6..fef2d83 100644
--- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h
@@ -24,8 +24,13 @@
 namespace tensorflow {
 template <class T>
 float MklFloatForOneQuantizedLevel(float range_min, float range_max) {
-  const int64 highest = static_cast<int64>(Eigen::NumTraits<T>::highest());
-  const int64 lowest = static_cast<int64>(Eigen::NumTraits<T>::lowest());
+  int64 highest = static_cast<int64>(Eigen::NumTraits<T>::highest());
+  int64 lowest = static_cast<int64>(Eigen::NumTraits<T>::lowest());
+
+  // Adjust the range to be symmetric:
+  // for example, for 8-bit use [-127, 127] as opposed to [-128, 127].
+  if (lowest < -highest) ++lowest;
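+  // With this adjustment, qint8 spans 254 quantized levels ([-127, 127])
+  // while quint8 keeps all 255 ([0, 255]).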
+
   const float float_for_one_quantized_level =
       (range_max - range_min) / (highest - lowest);
   return float_for_one_quantized_level;
@@ -48,6 +53,35 @@
   *min_c = c_float_for_one_quant_level * c_lowest;
   *max_c = c_float_for_one_quant_level * c_highest;
 }
+
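+// Per-channel variant of MklQuantizationRangeForMultiplication above:
+// min_b_vector/max_b_vector carry one quantization range per channel, and
+// min_c_vector/max_c_vector receive the corresponding per-channel product
+// ranges.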
+template <class T1, class T2, class T3>
+void MklQuantizationRangeForMultiplication(float min_a, float max_a,
+                                           const Tensor& min_b_vector,
+                                           const Tensor& max_b_vector,
+                                           Tensor** min_c_vector,
+                                           Tensor** max_c_vector) {
+  DCHECK(min_b_vector.NumElements() == (*min_c_vector)->NumElements());
+  DCHECK(max_b_vector.NumElements() == (*max_c_vector)->NumElements());
+  size_t n_channel = min_b_vector.NumElements();
+  const int64 c_highest = static_cast<int64>(Eigen::NumTraits<T3>::highest());
+  const int64 c_lowest = static_cast<int64>(Eigen::NumTraits<T3>::lowest());
+  const float* min_b = min_b_vector.flat<float>().data();
+  const float* max_b = max_b_vector.flat<float>().data();
+  float* min_c = (*min_c_vector)->flat<float>().data();
+  float* max_c = (*max_c_vector)->flat<float>().data();
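+  // Each channel's output range is the product of the two inputs'
+  // per-quantized-level resolutions, scaled to the extremes of T3.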
+#pragma omp parallel for
+  for (size_t n = 0; n < n_channel; ++n) {
+    float a_float_for_one_quant_level =
+        MklFloatForOneQuantizedLevel<T1>(min_a, max_a);
+    float b_float_for_one_quant_level =
+        MklFloatForOneQuantizedLevel<T2>(min_b[n], max_b[n]);
+    float c_float_for_one_quant_level =
+        a_float_for_one_quant_level * b_float_for_one_quant_level;
+    min_c[n] = c_float_for_one_quant_level * c_lowest;
+    max_c[n] = c_float_for_one_quant_level * c_highest;
+  }
+}
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
new file mode 100644
index 0000000..767a6f1
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
@@ -0,0 +1,124 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+#ifdef INTEL_MKL
+#define EIGEN_USE_THREADS
+
+#include <math.h>
+#include <limits>
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/type_traits.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/no_op.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
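+// Computes one requantization range covering every channel of a
+// per-channel-quantized int32 tensor: [0, r] if all values are
+// non-negative, [-r, r] otherwise, with r clipped to clip_value_max.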
+class MklRequantizationRangePerChannelOp : public OpKernel {
+ public:
+  explicit MklRequantizationRangePerChannelOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("clip_value_max", &clip_value_max_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& input = ctx->input(kInputTensorIndex);
+    const Tensor& input_min = ctx->input(kInputMinIndex);
+    const Tensor& input_max = ctx->input(kInputMaxIndex);
+
+    const size_t depth = input_max.NumElements();
+    OP_REQUIRES(
+        ctx, input_min.dim_size(0) == depth,
+        errors::InvalidArgument("input_min has incorrect size, expected ",
+                                depth, " was ", input_min.dim_size(0)));
+    OP_REQUIRES(
+        ctx, input_max.dim_size(0) == depth,
+        errors::InvalidArgument("input_max has incorrect size, expected ",
+                                depth, " was ", input_max.dim_size(0)));
+
+    const float* input_min_data = input_min.flat<float>().data();
+    const float* input_max_data = input_max.flat<float>().data();
+    std::vector<float> ranges(depth);
+    bool is_non_negative = true;
+    Eigen::array<int, 2> shuffling({1, 0});
+    auto input_matrix = input.flat_inner_dims<qint32>();
+
+    // TODO: Compare the performance of finding the min/max directly from
+    // input_matrix against the approach below, which transposes first; the
+    // transpose itself might be the more costly operation.
+    // Note that this op is a calibration step for quantization and will not
+    // survive into the final inference graph (it is replaced by a const
+    // node).
+    auto transposed_input = input_matrix.shuffle(shuffling);
+
+    // Find the ranges of each channel in parallel.
+    float out_min_max = std::numeric_limits<float>::min();
+#pragma omp parallel for reduction(max : out_min_max)
+    for (size_t i = 0; i < depth; ++i) {
+      Eigen::Tensor<qint32, 0, Eigen::RowMajor> min =
+          transposed_input.chip<0>(i).minimum();
+      Eigen::Tensor<qint32, 0, Eigen::RowMajor> max =
+          transposed_input.chip<0>(i).maximum();
+      const int32_t min_per_channel = min();
+      const int32_t max_per_channel = max();
+      const int32_t abs_max =
+          std::max(std::abs(min_per_channel), std::abs(max_per_channel));
+      float scale =
+          std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i]));
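+      // The channel's float range is the fraction of the int32 domain it
+      // actually uses (abs_max / 2^31), scaled by the channel's input range.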
+      ranges[i] =
+          scale * static_cast<float>(abs_max) / static_cast<float>(1L << 31);
+      if (min_per_channel < 0) is_non_negative = false;
+
+      // Thread-local out_min_max.
+      out_min_max = std::max(out_min_max, ranges[i]);
+    }
+    // The thread-local out_min_max values are max-reduced into one global
+    // out_min_max at the end of the loop by the reduction(max : out_min_max)
+    // clause on the omp parallel for.
+
+    // Clamp the max to clip_value_max_ (e.g. 6.0 to support Relu6).
+    if (out_min_max > clip_value_max_) out_min_max = clip_value_max_;
+
+    Tensor* output_min = nullptr;
+    Tensor* output_max = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMinIndex, {}, &output_min));
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMaxIndex, {}, &output_max));
+    output_min->flat<float>()(0) = is_non_negative ? 0.0f : -out_min_max;
+    output_max->flat<float>()(0) = out_min_max;
+  }
+
+ private:
+  float clip_value_max_ = std::numeric_limits<float>::infinity();
+  const int kInputTensorIndex = 0;
+  const int kInputMinIndex = 1;
+  const int kInputMaxIndex = 2;
+  const int kOutputMinIndex = 0;
+  const int kOutputMaxIndex = 1;
+};
+
+REGISTER_KERNEL_BUILDER(Name("RequantizationRangePerChannel")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint32>("T"),
+                        MklRequantizationRangePerChannelOp);
+}  // namespace tensorflow
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_requantize_ops_test.cc b/tensorflow/core/kernels/mkl_requantize_ops_test.cc
new file mode 100644
index 0000000..9961462
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_requantize_ops_test.cc
@@ -0,0 +1,300 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
+
+#include <cmath>
+
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+
+class MklRequantizatedOpsTest : public OpsTestBase {};
+
+class MklRequantizatedOpsTestHelper : public OpsTestBase {
+ public:
+  void Setup(Tensor& input_tensor_qint32, float& range_weights_ch1,
+             float& range_weights_ch2);
+  void TestBody() {}
+};
+
+void MklRequantizatedOpsTestHelper::Setup(Tensor& input_tensor_qint32,
+                                          float& range_weights_ch1,
+                                          float& range_weights_ch2) {
+  // Step 1: Input range assumptions
+  // -------------------------------
+  // Assume input tensor T (NHWC) in FP32 has range [0, 5.0] and size
+  // nt*ht*wt*ct.
+  // Assume input filter W (NHWC) with 2 output channels has size nw*ht*wt*2;
+  // logically, filter W has 2 channels, W1 and W2, each of size nw*ht*wt*1.
+  // Assume input filter W1 (NHWC) in FP32 has range [-2.0, 2.0].
+  // Assume input filter W2 (NHWC) in FP32 has range [-3.0, 3.0].
+
+  // Step 2: Quantization details (per channel)
+  // ------------------------------------------
+  // T and W are quantized using a quantize op.
+  // The input tensor T (NHWC) is quantized to unsigned int8.
+  // Hence T's max value is mapped to ((2^8-1) = 255).
+  // The input filter W (NHWC) is quantized to signed int8.
+  // Hence W's max value is mapped to ((2^7)-1 = 127).
+
+  // Range of quantized T  in uint8[0  , 255] maps to orig T  in FP32[0   , 5.0]
+  // Range of quantized W1 in int8[-127, 127] maps to orig W1 in FP32[-2.0, 2.0]
+  // Range of quantized W2 in int8[-127, 127] maps to orig W2 in FP32[-3.0, 3.0]
+
+  // Hence the resolution of quantized T will be 5.0/255
+  // Hence the resolution of quantized W1 will be 2.0/127
+  // Hence the resolution of quantized W2 will be 3.0/127
+
+  // Step 3: Assumptions for quantized conv on quantized inputs & weights
+  //         (per channel)
+  // ---------------------------------------------------------------------------
+  // The input T and weights W1 (or W2) will be convolved.
+  // The output tensor T is in int32 whose range is [-2^31, 2^31).
+  // For simplicity and symmetry, we truncate the above range to (-2^31, 2^31).
+  // The range of convolved T*W1 is ((2^31)-1) * 5.0/255 * 2.0/127 = 663110.59.
+  // So the range of convolved T*W1 in int32 (-2^31, 2^31) that maps to the
+  // orig range in FP32, [0, 5.0] * [-2.0, 2.0], is [-663110.59, 663110.59].
+
+  // The range of convolved T*W2 is ((2^31)-1) * 5.0/255 * 3.0/127 = 994665.88.
+  // So the range of convolved T*W2 in int32 (-2^31, 2^31) that maps to the
+  // orig range in FP32, [0, 5.0] * [-3.0, 3.0], is [-994665.88, 994665.88].
+
+  // Step 4: Assumption output above is fed to requantization_range_perchannel
+  // --------------------------------------------------------------------------
+  // Here we recalculate the new range for convolved T*W so that we
+  // make good use in int8 quantization from int32 to int8.
+
+  // We assume the above operations are performed and use these values above
+  // as ranges for requantization_range_perchannel_op.
+  range_weights_ch1 = 663110.59;  // For W1 channel
+  range_weights_ch2 = 994665.88;  // For W2 Channel
+
+  // Fill the input tensor T (qint32) with arbitrary int32 values.
+  test::FillValues<qint32>(
+      &input_tensor_qint32,
+      {-1000, -2000,  2000,   4000,   -3000,  -6000,  4000,   8000,
+       5000,  10000,  -6000,  -12000, 7000,   14000,  8000,   16000,
+       9000,  -18000, -10000, -20000, 11000,  22000,  -12000, -24000,
+       13000, 26000,  14000,  28000,  -15000, -30000, 16000,  32000});
+
+  // Step 5: Define and run requantization_range_perchannel
+  // -------------------------------------------------------
+  // See test RequantizationRangePerChannelTest_Basic and/or
+  // test RequantizationRangePerChannelTest_ClipMax
+}
+
+// Tests the RequantizationRangePerChannel op wherein the range
+// of the weights is calculated per channel.
+TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_Basic) {
+  // Let us set up the tensor and inputs before we run this op.
+  float clip_value_max = static_cast<float>((1L << 31) - 1);
+  float range_weights_ch1 = 0.0;
+  float range_weights_ch2 = 0.0;
+
+  // Create the input tensor
+  const int input_height = 4;
+  const int input_width = 4;
+  const int input_channels = 2;
+
+  // Define the shape of T.
+  Tensor input_tensor_qint32(DT_QINT32,
+                             {1, input_height, input_width, input_channels});
+
+  // Explanation and setup prior to this op. Fill T and populate range values.
+  MklRequantizatedOpsTestHelper helper;
+  helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2);
+
+  // Step 5: Define and run requantization_range_perchannel
+  // -------------------------------------------------------
+  // Define, create and initialize the op in question.
+  TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel",
+                              "RequantizationRangePerChannel")
+                   .Input(FakeInput(DT_QINT32))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Attr("T", DataTypeToEnum<qint32>::v())
+                   .Attr("clip_value_max", clip_value_max)
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+
+  // Add the input nodes to the op.
+  AddInputFromArray<qint32>(input_tensor_qint32.shape(),
+                            input_tensor_qint32.flat<qint32>());
+
+  // Calculate the min and max from the ranges
+  float ch1_min = -range_weights_ch1;
+  float ch1_max = range_weights_ch1;
+  float ch2_min = -range_weights_ch2;
+  float ch2_max = range_weights_ch2;
+
+  // Add the per-channel range nodes to the op.
+  AddInputFromArray<float>(TensorShape({input_channels}), {ch1_min, ch2_min});
+  AddInputFromArray<float>(TensorShape({input_channels}), {ch1_max, ch2_max});
+
+  // Run the kernel
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Step 6: Verify output and store values to test requantize_perchannel
+  // --------------------------------------------------------------------
+
+  // Verify the Expected Outputs
+  const float output_min = GetOutput(0)->flat<float>()(0);
+  const float output_max = GetOutput(1)->flat<float>()(0);
+  EXPECT_NEAR(-14.8217, output_min, 0.002);
+  EXPECT_NEAR(14.8217, output_max, 0.002);
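+  // Channel 2 dominates: abs_max = 32000 with range 994665.88, giving
+  // 994665.88 * 32000 / 2^31 = ~14.8217; negatives are present, so the
+  // min is the negated max.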
+
+  // Output range is made use in RequantizePerChannelTest_Basic
+}
+
+TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_ClipMax) {
+  // Let us set up the tensor and inputs before we run this op.
+  float clip_value_max = 6;  // E.g. 6 to support Relu6 activations.
+  float range_weights_ch1 = 0.0;
+  float range_weights_ch2 = 0.0;
+
+  // Create the input tensor
+  const int input_height = 4;
+  const int input_width = 4;
+  const int input_channels = 2;
+
+  // Define the input tensor T shape.
+  Tensor input_tensor_qint32(DT_QINT32,
+                             {1, input_height, input_width, input_channels});
+
+  // Explanation and setup prior to this op. Fill T and populate range values.
+  MklRequantizatedOpsTestHelper helper;
+  helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2);
+
+  // Step 5: Define and run requantization_range_perchannel
+  // -------------------------------------------------------
+  // Define, create and initialize the op in question.
+  TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel",
+                              "RequantizationRangePerChannel")
+                   .Input(FakeInput(DT_QINT32))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Attr("T", DataTypeToEnum<qint32>::v())
+                   .Attr("clip_value_max", clip_value_max)
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+
+  // Add the input nodes to the op.
+  AddInputFromArray<qint32>(input_tensor_qint32.shape(),
+                            input_tensor_qint32.flat<qint32>());
+
+  // Calculate the min and max from the ranges
+  float ch1_min = -range_weights_ch1;
+  float ch1_max = range_weights_ch1;
+  float ch2_min = -range_weights_ch2;
+  float ch2_max = range_weights_ch2;
+
+  // Add the per-channel range nodes to the op.
+  AddInputFromArray<float>(TensorShape({input_channels}), {ch1_min, ch2_min});
+  AddInputFromArray<float>(TensorShape({input_channels}), {ch1_max, ch2_max});
+
+  // Run the kernel
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Step 6: Verify output and store values to test requantize_perchannel
+  // --------------------------------------------------------------------
+
+  // Verify the expected outputs
+  const float output_min = GetOutput(0)->flat<float>()(0);
+  const float output_max = GetOutput(1)->flat<float>()(0);
+  EXPECT_NEAR(-6.0, output_min, 0.002);  // Values are aligned with clip_value.
+  EXPECT_NEAR(6.0, output_max, 0.002);   // Values are aligned with clip_value.
+}
+
+TEST_F(MklRequantizatedOpsTest, RequantizePerChannelTest_Basic) {
+  // Let us set up the tensor and inputs before we run this op.
+  float range_weights_ch1 = 0.0;
+  float range_weights_ch2 = 0.0;
+
+  // Create the input tensor
+  const int input_height = 4;
+  const int input_width = 4;
+  const int input_channels = 2;
+
+  // Define an input tensor T shape.
+  Tensor input_tensor_qint32(DT_QINT32,
+                             {1, input_height, input_width, input_channels});
+
+  // Explanation and setup prior to this op. Fill T and populate range values.
+  MklRequantizatedOpsTestHelper helper;
+  helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2);
+
+  // Step 7: Define and run requantize_perchannel
+  // --------------------------------------------
+  // The output of RequantizationRangePerChannel, which calculated the new
+  // int8 ranges, is fed to the requantize-per-channel op. Here the values
+  // of the convolved T*W are converted from int32 to int8.
+
+  TF_ASSERT_OK(NodeDefBuilder("requantize_per_channel", "RequantizePerChannel")
+                   .Input(FakeInput(DT_QINT32))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Attr("T", DataTypeToEnum<qint32>::v())
+                   .Attr("out_type", DataTypeToEnum<qint8>::v())
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+
+  // Add the input nodes to the op.
+  AddInputFromArray<qint32>(input_tensor_qint32.shape(),
+                            input_tensor_qint32.flat<qint32>());
+
+  // Calculate the min and max from the ranges
+  float ch1_min = -range_weights_ch1;
+  float ch1_max = range_weights_ch1;
+  float ch2_min = -range_weights_ch2;
+  float ch2_max = range_weights_ch2;
+
+  // Add the per-channel range nodes to the op.
+  AddInputFromArray<float>(TensorShape({input_channels}), {ch1_min, ch2_min});
+  AddInputFromArray<float>(TensorShape({input_channels}), {ch1_max, ch2_max});
+
+  // Use the min and max computed in Step 6 of
+  // RequantizationRangePerChannelTest_Basic.
+  float range_op_output_min = -14.8217;
+  float range_op_output_max = 14.8217;
+
+  // Add the requested_min and requested_max stored from Step 6.
+  AddInputFromArray<float>(TensorShape({1}), {range_op_output_min});
+  AddInputFromArray<float>(TensorShape({1}), {range_op_output_max});
+
+  // Run the kernel
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Verify the outputs against the expected values.
+  Tensor output = *GetOutput(0);
+  const float output_min = GetOutput(1)->flat<float>()(0);
+  const float output_max = GetOutput(2)->flat<float>()(0);
+  EXPECT_NEAR(range_op_output_min, output_min, 0.002);
+  EXPECT_NEAR(range_op_output_max, output_max, 0.002);
+}
+
+}  // namespace tensorflow
+#endif  // INTEL_MKL && ENABLE_MKL
diff --git a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc
new file mode 100644
index 0000000..b5c1a01
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc
@@ -0,0 +1,172 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+
+#ifdef INTEL_MKL
+#define EIGEN_USE_THREADS
+#include <math.h>
+
+#include "mkldnn.hpp"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/type_traits.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/no_op.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
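+// Requantizes a per-channel-quantized qint32 tensor to qint8/quint8 using a
+// single MKL-DNN reorder with per-channel output scales; the requested
+// min/max are passed through as the common output range.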
+template <typename Device, typename Toutput>
+class MklRequantizePerChannelOp : public OpKernel {
+ public:
+  explicit MklRequantizePerChannelOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_type_));
+    OP_REQUIRES(ctx, out_type_ == DT_QINT8 || out_type_ == DT_QUINT8,
+                errors::InvalidArgument(
+                    "out_type must be qint8 or quint8, but got: ",
+                    DataTypeString(out_type_)));
+  }
+  virtual ~MklRequantizePerChannelOp() {}
+  void Compute(OpKernelContext* ctx) override {
+    try {
+      const Tensor& input = ctx->input(kInputTensorIndex);
+      const Tensor& input_min_vec = ctx->input(kInputMinVecIndex);
+      const float* input_min_vec_data = input_min_vec.flat<float>().data();
+      const Tensor& input_max_vec = ctx->input(kInputMaxVecIndex);
+      const float* input_max_vec_data = input_max_vec.flat<float>().data();
+
+      const Tensor& input_requested_min = ctx->input(this->kRequestMinIndex);
+      const float input_requested_min_float =
+          input_requested_min.flat<float>()(0);
+      const Tensor& input_requested_max = ctx->input(this->kRequestMaxIndex);
+      const float input_requested_max_float =
+          input_requested_max.flat<float>()(0);
+
+      size_t depth = input_min_vec.NumElements();
+      OP_REQUIRES(
+          ctx, input_min_vec.dim_size(0) == depth,
+          errors::InvalidArgument("input_min has incorrect size, expected ",
+                                  depth, " was ", input_min_vec.dim_size(0)));
+      OP_REQUIRES(
+          ctx, input_max_vec.dim_size(0) == depth,
+          errors::InvalidArgument("input_max has incorrect size, expected ",
+                                  depth, " was ", input_max_vec.dim_size(0)));
+
+      if (out_type_ == DT_QINT8) DCHECK(input_requested_min_float < 0.0f);
+
+      const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f;
+      const float requested_min_max =
+          std::max(std::abs(input_requested_min_float),
+                   std::abs(input_requested_max_float));
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputTensorIndex,
+                                               input.shape(), &output));
+
+      std::vector<float> scales(depth);
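+      // Per-channel reorder scale: the channel's float range as a fraction
+      // of the int32 domain (hence the 1/2^31) mapped onto the requested
+      // qint8/quint8 output range.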
+      for (int i = 0; i < depth; ++i) {
+        float min_max_from_vec = std::max(std::abs(input_min_vec_data[i]),
+                                          std::abs(input_max_vec_data[i]));
+        scales[i] = factor * (min_max_from_vec / requested_min_max /
+                              static_cast<float>(1L << 31));
+      }
+
+      mkldnn::primitive_attr reorder_attr;
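+      // An output-scales mask of 2 (1 << 1) applies one scale per logical
+      // dim 1, i.e. per channel for the NCHW dim order used below.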
+      reorder_attr.set_output_scales(2, scales);
+
+      memory::dims dims_mkl_order =
+          TFShapeToMklDnnDimsInNCHW(input.shape(), FORMAT_NHWC);
+      memory::desc input_md = memory::desc(dims_mkl_order, MklDnnType<qint32>(),
+                                           memory::format::nhwc);
+      memory::desc output_md =
+          (out_type_ == DT_QINT8)
+              ? memory::desc(dims_mkl_order, MklDnnType<qint8>(),
+                             memory::format::nhwc)
+              : memory::desc(dims_mkl_order, MklDnnType<quint8>(),
+                             memory::format::nhwc);
+
+      memory::primitive_desc input_pd =
+          memory::primitive_desc(input_md, cpu_engine_);
+      memory::primitive_desc output_pd =
+          memory::primitive_desc(output_md, cpu_engine_);
+
+      void* input_buf =
+          static_cast<void*>(const_cast<qint32*>(input.flat<qint32>().data()));
+      void* output_buf;
+      if (out_type_ == DT_QINT8) {
+        output_buf = static_cast<void*>(
+            const_cast<qint8*>(output->flat<qint8>().data()));
+      } else {
+        output_buf = static_cast<void*>(
+            const_cast<quint8*>(output->flat<quint8>().data()));
+      }
+
+      std::unique_ptr<memory> input_mem_prim(new memory(input_pd, input_buf));
+      std::unique_ptr<memory> output_mem_prim(
+          new memory(output_pd, output_buf));
+
+      mkldnn::reorder::primitive_desc reorder_pd =
+          mkldnn::reorder::primitive_desc(input_pd, output_pd, reorder_attr);
+      std::vector<mkldnn::primitive> net;
+      net.push_back(
+          mkldnn::reorder(reorder_pd, *input_mem_prim, *output_mem_prim));
+      stream(stream::kind::eager).submit(net).wait();
+
+      Tensor* output_min = nullptr;
+      Tensor* output_max = nullptr;
+      OP_REQUIRES_OK(ctx,
+                     ctx->allocate_output(kOutputMinIndex, {}, &output_min));
+      OP_REQUIRES_OK(ctx,
+                     ctx->allocate_output(kOutputMaxIndex, {}, &output_max));
+
+      output_min->flat<float>()(0) = input_requested_min_float;
+      output_max->flat<float>()(0) = input_requested_max_float;
+    } catch (mkldnn::error& e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + std::string(e.message) + ", in file " +
+                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          ctx, errors::Aborted("Operation received an exception:", error_msg));
+    }
+  }
+
+ private:
+  const int kInputTensorIndex = 0;
+  const int kInputMinVecIndex = 1;
+  const int kInputMaxVecIndex = 2;
+  const int kRequestMinIndex = 3;
+  const int kRequestMaxIndex = 4;
+  const int kOutputTensorIndex = 0;
+  const int kOutputMinIndex = 1;
+  const int kOutputMaxIndex = 2;
+  DataType out_type_;
+  engine cpu_engine_ = engine(engine::cpu, 0);
+};
+
+REGISTER_KERNEL_BUILDER(Name("RequantizePerChannel")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint32>("T")
+                            .TypeConstraint<qint8>("out_type"),
+                        MklRequantizePerChannelOp<CPUDevice, qint8>);
+
+}  // namespace tensorflow
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index d8c38fe..4866efe 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -173,10 +173,11 @@
     FunctionLibraryRuntime::InstantiateOptions opts;
     opts.target = lib->device()->name();
     opts.is_multi_device_function = true;
-    opts.optimize_graph_fn = std::bind(
-        grappler::OptimizeGraph, std::placeholders::_1, std::placeholders::_2,
-        std::placeholders::_3, std::placeholders::_4, config_proto_,
-        func_.name(), optimization_options, std::placeholders::_5);
+    opts.optimize_graph_fn =
+        std::bind(grappler::OptimizeGraph, std::placeholders::_1,
+                  std::placeholders::_2, std::placeholders::_3,
+                  std::placeholders::_4, std::placeholders::_5, config_proto_,
+                  func_.name(), optimization_options, std::placeholders::_6);
     opts.graph_collector = ctx->graph_collector();
     opts.executor_type = executor_type_;
 
diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/core/kernels/quantized_concat_op.cc
index b03ac8e..ff4e7be 100644
--- a/tensorflow/core/kernels/quantized_concat_op.cc
+++ b/tensorflow/core/kernels/quantized_concat_op.cc
@@ -246,4 +246,16 @@
 
 #undef REGISTER_QUANTIZED_CONCAT
 
+#ifdef INTEL_MKL
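+// In MKL builds, also register the Eigen kernels for the plain
+// QuantizedConcatV2 op so graphs that keep the non-MKL op still find a
+// CPU kernel.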
+#define REGISTER_QUANTIZED_CONCATV2(type)                \
+  REGISTER_KERNEL_BUILDER(Name("QuantizedConcatV2")      \
+                              .Device(DEVICE_CPU)        \
+                              .TypeConstraint<type>("T") \
+                              .HostMemory("axis"),       \
+                          QuantizedConcatOp<type>)
+
+REGISTER_QUANTIZED_CONCATV2(quint8);
+REGISTER_QUANTIZED_CONCATV2(qint32);
+#endif
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/random_op_gpu.cu.cc b/tensorflow/core/kernels/random_op_gpu.cu.cc
index edb2b10..a560388 100644
--- a/tensorflow/core/kernels/random_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/random_op_gpu.cu.cc
@@ -18,6 +18,7 @@
 #define EIGEN_USE_GPU
 
 #include "tensorflow/core/kernels/random_op.h"
+#include "tensorflow/core/kernels/random_op_gpu.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -36,170 +37,6 @@
 
 typedef Eigen::GpuDevice GPUDevice;
 
-template <class Distribution, bool VariableSamplesPerOutput>
-struct FillPhiloxRandomKernel;
-
-template <typename T, int ElementCount>
-class SampleCopier {
- public:
-  inline __device__ void operator()(
-      T* buf, const tensorflow::random::Array<T, ElementCount>& array) const {
-#pragma unroll
-    for (int i = 0; i < ElementCount; i++) {
-      buf[i] = array[i];
-    }
-  }
-};
-
-template <>
-class SampleCopier<float, 4> {
- public:
-  // Copies the elements from the array to buf. buf must be 128-bit aligned,
-  // which is true for tensor data, and all offsets that are a multiple of the
-  // vector size (because the vectors are 128 bits long).
-  inline __device__ void operator()(
-      float* buf, const tensorflow::random::Array<float, 4>& array) const {
-    // NOTE(ringwalt): It's not safe to cast &array[0] to a float4, because they
-    // have 32-bit alignment vs 128-bit alignment. There seems to be no
-    // performance loss when assigning each element to a vector.
-    float4 vec;
-    vec.x = array[0];
-    vec.y = array[1];
-    vec.z = array[2];
-    vec.w = array[3];
-    float4* buf_vector = reinterpret_cast<float4*>(buf);
-    *buf_vector = vec;
-  }
-};
-
-template <>
-class SampleCopier<int32, 4> {
- public:
-  // Copies the elements from the array to buf. buf must be 128-bit aligned,
-  // which is true for tensor data, and all offsets that are a multiple of the
-  // vector size (because the vectors are 128 bits long).
-  inline __device__ void operator()(
-      int32* buf, const tensorflow::random::Array<int32, 4>& array) const {
-    int4 vec;
-    vec.x = array[0];
-    vec.y = array[1];
-    vec.z = array[2];
-    vec.w = array[3];
-    int4* buf_vector = reinterpret_cast<int4*>(buf);
-    *buf_vector = vec;
-  }
-};
-
-template <>
-class SampleCopier<double, 2> {
- public:
-  // Copies the elements from the array to buf. buf must be 128-bit aligned,
-  // which is true for tensor data, and all offsets that are a multiple of the
-  // vector size (because the vectors are 128 bits long).
-  inline __device__ void operator()(
-      double* buf, const tensorflow::random::Array<double, 2>& array) const {
-    double2 vec;
-    vec.x = array[0];
-    vec.y = array[1];
-    double2* buf_vector = reinterpret_cast<double2*>(buf);
-    *buf_vector = vec;
-  }
-};
-
-template <>
-class SampleCopier<int64, 2> {
- public:
-  // Copies the elements from the array to buf. buf must be 128-bit aligned,
-  // which is true for tensor data, and all offsets that are a multiple of the
-  // vector size (because the vectors are 128 bits long).
-  inline __device__ void operator()(
-      int64* buf, const tensorflow::random::Array<int64, 2>& array) const {
-    longlong2 vec;
-    vec.x = array[0];
-    vec.y = array[1];
-    longlong2* buf_vector = reinterpret_cast<longlong2*>(buf);
-    *buf_vector = vec;
-  }
-};
-
-// A cuda kernel to fill the data with random numbers from the specified
-// distribution. Each output takes a fixed number of samples.
-template <class Distribution>
-struct FillPhiloxRandomKernel<Distribution, false> {
-  typedef typename Distribution::ResultElementType T;
-  PHILOX_DEVICE_FUNC void Run(random::PhiloxRandom gen, T* data, int64 size,
-                              Distribution dist) {
-    const int kGroupSize = Distribution::kResultElementCount;
-
-    const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-    const int32 total_thread_count = gridDim.x * blockDim.x;
-    int32 offset = thread_id * kGroupSize;
-    gen.Skip(thread_id);
-
-    const SampleCopier<T, kGroupSize> copier;
-    while (offset + kGroupSize <= size) {
-      const typename Distribution::ResultType samples = dist(&gen);
-      copier(&data[offset], samples);
-
-      offset += total_thread_count * kGroupSize;
-      gen.Skip(total_thread_count - 1);
-    }
-
-    typename Distribution::ResultType samples = dist(&gen);
-    for (int i = 0; i < kGroupSize; ++i) {
-      if (offset >= size) {
-        return;
-      }
-      data[offset] = samples[i];
-      ++offset;
-    }
-  }
-};
-
-// A cuda kernel to fill the data with random numbers from the specified
-// distribution. Each output takes a variable number of samples.
-template <class Distribution>
-struct FillPhiloxRandomKernel<Distribution, true> {
-  typedef typename Distribution::ResultElementType T;
-  PHILOX_DEVICE_FUNC void Run(const random::PhiloxRandom& base_gen, T* data,
-                              int64 size, Distribution dist) {
-    using random::PhiloxRandom;
-    using random::SingleSampleAdapter;
-
-    const int kReservedSamplesPerOutput = 256;
-    const int kGroupSize = Distribution::kResultElementCount;
-    const int kGeneratorSkipPerOutputGroup = kGroupSize *
-                                             kReservedSamplesPerOutput /
-                                             PhiloxRandom::kResultElementCount;
-
-    const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-    const int32 total_thread_count = gridDim.x * blockDim.x;
-    int64 group_index = thread_id;
-    int64 offset = group_index * kGroupSize;
-
-    while (offset < size) {
-      // Since each output takes a variable number of samples, we need to
-      // realign the generator to the beginning for the current output group
-      PhiloxRandom gen = base_gen;
-      gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
-      SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
-
-      typename Distribution::ResultType samples = dist(&single_samples);
-
-      for (int i = 0; i < kGroupSize; ++i) {
-        if (offset >= size) {
-          return;
-        }
-        data[offset] = samples[i];
-        ++offset;
-      }
-
-      offset += (total_thread_count - 1) * kGroupSize;
-      group_index += total_thread_count;
-    }
-  }
-};
-
 // A simple launch pad to call the correct function templates to fill the data
 template <class Distribution>
 __global__ void __launch_bounds__(1024)
@@ -224,7 +61,7 @@
 
   FillPhiloxRandomKernelLaunch<Distribution>
       <<<num_blocks, block_size, 0, d.stream()>>>(gen, data, size, dist);
-};
+}
 
 // Explicit instantiation of the GPU distributions functors
 // clang-format off
diff --git a/tensorflow/core/kernels/random_op_gpu.h b/tensorflow/core/kernels/random_op_gpu.h
new file mode 100644
index 0000000..e32c755
--- /dev/null
+++ b/tensorflow/core/kernels/random_op_gpu.h
@@ -0,0 +1,206 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_RANDOM_OP_GPU_H_
+#define TENSORFLOW_CORE_KERNELS_RANDOM_OP_GPU_H_
+
+#if defined(__CUDACC__)
+
+#include "tensorflow/core/lib/random/philox_random.h"
+#include "tensorflow/core/lib/random/random_distributions.h"
+
+namespace tensorflow {
+
+namespace functor {
+
+template <class Distribution, bool VariableSamplesPerOutput>
+struct FillPhiloxRandomKernel;
+
+template <class Distribution>
+struct FillPhiloxRandomKernel<Distribution, false> {
+  typedef typename Distribution::ResultElementType T;
+  PHILOX_DEVICE_FUNC void Run(random::PhiloxRandom gen, T* data, int64 size,
+                              Distribution dist);
+};
+
+template <class Distribution>
+struct FillPhiloxRandomKernel<Distribution, true> {
+  typedef typename Distribution::ResultElementType T;
+  PHILOX_DEVICE_FUNC void Run(const random::PhiloxRandom& base_gen, T* data,
+                              int64 size, Distribution dist);
+};
+
+template <typename T, int ElementCount>
+class SampleCopier {
+ public:
+  inline __device__ void operator()(
+      T* buf, const tensorflow::random::Array<T, ElementCount>& array) const {
+#pragma unroll
+    for (int i = 0; i < ElementCount; i++) {
+      buf[i] = array[i];
+    }
+  }
+};
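+
+// The specializations below replace the element-wise loop with a single
+// vectorized 128-bit store where the element type and count allow it.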
+
+template <>
+class SampleCopier<float, 4> {
+ public:
+  // Copies the elements from the array to buf. buf must be 128-bit aligned,
+  // which holds for tensor data and for all offsets that are a multiple of
+  // the vector size (because the vectors are 128 bits long).
+  inline __device__ void operator()(
+      float* buf, const tensorflow::random::Array<float, 4>& array) const {
+    // NOTE(ringwalt): It's not safe to cast &array[0] to a float4, because
+    // the array elements are only 32-bit aligned while float4 requires
+    // 128-bit alignment. There seems to be no performance loss from
+    // assigning each element to the vector instead.
+    float4 vec;
+    vec.x = array[0];
+    vec.y = array[1];
+    vec.z = array[2];
+    vec.w = array[3];
+    float4* buf_vector = reinterpret_cast<float4*>(buf);
+    *buf_vector = vec;
+  }
+};
+
+template <>
+class SampleCopier<int32, 4> {
+ public:
+  // Copies the elements from the array to buf. buf must be 128-bit aligned,
+  // which holds for tensor data and for all offsets that are a multiple of
+  // the vector size (because the vectors are 128 bits long).
+  inline __device__ void operator()(
+      int32* buf, const tensorflow::random::Array<int32, 4>& array) const {
+    int4 vec;
+    vec.x = array[0];
+    vec.y = array[1];
+    vec.z = array[2];
+    vec.w = array[3];
+    int4* buf_vector = reinterpret_cast<int4*>(buf);
+    *buf_vector = vec;
+  }
+};
+
+template <>
+class SampleCopier<double, 2> {
+ public:
+  // Copies the elements from the array to buf. buf must be 128-bit aligned,
+  // which holds for tensor data and for all offsets that are a multiple of
+  // the vector size (because the vectors are 128 bits long).
+  inline __device__ void operator()(
+      double* buf, const tensorflow::random::Array<double, 2>& array) const {
+    double2 vec;
+    vec.x = array[0];
+    vec.y = array[1];
+    double2* buf_vector = reinterpret_cast<double2*>(buf);
+    *buf_vector = vec;
+  }
+};
+
+template <>
+class SampleCopier<int64, 2> {
+ public:
+  // Copies the elements from the array to buf. buf must be 128-bit aligned,
+  // which holds for tensor data and for all offsets that are a multiple of
+  // the vector size (because the vectors are 128 bits long).
+  inline __device__ void operator()(
+      int64* buf, const tensorflow::random::Array<int64, 2>& array) const {
+    longlong2 vec;
+    vec.x = array[0];
+    vec.y = array[1];
+    longlong2* buf_vector = reinterpret_cast<longlong2*>(buf);
+    *buf_vector = vec;
+  }
+};
+
+// A CUDA kernel to fill the data with random numbers from the specified
+// distribution. Each output takes a fixed number of samples.
+template <class Distribution>
+PHILOX_DEVICE_FUNC void FillPhiloxRandomKernel<Distribution, false>::Run(
+    random::PhiloxRandom gen, T* data, int64 size, Distribution dist) {
+  const int kGroupSize = Distribution::kResultElementCount;
+
+  const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
+  const int32 total_thread_count = gridDim.x * blockDim.x;
+  int32 offset = thread_id * kGroupSize;
+  gen.Skip(thread_id);
+
+  const SampleCopier<T, kGroupSize> copier;
+  while (offset + kGroupSize <= size) {
+    const typename Distribution::ResultType samples = dist(&gen);
+    copier(&data[offset], samples);
+
+    offset += total_thread_count * kGroupSize;
+    gen.Skip(total_thread_count - 1);
+  }
+
+  typename Distribution::ResultType samples = dist(&gen);
+  for (int i = 0; i < kGroupSize; ++i) {
+    if (offset >= size) {
+      return;
+    }
+    data[offset] = samples[i];
+    ++offset;
+  }
+}
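+
+// Note the access pattern above: thread t writes output groups t,
+// t + total_thread_count, t + 2 * total_thread_count, ..., so consecutive
+// threads store to adjacent kGroupSize-element groups (vectorized by
+// SampleCopier), and the trailing loop covers a final group that only
+// partially fits within `size`.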
+
+// A CUDA kernel to fill the data with random numbers from the specified
+// distribution. Each output takes a variable number of samples.
+template <class Distribution>
+PHILOX_DEVICE_FUNC void FillPhiloxRandomKernel<Distribution, true>::Run(
+    const random::PhiloxRandom& base_gen, T* data, int64 size,
+    Distribution dist) {
+  using random::PhiloxRandom;
+  using random::SingleSampleAdapter;
+
+  const int kReservedSamplesPerOutput = 256;
+  const int kGroupSize = Distribution::kResultElementCount;
+  const int kGeneratorSkipPerOutputGroup = kGroupSize *
+                                           kReservedSamplesPerOutput /
+                                           PhiloxRandom::kResultElementCount;
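+  // For example, with kGroupSize == 4 and PhiloxRandom producing 4 result
+  // elements per sample, each output group reserves 4 * 256 / 4 == 256
+  // Philox samples.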
+
+  const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
+  const int32 total_thread_count = gridDim.x * blockDim.x;
+  int64 group_index = thread_id;
+  int64 offset = group_index * kGroupSize;
+
+  while (offset < size) {
+    // Since each output takes a variable number of samples, we need to
+    // realign the generator to the beginning of the current output group.
+    PhiloxRandom gen = base_gen;
+    gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
+    SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
+
+    typename Distribution::ResultType samples = dist(&single_samples);
+
+    for (int i = 0; i < kGroupSize; ++i) {
+      if (offset >= size) {
+        return;
+      }
+      data[offset] = samples[i];
+      ++offset;
+    }
+
+    offset += (total_thread_count - 1) * kGroupSize;
+    group_index += total_thread_count;
+  }
+}
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // defined(__CUDACC__)
+
+#endif  // TENSORFLOW_CORE_KERNELS_RANDOM_OP_GPU_H_
diff --git a/tensorflow/core/kernels/range_sampler.cc b/tensorflow/core/kernels/range_sampler.cc
index d682cd3..c1457b3 100644
--- a/tensorflow/core/kernels/range_sampler.cc
+++ b/tensorflow/core/kernels/range_sampler.cc
@@ -294,7 +294,7 @@
     // Skip entries that do not belong to this shard.
     if (word_id % num_shards_ == shard_) {
       float w = 0.0;
-      if (!strings::safe_strtof(cols.at(cols.size() - 1).c_str(), &w)) {
+      if (!strings::safe_strtof(cols.at(cols.size() - 1), &w)) {
         return errors::InvalidArgument("Wrong vocabulary format at line: ",
                                        line);
       }
diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
index ffa41ec..e9900e5 100644
--- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
+++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
@@ -41,6 +41,20 @@
 typedef Eigen::GpuDevice GPUDevice;
 
 template <typename T>
+struct Square {
+  __host__ __device__ T operator()(const T& a) const {
+    return a * Eigen::numext::conj(a);
+  }
+};
+
+template <typename T>
+struct Sqrt {
+  __host__ __device__ T operator()(const T& a) const {
+    return Eigen::numext::sqrt(a);
+  }
+};
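+
+// Square and Sqrt are unary functors meant to be composed with CUB's
+// TransformInputIterator and the TransformOutputIterator used further below:
+// squaring on read and taking the square root on write turns a plain Sum
+// reduction into sqrt(sum(x * conj(x))), i.e. the Euclidean norm, without
+// materializing the squared values. Illustrative sketch (raw_in and raw_out
+// are hypothetical float pointers):
+//
+//   cub::TransformInputIterator<float, Square<float>, float*> in_it(
+//       raw_in, Square<float>());
+//   TransformOutputIterator<float, float, Sqrt<float>> out_it(
+//       raw_out, Sqrt<float>());
+//   // Reducing in_it with Sum<float> and writing the result through out_it
+//   // leaves sqrt(sum_i raw_in[i] * raw_in[i]) in raw_out[0].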
+
+template <typename T>
 struct Sum {
   __host__ __device__ T operator()(const T& a, const T& b) const {
     return a + b;
@@ -500,7 +514,7 @@
     BlockReduceKernel<IN_T, OUT_T, num_threads>
         <<<num_blocks, num_threads, 0, cu_stream>>>(in, out, in_size, op, init);
     return;
-  } else if (in_size <= 1 << 19) {
+  } else if (in_size <= 1 << 18) {
     const int num_threads = 256;
     const int num_blocks = std::min(32, Eigen::divup(in_size, num_threads));
     // it seems like tailoring this to the GPU
@@ -884,6 +898,31 @@
   }
 };
 
+// TODO(rmlarsen): Specialize for float16.
+template <typename T>
+struct ReduceFunctor<GPUDevice, functor::EuclideanNormReducer<T>> {
+  template <typename OUT_T, typename IN_T, typename ReductionAxes>
+  static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in,
+                     const ReductionAxes& reduction_axes,
+                     const functor::EuclideanNormReducer<T>& reducer) {
+    typedef cub::TransformInputIterator<T, Square<T>, T*> inputIterType;
+    inputIterType input_itr((T*)in.data(), Square<T>());
+    typedef TransformOutputIterator<T, T, Sqrt<T>> outputIterType;
+    outputIterType output_itr((T*)out.data(), Sqrt<T>());
+    ReduceImpl<T, Sum<T>, outputIterType, inputIterType, ReductionAxes>(
+        ctx, output_itr, input_itr, in.rank(), in.dimension(0),
+        in.rank() >= 2 ? in.dimension(1) : 1,
+        in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes,
+        Sum<T>());
+  }
+
+  template <typename OUT_T>
+  static void FillIdentity(const GPUDevice& d, OUT_T out,
+                           const functor::EuclideanNormReducer<T>& reducer) {
+    FillIdentityEigenImpl(d, To32Bit(out), reducer);
+  }
+};
+
 template <typename T>
 struct ReduceFunctor<GPUDevice, functor::MeanReducer<T>> {
   template <typename OUT_T, typename IN_T, typename ReductionAxes>
diff --git a/tensorflow/core/kernels/reduction_ops.h b/tensorflow/core/kernels/reduction_ops.h
index 2331599..0a1568b 100644
--- a/tensorflow/core/kernels/reduction_ops.h
+++ b/tensorflow/core/kernels/reduction_ops.h
@@ -33,6 +33,12 @@
   Scalar initialize() const { return Scalar(0); }
 };
 
+// Dummy class used for template specialization for L2-norm (Euclidean norm)
+// reduction.
+template <typename Scalar>
+struct EuclideanNormReducer {
+  Scalar initialize() const { return Scalar(0); }
+};
+
 template <typename Device, typename OUT_T, typename IN_T,
           typename ReductionAxes, typename Reducer>
 struct ReduceEigenImpl {
@@ -56,6 +62,39 @@
   }
 };
 
+// TODO(rmlarsen): Refactor this so that taking the sqrt is optional,
+// controlled by an attribute.
+template <typename Device, typename OUT_T, typename IN_T,
+          typename ReductionAxes, typename Scalar>
+struct ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes,
+                       functor::EuclideanNormReducer<Scalar>> {
+  void operator()(const Device& d, OUT_T out, IN_T in,
+                  const ReductionAxes& reduction_axes,
+                  const functor::EuclideanNormReducer<Scalar>& reducer) {
+    static_assert(std::is_same<Scalar, typename OUT_T::Scalar>::value, "");
+    Eigen::internal::SumReducer<Scalar> sum_reducer;
+    out.device(d) =
+        (in * in.conjugate()).reduce(reduction_axes, sum_reducer).sqrt();
+  }
+};
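+
+// For instance, reducing in = [3, 4] over its single axis yields
+// sqrt(3 * 3 + 4 * 4) = 5.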
+
+template <typename Device, typename OUT_T, typename IN_T,
+          typename ReductionAxes>
+struct ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes,
+                       functor::EuclideanNormReducer<bfloat16>> {
+  void operator()(const Device& d, OUT_T out, IN_T in,
+                  const ReductionAxes& reduction_axes,
+                  const functor::EuclideanNormReducer<bfloat16>& reducer) {
+    static_assert(std::is_same<bfloat16, typename OUT_T::Scalar>::value, "");
+    Eigen::internal::SumReducer<float> sum_reducer;
+    auto in_as_float = in.template cast<float>();
+    out.device(d) = (in_as_float * in_as_float.conjugate())
+                        .reduce(reduction_axes, sum_reducer)
+                        .sqrt()
+                        .template cast<bfloat16>();
+  }
+};
+
 // For most reducers, the identity is Reducer::initialize()
 template <typename Reducer>
 struct Identity {
diff --git a/tensorflow/core/kernels/reduction_ops_euclidean.cc b/tensorflow/core/kernels/reduction_ops_euclidean.cc
new file mode 100644
index 0000000..9f4bf50
--- /dev/null
+++ b/tensorflow/core/kernels/reduction_ops_euclidean.cc
@@ -0,0 +1,81 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/reduction_ops_common.h"
+
+namespace tensorflow {
+
+#define REGISTER_CPU_KERNELS(type)                                           \
+  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
+                              .Device(DEVICE_CPU)                            \
+                              .TypeConstraint<type>("T")                     \
+                              .TypeConstraint<int32>("Tidx"),                \
+                          ReductionOp<CPUDevice, type, int32,                \
+                                      functor::EuclideanNormReducer<type>>); \
+  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
+                              .Device(DEVICE_CPU)                            \
+                              .TypeConstraint<type>("T")                     \
+                              .TypeConstraint<int64>("Tidx"),                \
+                          ReductionOp<CPUDevice, type, int64,                \
+                                      functor::EuclideanNormReducer<type>>);
+TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
+#undef REGISTER_CPU_KERNELS
+
+#if GOOGLE_CUDA
+
+#define REGISTER_GPU_KERNELS(type)                                           \
+  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
+                              .Device(DEVICE_GPU)                            \
+                              .TypeConstraint<type>("T")                     \
+                              .TypeConstraint<int32>("Tidx")                 \
+                              .HostMemory("reduction_indices"),              \
+                          ReductionOp<GPUDevice, type, int32,                \
+                                      functor::EuclideanNormReducer<type>>); \
+  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
+                              .Device(DEVICE_GPU)                            \
+                              .TypeConstraint<type>("T")                     \
+                              .TypeConstraint<int64>("Tidx")                 \
+                              .HostMemory("reduction_indices"),              \
+                          ReductionOp<GPUDevice, type, int64,                \
+                                      functor::EuclideanNormReducer<type>>);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_complex64(REGISTER_GPU_KERNELS);
+TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#undef REGISTER_GPU_KERNELS
+
+#endif
+
+#ifdef TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNELS(type)                                          \
+  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
+                              .Device(DEVICE_SYCL)                           \
+                              .TypeConstraint<type>("T")                     \
+                              .TypeConstraint<int32>("Tidx")                 \
+                              .HostMemory("reduction_indices"),              \
+                          ReductionOp<SYCLDevice, type, int32,               \
+                                      functor::EuclideanNormReducer<type>>); \
+  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
+                              .Device(DEVICE_SYCL)                           \
+                              .TypeConstraint<type>("T")                     \
+                              .TypeConstraint<int64>("Tidx")                 \
+                              .HostMemory("reduction_indices"),              \
+                          ReductionOp<SYCLDevice, type, int64,               \
+                                      functor::EuclideanNormReducer<type>>);
+REGISTER_SYCL_KERNELS(float);
+REGISTER_SYCL_KERNELS(double);
+#undef REGISTER_SYCL_KERNELS
+#endif  // TENSORFLOW_USE_SYCL
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reduction_ops_gpu_complex128.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu_complex128.cu.cc
index c44a40b..662f24d 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu_complex128.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu_complex128.cu.cc
@@ -53,6 +53,7 @@
 
 DEFINE_FOR_TYPE_AND_R(complex128, Eigen::internal::SumReducer<complex128>);
 DEFINE_FOR_TYPE_AND_R(complex128, functor::MeanReducer<complex128>);
+DEFINE_FOR_TYPE_AND_R(complex128, functor::EuclideanNormReducer<complex128>);
 DEFINE_FOR_TYPE_AND_R(complex128, Eigen::internal::ProdReducer<complex128>);
 #undef DEFINE_FOR_TYPE_AND_R
 #undef DEFINE
diff --git a/tensorflow/core/kernels/reduction_ops_gpu_complex64.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu_complex64.cu.cc
index 1921130..8ab2a6e 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu_complex64.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu_complex64.cu.cc
@@ -53,6 +53,7 @@
 
 DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::SumReducer<complex64>);
 DEFINE_FOR_TYPE_AND_R(complex64, functor::MeanReducer<complex64>);
+DEFINE_FOR_TYPE_AND_R(complex64, functor::EuclideanNormReducer<complex64>);
 DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::ProdReducer<complex64>);
 #undef DEFINE_FOR_TYPE_AND_R
 #undef DEFINE
diff --git a/tensorflow/core/kernels/reduction_ops_gpu_double.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu_double.cu.cc
index 119f726..c492308 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu_double.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu_double.cu.cc
@@ -51,11 +51,12 @@
   DEFINE(T, R, 3, 2);               \
   DEFINE_IDENTITY(T, R)
 
-#define DEFINE_FOR_ALL_REDUCERS(T)                          \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>); \
-  DEFINE_FOR_TYPE_AND_R(T, functor::MeanReducer<T>);        \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>); \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>); \
+#define DEFINE_FOR_ALL_REDUCERS(T)                            \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>);   \
+  DEFINE_FOR_TYPE_AND_R(T, functor::MeanReducer<T>);          \
+  DEFINE_FOR_TYPE_AND_R(T, functor::EuclideanNormReducer<T>); \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>);   \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>);   \
   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer<T>)
 
 DEFINE_FOR_ALL_REDUCERS(double);
diff --git a/tensorflow/core/kernels/reduction_ops_gpu_float.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu_float.cu.cc
index 70ba4ab..b006311 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu_float.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu_float.cu.cc
@@ -51,11 +51,12 @@
   DEFINE(T, R, 3, 2);               \
   DEFINE_IDENTITY(T, R)
 
-#define DEFINE_FOR_ALL_REDUCERS(T)                          \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>); \
-  DEFINE_FOR_TYPE_AND_R(T, functor::MeanReducer<T>);        \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>); \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>); \
+#define DEFINE_FOR_ALL_REDUCERS(T)                            \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>);   \
+  DEFINE_FOR_TYPE_AND_R(T, functor::MeanReducer<T>);          \
+  DEFINE_FOR_TYPE_AND_R(T, functor::EuclideanNormReducer<T>); \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>);   \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>);   \
   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer<T>)
 
 DEFINE_FOR_ALL_REDUCERS(float);
diff --git a/tensorflow/core/kernels/reduction_ops_gpu_int.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu_int.cu.cc
index 82f6d7d..91a33b9 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu_int.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu_int.cu.cc
@@ -51,11 +51,12 @@
   DEFINE(T, R, 3, 2);               \
   DEFINE_IDENTITY(T, R)
 
-#define DEFINE_FOR_ALL_REDUCERS(T)                          \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>); \
-  DEFINE_FOR_TYPE_AND_R(T, functor::MeanReducer<T>);        \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>); \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>); \
+#define DEFINE_FOR_ALL_REDUCERS(T)                            \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>);   \
+  DEFINE_FOR_TYPE_AND_R(T, functor::MeanReducer<T>);          \
+  DEFINE_FOR_TYPE_AND_R(T, functor::EuclideanNormReducer<T>); \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>);   \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>);   \
   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer<T>)
 
 DEFINE_FOR_ALL_REDUCERS(int32);
diff --git a/tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc b/tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc
index db050fd..f33d504 100644
--- a/tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc
@@ -51,8 +51,9 @@
   DEFINE(T, R, 3, 2);               \
   DEFINE_IDENTITY(T, R)
 
-#define DEFINE_FOR_ALL_REDUCERS(T)                          \
-  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>); \
+#define DEFINE_FOR_ALL_REDUCERS(T)                            \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>);   \
+  DEFINE_FOR_TYPE_AND_R(T, functor::EuclideanNormReducer<T>); \
   DEFINE_FOR_TYPE_AND_R(T, functor::MeanReducer<T>);
 
 DEFINE_FOR_ALL_REDUCERS(Eigen::half);
diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc
index fe8ea59..359d7db 100644
--- a/tensorflow/core/kernels/reduction_ops_test.cc
+++ b/tensorflow/core/kernels/reduction_ops_test.cc
@@ -164,6 +164,11 @@
 }
 BENCHMARK(BM_Mean2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
 
+static void BM_EuclideanNorm2DToScalarGPU(int iters, int num_x, int num_y) {
+  ReduceToScalar<float>(iters, "gpu", "EuclideanNorm", num_x, num_y);
+}
+BENCHMARK(BM_EuclideanNorm2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
+
 static void BM_Max2DToScalarGPU(int iters, int num_x, int num_y) {
   ReduceToScalar<float>(iters, "gpu", "Max", num_x, num_y);
 }
diff --git a/tensorflow/core/kernels/save_v2_op_test.cc b/tensorflow/core/kernels/save_v2_op_test.cc
index 82e566d..589d963 100644
--- a/tensorflow/core/kernels/save_v2_op_test.cc
+++ b/tensorflow/core/kernels/save_v2_op_test.cc
@@ -67,9 +67,8 @@
                    [&tensornames](int x) -> string { return tensornames[x]; });
 
   // Add the slice specs
-  AddInput<string>(TensorShape({13}), [&tensornames](int x) -> string {
-    return "" /* saves in full */;
-  });
+  AddInput<string>(TensorShape({13}),
+                   [](int x) -> string { return "" /* saves in full */; });
 
   // Add a 1-d bool tensor
   AddInput<bool>(TensorShape({2}), [](int x) -> bool { return x != 0; });
diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc
index 2c1f613..9c9e737 100644
--- a/tensorflow/core/kernels/sparse_matmul_op.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op.cc
@@ -53,8 +53,6 @@
 namespace tensorflow {
 namespace {
 
-using Eigen::operator==;
-
 template <typename T>
 using BasicMatrix = Eigen::Tensor<T, 2, Eigen::RowMajor>;
 
diff --git a/tensorflow/core/kernels/spectrogram_test_utils.cc b/tensorflow/core/kernels/spectrogram_test_utils.cc
index 872a6e9..bb9d18e 100644
--- a/tensorflow/core/kernels/spectrogram_test_utils.cc
+++ b/tensorflow/core/kernels/spectrogram_test_utils.cc
@@ -140,9 +140,9 @@
       for (std::vector<string>::const_iterator j = parts.begin();
            j != parts.end(); ++j) {
         if (j->find_first_of("ij") != string::npos) {
-          strings::safe_strtod((*j).c_str(), &imaginary_part);
+          strings::safe_strtod(*j, &imaginary_part);
         } else {
-          strings::safe_strtod((*j).c_str(), &real_part);
+          strings::safe_strtod(*j, &real_part);
         }
       }
       data_line.push_back(std::complex<double>(real_part, imaginary_part));
diff --git a/tensorflow/core/kernels/stack.cc b/tensorflow/core/kernels/stack.cc
index 2af6b4b..033b9f3 100644
--- a/tensorflow/core/kernels/stack.cc
+++ b/tensorflow/core/kernels/stack.cc
@@ -244,9 +244,9 @@
     DeviceContext* device_ctxt = ctx->op_device_context();
     auto device = static_cast<tensorflow::Device*>(ctx->device());
     Allocator* allocator = device->GetAllocator(alloc_attrs);
-    AllocatorStats stats;
-    allocator->GetStats(&stats);
-    if (stats.bytes_in_use > (stats.bytes_limit * kOccupancy)) {
+    absl::optional<AllocatorStats> stats = allocator->GetStats();
+    if (stats && *stats->bytes_limit &&
+        stats->bytes_in_use > (*stats->bytes_limit * kOccupancy)) {
       // Asynchronously copy the tensor from GPU to CPU memory.
       // TODO(yuanbyu): Swap the oldest tensor first.
       AllocatorAttributes host_alloc_attrs;
diff --git a/tensorflow/core/kernels/stateful_random_ops.cc b/tensorflow/core/kernels/stateful_random_ops.cc
index 7cbbfd4..62c4ee3 100644
--- a/tensorflow/core/kernels/stateful_random_ops.cc
+++ b/tensorflow/core/kernels/stateful_random_ops.cc
@@ -15,184 +15,43 @@
 
 #define EIGEN_USE_THREADS
 
-#include <algorithm>
-#include <cmath>
-#include <memory>
-
-#include "absl/strings/str_join.h"
-#include "absl/types/variant.h"
-#include "tensorflow/core/common_runtime/device.h"
-#include "tensorflow/core/framework/bounds_check.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/resource_mgr.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/framework/variant_op_registry.h"
-#include "tensorflow/core/kernels/dense_update_functor.h"
-#include "tensorflow/core/kernels/gather_functor.h"
+#include "tensorflow/core/kernels/stateful_random_ops.h"
 #include "tensorflow/core/kernels/random_op.h"
-#include "tensorflow/core/kernels/resource_variable_ops.h"
-#include "tensorflow/core/kernels/scatter_functor.h"
 #include "tensorflow/core/kernels/training_op_helpers.h"
-#include "tensorflow/core/kernels/variable_ops.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/refcount.h"
-#include "tensorflow/core/lib/random/random_distributions.h"
-#include "tensorflow/core/lib/random/simple_philox.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/util.h"
-#include "tensorflow/core/util/work_sharder.h"
 
 namespace tensorflow {
 
-using CPUDevice = Eigen::ThreadPoolDevice;
-
-using random::PhiloxRandom;
-
-namespace {
-
-// 'Variable' doesn't support uint32 or uint64 yet (due to reasons explained
-// in b/111604096 and cl/171681867), so I use signed int here. I choose int64
-// instead of int32 because `VarHandleOp` doesn't support int32 on GPU.
-using StateElementType = int64;
-static constexpr DataType STATE_ELEMENT_DTYPE = DT_INT64;
-
-using Algorithm = StateElementType;
-static constexpr Algorithm RNG_ALG_PHILOX = 1;
-
-using SkippableRNG = absl::variant<PhiloxRandom>;
-
-// This function is for hiding the implementation detail about the
-// `absl::variant` index of each algorithm.
-Algorithm GetAlgorithm(SkippableRNG const& rng) {
-  auto idx = rng.index();
-  if (idx == 0) {
-    return RNG_ALG_PHILOX;
-  }
-  // unreachable
-  return RNG_ALG_PHILOX;
-}
-
-// Fills a buffer with random numbers sampled from a given distribution.
-template <class Device, class Distribution>
-Status FillRandom(OpKernelContext* ctx, const Device& device,
-                  SkippableRNG const& gen, int64 size, Distribution dist,
-                  typename Distribution::ResultElementType* data) {
-  auto algorithm = GetAlgorithm(gen);
-  if (algorithm == RNG_ALG_PHILOX) {
-    auto philox = absl::get<PhiloxRandom>(gen);
-    functor::FillPhiloxRandom<Device, Distribution>()(ctx, device, philox, data,
-                                                      size, dist);
-    return Status::OK();
-  } else {
-    // return errors::InvalidArgument("Unsupported algorithm id: ", algorithm);
-    return Status::OK();
-  }
-}
-
-// The following two functions use the contract "lower 32 bits for the first
-// uint32, higher 32 bits for the second". Note that this is endian-neutral,
-// unlike a direct memory copy `memcpy(output, &input, 8)`.
-void Int64ToUint32s(int64 input, uint32* output1, uint32* output2) {
-  auto u64 = static_cast<uint64>(input);
-  *output1 = static_cast<uint32>(u64);
-  *output2 = static_cast<uint32>(u64 >> 32);
-}
-
-int64 Uint32sToInt64(uint32 input1, uint32 input2) {
-  auto u64_1 = static_cast<uint64>(input1);
-  auto u64_2 = static_cast<uint64>(input2);
-  return static_cast<int64>(u64_1 | (u64_2 << 32));
-}
-
-void GetPhiloxStateFromTensor(Tensor const& tensor,
-                              PhiloxRandom::ResultType* counter,
-                              PhiloxRandom::Key* key) {
-  auto tensor_flat = tensor.flat<StateElementType>();
-  auto tensor_ptr = tensor_flat.data();
-  // tensor_ptr's index is added by 1 to skip the algorithm tag.
-  Int64ToUint32s(tensor_ptr[1], &(*counter)[0], &(*counter)[1]);
-  Int64ToUint32s(tensor_ptr[2], &(*counter)[2], &(*counter)[3]);
-  Int64ToUint32s(tensor_ptr[3], &(*key)[0], &(*key)[1]);
-}
-
-void WritePhiloxStateToTensor(PhiloxRandom::ResultType const& counter,
-                              PhiloxRandom::Key const& key, Tensor* tensor) {
-  auto tensor_flat = tensor->flat<StateElementType>();
-  auto tensor_ptr = tensor_flat.data();
-  // tensor_ptr's index is added by 1 to skip the algorithm tag.
-  tensor_ptr[1] = Uint32sToInt64(counter[0], counter[1]);
-  tensor_ptr[2] = Uint32sToInt64(counter[2], counter[3]);
-  tensor_ptr[3] = Uint32sToInt64(key[0], key[1]);
-}
-
-// A helper function that does the actual work for
-// 'MakeRNGCopyAndUpdateVariable'.
-template <typename Device>
-Status GetRNGCopyAndUpdateTensor(Tensor* tensor, int64 delta,
-                                 SkippableRNG* rng_copy);
-
-template <>
-Status GetRNGCopyAndUpdateTensor<CPUDevice>(Tensor* tensor, int64 delta,
-                                            SkippableRNG* rng_copy) {
-  // The dtype of `tensor` should be `StateElementType` and the first element
-  // is the algorithm.
-  if (tensor->dims() != 1) {
-    return errors::InvalidArgument(
-        "RNG state must have one and only one dimension, not ", tensor->dims());
-  }
-  auto tensor_flat = tensor->flat<StateElementType>();
-  if (tensor_flat.size() < 1) {
-    return errors::InvalidArgument("Size of tensor must be at least 1");
-  }
-  auto algorithm = tensor_flat.data()[0];
-  if (algorithm == RNG_ALG_PHILOX) {
+template <typename Distribution>
+struct UpdateVariableAndFill_Philox<CPUDevice, Distribution> {
+  void operator()(OpKernelContext* ctx, const CPUDevice& device,
+                  int64 output_size, int64 alg_tag_skip,
+                  ScopedUnlockUnref* state_var_guard, Tensor* state_tensor,
+                  typename Distribution::ResultElementType* output_data) {
+    auto state_tensor_flat = state_tensor->flat<StateElementType>();
+    auto state_data = state_tensor_flat.data();
     // Delegates to PhiloxRandom to do the actual increasing.
-    static_assert(std::is_same<StateElementType, int64>::value,
-                  "StateElementType must be int64");
-    static_assert(std::is_same<PhiloxRandom::ResultElementType, uint32>::value,
-                  "PhiloxRandom::ResultElementType must be uint32");
-    auto counter_size = PhiloxRandom::ResultType::kElementCount;
-    auto key_size = PhiloxRandom::Key::kElementCount;
-    auto min_tensor_size = 1 + (counter_size + key_size) / 2;
-    if (tensor_flat.size() < min_tensor_size) {
-      return errors::InvalidArgument(
-          "For Philox algorithm, the size of state"
-          " must be at least ",
-          min_tensor_size, "; got ", tensor_flat.size());
-    }
-    PhiloxRandom::ResultType counter;
-    PhiloxRandom::Key key;
-    GetPhiloxStateFromTensor(*tensor, &counter, &key);
-    PhiloxRandom philox(counter, key);
-    auto old_philox = philox;
-    philox.Skip(delta);  // do the actual increasing
-    WritePhiloxStateToTensor(philox.counter(), philox.key(), tensor);
-    *rng_copy = SkippableRNG(old_philox);
-    return Status::OK();
-  } else {
-    // return errors::InvalidArgument("Unsupported algorithm id: ", algorithm);
-    *rng_copy = SkippableRNG(PhiloxRandom());
-    return Status::OK();
+    auto philox = GetPhiloxRandomFromMem(state_data + alg_tag_skip);
+    UpdateMemWithPhiloxRandom(philox, output_size, state_data + alg_tag_skip);
+    // No longer needs the lock.
+    state_var_guard->Release();
+    functor::FillPhiloxRandom<CPUDevice, Distribution>()(
+        ctx, device, philox, output_data, output_size, Distribution());
   }
-}
+};
 
-// Gets a copy of the RNG and updates the variable. The copy can be used to
-// generate upto 'samples' random numbers, and the variable is updated as if
-// 'samples' random numbers have been generated (e.g. if the variable is a
-// counnter, the counter is increased by 'samples').
-template <class Device>
-Status MakeRNGCopyAndUpdateVariable(OpKernelContext* ctx, int input_idx,
-                                    int64 samples, SkippableRNG* rng_copy) {
+template <typename Device, typename Distribution>
+Status UpdateVariableAndFill(
+    OpKernelContext* ctx, int state_input_idx, bool read_alg_from_state,
+    Algorithm alg, int64 output_size,
+    typename Distribution::ResultElementType* output_data) {
   Var* var = nullptr;
   TF_RETURN_IF_ERROR(
-      LookupResource(ctx, HandleFromInput(ctx, input_idx), &var));
-  core::ScopedUnref s(var);
-  mutex_lock ml(*var->mu());
+      LookupResource(ctx, HandleFromInput(ctx, state_input_idx), &var));
+  // Use `ScopedUnlockUnref` here instead of `mutex_lock` and `ScopedUnref`
+  // because the former supports early release, which
+  // `UpdateVariableAndFill_Philox<CPU>` needs in order to avoid holding the
+  // lock while filling.
+  ScopedUnlockUnref state_var_guard(var);
   Tensor* var_tensor = var->tensor();
   if (var_tensor->dtype() != STATE_ELEMENT_DTYPE) {
     return errors::InvalidArgument("dtype of RNG state variable must be ",
@@ -200,66 +59,129 @@
                                    ", not ",
                                    DataTypeString(var_tensor->dtype()));
   }
-  TF_RETURN_IF_ERROR(PrepareToUpdateVariable<Device, StateElementType>(
-      ctx, var_tensor, var->copy_on_read_mode.load()));
-  TF_RETURN_IF_ERROR(
-      GetRNGCopyAndUpdateTensor<Device>(var_tensor, samples, rng_copy));
-  return Status::OK();
+  if (var_tensor->dims() != 1) {
+    return errors::InvalidArgument(
+        "RNG state must have one and only one dimension, not ",
+        var_tensor->dims());
+  }
+  auto var_tensor_flat = var_tensor->flat<StateElementType>();
+  int64 alg_tag_skip = 0;
+  if (read_alg_from_state) {
+    alg_tag_skip = 1;
+    if (var_tensor_flat.size() < 1) {
+      return errors::InvalidArgument("Size of tensor must be at least 1");
+    }
+    alg = var_tensor_flat(0);
+  }
+  if (alg == RNG_ALG_PHILOX) {
+    static_assert(std::is_same<StateElementType, int64>::value,
+                  "StateElementType must be int64");
+    static_assert(std::is_same<PhiloxRandom::ResultElementType, uint32>::value,
+                  "PhiloxRandom::ResultElementType must be uint32");
+    if (var_tensor_flat.size() < alg_tag_skip + PHILOX_MIN_STATE_SIZE) {
+      return errors::InvalidArgument(
+          "For Philox algorithm, the size of state"
+          " must be at least ",
+          alg_tag_skip + PHILOX_MIN_STATE_SIZE, "; got ",
+          var_tensor_flat.size());
+    }
+    TF_RETURN_IF_ERROR(PrepareToUpdateVariable<Device, StateElementType>(
+        ctx, var_tensor, var->copy_on_read_mode.load()));
+    UpdateVariableAndFill_Philox<Device, Distribution>()(
+        ctx, ctx->eigen_device<Device>(), output_size, alg_tag_skip,
+        &state_var_guard, var_tensor, output_data);
+    return Status::OK();
+  } else {
+    return errors::InvalidArgument("Unsupported algorithm id: ", alg);
+  }
+}
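+
+// For the Philox branch above, the expected state tensor layout is therefore
+// [alg, counter_0, counter_1, key] (at least four int64 elements) when
+// `read_alg_from_state` is true, and [counter_0, counter_1, key] otherwise.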
+
+// Precondition: input(0) is an existing resource.
+template <typename Device, class Distribution>
+void ComputeImpl(OpKernelContext* ctx, int state_input_idx, int shape_input_idx,
+                 bool read_alg_from_state, Algorithm alg) {
+  using T = typename Distribution::ResultElementType;
+  const Tensor& shape_t = ctx->input(shape_input_idx);
+  TensorShape shape;
+  OP_REQUIRES_OK(ctx, ctx->op_kernel().MakeShape(shape_t, &shape));
+  Tensor* output;
+  OP_REQUIRES_OK(ctx, ctx->allocate_output(0, shape, &output));
+  auto output_flat = output->flat<T>();
+  OP_REQUIRES_OK(ctx, UpdateVariableAndFill<Device, Distribution>(
+                          ctx, state_input_idx, read_alg_from_state, alg,
+                          output_flat.size(), output_flat.data()));
 }
 
 template <typename Device, class Distribution>
 class StatefulRandomOp : public OpKernel {
  public:
-  using T = typename Distribution::ResultElementType;
   explicit StatefulRandomOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
 
-  // Assumes that input(0) is an existing resource.
   void Compute(OpKernelContext* ctx) override {
-    const Tensor& shape_t = ctx->input(1);
-    Tensor* output;
-    TensorShape shape;
-    OP_REQUIRES_OK(ctx, MakeShape(shape_t, &shape));
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, shape, &output));
-    if (shape.num_elements() == 0) return;
-
-    auto output_flat = output->flat<T>();
-    SkippableRNG rng;
-    // Multiplier 256 is the same as in FillPhiloxRandomTask; do not change
-    // it just here.
-    OP_REQUIRES_OK(ctx, MakeRNGCopyAndUpdateVariable<Device>(
-                            ctx, 0, output_flat.size() * 256, &rng));
-    // Fill in the random numbers
-    OP_REQUIRES_OK(ctx, FillRandom(ctx, ctx->eigen_device<Device>(), rng,
-                                   output_flat.size(), Distribution(),
-                                   output_flat.data()));
+    ComputeImpl<Device, Distribution>(ctx, 0, 1, true, 0);
   }
 };
 
-}  // namespace
+template <typename Device, class Distribution>
+class StatefulRandomOpV2 : public OpKernel {
+ public:
+  explicit StatefulRandomOpV2(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& alg_tensor = ctx->input(1);
+    OP_REQUIRES(ctx, alg_tensor.dims() == 0,
+                errors::InvalidArgument("algorithm must be of shape [], not ",
+                                        alg_tensor.shape().DebugString()));
+    auto alg = alg_tensor.flat<Algorithm>()(0);
+    ComputeImpl<Device, Distribution>(ctx, 0, 2, false, alg);
+  }
+};
 
 // So far the 'Distribution' type parameter is only used when the algorithm is
 // philox, so 'NormalDistribution<PhiloxRandom, ...>' is fine for now.
 #define REGISTER(DEVICE, TYPE)            \
   REGISTER_KERNEL_BUILDER(                \
-      Name("StatefulStandardNormal")      \
+      Name("StatefulStandardNormalV2")    \
           .Device(DEVICE_##DEVICE)        \
           .HostMemory("resource")         \
+          .HostMemory("algorithm")        \
+          .HostMemory("shape")            \
+          .TypeConstraint<TYPE>("dtype"), \
+      StatefulRandomOpV2<DEVICE##Device,  \
+                         random::NormalDistribution<PhiloxRandom, TYPE> >);
+
+// CPU also has the old 'StatefulStandardNormal' op for backward compatibility.
+#define REGISTER_CPU(TYPE)                \
+  REGISTER(CPU, TYPE)                     \
+  REGISTER_KERNEL_BUILDER(                \
+      Name("StatefulStandardNormal")      \
+          .Device(DEVICE_CPU)             \
+          .HostMemory("resource")         \
           .HostMemory("shape")            \
           .TypeConstraint<TYPE>("dtype"), \
-      StatefulRandomOp<DEVICE##Device,    \
+      StatefulRandomOp<CPUDevice,         \
                        random::NormalDistribution<PhiloxRandom, TYPE> >);
 
-#define REGISTER_CPU(TYPE) REGISTER(CPU, TYPE)
+#define REGISTER_GPU(TYPE) REGISTER(GPU, TYPE)
 
 TF_CALL_half(REGISTER_CPU);
 TF_CALL_bfloat16(REGISTER_CPU);
 TF_CALL_float(REGISTER_CPU);
 TF_CALL_double(REGISTER_CPU);
 
+#if GOOGLE_CUDA
+
+TF_CALL_half(REGISTER_GPU);
+TF_CALL_float(REGISTER_GPU);
+TF_CALL_double(REGISTER_GPU);
+
+#endif  // GOOGLE_CUDA
+
+#undef REGISTER_GPU
 #undef REGISTER_CPU
 #undef REGISTER
 
 // TODO(wangpeng): Add RNG ops for other distributions.
-// TODO(wangpeng): Add support for GPU and XLA.
+// TODO(wangpeng): Add support for XLA.
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/stateful_random_ops.h b/tensorflow/core/kernels/stateful_random_ops.h
new file mode 100644
index 0000000..689c2be
--- /dev/null
+++ b/tensorflow/core/kernels/stateful_random_ops.h
@@ -0,0 +1,145 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_STATEFUL_RANDOM_OPS_H_
+#define TENSORFLOW_CORE_KERNELS_STATEFUL_RANDOM_OPS_H_
+
+#include "tensorflow/core/framework/resource_var.h"
+#include "tensorflow/core/lib/random/philox_random.h"
+
+namespace tensorflow {
+
+// 'Variable' doesn't support uint32 or uint64 yet (for reasons explained in
+// b/111604096 and cl/171681867), so we use a signed int here, choosing int64
+// over int32 because `VarHandleOp` doesn't support int32 on GPU.
+using StateElementType = int64;
+static constexpr DataType STATE_ELEMENT_DTYPE = DT_INT64;
+
+using Algorithm = StateElementType;
+static constexpr Algorithm RNG_ALG_PHILOX = 1;
+
+using random::PhiloxRandom;
+
+static constexpr int64 PHILOX_MIN_STATE_SIZE =
+    (PhiloxRandom::ResultType::kElementCount +
+     PhiloxRandom::Key::kElementCount) /
+    2;
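+// With Philox4x32 this is (4 + 2) / 2 == 3 int64 elements: two for the
+// 128-bit counter and one for the 64-bit key.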
+
+// The following 5 functions are made templates to avoid duplicate symbols when
+// linking.
+
+// The following two functions use the contract "lower 32 bits for the first
+// uint32, higher 32 bits for the second". Note that this is endian-neutral,
+// unlike a direct memory copy `memcpy(output, &input, 8)`.
+template <typename INT64>
+PHILOX_DEVICE_FUNC void Int64ToUint32s(INT64 input, uint32* output1,
+                                       uint32* output2) {
+  auto u64 = static_cast<uint64>(input);
+  *output1 = static_cast<uint32>(u64);
+  *output2 = static_cast<uint32>(u64 >> 32);
+}
+
+template <typename UINT32>
+PHILOX_DEVICE_FUNC int64 Uint32sToInt64(UINT32 input1, UINT32 input2) {
+  auto u64_1 = static_cast<uint64>(input1);
+  auto u64_2 = static_cast<uint64>(input2);
+  return static_cast<int64>(u64_1 | (u64_2 << 32));
+}
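+
+// For example, Int64ToUint32s(0x0000000200000001LL, &lo, &hi) yields lo == 1
+// and hi == 2, and Uint32sToInt64(1, 2) recovers the original value,
+// independent of host endianness.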
+
+template <typename STATE_ELEMENT_TYPE>
+PHILOX_DEVICE_FUNC PhiloxRandom
+GetPhiloxRandomFromMem(STATE_ELEMENT_TYPE const* ptr) {
+  PhiloxRandom::ResultType counter;
+  PhiloxRandom::Key key;
+  Int64ToUint32s(ptr[0], &counter[0], &counter[1]);
+  Int64ToUint32s(ptr[1], &counter[2], &counter[3]);
+  Int64ToUint32s(ptr[2], &key[0], &key[1]);
+  return PhiloxRandom(counter, key);
+}
+
+template <typename PHILOX_RANDOM>
+PHILOX_DEVICE_FUNC void WritePhiloxRandomToMem(PHILOX_RANDOM const& philox,
+                                               StateElementType* ptr) {
+  PhiloxRandom::ResultType const& counter = philox.counter();
+  PhiloxRandom::Key const& key = philox.key();
+  ptr[0] = Uint32sToInt64(counter[0], counter[1]);
+  ptr[1] = Uint32sToInt64(counter[2], counter[3]);
+  ptr[2] = Uint32sToInt64(key[0], key[1]);
+}
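+
+// Together, GetPhiloxRandomFromMem and WritePhiloxRandomToMem fix the state
+// layout as three int64 elements: ptr[0] and ptr[1] hold the four 32-bit
+// counter words and ptr[2] holds the two 32-bit key words.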
+
+template <typename PHILOX_RANDOM>
+PHILOX_DEVICE_FUNC void UpdateMemWithPhiloxRandom(PHILOX_RANDOM const& philox,
+                                                  int64 output_size,
+                                                  StateElementType* ptr) {
+  auto new_philox = philox;
+  // Multiplier 256 is the same as in `FillPhiloxRandomTask`; do not change
+  // it here without also changing it there.
+  auto delta = output_size * 256;
+  new_philox.Skip(delta);  // do the actual counter advancing
+  WritePhiloxRandomToMem(new_philox, ptr);
+}
+
+// Unlocks and unrefs the variable automatically when going out of scope,
+// and also supports early manual release.
+class ScopedUnlockUnref {
+ public:
+  explicit ScopedUnlockUnref(Var* var) : var_(var) {
+    if (var_) {
+      var_->mu()->lock();
+    }
+  }
+  void Release() {
+    if (var_) {
+      var_->mu()->unlock();
+      var_->Unref();
+      var_ = nullptr;
+    }
+  }
+  ~ScopedUnlockUnref() { Release(); }
+
+ private:
+  Var* var_;
+
+  ScopedUnlockUnref(const ScopedUnlockUnref&) = delete;
+  void operator=(const ScopedUnlockUnref&) = delete;
+};
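+
+// Typical use (sketch, mirroring `UpdateVariableAndFill` in the .cc file;
+// the reference on `var` comes from `LookupResource`):
+//
+//   ScopedUnlockUnref guard(var);  // locks var->mu()
+//   ... read and update var->tensor() ...
+//   guard.Release();  // unlocks and unrefs early; the destructor is a no-op
+//   ... fill the output without holding the variable's lock ...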
+
+// A per-device helper that does the actual work for `UpdateVariableAndFill`.
+// It is a functor rather than a function because C++ doesn't allow partial
+// specialization of function templates.
+template <typename Device, typename Distribution>
+struct UpdateVariableAndFill_Philox;
+
+using CPUDevice = Eigen::ThreadPoolDevice;
+
+#if GOOGLE_CUDA
+
+using GPUDevice = Eigen::GpuDevice;
+
+// Declares the partially GPU-specialized functor struct.
+template <typename Distribution>
+struct UpdateVariableAndFill_Philox<GPUDevice, Distribution> {
+  void operator()(OpKernelContext* ctx, const GPUDevice& device,
+                  int64 output_size, int64 alg_tag_skip,
+                  ScopedUnlockUnref* not_used, Tensor* state_tensor,
+                  typename Distribution::ResultElementType* output_data);
+};
+
+#endif  // GOOGLE_CUDA
+
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_STATEFUL_RANDOM_OPS_H_
diff --git a/tensorflow/core/kernels/stateful_random_ops_gpu.cu.cc b/tensorflow/core/kernels/stateful_random_ops_gpu.cu.cc
new file mode 100644
index 0000000..4ce0db7
--- /dev/null
+++ b/tensorflow/core/kernels/stateful_random_ops_gpu.cu.cc
@@ -0,0 +1,96 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/random_op_gpu.h"
+#include "tensorflow/core/kernels/stateful_random_ops.h"
+#include "tensorflow/core/util/cuda_launch_config.h"
+
+namespace tensorflow {
+
+using random::PhiloxRandom;
+
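+// Global scratch counter used by FillKernel to detect the last thread to
+// finish; it is reset to zero via cudaMemcpyToSymbol before each launch.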
+__device__ int thread_counter;
+
+template <typename Distribution>
+__global__ void FillKernel(
+    Distribution dist, int64 state_size, int64 output_size,
+    StateElementType* state_data,
+    typename Distribution::ResultElementType* output_data) {
+  // Threads in this block share `philox`. Thread 0 is responsible for
+  // initializing it.
+  __shared__ char philox_raw[sizeof(PhiloxRandom)];
+  auto philox = reinterpret_cast<PhiloxRandom*>(philox_raw);
+  if (threadIdx.x == 0) {
+    *philox = GetPhiloxRandomFromMem(state_data);
+  }
+  __syncthreads();
+  functor::FillPhiloxRandomKernel<Distribution,
+                                  Distribution::kVariableSamplesPerOutput>()
+      .Run(*philox, output_data, output_size, dist);
+  // The last thread to finish (across all blocks) updates the state.
+  auto total_thread_count = gridDim.x * blockDim.x;
+  auto old_counter_value = atomicAdd(&thread_counter, 1);
+  if (old_counter_value == total_thread_count - 1) {
+    UpdateMemWithPhiloxRandom(*philox, output_size, state_data);
+  }
+}
+
+template <typename Distribution>
+void UpdateVariableAndFill_Philox<GPUDevice, Distribution>::operator()(
+    OpKernelContext* ctx, const GPUDevice& d, int64 output_size,
+    int64 alg_tag_skip, ScopedUnlockUnref* not_used, Tensor* state_tensor,
+    typename Distribution::ResultElementType* output_data) {
+  OP_REQUIRES(
+      ctx, alg_tag_skip == 0,
+      errors::InvalidArgument(
+          "GPU kernel doesn't support reading algorithm from state variable, "
+          "so alg_tag_skip must be 0; got",
+          alg_tag_skip));
+  auto state_tensor_flat = state_tensor->flat<StateElementType>();
+  auto state_size = state_tensor_flat.size();
+  auto state_data = state_tensor_flat.data();
+
+  // Maximize occupancy.
+  const int kGroupSize = Distribution::kResultElementCount;
+  int work_element_count = (output_size + kGroupSize - 1) / kGroupSize;
+  CudaLaunchConfig cfg = GetCudaLaunchConfig(work_element_count, d,
+                                             FillKernel<Distribution>, 0, 0);
+
+  int zero = 0;
+  cudaMemcpyToSymbol(thread_counter, &zero, sizeof(int));
+  FillKernel<Distribution>
+      <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
+          Distribution(), state_size, output_size, state_data, output_data);
+}
+
+// Explicit instantiation of the GPU distributions functors.
+
+// clang-format off
+// NVCC cannot handle ">>" properly
+template struct UpdateVariableAndFill_Philox<
+    GPUDevice, random::NormalDistribution<random::PhiloxRandom, Eigen::half> >;
+template struct UpdateVariableAndFill_Philox<
+    GPUDevice, random::NormalDistribution<random::PhiloxRandom, float> >;
+template struct UpdateVariableAndFill_Philox<
+    GPUDevice, random::NormalDistribution<random::PhiloxRandom, double> >;
+// clang-format on
+
+}  // end namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/string_to_number_op.cc b/tensorflow/core/kernels/string_to_number_op.cc
index 70dbd15..22742dd 100644
--- a/tensorflow/core/kernels/string_to_number_op.cc
+++ b/tensorflow/core/kernels/string_to_number_op.cc
@@ -51,7 +51,7 @@
     for (int i = 0; i < input_flat.size(); ++i) {
       OP_REQUIRES(
           context,
-          strings::SafeStringToNumeric<OutputType>(input_flat(i).c_str(),
+          strings::SafeStringToNumeric<OutputType>(input_flat(i),
                                                    &output_flat(i)),
           errors::InvalidArgument(kErrorMessage, input_flat(i).c_str()));
     }
diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/svd_op_gpu.cu.cc
index 8c3a58b..9e308cf 100644
--- a/tensorflow/core/kernels/svd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/svd_op_gpu.cu.cc
@@ -93,9 +93,48 @@
   }
 
   void RunSVD(OpKernelContext* context, DoneCallback done, int64 m, int64 n,
-              int64 p, int64 batch_size, Scalar* input_ptr,
-              RealScalar* outputS_ptr, Scalar* outputU_ptr,
-              Scalar* outputVT_ptr, int* dev_info_ptr, CudaSolver* solver) {
+              int64 p, Tensor& M_copy, Tensor* S, Tensor* U, Tensor* V,
+              std::unique_ptr<CudaSolver> solver) {
+    // Compute U S V* = M.
+    // 1. cuSolver works in column-major rather than row-major.
+    // 2. Gesvd returns V*.
+    // 3. Hence M must be transposed before being passed in, and U (rather
+    // than V) must be transposed on output.
+
+    Tensor u_copy;
+    if (compute_uv_) {
+      TensorShape u_shape;
+      if (full_matrices_) {
+        u_shape = U->shape();
+      } else {
+        TensorShape shapeRaw = M_copy.shape();
+        shapeRaw.RemoveLastDims(2);
+        u_shape = shapeRaw;
+        u_shape.AddDim(p);
+        u_shape.AddDim(m);
+      }
+      OP_REQUIRES_OK_ASYNC(
+          context, solver->allocate_scoped_tensor(U->dtype(), u_shape, &u_copy),
+          done);
+    }
+
+    // Get the pointers to the data.
+    Scalar* input_ptr;
+    RealScalar* outputS_ptr;
+    Scalar* outputU_ptr = NULL;
+    Scalar* outputV_ptr = NULL;
+    auto input_reshaped = M_copy.template flat_inner_dims<Scalar, 3>();
+    input_ptr = input_reshaped.data();
+    outputS_ptr = S->template flat_inner_dims<RealScalar, 2>().data();
+    if (compute_uv_) {
+      outputU_ptr = u_copy.template flat_inner_dims<Scalar, 3>().data();
+      outputV_ptr = V->template flat_inner_dims<Scalar, 3>().data();
+    }
+    const int64 batch_size = input_reshaped.dimension(0);
+    std::vector<DeviceLapackInfo> dev_info;
+    dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, "gesvd"));
+    int* dev_info_ptr = dev_info.back().mutable_data();
+
     // Save the input matrix
     // Needed for the n=1 fix, see below, since SVD destroys the input
     Tensor input_copy;
@@ -121,12 +160,12 @@
       if (compute_uv_) {
         if (full_matrices_) {
           outputU = outputU_ptr + batch * m * m;
-          outputVT = outputVT_ptr + batch * n * n;
+          outputVT = outputV_ptr + batch * n * n;
           jobu = 'A';
           jobvt = 'A';
         } else {
           outputU = outputU_ptr + batch * m * p;
-          outputVT = outputVT_ptr + batch * n * p;
+          outputVT = outputV_ptr + batch * n * p;
           jobu = 'S';
           jobvt = 'S';
         }
@@ -155,17 +194,24 @@
     if (compute_uv_ && n == 1) {
       // 1. compute the (batched) sum
       const GPUDevice& d = context->eigen_device<GPUDevice>();
-      d.memset(outputVT_ptr, 0, batch_size * sizeof(Scalar));
+      d.memset(outputV_ptr, 0, batch_size * sizeof(Scalar));
       Cuda2DLaunchConfig cfg2D = GetCuda2DLaunchConfig(batch_size, m, d);
       ComputeValueOfVKernel<<<cfg2D.block_count, cfg2D.thread_per_block, 0,
                               d.stream()>>>(
           cfg2D, m, full_matrices_ ? m : p, input_copy.flat<Scalar>().data(),
-          outputU_ptr, outputS_ptr, outputVT_ptr);
+          outputU_ptr, outputS_ptr, outputV_ptr);
       // 2. clamp V to -1 or +1
       CudaLaunchConfig cfg1D = GetCudaLaunchConfig(batch_size, d);
       ExtractSignOfVKernel<<<cfg1D.block_count, cfg1D.thread_per_block, 0,
-                             d.stream()>>>(cfg1D, outputVT_ptr);
+                             d.stream()>>>(cfg1D, outputV_ptr);
     }
+
+    if (compute_uv_) {
+      auto device = context->eigen_device<GPUDevice>();
+      OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, u_copy, U), done);
+    }
+
+    CheckResult(context, std::move(done), dev_info, std::move(solver));
   }
 
   void CheckResult(OpKernelContext* context, DoneCallback done,
@@ -192,10 +238,9 @@
   void PerformSVD_MgeqN(OpKernelContext* context, DoneCallback done, int64 m,
                         int64 n, int64 p, const Tensor& M, Tensor* S, Tensor* U,
                         Tensor* V) {
+    // Transpose M, because cuSolver expects it to be column-major
     TensorShape shapeRaw = M.shape();
     shapeRaw.RemoveLastDims(2);
-
-    // Transpose M, because cuSolver expects it to be column-major
     TensorShape input_shape = shapeRaw;
     input_shape.AddDim(n);
     input_shape.AddDim(m);
@@ -210,58 +255,16 @@
     OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, M, &input_copy),
                          done);
 
-    // I need to transpose U at the end
-    // Not V, because cuSolver work column-major
-    Tensor u_copy;
-    if (compute_uv_) {
-      TensorShape u_shape;
-      if (full_matrices_) {
-        u_shape = U->shape();
-      } else {
-        u_shape = shapeRaw;
-        u_shape.AddDim(p);
-        u_shape.AddDim(m);
-      }
-      OP_REQUIRES_OK_ASYNC(
-          context, solver->allocate_scoped_tensor(U->dtype(), u_shape, &u_copy),
-          done);
-    }
-
-    // get the pointers to the data
-    Scalar* input_ptr;
-    RealScalar* outputS_ptr;
-    Scalar* outputU_ptr = NULL;
-    Scalar* outputV_ptr = NULL;
-    auto input_reshaped = input_copy.template flat_inner_dims<Scalar, 3>();
-    input_ptr = input_reshaped.data();
-    outputS_ptr = S->template flat_inner_dims<RealScalar, 2>().data();
-    if (compute_uv_) {
-      outputU_ptr = u_copy.template flat_inner_dims<Scalar, 3>().data();
-      outputV_ptr = V->template flat_inner_dims<Scalar, 3>().data();
-    }
-
-    // call the SVD
-    const int64 batch_size = input_reshaped.dimension(0);
-    std::vector<DeviceLapackInfo> dev_info;
-    dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, "gesvd"));
-    RunSVD(context, done, m, n, p, batch_size, input_ptr, outputS_ptr,
-           outputU_ptr, outputV_ptr, dev_info.back().mutable_data(),
-           solver.get());
-
-    // Transpose U
-    if (compute_uv_) {
-      OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, u_copy, U), done);
-    }
-
-    // now check if the SVD operation succeeded or not
-    CheckResult(context, std::move(done), dev_info, std::move(solver));
+    // Call the SVD: compute U S V* = M.
+    RunSVD(context, done, m, n, p, input_copy, S, U, V, std::move(solver));
   }
 
   // The SVD if m < n
   void PerformSVD_MlessN(OpKernelContext* context, DoneCallback done, int64 m,
                          int64 n, int64 p, const Tensor& M, Tensor* S,
                          Tensor* U, Tensor* V) {
-    // Perform the SVD on M'
+    // Perform the SVD on M'. cuSolver works in column-major order, so M
+    // does not need to be transposed.
 
     // Reuse the input buffer or make a copy for the SVD depending on whether
     // this op owns the input buffer exclusively. This is needed because the
@@ -281,55 +284,8 @@
                M.NumElements() * sizeof(Scalar));
     }
 
-    // I need to transpose V at the end
-    Tensor v_copy;
-    if (compute_uv_) {
-      TensorShape v_shape;
-      if (full_matrices_) {
-        v_shape = V->shape();
-      } else {
-        TensorShape shapeRaw = M.shape();
-        shapeRaw.RemoveLastDims(2);
-        v_shape = shapeRaw;
-        v_shape.AddDim(p);
-        v_shape.AddDim(n);
-      }
-      OP_REQUIRES_OK_ASYNC(
-          context, solver->allocate_scoped_tensor(V->dtype(), v_shape, &v_copy),
-          done);
-    }
-
-    // get the pointers to the data
-    Scalar* input_ptr;
-    RealScalar* outputS_ptr;
-    Scalar* outputU_ptr = NULL;
-    Scalar* outputV_ptr = NULL;
-    auto input_reshaped = input_copy.template flat_inner_dims<Scalar, 3>();
-    input_ptr = input_reshaped.data();
-    outputS_ptr = S->template flat_inner_dims<RealScalar, 2>().data();
-    if (compute_uv_) {
-      // Note that U and V are flipped
-      outputU_ptr = v_copy.template flat_inner_dims<Scalar, 3>().data();
-      outputV_ptr = U->template flat_inner_dims<Scalar, 3>().data();
-    }
-
-    // call the SVD
-    const int64 batch_size = input_reshaped.dimension(0);
-    std::vector<DeviceLapackInfo> dev_info;
-    dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, "gesvd"));
-    // Note that m and n are flipped
-    RunSVD(context, done, n, m, p, batch_size, input_ptr, outputS_ptr,
-           outputU_ptr, outputV_ptr, dev_info.back().mutable_data(),
-           solver.get());
-
-    // Transpose V
-    if (compute_uv_) {
-      auto device = context->eigen_device<GPUDevice>();
-      OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, v_copy, V), done);
-    }
-
-    // now check if the SVD operation succeeded or not
-    CheckResult(context, std::move(done), dev_info, std::move(solver));
+    // Call the SVD: compute V S U* = M*.
+    RunSVD(context, done, n, m, p, input_copy, S, V, U, std::move(solver));
   }
 
   void ComputeAsync(OpKernelContext* context, DoneCallback done) final {
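
Note on the refactor above (a sketch for real scalars; complex types add
conjugation details): if M = U S V^T, then transposing gives M^T = V S U^T.
A row-major buffer holding M is, reinterpreted as column-major, exactly M^T.
PerformSVD_MgeqN therefore transposes M before calling RunSVD and transposes
U back on output, while PerformSVD_MlessN passes its buffer untransposed and
simply swaps (U, V) and (m, n) in the call.
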
diff --git a/tensorflow/core/kernels/tridiagonal_solve_op.cc b/tensorflow/core/kernels/tridiagonal_solve_op.cc
new file mode 100644
index 0000000..5884ffe
--- /dev/null
+++ b/tensorflow/core/kernels/tridiagonal_solve_op.cc
@@ -0,0 +1,163 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/linalg_ops.cc.
+
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+static const char kErrMsg[] = "The matrix is not invertible.";
+
+template <class Scalar>
+class TridiagonalSolveOp : public LinearAlgebraOp<Scalar> {
+ public:
+  INHERIT_LINALG_TYPEDEFS(Scalar);
+
+  explicit TridiagonalSolveOp(OpKernelConstruction* context) : Base(context) {}
+
+  void ValidateInputMatrixShapes(
+      OpKernelContext* context,
+      const TensorShapes& input_matrix_shapes) const final {
+    auto num_inputs = input_matrix_shapes.size();
+    OP_REQUIRES(context, num_inputs == 2,
+                errors::InvalidArgument("Expected two input matrices, got ",
+                                        num_inputs, "."));
+
+    auto num_diags = input_matrix_shapes[0].dim_size(0);
+    OP_REQUIRES(
+        context, num_diags == 3,
+        errors::InvalidArgument("Expected diagonals to be provided as a "
+                                "matrix with 3 rows, got ",
+                                num_diags, " rows."));
+
+    auto num_eqs_left = input_matrix_shapes[0].dim_size(1);
+    auto num_eqs_right = input_matrix_shapes[1].dim_size(0);
+    OP_REQUIRES(
+        context, num_eqs_left == num_eqs_right,
+        errors::InvalidArgument("Expected the same number of left-hand sides "
+                                "and right-hand sides, got ",
+                                num_eqs_left, " and ", num_eqs_right, "."));
+  }
+
+  TensorShapes GetOutputMatrixShapes(
+      const TensorShapes& input_matrix_shapes) const final {
+    return TensorShapes({input_matrix_shapes[1]});
+  }
+
+  int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
+    const int num_eqs = static_cast<int>(input_matrix_shapes[0].dim_size(1));
+    const int num_rhss = static_cast<int>(input_matrix_shapes[1].dim_size(0));
+
+    const double add_cost = Eigen::TensorOpCost::AddCost<Scalar>();
+    const double mult_cost = Eigen::TensorOpCost::MulCost<Scalar>();
+    const double div_cost = Eigen::TensorOpCost::DivCost<Scalar>();
+
+    // Assuming cases with and without row interchange are equiprobable.
+    const double cost =
+        num_eqs * (div_cost * (num_rhss + 1) +
+                   (add_cost + mult_cost) * (2.5 * num_rhss + 1.5));
+    return cost >= static_cast<double>(kint64max) ? kint64max
+                                                  : static_cast<int64>(cost);
+  }
+
+  void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
+                     MatrixMaps* outputs) final {
+    const auto diagonals = inputs[0];
+
+    // Superdiagonal elements, n-th is ignored.
+    const auto& superdiag = diagonals.row(0);
+    // Diagonal elements.
+    const auto& diag = diagonals.row(1);
+    // Subdiagonal elements, first is ignored.
+    const auto& subdiag = diagonals.row(2);
+    // Right-hand sides, transposed (necessary for the GPU implementation).
+    const auto& rhs = inputs[1];
+
+    const int n = diag.size();
+    MatrixMap& x = outputs->at(0);
+    const Scalar zero(0);
+
+    if (n == 0) {
+      return;
+    }
+    if (n == 1) {
+      OP_REQUIRES(context, diag(0) != zero, errors::InvalidArgument(kErrMsg));
+      x.row(0) = rhs.row(0) / diag(0);
+      return;
+    }
+
+    // The three columns of u are the diagonal, superdiagonal, and second
+    // superdiagonal, respectively, of the U matrix in the LU decomposition of
+    // the input matrix (subject to row exchanges due to pivoting). For a
+    // pivoted tridiagonal matrix, U has at most two non-zero superdiagonals.
+    Eigen::Array<Scalar, Eigen::Dynamic, 3> u(n, 3);
+
+    // The code below roughly follows LAPACK's dgtsv routine, with the main
+    // difference being that it does not overwrite the input.
+    u(0, 0) = diag(0);
+    u(0, 1) = superdiag(0);
+    x.row(0) = rhs.row(0);
+    for (int i = 0; i < n - 1; ++i) {
+      if (std::abs(u(i, 0)) >= std::abs(subdiag(i + 1))) {
+        // No row interchange.
+        OP_REQUIRES(context, u(i, 0) != zero, errors::InvalidArgument(kErrMsg));
+        const Scalar factor = subdiag(i + 1) / u(i, 0);
+        u(i + 1, 0) = diag(i + 1) - factor * u(i, 1);
+        x.row(i + 1) = rhs.row(i + 1) - factor * x.row(i);
+        if (i != n - 2) {
+          u(i + 1, 1) = superdiag(i + 1);
+          u(i, 2) = 0;
+        }
+      } else {
+        // Interchange rows i and i + 1.
+        const Scalar factor = u(i, 0) / subdiag(i + 1);
+        u(i, 0) = subdiag(i + 1);
+        u(i + 1, 0) = u(i, 1) - factor * diag(i + 1);
+        u(i, 1) = diag(i + 1);
+        x.row(i + 1) = x.row(i) - factor * rhs.row(i + 1);
+        x.row(i) = rhs.row(i + 1);
+        if (i != n - 2) {
+          u(i, 2) = superdiag(i + 1);
+          u(i + 1, 1) = -factor * superdiag(i + 1);
+        }
+      }
+    }
+    x.row(n - 1) /= u(n - 1, 0);
+    x.row(n - 2) = (x.row(n - 2) - u(n - 2, 1) * x.row(n - 1)) / u(n - 2, 0);
+    for (int i = n - 3; i >= 0; --i) {
+      x.row(i) = (x.row(i) - u(i, 1) * x.row(i + 1) - u(i, 2) * x.row(i + 2)) /
+                 u(i, 0);
+    }
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(TridiagonalSolveOp);
+};
+
+REGISTER_LINALG_OP_CPU("TridiagonalSolve", (TridiagonalSolveOp<float>), float);
+REGISTER_LINALG_OP_CPU("TridiagonalSolve", (TridiagonalSolveOp<double>),
+                       double);
+REGISTER_LINALG_OP_CPU("TridiagonalSolve", (TridiagonalSolveOp<complex64>),
+                       complex64);
+REGISTER_LINALG_OP_CPU("TridiagonalSolve", (TridiagonalSolveOp<complex128>),
+                       complex128);
+}  // namespace tensorflow
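
For readers tracing the algorithm, below is a self-contained sketch (not part
of this change) of the same pivoted elimination, specialized to double and a
single right-hand side; the variable names mirror the kernel above.

#include <cmath>
#include <cstdio>
#include <vector>

// Solves a tridiagonal system. superdiag[n-1] and subdiag[0] are ignored,
// matching the kernel's input layout.
std::vector<double> SolveTridiagonal(const std::vector<double>& superdiag,
                                     const std::vector<double>& diag,
                                     const std::vector<double>& subdiag,
                                     const std::vector<double>& rhs) {
  const int n = static_cast<int>(diag.size());
  // u0, u1, u2: diagonal and two superdiagonals of U in the LU factorization.
  std::vector<double> u0(n, 0.0), u1(n, 0.0), u2(n, 0.0);
  std::vector<double> x = rhs;
  u0[0] = diag[0];
  u1[0] = superdiag[0];
  for (int i = 0; i < n - 1; ++i) {
    if (std::abs(u0[i]) >= std::abs(subdiag[i + 1])) {
      // No row interchange.
      const double factor = subdiag[i + 1] / u0[i];
      u0[i + 1] = diag[i + 1] - factor * u1[i];
      x[i + 1] = rhs[i + 1] - factor * x[i];
      if (i != n - 2) u1[i + 1] = superdiag[i + 1];
    } else {
      // Interchange rows i and i + 1; U gains a second superdiagonal entry.
      const double factor = u0[i] / subdiag[i + 1];
      u0[i] = subdiag[i + 1];
      u0[i + 1] = u1[i] - factor * diag[i + 1];
      u1[i] = diag[i + 1];
      x[i + 1] = x[i] - factor * rhs[i + 1];
      x[i] = rhs[i + 1];
      if (i != n - 2) {
        u2[i] = superdiag[i + 1];
        u1[i + 1] = -factor * superdiag[i + 1];
      }
    }
  }
  // Back substitution against the (at most) two superdiagonals of U.
  x[n - 1] /= u0[n - 1];
  if (n > 1) x[n - 2] = (x[n - 2] - u1[n - 2] * x[n - 1]) / u0[n - 2];
  for (int i = n - 3; i >= 0; --i) {
    x[i] = (x[i] - u1[i] * x[i + 1] - u2[i] * x[i + 2]) / u0[i];
  }
  return x;
}

int main() {
  // [2 1 0]       [3]
  // [1 2 1] x  =  [4]   =>   x = (1, 1, 1).
  const std::vector<double> x =
      SolveTridiagonal({1, 1, 0}, {2, 2, 2}, {0, 1, 1}, {3, 4, 3});
  for (double v : x) std::printf("%g ", v);  // Prints: 1 1 1
  std::printf("\n");
}
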
diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h
index 9a7c635..1294ccf 100644
--- a/tensorflow/core/lib/bfloat16/bfloat16.h
+++ b/tensorflow/core/lib/bfloat16/bfloat16.h
@@ -372,6 +372,12 @@
     return x;
   }
 
+  static bfloat16 min_positive_normal() {
+    bfloat16 x;
+    x.value = 0x0080;  // 0x1p-126
+    return x;
+  }
+
   bool IsZero() const { return (value & 0x7FFF) == ZERO_VALUE; }
 
   uint16_t value;
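
The constant above follows from the bfloat16 layout (1 sign bit, 8 exponent
bits, 7 mantissa bits): 0x0080 sets the exponent field to 1 with a zero
mantissa, i.e. 2^(1 - 127) = 2^-126, the smallest positive normal value. A
quick standalone check (a sketch, relying on bfloat16 being the upper 16 bits
of an IEEE-754 binary32):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Widening the bfloat16 pattern 0x0080 by 16 zero bits yields the
  // bit-identical float32 value.
  const uint32_t bits = static_cast<uint32_t>(0x0080) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  std::printf("%a\n", f);  // Prints 0x1p-126.
}
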
diff --git a/tensorflow/core/lib/monitoring/collection_registry_test.cc b/tensorflow/core/lib/monitoring/collection_registry_test.cc
index ca25f50..ce87e4d 100644
--- a/tensorflow/core/lib/monitoring/collection_registry_test.cc
+++ b/tensorflow/core/lib/monitoring/collection_registry_test.cc
@@ -81,14 +81,6 @@
       "/tensorflow/metric");
 }
 
-TEST(CollectMetricsTest, NoMetrics) {
-  auto* collection_registry = CollectionRegistry::Default();
-  const std::unique_ptr<CollectedMetrics> collected_metrics =
-      collection_registry->CollectMetrics({});
-  EXPECT_EQ(0, collected_metrics->metric_descriptor_map.size());
-  EXPECT_EQ(0, collected_metrics->point_set_map.size());
-}
-
 TEST(CollectMetricsTest, Counter) {
   auto counter_with_labels = std::unique_ptr<Counter<2>>(
       Counter<2>::New("/tensorflow/test/counter_with_labels",
@@ -111,7 +103,7 @@
         collection_registry->CollectMetrics(options);
 
     if (collect_metric_descriptors) {
-      ASSERT_EQ(2, collected_metrics->metric_descriptor_map.size());
+      ASSERT_GE(collected_metrics->metric_descriptor_map.size(), 2);
 
       const MetricDescriptor& ld = *collected_metrics->metric_descriptor_map.at(
           "/tensorflow/test/counter_with_labels");
@@ -134,7 +126,7 @@
       EXPECT_EQ(0, collected_metrics->metric_descriptor_map.size());
     }
 
-    ASSERT_EQ(2, collected_metrics->point_set_map.size());
+    ASSERT_GE(collected_metrics->point_set_map.size(), 2);
 
     const PointSet& lps = *collected_metrics->point_set_map.at(
         "/tensorflow/test/counter_with_labels");
@@ -201,7 +193,7 @@
         collection_registry->CollectMetrics(options);
 
     if (collect_metric_descriptors) {
-      ASSERT_EQ(2, collected_metrics->metric_descriptor_map.size());
+      ASSERT_GE(collected_metrics->metric_descriptor_map.size(), 2);
 
       const MetricDescriptor& ld = *collected_metrics->metric_descriptor_map.at(
           "/tensorflow/test/string_gauge_with_labels");
@@ -224,7 +216,7 @@
       EXPECT_EQ(0, collected_metrics->metric_descriptor_map.size());
     }
 
-    ASSERT_EQ(2, collected_metrics->point_set_map.size());
+    ASSERT_GE(collected_metrics->point_set_map.size(), 2);
 
     const PointSet& lps = *collected_metrics->point_set_map.at(
         "/tensorflow/test/string_gauge_with_labels");
@@ -307,7 +299,7 @@
         collection_registry->CollectMetrics(options);
 
     if (collect_metric_descriptors) {
-      ASSERT_EQ(2, collected_metrics->metric_descriptor_map.size());
+      ASSERT_GE(collected_metrics->metric_descriptor_map.size(), 2);
 
       const MetricDescriptor& ld = *collected_metrics->metric_descriptor_map.at(
           "/tensorflow/test/sampler_with_labels");
@@ -330,7 +322,7 @@
       EXPECT_EQ(0, collected_metrics->metric_descriptor_map.size());
     }
 
-    ASSERT_EQ(2, collected_metrics->point_set_map.size());
+    ASSERT_GE(collected_metrics->point_set_map.size(), 2);
 
     const PointSet& lps = *collected_metrics->point_set_map.at(
         "/tensorflow/test/sampler_with_labels");
diff --git a/tensorflow/core/lib/strings/proto_serialization.cc b/tensorflow/core/lib/strings/proto_serialization.cc
index a6c321c..2341d3e 100644
--- a/tensorflow/core/lib/strings/proto_serialization.cc
+++ b/tensorflow/core/lib/strings/proto_serialization.cc
@@ -17,11 +17,15 @@
 #include <cstring>
 #include "absl/memory/memory.h"
 #include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 
 namespace tensorflow {
+namespace {
+static const int kInlinedBufferSize = 256;
+}  // namespace
 
 bool SerializeToStringDeterministic(const protobuf::MessageLite& msg,
                                     string* result) {
@@ -47,28 +51,28 @@
   const size_t size = x.ByteSizeLong();
   if (size != y.ByteSizeLong()) return false;
   if (size == 0) return true;
-  auto x_serialized = absl::make_unique<char[]>(size);
-  bool success_x = SerializeToBufferDeterministic(x, x_serialized.get(), size);
+  gtl::InlinedVector<char, kInlinedBufferSize> x_serialized(size);
+  bool success_x = SerializeToBufferDeterministic(x, x_serialized.data(), size);
   DCHECK(success_x);
-  auto y_serialized = absl::make_unique<char[]>(size);
-  bool success_y = SerializeToBufferDeterministic(y, y_serialized.get(), size);
+  gtl::InlinedVector<char, kInlinedBufferSize> y_serialized(size);
+  bool success_y = SerializeToBufferDeterministic(y, y_serialized.data(), size);
   DCHECK(success_y);
-  return memcmp(x_serialized.get(), y_serialized.get(), size) == 0;
+  return memcmp(x_serialized.data(), y_serialized.data(), size) == 0;
 }
 
 uint64 DeterministicProtoHash64(const protobuf::MessageLite& proto,
                                 uint64 seed) {
   const size_t size = proto.ByteSizeLong();
-  auto serialized = absl::make_unique<char[]>(size);
-  SerializeToBufferDeterministic(proto, serialized.get(), size);
-  return Hash64(serialized.get(), size, seed);
+  gtl::InlinedVector<char, kInlinedBufferSize> serialized(size);
+  SerializeToBufferDeterministic(proto, serialized.data(), size);
+  return Hash64(serialized.data(), size, seed);
 }
 
 uint64 DeterministicProtoHash64(const protobuf::MessageLite& proto) {
   const size_t size = proto.ByteSizeLong();
-  auto serialized = absl::make_unique<char[]>(size);
-  SerializeToBufferDeterministic(proto, serialized.get(), size);
-  return Hash64(serialized.get(), size);
+  gtl::InlinedVector<char, kInlinedBufferSize> serialized(size);
+  SerializeToBufferDeterministic(proto, serialized.data(), size);
+  return Hash64(serialized.data(), size);
 }
 
 }  // namespace tensorflow
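
The switch from absl::make_unique<char[]> to gtl::InlinedVector<char,
kInlinedBufferSize> keeps serializations of up to 256 bytes in inline storage,
avoiding a heap allocation per hash or comparison for small protos; larger
messages transparently fall back to the heap. A minimal sketch of the pattern
(using absl::InlinedVector, which gtl::InlinedVector is understood to alias):

#include <cstdio>
#include "absl/container/inlined_vector.h"

int main() {
  // Up to 256 elements live inside the object itself; larger sizes trigger a
  // single heap allocation, with the same data()/size() interface either way.
  absl::InlinedVector<char, 256> small_buf(100);  // Inline storage, no malloc.
  absl::InlinedVector<char, 256> big_buf(4096);   // Spills to the heap.
  std::printf("%zu %zu\n", small_buf.size(), big_buf.size());
}
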
diff --git a/tensorflow/core/lib/strings/proto_serialization_test.cc b/tensorflow/core/lib/strings/proto_serialization_test.cc
index cbde20e..81a6f08 100644
--- a/tensorflow/core/lib/strings/proto_serialization_test.cc
+++ b/tensorflow/core/lib/strings/proto_serialization_test.cc
@@ -17,22 +17,32 @@
 
 #include <string>
 #include "absl/memory/memory.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
-
-static void BM_ProtoSerializationToString(int iters, int num_nodes) {
-  testing::StopTiming();
+namespace {
+GraphDef MakeGraphDef(int num_nodes) {
   GraphDef graph_def;
   for (int i = 0; i < num_nodes; ++i) {
     NodeDef* node = graph_def.add_node();
     node->set_name(strings::StrCat("node", i));
     node->set_op(strings::StrCat("op", i % 10));
+    (*node->mutable_attr())["foo"].set_f(3.14f);
+    (*node->mutable_attr())["bar"].set_s("baz");
   }
+  return graph_def;
+}
+}  // namespace
+
+static void BM_ProtoSerializationToString(int iters, int num_nodes) {
+  testing::StopTiming();
+  GraphDef graph_def = MakeGraphDef(num_nodes);
   testing::StartTiming();
   for (int i = 0; i < iters; ++i) {
     string serialized;
@@ -45,22 +55,40 @@
 
 static void BM_ProtoSerializationToBuffer(int iters, int num_nodes) {
   testing::StopTiming();
-  GraphDef graph_def;
-  for (int i = 0; i < num_nodes; ++i) {
-    NodeDef* node = graph_def.add_node();
-    node->set_name(strings::StrCat("node", i));
-    node->set_op(strings::StrCat("op", i % 10));
-  }
+  GraphDef graph_def = MakeGraphDef(num_nodes);
   testing::StartTiming();
   const size_t size = graph_def.ByteSizeLong();
   for (int i = 0; i < iters; ++i) {
-    auto buf = absl::make_unique<char[]>(size);
+    gtl::InlinedVector<char, 1024> buf(size);
     testing::DoNotOptimize(
-        SerializeToBufferDeterministic(graph_def, buf.get(), size));
+        SerializeToBufferDeterministic(graph_def, buf.data(), size));
   }
   testing::StopTiming();
 }
-
 BENCHMARK(BM_ProtoSerializationToBuffer)->Range(1, 10000);
 
+static void BM_DeterministicProtoHash64(int iters, int num_nodes) {
+  testing::StopTiming();
+  GraphDef graph_def = MakeGraphDef(num_nodes);
+  testing::StartTiming();
+  for (int i = 0; i < iters; ++i) {
+    testing::DoNotOptimize(DeterministicProtoHash64(graph_def));
+  }
+  testing::StopTiming();
+}
+BENCHMARK(BM_DeterministicProtoHash64)->Range(1, 10000);
+
+static void BM_AreSerializedProtosEqual(int iters, int num_nodes) {
+  testing::StopTiming();
+  GraphDef graph_def_a = MakeGraphDef(num_nodes);
+  GraphDef graph_def_b = MakeGraphDef(num_nodes);
+  // Flip one character so the protos differ but serialize to the same size.
+  (*graph_def_b.mutable_node(0)->mutable_name())[0] = 'l';
+  testing::StartTiming();
+  for (int i = 0; i < iters; ++i) {
+    testing::DoNotOptimize(AreSerializedProtosEqual(graph_def_a, graph_def_b));
+  }
+  testing::StopTiming();
+}
+BENCHMARK(BM_AreSerializedProtosEqual)->Range(1, 10000);
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/nccl/nccl_manager.cc b/tensorflow/core/nccl/nccl_manager.cc
index 5452843..a0b602f 100644
--- a/tensorflow/core/nccl/nccl_manager.cc
+++ b/tensorflow/core/nccl/nccl_manager.cc
@@ -63,7 +63,7 @@
   std::unique_ptr<Thread> thread;
   mutex mu;
   condition_variable cv;
-  // Has collective,rank pairs.
+  // Has (collective, participant_idx) pairs.
   std::deque<std::pair<Collective*, int>> pending_launches_ GUARDED_BY(mu);
   bool shutdown_requested GUARDED_BY(mu) = false;
 };
@@ -312,8 +312,10 @@
   CUDA_RETURN_IF_ERROR(cudaGetDevice(&saved_device));
   NCCL_RETURN_IF_ERROR(ncclGroupStart());
   for (int i = 0; i < collective->num_local_devices; ++i) {
-    const int rank =
-        collective->single_node ? i : collective->participants[i]->global_rank;
+    // Set rank to `participant->global_rank` if provided, else `i`.
+    const int rank = collective->participants[i]->global_rank >= 0
+                         ? collective->participants[i]->global_rank
+                         : i;
     CUDA_RETURN_IF_ERROR(cudaSetDevice(devices[i]));
     NCCL_RETURN_IF_ERROR(ncclCommInitRank(
         nccl_comms.data() + i, collective->num_global_devices, nccl_id, rank));
@@ -344,6 +346,12 @@
   AddParticipant(std::move(participant), context, kAllReduce, reduction_op);
 }
 
+void NcclManager::AddToAllGather(std::unique_ptr<Participant> participant,
+                                 const Context& context) {
+  AddParticipant(std::move(participant), context, kAllGather,
+                 ncclSum /* unused */);
+}
+
 void NcclManager::AddBroadcastSend(std::unique_ptr<Participant> participant,
                                    const Context& context) {
   participant->root = true;
@@ -492,13 +500,11 @@
     return;
   }
 
-  for (int local_rank = 0; local_rank < collective->num_local_devices;
-       ++local_rank) {
-    Participant* p = collective->participants[local_rank].get();
-    NcclStream* nccl_stream =
-        collective->communicator->members[local_rank].nccl_stream;
+  for (int i = 0; i < collective->num_local_devices; ++i) {
+    Participant* p = collective->participants[i].get();
+    NcclStream* nccl_stream = collective->communicator->members[i].nccl_stream;
     CHECK(nccl_stream != nullptr);
-    const int rank = collective->single_node ? local_rank : p->global_rank;
+    const int rank = p->global_rank >= 0 ? p->global_rank : i;
 
     if (p->input != nullptr) {
       // Wait to ensure that the kernel that produces the data in the input
@@ -522,13 +528,11 @@
     // Note that it would be possible to run multiple collectives at once, if
     // they have non-intersecting sets of devices.
     mutex_lock l(collective_mu);
-    for (int local_rank = 0; local_rank < collective->num_local_devices;
-         ++local_rank) {
+    for (int i = 0; i < collective->num_local_devices; ++i) {
       NcclStream* nccl_stream =
-          collective->communicator->members[local_rank].nccl_stream;
+          collective->communicator->members[i].nccl_stream;
       mutex_lock l(nccl_stream->mu);
-      nccl_stream->pending_launches_.push_front(
-          std::make_pair(collective, local_rank));
+      nccl_stream->pending_launches_.push_front(std::make_pair(collective, i));
       nccl_stream->cv.notify_all();
     }
   }
@@ -555,23 +559,22 @@
       next_launch = nccl_stream->pending_launches_.back();
       nccl_stream->pending_launches_.pop_back();
     }
-    Collective* collective = next_launch.first;
-    int local_rank = next_launch.second;
 
     // Launch the nccl kernel.
+    Collective* collective = next_launch.first;
     ncclDataType_t data_type = ToNcclType(collective->data_type);
-    Participant* p = collective->participants[local_rank].get();
-
-    auto nccl_comm = collective->communicator->members[local_rank].nccl_comm;
+    int p_idx = next_launch.second;
+    Participant* p = collective->participants[p_idx].get();
+    auto nccl_comm = collective->communicator->members[p_idx].nccl_comm;
     ncclResult_t nccl_result = ncclSuccess;
     switch (collective->type) {
       case kAllReduce: {
         const void* sendbuff = p->input->tensor_data().data();
         void* recvbuff = const_cast<char*>(p->output->tensor_data().data());
 
-        VLOG(2) << "call NcclAllReduce participant " << local_rank
-                << " sendbuff " << sendbuff << " recvbuff " << recvbuff
-                << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream
+        VLOG(2) << "call NcclAllReduce participant " << p_idx << " sendbuff "
+                << sendbuff << " recvbuff " << recvbuff << " nccl_comm "
+                << nccl_comm << " comm_stream " << comm_stream
                 << " cuda_stream " << cu_stream;
         nccl_result = ncclAllReduce(sendbuff, recvbuff, p->input->NumElements(),
                                     data_type, collective->reduction_op,
@@ -595,16 +598,30 @@
                                  collective->root_rank, nccl_comm, *cu_stream);
         break;
       }
+      case kAllGather: {
+        const void* sendbuff = p->input->tensor_data().data();
+        void* recvbuff = const_cast<char*>(p->output->tensor_data().data());
+
+        VLOG(2) << "call NcclAllGather participant " << p_idx << " sendbuff "
+                << sendbuff << " sendcount " << p->input->NumElements()
+                << " recvbuff " << recvbuff << " recvcount "
+                << p->output->NumElements() << " nccl_comm " << nccl_comm
+                << " comm_stream " << comm_stream << " cuda_stream "
+                << cu_stream;
+        nccl_result = ncclAllGather(sendbuff, recvbuff, p->input->NumElements(),
+                                    data_type, nccl_comm, *cu_stream);
+        break;
+      }
     }
 
     // Run the done_callback when the nccl kernel finishes running.
-    auto done_callback = [collective, local_rank, nccl_result]() {
+    auto done_callback = [collective, p_idx, nccl_result]() {
       if (nccl_result == ncclSuccess) {
-        collective->participants[local_rank]->done_callback(Status::OK());
+        collective->participants[p_idx]->done_callback(Status::OK());
       } else {
         // Propagate the error, but note that if other members of the collective
         // did launch their kernels, then they are hanging.
-        collective->participants[local_rank]->done_callback(errors::Unknown(
+        collective->participants[p_idx]->done_callback(errors::Unknown(
             "Error invoking NCCL: ", ncclGetErrorString(nccl_result)));
       }
 
diff --git a/tensorflow/core/nccl/nccl_manager.h b/tensorflow/core/nccl/nccl_manager.h
index 6ac5deb..7cf2c85 100644
--- a/tensorflow/core/nccl/nccl_manager.h
+++ b/tensorflow/core/nccl/nccl_manager.h
@@ -143,6 +143,10 @@
   void AddToAllReduce(std::unique_ptr<Participant> participant,
                       const Context& context, ncclRedOp_t reduction_op);
 
+  // Adds one participant to an all-gather.
+  void AddToAllGather(std::unique_ptr<Participant> participant,
+                      const Context& context);
+
   // AddBroadcastSend and AddBroadcastRecv combine to send data from one sender
   // to all receivers.
   void AddBroadcastSend(std::unique_ptr<Participant> participant,
@@ -170,6 +174,7 @@
     kAllReduce = 1,
     kBroadcast = 2,
     kReduce = 3,
+    kAllGather = 4,
   };
   struct Collective;
   struct Communicator;
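
For context on AddToAllGather (a hedged summary of the NCCL contract, not a
statement about this code beyond what the diff shows): ncclAllGather has each
of the k participating ranks contribute sendcount elements and delivers the
rank-ordered concatenation to every rank, so each participant's output buffer
must hold k * sendcount elements. A layout sketch:

#include <cstdio>
#include <vector>

int main() {
  // k ranks, n elements each: after all-gather, every rank holds k * n
  // elements, with rank r's contribution at offsets [r * n, (r + 1) * n).
  const int k = 4, n = 6;  // E.g. four ranks, a 2x3 tensor each.
  std::vector<std::vector<float>> in(k, std::vector<float>(n));
  for (int r = 0; r < k; ++r)
    for (int j = 0; j < n; ++j) in[r][j] = 100.0f * r + j;
  std::vector<float> out;  // What each rank's recvbuff holds afterwards.
  for (const auto& v : in) out.insert(out.end(), v.begin(), v.end());
  std::printf("recvcount = %zu (= k * sendcount = %d)\n", out.size(), k * n);
}

This matches MakeGatherTestCase in the test changes below, where each rank's
{2, 3} input lands in a {2 * num_ranks, 3} output.
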
diff --git a/tensorflow/core/nccl/nccl_manager_test.cc b/tensorflow/core/nccl/nccl_manager_test.cc
index e65af13..420e143 100644
--- a/tensorflow/core/nccl/nccl_manager_test.cc
+++ b/tensorflow/core/nccl/nccl_manager_test.cc
@@ -73,9 +73,9 @@
 
   static void TearDownTestCase() { delete devices_; }
 
-  TestCase* MakeTestCase(int num_nodes, int num_ranks_per_node,
-                         ncclRedOp_t reduction_op, TensorShape shape,
-                         float value_offset) {
+  TestCase* MakeReductionTestCase(int num_nodes, int num_ranks_per_node,
+                                  ncclRedOp_t reduction_op, TensorShape shape,
+                                  float value_offset) {
     TestCase* test_case = new TestCase();
     test_case->expected = Tensor(data_type_, shape);
     if (reduction_op == ncclProd) {
@@ -134,6 +134,47 @@
     return test_case;
   }
 
+  TestCase* MakeGatherTestCase(int num_nodes, int num_ranks_per_node,
+                               TensorShape in_shape, TensorShape out_shape) {
+    TestCase* test_case = new TestCase();
+    test_case->expected = Tensor(data_type_, out_shape);
+    test::FillFn<Scalar>(&test_case->expected,
+                         [](int) { return static_cast<Scalar>(0); });
+
+    float value_scale = 0.01;  // Small scale to avoid fp16 overflow.
+    for (int node = 0; node < num_nodes; ++node) {
+      for (int i = 0; i < num_ranks_per_node; ++i) {
+        auto* device = GetDevice(i);
+        auto* stream = device->tensorflow_gpu_device_info()->stream;
+
+        Tensor in_cpu(data_type_, in_shape);
+        test::FillFn<Scalar>(&in_cpu, [&](int index) {
+          return static_cast<Scalar>((index + 1) * value_scale);
+        });
+        // Starting index for this rank's tensor in the all-gathered output.
+        int32 gather_idx =
+            (node * num_ranks_per_node + i) * in_shape.num_elements();
+        for (int j = 0; j < in_shape.num_elements(); ++j) {
+          auto in_val = in_cpu.flat<Scalar>()(j);
+          auto out_expr = test_case->expected.template flat<Scalar>();
+          out_expr(gather_idx + j) = in_val;
+        }
+
+        value_scale *= 10;
+        test_case->ins.emplace_back(GpuAllocator(device), data_type_, in_shape);
+        test_case->outs.emplace_back(GpuAllocator(device), data_type_,
+                                     out_shape);
+
+        const Tensor& in_gpu = test_case->ins.back();
+        auto in_gpu_mem = AsDeviceMemory(in_gpu.flat<Scalar>().data());
+        stream->ThenMemcpy(&in_gpu_mem, in_cpu.flat<Scalar>().data(),
+                           in_cpu.TotalBytes());
+      }
+    }
+
+    return test_case;
+  }
+
   // Waits for the done callback to be called for each participant.
   void WaitForTestCompletion(TestCase* test_case) {
     test_case->mu.lock();
@@ -158,6 +199,9 @@
       stream->ThenMemcpy(out_cpu.flat<Scalar>().data(), out_gpu_mem,
                          out_cpu.TotalBytes());
       SE_ASSERT_OK(stream->BlockHostUntilDone());
+      VLOG(1) << "Verifying rank " << rank << " expected shape "
+              << test_case->expected.shape() << " out shape "
+              << out_cpu.shape();
       test::ExpectClose(test_case->expected, out_cpu);
     }
   }
@@ -218,8 +262,8 @@
   for (int op = 0; op < 4; ++op) {
     ncclRedOp_t reduction_op = static_cast<ncclRedOp_t>(op);
     std::unique_ptr<typename TestFixture::TestCase> test_case(
-        this->MakeTestCase(/*num_nodes=*/1, num_ranks, reduction_op,
-                           TensorShape({2, 3}), 0.0f));
+        this->MakeReductionTestCase(/*num_nodes=*/1, num_ranks, reduction_op,
+                                    TensorShape({2, 3}), 0.0f));
     for (int rank = 0; rank < num_ranks; ++rank) {
       auto* device = this->GetDevice(rank);
       VLOG(2) << "rank " << rank << " device " << device->name();
@@ -259,7 +303,7 @@
     std::vector<std::pair<int, int>> case_and_rank;
     std::vector<std::unique_ptr<typename TestFixture::TestCase>> test_cases;
     for (int i = 0; i < num_collectives_per_iteration; ++i) {
-      test_cases.emplace_back(this->MakeTestCase(
+      test_cases.emplace_back(this->MakeReductionTestCase(
           /*num_nodes=*/1, num_ranks, ncclSum,
           TensorShape({100, i % 5 + 1, i % 3 + 1}), 1.1f * i));
       for (int j = 0; j < num_ranks; ++j) {
@@ -324,6 +368,34 @@
   }
 }
 
+// Test basic all-gather.
+TYPED_TEST(NcclManagerTest, BasicAllGather) {
+  const int num_ranks = 4;
+  for (int i = 0; i < num_ranks; ++i) {
+    std::unique_ptr<typename TestFixture::TestCase> test_case(
+        this->MakeGatherTestCase(/*num_nodes=*/1, num_ranks,
+                                 TensorShape({2, 3}),
+                                 TensorShape({2 * num_ranks, 3})));
+    for (int rank = 0; rank < num_ranks; ++rank) {
+      auto* device = this->GetDevice(rank);
+      VLOG(2) << "rank " << rank << " device " << device->name();
+      auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
+      auto* stream = device->tensorflow_gpu_device_info()->stream;
+      auto participant = absl::make_unique<NcclManager::Participant>(
+          device->executor(), stream, event_mgr, device->gpu_id(),
+          &test_case->ins[rank], &test_case->outs[rank], rank,
+          this->CreateDoneCallback(test_case.get()));
+      NcclManager::instance()->AddToAllGather(
+          std::move(participant),
+          {"allgather", /*num_local_devices=*/num_ranks,
+           /*num_global_devices=*/num_ranks, /*communicator_key=*/""});
+    }
+
+    LOG(INFO) << "Verifying results";
+    this->VerifyResults(test_case.get());
+  }
+}
+
 // Multi-node NCCL tests.
 
 TEST(NcclManagerTest, CommunicatorKey) {
@@ -353,8 +425,8 @@
   for (int op = 0; op < 4; ++op) {
     ncclRedOp_t reduction_op = static_cast<ncclRedOp_t>(op);
     std::unique_ptr<typename TestFixture::TestCase> test_case(
-        this->MakeTestCase(num_nodes, num_ranks_per_node, reduction_op,
-                           TensorShape({2, 3}), 0.0f));
+        this->MakeReductionTestCase(num_nodes, num_ranks_per_node, reduction_op,
+                                    TensorShape({2, 3}), 0.0f));
     for (int node = 0; node < num_nodes; ++node) {
       auto node_fn = [this, node, &nccl_managers, &communicator_key,
                       &collective_key, reduction_op, &test_case] {
@@ -393,8 +465,9 @@
 TYPED_TEST(NcclManagerTest, ConsistentCollectiveType) {
   const int num_ranks = 2;
 
-  std::unique_ptr<typename TestFixture::TestCase> test_case(this->MakeTestCase(
-      1 /* num_nodes */, num_ranks, ncclSum, TensorShape({2, 3}), 0.0f));
+  std::unique_ptr<typename TestFixture::TestCase> test_case(
+      this->MakeReductionTestCase(1 /* num_nodes */, num_ranks, ncclSum,
+                                  TensorShape({2, 3}), 0.0f));
   for (int rank = 0; rank < num_ranks; ++rank) {
     auto* device = this->GetDevice(rank);
     auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
@@ -427,8 +500,9 @@
 TYPED_TEST(NcclManagerTest, ConsistentCommunicatorKey) {
   const int num_ranks = 2;
 
-  std::unique_ptr<typename TestFixture::TestCase> test_case(this->MakeTestCase(
-      1 /* num_nodes */, num_ranks, ncclSum, TensorShape({2, 3}), 0.0f));
+  std::unique_ptr<typename TestFixture::TestCase> test_case(
+      this->MakeReductionTestCase(1 /* num_nodes */, num_ranks, ncclSum,
+                                  TensorShape({2, 3}), 0.0f));
   for (int rank = 0; rank < num_ranks; ++rank) {
     auto* device = this->GetDevice(rank);
     auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
@@ -454,8 +528,9 @@
 TYPED_TEST(NcclManagerTest, ConsistentNumberOfDevices) {
   const int num_ranks = 2;
 
-  std::unique_ptr<typename TestFixture::TestCase> test_case(this->MakeTestCase(
-      1 /* num_nodes */, num_ranks, ncclSum, TensorShape({2, 3}), 0.0f));
+  std::unique_ptr<typename TestFixture::TestCase> test_case(
+      this->MakeReductionTestCase(1 /* num_nodes */, num_ranks, ncclSum,
+                                  TensorShape({2, 3}), 0.0f));
   for (int rank = 0; rank < num_ranks; ++rank) {
     auto* device = this->GetDevice(rank);
     auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 108f187..8b6ee87 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -466,47 +466,37 @@
     .Attr("T: type")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle in = c->input(0);
+      ShapeHandle shape_in = c->input(1);
+      TF_RETURN_IF_ERROR(c->WithRank(shape_in, 1, &shape_in));
       ShapeHandle out;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out));
-
       if (!c->RankKnown(out)) {
         // We have no information about the shape of the output.
         c->set_output(0, out);
         return Status::OK();
       }
 
+      ShapeHandle in = c->input(0);
       if (!c->RankKnown(in)) {
         // We have no information about the shape of the input,
         // nothing to do here.
         c->set_output(0, out);
         return Status::OK();
       }
-      if (c->Rank(out) < c->Rank(in)) {
-        return errors::InvalidArgument("Cannot broadcast a tensor with shape ",
-                                       c->DebugString(in), " shape ",
-                                       c->DebugString(out));
-      }
-
-      int32 in_offset = c->Rank(out) - c->Rank(in);
-      for (int32 i = 0; i < c->Rank(out); ++i) {
-        DimensionHandle dim = c->Dim(out, i);
-        if (c->ValueKnown(dim)) {
-          // The first in_offset dimensions for input will be expanded with 1,
-          // so no check needed.
-          if (i >= in_offset) {
-            DimensionHandle in_dim = c->Dim(in, i - in_offset);
-            if (c->ValueKnown(in_dim) && c->Value(in_dim) != 0) {
-              if (c->Value(dim) % c->Value(in_dim) != 0) {
-                return errors::InvalidArgument(
-                    "Cannot broadcast a tensor with shape ", c->DebugString(in),
-                    " shape ", c->DebugString(out));
-              }
-            }
-          }
+      int out_rank = c->Rank(out);
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(in, out_rank, &in));
+      int in_rank = c->Rank(in);
+      for (int i = 0; i < in_rank; ++i) {
+        auto in_dim = c->Dim(in, in_rank - i - 1);
+        if (c->Value(in_dim) > 1) {
+          // If the input dimension is greater than 1, the output dimension
+          // must equal it: dimensions are matched from the right, and only
+          // size-1 input dimensions can be broadcast.
+          auto out_dim = c->Dim(out, out_rank - i - 1);
+          TF_RETURN_IF_ERROR(c->Merge(in_dim, out_dim, &out_dim));
+          TF_RETURN_IF_ERROR(
+              c->ReplaceDim(out, out_rank - i - 1, out_dim, &out));
         }
       }
-
       c->set_output(0, out);
       return Status::OK();
     });
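
A worked example of the new check (a standalone sketch, independent of the
shape-inference framework, for fully known static shapes): dimensions are
matched right-aligned, a size-1 input dimension broadcasts freely, and any
input dimension greater than 1 must equal the corresponding output dimension.

#include <cstdio>
#include <vector>

// Returns true iff `in` broadcasts to `out` under the rule enforced above.
bool BroadcastsTo(const std::vector<int>& in, const std::vector<int>& out) {
  if (in.size() > out.size()) return false;  // Mirrors WithRankAtMost.
  for (size_t i = 0; i < in.size(); ++i) {
    const int in_dim = in[in.size() - 1 - i];
    const int out_dim = out[out.size() - 1 - i];
    // This is where the shape function's Merge would fail.
    if (in_dim > 1 && in_dim != out_dim) return false;
  }
  return true;
}

int main() {
  std::printf("%d\n", BroadcastsTo({10, 1}, {2, 10, 3}));    // 1
  std::printf("%d\n", BroadcastsTo({3, 1, 1}, {2, 10, 3}));  // 0: 3 vs 2
  std::printf("%d\n", BroadcastsTo({2, 2, 1}, {2, 10, 3}));  // 0: 2 vs 10
}

The two failing cases correspond to the INFER_ERROR expectations added to
array_ops_test.cc below.
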
diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc
index 1c29cd2..92648ce 100644
--- a/tensorflow/core/ops/array_ops_test.cc
+++ b/tensorflow/core/ops/array_ops_test.cc
@@ -509,6 +509,33 @@
   INFER_ERROR("Shape must be rank 1 but is rank 0", op, "?;[]");
 }
 
+TEST(ArrayOpsTest, BroadcastTo_ShapeFn) {
+  ShapeInferenceTestOp op("BroadcastTo");
+  op.input_tensors.resize(2);
+
+  INFER_OK(op, "?;[?]", "?");
+  INFER_OK(op, "[];[1]", "[?]");
+  INFER_OK(op, "[1];[1]", "[?]");
+  INFER_OK(op, "[1];[2]", "[?,?]");
+  INFER_OK(op, "[2,2];[3]", "[?,d0_0,d0_1]");
+
+  // Rank checks
+  INFER_ERROR("Shape must be rank 1 but is rank 2", op, "?;[?,?]");
+  INFER_ERROR("Shape must be rank 1 but is rank 0", op, "[2];[]");
+  INFER_ERROR("Shape must be at most rank 1 but is rank 2", op, "[2,2];[1]");
+
+  Tensor shape_t(DT_INT64, TensorShape{3});
+  test::FillValues<int64>(&shape_t, {2, 10, 3});
+  op.input_tensors[1] = &shape_t;
+  INFER_OK(op, "[1,?,1];[3]", "[2,10,3]");
+  INFER_OK(op, "[1,1,1];[3]", "[2,10,3]");
+  INFER_OK(op, "[10,1];[3]", "[2,d0_0,3]");
+  INFER_ERROR("Dimensions must be equal, but are 3 and 2 for", op,
+              "[3,1,1];[3]");
+  INFER_ERROR("Dimensions must be equal, but are 2 and 10 for", op,
+              "[2,2,1];[3]");
+}
+
 TEST(ArrayOpsTest, BroadcastGradientArgs_ShapeFn) {
   ShapeInferenceTestOp op("BroadcastGradientArgs");
   // Output is always two unknown vectors.
diff --git a/tensorflow/core/ops/collective_ops.cc b/tensorflow/core/ops/collective_ops.cc
index e45a8a9..06e5f14 100644
--- a/tensorflow/core/ops/collective_ops.cc
+++ b/tensorflow/core/ops/collective_ops.cc
@@ -32,6 +32,17 @@
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnchangedShape);
 
+REGISTER_OP("CollectiveGather")
+    .Input("input: T")
+    .Output("data: T")
+    .Attr("T: {float, float16, float64, int32, int64}")
+    .Attr("group_size: int")
+    .Attr("group_key: int")
+    .Attr("instance_key: int")
+    .Attr("shape: shape")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ExplicitShape);
+
 REGISTER_OP("CollectiveBcastSend")
     .Input("input: T")
     .Output("data: T")
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 7e5b448..b67f034 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -1547,6 +1547,96 @@
   is_stateful: true
 }
 op {
+  name: "AllToAll"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "group_assignment"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "concat_dimension"
+    type: "int"
+  }
+  attr {
+    name: "split_dimension"
+    type: "int"
+  }
+  attr {
+    name: "split_count"
+    type: "int"
+  }
+}
+op {
+  name: "AllToAll"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "group_assignment"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BOOL
+      }
+    }
+  }
+  attr {
+    name: "concat_dimension"
+    type: "int"
+  }
+  attr {
+    name: "split_dimension"
+    type: "int"
+  }
+  attr {
+    name: "split_count"
+    type: "int"
+  }
+}
+op {
   name: "Angle"
   input_arg {
     name: "input"
@@ -12388,6 +12478,46 @@
   }
 }
 op {
+  name: "Case"
+  input_arg {
+    name: "branch_index"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "branches"
+    type: "list(func)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "Cast"
   input_arg {
     name: "x"
@@ -12830,6 +12960,87 @@
   is_stateful: true
 }
 op {
+  name: "CollectiveGather"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "group_size"
+    type: "int"
+  }
+  attr {
+    name: "group_key"
+    type: "int"
+  }
+  attr {
+    name: "instance_key"
+    type: "int"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  is_stateful: true
+}
+op {
+  name: "CollectivePermute"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "source_target_pairs"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
   name: "CollectiveReduce"
   input_arg {
     name: "input"
@@ -13576,6 +13787,35 @@
   is_stateful: true
 }
 op {
+  name: "ConfigureDistributedTPU"
+  output_arg {
+    name: "topology"
+    type: DT_STRING
+  }
+  attr {
+    name: "embedding_config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "tpu_embedding_config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "is_global_init"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "Conj"
   input_arg {
     name: "input"
@@ -16332,6 +16572,58 @@
   }
 }
 op {
+  name: "CrossReplicaSum"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "group_assignment"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "CrossReplicaSum"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "group_assignment"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_UINT32
+      }
+    }
+  }
+}
+op {
   name: "CudnnRNN"
   input_arg {
     name: "input"
@@ -18080,6 +18372,30 @@
   }
 }
 op {
+  name: "DatasetToSingleElement"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
   name: "DebugGradientIdentity"
   input_arg {
     name: "input"
@@ -19117,6 +19433,43 @@
   }
 }
 op {
+  name: "DecodeRaw"
+  input_arg {
+    name: "bytes"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT16
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  attr {
+    name: "little_endian"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
   name: "DecodeWav"
   input_arg {
     name: "contents"
@@ -21829,6 +22182,124 @@
   }
 }
 op {
+  name: "EnqueueTPUEmbeddingIntegerBatch"
+  input_arg {
+    name: "batch"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "mode_override"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "EnqueueTPUEmbeddingSparseBatch"
+  input_arg {
+    name: "sample_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "embedding_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "aggregation_weights"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "mode_override"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "combiners"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "EnqueueTPUEmbeddingSparseTensorBatch"
+  input_arg {
+    name: "sample_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "embedding_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "aggregation_weights"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "mode_override"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "combiners"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "table_ids"
+    type: "list(int)"
+  }
+  is_stateful: true
+}
+op {
   name: "EnsureShape"
   input_arg {
     name: "input"
@@ -22136,6 +22607,66 @@
   }
 }
 op {
+  name: "EuclideanNorm"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
   name: "Exit"
   input_arg {
     name: "data"
@@ -22479,6 +23010,22 @@
   }
 }
 op {
+  name: "ExperimentalDatasetToTFRecord"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "compression_type"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
   name: "ExperimentalDenseToSparseBatchDataset"
   input_arg {
     name: "input_dataset"
@@ -29366,6 +29913,108 @@
   }
 }
 op {
+  name: "InfeedDequeue"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  is_stateful: true
+}
+op {
+  name: "InfeedDequeueTuple"
+  output_arg {
+    name: "outputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+  }
+  is_stateful: true
+}
+op {
+  name: "InfeedEnqueue"
+  input_arg {
+    name: "input"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  attr {
+    name: "layout"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "InfeedEnqueueTuple"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+  }
+  attr {
+    name: "layouts"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "InitializeTable"
   input_arg {
     name: "table_handle"
@@ -31710,6 +32359,690 @@
   is_stateful: true
 }
 op {
+  name: "LoadTPUEmbeddingADAMParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingADAMParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdadeltaParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdagradParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdagradParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingCenteredRMSPropParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mg"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingFTRLParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingFTRLParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingMDLAdagradLightParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "weights"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "benefits"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingMomentumParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingMomentumParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingProximalAdagradParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingRMSPropParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingStochasticGradientDescentParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
   name: "Log"
   input_arg {
     name: "x"
@@ -40709,6 +42042,80 @@
   is_stateful: true
 }
 op {
+  name: "OutfeedDequeue"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OutfeedDequeueTuple"
+  output_arg {
+    name: "outputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OutfeedEnqueue"
+  input_arg {
+    name: "input"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "OutfeedEnqueueTuple"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
   name: "Pack"
   input_arg {
     name: "values"
@@ -49553,6 +50960,25 @@
   is_stateful: true
 }
 op {
+  name: "RecvTPUEmbeddingActivations"
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+    number_attr: "num_outputs"
+  }
+  attr {
+    name: "num_outputs"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "config"
+    type: "string"
+  }
+  is_stateful: true
+}
+op {
   name: "ReduceDataset"
   input_arg {
     name: "input_dataset"
@@ -49606,6 +51032,60 @@
   }
 }
 op {
+  name: "ReduceDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "initial_state"
+    type_list_attr: "Tstate"
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Tstate"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "use_inter_op_parallelism"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "ReduceJoin"
   input_arg {
     name: "inputs"
@@ -50605,6 +52085,49 @@
   }
 }
 op {
+  name: "RequantizationRangePerChannel"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "clip_value_max"
+    type: "float"
+  }
+}
+op {
   name: "Requantize"
   input_arg {
     name: "input"
@@ -50727,6 +52250,73 @@
   }
 }
 op {
+  name: "RequantizePerChannel"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
   name: "Reshape"
   input_arg {
     name: "tensor"
@@ -59310,6 +60900,690 @@
   is_stateful: true
 }
 op {
+  name: "RetrieveTPUEmbeddingADAMParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdadeltaParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdagradParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingCenteredRMSPropParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mg"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingFTRLParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingMDLAdagradLightParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "weights"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "benefits"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingMomentumParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingProximalAdagradParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingRMSPropParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingStochasticGradientDescentParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
   name: "Reverse"
   input_arg {
     name: "tensor"
@@ -64303,6 +66577,38 @@
   }
 }
 op {
+  name: "SendTPUEmbeddingGradients"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "learning_rates"
+    type: DT_FLOAT
+    number_attr: "NN"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "NN"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "config"
+    type: "string"
+  }
+  is_stateful: true
+}
+op {
   name: "SerializeIterator"
   input_arg {
     name: "resource_handle"
@@ -64770,6 +67076,10 @@
   }
 }
 op {
+  name: "ShutdownDistributedTPU"
+  is_stateful: true
+}
+op {
   name: "Sigmoid"
   input_arg {
     name: "x"
@@ -75446,6 +77756,70 @@
   is_stateful: true
 }
 op {
+  name: "StatefulStandardNormal"
+  input_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "shape"
+    type_attr: "shape_dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    name: "shape_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "StatefulStandardNormalV2"
+  input_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "algorithm"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "shape"
+    type_attr: "shape_dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    name: "shape_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "StatelessIf"
   input_arg {
     name: "cond"
@@ -77381,6 +79755,465 @@
   is_stateful: true
 }
 op {
+  name: "TPUCompilationResult"
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+}
+op {
+  name: "TPUEmbeddingActivations"
+  input_arg {
+    name: "embedding_variable"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sliced_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "lookup_id"
+    type: "int"
+    has_minimum: true
+  }
+}
+op {
+  name: "TPUOrdinalSelector"
+  output_arg {
+    name: "device_ordinals"
+    type: DT_INT32
+  }
+  is_stateful: true
+}
+op {
+  name: "TPUPartitionedCall"
+  input_arg {
+    name: "args"
+    type_list_attr: "Tin"
+  }
+  input_arg {
+    name: "device_ordinal"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+}
+op {
+  name: "TPUReplicate"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "Tinputs"
+  }
+  input_arg {
+    name: "broadcast_inputs"
+    type_list_attr: "Tbroadcast_inputs"
+  }
+  input_arg {
+    name: "variables"
+    type: DT_RESOURCE
+    number_attr: "NumVariables"
+  }
+  input_arg {
+    name: "guaranteed_constants"
+    type_list_attr: "Tguaranteed_constants"
+  }
+  output_arg {
+    name: "outputs"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "computation"
+    type: "func"
+  }
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_cores_per_replica"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "topology"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_tpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "device_assignment"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "host_compute_core"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "Tinputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tbroadcast_inputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "NumVariables"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "Tguaranteed_constants"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "padding_map"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "TPUReplicate"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "Tinputs"
+  }
+  input_arg {
+    name: "broadcast_inputs"
+    type_list_attr: "Tbroadcast_inputs"
+  }
+  input_arg {
+    name: "variables"
+    type: DT_RESOURCE
+    number_attr: "NumVariables"
+  }
+  input_arg {
+    name: "guaranteed_constants"
+    type_list_attr: "Tguaranteed_constants"
+  }
+  output_arg {
+    name: "outputs"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "computation"
+    type: "func"
+  }
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_cores_per_replica"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "topology"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_tpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "device_assignment"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "host_compute_core"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "Tinputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tbroadcast_inputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "NumVariables"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "Tguaranteed_constants"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "padding_map"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "step_marker_location"
+    type: "string"
+    default_value {
+      s: "STEP_MARK_AT_ENTRY"
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "TPUReplicateMetadata"
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_cores_per_replica"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "topology"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_tpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "device_assignment"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "computation_shape"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "host_compute_core"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "padding_map"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+}
+op {
+  name: "TPUReplicateMetadata"
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_cores_per_replica"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "topology"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_tpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "device_assignment"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "computation_shape"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "host_compute_core"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "padding_map"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "step_marker_location"
+    type: "string"
+    default_value {
+      s: "STEP_MARK_AT_ENTRY"
+    }
+  }
+}
+op {
+  name: "TPUReplicatedInput"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "TPUReplicatedOutput"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "outputs"
+    type_attr: "T"
+    number_attr: "num_replicas"
+  }
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
   name: "TakeDataset"
   input_arg {
     name: "input_dataset"
@@ -79460,6 +82293,29 @@
   }
 }
 op {
+  name: "TensorListScatterIntoExistingList"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+}
+op {
   name: "TensorListScatterV2"
   input_arg {
     name: "tensor"
@@ -80455,6 +83311,33 @@
   }
 }
 op {
+  name: "TridiagonalSolve"
+  input_arg {
+    name: "diagonals"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
   name: "TruncateDiv"
   input_arg {
     name: "x"
@@ -82655,6 +85538,18 @@
   }
 }
 op {
+  name: "WorkerHeartbeat"
+  input_arg {
+    name: "request"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "response"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
   name: "WrapDatasetVariant"
   input_arg {
     name: "input_handle"
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 872a6da..cc7ce54 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -554,13 +554,22 @@
     .Attr("output_shapes: list(shape) >= 1")
     .SetShapeFn(IteratorGetNextShapeFn);
 
+// TODO(b/124308596): Instead of conservatively marking this op as stateful,
+// implement a mechanism to determine whether `dataset` has a side-effect
+// and use it to decide whether to use a stateless or stateful version of this
+// op.
 REGISTER_OP("DatasetToSingleElement")
     .Input("dataset: variant")
     .Output("components: output_types")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
+    .SetIsStateful()
     .SetShapeFn(IteratorGetNextShapeFn);
 
+// TODO(b/124308596): Instead of conservatively marking this op as stateful,
+// implement a mechanism to determine whether `dataset` has a side-effect
+// and use it to decide whether to use a stateless or stateful version of this
+// op.
 REGISTER_OP("ReduceDataset")
     .Input("input_dataset: variant")
     .Input("initial_state: Tstate")
@@ -572,6 +581,7 @@
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .Attr("use_inter_op_parallelism: bool = true")
+    .SetIsStateful()
     .SetShapeFn(IteratorGetNextShapeFn);
 
 REGISTER_OP("IteratorToStringHandle")
@@ -652,6 +662,8 @@
     .Attr("output_shapes: list(shape) >= 1")
     .SetShapeFn(shape_inference::ScalarShape);
 
+// TODO(b/124308749): Add a stateful version of MapDefun and use it when `f`
+// is stateful.
 REGISTER_OP("MapDefun")
     .Input("arguments: Targuments")
     .Input("captured_inputs: Tcaptured")
diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc
index 95230af..7b9d95a 100644
--- a/tensorflow/core/ops/experimental_dataset_ops.cc
+++ b/tensorflow/core/ops/experimental_dataset_ops.cc
@@ -76,10 +76,15 @@
     .Output("cardinality: int64")
     .SetShapeFn(shape_inference::ScalarShape);
 
+// TODO(b/124308596): Instead of conservatively marking this op as stateful,
+// implement a mechanism to determine whether `dataset` has a side-effect
+// and use it to decide whether to use a stateless or stateful version of this
+// op.
 REGISTER_OP("ExperimentalDatasetToTFRecord")
     .Input("input_dataset: variant")
     .Input("filename: string")
     .Input("compression_type: string")
+    .SetIsStateful()
     .SetShapeFn(shape_inference::NoOutputs);
 
 REGISTER_OP("ExperimentalDenseToSparseBatchDataset")
diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc
index be440ed..4982ec6 100644
--- a/tensorflow/core/ops/functional_ops.cc
+++ b/tensorflow/core/ops/functional_ops.cc
@@ -132,6 +132,35 @@
       return Status::OK();
     });
 
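+// An n-way switch statement: invokes branches[branch_index](input) and
+// forwards that function's results as the op's outputs; e.g. with three
+// branches, branch_index == 1 runs branches[1].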
+REGISTER_OP("Case")
+    .Input("branch_index: int32")
+    .Input("input: Tin")
+    .Output("output: Tout")
+    .Attr("Tin: list(type) >= 0")
+    .Attr("Tout: list(type) >= 0")
+    .Attr("branches: list(func) >= 1")
+    .Attr("output_shapes: list(shape) = []")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      std::vector<PartialTensorShape> output_shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      // If the `output_shapes` attr is set, use those as the output shapes;
+      // otherwise return unknown shapes.
+      if (output_shapes.empty()) return shape_inference::UnknownShape(c);
+      if (output_shapes.size() != c->num_outputs()) {
+        return errors::InvalidArgument(
+            "`output_shapes` must be the same length as num outputs (",
+            output_shapes.size(), " vs. ", c->num_outputs(), ")");
+      }
+      for (size_t i = 0; i < output_shapes.size(); ++i) {
+        shape_inference::ShapeHandle output_shape_handle;
+        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+            output_shapes[i], &output_shape_handle));
+        c->set_output(static_cast<int>(i), output_shape_handle);
+      }
+      return Status::OK();
+    });
+
 // TODO(drpng): remove this.
 REGISTER_OP("_While")
     .Input("input: T")
diff --git a/tensorflow/core/ops/linalg_ops.cc b/tensorflow/core/ops/linalg_ops.cc
index 8a84195..66594b3 100644
--- a/tensorflow/core/ops/linalg_ops.cc
+++ b/tensorflow/core/ops/linalg_ops.cc
@@ -208,6 +208,36 @@
   return Status::OK();
 }
 
+// The first input is [..., 3, M] and the second input is [..., M, K];
+// the output is [..., M, K].
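+// E.g. diagonals of shape [3, 5] (rows: superdiagonal, main diagonal,
+// subdiagonal, each padded to length M = 5) with rhs of shape [5, 2]
+// yields an output of shape [5, 2].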
+Status TridiagonalSolveShapeFn(InferenceContext* c) {
+  ShapeHandle lhs;
+  ShapeHandle rhs;
+  // Check that rank is at least 2.
+  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 2, &lhs));
+  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 2, &rhs));
+
+  // Extract batch dimensions and check they are the same.
+  ShapeHandle lhs_batch_shape;
+  ShapeHandle rhs_batch_shape;
+  TF_RETURN_IF_ERROR(c->Subshape(lhs, 0, -2, &lhs_batch_shape));
+  TF_RETURN_IF_ERROR(c->Subshape(rhs, 0, -2, &rhs_batch_shape));
+  TF_RETURN_IF_ERROR(
+      c->Merge(lhs_batch_shape, rhs_batch_shape, &lhs_batch_shape));
+
+  // Check that "M" is the same in both inputs.
+  DimensionHandle m_lhs = c->Dim(lhs, -1);
+  DimensionHandle m_rhs = c->Dim(rhs, -2);
+  TF_RETURN_IF_ERROR(c->Merge(m_lhs, m_rhs, &m_lhs));
+
+  // Check that next-to-last dimension of the first input is 3.
+  TF_RETURN_IF_ERROR(c->WithValue(c->Dim(lhs, -2), 3, &m_lhs));
+
+  // The output shape is the same as rhs shape.
+  c->set_output(0, rhs);
+  return Status::OK();
+}
+
 }  // namespace
 
 REGISTER_OP("MatrixDeterminant")
@@ -379,6 +409,13 @@
     .Attr("T: {double, float, half, complex64, complex128}")
     .SetShapeFn(SvdShapeFn);
 
+REGISTER_OP("TridiagonalSolve")
+    .Input("diagonals: T")
+    .Input("rhs: T")
+    .Output("output: T")
+    .Attr("T: {double, float, complex64, complex128}")
+    .SetShapeFn(TridiagonalSolveShapeFn);
+
 // Deprecated op registrations:
 
 // Can be deleted after 3feb2017.
diff --git a/tensorflow/core/ops/linalg_ops_test.cc b/tensorflow/core/ops/linalg_ops_test.cc
index 7d1aed5..93732f9 100644
--- a/tensorflow/core/ops/linalg_ops_test.cc
+++ b/tensorflow/core/ops/linalg_ops_test.cc
@@ -314,4 +314,40 @@
            "[d0_0,d0_1,d0_2,d0_3,d0_5,d0_5];[d0_0,d0_1,d0_2,d0_3,d0_5]");
 }
 
+TEST(LinalgOpsTest, TridiagonalSolve_ShapeFn) {
+  ShapeInferenceTestOp op("TridiagonalSolve");
+  INFER_OK(op, "?;?", "in1");
+  INFER_OK(op, "[3,5];[?,1]", "in1");
+  INFER_OK(op, "[?,5];[5,1]", "in1");
+  INFER_OK(op, "[?,5];[?,?]", "in1");
+  INFER_OK(op, "[?,?];[?,?]", "in1");
+  INFER_OK(op, "[3,5];[5,1]", "in1");
+  INFER_OK(op, "[3,5];[5,2]", "in1");
+
+  INFER_OK(op, "[?,?,?];[?,?,?]", "in1");
+  INFER_OK(op, "[?,3,5];[7,5,2]", "in1");
+  INFER_OK(op, "[7,3,5];[?,5,2]", "in1");
+  INFER_OK(op, "[7,?,5];[?,5,?]", "in1");
+  INFER_OK(op, "[7,3,5];[7,5,2]", "in1");
+
+  INFER_OK(op, "[7,?,3,5];[7,8,5,2]", "in1");
+  INFER_OK(op, "[7,8,3,5];[7,8,5,2]", "in1");
+
+  INFER_ERROR("Shape must be at least rank 2 but is rank 1", op, "[3];[5,1]");
+  INFER_ERROR("Shape must be at least rank 2 but is rank 1", op, "[3,5];[5]");
+  INFER_ERROR(
+      "Dimension 1 in both shapes must be equal, but are 4 and 8. "
+      "Shapes are [6,4] and [6,8].",
+      op, "[6,4,3,5];[6,8,5,2]");
+  INFER_ERROR(
+      "Dimension 1 in both shapes must be equal, but are 4 and 8. "
+      "Shapes are [?,4] and [6,8].",
+      op, "[?,4,3,5];[6,8,5,2]");
+  INFER_ERROR("Dimension must be 3 but is 4", op, "[4,5];[5,2]");
+  INFER_ERROR("Dimension must be 3 but is 4", op, "[6,4,5];[6,5,2]");
+  INFER_ERROR("Dimensions must be equal, but are 9 and 5", op, "[3,9];[5,2]");
+  INFER_ERROR("Dimensions must be equal, but are 9 and 5", op,
+              "[6,3,9];[6,5,2]");
+}
+
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/list_ops.cc b/tensorflow/core/ops/list_ops.cc
index f8fdb10..123ffc4 100644
--- a/tensorflow/core/ops/list_ops.cc
+++ b/tensorflow/core/ops/list_ops.cc
@@ -20,6 +20,34 @@
 namespace tensorflow {
 namespace {
 
+// Verifies that `shapes_and_types` is a valid list handle and has the right
+// dtype.
+Status VerifyHandleData(
+    shape_inference::InferenceContext* c,
+    const std::vector<shape_inference::ShapeAndType>& shapes_and_types,
+    DataType element_dtype) {
+  if (shapes_and_types.size() != 1) {
+    return errors::InvalidArgument(
+        "Invalid handle_data for input list. Expected length of "
+        "shape_and_types: ",
+        1, " Saw: ", shapes_and_types.size());
+  }
+  const shape_inference::ShapeAndType& list_shape_type = shapes_and_types[0];
+  if (list_shape_type.dtype != element_dtype) {
+    return errors::InvalidArgument("Expected list with element dtype ",
+                                   DataTypeString(element_dtype),
+                                   " but got list with element dtype ",
+                                   DataTypeString(list_shape_type.dtype));
+  }
+  return Status::OK();
+}
+
+// Assumes that the handle_data is valid.
+shape_inference::ShapeHandle GetElementShapeFromHandleData(
+    const std::vector<shape_inference::ShapeAndType>& shapes_and_types) {
+  return shapes_and_types[0].shape;
+}
+
 REGISTER_OP("EmptyTensorList")
     .Input("element_shape: shape_type")
     .Input("max_num_elements: int32")
@@ -503,6 +531,34 @@
       return Status::OK();
     });
 
+REGISTER_OP("TensorListScatterIntoExistingList")
+    .Input("input_handle: variant")
+    .Input("tensor: element_dtype")
+    .Input("indices: int32")
+    .Output("output_handle: variant")
+    .Attr("element_dtype: type")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      shape_inference::ShapeHandle ignored;
+      // Check that tensor is at least a vector.
+      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &ignored));
+      // Check that indices is a vector.
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &ignored));
+
+      DataType element_dtype;
+      TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype));
+      shape_inference::ShapeHandle element_shape = c->UnknownShape();
+
+      auto* handle_data = c->input_handle_shapes_and_types(0);
+      if (handle_data != nullptr) {
+        TF_RETURN_IF_ERROR(VerifyHandleData(c, *handle_data, element_dtype));
+        element_shape = GetElementShapeFromHandleData(*handle_data);
+      }
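+      // The result is a new list handle (a scalar variant) that carries the
+      // element shape and dtype forward.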
+      c->set_output_handle_shapes_and_types(0,
+                                            {{element_shape, element_dtype}});
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
 REGISTER_OP("TensorListConcatLists")
     .Input("input_a: variant")
     .Input("input_b: variant")
diff --git a/tensorflow/core/ops/lookup_table_ops.cc b/tensorflow/core/ops/lookup_table_ops.cc
new file mode 100644
index 0000000..3ce08f6
--- /dev/null
+++ b/tensorflow/core/ops/lookup_table_ops.cc
@@ -0,0 +1,61 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+
+REGISTER_OP("LookupTableInsertOrAssignOp")
+    .Input("table_int64_args: num_int64_table_args * int64")
+    .Input("table_handle: resource")
+    .Input("keys: insert_key_tensor_dtype")
+    .Input("values: table_value_dtype")
+    .Attr("insert_key_tensor_dtype: type")
+    .Attr("table_value_dtype: type")
+    .Attr("num_int64_table_args: int >= 0")
+    .SetShapeFn([](InferenceContext* c) {
+      // Note that, by design, shape checks are implementation-dependent, so
+      // they must be deferred until runtime.
+      return Status::OK();
+    });
+
+REGISTER_OP("LookupTableFindOp")
+    .Input("table_int64_args: num_int64_table_args * int64")
+    .Input("table_handle: resource")
+    .Input("keys: lookup_key_tensor_dtype")
+    .Input("num_threads: int64")
+    .Output("values: table_value_dtype")
+    .Attr("table_value_dtype: type")
+    .Attr("lookup_key_tensor_dtype: type")
+    .Attr("num_int64_table_args: int >= 0")
+    .SetShapeFn([](InferenceContext* c) {
+      // The output shape cannot be inferred here because the key size
+      // cannot be inferred from the key tensor in general.
+      c->set_output(0, c->UnknownShape());
+      return Status::OK();
+    });
+
+REGISTER_OP("ContainerSizeOp")
+    .Input("container_handle: resource")
+    .Output("size: int64")
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 747536a..dd24d2c 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -850,6 +850,15 @@
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .SetShapeFn(shape_inference::ReductionShape);
 
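+// Reduces `input` along `reduction_indices` using the Euclidean (L2) norm,
+// i.e. sqrt(sum(x^2)); e.g. reducing [[3.0, 4.0]] along axis 1 gives [5.0].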
+REGISTER_OP("EuclideanNorm")
+    .Input("input: T")
+    .Input("reduction_indices: Tidx")
+    .Output("output: T")
+    .Attr("keep_dims: bool = false")
+    .Attr("T: numbertype")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .SetShapeFn(shape_inference::ReductionShape);
+
 REGISTER_OP("Mean")
     .Input("input: T")
     .Input("reduction_indices: Tidx")
@@ -1749,6 +1758,45 @@
 
 #endif  // INTEL_MKL
 
+REGISTER_OP("RequantizePerChannel")
+    .Input("input: T")
+    .Input("input_min: float")
+    .Input("input_max: float")
+    .Input("requested_output_min: float")
+    .Input("requested_output_max: float")
+    .Output("output: out_type")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("T: quantizedtype = DT_QINT32")
+    .Attr("out_type: quantizedtype = DT_QUINT8")
+    .SetShapeFn([](InferenceContext* c) {
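+      // The quantized output keeps the input's shape; the per-channel input
+      // range bounds are rank-1 vectors and the requested output range is
+      // given as scalars.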
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    });
+REGISTER_OP("RequantizationRangePerChannel")
+    .Input("input: T")
+    .Input("input_min: float")
+    .Input("input_max: float")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("T: quantizedtype = DT_QINT32")
+    .Attr("clip_value_max: float")
+    .SetShapeFn([](InferenceContext* c) {
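+      // input_min/input_max are per-channel (rank-1) vectors; the computed
+      // output range is a pair of scalars.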
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
+      c->set_output(0, c->Scalar());
+      c->set_output(1, c->Scalar());
+      return Status::OK();
+    });
+
 REGISTER_OP("NextAfter")
     .Attr("T: {float64, float32} = DT_FLOAT")
     .Input("x1: T")
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index 05379a7..1e6dbbf 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -144,6 +144,7 @@
     INFER_OK(op, "[1];[2]", "[d1_0]");
     INFER_OK(op, "[2];[1]", "[d0_0]");
     INFER_OK(op, "[2];[]", "[d0_0]");
+    INFER_OK(op, "[2];[?]", "[d0_0]");
 
     INFER_OK(op, "[0];[0]", "[d0_0|d1_0]");
     INFER_OK(op, "[];[0]", "[d1_0]");
@@ -151,6 +152,9 @@
     INFER_OK(op, "[0];[1]", "[d0_0]");
     INFER_OK(op, "[0];[]", "[d0_0]");
 
+    INFER_OK(op, "[2];[?,?]", "[d1_0,d0_0]");
+    INFER_OK(op, "[2,2];[?,?,?]", "[d1_0,d0_0,d0_1]");
+
     // Multiple dimension cases (same test cases, switching x and y).
     INFER_OK(op, "[?,1,2,3,4,5];[3,1,?]",
              "[d0_0,d0_1,d0_2,d0_3|d1_0,d0_4,d0_5]");
diff --git a/tensorflow/core/ops/mkl_array_ops.cc b/tensorflow/core/ops/mkl_array_ops.cc
new file mode 100644
index 0000000..e7ad3be
--- /dev/null
+++ b/tensorflow/core/ops/mkl_array_ops.cc
@@ -0,0 +1,92 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+// This file contains the registration of MKL-DNN array ops.
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/util/mirror_pad_mode.h"
+#include "tensorflow/core/util/padding.h"
+#include "tensorflow/core/util/strided_slice_op.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+namespace tensorflow {
+
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+using shape_inference::UnchangedShape;
+
+// Register QuantizedConcatV2 here so that the graph rewrite pass can replace
+// it with _MklQuantizedConcatV2.
+REGISTER_OP("QuantizedConcatV2")
+    .Input("values: N * T")
+    .Input("axis: Tidx")
+    .Input("input_mins: N * float32")
+    .Input("input_maxes: N * float32")
+    .Output("output: T")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("N: int >= 2")
+    .Attr("T: type")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .SetShapeFn([](InferenceContext* c) {
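+      // Inputs are N values, one axis, then N input_mins and N input_maxes,
+      // so num_inputs() == 3 * N + 1; the mins/maxes must all be scalars.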
+      const int n = (c->num_inputs() - 1) / 3;
+      TF_RETURN_IF_ERROR(shape_inference::QuantizedConcatV2Shape(c, n));
+      ShapeHandle unused;
+      for (int i = n + 1; i < c->num_inputs(); ++i) {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
+      }
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    });
+
+REGISTER_OP("_MklQuantizedConcatV2")
+    .Input("values: N * T")
+    .Input("axis: Tidx")
+    .Input("input_mins:  N * float32")
+    .Input("input_maxes: N * float32")
+    .Input("mkl_values: N * uint8")
+    .Input("mkl_axis: uint8")
+    .Input("mkl_input_mins:  N * uint8")
+    .Input("mkl_input_maxes: N * uint8")
+    .Output("output: T")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Output("mkl_output: uint8")
+    .Output("mkl_output_min: uint8")
+    .Output("mkl_output_max: uint8")
+    .Attr("N: int >= 2")
+    .Attr("T: type")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .SetShapeFn([](InferenceContext* c) {
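+      // Each real input is shadowed by a uint8 MKL metadata tensor, so only
+      // the first num_inputs() / 2 inputs are real: N values, one axis, then
+      // N input_mins and N input_maxes (all scalars).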
+      const int n = (c->num_inputs() / 2 - 1) / 3;
+      TF_RETURN_IF_ERROR(shape_inference::QuantizedConcatV2Shape(c, n));
+      ShapeHandle unused;
+      for (int i = n + 1; i < c->num_inputs() / 2; ++i) {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
+      }
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    });
+}  // namespace tensorflow
+
+#endif
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index c0ec654..b3ab7d5 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -652,6 +652,59 @@
   is_stateful: true
 }
 op {
+  name: "AllToAll"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "group_assignment"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BOOL
+      }
+    }
+  }
+  attr {
+    name: "concat_dimension"
+    type: "int"
+  }
+  attr {
+    name: "split_dimension"
+    type: "int"
+  }
+  attr {
+    name: "split_count"
+    type: "int"
+  }
+}
+op {
   name: "Angle"
   input_arg {
     name: "input"
@@ -5142,6 +5195,46 @@
   }
 }
 op {
+  name: "Case"
+  input_arg {
+    name: "branch_index"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "branches"
+    type: "list(func)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "Cast"
   input_arg {
     name: "x"
@@ -5398,6 +5491,87 @@
   is_stateful: true
 }
 op {
+  name: "CollectiveGather"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "group_size"
+    type: "int"
+  }
+  attr {
+    name: "group_key"
+    type: "int"
+  }
+  attr {
+    name: "instance_key"
+    type: "int"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  is_stateful: true
+}
+op {
+  name: "CollectivePermute"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "source_target_pairs"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
   name: "CollectiveReduce"
   input_arg {
     name: "input"
@@ -5849,6 +6023,35 @@
   is_stateful: true
 }
 op {
+  name: "ConfigureDistributedTPU"
+  output_arg {
+    name: "topology"
+    type: DT_STRING
+  }
+  attr {
+    name: "embedding_config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "tpu_embedding_config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "is_global_init"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "Conj"
   input_arg {
     name: "input"
@@ -6932,6 +7135,33 @@
   }
 }
 op {
+  name: "CrossReplicaSum"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "group_assignment"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_UINT32
+      }
+    }
+  }
+}
+op {
   name: "CudnnRNN"
   input_arg {
     name: "input"
@@ -8284,6 +8514,7 @@
     has_minimum: true
     minimum: 1
   }
+  is_stateful: true
 }
 op {
   name: "DebugGradientIdentity"
@@ -8841,6 +9072,8 @@
         type: DT_INT16
         type: DT_INT8
         type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
@@ -10159,6 +10392,124 @@
   }
 }
 op {
+  name: "EnqueueTPUEmbeddingIntegerBatch"
+  input_arg {
+    name: "batch"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "mode_override"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "EnqueueTPUEmbeddingSparseBatch"
+  input_arg {
+    name: "sample_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "embedding_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "aggregation_weights"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "mode_override"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "combiners"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "EnqueueTPUEmbeddingSparseTensorBatch"
+  input_arg {
+    name: "sample_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "embedding_indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "aggregation_weights"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "mode_override"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "combiners"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "table_ids"
+    type: "list(int)"
+  }
+  is_stateful: true
+}
+op {
   name: "EnsureShape"
   input_arg {
     name: "input"
@@ -10297,6 +10648,66 @@
   }
 }
 op {
+  name: "EuclideanNorm"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
   name: "Exit"
   input_arg {
     name: "data"
@@ -10546,6 +10957,7 @@
     name: "compression_type"
     type: DT_STRING
   }
+  is_stateful: true
 }
 op {
   name: "ExperimentalDenseToSparseBatchDataset"
@@ -14616,6 +15028,108 @@
   }
 }
 op {
+  name: "InfeedDequeue"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  is_stateful: true
+}
+op {
+  name: "InfeedDequeueTuple"
+  output_arg {
+    name: "outputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+  }
+  is_stateful: true
+}
+op {
+  name: "InfeedEnqueue"
+  input_arg {
+    name: "input"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  attr {
+    name: "layout"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "InfeedEnqueueTuple"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+  }
+  attr {
+    name: "layouts"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
   name: "InitializeTable"
   input_arg {
     name: "table_handle"
@@ -15848,6 +16362,690 @@
   is_stateful: true
 }
 op {
+  name: "LoadTPUEmbeddingADAMParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingADAMParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdadeltaParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdagradParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingAdagradParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingCenteredRMSPropParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mg"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingFTRLParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingFTRLParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingMDLAdagradLightParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "weights"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "benefits"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingMomentumParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingMomentumParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingProximalAdagradParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingRMSPropParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "LoadTPUEmbeddingStochasticGradientDescentParameters"
+  input_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
   name: "Log"
   input_arg {
     name: "x"
@@ -20144,6 +21342,80 @@
   is_stateful: true
 }
 op {
+  name: "OutfeedDequeue"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OutfeedDequeueTuple"
+  output_arg {
+    name: "outputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+  }
+  attr {
+    name: "device_ordinal"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OutfeedEnqueue"
+  input_arg {
+    name: "input"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "OutfeedEnqueueTuple"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
   name: "Pack"
   input_arg {
     name: "values"
@@ -25938,6 +27210,25 @@
   is_stateful: true
 }
 op {
+  name: "RecvTPUEmbeddingActivations"
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+    number_attr: "num_outputs"
+  }
+  attr {
+    name: "num_outputs"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "config"
+    type: "string"
+  }
+  is_stateful: true
+}
+op {
   name: "ReduceDataset"
   input_arg {
     name: "input_dataset"
@@ -25989,6 +27280,7 @@
       b: true
     }
   }
+  is_stateful: true
 }
 op {
   name: "ReduceJoin"
@@ -26484,6 +27776,49 @@
   }
 }
 op {
+  name: "RequantizationRangePerChannel"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "clip_value_max"
+    type: "float"
+  }
+}
+op {
   name: "Requantize"
   input_arg {
     name: "input"
@@ -26545,6 +27880,73 @@
   }
 }
 op {
+  name: "RequantizePerChannel"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
   name: "Reshape"
   input_arg {
     name: "tensor"
@@ -29494,6 +30896,690 @@
   is_stateful: true
 }
 op {
+  name: "RetrieveTPUEmbeddingADAMParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "velocities"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdadeltaParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "updates"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdagradParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingCenteredRMSPropParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mg"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingFTRLParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "linears"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingMDLAdagradLightParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "weights"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "benefits"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingMomentumParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "momenta"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingProximalAdagradParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "accumulators"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingRMSPropParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "ms"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "mom"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient_accumulators"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
+  name: "RetrieveTPUEmbeddingStochasticGradientDescentParameters"
+  output_arg {
+    name: "parameters"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "table_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+  }
+  attr {
+    name: "shard_id"
+    type: "int"
+  }
+  is_stateful: true
+}
+op {
   name: "Reverse"
   input_arg {
     name: "tensor"
@@ -31519,6 +33605,38 @@
   }
 }
 op {
+  name: "SendTPUEmbeddingGradients"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "learning_rates"
+    type: DT_FLOAT
+    number_attr: "NN"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "NN"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "config"
+    type: "string"
+  }
+  is_stateful: true
+}
+op {
   name: "SerializeIterator"
   input_arg {
     name: "resource_handle"
@@ -31869,6 +33987,10 @@
   }
 }
 op {
+  name: "ShutdownDistributedTPU"
+  is_stateful: true
+}
+op {
   name: "Sigmoid"
   input_arg {
     name: "x"
@@ -35863,14 +37985,6 @@
     default_value {
       type: DT_FLOAT
     }
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_BFLOAT16
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
   }
   attr {
     name: "shape_dtype"
@@ -35878,11 +37992,39 @@
     default_value {
       type: DT_INT64
     }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+  }
+  is_stateful: true
+}
+op {
+  name: "StatefulStandardNormalV2"
+  input_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "algorithm"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "shape"
+    type_attr: "shape_dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    name: "shape_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
     }
   }
   is_stateful: true
@@ -37096,6 +39238,299 @@
   is_stateful: true
 }
 op {
+  name: "TPUCompilationResult"
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+}
+op {
+  name: "TPUEmbeddingActivations"
+  input_arg {
+    name: "embedding_variable"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sliced_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "table_id"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "lookup_id"
+    type: "int"
+    has_minimum: true
+  }
+}
+op {
+  name: "TPUOrdinalSelector"
+  output_arg {
+    name: "device_ordinals"
+    type: DT_INT32
+  }
+  is_stateful: true
+}
+op {
+  name: "TPUPartitionedCall"
+  input_arg {
+    name: "args"
+    type_list_attr: "Tin"
+  }
+  input_arg {
+    name: "device_ordinal"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+}
+op {
+  name: "TPUReplicate"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "Tinputs"
+  }
+  input_arg {
+    name: "broadcast_inputs"
+    type_list_attr: "Tbroadcast_inputs"
+  }
+  input_arg {
+    name: "variables"
+    type: DT_RESOURCE
+    number_attr: "NumVariables"
+  }
+  input_arg {
+    name: "guaranteed_constants"
+    type_list_attr: "Tguaranteed_constants"
+  }
+  output_arg {
+    name: "outputs"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "computation"
+    type: "func"
+  }
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_cores_per_replica"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "topology"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_tpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "device_assignment"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "host_compute_core"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "Tinputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tbroadcast_inputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "NumVariables"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "Tguaranteed_constants"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "padding_map"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "step_marker_location"
+    type: "string"
+    default_value {
+      s: "STEP_MARK_AT_ENTRY"
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "TPUReplicateMetadata"
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_cores_per_replica"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "topology"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_tpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "device_assignment"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "computation_shape"
+    type: "list(int)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "host_compute_core"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "padding_map"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "step_marker_location"
+    type: "string"
+    default_value {
+      s: "STEP_MARK_AT_ENTRY"
+    }
+  }
+}
+op {
+  name: "TPUReplicatedInput"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "TPUReplicatedOutput"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "outputs"
+    type_attr: "T"
+    number_attr: "num_replicas"
+  }
+  attr {
+    name: "num_replicas"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
   name: "TakeDataset"
   input_arg {
     name: "input_dataset"
@@ -38688,6 +41123,29 @@
   }
 }
 op {
+  name: "TensorListScatterIntoExistingList"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+}
+op {
   name: "TensorListScatterV2"
   input_arg {
     name: "tensor"
@@ -39321,6 +41779,33 @@
   }
 }
 op {
+  name: "TridiagonalSolve"
+  input_arg {
+    name: "diagonals"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
   name: "TruncateDiv"
   input_arg {
     name: "x"
@@ -40694,6 +43179,18 @@
   }
 }
 op {
+  name: "WorkerHeartbeat"
+  input_arg {
+    name: "request"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "response"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
   name: "WrapDatasetVariant"
   input_arg {
     name: "input_handle"
diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index eff4532..169076a 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -26,7 +26,10 @@
 REGISTER_OP("DecodeRaw")
     .Input("bytes: string")
     .Output("output: out_type")
-    .Attr("out_type: {half,float,double,int32,uint16,uint8,int16,int8,int64}")
+    .Attr(
+        "out_type: "
+        "{half,float,double,int32,uint16,uint8,int16,int8,int64,complex64,"
+        "complex128}")
     .Attr("little_endian: bool = true")
     .SetShapeFn([](InferenceContext* c) {
       // Note: last dimension is data dependent.
diff --git a/tensorflow/core/ops/stateful_random_ops.cc b/tensorflow/core/ops/stateful_random_ops.cc
index 4c9277e..2162107 100644
--- a/tensorflow/core/ops/stateful_random_ops.cc
+++ b/tensorflow/core/ops/stateful_random_ops.cc
@@ -19,18 +19,42 @@
 namespace tensorflow {
 
 Status StatefulRandomShape(shape_inference::InferenceContext* c) {
-  shape_inference::ShapeHandle out;
-  TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out));
+  using shape_inference::ShapeHandle;
+
+  // Check algorithm shape
+  ShapeHandle unused;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+
+  // Set output shape
+  ShapeHandle out;
+  TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &out));
   c->set_output(0, out);
   return Status::OK();
 }
 
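+// 'StatefulStandardNormalV2' extends 'StatefulStandardNormal' with an
+// explicit RNG algorithm input (so the shape tensor moves to input 2), and
+// its dtype/shape_dtype attrs are no longer restricted to a fixed type list.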
+REGISTER_OP("StatefulStandardNormalV2")
+    .Input("resource: resource")
+    .Input("algorithm: int64")
+    .Input("shape: shape_dtype")
+    .Output("output: dtype")
+    .Attr("dtype : type = DT_FLOAT")
+    .Attr("shape_dtype : type = DT_INT64")
+    .SetShapeFn(StatefulRandomShape);
+
+// Register the old 'StatefulStandardNormal' op
 REGISTER_OP("StatefulStandardNormal")
     .Input("resource: resource")
     .Input("shape: shape_dtype")
     .Output("output: dtype")
-    .Attr("dtype: {half,bfloat16,float,double} = DT_FLOAT")
-    .Attr("shape_dtype: {int32, int64} = DT_INT64")
-    .SetShapeFn(StatefulRandomShape);
+    .Attr("dtype : type = DT_FLOAT")
+    .Attr("shape_dtype : type = DT_INT64")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      using shape_inference::ShapeHandle;
+      // Set output shape
+      ShapeHandle out;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out));
+      c->set_output(0, out);
+      return Status::OK();
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/tpu_configuration_ops.cc b/tensorflow/core/ops/tpu_configuration_ops.cc
new file mode 100644
index 0000000..febb250
--- /dev/null
+++ b/tensorflow/core/ops/tpu_configuration_ops.cc
@@ -0,0 +1,202 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+// Configuring a distributed TPU system is achieved by running
+// the following Ops:
+//
+// 1 Run _DisconnectHostFromDistributedTPUSystem on the TPU_SYSTEM of each
+// host. This is needed in case the system had previously been configured. It
+// returns, for each host, the number of TPU chips on the host.
+//
+// 2 Run _ConfigureDistributedTPU on TPU_SYSTEM of worker 0. Takes as input the
+// number of chips on each host. Validates that all hosts have the same number
+// of chips, and that the chips are consistent with the topology set by
+// flags. Has a single output which is a proto describing the requested system
+// configuration, which is sent to all hosts.
+//
+// 3 Run _InitializeHostForDistributedTPU on the TPU_SYSTEM of each host, taking
+// as input the output from ConfigureDistributedTPU. Has a single Tensor output
+// which is a vector of int32 indicating, for each TPU on the host, what its
+// global TPU system id is.
+//
+// 4 Run _WaitForDistributedTPU on TPU_SYSTEM, taking as input the
+// outputs from all the _InitializeHostForDistributedTPU
+// Ops. These outputs are combined in the Op to construct a mapping from
+// full TPU device specs to global TPU ids. Has a single Tensor output
+// which is a
+// matrix of int32 indicating, for each host (outer dimension) and for
+// each TPU on the host (inner dimension) what that TPU's global id
+// is. _WaitForDistributedTPU also waits for the TPU distributed
+// system to initialize fully, which may take several minutes for a
+// large system.
+//
+// 5 Run _SetGlobalTPUArray on the TPU_SYSTEM of each host, taking as input the
+// output from _WaitForDistributedTPU. This Op tells each host the global id of
+// every TPU on every host.
+//
+// Most user code works by placing the ConfigureDistributedTPU Op on the desired
+// TPU_SYSTEM device, and a graph rewrite replaces it by the subgraph described
+// above.
+//
+//
+// A distributed TPU system can be cleanly shut down by running the following
+// Ops:
+//
+// 1 Run _DisconnectHostFromDistributedTPUSystem on the TPU_SYSTEM of each host.
+//
+// 2 Run _ShutdownDistributedTPU on the TPU_SYSTEM where
+// _ConfigureDistributedTPU was run. The Op will return an error if no system is
+// configured.
+//
+//
+// Most user code works by placing the ShutdownDistributedTPU Op on the desired
+// TPU_SYSTEM device, and a graph rewrite replaces it by the subgraph described
+// above.
+
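+// A rough user-level sketch of this lifecycle (Python-style pseudocode;
+// assumes the tf.contrib.tpu API of this era, shown for orientation only):
+//
+//   topology = sess.run(tpu.initialize_system())  # ConfigureDistributedTPU
+//   ...run replicated TPU computations...
+//   sess.run(tpu.shutdown_system())               # ShutdownDistributedTPU
+//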
+REGISTER_OP("_ConfigureDistributedTPU")
+    .Input("inputs: N * int32")
+    .Output("output: string")
+    .Attr("N: int >= 1")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input;
+      // Validate that all the inputs are scalars.
+      for (int i = 0; i < c->num_inputs(); ++i) {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &input));
+      }
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+An op that sets up the centralized structures for a distributed TPU
+system.
+
+inputs: A scalar tensor for each host indicating how many TPU chips
+there are on the host.
+output: A tensor containing a TPUHostConfiguration proto serialized to
+a string, containing the information necessary to initialize the chips
+in a host.
+)doc");
+
+REGISTER_OP("_WaitForDistributedTPU")
+    .Input("inputs: N * int32")
+    .Output("topology: string")
+    .Attr("startup_timeout_sec: int = 20")
+    .Attr("N: int")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input;
+      // Validate that all the inputs have the same vector shape.
+      for (int i = 0; i < c->num_inputs(); ++i) {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &input));
+      }
+      c->set_output(0, c->Scalar());
+      return ::tensorflow::Status::OK();
+    })
+    .Doc(R"doc(
+An op that blocks execution until a distributed TPU system has
+started up. This Op must be run on the same TPU_SYSTEM device as
+_ConfigureDistributedTPU, and takes as inputs the outputs from the
+_InitializeHostForDistributedTPU Ops.
+
+inputs: For each initialized host, a vector giving the global TPU id
+of each TPU on the host.
+topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
+startup_timeout_sec: The number of seconds to wait for the TPU system
+to stabilize.
+)doc");
+
+REGISTER_OP("_SetGlobalTPUArray")
+    .Input("topology: string")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input));
+      return ::tensorflow::Status::OK();
+    })
+    .Doc(R"doc(
+An op that informs a host of the global ids of all of the TPUs in the
+system.
+
+topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
+)doc");
+
+REGISTER_OP("_ShutdownDistributedTPU")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+An op that shuts down a running distributed TPU system. The Op returns
+an error if no system is running. This Op must be run on the same
+TPU_SYSTEM device as the corresponding _ConfigureDistributedTPU was run
+to start the system, and must be run only after
+_DisconnectHostFromDistributedTPUSystem has completed on every host in
+the system.
+)doc");
+
+REGISTER_OP("_InitializeHostForDistributedTPU")
+    .Input("input: string")
+    .Output("tpu_ids: int32")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input));
+      c->set_output(0, c->Vector(c->UnknownDim()));
+      return ::tensorflow::Status::OK();
+    })
+    .Doc(R"doc(
+An op that connects each chip on the host to a centralized UberDriver to allow
+them to operate as a distributed system with chips in other hosts.
+
+input: A string containing the address of the UberDriver to connect to.
+tpu_ids: A vector containing the global TPU id of each TPU on the host.
+)doc");
+
+REGISTER_OP("_DisconnectHostFromDistributedTPUSystem")
+    .Output("number_of_tpu_chips: int32")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+An op that disconnects the TPUs on a host from a running distributed
+TPU system.
+
+number_of_tpu_chips: A scalar tensor containing the number of TPU
+chips on the host.
+)doc");
+
+REGISTER_OP("ConfigureDistributedTPU")
+    .Output("topology: string")
+    .Attr("embedding_config: string = ''")
+    .Attr("tpu_embedding_config: string = ''")
+    .Attr("is_global_init: bool = false")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("ShutdownDistributedTPU")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/tpu_cross_replica_ops.cc b/tensorflow/core/ops/tpu_cross_replica_ops.cc
new file mode 100644
index 0000000..c26b49e
--- /dev/null
+++ b/tensorflow/core/ops/tpu_cross_replica_ops.cc
@@ -0,0 +1,88 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("AllToAll")
+    .Input("input: T")
+    .Input("group_assignment: int32")
+    .Output("output: T")
+    .Attr("T: {numbertype, bool}")
+    .Attr("concat_dimension: int")
+    .Attr("split_dimension: int")
+    .Attr("split_count: int")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input = c->input(0);
+      int64 rank;
+      if (c->RankKnown(input)) {
+        rank = c->Rank(input);
+      } else {
+        return errors::InvalidArgument("input's rank is unknown.");
+      }
+      int concat_dimension;
+      int split_dimension;
+
+      TF_RETURN_IF_ERROR(c->GetAttr("concat_dimension", &concat_dimension));
+
+      if (concat_dimension < 0 || concat_dimension >= rank) {
+        return errors::InvalidArgument("concat_dimension ", concat_dimension,
+                                       " is out of range of input rank ", rank);
+      }
+
+      TF_RETURN_IF_ERROR(c->GetAttr("split_dimension", &split_dimension));
+      if (split_dimension < 0 || split_dimension >= rank) {
+        return errors::InvalidArgument("split_dimension ", split_dimension,
+                                       " is out of range of input rank ", rank);
+      }
+
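+      // The inferred output shape is the input shape with the sizes of
+      // concat_dimension and split_dimension exchanged; every other
+      // dimension passes through unchanged.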
+      std::vector<DimensionHandle> dims;
+      dims.resize(rank);
+
+      for (int32 i = 0; i < rank; ++i) {
+        int64 in_idx = i;
+        if (i == concat_dimension) {
+          in_idx = split_dimension;
+        } else if (i == split_dimension) {
+          in_idx = concat_dimension;
+        }
+
+        dims[i] = c->Dim(input, in_idx);
+      }
+
+      c->set_output(0, c->MakeShape(dims));
+      return Status::OK();
+    });
+
+REGISTER_OP("CrossReplicaSum")
+    .Input("input: T")
+    .Input("group_assignment: int32")
+    .Output("output: T")
+    .Attr("T: {bfloat16, float, int32, uint32}")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("CollectivePermute")
+    .Input("input: T")
+    .Input("source_target_pairs: int32")
+    .Output("output: T")
+    .Attr("T: numbertype")
+    .SetShapeFn(shape_inference::UnchangedShape);
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/tpu_embedding_ops.cc b/tensorflow/core/ops/tpu_embedding_ops.cc
new file mode 100644
index 0000000..79ebc09
--- /dev/null
+++ b/tensorflow/core/ops/tpu_embedding_ops.cc
@@ -0,0 +1,430 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/protobuf/tpu/tpu_embedding_configuration.pb.h"
+#include "tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h"
+#include "tensorflow/core/tpu/tpu_embedding_output_layout_utils.h"
+
+namespace tensorflow {
+
+// TPUs use a specialized mechanism for performing embedding lookups,
+// necessitating differences in TF Graphs that use embeddings on TPUs relative
+// to CPUs. Embedding lookups on TPU systems are achieved by including the
+// following in the TF Graph.
+//
+// 0. Construct a TPUEmbeddingConfiguration, specifying the embedding tables
+//    in the model, the size of the TPU system to be used, and the optimizer to
+//    be used for each table. Some of this information is redundant with other
+//    pieces of the TF Graph.
+// 1. Pass this TPUEmbeddingConfiguration to tpu.initialize_system() as the
+//    tpu_embedding_config parameter.
+// 2. Use the LoadTPUEmbedding Ops to initialize the embedding tables in TPU
+//    memories, sharded across the memories attached to each Host.
+// 3. Use EnqueueTPUEmbeddingSparseBatch to provide the TPU with embedding
+//    indices and aggregation weights.
+// 4. RecvTPUEmbeddingActivations returns a list of Tensors, containing the
+//    activations from each table specified in the configuration.
+// 5. TPUEmbeddingActivations, when used with appropriate Python libraries,
+//    enables the automatic differentiation of models that use embeddings.
+// 6. SendTPUEmbeddingGradients takes a list of Tensors (of the same shapes
+//    as those returned by RecvTPUEmbeddingActivations) containing gradients
+//    to use in updating the embedding tables.
+// 7. Before saving a checkpoint, use the RetrieveTPUEmbedding Ops to update
+//    the Graph's embedding table Variables from the updated tables in the
+//    TPU memories.
+//
+// TPU Embeddings use dedicated ops to enforce Host/TPU consistency in the
+// state of embedding table variables. Before beginning training or inference,
+// the model must Load the optimizer parameters into the TPU memories. Before
+// saving a checkpoint, the model must Retrieve the parameters back into the
+// host CPU memory.
+
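
A minimal sketch of the Load-before-train, Retrieve-before-save protocol described above, in plain C++ with hypothetical names (EmbeddingStateGuard is not a TensorFlow class); it illustrates only the ordering constraints, not the ops themselves.

#include <stdexcept>

class EmbeddingStateGuard {
 public:
  // Step 2: LoadTPUEmbedding* must run before any training step.
  void Load() { loaded_ = true; }
  // Steps 3-6: enqueue indices, receive activations, send gradients.
  void TrainStep() const {
    if (!loaded_) throw std::logic_error("Load parameters before training.");
  }
  // Step 7: RetrieveTPUEmbedding* must run before saving a checkpoint.
  void Retrieve() {
    if (!loaded_) throw std::logic_error("Nothing to retrieve.");
    retrieved_ = true;
  }
  void SaveCheckpoint() const {
    if (!retrieved_) {
      throw std::logic_error("Retrieve parameters before saving.");
    }
  }

 private:
  bool loaded_ = false;
  bool retrieved_ = false;
};

int main() {
  EmbeddingStateGuard guard;
  guard.Load();
  guard.TrainStep();
  guard.Retrieve();
  guard.SaveCheckpoint();
}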
+namespace {
+
+void RegisterPerTableLoadAndRetrieveOps();
+
+class RegisterPerTableLoadAndRetrieveOpsOnConstruction {
+ public:
+  RegisterPerTableLoadAndRetrieveOpsOnConstruction() {
+    RegisterPerTableLoadAndRetrieveOps();
+  }
+};
+
+// Object whose constructor does registrations.
+RegisterPerTableLoadAndRetrieveOpsOnConstruction
+    register_per_table_load_and_retrieve_ops_var;
+
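
The class above relies on the standard static-construction registration idiom. Here is a standalone sketch of it (with a toy registry; TensorFlow's OpRegistry plays this role in the code above) showing how work scheduled this way runs before main():

#include <functional>
#include <iostream>
#include <vector>

// Toy registry, built lazily as a function-local static.
std::vector<std::function<void()>>& Registry() {
  static std::vector<std::function<void()>> registry;
  return registry;
}

struct RegisterOnConstruction {
  explicit RegisterOnConstruction(std::function<void()> fn) {
    Registry().push_back(std::move(fn));
  }
};

// The constructor runs during static initialization, so the hook is already
// registered by the time main() executes.
RegisterOnConstruction register_hook([] { std::cout << "registered\n"; });

int main() {
  for (const auto& fn : Registry()) fn();
}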
+Status RegisterPerTableLoadOpsForAlgorithmBody(
+    tpu::OptimizationAlgorithm alg, bool is_debug_op,
+    OpRegistrationData* op_reg_data) {
+  tpu::GradientAccumulationSupport grad_accum_support;
+  TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+
+  std::vector<tpu::StateVariableSpecification> state_variable_specs;
+  TF_CHECK_OK(GetOptimizationAlgorithmStateVariables(
+      alg,
+      grad_accum_support == tpu::GradientAccumulationSupport::kSupported &&
+          is_debug_op,
+      &state_variable_specs));
+  auto* op_def = &op_reg_data->op_def;
+  op_def->set_name(
+      strings::StrCat("LoadTPUEmbedding", GetOptimizationAlgorithmName(alg),
+                      "Parameters", (is_debug_op ? "GradAccumDebug" : "")));
+  // It is important for the order of the inputs to the op defined here
+  // to match the order in input_names because the indexes are used in
+  // the combining transformation.
+  for (const auto& parameter : state_variable_specs) {
+    if (parameter.has_user_defined() || is_debug_op) {
+      auto* arg = op_def->add_input_arg();
+      arg->set_name(parameter.name());
+      arg->set_type(DT_FLOAT);
+    }
+  }
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
+  }
+  {
+    auto* table_name_attr = op_def->add_attr();
+    table_name_attr->set_name("table_name");
+    table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
+  }
+  {
+    auto* num_shards_attr = op_def->add_attr();
+    num_shards_attr->set_name("num_shards");
+    num_shards_attr->set_type("int");
+  }
+  {
+    auto* shard_id_attr = op_def->add_attr();
+    shard_id_attr->set_name("shard_id");
+    shard_id_attr->set_type("int");
+  }
+  string parameter_descriptions;
+  for (const auto& parameter : state_variable_specs) {
+    if (parameter.has_user_defined() || is_debug_op) {
+      strings::Appendf(&parameter_descriptions,
+                       R"(
+%s: A tensor containing the initial embedding table %s to use in embedding
+lookups using the %s optimization algorithm.)",
+                       parameter.name().c_str(), parameter.name().c_str(),
+                       GetOptimizationAlgorithmFriendlyName(alg).c_str());
+    }
+  }
+  op_def->set_is_commutative(false);
+  op_def->set_is_aggregate(false);
+  op_def->set_is_stateful(true);
+  auto shape_inference_function =
+      [state_variable_specs,
+       is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
+    string table_name;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
+    }
+    int num_shards;
+    TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
+    int shard_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("shard_id", &shard_id));
+    const int user_param_count =
+        std::count_if(state_variable_specs.begin(), state_variable_specs.end(),
+                      [&](const tpu::StateVariableSpecification& sv) {
+                        return sv.has_user_defined() || is_debug_op;
+                      });
+    std::vector<shape_inference::ShapeHandle> inputs(user_param_count);
+    int input_index = 0;
+    for (int i = 0; i < state_variable_specs.size(); ++i) {
+      if (state_variable_specs[i].has_user_defined() || is_debug_op) {
+        std::vector<shape_inference::ShapeHandle> input_temp;
+        TF_RETURN_IF_ERROR(
+            c->input(state_variable_specs[i].name(), &input_temp));
+        if (input_temp.size() != 1) {
+          return errors::InvalidArgument("each input to be rank 1");
+        }
+        inputs[input_index] = input_temp[0];
+        ++input_index;
+      }
+    }
+    // Verify that each supplied shape has rank 2 and is compatible with the
+    // first (parameter) shape.
+    shape_inference::ShapeHandle parameter_shape;
+    TF_RETURN_IF_ERROR(c->WithRank(inputs[0], 2, &parameter_shape));
+    for (int j = 1; j < user_param_count; ++j) {
+      shape_inference::ShapeHandle accumulator_j_shape;
+      TF_RETURN_IF_ERROR(c->WithRank(inputs[j], 2, &accumulator_j_shape));
+      shape_inference::ShapeHandle merged;
+      TF_RETURN_IF_ERROR(
+          c->Merge(parameter_shape, accumulator_j_shape, &merged));
+    }
+    return Status::OK();
+  };
+  op_reg_data->shape_inference_fn = shape_inference_function;
+  return Status::OK();
+}
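
As a concrete illustration of the name composition above (a standalone sketch; LoadOpName is not a TensorFlow helper): the Adagrad algorithm with is_debug_op set yields "LoadTPUEmbeddingAdagradParametersGradAccumDebug".

#include <iostream>
#include <string>

std::string LoadOpName(const std::string& algorithm_name, bool is_debug_op) {
  return "LoadTPUEmbedding" + algorithm_name + "Parameters" +
         (is_debug_op ? "GradAccumDebug" : "");
}

int main() {
  std::cout << LoadOpName("Adagrad", true) << '\n';
  std::cout << LoadOpName("StochasticGradientDescent", false) << '\n';
}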
+
+Status RegisterPerTableRetrieveOpsForAlgorithmBody(
+    tpu::OptimizationAlgorithm alg, bool is_debug_op,
+    OpRegistrationData* op_reg_data) {
+  tpu::GradientAccumulationSupport grad_accum_support;
+  TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+
+  std::vector<tpu::StateVariableSpecification> state_variable_specs;
+  TF_CHECK_OK(GetOptimizationAlgorithmStateVariables(
+      alg,
+      grad_accum_support == tpu::GradientAccumulationSupport::kSupported &&
+          is_debug_op,
+      &state_variable_specs));
+
+  auto* op_def = &op_reg_data->op_def;
+  op_def->set_name(strings::StrCat(
+      "RetrieveTPUEmbedding", tpu::GetOptimizationAlgorithmName(alg),
+      "Parameters", (is_debug_op ? "GradAccumDebug" : "")));
+  // It is important for the order of the outputs of the op defined here
+  // to match the order in output_names because the indexes are used in
+  // the combining transformation.
+  for (const auto& parameter : state_variable_specs) {
+    if (parameter.has_user_defined() || is_debug_op) {
+      auto* arg = op_def->add_output_arg();
+      arg->set_name(parameter.name());
+      arg->set_type(DT_FLOAT);
+    }
+  }
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
+  }
+  {
+    auto* table_name_attr = op_def->add_attr();
+    table_name_attr->set_name("table_name");
+    table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
+  }
+  {
+    auto* num_shards_attr = op_def->add_attr();
+    num_shards_attr->set_name("num_shards");
+    num_shards_attr->set_type("int");
+  }
+  {
+    auto* shard_id_attr = op_def->add_attr();
+    shard_id_attr->set_name("shard_id");
+    shard_id_attr->set_type("int");
+  }
+  string parameter_descriptions;
+  for (const auto& param : state_variable_specs) {
+    if (param.has_user_defined() || is_debug_op) {
+      strings::Appendf(&parameter_descriptions,
+                       R"(
+%s: A tensor containing the embedding table %s to store with the
+parameters from embedding updates using the %s optimization algorithm.)",
+                       param.name().c_str(), param.name().c_str(),
+                       tpu::GetOptimizationAlgorithmFriendlyName(alg).c_str());
+    }
+  }
+  op_def->set_is_commutative(false);
+  op_def->set_is_aggregate(false);
+  op_def->set_is_stateful(true);
+  auto shape_inference_function =
+      [state_variable_specs,
+       is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
+    string table_name;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
+    }
+    int num_shards;
+    TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
+    int shard_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("shard_id", &shard_id));
+    for (int j = 0; j < state_variable_specs.size(); ++j) {
+      if (state_variable_specs[j].has_user_defined() || is_debug_op) {
+        auto shape = c->MakeShape(
+            std::vector<shape_inference::DimensionHandle>(2, c->UnknownDim()));
+        TF_RETURN_IF_ERROR(
+            c->set_output(state_variable_specs[j].name(),
+                          std::vector<shape_inference::ShapeHandle>(1, shape)));
+      }
+    }
+    return Status::OK();
+  };
+  op_reg_data->shape_inference_fn = shape_inference_function;
+  return Status::OK();
+}
+
+void RegisterPerTableLoadAndRetrieveOps() {
+  // Load ops
+  for (tpu::OptimizationAlgorithm alg : tpu::GetOptimizationAlgorithms()) {
+    OpRegistry::Global()->Register(
+        [alg](OpRegistrationData* op_reg_data) -> Status {
+          return RegisterPerTableLoadOpsForAlgorithmBody(alg, false,
+                                                         op_reg_data);
+        });
+    tpu::GradientAccumulationSupport grad_accum_support;
+    TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+    if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
+      OpRegistry::Global()->Register(
+          [alg](OpRegistrationData* op_reg_data) -> Status {
+            return RegisterPerTableLoadOpsForAlgorithmBody(alg, true,
+                                                           op_reg_data);
+          });
+    }
+  }
+  // Retrieve ops
+  for (tpu::OptimizationAlgorithm alg : tpu::GetOptimizationAlgorithms()) {
+    OpRegistry::Global()->Register(
+        [alg](OpRegistrationData* op_reg_data) -> Status {
+          return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, false,
+                                                             op_reg_data);
+        });
+    tpu::GradientAccumulationSupport grad_accum_support;
+    TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
+    if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
+      OpRegistry::Global()->Register(
+          [alg](OpRegistrationData* op_reg_data) -> Status {
+            return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true,
+                                                               op_reg_data);
+          });
+    }
+  }
+}
+
+}  // namespace
+
+REGISTER_OP("RecvTPUEmbeddingActivations")
+    .Output("outputs: num_outputs * float32")
+    .Attr("num_outputs: int >= 1")
+    .Attr("config: string")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
+      string config_string;
+      TF_RETURN_IF_ERROR(c->GetAttr("config", &config_string));
+      tpu::TPUEmbeddingConfiguration config;
+      if (!config.ParseFromString(config_string)) {
+        return errors::InvalidArgument("Malformed tpu_embedding_config.");
+      }
+      tpu::AddDefaultEmbeddingOutputLayoutIfNeeded(&config);
+      std::vector<TensorShapeProto> output_shapes;
+      TF_RETURN_IF_ERROR(ComputeOutputTensorShapes(config, &output_shapes));
+      if (c->num_outputs() != output_shapes.size()) {
+        return errors::InvalidArgument("num outputs != size of output shapes");
+      }
+      for (int i = 0; i < c->num_outputs(); ++i) {
+        shape_inference::ShapeHandle output_shape;
+        TF_RETURN_IF_ERROR(
+            c->MakeShapeFromShapeProto(output_shapes[i], &output_shape));
+        c->set_output(i, output_shape);
+      }
+      return Status::OK();
+    });
+
+REGISTER_OP("TPUEmbeddingActivations")
+    .Input("embedding_variable: float32")
+    .Input("sliced_activations: float32")
+    .Output("output: float32")
+    .Attr("table_id: int >= 0")
+    .Attr("lookup_id: int >= 0")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->input(1));
+      return Status::OK();
+    });
+
+REGISTER_OP("SendTPUEmbeddingGradients")
+    .Input("inputs: N * float32")
+    .Input("learning_rates: NN * float32")
+    .Attr("N: int >= 1")
+    .Attr("NN: int >= 0 = 0")
+    .Attr("config: string")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
+      int nn;
+      TF_RETURN_IF_ERROR(c->GetAttr("NN", &nn));
+      std::vector<shape_inference::ShapeHandle> learning_rates;
+      TF_RETURN_IF_ERROR(c->input("learning_rates", &learning_rates));
+      for (int i = 0; i < nn; ++i) {
+        // Verify that each learning_rates element is a scalar.
+        shape_inference::ShapeHandle learning_rates_shape;
+        TF_RETURN_IF_ERROR(
+            c->WithRank(learning_rates[i], 0, &learning_rates_shape));
+      }
+
+      return Status::OK();
+    });
+
+REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
+    .Input("batch: N * int32")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
+    .Attr("device_ordinal: int = -1")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
+    .Input("sample_indices: N * int32")
+    .Input("embedding_indices: N * int32")
+    .Input("aggregation_weights: N * float32")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
+    .Attr("device_ordinal: int = -1")
+    .Attr("combiners: list(string) = []")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext* c) -> Status {
+      std::vector<string> combiners;
+      TF_RETURN_IF_ERROR(c->GetAttr("combiners", &combiners));
+      int n;
+      TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
+      if (!combiners.empty() && combiners.size() != n) {
+        return errors::InvalidArgument("Invalid length of combiners. Have ",
+                                       combiners.size(), " but expected 0 or ",
+                                       n);
+      }
+
+      return Status::OK();
+    });
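
The combiners check above in isolation, as a small standalone sketch (CombinersValid is an illustrative name): the attribute may be empty, in which case a default combiner is used for every table, or it must supply exactly one combiner per input.

#include <cassert>
#include <string>
#include <vector>

bool CombinersValid(const std::vector<std::string>& combiners, int n) {
  return combiners.empty() || static_cast<int>(combiners.size()) == n;
}

int main() {
  assert(CombinersValid({}, 3));                        // default combiners
  assert(CombinersValid({"sum", "mean", "sqrtn"}, 3));  // one per input
  assert(!CombinersValid({"sum"}, 3));                  // invalid length
}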
+
+REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
+    .Input("sample_indices: N * int32")
+    .Input("embedding_indices: N * int32")
+    .Input("aggregation_weights: N * float32")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
+    .Attr("device_ordinal: int = -1")
+    .Attr("combiners: list(string) = []")
+    .Attr("table_ids: list(int)")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape);
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/functional_ops.cc b/tensorflow/core/ops/tpu_functional_ops.cc
similarity index 100%
rename from tensorflow/contrib/tpu/ops/functional_ops.cc
rename to tensorflow/core/ops/tpu_functional_ops.cc
diff --git a/tensorflow/core/ops/tpu_heartbeat_ops.cc b/tensorflow/core/ops/tpu_heartbeat_ops.cc
new file mode 100644
index 0000000..660aa32
--- /dev/null
+++ b/tensorflow/core/ops/tpu_heartbeat_ops.cc
@@ -0,0 +1,28 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+REGISTER_OP("WorkerHeartbeat")
+    .Input("request: string")
+    .Output("response: string")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ScalarShape);
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/host_compute_ops.cc b/tensorflow/core/ops/tpu_host_compute_ops.cc
similarity index 100%
rename from tensorflow/contrib/tpu/ops/host_compute_ops.cc
rename to tensorflow/core/ops/tpu_host_compute_ops.cc
diff --git a/tensorflow/core/ops/tpu_infeed_ops.cc b/tensorflow/core/ops/tpu_infeed_ops.cc
new file mode 100644
index 0000000..0090b76
--- /dev/null
+++ b/tensorflow/core/ops/tpu_infeed_ops.cc
@@ -0,0 +1,66 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("InfeedDequeue")
+    .Output("output: dtype")
+    .Attr("dtype: type")
+    .Attr("shape: shape")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ExplicitShape);
+
+REGISTER_OP("InfeedEnqueue")
+    .Input("input: dtype")
+    .Attr("dtype: type")
+    .Attr("shape: shape = {}")
+    .Attr("layout: list(int) = []")
+    .Attr("device_ordinal: int = -1")
+    .SetShapeFn(shape_inference::NoOutputs)
+    .SetIsStateful();
+
+REGISTER_OP("InfeedEnqueueTuple")
+    .Input("inputs: dtypes")
+    .Attr("dtypes: list(type)")
+    .Attr("shapes: list(shape)")
+    .Attr("layouts: list(int) = []")
+    .Attr("device_ordinal: int = -1")
+    .SetShapeFn(shape_inference::NoOutputs)
+    .SetIsStateful();
+
+REGISTER_OP("InfeedDequeueTuple")
+    .Output("outputs: dtypes")
+    .Attr("dtypes: list(type)")
+    .Attr("shapes: list(shape)")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      std::vector<PartialTensorShape> shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("shapes", &shapes));
+      for (int i = 0; i < shapes.size(); ++i) {
+        ShapeHandle out;
+        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shapes[i], &out));
+        c->set_output(i, out);
+      }
+      return Status::OK();
+    });
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/tpu_ordinal_selector_ops.cc b/tensorflow/core/ops/tpu_ordinal_selector_ops.cc
new file mode 100644
index 0000000..72f24c5
--- /dev/null
+++ b/tensorflow/core/ops/tpu_ordinal_selector_ops.cc
@@ -0,0 +1,30 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+REGISTER_OP("TPUOrdinalSelector")
+    .Output("device_ordinals: int32")
+    .SetIsStateful()
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
+      c->set_output(0,
+                    c->Vector(shape_inference::InferenceContext::kUnknownDim));
+      return Status::OK();
+    });
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/tpu_outfeed_ops.cc b/tensorflow/core/ops/tpu_outfeed_ops.cc
new file mode 100644
index 0000000..e170ed0
--- /dev/null
+++ b/tensorflow/core/ops/tpu_outfeed_ops.cc
@@ -0,0 +1,68 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("OutfeedEnqueue")
+    .Input("input: dtype")
+    .Attr("dtype: type")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::NoOutputs);
+
+REGISTER_OP("OutfeedEnqueueTuple")
+    .Input("inputs: dtypes")
+    .Attr("dtypes: list(type)")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::NoOutputs);
+
+REGISTER_OP("OutfeedDequeue")
+    .Output("output: dtype")
+    .Attr("dtype: type")
+    .Attr("shape: shape")
+    .Attr("device_ordinal: int = -1")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ExplicitShape);
+
+REGISTER_OP("OutfeedDequeueTuple")
+    .Output("outputs: dtypes")
+    .Attr("dtypes: list(type)")
+    .Attr("shapes: list(shape)")
+    .Attr("device_ordinal: int = -1")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      std::vector<PartialTensorShape> shapes;
+      std::vector<DataType> dtypes;
+      TF_RETURN_IF_ERROR(c->GetAttr("shapes", &shapes));
+      TF_RETURN_IF_ERROR(c->GetAttr("dtypes", &dtypes));
+      if (shapes.size() != dtypes.size()) {
+        return errors::InvalidArgument(
+            "Incorrect number of output shapes specified");
+      }
+      for (int i = 0; i < shapes.size(); ++i) {
+        ShapeHandle out;
+        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shapes[i], &out));
+        c->set_output(i, out);
+      }
+      return Status::OK();
+    });
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/tpu_replication_ops.cc b/tensorflow/core/ops/tpu_replication_ops.cc
new file mode 100644
index 0000000..7c89492
--- /dev/null
+++ b/tensorflow/core/ops/tpu_replication_ops.cc
@@ -0,0 +1,113 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("TPUReplicateMetadata")
+    .Attr("num_replicas: int >= 0")
+    .Attr("num_cores_per_replica: int = 1")
+    .Attr("topology: string = \"\"")
+    .Attr("use_tpu: bool = true")
+    .Attr("device_assignment: list(int) = []")
+    // Deprecated. Use num_cores_per_replica instead.
+    .Attr("computation_shape: list(int) = []")
+    .Attr("host_compute_core: list(string) = []")
+    .Attr("padding_map: list(string) = []")
+    .Attr("step_marker_location: string = \"STEP_MARK_AT_ENTRY\"")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("TPUReplicatedInput")
+    .Input("inputs: N * T")
+    .Output("output: T")
+    .Attr("N: int >= 1")
+    .Attr("T: type")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle cur = c->input(c->num_inputs() - 1);
+      for (int i = c->num_inputs() - 2; i >= 0; --i) {
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(c->Merge(c->input(i), cur, &cur),
+                                        "From merging shape ", i,
+                                        " with other shapes.");
+      }
+      c->set_output(0, cur);
+
+      // If this is a resource, unify the resource shapes.
+      DataType dtype;
+      TF_RETURN_IF_ERROR(c->GetAttr("T", &dtype));
+      if (dtype == DT_RESOURCE) {
+        const std::vector<shape_inference::ShapeAndType>* shapes_and_types =
+            nullptr;
+        for (int i = c->num_inputs() - 1; i >= 0; --i) {
+          if (shapes_and_types) {
+            // The return value of MergeInputHandleShapesAndTypes indicates
+            // the shape was refined, not that there was an error.
+            // TODO(phawkins): there seems to be no way to discover errors.
+            (void)c->MergeInputHandleShapesAndTypes(i, *shapes_and_types);
+          } else {
+            shapes_and_types = c->input_handle_shapes_and_types(i);
+          }
+        }
+        if (shapes_and_types) {
+          c->set_output_handle_shapes_and_types(0, *shapes_and_types);
+        }
+      }
+      return Status::OK();
+    });
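
The merge loop above folds all replica input shapes into one. A standalone sketch of the same merge rule, using -1 for an unknown dimension (MergeShapes is illustrative, not the InferenceContext API):

#include <cstddef>
#include <iostream>
#include <optional>
#include <vector>

std::optional<std::vector<long long>> MergeShapes(
    std::vector<long long> a, const std::vector<long long>& b) {
  if (a.size() != b.size()) return std::nullopt;  // rank mismatch
  for (std::size_t i = 0; i < a.size(); ++i) {
    if (a[i] == -1) {
      a[i] = b[i];  // unknown dim refined by the other shape
    } else if (b[i] != -1 && a[i] != b[i]) {
      return std::nullopt;  // known dims disagree
    }
  }
  return a;
}

int main() {
  auto merged = MergeShapes({-1, 128}, {32, -1});
  if (merged) std::cout << (*merged)[0] << "x" << (*merged)[1] << '\n';  // 32x128
}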
+
+REGISTER_OP("TPUReplicatedOutput")
+    .Input("input: T")
+    .Output("outputs: num_replicas * T")
+    .Attr("num_replicas: int >= 1")
+    .Attr("T: type")
+    .SetShapeFn([](InferenceContext* c) {
+      for (int i = 0; i < c->num_outputs(); ++i) {
+        c->set_output(i, c->input(0));
+      }
+      return Status::OK();
+    });
+
+REGISTER_OP("TPUCompilationResult")
+    .Output("output: string")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("TPUReplicate")
+    .Attr("computation: func")
+    .Attr("num_replicas: int >= 1")
+    .Attr("num_cores_per_replica: int = 1")
+    .Attr("topology: string = \"\"")
+    .Attr("use_tpu: bool = true")
+    .Attr("device_assignment: list(int) = []")
+    .Attr("host_compute_core: list(string) = []")
+    .Attr("Tinputs: list(type) >= 0")
+    .Attr("Tbroadcast_inputs: list(type) >= 0")
+    .Attr("NumVariables: int >= 0")
+    .Attr("Tguaranteed_constants: list(type) >= 0")
+    .Attr("output_types: list(type) >= 0")
+    .Attr("padding_map: list(string) = []")
+    .Attr("step_marker_location: string = \"STEP_MARK_AT_ENTRY\"")
+    .Input("inputs: Tinputs")
+    .Input("broadcast_inputs: Tbroadcast_inputs")
+    .Input("variables: NumVariables * resource")
+    .Input("guaranteed_constants: Tguaranteed_constants")
+    .Output("outputs: output_types")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index 647a797..40a808b 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -151,6 +151,7 @@
         ":retrying_utils",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index d1e5779..10b57df 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -64,7 +64,7 @@
 // The HTTP response code "308 Resume Incomplete".
 constexpr uint64 HTTP_CODE_RESUME_INCOMPLETE = 308;
 // The environment variable that overrides the size of the readahead buffer.
-ABSL_DEPRECATED("Use GCS_BLOCK_SIZE_MB instead.")
+ABSL_DEPRECATED("Use GCS_READ_CACHE_BLOCK_SIZE_MB instead.")
 constexpr char kReadaheadBufferSize[] = "GCS_READAHEAD_BUFFER_SIZE_BYTES";
 // The environment variable that disables the GCS block cache for reads.
 // This is the explicit alternative to setting BLOCK_SIZE or MAX_SIZE to 0, and
@@ -73,11 +73,11 @@
 // The environment variable that overrides the block size for aligned reads from
 // GCS. Specified in MB (e.g. "16" = 16 x 1024 x 1024 = 16777216 bytes).
 constexpr char kBlockSize[] = "GCS_READ_CACHE_BLOCK_SIZE_MB";
-constexpr size_t kDefaultBlockSize = 128 * 1024 * 1024;
+constexpr size_t kDefaultBlockSize = 16 * 1024 * 1024;
 // The environment variable that overrides the max size of the LRU cache of
 // blocks read from GCS. Specified in MB.
 constexpr char kMaxCacheSize[] = "GCS_READ_CACHE_MAX_SIZE_MB";
-constexpr size_t kDefaultMaxCacheSize = 2 * kDefaultBlockSize;
+constexpr size_t kDefaultMaxCacheSize = kDefaultBlockSize;
 // The environment variable that overrides the maximum staleness of cached file
 // contents. Once any block of a file reaches this staleness, all cached blocks
 // will be evicted on the next read.
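
These knobs are specified in MB and converted to bytes. A standalone sketch of that convention (ReadMbEnv is an illustrative helper, not the GcsFileSystem parser):

#include <cstdint>
#include <cstdlib>
#include <iostream>

std::uint64_t ReadMbEnv(const char* name, std::uint64_t default_bytes) {
  const char* value = std::getenv(name);
  if (value == nullptr) return default_bytes;
  // e.g. "16" -> 16 * 1024 * 1024 = 16777216 bytes.
  return std::strtoull(value, nullptr, 10) * 1024 * 1024;
}

int main() {
  std::cout << ReadMbEnv("GCS_READ_CACHE_BLOCK_SIZE_MB", 16ull << 20) << '\n';
}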
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index dc9eb77..a998f8e 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -3155,8 +3155,8 @@
 TEST(GcsFileSystemTest, OverrideCacheParameters) {
   // Verify defaults are propagated correctly.
   GcsFileSystem fs1;
-  EXPECT_EQ(128 * 1024 * 1024, fs1.block_size());
-  EXPECT_EQ(2 * fs1.block_size(), fs1.max_bytes());
+  EXPECT_EQ(16 * 1024 * 1024, fs1.block_size());
+  EXPECT_EQ(fs1.block_size(), fs1.max_bytes());
   EXPECT_EQ(0, fs1.max_staleness());
   EXPECT_EQ(120, fs1.timeouts().connect);
   EXPECT_EQ(60, fs1.timeouts().idle);
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index e154007..e91a9f8 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -22,6 +22,7 @@
 #endif
 #include <fstream>
 #include <utility>
+#include "absl/strings/match.h"
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -44,6 +45,11 @@
 // The environment variable which can override '~/.config/gcloud' if set.
 constexpr char kCloudSdkConfig[] = "CLOUDSDK_CONFIG";
 
+// The environment variable used to skip attempting to fetch GCE credentials:
+// setting this to 'true' (case insensitive) skips contacting the GCE
+// metadata service.
+constexpr char kNoGceCheck[] = "NO_GCE_CHECK";
+
 // The default path to the gcloud config folder, relative to the home folder.
 constexpr char kGCloudConfigFolder[] = ".config/gcloud/";
 
@@ -146,10 +152,25 @@
   }
 
   auto token_from_files_status = GetTokenFromFiles();
-  auto token_from_gce_status =
-      token_from_files_status.ok() ? Status::OK() : GetTokenFromGce();
+  if (token_from_files_status.ok()) {
+    *t = current_token_;
+    return Status::OK();
+  }
 
-  if (token_from_files_status.ok() || token_from_gce_status.ok()) {
+  char* no_gce_check_var = std::getenv(kNoGceCheck);
+  bool skip_gce_check = no_gce_check_var != nullptr &&
+                        absl::EqualsIgnoreCase(no_gce_check_var, "true");
+  Status token_from_gce_status;
+  if (skip_gce_check) {
+    token_from_gce_status =
+        Status(error::CANCELLED,
+               strings::StrCat("GCE check skipped due to presence of $",
+                               kNoGceCheck, " environment variable."));
+  } else {
+    token_from_gce_status = GetTokenFromGce();
+  }
+
+  if (token_from_gce_status.ok()) {
     *t = current_token_;
     return Status::OK();
   }
@@ -165,8 +186,13 @@
   // so return an empty token instead of failing.
   *t = "";
 
-  // From now on, always return the empty token.
-  expiration_timestamp_sec_ = UINT64_MAX;
+  // We only want to keep returning our empty token if we've tried and failed
+  // the (potentially slow) task of detecting GCE.
+  if (skip_gce_check) {
+    expiration_timestamp_sec_ = 0;
+  } else {
+    expiration_timestamp_sec_ = UINT64_MAX;
+  }
   current_token_ = "";
 
   return Status::OK();
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.h b/tensorflow/core/platform/cloud/google_auth_provider.h
index 3755b12..4ab816d 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.h
+++ b/tensorflow/core/platform/cloud/google_auth_provider.h
@@ -51,7 +51,7 @@
   /// Gets the bearer token from Google Compute Engine environment.
   Status GetTokenFromGce() EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
-  /// Gets the bearer token from the systen env variable, for testing purposes.
+  /// Gets the bearer token from the system env variable, for testing purposes.
   Status GetTokenForTesting() EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
   std::unique_ptr<OAuthClient> oauth_client_;
diff --git a/tensorflow/core/platform/cloud/google_auth_provider_test.cc b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
index ec31c5e..d2db592 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider_test.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
@@ -69,9 +69,10 @@
   void TearDown() override { ClearEnvVars(); }
 
   void ClearEnvVars() {
-    unsetenv("GOOGLE_APPLICATION_CREDENTIALS");
     unsetenv("CLOUDSDK_CONFIG");
+    unsetenv("GOOGLE_APPLICATION_CREDENTIALS");
     unsetenv("GOOGLE_AUTH_TOKEN_FOR_TESTING");
+    unsetenv("NO_GCE_CHECK");
   }
 };
 
@@ -238,4 +239,31 @@
   EXPECT_EQ("", token);
 }
 
+TEST_F(GoogleAuthProviderTest, NoGceCheckEnvironmentVariable) {
+  setenv("NO_GCE_CHECK", "True", 1);
+  auto oauth_client = new FakeOAuthClient;
+
+  FakeEnv env;
+  // If the env var above isn't respected, attempting to fetch a token
+  // from GCE will segfault (as the metadata client is null).
+  GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
+                              nullptr, &env);
+
+  string token;
+  TF_EXPECT_OK(provider.GetToken(&token));
+  EXPECT_EQ("", token);
+
+  // Confirm that the env var check is case insensitive.
+  setenv("NO_GCE_CHECK", "true", 1);
+  TF_EXPECT_OK(provider.GetToken(&token));
+  EXPECT_EQ("", token);
+
+  // Also confirm that the empty token was given a short expiration: set a
+  // testing token and verify that it is returned instead of the empty token.
+  setenv("GOOGLE_AUTH_TOKEN_FOR_TESTING", "newToken", 1);
+  TF_EXPECT_OK(provider.GetToken(&token));
+  EXPECT_EQ("newToken", token);
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index f9e3e1d..f9ac4ff 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -530,19 +530,13 @@
     return [
         "platform/default/integral_types.h",
         "platform/default/logging.h",
-        "platform/default/protobuf.h",
     ] + if_windows([
         "platform/windows/integral_types.h",
     ])
 
-def tf_additional_proto_compiler_hdrs():
-    return [
-        "platform/default/protobuf_compiler.h",
-    ]
-
 def tf_additional_proto_srcs():
     return [
-        "platform/default/protobuf.cc",
+        "platform/protobuf.cc",
     ]
 
 def tf_additional_human_readable_json_deps():
@@ -560,6 +554,15 @@
         otherwise = ["//tensorflow/core:protos_all_cc"],
     )
 
+def tf_profiler_all_protos():
+    return ["//tensorflow/core/profiler:protos_all"]
+
+def tf_grpc_service_all():
+    return [
+        "//tensorflow/core/profiler:profiler_analysis_proto_cc",
+        "//tensorflow/core/profiler:profiler_service_proto_cc",
+    ]
+
 def tf_protos_grappler_impl():
     return ["//tensorflow/core/grappler/costs:op_performance_data_cc_impl"]
 
diff --git a/tensorflow/core/platform/default/device_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc
index 0d1bf39..ffcb38f 100644
--- a/tensorflow/core/platform/default/device_tracer.cc
+++ b/tensorflow/core/platform/default/device_tracer.cc
@@ -445,6 +445,7 @@
     : cupti_manager_(cupti_manager) {
   VLOG(1) << "DeviceTracer created.";
   cupti_wrapper_.reset(new perftools::gputools::profiler::CuptiWrapper());
+  host_tracer_ = profiler::cpu::HostTracer::Create(2);
   enabled_ = false;
 }
 
@@ -509,7 +510,6 @@
 
   CUPTI_CALL(GetTimestamp(&start_timestamp_));
   start_walltime_us_ = NowInUsec();
-  host_tracer_ = profiler::cpu::HostTracer::Create(2);
   host_tracer_->Start().IgnoreError();
   enabled_ = true;
   return Status::OK();
diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc b/tensorflow/core/platform/default/gpu/cupti_wrapper.cc
index 7ac5e5c..481bbf9 100644
--- a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc
+++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.cc
@@ -28,27 +28,26 @@
 
 namespace dynload {
 
-#define LIBCUPTI_WRAP(__name)                                                 \
-  struct DynLoadShim__##__name {                                              \
-    static const char* kName;                                                 \
-    using FuncPointerT = std::add_pointer<decltype(::__name)>::type;          \
-    static void* GetDsoHandle() {                                             \
-      static auto status =                                                    \
-          stream_executor::internal::CachedDsoLoader::GetLibcuptiDsoHandle(); \
-      return status.ValueOrDie();                                             \
-    }                                                                         \
-    static FuncPointerT DynLoad() {                                           \
-      static void* f;                                                         \
-      TF_CHECK_OK(::tensorflow::Env::Default()->GetSymbolFromLibrary(         \
-          GetDsoHandle(), kName, &f))                                         \
-          << "could not find " << kName << "in libcupti DSO";                 \
-      return reinterpret_cast<FuncPointerT>(f);                               \
-    }                                                                         \
-    template <typename... Args>                                               \
-    CUptiResult operator()(Args... args) {                                    \
-      return DynLoad()(args...);                                              \
-    }                                                                         \
-  } __name;                                                                   \
+#define LIBCUPTI_WRAP(__name)                                                \
+  struct DynLoadShim__##__name {                                             \
+    static const char* kName;                                                \
+    using FuncPointerT = std::add_pointer<decltype(::__name)>::type;         \
+    template <typename... Args>                                              \
+    CUptiResult operator()(Args... args) {                                   \
+      static auto fn = []() -> FuncPointerT {                                \
+        auto handle_or =                                                     \
+            stream_executor::internal::CachedDsoLoader::GetCuptiDsoHandle(); \
+        if (!handle_or.ok()) return nullptr;                                 \
+        void* symbol;                                                        \
+        stream_executor::port::Env::Default()                                \
+            ->GetSymbolFromLibrary(handle_or.ValueOrDie(), kName, &symbol)   \
+            .IgnoreError();                                                  \
+        return reinterpret_cast<FuncPointerT>(symbol);                       \
+      }();                                                                   \
+      if (fn == nullptr) return CUPTI_ERROR_UNKNOWN;                         \
+      return fn(args...);                                                    \
+    }                                                                        \
+  } __name;                                                                  \
   const char* DynLoadShim__##__name::kName = #__name;
 
 LIBCUPTI_WRAP(cuptiActivityDisable);
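
The rewritten macro resolves each symbol lazily and exactly once via a function-local static. A standalone sketch of that pattern with a stubbed-in loader (no real dlopen):

#include <iostream>

using AddFn = int (*)(int, int);

int CallAdd(int a, int b) {
  // Resolved on the first call only; later calls reuse the cached pointer.
  static AddFn fn = []() -> AddFn {
    // Stand-in for GetCuptiDsoHandle() + GetSymbolFromLibrary(); a real
    // loader may fail and return nullptr.
    return [](int x, int y) { return x + y; };
  }();
  if (fn == nullptr) return -1;  // analogous to returning CUPTI_ERROR_UNKNOWN
  return fn(a, b);
}

int main() { std::cout << CallAdd(2, 3) << '\n'; }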
diff --git a/tensorflow/core/platform/default/human_readable_json.cc b/tensorflow/core/platform/default/human_readable_json.cc
index bf9c7b7..977ff12 100644
--- a/tensorflow/core/platform/default/human_readable_json.cc
+++ b/tensorflow/core/platform/default/human_readable_json.cc
@@ -46,7 +46,7 @@
   return errors::Internal("Cannot parse JSON protos on Android");
 #else
   proto->Clear();
-  auto status = google::protobuf::util::JsonStringToMessage(str, proto);
+  auto status = protobuf::util::JsonStringToMessage(str, proto);
   if (!status.ok()) {
     // Convert error_msg google::protobuf::StringPiece to
     // tensorflow::StringPiece.
diff --git a/tensorflow/core/platform/default/platform.bzl b/tensorflow/core/platform/default/platform.bzl
index 20ab441..76bfaa8 100644
--- a/tensorflow/core/platform/default/platform.bzl
+++ b/tensorflow/core/platform/default/platform.bzl
@@ -5,55 +5,52 @@
 PLATFORM = ""
 
 def cuda_sdk_version():
-  return CUDA_VERSION
+    return CUDA_VERSION
 
 def cudnn_sdk_version():
-  return CUDNN_VERSION
+    return CUDNN_VERSION
 
 def cuda_library_path(name, version = cuda_sdk_version()):
-  if PLATFORM == "Darwin":
-    if not version:
-      return "lib/lib{}.dylib".format(name)
+    if PLATFORM == "Darwin":
+        if not version:
+            return "lib/lib{}.dylib".format(name)
+        else:
+            return "lib/lib{}.{}.dylib".format(name, version)
+    elif not version:
+        return "lib64/lib{}.so".format(name)
     else:
-      return "lib/lib{}.{}.dylib".format(name, version)
-  else:
-    if not version:
-      return "lib64/lib{}.so".format(name)
-    else:
-      return "lib64/lib{}.so.{}".format(name, version)
+        return "lib64/lib{}.so.{}".format(name, version)
 
 def cuda_static_library_path(name):
-  if PLATFORM == "Darwin":
-    return "lib/lib{}_static.a".format(name)
-  else:
-    return "lib64/lib{}_static.a".format(name)
+    if PLATFORM == "Darwin":
+        return "lib/lib{}_static.a".format(name)
+    else:
+        return "lib64/lib{}_static.a".format(name)
 
 def cudnn_library_path(version = cudnn_sdk_version()):
-  if PLATFORM == "Darwin":
-    if not version:
-      return "lib/libcudnn.dylib"
+    if PLATFORM == "Darwin":
+        if not version:
+            return "lib/libcudnn.dylib"
+        else:
+            return "lib/libcudnn.{}.dylib".format(version)
+    elif not version:
+        return "lib64/libcudnn.so"
     else:
-      return "lib/libcudnn.{}.dylib".format(version)
-  else:
-    if not version:
-      return "lib64/libcudnn.so"
-    else:
-      return "lib64/libcudnn.so.{}".format(version)
+        return "lib64/libcudnn.so.{}".format(version)
 
 def cupti_library_path(version = cuda_sdk_version()):
-  if PLATFORM == "Darwin":
-    if not version:
-      return "extras/CUPTI/lib/libcupti.dylib"
+    if PLATFORM == "Darwin":
+        if not version:
+            return "extras/CUPTI/lib/libcupti.dylib"
+        else:
+            return "extras/CUPTI/lib/libcupti.{}.dylib".format(version)
+    elif not version:
+        return "extras/CUPTI/lib64/libcupti.so"
     else:
-      return "extras/CUPTI/lib/libcupti.{}.dylib".format(version)
-  else:
-    if not version:
-      return "extras/CUPTI/lib64/libcupti.so"
-    else:
-      return "extras/CUPTI/lib64/libcupti.so.{}".format(version)
+        return "extras/CUPTI/lib64/libcupti.so.{}".format(version)
 
 def readlink_command():
-  if PLATFORM == "Darwin":
-    return "greadlink"
-  else:
-    return "readlink"
+    if PLATFORM == "Darwin":
+        return "greadlink"
+    else:
+        return "readlink"
diff --git a/tensorflow/core/platform/default/protobuf.cc b/tensorflow/core/platform/default/protobuf.cc
deleted file mode 100644
index 548d583..0000000
--- a/tensorflow/core/platform/default/protobuf.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/default/protobuf.h"
-
-namespace tensorflow {
-
-const char* kProtobufInt64Typename = "::google::protobuf::int64";
-const char* kProtobufUint64Typename = "::google::protobuf::uint64";
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/platform/default/protobuf.h b/tensorflow/core/platform/default/protobuf.h
deleted file mode 100644
index aeef2d9..0000000
--- a/tensorflow/core/platform/default/protobuf.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_
-#define TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_
-
-// IWYU pragma: private, include "third_party/tensorflow/core/platform/protobuf.h"
-// IWYU pragma: friend third_party/tensorflow/core/platform/protobuf.h
-
-#ifndef TENSORFLOW_LITE_PROTOS
-#include "google/protobuf/descriptor.h"
-#include "google/protobuf/descriptor.pb.h"
-#include "google/protobuf/dynamic_message.h"
-#include "google/protobuf/io/tokenizer.h"
-#include "google/protobuf/text_format.h"
-#include "google/protobuf/util/json_util.h"
-#include "google/protobuf/util/type_resolver_util.h"
-#endif
-
-#include "google/protobuf/arena.h"
-#include "google/protobuf/io/coded_stream.h"
-#include "google/protobuf/io/zero_copy_stream.h"
-#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
-#include "google/protobuf/map.h"
-#include "google/protobuf/repeated_field.h"
-
-namespace tensorflow {
-namespace protobuf = ::google::protobuf;
-using protobuf_int64 = ::google::protobuf::int64;
-using protobuf_uint64 = ::google::protobuf::uint64;
-extern const char* kProtobufInt64Typename;
-extern const char* kProtobufUint64Typename;
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_
diff --git a/tensorflow/core/platform/default/protobuf_compiler.h b/tensorflow/core/platform/default/protobuf_compiler.h
deleted file mode 100644
index a93d7a1..0000000
--- a/tensorflow/core/platform/default/protobuf_compiler.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_COMPILER_H_
-#define TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_COMPILER_H_
-
-// IWYU pragma: private, include "third_party/tensorflow/core/platform/protobuf_compiler.h"
-// IWYU pragma: friend third_party/tensorflow/core/platform/protobuf_compiler.h
-
-#include "google/protobuf/compiler/importer.h"
-#include "tensorflow/core/platform/default/protobuf.h"
-
-#endif  // TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_
diff --git a/tensorflow/core/platform/default/string_coding.cc b/tensorflow/core/platform/default/string_coding.cc
deleted file mode 100644
index 7410ee6..0000000
--- a/tensorflow/core/platform/default/string_coding.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/default/string_coding.h"
-
-namespace tensorflow {
-namespace port {
-
-std::unique_ptr<StringListEncoder> NewStringListEncoder(string* out) {
-  return std::unique_ptr<StringListEncoder>(new StringListEncoder(out));
-}
-
-std::unique_ptr<StringListDecoder> NewStringListDecoder(const string& in) {
-  return std::unique_ptr<StringListDecoder>(new StringListDecoder(in));
-}
-
-}  // namespace port
-}  // namespace tensorflow
diff --git a/tensorflow/core/platform/default/string_coding.h b/tensorflow/core/platform/default/string_coding.h
deleted file mode 100644
index 70b8ab0..0000000
--- a/tensorflow/core/platform/default/string_coding.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_
-#define TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_
-
-// IWYU pragma: private, include "third_party/tensorflow/core/platform/tensor_coding.h"
-// IWYU pragma: friend third_party/tensorflow/core/platform/tensor_coding.h
-
-#include "tensorflow/core/lib/core/coding.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/platform/protobuf.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace port {
-
-// Encodes sequences of strings and serialized protocol buffers into a string.
-// Normal usage consists of zero or more calls to Append() and a single call to
-// Finalize().
-class StringListEncoder {
- public:
-  explicit StringListEncoder(string* out) : out_(out) {}
-
-  // Encodes the given protocol buffer. This may not be called after Finalize().
-  void Append(const protobuf::MessageLite& m) {
-    core::PutVarint32(out_, m.ByteSize());
-    m.AppendToString(&rest_);
-  }
-
-  // Encodes the given string. This may not be called after Finalize().
-  void Append(const string& s) {
-    core::PutVarint32(out_, s.length());
-    strings::StrAppend(&rest_, s);
-  }
-
-  // Signals end of the encoding process. No other calls are allowed after this.
-  void Finalize() { strings::StrAppend(out_, rest_); }
-
- private:
-  string* out_;
-  string rest_;
-};
-
-// Decodes a string into sequences of strings (which may represent serialized
-// protocol buffers). Normal usage involves a single call to ReadSizes() in
-// order to retrieve the length of all the strings in the sequence. For each
-// size returned a call to Data() is expected and will return the actual
-// string.
-class StringListDecoder {
- public:
-  explicit StringListDecoder(const string& in) : reader_(in) {}
-
-  // Populates the given vector with the lengths of each string in the sequence
-  // being decoded. Upon returning the vector is guaranteed to contain as many
-  // elements as there are strings in the sequence.
-  bool ReadSizes(std::vector<uint32>* sizes) {
-    int64 total = 0;
-    for (auto& size : *sizes) {
-      if (!core::GetVarint32(&reader_, &size)) return false;
-      total += size;
-    }
-    if (total != static_cast<int64>(reader_.size())) {
-      return false;
-    }
-    return true;
-  }
-
-  // Returns a pointer to the next string in the sequence, then prepares for the
-  // next call by advancing 'size' characters in the sequence.
-  const char* Data(uint32 size) {
-    const char* data = reader_.data();
-    reader_.remove_prefix(size);
-    return data;
-  }
-
- private:
-  StringPiece reader_;
-};
-
-std::unique_ptr<StringListEncoder> NewStringListEncoder(string* out);
-std::unique_ptr<StringListDecoder> NewStringListDecoder(const string& in);
-
-}  // namespace port
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_
diff --git a/tensorflow/core/platform/grpc_services.h b/tensorflow/core/platform/grpc_services.h
new file mode 100644
index 0000000..cd91819
--- /dev/null
+++ b/tensorflow/core/platform/grpc_services.h
@@ -0,0 +1,33 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_PLATFORM_GRPC_SERVICES_H_
+#define TENSORFLOW_CORE_PLATFORM_GRPC_SERVICES_H_
+
+#include "tensorflow/core/profiler/profiler_analysis.grpc.pb.h"
+#include "tensorflow/core/profiler/profiler_service.grpc.pb.h"
+
+#if !defined(PLATFORM_GOOGLE)
+
+namespace tensorflow {
+namespace grpc {
+
+// Google-internal gRPC generates these services under the namespace
+// "tensorflow::grpc". The aliases below make the services accessible under
+// "tensorflow::grpc" in both the internal and the open-source builds.
+using ::tensorflow::ProfileAnalysis;
+using ::tensorflow::ProfilerService;
+
+}  // namespace grpc
+}  // namespace tensorflow
+#endif  // !defined(PLATFORM_GOOGLE)
+
+#endif  // TENSORFLOW_CORE_PLATFORM_GRPC_SERVICES_H_
diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc
index b02b1f3..f2dff5a 100644
--- a/tensorflow/core/platform/posix/env.cc
+++ b/tensorflow/core/platform/posix/env.cc
@@ -102,7 +102,7 @@
   }
 
   bool GetCurrentThreadName(string* name) override {
-#ifdef __ANDROID__
+#if defined(__ANDROID__) || defined(__EMSCRIPTEN__)
     return false;
 #else
     char buf[100];
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index ea6066a..807e008 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -82,7 +82,9 @@
 }
 
 int GetCurrentCPU() {
-#if defined(__linux__) && !defined(__ANDROID__)
+#if defined(__EMSCRIPTEN__)
+  return sched_getcpu();
+#elif defined(__linux__) && !defined(__ANDROID__)
   return sched_getcpu();
   // Attempt to use cpuid on all other platforms.  If that fails, perform a
   // syscall.
diff --git a/tensorflow/core/platform/protobuf.cc b/tensorflow/core/platform/protobuf.cc
new file mode 100644
index 0000000..c9e6f3b
--- /dev/null
+++ b/tensorflow/core/platform/protobuf.cc
@@ -0,0 +1,23 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/protobuf.h"
+
+namespace tensorflow {
+
+const char* kProtobufInt64Typename = "::tensorflow::protobuf_int64";
+const char* kProtobufUint64Typename = "::tensorflow::protobuf_uint64";
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/platform/protobuf.h b/tensorflow/core/platform/protobuf.h
index fcbf1fc..59f4129 100644
--- a/tensorflow/core/platform/protobuf.h
+++ b/tensorflow/core/platform/protobuf.h
@@ -25,13 +25,31 @@
 // TensorFlow code should use the ::tensorflow::protobuf namespace to
 // refer to all protobuf APIs.
 
-#if defined(PLATFORM_GOOGLE) && !defined(USE_DEFAULT_PROTOBUF)
-#include "tensorflow/core/platform/google/protobuf.h"
-#else
-#include "tensorflow/core/platform/default/protobuf.h"
+#ifndef TENSORFLOW_LITE_PROTOS
+#include "google/protobuf/io/tokenizer.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/dynamic_message.h"
+#include "google/protobuf/text_format.h"
+#include "google/protobuf/util/json_util.h"
+#include "google/protobuf/util/type_resolver_util.h"
 #endif
 
+#include "google/protobuf/io/coded_stream.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
+#include "google/protobuf/arena.h"
+#include "google/protobuf/map.h"
+#include "google/protobuf/repeated_field.h"
+
 namespace tensorflow {
+
+namespace protobuf = ::google::protobuf;
+using protobuf_int64 = ::google::protobuf::int64;
+using protobuf_uint64 = ::google::protobuf::uint64;
+extern const char* kProtobufInt64Typename;
+extern const char* kProtobufUint64Typename;
+
 // Parses a protocol buffer contained in a string in the binary wire format.
 // Returns true on success. Note: Unlike protobuf's builtin ParseFromString,
 // this function has no size restrictions on the total size of the encoded
@@ -47,9 +65,20 @@
 // Set <dest> to <src>. Swapping is allowed, as <src> does not need to be
 // preserved.
 inline void SetProtobufStringSwapAllowed(string* src, string* dest) {
-  dest->swap(*src);
+  *dest = std::move(*src);
 }
 
+#if defined(TENSORFLOW_PROTOBUF_USES_CORD)
+// These versions of ProtobufStringToString and SetProtobufStringSwapAllowed
+// get used by tools/proto_text's generated code. They have the same names as
+// the string versions above, so the generated code doesn't need to determine
+// if the type is Cord or string at generation time.
+inline string ProtobufStringToString(const Cord& s) { return s.ToString(); }
+inline void SetProtobufStringSwapAllowed(string* src, Cord* dest) {
+  dest->CopyFrom(*src);
+}
+#endif  // defined(TENSORFLOW_PROTOBUF_USES_CORD)
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_PLATFORM_PROTOBUF_H_
diff --git a/tensorflow/core/platform/protobuf_compiler.h b/tensorflow/core/platform/protobuf_compiler.h
index 29679e0..916637d 100644
--- a/tensorflow/core/platform/protobuf_compiler.h
+++ b/tensorflow/core/platform/protobuf_compiler.h
@@ -16,10 +16,6 @@
 #ifndef TENSORFLOW_PLATFORM_PROTOBUF_COMPILER_H_
 #define TENSORFLOW_PLATFORM_PROTOBUF_COMPILER_H_
 
-#if defined(PLATFORM_GOOGLE) && !defined(USE_DEFAULT_PROTOBUF)
-#include "tensorflow/core/platform/google/protobuf_compiler.h"
-#else
-#include "tensorflow/core/platform/default/protobuf_compiler.h"
-#endif
+#include "google/protobuf/compiler/importer.h"
 
 #endif  // TENSORFLOW_PLATFORM_PROTOBUF_COMPILER_H_
diff --git a/tensorflow/core/platform/s3/aws_logging.cc b/tensorflow/core/platform/s3/aws_logging.cc
index 44317f1..dac5690 100644
--- a/tensorflow/core/platform/s3/aws_logging.cc
+++ b/tensorflow/core/platform/s3/aws_logging.cc
@@ -69,12 +69,32 @@
 }
 
 namespace {
+
+// Taken from tensorflow/core/platform/default/logging.cc
+int ParseInteger(const char* str, size_t size) {
+  string integer_str(str, size);
+  std::istringstream ss(integer_str);
+  int level = 0;
+  ss >> level;
+  return level;
+}
+
+// Taken from tensorflow/core/platform/default/logging.cc
+int64 LogLevelStrToInt(const char* tf_env_var_val) {
+  if (tf_env_var_val == nullptr) {
+    return 0;
+  }
+  return ParseInteger(tf_env_var_val, strlen(tf_env_var_val));
+}
+
 static const char* kAWSLoggingTag = "AWSLogging";
 
 Aws::Utils::Logging::LogLevel ParseLogLevelFromEnv() {
   Aws::Utils::Logging::LogLevel log_level = Aws::Utils::Logging::LogLevel::Info;
 
-  const int64_t level = tensorflow::internal::MinLogLevelFromEnv();
+  const int64_t level = getenv("AWS_LOG_LEVEL")
+                            ? LogLevelStrToInt(getenv("AWS_LOG_LEVEL"))
+                            : tensorflow::internal::MinLogLevelFromEnv();
 
   switch (level) {
     case INFO:
diff --git a/tensorflow/core/platform/tensor_coding.cc b/tensorflow/core/platform/tensor_coding.cc
index 84601de..3280802 100644
--- a/tensorflow/core/platform/tensor_coding.cc
+++ b/tensorflow/core/platform/tensor_coding.cc
@@ -19,6 +19,12 @@
 
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/protobuf.h"
+
+#if defined(TENSORFLOW_PROTOBUF_USES_CORD)
+#include "strings/cord_varint.h"
+#endif  // defined(TENSORFLOW_PROTOBUF_USES_CORD)
 
 namespace tensorflow {
 namespace port {
@@ -66,5 +72,174 @@
   s->assign(base, bytes);
 }
 
+class StringListEncoderImpl : public StringListEncoder {
+ public:
+  explicit StringListEncoderImpl(string* out) : out_(out) {}
+  ~StringListEncoderImpl() override = default;
+
+  void Append(const protobuf::MessageLite& m) override {
+    core::PutVarint32(out_, m.ByteSizeLong());
+    tensorflow::string serialized_message;
+    m.AppendToString(&serialized_message);
+    strings::StrAppend(&rest_, serialized_message);
+  }
+
+  void Append(const string& s) override {
+    core::PutVarint32(out_, s.length());
+    strings::StrAppend(&rest_, s);
+  }
+
+  void Finalize() override { strings::StrAppend(out_, rest_); }
+
+ private:
+  string* out_;
+  string rest_;
+};
+
+class StringListDecoderImpl : public StringListDecoder {
+ public:
+  explicit StringListDecoderImpl(const string& in) : reader_(in) {}
+  ~StringListDecoderImpl() override = default;
+
+  bool ReadSizes(std::vector<uint32>* sizes) override {
+    int64 total = 0;
+    for (auto& size : *sizes) {
+      if (!core::GetVarint32(&reader_, &size)) return false;
+      total += size;
+    }
+    if (total != static_cast<int64>(reader_.size())) {
+      return false;
+    }
+    return true;
+  }
+
+  const char* Data(uint32 size) override {
+    const char* data = reader_.data();
+    reader_.remove_prefix(size);
+    return data;
+  }
+
+ private:
+  StringPiece reader_;
+};
+
+std::unique_ptr<StringListEncoder> NewStringListEncoder(string* out) {
+  return std::unique_ptr<StringListEncoder>(new StringListEncoderImpl(out));
+}
+
+std::unique_ptr<StringListDecoder> NewStringListDecoder(const string& in) {
+  return std::unique_ptr<StringListDecoder>(new StringListDecoderImpl(in));
+}
+
+#if defined(TENSORFLOW_PROTOBUF_USES_CORD)
+void AssignRefCounted(StringPiece src, core::RefCounted* obj, Cord* out) {
+  obj->Ref();
+  out->Clear();
+  // Defines a lambda to unref "obj" when Cord deletes this piece of
+  // memory. +[] converts the lambda to a C-style function pointer.
+  auto cleanup = +[](absl::string_view donotcare, void* obj) {
+    reinterpret_cast<core::RefCounted*>(obj)->Unref();
+  };
+  out->AppendExternalMemory(absl::string_view(src.data(), src.size()), obj,
+                            cleanup);
+}
+
+void EncodeStringList(const string* strings, int64 n, Cord* out) {
+  out->Clear();
+  for (int i = 0; i < n; ++i) {
+    ::strings::CordAppendVarint(strings[i].size(), out);
+  }
+  for (int i = 0; i < n; ++i) {
+    out->Append(strings[i]);
+  }
+}
+
+bool DecodeStringList(const Cord& src, string* strings, int64 n) {
+  std::vector<uint32> sizes(n);
+  CordReader reader(src);
+  int64 tot = 0;
+  for (auto& v : sizes) {
+    if (!::strings::CordReaderReadVarint(&reader, &v)) return false;
+    tot += v;
+  }
+  if (tot != reader.Available()) {
+    return false;
+  }
+  string* data = strings;
+  for (int i = 0; i < n; ++i, ++data) {
+    auto size = sizes[i];
+    if (size > reader.Available()) {
+      return false;
+    }
+    gtl::STLStringResizeUninitialized(data, size);
+    reader.ReadN(size, gtl::string_as_array(data));
+  }
+  return true;
+}
+
+void CopyFromArray(Cord* c, const char* base, size_t bytes) {
+  c->CopyFrom(base, bytes);
+}
+
+class CordStringListEncoderImpl : public StringListEncoder {
+ public:
+  explicit CordStringListEncoderImpl(Cord* out) : out_(out) {}
+  ~CordStringListEncoderImpl() override = default;
+
+  void Append(const protobuf::MessageLite& m) override {
+    ::strings::CordAppendVarint(m.ByteSizeLong(), out_);
+    m.AppendToString(&rest_);
+  }
+
+  void Append(const string& s) override {
+    ::strings::CordAppendVarint(s.length(), out_);
+    rest_.append(s.data(), s.size());
+  }
+
+  void Finalize() override { out_->Append(rest_); }
+
+ private:
+  Cord* out_;
+  string rest_;
+};
+
+class CordStringListDecoderImpl : public StringListDecoder {
+ public:
+  explicit CordStringListDecoderImpl(const Cord& in) : reader_(in) {}
+  ~CordStringListDecoderImpl() override = default;
+
+  bool ReadSizes(std::vector<uint32>* sizes) override {
+    int64 total = 0;
+    for (auto& size : *sizes) {
+      if (!::strings::CordReaderReadVarint(&reader_, &size)) return false;
+      total += size;
+    }
+    if (total != static_cast<int64>(reader_.Available())) {
+      return false;
+    }
+    return true;
+  }
+
+  const char* Data(uint32 size) override {
+    tmp_.resize(size);
+    reader_.ReadN(size, tmp_.data());
+    return tmp_.data();
+  }
+
+ private:
+  CordReader reader_;
+  std::vector<char> tmp_;
+};
+
+std::unique_ptr<StringListEncoder> NewStringListEncoder(Cord* out) {
+  return std::unique_ptr<StringListEncoder>(new CordStringListEncoderImpl(out));
+}
+
+std::unique_ptr<StringListDecoder> NewStringListDecoder(const Cord& in) {
+  return std::unique_ptr<StringListDecoder>(new CordStringListDecoderImpl(in));
+}
+
+#endif  // defined(TENSORFLOW_PROTOBUF_USES_CORD)
+
 }  // namespace port
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/tensor_coding.h b/tensorflow/core/platform/tensor_coding.h
index 6c6d758..7f6075d 100644
--- a/tensorflow/core/platform/tensor_coding.h
+++ b/tensorflow/core/platform/tensor_coding.h
@@ -21,14 +21,9 @@
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/platform.h"
+#include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/types.h"
 
-#ifdef PLATFORM_GOOGLE
-#include "tensorflow/core/platform/google/cord_coding.h"
-#else
-#include "tensorflow/core/platform/default/string_coding.h"
-#endif
-
 namespace tensorflow {
 namespace port {
 
@@ -52,6 +47,68 @@
 // Assigns base[0..bytes-1] to *s
 void CopyFromArray(string* s, const char* base, size_t bytes);
 
+// Encodes sequences of strings and serialized protocol buffers into a string.
+// Normal usage consists of zero or more calls to Append() and a single call to
+// Finalize().
+class StringListEncoder {
+ public:
+  virtual ~StringListEncoder() = default;
+
+  // Encodes the given protocol buffer. This may not be called after Finalize().
+  virtual void Append(const protobuf::MessageLite& m) = 0;
+
+  // Encodes the given string. This may not be called after Finalize().
+  virtual void Append(const string& s) = 0;
+
+  // Signals end of the encoding process. No other calls are allowed after this.
+  virtual void Finalize() = 0;
+};
+
+// Decodes a string into sequences of strings (which may represent serialized
+// protocol buffers). Normal usage involves a single call to ReadSizes() in
+// order to retrieve the length of all the strings in the sequence. For each
+// size returned a call to Data() is expected and will return the actual
+// string.
+class StringListDecoder {
+ public:
+  virtual ~StringListDecoder() = default;
+
+  // Populates the given vector with the lengths of each string in the sequence
+  // being decoded. Upon returning the vector is guaranteed to contain as many
+  // elements as there are strings in the sequence.
+  virtual bool ReadSizes(std::vector<uint32>* sizes) = 0;
+
+  // Returns a pointer to the next string in the sequence, then prepares for the
+  // next call by advancing 'size' characters in the sequence.
+  virtual const char* Data(uint32 size) = 0;
+};
+
+std::unique_ptr<StringListEncoder> NewStringListEncoder(string* out);
+std::unique_ptr<StringListDecoder> NewStringListDecoder(const string& in);
+
+#if defined(TENSORFLOW_PROTOBUF_USES_CORD)
+// Store src contents in *out.  If backing memory for src is shared with *out,
+// will ref obj during the call and will arrange to unref obj when no
+// longer needed.
+void AssignRefCounted(StringPiece src, core::RefCounted* obj, Cord* out);
+
+// TODO(kmensah): Macro guard this with a check for Cord support.
+inline void CopyToArray(const Cord& src, char* dst) { src.CopyToArray(dst); }
+
+// Store encoding of strings[0..n-1] in *out.
+void EncodeStringList(const string* strings, int64 n, Cord* out);
+
+// Decode n strings from src and store in strings[0..n-1].
+// Returns true if successful, false on parse error.
+bool DecodeStringList(const Cord& src, string* strings, int64 n);
+
+// Assigns base[0..bytes-1] to *c
+void CopyFromArray(Cord* c, const char* base, size_t bytes);
+
+std::unique_ptr<StringListEncoder> NewStringListEncoder(Cord* out);
+std::unique_ptr<StringListDecoder> NewStringListDecoder(const Cord& in);
+#endif  // defined(TENSORFLOW_PROTOBUF_USES_CORD)
+
 }  // namespace port
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/profiler/BUILD b/tensorflow/core/profiler/BUILD
index 2bf3712..4efc15b 100644
--- a/tensorflow/core/profiler/BUILD
+++ b/tensorflow/core/profiler/BUILD
@@ -5,6 +5,7 @@
 load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
 load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
 load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos")
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_profiler_all_protos")
 
 tf_cc_binary(
     name = "profiler",
@@ -37,8 +38,34 @@
 )
 
 tf_proto_library(
+    name = "profiler_service_proto",
+    srcs = ["profiler_service.proto"],
+    has_services = 1,
+    cc_api_version = 2,
+    cc_grpc_version = 1,
+    protodeps = tf_profiler_all_protos() + tf_additional_all_protos(),
+    visibility = ["//visibility:public"],
+)
+
+tf_proto_library(
+    name = "profiler_analysis_proto",
+    srcs = ["profiler_analysis.proto"],
+    has_services = 1,
+    cc_api_version = 2,
+    cc_grpc_version = 1,
+    protodeps = [":profiler_service_proto"] + tf_additional_all_protos(),
+    visibility = ["//visibility:public"],
+)
+
+tf_proto_library(
     name = "protos_all",
-    srcs = glob(["**/*.proto"]),
+    srcs = glob(
+        ["**/*.proto"],
+        exclude = [
+            "profiler_service.proto",
+            "profiler_analysis.proto",
+        ],
+    ),
     cc_api_version = 2,
     protodeps = tf_additional_all_protos(),
     visibility = ["//visibility:public"],
diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD
index 30aca58..da3039a 100644
--- a/tensorflow/core/profiler/internal/BUILD
+++ b/tensorflow/core/profiler/internal/BUILD
@@ -393,7 +393,6 @@
         ":traceme_recorder",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/synchronization",
-        "@com_google_absl//absl/time",
         "@com_google_googletest//:gtest_main",
     ],
 )
diff --git a/tensorflow/core/profiler/internal/traceme_recorder_test.cc b/tensorflow/core/profiler/internal/traceme_recorder_test.cc
index bea3f7c..ec588af 100644
--- a/tensorflow/core/profiler/internal/traceme_recorder_test.cc
+++ b/tensorflow/core/profiler/internal/traceme_recorder_test.cc
@@ -18,9 +18,8 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include "absl/synchronization/notification.h"
-#include "absl/time/clock.h"
-#include "absl/time/time.h"
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/platform/env_time.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -32,7 +31,7 @@
 constexpr static uint64 kNanosInSec = 1000000000;
 
 TEST(RecorderTest, SingleThreaded) {
-  uint64 start_time = static_cast<uint64>(absl::GetCurrentTimeNanos());
+  uint64 start_time = Env::Default()->NowNanos();
   uint64 end_time = start_time + kNanosInSec;
 
   TraceMeRecorder::Record({1, "before", start_time, end_time});
@@ -48,7 +47,7 @@
 }
 
 TEST(RecorderTest, CollectionBeforeStop) {
-  uint64 start_time = static_cast<uint64>(absl::GetCurrentTimeNanos());
+  uint64 start_time = Env::Default()->NowNanos();
   uint64 end_time = start_time + kNanosInSec;
 
   TraceMeRecorder::Record({1, "ignored", start_time, end_time});
@@ -73,8 +72,8 @@
 }
 
 void SpinNanos(int nanos) {
-  uint64 deadline = absl::GetCurrentTimeNanos() + nanos;
-  while (absl::GetCurrentTimeNanos() < deadline) {
+  uint64 deadline = Env::Default()->NowNanos() + nanos;
+  while (Env::Default()->NowNanos() < deadline) {
   }
 }
 
@@ -103,7 +102,7 @@
       uint64 j = 0;
       bool was_active = false;
       auto record_event = [&j, i]() {
-        uint64 start_time = static_cast<uint64>(absl::GetCurrentTimeNanos());
+        uint64 start_time = Env::Default()->NowNanos();
         uint64 end_time = start_time + kNanosInSec;
         TraceMeRecorder::Record({/*activity_id=*/j++,
                                  /*name=*/strings::StrCat(i), start_time,
@@ -154,7 +153,7 @@
   // Wait while all the threads are spun up.
   while (thread_count.load(std::memory_order_relaxed) < kNumThreads) {
     LOG(INFO) << "Waiting for all threads to spin up...";
-    absl::SleepFor(absl::Milliseconds(1));
+    Env::Default()->SleepForMicroseconds(1 * EnvTime::kMillisToMicros);
   }
 
   // We will probably be done after two iterations (with each thread getting
@@ -165,7 +164,7 @@
   for (int iters = 0; iters < kMaxIters && !done(); ++iters) {
     LOG(INFO) << "Looping until convergence, iteration: " << iters;
     TraceMeRecorder::Start(/*level=*/1);
-    absl::SleepFor(absl::Milliseconds(100));
+    Env::Default()->SleepForMicroseconds(100 * EnvTime::kMillisToMicros);
     auto results = TraceMeRecorder::Stop();
     for (const auto& thread : results) {
       if (thread.events.empty()) continue;
@@ -194,7 +193,7 @@
         }
       }
     }
-    absl::SleepFor(absl::Milliseconds(1));
+    Env::Default()->SleepForMicroseconds(1 * EnvTime::kMillisToMicros);
   }
   stop.Notify();
 
diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD
index 697076c..f078099 100644
--- a/tensorflow/core/profiler/lib/BUILD
+++ b/tensorflow/core/profiler/lib/BUILD
@@ -23,10 +23,10 @@
     visibility = ["//tensorflow:internal"],
     deps = [
         "//tensorflow/core/common_runtime/eager:context",
-        "//tensorflow/contrib/tpu/profiler:trace_events_proto_cc",
         "//tensorflow/core/profiler/internal/gpu:tracer",
         "//tensorflow/core/profiler/internal/runtime:eager_profiler",
         "//tensorflow/core/profiler/internal:profiler_interface",
+        "//tensorflow/core/profiler:protos_all_cc",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
@@ -53,7 +53,6 @@
         "//tensorflow/core:lib",
         "//tensorflow/core/profiler/internal:traceme_recorder",
         "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/time",
         "@com_google_absl//absl/types:optional",
     ],
 )
diff --git a/tensorflow/core/profiler/lib/profiler_session.cc b/tensorflow/core/profiler/lib/profiler_session.cc
index 1eb9ed6..1910ea8 100644
--- a/tensorflow/core/profiler/lib/profiler_session.cc
+++ b/tensorflow/core/profiler/lib/profiler_session.cc
@@ -15,21 +15,28 @@
 
 #include "tensorflow/core/profiler/lib/profiler_session.h"
 #include <string>
-#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/profiler/internal/gpu/tracer.h"
 #include "tensorflow/core/profiler/internal/runtime/eager_profiler.h"
+#include "tensorflow/core/profiler/trace_events.pb.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 
 namespace {
 
+// Track whether there's an active ProfilerSession.
+// Prevents another ProfilerSession from creating ProfilerInterface(s), as they
+// use singletons that do not allow concurrent profiling requests (e.g.,
+// DeviceTracer).
+std::atomic<bool> session_active = ATOMIC_VAR_INIT(false);
+
 void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
-                                    tpu::Trace* trace,
+                                    profiler::Trace* trace,
                                     const uint64 profile_start_time_micros) {
   auto trace_devices = trace->mutable_devices();
   // TODO(fishx): use a lighter representation instead of GraphDef to insert
@@ -40,15 +47,15 @@
     // Create device
     auto* device_stats =
         run_metadata->mutable_step_stats()->mutable_dev_stats(device_id);
-    tensorflow::tpu::Device device;
+    profiler::Device device;
     device.set_name(device_stats->device());
     device.set_device_id(device_id);
-    tensorflow::tpu::Resource resource;
+    profiler::Resource resource;
     resource.set_name("0");
     resource.set_resource_id(0);
     (*device.mutable_resources())[0] = resource;
     for (const auto& thread_name : device_stats->thread_names()) {
-      tensorflow::tpu::Resource resource;
+      profiler::Resource resource;
       resource.set_resource_id(thread_name.first);
       resource.set_name(thread_name.second);
       (*device.mutable_resources())[thread_name.first] = resource;
@@ -58,6 +65,9 @@
     // Emit events.
     for (auto node :
          run_metadata->step_stats().dev_stats(device_id).node_stats()) {
+      if (node.all_start_micros() < profile_start_time_micros) {
+        continue;
+      }
       auto* event = trace->add_trace_events();
       auto* args = event->mutable_args();
       event->set_device_id(device_id);
@@ -82,7 +92,7 @@
 }  // namespace
 
 /*static*/ std::unique_ptr<ProfilerSession> ProfilerSession::Create(
-    EagerContext* const context) {
+    ProfilerContext* const context) {
   return absl::WrapUnique(new ProfilerSession(context));
 }
 
@@ -102,7 +112,13 @@
     profiler->CollectData(&run_metadata).IgnoreError();
   }
 
-  tpu::Trace trace;
+  if (active_) {
+    // Allow another session to start.
+    session_active.store(false);
+    active_ = false;
+  }
+
+  profiler::Trace trace;
 
   ConvertRunMetadataToTraceEvent(&run_metadata, &trace, start_time_micros_);
 
@@ -110,13 +126,20 @@
   return Status::OK();
 }
 
-ProfilerSession::ProfilerSession(EagerContext* const context)
-    : start_time_micros_(Env::Default()->NowNanos() / EnvTime::kMicrosToNanos) {
+ProfilerSession::ProfilerSession(ProfilerContext* const context)
+    : active_(!session_active.exchange(true)),
+      start_time_micros_(Env::Default()->NowNanos() / EnvTime::kMicrosToNanos) {
+  if (!active_) {
+    status_ = tensorflow::Status(tensorflow::error::Code::UNAVAILABLE,
+                                 "Another profiling session is active.");
+    return;
+  }
+
   LOG(INFO) << "Profile Session started.";
 
-  if (context != nullptr) {
-    profilers_.push_back(
-        tensorflow::profiler::runtime::EagerProfiler::Create(context));
+  if (context->eager_context != nullptr) {
+    profilers_.push_back(tensorflow::profiler::runtime::EagerProfiler::Create(
+        context->eager_context));
   }
   profilers_.push_back(tensorflow::profiler::gpu::Tracer::Create());
 
@@ -131,6 +154,11 @@
   for (auto& profiler : profilers_) {
     profiler->Stop().IgnoreError();
   }
+
+  if (active_) {
+    // Allow another session to start.
+    session_active.store(false);
+  }
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/profiler/lib/profiler_session.h b/tensorflow/core/profiler/lib/profiler_session.h
index 1ab4825..0727657 100644
--- a/tensorflow/core/profiler/lib/profiler_session.h
+++ b/tensorflow/core/profiler/lib/profiler_session.h
@@ -22,6 +22,10 @@
 
 namespace tensorflow {
 
+struct ProfilerContext {
+  EagerContext* eager_context = nullptr;
+};
+
 // A profiler which will start profiling when creating the object and will stop
 // when either the object is destroyed or SerializedToString is called. It will
 // profile all operations run under the given EagerContext.
@@ -32,7 +36,8 @@
 class ProfilerSession {
  public:
   // Creates a ProfilerSession and starts profiling.
-  static std::unique_ptr<ProfilerSession> Create(EagerContext* const context);
+  static std::unique_ptr<ProfilerSession> Create(
+      ProfilerContext* const context);
 
   // Deletes an existing Profiler and enables starting a new one.
   ~ProfilerSession();
@@ -43,7 +48,7 @@
 
  private:
   // Constructs an instance of the class and starts profiling
-  explicit ProfilerSession(EagerContext* const context);
+  explicit ProfilerSession(ProfilerContext* const context);
 
   // Profiler is neither copyable nor movable.
   ProfilerSession(const ProfilerSession&) = delete;
@@ -52,6 +57,9 @@
   std::vector<std::unique_ptr<tensorflow::profiler::ProfilerInterface>>
       profilers_ GUARDED_BY(mutex_);
 
+  // True if the session is active.
+  bool active_ GUARDED_BY(mutex_);
+
   tensorflow::Status status_ GUARDED_BY(mutex_);
   const uint64 start_time_micros_;
   mutex mutex_;
diff --git a/tensorflow/core/profiler/lib/traceme.cc b/tensorflow/core/profiler/lib/traceme.cc
index ae1c921..90272b8 100644
--- a/tensorflow/core/profiler/lib/traceme.cc
+++ b/tensorflow/core/profiler/lib/traceme.cc
@@ -31,17 +31,15 @@
 /* static */ uint64 TraceMe::ActivityStartImpl(
     absl::string_view activity_name) {
   uint64 activity_id = NewActivityId();
-  TraceMeRecorder::Record(
-      {activity_id, string(activity_name),
-       /*start_time=*/static_cast<uint64>(absl::GetCurrentTimeNanos()),
-       /*end_time=*/0});
+  TraceMeRecorder::Record({activity_id, string(activity_name),
+                           /*start_time=*/Env::Default()->NowNanos(),
+                           /*end_time=*/0});
   return activity_id;
 }
 
 /* static */ void TraceMe::ActivityEndImpl(uint64 activity_id) {
-  TraceMeRecorder::Record(
-      {activity_id, /*name=*/"", /*start_time=*/0,
-       /*end_time=*/static_cast<uint64>(absl::GetCurrentTimeNanos())});
+  TraceMeRecorder::Record({activity_id, /*name=*/"", /*start_time=*/0,
+                           /*end_time=*/Env::Default()->NowNanos()});
 }
 
 }  // namespace profiler
diff --git a/tensorflow/core/profiler/lib/traceme.h b/tensorflow/core/profiler/lib/traceme.h
index 098c531..b9fae3d 100644
--- a/tensorflow/core/profiler/lib/traceme.h
+++ b/tensorflow/core/profiler/lib/traceme.h
@@ -18,8 +18,8 @@
 #include <string>
 
 #include "absl/strings/string_view.h"
-#include "absl/time/clock.h"
 #include "absl/types/optional.h"
+#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/profiler/internal/traceme_recorder.h"
@@ -74,7 +74,7 @@
     DCHECK_GE(level, 1);
     if (TraceMeRecorder::Active(level)) {
       new (&no_init_.name) string(activity_name);
-      start_time_ = absl::GetCurrentTimeNanos();
+      start_time_ = Env::Default()->NowNanos();
     } else {
       start_time_ = kUntracedActivity;
     }
@@ -89,7 +89,7 @@
     DCHECK_GE(level, 1);
     if (TraceMeRecorder::Active(level)) {
       new (&no_init_.name) string(std::move(activity_name));
-      start_time_ = absl::GetCurrentTimeNanos();
+      start_time_ = Env::Default()->NowNanos();
     } else {
       start_time_ = kUntracedActivity;
     }
@@ -119,7 +119,7 @@
     DCHECK_GE(level, 1);
     if (TraceMeRecorder::Active(level)) {
       new (&no_init_.name) string(name_generator());
-      start_time_ = absl::GetCurrentTimeNanos();
+      start_time_ = Env::Default()->NowNanos();
     } else {
       start_time_ = kUntracedActivity;
     }
@@ -136,9 +136,8 @@
     //   start/stop session timestamp (recorded in XprofResponse).
     if (start_time_ != kUntracedActivity) {
       if (TraceMeRecorder::Active()) {
-        TraceMeRecorder::Record(
-            {kCompleteActivity, std::move(no_init_.name), start_time_,
-             static_cast<uint64>(absl::GetCurrentTimeNanos())});
+        TraceMeRecorder::Record({kCompleteActivity, std::move(no_init_.name),
+                                 start_time_, Env::Default()->NowNanos()});
       }
       no_init_.name.~string();
     }
diff --git a/tensorflow/core/profiler/op_profile.proto b/tensorflow/core/profiler/op_profile.proto
new file mode 100644
index 0000000..0adca55
--- /dev/null
+++ b/tensorflow/core/profiler/op_profile.proto
@@ -0,0 +1,76 @@
+syntax = "proto3";
+
+package tensorflow.profiler.op_profile;
+
+// Profile is the top-level data that summarizes a program.
+message Profile {
+  reserved 2;
+  reserved "by_program_structure";
+  reserved 3;
+  reserved "per_program";
+  // Root of a profile broken down by instruction category.
+  Node by_category = 1;
+  // Root of a profile broken down by program.
+  Node by_program = 4;
+}
+
+// An entry in the profile tree. (An instruction, or set of instructions).
+message Node {
+  string name = 1;      // Semantics depend on contents.
+  Metrics metrics = 2;  // May be omitted e.g. for fused instructions.
+  repeated Node children = 3;  // Subject to pruning.
+
+  // Details about what this node represents.
+  oneof contents {
+    InstructionCategory category = 4;
+    XLAInstruction xla = 5;
+  }
+
+  int32 num_children = 6;  // Total number of children before pruning.
+  // A category of XLA instructions.
+  // name is a descriptive string, like "data formatting".
+  message InstructionCategory {
+  }
+  // A single XLA instruction.
+  // name is the unique instruction id, like "%multiply.5".
+  message XLAInstruction {
+    string op = 1;          // Opcode like %multiply
+    string expression = 2;  // %multiply = [shape]multiply(operand1, operand2)
+    string provenance = 3;  // Typically the TensorFlow operation name.
+    string category = 4;
+    // Describes the physical memory layout of the instruction's primary input.
+    // e.g. for a convolution, this analyzes the image and ignores the kernel.
+    LayoutAnalysis layout = 5;
+    message LayoutAnalysis {
+      // The physical data layout, from most-minor to most-major dimensions.
+      repeated Dimension dimensions = 1;
+      message Dimension {
+        int32 size = 1;       // Size of the data in this dimension.
+        int32 alignment = 2;  // Data must be padded to a multiple of alignment.
+        string semantics = 3;  // What the dimension represents, e.g. "spatial".
+      }
+    }
+  }
+}
+
+// Measurements of an operation (or aggregated set of operations).
+// Metrics are always "total" rather than "self".
+message Metrics {
+  // Core-time taken by this operation, as a fraction of all operations.
+  double time = 1;
+  // Floating point computations performed by this operation, as a fraction of
+  // peak core FLOPS * program time. This representation has useful properties:
+  //  - it is proportional to the number of floating point operations performed
+  //  - utilization is flops/time
+  //  - wasted potential flops is proportional to time - flops
+  //  - it does not reveal the peak core FLOPS of the hardware
+  double flops = 2;
+
+  // The memory bandwidth used to load operands, as a fraction of
+  // theoretical memory bandwidth on the specific hardware.
+  double memory_bandwidth = 3;
+
+  double raw_time = 11;   // Elapsed core-time in picoseconds.
+  double raw_flops = 12;  // Total floating-point operations performed.
+  double raw_bytes_accessed = 13;  // Total bytes accessed (includes read/write).
+}
diff --git a/tensorflow/core/profiler/profiler_analysis.proto b/tensorflow/core/profiler/profiler_analysis.proto
new file mode 100644
index 0000000..4be75de
--- /dev/null
+++ b/tensorflow/core/profiler/profiler_analysis.proto
@@ -0,0 +1,78 @@
+syntax = "proto3";
+package tensorflow;
+
+import "tensorflow/core/profiler/profiler_service.proto";
+
+message NewProfileSessionRequest {
+  ProfileRequest request = 1;
+  string repository_root = 2;
+  repeated string hosts = 3;
+  string session_id = 4;
+}
+
+message NewProfileSessionResponse {
+  // Auxiliary error_message.
+  string error_message = 1;
+
+  // Whether all hosts returned an empty trace.
+  bool empty_trace = 2;
+}
+
+message EnumProfileSessionsAndToolsRequest {
+  string repository_root = 1;
+}
+
+message ProfileSessionInfo {
+  string session_id = 1;
+  // Which tool data is available for consumption.
+  repeated string available_tools = 2;
+}
+
+message EnumProfileSessionsAndToolsResponse {
+  // Auxiliary error_message.
+  string error_message = 1;
+  // On success, the returned session information is stored here.
+  repeated ProfileSessionInfo sessions = 2;
+}
+
+message ProfileSessionDataRequest {
+  string repository_root = 1;
+  string session_id = 2;
+  // Which host the data is associated with. If empty, data from all hosts is
+  // aggregated.
+  string host_name = 5;
+  // The tool whose data is requested.
+  string tool_name = 3;
+  // Tool-specific parameters, e.g. TraceViewer's viewport.
+  map<string, string> parameters = 4;
+}
+
+message ProfileSessionDataResponse {
+  // Auxiliary error_message.
+  string error_message = 1;
+
+  // Output format, e.g. "json", "proto" or "blob".
+  string output_format = 2;
+
+  // TODO(jiesun): figure out whether to put bytes or oneof tool specific proto.
+  bytes output = 3;
+}
+////////////////////////////////////////////////////////////////////////////////
+// The ProfileAnalysis service provides an entry point for profiling TPUs and
+// for serving profiled data to TensorBoard over gRPC.
+////////////////////////////////////////////////////////////////////////////////
+service ProfileAnalysis {
+  // Starts a profiling session, blocks until it completes.
+  // The ProfileAnalysis service delegates this to the ProfilerService.
+  // It populates the profiled data in the repository, then returns status to
+  // the caller.
+  rpc NewSession(NewProfileSessionRequest) returns (NewProfileSessionResponse) {
+  }
+  // Enumerates existing sessions and returns the available profiling tools.
+  rpc EnumSessions(EnumProfileSessionsAndToolsRequest)
+      returns (EnumProfileSessionsAndToolsResponse) {
+  }
+  // Retrieves a specific tool's data for a specific session.
+  rpc GetSessionToolData(ProfileSessionDataRequest)
+      returns (ProfileSessionDataResponse) {
+  }
+}
diff --git a/tensorflow/core/profiler/profiler_service.proto b/tensorflow/core/profiler/profiler_service.proto
new file mode 100644
index 0000000..77702c3
--- /dev/null
+++ b/tensorflow/core/profiler/profiler_service.proto
@@ -0,0 +1,132 @@
+syntax = "proto3";
+package tensorflow;
+
+import "tensorflow/core/framework/graph.proto";
+import "tensorflow/core/protobuf/config.proto";
+import "tensorflow/core/profiler/op_profile.proto";
+
+// The ProfilerService service retrieves performance information about
+// the programs running on connected devices over a period of time.
+service ProfilerService {
+  // Starts a profiling session, blocks until it completes, and returns data.
+  rpc Profile(ProfileRequest) returns (ProfileResponse) {
+  }
+  // Collects profiling data and returns user-friendly metrics.
+  rpc Monitor(MonitorRequest) returns (MonitorResponse) {
+  }
+}
+
+message ProfileOptions {
+  // We don't collect the dataset ops by default for better trace-viewer
+  // scalability. The caller can manually set this field to include the ops.
+  bool include_dataset_ops = 1;
+
+  // next-field: 2
+}
+
+message ToolRequestOptions {
+  // Required format for the tool's output; one of "json", "proto", "raw",
+  // etc. If not specified (for backward compatibility), the tool's default
+  // format is used; most tools default to json.
+  string output_formats = 2;
+
+  // Whether to save the result directly to the repository or pass it back to
+  // the caller. Defaults to false for backward compatibility.
+  bool save_to_repo = 3;
+}
+
+message ProfileRequest {
+  // In the future, the caller will be able to customize when profiling starts
+  // and stops. For now, it collects `duration_ms` milliseconds worth of data.
+  uint64 duration_ms = 1;
+
+  // The maximum number of events to return. By default (value 0), return all
+  // events.
+  uint64 max_events = 2;
+
+  // Names of the required profiling tools, e.g. "input_pipeline_analyzer".
+  repeated string tools = 3;
+
+  // Specifies the requirements for each tool.
+  map<string, ToolRequestOptions> tool_options = 8;
+
+  // Optional profiling options that control how a TF session will be profiled.
+  ProfileOptions opts = 4;
+
+  // The place where we will dump profile data. We will normally use
+  // MODEL_DIR/plugins/profile/ as our repository root.
+  string repository_root = 5;
+
+  // The user provided profile session identifier.
+  string session_id = 6;
+
+  // The hostname of the system where the profile should happen.
+  // It is used as an identifier in part of the output filename.
+  string host_name = 7;
+
+  // In the future, the caller will indicate which TF session is being
+  // profiled, and only data relating to that program will be returned. For
+  // now, we assume all activity during the profiling period is relevant.
+  // next-field: 9
+}
+
+message ProfileToolData {
+  // The file name with which this data is associated (e.g.
+  // "input_pipeline.json", "cluster_xxx.memory_viewer.json").
+  string name = 1;
+
+  // The data payload (likely json) for the specific tool.
+  bytes data = 2;
+}
+
+message ProfileResponse {
+  reserved 1;  // was uint64 placeholder for returning something meaningful.
+  // Graphs of programs executed on devices during the profiling period.
+  repeated GraphDef computation_graph = 2;
+
+  // Performance profile that can be used to annotate HLO operations in the
+  // computation graph.
+  RunMetadata hlo_metadata = 5;
+
+  // Encoded Trace proto message that contains metadata about the trace captured
+  // during the profiling period. Describes the devices and resources that
+  // 'trace_events' refers to.
+  bytes encoded_trace = 3;
+
+  // Assembles a hierarchical performance profile based on HLOs in trace events.
+  // If the trace covers multiple programs, the longest-running one is analyzed.
+  // See op_profile.proto for the detailed semantics of the returned profile.
+  profiler.op_profile.Profile op_profile = 4;
+
+  // Data payload for each required tool.
+  repeated ProfileToolData tool_data = 6;
+
+  // When profiling data is written directly to the repository directory, this
+  // indicates whether the captured trace is empty (e.g., due to an idle TPU).
+  bool empty_trace = 7;
+
+  // next-field: 8
+}
+
+message MonitorRequest {
+  // Duration for which to profile between each update.
+  uint64 duration_ms = 1;
+
+  // Indicates the level at which we want to monitor. Currently, two levels are
+  // supported:
+  // Level 1: An ultra-lightweight mode that captures only some utilization
+  // metrics.
+  // Level 2: More verbose than level 1. Collects utilization metrics, device
+  // information, step time information, etc. Do not use this option if the TPU
+  // host is heavily loaded.
+  int32 monitoring_level = 2;
+
+  // next-field: 3
+}
+
+message MonitorResponse {
+  // Properly formatted string data that can be returned directly to the user.
+  string data = 1;
+
+  // next-field: 2
+}
diff --git a/tensorflow/core/profiler/rpc/BUILD b/tensorflow/core/profiler/rpc/BUILD
index 6b4576e..3e5cdaa 100644
--- a/tensorflow/core/profiler/rpc/BUILD
+++ b/tensorflow/core/profiler/rpc/BUILD
@@ -11,9 +11,10 @@
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow:grpc++",
-        "//tensorflow/contrib/tpu/profiler:tpu_profiler_proto_cc",
         "//tensorflow/core:framework",
+        "//tensorflow/core:grpc_services",
         "//tensorflow/core/common_runtime/eager:context",
+        "//tensorflow/core/profiler:protos_all_cc",
         "//tensorflow/core/profiler/lib:profiler_session",
     ],
     alwayslink = 1,
@@ -27,9 +28,11 @@
     deps = [
         ":profiler_service_impl",
         "//tensorflow:grpc++",
-        "//tensorflow/contrib/tpu/profiler:tpu_profiler_proto_cc",
         "//tensorflow/core:framework",
+        "//tensorflow/core:grpc_services",
         "//tensorflow/core/common_runtime/eager:context",
+        "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler/lib:profiler_session",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/core/profiler/rpc/client/BUILD b/tensorflow/core/profiler/rpc/client/BUILD
new file mode 100644
index 0000000..4f963f5
--- /dev/null
+++ b/tensorflow/core/profiler/rpc/client/BUILD
@@ -0,0 +1,24 @@
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
+
+tf_cuda_library(
+    name = "capture_profile",
+    srcs = [
+        "capture_profile.cc",
+    ],
+    hdrs = [
+        "capture_profile.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow:grpc++",
+        "//tensorflow/contrib/tpu/profiler:dump_tpu_profile",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:grpc_services",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/distributed_runtime/rpc:grpc_util",
+        "//tensorflow/core/platform/cloud:gcs_file_system",
+        "//tensorflow/core/profiler:protos_all_cc",
+    ],
+)
diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.cc b/tensorflow/core/profiler/rpc/client/capture_profile.cc
new file mode 100644
index 0000000..58a282b
--- /dev/null
+++ b/tensorflow/core/profiler/rpc/client/capture_profile.cc
@@ -0,0 +1,238 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/profiler/rpc/client/capture_profile.h"
+
+#include "grpcpp/grpcpp.h"
+
+#include <cstdio>
+#include <ctime>
+#include <vector>
+
+#include "tensorflow/contrib/tpu/profiler/dump_tpu_profile.h"
+#include "tensorflow/core/distributed_runtime/rpc/grpc_util.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/grpc_services.h"
+
+namespace tensorflow {
+namespace profiler {
+namespace client {
+
+constexpr uint64 kMaxEvents = 1000000;
+
+string GetCurrentTimeStampAsString() {
+  char s[128];
+  std::time_t t = std::time(nullptr);
+  auto result = std::strftime(s, sizeof(s), "%F_%T", std::localtime(&t));
+  DCHECK_NE(result, 0);
+  return s;
+}
+
+Status ValidateHostPortPair(const string& host_port) {
+  uint32 port;
+  std::vector<string> parts = str_util::Split(host_port, ':');
+  // Must be host:port, port must be a number, host must not contain a '/',
+  // host also must not be empty.
+  if (parts.size() != 2 || !strings::safe_strtou32(parts[1], &port) ||
+      parts[0].find("/") != string::npos || parts[0].empty()) {
+    return errors::InvalidArgument("Could not interpret \"", host_port,
+                                   "\" as a host-port pair.");
+  }
+  return Status::OK();
+}
+
+ProfileRequest PopulateProfileRequest(int duration_ms,
+                                      const string& repository_root,
+                                      const string& session_id,
+                                      const ProfileOptions& opts) {
+  ProfileRequest request;
+  request.set_duration_ms(duration_ms);
+  request.set_max_events(kMaxEvents);
+  if (tensorflow::str_util::StartsWith(repository_root, "gs://")) {
+    // For backward compatibility, only generate the trace table etc. when the
+    // user provides a GCS path for the model directory.
+    request.set_repository_root(repository_root);
+    request.set_session_id(session_id);
+  }
+  request.add_tools("op_profile");
+  request.add_tools("input_pipeline");
+  request.add_tools("memory_viewer");
+  request.add_tools("overview_page");
+  *request.mutable_opts() = opts;
+  return request;
+}
+
+// Returns whether the returned trace is empty.
+// Failures are handled by CHECK, i.e. abort().
+bool Profile(const string& service_addr, const string& logdir, int duration_ms,
+             const string& repository_root, const string& session_id,
+             const ProfileOptions& opts) {
+  ProfileRequest request =
+      PopulateProfileRequest(duration_ms, repository_root, session_id, opts);
+
+  ::grpc::ClientContext context;
+  ::grpc::ChannelArguments channel_args;
+  // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their
+  // `ValidateHostPortPair` checks for empty host string case.
+  channel_args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH,
+                      std::numeric_limits<int32>::max());
+  std::unique_ptr<grpc::ProfilerService::Stub> stub =
+      grpc::ProfilerService::NewStub(::grpc::CreateCustomChannel(
+          "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(),
+          channel_args));
+  ProfileResponse response;
+  TF_QCHECK_OK(FromGrpcStatus(stub->Profile(&context, request, &response)));
+
+  if (!response.encoded_trace().empty()) {
+    TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile(
+        logdir, session_id, "", response, &std::cout));
+    // Print this at the end so that it's not buried in irrelevant LOG messages.
+    std::cout
+        << "NOTE: using the trace duration " << duration_ms << "ms."
+        << std::endl
+        << "Set an appropriate duration (with --duration_ms) if you "
+           "don't see a full step in your trace or the captured trace is too "
+           "large."
+        << std::endl;
+  }
+
+  return response.encoded_trace().empty();
+}
+
+// Starts a new profiling session that includes all the hosts in hostnames,
+// for a time interval of duration_ms. Optionally saves the profiling result
+// in the directory specified by repository_root and session_id.
+bool NewSession(const string& service_addr,
+                const std::vector<tensorflow::string>& hostnames,
+                int duration_ms, const string& repository_root,
+                const string& session_id, const ProfileOptions& opts) {
+  NewProfileSessionRequest new_session_request;
+  *new_session_request.mutable_request() =
+      PopulateProfileRequest(duration_ms, repository_root, session_id, opts);
+  new_session_request.set_repository_root(repository_root);
+  new_session_request.set_session_id(session_id);
+  for (const auto& hostname : hostnames) {
+    new_session_request.add_hosts(hostname);
+  }
+
+  ::grpc::ClientContext context;
+  ::grpc::ChannelArguments channel_args;
+  // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their
+  // `ValidateHostPortPair` checks for empty host string case.
+  channel_args.SetMaxReceiveMessageSize(std::numeric_limits<int32>::max());
+  // TODO(jiesun): gRPC supports the following relevant naming schemes:
+  // 1. dns:///host:port
+  // 2. ipv4:host:port or ipv6:[host]:port
+  // We might need to change the prefix depending on what the TPU name
+  // resolver gives us.
+  std::unique_ptr<grpc::ProfileAnalysis::Stub> stub =
+      grpc::ProfileAnalysis::NewStub(::grpc::CreateCustomChannel(
+          "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(),
+          channel_args));
+  NewProfileSessionResponse new_session_response;
+  TF_QCHECK_OK(FromGrpcStatus(
+      stub->NewSession(&context, new_session_request, &new_session_response)));
+
+  std::cout << "Profile session succeed for host(s):"
+            << str_util::Join(hostnames, ",") << std::endl;
+  return new_session_response.empty_trace();
+}
+
+// Starts tracing on one or more TPU hosts and saves the result in the given
+// logdir. If no trace was collected, retries tracing up to
+// num_tracing_attempts times.
+void StartTracing(const tensorflow::string& service_addr,
+                  const tensorflow::string& logdir,
+                  const tensorflow::string& workers_list,
+                  bool include_dataset_ops, int duration_ms,
+                  int num_tracing_attempts) {
+  // Use the current timestamp as the run name.
+  tensorflow::string session_id = GetCurrentTimeStampAsString();
+  constexpr char kProfilePluginDirectory[] = "plugins/profile/";
+  tensorflow::string repository_root =
+      io::JoinPath(logdir, kProfilePluginDirectory);
+  std::vector<tensorflow::string> hostnames =
+      tensorflow::str_util::Split(workers_list, ",");
+
+  bool empty_trace = false;
+  int remaining_attempts = num_tracing_attempts;
+  tensorflow::ProfileOptions opts;
+  opts.set_include_dataset_ops(include_dataset_ops);
+  while (true) {
+    std::cout << "Starting to profile TPU traces for " << duration_ms << " ms. "
+              << "Remaining attempt(s): " << remaining_attempts-- << std::endl;
+    if (hostnames.empty()) {
+      empty_trace = Profile(service_addr, logdir, duration_ms, repository_root,
+                            session_id, opts);
+    } else {
+      tensorflow::string tpu_master = service_addr;
+      empty_trace = NewSession(tpu_master, hostnames, duration_ms,
+                               repository_root, session_id, opts);
+    }
+    if (remaining_attempts <= 0 || !empty_trace) break;
+    std::cout << "No trace event is collected. Automatically retrying."
+              << std::endl
+              << std::endl;
+  }
+
+  if (empty_trace) {
+    std::cout << "No trace event is collected after " << num_tracing_attempts
+              << " attempt(s). "
+              << "Perhaps, you want to try again (with more attempts?)."
+              << std::endl
+              << "Tip: increase number of attempts with --num_tracing_attempts."
+              << std::endl;
+  }
+}
+
+MonitorRequest PopulateMonitorRequest(int duration_ms, int monitoring_level) {
+  MonitorRequest request;
+  request.set_duration_ms(duration_ms);
+  request.set_monitoring_level(monitoring_level);
+  return request;
+}
+
+// Repeatedly collects profiles and shows user-friendly metrics for
+// 'num_queries' time(s).
+void StartMonitoring(const tensorflow::string& service_addr, int duration_ms,
+                     int monitoring_level, int num_queries) {
+  for (int query = 0; query < num_queries; ++query) {
+    MonitorRequest request =
+        PopulateMonitorRequest(duration_ms, monitoring_level);
+
+    ::grpc::ClientContext context;
+    ::grpc::ChannelArguments channel_args;
+    channel_args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH,
+                        std::numeric_limits<int32>::max());
+    std::unique_ptr<grpc::ProfilerService::Stub> stub =
+        grpc::ProfilerService::NewStub(::grpc::CreateCustomChannel(
+            "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(),
+            channel_args));
+    MonitorResponse response;
+    TF_QCHECK_OK(FromGrpcStatus(stub->Monitor(&context, request, &response)));
+
+    std::cout << "Cloud TPU Monitoring Results (Sample " << query + 1
+              << "):\n\n"
+              << response.data() << std::flush;
+  }
+}
+
+}  // namespace client
+}  // namespace profiler
+}  // namespace tensorflow
+
diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.h b/tensorflow/core/profiler/rpc/client/capture_profile.h
new file mode 100644
index 0000000..85c97ae
--- /dev/null
+++ b/tensorflow/core/profiler/rpc/client/capture_profile.h
@@ -0,0 +1,46 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// gRPC client to perform on-demand profiling.
+
+#ifndef TENSORFLOW_CORE_PROFILER_RPC_CLIENT_CAPTURE_PROFILE_H_
+#define TENSORFLOW_CORE_PROFILER_RPC_CLIENT_CAPTURE_PROFILE_H_
+
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+namespace profiler {
+namespace client {
+
+Status ValidateHostPortPair(const string& host_port);
+
+// Repeatedly collects profiles and shows user-friendly metrics for
+// 'num_queries' time(s).
+void StartMonitoring(const tensorflow::string& service_addr, int duration_ms,
+                     int monitoring_level, int num_queries);
+
+// Starts tracing on one or more hosts and saves the result in the given
+// logdir. If no trace was collected, retries tracing up to
+// num_tracing_attempts times.
+void StartTracing(const tensorflow::string& service_addr,
+                  const tensorflow::string& logdir,
+                  const tensorflow::string& workers_list,
+                  bool include_dataset_ops, int duration_ms,
+                  int num_tracing_attempts);
+
+}  // namespace client
+}  // namespace profiler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_PROFILER_RPC_CLIENT_CAPTURE_PROFILE_H_
diff --git a/tensorflow/core/profiler/rpc/profiler_server.cc b/tensorflow/core/profiler/rpc/profiler_server.cc
index 835aa1e..257e4e0 100644
--- a/tensorflow/core/profiler/rpc/profiler_server.cc
+++ b/tensorflow/core/profiler/rpc/profiler_server.cc
@@ -17,27 +17,32 @@
 #include <memory>
 #include <utility>
 #include "grpcpp/grpcpp.h"
-#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"
+#include "tensorflow/core/platform/grpc_services.h"
 #include "tensorflow/core/profiler/rpc/profiler_service_impl.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 
-std::unique_ptr<Thread> StartProfilerServer(EagerContext* const eager_context,
-                                            int32 port) {
-  return WrapUnique(eager_context->TFEnv()->StartThread(
-      {}, "profiler_server", [eager_context, port]() {
-        string server_address = strings::StrCat("0.0.0.0:", port);
-        std::unique_ptr<TPUProfiler::Service> service =
-            CreateProfilerService(eager_context);
-        ::grpc::ServerBuilder builder;
-        builder.AddListeningPort(server_address,
-                                 ::grpc::InsecureServerCredentials());
-        builder.RegisterService(service.get());
-        std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());
-        LOG(INFO) << "Profiling Server listening on " << server_address;
-        server->Wait();
-      }));
+std::unique_ptr<Thread> StartProfilerServer(
+    ProfilerContext* const profiler_context, int32 port) {
+  Env* env = profiler_context->eager_context != nullptr
+                 ? profiler_context->eager_context->TFEnv()
+                 : Env::Default();
+  // Starting the server in the child thread may be delayed, and the user may
+  // have already deleted the profiler context by then, so we make a copy.
+  ProfilerContext ctx = *profiler_context;
+  return WrapUnique(env->StartThread({}, "profiler server", [ctx, port]() {
+    string server_address = strings::StrCat("0.0.0.0:", port);
+    std::unique_ptr<grpc::ProfilerService::Service> service =
+        CreateProfilerService(ctx);
+    ::grpc::ServerBuilder builder;
+    builder.AddListeningPort(server_address,
+                             ::grpc::InsecureServerCredentials());
+    builder.RegisterService(service.get());
+    std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());
+    LOG(INFO) << "Profiling Server listening on " << server_address;
+    server->Wait();
+  }));
 }
 
 }  // namespace tensorflow
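A hedged sketch of starting the server with the new signature. That ProfilerContext is usable in default-constructed form, with eager_context left null, is an assumption inferred from the null check above:

#include <memory>

#include "tensorflow/core/profiler/rpc/profiler_server.h"

std::unique_ptr<tensorflow::Thread> RunStandaloneProfilerServer(int port) {
  // eager_context left unset: the server thread falls back to Env::Default().
  tensorflow::ProfilerContext context;
  // The copy taken inside StartProfilerServer makes it safe for `context`
  // to go out of scope here. Keep the returned thread alive for as long as
  // the server should accept RPCs.
  return tensorflow::StartProfilerServer(&context, port);
}
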
diff --git a/tensorflow/core/profiler/rpc/profiler_server.h b/tensorflow/core/profiler/rpc/profiler_server.h
index ebefdd6..4e8c715 100644
--- a/tensorflow/core/profiler/rpc/profiler_server.h
+++ b/tensorflow/core/profiler/rpc/profiler_server.h
@@ -15,10 +15,11 @@
 #ifndef TENSORFLOW_CORE_PROFILER_RPC_PROFILER_SERVER_H_
 #define TENSORFLOW_CORE_PROFILER_RPC_PROFILER_SERVER_H_
 
-#include "tensorflow/core/common_runtime/eager/context.h"
+#include "tensorflow/core/profiler/lib/profiler_session.h"
 
 namespace tensorflow {
-std::unique_ptr<Thread> StartProfilerServer(EagerContext* const eager_context,
-                                            int32 port);
+
+std::unique_ptr<Thread> StartProfilerServer(
+    ProfilerContext* const profiler_context, int32 port);
 }  // namespace tensorflow
 #endif  // TENSORFLOW_CORE_PROFILER_RPC_PROFILER_SERVER_H_
diff --git a/tensorflow/core/profiler/rpc/profiler_service_impl.cc b/tensorflow/core/profiler/rpc/profiler_service_impl.cc
index 872ef2a..f25ee66 100644
--- a/tensorflow/core/profiler/rpc/profiler_service_impl.cc
+++ b/tensorflow/core/profiler/rpc/profiler_service_impl.cc
@@ -15,19 +15,18 @@
 
 #include "tensorflow/core/profiler/rpc/profiler_service_impl.h"
 #include "grpcpp/support/status.h"
-#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
+#include "tensorflow/core/platform/grpc_services.h"
 #include "tensorflow/core/profiler/lib/profiler_session.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace {
 
-// TODO(fishx): Rename TPUProfiler to something more generic.
-class ProfilerServiceImpl : public TPUProfiler::Service {
+class ProfilerServiceImpl : public grpc::ProfilerService::Service {
  public:
-  explicit ProfilerServiceImpl(EagerContext* const eager_context)
-      : eager_context_(eager_context) {}
+  explicit ProfilerServiceImpl(const ProfilerContext& profiler_context)
+      : profiler_context_(profiler_context) {}
   ~ProfilerServiceImpl() override {}
 
   ::grpc::Status Monitor(::grpc::ServerContext* ctx, const MonitorRequest* req,
@@ -39,13 +38,15 @@
                          ProfileResponse* response) override {
     LOG(INFO) << "Received a profile request.";
     std::unique_ptr<ProfilerSession> profiler =
-        ProfilerSession::Create(eager_context_);
+        ProfilerSession::Create(&profiler_context_);
     if (!profiler->Status().ok()) {
       return ::grpc::Status(::grpc::StatusCode::INTERNAL,
                             profiler->Status().error_message());
     }
 
-    Env* env = eager_context_->TFEnv();
+    Env* env = profiler_context_.eager_context != nullptr
+                   ? profiler_context_.eager_context->TFEnv()
+                   : Env::Default();
     for (size_t i = 0; i < req->duration_ms(); ++i) {
       env->SleepForMicroseconds(1000);
       if (ctx->IsCancelled()) {
@@ -62,13 +63,13 @@
   }
 
  private:
-  EagerContext* const eager_context_;
+  ProfilerContext profiler_context_;
 };
 }  // namespace
 
-std::unique_ptr<TPUProfiler::Service> CreateProfilerService(
-    EagerContext* const eager_context) {
-  return MakeUnique<ProfilerServiceImpl>(eager_context);
+std::unique_ptr<grpc::ProfilerService::Service> CreateProfilerService(
+    const ProfilerContext& profiler_context) {
+  return MakeUnique<ProfilerServiceImpl>(profiler_context);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/profiler/rpc/profiler_service_impl.h b/tensorflow/core/profiler/rpc/profiler_service_impl.h
index 311a267..64ae01d 100644
--- a/tensorflow/core/profiler/rpc/profiler_service_impl.h
+++ b/tensorflow/core/profiler/rpc/profiler_service_impl.h
@@ -18,12 +18,14 @@
 #include "grpcpp/grpcpp.h"
 #include "grpcpp/server_context.h"
 #include "grpcpp/support/status.h"
-#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
+#include "tensorflow/core/platform/grpc_services.h"
+#include "tensorflow/core/profiler/lib/profiler_session.h"
 
 namespace tensorflow {
-std::unique_ptr<TPUProfiler::Service> CreateProfilerService(
-    EagerContext* const eager_context);
+
+std::unique_ptr<grpc::ProfilerService::Service> CreateProfilerService(
+    const ProfilerContext& profiler_context);
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_PROFILER_RPC_PROFILER_SERVICE_IMPL_H_
diff --git a/tensorflow/core/profiler/trace_events.proto b/tensorflow/core/profiler/trace_events.proto
new file mode 100644
index 0000000..69ec88c
--- /dev/null
+++ b/tensorflow/core/profiler/trace_events.proto
@@ -0,0 +1,62 @@
+syntax = "proto3";
+
+package tensorflow.profiler;
+
+// A 'Trace' contains metadata for the individual traces of a system.
+message Trace {
+  // The devices that this trace has information about. Maps from device_id to
+  // more data about the specific device.
+  map<uint32, Device> devices = 1;
+
+  // All trace events captured during the profiling period.
+  repeated TraceEvent trace_events = 4;
+}
+
+// A 'device' is a physical entity in the system and is composed of several
+// resources.
+message Device {
+  // The name of the device.
+  string name = 1;
+
+  // The id of this device, unique in a single trace.
+  uint32 device_id = 2;
+
+  // The resources on this device, keyed by resource_id.
+  map<uint32, Resource> resources = 3;
+}
+
+// A 'resource' is generally a specific computation component on a device. These
+// can range from threads on CPUs to specific arithmetic units on hardware
+// devices.
+message Resource {
+  // The name of the resource.
+  string name = 1;
+
+  // The id of the resource. Unique within a device.
+  uint32 resource_id = 2;
+}
+
+message TraceEvent {
+  // The id of the device that this event occurred on. The full dataset should
+  // have this device present in the Trace object.
+  uint32 device_id = 1;
+
+  // The id of the resource that this event occurred on. The full dataset should
+  // have this resource present in the Device object of the Trace object. A
+  // resource_id is unique on a specific device, but not necessarily within the
+  // trace.
+  uint32 resource_id = 2;
+
+  // The name of this trace event.
+  string name = 3;
+
+  // The timestamp that this event occurred at, in picoseconds since tracing
+  // started.
+  uint64 timestamp_ps = 9;
+
+  // The duration of the event in picoseconds if applicable.
+  // Events without duration are called instant events.
+  uint64 duration_ps = 10;
+
+  // Extra arguments that will be displayed in trace view.
+  map<string, string> args = 11;
+}
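To make the schema concrete, a sketch that fills in one device, one resource, and one complete event, assuming the usual generated C++ proto API and header path:

#include "tensorflow/core/profiler/trace_events.pb.h"

tensorflow::profiler::Trace MakeExampleTrace() {
  tensorflow::profiler::Trace trace;

  // One device with a single resource (e.g. a host thread).
  tensorflow::profiler::Device& device = (*trace.mutable_devices())[0];
  device.set_name("/host:CPU");
  device.set_device_id(0);
  tensorflow::profiler::Resource& resource = (*device.mutable_resources())[7];
  resource.set_name("thread/7");
  resource.set_resource_id(7);

  // One non-instant event on that resource.
  tensorflow::profiler::TraceEvent* event = trace.add_trace_events();
  event->set_device_id(0);
  event->set_resource_id(7);
  event->set_name("MatMul");
  event->set_timestamp_ps(0);
  event->set_duration_ps(1500000);  // 1.5 microseconds.
  (*event->mutable_args())["shape"] = "[128,128]";
  return trace;
}
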
diff --git a/tensorflow/core/protobuf/checkpointable_object_graph.proto b/tensorflow/core/protobuf/checkpointable_object_graph.proto
deleted file mode 100644
index f295640..0000000
--- a/tensorflow/core/protobuf/checkpointable_object_graph.proto
+++ /dev/null
@@ -1,59 +0,0 @@
-syntax = "proto3";
-
-option cc_enable_arenas = true;
-
-package tensorflow;
-
-// A TensorBundle addition which saves extra information about the objects which
-// own variables, allowing for more robust checkpoint loading into modified
-// programs.
-
-message CheckpointableObjectGraph {
-  message CheckpointableObject {
-    message ObjectReference {
-      // An index into `CheckpointableObjectGraph.nodes`, indicating the object
-      // being referenced.
-      int32 node_id = 1;
-      // A user-provided name for the edge.
-      string local_name = 2;
-    }
-
-    message SerializedTensor {
-      // A name for the Tensor. Simple variables have only one
-      // `SerializedTensor` named "VARIABLE_VALUE" by convention. This value may
-      // be restored on object creation as an optimization.
-      string name = 1;
-      // The full name of the variable/tensor, if applicable. Used to allow
-      // name-based loading of checkpoints which were saved using an
-      // object-based API. Should match the checkpoint key which would have been
-      // assigned by tf.train.Saver.
-      string full_name = 2;
-      // The generated name of the Tensor in the checkpoint.
-      string checkpoint_key = 3;
-      // Whether checkpoints should be considered as matching even without this
-      // value restored. Used for non-critical values which don't affect the
-      // TensorFlow graph, such as layer configurations.
-      bool optional_restore = 4;
-    }
-
-    message SlotVariableReference {
-      // An index into `CheckpointableObjectGraph.nodes`, indicating the
-      // variable object this slot was created for.
-      int32 original_variable_node_id = 1;
-      // The name of the slot (e.g. "m"/"v").
-      string slot_name = 2;
-      // An index into `CheckpointableObjectGraph.nodes`, indicating the
-      // `Object` with the value of the slot variable.
-      int32 slot_variable_node_id = 3;
-    }
-
-    // Objects which this object depends on.
-    repeated ObjectReference children = 1;
-    // Serialized data specific to this object.
-    repeated SerializedTensor attributes = 2;
-    // Slot variables owned by this object.
-    repeated SlotVariableReference slot_variables = 3;
-  }
-
-  repeated CheckpointableObject nodes = 1;
-}
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 44e9854..3e24235 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -520,6 +520,25 @@
 
   // Graphs of the partitions executed by executors.
   repeated GraphDef partition_graphs = 3;
+
+  message FunctionGraphs {
+    // TODO(nareshmodi): Include some sort of function/cache-key identifier?
+    repeated GraphDef partition_graphs = 1;
+
+    GraphDef pre_optimization_graph = 2;
+    GraphDef post_optimization_graph = 3;
+  }
+  // This is only populated for graphs that are run as functions in TensorFlow
+  // V2. There will be an entry below for each function that is traced.
+  // The main use case of the post_optimization_graph and the partition_graphs
+  // is to give the caller insight into the graphs that were actually run by
+  // the runtime. Additional information (such as that in step_stats) will
+  // match these graphs.
+  // We also include the pre_optimization_graph since it is usually easier to
+  // read, and is helpful in situations where the caller wants to get a high
+  // level idea of what the built graph looks like (since the various graph
+  // optimization passes might change the structure of the graph significantly).
+  repeated FunctionGraphs function_graphs = 4;
 }
 
 // Defines a connection between two tensors in a `GraphDef`.
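A sketch of how a caller might inspect the new field, assuming a RunMetadata proto obtained from a traced function run (generated proto API assumed):

#include <iostream>

#include "tensorflow/core/protobuf/config.pb.h"

void PrintFunctionGraphSummary(const tensorflow::RunMetadata& run_metadata) {
  for (const tensorflow::RunMetadata::FunctionGraphs& fg :
       run_metadata.function_graphs()) {
    std::cout << "pre-optimization nodes:  "
              << fg.pre_optimization_graph().node_size() << "\n"
              << "post-optimization nodes: "
              << fg.post_optimization_graph().node_size() << "\n"
              << "partitions:              " << fg.partition_graphs_size()
              << std::endl;
  }
}
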
diff --git a/tensorflow/core/protobuf/graph_debug_info.proto b/tensorflow/core/protobuf/graph_debug_info.proto
index cd3cdeb..a123d3c 100644
--- a/tensorflow/core/protobuf/graph_debug_info.proto
+++ b/tensorflow/core/protobuf/graph_debug_info.proto
@@ -18,6 +18,12 @@
 
     // Col number in the file line.
     int32 col = 3;
+
+    // Name of the function that contains the file line.
+    string func = 4;
+
+    // Source code contained in this file line.
+    string code = 5;
   }
 
   // This represents a stack trace, which is an ordered list of `FileLineCol`.
diff --git a/tensorflow/core/protobuf/master.proto b/tensorflow/core/protobuf/master.proto
index c104463..4a998c5 100644
--- a/tensorflow/core/protobuf/master.proto
+++ b/tensorflow/core/protobuf/master.proto
@@ -16,11 +16,13 @@
 syntax = "proto3";
 
 package tensorflow;
+
 option cc_enable_arenas = true;
 option java_outer_classname = "DistributedRuntimeProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
-option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
+
+// add go_package externally with copybara
 import "tensorflow/core/framework/device_attributes.proto";
 import "tensorflow/core/framework/graph.proto";
 import "tensorflow/core/framework/tensor.proto";
@@ -138,6 +140,11 @@
   // response body. This is a workaround since the RPC subsystem may
   // truncate long metadata messages.
   bool store_errors_in_response_body = 7;
+
+  // Unique identifier for this request. Every RunStepRequest must
+  // have a unique request_id, and retried RunStepRequest must have
+  // the same request_id. If request_id is zero, retry detection is disabled.
+  int64 request_id = 8;
 }
 
 message RunStepResponse {
@@ -183,6 +190,11 @@
   // Target Nodes. A list of node names. The named nodes will be run in future
   // steps, but their outputs will not be fetched.
   repeated string target = 4;
+
+  // Unique identifier for this request. Every PartialRunSetupRequest must
+  // have a unique request_id, and retried PartialRunSetupRequest must have
+  // the same request_id. If request_id is zero, retry detection is disabled.
+  int64 request_id = 5;
 }
 
 message PartialRunSetupResponse {
@@ -204,8 +216,7 @@
   string session_handle = 1;
 }
 
-message CloseSessionResponse {
-}
+message CloseSessionResponse {}
 
 // Reset() allows misbehaving or slow sessions to be aborted and closed, and
 // causes their resources eventually to be released.  Reset() does not wait
@@ -237,8 +248,7 @@
   repeated string device_filters = 2;
 }
 
-message ResetResponse {
-}
+message ResetResponse {}
 
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -279,6 +289,11 @@
 
   // Options that define the behavior of the created callable.
   CallableOptions options = 2;
+
+  // Unique identifier for this request. Every MakeCallableRequest must
+  // have a unique request_id, and retried MakeCallableRequest must have
+  // the same request_id. If request_id is zero, retry detection is disabled.
+  int64 request_id = 3;
 }
 
 message MakeCallableResponse {
@@ -303,6 +318,11 @@
   // Values of the tensors passed as arguments to the callable, in the order
   // defined in the CallableOptions.feed field passed to MakeCallable.
   repeated TensorProto feed = 3;
+
+  // Unique identifier for this request. Every RunCallableRequest must
+  // have a unique request_id, and retried RunCallableRequest must have
+  // the same request_id. If request_id is zero, retry detection is disabled.
+  int64 request_id = 4;
 }
 
 message RunCallableResponse {
@@ -330,5 +350,4 @@
   int64 handle = 2;
 }
 
-message ReleaseCallableResponse {
-}
+message ReleaseCallableResponse {}
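The retry-detection contract added above only needs a nonzero id that is reused across retries of the same logical request. A minimal sketch; the helper name is hypothetical and the static generator is not thread-safe, illustration only:

#include <cstdint>
#include <random>

// Hypothetical helper: a nonzero random id keeps retry detection enabled.
int64_t NewRequestId() {
  static std::mt19937_64 rng(std::random_device{}());
  int64_t id = 0;
  while (id == 0) id = static_cast<int64_t>(rng());
  return id;
}

// Usage: set once per logical request and resend unchanged on retry, e.g.
//   run_step_request.set_request_id(NewRequestId());
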
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index b5c9599..7a62c6e 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -78,6 +78,9 @@
   Toggle scoped_allocator_optimization = 15;
   // Force small ops onto the CPU (default is ON).
   Toggle pin_to_host_optimization = 18;
+  // Enable the swap of kernel implementations based on the device placement
+  // (default is ON).
+  Toggle implementation_selector = 22;
   // Disable the entire meta optimizer (off by default).
   bool disable_meta_optimizer = 19;
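A hedged sketch of flipping the new toggle from a session ConfigProto, via the standard graph_options.rewrite_options path:

#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/protobuf/rewriter_config.pb.h"

void DisableImplementationSelector(tensorflow::ConfigProto* config) {
  config->mutable_graph_options()
      ->mutable_rewrite_options()
      ->set_implementation_selector(tensorflow::RewriterConfig::OFF);
}
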
 
diff --git a/tensorflow/contrib/tpu/proto/BUILD b/tensorflow/core/protobuf/tpu/BUILD
similarity index 100%
rename from tensorflow/contrib/tpu/proto/BUILD
rename to tensorflow/core/protobuf/tpu/BUILD
diff --git a/tensorflow/contrib/tpu/proto/compilation_result.proto b/tensorflow/core/protobuf/tpu/compilation_result.proto
similarity index 100%
rename from tensorflow/contrib/tpu/proto/compilation_result.proto
rename to tensorflow/core/protobuf/tpu/compilation_result.proto
diff --git a/tensorflow/contrib/tpu/proto/dynamic_padding.proto b/tensorflow/core/protobuf/tpu/dynamic_padding.proto
similarity index 100%
rename from tensorflow/contrib/tpu/proto/dynamic_padding.proto
rename to tensorflow/core/protobuf/tpu/dynamic_padding.proto
diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/core/protobuf/tpu/optimization_parameters.proto
similarity index 100%
rename from tensorflow/contrib/tpu/proto/optimization_parameters.proto
rename to tensorflow/core/protobuf/tpu/optimization_parameters.proto
diff --git a/tensorflow/contrib/tpu/proto/topology.proto b/tensorflow/core/protobuf/tpu/topology.proto
similarity index 100%
rename from tensorflow/contrib/tpu/proto/topology.proto
rename to tensorflow/core/protobuf/tpu/topology.proto
diff --git a/tensorflow/core/protobuf/tpu/tpu_embedding_configuration.proto b/tensorflow/core/protobuf/tpu/tpu_embedding_configuration.proto
new file mode 100644
index 0000000..53280ed
--- /dev/null
+++ b/tensorflow/core/protobuf/tpu/tpu_embedding_configuration.proto
@@ -0,0 +1,95 @@
+syntax = "proto3";
+
+package tensorflow.tpu;
+
+import "tensorflow/core/protobuf/tpu/optimization_parameters.proto";
+import "tensorflow/core/protobuf/tpu/tpu_embedding_output_layout.proto";
+
+message TPUEmbeddingConfiguration {
+  // Description of the various embedding tables.
+  message TableDescriptor {
+    // Name of the table.
+    string name = 1;
+    // Size of the vocabulary (i.e., number of rows) in the table.
+    int32 vocabulary_size = 2;
+    // The embedding dimension (i.e., the width of the embedding table).
+    int32 dimension = 3;
+    // Number of features mapped to this table.
+    int32 num_features = 4;
+    // Details of the learning algorithm used to update the embedding
+    // parameters.
+    OptimizationParameters optimization_parameters = 5;
+  }
+  repeated TableDescriptor table_descriptor = 1;
+
+  // Mode. Should the embedding layer program be run for inference (just the
+  // forward pass), training (both forward and backward passes), or just the
+  // backward pass?
+  enum Mode {
+    UNSPECIFIED = 0;
+    INFERENCE = 1;
+    TRAINING = 2;
+    BACKWARD_PASS_ONLY = 3;
+  }
+  Mode mode = 2;
+
+  // Number of samples in each batch of embedding layer activations sent to
+  // the TensorCore.
+  int32 batch_size_per_tensor_core = 3;
+
+  // Number of TPU hosts used for inference/training.
+  int32 num_hosts = 4;
+
+  // Number of TensorCores used for inference/training.
+  int32 num_tensor_cores = 5;
+
+  // Sharding strategy of the embedding tables among the hosts.
+  // If the sharding_strategy is "mod", each id is assigned to host
+  // "id % num_hosts". For instance, 13 ids are split across 5 hosts as:
+  // [[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]].
+  // If the sharding_strategy is "div", ids are assigned to hosts in a
+  // contiguous manner. In this case, 13 ids are split across 5 hosts as:
+  // [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]].
+  // In both strategies, if the number of ids is not evenly divisible by the
+  // number of hosts, each of the first "table_descriptor.num_ids % num_hosts"
+  // hosts will be assigned one more id.
+  // This partitioning strategy exactly follows that in the embedding_lookup
+  // TensorFlow function at tensorflow/python/ops/embedding_ops.py.
+  enum ShardingStrategy {
+    DIV_DEFAULT = 0;
+    MOD = 1;
+  }
+  ShardingStrategy sharding_strategy = 6;
+
+  // This parameter determines whether the execution of the sparse core will
+  // be pipelined with that of the TensorCore. It only affects results when
+  // mode=TRAINING. If mode=INFERENCE or BACKWARD_PASS_ONLY, it does not
+  // affect execution and is hence a don't-care value.
+  //
+  // false: The execution of the sparse core is not pipelined with that of the
+  // TensorCore. The forward pass of every step on the sparse core is executed
+  // only after the backward pass of the previous step is complete. And the
+  // backward pass on the sparse core is executed only after the embedding
+  // gradients have been computed on the TensorCore on every step. This ensures
+  // that the activations on every step observe the gradient updates from the
+  // previous step on both the sparse core and the TensorCore.
+  //
+  // true: The execution of the sparse core is pipelined with that of the
+  // TensorCore. The forward pass of every step on the sparse core can be
+  // executed after the forward pass of the previous step is complete without
+  // waiting for the backward pass. This improves the utilization of the sparse
+  // core allowing it to process step N+1 while the embedding gradients for step
+  // N are computed on the TensorCore. The backward pass of every step on the
+  // sparse core is executed directly after the forward pass for the next step
+  // is complete. The drawback is that embedding activations for step N+1 do not
+  // observe the embedding gradient updates from step N. This could affect model
+  // quality if step N and N+1 involve the same set of embedding IDs. However,
+  // since the embedding updates are sparse, this is generally not considered a
+  // problem.
+  bool pipeline_execution_with_tensor_core = 7;
+
+  // Extended output layout information; if not provided, a compatibility mode
+  // will use defaults that match the old layout. Providing a value for this
+  // field is EXPERIMENTAL and most ways of filling it will probably break. Do
+  // not set it unless you know what you are doing.
+  TPUEmbeddingOutputLayout output_layout = 8;
+}
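The two sharding strategies can be pinned down with a small sketch; the helper names are illustrative, and the div case reproduces the 13-ids/5-hosts example from the comment above:

#include <cstdint>

// "mod": host(id) = id % num_hosts.
int32_t ModShardHost(int32_t id, int32_t num_hosts) { return id % num_hosts; }

// "div": contiguous ranges; the first (num_ids % num_hosts) hosts each hold
// one extra id.
int32_t DivShardHost(int32_t id, int32_t num_ids, int32_t num_hosts) {
  const int32_t ids_per_host = num_ids / num_hosts;
  const int32_t extras = num_ids % num_hosts;
  const int32_t cutoff = (ids_per_host + 1) * extras;
  if (id < cutoff) return id / (ids_per_host + 1);
  return extras + (id - cutoff) / ids_per_host;
}
// DivShardHost(9, 13, 5) == 3 and DivShardHost(12, 13, 5) == 4, matching
// [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]].
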
diff --git a/tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto b/tensorflow/core/protobuf/tpu/tpu_embedding_output_layout.proto
similarity index 100%
rename from tensorflow/contrib/tpu/proto/tpu_embedding_output_layout.proto
rename to tensorflow/core/protobuf/tpu/tpu_embedding_output_layout.proto
diff --git a/tensorflow/core/protobuf/trackable_object_graph.proto b/tensorflow/core/protobuf/trackable_object_graph.proto
new file mode 100644
index 0000000..02d852e
--- /dev/null
+++ b/tensorflow/core/protobuf/trackable_object_graph.proto
@@ -0,0 +1,59 @@
+syntax = "proto3";
+
+option cc_enable_arenas = true;
+
+package tensorflow;
+
+// A TensorBundle addition which saves extra information about the objects which
+// own variables, allowing for more robust checkpoint loading into modified
+// programs.
+
+message TrackableObjectGraph {
+  message TrackableObject {
+    message ObjectReference {
+      // An index into `TrackableObjectGraph.nodes`, indicating the object
+      // being referenced.
+      int32 node_id = 1;
+      // A user-provided name for the edge.
+      string local_name = 2;
+    }
+
+    message SerializedTensor {
+      // A name for the Tensor. Simple variables have only one
+      // `SerializedTensor` named "VARIABLE_VALUE" by convention. This value may
+      // be restored on object creation as an optimization.
+      string name = 1;
+      // The full name of the variable/tensor, if applicable. Used to allow
+      // name-based loading of checkpoints which were saved using an
+      // object-based API. Should match the checkpoint key which would have been
+      // assigned by tf.train.Saver.
+      string full_name = 2;
+      // The generated name of the Tensor in the checkpoint.
+      string checkpoint_key = 3;
+      // Whether checkpoints should be considered as matching even without this
+      // value restored. Used for non-critical values which don't affect the
+      // TensorFlow graph, such as layer configurations.
+      bool optional_restore = 4;
+    }
+
+    message SlotVariableReference {
+      // An index into `TrackableObjectGraph.nodes`, indicating the
+      // variable object this slot was created for.
+      int32 original_variable_node_id = 1;
+      // The name of the slot (e.g. "m"/"v").
+      string slot_name = 2;
+      // An index into `TrackableObjectGraph.nodes`, indicating the
+      // `Object` with the value of the slot variable.
+      int32 slot_variable_node_id = 3;
+    }
+
+    // Objects which this object depends on.
+    repeated ObjectReference children = 1;
+    // Serialized data specific to this object.
+    repeated SerializedTensor attributes = 2;
+    // Slot variables owned by this object.
+    repeated SlotVariableReference slot_variables = 3;
+  }
+
+  repeated TrackableObject nodes = 1;
+}
diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD
new file mode 100644
index 0000000..5cbed40
--- /dev/null
+++ b/tensorflow/core/tpu/BUILD
@@ -0,0 +1,30 @@
+# Description: Utilities for TPU Operations
+
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "tpu_embedding_optimization_parameters_utils",
+    srcs = ["tpu_embedding_optimization_parameters_utils.cc"],
+    hdrs = ["tpu_embedding_optimization_parameters_utils.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core/protobuf/tpu:optimization_parameters_proto_cc",
+        "@com_google_absl//absl/base",
+    ],
+)
+
+cc_library(
+    name = "tpu_embedding_output_layout_utils",
+    srcs = ["tpu_embedding_output_layout_utils.cc"],
+    hdrs = ["tpu_embedding_output_layout_utils.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_cc",
+        "//tensorflow/core/protobuf/tpu:tpu_embedding_output_layout_proto_cc",
+    ],
+)
diff --git a/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc
new file mode 100644
index 0000000..2c179b5
--- /dev/null
+++ b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc
@@ -0,0 +1,264 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tpu {
+
+string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      return "Adagrad";
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      return "StochasticGradientDescent";
+    case OptimizationAlgorithm::kFtrl:
+      return "FTRL";
+    case OptimizationAlgorithm::kAdam:
+      return "ADAM";
+    case OptimizationAlgorithm::kMomentum:
+      return "Momentum";
+    case OptimizationAlgorithm::kRmsProp:
+      return "RMSProp";
+    case OptimizationAlgorithm::kCenteredRmsProp:
+      return "CenteredRMSProp";
+    case OptimizationAlgorithm::kMdlAdagradLight:
+      return "MDLAdagradLight";
+    case OptimizationAlgorithm::kAdadelta:
+      return "Adadelta";
+    case OptimizationAlgorithm::kProximalAdagrad:
+      return "ProximalAdagrad";
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
+      return "*** Not set ***";
+  }
+}
+
+string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      return "Adagrad";
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      return "stochastic gradient descent";
+    case OptimizationAlgorithm::kFtrl:
+      return "FTRL";
+    case OptimizationAlgorithm::kAdam:
+      return "ADAM";
+    case OptimizationAlgorithm::kMomentum:
+      return "Momentum";
+    case OptimizationAlgorithm::kRmsProp:
+      return "RMSProp";
+    case OptimizationAlgorithm::kCenteredRmsProp:
+      return "centered RMSProp";
+    case OptimizationAlgorithm::kMdlAdagradLight:
+      return "MDL Adagrad Light";
+    case OptimizationAlgorithm::kAdadelta:
+      return "Adadelta";
+    case OptimizationAlgorithm::kProximalAdagrad:
+      return "proximal Adagrad";
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
+      return "unknown (not specified)";
+  }
+}
+
+// Returns the number of optimization parameter vectors used by the optimization
+// algorithm, excluding the weights themselves and assuming no gradient
+// accumulation.
+Status GetBaseAuxiliaryParameterCount(OptimizationAlgorithm alg, int* count) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      *count = 1;
+      return Status::OK();
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      *count = 0;
+      return Status::OK();
+    case OptimizationAlgorithm::kFtrl:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kAdam:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kMomentum:
+      *count = 1;
+      return Status::OK();
+    case OptimizationAlgorithm::kRmsProp:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kCenteredRmsProp:
+      *count = 3;
+      return Status::OK();
+    case OptimizationAlgorithm::kMdlAdagradLight:
+      *count = 3;
+      return Status::OK();
+    case OptimizationAlgorithm::kAdadelta:
+      *count = 2;
+      return Status::OK();
+    case OptimizationAlgorithm::kProximalAdagrad:
+      *count = 1;
+      return Status::OK();
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET:
+      return errors::InvalidArgument("No optimization algorithm specified");
+  }
+}
+
+Status GetGradientAccumulationSupport(OptimizationAlgorithm alg,
+                                      GradientAccumulationSupport* support) {
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad:
+      *support = GradientAccumulationSupport::kSupported;
+      return Status::OK();
+    case OptimizationAlgorithm::kStochasticGradientDescent:
+      *support = GradientAccumulationSupport::kUnnecessary;
+      return Status::OK();
+    default: {
+      int auxiliary_parameter_count;
+      TF_RETURN_IF_ERROR(
+          GetBaseAuxiliaryParameterCount(alg, &auxiliary_parameter_count));
+      *support = auxiliary_parameter_count + 1 <= kMaxAuxiliaryParameterCount
+                     ? GradientAccumulationSupport::kSupported
+                     : GradientAccumulationSupport::kNotSupported;
+      return Status::OK();
+    }
+  }
+}
+
+namespace {
+// Make a normal state variable specification. Please refer to
+// //tensorflow/core/protobuf/tpu/optimization_parameters.proto
+// (StateVariableSpecification message) for instructions on how to set the
+// padding_initial_value field.
+StateVariableSpecification MakeStandardStateVariableSpecification(
+    const string& name, double padding_initial_value) {
+  StateVariableSpecification result;
+  result.set_name(name);
+  result.mutable_user_defined()->set_padding_initial_value(
+      padding_initial_value);
+  return result;
+}
+}  // namespace
+
+Status GetOptimizationAlgorithmStateVariables(
+    OptimizationAlgorithm alg, bool use_gradient_accumulation,
+    std::vector<StateVariableSpecification>* state_variables) {
+  // The first parameter set is always the weights themselves.
+  state_variables->push_back(
+      MakeStandardStateVariableSpecification("parameters", 0.0));
+  // The order of the returned parameters needs to match the offsets used by
+  // the algorithm implementations in test_util.cc and
+  // address_handler_program_creator.cc.
+  switch (alg) {
+    case OptimizationAlgorithm::kAdagrad: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators", 0.1));
+      break;
+    }
+    case OptimizationAlgorithm::kStochasticGradientDescent: {
+      // None.
+      break;
+    }
+    case OptimizationAlgorithm::kFtrl: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators", 0.1));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("linears", 0.0));
+      break;
+    }
+    case OptimizationAlgorithm::kAdam: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("momenta", 0.0));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("velocities", 0.0));
+      break;
+    }
+    case OptimizationAlgorithm::kMomentum: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("momenta", 0.0));
+      break;
+    }
+    case OptimizationAlgorithm::kRmsProp: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("ms", 1.0));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("mom", 0.0));
+      break;
+    }
+    case OptimizationAlgorithm::kCenteredRmsProp: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("ms", 1.0));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("mom", 0.0));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("mg", 0.0));
+      break;
+    }
+    case OptimizationAlgorithm::kMdlAdagradLight: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators", 0.1));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("weights", 0.0));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("benefits", 0.0));
+      break;
+    }
+    case OptimizationAlgorithm::kAdadelta: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators", 0.0));
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("updates", 0.0));
+      break;
+    }
+    case OptimizationAlgorithm::kProximalAdagrad: {
+      state_variables->push_back(
+          MakeStandardStateVariableSpecification("accumulators", 0.1));
+      break;
+    }
+    case OptimizationAlgorithm::PARAMETERS_NOT_SET: {
+      return errors::InvalidArgument("No optimization algorithm specified");
+    }
+  }
+  // This needs to be last so that the save/restore ops do not need to know
+  // about gradient accumulation.
+  if (use_gradient_accumulation) {
+    StateVariableSpecification gradient_acc;
+    gradient_acc.set_name("gradient_accumulators");
+    gradient_acc.mutable_fill_with_constant()->set_initial_value(
+        kGradientAccumulatorInitialValue);
+    state_variables->push_back(std::move(gradient_acc));
+  }
+  if (state_variables->size() > kMaxAuxiliaryParameterCount + 1) {
+    return errors::InvalidArgument(
+        "Optimization algorithm", GetOptimizationAlgorithmName(alg),
+        "does not support gradient accumulation because it "
+        "already has too many other accumulators");
+  }
+  return Status::OK();
+}
+
+std::vector<OptimizationAlgorithm> GetOptimizationAlgorithms() {
+  return {
+      OptimizationAlgorithm::kAdagrad,
+      OptimizationAlgorithm::kStochasticGradientDescent,
+      OptimizationAlgorithm::kFtrl,
+      OptimizationAlgorithm::kAdam,
+      OptimizationAlgorithm::kMomentum,
+      OptimizationAlgorithm::kRmsProp,
+      OptimizationAlgorithm::kCenteredRmsProp,
+      OptimizationAlgorithm::kMdlAdagradLight,
+      OptimizationAlgorithm::kAdadelta,
+      OptimizationAlgorithm::kProximalAdagrad,
+  };
+}
+
+}  // namespace tpu
+}  // namespace tensorflow
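A hedged sketch of exercising the helpers above, e.g. to log per-algorithm metadata; the function name is illustrative:

#include <iostream>
#include <vector>

#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h"

tensorflow::Status LogAlgorithmSummaries() {
  using tensorflow::tpu::GetBaseAuxiliaryParameterCount;
  using tensorflow::tpu::GetGradientAccumulationSupport;
  using tensorflow::tpu::GetOptimizationAlgorithmName;
  using tensorflow::tpu::GetOptimizationAlgorithms;
  using tensorflow::tpu::GradientAccumulationSupport;
  for (auto alg : GetOptimizationAlgorithms()) {
    int count = 0;
    TF_RETURN_IF_ERROR(GetBaseAuxiliaryParameterCount(alg, &count));
    GradientAccumulationSupport support;
    TF_RETURN_IF_ERROR(GetGradientAccumulationSupport(alg, &support));
    // Prints 1/0 for supported/not supported.
    std::cout << GetOptimizationAlgorithmName(alg) << ": " << count
              << " auxiliary parameter vector(s), accumulation supported: "
              << (support == GradientAccumulationSupport::kSupported)
              << std::endl;
  }
  return tensorflow::Status::OK();
}
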
diff --git a/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h
new file mode 100644
index 0000000..ceb07ff
--- /dev/null
+++ b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h
@@ -0,0 +1,90 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
+#define TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
+
+#include <string>
+#include "absl/base/casts.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/protobuf/tpu/optimization_parameters.pb.h"
+
+namespace tensorflow {
+namespace tpu {
+
+using OptimizationAlgorithm = OptimizationParameters::ParametersCase;
+
+// Returns the name of the optimization algorithm.
+string GetOptimizationAlgorithmName(OptimizationAlgorithm alg);
+
+// Returns a user-friendly name for the optimization algorithm.
+string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg);
+
+// Returns all supported optimization algorithms.
+std::vector<OptimizationAlgorithm> GetOptimizationAlgorithms();
+
+enum class GradientAccumulationSupport {
+  // Accumulation cannot be used with this optimizer.
+  kNotSupported,
+
+  // Accumulation is unnecessary because optimizer application is commutative.
+  kUnnecessary,
+
+  // Accumulation is allowed and changes optimizer behavior.
+  kSupported,
+};
+
+// Returns the number of optimization parameter vectors used by the optimization
+// algorithm, excluding the weights themselves and assuming no gradient
+// accumulation.
+Status GetBaseAuxiliaryParameterCount(OptimizationAlgorithm alg, int* count);
+
+// Returns whether (and how) an optimization algorithm supports gradient
+// accumulation.
+Status GetGradientAccumulationSupport(OptimizationAlgorithm alg,
+                                      GradientAccumulationSupport* support);
+
+// Returns the parameter specifications for the optimization algorithm (the main
+// parameters first, followed by any auxiliary parameters such as Adagrad
+// accumulators).
+Status GetOptimizationAlgorithmStateVariables(
+    OptimizationAlgorithm alg, bool use_gradient_accumulation,
+    std::vector<StateVariableSpecification>* state_variables);
+
+// Maximum value of auxiliary_parameter_count for any optimization algorithm.
+static constexpr int kMaxAuxiliaryParameterCount = 3;
+
+// Fill value for gradient accumulators. This is a denormal so that it will be
+// flushed to zero on the current TPU platforms and needs to continue to have
+// the following properties in the future:
+//
+// 1. Does not have the same bit pattern as a zero and can be distinguished from
+// it using integer operations.
+// 2. Treated as zero by floating-point arithmetic operations (at least addition
+// and subtraction).
+// 3. Cannot be produced by any floating-point arithmetic operation, including
+// those involving itself.
+//
+// It does not need to compare equal or not equal to zero in floating point. We
+// need to use a non-zero value here because some optimization algorithms are
+// not no-ops on zero gradients, so we need to distinguish an accumulated
+// gradient of zero from one that has been cleared after its gradients have
+// already been applied to the parameters and accumulators.
+const float kGradientAccumulatorInitialValue = absl::bit_cast<float, uint32>(1);
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_
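Property (1) above is what makes the fill value detectable. A sketch of the bit-pattern test a consumer might use; the helper name is hypothetical:

#include <cstdint>

#include "absl/base/casts.h"

// True iff `value` still holds the exact fill pattern, i.e. no gradient has
// been accumulated. A floating-point compare could not tell this apart from
// 0.0f, since the fill value behaves as zero in arithmetic.
inline bool IsUntouchedAccumulator(float value) {
  return absl::bit_cast<uint32_t>(value) == 1u;
}
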
diff --git a/tensorflow/core/tpu/tpu_embedding_output_layout_utils.cc b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.cc
new file mode 100644
index 0000000..3a02775
--- /dev/null
+++ b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.cc
@@ -0,0 +1,98 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/tpu/tpu_embedding_output_layout_utils.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/protobuf/tpu/tpu_embedding_output_layout.pb.h"
+
+namespace tensorflow {
+namespace tpu {
+
+void AddDefaultEmbeddingOutputLayoutIfNeeded(
+    TPUEmbeddingConfiguration* config) {
+  if (config->has_output_layout()) {
+    // Model or previous step has already filled this in.
+    return;
+  }
+
+  TPUEmbeddingOutputLayout* layout = config->mutable_output_layout();
+  // Create output tensors.
+  for (const auto& table : config->table_descriptor()) {
+    TPUEmbeddingOutputLayout::EmbeddingOutputTensor* output =
+        layout->add_output();
+    TPUEmbeddingOutputLayout::TwoDOutputTensor* two_d = output->mutable_two_d();
+    two_d->set_dim1_size(table.dimension());
+    two_d->set_dim0_size_per_sample(table.num_features());
+  }
+
+  // Create table output locations.
+  for (int table_id = 0; table_id < config->table_descriptor_size();
+       ++table_id) {
+    TPUEmbeddingOutputLayout::TableDescriptor* output_table =
+        layout->add_table();
+    const auto& table = config->table_descriptor(table_id);
+    for (int feature_index = 0; feature_index < table.num_features();
+         ++feature_index) {
+      TPUEmbeddingOutputLayout::FeatureDescriptor* output_feature =
+          output_table->add_feature();
+      TPUEmbeddingOutputLayout::OutputLocation* output_location =
+          output_feature->add_output_location();
+      output_location->set_tensor_index(table_id);
+      output_location->set_dim0_offset(feature_index);
+      output_location->set_dim1_offset(0);
+    }
+  }
+}
+
+Status ComputeOutputTensorShapes(const TPUEmbeddingConfiguration& config,
+                                 std::vector<TensorShapeProto>* shapes) {
+  if (!config.has_output_layout()) {
+    return errors::InvalidArgument(
+        "TPUEmbeddingConfiguration is missing output layout.");
+  }
+  const TPUEmbeddingOutputLayout& layout = config.output_layout();
+  int batch_size = config.batch_size_per_tensor_core();
+
+  for (int i = 0; i < layout.output_size(); ++i) {
+    const auto& output = layout.output(i);
+    TensorShapeProto shape;
+    switch (output.output_format_case()) {
+      case TPUEmbeddingOutputLayout::EmbeddingOutputTensor::OutputFormatCase::
+          kTwoD: {
+        auto* dim0 = shape.add_dim();
+        dim0->set_size(output.two_d().dim0_size_per_sample() * batch_size);
+        auto* dim1 = shape.add_dim();
+        dim1->set_size(output.two_d().dim1_size());
+        break;
+      }
+      case TPUEmbeddingOutputLayout::EmbeddingOutputTensor::OutputFormatCase::
+          OUTPUT_FORMAT_NOT_SET: {
+        return errors::InvalidArgument(
+            "Output layout in TPUEmbeddingConfiguration has unset embedding "
+            "output tensor format.");
+      }
+      default: {
+        return errors::InvalidArgument(
+            "Output layout in TPUEmbeddingConfiguration has invalid or "
+            "unhandled embedding output tensor format.");
+      }
+    }
+    shapes->push_back(shape);
+  }
+  return Status::OK();
+}
+
+}  // namespace tpu
+}  // namespace tensorflow
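A hedged sketch chaining the two helpers above, taking the config by value so the compatibility default can be filled in locally; the wrapper name is illustrative:

#include <vector>

#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/tpu/tpu_embedding_output_layout_utils.h"

tensorflow::Status OutputShapesForConfig(
    tensorflow::tpu::TPUEmbeddingConfiguration config,
    std::vector<tensorflow::TensorShapeProto>* shapes) {
  // No-op if the model already supplied an output layout.
  tensorflow::tpu::AddDefaultEmbeddingOutputLayoutIfNeeded(&config);
  return tensorflow::tpu::ComputeOutputTensorShapes(config, shapes);
}
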
diff --git a/tensorflow/core/tpu/tpu_embedding_output_layout_utils.h b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.h
new file mode 100644
index 0000000..5bff401
--- /dev/null
+++ b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.h
@@ -0,0 +1,38 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
+#define TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
+
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/protobuf/tpu/tpu_embedding_configuration.pb.h"
+
+namespace tensorflow {
+namespace tpu {
+
+// Creates a default output layout for compatibility if none was provided by the
+// model.
+void AddDefaultEmbeddingOutputLayoutIfNeeded(TPUEmbeddingConfiguration* config);
+
+// Computes the shape of the output tensors from an output layout.
+Status ComputeOutputTensorShapes(
+    const TPUEmbeddingConfiguration& config,
+    std::vector<tensorflow::TensorShapeProto>* shapes);
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_
diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h
index 6d73c38..2d647fd 100644
--- a/tensorflow/core/util/bcast.h
+++ b/tensorflow/core/util/bcast.h
@@ -105,13 +105,19 @@
   static Vec FromShape(const TensorShape& shape);
   static TensorShape ToShape(const BCast::Vec& vec);
 
+  template <typename IndexType, int NDIMS>
+  static Eigen::array<IndexType, NDIMS> ToIndexArrayType(
+      const BCast::Vec& vec) {
+    CHECK_EQ(vec.size(), NDIMS);
+    Eigen::array<IndexType, NDIMS> ret;
+    for (int i = 0; i < NDIMS; ++i) ret[i] = vec[i];
+    return ret;
+  }
+
   template <int NDIMS>
   static Eigen::array<Eigen::DenseIndex, NDIMS> ToIndexArray(
       const BCast::Vec& vec) {
-    CHECK_EQ(vec.size(), NDIMS);
-    Eigen::array<Eigen::DenseIndex, NDIMS> ret;
-    for (int i = 0; i < NDIMS; ++i) ret[i] = vec[i];
-    return ret;
+    return ToIndexArrayType<Eigen::DenseIndex, NDIMS>(vec);
   }
 
  private:
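A sketch of the narrower-index variant this refactoring enables; that BCast::Vec is constructible from an initializer list is an assumption:

#include "tensorflow/core/util/bcast.h"

void ToIndexArrayTypeExample() {
  const tensorflow::BCast::Vec vec = {3, 4};
  // 32-bit Eigen indices; ToIndexArray<2>(vec) would yield the equivalent
  // Eigen::DenseIndex (typically 64-bit) array.
  Eigen::array<int, 2> idx =
      tensorflow::BCast::ToIndexArrayType<int, 2>(vec);
  (void)idx;  // idx == {3, 4}
}
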
diff --git a/tensorflow/core/util/device_name_utils.cc b/tensorflow/core/util/device_name_utils.cc
index cb088fa..56e6188 100644
--- a/tensorflow/core/util/device_name_utils.cc
+++ b/tensorflow/core/util/device_name_utils.cc
@@ -289,6 +289,30 @@
   return true;
 }
 
+void DeviceNameUtils::EnsureSpecification(ParsedName* more_specific,
+                                          const ParsedName& less_specific) {
+  if (less_specific.has_job) {
+    more_specific->has_job = true;
+    more_specific->job = less_specific.job;
+  }
+  if (less_specific.has_replica) {
+    more_specific->has_replica = true;
+    more_specific->replica = less_specific.replica;
+  }
+  if (less_specific.has_task) {
+    more_specific->has_task = true;
+    more_specific->task = less_specific.task;
+  }
+  if (less_specific.has_type) {
+    more_specific->has_type = true;
+    more_specific->type = less_specific.type;
+  }
+  if (less_specific.has_id) {
+    more_specific->has_id = true;
+    more_specific->id = less_specific.id;
+  }
+}
+
 /* static */
 bool DeviceNameUtils::IsCompleteSpecification(const ParsedName& pattern,
                                               const ParsedName& name) {
diff --git a/tensorflow/core/util/device_name_utils.h b/tensorflow/core/util/device_name_utils.h
index bb5e2b3..b047e81 100644
--- a/tensorflow/core/util/device_name_utils.h
+++ b/tensorflow/core/util/device_name_utils.h
@@ -110,6 +110,11 @@
   static bool IsSpecification(const ParsedName& less_specific,
                               const ParsedName& more_specific);
 
+  // Makes minimal changes to more_specific so that it becomes a
+  // specification of less_specific.
+  static void EnsureSpecification(ParsedName* more_specific,
+                                  const ParsedName& less_specific);
+
   // Like IsSpecification, but the second argument "name" must have a
   // non-wildcard value for all of its components.
   static bool IsCompleteSpecification(const ParsedName& pattern,
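A hedged sketch of the new helper against its documented contract, assuming ParseFullName's usual acceptance of partial names:

#include "tensorflow/core/util/device_name_utils.h"

void EnsureSpecificationExample() {
  tensorflow::DeviceNameUtils::ParsedName device;
  tensorflow::DeviceNameUtils::ParsedName pattern;
  tensorflow::DeviceNameUtils::ParseFullName("/device:GPU:0", &device);
  tensorflow::DeviceNameUtils::ParseFullName("/job:worker/replica:0/task:1",
                                             &pattern);
  // Copies job/replica/task from `pattern` onto `device`, after which
  // IsSpecification(pattern, device) holds.
  tensorflow::DeviceNameUtils::EnsureSpecification(&device, pattern);
}
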
diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h
index cd168f6..4e53c59 100644
--- a/tensorflow/core/util/sparse/sparse_tensor.h
+++ b/tensorflow/core/util/sparse/sparse_tensor.h
@@ -63,7 +63,7 @@
                                     ix.shape().dim_size(0), ", values = ",
                                     vals.shape().dim_size(0), ")"));
     }
-    int dims;
+    int dims = 0;
     TF_RETURN_IF_ERROR(GetDimsFromIx(ix, &dims));
     if (order.size() != dims) {
       return Status(error::INVALID_ARGUMENT,
diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD
index f5f0d7c..5f99f0a 100644
--- a/tensorflow/examples/android/BUILD
+++ b/tensorflow/examples/android/BUILD
@@ -37,8 +37,7 @@
         "-lm",
         "-z defs",
         "-s",
-        "-Wl,--version-script",  # This line must be directly followed by LINKER_SCRIPT.
-        "$(location {})".format(LINKER_SCRIPT),
+        "-Wl,--version-script,$(location {})".format(LINKER_SCRIPT),
     ],
     linkshared = 1,
     linkstatic = 1,
diff --git a/tensorflow/examples/ios/benchmark/ios_image_load.h b/tensorflow/examples/ios/benchmark/ios_image_load.h
index 3f94984..22ee785 100644
--- a/tensorflow/examples/ios/benchmark/ios_image_load.h
+++ b/tensorflow/examples/ios/benchmark/ios_image_load.h
@@ -17,7 +17,7 @@
 
 #include <vector>
 
-#include "tensorflow/core/framework/types.h"
+#include "third_party/tensorflow/core/framework/types.h"
 
 std::vector<tensorflow::uint8> LoadImageFromFile(const char* file_name,
                                                  int* out_width,
diff --git a/tensorflow/examples/ios/camera/CameraExampleViewController.h b/tensorflow/examples/ios/camera/CameraExampleViewController.h
index 0aefbc6..277b6e2 100644
--- a/tensorflow/examples/ios/camera/CameraExampleViewController.h
+++ b/tensorflow/examples/ios/camera/CameraExampleViewController.h
@@ -16,8 +16,8 @@
 #import <UIKit/UIKit.h>
 
 #include <memory>
-#include "tensorflow/core/public/session.h"
-#include "tensorflow/core/util/memmapped_file_system.h"
+#include "third_party/tensorflow/core/public/session.h"
+#include "third_party/tensorflow/core/util/memmapped_file_system.h"
 
 @interface CameraExampleViewController
     : UIViewController<UIGestureRecognizerDelegate,
diff --git a/tensorflow/examples/ios/camera/ios_image_load.h b/tensorflow/examples/ios/camera/ios_image_load.h
index f10b0b9..9915687 100644
--- a/tensorflow/examples/ios/camera/ios_image_load.h
+++ b/tensorflow/examples/ios/camera/ios_image_load.h
@@ -17,7 +17,7 @@
 
 #include <vector>
 
-#include "tensorflow/core/framework/types.h"
+#include "third_party/tensorflow/core/framework/types.h"
 
 std::vector<tensorflow::uint8> LoadImageFromFile(const char* file_name,
 						 int* out_width,
diff --git a/tensorflow/examples/ios/camera/tensorflow_utils.h b/tensorflow/examples/ios/camera/tensorflow_utils.h
index 78bdb82..33e95b1 100644
--- a/tensorflow/examples/ios/camera/tensorflow_utils.h
+++ b/tensorflow/examples/ios/camera/tensorflow_utils.h
@@ -18,8 +18,8 @@
 #include <memory>
 #include <vector>
 
-#include "tensorflow/core/public/session.h"
-#include "tensorflow/core/util/memmapped_file_system.h"
+#include "third_party/tensorflow/core/public/session.h"
+#include "third_party/tensorflow/core/util/memmapped_file_system.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 // Reads a serialized GraphDef protobuf file from the bundle, typically
diff --git a/tensorflow/examples/ios/simple/ios_image_load.h b/tensorflow/examples/ios/simple/ios_image_load.h
index 0e0b771..2d2ee78 100644
--- a/tensorflow/examples/ios/simple/ios_image_load.h
+++ b/tensorflow/examples/ios/simple/ios_image_load.h
@@ -17,7 +17,7 @@
 
 #include <vector>
 
-#include "tensorflow/core/framework/types.h"
+#include "third_party/tensorflow/core/framework/types.h"
 
 std::vector<tensorflow::uint8> LoadImageFromFile(const char* file_name,
 						 int* out_width,
diff --git a/tensorflow/examples/saved_model/integration_tests/BUILD b/tensorflow/examples/saved_model/integration_tests/BUILD
index f40c6d9..d7d8d95 100644
--- a/tensorflow/examples/saved_model/integration_tests/BUILD
+++ b/tensorflow/examples/saved_model/integration_tests/BUILD
@@ -36,18 +36,80 @@
     ],
 )
 
+py_binary(
+    name = "export_simple_text_embedding",
+    srcs = ["export_simple_text_embedding.py"],
+    deps = [
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+py_binary(
+    name = "use_model_in_sequential_keras",
+    srcs = ["use_model_in_sequential_keras.py"],
+    deps = [
+        ":util",
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+py_library(
+    name = "util",
+    srcs = ["util.py"],
+    deps = [
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+py_library(
+    name = "mnist_util",
+    srcs = ["mnist_util.py"],
+    deps = [
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+py_binary(
+    name = "export_mnist_cnn",
+    srcs = ["export_mnist_cnn.py"],
+    deps = [
+        ":mnist_util",
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+py_binary(
+    name = "use_mnist_cnn",
+    srcs = ["use_mnist_cnn.py"],
+    deps = [
+        ":mnist_util",
+        ":util",
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
 py_test(
     name = "saved_model_test",
     srcs = [
         "saved_model_test.py",
     ],
     data = [
+        ":export_mnist_cnn",
         ":export_rnn_cell",
+        ":export_simple_text_embedding",
         ":export_text_rnn_model",
+        ":use_mnist_cnn",
+        ":use_model_in_sequential_keras",
         ":use_rnn_cell",
         ":use_text_rnn_model",
     ],
+    shard_count = 4,
     srcs_version = "PY2AND3",
+    tags = [
+        "noasan",  # forge input size exceeded
+        "nomsan",  # forge input size exceeded
+        "notsan",  # forge input size exceeded
+    ],
     deps = [
         "//tensorflow:tensorflow_py",
     ],
diff --git a/tensorflow/examples/saved_model/integration_tests/export_mnist_cnn.py b/tensorflow/examples/saved_model/integration_tests/export_mnist_cnn.py
new file mode 100644
index 0000000..ee45646
--- /dev/null
+++ b/tensorflow/examples/saved_model/integration_tests/export_mnist_cnn.py
@@ -0,0 +1,109 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Exports a convolutional feature extractor for MNIST in SavedModel format.
+
+The feature extractor is a convolutional neural network plus a hidden layer
+that gets trained as part of an MNIST classifier and then written to a
+SavedModel (without the classification layer). From there, use_mnist_cnn.py
+picks it up for transfer learning.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+import tensorflow as tf
+
+from tensorflow.examples.saved_model.integration_tests import mnist_util
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string(
+    'export_dir', None,
+    'Directory of exported SavedModel.')
+flags.DEFINE_integer(
+    'epochs', 10,
+    'Number of epochs to train.')
+flags.DEFINE_bool(
+    'fast_test_mode', False,
+    'Shortcut training for running in unit tests.')
+
+
+def make_feature_extractor():
+  """Returns a Keras Model to compute a feature vector from MNIST images."""
+  net = inp = tf.keras.Input(mnist_util.INPUT_SHAPE)
+  net = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(net)
+  net = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(net)
+  net = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(net)
+  # TODO(arnoegw): net = tf.keras.layers.Dropout(dropout_rate)(net)
+  net = tf.keras.layers.Flatten()(net)
+  # TODO(arnoegw): Add weight regularization.
+  net = tf.keras.layers.Dense(128, activation='relu')(net)
+  return tf.keras.Model(inputs=inp, outputs=net)
+
+
+def make_classifier(feature_extractor, dropout_rate=0.5):
+  """Returns a Keras Model to classify MNIST using feature_extractor."""
+  net = inp = tf.keras.Input(mnist_util.INPUT_SHAPE)
+  net = feature_extractor(net)
+  net = tf.keras.layers.Dropout(dropout_rate)(net)
+  net = tf.keras.layers.Dense(mnist_util.NUM_CLASSES, activation='softmax')(net)
+  return tf.keras.Model(inputs=inp, outputs=net)
+
+
+def wrap_keras_model_for_export(model):
+  """Wraps `model` for saving and loading as SavedModel."""
+  obj = tf.train.Checkpoint()
+  obj.__call__ = tf.function(
+      # TODO(arnoegw): Add args for training, hparams.
+      lambda inputs: model(inputs),  # pylint: disable=unnecessary-lambda
+      input_signature=[tf.TensorSpec(shape=(None,) + mnist_util.INPUT_SHAPE,
+                                     dtype=tf.float32)])
+  obj.trainable_variables = model.trainable_variables
+  obj.variables = model.trainable_variables + model.non_trainable_variables
+  return obj
+
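+# A minimal reuse sketch (hypothetical export path, not executed here): the
+# object saved below can be restored and called like a plain function, e.g.
+#   imported = tf.saved_model.load('/tmp/mnist_feature_extractor')
+#   features = imported(tf.zeros((1,) + mnist_util.INPUT_SHAPE, tf.float32))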
+
+def main(argv):
+  del argv
+
+  # Build a complete classifier model using a feature extractor.
+  feature_extractor = make_feature_extractor()
+  classifier = make_classifier(feature_extractor)
+
+  # Train the complete model.
+  (x_train, y_train), (x_test, y_test) = mnist_util.load_reshaped_data(
+      fake_tiny_data=FLAGS.fast_test_mode)
+  classifier.compile(loss=tf.keras.losses.categorical_crossentropy,
+                     optimizer=tf.keras.optimizers.SGD(),
+                     metrics=['accuracy'])
+  classifier.fit(x_train, y_train,
+                 batch_size=128,
+                 epochs=FLAGS.epochs,
+                 verbose=1,
+                 validation_data=(x_test, y_test))
+
+  # Save the feature extractor to a framework-agnostic SavedModel for reuse.
+  # Note that the feature_extractor object has not been compiled or fitted,
+  # so it does not contain an optimizer and related state.
+  exportable = wrap_keras_model_for_export(feature_extractor)
+  tf.saved_model.save(exportable, FLAGS.export_dir)
+
+
+if __name__ == '__main__':
+  # tf.enable_v2_behavior()
+  app.run(main)
diff --git a/tensorflow/examples/saved_model/integration_tests/export_rnn_cell.py b/tensorflow/examples/saved_model/integration_tests/export_rnn_cell.py
index f1ae38b..3660ed2 100644
--- a/tensorflow/examples/saved_model/integration_tests/export_rnn_cell.py
+++ b/tensorflow/examples/saved_model/integration_tests/export_rnn_cell.py
@@ -33,7 +33,7 @@
   del argv
 
   root = tf.train.Checkpoint()
-  # Create a cell and attach to our checkpointable.
+  # Create a cell and attach to our trackable.
   root.rnn_cell = tf.keras.layers.LSTMCell(units=10, recurrent_initializer=None)
 
   # Wrap the rnn_cell.__call__ function and assign to next_state.
diff --git a/tensorflow/examples/saved_model/integration_tests/export_simple_text_embedding.py b/tensorflow/examples/saved_model/integration_tests/export_simple_text_embedding.py
new file mode 100644
index 0000000..af61fa3
--- /dev/null
+++ b/tensorflow/examples/saved_model/integration_tests/export_simple_text_embedding.py
@@ -0,0 +1,105 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Text embedding model stored as a SavedModel."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+from absl import app
+from absl import flags
+
+import tensorflow as tf
+
+# TODO(vbardiovsky): remove these when symbols are public.
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.training.tracking import tracking
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("export_dir", None, "Directory to export SavedModel.")
+
+
+def write_vocabulary_file(vocabulary):
+  """Write temporary vocab file for module construction."""
+  tmpdir = tempfile.mkdtemp()
+  vocabulary_file = os.path.join(tmpdir, "tokens.txt")
+  with tf.io.gfile.GFile(vocabulary_file, "w") as f:
+    for entry in vocabulary:
+      f.write(entry + "\n")
+  return vocabulary_file
+
+
+class TextEmbeddingModel(tf.train.Checkpoint):
+  """Text embedding model.
+
+  A text embedding model that takes sentences as input and outputs the
+  sentence embeddings.
+  """
+
+  def __init__(self, vocabulary, emb_dim, oov_buckets):
+    super(TextEmbeddingModel, self).__init__()
+    self._oov_buckets = oov_buckets
+    self._vocabulary_file = tracking.TrackableAsset(
+        write_vocabulary_file(vocabulary))
+    self._total_size = len(vocabulary) + oov_buckets
+    self._table = lookup_ops.index_table_from_file(
+        vocabulary_file=self._vocabulary_file,
+        num_oov_buckets=self._oov_buckets,
+        hasher_spec=lookup_ops.FastHashSpec)
+    self.embeddings = tf.Variable(
+        tf.random.uniform(shape=[self._total_size, emb_dim]))
+    self.variables = [self.embeddings]
+    self.trainable_variables = self.variables
+
+  def _tokenize(self, sentences):
+    # Perform minimalistic text preprocessing by removing punctuation and
+    # splitting on spaces.
+    normalized_sentences = tf.strings.regex_replace(
+        input=sentences, pattern=r"\pP", rewrite="")
+    normalized_sentences = tf.reshape(normalized_sentences, [-1])
+    sparse_tokens = tf.string_split(normalized_sentences, " ")
+
+    # Deal with a corner case: there is one empty sentence.
+    sparse_tokens, _ = tf.sparse.fill_empty_rows(sparse_tokens, tf.constant(""))
+    # Deal with a corner case: all sentences are empty.
+    sparse_tokens = tf.sparse.reset_shape(sparse_tokens)
+    sparse_token_ids = self._table.lookup(sparse_tokens.values)
+
+    return (sparse_tokens.indices, sparse_token_ids, sparse_tokens.dense_shape)
+
+  @tf.function(input_signature=[tf.TensorSpec([None], tf.dtypes.string)])
+  def __call__(self, sentences):
+    token_ids, token_values, token_dense_shape = self._tokenize(sentences)
+
+    return tf.nn.safe_embedding_lookup_sparse(
+        embedding_weights=self.embeddings,
+        sparse_ids=tf.SparseTensor(token_ids, token_values, token_dense_shape),
+        sparse_weights=None,
+        combiner="sqrtn")
+
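+# Illustrative call (values assumed): each input sentence maps to one
+# emb_dim-sized vector, e.g.
+#   module = TextEmbeddingModel(["cat", "mat"], emb_dim=10, oov_buckets=10)
+#   vectors = module(tf.constant(["cat on the mat"]))  # shape [1, 10]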
+
+def main(argv):
+  del argv
+
+  vocabulary = ["cat", "is", "on", "the", "mat"]
+  module = TextEmbeddingModel(vocabulary=vocabulary, emb_dim=10, oov_buckets=10)
+  tf.saved_model.save(module, FLAGS.export_dir)
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/tensorflow/examples/saved_model/integration_tests/mnist_util.py b/tensorflow/examples/saved_model/integration_tests/mnist_util.py
new file mode 100644
index 0000000..7e4ffda
--- /dev/null
+++ b/tensorflow/examples/saved_model/integration_tests/mnist_util.py
@@ -0,0 +1,49 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Convenience wrapper around Keras' MNIST and Fashion MNIST data."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+
+INPUT_SHAPE = (28, 28, 1)
+NUM_CLASSES = 10
+
+
+def load_reshaped_data(use_fashion_mnist=False, fake_tiny_data=False):
+  """Returns MNIST or Fashion MNIST train and test data."""
+  if fake_tiny_data:
+    num_fakes = 10
+    x_train = x_test = np.zeros((num_fakes, 28, 28), dtype=np.uint8)
+    y_train = y_test = np.zeros((num_fakes,), dtype=np.int64)
+  else:
+    mnist = (tf.keras.datasets.fashion_mnist if use_fashion_mnist else
+             tf.keras.datasets.mnist)
+    (x_train, y_train), (x_test, y_test) = mnist.load_data()
+  return ((_prepare_image(x_train), _prepare_label(y_train)),
+          (_prepare_image(x_test), _prepare_label(y_test)))
+
+
+def _prepare_image(x):
+  """Converts images to [n,h,w,c] format in range [0,1]."""
+  return x[..., None].astype('float32') / 255.
+
+
+def _prepare_label(y):
+  """Conerts labels to one-hot encoding."""
+  return tf.keras.utils.to_categorical(y, NUM_CLASSES)
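+
+
+# Shape sketch (standard MNIST sizes assumed): load_reshaped_data() returns
+#   x_train: float32 [60000, 28, 28, 1], scaled into [0, 1]
+#   y_train: float32 [60000, 10], one-hot labels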
diff --git a/tensorflow/examples/saved_model/integration_tests/saved_model_test.py b/tensorflow/examples/saved_model/integration_tests/saved_model_test.py
index 770e809..78d2d1e 100644
--- a/tensorflow/examples/saved_model/integration_tests/saved_model_test.py
+++ b/tensorflow/examples/saved_model/integration_tests/saved_model_test.py
@@ -59,6 +59,28 @@
     use_binary = resource_loader.get_path_to_datafile("use_rnn_cell")
     self.assertCommandSucceeded(use_binary, model_dir=export_dir)
 
+  @test_util.run_v2_only
+  def test_text_embedding_in_sequential_keras(self):
+    export_dir = self.get_temp_dir()
+    export_binary = resource_loader.get_path_to_datafile(
+        "export_simple_text_embedding")
+    self.assertCommandSucceeded(export_binary, export_dir=export_dir)
+
+    use_binary = resource_loader.get_path_to_datafile(
+        "use_model_in_sequential_keras")
+    self.assertCommandSucceeded(use_binary, model_dir=export_dir)
+
+  @test_util.run_v2_only
+  def test_mnist_cnn(self):
+    export_dir = self.get_temp_dir()
+    export_binary = resource_loader.get_path_to_datafile("export_mnist_cnn")
+    self.assertCommandSucceeded(export_binary, export_dir=export_dir,
+                                fast_test_mode="true")
+
+    use_binary = resource_loader.get_path_to_datafile("use_mnist_cnn")
+    self.assertCommandSucceeded(use_binary, export_dir=export_dir,
+                                fast_test_mode="true")
+
 if __name__ == "__main__":
   # tf.enable_v2_behavior()
   tf.test.main()
diff --git a/tensorflow/examples/saved_model/integration_tests/use_mnist_cnn.py b/tensorflow/examples/saved_model/integration_tests/use_mnist_cnn.py
new file mode 100644
index 0000000..4e49f3a
--- /dev/null
+++ b/tensorflow/examples/saved_model/integration_tests/use_mnist_cnn.py
@@ -0,0 +1,101 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Imports a convolutional feature extractor for MNIST in SavedModel format.
+
+This program picks up the SavedModel written by export_mnist_cnn.py and
+uses the feature extractor contained in it for classification on either
+classic MNIST (digits) or Fashion MNIST (thumbnails of apparel). Optionally,
+it trains the feature extractor further as part of the new classifier.
+As expected, that makes training slower; it does not help much on the
+original training dataset but helps a lot for transfer to the other dataset.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+import tensorflow as tf
+
+from tensorflow.examples.saved_model.integration_tests import mnist_util
+from tensorflow.examples.saved_model.integration_tests import util
+from tensorflow.python.saved_model import load as svmd_load
+tf.saved_model.load = svmd_load.load
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string(
+    'export_dir', None,
+    'Directory of exported SavedModel.')
+flags.DEFINE_integer(
+    'epochs', 5,
+    'Number of epochs to train.')
+flags.DEFINE_bool(
+    'retrain', False,
+    'If set, the imported SavedModel is trained further.')
+flags.DEFINE_bool(
+    'use_fashion_mnist', False,
+    'Use Fashion MNIST (products) instead of the real MNIST (digits). '
+    'With this, --retrain gains a lot.')
+flags.DEFINE_bool(
+    'fast_test_mode', False,
+    'Shortcut training for running in unit tests.')
+
+
+def make_classifier(feature_extractor, dropout_rate=0.5):
+  """Returns a Keras Model to classify MNIST using feature_extractor."""
+  net = inp = tf.keras.Input(mnist_util.INPUT_SHAPE)
+  net = feature_extractor(net)
+  net = tf.keras.layers.Dropout(dropout_rate)(net)
+  net = tf.keras.layers.Dense(mnist_util.NUM_CLASSES,
+                              activation='softmax')(net)
+  return tf.keras.Model(inputs=inp, outputs=net)
+
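+# Fine-tuning sketch (assuming the flags defined above): with --retrain, the
+# restored extractor's variables are reported as trainable inside the new
+# classifier, so model.fit() below also updates the imported weights.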
+
+def main(argv):
+  del argv
+
+  # Load a pre-trained feature extractor and wrap it for use in Keras.
+  obj = tf.saved_model.load(FLAGS.export_dir)
+  feature_extractor = util.CustomLayer(obj, output_shape=[128],
+                                       trainable=FLAGS.retrain)
+
+  # Build a classifier with it.
+  model = make_classifier(feature_extractor)
+
+  # Train the classifier (possibly on a different dataset).
+  (x_train, y_train), (x_test, y_test) = mnist_util.load_reshaped_data(
+      use_fashion_mnist=FLAGS.use_fashion_mnist,
+      fake_tiny_data=FLAGS.fast_test_mode)
+  model.compile(loss=tf.keras.losses.categorical_crossentropy,
+                optimizer=tf.keras.optimizers.SGD(),
+                metrics=['accuracy'],
+                # TODO(arnoegw): Remove after investigating huge allocs.
+                run_eagerly=True)
+  print('Training on %s with %d trainable and %d untrainable variables.' %
+        ('Fashion MNIST' if FLAGS.use_fashion_mnist else 'MNIST',
+         len(model.trainable_variables), len(model.non_trainable_variables)))
+  model.fit(x_train, y_train,
+            batch_size=128,
+            epochs=FLAGS.epochs,
+            steps_per_epoch=3,
+            verbose=1,
+            validation_data=(x_test, y_test))
+
+
+if __name__ == '__main__':
+  # tf.enable_v2_behavior()
+  app.run(main)
diff --git a/tensorflow/examples/saved_model/integration_tests/use_model_in_sequential_keras.py b/tensorflow/examples/saved_model/integration_tests/use_model_in_sequential_keras.py
new file mode 100644
index 0000000..6c8aab8
--- /dev/null
+++ b/tensorflow/examples/saved_model/integration_tests/use_model_in_sequential_keras.py
@@ -0,0 +1,73 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Load and use text embedding module in sequential Keras."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+
+import numpy as np
+
+import tensorflow as tf
+# TODO(vbardiovsky): Remove when load symbol is public.
+from tensorflow.examples.saved_model.integration_tests import util
+from tensorflow.python.saved_model.load import load
+
+tf.saved_model.load = load
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("model_dir", None, "Directory to load SavedModel from.")
+
+
+def train(fine_tuning):
+  """Build a Keras model and train with mock data."""
+  features = np.array(["my first sentence", "my second sentence"])
+  labels = np.array([1, 0])
+  dataset = tf.data.Dataset.from_tensor_slices((features, labels))
+
+  module = tf.saved_model.load(FLAGS.model_dir)
+
+  # Create the sequential keras model.
+  l = tf.keras.layers
+  model = tf.keras.Sequential()
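+  # The Reshape(()) below squeezes the [batch, 1] string input into a [batch]
+  # vector, matching the module's input_signature of tf.string with shape
+  # [None] (see export_simple_text_embedding.py).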
+  model.add(l.Reshape((), batch_input_shape=[None, 1], dtype=tf.string))
+  model.add(util.CustomLayer(module, output_shape=[10], trainable=fine_tuning))
+  model.add(l.Dense(100, activation="relu"))
+  model.add(l.Dense(50, activation="relu"))
+  model.add(l.Dense(1, activation="sigmoid"))
+
+  model.compile(
+      optimizer="adam",
+      loss="binary_crossentropy",
+      metrics=["accuracy"],
+      # TODO(b/124446120): Remove after fixed.
+      run_eagerly=True)
+
+  model.fit_generator(generator=dataset.batch(1), epochs=5)
+
+
+def main(argv):
+  del argv
+
+  train(fine_tuning=False)
+  train(fine_tuning=True)
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/tensorflow/examples/saved_model/integration_tests/util.py b/tensorflow/examples/saved_model/integration_tests/util.py
new file mode 100644
index 0000000..0fc0cc8
--- /dev/null
+++ b/tensorflow/examples/saved_model/integration_tests/util.py
@@ -0,0 +1,63 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for integration tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+
+# TODO(vbardiovsky): We should just reuse Keras's Lambda layer, once it
+# exposes trainable variables.
+class CustomLayer(tf.keras.layers.Layer):
+  """Wraps callable object as a `Layer` object.
+
+  Args:
+    func: The callable object to wrap.
+    output_shape: A tuple with the (possibly partial) output shape of `func`
+      *without* leading batch size (by analogy to Dense(..., input_shape=...)).
+    trainable: Boolean controlling whether the trainable variables of `func`
+      are reported as trainable variables of this layer.
+  """
+
+  def __init__(self, func, output_shape, trainable=False, **kwargs):
+    # Set self._{non,}_trainable_weights before calling Layer.__init__.
+    if hasattr(func, 'trainable_variables'):
+      self._trainable_weights = [v for v in func.trainable_variables]
+      trainable_variables_set = set(func.trainable_variables)
+    else:
+      self._trainable_weights = []
+      trainable_variables_set = set()
+    if hasattr(func, 'variables'):
+      self._non_trainable_weights = [v for v in func.variables
+                                     if v not in trainable_variables_set]
+    else:
+      self._non_trainable_weights = []  # TODO(arnoegw): Infer from `func`.
+    super(CustomLayer, self).__init__(trainable=trainable, **kwargs)
+    self._func = func
+    # TODO(vbardiovsky): We should be able to get the embedding dimension from
+    # the restored model.
+    self._output_shape = tuple(output_shape)
+
+  def call(self, x):
+    result = self._func(x)
+    # TODO(vbardiovsky): Polymorphic function should return shaped tensor.
+    result.set_shape(self.compute_output_shape(x.shape))
+    return result
+
+  def compute_output_shape(self, input_shape):
+    return (input_shape[0],) + self._output_shape
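+
+
+# Usage sketch (export_dir assumed): wrap a restored SavedModel object so
+# Keras tracks its variables, e.g.
+#   layer = CustomLayer(tf.saved_model.load(export_dir), output_shape=[128])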
diff --git a/tensorflow/examples/speech_commands/BUILD b/tensorflow/examples/speech_commands/BUILD
index ca044e5..88f7fe7 100644
--- a/tensorflow/examples/speech_commands/BUILD
+++ b/tensorflow/examples/speech_commands/BUILD
@@ -63,6 +63,13 @@
 
 py_binary(
     name = "train",
+    srcs = ["train.py"],
+    srcs_version = "PY2AND3",
+    deps = [":train_main_lib"],
+)
+
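+# Each tool below gains a *_main_lib py_library wrapping its srcs, so that
+# other targets can depend on the same sources as the py_binary (an assumed
+# rationale; the pattern repeats for every binary in this file).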
+py_library(
+    name = "train_main_lib",
     srcs = [
         "train.py",
     ],
@@ -90,6 +97,13 @@
     name = "freeze",
     srcs = ["freeze.py"],
     srcs_version = "PY2AND3",
+    deps = [":freeze_main_lib"],
+)
+
+py_library(
+    name = "freeze_main_lib",
+    srcs = ["freeze.py"],
+    srcs_version = "PY2AND3",
     deps = [":freeze_lib"],
 )
 
@@ -122,6 +136,13 @@
     name = "wav_to_features",
     srcs = ["wav_to_features.py"],
     srcs_version = "PY2AND3",
+    deps = [":wav_to_features_main_lib"],
+)
+
+py_library(
+    name = "wav_to_features_main_lib",
+    srcs = ["wav_to_features.py"],
+    srcs_version = "PY2AND3",
     deps = [":wav_to_features_lib"],
 )
 
@@ -154,6 +175,13 @@
     name = "generate_streaming_test_wav",
     srcs = ["generate_streaming_test_wav.py"],
     srcs_version = "PY2AND3",
+    deps = [":generate_streaming_test_wav_main_lib"],
+)
+
+py_library(
+    name = "generate_streaming_test_wav_main_lib",
+    srcs = ["generate_streaming_test_wav.py"],
+    srcs_version = "PY2AND3",
     deps = [":generate_streaming_test_wav_lib"],
 )
 
@@ -201,6 +229,13 @@
     name = "label_wav",
     srcs = ["label_wav.py"],
     srcs_version = "PY2AND3",
+    deps = [":label_wav_main_lib"],
+)
+
+py_library(
+    name = "label_wav_main_lib",
+    srcs = ["label_wav.py"],
+    srcs_version = "PY2AND3",
     deps = [":label_wav_lib"],
 )
 
diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index c8ab248..b3bd73a 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -121,7 +121,7 @@
 * 0.1.0: Initial release.
 * 0.2.0: Many fixes, including lower memory footprint and support for Python 3.
 * 0.3.0: Use 0.7.1 release.
-* 0.4.0: Move notMMNIST data for Google Cloud.
+* 0.4.0: Move notMNIST data for Google Cloud.
 * 0.5.0: Actually use 0.7.1 release.
 * 0.6.0: Update to TF 0.10.0, add libjpeg (for Pillow).
 * 1.0.0: Update to TF 1.0.0 release.
diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index cc0995f..fd614e7 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -619,139 +619,6 @@
 	return op.Output(0)
 }
 
-// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2.
-type QuantizeAndDequantizeV2Attr func(optionalAttr)
-
-// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value.
-//
-// value: Whether the quantization is signed or unsigned. (actually this parameter should
-// have been called <b>`signed_output`</b>)
-// If not specified, defaults to true
-func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr {
-	return func(m optionalAttr) {
-		m["signed_input"] = value
-	}
-}
-
-// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value.
-//
-// value: The bitwidth of the quantization.
-// If not specified, defaults to 8
-func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
-	}
-}
-
-// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value.
-//
-// value: Whether the range is given or should be determined from the `input` tensor.
-// If not specified, defaults to false
-func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr {
-	return func(m optionalAttr) {
-		m["range_given"] = value
-	}
-}
-
-// QuantizeAndDequantizeV2RoundMode sets the optional round_mode attribute to value.
-//
-// value: The 'round_mode' attribute controls which rounding tie-breaking algorithm is
-// used when rounding float values to their quantized equivalents. The following
-// rounding modes are currently supported:
-//
-// *   HALF_TO_EVEN: this is the default round_mode.
-// *   HALF_UP: round towards positive. In this mode 7.5 rounds up to 8 and -7.5
-//     rounds up to -7.
-//
-// If not specified, defaults to "HALF_TO_EVEN"
-func QuantizeAndDequantizeV2RoundMode(value string) QuantizeAndDequantizeV2Attr {
-	return func(m optionalAttr) {
-		m["round_mode"] = value
-	}
-}
-
-// Quantizes then dequantizes a tensor.
-//
-// This op simulates the precision loss from the quantized forward pass by:
-//
-// 1. Quantizing the tensor to fixed point numbers, which should match the target
-//    quantization method when it is used in inference.
-// 2. Dequantizing it back to floating point numbers for the following ops, most
-//    likely matmul.
-//
-// There are different ways to quantize. This version uses only scaling, so 0.0
-// maps to 0.
-//
-// From the specified 'num_bits' in the quantized output type, it determines
-// minimum and maximum representable quantized values.
-//
-// e.g.
-//
-// *   [-128, 127] for signed, num_bits = 8, or
-// *   [0, 255] for unsigned, num_bits = 8.
-//
-// If range_given == False, the initial input_min, input_max will be determined
-// automatically as the minimum and maximum values in the input tensor, otherwise
-// the specified values of input_min, input_max are used.
-//
-// Note: If the input_min, input_max are specified, they do not need to equal the
-// actual minimum and maximum values in the tensor. e.g. in some cases it may be
-// beneficial to specify these values such that the low probability extremes of the
-// input distribution are clipped.
-//
-// This op determines the maximum scale_factor that would map the initial
-// [input_min, input_max] range to a range that lies within the representable
-// quantized range.
-//
-// It determines the scale from one of input_min and input_max, then updates the
-// other one to maximize the respresentable range.
-//
-// e.g.
-//
-// *   if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0,
-//     5.0]: it would use a scale_factor of -128 / -10.0 = 12.8 In this case, it
-//     would update input_max to be 127 / 12.8 = 9.921875
-// *   if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0,
-//     10.0]: it would use a scale_factor of 127 / 10.0 = 12.7 In this case, it
-//     would update input_min to be 128.0 / 12.7 = -10.07874
-// *   if the output is unsigned, input_min is forced to be 0, and only the
-//     specified input_max is used.
-//
-// After determining the scale_factor and updating the input range, it applies the
-// following to each value in the 'input' tensor.
-//
-// output = round(clamp(value, input_min, input_max) * scale_factor) / scale_factor.
-//
-// The above round function rounds the value based on the given round_mode.
-//
-//
-// Arguments:
-//	input: Tensor to quantize and then dequantize.
-//	input_min: If `range_given == True`, this specifies the minimum input value that needs to
-// be represented, otherwise it is determined from the min value of the `input`
-// tensor.
-//	input_max: If `range_given == True`, this specifies the maximum input value that needs to
-// be represented, otherwise it is determined from the max value of the `input`
-// tensor.
-func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizeAndDequantizeV2",
-		Input: []tf.Input{
-			input, input_min, input_max,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Bitcasts a tensor from one type to another without copying data.
 //
 // Given a tensor `input`, this operation returns a tensor that has the same buffer
@@ -782,49 +649,6 @@
 	return op.Output(0)
 }
 
-// Extract `patches` from `images` and put them in the "depth" output dimension.
-//
-// Arguments:
-//	images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
-//	ksizes: The size of the sliding window for each dimension of `images`.
-//	strides: 1-D of length 4. How far the centers of two consecutive patches are in
-// the images. Must be: `[1, stride_rows, stride_cols, 1]`.
-//	rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
-// input stride, specifying how far two consecutive patch samples are in the
-// input. Equivalent to extracting patches with
-// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
-// subsampling them spatially by a factor of `rates`. This is equivalent to
-// `rate` in dilated (a.k.a. Atrous) convolutions.
-//	padding: The type of padding algorithm to use.
-//
-// We specify the size-related attributes as:
-//
-// ```python
-//       ksizes = [1, ksize_rows, ksize_cols, 1]
-//       strides = [1, strides_rows, strides_cols, 1]
-//       rates = [1, rates_rows, rates_cols, 1]
-// ```
-//
-// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows *
-// ksize_cols * depth]` containing image patches with size
-// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note
-// `out_rows` and `out_cols` are the dimensions of the output patches.
-func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "ExtractImagePatches",
-		Input: []tf.Input{
-			images,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // SpaceToDepthAttr is an optional argument to SpaceToDepth.
 type SpaceToDepthAttr func(optionalAttr)
 
@@ -1195,65 +1019,6 @@
 	return op.Output(0)
 }
 
-// ListDiffAttr is an optional argument to ListDiff.
-type ListDiffAttr func(optionalAttr)
-
-// ListDiffOutIdx sets the optional out_idx attribute to value.
-// If not specified, defaults to DT_INT32
-func ListDiffOutIdx(value tf.DataType) ListDiffAttr {
-	return func(m optionalAttr) {
-		m["out_idx"] = value
-	}
-}
-
-// Computes the difference between two lists of numbers or strings.
-//
-// Given a list `x` and a list `y`, this operation returns a list `out` that
-// represents all values that are in `x` but not in `y`. The returned list `out`
-// is sorted in the same order that the numbers appear in `x` (duplicates are
-// preserved). This operation also returns a list `idx` that represents the
-// position of each `out` element in `x`. In other words:
-//
-// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]`
-//
-// For example, given this input:
-//
-// ```
-// x = [1, 2, 3, 4, 5, 6]
-// y = [1, 3, 5]
-// ```
-//
-// This operation would return:
-//
-// ```
-// out ==> [2, 4, 6]
-// idx ==> [1, 3, 5]
-// ```
-//
-// Arguments:
-//	x: 1-D. Values to keep.
-//	y: 1-D. Values to remove.
-//
-// Returns 1-D. Values present in `x` but not in `y`.1-D. Positions of `x` values preserved in `out`.
-func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ListDiff",
-		Input: []tf.Input{
-			x, y,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Inserts a dimension of 1 into a tensor's shape.
 //
 // Given a tensor `input`, this operation inserts a dimension of 1 at the
@@ -1603,78 +1368,6 @@
 	return op.Output(0)
 }
 
-// StridedSliceGradAttr is an optional argument to StridedSliceGrad.
-type StridedSliceGradAttr func(optionalAttr)
-
-// StridedSliceGradBeginMask sets the optional begin_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradBeginMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["begin_mask"] = value
-	}
-}
-
-// StridedSliceGradEndMask sets the optional end_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradEndMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["end_mask"] = value
-	}
-}
-
-// StridedSliceGradEllipsisMask sets the optional ellipsis_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradEllipsisMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["ellipsis_mask"] = value
-	}
-}
-
-// StridedSliceGradNewAxisMask sets the optional new_axis_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradNewAxisMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["new_axis_mask"] = value
-	}
-}
-
-// StridedSliceGradShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradShrinkAxisMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["shrink_axis_mask"] = value
-	}
-}
-
-// Returns the gradient of `StridedSlice`.
-//
-// Since `StridedSlice` cuts out pieces of its `input` which is size
-// `shape`, its gradient will have the same shape (which is passed here
-// as `shape`). The gradient will be zero in any element that the slice
-// does not select.
-//
-// Arguments are the same as StridedSliceGrad with the exception that
-// `dy` is the input gradient to be propagated and `shape` is the
-// shape of `StridedSlice`'s `input`.
-func StridedSliceGrad(scope *Scope, shape tf.Output, begin tf.Output, end tf.Output, strides tf.Output, dy tf.Output, optional ...StridedSliceGradAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StridedSliceGrad",
-		Input: []tf.Input{
-			shape, begin, end, strides, dy,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // StridedSliceAttr is an optional argument to StridedSlice.
 type StridedSliceAttr func(optionalAttr)
 
@@ -1868,37 +1561,6 @@
 	return op.Output(0)
 }
 
-// Return a slice from 'input'.
-//
-// The output tensor is a tensor with dimensions described by 'size'
-// whose values are extracted from 'input' starting at the offsets in
-// 'begin'.
-//
-// *Requirements*:
-//   0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)
-//
-// Arguments:
-//
-//	begin: begin[i] specifies the offset into the 'i'th dimension of
-// 'input' to slice from.
-//	size: size[i] specifies the number of elements of the 'i'th dimension
-// of 'input' to slice. If size[i] is -1, all remaining elements in dimension
-// i are included in the slice (i.e. this is equivalent to setting
-// size[i] = input.dim_size(i) - begin[i]).
-func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Slice",
-		Input: []tf.Input{
-			input, begin, size,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // SizeAttr is an optional argument to Size.
 type SizeAttr func(optionalAttr)
 
@@ -3290,30 +2952,6 @@
 	return op.Output(0)
 }
 
-//     Updates specified rows with values in `v`.
-//
-//     Computes `x[i, :] = v; return x`.
-//
-// Arguments:
-//	x: A tensor of type `T`.
-//	i: A vector. Indices into the left-most dimension of `x`.
-//	v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size.
-//
-// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`.
-func InplaceUpdate(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "InplaceUpdate",
-		Input: []tf.Input{
-			x, i, v,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Makes a copy of `x`.
 //
 // Arguments:
@@ -4434,81 +4072,6 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping.
-type GenerateVocabRemappingAttr func(optionalAttr)
-
-// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value.
-//
-// value: Number of entries in the old vocab file to consider.  If -1,
-// use the entire old vocabulary.
-// If not specified, defaults to -1
-//
-// REQUIRES: value >= -1
-func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr {
-	return func(m optionalAttr) {
-		m["old_vocab_size"] = value
-	}
-}
-
-// Given a path to new and old vocabulary files, returns a remapping Tensor of
-//
-// length `num_new_vocab`, where `remapping[i]` contains the row number in the old
-// vocabulary that corresponds to row `i` in the new vocabulary (starting at line
-// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
-// in the new vocabulary is not in the old vocabulary.  The old vocabulary is
-// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
-// default value of -1.
-//
-// `num_vocab_offset` enables
-// use in the partitioned variable case, and should generally be set through
-// examining partitioning info.  The format of the files should be a text file,
-// with each line containing a single entity within the vocabulary.
-//
-// For example, with `new_vocab_file` a text file containing each of the following
-// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3],
-// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be
-// `[0, -1, 2]`.
-//
-// The op also returns a count of how many entries in the new vocabulary
-// were present in the old vocabulary, which is used to calculate the number of
-// values to initialize in a weight matrix remapping
-//
-// This functionality can be used to remap both row vocabularies (typically,
-// features) and column vocabularies (typically, classes) from TensorFlow
-// checkpoints.  Note that the partitioning logic relies on contiguous vocabularies
-// corresponding to div-partitioned variables.  Moreover, the underlying remapping
-// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should
-// use the corresponding index_table_from_file() as the FeatureColumn framework
-// does (as opposed to tf.feature_to_id(), which uses a CuckooTable).
-//
-// Arguments:
-//	new_vocab_file: Path to the new vocab file.
-//	old_vocab_file: Path to the old vocab file.
-//	new_vocab_offset: How many entries into the new vocab file to start reading.
-//	num_new_vocab: Number of entries in the new vocab file to remap.
-//
-// Returns A Tensor of length num_new_vocab where the element at index i
-// is equal to the old ID that maps to the new ID i.  This element is -1 for any
-// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab.
-func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "GenerateVocabRemapping",
-		Input: []tf.Input{
-			new_vocab_file, old_vocab_file,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Broadcasts a tensor value to one or more other devices.
 func CollectiveBcastSend(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) {
 	if scope.Err() != nil {
@@ -4526,6 +4089,23 @@
 	return op.Output(0)
 }
 
+// Mutually accumulates multiple tensors of identical type and shape.
+func CollectiveGather(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape}
+	opspec := tf.OpSpec{
+		Type: "CollectiveGather",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
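+// Usage sketch (illustrative constants): with a group of two workers,
+//   gathered := CollectiveGather(scope, input, 2, 1, 1, tf.MakeShape(8))
+// concatenates the participants' tensors along dimension 0.
+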
 // CollectiveReduceAttr is an optional argument to CollectiveReduce.
 type CollectiveReduceAttr func(optionalAttr)
 
@@ -6036,77 +5616,6 @@
 	return op.Output(0)
 }
 
-// MapUnstageAttr is an optional argument to MapUnstage.
-type MapUnstageAttr func(optionalAttr)
-
-// MapUnstageCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func MapUnstageCapacity(value int64) MapUnstageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// MapUnstageMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func MapUnstageMemoryLimit(value int64) MapUnstageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// MapUnstageContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func MapUnstageContainer(value string) MapUnstageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// MapUnstageSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapUnstageSharedName(value string) MapUnstageAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op removes and returns the values associated with the key
-//
-// from the underlying container.   If the underlying container
-// does not contain this key, the op will block until it does.
-func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageAttr) (values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MapUnstage",
-		Input: []tf.Input{
-			key, indices,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("MapUnstage", err)
-		return
-	}
-	return values
-}
-
 // MapPeekAttr is an optional argument to MapPeek.
 type MapPeekAttr func(optionalAttr)
 
@@ -6675,85 +6184,92 @@
 	return op.Output(0), op.Output(1)
 }
 
-// EditDistanceAttr is an optional argument to EditDistance.
-type EditDistanceAttr func(optionalAttr)
+// Deprecated. Use TensorArrayWriteV3
+//
+// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3
+func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayWriteV2",
+		Input: []tf.Input{
+			handle, index, value, flow_in,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// EditDistanceNormalize sets the optional normalize attribute to value.
+// Deprecated. Use TensorArrayGradV3
 //
-// value: boolean (if true, edit distances are normalized by length of truth).
-//
-// The output is:
+// DEPRECATED at GraphDef version 26: Use TensorArrayGradV3
+func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"source": source}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayGradV2",
+		Input: []tf.Input{
+			handle, flow_in,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TensorArrayV2Attr is an optional argument to TensorArrayV2.
+type TensorArrayV2Attr func(optionalAttr)
+
+// TensorArrayV2ElementShape sets the optional element_shape attribute to value.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr {
+	return func(m optionalAttr) {
+		m["element_shape"] = value
+	}
+}
+
+// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value.
+// If not specified, defaults to false
+func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr {
+	return func(m optionalAttr) {
+		m["dynamic_size"] = value
+	}
+}
+
+// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value.
 // If not specified, defaults to true
-func EditDistanceNormalize(value bool) EditDistanceAttr {
+func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr {
 	return func(m optionalAttr) {
-		m["normalize"] = value
+		m["clear_after_read"] = value
 	}
 }
 
-// Computes the (possibly normalized) Levenshtein Edit Distance.
+// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value.
+// If not specified, defaults to ""
+func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr {
+	return func(m optionalAttr) {
+		m["tensor_array_name"] = value
+	}
+}
+
+// Deprecated. Use TensorArrayV3
 //
-// The inputs are variable-length sequences provided by SparseTensors
-//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
-// and
-//   (truth_indices, truth_values, truth_shape).
-//
-// The inputs are:
-//
-// Arguments:
-//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
-// This is an N x R int64 matrix.
-//	hypothesis_values: The values of the hypothesis list SparseTensor.
-// This is an N-length vector.
-//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
-// This is an R-length vector.
-//	truth_indices: The indices of the truth list SparseTensor.
-// This is an M x R int64 matrix.
-//	truth_values: The values of the truth list SparseTensor.
-// This is an M-length vector.
-//	truth_shape: truth indices, vector.
-//
-// Returns A dense float tensor with rank R - 1.
-//
-// For the example input:
-//
-//     // hypothesis represents a 2x1 matrix with variable-length values:
-//     //   (0,0) = ["a"]
-//     //   (1,0) = ["b"]
-//     hypothesis_indices = [[0, 0, 0],
-//                           [1, 0, 0]]
-//     hypothesis_values = ["a", "b"]
-//     hypothesis_shape = [2, 1, 1]
-//
-//     // truth represents a 2x2 matrix with variable-length values:
-//     //   (0,0) = []
-//     //   (0,1) = ["a"]
-//     //   (1,0) = ["b", "c"]
-//     //   (1,1) = ["a"]
-//     truth_indices = [[0, 1, 0],
-//                      [1, 0, 0],
-//                      [1, 0, 1],
-//                      [1, 1, 0]]
-//     truth_values = ["a", "b", "c", "a"]
-//     truth_shape = [2, 2, 2]
-//     normalize = true
-//
-// The output will be:
-//
-//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
-//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
-//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
-func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
+// DEPRECATED at GraphDef version 26: Use TensorArrayV3
+func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "EditDistance",
+		Type: "TensorArrayV2",
 		Input: []tf.Input{
-			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
+			size,
 		},
 		Attrs: attrs,
 	}
@@ -6761,84 +6277,82 @@
 	return op.Output(0)
 }
 
-// Returns 0 if x == 0, and x * log(y) otherwise, elementwise.
-func Xlogy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Split the data from the input value into TensorArray elements.
+//
+// Assuming that `lengths` takes on values
+//
+//   ```(n0, n1, ..., n(T-1))```
+//
+// and that `value` has shape
+//
+//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```,
+//
+// this splits values into a TensorArray with T tensors.
+//
+// TensorArray index t will be the subtensor of values with starting position
+//
+//   ```(n0 + n1 + ... + n(t-1), 0, 0, ...)```
+//
+// and having size
+//
+//   ```nt x d0 x d1 x ...```
+//
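+// For example (illustrative): lengths = (2, 3) splits a value of shape
+// (5 x d0 x d1 x ...) into tensors of shapes (2 x d0 x d1 x ...) and
+// (3 x d0 x d1 x ...).
+//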
+// Arguments:
+//	handle: The handle to a TensorArray.
+//	value: The concatenated tensor to write to the TensorArray.
+//	lengths: The vector of lengths, how to split the rows of value into the
+// TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//
+// Returns A float scalar that enforces proper chaining of operations.
+func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Xlogy",
+		Type: "TensorArraySplitV3",
 		Input: []tf.Input{
-			x, y,
+			handle, value, lengths, flow_in,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
-type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
+// EmptyAttr is an optional argument to Empty.
+type EmptyAttr func(optionalAttr)
 
-// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
+// EmptyInit sets the optional init attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
+// value: If True, initialize the returned tensor with the default value of dtype.  Otherwise, the implementation is free not to initialize the tensor's content.
+// If not specified, defaults to false
+func EmptyInit(value bool) EmptyAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["init"] = value
 	}
 }
 
-// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
+// Creates a tensor with the given shape.
 //
-// value: 1-D tensor of length 4.  The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
-// element on that dimension. The dimension order is determined by the value of
-// `data_format`, see above for details. Dilations in the batch and depth
-// dimensions must be 1.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 >
-func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes the gradients of depthwise convolution with respect to the input.
+// This operation creates a tensor of `shape` and `dtype`.
 //
 // Arguments:
-//	input_sizes: An integer vector representing the shape of `input`, based
-// on `data_format`.  For example, if `data_format` is 'NHWC' then
-//  `input` is a 4-D `[batch, height, width, channels]` tensor.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
-//	out_backprop: 4-D with shape  based on `data_format`.
-// For example, if `data_format` is 'NHWC' then
-// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution.
-//	padding: The type of padding algorithm to use.
+//	shape: 1-D. Represents the shape of the output tensor.
 //
-// Returns 4-D with shape according to `data_format`.  For example, if
-// `data_format` is 'NHWC', output shape is `[batch, in_height,
-// in_width, in_channels]`.  Gradient w.r.t. the input of the
-// convolution.
-func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
+//
+// Returns A `Tensor` of type `T`.
+func Empty(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...EmptyAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNativeBackpropInput",
+		Type: "Empty",
 		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
+			shape,
 		},
 		Attrs: attrs,
 	}
@@ -6846,1017 +6360,240 @@
 	return op.Output(0)
 }
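+
+// Usage sketch (illustrative): Empty(scope, shapeTensor, tf.Float,
+// EmptyInit(true)) yields a tensor of the given shape initialized to the
+// dtype's default value (zeros for numeric dtypes).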
 
-// Returns x / y element-wise.
+// TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3.
+type TensorArrayConcatV3Attr func(optionalAttr)
+
+// TensorArrayConcatV3ElementShapeExcept0 sets the optional element_shape_except0 attribute to value.
 //
-// *NOTE*: `Div` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: The expected shape of an element, if known,
+// excluding the first dimension. Used to validate the shapes of
+// TensorArray elements. If this shape is not fully specified, concatenating
+// zero-size TensorArrays is an error.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayConcatV3ElementShapeExcept0(value tf.Shape) TensorArrayConcatV3Attr {
+	return func(m optionalAttr) {
+		m["element_shape_except0"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Div",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Selects the k nearest centers for each point.
+// Concat the elements from the TensorArray into value `value`.
 //
-// Rows of points are assumed to be input points. Rows of centers are assumed to be
-// the list of candidate centers. For each point, the k centers that have least L2
-// distance to it are computed.
+// Takes `T` elements of shapes
+//
+//   ```
+//   (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...)
+//   ```
+//
+// and concatenates them into a Tensor of shape:
+//
+//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```
+//
+// All elements must have the same shape (excepting the first dimension).
 //
 // Arguments:
-//	points: Matrix of shape (n, d). Rows are assumed to be input points.
-//	centers: Matrix of shape (m, d). Rows are assumed to be centers.
-//	k: Number of nearest centers to return for each point. If k is larger than m, then
-// only m centers are returned.
+//	handle: The handle to a TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	dtype: The type of the element that is returned.
 //
-// Returns Matrix of shape (n, min(m, k)). Each row contains the indices of the centers
-// closest to the corresponding point, ordered by increasing distance.Matrix of shape (n, min(m, k)). Each row contains the squared L2 distance to the
-// corresponding center in nearest_center_indices.
-func NearestNeighbors(scope *Scope, points tf.Output, centers tf.Output, k tf.Output) (nearest_center_indices tf.Output, nearest_center_distances tf.Output) {
+// Returns All of the elements in the TensorArray, concatenated along the first
+// axis. A vector of the row sizes of the original T elements in the
+// value output.  In the example above, this would be the values:
+// `(n0, n1, ..., n(T-1))`.
+func TensorArrayConcatV3(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV3Attr) (value tf.Output, lengths tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "NearestNeighbors",
+		Type: "TensorArrayConcatV3",
 		Input: []tf.Input{
-			points, centers, k,
+			handle, flow_in,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1)
 }
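+
+// A hedged sketch of the write-then-concat pattern (editorial, not generated;
+// assumes the TensorArrayV3 and TensorArrayWriteV3 wrappers from this package):
+//
+// ```
+// s := op.NewScope()
+// ta, flow := op.TensorArrayV3(s, op.Const(s, int32(2)), tf.Float)
+// flow = op.TensorArrayWriteV3(s, ta, op.Const(s, int32(0)), op.Const(s, []float32{1, 2}), flow)
+// flow = op.TensorArrayWriteV3(s, ta, op.Const(s, int32(1)), op.Const(s, []float32{3, 4}), flow)
+// value, lengths := op.TensorArrayConcatV3(s, ta, flow, tf.Float)
+// // value is [1 2 3 4] (shape (4)); lengths is [2 2].
+// ```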
 
-// Returns x * y element-wise.
+// Scatter the data from the input value into specific TensorArray elements.
 //
-// *NOTE*: `Multiply` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Mul",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// BiasAddAttr is an optional argument to BiasAdd.
-type BiasAddAttr func(optionalAttr)
-
-// BiasAddDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the bias tensor will be added to the last dimension
-// of the value tensor.
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// The tensor will be added to "in_channels", the third-to-the-last
-//     dimension.
-// If not specified, defaults to "NHWC"
-func BiasAddDataFormat(value string) BiasAddAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Adds `bias` to `value`.
-//
-// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-// Broadcasting is supported, so `value` may have any number of dimensions.
+// `indices` must be a vector; its length must match the first dim of `value`.
 //
 // Arguments:
-//	value: Any number of dimensions.
-//	bias: 1-D with size the last dimension of `value`.
+//	handle: The handle to a TensorArray.
+//	indices: The locations at which to write the tensor elements.
+//	value: The concatenated tensor to write to the TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
 //
-// Returns Broadcasted sum of `value` and `bias`.
-func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) {
+// Returns A float scalar that enforces proper chaining of operations.
+func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "BiasAdd",
+		Type: "TensorArrayScatterV3",
 		Input: []tf.Input{
-			value, bias,
+			handle, indices, value, flow_in,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
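+
+// A hedged sketch for TensorArrayScatterV3 (editorial): the first dimension of
+// `value` must match the length of `indices`, which pick the destination slots.
+//
+// ```
+// s := op.NewScope()
+// ta, flow := op.TensorArrayV3(s, op.Const(s, int32(2)), tf.Float)
+// batch := op.Const(s, [][]float32{{1, 2}, {3, 4}})
+// flow = op.TensorArrayScatterV3(s, ta, op.Const(s, []int32{1, 0}), batch, flow)
+// // Element 1 now holds [1 2] and element 0 holds [3 4].
+// ```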
 
-// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse.
-type SparseReduceSumSparseAttr func(optionalAttr)
+// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3.
+type TensorArrayGatherV3Attr func(optionalAttr)
 
-// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value.
+// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value.
 //
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr {
+// value: The expected shape of an element, if known. Used to
+// validate the shapes of TensorArray elements. If this shape is not
+// fully specified, gathering zero-size TensorArrays is an error.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["element_shape"] = value
 	}
 }
 
-// Computes the sum of elements across dimensions of a SparseTensor.
+// Gather specific elements from the TensorArray into output `value`.
 //
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a
-// SparseTensor.
-//
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
-//
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
+// All elements selected by `indices` must have the same shape.
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseReduceSumSparse",
-		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
-type AllCandidateSamplerAttr func(optionalAttr)
-
-// AllCandidateSamplerSeed sets the optional seed attribute to value.
+//	handle: The handle to a TensorArray.
+//	indices: The locations in the TensorArray from which to read tensor elements.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	dtype: The type of the element that is returned.
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Generates labels for candidate sampling with a learned unigram distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
-//
-// Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to produce.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "AllCandidateSampler",
-		Input: []tf.Input{
-			true_classes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Returns x + y element-wise.
-//
-// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AddV2",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns an element-wise indication of the sign of a number.
-//
-// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
-//
-// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
-func Sign(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sign",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that passes a sliding window over `input_dataset`.
-//
-// Arguments:
-//
-//	window_size: A scalar representing the number of elements in the
-// sliding window.
-//	window_shift: A scalar representing the steps moving the sliding window
-// forward in one iteration. It must be positive.
-//	window_stride: A scalar representing the stride of the input elements of the sliding window.
-// It must be positive.
-//
-//
-func ExperimentalSlidingWindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalSlidingWindowDataset",
-		Input: []tf.Input{
-			input_dataset, window_size, window_shift, window_stride,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns which elements of x are finite.
-//
-// @compatibility(numpy)
-// Equivalent to np.isfinite
-// @end_compatibility
-func IsFinite(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "IsFinite",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
-type ResourceStridedSliceAssignAttr func(optionalAttr)
-
-// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["begin_mask"] = value
-	}
-}
-
-// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["end_mask"] = value
-	}
-}
-
-// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["ellipsis_mask"] = value
-	}
-}
-
-// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["new_axis_mask"] = value
-	}
-}
-
-// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["shrink_axis_mask"] = value
-	}
-}
-
-// Assign `value` to the sliced l-value reference of `ref`.
-//
-// The values of `value` are assigned to the positions in the variable
-// `ref` that are selected by the slice parameters. The slice parameters
-// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.
-//
-// NOTE this op currently does not support broadcasting and so `value`'s
-// shape must be exactly the shape produced by the slice of `ref`.
-//
-// Returns the created operation.
-func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceStridedSliceAssign",
-		Input: []tf.Input{
-			ref, begin, end, strides, value,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// ArgMaxAttr is an optional argument to ArgMax.
-type ArgMaxAttr func(optionalAttr)
-
-// ArgMaxOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMaxOutputType(value tf.DataType) ArgMaxAttr {
-	return func(m optionalAttr) {
-		m["output_type"] = value
-	}
-}
-
-// Returns the index with the largest value across dimensions of a tensor.
-//
-// Note that in case of ties the identity of the return value is not guaranteed.
-//
-// Arguments:
-//
-//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-// Describes which dimension of the input Tensor to reduce across. For vectors,
-// use dimension = 0.
-func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ArgMax",
-		Input: []tf.Input{
-			input, dimension,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// PreventGradientAttr is an optional argument to PreventGradient.
-type PreventGradientAttr func(optionalAttr)
-
-// PreventGradientMessage sets the optional message attribute to value.
-//
-// value: Will be printed in the error when anyone tries to differentiate
-// this operation.
-// If not specified, defaults to ""
-func PreventGradientMessage(value string) PreventGradientAttr {
-	return func(m optionalAttr) {
-		m["message"] = value
-	}
-}
-
-// An identity op that triggers an error if a gradient is requested.
-//
-// When executed in a graph, this op outputs its input tensor as-is.
-//
-// When building ops to compute gradients, the TensorFlow gradient system
-// will return an error when trying to lookup the gradient of this op,
-// because no gradient must ever be registered for this function.  This
-// op exists to prevent subtle bugs from silently returning unimplemented
-// gradients in some corner cases.
-//
-// Arguments:
-//	input: any tensor.
-//
-// Returns the same input tensor.
-func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "PreventGradient",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes asin of x element-wise.
-func Asin(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Asin",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SparseToDenseAttr is an optional argument to SparseToDense.
-type SparseToDenseAttr func(optionalAttr)
-
-// SparseToDenseValidateIndices sets the optional validate_indices attribute to value.
-//
-// value: If true, indices are checked to make sure they are sorted in
-// lexicographic order and that there are no repeats.
-// If not specified, defaults to true
-func SparseToDenseValidateIndices(value bool) SparseToDenseAttr {
-	return func(m optionalAttr) {
-		m["validate_indices"] = value
-	}
-}
-
-// Converts a sparse representation into a dense tensor.
-//
-// Builds an array `dense` with shape `output_shape` such that
-//
-// ```
-// # If sparse_indices is scalar
-// dense[i] = (i == sparse_indices ? sparse_values : default_value)
-//
-// # If sparse_indices is a vector, then for each i
-// dense[sparse_indices[i]] = sparse_values[i]
-//
-// # If sparse_indices is an n by d matrix, then for each i in [0, n)
-// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
-// ```
-//
-// All other values in `dense` are set to `default_value`.  If `sparse_values` is a
-// scalar, all sparse indices are set to this single value.
-//
-// Indices should be sorted in lexicographic order, and indices must not
-// contain any repeats. If `validate_indices` is true, these properties
-// are checked during execution.
-//
-// Arguments:
-//	sparse_indices: 0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete
-// index where `sparse_values[i]` will be placed.
-//	output_shape: 1-D.  Shape of the dense output tensor.
-//	sparse_values: 1-D.  Values corresponding to each row of `sparse_indices`,
-// or a scalar value to be used for all sparse indices.
-//	default_value: Scalar value to set for indices not specified in
-// `sparse_indices`.
-//
-// Returns Dense output tensor of shape `output_shape`.
-func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseToDense",
-		Input: []tf.Input{
-			sparse_indices, output_shape, sparse_values, default_value,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the sum along sparse segments of a tensor.
-//
-// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
-// misisng, the `output` tensor at that position will be zeroed.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/sparse#Segmentation)
-// for an explanation of segments.
-//
-// For example:
-//
-// ```python
-// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
-//
-// tf.sparse_segment_sum_with_num_segments(
-//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
-// # => [[0 0 0 0]
-// #     [0 0 0 0]
-// #     [0 0 0 0]]
-//
-// tf.sparse_segment_sum_with_num_segments(c,
-//                                         tf.constant([0, 1]),
-//                                         tf.constant([0, 2],
-//                                         num_segments=4))
-// # => [[ 1  2  3  4]
-// #     [ 0  0  0  0]
-// #     [-1 -2 -3 -4]
-// #     [ 0  0  0  0]]
-// ```
-//
-// Arguments:
-//
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-//	num_segments: Should equal the number of distinct segment IDs.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `num_segments`.
-func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentSumWithNumSegments",
-		Input: []tf.Input{
-			data, indices, segment_ids, num_segments,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the determinant of one or more square matrices.
-//
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices. The output is a tensor containing the determinants
-// for all input submatrices `[..., :, :]`.
-//
-// Arguments:
-//	input: Shape is `[..., M, M]`.
-//
-// Returns Shape is `[...]`.
-func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MatrixDeterminant",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes sin of x element-wise.
-func Sin(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sin",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes Psi, the derivative of Lgamma (the log of the absolute value of
-//
-// `Gamma(x)`), element-wise.
-func Digamma(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Digamma",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter.
-type Conv2DBackpropFilterAttr func(optionalAttr)
-
-// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
-// If not specified, defaults to true
-func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr {
-	return func(m optionalAttr) {
-		m["use_cudnn_on_gpu"] = value
-	}
-}
-
-// Conv2DBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value.
-//
-// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith
-// dimension, the amount of padding inserted before and after the dimension is
-// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If
-// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty.
-// If not specified, defaults to <>
-func Conv2DBackpropFilterExplicitPaddings(value []int64) Conv2DBackpropFilterAttr {
-	return func(m optionalAttr) {
-		m["explicit_paddings"] = value
-	}
-}
-
-// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Conv2DBackpropFilterDilations sets the optional dilations attribute to value.
-//
-// value: 1-D tensor of length 4.  The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
-// element on that dimension. The dimension order is determined by the value of
-// `data_format`, see above for details. Dilations in the batch and depth
-// dimensions must be 1.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 >
-func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes the gradients of convolution with respect to the filter.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-//	filter_sizes: An integer vector representing the tensor shape of `filter`,
-// where `filter` is a 4-D
-// `[filter_height, filter_width, in_channels, out_channels]` tensor.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution. Must be in the same order as the dimension specified with
-// format.
-//	padding: The type of padding algorithm to use.
-//
-// Returns 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
-// the `filter` input of the convolution.
-func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Conv2DBackpropFilter",
-		Input: []tf.Input{
-			input, filter_sizes, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the number of work units this Reader has finished processing.
-//
-// Arguments:
-//	reader_handle: Handle to a Reader.
-func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReaderNumWorkUnitsCompletedV2",
-		Input: []tf.Input{
-			reader_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that contains the elements of `input_dataset` ignoring errors.
-func ExperimentalIgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalIgnoreErrorsDataset",
-		Input: []tf.Input{
-			input_dataset,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the log of the absolute value of `Gamma(x)` element-wise.
-func Lgamma(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Lgamma",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the reverse mode backpropagated gradient of the Cholesky algorithm.
-//
-// For an explanation see "Differentiation of the Cholesky algorithm" by
-// Iain Murray http://arxiv.org/abs/1602.07527.
-//
-// Arguments:
-//	l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`.
-// Algorithm depends only on lower triangular part of the innermost matrices of
-// this tensor.
-//	grad: df/dl where f is some scalar function. Shape is `[..., M, M]`.
-// Algorithm depends only on lower triangular part of the innermost matrices of
-// this tensor.
-//
-// Returns Symmetrized version of df/dA . Shape is `[..., M, M]`
-func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "CholeskyGrad",
-		Input: []tf.Input{
-			l, grad,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that emits each dim-0 slice of `components` once.
-func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "TensorSliceDataset",
-		Input: []tf.Input{
-			tf.OutputList(components),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the index of a data point that should be added to the seed set.
-//
-// Entries in distances are assumed to be squared distances of candidate points to
-// the already sampled centers in the seed set. The op constructs one Markov chain
-// of the k-MC^2 algorithm and returns the index of one candidate point to be added
-// as an additional cluster center.
-//
-// Arguments:
-//	distances: Vector with squared distances to the closest previously sampled cluster center
-// for each candidate point.
-//	seed: Scalar. Seed for initializing the random number generator.
-//
-// Returns Scalar with the index of the sampled point.
-func KMC2ChainInitialization(scope *Scope, distances tf.Output, seed tf.Output) (index tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "KMC2ChainInitialization",
-		Input: []tf.Input{
-			distances, seed,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes hyperbolic sine of x element-wise.
-func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sinh",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the sum along sparse segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
-// for an explanation of segments.
-//
-// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
-// dimension, selecting a subset of dimension 0, specified by `indices`.
-//
-// For example:
-//
-// ```python
-// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
-//
-// # Select two rows, one segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
-// # => [[0 0 0 0]]
-//
-// # Select two rows, two segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
-// # => [[ 1  2  3  4]
-// #     [-1 -2 -3 -4]]
-//
-// # Select all rows, two segments.
-// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
-// # => [[0 0 0 0]
-// #     [5 6 7 8]]
-//
-// # Which is equivalent to:
-// tf.segment_sum(c, tf.constant([0, 0, 1]))
-// ```
-//
-// Arguments:
-//
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentSum",
-		Input: []tf.Input{
-			data, indices, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes natural logarithm of x element-wise.
-//
-// I.e., \\(y = \log_e x\\).
-func Log(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Log",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Rounds the values of a tensor to the nearest integer, element-wise.
-//
-// Rounds half to even.  Also known as bankers rounding. If you want to round
-// according to the current system rounding mode use std::cint.
-func Round(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Round",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes reciprocal of square root of x element-wise.
-//
-// I.e., \\(y = 1 / \sqrt{x}\\).
-func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Rsqrt",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Get the value of the tensor specified by its handle.
-//
-// Arguments:
-//	handle: The handle for a tensor stored in the session state.
-//	dtype: The type of the output value.
-//
-// Returns The tensor for the given handle.
-func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) {
+// Returns All of the elements in the TensorArray, concatenated along a new
+// axis (the new dimension 0).
+func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "GetSessionTensor",
+		Type: "TensorArrayGatherV3",
+		Input: []tf.Input{
+			handle, indices, flow_in,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
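+
+// A hedged sketch for TensorArrayGatherV3 (editorial): the gathered elements
+// come back stacked along a new dimension 0.
+//
+// ```
+// s := op.NewScope()
+// ta, flow := op.TensorArrayV3(s, op.Const(s, int32(3)), tf.Float)
+// flow = op.TensorArrayScatterV3(s, ta, op.Const(s, []int32{0, 1, 2}),
+// 	op.Const(s, [][]float32{{1}, {2}, {3}}), flow)
+// picked := op.TensorArrayGatherV3(s, ta, op.Const(s, []int32{2, 0}), flow, tf.Float)
+// // picked is [[3] [1]], shape (2 x 1).
+// ```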
+
+// Creates a TensorArray for storing multiple gradients of values in the given handle.
+//
+// Similar to TensorArrayGradV3. However it creates an accumulator with an
+// expanded shape compared to the input TensorArray whose gradient is being
+// computed. This enables multiple gradients for the same TensorArray to be
+// calculated using the same accumulator.
+//
+// Arguments:
+//	handle: The handle to the forward TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	shape_to_prepend: An int32 vector representing a shape. Elements in the gradient accumulator will
+// have a shape which is this shape_to_prepend value concatenated with the shape of the
+// elements in the TensorArray corresponding to the input handle.
+//	source: The gradient source string, used to decide which gradient TensorArray
+// to return.
+func TensorArrayGradWithShape(scope *Scope, handle tf.Output, flow_in tf.Output, shape_to_prepend tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"source": source}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayGradWithShape",
+		Input: []tf.Input{
+			handle, flow_in, shape_to_prepend,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
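+
+// A hedged sketch for TensorArrayGradWithShape (editorial): each accumulator
+// element gains the prepended [2] dimension relative to the forward elements.
+//
+// ```
+// s := op.NewScope()
+// ta, flow := op.TensorArrayV3(s, op.Const(s, int32(4)), tf.Float)
+// gradTA, gradFlow := op.TensorArrayGradWithShape(s, ta, flow,
+// 	op.Const(s, []int32{2}), "gradients")
+// _, _ = gradTA, gradFlow
+// ```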
+
+// Creates a TensorArray for storing the gradients of values in the given handle.
+//
+// If the given TensorArray gradient already exists, returns a reference to it.
+//
+// Locks the size of the original TensorArray by disabling its dynamic size flag.
+//
+// **A note about the input flow_in:**
+//
+// The handle flow_in forces the execution of the gradient lookup to occur
+// only after certain other operations have occurred.  For example, when
+// the forward TensorArray is dynamically sized, writes to this TensorArray
+// may resize the object.  The gradient TensorArray is statically sized based
+// on the size of the forward TensorArray when this operation executes.
+// Furthermore, the size of the forward TensorArray is frozen by this call.
+// As a result, the flow is used to ensure that the call to generate the gradient
+// TensorArray only happens after all writes are executed.
+//
+// In the case of dynamically sized TensorArrays, gradient computation should
+// only be performed on read operations that have themselves been chained via
+// flow to occur only after all writes have executed. That way the final size
+// of the forward TensorArray is known when this operation is called.
+//
+// **A note about the source attribute:**
+//
+// TensorArray gradient calls use an accumulator TensorArray object.  If
+// multiple gradients are calculated and run in the same session, the multiple
+// gradient nodes may accidentally flow through the same accumulator TensorArray.
+// This double counts and generally breaks the TensorArray gradient flow.
+//
+// The solution is to identify which gradient call this particular
+// TensorArray gradient is being called in.  This is performed by identifying
+// a unique string (e.g. "gradients", "gradients_1", ...) from the input
+// gradient Tensor's name.  This string is used as a suffix when creating
+// the TensorArray gradient object here (the attribute `source`).
+//
+// The attribute `source` is added as a suffix to the forward TensorArray's
+// name when performing the creation / lookup, so that each separate gradient
+// calculation gets its own TensorArray accumulator.
+//
+// Arguments:
+//	handle: The handle to the forward TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	source: The gradient source string, used to decide which gradient TensorArray
+// to return.
+func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"source": source}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayGradV3",
+		Input: []tf.Input{
+			handle, flow_in,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
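+
+// A hedged sketch for TensorArrayGradV3 (editorial): distinct source strings
+// keep separate gradient computations from sharing one accumulator.
+//
+// ```
+// s := op.NewScope()
+// ta, flow := op.TensorArrayV3(s, op.Const(s, int32(4)), tf.Float)
+// gradTA, gradFlow := op.TensorArrayGradV3(s, ta, flow, "gradients")
+// _, _ = gradTA, gradFlow
+// ```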
+
+// Pop the element at the top of the stack.
+//
+// Arguments:
+//	handle: The handle to a stack.
+//	elem_type: The type of the element that is popped.
+//
+// Returns The tensor that is popped from the top of the stack.
+func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"elem_type": elem_type}
+	opspec := tf.OpSpec{
+		Type: "StackPopV2",
 		Input: []tf.Input{
 			handle,
 		},
@@ -7866,58 +6603,117 @@
 	return op.Output(0)
 }
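+
+// A hedged sketch of a push/pop round trip (editorial; assumes the StackV2 and
+// StackPushV2 wrappers from this package, and Scope.WithControlDependencies to
+// order the pop after the push):
+//
+// ```
+// s := op.NewScope()
+// stack := op.StackV2(s, op.Const(s, int32(10)), tf.Float)
+// pushed := op.StackPushV2(s, stack, op.Const(s, float32(3.14)))
+// popped := op.StackPopV2(s.WithControlDependencies(pushed.Op), stack, tf.Float)
+// _ = popped
+// ```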
 
-// Computes the gradient for the sqrt of `x` wrt its input.
+// OneHotAttr is an optional argument to OneHot.
+type OneHotAttr func(optionalAttr)
+
+// OneHotAxis sets the optional axis attribute to value.
 //
-// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy`
-// is the corresponding input gradient.
-func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SqrtGrad",
-		Input: []tf.Input{
-			y, dy,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MatrixInverseAttr is an optional argument to MatrixInverse.
-type MatrixInverseAttr func(optionalAttr)
-
-// MatrixInverseAdjoint sets the optional adjoint attribute to value.
-// If not specified, defaults to false
-func MatrixInverseAdjoint(value bool) MatrixInverseAttr {
+// value: The axis to fill (default: -1, a new inner-most axis).
+// If not specified, defaults to -1
+func OneHotAxis(value int64) OneHotAttr {
 	return func(m optionalAttr) {
-		m["adjoint"] = value
+		m["axis"] = value
 	}
 }
 
-// Computes the inverse of one or more square invertible matrices or their
+// Returns a one-hot tensor.
 //
-// adjoints (conjugate transposes).
+// The locations represented by indices in `indices` take value `on_value`,
+// while all other locations take value `off_value`.
 //
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices. The output is a tensor of the same shape as the input
-// containing the inverse for all input submatrices `[..., :, :]`.
+// If the input `indices` has rank `N`, the output will have rank `N+1`.
+// The new axis is created at dimension `axis` (default: the new axis is
+// appended at the end).
 //
-// The op uses LU decomposition with partial pivoting to compute the inverses.
+// If `indices` is a scalar, the output shape will be a vector of length `depth`.
 //
-// If a matrix is not invertible there is no guarantee what the op does. It
-// may detect the condition and raise an exception or it may simply return a
-// garbage result.
+// If `indices` is a vector of length `features`, the output shape will be:
+// ```
+//   features x depth if axis == -1
+//   depth x features if axis == 0
+// ```
+//
+// If `indices` is a matrix (batch) with shape `[batch, features]`,
+// the output shape will be:
+// ```
+//   batch x features x depth if axis == -1
+//   batch x depth x features if axis == 1
+//   depth x batch x features if axis == 0
+// ```
+//
+//
+// Examples
+// =========
+//
+// Suppose that
+// ```
+//   indices = [0, 2, -1, 1]
+//   depth = 3
+//   on_value = 5.0
+//   off_value = 0.0
+//   axis = -1
+// ```
+//
+// Then output is `[4 x 3]`:
+// ```
+// output =
+//   [5.0 0.0 0.0]  // one_hot(0)
+//   [0.0 0.0 5.0]  // one_hot(2)
+//   [0.0 0.0 0.0]  // one_hot(-1)
+//   [0.0 5.0 0.0]  // one_hot(1)
+// ```
+//
+// Suppose that
+// ```
+//   indices = [0, 2, -1, 1]
+//   depth = 3
+//   on_value = 0.0
+//   off_value = 3.0
+//   axis = 0
+// ```
+//
+// Then output is `[3 x 4]`:
+// ```
+// output =
+//   [0.0 3.0 3.0 3.0]
+//   [3.0 3.0 3.0 0.0]
+//   [3.0 3.0 3.0 3.0]
+//   [3.0 0.0 3.0 3.0]
+// //  ^                one_hot(0)
+// //      ^            one_hot(2)
+// //          ^        one_hot(-1)
+// //              ^    one_hot(1)
+// ```
+//
+// Suppose that
+// ```
+//   indices = [[0, 2], [1, -1]]
+//   depth = 3
+//   on_value = 1.0
+//   off_value = 0.0
+//   axis = -1
+// ```
+//
+// Then output is `[2 x 2 x 3]`:
+// ```
+// output =
+//   [
+//     [1.0, 0.0, 0.0]  // one_hot(0)
+//     [0.0, 0.0, 1.0]  // one_hot(2)
+//   ][
+//     [0.0, 1.0, 0.0]  // one_hot(1)
+//     [0.0, 0.0, 0.0]  // one_hot(-1)
+//   ]
+// ```
 //
 // Arguments:
-//	input: Shape is `[..., M, M]`.
+//	indices: A tensor of indices.
+//	depth: A scalar defining the depth of the one hot dimension.
+//	on_value: A scalar defining the value to fill in output when `indices[j] = i`.
+//	off_value: A scalar defining the value to fill in output when `indices[j] != i`.
 //
-// Returns Shape is `[..., M, M]`.
-//
-// @compatibility(numpy)
-// Equivalent to np.linalg.inv
-// @end_compatibility
-func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) {
+// Returns The one-hot tensor.
+func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7926,7 +6722,157 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixInverse",
+		Type: "OneHot",
+		Input: []tf.Input{
+			indices, depth, on_value, off_value,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
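+
+// A minimal OneHot sketch (editorial, hedged): three class ids encoded at
+// depth 4 with the default axis of -1.
+//
+// ```
+// s := op.NewScope()
+// ids := op.Const(s, []int32{0, 3, 1})
+// oneHot := op.OneHot(s, ids, op.Const(s, int32(4)),
+// 	op.Const(s, float32(1)), op.Const(s, float32(0)))
+// // oneHot is [[1 0 0 0] [0 0 0 1] [0 1 0 0]].
+// ```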
+
+// Computes the number of elements in the given queue.
+//
+// Arguments:
+//	handle: The handle to a queue.
+//
+// Returns The number of elements in the given queue.
+func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "QueueSizeV2",
+		Input: []tf.Input{
+			handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// QueueDequeueManyV2Attr is an optional argument to QueueDequeueManyV2.
+type QueueDequeueManyV2Attr func(optionalAttr)
+
+// QueueDequeueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
+//
+// value: If the queue has fewer than n elements, this operation
+// will block for up to timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueDequeueManyV2TimeoutMs(value int64) QueueDequeueManyV2Attr {
+	return func(m optionalAttr) {
+		m["timeout_ms"] = value
+	}
+}
+
+// Dequeues `n` tuples of one or more tensors from the given queue.
+//
+// If the queue is closed and there are fewer than `n` elements, then an
+// OutOfRange error is returned.
+//
+// This operation concatenates queue-element component tensors along the
+// 0th dimension to make a single component tensor.  All of the components
+// in the dequeued tuple will have size `n` in the 0th dimension.
+//
+// This operation has `k` outputs, where `k` is the number of components in
+// the tuples stored in the given queue, and output `i` is the ith
+// component of the dequeued tuple.
+//
+// N.B. If the queue is empty, this operation will block until `n` elements
+// have been dequeued (or 'timeout_ms' elapses, if specified).
+//
+// Arguments:
+//	handle: The handle to a queue.
+//	n: The number of tuples to dequeue.
+//	component_types: The type of each component in a tuple.
+//
+// Returns One or more tensors that were dequeued as a tuple.
+func QueueDequeueManyV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueManyV2Attr) (components []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QueueDequeueManyV2",
+		Input: []tf.Input{
+			handle, n,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("QueueDequeueManyV2", err)
+		return
+	}
+	return components
+}
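+
+// A hedged sketch for QueueDequeueManyV2 (editorial; assumes the FIFOQueueV2
+// wrapper from this package). The dequeue blocks until n elements exist.
+//
+// ```
+// s := op.NewScope()
+// q := op.FIFOQueueV2(s, []tf.DataType{tf.Float})
+// batch := op.QueueDequeueManyV2(s, q, op.Const(s, int32(2)), []tf.DataType{tf.Float})
+// // batch[0] has shape (2, ...) once two elements have been enqueued.
+// _ = batch
+// ```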
+
+// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize.
+type QuantizeAndDequantizeAttr func(optionalAttr)
+
+// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr {
+	return func(m optionalAttr) {
+		m["signed_input"] = value
+	}
+}
+
+// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value.
+// If not specified, defaults to 8
+func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
+
+// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value.
+// If not specified, defaults to false
+func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr {
+	return func(m optionalAttr) {
+		m["range_given"] = value
+	}
+}
+
+// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value.
+// If not specified, defaults to 0
+func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr {
+	return func(m optionalAttr) {
+		m["input_min"] = value
+	}
+}
+
+// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value.
+// If not specified, defaults to 0
+func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr {
+	return func(m optionalAttr) {
+		m["input_max"] = value
+	}
+}
+
+// Use QuantizeAndDequantizeV2 instead.
+//
+// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2
+func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizeAndDequantize",
 		Input: []tf.Input{
 			input,
 		},
@@ -7936,190 +6882,254 @@
 	return op.Output(0)
 }
 
-// Returns x + y element-wise.
+// Returns locations of nonzero / true values in a tensor.
 //
-// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// This operation returns the coordinates of true elements in `condition`. The
+// coordinates are returned in a 2-D tensor where the first dimension (rows)
+// represents the number of true elements, and the second dimension (columns)
+// represents the coordinates of the true elements. Keep in mind that the shape of
+// the output tensor can vary depending on how many true values there are in
+// `condition`. Indices are output in row-major order.
+//
+// For example:
+//
+// ```
+// # 'input' tensor is [[True, False]
+// #                    [True, False]]
+// # 'input' has two true values, so output has two coordinates.
+// # 'input' has rank of 2, so coordinates have two indices.
+// where(input) ==> [[0, 0],
+//                   [1, 0]]
+//
+// # `condition` tensor is [[[True, False]
+// #                     [True, False]]
+// #                    [[False, True]
+// #                     [False, True]]
+// #                    [[False, False]
+// #                     [False, True]]]
+// # 'input' has 5 true values, so output has 5 coordinates.
+// # 'input' has rank of 3, so coordinates have three indices.
+// where(input) ==> [[0, 0, 0],
+//                   [0, 1, 0],
+//                   [1, 0, 1],
+//                   [1, 1, 1],
+//                   [2, 1, 1]]
+//
+// # `condition` tensor is [[[1.5,  0.0]
+// #                     [-0.5, 0.0]]
+// #                    [[0.0,  0.25]
+// #                     [0.0,  0.75]]
+// #                    [[0.0,  0.0]
+// #                     [0.0,  0.01]]]
+// # 'input' has 5 nonzero values, so output has 5 coordinates.
+// # 'input' has rank of 3, so coordinates have three indices.
+// where(input) ==> [[0, 0, 0],
+//                   [0, 1, 0],
+//                   [1, 0, 1],
+//                   [1, 1, 1],
+//                   [2, 1, 1]]
+//
+// # `condition` tensor is [[[1.5 + 0.0j, 0.0  + 0.0j]
+// #                     [0.0 + 0.5j, 0.0  + 0.0j]]
+// #                    [[0.0 + 0.0j, 0.25 + 1.5j]
+// #                     [0.0 + 0.0j, 0.75 + 0.0j]]
+// #                    [[0.0 + 0.0j, 0.0  + 0.0j]
+// #                     [0.0 + 0.0j, 0.01 + 0.0j]]]
+// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates.
+// # 'input' has rank of 3, so coordinates have three indices.
+// where(input) ==> [[0, 0, 0],
+//                   [0, 1, 0],
+//                   [1, 0, 1],
+//                   [1, 1, 1],
+//                   [2, 1, 1]]
+// ```
+func Where(scope *Scope, condition tf.Output) (index tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Add",
+		Type: "Where",
 		Input: []tf.Input{
-			x, y,
+			condition,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
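+
+// A minimal Where sketch (editorial, hedged): coordinates of true entries,
+// returned as an int64 matrix in row-major order.
+//
+// ```
+// s := op.NewScope()
+// cond := op.Const(s, [][]bool{{true, false}, {true, false}})
+// coords := op.Where(s, cond)
+// // coords is [[0 0] [1 0]].
+// _ = coords
+// ```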
 
-// Computes the derivative of a Gamma random sample w.r.t. `alpha`.
-func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
+// QueueDequeueV2Attr is an optional argument to QueueDequeueV2.
+type QueueDequeueV2Attr func(optionalAttr)
+
+// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value.
+//
+// value: If the queue is empty, this operation will block for up to
+// timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr {
+	return func(m optionalAttr) {
+		m["timeout_ms"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "RandomGammaGrad",
-		Input: []tf.Input{
-			alpha, sample,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes square of x element-wise.
+// Dequeues a tuple of one or more tensors from the given queue.
 //
-// I.e., \\(y = x * x = x^2\\).
-func Square(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Square",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise.
+// This operation has k outputs, where k is the number of components
+// in the tuples stored in the given queue, and output i is the ith
+// component of the dequeued tuple.
 //
-// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
-// ](http://arxiv.org/abs/1511.07289)
-func Elu(scope *Scope, features tf.Output) (activations tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Elu",
-		Input: []tf.Input{
-			features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the reciprocal of x element-wise.
-//
-// I.e., \\(y = 1 / x\\).
-func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Reciprocal",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns a batched matrix tensor with new batched diagonal values.
-//
-// Given `input` and `diagonal`, this operation returns a tensor with the
-// same shape and values as `input`, except for the main diagonal of the
-// innermost matrices.  These will be overwritten by the values in `diagonal`.
-//
-// The output is computed as follows:
-//
-// Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has
-// `k` dimensions `[I, J, K, ..., min(M, N)]`.  Then the output is a
-// tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where:
-//
-//   * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`.
-//   * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`.
+// N.B. If the queue is empty, this operation will block until an element
+// has been dequeued (or 'timeout_ms' elapses, if specified).
 //
 // Arguments:
-//	input: Rank `k+1`, where `k >= 1`.
-//	diagonal: Rank `k`, where `k >= 1`.
+//	handle: The handle to a queue.
+//	component_types: The type of each component in a tuple.
 //
-// Returns Rank `k+1`, with `output.shape = input.shape`.
-func MatrixSetDiag(scope *Scope, input tf.Output, diagonal tf.Output) (output tf.Output) {
+// Returns One or more tensors that were dequeued as a tuple.
+func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "MatrixSetDiag",
+		Type: "QueueDequeueV2",
 		Input: []tf.Input{
-			input, diagonal,
+			handle,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("QueueDequeueV2", err)
+		return
+	}
+	return components
 }
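+
+// Editor's sketch (not generated code): a minimal use of the optional-attr
+// pattern above. The helper name is hypothetical, and `handle` is assumed to
+// come from a queue op such as PaddingFIFOQueueV2 elsewhere in this package;
+// the component types must match those of the queue.
+func exampleQueueDequeueV2(s *Scope, handle tf.Output) []tf.Output {
+	// Dequeue one float32 component, blocking for at most five seconds.
+	return QueueDequeueV2(s, handle, []tf.DataType{tf.Float},
+		QueueDequeueV2TimeoutMs(5000))
+}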
 
-// Returns the element-wise max of two SparseTensors.
+// QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2.
+type QueueEnqueueV2Attr func(optionalAttr)
+
+// QueueEnqueueV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
+// value: If the queue is full, this operation will block for up to
+// timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueEnqueueV2TimeoutMs(value int64) QueueEnqueueV2Attr {
+	return func(m optionalAttr) {
+		m["timeout_ms"] = value
+	}
+}
+
+// Enqueues a tuple of one or more tensors in the given queue.
+//
+// The components input has k elements, which correspond to the components of
+// tuples stored in the given queue.
+//
+// N.B. If the queue is full, this operation will block until the given
+// element has been enqueued (or 'timeout_ms' elapses, if specified).
 //
 // Arguments:
-//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, in the canonical lexicographic ordering.
-//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
-//	a_shape: 1-D.  Shape of the input SparseTensor.
-//	b_indices: counterpart to `a_indices` for the other operand.
-//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
-//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
+//	handle: The handle to a queue.
+//	components: One or more tensors from which the enqueued tensors should be taken.
 //
-// Returns 2-D.  The indices of the output SparseTensor.1-D.  The values of the output SparseTensor.
-func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+// Returns the created operation.
+func QueueEnqueueV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "SparseSparseMaximum",
-		Input: []tf.Input{
-			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
-		},
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	opspec := tf.OpSpec{
+		Type: "QueueEnqueueV2",
+		Input: []tf.Input{
+			handle, tf.OutputList(components),
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
 }
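+
+// Editor's sketch (not generated code): the enqueue half of the round trip.
+// The helper name is hypothetical; `handle` and `value` are assumed to exist
+// in the caller's graph, with `value` matching the queue's component type.
+func exampleQueueEnqueueV2(s *Scope, handle, value tf.Output) *tf.Operation {
+	// Enqueue a single-component tuple, giving up after one second.
+	return QueueEnqueueV2(s, handle, []tf.Output{value},
+		QueueEnqueueV2TimeoutMs(1000))
+}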
 
-// Computes the reciprocal of x element-wise.
+// MfccAttr is an optional argument to Mfcc.
+type MfccAttr func(optionalAttr)
+
+// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value.
 //
-// I.e., \\(y = 1 / x\\).
-func Inv(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: The highest frequency to use when calculating the
+// cepstrum.
+// If not specified, defaults to 4000
+func MfccUpperFrequencyLimit(value float32) MfccAttr {
+	return func(m optionalAttr) {
+		m["upper_frequency_limit"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Inv",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Creates a dataset that batches input elements into a SparseTensor.
+// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value.
+//
+// value: The lowest frequency to use when calculating the
+// cepstrum.
+// If not specified, defaults to 20
+func MfccLowerFrequencyLimit(value float32) MfccAttr {
+	return func(m optionalAttr) {
+		m["lower_frequency_limit"] = value
+	}
+}
+
+// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value.
+//
+// value: Resolution of the Mel bank used internally.
+// If not specified, defaults to 40
+func MfccFilterbankChannelCount(value int64) MfccAttr {
+	return func(m optionalAttr) {
+		m["filterbank_channel_count"] = value
+	}
+}
+
+// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value.
+//
+// value: How many output channels to produce per time slice.
+// If not specified, defaults to 13
+func MfccDctCoefficientCount(value int64) MfccAttr {
+	return func(m optionalAttr) {
+		m["dct_coefficient_count"] = value
+	}
+}
+
+// Transforms a spectrogram into a form that's useful for speech recognition.
+//
+// Mel Frequency Cepstral Coefficients are a way of representing audio data that
+// has proven effective as an input feature for machine learning. They are
+// created by taking the spectrum of a spectrogram (a 'cepstrum') and discarding
+// some of the
+// higher frequencies that are less significant to the human ear. They have a long
+// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
+// is a good resource to learn more.
 //
 // Arguments:
-//	input_dataset: A handle to an input dataset. Must have a single component.
-//	batch_size: A scalar representing the number of elements to accumulate in a
-// batch.
-//	row_shape: A vector representing the dense shape of each row in the produced
-// SparseTensor. The shape may be partially specified, using `-1` to indicate
-// that a particular dimension should use the maximum size of all batch elements.
-//
-//
-func ExperimentalDenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+//	spectrogram: Typically produced by the Spectrogram op, with magnitude_squared
+// set to true.
+//	sample_rate: How many samples per second the source audio used.
+func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ExperimentalDenseToSparseBatchDataset",
+		Type: "Mfcc",
 		Input: []tf.Input{
-			input_dataset, batch_size, row_shape,
+			spectrogram, sample_rate,
 		},
 		Attrs: attrs,
 	}
@@ -8127,30 +7137,406 @@
 	return op.Output(0)
 }
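+
+// Editor's sketch (not generated code): combining several of the Mfcc options
+// defined above. The helper name and attribute values are illustrative only;
+// `spectrogram` and `sampleRate` are assumed to exist in the caller's graph.
+func exampleMfcc(s *Scope, spectrogram, sampleRate tf.Output) tf.Output {
+	// Narrow the analysis band and request 26 DCT coefficients per frame.
+	return Mfcc(s, spectrogram, sampleRate,
+		MfccLowerFrequencyLimit(80),
+		MfccUpperFrequencyLimit(7600),
+		MfccDctCoefficientCount(26))
+}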
 
-// CastAttr is an optional argument to Cast.
-type CastAttr func(optionalAttr)
+// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2.
+type PaddingFIFOQueueV2Attr func(optionalAttr)
 
-// CastTruncate sets the optional Truncate attribute to value.
-// If not specified, defaults to false
-func CastTruncate(value bool) CastAttr {
+// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value.
+//
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types.
+// Shapes of fixed rank but variable size are allowed by setting
+// any shape dimension to -1.  In this case, the inputs' shape may vary along
+// the given dimension, and DequeueMany will pad the given dimension with
+// zeros up to the maximum shape of all elements in the given batch.
+// If the length of this attr is 0, different queue elements may have
+// different ranks and shapes, but only one element may be dequeued at a time.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr {
 	return func(m optionalAttr) {
-		m["Truncate"] = value
+		m["shapes"] = value
 	}
 }
 
-// Cast x of type SrcT to y of DstT.
-func Cast(scope *Scope, x tf.Output, DstT tf.DataType, optional ...CastAttr) (y tf.Output) {
+// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value.
+//
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// PaddingFIFOQueueV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that produces elements in first-in first-out order.
+//
+// Variable-size shapes are allowed by setting the corresponding shape dimensions
+// to -1 in the shape attr.  In this case DequeueMany will pad up to the maximum
+// size of any given element in the minibatch.  See the `shapes` attribute above
+// for details.
+//
+// Arguments:
+//	component_types: The type of each component in a value.
+//
+// Returns The handle to the queue.
+func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"DstT": DstT}
+	attrs := map[string]interface{}{"component_types": component_types}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Cast",
+		Type: "PaddingFIFOQueueV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
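+
+// Editor's sketch (not generated code): a padding queue of variable-length
+// float32 vectors. The helper name and capacity are illustrative only.
+func examplePaddingFIFOQueueV2(s *Scope) tf.Output {
+	// -1 marks the dimension that DequeueMany would pad to the batch maximum.
+	return PaddingFIFOQueueV2(s, []tf.DataType{tf.Float},
+		PaddingFIFOQueueV2Shapes([]tf.Shape{tf.MakeShape(-1)}),
+		PaddingFIFOQueueV2Capacity(32))
+}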
+
+// Interleave the values from the `data` tensors into a single tensor.
+//
+// Builds a merged tensor such that
+//
+// ```python
+//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
+// ```
+//
+// For example, if each `indices[m]` is scalar or vector, we have
+//
+// ```python
+//     # Scalar indices:
+//     merged[indices[m], ...] = data[m][...]
+//
+//     # Vector indices:
+//     merged[indices[m][i], ...] = data[m][i, ...]
+// ```
+//
+// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
+// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
+// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
+// `constant`, the output shape is
+//
+//     merged.shape = [max(indices)] + constant
+//
+// Values may be merged in parallel, so if an index appears in both `indices[m][i]`
+// and `indices[n][j]`, the result may be invalid. This differs from the normal
+// DynamicStitch operator, which defines the behavior in that case.
+//
+// For example:
+//
+// ```python
+//     indices[0] = 6
+//     indices[1] = [4, 1]
+//     indices[2] = [[5, 2], [0, 3]]
+//     data[0] = [61, 62]
+//     data[1] = [[41, 42], [11, 12]]
+//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
+//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
+//               [51, 52], [61, 62]]
+// ```
+//
+// This method can be used to merge partitions created by `dynamic_partition`
+// as illustrated on the following example:
+//
+// ```python
+//     # Apply a function (increment x_i) to elements for which a certain
+//     # condition applies (x_i != -1 in this example).
+//     x = tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
+//     condition_mask = tf.not_equal(x, tf.constant(-1.))
+//     partitioned_data = tf.dynamic_partition(
+//         x, tf.cast(condition_mask, tf.int32), 2)
+//     partitioned_data[1] = partitioned_data[1] + 1.0
+//     condition_indices = tf.dynamic_partition(
+//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32), 2)
+//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
+//     # Here x = [1.1, -1., 6.2, 5.3, -1., 8.4]; the -1. values remain
+//     # unchanged.
+// ```
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
+// </div>
+func ParallelDynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ParallelDynamicStitch",
 		Input: []tf.Input{
-			x,
+			tf.OutputList(indices), tf.OutputList(data),
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
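+
+// Editor's sketch (not generated code): a Go rendering of the scalar/vector
+// index example above. The helper name is hypothetical.
+func exampleParallelDynamicStitch(s *Scope) tf.Output {
+	indices := []tf.Output{
+		Const(s.SubScope("i0"), []int32{0, 2}),
+		Const(s.SubScope("i1"), []int32{1, 3}),
+	}
+	data := []tf.Output{
+		Const(s.SubScope("d0"), []int32{10, 30}),
+		Const(s.SubScope("d1"), []int32{20, 40}),
+	}
+	// The index sets are disjoint, so the parallel merge is well defined:
+	// merged == [10, 20, 30, 40].
+	return ParallelDynamicStitch(s, indices, data)
+}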
+
+// Partitions `data` into `num_partitions` tensors using indices from `partitions`.
+//
+// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]`
+// becomes part of `outputs[partitions[js]]`.  The slices with `partitions[js] = i`
+// are placed in `outputs[i]` in lexicographic order of `js`, and the first
+// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`.
+// In detail,
+//
+// ```python
+//     outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:]
+//
+//     outputs[i] = pack([data[js, ...] for js if partitions[js] == i])
+// ```
+//
+// `data.shape` must start with `partitions.shape`.
+//
+// For example:
+//
+// ```python
+//     # Scalar partitions.
+//     partitions = 1
+//     num_partitions = 2
+//     data = [10, 20]
+//     outputs[0] = []  # Empty with shape [0, 2]
+//     outputs[1] = [[10, 20]]
+//
+//     # Vector partitions.
+//     partitions = [0, 0, 1, 1, 0]
+//     num_partitions = 2
+//     data = [10, 20, 30, 40, 50]
+//     outputs[0] = [10, 20, 50]
+//     outputs[1] = [30, 40]
+// ```
+//
+// See `dynamic_stitch` for an example on how to merge partitions back.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicPartition.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	partitions: Any shape.  Indices in the range `[0, num_partitions)`.
+//	num_partitions: The number of partitions to output.
+func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_partitions": num_partitions}
+	opspec := tf.OpSpec{
+		Type: "DynamicPartition",
+		Input: []tf.Input{
+			data, partitions,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("DynamicPartition", err)
+		return
+	}
+	return outputs
+}
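+
+// Editor's sketch (not generated code): the vector-partition example above,
+// expressed with this package's wrappers. The helper name is hypothetical.
+func exampleDynamicPartition(s *Scope) []tf.Output {
+	data := Const(s.SubScope("data"), []int32{10, 20, 30, 40, 50})
+	partitions := Const(s.SubScope("parts"), []int32{0, 0, 1, 1, 0})
+	// outputs[0] == [10, 20, 50] and outputs[1] == [30, 40].
+	return DynamicPartition(s, data, partitions, 2)
+}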
+
+// Produces a string handle for the given MultiDeviceIterator.
+//
+// Arguments:
+//	multi_device_iterator: A MultiDeviceIterator resource.
+//
+// Returns A string representing the resource.
+func MultiDeviceIteratorToStringHandle(scope *Scope, multi_device_iterator tf.Output) (string_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIteratorToStringHandle",
+		Input: []tf.Input{
+			multi_device_iterator,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Checks whether a tree has been initialized.
+//
+// Arguments:
+//	tree_handle: Handle to the tree.
+//
+// Returns Whether the tree is initialized.
+func TensorForestTreeIsInitializedOp(scope *Scope, tree_handle tf.Output) (is_initialized tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorForestTreeIsInitializedOp",
+		Input: []tf.Input{
+			tree_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Gets next element for the provided shard number.
+//
+// Arguments:
+//	multi_device_iterator: A MultiDeviceIterator resource.
+//	shard_num: Integer representing which shard to fetch data for.
+//	incarnation_id: Which incarnation of the MultiDeviceIterator is running.
+//	output_types: The type list for the return values.
+//	output_shapes: The list of shapes being produced.
+//
+// Returns Result of the get_next on the dataset.
+func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.Output, shard_num tf.Output, incarnation_id tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIteratorGetNextFromShard",
+		Input: []tf.Input{
+			multi_device_iterator, shard_num, incarnation_id,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("MultiDeviceIteratorGetNextFromShard", err)
+		return
+	}
+	return components
+}
+
+// Initializes the multi device iterator with the given dataset.
+//
+// Arguments:
+//	dataset: Dataset to be iterated upon.
+//	multi_device_iterator: A MultiDeviceIteratorResource.
+//	max_buffer_size: The maximum size of the host side per device buffer to keep.
+//
+// Returns An int64 indicating which incarnation of the MultiDeviceIterator
+// is running.
+func MultiDeviceIteratorInit(scope *Scope, dataset tf.Output, multi_device_iterator tf.Output, max_buffer_size tf.Output) (incarnation_id tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MultiDeviceIteratorInit",
+		Input: []tf.Input{
+			dataset, multi_device_iterator, max_buffer_size,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
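+
+// Editor's sketch (not generated code): wiring MultiDeviceIteratorInit to
+// MultiDeviceIteratorGetNextFromShard. The helper name and buffer size are
+// illustrative; `dataset` and `iterator` are assumed to come from elsewhere.
+func exampleMultiDeviceIterator(s *Scope, dataset, iterator tf.Output, outputTypes []tf.DataType, outputShapes []tf.Shape) []tf.Output {
+	maxBufferSize := Const(s.SubScope("buf"), int64(1))
+	incarnation := MultiDeviceIteratorInit(s, dataset, iterator, maxBufferSize)
+	// Fetch the next element for shard 0 of the current incarnation.
+	shard := Const(s.SubScope("shard"), int32(0))
+	return MultiDeviceIteratorGetNextFromShard(s, iterator, shard, incarnation, outputTypes, outputShapes)
+}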
+
+// Copy a tensor, setting everything outside a central band in each innermost
+// matrix to zero.
+//
+// The `band` part is computed as follows:
+// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
+// tensor with the same shape where
+//
+// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`.
+//
+// The indicator function
+//
+// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower) &&
+//                  (num_upper < 0 || (n-m) <= num_upper)`.
+//
+// For example:
+//
+// ```
+// # if 'input' is [[ 0,  1,  2, 3]
+//                  [-1,  0,  1, 2]
+//                  [-2, -1,  0, 1]
+//                  [-3, -2, -1, 0]],
+//
+// tf.matrix_band_part(input, 1, -1) ==> [[ 0,  1,  2, 3]
+//                                        [-1,  0,  1, 2]
+//                                        [ 0, -1,  0, 1]
+//                                        [ 0,  0, -1, 0]],
+//
+// tf.matrix_band_part(input, 2, 1) ==> [[ 0,  1,  0, 0]
+//                                       [-1,  0,  1, 0]
+//                                       [-2, -1,  0, 1]
+//                                       [ 0, -2, -1, 0]]
+// ```
+//
+// Useful special cases:
+//
+// ```
+//  tf.matrix_band_part(input, 0, -1) ==> Upper triangular part.
+//  tf.matrix_band_part(input, -1, 0) ==> Lower triangular part.
+//  tf.matrix_band_part(input, 0, 0) ==> Diagonal.
+// ```
+//
+// Arguments:
+//	input: Rank `k` tensor.
+//	num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire
+// lower triangle.
+//	num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep
+// entire upper triangle.
+//
+// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor.
+func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixBandPart",
+		Input: []tf.Input{
+			input, num_lower, num_upper,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
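+
+// Editor's sketch (not generated code): one of the special cases listed above.
+// The helper name is hypothetical; `input` is assumed to be a rank >= 2 tensor.
+func exampleMatrixBandPart(s *Scope, input tf.Output) tf.Output {
+	// num_lower = 0 and num_upper = -1 extract the upper-triangular part.
+	numLower := Const(s.SubScope("lo"), int64(0))
+	numUpper := Const(s.SubScope("hi"), int64(-1))
+	return MatrixBandPart(s, input, numLower, numUpper)
+}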
+
+// Gets the next output from the given iterator as an Optional variant.
+func IteratorGetNextAsOptional(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (optional tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "IteratorGetNextAsOptional",
+		Input: []tf.Input{
+			iterator,
 		},
 		Attrs: attrs,
 	}
@@ -8231,60 +7617,6 @@
 	return op.Output(0)
 }
 
-// ComplexAbsAttr is an optional argument to ComplexAbs.
-type ComplexAbsAttr func(optionalAttr)
-
-// ComplexAbsTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_FLOAT
-func ComplexAbsTout(value tf.DataType) ComplexAbsAttr {
-	return func(m optionalAttr) {
-		m["Tout"] = value
-	}
-}
-
-// Computes the complex absolute value of a tensor.
-//
-// Given a tensor `x` of complex numbers, this operation returns a tensor of type
-// `float` or `double` that is the absolute value of each element in `x`. All
-// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute
-// value is computed as \\( \sqrt{a^2 + b^2}\\).
-func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ComplexAbs",
-		Input: []tf.Input{
-			x,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the truth value of x AND y element-wise.
-//
-// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LogicalAnd",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Quantized Batch normalization.
 //
 // This op is deprecated and will be removed in the future. Prefer
@@ -8723,24 +8055,6 @@
 	return op.Output(0)
 }
 
-// Returns the truth value of (x <= y) element-wise.
-//
-// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LessEqual",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Computes softmax activations.
 //
 // For each batch `i` and class `j` we have
@@ -8810,85 +8124,33 @@
 	return op.Output(0)
 }
 
-// BatchMatMulAttr is an optional argument to BatchMatMul.
-type BatchMatMulAttr func(optionalAttr)
-
-// BatchMatMulAdjX sets the optional adj_x attribute to value.
+// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise.
 //
-// value: If `True`, adjoint the slices of `x`. Defaults to `False`.
-// If not specified, defaults to false
-func BatchMatMulAdjX(value bool) BatchMatMulAttr {
-	return func(m optionalAttr) {
-		m["adj_x"] = value
-	}
-}
-
-// BatchMatMulAdjY sets the optional adj_y attribute to value.
-//
-// value: If `True`, adjoint the slices of `y`. Defaults to `False`.
-// If not specified, defaults to false
-func BatchMatMulAdjY(value bool) BatchMatMulAttr {
-	return func(m optionalAttr) {
-		m["adj_y"] = value
-	}
-}
-
-// Multiplies slices of two tensors in batches.
-//
-// Multiplies all slices of `Tensor` `x` and `y` (each slice can be
-// viewed as an element of a batch), and arranges the individual results
-// in a single output tensor of the same batch size. Each of the
-// individual slices can optionally be adjointed (to adjoint a matrix
-// means to transpose and conjugate it) before multiplication by setting
-// the `adj_x` or `adj_y` flag to `True`, which are by default `False`.
-//
-// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`
-// and `[..., r_y, c_y]`.
-//
-// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:
-//
-//     r_o = c_x if adj_x else r_x
-//     c_o = r_y if adj_y else c_y
-//
-// It is computed as:
-//
-//     output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])
-//
-// Arguments:
-//	x: 2-D or higher with shape `[..., r_x, c_x]`.
-//	y: 2-D or higher with shape `[..., r_y, c_y]`.
-//
-// Returns 3-D or higher with shape `[..., r_o, c_o]`
-func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) {
+// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
+// ](http://arxiv.org/abs/1511.07289)
+func Elu(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "BatchMatMul",
+		Type: "Elu",
 		Input: []tf.Input{
-			x, y,
+			features,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns which elements of x are NaN.
+// Computes square of x element-wise.
 //
-// @compatibility(numpy)
-// Equivalent to np.isnan
-// @end_compatibility
-func IsNan(scope *Scope, x tf.Output) (y tf.Output) {
+// I.e., \\(y = x * x = x^2\\).
+func Square(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IsNan",
+		Type: "Square",
 		Input: []tf.Input{
 			x,
 		},
@@ -8897,125 +8159,6 @@
 	return op.Output(0)
 }
 
-// Identity op for gradient debugging.
-//
-// This op is hidden from public in Python. It is used by TensorFlow Debugger to
-// register gradient tensors for gradient debugging.
-// This op operates on non-reference-type tensors.
-func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DebugGradientIdentity",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta.
-type ResourceSparseApplyAdadeltaAttr func(optionalAttr)
-
-// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
-//
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// var: Should be from a Variable().
-//
-// Arguments:
-//
-//	accum: Should be from a Variable().
-//	accum_update: : Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	rho: Decay factor. Must be a scalar.
-//	epsilon: Constant factor. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//
-// Returns the created operation.
-func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyAdadelta",
-		Input: []tf.Input{
-			var_, accum, accum_update, lr, rho, epsilon, grad, indices,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Checks whether a tree has been initialized.
-//
-// Arguments:
-//	tree_handle: Handle to the tree.
-//
-// Returns Whether the tree is initialized.
-func TensorForestTreeIsInitializedOp(scope *Scope, tree_handle tf.Output) (is_initialized tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorForestTreeIsInitializedOp",
-		Input: []tf.Input{
-			tree_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Gets next element for the provided shard number.
-//
-// Arguments:
-//	multi_device_iterator: A MultiDeviceIterator resource.
-//	shard_num: Integer representing which shard to fetch data for.
-//	incarnation_id: Which incarnation of the MultiDeviceIterator is running.
-//	output_types: The type list for the return values.
-//	output_shapes: The list of shapes being produced.
-//
-// Returns Result of the get_next on the dataset.
-func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.Output, shard_num tf.Output, incarnation_id tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "MultiDeviceIteratorGetNextFromShard",
-		Input: []tf.Input{
-			multi_device_iterator, shard_num, incarnation_id,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("MultiDeviceIteratorGetNextFromShard", err)
-		return
-	}
-	return components
-}
-
 // LeakyReluGradAttr is an optional argument to LeakyReluGrad.
 type LeakyReluGradAttr func(optionalAttr)
 
@@ -9054,23 +8197,6 @@
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayGradV3
-//
-// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3
-func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayWriteV2",
-		Input: []tf.Input{
-			handle, index, value, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // LeakyReluAttr is an optional argument to LeakyRelu.
 type LeakyReluAttr func(optionalAttr)
 
@@ -9336,35 +8462,6 @@
 	return op.Output(0)
 }
 
-// Computes the gradient of morphological 2-D dilation with respect to the input.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
-//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
-//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
-// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
-//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
-// Must be: `[1, rate_height, rate_width, 1]`.
-//	padding: The type of padding algorithm to use.
-//
-// Returns 4-D with shape `[batch, in_height, in_width, depth]`.
-func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "Dilation2DBackpropInput",
-		Input: []tf.Input{
-			input, filter, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Compute the polygamma function \\(\psi^{(n)}(x)\\).
 //
 // The polygamma function is defined as:
@@ -9387,35 +8484,6 @@
 	return op.Output(0)
 }
 
-// Computes second-order gradients of the maxpooling function.
-//
-// Arguments:
-//	input: The original input.
-//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
-// input of `max_pool`.
-//	argmax: The indices of the maximum values chosen for each output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
-//
-// Returns Gradients of gradients w.r.t. the input of `max_pool`.
-func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "MaxPoolGradGradWithArgmax",
-		Input: []tf.Input{
-			input, grad, argmax,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2.
 type MaxPoolGradGradV2Attr func(optionalAttr)
 
@@ -9538,6 +8606,21 @@
 	return op.Output(0)
 }
 
+// Connects N inputs to an N-way replicated TPU computation.
+func TPUReplicatedInput(scope *Scope, inputs []tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TPUReplicatedInput",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // AvgPool3DAttr is an optional argument to AvgPool3D.
 type AvgPool3DAttr func(optionalAttr)
 
@@ -9585,27 +8668,6 @@
 	return op.Output(0)
 }
 
-// Returns element-wise remainder of division. This emulates C semantics in that
-//
-// the result here is consistent with a truncating divide. E.g.
-// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.
-//
-// *NOTE*: `Mod` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Mod",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // DepthToSpaceAttr is an optional argument to DepthToSpace.
 type DepthToSpaceAttr func(optionalAttr)
 
@@ -9794,37 +8856,20 @@
 	return op.Output(0)
 }
 
-// Computes square root of x element-wise.
-//
-// I.e., \\(y = \sqrt{x} = x^{1/2}\\).
-func Sqrt(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sqrt",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// Conv3DBackpropInputAttr is an optional argument to Conv3DBackpropInput.
+type Conv3DBackpropInputAttr func(optionalAttr)
 
-// Conv3DBackpropFilterAttr is an optional argument to Conv3DBackpropFilter.
-type Conv3DBackpropFilterAttr func(optionalAttr)
-
-// Conv3DBackpropFilterDilations sets the optional dilations attribute to value.
+// Conv3DBackpropInputDilations sets the optional dilations attribute to value.
 // If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
-func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr {
+func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr {
 	return func(m optionalAttr) {
 		m["dilations"] = value
 	}
 }
 
-// Computes the gradients of 3-D convolution with respect to the filter.
+// Computes the gradients of 3-D convolution with respect to the input.
 //
-// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2
+// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2
 //
 // Arguments:
 //	input: Shape `[batch, depth, rows, cols, in_channels]`.
@@ -9835,7 +8880,7 @@
 //	strides: 1-D tensor of length 5. The stride of the sliding window for each
 // dimension of `input`. Must have `strides[0] = strides[4] = 1`.
 //	padding: The type of padding algorithm to use.
-func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterAttr) (output tf.Output) {
+func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -9844,7 +8889,7 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropFilter",
+		Type: "Conv3DBackpropInput",
 		Input: []tf.Input{
 			input, filter, out_backprop,
 		},
@@ -9854,24 +8899,6 @@
 	return op.Output(0)
 }
 
-// Computes the gradient for the rsqrt of `x` wrt its input.
-//
-// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`
-// is the corresponding input gradient.
-func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RsqrtGrad",
-		Input: []tf.Input{
-			y, dy,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative.
 type DepthwiseConv2dNativeAttr func(optionalAttr)
 
@@ -9949,78 +8976,6 @@
 	return op.Output(0)
 }
 
-// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2.
-type MaxPoolGradV2Attr func(optionalAttr)
-
-// MaxPoolGradV2DataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes gradients of the maxpooling function.
-//
-// Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
-//
-// Returns Gradients w.r.t. the input to `max_pool`.
-func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MaxPoolGradV2",
-		Input: []tf.Input{
-			orig_input, orig_output, grad, ksize, strides,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Restore a reader to a previously saved state.
-//
-// Not all Readers support being restored, so this can produce an
-// Unimplemented error.
-//
-// Arguments:
-//	reader_handle: Handle to a Reader.
-//	state: Result of a ReaderSerializeState of a Reader with type
-// matching reader_handle.
-//
-// Returns the created operation.
-func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReaderRestoreStateV2",
-		Input: []tf.Input{
-			reader_handle, state,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // MaxPoolGradAttr is an optional argument to MaxPoolGrad.
 type MaxPoolGradAttr func(optionalAttr)
 
@@ -10152,6 +9107,129 @@
 	return op.Output(0)
 }
 
+// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter.
+type Conv2DBackpropFilterAttr func(optionalAttr)
+
+// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
+// If not specified, defaults to true
+func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["use_cudnn_on_gpu"] = value
+	}
+}
+
+// Conv2DBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value.
+//
+// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith
+// dimension, the amount of padding inserted before and after the dimension is
+// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If
+// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty.
+// If not specified, defaults to <>
+func Conv2DBackpropFilterExplicitPaddings(value []int64) Conv2DBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["explicit_paddings"] = value
+	}
+}
+
+// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Conv2DBackpropFilterDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of convolution with respect to the filter.
+//
+// Arguments:
+//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+//	filter_sizes: An integer vector representing the tensor shape of `filter`,
+// where `filter` is a 4-D
+// `[filter_height, filter_width, in_channels, out_channels]` tensor.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution. Must be in the same order as the dimension specified with
+// format.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
+// the `filter` input of the convolution.
+func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Conv2DBackpropFilter",
+		Input: []tf.Input{
+			input, filter_sizes, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
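+
+// Editor's sketch (not generated code): filter gradients for a 3x3, stride-1,
+// NHWC convolution mapping 16 channels to 32. The helper name and shapes are
+// illustrative; `input` and `outBackprop` are assumed to exist in the graph.
+func exampleConv2DBackpropFilter(s *Scope, input, outBackprop tf.Output) tf.Output {
+	filterSizes := Const(s.SubScope("fs"), []int32{3, 3, 16, 32})
+	return Conv2DBackpropFilter(s, input, filterSizes, outBackprop,
+		[]int64{1, 1, 1, 1}, "SAME",
+		Conv2DBackpropFilterDataFormat("NHWC"))
+}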
+
+// Computes Psi, the derivative of Lgamma (the log of the absolute value of
+// `Gamma(x)`), element-wise.
+func Digamma(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Digamma",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the number of work units this Reader has finished processing.
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderNumWorkUnitsCompletedV2",
+		Input: []tf.Input{
+			reader_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Conv2DAttr is an optional argument to Conv2D.
 type Conv2DAttr func(optionalAttr)
 
@@ -10322,36 +9400,66 @@
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
-// Reduces `input` from `num_devices` using `reduction` to a single device.
+// LoadTPUEmbeddingADAMParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingADAMParametersGradAccumDebug.
+type LoadTPUEmbeddingADAMParametersGradAccumDebugAttr func(optionalAttr)
+
+// LoadTPUEmbeddingADAMParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
 //
-// Reduces `input` from `num_devices` using `reduction` to a single device.
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingADAMParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingADAMParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingADAMParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingADAMParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingADAMParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load ADAM embedding parameters with debug support.
 //
-// The graph should be constructed so that all inputs have a valid device
-// assignment, and the op itself is assigned one of these devices.
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
 //
-// input: The input to the reduction.
-// data: the value of the reduction across all `num_devices` devices.
-// reduction: the reduction operation to perform.
-func NcclReduce(scope *Scope, input []tf.Output, reduction string) (data tf.Output) {
+// Arguments:
+//	parameters: Value of parameters used in the ADAM optimization algorithm.
+//	momenta: Value of momenta used in the ADAM optimization algorithm.
+//	velocities: Value of velocities used in the ADAM optimization algorithm.
+//	gradient_accumulators: Value of gradient_accumulators used in the ADAM optimization algorithm.
+//	num_shards: Number of shards into which the embedding tables are divided.
+//	shard_id: Identifier of the shard for this operation.
+//
+// Returns the created operation.
+func LoadTPUEmbeddingADAMParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, velocities tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingADAMParametersGradAccumDebugAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"reduction": reduction}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "NcclReduce",
+		Type: "LoadTPUEmbeddingADAMParametersGradAccumDebug",
 		Input: []tf.Input{
-			tf.OutputList(input),
+			parameters, momenta, velocities, gradient_accumulators,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// BiasAddGradAttr is an optional argument to BiasAddGrad.
-type BiasAddGradAttr func(optionalAttr)
+// BiasAddAttr is an optional argument to BiasAdd.
+type BiasAddAttr func(optionalAttr)
 
-// BiasAddGradDataFormat sets the optional data_format attribute to value.
+// BiasAddDataFormat sets the optional data_format attribute to value.
 //
 // value: Specify the data format of the input and output data. With the
 // default format "NHWC", the bias tensor will be added to the last dimension
@@ -10361,23 +9469,23 @@
 // The tensor will be added to "in_channels", the third-to-the-last
 //     dimension.
 // If not specified, defaults to "NHWC"
-func BiasAddGradDataFormat(value string) BiasAddGradAttr {
+func BiasAddDataFormat(value string) BiasAddAttr {
 	return func(m optionalAttr) {
 		m["data_format"] = value
 	}
 }
 
-// The backward operation for "BiasAdd" on the "bias" tensor.
+// Adds `bias` to `value`.
 //
-// It accumulates all the values from out_backprop into the feature dimension.
-// For NHWC data format, the feature dimension is the last. For NCHW data format,
-// the feature dimension is the third-to-last.
+// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
+// Broadcasting is supported, so `value` may have any number of dimensions.
 //
 // Arguments:
-//	out_backprop: Any number of dimensions.
+//	value: Any number of dimensions.
+//	bias: 1-D with size the last dimension of `value`.
 //
-// Returns 1-D with size the feature dimension of `out_backprop`.
-func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) {
+// Returns Broadcasted sum of `value` and `bias`.
+func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -10386,9 +9494,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "BiasAddGrad",
+		Type: "BiasAdd",
 		Input: []tf.Input{
-			out_backprop,
+			value, bias,
 		},
 		Attrs: attrs,
 	}
@@ -10396,13 +9504,151 @@
 	return op.Output(0)
 }
 
-// Returns 0 if x == 0, and x / y otherwise, elementwise.
-func Xdivy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse.
+type SparseReduceSumSparseAttr func(optionalAttr)
+
+// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the sum of elements across dimensions of a SparseTensor.
+//
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a
+// SparseTensor.
+//
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
+//
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes can be negative,
+// in which case they are interpreted according to the indexing rules in Python.
+//
+// Arguments:
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
+func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseReduceSumSparse",
+		Input: []tf.Input{
+			input_indices, input_values, input_shape, reduction_axes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
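+
+// Editor's sketch (not generated code): reducing a SparseTensor along axis 1
+// while keeping the reduced dimension. The helper name is hypothetical; the
+// three inputs are assumed to describe a valid SparseTensor.
+func exampleSparseReduceSumSparse(s *Scope, inputIndices, inputValues, inputShape tf.Output) (tf.Output, tf.Output, tf.Output) {
+	axes := Const(s.SubScope("axes"), []int32{1})
+	return SparseReduceSumSparse(s, inputIndices, inputValues, inputShape, axes,
+		SparseReduceSumSparseKeepDims(true))
+}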
+
+// LoadTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParameters.
+type LoadTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr)
+
+// LoadTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingStochasticGradientDescentParametersTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load SGD embedding parameters.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the stochastic gradient descent optimization algorithm.
+//	num_shards: Number of shards into which the embedding tables are divided.
+//	shard_id: Identifier of the shard for this operation.
+//
+// Returns the created operation.
+func LoadTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, parameters tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingStochasticGradientDescentParameters",
+		Input: []tf.Input{
+			parameters,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Selects the k nearest centers for each point.
+//
+// Rows of points are assumed to be input points. Rows of centers are assumed to be
+// the list of candidate centers. For each point, the k centers with the smallest
+// L2 distance to it are computed.
+//
+// Arguments:
+//	points: Matrix of shape (n, d). Rows are assumed to be input points.
+//	centers: Matrix of shape (m, d). Rows are assumed to be centers.
+//	k: Number of nearest centers to return for each point. If k is larger than m, then
+// only m centers are returned.
+//
+// Returns a matrix of shape (n, min(m, k)) whose rows contain the indices of
+// the centers closest to the corresponding point, ordered by increasing
+// distance, and a matrix of shape (n, min(m, k)) whose rows contain the squared
+// L2 distance to the corresponding center in nearest_center_indices.
+func NearestNeighbors(scope *Scope, points tf.Output, centers tf.Output, k tf.Output) (nearest_center_indices tf.Output, nearest_center_distances tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Xdivy",
+		Type: "NearestNeighbors",
+		Input: []tf.Input{
+			points, centers, k,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
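+
+// Editor's sketch (not generated code): the three nearest centers per point.
+// The helper name is hypothetical; `points` and `centers` are assumed to be
+// float32 matrices of shapes (n, d) and (m, d).
+func exampleNearestNeighbors(s *Scope, points, centers tf.Output) (tf.Output, tf.Output) {
+	k := Const(s.SubScope("k"), int64(3))
+	return NearestNeighbors(s, points, centers, k)
+}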
+
+// Returns x * y element-wise.
+//
+// *NOTE*: `Multiply` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Mul",
 		Input: []tf.Input{
 			x, y,
 		},
@@ -10609,261 +9855,6 @@
 	return op.Output(0)
 }
 
-// Shuffle dimensions of x according to a permutation.
-//
-// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
-//   `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
-func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Transpose",
-		Input: []tf.Input{
-			x, perm,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MinAttr is an optional argument to Min.
-type MinAttr func(optionalAttr)
-
-// MinKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func MinKeepDims(value bool) MinAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
-	}
-}
-
-// Computes the minimum of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `axis`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `axis`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
-//
-// Arguments:
-//	input: The tensor to reduce.
-//	axis: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
-//
-// Returns The reduced tensor.
-func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Min",
-		Input: []tf.Input{
-			input, axis,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the Bessel i1e function of `x` element-wise.
-//
-// Exponentially scaled modified Bessel function of order 0 defined as
-// `bessel_i1e(x) = exp(-abs(x)) bessel_i1(x)`.
-//
-// This function is faster and numerically stabler than `bessel_i1(x)`.
-func BesselI1e(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BesselI1e",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MapClearAttr is an optional argument to MapClear.
-type MapClearAttr func(optionalAttr)
-
-// MapClearCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func MapClearCapacity(value int64) MapClearAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// MapClearMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func MapClearMemoryLimit(value int64) MapClearAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// MapClearContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func MapClearContainer(value string) MapClearAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// MapClearSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapClearSharedName(value string) MapClearAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op removes all elements in the underlying container.
-//
-// Returns the created operation.
-func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MapClear",
-
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// DecodeCSVAttr is an optional argument to DecodeCSV.
-type DecodeCSVAttr func(optionalAttr)
-
-// DecodeCSVFieldDelim sets the optional field_delim attribute to value.
-//
-// value: char delimiter to separate fields in a record.
-// If not specified, defaults to ","
-func DecodeCSVFieldDelim(value string) DecodeCSVAttr {
-	return func(m optionalAttr) {
-		m["field_delim"] = value
-	}
-}
-
-// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value.
-//
-// value: If false, treats double quotation marks as regular
-// characters inside of the string fields (ignoring RFC 4180, Section 2,
-// Bullet 5).
-// If not specified, defaults to true
-func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr {
-	return func(m optionalAttr) {
-		m["use_quote_delim"] = value
-	}
-}
-
-// DecodeCSVNaValue sets the optional na_value attribute to value.
-//
-// value: Additional string to recognize as NA/NaN.
-// If not specified, defaults to ""
-func DecodeCSVNaValue(value string) DecodeCSVAttr {
-	return func(m optionalAttr) {
-		m["na_value"] = value
-	}
-}
-
-// DecodeCSVSelectCols sets the optional select_cols attribute to value.
-// If not specified, defaults to <>
-func DecodeCSVSelectCols(value []int64) DecodeCSVAttr {
-	return func(m optionalAttr) {
-		m["select_cols"] = value
-	}
-}
-
-// Convert CSV records to tensors. Each column maps to one tensor.
-//
-// RFC 4180 format is expected for the CSV records.
-// (https://tools.ietf.org/html/rfc4180)
-// Note that we allow leading and trailing spaces with int or float field.
-//
-// Arguments:
-//	records: Each string is a record/row in the csv and all records should have
-// the same format.
-//	record_defaults: One tensor per column of the input record, with either a
-// scalar default value for that column or an empty vector if the column is
-// required.
-//
-// Returns Each tensor will have the same shape as records.
-func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DecodeCSV",
-		Input: []tf.Input{
-			records, tf.OutputList(record_defaults),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("DecodeCSV", err)
-		return
-	}
-	return output
-}
-
-// Convert JSON-encoded Example records to binary protocol buffer strings.
-//
-// This op translates a tensor containing Example records, encoded using
-// the [standard JSON
-// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json),
-// into a tensor containing the same records encoded as binary protocol
-// buffers. The resulting tensor can then be fed to any of the other
-// Example-parsing ops.
-//
-// Arguments:
-//	json_examples: Each string is a JSON object serialized according to the JSON
-// mapping of the Example proto.
-//
-// Returns Each string is a binary Example protocol buffer corresponding
-// to the respective element of `json_examples`.
-func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DecodeJSONExample",
-		Input: []tf.Input{
-			json_examples,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Transforms a Tensor into a serialized TensorProto proto.
 //
 // Arguments:
@@ -10884,21 +9875,6 @@
 	return op.Output(0)
 }
 
-// Computes acos of x element-wise.
-func Acos(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Acos",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // UnbatchGradAttr is an optional argument to UnbatchGrad.
 type UnbatchGradAttr func(optionalAttr)
 
@@ -11165,53 +10141,53 @@
 	return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values
 }
 
-// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize.
-type QuantizeAndDequantizeAttr func(optionalAttr)
+// SparseToDenseAttr is an optional argument to SparseToDense.
+type SparseToDenseAttr func(optionalAttr)
 
-// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr {
-	return func(m optionalAttr) {
-		m["signed_input"] = value
-	}
-}
-
-// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value.
-// If not specified, defaults to 8
-func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
-	}
-}
-
-// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value.
-// If not specified, defaults to false
-func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr {
-	return func(m optionalAttr) {
-		m["range_given"] = value
-	}
-}
-
-// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value.
-// If not specified, defaults to 0
-func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr {
-	return func(m optionalAttr) {
-		m["input_min"] = value
-	}
-}
-
-// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value.
-// If not specified, defaults to 0
-func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr {
-	return func(m optionalAttr) {
-		m["input_max"] = value
-	}
-}
-
-// Use QuantizeAndDequantizeV2 instead.
+// SparseToDenseValidateIndices sets the optional validate_indices attribute to value.
 //
-// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2
-func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) {
+// value: If true, indices are checked to make sure they are sorted in
+// lexicographic order and that there are no repeats.
+// If not specified, defaults to true
+func SparseToDenseValidateIndices(value bool) SparseToDenseAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Converts a sparse representation into a dense tensor.
+//
+// Builds an array `dense` with shape `output_shape` such that
+//
+// ```
+// # If sparse_indices is scalar
+// dense[i] = (i == sparse_indices ? sparse_values : default_value)
+//
+// # If sparse_indices is a vector, then for each i
+// dense[sparse_indices[i]] = sparse_values[i]
+//
+// # If sparse_indices is an n by d matrix, then for each i in [0, n)
+// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
+// ```
+//
+// All other values in `dense` are set to `default_value`.  If `sparse_values` is a
+// scalar, all sparse indices are set to this single value.
+//
+// Indices should be sorted in lexicographic order, and indices must not
+// contain any repeats. If `validate_indices` is true, these properties
+// are checked during execution.
+//
+// Arguments:
+//	sparse_indices: 0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete
+// index where `sparse_values[i]` will be placed.
+//	output_shape: 1-D.  Shape of the dense output tensor.
+//	sparse_values: 1-D.  Values corresponding to each row of `sparse_indices`,
+// or a scalar value to be used for all sparse indices.
+//	default_value: Scalar value to set for indices not specified in
+// `sparse_indices`.
+//
+// Returns Dense output tensor of shape `output_shape`.
+func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -11220,7 +10196,54 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizeAndDequantize",
+		Type: "SparseToDense",
+		Input: []tf.Input{
+			sparse_indices, output_shape, sparse_values, default_value,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
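+
+// A hedged concrete example of the semantics documented above (illustrative
+// values; assumes a Scope `s` and the Go op package imported as `op`):
+//
+// ```go
+// indices := op.Const(s, [][]int64{{0}, {2}}) // rows 0 and 2 receive values
+// shape := op.Const(s, []int64{4})            // dense output is a length-4 vector
+// values := op.Const(s, []float32{5, 7})
+// dflt := op.Const(s, float32(0))
+// dense := op.SparseToDense(s, indices, shape, values, dflt)
+// // dense => [5, 0, 7, 0]
+// ```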
+
+// PreventGradientAttr is an optional argument to PreventGradient.
+type PreventGradientAttr func(optionalAttr)
+
+// PreventGradientMessage sets the optional message attribute to value.
+//
+// value: Will be printed in the error when anyone tries to differentiate
+// this operation.
+// If not specified, defaults to ""
+func PreventGradientMessage(value string) PreventGradientAttr {
+	return func(m optionalAttr) {
+		m["message"] = value
+	}
+}
+
+// An identity op that triggers an error if a gradient is requested.
+//
+// When executed in a graph, this op outputs its input tensor as-is.
+//
+// When building ops to compute gradients, the TensorFlow gradient system
+// will return an error when trying to look up the gradient of this op,
+// because no gradient may ever be registered for this function.  This
+// op exists to prevent subtle bugs from silently returning unimplemented
+// gradients in some corner cases.
+//
+// Arguments:
+//	input: any tensor.
+//
+// Returns the same input tensor.
+func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "PreventGradient",
 		Input: []tf.Input{
 			input,
 		},
@@ -11230,344 +10253,71 @@
 	return op.Output(0)
 }
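+
+// A hedged sketch of where PreventGradient is useful: guarding an output that
+// has no meaningful gradient, so that differentiation fails loudly instead of
+// silently producing wrong values. A Scope `s` and an input `lookupResult`
+// are assumed; the message text is illustrative.
+//
+// ```go
+// guarded := op.PreventGradient(s, lookupResult,
+// 	op.PreventGradientMessage("gradient of lookupResult is not implemented"))
+// // Any later attempt to differentiate through `guarded` raises an error
+// // carrying the message above.
+// ```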
 
-// Returns locations of nonzero / true values in a tensor.
-//
-// This operation returns the coordinates of true elements in `condition`. The
-// coordinates are returned in a 2-D tensor where the first dimension (rows)
-// represents the number of true elements, and the second dimension (columns)
-// represents the coordinates of the true elements. Keep in mind, the shape of
-// the output tensor can vary depending on how many true values there are in
-// `condition`. Indices are output in row-major order.
-//
-// For example:
-//
-// ```
-// # 'input' tensor is [[True, False]
-// #                    [True, False]]
-// # 'input' has two true values, so output has two coordinates.
-// # 'input' has rank of 2, so coordinates have two indices.
-// where(input) ==> [[0, 0],
-//                   [1, 0]]
-//
-// # `condition` tensor is [[[True, False]
-// #                     [True, False]]
-// #                    [[False, True]
-// #                     [False, True]]
-// #                    [[False, False]
-// #                     [False, True]]]
-// # 'input' has 5 true values, so output has 5 coordinates.
-// # 'input' has rank of 3, so coordinates have three indices.
-// where(input) ==> [[0, 0, 0],
-//                   [0, 1, 0],
-//                   [1, 0, 1],
-//                   [1, 1, 1],
-//                   [2, 1, 1]]
-//
-// # `condition` tensor is [[[1.5,  0.0]
-// #                     [-0.5, 0.0]]
-// #                    [[0.0,  0.25]
-// #                     [0.0,  0.75]]
-// #                    [[0.0,  0.0]
-// #                     [0.0,  0.01]]]
-// # 'input' has 5 nonzero values, so output has 5 coordinates.
-// # 'input' has rank of 3, so coordinates have three indices.
-// where(input) ==> [[0, 0, 0],
-//                   [0, 1, 0],
-//                   [1, 0, 1],
-//                   [1, 1, 1],
-//                   [2, 1, 1]]
-//
-// # `condition` tensor is [[[1.5 + 0.0j, 0.0  + 0.0j]
-// #                     [0.0 + 0.5j, 0.0  + 0.0j]]
-// #                    [[0.0 + 0.0j, 0.25 + 1.5j]
-// #                     [0.0 + 0.0j, 0.75 + 0.0j]]
-// #                    [[0.0 + 0.0j, 0.0  + 0.0j]
-// #                     [0.0 + 0.0j, 0.01 + 0.0j]]]
-// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates.
-// # 'input' has rank of 3, so coordinates have three indices.
-// where(input) ==> [[0, 0, 0],
-//                   [0, 1, 0],
-//                   [1, 0, 1],
-//                   [1, 1, 1],
-//                   [2, 1, 1]]
-// ```
-func Where(scope *Scope, condition tf.Output) (index tf.Output) {
+// Computes asin of x element-wise.
+func Asin(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Where",
+		Type: "Asin",
 		Input: []tf.Input{
-			condition,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// QueueDequeueV2Attr is an optional argument to QueueDequeueV2.
-type QueueDequeueV2Attr func(optionalAttr)
-
-// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value.
+// Computes the sum along sparse segments of a tensor.
 //
-// value: If the queue is empty, this operation will block for up to
-// timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr {
-	return func(m optionalAttr) {
-		m["timeout_ms"] = value
-	}
-}
-
-// Dequeues a tuple of one or more tensors from the given queue.
+// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+// missing, the `output` tensor at that position will be zeroed.
 //
-// This operation has k outputs, where k is the number of components
-// in the tuples stored in the given queue, and output i is the ith
-// component of the dequeued tuple.
+// Read
+// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/sparse#Segmentation)
+// for an explanation of segments.
 //
-// N.B. If the queue is empty, this operation will block until an element
-// has been dequeued (or 'timeout_ms' elapses, if specified).
+// For example:
+//
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// tf.sparse_segment_sum_with_num_segments(
+//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+// # => [[0 0 0 0]
+// #     [0 0 0 0]
+// #     [0 0 0 0]]
+//
+// tf.sparse_segment_sum_with_num_segments(c,
+//                                         tf.constant([0, 1]),
+//                                         tf.constant([0, 2]),
+//                                         num_segments=4)
+// # => [[ 1  2  3  4]
+// #     [ 0  0  0  0]
+// #     [-1 -2 -3 -4]
+// #     [ 0  0  0  0]]
+// ```
 //
 // Arguments:
-//	handle: The handle to a queue.
-//	component_types: The type of each component in a tuple.
 //
-// Returns One or more tensors that were dequeued as a tuple.
-func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) {
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
+//
+// Returns a tensor with the same shape as `data`, except for dimension 0, which
+// has size `num_segments`.
+func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "QueueDequeueV2",
+		Type: "SparseSegmentSumWithNumSegments",
 		Input: []tf.Input{
-			handle,
+			data, indices, segment_ids, num_segments,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("QueueDequeueV2", err)
-		return
-	}
-	return components
-}
-
-// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample.
-type ParseSequenceExampleAttr func(optionalAttr)
-
-// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["Ncontext_sparse"] = value
-	}
-}
-
-// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["Ncontext_dense"] = value
-	}
-}
-
-// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["Nfeature_list_sparse"] = value
-	}
-}
-
-// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["Nfeature_list_dense"] = value
-	}
-}
-
-// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value.
-//
-// value: A list of Ncontext_sparse types; the data types of data in
-// each context Feature given in context_sparse_keys.
-// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
-// DT_INT64 (Int64List), and DT_STRING (BytesList).
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["context_sparse_types"] = value
-	}
-}
-
-// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["feature_list_dense_types"] = value
-	}
-}
-
-// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value.
-//
-// value: A list of Ncontext_dense shapes; the shapes of data in
-// each context Feature given in context_dense_keys.
-// The number of elements in the Feature corresponding to context_dense_key[j]
-// must always equal context_dense_shapes[j].NumEntries().
-// The shape of context_dense_values[j] will match context_dense_shapes[j].
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["context_dense_shapes"] = value
-	}
-}
-
-// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value.
-//
-// value: A list of Nfeature_list_sparse types; the data types
-// of data in each FeatureList given in feature_list_sparse_keys.
-// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
-// DT_INT64 (Int64List), and DT_STRING (BytesList).
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["feature_list_sparse_types"] = value
-	}
-}
-
-// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value.
-//
-// value: A list of Nfeature_list_dense shapes; the shapes of
-// data in each FeatureList given in feature_list_dense_keys.
-// The shape of each Feature in the FeatureList corresponding to
-// feature_list_dense_key[j] must always equal
-// feature_list_dense_shapes[j].NumEntries().
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["feature_list_dense_shapes"] = value
-	}
-}
-
-// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors.
-//
-// Arguments:
-//	serialized: A vector containing binary serialized SequenceExample protos.
-//	debug_name: A vector containing the names of the serialized protos.
-// May contain, for example, table key (descriptive) name for the
-// corresponding serialized proto.  This is purely useful for debugging
-// purposes, and the presence of values here has no effect on the output.
-// May also be an empty vector if no name is available.
-//	context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty).
-// context_dense_defaults[j] provides default values
-// when the SequenceExample's context map lacks context_dense_key[j].
-// If an empty Tensor is provided for context_dense_defaults[j],
-// then the Feature context_dense_keys[j] is required.
-// The input type is inferred from context_dense_defaults[j], even when it's
-// empty.  If context_dense_defaults[j] is not empty, its shape must match
-// context_dense_shapes[j].
-//	feature_list_dense_missing_assumed_empty: A vector listing the
-// FeatureList keys which may be missing from the SequenceExamples.  If the
-// associated FeatureList is missing, it is treated as empty.  By default,
-// any FeatureList not listed in this vector must exist in the SequenceExamples.
-//	context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars).
-// The keys expected in the Examples' features associated with context_sparse
-// values.
-//	context_dense_keys: A list of Ncontext_dense string Tensors (scalars).
-// The keys expected in the SequenceExamples' context features associated with
-// dense values.
-//	feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors
-// (scalars).  The keys expected in the FeatureLists associated with sparse
-// values.
-//	feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars).
-// The keys expected in the SequenceExamples' feature_lists associated
-// with lists of dense values.
-func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ParseSequenceExample",
-		Input: []tf.Input{
-			serialized, debug_name, tf.OutputList(context_dense_defaults),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil {
-		scope.UpdateErr("ParseSequenceExample", err)
-		return
-	}
-	return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths
+	return op.Output(0)
 }
 
 // SparseReduceMaxAttr is an optional argument to SparseReduceMax.
@@ -11625,322 +10375,6 @@
 	return op.Output(0)
 }
 
-// Computes the Gauss error function of `x` element-wise.
-func Erf(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Erf",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns element-wise largest integer not greater than x.
-func Floor(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Floor",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// OneHotAttr is an optional argument to OneHot.
-type OneHotAttr func(optionalAttr)
-
-// OneHotAxis sets the optional axis attribute to value.
-//
-// value: The axis to fill (default: -1, a new inner-most axis).
-// If not specified, defaults to -1
-func OneHotAxis(value int64) OneHotAttr {
-	return func(m optionalAttr) {
-		m["axis"] = value
-	}
-}
-
-// Returns a one-hot tensor.
-//
-// The locations represented by indices in `indices` take value `on_value`,
-// while all other locations take value `off_value`.
-//
-// If the input `indices` is rank `N`, the output will have rank `N+1`,
-// The new axis is created at dimension `axis` (default: the new axis is
-// appended at the end).
-//
-// If `indices` is a scalar the output shape will be a vector of length `depth`.
-//
-// If `indices` is a vector of length `features`, the output shape will be:
-// ```
-//   features x depth if axis == -1
-//   depth x features if axis == 0
-// ```
-//
-// If `indices` is a matrix (batch) with shape `[batch, features]`,
-// the output shape will be:
-// ```
-//   batch x features x depth if axis == -1
-//   batch x depth x features if axis == 1
-//   depth x batch x features if axis == 0
-// ```
-//
-//
-// Examples
-// =========
-//
-// Suppose that
-// ```
-//   indices = [0, 2, -1, 1]
-//   depth = 3
-//   on_value = 5.0
-//   off_value = 0.0
-//   axis = -1
-// ```
-//
-// Then output is `[4 x 3]`:
-// ```
-// output =
-//   [5.0 0.0 0.0]  // one_hot(0)
-//   [0.0 0.0 5.0]  // one_hot(2)
-//   [0.0 0.0 0.0]  // one_hot(-1)
-//   [0.0 5.0 0.0]  // one_hot(1)
-// ```
-//
-// Suppose that
-// ```
-//   indices = [0, 2, -1, 1]
-//   depth = 3
-//   on_value = 0.0
-//   off_value = 3.0
-//   axis = 0
-// ```
-//
-// Then output is `[3 x 4]`:
-// ```
-// output =
-//   [0.0 3.0 3.0 3.0]
-//   [3.0 3.0 3.0 0.0]
-//   [3.0 3.0 3.0 3.0]
-//   [3.0 0.0 3.0 3.0]
-// //  ^                one_hot(0)
-// //      ^            one_hot(2)
-// //          ^        one_hot(-1)
-// //              ^    one_hot(1)
-// ```
-//
-// Suppose that
-// ```
-//   indices = [[0, 2], [1, -1]]
-//   depth = 3
-//   on_value = 1.0
-//   off_value = 0.0
-//   axis = -1
-// ```
-//
-// Then output is `[2 x 2 x 3]`:
-// ```
-// output =
-//   [
-//     [1.0, 0.0, 0.0]  // one_hot(0)
-//     [0.0, 0.0, 1.0]  // one_hot(2)
-//   ][
-//     [0.0, 1.0, 0.0]  // one_hot(1)
-//     [0.0, 0.0, 0.0]  // one_hot(-1)
-//   ]
-// ```
-//
-// Arguments:
-//	indices: A tensor of indices.
-//	depth: A scalar defining the depth of the one hot dimension.
-//	on_value: A scalar defining the value to fill in output when `indices[j] = i`.
-//	off_value: A scalar defining the value to fill in output when `indices[j] != i`.
-//
-// Returns The one-hot tensor.
-func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "OneHot",
-		Input: []tf.Input{
-			indices, depth, on_value, off_value,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// CudnnRNNAttr is an optional argument to CudnnRNN.
-type CudnnRNNAttr func(optionalAttr)
-
-// CudnnRNNRnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNRnnMode(value string) CudnnRNNAttr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNInputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNInputMode(value string) CudnnRNNAttr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
-}
-
-// CudnnRNNDirection sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNDirection(value string) CudnnRNNAttr {
-	return func(m optionalAttr) {
-		m["direction"] = value
-	}
-}
-
-// CudnnRNNDropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNDropout(value float32) CudnnRNNAttr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
-	}
-}
-
-// CudnnRNNSeed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNSeed(value int64) CudnnRNNAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// CudnnRNNSeed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNSeed2(value int64) CudnnRNNAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// CudnnRNNIsTraining sets the optional is_training attribute to value.
-// If not specified, defaults to true
-func CudnnRNNIsTraining(value bool) CudnnRNNAttr {
-	return func(m optionalAttr) {
-		m["is_training"] = value
-	}
-}
-
-// A RNN backed by cuDNN.
-//
-// Computes the RNN from the input and initial states, with respect to the params
-// buffer.
-//
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicate whether there is a linear projection between the input and
-//   the actual computation before the first layer. 'skip_input' is only allowed
-//   when input_size == num_units; 'auto_select' implies 'skip_input' when
-//   input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used. Should be
-//   "unidirectional" or "bidirectional".
-// dropout: Dropout probability. When set to 0., dropout is disabled.
-// seed: The 1st part of a seed to initialize dropout.
-// seed2: The 2nd part of a seed to initialize dropout.
-// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
-// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
-//     num_units].
-// input_c: For LSTM, a 3-D tensor with the shape of
-//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
-// params: A 1-D tensor that contains the weights and biases in an opaque layout.
-//     The size must be created through CudnnRNNParamsSize, and initialized
-//     separately. Note that they might not be compatible across different
-//     generations. So it is a good idea to save and restore
-// output: A 3-D tensor with the shape of [seq_length, batch_size,
-//     dir * num_units].
-// output_h: The same shape has input_h.
-// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
-// is_training: Indicates whether this operation is used for inferenece or
-//   training.
-// reserve_space: An opaque tensor that can be used in backprop calculation. It
-//   is only produced if is_training is false.
-func CudnnRNN(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, optional ...CudnnRNNAttr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CudnnRNN",
-		Input: []tf.Input{
-			input, input_h, input_c, params,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
-}
-
-// DecodeCompressedAttr is an optional argument to DecodeCompressed.
-type DecodeCompressedAttr func(optionalAttr)
-
-// DecodeCompressedCompressionType sets the optional compression_type attribute to value.
-//
-// value: A scalar containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-// If not specified, defaults to ""
-func DecodeCompressedCompressionType(value string) DecodeCompressedAttr {
-	return func(m optionalAttr) {
-		m["compression_type"] = value
-	}
-}
-
-// Decompress strings.
-//
-// This op decompresses each element of the `bytes` input `Tensor`, which
-// is assumed to be compressed using the given `compression_type`.
-//
-// The `output` is a string `Tensor` of the same shape as `bytes`,
-// each element containing the decompressed data from the corresponding
-// element in `bytes`.
-//
-// Arguments:
-//	bytes: A Tensor of string which is compressed.
-//
-// Returns A Tensor with the same shape as input `bytes`, uncompressed
-// from bytes.
-func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DecodeCompressed",
-		Input: []tf.Input{
-			bytes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // DecodeRawAttr is an optional argument to DecodeRaw.
 type DecodeRawAttr func(optionalAttr)
 
@@ -11984,411 +10418,52 @@
 	return op.Output(0)
 }
 
-// Computes natural logarithm of (1 + x) element-wise.
-//
-// I.e., \\(y = \log_e (1 + x)\\).
-func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Log1p",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// RetrieveTPUEmbeddingADAMParametersAttr is an optional argument to RetrieveTPUEmbeddingADAMParameters.
+type RetrieveTPUEmbeddingADAMParametersAttr func(optionalAttr)
 
-// Computes rectified linear 6 gradients for a Relu6 operation.
+// RetrieveTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
 //
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
-//	features: The features passed as input to the corresponding Relu6 operation, or
-// its output; using either one produces the same result.
-//
-// Returns The gradients:
-// `gradients * (features > 0) * (features < 6)`.
-func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Relu6Grad",
-		Input: []tf.Input{
-			gradients, features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResizeBicubicAttr is an optional argument to ResizeBicubic.
-type ResizeBicubicAttr func(optionalAttr)
-
-// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
-//
-// value: If true, the centers of the 4 corner pixels of the input and output tensors are
-// aligned, preserving the values at the corner pixels. Defaults to false.
-// If not specified, defaults to false
-func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingADAMParametersTableId(value int64) RetrieveTPUEmbeddingADAMParametersAttr {
 	return func(m optionalAttr) {
-		m["align_corners"] = value
+		m["table_id"] = value
 	}
 }
 
-// Resize `images` to `size` using bicubic interpolation.
+// RetrieveTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingADAMParametersTableName(value string) RetrieveTPUEmbeddingADAMParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve ADAM embedding parameters.
 //
-// Input images can be of different types but output images are always float.
+// An op that retrieves optimization parameters from embedding tables in HBM to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
 //
-// Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
-//
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
+// Returns:
+//	parameters: The parameters updated by the ADAM optimization algorithm.
+//	momenta: The momenta updated by the ADAM optimization algorithm.
+//	velocities: The velocities updated by the ADAM optimization algorithm.
+func RetrieveTPUEmbeddingADAMParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResizeBicubic",
-		Input: []tf.Input{
-			images, size,
-		},
+		Type: "RetrieveTPUEmbeddingADAMParameters",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Gather ragged slices from `params` axis `0` according to `indices`.
-//
-// Outputs a `RaggedTensor` output composed from `output_dense_values` and
-// `output_nested_splits`, such that:
-//
-// ```python
-// output.shape = indices.shape + params.shape[1:]
-// output.ragged_rank = indices.shape.ndims + params.ragged_rank
-// output[i...j, d0...dn] = params[indices[i...j], d0...dn]
-// ```
-//
-// where
-//
-// * `params =
-//    ragged.from_nested_row_splits(params_dense_values, params_nested_splits)`
-//    provides the values that should be gathered.
-// * `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which
-//    values should be gathered.
-// * `output =
-//    ragged.from_nested_row_splits(output_dense_values, output_nested_splits)`
-//    is the output tensor.
-//
-// (Note: This c++ op is used to implement the higher-level python
-// `tf.ragged.gather` op, which also supports ragged indices.)
-//
-//
-// Arguments:
-//	params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the
-// `params` RaggedTensor input.
-//	params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change
-// at the python level from dense_values to flat_values, so dense_values is the
-// deprecated name.
-//	indices: Indices in the outermost dimension of `params` of the values that should be
-// gathered.
-//	OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain
-// this number of `row_splits` tensors. This value should equal
-// `indices.shape.ndims + params.ragged_rank - 1`.
-//
-// Returns The `nested_row_splits` tensors that define the row-partitioning for the
-// returned RaggedTensor.The `flat_values` for the returned RaggedTensor.
-func RaggedGather(scope *Scope, params_nested_splits []tf.Output, params_dense_values tf.Output, indices tf.Output, OUTPUT_RAGGED_RANK int64) (output_nested_splits []tf.Output, output_dense_values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"OUTPUT_RAGGED_RANK": OUTPUT_RAGGED_RANK}
-	opspec := tf.OpSpec{
-		Type: "RaggedGather",
-		Input: []tf.Input{
-			tf.OutputList(params_nested_splits), params_dense_values, indices,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil {
-		scope.UpdateErr("RaggedGather", err)
-		return
-	}
-	output_dense_values = op.Output(idx)
-	return output_nested_splits, output_dense_values
-}
-
-// Greedily selects a subset of bounding boxes in descending order of score,
-//
-// pruning away boxes that have high intersection-over-union (IOU) overlap
-// with previously selected boxes.  Bounding boxes are supplied as
-// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-// diagonal pair of box corners and the coordinates can be provided as normalized
-// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-// is agnostic to where the origin is in the coordinate system.  Note that this
-// algorithm is invariant to orthogonal transformations and translations
-// of the coordinate system; thus translating or reflections of the coordinate
-// system result in the same boxes being selected by the algorithm.
-//
-// The output of this operation is a set of integers indexing into the input
-// collection of bounding boxes representing the selected boxes.  The bounding
-// box coordinates corresponding to the selected indices can then be obtained
-// using the `tf.gather operation`.  For example:
-//
-//   selected_indices = tf.image.non_max_suppression_v2(
-//       boxes, scores, max_output_size, iou_threshold)
-//   selected_boxes = tf.gather(boxes, selected_indices)
-//
-// Arguments:
-//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-// score corresponding to each box (each row of boxes).
-//	max_output_size: A scalar integer tensor representing the maximum number of
-// boxes to be selected by non max suppression.
-//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
-// boxes overlap too much with respect to IOU.
-//
-// Returns A 1-D integer tensor of shape `[M]` representing the selected
-// indices from the boxes tensor, where `M <= max_output_size`.
-func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "NonMaxSuppressionV2",
-		Input: []tf.Input{
-			boxes, scores, max_output_size, iou_threshold,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Subtracts sparse updates from the variable referenced by `resource`.
-//
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] -= updates[...]
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] -= updates[i, ...]
-//
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...]
-//
-// Duplicate entries are handled correctly: if multiple `indices` reference
-// the same location, their contributions add.
-//
-// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-// </div>
-//
-// Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
-//
-// Returns the created operation.
-func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceScatterSub",
-		Input: []tf.Input{
-			resource, indices, updates,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Converts a `RaggedTensor` into a `SparseTensor` with the same values.
-//
-// input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits)
-// output=SparseTensor(indices=sparse_indices, values=sparse_values,
-//                     dense_shape=sparse_dense_shape)
-//
-// Arguments:
-//	rt_nested_splits: The `row_splits` for the `RaggedTensor`.
-//	rt_dense_values: The `flat_values` for the `RaggedTensor`.
-//
-// Returns The indices for the `SparseTensor`.The values of the `SparseTensor`.`sparse_dense_shape` is a tight bounding box of the input `RaggedTensor`.
-func RaggedTensorToSparse(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output) (sparse_indices tf.Output, sparse_values tf.Output, sparse_dense_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RaggedTensorToSparse",
-		Input: []tf.Input{
-			tf.OutputList(rt_nested_splits), rt_dense_values,
-		},
-	}
-	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1), op.Output(2)
 }
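+
+// A hedged usage sketch for the retrieval op above, assuming a Scope `s`, the
+// Go op package imported as `op`, and a single-shard setup with an illustrative
+// table name; typically run right before writing a checkpoint:
+//
+// ```go
+// params, momenta, velocities := op.RetrieveTPUEmbeddingADAMParameters(
+// 	s, 1 /*num_shards*/, 0 /*shard_id*/,
+// 	op.RetrieveTPUEmbeddingADAMParametersTableName("table0"))
+// // The three outputs can then be fed to the checkpoint-saving subgraph.
+// ```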
 
-// Check if the input matches the regex pattern.
-//
-// The input is a string tensor of any shape. The pattern is a scalar
-// string tensor which is applied to every element of the input tensor.
-// The boolean values (True or False) of the output tensor indicate
-// if the input matches the regex pattern provided.
-//
-// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
-//
-// Arguments:
-//	input: A string tensor of the text to be processed.
-//	pattern: A scalar string tensor containing the regular expression to match the input.
-//
-// Returns A bool tensor with the same shape as `input`.
-func RegexFullMatch(scope *Scope, input tf.Output, pattern tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RegexFullMatch",
-		Input: []tf.Input{
-			input, pattern,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Says whether the targets are in the top `K` predictions.
-//
-// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
-// prediction for the target class is among the top `k` predictions among
-// all predictions for example `i`. Note that the behavior of `InTopK` differs
-// from the `TopK` op in its handling of ties; if multiple classes have the
-// same prediction value and straddle the top-`k` boundary, all of those
-// classes are considered to be in the top `k`.
-//
-// More formally, let
-//
-//   \\(predictions_i\\) be the predictions for all classes for example `i`,
-//   \\(targets_i\\) be the target class for example `i`,
-//   \\(out_i\\) be the output for example `i`,
-//
-// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
-//
-// Arguments:
-//	predictions: A `batch_size` x `classes` tensor.
-//	targets: A `batch_size` vector of class ids.
-//	k: Number of top elements to look at for computing precision.
-//
-// Returns Computed precision at `k` as a `bool Tensor`.
-func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "InTopKV2",
-		Input: []tf.Input{
-			predictions, targets, k,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RandomShuffleAttr is an optional argument to RandomShuffle.
-type RandomShuffleAttr func(optionalAttr)
-
-// RandomShuffleSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomShuffleSeed(value int64) RandomShuffleAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// RandomShuffleSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomShuffleSeed2(value int64) RandomShuffleAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Randomly shuffles a tensor along its first dimension.
-//
-//   The tensor is shuffled along dimension 0, such that each `value[j]` is mapped
-//   to one and only one `output[i]`. For example, a mapping that might occur for a
-//   3x2 tensor is:
-//
-// ```
-// [[1, 2],       [[5, 6],
-//  [3, 4],  ==>   [1, 2],
-//  [5, 6]]        [3, 4]]
-// ```
-//
-// Arguments:
-//	value: The tensor to be shuffled.
-//
-// Returns A tensor of same shape and type as `value`, shuffled along its first
-// dimension.
-func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RandomShuffle",
-		Input: []tf.Input{
-			value,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes sigmoid of `x` element-wise.
-//
-// Specifically, `y = 1 / (1 + exp(-x))`.
-func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sigmoid",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // FusedBatchNormAttr is an optional argument to FusedBatchNorm.
 type FusedBatchNormAttr func(optionalAttr)
 
@@ -12678,126 +10753,6 @@
 	return scope.AddOperation(opspec)
 }
 
-// Computes exponential of x element-wise.  \\(y = e^x\\).
-func Exp(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Exp",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// NthElementAttr is an optional argument to NthElement.
-type NthElementAttr func(optionalAttr)
-
-// NthElementReverse sets the optional reverse attribute to value.
-//
-// value: When set to True, find the nth-largest value in the vector and vice
-// versa.
-// If not specified, defaults to false
-func NthElementReverse(value bool) NthElementAttr {
-	return func(m optionalAttr) {
-		m["reverse"] = value
-	}
-}
-
-// Finds values of the `n`-th order statistic for the last dimension.
-//
-// If the input is a vector (rank-1), finds the entries which is the nth-smallest
-// value in the vector and outputs their values as scalar tensor.
-//
-// For matrices (resp. higher rank input), computes the entries which is the
-// nth-smallest value in each row (resp. vector along the last dimension). Thus,
-//
-//     values.shape = input.shape[:-1]
-//
-// Arguments:
-//	input: 1-D or higher with last dimension at least `n+1`.
-//	n: 0-D. Position of sorted vector to select along the last dimension (along
-// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
-//
-// Returns The `n`-th order statistic along each last dimensional slice.
-func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "NthElement",
-		Input: []tf.Input{
-			input, n,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the maximum along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
-// for an explanation of segments.
-//
-// This operator is similar to the unsorted segment sum operator found
-// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
-// Instead of computing the sum over segments, it computes the maximum such that:
-//
-// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such
-// that `segment_ids[j...] == i`.
-//
-// If the maximum is empty for a given segment ID `i`, it outputs the smallest
-// possible value for the specific numeric type,
-// `output[i] = numeric_limits<T>::lowest()`.
-//
-// If the given segment ID `i` is negative, then the corresponding value is
-// dropped, and will not be included in the result.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
-// </div>
-//
-// For example:
-//
-// ``` python
-// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]])
-// tf.unsorted_segment_max(c, tf.constant([0, 1, 0]), num_segments=2)
-// # ==> [[ 4,  3, 3, 4],
-// #       [5,  6, 7, 8]]
-// ```
-//
-//
-// Arguments:
-//
-//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
-//
-//
-// Returns Has same shape as data, except for the first `segment_ids.rank`
-// dimensions, which are replaced with a single dimension which has size
-// `num_segments`.
-func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "UnsortedSegmentMax",
-		Input: []tf.Input{
-			data, segment_ids, num_segments,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
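The Python example above translates directly to the generated Go wrapper. A sketch of the graph-construction half, reusing the Scope/Session boilerplate from the NthElement sketch earlier (names and values illustrative):

```go
// Mirrors the Python example: rows 0 and 2 share segment 0, row 1 is segment 1.
c := op.Const(s, [][]int32{{1, 2, 3, 4}, {5, 6, 7, 8}, {4, 3, 2, 1}})
segmentIDs := op.Const(s, []int32{0, 1, 0})
numSegments := op.Const(s, int32(2))
max := op.UnsortedSegmentMax(s, c, segmentIDs, numSegments)
// Fetching `max` yields [[4 3 3 4] [5 6 7 8]].
```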
 // Transforms a vector of brain.Example protos (as strings) into typed tensors.
 //
 // Arguments:
@@ -12902,14 +10857,14 @@
 	return op.Output(0)
 }
 
-// StatefulStandardNormalAttr is an optional argument to StatefulStandardNormal.
-type StatefulStandardNormalAttr func(optionalAttr)
+// StatefulStandardNormalV2Attr is an optional argument to StatefulStandardNormalV2.
+type StatefulStandardNormalV2Attr func(optionalAttr)
 
-// StatefulStandardNormalDtype sets the optional dtype attribute to value.
+// StatefulStandardNormalV2Dtype sets the optional dtype attribute to value.
 //
 // value: The type of the output.
 // If not specified, defaults to DT_FLOAT
-func StatefulStandardNormalDtype(value tf.DataType) StatefulStandardNormalAttr {
+func StatefulStandardNormalV2Dtype(value tf.DataType) StatefulStandardNormalV2Attr {
 	return func(m optionalAttr) {
 		m["dtype"] = value
 	}
@@ -12921,10 +10876,11 @@
 //
 // Arguments:
 //	resource: The handle of the resource variable that stores the state of the RNG.
+//	algorithm: The RNG algorithm.
 //	shape: The shape of the output tensor.
 //
 // Returns A tensor of the specified shape filled with random normal values.
-func StatefulStandardNormal(scope *Scope, resource tf.Output, shape tf.Output, optional ...StatefulStandardNormalAttr) (output tf.Output) {
+func StatefulStandardNormalV2(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulStandardNormalV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -12933,9 +10889,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StatefulStandardNormal",
+		Type: "StatefulStandardNormalV2",
 		Input: []tf.Input{
-			resource, shape,
+			resource, algorithm, shape,
 		},
 		Attrs: attrs,
 	}
@@ -13077,56 +11033,56 @@
 	return op.Output(0), op.Output(1)
 }
 
-// Creates a TensorList which, when stacked, has the value of `tensor`.
+// Identity transformation that models performance.
 //
-// Each tensor in the result list corresponds to one row of the input tensor.
 //
-// tensor: The input tensor.
-// output_handle: The list.
-func TensorListFromTensor(scope *Scope, tensor tf.Output, element_shape tf.Output) (output_handle tf.Output) {
+// Arguments:
+//	input_dataset: A variant tensor representing the input dataset.
+//
+//
+func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "TensorListFromTensor",
+		Type: "ModelDataset",
 		Input: []tf.Input{
-			tensor, element_shape,
+			input_dataset,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Assigns sparse updates to the variable referenced by `resource`.
+// Fast Fourier transform.
 //
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] = updates[...]
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] = updates[i, ...]
-//
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]
+// Computes the 1-dimensional discrete Fourier transform over the inner-most
+// dimension of `input`.
 //
 // Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
+//	input: A complex tensor.
 //
-// Returns the created operation.
-func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+// Returns A complex tensor of the same shape as `input`. The inner-most
+//   dimension of `input` is replaced with its 1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.fft
+// @end_compatibility
+func FFT(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceScatterUpdate",
+		Type: "FFT",
 		Input: []tf.Input{
-			resource, indices, updates,
+			input,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
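A sketch of calling the `FFT` wrapper, again assuming the standard Scope/Session boilerplate from the first sketch (the input values are illustrative):

```go
// 1-D DFT over a length-4 complex vector.
x := op.Const(s, []complex128{1, 2, 3, 4})
y := op.FFT(s, x)
// Fetching `y` yields [(10+0i) (-2+2i) (-2+0i) (-2-2i)].
```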
 
 // MaxPoolAttr is an optional argument to MaxPool.
@@ -13216,6 +11172,47 @@
 	return scope.AddOperation(opspec)
 }
 
+// Subtracts sparse updates from the variable referenced by `resource`.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] -= updates[...]
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] -= updates[i, ...]
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...]
+//
+// Duplicate entries are handled correctly: if multiple `indices` reference
+// the same location, their contributions add.
+//
+// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
+// </div>
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to subtract from `ref`.
+//
+// Returns the created operation.
+func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterSub",
+		Input: []tf.Input{
+			resource, indices, updates,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
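A sketch of the full read-modify-write flow for this op against a resource variable, assuming the imports from the NthElement sketch above. Errors are ignored here purely for brevity, and all values are illustrative:

```go
s := op.NewScope()
v := op.VarHandleOp(s, tf.Float, tf.MakeShape(8))
initOp := op.AssignVariableOp(s, v,
	op.Const(s, []float32{1, 2, 3, 4, 5, 6, 7, 8}))
// Index 0 appears twice, so its updates accumulate: ref[0] -= 1, twice.
subOp := op.ResourceScatterSub(s, v,
	op.Const(s, []int32{0, 0, 3}),
	op.Const(s, []float32{1, 1, 10}))
read := op.ReadVariableOp(s, v, tf.Float)

graph, _ := s.Finalize()
sess, _ := tf.NewSession(graph, nil)
defer sess.Close()
sess.Run(nil, nil, []*tf.Operation{initOp}) // initialize the variable
sess.Run(nil, nil, []*tf.Operation{subOp})  // apply the scatter-subtract
out, _ := sess.Run(nil, []tf.Output{read}, nil)
fmt.Println(out[0].Value()) // [-1 2 3 -6 5 6 7 8]
```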
 // Adds sparse updates to the variable referenced by `resource`.
 //
 // This operation computes
@@ -13257,248 +11254,6 @@
 	return scope.AddOperation(opspec)
 }
 
-// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
-type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
-
-// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update relevant entries in '*var' according to the Ftrl-proximal scheme.
-//
-// That is for rows we have grad for, we update var, accum and linear as follows:
-// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-// linear += grad_with_shrinkage +
-//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 shrinkage regularization. Must be a scalar.
-//
-//	lr_power: Scaling factor. Must be a scalar.
-//
-// Returns the created operation.
-func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyFtrlV2",
-		Input: []tf.Input{
-			var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Calculates gains for each feature and returns the best possible split information for the feature.
-//
-// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature.
-//
-// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split.
-//
-// In this manner, the output is the best split per feature and per node, so it needs to be combined later to produce the best split for each node (among all possible features).
-//
-// The output lists all have the same length, `num_features`.
-// The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature.
-//
-// Arguments:
-//	node_id_range: A Rank 1 tensor (shape=[2]) specifying the range [first, last) of node ids to process within `stats_summary_list`. Node ids are iterated over as in `for node_id in range(node_id_range[0], node_id_range[1])` (note that the last index, node_id_range[1], is exclusive).
-//	stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used.
-//	l1: l1 regularization factor on leaf weights, per instance based.
-//	l2: l2 regularization factor on leaf weights, per instance based.
-//	tree_complexity: adjustment to the gain, per leaf based.
-//	min_node_weight: minimum average of hessians in a node required before the node is considered for splitting.
-//	max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors.
-//
-// Returns An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has a different size, as each feature provides different possible nodes. See above for details like shapes and sizes. An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes. An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes. A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes. A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node.
-func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"max_splits": max_splits}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesCalculateBestGainsPerFeature",
-		Input: []tf.Input{
-			node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil {
-		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
-		return
-	}
-	return node_ids_list, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list
-}
-
-// EncodePngAttr is an optional argument to EncodePng.
-type EncodePngAttr func(optionalAttr)
-
-// EncodePngCompression sets the optional compression attribute to value.
-//
-// value: Compression level.
-// If not specified, defaults to -1
-func EncodePngCompression(value int64) EncodePngAttr {
-	return func(m optionalAttr) {
-		m["compression"] = value
-	}
-}
-
-// PNG-encode an image.
-//
-// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
-// where `channels` is:
-//
-// *   1: for grayscale.
-// *   2: for grayscale + alpha.
-// *   3: for RGB.
-// *   4: for RGBA.
-//
-// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
-// default or a value from 0 to 9.  9 is the highest compression level, generating
-// the smallest output, but is slower.
-//
-// Arguments:
-//	image: 3-D with shape `[height, width, channels]`.
-//
-// Returns 0-D. PNG-encoded image.
-func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "EncodePng",
-		Input: []tf.Input{
-			image,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
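A short usage sketch for `EncodePng` (values illustrative; Scope/Session boilerplate as in the first sketch):

```go
// A 1x2 RGB image; fetching `png` yields the encoded bytes as a string scalar.
img := op.Const(s, [][][]uint8{{{255, 0, 0}, {0, 255, 0}}})
png := op.EncodePng(s, img, op.EncodePngCompression(9)) // smallest output, slowest
```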
-// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute.
-type DataFormatVecPermuteAttr func(optionalAttr)
-
-// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value.
-//
-// value: source data format.
-// If not specified, defaults to "NHWC"
-func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr {
-	return func(m optionalAttr) {
-		m["src_format"] = value
-	}
-}
-
-// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value.
-//
-// value: destination data format.
-// If not specified, defaults to "NCHW"
-func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr {
-	return func(m optionalAttr) {
-		m["dst_format"] = value
-	}
-}
-
-// Returns the permuted vector/tensor in the destination data format given the
-//
-// one in the source data format.
-//
-// Arguments:
-//	x: Vector of size 4 or Tensor of shape (4, 2) in source data format.
-//
-// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format.
-func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DataFormatVecPermute",
-		Input: []tf.Input{
-			x,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
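For example, permuting a shape vector from NHWC to NCHW order with this wrapper (a sketch; values illustrative, boilerplate as before):

```go
shapeVec := op.Const(s, []int32{32, 224, 224, 3}) // [batch, height, width, channels]
nchw := op.DataFormatVecPermute(s, shapeVec,
	op.DataFormatVecPermuteSrcFormat("NHWC"),
	op.DataFormatVecPermuteDstFormat("NCHW"))
// Fetching `nchw` yields [32 3 224 224].
```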
-// Initializes the multi device iterator with the given dataset.
-//
-// Arguments:
-//	dataset: Dataset to be iterated upon.
-//	multi_device_iterator: A MultiDeviceIteratorResource.
-//	max_buffer_size: The maximum size of the host side per device buffer to keep.
-//
-// Returns An int64 indicating which incarnation of the MultiDeviceIterator
-// is running.
-func MultiDeviceIteratorInit(scope *Scope, dataset tf.Output, multi_device_iterator tf.Output, max_buffer_size tf.Output) (incarnation_id tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MultiDeviceIteratorInit",
-		Input: []tf.Input{
-			dataset, multi_device_iterator, max_buffer_size,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Reads the value of a variable.
 //
 // The tensor returned by this operation is immutable.
@@ -13527,93 +11282,39 @@
 	return op.Output(0)
 }
 
-// This op consumes a lock created by `MutexLock`.
-//
-// This op exists to consume a tensor created by `MutexLock` (other than
-// direct control dependencies).  It should be the only op that consumes the tensor,
-// and will raise an error if it is not.  Its only purpose is to keep the
-// mutex lock tensor alive until it is consumed by this op.
-//
-// **NOTE**: This operation must run on the same device as its input.  This may
-// be enforced via the `colocate_with` mechanism.
-//
-// Arguments:
-//	mutex_lock: A tensor returned by `MutexLock`.
-//
-// Returns the created operation.
-func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ConsumeMutexLock",
-		Input: []tf.Input{
-			mutex_lock,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
+// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad.
+type ResourceSparseApplyProximalAdagradAttr func(optionalAttr)
 
-// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd.
-type ResourceScatterNdAddAttr func(optionalAttr)
-
-// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value.
+// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
 //
-// value: An optional bool. Defaults to True. If True, the assignment will
-// be protected by a lock; otherwise the behavior is undefined,
-// but may exhibit less contention.
-// If not specified, defaults to true
-func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr {
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr {
 	return func(m optionalAttr) {
 		m["use_locking"] = value
 	}
 }
 
-// Applies sparse addition to individual values or slices in a Variable.
+// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
 //
-// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-//
-// `indices` must be integer tensor, containing indices into `ref`.
-// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
-//
-// The innermost dimension of `indices` (with length `K`) corresponds to
-// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
-// dimension of `ref`.
-//
-// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
-//
-// ```
-// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]
-// ```
-//
-// For example, say we want to add 4 scattered elements to a rank-1 tensor with
-// 8 elements. In Python, that addition would look like this:
-//
-// ```python
-// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True)
-// indices = tf.constant([[4], [3], [1], [7]])
-// updates = tf.constant([9, 10, 11, 12])
-// add = tf.scatter_nd_add(ref, indices, updates)
-// with tf.Session() as sess:
-//   print sess.run(add)
-// ```
-//
-// The resulting update to ref would look like this:
-//
-//     [1, 13, 3, 14, 14, 6, 7, 20]
-//
-// See `tf.scatter_nd` for more details about how to make updates to
-// slices.
+// That is for rows we have grad for, we update var and accum as follows:
+// accum += grad * grad
+// prox_v = var
+// prox_v -= lr * grad * (1 / sqrt(accum))
+// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
 //
 // Arguments:
-//	ref: A resource handle. Must be from a VarHandleOp.
-//	indices: A Tensor. Must be one of the following types: int32, int64.
-// A tensor of indices into ref.
-//	updates: A Tensor. Must have the same type as ref. A tensor of
-// values to add to ref.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
 //
 // Returns the created operation.
-func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) {
+func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -13622,552 +11323,123 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceScatterNdAdd",
+		Type: "ResourceSparseApplyProximalAdagrad",
 		Input: []tf.Input{
-			ref, indices, updates,
+			var_, accum, lr, l1, l2, grad, indices,
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Updates the tree ensemble by either adding a layer to the last tree being grown
-//
-// or by starting a new tree.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the ensemble variable.
-//	feature_ids: Rank 1 tensor with ids for each feature. This is the real id of
-// the feature that will be used in the split.
-//	node_ids: List of rank 1 tensors representing the nodes for which this feature
-// has a split.
-//	gains: List of rank 1 tensors representing the gains for each of the feature's
-// split.
-//	thresholds: List of rank 1 tensors representing the thesholds for each of the
-// feature's split.
-//	left_node_contribs: List of rank 2 tensors with left leaf contribs for each of
-// the feature's splits. Will be added to the previous node values to constitute
-// the values of the left nodes.
-//	right_node_contribs: List of rank 2 tensors with right leaf contribs for each
-// of the feature's splits. Will be added to the previous node values to constitute
-// the values of the right nodes.
-//	max_depth: Max depth of the tree to build.
-//	learning_rate: shrinkage const for each new tree.
-//	pruning_mode: 0-No pruning, 1-Pre-pruning, 2-Post-pruning.
-//
-// Returns the created operation.
-func BoostedTreesUpdateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, feature_ids tf.Output, node_ids []tf.Output, gains []tf.Output, thresholds []tf.Output, left_node_contribs []tf.Output, right_node_contribs []tf.Output, max_depth tf.Output, learning_rate tf.Output, pruning_mode int64) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"pruning_mode": pruning_mode}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesUpdateEnsemble",
-		Input: []tf.Input{
-			tree_ensemble_handle, feature_ids, tf.OutputList(node_ids), tf.OutputList(gains), tf.OutputList(thresholds), tf.OutputList(left_node_contribs), tf.OutputList(right_node_contribs), max_depth, learning_rate,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
+// DecodeJpegAttr is an optional argument to DecodeJpeg.
+type DecodeJpegAttr func(optionalAttr)
 
-// Computes tan of x element-wise.
-func Tan(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Tan",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Deprecated. Use TensorArraySplitV3
+// DecodeJpegChannels sets the optional channels attribute to value.
 //
-// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3
-func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArraySplitV2",
-		Input: []tf.Input{
-			handle, value, lengths, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Reshapes a SparseTensor to represent values in a new dense shape.
-//
-// This operation has the same semantics as reshape on the represented dense
-// tensor.  The `input_indices` are recomputed based on the requested `new_shape`.
-//
-// If one component of `new_shape` is the special value -1, the size of that
-// dimension is computed so that the total dense size remains constant.  At
-// most one component of `new_shape` can be -1.  The number of dense elements
-// implied by `new_shape` must be the same as the number of dense elements
-// originally implied by `input_shape`.
-//
-// Reshaping does not affect the order of values in the SparseTensor.
-//
-// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`
-// has length `R_out`, then `input_indices` has shape `[N, R_in]`,
-// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and
-// `output_shape` has length `R_out`.
-//
-// Arguments:
-//	input_indices: 2-D.  `N x R_in` matrix with the indices of non-empty values in a
-// SparseTensor.
-//	input_shape: 1-D.  `R_in` vector with the input SparseTensor's dense shape.
-//	new_shape: 1-D.  `R_out` vector with the requested new dense shape.
-//
-// Returns 2-D.  `N x R_out` matrix with the updated indices of non-empty
-// values in the output SparseTensor. 1-D.  `R_out` vector with the full dense shape of the output
-// SparseTensor.  This is the same as `new_shape` but with any -1 dimensions
-// filled in.
-func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseReshape",
-		Input: []tf.Input{
-			input_indices, input_shape, new_shape,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
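A sketch of the -1 inference described above, reshaping a sparse 2x3 tensor to 3x2 (values illustrative; boilerplate as before):

```go
inputIndices := op.Const(s, [][]int64{{0, 0}, {1, 2}})
inputShape := op.Const(s, []int64{2, 3})
newShape := op.Const(s, []int64{3, -1}) // -1 is inferred as 2
outIndices, outShape := op.SparseReshape(s, inputIndices, inputShape, newShape)
// Fetching yields outIndices = [[0 0] [2 1]] and outShape = [3 2]:
// the value at flat position 5 moves from (1,2) in 2x3 to (2,1) in 3x2.
```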
-// Bucketizes 'input' based on 'boundaries'.
-//
-// For example, if the inputs are
-//     boundaries = [0, 10, 100]
-//     input = [[-5, 10000]
-//              [150,   10]
-//              [5,    100]]
-//
-// then the output will be
-//     output = [[0, 3]
-//               [3, 2]
-//               [1, 3]]
-//
-// Arguments:
-//	input: A Tensor of any shape, containing int or float values.
-//	boundaries: A sorted list of floats giving the boundaries of the buckets.
-//
-// Returns Same shape as 'input', with each value replaced by its bucket index.
-//
-// @compatibility(numpy)
-// Equivalent to np.digitize.
-// @end_compatibility
-func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"boundaries": boundaries}
-	opspec := tf.OpSpec{
-		Type: "Bucketize",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
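The worked example above, expressed against the Go wrapper (a sketch; boilerplate as before):

```go
input := op.Const(s, [][]int32{{-5, 10000}, {150, 10}, {5, 100}})
buckets := op.Bucketize(s, input, []float32{0, 10, 100})
// Fetching `buckets` yields [[0 3] [3 2] [1 3]], matching the example above.
```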
-// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
-type StatelessTruncatedNormalAttr func(optionalAttr)
-
-// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
+// value: Number of color channels for the decoded image.
+// If not specified, defaults to 0
+func DecodeJpegChannels(value int64) DecodeJpegAttr {
 	return func(m optionalAttr) {
-		m["dtype"] = value
+		m["channels"] = value
 	}
 }
 
-// Outputs deterministic pseudorandom values from a truncated normal distribution.
+// DecodeJpegRatio sets the optional ratio attribute to value.
 //
-// The generated values follow a normal distribution with mean 0 and standard
-// deviation 1, except that values whose magnitude is more than 2 standard
-// deviations from the mean are dropped and re-picked.
-//
-// The outputs are a deterministic function of `shape` and `seed`.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
-//
-// Returns Random values with specified shape.
-func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StatelessTruncatedNormal",
-		Input: []tf.Input{
-			shape, seed,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RestoreSliceAttr is an optional argument to RestoreSlice.
-type RestoreSliceAttr func(optionalAttr)
-
-// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
-//
-// value: Index of file to open first if multiple files match
-// `file_pattern`. See the documentation for `Restore`.
-// If not specified, defaults to -1
-func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
+// value: Downscaling ratio.
+// If not specified, defaults to 1
+func DecodeJpegRatio(value int64) DecodeJpegAttr {
 	return func(m optionalAttr) {
-		m["preferred_shard"] = value
+		m["ratio"] = value
 	}
 }
 
-// Restores a tensor from checkpoint files.
+// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
 //
-// This is like `Restore` except that the restored tensor can be listed as filling
-// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
-// larger tensor and the slice that the restored tensor covers.
-//
-// The `shape_and_slice` input has the same format as the
-// elements of the `shapes_and_slices` input of the `SaveSlices` op.
-//
-// Arguments:
-//	file_pattern: Must have a single element. The pattern of the files from
-// which we read the tensor.
-//	tensor_name: Must have a single element. The name of the tensor to be
-// restored.
-//	shape_and_slice: Scalar. The shapes and slice specifications to use when
-// restoring a tensor.
-//	dt: The type of the tensor to be restored.
-//
-// Returns The restored tensor.
-func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dt": dt}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RestoreSlice",
-		Input: []tf.Input{
-			file_pattern, tensor_name, shape_and_slice,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Divides sparse updates into the variable referenced by `resource`.
-//
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] /= updates[...]
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] /= updates[i, ...]
-//
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...]
-//
-// Duplicate entries are handled correctly: if multiple `indices` reference
-// the same location, their contributions multiply.
-//
-// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-// </div>
-//
-// Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of values that `ref` is divided by.
-//
-// Returns the created operation.
-func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceScatterDiv",
-		Input: []tf.Input{
-			resource, indices, updates,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
-type StatelessRandomNormalAttr func(optionalAttr)
-
-// StatelessRandomNormalDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
+// value: If true use a slower but nicer upscaling of the
+// chroma planes (yuv420/422 only).
+// If not specified, defaults to true
+func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr {
 	return func(m optionalAttr) {
-		m["dtype"] = value
+		m["fancy_upscaling"] = value
 	}
 }
 
-// Outputs deterministic pseudorandom values from a normal distribution.
+// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
 //
-// The generated values will have mean 0 and standard deviation 1.
-//
-// The outputs are a deterministic function of `shape` and `seed`.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
-//
-// Returns Random values with specified shape.
-func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StatelessRandomNormal",
-		Input: []tf.Input{
-			shape, seed,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
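A sketch of the determinism this family of stateless ops provides (shape and seed values illustrative; boilerplate as before):

```go
shape := op.Const(s, []int32{2, 3})
seed := op.Const(s, []int64{1, 2}) // 2 seeds, shape [2]
rnd := op.StatelessRandomNormal(s, shape, seed)
// Fetching `rnd` is a pure function of (shape, seed): re-running the session,
// or rebuilding the graph in another process, yields the same 2x3 samples.
```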
-// UnicodeDecodeAttr is an optional argument to UnicodeDecode.
-type UnicodeDecodeAttr func(optionalAttr)
-
-// UnicodeDecodeErrors sets the optional errors attribute to value.
-//
-// value: Error handling policy when there is invalid formatting found in the input.
-// The value of 'strict' will cause the operation to produce an InvalidArgument
-// error on any invalid input formatting. A value of 'replace' (the default) will
-// cause the operation to replace any invalid formatting in the input with the
-// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
-// skip any invalid formatting in the input and produce no corresponding output
-// character.
-// If not specified, defaults to "replace"
-func UnicodeDecodeErrors(value string) UnicodeDecodeAttr {
-	return func(m optionalAttr) {
-		m["errors"] = value
-	}
-}
-
-// UnicodeDecodeReplacementChar sets the optional replacement_char attribute to value.
-//
-// value: The replacement character codepoint to be used in place of any invalid
-// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
-// be used. The default value is the default Unicode replacement character,
-// 0xFFFD (decimal 65533).
-// If not specified, defaults to 65533
-func UnicodeDecodeReplacementChar(value int64) UnicodeDecodeAttr {
-	return func(m optionalAttr) {
-		m["replacement_char"] = value
-	}
-}
-
-// UnicodeDecodeReplaceControlCharacters sets the optional replace_control_characters attribute to value.
-//
-// value: Whether to replace the C0 control characters (00-1F) with the
-// `replacement_char`. Default is false.
+// value: If true try to recover an image from truncated input.
 // If not specified, defaults to false
-func UnicodeDecodeReplaceControlCharacters(value bool) UnicodeDecodeAttr {
+func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr {
 	return func(m optionalAttr) {
-		m["replace_control_characters"] = value
+		m["try_recover_truncated"] = value
 	}
 }
 
-// Decodes each string in `input` into a sequence of Unicode code points.
+// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
 //
-// The character codepoints for all strings are returned using a single vector
-// `char_values`, with strings expanded to characters in row-major order.
+// value: The minimum required fraction of lines before a truncated
+// input is accepted.
+// If not specified, defaults to 1
+func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr {
+	return func(m optionalAttr) {
+		m["acceptable_fraction"] = value
+	}
+}
+
+// DecodeJpegDctMethod sets the optional dct_method attribute to value.
 //
-// The `row_splits` tensor indicates where the codepoints for
-// each input string begin and end within the `char_values` tensor.
-// In particular, the values for the `i`th
-// string (in row-major order) are stored in the slice
-// `[row_splits[i]:row_splits[i+1]]`. Thus:
+// value: string specifying a hint about the algorithm used for
+// decompression.  Defaults to "" which maps to a system-specific
+// default.  Currently valid values are ["INTEGER_FAST",
+// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
+// jpeg library changes to a version that does not have that specific
+// option.)
+// If not specified, defaults to ""
+func DecodeJpegDctMethod(value string) DecodeJpegAttr {
+	return func(m optionalAttr) {
+		m["dct_method"] = value
+	}
+}
+
+// Decode a JPEG-encoded image to a uint8 tensor.
 //
-// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
-//   character in the `i`th string (in row-major order).
-// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
-//   string (in row-major order).
+// The attr `channels` indicates the desired number of color channels for the
+// decoded image.
+//
+// Accepted values are:
+//
+// *   0: Use the number of channels in the JPEG-encoded image.
+// *   1: output a grayscale image.
+// *   3: output an RGB image.
+//
+// If needed, the JPEG-encoded image is transformed to match the requested number
+// of color channels.
+//
+// The attr `ratio` allows downscaling the image by an integer factor during
+// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
+// downscaling the image later.
+//
+// This op also supports decoding PNGs and non-animated GIFs since the interface is
+// the same, though it is cleaner to use `tf.image.decode_image`.
 //
 // Arguments:
-//	input: The text to be decoded. Can have any shape. Note that the output is flattened
-// to a vector of char values.
-//	input_encoding: Text encoding of the input strings. This is any of the encodings supported
-// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+//	contents: 0-D.  The JPEG-encoded image.
 //
-// Returns A 1D int32 tensor containing the row splits.A 1D int32 Tensor containing the decoded codepoints.
-func UnicodeDecode(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeAttr) (row_splits tf.Output, char_values tf.Output) {
+// Returns 3-D with shape `[height, width, channels]`.
+func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"input_encoding": input_encoding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "UnicodeDecode",
+		Type: "DecodeJpeg",
 		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
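A sketch of the row-splits layout described above (input strings illustrative; boilerplate as before):

```go
strs := op.Const(s, []string{"Hi", "héllo"})
rowSplits, charValues := op.UnicodeDecode(s, strs, "UTF-8")
// Fetching yields rowSplits = [0 2 7] and
// charValues = [72 105 104 233 108 108 111]; the codepoints of string i
// are charValues[rowSplits[i]:rowSplits[i+1]].
```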
-// Adds up a SparseTensor and a dense Tensor, using these special rules:
-//
-// (1) Broadcasts the dense side to have the same shape as the sparse side, if
-//     eligible;
-// (2) Then, only the dense values pointed to by the indices of the SparseTensor
-//     participate in the cwise addition.
-//
-// By these rules, the result is a logical SparseTensor with exactly the same
-// indices and shape, but possibly with different non-zero values.  The output of
-// this Op is the resultant non-zero values.
-//
-// Arguments:
-//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
-//	dense: `R`-D.  The dense Tensor operand.
-//
-// Returns 1-D.  The `N` values that are operated on.
-func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseDenseCwiseAdd",
-		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape, dense,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the complementary error function of `x` element-wise.
-func Erfc(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Erfc",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// UnicodeEncodeAttr is an optional argument to UnicodeEncode.
-type UnicodeEncodeAttr func(optionalAttr)
-
-// UnicodeEncodeErrors sets the optional errors attribute to value.
-//
-// value: Error handling policy when there is invalid formatting found in the input.
-// The value of 'strict' will cause the operation to produce an InvalidArgument
-// error on any invalid input formatting. A value of 'replace' (the default) will
-// cause the operation to replace any invalid formatting in the input with the
-// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
-// skip any invalid formatting in the input and produce no corresponding output
-// character.
-// If not specified, defaults to "replace"
-func UnicodeEncodeErrors(value string) UnicodeEncodeAttr {
-	return func(m optionalAttr) {
-		m["errors"] = value
-	}
-}
-
-// UnicodeEncodeReplacementChar sets the optional replacement_char attribute to value.
-//
-// value: The replacement character codepoint to be used in place of any invalid
-// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
-// be used. The default value is the default Unicode replacement character,
-// 0xFFFD (decimal 65533).
-// If not specified, defaults to 65533
-func UnicodeEncodeReplacementChar(value int64) UnicodeEncodeAttr {
-	return func(m optionalAttr) {
-		m["replacement_char"] = value
-	}
-}
-
-// Encode a tensor of ints into unicode strings.
-//
-// Returns a vector of strings, where `output[i]` is constructed by encoding the
-// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]`
-// using `output_encoding`.
-//
-// ---
-//
-// Example:
-//
-// ```
-// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100]
-// input_splits = [0, 5, 10]
-// output_encoding = 'UTF-8'
-//
-// output = ['Hello', 'World']
-// ```
-//
-// Arguments:
-//	input_values: A 1D tensor containing the unicode codepoints that should be encoded.
-//	input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings.
-// In particular, `output[i]` is constructed by encoding the codepoints in the
-// slice `input_values[input_splits[i]:input_splits[i+1]]`.
-//	output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8",
-// "UTF-16-BE", and "UTF-32-BE"`.
-//
-// Returns The 1-D Tensor of strings encoded from the provided unicode codepoints.
-func UnicodeEncode(scope *Scope, input_values tf.Output, input_splits tf.Output, output_encoding string, optional ...UnicodeEncodeAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_encoding": output_encoding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "UnicodeEncode",
-		Input: []tf.Input{
-			input_values, input_splits,
+			contents,
 		},
 		Attrs: attrs,
 	}
@@ -14175,42 +11447,204 @@
 	return op.Output(0)
 }
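A sketch of feeding encoded bytes to this wrapper through a placeholder. The file path is a stand-in, `io/ioutil` and `fmt` are assumed imported alongside the TensorFlow packages, and errors from Finalize/NewSession/Run are elided for brevity:

```go
data, err := ioutil.ReadFile("photo.jpg") // stand-in path
if err != nil {
	panic(err)
}
s := op.NewScope()
contents := op.Placeholder(s, tf.String)
img := op.DecodeJpeg(s, contents, op.DecodeJpegChannels(3))
graph, _ := s.Finalize()
sess, _ := tf.NewSession(graph, nil)
defer sess.Close()
t, _ := tf.NewTensor(string(data))
out, _ := sess.Run(map[tf.Output]*tf.Tensor{contents: t}, []tf.Output{img}, nil)
fmt.Println(out[0].Shape()) // [height width 3]
```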
 
-// Returns the number of tensors in the input tensor list.
+// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
+type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
+
+// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
 //
-// input_handle: the input list
-// length: the number of tensors in the list
-func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of depthwise convolution with respect to the input.
+//
+// Arguments:
+//	input_sizes: An integer vector representing the shape of `input`, based
+// on `data_format`.  For example, if `data_format` is 'NHWC' then
+//  `input` is a 4-D `[batch, height, width, channels]` tensor.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
+//	out_backprop: 4-D with shape based on `data_format`.
+// For example, if `data_format` is 'NHWC' then
+// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape according to `data_format`.  For example, if
+// `data_format` is 'NHWC', output shape is `[batch, in_height,
+// in_width, in_channels]`.  Gradient w.r.t. the input of the
+// convolution.
+func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DepthwiseConv2dNativeBackpropInput",
+		Input: []tf.Input{
+			input_sizes, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// EditDistanceAttr is an optional argument to EditDistance.
+type EditDistanceAttr func(optionalAttr)
+
+// EditDistanceNormalize sets the optional normalize attribute to value.
+//
+// value: boolean (if true, edit distances are normalized by length of truth).
+//
+// If not specified, defaults to true
+func EditDistanceNormalize(value bool) EditDistanceAttr {
+	return func(m optionalAttr) {
+		m["normalize"] = value
+	}
+}
+
+// Computes the (possibly normalized) Levenshtein Edit Distance.
+//
+// The inputs are variable-length sequences provided by SparseTensors
+//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
+// and
+//   (truth_indices, truth_values, truth_shape).
+//
+// The inputs are:
+//
+// Arguments:
+//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
+// This is an N x R int64 matrix.
+//	hypothesis_values: The values of the hypothesis list SparseTensor.
+// This is an N-length vector.
+//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
+// This is an R-length vector.
+//	truth_indices: The indices of the truth list SparseTensor.
+// This is an M x R int64 matrix.
+//	truth_values: The values of the truth list SparseTensor.
+// This is an M-length vector.
+//	truth_shape: The shape of the truth list SparseTensor. This is an R-length vector.
+//
+// Returns A dense float tensor with rank R - 1.
+//
+// For the example input:
+//
+//     // hypothesis represents a 2x1 matrix with variable-length values:
+//     //   (0,0) = ["a"]
+//     //   (1,0) = ["b"]
+//     hypothesis_indices = [[0, 0, 0],
+//                           [1, 0, 0]]
+//     hypothesis_values = ["a", "b"]
+//     hypothesis_shape = [2, 1, 1]
+//
+//     // truth represents a 2x2 matrix with variable-length values:
+//     //   (0,0) = []
+//     //   (0,1) = ["a"]
+//     //   (1,0) = ["b", "c"]
+//     //   (1,1) = ["a"]
+//     truth_indices = [[0, 1, 0],
+//                      [1, 0, 0],
+//                      [1, 0, 1],
+//                      [1, 1, 0]]
+//     truth_values = ["a", "b", "c", "a"]
+//     truth_shape = [2, 2, 2]
+//     normalize = true
+//
+// The output will be:
+//
+//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
+//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
+//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
+func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EditDistance",
+		Input: []tf.Input{
+			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns 0 if x == 0, and x * log(y) otherwise, elementwise.
+func Xlogy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorListLength",
+		Type: "Xlogy",
 		Input: []tf.Input{
-			input_handle,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Determine the script codes of a given tensor of Unicode integer code points.
+// Stops gradient computation.
 //
-// This operation converts Unicode code points to script codes corresponding to
-// each code point. Script codes correspond to International Components for
-// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html.
-// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will
-// match input shape.
+// When executed in a graph, this op outputs its input tensor as-is.
 //
-// Arguments:
-//	input: A Tensor of int32 Unicode code points.
+// When building ops to compute gradients, this op prevents the contribution of
+// its inputs from being taken into account.  Normally, the gradient generator adds ops
+// to a graph to compute the derivatives of a specified 'loss' by recursively
+// finding the inputs that contributed to its computation.  If you insert this op
+// in the graph, its inputs are masked from the gradient generator.  They are not
+// taken into account for computing gradients.
 //
-// Returns A Tensor of int32 script codes corresponding to each input code point.
-func UnicodeScript(scope *Scope, input tf.Output) (output tf.Output) {
+// This is useful any time you want to compute a value with TensorFlow but need
+// to pretend that the value was a constant. Some examples include:
+//
+// *  The *EM* algorithm where the *M-step* should not involve backpropagation
+//    through the output of the *E-step*.
+// *  Contrastive divergence training of Boltzmann machines where, when
+//    differentiating the energy function, the training must not backpropagate
+//    through the graph that generated the samples from the model.
+// *  Adversarial training, where no backprop should happen through the adversarial
+//    example generation process.
+func StopGradient(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "UnicodeScript",
+		Type: "StopGradient",
 		Input: []tf.Input{
 			input,
 		},
@@ -14219,95 +11653,19 @@
 	return op.Output(0)
 }
 
-// Creates a sequence of numbers.
+// Eagerly executes a python function to compute func(input)->output. The
 //
-// This operation creates a sequence of numbers that begins at `start` and
-// extends by increments of `delta` up to but not including `limit`.
-//
-// For example:
-//
-// ```
-// # 'start' is 3
-// # 'limit' is 18
-// # 'delta' is 3
-// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
-// ```
-//
-// Arguments:
-//	start: 0-D (scalar). First entry in the sequence.
-//	limit: 0-D (scalar). Upper limit of sequence, exclusive.
-//	delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`.
-//
-// Returns 1-D.
-func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) {
+// semantics of the input, output, and attributes are the same as those for
+// PyFunc.
+func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"token": token, "Tout": Tout}
 	opspec := tf.OpSpec{
-		Type: "Range",
+		Type: "EagerPyFunc",
 		Input: []tf.Input{
-			start, limit, delta,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey.
-type OrderedMapUnstageNoKeyAttr func(optionalAttr)
-
-// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op removes and returns the (key, value) element with the smallest
-//
-// key from the underlying container.   If the underlying container
-// does not contain elements, the op will block until it does.
-func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "OrderedMapUnstageNoKey",
-		Input: []tf.Input{
-			indices,
+			tf.OutputList(input),
 		},
 		Attrs: attrs,
 	}
@@ -14317,262 +11675,142 @@
 	}
 	var idx int
 	var err error
-	key = op.Output(idx)
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("OrderedMapUnstageNoKey", err)
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("EagerPyFunc", err)
 		return
 	}
-	return key, values
+	return output
 }
 
-// Returns element-wise integer closest to x.
+// Concats all tensors in the list along the 0th dimension.
 //
-// If the result is midway between two representable values,
-// the even representable is chosen.
-// For example:
+// Requires that all tensors have the same shape except the first dimension.
 //
-// ```
-// rint(-1.5) ==> -2.0
-// rint(0.5000001) ==> 1.0
-// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
-// ```
-func Rint(scope *Scope, x tf.Output) (y tf.Output) {
+// input_handle: The input list.
+// element_shape: The shape of the uninitialized elements in the list. If the first
+//   dimension is not -1, it is assumed that all list elements have the same
+//   leading dim.
+// leading_dims: The list of leading dims of uninitialized list elements. Used if
+//   the leading dim of input_handle.element_shape or the element_shape input arg
+//   is not already set.
+// tensor: The concatenated result.
+// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient.
+//
+func TensorListConcatV2(scope *Scope, input_handle tf.Output, element_shape tf.Output, leading_dims tf.Output, element_dtype tf.DataType) (tensor tf.Output, lengths tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
 	opspec := tf.OpSpec{
-		Type: "Rint",
+		Type: "TensorListConcatV2",
 		Input: []tf.Input{
-			x,
+			input_handle, element_shape, leading_dims,
 		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve.
+type MatrixTriangularSolveAttr func(optionalAttr)
+
+// MatrixTriangularSolveLower sets the optional lower attribute to value.
+//
+// value: Boolean indicating whether the innermost matrices in `matrix` are
+// lower or upper triangular.
+// If not specified, defaults to true
+func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr {
+	return func(m optionalAttr) {
+		m["lower"] = value
+	}
+}
+
+// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value.
+//
+// value: Boolean indicating whether to solve with `matrix` or its (block-wise)
+//          adjoint.
+//
+// @compatibility(numpy)
+// Equivalent to scipy.linalg.solve_triangular
+// @end_compatibility
+// If not specified, defaults to false
+func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr {
+	return func(m optionalAttr) {
+		m["adjoint"] = value
+	}
+}
+
+// Solves systems of linear equations with upper or lower triangular matrices by
+//
+// backsubstitution.
+//
+// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form
+// square matrices. If `lower` is `True` then the strictly upper triangular part
+// of each inner-most matrix is assumed to be zero and not accessed.
+// If `lower` is False then the strictly lower triangular part of each inner-most
+// matrix is assumed to be zero and not accessed.
+// `rhs` is a tensor of shape `[..., M, K]`.
+//
+// The output is a tensor of shape `[..., M, K]`. If `adjoint` is
+// `False` then the innermost matrices in `output` satisfy matrix equations
+// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
+// If `adjoint` is `True` then the innermost matrices in
+// `output` satisfy matrix equations
+// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`.
+//
+// Arguments:
+//	matrix: Shape is `[..., M, M]`.
+//	rhs: Shape is `[..., M, K]`.
+//
+// Returns Shape is `[..., M, K]`.
+func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixTriangularSolve",
+		Input: []tf.Input{
+			matrix, rhs,
+		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
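For illustration, here is a minimal Go sketch (not part of the generated file) that wires `MatrixTriangularSolve` into a graph and runs it; it assumes the standard `tensorflow/go` and `tensorflow/go/op` packages, the input values are invented, and error handling is elided:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Lower-triangular system: 2x = 2 and 3x + 4y = 14, so x = 1, y = 2.75.
	matrix := op.Const(s, [][]float32{{2, 0}, {3, 4}})
	rhs := op.Const(s, [][]float32{{2}, {14}})
	solution := op.MatrixTriangularSolve(s, matrix, rhs,
		op.MatrixTriangularSolveLower(true))
	graph, _ := s.Finalize() // errors ignored for brevity
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{solution}, nil)
	fmt.Println(out[0].Value()) // [[1] [2.75]]
}
```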
 
-// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum.
-type ResourceApplyMomentumAttr func(optionalAttr)
-
-// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value.
+// Saves tensors in V2 checkpoint format.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
-//
-// value: If `True`, the tensor passed to compute grad will be
-// var - lr * momentum * accum, so in the end, the var you get is actually
-// var - lr * momentum * accum.
-// If not specified, defaults to false
-func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_nesterov"] = value
-	}
-}
-
-// Update '*var' according to the momentum scheme. Set use_nesterov = True if you
-//
-// want to use Nesterov momentum.
-//
-// accum = accum * momentum + grad
-// var -= lr * accum
+// By default, saves the named tensors in full.  If the caller wishes to save
+// specific slices of full tensors, "shape_and_slices" should be non-empty strings
+// and correspondingly well-formed.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	grad: The gradient.
-//	momentum: Momentum. Must be a scalar.
+//	prefix: Must have a single element. The prefix of the V2 checkpoint to which we
+// write the tensors.
+//	tensor_names: shape {N}. The names of the tensors to be saved.
+//	shape_and_slices: shape {N}.  The slice specs of the tensors to be saved.
+// Empty strings indicate that they are non-partitioned tensors.
+//	tensors: `N` tensors to save.
 //
 // Returns the created operation.
-func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) {
+func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyMomentum",
+		Type: "SaveV2",
 		Input: []tf.Input{
-			var_, accum, lr, grad, momentum,
+			prefix, tensor_names, shape_and_slices, tf.OutputList(tensors),
 		},
-		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
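A hedged sketch of driving `SaveV2` from Go; the checkpoint prefix and the tensor name "w" are invented for the example, and the op is run as a target because it produces no outputs:

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	w := op.Const(s, []float32{1, 2, 3})
	save := op.SaveV2(s,
		op.Const(s, "/tmp/ckpt/model"), // checkpoint prefix (illustrative path)
		op.Const(s, []string{"w"}),     // names under which the tensors are saved
		op.Const(s, []string{""}),      // empty string: save the full tensor
		[]tf.Output{w},
	)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	sess.Run(nil, nil, []*tf.Operation{save}) // run for its side effect
}
```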
 
-// SubstrAttr is an optional argument to Substr.
-type SubstrAttr func(optionalAttr)
-
-// SubstrUnit sets the optional unit attribute to value.
-//
-// value: The unit that is used to create the substring.  One of: `"BYTE"` (for
-// defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8
-// encoded Unicode code points).  The default is `"BYTE"`. Results are undefined if
-// `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid
-// UTF-8.
-// If not specified, defaults to "BYTE"
-func SubstrUnit(value string) SubstrAttr {
-	return func(m optionalAttr) {
-		m["unit"] = value
-	}
-}
-
-// Return substrings from `Tensor` of strings.
-//
-// For each string in the input `Tensor`, creates a substring starting at index
-// `pos` with a total length of `len`.
-//
-// If `len` defines a substring that would extend beyond the length of the input
-// string, then as many characters as possible are used.
-//
-// A negative `pos` indicates distance within the string backwards from the end.
-//
-// If `pos` specifies an index which is out of range for any of the input strings,
-// then an `InvalidArgumentError` is thrown.
-//
-// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
-// Op creation.
-//
-// *NOTE*: `Substr` supports broadcasting up to two dimensions. More about
-// broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-//
-// ---
-//
-// Examples
-//
-// Using scalar `pos` and `len`:
-//
-// ```python
-// input = [b'Hello', b'World']
-// position = 1
-// length = 3
-//
-// output = [b'ell', b'orl']
-// ```
-//
-// Using `pos` and `len` with same shape as `input`:
-//
-// ```python
-// input = [[b'ten', b'eleven', b'twelve'],
-//          [b'thirteen', b'fourteen', b'fifteen'],
-//          [b'sixteen', b'seventeen', b'eighteen']]
-// position = [[1, 2, 3],
-//             [1, 2, 3],
-//             [1, 2, 3]]
-// length =   [[2, 3, 4],
-//             [4, 3, 2],
-//             [5, 5, 5]]
-//
-// output = [[b'en', b'eve', b'lve'],
-//           [b'hirt', b'urt', b'te'],
-//           [b'ixtee', b'vente', b'hteen']]
-// ```
-//
-// Broadcasting `pos` and `len` onto `input`:
-//
-// ```
-// input = [[b'ten', b'eleven', b'twelve'],
-//          [b'thirteen', b'fourteen', b'fifteen'],
-//          [b'sixteen', b'seventeen', b'eighteen'],
-//          [b'nineteen', b'twenty', b'twentyone']]
-// position = [1, 2, 3]
-// length =   [1, 2, 3]
-//
-// output = [[b'e', b'ev', b'lve'],
-//           [b'h', b'ur', b'tee'],
-//           [b'i', b've', b'hte'],
-//           [b'i', b'en', b'nty']]
-// ```
-//
-// Broadcasting `input` onto `pos` and `len`:
-//
-// ```
-// input = b'thirteen'
-// position = [1, 5, 7]
-// length =   [3, 2, 1]
-//
-// output = [b'hir', b'ee', b'n']
-// ```
-//
-// Arguments:
-//	input: Tensor of strings
-//	pos: Scalar defining the position of first character in each substring
-//	len: Scalar defining the number of characters to include in each substring
-//
-// Returns Tensor of substrings
-func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optional ...SubstrAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Substr",
-		Input: []tf.Input{
-			input, pos, len,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Exits the current frame to its parent frame.
-//
-// Exit makes its input `data` available to the parent frame.
-//
-// Arguments:
-//	data: The tensor to be made available to the parent frame.
-//
-// Returns The same tensor as `data`.
-func Exit(scope *Scope, data tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Exit",
-		Input: []tf.Input{
-			data,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Produce a string tensor that encodes the state of a Reader.
-//
-// Not all Readers support being serialized, so this can produce an
-// Unimplemented error.
-//
-// Arguments:
-//	reader_handle: Handle to a Reader.
-func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReaderSerializeStateV2",
-		Input: []tf.Input{
-			reader_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Concatenates quantized tensors along one dimension.
 //
 // Arguments:
@@ -14644,116 +11882,165 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Reduces sparse updates into the variable referenced by `resource` using the `min` operation.
+// Runs multiple additive regression ensemble predictors on input instances and
 //
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] = min(ref[indices, ...], updates[...])
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] = min(ref[indices[i], ...], updates[i, ...])
-//
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] = min(ref[indices[i, ..., j], ...], updates[i, ..., j, ...])
-//
-// Duplicate entries are handled correctly: if multiple `indices` reference
-// the same location, their contributions are combined.
-//
-// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-// </div>
+// computes the logits. It is designed to be used during prediction.
+// It traverses all the trees and calculates the final score for each instance.
 //
 // Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
 //
-// Returns the created operation.
-func ResourceScatterMin(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+//	bucketized_features: A list of rank 1 Tensors containing bucket id for each
+// feature.
+//	logits_dimension: scalar, dimension of the logits, to be used for partial logits
+// shape.
+//
+// Returns Output rank 2 Tensor containing logits for each example.
+func BoostedTreesPredict(scope *Scope, tree_ensemble_handle tf.Output, bucketized_features []tf.Output, logits_dimension int64) (logits tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"logits_dimension": logits_dimension}
 	opspec := tf.OpSpec{
-		Type: "ResourceScatterMin",
+		Type: "BoostedTreesPredict",
 		Input: []tf.Input{
-			resource, indices, updates,
+			tree_ensemble_handle, tf.OutputList(bucketized_features),
 		},
+		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Reshapes a quantized tensor as per the Reshape op.
+// Pads a tensor with zeros.
+//
+// This operation pads a `input` with zeros according to the `paddings` you
+// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
+// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
+// how many zeros to add before the contents of `input` in that dimension, and
+// `paddings[D, 1]` indicates how many zeros to add after the contents of `input`
+// in that dimension.
+//
+// The padded size of each dimension D of the output is:
+//
+// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
+//
+// For example:
 //
 // ```
+// # 't' is [[1, 1], [2, 2]]
+// # 'paddings' is [[1, 1], [2, 2]]
+// # rank of 't' is 2
+// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
+//                       [0, 0, 1, 1, 0, 0]
+//                       [0, 0, 2, 2, 0, 0]
+//                       [0, 0, 0, 0, 0, 0]]
+// ```
 //
-// Arguments:
-//
-//	shape: Defines the shape of the output tensor.
-//	input_min: The minimum value of the input.
-//	input_max: The maximum value of the input.
-//
-// Returns This value is copied from input_min.This value is copied from input_max.
-func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedReshape",
+		Type: "Pad",
 		Input: []tf.Input{
-			tensor, shape, input_min, input_max,
+			input, paddings,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
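The numeric example in the comment above translates directly to this API; a minimal sketch, assuming the standard `tensorflow/go` packages:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	t := op.Const(s, [][]int32{{1, 1}, {2, 2}})
	// One row of zeros above and below, two columns left and right.
	paddings := op.Const(s, [][]int32{{1, 1}, {2, 2}})
	padded := op.Pad(s, t, paddings)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{padded}, nil)
	fmt.Println(out[0].Value()) // the 4x6 matrix shown in the comment above
}
```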
 
-// StringSplitAttr is an optional argument to StringSplit.
-type StringSplitAttr func(optionalAttr)
-
-// StringSplitSkipEmpty sets the optional skip_empty attribute to value.
+// Checks whether a resource handle-based variable has been initialized.
 //
-// value: A `bool`. If `True`, skip the empty strings from the result.
+// Arguments:
+//	resource: the input resource handle.
+//
+// Returns a scalar boolean which is true if the variable has been
+// initialized.
+func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "VarIsInitializedOp",
+		Input: []tf.Input{
+			resource,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the min of x and y (i.e. x < y ? x : y) element-wise.
+//
+// *NOTE*: `Minimum` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Minimum",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
+//
+// if `features < 0`, `scale * features` otherwise.
+//
+// To be used together with
+// `initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`.
+// For correct dropout, use `tf.contrib.nn.alpha_dropout`.
+//
+// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
+func Selu(scope *Scope, features tf.Output) (activations tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Selu",
+		Input: []tf.Input{
+			features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
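A small usage sketch with arbitrary input values; the expected outputs in the comment follow from the SELU constants (scale about 1.0507, alpha about 1.6733):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, []float32{-1, 0, 1})
	y := op.Selu(s, x)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{y}, nil)
	fmt.Println(out[0].Value()) // approximately [-1.1113 0 1.0507]
}
```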
+
+// SetSizeAttr is an optional argument to SetSize.
+type SetSizeAttr func(optionalAttr)
+
+// SetSizeValidateIndices sets the optional validate_indices attribute to value.
 // If not specified, defaults to true
-func StringSplitSkipEmpty(value bool) StringSplitAttr {
+func SetSizeValidateIndices(value bool) SetSizeAttr {
 	return func(m optionalAttr) {
-		m["skip_empty"] = value
+		m["validate_indices"] = value
 	}
 }
 
-// Split elements of `input` based on `delimiter` into a `SparseTensor`.
+// Number of unique elements along last dimension of input `set`.
 //
-// Let N be the size of source (typically N will be the batch size). Split each
-// element of `input` based on `delimiter` and return a `SparseTensor`
-// containing the splitted tokens. Empty tokens are ignored.
+// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`,
+// and `set_shape`. The last dimension contains values in a set, duplicates are
+// allowed but ignored.
 //
-// `delimiter` can be empty, or a string of split characters. If `delimiter` is an
-//  empty string, each element of `input` is split into individual single-byte
-//  character strings, including splitting of UTF-8 multibyte sequences. Otherwise
-//  every character of `delimiter` is a potential split point.
-//
-// For example:
-//   N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
-//   will be
-//
-//   indices = [0, 0;
-//              0, 1;
-//              1, 0;
-//              1, 1;
-//              1, 2]
-//   shape = [2, 3]
-//   values = ['hello', 'world', 'a', 'b', 'c']
+// If `validate_indices` is `True`, this op validates the order and range of `set`
+// indices.
 //
 // Arguments:
-//	input: 1-D. Strings to split.
-//	delimiter: 0-D. Delimiter characters (bytes), or empty string.
+//	set_indices: 2D `Tensor`, indices of a `SparseTensor`.
+//	set_values: 1D `Tensor`, values of a `SparseTensor`.
+//	set_shape: 1D `Tensor`, shape of a `SparseTensor`.
 //
-// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse
-// tensor, where the first value is N and the second value is the maximum number
-// of tokens in a single input entry.
-func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) {
+// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st
+// `n-1` dimensions as `set`. Each value is the number of unique elements in
+// the corresponding `[0...n-1]` dimension of `set`.
+func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14762,201 +12049,281 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StringSplit",
+		Type: "SetSize",
 		Input: []tf.Input{
-			input, delimiter,
+			set_indices, set_values, set_shape,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum.
-type ResourceSparseApplyMomentumAttr func(optionalAttr)
-
-// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value.
+// Adds sparse `updates` to an existing tensor according to `indices`.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
+// This operation creates a new tensor by adding sparse `updates` to the passed
+// in `tensor`.
+// This operation is very similar to `tf.scatter_nd_add`, except that the updates
+// are added onto an existing tensor (as opposed to a variable). If the memory
+// for the existing tensor cannot be re-used, a copy is made and updated.
 //
-// value: If `True`, the tensor passed to compute grad will be
-// var - lr * momentum * accum, so in the end, the var you get is actually
-// var - lr * momentum * accum.
-// If not specified, defaults to false
-func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_nesterov"] = value
-	}
-}
-
-// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
+// `indices` is an integer tensor containing indices into a new tensor of shape
+// `shape`.  The last dimension of `indices` can be at most the rank of `shape`:
 //
-// Set use_nesterov = True if you want to use Nesterov momentum.
+//     indices.shape[-1] <= shape.rank
 //
-// That is for rows we have grad for, we update var and accum as follows:
+// The last dimension of `indices` corresponds to indices into elements
+// (if `indices.shape[-1] = shape.rank`) or slices
+// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of
+// `shape`.  `updates` is a tensor with shape
 //
-// accum = accum * momentum + grad
-// var -= lr * accum
+//     indices.shape[:-1] + shape[indices.shape[-1]:]
+//
+// The simplest form of tensor_scatter_add is to add individual elements to a
+// tensor by index. For example, say we want to add 4 elements to a rank-1
+// tensor with 8 elements.
+//
+// In Python, this scatter add operation would look like this:
+//
+// ```python
+//     indices = tf.constant([[4], [3], [1], [7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     tensor = tf.ones([8], dtype=tf.int32)
+//     updated = tf.tensor_scatter_add(tensor, indices, updates)
+//     with tf.Session() as sess:
+//       print(sess.run(updated))
+// ```
+//
+// The resulting tensor would look like this:
+//
+//     [1, 12, 1, 11, 10, 1, 1, 13]
+//
+// We can also insert entire slices of a higher rank tensor all at once. For
+// example, we can insert two slices in the first dimension of a rank-3 tensor
+// with two matrices of new values.
+//
+// In Python, this scatter add operation would look like this:
+//
+// ```python
+//     indices = tf.constant([[0], [2]])
+//     updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
+//                             [7, 7, 7, 7], [8, 8, 8, 8]],
+//                            [[5, 5, 5, 5], [6, 6, 6, 6],
+//                             [7, 7, 7, 7], [8, 8, 8, 8]]])
+//     tensor = tf.ones([4, 4, 4])
+//     updated = tf.tensor_scatter_add(tensor, indices, updates)
+//     with tf.Session() as sess:
+//       print(sess.run(updated))
+// ```
+//
+// The resulting tensor would look like this:
+//
+//     [[[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]],
+//      [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
+//      [[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]],
+//      [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]]
+//
+// Note that on CPU, if an out of bound index is found, an error is returned.
+// On GPU, if an out of bound index is found, the index is ignored.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	momentum: Momentum. Must be a scalar.
+//	tensor: Tensor to copy/update.
+//	indices: Index tensor.
+//	updates: Updates to scatter into output.
 //
-// Returns the created operation.
-func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) {
+// Returns A new tensor copied from tensor and updates added according to the indices.
+func TensorScatterAdd(scope *Scope, tensor tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyMomentum",
+		Type: "TensorScatterAdd",
 		Input: []tf.Input{
-			var_, accum, lr, grad, indices, momentum,
+			tensor, indices, updates,
 		},
-		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
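The rank-1 Python example above, rewritten against this Go wrapper (a sketch with the same invented values):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	tensor := op.Const(s, []int32{1, 1, 1, 1, 1, 1, 1, 1})
	indices := op.Const(s, [][]int32{{4}, {3}, {1}, {7}})
	updates := op.Const(s, []int32{9, 10, 11, 12})
	updated := op.TensorScatterAdd(s, tensor, indices, updates)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{updated}, nil)
	fmt.Println(out[0].Value()) // [1 12 1 11 10 1 1 13]
}
```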
 
-// Returns the complex conjugate of a complex number.
+// Computes the sign and the log of the absolute value of the determinant of
 //
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// complex numbers that are the complex conjugate of each element in `input`. The
-// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the
-// real part and *b* is the imaginary part.
+// one or more square matrices.
 //
-// The complex conjugate returned by this operation is of the form \\(a - bj\\).
+// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions
+// form square matrices. The outputs are two tensors containing the signs and
+// absolute values of the log determinants for all N input submatrices
+// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant).
+// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU
+// is the LU decomposition of the input and P is the corresponding
+// permutation matrix.
 //
-// For example:
+// Arguments:
+//	input: Shape is `[N, M, M]`.
 //
-// ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
-// ```
-func Conj(scope *Scope, input tf.Output) (output tf.Output) {
+// Returns The signs of the log determinants of the inputs. Shape is `[N]`. The
+// logs of the absolute values of the determinants of the N input matrices.
+// Shape is `[N]`.
+func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Conj",
+		Type: "LogMatrixDeterminant",
 		Input: []tf.Input{
 			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
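To make the sign/log split concrete, a sketch with one invented 2x2 matrix whose determinant is 6, so the sign is 1 and log_abs_determinant is log(6):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	m := op.Const(s, [][][]float32{{{2, 0}, {0, 3}}}) // batch of one matrix
	sign, logAbs := op.LogMatrixDeterminant(s, m)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{sign, logAbs}, nil)
	// determinant = sign * exp(log_abs_determinant).
	fmt.Println(out[0].Value(), out[1].Value()) // [1] [1.7917595]
}
```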
+
+// Says whether the targets are in the top `K` predictions.
+//
+// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
+// prediction for the target class is among the top `k` predictions among
+// all predictions for example `i`. Note that the behavior of `InTopK` differs
+// from the `TopK` op in its handling of ties; if multiple classes have the
+// same prediction value and straddle the top-`k` boundary, all of those
+// classes are considered to be in the top `k`.
+//
+// More formally, let
+//
+//   \\(predictions_i\\) be the predictions for all classes for example `i`,
+//   \\(targets_i\\) be the target class for example `i`,
+//   \\(out_i\\) be the output for example `i`,
+//
+// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
+//
+// Arguments:
+//	predictions: A `batch_size` x `classes` tensor.
+//	targets: A `batch_size` vector of class ids.
+//	k: Number of top elements to look at for computing precision.
+//
+// Returns Computed precision at `k` as a `bool Tensor`.
+func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "InTopKV2",
+		Input: []tf.Input{
+			predictions, targets, k,
+		},
+	}
+	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
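A short sketch with invented predictions; example 0 has its target in the top 1, example 1 does not:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	predictions := op.Const(s, [][]float32{{0.1, 0.8, 0.1}, {0.5, 0.3, 0.2}})
	targets := op.Const(s, []int32{1, 2}) // target class per example
	k := op.Const(s, int32(1))
	inTop := op.InTopKV2(s, predictions, targets, k)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{inTop}, nil)
	fmt.Println(out[0].Value()) // [true false]
}
```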
 
-// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop.
-type CudnnRNNBackpropAttr func(optionalAttr)
-
-// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
-}
-
-// CudnnRNNBackpropDirection sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["direction"] = value
-	}
-}
-
-// CudnnRNNBackpropDropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
-	}
-}
-
-// CudnnRNNBackpropSeed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Backprop step of CudnnRNN.
+// Check if the input matches the regex pattern.
 //
-// Compute the backprop of both data and weights in a RNN.
+// The input is a string tensor of any shape. The pattern is a scalar
+// string tensor which is applied to every element of the input tensor.
+// The boolean values (True or False) of the output tensor indicate
+// if the input matches the regex pattern provided.
 //
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicate whether there is a linear projection between the input and
-//     the actual computation before the first layer. 'skip_input' is only allowed
-//     when input_size == num_units; 'auto_select' implies 'skip_input' when
-//     input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used. Should be
-//   "unidirectional" or "bidirectional".
-// dropout: Dropout probability. When set to 0., dropout is disabled.
-// seed: The 1st part of a seed to initialize dropout.
-// seed2: The 2nd part of a seed to initialize dropout.
-// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
-// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
-//     num_units].
-// input_c: For LSTM, a 3-D tensor with the shape of
-//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
-// params: A 1-D tensor that contains the weights and biases in an opaque layout.
-//     The size must be created through CudnnRNNParamsSize, and initialized
-//     separately. Note that they might not be compatible across different
-//     generations. So it is a good idea to save and restore
-// output: A 3-D tensor with the shape of [seq_length, batch_size,
-//     dir * num_units].
-// output_h: The same shape has input_h.
-// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
-// output_backprop: A 3-D tensor with the same shape as output in the forward pass.
-// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward
-//     pass.
-// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward
-//     pass.
-// reserve_space: The same reserve_space produced in for forward operation.
-// input_backprop: The backprop to input in the forward pass. Has the same shape
-//     as input.
-// input_h_backprop: The backprop to input_h in the forward pass. Has the same
-//     shape as input_h.
-// input_c_backprop: The backprop to input_c in the forward pass. Has the same
-//     shape as input_c.
-// params_backprop: The backprop to the params buffer in the forward pass. Has the
-//     same shape as params.
-func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) {
+// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+//
+// Arguments:
+//	input: A string tensor of the text to be processed.
+//	pattern: A scalar string tensor containing the regular expression to match the input.
+//
+// Returns A bool tensor with the same shape as `input`.
+func RegexFullMatch(scope *Scope, input tf.Output, pattern tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RegexFullMatch",
+		Input: []tf.Input{
+			input, pattern,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
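A sketch matching a full-string re2 pattern against two invented inputs:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	input := op.Const(s, []string{"abc123", "abc"})
	pattern := op.Const(s, "[a-z]+[0-9]+") // re2 syntax, applied to every element
	match := op.RegexFullMatch(s, input, pattern)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{match}, nil)
	fmt.Println(out[0].Value()) // [true false]
}
```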
+
+// Converts a `RaggedTensor` into a `SparseTensor` with the same values.
+//
+// input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits)
+// output=SparseTensor(indices=sparse_indices, values=sparse_values,
+//                     dense_shape=sparse_dense_shape)
+//
+// Arguments:
+//	rt_nested_splits: The `row_splits` for the `RaggedTensor`.
+//	rt_dense_values: The `flat_values` for the `RaggedTensor`.
+//
+// Returns The indices for the `SparseTensor`. The values of the `SparseTensor`.
+// `sparse_dense_shape` is a tight bounding box of the input `RaggedTensor`.
+func RaggedTensorToSparse(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output) (sparse_indices tf.Output, sparse_values tf.Output, sparse_dense_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RaggedTensorToSparse",
+		Input: []tf.Input{
+			tf.OutputList(rt_nested_splits), rt_dense_values,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2.
+type FusedBatchNormGradV2Attr func(optionalAttr)
+
+// FusedBatchNormGradV2Epsilon sets the optional epsilon attribute to value.
+//
+// value: A small float number added to the variance of x.
+// If not specified, defaults to 0.0001
+func FusedBatchNormGradV2Epsilon(value float32) FusedBatchNormGradV2Attr {
+	return func(m optionalAttr) {
+		m["epsilon"] = value
+	}
+}
+
+// FusedBatchNormGradV2DataFormat sets the optional data_format attribute to value.
+//
+// value: The data format for y_backprop, x, x_backprop.
+// Either "NHWC" (default) or "NCHW".
+// If not specified, defaults to "NHWC"
+func FusedBatchNormGradV2DataFormat(value string) FusedBatchNormGradV2Attr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// FusedBatchNormGradV2IsTraining sets the optional is_training attribute to value.
+//
+// value: A bool value to indicate the operation is for training (default)
+// or inference.
+// If not specified, defaults to true
+func FusedBatchNormGradV2IsTraining(value bool) FusedBatchNormGradV2Attr {
+	return func(m optionalAttr) {
+		m["is_training"] = value
+	}
+}
+
+// Gradient for batch normalization.
+//
+// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
+// The size of 1D Tensors matches the dimension C of the 4D Tensors.
+//
+// Arguments:
+//	y_backprop: A 4D Tensor for the gradient with respect to y.
+//	x: A 4D Tensor for input data.
+//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
+//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
+// mean to be reused in gradient computation. When is_training is
+// False, a 1D Tensor for the population mean to be reused in both
+// 1st and 2nd order gradient computation.
+//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
+// variance (inverted variance in the cuDNN case) to be reused in
+// gradient computation. When is_training is False, a 1D Tensor
+// for the population variance to be reused in both 1st and 2nd
+// order gradient computation.
+//
+// Returns A 4D Tensor for the gradient with respect to x. A 1D Tensor for the
+// gradient with respect to scale. A 1D Tensor for the gradient with respect to
+// offset. Unused placeholder to match the mean input in FusedBatchNorm. Unused
+// placeholder to match the variance input in FusedBatchNorm.
+func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradV2Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14965,94 +12332,146 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CudnnRNNBackprop",
+		Type: "FusedBatchNormGradV2",
 		Input: []tf.Input{
-			input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space,
+			y_backprop, x, scale, reserve_space_1, reserve_space_2,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
 }
 
-// Encode audio data using the WAV file format.
+// Component-wise multiplies a SparseTensor by a dense Tensor.
 //
-// This operation will generate a string suitable to be saved out to create a .wav
-// audio file. It will be encoded in the 16-bit PCM format. It takes in float
-// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
-// that range.
+// The output locations corresponding to the implicitly zero elements in the sparse
+// tensor will be zero (i.e., will not take up storage space), regardless of the
+// contents of the dense tensor (even if it is +/-INF, and despite INF*0 == NaN).
 //
-// `audio` is a 2-D float Tensor of shape `[length, channels]`.
-// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
+// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not
+// the other direction.
 //
 // Arguments:
-//	audio: 2-D with shape `[length, channels]`.
-//	sample_rate: Scalar containing the sample frequency.
+//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//	dense: `R`-D.  The dense Tensor operand.
 //
-// Returns 0-D. WAV-encoded file contents.
-func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
+// Returns 1-D.  The `N` values that are operated on.
+func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "EncodeWav",
+		Type: "SparseDenseCwiseMul",
 		Input: []tf.Input{
-			audio, sample_rate,
+			sp_indices, sp_values, sp_shape, dense,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes atan of x element-wise.
-func Atan(scope *Scope, x tf.Output) (y tf.Output) {
+// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad.
+type MaxPool3DGradAttr func(optionalAttr)
+
+// MaxPool3DGradDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Computes gradients of max pooling function.
+//
+// Arguments:
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MaxPool3DGrad",
+		Input: []tf.Input{
+			orig_input, orig_output, grad,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the name of the device on which `resource` has been placed.
+func ExperimentalIteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Atan",
+		Type: "ExperimentalIteratorGetDevice",
 		Input: []tf.Input{
-			x,
+			resource,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
-type ResourceApplyAdaMaxAttr func(optionalAttr)
+// SparseReduceSumAttr is an optional argument to SparseReduceSum.
+type SparseReduceSumAttr func(optionalAttr)
 
-// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
+// SparseReduceSumKeepDims sets the optional keep_dims attribute to value.
 //
-// value: If `True`, updating of the var, m, and v tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
+// value: If true, retain reduced dimensions with length 1.
 // If not specified, defaults to false
-func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
+func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// Update '*var' according to the AdaMax algorithm.
+// Computes the sum of elements across dimensions of a SparseTensor.
 //
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// v_t <- max(beta2 * v_{t-1}, abs(g))
-// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_sum()`.  In particular, this Op also returns a dense `Tensor`
+// instead of a sparse one.
+//
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
+//
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes may be negative;
+// they are interpreted according to the indexing rules in Python.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	v: Should be from a Variable().
-//	beta1_power: Must be a scalar.
-//	lr: Scaling factor. Must be a scalar.
-//	beta1: Momentum factor. Must be a scalar.
-//	beta2: Momentum factor. Must be a scalar.
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
 //
-// Returns the created operation.
-func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
+// Returns `R-K`-D.  The reduced Tensor.
+func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15061,39 +12480,76 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdaMax",
+		Type: "SparseReduceSum",
 		Input: []tf.Input{
-			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
+			input_indices, input_values, input_shape, reduction_axes,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
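A sketch reducing an invented 2x2 SparseTensor, [[1, _], [_, 3]] with implicit zeros, along axis 1:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	indices := op.Const(s, [][]int64{{0, 0}, {1, 1}}) // non-zero positions
	values := op.Const(s, []int32{1, 3})
	shape := op.Const(s, []int64{2, 2})
	axes := op.Const(s, []int32{1}) // sum within each row
	sum := op.SparseReduceSum(s, indices, values, shape, axes)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{sum}, nil)
	fmt.Println(out[0].Value()) // [1 3], returned as a dense Tensor
}
```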
 
-// AssertAttr is an optional argument to Assert.
-type AssertAttr func(optionalAttr)
+// Records the latency of producing `input_dataset` elements in a StatsAggregator.
+func ExperimentalLatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalLatencyStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// AssertSummarize sets the optional summarize attribute to value.
+// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
 //
-// value: Print this many entries of each tensor.
-// If not specified, defaults to 3
-func AssertSummarize(value int64) AssertAttr {
+// This Op does not require `a_indices` be sorted in standard lexicographic order.
+//
+// Arguments:
+//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
+//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
+//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
+//	b: `ndims`-D Tensor.  With shape `a_shape`.
+func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseTensorDenseAdd",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
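A sketch adding an invented sparse matrix (5 at [0,1], 7 at [1,0]) to a dense one:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	aIndices := op.Const(s, [][]int64{{0, 1}, {1, 0}})
	aValues := op.Const(s, []int32{5, 7})
	aShape := op.Const(s, []int64{2, 2})
	b := op.Const(s, [][]int32{{1, 2}, {3, 4}})
	sum := op.SparseTensorDenseAdd(s, aIndices, aValues, aShape, b)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{sum}, nil)
	fmt.Println(out[0].Value()) // [[1 7] [10 4]]
}
```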
+
+// QuantizedReluAttr is an optional argument to QuantizedRelu.
+type QuantizedReluAttr func(optionalAttr)
+
+// QuantizedReluOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_QUINT8
+func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr {
 	return func(m optionalAttr) {
-		m["summarize"] = value
+		m["out_type"] = value
 	}
 }
 
-// Asserts that the given condition is true.
-//
-// If `condition` evaluates to false, print the list of tensors in `data`.
-// `summarize` determines how many entries of the tensors to print.
+// Computes Quantized Rectified Linear: `max(features, 0)`
 //
 // Arguments:
-//	condition: The condition to evaluate.
-//	data: The tensors to print out when condition is false.
 //
-// Returns the created operation.
-func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
+//	min_features: The float value that the lowest quantized value represents.
+//	max_features: The float value that the highest quantized value represents.
+//
+// Returns Has the same output shape as "features". The float value that the
+// lowest quantized value represents. The float value that the highest
+// quantized value represents.
+func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15102,13 +12558,47 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Assert",
+		Type: "QuantizedRelu",
 		Input: []tf.Input{
-			condition, tf.OutputList(data),
+			features, min_features, max_features,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Reorders a SparseTensor into the canonical, row-major ordering.
+//
+// Note that by convention, all sparse ops preserve the canonical ordering along
+// increasing dimension number. The only time ordering can be violated is during
+// manual manipulation of the indices and values vectors to add entries.
+//
+// Reordering does not affect the shape of the SparseTensor.
+//
+// If the tensor has rank `R` and `N` non-empty values, `input_indices` has
+// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`.
+//
+// Arguments:
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//
+// Returns 2-D.  `N x R` matrix with the same indices as input_indices, but
+// in canonical row-major ordering. 1-D.  `N` non-empty values corresponding
+// to `output_indices`.
+func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseReorder",
+		Input: []tf.Input{
+			input_indices, input_values, input_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 
 // Split a `SparseTensor` into `num_split` tensors along one dimension.
@@ -15177,63 +12667,543 @@
 	return output_indices, output_values, output_shape
 }
 
-// Computes numerical negative value element-wise.
+// Applies sparse addition to `input` using individual values or slices
 //
-// I.e., \\(y = -x\\).
-func Neg(scope *Scope, x tf.Output) (y tf.Output) {
+// from `updates` according to indices `indices`.  The updates are non-aliasing:
+// `input` is only modified in-place if no other operations will use it.
+// Otherwise, a copy of `input` is made.  This operation has a gradient with
+// respect to both `input` and `updates`.
+//
+// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+//
+// `indices` must be an integer tensor, containing indices into `input`.
+// It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`.
+//
+// The innermost dimension of `indices` (with length `K`) corresponds to
+// indices into elements (if `K = P`) or `(P-K)`-dimensional slices
+// (if `K < P`) along the `K`th dimension of `input`.
+//
+// `updates` is a `Tensor` of rank `Q-1+P-K` with shape:
+//
+// $$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$
+//
+// For example, say we want to add 4 scattered elements to a rank-1 tensor with
+// elements. In Python, that addition would look like this:
+//
+//     input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
+//     indices = tf.constant([[4], [3], [1], [7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     output = tf.scatter_nd_non_aliasing_add(input, indices, updates)
+//     with tf.Session() as sess:
+//       print(sess.run(output))
+//
+// The resulting value `output` would look like this:
+//
+//     [1, 13, 3, 14, 14, 6, 7, 20]
+//
+// See `tf.scatter_nd` for more details about how to make updates to slices.
+//
+// Arguments:
+//	input: A Tensor.
+//	indices: A Tensor. Must be one of the following types: `int32`, `int64`.
+// A tensor of indices into `input`.
+//	updates: A Tensor. Must have the same type as ref. A tensor of updated values
+// to add to `input`.
+//
+// Returns A `Tensor` with the same shape as `input`, containing values of `input`
+// updated with `updates`.
+func ScatterNdNonAliasingAdd(scope *Scope, input tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Neg",
+		Type: "ScatterNdNonAliasingAdd",
 		Input: []tf.Input{
-			x,
+			input, indices, updates,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
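The doc example above, expressed through this wrapper with the same invented values:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	input := op.Const(s, []int32{1, 2, 3, 4, 5, 6, 7, 8})
	indices := op.Const(s, [][]int32{{4}, {3}, {1}, {7}})
	updates := op.Const(s, []int32{9, 10, 11, 12})
	output := op.ScatterNdNonAliasingAdd(s, input, indices, updates)
	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{output}, nil)
	fmt.Println(out[0].Value()) // [1 13 3 14 14 6 7 20]
}
```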
 
-// Execute a sub graph on a remote processor.
-//
-// The graph specifications(such as graph itself, input tensors and output names)
-// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo
-// as serialized_remote_fused_graph_execute_info.
-// The specifications will be passed to a dedicated registered
-// remote fused graph executor.  The executor will send the graph specifications
-// to a remote processor and execute that graph.  The execution results
-// will be passed to consumer nodes as outputs of this node.
+// Creates a MultiDeviceIterator resource.
 //
 // Arguments:
-//	inputs: Arbitrary number of tensors with arbitrary data types
+//	devices: A list of devices the iterator works across.
+//	shared_name: If non-empty, this resource will be shared under the given name
+// across multiple sessions.
+//	container: If non-empty, this resource is placed in the given container.
+// Otherwise, a default container is used.
+//	output_types: The type list for the return values.
+//	output_shapes: The list of shapes being produced.
 //
-//	serialized_remote_fused_graph_execute_info: Serialized protocol buffer
-// of RemoteFusedGraphExecuteInfo which contains graph specifications.
-//
-// Returns Arbitrary number of tensors with arbitrary data types
-func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) {
+// Returns Handle to the resource created.
+func MultiDeviceIterator(scope *Scope, devices []string, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info}
+	attrs := map[string]interface{}{"devices": devices, "shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "RemoteFusedGraphExecute",
+		Type: "MultiDeviceIterator",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// FractionalMaxPoolAttr is an optional argument to FractionalMaxPool.
+type FractionalMaxPoolAttr func(optionalAttr)
+
+// FractionalMaxPoolPseudoRandom sets the optional pseudo_random attribute to value.
+//
+// value: When set to True, generates the pooling sequence in a
+// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
+// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for the
+// difference between pseudorandom and random.
+// If not specified, defaults to false
+func FractionalMaxPoolPseudoRandom(value bool) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["pseudo_random"] = value
+	}
+}
+
+// FractionalMaxPoolOverlapping sets the optional overlapping attribute to value.
+//
+// value: When set to True, it means when pooling, the values at the boundary
+// of adjacent pooling cells are used by both cells. For example:
+//
+// `index  0  1  2  3  4`
+//
+// `value  20 5  16 3  7`
+//
+// If the pooling sequence is [0, 2, 4], then 16, at index 2, will be used twice.
+// The result would be [20, 16] for fractional max pooling.
+// If not specified, defaults to false
+func FractionalMaxPoolOverlapping(value bool) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["overlapping"] = value
+	}
+}
+
+// FractionalMaxPoolDeterministic sets the optional deterministic attribute to value.
+//
+// value: When set to True, a fixed pooling region will be used when
+// iterating over a FractionalMaxPool node in the computation graph. Mainly used
+// in unit test to make FractionalMaxPool deterministic.
+// If not specified, defaults to false
+func FractionalMaxPoolDeterministic(value bool) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["deterministic"] = value
+	}
+}
+
+// FractionalMaxPoolSeed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func FractionalMaxPoolSeed(value int64) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// FractionalMaxPoolSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func FractionalMaxPoolSeed2(value int64) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Performs fractional max pooling on the input.
+//
+// Fractional max pooling is slightly different from regular max pooling.  In
+// regular max pooling, you downsize an input set by taking the maximum value of
+// smaller N x N subsections of the set (often 2x2), and try to reduce the set by
+// a factor of N, where N is an integer.  Fractional max pooling, as you might
+// expect from the word "fractional", means that the overall reduction ratio N
+// does not have to be an integer.
+//
+// The sizes of the pooling regions are generated randomly but are fairly uniform.
+// For example, let's look at the height dimension, and the constraints on the
+// list of rows that will be pool boundaries.
+//
+// First we define the following:
+//
+// 1.  input_row_length : the number of rows from the input set
+// 2.  output_row_length : the number of rows in the output, which will be
+//     smaller than the input
+// 3.  alpha = input_row_length / output_row_length : our reduction ratio
+// 4.  K = floor(alpha)
+// 5.  row_pooling_sequence : this is the result list of pool boundary rows
+//
+// Then, row_pooling_sequence should satisfy:
+//
+// 1.  a[0] = 0 : the first value of the sequence is 0
+// 2.  a[end] = input_row_length : the last value of the sequence is the size
+// 3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
+// 4.  length(row_pooling_sequence) = output_row_length+1
+//
+// For more details on fractional max pooling, see this paper:
+// [Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071)
+//
+// Arguments:
+//	value: 4-D with shape `[batch, height, width, channels]`.
+//	pooling_ratio: Pooling ratio for each dimension of `value`; currently only
+// the row and col dimensions are supported, and the ratio should be >= 1.0.
+// For example, a valid pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The
+// first and last elements must be 1.0 because we don't allow pooling on the
+// batch and channels dimensions. 1.44 and 1.73 are the pooling ratios on the
+// height and width dimensions respectively.
+//
+// Returns the output tensor after fractional max pooling, the row pooling
+// sequence (needed to calculate the gradient), and the column pooling
+// sequence (needed to calculate the gradient).
+func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalMaxPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"pooling_ratio": pooling_ratio}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "FractionalMaxPool",
 		Input: []tf.Input{
-			tf.OutputList(inputs),
+			value,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
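+
+// A minimal usage sketch with the optional attributes; the pooling ratios
+// are the example values from the doc comment above, and the placeholder is
+// assumed to be fed a 4-D tensor:
+//
+//	s := NewScope()
+//	value := Placeholder(s, tf.Float) // [batch, height, width, channels]
+//	out, rowSeq, colSeq := FractionalMaxPool(s, value,
+//		[]float32{1.0, 1.44, 1.73, 1.0},
+//		FractionalMaxPoolOverlapping(true),
+//		FractionalMaxPoolDeterministic(true))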
+
+// Generates sparse cross from a list of sparse and dense tensors.
+//
+// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each
+// representing features of one feature column. It outputs a 2D `SparseTensor` with
+// the batchwise crosses of these features.
+//
+// For example, if the inputs are
+//
+//     inputs[0]: SparseTensor with shape = [2, 2]
+//     [0, 0]: "a"
+//     [1, 0]: "b"
+//     [1, 1]: "c"
+//
+//     inputs[1]: SparseTensor with shape = [2, 1]
+//     [0, 0]: "d"
+//     [1, 0]: "e"
+//
+//     inputs[2]: Tensor [["f"], ["g"]]
+//
+// then the output will be
+//
+//     shape = [2, 2]
+//     [0, 0]: "a_X_d_X_f"
+//     [1, 0]: "b_X_e_X_g"
+//     [1, 1]: "c_X_e_X_g"
+//
+// if hashed_output=true then the output will be
+//
+//     shape = [2, 2]
+//     [0, 0]: FingerprintCat64(
+//                 Fingerprint64("f"), FingerprintCat64(
+//                     Fingerprint64("d"), Fingerprint64("a")))
+//     [1, 0]: FingerprintCat64(
+//                 Fingerprint64("g"), FingerprintCat64(
+//                     Fingerprint64("e"), Fingerprint64("b")))
+//     [1, 1]: FingerprintCat64(
+//                 Fingerprint64("g"), FingerprintCat64(
+//                     Fingerprint64("e"), Fingerprint64("c")))
+//
+// Arguments:
+//	indices: 2-D.  Indices of each input `SparseTensor`.
+//	values: 1-D.  Values of each `SparseTensor`.
+//	shapes: 1-D.  Shapes of each `SparseTensor`.
+//	dense_inputs: 2-D.  Columns represented by dense `Tensor`.
+//	hashed_output: If true, returns the hash of the cross instead of the string.
+// This allows us to avoid string manipulations.
+//	num_buckets: It is used if hashed_output is true.
+// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value.
+//	hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
+// function to combine the crosses fingerprints.
+//
+// Returns:
+//	2-D.  Indices of the concatenated `SparseTensor`.
+//	1-D.  Non-empty values of the concatenated or hashed `SparseTensor`.
+//	1-D.  Shape of the concatenated `SparseTensor`.
+func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
-		scope.UpdateErr("RemoteFusedGraphExecute", err)
+	attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type}
+	opspec := tf.OpSpec{
+		Type: "SparseCross",
+		Input: []tf.Input{
+			tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Inverse real-valued fast Fourier transform.
+//
+// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued
+// signal over the inner-most dimension of `input`.
+//
+// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the
+// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If
+// `fft_length` is not provided, it is computed from the size of the inner-most
+// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to
+// compute `input` is odd, it should be provided since it cannot be inferred
+// properly.
+//
+// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller
+// than the corresponding dimension of `input`, the dimension is cropped. If it is
+// larger, the dimension is padded with zeros.
+//
+// Arguments:
+//	input: A complex64 tensor.
+//	fft_length: An int32 tensor of shape [1]. The FFT length.
+//
+// Returns A float32 tensor of the same rank as `input`. The inner-most
+//   dimension of `input` is replaced with the `fft_length` samples of its inverse
+//   1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.irfft
+// @end_compatibility
+func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
 		return
 	}
-	return outputs
+	opspec := tf.OpSpec{
+		Type: "IRFFT",
+		Input: []tf.Input{
+			input, fft_length,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
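+
+// A sketch of the RFFT/IRFFT round trip implied above; for an even-length
+// signal, `fft_length` can be recovered from the spectrum's inner dimension
+// as `2 * (inner - 1)`:
+//
+//	s := NewScope()
+//	signal := Const(s, []float32{1, 2, 3, 4})         // length 4
+//	spectrum := RFFT(s, signal, Const(s, []int32{4})) // inner dim 4/2+1 = 3
+//	back := IRFFT(s, spectrum, Const(s, []int32{4}))  // recovers the signal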
+
+// Concatenates a list of `SparseTensor` along the specified dimension.
+//
+// Concatenation is with respect to the dense versions of these sparse tensors.
+// It is assumed that each input is a `SparseTensor` whose elements are ordered
+// along increasing dimension number.
+//
+// All inputs' shapes must match, except for the concat dimension.  The
+// `indices`, `values`, and `shapes` lists must have the same length.
+//
+// The output shape is identical to the inputs', except along the concat
+// dimension, where it is the sum of the inputs' sizes along that dimension.
+//
+// The output elements will be resorted to preserve the sort order along
+// increasing dimension number.
+//
+// This op runs in `O(M log M)` time, where `M` is the total number of non-empty
+// values across all inputs. This is due to the need for an internal sort in
+// order to concatenate efficiently across an arbitrary dimension.
+//
+// For example, if `concat_dim = 1` and the inputs are
+//
+//     sp_inputs[0]: shape = [2, 3]
+//     [0, 2]: "a"
+//     [1, 0]: "b"
+//     [1, 1]: "c"
+//
+//     sp_inputs[1]: shape = [2, 4]
+//     [0, 1]: "d"
+//     [0, 2]: "e"
+//
+// then the output will be
+//
+//     shape = [2, 7]
+//     [0, 2]: "a"
+//     [0, 4]: "d"
+//     [0, 5]: "e"
+//     [1, 0]: "b"
+//     [1, 1]: "c"
+//
+// Graphically this is equivalent to doing
+//
+//     [    a] concat [  d e  ] = [    a   d e  ]
+//     [b c  ]        [       ]   [b c          ]
+//
+// Arguments:
+//	indices: 2-D.  Indices of each input `SparseTensor`.
+//	values: 1-D.  Non-empty values of each `SparseTensor`.
+//	shapes: 1-D.  Shapes of each `SparseTensor`.
+//	concat_dim: Dimension to concatenate along. Must be in range [-rank, rank),
+// where rank is the number of dimensions in each input `SparseTensor`.
+//
+// Returns:
+//	2-D.  Indices of the concatenated `SparseTensor`.
+//	1-D.  Non-empty values of the concatenated `SparseTensor`.
+//	1-D.  Shape of the concatenated `SparseTensor`.
+func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"concat_dim": concat_dim}
+	opspec := tf.OpSpec{
+		Type: "SparseConcat",
+		Input: []tf.Input{
+			tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
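+
+// Given a Scope s and sparse operands built elsewhere (the names aIndices,
+// aValues, and so on are placeholders, not values defined in this file), a
+// minimal sketch concatenating two sparse inputs along dimension 1:
+//
+//	outIdx, outVals, outShape := SparseConcat(s,
+//		[]tf.Output{aIndices, bIndices},
+//		[]tf.Output{aValues, bValues},
+//		[]tf.Output{aShape, bShape},
+//		1) // concat_dim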
+
+// Elementwise computes the bitwise AND of `x` and `y`.
+//
+// The result will have those bits set that are set in both `x` and `y`. The
+// computation is performed on the underlying representations of `x` and `y`.
+func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BitwiseAnd",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
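+
+// A tiny sketch; the AND is taken over the underlying bit patterns, so for
+// int32 inputs:
+//
+//	s := NewScope()
+//	z := BitwiseAnd(s, Const(s, []int32{5, 3}), Const(s, []int32{1, 6}))
+//	// z evaluates to [1, 2]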
+
+// Deserialize and concatenate `SparseTensors` from a serialized minibatch.
+//
+// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where
+// `N` is the minibatch size and the rows correspond to packed outputs of
+// `SerializeSparse`.  The ranks of the original `SparseTensor` objects
+// must all match.  When the final `SparseTensor` is created, it has rank one
+// higher than the ranks of the incoming `SparseTensor` objects
+// (they have been concatenated along a new row dimension).
+//
+// The output `SparseTensor` object's shape values for all dimensions but the
+// first are the max across the input `SparseTensor` objects' shape values
+// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
+// size.
+//
+// The input `SparseTensor` objects' indices are assumed ordered in
+// standard lexicographic order.  If this is not the case, after this
+// step run `SparseReorder` to restore index ordering.
+//
+// For example, if the serialized input is a `[2 x 3]` matrix representing two
+// original `SparseTensor` objects:
+//
+//     index = [ 0]
+//             [10]
+//             [20]
+//     values = [1, 2, 3]
+//     shape = [50]
+//
+// and
+//
+//     index = [ 2]
+//             [10]
+//     values = [4, 5]
+//     shape = [30]
+//
+// then the final deserialized `SparseTensor` will be:
+//
+//     index = [0  0]
+//             [0 10]
+//             [0 20]
+//             [1  2]
+//             [1 10]
+//     values = [1, 2, 3, 4, 5]
+//     shape = [2 50]
+//
+// Arguments:
+//	serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects.
+// Must have 3 columns.
+//	dtype: The `dtype` of the serialized `SparseTensor` objects.
+func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "DeserializeManySparse",
+		Input: []tf.Input{
+			serialized_sparse,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Deserialize `SparseTensor` objects.
+//
+// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+// the last dimension stores serialized `SparseTensor` objects and the other N
+// dimensions (N >= 0) correspond to a batch. The ranks of the original
+// `SparseTensor` objects must all match. When the final `SparseTensor` is
+// created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+// the sparse tensors have been concatenated along new dimensions, one for each
+// batch.
+//
+// The output `SparseTensor` object's shape values for the original dimensions
+// are the max across the input `SparseTensor` objects' shape values for the
+// corresponding dimensions. The new dimensions match the size of the batch.
+//
+// The input `SparseTensor` objects' indices are assumed ordered in
+// standard lexicographic order.  If this is not the case, after this
+// step run `SparseReorder` to restore index ordering.
+//
+// For example, if the serialized input is a `[2 x 3]` matrix representing two
+// original `SparseTensor` objects:
+//
+//     index = [ 0]
+//             [10]
+//             [20]
+//     values = [1, 2, 3]
+//     shape = [50]
+//
+// and
+//
+//     index = [ 2]
+//             [10]
+//     values = [4, 5]
+//     shape = [30]
+//
+// then the final deserialized `SparseTensor` will be:
+//
+//     index = [0  0]
+//             [0 10]
+//             [0 20]
+//             [1  2]
+//             [1 10]
+//     values = [1, 2, 3, 4, 5]
+//     shape = [2 50]
+//
+// Arguments:
+//	serialized_sparse: The serialized `SparseTensor` objects. The last dimension
+// must have 3 columns.
+//	dtype: The `dtype` of the serialized `SparseTensor` objects.
+func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "DeserializeSparse",
+		Input: []tf.Input{
+			serialized_sparse,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
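+
+// Given a Scope s, a round-trip sketch; SerializeManySparse is defined later
+// in this file, and the sparse operand names and the int64 dtype are
+// assumptions for illustration:
+//
+//	serialized := SerializeManySparse(s, spIndices, spValues, spShape)
+//	idx, vals, shape := DeserializeManySparse(s, serialized, tf.Int64)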
 
 // MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad.
@@ -15348,38 +13318,77 @@
 	return op.Output(0)
 }
 
-// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars.
-type FakeQuantWithMinMaxVarsAttr func(optionalAttr)
+// Execute a sub graph on a remote processor.
+//
+// The graph specifications (such as the graph itself, input tensors and output names)
+// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo
+// as serialized_remote_fused_graph_execute_info.
+// The specifications will be passed to a dedicated registered
+// remote fused graph executor.  The executor will send the graph specifications
+// to a remote processor and execute that graph.  The execution results
+// will be passed to consumer nodes as outputs of this node.
+//
+// Arguments:
+//	inputs: Arbitrary number of tensors with arbitrary data types
+//
+//	serialized_remote_fused_graph_execute_info: Serialized protocol buffer
+// of RemoteFusedGraphExecuteInfo which contains graph specifications.
+//
+// Returns Arbitrary number of tensors with arbitrary data types
+func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info}
+	opspec := tf.OpSpec{
+		Type: "RemoteFusedGraphExecute",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("RemoteFusedGraphExecute", err)
+		return
+	}
+	return outputs
+}
 
-// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr {
+// SerializeManySparseAttr is an optional argument to SerializeManySparse.
+type SerializeManySparseAttr func(optionalAttr)
+
+// SerializeManySparseOutType sets the optional out_type attribute to value.
+//
+// value: The `dtype` to use for serialization; the supported types are `string`
+// (default) and `variant`.
+// If not specified, defaults to DT_STRING
+func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr {
 	return func(m optionalAttr) {
-		m["num_bits"] = value
+		m["out_type"] = value
 	}
 }
 
-// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value.
-// If not specified, defaults to false
-func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr {
-	return func(m optionalAttr) {
-		m["narrow_range"] = value
-	}
-}
-
-// Fake-quantize the 'inputs' tensor of type float via global float scalars `min`
+// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object.
 //
-// and `max` to 'outputs' tensor of same shape as `inputs`.
+// The `SparseTensor` must have rank `R` greater than 1, and the first dimension
+// is treated as the minibatch dimension.  Elements of the `SparseTensor`
+// must be sorted in increasing order of this first dimension.  The serialized
+// `SparseTensor` objects going into each row of `serialized_sparse` will have
+// rank `R-1`.
 //
-// `[min; max]` define the clamping range for the `inputs` data.
-// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
-// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
-// then de-quantized and output as floats in `[min; max]` interval.
-// `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive.
+// The minibatch size `N` is extracted from `sparse_shape[0]`.
 //
-// This operation has a gradient and thus allows for training `min` and `max`
-// values.
-func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) {
+// Arguments:
+//	sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
+//	sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
+func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15388,9 +13397,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxVars",
+		Type: "SerializeManySparse",
 		Input: []tf.Input{
-			inputs, min, max,
+			sparse_indices, sparse_values, sparse_shape,
 		},
 		Attrs: attrs,
 	}
@@ -15398,24 +13407,474 @@
 	return op.Output(0)
 }
 
-// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate.
-type ResourceScatterNdUpdateAttr func(optionalAttr)
+// Computes inverse hyperbolic cosine of x element-wise.
+func Acosh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Acosh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value.
+// Computes rectified linear 6 gradients for a Relu6 operation.
+//
+// Arguments:
+//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
+//	features: The features passed as input to the corresponding Relu6 operation, or
+// its output; using either one produces the same result.
+//
+// Returns The gradients:
+// `gradients * (features > 0) * (features < 6)`.
+func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Relu6Grad",
+		Input: []tf.Input{
+			gradients, features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes natural logarithm of (1 + x) element-wise.
+//
+// I.e., \\(y = \log_e (1 + x)\\).
+func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Log1p",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResizeBicubicAttr is an optional argument to ResizeBicubic.
+type ResizeBicubicAttr func(optionalAttr)
+
+// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
+//
+// value: If true, the centers of the 4 corner pixels of the input and output tensors are
+// aligned, preserving the values at the corner pixels. Defaults to false.
+// If not specified, defaults to false
+func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Resize `images` to `size` using bicubic interpolation.
+//
+// Input images can be of different types but output images are always float.
+//
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
+//
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResizeBicubic",
+		Input: []tf.Input{
+			images, size,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
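+
+// A minimal sketch resizing a batch of images to 299x299 with corner
+// alignment (the placeholder is assumed to be fed a 4-D image tensor):
+//
+//	s := NewScope()
+//	images := Placeholder(s, tf.Float) // [batch, height, width, channels]
+//	resized := ResizeBicubic(s, images, Const(s, []int32{299, 299}),
+//		ResizeBicubicAlignCorners(true))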
+
+// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul.
+type SparseTensorDenseMatMulAttr func(optionalAttr)
+
+// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value.
+//
+// value: Use the adjoint of A in the matrix multiply.  If A is complex, this
+// is transpose(conj(A)).  Otherwise it's transpose(A).
+// If not specified, defaults to false
+func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr {
+	return func(m optionalAttr) {
+		m["adjoint_a"] = value
+	}
+}
+
+// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value.
+//
+// value: Use the adjoint of B in the matrix multiply.  If B is complex, this
+// is transpose(conj(B)).  Otherwise it's transpose(B).
+// If not specified, defaults to false
+func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr {
+	return func(m optionalAttr) {
+		m["adjoint_b"] = value
+	}
+}
+
+// Multiply SparseTensor (of rank 2) "A" by dense matrix "B".
+//
+// No validity checking is performed on the indices of A.  However, the following
+// input format is recommended for optimal behavior:
+//
+// if adjoint_a == false:
+//   A should be sorted in lexicographically increasing order.  Use SparseReorder
+//   if you're not sure.
+// if adjoint_a == true:
+//   A should be sorted in order of increasing dimension 1 (i.e., "column major"
+//   order instead of "row major" order).
+//
+// Arguments:
+//	a_indices: 2-D.  The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix.
+//	a_values: 1-D.  The `values` of the `SparseTensor`, size `[nnz]` Vector.
+//	a_shape: 1-D.  The `shape` of the `SparseTensor`, size `[2]` Vector.
+//	b: 2-D.  A dense Matrix.
+func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseTensorDenseMatMul",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
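+
+// Given a Scope s and operands built elsewhere (aIndices, aValues, aShape
+// and b are placeholders), a sketch computing transpose(A) * B for a rank-2
+// SparseTensor A and a dense matrix b:
+//
+//	product := SparseTensorDenseMatMul(s, aIndices, aValues, aShape, b,
+//		SparseTensorDenseMatMulAdjointA(true))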
+
+// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg.
+type DecodeAndCropJpegAttr func(optionalAttr)
+
+// DecodeAndCropJpegChannels sets the optional channels attribute to value.
+//
+// value: Number of color channels for the decoded image.
+// If not specified, defaults to 0
+func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["channels"] = value
+	}
+}
+
+// DecodeAndCropJpegRatio sets the optional ratio attribute to value.
+//
+// value: Downscaling ratio.
+// If not specified, defaults to 1
+func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["ratio"] = value
+	}
+}
+
+// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
+//
+// value: If true, use a slower but nicer upscaling of the
+// chroma planes (yuv420/422 only).
+// If not specified, defaults to true
+func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["fancy_upscaling"] = value
+	}
+}
+
+// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
+//
+// value: If true, try to recover an image from truncated input.
+// If not specified, defaults to false
+func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["try_recover_truncated"] = value
+	}
+}
+
+// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
+//
+// value: The minimum required fraction of lines before a truncated
+// input is accepted.
+// If not specified, defaults to 1
+func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["acceptable_fraction"] = value
+	}
+}
+
+// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value.
+//
+// value: string specifying a hint about the algorithm used for
+// decompression.  Defaults to "" which maps to a system-specific
+// default.  Currently valid values are ["INTEGER_FAST",
+// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
+// jpeg library changes to a version that does not have that specific
+// option.)
+// If not specified, defaults to ""
+func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["dct_method"] = value
+	}
+}
+
+// Decode and crop a JPEG-encoded image to a uint8 tensor.
+//
+// The attr `channels` indicates the desired number of color channels for the
+// decoded image.
+//
+// Accepted values are:
+//
+// *   0: Use the number of channels in the JPEG-encoded image.
+// *   1: Output a grayscale image.
+// *   3: Output an RGB image.
+//
+// If needed, the JPEG-encoded image is transformed to match the requested number
+// of color channels.
+//
+// The attr `ratio` allows downscaling the image by an integer factor during
+// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
+// downscaling the image later.
+//
+// This op is equivalent to a combination of decode and crop, but much
+// faster: it only decodes the part of the JPEG image within the crop window.
+//
+// Arguments:
+//	contents: 0-D.  The JPEG-encoded image.
+//	crop_window: 1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width].
+//
+// Returns 3-D with shape `[height, width, channels]`.
+func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeAndCropJpeg",
+		Input: []tf.Input{
+			contents, crop_window,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
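+
+// A minimal sketch decoding only a 100x200 window starting at row 10,
+// column 20 of a JPEG file; the file name is an assumption for
+// illustration:
+//
+//	s := NewScope()
+//	contents := ReadFile(s, Const(s, "image.jpg"))
+//	crop := Const(s, []int32{10, 20, 100, 200}) // [y, x, height, width]
+//	image := DecodeAndCropJpeg(s, contents, crop,
+//		DecodeAndCropJpegChannels(3))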
+
+// Adds two `SparseTensor` objects to produce another `SparseTensor`.
+//
+// The input `SparseTensor` objects' indices are assumed ordered in standard
+// lexicographic order.  If this is not the case, before this step run
+// `SparseReorder` to restore index ordering.
+//
+// By default, if two values sum to zero at some index, the output `SparseTensor`
+// would still include that particular location in its index, storing a zero in the
+// corresponding value slot.  To override this, callers can specify `thresh`,
+// indicating that if the sum has a magnitude strictly smaller than `thresh`, its
+// corresponding value and index would then not be included.  In particular,
+// `thresh == 0` (default) means everything is kept and actual thresholding happens
+// only for a positive value.
+//
+// In the following shapes, `nnz` is the count after taking `thresh` into account.
+//
+// Arguments:
+//	a_indices: 2-D.  The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix.
+//	a_values: 1-D.  The `values` of the first `SparseTensor`, size `[nnz]` Vector.
+//	a_shape: 1-D.  The `shape` of the first `SparseTensor`, size `[ndims]` Vector.
+//	b_indices: 2-D.  The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix.
+//	b_values: 1-D.  The `values` of the second `SparseTensor`, size `[nnz]` Vector.
+//	b_shape: 1-D.  The `shape` of the second `SparseTensor`, size `[ndims]` Vector.
+//	thresh: 0-D.  The magnitude threshold that determines if an output value/index
+// pair takes space.
+func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseAdd",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
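+
+// Given a Scope s and sparse operands built elsewhere (the names are
+// placeholders), a sketch adding two sparse tensors and dropping any sums
+// whose magnitude falls below 1e-6:
+//
+//	thresh := Const(s, float32(1e-6))
+//	sumIdx, sumVals, sumShape := SparseAdd(s,
+//		aIndices, aValues, aShape,
+//		bIndices, bValues, bShape, thresh)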
+
+// EnqueueTPUEmbeddingSparseTensorBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseTensorBatch.
+type EnqueueTPUEmbeddingSparseTensorBatchAttr func(optionalAttr)
+
+// EnqueueTPUEmbeddingSparseTensorBatchDeviceOrdinal sets the optional device_ordinal attribute to value.
+//
+// value: The TPU device to use. Should be >= 0 and less than the number
+// of TPU cores in the task on which the node is placed.
+// If not specified, defaults to -1
+func EnqueueTPUEmbeddingSparseTensorBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseTensorBatchAttr {
+	return func(m optionalAttr) {
+		m["device_ordinal"] = value
+	}
+}
+
+// EnqueueTPUEmbeddingSparseTensorBatchCombiners sets the optional combiners attribute to value.
+//
+// value: A list of string scalars, one for each embedding table that specify
+// how to normalize the embedding activations after weighted summation.
+// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
+// the sum of the weights be 0 for 'mean' or the sum of the squared weights be
+// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
+// all tables.
+// If not specified, defaults to <>
+func EnqueueTPUEmbeddingSparseTensorBatchCombiners(value []string) EnqueueTPUEmbeddingSparseTensorBatchAttr {
+	return func(m optionalAttr) {
+		m["combiners"] = value
+	}
+}
+
+// Eases the porting of code that uses tf.nn.embedding_lookup_sparse().
+//
+// sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
+// to the ith feature. table_ids[i] indicates which embedding table to use when
+// looking up the ith feature.
+//
+// The tensors at corresponding positions in the three input lists (sample_indices,
+// embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
+// with dim_size() equal to the total number of lookups into the table described by
+// the corresponding feature.
+//
+// Arguments:
+//	sample_indices: A list of rank 1 Tensors specifying the training example to
+// which the corresponding embedding_indices and aggregation_weights values
+// belong. It corresponds to sp_ids.indices[:,0] in  embedding_lookup_sparse().
+//	embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+// It corresponds to sp_ids.values in embedding_lookup_sparse().
+//	aggregation_weights: A list of rank 1 Tensors containing per training example
+// aggregation weights. It corresponds to sp_weights.values in
+// embedding_lookup_sparse().
+//	mode_override: A string input that overrides the mode specified in the
+// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+// in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
+//	table_ids: A list of integers specifying the identifier of the embedding table
+// (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the
+// corresponding input. The ith input is looked up using table_ids[i]. The size
+// of the table_ids list must be equal to that of sample_indices,
+// embedding_indices and aggregation_weights.
+//
+// Returns the created operation.
+func EnqueueTPUEmbeddingSparseTensorBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, table_ids []int64, optional ...EnqueueTPUEmbeddingSparseTensorBatchAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"table_ids": table_ids}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EnqueueTPUEmbeddingSparseTensorBatch",
+		Input: []tf.Input{
+			tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// The gradient operator for the SparseAdd op.
+//
+// The SparseAdd op calculates A + B, where A, B, and the sum are all represented
+// as `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.
+// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
+// values of A and B.
+//
+// Arguments:
+//	backprop_val_grad: 1-D with shape `[nnz(sum)]`.  The gradient with respect to
+// the non-empty values of the sum.
+//	a_indices: 2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`.
+//	b_indices: 2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`.
+//	sum_indices: 2-D.  The `indices` of the sum `SparseTensor`, size
+// `[nnz(sum), ndims]`.
+//
+// Returns:
+//	1-D with shape `[nnz(A)]`.  The gradient with respect to the
+// non-empty values of A.
+//	1-D with shape `[nnz(B)]`.  The gradient with respect to the
+// non-empty values of B.
+func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseAddGrad",
+		Input: []tf.Input{
+			backprop_val_grad, a_indices, b_indices, sum_indices,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// This op consumes a lock created by `MutexLock`.
+//
+// This op exists to consume a tensor created by `MutexLock` (other than
+// direct control dependencies).  It should be the only op that consumes the tensor,
+// and will raise an error if it is not.  Its only purpose is to keep the
+// mutex lock tensor alive until it is consumed by this op.
+//
+// **NOTE**: This operation must run on the same device as its input.  This may
+// be enforced via the `colocate_with` mechanism.
+//
+// Arguments:
+//	mutex_lock: A tensor returned by `MutexLock`.
+//
+// Returns the created operation.
+func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ConsumeMutexLock",
+		Input: []tf.Input{
+			mutex_lock,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd.
+type ResourceScatterNdAddAttr func(optionalAttr)
+
+// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value.
 //
 // value: An optional bool. Defaults to True. If True, the assignment will
 // be protected by a lock; otherwise the behavior is undefined,
 // but may exhibit less contention.
 // If not specified, defaults to true
-func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr {
+func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr {
 	return func(m optionalAttr) {
 		m["use_locking"] = value
 	}
 }
 
-// Applies sparse `updates` to individual values or slices within a given
-//
-// variable according to `indices`.
+// Applies sparse addition to individual values or slices in a Variable.
 //
 // `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
 //
@@ -15429,24 +13888,24 @@
 // `updates` is `Tensor` of rank `Q-1+P-K` with shape:
 //
 // ```
-// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]
 // ```
 //
-// For example, say we want to update 4 scattered elements to a rank-1 tensor to
-// 8 elements. In Python, that update would look like this:
+// For example, say we want to add 4 scattered elements to a rank-1 tensor to
+// 8 elements. In Python, that addition would look like this:
 //
 // ```python
-//     ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
-//     indices = tf.constant([[4], [3], [1] ,[7]])
-//     updates = tf.constant([9, 10, 11, 12])
-//     update = tf.scatter_nd_update(ref, indices, updates)
-//     with tf.Session() as sess:
-//       print sess.run(update)
+// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True)
+// indices = tf.constant([[4], [3], [1], [7]])
+// updates = tf.constant([9, 10, 11, 12])
+// add = tf.scatter_nd_add(ref, indices, updates)
+// with tf.Session() as sess:
+//   print(sess.run(add))
 // ```
 //
 // The resulting update to ref would look like this:
 //
-//     [1, 11, 3, 10, 9, 6, 7, 12]
+//     [1, 13, 3, 14, 14, 6, 7, 20]
 //
 // See `tf.scatter_nd` for more details about how to make updates to
 // slices.
@@ -15455,11 +13914,11 @@
 //	ref: A resource handle. Must be from a VarHandleOp.
 //	indices: A Tensor. Must be one of the following types: int32, int64.
 // A tensor of indices into ref.
-//	updates: A Tensor. Must have the same type as ref. A tensor of updated
+//	updates: A Tensor. Must have the same type as ref. A tensor of
 // values to add to ref.
 //
 // Returns the created operation.
-func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) {
+func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15468,7 +13927,7 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceScatterNdUpdate",
+		Type: "ResourceScatterNdAdd",
 		Input: []tf.Input{
 			ref, indices, updates,
 		},
@@ -15477,229 +13936,166 @@
 	return scope.AddOperation(opspec)
 }
 
-// Produces a string handle for the given MultiDeviceIterator.
+// Replaces the contents of the table with the specified keys and values.
+//
+// The tensor `keys` must be of the same type as the keys of the table.
+// The tensor `values` must be of the type of the table values.
 //
 // Arguments:
-//	multi_device_iterator: A MultiDeviceIterator resource.
+//	table_handle: Handle to the table.
+//	keys: Any shape.  Keys to look up.
+//	values: Values to associate with keys.
 //
-// Returns A string representing the resource.
-func MultiDeviceIteratorToStringHandle(scope *Scope, multi_device_iterator tf.Output) (string_handle tf.Output) {
+// Returns the created operation.
+func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "MultiDeviceIteratorToStringHandle",
+		Type: "LookupTableImportV2",
 		Input: []tf.Input{
-			multi_device_iterator,
+			table_handle, keys, values,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// CudnnRNNV3Attr is an optional argument to CudnnRNNV3.
-type CudnnRNNV3Attr func(optionalAttr)
-
-// CudnnRNNV3RnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNV3RnnMode(value string) CudnnRNNV3Attr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNV3InputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNV3InputMode(value string) CudnnRNNV3Attr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
-}
-
-// CudnnRNNV3Direction sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNV3Direction(value string) CudnnRNNV3Attr {
-	return func(m optionalAttr) {
-		m["direction"] = value
-	}
-}
-
-// CudnnRNNV3Dropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNV3Dropout(value float32) CudnnRNNV3Attr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
-	}
-}
-
-// CudnnRNNV3Seed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNV3Seed(value int64) CudnnRNNV3Attr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// CudnnRNNV3Seed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNV3Seed2(value int64) CudnnRNNV3Attr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// CudnnRNNV3IsTraining sets the optional is_training attribute to value.
-// If not specified, defaults to true
-func CudnnRNNV3IsTraining(value bool) CudnnRNNV3Attr {
-	return func(m optionalAttr) {
-		m["is_training"] = value
-	}
-}
-
-// A RNN backed by cuDNN.
+// Extract `patches` from `images` and put them in the "depth" output dimension.
 //
-// Computes the RNN from the input and initial states, with respect to the params
-// buffer. Accepts one extra input "sequence_lengths" than CudnnRNN.
+// Arguments:
+//	images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
+//	ksizes: The size of the sliding window for each dimension of `images`.
+//	strides: 1-D of length 4. How far the centers of two consecutive patches are in
+// the images. Must be: `[1, stride_rows, stride_cols, 1]`.
+//	rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
+// input stride, specifying how far two consecutive patch samples are in the
+// input. Equivalent to extracting patches with
+// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
+// subsampling them spatially by a factor of `rates`. This is equivalent to
+// `rate` in dilated (a.k.a. Atrous) convolutions.
+//	padding: The type of padding algorithm to use.
 //
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicates whether there is a linear projection between the input and
-//   the actual computation before the first layer. 'skip_input' is only allowed
-//   when input_size == num_units; 'auto_select' implies 'skip_input' when
-//   input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used. Should be
-//   "unidirectional" or "bidirectional".
-// dropout: Dropout probability. When set to 0., dropout is disabled.
-// seed: The 1st part of a seed to initialize dropout.
-// seed2: The 2nd part of a seed to initialize dropout.
-// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
-// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
-//     num_units].
-// input_c: For LSTM, a 3-D tensor with the shape of
-//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
-// params: A 1-D tensor that contains the weights and biases in an opaque layout.
-//     The size must be created through CudnnRNNParamsSize, and initialized
-//     separately. Note that they might not be compatible across different
-//     generations. So it is a good idea to save and restore
-// sequence_lengths: a vector of lengths of each input sequence.
-// output: A 3-D tensor with the shape of [seq_length, batch_size,
-//     dir * num_units].
-// output_h: The same shape has input_h.
-// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
-// is_training: Indicates whether this operation is used for inferenece or
-//   training.
-// reserve_space: An opaque tensor that can be used in backprop calculation. It
-//   is only produced if is_training is true.
-func CudnnRNNV3(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, sequence_lengths tf.Output, optional ...CudnnRNNV3Attr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output, host_reserved tf.Output) {
+// We specify the size-related attributes as:
+//
+// ```python
+//       ksizes = [1, ksize_rows, ksize_cols, 1]
+//       strides = [1, strides_rows, strides_cols, 1]
+//       rates = [1, rates_rows, rates_cols, 1]
+// ```
+//
+// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows *
+// ksize_cols * depth]` containing image patches with size
+// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note
+// `out_rows` and `out_cols` are the dimensions of the output patches.
+func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "CudnnRNNV3",
+		Type: "ExtractImagePatches",
 		Input: []tf.Input{
-			input, input_h, input_c, params, sequence_lengths,
+			images,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+	return op.Output(0)
 }
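+
+// A sketch extracting 3x3 patches at stride 1 with no dilation, mirroring
+// the size-related attributes described above (the placeholder is assumed
+// to be fed a 4-D image tensor):
+//
+//	s := NewScope()
+//	images := Placeholder(s, tf.Float) // [batch, in_rows, in_cols, depth]
+//	patches := ExtractImagePatches(s, images,
+//		[]int64{1, 3, 3, 1}, // ksizes
+//		[]int64{1, 1, 1, 1}, // strides
+//		[]int64{1, 1, 1, 1}, // rates
+//		"SAME")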
 
-// Applies softmax to a batched N-D `SparseTensor`.
+// Computes the mean along sparse segments of a tensor.
 //
-// The inputs represent an N-D SparseTensor  with logical shape `[..., B, C]`
-// (where `N >= 2`), and with indices sorted in the canonical lexicographic order.
+// See `tf.sparse.segment_sum` for usage examples.
 //
-// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost
-// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly
-// zero elements do not participate*.  Specifically, the algorithm is equivalent
-// to the following:
-//
-//   (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix
-//       with shape `[B, C]`, along the size-C dimension;
-//   (2) Masks out the original implicitly-zero locations;
-//   (3) Renormalizes the remaining elements.
-//
-// Hence, the `SparseTensor` result has exactly the same non-zero indices and
-// shape.
+// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
+// dimension, selecting a subset of dimension 0, specified by `indices`.
 //
 // Arguments:
-//	sp_indices: 2-D.  `NNZ x R` matrix with the indices of non-empty values in a
-// SparseTensor, in canonical ordering.
-//	sp_values: 1-D.  `NNZ` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
 //
-// Returns 1-D.  The `NNZ` values for the result `SparseTensor`.
-func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) {
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseSoftmax",
+		Type: "SparseSegmentMean",
 		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape,
+			data, indices, segment_ids,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
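+
+// A tiny worked sketch: rows 0 and 1 of `data` are averaged into segment 0
+// and row 3 goes to segment 1:
+//
+//	s := NewScope()
+//	data := Const(s, [][]float32{{1, 2}, {3, 4}, {5, 6}, {7, 8}})
+//	out := SparseSegmentMean(s, data,
+//		Const(s, []int32{0, 1, 3}), // indices
+//		Const(s, []int32{0, 0, 1})) // segment_ids
+//	// out evaluates to [[2, 3], [7, 8]]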
 
-// Partitions `data` into `num_partitions` tensors using indices from `partitions`.
+// Deserializes a serialized tree ensemble config and replaces current tree
 //
-// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]`
-// becomes part of `outputs[partitions[js]]`.  The slices with `partitions[js] = i`
-// are placed in `outputs[i]` in lexicographic order of `js`, and the first
-// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`.
-// In detail,
-//
-// ```python
-//     outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:]
-//
-//     outputs[i] = pack([data[js, ...] for js if partitions[js] == i])
-// ```
-//
-// `data.shape` must start with `partitions.shape`.
-//
-// For example:
-//
-// ```python
-//     # Scalar partitions.
-//     partitions = 1
-//     num_partitions = 2
-//     data = [10, 20]
-//     outputs[0] = []  # Empty with shape [0, 2]
-//     outputs[1] = [[10, 20]]
-//
-//     # Vector partitions.
-//     partitions = [0, 0, 1, 1, 0]
-//     num_partitions = 2
-//     data = [10, 20, 30, 40, 50]
-//     outputs[0] = [10, 20, 50]
-//     outputs[1] = [30, 40]
-// ```
-//
-// See `dynamic_stitch` for an example on how to merge partitions back.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicPartition.png" alt>
-// </div>
+// ensemble.
 //
 // Arguments:
+//	tree_ensemble_handle: Handle to the tree ensemble.
+//	stamp_token: Token to use as the new value of the resource stamp.
+//	tree_ensemble_serialized: Serialized proto of the ensemble.
 //
-//	partitions: Any shape.  Indices in the range `[0, num_partitions)`.
-//	num_partitions: The number of partitions to output.
-func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) {
+// Returns the created operation.
+func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_partitions": num_partitions}
 	opspec := tf.OpSpec{
-		Type: "DynamicPartition",
+		Type: "BoostedTreesDeserializeEnsemble",
 		Input: []tf.Input{
-			data, partitions,
+			tree_ensemble_handle, stamp_token, tree_ensemble_serialized,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Transforms a tf.Example proto (as a string) into typed tensors.
+//
+// Arguments:
+//	serialized: A vector containing a batch of binary serialized Example protos.
+//	dense_defaults: A list of Tensors (some may be empty), whose length matches
+// the length of `dense_keys`. dense_defaults[j] provides default values
+// when the example's feature_map lacks dense_key[j].  If an empty Tensor is
+// provided for dense_defaults[j], then the Feature dense_keys[j] is required.
+// The input type is inferred from dense_defaults[j], even when it's empty.
+// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
+// then the shape of dense_defaults[j] must match that of dense_shapes[j].
+// If dense_shapes[j] has an undefined major dimension (variable strides dense
+// feature), dense_defaults[j] must contain a single element:
+// the padding element.
+//	num_sparse: The number of sparse features to be parsed from the example. This
+// must match the lengths of `sparse_keys` and `sparse_types`.
+//	sparse_keys: A list of `num_sparse` strings.
+// The keys expected in the Examples' features associated with sparse values.
+//	dense_keys: The keys expected in the Examples' features associated with dense
+// values.
+//	sparse_types: A list of `num_sparse` types; the data types of data in each
+// Feature given in sparse_keys.
+// Currently the ParseSingleExample op supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+//	dense_shapes: The shapes of data in each Feature given in dense_keys.
+// The length of this list must match the length of `dense_keys`.  The
+// number of elements in the Feature corresponding to dense_key[j] must
+// always equal dense_shapes[j].NumEntries().  If dense_shapes[j] ==
+// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j]
+// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1,
+// ..., DN), the shape of the output Tensor dense_values[j] will be (M,
+// D1, .., DN), where M is the number of blocks of elements of length
+// D1 * .... * DN, in the input.
+func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes}
+	opspec := tf.OpSpec{
+		Type: "ParseSingleExample",
+		Input: []tf.Input{
+			serialized, tf.OutputList(dense_defaults),
 		},
 		Attrs: attrs,
 	}
@@ -15709,49 +14105,57 @@
 	}
 	var idx int
 	var err error
-	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
-		scope.UpdateErr("DynamicPartition", err)
+	if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
 		return
 	}
-	return outputs
+	if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	return sparse_indices, sparse_values, sparse_shapes, dense_values
 }
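+
+// exampleParseSingleExample is a hypothetical usage sketch (not part of the
+// generated bindings): it shows how the parallel attrs of ParseSingleExample
+// line up for one sparse string feature ("tags") and one dense float feature
+// ("weight") of shape [1], with a default supplied so "weight" becomes
+// optional. The feature names are illustrative only.
+func exampleParseSingleExample(s *Scope, serialized tf.Output) (sparseValues, denseValues []tf.Output) {
+	// An empty default would make "weight" required; a [1]-shaped default
+	// matching dense_shapes[0] makes it optional.
+	weightDefault := Const(s.SubScope("weight_default"), []float32{0})
+	_, sparseValues, _, denseValues = ParseSingleExample(s, serialized,
+		[]tf.Output{weightDefault},  // dense_defaults, parallel to dense_keys
+		1,                           // num_sparse
+		[]string{"tags"},            // sparse_keys
+		[]string{"weight"},          // dense_keys
+		[]tf.DataType{tf.String},    // sparse_types
+		[]tf.Shape{tf.MakeShape(1)}, // dense_shapes
+	)
+	return sparseValues, denseValues
+}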
 
-// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad.
-type ResourceApplyAdagradAttr func(optionalAttr)
+// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2.
+type WholeFileReaderV2Attr func(optionalAttr)
 
-// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value.
+// WholeFileReaderV2Container sets the optional container attribute to value.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr {
+// value: If non-empty, this reader is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["container"] = value
 	}
 }
 
-// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value.
-// If not specified, defaults to true
-func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr {
+// WholeFileReaderV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this reader is named in the given bucket
+// with this shared_name. Otherwise, the node name is used instead.
+// If not specified, defaults to ""
+func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr {
 	return func(m optionalAttr) {
-		m["update_slots"] = value
+		m["shared_name"] = value
 	}
 }
 
-// Update '*var' according to the adagrad scheme.
+// A Reader that outputs the entire contents of a file as a value.
 //
-// accum += grad * grad
-// var -= lr * grad * (1 / sqrt(accum))
+// To use, enqueue filenames in a Queue.  The output of ReaderRead will
+// be a filename (key) and the contents of that file (value).
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	grad: The gradient.
-//
-// Returns the created operation.
-func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) {
+// Returns The handle to reference the Reader.
+func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15760,138 +14164,46 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdagrad",
-		Input: []tf.Input{
-			var_, accum, lr, grad,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
+		Type: "WholeFileReaderV2",
 
-// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the tree ensemble.
-//
-// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest
-// layer.
-func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesGetEnsembleStates",
-		Input: []tf.Input{
-			tree_ensemble_handle,
-		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+	return op.Output(0)
 }
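+
+// exampleWholeFileRead is a hypothetical sketch (not part of the generated
+// bindings): per the doc comment above, the reader is paired with a string
+// FIFO queue of filenames, and ReaderReadV2 yields one (filename, contents)
+// record. FIFOQueueV2 and ReaderReadV2 are other wrappers from this package;
+// enqueuing the filenames is left out of the sketch.
+func exampleWholeFileRead(s *Scope) (key, value tf.Output) {
+	reader := WholeFileReaderV2(s, WholeFileReaderV2SharedName("files"))
+	queue := FIFOQueueV2(s, []tf.DataType{tf.String})
+	return ReaderReadV2(s, reader, queue)
+}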
 
-// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
-type ResourceApplyPowerSignAttr func(optionalAttr)
-
-// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value.
+// Says whether the targets are in the top `K` predictions.
 //
-// value: If `True`, updating of the var and m tensors is
-// protected by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the AddSign update.
+// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
+// prediction for the target class is among the top `k` predictions among
+// all predictions for example `i`. Note that the behavior of `InTopK` differs
+// from the `TopK` op in its handling of ties; if multiple classes have the
+// same prediction value and straddle the top-`k` boundary, all of those
+// classes are considered to be in the top `k`.
 //
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
-// variable <- variable - lr_t * update
+// More formally, let
+//
+//   \\(predictions_i\\) be the predictions for all classes for example `i`,
+//   \\(targets_i\\) be the target class for example `i`,
+//   \\(out_i\\) be the output for example `i`,
+//
+// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	logbase: Must be a scalar.
-//	sign_decay: Must be a scalar.
-//	beta: Must be a scalar.
-//	grad: The gradient.
+//	predictions: A `batch_size` x `classes` tensor.
+//	targets: A `batch_size` vector of class ids.
+//	k: Number of top elements to look at for computing precision.
 //
-// Returns the created operation.
-func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) {
+// Returns Computed Precision at `k` as a `bool Tensor`.
+func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"k": k}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyPowerSign",
+		Type: "InTopK",
 		Input: []tf.Input{
-			var_, m, lr, logbase, sign_decay, beta, grad,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// StringFormatAttr is an optional argument to StringFormat.
-type StringFormatAttr func(optionalAttr)
-
-// StringFormatTemplate sets the optional template attribute to value.
-//
-// value: A string, the template to format tensor summaries into.
-// If not specified, defaults to "%s"
-func StringFormatTemplate(value string) StringFormatAttr {
-	return func(m optionalAttr) {
-		m["template"] = value
-	}
-}
-
-// StringFormatPlaceholder sets the optional placeholder attribute to value.
-//
-// value: A string, at each placeholder in the template a subsequent tensor summary will be inserted.
-// If not specified, defaults to "%s"
-func StringFormatPlaceholder(value string) StringFormatAttr {
-	return func(m optionalAttr) {
-		m["placeholder"] = value
-	}
-}
-
-// StringFormatSummarize sets the optional summarize attribute to value.
-//
-// value: When formatting the tensor summaries print the first and last summarize entries of each tensor dimension.
-// If not specified, defaults to 3
-func StringFormatSummarize(value int64) StringFormatAttr {
-	return func(m optionalAttr) {
-		m["summarize"] = value
-	}
-}
-
-// Formats a string template using a list of tensors.
-//
-// Formats a string template using a list of tensors, pretty-printing tensor summaries.
-//
-// Arguments:
-//	inputs: The list of tensors to format into the placeholder string.
-//
-// Returns = The resulting string scalar.
-func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StringFormat",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
+			predictions, targets,
 		},
 		Attrs: attrs,
 	}
@@ -15899,637 +14211,119 @@
 	return op.Output(0)
 }
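+
+// exampleTop3Accuracy is a hypothetical sketch (not part of the generated
+// bindings): it turns the boolean InTopK output into a scalar top-3 accuracy
+// by casting to float and averaging over the batch dimension. Cast and Mean
+// are other wrappers from this package.
+func exampleTop3Accuracy(s *Scope, predictions, targets tf.Output) tf.Output {
+	hits := InTopK(s, predictions, targets, 3)
+	// The mean of a 0/1 vector over axis 0 is the fraction of hits.
+	return Mean(s, Cast(s, hits, tf.Float), Const(s.SubScope("axis"), []int32{0}))
+}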
 
-// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp.
-type ResourceSparseApplyRMSPropAttr func(optionalAttr)
+// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdagradParametersGradAccumDebug.
+type RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr func(optionalAttr)
 
-// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var, ms, and mom tensors is protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the RMSProp algorithm.
-//
-// Note that in dense implementation of this algorithm, ms and mom will
-// update even if the grad is zero, but in this sparse implementation, ms
-// and mom will not update in iterations during which the grad is zero.
-//
-// mean_square = decay * mean_square + (1-decay) * gradient ** 2
-// Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
-//
-// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-// var <- var - mom
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	ms: Should be from a Variable().
-//	mom: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	rho: Decay rate. Must be a scalar.
-//
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var, ms and mom.
-//
-// Returns the created operation.
-func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyRMSProp",
-		Input: []tf.Input{
-			var_, ms, mom, lr, rho, momentum, epsilon, grad, indices,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Creates a TensorList by indexing into a Tensor.
-//
-// Each member of the TensorList corresponds to one row of the input tensor,
-// specified by the given index (see `tf.gather`).
-//
-// tensor: The input tensor.
-// indices: The indices used to index into the list.
-// element_shape: The shape of the elements in the list (can be less specified than
-//   the shape of the tensor).
-// num_elements: The size of the output list. Must be large enough to accommodate
-//   the largest index in indices. If -1, the list is just large enough to include
-//   the largest index in indices.
-// output_handle: The TensorList.
-func TensorListScatterV2(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output, num_elements tf.Output) (output_handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorListScatterV2",
-		Input: []tf.Input{
-			tensor, indices, element_shape, num_elements,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox.
-type SampleDistortedBoundingBoxAttr func(optionalAttr)
-
-// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to non-zero, the random number
-// generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
-// seed.
-// If not specified, defaults to 0
-func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value.
-//
-// value: The cropped area of the image must contain at least this
-// fraction of any bounding box supplied. The value of this parameter should be
-// non-negative. In the case of 0, the cropped area does not need to overlap
-// any of the bounding boxes supplied.
-// If not specified, defaults to 0.1
-func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["min_object_covered"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value.
-//
-// value: The cropped area of the image must have an aspect ratio =
-// width / height within this range.
-// If not specified, defaults to <f:0.75 f:1.33 >
-func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["aspect_ratio_range"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value.
-//
-// value: The cropped area of the image must contain a fraction of the
-// supplied image within this range.
-// If not specified, defaults to <f:0.05 f:1 >
-func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["area_range"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value.
-//
-// value: Number of attempts at generating a cropped region of the image
-// of the specified constraints. After `max_attempts` failures, return the entire
-// image.
-// If not specified, defaults to 100
-func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["max_attempts"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value.
-//
-// value: Controls behavior if no bounding boxes supplied.
-// If true, assume an implicit bounding box covering the whole input. If false,
-// raise an error.
-// If not specified, defaults to false
-func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["use_image_if_no_bounding_boxes"] = value
-	}
-}
-
-// Generate a single randomly distorted bounding box for an image.
-//
-// Bounding box annotations are often supplied in addition to ground-truth labels
-// in image recognition or object localization tasks. A common technique for
-// training such a system is to randomly distort an image while preserving
-// its content, i.e. *data augmentation*. This Op outputs a randomly distorted
-// localization of an object, i.e. bounding box, given an `image_size`,
-// `bounding_boxes` and a series of constraints.
-//
-// The output of this Op is a single bounding box that may be used to crop the
-// original image. The output is returned as 3 tensors: `begin`, `size` and
-// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
-// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
-// what the bounding box looks like.
-//
-// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
-// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-// height of the underlying image.
-//
-// For example,
-//
-// ```python
-//     # Generate a single distorted bounding box.
-//     begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
-//         tf.shape(image),
-//         bounding_boxes=bounding_boxes)
-//
-//     # Draw the bounding box in an image summary.
-//     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
-//                                                   bbox_for_draw)
-//     tf.summary.image('images_with_box', image_with_box)
-//
-//     # Employ the bounding box to distort the image.
-//     distorted_image = tf.slice(image, begin, size)
-// ```
-//
-// Note that if no bounding box information is available, setting
-// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
-// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
-// false and no bounding boxes are supplied, an error is raised.
-//
-// Arguments:
-//	image_size: 1-D, containing `[height, width, channels]`.
-//	bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
-// associated with the image.
-//
-// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
-// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to
-// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box.
-// Provide as input to `tf.image.draw_bounding_boxes`.
-func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SampleDistortedBoundingBox",
-		Input: []tf.Input{
-			image_size, bounding_boxes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// LRNAttr is an optional argument to LRN.
-type LRNAttr func(optionalAttr)
-
-// LRNDepthRadius sets the optional depth_radius attribute to value.
-//
-// value: 0-D.  Half-width of the 1-D normalization window.
-// If not specified, defaults to 5
-func LRNDepthRadius(value int64) LRNAttr {
-	return func(m optionalAttr) {
-		m["depth_radius"] = value
-	}
-}
-
-// LRNBias sets the optional bias attribute to value.
-//
-// value: An offset (usually positive to avoid dividing by 0).
-// If not specified, defaults to 1
-func LRNBias(value float32) LRNAttr {
-	return func(m optionalAttr) {
-		m["bias"] = value
-	}
-}
-
-// LRNAlpha sets the optional alpha attribute to value.
-//
-// value: A scale factor, usually positive.
-// If not specified, defaults to 1
-func LRNAlpha(value float32) LRNAttr {
-	return func(m optionalAttr) {
-		m["alpha"] = value
-	}
-}
-
-// LRNBeta sets the optional beta attribute to value.
-//
-// value: An exponent.
-// If not specified, defaults to 0.5
-func LRNBeta(value float32) LRNAttr {
-	return func(m optionalAttr) {
-		m["beta"] = value
-	}
-}
-
-// Local Response Normalization.
-//
-// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last
-// dimension), and each vector is normalized independently.  Within a given vector,
-// each component is divided by the weighted, squared sum of inputs within
-// `depth_radius`.  In detail,
-//
-//     sqr_sum[a, b, c, d] =
-//         sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
-//     output = input / (bias + alpha * sqr_sum) ** beta
-//
-// For details, see [Krizhevsky et al., ImageNet classification with deep
-// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
-//
-// Arguments:
-//	input: 4-D.
-func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "LRN",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that zips together `input_datasets`.
-func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ZipDataset",
-		Input: []tf.Input{
-			tf.OutputList(input_datasets),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad.
-type ResourceSparseApplyAdagradAttr func(optionalAttr)
-
-// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value.
-// If not specified, defaults to true
-func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr {
-	return func(m optionalAttr) {
-		m["update_slots"] = value
-	}
-}
-
-// Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
-//
-// That is for rows we have grad for, we update var and accum as follows:
-// accum += grad * grad
-// var -= lr * grad * (1 / sqrt(accum))
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//
-// Returns the created operation.
-func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyAdagrad",
-		Input: []tf.Input{
-			var_, accum, lr, grad, indices,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Elementwise computes the bitwise right-shift of `x` and `y`.
-//
-// Performs a logical shift for unsigned integer types, and an arithmetic shift
-// for signed integer types.
-//
-// If `y` is negative, or greater than or equal to than the width of `x` in bits
-// the result is implementation defined.
-func RightShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RightShift",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// TensorListStackAttr is an optional argument to TensorListStack.
-type TensorListStackAttr func(optionalAttr)
-
-// TensorListStackNumElements sets the optional num_elements attribute to value.
+// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value.
 // If not specified, defaults to -1
-func TensorListStackNumElements(value int64) TensorListStackAttr {
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr {
 	return func(m optionalAttr) {
-		m["num_elements"] = value
+		m["table_id"] = value
 	}
 }
 
-// Stacks all tensors in the list.
+// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve Adagrad embedding parameters with debug support.
 //
-// Requires that all tensors have the same shape.
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
 //
-// input_handle: the input list
-// tensor: the gathered result
-// num_elements: optional. If not -1, the number of elements in the list.
-//
-func TensorListStack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType, optional ...TensorListStackAttr) (tensor tf.Output) {
+// Returns Parameter parameters updated by the Adagrad optimization algorithm.
+// Parameter accumulators updated by the Adagrad optimization algorithm.
+// Parameter gradient_accumulators updated by the Adagrad optimization algorithm.
+func RetrieveTPUEmbeddingAdagradParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"element_dtype": element_dtype}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorListStack",
-		Input: []tf.Input{
-			input_handle, element_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+		Type: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug",
 
-// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform.
-type StatelessRandomUniformAttr func(optionalAttr)
-
-// StatelessRandomUniformDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Outputs deterministic pseudorandom random values from a uniform distribution.
-//
-// The generated values follow a uniform distribution in the range `[0, 1)`. The
-// lower bound 0 is included in the range, while the upper bound 1 is excluded.
-//
-// The outputs are a deterministic function of `shape` and `seed`.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
-//
-// Returns Random values with specified shape.
-func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StatelessRandomUniform",
-		Input: []tf.Input{
-			shape, seed,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Makes its input available to the next iteration.
-//
-// Arguments:
-//	data: The tensor to be made available to the next iteration.
-//
-// Returns The same tensor as `data`.
-func NextIteration(scope *Scope, data tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "NextIteration",
-		Input: []tf.Input{
-			data,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Output a fact about factorials.
-func Fact(scope *Scope) (fact tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Fact",
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Deserialize `SparseTensor` objects.
-//
-// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
-// the last dimension stores serialized `SparseTensor` objects and the other N
-// dimensions (N >= 0) correspond to a batch. The ranks of the original
-// `SparseTensor` objects must all match. When the final `SparseTensor` is
-// created, its rank is the rank of the incoming `SparseTensor` objects plus N;
-// the sparse tensors have been concatenated along new dimensions, one for each
-// batch.
-//
-// The output `SparseTensor` object's shape values for the original dimensions
-// are the max across the input `SparseTensor` objects' shape values for the
-// corresponding dimensions. The new dimensions match the size of the batch.
-//
-// The input `SparseTensor` objects' indices are assumed ordered in
-// standard lexicographic order.  If this is not the case, after this
-// step run `SparseReorder` to restore index ordering.
-//
-// For example, if the serialized input is a `[2 x 3]` matrix representing two
-// original `SparseTensor` objects:
-//
-//     index = [ 0]
-//             [10]
-//             [20]
-//     values = [1, 2, 3]
-//     shape = [50]
-//
-// and
-//
-//     index = [ 2]
-//             [10]
-//     values = [4, 5]
-//     shape = [30]
-//
-// then the final deserialized `SparseTensor` will be:
-//
-//     index = [0  0]
-//             [0 10]
-//             [0 20]
-//             [1  2]
-//             [1 10]
-//     values = [1, 2, 3, 4, 5]
-//     shape = [2 50]
-//
-// Arguments:
-//	serialized_sparse: The serialized `SparseTensor` objects. The last dimension
-// must have 3 columns.
-//	dtype: The `dtype` of the serialized `SparseTensor` objects.
-func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	opspec := tf.OpSpec{
-		Type: "DeserializeSparse",
-		Input: []tf.Input{
-			serialized_sparse,
-		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// SqueezeAttr is an optional argument to Squeeze.
-type SqueezeAttr func(optionalAttr)
+// Serializes the tree handle to a proto.
+//
+// Arguments:
+//	tree_handle: Handle to the tree resource to be serialized.
+//
+// Returns Serialized proto string of the tree resource.
+func TensorForestTreeSerialize(scope *Scope, tree_handle tf.Output) (tree_config tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorForestTreeSerialize",
+		Input: []tf.Input{
+			tree_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// SqueezeAxis sets the optional axis attribute to value.
-//
-// value: If specified, only squeezes the dimensions listed. The dimension
-// index starts at 0. It is an error to squeeze a dimension that is not 1. Must
-// be in the range `[-rank(input), rank(input))`.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func SqueezeAxis(value []int64) SqueezeAttr {
+// SparseMatMulAttr is an optional argument to SparseMatMul.
+type SparseMatMulAttr func(optionalAttr)
+
+// SparseMatMulTransposeA sets the optional transpose_a attribute to value.
+// If not specified, defaults to false
+func SparseMatMulTransposeA(value bool) SparseMatMulAttr {
 	return func(m optionalAttr) {
-		m["squeeze_dims"] = value
+		m["transpose_a"] = value
 	}
 }
 
-// Removes dimensions of size 1 from the shape of a tensor.
+// SparseMatMulTransposeB sets the optional transpose_b attribute to value.
+// If not specified, defaults to false
+func SparseMatMulTransposeB(value bool) SparseMatMulAttr {
+	return func(m optionalAttr) {
+		m["transpose_b"] = value
+	}
+}
+
+// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value.
+// If not specified, defaults to false
+func SparseMatMulAIsSparse(value bool) SparseMatMulAttr {
+	return func(m optionalAttr) {
+		m["a_is_sparse"] = value
+	}
+}
+
+// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value.
+// If not specified, defaults to false
+func SparseMatMulBIsSparse(value bool) SparseMatMulAttr {
+	return func(m optionalAttr) {
+		m["b_is_sparse"] = value
+	}
+}
+
+// Multiply matrix "a" by matrix "b".
 //
-// Given a tensor `input`, this operation returns a tensor of the same type with
-// all dimensions of size 1 removed. If you don't want to remove all size 1
-// dimensions, you can remove specific size 1 dimensions by specifying
-// `axis`.
+// The inputs must be two-dimensional matrices and the inner dimension of "a" must
+// match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not
+// `SparseTensor`s.  This op is optimized for the case where at least one of "a" or
+// "b" is sparse, in the sense that they have a large proportion of zero values.
+// The breakeven for using this versus a dense matrix multiply on one platform was
+// 30% zero values in the sparse matrix.
 //
-// For example:
-//
-// ```
-// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
-// shape(squeeze(t)) ==> [2, 3]
-// ```
-//
-// Or, to remove specific size 1 dimensions:
-//
-// ```
-// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
-// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]
-// ```
-//
-// Arguments:
-//	input: The `input` to squeeze.
-//
-// Returns Contains the same data as `input`, but has one or more dimensions of
-// size 1 removed.
-func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) {
+// The gradient computation of this operation will only take advantage of sparsity
+// in the input gradient when that gradient comes from a Relu.
+func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -16538,9 +14332,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Squeeze",
+		Type: "SparseMatMul",
 		Input: []tf.Input{
-			input,
+			a, b,
 		},
 		Attrs: attrs,
 	}
@@ -16548,274 +14342,225 @@
 	return op.Output(0)
 }
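+
+// exampleSparseMatMul is a hypothetical sketch (not part of the generated
+// bindings): it multiplies a mostly-zero "a" against a dense "b", flagging
+// "a" as sparse so the kernel can skip zero-valued blocks, per the breakeven
+// note in the doc comment above.
+func exampleSparseMatMul(s *Scope, a, b tf.Output) tf.Output {
+	return SparseMatMul(s, a, b, SparseMatMulAIsSparse(true))
+}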
 
-// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta.
-type ResourceApplyAdadeltaAttr func(optionalAttr)
+// ExperimentalThreadPoolHandleAttr is an optional argument to ExperimentalThreadPoolHandle.
+type ExperimentalThreadPoolHandleAttr func(optionalAttr)
 
-// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
+// ExperimentalThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value.
 //
-// value: If True, updating of the var, accum and update_accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr {
+// value: The maximum degree of parallelism to use within operations that execute on this
+// threadpool.
+// If not specified, defaults to 1
+func ExperimentalThreadPoolHandleMaxIntraOpParallelism(value int64) ExperimentalThreadPoolHandleAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["max_intra_op_parallelism"] = value
 	}
 }
 
-// Update '*var' according to the adadelta scheme.
-//
-// accum = rho() * accum + (1 - rho()) * grad.square();
-// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
-// update_accum = rho() * update_accum + (1 - rho()) * update.square();
-// var -= update;
+// ExperimentalThreadPoolHandleContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func ExperimentalThreadPoolHandleContainer(value string) ExperimentalThreadPoolHandleAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// ExperimentalThreadPoolHandleSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func ExperimentalThreadPoolHandleSharedName(value string) ExperimentalThreadPoolHandleAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Creates a dataset that uses a custom thread pool to compute `input_dataset`.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	accum_update: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	rho: Decay factor. Must be a scalar.
-//	epsilon: Constant factor. Must be a scalar.
-//	grad: The gradient.
+//	num_threads: The number of threads in the thread pool.
+//	display_name: A human-readable name for the threads that may be visible in some
+// visualizations.
 //
-// Returns the created operation.
-func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) {
+// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset
+// ops.
+func ExperimentalThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ExperimentalThreadPoolHandleAttr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdadelta",
+		Type: "ExperimentalThreadPoolHandle",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
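+
+// exampleThreadPool is a hypothetical sketch (not part of the generated
+// bindings): the handle is only useful when fed to an
+// ExperimentalThreadPoolDataset op, assumed here to be the companion wrapper
+// in this package taking (input_dataset, thread_pool) plus the usual
+// output_types/output_shapes attrs.
+func exampleThreadPool(s *Scope, dataset tf.Output, types []tf.DataType, shapes []tf.Shape) tf.Output {
+	pool := ExperimentalThreadPoolHandle(s, 4, "my_pool")
+	return ExperimentalThreadPoolDataset(s, dataset, pool, types, shapes)
+}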
+
+// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug.
+type LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr)
+
+// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load proximal Adagrad embedding parameters with debug support.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the proximal Adagrad optimization algorithm.
+//	accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm.
+//	gradient_accumulators: Value of gradient_accumulators used in the proximal Adagrad optimization algorithm.
+//
+//
+func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug",
 		Input: []tf.Input{
-			var_, accum, accum_update, lr, rho, epsilon, grad,
+			parameters, accumulators, gradient_accumulators,
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression.
-type NonMaxSuppressionAttr func(optionalAttr)
+// LoadTPUEmbeddingProximalAdagradParametersAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParameters.
+type LoadTPUEmbeddingProximalAdagradParametersAttr func(optionalAttr)
 
-// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value.
+// LoadTPUEmbeddingProximalAdagradParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
 //
-// value: A float representing the threshold for deciding whether boxes
-// overlap too much with respect to IOU.
-// If not specified, defaults to 0.5
-func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr {
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingProximalAdagradParametersTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersAttr {
 	return func(m optionalAttr) {
-		m["iou_threshold"] = value
+		m["table_id"] = value
 	}
 }
 
-// Greedily selects a subset of bounding boxes in descending order of score,
+// LoadTPUEmbeddingProximalAdagradParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingProximalAdagradParametersTableName(value string) LoadTPUEmbeddingProximalAdagradParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load proximal Adagrad embedding parameters.
 //
-// pruning away boxes that have high intersection-over-union (IOU) overlap
-// with previously selected boxes.  Bounding boxes are supplied as
-// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-// diagonal pair of box corners and the coordinates can be provided as normalized
-// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-// is agnostic to where the origin is in the coordinate system.  Note that this
-// algorithm is invariant to orthogonal transformations and translations
-// of the coordinate system; thus translating or reflections of the coordinate
-// system result in the same boxes being selected by the algorithm.
-// The output of this operation is a set of integers indexing into the input
-// collection of bounding boxes representing the selected boxes.  The bounding
-// box coordinates corresponding to the selected indices can then be obtained
-// using the `tf.gather operation`.  For example:
-//   selected_indices = tf.image.non_max_suppression(
-//       boxes, scores, max_output_size, iou_threshold)
-//   selected_boxes = tf.gather(boxes, selected_indices)
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
 //
 // Arguments:
-//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-// score corresponding to each box (each row of boxes).
-//	max_output_size: A scalar integer tensor representing the maximum number of
-// boxes to be selected by non max suppression.
+//	parameters: Value of parameters used in the proximal Adagrad optimization algorithm.
+//	accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm.
 //
-// Returns A 1-D integer tensor of shape `[M]` representing the selected
-// indices from the boxes tensor, where `M <= max_output_size`.
-func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) {
+// Returns the created operation.
+func LoadTPUEmbeddingProximalAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "NonMaxSuppression",
+		Type: "LoadTPUEmbeddingProximalAdagradParameters",
 		Input: []tf.Input{
-			boxes, scores, max_output_size,
+			parameters, accumulators,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Creates a dataset that emits `components` as a tuple of tensors once.
-func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "TensorDataset",
-		Input: []tf.Input{
-			tf.OutputList(components),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Component-wise multiplies a SparseTensor by a dense Tensor.
-//
-// The output locations corresponding to the implicitly zero elements in the sparse
-// tensor will be zero (i.e., will not take up storage space), regardless of the
-// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN).
-//
-// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not
-// the other direction.
+// Get the current size of the TensorArray.
 //
 // Arguments:
-//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
-//	dense: `R`-D.  The dense Tensor operand.
+//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
+//	flow_in: A float scalar that enforces proper chaining of operations.
 //
-// Returns 1-D.  The `N` values that are operated on.
-func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
+// Returns The current size of the TensorArray.
+func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseDenseCwiseMul",
+		Type: "TensorArraySizeV3",
 		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape, dense,
+			handle, flow_in,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// 2D real-valued fast Fourier transform.
-//
-// Computes the 2-dimensional discrete Fourier transform of a real-valued signal
-// over the inner-most 2 dimensions of `input`.
-//
-// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the
-// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
-// of `output`: the zero-frequency term, followed by the `fft_length / 2`
-// positive-frequency terms.
-//
-// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
+// Computes gradients for the scaled exponential linear (Selu) operation.
 //
 // Arguments:
-//	input: A float32 tensor.
-//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
+//	gradients: The backpropagated gradients to the corresponding Selu operation.
+//	outputs: The outputs of the corresponding Selu operation.
 //
-// Returns A complex64 tensor of the same rank as `input`. The inner-most 2
-//   dimensions of `input` are replaced with their 2D Fourier transform. The
-//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
-//   components.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.rfft2
-// @end_compatibility
-func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+// Returns The gradients: `gradients * (outputs + scale * alpha)`
+// if outputs < 0, `scale * gradients` otherwise.
+func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "RFFT2D",
+		Type: "SeluGrad",
 		Input: []tf.Input{
-			input, fft_length,
+			gradients, outputs,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
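+
+// exampleSeluGrad is a hypothetical sketch (not part of the generated
+// bindings): SeluGrad expects the *outputs* of the forward Selu (not its
+// inputs), so a hand-written backward pass wires them together like this.
+func exampleSeluGrad(s *Scope, features, upstream tf.Output) tf.Output {
+	outputs := Selu(s, features)
+	return SeluGrad(s, upstream, outputs)
+}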
 
-// Pads a tensor with zeros.
-//
-// This operation pads a `input` with zeros according to the `paddings` you
-// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
-// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-// how many zeros to add before the contents of `input` in that dimension, and
-// `paddings[D, 1]` indicates how many zeros to add after the contents of `input`
-// in that dimension.
-//
-// The padded size of each dimension D of the output is:
-//
-// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
-//
-// For example:
-//
-// ```
-// # 't' is [[1, 1], [2, 2]]
-// # 'paddings' is [[1, 1], [2, 2]]
-// # rank of 't' is 2
-// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
-//                       [0, 0, 1, 1, 0, 0]
-//                       [0, 0, 2, 2, 0, 0]
-//                       [0, 0, 0, 0, 0, 0]]
-// ```
-//
-func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Pad",
-		Input: []tf.Input{
-			input, paddings,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
+type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
 
-// Checks whether a resource handle-based variable has been initialized.
-//
-// Arguments:
-//	resource: the input resource handle.
-//
-// Returns a scalar boolean which is true if the variable has been
-// initialized.
-func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "VarIsInitializedOp",
-		Input: []tf.Input{
-			resource,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl.
-type ResourceSparseApplyFtrlAttr func(optionalAttr)
-
-// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value.
+// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
 //
 // value: If `True`, updating of the var and accum tensors will be protected
 // by a lock; otherwise the behavior is undefined, but may exhibit less
 // contention.
 // If not specified, defaults to false
-func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr {
+func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr {
 	return func(m optionalAttr) {
 		m["use_locking"] = value
 	}
@@ -16824,233 +14569,6 @@
 // Update relevant entries in '*var' according to the Ftrl-proximal scheme.
 //
-// That is for rows we have grad for, we update var, accum and linear as follows:
+// That is, for the rows for which we have grad, we update var, accum and linear as follows:
-// accum_new = accum + grad * grad
-// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	lr_power: Scaling factor. Must be a scalar.
-//
-// Returns the created operation.
-func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyFtrl",
-		Input: []tf.Input{
-			var_, accum, linear, grad, indices, lr, l1, l2, lr_power,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Returns which elements of x are Inf.
-//
-// @compatibility(numpy)
-// Equivalent to np.isinf
-// @end_compatibility
-func IsInf(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "IsInf",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// TruncatedNormalAttr is an optional argument to TruncatedNormal.
-type TruncatedNormalAttr func(optionalAttr)
-
-// TruncatedNormalSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func TruncatedNormalSeed(value int64) TruncatedNormalAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// TruncatedNormalSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func TruncatedNormalSeed2(value int64) TruncatedNormalAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Outputs random values from a truncated normal distribution.
-//
-// The generated values follow a normal distribution with mean 0 and standard
-// deviation 1, except that values whose magnitude is more than 2 standard
-// deviations from the mean are dropped and re-picked.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	dtype: The type of the output.
-//
-// Returns A tensor of the specified shape filled with random truncated normal
-// values.
-func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "TruncatedNormal",
-		Input: []tf.Input{
-			shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SkipgramAttr is an optional argument to Skipgram.
-type SkipgramAttr func(optionalAttr)
-
-// SkipgramWindowSize sets the optional window_size attribute to value.
-//
-// value: The number of words to predict to the left and right of the target.
-// If not specified, defaults to 5
-func SkipgramWindowSize(value int64) SkipgramAttr {
-	return func(m optionalAttr) {
-		m["window_size"] = value
-	}
-}
-
-// SkipgramMinCount sets the optional min_count attribute to value.
-//
-// value: The minimum number of word occurrences for it to be included in the
-// vocabulary.
-// If not specified, defaults to 5
-func SkipgramMinCount(value int64) SkipgramAttr {
-	return func(m optionalAttr) {
-		m["min_count"] = value
-	}
-}
-
-// SkipgramSubsample sets the optional subsample attribute to value.
-//
-// value: Threshold for word occurrence. Words that appear with higher
-// frequency will be randomly down-sampled. Set to 0 to disable.
-// If not specified, defaults to 0.001
-func SkipgramSubsample(value float32) SkipgramAttr {
-	return func(m optionalAttr) {
-		m["subsample"] = value
-	}
-}
-
-// Parses a text file and creates a batch of examples.
-//
-// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result
-//
-// Arguments:
-//	filename: The corpus's text file name.
-//	batch_size: The size of produced batch.
-//
-// Returns:
-//	vocab_word: A vector of words in the corpus.
-//	vocab_freq: Frequencies of words. Sorted in non-ascending order.
-//	words_per_epoch: Number of words per epoch in the data file.
-//	current_epoch: The current epoch number.
-//	total_words_processed: The total number of words processed so far.
-//	examples: A vector of word ids.
-//	labels: A vector of word ids.
-func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Skipgram",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6)
-}
-
-// StringToNumberAttr is an optional argument to StringToNumber.
-type StringToNumberAttr func(optionalAttr)
-
-// StringToNumberOutType sets the optional out_type attribute to value.
-//
-// value: The numeric type to interpret each string in `string_tensor` as.
-// If not specified, defaults to DT_FLOAT
-func StringToNumberOutType(value tf.DataType) StringToNumberAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Converts each string in the input Tensor to the specified numeric type.
-//
-// (Note that int32 overflow results in an error while float overflow
-// results in a rounded value.)
-//
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StringToNumber",
-		Input: []tf.Input{
-			string_tensor,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
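A graph-construction sketch of the `out_type` option, under the same import-path assumptions as above; `parseInts` is an illustrative helper, not part of the package:

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// parseInts converts a string tensor such as ["7", "42"] into int32 values.
// Malformed strings surface as a runtime error when the graph is executed.
func parseInts(s *op.Scope, strs tf.Output) tf.Output {
	return op.StringToNumber(s, strs, op.StringToNumberOutType(tf.Int32))
}
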
-// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
-type ResourceApplyFtrlV2Attr func(optionalAttr)
-
-// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the Ftrl-proximal scheme.
-//
 // grad_with_shrinkage = grad + 2 * l2_shrinkage * var
 // accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
 // linear += grad_with_shrinkage +
@@ -17064,14 +14582,15 @@
 //	accum: Should be from a Variable().
 //	linear: Should be from a Variable().
 //	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
 //	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regulariation. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
 //	l2: L2 shrinkage regularization. Must be a scalar.
 //	l2_shrinkage: Must be a scalar.
 //	lr_power: Scaling factor. Must be a scalar.
 //
 // Returns the created operation.
-func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
+func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17080,9 +14599,423 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyFtrlV2",
+		Type: "ResourceSparseApplyFtrlV2",
 		Input: []tf.Input{
-			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
+			var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// SumAttr is an optional argument to Sum.
+type SumAttr func(optionalAttr)
+
+// SumKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func SumKeepDims(value bool) SumAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the sum of elements across dimensions of a tensor.
+//
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
+//
+// Arguments:
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
+//
+// Returns The reduced tensor.
+func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Sum",
+		Input: []tf.Input{
+			input, axis,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
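A short sketch of the reduction with the `keep_dims` option, assuming the standard `tf`/`op` import aliases; the helper name is illustrative:

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// rowSums reduces a 2-D tensor along axis 1. With keep_dims set, an
// [m, n] input yields an [m, 1] output that can broadcast against the input.
func rowSums(s *op.Scope, x tf.Output) tf.Output {
	axis := op.Const(s, []int32{1})
	return op.Sum(s, x, axis, op.SumKeepDims(true))
}
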
+// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation.
+type SparseToSparseSetOperationAttr func(optionalAttr)
+
+// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Applies set operation along last dimension of 2 `SparseTensor` inputs.
+//
+// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
+//
+// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the
+// order and range of `set1` and `set2` indices.
+//
+// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`,
+// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same
+// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but
+// ignored.
+//
+// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
+// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
+// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
+// ignored.
+//
+// If `validate_indices` is `True`, this op validates the order and range of `set1`
+// and `set2` indices.
+//
+// Output `result` is a `SparseTensor` represented by `result_indices`,
+// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
+// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
+// dimension contains the result of `set_operation` applied to the corresponding
+// `[0...n-1]` dimension of `set1` and `set2`.
+//
+// Arguments:
+//	set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
+// order.
+//	set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
+// order.
+//	set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must
+// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the
+// max set size across `0...n-1` dimensions.
+//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
+// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the
+// max set size across `0...n-1` dimensions.
+//	set_operation: The set operation to apply. Valid values are `"a-b"`, `"b-a"`,
+// `"intersection"`, and `"union"`.
+//
+// Returns:
+//	result_indices: 2D indices of a `SparseTensor`.
+//	result_values: 1D values of a `SparseTensor`.
+//	result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
+// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
+// is the max result set size across all `0...n-1` dimensions.
+func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"set_operation": set_operation}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseToSparseSetOperation",
+		Input: []tf.Input{
+			set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Computes softmax cross entropy cost and gradients to backpropagate.
+//
+// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept
+// a matrix of label probabilities, but rather a single label per row
+// of features.  This label is considered to have probability 1.0 for the
+// given row.
+//
+// Inputs are the logits, not probabilities.
+//
+// Arguments:
+//	features: batch_size x num_classes matrix
+//	labels: batch_size vector with values in [0, num_classes).
+// This is the label for the given minibatch entry.
+//
+// Returns:
+//	loss: Per example loss (batch_size vector).
+//	backprop: Backpropagated gradients (batch_size x num_classes matrix).
+func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSoftmaxCrossEntropyWithLogits",
+		Input: []tf.Input{
+			features, labels,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
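A small wiring sketch with concrete shapes, under the usual import assumptions (the helper name and sample values are illustrative):

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// xentExample wires the op up for a batch of two examples over three classes.
// logits is [2, 3] float32; labels holds one class id per row.
func xentExample(s *op.Scope) (loss, backprop tf.Output) {
	logits := op.Const(s, [][]float32{{1, 2, 3}, {3, 2, 1}})
	labels := op.Const(s, []int32{2, 0}) // both rows label their largest logit
	return op.SparseSoftmaxCrossEntropyWithLogits(s, logits, labels)
}
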
+// StridedSliceGradAttr is an optional argument to StridedSliceGrad.
+type StridedSliceGradAttr func(optionalAttr)
+
+// StridedSliceGradBeginMask sets the optional begin_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradBeginMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["begin_mask"] = value
+	}
+}
+
+// StridedSliceGradEndMask sets the optional end_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradEndMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["end_mask"] = value
+	}
+}
+
+// StridedSliceGradEllipsisMask sets the optional ellipsis_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradEllipsisMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["ellipsis_mask"] = value
+	}
+}
+
+// StridedSliceGradNewAxisMask sets the optional new_axis_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradNewAxisMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["new_axis_mask"] = value
+	}
+}
+
+// StridedSliceGradShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradShrinkAxisMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["shrink_axis_mask"] = value
+	}
+}
+
+// Returns the gradient of `StridedSlice`.
+//
+// Since `StridedSlice` cuts out pieces of its `input`, which is of size
+// `shape`, its gradient will have the same shape (which is passed here
+// as `shape`). The gradient will be zero in any element that the slice
+// does not select.
+//
+// Arguments are the same as `StridedSlice`, with the exception that
+// `dy` is the input gradient to be propagated and `shape` is the
+// shape of `StridedSlice`'s `input`.
+func StridedSliceGrad(scope *Scope, shape tf.Output, begin tf.Output, end tf.Output, strides tf.Output, dy tf.Output, optional ...StridedSliceGradAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StridedSliceGrad",
+		Input: []tf.Input{
+			shape, begin, end, strides, dy,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
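A sketch of how the arguments line up with the forward `StridedSlice`, assuming the standard bindings; the helper name and sizes are illustrative:

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// sliceGradExample scatters dy, the gradient of the slice input[1:4], back
// into the shape of the original length-6 input; untouched positions are zero.
func sliceGradExample(s *op.Scope, dy tf.Output) tf.Output {
	shape := op.Const(s, []int32{6}) // shape of StridedSlice's input
	begin := op.Const(s, []int32{1})
	end := op.Const(s, []int32{4})
	strides := op.Const(s, []int32{1})
	// dy must have the slice's shape, here [3].
	return op.StridedSliceGrad(s, shape, begin, end, strides, dy)
}
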
+// LoadTPUEmbeddingRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingRMSPropParameters.
+type LoadTPUEmbeddingRMSPropParametersAttr func(optionalAttr)
+
+// LoadTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingRMSPropParametersTableId(value int64) LoadTPUEmbeddingRMSPropParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingRMSPropParametersTableName(value string) LoadTPUEmbeddingRMSPropParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load RMSProp embedding parameters.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the RMSProp optimization algorithm.
+//	ms: Value of ms used in the RMSProp optimization algorithm.
+//	mom: Value of mom used in the RMSProp optimization algorithm.
+//
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingRMSPropParameters",
+		Input: []tf.Input{
+			parameters, ms, mom,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes the gradient for the inverse of `x` wrt its input.
+//
+// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
+// is the corresponding input gradient.
+func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReciprocalGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
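A one-point numerical check of the formula above, under the same import assumptions; the helper is illustrative:

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// reciprocalGradCheck evaluates the formula at a single point: with
// y = 1/x = 0.5 (i.e. x = 2) and dy = 1, the result is -1 * 0.5 * 0.5 = -0.25,
// matching d(1/x)/dx = -1/x^2 at x = 2.
func reciprocalGradCheck(s *op.Scope) tf.Output {
	y := op.Const(s, float32(0.5))
	dy := op.Const(s, float32(1))
	return op.ReciprocalGrad(s, y, dy)
}
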
+// EuclideanNormAttr is an optional argument to EuclideanNorm.
+type EuclideanNormAttr func(optionalAttr)
+
+// EuclideanNormKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func EuclideanNormKeepDims(value bool) EuclideanNormAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the euclidean norm of elements across dimensions of a tensor.
+//
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
+//
+// Arguments:
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
+//
+// Returns The reduced tensor.
+func EuclideanNorm(scope *Scope, input tf.Output, axis tf.Output, optional ...EuclideanNormAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EuclideanNorm",
+		Input: []tf.Input{
+			input, axis,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
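A sketch of a common use, row-wise L2 normalization, combining this op with the generated `Div` wrapper from the same package; the helper name is illustrative:

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// l2Normalize divides each row of an [m, n] tensor by its Euclidean norm.
// keep_dims keeps the reduced axis so the [m, 1] norms broadcast over rows.
func l2Normalize(s *op.Scope, x tf.Output) tf.Output {
	axis := op.Const(s, []int32{1})
	norms := op.EuclideanNorm(s, x, axis, op.EuclideanNormKeepDims(true))
	return op.Div(s, x, norms)
}
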
+// Returns the element-wise min of two SparseTensors.
+//
+// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
+//
+// Arguments:
+//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, in the canonical lexicographic ordering.
+//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
+//	a_shape: 1-D.  Shape of the input SparseTensor.
+//	b_indices: counterpart to `a_indices` for the other operand.
+//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
+//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
+//
+// Returns:
+//	output_indices: 2-D.  The indices of the output SparseTensor.
+//	output_values: 1-D.  The values of the output SparseTensor.
+func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSparseMinimum",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA.
+type ResourceSparseApplyAdagradDAAttr func(optionalAttr)
+
+// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	gradient_accumulator: Should be from a Variable().
+//	gradient_squared_accumulator: Should be from a Variable().
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	lr: Learning rate. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	global_step: Training step number. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceSparseApplyAdagradDA",
+		Input: []tf.Input{
+			var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step,
 		},
 		Attrs: attrs,
 	}
@@ -17285,35 +15218,72 @@
 	return op.Output(0)
 }
 
-// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA.
-type ResourceSparseApplyAdagradDAAttr func(optionalAttr)
+// RetrieveTPUEmbeddingRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingRMSPropParameters.
+type RetrieveTPUEmbeddingRMSPropParametersAttr func(optionalAttr)
 
-// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
+// RetrieveTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
 //
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr {
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingRMSPropParametersAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["table_id"] = value
 	}
 }
 
-// Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
+// RetrieveTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingRMSPropParametersTableName(value string) RetrieveTPUEmbeddingRMSPropParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve RMSProp embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns:
+//	parameters: Parameter parameters updated by the RMSProp optimization algorithm.
+//	ms: Parameter ms updated by the RMSProp optimization algorithm.
+//	mom: Parameter mom updated by the RMSProp optimization algorithm.
+func RetrieveTPUEmbeddingRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingRMSPropParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// QuantizedRelu6Attr is an optional argument to QuantizedRelu6.
+type QuantizedRelu6Attr func(optionalAttr)
+
+// QuantizedRelu6OutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_QUINT8
+func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	gradient_accumulator: Should be from a Variable().
-//	gradient_squared_accumulator: Should be from a Variable().
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	lr: Learning rate. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	global_step: Training step number. Must be a scalar.
 //
-// Returns the created operation.
-func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) {
+//	min_features: The float value that the lowest quantized value represents.
+//	max_features: The float value that the highest quantized value represents.
+//
+// Returns:
+//	activations: Has the same output shape as "features".
+//	min_activations: The float value that the lowest quantized value represents.
+//	max_activations: The float value that the highest quantized value represents.
+func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17322,221 +15292,244 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyAdagradDA",
+		Type: "QuantizedRelu6",
 		Input: []tf.Input{
-			var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Converts each string in the input Tensor to its hash mod by a number of buckets.
-//
-// The hash function is deterministic on the content of the string within the
-// process and will never change. However, it is not suitable for cryptography.
-// This function may be used when CPU time is scarce and inputs are trusted or
-// unimportant. There is a risk of adversaries constructing inputs that all hash
-// to the same bucket. To prevent this problem, use a strong hash function with
-// `tf.string_to_hash_bucket_strong`.
-//
-// Arguments:
-//	input: The strings to assign a hash bucket.
-//	num_buckets: The number of buckets.
-//
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_buckets": num_buckets}
-	opspec := tf.OpSpec{
-		Type: "StringToHashBucketFast",
-		Input: []tf.Input{
-			input,
+			features, min_features, max_features,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Returns the last element of the input list as well as a list with all but that element.
-//
-// Fails if the list is empty.
-//
-// input_handle: the input list
-// tensor: the withdrawn last element of the list
-// element_dtype: the type of elements in the list
-// element_shape: the shape of the output tensor
-func TensorListPopBack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"element_dtype": element_dtype}
-	opspec := tf.OpSpec{
-		Type: "TensorListPopBack",
-		Input: []tf.Input{
-			input_handle, element_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
+// BatchMatMulAttr is an optional argument to BatchMatMul.
+type BatchMatMulAttr func(optionalAttr)
 
-// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad.
-type MaxPoolGradGradAttr func(optionalAttr)
-
-// MaxPoolGradGradDataFormat sets the optional data_format attribute to value.
+// BatchMatMulAdjX sets the optional adj_x attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr {
+// value: If `True`, adjoint the slices of `x`. Defaults to `False`.
+// If not specified, defaults to false
+func BatchMatMulAdjX(value bool) BatchMatMulAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["adj_x"] = value
 	}
 }
 
-// Computes second-order gradients of the maxpooling function.
+// BatchMatMulAdjY sets the optional adj_y attribute to value.
+//
+// value: If `True`, adjoint the slices of `y`. Defaults to `False`.
+// If not specified, defaults to false
+func BatchMatMulAdjY(value bool) BatchMatMulAttr {
+	return func(m optionalAttr) {
+		m["adj_y"] = value
+	}
+}
+
+// Multiplies slices of two tensors in batches.
+//
+// Multiplies all slices of `Tensor` `x` and `y` (each slice can be
+// viewed as an element of a batch), and arranges the individual results
+// in a single output tensor of the same batch size. Each of the
+// individual slices can optionally be adjointed (to adjoint a matrix
+// means to transpose and conjugate it) before multiplication by setting
+// the `adj_x` or `adj_y` flag to `True`; both default to `False`.
+//
+// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`
+// and `[..., r_y, c_y]`.
+//
+// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:
+//
+//     r_o = c_x if adj_x else r_x
+//     c_o = r_y if adj_y else c_y
+//
+// It is computed as:
+//
+//     output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	x: 2-D or higher with shape `[..., r_x, c_x]`.
+//	y: 2-D or higher with shape `[..., r_y, c_y]`.
 //
-// Returns Gradients of gradients w.r.t. the input to `max_pool`.
-func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) {
+// Returns 2-D or higher with shape `[..., r_o, c_o]`
+func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolGradGrad",
-		Input: []tf.Input{
-			orig_input, orig_output, grad,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3.
-type TensorArrayGatherV3Attr func(optionalAttr)
-
-// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value.
-//
-// value: The expected shape of an element, if known. Used to
-// validate the shapes of TensorArray elements. If this shape is not
-// fully specified, gathering zero-size TensorArrays is an error.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr {
-	return func(m optionalAttr) {
-		m["element_shape"] = value
-	}
-}
-
-// Gather specific elements from the TensorArray into output `value`.
-//
-// All elements selected by `indices` must have the same shape.
-//
-// Arguments:
-//	handle: The handle to a TensorArray.
-//	indices: The locations in the TensorArray from which to read tensor elements.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	dtype: The type of the elem that is returned.
-//
-// Returns All of the elements in the TensorArray, concatenated along a new
-// axis (the new dimension 0).
-func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayGatherV3",
-		Input: []tf.Input{
-			handle, indices, flow_in,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns x / y element-wise for integer types.
-//
-// Truncation designates that negative numbers will round fractional quantities
-// toward zero. I.e. -7 / 5 = -1. This matches C semantics, but it is different
-// from Python semantics. See `FloorDiv` for a division function that matches
-// Python semantics.
-//
-// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TruncateDiv",
+		Type: "BatchMatMul",
 		Input: []tf.Input{
 			x, y,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
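
A sketch using `adj_y` to form batched Gram matrices, under the usual import assumptions; the helper name is illustrative:

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// gram multiplies each batch element of x by its own transpose: with x shaped
// [b, m, k], setting adj_y transposes (and, for complex types, conjugates)
// the second operand, so the result is shaped [b, m, m].
func gram(s *op.Scope, x tf.Output) tf.Output {
	return op.BatchMatMul(s, x, x, op.BatchMatMulAdjY(true))
}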
 
-// Restores tensors from a V2 checkpoint.
+// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample.
+type ParseSequenceExampleAttr func(optionalAttr)
+
+// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value.
+// If not specified, defaults to 0
 //
-// For backward compatibility with the V1 format, this Op currently allows
-// restoring from a V1 checkpoint as well:
-//   - This Op first attempts to find the V2 index file pointed to by "prefix", and
-//     if found proceed to read it as a V2 checkpoint;
-//   - Otherwise the V1 read path is invoked.
-// Relying on this behavior is not recommended, as the ability to fall back to read
-// V1 might be deprecated and eventually removed.
+// REQUIRES: value >= 0
+func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Ncontext_sparse"] = value
+	}
+}
+
+// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value.
+// If not specified, defaults to 0
 //
-// By default, restores the named tensors in full.  If the caller wishes to restore
-// specific slices of stored tensors, "shape_and_slices" should be non-empty
-// strings and correspondingly well-formed.
+// REQUIRES: value >= 0
+func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Ncontext_dense"] = value
+	}
+}
+
+// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value.
+// If not specified, defaults to 0
 //
-// Callers must ensure all the named tensors are indeed stored in the checkpoint.
+// REQUIRES: value >= 0
+func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Nfeature_list_sparse"] = value
+	}
+}
+
+// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["Nfeature_list_dense"] = value
+	}
+}
+
+// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value.
+//
+// value: A list of Ncontext_sparse types; the data types of data in
+// each context Feature given in context_sparse_keys.
+// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["context_sparse_types"] = value
+	}
+}
+
+// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["feature_list_dense_types"] = value
+	}
+}
+
+// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value.
+//
+// value: A list of Ncontext_dense shapes; the shapes of data in
+// each context Feature given in context_dense_keys.
+// The number of elements in the Feature corresponding to context_dense_key[j]
+// must always equal context_dense_shapes[j].NumEntries().
+// The shape of context_dense_values[j] will match context_dense_shapes[j].
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["context_dense_shapes"] = value
+	}
+}
+
+// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value.
+//
+// value: A list of Nfeature_list_sparse types; the data types
+// of data in each FeatureList given in feature_list_sparse_keys.
+// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["feature_list_sparse_types"] = value
+	}
+}
+
+// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value.
+//
+// value: A list of Nfeature_list_dense shapes; the shapes of
+// data in each FeatureList given in feature_list_dense_keys.
+// The shape of each Feature in the FeatureList corresponding to
+// feature_list_dense_key[j] must always equal
+// feature_list_dense_shapes[j].NumEntries().
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["feature_list_dense_shapes"] = value
+	}
+}
+
+// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors.
 //
 // Arguments:
-//	prefix: Must have a single element.  The prefix of a V2 checkpoint.
-//	tensor_names: shape {N}.  The names of the tensors to be restored.
-//	shape_and_slices: shape {N}.  The slice specs of the tensors to be restored.
-// Empty strings indicate that they are non-partitioned tensors.
-//	dtypes: shape {N}.  The list of expected dtype for the tensors.  Must match
-// those stored in the checkpoint.
-//
-// Returns shape {N}.  The restored tensors, whose shapes are read from the
-// checkpoint directly.
-func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) {
+//	serialized: A vector containing binary serialized SequenceExample protos.
+//	debug_name: A vector containing the names of the serialized protos.
+// May contain, for example, the table key (descriptive) name for the
+// corresponding serialized proto.  This is purely for debugging purposes;
+// the presence of values here has no effect on the output.
+// May also be an empty vector if no name is available.
+//	context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty).
+// context_dense_defaults[j] provides default values
+// when the SequenceExample's context map lacks context_dense_key[j].
+// If an empty Tensor is provided for context_dense_defaults[j],
+// then the Feature context_dense_keys[j] is required.
+// The input type is inferred from context_dense_defaults[j], even when it's
+// empty.  If context_dense_defaults[j] is not empty, its shape must match
+// context_dense_shapes[j].
+//	feature_list_dense_missing_assumed_empty: A vector listing the
+// FeatureList keys which may be missing from the SequenceExamples.  If the
+// associated FeatureList is missing, it is treated as empty.  By default,
+// any FeatureList not listed in this vector must exist in the SequenceExamples.
+//	context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars).
+// The keys expected in the Examples' features associated with context_sparse
+// values.
+//	context_dense_keys: A list of Ncontext_dense string Tensors (scalars).
+// The keys expected in the SequenceExamples' context features associated with
+// dense values.
+//	feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors
+// (scalars).  The keys expected in the FeatureLists associated with sparse
+// values.
+//	feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars).
+// The keys expected in the SequenceExamples' feature_lists associated
+// with lists of dense values.
+func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "RestoreV2",
+		Type: "ParseSequenceExample",
 		Input: []tf.Input{
-			prefix, tensor_names, shape_and_slices,
+			serialized, debug_name, tf.OutputList(context_dense_defaults),
 		},
 		Attrs: attrs,
 	}
@@ -17546,200 +15539,268 @@
 	}
 	var idx int
 	var err error
-	if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil {
-		scope.UpdateErr("RestoreV2", err)
+	if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
 		return
 	}
-	return tensors
-}
-
-// Receives a tensor value broadcast from another device.
-func CollectiveBcastRecv(scope *Scope, T tf.DataType, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) {
-	if scope.Err() != nil {
+	if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
 		return
 	}
-	attrs := map[string]interface{}{"T": T, "group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape}
-	opspec := tf.OpSpec{
-		Type: "CollectiveBcastRecv",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Decode web-safe base64-encoded strings.
-//
-// Input may or may not have padding at the end. See EncodeBase64 for padding.
-// Web-safe means that input must use - and _ instead of + and /.
-//
-// Arguments:
-//	input: Base64 strings to decode.
-//
-// Returns Decoded strings.
-func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
+	if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "DecodeBase64",
-		Input: []tf.Input{
-			input,
-		},
+	if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil {
+		scope.UpdateErr("ParseSequenceExample", err)
+		return
+	}
+	return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths
 }
 
-// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad.
-type ResourceSparseApplyProximalAdagradAttr func(optionalAttr)
+// LoadTPUEmbeddingADAMParametersAttr is an optional argument to LoadTPUEmbeddingADAMParameters.
+type LoadTPUEmbeddingADAMParametersAttr func(optionalAttr)
 
-// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
+// LoadTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
 //
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr {
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingADAMParametersTableId(value int64) LoadTPUEmbeddingADAMParametersAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["table_id"] = value
 	}
 }
 
-// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
+// LoadTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingADAMParametersTableName(value string) LoadTPUEmbeddingADAMParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load ADAM embedding parameters.
 //
-// That is for rows we have grad for, we update var and accum as follows:
-// accum += grad * grad
-// prox_v = var
-// prox_v -= lr * grad * (1 / sqrt(accum))
-// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
+//	parameters: Value of parameters used in the ADAM optimization algorithm.
+//	momenta: Value of momenta used in the ADAM optimization algorithm.
+//	velocities: Value of velocities used in the ADAM optimization algorithm.
+//
+//
 //
 // Returns the created operation.
-func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) {
+func LoadTPUEmbeddingADAMParameters(scope *Scope, parameters tf.Output, momenta tf.Output, velocities tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingADAMParametersAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyProximalAdagrad",
+		Type: "LoadTPUEmbeddingADAMParameters",
 		Input: []tf.Input{
-			var_, accum, lr, l1, l2, grad, indices,
+			parameters, momenta, velocities,
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad.
-type MaxPool3DGradAttr func(optionalAttr)
-
-// MaxPool3DGradDataFormat sets the optional data_format attribute to value.
+// Inverse 2D real-valued fast Fourier transform.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes gradients of max pooling function.
+// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued
+// signal over the inner-most 2 dimensions of `input`.
+//
+// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`:
+// The inner-most dimension contains the `fft_length / 2 + 1` unique components of
+// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
+// from the size of the inner-most 2 dimensions of `input`. If the FFT length used
+// to compute `input` is odd, it should be provided since it cannot be inferred
+// properly.
+//
+// Along each axis `IRFFT2D` is computed on, if `fft_length` (or
+// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) {
+//	input: A complex64 tensor.
+//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
+//
+// Returns A float32 tensor of the same rank as `input`. The inner-most 2
+//   dimensions of `input` are replaced with the `fft_length` samples of their
+//   inverse 2D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.irfft2
+// @end_compatibility
+func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "IRFFT2D",
+		Input: []tf.Input{
+			input, fft_length,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// InfeedEnqueueTupleAttr is an optional argument to InfeedEnqueueTuple.
+type InfeedEnqueueTupleAttr func(optionalAttr)
+
+// InfeedEnqueueTupleLayouts sets the optional layouts attribute to value.
+//
+// value: A vector holding the requested layout in minor-to-major sequence for
+// all the tuple shapes, in the order the shapes appear in the "shapes" input.
+// The layout elements for a sub-shape can be set to -1, in which case the
+// corresponding layout will be computed by the infeed operation.
+// If not specified, defaults to <>
+func InfeedEnqueueTupleLayouts(value []int64) InfeedEnqueueTupleAttr {
+	return func(m optionalAttr) {
+		m["layouts"] = value
+	}
+}
+
+// InfeedEnqueueTupleDeviceOrdinal sets the optional device_ordinal attribute to value.
+//
+// value: The TPU device to use. This should be -1 when the Op
+// is running on a TPU device, and >= 0 when the Op is running on the CPU
+// device.
+// If not specified, defaults to -1
+func InfeedEnqueueTupleDeviceOrdinal(value int64) InfeedEnqueueTupleAttr {
+	return func(m optionalAttr) {
+		m["device_ordinal"] = value
+	}
+}
+
+// Feeds multiple Tensor values into the computation as an XLA tuple.
+//
+// Arguments:
+//	inputs: A list of tensors that will be provided using the infeed mechanism.
+//	shapes: The shapes of each tensor in `inputs`.
+//
+// Returns the created operation.
+func InfeedEnqueueTuple(scope *Scope, inputs []tf.Output, shapes []tf.Shape, optional ...InfeedEnqueueTupleAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"shapes": shapes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPool3DGrad",
+		Type: "InfeedEnqueueTuple",
 		Input: []tf.Input{
-			orig_input, orig_output, grad,
+			tf.OutputList(inputs),
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Returns the name of the device on which `resource` has been placed.
-func ExperimentalIteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) {
+// Returns which elements of x are finite.
+//
+// @compatibility(numpy)
+// Equivalent to np.isfinite
+// @end_compatibility
+func IsFinite(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ExperimentalIteratorGetDevice",
+		Type: "IsFinite",
 		Input: []tf.Input{
-			resource,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
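
A cleanup-pattern sketch combining this op with the generated `Select` and `ZerosLike` wrappers, under the usual import assumptions; the helper name is illustrative:

package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// zeroNonFinite replaces NaN and +/-Inf entries of x with zeros, a common
// cleanup step before reductions that non-finite values would otherwise poison.
func zeroNonFinite(s *op.Scope, x tf.Output) tf.Output {
	return op.Select(s, op.IsFinite(s, x), x, op.ZerosLike(s, x))
}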
 
-// SparseReduceSumAttr is an optional argument to SparseReduceSum.
-type SparseReduceSumAttr func(optionalAttr)
+// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
+type ResourceStridedSliceAssignAttr func(optionalAttr)
 
-// SparseReduceSumKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr {
+// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["begin_mask"] = value
 	}
 }
 
-// Computes the sum of elements across dimensions of a SparseTensor.
+// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["end_mask"] = value
+	}
+}
+
+// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["ellipsis_mask"] = value
+	}
+}
+
+// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["new_axis_mask"] = value
+	}
+}
+
+// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["shrink_axis_mask"] = value
+	}
+}
+
+// Assign `value` to the sliced l-value reference of `ref`.
 //
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_sum()`.  In particular, this Op also returns a dense `Tensor`
-// instead of a sparse one.
+// The values of `value` are assigned to the positions in the variable
+// `ref` that are selected by the slice parameters. The slice parameters
+// `begin`, `end`, `strides`, etc. work exactly as in `StridedSlice`.
 //
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
+// NOTE: this op currently does not support broadcasting, and so `value`'s
+// shape must be exactly the shape produced by the slice of `ref`.
 //
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
-//
-// Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-//
-// Returns `R-K`-D.  The reduced Tensor.
-func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17748,76 +15809,36 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseReduceSum",
+		Type: "ResourceStridedSliceAssign",
 		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
+			ref, begin, end, strides, value,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Records the latency of producing `input_dataset` elements in a StatsAggregator.
-func ExperimentalLatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalLatencyStatsDataset",
-		Input: []tf.Input{
-			input_dataset, tag,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// ArgMaxAttr is an optional argument to ArgMax.
+type ArgMaxAttr func(optionalAttr)
 
-// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul.
-type SparseTensorDenseMatMulAttr func(optionalAttr)
-
-// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value.
-//
-// value: Use the adjoint of A in the matrix multiply.  If A is complex, this
-// is transpose(conj(A)).  Otherwise it's transpose(A).
-// If not specified, defaults to false
-func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr {
+// ArgMaxOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMaxOutputType(value tf.DataType) ArgMaxAttr {
 	return func(m optionalAttr) {
-		m["adjoint_a"] = value
+		m["output_type"] = value
 	}
 }
 
-// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value.
+// Returns the index with the largest value across dimensions of a tensor.
 //
-// value: Use the adjoint of B in the matrix multiply.  If B is complex, this
-// is transpose(conj(B)).  Otherwise it's transpose(B).
-// If not specified, defaults to false
-func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr {
-	return func(m optionalAttr) {
-		m["adjoint_b"] = value
-	}
-}
-
-// Multiply SparseTensor (of rank 2) "A" by dense matrix "B".
-//
-// No validity checking is performed on the indices of A.  However, the following
-// input format is recommended for optimal behavior:
-//
-// if adjoint_a == false:
-//   A should be sorted in lexicographically increasing order.  Use SparseReorder
-//   if you're not sure.
-// if adjoint_a == true:
-//   A should be sorted in order of increasing dimension 1 (i.e., "column major"
-//   order instead of "row major" order).
+// Note that in case of ties the identity of the return value is not guaranteed.
 //
 // Arguments:
-//	a_indices: 2-D.  The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix.
-//	a_values: 1-D.  The `values` of the `SparseTensor`, size `[nnz]` Vector.
-//	a_shape: 1-D.  The `shape` of the `SparseTensor`, size `[2]` Vector.
-//	b: 2-D.  A dense Matrix.
-func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) {
+//
+//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
+// Describes which dimension of the input Tensor to reduce across. For vectors,
+// use dimension = 0.
+func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17826,9 +15847,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseTensorDenseMatMul",
+		Type: "ArgMax",
 		Input: []tf.Input{
-			a_indices, a_values, a_shape, b,
+			input, dimension,
 		},
 		Attrs: attrs,
 	}
@@ -17836,16 +15857,1010 @@
 	return op.Output(0)
 }
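+
+// A minimal usage sketch (not generated; assumes a Scope s). The optional
+// ArgMaxOutputType attribute shows the functional-option pattern used for
+// attrs throughout this package:
+//
+//	logits := Const(s, [][]float32{{0.1, 0.9}, {0.8, 0.2}})
+//	idx := ArgMax(s, logits, Const(s, int32(1)), ArgMaxOutputType(tf.Int32)) // -> [1, 0]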
 
-// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp.
-type ResourceApplyRMSPropAttr func(optionalAttr)
+// Fetches multiple values from infeed as an XLA tuple.
+//
+// Arguments:
+//	dtypes: The element types of each element in `outputs`.
+//	shapes: The shapes of each tensor in `outputs`.
+//
+// Returns A list of tensors that will be provided using the infeed mechanism.
+func InfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape) (outputs []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes, "shapes": shapes}
+	opspec := tf.OpSpec{
+		Type: "InfeedDequeueTuple",
 
-// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value.
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("InfeedDequeueTuple", err)
+		return
+	}
+	return outputs
+}
+
+// Enqueue multiple Tensor values on the computation outfeed.
+//
+// Arguments:
+//	inputs: A list of tensors that will be inserted into the outfeed queue as an
+// XLA tuple.
+//
+// Returns the created operation.
+func OutfeedEnqueueTuple(scope *Scope, inputs []tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "OutfeedEnqueueTuple",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad.
+type ResourceApplyAdagradAttr func(optionalAttr)
+
+// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value.
+// If not specified, defaults to true
+func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr {
+	return func(m optionalAttr) {
+		m["update_slots"] = value
+	}
+}
+
+// Update '*var' according to the adagrad scheme.
+//
+// accum += grad * grad
+// var -= lr * grad * (1 / sqrt(accum))
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyAdagrad",
+		Input: []tf.Input{
+			var_, accum, lr, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
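+
+// A minimal usage sketch (not generated; assumes v and accum are resource
+// handles from VarHandleOp with matching shape/dtype, and grad is a gradient
+// tensor already in the graph):
+//
+//	lr := Const(s, float32(0.01))
+//	step := ResourceApplyAdagrad(s, v, accum, lr, grad,
+//		ResourceApplyAdagradUseLocking(true))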
+
+// CudnnRNNV3Attr is an optional argument to CudnnRNNV3.
+type CudnnRNNV3Attr func(optionalAttr)
+
+// CudnnRNNV3RnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNV3RnnMode(value string) CudnnRNNV3Attr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNV3InputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNV3InputMode(value string) CudnnRNNV3Attr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNV3Direction sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNV3Direction(value string) CudnnRNNV3Attr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNV3Dropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNV3Dropout(value float32) CudnnRNNV3Attr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNV3Seed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNV3Seed(value int64) CudnnRNNV3Attr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNV3Seed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNV3Seed2(value int64) CudnnRNNV3Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// CudnnRNNV3IsTraining sets the optional is_training attribute to value.
+// If not specified, defaults to true
+func CudnnRNNV3IsTraining(value bool) CudnnRNNV3Attr {
+	return func(m optionalAttr) {
+		m["is_training"] = value
+	}
+}
+
+// An RNN backed by cuDNN.
+//
+// Computes the RNN from the input and initial states, with respect to the params
+// buffer. Accepts one extra input "sequence_lengths" than CudnnRNN.
+//
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicates whether there is a linear projection between the input and
+//   the actual computation before the first layer. 'skip_input' is only allowed
+//   when input_size == num_units; 'auto_select' implies 'skip_input' when
+//   input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used. Should be
+//   "unidirectional" or "bidirectional".
+// dropout: Dropout probability. When set to 0., dropout is disabled.
+// seed: The 1st part of a seed to initialize dropout.
+// seed2: The 2nd part of a seed to initialize dropout.
+// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
+// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
+//     num_units].
+// input_c: For LSTM, a 3-D tensor with the shape of
+//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
+// params: A 1-D tensor that contains the weights and biases in an opaque layout.
+//     The size must be created through CudnnRNNParamsSize, and initialized
+//     separately. Note that they might not be compatible across different
+//     generations. So it is a good idea to save and restore this buffer.
+// sequence_lengths: A vector of lengths of each input sequence.
+// output: A 3-D tensor with the shape of [seq_length, batch_size,
+//     dir * num_units].
+// output_h: The same shape as input_h.
+// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
+// is_training: Indicates whether this operation is used for inference or
+//   training.
+// reserve_space: An opaque tensor that can be used in backprop calculation. It
+//   is only produced if is_training is true.
+func CudnnRNNV3(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, sequence_lengths tf.Output, optional ...CudnnRNNV3Attr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output, host_reserved tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNNV3",
+		Input: []tf.Input{
+			input, input_h, input_c, params, sequence_lengths,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+}
+
+// Applies softmax to a batched N-D `SparseTensor`.
+//
+// The inputs represent an N-D SparseTensor  with logical shape `[..., B, C]`
+// (where `N >= 2`), and with indices sorted in the canonical lexicographic order.
+//
+// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost
+// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly
+// zero elements do not participate*.  Specifically, the algorithm is equivalent
+// to the following:
+//
+//   (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix
+//       with shape `[B, C]`, along the size-C dimension;
+//   (2) Masks out the original implicitly-zero locations;
+//   (3) Renormalizes the remaining elements.
+//
+// Hence, the `SparseTensor` result has exactly the same non-zero indices and
+// shape.
+//
+// Arguments:
+//	sp_indices: 2-D.  `NNZ x R` matrix with the indices of non-empty values in a
+// SparseTensor, in canonical ordering.
+//	sp_values: 1-D.  `NNZ` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//
+// Returns 1-D.  The `NNZ` values for the result `SparseTensor`.
+func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSoftmax",
+		Input: []tf.Input{
+			sp_indices, sp_values, sp_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a Tensor by indexing into the TensorList.
+//
+// Each row in the produced Tensor corresponds to the element in the TensorList
+// specified by the given index (see `tf.gather`).
+//
+// input_handle: The input tensor list.
+// indices: The indices used to index into the list.
+// values: The tensor.
+func TensorListGather(scope *Scope, input_handle tf.Output, indices tf.Output, element_shape tf.Output, element_dtype tf.DataType) (values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
+	opspec := tf.OpSpec{
+		Type: "TensorListGather",
+		Input: []tf.Input{
+			input_handle, indices, element_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2.
+type FixedLengthRecordReaderV2Attr func(optionalAttr)
+
+// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value.
+//
+// value: Number of bytes in the header, defaults to 0.
+// If not specified, defaults to 0
+func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["header_bytes"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value.
+//
+// value: Number of bytes in the footer, defaults to 0.
+// If not specified, defaults to 0
+func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["footer_bytes"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value.
+//
+// value: Number of bytes to hop before each read. Default of 0 means using
+// record_bytes.
+// If not specified, defaults to 0
+func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["hop_bytes"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this reader is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this reader is named in the given bucket
+// with this shared_name. Otherwise, the node name is used instead.
+// If not specified, defaults to ""
+func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value.
+//
+// value: The type of encoding for the file. Currently ZLIB and GZIP
+// are supported. Defaults to none.
+// If not specified, defaults to ""
+func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["encoding"] = value
+	}
+}
+
+// A Reader that outputs fixed-length records from a file.
+//
+// Arguments:
+//	record_bytes: Number of bytes in the record.
+//
+// Returns The handle to reference the Reader.
+func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"record_bytes": record_bytes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "FixedLengthRecordReaderV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
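+
+// A minimal usage sketch (not generated; assumes a Scope s). Reads 16-byte
+// records, skipping a hypothetical 4-byte file header:
+//
+//	reader := FixedLengthRecordReaderV2(s, 16,
+//		FixedLengthRecordReaderV2HeaderBytes(4))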
+
+// CompilationResultProto indicating the status of the TPU compilation.
+func TPUCompilationResult(scope *Scope) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TPUCompilationResult",
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics.
+//
+// Arguments:
+//	tree_ensemble_handle: Handle to the tree ensemble.
+//
+// Returns Stamp token of the tree ensemble resource. The number of trees in the tree ensemble resource. The number of trees that were finished successfully. The number of layers we attempted to build (but not necessarily succeeded). Rank size 2 tensor that contains start and end ids of the nodes in the latest
+// layer.
+func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesGetEnsembleStates",
+		Input: []tf.Input{
+			tree_ensemble_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+}
+
+// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
+type ResourceApplyPowerSignAttr func(optionalAttr)
+
+// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and m tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the PowerSign update.
+//
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
+// variable <- variable - lr_t * update
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	logbase: Must be a scalar.
+//	sign_decay: Must be a scalar.
+//	beta: Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyPowerSign",
+		Input: []tf.Input{
+			var_, m, lr, logbase, sign_decay, beta, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Deprecated. Use TensorArraySplitV3
+//
+// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3
+func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArraySplitV2",
+		Input: []tf.Input{
+			handle, value, lengths, flow_in,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Reshapes a SparseTensor to represent values in a new dense shape.
+//
+// This operation has the same semantics as reshape on the represented dense
+// tensor.  The `input_indices` are recomputed based on the requested `new_shape`.
+//
+// If one component of `new_shape` is the special value -1, the size of that
+// dimension is computed so that the total dense size remains constant.  At
+// most one component of `new_shape` can be -1.  The number of dense elements
+// implied by `new_shape` must be the same as the number of dense elements
+// originally implied by `input_shape`.
+//
+// Reshaping does not affect the order of values in the SparseTensor.
+//
+// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`
+// has length `R_out`, then `input_indices` has shape `[N, R_in]`,
+// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and
+// `output_shape` has length `R_out`.
+//
+// Arguments:
+//	input_indices: 2-D.  `N x R_in` matrix with the indices of non-empty values in a
+// SparseTensor.
+//	input_shape: 1-D.  `R_in` vector with the input SparseTensor's dense shape.
+//	new_shape: 1-D.  `R_out` vector with the requested new dense shape.
+//
+// Returns 2-D.  `N x R_out` matrix with the updated indices of non-empty
+// values in the output SparseTensor. 1-D.  `R_out` vector with the full dense shape of the output
+// SparseTensor.  This is the same as `new_shape` but with any -1 dimensions
+// filled in.
+func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseReshape",
+		Input: []tf.Input{
+			input_indices, input_shape, new_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
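+
+// A minimal usage sketch (not generated; assumes a Scope s). Reshapes a sparse
+// [2, 3] tensor with three non-empty values to [3, 2], letting -1 infer a dim:
+//
+//	idx := Const(s, [][]int64{{0, 0}, {0, 2}, {1, 1}})
+//	shape := Const(s, []int64{2, 3})
+//	newShape := Const(s, []int64{3, -1})
+//	outIdx, outShape := SparseReshape(s, idx, shape, newShape)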
+
+// Computes the product along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output_i = \prod_j data_j\\) where the product is over `j` such
+// that `segment_ids[j] == i`.
+//
+// If the product is empty for a given segment ID `i`, `output[i] = 1`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt>
+// </div>
+//
+// For example:
+//
+// ```
+// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]])
+// tf.segment_prod(c, tf.constant([0, 0, 1]))
+// # ==> [[4, 6, 6, 4],
+// #      [5, 6, 7, 8]]
+// ```
+//
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentProd",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
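+
+// A Go sketch of the example above (not generated; assumes a Scope s):
+//
+//	c := Const(s, [][]int32{{1, 2, 3, 4}, {4, 3, 2, 1}, {5, 6, 7, 8}})
+//	ids := Const(s, []int32{0, 0, 1})
+//	p := SegmentProd(s, c, ids) // -> [[4, 6, 6, 4], [5, 6, 7, 8]]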
+
+// RetrieveTPUEmbeddingFTRLParametersAttr is an optional argument to RetrieveTPUEmbeddingFTRLParameters.
+type RetrieveTPUEmbeddingFTRLParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingFTRLParametersTableId(value int64) RetrieveTPUEmbeddingFTRLParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingFTRLParametersTableName(value string) RetrieveTPUEmbeddingFTRLParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve FTRL embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns Parameter parameters updated by the FTRL optimization algorithm. Parameter accumulators updated by the FTRL optimization algorithm. Parameter linears updated by the FTRL optimization algorithm.
+func RetrieveTPUEmbeddingFTRLParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingFTRLParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Connects outputs of an N-way replicated computation to N outputs.
+func TPUReplicatedOutput(scope *Scope, input tf.Output, num_replicas int64) (outputs []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_replicas": num_replicas}
+	opspec := tf.OpSpec{
+		Type: "TPUReplicatedOutput",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("TPUReplicatedOutput", err)
+		return
+	}
+	return outputs
+}
+
+// LoadTPUEmbeddingFTRLParametersAttr is an optional argument to LoadTPUEmbeddingFTRLParameters.
+type LoadTPUEmbeddingFTRLParametersAttr func(optionalAttr)
+
+// LoadTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingFTRLParametersTableId(value int64) LoadTPUEmbeddingFTRLParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingFTRLParametersTableName(value string) LoadTPUEmbeddingFTRLParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load FTRL embedding parameters.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the FTRL optimization algorithm.
+//	accumulators: Value of accumulators used in the FTRL optimization algorithm.
+//	linears: Value of linears used in the FTRL optimization algorithm.
+//
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingFTRLParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, linears tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingFTRLParametersAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingFTRLParameters",
+		Input: []tf.Input{
+			parameters, accumulators, linears,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Returns (x - y)(x - y) element-wise.
+//
+// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SquaredDifference",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
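+
+// A minimal usage sketch (not generated; assumes a Scope s):
+//
+//	x := Const(s, []float32{2, 3})
+//	y := Const(s, []float32{5, 1})
+//	z := SquaredDifference(s, x, y) // (x - y)^2 -> [9, 4]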
+
+// Push an element onto the tensor_array.
+//
+// Arguments:
+//	handle: The handle to a TensorArray.
+//	index: The position to write to inside the TensorArray.
+//	value: The tensor to write to the TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//
+// Returns A float scalar that enforces proper chaining of operations.
+func TensorArrayWriteV3(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayWriteV3",
+		Input: []tf.Input{
+			handle, index, value, flow_in,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// RetrieveTPUEmbeddingAdagradParametersAttr is an optional argument to RetrieveTPUEmbeddingAdagradParameters.
+type RetrieveTPUEmbeddingAdagradParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingAdagradParametersTableId(value int64) RetrieveTPUEmbeddingAdagradParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingAdagradParametersTableName(value string) RetrieveTPUEmbeddingAdagradParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve Adagrad embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns Parameter parameters updated by the Adagrad optimization algorithm. Parameter accumulators updated by the Adagrad optimization algorithm.
+func RetrieveTPUEmbeddingAdagradParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdagradParametersAttr) (parameters tf.Output, accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingAdagradParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Compare values of `input` to `threshold` and pack resulting bits into a `uint8`.
+//
+// Each comparison returns a boolean `true` (if `input_value > threshold`)
+// and `false` otherwise.
+//
+// This operation is useful for Locality-Sensitive-Hashing (LSH) and other
+// algorithms that use hashing approximations of cosine and `L2` distances;
+// codes can be generated from an input via:
+//
+// ```python
+// codebook_size = 50
+// codebook_bits = codebook_size * 32
+// codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits],
+//                            dtype=x.dtype,
+//                            initializer=tf.orthogonal_initializer())
+// codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.)
+// codes = tf.bitcast(codes, tf.int32)  # go from uint8 to int32
+// # now codes has shape x.shape[:-1] + [codebook_size]
+// ```
+//
+// **NOTE**: Currently, the innermost dimension of the tensor must be divisible
+// by 8.
+//
+// Given an `input` shaped `[s0, s1, ..., s_n]`, the output is
+// a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`.
+//
+// Arguments:
+//	input: Values to compare against `threshold` and bitpack.
+//	threshold: Threshold to compare against.
+//
+// Returns The bitpacked comparisons.
+func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "CompareAndBitpack",
+		Input: []tf.Input{
+			input, threshold,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
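+
+// A minimal usage sketch (not generated; assumes a Scope s). The innermost
+// dimension here is 8, so the 8 boolean comparisons pack into a single uint8:
+//
+//	x := Const(s, []float32{0.1, 0.9, 0.3, 0.8, 0.2, 0.7, 0.4, 0.6})
+//	thr := Const(s, float32(0.5))
+//	bits := CompareAndBitpack(s, x, thr)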
+
+// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2.
+type QuantizeAndDequantizeV2Attr func(optionalAttr)
+
+// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value.
+//
+// value: Whether the quantization is signed or unsigned. (Actually this parameter
+// should have been called `signed_output`.)
+// If not specified, defaults to true
+func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr {
+	return func(m optionalAttr) {
+		m["signed_input"] = value
+	}
+}
+
+// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value.
+//
+// value: The bitwidth of the quantization.
+// If not specified, defaults to 8
+func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
+
+// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value.
+//
+// value: Whether the range is given or should be determined from the `input` tensor.
+// If not specified, defaults to false
+func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr {
+	return func(m optionalAttr) {
+		m["range_given"] = value
+	}
+}
+
+// QuantizeAndDequantizeV2RoundMode sets the optional round_mode attribute to value.
+//
+// value: The 'round_mode' attribute controls which rounding tie-breaking algorithm is
+// used when rounding float values to their quantized equivalents. The following
+// rounding modes are currently supported:
+//
+// *   HALF_TO_EVEN: this is the default round_mode.
+// *   HALF_UP: round towards positive. In this mode 7.5 rounds up to 8 and -7.5
+//     rounds up to -7.
+//
+// If not specified, defaults to "HALF_TO_EVEN"
+func QuantizeAndDequantizeV2RoundMode(value string) QuantizeAndDequantizeV2Attr {
+	return func(m optionalAttr) {
+		m["round_mode"] = value
+	}
+}
+
+// Quantizes then dequantizes a tensor.
+//
+// This op simulates the precision loss from the quantized forward pass by:
+//
+// 1. Quantizing the tensor to fixed point numbers, which should match the target
+//    quantization method when it is used in inference.
+// 2. Dequantizing it back to floating point numbers for the following ops, most
+//    likely matmul.
+//
+// There are different ways to quantize. This version uses only scaling, so 0.0
+// maps to 0.
+//
+// From the specified 'num_bits' in the quantized output type, it determines
+// minimum and maximum representable quantized values.
+//
+// e.g.
+//
+// *   [-128, 127] for signed, num_bits = 8, or
+// *   [0, 255] for unsigned, num_bits = 8.
+//
+// If range_given == False, the initial input_min, input_max will be determined
+// automatically as the minimum and maximum values in the input tensor, otherwise
+// the specified values of input_min, input_max are used.
+//
+// Note: If the input_min, input_max are specified, they do not need to equal the
+// actual minimum and maximum values in the tensor. e.g. in some cases it may be
+// beneficial to specify these values such that the low probability extremes of the
+// input distribution are clipped.
+//
+// This op determines the maximum scale_factor that would map the initial
+// [input_min, input_max] range to a range that lies within the representable
+// quantized range.
+//
+// It determines the scale from one of input_min and input_max, then updates the
+// other one to maximize the representable range.
+//
+// e.g.
+//
+// *   if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0,
+//     5.0]: it would use a scale_factor of -128 / -10.0 = 12.8 In this case, it
+//     would update input_max to be 127 / 12.8 = 9.921875
+// *   if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0,
+//     10.0]: it would use a scale_factor of 127 / 10.0 = 12.7 In this case, it
+//     would update input_min to be -128.0 / 12.7 = -10.07874
+// *   if the output is unsigned, input_min is forced to be 0, and only the
+//     specified input_max is used.
+//
+// After determining the scale_factor and updating the input range, it applies the
+// following to each value in the 'input' tensor.
+//
+// output = round(clamp(value, input_min, input_max) * scale_factor) / scale_factor.
+//
+// The above round function rounds the value based on the given round_mode.
+//
+//
+// Arguments:
+//	input: Tensor to quantize and then dequantize.
+//	input_min: If `range_given == True`, this specifies the minimum input value that needs to
+// be represented, otherwise it is determined from the min value of the `input`
+// tensor.
+//	input_max: If `range_given == True`, this specifies the maximum input value that needs to
+// be represented, otherwise it is determined from the max value of the `input`
+// tensor.
+func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizeAndDequantizeV2",
+		Input: []tf.Input{
+			input, input_min, input_max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
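+
+// A minimal usage sketch (not generated; assumes a Scope s). Simulates 8-bit
+// quantization over an explicitly given input range:
+//
+//	x := Const(s, []float32{-10.0, -1.5, 2.3, 5.0})
+//	min := Const(s, float32(-10.0))
+//	max := Const(s, float32(5.0))
+//	y := QuantizeAndDequantizeV2(s, x, min, max,
+//		QuantizeAndDequantizeV2NumBits(8),
+//		QuantizeAndDequantizeV2RangeGiven(true))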
+
+// A TPU core selector Op.
+//
+// This Op produces a set of TPU cores (for warm-up) or a single TPU core
+// (for regular inference) to execute the TPU program on. The output is
+// consumed by TPUPartitionedCall.
+//
+// Returns A vector of 1 or more TPU cores.
+func TPUOrdinalSelector(scope *Scope) (device_ordinals tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TPUOrdinalSelector",
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Looks up keys in a table, outputs the corresponding values.
+//
+// The tensor `keys` must be of the same type as the keys of the table.
+// The output `values` is of the type of the table values.
+//
+// The scalar `default_value` is the value output for keys not present in the
+// table. It must also be of the same type as the table values.
+//
+// Arguments:
+//	table_handle: Handle to the table.
+//	keys: Any shape.  Keys to look up.
+//
+//
+// Returns Same shape as `keys`.  Values found in the table, or `default_value`
+// for missing keys.
+func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "LookupTableFindV2",
+		Input: []tf.Input{
+			table_handle, keys, default_value,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
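+
+// A minimal usage sketch (not generated; assumes a Scope s and a table handle
+// `table` produced elsewhere, e.g. by HashTableV2 plus an initializer op):
+//
+//	keys := Const(s, []string{"a", "z"})
+//	def := Const(s, int64(-1))
+//	vals := LookupTableFindV2(s, table, keys, def)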
+
+// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp.
+type ResourceSparseApplyRMSPropAttr func(optionalAttr)
+
+// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value.
 //
 // value: If `True`, updating of the var, ms, and mom tensors is protected
 // by a lock; otherwise the behavior is undefined, but may exhibit less
 // contention.
 // If not specified, defaults to false
-func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr {
+func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr {
 	return func(m optionalAttr) {
 		m["use_locking"] = value
 	}
@@ -17873,9 +16888,10 @@
 //
 //	epsilon: Ridge term. Must be a scalar.
 //	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var, ms and mom.
 //
 // Returns the created operation.
-func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) {
+func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17884,15 +16900,1391 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyRMSProp",
+		Type: "ResourceSparseApplyRMSProp",
 		Input: []tf.Input{
-			var_, ms, mom, lr, rho, momentum, epsilon, grad,
+			var_, ms, mom, lr, rho, momentum, epsilon, grad, indices,
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
+// Returns the truth value of (x > y) element-wise.
+//
+// *NOTE*: `Greater` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Greater",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
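+
+// A minimal usage sketch (not generated; assumes a Scope s):
+//
+//	x := Const(s, []int32{1, 5, 3})
+//	y := Const(s, []int32{2, 2, 3})
+//	z := Greater(s, x, y) // -> [false, true, false]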
+
+// Creates a TensorList by indexing into a Tensor.
+//
+// Each member of the TensorList corresponds to one row of the input tensor,
+// specified by the given index (see `tf.gather`).
+//
+// tensor: The input tensor.
+// indices: The indices used to index into the list.
+// element_shape: The shape of the elements in the list (can be less specified than
+//   the shape of the tensor).
+// num_elements: The size of the output list. Must be large enough to accommodate
+//   the largest index in indices. If -1, the list is just large enough to include
+//   the largest index in indices.
+// output_handle: The TensorList.
+func TensorListScatterV2(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output, num_elements tf.Output) (output_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorListScatterV2",
+		Input: []tf.Input{
+			tensor, indices, element_shape, num_elements,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox.
+type SampleDistortedBoundingBoxAttr func(optionalAttr)
+
+// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to non-zero, the random number
+// generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
+// seed.
+// If not specified, defaults to 0
+func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value.
+//
+// value: The cropped area of the image must contain at least this
+// fraction of any bounding box supplied. The value of this parameter should be
+// non-negative. In the case of 0, the cropped area does not need to overlap
+// any of the bounding boxes supplied.
+// If not specified, defaults to 0.1
+func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["min_object_covered"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value.
+//
+// value: The cropped area of the image must have an aspect ratio =
+// width / height within this range.
+// If not specified, defaults to [0.75, 1.33]
+func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["aspect_ratio_range"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value.
+//
+// value: The cropped area of the image must contain a fraction of the
+// supplied image within this range.
+// If not specified, defaults to [0.05, 1]
+func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["area_range"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value.
+//
+// value: Number of attempts at generating a cropped region of the image
+// of the specified constraints. After `max_attempts` failures, return the entire
+// image.
+// If not specified, defaults to 100
+func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["max_attempts"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value.
+//
+// value: Controls behavior if no bounding boxes supplied.
+// If true, assume an implicit bounding box covering the whole input. If false,
+// raise an error.
+// If not specified, defaults to false
+func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["use_image_if_no_bounding_boxes"] = value
+	}
+}
+
+// Generate a single randomly distorted bounding box for an image.
+//
+// Bounding box annotations are often supplied in addition to ground-truth labels
+// in image recognition or object localization tasks. A common technique for
+// training such a system is to randomly distort an image while preserving
+// its content, i.e. *data augmentation*. This Op outputs a randomly distorted
+// localization of an object, i.e. bounding box, given an `image_size`,
+// `bounding_boxes` and a series of constraints.
+//
+// The output of this Op is a single bounding box that may be used to crop the
+// original image. The output is returned as 3 tensors: `begin`, `size` and
+// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
+// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
+// what the bounding box looks like.
+//
+// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
+// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
+// height of the underlying image.
+//
+// For example,
+//
+// ```python
+//     # Generate a single distorted bounding box.
+//     begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
+//         tf.shape(image),
+//         bounding_boxes=bounding_boxes)
+//
+//     # Draw the bounding box in an image summary.
+//     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
+//                                                   bbox_for_draw)
+//     tf.summary.image('images_with_box', image_with_box)
+//
+//     # Employ the bounding box to distort the image.
+//     distorted_image = tf.slice(image, begin, size)
+// ```
+//
+// Note that if no bounding box information is available, setting
+// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
+// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
+// false and no bounding boxes are supplied, an error is raised.
+//
+// Arguments:
+//	image_size: 1-D, containing `[height, width, channels]`.
+//	bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
+// associated with the image.
+//
+// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
+// `tf.slice`. 1-D, containing `[target_height, target_width, -1]`. Provide as input to
+// `tf.slice`. 3-D with shape `[1, 1, 4]` containing the distorted bounding box.
+// Provide as input to `tf.image.draw_bounding_boxes`.
+func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SampleDistortedBoundingBox",
+		Input: []tf.Input{
+			image_size, bounding_boxes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
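+
+// A Go sketch of the Python example above (not generated; assumes image is a
+// 3-D image tensor and boxes is a [1, N, 4] float32 tensor already in the graph):
+//
+//	begin, size, bboxes := SampleDistortedBoundingBox(s, Shape(s, image), boxes,
+//		SampleDistortedBoundingBoxMinObjectCovered(0.5))
+//	cropped := Slice(s, image, begin, size)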
+
+// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate.
+type ResourceScatterNdUpdateAttr func(optionalAttr)
+
+// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value.
+//
+// value: An optional bool. Defaults to True. If True, the assignment will
+// be protected by a lock; otherwise the behavior is undefined,
+// but may exhibit less contention.
+// If not specified, defaults to true
+func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Applies sparse `updates` to individual values or slices within a given
+//
+// variable according to `indices`.
+//
+// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+//
+// `indices` must be integer tensor, containing indices into `ref`.
+// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+//
+// The innermost dimension of `indices` (with length `K`) corresponds to
+// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+// dimension of `ref`.
+//
+// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
+//
+// ```
+// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+// ```
+//
+// For example, say we want to update 4 scattered elements of a rank-1 tensor
+// with 8 elements. In Python, that update would look like this:
+//
+// ```python
+//     ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+//     indices = tf.constant([[4], [3], [1], [7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     update = tf.scatter_nd_update(ref, indices, updates)
+//     with tf.Session() as sess:
+//       print(sess.run(update))
+// ```
+//
+// The resulting update to ref would look like this:
+//
+//     [1, 11, 3, 10, 9, 6, 7, 12]
+//
+// See `tf.scatter_nd` for more details about how to make updates to
+// slices.
+//
+// Arguments:
+//	ref: A resource handle. Must be from a VarHandleOp.
+//	indices: A Tensor. Must be one of the following types: int32, int64.
+// A tensor of indices into ref.
+//	updates: A Tensor. Must have the same type as ref. A tensor of updated
+// values to add to ref.
+//
+// Returns the created operation.
+func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterNdUpdate",
+		Input: []tf.Input{
+			ref, indices, updates,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
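+
+// A Go sketch of the Python example above (not generated; assumes ref is a
+// resource handle from VarHandleOp for a shape-[8] int32 variable):
+//
+//	indices := Const(s, [][]int32{{4}, {3}, {1}, {7}})
+//	updates := Const(s, []int32{9, 10, 11, 12})
+//	update := ResourceScatterNdUpdate(s, ref, indices, updates)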
+
+// UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets.
+type UnicodeDecodeWithOffsetsAttr func(optionalAttr)
+
+// UnicodeDecodeWithOffsetsErrors sets the optional errors attribute to value.
+//
+// value: Error handling policy when there is invalid formatting found in the input.
+// The value of 'strict' will cause the operation to produce a InvalidArgument
+// error on any invalid input formatting. A value of 'replace' (the default) will
+// cause the operation to replace any invalid formatting in the input with the
+// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
+// skip any invalid formatting in the input and produce no corresponding output
+// character.
+// If not specified, defaults to "replace"
+func UnicodeDecodeWithOffsetsErrors(value string) UnicodeDecodeWithOffsetsAttr {
+	return func(m optionalAttr) {
+		m["errors"] = value
+	}
+}
+
+// UnicodeDecodeWithOffsetsReplacementChar sets the optional replacement_char attribute to value.
+//
+// value: The replacement character codepoint to be used in place of any invalid
+// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+// be used. The default value is the default Unicode replacement character,
+// 0xFFFD (decimal 65533).
+// If not specified, defaults to 65533
+func UnicodeDecodeWithOffsetsReplacementChar(value int64) UnicodeDecodeWithOffsetsAttr {
+	return func(m optionalAttr) {
+		m["replacement_char"] = value
+	}
+}
+
+// UnicodeDecodeWithOffsetsReplaceControlCharacters sets the optional replace_control_characters attribute to value.
+//
+// value: Whether to replace the C0 control characters (00-1F) with the
+// `replacement_char`. Default is false.
+// If not specified, defaults to false
+func UnicodeDecodeWithOffsetsReplaceControlCharacters(value bool) UnicodeDecodeWithOffsetsAttr {
+	return func(m optionalAttr) {
+		m["replace_control_characters"] = value
+	}
+}
+
+// Decodes each string in `input` into a sequence of Unicode code points.
+//
+// The character codepoints for all strings are returned using a single vector
+// `char_values`, with strings expanded to characters in row-major order.
+// Similarly, the character start byte offsets are returned using a single vector
+// `char_to_byte_starts`, with strings expanded in row-major order.
+//
+// The `row_splits` tensor indicates where the codepoints and start offsets for
+// each input string begin and end within the `char_values` and
+// `char_to_byte_starts` tensors.  In particular, the values for the `i`th
+// string (in row-major order) are stored in the slice
+// `[row_splits[i]:row_splits[i+1]]`. Thus:
+//
+// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
+//   character in the `i`th string (in row-major order).
+// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th
+//   character in the `i`th string (in row-major order).
+// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
+//   string (in row-major order).
+//
+// Arguments:
+//	input: The text to be decoded. Can have any shape. Note that the output is flattened
+// to a vector of char values.
+//	input_encoding: Text encoding of the input strings. This is any of the encodings supported
+// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+//
+// Returns A 1D int32 tensor containing the row splits. A 1D int32 Tensor containing the decoded codepoints. A 1D int32 Tensor containing the byte index in the input string where each
+// character in `char_values` starts.
+func UnicodeDecodeWithOffsets(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeWithOffsetsAttr) (row_splits tf.Output, char_values tf.Output, char_to_byte_starts tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"input_encoding": input_encoding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "UnicodeDecodeWithOffsets",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
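+
+// A minimal usage sketch (not generated; assumes a Scope s):
+//
+//	strs := Const(s, []string{"héllo"})
+//	splits, chars, starts := UnicodeDecodeWithOffsets(s, strs, "UTF-8",
+//		UnicodeDecodeWithOffsetsErrors("replace"))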
+
+// Returns x - y element-wise.
+//
+// *NOTE*: `Subtract` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sub",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// LRNAttr is an optional argument to LRN.
+type LRNAttr func(optionalAttr)
+
+// LRNDepthRadius sets the optional depth_radius attribute to value.
+//
+// value: 0-D.  Half-width of the 1-D normalization window.
+// If not specified, defaults to 5
+func LRNDepthRadius(value int64) LRNAttr {
+	return func(m optionalAttr) {
+		m["depth_radius"] = value
+	}
+}
+
+// LRNBias sets the optional bias attribute to value.
+//
+// value: An offset (usually positive to avoid dividing by 0).
+// If not specified, defaults to 1
+func LRNBias(value float32) LRNAttr {
+	return func(m optionalAttr) {
+		m["bias"] = value
+	}
+}
+
+// LRNAlpha sets the optional alpha attribute to value.
+//
+// value: A scale factor, usually positive.
+// If not specified, defaults to 1
+func LRNAlpha(value float32) LRNAttr {
+	return func(m optionalAttr) {
+		m["alpha"] = value
+	}
+}
+
+// LRNBeta sets the optional beta attribute to value.
+//
+// value: An exponent.
+// If not specified, defaults to 0.5
+func LRNBeta(value float32) LRNAttr {
+	return func(m optionalAttr) {
+		m["beta"] = value
+	}
+}
+
+// Local Response Normalization.
+//
+// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last
+// dimension), and each vector is normalized independently.  Within a given vector,
+// each component is divided by the weighted, squared sum of inputs within
+// `depth_radius`.  In detail,
+//
+//     sqr_sum[a, b, c, d] =
+//         sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
+//     output = input / (bias + alpha * sqr_sum) ** beta
+//
+// For details, see [Krizhevsky et al., ImageNet classification with deep
+// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+//
+// Arguments:
+//	input: 4-D.
+func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LRN",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
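+
+// exampleLRN is an editor-added sketch (hypothetical helper, not generated
+// code) showing how the functional options above compose: apply local
+// response normalization to a 4-D NHWC activations tensor using
+// AlexNet-style hyperparameters.
+func exampleLRN(s *Scope, activations tf.Output) tf.Output {
+	return LRN(s, activations,
+		LRNDepthRadius(2), LRNBias(1.0), LRNAlpha(1e-4), LRNBeta(0.75))
+}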
+
+// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug.
+type RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve proximal Adagrad embedding parameters with debug support.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns:
+//	parameters: Parameter parameters updated by the proximal Adagrad optimization algorithm.
+//	accumulators: Parameter accumulators updated by the proximal Adagrad optimization algorithm.
+//	gradient_accumulators: Parameter gradient_accumulators updated by the proximal Adagrad optimization algorithm.
+func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad.
+type ResourceSparseApplyAdagradAttr func(optionalAttr)
+
+// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value.
+// If not specified, defaults to true
+func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr {
+	return func(m optionalAttr) {
+		m["update_slots"] = value
+	}
+}
+
+// Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
+//
+// That is, for rows we have grad for, we update var and accum as follows:
+// accum += grad * grad
+// var -= lr * grad * (1 / sqrt(accum))
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//
+// Returns the created operation.
+func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceSparseApplyAdagrad",
+		Input: []tf.Input{
+			var_, accum, lr, grad, indices,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
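+
+// sparseAdagradRow is an editor-added sketch (hypothetical, not generated
+// code) of the per-row update the op above applies, spelled out in plain Go
+// for a single gradient row; it assumes the standard library `math` package
+// is imported.
+func sparseAdagradRow(variable, accum, grad []float32, lr float32) {
+	for j := range grad {
+		// accum += grad * grad
+		accum[j] += grad[j] * grad[j]
+		// var -= lr * grad / sqrt(accum)
+		variable[j] -= lr * grad[j] / float32(math.Sqrt(float64(accum[j])))
+	}
+}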
+
+// LoadTPUEmbeddingMomentumParametersAttr is an optional argument to LoadTPUEmbeddingMomentumParameters.
+type LoadTPUEmbeddingMomentumParametersAttr func(optionalAttr)
+
+// LoadTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingMomentumParametersTableId(value int64) LoadTPUEmbeddingMomentumParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingMomentumParametersTableName(value string) LoadTPUEmbeddingMomentumParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load Momentum embedding parameters.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the Momentum optimization algorithm.
+//	momenta: Value of momenta used in the Momentum optimization algorithm.
+//
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingMomentumParameters(scope *Scope, parameters tf.Output, momenta tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingMomentumParameters",
+		Input: []tf.Input{
+			parameters, momenta,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Assigns sparse updates to the variable referenced by `resource`.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] = updates[...]
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] = updates[i, ...]
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
+//
+// Returns the created operation.
+func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterUpdate",
+		Input: []tf.Input{
+			resource, indices, updates,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth.
+type HistogramFixedWidthAttr func(optionalAttr)
+
+// HistogramFixedWidthDtype sets the optional dtype attribute to value.
+// If not specified, defaults to DT_INT32
+func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Return histogram of values.
+//
+// Given the tensor `values`, this operation returns a rank 1 histogram counting
+// the number of entries in `values` that fall into every bin.  The bins are
+// equal width and determined by the arguments `value_range` and `nbins`.
+//
+// ```python
+// # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
+// nbins = 5
+// value_range = [0.0, 5.0]
+// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
+//
+// with tf.get_default_session() as sess:
+//   hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
+//   tf.global_variables_initializer().run()
+//   sess.run(hist)  # => [2, 1, 1, 0, 2]
+// ```
+//
+// Arguments:
+//	values: Numeric `Tensor`.
+//	value_range: Shape [2] `Tensor` of same `dtype` as `values`.
+// values <= value_range[0] will be mapped to hist[0],
+// values >= value_range[1] will be mapped to hist[-1].
+//	nbins: Scalar `int32 Tensor`.  Number of histogram bins.
+//
+// Returns A 1-D `Tensor` holding histogram of values.
+func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "HistogramFixedWidth",
+		Input: []tf.Input{
+			values, value_range, nbins,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
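+
+// exampleHistogramFixedWidth is an editor-added sketch (hypothetical helper,
+// not generated code) mirroring the Python snippet above: five equal-width
+// bins over `[0, 5)`, with out-of-range values counted in the edge bins.
+func exampleHistogramFixedWidth(s *Scope) tf.Output {
+	values := Const(s, []float32{-1.0, 0.0, 1.5, 2.0, 5.0, 15})
+	valueRange := Const(s, []float32{0.0, 5.0})
+	nbins := Const(s, int32(5))
+	// Running the result yields [2, 1, 1, 0, 2].
+	return HistogramFixedWidth(s, values, valueRange, nbins)
+}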
+
+// Elementwise computes the bitwise right-shift of `x` and `y`.
+//
+// Performs a logical shift for unsigned integer types, and an arithmetic shift
+// for signed integer types.
+//
+// If `y` is negative, or greater than or equal to the width of `x` in bits,
+// the result is implementation defined.
+func RightShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RightShift",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
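+
+// rightShiftSemantics is an editor-added sketch (hypothetical, not generated
+// code) of the behavior described above, using Go's own shift operators:
+// unsigned operands shift in zeros (logical), signed operands replicate the
+// sign bit (arithmetic).
+func rightShiftSemantics() (uint8, int8) {
+	var u uint8 = 0x80 // 1000_0000
+	var i int8 = -128  // the same bit pattern, interpreted as signed
+	return u >> 2, i >> 2 // 0x20 (zeros shifted in), -32 (sign extended)
+}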
+
+// TensorListStackAttr is an optional argument to TensorListStack.
+type TensorListStackAttr func(optionalAttr)
+
+// TensorListStackNumElements sets the optional num_elements attribute to value.
+// If not specified, defaults to -1
+func TensorListStackNumElements(value int64) TensorListStackAttr {
+	return func(m optionalAttr) {
+		m["num_elements"] = value
+	}
+}
+
+// Stacks all tensors in the list.
+//
+// Requires that all tensors have the same shape.
+//
+// input_handle: the input list
+// tensor: the gathered result
+// num_elements: optional. If not -1, the number of elements in the list.
+//
+func TensorListStack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType, optional ...TensorListStackAttr) (tensor tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorListStack",
+		Input: []tf.Input{
+			input_handle, element_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// A placeholder op for a value that will be fed into the computation.
+//
+// Arguments:
+//	dtype: The type of elements in the tensor.
+//	shape: The shape of the tensor.
+//
+// Returns A tensor that will be provided using the infeed mechanism.
+func InfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype, "shape": shape}
+	opspec := tf.OpSpec{
+		Type: "InfeedDequeue",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform.
+type StatelessRandomUniformAttr func(optionalAttr)
+
+// StatelessRandomUniformDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs deterministic pseudorandom values from a uniform distribution.
+//
+// The generated values follow a uniform distribution in the range `[0, 1)`. The
+// lower bound 0 is included in the range, while the upper bound 1 is excluded.
+//
+// The outputs are a deterministic function of `shape` and `seed`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StatelessRandomUniform",
+		Input: []tf.Input{
+			shape, seed,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
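+
+// exampleStatelessUniform is an editor-added sketch (hypothetical helper, not
+// generated code): because the output is a pure function of `shape` and
+// `seed`, the two ops below produce identical tensors when run.
+func exampleStatelessUniform(s *Scope) (a, b tf.Output) {
+	shape := Const(s, []int32{2, 3})
+	seed := Const(s, []int64{7, 42}) // 2 seeds (shape [2])
+	a = StatelessRandomUniform(s.SubScope("a"), shape, seed)
+	b = StatelessRandomUniform(s.SubScope("b"), shape, seed)
+	return a, b
+}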
+
+// Makes its input available to the next iteration.
+//
+// Arguments:
+//	data: The tensor to be made available to the next iteration.
+//
+// Returns The same tensor as `data`.
+func NextIteration(scope *Scope, data tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "NextIteration",
+		Input: []tf.Input{
+			data,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Output a fact about factorials.
+func Fact(scope *Scope) (fact tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Fact",
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping.
+type GenerateVocabRemappingAttr func(optionalAttr)
+
+// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value.
+//
+// value: Number of entries in the old vocab file to consider.  If -1,
+// use the entire old vocabulary.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr {
+	return func(m optionalAttr) {
+		m["old_vocab_size"] = value
+	}
+}
+
+// Given a path to new and old vocabulary files, returns a remapping Tensor of
+//
+// length `num_new_vocab`, where `remapping[i]` contains the row number in the old
+// vocabulary that corresponds to row `i` in the new vocabulary (starting at line
+// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
+// in the new vocabulary is not in the old vocabulary.  The old vocabulary is
+// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
+// default value of -1.
+//
+// `new_vocab_offset` enables
+// use in the partitioned variable case, and should generally be set through
+// examining partitioning info.  Each vocabulary file should be a text file,
+// with each line containing a single entity within the vocabulary.
+//
+// For example, with `new_vocab_file` a text file containing each of the following
+// elements on a single line: `[f0, f1, f2, f3]`, `old_vocab_file` containing
+// `[f1, f0, f3]`, and `num_new_vocab = 3, new_vocab_offset = 1`, the returned
+// remapping would be `[0, -1, 2]`.
+//
+// The op also returns a count of how many entries in the new vocabulary
+// were present in the old vocabulary, which is used to calculate the number of
+// values to initialize in a weight matrix remapping.
+//
+// This functionality can be used to remap both row vocabularies (typically,
+// features) and column vocabularies (typically, classes) from TensorFlow
+// checkpoints.  Note that the partitioning logic relies on contiguous vocabularies
+// corresponding to div-partitioned variables.  Moreover, the underlying remapping
+// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should
+// use the corresponding index_table_from_file() as the FeatureColumn framework
+// does (as opposed to tf.feature_to_id(), which uses a CuckooTable).
+//
+// Arguments:
+//	new_vocab_file: Path to the new vocab file.
+//	old_vocab_file: Path to the old vocab file.
+//	new_vocab_offset: How many entries into the new vocab file to start reading.
+//	num_new_vocab: Number of entries in the new vocab file to remap.
+//
+// Returns:
+//	remapping: A Tensor of length num_new_vocab where the element at index i
+// is equal to the old ID that maps to the new ID i.  This element is -1 for any
+// new ID that is not found in the old vocabulary.
+//	num_present: Number of new vocab entries found in old vocab.
+func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "GenerateVocabRemapping",
+		Input: []tf.Input{
+			new_vocab_file, old_vocab_file,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
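+
+// remapVocabSketch is an editor-added sketch (hypothetical, not generated
+// code) of the remapping rule documented above, in plain Go: each new-vocab
+// entry maps to its old row number, or to -1 when absent from the old
+// vocabulary.
+func remapVocabSketch(newVocab, oldVocab []string, newVocabOffset, numNewVocab int) (remapping []int64, numPresent int64) {
+	oldRow := make(map[string]int64, len(oldVocab))
+	for i, w := range oldVocab {
+		oldRow[w] = int64(i)
+	}
+	for _, w := range newVocab[newVocabOffset : newVocabOffset+numNewVocab] {
+		if row, ok := oldRow[w]; ok {
+			remapping = append(remapping, row)
+			numPresent++
+		} else {
+			remapping = append(remapping, -1)
+		}
+	}
+	// For newVocab=[f0 f1 f2 f3], oldVocab=[f1 f0 f3], offset=1, num=3:
+	// remapping=[0 -1 2], numPresent=2, matching the example above.
+	return remapping, numPresent
+}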
+
+// Worker heartbeat op.
+//
+// Heartbeats may be sent periodically to indicate the coordinator is still active,
+// to retrieve the current worker status and to expedite shutdown when necessary.
+//
+// Arguments:
+//	request: A string tensor containing a serialized WorkerHeartbeatRequest
+//
+// Returns A string tensor containing a serialized WorkerHeartbeatResponse
+func WorkerHeartbeat(scope *Scope, request tf.Output) (response tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "WorkerHeartbeat",
+		Input: []tf.Input{
+			request,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the truth value of (x <= y) element-wise.
+//
+// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "LessEqual",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// EnqueueTPUEmbeddingIntegerBatchAttr is an optional argument to EnqueueTPUEmbeddingIntegerBatch.
+type EnqueueTPUEmbeddingIntegerBatchAttr func(optionalAttr)
+
+// EnqueueTPUEmbeddingIntegerBatchDeviceOrdinal sets the optional device_ordinal attribute to value.
+//
+// value: The TPU device to use. Should be >= 0 and less than the number
+// of TPU cores in the task on which the node is placed.
+// If not specified, defaults to -1
+func EnqueueTPUEmbeddingIntegerBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingIntegerBatchAttr {
+	return func(m optionalAttr) {
+		m["device_ordinal"] = value
+	}
+}
+
+// An op that enqueues a list of input batch tensors to TPUEmbedding.
+//
+// Arguments:
+//	batch: A list of 1D tensors, one for each embedding table, containing the
+// indices into the tables.
+//	mode_override: A string input that overrides the mode specified in the
+// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+// in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
+//
+// Returns the created operation.
+func EnqueueTPUEmbeddingIntegerBatch(scope *Scope, batch []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingIntegerBatchAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EnqueueTPUEmbeddingIntegerBatch",
+		Input: []tf.Input{
+			tf.OutputList(batch), mode_override,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// An op that receives embedding activations on the TPU.
+//
+// The TPU system performs the embedding lookups and aggregations specified by
+// the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The
+// results of these aggregations are visible to the Tensorflow Graph as the
+// outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing
+// one Tensor of activations per table specified in the model. There can be at
+// most one RecvTPUEmbeddingActivations op in the TPU graph.
+//
+// Arguments:
+//	num_outputs: The number of output activation tensors, equal to the number of
+// embedding tables in the model.
+//	config: Serialized TPUEmbeddingConfiguration proto.
+//
+// Returns A TensorList of embedding activations containing one Tensor per
+// embedding table in the model.
+func RecvTPUEmbeddingActivations(scope *Scope, num_outputs int64, config string) (outputs []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_outputs": num_outputs, "config": config}
+	opspec := tf.OpSpec{
+		Type: "RecvTPUEmbeddingActivations",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("RecvTPUEmbeddingActivations", err)
+		return
+	}
+	return outputs
+}
+
+// Selects elements from `x` or `y`, depending on `condition`.
+//
+// The `x`, and `y` tensors must all have the same shape, and the
+// output will also have that shape.
+//
+// The `condition` tensor must be a scalar if `x` and `y` are scalars.
+// If `x` and `y` are vectors or higher rank, then `condition` must be either a
+// scalar, a vector with size matching the first dimension of `x`, or must have
+// the same shape as `x`.
+//
+// The `condition` tensor acts as a mask that chooses, based on the value at each
+// element, whether the corresponding element / row in the output should be
+// taken from `x` (if true) or `y` (if false).
+//
+// If `condition` is a vector and `x` and `y` are higher rank matrices, then
+// it chooses which row (outer dimension) to copy from `x` and `y`.
+// If `condition` has the same shape as `x` and `y`, then it chooses which
+// element to copy from `x` and `y`.
+//
+// For example:
+//
+// ```python
+// # 'condition' tensor is [[True,  False]
+// #                        [False, True]]
+// # 't' is [[1, 2],
+// #         [3, 4]]
+// # 'e' is [[5, 6],
+// #         [7, 8]]
+// select(condition, t, e)  # => [[1, 6], [7, 4]]
+//
+//
+// # 'condition' tensor is [True, False]
+// # 't' is [[1, 2],
+// #         [3, 4]]
+// # 'e' is [[5, 6],
+// #         [7, 8]]
+// select(condition, t, e) ==> [[1, 2],
+//                              [7, 8]]
+//
+// ```
+//
+// Arguments:
+//
+//	x: A `Tensor` which may have the same shape as `condition`.
+// If `condition` is rank 1, `x` may have higher rank,
+// but its first dimension must match the size of `condition`.
+//	y: A `Tensor` with the same type and shape as `x`.
+//
+// Returns A `Tensor` with the same type and shape as `x` and `y`.
+func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Select",
+		Input: []tf.Input{
+			condition, x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
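+
+// exampleSelect is an editor-added sketch (hypothetical helper, not generated
+// code) reproducing the first snippet above: an element-wise mask choosing
+// between `t` and `e`.
+func exampleSelect(s *Scope) tf.Output {
+	condition := Const(s, [][]bool{{true, false}, {false, true}})
+	t := Const(s, [][]int32{{1, 2}, {3, 4}})
+	e := Const(s, [][]int32{{5, 6}, {7, 8}})
+	// Running the result yields [[1, 6], [7, 4]].
+	return Select(s, condition, t, e)
+}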
+
+// Returns the set of files matching one or more glob patterns.
+//
+// Note that this routine only supports wildcard characters in the
+// basename portion of the pattern, not in the directory portion.
+// Note also that the order of filenames returned can be non-deterministic.
+//
+// Arguments:
+//	pattern: Shell wildcard pattern(s). Scalar or vector of type string.
+//
+// Returns A vector of matching filenames.
+func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MatchingFiles",
+		Input: []tf.Input{
+			pattern,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// SqueezeAttr is an optional argument to Squeeze.
+type SqueezeAttr func(optionalAttr)
+
+// SqueezeAxis sets the optional axis attribute to value.
+//
+// value: If specified, only squeezes the dimensions listed. The dimension
+// index starts at 0. It is an error to squeeze a dimension that is not 1. Must
+// be in the range `[-rank(input), rank(input))`.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func SqueezeAxis(value []int64) SqueezeAttr {
+	return func(m optionalAttr) {
+		m["squeeze_dims"] = value
+	}
+}
+
+// Removes dimensions of size 1 from the shape of a tensor.
+//
+// Given a tensor `input`, this operation returns a tensor of the same type with
+// all dimensions of size 1 removed. If you don't want to remove all size 1
+// dimensions, you can remove specific size 1 dimensions by specifying
+// `axis`.
+//
+// For example:
+//
+// ```
+// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
+// shape(squeeze(t)) ==> [2, 3]
+// ```
+//
+// Or, to remove specific size 1 dimensions:
+//
+// ```
+// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
+// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]
+// ```
+//
+// Arguments:
+//	input: The `input` to squeeze.
+//
+// Returns Contains the same data as `input`, but has one or more dimensions of
+// size 1 removed.
+func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Squeeze",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
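+
+// exampleSqueeze is an editor-added sketch (hypothetical helper, not
+// generated code): squeezing only dimensions 2 and 4 of a tensor shaped
+// [1, 2, 1, 3, 1, 1] yields shape [1, 2, 3, 1], as in the second snippet
+// above.
+func exampleSqueeze(s *Scope, t tf.Output) tf.Output {
+	return Squeeze(s, t, SqueezeAxis([]int64{2, 4}))
+}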
+
+// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta.
+type ResourceApplyAdadeltaAttr func(optionalAttr)
+
+// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, updating of the var, accum and update_accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the adadelta scheme.
+//
+// accum = rho * accum + (1 - rho) * grad^2
+// update = sqrt(update_accum + epsilon) / sqrt(accum + epsilon) * grad
+// update_accum = rho * update_accum + (1 - rho) * update^2
+// var -= update
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	accum_update: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	rho: Decay factor. Must be a scalar.
+//	epsilon: Constant factor. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyAdadelta",
+		Input: []tf.Input{
+			var_, accum, accum_update, lr, rho, epsilon, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression.
+type NonMaxSuppressionAttr func(optionalAttr)
+
+// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value.
+//
+// value: A float representing the threshold for deciding whether boxes
+// overlap too much with respect to IOU.
+// If not specified, defaults to 0.5
+func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr {
+	return func(m optionalAttr) {
+		m["iou_threshold"] = value
+	}
+}
+
+// Greedily selects a subset of bounding boxes in descending order of score,
+//
+// pruning away boxes that have high intersection-over-union (IOU) overlap
+// with previously selected boxes.  Bounding boxes are supplied as
+// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+// diagonal pair of box corners and the coordinates can be provided as normalized
+// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+// is agnostic to where the origin is in the coordinate system.  Note that this
+// algorithm is invariant to orthogonal transformations and translations
+// of the coordinate system; thus translations or reflections of the coordinate
+// system result in the same boxes being selected by the algorithm.
+// The output of this operation is a set of integers indexing into the input
+// collection of bounding boxes representing the selected boxes.  The bounding
+// box coordinates corresponding to the selected indices can then be obtained
+// using the `tf.gather` operation.  For example:
+//   selected_indices = tf.image.non_max_suppression(
+//       boxes, scores, max_output_size, iou_threshold)
+//   selected_boxes = tf.gather(boxes, selected_indices)
+//
+// Arguments:
+//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+// score corresponding to each box (each row of boxes).
+//	max_output_size: A scalar integer tensor representing the maximum number of
+// boxes to be selected by non max suppression.
+//
+// Returns A 1-D integer tensor of shape `[M]` representing the selected
+// indices from the boxes tensor, where `M <= max_output_size`.
+func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "NonMaxSuppression",
+		Input: []tf.Input{
+			boxes, scores, max_output_size,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that emits `components` as a tuple of tensors once.
+func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "TensorDataset",
+		Input: []tf.Input{
+			tf.OutputList(components),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// VariableShapeAttr is an optional argument to VariableShape.
+type VariableShapeAttr func(optionalAttr)
+
+// VariableShapeOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func VariableShapeOutType(value tf.DataType) VariableShapeAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Returns the shape of the variable pointed to by `resource`.
+//
+// This operation returns a 1-D integer tensor representing the shape of `input`.
+//
+// For example:
+//
+// ```
+// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+// shape(t) ==> [2, 2, 3]
+// ```
+func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "VariableShape",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Store the input tensor in the state of the current session.
 //
 // Arguments:
@@ -17977,35 +18369,233 @@
 	return scope.AddOperation(opspec)
 }
 
-// SerializeManySparseAttr is an optional argument to SerializeManySparse.
-type SerializeManySparseAttr func(optionalAttr)
+// SdcaOptimizerAttr is an optional argument to SdcaOptimizer.
+type SdcaOptimizerAttr func(optionalAttr)
 
-// SerializeManySparseOutType sets the optional out_type attribute to value.
+// SdcaOptimizerAdaptative sets the optional adaptative attribute to value.
 //
-// value: The `dtype` to use for serialization; the supported types are `string`
-// (default) and `variant`.
-// If not specified, defaults to DT_STRING
-func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr {
+// value: Whether to use Adaptive SDCA for the inner loop.
+// If not specified, defaults to true
+func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr {
 	return func(m optionalAttr) {
-		m["out_type"] = value
+		m["adaptative"] = value
 	}
 }
 
-// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object.
+// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for
 //
-// The `SparseTensor` must have rank `R` greater than 1, and the first dimension
-// is treated as the minibatch dimension.  Elements of the `SparseTensor`
-// must be sorted in increasing order of this first dimension.  The serialized
-// `SparseTensor` objects going into each row of `serialized_sparse` will have
-// rank `R-1`.
+// linear models with L1 + L2 regularization. As the global optimization objective
+// is strongly-convex, the optimizer optimizes the dual objective at each step. The
+// optimizer applies each update one example at a time. Examples are sampled
+// uniformly, and the optimizer is learning-rate free and enjoys a linear
+// convergence rate.
 //
-// The minibatch size `N` is extracted from `sparse_shape[0]`.
+// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
+// Shai Shalev-Shwartz, Tong Zhang. 2012
+//
+// $$\text{Loss Objective} = \sum_{i} f_{i}(w x_{i}) + \frac{l_2}{2} \|w\|_2^2 + l_1 \|w\|_1$$
+//
+// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
+// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
+// Peter Richtarik, Martin Takac. 2015
+//
+// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
+// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
 //
 // Arguments:
-//	sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
-//	sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
-func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) {
+//	sparse_example_indices: a list of vectors which contain example indices.
+//	sparse_feature_indices: a list of vectors which contain feature indices.
+//	sparse_feature_values: a list of vectors which contains feature value
+// associated with each feature group.
+//	dense_features: a list of matrices which contains the dense feature values.
+//	example_weights: a vector which contains the weight associated with each
+// example.
+//	example_labels: a vector which contains the label/target associated with each
+// example.
+//	sparse_indices: a list of vectors where each value is the indices which has
+// corresponding weights in sparse_weights. This field may be omitted for the
+// dense approach.
+//	sparse_weights: a list of vectors where each value is the weight associated with
+// a sparse feature group.
+//	dense_weights: a list of vectors where the values are the weights associated
+// with a dense feature group.
+//	example_state_data: a list of vectors containing the example state data.
+//	loss_type: Type of the primal loss. Currently SdcaSolver supports logistic,
+// squared and hinge losses.
+//	l1: Symmetric l1 regularization strength.
+//	l2: Symmetric l2 regularization strength.
+//	num_loss_partitions: Number of partitions of the global loss function.
+//	num_inner_iterations: Number of iterations per mini-batch.
+//
+// Returns:
+//	out_example_state_data: a list of vectors containing the updated example state
+// data.
+//	out_delta_sparse_weights: a list of vectors where each value is the delta
+// weights associated with a sparse feature group.
+//	out_delta_dense_weights: a list of vectors where the values are the delta
+// weights associated with a dense feature group.
+func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SdcaOptimizer",
+		Input: []tf.Input{
+			tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	out_example_state_data = op.Output(idx)
+	if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil {
+		scope.UpdateErr("SdcaOptimizer", err)
+		return
+	}
+	if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil {
+		scope.UpdateErr("SdcaOptimizer", err)
+		return
+	}
+	return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights
+}
+
+// ExperimentalParseExampleDatasetAttr is an optional argument to ExperimentalParseExampleDataset.
+type ExperimentalParseExampleDatasetAttr func(optionalAttr)
+
+// ExperimentalParseExampleDatasetSloppy sets the optional sloppy attribute to value.
+// If not specified, defaults to false
+func ExperimentalParseExampleDatasetSloppy(value bool) ExperimentalParseExampleDatasetAttr {
+	return func(m optionalAttr) {
+		m["sloppy"] = value
+	}
+}
+
+// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features.
+//
+// Arguments:
+//
+//
+//	dense_defaults: A dict mapping string keys to `Tensor`s.
+// The keys of the dict must match the dense_keys of the feature.
+//	sparse_keys: A list of string keys in the examples features.
+// The results for these keys will be returned as `SparseTensor` objects.
+//	dense_keys: A list of Ndense string Tensors (scalars).
+// The keys expected in the Examples features associated with dense values.
+//	sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
+// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
+// and `tf.string` (`BytesList`) are supported.
+//	dense_shapes: List of tuples with the same length as `dense_keys`.
+// The shape of the data for each dense feature referenced by `dense_keys`.
+// Required for any input tensors identified by `dense_keys`.  Must be
+// either fully defined, or may contain an unknown first dimension.
+// An unknown first dimension means the feature is treated as having
+// a variable number of blocks, and the output shape along this dimension
+// is considered unknown at graph build time.  Padding is applied for
+// minibatch elements smaller than the maximum number of blocks for the
+// given feature along this dimension.
+//	output_types: The type list for the return values.
+//	output_shapes: The list of shapes being produced.
+func ExperimentalParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ExperimentalParseExampleDatasetAttr) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalParseExampleDataset",
+		Input: []tf.Input{
+			input_dataset, num_parallel_calls, tf.OutputList(dense_defaults),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// 2D real-valued fast Fourier transform.
+//
+// Computes the 2-dimensional discrete Fourier transform of a real-valued signal
+// over the inner-most 2 dimensions of `input`.
+//
+// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the
+// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
+// of `output`: the zero-frequency term, followed by the `fft_length / 2`
+// positive-frequency terms.
+//
+// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
+//
+// Arguments:
+//	input: A float32 tensor.
+//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
+//
+// Returns A complex64 tensor of the same rank as `input`. The inner-most 2
+//   dimensions of `input` are replaced with their 2D Fourier transform. The
+//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
+//   components.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.rfft2
+// @end_compatibility
+func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RFFT2D",
+		Input: []tf.Input{
+			input, fft_length,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl.
+type ResourceSparseApplyFtrlAttr func(optionalAttr)
+
+// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update relevant entries in '*var' according to the Ftrl-proximal scheme.
+//
+// That is, for rows we have grad for, we update var, accum and linear as follows:
+// accum_new = accum + grad * grad
+// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	lr_power: Scaling factor. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18014,23 +18604,26 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SerializeManySparse",
+		Type: "ResourceSparseApplyFtrl",
 		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
+			var_, accum, linear, grad, indices, lr, l1, l2, lr_power,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Computes inverse hyperbolic cosine of x element-wise.
-func Acosh(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns which elements of x are Inf.
+//
+// @compatibility(numpy)
+// Equivalent to np.isinf
+// @end_compatibility
+func IsInf(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Acosh",
+		Type: "IsInf",
 		Input: []tf.Input{
 			x,
 		},
@@ -18039,45 +18632,156 @@
 	return op.Output(0)
 }
 
-// TensorArrayV2Attr is an optional argument to TensorArrayV2.
-type TensorArrayV2Attr func(optionalAttr)
-
-// TensorArrayV2ElementShape sets the optional element_shape attribute to value.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr {
-	return func(m optionalAttr) {
-		m["element_shape"] = value
-	}
-}
-
-// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value.
-// If not specified, defaults to false
-func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr {
-	return func(m optionalAttr) {
-		m["dynamic_size"] = value
-	}
-}
-
-// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value.
-// If not specified, defaults to true
-func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr {
-	return func(m optionalAttr) {
-		m["clear_after_read"] = value
-	}
-}
-
-// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value.
-// If not specified, defaults to ""
-func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr {
-	return func(m optionalAttr) {
-		m["tensor_array_name"] = value
-	}
-}
-
-// Deprecated. Use TensorArrayV3
+// Gather ragged slices from `params` axis `0` according to `indices`.
 //
-// DEPRECATED at GraphDef version 26: Use TensorArrayV3
-func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) {
+// Outputs a `RaggedTensor` output composed from `output_dense_values` and
+// `output_nested_splits`, such that:
+//
+// ```python
+// output.shape = indices.shape + params.shape[1:]
+// output.ragged_rank = indices.shape.ndims + params.ragged_rank
+// output[i...j, d0...dn] = params[indices[i...j], d0...dn]
+// ```
+//
+// where
+//
+// * `params =
+//    ragged.from_nested_row_splits(params_dense_values, params_nested_splits)`
+//    provides the values that should be gathered.
+// * `indices` is a dense tensor with dtype `int32` or `int64`, indicating which
+//    values should be gathered.
+// * `output =
+//    ragged.from_nested_row_splits(output_dense_values, output_nested_splits)`
+//    is the output tensor.
+//
+// (Note: This C++ op is used to implement the higher-level Python
+// `tf.ragged.gather` op, which also supports ragged indices.)
+//
+//
+// Arguments:
+//	params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the
+// `params` RaggedTensor input.
+//	params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change
+// at the python level from dense_values to flat_values, so dense_values is the
+// deprecated name.
+//	indices: Indices in the outermost dimension of `params` of the values that should be
+// gathered.
+//	OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain
+// this number of `row_splits` tensors. This value should equal
+// `indices.shape.ndims + params.ragged_rank - 1`.
+//
+// Returns:
+//	output_nested_splits: The `nested_row_splits` tensors that define the
+// row-partitioning for the returned RaggedTensor.
+//	output_dense_values: The `flat_values` for the returned RaggedTensor.
+func RaggedGather(scope *Scope, params_nested_splits []tf.Output, params_dense_values tf.Output, indices tf.Output, OUTPUT_RAGGED_RANK int64) (output_nested_splits []tf.Output, output_dense_values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"OUTPUT_RAGGED_RANK": OUTPUT_RAGGED_RANK}
+	opspec := tf.OpSpec{
+		Type: "RaggedGather",
+		Input: []tf.Input{
+			tf.OutputList(params_nested_splits), params_dense_values, indices,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil {
+		scope.UpdateErr("RaggedGather", err)
+		return
+	}
+	output_dense_values = op.Output(idx)
+	return output_nested_splits, output_dense_values
+}
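+
+// raggedGatherSketch is an editor-added sketch (hypothetical, not generated
+// code) of the rank-1 case described above, in plain Go: each index selects
+// the slice values[splits[i]:splits[i+1]], and fresh row splits are rebuilt
+// for the gathered rows.
+func raggedGatherSketch(splits []int64, values []float32, indices []int64) (outSplits []int64, outValues []float32) {
+	outSplits = []int64{0}
+	for _, i := range indices {
+		row := values[splits[i]:splits[i+1]]
+		outValues = append(outValues, row...)
+		outSplits = append(outSplits, outSplits[len(outSplits)-1]+int64(len(row)))
+	}
+	return outSplits, outValues
+}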
+
+// Greedily selects a subset of bounding boxes in descending order of score,
+//
+// pruning away boxes that have high intersection-over-union (IOU) overlap
+// with previously selected boxes.  Bounding boxes are supplied as
+// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+// diagonal pair of box corners and the coordinates can be provided as normalized
+// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+// is agnostic to where the origin is in the coordinate system.  Note that this
+// algorithm is invariant to orthogonal transformations and translations
+// of the coordinate system; thus translations or reflections of the coordinate
+// system result in the same boxes being selected by the algorithm.
+//
+// The output of this operation is a set of integers indexing into the input
+// collection of bounding boxes representing the selected boxes.  The bounding
+// box coordinates corresponding to the selected indices can then be obtained
+// using the `tf.gather` operation.  For example:
+//
+//   selected_indices = tf.image.non_max_suppression_v2(
+//       boxes, scores, max_output_size, iou_threshold)
+//   selected_boxes = tf.gather(boxes, selected_indices)
+//
+// Arguments:
+//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+// score corresponding to each box (each row of boxes).
+//	max_output_size: A scalar integer tensor representing the maximum number of
+// boxes to be selected by non max suppression.
+//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
+// boxes overlap too much with respect to IOU.
+//
+// Returns A 1-D integer tensor of shape `[M]` representing the selected
+// indices from the boxes tensor, where `M <= max_output_size`.
+func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "NonMaxSuppressionV2",
+		Input: []tf.Input{
+			boxes, scores, max_output_size, iou_threshold,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TruncatedNormalAttr is an optional argument to TruncatedNormal.
+type TruncatedNormalAttr func(optionalAttr)
+
+// TruncatedNormalSeed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func TruncatedNormalSeed(value int64) TruncatedNormalAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// TruncatedNormalSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func TruncatedNormalSeed2(value int64) TruncatedNormalAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Outputs random values from a truncated normal distribution.
+//
+// The generated values follow a normal distribution with mean 0 and standard
+// deviation 1, except that values whose magnitude is more than 2 standard
+// deviations from the mean are dropped and re-picked.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	dtype: The type of the output.
+//
+// Returns A tensor of the specified shape filled with random truncated normal
+// values.
+func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18086,9 +18790,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayV2",
+		Type: "TruncatedNormal",
 		Input: []tf.Input{
-			size,
+			shape,
 		},
 		Attrs: attrs,
 	}
@@ -18096,6 +18800,160 @@
 	return op.Output(0)
 }
 
+// StringToNumberAttr is an optional argument to StringToNumber.
+type StringToNumberAttr func(optionalAttr)
+
+// StringToNumberOutType sets the optional out_type attribute to value.
+//
+// value: The numeric type to interpret each string in `string_tensor` as.
+// If not specified, defaults to DT_FLOAT
+func StringToNumberOutType(value tf.DataType) StringToNumberAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Converts each string in the input Tensor to the specified numeric type.
+//
+// (Note that int32 overflow results in an error while float overflow
+// results in a rounded value.)
+//
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StringToNumber",
+		Input: []tf.Input{
+			string_tensor,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
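+
+// exampleStringToNumber is an editor-added sketch (hypothetical helper, not
+// generated code): parse decimal strings as int32 rather than the default
+// float32 by overriding out_type.
+func exampleStringToNumber(s *Scope) tf.Output {
+	strs := Const(s, []string{"3", "-7", "42"})
+	return StringToNumber(s, strs, StringToNumberOutType(tf.Int32))
+}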
+
+// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
+type ResourceApplyFtrlV2Attr func(optionalAttr)
+
+// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the Ftrl-proximal scheme.
+//
+// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+// linear += grad_with_shrinkage +
+//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 shrinkage regularization. Must be a scalar.
+//
+//	lr_power: Scaling factor. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyFtrlV2",
+		Input: []tf.Input{
+			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
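+
+// Editor's note: a worked single step of the update above, with hypothetical
+// scalars var=1, accum=1, linear=0, grad=1, lr=1, l1=0, l2=0, l2_shrinkage=0,
+// lr_power=-1:
+//
+//	grad_with_shrinkage = 1 + 2*0*1           = 1
+//	accum_new           = 1 + 1*1             = 2
+//	linear              = 0 + 1 + (2-1)/1 * 1 = 2
+//	quadratic           = 1/(2^(-1)*1) + 2*0  = 2
+//	var                 = (0 - 2)/2           = -1   // since |linear| > l1
+//	accum               = 2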
+
+// SkipgramAttr is an optional argument to Skipgram.
+type SkipgramAttr func(optionalAttr)
+
+// SkipgramWindowSize sets the optional window_size attribute to value.
+//
+// value: The number of words to predict to the left and right of the target.
+// If not specified, defaults to 5
+func SkipgramWindowSize(value int64) SkipgramAttr {
+	return func(m optionalAttr) {
+		m["window_size"] = value
+	}
+}
+
+// SkipgramMinCount sets the optional min_count attribute to value.
+//
+// value: The minimum number of word occurrences for it to be included in the
+// vocabulary.
+// If not specified, defaults to 5
+func SkipgramMinCount(value int64) SkipgramAttr {
+	return func(m optionalAttr) {
+		m["min_count"] = value
+	}
+}
+
+// SkipgramSubsample sets the optional subsample attribute to value.
+//
+// value: Threshold for word occurrence. Words that appear with higher
+// frequency will be randomly down-sampled. Set to 0 to disable.
+// If not specified, defaults to 0.001
+func SkipgramSubsample(value float32) SkipgramAttr {
+	return func(m optionalAttr) {
+		m["subsample"] = value
+	}
+}
+
+// Parses a text file and creates a batch of examples.
+//
+// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result
+//
+// Arguments:
+//	filename: The corpus's text file name.
+//	batch_size: The size of produced batch.
+//
+// Returns:
+//	vocab_word: A vector of words in the corpus.
+//	vocab_freq: Frequencies of words. Sorted in the non-ascending order.
+//	words_per_epoch: Number of words per epoch in the data file.
+//	current_epoch: The current epoch number.
+//	total_words_processed: The total number of words processed so far.
+//	examples: A vector of word ids.
+//	labels: A vector of word ids.
+func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Skipgram",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6)
+}
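+
+// A minimal usage sketch (editor's example, not generated code; the corpus
+// path is hypothetical, and note the op is deprecated as documented above):
+//
+//	s := op.NewScope()
+//	vocabWord, vocabFreq, _, _, _, examples, labels := op.Skipgram(s,
+//		"/tmp/text8", 128, op.SkipgramWindowSize(3))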
+
 // ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler.
 type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr)
 
@@ -18215,6 +19073,78 @@
 	return op.Output(0)
 }
 
+// Does nothing. Serves as a control trigger for scheduling.
+//
+// Only useful as a placeholder for control edges.
+//
+// Returns the created operation.
+func ControlTrigger(scope *Scope) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ControlTrigger",
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Deprecated. Use TensorArrayReadV3
+//
+// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3
+func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayReadV2",
+		Input: []tf.Input{
+			handle, index, flow_in,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Batch normalization.
+//
+// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
+//
+// This op is deprecated. Prefer `tf.nn.batch_normalization`.
+//
+// Arguments:
+//	t: A 4D input Tensor.
+//	m: A 1D mean Tensor with size matching the last dimension of t.
+// This is the first output from tf.nn.moments,
+// or a saved moving average thereof.
+//	v: A 1D variance Tensor with size matching the last dimension of t.
+// This is the second output from tf.nn.moments,
+// or a saved moving average thereof.
+//	beta: A 1D beta Tensor with size matching the last dimension of t.
+// An offset to be added to the normalized tensor.
+//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
+// If "scale_after_normalization" is true, this tensor will be multiplied
+// with the normalized tensor.
+//	variance_epsilon: A small float number to avoid dividing by 0.
+//	scale_after_normalization: A bool indicating whether the resulting tensor
+// needs to be multiplied with gamma.
+func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
+	opspec := tf.OpSpec{
+		Type: "BatchNormWithGlobalNormalization",
+		Input: []tf.Input{
+			t, m, v, beta, gamma,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap.
 type AddManySparseToTensorsMapAttr func(optionalAttr)
 
@@ -18292,6 +19222,161 @@
 	return op.Output(0)
 }
 
+// TPUReplicateMetadataAttr is an optional argument to TPUReplicateMetadata.
+type TPUReplicateMetadataAttr func(optionalAttr)
+
+// TPUReplicateMetadataNumCoresPerReplica sets the optional num_cores_per_replica attribute to value.
+//
+// value: Number of cores per replica. Used for model parallelism.
+// If not specified, defaults to 1
+func TPUReplicateMetadataNumCoresPerReplica(value int64) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["num_cores_per_replica"] = value
+	}
+}
+
+// TPUReplicateMetadataTopology sets the optional topology attribute to value.
+//
+// value: TopologyProto indicating the topology of the TPU pod slice.
+// If not specified, defaults to ""
+func TPUReplicateMetadataTopology(value string) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["topology"] = value
+	}
+}
+
+// TPUReplicateMetadataUseTpu sets the optional use_tpu attribute to value.
+//
+// value: Whether to place the computation on the TPU.
+// If not specified, defaults to true
+func TPUReplicateMetadataUseTpu(value bool) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["use_tpu"] = value
+	}
+}
+
+// TPUReplicateMetadataDeviceAssignment sets the optional device_assignment attribute to value.
+//
+// value: The assignment of devices for the computation.
+// If not specified, defaults to <>
+func TPUReplicateMetadataDeviceAssignment(value []int64) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["device_assignment"] = value
+	}
+}
+
+// TPUReplicateMetadataComputationShape sets the optional computation_shape attribute to value.
+//
+// value: DEPRECATED. Use num_cores_per_replica instead.
+// If not specified, defaults to <>
+func TPUReplicateMetadataComputationShape(value []int64) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["computation_shape"] = value
+	}
+}
+
+// TPUReplicateMetadataHostComputeCore sets the optional host_compute_core attribute to value.
+// If not specified, defaults to <>
+func TPUReplicateMetadataHostComputeCore(value []string) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["host_compute_core"] = value
+	}
+}
+
+// TPUReplicateMetadataPaddingMap sets the optional padding_map attribute to value.
+// If not specified, defaults to <>
+func TPUReplicateMetadataPaddingMap(value []string) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["padding_map"] = value
+	}
+}
+
+// TPUReplicateMetadataStepMarkerLocation sets the optional step_marker_location attribute to value.
+// If not specified, defaults to "STEP_MARK_AT_ENTRY"
+func TPUReplicateMetadataStepMarkerLocation(value string) TPUReplicateMetadataAttr {
+	return func(m optionalAttr) {
+		m["step_marker_location"] = value
+	}
+}
+
+// Metadata indicating how the TPU computation should be replicated.
+//
+// Arguments:
+//	num_replicas: Number of replicas of the computation
+//
+// Returns the created operation.
+func TPUReplicateMetadata(scope *Scope, num_replicas int64, optional ...TPUReplicateMetadataAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_replicas": num_replicas}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "TPUReplicateMetadata",
+
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
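+
+// A minimal usage sketch (editor's example, not generated code): emit the
+// replication metadata for an 8-replica, model-parallel computation.
+//
+//	s := op.NewScope()
+//	op.TPUReplicateMetadata(s, 8, op.TPUReplicateMetadataNumCoresPerReplica(2))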
+
+// LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingFTRLParametersGradAccumDebug.
+type LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr func(optionalAttr)
+
+// LoadTPUEmbeddingFTRLParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingFTRLParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingFTRLParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingFTRLParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load FTRL embedding parameters with debug support.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the FTRL optimization algorithm.
+//	accumulators: Value of accumulators used in the FTRL optimization algorithm.
+//	linears: Value of linears used in the FTRL optimization algorithm.
+//	gradient_accumulators: Value of gradient_accumulators used in the FTRL optimization algorithm.
+//
+// Returns the created operation.
+func LoadTPUEmbeddingFTRLParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, linears tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingFTRLParametersGradAccumDebug",
+		Input: []tf.Input{
+			parameters, accumulators, linears, gradient_accumulators,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Concatenates tensors along one dimension.
 //
 // Arguments:
@@ -18332,171 +19417,6 @@
 	return op.Output(0)
 }
 
-// Returns immutable tensor from memory region.
-//
-// The current implementation memmaps the tensor from a file.
-//
-// Arguments:
-//	dtype: Type of the returned tensor.
-//	shape: Shape of the returned tensor.
-//	memory_region_name: Name of readonly memory region used by the tensor, see
-// NewReadOnlyMemoryRegionFromFile in tensorflow::Env.
-func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name}
-	opspec := tf.OpSpec{
-		Type: "ImmutableConst",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// StringJoinAttr is an optional argument to StringJoin.
-type StringJoinAttr func(optionalAttr)
-
-// StringJoinSeparator sets the optional separator attribute to value.
-//
-// value: string, an optional join separator.
-// If not specified, defaults to ""
-func StringJoinSeparator(value string) StringJoinAttr {
-	return func(m optionalAttr) {
-		m["separator"] = value
-	}
-}
-
-// Joins the strings in the given list of string tensors into one tensor;
-//
-// with the given separator (default is an empty separator).
-//
-// Arguments:
-//	inputs: A list of string tensors.  The tensors must all have the same shape,
-// or be scalars.  Scalars may be mixed in; these will be broadcast to the shape
-// of non-scalar inputs.
-func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StringJoin",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates and returns an empty tensor list.
-//
-// All list elements must be tensors of dtype element_dtype and shape compatible
-// with element_shape.
-//
-// handle: an empty tensor list.
-// element_dtype: the type of elements in the list.
-// element_shape: a shape compatible with that of elements in the list.
-func EmptyTensorList(scope *Scope, element_shape tf.Output, max_num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"element_dtype": element_dtype}
-	opspec := tf.OpSpec{
-		Type: "EmptyTensorList",
-		Input: []tf.Input{
-			element_shape, max_num_elements,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes softsign gradients for a softsign operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding softsign operation.
-//	features: The features passed as input to the corresponding softsign operation.
-//
-// Returns The gradients: `gradients / (1 + abs(features)) ** 2`.
-func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SoftsignGrad",
-		Input: []tf.Input{
-			gradients, features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Provides the time since epoch in seconds.
-//
-// Returns the timestamp as a `float64` for seconds since the Unix epoch.
-//
-// Note: the timestamp is computed when the op is executed, not when it is added
-// to the graph.
-func Timestamp(scope *Scope) (ts tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Timestamp",
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// VariableShapeAttr is an optional argument to VariableShape.
-type VariableShapeAttr func(optionalAttr)
-
-// VariableShapeOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_INT32
-func VariableShapeOutType(value tf.DataType) VariableShapeAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Returns the shape of the variable pointed to by `resource`.
-//
-// This operation returns a 1-D integer tensor representing the shape of `input`.
-//
-// For example:
-//
-// ```
-// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-// shape(t) ==> [2, 2, 3]
-// ```
-func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "VariableShape",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // AvgPoolGradAttr is an optional argument to AvgPoolGrad.
 type AvgPoolGradAttr func(optionalAttr)
 
@@ -18589,289 +19509,46 @@
 	return op.Output(0)
 }
 
-// Computes softmax cross entropy cost and gradients to backpropagate.
-//
-// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept
-// a matrix of label probabilities, but rather a single label per row
-// of features.  This label is considered to have probability 1.0 for the
-// given row.
-//
-// Inputs are the logits, not probabilities.
-//
-// Arguments:
-//	features: batch_size x num_classes matrix
-//	labels: batch_size vector with values in [0, num_classes).
-// This is the label for the given minibatch entry.
-//
-// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix).
-func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSoftmaxCrossEntropyWithLogits",
-		Input: []tf.Input{
-			features, labels,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
+// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad.
+type FractionalAvgPoolGradAttr func(optionalAttr)
 
-// Returns the truth value of NOT x element-wise.
-func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LogicalNot",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// 3D real-valued fast Fourier transform.
+// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value.
 //
-// Computes the 3-dimensional discrete Fourier transform of a real-valued signal
-// over the inner-most 3 dimensions of `input`.
+// value: When set to True, the values at the boundary of adjacent pooling
+// cells are used by both cells when pooling. For example:
 //
-// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the
-// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
-// of `output`: the zero-frequency term, followed by the `fft_length / 2`
-// positive-frequency terms.
+// `index  0  1  2  3  4`
 //
-// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
+// `value  20 5  16 3  7`
 //
-// Arguments:
-//	input: A float32 tensor.
-//	fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
-//
-// Returns A complex64 tensor of the same rank as `input`. The inner-most 3
-//   dimensions of `input` are replaced with the their 3D Fourier transform. The
-//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
-//   components.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.rfftn with 3 dimensions.
-// @end_compatibility
-func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RFFT3D",
-		Input: []tf.Input{
-			input, fft_length,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// TensorArrayV3Attr is an optional argument to TensorArrayV3.
-type TensorArrayV3Attr func(optionalAttr)
-
-// TensorArrayV3ElementShape sets the optional element_shape attribute to value.
-//
-// value: The expected shape of an element, if known. Used to
-// validate the shapes of TensorArray elements. If this shape is not
-// fully specified, gathering zero-size TensorArrays is an error.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr {
-	return func(m optionalAttr) {
-		m["element_shape"] = value
-	}
-}
-
-// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value.
-//
-// value: A boolean that determines whether writes to the TensorArray
-// are allowed to grow the size.  By default, this is not allowed.
+// If the pooling sequence is [0, 2, 4], then 16, at index 2, will be used twice.
+// The result would be [41/3, 26/3] for fractional avg pooling.
 // If not specified, defaults to false
-func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr {
+func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr {
 	return func(m optionalAttr) {
-		m["dynamic_size"] = value
+		m["overlapping"] = value
 	}
 }
 
-// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value.
+// Computes gradient of the FractionalAvgPool function.
 //
-// value: If true (default), Tensors in the TensorArray are cleared
-// after being read.  This disables multiple read semantics but allows early
-// release of memory.
-// If not specified, defaults to true
-func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr {
-	return func(m optionalAttr) {
-		m["clear_after_read"] = value
-	}
-}
-
-// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value.
-//
-// value: If true (default is false), then all
-// elements in the TensorArray will be expected to have have identical shapes.
-// This allows certain behaviors, like dynamically checking for
-// consistent shapes on write, and being able to fill in properly
-// shaped zero tensors on stack -- even if the element_shape attribute
-// is not fully defined.
-// If not specified, defaults to false
-func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr {
-	return func(m optionalAttr) {
-		m["identical_element_shapes"] = value
-	}
-}
-
-// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value.
-//
-// value: Overrides the name used for the temporary tensor_array
-// resource. Default value is the name of the 'TensorArray' op (which
-// is guaranteed unique).
-// If not specified, defaults to ""
-func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr {
-	return func(m optionalAttr) {
-		m["tensor_array_name"] = value
-	}
-}
-
-// An array of Tensors of given size.
-//
-// Write data via Write and read via Read or Pack.
+// Unlike FractionalMaxPoolGrad, FractionalAvgPoolGrad does not need to find the
+// arg_max; it just evenly back-propagates each element of out_backprop to the
+// indices that form the same pooling cell. Therefore, it only needs to know the
+// shape of the original input tensor, instead of the whole tensor.
 //
 // Arguments:
-//	size: The size of the array.
-//	dtype: The type of the elements on the tensor_array.
+//	orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool`
+//	out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
+// w.r.t. the output of `fractional_avg_pool`.
+//	row_pooling_sequence: row pooling sequence, which together with
+// col_pooling_sequence forms the pooling region.
+//	col_pooling_sequence: column pooling sequence, which together with
+// row_pooling_sequence forms the pooling region.
 //
-// Returns The handle to the TensorArray.A scalar used to control gradient flow.
-func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayV3",
-		Input: []tf.Input{
-			size,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Runs multiple additive regression ensemble predictors on input instances and
-//
-// computes the logits. It is designed to be used during prediction.
-// It traverses all the trees and calculates the final score for each instance.
-//
-// Arguments:
-//
-//	bucketized_features: A list of rank 1 Tensors containing bucket id for each
-// feature.
-//	logits_dimension: scalar, dimension of the logits, to be used for partial logits
-// shape.
-//
-// Returns Output rank 2 Tensor containing logits for each example.
-func BoostedTreesPredict(scope *Scope, tree_ensemble_handle tf.Output, bucketized_features []tf.Output, logits_dimension int64) (logits tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"logits_dimension": logits_dimension}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesPredict",
-		Input: []tf.Input{
-			tree_ensemble_handle, tf.OutputList(bucketized_features),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Elementwise computes the bitwise OR of `x` and `y`.
-//
-// The result will have those bits set, that are set in `x`, `y` or both. The
-// computation is performed on the underlying representations of `x` and `y`.
-func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BitwiseOr",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MatrixSolveLsAttr is an optional argument to MatrixSolveLs.
-type MatrixSolveLsAttr func(optionalAttr)
-
-// MatrixSolveLsFast sets the optional fast attribute to value.
-// If not specified, defaults to true
-func MatrixSolveLsFast(value bool) MatrixSolveLsAttr {
-	return func(m optionalAttr) {
-		m["fast"] = value
-	}
-}
-
-// Solves one or more linear least-squares problems.
-//
-// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same
-// type as `matrix` and shape `[..., M, K]`.
-// The output is a tensor shape `[..., N, K]` where each output matrix solves
-// each of the equations
-// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]`
-// in the least squares sense.
-//
-// We use the following notation for (complex) matrix and right-hand sides
-// in the batch:
-//
-// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\),
-// `rhs`=\\(B  \in \mathbb{C}^{m \times k}\\),
-// `output`=\\(X  \in \mathbb{C}^{n \times k}\\),
-// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\).
-//
-// If `fast` is `True`, then the solution is computed by solving the normal
-// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then
-// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares
-// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\).
-// If \\(m \lt n\\) then `output` is computed as
-// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the
-// minimum-norm solution to the under-determined linear system, i.e.
-// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\),
-// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable
-// when \\(A\\) is numerically full rank and has a condition number
-// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is
-// sufficiently large.
-//
-// If `fast` is `False` an algorithm based on the numerically robust complete
-// orthogonal decomposition is used. This computes the minimum-norm
-// least-squares solution, even when \\(A\\) is rank deficient. This path is
-// typically 6-7 times slower than the fast path. If `fast` is `False` then
-// `l2_regularizer` is ignored.
-//
-// Arguments:
-//	matrix: Shape is `[..., M, N]`.
-//	rhs: Shape is `[..., M, K]`.
-//	l2_regularizer: Scalar tensor.
-//
-// @compatibility(numpy)
-// Equivalent to np.linalg.lstsq
-// @end_compatibility
-//
-// Returns Shape is `[..., N, K]`.
-func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) {
+// Returns 4-D.  Gradients w.r.t. the input of `fractional_avg_pool`.
+func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18880,9 +19557,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixSolveLs",
+		Type: "FractionalAvgPoolGrad",
 		Input: []tf.Input{
-			matrix, rhs, l2_regularizer,
+			orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence,
 		},
 		Attrs: attrs,
 	}
@@ -18890,6 +19567,715 @@
 	return op.Output(0)
 }
 
+// StaticRegexReplaceAttr is an optional argument to StaticRegexReplace.
+type StaticRegexReplaceAttr func(optionalAttr)
+
+// StaticRegexReplaceReplaceGlobal sets the optional replace_global attribute to value.
+//
+// value: If True, the replacement is global; otherwise, only the first match
+// is replaced.
+// If not specified, defaults to true
+func StaticRegexReplaceReplaceGlobal(value bool) StaticRegexReplaceAttr {
+	return func(m optionalAttr) {
+		m["replace_global"] = value
+	}
+}
+
+// Replaces the match of pattern in input with rewrite.
+//
+// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+//
+// Arguments:
+//	input: The text to be processed.
+//	pattern: The regular expression to match the input.
+//	rewrite: The rewrite to be applied to the matched expression.
+//
+// Returns The text after applying pattern and rewrite.
+func StaticRegexReplace(scope *Scope, input tf.Output, pattern string, rewrite string, optional ...StaticRegexReplaceAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"pattern": pattern, "rewrite": rewrite}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StaticRegexReplace",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
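+
+// A minimal usage sketch (editor's example, not generated code):
+//
+//	s := op.NewScope()
+//	in := op.Const(s, []string{"a1b22c333"})
+//	// Replaces only the first run of digits, yielding "a#b22c333".
+//	out := op.StaticRegexReplace(s, in, "[0-9]+", "#",
+//		op.StaticRegexReplaceReplaceGlobal(false))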
+
+// Computes gradients for the exponential linear (Elu) operation.
+//
+// Arguments:
+//	gradients: The backpropagated gradients to the corresponding Elu operation.
+//	outputs: The outputs of the corresponding Elu operation.
+//
+// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0,
+// `gradients` otherwise.
+func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "EluGrad",
+		Input: []tf.Input{
+			gradients, outputs,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Converts each string in the input Tensor to its hash mod by a number of buckets.
+//
+// The hash function is deterministic on the content of the string within the
+// process.
+//
+// Note that the hash function may change from time to time.
+// This functionality will be deprecated and it's recommended to use
+// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.
+//
+// Arguments:
+//
+//	num_buckets: The number of buckets.
+//
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_buckets": num_buckets}
+	opspec := tf.OpSpec{
+		Type: "StringToHashBucket",
+		Input: []tf.Input{
+			string_tensor,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
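+
+// A minimal usage sketch (editor's example, not generated code):
+//
+//	s := op.NewScope()
+//	words := op.Const(s, []string{"lorem", "ipsum"})
+//	// Each string maps to an int64 bucket id in [0, 1000).
+//	ids := op.StringToHashBucket(s, words, 1000)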
+
+// Creates a dataset that batches `batch_size` elements from `input_dataset`.
+//
+// Arguments:
+//
+//	batch_size: A scalar representing the number of elements to accumulate in a batch.
+//	drop_remainder: A scalar representing whether the last batch should be dropped in case its size
+// is smaller than desired.
+//
+//
+func BatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "BatchDatasetV2",
+		Input: []tf.Input{
+			input_dataset, batch_size, drop_remainder,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the gradient of `igamma(a, x)` wrt `a`.
+func IgammaGradA(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IgammaGradA",
+		Input: []tf.Input{
+			a, x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that contains `count` elements from the `input_dataset`.
+//
+// Arguments:
+//
+//	count: A scalar representing the number of elements from the `input_dataset`
+// that should be taken. A value of `-1` indicates that all of `input_dataset`
+// is taken.
+//
+//
+func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "TakeDataset",
+		Input: []tf.Input{
+			input_dataset, count,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
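+
+// A minimal pipeline sketch chaining TakeDataset into BatchDatasetV2 above
+// (editor's example, not generated code; `ds` is assumed to be a variant
+// tensor produced by an upstream dataset op such as TensorSliceDataset):
+//
+//	s := op.NewScope()
+//	types := []tf.DataType{tf.Float}
+//	shapes := []tf.Shape{tf.ScalarShape()}
+//	taken := op.TakeDataset(s, ds, op.Const(s, int64(100)), types, shapes)
+//	batched := op.BatchDatasetV2(s, taken,
+//		op.Const(s, int64(32)), op.Const(s, true), types, shapes)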
+
+// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars.
+type FakeQuantWithMinMaxVarsAttr func(optionalAttr)
+
+// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
+
+// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value.
+// If not specified, defaults to false
+func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr {
+	return func(m optionalAttr) {
+		m["narrow_range"] = value
+	}
+}
+
+// Fake-quantize the 'inputs' tensor of type float via global float scalars `min`
+//
+// and `max` to 'outputs' tensor of same shape as `inputs`.
+//
+// `[min; max]` define the clamping range for the `inputs` data.
+// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
+// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
+// then de-quantized and output as floats in `[min; max]` interval.
+// `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive.
+//
+// This operation has a gradient and thus allows for training `min` and `max`
+// values.
+func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "FakeQuantWithMinMaxVars",
+		Input: []tf.Input{
+			inputs, min, max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
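+
+// A minimal usage sketch (editor's example, not generated code):
+//
+//	s := op.NewScope()
+//	x := op.Const(s, []float32{-0.5, 0.2, 0.7})
+//	lo := op.Const(s, float32(-1))
+//	hi := op.Const(s, float32(1))
+//	// 8-bit fake quantization over the trainable range [-1, 1].
+//	q := op.FakeQuantWithMinMaxVars(s, x, lo, hi,
+//		op.FakeQuantWithMinMaxVarsNumBits(8))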
+
+// RetrieveTPUEmbeddingMomentumParametersAttr is an optional argument to RetrieveTPUEmbeddingMomentumParameters.
+type RetrieveTPUEmbeddingMomentumParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingMomentumParametersTableId(value int64) RetrieveTPUEmbeddingMomentumParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingMomentumParametersTableName(value string) RetrieveTPUEmbeddingMomentumParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve Momentum embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns:
+//	parameters: Parameter parameters updated by the Momentum optimization algorithm.
+//	momenta: Parameter momenta updated by the Momentum optimization algorithm.
+func RetrieveTPUEmbeddingMomentumParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersAttr) (parameters tf.Output, momenta tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingMomentumParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Forwards the value of an available tensor from `inputs` to `output`.
+//
+// `Merge` waits for at least one of the tensors in `inputs` to become available.
+// It is usually combined with `Switch` to implement branching.
+//
+// `Merge` forwards the first tensor to become available to `output`, and sets
+// `value_index` to its index in `inputs`.
+//
+// Arguments:
+//	inputs: The input tensors, exactly one of which will become available.
+//
+// Returns:
+//	output: Will be set to the available input tensor.
+//	value_index: The index of the chosen input tensor in `inputs`.
+func Merge(scope *Scope, inputs []tf.Output) (output tf.Output, value_index tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Merge",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
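+
+// A minimal usage sketch (editor's example, not generated code): forward
+// whichever input becomes available first.
+//
+//	s := op.NewScope()
+//	a := op.Const(s, int32(1))
+//	b := op.Const(s, int32(2))
+//	out, idx := op.Merge(s, []tf.Output{a, b})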
+
+// QueueCloseV2Attr is an optional argument to QueueCloseV2.
+type QueueCloseV2Attr func(optionalAttr)
+
+// QueueCloseV2CancelPendingEnqueues sets the optional cancel_pending_enqueues attribute to value.
+//
+// value: If true, all pending enqueue requests that are
+// blocked on the given queue will be canceled.
+// If not specified, defaults to false
+func QueueCloseV2CancelPendingEnqueues(value bool) QueueCloseV2Attr {
+	return func(m optionalAttr) {
+		m["cancel_pending_enqueues"] = value
+	}
+}
+
+// Closes the given queue.
+//
+// This operation signals that no more elements will be enqueued in the
+// given queue. Subsequent Enqueue(Many) operations will fail.
+// Subsequent Dequeue(Many) operations will continue to succeed if
+// sufficient elements remain in the queue. Subsequent Dequeue(Many)
+// operations that would block will fail immediately.
+//
+// Arguments:
+//	handle: The handle to a queue.
+//
+// Returns the created operation.
+func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QueueCloseV2",
+		Input: []tf.Input{
+			handle,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Writes the given dataset to the given file using the TFRecord format.
+//
+// Arguments:
+//	input_dataset: A variant tensor representing the dataset to write.
+//	filename: A scalar string tensor representing the filename to use.
+//	compression_type: A scalar string tensor containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//
+// Returns the created operation.
+func ExperimentalDatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalDatasetToTFRecord",
+		Input: []tf.Input{
+			input_dataset, filename, compression_type,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// BiasAddGradAttr is an optional argument to BiasAddGrad.
+type BiasAddGradAttr func(optionalAttr)
+
+// BiasAddGradDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the bias tensor will be added to the last dimension
+// of the value tensor.
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// The tensor will be added to "in_channels", the third-to-last dimension.
+// If not specified, defaults to "NHWC"
+func BiasAddGradDataFormat(value string) BiasAddGradAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// The backward operation for "BiasAdd" on the "bias" tensor.
+//
+// It accumulates all the values from out_backprop into the feature dimension.
+// For NHWC data format, the feature dimension is the last. For NCHW data format,
+// the feature dimension is the third-to-last.
+//
+// Arguments:
+//	out_backprop: Any number of dimensions.
+//
+// Returns 1-D with size the feature dimension of `out_backprop`.
+func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "BiasAddGrad",
+		Input: []tf.Input{
+			out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Reduces `input` from `num_devices` using `reduction` to a single device.
+//
+// Reduces `input` from `num_devices` using `reduction` to a single device.
+//
+// The graph should be constructed so that all inputs have a valid device
+// assignment, and the op itself is assigned one of these devices.
+//
+// input: The input to the reduction.
+// data: the value of the reduction across all `num_devices` devices.
+// reduction: the reduction operation to perform.
+func NcclReduce(scope *Scope, input []tf.Output, reduction string) (data tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"reduction": reduction}
+	opspec := tf.OpSpec{
+		Type: "NcclReduce",
+		Input: []tf.Input{
+			tf.OutputList(input),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the gradient of morphological 2-D dilation with respect to the input.
+//
+// Arguments:
+//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
+//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
+//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
+// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
+//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
+// Must be: `[1, rate_height, rate_width, 1]`.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape `[batch, in_height, in_width, depth]`.
+func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "Dilation2DBackpropInput",
+		Input: []tf.Input{
+			input, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// An Op to sum inputs across replicated TPU instances.
+//
+// Each instance supplies its own input.
+//
+// For example, suppose there are 8 TPU instances: `[A, B, C, D, E, F, G, H]`.
+// Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0,
+// and `B, D, F, H` as group 1. Thus we get the outputs:
+// `[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`.
+//
+// Arguments:
+//	input: The local input to the sum.
+//	group_assignment: An int32 tensor with shape
+// [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the
+// replica ids in the ith subgroup.
+//
+// Returns The sum of all the distributed inputs.
+func CrossReplicaSum(scope *Scope, input tf.Output, group_assignment tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "CrossReplicaSum",
+		Input: []tf.Input{
+			input, group_assignment,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum.
+type ResourceSparseApplyMomentumAttr func(optionalAttr)
+
+// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
+//
+// value: If `True`, the tensor passed to compute grad will be
+// var - lr * momentum * accum, so in the end, the var you get is actually
+// var - lr * momentum * accum.
+// If not specified, defaults to false
+func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_nesterov"] = value
+	}
+}
+
+// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
+//
+// Set use_nesterov = True if you want to use Nesterov momentum.
+//
+// That is for rows we have grad for, we update var and accum as follows:
+//
+// accum = accum * momentum + grad
+// var -= lr * accum
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	momentum: Momentum. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceSparseApplyMomentum",
+		Input: []tf.Input{
+			var_, accum, lr, grad, indices, momentum,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
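+
+// Editor's note: a worked single step of the momentum update above, with
+// hypothetical scalars accum=1, momentum=0.9, grad=1, lr=0.1 for one updated
+// row:
+//
+//	accum = 1*0.9 + 1 = 1.9
+//	var  -= 0.1 * 1.9        // var decreases by 0.19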
+
+// An Op to permute tensors across replicated TPU instances.
+//
+// Each instance supplies its own input.
+//
+// For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing
+// source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs:
+// `[D, A, B, C]`.
+//
+// Arguments:
+//	input: The local input to be permuted. Currently only supports float and
+// bfloat16.
+//	source_target_pairs: A tensor with shape [num_pairs, 2].
+//
+// Returns The permuted input.
+func CollectivePermute(scope *Scope, input tf.Output, source_target_pairs tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "CollectivePermute",
+		Input: []tf.Input{
+			input, source_target_pairs,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the complex conjugate of a complex number.
+//
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// complex numbers that are the complex conjugate of each element in `input`. The
+// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the
+// real part and *b* is the imaginary part.
+//
+// The complex conjugate returned by this operation is of the form \\(a - bj\\).
+//
+// For example:
+//
+// ```
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
+// ```
+func Conj(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Conj",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
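+
+// A minimal usage sketch (editor's example, not generated code; assumes the
+// Go bindings' op.Const accepts complex64 slices):
+//
+//	s := op.NewScope()
+//	z := op.Const(s, []complex64{complex(-2.25, 4.75), complex(3.25, 5.75)})
+//	zc := op.Conj(s, z) // [-2.25-4.75i, 3.25-5.75i]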
+
+// RetrieveTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingCenteredRMSPropParameters.
+type RetrieveTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingCenteredRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingCenteredRMSPropParametersTableName(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve centered RMSProp embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns:
+//	parameters: Parameter parameters updated by the centered RMSProp optimization algorithm.
+//	ms: Parameter ms updated by the centered RMSProp optimization algorithm.
+//	mom: Parameter mom updated by the centered RMSProp optimization algorithm.
+//	mg: Parameter mg updated by the centered RMSProp optimization algorithm.
+func RetrieveTPUEmbeddingCenteredRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingCenteredRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingCenteredRMSPropParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// StringSplitAttr is an optional argument to StringSplit.
+type StringSplitAttr func(optionalAttr)
+
+// StringSplitSkipEmpty sets the optional skip_empty attribute to value.
+//
+// value: A `bool`. If `True`, skip the empty strings from the result.
+// If not specified, defaults to true
+func StringSplitSkipEmpty(value bool) StringSplitAttr {
+	return func(m optionalAttr) {
+		m["skip_empty"] = value
+	}
+}
+
+// Split elements of `input` based on `delimiter` into a `SparseTensor`.
+//
+// Let N be the size of source (typically N will be the batch size). Split each
+// element of `input` based on `delimiter` and return a `SparseTensor`
+// containing the split tokens. Empty tokens are ignored.
+//
+// `delimiter` can be empty, or a string of split characters. If `delimiter` is an
+//  empty string, each element of `input` is split into individual single-byte
+//  character strings, including splitting of UTF-8 multibyte sequences. Otherwise
+//  every character of `delimiter` is a potential split point.
+//
+// For example:
+//   If N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
+//   will be
+//
+//   indices = [0, 0;
+//              0, 1;
+//              1, 0;
+//              1, 1;
+//              1, 2]
+//   shape = [2, 3]
+//   values = ['hello', 'world', 'a', 'b', 'c']
+//
+// Arguments:
+//	input: 1-D. Strings to split.
+//	delimiter: 0-D. Delimiter characters (bytes), or empty string.
+//
+// Returns:
+//	indices: A dense matrix of int64 representing the indices of the sparse tensor.
+//	values: A vector of strings corresponding to the split values.
+//	shape: A length-2 vector of int64 representing the shape of the sparse
+// tensor, where the first value is N and the second value is the maximum number
+// of tokens in a single input entry.
+func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StringSplit",
+		Input: []tf.Input{
+			input, delimiter,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
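+
+// A minimal usage sketch (editor's example, not generated code), mirroring
+// the documented N = 2 case:
+//
+//	s := op.NewScope()
+//	in := op.Const(s, []string{"hello world", "a b c"})
+//	sep := op.Const(s, " ")
+//	indices, values, shape := op.StringSplit(s, in, sep)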
+
 // MaxPool3DAttr is an optional argument to MaxPool3D.
 type MaxPool3DAttr func(optionalAttr)
 
@@ -18937,182 +20323,69 @@
 	return op.Output(0)
 }
 
-// Conv3DBackpropInputAttr is an optional argument to Conv3DBackpropInput.
-type Conv3DBackpropInputAttr func(optionalAttr)
-
-// Conv3DBackpropInputDilations sets the optional dilations attribute to value.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
-func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes the gradients of 3-D convolution with respect to the input.
+// Convert JSON-encoded Example records to binary protocol buffer strings.
 //
-// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2
+// This op translates a tensor containing Example records, encoded using
+// the [standard JSON
+// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json),
+// into a tensor containing the same records encoded as binary protocol
+// buffers. The resulting tensor can then be fed to any of the other
+// Example-parsing ops.
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, in_channels]`.
-//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-// `in_channels` must match between `input` and `filter`.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputAttr) (output tf.Output) {
+//	json_examples: Each string is a JSON object serialized according to the JSON
+// mapping of the Example proto.
+//
+// Returns Each string is a binary Example protocol buffer corresponding
+// to the respective element of `json_examples`.
+func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropInput",
+		Type: "DecodeJSONExample",
 		Input: []tf.Input{
-			input, filter, out_backprop,
+			json_examples,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter.
-type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr)
+// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2.
+type QueueEnqueueManyV2Attr func(optionalAttr)
 
-// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value.
+// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr {
+// value: If the queue is too full, this operation will block for up
+// to timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["timeout_ms"] = value
 	}
 }
 
-// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value.
+// Enqueues zero or more tuples of one or more tensors in the given queue.
 //
-// value: 1-D tensor of length 4.  The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
-// element on that dimension. The dimension order is determined by the value of
-// `data_format`, see above for details. Dilations in the batch and depth
-// dimensions must be 1.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 >
-func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes the gradients of depthwise convolution with respect to the filter.
+// This operation slices each component tensor along the 0th dimension to
+// make multiple queue elements. All of the tuple components must have the
+// same size in the 0th dimension.
+//
+// The components input has k elements, which correspond to the components of
+// tuples stored in the given queue.
+//
+// N.B. If the queue is full, this operation will block until the given
+// elements have been enqueued (or 'timeout_ms' elapses, if specified).
 //
 // Arguments:
-//	input: 4-D with shape based on `data_format`.  For example, if
-// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height,
-// in_width, in_channels]` tensor.
-//	filter_sizes: An integer vector representing the tensor shape of `filter`,
-// where `filter` is a 4-D
-// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor.
-//	out_backprop: 4-D with shape  based on `data_format`.
-// For example, if `data_format` is 'NHWC' then
-// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution.
-//	padding: The type of padding algorithm to use.
+//	handle: The handle to a queue.
+//	components: One or more tensors from which the enqueued tensors should
+// be taken.
 //
-// Returns 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
-// the `filter` input of the convolution.
-func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNativeBackpropFilter",
-		Input: []tf.Input{
-			input, filter_sizes, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Converts each string in the input Tensor to its hash mod by a number of buckets.
-//
-// The hash function is deterministic on the content of the string within the
-// process. The hash function is a keyed hash function, where attribute `key`
-// defines the key of the hash function. `key` is an array of 2 elements.
-//
-// A strong hash is important when inputs may be malicious, e.g. URLs with
-// additional components. Adversaries could try to make their inputs hash to the
-// same bucket for a denial-of-service attack or to skew the results. A strong
-// hash prevents this by making it difficult, if not infeasible, to compute inputs
-// that hash to the same bucket. This comes at a cost of roughly 4x higher compute
-// time than `tf.string_to_hash_bucket_fast`.
-//
-// Arguments:
-//	input: The strings to assign a hash bucket.
-//	num_buckets: The number of buckets.
-//	key: The key for the keyed hash function passed as a list of two uint64
-// elements.
-//
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key}
-	opspec := tf.OpSpec{
-		Type: "StringToHashBucketStrong",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// StringLengthAttr is an optional argument to StringLength.
-type StringLengthAttr func(optionalAttr)
-
-// StringLengthUnit sets the optional unit attribute to value.
-//
-// value: The unit that is counted to compute string length.  One of: `"BYTE"` (for
-// the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8
-// encoded Unicode code points in each string).  Results are undefined
-// if `unit=UTF8_CHAR` and the `input` strings do not contain structurally
-// valid UTF-8.
-// If not specified, defaults to "BYTE"
-func StringLengthUnit(value string) StringLengthAttr {
-	return func(m optionalAttr) {
-		m["unit"] = value
-	}
-}
-
-// String lengths of `input`.
-//
-// Computes the length of each string given in the input tensor.
-//
-// Arguments:
-//	input: The string for which to compute the length.
-//
-// Returns Integer tensor that has the same shape as `input`. The output contains the
-// element-wise string lengths of `input`.
-func StringLength(scope *Scope, input tf.Output, optional ...StringLengthAttr) (output tf.Output) {
+// Returns the created operation.
+func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -19121,12 +20394,219 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StringLength",
+		Type: "QueueEnqueueManyV2",
+		Input: []tf.Input{
+			handle, tf.OutputList(components),
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// PrintV2Attr is an optional argument to PrintV2.
+type PrintV2Attr func(optionalAttr)
+
+// PrintV2OutputStream sets the optional output_stream attribute to value.
+//
+// value: A string specifying the output stream or logging level to print to.
+// If not specified, defaults to "stderr"
+func PrintV2OutputStream(value string) PrintV2Attr {
+	return func(m optionalAttr) {
+		m["output_stream"] = value
+	}
+}
+
+// Prints a string scalar.
+//
+// Prints a string scalar to the desired output_stream.
+//
+// Arguments:
+//	input: The string scalar to print.
+//
+// Returns the created operation.
+func PrintV2(scope *Scope, input tf.Output, optional ...PrintV2Attr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "PrintV2",
 		Input: []tf.Input{
 			input,
 		},
 		Attrs: attrs,
 	}
+	return scope.AddOperation(opspec)
+}
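`PrintV2` returns the created `*tf.Operation` rather than an output, so it is executed by passing it as a run target. A minimal sketch (the message and output stream are illustrative):

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// PrintV2 consumes a string scalar; write it to stdout instead of the
	// default stderr via the optional output_stream attribute.
	msg := op.Const(s, "hello from PrintV2")
	printOp := op.PrintV2(s, msg, op.PrintV2OutputStream("stdout"))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	// Ops with no outputs are executed by listing them as run targets.
	if _, err := sess.Run(nil, nil, []*tf.Operation{printOp}); err != nil {
		panic(err)
	}
}
```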
+
+// The gradient operator for the SparseSlice op.
+//
+// This op takes in the upstream gradient w.r.t. non-empty values of
+// the sliced `SparseTensor`, and outputs the gradients w.r.t.
+// the non-empty values of input `SparseTensor`.
+//
+// Arguments:
+//	backprop_val_grad: 1-D. The gradient with respect to
+// the non-empty values of the sliced `SparseTensor`.
+//	input_indices: 2-D.  The `indices` of the input `SparseTensor`.
+//	input_start: 1-D tensor representing the start of the slice.
+//	output_indices: 2-D.  The `indices` of the sliced `SparseTensor`.
+//
+// Returns 1-D. The gradient with respect to the non-empty values of input `SparseTensor`.
+func SparseSliceGrad(scope *Scope, backprop_val_grad tf.Output, input_indices tf.Output, input_start tf.Output, output_indices tf.Output) (val_grad tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSliceGrad",
+		Input: []tf.Input{
+			backprop_val_grad, input_indices, input_start, output_indices,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset by applying optimizations to `input_dataset`.
+//
+// Arguments:
+//	input_dataset: A variant tensor representing the input dataset.
+//	optimizations: A `tf.string` vector `tf.Tensor` identifying optimizations to use.
+//
+//
+func OptimizeDataset(scope *Scope, input_dataset tf.Output, optimizations tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "OptimizeDataset",
+		Input: []tf.Input{
+			input_dataset, optimizations,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad.
+type ResourceApplyProximalAdagradAttr func(optionalAttr)
+
+// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
+//
+// accum += grad * grad
+// prox_v = var - lr * grad * (1 / sqrt(accum))
+// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyProximalAdagrad",
+		Input: []tf.Input{
+			var_, accum, lr, l1, l2, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
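To make the three-line update rule above concrete, here is a plain-Go scalar transcription of the documented equations (an illustration, not part of the generated bindings; the hyperparameter values are arbitrary):

```go
package main

import (
	"fmt"
	"math"
)

// proximalAdagradStep applies the documented update to scalar state:
//   accum += grad * grad
//   prox_v = var - lr * grad * (1 / sqrt(accum))
//   var    = sign(prox_v)/(1+lr*l2) * max(|prox_v| - lr*l1, 0)
func proximalAdagradStep(v, accum, lr, l1, l2, grad float64) (float64, float64) {
	accum += grad * grad
	proxV := v - lr*grad/math.Sqrt(accum)
	shrunk := math.Max(math.Abs(proxV)-lr*l1, 0)
	return math.Copysign(shrunk/(1+lr*l2), proxV), accum
}

func main() {
	v, accum := 1.0, 0.1
	for i := 0; i < 3; i++ {
		v, accum = proximalAdagradStep(v, accum, 0.1, 0.01, 0.01, 0.5)
		fmt.Printf("step %d: var=%.6f accum=%.6f\n", i+1, v, accum)
	}
}
```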
+
+// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2.
+type MutableHashTableOfTensorsV2Attr func(optionalAttr)
+
+// MutableHashTableOfTensorsV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this table is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this table is shared under the given name across
+// multiple sessions.
+// If not specified, defaults to ""
+func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
+// If not specified, defaults to false
+func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr {
+	return func(m optionalAttr) {
+		m["use_node_name_sharing"] = value
+	}
+}
+
+// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value.
+// If not specified, defaults to <>
+func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr {
+	return func(m optionalAttr) {
+		m["value_shape"] = value
+	}
+}
+
+// Creates an empty hash table.
+//
+// This op creates a mutable hash table, specifying the type of its keys and
+// values. Each value must be a vector. Data can be inserted into the table using
+// the insert operations. It does not support the initialization operation.
+//
+// Arguments:
+//	key_dtype: Type of the table keys.
+//	value_dtype: Type of the table values.
+//
+// Returns Handle to a table.
+func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MutableHashTableOfTensorsV2",
+
+		Attrs: attrs,
+	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
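A graph-construction sketch for `MutableHashTableOfTensorsV2`; the key/value dtypes and shared name are illustrative, and inserts/lookups (done via separate table ops) are omitted:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A mutable table mapping string keys to int64 vector values, shared
	// across sessions under an illustrative name.
	table := op.MutableHashTableOfTensorsV2(s, tf.String, tf.Int64,
		op.MutableHashTableOfTensorsV2SharedName("example_table"))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	fmt.Println("table handle op:", table.Op.Name(), "graph ops:", len(graph.Operations()))
}
```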
@@ -19195,89 +20675,128 @@
 	return op.Output(0)
 }
 
-// UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets.
-type UnicodeDecodeWithOffsetsAttr func(optionalAttr)
-
-// UnicodeDecodeWithOffsetsErrors sets the optional errors attribute to value.
+// Subtracts a value from the current value of a variable.
 //
-// value: Error handling policy when there is invalid formatting found in the input.
-// The value of 'strict' will cause the operation to produce a InvalidArgument
-// error on any invalid input formatting. A value of 'replace' (the default) will
-// cause the operation to replace any invalid formatting in the input with the
-// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
-// skip any invalid formatting in the input and produce no corresponding output
-// character.
-// If not specified, defaults to "replace"
-func UnicodeDecodeWithOffsetsErrors(value string) UnicodeDecodeWithOffsetsAttr {
-	return func(m optionalAttr) {
-		m["errors"] = value
-	}
-}
-
-// UnicodeDecodeWithOffsetsReplacementChar sets the optional replacement_char attribute to value.
-//
-// value: The replacement character codepoint to be used in place of any invalid
-// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
-// be used. The default value is the default unicode replacement character is
-// 0xFFFD or U+65533.)
-// If not specified, defaults to 65533
-func UnicodeDecodeWithOffsetsReplacementChar(value int64) UnicodeDecodeWithOffsetsAttr {
-	return func(m optionalAttr) {
-		m["replacement_char"] = value
-	}
-}
-
-// UnicodeDecodeWithOffsetsReplaceControlCharacters sets the optional replace_control_characters attribute to value.
-//
-// value: Whether to replace the C0 control characters (00-1F) with the
-// `replacement_char`. Default is false.
-// If not specified, defaults to false
-func UnicodeDecodeWithOffsetsReplaceControlCharacters(value bool) UnicodeDecodeWithOffsetsAttr {
-	return func(m optionalAttr) {
-		m["replace_control_characters"] = value
-	}
-}
-
-// Decodes each string in `input` into a sequence of Unicode code points.
-//
-// The character codepoints for all strings are returned using a single vector
-// `char_values`, with strings expanded to characters in row-major order.
-// Similarly, the character start byte offsets are returned using a single vector
-// `char_to_byte_starts`, with strings expanded in row-major order.
-//
-// The `row_splits` tensor indicates where the codepoints and start offsets for
-// each input string begin and end within the `char_values` and
-// `char_to_byte_starts` tensors.  In particular, the values for the `i`th
-// string (in row-major order) are stored in the slice
-// `[row_splits[i]:row_splits[i+1]]`. Thus:
-//
-// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
-//   character in the `i`th string (in row-major order).
-// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th
-//   character in the `i`th string (in row-major order).
-// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
-//   string (in row-major order).
+// Any ReadVariableOp with a control dependency on this op is guaranteed to
+// see the decremented value or a subsequent newer one.
 //
 // Arguments:
-//	input: The text to be decoded. Can have any shape. Note that the output is flattened
-// to a vector of char values.
-//	input_encoding: Text encoding of the input strings. This is any of the encodings supported
-// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+//	resource: handle to the resource in which to store the variable.
+//	value: the value by which the variable will be decremented.
 //
-// Returns A 1D int32 tensor containing the row splits.A 1D int32 Tensor containing the decoded codepoints.A 1D int32 Tensor containing the byte index in the input string where each
-// character in `char_values` starts.
-func UnicodeDecodeWithOffsets(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeWithOffsetsAttr) (row_splits tf.Output, char_values tf.Output, char_to_byte_starts tf.Output) {
+// Returns the created operation.
+func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"input_encoding": input_encoding}
+	opspec := tf.OpSpec{
+		Type: "AssignSubVariableOp",
+		Input: []tf.Input{
+			resource, value,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
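A usage sketch for `AssignSubVariableOp`, assuming the companion `VarHandleOp`, `AssignVariableOp`, and `ReadVariableOp` wrappers elsewhere in this file; the initial value and decrement are illustrative:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A scalar float32 resource variable: initialize to 10, subtract 5, read.
	handle := op.VarHandleOp(s, tf.Float, tf.ScalarShape())
	initVar := op.AssignVariableOp(s, handle, op.Const(s, float32(10)))
	subFive := op.AssignSubVariableOp(s, handle, op.Const(s, float32(5)))
	read := op.ReadVariableOp(s, handle, tf.Float)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	// Order the side effects explicitly with separate Run calls.
	if _, err := sess.Run(nil, nil, []*tf.Operation{initVar}); err != nil {
		panic(err)
	}
	if _, err := sess.Run(nil, nil, []*tf.Operation{subFive}); err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{read}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // 5
}
```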
+
+// RestoreAttr is an optional argument to Restore.
+type RestoreAttr func(optionalAttr)
+
+// RestorePreferredShard sets the optional preferred_shard attribute to value.
+//
+// value: Index of file to open first if multiple files match
+// `file_pattern`.
+// If not specified, defaults to -1
+func RestorePreferredShard(value int64) RestoreAttr {
+	return func(m optionalAttr) {
+		m["preferred_shard"] = value
+	}
+}
+
+// Restores a tensor from checkpoint files.
+//
+// Reads a tensor stored in one or several files. If there are several files (for
+// instance because a tensor was saved as slices), `file_pattern` may contain
+// wildcard symbols (`*` and `?`) in the filename portion only, not in the
+// directory portion.
+//
+// If a `file_pattern` matches several files, `preferred_shard` can be used to hint
+// in which file the requested tensor is likely to be found. This op will first
+// open the file at index `preferred_shard` in the list of matching files and try
+// to restore tensors from that file.  Only if some tensors or tensor slices are
+// not found in that first file does the Op open all the files. Setting
+// `preferred_shard` to match the value passed as the `shard` input
+// of a matching `Save` Op may speed up Restore.  This attribute only affects
+// performance, not correctness.  The default value -1 means files are processed in
+// order.
+//
+// See also `RestoreSlice`.
+//
+// Arguments:
+//	file_pattern: Must have a single element. The pattern of the files from
+// which we read the tensor.
+//	tensor_name: Must have a single element. The name of the tensor to be
+// restored.
+//	dt: The type of the tensor to be restored.
+//
+// Returns The restored tensor.
+func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dt": dt}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "UnicodeDecodeWithOffsets",
+		Type: "Restore",
 		Input: []tf.Input{
-			input,
+			file_pattern, tensor_name,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear.
+type QuantizedResizeBilinearAttr func(optionalAttr)
+
+// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value.
+//
+// value: If true, the centers of the 4 corner pixels of the input and output tensors are
+// aligned, preserving the values at the corner pixels. Defaults to false.
+// If not specified, defaults to false
+func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Resize quantized `images` to `size` using quantized bilinear interpolation.
+//
+// Input images and output images must be quantized types.
+//
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
+//
+//
+//
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizedResizeBilinear",
+		Input: []tf.Input{
+			images, size, min, max,
 		},
 		Attrs: attrs,
 	}
@@ -19285,42 +20804,329 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Returns x - y element-wise.
+// Creates a dataset that uses a custom thread pool to compute `input_dataset`.
 //
-// *NOTE*: `Subtract` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Arguments:
+//
+//	num_threads: Identifies the number of threads to use for the private threadpool.
+//
+//
+func ExperimentalPrivateThreadPoolDataset(scope *Scope, input_dataset tf.Output, num_threads tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalPrivateThreadPoolDataset",
+		Input: []tf.Input{
+			input_dataset, num_threads,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation.
+type DenseToSparseSetOperationAttr func(optionalAttr)
+
+// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Applies set operation along last dimension of `Tensor` and `SparseTensor`.
+//
+// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
+//
+// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
+// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
+// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
+// ignored.
+//
+// If `validate_indices` is `True`, this op validates the order and range of `set2`
+// indices.
+//
+// Output `result` is a `SparseTensor` represented by `result_indices`,
+// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
+// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
+// dimension contains the result of `set_operation` applied to the corresponding
+// `[0...n-1]` dimension of `set`.
+//
+// Arguments:
+//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
+// Dimension `n` contains values in a set, duplicates are allowed but ignored.
+//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
+// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the
+// max set size across `n-1` dimensions.
+//
+//
+// Returns 2D indices of a `SparseTensor`. 1D values of a `SparseTensor`. 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
+// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
+// is the max result set size across all `0...n-1` dimensions.
+func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"set_operation": set_operation}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DenseToSparseSetOperation",
+		Input: []tf.Input{
+			set1, set2_indices, set2_values, set2_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// L2 Loss.
+//
+// Computes half the L2 norm of a tensor without the `sqrt`:
+//
+//     output = sum(t ** 2) / 2
+//
+// Arguments:
+//	t: Typically 2-D, but may have any dimensions.
+//
+// Returns 0-D.
+func L2Loss(scope *Scope, t tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Sub",
+		Type: "L2Loss",
 		Input: []tf.Input{
-			x, y,
+			t,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
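A worked example for `L2Loss` that checks the documented formula `sum(t ** 2) / 2` on a small input (the values are illustrative):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// sum(t**2)/2 = (9 + 16) / 2 = 12.5 for t = [3, 4].
	t := op.Const(s, []float32{3, 4})
	loss := op.L2Loss(s, t)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{loss}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // 12.5
}
```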
 
-// Returns the max of x and y (i.e. x > y ? x : y) element-wise.
+// StackV2Attr is an optional argument to StackV2.
+type StackV2Attr func(optionalAttr)
+
+// StackV2StackName sets the optional stack_name attribute to value.
 //
-// *NOTE*: `Maximum` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// value: Overrides the name used for the temporary stack resource. Default
+// value is the name of the 'Stack' op (which is guaranteed unique).
+// If not specified, defaults to ""
+func StackV2StackName(value string) StackV2Attr {
+	return func(m optionalAttr) {
+		m["stack_name"] = value
+	}
+}
+
+// A stack that produces elements in first-in last-out order.
+//
+// Arguments:
+//	max_size: The maximum size of the stack if non-negative. If negative, the stack
+// size is unlimited.
+//	elem_type: The type of the elements on the stack.
+//
+// Returns The handle to the stack.
+func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional ...StackV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"elem_type": elem_type}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Maximum",
+		Type: "StackV2",
 		Input: []tf.Input{
-			x, y,
+			max_size,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
+// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop.
+type CudnnRNNBackpropAttr func(optionalAttr)
+
+// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNBackpropDirection sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNBackpropDropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNBackpropSeed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Backprop step of CudnnRNN.
+//
+// Computes the backprop of both data and weights in an RNN.
+//
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicate whether there is a linear projection between the input and
+//     the actual computation before the first layer. 'skip_input' is only allowed
+//     when input_size == num_units; 'auto_select' implies 'skip_input' when
+//     input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used. Should be
+//   "unidirectional" or "bidirectional".
+// dropout: Dropout probability. When set to 0., dropout is disabled.
+// seed: The 1st part of a seed to initialize dropout.
+// seed2: The 2nd part of a seed to initialize dropout.
+// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
+// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
+//     num_units].
+// input_c: For LSTM, a 3-D tensor with the shape of
+//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
+// params: A 1-D tensor that contains the weights and biases in an opaque layout.
+//     The size must be created through CudnnRNNParamsSize, and initialized
+//     separately. Note that they might not be compatible across different
+//     generations, so it is a good idea to save and restore them.
+// output: A 3-D tensor with the shape of [seq_length, batch_size,
+//     dir * num_units].
+// output_h: The same shape as input_h.
+// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
+// output_backprop: A 3-D tensor with the same shape as output in the forward pass.
+// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward
+//     pass.
+// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward
+//     pass.
+// reserve_space: The same reserve_space produced by the forward operation.
+// input_backprop: The backprop to input in the forward pass. Has the same shape
+//     as input.
+// input_h_backprop: The backprop to input_h in the forward pass. Has the same
+//     shape as input_h.
+// input_c_backprop: The backprop to input_c in the forward pass. Has the same
+//     shape as input_c.
+// params_backprop: The backprop to the params buffer in the forward pass. Has the
+//     same shape as params.
+func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNNBackprop",
+		Input: []tf.Input{
+			input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// InfeedEnqueueAttr is an optional argument to InfeedEnqueue.
+type InfeedEnqueueAttr func(optionalAttr)
+
+// InfeedEnqueueShape sets the optional shape attribute to value.
+//
+// value: The shape of the tensor.
+// If not specified, defaults to <>
+func InfeedEnqueueShape(value tf.Shape) InfeedEnqueueAttr {
+	return func(m optionalAttr) {
+		m["shape"] = value
+	}
+}
+
+// InfeedEnqueueLayout sets the optional layout attribute to value.
+//
+// value: A vector holding the requested layout in minor-to-major sequence.
+// If a layout attribute is passed, but its values are all -1, the layout will
+// be computed by the infeed operation.
+// If not specified, defaults to <>
+func InfeedEnqueueLayout(value []int64) InfeedEnqueueAttr {
+	return func(m optionalAttr) {
+		m["layout"] = value
+	}
+}
+
+// InfeedEnqueueDeviceOrdinal sets the optional device_ordinal attribute to value.
+//
+// value: The TPU device to use. This should be -1 when the Op
+// is running on a TPU device, and >= 0 when the Op is running on the CPU
+// device.
+// If not specified, defaults to -1
+func InfeedEnqueueDeviceOrdinal(value int64) InfeedEnqueueAttr {
+	return func(m optionalAttr) {
+		m["device_ordinal"] = value
+	}
+}
+
+// An op which feeds a single Tensor value into the computation.
+//
+// Arguments:
+//	input: A tensor that will be provided using the infeed mechanism.
+//
+// Returns the created operation.
+func InfeedEnqueue(scope *Scope, input tf.Output, optional ...InfeedEnqueueAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "InfeedEnqueue",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Computes softmax cross entropy cost and gradients to backpropagate.
 //
 // Inputs are the logits, not probabilities.
@@ -19422,183 +21228,6 @@
 	return op.Output(0)
 }
 
-// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation.
-type DenseToSparseSetOperationAttr func(optionalAttr)
-
-// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr {
-	return func(m optionalAttr) {
-		m["validate_indices"] = value
-	}
-}
-
-// Applies set operation along last dimension of `Tensor` and `SparseTensor`.
-//
-// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
-//
-// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
-// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
-// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
-// ignored.
-//
-// If `validate_indices` is `True`, this op validates the order and range of `set2`
-// indices.
-//
-// Output `result` is a `SparseTensor` represented by `result_indices`,
-// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-// dimension contains the result of `set_operation` applied to the corresponding
-// `[0...n-1]` dimension of `set`.
-//
-// Arguments:
-//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
-// Dimension `n` contains values in a set, duplicates are allowed but ignored.
-//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
-// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the
-// max set size across `n-1` dimensions.
-//
-//
-// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-// is the max result set size across all `0...n-1` dimensions.
-func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"set_operation": set_operation}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DenseToSparseSetOperation",
-		Input: []tf.Input{
-			set1, set2_indices, set2_values, set2_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// L2 Loss.
-//
-// Computes half the L2 norm of a tensor without the `sqrt`:
-//
-//     output = sum(t ** 2) / 2
-//
-// Arguments:
-//	t: Typically 2-D, but may have any dimensions.
-//
-// Returns 0-D.
-func L2Loss(scope *Scope, t tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "L2Loss",
-		Input: []tf.Input{
-			t,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes cos of x element-wise.
-func Cos(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Cos",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad.
-type FusedBatchNormGradAttr func(optionalAttr)
-
-// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value.
-//
-// value: A small float number added to the variance of x.
-// If not specified, defaults to 0.0001
-func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr {
-	return func(m optionalAttr) {
-		m["epsilon"] = value
-	}
-}
-
-// FusedBatchNormGradDataFormat sets the optional data_format attribute to value.
-//
-// value: The data format for y_backprop, x, x_backprop.
-// Either "NHWC" (default) or "NCHW".
-// If not specified, defaults to "NHWC"
-func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// FusedBatchNormGradIsTraining sets the optional is_training attribute to value.
-//
-// value: A bool value to indicate the operation is for training (default)
-// or inference.
-// If not specified, defaults to true
-func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr {
-	return func(m optionalAttr) {
-		m["is_training"] = value
-	}
-}
-
-// Gradient for batch normalization.
-//
-// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-// The size of 1D Tensors matches the dimension C of the 4D Tensors.
-//
-// Arguments:
-//	y_backprop: A 4D Tensor for the gradient with respect to y.
-//	x: A 4D Tensor for input data.
-//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
-//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
-// mean to be reused in gradient computation. When is_training is
-// False, a 1D Tensor for the population mean to be reused in both
-// 1st and 2nd order gradient computation.
-//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
-// variance (inverted variance in the cuDNN case) to be reused in
-// gradient computation. When is_training is False, a 1D Tensor
-// for the population variance to be reused in both 1st and 2nd
-// order gradient computation.
-//
-// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input
-// in FusedBatchNorm.
-func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FusedBatchNormGrad",
-		Input: []tf.Input{
-			y_backprop, x, scale, reserve_space_1, reserve_space_2,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
 // TopKAttr is an optional argument to TopK.
 type TopKAttr func(optionalAttr)
 
@@ -19655,72 +21284,242 @@
 	return op.Output(0), op.Output(1)
 }
 
-// Converts each string in the input Tensor to its hash mod by a number of buckets.
+// BatchToSpace for N-D tensors of type T.
 //
-// The hash function is deterministic on the content of the string within the
-// process.
-//
-// Note that the hash function may change from time to time.
-// This functionality will be deprecated and it's recommended to use
-// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.
+// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape
+// `block_shape + [batch]`, interleaves these blocks back into the grid defined by
+// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as
+// the input.  The spatial dimensions of this intermediate result are then
+// optionally cropped according to `crops` to produce the output.  This is the
+// reverse of SpaceToBatch.  See below for a precise description.
 //
 // Arguments:
+//	input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
+// where spatial_shape has M dimensions.
+//	block_shape: 1-D with shape `[M]`, all values must be >= 1.
+//	crops: 2-D with shape `[M, 2]`, all values must be >= 0.
+//   `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input
+//   dimension `i + 1`, which corresponds to spatial dimension `i`.  It is
+//   required that
+//   `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.
 //
-//	num_buckets: The number of buckets.
+// This operation is equivalent to the following steps:
 //
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) {
+// 1. Reshape `input` to `reshaped` of shape:
+//      [block_shape[0], ..., block_shape[M-1],
+//       batch / prod(block_shape),
+//       input_shape[1], ..., input_shape[N-1]]
+//
+// 2. Permute dimensions of `reshaped` to produce `permuted` of shape
+//      [batch / prod(block_shape),
+//
+//       input_shape[1], block_shape[0],
+//       ...,
+//       input_shape[M], block_shape[M-1],
+//
+//       input_shape[M+1], ..., input_shape[N-1]]
+//
+// 3. Reshape `permuted` to produce `reshaped_permuted` of shape
+//      [batch / prod(block_shape),
+//
+//       input_shape[1] * block_shape[0],
+//       ...,
+//       input_shape[M] * block_shape[M-1],
+//
+//       input_shape[M+1],
+//       ...,
+//       input_shape[N-1]]
+//
+// 4. Crop the start and end of dimensions `[1, ..., M]` of
+//    `reshaped_permuted` according to `crops` to produce the output of shape:
+//      [batch / prod(block_shape),
+//
+//       input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1],
+//       ...,
+//       input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1],
+//
+//       input_shape[M+1], ..., input_shape[N-1]]
+//
+// Some examples:
+//
+// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [0, 0]]`:
+//
+// ```
+// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 1]` and value:
+//
+// ```
+// x = [[[[1], [2]], [[3], [4]]]]
+// ```
+//
+// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [0, 0]]`:
+//
+// ```
+// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 3]` and value:
+//
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
+//
+// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [0, 0]]`:
+//
+// ```
+// x = [[[[1], [3]], [[9], [11]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[1, 4, 4, 1]` and value:
+//
+// ```
+// x = [[[1],   [2],  [3],  [4]],
+//      [[5],   [6],  [7],  [8]],
+//      [[9],  [10], [11],  [12]],
+//      [[13], [14], [15],  [16]]]
+// ```
+//
+// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [2, 0]]`:
+//
+// ```
+// x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
+//      [[[0], [2], [4]]], [[[0], [10], [12]]],
+//      [[[0], [5], [7]]], [[[0], [13], [15]]],
+//      [[[0], [6], [8]]], [[[0], [14], [16]]]]
+// ```
+//
+// The output tensor has shape `[2, 2, 4, 1]` and value:
+//
+// ```
+// x = [[[[1],   [2],  [3],  [4]],
+//       [[5],   [6],  [7],  [8]]],
+//      [[[9],  [10], [11],  [12]],
+//       [[13], [14], [15],  [16]]]]
+// ```
+func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_buckets": num_buckets}
 	opspec := tf.OpSpec{
-		Type: "StringToHashBucket",
+		Type: "BatchToSpaceND",
 		Input: []tf.Input{
-			string_tensor,
+			input, block_shape, crops,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
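A runnable version of example (1) from the `BatchToSpaceND` doc comment above, reshaping a `[4, 1, 1, 1]` input with 2x2 blocks and no cropping into a `[1, 2, 2, 1]` output:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Example (1): [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] with block_shape
	// [2, 2] and crops [[0, 0], [0, 0]].
	input := op.Const(s, [][][][]int32{{{{1}}}, {{{2}}}, {{{3}}}, {{{4}}}})
	blockShape := op.Const(s, []int32{2, 2})
	crops := op.Const(s, [][]int32{{0, 0}, {0, 0}})
	out := op.BatchToSpaceND(s, input, blockShape, crops)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{out}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value()) // [[[[1] [2]] [[3] [4]]]]
}
```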
 
-// StaticRegexReplaceAttr is an optional argument to StaticRegexReplace.
-type StaticRegexReplaceAttr func(optionalAttr)
+// UnpackAttr is an optional argument to Unpack.
+type UnpackAttr func(optionalAttr)
 
-// StaticRegexReplaceReplaceGlobal sets the optional replace_global attribute to value.
+// UnpackAxis sets the optional axis attribute to value.
 //
-// value: If True, the replacement is global, otherwise the replacement
-// is done only on the first match.
-// If not specified, defaults to true
-func StaticRegexReplaceReplaceGlobal(value bool) StaticRegexReplaceAttr {
+// value: Dimension along which to unpack.  Negative values wrap around, so the
+// valid range is `[-R, R)`.
+// If not specified, defaults to 0
+func UnpackAxis(value int64) UnpackAttr {
 	return func(m optionalAttr) {
-		m["replace_global"] = value
+		m["axis"] = value
 	}
 }
 
-// Replaces the match of pattern in input with rewrite.
+// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors.
 //
-// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.
+// For example, given a tensor of shape `(A, B, C, D)`;
+//
+// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]`
+//   and each tensor in `output` will have shape `(B, C, D)`. (Note that the
+//   dimension unpacked along is gone, unlike `split`).
+//
+// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]`
+//   and each tensor in `output` will have shape `(A, C, D)`.
+// Etc.
+//
+// This is the opposite of `pack`.
 //
 // Arguments:
-//	input: The text to be processed.
-//	pattern: The regular expression to match the input.
-//	rewrite: The rewrite to be applied to the matched expression.
+//	value: 1-D or higher, with `axis` dimension size equal to `num`.
 //
-// Returns The text after applying pattern and rewrite.
-func StaticRegexReplace(scope *Scope, input tf.Output, pattern string, rewrite string, optional ...StaticRegexReplaceAttr) (output tf.Output) {
+//
+// Returns The list of tensors unpacked from `value`.
+func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"pattern": pattern, "rewrite": rewrite}
+	attrs := map[string]interface{}{"num": num}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StaticRegexReplace",
+		Type: "Unpack",
 		Input: []tf.Input{
-			input,
+			value,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("Unpack", err)
+		return
+	}
+	return output
+}
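A usage sketch for `Unpack`, which returns a Go slice of outputs that can be fetched directly (the matrix values are illustrative):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Unpack a [2, 3] matrix along axis 0 (the default) into two [3] tensors.
	value := op.Const(s, [][]int32{{1, 2, 3}, {4, 5, 6}})
	rows := op.Unpack(s, value, 2)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, rows, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value(), out[1].Value()) // [1 2 3] [4 5 6]
}
```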
+
+// Delete the stack from its resource container.
+//
+// Arguments:
+//	handle: The handle to a stack.
+//
+// Returns the created operation.
+func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "StackCloseV2",
+		Input: []tf.Input{
+			handle,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
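A sketch tying `StackV2` (shown earlier) to `StackCloseV2`, assuming the companion `StackPushV2`/`StackPopV2` wrappers elsewhere in this file; the element value is illustrative:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// An unbounded float32 stack (max_size < 0), pushed and popped once,
	// then released with StackCloseV2.
	handle := op.StackV2(s, op.Const(s, int32(-1)), tf.Float)
	pushed := op.StackPushV2(s, handle, op.Const(s, float32(42)))
	popped := op.StackPopV2(s, handle, tf.Float)
	closeOp := op.StackCloseV2(s, handle)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	// Fetching the push output executes the push; then pop and close.
	if _, err := sess.Run(nil, []tf.Output{pushed}, nil); err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{popped}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // 42
	if _, err := sess.Run(nil, nil, []*tf.Operation{closeOp}); err != nil {
		panic(err)
	}
}
```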
+
+// Increments variable pointed to by 'resource' until it reaches 'limit'.
+//
+// Arguments:
+//	resource: Should be from a scalar `Variable` node.
+//	limit: If incrementing the variable would bring its value above `limit`, an
+// 'OutOfRange' error is generated instead.
+//
+//
+// Returns A copy of the input before increment. If nothing else modifies the
+// input, the values produced will all be distinct.
+func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"limit": limit, "T": T}
+	opspec := tf.OpSpec{
+		Type: "ResourceCountUpTo",
+		Input: []tf.Input{
+			resource,
 		},
 		Attrs: attrs,
 	}
@@ -19728,61 +21527,100 @@
 	return op.Output(0)
 }
 
-// Computes gradients for the exponential linear (Elu) operation.
+// Computes softsign gradients for a softsign operation.
 //
 // Arguments:
-//	gradients: The backpropagated gradients to the corresponding Elu operation.
-//	outputs: The outputs of the corresponding Elu operation.
+//	gradients: The backpropagated gradients to the corresponding softsign operation.
+//	features: The features passed as input to the corresponding softsign operation.
 //
-// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0,
-// `gradients` otherwise.
-func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
+// Returns The gradients: `gradients / (1 + abs(features)) ** 2`.
+func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "EluGrad",
+		Type: "SoftsignGrad",
 		Input: []tf.Input{
-			gradients, outputs,
+			gradients, features,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
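A small numeric check of the documented `SoftsignGrad` formula `gradients / (1 + abs(features)) ** 2` (input values are illustrative):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// 1 / (1 + |3|)^2 = 1/16 = 0.0625
	gradients := op.Const(s, []float32{1})
	features := op.Const(s, []float32{3})
	backprops := op.SoftsignGrad(s, gradients, features)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{backprops}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [0.0625]
}
```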
 
-// Computes the gradient of `igamma(a, x)` wrt `a`.
-func IgammaGradA(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
+// Provides the time since epoch in seconds.
+//
+// Returns the timestamp as a `float64` for seconds since the Unix epoch.
+//
+// Note: the timestamp is computed when the op is executed, not when it is added
+// to the graph.
+func Timestamp(scope *Scope) (ts tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IgammaGradA",
-		Input: []tf.Input{
-			a, x,
-		},
+		Type: "Timestamp",
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
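A sketch illustrating the note above that `Timestamp` is evaluated when the op is executed: fetching the same output in two separate `Run` calls yields two different values:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	ts := op.Timestamp(s)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	// Two Run calls, two distinct timestamps.
	for i := 0; i < 2; i++ {
		out, err := sess.Run(nil, []tf.Output{ts}, nil)
		if err != nil {
			panic(err)
		}
		fmt.Println(out[0].Value())
	}
}
```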
 
-// Creates a dataset that contains `count` elements from the `input_dataset`.
+// Returns immutable tensor from memory region.
+//
+// The current implementation memmaps the tensor from a file.
 //
 // Arguments:
-//
-//	count: A scalar representing the number of elements from the `input_dataset`
-// that should be taken. A value of `-1` indicates that all of `input_dataset`
-// is taken.
-//
-//
-func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+//	dtype: Type of the returned tensor.
+//	shape: Shape of the returned tensor.
+//	memory_region_name: Name of readonly memory region used by the tensor, see
+// NewReadOnlyMemoryRegionFromFile in tensorflow::Env.
+func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name}
 	opspec := tf.OpSpec{
-		Type: "TakeDataset",
+		Type: "ImmutableConst",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// StringJoinAttr is an optional argument to StringJoin.
+type StringJoinAttr func(optionalAttr)
+
+// StringJoinSeparator sets the optional separator attribute to value.
+//
+// value: string, an optional join separator.
+// If not specified, defaults to ""
+func StringJoinSeparator(value string) StringJoinAttr {
+	return func(m optionalAttr) {
+		m["separator"] = value
+	}
+}
+
+// Joins the strings in the given list of string tensors into one tensor.
+//
+// The strings are joined with the given separator (default is an empty separator).
+//
+// Arguments:
+//	inputs: A list of string tensors.  The tensors must all have the same shape,
+// or be scalars.  Scalars may be mixed in; these will be broadcast to the shape
+// of non-scalar inputs.
+func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StringJoin",
 		Input: []tf.Input{
-			input_dataset, count,
+			tf.OutputList(inputs),
 		},
 		Attrs: attrs,
 	}
@@ -19790,36 +21628,28 @@
 	return op.Output(0)
 }
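A usage sketch for `StringJoin` with the optional separator attribute (the inputs are illustrative):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Join two same-shaped string tensors element-wise with "-".
	a := op.Const(s, []string{"foo", "bar"})
	b := op.Const(s, []string{"1", "2"})
	joined := op.StringJoin(s, []tf.Output{a, b}, op.StringJoinSeparator("-"))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{joined}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [foo-1 bar-2]
}
```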
 
-// The gradient operator for the SparseAdd op.
+// Creates and returns an empty tensor list.
 //
-// The SparseAdd op calculates A + B, where A, B, and the sum are all represented
-// as `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.
-// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
-// values of A and B.
+// All list elements must be tensors of dtype element_dtype and shape compatible
+// with element_shape.
 //
-// Arguments:
-//	backprop_val_grad: 1-D with shape `[nnz(sum)]`.  The gradient with respect to
-// the non-empty values of the sum.
-//	a_indices: 2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`.
-//	b_indices: 2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`.
-//	sum_indices: 2-D.  The `indices` of the sum `SparseTensor`, size
-// `[nnz(sum), ndims]`.
-//
-// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the
-// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the
-// non-empty values of B.
-func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) {
+// handle: an empty tensor list.
+// element_dtype: the type of elements in the list.
+// element_shape: a shape compatible with that of elements in the list.
+func EmptyTensorList(scope *Scope, element_shape tf.Output, max_num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
 	opspec := tf.OpSpec{
-		Type: "SparseAddGrad",
+		Type: "EmptyTensorList",
 		Input: []tf.Input{
-			backprop_val_grad, a_indices, b_indices, sum_indices,
+			element_shape, max_num_elements,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
 // Returns a list of tensors with the same shapes and contents as the input
@@ -20017,393 +21847,129 @@
 	return op.Output(0)
 }
 
-// RandomPoissonV2Attr is an optional argument to RandomPoissonV2.
-type RandomPoissonV2Attr func(optionalAttr)
+// LoadTPUEmbeddingAdadeltaParametersAttr is an optional argument to LoadTPUEmbeddingAdadeltaParameters.
+type LoadTPUEmbeddingAdadeltaParametersAttr func(optionalAttr)
 
-// RandomPoissonV2Seed sets the optional seed attribute to value.
+// LoadTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
 //
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr {
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingAdadeltaParametersTableId(value int64) LoadTPUEmbeddingAdadeltaParametersAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["table_id"] = value
 	}
 }
 
-// RandomPoissonV2Seed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr {
+// LoadTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingAdadeltaParametersTableName(value string) LoadTPUEmbeddingAdadeltaParametersAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["table_name"] = value
 	}
 }
 
-// RandomPoissonV2Dtype sets the optional dtype attribute to value.
-// If not specified, defaults to DT_INT64
-func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Outputs random values from the Poisson distribution(s) described by rate.
+// Load Adadelta embedding parameters.
 //
-// This op uses two algorithms, depending on rate. If rate >= 10, then
-// the algorithm by Hormann is used to acquire samples via
-// transformation-rejection.
-// See http://www.sciencedirect.com/science/article/pii/0167668793909974.
-//
-// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform
-// random variables.
-// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer
-// Programming, Volume 2. Addison Wesley
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
 //
 // Arguments:
-//	shape: 1-D integer tensor. Shape of independent samples to draw from each
-// distribution described by the shape parameters given in rate.
-//	rate: A tensor in which each scalar is a "rate" parameter describing the
-// associated poisson distribution.
+//	parameters: Value of parameters used in the Adadelta optimization algorithm.
+//	accumulators: Value of accumulators used in the Adadelta optimization algorithm.
+//	updates: Value of updates used in the Adadelta optimization algorithm.
 //
-// Returns A tensor with shape `shape + shape(rate)`. Each slice
-// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
-// `rate[i0, i1, ...iN]`.
-func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) {
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingAdadeltaParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomPoissonV2",
+		Type: "LoadTPUEmbeddingAdadeltaParameters",
 		Input: []tf.Input{
-			shape, rate,
+			parameters, accumulators, updates,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg.
-type DecodeAndCropJpegAttr func(optionalAttr)
-
-// DecodeAndCropJpegChannels sets the optional channels attribute to value.
+// Converts each string in the input Tensor to its hash mod by a number of buckets.
 //
-// value: Number of color channels for the decoded image.
-// If not specified, defaults to 0
-func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["channels"] = value
-	}
-}
-
-// DecodeAndCropJpegRatio sets the optional ratio attribute to value.
-//
-// value: Downscaling ratio.
-// If not specified, defaults to 1
-func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["ratio"] = value
-	}
-}
-
-// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
-//
-// value: If true use a slower but nicer upscaling of the
-// chroma planes (yuv420/422 only).
-// If not specified, defaults to true
-func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["fancy_upscaling"] = value
-	}
-}
-
-// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
-//
-// value: If true try to recover an image from truncated input.
-// If not specified, defaults to false
-func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["try_recover_truncated"] = value
-	}
-}
-
-// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
-//
-// value: The minimum required fraction of lines before a truncated
-// input is accepted.
-// If not specified, defaults to 1
-func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["acceptable_fraction"] = value
-	}
-}
-
-// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value.
-//
-// value: string specifying a hint about the algorithm used for
-// decompression.  Defaults to "" which maps to a system-specific
-// default.  Currently valid values are ["INTEGER_FAST",
-// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
-// jpeg library changes to a version that does not have that specific
-// option.)
-// If not specified, defaults to ""
-func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["dct_method"] = value
-	}
-}
-
-// Decode and Crop a JPEG-encoded image to a uint8 tensor.
-//
-// The attr `channels` indicates the desired number of color channels for the
-// decoded image.
-//
-// Accepted values are:
-//
-// *   0: Use the number of channels in the JPEG-encoded image.
-// *   1: output a grayscale image.
-// *   3: output an RGB image.
-//
-// If needed, the JPEG-encoded image is transformed to match the requested number
-// of color channels.
-//
-// The attr `ratio` allows downscaling the image by an integer factor during
-// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
-// downscaling the image later.
-//
-//
-// It is equivalent to a combination of decode and crop, but much faster by only
-// decoding partial jpeg image.
+// The hash function is deterministic on the content of the string within the
+// process and will never change. However, it is not suitable for cryptography.
+// This function may be used when CPU time is scarce and inputs are trusted or
+// unimportant. There is a risk of adversaries constructing inputs that all hash
+// to the same bucket. To prevent this problem, use a strong hash function with
+// `tf.string_to_hash_bucket_strong`.
 //
 // Arguments:
-//	contents: 0-D.  The JPEG-encoded image.
-//	crop_window: 1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width].
+//	input: The strings to assign a hash bucket.
+//	num_buckets: The number of buckets.
 //
-// Returns 3-D with shape `[height, width, channels]`..
-func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) {
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"num_buckets": num_buckets}
 	opspec := tf.OpSpec{
-		Type: "DecodeAndCropJpeg",
-		Input: []tf.Input{
-			contents, crop_window,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Adds two `SparseTensor` objects to produce another `SparseTensor`.
-//
-// The input `SparseTensor` objects' indices are assumed ordered in standard
-// lexicographic order.  If this is not the case, before this step run
-// `SparseReorder` to restore index ordering.
-//
-// By default, if two values sum to zero at some index, the output `SparseTensor`
-// would still include that particular location in its index, storing a zero in the
-// corresponding value slot.  To override this, callers can specify `thresh`,
-// indicating that if the sum has a magnitude strictly smaller than `thresh`, its
-// corresponding value and index would then not be included.  In particular,
-// `thresh == 0` (default) means everything is kept and actual thresholding happens
-// only for a positive value.
-//
-// In the following shapes, `nnz` is the count after taking `thresh` into account.
-//
-// Arguments:
-//	a_indices: 2-D.  The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix.
-//	a_values: 1-D.  The `values` of the first `SparseTensor`, size `[nnz]` Vector.
-//	a_shape: 1-D.  The `shape` of the first `SparseTensor`, size `[ndims]` Vector.
-//	b_indices: 2-D.  The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix.
-//	b_values: 1-D.  The `values` of the second `SparseTensor`, size `[nnz]` Vector.
-//	b_shape: 1-D.  The `shape` of the second `SparseTensor`, size `[ndims]` Vector.
-//	thresh: 0-D.  The magnitude threshold that determines if an output value/index
-// pair takes space.
-func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseAdd",
-		Input: []tf.Input{
-			a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// QuantizedRelu6Attr is an optional argument to QuantizedRelu6.
-type QuantizedRelu6Attr func(optionalAttr)
-
-// QuantizedRelu6OutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QUINT8
-func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
-//
-// Arguments:
-//
-//	min_features: The float value that the lowest quantized value represents.
-//	max_features: The float value that the highest quantized value represents.
-//
-// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
-func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizedRelu6",
-		Input: []tf.Input{
-			features, min_features, max_features,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2.
-type FixedLengthRecordReaderV2Attr func(optionalAttr)
-
-// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value.
-//
-// value: Number of bytes in the header, defaults to 0.
-// If not specified, defaults to 0
-func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["header_bytes"] = value
-	}
-}
-
-// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value.
-//
-// value: Number of bytes in the footer, defaults to 0.
-// If not specified, defaults to 0
-func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["footer_bytes"] = value
-	}
-}
-
-// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value.
-//
-// value: Number of bytes to hop before each read. Default of 0 means using
-// record_bytes.
-// If not specified, defaults to 0
-func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["hop_bytes"] = value
-	}
-}
-
-// FixedLengthRecordReaderV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this reader is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this reader is named in the given bucket
-// with this shared_name. Otherwise, the node name is used instead.
-// If not specified, defaults to ""
-func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value.
-//
-// value: The type of encoding for the file. Currently ZLIB and GZIP
-// are supported. Defaults to none.
-// If not specified, defaults to ""
-func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["encoding"] = value
-	}
-}
-
-// A Reader that outputs fixed-length records from a file.
-//
-// Arguments:
-//	record_bytes: Number of bytes in the record.
-//
-// Returns The handle to reference the Reader.
-func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"record_bytes": record_bytes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FixedLengthRecordReaderV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Return a tensor with the same shape and contents as the input tensor or value.
-func Identity(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Identity",
+		Type: "StringToHashBucketFast",
 		Input: []tf.Input{
 			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
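+
+// A short usage sketch (same import assumptions as the earlier sketches):
+//
+// ```
+// s := op.NewScope()
+// words := op.Const(s, []string{"apple", "banana"})
+// buckets := op.StringToHashBucketFast(s, words, 100)
+// // Running the graph yields an int64 tensor of the same shape as `words`,
+// // with each value in [0, 100). Equal strings always map to the same bucket.
+// ```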
 
-// Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
+// RealAttr is an optional argument to Real.
+type RealAttr func(optionalAttr)
+
+// RealTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_FLOAT
+func RealTout(value tf.DataType) RealAttr {
+	return func(m optionalAttr) {
+		m["Tout"] = value
+	}
+}
+
+// Returns the real part of a complex number.
 //
-// This is the angle \( \theta \in [-\pi, \pi] \) such that
-// \[ x = r \cos(\theta) \]
-// and
-// \[ y = r \sin(\theta) \]
-// where \(r = \sqrt(x^2 + y^2) \).
-func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) {
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// type `float` that is the real part of each element in `input`. All elements in
+// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real
+//  part returned by this operation and *b* is the imaginary part.
+//
+// For example:
+//
+// ```
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.real(input) ==> [-2.25, 3.25]
+// ```
+func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Atan2",
+		Type: "Real",
 		Input: []tf.Input{
-			y, x,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -20519,113 +22085,146 @@
 	return op.Output(0), op.Output(1)
 }
 
-// Check if the input matches the regex pattern.
+// TensorArrayV3Attr is an optional argument to TensorArrayV3.
+type TensorArrayV3Attr func(optionalAttr)
+
+// TensorArrayV3ElementShape sets the optional element_shape attribute to value.
 //
-// The input is a string tensor of any shape. The pattern is the
-// regular expression to be matched with every element of the input tensor.
-// The boolean values (True or False) of the output tensor indicate
-// if the input matches the regex pattern provided.
+// value: The expected shape of an element, if known. Used to
+// validate the shapes of TensorArray elements. If this shape is not
+// fully specified, gathering zero-size TensorArrays is an error.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr {
+	return func(m optionalAttr) {
+		m["element_shape"] = value
+	}
+}
+
+// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value.
 //
-// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+// value: A boolean that determines whether writes to the TensorArray
+// are allowed to grow the size.  By default, this is not allowed.
+// If not specified, defaults to false
+func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr {
+	return func(m optionalAttr) {
+		m["dynamic_size"] = value
+	}
+}
+
+// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value.
+//
+// value: If true (default), Tensors in the TensorArray are cleared
+// after being read.  This disables multiple read semantics but allows early
+// release of memory.
+// If not specified, defaults to true
+func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr {
+	return func(m optionalAttr) {
+		m["clear_after_read"] = value
+	}
+}
+
+// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value.
+//
+// value: If true (default is false), then all
+// elements in the TensorArray will be expected to have identical shapes.
+// This allows certain behaviors, like dynamically checking for
+// consistent shapes on write, and being able to fill in properly
+// shaped zero tensors on stack -- even if the element_shape attribute
+// is not fully defined.
+// If not specified, defaults to false
+func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr {
+	return func(m optionalAttr) {
+		m["identical_element_shapes"] = value
+	}
+}
+
+// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value.
+//
+// value: Overrides the name used for the temporary tensor_array
+// resource. Default value is the name of the 'TensorArray' op (which
+// is guaranteed unique).
+// If not specified, defaults to ""
+func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr {
+	return func(m optionalAttr) {
+		m["tensor_array_name"] = value
+	}
+}
+
+// An array of Tensors of given size.
+//
+// Write data via Write and read via Read or Pack.
 //
 // Arguments:
-//	input: A string tensor of the text to be processed.
-//	pattern: The regular expression to match the input.
+//	size: The size of the array.
+//	dtype: The type of the elements on the tensor_array.
 //
-// Returns A bool tensor with the same shape as `input`.
-func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) {
+// Returns The handle to the TensorArray, and a scalar used to control gradient flow.
+func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"pattern": pattern}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "StaticRegexFullMatch",
+		Type: "TensorArrayV3",
 		Input: []tf.Input{
-			input,
+			size,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
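+
+// A short construction sketch (same import assumptions as the earlier
+// sketches; the read/write ops that consume `handle` and `flow` are omitted):
+//
+// ```
+// s := op.NewScope()
+// size := op.Const(s, int32(8))
+// handle, flow := op.TensorArrayV3(s, size, tf.Float,
+// 	op.TensorArrayV3DynamicSize(false))
+// // `handle` identifies the array; `flow` is threaded through subsequent
+// // TensorArray read/write ops to enforce ordering.
+// ```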
+
+// Returns the truth value of NOT x element-wise.
+func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "LogicalNot",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent.
-type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr)
-
-// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value.
+// 3D real-valued fast Fourier transform.
 //
-// value: If True, the subtraction will be protected by a lock;
-// otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Sparse update '*var' as FOBOS algorithm with fixed learning rate.
+// Computes the 3-dimensional discrete Fourier transform of a real-valued signal
+// over the inner-most 3 dimensions of `input`.
 //
-// That is for rows we have grad for, we update var as follows:
-// prox_v = var - alpha * grad
-// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
+// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the
+// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
+// of `output`: the zero-frequency term, followed by the `fft_length / 2`
+// positive-frequency terms.
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	alpha: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//
-// Returns the created operation.
-func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyProximalGradientDescent",
-		Input: []tf.Input{
-			var_, alpha, l1, l2, grad, indices,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Real-valued fast Fourier transform.
-//
-// Computes the 1-dimensional discrete Fourier transform of a real-valued signal
-// over the inner-most dimension of `input`.
-//
-// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the
-// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,
-// followed by the `fft_length / 2` positive-frequency terms.
-//
-// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the
+// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the
 // corresponding dimension of `input`, the dimension is cropped. If it is larger,
 // the dimension is padded with zeros.
 //
 // Arguments:
 //	input: A float32 tensor.
-//	fft_length: An int32 tensor of shape [1]. The FFT length.
+//	fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
 //
-// Returns A complex64 tensor of the same rank as `input`. The inner-most
-//   dimension of `input` is replaced with the `fft_length / 2 + 1` unique
-//   frequency components of its 1D Fourier transform.
+// Returns A complex64 tensor of the same rank as `input`. The inner-most 3
+//   dimensions of `input` are replaced with their 3D Fourier transform. The
+//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
+//   components.
 //
 // @compatibility(numpy)
-// Equivalent to np.fft.rfft
+// Equivalent to np.fft.rfftn with 3 dimensions.
 // @end_compatibility
-func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "RFFT",
+		Type: "RFFT3D",
 		Input: []tf.Input{
 			input, fft_length,
 		},
@@ -20634,100 +22233,6 @@
 	return op.Output(0)
 }
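+
+// A short sketch illustrating the output-size rule above (same import
+// assumptions as the earlier sketches):
+//
+// ```
+// s := op.NewScope()
+// signal := op.Placeholder(s, tf.Float)    // a real-valued input volume
+// fftLen := op.Const(s, []int32{8, 8, 8})  // FFT size per inner dimension
+// spectrum := op.RFFT3D(s, signal, fftLen)
+// // `spectrum` is complex64; its inner-most dimension holds 8/2 + 1 = 5
+// // unique frequency components, per the Hermitian symmetry noted above.
+// ```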
 
-// Adds a value to the current value of a variable.
-//
-// Any ReadVariableOp with a control dependency on this op is guaranteed to
-// see the incremented value or a subsequent newer one.
-//
-// Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value by which the variable will be incremented.
-//
-// Returns the created operation.
-func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AssignAddVariableOp",
-		Input: []tf.Input{
-			resource, value,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// QuantizedReluAttr is an optional argument to QuantizedRelu.
-type QuantizedReluAttr func(optionalAttr)
-
-// QuantizedReluOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QUINT8
-func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Computes Quantized Rectified Linear: `max(features, 0)`
-//
-// Arguments:
-//
-//	min_features: The float value that the lowest quantized value represents.
-//	max_features: The float value that the highest quantized value represents.
-//
-// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
-func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizedRelu",
-		Input: []tf.Input{
-			features, min_features, max_features,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Reorders a SparseTensor into the canonical, row-major ordering.
-//
-// Note that by convention, all sparse ops preserve the canonical ordering along
-// increasing dimension number. The only time ordering can be violated is during
-// manual manipulation of the indices and values vectors to add entries.
-//
-// Reordering does not affect the shape of the SparseTensor.
-//
-// If the tensor has rank `R` and `N` non-empty values, `input_indices` has
-// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`.
-//
-// Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//
-// Returns 2-D.  `N x R` matrix with the same indices as input_indices, but
-// in canonical row-major ordering.1-D.  `N` non-empty values corresponding to `output_indices`.
-func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseReorder",
-		Input: []tf.Input{
-			input_indices, input_values, input_shape,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Computes rectified linear: `max(features, 0)`.
 func Relu(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
@@ -20792,6 +22297,667 @@
 	return scope.AddOperation(opspec)
 }
 
+// Divides sparse updates into the variable referenced by `resource`.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] /= updates[...]
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] /= updates[i, ...]
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...]
+//
+// Duplicate entries are handled correctly: if multiple `indices` reference
+// the same location, their contributions multiply.
+//
+// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
+// </div>
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
+//
+// Returns the created operation.
+func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterDiv",
+		Input: []tf.Input{
+			resource, indices, updates,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
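+
+// A short sketch of the vector-indices case above (same import assumptions
+// as the earlier sketches; VarHandleOp is assumed here as the usual way to
+// obtain a resource handle in this package, and the variable is assumed to
+// have been initialized before the scatter runs):
+//
+// ```
+// s := op.NewScope()
+// v := op.VarHandleOp(s, tf.Float, tf.MakeShape(4))
+// idx := op.Const(s, []int32{0, 2})
+// upd := op.Const(s, []float32{2, 4})
+// div := op.ResourceScatterDiv(s, v, idx, upd)  // v[0] /= 2; v[2] /= 4
+// ```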
+
+// ListDiffAttr is an optional argument to ListDiff.
+type ListDiffAttr func(optionalAttr)
+
+// ListDiffOutIdx sets the optional out_idx attribute to value.
+// If not specified, defaults to DT_INT32
+func ListDiffOutIdx(value tf.DataType) ListDiffAttr {
+	return func(m optionalAttr) {
+		m["out_idx"] = value
+	}
+}
+
+// Computes the difference between two lists of numbers or strings.
+//
+// Given a list `x` and a list `y`, this operation returns a list `out` that
+// represents all values that are in `x` but not in `y`. The returned list `out`
+// is sorted in the same order that the numbers appear in `x` (duplicates are
+// preserved). This operation also returns a list `idx` that represents the
+// position of each `out` element in `x`. In other words:
+//
+// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]`
+//
+// For example, given this input:
+//
+// ```
+// x = [1, 2, 3, 4, 5, 6]
+// y = [1, 3, 5]
+// ```
+//
+// This operation would return:
+//
+// ```
+// out ==> [2, 4, 6]
+// idx ==> [1, 3, 5]
+// ```
+//
+// Arguments:
+//	x: 1-D. Values to keep.
+//	y: 1-D. Values to remove.
+//
+// Returns 1-D values present in `x` but not in `y`, and 1-D positions of the `x` values preserved in `out`.
+func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ListDiff",
+		Input: []tf.Input{
+			x, y,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
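+
+// The example above, as a short graph-construction sketch (same import
+// assumptions as the earlier sketches):
+//
+// ```
+// s := op.NewScope()
+// x := op.Const(s, []int32{1, 2, 3, 4, 5, 6})
+// y := op.Const(s, []int32{1, 3, 5})
+// out, idx := op.ListDiff(s, x, y)
+// // Running the graph yields out ==> [2 4 6] and idx ==> [1 3 5].
+// ```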
+
+// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingAdadeltaParametersGradAccumDebug.
+type LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr)
+
+// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load Adadelta parameters with debug support.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the Adadelta optimization algorithm.
+//	accumulators: Value of accumulators used in the Adadelta optimization algorithm.
+//	updates: Value of updates used in the Adadelta optimization algorithm.
+//	gradient_accumulators: Value of gradient_accumulators used in the Adadelta optimization algorithm.
+//
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug",
+		Input: []tf.Input{
+			parameters, accumulators, updates, gradient_accumulators,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Return a tensor with the same shape and contents as the input tensor or value.
+func Identity(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Identity",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
+//
+// This is the angle \( \theta \in [-\pi, \pi] \) such that
+// \[ x = r \cos(\theta) \]
+// and
+// \[ y = r \sin(\theta) \]
+// where \(r = \sqrt{x^2 + y^2}\).
+func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atan2",
+		Input: []tf.Input{
+			y, x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
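+
+// A short sketch (same import assumptions as the earlier sketches) showing
+// why Atan2 differs from Atan of the quotient: it picks the quadrant from
+// the signs of both arguments:
+//
+// ```
+// s := op.NewScope()
+// y := op.Const(s, []float32{1, 1})
+// x := op.Const(s, []float32{1, -1})
+// theta := op.Atan2(s, y, x)
+// // Running the graph yields theta ==> [pi/4, 3*pi/4]; plain Atan(y/x)
+// // would give -pi/4 for the second entry.
+// ```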
+
+// Updates specified rows with values in `v`.
+//
+// Computes `x[i, :] = v; return x`.
+//
+// Arguments:
+//	x: A tensor of type `T`.
+//	i: A vector. Indices into the left-most dimension of `x`.
+//	v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size.
+//
+// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`.
+func InplaceUpdate(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "InplaceUpdate",
+		Input: []tf.Input{
+			x, i, v,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
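+
+// A short sketch (same import assumptions as the earlier sketches):
+//
+// ```
+// s := op.NewScope()
+// x := op.Const(s, [][]float32{{1, 1}, {2, 2}, {3, 3}})
+// i := op.Const(s, []int32{0, 2})
+// v := op.Const(s, [][]float32{{9, 9}, {8, 8}})
+// y := op.InplaceUpdate(s, x, i, v)
+// // Running the graph yields y ==> [[9 9] [2 2] [8 8]]: rows 0 and 2 of x
+// // are replaced by the rows of v.
+// ```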
+
+// OutfeedDequeueTupleAttr is an optional argument to OutfeedDequeueTuple.
+type OutfeedDequeueTupleAttr func(optionalAttr)
+
+// OutfeedDequeueTupleDeviceOrdinal sets the optional device_ordinal attribute to value.
+//
+// value: The TPU device to use. This should be -1 when the Op
+// is running on a TPU device, and >= 0 when the Op is running on the CPU
+// device.
+// If not specified, defaults to -1
+func OutfeedDequeueTupleDeviceOrdinal(value int64) OutfeedDequeueTupleAttr {
+	return func(m optionalAttr) {
+		m["device_ordinal"] = value
+	}
+}
+
+// Retrieve multiple values from the computation outfeed.
+//
+// This operation will block indefinitely until data is available. Output `i`
+// corresponds to XLA tuple element `i`.
+//
+// Arguments:
+//	dtypes: The element types of each element in `outputs`.
+//	shapes: The shapes of each tensor in `outputs`.
+//
+// Returns A list of tensors that will be read from the outfeed.
+func OutfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape, optional ...OutfeedDequeueTupleAttr) (outputs []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes, "shapes": shapes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "OutfeedDequeueTuple",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("OutfeedDequeueTuple", err)
+		return
+	}
+	return outputs
+}
+
+// Identity op for gradient debugging.
+//
+// This op is hidden from public in Python. It is used by TensorFlow Debugger to
+// register gradient tensors for gradient debugging.
+// This op operates on non-reference-type tensors.
+func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DebugGradientIdentity",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta.
+type ResourceSparseApplyAdadeltaAttr func(optionalAttr)
+
+// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Sparse update of '*var' according to the adadelta scheme.
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	accum_update: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	rho: Decay factor. Must be a scalar.
+//	epsilon: Constant factor. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//
+// Returns the created operation.
+func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceSparseApplyAdadelta",
+		Input: []tf.Input{
+			var_, accum, accum_update, lr, rho, epsilon, grad, indices,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Returns which elements of x are NaN.
+//
+// @compatibility(numpy)
+// Equivalent to np.isnan
+// @end_compatibility
+func IsNan(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IsNan",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
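+
+// A short sketch (same import assumptions as the earlier sketches, plus the
+// standard library `math` package):
+//
+// ```
+// s := op.NewScope()
+// x := op.Const(s, []float32{1, float32(math.NaN()), 3})
+// mask := op.IsNan(s, x)
+// // Running the graph yields mask ==> [false true false].
+// ```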
+
+// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter.
+type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr)
+
+// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of depthwise convolution with respect to the filter.
+//
+// Arguments:
+//	input: 4-D with shape based on `data_format`.  For example, if
+// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height,
+// in_width, in_channels]` tensor.
+//	filter_sizes: An integer vector representing the tensor shape of `filter`,
+// where `filter` is a 4-D
+// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor.
+//	out_backprop: 4-D with shape  based on `data_format`.
+// For example, if `data_format` is 'NHWC' then
+// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
+// the `filter` input of the convolution.
+func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DepthwiseConv2dNativeBackpropFilter",
+		Input: []tf.Input{
+			input, filter_sizes, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// MapUnstageAttr is an optional argument to MapUnstage.
+type MapUnstageAttr func(optionalAttr)
+
+// MapUnstageCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapUnstageCapacity(value int64) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// MapUnstageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapUnstageMemoryLimit(value int64) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// MapUnstageContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapUnstageContainer(value string) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MapUnstageSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapUnstageSharedName(value string) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes and returns the values associated with the key
+//
+// from the underlying container. If the underlying container
+// does not contain this key, the op will block until it does.
+func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageAttr) (values []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MapUnstage",
+		Input: []tf.Input{
+			key, indices,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("MapUnstage", err)
+		return
+	}
+	return values
+}
+
+// An op enabling differentiation of TPU Embeddings.
+//
+// This op simply returns its first input, which is assumed to have been sliced
+// from the Tensors returned by TPUEmbeddingDequeueActivations. The presence of
+// this op, and its first argument being a trainable Variable, enables automatic
+// differentiation of graphs containing embeddings via the TPU Embedding Python
+// libraries.
+//
+// Arguments:
+//	embedding_variable: A trainable variable, enabling optimizers to find this op.
+//	sliced_activations: The embedding activations Tensor to return.
+//	table_id: The id of the table in the embedding layer configuration from which
+// these activations were computed.
+//	lookup_id: Identifier of the set of embedding indices which produced these
+// activations.
+func TPUEmbeddingActivations(scope *Scope, embedding_variable tf.Output, sliced_activations tf.Output, table_id int64, lookup_id int64) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"table_id": table_id, "lookup_id": lookup_id}
+	opspec := tf.OpSpec{
+		Type: "TPUEmbeddingActivations",
+		Input: []tf.Input{
+			embedding_variable, sliced_activations,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// BatchToSpace for 4-D tensors of type T.
+//
+// This is a legacy version of the more general BatchToSpaceND.
+//
+// Rearranges (permutes) data from batch into blocks of spatial data, followed by
+// cropping. This is the reverse transformation of SpaceToBatch. More specifically,
+// this op outputs a copy of the input tensor where values from the `batch`
+// dimension are moved in spatial blocks to the `height` and `width` dimensions,
+// followed by cropping along the `height` and `width` dimensions.
+//
+// Arguments:
+//	input: 4-D tensor with shape
+// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
+//   depth]`. Note that the batch size of the input tensor must be divisible by
+// `block_size * block_size`.
+//	crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
+// how many elements to crop from the intermediate result across the spatial
+// dimensions as follows:
+//
+//     crops = [[crop_top, crop_bottom], [crop_left, crop_right]]
+//
+//
+// Returns 4-D with shape `[batch, height, width, depth]`, where:
+//
+//       height = height_pad - crop_top - crop_bottom
+//       width = width_pad - crop_left - crop_right
+//
+// The attr `block_size` must be greater than one. It indicates the block size.
+//
+// Some examples:
+//
+// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:
+//
+// ```
+// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 1]` and value:
+//
+// ```
+// x = [[[[1], [2]], [[3], [4]]]]
+// ```
+//
+// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:
+//
+// ```
+// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 3]` and value:
+//
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
+//
+// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:
+//
+// ```
+// x = [[[[1], [3]], [[9], [11]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[1, 4, 4, 1]` and value:
+//
+// ```
+// x = [[[1],   [2],  [3],  [4]],
+//      [[5],   [6],  [7],  [8]],
+//      [[9],  [10], [11],  [12]],
+//      [[13], [14], [15],  [16]]]
+// ```
+//
+// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:
+//
+// ```
+// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
+//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[2, 2, 4, 1]` and value:
+//
+// ```
+// x = [[[[1], [2], [3], [4]],
+//       [[5], [6], [7], [8]]],
+//      [[[9], [10], [11], [12]],
+//       [[13], [14], [15], [16]]]]
+// ```
+func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"block_size": block_size}
+	opspec := tf.OpSpec{
+		Type: "BatchToSpace",
+		Input: []tf.Input{
+			input, crops,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
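+
+// Example (1) above, as a short graph-construction sketch (same import
+// assumptions as the earlier sketches):
+//
+// ```
+// s := op.NewScope()
+// batched := op.Const(s, [][][][]float32{{{{1}}}, {{{2}}}, {{{3}}}, {{{4}}}})  // shape [4,1,1,1]
+// crops := op.Const(s, [][]int32{{0, 0}, {0, 0}})
+// spatial := op.BatchToSpace(s, batched, crops, 2)
+// // Running the graph yields a [1,2,2,1] tensor: [[[[1] [2]] [[3] [4]]]].
+// ```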
+
+// Produces a summary of any statistics recorded by the given statistics manager.
+func ExperimentalStatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalStatsAggregatorSummary",
+		Input: []tf.Input{
+			iterator,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Makes a new iterator from the given `dataset` and stores it in `iterator`.
+//
+// This operation may be executed multiple times. Each execution will reset the
+// iterator in `iterator` to the first element of `dataset`.
+//
+// Returns the created operation.
+func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MakeIterator",
+		Input: []tf.Input{
+			dataset, iterator,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Component-wise divides a SparseTensor by a dense Tensor.
 //
 // *Limitation*: this Op only broadcasts the dense side to the sparse side, but not
@@ -20819,46 +22985,78 @@
 	return op.Output(0)
 }
 
-// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad.
-type FractionalAvgPoolGradAttr func(optionalAttr)
+// Creates a dataset that batches and pads `batch_size` elements from the input.
+//
+// Arguments:
+//
+//	batch_size: A scalar representing the number of elements to accumulate in a
+// batch.
+//	padded_shapes: A list of int64 tensors representing the desired padded shapes
+// of the corresponding output components. These shapes may be partially
+// specified, using `-1` to indicate that a particular dimension should be
+// padded to the maximum size of all batch elements.
+//	padding_values: A list of scalars containing the padding value to use for
+// each of the outputs.
+//
+func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "PaddedBatchDataset",
+		Input: []tf.Input{
+			input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value.
+// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum.
+type ResourceApplyMomentumAttr func(optionalAttr)
+
+// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value.
 //
-// value: When set to True, it means when pooling, the values at the boundary
-// of adjacent pooling cells are used by both cells. For example:
-//
-// `index  0  1  2  3  4`
-//
-// `value  20 5  16 3  7`
-//
-// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-// The result would be [41/3, 26/3] for fractional avg pooling.
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
 // If not specified, defaults to false
-func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr {
+func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr {
 	return func(m optionalAttr) {
-		m["overlapping"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Computes gradient of the FractionalAvgPool function.
+// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
 //
-// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for
-// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of
-// out_backprop to those indices that form the same pooling cell. Therefore, we
-// just need to know the shape of original input tensor, instead of the whole
-// tensor.
+// value: If `True`, the tensor passed to compute grad will be
+// var - lr * momentum * accum, so in the end, the var you get is actually
+// var - lr * momentum * accum.
+// If not specified, defaults to false
+func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_nesterov"] = value
+	}
+}
+
+// Update '*var' according to the momentum scheme.
+//
+// Set use_nesterov = True if you want to use Nesterov momentum.
+//
+// accum = accum * momentum + grad
+// var -= lr * accum
 //
 // Arguments:
-//	orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool`
-//	out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
-// w.r.t. the output of `fractional_avg_pool`.
-//	row_pooling_sequence: row pooling sequence, form pooling region with
-// col_pooling_sequence.
-//	col_pooling_sequence: column pooling sequence, form pooling region with
-// row_pooling sequence.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	grad: The gradient.
+//	momentum: Momentum. Must be a scalar.
 //
-// Returns 4-D.  Gradients w.r.t. the input of `fractional_avg_pool`.
-func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -20867,9 +23065,56 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FractionalAvgPoolGrad",
+		Type: "ResourceApplyMomentum",
 		Input: []tf.Input{
-			orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence,
+			var_, accum, lr, grad, momentum,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
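+
+// A one-step numeric illustration of the update rule above (plain
+// arithmetic, with made-up values): with momentum = 0.9, lr = 0.1,
+// accum = 1.0 and grad = 0.5, the new accum is 0.9*1.0 + 0.5 = 1.4, and
+// var decreases by lr * accum = 0.1 * 1.4 = 0.14.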
+
+// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad.
+type MaxPoolGradGradAttr func(optionalAttr)
+
+// MaxPoolGradGradDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Computes second-order gradients of the maxpooling function.
+//
+// Arguments:
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns Gradients of gradients w.r.t. the input to `max_pool`.
+func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MaxPoolGradGrad",
+		Input: []tf.Input{
+			orig_input, orig_output, grad,
 		},
 		Attrs: attrs,
 	}
@@ -20877,96 +23122,277 @@
 	return op.Output(0)
 }
 
-// QuantizedConv2DAttr is an optional argument to QuantizedConv2D.
-type QuantizedConv2DAttr func(optionalAttr)
-
-// QuantizedConv2DOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QINT32
-func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// QuantizedConv2DDilations sets the optional dilations attribute to value.
+// Returns the last element of the input list as well as a list with all but that element.
 //
-// value: 1-D tensor of length 4.  The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each
-// filter element on that dimension. The dimension order is determined by the
-// value of `data_format`, see above for details. Dilations in the batch and
-// depth dimensions must be 1.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 >
-func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes a 2D convolution given quantized 4D input and filter tensors.
+// Fails if the list is empty.
 //
-// The inputs are quantized tensors where the lowest value represents the real
-// number of the associated minimum, and the highest represents the maximum.
-// This means that you can only interpret the quantized output in the same way, by
-// taking the returned minimum and maximum values into account.
-//
-// Arguments:
-//
-//	filter: filter's input_depth dimension must match input's depth dimensions.
-//	min_input: The float value that the lowest quantized input value represents.
-//	max_input: The float value that the highest quantized input value represents.
-//	min_filter: The float value that the lowest quantized filter value represents.
-//	max_filter: The float value that the highest quantized filter value represents.
-//	strides: The stride of the sliding window for each dimension of the input
-// tensor.
-//	padding: The type of padding algorithm to use.
-//
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) {
+// Arguments:
+//	input_handle: the input list
+//	element_shape: the shape of the output tensor
+//	element_dtype: the type of elements in the list
+//
+// Returns the remaining list and the withdrawn last element of the list.
+func TensorListPopBack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
 	opspec := tf.OpSpec{
-		Type: "QuantizedConv2D",
+		Type: "TensorListPopBack",
 		Input: []tf.Input{
-			input, filter, min_input, max_input, min_filter, max_filter,
+			input_handle, element_shape,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0), op.Output(1)
 }
 
-// ResourceGatherAttr is an optional argument to ResourceGather.
-type ResourceGatherAttr func(optionalAttr)
+// Determine the script codes of a given tensor of Unicode integer code points.
+//
+// This operation converts Unicode code points to script codes corresponding to
+// each code point. Script codes correspond to International Components for
+// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html.
+// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will
+// match input shape.
+//
+// Arguments:
+//	input: A Tensor of int32 Unicode code points.
+//
+// Returns A Tensor of int32 script codes corresponding to each input code point.
+func UnicodeScript(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "UnicodeScript",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
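+
+// Illustrative sketch (assumes a Scope `s`):
+//
+// ```
+// cps := op.Const(s.SubScope("cps"), []int32{0x48, 0x05D0}) // 'H', Hebrew alef
+// scripts := op.UnicodeScript(s, cps) // one ICU UScriptCode per code point
+// ```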
 
-// ResourceGatherValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func ResourceGatherValidateIndices(value bool) ResourceGatherAttr {
+// Creates a sequence of numbers.
+//
+// This operation creates a sequence of numbers that begins at `start` and
+// extends by increments of `delta` up to but not including `limit`.
+//
+// For example:
+//
+// ```
+// # 'start' is 3
+// # 'limit' is 18
+// # 'delta' is 3
+// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
+// ```
+//
+// Arguments:
+//	start: 0-D (scalar). First entry in the sequence.
+//	limit: 0-D (scalar). Upper limit of sequence, exclusive.
+//	delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`.
+//
+// Returns 1-D.
+func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Range",
+		Input: []tf.Input{
+			start, limit, delta,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
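+
+// The example above, sketched with the Go wrapper (assumes a Scope `s`):
+//
+// ```
+// start := op.Const(s.SubScope("start"), int32(3))
+// limit := op.Const(s.SubScope("limit"), int32(18))
+// delta := op.Const(s.SubScope("delta"), int32(3))
+// seq := op.Range(s, start, limit, delta) // ==> [3 6 9 12 15]
+// ```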
+
+// Computes second-order gradients of the maxpooling function.
+//
+// Arguments:
+//	input: The original input.
+//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
+// input of `max_pool`.
+//	argmax: The indices of the maximum values chosen for each output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns Gradients of gradients w.r.t. the input of `max_pool`.
+func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "MaxPoolGradGradWithArgmax",
+		Input: []tf.Input{
+			input, grad, argmax,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Return a slice from 'input'.
+//
+// The output tensor is a tensor with dimensions described by 'size'
+// whose values are extracted from 'input' starting at the offsets in
+// 'begin'.
+//
+// *Requirements*:
+//   0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)
+//
+// Arguments:
+//
+//	begin: begin[i] specifies the offset into the 'i'th dimension of
+// 'input' to slice from.
+//	size: size[i] specifies the number of elements of the 'i'th dimension
+// of 'input' to slice. If size[i] is -1, all remaining elements in dimension
+// i are included in the slice (i.e. this is equivalent to setting
+// size[i] = input.dim_size(i) - begin[i]).
+func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Slice",
+		Input: []tf.Input{
+			input, begin, size,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
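+
+// Illustrative sketch of the begin/size contract (assumes a Scope `s`):
+//
+// ```
+// in := op.Const(s.SubScope("in"), [][]int32{{1, 2, 3}, {4, 5, 6}})
+// begin := op.Const(s.SubScope("begin"), []int32{0, 1})
+// size := op.Const(s.SubScope("size"), []int32{2, -1}) // -1: rest of the dim
+// out := op.Slice(s, in, begin, size) // ==> [[2 3] [5 6]]
+// ```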
+
+// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
+//
+// The Hurwitz zeta function is defined as:
+//
+//
+// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
+func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Zeta",
+		Input: []tf.Input{
+			x, q,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the cardinality of `input_dataset`.
+//
+// Arguments:
+//	input_dataset: A variant tensor representing the dataset to return cardinality for.
+//
+// Returns The cardinality of `input_dataset`. Named constants are used to represent
+// infinite and unknown cardinality.
+func ExperimentalDatasetCardinality(scope *Scope, input_dataset tf.Output) (cardinality tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalDatasetCardinality",
+		Input: []tf.Input{
+			input_dataset,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap.
+type TakeManySparseFromTensorsMapAttr func(optionalAttr)
+
+// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value.
+//
+// value: The container name for the `SparseTensorsMap` read by this op.
+// If not specified, defaults to ""
+func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["container"] = value
 	}
 }
 
-// Gather slices from the variable pointed to by `resource` according to `indices`.
+// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value.
 //
-// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
+// value: The shared name for the `SparseTensorsMap` read by this op.
+// It should not be blank; rather the `shared_name` or unique Operation name
+// of the Op that created the original `SparseTensorsMap` should be used.
+// If not specified, defaults to ""
+func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them.
 //
-// ```python
-//     # Scalar indices
-//     output[:, ..., :] = params[indices, :, ... :]
+// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where
+// `N` is the minibatch size and the rows correspond to the output handles of
+// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`.  The ranks of the
+// original `SparseTensor` objects that went into the given input ops must all
+// match.  When the final `SparseTensor` is created, it has rank one
+// higher than the ranks of the incoming `SparseTensor` objects
+// (they have been concatenated along a new row dimension on the left).
 //
-//     # Vector indices
-//     output[i, :, ..., :] = params[indices[i], :, ... :]
+// The output `SparseTensor` object's shape values for all dimensions but the
+// first are the max across the input `SparseTensor` objects' shape values
+// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
+// size.
 //
-//     # Higher rank indices
-//     output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
+// The input `SparseTensor` objects' indices are assumed ordered in
+// standard lexicographic order.  If this is not the case, after this
+// step run `SparseReorder` to restore index ordering.
+//
+// For example, if the handles represent an input, which is a `[2, 3]` matrix
+// representing two original `SparseTensor` objects:
+//
 // ```
-func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) {
+//     index = [ 0]
+//             [10]
+//             [20]
+//     values = [1, 2, 3]
+//     shape = [50]
+// ```
+//
+// and
+//
+// ```
+//     index = [ 2]
+//             [10]
+//     values = [4, 5]
+//     shape = [30]
+// ```
+//
+// then the final `SparseTensor` will be:
+//
+// ```
+//     index = [0  0]
+//             [0 10]
+//             [0 20]
+//             [1  2]
+//             [1 10]
+//     values = [1, 2, 3, 4, 5]
+//     shape = [2 50]
+// ```
+//
+// Arguments:
+//	sparse_handles: 1-D, The `N` serialized `SparseTensor` objects.
+// Shape: `[N]`.
+//	dtype: The `dtype` of the `SparseTensor` objects stored in the
+// `SparseTensorsMap`.
+//
+// Returns 2-D, the `indices` of the minibatch `SparseTensor`; 1-D, the `values`
+// of the minibatch `SparseTensor`; and 1-D, the `shape` of the minibatch
+// `SparseTensor`.
+func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -20975,60 +23401,62 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceGather",
+		Type: "TakeManySparseFromTensorsMap",
 		Input: []tf.Input{
-			resource, indices,
+			sparse_handles,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Delete the TensorArray from its resource container.
+// ResourceSparseApplyKerasMomentumAttr is an optional argument to ResourceSparseApplyKerasMomentum.
+type ResourceSparseApplyKerasMomentumAttr func(optionalAttr)
+
+// ResourceSparseApplyKerasMomentumUseLocking sets the optional use_locking attribute to value.
 //
-// This enables the user to close and release the resource in the middle
-// of a step/run.
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyKerasMomentumUseLocking(value bool) ResourceSparseApplyKerasMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// ResourceSparseApplyKerasMomentumUseNesterov sets the optional use_nesterov attribute to value.
+//
+// value: If `True`, the tensor passed to compute grad will be
+// var + momentum * accum, so in the end, the var you get is actually
+// var + momentum * accum.
+// If not specified, defaults to false
+func ResourceSparseApplyKerasMomentumUseNesterov(value bool) ResourceSparseApplyKerasMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_nesterov"] = value
+	}
+}
+
+// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
+//
+// Set use_nesterov = True if you want to use Nesterov momentum.
+//
+// That is for rows we have grad for, we update var and accum as follows:
+//
+// accum = accum * momentum - lr * grad
+// var += accum
 //
 // Arguments:
-//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	momentum: Momentum. Must be a scalar.
 //
 // Returns the created operation.
-func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayCloseV3",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// StatelessMultinomialAttr is an optional argument to StatelessMultinomial.
-type StatelessMultinomialAttr func(optionalAttr)
-
-// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value.
-// If not specified, defaults to DT_INT64
-func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr {
-	return func(m optionalAttr) {
-		m["output_dtype"] = value
-	}
-}
-
-// Draws samples from a multinomial distribution.
-//
-// Arguments:
-//	logits: 2-D Tensor with shape `[batch_size, num_classes]`.  Each slice `[i, :]`
-// represents the unnormalized log probabilities for all classes.
-//	num_samples: 0-D.  Number of independent samples to draw for each row slice.
-//	seed: 2 seeds (shape [2]).
-//
-// Returns 2-D Tensor with shape `[batch_size, num_samples]`.  Each slice `[i, :]`
-// contains the drawn class labels with range `[0, num_classes)`.
-func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) {
+func ResourceSparseApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyKerasMomentumAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -21037,115 +23465,298 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StatelessMultinomial",
+		Type: "ResourceSparseApplyKerasMomentum",
 		Input: []tf.Input{
-			logits, num_samples, seed,
+			var_, accum, lr, grad, indices, momentum,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
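+
+// The per-row update above, as a plain-Go scalar sketch (illustrative only;
+// the op itself runs inside the runtime on resource variables):
+//
+// ```
+// for j, i := range indices {
+// 	accum[i] = accum[i]*momentum - lr*grad[j]
+// 	vars[i] += accum[i]
+// }
+// ```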
 
-// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
-//
-// This Op does not require `a_indices` be sorted in standard lexicographic order.
-//
-// Arguments:
-//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
-//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
-//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
-//	b: `ndims`-D Tensor.  With shape `a_shape`.
-func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseTensorDenseAdd",
-		Input: []tf.Input{
-			a_indices, a_values, a_shape, b,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// ResourceApplyAdamWithAmsgradAttr is an optional argument to ResourceApplyAdamWithAmsgrad.
+type ResourceApplyAdamWithAmsgradAttr func(optionalAttr)
 
-// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation.
-type SparseToSparseSetOperationAttr func(optionalAttr)
-
-// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr {
+// ResourceApplyAdamWithAmsgradUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var, m, and v tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAdamWithAmsgradUseLocking(value bool) ResourceApplyAdamWithAmsgradAttr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Applies set operation along last dimension of 2 `SparseTensor` inputs.
+// Update '*var' according to the Adam algorithm.
 //
-// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
-//
-// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the
-// order and range of `set1` and `set2` indices.
-//
-// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`,
-// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same
-// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but
-// ignored.
-//
-// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
-// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
-// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
-// ignored.
-//
-// If `validate_indices` is `True`, this op validates the order and range of `set1`
-// and `set2` indices.
-//
-// Output `result` is a `SparseTensor` represented by `result_indices`,
-// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-// dimension contains the result of `set_operation` applied to the corresponding
-// `[0...n-1]` dimension of `set`.
+// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
+// $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+// $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+// $$vhat_t := \max(vhat_{t-1}, v_t)$$
+// $$variable := variable - lr_t * m_t / (\sqrt{vhat_t} + \epsilon)$$
 //
 // Arguments:
-//	set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-// order.
-//	set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-// order.
-//	set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must
-// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the
-// max set size across `0...n-1` dimensions.
-//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
-// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the
-// max set size across `0...n-1` dimensions.
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	v: Should be from a Variable().
+//	vhat: Should be from a Variable().
+//	beta1_power: Must be a scalar.
+//	beta2_power: Must be a scalar.
+//	lr: Scaling factor. Must be a scalar.
+//	beta1: Momentum factor. Must be a scalar.
+//	beta2: Momentum factor. Must be a scalar.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
 //
-//
-// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-// is the max result set size across all `0...n-1` dimensions.
-func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
+// Returns the created operation.
+func ResourceApplyAdamWithAmsgrad(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, vhat tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamWithAmsgradAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"set_operation": set_operation}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseToSparseSetOperation",
+		Type: "ResourceApplyAdamWithAmsgrad",
 		Input: []tf.Input{
-			set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape,
+			var_, m, v, vhat, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
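+
+// The formulas above as a scalar Go sketch (illustrative; the op applies the
+// same update elementwise inside the runtime):
+//
+// ```
+// lrT := lr * math.Sqrt(1-beta2Power) / (1 - beta1Power)
+// m = beta1*m + (1-beta1)*g
+// v = beta2*v + (1-beta2)*g*g
+// vhat = math.Max(vhat, v)
+// variable -= lrT * m / (math.Sqrt(vhat) + epsilon)
+// ```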
+
+// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey.
+type MapUnstageNoKeyAttr func(optionalAttr)
+
+// MapUnstageNoKeyCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// MapUnstageNoKeyContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MapUnstageNoKeySharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes and returns a random (key, value) pair from the underlying container.
+//
+// If the underlying container does not contain elements, the op will block
+// until it does.
+func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MapUnstageNoKey",
+		Input: []tf.Input{
+			indices,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	key = op.Output(idx)
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("MapUnstageNoKey", err)
+		return
+	}
+	return key, values
+}
+
+// HashTableV2Attr is an optional argument to HashTableV2.
+type HashTableV2Attr func(optionalAttr)
+
+// HashTableV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this table is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func HashTableV2Container(value string) HashTableV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// HashTableV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this table is shared under the given name across
+// multiple sessions.
+// If not specified, defaults to ""
+func HashTableV2SharedName(value string) HashTableV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
+//
+// value: If true and shared_name is empty, the table is shared
+// using the node name.
+// If not specified, defaults to false
+func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr {
+	return func(m optionalAttr) {
+		m["use_node_name_sharing"] = value
+	}
+}
+
+// Creates a non-initialized hash table.
+//
+// This op creates a hash table, specifying the type of its keys and values.
+// Before using the table you will have to initialize it.  After initialization the
+// table will be immutable.
+//
+// Arguments:
+//	key_dtype: Type of the table keys.
+//	value_dtype: Type of the table values.
+//
+// Returns Handle to a table.
+func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "HashTableV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
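+
+// Hedged usage sketch (assumes a Scope `s`; initialization, e.g. via
+// LookupTableImportV2, is required before the first lookup and is not shown):
+//
+// ```
+// table := op.HashTableV2(s, tf.String, tf.Int64,
+// 	op.HashTableV2SharedName("vocab"))
+// ```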
+
+// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingMomentumParametersGradAccumDebug.
+type RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve Momentum embedding parameters with debug support.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns Parameter parameters updated by the Momentum optimization algorithm;
+// Parameter momenta updated by the Momentum optimization algorithm; Parameter
+// gradient_accumulators updated by the Momentum optimization algorithm.
+func RetrieveTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr) (parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Enqueue a Tensor on the computation outfeed.
+//
+// Arguments:
+//	input: A tensor that will be inserted into the outfeed queue.
+//
+// Returns the created operation.
+func OutfeedEnqueue(scope *Scope, input tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "OutfeedEnqueue",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Outputs a `Summary` protocol buffer with a histogram.
+//
+// The generated
+// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
+// has one summary value containing a histogram for `values`.
+//
+// This op reports an `InvalidArgument` error if any value is not finite.
+//
+// Arguments:
+//	tag: Scalar.  Tag to use for the `Summary.Value`.
+//	values: Any shape. Values to use to build the histogram.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "HistogramSummary",
+		Input: []tf.Input{
+			tag, values,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
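+
+// Minimal sketch (assumes a Scope `s`; the output is a serialized Summary
+// proto, typically handed to a summary writer):
+//
+// ```
+// tag := op.Const(s.SubScope("tag"), "activations")
+// vals := op.Const(s.SubScope("vals"), []float32{0.1, 0.4, 0.4, 0.9})
+// summary := op.HistogramSummary(s, tag, vals)
+// ```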
+
 // MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2.
 type MutableDenseHashTableV2Attr func(optionalAttr)
 
@@ -21246,6 +23857,999 @@
 	return op.Output(0)
 }
 
+// RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingADAMParametersGradAccumDebug.
+type RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve ADAM embedding parameters with debug support.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns Parameter parameters updated by the ADAM optimization algorithm;
+// Parameter momenta updated by the ADAM optimization algorithm; Parameter
+// velocities updated by the ADAM optimization algorithm; Parameter
+// gradient_accumulators updated by the ADAM optimization algorithm.
+func RetrieveTPUEmbeddingADAMParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output, gradient_accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// CudnnRNNAttr is an optional argument to CudnnRNN.
+type CudnnRNNAttr func(optionalAttr)
+
+// CudnnRNNRnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNRnnMode(value string) CudnnRNNAttr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNInputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNInputMode(value string) CudnnRNNAttr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNDirection sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNDirection(value string) CudnnRNNAttr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNDropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNDropout(value float32) CudnnRNNAttr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNSeed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNSeed(value int64) CudnnRNNAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNSeed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNSeed2(value int64) CudnnRNNAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// CudnnRNNIsTraining sets the optional is_training attribute to value.
+// If not specified, defaults to true
+func CudnnRNNIsTraining(value bool) CudnnRNNAttr {
+	return func(m optionalAttr) {
+		m["is_training"] = value
+	}
+}
+
+// An RNN backed by cuDNN.
+//
+// Computes the RNN from the input and initial states, with respect to the params
+// buffer.
+//
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicates whether there is a linear projection between the input and
+//   the actual computation before the first layer. 'skip_input' is only allowed
+//   when input_size == num_units; 'auto_select' implies 'skip_input' when
+//   input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used. Should be
+//   "unidirectional" or "bidirectional".
+// dropout: Dropout probability. When set to 0., dropout is disabled.
+// seed: The 1st part of a seed to initialize dropout.
+// seed2: The 2nd part of a seed to initialize dropout.
+// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
+// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
+//     num_units].
+// input_c: For LSTM, a 3-D tensor with the shape of
+//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
+// params: A 1-D tensor that contains the weights and biases in an opaque layout.
+//     The size must be created through CudnnRNNParamsSize, and initialized
+//     separately. Note that they might not be compatible across different
+//     generations. So it is a good idea to save and restore them.
+// output: A 3-D tensor with the shape of [seq_length, batch_size,
+//     dir * num_units].
+// output_h: The same shape as input_h.
+// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
+// is_training: Indicates whether this operation is used for inference or
+//   training.
+// reserve_space: An opaque tensor that can be used in backprop calculation. It
+//   is only produced if is_training is true.
+func CudnnRNN(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, optional ...CudnnRNNAttr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNN",
+		Input: []tf.Input{
+			input, input_h, input_c, params,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// DecodeCompressedAttr is an optional argument to DecodeCompressed.
+type DecodeCompressedAttr func(optionalAttr)
+
+// DecodeCompressedCompressionType sets the optional compression_type attribute to value.
+//
+// value: A scalar containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+// If not specified, defaults to ""
+func DecodeCompressedCompressionType(value string) DecodeCompressedAttr {
+	return func(m optionalAttr) {
+		m["compression_type"] = value
+	}
+}
+
+// Decompress strings.
+//
+// This op decompresses each element of the `bytes` input `Tensor`, which
+// is assumed to be compressed using the given `compression_type`.
+//
+// The `output` is a string `Tensor` of the same shape as `bytes`,
+// each element containing the decompressed data from the corresponding
+// element in `bytes`.
+//
+// Arguments:
+//	bytes: A Tensor of string which is compressed.
+//
+// Returns A Tensor with the same shape as input `bytes`, uncompressed
+// from bytes.
+func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeCompressed",
+		Input: []tf.Input{
+			bytes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
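+
+// Illustrative sketch (assumes a Scope `s` and a string tensor `compressed`
+// holding GZIP data):
+//
+// ```
+// decoded := op.DecodeCompressed(s, compressed,
+// 	op.DecodeCompressedCompressionType("GZIP"))
+// ```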
+
+// EnterAttr is an optional argument to Enter.
+type EnterAttr func(optionalAttr)
+
+// EnterIsConstant sets the optional is_constant attribute to value.
+//
+// value: If true, the output is constant within the child frame.
+// If not specified, defaults to false
+func EnterIsConstant(value bool) EnterAttr {
+	return func(m optionalAttr) {
+		m["is_constant"] = value
+	}
+}
+
+// EnterParallelIterations sets the optional parallel_iterations attribute to value.
+//
+// value: The number of iterations allowed to run in parallel.
+// If not specified, defaults to 10
+func EnterParallelIterations(value int64) EnterAttr {
+	return func(m optionalAttr) {
+		m["parallel_iterations"] = value
+	}
+}
+
+// Creates or finds a child frame, and makes `data` available to the child frame.
+//
+// This op is used together with `Exit` to create loops in the graph.
+// The unique `frame_name` is used by the `Executor` to identify frames. If
+// `is_constant` is true, `output` is a constant in the child frame; otherwise
+// it may be changed in the child frame. At most `parallel_iterations` iterations
+// are run in parallel in the child frame.
+//
+// Arguments:
+//	data: The tensor to be made available to the child frame.
+//	frame_name: The name of the child frame.
+//
+// Returns The same tensor as `data`.
+func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"frame_name": frame_name}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Enter",
+		Input: []tf.Input{
+			data,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TryRpcAttr is an optional argument to TryRpc.
+type TryRpcAttr func(optionalAttr)
+
+// TryRpcProtocol sets the optional protocol attribute to value.
+//
+// value: RPC protocol to use.  Empty string means use the default protocol.
+// Options include 'grpc'.
+// If not specified, defaults to ""
+func TryRpcProtocol(value string) TryRpcAttr {
+	return func(m optionalAttr) {
+		m["protocol"] = value
+	}
+}
+
+// TryRpcFailFast sets the optional fail_fast attribute to value.
+//
+// value: `boolean`. If `true` (default), then failures to connect
+// (i.e., the server does not immediately respond) cause an RPC failure.
+// If not specified, defaults to true
+func TryRpcFailFast(value bool) TryRpcAttr {
+	return func(m optionalAttr) {
+		m["fail_fast"] = value
+	}
+}
+
+// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value.
+//
+// value: `int`. If `0` (default), then the kernel will run the RPC
+// request and only time out if the RPC deadline passes or the session times out.
+// If this value is greater than `0`, then the op will raise an exception if
+// the RPC takes longer than `timeout_in_ms`.
+// If not specified, defaults to 0
+func TryRpcTimeoutInMs(value int64) TryRpcAttr {
+	return func(m optionalAttr) {
+		m["timeout_in_ms"] = value
+	}
+}
+
+// Perform batches of RPC requests.
+//
+// This op asynchronously performs either a single RPC request, or a batch
+// of requests.  RPC requests are defined by three main parameters:
+//
+//   - `address` (the host+port or BNS address of the request)
+//   - `method` (the method name for the request)
+//   - `request` (the serialized proto string, or vector of strings,
+//      of the RPC request argument).
+//
+// For example, if you have an RPC service running on port localhost:2345,
+// and its interface is configured with the following proto declaration:
+//
+// ```
+// service MyService {
+//   rpc MyMethod(MyRequestProto) returns (MyResponseProto) {
+//   }
+// };
+// ```
+//
+// then call this op with arguments:
+//
+// ```
+// address = "localhost:2345"
+// method = "MyService/MyMethod"
+// ```
+//
+// The `request` tensor is a string tensor representing serialized `MyRequestProto`
+// strings; and the output string tensor `response` will have the same shape
+// and contain (upon successful completion) corresponding serialized
+// `MyResponseProto` strings.
+//
+// For example, to send a single, empty, `MyRequestProto`, call
+// this op with `request = ""`.  To send 5 **parallel** empty requests,
+// call this op with `request = ["", "", "", "", ""]`.
+//
+// More generally, one can create a batch of `MyRequestProto` serialized protos
+// from regular batched tensors using the `encode_proto` op, and convert
+// the response `MyResponseProto` serialized protos to batched tensors
+// using the `decode_proto` op.
+//
+// **NOTE** Working with serialized proto strings is faster than instantiating
+// actual proto objects in memory, so no performance degradation is expected
+// compared to writing custom kernels for this workflow.
+//
+// Unlike the standard `Rpc` op, if the connection fails or the remote worker
+// returns an error status, this op does **not** reraise the exception.
+// Instead, the `status_code` and `status_message` entry for the corresponding RPC
+// call is set with the error returned from the RPC call.  The `response` tensor
+// will contain valid response values for those minibatch entries whose RPCs did
+// not fail; the rest of the entries will have empty strings.
+//
+// Arguments:
+//	address: `0-D` or `1-D`.  The address (i.e. host_name:port) of the RPC server.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `method` and `request`.
+//	method: `0-D` or `1-D`.  The method address on the RPC server.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `address` and `request`.
+//	request: `0-D` or `1-D`.  Serialized proto strings: the rpc request argument.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `address` and `method`.
+//
+// Returns Same shape as `request`: serialized proto strings with the rpc responses;
+// same shape as `request`: values corresponding to tensorflow Status enum codes; and
+// same shape as `request`: values corresponding to Status messages returned from the
+// RPC calls.
+func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "TryRpc",
+		Input: []tf.Input{
+			address, method, request,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
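+
+// A sketch of the single-request case from the example above (assumes a
+// Scope `s`; the empty request string stands for an empty serialized
+// MyRequestProto):
+//
+// ```
+// address := op.Const(s.SubScope("addr"), "localhost:2345")
+// method := op.Const(s.SubScope("method"), "MyService/MyMethod")
+// request := op.Const(s.SubScope("req"), "")
+// resp, code, msg := op.TryRpc(s, address, method, request,
+// 	op.TryRpcTimeoutInMs(5000))
+// ```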
+
+// Add all input tensors element wise.
+//
+// Arguments:
+//	inputs: Must all be the same size and shape.
+func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AddN",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
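+
+// Minimal sketch (assumes a Scope `s`):
+//
+// ```
+// a := op.Const(s.SubScope("a"), []float32{1, 2})
+// b := op.Const(s.SubScope("b"), []float32{3, 4})
+// sum := op.AddN(s, []tf.Output{a, b}) // ==> [4 6]
+// ```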
+
+// RetrieveTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to RetrieveTPUEmbeddingMDLAdagradLightParameters.
+type RetrieveTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingMDLAdagradLightParametersTableId(value int64) RetrieveTPUEmbeddingMDLAdagradLightParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingMDLAdagradLightParametersTableName(value string) RetrieveTPUEmbeddingMDLAdagradLightParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve MDL Adagrad Light embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns Parameter parameters updated by the MDL Adagrad Light optimization
+// algorithm; Parameter accumulators updated by the MDL Adagrad Light optimization
+// algorithm; Parameter weights updated by the MDL Adagrad Light optimization
+// algorithm; Parameter benefits updated by the MDL Adagrad Light optimization
+// algorithm.
+func RetrieveTPUEmbeddingMDLAdagradLightParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMDLAdagradLightParametersAttr) (parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingMDLAdagradLightParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug.
+type RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve Adadelta embedding parameters with debug support.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns Parameter parameters updated by the Adadelta optimization algorithm;
+// Parameter accumulators updated by the Adadelta optimization algorithm; Parameter
+// updates updated by the Adadelta optimization algorithm; Parameter
+// gradient_accumulators updated by the Adadelta optimization algorithm.
+func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// MapClearAttr is an optional argument to MapClear.
+type MapClearAttr func(optionalAttr)
+
+// MapClearCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapClearCapacity(value int64) MapClearAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// MapClearMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapClearMemoryLimit(value int64) MapClearAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// MapClearContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapClearContainer(value string) MapClearAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MapClearSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapClearSharedName(value string) MapClearAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes all elements in the underlying container.
+//
+// Returns the created operation.
+func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MapClear",
+
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// DecodeCSVAttr is an optional argument to DecodeCSV.
+type DecodeCSVAttr func(optionalAttr)
+
+// DecodeCSVFieldDelim sets the optional field_delim attribute to value.
+//
+// value: char delimiter to separate fields in a record.
+// If not specified, defaults to ","
+func DecodeCSVFieldDelim(value string) DecodeCSVAttr {
+	return func(m optionalAttr) {
+		m["field_delim"] = value
+	}
+}
+
+// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value.
+//
+// value: If false, treats double quotation marks as regular
+// characters inside of the string fields (ignoring RFC 4180, Section 2,
+// Bullet 5).
+// If not specified, defaults to true
+func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr {
+	return func(m optionalAttr) {
+		m["use_quote_delim"] = value
+	}
+}
+
+// DecodeCSVNaValue sets the optional na_value attribute to value.
+//
+// value: Additional string to recognize as NA/NaN.
+// If not specified, defaults to ""
+func DecodeCSVNaValue(value string) DecodeCSVAttr {
+	return func(m optionalAttr) {
+		m["na_value"] = value
+	}
+}
+
+// DecodeCSVSelectCols sets the optional select_cols attribute to value.
+// If not specified, defaults to <>
+func DecodeCSVSelectCols(value []int64) DecodeCSVAttr {
+	return func(m optionalAttr) {
+		m["select_cols"] = value
+	}
+}
+
+// Convert CSV records to tensors. Each column maps to one tensor.
+//
+// RFC 4180 format is expected for the CSV records.
+// (https://tools.ietf.org/html/rfc4180)
+// Note that we allow leading and trailing spaces in int or float fields.
+//
+// Arguments:
+//	records: Each string is a record/row in the csv and all records should have
+// the same format.
+//	record_defaults: One tensor per column of the input record, with either a
+// scalar default value for that column or an empty vector if the column is
+// required.
+//
+// Returns Each tensor will have the same shape as records.
+func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeCSV",
+		Input: []tf.Input{
+			records, tf.OutputList(record_defaults),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("DecodeCSV", err)
+		return
+	}
+	return output
+}
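+
+// Hedged sketch decoding one defaulted int64 column and one required string
+// column (assumes a Scope `s`):
+//
+// ```
+// records := op.Const(s.SubScope("records"), []string{"1,foo", ",bar"})
+// defaults := []tf.Output{
+// 	op.Const(s.SubScope("d0"), []int64{0}),  // column 0 defaults to 0
+// 	op.Const(s.SubScope("d1"), []string{}), // empty vector: column 1 required
+// }
+// cols := op.DecodeCSV(s, records, defaults)
+// // cols[0] ==> [1 0] (int64); cols[1] ==> ["foo" "bar"]
+// ```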
+
+// Produces the max pool of the input tensor for quantized types.
+//
+// Arguments:
+//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
+//	min_input: The float value that the lowest quantized input value represents.
+//	max_input: The float value that the highest quantized input value represents.
+//	ksize: The size of the window for each dimension of the input tensor.
+// The length must be 4 to match the number of dimensions of the input.
+//	strides: The stride of the sliding window for each dimension of the input
+// tensor. The length must be 4 to match the number of dimensions of the input.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The float value that the lowest quantized output value represents, and
+// the float value that the highest quantized output value represents.
+func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "QuantizedMaxPool",
+		Input: []tf.Input{
+			input, min_input, max_input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// RandomShuffleAttr is an optional argument to RandomShuffle.
+type RandomShuffleAttr func(optionalAttr)
+
+// RandomShuffleSeed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomShuffleSeed(value int64) RandomShuffleAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomShuffleSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomShuffleSeed2(value int64) RandomShuffleAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Randomly shuffles a tensor along its first dimension.
+//
+//   The tensor is shuffled along dimension 0, such that each `value[j]` is mapped
+//   to one and only one `output[i]`. For example, a mapping that might occur for a
+//   3x2 tensor is:
+//
+// ```
+// [[1, 2],       [[5, 6],
+//  [3, 4],  ==>   [1, 2],
+//  [5, 6]]        [3, 4]]
+// ```
+//
+// Arguments:
+//	value: The tensor to be shuffled.
+//
+// Returns A tensor of same shape and type as `value`, shuffled along its first
+// dimension.
+func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomShuffle",
+		Input: []tf.Input{
+			value,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
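+
+// A hedged seeding sketch (illustrative only; `s` is an *op.Scope as in the
+// DecodeCSV sketch above):
+//
+// ```go
+// t := op.Const(s, [][]int32{{1, 2}, {3, 4}, {5, 6}})
+// // Setting either seed to a non-zero value makes the shuffle reproducible.
+// shuffled := op.RandomShuffle(s, t, op.RandomShuffleSeed(7))
+// _ = shuffled // same shape and type as t, rows permuted
+// ```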
+
+// EnqueueTPUEmbeddingSparseBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseBatch.
+type EnqueueTPUEmbeddingSparseBatchAttr func(optionalAttr)
+
+// EnqueueTPUEmbeddingSparseBatchDeviceOrdinal sets the optional device_ordinal attribute to value.
+//
+// value: The TPU device to use. Should be >= 0 and less than the number
+// of TPU cores in the task on which the node is placed.
+// If not specified, defaults to -1
+func EnqueueTPUEmbeddingSparseBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseBatchAttr {
+	return func(m optionalAttr) {
+		m["device_ordinal"] = value
+	}
+}
+
+// EnqueueTPUEmbeddingSparseBatchCombiners sets the optional combiners attribute to value.
+//
+// value: A list of string scalars, one for each embedding table that specify
+// how to normalize the embedding activations after weighted summation.
+// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have
+// the sum of the weights be 0 for 'mean' or the sum of the squared weights be
+// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
+// all tables.
+// If not specified, defaults to <>
+func EnqueueTPUEmbeddingSparseBatchCombiners(value []string) EnqueueTPUEmbeddingSparseBatchAttr {
+	return func(m optionalAttr) {
+		m["combiners"] = value
+	}
+}
+
+// An op that enqueues TPUEmbedding input indices from a SparseTensor.
+//
+// This Op eases the porting of code that uses embedding_lookup_sparse(),
+// although some Python preprocessing of the SparseTensor arguments to
+// embedding_lookup_sparse() is required to produce the arguments to this Op,
+// since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training
+// step.
+//
+// The tensors at corresponding positions in the three input lists
+// must have the same shape, i.e. rank 1 with dim_size() equal to the total
+// number of lookups into the table described by the corresponding table_id.
+//
+// Arguments:
+//	sample_indices: A list of rank 1 Tensors specifying the training example and
+// feature to which the corresponding embedding_indices and aggregation_weights
+// values belong. sample_indices[i] must equal b * nf + f, where nf is the
+// number of features from the corresponding table, f is in [0, nf), and
+// b is in [0, batch size).
+//	embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+//	aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per
+// (training example, feature) -- aggregation weights.
+//	mode_override: A string input that overrides the mode specified in the
+// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+// in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
+//
+// Returns the created operation.
+func EnqueueTPUEmbeddingSparseBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingSparseBatchAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "EnqueueTPUEmbeddingSparseBatch",
+		Input: []tf.Input{
+			tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
+type StatelessRandomNormalAttr func(optionalAttr)
+
+// StatelessRandomNormalDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs deterministic pseudorandom values from a normal distribution.
+//
+// The generated values will have mean 0 and standard deviation 1.
+//
+// The outputs are a deterministic function of `shape` and `seed`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StatelessRandomNormal",
+		Input: []tf.Input{
+			shape, seed,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
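+
+// A hedged determinism sketch (illustrative; same assumptions as above): the
+// output is a pure function of `shape` and `seed`.
+//
+// ```go
+// shape := op.Const(s, []int32{2, 3})
+// seed := op.Const(s, []int64{42, 7}) // 2 seeds, shape [2]
+// a := op.StatelessRandomNormal(s, shape, seed)
+// b := op.StatelessRandomNormal(s, shape, seed)
+// // When run, a and b evaluate to identical [2, 3] float tensors.
+// _, _ = a, b
+// ```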
+
+// An Op to exchange data across TPU replicas.
+//
+// On each replica, the input is split into `split_count` blocks along
+// `split_dimension` and sent to the other replicas as specified by
+// `group_assignment`. After receiving `split_count` - 1 blocks from the other
+// replicas, we concatenate the blocks along `concat_dimension` as the output.
+//
+// For example, suppose there are 2 TPU replicas:
+// replica 0 receives input: `[[A, B]]`
+// replica 1 receives input: `[[C, D]]`
+//
+// group_assignment=`[[0, 1]]`
+// concat_dimension=0
+// split_dimension=1
+// split_count=2
+//
+// replica 0's output: `[[A], [C]]`
+// replica 1's output: `[[B], [D]]`
+//
+// Arguments:
+//	input: The local input to the sum.
+//	group_assignment: An int32 tensor with shape
+// [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the
+// replica ids in the ith subgroup.
+//	concat_dimension: The dimension number to concatenate.
+//	split_dimension: The dimension number to split.
+//	split_count: The number of splits; this number must equal the sub-group
+// size (group_assignment.get_shape()[1]).
+//
+// Returns The exchanged result.
+func AllToAll(scope *Scope, input tf.Output, group_assignment tf.Output, concat_dimension int64, split_dimension int64, split_count int64) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"concat_dimension": concat_dimension, "split_dimension": split_dimension, "split_count": split_count}
+	opspec := tf.OpSpec{
+		Type: "AllToAll",
+		Input: []tf.Input{
+			input, group_assignment,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Adds a value to the current value of a variable.
+//
+// Any ReadVariableOp with a control dependency on this op is guaranteed to
+// see the incremented value or a subsequent newer one.
+//
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	value: the value by which the variable will be incremented.
+//
+// Returns the created operation.
+func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AssignAddVariableOp",
+		Input: []tf.Input{
+			resource, value,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Real-valued fast Fourier transform.
+//
+// Computes the 1-dimensional discrete Fourier transform of a real-valued signal
+// over the inner-most dimension of `input`.
+//
+// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the
+// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,
+// followed by the `fft_length / 2` positive-frequency terms.
+//
+// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
+//
+// Arguments:
+//	input: A float32 tensor.
+//	fft_length: An int32 tensor of shape [1]. The FFT length.
+//
+// Returns A complex64 tensor of the same rank as `input`. The inner-most
+//   dimension of `input` is replaced with the `fft_length / 2 + 1` unique
+//   frequency components of its 1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.rfft
+// @end_compatibility
+func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RFFT",
+		Input: []tf.Input{
+			input, fft_length,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
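+
+// A hedged sketch of the output-length arithmetic (illustrative; assumptions
+// as above): with fft_length = 8, RFFT keeps 8/2 + 1 = 5 unique components.
+//
+// ```go
+// signal := op.Const(s, []float32{0, 1, 0, -1, 0, 1, 0, -1})
+// fftLen := op.Const(s, []int32{8})
+// spectrum := op.RFFT(s, signal, fftLen) // complex64 tensor of shape [5]
+// _ = spectrum
+// ```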
+
+// RetrieveTPUEmbeddingAdadeltaParametersAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParameters.
+type RetrieveTPUEmbeddingAdadeltaParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingAdadeltaParametersTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingAdadeltaParametersTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve Adadelta embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns the parameters, accumulators, and updates, each updated by the
+// Adadelta optimization algorithm.
+func RetrieveTPUEmbeddingAdadeltaParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingAdadeltaParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
 // UpperBoundAttr is an optional argument to UpperBound.
 type UpperBoundAttr func(optionalAttr)
 
@@ -21357,51 +24961,6 @@
 	return op.Output(0)
 }
 
-// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA.
-type ResourceApplyAdagradDAAttr func(optionalAttr)
-
-// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
-//
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the proximal adagrad scheme.
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	gradient_accumulator: Should be from a Variable().
-//	gradient_squared_accumulator: Should be from a Variable().
-//	grad: The gradient.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	global_step: Training step number. Must be a scalar.
-//
-// Returns the created operation.
-func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdagradDA",
-		Input: []tf.Input{
-			var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
 // SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse.
 type SparseReduceMaxSparseAttr func(optionalAttr)
 
@@ -21455,6 +25014,78 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Convert one or more images from HSV to RGB.
+//
+// Outputs a tensor of the same shape as the `images` tensor, containing the RGB
+// value of the pixels. The output is only well defined if the values in `images`
+// are in `[0,1]`.
+//
+// See `rgb_to_hsv` for a description of the HSV encoding.
+//
+// Arguments:
+//	images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3.
+//
+// Returns `images` converted to RGB.
+func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "HSVToRGB",
+		Input: []tf.Input{
+			images,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the gradient of the sigmoid of `x` wrt its input.
+//
+// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and
+// `dy` is the corresponding input gradient.
+func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SigmoidGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that changes the batch size.
+//
+// Creates a dataset that changes the batch size of the dataset to current batch
+// size // num_workers.
+//
+// Arguments:
+//	input_dataset: A variant tensor representing the input dataset.
+//	num_workers: A scalar representing the number of workers to distribute this batch
+// across. As a result of this transformation, the current batch size is divided by
+// this parameter.
+//
+func ExperimentalRebatchDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalRebatchDataset",
+		Input: []tf.Input{
+			input_dataset, num_workers,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Creates a dataset that emits the outputs of `input_dataset` `count` times.
 //
 // Arguments:
@@ -21479,101 +25110,34 @@
 	return op.Output(0)
 }
 
-// Computes the gradient for the inverse of `x` wrt its input.
-//
-// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
-// is the corresponding input gradient.
-func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReciprocalGrad",
-		Input: []tf.Input{
-			y, dy,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA.
+type ResourceApplyAdagradDAAttr func(optionalAttr)
 
-// Returns the min of x and y (i.e. x < y ? x : y) element-wise.
+// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
 //
-// *NOTE*: `Minimum` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Minimum",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MfccAttr is an optional argument to Mfcc.
-type MfccAttr func(optionalAttr)
-
-// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value.
-//
-// value: The highest frequency to use when calculating the
-// ceptstrum.
-// If not specified, defaults to 4000
-func MfccUpperFrequencyLimit(value float32) MfccAttr {
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr {
 	return func(m optionalAttr) {
-		m["upper_frequency_limit"] = value
+		m["use_locking"] = value
 	}
 }
 
-// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value.
-//
-// value: The lowest frequency to use when calculating the
-// ceptstrum.
-// If not specified, defaults to 20
-func MfccLowerFrequencyLimit(value float32) MfccAttr {
-	return func(m optionalAttr) {
-		m["lower_frequency_limit"] = value
-	}
-}
-
-// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value.
-//
-// value: Resolution of the Mel bank used internally.
-// If not specified, defaults to 40
-func MfccFilterbankChannelCount(value int64) MfccAttr {
-	return func(m optionalAttr) {
-		m["filterbank_channel_count"] = value
-	}
-}
-
-// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value.
-//
-// value: How many output channels to produce per time slice.
-// If not specified, defaults to 13
-func MfccDctCoefficientCount(value int64) MfccAttr {
-	return func(m optionalAttr) {
-		m["dct_coefficient_count"] = value
-	}
-}
-
-// Transforms a spectrogram into a form that's useful for speech recognition.
-//
-// Mel Frequency Cepstral Coefficients are a way of representing audio data that's
-// been effective as an input feature for machine learning. They are created by
-// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
-// higher frequencies that are less significant to the human ear. They have a long
-// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-// is a good resource to learn more.
+// Update '*var' according to the proximal adagrad scheme.
 //
 // Arguments:
-//	spectrogram: Typically produced by the Spectrogram op, with magnitude_squared
-// set to true.
-//	sample_rate: How many samples per second the source audio used.
-func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) {
+//	var_: Should be from a Variable().
+//	gradient_accumulator: Should be from a Variable().
+//	gradient_squared_accumulator: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	global_step: Training step number. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -21582,9 +25146,1794 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Mfcc",
+		Type: "ResourceApplyAdagradDA",
 		Input: []tf.Input{
-			spectrogram, sample_rate,
+			var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Creates a TensorList which, when stacked, has the value of `tensor`.
+//
+// Each tensor in the result list corresponds to one row of the input tensor.
+//
+// tensor: The input tensor.
+// output_handle: The list.
+func TensorListFromTensor(scope *Scope, tensor tf.Output, element_shape tf.Output) (output_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorListFromTensor",
+		Input: []tf.Input{
+			tensor, element_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ConfigureDistributedTPUAttr is an optional argument to ConfigureDistributedTPU.
+type ConfigureDistributedTPUAttr func(optionalAttr)
+
+// ConfigureDistributedTPUEmbeddingConfig sets the optional embedding_config attribute to value.
+//
+// value: Reserved. Do not use.
+// If not specified, defaults to ""
+func ConfigureDistributedTPUEmbeddingConfig(value string) ConfigureDistributedTPUAttr {
+	return func(m optionalAttr) {
+		m["embedding_config"] = value
+	}
+}
+
+// ConfigureDistributedTPUTpuEmbeddingConfig sets the optional tpu_embedding_config attribute to value.
+//
+// value: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that
+// describes the embedding lookups of the program.
+// If not specified, defaults to ""
+func ConfigureDistributedTPUTpuEmbeddingConfig(value string) ConfigureDistributedTPUAttr {
+	return func(m optionalAttr) {
+		m["tpu_embedding_config"] = value
+	}
+}
+
+// ConfigureDistributedTPUIsGlobalInit sets the optional is_global_init attribute to value.
+//
+// value: Reserved. Do not use.
+// If not specified, defaults to false
+func ConfigureDistributedTPUIsGlobalInit(value bool) ConfigureDistributedTPUAttr {
+	return func(m optionalAttr) {
+		m["is_global_init"] = value
+	}
+}
+
+// Sets up the centralized structures for a distributed TPU system.
+//
+// Returns A serialized tensorflow.tpu.TopologyProto that describes the TPU
+// topology.
+func ConfigureDistributedTPU(scope *Scope, optional ...ConfigureDistributedTPUAttr) (topology tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ConfigureDistributedTPU",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Reshapes a quantized tensor as per the Reshape op.
+//
+// Arguments:
+//
+//	shape: Defines the shape of the output tensor.
+//	input_min: The minimum value of the input.
+//	input_max: The maximum value of the input.
+//
+// Returns the reshaped tensor, plus output_min and output_max copied from
+// input_min and input_max respectively.
+func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizedReshape",
+		Input: []tf.Input{
+			tensor, shape, input_min, input_max,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// PriorityQueueV2Attr is an optional argument to PriorityQueueV2.
+type PriorityQueueV2Attr func(optionalAttr)
+
+// PriorityQueueV2ComponentTypes sets the optional component_types attribute to value.
+//
+// value: The type of each component in a value.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func PriorityQueueV2ComponentTypes(value []tf.DataType) PriorityQueueV2Attr {
+	return func(m optionalAttr) {
+		m["component_types"] = value
+	}
+}
+
+// PriorityQueueV2Capacity sets the optional capacity attribute to value.
+//
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func PriorityQueueV2Capacity(value int64) PriorityQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// PriorityQueueV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func PriorityQueueV2Container(value string) PriorityQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// PriorityQueueV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func PriorityQueueV2SharedName(value string) PriorityQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that produces elements sorted by the first component value.
+//
+// Note that the PriorityQueue requires the first component of any element
+// to be a scalar int64, in addition to the other elements declared by
+// component_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue
+// and DequeueMany) on a PriorityQueue will all require (resp. output) one extra
+// entry in their input (resp. output) lists.
+//
+// Arguments:
+//	shapes: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types. If the length of
+// this attr is 0, the shapes of queue elements are not constrained, and
+// only one element may be dequeued at a time.
+//
+// Returns The handle to the queue.
+func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV2Attr) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"shapes": shapes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "PriorityQueueV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent.
+type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr)
+
+// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, the subtraction will be protected by a lock;
+// otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Sparse update '*var' as FOBOS algorithm with fixed learning rate.
+//
+// That is for rows we have grad for, we update var as follows:
+// prox_v = var - alpha * grad
+// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	alpha: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//
+// Returns the created operation.
+func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceSparseApplyProximalGradientDescent",
+		Input: []tf.Input{
+			var_, alpha, l1, l2, grad, indices,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Check if the input matches the regex pattern.
+//
+// The input is a string tensor of any shape. The pattern is the
+// regular expression to be matched with every element of the input tensor.
+// The boolean values (True or False) of the output tensor indicate
+// if the input matches the regex pattern provided.
+//
+// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+//
+// Arguments:
+//	input: A string tensor of the text to be processed.
+//	pattern: The regular expression to match the input.
+//
+// Returns A bool tensor with the same shape as `input`.
+func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"pattern": pattern}
+	opspec := tf.OpSpec{
+		Type: "StaticRegexFullMatch",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
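+
+// A hedged sketch (illustrative; assumptions as above): the pattern must match
+// each whole string, in re2 syntax.
+//
+// ```go
+// text := op.Const(s, []string{"abc123", "abc"})
+// ok := op.StaticRegexFullMatch(s, text, "[a-z]+[0-9]+")
+// _ = ok // when run, evaluates to [true false]
+// ```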
+
+// OutfeedDequeueAttr is an optional argument to OutfeedDequeue.
+type OutfeedDequeueAttr func(optionalAttr)
+
+// OutfeedDequeueDeviceOrdinal sets the optional device_ordinal attribute to value.
+//
+// value: The TPU device to use. This should be -1 when the Op
+// is running on a TPU device, and >= 0 when the Op is running on the CPU
+// device.
+// If not specified, defaults to -1
+func OutfeedDequeueDeviceOrdinal(value int64) OutfeedDequeueAttr {
+	return func(m optionalAttr) {
+		m["device_ordinal"] = value
+	}
+}
+
+// Retrieves a single tensor from the computation outfeed.
+//
+// This operation will block indefinitely until data is available.
+//
+// Arguments:
+//	dtype: The type of elements in the tensor.
+//	shape: The shape of the tensor.
+//
+// Returns A tensor that will be read from the device outfeed.
+func OutfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...OutfeedDequeueAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype, "shape": shape}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "OutfeedDequeue",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// RandomPoissonV2Attr is an optional argument to RandomPoissonV2.
+type RandomPoissonV2Attr func(optionalAttr)
+
+// RandomPoissonV2Seed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomPoissonV2Seed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// RandomPoissonV2Dtype sets the optional dtype attribute to value.
+// If not specified, defaults to DT_INT64
+func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs random values from the Poisson distribution(s) described by rate.
+//
+// This op uses two algorithms, depending on rate. If rate >= 10, then
+// the algorithm by Hormann is used to acquire samples via
+// transformation-rejection.
+// See http://www.sciencedirect.com/science/article/pii/0167668793909974.
+//
+// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform
+// random variables.
+// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer
+// Programming, Volume 2. Addison Wesley.
+//
+// Arguments:
+//	shape: 1-D integer tensor. Shape of independent samples to draw from each
+// distribution described by the shape parameters given in rate.
+//	rate: A tensor in which each scalar is a "rate" parameter describing the
+// associated poisson distribution.
+//
+// Returns A tensor with shape `shape + shape(rate)`. Each slice
+// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
+// `rate[i0, i1, ...iN]`.
+func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomPoissonV2",
+		Input: []tf.Input{
+			shape, rate,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
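+
+// A hedged sampling sketch (illustrative; assumptions as above): 10 draws from
+// each of two rates gives output shape `shape + shape(rate)` = [10, 2].
+//
+// ```go
+// shape := op.Const(s, []int32{10})
+// rate := op.Const(s, []float32{0.5, 20.0}) // exercises both algorithms
+// samples := op.RandomPoissonV2(s, shape, rate) // dtype defaults to DT_INT64
+// _ = samples
+// ```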
+
+// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug.
+type RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve RMSProp embedding parameters with debug support.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns the parameters, ms, mom, and gradient_accumulators, each updated by
+// the RMSProp optimization algorithm.
+func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, gradient_accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// Computes the gradient for the rsqrt of `x` wrt its input.
+//
+// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`
+// is the corresponding input gradient.
+func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RsqrtGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Encode audio data using the WAV file format.
+//
+// This operation will generate a string suitable to be saved out to create a .wav
+// audio file. It will be encoded in the 16-bit PCM format. It takes in float
+// values in the range -1.0f to 1.0f; any values outside that range will be
+// clamped to it.
+//
+// `audio` is a 2-D float Tensor of shape `[length, channels]`.
+// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
+//
+// Arguments:
+//	audio: 2-D with shape `[length, channels]`.
+//	sample_rate: Scalar containing the sample frequency.
+//
+// Returns 0-D. WAV-encoded file contents.
+func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "EncodeWav",
+		Input: []tf.Input{
+			audio, sample_rate,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
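+
+// A hedged sketch (illustrative; assumptions as above): one channel, four
+// samples at 44.1 kHz; values outside [-1, 1] would be clamped.
+//
+// ```go
+// audio := op.Const(s, [][]float32{{0.0}, {0.5}, {-0.5}, {1.0}}) // [length, channels]
+// rate := op.Const(s, int32(44100))
+// wav := op.EncodeWav(s, audio, rate) // 0-D string: the .wav file bytes
+// _ = wav
+// ```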
+
+// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
+type ResourceApplyAdaMaxAttr func(optionalAttr)
+
+// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var, m, and v tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the AdaMax algorithm.
+//
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// v_t <- max(beta2 * v_{t-1}, abs(g))
+// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	v: Should be from a Variable().
+//	beta1_power: Must be a scalar.
+//	lr: Scaling factor. Must be a scalar.
+//	beta1: Momentum factor. Must be a scalar.
+//	beta2: Momentum factor. Must be a scalar.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyAdaMax",
+		Input: []tf.Input{
+			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes atan of x element-wise.
+func Atan(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atan",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// AssertAttr is an optional argument to Assert.
+type AssertAttr func(optionalAttr)
+
+// AssertSummarize sets the optional summarize attribute to value.
+//
+// value: Print this many entries of each tensor.
+// If not specified, defaults to 3
+func AssertSummarize(value int64) AssertAttr {
+	return func(m optionalAttr) {
+		m["summarize"] = value
+	}
+}
+
+// Asserts that the given condition is true.
+//
+// If `condition` evaluates to false, print the list of tensors in `data`.
+// `summarize` determines how many entries of the tensors to print.
+//
+// Arguments:
+//	condition: The condition to evaluate.
+//	data: The tensors to print out when condition is false.
+//
+// Returns the created operation.
+func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Assert",
+		Input: []tf.Input{
+			condition, tf.OutputList(data),
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
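+
+// A hedged sketch (illustrative; assumptions as above): the check fires only
+// when the operation is run and the condition is false.
+//
+// ```go
+// x := op.Const(s, int32(3))
+// cond := op.Greater(s, x, op.Const(s, int32(0)))
+// assertOp := op.Assert(s, cond, []tf.Output{x}, op.AssertSummarize(1))
+// // Pass assertOp as a target to Session.Run to enforce the check.
+// _ = assertOp
+// ```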
+
+// LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingAdagradParametersGradAccumDebug.
+type LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr func(optionalAttr)
+
+// LoadTPUEmbeddingAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load Adagrad embedding parameters with debug support.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the Adagrad optimization algorithm.
+//	accumulators: Value of accumulators used in the Adagrad optimization algorithm.
+//	gradient_accumulators: Value of gradient_accumulators used in the Adagrad optimization algorithm.
+//
+// Returns the created operation.
+func LoadTPUEmbeddingAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingAdagradParametersGradAccumDebug",
+		Input: []tf.Input{
+			parameters, accumulators, gradient_accumulators,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingFTRLParametersGradAccumDebug.
+type RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve FTRL embedding parameters with debug support.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns the parameters, accumulators, linears, and gradient_accumulators,
+// each updated by the FTRL optimization algorithm.
+func RetrieveTPUEmbeddingFTRLParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output, gradient_accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// A dataset that splits the elements of its input into multiple elements.
+func ExperimentalUnbatchDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalUnbatchDataset",
+		Input: []tf.Input{
+			input_dataset,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// StringFormatAttr is an optional argument to StringFormat.
+type StringFormatAttr func(optionalAttr)
+
+// StringFormatTemplate sets the optional template attribute to value.
+//
+// value: A string, the template to format tensor summaries into.
+// If not specified, defaults to "%s"
+func StringFormatTemplate(value string) StringFormatAttr {
+	return func(m optionalAttr) {
+		m["template"] = value
+	}
+}
+
+// StringFormatPlaceholder sets the optional placeholder attribute to value.
+//
+// value: A string; at each placeholder in the template, a subsequent tensor
+// summary will be inserted.
+// If not specified, defaults to "%s"
+func StringFormatPlaceholder(value string) StringFormatAttr {
+	return func(m optionalAttr) {
+		m["placeholder"] = value
+	}
+}
+
+// StringFormatSummarize sets the optional summarize attribute to value.
+//
+// value: When formatting the tensor summaries, print the first and last
+// `summarize` entries of each tensor dimension.
+// If not specified, defaults to 3
+func StringFormatSummarize(value int64) StringFormatAttr {
+	return func(m optionalAttr) {
+		m["summarize"] = value
+	}
+}
+
+// Formats a string template using a list of tensors.
+//
+// Formats a string template using a list of tensors, pretty-printing tensor summaries.
+//
+// Arguments:
+//	inputs: The list of tensors to format into the placeholder string.
+//
+// Returns The resulting string scalar.
+func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StringFormat",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
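+
+// A hedged formatting sketch (illustrative; assumptions as above):
+//
+// ```go
+// t := op.Const(s, []int32{1, 2, 3})
+// msg := op.StringFormat(s, []tf.Output{t},
+//     op.StringFormatTemplate("tensor: %s"))
+// // When run, msg evaluates to a scalar string such as "tensor: [1 2 3]".
+// _ = msg
+// ```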
+
+// Returns true if queue is closed.
+//
+// This operation returns true if the queue is closed and false if the queue
+// is open.
+//
+// Arguments:
+//	handle: The handle to a queue.
+func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "QueueIsClosedV2",
+		Input: []tf.Input{
+			handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes inverse hyperbolic tangent of x element-wise.
+func Atanh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atanh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the reverse mode backpropagated gradient of the Cholesky algorithm.
+//
+// For an explanation see "Differentiation of the Cholesky algorithm" by
+// Iain Murray http://arxiv.org/abs/1602.07527.
+//
+// Arguments:
+//	l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`.
+// Algorithm depends only on lower triangular part of the innermost matrices of
+// this tensor.
+//	grad: df/dl where f is some scalar function. Shape is `[..., M, M]`.
+// Algorithm depends only on lower triangular part of the innermost matrices of
+// this tensor.
+//
+// Returns Symmetrized version of df/dA. Shape is `[..., M, M]`.
+func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "CholeskyGrad",
+		Input: []tf.Input{
+			l, grad,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Assigns a new value to a variable.
+//
+// Any ReadVariableOp with a control dependency on this op is guaranteed to return
+// this value or a subsequent newer value of the variable.
+//
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	value: the value to set the variable to.
+//
+// Returns the created operation.
+func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AssignVariableOp",
+		Input: []tf.Input{
+			resource, value,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Returns a tensor of ones with the same shape and type as x.
+//
+// Arguments:
+//	x: a tensor of type T.
+//
+// Returns a tensor of the same shape and type as x but filled with ones.
+func OnesLike(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "OnesLike",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// The gradient of SparseFillEmptyRows.
+//
+// Takes vectors reverse_index_map, shaped `[N]`, and grad_values,
+// shaped `[N_full]`, where `N_full >= N` and copies data into either
+// `d_values` or `d_default_value`.  Here `d_values` is shaped `[N]` and
+// `d_default_value` is a scalar.
+//
+//   d_values[j] = grad_values[reverse_index_map[j]]
+//   d_default_value = sum_{k : 0 .. N_full - 1} (
+//      grad_values[k] * 1{k not in reverse_index_map})
+//
+// Arguments:
+//	reverse_index_map: 1-D.  The reverse index map from SparseFillEmptyRows.
+//	grad_values: 1-D.  The gradients from backprop.
+//
+// Returns d_values (1-D, the backprop into values) and d_default_value (0-D,
+// the backprop into default_value).
+func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseFillEmptyRowsGrad",
+		Input: []tf.Input{
+			reverse_index_map, grad_values,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Creates a dataset that zips together `input_datasets`.
+func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ZipDataset",
+		Input: []tf.Input{
+			tf.OutputList(input_datasets),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// LoadTPUEmbeddingAdagradParametersAttr is an optional argument to LoadTPUEmbeddingAdagradParameters.
+type LoadTPUEmbeddingAdagradParametersAttr func(optionalAttr)
+
+// LoadTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingAdagradParametersTableId(value int64) LoadTPUEmbeddingAdagradParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingAdagradParametersTableName(value string) LoadTPUEmbeddingAdagradParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load Adagrad embedding parameters.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the Adagrad optimization algorithm.
+//	accumulators: Value of accumulators used in the Adagrad optimization algorithm.
+//
+// Returns the created operation.
+func LoadTPUEmbeddingAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingAdagradParameters",
+		Input: []tf.Input{
+			parameters, accumulators,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Strip leading and trailing whitespaces from the Tensor.
+//
+// Arguments:
+//	input: A string `Tensor` of any shape.
+//
+// Returns A string `Tensor` of the same shape as the input.
+func StringStrip(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "StringStrip",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Converts each string in the input Tensor to its hash modulo a number of buckets.
+//
+// The hash function is deterministic on the content of the string within the
+// process. The hash function is a keyed hash function, where attribute `key`
+// defines the key of the hash function. `key` is an array of 2 elements.
+//
+// A strong hash is important when inputs may be malicious, e.g. URLs with
+// additional components. Adversaries could try to make their inputs hash to the
+// same bucket for a denial-of-service attack or to skew the results. A strong
+// hash prevents this by making it difficult, if not infeasible, to compute inputs
+// that hash to the same bucket. This comes at a cost of roughly 4x higher compute
+// time than `tf.string_to_hash_bucket_fast`.
+//
+// Arguments:
+//	input: The strings to assign a hash bucket.
+//	num_buckets: The number of buckets.
+//	key: The key for the keyed hash function passed as a list of two uint64
+// elements.
+//
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key}
+	opspec := tf.OpSpec{
+		Type: "StringToHashBucketStrong",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
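+
+// A hedged sketch (illustrative; assumptions as above): the two key elements
+// are passed as int64s, and changing the key changes the bucket assignment.
+//
+// ```go
+// urls := op.Const(s, []string{"a.example", "b.example"})
+// buckets := op.StringToHashBucketStrong(s, urls, 1024, []int64{1, 2})
+// _ = buckets // int64 tensor of shape [2], values in [0, 1024)
+// ```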
+
+// StringLengthAttr is an optional argument to StringLength.
+type StringLengthAttr func(optionalAttr)
+
+// StringLengthUnit sets the optional unit attribute to value.
+//
+// value: The unit that is counted to compute string length.  One of: `"BYTE"` (for
+// the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8
+// encoded Unicode code points in each string).  Results are undefined
+// if `unit=UTF8_CHAR` and the `input` strings do not contain structurally
+// valid UTF-8.
+// If not specified, defaults to "BYTE"
+func StringLengthUnit(value string) StringLengthAttr {
+	return func(m optionalAttr) {
+		m["unit"] = value
+	}
+}
+
+// String lengths of `input`.
+//
+// Computes the length of each string given in the input tensor.
+//
+// Arguments:
+//	input: The string for which to compute the length.
+//
+// Returns Integer tensor that has the same shape as `input`. The output contains the
+// element-wise string lengths of `input`.
+func StringLength(scope *Scope, input tf.Output, optional ...StringLengthAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StringLength",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
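+
+// A hedged sketch (illustrative; assumptions as above) contrasting the two
+// units: "héllo" is 6 bytes but 5 UTF-8 code points.
+//
+// ```go
+// str := op.Const(s, []string{"héllo"})
+// nBytes := op.StringLength(s, str) // unit defaults to "BYTE": 6
+// nChars := op.StringLength(s, str, op.StringLengthUnit("UTF8_CHAR")) // 5
+// _, _ = nBytes, nChars
+// ```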
+
+// Performs gradient updates of embedding tables.
+//
+// Arguments:
+//	inputs: A TensorList of gradients with which to update embedding tables.
+// This argument has the same length and shapes as the return value of
+// RecvTPUEmbeddingActivations, but contains gradients of the model's loss
+// with respect to the embedding activations. The embedding tables are updated
+// from these gradients via the optimizer specified in the TPU embedding
+// configuration given to tpu.initialize_system.
+//	learning_rates: A TensorList of float32 scalars, one for each dynamic learning
+// rate tag: see the comments in
+// //third_party/tensorflow/core/protobuf/tpu/optimization_parameters.proto.
+// Multiple tables can share the same dynamic learning rate tag as specified
+// in the configuration. If the learning rates for all tables are constant,
+// this list should be empty.
+//	config: Serialized TPUEmbeddingConfiguration proto.
+//
+// Returns the created operation.
+func SendTPUEmbeddingGradients(scope *Scope, inputs []tf.Output, learning_rates []tf.Output, config string) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"config": config}
+	opspec := tf.OpSpec{
+		Type: "SendTPUEmbeddingGradients",
+		Input: []tf.Input{
+			tf.OutputList(inputs), tf.OutputList(learning_rates),
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes numerical negative value element-wise.
+//
+// I.e., \\(y = -x\\).
+func Neg(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Neg",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Receives a tensor value broadcast from another device.
+func CollectiveBcastRecv(scope *Scope, T tf.DataType, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"T": T, "group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape}
+	opspec := tf.OpSpec{
+		Type: "CollectiveBcastRecv",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Decode web-safe base64-encoded strings.
+//
+// Input may or may not have padding at the end. See EncodeBase64 for padding.
+// Web-safe means that input must use - and _ instead of + and /.
+//
+// Arguments:
+//	input: Base64 strings to decode.
+//
+// Returns Decoded strings.
+func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeBase64",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
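+// exampleDecodeBase64 is an editor's sketch, not generated code: a full
+// build-and-run round trip through DecodeBase64. The NewScope, Const, Finalize,
+// and NewSession pattern follows the TensorFlow Go API and is an assumption of
+// this sketch.
+func exampleDecodeBase64() ([]string, error) {
+	s := NewScope()
+	out := DecodeBase64(s, Const(s, []string{"aGVsbG8", "d29ybGQ"}))
+	graph, err := s.Finalize()
+	if err != nil {
+		return nil, err
+	}
+	sess, err := tf.NewSession(graph, nil)
+	if err != nil {
+		return nil, err
+	}
+	defer sess.Close()
+	fetched, err := sess.Run(nil, []tf.Output{out}, nil)
+	if err != nil {
+		return nil, err
+	}
+	return fetched[0].Value().([]string), nil // ["hello", "world"]
+}
+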
+// SubstrAttr is an optional argument to Substr.
+type SubstrAttr func(optionalAttr)
+
+// SubstrUnit sets the optional unit attribute to value.
+//
+// value: The unit that is used to create the substring.  One of: `"BYTE"` (for
+// defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8
+// encoded Unicode code points).  The default is `"BYTE"`. Results are undefined if
+// `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid
+// UTF-8.
+// If not specified, defaults to "BYTE"
+func SubstrUnit(value string) SubstrAttr {
+	return func(m optionalAttr) {
+		m["unit"] = value
+	}
+}
+
+// Return substrings from `Tensor` of strings.
+//
+// For each string in the input `Tensor`, creates a substring starting at index
+// `pos` with a total length of `len`.
+//
+// If `len` defines a substring that would extend beyond the length of the input
+// string, then as many characters as possible are used.
+//
+// A negative `pos` indicates distance within the string backwards from the end.
+//
+// If `pos` specifies an index which is out of range for any of the input strings,
+// then an `InvalidArgumentError` is thrown.
+//
+// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
+// Op creation.
+//
+// *NOTE*: `Substr` supports broadcasting up to two dimensions. More about
+// broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+//
+// ---
+//
+// Examples
+//
+// Using scalar `pos` and `len`:
+//
+// ```python
+// input = [b'Hello', b'World']
+// position = 1
+// length = 3
+//
+// output = [b'ell', b'orl']
+// ```
+//
+// Using `pos` and `len` with the same shape as `input`:
+//
+// ```python
+// input = [[b'ten', b'eleven', b'twelve'],
+//          [b'thirteen', b'fourteen', b'fifteen'],
+//          [b'sixteen', b'seventeen', b'eighteen']]
+// position = [[1, 2, 3],
+//             [1, 2, 3],
+//             [1, 2, 3]]
+// length =   [[2, 3, 4],
+//             [4, 3, 2],
+//             [5, 5, 5]]
+//
+// output = [[b'en', b'eve', b'lve'],
+//           [b'hirt', b'urt', b'te'],
+//           [b'ixtee', b'vente', b'hteen']]
+// ```
+//
+// Broadcasting `pos` and `len` onto `input`:
+//
+// ```python
+// input = [[b'ten', b'eleven', b'twelve'],
+//          [b'thirteen', b'fourteen', b'fifteen'],
+//          [b'sixteen', b'seventeen', b'eighteen'],
+//          [b'nineteen', b'twenty', b'twentyone']]
+// position = [1, 2, 3]
+// length =   [1, 2, 3]
+//
+// output = [[b'e', b'ev', b'lve'],
+//           [b'h', b'ur', b'tee'],
+//           [b'i', b've', b'hte'],
+//           [b'i', b'en', b'nty']]
+// ```
+//
+// Broadcasting `input` onto `pos` and `len`:
+//
+// ```python
+// input = b'thirteen'
+// position = [1, 5, 7]
+// length =   [3, 2, 1]
+//
+// output = [b'hir', b'ee', b'n']
+// ```
+//
+// Arguments:
+//	input: Tensor of strings
+//	pos: Scalar defining the position of first character in each substring
+//	len: Scalar defining the number of characters to include in each substring
+//
+// Returns Tensor of substrings
+func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optional ...SubstrAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Substr",
+		Input: []tf.Input{
+			input, pos, len,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
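+// exampleSubstr is an editor's sketch, not generated code: the scalar pos/len
+// case from the Substr docs above, expressed with this package's wrappers and
+// assuming the NewScope and Const helpers.
+func exampleSubstr() tf.Output {
+	s := NewScope()
+	in := Const(s, []string{"Hello", "World"})
+	pos := Const(s, int32(1))
+	length := Const(s, int32(3))
+	return Substr(s, in, pos, length) // yields ["ell", "orl"]
+}
+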
+// Exits the current frame to its parent frame.
+//
+// Exit makes its input `data` available to the parent frame.
+//
+// Arguments:
+//	data: The tensor to be made available to the parent frame.
+//
+// Returns The same tensor as `data`.
+func Exit(scope *Scope, data tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Exit",
+		Input: []tf.Input{
+			data,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// RetrieveTPUEmbeddingProximalAdagradParametersAttr is an optional argument to RetrieveTPUEmbeddingProximalAdagradParameters.
+type RetrieveTPUEmbeddingProximalAdagradParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingProximalAdagradParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingProximalAdagradParametersTableId(value int64) RetrieveTPUEmbeddingProximalAdagradParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingProximalAdagradParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingProximalAdagradParametersTableName(value string) RetrieveTPUEmbeddingProximalAdagradParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve proximal Adagrad embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns parameters updated by the proximal Adagrad optimization algorithm,
+// and accumulators updated by the same algorithm.
+func RetrieveTPUEmbeddingProximalAdagradParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingProximalAdagradParametersAttr) (parameters tf.Output, accumulators tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingProximalAdagradParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Produce a string tensor that encodes the state of a Reader.
+//
+// Not all Readers support being serialized, so this can produce an
+// Unimplemented error.
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderSerializeStateV2",
+		Input: []tf.Input{
+			reader_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the number of tensors in the input tensor list.
+//
+// input_handle: the input list
+// length: the number of tensors in the list
+func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorListLength",
+		Input: []tf.Input{
+			input_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset with a range of values. Corresponds to Python's `xrange()`.
+//
+// Arguments:
+//	start: corresponds to start in Python's `xrange()`.
+//	stop: corresponds to stop in Python's `xrange()`.
+//	step: corresponds to step in Python's `xrange()`.
+//
+//
+func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "RangeDataset",
+		Input: []tf.Input{
+			start, stop, step,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes inverse hyperbolic sine of x element-wise.
+func Asinh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Asinh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// UnicodeTranscodeAttr is an optional argument to UnicodeTranscode.
+type UnicodeTranscodeAttr func(optionalAttr)
+
+// UnicodeTranscodeErrors sets the optional errors attribute to value.
+//
+// value: Error handling policy when there is invalid formatting found in the input.
+// The value of 'strict' will cause the operation to produce an InvalidArgument
+// error on any invalid input formatting. A value of 'replace' (the default) will
+// cause the operation to replace any invalid formatting in the input with the
+// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
+// skip any invalid formatting in the input and produce no corresponding output
+// character.
+// If not specified, defaults to "replace"
+func UnicodeTranscodeErrors(value string) UnicodeTranscodeAttr {
+	return func(m optionalAttr) {
+		m["errors"] = value
+	}
+}
+
+// UnicodeTranscodeReplacementChar sets the optional replacement_char attribute to value.
+//
+// value: The replacement character codepoint to be used in place of any invalid
+// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+// be used. The default value is the default unicode replacement character,
+// 0xFFFD (U+FFFD, decimal 65533).
+//
+// Note that for UTF-8, passing a replacement character expressible in 1 byte, such
+// as ' ', will preserve string alignment to the source since invalid bytes will be
+// replaced with a 1-byte replacement. For UTF-16-BE and UTF-16-LE, any 1 or 2 byte
+// replacement character will preserve byte alignment to the source.
+// If not specified, defaults to 65533
+func UnicodeTranscodeReplacementChar(value int64) UnicodeTranscodeAttr {
+	return func(m optionalAttr) {
+		m["replacement_char"] = value
+	}
+}
+
+// UnicodeTranscodeReplaceControlCharacters sets the optional replace_control_characters attribute to value.
+//
+// value: Whether to replace the C0 control characters (00-1F) with the
+// `replacement_char`. Default is false.
+// If not specified, defaults to false
+func UnicodeTranscodeReplaceControlCharacters(value bool) UnicodeTranscodeAttr {
+	return func(m optionalAttr) {
+		m["replace_control_characters"] = value
+	}
+}
+
+// Transcode the input text from a source encoding to a destination encoding.
+//
+// The input is a string tensor of any shape. The output is a string tensor of
+// the same shape containing the transcoded strings. Output strings are always
+// valid unicode. If the input contains invalid encoding positions, the
+// `errors` attribute sets the policy for how to deal with them. If the default
+// error-handling policy is used, invalid formatting will be substituted in the
+// output by the `replacement_char`. If the errors policy is to `ignore`, any
+// invalid encoding positions in the input are skipped and not included in the
+// output. If it is set to `strict` then any invalid formatting will result in an
+// InvalidArgument error.
+//
+// This operation can be used with `output_encoding = input_encoding` to enforce
+// correct formatting for inputs even if they are already in the desired encoding.
+//
+// If the input is prefixed by a Byte Order Mark needed to determine encoding
+// (e.g. if the encoding is UTF-16 and the BOM indicates big-endian), then that
+// BOM will be consumed and not emitted into the output. If the input encoding
+// is marked with an explicit endianness (e.g. UTF-16-BE), then the BOM is
+// interpreted as a non-breaking-space and is preserved in the output (including
+// always for UTF-8).
+//
+// The end result is that if the input is marked as an explicit endianness the
+// transcoding is faithful to all codepoints in the source. If it is not marked
+// with an explicit endianness, the BOM is not considered part of the string itself
+// but as metadata, and so is not preserved in the output.
+//
+// Arguments:
+//	input: The text to be processed. Can have any shape.
+//	input_encoding: Text encoding of the input strings. This is any of the encodings supported
+// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+//	output_encoding: The unicode encoding to use in the output. Must be one of
+// `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. Multi-byte encodings will be big-endian.
+//
+// Returns A string tensor containing unicode text encoded using `output_encoding`.
+func UnicodeTranscode(scope *Scope, input tf.Output, input_encoding string, output_encoding string, optional ...UnicodeTranscodeAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"input_encoding": input_encoding, "output_encoding": output_encoding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "UnicodeTranscode",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
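+// exampleEnforceUTF8 is an editor's sketch, not generated code: it applies the
+// output_encoding == input_encoding trick described above to scrub invalid
+// byte sequences from strings that are nominally already UTF-8.
+func exampleEnforceUTF8(s *Scope, raw tf.Output) tf.Output {
+	// Under the default errors="replace" policy, invalid sequences become
+	// the replacement character U+FFFD.
+	return UnicodeTranscode(s, raw, "UTF-8", "UTF-8")
+}
+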
+// UnicodeDecodeAttr is an optional argument to UnicodeDecode.
+type UnicodeDecodeAttr func(optionalAttr)
+
+// UnicodeDecodeErrors sets the optional errors attribute to value.
+//
+// value: Error handling policy when there is invalid formatting found in the input.
+// The value of 'strict' will cause the operation to produce an InvalidArgument
+// error on any invalid input formatting. A value of 'replace' (the default) will
+// cause the operation to replace any invalid formatting in the input with the
+// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
+// skip any invalid formatting in the input and produce no corresponding output
+// character.
+// If not specified, defaults to "replace"
+func UnicodeDecodeErrors(value string) UnicodeDecodeAttr {
+	return func(m optionalAttr) {
+		m["errors"] = value
+	}
+}
+
+// UnicodeDecodeReplacementChar sets the optional replacement_char attribute to value.
+//
+// value: The replacement character codepoint to be used in place of any invalid
+// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+// be used. The default value is the default unicode replacement character,
+// 0xFFFD (U+FFFD, decimal 65533).
+// If not specified, defaults to 65533
+func UnicodeDecodeReplacementChar(value int64) UnicodeDecodeAttr {
+	return func(m optionalAttr) {
+		m["replacement_char"] = value
+	}
+}
+
+// UnicodeDecodeReplaceControlCharacters sets the optional replace_control_characters attribute to value.
+//
+// value: Whether to replace the C0 control characters (00-1F) with the
+// `replacement_char`. Default is false.
+// If not specified, defaults to false
+func UnicodeDecodeReplaceControlCharacters(value bool) UnicodeDecodeAttr {
+	return func(m optionalAttr) {
+		m["replace_control_characters"] = value
+	}
+}
+
+// Decodes each string in `input` into a sequence of Unicode code points.
+//
+// The character codepoints for all strings are returned using a single vector
+// `char_values`, with strings expanded to characters in row-major order.
+//
+// The `row_splits` tensor indicates where the codepoints for
+// each input string begin and end within the `char_values` tensor.
+// In particular, the values for the `i`th
+// string (in row-major order) are stored in the slice
+// `[row_splits[i]:row_splits[i+1]]`. Thus:
+//
+// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
+//   character in the `i`th string (in row-major order).
+// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
+//   string (in row-major order).
+//
+// Arguments:
+//	input: The text to be decoded. Can have any shape. Note that the output is flattened
+// to a vector of char values.
+//	input_encoding: Text encoding of the input strings. This is any of the encodings supported
+// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+//
+// Returns A 1D int32 tensor containing the row splits, and a 1D int32 tensor
+// containing the decoded codepoints.
+func UnicodeDecode(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeAttr) (row_splits tf.Output, char_values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"input_encoding": input_encoding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "UnicodeDecode",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
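+// exampleUnicodeDecode is an editor's sketch, not generated code: it decodes a
+// small batch and notes how the two outputs pair up, assuming the NewScope and
+// Const helpers from this package.
+func exampleUnicodeDecode() (rowSplits, charValues tf.Output) {
+	s := NewScope()
+	in := Const(s, []string{"ab", "cdé"})
+	// After running, the codepoints of string i occupy
+	// char_values[row_splits[i]:row_splits[i+1]]; here row_splits = [0, 2, 5].
+	return UnicodeDecode(s, in, "UTF-8")
+}
+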
+// Adds up a SparseTensor and a dense Tensor, using these special rules:
+//
+// (1) Broadcasts the dense side to have the same shape as the sparse side, if
+//     eligible;
+// (2) Then, only the dense values pointed to by the indices of the SparseTensor
+//     participate in the cwise addition.
+//
+// By these rules, the result is a logical SparseTensor with exactly the same
+// indices and shape, but possibly with different non-zero values.  The output of
+// this Op is the resultant non-zero values.
+//
+// Arguments:
+//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//	dense: `R`-D.  The dense Tensor operand.
+//
+// Returns 1-D.  The `N` values that are operated on.
+func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseDenseCwiseAdd",
+		Input: []tf.Input{
+			sp_indices, sp_values, sp_shape, dense,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp.
+type ResourceApplyRMSPropAttr func(optionalAttr)
+
+// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var, ms, and mom tensors is protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the RMSProp algorithm.
+//
+// Note that in the dense implementation of this algorithm, ms and mom will
+// update even if the grad is zero, but in the sparse implementation, ms
+// and mom will not update in iterations during which the grad is zero.
+//
+// mean_square = decay * mean_square + (1-decay) * gradient ** 2
+// Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
+//
+// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
+// var <- var - mom
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	ms: Should be from a Variable().
+//	mom: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	rho: Decay rate. Must be a scalar.
+//	momentum: Momentum scale. Must be a scalar.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyRMSProp",
+		Input: []tf.Input{
+			var_, ms, mom, lr, rho, momentum, epsilon, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
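+// rmsPropStep is an editor's sketch, not generated code: it restates the
+// scalar ms/mom/var recurrences documented above in plain Go to make them
+// concrete. It assumes the standard library math package is imported, and it
+// illustrates the formulas rather than how the op is implemented.
+func rmsPropStep(v, ms, mom, lr, rho, momentum, epsilon, grad float64) (newV, newMs, newMom float64) {
+	newMs = rho*ms + (1-rho)*grad*grad
+	newMom = momentum*mom + lr*grad/math.Sqrt(newMs+epsilon)
+	newV = v - newMom
+	return newV, newMs, newMom
+}
+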
+// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
+type StatelessTruncatedNormalAttr func(optionalAttr)
+
+// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs deterministic pseudorandom values from a truncated normal distribution.
+//
+// The generated values follow a normal distribution with mean 0 and standard
+// deviation 1, except that values whose magnitude is more than 2 standard
+// deviations from the mean are dropped and re-picked.
+//
+// The outputs are a deterministic function of `shape` and `seed`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StatelessTruncatedNormal",
+		Input: []tf.Input{
+			shape, seed,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// RestoreSliceAttr is an optional argument to RestoreSlice.
+type RestoreSliceAttr func(optionalAttr)
+
+// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
+//
+// value: Index of file to open first if multiple files match
+// `file_pattern`. See the documentation for `Restore`.
+// If not specified, defaults to -1
+func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
+	return func(m optionalAttr) {
+		m["preferred_shard"] = value
+	}
+}
+
+// Restores a tensor from checkpoint files.
+//
+// This is like `Restore` except that the restored tensor can be listed as filling
+// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
+// larger tensor and the slice that the restored tensor covers.
+//
+// The `shape_and_slice` input has the same format as the
+// elements of the `shapes_and_slices` input of the `SaveSlices` op.
+//
+// Arguments:
+//	file_pattern: Must have a single element. The pattern of the files from
+// which we read the tensor.
+//	tensor_name: Must have a single element. The name of the tensor to be
+// restored.
+//	shape_and_slice: Scalar. The shapes and slice specifications to use when
+// restoring a tensor.
+//	dt: The type of the tensor to be restored.
+//
+// Returns The restored tensor.
+func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dt": dt}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RestoreSlice",
+		Input: []tf.Input{
+			file_pattern, tensor_name, shape_and_slice,
 		},
 		Attrs: attrs,
 	}
@@ -21622,6 +26971,190 @@
 	return op.Output(0)
 }
 
+// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+// actual distribution of the values to maximize the usage of the lower bit depth
+// and adjusting the output min and max ranges accordingly.
+//
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+//
+// This operator tries to squeeze as much precision as possible into an output with
+// a lower bit depth by calculating the actual min and max values found in the
+// data. For example, maybe that quint16 input has no values lower than 16,384 and
+// none higher than 49,152. That means only half the range is actually needed, all
+// the float interpretations are between -0.5f and 0.5f, so if we want to compress
+// the data into a quint8 output, we can use that range rather than the theoretical
+// -1.0f to 1.0f that is suggested by the input min and max.
+//
+// In practice, this is most useful for taking output from operations like
+// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
+// may have large potential output ranges, but in practice have a distribution of
+// input values that only uses a small fraction of the possible range. By feeding
+// that output into this operator, we can reduce it from 32 bits down to 8 with
+// minimal loss of accuracy.
+//
+// Arguments:
+//
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
+//
+// Returns The float value that the minimum quantized output value represents, and
+// the float value that the maximum quantized output value represents.
+func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "QuantizeDownAndShrinkRange",
+		Input: []tf.Input{
+			input, input_min, input_max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
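+// exampleRequantize is an editor's sketch, not generated code: it shrinks a
+// qint32 accumulator (e.g. the output of a quantized matmul) down to qint8
+// using the observed value range, per the description above. The three inputs
+// are assumed to come from an upstream quantized op.
+func exampleRequantize(s *Scope, acc, accMin, accMax tf.Output) (tf.Output, tf.Output, tf.Output) {
+	return QuantizeDownAndShrinkRange(s, acc, accMin, accMax, tf.Qint8)
+}
+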
+// RandomGammaAttr is an optional argument to RandomGamma.
+type RandomGammaAttr func(optionalAttr)
+
+// RandomGammaSeed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomGammaSeed(value int64) RandomGammaAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomGammaSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomGammaSeed2(value int64) RandomGammaAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Outputs random values from the Gamma distribution(s) described by alpha.
+//
+// This op uses the algorithm by Marsaglia et al. to acquire samples via
+// transformation-rejection from pairs of uniform and normal random variables.
+// See http://dl.acm.org/citation.cfm?id=358414
+//
+// Arguments:
+//	shape: 1-D integer tensor. Shape of independent samples to draw from each
+// distribution described by the shape parameters given in alpha.
+//	alpha: A tensor in which each scalar is a "shape" parameter describing the
+// associated gamma distribution.
+//
+// Returns A tensor with shape `shape + shape(alpha)`. Each slice
+// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
+// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha.
+func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomGamma",
+		Input: []tf.Input{
+			shape, alpha,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
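+// exampleRandomGamma is an editor's sketch, not generated code: it draws 10
+// samples from each of two Gamma distributions with a fixed seed, assuming the
+// NewScope and Const helpers from this package.
+func exampleRandomGamma() tf.Output {
+	s := NewScope()
+	shape := Const(s, []int32{10})
+	alpha := Const(s, []float32{0.5, 1.5}) // two "shape" parameters
+	// Output shape is shape + shape(alpha) = [10, 2].
+	return RandomGamma(s, shape, alpha, RandomGammaSeed(42))
+}
+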
+// ResourceScatterNdSubAttr is an optional argument to ResourceScatterNdSub.
+type ResourceScatterNdSubAttr func(optionalAttr)
+
+// ResourceScatterNdSubUseLocking sets the optional use_locking attribute to value.
+//
+// value: An optional bool. Defaults to True. If True, the assignment will
+// be protected by a lock; otherwise the behavior is undefined,
+// but may exhibit less contention.
+// If not specified, defaults to true
+func ResourceScatterNdSubUseLocking(value bool) ResourceScatterNdSubAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Applies sparse subtraction to individual values or slices in a Variable.
+//
+// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+//
+// `indices` must be an integer tensor, containing indices into `ref`.
+// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+//
+// The innermost dimension of `indices` (with length `K`) corresponds to
+// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+// dimension of `ref`.
+//
+// `updates` is a `Tensor` of rank `Q-1+P-K` with shape:
+//
+// ```
+// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]
+// ```
+//
+// For example, say we want to subtract 4 scattered elements from a rank-1 tensor
+// with 8 elements. In Python, that subtraction would look like this:
+//
+// ```python
+// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True)
+// indices = tf.constant([[4], [3], [1], [7]])
+// updates = tf.constant([9, 10, 11, 12])
+// sub = tf.scatter_nd_sub(ref, indices, updates)
+// with tf.Session() as sess:
+//   print(sess.run(sub))
+// ```
+//
+// The resulting update to ref would look like this:
+//
+//     [1, -9, 3, -6, -4, 6, 7, -4]
+//
+// See `tf.scatter_nd` for more details about how to make updates to
+// slices.
+//
+// Arguments:
+//	ref: A resource handle. Must be from a VarHandleOp.
+//	indices: A Tensor. Must be one of the following types: int32, int64.
+// A tensor of indices into ref.
+//	updates: A Tensor. Must have the same type as ref. A tensor of
+// values to add to ref.
+//
+// Returns the created operation.
+func ResourceScatterNdSub(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdSubAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterNdSub",
+		Input: []tf.Input{
+			ref, indices, updates,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
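+// exampleScatterNdSub is an editor's sketch, not generated code: the Go-graph
+// analogue of the Python example above. The VarHandleOp wrapper signature and
+// tf.MakeShape are assumptions here, and the variable would still need to be
+// initialized (e.g. via AssignVariableOp) before the update can run.
+func exampleScatterNdSub(s *Scope) *tf.Operation {
+	ref := VarHandleOp(s, tf.Int32, tf.MakeShape(8))
+	indices := Const(s, [][]int32{{4}, {3}, {1}, {7}})
+	updates := Const(s, []int32{9, 10, 11, 12})
+	return ResourceScatterNdSub(s, ref, indices, updates)
+}
+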
 // Outputs deterministic pseudorandom random integers from a uniform distribution.
 //
 // The generated values follow a uniform distribution in the range `[minval, maxval)`.
@@ -21649,6 +27182,1208 @@
 	return op.Output(0)
 }
 
+// QuantizedConv2DAttr is an optional argument to QuantizedConv2D.
+type QuantizedConv2DAttr func(optionalAttr)
+
+// QuantizedConv2DOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_QINT32
+func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// QuantizedConv2DDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to [1, 1, 1, 1]
+func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes a 2D convolution given quantized 4D input and filter tensors.
+//
+// The inputs are quantized tensors where the lowest value represents the real
+// number of the associated minimum, and the highest represents the maximum.
+// This means that you can only interpret the quantized output in the same way, by
+// taking the returned minimum and maximum values into account.
+//
+// Arguments:
+//
+//	filter: filter's input_depth dimension must match input's depth dimension.
+//	min_input: The float value that the lowest quantized input value represents.
+//	max_input: The float value that the highest quantized input value represents.
+//	min_filter: The float value that the lowest quantized filter value represents.
+//	max_filter: The float value that the highest quantized filter value represents.
+//	strides: The stride of the sliding window for each dimension of the input
+// tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The float value that the lowest quantized output value represents, and
+// the float value that the highest quantized output value represents.
+func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizedConv2D",
+		Input: []tf.Input{
+			input, filter, min_input, max_input, min_filter, max_filter,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// ResourceGatherAttr is an optional argument to ResourceGather.
+type ResourceGatherAttr func(optionalAttr)
+
+// ResourceGatherValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func ResourceGatherValidateIndices(value bool) ResourceGatherAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Gather slices from the variable pointed to by `resource` according to `indices`.
+//
+// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
+// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
+//
+// ```python
+//     # Scalar indices
+//     output[:, ..., :] = params[indices, :, ... :]
+//
+//     # Vector indices
+//     output[i, :, ..., :] = params[indices[i], :, ... :]
+//
+//     # Higher rank indices
+//     output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
+// ```
+func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceGather",
+		Input: []tf.Input{
+			resource, indices,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// StatelessMultinomialAttr is an optional argument to StatelessMultinomial.
+type StatelessMultinomialAttr func(optionalAttr)
+
+// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value.
+// If not specified, defaults to DT_INT64
+func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr {
+	return func(m optionalAttr) {
+		m["output_dtype"] = value
+	}
+}
+
+// Draws samples from a multinomial distribution.
+//
+// Arguments:
+//	logits: 2-D Tensor with shape `[batch_size, num_classes]`.  Each slice `[i, :]`
+// represents the unnormalized log probabilities for all classes.
+//	num_samples: 0-D.  Number of independent samples to draw for each row slice.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns 2-D Tensor with shape `[batch_size, num_samples]`.  Each slice `[i, :]`
+// contains the drawn class labels with range `[0, num_classes)`.
+func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StatelessMultinomial",
+		Input: []tf.Input{
+			logits, num_samples, seed,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns a batched matrix tensor with new batched diagonal values.
+//
+// Given `input` and `diagonal`, this operation returns a tensor with the
+// same shape and values as `input`, except for the main diagonal of the
+// innermost matrices.  These will be overwritten by the values in `diagonal`.
+//
+// The output is computed as follows:
+//
+// Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has
+// `k` dimensions `[I, J, K, ..., min(M, N)]`.  Then the output is a
+// tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where:
+//
+//   * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`.
+//   * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`.
+//
+// Arguments:
+//	input: Rank `k+1`, where `k >= 1`.
+//	diagonal: Rank `k`, where `k >= 1`.
+//
+// Returns Rank `k+1`, with `output.shape = input.shape`.
+func MatrixSetDiag(scope *Scope, input tf.Output, diagonal tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixSetDiag",
+		Input: []tf.Input{
+			input, diagonal,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the element-wise max of two SparseTensors.
+//
+// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
+//
+// Arguments:
+//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, in the canonical lexicographic ordering.
+//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
+//	a_shape: 1-D.  Shape of the input SparseTensor.
+//	b_indices: counterpart to `a_indices` for the other operand.
+//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
+//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
+//
+// Returns the 2-D indices and the 1-D values of the output SparseTensor.
+func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSparseMaximum",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// List of the given size with empty elements.
+//
+// element_shape: the shape of the future elements of the list
+// num_elements: the number of elements to reserve
+// handle: the output list
+// element_dtype: the desired type of elements in the list.
+func TensorListReserve(scope *Scope, element_shape tf.Output, num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"element_dtype": element_dtype}
+	opspec := tf.OpSpec{
+		Type: "TensorListReserve",
+		Input: []tf.Input{
+			element_shape, num_elements,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// LoadTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to LoadTPUEmbeddingMDLAdagradLightParameters.
+type LoadTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr)
+
+// LoadTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingMDLAdagradLightParametersTableId(value int64) LoadTPUEmbeddingMDLAdagradLightParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingMDLAdagradLightParametersTableName(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load MDL Adagrad Light embedding parameters.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the MDL Adagrad Light optimization algorithm.
+//	accumulators: Value of accumulators used in the MDL Adagrad Light optimization algorithm.
+//	weights: Value of weights used in the MDL Adagrad Light optimization algorithm.
+//	benefits: Value of benefits used in the MDL Adagrad Light optimization algorithm.
+//
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingMDLAdagradLightParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMDLAdagradLightParametersAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingMDLAdagradLightParameters",
+		Input: []tf.Input{
+			parameters, accumulators, weights, benefits,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes the gradient for the inverse of `x` wrt its input.
+//
+// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
+// is the corresponding input gradient.
+func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "InvGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Reduces sparse updates into the variable referenced by `resource` using the `min` operation.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] = min(ref[indices, ...], updates[...])
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] = min(ref[indices[i], ...], updates[i, ...])
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] = min(ref[indices[i, ..., j], ...], updates[i, ..., j, ...])
+//
+// Duplicate entries are handled correctly: if multiple `indices` reference
+// the same location, their contributions are combined.
+//
+// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
+//
+// Illustration: https://www.tensorflow.org/images/ScatterAdd.png
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
+//
+// Returns the created operation.
+func ResourceScatterMin(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterMin",
+		Input: []tf.Input{
+			resource, indices, updates,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Elementwise computes the bitwise OR of `x` and `y`.
+//
+// The result will have those bits set that are set in `x`, `y`, or both. The
+// computation is performed on the underlying representations of `x` and `y`.
+func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BitwiseOr",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// MatrixSolveLsAttr is an optional argument to MatrixSolveLs.
+type MatrixSolveLsAttr func(optionalAttr)
+
+// MatrixSolveLsFast sets the optional fast attribute to value.
+// If not specified, defaults to true
+func MatrixSolveLsFast(value bool) MatrixSolveLsAttr {
+	return func(m optionalAttr) {
+		m["fast"] = value
+	}
+}
+
+// Solves one or more linear least-squares problems.
+//
+// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions
+// form real or complex matrices of size `[M, N]`. `rhs` is a tensor of the same
+// type as `matrix` and shape `[..., M, K]`.
+// The output is a tensor of shape `[..., N, K]` where each output matrix solves
+// each of the equations
+// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]`
+// in the least squares sense.
+//
+// We use the following notation for (complex) matrix and right-hand sides
+// in the batch:
+//
+// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\),
+// `rhs`=\\(B  \in \mathbb{C}^{m \times k}\\),
+// `output`=\\(X  \in \mathbb{C}^{n \times k}\\),
+// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\).
+//
+// If `fast` is `True`, then the solution is computed by solving the normal
+// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then
+// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares
+// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\).
+// If \\(m \lt n\\) then `output` is computed as
+// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the
+// minimum-norm solution to the under-determined linear system, i.e.
+// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\),
+// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable
+// when \\(A\\) is numerically full rank and has a condition number
+// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is
+// sufficiently large.
+//
+// If `fast` is `False` an algorithm based on the numerically robust complete
+// orthogonal decomposition is used. This computes the minimum-norm
+// least-squares solution, even when \\(A\\) is rank deficient. This path is
+// typically 6-7 times slower than the fast path. If `fast` is `False` then
+// `l2_regularizer` is ignored.
+//
+// Arguments:
+//	matrix: Shape is `[..., M, N]`.
+//	rhs: Shape is `[..., M, K]`.
+//	l2_regularizer: Scalar tensor.
+//
+// @compatibility(numpy)
+// Equivalent to np.linalg.lstsq
+// @end_compatibility
+//
+// Returns Shape is `[..., N, K]`.
+func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixSolveLs",
+		Input: []tf.Input{
+			matrix, rhs, l2_regularizer,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
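+// exampleLeastSquares is an editor's sketch, not generated code: it solves a
+// small overdetermined system on the default fast (normal equations) path with
+// a tiny ridge term, assuming the NewScope and Const helpers. Note that the
+// l2_regularizer input is a float64 scalar.
+func exampleLeastSquares() tf.Output {
+	s := NewScope()
+	a := Const(s, [][]float32{{1, 0}, {1, 1}, {1, 2}}) // 3x2 design matrix
+	b := Const(s, [][]float32{{1}, {2}, {4}})          // 3x1 right-hand side
+	return MatrixSolveLs(s, a, b, Const(s, float64(1e-6))) // 2x1 solution
+}
+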
+// Interleave the values from the `data` tensors into a single tensor.
+//
+// Builds a merged tensor such that
+//
+// ```python
+//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
+// ```
+//
+// For example, if each `indices[m]` is scalar or vector, we have
+//
+// ```python
+//     # Scalar indices:
+//     merged[indices[m], ...] = data[m][...]
+//
+//     # Vector indices:
+//     merged[indices[m][i], ...] = data[m][i, ...]
+// ```
+//
+// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
+// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
+// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
+// `constant`, the output shape is
+//
+//     merged.shape = [max(indices)] + constant
+//
+// Values are merged in order, so if an index appears in both `indices[m][i]` and
+// `indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the
+// merged result. If you do not need this guarantee, ParallelDynamicStitch might
+// perform better on some devices.
+//
+// For example:
+//
+// ```python
+//     indices[0] = 6
+//     indices[1] = [4, 1]
+//     indices[2] = [[5, 2], [0, 3]]
+//     data[0] = [61, 62]
+//     data[1] = [[41, 42], [11, 12]]
+//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
+//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
+//               [51, 52], [61, 62]]
+// ```
+//
+// This method can be used to merge partitions created by `dynamic_partition`
+// as illustrated on the following example:
+//
+// ```python
+//     # Apply function (increments x_i) on elements for which a certain condition
+//     # applies (x_i != -1 in this example).
+//     x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
+//     condition_mask=tf.not_equal(x,tf.constant(-1.))
+//     partitioned_data = tf.dynamic_partition(
+//         x, tf.cast(condition_mask, tf.int32) , 2)
+//     partitioned_data[1] = partitioned_data[1] + 1.0
+//     condition_indices = tf.dynamic_partition(
+//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
+//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
+//     # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
+//     # unchanged.
+// ```
+//
+// Illustration: https://www.tensorflow.org/images/DynamicStitch.png
+func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DynamicStitch",
+		Input: []tf.Input{
+			tf.OutputList(indices), tf.OutputList(data),
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
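+// exampleDynamicStitch is an editor's sketch, not generated code: the vector
+// indices case from the docs above, assuming the NewScope and Const helpers
+// from this package.
+func exampleDynamicStitch() tf.Output {
+	s := NewScope()
+	indices := []tf.Output{Const(s, []int32{0, 2}), Const(s, []int32{1, 3})}
+	data := []tf.Output{Const(s, []int32{10, 30}), Const(s, []int32{20, 40})}
+	return DynamicStitch(s, indices, data) // merged = [10, 20, 30, 40]
+}
+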
+// Performs a padding as a preprocess during a convolution.
+//
+// Similar to FusedResizeAndPadConv2D, this op allows for an optimized
+// implementation where the spatial padding transformation stage is fused with the
+// im2col lookup, but in this case without the bilinear filtering required for
+// resizing. Fusing the padding prevents the need to write out the intermediate
+// results as whole tensors, reducing memory pressure, and we can get some latency
+// gains by merging the transformation calculations.
+// The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
+// order is used instead.
+// Internally this op uses a single per-graph scratch buffer, which means that it
+// will block if multiple versions are being run in parallel. This is because this
+// operator is primarily an optimization to minimize memory usage.
+//
+// Arguments:
+//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+//	paddings: A two-column matrix specifying the padding sizes. The number of
+// rows must be the same as the rank of `input`.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.
+//	mode: The padding mode to use: either `"REFLECT"` or `"SYMMETRIC"`.
+//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
+// of `input`. Must be in the same order as the dimension specified with format.
+//	padding: The type of padding algorithm to use.
+func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "FusedPadConv2D",
+		Input: []tf.Input{
+			input, paddings, filter,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Conv2DBackpropInputAttr is an optional argument to Conv2DBackpropInput.
+type Conv2DBackpropInputAttr func(optionalAttr)
+
+// Conv2DBackpropInputUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
+// If not specified, defaults to true
+func Conv2DBackpropInputUseCudnnOnGpu(value bool) Conv2DBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["use_cudnn_on_gpu"] = value
+	}
+}
+
+// Conv2DBackpropInputExplicitPaddings sets the optional explicit_paddings attribute to value.
+//
+// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith
+// dimension, the amount of padding inserted before and after the dimension is
+// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If
+// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty.
+// If not specified, defaults to []
+func Conv2DBackpropInputExplicitPaddings(value []int64) Conv2DBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["explicit_paddings"] = value
+	}
+}
+
+// Conv2DBackpropInputDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Conv2DBackpropInputDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to [1, 1, 1, 1]
+func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of convolution with respect to the input.
+//
+// Arguments:
+//	input_sizes: An integer vector representing the shape of `input`,
+// where `input` is a 4-D `[batch, height, width, channels]` tensor.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution. Must be in the same order as the dimension specified with
+// format.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape `[batch, in_height, in_width, in_channels]`.  Gradient
+// w.r.t. the input of the convolution.
+func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropInputAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Conv2DBackpropInput",
+		Input: []tf.Input{
+			input_sizes, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that executes a SQL query and emits rows of the result set.
+//
+// Arguments:
+//	driver_name: The database type. Currently, the only supported type is 'sqlite'.
+//	data_source_name: A connection string to connect to the database.
+//	query: A SQL query to execute.
+//
+//
+func ExperimentalSqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalSqlDataset",
+		Input: []tf.Input{
+			driver_name, data_source_name, query,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
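+// A hedged usage sketch (editor's illustration): building the dataset from a
+// hypothetical SQLite file. The path and query below are placeholders.
+//
+//	s := NewScope()
+//	driver := Const(s, "sqlite")
+//	dsn := Const(s, "/tmp/example.db")          // hypothetical database path
+//	query := Const(s, "SELECT name FROM users") // hypothetical query
+//	handle := ExperimentalSqlDataset(s, driver, dsn, query,
+//		[]tf.DataType{tf.String}, []tf.Shape{tf.ScalarShape()})
+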
+// LoadTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingCenteredRMSPropParameters.
+type LoadTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr)
+
+// LoadTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingCenteredRMSPropParametersTableId(value int64) LoadTPUEmbeddingCenteredRMSPropParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingCenteredRMSPropParametersTableName(value string) LoadTPUEmbeddingCenteredRMSPropParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load centered RMSProp embedding parameters.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the centered RMSProp optimization algorithm.
+//	ms: Value of ms used in the centered RMSProp optimization algorithm.
+//	mom: Value of mom used in the centered RMSProp optimization algorithm.
+//	mg: Value of mg used in the centered RMSProp optimization algorithm.
+//
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingCenteredRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingCenteredRMSPropParametersAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingCenteredRMSPropParameters",
+		Input: []tf.Input{
+			parameters, ms, mom, mg,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute.
+type DataFormatVecPermuteAttr func(optionalAttr)
+
+// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value.
+//
+// value: source data format.
+// If not specified, defaults to "NHWC"
+func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr {
+	return func(m optionalAttr) {
+		m["src_format"] = value
+	}
+}
+
+// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value.
+//
+// value: destination data format.
+// If not specified, defaults to "NCHW"
+func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr {
+	return func(m optionalAttr) {
+		m["dst_format"] = value
+	}
+}
+
+// Returns the permuted vector/tensor in the destination data format given the
+// one in the source data format.
+//
+// Arguments:
+//	x: Vector of size 4 or Tensor of shape (4, 2) in source data format.
+//
+// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format.
+func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DataFormatVecPermute",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
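+// A small sketch (editor's illustration) of the common NHWC-to-NCHW shape
+// permutation using the defaults above:
+//
+//	s := NewScope()
+//	nhwc := Const(s, []int32{1, 28, 28, 3}) // [batch, height, width, channels]
+//	nchw := DataFormatVecPermute(s, nhwc)   // evaluates to [1, 3, 28, 28]
+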
+// Returns x / y element-wise.
+//
+// *NOTE*: `Div` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Div",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
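+// A broadcasting sketch (editor's illustration): dividing a 2x2 matrix by a
+// one-element vector, which broadcasts across every element.
+//
+//	s := NewScope()
+//	x := Const(s, [][]float32{{2, 4}, {6, 8}})
+//	y := Const(s, []float32{2})
+//	z := Div(s, x, y) // evaluates to [[1, 2], [3, 4]]
+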
+// ResizeAreaAttr is an optional argument to ResizeArea.
+type ResizeAreaAttr func(optionalAttr)
+
+// ResizeAreaAlignCorners sets the optional align_corners attribute to value.
+//
+// value: If true, the centers of the 4 corner pixels of the input and output tensors are
+// aligned, preserving the values at the corner pixels.
+// If not specified, defaults to false
+func ResizeAreaAlignCorners(value bool) ResizeAreaAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Resize `images` to `size` using area interpolation.
+//
+// Input images can be of different types but output images are always float.
+//
+// The range of pixel values for the output image might be slightly different
+// from the range for the input image because of limited numerical precision.
+// To guarantee an output range, for example `[0.0, 1.0]`, apply
+// `tf.clip_by_value` to the output.
+//
+// Each output pixel is computed by first transforming the pixel's footprint into
+// the input tensor and then averaging the pixels that intersect the footprint. An
+// input pixel's contribution to the average is weighted by the fraction of its
+// area that intersects the footprint.  This is the same as OpenCV's INTER_AREA.
+//
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
+//
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResizeArea",
+		Input: []tf.Input{
+			images, size,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
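+// An illustrative sketch (editor's addition; the image constant is a
+// placeholder) of downscaling a batch of images with area interpolation:
+//
+//	s := NewScope()
+//	images := Const(s, imageVals)   // hypothetical [1, 4, 4, 3] float32 tensor
+//	size := Const(s, []int32{2, 2}) // new_height, new_width
+//	resized := ResizeArea(s, images, size)
+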
+// Sends `input` to all devices that are connected to the output.
+//
+// The graph should be constructed so that all ops connected to the output have a
+// valid device assignment, and the op itself is assigned one of these devices.
+//
+// input: The input to the broadcast.
+// output: The same as input.
+// shape: The shape of the input tensor.
+//
+func NcclBroadcast(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"shape": shape}
+	opspec := tf.OpSpec{
+		Type: "NcclBroadcast",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the gradient of morphological 2-D dilation with respect to the filter.
+//
+// Arguments:
+//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
+//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
+//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
+// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
+//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
+// Must be: `[1, rate_height, rate_width, 1]`.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 3-D with shape `[filter_height, filter_width, depth]`.
+func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "Dilation2DBackpropFilter",
+		Input: []tf.Input{
+			input, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap.
+type AddSparseToTensorsMapAttr func(optionalAttr)
+
+// AddSparseToTensorsMapContainer sets the optional container attribute to value.
+//
+// value: The container name for the `SparseTensorsMap` created by this op.
+// If not specified, defaults to ""
+func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value.
+//
+// value: The shared name for the `SparseTensorsMap` created by this op.
+// If blank, the new Operation's unique name is used.
+// If not specified, defaults to ""
+func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Add a `SparseTensor` to a `SparseTensorsMap` and return its handle.
+//
+// A `SparseTensor` is represented by three tensors: `sparse_indices`,
+// `sparse_values`, and `sparse_shape`.
+//
+// This operator takes the given `SparseTensor` and adds it to a container
+// object (a `SparseTensorsMap`).  A unique key within this container is generated
+// in the form of an `int64`, and this is the value that is returned.
+//
+// The `SparseTensor` can then be read out as part of a minibatch by passing
+// the key as a vector element to `TakeManySparseFromTensorsMap`.  To ensure
+// the correct `SparseTensorsMap` is accessed, ensure that the same
+// `container` and `shared_name` are passed to that Op.  If no `shared_name`
+// is provided here, instead use the *name* of the Operation created by calling
+// `AddSparseToTensorsMap` as the `shared_name` passed to
+// `TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
+//
+// Arguments:
+//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
+//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
+//
+// Returns 0-D.  The handle of the `SparseTensor` now stored in the
+// `SparseTensorsMap`.
+func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "AddSparseToTensorsMap",
+		Input: []tf.Input{
+			sparse_indices, sparse_values, sparse_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
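+// A minimal sketch (editor's illustration) of storing one SparseTensor under a
+// shared name so a later TakeManySparseFromTensorsMap call can retrieve it;
+// the values and the "my_map" name are assumptions.
+//
+//	s := NewScope()
+//	indices := Const(s, [][]int64{{0, 0}, {1, 2}})
+//	values := Const(s, []int32{7, 8})
+//	shape := Const(s, []int64{3, 4})
+//	handle := AddSparseToTensorsMap(s, indices, values, shape,
+//		AddSparseToTensorsMapSharedName("my_map"))
+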
+// Returns a list which has the passed-in `Tensor` as its last element and the other elements of the given list in `input_handle`.
+//
+// tensor: The tensor to put on the list.
+// input_handle: The old list.
+// output_handle: A list with the elements of the old list followed by tensor.
+// element_dtype: the type of elements in the list.
+// element_shape: a shape compatible with that of elements in the list.
+func TensorListPushBack(scope *Scope, input_handle tf.Output, tensor tf.Output) (output_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorListPushBack",
+		Input: []tf.Input{
+			input_handle, tensor,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams.
+type CudnnRNNCanonicalToParamsAttr func(optionalAttr)
+
+// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Converts CudnnRNN params from canonical form to usable form.
+//
+// Writes a set of weights into the opaque params buffer so they can be used in
+// upcoming training or inferences.
+//
+// Note that the params buffer may not be compatible across different GPUs. So any
+// save and restoration should be converted to and from the canonical weights and
+// biases.
+//
+// num_layers: Specifies the number of layers in the RNN model.
+// num_units: Specifies the size of the hidden state.
+// input_size: Specifies the size of the input state.
+// weights: the canonical form of weights that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// biases: the canonical form of biases that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// num_params: number of parameter sets for all layers.
+//     Each layer may contain multiple parameter sets, with each set consisting of
+//     a weight matrix and a bias vector.
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicates whether there is a linear projection between the input and
+//     the actual computation before the first layer. 'skip_input' is only allowed
+//     when input_size == num_units; 'auto_select' implies 'skip_input' when
+//     input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used.
+//     dir = (direction == bidirectional) ? 2 : 1
+// dropout: dropout probability. When set to 0., dropout is disabled.
+// seed: the 1st part of a seed to initialize dropout.
+// seed2: the 2nd part of a seed to initialize dropout.
+func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNNCanonicalToParams",
+		Input: []tf.Input{
+			num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset containing the elements of the first component of `input_dataset` for which the last component is true.
+func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "FilterByLastComponentDataset",
+		Input: []tf.Input{
+			input_dataset,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the absolute value of a tensor.
+//
+// Given a tensor `x`, this operation returns a tensor containing the absolute
+// value of each element in `x`. For example, if x is an input element and y is
+// an output element, this operation computes \\(y = |x|\\).
+func Abs(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Abs",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
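+// A one-liner sketch (editor's illustration) of the element-wise absolute
+// value:
+//
+//	s := NewScope()
+//	x := Const(s, []float32{-1, 2, -3})
+//	y := Abs(s, x) // evaluates to [1, 2, 3]
+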
+// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2.
+type MaxPoolGradV2Attr func(optionalAttr)
+
+// MaxPoolGradV2DataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Computes gradients of the maxpooling function.
+//
+// Arguments:
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns Gradients w.r.t. the input to `max_pool`.
+func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MaxPoolGradV2",
+		Input: []tf.Input{
+			orig_input, orig_output, grad, ksize, strides,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Restore a reader to a previously saved state.
+//
+// Not all Readers support being restored, so this can produce an
+// Unimplemented error.
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+//	state: Result of a ReaderSerializeState of a Reader with type
+// matching reader_handle.
+//
+// Returns the created operation.
+func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderRestoreStateV2",
+		Input: []tf.Input{
+			reader_handle, state,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Inverse fast Fourier transform.
 //
 // Computes the inverse 1-dimensional discrete Fourier transform over the
@@ -21733,163 +28468,6 @@
 	return op.Output(0)
 }
 
-// Inverse 2D real-valued fast Fourier transform.
-//
-// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued
-// signal over the inner-most 2 dimensions of `input`.
-//
-// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`:
-// The inner-most dimension contains the `fft_length / 2 + 1` unique components of
-// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
-// from the size of the inner-most 2 dimensions of `input`. If the FFT length used
-// to compute `input` is odd, it should be provided since it cannot be inferred
-// properly.
-//
-// Along each axis `IRFFT2D` is computed on, if `fft_length` (or
-// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
-//
-// Arguments:
-//	input: A complex64 tensor.
-//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
-//
-// Returns A float32 tensor of the same rank as `input`. The inner-most 2
-//   dimensions of `input` are replaced with the `fft_length` samples of their
-//   inverse 2D Fourier transform.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.irfft2
-// @end_compatibility
-func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "IRFFT2D",
-		Input: []tf.Input{
-			input, fft_length,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// DecodeJpegAttr is an optional argument to DecodeJpeg.
-type DecodeJpegAttr func(optionalAttr)
-
-// DecodeJpegChannels sets the optional channels attribute to value.
-//
-// value: Number of color channels for the decoded image.
-// If not specified, defaults to 0
-func DecodeJpegChannels(value int64) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["channels"] = value
-	}
-}
-
-// DecodeJpegRatio sets the optional ratio attribute to value.
-//
-// value: Downscaling ratio.
-// If not specified, defaults to 1
-func DecodeJpegRatio(value int64) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["ratio"] = value
-	}
-}
-
-// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
-//
-// value: If true use a slower but nicer upscaling of the
-// chroma planes (yuv420/422 only).
-// If not specified, defaults to true
-func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["fancy_upscaling"] = value
-	}
-}
-
-// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
-//
-// value: If true try to recover an image from truncated input.
-// If not specified, defaults to false
-func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["try_recover_truncated"] = value
-	}
-}
-
-// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
-//
-// value: The minimum required fraction of lines before a truncated
-// input is accepted.
-// If not specified, defaults to 1
-func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["acceptable_fraction"] = value
-	}
-}
-
-// DecodeJpegDctMethod sets the optional dct_method attribute to value.
-//
-// value: string specifying a hint about the algorithm used for
-// decompression.  Defaults to "" which maps to a system-specific
-// default.  Currently valid values are ["INTEGER_FAST",
-// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
-// jpeg library changes to a version that does not have that specific
-// option.)
-// If not specified, defaults to ""
-func DecodeJpegDctMethod(value string) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["dct_method"] = value
-	}
-}
-
-// Decode a JPEG-encoded image to a uint8 tensor.
-//
-// The attr `channels` indicates the desired number of color channels for the
-// decoded image.
-//
-// Accepted values are:
-//
-// *   0: Use the number of channels in the JPEG-encoded image.
-// *   1: output a grayscale image.
-// *   3: output an RGB image.
-//
-// If needed, the JPEG-encoded image is transformed to match the requested number
-// of color channels.
-//
-// The attr `ratio` allows downscaling the image by an integer factor during
-// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
-// downscaling the image later.
-//
-//
-// This op also supports decoding PNGs and non-animated GIFs since the interface is
-// the same, though it is cleaner to use `tf.image.decode_image`.
-//
-// Arguments:
-//	contents: 0-D.  The JPEG-encoded image.
-//
-// Returns 3-D with shape `[height, width, channels]`..
-func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DecodeJpeg",
-		Input: []tf.Input{
-			contents,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Inverse 3D real-valued fast Fourier transform.
 //
 // Computes the inverse 3-dimensional discrete Fourier transform of a real-valued
@@ -21950,59 +28528,124 @@
 	return op.Output(0)
 }
 
-// Produces the max pool of the input tensor for quantized types.
+// LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingMomentumParametersGradAccumDebug.
+type LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr)
+
+// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load Momentum embedding parameters with debug support.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
 //
 // Arguments:
-//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
-//	min_input: The float value that the lowest quantized input value represents.
-//	max_input: The float value that the highest quantized input value represents.
-//	ksize: The size of the window for each dimension of the input tensor.
-// The length must be 4 to match the number of dimensions of the input.
-//	strides: The stride of the sliding window for each dimension of the input
-// tensor. The length must be 4 to match the number of dimensions of the input.
-//	padding: The type of padding algorithm to use.
+//	parameters: Value of parameters used in the Momentum optimization algorithm.
+//	momenta: Value of momenta used in the Momentum optimization algorithm.
+//	gradient_accumulators: Value of gradient_accumulators used in the Momentum optimization algorithm.
 //
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
+//
+//
+// Returns the created operation.
+func LoadTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedMaxPool",
+		Type: "LoadTPUEmbeddingMomentumParametersGradAccumDebug",
 		Input: []tf.Input{
-			input, min_input, max_input,
+			parameters, momenta, gradient_accumulators,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// StatefulStandardNormalAttr is an optional argument to StatefulStandardNormal.
+type StatefulStandardNormalAttr func(optionalAttr)
+
+// StatefulStandardNormalDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatefulStandardNormalDtype(value tf.DataType) StatefulStandardNormalAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs random values from a normal distribution.
+//
+// The generated values will have mean 0 and standard deviation 1.
+//
+// Arguments:
+//	resource: The handle of the resource variable that stores the state of the RNG.
+//	shape: The shape of the output tensor.
+//
+// Returns A tensor of the specified shape filled with random normal values.
+func StatefulStandardNormal(scope *Scope, resource tf.Output, shape tf.Output, optional ...StatefulStandardNormalAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StatefulStandardNormal",
+		Input: []tf.Input{
+			resource, shape,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Computes softplus: `log(exp(features) + 1)`.
-func Softplus(scope *Scope, features tf.Output) (activations tf.Output) {
+// Computes the Gauss error function of `x` element-wise.
+func Erf(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Softplus",
+		Type: "Erf",
 		Input: []tf.Input{
-			features,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes exponential of x - 1 element-wise.
-//
-// I.e., \\(y = (\exp x) - 1\\).
-func Expm1(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns element-wise largest integer not greater than x.
+func Floor(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Expm1",
+		Type: "Floor",
 		Input: []tf.Input{
 			x,
 		},
@@ -22070,86 +28713,6 @@
 	return op.Output(0), op.Output(1)
 }
 
-// Returns the set of files matching one or more glob patterns.
-//
-// Note that this routine only supports wildcard characters in the
-// basename portion of the pattern, not in the directory portion.
-// Note also that the order of filenames returned can be non-deterministic.
-//
-// Arguments:
-//	pattern: Shell wildcard pattern(s). Scalar or vector of type string.
-//
-// Returns A vector of matching filenames.
-func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MatchingFiles",
-		Input: []tf.Input{
-			pattern,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth.
-type HistogramFixedWidthAttr func(optionalAttr)
-
-// HistogramFixedWidthDtype sets the optional dtype attribute to value.
-// If not specified, defaults to DT_INT32
-func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Return histogram of values.
-//
-// Given the tensor `values`, this operation returns a rank 1 histogram counting
-// the number of entries in `values` that fall into every bin.  The bins are
-// equal width and determined by the arguments `value_range` and `nbins`.
-//
-// ```python
-// # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
-// nbins = 5
-// value_range = [0.0, 5.0]
-// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
-//
-// with tf.get_default_session() as sess:
-//   hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
-//   variables.global_variables_initializer().run()
-//   sess.run(hist) => [2, 1, 1, 0, 2]
-// ```
-//
-// Arguments:
-//	values: Numeric `Tensor`.
-//	value_range: Shape [2] `Tensor` of same `dtype` as `values`.
-// values <= value_range[0] will be mapped to hist[0],
-// values >= value_range[1] will be mapped to hist[-1].
-//	nbins: Scalar `int32 Tensor`.  Number of histogram bins.
-//
-// Returns A 1-D `Tensor` holding histogram of values.
-func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "HistogramFixedWidth",
-		Input: []tf.Input{
-			values, value_range, nbins,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Conv3DAttr is an optional argument to Conv3D.
 type Conv3DAttr func(optionalAttr)
 
@@ -22361,24 +28924,97 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Creates a dataset that batches `batch_size` elements from `input_dataset`.
+// ComplexAbsAttr is an optional argument to ComplexAbs.
+type ComplexAbsAttr func(optionalAttr)
+
+// ComplexAbsTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_FLOAT
+func ComplexAbsTout(value tf.DataType) ComplexAbsAttr {
+	return func(m optionalAttr) {
+		m["Tout"] = value
+	}
+}
+
+// Computes the complex absolute value of a tensor.
+//
+// Given a tensor `x` of complex numbers, this operation returns a tensor of type
+// `float` or `double` that is the absolute value of each element in `x`. All
+// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute
+// value is computed as \\( \sqrt{a^2 + b^2}\\).
+func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ComplexAbs",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the truth value of x AND y element-wise.
+//
+// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "LogicalAnd",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the reciprocal of x element-wise.
+//
+// I.e., \\(y = 1 / x\\).
+func Inv(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Inv",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that batches input elements into a SparseTensor.
 //
 // Arguments:
-//
-//	batch_size: A scalar representing the number of elements to accumulate in a batch.
-//	drop_remainder: A scalar representing whether the last batch should be dropped in case its size
-// is smaller than desired.
+//	input_dataset: A handle to an input dataset. Must have a single component.
+//	batch_size: A scalar representing the number of elements to accumulate in a
+// batch.
+//	row_shape: A vector representing the dense shape of each row in the produced
+// SparseTensor. The shape may be partially specified, using `-1` to indicate
+// that a particular dimension should use the maximum size of all batch elements.
 //
 //
-func BatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+func ExperimentalDenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "BatchDatasetV2",
+		Type: "ExperimentalDenseToSparseBatchDataset",
 		Input: []tf.Input{
-			input_dataset, batch_size, drop_remainder,
+			input_dataset, batch_size, row_shape,
 		},
 		Attrs: attrs,
 	}
@@ -22386,6 +29022,1260 @@
 	return op.Output(0)
 }
 
+// Computes the reciprocal of x element-wise.
+//
+// I.e., \\(y = 1 / x\\).
+func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Reciprocal",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Conv3DBackpropFilterAttr is an optional argument to Conv3DBackpropFilter.
+type Conv3DBackpropFilterAttr func(optionalAttr)
+
+// Conv3DBackpropFilterDilations sets the optional dilations attribute to value.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
+func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of 3-D convolution with respect to the filter.
+//
+// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2
+//
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, in_channels]`.
+//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+// `in_channels` must match between `input` and `filter`.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Conv3DBackpropFilter",
+		Input: []tf.Input{
+			input, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes square root of x element-wise.
+//
+// I.e., \\(y = \sqrt{x} = x^{1/2}\\).
+func Sqrt(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sqrt",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Get the value of the tensor specified by its handle.
+//
+// Arguments:
+//	handle: The handle for a tensor stored in the session state.
+//	dtype: The type of the output value.
+//
+// Returns The tensor for the given handle.
+func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "GetSessionTensor",
+		Input: []tf.Input{
+			handle,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the gradient for the sqrt of `x` wrt its input.
+//
+// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy`
+// is the corresponding input gradient.
+func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SqrtGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// MatrixInverseAttr is an optional argument to MatrixInverse.
+type MatrixInverseAttr func(optionalAttr)
+
+// MatrixInverseAdjoint sets the optional adjoint attribute to value.
+// If not specified, defaults to false
+func MatrixInverseAdjoint(value bool) MatrixInverseAttr {
+	return func(m optionalAttr) {
+		m["adjoint"] = value
+	}
+}
+
+// Computes the inverse of one or more square invertible matrices or their
+// adjoints (conjugate transposes).
+//
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices. The output is a tensor of the same shape as the input
+// containing the inverse for all input submatrices `[..., :, :]`.
+//
+// The op uses LU decomposition with partial pivoting to compute the inverses.
+//
+// If a matrix is not invertible there is no guarantee what the op does. It
+// may detect the condition and raise an exception or it may simply return a
+// garbage result.
+//
+// Arguments:
+//	input: Shape is `[..., M, M]`.
+//
+// Returns Shape is `[..., M, M]`.
+//
+// @compatibility(numpy)
+// Equivalent to np.linalg.inv
+// @end_compatibility
+func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixInverse",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
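+// A worked sketch (editor's illustration): for [[4, 7], [2, 6]] the
+// determinant is 10, so the inverse is [[0.6, -0.7], [-0.2, 0.4]].
+//
+//	s := NewScope()
+//	m := Const(s, [][]float32{{4, 7}, {2, 6}})
+//	inv := MatrixInverse(s, m)
+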
+// Computes reciprocal of square root of x element-wise.
+//
+// I.e., \\(y = 1 / \sqrt{x}\\).
+func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Rsqrt",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Rounds the values of a tensor to the nearest integer, element-wise.
+//
+// Rounds half to even.  Also known as banker's rounding. If you want to round
+// according to the current system rounding mode, use std::rint.
+func Round(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Round",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
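+// A sketch (editor's illustration) of the half-to-even behavior described
+// above:
+//
+//	s := NewScope()
+//	x := Const(s, []float32{-1.5, -0.5, 0.5, 1.5, 2.5})
+//	y := Round(s, x) // evaluates to [-2, 0, 0, 2, 2]
+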
+// Delete the TensorArray from its resource container.
+//
+// This enables the user to close and release the resource in the middle
+// of a step/run.
+//
+// Arguments:
+//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
+//
+// Returns the created operation.
+func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayCloseV3",
+		Input: []tf.Input{
+			handle,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes exponential of x element-wise.  \\(y = e^x\\).
+func Exp(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Exp",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// NthElementAttr is an optional argument to NthElement.
+type NthElementAttr func(optionalAttr)
+
+// NthElementReverse sets the optional reverse attribute to value.
+//
+// value: When set to True, find the nth-largest value in the vector and vice
+// versa.
+// If not specified, defaults to false
+func NthElementReverse(value bool) NthElementAttr {
+	return func(m optionalAttr) {
+		m["reverse"] = value
+	}
+}
+
+// Finds values of the `n`-th order statistic for the last dimension.
+//
+// If the input is a vector (rank-1), finds the entry which is the nth-smallest
+// value in the vector and outputs its value as a scalar tensor.
+//
+// For matrices (resp. higher rank input), computes the entry which is the
+// nth-smallest value in each row (resp. vector along the last dimension). Thus,
+//
+//     values.shape = input.shape[:-1]
+//
+// Arguments:
+//	input: 1-D or higher with last dimension at least `n+1`.
+//	n: 0-D. Position of sorted vector to select along the last dimension (along
+// each row for matrices). Valid range of n is `[0, input.shape[-1])`
+//
+// Returns The `n`-th order statistic along each last dimensional slice.
+func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "NthElement",
+		Input: []tf.Input{
+			input, n,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
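+// A sketch (editor's illustration): with n = 1 the op selects the
+// second-smallest entry of the vector.
+//
+//	s := NewScope()
+//	x := Const(s, []float32{5, 2, 9, 1})
+//	n := Const(s, int32(1))
+//	v := NthElement(s, x, n) // evaluates to 2
+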
+// Computes the maximum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
+// for an explanation of segments.
+//
+// This operator is similar to the unsorted segment sum operator found
+// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
+// Instead of computing the sum over segments, it computes the maximum such that:
+//
+// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such
+// that `segment_ids[j...] == i`.
+//
+// If the maximum is empty for a given segment ID `i`, it outputs the smallest
+// possible value for the specific numeric type,
+// `output[i] = numeric_limits<T>::lowest()`.
+//
+// If the given segment ID `i` is negative, then the corresponding value is
+// dropped, and will not be included in the result.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
+// </div>
+//
+// For example:
+//
+// ``` python
+// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]])
+// tf.unsorted_segment_max(c, tf.constant([0, 1, 0]), num_segments=2)
+// # ==> [[ 4,  3, 3, 4],
+// #       [5,  6, 7, 8]]
+// ```
+//
+//
+// Arguments:
+//
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
+//
+//
+// Returns Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
+func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "UnsortedSegmentMax",
+		Input: []tf.Input{
+			data, segment_ids, num_segments,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
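+// The same example as the Python snippet above, sketched with the Go wrapper
+// (editor's illustration):
+//
+//	s := NewScope()
+//	data := Const(s, [][]int32{{1, 2, 3, 4}, {5, 6, 7, 8}, {4, 3, 2, 1}})
+//	ids := Const(s, []int32{0, 1, 0})
+//	num := Const(s, int32(2))
+//	out := UnsortedSegmentMax(s, data, ids, num) // [[4, 3, 3, 4], [5, 6, 7, 8]]
+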
+// Computes softplus: `log(exp(features) + 1)`.
+func Softplus(scope *Scope, features tf.Output) (activations tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Softplus",
+		Input: []tf.Input{
+			features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes exponential of x - 1 element-wise.
+//
+// I.e., \\(y = (\exp x) - 1\\).
+func Expm1(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Expm1",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes natural logarithm of x element-wise.
+//
+// I.e., \\(y = \log_e x\\).
+func Log(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Log",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the index of a data point that should be added to the seed set.
+//
+// Entries in distances are assumed to be squared distances of candidate points to
+// the already sampled centers in the seed set. The op constructs one Markov chain
+// of the k-MC^2 algorithm and returns the index of one candidate point to be added
+// as an additional cluster center.
+//
+// Arguments:
+//	distances: Vector with squared distances to the closest previously sampled cluster center
+// for each candidate point.
+//	seed: Scalar. Seed for initializing the random number generator.
+//
+// Returns Scalar with the index of the sampled point.
+func KMC2ChainInitialization(scope *Scope, distances tf.Output, seed tf.Output) (index tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "KMC2ChainInitialization",
+		Input: []tf.Input{
+			distances, seed,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes hyperbolic sine of x element-wise.
+func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sinh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along sparse segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
+// for an explanation of segments.
+//
+// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
+// dimension, selecting a subset of dimension 0, specified by `indices`.
+//
+// For example:
+//
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// # Select two rows, one segment.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+// # => [[0 0 0 0]]
+//
+// # Select two rows, two segments.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+// # => [[ 1  2  3  4]
+// #     [-1 -2 -3 -4]]
+//
+// # Select all rows, two segments.
+// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+// # => [[0 0 0 0]
+// #     [5 6 7 8]]
+//
+// # Which is equivalent to:
+// tf.segment_sum(c, tf.constant([0, 0, 1]))
+// ```
+//
+// Arguments:
+//
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSegmentSum",
+		Input: []tf.Input{
+			data, indices, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
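+// The first case of the Python example above, sketched with the Go wrapper
+// (editor's illustration):
+//
+//	s := NewScope()
+//	c := Const(s, [][]int32{{1, 2, 3, 4}, {-1, -2, -3, -4}, {5, 6, 7, 8}})
+//	idx := Const(s, []int32{0, 1})
+//	seg := Const(s, []int32{0, 0})
+//	out := SparseSegmentSum(s, c, idx, seg) // evaluates to [[0, 0, 0, 0]]
+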
+// CastAttr is an optional argument to Cast.
+type CastAttr func(optionalAttr)
+
+// CastTruncate sets the optional Truncate attribute to value.
+// If not specified, defaults to false
+func CastTruncate(value bool) CastAttr {
+	return func(m optionalAttr) {
+		m["Truncate"] = value
+	}
+}
+
+// Cast x of type SrcT to y of DstT.
+func Cast(scope *Scope, x tf.Output, DstT tf.DataType, optional ...CastAttr) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"DstT": DstT}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Cast",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
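+// A sketch (editor's illustration): casting float32 to int32 truncates toward
+// zero.
+//
+//	s := NewScope()
+//	x := Const(s, []float32{1.9, -1.9})
+//	y := Cast(s, x, tf.Int32) // evaluates to [1, -1]
+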
+// Computes the log of the absolute value of `Gamma(x)` element-wise.
+func Lgamma(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Lgamma",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// UnicodeEncodeAttr is an optional argument to UnicodeEncode.
+type UnicodeEncodeAttr func(optionalAttr)
+
+// UnicodeEncodeErrors sets the optional errors attribute to value.
+//
+// value: Error handling policy when there is invalid formatting found in the input.
+// The value of 'strict' will cause the operation to produce an InvalidArgument
+// error on any invalid input formatting. A value of 'replace' (the default) will
+// cause the operation to replace any invalid formatting in the input with the
+// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
+// skip any invalid formatting in the input and produce no corresponding output
+// character.
+// If not specified, defaults to "replace"
+func UnicodeEncodeErrors(value string) UnicodeEncodeAttr {
+	return func(m optionalAttr) {
+		m["errors"] = value
+	}
+}
+
+// UnicodeEncodeReplacementChar sets the optional replacement_char attribute to value.
+//
+// value: The replacement character codepoint to be used in place of any invalid
+// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+// be used. The default value is the unicode replacement character 0xFFFD
+// (decimal 65533).
+// If not specified, defaults to 65533
+func UnicodeEncodeReplacementChar(value int64) UnicodeEncodeAttr {
+	return func(m optionalAttr) {
+		m["replacement_char"] = value
+	}
+}
+
+// Encode a tensor of ints into unicode strings.
+//
+// Returns a vector of strings, where `output[i]` is constructed by encoding the
+// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]`
+// using `output_encoding`.
+//
+// ---
+//
+// Example:
+//
+// ```
+// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100]
+// input_splits = [0, 5, 10]
+// output_encoding = 'UTF-8'
+//
+// output = ['Hello', 'World']
+// ```
+//
+// Arguments:
+//	input_values: A 1D tensor containing the unicode codepoints that should be encoded.
+//	input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings.
+// In particular, `output[i]` is constructed by encoding the codepoints in the
+// slice `input_values[input_splits[i]:input_splits[i+1]]`.
+//	output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8",
+// "UTF-16-BE", and "UTF-32-BE"`.
+//
+// Returns The 1-D Tensor of strings encoded from the provided unicode codepoints.
+func UnicodeEncode(scope *Scope, input_values tf.Output, input_splits tf.Output, output_encoding string, optional ...UnicodeEncodeAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_encoding": output_encoding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "UnicodeEncode",
+		Input: []tf.Input{
+			input_values, input_splits,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
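+// The "Hello"/"World" example above, sketched with the Go wrapper (editor's
+// illustration):
+//
+//	s := NewScope()
+//	values := Const(s, []int32{72, 101, 108, 108, 111, 87, 111, 114, 108, 100})
+//	splits := Const(s, []int64{0, 5, 10})
+//	out := UnicodeEncode(s, values, splits, "UTF-8") // ["Hello", "World"]
+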
+// Computes the complementary error function of `x` element-wise.
+func Erfc(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Erfc",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes sigmoid of `x` element-wise.
+//
+// Specifically, `y = 1 / (1 + exp(-x))`.
+func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sigmoid",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes sin of x element-wise.
+func Sin(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sin",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad.
+type FusedBatchNormGradAttr func(optionalAttr)
+
+// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value.
+//
+// value: A small float number added to the variance of x.
+// If not specified, defaults to 0.0001
+func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr {
+	return func(m optionalAttr) {
+		m["epsilon"] = value
+	}
+}
+
+// FusedBatchNormGradDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format for y_backprop, x, x_backprop.
+// Either "NHWC" (default) or "NCHW".
+// If not specified, defaults to "NHWC"
+func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// FusedBatchNormGradIsTraining sets the optional is_training attribute to value.
+//
+// value: A bool value to indicate the operation is for training (default)
+// or inference.
+// If not specified, defaults to true
+func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr {
+	return func(m optionalAttr) {
+		m["is_training"] = value
+	}
+}
+
+// Gradient for batch normalization.
+//
+// Note that the size of 4D Tensors is defined by either "NHWC" or "NCHW".
+// The size of 1D Tensors matches the dimension C of the 4D Tensors.
+//
+// Arguments:
+//	y_backprop: A 4D Tensor for the gradient with respect to y.
+//	x: A 4D Tensor for input data.
+//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
+//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
+// mean to be reused in gradient computation. When is_training is
+// False, a 1D Tensor for the population mean to be reused in both
+// 1st and 2nd order gradient computation.
+//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
+// variance (inverted variance in the cuDNN case) to be reused in
+// gradient computation. When is_training is False, a 1D Tensor
+// for the population variance to be reused in both 1st and 2nd
+// order gradient computation.
+//
+// Returns:
+//	A 4D Tensor for the gradient with respect to x.
+//	A 1D Tensor for the gradient with respect to scale.
+//	A 1D Tensor for the gradient with respect to offset.
+//	Unused placeholder to match the mean input in FusedBatchNorm.
+//	Unused placeholder to match the variance input in FusedBatchNorm.
+func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "FusedBatchNormGrad",
+		Input: []tf.Input{
+			y_backprop, x, scale, reserve_space_1, reserve_space_2,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+}
+
+// Computes cos of x element-wise.
+func Cos(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Cos",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the determinant of one or more square matrices.
+//
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices. The output is a tensor containing the determinants
+// for all input submatrices `[..., :, :]`.
+//
+// Arguments:
+//	input: Shape is `[..., M, M]`.
+//
+// Returns Shape is `[...]`.
+func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixDeterminant",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
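+// A minimal usage sketch (values are illustrative):
+//
+//	s := op.NewScope()
+//	m := op.Const(s, [][]float32{{1, 2}, {3, 4}})
+//	det := op.MatrixDeterminant(s, m) // scalar: 1*4 - 2*3 = -2
+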
+// Updates the tree ensemble by either adding a layer to the last tree being grown
+//
+// or by starting a new tree.
+//
+// Arguments:
+//	tree_ensemble_handle: Handle to the ensemble variable.
+//	feature_ids: Rank 1 tensor with ids for each feature. This is the real id of
+// the feature that will be used in the split.
+//	node_ids: List of rank 1 tensors representing the nodes for which this feature
+// has a split.
+//	gains: List of rank 1 tensors representing the gains for each of the feature's
+// splits.
+//	thresholds: List of rank 1 tensors representing the thresholds for each of the
+// feature's splits.
+//	left_node_contribs: List of rank 2 tensors with left leaf contribs for each of
+// the feature's splits. Will be added to the previous node values to constitute
+// the values of the left nodes.
+//	right_node_contribs: List of rank 2 tensors with right leaf contribs for each
+// of the feature's splits. Will be added to the previous node values to constitute
+// the values of the right nodes.
+//	max_depth: Max depth of the tree to build.
+//	learning_rate: Shrinkage constant for each new tree.
+//	pruning_mode: 0-No pruning, 1-Pre-pruning, 2-Post-pruning.
+//
+// Returns the created operation.
+func BoostedTreesUpdateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, feature_ids tf.Output, node_ids []tf.Output, gains []tf.Output, thresholds []tf.Output, left_node_contribs []tf.Output, right_node_contribs []tf.Output, max_depth tf.Output, learning_rate tf.Output, pruning_mode int64) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"pruning_mode": pruning_mode}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesUpdateEnsemble",
+		Input: []tf.Input{
+			tree_ensemble_handle, feature_ids, tf.OutputList(node_ids), tf.OutputList(gains), tf.OutputList(thresholds), tf.OutputList(left_node_contribs), tf.OutputList(right_node_contribs), max_depth, learning_rate,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes tan of x element-wise.
+func Tan(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Tan",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that emits each dim-0 slice of `components` once.
+func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "TensorSliceDataset",
+		Input: []tf.Input{
+			tf.OutputList(components),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
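+// A minimal usage sketch (values are illustrative): three elements of shape
+// [2] are emitted, so the declared shape list must match:
+//
+//	s := op.NewScope()
+//	c := op.Const(s, [][]float32{{1, 2}, {3, 4}, {5, 6}})
+//	ds := op.TensorSliceDataset(s, []tf.Output{c}, []tf.Shape{tf.MakeShape(2)})
+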
+// Computes acos of x element-wise.
+func Acos(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Acos",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the Bessel i0e function of `x` element-wise.
+//
+// Exponentially scaled modified Bessel function of order 0 defined as
+// `bessel_i0e(x) = exp(-abs(x)) bessel_i0(x)`.
+//
+// This function is faster and more numerically stable than `bessel_i0(x)`.
+func BesselI0e(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BesselI0e",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Shuffle dimensions of x according to a permutation.
+//
+// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
+//   `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
+func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Transpose",
+		Input: []tf.Input{
+			x, perm,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
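+// A minimal usage sketch (values are illustrative):
+//
+//	s := op.NewScope()
+//	x := op.Const(s, [][]float32{{1, 2, 3}, {4, 5, 6}}) // shape [2, 3]
+//	perm := op.Const(s, []int32{1, 0})
+//	y := op.Transpose(s, x, perm) // shape [3, 2]
+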
+// MinAttr is an optional argument to Min.
+type MinAttr func(optionalAttr)
+
+// MinKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func MinKeepDims(value bool) MinAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the minimum of elements across dimensions of a tensor.
+//
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
+//
+// Arguments:
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
+//
+// Returns The reduced tensor.
+func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Min",
+		Input: []tf.Input{
+			input, axis,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
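+// A minimal usage sketch (values are illustrative):
+//
+//	s := op.NewScope()
+//	x := op.Const(s, [][]float32{{3, 1}, {2, 4}})
+//	axis := op.Const(s, []int32{1})
+//	m := op.Min(s, x, axis, op.MinKeepDims(true)) // [[1], [2]], shape [2, 1]
+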
+// Computes the Bessel i1e function of `x` element-wise.
+//
+// Exponentially scaled modified Bessel function of order 1 defined as
+// `bessel_i1e(x) = exp(-abs(x)) bessel_i1(x)`.
+//
+// This function is faster and more numerically stable than `bessel_i1(x)`.
+func BesselI1e(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BesselI1e",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns an element-wise indication of the sign of a number.
+//
+// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
+//
+// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
+func Sign(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sign",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that passes a sliding window over `input_dataset`.
+//
+// Arguments:
+//
+//	window_size: A scalar representing the number of elements in the
+// sliding window.
+//	window_shift: A scalar representing the steps moving the sliding window
+// forward in one iteration. It must be positive.
+//	window_stride: A scalar representing the stride of the input elements of the sliding window.
+// It must be positive.
+//
+//
+func ExperimentalSlidingWindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalSlidingWindowDataset",
+		Input: []tf.Input{
+			input_dataset, window_size, window_shift, window_stride,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey.
+type OrderedMapUnstageNoKeyAttr func(optionalAttr)
+
+// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes and returns the (key, value) element with the smallest key.
+//
+// If the underlying container does not contain elements, the op will block
+// until it does.
+func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "OrderedMapUnstageNoKey",
+		Input: []tf.Input{
+			indices,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	key = op.Output(idx)
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("OrderedMapUnstageNoKey", err)
+		return
+	}
+	return key, values
+}
+
+// Returns element-wise integer closest to x.
+//
+// If the result is midway between two representable values,
+// the even representable value is chosen.
+// For example:
+//
+// ```
+// rint(-1.5) ==> -2.0
+// rint(0.5000001) ==> 1.0
+// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
+// ```
+func Rint(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Rint",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
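+// A minimal usage sketch (values are illustrative):
+//
+//	s := op.NewScope()
+//	x := op.Const(s, []float32{-1.5, 0.5, 1.5})
+//	y := op.Rint(s, x) // [-2, 0, 2]: halfway cases round to even
+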
+// Computes the derivative of a Gamma random sample w.r.t. `alpha`.
+func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomGammaGrad",
+		Input: []tf.Input{
+			alpha, sample,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns x + y element-wise.
+//
+// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Add",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns x + y element-wise.
+//
+// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AddV2",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
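+// A minimal broadcasting sketch for Add (AddV2 behaves the same; values are
+// illustrative):
+//
+//	s := op.NewScope()
+//	x := op.Const(s, [][]float32{{1, 2}, {3, 4}})
+//	y := op.Const(s, []float32{10, 20}) // broadcast across rows
+//	z := op.Add(s, x, y)                // [[11, 22], [13, 24]]
+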
+// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
+type AllCandidateSamplerAttr func(optionalAttr)
+
+// AllCandidateSamplerSeed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Generates labels for candidate sampling with a learned unigram distribution.
+//
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to produce.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//
+// Returns:
+//	A vector of length num_sampled, in which each element is the ID of a
+// sampled candidate.
+//	A batch_size * num_true matrix, representing the number of times each
+// candidate is expected to occur in a batch of sampled candidates. If
+// unique=true, then this is a probability.
+//	A vector of length num_sampled, for each sampled candidate representing
+// the number of times the candidate is expected to occur in a batch of
+// sampled candidates. If unique=true, then this is a probability.
+func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "AllCandidateSampler",
+		Input: []tf.Input{
+			true_classes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Returns element-wise remainder of division.
+//
+// When `x < 0` xor `y < 0` is true, this follows Python semantics in that the
+// result is consistent with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.
+//
+// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "FloorMod",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
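+// A minimal usage sketch (values are illustrative); with flooring semantics
+// the result takes the sign of the divisor `y`:
+//
+//	s := op.NewScope()
+//	x := op.Const(s, []int32{-7, 7})
+//	y := op.Const(s, []int32{5, -5})
+//	z := op.FloorMod(s, x, y) // [3, -3]
+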
 // Saves the input tensors to disk.
 //
 // The size of `tensor_names` must match the number of tensors in `data`. `data[i]`
@@ -22413,19 +30303,21 @@
 	return scope.AddOperation(opspec)
 }
 
-// Returns element-wise remainder of division. When `x < 0` xor `y < 0` is
+// Returns x / y element-wise for integer types.
 //
-// true, this follows Python semantics in that the result here is consistent
-// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.
+// Truncation designates that negative numbers round fractional quantities
+// toward zero, i.e. -7 / 5 = -1. This matches C semantics but differs from
+// Python semantics. See `FloorDiv` for a division function that matches
+// Python semantics.
 //
-// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting
+// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting
 // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "FloorMod",
+		Type: "TruncateDiv",
 		Input: []tf.Input{
 			x, y,
 		},
@@ -22434,105 +30326,30 @@
 	return op.Output(0)
 }
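+
+// A minimal usage sketch (values are illustrative); truncation rounds toward
+// zero:
+//
+//	s := op.NewScope()
+//	x := op.Const(s, []int32{-7, 7})
+//	y := op.Const(s, []int32{5, 5})
+//	z := op.TruncateDiv(s, x, y) // [-1, 1]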
 
-// Computes the gradient of morphological 2-D dilation with respect to the filter.
+// RequantizePerChannelAttr is an optional argument to RequantizePerChannel.
+type RequantizePerChannelAttr func(optionalAttr)
+
+// RequantizePerChannelOutType sets the optional out_type attribute to value.
+//
+// value: The quantized type of output tensor that needs to be converted.
+// If not specified, defaults to DT_QUINT8
+func RequantizePerChannelOutType(value tf.DataType) RequantizePerChannelAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Requantizes input with min and max values known per channel.
 //
 // Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
-//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
-//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
-// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
-//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
-// Must be: `[1, rate_height, rate_width, 1]`.
-//	padding: The type of padding algorithm to use.
+//	input: The original input tensor.
+//	input_min: The minimum value of the input tensor.
+//	input_max: The maximum value of the input tensor.
+//	requested_output_min: The requested minimum value of the output tensor.
+//	requested_output_max: The requested maximum value of the output tensor.
 //
-// Returns 3-D with shape `[filter_height, filter_width, depth]`.
-func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "Dilation2DBackpropFilter",
-		Input: []tf.Input{
-			input, filter, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns a list list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`.
-//
-// tensor: The tensor to put on the list.
-// input_handle: The old list.
-// output_handle: A list with the elements of the old list followed by tensor.
-// element_dtype: the type of elements in the list.
-// element_shape: a shape compatible with that of elements in the list.
-func TensorListPushBack(scope *Scope, input_handle tf.Output, tensor tf.Output) (output_handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorListPushBack",
-		Input: []tf.Input{
-			input_handle, tensor,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap.
-type AddSparseToTensorsMapAttr func(optionalAttr)
-
-// AddSparseToTensorsMapContainer sets the optional container attribute to value.
-//
-// value: The container name for the `SparseTensorsMap` created by this op.
-// If not specified, defaults to ""
-func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value.
-//
-// value: The shared name for the `SparseTensorsMap` created by this op.
-// If blank, the new Operation's unique name is used.
-// If not specified, defaults to ""
-func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Add a `SparseTensor` to a `SparseTensorsMap` return its handle.
-//
-// A `SparseTensor` is represented by three tensors: `sparse_indices`,
-// `sparse_values`, and `sparse_shape`.
-//
-// This operator takes the given `SparseTensor` and adds it to a container
-// object (a `SparseTensorsMap`).  A unique key within this container is generated
-// in the form of an `int64`, and this is the value that is returned.
-//
-// The `SparseTensor` can then be read out as part of a minibatch by passing
-// the key as a vector element to `TakeManySparseFromTensorsMap`.  To ensure
-// the correct `SparseTensorsMap` is accessed, ensure that the same
-// `container` and `shared_name` are passed to that Op.  If no `shared_name`
-// is provided here, instead use the *name* of the Operation created by calling
-// `AddSparseToTensorsMap` as the `shared_name` passed to
-// `TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
-//
-// Arguments:
-//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
-//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
-//
-// Returns 0-D.  The handle of the `SparseTensor` now stored in the
-// `SparseTensorsMap`.
-func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) {
+// Returns:
+//	The output tensor.
+//	The minimum value of the final output tensor.
+//	The maximum value of the final output tensor.
+func RequantizePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, optional ...RequantizePerChannelAttr) (output tf.Output, output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -22541,73 +30358,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AddSparseToTensorsMap",
+		Type: "RequantizePerChannel",
 		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Deserialize and concatenate `SparseTensors` from a serialized minibatch.
-//
-// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where
-// `N` is the minibatch size and the rows correspond to packed outputs of
-// `SerializeSparse`.  The ranks of the original `SparseTensor` objects
-// must all match.  When the final `SparseTensor` is created, it has rank one
-// higher than the ranks of the incoming `SparseTensor` objects
-// (they have been concatenated along a new row dimension).
-//
-// The output `SparseTensor` object's shape values for all dimensions but the
-// first are the max across the input `SparseTensor` objects' shape values
-// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
-// size.
-//
-// The input `SparseTensor` objects' indices are assumed ordered in
-// standard lexicographic order.  If this is not the case, after this
-// step run `SparseReorder` to restore index ordering.
-//
-// For example, if the serialized input is a `[2 x 3]` matrix representing two
-// original `SparseTensor` objects:
-//
-//     index = [ 0]
-//             [10]
-//             [20]
-//     values = [1, 2, 3]
-//     shape = [50]
-//
-// and
-//
-//     index = [ 2]
-//             [10]
-//     values = [4, 5]
-//     shape = [30]
-//
-// then the final deserialized `SparseTensor` will be:
-//
-//     index = [0  0]
-//             [0 10]
-//             [0 20]
-//             [1  2]
-//             [1 10]
-//     values = [1, 2, 3, 4, 5]
-//     shape = [2 50]
-//
-// Arguments:
-//	serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects.
-// Must have 3 columns.
-//	dtype: The `dtype` of the serialized `SparseTensor` objects.
-func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	opspec := tf.OpSpec{
-		Type: "DeserializeManySparse",
-		Input: []tf.Input{
-			serialized_sparse,
+			input, input_min, input_max, requested_output_min, requested_output_max,
 		},
 		Attrs: attrs,
 	}
@@ -22615,657 +30368,124 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Elementwise computes the bitwise AND of `x` and `y`.
+// Restores tensors from a V2 checkpoint.
 //
-// The result will have those bits set, that are set in both `x` and `y`. The
-// computation is performed on the underlying representations of `x` and `y`.
-func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BitwiseAnd",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Inverse real-valued fast Fourier transform.
+// For backward compatibility with the V1 format, this Op currently allows
+// restoring from a V1 checkpoint as well:
+//   - This Op first attempts to find the V2 index file pointed to by "prefix", and
+//     if found, proceeds to read it as a V2 checkpoint;
+//   - Otherwise the V1 read path is invoked.
+// Relying on this behavior is not recommended, as the ability to fall back to read
+// V1 might be deprecated and eventually removed.
 //
-// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued
-// signal over the inner-most dimension of `input`.
+// By default, restores the named tensors in full.  If the caller wishes to restore
+// specific slices of stored tensors, "shape_and_slices" should be non-empty
+// strings and correspondingly well-formed.
 //
-// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the
-// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If
-// `fft_length` is not provided, it is computed from the size of the inner-most
-// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to
-// compute `input` is odd, it should be provided since it cannot be inferred
-// properly.
-//
-// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller
-// than the corresponding dimension of `input`, the dimension is cropped. If it is
-// larger, the dimension is padded with zeros.
+// Callers must ensure all the named tensors are indeed stored in the checkpoint.
 //
 // Arguments:
-//	input: A complex64 tensor.
-//	fft_length: An int32 tensor of shape [1]. The FFT length.
+//	prefix: Must have a single element.  The prefix of a V2 checkpoint.
+//	tensor_names: shape {N}.  The names of the tensors to be restored.
+//	shape_and_slices: shape {N}.  The slice specs of the tensors to be restored.
+// Empty strings indicate that they are non-partitioned tensors.
+//	dtypes: shape {N}.  The list of expected dtype for the tensors.  Must match
+// those stored in the checkpoint.
 //
-// Returns A float32 tensor of the same rank as `input`. The inner-most
-//   dimension of `input` is replaced with the `fft_length` samples of its inverse
-//   1D Fourier transform.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.irfft
-// @end_compatibility
-func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+// Returns shape {N}.  The restored tensors, whose shapes are read from the
+// checkpoint directly.
+func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	opspec := tf.OpSpec{
-		Type: "IRFFT",
+		Type: "RestoreV2",
 		Input: []tf.Input{
-			input, fft_length,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Concatenates a list of `SparseTensor` along the specified dimension.
-//
-// Concatenation is with respect to the dense versions of these sparse tensors.
-// It is assumed that each input is a `SparseTensor` whose elements are ordered
-// along increasing dimension number.
-//
-// All inputs' shapes must match, except for the concat dimension.  The
-// `indices`, `values`, and `shapes` lists must have the same length.
-//
-// The output shape is identical to the inputs', except along the concat
-// dimension, where it is the sum of the inputs' sizes along that dimension.
-//
-// The output elements will be resorted to preserve the sort order along
-// increasing dimension number.
-//
-// This op runs in `O(M log M)` time, where `M` is the total number of non-empty
-// values across all inputs. This is due to the need for an internal sort in
-// order to concatenate efficiently across an arbitrary dimension.
-//
-// For example, if `concat_dim = 1` and the inputs are
-//
-//     sp_inputs[0]: shape = [2, 3]
-//     [0, 2]: "a"
-//     [1, 0]: "b"
-//     [1, 1]: "c"
-//
-//     sp_inputs[1]: shape = [2, 4]
-//     [0, 1]: "d"
-//     [0, 2]: "e"
-//
-// then the output will be
-//
-//     shape = [2, 7]
-//     [0, 2]: "a"
-//     [0, 4]: "d"
-//     [0, 5]: "e"
-//     [1, 0]: "b"
-//     [1, 1]: "c"
-//
-// Graphically this is equivalent to doing
-//
-//     [    a] concat [  d e  ] = [    a   d e  ]
-//     [b c  ]        [       ]   [b c          ]
-//
-// Arguments:
-//	indices: 2-D.  Indices of each input `SparseTensor`.
-//	values: 1-D.  Non-empty values of each `SparseTensor`.
-//	shapes: 1-D.  Shapes of each `SparseTensor`.
-//	concat_dim: Dimension to concatenate along. Must be in range [-rank, rank),
-// where rank is the number of dimensions in each input `SparseTensor`.
-//
-// Returns 2-D.  Indices of the concatenated `SparseTensor`.1-D.  Non-empty values of the concatenated `SparseTensor`.1-D.  Shape of the concatenated `SparseTensor`.
-func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"concat_dim": concat_dim}
-	opspec := tf.OpSpec{
-		Type: "SparseConcat",
-		Input: []tf.Input{
-			tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes),
+			prefix, tensor_names, shape_and_slices,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Generates sparse cross from a list of sparse and dense tensors.
-//
-// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each
-// representing features of one feature column. It outputs a 2D `SparseTensor` with
-// the batchwise crosses of these features.
-//
-// For example, if the inputs are
-//
-//     inputs[0]: SparseTensor with shape = [2, 2]
-//     [0, 0]: "a"
-//     [1, 0]: "b"
-//     [1, 1]: "c"
-//
-//     inputs[1]: SparseTensor with shape = [2, 1]
-//     [0, 0]: "d"
-//     [1, 0]: "e"
-//
-//     inputs[2]: Tensor [["f"], ["g"]]
-//
-// then the output will be
-//
-//     shape = [2, 2]
-//     [0, 0]: "a_X_d_X_f"
-//     [1, 0]: "b_X_e_X_g"
-//     [1, 1]: "c_X_e_X_g"
-//
-// if hashed_output=true then the output will be
-//
-//     shape = [2, 2]
-//     [0, 0]: FingerprintCat64(
-//                 Fingerprint64("f"), FingerprintCat64(
-//                     Fingerprint64("d"), Fingerprint64("a")))
-//     [1, 0]: FingerprintCat64(
-//                 Fingerprint64("g"), FingerprintCat64(
-//                     Fingerprint64("e"), Fingerprint64("b")))
-//     [1, 1]: FingerprintCat64(
-//                 Fingerprint64("g"), FingerprintCat64(
-//                     Fingerprint64("e"), Fingerprint64("c")))
-//
-// Arguments:
-//	indices: 2-D.  Indices of each input `SparseTensor`.
-//	values: 1-D.   values of each `SparseTensor`.
-//	shapes: 1-D.   Shapes of each `SparseTensor`.
-//	dense_inputs: 2-D.    Columns represented by dense `Tensor`.
-//	hashed_output: If true, returns the hash of the cross instead of the string.
-// This will allow us avoiding string manipulations.
-//	num_buckets: It is used if hashed_output is true.
-// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value.
-//	hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
-// function to combine the crosses fingerprints.
-//
-//
-//
-// Returns 2-D.  Indices of the concatenated `SparseTensor`.1-D.  Non-empty values of the concatenated or hashed
-// `SparseTensor`.1-D.  Shape of the concatenated `SparseTensor`.
-func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type}
-	opspec := tf.OpSpec{
-		Type: "SparseCross",
-		Input: []tf.Input{
-			tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad.
-type ResourceApplyProximalAdagradAttr func(optionalAttr)
-
-// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
-//
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
-//
-// accum += grad * grad
-// prox_v = var - lr * grad * (1 / sqrt(accum))
-// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	grad: The gradient.
-//
-// Returns the created operation.
-func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
+	var idx int
+	var err error
+	if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil {
+		scope.UpdateErr("RestoreV2", err)
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyProximalAdagrad",
-		Input: []tf.Input{
-			var_, accum, lr, l1, l2, grad,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
+	return tensors
 }
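+
+// A minimal usage sketch (the checkpoint prefix and tensor names are
+// illustrative, not part of this file):
+//
+//	s := op.NewScope()
+//	prefix := op.Const(s, "/tmp/model/ckpt")
+//	names := op.Const(s, []string{"w", "b"})
+//	slices := op.Const(s, []string{"", ""}) // non-partitioned tensors
+//	tensors := op.RestoreV2(s, prefix, names, slices, []tf.DataType{tf.Float, tf.Float})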
 
-// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2.
-type MutableHashTableOfTensorsV2Attr func(optionalAttr)
+// FIFOQueueV2Attr is an optional argument to FIFOQueueV2.
+type FIFOQueueV2Attr func(optionalAttr)
 
-// MutableHashTableOfTensorsV2Container sets the optional container attribute to value.
+// FIFOQueueV2Shapes sets the optional shapes attribute to value.
 //
-// value: If non-empty, this table is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this table is shared under the given name across
-// multiple sessions.
-// If not specified, defaults to ""
-func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
-// If not specified, defaults to false
-func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr {
-	return func(m optionalAttr) {
-		m["use_node_name_sharing"] = value
-	}
-}
-
-// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value.
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types. If the length of
+// this attr is 0, the shapes of queue elements are not constrained, and
+// only one element may be dequeued at a time.
 // If not specified, defaults to <>
-func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr {
+//
+// REQUIRES: len(value) >= 0
+func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr {
 	return func(m optionalAttr) {
-		m["value_shape"] = value
+		m["shapes"] = value
 	}
 }
 
-// Creates an empty hash table.
+// FIFOQueueV2Capacity sets the optional capacity attribute to value.
 //
-// This op creates a mutable hash table, specifying the type of its keys and
-// values. Each value must be a vector. Data can be inserted into the table using
-// the insert operations. It does not support the initialization operation.
-//
-// Arguments:
-//	key_dtype: Type of the table keys.
-//	value_dtype: Type of the table values.
-//
-// Returns Handle to a table.
-func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MutableHashTableOfTensorsV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// The gradient operator for the SparseSlice op.
-//
-// This op takes in the upstream gradient w.r.t. non-empty values of
-// the sliced `SparseTensor`, and outputs the gradients w.r.t.
-// the non-empty values of input `SparseTensor`.
-//
-// Arguments:
-//	backprop_val_grad: 1-D. The gradient with respect to
-// the non-empty values of the sliced `SparseTensor`.
-//	input_indices: 2-D.  The `indices` of the input `SparseTensor`.
-//	input_start: 1-D. tensor represents the start of the slice.
-//	output_indices: 2-D.  The `indices` of the sliced `SparseTensor`.
-//
-// Returns 1-D. The gradient with respect to the non-empty values of input `SparseTensor`.
-func SparseSliceGrad(scope *Scope, backprop_val_grad tf.Output, input_indices tf.Output, input_start tf.Output, output_indices tf.Output) (val_grad tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSliceGrad",
-		Input: []tf.Input{
-			backprop_val_grad, input_indices, input_start, output_indices,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that changes the batch size.
-//
-// Creates a dataset that changes the batch size of the dataset to current batch
-// size // num_workers.
-//
-// Arguments:
-//	input_dataset: A variant tensor representing the input dataset.
-//	num_workers: A scalar representing the number of workers to distribute this batch across. As
-// a result of this transformation the current batch size would end up being
-// divided  by this parameter.
-//
-//
-func ExperimentalRebatchDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalRebatchDataset",
-		Input: []tf.Input{
-			input_dataset, num_workers,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the gradient of the sigmoid of `x` wrt its input.
-//
-// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and
-// `dy` is the corresponding input gradient.
-func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SigmoidGrad",
-		Input: []tf.Input{
-			y, dy,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Convert one or more images from HSV to RGB.
-//
-// Outputs a tensor of the same shape as the `images` tensor, containing the RGB
-// value of the pixels. The output is only well defined if the value in `images`
-// are in `[0,1]`.
-//
-// See `rgb_to_hsv` for a description of the HSV encoding.
-//
-// Arguments:
-//	images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3.
-//
-// Returns `images` converted to RGB.
-func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "HSVToRGB",
-		Input: []tf.Input{
-			images,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset by applying optimizations to `input_dataset`.
-//
-// Creates a dataset by applying optimizations to `input_dataset`.
-//
-// Arguments:
-//	input_dataset: A variant tensor representing the input dataset.
-//	optimizations: A `tf.string` vector `tf.Tensor` identifying optimizations to use.
-//
-//
-func OptimizeDataset(scope *Scope, input_dataset tf.Output, optimizations tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "OptimizeDataset",
-		Input: []tf.Input{
-			input_dataset, optimizations,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the element-wise min of two SparseTensors.
-//
-// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
-//
-// Arguments:
-//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, in the canonical lexicographic ordering.
-//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
-//	a_shape: 1-D.  Shape of the input SparseTensor.
-//	b_indices: counterpart to `a_indices` for the other operand.
-//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
-//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
-//
-// Returns 2-D.  The indices of the output SparseTensor.1-D.  The values of the output SparseTensor.
-func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSparseMinimum",
-		Input: []tf.Input{
-			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// ResourceApplyAdamWithAmsgradAttr is an optional argument to ResourceApplyAdamWithAmsgrad.
-type ResourceApplyAdamWithAmsgradAttr func(optionalAttr)
-
-// ResourceApplyAdamWithAmsgradUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var, m, and v tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAdamWithAmsgradUseLocking(value bool) ResourceApplyAdamWithAmsgradAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the Adam algorithm.
-//
-// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
-// $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
-// $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
-// $$vhat_t := max{vhat_{t-1}, v_t}$$
-// $$variable := variable - lr_t * m_t / (\sqrt{vhat_t} + \epsilon)$$
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	v: Should be from a Variable().
-//	vhat: Should be from a Variable().
-//	beta1_power: Must be a scalar.
-//	beta2_power: Must be a scalar.
-//	lr: Scaling factor. Must be a scalar.
-//	beta1: Momentum factor. Must be a scalar.
-//	beta2: Momentum factor. Must be a scalar.
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
-//
-// Returns the created operation.
-func ResourceApplyAdamWithAmsgrad(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, vhat tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamWithAmsgradAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdamWithAmsgrad",
-		Input: []tf.Input{
-			var_, m, v, vhat, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey.
-type MapUnstageNoKeyAttr func(optionalAttr)
-
-// MapUnstageNoKeyCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr {
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr {
 	return func(m optionalAttr) {
 		m["capacity"] = value
 	}
 }
 
-// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// FIFOQueueV2Container sets the optional container attribute to value.
 //
-// REQUIRES: value >= 0
-func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// MapUnstageNoKeyContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// MapUnstageNoKeySharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op removes and returns a random (key, value)
-//
-// from the underlying container.   If the underlying container
-// does not contain elements, the op will block until it does.
-func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MapUnstageNoKey",
-		Input: []tf.Input{
-			indices,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	key = op.Output(idx)
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("MapUnstageNoKey", err)
-		return
-	}
-	return key, values
-}
-
-// HashTableV2Attr is an optional argument to HashTableV2.
-type HashTableV2Attr func(optionalAttr)
-
-// HashTableV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this table is placed in the given container.
+// value: If non-empty, this queue is placed in the given container.
 // Otherwise, a default container is used.
 // If not specified, defaults to ""
-func HashTableV2Container(value string) HashTableV2Attr {
+func FIFOQueueV2Container(value string) FIFOQueueV2Attr {
 	return func(m optionalAttr) {
 		m["container"] = value
 	}
 }
 
-// HashTableV2SharedName sets the optional shared_name attribute to value.
+// FIFOQueueV2SharedName sets the optional shared_name attribute to value.
 //
-// value: If non-empty, this table is shared under the given name across
-// multiple sessions.
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
 // If not specified, defaults to ""
-func HashTableV2SharedName(value string) HashTableV2Attr {
+func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr {
 	return func(m optionalAttr) {
 		m["shared_name"] = value
 	}
 }
 
-// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
-//
-// value: If true and shared_name is empty, the table is shared
-// using the node name.
-// If not specified, defaults to false
-func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr {
-	return func(m optionalAttr) {
-		m["use_node_name_sharing"] = value
-	}
-}
-
-// Creates a non-initialized hash table.
-//
-// This op creates a hash table, specifying the type of its keys and values.
-// Before using the table you will have to initialize it.  After initialization the
-// table will be immutable.
+// A queue that produces elements in first-in first-out order.
 //
 // Arguments:
-//	key_dtype: Type of the table keys.
-//	value_dtype: Type of the table values.
+//	component_types: The type of each component in a value.
 //
-// Returns Handle to a table.
-func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) {
+// Returns The handle to the queue.
+func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
+	attrs := map[string]interface{}{"component_types": component_types}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "HashTableV2",
+		Type: "FIFOQueueV2",
 
 		Attrs: attrs,
 	}
@@ -23273,661 +30493,16 @@
 	return op.Output(0)
 }
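+
+// A minimal usage sketch (capacity and shapes are illustrative):
+//
+//	s := op.NewScope()
+//	q := op.FIFOQueueV2(s, []tf.DataType{tf.Float},
+//		op.FIFOQueueV2Capacity(10),
+//		op.FIFOQueueV2Shapes([]tf.Shape{tf.ScalarShape()}))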
 
-// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap.
-type TakeManySparseFromTensorsMapAttr func(optionalAttr)
-
-// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value.
-//
-// value: The container name for the `SparseTensorsMap` read by this op.
-// If not specified, defaults to ""
-func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value.
-//
-// value: The shared name for the `SparseTensorsMap` read by this op.
-// It should not be blank; rather the `shared_name` or unique Operation name
-// of the Op that created the original `SparseTensorsMap` should be used.
-// If not specified, defaults to ""
-func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them.
-//
-// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where
-// `N` is the minibatch size and the rows correspond to the output handles of
-// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`.  The ranks of the
-// original `SparseTensor` objects that went into the given input ops must all
-// match.  When the final `SparseTensor` is created, it has rank one
-// higher than the ranks of the incoming `SparseTensor` objects
-// (they have been concatenated along a new row dimension on the left).
-//
-// The output `SparseTensor` object's shape values for all dimensions but the
-// first are the max across the input `SparseTensor` objects' shape values
-// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
-// size.
-//
-// The input `SparseTensor` objects' indices are assumed ordered in
-// standard lexicographic order.  If this is not the case, after this
-// step run `SparseReorder` to restore index ordering.
-//
-// For example, if the handles represent an input, which is a `[2, 3]` matrix
-// representing two original `SparseTensor` objects:
-//
-// ```
-//     index = [ 0]
-//             [10]
-//             [20]
-//     values = [1, 2, 3]
-//     shape = [50]
-// ```
-//
-// and
-//
-// ```
-//     index = [ 2]
-//             [10]
-//     values = [4, 5]
-//     shape = [30]
-// ```
-//
-// then the final `SparseTensor` will be:
-//
-// ```
-//     index = [0  0]
-//             [0 10]
-//             [0 20]
-//             [1  2]
-//             [1 10]
-//     values = [1, 2, 3, 4, 5]
-//     shape = [2 50]
-// ```
-//
-// Arguments:
-//	sparse_handles: 1-D, The `N` serialized `SparseTensor` objects.
-// Shape: `[N]`.
-//	dtype: The `dtype` of the `SparseTensor` objects stored in the
-// `SparseTensorsMap`.
-//
-// Returns 2-D.  The `indices` of the minibatch `SparseTensor`.1-D.  The `values` of the minibatch `SparseTensor`.1-D.  The `shape` of the minibatch `SparseTensor`.
-func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "TakeManySparseFromTensorsMap",
-		Input: []tf.Input{
-			sparse_handles,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// ResourceSparseApplyKerasMomentumAttr is an optional argument to ResourceSparseApplyKerasMomentum.
-type ResourceSparseApplyKerasMomentumAttr func(optionalAttr)
-
-// ResourceSparseApplyKerasMomentumUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyKerasMomentumUseLocking(value bool) ResourceSparseApplyKerasMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// ResourceSparseApplyKerasMomentumUseNesterov sets the optional use_nesterov attribute to value.
-//
-// value: If `True`, the tensor passed to compute grad will be
-// var + momentum * accum, so in the end, the var you get is actually
-// var + momentum * accum.
-// If not specified, defaults to false
-func ResourceSparseApplyKerasMomentumUseNesterov(value bool) ResourceSparseApplyKerasMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_nesterov"] = value
-	}
-}
-
-// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
-//
-// Set use_nesterov = True if you want to use Nesterov momentum.
-//
-// That is, for the rows we have grad for, we update var and accum as follows:
-//
-// accum = accum * momentum - lr * grad
-// var += accum
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	momentum: Momentum. Must be a scalar.
-//
-// Returns the created operation.
-func ResourceSparseApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyKerasMomentumAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyKerasMomentum",
-		Input: []tf.Input{
-			var_, accum, lr, grad, indices, momentum,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
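The sparse update above is simple enough to check by hand. A minimal plain-Go sketch of the documented arithmetic (illustrative only; `applyKerasMomentum` is a hypothetical helper, not part of this package):

```go
package main

import "fmt"

// applyKerasMomentum mirrors the documented update for each row listed in
// indices: accum = accum*momentum - lr*grad; var += accum.
func applyKerasMomentum(vars, accum, grad []float32, indices []int, lr, momentum float32) {
	for i, row := range indices {
		accum[row] = accum[row]*momentum - lr*grad[i]
		vars[row] += accum[row]
	}
}

func main() {
	vars := []float32{1, 1, 1}
	accum := []float32{0, 0, 0}
	// A sparse gradient that touches rows 0 and 2 only.
	applyKerasMomentum(vars, accum, []float32{0.5, 0.25}, []int{0, 2}, 0.1, 0.9)
	fmt.Println(vars)  // [0.95 1 0.975]
	fmt.Println(accum) // [-0.05 0 -0.025]
}
```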
-
-// Assigns a new value to a variable.
-//
-// Any ReadVariableOp with a control dependency on this op is guaranteed to return
-// this value or a subsequent newer value of the variable.
-//
-// Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value to set the new tensor to use.
-//
-// Returns the created operation.
-func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AssignVariableOp",
-		Input: []tf.Input{
-			resource, value,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
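For context, a minimal sketch of how this op is typically wired from the Go API, assuming a resource variable created with `VarHandleOp` and read back with `ReadVariableOp` (both generated elsewhere in this file):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	handle := op.VarHandleOp(s, tf.Float, tf.MakeShape(2))
	assign := op.AssignVariableOp(s, handle, op.Const(s, []float32{1, 2}))
	read := op.ReadVariableOp(s, handle, tf.Float)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	// Run the assignment first; a later Run in the same session sees the value.
	if _, err := sess.Run(nil, nil, []*tf.Operation{assign}); err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{read}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [1 2]
}
```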
-
-// Strip leading and trailing whitespaces from the Tensor.
-//
-// Arguments:
-//	input: A string `Tensor` of any shape.
-//
-// Returns A string `Tensor` of the same shape as the input.
-func StringStrip(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "StringStrip",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns a tensor of ones with the same shape and type as x.
-//
-// Arguments:
-//	x: a tensor of type T.
-//
-// Returns a tensor of the same shape and type as x but filled with ones.
-func OnesLike(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "OnesLike",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// The gradient of SparseFillEmptyRows.
-//
-// Takes vectors reverse_index_map, shaped `[N]`, and grad_values,
-// shaped `[N_full]`, where `N_full >= N` and copies data into either
-// `d_values` or `d_default_value`.  Here `d_values` is shaped `[N]` and
-// `d_default_value` is a scalar.
-//
-//   d_values[j] = grad_values[reverse_index_map[j]]
-//   d_default_value = sum_{k : 0 .. N_full - 1} (
-//      grad_values[k] * 1{k not in reverse_index_map})
-//
-// Arguments:
-//	reverse_index_map: 1-D.  The reverse index map from SparseFillEmptyRows.
-//	grad_values: 1-D.  The gradients from backprop.
-//
-// Returns:
-//	d_values: 1-D.  The backprop into values.
-//	d_default_value: 0-D.  The backprop into default_value.
-func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseFillEmptyRowsGrad",
-		Input: []tf.Input{
-			reverse_index_map, grad_values,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
-//
-// if `features < 0`, `scale * features` otherwise.
-//
-// To be used together with
-// `initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`.
-// For correct dropout, use `tf.contrib.nn.alpha_dropout`.
-//
-// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
-func Selu(scope *Scope, features tf.Output) (activations tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Selu",
-		Input: []tf.Input{
-			features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SetSizeAttr is an optional argument to SetSize.
-type SetSizeAttr func(optionalAttr)
-
-// SetSizeValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func SetSizeValidateIndices(value bool) SetSizeAttr {
-	return func(m optionalAttr) {
-		m["validate_indices"] = value
-	}
-}
-
-// Number of unique elements along last dimension of input `set`.
-//
-// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`,
-// and `set_shape`. The last dimension contains values in a set; duplicates are
-// allowed but ignored.
-//
-// If `validate_indices` is `True`, this op validates the order and range of `set`
-// indices.
-//
-// Arguments:
-//	set_indices: 2D `Tensor`, indices of a `SparseTensor`.
-//	set_values: 1D `Tensor`, values of a `SparseTensor`.
-//	set_shape: 1D `Tensor`, shape of a `SparseTensor`.
-//
-// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st
-// `n-1` dimensions as `set`. Each value is the number of unique elements in
-// the corresponding `[0...n-1]` dimension of `set`.
-func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SetSize",
-		Input: []tf.Input{
-			set_indices, set_values, set_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
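A small sketch of the op from the Go API, assuming the `SparseTensor` components are built with `Const`; the indices are in lexicographic order as required:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A sparse [2, 3] tensor: row 0 holds {1, 1, 5} (one duplicate), row 1 holds {2}.
	indices := op.Const(s, [][]int64{{0, 0}, {0, 1}, {0, 2}, {1, 0}})
	values := op.Const(s, []int32{1, 1, 5, 2})
	shape := op.Const(s, []int64{2, 3})
	size := op.SetSize(s, indices, values, shape)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{size}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [2 1]: the duplicate in row 0 is ignored
}
```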
-
-// Adds sparse `updates` to an existing tensor according to `indices`.
-//
-// This operation creates a new tensor by adding sparse `updates` to the passed
-// in `tensor`.
-// This operation is very similar to `tf.scatter_nd_add`, except that the updates
-// are added onto an existing tensor (as opposed to a variable). If the memory
-// for the existing tensor cannot be re-used, a copy is made and updated.
-//
-// `indices` is an integer tensor containing indices into a new tensor of shape
-// `shape`.  The last dimension of `indices` can be at most the rank of `shape`:
-//
-//     indices.shape[-1] <= shape.rank
-//
-// The last dimension of `indices` corresponds to indices into elements
-// (if `indices.shape[-1] = shape.rank`) or slices
-// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of
-// `shape`.  `updates` is a tensor with shape
-//
-//     indices.shape[:-1] + shape[indices.shape[-1]:]
-//
-// The simplest form of tensor_scatter_add is to add individual elements to a
-// tensor by index. For example, say we want to add 4 elements in a rank-1
-// tensor with 8 elements.
-//
-// In Python, this scatter add operation would look like this:
-//
-// ```python
-//     indices = tf.constant([[4], [3], [1], [7]])
-//     updates = tf.constant([9, 10, 11, 12])
-//     tensor = tf.ones([8], dtype=tf.int32)
-//     updated = tf.tensor_scatter_add(tensor, indices, updates)
-//     with tf.Session() as sess:
-//       print(sess.run(updated))
-// ```
-//
-// The resulting tensor would look like this:
-//
-//     [1, 12, 1, 11, 10, 1, 1, 13]
-//
-// We can also insert entire slices of a higher-rank tensor all at once. For
-// example, we can insert two slices in the first dimension of a rank-3 tensor
-// with two matrices of new values.
-//
-// In Python, this scatter add operation would look like this:
-//
-// ```python
-//     indices = tf.constant([[0], [2]])
-//     updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
-//                             [7, 7, 7, 7], [8, 8, 8, 8]],
-//                            [[5, 5, 5, 5], [6, 6, 6, 6],
-//                             [7, 7, 7, 7], [8, 8, 8, 8]]])
-//     tensor = tf.ones([4, 4, 4])
-//     updated = tf.tensor_scatter_add(tensor, indices, updates)
-//     with tf.Session() as sess:
-//       print(sess.run(updated))
-// ```
-//
-// The resulting tensor would look like this:
-//
-//     [[[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]],
-//      [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
-//      [[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]],
-//      [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]]
-//
-// Note that on CPU, if an out of bound index is found, an error is returned.
-// On GPU, if an out of bound index is found, the index is ignored.
-//
-// Arguments:
-//	tensor: Tensor to copy/update.
-//	indices: Index tensor.
-//	updates: Updates to scatter into output.
-//
-// Returns A new tensor copied from tensor and updates added according to the indices.
-func TensorScatterAdd(scope *Scope, tensor tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorScatterAdd",
-		Input: []tf.Input{
-			tensor, indices, updates,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
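The rank-1 Python example above translates directly to the Go API; a minimal sketch:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	tensor := op.Const(s, []int32{1, 1, 1, 1, 1, 1, 1, 1})
	indices := op.Const(s, [][]int32{{4}, {3}, {1}, {7}})
	updates := op.Const(s, []int32{9, 10, 11, 12})
	updated := op.TensorScatterAdd(s, tensor, indices, updates)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{updated}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [1 12 1 11 10 1 1 13]
}
```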
-
-// Computes the sign and the log of the absolute value of the determinant of
-//
-// one or more square matrices.
-//
-// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions
-// form square matrices. The outputs are two tensors containing the signs and
-// absolute values of the log determinants for all N input submatrices
-// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant).
-// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU
-// is the LU decomposition of the input and P is the corresponding
-// permutation matrix.
-//
-// Arguments:
-//	input: Shape is `[N, M, M]`.
-//
-// Returns:
-//	sign: The signs of the log determinants of the inputs. Shape is `[N]`.
-//	log_abs_determinant: The logs of the absolute values of the determinants
-// of the N input matrices.  Shape is `[N]`.
-func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LogMatrixDeterminant",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Copy a tensor setting everything outside a central band in each innermost matrix
-//
-// to zero.
-//
-// The `band` part is computed as follows:
-// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
-// tensor with the same shape where
-//
-// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`.
-//
-// The indicator function
-//
-// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower) &&
-//                  (num_upper < 0 || (n-m) <= num_upper)`.
-//
-// For example:
-//
-// ```
-// # if 'input' is [[ 0,  1,  2, 3]
-//                  [-1,  0,  1, 2]
-//                  [-2, -1,  0, 1]
-//                  [-3, -2, -1, 0]],
-//
-// tf.matrix_band_part(input, 1, -1) ==> [[ 0,  1,  2, 3]
-//                                        [-1,  0,  1, 2]
-//                                        [ 0, -1,  0, 1]
-//                                        [ 0,  0, -1, 0]],
-//
-// tf.matrix_band_part(input, 2, 1) ==> [[ 0,  1,  0, 0]
-//                                       [-1,  0,  1, 0]
-//                                       [-2, -1,  0, 1]
-//                                       [ 0, -2, -1, 0]]
-// ```
-//
-// Useful special cases:
-//
-// ```
-//  tf.matrix_band_part(input, 0, -1) ==> Upper triangular part.
-//  tf.matrix_band_part(input, -1, 0) ==> Lower triangular part.
-//  tf.matrix_band_part(input, 0, 0) ==> Diagonal.
-// ```
-//
-// Arguments:
-//	input: Rank `k` tensor.
-//	num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire
-// lower triangle.
-//	num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep
-// entire upper triangle.
-//
-// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor.
-func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MatrixBandPart",
-		Input: []tf.Input{
-			input, num_lower, num_upper,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Subtracts a value from the current value of a variable.
-//
-// Any ReadVariableOp with a control dependency on this op is guaranteed to
-// see the decremented value or a subsequent newer one.
-//
-// Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value by which the variable will be decremented.
-//
-// Returns the created operation.
-func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AssignSubVariableOp",
-		Input: []tf.Input{
-			resource, value,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// RestoreAttr is an optional argument to Restore.
-type RestoreAttr func(optionalAttr)
-
-// RestorePreferredShard sets the optional preferred_shard attribute to value.
-//
-// value: Index of file to open first if multiple files match
-// `file_pattern`.
-// If not specified, defaults to -1
-func RestorePreferredShard(value int64) RestoreAttr {
-	return func(m optionalAttr) {
-		m["preferred_shard"] = value
-	}
-}
-
-// Restores a tensor from checkpoint files.
-//
-// Reads a tensor stored in one or several files. If there are several files (for
-// instance because a tensor was saved as slices), `file_pattern` may contain
-// wildcard symbols (`*` and `?`) in the filename portion only, not in the
-// directory portion.
-//
-// If a `file_pattern` matches several files, `preferred_shard` can be used to hint
-// in which file the requested tensor is likely to be found. This op will first
-// open the file at index `preferred_shard` in the list of matching files and try
-// to restore tensors from that file.  Only if some tensors or tensor slices are
-// not found in that first file does the Op open all the files. Setting
-// `preferred_shard` to match the value passed as the `shard` input
-// of a matching `Save` Op may speed up Restore.  This attribute only affects
-// performance, not correctness.  The default value -1 means files are processed in
-// order.
-//
-// See also `RestoreSlice`.
-//
-// Arguments:
-//	file_pattern: Must have a single element. The pattern of the files from
-// which we read the tensor.
-//	tensor_name: Must have a single element. The name of the tensor to be
-// restored.
-//	dt: The type of the tensor to be restored.
-//
-// Returns The restored tensor.
-func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dt": dt}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Restore",
-		Input: []tf.Input{
-			file_pattern, tensor_name,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
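A hedged sketch of wiring this op from the Go API; the checkpoint pattern and tensor name below are hypothetical, and actually fetching `restored` requires a real V1 checkpoint at that location:

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Hypothetical checkpoint pattern and tensor name, for illustration only.
	pattern := op.Const(s, "/tmp/model.ckpt-*")
	name := op.Const(s, "weights")
	// Try the shard at index 0 first; fall back to scanning all matches.
	restored := op.Restore(s, pattern, name, tf.Float, op.RestorePreferredShard(0))
	_ = restored // fetch this output in a Session.Run against a real checkpoint
	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```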
-
-// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear.
-type QuantizedResizeBilinearAttr func(optionalAttr)
-
-// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value.
-//
-// value: If true, the centers of the 4 corner pixels of the input and output tensors are
-// aligned, preserving the values at the corner pixels. Defaults to false.
-// If not specified, defaults to false
-func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr {
-	return func(m optionalAttr) {
-		m["align_corners"] = value
-	}
-}
-
-// Resize quantized `images` to `size` using quantized bilinear interpolation.
-//
-// Input images and output images must be quantized types.
-//
-// Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
-//
-//
-//
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizedResizeBilinear",
-		Input: []tf.Input{
-			images, size, min, max,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Creates a dataset that uses a custom thread pool to compute `input_dataset`.
-//
-// Arguments:
-//
-//	num_threads: Identifies the number of threads to use for the private threadpool.
-//
-//
-func ExperimentalPrivateThreadPoolDataset(scope *Scope, input_dataset tf.Output, num_threads tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Creates a dataset that contains the elements of `input_dataset` ignoring errors.
+func ExperimentalIgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "ExperimentalPrivateThreadPoolDataset",
+		Type: "ExperimentalIgnoreErrorsDataset",
 		Input: []tf.Input{
-			input_dataset, num_threads,
+			input_dataset,
 		},
 		Attrs: attrs,
 	}
@@ -23935,539 +30510,13 @@
 	return op.Output(0)
 }
 
-// ExperimentalParseExampleDatasetAttr is an optional argument to ExperimentalParseExampleDataset.
-type ExperimentalParseExampleDatasetAttr func(optionalAttr)
-
-// ExperimentalParseExampleDatasetSloppy sets the optional sloppy attribute to value.
-// If not specified, defaults to false
-func ExperimentalParseExampleDatasetSloppy(value bool) ExperimentalParseExampleDatasetAttr {
-	return func(m optionalAttr) {
-		m["sloppy"] = value
-	}
-}
-
-// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features.
-//
-// Arguments:
-//
-//
-//	dense_defaults: A dict mapping string keys to `Tensor`s.
-// The keys of the dict must match the dense_keys of the feature.
-//	sparse_keys: A list of string keys in the examples features.
-// The results for these keys will be returned as `SparseTensor` objects.
-//	dense_keys: A list of Ndense string Tensors (scalars).
-// The keys expected in the Examples features associated with dense values.
-//	sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
-// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
-// and `tf.string` (`BytesList`) are supported.
-//	dense_shapes: List of tuples with the same length as `dense_keys`.
-// The shape of the data for each dense feature referenced by `dense_keys`.
-// Required for any input tensors identified by `dense_keys`.  Must be
-// either fully defined, or may contain an unknown first dimension.
-// An unknown first dimension means the feature is treated as having
-// a variable number of blocks, and the output shape along this dimension
-// is considered unknown at graph build time.  Padding is applied for
-// minibatch elements smaller than the maximum number of blocks for the
-// given feature along this dimension.
-//	output_types: The type list for the return values.
-//	output_shapes: The list of shapes being produced.
-func ExperimentalParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ExperimentalParseExampleDatasetAttr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalParseExampleDataset",
-		Input: []tf.Input{
-			input_dataset, num_parallel_calls, tf.OutputList(dense_defaults),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SdcaOptimizerAttr is an optional argument to SdcaOptimizer.
-type SdcaOptimizerAttr func(optionalAttr)
-
-// SdcaOptimizerAdaptative sets the optional adaptative attribute to value.
-//
-// value: Whether to use Adaptive SDCA for the inner loop.
-// If not specified, defaults to true
-func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr {
-	return func(m optionalAttr) {
-		m["adaptative"] = value
-	}
-}
-
-// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for
-//
-// linear models with L1 + L2 regularization. As the global optimization objective
-// is strongly convex, the optimizer optimizes the dual objective at each step. The
-// optimizer applies each update one example at a time. Examples are sampled
-// uniformly, and the optimizer is learning-rate free and enjoys a linear
-// convergence rate.
-//
-// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
-// Shai Shalev-Shwartz, Tong Zhang. 2012
-//
-// $$\text{Loss Objective} = \sum_{i} f_{i}(wx_{i}) + \frac{l_2}{2} \|w\|^2 + l_1 \|w\|$$
-//
-// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
-// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
-// Peter Richtarik, Martin Takac. 2015
-//
-// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
-// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
-//
-// Arguments:
-//	sparse_example_indices: a list of vectors which contain example indices.
-//	sparse_feature_indices: a list of vectors which contain feature indices.
-//	sparse_feature_values: a list of vectors which contains feature value
-// associated with each feature group.
-//	dense_features: a list of matrices which contains the dense feature values.
-//	example_weights: a vector which contains the weight associated with each
-// example.
-//	example_labels: a vector which contains the label/target associated with each
-// example.
-//	sparse_indices: a list of vectors where each value is the index which has
-// corresponding weights in sparse_weights. This field may be omitted for the
-// dense approach.
-//	sparse_weights: a list of vectors where each value is the weight associated with
-// a sparse feature group.
-//	dense_weights: a list of vectors where the values are the weights associated
-// with a dense feature group.
-//	example_state_data: a list of vectors containing the example state data.
-//	loss_type: Type of the primal loss. Currently SdcaSolver supports logistic,
-// squared and hinge losses.
-//	l1: Symmetric l1 regularization strength.
-//	l2: Symmetric l2 regularization strength.
-//	num_loss_partitions: Number of partitions of the global loss function.
-//	num_inner_iterations: Number of iterations per mini-batch.
-//
-// Returns:
-//	out_example_state_data: a list of vectors containing the updated example state
-// data.
-//	out_delta_sparse_weights: a list of vectors where each value is the delta
-// weights associated with a sparse feature group.
-//	out_delta_dense_weights: a list of vectors where the values are the delta
-// weights associated with a dense feature group.
-func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SdcaOptimizer",
-		Input: []tf.Input{
-			tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	out_example_state_data = op.Output(idx)
-	if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil {
-		scope.UpdateErr("SdcaOptimizer", err)
-		return
-	}
-	if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil {
-		scope.UpdateErr("SdcaOptimizer", err)
-		return
-	}
-	return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights
-}
-
-// Concats all tensors in the list along the 0th dimension.
-//
-// Requires that all tensors have the same shape except the first dimension.
-//
-// input_handle: The input list.
-// element_shape: The shape of the uninitialized elements in the list. If the first
-//   dimension is not -1, it is assumed that all list elements have the same
-//   leading dim.
-// leading_dims: The list of leading dims of uninitialized list elements. Used if
-//   the leading dim of input_handle.element_shape or the element_shape input arg
-//   is not already set.
-// tensor: The concatenated result.
-// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient.
-//
-func TensorListConcatV2(scope *Scope, input_handle tf.Output, element_shape tf.Output, leading_dims tf.Output, element_dtype tf.DataType) (tensor tf.Output, lengths tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"element_dtype": element_dtype}
-	opspec := tf.OpSpec{
-		Type: "TensorListConcatV2",
-		Input: []tf.Input{
-			input_handle, element_shape, leading_dims,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve.
-type MatrixTriangularSolveAttr func(optionalAttr)
-
-// MatrixTriangularSolveLower sets the optional lower attribute to value.
-//
-// value: Boolean indicating whether the innermost matrices in `matrix` are
-// lower or upper triangular.
-// If not specified, defaults to true
-func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr {
-	return func(m optionalAttr) {
-		m["lower"] = value
-	}
-}
-
-// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value.
-//
-// value: Boolean indicating whether to solve with `matrix` or its (block-wise)
-//          adjoint.
-//
-// @compatibility(numpy)
-// Equivalent to scipy.linalg.solve_triangular
-// @end_compatibility
-// If not specified, defaults to false
-func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr {
-	return func(m optionalAttr) {
-		m["adjoint"] = value
-	}
-}
-
-// Solves systems of linear equations with upper or lower triangular matrices by
-//
-// backsubstitution.
-//
-// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form
-// square matrices. If `lower` is `True` then the strictly upper triangular part
-// of each inner-most matrix is assumed to be zero and not accessed.
-// If `lower` is False then the strictly lower triangular part of each inner-most
-// matrix is assumed to be zero and not accessed.
-// `rhs` is a tensor of shape `[..., M, K]`.
-//
-// The output is a tensor of shape `[..., M, K]`. If `adjoint` is
-// `True` then the innermost matrices in `output` satisfy matrix equations
-// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`.
-// If `adjoint` is `False` then the innermost matrices in `output` satisfy
-// matrix equations `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
-//
-// Arguments:
-//	matrix: Shape is `[..., M, M]`.
-//	rhs: Shape is `[..., M, K]`.
-//
-// Returns Shape is `[..., M, K]`.
-func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MatrixTriangularSolve",
-		Input: []tf.Input{
-			matrix, rhs,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
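A small worked example from the Go API, solving a lower-triangular 2x2 system by forward substitution:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Lower-triangular system: [[2, 0], [3, 4]] x = [[2], [14]].
	matrix := op.Const(s, [][]float32{{2, 0}, {3, 4}})
	rhs := op.Const(s, [][]float32{{2}, {14}})
	x := op.MatrixTriangularSolve(s, matrix, rhs, op.MatrixTriangularSolveLower(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{x}, nil)
	if err != nil {
		panic(err)
	}
	// Forward substitution: x0 = 2/2 = 1, x1 = (14 - 3*1)/4 = 2.75.
	fmt.Println(out[0].Value()) // [[1] [2.75]]
}
```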
-
-// Saves tensors in V2 checkpoint format.
-//
-// By default, saves the named tensors in full.  If the caller wishes to save
-// specific slices of full tensors, "shape_and_slices" should be non-empty strings
-// and correspondingly well-formed.
-//
-// Arguments:
-//	prefix: Must have a single element. The prefix of the V2 checkpoint to which we
-// write the tensors.
-//	tensor_names: shape {N}. The names of the tensors to be saved.
-//	shape_and_slices: shape {N}.  The slice specs of the tensors to be saved.
-// Empty strings indicate that they are non-partitioned tensors.
-//	tensors: `N` tensors to save.
-//
-// Returns the created operation.
-func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) {
+// Returns 0 if x == 0, and x / y otherwise, elementwise.
+func Xdivy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SaveV2",
-		Input: []tf.Input{
-			prefix, tensor_names, shape_and_slices, tf.OutputList(tensors),
-		},
-	}
-	return scope.AddOperation(opspec)
-}
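A minimal sketch of this op from the Go API; the checkpoint prefix `/tmp/example-ckpt` is an arbitrary example, and the empty `shape_and_slices` string marks the tensor as non-partitioned:

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	prefix := op.Const(s, "/tmp/example-ckpt") // arbitrary output prefix
	names := op.Const(s, []string{"x"})
	slices := op.Const(s, []string{""}) // empty spec: save the full tensor
	x := op.Const(s, []float32{1, 2, 3})
	save := op.SaveV2(s, prefix, names, slices, []tf.Output{x})

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	// SaveV2 has no outputs; run it as a target.
	if _, err := sess.Run(nil, nil, []*tf.Operation{save}); err != nil {
		panic(err)
	}
}
```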
-
-// UnicodeTranscodeAttr is an optional argument to UnicodeTranscode.
-type UnicodeTranscodeAttr func(optionalAttr)
-
-// UnicodeTranscodeErrors sets the optional errors attribute to value.
-//
-// value: Error handling policy when there is invalid formatting found in the input.
-// The value of 'strict' will cause the operation to produce a InvalidArgument
-// error on any invalid input formatting. A value of 'replace' (the default) will
-// cause the operation to replace any invalid formatting in the input with the
-// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
-// skip any invalid formatting in the input and produce no corresponding output
-// character.
-// If not specified, defaults to "replace"
-func UnicodeTranscodeErrors(value string) UnicodeTranscodeAttr {
-	return func(m optionalAttr) {
-		m["errors"] = value
-	}
-}
-
-// UnicodeTranscodeReplacementChar sets the optional replacement_char attribute to value.
-//
-// value: The replacement character codepoint to be used in place of any invalid
-// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
-// be used. The default value is the default Unicode replacement character,
-// 0xFFFD (decimal 65533).
-//
-// Note that for UTF-8, passing a replacement character expressible in 1 byte, such
-// as ' ', will preserve string alignment to the source since invalid bytes will be
-// replaced with a 1-byte replacement. For UTF-16-BE and UTF-16-LE, any 1 or 2 byte
-// replacement character will preserve byte alignment to the source.
-// If not specified, defaults to 65533
-func UnicodeTranscodeReplacementChar(value int64) UnicodeTranscodeAttr {
-	return func(m optionalAttr) {
-		m["replacement_char"] = value
-	}
-}
-
-// UnicodeTranscodeReplaceControlCharacters sets the optional replace_control_characters attribute to value.
-//
-// value: Whether to replace the C0 control characters (00-1F) with the
-// `replacement_char`. Default is false.
-// If not specified, defaults to false
-func UnicodeTranscodeReplaceControlCharacters(value bool) UnicodeTranscodeAttr {
-	return func(m optionalAttr) {
-		m["replace_control_characters"] = value
-	}
-}
-
-// Transcode the input text from a source encoding to a destination encoding.
-//
-// The input is a string tensor of any shape. The output is a string tensor of
-// the same shape containing the transcoded strings. Output strings are always
-// valid unicode. If the input contains invalid encoding positions, the
-// `errors` attribute sets the policy for how to deal with them. If the default
-// error-handling policy is used, invalid formatting will be substituted in the
-// output by the `replacement_char`. If the errors policy is `ignore`, any
-// invalid encoding positions in the input are skipped and not included in the
-// output. If it is set to `strict` then any invalid formatting will result in
-// an InvalidArgument error.
-//
-// This operation can be used with `output_encoding = input_encoding` to enforce
-// correct formatting for inputs even if they are already in the desired encoding.
-//
-// If the input is prefixed by a Byte Order Mark needed to determine encoding
-// (e.g. if the encoding is UTF-16 and the BOM indicates big-endian), then that
-// BOM will be consumed and not emitted into the output. If the input encoding
-// is marked with an explicit endianness (e.g. UTF-16-BE), then the BOM is
-// interpreted as a non-breaking-space and is preserved in the output (including
-// always for UTF-8).
-//
-// The end result is that if the input is marked as an explicit endianness the
-// transcoding is faithful to all codepoints in the source. If it is not marked
-// with an explicit endianness, the BOM is not considered part of the string itself
-// but as metadata, and so is not preserved in the output.
-//
-// Arguments:
-//	input: The text to be processed. Can have any shape.
-//	input_encoding: Text encoding of the input strings. This is any of the encodings supported
-// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
-//	output_encoding: The unicode encoding to use in the output. Must be one of
-// `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. Multi-byte encodings will be big-endian.
-//
-// Returns A string tensor containing unicode text encoded using `output_encoding`.
-func UnicodeTranscode(scope *Scope, input tf.Output, input_encoding string, output_encoding string, optional ...UnicodeTranscodeAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"input_encoding": input_encoding, "output_encoding": output_encoding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "UnicodeTranscode",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
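A minimal sketch from the Go API, transcoding UTF-8 input to UTF-16-BE with the default `replace` policy:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	in := op.Const(s, []string{"café"}) // UTF-8 source text
	out := op.UnicodeTranscode(s, in, "UTF-8", "UTF-16-BE",
		op.UnicodeTranscodeErrors("replace"))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	res, err := sess.Run(nil, []tf.Output{out}, nil)
	if err != nil {
		panic(err)
	}
	// Each output string now holds UTF-16-BE bytes (2 bytes per codepoint here).
	fmt.Printf("%q\n", res[0].Value())
}
```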
-
-// Computes inverse hyperbolic sine of x element-wise.
-func Asinh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Asinh",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset with a range of values. Corresponds to python's xrange.
-//
-// Arguments:
-//	start: corresponds to start in python's xrange().
-//	stop: corresponds to stop in python's xrange().
-//	step: corresponds to step in python's xrange().
-//
-//
-func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "RangeDataset",
-		Input: []tf.Input{
-			start, stop, step,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Stops gradient computation.
-//
-// When executed in a graph, this op outputs its input tensor as-is.
-//
-// When building ops to compute gradients, this op prevents the contribution of
-// its inputs from being taken into account.  Normally, the gradient generator
-// adds ops to a graph to compute the derivatives of a specified 'loss' by
-// recursively finding out inputs that contributed to its computation.  If you
-// insert this op in the graph, its inputs are masked from the gradient
-// generator.  They are not taken into account for computing gradients.
-//
-// This is useful any time you want to compute a value with TensorFlow but need
-// to pretend that the value was a constant. Some examples include:
-//
-// *  The *EM* algorithm where the *M-step* should not involve backpropagation
-//    through the output of the *E-step*.
-// *  Contrastive divergence training of Boltzmann machines where, when
-//    differentiating the energy function, the training must not backpropagate
-//    through the graph that generated the samples from the model.
-// *  Adversarial training, where no backprop should happen through the adversarial
-//    example generation process.
-func StopGradient(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "StopGradient",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
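A sketch of the masking effect from the Go API, assuming the op package's `Gradients` helper is available in this build: with one factor wrapped in `StopGradient`, d(x * sg(x))/dx evaluates to sg(x) = 3 rather than 2x = 6:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, float32(3))
	sg := op.StopGradient(s, x)
	y := op.Mul(s, x, sg) // the sg factor is treated as a constant
	grads := op.Gradients(s, []tf.Output{y}, []tf.Output{x})

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{grads[0]}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // 3, not 6: the masked branch contributes nothing
}
```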
-
-// Eagerly executes a python function to compute func(input)->output. The
-//
-// semantics of the input, output, and attributes are the same as those for
-// PyFunc.
-func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"token": token, "Tout": Tout}
-	opspec := tf.OpSpec{
-		Type: "EagerPyFunc",
-		Input: []tf.Input{
-			tf.OutputList(input),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("EagerPyFunc", err)
-		return
-	}
-	return output
-}
-
-// Says whether the targets are in the top `K` predictions.
-//
-// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
-// prediction for the target class is among the top `k` predictions among
-// all predictions for example `i`. Note that the behavior of `InTopK` differs
-// from the `TopK` op in its handling of ties; if multiple classes have the
-// same prediction value and straddle the top-`k` boundary, all of those
-// classes are considered to be in the top `k`.
-//
-// More formally, let
-//
-//   \\(predictions_i\\) be the predictions for all classes for example `i`,
-//   \\(targets_i\\) be the target class for example `i`,
-//   \\(out_i\\) be the output for example `i`,
-//
-// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
-//
-// Arguments:
-//	predictions: A `batch_size` x `classes` tensor.
-//	targets: A `batch_size` vector of class ids.
-//	k: Number of top elements to look at for computing precision.
-//
-// Returns Computed Precision at `k` as a `bool Tensor`.
-func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"k": k}
-	opspec := tf.OpSpec{
-		Type: "InTopK",
-		Input: []tf.Input{
-			predictions, targets,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
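A minimal sketch from the Go API: with `k = 1`, only the example whose target class holds the single highest prediction counts as a hit:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	predictions := op.Const(s, [][]float32{{0.1, 0.9}, {0.6, 0.4}})
	targets := op.Const(s, []int32{1, 1}) // target class 1 for both examples
	prec := op.InTopK(s, predictions, targets, 1)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{prec}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [true false]
}
```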
-
-// Returns (x - y)(x - y) element-wise.
-//
-// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SquaredDifference",
+		Type: "Xdivy",
 		Input: []tf.Input{
 			x, y,
 		},
@@ -24476,47 +30525,138 @@
 	return op.Output(0)
 }
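A quick check of the zero-safe division from the Go API (`Xdivy` is the op added earlier in this diff); a minimal sketch:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, []float32{0, 4})
	y := op.Const(s, []float32{0, 2})
	z := op.Xdivy(s, x, y)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{z}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [0 2]: the 0/0 slot yields 0, not NaN
}
```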
 
-// RandomGammaAttr is an optional argument to RandomGamma.
-type RandomGammaAttr func(optionalAttr)
-
-// RandomGammaSeed sets the optional seed attribute to value.
+// Bucketizes 'input' based on 'boundaries'.
 //
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomGammaSeed(value int64) RandomGammaAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// RandomGammaSeed2 sets the optional seed2 attribute to value.
+// For example, if the inputs are
+//     boundaries = [0, 10, 100]
+//     input = [[-5, 10000]
+//              [150,   10]
+//              [5,    100]]
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomGammaSeed2(value int64) RandomGammaAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Outputs random values from the Gamma distribution(s) described by alpha.
-//
-// This op uses the algorithm by Marsaglia et al. to acquire samples via
-// transformation-rejection from pairs of uniform and normal random variables.
-// See http://dl.acm.org/citation.cfm?id=358414
+// then the output will be
+//     output = [[0, 3]
+//               [3, 2]
+//               [1, 3]]
 //
 // Arguments:
-//	shape: 1-D integer tensor. Shape of independent samples to draw from each
-// distribution described by the shape parameters given in alpha.
-//	alpha: A tensor in which each scalar is a "shape" parameter describing the
-// associated gamma distribution.
+//	input: A `Tensor` of any shape containing int or float values.
+//	boundaries: A sorted list of floats giving the boundaries of the buckets.
 //
-// Returns A tensor with shape `shape + shape(alpha)`. Each slice
-// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
-// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha.
-func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) {
+// Returns Same shape as 'input', each value of input replaced with its bucket index.
+//
+// @compatibility(numpy)
+// Equivalent to np.digitize.
+// @end_compatibility
+func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"boundaries": boundaries}
+	opspec := tf.OpSpec{
+		Type: "Bucketize",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
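The documented example, reproduced with the Go API; a minimal sketch:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	input := op.Const(s, [][]float32{{-5, 10000}, {150, 10}, {5, 100}})
	buckets := op.Bucketize(s, input, []float32{0, 10, 100})

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{buckets}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [[0 3] [3 2] [1 3]]
}
```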
+
+// Calculates gains for each feature and returns the best possible split information for the feature.
+//
+// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature.
+//
+// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split.
+//
+// In this manner, the output is the best split per feature and per node, so that it needs to be combined later to produce the best split for each node (among all possible features).
+//
+// The output lists all have the same length, `num_features`.
+// The output shapes are compatible in a way that the first dimension of all tensors of all lists is the same and equal to the number of possible split nodes for each feature.
+//
+// Arguments:
+//	node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive).
+//	stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used.
+//	l1: l1 regularization factor on leaf weights, per instance based.
+//	l2: l2 regularization factor on leaf weights, per instance based.
+//	tree_complexity: adjustment to the gain, per leaf based.
+//	min_node_weight: minimum avg of hessians in a node required for the node to be considered for splitting.
+//	max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors.
+//
+// Returns:
+//	node_ids_list: An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes.
+//	gains_list: An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes.
+//	thresholds_list: An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes.
+//	left_node_contribs_list: A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes.
+//	right_node_contribs_list: A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node.
+func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"max_splits": max_splits}
+	opspec := tf.OpSpec{
+		Type: "BoostedTreesCalculateBestGainsPerFeature",
+		Input: []tf.Input{
+			node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil {
+		scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err)
+		return
+	}
+	return node_ids_list, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list
+}
+
+// EncodePngAttr is an optional argument to EncodePng.
+type EncodePngAttr func(optionalAttr)
+
+// EncodePngCompression sets the optional compression attribute to value.
+//
+// value: Compression level.
+// If not specified, defaults to -1
+func EncodePngCompression(value int64) EncodePngAttr {
+	return func(m optionalAttr) {
+		m["compression"] = value
+	}
+}
+
+// PNG-encode an image.
+//
+// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
+// where `channels` is:
+//
+// *   1: for grayscale.
+// *   2: for grayscale + alpha.
+// *   3: for RGB.
+// *   4: for RGBA.
+//
+// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
+// default or a value from 0 to 9.  9 is the highest compression level, generating
+// the smallest output, but is slower.
+//
+// Arguments:
+//	image: 3-D with shape `[height, width, channels]`.
+//
+// Returns 0-D. PNG-encoded image.
+func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -24525,9 +30665,9 @@
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomGamma",
+		Type: "EncodePng",
 		Input: []tf.Input{
-			shape, alpha,
+			image,
 		},
 		Attrs: attrs,
 	}
@@ -24535,52 +30675,112 @@
 	return op.Output(0)
 }
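A minimal sketch from the Go API, encoding a tiny grayscale image at maximum compression; the printed value is just the byte length of the resulting PNG:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A 2x2 single-channel (grayscale) image.
	img := op.Const(s, [][][]uint8{{{0}, {255}}, {{255}, {0}}})
	png := op.EncodePng(s, img, op.EncodePngCompression(9))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{png}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(out[0].Value().(string))) // byte length of the encoded PNG
}
```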
 
-// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2.
+type QueueDequeueUpToV2Attr func(optionalAttr)
+
+// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// actual distribution of the values to maximize the usage of the lower bit depth
-// and adjusting the output min and max ranges accordingly.
+// value: If the queue has fewer than n elements, this operation
+// will block for up to timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr {
+	return func(m optionalAttr) {
+		m["timeout_ms"] = value
+	}
+}
+
+// Dequeues `n` tuples of one or more tensors from the given queue.
 //
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+// This operation is not supported by all queues.  If a queue does not support
+// DequeueUpTo, then an Unimplemented error is returned.
 //
-// This operator tries to squeeze as much precision as possible into an output with
-// a lower bit depth by calculating the actual min and max values found in the
-// data. For example, maybe that quint16 input has no values lower than 16,384 and
-// none higher than 49,152. That means only half the range is actually needed, all
-// the float interpretations are between -0.5f and 0.5f, so if we want to compress
-// the data into a quint8 output, we can use that range rather than the theoretical
-// -1.0f to 1.0f that is suggested by the input min and max.
+// If the queue is closed and there are more than 0 but fewer than `n`
+// elements remaining, then instead of returning an OutOfRange error like
+// QueueDequeueMany, fewer than `n` elements are returned immediately.  If
+// the queue is closed and there are 0 elements left in the queue, then
+// an OutOfRange error is returned just like in QueueDequeueMany.
+// Otherwise the behavior is identical to QueueDequeueMany:
 //
-// In practice, this is most useful for taking output from operations like
-// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
-// may have large potential output ranges, but in practice have a distribution of
-// input values that only uses a small fraction of the possible range. By feeding
-// that output into this operator, we can reduce it from 32 bits down to 8 with
-// minimal loss of accuracy.
+// This operation concatenates queue-element component tensors along the
+// 0th dimension to make a single component tensor.  All of the components
+// in the dequeued tuple will have size n in the 0th dimension.
+//
+// This operation has `k` outputs, where `k` is the number of components in
+// the tuples stored in the given queue, and output `i` is the ith
+// component of the dequeued tuple.
 //
 // Arguments:
+//	handle: The handle to a queue.
+//	n: The number of tuples to dequeue.
+//	component_types: The type of each component in a tuple.
 //
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
-//
-// Returns:
-//	output_min: The float value that the minimum quantized output value represents.
-//	output_max: The float value that the maximum quantized output value represents.
-func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+// Returns One or more tensors that were dequeued as a tuple.
+func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "QuantizeDownAndShrinkRange",
+		Type: "QueueDequeueUpToV2",
 		Input: []tf.Input{
-			input, input_min, input_max,
+			handle, n,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("QueueDequeueUpToV2", err)
+		return
+	}
+	return components
+}
+
+// Returns the max of x and y (i.e. x > y ? x : y) element-wise.
+//
+// *NOTE*: `Maximum` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Maximum",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns element-wise remainder of division. This emulates C semantics in that
+//
+// the result here is consistent with a truncating divide. E.g.
+// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.
+//
+// *NOTE*: `Mod` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Mod",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
 // Returns element-wise remainder of division. This emulates C semantics in that
@@ -24646,6 +30846,62 @@
 	return offset
 }
 
+// LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingRMSPropParametersGradAccumDebug.
+type LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr func(optionalAttr)
+
+// LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Load RMSProp embedding parameters with debug support.
+//
+// An op that loads optimization parameters into HBM for embedding. Must be
+// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct
+// embedding table configuration. For example, this op is used to install
+// parameters that are loaded from a checkpoint before a training loop is
+// executed.
+//
+// Arguments:
+//	parameters: Value of parameters used in the RMSProp optimization algorithm.
+//	ms: Value of ms used in the RMSProp optimization algorithm.
+//	mom: Value of mom used in the RMSProp optimization algorithm.
+//	gradient_accumulators: Value of gradient_accumulators used in the RMSProp optimization algorithm.
+//
+// Returns the created operation.
+func LoadTPUEmbeddingRMSPropParametersGradAccumDebug(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug",
+		Input: []tf.Input{
+			parameters, ms, mom, gradient_accumulators,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
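+// Illustrative call shape only (hypothetical values; a real program must
+// first run the TPU embedding configuration described above, and the tensor
+// arguments would come from a restored checkpoint):
+//
+//	loadOp := LoadTPUEmbeddingRMSPropParametersGradAccumDebug(
+//		s, parameters, ms, mom, gradientAccumulators,
+//		8 /* num_shards */, 0 /* shard_id */,
+//		LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableName("emb0"))
+//	_ = loadOp // run it as a session target before training
+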
 // Compute the lower regularized incomplete Gamma function `P(a, x)`.
 //
 // The lower regularized incomplete Gamma function is defined as:
@@ -24675,73 +30931,6 @@
 	return op.Output(0)
 }
 
-// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
-//
-// The Hurwitz zeta function is defined as:
-//
-//
-// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
-func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Zeta",
-		Input: []tf.Input{
-			x, q,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the cardinality of `input_dataset`.
-//
-// Arguments:
-//	input_dataset: A variant tensor representing the dataset to return cardinality for.
-//
-// Returns The cardinality of `input_dataset`. Named constants are used to represent
-// infinite and unknown cardinality.
-func ExperimentalDatasetCardinality(scope *Scope, input_dataset tf.Output) (cardinality tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalDatasetCardinality",
-		Input: []tf.Input{
-			input_dataset,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that executes a SQL query and emits rows of the result set.
-//
-// Arguments:
-//	driver_name: The database type. Currently, the only supported type is 'sqlite'.
-//	data_source_name: A connection string to connect to the database.
-//	query: A SQL query to execute.
-//
-//
-func ExperimentalSqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalSqlDataset",
-		Input: []tf.Input{
-			driver_name, data_source_name, query,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
 //
 // The regularized incomplete beta integral is defined as:
@@ -24954,6 +31143,37 @@
 	return op.Output(0)
 }
 
+// Gets the next output from the given iterator.
+//
+// This operation is a synchronous version of IteratorGetNext. It should only be used
+// in situations where the iterator does not block the calling thread, or where
+// the calling thread is not a member of the thread pool used to execute parallel
+// operations (e.g. in eager mode).
+func IteratorGetNextSync(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "IteratorGetNextSync",
+		Input: []tf.Input{
+			iterator,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("IteratorGetNextSync", err)
+		return
+	}
+	return components
+}
+
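+// Illustrative call shape only (a sketch): `iterator` is assumed to come
+// from an iterator-producing op (e.g. OneShotIterator) elsewhere in the
+// graph, with one float32 scalar component per element.
+//
+//	parts := IteratorGetNextSync(s, iterator,
+//		[]tf.DataType{tf.Float},
+//		[]tf.Shape{tf.ScalarShape()})
+//	next := parts[0]
+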
 // Returns the truth value of (x >= y) element-wise.
 //
 // *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
@@ -25021,69 +31241,6 @@
 	return op.Output(0)
 }
 
-// Selects elements from `x` or `y`, depending on `condition`.
-//
-// The `x` and `y` tensors must all have the same shape, and the
-// output will also have that shape.
-//
-// The `condition` tensor must be a scalar if `x` and `y` are scalars.
-// If `x` and `y` are vectors or higher rank, then `condition` must be either a
-// scalar, a vector with size matching the first dimension of `x`, or must have
-// the same shape as `x`.
-//
-// The `condition` tensor acts as a mask that chooses, based on the value at each
-// element, whether the corresponding element / row in the output should be
-// taken from `x` (if true) or `y` (if false).
-//
-// If `condition` is a vector and `x` and `y` are higher rank matrices, then
-// it chooses which row (outer dimension) to copy from `x` and `y`.
-// If `condition` has the same shape as `x` and `y`, then it chooses which
-// element to copy from `x` and `y`.
-//
-// For example:
-//
-// ```python
-// # 'condition' tensor is [[True,  False]
-// #                        [False, True]]
-// # 't' is [[1, 2],
-// #         [3, 4]]
-// # 'e' is [[5, 6],
-// #         [7, 8]]
-// select(condition, t, e)  # => [[1, 6], [7, 4]]
-//
-//
-// # 'condition' tensor is [True, False]
-// # 't' is [[1, 2],
-// #         [3, 4]]
-// # 'e' is [[5, 6],
-// #         [7, 8]]
-// select(condition, t, e) ==> [[1, 2],
-//                              [7, 8]]
-//
-// ```
-//
-// Arguments:
-//
-//	x: A `Tensor` which may have the same shape as `condition`.
-// If `condition` is rank 1, `x` may have higher rank,
-// but its first dimension must match the size of `condition`.
-//	y: A `Tensor` with the same type and shape as `x`.
-//
-// Returns A `Tensor` with the same type and shape as `x` and `y`.
-func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Select",
-		Input: []tf.Input{
-			condition, x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // MatMulAttr is an optional argument to MatMul.
 type MatMulAttr func(optionalAttr)
 
@@ -25135,506 +31292,6 @@
 	return op.Output(0)
 }
 
-// Serializes the tree handle to a proto
-//
-// Arguments:
-//	tree_handle: Handle to the tree resource to be serialized.
-//
-// Returns Serialized proto string of the tree resource.
-func TensorForestTreeSerialize(scope *Scope, tree_handle tf.Output) (tree_config tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorForestTreeSerialize",
-		Input: []tf.Input{
-			tree_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SparseMatMulAttr is an optional argument to SparseMatMul.
-type SparseMatMulAttr func(optionalAttr)
-
-// SparseMatMulTransposeA sets the optional transpose_a attribute to value.
-// If not specified, defaults to false
-func SparseMatMulTransposeA(value bool) SparseMatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_a"] = value
-	}
-}
-
-// SparseMatMulTransposeB sets the optional transpose_b attribute to value.
-// If not specified, defaults to false
-func SparseMatMulTransposeB(value bool) SparseMatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_b"] = value
-	}
-}
-
-// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value.
-// If not specified, defaults to false
-func SparseMatMulAIsSparse(value bool) SparseMatMulAttr {
-	return func(m optionalAttr) {
-		m["a_is_sparse"] = value
-	}
-}
-
-// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value.
-// If not specified, defaults to false
-func SparseMatMulBIsSparse(value bool) SparseMatMulAttr {
-	return func(m optionalAttr) {
-		m["b_is_sparse"] = value
-	}
-}
-
-// Multiply matrix "a" by matrix "b".
-//
-// The inputs must be two-dimensional matrices and the inner dimension of "a" must
-// match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not
-// `SparseTensor`s.  This op is optimized for the case where at least one of "a" or
-// "b" is sparse, in the sense that they have a large proportion of zero values.
-// The breakeven for using this versus a dense matrix multiply on one platform was
-// 30% zero values in the sparse matrix.
-//
-// The gradient computation of this operation will only take advantage of sparsity
-// in the input gradient when that gradient comes from a Relu.
-func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseMatMul",
-		Input: []tf.Input{
-			a, b,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ExperimentalThreadPoolHandleAttr is an optional argument to ExperimentalThreadPoolHandle.
-type ExperimentalThreadPoolHandleAttr func(optionalAttr)
-
-// ExperimentalThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value.
-//
-// value: The maximum degree of parallelism to use within operations that execute on this
-// threadpool.
-// If not specified, defaults to 1
-func ExperimentalThreadPoolHandleMaxIntraOpParallelism(value int64) ExperimentalThreadPoolHandleAttr {
-	return func(m optionalAttr) {
-		m["max_intra_op_parallelism"] = value
-	}
-}
-
-// ExperimentalThreadPoolHandleContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func ExperimentalThreadPoolHandleContainer(value string) ExperimentalThreadPoolHandleAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// ExperimentalThreadPoolHandleSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func ExperimentalThreadPoolHandleSharedName(value string) ExperimentalThreadPoolHandleAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Creates a dataset that uses a custom thread pool to compute `input_dataset`.
-//
-// Arguments:
-//	num_threads: The number of threads in the thread pool.
-//	display_name: A human-readable name for the threads that may be visible in some
-// visualizations.
-//
-// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset
-// ops.
-func ExperimentalThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ExperimentalThreadPoolHandleAttr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalThreadPoolHandle",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams.
-type CudnnRNNCanonicalToParamsAttr func(optionalAttr)
-
-// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["direction"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Converts CudnnRNN params from canonical form to usable form.
-//
-// Writes a set of weights into the opaque params buffer so they can be used in
-// upcoming training or inferences.
-//
-// Note that the params buffer may not be compatible across different GPUs. So any
-// save and restoration should be converted to and from the canonical weights and
-// biases.
-//
-// num_layers: Specifies the number of layers in the RNN model.
-// num_units: Specifies the size of the hidden state.
-// input_size: Specifies the size of the input state.
-// weights: the canonical form of weights that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// biases: the canonical form of biases that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// num_params: number of parameter sets for all layers.
-//     Each layer may contain multiple parameter sets, with each set consisting of
-//     a weight matrix and a bias vector.
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicates whether there is a linear projection between the input and
-//     the actual computation before the first layer. 'skip_input' is only allowed
-//     when input_size == num_units; 'auto_select' implies 'skip_input' when
-//     input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used.
-//     dir = (direction == bidirectional) ? 2 : 1
-// dropout: dropout probability. When set to 0., dropout is disabled.
-// seed: the 1st part of a seed to initialize dropout.
-// seed2: the 2nd part of a seed to initialize dropout.
-func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CudnnRNNCanonicalToParams",
-		Input: []tf.Input{
-			num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset containing elements of first component of `input_dataset` having true in the last component.
-func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "FilterByLastComponentDataset",
-		Input: []tf.Input{
-			input_dataset,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SumAttr is an optional argument to Sum.
-type SumAttr func(optionalAttr)
-
-// SumKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SumKeepDims(value bool) SumAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
-	}
-}
-
-// Computes the sum of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `axis`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `axis`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
-//
-// Arguments:
-//	input: The tensor to reduce.
-//	axis: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
-//
-// Returns The reduced tensor.
-func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Sum",
-		Input: []tf.Input{
-			input, axis,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// EnterAttr is an optional argument to Enter.
-type EnterAttr func(optionalAttr)
-
-// EnterIsConstant sets the optional is_constant attribute to value.
-//
-// value: If true, the output is constant within the child frame.
-// If not specified, defaults to false
-func EnterIsConstant(value bool) EnterAttr {
-	return func(m optionalAttr) {
-		m["is_constant"] = value
-	}
-}
-
-// EnterParallelIterations sets the optional parallel_iterations attribute to value.
-//
-// value: The number of iterations allowed to run in parallel.
-// If not specified, defaults to 10
-func EnterParallelIterations(value int64) EnterAttr {
-	return func(m optionalAttr) {
-		m["parallel_iterations"] = value
-	}
-}
-
-// Creates or finds a child frame, and makes `data` available to the child frame.
-//
-// This op is used together with `Exit` to create loops in the graph.
-// The unique `frame_name` is used by the `Executor` to identify frames. If
-// `is_constant` is true, `output` is a constant in the child frame; otherwise
-// it may be changed in the child frame. At most `parallel_iterations` iterations
-// are run in parallel in the child frame.
-//
-// Arguments:
-//	data: The tensor to be made available to the child frame.
-//	frame_name: The name of the child frame.
-//
-// Returns The same tensor as `data`.
-func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"frame_name": frame_name}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Enter",
-		Input: []tf.Input{
-			data,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Adds all input tensors element-wise.
-//
-// Arguments:
-//	inputs: Must all be the same size and shape.
-func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AddN",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// TryRpcAttr is an optional argument to TryRpc.
-type TryRpcAttr func(optionalAttr)
-
-// TryRpcProtocol sets the optional protocol attribute to value.
-//
-// value: RPC protocol to use.  Empty string means use the default protocol.
-// Options include 'grpc'.
-// If not specified, defaults to ""
-func TryRpcProtocol(value string) TryRpcAttr {
-	return func(m optionalAttr) {
-		m["protocol"] = value
-	}
-}
-
-// TryRpcFailFast sets the optional fail_fast attribute to value.
-//
-// value: `boolean`. If `true` (default), then failures to connect
-// (i.e., the server does not immediately respond) cause an RPC failure.
-// If not specified, defaults to true
-func TryRpcFailFast(value bool) TryRpcAttr {
-	return func(m optionalAttr) {
-		m["fail_fast"] = value
-	}
-}
-
-// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value.
-//
-// value: `int`. If `0` (default), then the kernel will run the RPC
-// request and only time out if the RPC deadline passes or the session times out.
-// If this value is greater than `0`, then the op will raise an exception if
-// the RPC takes longer than `timeout_in_ms`.
-// If not specified, defaults to 0
-func TryRpcTimeoutInMs(value int64) TryRpcAttr {
-	return func(m optionalAttr) {
-		m["timeout_in_ms"] = value
-	}
-}
-
-// Perform batches of RPC requests.
-//
-// This op asynchronously performs either a single RPC request, or a batch
-// of requests.  RPC requests are defined by three main parameters:
-//
-//   - `address` (the host+port or BNS address of the request)
-//   - `method` (the method name for the request)
-//   - `request` (the serialized proto string, or vector of strings,
-//      of the RPC request argument).
-//
-// For example, if you have an RPC service running at localhost:2345,
-// and its interface is configured with the following proto declaration:
-//
-// ```
-// service MyService {
-//   rpc MyMethod(MyRequestProto) returns (MyResponseProto) {
-//   }
-// };
-// ```
-//
-// then call this op with arguments:
-//
-// ```
-// address = "localhost:2345"
-// method = "MyService/MyMethod"
-// ```
-//
-// The `request` tensor is a string tensor representing serialized `MyRequestProto`
-// strings; and the output string tensor `response` will have the same shape
-// and contain (upon successful completion) corresponding serialized
-// `MyResponseProto` strings.
-//
-// For example, to send a single, empty, `MyRequestProto`, call
-// this op with `request = ""`.  To send 5 **parallel** empty requests,
-// call this op with `request = ["", "", "", "", ""]`.
-//
-// More generally, one can create a batch of `MyRequestProto` serialized protos
-// from regular batched tensors using the `encode_proto` op, and convert
-// the response `MyResponseProto` serialized protos to batched tensors
-// using the `decode_proto` op.
-//
-// **NOTE** Working with serialized proto strings is faster than instantiating
-// actual proto objects in memory, so no performance degradation is expected
-// compared to writing custom kernels for this workflow.
-//
-// Unlike the standard `Rpc` op, if the connection fails or the remote worker
-// returns an error status, this op does **not** reraise the exception.
-// Instead, the `status_code` and `status_message` entry for the corresponding RPC
-// call is set with the error returned from the RPC call.  The `response` tensor
-// will contain valid response values for those minibatch entries whose RPCs did
-// not fail; the rest of the entries will have empty strings.
-//
-// Arguments:
-//	address: `0-D` or `1-D`.  The address (i.e. host_name:port) of the RPC server.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `method` and `request`.
-//	method: `0-D` or `1-D`.  The method address on the RPC server.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `address` and `request`.
-//	request: `0-D` or `1-D`.  Serialized proto strings: the rpc request argument.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `address` and `method`.
-//
-// Returns Same shape as `request`. Serialized proto strings: the rpc responses.
-// Same shape as `request`. Values correspond to tensorflow Status enum codes.
-// Same shape as `request`. Values correspond to Status messages returned from
-// the RPC calls.
-func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "TryRpc",
-		Input: []tf.Input{
-			address, method, request,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2.
 type InitializeTableFromTextFileV2Attr func(optionalAttr)
 
@@ -26233,198 +31890,6 @@
 	return op.Output(0)
 }
 
-// Computes the mean along sparse segments of a tensor.
-//
-// See `tf.sparse.segment_sum` for usage examples.
-//
-// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
-// dimension, selecting a subset of dimension 0, specified by `indices`.
-//
-// Arguments:
-//
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentMean",
-		Input: []tf.Input{
-			data, indices, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Deserializes a serialized tree ensemble config and replaces the current tree
-// ensemble.
-//
-// Arguments:
-//	tree_ensemble_handle: Handle to the tree ensemble.
-//	stamp_token: Token to use as the new value of the resource stamp.
-//	tree_ensemble_serialized: Serialized proto of the ensemble.
-//
-// Returns the created operation.
-func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BoostedTreesDeserializeEnsemble",
-		Input: []tf.Input{
-			tree_ensemble_handle, stamp_token, tree_ensemble_serialized,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Transforms a tf.Example proto (as a string) into typed tensors.
-//
-// Arguments:
-//	serialized: A vector containing a batch of binary serialized Example protos.
-//	dense_defaults: A list of Tensors (some may be empty), whose length matches
-// the length of `dense_keys`. dense_defaults[j] provides default values
-// when the example's feature_map lacks dense_key[j].  If an empty Tensor is
-// provided for dense_defaults[j], then the Feature dense_keys[j] is required.
-// The input type is inferred from dense_defaults[j], even when it's empty.
-// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
-// then the shape of dense_defaults[j] must match that of dense_shapes[j].
-// If dense_shapes[j] has an undefined major dimension (variable strides dense
-// feature), dense_defaults[j] must contain a single element:
-// the padding element.
-//	num_sparse: The number of sparse features to be parsed from the example. This
-// must match the lengths of `sparse_keys` and `sparse_types`.
-//	sparse_keys: A list of `num_sparse` strings.
-// The keys expected in the Examples' features associated with sparse values.
-//	dense_keys: The keys expected in the Examples' features associated with dense
-// values.
-//	sparse_types: A list of `num_sparse` types; the data types of data in each
-// Feature given in sparse_keys.
-// Currently the ParseSingleExample op supports DT_FLOAT (FloatList),
-// DT_INT64 (Int64List), and DT_STRING (BytesList).
-//	dense_shapes: The shapes of data in each Feature given in dense_keys.
-// The length of this list must match the length of `dense_keys`.  The
-// number of elements in the Feature corresponding to dense_key[j] must
-// always equal dense_shapes[j].NumEntries().  If dense_shapes[j] ==
-// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j]
-// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1,
-// ..., DN), the shape of the output Tensor dense_values[j] will be (M,
-// D1, .., DN), where M is the number of blocks of elements of length
-// D1 * .... * DN, in the input.
-func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes}
-	opspec := tf.OpSpec{
-		Type: "ParseSingleExample",
-		Input: []tf.Input{
-			serialized, tf.OutputList(dense_defaults),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil {
-		scope.UpdateErr("ParseSingleExample", err)
-		return
-	}
-	return sparse_indices, sparse_values, sparse_shapes, dense_values
-}
-
-// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2.
-type WholeFileReaderV2Attr func(optionalAttr)
-
-// WholeFileReaderV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this reader is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// WholeFileReaderV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this reader is named in the given bucket
-// with this shared_name. Otherwise, the node name is used instead.
-// If not specified, defaults to ""
-func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A Reader that outputs the entire contents of a file as a value.
-//
-// To use, enqueue filenames in a Queue.  The output of ReaderRead will
-// be a filename (key) and the contents of that file (value).
-//
-// Returns The handle to reference the Reader.
-func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "WholeFileReaderV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Pop the element at the top of the stack.
-//
-// Arguments:
-//	handle: The handle to a stack.
-//	elem_type: The type of the elem that is popped.
-//
-// Returns The tensor that is popped from the top of the stack.
-func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"elem_type": elem_type}
-	opspec := tf.OpSpec{
-		Type: "StackPopV2",
-		Input: []tf.Input{
-			handle,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Computes hyperbolic cosine of x element-wise.
 func Cosh(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
@@ -27064,130 +32529,6 @@
 	return op.Output(0)
 }
 
-// RealAttr is an optional argument to Real.
-type RealAttr func(optionalAttr)
-
-// RealTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_FLOAT
-func RealTout(value tf.DataType) RealAttr {
-	return func(m optionalAttr) {
-		m["Tout"] = value
-	}
-}
-
-// Returns the real part of a complex number.
-//
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// type `float` that is the real part of each element in `input`. All elements in
-// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real
-//  part returned by this operation and *b* is the imaginary part.
-//
-// For example:
-//
-// ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.real(input) ==> [-2.25, 3.25]
-// ```
-func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Real",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Sends `input` to all devices that are connected to the output.
-//
-// The graph should be constructed so that all ops connected to the output have a
-// valid device assignment, and the op itself is assigned one of these devices.
-//
-// input: The input to the broadcast.
-// output: The same as input.
-// shape: The shape of the input tensor.
-//
-func NcclBroadcast(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"shape": shape}
-	opspec := tf.OpSpec{
-		Type: "NcclBroadcast",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResizeAreaAttr is an optional argument to ResizeArea.
-type ResizeAreaAttr func(optionalAttr)
-
-// ResizeAreaAlignCorners sets the optional align_corners attribute to value.
-//
-// value: If true, the centers of the 4 corner pixels of the input and output tensors are
-// aligned, preserving the values at the corner pixels. Defaults to false.
-// If not specified, defaults to false
-func ResizeAreaAlignCorners(value bool) ResizeAreaAttr {
-	return func(m optionalAttr) {
-		m["align_corners"] = value
-	}
-}
-
-// Resize `images` to `size` using area interpolation.
-//
-// Input images can be of different types but output images are always float.
-//
-// The range of pixel values for the output image might be slightly different
-// from the range for the input image because of limited numerical precision.
-// To guarantee an output range, for example `[0.0, 1.0]`, apply
-// `tf.clip_by_value` to the output.
-//
-// Each output pixel is computed by first transforming the pixel's footprint into
-// the input tensor and then averaging the pixels that intersect the footprint. An
-// input pixel's contribution to the average is weighted by the fraction of its
-// area that intersects the footprint.  This is the same as OpenCV's INTER_AREA.
-//
-// Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
-//
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResizeArea",
-		Input: []tf.Input{
-			images, size,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // VarHandleOpAttr is an optional argument to VarHandleOp.
 type VarHandleOpAttr func(optionalAttr)
 
@@ -27572,6 +32913,52 @@
 	return op.Output(0)
 }
 
+// RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to RetrieveTPUEmbeddingStochasticGradientDescentParameters.
+type RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr)
+
+// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value.
+// If not specified, defaults to -1
+//
+// REQUIRES: value >= -1
+func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr {
+	return func(m optionalAttr) {
+		m["table_id"] = value
+	}
+}
+
+// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value.
+// If not specified, defaults to ""
+func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr {
+	return func(m optionalAttr) {
+		m["table_name"] = value
+	}
+}
+
+// Retrieve SGD embedding parameters.
+//
+// An op that retrieves optimization parameters from embedding to host
+// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up
+// the correct embedding table configuration. For example, this op is
+// used to retrieve updated parameters before saving a checkpoint.
+//
+// Returns The embedding parameters updated by the stochastic gradient descent optimization algorithm.
+func RetrieveTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr) (parameters tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RetrieveTPUEmbeddingStochasticGradientDescentParameters",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
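+// Illustrative call shape only (hypothetical shard values): retrieving the
+// table before writing a checkpoint mirrors the Load* ops above.
+//
+//	params := RetrieveTPUEmbeddingStochasticGradientDescentParameters(
+//		s, 8 /* num_shards */, 0 /* shard_id */,
+//		RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName("emb0"))
+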
 // QuantizedMatMulAttr is an optional argument to QuantizedMatMul.
 type QuantizedMatMulAttr func(optionalAttr)
 
@@ -27649,78 +33036,6 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Does nothing. Serves as a control trigger for scheduling.
-//
-// Only useful as a placeholder for control edges.
-//
-// Returns the created operation.
-func ControlTrigger(scope *Scope) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ControlTrigger",
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Batch normalization.
-//
-// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
-//
-// This op is deprecated. Prefer `tf.nn.batch_normalization`.
-//
-// Arguments:
-//	t: A 4D input Tensor.
-//	m: A 1D mean Tensor with size matching the last dimension of t.
-// This is the first output from tf.nn.moments,
-// or a saved moving average thereof.
-//	v: A 1D variance Tensor with size matching the last dimension of t.
-// This is the second output from tf.nn.moments,
-// or a saved moving average thereof.
-//	beta: A 1D beta Tensor with size matching the last dimension of t.
-// An offset to be added to the normalized tensor.
-//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
-// If "scale_after_normalization" is true, this tensor will be multiplied
-// with the normalized tensor.
-//	variance_epsilon: A small float number to avoid dividing by 0.
-//	scale_after_normalization: A bool indicating whether the resulting tensor
-// needs to be multiplied with gamma.
-func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
-	opspec := tf.OpSpec{
-		Type: "BatchNormWithGlobalNormalization",
-		Input: []tf.Input{
-			t, m, v, beta, gamma,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Deprecated. Use TensorArrayReadV3
-//
-// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3
-func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayReadV2",
-		Input: []tf.Input{
-			handle, index, flow_in,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // QuantizedMulAttr is an optional argument to QuantizedMul.
 type QuantizedMulAttr func(optionalAttr)
 
@@ -27809,6 +33124,29 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Scatters tensor at indices in an input list.
+//
+// Each member of the TensorList corresponds to one row of the input tensor,
+// specified by the given index (see `tf.gather`).
+//
+// input_handle: The list to scatter into.
+// tensor: The input tensor.
+// indices: The indices used to index into the list.
+// output_handle: The TensorList.
+func TensorListScatterIntoExistingList(scope *Scope, input_handle tf.Output, tensor tf.Output, indices tf.Output) (output_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorListScatterIntoExistingList",
+		Input: []tf.Input{
+			input_handle, tensor, indices,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
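+// Illustrative wiring (a sketch): rows of a 2x2 tensor are scattered into
+// positions 0 and 2 of a list reserved elsewhere in the same graph.
+//
+//	list := TensorListReserve(s, Const(s, []int32{2}), Const(s, int32(4)), tf.Float)
+//	t := Const(s, [][]float32{{1, 2}, {3, 4}})
+//	idx := Const(s, []int32{0, 2})
+//	out := TensorListScatterIntoExistingList(s, list, t, idx)
+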
 // Given a quantized tensor described by (input, input_min, input_max), outputs a
 //
 // range that covers the actual values present in that tensor.  This op is
@@ -27886,35 +33224,6 @@
 	return op.Output(0)
 }
 
-// Looks up keys in a table, outputs the corresponding values.
-//
-// The tensor `keys` must be of the same type as the keys of the table.
-// The output `values` is of the type of the table values.
-//
-// The scalar `default_value` is the value output for keys not present in the
-// table. It must also be of the same type as the table values.
-//
-// Arguments:
-//	table_handle: Handle to the table.
-//	keys: Any shape.  Keys to look up.
-//
-//
-// Returns Same shape as `keys`.  Values found in the table, or `default_value`
-// for missing keys.
-func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LookupTableFindV2",
-		Input: []tf.Input{
-			table_handle, keys, default_value,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Updates the table to associate keys with values.
 //
 // The tensor `keys` must be of the same type as the keys of the table.
@@ -28096,30 +33405,6 @@
 	return op.Output(0), op.Output(1)
 }
 
-// Replaces the contents of the table with the specified keys and values.
-//
-// The tensor `keys` must be of the same type as the keys of the table.
-// The tensor `values` must be of the type of the table values.
-//
-// Arguments:
-//	table_handle: Handle to the table.
-//	keys: Any shape.  Keys to look up.
-//	values: Values to associate with keys.
-//
-// Returns the created operation.
-func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LookupTableImportV2",
-		Input: []tf.Input{
-			table_handle, keys, values,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // MultiDeviceIteratorFromStringHandleAttr is an optional argument to MultiDeviceIteratorFromStringHandle.
 type MultiDeviceIteratorFromStringHandleAttr func(optionalAttr)
 
@@ -28414,6 +33699,21 @@
 	return op.Output(0)
 }
 
+// Shuts down a running distributed TPU system.
+//
+// The op returns an error if no system is running.
+//
+// Returns the created operation.
+func ShutdownDistributedTPU(scope *Scope) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ShutdownDistributedTPU",
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Deprecated. Disallowed in GraphDef version >= 2.
 //
 // DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead
@@ -28513,96 +33813,6 @@
 	return op.Output(0)
 }
 
-// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2.
-type QueueEnqueueManyV2Attr func(optionalAttr)
-
-// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
-//
-// value: If the queue is too full, this operation will block for up
-// to timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr {
-	return func(m optionalAttr) {
-		m["timeout_ms"] = value
-	}
-}
-
-// Enqueues zero or more tuples of one or more tensors in the given queue.
-//
-// This operation slices each component tensor along the 0th dimension to
-// make multiple queue elements. All of the tuple components must have the
-// same size in the 0th dimension.
-//
-// The components input has k elements, which correspond to the components of
-// tuples stored in the given queue.
-//
-// N.B. If the queue is full, this operation will block until the given
-// elements have been enqueued (or 'timeout_ms' elapses, if specified).
-//
-// Arguments:
-//	handle: The handle to a queue.
-//	components: One or more tensors from which the enqueued tensors should
-// be taken.
-//
-// Returns the created operation.
-func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueEnqueueManyV2",
-		Input: []tf.Input{
-			handle, tf.OutputList(components),
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// PrintV2Attr is an optional argument to PrintV2.
-type PrintV2Attr func(optionalAttr)
-
-// PrintV2OutputStream sets the optional output_stream attribute to value.
-//
-// value: A string specifying the output stream or logging level to print to.
-// If not specified, defaults to "stderr"
-func PrintV2OutputStream(value string) PrintV2Attr {
-	return func(m optionalAttr) {
-		m["output_stream"] = value
-	}
-}
-
-// Prints a string scalar.
-//
-// Prints a string scalar to the desired output_stream.
-//
-// Arguments:
-//	input: The string scalar to print.
-//
-// Returns the created operation.
-func PrintV2(scope *Scope, input tf.Output, optional ...PrintV2Attr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "PrintV2",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
 // Outputs a `Summary` protocol buffer with a tensor and per-plugin data.
 //
 // Arguments:
@@ -28816,53 +34026,6 @@
 	return op.Output(0)
 }
 
-// Outputs a `Summary` protocol buffer with a histogram.
-//
-// The generated
-// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-// has one summary value containing a histogram for `values`.
-//
-// This op reports an `InvalidArgument` error if any value is not finite.
-//
-// Arguments:
-//	tag: Scalar.  Tag to use for the `Summary.Value`.
-//	values: Any shape. Values to use to build the histogram.
-//
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "HistogramSummary",
-		Input: []tf.Input{
-			tag, values,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the number of elements in the given queue.
-//
-// Arguments:
-//	handle: The handle to a queue.
-//
-// Returns The number of elements in the given queue.
-func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueSizeV2",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // ImageSummaryAttr is an optional argument to ImageSummary.
 type ImageSummaryAttr func(optionalAttr)
 
@@ -29282,30 +34445,6 @@
 	return op.Output(0)
 }
 
-// Creates a Tensor by indexing into the TensorList.
-//
-// Each row in the produced Tensor corresponds to the element in the TensorList
-// specified by the given index (see `tf.gather`).
-//
-// input_handle: The input tensor list.
-// indices: The indices used to index into the list.
-// values: The tensor.
-func TensorListGather(scope *Scope, input_handle tf.Output, indices tf.Output, element_shape tf.Output, element_dtype tf.DataType) (values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"element_dtype": element_dtype}
-	opspec := tf.OpSpec{
-		Type: "TensorListGather",
-		Input: []tf.Input{
-			input_handle, indices, element_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a TensorList by indexing into a Tensor.
 //
 // Each member of the TensorList corresponds to one row of the input tensor,
@@ -29485,75 +34624,6 @@
 	return op.Output(0)
 }
 
-// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2.
-type QueueDequeueUpToV2Attr func(optionalAttr)
-
-// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value.
-//
-// value: If the queue has fewer than n elements, this operation
-// will block for up to timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr {
-	return func(m optionalAttr) {
-		m["timeout_ms"] = value
-	}
-}
-
-// Dequeues `n` tuples of one or more tensors from the given queue.
-//
-// This operation is not supported by all queues.  If a queue does not support
-// DequeueUpTo, then an Unimplemented error is returned.
-//
-// If the queue is closed and there are more than 0 but less than `n`
-// elements remaining, then instead of returning an OutOfRange error like
-// QueueDequeueMany, less than `n` elements are returned immediately.  If
-// the queue is closed and there are 0 elements left in the queue, then
-// an OutOfRange error is returned just like in QueueDequeueMany.
-// Otherwise the behavior is identical to QueueDequeueMany:
-//
-// This operation concatenates queue-element component tensors along the
-// 0th dimension to make a single component tensor.  All of the components
-// in the dequeued tuple will have size n in the 0th dimension.
-//
-// This operation has `k` outputs, where `k` is the number of components in
-// the tuples stored in the given queue, and output `i` is the ith
-// component of the dequeued tuple.
-//
-// Arguments:
-//	handle: The handle to a queue.
-//	n: The number of tuples to dequeue.
-//	component_types: The type of each component in a tuple.
-//
-// Returns One or more tensors that were dequeued as a tuple.
-func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueDequeueUpToV2",
-		Input: []tf.Input{
-			handle, n,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("QueueDequeueUpToV2", err)
-		return
-	}
-	return components
-}
-
 // Computes the Cholesky decomposition of one or more square matrices.
 //
 // The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
@@ -29706,6 +34776,35 @@
 	return op.Output(0)
 }
 
+// Solves tridiagonal systems of equations.
+//
+// `diagonals` is a tensor of shape `[..., 3, M]` whose inner-most 2 dimensions
+// represent matrices with three rows being the superdiagonal, diagonal, and
+// subdiagonal, in order. The last element of the superdiagonal and the first
+// element of the subdiagonal are ignored.
+// `rhs` is a tensor of shape `[..., M, K]`, representing K right-hand sides per
+// each left-hand side.
+// The output is a tensor of shape `[..., M, K]` containing the solutions.
+//
+// Arguments:
+//	diagonals: Shape is `[..., 3, M]`.
+//	rhs: Shape is `[..., M, K]`.
+//
+// Returns Shape is `[..., M, K]`.
+func TridiagonalSolve(scope *Scope, diagonals tf.Output, rhs tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TridiagonalSolve",
+		Input: []tf.Input{
+			diagonals, rhs,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
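+// exampleTridiagonalSolve is an illustrative sketch, not part of the
+// generated bindings: it solves the 3x3 system with diagonal 4 and
+// off-diagonals 1 for the right-hand side [5 6 5], whose solution is
+// [1 1 1]. The helper name is hypothetical.
+func exampleTridiagonalSolve() (interface{}, error) {
+	s := NewScope()
+	// Rows are superdiagonal, diagonal, subdiagonal; the ignored corner
+	// entries (last of the superdiagonal, first of the subdiagonal) are
+	// set to 0 here.
+	diagonals := Const(s, [][]float64{
+		{1, 1, 0},
+		{4, 4, 4},
+		{0, 1, 1},
+	})
+	rhs := Const(s, [][]float64{{5}, {6}, {5}})
+	x := TridiagonalSolve(s, diagonals, rhs)
+	graph, err := s.Finalize()
+	if err != nil {
+		return nil, err
+	}
+	sess, err := tf.NewSession(graph, nil)
+	if err != nil {
+		return nil, err
+	}
+	defer sess.Close()
+	out, err := sess.Run(nil, []tf.Output{x}, nil)
+	if err != nil {
+		return nil, err
+	}
+	return out[0].Value(), nil // [[1] [1] [1]]
+}
+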
 // SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2.
 type SelfAdjointEigV2Attr func(optionalAttr)
 
@@ -30179,53 +35278,6 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the product along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output_i = \prod_j data_j\\) where the product is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the product is empty for a given segment ID `i`, `output[i] = 1`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt>
-// </div>
-//
-// For example:
-//
-// ```
-// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]])
-// tf.segment_prod(c, tf.constant([0, 0, 1]))
-// # ==> [[4, 6, 6, 4],
-// #      [5, 6, 7, 8]]
-// ```
-//
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SegmentProd",
-		Input: []tf.Input{
-			data, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Converts one or more images from RGB to HSV.
 //
 // Outputs a tensor of the same shape as the `images` tensor, containing the HSV
@@ -30428,249 +35480,6 @@
 	return op.Output(0)
 }
 
-// BatchToSpace for N-D tensors of type T.
-//
-// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape
-// `block_shape + [batch]`, interleaves these blocks back into the grid defined by
-// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as
-// the input.  The spatial dimensions of this intermediate result are then
-// optionally cropped according to `crops` to produce the output.  This is the
-// reverse of SpaceToBatch.  See below for a precise description.
-//
-// Arguments:
-//	input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
-// where spatial_shape has M dimensions.
-//	block_shape: 1-D with shape `[M]`, all values must be >= 1.
-//	crops: 2-D with shape `[M, 2]`, all values must be >= 0.
-//   `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input
-//   dimension `i + 1`, which corresponds to spatial dimension `i`.  It is
-//   required that
-//   `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.
-//
-// This operation is equivalent to the following steps:
-//
-// 1. Reshape `input` to `reshaped` of shape:
-//      [block_shape[0], ..., block_shape[M-1],
-//       batch / prod(block_shape),
-//       input_shape[1], ..., input_shape[N-1]]
-//
-// 2. Permute dimensions of `reshaped` to produce `permuted` of shape
-//      [batch / prod(block_shape),
-//
-//       input_shape[1], block_shape[0],
-//       ...,
-//       input_shape[M], block_shape[M-1],
-//
-//       input_shape[M+1], ..., input_shape[N-1]]
-//
-// 3. Reshape `permuted` to produce `reshaped_permuted` of shape
-//      [batch / prod(block_shape),
-//
-//       input_shape[1] * block_shape[0],
-//       ...,
-//       input_shape[M] * block_shape[M-1],
-//
-//       input_shape[M+1],
-//       ...,
-//       input_shape[N-1]]
-//
-// 4. Crop the start and end of dimensions `[1, ..., M]` of
-//    `reshaped_permuted` according to `crops` to produce the output of shape:
-//      [batch / prod(block_shape),
-//
-//       input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1],
-//       ...,
-//       input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1],
-//
-//       input_shape[M+1], ..., input_shape[N-1]]
-//
-// Some examples:
-//
-// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [0, 0]]`:
-//
-// ```
-// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 1]` and value:
-//
-// ```
-// x = [[[[1], [2]], [[3], [4]]]]
-// ```
-//
-// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [0, 0]]`:
-//
-// ```
-// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 3]` and value:
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
-//
-// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [0, 0]]`:
-//
-// ```
-// x = [[[[1], [3]], [[9], [11]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-//
-// The output tensor has shape `[1, 4, 4, 1]` and value:
-//
-// ```
-// x = [[[1],   [2],  [3],  [4]],
-//      [[5],   [6],  [7],  [8]],
-//      [[9],  [10], [11],  [12]],
-//      [[13], [14], [15],  [16]]]
-// ```
-//
-// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [2, 0]]`:
-//
-// ```
-// x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
-//      [[[0], [2], [4]]], [[[0], [10], [12]]],
-//      [[[0], [5], [7]]], [[[0], [13], [15]]],
-//      [[[0], [6], [8]]], [[[0], [14], [16]]]]
-// ```
-//
-// The output tensor has shape `[2, 2, 4, 1]` and value:
-//
-// ```
-// x = [[[[1],   [2],  [3],  [4]],
-//       [[5],   [6],  [7],  [8]]],
-//      [[[9],  [10], [11],  [12]],
-//       [[13], [14], [15],  [16]]]]
-// ```
-func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BatchToSpaceND",
-		Input: []tf.Input{
-			input, block_shape, crops,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// UnpackAttr is an optional argument to Unpack.
-type UnpackAttr func(optionalAttr)
-
-// UnpackAxis sets the optional axis attribute to value.
-//
-// value: Dimension along which to unpack.  Negative values wrap around, so the
-// valid range is `[-R, R)`.
-// If not specified, defaults to 0
-func UnpackAxis(value int64) UnpackAttr {
-	return func(m optionalAttr) {
-		m["axis"] = value
-	}
-}
-
-// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors.
-//
-// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.
-// For example, given a tensor of shape `(A, B, C, D)`;
-//
-// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]`
-//   and each tensor in `output` will have shape `(B, C, D)`. (Note that the
-//   dimension unpacked along is gone, unlike `split`).
-//
-// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]`
-//   and each tensor in `output` will have shape `(A, C, D)`.
-// Etc.
-//
-// This is the opposite of `pack`.
-//
-// Arguments:
-//	value: 1-D or higher, with `axis` dimension size equal to `num`.
-//
-//
-// Returns The list of tensors unpacked from `value`.
-func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num": num}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Unpack",
-		Input: []tf.Input{
-			value,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("Unpack", err)
-		return
-	}
-	return output
-}
-
-// Increments variable pointed to by 'resource' until it reaches 'limit'.
-//
-// Arguments:
-//	resource: Should be from a scalar `Variable` node.
-//	limit: If incrementing ref would bring it above limit, instead generates an
-// 'OutOfRange' error.
-//
-//
-// Returns A copy of the input before increment. If nothing else modifies the
-// input, the values produced will all be distinct.
-func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"limit": limit, "T": T}
-	opspec := tf.OpSpec{
-		Type: "ResourceCountUpTo",
-		Input: []tf.Input{
-			resource,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Delete the stack from its resource container.
-//
-// Arguments:
-//	handle: The handle to a stack.
-//
-// Returns the created operation.
-func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "StackCloseV2",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // Generate a glob pattern matching all sharded file names.
 func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) {
 	if scope.Err() != nil {
@@ -31416,88 +36225,6 @@
 	return op.Output(0)
 }
 
-// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2.
-type PaddingFIFOQueueV2Attr func(optionalAttr)
-
-// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types.
-// Shapes of fixed rank but variable size are allowed by setting
-// any shape dimension to -1.  In this case, the inputs' shape may vary along
-// the given dimension, and DequeueMany will pad the given dimension with
-// zeros up to the maximum shape of all elements in the given batch.
-// If the length of this attr is 0, different queue elements may have
-// different ranks and shapes, but only one element may be dequeued at a time.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shapes"] = value
-	}
-}
-
-// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// PaddingFIFOQueueV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A queue that produces elements in first-in first-out order.
-//
-// Variable-size shapes are allowed by setting the corresponding shape dimensions
-// to 0 in the shape attr.  In this case DequeueMany will pad up to the maximum
-// size of any given element in the minibatch.  See below for details.
-//
-// Arguments:
-//	component_types: The type of each component in a value.
-//
-// Returns The handle to the queue.
-func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "PaddingFIFOQueueV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // DecodePngAttr is an optional argument to DecodePng.
 type DecodePngAttr func(optionalAttr)
 
@@ -31661,45 +36388,6 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// SerializeSparseAttr is an optional argument to SerializeSparse.
-type SerializeSparseAttr func(optionalAttr)
-
-// SerializeSparseOutType sets the optional out_type attribute to value.
-//
-// value: The `dtype` to use for serialization; the supported types are `string`
-// (default) and `variant`.
-// If not specified, defaults to DT_STRING
-func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Serialize a `SparseTensor` into a `[3]` `Tensor` object.
-//
-// Arguments:
-//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
-//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
-func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SerializeSparse",
-		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2.
 type RandomShuffleQueueV2Attr func(optionalAttr)
 
@@ -31807,6 +36495,45 @@
 	return op.Output(0)
 }
 
+// SerializeSparseAttr is an optional argument to SerializeSparse.
+type SerializeSparseAttr func(optionalAttr)
+
+// SerializeSparseOutType sets the optional out_type attribute to value.
+//
+// value: The `dtype` to use for serialization; the supported types are `string`
+// (default) and `variant`.
+// If not specified, defaults to DT_STRING
+func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Serialize a `SparseTensor` into a `[3]` `Tensor` object.
+//
+// Arguments:
+//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
+//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
+func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SerializeSparse",
+		Input: []tf.Input{
+			sparse_indices, sparse_values, sparse_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
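+// A hedged usage sketch, not part of the generated bindings: serializing a
+// 3x4 SparseTensor with two nonzero entries, assuming the standard
+// tensorflow/go `op.NewScope` and `op.Const` helpers. The optional
+// SerializeSparseOutType attr (above) would switch the serialization dtype
+// from string to variant.
+//
+//	s := op.NewScope()
+//	indices := op.Const(s, [][]int64{{0, 1}, {2, 3}}) // 2-D sparse indices
+//	values := op.Const(s, []float32{4.5, 6.7})        // 1-D sparse values
+//	shape := op.Const(s, []int64{3, 4})               // 1-D dense shape
+//	serialized := op.SerializeSparse(s, indices, values, shape)
+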
 // Draw bounding boxes on a batch of images.
 //
 // Outputs a copy of `images` but draws on top of the pixels zero or more bounding
@@ -31842,37 +36569,6 @@
 	return op.Output(0)
 }
 
-// Gets the next output from the given iterator.
-//
-// This operation is a synchronous version of IteratorGetNext. It should only be used
-// in situations where the iterator does not block the calling thread, or where
-// the calling thread is not a member of the thread pool used to execute parallel
-// operations (e.g. in eager mode).
-func IteratorGetNextSync(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "IteratorGetNextSync",
-		Input: []tf.Input{
-			iterator,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("IteratorGetNextSync", err)
-		return
-	}
-	return components
-}
-
 // SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2.
 type SampleDistortedBoundingBoxV2Attr func(optionalAttr)
 
@@ -32017,6 +36713,32 @@
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Computes requantization range per channel.
+//
+// Arguments:
+//	input: The original input tensor.
+//	input_min: The minimum value of the input tensor.
+//	input_max: The maximum value of the input tensor.
+//	clip_value_max: The maximum value of the output that needs to be clipped.
+// Example: set this to 6 for Relu6.
+//
+// Returns the minimum value of the final output tensor and the maximum value of the final output tensor.
+func RequantizationRangePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, clip_value_max float32) (output_min tf.Output, output_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"clip_value_max": clip_value_max}
+	opspec := tf.OpSpec{
+		Type: "RequantizationRangePerChannel",
+		Input: []tf.Input{
+			input, input_min, input_max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
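+// A hedged usage sketch, not part of the generated bindings: computing the
+// per-channel requantization range of a quantized activation, clipping at 6
+// as one would for Relu6. Here `quantized` is a placeholder for any qint32
+// output produced earlier in the graph; the scope helpers are from the
+// standard tensorflow/go `op` package.
+//
+//	s := op.NewScope()
+//	inMin := op.Const(s, float32(-20))
+//	inMax := op.Const(s, float32(20))
+//	outMin, outMax := op.RequantizationRangePerChannel(
+//		s, quantized, inMin, inMax, 6.0)
+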
 // ExtractGlimpseAttr is an optional argument to ExtractGlimpse.
 type ExtractGlimpseAttr func(optionalAttr)
 
@@ -32657,39 +37379,251 @@
 	return op.Output(0)
 }
 
-// Computes the gradient for the inverse of `x` wrt its input.
+// OrderedMapStageAttr is an optional argument to OrderedMapStage.
+type OrderedMapStageAttr func(optionalAttr)
+
+// OrderedMapStageCapacity sets the optional capacity attribute to value.
 //
-// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
-// is the corresponding input gradient.
-func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+// value: Maximum number of elements in the Staging Area. If > 0, inserts
+// on the container will block when the capacity is reached.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapStageCapacity(value int64) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// OrderedMapStageContainer sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container. Otherwise,
+// a default container is used.
+// If not specified, defaults to ""
+func OrderedMapStageContainer(value string) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// OrderedMapStageSharedName sets the optional shared_name attribute to value.
+//
+// value: This name must match the name used by the corresponding Unstage Op.
+// If not specified, defaults to ""
+func OrderedMapStageSharedName(value string) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Stage (key, values) in the underlying container which behaves like an ordered
+//
+// associative container. Elements are ordered by key.
+//
+// Arguments:
+//	key: int64
+//
+//	values: a list of tensors
+// dtypes: A list of data types that inserted values should adhere to.
+//
+//
+// Returns the created operation.
+func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "InvGrad",
+		Type: "OrderedMapStage",
 		Input: []tf.Input{
-			y, dy,
+			key, indices, tf.OutputList(values),
 		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
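+// A hedged usage sketch, not part of the generated bindings: staging a single
+// float tensor under key 0 in a bounded staging area. A matching unstage op
+// sharing the same shared_name would retrieve it; the literal values are
+// illustrative only.
+//
+//	s := op.NewScope()
+//	key := op.Const(s, int64(0))
+//	indices := op.Const(s, []int32{0})
+//	val := op.Const(s, []float32{1, 2, 3})
+//	stage := op.OrderedMapStage(s, key, indices, []tf.Output{val},
+//		[]tf.DataType{tf.Float},
+//		op.OrderedMapStageCapacity(8),
+//		op.OrderedMapStageSharedName("stage0"))
+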
+// StackPushV2Attr is an optional argument to StackPushV2.
+type StackPushV2Attr func(optionalAttr)
+
+// StackPushV2SwapMemory sets the optional swap_memory attribute to value.
+//
+// value: Swap `elem` to CPU. Defaults to false.
+// If not specified, defaults to false
+func StackPushV2SwapMemory(value bool) StackPushV2Attr {
+	return func(m optionalAttr) {
+		m["swap_memory"] = value
+	}
+}
+
+// Push an element onto the stack.
+//
+// Arguments:
+//	handle: The handle to a stack.
+//	elem: The tensor to be pushed onto the stack.
+//
+// Returns The same tensor as the input 'elem'.
+func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StackPushV2",
+		Input: []tf.Input{
+			handle, elem,
+		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
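+
+// A hedged usage sketch, not part of the generated bindings: pushing onto and
+// popping from a stack created with StackV2, spilling pushed tensors to host
+// memory. StackV2 and StackPopV2 are assumed to be generated elsewhere in
+// this file.
+//
+//	s := op.NewScope()
+//	maxSize := op.Const(s, int32(10))
+//	stack := op.StackV2(s, maxSize, tf.Float)
+//	elem := op.Const(s, []float32{1, 2, 3})
+//	pushed := op.StackPushV2(s, stack, elem, op.StackPushV2SwapMemory(true))
+//	popped := op.StackPopV2(s, stack, tf.Float)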
 
-// List of the given size with empty elements.
+// RpcAttr is an optional argument to Rpc.
+type RpcAttr func(optionalAttr)
+
+// RpcProtocol sets the optional protocol attribute to value.
 //
-// element_shape: the shape of the future elements of the list
-// num_elements: the number of elements to reserve
-// handle: the output list
-// element_dtype: the desired type of elements in the list.
-func TensorListReserve(scope *Scope, element_shape tf.Output, num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) {
+// value: RPC protocol to use.  Empty string means use the default protocol.
+// Options include 'grpc'.
+// If not specified, defaults to ""
+func RpcProtocol(value string) RpcAttr {
+	return func(m optionalAttr) {
+		m["protocol"] = value
+	}
+}
+
+// RpcFailFast sets the optional fail_fast attribute to value.
+//
+// value: `boolean`. If `true` (default), then failures to connect
+// (i.e., the server does not immediately respond) cause an RPC failure.
+// If not specified, defaults to true
+func RpcFailFast(value bool) RpcAttr {
+	return func(m optionalAttr) {
+		m["fail_fast"] = value
+	}
+}
+
+// RpcTimeoutInMs sets the optional timeout_in_ms attribute to value.
+//
+// value: `int`. If `0` (default), then the kernel will run the RPC
+// request and only time out if the RPC deadline passes or the session times out.
+// If this value is greater than `0`, then the op will raise an exception if
+// the RPC takes longer than `timeout_in_ms`.
+// If not specified, defaults to 0
+func RpcTimeoutInMs(value int64) RpcAttr {
+	return func(m optionalAttr) {
+		m["timeout_in_ms"] = value
+	}
+}
+
+// Perform batches of RPC requests.
+//
+// This op asynchronously performs either a single RPC request, or a batch
+// of requests.  RPC requests are defined by three main parameters:
+//
+//   - `address` (the host+port or BNS address of the request)
+//   - `method` (the RPC method name for the request)
+//   - `request` (the serialized proto string, or vector of strings,
+//      of the RPC request argument).
+//
+// For example, if you have an RPC service running at localhost:2345,
+// and its interface is configured with the following proto declaration:
+//
+// ```
+// service MyService {
+//   rpc MyMethod(MyRequestProto) returns (MyResponseProto) {
+//   }
+// };
+// ```
+//
+// then call this op with arguments:
+//
+// ```
+// address = "localhost:2345"
+// method = "MyService/MyMethod"
+// ```
+//
+// The `request` tensor is a string tensor representing serialized `MyRequestProto`
+// strings; and the output string tensor `response` will have the same shape
+// and contain (upon successful completion) corresponding serialized
+// `MyResponseProto` strings.
+//
+// For example, to send a single, empty, `MyRequestProto`, call
+// this op with `request = ""`.  To send 5 **parallel** empty requests,
+// call this op with `request = ["", "", "", "", ""]`.
+//
+// More generally, one can create a batch of `MyRequestProto` serialized protos
+// from regular batched tensors using the `encode_proto` op, and convert
+// the response `MyResponseProto` serialized protos to batched tensors
+// using the `decode_proto` op.
+//
+// **NOTE** Working with serialized proto strings is faster than instantiating
+// actual proto objects in memory, so no performance degradation is expected
+// compared to writing custom kernels for this workflow.
+//
+// If the connection fails or the remote worker returns an error
+// status, the op reraises this exception locally.
+//
+// See the `TryRpc` op if you prefer to handle RPC failures manually in the graph.
+//
+// Arguments:
+//	address: `0-D` or `1-D`.  The address (i.e. host_name:port) of the RPC server.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `method` and `request`.
+//	method: `0-D` or `1-D`.  The method address on the RPC server.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `address` and `request`.
+//	request: `0-D` or `1-D`.  Serialized proto strings: the rpc request argument.
+// If this tensor has more than 1 element, then multiple parallel rpc requests
+// are sent.  This argument broadcasts with `address` and `method`.
+//
+// Returns Same shape as `request`. Serialized proto strings: the rpc responses.
+func Rpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...RpcAttr) (response tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"element_dtype": element_dtype}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "TensorListReserve",
+		Type: "Rpc",
 		Input: []tf.Input{
-			element_shape, num_elements,
+			address, method, request,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
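+// A hedged usage sketch, not part of the generated bindings: issuing three
+// parallel requests to a local gRPC service. The address, method name, and
+// (empty) serialized request payloads are illustrative placeholders.
+//
+//	s := op.NewScope()
+//	address := op.Const(s, "localhost:2345")
+//	method := op.Const(s, "MyService/MyMethod")
+//	request := op.Const(s, []string{"", "", ""}) // three empty request protos
+//	response := op.Rpc(s, address, method, request,
+//		op.RpcProtocol("grpc"), op.RpcTimeoutInMs(5000))
+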
+// Records the byte size of each element of `input_dataset` in a StatsAggregator.
+func ExperimentalBytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ExperimentalBytesProducedStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
 		},
 		Attrs: attrs,
 	}
@@ -32831,23 +37765,6 @@
 	return op.Output(0)
 }
 
-// A dataset that splits the elements of its input into multiple elements.
-func ExperimentalUnbatchDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalUnbatchDataset",
-		Input: []tf.Input{
-			input_dataset,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a dataset that overrides the maximum intra-op parallelism.
 //
 // Arguments:
@@ -33048,66 +37965,6 @@
 	return op.Output(0)
 }
 
-// Creates a TensorArray for storing the gradients of values in the given handle.
-//
-// If the given TensorArray gradient already exists, returns a reference to it.
-//
-// Locks the size of the original TensorArray by disabling its dynamic size flag.
-//
-// **A note about the input flow_in:**
-//
-// The handle flow_in forces the execution of the gradient lookup to occur
-// only after certain other operations have occurred.  For example, when
-// the forward TensorArray is dynamically sized, writes to this TensorArray
-// may resize the object.  The gradient TensorArray is statically sized based
-// on the size of the forward TensorArray when this operation executes.
-// Furthermore, the size of the forward TensorArray is frozen by this call.
-// As a result, the flow is used to ensure that the call to generate the gradient
-// TensorArray only happens after all writes are executed.
-//
-// In the case of dynamically sized TensorArrays, gradient computation should
-// only be performed on read operations that have themselves been chained via
-// flow to occur only after all writes have executed. That way the final size
-// of the forward TensorArray is known when this operation is called.
-//
-// **A note about the source attribute:**
-//
-// TensorArray gradient calls use an accumulator TensorArray object.  If
-// multiple gradients are calculated and run in the same session, the multiple
-// gradient nodes may accidentally flow through the same accumulator TensorArray.
-// This double counts and generally breaks the TensorArray gradient flow.
-//
-// The solution is to identify which gradient call this particular
-// TensorArray gradient is being called in.  This is performed by identifying
-// a unique string (e.g. "gradients", "gradients_1", ...) from the input
-// gradient Tensor's name.  This string is used as a suffix when creating
-// the TensorArray gradient object here (the attribute `source`).
-//
-// The attribute `source` is added as a suffix to the forward TensorArray's
-// name when performing the creation / lookup, so that each separate gradient
-// calculation gets its own TensorArray accumulator.
-//
-// Arguments:
-//	handle: The handle to the forward TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	source: The gradient source string, used to decide which gradient TensorArray
-// to return.
-func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"source": source}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayGradV3",
-		Input: []tf.Input{
-			handle, flow_in,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Creates a dataset that splits a SparseTensor into elements row-wise.
 func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) {
 	if scope.Err() != nil {
@@ -33233,112 +38090,6 @@
 	return scope.AddOperation(opspec)
 }
 
-// ResourceScatterNdSubAttr is an optional argument to ResourceScatterNdSub.
-type ResourceScatterNdSubAttr func(optionalAttr)
-
-// ResourceScatterNdSubUseLocking sets the optional use_locking attribute to value.
-//
-// value: An optional bool. Defaults to True. If True, the assignment will
-// be protected by a lock; otherwise the behavior is undefined,
-// but may exhibit less contention.
-// If not specified, defaults to true
-func ResourceScatterNdSubUseLocking(value bool) ResourceScatterNdSubAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Applies sparse subtraction to individual values or slices in a Variable.
-//
-// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-//
-// `indices` must be integer tensor, containing indices into `ref`.
-// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
-//
-// The innermost dimension of `indices` (with length `K`) corresponds to
-// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
-// dimension of `ref`.
-//
-// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
-//
-// ```
-// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]
-// ```
-//
-// For example, say we want to subtract 4 scattered elements from a rank-1 tensor
-// with 8 elements. In Python, that subtraction would look like this:
-//
-// ```python
-// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True)
-// indices = tf.constant([[4], [3], [1], [7]])
-// updates = tf.constant([9, 10, 11, 12])
-// sub = tf.scatter_nd_sub(ref, indices, updates)
-// with tf.Session() as sess:
-//   print sess.run(sub)
-// ```
-//
-// The resulting update to ref would look like this:
-//
-//     [1, -9, 3, -6, -4, 6, 7, -4]
-//
-// See `tf.scatter_nd` for more details about how to make updates to
-// slices.
-//
-// Arguments:
-//	ref: A resource handle. Must be from a VarHandleOp.
-//	indices: A Tensor. Must be one of the following types: int32, int64.
-// A tensor of indices into ref.
-//	updates: A Tensor. Must have the same type as ref. A tensor of
-// values to add to ref.
-//
-// Returns the created operation.
-func ResourceScatterNdSub(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdSubAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceScatterNdSub",
-		Input: []tf.Input{
-			ref, indices, updates,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Creates a dataset that batches and pads `batch_size` elements from the input.
-//
-// Arguments:
-//
-//	batch_size: A scalar representing the number of elements to accumulate in a
-// batch.
-//	padded_shapes: A list of int64 tensors representing the desired padded shapes
-// of the corresponding output components. These shapes may be partially
-// specified, using `-1` to indicate that a particular dimension should be
-// padded to the maximum size of all batch elements.
-//	padding_values: A list of scalars containing the padding value to use for
-// each of the outputs.
-//
-func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "PaddedBatchDataset",
-		Input: []tf.Input{
-			input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a dataset that shuffles and repeats elements from `input_dataset`
 //
 // pseudorandomly.
@@ -33545,145 +38296,6 @@
 	return op.Output(0)
 }
 
-// BatchToSpace for 4-D tensors of type T.
-//
-// This is a legacy version of the more general BatchToSpaceND.
-//
-// Rearranges (permutes) data from batch into blocks of spatial data, followed by
-// cropping. This is the reverse transformation of SpaceToBatch. More specifically,
-// this op outputs a copy of the input tensor where values from the `batch`
-// dimension are moved in spatial blocks to the `height` and `width` dimensions,
-// followed by cropping along the `height` and `width` dimensions.
-//
-// Arguments:
-//	input: 4-D tensor with shape
-// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
-//   depth]`. Note that the batch size of the input tensor must be divisible by
-// `block_size * block_size`.
-//	crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
-// how many elements to crop from the intermediate result across the spatial
-// dimensions as follows:
-//
-//     crops = [[crop_top, crop_bottom], [crop_left, crop_right]]
-//
-//
-// Returns 4-D with shape `[batch, height, width, depth]`, where:
-//
-//       height = height_pad - crop_top - crop_bottom
-//       width = width_pad - crop_left - crop_right
-//
-// The attr `block_size` must be greater than one. It indicates the block size.
-//
-// Some examples:
-//
-// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:
-//
-// ```
-// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 1]` and value:
-//
-// ```
-// x = [[[[1], [2]], [[3], [4]]]]
-// ```
-//
-// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:
-//
-// ```
-// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 3]` and value:
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
-//
-// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1], [3]], [[9], [11]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-//
-// The output tensor has shape `[1, 4, 4, 1]` and value:
-//
-// ```
-// x = [[[1],   [2],  [3],  [4]],
-//      [[5],   [6],  [7],  [8]],
-//      [[9],  [10], [11],  [12]],
-//      [[13], [14], [15],  [16]]]
-// ```
-//
-// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
-//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
-// ```
-//
-// The output tensor has shape `[2, 2, 4, 1]` and value:
-//
-// ```
-// x = [[[[1], [3]], [[5], [7]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"block_size": block_size}
-	opspec := tf.OpSpec{
-		Type: "BatchToSpace",
-		Input: []tf.Input{
-			input, crops,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Produces a summary of any statistics recorded by the given statistics manager.
-func ExperimentalStatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalStatsAggregatorSummary",
-		Input: []tf.Input{
-			iterator,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Makes a new iterator from the given `dataset` and stores it in `iterator`.
-//
-// This operation may be executed multiple times. Each execution will reset the
-// iterator in `iterator` to the first element of `dataset`.
-//
-// Returns the created operation.
-func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MakeIterator",
-		Input: []tf.Input{
-			dataset, iterator,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // Adjust the contrast of one or more images.
 //
 // `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
@@ -33922,80 +38534,6 @@
 	return op.Output(0)
 }
 
-// FIFOQueueV2Attr is an optional argument to FIFOQueueV2.
-type FIFOQueueV2Attr func(optionalAttr)
-
-// FIFOQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types. If the length of
-// this attr is 0, the shapes of queue elements are not constrained, and
-// only one element may be dequeued at a time.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shapes"] = value
-	}
-}
-
-// FIFOQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// FIFOQueueV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func FIFOQueueV2Container(value string) FIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// FIFOQueueV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A queue that produces elements in first-in first-out order.
-//
-// Arguments:
-//	component_types: The type of each component in a value.
-//
-// Returns The handle to the queue.
-func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FIFOQueueV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Deserializes a proto into the tree handle
 //
 // Arguments:
@@ -34210,1638 +38748,3 @@
 	}
 	return components
 }
-
-// Gets the next output from the given iterator as an Optional variant.
-func IteratorGetNextAsOptional(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (optional tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "IteratorGetNextAsOptional",
-		Input: []tf.Input{
-			iterator,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Fast Fourier transform.
-//
-// Computes the 1-dimensional discrete Fourier transform over the inner-most
-// dimension of `input`.
-//
-// Arguments:
-//	input: A complex tensor.
-//
-// Returns A complex tensor of the same shape as `input`. The inner-most
-//   dimension of `input` is replaced with its 1D Fourier transform.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.fft
-// @end_compatibility
-func FFT(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "FFT",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Identity transformation that models performance.
-//
-// Identity transformation that models performance.
-//
-// Arguments:
-//	input_dataset: A variant tensor representing the input dataset.
-//
-//
-func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ModelDataset",
-		Input: []tf.Input{
-			input_dataset,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the truth value of (x > y) element-wise.
-//
-// *NOTE*: `Greater` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Greater",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Applies sparse addition to `input` using individual values or slices
-//
-// from `updates` according to indices `indices`.  The updates are non-aliasing:
-// `input` is only modified in-place if no other operations will use it.
-// Otherwise, a copy of `input` is made.  This operation has a gradient with
-// respect to both `input` and `updates`.
-//
-// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-//
-// `indices` must be integer tensor, containing indices into `input`.
-// It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`.
-//
-// The innermost dimension of `indices` (with length `K`) corresponds to
-// indices into elements (if `K = P`) or `(P-K)`-dimensional slices
-// (if `K < P`) along the `K`th dimension of `input`.
-//
-// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
-//
-// $$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$
-//
-// For example, say we want to add 4 scattered elements to a rank-1 tensor with
-// 8 elements. In Python, that addition would look like this:
-//
-//     input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
-//     indices = tf.constant([[4], [3], [1], [7]])
-//     updates = tf.constant([9, 10, 11, 12])
-//     output = tf.scatter_nd_non_aliasing_add(input, indices, updates)
-//     with tf.Session() as sess:
-//       print(sess.run(output))
-//
-// The resulting value `output` would look like this:
-//
-//     [1, 13, 3, 14, 14, 6, 7, 20]
-//
-// See `tf.scatter_nd` for more details about how to make updates to slices.
-//
-// Arguments:
-//	input: A Tensor.
-//	indices: A Tensor. Must be one of the following types: `int32`, `int64`.
-// A tensor of indices into `input`.
-//	updates: A Tensor. Must have the same type as ref. A tensor of updated values
-// to add to `input`.
-//
-// Returns A `Tensor` with the same shape as `input`, containing values of `input`
-// updated with `updates`.
-func ScatterNdNonAliasingAdd(scope *Scope, input tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ScatterNdNonAliasingAdd",
-		Input: []tf.Input{
-			input, indices, updates,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// FractionalMaxPoolAttr is an optional argument to FractionalMaxPool.
-type FractionalMaxPoolAttr func(optionalAttr)
-
-// FractionalMaxPoolPseudoRandom sets the optional pseudo_random attribute to value.
-//
-// value: When set to True, generates the pooling sequence in a
-// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
-// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for
-// difference between pseudorandom and random.
-// If not specified, defaults to false
-func FractionalMaxPoolPseudoRandom(value bool) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["pseudo_random"] = value
-	}
-}
-
-// FractionalMaxPoolOverlapping sets the optional overlapping attribute to value.
-//
-// value: When set to True, it means when pooling, the values at the boundary
-// of adjacent pooling cells are used by both cells. For example:
-//
-// `index  0  1  2  3  4`
-//
-// `value  20 5  16 3  7`
-//
-// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-// The result would be [20, 16] for fractional max pooling.
-// If not specified, defaults to false
-func FractionalMaxPoolOverlapping(value bool) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["overlapping"] = value
-	}
-}
-
-// FractionalMaxPoolDeterministic sets the optional deterministic attribute to value.
-//
-// value: When set to True, a fixed pooling region will be used when
-// iterating over a FractionalMaxPool node in the computation graph. Mainly used
-// in unit test to make FractionalMaxPool deterministic.
-// If not specified, defaults to false
-func FractionalMaxPoolDeterministic(value bool) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["deterministic"] = value
-	}
-}
-
-// FractionalMaxPoolSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func FractionalMaxPoolSeed(value int64) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// FractionalMaxPoolSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func FractionalMaxPoolSeed2(value int64) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Performs fractional max pooling on the input.
-//
-// Fractional max pooling is slightly different than regular max pooling.  In
-// regular max pooling, you downsize an input set by taking the maximum value of
-// smaller N x N subsections of the set (often 2x2), and try to reduce the set by
-// a factor of N, where N is an integer.  Fractional max pooling, as you might
-// expect from the word "fractional", means that the overall reduction ratio N
-// does not have to be an integer.
-//
-// The sizes of the pooling regions are generated randomly but are fairly uniform.
-// For example, let's look at the height dimension, and the constraints on the
-// list of rows that will be pool boundaries.
-//
-// First we define the following:
-//
-// 1.  input_row_length : the number of rows from the input set
-// 2.  output_row_length : which will be smaller than the input
-// 3.  alpha = input_row_length / output_row_length : our reduction ratio
-// 4.  K = floor(alpha)
-// 5.  row_pooling_sequence : this is the result list of pool boundary rows
-//
-// Then, row_pooling_sequence should satisfy:
-//
-// 1.  a[0] = 0 : the first value of the sequence is 0
-// 2.  a[end] = input_row_length : the last value of the sequence is the size
-// 3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
-// 4.  length(row_pooling_sequence) = output_row_length+1
-//
-// For more details on fractional max pooling, see this paper:
-// [Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071)
-//
-// Arguments:
-//	value: 4-D with shape `[batch, height, width, channels]`.
-//	pooling_ratio: Pooling ratio for each dimension of `value`, currently only
-// supports row and col dimension and should be >= 1.0. For example, a valid
-// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
-// must be 1.0 because we don't allow pooling on batch and channels
-// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
-// respectively.
-//
-// Returns the output tensor after fractional max pooling, the row pooling sequence needed to calculate the gradient, and the column pooling sequence needed to calculate the gradient.
-func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalMaxPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"pooling_ratio": pooling_ratio}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FractionalMaxPool",
-		Input: []tf.Input{
-			value,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Creates a MultiDeviceIterator resource.
-//
-// Arguments:
-//	devices: A list of devices the iterator works across.
-//	shared_name: If non-empty, this resource will be shared under the given name
-// across multiple sessions.
-//	container: If non-empty, this resource is placed in the given container.
-// Otherwise, a default container is used.
-//	output_types: The type list for the return values.
-//	output_shapes: The list of shapes being produced.
-//
-// Returns Handle to the resource created.
-func MultiDeviceIterator(scope *Scope, devices []string, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"devices": devices, "shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "MultiDeviceIterator",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Performs a padding as a preprocess during a convolution.
-//
-// Similar to FusedResizeAndPadConv2d, this op allows for an optimized
-// implementation where the spatial padding transformation stage is fused with the
-// im2col lookup, but in this case without the bilinear filtering required for
-// resizing. Fusing the padding prevents the need to write out the intermediate
-// results as whole tensors, reducing memory pressure, and we can get some latency
-// gains by merging the transformation calculations.
-// The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
-// order is used instead.
-// Internally this op uses a single per-graph scratch buffer, which means that it
-// will block if multiple versions are being run in parallel. This is because this
-// operator is primarily an optimization to minimize memory usage.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-//	paddings: A two-column matrix specifying the padding sizes. The number of
-// rows must be the same as the rank of `input`.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.
-//
-//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
-// of `input`. Must be in the same order as the dimension specified with format.
-//	padding: The type of padding algorithm to use.
-func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "FusedPadConv2D",
-		Input: []tf.Input{
-			input, paddings, filter,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Conv2DBackpropInputAttr is an optional argument to Conv2DBackpropInput.
-type Conv2DBackpropInputAttr func(optionalAttr)
-
-// Conv2DBackpropInputUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
-// If not specified, defaults to true
-func Conv2DBackpropInputUseCudnnOnGpu(value bool) Conv2DBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["use_cudnn_on_gpu"] = value
-	}
-}
-
-// Conv2DBackpropInputExplicitPaddings sets the optional explicit_paddings attribute to value.
-//
-// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith
-// dimension, the amount of padding inserted before and after the dimension is
-// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If
-// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty.
-// If not specified, defaults to <>
-func Conv2DBackpropInputExplicitPaddings(value []int64) Conv2DBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["explicit_paddings"] = value
-	}
-}
-
-// Conv2DBackpropInputDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Conv2DBackpropInputDilations sets the optional dilations attribute to value.
-//
-// value: 1-D tensor of length 4.  The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
-// element on that dimension. The dimension order is determined by the value of
-// `data_format`, see above for details. Dilations in the batch and depth
-// dimensions must be 1.
-// If not specified, defaults to <i:1 i:1 i:1 i:1 >
-func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes the gradients of convolution with respect to the input.
-//
-// Arguments:
-//	input_sizes: An integer vector representing the shape of `input`,
-// where `input` is a 4-D `[batch, height, width, channels]` tensor.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution. Must be in the same order as the dimension specified with
-// format.
-//	padding: The type of padding algorithm to use.
-//
-// Returns 4-D with shape `[batch, in_height, in_width, in_channels]`.  Gradient
-// w.r.t. the input of the convolution.
-func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropInputAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Conv2DBackpropInput",
-		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Interleave the values from the `data` tensors into a single tensor.
-//
-// Builds a merged tensor such that
-//
-// ```python
-//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
-// ```
-//
-// For example, if each `indices[m]` is scalar or vector, we have
-//
-// ```python
-//     # Scalar indices:
-//     merged[indices[m], ...] = data[m][...]
-//
-//     # Vector indices:
-//     merged[indices[m][i], ...] = data[m][i, ...]
-// ```
-//
-// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
-// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
-// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
-// `constant`, the output shape is
-//
-//     merged.shape = [max(indices)] + constant
-//
-// Values are merged in order, so if an index appears in both `indices[m][i]` and
-// `indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the
-// merged result. If you do not need this guarantee, ParallelDynamicStitch might
-// perform better on some devices.
-//
-// For example:
-//
-// ```python
-//     indices[0] = 6
-//     indices[1] = [4, 1]
-//     indices[2] = [[5, 2], [0, 3]]
-//     data[0] = [61, 62]
-//     data[1] = [[41, 42], [11, 12]]
-//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
-//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
-//               [51, 52], [61, 62]]
-// ```
-//
-// This method can be used to merge partitions created by `dynamic_partition`
-// as illustrated on the following example:
-//
-// ```python
-//     # Apply function (increments x_i) on elements for which a certain condition
-//     # apply (x_i != -1 in this example).
-//     x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
-//     condition_mask=tf.not_equal(x,tf.constant(-1.))
-//     partitioned_data = tf.dynamic_partition(
-//         x, tf.cast(condition_mask, tf.int32) , 2)
-//     partitioned_data[1] = partitioned_data[1] + 1.0
-//     condition_indices = tf.dynamic_partition(
-//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
-//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
-//     # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
-//     # unchanged.
-// ```
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
-// </div>
-func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DynamicStitch",
-		Input: []tf.Input{
-			tf.OutputList(indices), tf.OutputList(data),
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
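The Python example above, transliterated to these Go bindings (a sketch; it assumes the same imports and a fresh `root := op.NewScope()` as in the Conv2DBackpropInput sketch):

```go
indices := []tf.Output{
	op.Const(root, int32(6)),
	op.Const(root, []int32{4, 1}),
	op.Const(root, [][]int32{{5, 2}, {0, 3}}),
}
data := []tf.Output{
	op.Const(root, []int32{61, 62}),
	op.Const(root, [][]int32{{41, 42}, {11, 12}}),
	op.Const(root, [][][]int32{{{51, 52}, {21, 22}}, {{1, 2}, {31, 32}}}),
}
merged := op.DynamicStitch(root, indices, data)
// Evaluating merged yields
// [[1 2] [11 12] [21 22] [31 32] [41 42] [51 52] [61 62]].
```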
-// Interleave the values from the `data` tensors into a single tensor.
-//
-// Builds a merged tensor such that
-//
-// ```python
-//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
-// ```
-//
-// For example, if each `indices[m]` is scalar or vector, we have
-//
-// ```python
-//     # Scalar indices:
-//     merged[indices[m], ...] = data[m][...]
-//
-//     # Vector indices:
-//     merged[indices[m][i], ...] = data[m][i, ...]
-// ```
-//
-// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
-// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
-// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
-// `constant`, the output shape is
-//
-//     merged.shape = [max(indices)] + constant
-//
-// Values may be merged in parallel, so if an index appears in both `indices[m][i]`
-// and `indices[n][j]`, the result may be invalid. This differs from the normal
-// DynamicStitch operator, which defines the behavior in that case.
-//
-// For example:
-//
-// ```python
-//     indices[0] = 6
-//     indices[1] = [4, 1]
-//     indices[2] = [[5, 2], [0, 3]]
-//     data[0] = [61, 62]
-//     data[1] = [[41, 42], [11, 12]]
-//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
-//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
-//               [51, 52], [61, 62]]
-// ```
-//
-// This method can be used to merge partitions created by `dynamic_partition`
-// as illustrated in the following example:
-//
-// ```python
-//     # Apply a function (increment x_i) to elements for which a certain
-//     # condition applies (x_i != -1 in this example).
-//     x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
-//     condition_mask=tf.not_equal(x,tf.constant(-1.))
-//     partitioned_data = tf.dynamic_partition(
-//         x, tf.cast(condition_mask, tf.int32) , 2)
-//     partitioned_data[1] = partitioned_data[1] + 1.0
-//     condition_indices = tf.dynamic_partition(
-//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
-//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
-//     # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
-//     # unchanged.
-// ```
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
-// </div>
-func ParallelDynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ParallelDynamicStitch",
-		Input: []tf.Input{
-			tf.OutputList(indices), tf.OutputList(data),
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// PriorityQueueV2Attr is an optional argument to PriorityQueueV2.
-type PriorityQueueV2Attr func(optionalAttr)
-
-// PriorityQueueV2ComponentTypes sets the optional component_types attribute to value.
-//
-// value: The type of each component in a value.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func PriorityQueueV2ComponentTypes(value []tf.DataType) PriorityQueueV2Attr {
-	return func(m optionalAttr) {
-		m["component_types"] = value
-	}
-}
-
-// PriorityQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func PriorityQueueV2Capacity(value int64) PriorityQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// PriorityQueueV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func PriorityQueueV2Container(value string) PriorityQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// PriorityQueueV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func PriorityQueueV2SharedName(value string) PriorityQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A queue that produces elements sorted by the first component value.
-//
-// Note that the PriorityQueue requires the first component of any element
-// to be a scalar int64, in addition to the other elements declared by
-// component_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue
-// and DequeueMany) on a PriorityQueue will all require (resp. output) one extra
-// entry in their input (resp. output) lists.
-//
-// Arguments:
-//	shapes: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types. If the length of
-// this attr is 0, the shapes of queue elements are not constrained, and
-// only one element may be dequeued at a time.
-//
-// Returns The handle to the queue.
-func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV2Attr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"shapes": shapes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "PriorityQueueV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
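A construction sketch showing the functional-option attrs; the payload type, capacity, and names are assumptions (same imports and scope as the earlier sketches):

```go
// component_types declares the payload components; the scalar int64 priority
// is an extra leading entry supplied at enqueue time (see the note above).
queue := op.PriorityQueueV2(root,
	[]tf.Shape{tf.ScalarShape()}, // one shape per declared component
	op.PriorityQueueV2ComponentTypes([]tf.DataType{tf.String}),
	op.PriorityQueueV2Capacity(100),
	op.PriorityQueueV2SharedName("work_items"))
_ = queue
```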
-// QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2.
-type QueueEnqueueV2Attr func(optionalAttr)
-
-// QueueEnqueueV2TimeoutMs sets the optional timeout_ms attribute to value.
-//
-// value: If the queue is full, this operation will block for up to
-// timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueEnqueueV2TimeoutMs(value int64) QueueEnqueueV2Attr {
-	return func(m optionalAttr) {
-		m["timeout_ms"] = value
-	}
-}
-
-// Enqueues a tuple of one or more tensors in the given queue.
-//
-// The components input has k elements, which correspond to the components of
-// tuples stored in the given queue.
-//
-// N.B. If the queue is full, this operation will block until the given
-// element has been enqueued (or 'timeout_ms' elapses, if specified).
-//
-// Arguments:
-//	handle: The handle to a queue.
-//	components: One or more tensors from which the enqueued tensors should be taken.
-//
-// Returns the created operation.
-func QueueEnqueueV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueV2Attr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueEnqueueV2",
-		Input: []tf.Input{
-			handle, tf.OutputList(components),
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Computes the Bessel i0e function of `x` element-wise.
-//
-// Exponentially scaled modified Bessel function of order 0 defined as
-// `bessel_i0e(x) = exp(-abs(x)) bessel_i0(x)`.
-//
-// This function is faster and more numerically stable than `bessel_i0(x)`.
-func BesselI0e(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BesselI0e",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// QueueDequeueManyV2Attr is an optional argument to QueueDequeueManyV2.
-type QueueDequeueManyV2Attr func(optionalAttr)
-
-// QueueDequeueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
-//
-// value: If the queue has fewer than n elements, this operation
-// will block for up to timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueDequeueManyV2TimeoutMs(value int64) QueueDequeueManyV2Attr {
-	return func(m optionalAttr) {
-		m["timeout_ms"] = value
-	}
-}
-
-// Dequeues `n` tuples of one or more tensors from the given queue.
-//
-// If the queue is closed and there are fewer than `n` elements, then an
-// OutOfRange error is returned.
-//
-// This operation concatenates queue-element component tensors along the
-// 0th dimension to make a single component tensor.  All of the components
-// in the dequeued tuple will have size `n` in the 0th dimension.
-//
-// This operation has `k` outputs, where `k` is the number of components in
-// the tuples stored in the given queue, and output `i` is the ith
-// component of the dequeued tuple.
-//
-// N.B. If the queue is empty, this operation will block until `n` elements
-// have been dequeued (or 'timeout_ms' elapses, if specified).
-//
-// Arguments:
-//	handle: The handle to a queue.
-//	n: The number of tuples to dequeue.
-//	component_types: The type of each component in a tuple.
-//
-// Returns One or more tensors that were dequeued as a tuple.
-func QueueDequeueManyV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueManyV2Attr) (components []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueDequeueManyV2",
-		Input: []tf.Input{
-			handle, n,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("QueueDequeueManyV2", err)
-		return
-	}
-	return components
-}
-
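A round-trip sketch combining the queue wrappers above; the run order (the enqueue op as a target before fetching the batch) is left to the caller, and all names are illustrative:

```go
q := op.PriorityQueueV2(root, []tf.Shape{tf.ScalarShape()},
	op.PriorityQueueV2ComponentTypes([]tf.DataType{tf.Float}))
// The extra leading entry is the scalar int64 priority.
enqueue := op.QueueEnqueueV2(root, q,
	[]tf.Output{op.Const(root, int64(3)), op.Const(root, float32(1.5))})
// Dequeue two tuples at once; every returned component gains a leading
// dimension of size n. The priority comes back as the first component.
batch := op.QueueDequeueManyV2(root, q, op.Const(root, int32(2)),
	[]tf.DataType{tf.Int64, tf.Float})
_, _ = enqueue, batch
```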
-// Forwards the value of an available tensor from `inputs` to `output`.
-//
-// `Merge` waits for at least one of the tensors in `inputs` to become available.
-// It is usually combined with `Switch` to implement branching.
-//
-// `Merge` forwards the first tensor to become available to `output`, and sets
-// `value_index` to its index in `inputs`.
-//
-// Arguments:
-//	inputs: The input tensors, exactly one of which will become available.
-//
-// Returns the available input tensor, and the index of the chosen input tensor in `inputs`.
-func Merge(scope *Scope, inputs []tf.Output) (output tf.Output, value_index tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Merge",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
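A wiring sketch only: in real graphs the inputs come from the two branches of a `Switch`, so exactly one becomes available; constants stand in for those branches here.

```go
a := op.Const(root, int32(1))
b := op.Const(root, int32(2))
output, valueIndex := op.Merge(root, []tf.Output{a, b})
_, _ = output, valueIndex // valueIndex reports which input was forwarded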
-// Writes the given dataset to the given file using the TFRecord format.
-//
-// Arguments:
-//	input_dataset: A variant tensor representing the dataset to write.
-//	filename: A scalar string tensor representing the filename to use.
-//	compression_type: A scalar string tensor containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//
-// Returns the created operation.
-func ExperimentalDatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalDatasetToTFRecord",
-		Input: []tf.Input{
-			input_dataset, filename, compression_type,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// QueueCloseV2Attr is an optional argument to QueueCloseV2.
-type QueueCloseV2Attr func(optionalAttr)
-
-// QueueCloseV2CancelPendingEnqueues sets the optional cancel_pending_enqueues attribute to value.
-//
-// value: If true, all pending enqueue requests that are
-// blocked on the given queue will be canceled.
-// If not specified, defaults to false
-func QueueCloseV2CancelPendingEnqueues(value bool) QueueCloseV2Attr {
-	return func(m optionalAttr) {
-		m["cancel_pending_enqueues"] = value
-	}
-}
-
-// Closes the given queue.
-//
-// This operation signals that no more elements will be enqueued in the
-// given queue. Subsequent Enqueue(Many) operations will fail.
-// Subsequent Dequeue(Many) operations will continue to succeed if
-// sufficient elements remain in the queue. Subsequent Dequeue(Many)
-// operations that would block will fail immediately.
-//
-// Arguments:
-//	handle: The handle to a queue.
-//
-// Returns the created operation.
-func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueCloseV2",
-		Input: []tf.Input{
-			handle,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Computes inverse hyperbolic tangent of x element-wise.
-func Atanh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Atanh",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns true if queue is closed.
-//
-// This operation returns true if the queue is closed and false if the queue
-// is open.
-//
-// Arguments:
-//	handle: The handle to a queue.
-func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueIsClosedV2",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the absolute value of a tensor.
-//
-// Given a tensor `x`, this operation returns a tensor containing the absolute
-// value of each element in `x`. For example, if x is an input element and y is
-// an output element, this operation computes \\(y = |x|\\).
-func Abs(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Abs",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
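The shortest end-to-end use of one of these unary wrappers, with error handling elided and `fmt` assumed alongside the usual imports:

```go
x := op.Const(root, []float32{-1.5, 0, 2.5})
y := op.Abs(root, x)
graph, _ := root.Finalize()
sess, _ := tf.NewSession(graph, nil)
out, _ := sess.Run(nil, []tf.Output{y}, nil)
fmt.Println(out[0].Value()) // [1.5 0 2.5]
```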
-// StackV2Attr is an optional argument to StackV2.
-type StackV2Attr func(optionalAttr)
-
-// StackV2StackName sets the optional stack_name attribute to value.
-//
-// value: Overrides the name used for the temporary stack resource. Default
-// value is the name of the 'Stack' op (which is guaranteed unique).
-// If not specified, defaults to ""
-func StackV2StackName(value string) StackV2Attr {
-	return func(m optionalAttr) {
-		m["stack_name"] = value
-	}
-}
-
-// A stack that produces elements in first-in last-out order.
-//
-// Arguments:
-//	max_size: The maximum size of the stack if non-negative. If negative, the stack
-// size is unlimited.
-//	elem_type: The type of the elements on the stack.
-//
-// Returns The handle to the stack.
-func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional ...StackV2Attr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"elem_type": elem_type}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StackV2",
-		Input: []tf.Input{
-			max_size,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// OrderedMapStageAttr is an optional argument to OrderedMapStage.
-type OrderedMapStageAttr func(optionalAttr)
-
-// OrderedMapStageCapacity sets the optional capacity attribute to value.
-//
-// value: Maximum number of elements in the Staging Area. If > 0, inserts
-// on the container will block when the capacity is reached.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func OrderedMapStageCapacity(value int64) OrderedMapStageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// OrderedMapStageContainer sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container. Otherwise,
-// a default container is used.
-// If not specified, defaults to ""
-func OrderedMapStageContainer(value string) OrderedMapStageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// OrderedMapStageSharedName sets the optional shared_name attribute to value.
-//
-// value: It is necessary to match this name to the matching Unstage Op.
-// If not specified, defaults to ""
-func OrderedMapStageSharedName(value string) OrderedMapStageAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Stage (key, values) in the underlying container which behaves like an ordered
-//
-// associative container. Elements are ordered by key.
-//
-// Arguments:
-//	key: int64
-//
-//	values: a list of tensors
-//	dtypes: A list of data types that inserted values should adhere to.
-//
-//
-// Returns the created operation.
-func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "OrderedMapStage",
-		Input: []tf.Input{
-			key, indices, tf.OutputList(values),
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// RpcAttr is an optional argument to Rpc.
-type RpcAttr func(optionalAttr)
-
-// RpcProtocol sets the optional protocol attribute to value.
-//
-// value: RPC protocol to use.  Empty string means use the default protocol.
-// Options include 'grpc'.
-// If not specified, defaults to ""
-func RpcProtocol(value string) RpcAttr {
-	return func(m optionalAttr) {
-		m["protocol"] = value
-	}
-}
-
-// RpcFailFast sets the optional fail_fast attribute to value.
-//
-// value: `boolean`. If `true` (default), then failures to connect
-// (i.e., the server does not immediately respond) cause an RPC failure.
-// If not specified, defaults to true
-func RpcFailFast(value bool) RpcAttr {
-	return func(m optionalAttr) {
-		m["fail_fast"] = value
-	}
-}
-
-// RpcTimeoutInMs sets the optional timeout_in_ms attribute to value.
-//
-// value: `int`. If `0` (default), then the kernel will run the RPC
-// request and only time out if the RPC deadline passes or the session times out.
-// If this value is greater than `0`, then the op will raise an exception if
-// the RPC takes longer than `timeout_in_ms`.
-// If not specified, defaults to 0
-func RpcTimeoutInMs(value int64) RpcAttr {
-	return func(m optionalAttr) {
-		m["timeout_in_ms"] = value
-	}
-}
-
-// Perform batches of RPC requests.
-//
-// This op asynchronously performs either a single RPC request, or a batch
-// of requests.  RPC requests are defined by three main parameters:
-//
-//   - `address` (the host+port or BNS address of the request)
-//   - `method` (the RPC method name for the request)
-//   - `request` (the serialized proto string, or vector of strings,
-//      of the RPC request argument).
-//
-// For example, if you have an RPC service running at localhost:2345,
-// and its interface is configured with the following proto declaration:
-//
-// ```
-// service MyService {
-//   rpc MyMethod(MyRequestProto) returns (MyResponseProto) {
-//   }
-// };
-// ```
-//
-// then call this op with arguments:
-//
-// ```
-// address = "localhost:2345"
-// method = "MyService/MyMethod"
-// ```
-//
-// The `request` tensor is a string tensor representing serialized `MyRequestProto`
-// strings; and the output string tensor `response` will have the same shape
-// and contain (upon successful completion) corresponding serialized
-// `MyResponseProto` strings.
-//
-// For example, to send a single, empty, `MyRequestProto`, call
-// this op with `request = ""`.  To send 5 **parallel** empty requests,
-// call this op with `request = ["", "", "", "", ""]`.
-//
-// More generally, one can create a batch of `MyRequestProto` serialized protos
-// from regular batched tensors using the `encode_proto` op, and convert
-// the response `MyResponseProto` serialized protos to batched tensors
-// using the `decode_proto` op.
-//
-// **NOTE** Working with serialized proto strings is faster than instantiating
-// actual proto objects in memory, so no performance degradation is expected
-// compared to writing custom kernels for this workflow.
-//
-// If the connection fails or the remote worker returns an error
-// status, the op reraises this exception locally.
-//
-// See the `TryRpc` op if you prefer to handle RPC failures manually in the graph.
-//
-// Arguments:
-//	address: `0-D` or `1-D`.  The address (i.e. host_name:port) of the RPC server.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `method` and `request`.
-//	method: `0-D` or `1-D`.  The method address on the RPC server.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `address` and `request`.
-//	request: `0-D` or `1-D`.  Serialized proto strings: the rpc request argument.
-// If this tensor has more than 1 element, then multiple parallel rpc requests
-// are sent.  This argument broadcasts with `address` and `method`.
-//
-// Returns Same shape as `request`. Serialized proto strings: the rpc responses.
-func Rpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...RpcAttr) (response tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Rpc",
-		Input: []tf.Input{
-			address, method, request,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
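The doc's localhost example as a Go sketch; the service, timeout, and protocol choices are illustrative:

```go
address := op.Const(root, "localhost:2345")
method := op.Const(root, "MyService/MyMethod")
request := op.Const(root, []string{"", "", ""}) // three parallel empty requests
response := op.Rpc(root, address, method, request,
	op.RpcProtocol("grpc"),
	op.RpcFailFast(false),
	op.RpcTimeoutInMs(5000))
_ = response // same shape as request: three serialized MyResponseProto strings
```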
-// Records the bytes size of each element of `input_dataset` in a StatsAggregator.
-func ExperimentalBytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "ExperimentalBytesProducedStatsDataset",
-		Input: []tf.Input{
-			input_dataset, tag,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// StackPushV2Attr is an optional argument to StackPushV2.
-type StackPushV2Attr func(optionalAttr)
-
-// StackPushV2SwapMemory sets the optional swap_memory attribute to value.
-//
-// value: Swap `elem` to CPU. Default to false.
-// If not specified, defaults to false
-func StackPushV2SwapMemory(value bool) StackPushV2Attr {
-	return func(m optionalAttr) {
-		m["swap_memory"] = value
-	}
-}
-
-// Push an element onto the stack.
-//
-// Arguments:
-//	handle: The handle to a stack.
-//	elem: The tensor to be pushed onto the stack.
-//
-// Returns The same tensor as the input 'elem'.
-func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StackPushV2",
-		Input: []tf.Input{
-			handle, elem,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
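A sketch pairing StackV2 with StackPushV2; names and sizes are assumptions:

```go
maxSize := op.Const(root, int32(10)) // a negative value would mean unbounded
stack := op.StackV2(root, maxSize, tf.Float, op.StackV2StackName("scratch"))
elem := op.Const(root, []float32{1, 2, 3})
pushed := op.StackPushV2(root, stack, elem, op.StackPushV2SwapMemory(true))
_ = pushed // forwards elem; elements come back via the matching StackPopV2 wrapper
```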
-// FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2.
-type FusedBatchNormGradV2Attr func(optionalAttr)
-
-// FusedBatchNormGradV2Epsilon sets the optional epsilon attribute to value.
-//
-// value: A small float number added to the variance of x.
-// If not specified, defaults to 0.0001
-func FusedBatchNormGradV2Epsilon(value float32) FusedBatchNormGradV2Attr {
-	return func(m optionalAttr) {
-		m["epsilon"] = value
-	}
-}
-
-// FusedBatchNormGradV2DataFormat sets the optional data_format attribute to value.
-//
-// value: The data format for y_backprop, x, x_backprop.
-// Either "NHWC" (default) or "NCHW".
-// If not specified, defaults to "NHWC"
-func FusedBatchNormGradV2DataFormat(value string) FusedBatchNormGradV2Attr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// FusedBatchNormGradV2IsTraining sets the optional is_training attribute to value.
-//
-// value: A bool value to indicate the operation is for training (default)
-// or inference.
-// If not specified, defaults to true
-func FusedBatchNormGradV2IsTraining(value bool) FusedBatchNormGradV2Attr {
-	return func(m optionalAttr) {
-		m["is_training"] = value
-	}
-}
-
-// Gradient for batch normalization.
-//
-// Note that the sizes of the 4D Tensors are defined by either "NHWC" or "NCHW".
-// The size of 1D Tensors matches the dimension C of the 4D Tensors.
-//
-// Arguments:
-//	y_backprop: A 4D Tensor for the gradient with respect to y.
-//	x: A 4D Tensor for input data.
-//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
-//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
-// mean to be reused in gradient computation. When is_training is
-// False, a 1D Tensor for the population mean to be reused in both
-// 1st and 2nd order gradient computation.
-//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
-// variance (inverted variance in the cuDNN case) to be reused in
-// gradient computation. When is_training is False, a 1D Tensor
-// for the population variance to be reused in both 1st and 2nd
-// order gradient computation.
-//
-// Returns a 4D Tensor for the gradient with respect to x, a 1D Tensor for the
-// gradient with respect to scale, a 1D Tensor for the gradient with respect to
-// offset, and two unused placeholders to match the mean and variance inputs in
-// FusedBatchNorm.
-func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradV2Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FusedBatchNormGradV2",
-		Input: []tf.Input{
-			y_backprop, x, scale, reserve_space_1, reserve_space_2,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
-// Creates a TensorArray for storing multiple gradients of values in the given handle.
-//
-// Similar to TensorArrayGradV3. However, it creates an accumulator with an
-// expanded shape compared to the input TensorArray whose gradient is being
-// computed. This enables multiple gradients for the same TensorArray to be
-// calculated using the same accumulator.
-//
-// Arguments:
-//	handle: The handle to the forward TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	shape_to_prepend: An int32 vector representing a shape. Elements in the gradient accumulator will
-// have a shape which is this shape_to_prepend value concatenated with the shape
-// of the elements in the TensorArray corresponding to the input handle.
-//	source: The gradient source string, used to decide which gradient TensorArray
-// to return.
-func TensorArrayGradWithShape(scope *Scope, handle tf.Output, flow_in tf.Output, shape_to_prepend tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"source": source}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayGradWithShape",
-		Input: []tf.Input{
-			handle, flow_in, shape_to_prepend,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Compare values of `input` to `threshold` and pack resulting bits into a `uint8`.
-//
-// Each comparison returns a boolean `true` (if `input_value > threshold`)
-// and `false` otherwise.
-//
-// This operation is useful for Locality-Sensitive-Hashing (LSH) and other
-// algorithms that use hashing approximations of cosine and `L2` distances;
-// codes can be generated from an input via:
-//
-// ```python
-// codebook_size = 50
-// codebook_bits = codebook_size * 32
-// codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits],
-//                            dtype=x.dtype,
-//                            initializer=tf.orthogonal_initializer())
-// codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.)
-// codes = tf.bitcast(codes, tf.int32)  # go from uint8 to int32
-// # now codes has shape x.shape[:-1] + [codebook_size]
-// ```
-//
-// **NOTE**: Currently, the innermost dimension of the tensor must be divisible
-// by 8.
-//
-// Given an `input` shaped `[s0, s1, ..., s_n]`, the output is
-// a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`.
-//
-// Arguments:
-//	input: Values to compare against `threshold` and bitpack.
-//	threshold: Threshold to compare against.
-//
-// Returns The bitpacked comparisons.
-func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "CompareAndBitpack",
-		Input: []tf.Input{
-			input, threshold,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
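A sketch of the shape contract: eight comparisons pack into one byte, so the innermost dimension shrinks by a factor of 8 (names are illustrative):

```go
input := op.Const(root, []float32{-1, 2, -3, 4, -5, 6, -7, 8}) // length divisible by 8
threshold := op.Const(root, float32(0))
codes := op.CompareAndBitpack(root, input, threshold)
_ = codes // shape [1]: one uint8 holding the eight input > threshold bits
```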
-// Push an element onto the tensor_array.
-//
-// Arguments:
-//	handle: The handle to a TensorArray.
-//	index: The position to write to inside the TensorArray.
-//	value: The tensor to write to the TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//
-// Returns A float scalar that enforces proper chaining of operations.
-func TensorArrayWriteV3(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayWriteV3",
-		Input: []tf.Input{
-			handle, index, value, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Scatter the data from the input value into specific TensorArray elements.
-//
-// `indices` must be a vector, its length must match the first dim of `value`.
-//
-// Arguments:
-//	handle: The handle to a TensorArray.
-//	indices: The locations at which to write the tensor elements.
-//	value: The concatenated tensor to write to the TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//
-// Returns A float scalar that enforces proper chaining of operations.
-func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayScatterV3",
-		Input: []tf.Input{
-			handle, indices, value, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// EmptyAttr is an optional argument to Empty.
-type EmptyAttr func(optionalAttr)
-
-// EmptyInit sets the optional init attribute to value.
-//
-// value: If True, initialize the returned tensor with the default value of dtype.  Otherwise, the implementation is free not to initialize the tensor's content.
-// If not specified, defaults to false
-func EmptyInit(value bool) EmptyAttr {
-	return func(m optionalAttr) {
-		m["init"] = value
-	}
-}
-
-// Creates a tensor with the given shape.
-//
-// This operation creates a tensor of `shape` and `dtype`.
-//
-// Arguments:
-//	shape: 1-D. Represents the shape of the output tensor.
-//
-//
-// Returns A `Tensor` of type `T`.
-func Empty(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...EmptyAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Empty",
-		Input: []tf.Input{
-			shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
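A sketch of the init attr's effect:

```go
shape := op.Const(root, []int32{2, 3})
// With init=true the contents are defined (zeros for float); without it the
// implementation may leave them uninitialized.
t := op.Empty(root, shape, tf.Float, op.EmptyInit(true))
_ = t // a 2x3 float32 tensor
```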
-// TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3.
-type TensorArrayConcatV3Attr func(optionalAttr)
-
-// TensorArrayConcatV3ElementShapeExcept0 sets the optional element_shape_except0 attribute to value.
-//
-// value: The expected shape of an element, if known,
-// excluding the first dimension. Used to validate the shapes of
-// TensorArray elements. If this shape is not fully specified, concatenating
-// zero-size TensorArrays is an error.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayConcatV3ElementShapeExcept0(value tf.Shape) TensorArrayConcatV3Attr {
-	return func(m optionalAttr) {
-		m["element_shape_except0"] = value
-	}
-}
-
-// Concat the elements from the TensorArray into value `value`.
-//
-// Takes `T` elements of shapes
-//
-//   ```
-//   (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...)
-//   ```
-//
-// and concatenates them into a Tensor of shape:
-//
-//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```
-//
-// All elements must have the same shape (excepting the first dimension).
-//
-// Arguments:
-//	handle: The handle to a TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	dtype: The type of the elem that is returned.
-//
-// Returns all of the elements in the TensorArray, concatenated along the
-// first axis, and a vector of the row sizes of the original T elements in
-// the value output.  In the example above, this would be the values:
-// `(n0, n1, ..., n(T-1))`.
-func TensorArrayConcatV3(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV3Attr) (value tf.Output, lengths tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayConcatV3",
-		Input: []tf.Input{
-			handle, flow_in,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Split the data from the input value into TensorArray elements.
-//
-// Assuming that `lengths` takes on values
-//
-//   ```(n0, n1, ..., n(T-1))```
-//
-// and that `value` has shape
-//
-//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```,
-//
-// this splits values into a TensorArray with T tensors.
-//
-// TensorArray index t will be the subtensor of values with starting position
-//
-//   ```(n0 + n1 + ... + n(t-1), 0, 0, ...)```
-//
-// and having size
-//
-//   ```nt x d0 x d1 x ...```
-//
-// Arguments:
-//	handle: The handle to a TensorArray.
-//	value: The concatenated tensor to write to the TensorArray.
-//	lengths: The vector of lengths specifying how to split the rows of value
-// into the TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//
-// Returns A float scalar that enforces proper chaining of operations.
-func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArraySplitV3",
-		Input: []tf.Input{
-			handle, value, lengths, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes gradients for the scaled exponential linear (Selu) operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Selu operation.
-//	outputs: The outputs of the corresponding Selu operation.
-//
-// Returns The gradients: `gradients * (outputs + scale * alpha)`
-// if outputs < 0, `scale * gradients` otherwise.
-func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SeluGrad",
-		Input: []tf.Input{
-			gradients, outputs,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Get the current size of the TensorArray.
-//
-// Arguments:
-//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//
-// Returns The current size of the TensorArray.
-func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArraySizeV3",
-		Input: []tf.Input{
-			handle, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Deprecated. Use TensorArrayGradV3
-//
-// DEPRECATED at GraphDef version 26: Use TensorArrayGradV3
-func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"source": source}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayGradV2",
-		Input: []tf.Input{
-			handle, flow_in,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD
index af5503f..54b10cd 100644
--- a/tensorflow/java/BUILD
+++ b/tensorflow/java/BUILD
@@ -380,15 +380,13 @@
     linkopts = select({
         "//tensorflow:debug": [],  # Disable all custom linker options in debug mode
         "//tensorflow:darwin": [
-            "-Wl,-exported_symbols_list",  # This line must be directly followed by LINKER_EXPORTED_SYMBOLS
-            "$(location {})".format(LINKER_EXPORTED_SYMBOLS),
+            "-Wl,-exported_symbols_list,$(location {})".format(LINKER_EXPORTED_SYMBOLS),
         ],
         "//tensorflow:windows": [],
         "//conditions:default": [
             "-z defs",
             "-s",
-            "-Wl,--version-script",  #  This line must be directly followed by LINKER_VERSION_SCRIPT
-            "$(location {})".format(LINKER_VERSION_SCRIPT),
+            "-Wl,--version-script,$(location {})".format(LINKER_VERSION_SCRIPT),
         ],
     }),
     linkshared = 1,
diff --git a/tensorflow/java/build_defs.bzl b/tensorflow/java/build_defs.bzl
index e1916ca..f423cc4 100644
--- a/tensorflow/java/build_defs.bzl
+++ b/tensorflow/java/build_defs.bzl
@@ -18,7 +18,7 @@
     "-Xlint:-processing",
     "-Xlint:-serial",
     "-Xlint:-try",
-    "-Xlint:-classfile", # see b/32750402, go/javac-warnings#classfile
+    "-Xlint:-classfile",  # see b/32750402, go/javac-warnings#classfile
 ]
 
 # The bazel errorprone plugin currently only enables default errorChecks
diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc
index 5d6387e..db6116b 100644
--- a/tensorflow/java/src/gen/cc/op_generator.cc
+++ b/tensorflow/java/src/gen/cc/op_generator.cc
@@ -516,7 +516,7 @@
     return false;
   }
   for (const auto& attr : op_def.attr()) {
-    if (attr.type() == "func") {
+    if (attr.type() == "func" || attr.type() == "list(func)") {
       return false;  // TODO(karllessard) add support for function attributes
     }
   }
diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl
index f4ff34e..b46721a 100644
--- a/tensorflow/java/src/gen/gen_ops.bzl
+++ b/tensorflow/java/src/gen/gen_ops.bzl
@@ -17,46 +17,48 @@
 # and then archive those source files into
 #     ops/gen_sources.srcjar
 #
-def tf_java_op_gen_srcjar(name,
-                          gen_tool,
-                          base_package,
-                          api_def_srcs=[],
-                          out_dir="ops/",
-                          out_src_dir="src/main/java/",
-                          visibility=["//tensorflow/java:__pkg__"]):
+def tf_java_op_gen_srcjar(
+        name,
+        gen_tool,
+        base_package,
+        api_def_srcs = [],
+        out_dir = "ops/",
+        out_src_dir = "src/main/java/",
+        visibility = ["//tensorflow/java:__pkg__"]):
+    gen_cmds = ["rm -rf $(@D)"]  # Always start fresh when generating source files
+    srcs = api_def_srcs[:]
 
-  gen_cmds = ["rm -rf $(@D)"]  # Always start from fresh when generating source files
-  srcs = api_def_srcs[:]
+    if not api_def_srcs:
+        api_def_args_str = ","
+    else:
+        api_def_args = []
+        for api_def_src in api_def_srcs:
+            # Add directory of the first ApiDef source to args.
+            # We are assuming all ApiDefs in a single api_def_src are in the
+            # same directory.
+            api_def_args.append(
+                "$$(dirname $$(echo $(locations " + api_def_src +
+                ") | cut -d\" \" -f1))",
+            )
+        api_def_args_str = ",".join(api_def_args)
 
-  if not api_def_srcs:
-    api_def_args_str = ","
-  else:
-    api_def_args = []
-    for api_def_src in api_def_srcs:
-      # Add directory of the first ApiDef source to args.
-      # We are assuming all ApiDefs in a single api_def_src are in the
-      # same directory.
-      api_def_args.append(
-          "$$(dirname $$(echo $(locations " + api_def_src +
-          ") | cut -d\" \" -f1))")
-    api_def_args_str = ",".join(api_def_args)
+    gen_cmds += ["$(location " + gen_tool + ")" +
+                 " --output_dir=$(@D)/" + out_src_dir +
+                 " --base_package=" + base_package +
+                 " --api_dirs=" + api_def_args_str]
 
-  gen_cmds += ["$(location " + gen_tool + ")" +
-               " --output_dir=$(@D)/" + out_src_dir +
-               " --base_package=" + base_package +
-               " --api_dirs=" + api_def_args_str]
+    # Generate a source archive containing generated code for these ops.
+    gen_srcjar = out_dir + name + ".srcjar"
+    gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) src"]
 
-  # Generate a source archive containing generated code for these ops.
-  gen_srcjar = out_dir + name + ".srcjar"
-  gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) src"]
-
-  native.genrule(
-      name=name,
-      srcs=srcs,
-      outs=[gen_srcjar],
-      tools=[
-          "@local_jdk//:jar",
-          "@local_jdk//:jdk",
-          gen_tool
-      ] + tf_binary_additional_srcs(),
-      cmd=" && ".join(gen_cmds))
+    native.genrule(
+        name = name,
+        srcs = srcs,
+        outs = [gen_srcjar],
+        tools = [
+            "@local_jdk//:jar",
+            "@local_jdk//:jdk",
+            gen_tool,
+        ] + tf_binary_additional_srcs(),
+        cmd = " && ".join(gen_cmds),
+    )
diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD
index 86eca1d..46800a8 100644
--- a/tensorflow/lite/BUILD
+++ b/tensorflow/lite/BUILD
@@ -11,6 +11,8 @@
 exports_files(glob([
     "testdata/*.bin",
     "testdata/*.pb",
+    "testdata/*.tflite",
+    "testdata/*.csv",
     "models/testdata/*",
 ]))
 
@@ -405,15 +407,13 @@
     name = "libtensorflowlite.so",
     linkopts = select({
         "//tensorflow:darwin": [
-            "-Wl,-exported_symbols_list",  # This line must be directly followed by the exported_symbols.lds file
-            "$(location //tensorflow/lite:tflite_exported_symbols.lds)",
+            "-Wl,-exported_symbols_list,$(location //tensorflow/lite:tflite_exported_symbols.lds)",
             "-Wl,-install_name,@rpath/libtensorflowlite.so",
         ],
         "//tensorflow:windows": [],
         "//conditions:default": [
             "-z defs",
-            "-Wl,--version-script",  #  This line must be directly followed by the version_script.lds file
-            "$(location //tensorflow/lite:tflite_version_script.lds)",
+            "-Wl,--version-script,$(location //tensorflow/lite:tflite_version_script.lds)",
         ],
     }),
     deps = [
diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl
index 88a8faf..b693524 100644
--- a/tensorflow/lite/build_def.bzl
+++ b/tensorflow/lite/build_def.bzl
@@ -238,6 +238,7 @@
         "conv2d_transpose",
         "conv_with_shared_weights",
         "conv_to_depthwiseconv_with_shared_weights",
+        "cos",
         "depthwiseconv",
         "div",
         "equal",
@@ -250,6 +251,7 @@
         "fully_connected",
         "fused_batch_norm",
         "gather",
+        "gather_nd",
         "gather_with_constant",
         "global_batch_norm",
         "greater",
@@ -284,6 +286,7 @@
         "prelu",
         "pow",
         "range",
+        "rank",
         "reduce_any",
         "reduce_max",
         "reduce_min",
diff --git a/tensorflow/lite/builtin_ops.h b/tensorflow/lite/builtin_ops.h
index fc871d3..361d501 100644
--- a/tensorflow/lite/builtin_ops.h
+++ b/tensorflow/lite/builtin_ops.h
@@ -132,6 +132,10 @@
   kTfLiteBuiltinCeil = 104,
   kTfLiteBuiltinReverseV2 = 105,
   kTfLiteBuiltinAddN = 106,
+  kTfLiteBuiltinGatherNd = 107,
+  kTfLiteBuiltinCos = 108,
+  kTfLiteBuiltinWhere = 109,
+  kTfLiteBuiltinRank = 110,
 } TfLiteBuiltinOperator;
 
 #ifdef __cplusplus
diff --git a/tensorflow/lite/c/BUILD b/tensorflow/lite/c/BUILD
index 680addf..661b648 100644
--- a/tensorflow/lite/c/BUILD
+++ b/tensorflow/lite/c/BUILD
@@ -12,6 +12,7 @@
         "c_api_internal.h",
     ],
     visibility = [
+        "//learning/brain/mobile/kernel_test:__subpackages__",
         "//tensorflow/lite:__subpackages__",
     ],
 )
diff --git a/tensorflow/lite/c/builtin_op_data.h b/tensorflow/lite/c/builtin_op_data.h
index 332c2db..40fea17 100644
--- a/tensorflow/lite/c/builtin_op_data.h
+++ b/tensorflow/lite/c/builtin_op_data.h
@@ -334,6 +334,9 @@
 } TfLiteShapeParams;
 
 typedef struct {
+} TfLiteRankParams;
+
+typedef struct {
   // Parameters supported by version 1:
   float min;
   float max;
diff --git a/tensorflow/lite/c/builtin_op_data_test.cc b/tensorflow/lite/c/builtin_op_data_test.cc
index 4ce7c48..4967183 100644
--- a/tensorflow/lite/c/builtin_op_data_test.cc
+++ b/tensorflow/lite/c/builtin_op_data_test.cc
@@ -71,6 +71,7 @@
   TfLiteTransposeConvParams transpose_conv_params;
   TfLiteSparseToDenseParams sparse_to_dense_params;
   TfLiteShapeParams shape_params;
+  TfLiteRankParams rank_params;
   TfLiteFakeQuantParams fake_quant_params;
   TfLitePackParams pack_params;
   TfLiteOneHotParams one_hot_params;
diff --git a/tensorflow/lite/context_util.h b/tensorflow/lite/context_util.h
index 68b91ea..2f846cc 100644
--- a/tensorflow/lite/context_util.h
+++ b/tensorflow/lite/context_util.h
@@ -38,6 +38,7 @@
   const_iterator begin() const { return int_array_->data; }
   const_iterator end() const { return &int_array_->data[int_array_->size]; }
   size_t size() const { return end() - begin(); }
+  int operator[](size_t pos) const { return int_array_->data[pos]; }
 
  private:
   const TfLiteIntArray* int_array_;
diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc
index 175caa1..09cef8d 100644
--- a/tensorflow/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc
@@ -680,6 +680,7 @@
     // ok for now, since there is no call implementation either.
     case BuiltinOperator_CALL:
     case BuiltinOperator_CONCAT_EMBEDDINGS:
+    case BuiltinOperator_COS:
     case BuiltinOperator_CUSTOM:
     case BuiltinOperator_DEQUANTIZE:
     case BuiltinOperator_EMBEDDING_LOOKUP:
@@ -728,6 +729,9 @@
     case BuiltinOperator_SQUARED_DIFFERENCE:
     case BuiltinOperator_REVERSE_V2:
     case BuiltinOperator_ADD_N:
+    case BuiltinOperator_GATHER_ND:
+    case BuiltinOperator_WHERE:
+    case BuiltinOperator_RANK:
       break;
   }
   return kTfLiteOk;
diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc
index 2fdafa3..ec6762b1 100644
--- a/tensorflow/lite/core/subgraph.cc
+++ b/tensorflow/lite/core/subgraph.cc
@@ -592,7 +592,12 @@
 
   // Short-circuit the state change if the dimensions don't change, avoiding
   // unnecessary (re)allocations.
-  if (EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
+  //
+  // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
+  // the subgraph won't allocate memory for a dynamic tensor when its size
+  // is equal to the original tensor size.
+  if (tensor->data.raw != nullptr &&
+      EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
     return kTfLiteOk;
   }
 
diff --git a/tensorflow/lite/experimental/c/BUILD b/tensorflow/lite/experimental/c/BUILD
index 2f0f4327b..ac71c9b 100644
--- a/tensorflow/lite/experimental/c/BUILD
+++ b/tensorflow/lite/experimental/c/BUILD
@@ -20,15 +20,13 @@
     name = "libtensorflowlite_c.so",
     linkopts = select({
         "//tensorflow:darwin": [
-            "-Wl,-exported_symbols_list",  # This line must be directly followed by the exported_symbols.lds file
-            "$(location //tensorflow/lite/experimental/c:exported_symbols.lds)",
+            "-Wl,-exported_symbols_list,$(location //tensorflow/lite/experimental/c:exported_symbols.lds)",
             "-Wl,-install_name,@rpath/libtensorflowlite_c.so",
         ],
         "//tensorflow:windows": [],
         "//conditions:default": [
             "-z defs",
-            "-Wl,--version-script",  #  This line must be directly followed by the version_script.lds file
-            "$(location //tensorflow/lite/experimental/c:version_script.lds)",
+            "-Wl,--version-script,$(location //tensorflow/lite/experimental/c:version_script.lds)",
         ],
     }),
     deps = [
diff --git a/tensorflow/lite/experimental/examples/lstm/BUILD b/tensorflow/lite/experimental/examples/lstm/BUILD
index 827b104..ab6d508 100644
--- a/tensorflow/lite/experimental/examples/lstm/BUILD
+++ b/tensorflow/lite/experimental/examples/lstm/BUILD
@@ -5,8 +5,8 @@
 load("//tensorflow:tensorflow.bzl", "py_test")
 
 py_library(
-    name = "tflite_lstm",
-    srcs = ["tflite_lstm.py"],
+    name = "rnn",
+    srcs = ["rnn.py"],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
@@ -18,8 +18,8 @@
 )
 
 py_library(
-    name = "tflite_rnn",
-    srcs = ["tflite_rnn.py"],
+    name = "rnn_cell",
+    srcs = ["rnn_cell.py"],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
@@ -40,7 +40,8 @@
         "no_pip",
     ],
     deps = [
-        ":tflite_lstm",
+        ":rnn",
+        ":rnn_cell",
         "//tensorflow:tensorflow_py",
         "//tensorflow/examples/tutorials/mnist:input_data",
         "//tensorflow/lite/python:lite",
@@ -62,7 +63,8 @@
         "no_pip",
     ],
     deps = [
-        ":tflite_rnn",
+        ":rnn",
+        ":rnn_cell",
         "//tensorflow:tensorflow_py",
         "//tensorflow/examples/tutorials/mnist:input_data",
         "//tensorflow/lite/python:lite",
@@ -84,7 +86,7 @@
         "no_pip",
     ],
     deps = [
-        ":tflite_lstm",
+        ":rnn_cell",
         "//tensorflow:tensorflow_py",
         "//tensorflow/examples/tutorials/mnist:input_data",
         "//tensorflow/lite/python:lite",
@@ -106,7 +108,7 @@
         "no_pip",
     ],
     deps = [
-        ":tflite_rnn",
+        ":rnn_cell",
         "//tensorflow:tensorflow_py",
         "//tensorflow/examples/tutorials/mnist:input_data",
         "//tensorflow/lite/python:lite",
diff --git a/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_lstm_test.py b/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_lstm_test.py
index 9dc8109..7761980 100644
--- a/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_lstm_test.py
+++ b/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_lstm_test.py
@@ -20,7 +20,7 @@
 import tensorflow as tf
 
 from tensorflow.examples.tutorials.mnist import input_data
-from tensorflow.lite.experimental.examples.lstm.tflite_lstm import TFLiteLSTMCell
+from tensorflow.lite.experimental.examples.lstm.rnn_cell import TFLiteLSTMCell
 from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs
 from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes
 from tensorflow.python.framework import test_util
diff --git a/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_rnn_test.py b/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_rnn_test.py
index 7a937ce..316d482 100644
--- a/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_rnn_test.py
+++ b/tensorflow/lite/experimental/examples/lstm/bidirectional_sequence_rnn_test.py
@@ -22,7 +22,7 @@
 from tensorflow import flags
 
 from tensorflow.examples.tutorials.mnist import input_data
-from tensorflow.lite.experimental.examples.lstm.tflite_rnn import TfLiteRNNCell
+from tensorflow.lite.experimental.examples.lstm.rnn_cell import TfLiteRNNCell
 from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs
 from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes
 from tensorflow.python.framework import test_util
diff --git a/tensorflow/lite/experimental/examples/lstm/rnn.py b/tensorflow/lite/experimental/examples/lstm/rnn.py
new file mode 100644
index 0000000..1076cd3
--- /dev/null
+++ b/tensorflow/lite/experimental/examples/lstm/rnn.py
@@ -0,0 +1,273 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TfLite LSTMCell wrapper.
+
+TODO(renjieliu): Find a better home for this one.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tensorflow as tf
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import rnn_cell_impl
+from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.ops.rnn import _best_effort_input_batch_size
+from tensorflow.python.ops.rnn import _dynamic_rnn_loop
+from tensorflow.python.ops.rnn import _should_cache
+from tensorflow.python.ops.rnn import _transpose_batch_time
+from tensorflow.python.util import nest
+
+
+def dynamic_rnn(cell,
+                inputs,
+                sequence_length=None,
+                initial_state=None,
+                dtype=None,
+                parallel_iterations=None,
+                swap_memory=False,
+                time_major=True,
+                scope=None):
+  """Creates a recurrent neural network specified by RNNCell `cell`.
+
+  Performs fully dynamic unrolling of `inputs`.
+
+  Example:
+
+  ```python
+  # create a BasicRNNCell
+  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
+
+  # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]
+
+  # defining initial state
+  initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
+
+  # 'state' is a tensor of shape [batch_size, cell_state_size]
+  outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
+                                     initial_state=initial_state,
+                                     dtype=tf.float32)
+  ```
+
+  ```python
+  # create 2 LSTMCells
+  rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]
+
+  # create an RNN cell composed sequentially of a number of RNNCells
+  multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
+
+  # 'outputs' is a tensor of shape [batch_size, max_time, 256]
+  # 'state' is an N-tuple where N is the number of LSTMCells containing a
+  # tf.contrib.rnn.LSTMStateTuple for each cell
+  outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
+                                     inputs=data,
+                                     dtype=tf.float32)
+  ```
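+
+  Note that, unlike `tf.nn.dynamic_rnn` shown above, this TFLite wrapper
+  defaults to (and currently requires) time-major inputs and control flow v2.
+  A minimal sketch, assuming a `TFLiteLSTMCell` as defined in the companion
+  rnn_cell.py (shapes and names are illustrative):
+
+  ```python
+  # inputs shaped [max_time, batch_size, depth] (time-major).
+  inputs = tf.placeholder(tf.float32, shape=[28, None, 28], name="input")
+  lstm_cell = TFLiteLSTMCell(num_units=64)
+  outputs, state = dynamic_rnn(lstm_cell, inputs, dtype=tf.float32)
+  ```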
+
+
+  Args:
+    cell: An instance of RNNCell.
+    inputs: The RNN inputs.
+      If `time_major == False`, this must be a `Tensor` of shape:
+        `[batch_size, max_time, ...]`, or a nested tuple of such elements.
+      If `time_major == True`, this must be a `Tensor` of shape: `[max_time,
+        batch_size, ...]`, or a nested tuple of such elements. This may also be
+        a (possibly nested) tuple of Tensors satisfying this property.  The
+        first two dimensions must match across all the inputs, but otherwise the
+        ranks and other shape components may differ. In this case, input to
+        `cell` at each time-step will replicate the structure of these tuples,
+        except for the time dimension (from which the time is taken). The input
+        to `cell` at each time step will be a `Tensor` or (possibly nested)
+        tuple of Tensors each with dimensions `[batch_size, ...]`.
+    sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. Used
+      to copy-through state and zero-out outputs when past a batch element's
+      sequence length.  So it's more for performance than correctness.
+    initial_state: (optional) An initial state for the RNN. If `cell.state_size`
+      is an integer, this must be a `Tensor` of appropriate type and shape
+      `[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this
+      should be a tuple of tensors having shapes `[batch_size, s] for s in
+      cell.state_size`.
+    dtype: (optional) The data type for the initial state and expected output.
+      Required if initial_state is not provided or RNN state has a heterogeneous
+      dtype.
+    parallel_iterations: (Default: 32).  The number of iterations to run in
+      parallel.  Those operations which do not have any temporal dependency and
+      can be run in parallel, will be.  This parameter trades off time for
+      space.  Values >> 1 use more memory but take less time, while smaller
+      values use less memory but computations take longer.
+    swap_memory: Transparently swap the tensors produced in forward inference
+      but needed for back prop from GPU to CPU.  This allows training RNNs which
+      would typically not fit on a single GPU, with very minimal (or no)
+      performance penalty.
+    time_major: The shape format of the `inputs` and `outputs` Tensors. If true,
+      these `Tensors` must be shaped `[max_time, batch_size, depth]`. If false,
+      these `Tensors` must be shaped `[batch_size, max_time, depth]`. Using
+      `time_major = True` is a bit more efficient because it avoids transposes
+      at the beginning and end of the RNN calculation.  Unlike
+      `tf.nn.dynamic_rnn`, this wrapper defaults to `time_major = True`, and
+      currently only the time-major case is supported.
+    scope: VariableScope for the created subgraph; defaults to "rnn".
+
+  Returns:
+    A pair (outputs, state) where:
+
+    outputs: The RNN output `Tensor`.
+
+      If time_major == False, this will be a `Tensor` shaped:
+        `[batch_size, max_time, cell.output_size]`.
+
+      If time_major == True (the default here), this will be a `Tensor` shaped:
+        `[max_time, batch_size, cell.output_size]`.
+
+      Note, if `cell.output_size` is a (possibly nested) tuple of integers
+      or `TensorShape` objects, then `outputs` will be a tuple having the
+      same structure as `cell.output_size`, containing Tensors having shapes
+      corresponding to the shape data in `cell.output_size`.
+
+    state: The final state.  If `cell.state_size` is an int, this
+      will be shaped `[batch_size, cell.state_size]`.  If it is a
+      `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
+      If it is a (possibly nested) tuple of ints or `TensorShape`, this will
+      be a tuple having the corresponding shapes. If cells are `LSTMCells`
+      `state` will be a tuple containing a `LSTMStateTuple` for each cell.
+
+  Raises:
+    TypeError: If `cell` is not an instance of RNNCell.
+    ValueError: If inputs is None or an empty list.
+    RuntimeError: If not using control flow v2.
+  """
+
+  # Currently only the time_major == True case is supported.
+  assert time_major
+
+  # TODO(b/123051275): We need to check if the cells are TfLiteLSTMCells or
+  # TfLiteRNNCells.
+  rnn_cell_impl.assert_like_rnncell("cell", cell)
+
+  if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
+    raise RuntimeError("OpHint dynamic rnn only supports control flow v2.")
+
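+  # These mappings tie the level-2 "TfLiteDynamicRnn" parent OpHint to the
+  # per-timestep child OpHints emitted by the wrapped cell: the parent's
+  # input 0 feeds the first child's input 0, each child's last output feeds
+  # the next child's input 0, and the last child's last output becomes the
+  # parent's output 0.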
+  parent_first_child_input = [{
+      "parent_ophint_input_index": 0,
+      "first_child_ophint_input_index": 0
+  }]
+  parent_last_child_output = [{
+      "parent_output_index": 0,
+      # For LstmCell, the index is 2.
+      # For RnnCell, the index is 1.
+      # So we use -1 meaning it's the last one.
+      "child_output_index": -1
+  }]
+  internal_children_input_output = [{
+      "child_input_index": 0,
+      # For LstmCell, the index is 2.
+      # For RnnCell, the index is 1.
+      # So we use -1 meaning it's the last one.
+      "child_output_index": -1
+  }]
+  inputs_outputs_mappings = {
+      "parent_first_child_input": parent_first_child_input,
+      "parent_last_child_output": parent_last_child_output,
+      "internal_children_input_output": internal_children_input_output
+  }
+  tflite_wrapper = tf.lite.OpHint(
+      "TfLiteDynamicRnn",
+      level=2,
+      children_inputs_mappings=inputs_outputs_mappings)
+  with vs.variable_scope(scope or "rnn") as varscope:
+    # Create a new scope in which the caching device is either
+    # determined by the parent scope, or is set to place the cached
+    # Variable using the same placement as for the rest of the RNN.
+    if _should_cache():
+      if varscope.caching_device is None:
+        varscope.set_caching_device(lambda op: op.device)
+
+    inputs = tflite_wrapper.add_input(inputs, name="input", index_override=0)
+
+    # If time_major==False, inputs are batch-major: shaped
+    #   [batch, time, depth].
+    # For internal calculations, we transpose to [time, batch, depth].
+    flat_input = nest.flatten(inputs)
+
+    if not time_major:
+      # (batch, time, depth) => (time, batch, depth)
+      flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
+      flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)
+
+    parallel_iterations = parallel_iterations or 32
+    if sequence_length is not None:
+      sequence_length = math_ops.to_int32(sequence_length)
+      if sequence_length.get_shape().rank not in (None, 1):
+        raise ValueError(
+            "sequence_length must be a vector of length batch_size, "
+            "but saw shape: %s" % sequence_length.get_shape())
+      sequence_length = array_ops.identity(  # Just to find it in the graph.
+          sequence_length,
+          name="sequence_length")
+
+    batch_size = _best_effort_input_batch_size(flat_input)
+
+    if initial_state is not None:
+      state = initial_state
+    else:
+      if not dtype:
+        raise ValueError("If there is no initial_state, you must give a dtype.")
+      if getattr(cell, "get_initial_state", None) is not None:
+        state = cell.get_initial_state(
+            inputs=None, batch_size=batch_size, dtype=dtype)
+      else:
+        state = cell.zero_state(batch_size, dtype)
+
+    def _assert_has_shape(x, shape):
+      x_shape = array_ops.shape(x)
+      packed_shape = array_ops.stack(shape)
+      return control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [
+              "Expected shape for Tensor %s is " % x.name, packed_shape,
+              " but saw shape: ", x_shape
+          ])
+
+    if not context.executing_eagerly() and sequence_length is not None:
+      # Perform some shape validation
+      with ops.control_dependencies(
+          [_assert_has_shape(sequence_length, [batch_size])]):
+        sequence_length = array_ops.identity(
+            sequence_length, name="CheckSeqLen")
+
+    inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)
+
+    outputs, final_state = _dynamic_rnn_loop(
+        cell,
+        inputs,
+        state,
+        parallel_iterations=parallel_iterations,
+        swap_memory=swap_memory,
+        sequence_length=sequence_length,
+        dtype=dtype)
+
+    # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
+    # If we are performing batch-major calculations, transpose output back
+    # to shape [batch, time, depth]
+    if not time_major:
+      # (time, batch, depth) => (batch, time, depth)
+      outputs = nest.map_structure(_transpose_batch_time, outputs)
+    outputs = tflite_wrapper.add_output(outputs, name="outputs")
+
+    return outputs, final_state
diff --git a/tensorflow/lite/experimental/examples/lstm/rnn_cell.py b/tensorflow/lite/experimental/examples/lstm/rnn_cell.py
new file mode 100644
index 0000000..b1f89d6
--- /dev/null
+++ b/tensorflow/lite/experimental/examples/lstm/rnn_cell.py
@@ -0,0 +1,519 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TfLite BasicRnnCell wrapper.
+
+TODO(renjieliu): Find a better home for this one.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import itertools
+import tensorflow as tf
+
+from tensorflow.python.keras import activations
+from tensorflow.python.keras import initializers
+from tensorflow.python.layers import base as base_layer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import rnn_cell_impl
+from tensorflow.python.platform import tf_logging as logging
+
+
+class TfLiteRNNCell(rnn_cell_impl.LayerRNNCell):
+  """The most basic RNN cell.
+
+  This is used only for TfLite; it provides hints, and it also creates the
+  variables in the format desired by the tflite ops.
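+
+  A minimal usage sketch, assuming the time-major `dynamic_rnn` wrapper from
+  the companion rnn.py (shapes are illustrative):
+
+  ```python
+  rnn_cell = TfLiteRNNCell(num_units=64)
+  # inputs shaped [max_time, batch_size, depth] (time-major).
+  inputs = tf.placeholder(tf.float32, shape=[28, None, 28])
+  outputs, state = dynamic_rnn(rnn_cell, inputs, dtype=tf.float32)
+  ```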
+  """
+
+  def __init__(self,
+               num_units,
+               activation=None,
+               reuse=None,
+               name=None,
+               dtype=None,
+               **kwargs):
+    """Initializes the parameters for an RNN cell.
+
+    Args:
+      num_units: int, The number of units in the RNN cell.
+      activation: Nonlinearity to use.  Default: `tanh`. It can also be a
+        string matching the name of a Keras activation function.
+      reuse: (optional) Python boolean describing whether to reuse variables in
+        an existing scope. Raises an error if not `True` and the existing scope
+        already has the given variables.
+      name: String, the name of the layer. Layers with the same name will share
+        weights, but to avoid mistakes we require reuse=True in such cases.
+      dtype: Default dtype of the layer (default of `None` means use the type of
+        the first input). Required when `build` is called before `call`.
+      **kwargs: Dict, keyword named properties for common layer attributes, like
+        `trainable` etc when constructing the cell from configs of get_config().
+
+    Raises:
+      ValueError: If the existing scope already has the given variables.
+    """
+    super(TfLiteRNNCell, self).__init__(
+        _reuse=reuse, name=name, dtype=dtype, **kwargs)
+
+    # Inputs must be Rank-2.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self._tflite_wrapper = tf.lite.OpHint("UnidirectionalSequenceRnn")
+    self._num_units = num_units
+    if activation:
+      self._activation = activations.get(activation)
+    else:
+      self._activation = math_ops.tanh
+
+  @property
+  def state_size(self):
+    return self._num_units
+
+  @property
+  def output_size(self):
+    return self._num_units
+
+  def build(self, inputs_shape):
+    """Builds the RNN cell.
+
+    Args:
+      inputs_shape: RNN input tensor shape.
+
+    Raises:
+      ValueError: If last dimension of the input shape is not known.
+    """
+    if inputs_shape[-1] is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
+                       (inputs_shape,))
+
+    input_depth = inputs_shape[-1]
+
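+    # Each variable below is registered as an OpHint input with a fixed
+    # index_override, so it lands at the tensor position expected by the
+    # fused UnidirectionalSequenceRnn op.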
+    def add_variable_wrapped(name, shape, initializer, index):
+      var = self.add_variable(name, shape=shape, initializer=initializer)
+      return self._tflite_wrapper.add_input(
+          var, name=name, index_override=index)
+
+    self._input_weights = add_variable_wrapped(
+        "input_weights", [self._num_units, input_depth], None, 1)
+    self._recurrent_weights = add_variable_wrapped(
+        "recurrent_weights", [self._num_units, self._num_units], None, 2)
+    self._bias = add_variable_wrapped(
+        "bias",
+        shape=[self._num_units],
+        initializer=init_ops.zeros_initializer(dtype=self.dtype),
+        index=3)
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
+    inputs = self._tflite_wrapper.add_input(
+        inputs, tag="input", name="input", aggregate="stack", index_override=0)
+    state = self._tflite_wrapper.add_input(
+        state,
+        tag="hidden_state",
+        name="hidden_state",
+        aggregate="first",
+        index_override=4)
+    weights = array_ops.transpose(
+        array_ops.concat([self._input_weights, self._recurrent_weights], 1))
+    gate_inputs = math_ops.matmul(array_ops.concat([inputs, state], 1), weights)
+    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
+    output = self._activation(gate_inputs)
+    output = self._tflite_wrapper.add_output(
+        output,
+        tag="output",
+        name="output",
+        index_override=1,
+        aggregate="stack")
+    return output, output
+
+  def get_config(self):
+    config = {
+        "num_units": self._num_units,
+        "activation": activations.serialize(self._activation),
+        "reuse": self._reuse,
+    }
+    base_config = super(TfLiteRNNCell, self).get_config()
+    return dict(itertools.chain(base_config.items(), config.items()))
+
+
+class TFLiteLSTMCell(rnn_cell_impl.LayerRNNCell):
+  """Long short-term memory unit (LSTM) recurrent network cell.
+
+  This is used only for TfLite; it provides hints, and it also creates the
+  variables in the format desired by the tflite ops (transposed and separated).
+
+  The default non-peephole implementation is based on:
+
+    https://pdfs.semanticscholar.org/1154/0131eae85b2e11d53df7f1360eeb6476e7f4.pdf
+
+  Felix Gers, Jurgen Schmidhuber, and Fred Cummins.
+  "Learning to forget: Continual prediction with LSTM." IET, 850-855, 1999.
+
+  The peephole implementation is based on:
+
+    https://research.google.com/pubs/archive/43905.pdf
+
+  Hasim Sak, Andrew Senior, and Francoise Beaufays.
+  "Long short-term memory recurrent neural network architectures for
+   large scale acoustic modeling." INTERSPEECH, 2014.
+
+  The class uses optional peep-hole connections, optional cell clipping, and
+  an optional projection layer.
+
+  Note that this cell is not optimized for performance. Please use
+  `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or
+  `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for
+  better performance on CPU.
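+
+  A minimal construction sketch (argument values are illustrative):
+
+  ```python
+  lstm_cell = TFLiteLSTMCell(
+      num_units=64,
+      use_peepholes=True,  # enable diagonal/peephole connections
+      cell_clip=3.0,       # clip the cell state to [-3.0, 3.0]
+      num_proj=32)         # project the output down to 32 units
+  ```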
+  """
+
+  def __init__(self,
+               num_units,
+               use_peepholes=False,
+               cell_clip=None,
+               initializer=None,
+               num_proj=None,
+               proj_clip=None,
+               num_unit_shards=None,
+               num_proj_shards=None,
+               forget_bias=1.0,
+               state_is_tuple=True,
+               activation=None,
+               reuse=None,
+               name=None,
+               dtype=None):
+    """Initialize the parameters for an LSTM cell.
+
+    Args:
+      num_units: int, The number of units in the LSTM cell.
+      use_peepholes: bool, set True to enable diagonal/peephole connections.
+      cell_clip: (optional) A float value, if provided the cell state is clipped
+        by this value prior to the cell output activation.
+      initializer: (optional) The initializer to use for the weight and
+        projection matrices.
+      num_proj: (optional) int, The output dimensionality for the projection
+        matrices.  If None, no projection is performed.
+      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
+        provided, then the projected values are clipped elementwise to within
+        `[-proj_clip, proj_clip]`.
+      num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a
+        variable_scope partitioner instead.
+      num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a
+        variable_scope partitioner instead.
+      forget_bias: Biases of the forget gate are initialized by default to 1 in
+        order to reduce the scale of forgetting at the beginning of the
+        training. Must set it manually to `0.0` when restoring from CudnnLSTM
+        trained checkpoints.
+      state_is_tuple: If True, accepted and returned states are 2-tuples of the
+        `c_state` and `m_state`.  If False, they are concatenated along the
+        column axis.  This latter behavior will soon be deprecated.
+      activation: Activation function of the inner states.  Default: `tanh`.
+      reuse: (optional) Python boolean describing whether to reuse variables in
+        an existing scope.  If not `True`, and the existing scope already has
+        the given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will share
+        weights, but to avoid mistakes we require reuse=True in such cases.
+      dtype: Default dtype of the layer (default of `None` means use the type of
+        the first input). Required when `build` is called before `call`.  When
+        restoring from CudnnLSTM-trained checkpoints, use
+        `CudnnCompatibleLSTMCell` instead.
+    """
+    super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
+    # TODO(raziel): decide if we want to just support tuples (yes please!).
+    if not state_is_tuple:
+      logging.warn(
+          "%s: Using a concatenated state is slower and will soon be "
+          "deprecated.  Use state_is_tuple=True.", self)
+    if num_unit_shards is not None or num_proj_shards is not None:
+      logging.warn(
+          "%s: The num_unit_shards and proj_unit_shards parameters are "
+          "deprecated and will be removed in Jan 2017.  "
+          "Use a variable scope with a partitioner instead.", self)
+
+    # Inputs must be 2-dimensional.
+    # TODO(raziel): layers stuff -- chop if un-layerizing Op.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self._tflite_wrapper = tf.lite.OpHint("UnidirectionalSequenceLstm")
+
+    self._num_units = num_units
+    self._use_peepholes = use_peepholes
+    self._cell_clip = cell_clip
+    self._initializer = initializer
+    self._num_proj = num_proj
+    self._proj_clip = proj_clip
+    self._num_unit_shards = num_unit_shards
+    self._num_proj_shards = num_proj_shards
+    self._forget_bias = forget_bias
+    self._state_is_tuple = state_is_tuple
+    self._activation = activation or math_ops.tanh
+
+    self._output_size = num_proj if num_proj else num_units
+    self._state_size = (
+        rnn_cell_impl.LSTMStateTuple(num_units, self._output_size)
+        if state_is_tuple else num_units + self._output_size)
+
+  @property
+  def state_size(self):
+    return self._state_size
+
+  @property
+  def output_size(self):
+    return self._output_size
+
+  def build(self, inputs_shape):
+    """Build TfLite LSTM cell graph.
+
+    Args:
+      inputs_shape: The input shape, which must be statically known and of the
+        form [batch_size, input_size].
+
+    Raises:
+      ValueError: if the inputs_shape is invalid.
+    """
+    if len(inputs_shape) != 2 or inputs_shape[1].value is None:
+      raise ValueError("Invalid inputs_shape, saw shape: %s" % inputs_shape)
+
+    input_depth = inputs_shape[1].value
+    maybe_partitioner = (
+        partitioned_variables.fixed_size_partitioner(self._num_unit_shards)
+        if self._num_unit_shards is not None else None)
+    input_weight_shape = [self._num_units, input_depth]
+    cell_weight_shape = [self._num_units, self._output_size]
+    bias_shape = [self._num_units]
+
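+    # As in the RNN cell, each variable is registered as an OpHint input with
+    # a fixed index_override matching the tensor layout of the fused
+    # UnidirectionalSequenceLstm op (see the index comments below).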
+    def add_variable_wrapped(name, shape, initializer, index, partitioner):
+      var = self.add_variable(
+          name, shape=shape, initializer=initializer, partitioner=partitioner)
+      return self._tflite_wrapper.add_input(
+          var, name=name, index_override=index)
+
+    weight_initializer = self._initializer
+    if self.dtype is None:
+      bias_initializer = init_ops.zeros_initializer
+    else:
+      bias_initializer = init_ops.zeros_initializer(dtype=self.dtype)
+
+    self.input_to_input_w = add_variable_wrapped(
+        "input_to_input_w", input_weight_shape, weight_initializer, 1,
+        maybe_partitioner)
+    self.input_to_forget_w = add_variable_wrapped(
+        "input_to_forget_w", input_weight_shape, weight_initializer, 2,
+        maybe_partitioner)
+    self.input_to_cell_w = add_variable_wrapped(
+        "input_to_cell_w", input_weight_shape, weight_initializer, 3,
+        maybe_partitioner)
+    self.input_to_output_w = add_variable_wrapped(
+        "input_to_output_w", input_weight_shape, weight_initializer, 4,
+        maybe_partitioner)
+    self.cell_to_input_w = add_variable_wrapped(
+        "cell_to_input_w", cell_weight_shape, weight_initializer, 5,
+        maybe_partitioner)
+    self.cell_to_forget_w = add_variable_wrapped(
+        "cell_to_forget_w", cell_weight_shape, weight_initializer, 6,
+        maybe_partitioner)
+    self.cell_to_cell_w = add_variable_wrapped(
+        "cell_to_cell_w", cell_weight_shape, weight_initializer, 7,
+        maybe_partitioner)
+    self.cell_to_output_w = add_variable_wrapped(
+        "cell_to_output_w", cell_weight_shape, weight_initializer, 8,
+        maybe_partitioner)
+
+    self.input_bias = add_variable_wrapped(
+        "input_bias", bias_shape, bias_initializer, 12, maybe_partitioner)
+    self.forget_bias = add_variable_wrapped(
+        "forget_bias", bias_shape, bias_initializer, 13, maybe_partitioner)
+    self.cell_bias = add_variable_wrapped(
+        "cell_bias", bias_shape, bias_initializer, 14, maybe_partitioner)
+    self.output_bias = add_variable_wrapped(
+        "output_bias", bias_shape, bias_initializer, 15, maybe_partitioner)
+
+    # Indices 9, 10, 11: peephole weights.
+    # f stands for forget, i stands for input, and o stands for output.
+    if self._use_peepholes:
+      self._w_f_diag = add_variable_wrapped("w_f_diag", [self._num_units],
+                                            self._initializer, 10,
+                                            maybe_partitioner)
+      self._w_i_diag = add_variable_wrapped("w_i_diag", [self._num_units],
+                                            self._initializer, 9,
+                                            maybe_partitioner)
+      self._w_o_diag = add_variable_wrapped("w_o_diag", [self._num_units],
+                                            self._initializer, 11,
+                                            maybe_partitioner)
+
+    # Index 16 is the projection kernel.
+    if self._num_proj is not None:
+      maybe_proj_partitioner = (
+          partitioned_variables.fixed_size_partitioner(self._num_proj_shards)
+          if self._num_proj_shards is not None else None)
+      self._proj_kernel = add_variable_wrapped(
+          "projection/kernel", [self._num_proj, self._num_units],
+          self._initializer,
+          16,
+          partitioner=maybe_proj_partitioner)
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Run one step of LSTM.
+
+    Args:
+      inputs: input Tensor, 2D, `[batch, num_units]`.
+      state: if `state_is_tuple` is False, this must be a state Tensor, `2-D,
+        [batch, state_size]`.  If `state_is_tuple` is True, this must be a tuple
+        of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`.
+
+    Returns:
+      A tuple containing:
+
+      - A `2-D, [batch, output_dim]`, Tensor representing the output of the
+        LSTM after reading `inputs` when previous state was `state`.
+        Here output_dim is:
+           num_proj if num_proj was set,
+           num_units otherwise.
+      - Tensor(s) representing the new state of LSTM after reading `inputs` when
+        the previous state was `state`.  Same type and shape(s) as `state`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    inputs = self._tflite_wrapper.add_input(
+        inputs, tag="input", name="input", aggregate="stack", index_override=0)
+
+    # Make sure inputs and bias_initializer have the same type.
+    assert inputs.dtype == self.input_to_input_w.dtype
+
+    num_proj = self._num_units if self._num_proj is None else self._num_proj
+    sigmoid = math_ops.sigmoid
+
+    if self._state_is_tuple:
+      (c_prev, m_prev) = state
+    else:
+      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
+      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
+
+    # Note: For TfLite, the cell state is at index 19 while the activation
+    # state is at index 18.
+    c_prev = self._tflite_wrapper.add_input(
+        c_prev,
+        tag="c_prev",
+        name="c_prev",
+        aggregate="first",
+        index_override=19)
+    m_prev = self._tflite_wrapper.add_input(
+        m_prev,
+        tag="m_prev",
+        name="m_prev",
+        aggregate="first",
+        index_override=18)
+
+    input_size = inputs.get_shape().with_rank(2)[1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+
+    inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1)
+
+    # i stands for the input gate.
+    # f stands for the forget gate activation.
+    # o stands for the output gate.
+    # j stands for the cell candidate (the new input to the cell).
+    # c is the final cell state.
+    # m is the output.
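+    # In the non-peephole case, the update below computes, per step:
+    #   c_t = sigmoid(f + forget_bias) * c_{t-1} + sigmoid(i) * act(j)
+    #   m_t = sigmoid(o) * act(c_t)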
+    i = nn_ops.bias_add(
+        math_ops.matmul(
+            inputs_and_m_prev,
+            array_ops.concat([self.input_to_input_w, self.cell_to_input_w],
+                             axis=1),
+            transpose_b=True), self.input_bias)
+    f = nn_ops.bias_add(
+        math_ops.matmul(
+            inputs_and_m_prev,
+            array_ops.concat([self.input_to_forget_w, self.cell_to_forget_w],
+                             axis=1),
+            transpose_b=True), self.forget_bias)
+    o = nn_ops.bias_add(
+        math_ops.matmul(
+            inputs_and_m_prev,
+            array_ops.concat([self.input_to_output_w, self.cell_to_output_w],
+                             axis=1),
+            transpose_b=True), self.output_bias)
+    j = nn_ops.bias_add(
+        math_ops.matmul(
+            inputs_and_m_prev,
+            array_ops.concat([self.input_to_cell_w, self.cell_to_cell_w],
+                             axis=1),
+            transpose_b=True), self.cell_bias)
+
+    # Diagonal connections
+    if self._use_peepholes:
+      c = (
+          sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
+          sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
+    else:
+      c = (
+          sigmoid(f + self._forget_bias) * c_prev +
+          sigmoid(i) * self._activation(j))
+
+    if self._cell_clip is not None:
+      # pylint: disable=invalid-unary-operand-type
+      c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
+      # pylint: enable=invalid-unary-operand-type
+    if self._use_peepholes:
+      m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
+    else:
+      m = sigmoid(o) * self._activation(c)
+
+    if self._num_proj is not None:
+      transposed_proj_kernel = array_ops.transpose(self._proj_kernel)
+      m = math_ops.matmul(m, transposed_proj_kernel)
+
+      if self._proj_clip is not None:
+        # pylint: disable=invalid-unary-operand-type
+        m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
+        # pylint: enable=invalid-unary-operand-type
+
+    c = self._tflite_wrapper.add_output(
+        c, tag="c", name="c", aggregate="last", index_override=1)
+    m = self._tflite_wrapper.add_output(
+        m, tag="m", name="m", index_override=2, aggregate="stack")
+
+    new_state = (
+        rnn_cell_impl.LSTMStateTuple(c, m)
+        if self._state_is_tuple else array_ops.concat([c, m], 1))
+    return m, new_state
+
+  def get_config(self):
+    config = {
+        "num_units": self._num_units,
+        "use_peepholes": self._use_peepholes,
+        "cell_clip": self._cell_clip,
+        "initializer": initializers.serialize(self._initializer),
+        "num_proj": self._num_proj,
+        "proj_clip": self._proj_clip,
+        "num_unit_shards": self._num_unit_shards,
+        "num_proj_shards": self._num_proj_shards,
+        "forget_bias": self._forget_bias,
+        "state_is_tuple": self._state_is_tuple,
+        "activation": activations.serialize(self._activation),
+        "reuse": self._reuse,
+    }
+    base_config = super(TFLiteLSTMCell, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
diff --git a/tensorflow/lite/experimental/examples/lstm/tflite_lstm.py b/tensorflow/lite/experimental/examples/lstm/tflite_lstm.py
deleted file mode 100644
index e6d329f..0000000
--- a/tensorflow/lite/experimental/examples/lstm/tflite_lstm.py
+++ /dev/null
@@ -1,643 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""TfLite LSTMCell wrapper.
-
-TODO(renjieliu): Find a better home for this one.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import tensorflow as tf
-
-from tensorflow.lite.python import lite
-from tensorflow.python.eager import context
-from tensorflow.python.framework import ops
-from tensorflow.python.keras import activations
-from tensorflow.python.keras import initializers
-from tensorflow.python.layers import base as base_layer
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import clip_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import control_flow_util
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import rnn_cell_impl
-from tensorflow.python.ops import variable_scope as vs
-from tensorflow.python.ops.rnn import _best_effort_input_batch_size
-from tensorflow.python.ops.rnn import _dynamic_rnn_loop
-from tensorflow.python.ops.rnn import _should_cache
-from tensorflow.python.ops.rnn import _transpose_batch_time
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util import nest
-
-
-class TFLiteLSTMCell(rnn_cell_impl.LayerRNNCell):
-  """Long short-term memory unit (LSTM) recurrent network cell.
-
-  This is used only for TfLite, it provides hints and it also makes the
-  variables in the desired for the tflite ops  (transposed and seaparated).
-
-  The default non-peephole implementation is based on:
-
-    https://pdfs.semanticscholar.org/1154/0131eae85b2e11d53df7f1360eeb6476e7f4.pdf
-
-  Felix Gers, Jurgen Schmidhuber, and Fred Cummins.
-  "Learning to forget: Continual prediction with LSTM." IET, 850-855, 1999.
-
-  The peephole implementation is based on:
-
-    https://research.google.com/pubs/archive/43905.pdf
-
-  Hasim Sak, Andrew Senior, and Francoise Beaufays.
-  "Long short-term memory recurrent neural network architectures for
-   large scale acoustic modeling." INTERSPEECH, 2014.
-
-  The class uses optional peep-hole connections, optional cell clipping, and
-  an optional projection layer.
-
-  Note that this cell is not optimized for performance. Please use
-  `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or
-  `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for
-  better performance on CPU.
-  """
-
-  def __init__(self,
-               num_units,
-               use_peepholes=False,
-               cell_clip=None,
-               initializer=None,
-               num_proj=None,
-               proj_clip=None,
-               num_unit_shards=None,
-               num_proj_shards=None,
-               forget_bias=1.0,
-               state_is_tuple=True,
-               activation=None,
-               reuse=None,
-               name=None,
-               dtype=None):
-    """Initialize the parameters for an LSTM cell.
-
-    Args:
-      num_units: int, The number of units in the LSTM cell.
-      use_peepholes: bool, set True to enable diagonal/peephole connections.
-      cell_clip: (optional) A float value, if provided the cell state is clipped
-        by this value prior to the cell output activation.
-      initializer: (optional) The initializer to use for the weight and
-        projection matrices.
-      num_proj: (optional) int, The output dimensionality for the projection
-        matrices.  If None, no projection is performed.
-      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
-        provided, then the projected values are clipped elementwise to within
-        `[-proj_clip, proj_clip]`.
-      num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a
-        variable_scope partitioner instead.
-      num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a
-        variable_scope partitioner instead.
-      forget_bias: Biases of the forget gate are initialized by default to 1 in
-        order to reduce the scale of forgetting at the beginning of the
-        training. Must set it manually to `0.0` when restoring from CudnnLSTM
-        trained checkpoints.
-      state_is_tuple: If True, accepted and returned states are 2-tuples of the
-        `c_state` and `m_state`.  If False, they are concatenated along the
-        column axis.  This latter behavior will soon be deprecated.
-      activation: Activation function of the inner states.  Default: `tanh`.
-      reuse: (optional) Python boolean describing whether to reuse variables in
-        an existing scope.  If not `True`, and the existing scope already has
-        the given variables, an error is raised.
-      name: String, the name of the layer. Layers with the same name will share
-        weights, but to avoid mistakes we require reuse=True in such cases.
-      dtype: Default dtype of the layer (default of `None` means use the type of
-        the first input). Required when `build` is called before `call`.  When
-        restoring from CudnnLSTM-trained checkpoints, use
-        `CudnnCompatibleLSTMCell` instead.
-    """
-    super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
-    # TODO(raziel): decide if we want to just support tuples (yes please!).
-    if not state_is_tuple:
-      logging.warn(
-          "%s: Using a concatenated state is slower and will soon be "
-          "deprecated.  Use state_is_tuple=True.", self)
-    if num_unit_shards is not None or num_proj_shards is not None:
-      logging.warn(
-          "%s: The num_unit_shards and proj_unit_shards parameters are "
-          "deprecated and will be removed in Jan 2017.  "
-          "Use a variable scope with a partitioner instead.", self)
-
-    # Inputs must be 2-dimensional.
-    # TODO(raziel): layers stuff -- chop if un-layerizing Op.
-    self.input_spec = base_layer.InputSpec(ndim=2)
-
-    self._tflite_wrapper = lite.OpHint("UnidirectionalSequenceLstm")
-
-    self._num_units = num_units
-    self._use_peepholes = use_peepholes
-    self._cell_clip = cell_clip
-    self._initializer = initializer
-    self._num_proj = num_proj
-    self._proj_clip = proj_clip
-    self._num_unit_shards = num_unit_shards
-    self._num_proj_shards = num_proj_shards
-    self._forget_bias = forget_bias
-    self._state_is_tuple = state_is_tuple
-    self._activation = activation or math_ops.tanh
-
-    self._output_size = num_proj if num_proj else num_units
-    self._state_size = (
-        tf.nn.rnn_cell.LSTMStateTuple(num_units, self._output_size)
-        if state_is_tuple else num_units + self._output_size)
-
-  @property
-  def state_size(self):
-    return self._state_size
-
-  @property
-  def output_size(self):
-    return self._output_size
-
-  def build(self, inputs_shape):
-    """Build TfLite LSTM cell graph.
-
-    Args:
-      inputs_shape: The inputs_shape must be known, and is [batch_size,
-        input_size] shape.
-
-    Raises:
-      ValueError: if the inputs_shape is invalid.
-    """
-    if len(inputs_shape) != 2 or inputs_shape[1].value is None:
-      raise ValueError("Invalid inputs_shape, saw shape: %s" % inputs_shape)
-
-    input_depth = inputs_shape[1].value
-    maybe_partitioner = (
-        partitioned_variables.fixed_size_partitioner(self._num_unit_shards)
-        if self._num_unit_shards is not None else None)
-    input_weight_shape = [self._num_units, input_depth]
-    cell_weight_shape = [self._num_units, self._output_size]
-    bias_shape = [self._num_units]
-
-    def add_variable_wrapped(name, shape, initializer, index, partitioner):
-      var = self.add_variable(
-          name, shape=shape, initializer=initializer, partitioner=partitioner)
-      return self._tflite_wrapper.add_input(
-          var, name=name, index_override=index)
-
-    weight_initializer = self._initializer
-    if self.dtype is None:
-      bias_initializer = init_ops.zeros_initializer
-    else:
-      bias_initializer = init_ops.zeros_initializer(dtype=self.dtype)
-
-    self.input_to_input_w = add_variable_wrapped(
-        "input_to_input_w", input_weight_shape, weight_initializer, 1,
-        maybe_partitioner)
-    self.input_to_forget_w = add_variable_wrapped(
-        "input_to_forget_w", input_weight_shape, weight_initializer, 2,
-        maybe_partitioner)
-    self.input_to_cell_w = add_variable_wrapped(
-        "input_to_cell_w", input_weight_shape, weight_initializer, 3,
-        maybe_partitioner)
-    self.input_to_output_w = add_variable_wrapped(
-        "input_to_output_w", input_weight_shape, weight_initializer, 4,
-        maybe_partitioner)
-    self.cell_to_input_w = add_variable_wrapped(
-        "cell_to_input_w", cell_weight_shape, weight_initializer, 5,
-        maybe_partitioner)
-    self.cell_to_forget_w = add_variable_wrapped(
-        "cell_to_forget_w", cell_weight_shape, weight_initializer, 6,
-        maybe_partitioner)
-    self.cell_to_cell_w = add_variable_wrapped(
-        "cell_to_cell_w", cell_weight_shape, weight_initializer, 7,
-        maybe_partitioner)
-    self.cell_to_output_w = add_variable_wrapped(
-        "cell_to_output_w", cell_weight_shape, weight_initializer, 8,
-        maybe_partitioner)
-
-    self.input_bias = add_variable_wrapped(
-        "input_bias", bias_shape, bias_initializer, 12, maybe_partitioner)
-    self.forget_bias = add_variable_wrapped(
-        "forget_bias", bias_shape, bias_initializer, 13, maybe_partitioner)
-    self.cell_bias = add_variable_wrapped(
-        "cell_bias", bias_shape, bias_initializer, 14, maybe_partitioner)
-    self.output_bias = add_variable_wrapped(
-        "output_bias", bias_shape, bias_initializer, 15, maybe_partitioner)
-
-    # index 9, 10, 11.
-    # f stands for forget, i stands for input and o stands for output.
-    if self._use_peepholes:
-      self._w_f_diag = add_variable_wrapped("w_f_diag", [self._num_units],
-                                            self._initializer, 10,
-                                            maybe_partitioner)
-      self._w_i_diag = add_variable_wrapped("w_i_diag", [self._num_units],
-                                            self._initializer, 9,
-                                            maybe_partitioner)
-      self._w_o_diag = add_variable_wrapped("w_o_diag", [self._num_units],
-                                            self._initializer, 11,
-                                            maybe_partitioner)
-
-    # index 16 for proj kernel.
-    if self._num_proj is not None:
-      maybe_proj_partitioner = (
-          partitioned_variables.fixed_size_partitioner(self._num_proj_shards)
-          if self._num_proj_shards is not None else None)
-      self._proj_kernel = add_variable_wrapped(
-          "projection/kernel", [self._num_proj, self._num_units],
-          self._initializer,
-          16,
-          partitioner=maybe_proj_partitioner)
-
-    self.built = True
-
-  def call(self, inputs, state):
-    """Run one step of LSTM.
-
-    Args:
-      inputs: input Tensor, 2D, `[batch, num_units]`.
-      state: if `state_is_tuple` is False, this must be a state Tensor, `2-D,
-        [batch, state_size]`.  If `state_is_tuple` is True, this must be a tuple
-        of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`.
-
-    Returns:
-      A tuple containing:
-
-      - A `2-D, [batch, output_dim]`, Tensor representing the output of the
-        LSTM after reading `inputs` when previous state was `state`.
-        Here output_dim is:
-           num_proj if num_proj was set,
-           num_units otherwise.
-      - Tensor(s) representing the new state of LSTM after reading `inputs` when
-        the previous state was `state`.  Same type and shape(s) as `state`.
-
-    Raises:
-      ValueError: If input size cannot be inferred from inputs via
-        static shape inference.
-    """
-    inputs = self._tflite_wrapper.add_input(
-        inputs, tag="input", name="input", aggregate="stack", index_override=0)
-
-    # Make sure inputs and bias_initializer has the same type.
-    assert inputs.dtype == self.input_to_input_w.dtype
-
-    num_proj = self._num_units if self._num_proj is None else self._num_proj
-    sigmoid = math_ops.sigmoid
-
-    if self._state_is_tuple:
-      (c_prev, m_prev) = state
-    else:
-      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
-      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
-
-    # Note: For TfLite, cell_state is at index 19 while activation state at
-    # index 18.
-    c_prev = self._tflite_wrapper.add_input(
-        c_prev,
-        tag="c_prev",
-        name="c_prev",
-        aggregate="first",
-        index_override=19)
-    m_prev = self._tflite_wrapper.add_input(
-        m_prev,
-        tag="m_prev",
-        name="m_prev",
-        aggregate="first",
-        index_override=18)
-
-    input_size = inputs.get_shape().with_rank(2)[1]
-    if input_size.value is None:
-      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
-
-    inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1)
-
-    # i stands for input gate.
-    # f stands for forget gate activation.
-    # o outputs.
-    # j output of LSTM unit.
-    # c is the final state.
-    # m is the output.
-    i = nn_ops.bias_add(
-        tf.matmul(
-            inputs_and_m_prev,
-            tf.concat([self.input_to_input_w, self.cell_to_input_w], axis=1),
-            transpose_b=True), self.input_bias)
-    f = nn_ops.bias_add(
-        tf.matmul(
-            inputs_and_m_prev,
-            tf.concat([self.input_to_forget_w, self.cell_to_forget_w], axis=1),
-            transpose_b=True), self.forget_bias)
-    o = nn_ops.bias_add(
-        tf.matmul(
-            inputs_and_m_prev,
-            tf.concat([self.input_to_output_w, self.cell_to_output_w], axis=1),
-            transpose_b=True), self.output_bias)
-    j = nn_ops.bias_add(
-        tf.matmul(
-            inputs_and_m_prev,
-            tf.concat([self.input_to_cell_w, self.cell_to_cell_w], axis=1),
-            transpose_b=True), self.cell_bias)
-
-    # Diagonal connections
-    if self._use_peepholes:
-      c = (
-          sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
-          sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
-    else:
-      c = (
-          sigmoid(f + self._forget_bias) * c_prev +
-          sigmoid(i) * self._activation(j))
-
-    if self._cell_clip is not None:
-      # pylint: disable=invalid-unary-operand-type
-      c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
-      # pylint: enable=invalid-unary-operand-type
-    if self._use_peepholes:
-      m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
-    else:
-      m = sigmoid(o) * self._activation(c)
-
-    if self._num_proj is not None:
-      transposed_proj_kernel = tf.transpose(self._proj_kernel)
-      m = math_ops.matmul(m, transposed_proj_kernel)
-
-      if self._proj_clip is not None:
-        # pylint: disable=invalid-unary-operand-type
-        m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
-        # pylint: enable=invalid-unary-operand-type
-
-    c = self._tflite_wrapper.add_output(
-        c, tag="c", name="c", aggregate="last", index_override=1)
-    m = self._tflite_wrapper.add_output(
-        m, tag="m", name="m", index_override=2, aggregate="stack")
-
-    new_state = (
-        tf.nn.rnn_cell.LSTMStateTuple(c, m)
-        if self._state_is_tuple else array_ops.concat([c, m], 1))
-    return m, new_state
-
-  def get_config(self):
-    config = {
-        "num_units": self._num_units,
-        "use_peepholes": self._use_peepholes,
-        "cell_clip": self._cell_clip,
-        "initializer": initializers.serialize(self._initializer),
-        "num_proj": self._num_proj,
-        "proj_clip": self._proj_clip,
-        "num_unit_shards": self._num_unit_shards,
-        "num_proj_shards": self._num_proj_shards,
-        "forget_bias": self._forget_bias,
-        "state_is_tuple": self._state_is_tuple,
-        "activation": activations.serialize(self._activation),
-        "reuse": self._reuse,
-    }
-    base_config = super(TFLiteLSTMCell, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-def dynamic_rnn(cell,
-                inputs,
-                sequence_length=None,
-                initial_state=None,
-                dtype=None,
-                parallel_iterations=None,
-                swap_memory=False,
-                time_major=True,
-                scope=None):
-  """Creates a recurrent neural network specified by RNNCell `cell`.
-
-  Performs fully dynamic unrolling of `inputs`.
-
-  Example:
-
-  ```python
-  # create a BasicRNNCell
-  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
-
-  # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]
-
-  # defining initial state
-  initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
-
-  # 'state' is a tensor of shape [batch_size, cell_state_size]
-  outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
-                                     initial_state=initial_state,
-                                     dtype=tf.float32)
-  ```
-
-  ```python
-  # create 2 LSTMCells
-  rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]
-
-  # create a RNN cell composed sequentially of a number of RNNCells
-  multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
-
-  # 'outputs' is a tensor of shape [batch_size, max_time, 256]
-  # 'state' is a N-tuple where N is the number of LSTMCells containing a
-  # tf.contrib.rnn.LSTMStateTuple for each cell
-  outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
-                                     inputs=data,
-                                     dtype=tf.float32)
-  ```
-
-
-  Args:
-    cell: An instance of RNNCell.
-    inputs: The RNN inputs.
-      If `time_major == False` (default), this must be a `Tensor` of shape:
-        `[batch_size, max_time, ...]`, or a nested tuple of such elements.
-      If `time_major == True`, this must be a `Tensor` of shape: `[max_time,
-        batch_size, ...]`, or a nested tuple of such elements. This may also be
-        a (possibly nested) tuple of Tensors satisfying this property.  The
-        first two dimensions must match across all the inputs, but otherwise the
-        ranks and other shape components may differ. In this case, input to
-        `cell` at each time-step will replicate the structure of these tuples,
-        except for the time dimension (from which the time is taken). The input
-        to `cell` at each time step will be a `Tensor` or (possibly nested)
-        tuple of Tensors each with dimensions `[batch_size, ...]`.
-    sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. Used
-      to copy-through state and zero-out outputs when past a batch element's
-      sequence length.  So it's more for performance than correctness.
-    initial_state: (optional) An initial state for the RNN. If `cell.state_size`
-      is an integer, this must be a `Tensor` of appropriate type and shape
-      `[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this
-      should be a tuple of tensors having shapes `[batch_size, s] for s in
-      cell.state_size`.
-    dtype: (optional) The data type for the initial state and expected output.
-      Required if initial_state is not provided or RNN state has a heterogeneous
-      dtype.
-    parallel_iterations: (Default: 32).  The number of iterations to run in
-      parallel.  Those operations which do not have any temporal dependency and
-      can be run in parallel, will be.  This parameter trades off time for
-      space.  Values >> 1 use more memory but take less time, while smaller
-      values use less memory but computations take longer.
-    swap_memory: Transparently swap the tensors produced in forward inference
-      but needed for back prop from GPU to CPU.  This allows training RNNs which
-      would typically not fit on a single GPU, with very minimal (or no)
-      performance penalty.
-    time_major: The shape format of the `inputs` and `outputs` Tensors. If true,
-      these `Tensors` must be shaped `[max_time, batch_size, depth]`. If false,
-      these `Tensors` must be shaped `[batch_size, max_time, depth]`. Using
-      `time_major = True` is a bit more efficient because it avoids transposes
-      at the beginning and end of the RNN calculation.  However, most TensorFlow
-      data is batch-major, so by default this function accepts input and emits
-      output in batch-major form.
-    scope: VariableScope for the created subgraph; defaults to "rnn".
-
-  Returns:
-    A pair (outputs, state) where:
-
-    outputs: The RNN output `Tensor`.
-
-      If time_major == False (default), this will be a `Tensor` shaped:
-        `[batch_size, max_time, cell.output_size]`.
-
-      If time_major == True, this will be a `Tensor` shaped:
-        `[max_time, batch_size, cell.output_size]`.
-
-      Note, if `cell.output_size` is a (possibly nested) tuple of integers
-      or `TensorShape` objects, then `outputs` will be a tuple having the
-      same structure as `cell.output_size`, containing Tensors having shapes
-      corresponding to the shape data in `cell.output_size`.
-
-    state: The final state.  If `cell.state_size` is an int, this
-      will be shaped `[batch_size, cell.state_size]`.  If it is a
-      `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
-      If it is a (possibly nested) tuple of ints or `TensorShape`, this will
-      be a tuple having the corresponding shapes. If cells are `LSTMCells`
-      `state` will be a tuple containing a `LSTMStateTuple` for each cell.
-
-  Raises:
-    TypeError: If `cell` is not an instance of RNNCell.
-    ValueError: If inputs is None or an empty list.
-    RuntimeError: If not using control flow v2.
-  """
-
-  # Currently only support time_major == True case.
-  assert time_major
-
-  # TODO(b/123051275): We need to check if the cells are TfLiteLSTMCells or
-  # TfLiteRNNCells.
-  rnn_cell_impl.assert_like_rnncell("cell", cell)
-
-  if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
-    raise RuntimeError("OpHint dynamic rnn only supports control flow v2.")
-
-  parent_first_child_input = [{
-      "parent_ophint_input_index": 0,
-      "first_child_ophint_input_index": 0
-  }]
-  parent_last_child_output = [{
-      "parent_output_index": 0,
-      # For LstmCell, the index is 2.
-      # For RnnCell, the index is 1.
-      # So we use -1 meaning it's the last one.
-      "child_output_index": -1
-  }]
-  internal_children_input_output = [{
-      "child_input_index": 0,
-      # For LstmCell, the index is 2.
-      # For RnnCell, the index is 1.
-      # So we use -1 meaning it's the last one.
-      "child_output_index": -1
-  }]
-  inputs_outputs_mappings = {
-      "parent_first_child_input": parent_first_child_input,
-      "parent_last_child_output": parent_last_child_output,
-      "internal_children_input_output": internal_children_input_output
-  }
-  tflite_wrapper = lite.OpHint(
-      "TfLiteDynamicRnn",
-      level=2,
-      children_inputs_mappings=inputs_outputs_mappings)
-  with vs.variable_scope(scope or "rnn") as varscope:
-    # Create a new scope in which the caching device is either
-    # determined by the parent scope, or is set to place the cached
-    # Variable using the same placement as for the rest of the RNN.
-    if _should_cache():
-      if varscope.caching_device is None:
-        varscope.set_caching_device(lambda op: op.device)
-
-    inputs = tflite_wrapper.add_input(inputs, name="input", index_override=0)
-
-    # By default, time_major==False and inputs are batch-major: shaped
-    #   [batch, time, depth]
-    # For internal calculations, we transpose to [time, batch, depth]
-    flat_input = nest.flatten(inputs)
-
-    if not time_major:
-      # (batch, time, depth) => (time, batch, depth)
-      flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
-      flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)
-
-    parallel_iterations = parallel_iterations or 32
-    if sequence_length is not None:
-      sequence_length = math_ops.to_int32(sequence_length)
-      if sequence_length.get_shape().rank not in (None, 1):
-        raise ValueError(
-            "sequence_length must be a vector of length batch_size, "
-            "but saw shape: %s" % sequence_length.get_shape())
-      sequence_length = array_ops.identity(  # Just to find it in the graph.
-          sequence_length,
-          name="sequence_length")
-
-    batch_size = _best_effort_input_batch_size(flat_input)
-
-    if initial_state is not None:
-      state = initial_state
-    else:
-      if not dtype:
-        raise ValueError("If there is no initial_state, you must give a dtype.")
-      if getattr(cell, "get_initial_state", None) is not None:
-        state = cell.get_initial_state(
-            inputs=None, batch_size=batch_size, dtype=dtype)
-      else:
-        state = cell.zero_state(batch_size, dtype)
-
-    def _assert_has_shape(x, shape):
-      x_shape = array_ops.shape(x)
-      packed_shape = array_ops.stack(shape)
-      return control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [
-              "Expected shape for Tensor %s is " % x.name, packed_shape,
-              " but saw shape: ", x_shape
-          ])
-
-    if not context.executing_eagerly() and sequence_length is not None:
-      # Perform some shape validation
-      with ops.control_dependencies(
-          [_assert_has_shape(sequence_length, [batch_size])]):
-        sequence_length = array_ops.identity(
-            sequence_length, name="CheckSeqLen")
-
-    inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)
-
-    outputs, final_state = _dynamic_rnn_loop(
-        cell,
-        inputs,
-        state,
-        parallel_iterations=parallel_iterations,
-        swap_memory=swap_memory,
-        sequence_length=sequence_length,
-        dtype=dtype)
-
-    # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
-    # If we are performing batch-major calculations, transpose output back
-    # to shape [batch, time, depth]
-    if not time_major:
-      # (time, batch, depth) => (batch, time, depth)
-      outputs = nest.map_structure(_transpose_batch_time, outputs)
-    outputs = tflite_wrapper.add_output(outputs, name="outputs")
-
-    return outputs, final_state
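The deleted `dynamic_rnn` above lives on as `tensorflow.lite.experimental.examples.lstm.rnn.dynamic_rnn` (see the updated test imports below). A minimal sketch of the time-major calling convention it enforces, assuming TF 1.x graph mode with control flow v2 enabled and the relocated `TfLiteRNNCell`; sizes are hypothetical:

```python
# Sketch only: assumes TF 1.x graph mode with control flow v2 enabled,
# matching the `assert time_major` / ENABLE_CONTROL_FLOW_V2 checks above.
import tensorflow as tf
from tensorflow.lite.experimental.examples.lstm.rnn import dynamic_rnn
from tensorflow.lite.experimental.examples.lstm.rnn_cell import TfLiteRNNCell

batch_size, time_steps, depth, num_units = 4, 28, 28, 16

# Inputs arrive batch-major [batch, time, depth]; the wrapper requires
# time_major=True, so transpose to [time, batch, depth] first.
x = tf.placeholder(tf.float32, [batch_size, time_steps, depth])
time_major_x = tf.transpose(x, perm=[1, 0, 2])

cell = TfLiteRNNCell(num_units)
# Outputs come back shaped [time, batch, num_units].
outputs, final_state = dynamic_rnn(cell, time_major_x, dtype=tf.float32)
```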
diff --git a/tensorflow/lite/experimental/examples/lstm/tflite_rnn.py b/tensorflow/lite/experimental/examples/lstm/tflite_rnn.py
deleted file mode 100644
index e4aad18..0000000
--- a/tensorflow/lite/experimental/examples/lstm/tflite_rnn.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""TfLite BasicRnnCell wrapper.
-
-TODO(renjieliu): Find a better home for this one.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import itertools
-
-from tensorflow.lite.python import lite
-from tensorflow.python.keras import activations
-from tensorflow.python.layers import base as base_layer
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import rnn_cell_impl
-
-
-class TfLiteRNNCell(rnn_cell_impl.LayerRNNCell):
-  """The most basic RNN cell.
-
-  This is used only for TfLite; it provides hints, and it also creates the
-  variables in the format desired by the TFLite ops.
-  """
-
-  def __init__(self,
-               num_units,
-               activation=None,
-               reuse=None,
-               name=None,
-               dtype=None,
-               **kwargs):
-    """Initializes the parameters for an RNN cell.
-
-    Args:
-      num_units: int, The number of units in the RNN cell.
-      activation: Nonlinearity to use. Default: `tanh`. It may also be a string
-        naming a Keras activation function.
-      reuse: (optional) Python boolean describing whether to reuse variables in
-        an existing scope. Raises an error if not `True` and the existing scope
-        already has the given variables.
-      name: String, the name of the layer. Layers with the same name will share
-        weights, but to avoid mistakes we require reuse=True in such cases.
-      dtype: Default dtype of the layer (default of `None` means use the type of
-        the first input). Required when `build` is called before `call`.
-      **kwargs: Dict, keyword named properties for common layer attributes, like
-        `trainable` etc when constructing the cell from configs of get_config().
-
-    Raises:
-      ValueError: If the existing scope already has the given variables.
-    """
-    super(TfLiteRNNCell, self).__init__(
-        _reuse=reuse, name=name, dtype=dtype, **kwargs)
-
-    # Inputs must be Rank-2.
-    self.input_spec = base_layer.InputSpec(ndim=2)
-
-    self._tflite_wrapper = lite.OpHint("UnidirectionalSequenceRnn")
-    self._num_units = num_units
-    if activation:
-      self._activation = activations.get(activation)
-    else:
-      self._activation = math_ops.tanh
-
-  @property
-  def state_size(self):
-    return self._num_units
-
-  @property
-  def output_size(self):
-    return self._num_units
-
-  def build(self, inputs_shape):
-    """Builds the RNN cell.
-
-    Args:
-      inputs_shape: Rnn input tensor shape.
-
-    Raises:
-      ValueError: If last dimension of the input shape is not known.
-    """
-    if inputs_shape[-1] is None:
-      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
-                       (inputs_shape,))
-
-    input_depth = inputs_shape[-1]
-
-    def add_variable_wrapped(name, shape, initializer, index):
-      var = self.add_variable(name, shape=shape, initializer=initializer)
-      return self._tflite_wrapper.add_input(
-          var, name=name, index_override=index)
-
-    self._input_weights = add_variable_wrapped(
-        "input_weights", [self._num_units, input_depth], None, 1)
-    self._recurrent_weights = add_variable_wrapped(
-        "recurrent_weights", [self._num_units, self._num_units], None, 2)
-    self._bias = add_variable_wrapped(
-        "bias",
-        shape=[self._num_units],
-        initializer=init_ops.zeros_initializer(dtype=self.dtype),
-        index=3)
-
-    self.built = True
-
-  def call(self, inputs, state):
-    """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
-    inputs = self._tflite_wrapper.add_input(
-        inputs, tag="input", name="input", aggregate="stack", index_override=0)
-    state = self._tflite_wrapper.add_input(
-        state,
-        tag="hidden_state",
-        name="hidden_state",
-        aggregate="first",
-        index_override=4)
-    weights = array_ops.transpose(
-        array_ops.concat([self._input_weights, self._recurrent_weights], 1))
-    gate_inputs = math_ops.matmul(array_ops.concat([inputs, state], 1), weights)
-    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
-    output = self._activation(gate_inputs)
-    output = self._tflite_wrapper.add_output(
-        output,
-        tag="output",
-        name="output",
-        index_override=1,
-        aggregate="stack")
-    return output, output
-
-  def get_config(self):
-    config = {
-        "num_units": self._num_units,
-        "activation": activations.serialize(self._activation),
-        "reuse": self._reuse,
-    }
-    base_config = super(TfLiteRNNCell, self).get_config()
-    return dict(itertools.chain(base_config.items(), config.items()))
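The `call` method above is the textbook basic-RNN update, `output = new_state = act(W * input + U * state + B)`, with the input and recurrent weight matrices concatenated before a single matmul. A NumPy sketch with hypothetical sizes to make the shape bookkeeping concrete (`input_weights` is `[num_units, input_depth]` and `recurrent_weights` is `[num_units, num_units]`, as built above):

```python
import numpy as np

batch, depth, units = 2, 3, 4
x = np.random.randn(batch, depth).astype(np.float32)   # inputs
h = np.zeros((batch, units), dtype=np.float32)          # previous state

input_weights = np.random.randn(units, depth).astype(np.float32)
recurrent_weights = np.random.randn(units, units).astype(np.float32)
bias = np.zeros(units, dtype=np.float32)

# Mirrors call(): W = transpose(concat([input_weights, recurrent_weights], 1))
w = np.concatenate([input_weights, recurrent_weights], axis=1).T  # [depth+units, units]
gate_inputs = np.concatenate([x, h], axis=1) @ w + bias           # [batch, units]
output = np.tanh(gate_inputs)  # output doubles as the new state
```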
diff --git a/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py b/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
index ed02d6c..b2e91c4 100644
--- a/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
+++ b/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
@@ -20,8 +20,8 @@
 import tensorflow as tf
 
 from tensorflow.examples.tutorials.mnist import input_data
-from tensorflow.lite.experimental.examples.lstm.tflite_lstm import dynamic_rnn
-from tensorflow.lite.experimental.examples.lstm.tflite_lstm import TFLiteLSTMCell
+from tensorflow.lite.experimental.examples.lstm.rnn import dynamic_rnn
+from tensorflow.lite.experimental.examples.lstm.rnn_cell import TFLiteLSTMCell
 from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
diff --git a/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_rnn_test.py b/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_rnn_test.py
index af7ac4c..78a2e45 100644
--- a/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_rnn_test.py
+++ b/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_rnn_test.py
@@ -22,7 +22,8 @@
 from tensorflow import flags
 
 from tensorflow.examples.tutorials.mnist import input_data
-from tensorflow.lite.experimental.examples.lstm.tflite_rnn import TfLiteRNNCell
+from tensorflow.lite.experimental.examples.lstm.rnn import dynamic_rnn
+from tensorflow.lite.experimental.examples.lstm.rnn_cell import TfLiteRNNCell
 from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
@@ -66,7 +67,7 @@
         TfLiteRNNCell(self.num_units, name="rnn2")
     ])
 
-  def buildModel(self, rnn_layer):
+  def buildModel(self, rnn_layer, is_dynamic_rnn):
     # Weights and biases for output softmax layer.
     out_weights = tf.Variable(
         tf.random_normal([self.num_units, self.n_classes]))
@@ -77,8 +78,13 @@
         "float", [None, self.time_steps, self.n_input], name="INPUT_IMAGE")
 
     # x is shaped [batch_size,time_steps,num_inputs]
-    rnn_input = tf.unstack(x, self.time_steps, 1)
-    outputs, _ = tf.nn.static_rnn(rnn_layer, rnn_input, dtype="float32")
+    if is_dynamic_rnn:
+      rnn_input = tf.transpose(x, perm=[1, 0, 2])
+      outputs, _ = dynamic_rnn(rnn_layer, rnn_input, dtype="float32")
+      outputs = tf.unstack(outputs, axis=0)
+    else:
+      rnn_input = tf.unstack(x, self.time_steps, 1)
+      outputs, _ = tf.nn.static_rnn(rnn_layer, rnn_input, dtype="float32")
 
     # Compute logits by multiplying outputs[-1] of shape [batch_size,num_units]
     # by the softmax layer's out_weight of shape [num_units,n_classes]
@@ -108,13 +114,14 @@
                                  self.n_input))
       sess.run(opt, feed_dict={x: batch_x, y: batch_y})
 
-  def saveAndRestoreModel(self, rnn_layer, sess, saver):
+  def saveAndRestoreModel(self, rnn_layer, sess, saver, is_dynamic_rnn):
     """Saves and restores the model to mimic the most common use case.
 
     Args:
       rnn_layer: The rnn layer either a single rnn cell or a multi rnn cell.
       sess: Old session.
       saver: saver created by tf.train.Saver()
+      is_dynamic_rnn: Whether to use dynamic_rnn.
 
     Returns:
       A tuple containing:
@@ -130,7 +137,7 @@
 
     # Reset the graph.
     tf.reset_default_graph()
-    x, prediction, output_class = self.buildModel(rnn_layer)
+    x, prediction, output_class = self.buildModel(rnn_layer, is_dynamic_rnn)
 
     new_sess = tf.Session(config=CONFIG)
     saver = tf.train.Saver()
@@ -177,12 +184,32 @@
   def testStaticRnnMultiRnnCell(self):
     sess = tf.Session(config=CONFIG)
 
-    x, prediction, output_class = self.buildModel(self.buildRnnLayer())
+    x, prediction, output_class = self.buildModel(
+        self.buildRnnLayer(), is_dynamic_rnn=False)
     self.trainModel(x, prediction, output_class, sess)
 
     saver = tf.train.Saver()
     x, prediction, output_class, new_sess = self.saveAndRestoreModel(
-        self.buildRnnLayer(), sess, saver)
+        self.buildRnnLayer(), sess, saver, is_dynamic_rnn=False)
+
+    test_inputs, expected_output, frozen_graph = self.getInferenceResult(
+        x, output_class, new_sess)
+
+    result = self.tfliteInvoke(frozen_graph, test_inputs, output_class)
+    self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2))
+
+  @test_util.enable_control_flow_v2
+  def testDynamicRnnMultiRnnCell(self):
+    sess = tf.Session(config=CONFIG)
+
+    x, prediction, output_class = self.buildModel(
+        self.buildRnnLayer(), is_dynamic_rnn=True)
+    self.trainModel(x, prediction, output_class, sess)
+
+    saver = tf.train.Saver()
+
+    x, prediction, output_class, new_sess = self.saveAndRestoreModel(
+        self.buildRnnLayer(), sess, saver, is_dynamic_rnn=True)
 
     test_inputs, expected_output, frozen_graph = self.getInferenceResult(
         x, output_class, new_sess)
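The new `is_dynamic_rnn` branch keeps the rest of the test unchanged by unstacking the time-major `dynamic_rnn` output into the per-timestep list that `tf.nn.static_rnn` would have returned. A self-contained sketch of just that bridge (sizes are hypothetical):

```python
import tensorflow as tf

time_steps, batch_size, num_units = 28, 4, 16

# dynamic_rnn returns outputs shaped [time, batch, num_units] ...
outputs = tf.zeros([time_steps, batch_size, num_units])

# ... while static_rnn returns a Python list of time_steps tensors, each
# [batch, num_units]. Unstacking along axis 0 bridges the two, so
# outputs[-1] indexes the final timestep in both code paths.
per_step = tf.unstack(outputs, axis=0)
assert len(per_step) == time_steps
```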
diff --git a/tensorflow/lite/experimental/micro/README.md b/tensorflow/lite/experimental/micro/README.md
index 6892e5d..a3faf4d 100644
--- a/tensorflow/lite/experimental/micro/README.md
+++ b/tensorflow/lite/experimental/micro/README.md
@@ -10,6 +10,7 @@
 ## Table of Contents
 
 -   [Getting Started](#getting-started)
+
     *   [Getting Started with Portable Reference Code](#getting-started-with-portable-reference-code)
     *   [Building Portable Reference Code using Make](#building-portable-reference-code-using-make)
     *   [Building for the "Blue Pill" STM32F103 using Make](#building-for-the-blue-pill-stm32f103-using-make)
@@ -19,8 +20,11 @@
     *   [Building for the Eta Compute ECM3531 EVB using Make](#Building-for-the-Eta-Compute-ECM3531-EVB-using-Make)
 
 -   [Goals](#goals)
+
 -   [Generating Project Files](#generating-project-files)
+
 -   [How to Port TensorFlow Lite Micro to a New Platform](#how-to-port-tensorflow-lite-micro-to-a-new-platform)
+
     *   [Requirements](#requirements)
     *   [Getting Started](#getting-started)
     *   [Troubleshooting](#troubleshooting)
@@ -41,13 +45,13 @@
 If you're a product developer, we have build instructions or pre-generated
 project files that you can download for the following platforms:
 
-| Device | Mbed | Keil | Make/GCC
--------- | ---- | ---- | --------
-[STM32F746G Discovery Board](https://www.st.com/en/evaluation-tools/32f746gdiscovery.html) | [Download](https://drive.google.com/open?id=1OtgVkytQBrEYIpJPsE8F6GUKHPBS3Xeb) | - | [Download](https://drive.google.com/open?id=1u46mTtAMZ7Y1aD-He1u3R8AE4ZyEpnOl) | -
-["Blue Pill" STM32F103-compatible development board](https://github.com/google/stm32_bare_lib) | - | - | [Instructions](#building-for-the-blue-pill-stm32f103-using-make)
-[Ambiq Micro Apollo3Blue EVB using Make](https://ambiqmicro.com/apollo-ultra-low-power-mcus/) | - | - | [Instructions](#building-for-ambiq-micro-apollo3blue-evb-using-make)
-[Generic Keil uVision Projects](http://www2.keil.com/mdk5/uvision/) | - | [Download](https://drive.google.com/open?id=1Lw9rsdquNKObozClLPoE5CTJLuhfh5mV) | -
-[Eta Compute ECM3531 EVB](https://etacompute.com/) | - | - | [Instructions](#Building-for-the-Eta-Compute-ECM3531-EVB-using-Make)
+Device                                                                                         | Mbed                                                                           | Keil                                                                           | Make/GCC
+---------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------ | --------
+[STM32F746G Discovery Board](https://www.st.com/en/evaluation-tools/32f746gdiscovery.html)     | [Download](https://drive.google.com/open?id=1OtgVkytQBrEYIpJPsE8F6GUKHPBS3Xeb) | -                                                                              | [Download](https://drive.google.com/open?id=1u46mTtAMZ7Y1aD-He1u3R8AE4ZyEpnOl)
+["Blue Pill" STM32F103-compatible development board](https://github.com/google/stm32_bare_lib) | -                                                                              | -                                                                              | [Instructions](#building-for-the-blue-pill-stm32f103-using-make)
+[Ambiq Micro Apollo3Blue EVB using Make](https://ambiqmicro.com/apollo-ultra-low-power-mcus/)  | -                                                                              | -                                                                              | [Instructions](#building-for-ambiq-micro-apollo3blue-evb-using-make)
+[Generic Keil uVision Projects](http://www2.keil.com/mdk5/uvision/)                            | -                                                                              | [Download](https://drive.google.com/open?id=1Lw9rsdquNKObozClLPoE5CTJLuhfh5mV) | -
+[Eta Compute ECM3531 EVB](https://etacompute.com/)                                             | -                                                                              | -                                                                              | [Instructions](#Building-for-the-Eta-Compute-ECM3531-EVB-using-Make)
 
 If your device is not yet supported, it may not be too hard to add support. You
 can learn about that process
@@ -268,12 +272,11 @@
 
 Follow these steps to get the pushbutton yes/no example working on Apollo 3:
 
-1.  Make sure to run the "Getting Started" section before performing the
-    following steps
-2.  Download Apollo3-SDK-2018.08.13 and place in
-    `tensorflow/lite/experimental/micro/tools/make/downloads`. This is not yet
-    publicly released, but you can contact ashah@ambiqmicro.com to request a
-    copy.
+1.  Make sure to run the "Building Portable Reference Code using Make" section
+    before performing the following steps.
+2.  The Ambiq Micro SDK is downloaded into
+    `tensorflow/lite/experimental/micro/tools/make/downloads` by
+    'download_dependencies.sh'.
 3.  Compile the project with the following command: make -f
     tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=apollo3evb
     pushbutton_cmsis_speech_test_bin
@@ -300,7 +303,10 @@
     4.  Press BTN2. An LED will flash for 1 second. Speak your utterance during
         this one second
     5.  The debugger will print out four numbers. They are the probabilities for
-        1) no speech, 2) unknown speech, 3) yes, 4) no
+        1.  no speech
+        2.  unknown speech
+        3.  yes
+        4.  no
     6.  The EVB LEDs will indicate detection.
         1.  LED0 (rightmost LED) - ON when capturing 1sec of audio
         2.  LED1 - ON when detecting silence
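The four scores printed in step 5 correspond, in order, to the categories listed there. A tiny illustrative sketch of picking the winning label (the score values are made up):

```python
labels = ["no speech", "unknown speech", "yes", "no"]
scores = [11, 29, 208, 7]  # hypothetical model outputs
winner = labels[scores.index(max(scores))]
print(winner)  # -> "yes"
```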
@@ -319,29 +325,51 @@
 5. Prog Addr = 0x0000C000
 
 ## Building for the Eta Compute ECM3531 EVB using Make
-1. Follow the instructions at [Tensorflow Micro Speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech#getting-started) to down load the Tensorflow source code and the support libraries \(but do not run the make command shown there.\)
-2. Download the Eta Compute SDK, version 0.0.17. Contact info@etacompute.com
-3. You will need the the Arm compiler arm-none-eabi-gcc, version 7.3.1 20180622, release ARM/embedded-7-branch revision 261907, 7-2018-q2-update.  This compiler is downloaded when you run the tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh script.
-4. Edit the file tensorflow/lite/experimental/micro/tools/make/targets/ecm3531_makefile.inc so that the variables ETA_SDK and GCC_ARM point to the correct directories.
-5. Compile the code with the command  
-&nbsp;&nbsp;&nbsp;&nbsp;make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=ecm3531 TAGS="CMSIS"  test  
-This will produce a set of executables in the tensorflow/lite/experimental/micro/tools/make/gen/ecm3531_cortex-m3/bin directory. 
-6. To load an executable into SRAM  
-&nbsp;&nbsp;&nbsp;&nbsp;Start ocd  
-&nbsp;&nbsp;&nbsp;&nbsp;cd tensorflow/lite/experimental/micro/tools/make/targets/ecm3531  
-&nbsp;&nbsp;&nbsp;&nbsp;./load_program name_of_executable, for e.g., ./load_program  audio_provider_test  
-&nbsp;&nbsp;&nbsp;&nbsp;Start PuTTY \(Connection type = Serial, Speed = 11520, Data bits = 8, Stop bits = 1,  Parity = None\)  
-The following output should appear:  
-Testing TestAudioProvider  
-Testing TestTimer  
-2/2 tests passed  
-\~\~\~ALL TESTS PASSED\~\~\~  
-Execution time \(msec\) = 7
-7. To load into flash  
-&nbsp;&nbsp;&nbsp;&nbsp;Edit the variable ETA_LDS_FILE in tensorflow/lite/experimental/micro/tools/&nbsp;&nbsp;make/targets/ecm3531_makefile.inc to point to the ecm3531_flash.lds file  
-&nbsp;&nbsp;&nbsp;&nbsp;Recompile  \( make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=ecm3531 TAGS="CMSIS"  test\)  
-&nbsp;&nbsp;&nbsp;&nbsp;cd tensorflow/lite/experimental/micro/tools/make/targets/ecm3531  
-&nbsp;&nbsp;&nbsp;&nbsp;./flash_program executable_name  to load into flash.
+
+1.  Follow the instructions at
+    [Tensorflow Micro Speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech#getting-started)
+    to download the TensorFlow source code and the support libraries \(but do
+    not run the make command shown there\).
+2.  Download the Eta Compute SDK, version 0.0.17. Contact info@etacompute.com.
+3.  You will need the Arm compiler arm-none-eabi-gcc, version 7.3.1
+    20180622, release ARM/embedded-7-branch revision 261907, 7-2018-q2-update.
+    This compiler is downloaded when you run the
+    tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh
+    script.
+4.  Edit the file
+    tensorflow/lite/experimental/micro/tools/make/targets/ecm3531_makefile.inc
+    so that the variables ETA_SDK and GCC_ARM point to the correct directories.
+5.  Compile the code with the command \
+    &nbsp;&nbsp;&nbsp;&nbsp;make -f
+    tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=ecm3531
+    TAGS="CMSIS" test \
+    This will produce a set of executables in the
+    tensorflow/lite/experimental/micro/tools/make/gen/ecm3531_cortex-m3/bin
+    directory.
+6.  To load an executable into SRAM \
+    &nbsp;&nbsp;&nbsp;&nbsp;Start ocd \
+    &nbsp;&nbsp;&nbsp;&nbsp;cd
+    tensorflow/lite/experimental/micro/tools/make/targets/ecm3531 \
+    &nbsp;&nbsp;&nbsp;&nbsp;./load_program name_of_executable, e.g.,
+    ./load_program audio_provider_test \
+    &nbsp;&nbsp;&nbsp;&nbsp;Start PuTTY \(Connection type = Serial, Speed =
+    11520, Data bits = 8, Stop bits = 1, Parity = None\) \
+    The following output should appear: \
+    Testing TestAudioProvider \
+    Testing TestTimer \
+    2/2 tests passed \
+    \~\~\~ALL TESTS PASSED\~\~\~ \
+    Execution time \(msec\) = 7
+7.  To load into flash \
+    &nbsp;&nbsp;&nbsp;&nbsp;Edit the variable ETA_LDS_FILE in
+    tensorflow/lite/experimental/micro/tools/make/targets/ecm3531_makefile.inc
+    to point to the ecm3531_flash.lds file \
+    &nbsp;&nbsp;&nbsp;&nbsp;Recompile \( make -f
+    tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=ecm3531
+    TAGS="CMSIS" test\) \
+    &nbsp;&nbsp;&nbsp;&nbsp;cd
+    tensorflow/lite/experimental/micro/tools/make/targets/ecm3531 \
+    &nbsp;&nbsp;&nbsp;&nbsp;./flash_program executable_name to load into flash.
 
 ## Goals
 
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
index 51ba297..29d40e7 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
@@ -11,34 +11,34 @@
 )
 
 cc_library(
-    name = "model_settings",
+    name = "simple_model_settings",
     srcs = [
-        "model_settings.cc",
+        "simple_features/simple_model_settings.cc",
     ],
     hdrs = [
-        "model_settings.h",
+        "simple_features/simple_model_settings.h",
     ],
 )
 
 cc_library(
-    name = "tiny_conv_model_data",
+    name = "tiny_conv_simple_features_model_data",
     srcs = [
-        "tiny_conv_model_data.cc",
+        "simple_features/tiny_conv_simple_features_model_data.cc",
     ],
     hdrs = [
-        "tiny_conv_model_data.h",
+        "simple_features/tiny_conv_simple_features_model_data.h",
     ],
 )
 
 cc_library(
-    name = "features_test_data",
+    name = "simple_features_test_data",
     srcs = [
-        "no_features_data.cc",
-        "yes_features_data.cc",
+        "simple_features/no_simple_features_data.cc",
+        "simple_features/yes_simple_features_data.cc",
     ],
     hdrs = [
-        "no_features_data.h",
-        "yes_features_data.h",
+        "simple_features/no_simple_features_data.h",
+        "simple_features/yes_simple_features_data.h",
     ],
 )
 
@@ -48,10 +48,10 @@
         "micro_speech_test.cc",
     ],
     deps = [
-        ":features_test_data",
-        ":tiny_conv_model_data",
         "//tensorflow/lite:schema_fbs_version",
         "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_features_test_data",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:tiny_conv_micro_features_model_data",
         "//tensorflow/lite/experimental/micro/kernels:all_ops_resolver",
         "//tensorflow/lite/experimental/micro/kernels:micro_ops",
         "//tensorflow/lite/experimental/micro/testing:micro_test",
@@ -60,45 +60,66 @@
 )
 
 cc_library(
-    name = "preprocessor_test_data",
+    name = "audio_sample_test_data",
     srcs = [
         "no_30ms_sample_data.cc",
-        "no_power_spectrum_data.cc",
         "yes_30ms_sample_data.cc",
-        "yes_power_spectrum_data.cc",
     ],
     hdrs = [
         "no_30ms_sample_data.h",
-        "no_power_spectrum_data.h",
         "yes_30ms_sample_data.h",
-        "yes_power_spectrum_data.h",
     ],
 )
 
 cc_library(
-    name = "preprocessor_reference",
+    name = "audio_large_sample_test_data",
     srcs = [
-        "preprocessor.cc",
+        "no_1000ms_sample_data.cc",
+        "yes_1000ms_sample_data.cc",
     ],
     hdrs = [
-        "preprocessor.h",
+        "no_1000ms_sample_data.h",
+        "yes_1000ms_sample_data.h",
+    ],
+)
+
+cc_library(
+    name = "simple_features_generator_test_data",
+    srcs = [
+        "simple_features/no_power_spectrum_data.cc",
+        "simple_features/yes_power_spectrum_data.cc",
+    ],
+    hdrs = [
+        "simple_features/no_power_spectrum_data.h",
+        "simple_features/yes_power_spectrum_data.h",
+    ],
+)
+
+cc_library(
+    name = "simple_features_generator_reference",
+    srcs = [
+        "simple_features/simple_features_generator.cc",
+    ],
+    hdrs = [
+        "simple_features/simple_features_generator.h",
     ],
     deps = [
-        ":model_settings",
+        ":simple_model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
     ],
 )
 
 tflite_micro_cc_test(
-    name = "preprocessor_reference_test",
+    name = "simple_features_generator_reference_test",
     srcs = [
-        "preprocessor_test.cc",
+        "simple_features/simple_features_generator_test.cc",
     ],
     deps = [
-        ":model_settings",
-        ":preprocessor_reference",
-        ":preprocessor_test_data",
+        ":audio_sample_test_data",
+        ":simple_features_generator_reference",
+        ":simple_features_generator_test_data",
+        ":simple_model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
         "//tensorflow/lite/experimental/micro/testing:micro_test",
@@ -106,29 +127,30 @@
 )
 
 cc_library(
-    name = "preprocessor_fixed",
+    name = "simple_features_generator_fixed",
     srcs = [
-        "fixed_point/preprocessor.cc",
+        "simple_features/fixed_point/simple_features_generator.cc",
     ],
     hdrs = [
-        "preprocessor.h",
+        "simple_features/simple_features_generator.h",
     ],
     deps = [
-        ":model_settings",
+        ":simple_model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
     ],
 )
 
 tflite_micro_cc_test(
-    name = "preprocessor_fixed_test",
+    name = "simple_features_generator_fixed_test",
     srcs = [
-        "preprocessor_test.cc",
+        "simple_features/simple_features_generator_test.cc",
     ],
     deps = [
-        ":model_settings",
-        ":preprocessor_fixed",
-        ":preprocessor_test_data",
+        ":audio_sample_test_data",
+        ":simple_features_generator_fixed",
+        ":simple_features_generator_test_data",
+        ":simple_model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
         "//tensorflow/lite/experimental/micro/testing:micro_test",
@@ -144,9 +166,25 @@
         "audio_provider.h",
     ],
     deps = [
-        ":model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
+    ],
+)
+
+cc_library(
+    name = "audio_provider_mock",
+    srcs = [
+        "audio_provider_mock.cc",
+    ],
+    hdrs = [
+        "audio_provider.h",
+    ],
+    deps = [
+        ":audio_large_sample_test_data",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
     ],
 )
 
@@ -157,9 +195,24 @@
     ],
     deps = [
         ":audio_provider",
-        ":model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "audio_provider_mock_test",
+    srcs = [
+        "audio_provider_mock_test.cc",
+    ],
+    deps = [
+        ":audio_large_sample_test_data",
+        ":audio_provider_mock",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
         "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
@@ -174,10 +227,10 @@
     ],
     deps = [
         ":audio_provider",
-        ":model_settings",
-        ":preprocessor_reference",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_features_generator",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
     ],
 )
 
@@ -189,9 +242,41 @@
     deps = [
         ":audio_provider",
         ":feature_provider",
-        ":model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+cc_library(
+    name = "feature_provider_mock",
+    srcs = [
+        "feature_provider.cc",
+    ],
+    hdrs = [
+        "feature_provider.h",
+    ],
+    deps = [
+        ":audio_provider_mock",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_features_generator",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "feature_provider_mock_test",
+    srcs = [
+        "feature_provider_mock_test.cc",
+    ],
+    deps = [
+        ":feature_provider_mock",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_features_test_data",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
         "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
@@ -205,9 +290,9 @@
         "recognize_commands.h",
     ],
     deps = [
-        ":model_settings",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
     ],
 )
 
@@ -235,11 +320,29 @@
     deps = [
         ":audio_provider",
         ":feature_provider",
-        ":model_settings",
         ":recognize_commands",
-        ":tiny_conv_model_data",
         "//tensorflow/lite:schema_fbs_version",
         "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:tiny_conv_micro_features_model_data",
+        "//tensorflow/lite/experimental/micro/kernels:all_ops_resolver",
+        "//tensorflow/lite/schema:schema_fbs",
+    ],
+)
+
+cc_binary(
+    name = "micro_speech_mock",
+    srcs = [
+        "main.cc",
+    ],
+    deps = [
+        ":audio_provider_mock",
+        ":feature_provider",
+        ":recognize_commands",
+        "//tensorflow/lite:schema_fbs_version",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:micro_model_settings",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech/micro_features:tiny_conv_micro_features_model_data",
         "//tensorflow/lite/experimental/micro/kernels:all_ops_resolver",
         "//tensorflow/lite/schema:schema_fbs",
     ],
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/Makefile.inc b/tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/Makefile.inc
index 2d0deb0..73b884f 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/Makefile.inc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/Makefile.inc
@@ -17,6 +17,7 @@
   CMSIS_PREPROCESSOR_HDRS := \
     tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/hanning.h \
     tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/sin_1k.h \
+    third_party/CMSIS_ext/README.md \
     third_party/CMSIS_ext/arm_cmplx_mag_squared_q10p6.h
 
   PREPROCESSOR_TEST_SRCS += $(CMSIS_PREPROCESSOR_SRCS)
@@ -43,6 +44,7 @@
     $(MAKEFILE_DIR)/downloads/cmsis/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c
 
   THIRD_PARTY_CC_HDRS += \
+    third_party/cmsis/LICENSE.txt \
     third_party/cmsis/CMSIS/Core/Include/cmsis_compiler.h \
     third_party/cmsis/CMSIS/Core/Include/cmsis_gcc.h \
     third_party/cmsis/CMSIS/Core/Include/cmsis_version.h \
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/preprocessor.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/preprocessor.cc
deleted file mode 100644
index 78155cc..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/preprocessor.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-extern "C" {
-#define IFFT_FLAG_R 0
-#define BIT_REVERSE_FLAG 1
-#define FFT_SIZE 512
-#define FFT_SIZE_DIV2 256
-#include <arm_math.h>
-#include "arm_cmplx_mag_squared_q10p6.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/hanning.h"
-}
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
-
-void quantize(q15_t* bufA, q15_t* bufB, uint8_t* output);
-
-q15_t bufA[FFT_SIZE];
-q15_t bufB[FFT_SIZE];
-arm_rfft_instance_q15 S_arm_fft;
-arm_status arm_math_status;
-
-namespace {
-// These constants allow us to allocate fixed-sized arrays on the stack for our
-// working memory.
-constexpr int kInputSize = 512;
-constexpr int kAverageWindowSize = 6;
-constexpr int kOutputSize =
-    ((kInputSize / 2) + (kAverageWindowSize - 1)) / kAverageWindowSize;
-}  // namespace
-
-TfLiteStatus Preprocess(tflite::ErrorReporter* error_reporter,
-                        const int16_t* input, int input_size, int output_size,
-                        uint8_t* output) {
-  if (input_size > kInputSize) {
-    error_reporter->Report("Input size %d larger than %d", input_size,
-                           kInputSize);
-    return kTfLiteError;
-  }
-  if (output_size != kOutputSize) {
-    error_reporter->Report("Requested output size %d doesn't match %d",
-                           output_size, kOutputSize);
-    return kTfLiteError;
-  }
-
-  // 30ms at 16 kHz = 480 samples
-  // We want to pad the rest of the 512-sample buffer with zeros
-  arm_mult_q15((q15_t*)input, g_hanning, bufB, 480);
-  int i;
-  for (i = 480; i < 512; i++) {
-    bufB[i] = 0;
-  }
-
-  // Should move init code outside of Preprocess() function
-  arm_math_status =
-      arm_rfft_init_q15(&S_arm_fft, FFT_SIZE, IFFT_FLAG_R, BIT_REVERSE_FLAG);
-  arm_rfft_q15(&S_arm_fft, bufB, bufA);
-
-  // The rfft function packs data as follows:
-  // {real[0], real[N/2], real[1], imag[1], ..., real[N/2-1], imag[N/2-1]}
-  // Below we pack as follows:
-  // {real[0], 0, real[1], imag[1], ..., real[N/2-1], imag[N/2-1, real[N/2], 0}
-  bufA[FFT_SIZE_DIV2] = bufA[1];
-  bufA[FFT_SIZE_DIV2 + 1] = 0;
-  bufA[1] = 0;
-  arm_cmplx_mag_squared_q10p6(bufA, bufB, FFT_SIZE_DIV2 + 1);
-
-  quantize(bufA, bufB, output);
-
-  return kTfLiteOk;
-}
-
-void quantize(q15_t* bufA, q15_t* bufB, uint8_t* output) {
-  int i;
-  for (i = 0; i < 42; i++) {
-    arm_mean_q15(bufB + 6 * i, 6, bufA + i);
-  }
-  arm_mean_q15(bufB + 252, 5, bufA + 42);
-
-  for (i = 0; i < 43; i++) {
-    output[i] = (uint8_t)(bufA[i] >> 5);
-  }
-}
-
-TfLiteStatus Preprocess_1sec(tflite::ErrorReporter* error_reporter,
-                             const int16_t* input, uint8_t* output) {
-  int i;
-  for (i = 0; i < 49; i++) {
-    Preprocess(error_reporter, input + i * 320, 480, 43, output + i * 43);
-  }
-  return kTfLiteOk;
-}
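The deleted CMSIS preprocessor above (superseded by the micro_features frontend wired up in the Makefile changes below) computes one 30 ms spectrogram slice: a Hanning window over 480 samples, zero-padding to 512, a real FFT, squared magnitudes, 6-bin averaging down to 43 values, and quantization to uint8. A floating-point NumPy sketch of the same pipeline, ignoring the q15/q10.6 fixed-point details:

```python
import numpy as np

def preprocess_30ms(samples):
    """Float sketch of the deleted q15 pipeline: 480 samples at 16 kHz in,
    43 uint8 feature values out."""
    assert samples.shape == (480,)
    # Hanning window, then zero-pad 480 -> 512 for the FFT.
    padded = np.concatenate([samples * np.hanning(480), np.zeros(32)])
    # Squared magnitude of the one-sided 257-point spectrum.
    power = np.abs(np.fft.rfft(padded)) ** 2                    # shape (257,)
    # Average pooling: 42 groups of 6 bins plus one final group of 5,
    # mirroring the arm_mean_q15 calls in quantize().
    pooled = np.array([power[6 * i:6 * (i + 1)].mean() for i in range(42)]
                      + [power[252:257].mean()])                # shape (43,)
    # The fixed-point code right-shifts the q10.6 means by 5; a plain
    # divide-and-clip stands in for that scaling here.
    return np.clip(pooled / 32.0, 0, 255).astype(np.uint8)

# Preprocess_1sec strides this over 49 overlapping frames (320-sample hop).
features = preprocess_30ms(np.random.randn(480))
```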
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc b/tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc
index 49aace3..c4e0f0e 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc
@@ -1,106 +1,349 @@
 
+INCLUDES += \
+ -I$(MAKEFILE_DIR)/downloads/kissfft
+
+PROJECT_INCLUDES += \
+third_party/kissfft
+
+KISSFFT_LIB_SRCS := \
+$(MAKEFILE_DIR)/downloads/kissfft/kiss_fft.c \
+$(MAKEFILE_DIR)/downloads/kissfft/tools/kiss_fftr.c
+
+KISSFFT_LIB_HDRS := \
+$(MAKEFILE_DIR)/downloads/kissfft/COPYING \
+$(MAKEFILE_DIR)/downloads/kissfft/kiss_fft.h \
+$(MAKEFILE_DIR)/downloads/kissfft/_kiss_fft_guts.h \
+$(MAKEFILE_DIR)/downloads/kissfft/tools/kiss_fftr.h
+
 MICRO_SPEECH_TEST_SRCS := \
 tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.cc
 
 MICRO_SPEECH_TEST_HDRS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h \
 
-PREPROCESSOR_TEST_SRCS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc \
+SIMPLE_FEATURES_GENERATOR_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.cc
 
-PREPROCESSOR_TEST_HDRS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h \
+SIMPLE_FEATURES_GENERATOR_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_30ms_sample_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_30ms_sample_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h
+
+MICRO_FEATURES_LIB_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.cc \
+$(KISSFFT_LIB_SRCS)
+
+MICRO_FEATURES_LIB_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h \
+$(KISSFFT_LIB_HDRS)
+
+MICRO_FEATURES_FFT_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.cc \
+$(KISSFFT_LIB_SRCS)
+
+MICRO_FEATURES_FFT_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h \
+$(KISSFFT_LIB_HDRS)
+
+MICRO_FEATURES_FILTERBANK_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.cc
+
+MICRO_FEATURES_FILTERBANK_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h
+
+MICRO_FEATURES_FRONTEND_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_test.cc \
+$(MICRO_FEATURES_LIB_SRCS)
+
+MICRO_FEATURES_FRONTEND_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h \
+$(MICRO_FEATURES_LIB_HDRS)
+
+MICRO_FEATURES_LOG_SCALE_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.cc
+
+MICRO_FEATURES_LOG_SCALE_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h
+
+MICRO_FEATURES_NOISE_REDUCTION_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.cc
+
+MICRO_FEATURES_NOISE_REDUCTION_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h
+
+MICRO_FEATURES_PCAN_GAIN_CONTROL_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.cc
+
+MICRO_FEATURES_PCAN_GAIN_CONTROL_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h
+
+MICRO_FEATURES_WINDOW_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.cc
+
+MICRO_FEATURES_WINDOW_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h
+
+MICRO_FEATURES_GENERATOR_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.cc \
+$(MICRO_FEATURES_LIB_SRCS)
+
+MICRO_FEATURES_GENERATOR_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h \
+$(MICRO_FEATURES_LIB_HDRS)
+
+MICRO_FEATURES_GENERATOR_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.cc \
+$(MICRO_FEATURES_GENERATOR_SRCS)
+
+MICRO_FEATURES_GENERATOR_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h \
 tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.h \
 tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.h
+$(MICRO_FEATURES_GENERATOR_HDRS)
 
 AUDIO_PROVIDER_TEST_SRCS := \
 tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_test.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc
 
 AUDIO_PROVIDER_TEST_HDRS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h \
+
+AUDIO_PROVIDER_MOCK_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock.cc
+
+AUDIO_PROVIDER_MOCK_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h \
 tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h \
 
 FEATURE_PROVIDER_TEST_SRCS := \
 tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_test.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc \
+$(MICRO_FEATURES_GENERATOR_SRCS)
 
 FEATURE_PROVIDER_TEST_HDRS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h \
 tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h \
+$(MICRO_FEATURES_GENERATOR_HDRS)
+
+FEATURE_PROVIDER_MOCK_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.cc \
+$(MICRO_FEATURES_GENERATOR_SRCS)
+
+FEATURE_PROVIDER_MOCK_TEST_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h \
+$(MICRO_FEATURES_GENERATOR_HDRS)
 
 RECOGNIZE_COMMANDS_TEST_SRCS := \
 tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands_test.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc
 
 RECOGNIZE_COMMANDS_TEST_HDRS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h \
 tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h
 
 MICRO_SPEECH_SRCS := \
 tensorflow/lite/experimental/micro/examples/micro_speech/main.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc \
+$(MICRO_FEATURES_GENERATOR_SRCS)
 
 MICRO_SPEECH_HDRS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h \
 tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h \
 tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h \
-tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h \
+$(MICRO_FEATURES_GENERATOR_HDRS)
+
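+# Variant of the speech recognizer binary that swaps in the mock audio
+# provider, so it can run against fake audio input on any host.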
+MICRO_SPEECH_MOCK_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/main.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc \
+$(MICRO_FEATURES_GENERATOR_SRCS)
+
+MICRO_SPEECH_MOCK_HDRS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h \
+tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h \
+$(MICRO_FEATURES_GENERATOR_HDRS)
 
 # Find any platform-specific rules for this example.
 include $(wildcard tensorflow/lite/experimental/micro/examples/micro_speech/*/Makefile.inc)
 
+$(eval $(call microlite_test,micro_features_fft_test,\
+$(MICRO_FEATURES_FFT_TEST_SRCS),$(MICRO_FEATURES_FFT_TEST_HDRS)))
+
+$(eval $(call microlite_test,micro_features_filterbank_test,\
+$(MICRO_FEATURES_FILTERBANK_TEST_SRCS),$(MICRO_FEATURES_FILTERBANK_TEST_HDRS)))
+
+$(eval $(call microlite_test,micro_features_frontend_test,\
+$(MICRO_FEATURES_FRONTEND_TEST_SRCS),$(MICRO_FEATURES_FRONTEND_TEST_HDRS)))
+
+$(eval $(call microlite_test,micro_features_log_scale_test,\
+$(MICRO_FEATURES_LOG_SCALE_TEST_SRCS),$(MICRO_FEATURES_LOG_SCALE_TEST_HDRS)))
+
+$(eval $(call microlite_test,micro_features_noise_reduction_test,\
+$(MICRO_FEATURES_NOISE_REDUCTION_TEST_SRCS),$(MICRO_FEATURES_NOISE_REDUCTION_TEST_HDRS)))
+
+$(eval $(call microlite_test,micro_features_pcan_gain_control_test,\
+$(MICRO_FEATURES_PCAN_GAIN_CONTROL_TEST_SRCS),$(MICRO_FEATURES_PCAN_GAIN_CONTROL_TEST_HDRS)))
+
+$(eval $(call microlite_test,micro_features_window_test,\
+$(MICRO_FEATURES_WINDOW_TEST_SRCS),$(MICRO_FEATURES_WINDOW_TEST_HDRS)))
+
+# Tests the micro features generator.
+$(eval $(call microlite_test,micro_features_generator_test,\
+$(MICRO_FEATURES_GENERATOR_TEST_SRCS),$(MICRO_FEATURES_GENERATOR_TEST_HDRS)))
+
 # Tests loading and running a speech model.
 $(eval $(call microlite_test,micro_speech_test,\
 $(MICRO_SPEECH_TEST_SRCS),$(MICRO_SPEECH_TEST_HDRS)))
 
 # Test the code for feature generation.
-$(eval $(call microlite_test,preprocessor_test,\
-$(PREPROCESSOR_TEST_SRCS), $(PREPROCESSOR_TEST_HDRS)))
+$(eval $(call microlite_test,simple_features_generator_test,\
+$(SIMPLE_FEATURES_GENERATOR_TEST_SRCS),$(SIMPLE_FEATURES_GENERATOR_TEST_HDRS)))
 
 # Tests the audio provider module.
 $(eval $(call microlite_test,audio_provider_test,\
 $(AUDIO_PROVIDER_TEST_SRCS),$(AUDIO_PROVIDER_TEST_HDRS)))
 
+# Tests the audio provider mock module.
+$(eval $(call microlite_test,audio_provider_mock_test,\
+$(AUDIO_PROVIDER_MOCK_TEST_SRCS),$(AUDIO_PROVIDER_MOCK_TEST_HDRS)))
+
 # Tests the feature provider module.
 $(eval $(call microlite_test,feature_provider_test,\
 $(FEATURE_PROVIDER_TEST_SRCS),$(FEATURE_PROVIDER_TEST_HDRS)))
 
-# Tests the feature provider module.
+# Tests the feature provider module using the mock audio provider.
+$(eval $(call microlite_test,feature_provider_mock_test,\
+$(FEATURE_PROVIDER_MOCK_TEST_SRCS),$(FEATURE_PROVIDER_MOCK_TEST_HDRS)))
+
+# Tests the command recognizer module.
 $(eval $(call microlite_test,recognize_commands_test,\
 $(RECOGNIZE_COMMANDS_TEST_SRCS),$(RECOGNIZE_COMMANDS_TEST_HDRS)))
 
 # Builds a standalone speech command recognizer binary.
 $(eval $(call microlite_test,micro_speech,\
 $(MICRO_SPEECH_SRCS),$(MICRO_SPEECH_HDRS)))
+
+# Builds a standalone speech command recognizer binary using fake audio input.
+$(eval $(call microlite_test,micro_speech_mock,\
+$(MICRO_SPEECH_MOCK_SRCS),$(MICRO_SPEECH_MOCK_HDRS)))
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/README.md b/tensorflow/lite/experimental/micro/examples/micro_speech/README.md
index 500eed3..3cc81c4 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/README.md
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/README.md
@@ -83,7 +83,8 @@
 ```
 bazel run tensorflow/examples/speech_commands:freeze -- \
 --model_architecture=tiny_conv --window_stride=20 --preprocess=average \
---wanted_words="yes,no" --quantize=1 --output_file=/tmp/tiny_conv.pb
+--wanted_words="yes,no" --quantize=1 --output_file=/tmp/tiny_conv.pb \
+--start_checkpoint=/tmp/speech_commands_train/tiny_conv.ckpt-18000
 ```
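+
+The checkpoint suffix (`18000` here) should match however many training steps
+you ran; point `--start_checkpoint` at the most recent checkpoint under
+`/tmp/speech_commands_train/`.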
 
 The next step is to create a TensorFlow Lite file from the frozen graph:
@@ -99,5 +100,5 @@
 Finally, convert the file into a C source file that can be compiled into an embedded system:
 
 ```
-xxd -i /tmp/tiny_conv.tflite > /tmp/tiny_conv_model_data.cc
+xxd -i /tmp/tiny_conv.tflite > /tmp/tiny_conv_simple_features_model_data.cc
 ```
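+
+Note that `xxd` derives the C variable names from the input file path, so
+rename them to match the `g_tiny_conv_simple_features_model_data` array that
+the example code expects. After renaming, the generated file looks roughly
+like this (byte values and length are illustrative):
+
+```
+unsigned char g_tiny_conv_simple_features_model_data[] = {
+    0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, /* ... */};
+unsigned int g_tiny_conv_simple_features_model_data_len = 18208;
+```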
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/Makefile.inc b/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/Makefile.inc
index 0aa362b..c830903 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/Makefile.inc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/Makefile.inc
@@ -5,7 +5,7 @@
     $(AP3_MICRO_DIR)/../preprocessor.cc \
     $(AP3_MICRO_DIR)/pushbutton_main.c \
     $(AP3_MICRO_DIR)/pushbutton_test.cc \
-    $(AP3_MICRO_DIR)/../tiny_conv_model_data.cc \
+    $(AP3_MICRO_DIR)/../simple_features/tiny_conv_simple_features_model_data.cc \
     $(APOLLO3_SDK)/devices/am_devices_led.c
   ALL_SRCS += $(PUSHBUTTON_MICRO_SPEECH_TEST_SRCS)
   PUSHBUTTON_MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
@@ -24,8 +24,8 @@
   PUSHBUTTON_CMSIS_SPEECH_TEST_SRCS := \
     $(AP3_MICRO_DIR)/pushbutton_main.c \
     $(AP3_MICRO_DIR)/pushbutton_test.cc \
-    $(AP3_MICRO_DIR)/../tiny_conv_model_data.cc \
-    $(CMSIS_DIR)/preprocessor.cc \
+    $(AP3_MICRO_DIR)/../simple_features/tiny_conv_simple_features_model_data.cc \
+    $(CMSIS_DIR)/simple_features_generator.cc \
     $(CMSIS_EXT_DIR)/arm_cmplx_mag_squared_q10p6.c \
     $(CMSIS_DIR)/hanning.c \
     $(APOLLO3_SDK)/devices/am_devices_led.c \
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_main.c b/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_main.c
index afee383..4f70d47 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_main.c
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_main.c
@@ -157,7 +157,7 @@
 // PDM interrupt handler.
 //
 //*****************************************************************************
-void am_pdm_isr(void) {
+void am_pdm0_isr(void) {
   uint32_t ui32Status;
   //
   // Read the interrupt status.
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_test.cc
index 95043f8..d4583db 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_test.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/apollo3/pushbutton_test.cc
@@ -17,8 +17,8 @@
  * micro_speech_test.cc */
 
 #include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.h"
 #include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
 #include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
 #include "tensorflow/lite/experimental/micro/micro_interpreter.h"
@@ -32,20 +32,36 @@
 uint8_t g_yes_score = 0;
 uint8_t g_no_score = 0;
 
+namespace {
+
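+// Runs the simple feature generator over a full second of audio: 49 windows
+// of 480 samples (30ms at 16kHz), stepping 320 samples (20ms) at a time and
+// producing 43 feature values per window.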
+TfLiteStatus GenerateSimpleFeatures_1sec(tflite::ErrorReporter* error_reporter,
+                                         const int16_t* input,
+                                         uint8_t* output) {
+  for (int i = 0; i < 49; i++) {
+    TfLiteStatus generate_status = GenerateSimpleFeatures(
+        error_reporter, input + i * 320, 480, 43, output + i * 43);
+    if (generate_status != kTfLiteOk) {
+      return generate_status;
+    }
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
 TF_LITE_MICRO_TESTS_BEGIN
 
-TF_LITE_MICRO_TEST(TestPreprocessor) {
+TF_LITE_MICRO_TEST(TestSimpleFeaturesGenerator) {
   tflite::MicroErrorReporter micro_error_reporter;
   tflite::ErrorReporter* error_reporter = &micro_error_reporter;
 
   uint8_t preprocessed_data[43 * 49];
-  TfLiteStatus preprocess_1sec_status =
-      Preprocess_1sec(error_reporter, captured_data, preprocessed_data);
-  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, preprocess_1sec_status);
+  TfLiteStatus generate_1sec_status = GenerateSimpleFeatures_1sec(
+      error_reporter, captured_data, preprocessed_data);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, generate_1sec_status);
 
   // Map the model into a usable data structure. This doesn't involve any
   // copying or parsing, it's a very lightweight operation.
-  const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data);
+  const tflite::Model* model =
+      ::tflite::GetModel(g_tiny_conv_simple_features_model_data);
   if (model->version() != TFLITE_SCHEMA_VERSION) {
     error_reporter->Report(
         "Model provided is schema version %d not equal "
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc
index 52db18e..08811c8 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc
@@ -15,7 +15,7 @@
 
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
 
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
 
 namespace {
 int16_t g_dummy_audio_data[kMaxAudioSampleSize];
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock.cc
new file mode 100644
index 0000000..9c97925
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock.cc
@@ -0,0 +1,57 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h"
+
+namespace {
+int16_t g_dummy_audio_data[kMaxAudioSampleSize];
+int32_t g_latest_audio_timestamp = 0;
+}  // namespace
+
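+// Returns a synthetic audio timeline: the one-second "yes" clip plays from
+// 0ms to 1000ms, the "no" clip from 4000ms to 5000ms, everything else is
+// silence, and the whole pattern wraps around every 8000ms.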
+TfLiteStatus GetAudioSamples(tflite::ErrorReporter* error_reporter,
+                             int start_ms, int duration_ms,
+                             int* audio_samples_size, int16_t** audio_samples) {
+  const int yes_start = (0 * kAudioSampleFrequency) / 1000;
+  const int yes_end = (1000 * kAudioSampleFrequency) / 1000;
+  const int no_start = (4000 * kAudioSampleFrequency) / 1000;
+  const int no_end = (5000 * kAudioSampleFrequency) / 1000;
+  const int wraparound = (8000 * kAudioSampleFrequency) / 1000;
+  const int start_sample = (start_ms * kAudioSampleFrequency) / 1000;
+  for (int i = 0; i < kMaxAudioSampleSize; ++i) {
+    const int sample_index = (start_sample + i) % wraparound;
+    int16_t sample;
+    if ((sample_index >= yes_start) && (sample_index < yes_end)) {
+      sample = g_yes_1000ms_sample_data[sample_index - yes_start];
+    } else if ((sample_index >= no_start) && (sample_index < no_end)) {
+      sample = g_no_1000ms_sample_data[sample_index - no_start];
+    } else {
+      sample = 0;
+    }
+    g_dummy_audio_data[i] = sample;
+  }
+  *audio_samples_size = kMaxAudioSampleSize;
+  *audio_samples = g_dummy_audio_data;
+  return kTfLiteOk;
+}
+
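+// Advances the mock clock by 100ms per query so callers always see new audio.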
+int32_t LatestAudioTimestamp() {
+  g_latest_audio_timestamp += 100;
+  return g_latest_audio_timestamp;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock_test.cc
new file mode 100644
index 0000000..b73d436
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_mock_test.cc
@@ -0,0 +1,76 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
+
+#include <limits>
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h"
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestAudioProviderMock) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  int audio_samples_size = 0;
+  int16_t* audio_samples = nullptr;
+  TfLiteStatus get_status =
+      GetAudioSamples(error_reporter, 0, kFeatureSliceDurationMs,
+                      &audio_samples_size, &audio_samples);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status);
+  TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize);
+  TF_LITE_MICRO_EXPECT_NE(audio_samples, nullptr);
+  for (int i = 0; i < audio_samples_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_yes_1000ms_sample_data[i], audio_samples[i]);
+  }
+
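+  // 500ms at the 16kHz sample rate is an offset of 8000 samples into "yes".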
+  get_status = GetAudioSamples(error_reporter, 500, kFeatureSliceDurationMs,
+                               &audio_samples_size, &audio_samples);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status);
+  TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize);
+  TF_LITE_MICRO_EXPECT_NE(audio_samples, nullptr);
+  for (int i = 0; i < audio_samples_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_yes_1000ms_sample_data[i + 8000],
+                            audio_samples[i]);
+  }
+
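+  // 1500ms falls in the silent gap between the two clips, so expect zeros.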
+  get_status = GetAudioSamples(error_reporter, 1500, kFeatureSliceDurationMs,
+                               &audio_samples_size, &audio_samples);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status);
+  TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize);
+  TF_LITE_MICRO_EXPECT_NE(audio_samples, nullptr);
+  for (int i = 0; i < audio_samples_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(0, audio_samples[i]);
+  }
+
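+  // 12250ms wraps modulo 8000ms to 4250ms, i.e. 250ms (4000 samples) into
+  // the "no" clip.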
+  get_status = GetAudioSamples(error_reporter, 12250, kFeatureSliceDurationMs,
+                               &audio_samples_size, &audio_samples);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status);
+  TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize);
+  TF_LITE_MICRO_EXPECT_NE(audio_samples, nullptr);
+  for (int i = 0; i < audio_samples_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_no_1000ms_sample_data[i + 4000],
+                            audio_samples[i]);
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_test.cc
index 85fbbb8..f9212aa 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_test.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_test.cc
@@ -18,7 +18,7 @@
 #include <limits>
 
 #include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
 #include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
 #include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/disco_f746ng/audio_provider.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/disco_f746ng/audio_provider.cc
index 06647d0..49fea82 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/disco_f746ng/audio_provider.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/disco_f746ng/audio_provider.cc
@@ -15,7 +15,7 @@
 
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
 
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
 
 #include "AUDIO_DISCO_F746NG.h"
 #include "SDRAM_DISCO_F746NG.h"
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc
index 7f9ece4..b5dfa3d 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc
@@ -16,8 +16,8 @@
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h"
 
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
 
 FeatureProvider::FeatureProvider(int feature_size, uint8_t* feature_data)
     : feature_size_(feature_size),
@@ -48,6 +48,10 @@
   int slices_needed = current_step - last_step;
   // If this is the first call, make sure we don't use any cached information.
   if (is_first_run_) {
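+    // The micro features generator carries internal state (such as noise
+    // estimates), so initialize it once before producing the first slice.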
+    TfLiteStatus init_status = InitializeMicroFeatures(error_reporter);
+    if (init_status != kTfLiteOk) {
+      return init_status;
+    }
     is_first_run_ = false;
     slices_needed = kFeatureSliceCount;
   }
@@ -94,16 +98,17 @@
       GetAudioSamples(error_reporter, slice_start_ms, kFeatureSliceDurationMs,
                       &audio_samples_size, &audio_samples);
       if (audio_samples_size < kMaxAudioSampleSize) {
-        error_reporter->Report("Audio data size %d  too small, want %d",
+        error_reporter->Report("Audio data size %d too small, want %d",
                                audio_samples_size, kMaxAudioSampleSize);
         return kTfLiteError;
       }
       uint8_t* new_slice_data = feature_data_ + (new_slice * kFeatureSliceSize);
-      TfLiteStatus preprocess_status =
-          Preprocess(error_reporter, audio_samples, audio_samples_size,
-                     kFeatureSliceSize, new_slice_data);
-      if (preprocess_status != kTfLiteOk) {
-        return preprocess_status;
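+      // GenerateMicroFeatures also reports how many samples it consumed;
+      // that value isn't needed here because slices are fetched on fixed
+      // stride boundaries.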
+      size_t num_samples_read;
+      TfLiteStatus generate_status = GenerateMicroFeatures(
+          error_reporter, audio_samples, audio_samples_size, kFeatureSliceSize,
+          new_slice_data, &num_samples_read);
+      if (generate_status != kTfLiteOk) {
+        return generate_status;
       }
     }
   }
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_mock_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_mock_test.cc
new file mode 100644
index 0000000..b05912e
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_mock_test.cc
@@ -0,0 +1,66 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestFeatureProviderMockYes) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  uint8_t feature_data[kFeatureElementCount];
+  FeatureProvider feature_provider(kFeatureElementCount, feature_data);
+
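+  // From a cold start, requesting the window ending at 970ms should fill all
+  // kFeatureSliceCount slices from the mock "yes" audio.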
+  int how_many_new_slices = 0;
+  TfLiteStatus populate_status = feature_provider.PopulateFeatureData(
+      error_reporter, /* last_time_in_ms= */ 0, /* time_in_ms= */ 970,
+      &how_many_new_slices);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, populate_status);
+  TF_LITE_MICRO_EXPECT_EQ(kFeatureSliceCount, how_many_new_slices);
+
+  for (int i = 0; i < kFeatureElementCount; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_yes_micro_f2e59fea_nohash_1_data[i],
+                            feature_data[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(TestFeatureProviderMockNo) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  uint8_t feature_data[kFeatureElementCount];
+  FeatureProvider feature_provider(kFeatureElementCount, feature_data);
+
+  int how_many_new_slices = 0;
+  TfLiteStatus populate_status = feature_provider.PopulateFeatureData(
+      error_reporter, /* last_time_in_ms= */ 4000, /* time_in_ms= */ 4970,
+      &how_many_new_slices);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, populate_status);
+  TF_LITE_MICRO_EXPECT_EQ(kFeatureSliceCount, how_many_new_slices);
+
+  for (int i = 0; i < kFeatureElementCount; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_no_micro_f9643d42_nohash_4_data[i],
+                            feature_data[i]);
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_test.cc
index 556cbfe..e7655a3 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_test.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_test.cc
@@ -15,7 +15,7 @@
 
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h"
 #include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
 #include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
 #include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/fixed_point/preprocessor.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/fixed_point/preprocessor.cc
deleted file mode 100644
index b623d8d..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/fixed_point/preprocessor.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// Reference implementation of the preprocessing pipeline, with the same
-// results as the audio tutorial at
-// https://www.tensorflow.org/tutorials/sequences/audio_recognition
-// This module takes 30ms of PCM-encoded signed 16-bit audio samples (at 16KHz,
-// so 480 values), and extracts a power spectrum of frequencies. There are 43
-// frequency bands in the result, derived from the original 256 output from the
-// discrete Fourier transform, and averaged together in groups of 6.
-// It's expected that most platforms will have optimized versions of the
-// functions used here, for example replacing the DFT with an FFT, so this
-// version shouldn't be used where performance is critical.
-// This implementation uses fixed point for any non-constant calculations,
-// instead of floating point, to help show how this can work on platforms that
-// don't have good float support.
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
-
-#include <cmath>
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
-
-namespace {
-
-// q format notation: qx.y => 1 sign bit, x-1 integer bits, y fraction bits.
-// Use standard (non-saturating) arithmetic with signed ints of size x+y bits.
-// Sacrifice some precision to avoid use of 64-bit ints.
-
-// q1.15 * q1.15 => q2.30
-inline int32_t Q1_15_FixedMultiply_Q2_30(int16_t a, int16_t b) {
-  int32_t big_a = a;
-  int32_t big_b = b;
-  return big_a * big_b;
-}
-
-// q2.30 * q2.30 => q10.22
-inline int32_t Q2_30_FixedMultiply_Q10_22(int32_t a, int32_t b) {
-  // q2.30 result
-  int32_t tmp = (a >> 15) * (b >> 15);
-  // q10.22 result
-  return tmp >> 8;
-}
-
-// q10.22 * q10.22 => q10.22
-// Will overflow if product is >= 512.
-// Largest product in small test set is 465.25
-inline int32_t Q10_22_FixedMultiply_Q10_22(int32_t a, int32_t b) {
-  // q10.22 result
-  return (a >> 11) * (b >> 11);
-}
-
-// float => q2.30
-// No checking for saturation.  Only used for inputs in range [-1, 1].
-inline int32_t FloatToFixed_Q2_30(float input) {
-  return static_cast<int32_t>(roundf(input * (1 << 30)));
-}
-
-// Performs a discrete Fourier transform on the real inputs. This corresponds to
-// rdft() in the FFT package at http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html,
-// and to kiss_fftr() in KISSFFT at https://github.com/mborgerding/kissfft.
-// It takes in an array of float real values, and returns a result of the same
-// length with q10.22 fixed point real and imaginary components interleaved, so
-// fourier_output[0] is the first real value, fourier_output[1] is the first
-// imaginary, fourier_output[2] is the second real, and so on.
-// The calling function should ensure that the array passed in as fourier_output
-// is at least time_series_size in length. Most optimized FFT implementations
-// require the length to be a power of two as well, but this version doesn't
-// enforce that.
-
-// input: q2.30 fixed point.  output: q10.22 fixed point.
-// Outputs interpreted as q10.22 fixed point are un-scaled.
-void CalculateDiscreteFourierTransform(int32_t* time_series,
-                                       int time_series_size,
-                                       int32_t* fourier_output) {
-  for (int i = 0; i < time_series_size / 2; ++i) {
-    int32_t real = 0;
-    for (int j = 0; j < time_series_size; ++j) {
-      const int32_t real_scale =
-          FloatToFixed_Q2_30(cos(j * i * M_PI * 2 / time_series_size));
-      real += Q2_30_FixedMultiply_Q10_22(time_series[j], real_scale);
-    }
-    int32_t imaginary = 0;
-    for (int j = 0; j < time_series_size; ++j) {
-      const int32_t imaginary_scale =
-          FloatToFixed_Q2_30(sin(j * i * M_PI * 2 / time_series_size));
-      imaginary -= Q2_30_FixedMultiply_Q10_22(time_series[j], imaginary_scale);
-    }
-    fourier_output[(i * 2) + 0] = real;
-    fourier_output[(i * 2) + 1] = imaginary;
-  }
-}
-
-// Produces a simple sine curve that is used to ensure frequencies at the center
-// of the current sample window are weighted more heavily than those at the end.
-// q1.15 output format.
-void CalculatePeriodicHann(int window_length, int16_t* window_function) {
-  for (int i = 0; i < window_length; ++i) {
-    const float real_value = (0.5 - 0.5 * cos((2 * M_PI * i) / window_length));
-    int tmp = static_cast<int32_t>(roundf(real_value * (1 << 15)));
-    // Saturate the 0x8000 value to 0x7fff
-    if (tmp > 0x7fff) tmp = 0x7fff;
-    window_function[i] = tmp;
-  }
-}
-
-}  // namespace
-
-TfLiteStatus Preprocess(tflite::ErrorReporter* error_reporter,
-                        const int16_t* input, int input_size, int output_size,
-                        uint8_t* output) {
-  // Ensure our input and output data arrays are valid.
-  if (input_size > kMaxAudioSampleSize) {
-    error_reporter->Report("Input size %d larger than %d", input_size,
-                           kMaxAudioSampleSize);
-    return kTfLiteError;
-  }
-  if (output_size != kFeatureSliceSize) {
-    error_reporter->Report("Requested output size %d doesn't match %d",
-                           output_size, kFeatureSliceSize);
-    return kTfLiteError;
-  }
-
-  // Pre-calculate the window function we'll be applying to the input data.
-  // In a real application, we'd calculate this table once in an initialization
-  // function and store it for repeated reuse.
-  // q1.15 format.
-  int16_t window_function[kMaxAudioSampleSize];
-  CalculatePeriodicHann(input_size, window_function);
-
-  // Apply the window function to our time series input, and pad it with zeroes
-  // to the next power of two.
-  int32_t fixed_input[kMaxAudioSampleSize];
-  for (int i = 0; i < kMaxAudioSampleSize; ++i) {
-    if (i < input_size) {
-      // input is int16_t.  Treat as q1.15 fixed point value in range [-1,1)
-      // window_function is also q1.15 fixed point number
-      fixed_input[i] = Q1_15_FixedMultiply_Q2_30(input[i], window_function[i]);
-    } else {
-      fixed_input[i] = 0;
-    }
-  }
-
-  // Pull the frequency data from the time series sample.
-  // Calculated in q10.22 format from q2.30 inputs.
-  int32_t fourier_values[kMaxAudioSampleSize];
-  CalculateDiscreteFourierTransform(fixed_input, kMaxAudioSampleSize,
-                                    fourier_values);
-
-  // We have the complex numbers giving us information about each frequency
-  // band, but all we want to know is how strong each frequency is, so calculate
-  // the squared magnitude by adding together the squares of each component.
-  int32_t power_spectrum[kMaxAudioSampleSize / 2];
-  for (int i = 0; i < (kMaxAudioSampleSize / 2); ++i) {
-    const int32_t real = fourier_values[(i * 2) + 0];
-    const int32_t imaginary = fourier_values[(i * 2) + 1];
-    // q10.22 results
-    power_spectrum[i] = Q10_22_FixedMultiply_Q10_22(real, real) +
-                        Q10_22_FixedMultiply_Q10_22(imaginary, imaginary);
-  }
-
-  // Finally, reduce the size of the output by averaging together six adjacent
-  // frequencies into each slot, producing an array of 43 values.
-  // Power_spectrum numbers are q10.22.  Divide by kAverageWindowSize inside
-  // loop to prevent overflow.
-  for (int i = 0; i < kFeatureSliceSize; ++i) {
-    int32_t average = 0;
-    for (int j = 0; j < kAverageWindowSize; ++j) {
-      const int index = (i * kAverageWindowSize) + j;
-      if (index < (kMaxAudioSampleSize / 2)) {
-        average += power_spectrum[index] / kAverageWindowSize;
-      }
-    }
-    // Quantize the result into eight bits, effectively multiplying by two.
-    // The 127.5 constant here has to match the features_max value defined in
-    // tensorflow/examples/speech_commands/input_data.py, and this also assumes
-    // that features_min is zero.
-    //
-    // q10.22 input
-    // integer output
-    //
-    // output = (input - features_min) *
-    //     (output_max - output_min) / (features_max - features_min)
-    // == (input) * (255) / (127.5)
-    // == input * 2
-    // == input << 1
-    // Also want to round to nearest integer and only keep integer bits
-    // => ((input << 1) + 0x200000) >> 22
-    // == (input + 0x100000) >> 21
-    int32_t quantized_average = (average + 0x100000) >> 21;
-    if (quantized_average < 0) {
-      quantized_average = 0;
-    }
-    if (quantized_average > 255) {
-      quantized_average = 255;
-    }
-    output[i] = quantized_average;
-  }
-  return kTfLiteOk;
-}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/main.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/main.cc
index 3a9a5a4..e71e621 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/main.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/main.cc
@@ -15,9 +15,9 @@
 
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h"
 #include "tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
 #include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
 #include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
 #include "tensorflow/lite/experimental/micro/micro_interpreter.h"
@@ -31,7 +31,8 @@
 
   // Map the model into a usable data structure. This doesn't involve any
   // copying or parsing, it's a very lightweight operation.
-  const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data);
+  const tflite::Model* model =
+      ::tflite::GetModel(g_tiny_conv_micro_features_model_data);
   if (model->version() != TFLITE_SCHEMA_VERSION) {
     error_reporter->Report(
         "Model provided is schema version %d not equal "
@@ -123,7 +124,8 @@
       return 1;
     }
     if (is_new_command) {
-      error_reporter->Report("Heard %s (%d)", found_command, score);
+      error_reporter->Report("Heard %s (%d) @%dms", found_command, score,
+                             current_time);
     }
   }
 
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/BUILD b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/BUILD
new file mode 100644
index 0000000..1e684e1
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/BUILD
@@ -0,0 +1,300 @@
+# Library for generating feature vectors from audio data
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
+cc_library(
+    name = "micro_model_settings",
+    srcs = [
+        "micro_model_settings.cc",
+    ],
+    hdrs = [
+        "micro_model_settings.h",
+    ],
+)
+
+cc_library(
+    name = "tiny_conv_micro_features_model_data",
+    srcs = [
+        "tiny_conv_micro_features_model_data.cc",
+    ],
+    hdrs = [
+        "tiny_conv_micro_features_model_data.h",
+    ],
+)
+
+cc_library(
+    name = "micro_features_test_data",
+    srcs = [
+        "no_micro_features_data.cc",
+        "yes_micro_features_data.cc",
+    ],
+    hdrs = [
+        "no_micro_features_data.h",
+        "yes_micro_features_data.h",
+    ],
+)
+
+cc_library(
+    name = "bits",
+    hdrs = ["bits.h"],
+)
+
+cc_library(
+    name = "static_alloc",
+    hdrs = ["static_alloc.h"],
+)
+
+cc_library(
+    name = "fft",
+    srcs = [
+        "fft.cc",
+        "fft_util.cc",
+    ],
+    hdrs = [
+        "fft.h",
+        "fft_util.h",
+    ],
+    deps = [
+        ":micro_model_settings",
+        ":static_alloc",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "@kissfft//:kiss_fftr_16",
+    ],
+)
+
+cc_library(
+    name = "filterbank",
+    srcs = [
+        "filterbank.cc",
+        "filterbank_util.cc",
+    ],
+    hdrs = [
+        "filterbank.h",
+        "filterbank_util.h",
+    ],
+    deps = [
+        ":bits",
+        ":fft",
+        ":micro_model_settings",
+        ":static_alloc",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "frontend",
+    srcs = [
+        "frontend.cc",
+        "frontend_util.cc",
+    ],
+    hdrs = [
+        "frontend.h",
+        "frontend_util.h",
+    ],
+    deps = [
+        ":bits",
+        ":fft",
+        ":filterbank",
+        ":log_scale",
+        ":micro_model_settings",
+        ":noise_reduction",
+        ":pcan_gain_control",
+        ":window",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "log_scale",
+    srcs = [
+        "log_lut.cc",
+        "log_scale.cc",
+        "log_scale_util.cc",
+    ],
+    hdrs = [
+        "log_lut.h",
+        "log_scale.h",
+        "log_scale_util.h",
+    ],
+    deps = [
+        ":bits",
+        ":micro_model_settings",
+        ":static_alloc",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "noise_reduction",
+    srcs = [
+        "noise_reduction.cc",
+        "noise_reduction_util.cc",
+    ],
+    hdrs = [
+        "noise_reduction.h",
+        "noise_reduction_util.h",
+    ],
+    deps = [
+        ":micro_model_settings",
+        ":static_alloc",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "pcan_gain_control",
+    srcs = [
+        "pcan_gain_control.cc",
+        "pcan_gain_control_util.cc",
+    ],
+    hdrs = [
+        "pcan_gain_control.h",
+        "pcan_gain_control_util.h",
+    ],
+    deps = [
+        ":bits",
+        ":micro_model_settings",
+        ":static_alloc",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "window",
+    srcs = [
+        "window.cc",
+        "window_util.cc",
+    ],
+    hdrs = [
+        "window.h",
+        "window_util.h",
+    ],
+    deps = [
+        ":micro_model_settings",
+        ":static_alloc",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "micro_features_generator",
+    srcs = [
+        "micro_features_generator.cc",
+    ],
+    hdrs = [
+        "micro_features_generator.h",
+    ],
+    deps = [
+        ":frontend",
+        ":micro_model_settings",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "micro_features_generator_test_data",
+    srcs = [
+        "no_feature_data_slice.cc",
+        "yes_feature_data_slice.cc",
+    ],
+    hdrs = [
+        "no_feature_data_slice.h",
+        "yes_feature_data_slice.h",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "fft_test",
+    srcs = ["fft_test.cc"],
+    deps = [
+        ":fft",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "filterbank_test",
+    srcs = ["filterbank_test.cc"],
+    deps = [
+        ":filterbank",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "frontend_test",
+    srcs = ["frontend_test.cc"],
+    deps = [
+        ":frontend",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "log_scale_test",
+    srcs = ["log_scale_test.cc"],
+    deps = [
+        ":log_scale",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "noise_reduction_test",
+    srcs = ["noise_reduction_test.cc"],
+    deps = [
+        ":noise_reduction",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "pcan_gain_control_test",
+    srcs = ["pcan_gain_control_test.cc"],
+    deps = [
+        ":pcan_gain_control",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "window_test",
+    srcs = ["window_test.cc"],
+    deps = [
+        ":window",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "micro_features_generator_test",
+    srcs = [
+        "micro_features_generator_test.cc",
+    ],
+    deps = [
+        ":micro_features_generator",
+        ":micro_features_generator_test_data",
+        ":micro_model_settings",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/examples/micro_speech:audio_sample_test_data",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h
new file mode 100644
index 0000000..3b19ee6
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h
@@ -0,0 +1,94 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_BITS_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_BITS_H_
+
+#include <cstdint>
+
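+// Portable fallbacks and intrinsics for counting leading zeros. The string
+// literal used below is a 16-entry lookup table: character i holds the
+// leading-zero count of the 4-bit value i (the terminating '\0' covers
+// index 15).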
+static inline int CountLeadingZeros32Slow(uint64_t n) {
+  int zeroes = 28;
+  if (n >> 16) zeroes -= 16, n >>= 16;
+  if (n >> 8) zeroes -= 8, n >>= 8;
+  if (n >> 4) zeroes -= 4, n >>= 4;
+  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
+}
+
+static inline int CountLeadingZeros32(uint32_t n) {
+#if defined(_MSC_VER)
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse(&result, n)) {
+    return 31 - result;
+  }
+  return 32;
+#elif defined(__GNUC__)
+
+  // Handle 0 as a special case because __builtin_clz(0) is undefined.
+  if (n == 0) {
+    return 32;
+  }
+  return __builtin_clz(n);
+#else
+  return CountLeadingZeros32Slow(n);
+#endif
+}
+
+static inline int MostSignificantBit32(uint32_t n) {
+  return 32 - CountLeadingZeros32(n);
+}
+
+static inline int CountLeadingZeros64Slow(uint64_t n) {
+  int zeroes = 60;
+  if (n >> 32) zeroes -= 32, n >>= 32;
+  if (n >> 16) zeroes -= 16, n >>= 16;
+  if (n >> 8) zeroes -= 8, n >>= 8;
+  if (n >> 4) zeroes -= 4, n >>= 4;
+  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
+}
+
+static inline int CountLeadingZeros64(uint64_t n) {
+#if defined(_MSC_VER) && defined(_M_X64)
+  // MSVC does not have __builtin_clzll. Use _BitScanReverse64.
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse64(&result, n)) {
+    return 63 - result;
+  }
+  return 64;
+#elif defined(_MSC_VER)
+  // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse.
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
+    return 31 - result;
+  }
+  if (_BitScanReverse(&result, n)) {
+    return 63 - result;
+  }
+  return 64;
+#elif defined(__GNUC__)
+
+  // Handle 0 as a special case because __builtin_clzll(0) is undefined.
+  if (n == 0) {
+    return 64;
+  }
+  return __builtin_clzll(n);
+#else
+  return CountLeadingZeros64Slow(n);
+#endif
+}
+
+static inline int MostSignificantBit64(uint64_t n) {
+  return 64 - CountLeadingZeros64(n);
+}
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_BITS_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.cc
new file mode 100644
index 0000000..cde4e38
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.cc
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h"
+
+#include <string.h>
+
+#define FIXED_POINT 16
+#include "kiss_fft.h"
+#include "tools/kiss_fftr.h"
+
+void FftCompute(struct FftState* state, const int16_t* input,
+                int input_scale_shift) {
+  const size_t input_size = state->input_size;
+  const size_t fft_size = state->fft_size;
+
+  int16_t* fft_input = state->input;
+  // First, scale the input by the given shift.
+  int i;
+  for (i = 0; i < input_size; ++i) {
+    *fft_input++ = (*input++) << input_scale_shift;
+  }
+  // Zero out whatever else remains in the top part of the input.
+  for (; i < fft_size; ++i) {
+    *fft_input++ = 0;
+  }
+
+  // Apply the FFT.
+  kiss_fftr(reinterpret_cast<const kiss_fftr_cfg>(state->scratch), state->input,
+            reinterpret_cast<kiss_fft_cpx*>(state->output));
+}
+
+void FftInit(struct FftState* state) {
+  // All the initialization is done in FftPopulateState()
+}
+
+void FftReset(struct FftState* state) {
+  memset(state->input, 0, state->fft_size * sizeof(*state->input));
+  memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output));
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h
new file mode 100644
index 0000000..d5d29f6
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FFT_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FFT_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+
+struct complex_int16_t {
+  int16_t real;
+  int16_t imag;
+};
+
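+// Statically allocated state for a real-valued FFT. The transform length is
+// the input size rounded up to a power of two, and kiss_fftr produces
+// fft_size / 2 + 1 complex output bins.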
+struct FftState {
+  int16_t input[kMaxAudioSampleSize];
+  struct complex_int16_t output[kMaxAudioSampleSize + 2];
+  size_t fft_size;
+  size_t input_size;
+  // This magic number was derived from KissFFT's estimate of how much space it
+  // will need to process the particular lengths and datatypes we need for
+  // these model settings. This size will need to be recalculated for different
+  // models, but you will see a runtime error if it's not large enough.
+  char scratch[2848];
+  size_t scratch_size;
+};
+
+void FftCompute(struct FftState* state, const int16_t* input,
+                int input_scale_shift);
+
+void FftInit(struct FftState* state);
+
+void FftReset(struct FftState* state);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FFT_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_test.cc
new file mode 100644
index 0000000..b89b014
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_test.cc
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h"
+
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+namespace {
+
+const int16_t kFakeWindow[] = {
+    0, 1151,   0, -5944, 0, 13311,  0, -21448, 0, 28327, 0, -32256, 0, 32255,
+    0, -28328, 0, 21447, 0, -13312, 0, 5943,   0, -1152, 0};
+const int kScaleShift = 0;
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(FftTest_CheckOutputValues) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  struct FftState state;
+  TF_LITE_MICRO_EXPECT(FftPopulateState(
+      error_reporter, &state, sizeof(kFakeWindow) / sizeof(kFakeWindow[0])));
+
+  FftInit(&state);
+  FftCompute(&state, kFakeWindow, kScaleShift);
+
+  const struct complex_int16_t expected[] = {
+      {0, 0},    {-10, 9},     {-20, 0},   {-9, -10},     {0, 25},  {-119, 119},
+      {-887, 0}, {3000, 3000}, {0, -6401}, {-3000, 3000}, {886, 0}, {118, 119},
+      {0, 25},   {9, -10},     {19, 0},    {9, 9},        {0, 0}};
+  TF_LITE_MICRO_EXPECT_EQ(state.fft_size / 2 + 1,
+                          sizeof(expected) / sizeof(expected[0]));
+  for (size_t i = 0; i <= state.fft_size / 2; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(state.output[i].real, expected[i].real, 2);
+    TF_LITE_MICRO_EXPECT_NEAR(state.output[i].imag, expected[i].imag, 2);
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.cc
new file mode 100644
index 0000000..ab74289
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.cc
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h"
+
+#define FIXED_POINT 16
+#include "kiss_fft.h"
+#include "tools/kiss_fftr.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h"
+
+int FftPopulateState(tflite::ErrorReporter* error_reporter,
+                     struct FftState* state, size_t input_size) {
+  state->input_size = input_size;
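+  // Round the FFT length up to the next power of two.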
+  state->fft_size = 1;
+  while (state->fft_size < state->input_size) {
+    state->fft_size <<= 1;
+  }
+
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(state->input,
+                                 (state->fft_size * sizeof(*state->input)));
+
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->output, ((state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
+
+  // Ask kissfft how much memory it wants.
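+  // (Per KissFFT's documented contract, passing a null buffer makes
+  // kiss_fftr_alloc write the required byte count into scratch_size and
+  // return NULL, so a non-null return below means the size query failed.)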
+  size_t scratch_size = 0;
+  kiss_fftr_cfg kfft_cfg =
+      kiss_fftr_alloc(state->fft_size, 0, nullptr, &scratch_size);
+  if (kfft_cfg != nullptr) {
+    error_reporter->Report("Kiss memory sizing failed.");
+    return 0;
+  }
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(state->scratch, scratch_size);
+  state->scratch_size = scratch_size;
+  // Let kissfft configure the scratch space we just allocated.
+  kfft_cfg = kiss_fftr_alloc(state->fft_size, 0, state->scratch, &scratch_size);
+  if (reinterpret_cast<char*>(kfft_cfg) != state->scratch) {
+    error_reporter->Report("Kiss memory preallocation strategy failed.");
+    return 0;
+  }
+  return 1;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h
new file mode 100644
index 0000000..1dea097
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h
@@ -0,0 +1,26 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FFT_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FFT_UTIL_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+// Prepares an FFT for the given input size.
+int FftPopulateState(tflite::ErrorReporter* error_reporter,
+                     struct FftState* state, size_t input_size);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FFT_UTIL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.cc
new file mode 100644
index 0000000..67f69dd
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.cc
@@ -0,0 +1,135 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h"
+
+#include <string.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h"
+
+void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
+                                         struct complex_int16_t* fft_output,
+                                         int32_t* energy) {
+  const int end_index = state->end_index;
+  int i;
+  energy += state->start_index;
+  fft_output += state->start_index;
+  for (i = state->start_index; i < end_index; ++i) {
+    const int32_t real = fft_output->real;
+    const int32_t imag = fft_output->imag;
+    fft_output++;
+    const uint32_t mag_squared = (real * real) + (imag * imag);
+    *energy++ = mag_squared;
+  }
+}
+
+void FilterbankAccumulateChannels(struct FilterbankState* state,
+                                  const int32_t* energy) {
+  uint64_t* work = state->work;
+  uint64_t weight_accumulator = 0;
+  uint64_t unweight_accumulator = 0;
+
+  const int16_t* channel_frequency_starts = state->channel_frequency_starts;
+  const int16_t* channel_weight_starts = state->channel_weight_starts;
+  const int16_t* channel_widths = state->channel_widths;
+
+  int num_channels_plus_1 = state->num_channels + 1;
+  int i;
+  for (i = 0; i < num_channels_plus_1; ++i) {
+    const int32_t* magnitudes = energy + *channel_frequency_starts++;
+    const int16_t* weights = state->weights + *channel_weight_starts;
+    const int16_t* unweights = state->unweights + *channel_weight_starts++;
+    const int width = *channel_widths++;
+    int j;
+    for (j = 0; j < width; ++j) {
+      weight_accumulator += *weights++ * (static_cast<uint64_t>(*magnitudes));
+      unweight_accumulator +=
+          *unweights++ * (static_cast<uint64_t>(*magnitudes));
+      ++magnitudes;
+    }
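+    // Each bin contributes `weight` to the current channel and `unweight`
+    // (i.e. 1 - weight) to the next one, so the unweighted total accumulated
+    // here seeds the next channel's sum.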
+    *work++ = weight_accumulator;
+    weight_accumulator = unweight_accumulator;
+    unweight_accumulator = 0;
+  }
+}
+
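+// Integer square root using the classic bit-by-bit (restoring) method: each
+// iteration tries the next candidate bit and keeps it if the remaining value
+// can absorb it.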
+static uint16_t Sqrt32(uint32_t num) {
+  if (num == 0) {
+    return 0;
+  }
+  uint32_t res = 0;
+  int max_bit_number = 32 - MostSignificantBit32(num);
+  max_bit_number |= 1;
+  uint32_t bit = 1U << (31 - max_bit_number);
+  int iterations = (31 - max_bit_number) / 2 + 1;
+  while (iterations--) {
+    if (num >= res + bit) {
+      num -= res + bit;
+      res = (res >> 1U) + bit;
+    } else {
+      res >>= 1U;
+    }
+    bit >>= 2U;
+  }
+  // Do rounding - if we have the bits.
+  if (num > res && res != 0xFFFF) {
+    ++res;
+  }
+  return res;
+}
+
+static uint32_t Sqrt64(uint64_t num) {
+  // Take a shortcut and just use 32 bit operations if the upper word is all
+  // clear. This will cause a slight off-by-one issue for numbers close to
+  // 2^32, but it probably isn't going to matter (and gives us a big
+  // performance win).
+  if ((num >> 32) == 0) {
+    return Sqrt32(static_cast<uint32_t>(num));
+  }
+  uint64_t res = 0;
+  int max_bit_number = 64 - MostSignificantBit64(num);
+  max_bit_number |= 1;
+  uint64_t bit = 1ULL << (63 - max_bit_number);
+  int iterations = (63 - max_bit_number) / 2 + 1;
+  while (iterations--) {
+    if (num >= res + bit) {
+      num -= res + bit;
+      res = (res >> 1U) + bit;
+    } else {
+      res >>= 1U;
+    }
+    bit >>= 2U;
+  }
+  // Do rounding - if we have the bits.
+  if (num > res && res != 0xFFFFFFFFLL) {
+    ++res;
+  }
+  return res;
+}
+
+uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
+  const int num_channels = state->num_channels;
+  const int64_t* work = reinterpret_cast<int64_t*>(state->work + 1);
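+  // (work[0] holds only the accumulated energy below the first channel and is
+  // discarded; the real channel sums start at work[1].)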
+  // Reuse the work buffer since we're fine clobbering it at this point to hold
+  // the output.
+  uint32_t* output = reinterpret_cast<uint32_t*>(state->work);
+  int i;
+  for (i = 0; i < num_channels; ++i) {
+    *output++ = Sqrt64(*work++) >> scale_down_shift;
+  }
+  return reinterpret_cast<uint32_t*>(state->work);
+}
+
+void FilterbankReset(struct FilterbankState* state) {
+  memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work));
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h
new file mode 100644
index 0000000..f7b479d
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h
@@ -0,0 +1,56 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FILTERBANK_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FILTERBANK_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+
+#define kFilterbankBits 12
+
+struct FilterbankState {
+  int num_channels;
+  int start_index;
+  int end_index;
+  int16_t channel_frequency_starts[kFeatureSliceSize + 1];
+  int16_t channel_weight_starts[kFeatureSliceSize + 1];
+  int16_t channel_widths[kFeatureSliceSize + 1];
+  int16_t weights[316];
+  int16_t unweights[316];
+  uint64_t work[kFeatureSliceSize + 1];
+};
+
+// Converts the relevant complex values of an FFT output into energy (the
+// square magnitude).
+void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
+                                         struct complex_int16_t* fft_output,
+                                         int32_t* energy);
+
+// Computes the mel-scale filterbank on the given energy array. Output is cached
+// internally - to fetch it, you need to call FilterbankSqrt.
+void FilterbankAccumulateChannels(struct FilterbankState* state,
+                                  const int32_t* energy);
+
+// Applies an integer square root to the 64 bit intermediate values of the
+// filterbank, and returns a pointer to them. Memory will be invalidated the
+// next time FilterbankAccumulateChannels is called.
+uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift);
+
+void FilterbankReset(struct FilterbankState* state);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FILTERBANK_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_test.cc
new file mode 100644
index 0000000..682b216
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_test.cc
@@ -0,0 +1,228 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h"
+
+#include <cstring>
+
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+namespace {
+
+const int kSampleRate = 1000;
+const int kSpectrumSize = 17;
+const int kStartIndex = 1;
+const int kEndIndex = 15;
+const int32_t kEnergy[] = {-1,     181,      400,      181,      625,    28322,
+                           786769, 18000000, 40972801, 18000000, 784996, 28085,
+                           625,    181,      361,      -1,       -1};
+const uint64_t kWork[] = {1835887, 61162970173, 258694800000};
+const int kScaleShift = 0;
+
+// Test filterbank generation using scaled-down defaults.
+class FilterbankTestConfig {
+ public:
+  FilterbankTestConfig() {
+    config_.num_channels = 2;
+    config_.lower_band_limit = 8.0;
+    config_.upper_band_limit = 450.0;
+  }
+
+  struct FilterbankConfig config_;
+};
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckStartIndex) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  TF_LITE_MICRO_EXPECT_EQ(state.start_index, kStartIndex);
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckEndIndex) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  TF_LITE_MICRO_EXPECT_EQ(state.end_index, kEndIndex);
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckChannelFrequencyStarts) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 4, 8};
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.channel_frequency_starts[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckChannelWeightStarts) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 8, 16};
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.channel_weight_starts[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckChannelWidths) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {8, 8, 8};
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.channel_widths[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckWeights) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 3277, 2217, 1200, 222,  0,   0,   0,
+                              0, 3376, 2468, 1591, 744,  0,   0,   0,
+                              0, 4020, 3226, 2456, 1708, 983, 277, 0};
+  TF_LITE_MICRO_EXPECT_EQ(state.channel_weight_starts[state.num_channels] +
+                              state.channel_widths[state.num_channels],
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.weights[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckUnweights) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 819, 1879, 2896, 3874, 0,    0,    0,
+                              0, 720, 1628, 2505, 3352, 0,    0,    0,
+                              0, 76,  870,  1640, 2388, 3113, 3819, 0};
+  TF_LITE_MICRO_EXPECT_EQ(state.channel_weight_starts[state.num_channels] +
+                              state.channel_widths[state.num_channels],
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.unweights[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckConvertFftComplexToEnergy) {
+  struct FilterbankState state;
+  state.start_index = kStartIndex;
+  state.end_index = kEndIndex;
+
+  struct complex_int16_t fake_fft[] = {
+      {0, 0},    {-10, 9},     {-20, 0},   {-9, -10},     {0, 25},  {-119, 119},
+      {-887, 0}, {3000, 3000}, {0, -6401}, {-3000, 3000}, {886, 0}, {118, 119},
+      {0, 25},   {9, -10},     {19, 0},    {9, 9},        {0, 0}};
+  int32_t* energy = reinterpret_cast<int32_t*>(fake_fft);
+  FilterbankConvertFftComplexToEnergy(&state, fake_fft, energy);
+
+  int i;
+  for (i = state.start_index; i < state.end_index; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(energy[i], kEnergy[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckAccumulateChannels) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+
+  FilterbankAccumulateChannels(&state, kEnergy);
+
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(kWork) / sizeof(kWork[0]));
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.work[i], kWork[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckSqrt) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(
+      error_reporter, &config.config_, &state, kSampleRate, kSpectrumSize));
+  std::memcpy(state.work, kWork, sizeof(kWork));
+
+  uint32_t* scaled_filterbank = FilterbankSqrt(&state, kScaleShift);
+
+  const uint32_t expected[] = {247311, 508620};
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(scaled_filterbank[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.cc
new file mode 100644
index 0000000..ea4aa51
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.cc
@@ -0,0 +1,212 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h"
+
+#include <assert.h>
+#include <math.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h"
+
+#define kFilterbankIndexAlignment 4
+#define kFilterbankChannelBlockSize 4
+
+void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) {
+  config->num_channels = 32;
+  config->lower_band_limit = 125.0f;
+  config->upper_band_limit = 7500.0f;
+  config->output_scale_shift = 7;
+}
+
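+// Standard mel-scale mapping: mel = 1127 * ln(1 + freq / 700).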
+static float FreqToMel(float freq) {
+  return 1127.0 * log(1.0 + (freq / 700.0));
+}
+
+static void CalculateCenterFrequencies(const int num_channels,
+                                       const float lower_frequency_limit,
+                                       const float upper_frequency_limit,
+                                       float* center_frequencies) {
+  assert(lower_frequency_limit >= 0.0f);
+  assert(upper_frequency_limit > lower_frequency_limit);
+
+  const float mel_low = FreqToMel(lower_frequency_limit);
+  const float mel_hi = FreqToMel(upper_frequency_limit);
+  const float mel_span = mel_hi - mel_low;
+  const float mel_spacing = mel_span / (static_cast<float>(num_channels));
+  int i;
+  for (i = 0; i < num_channels; ++i) {
+    center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
+  }
+}
+
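+// Converts a float weight in [0, 1] to fixed point with kFilterbankBits
+// fractional bits, rounding to nearest, and also emits the complementary
+// (1 - weight) value.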
+static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight,
+                                      int16_t* unweight) {
+  *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
+  *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
+}
+
+int FilterbankPopulateState(tflite::ErrorReporter* error_reporter,
+                            const struct FilterbankConfig* config,
+                            struct FilterbankState* state, int sample_rate,
+                            int spectrum_size) {
+  state->num_channels = config->num_channels;
+  const int num_channels_plus_1 = config->num_channels + 1;
+
+  // How should we align things to index counts given the byte alignment?
+  const int index_alignment =
+      (kFilterbankIndexAlignment < sizeof(int16_t)
+           ? 1
+           : kFilterbankIndexAlignment / sizeof(int16_t));
+
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->channel_frequency_starts,
+      (num_channels_plus_1 * sizeof(*state->channel_frequency_starts)));
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->channel_weight_starts,
+      (num_channels_plus_1 * sizeof(*state->channel_weight_starts)));
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->channel_widths,
+      (num_channels_plus_1 * sizeof(*state->channel_widths)));
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(state->work,
+                                 (num_channels_plus_1 * sizeof(*state->work)));
+
+  float center_mel_freqs[kFeatureSliceSize + 1];
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      center_mel_freqs, (num_channels_plus_1 * sizeof(*center_mel_freqs)));
+
+  int16_t actual_channel_starts[kFeatureSliceSize + 1];
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      actual_channel_starts,
+      (num_channels_plus_1 * sizeof(*actual_channel_starts)));
+
+  int16_t actual_channel_widths[kFeatureSliceSize + 1];
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      actual_channel_widths,
+      (num_channels_plus_1 * sizeof(*actual_channel_widths)));
+
+  CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
+                             config->upper_band_limit, center_mel_freqs);
+
+  // Always exclude DC.
+  const float hz_per_sbin =
+      0.5 * sample_rate / (static_cast<float>(spectrum_size) - 1);
+  state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
+  state->end_index = 0;  // Initialized to zero here, but actually set below.
+
+  // For each channel, we need to figure out what frequencies belong to it, and
+  // how much padding we need to add so that we can efficiently multiply the
+  // weights and unweights for accumulation. To simplify the multiplication
+  // logic, all channels will have some multiplication to do (even if there are
+  // no frequencies that accumulate to that channel) - they will be directed to
+  // a set of zero weights.
+  int chan_freq_index_start = state->start_index;
+  int weight_index_start = 0;
+  int needs_zeros = 0;
+
+  int chan;
+  for (chan = 0; chan < num_channels_plus_1; ++chan) {
+    // Keep jumping frequencies until we overshoot the bound on this channel.
+    int freq_index = chan_freq_index_start;
+    while (FreqToMel(freq_index * hz_per_sbin) <= center_mel_freqs[chan]) {
+      ++freq_index;
+    }
+
+    const int width = freq_index - chan_freq_index_start;
+    actual_channel_starts[chan] = chan_freq_index_start;
+    actual_channel_widths[chan] = width;
+
+    if (width == 0) {
+      // This channel doesn't actually get anything from the frequencies; it's
+      // always zero. We then need to insert some 'zero' weights into the
+      // output, and just redirect this channel to do a single multiplication
+      // at this point. For simplicity, the zeros are placed at the beginning
+      // of the weights arrays, so we have to go and update all the other
+      // weight_starts to reflect this shift (but only once).
+      state->channel_frequency_starts[chan] = 0;
+      state->channel_weight_starts[chan] = 0;
+      state->channel_widths[chan] = kFilterbankChannelBlockSize;
+      if (!needs_zeros) {
+        needs_zeros = 1;
+        int j;
+        for (j = 0; j < chan; ++j) {
+          state->channel_weight_starts[j] += kFilterbankChannelBlockSize;
+        }
+        weight_index_start += kFilterbankChannelBlockSize;
+      }
+    } else {
+      // How far back do we need to go to ensure that we have the proper
+      // alignment?
+      const int aligned_start =
+          (chan_freq_index_start / index_alignment) * index_alignment;
+      const int aligned_width = (chan_freq_index_start - aligned_start + width);
+      const int padded_width =
+          (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
+          kFilterbankChannelBlockSize;
+
+      state->channel_frequency_starts[chan] = aligned_start;
+      state->channel_weight_starts[chan] = weight_index_start;
+      state->channel_widths[chan] = padded_width;
+      weight_index_start += padded_width;
+    }
+    chan_freq_index_start = freq_index;
+  }
+
+  // Allocate the two arrays to store the weights - weight_index_start contains
+  // the index of what would be the next set of weights that we would need to
+  // add, so that's how many weights we need to allocate.
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->weights, (weight_index_start * sizeof(*state->weights)));
+  for (int i = 0; i < weight_index_start; ++i) {
+    state->weights[i] = 0;
+  }
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->unweights, (weight_index_start * sizeof(*state->unweights)));
+  for (int i = 0; i < weight_index_start; ++i) {
+    state->unweights[i] = 0;
+  }
+
+  // Next pass, compute all the weights. Since the weight arrays were
+  // zero-initialized above, we only need to fill in the weights that
+  // correspond to some frequency for a channel.
+  const float mel_low = FreqToMel(config->lower_band_limit);
+  for (chan = 0; chan < num_channels_plus_1; ++chan) {
+    int frequency = actual_channel_starts[chan];
+    const int num_frequencies = actual_channel_widths[chan];
+    const int frequency_offset =
+        frequency - state->channel_frequency_starts[chan];
+    const int weight_start = state->channel_weight_starts[chan];
+    const float denom_val = (chan == 0) ? mel_low : center_mel_freqs[chan - 1];
+
+    int j;
+    for (j = 0; j < num_frequencies; ++j, ++frequency) {
+      const float weight =
+          (center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) /
+          (center_mel_freqs[chan] - denom_val);
+
+      // Make the float into an integer for the weights (and unweights).
+      const int weight_index = weight_start + frequency_offset + j;
+      QuantizeFilterbankWeights(weight, state->weights + weight_index,
+                                state->unweights + weight_index);
+    }
+    if (frequency > state->end_index) {
+      state->end_index = frequency;
+    }
+  }
+
+  if (state->end_index >= spectrum_size) {
+    error_reporter->Report("Filterbank end_index is above spectrum size.");
+    return 0;
+  }
+  return 1;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h
new file mode 100644
index 0000000..0bf0c84
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FILTERBANK_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FILTERBANK_UTIL_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+struct FilterbankConfig {
+  // Number of frequency channel buckets for the filterbank.
+  int num_channels;
+  // Maximum frequency to include.
+  float upper_band_limit;
+  // Minimum frequency to include.
+  float lower_band_limit;
+  // Unused.
+  int output_scale_shift;
+};
+
+// Fills the FilterbankConfig with "sane" defaults.
+void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config);
+
+// Populates the filterbank state, checking that its statically allocated
+// buffers are large enough.
+int FilterbankPopulateState(tflite::ErrorReporter* error_reporter,
+                            const struct FilterbankConfig* config,
+                            struct FilterbankState* state, int sample_rate,
+                            int spectrum_size);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FILTERBANK_UTIL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.cc
new file mode 100644
index 0000000..c609190
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.cc
@@ -0,0 +1,70 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h"
+
+struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
+                                             const int16_t* samples,
+                                             size_t num_samples,
+                                             size_t* num_samples_read) {
+  struct FrontendOutput output;
+  output.values = nullptr;
+  output.size = 0;
+
+  // Try to apply the window - if it fails, return and wait for more data.
+  if (!WindowProcessSamples(&state->window, samples, num_samples,
+                            num_samples_read)) {
+    return output;
+  }
+
+  // Apply the FFT to the window's output (and scale it so that the fixed point
+  // FFT can have as much resolution as possible).
+  int input_shift =
+      15 - MostSignificantBit32(state->window.max_abs_output_value);
+  FftCompute(&state->fft, state->window.output, input_shift);
+
+  // We can re-use the FFT's output buffer to hold the energy.
+  int32_t* energy = reinterpret_cast<int32_t*>(state->fft.output);
+  FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
+                                      energy);
+  FilterbankAccumulateChannels(&state->filterbank, energy);
+  uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift);
+
+  // Apply noise reduction.
+  NoiseReductionApply(&state->noise_reduction, scaled_filterbank);
+
+  if (state->pcan_gain_control.enable_pcan) {
+    PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank);
+  }
+
+  // Apply the log and scale.
+  int correction_bits =
+      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
+  uint16_t* logged_filterbank =
+      LogScaleApply(&state->log_scale, scaled_filterbank,
+                    state->filterbank.num_channels, correction_bits);
+
+  output.size = state->filterbank.num_channels;
+  output.values = logged_filterbank;
+  return output;
+}
+
+void FrontendReset(struct FrontendState* state) {
+  WindowReset(&state->window);
+  FftReset(&state->fft);
+  FilterbankReset(&state->filterbank);
+  NoiseReductionReset(&state->noise_reduction);
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h
new file mode 100644
index 0000000..3221d28
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h
@@ -0,0 +1,56 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FRONTEND_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FRONTEND_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h"
+
+struct FrontendState {
+  struct WindowState window;
+  struct FftState fft;
+  struct FilterbankState filterbank;
+  struct NoiseReductionState noise_reduction;
+  struct PcanGainControlState pcan_gain_control;
+  struct LogScaleState log_scale;
+};
+
+struct FrontendOutput {
+  const uint16_t* values;
+  size_t size;
+};
+
+// Main entry point to processing frontend samples. Updates num_samples_read to
+// contain the number of samples that have been consumed from the input array.
+// Returns a struct containing the generated output. If not enough samples were
+// added to generate a feature vector, the returned size will be 0 and the
+// values pointer will be NULL. Note that the output pointer will be invalidated
+// as soon as FrontendProcessSamples is called again, so copy the contents
+// elsewhere if you need to use them later.
+struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
+                                             const int16_t* samples,
+                                             size_t num_samples,
+                                             size_t* num_samples_read);
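+//
+// A minimal usage sketch (the `state` and `audio` names here are
+// hypothetical; `state` is assumed to have been set up with
+// FrontendPopulateState from frontend_util.h):
+//
+//   size_t num_read = 0;
+//   struct FrontendOutput out =
+//       FrontendProcessSamples(&state, audio, audio_size, &num_read);
+//   if (out.size > 0) {
+//     // Copy out.values[0..out.size) somewhere before the next call.
+//   }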
+
+void FrontendReset(struct FrontendState* state);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FRONTEND_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_test.cc
new file mode 100644
index 0000000..4d9f86d
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_test.cc
@@ -0,0 +1,134 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h"
+
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+namespace {
+
+const int kSampleRate = 1000;
+const int kWindowSamples = 25;
+const int kStepSamples = 10;
+const int16_t kFakeAudioData[] = {
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768};
+
+// Test end-to-end frontend behaviors.
+class FrontendTestConfig {
+ public:
+  FrontendTestConfig() {
+    config_.window.size_ms = 25;
+    config_.window.step_size_ms = 10;
+    config_.filterbank.num_channels = 2;
+    config_.filterbank.lower_band_limit = 8.0;
+    config_.filterbank.upper_band_limit = 450.0;
+    config_.noise_reduction.smoothing_bits = 10;
+    config_.noise_reduction.even_smoothing = 0.025;
+    config_.noise_reduction.odd_smoothing = 0.06;
+    config_.noise_reduction.min_signal_remaining = 0.05;
+    config_.pcan_gain_control.enable_pcan = true;
+    config_.pcan_gain_control.strength = 0.95;
+    config_.pcan_gain_control.offset = 80.0;
+    config_.pcan_gain_control.gain_bits = 21;
+    config_.log_scale.enable_log = true;
+    config_.log_scale.scale_shift = 6;
+  }
+
+  struct FrontendConfig config_;
+};
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(FrontendTest_CheckOutputValues) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FrontendTestConfig config;
+  struct FrontendState state;
+  TF_LITE_MICRO_EXPECT(FrontendPopulateState(error_reporter, &config.config_,
+                                             &state, kSampleRate));
+  size_t num_samples_read;
+
+  struct FrontendOutput output = FrontendProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read);
+
+  const uint16_t expected[] = {479, 425};
+  TF_LITE_MICRO_EXPECT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < output.size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(output.values[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FrontendTest_CheckConsecutiveWindow) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FrontendTestConfig config;
+  struct FrontendState state;
+  TF_LITE_MICRO_EXPECT(FrontendPopulateState(error_reporter, &config.config_,
+                                             &state, kSampleRate));
+  size_t num_samples_read;
+
+  FrontendProcessSamples(&state, kFakeAudioData,
+                         sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]),
+                         &num_samples_read);
+  struct FrontendOutput output = FrontendProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read);
+
+  const uint16_t expected[] = {436, 378};
+  TF_LITE_MICRO_EXPECT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < output.size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(output.values[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(FrontendTest_CheckNotEnoughSamples) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  FrontendTestConfig config;
+  struct FrontendState state;
+  TF_LITE_MICRO_EXPECT(FrontendPopulateState(error_reporter, &config.config_,
+                                             &state, kSampleRate));
+  size_t num_samples_read;
+
+  FrontendProcessSamples(&state, kFakeAudioData,
+                         sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]),
+                         &num_samples_read);
+  FrontendProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read);
+  struct FrontendOutput output = FrontendProcessSamples(
+      &state, kFakeAudioData + kWindowSamples + kStepSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples -
+          kStepSamples,
+      &num_samples_read);
+
+  TF_LITE_MICRO_EXPECT_EQ(output.size, 0);
+  TF_LITE_MICRO_EXPECT_EQ(output.values, nullptr);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.cc
new file mode 100644
index 0000000..220bc13
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.cc
@@ -0,0 +1,80 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h"
+
+#include <string.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h"
+
+void FrontendFillConfigWithDefaults(struct FrontendConfig* config) {
+  WindowFillConfigWithDefaults(&config->window);
+  FilterbankFillConfigWithDefaults(&config->filterbank);
+  NoiseReductionFillConfigWithDefaults(&config->noise_reduction);
+  PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control);
+  LogScaleFillConfigWithDefaults(&config->log_scale);
+}
+
+int FrontendPopulateState(tflite::ErrorReporter* error_reporter,
+                          const struct FrontendConfig* config,
+                          struct FrontendState* state, int sample_rate) {
+  memset(state, 0, sizeof(*state));
+
+  if (!WindowPopulateState(error_reporter, &config->window, &state->window,
+                           sample_rate)) {
+    error_reporter->Report("Failed to populate window state");
+    return 0;
+  }
+
+  if (!FftPopulateState(error_reporter, &state->fft, state->window.size)) {
+    error_reporter->Report("Failed to populate fft state");
+    return 0;
+  }
+  FftInit(&state->fft);
+
+  if (!FilterbankPopulateState(error_reporter, &config->filterbank,
+                               &state->filterbank, sample_rate,
+                               state->fft.fft_size / 2 + 1)) {
+    error_reporter->Report("Failed to populate filterbank state");
+    return 0;
+  }
+
+  if (!NoiseReductionPopulateState(error_reporter, &config->noise_reduction,
+                                   &state->noise_reduction,
+                                   state->filterbank.num_channels)) {
+    error_reporter->Report("Failed to populate noise reduction state");
+    return 0;
+  }
+
+  int input_correction_bits =
+      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
+  if (!PcanGainControlPopulateState(
+          error_reporter, &config->pcan_gain_control, &state->pcan_gain_control,
+          state->noise_reduction.estimate, state->filterbank.num_channels,
+          state->noise_reduction.smoothing_bits, input_correction_bits)) {
+    error_reporter->Report("Failed to populate pcan gain control state");
+    return 0;
+  }
+
+  if (!LogScalePopulateState(error_reporter, &config->log_scale,
+                             &state->log_scale)) {
+    error_reporter->Report("Failed to populate log scale state");
+    return 0;
+  }
+
+  FrontendReset(state);
+
+  // All good, return a true value.
+  return 1;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h
new file mode 100644
index 0000000..f726764
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h
@@ -0,0 +1,44 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FRONTEND_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FRONTEND_UTIL_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/fft_util.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/filterbank_util.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+struct FrontendConfig {
+  struct WindowConfig window;
+  struct FilterbankConfig filterbank;
+  struct NoiseReductionConfig noise_reduction;
+  struct PcanGainControlConfig pcan_gain_control;
+  struct LogScaleConfig log_scale;
+};
+
+// Fills the FrontendConfig with "sane" defaults.
+void FrontendFillConfigWithDefaults(struct FrontendConfig* config);
+
+// Populates the frontend state, preparing each sub-module's state in turn.
+int FrontendPopulateState(tflite::ErrorReporter* error_reporter,
+                          const struct FrontendConfig* config,
+                          struct FrontendState* state, int sample_rate);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_FRONTEND_UTIL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.cc
new file mode 100644
index 0000000..c651caa
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.cc
@@ -0,0 +1,30 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.h"
+const uint16_t kLogLut[]
+#ifndef _MSC_VER
+    __attribute__((aligned(4)))
+#endif  // _MSC_VER
+    = {0,    224,  442,  654,  861,  1063, 1259, 1450, 1636, 1817, 1992, 2163,
+       2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848,
+       3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934,
+       5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507,
+       5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633,
+       5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370,
+       5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762,
+       4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848,
+       3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659,
+       2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224,
+       1094, 963,  830,  695,  559,  421,  282,  142,  0,    0};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.h
new file mode 100644
index 0000000..d5ed933
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.h
@@ -0,0 +1,32 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_LUT_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_LUT_H_
+
+#include <stdint.h>
+
+// Number of segments in the log lookup table. The table will be kLogSegments+1
+// in length (with some padding).
+#define kLogSegments 128
+#define kLogSegmentsLog2 7
+
+// Scale used by lookup table.
+#define kLogScale 65536
+#define kLogScaleLog2 16
+#define kLogCoeff 45426
+
+extern const uint16_t kLogLut[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_LUT_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.cc
new file mode 100644
index 0000000..f85e9c1
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.cc
@@ -0,0 +1,84 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_lut.h"
+
+#define kuint16max 0x0000FFFF
+
+// The following functions implement integer logarithms of various sizes. The
+// approximation is calculated according to the method described in
+//       www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/
+//       publicaciones/SPL2007/Log10-spl07.pdf
+// It first calculates the log2 of the input and then converts it to a
+// natural logarithm.
+
+static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
+  // Part 1: compute the fractional remainder of x, scaled up to kLogScale.
+  int32_t frac = x - (1LL << log2x);
+  if (log2x < kLogScaleLog2) {
+    frac <<= kLogScaleLog2 - log2x;
+  } else {
+    frac >>= log2x - kLogScaleLog2;
+  }
+  // Part 2: interpolate a lookup table correction to get the log2 fraction.
+  const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
+  const uint32_t seg_unit =
+      ((static_cast<uint32_t>(1)) << kLogScaleLog2) >> kLogSegmentsLog2;
+
+  const int32_t c0 = kLogLut[base_seg];
+  const int32_t c1 = kLogLut[base_seg + 1];
+  const int32_t seg_base = seg_unit * base_seg;
+  const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2;
+  return frac + c0 + rel_pos;
+}
+
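+// Returns the natural logarithm of x as a fixed point value, scaled up by
+// 2^scale_shift and rounded to the nearest integer.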
+static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
+  const uint32_t integer = MostSignificantBit32(x) - 1;
+  const uint32_t fraction = Log2FractionPart(x, integer);
+  const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
+  const uint32_t round = kLogScale / 2;
+  const uint32_t loge =
+      ((static_cast<uint64_t>(kLogCoeff)) * log2 + round) >> kLogScaleLog2;
+  // Finally scale to our output scale
+  const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
+  return loge_scaled;
+}
+
+uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
+                        int signal_size, int correction_bits) {
+  const int scale_shift = state->scale_shift;
+  uint16_t* output = reinterpret_cast<uint16_t*>(signal);
+  uint16_t* ret = output;
+  int i;
+  for (i = 0; i < signal_size; ++i) {
+    uint32_t value = *signal++;
+    if (state->enable_log) {
+      if (correction_bits < 0) {
+        value >>= -correction_bits;
+      } else {
+        value <<= correction_bits;
+      }
+      if (value > 1) {
+        value = Log(value, scale_shift);
+      } else {
+        value = 0;
+      }
+    }
+    *output++ = (value < kuint16max) ? value : kuint16max;
+  }
+  return ret;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h
new file mode 100644
index 0000000..d90b87f
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_SCALE_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_SCALE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+struct LogScaleState {
+  int enable_log;
+  int scale_shift;
+};
+
+// Applies a fixed-point logarithm to the signal and converts it to 16-bit
+// values. Note that the signal array is modified in place.
+uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
+                        int signal_size, int correction_bits);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_SCALE_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_test.cc
new file mode 100644
index 0000000..d5b2332
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_test.cc
@@ -0,0 +1,63 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h"
+
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+namespace {
+
+const int kScaleShift = 6;
+const int kCorrectionBits = -1;
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(LogScaleTest_CheckOutputValues) {
+  struct LogScaleState state;
+  state.enable_log = true;
+  state.scale_shift = kScaleShift;
+
+  uint32_t fake_signal[] = {3578, 1533};
+  uint16_t* output = LogScaleApply(&state, fake_signal,
+                                   sizeof(fake_signal) / sizeof(fake_signal[0]),
+                                   kCorrectionBits);
+
+  const uint16_t expected[] = {479, 425};
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(output[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(LogScaleTest_CheckOutputValuesNoLog) {
+  struct LogScaleState state;
+  state.enable_log = false;
+  state.scale_shift = kScaleShift;
+
+  uint32_t fake_signal[] = {85964, 45998};
+  uint16_t* output = LogScaleApply(&state, fake_signal,
+                                   sizeof(fake_signal) / sizeof(fake_signal[0]),
+                                   kCorrectionBits);
+
+  const uint16_t expected[] = {65535, 45998};
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(output[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.cc
new file mode 100644
index 0000000..09adc09
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.cc
@@ -0,0 +1,28 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h"
+
+void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) {
+  config->enable_log = 1;
+  config->scale_shift = 6;
+}
+
+int LogScalePopulateState(tflite::ErrorReporter* error_reporter,
+                          const struct LogScaleConfig* config,
+                          struct LogScaleState* state) {
+  state->enable_log = config->enable_log;
+  state->scale_shift = config->scale_shift;
+  return 1;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h
new file mode 100644
index 0000000..3caf207
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale_util.h
@@ -0,0 +1,40 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_SCALE_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_SCALE_UTIL_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/log_scale.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+struct LogScaleConfig {
+  // set to false (0) to disable this module
+  int enable_log;
+  // scale results by 2^(scale_shift)
+  int scale_shift;
+};
+
+// Populates the LogScaleConfig with "sane" default values.
+void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config);
+
+// Allocates any buffers.
+int LogScalePopulateState(tflite::ErrorReporter* error_reporter,
+                          const struct LogScaleConfig* config,
+                          struct LogScaleState* state);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_LOG_SCALE_UTIL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.cc
new file mode 100644
index 0000000..6657c6f
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.cc
@@ -0,0 +1,102 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h"
+
+#include <cmath>
+#include <cstring>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/frontend_util.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+
+namespace {
+
+FrontendState g_micro_features_state;
+bool g_is_first_time = true;
+
+}  // namespace
+
+TfLiteStatus InitializeMicroFeatures(tflite::ErrorReporter* error_reporter) {
+  FrontendConfig config;
+  config.window.size_ms = kFeatureSliceDurationMs;
+  config.window.step_size_ms = kFeatureSliceStrideMs;
+  config.filterbank.num_channels = kFeatureSliceSize;
+  config.filterbank.lower_band_limit = 125.0;
+  config.filterbank.upper_band_limit = 7500.0;
+  config.noise_reduction.smoothing_bits = 10;
+  config.noise_reduction.even_smoothing = 0.025;
+  config.noise_reduction.odd_smoothing = 0.06;
+  config.noise_reduction.min_signal_remaining = 0.05;
+  config.pcan_gain_control.enable_pcan = 1;
+  config.pcan_gain_control.strength = 0.95;
+  config.pcan_gain_control.offset = 80.0;
+  config.pcan_gain_control.gain_bits = 21;
+  config.log_scale.enable_log = 1;
+  config.log_scale.scale_shift = 6;
+  if (!FrontendPopulateState(error_reporter, &config, &g_micro_features_state,
+                             kAudioSampleFrequency)) {
+    error_reporter->Report("FrontendPopulateState() failed");
+    return kTfLiteError;
+  }
+  g_is_first_time = true;
+  return kTfLiteOk;
+}
+
+// This is not exposed in any header, and is only used for testing, to ensure
+// that the state is correctly set up before generating results.
+void SetMicroFeaturesNoiseEstimates(const uint32_t* estimate_presets) {
+  for (int i = 0; i < g_micro_features_state.filterbank.num_channels; ++i) {
+    g_micro_features_state.noise_reduction.estimate[i] = estimate_presets[i];
+  }
+}
+
+TfLiteStatus GenerateMicroFeatures(tflite::ErrorReporter* error_reporter,
+                                   const int16_t* input, int input_size,
+                                   int output_size, uint8_t* output,
+                                   size_t* num_samples_read) {
+  const int16_t* frontend_input;
+  if (g_is_first_time) {
+    frontend_input = input;
+    g_is_first_time = false;
+  } else {
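+    // Consecutive 30ms windows with a 20ms stride overlap by 10ms, which is
+    // 160 samples at 16KHz, so skip the samples the frontend has already
+    // consumed from the previous call.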
+    frontend_input = input + 160;
+  }
+  FrontendOutput frontend_output = FrontendProcessSamples(
+      &g_micro_features_state, frontend_input, input_size, num_samples_read);
+
+  for (int i = 0; i < frontend_output.size; ++i) {
+    // These scaling values are derived from those used in input_data.py in the
+    // training pipeline.
+    constexpr int32_t value_scale = (10 * 255);
+    constexpr int32_t value_div = (256 * 26);
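+    // Adding value_div / 2 before dividing rounds to the nearest integer.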
+    int32_t value =
+        ((frontend_output.values[i] * value_scale) + (value_div / 2)) /
+        value_div;
+    if (value < 0) {
+      value = 0;
+    }
+    if (value > 255) {
+      value = 255;
+    }
+    output[i] = value;
+  }
+
+  return kTfLiteOk;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h
new file mode 100644
index 0000000..46fa55d
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h
@@ -0,0 +1,38 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_MICRO_FEATURES_GENERATOR_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_MICRO_FEATURES_GENERATOR_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+// Sets up any resources needed for the feature generation pipeline.
+TfLiteStatus InitializeMicroFeatures(tflite::ErrorReporter* error_reporter);
+
+// Converts audio sample data into a more compact form that's appropriate for
+// feeding into a neural network.
+TfLiteStatus GenerateMicroFeatures(tflite::ErrorReporter* error_reporter,
+                                   const int16_t* input, int input_size,
+                                   int output_size, uint8_t* output,
+                                   size_t* num_samples_read);
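+
+// A typical call sequence, as exercised in micro_features_generator_test.cc
+// (variable names here are illustrative):
+//   InitializeMicroFeatures(error_reporter);
+//   GenerateMicroFeatures(error_reporter, audio_samples, audio_sample_size,
+//                         kFeatureSliceSize, feature_buffer, &samples_read);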
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_MICRO_FEATURES_GENERATOR_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc
new file mode 100644
index 0000000..2c2da7a
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc
@@ -0,0 +1,100 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_features_generator.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+// This is a test-only API, not exposed in any public headers, so declare it.
+void SetMicroFeaturesNoiseEstimates(const uint32_t* estimate_presets);
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestMicroFeaturesGeneratorYes) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, InitializeMicroFeatures(error_reporter));
+
+  // The micro features pipeline retains state from previous calls to help
+  // estimate the background noise. Unfortunately this makes it harder to
+  // exactly reproduce results in a test environment, so use a known snapshot
+  // of the parameters at the point that the golden feature values were
+  // created.
+  const uint32_t yes_estimate_presets[] = {
+      1062898, 2644477, 1257642, 1864718, 412722, 725703, 395721, 474082,
+      173046,  255856,  158966,  153736,  69181,  199100, 144493, 227740,
+      110573,  164330,  79666,   144650,  122947, 476799, 398553, 497493,
+      322152,  1140005, 566716,  690605,  308902, 347481, 109891, 170457,
+      73901,   100975,  42963,   72325,   34183,  20207,  6640,   9468,
+  };
+  SetMicroFeaturesNoiseEstimates(yes_estimate_presets);
+
+  uint8_t yes_calculated_data[g_yes_feature_data_slice_size];
+  size_t num_samples_read;
+  TfLiteStatus yes_status = GenerateMicroFeatures(
+      error_reporter, g_yes_30ms_sample_data, g_yes_30ms_sample_data_size,
+      g_yes_feature_data_slice_size, yes_calculated_data, &num_samples_read);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, yes_status);
+
+  for (int i = 0; i < g_yes_feature_data_slice_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_yes_feature_data_slice[i],
+                            yes_calculated_data[i]);
+    if (g_yes_feature_data_slice[i] != yes_calculated_data[i]) {
+      error_reporter->Report("Expected value %d but found %d",
+                             g_yes_feature_data_slice[i],
+                             yes_calculated_data[i]);
+    }
+  }
+}
+
+TF_LITE_MICRO_TEST(TestMicroFeaturesGeneratorNo) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, InitializeMicroFeatures(error_reporter));
+  // As we did for the previous features, set known good noise state
+  // parameters.
+  const uint32_t no_estimate_presets[] = {
+      2563964, 1909393, 559801, 538670, 203643, 175959, 75088, 139491,
+      59691,   95307,   43865,  129263, 52517,  80058,  51330, 100731,
+      76674,   76262,   15497,  22598,  13778,  21460,  8946,  17806,
+      10023,   18810,   8002,   10842,  7578,   9983,   6267,  10759,
+      8946,    18488,   9691,   39785,  9939,   17835,  9671,  18512,
+  };
+  SetMicroFeaturesNoiseEstimates(no_estimate_presets);
+
+  uint8_t no_calculated_data[g_no_feature_data_slice_size];
+  size_t num_samples_read;
+  TfLiteStatus no_status = GenerateMicroFeatures(
+      error_reporter, g_no_30ms_sample_data, g_no_30ms_sample_data_size,
+      g_no_feature_data_slice_size, no_calculated_data, &num_samples_read);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, no_status);
+
+  for (int i = 0; i < g_no_feature_data_slice_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_no_feature_data_slice[i], no_calculated_data[i]);
+    if (g_no_feature_data_slice[i] != no_calculated_data[i]) {
+      error_reporter->Report("Expected value %d but found %d",
+                             g_no_feature_data_slice[i], no_calculated_data[i]);
+    }
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.cc
new file mode 100644
index 0000000..09f65ca
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.cc
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+
+const char* kCategoryLabels[kCategoryCount] = {
+    "silence",
+    "unknown",
+    "yes",
+    "no",
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h
new file mode 100644
index 0000000..b74a4d0
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_MICRO_MODEL_SETTINGS_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_MICRO_MODEL_SETTINGS_H_
+
+// Keeping these as constant expressions allows us to allocate fixed-sized
+// arrays on the stack for our working memory.
+
+// The size of the input time series data we pass to the FFT to produce the
+// frequency information. This has to be a power of two, and since we're
+// dealing with 30ms of 16KHz inputs (480 samples), 512 is the next value up.
+constexpr int kMaxAudioSampleSize = 512;
+constexpr int kAudioSampleFrequency = 16000;
+
+// All of these values are derived from the values used during model training.
+// If you change your model you'll need to update these constants.
+constexpr int kFeatureSliceSize = 40;
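+// A one-second clip yields floor((1000ms - 30ms) / 20ms) + 1 = 49 slices.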
+constexpr int kFeatureSliceCount = 49;
+constexpr int kFeatureElementCount = (kFeatureSliceSize * kFeatureSliceCount);
+constexpr int kFeatureSliceStrideMs = 20;
+constexpr int kFeatureSliceDurationMs = 30;
+
+constexpr int kCategoryCount = 4;
+constexpr int kSilenceIndex = 0;
+constexpr int kUnknownIndex = 1;
+extern const char* kCategoryLabels[kCategoryCount];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_MICRO_MODEL_SETTINGS_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.cc
new file mode 100644
index 0000000..1dbb606
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.cc
@@ -0,0 +1,24 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See the header for documentation on the meaning of this data.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.h"
+
+const uint8_t g_no_feature_data_slice[g_no_feature_data_slice_size] = {
+    216, 195, 223, 211, 238, 223, 243, 215, 226, 204, 232, 211, 232, 213,
+    240, 218, 235, 214, 238, 205, 207, 173, 149, 201, 215, 200, 230, 213,
+    208, 195, 175, 151, 195, 175, 182, 163, 235, 217, 218, 190,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.h
new file mode 100644
index 0000000..72ea2bf
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_feature_data_slice.h
@@ -0,0 +1,29 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This data was extracted from the larger feature data held in
+// no_features_data.cc and consists of the 29th spectrogram slice of 40 values.
+// This is the expected result of running the sample data in
+// no_30ms_sample_data.cc through the preprocessing pipeline.
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_FEATURE_DATA_SLICE_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_FEATURE_DATA_SLICE_H_
+
+#include <cstdint>
+
+constexpr int g_no_feature_data_slice_size = 40;
+extern const uint8_t g_no_feature_data_slice[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_FEATURE_DATA_SLICE_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.cc
new file mode 100644
index 0000000..865209b
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.cc
@@ -0,0 +1,165 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h"
+
+/* File automatically created by
+ * tensorflow/examples/speech_commands/wav_to_features.py \
+ * --sample_rate=16000 \
+ * --clip_duration_ms=1000 \
+ * --window_size_ms=30 \
+ * --window_stride_ms=20 \
+ * --feature_bin_count=40 \
+ * --quantize=1 \
+ * --preprocess="micro" \
+ * --input_wav="speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav" \
+ * --output_c_file="/tmp/no_micro_features_data.cc" \
+ */
+
+const int g_no_micro_f9643d42_nohash_4_width = 40;
+const int g_no_micro_f9643d42_nohash_4_height = 49;
+const unsigned char g_no_micro_f9643d42_nohash_4_data[] = {
+    230, 205, 191, 203, 202, 181, 180, 194, 205, 187, 183, 197, 203, 198, 196,
+    186, 202, 159, 151, 126, 110, 138, 141, 142, 137, 148, 133, 120, 110, 126,
+    117, 110, 117, 116, 137, 134, 95,  116, 123, 110, 184, 144, 183, 189, 197,
+    172, 188, 164, 194, 179, 175, 174, 182, 173, 184, 174, 200, 145, 154, 148,
+    147, 135, 143, 122, 127, 138, 116, 99,  122, 105, 110, 125, 127, 133, 131,
+    123, 116, 119, 127, 114, 193, 176, 185, 170, 175, 146, 166, 167, 185, 185,
+    185, 183, 195, 185, 176, 178, 197, 155, 137, 144, 164, 132, 153, 132, 138,
+    137, 134, 95,  120, 116, 131, 122, 99,  120, 120, 110, 116, 110, 126, 127,
+    128, 159, 187, 119, 178, 187, 197, 167, 199, 184, 180, 165, 194, 176, 144,
+    134, 187, 136, 142, 134, 145, 132, 145, 105, 119, 123, 125, 116, 125, 102,
+    129, 138, 130, 99,  99,  90,  120, 123, 134, 95,  194, 172, 187, 123, 191,
+    179, 195, 182, 201, 137, 167, 142, 185, 161, 187, 146, 167, 152, 154, 107,
+    152, 112, 134, 144, 117, 116, 105, 85,  105, 105, 99,  90,  123, 112, 112,
+    68,  107, 105, 117, 99,  116, 143, 139, 90,  154, 142, 188, 172, 178, 135,
+    175, 149, 177, 110, 173, 160, 169, 162, 173, 119, 132, 110, 85,  85,  117,
+    129, 117, 112, 117, 51,  112, 95,  139, 102, 105, 90,  128, 119, 112, 99,
+    170, 168, 195, 152, 174, 173, 180, 0,   157, 130, 169, 149, 149, 123, 170,
+    130, 170, 133, 159, 102, 134, 90,  85,  105, 126, 119, 130, 90,  78,  68,
+    127, 120, 95,  51,  122, 110, 112, 78,  116, 95,  180, 135, 179, 146, 179,
+    162, 197, 153, 172, 135, 154, 0,   149, 95,  145, 114, 166, 0,   114, 110,
+    145, 107, 114, 90,  136, 68,  95,  95,  95,  85,  116, 99,  116, 0,   95,
+    68,  102, 51,  102, 78,  185, 157, 138, 158, 180, 117, 173, 142, 145, 117,
+    169, 130, 159, 99,  138, 123, 169, 90,  78,  0,   123, 85,  107, 51,  114,
+    102, 95,  0,   116, 85,  119, 95,  95,  68,  85,  51,  116, 68,  102, 78,
+    167, 105, 164, 163, 178, 126, 164, 154, 154, 51,  177, 120, 156, 85,  134,
+    139, 168, 90,  161, 102, 114, 116, 122, 95,  112, 102, 107, 51,  114, 85,
+    119, 78,  114, 90,  102, 51,  102, 51,  114, 99,  177, 68,  152, 102, 184,
+    166, 179, 129, 177, 129, 180, 110, 158, 105, 139, 0,   145, 85,  148, 102,
+    117, 102, 116, 0,   78,  68,  90,  51,  107, 85,  78,  0,   51,  0,   51,
+    0,   95,  51,  107, 68,  180, 117, 90,  0,   138, 0,   187, 146, 119, 140,
+    164, 90,  136, 0,   131, 51,  159, 99,  141, 138, 116, 51,  90,  51,  90,
+    68,  105, 0,   85,  78,  112, 51,  122, 95,  128, 68,  85,  0,   112, 68,
+    147, 126, 178, 146, 171, 130, 190, 147, 188, 123, 170, 78,  132, 0,   130,
+    125, 159, 95,  102, 0,   110, 0,   95,  85,  120, 68,  78,  51,  99,  51,
+    105, 0,   112, 102, 105, 68,  90,  51,  90,  0,   127, 95,  166, 175, 187,
+    133, 135, 0,   171, 139, 132, 128, 140, 51,  126, 107, 161, 0,   95,  51,
+    119, 0,   114, 0,   95,  110, 116, 51,  112, 0,   90,  0,   116, 51,  68,
+    0,   105, 68,  105, 0,   164, 78,  173, 0,   194, 166, 145, 114, 116, 51,
+    107, 122, 151, 0,   156, 102, 148, 51,  122, 95,  129, 0,   85,  0,   127,
+    78,  90,  0,   78,  0,   95,  0,   110, 0,   68,  119, 120, 68,  68,  0,
+    122, 99,  147, 127, 200, 167, 85,  114, 161, 85,  161, 125, 143, 99,  156,
+    85,  147, 68,  99,  0,   107, 102, 132, 51,  112, 68,  95,  78,  99,  0,
+    68,  0,   51,  0,   90,  78,  128, 51,  95,  0,   166, 136, 174, 138, 189,
+    144, 130, 129, 138, 134, 132, 120, 134, 0,   51,  78,  147, 51,  51,  0,
+    51,  0,   78,  0,   68,  68,  95,  78,  90,  0,   0,   0,   68,  0,   90,
+    68,  110, 0,   95,  51,  165, 151, 157, 0,   0,   0,   112, 0,   112, 95,
+    149, 107, 119, 68,  126, 68,  138, 0,   78,  0,   78,  0,   99,  51,  112,
+    0,   102, 0,   78,  51,  85,  0,   0,   0,   78,  0,   95,  0,   95,  78,
+    105, 0,   152, 0,   0,   51,  132, 105, 159, 0,   129, 102, 114, 0,   138,
+    51,  123, 0,   129, 78,  119, 51,  51,  51,  105, 0,   78,  85,  95,  0,
+    85,  0,   0,   0,   85,  0,   78,  0,   0,   0,   172, 142, 141, 0,   137,
+    0,   148, 128, 157, 120, 146, 120, 120, 0,   95,  78,  141, 68,  68,  0,
+    68,  0,   90,  0,   85,  0,   107, 0,   78,  0,   85,  51,  102, 0,   68,
+    78,  68,  0,   51,  0,   125, 0,   141, 51,  102, 138, 175, 51,  120, 51,
+    173, 85,  116, 141, 164, 68,  150, 123, 133, 51,  114, 0,   117, 68,  150,
+    51,  116, 68,  78,  0,   68,  0,   68,  0,   85,  0,   78,  0,   51,  78,
+    155, 90,  161, 0,   132, 99,  123, 78,  107, 0,   134, 90,  95,  0,   78,
+    0,   162, 143, 85,  0,   107, 78,  125, 90,  90,  51,  51,  0,   85,  0,
+    0,   0,   132, 102, 102, 154, 128, 0,   99,  68,  162, 102, 151, 0,   99,
+    51,  147, 141, 156, 0,   112, 120, 158, 127, 145, 139, 187, 171, 135, 138,
+    146, 0,   95,  68,  127, 0,   85,  0,   105, 0,   0,   0,   187, 170, 162,
+    188, 165, 51,  51,  78,  243, 215, 225, 196, 205, 181, 205, 168, 176, 134,
+    157, 110, 126, 114, 133, 139, 193, 163, 159, 116, 160, 126, 122, 127, 171,
+    99,  114, 68,  123, 85,  90,  0,   157, 146, 166, 179, 136, 0,   116, 90,
+    242, 219, 240, 204, 216, 164, 188, 171, 176, 164, 154, 158, 190, 157, 190,
+    141, 182, 177, 169, 128, 172, 145, 105, 129, 157, 90,  78,  51,  119, 68,
+    137, 68,  116, 78,  141, 132, 151, 122, 156, 140, 234, 206, 229, 201, 216,
+    174, 191, 144, 162, 85,  122, 157, 194, 167, 204, 149, 180, 166, 166, 139,
+    122, 133, 156, 126, 145, 85,  128, 0,   99,  51,  145, 0,   126, 51,  166,
+    162, 166, 162, 177, 157, 228, 198, 221, 197, 214, 177, 173, 166, 173, 139,
+    185, 191, 202, 163, 205, 172, 206, 189, 135, 68,  166, 134, 149, 134, 135,
+    90,  127, 107, 175, 90,  136, 117, 135, 140, 172, 167, 166, 149, 177, 152,
+    221, 191, 215, 194, 211, 0,   156, 147, 182, 178, 208, 163, 190, 157, 208,
+    200, 195, 164, 179, 154, 181, 150, 143, 99,  132, 137, 185, 143, 163, 85,
+    51,  107, 132, 134, 164, 127, 167, 159, 175, 141, 216, 195, 223, 211, 238,
+    223, 243, 215, 226, 204, 232, 211, 232, 213, 240, 218, 235, 214, 238, 205,
+    207, 173, 149, 201, 215, 200, 230, 213, 208, 195, 175, 151, 195, 175, 182,
+    163, 235, 217, 218, 190, 211, 191, 215, 191, 217, 220, 241, 215, 229, 206,
+    236, 210, 227, 216, 236, 188, 183, 149, 202, 189, 208, 172, 191, 201, 220,
+    193, 221, 207, 216, 208, 201, 131, 170, 187, 229, 197, 211, 194, 226, 201,
+    205, 184, 206, 177, 221, 210, 226, 184, 204, 197, 218, 198, 212, 209, 213,
+    141, 172, 110, 175, 167, 180, 156, 213, 188, 192, 179, 213, 205, 204, 174,
+    200, 147, 162, 181, 203, 167, 198, 187, 210, 164, 196, 169, 189, 168, 224,
+    198, 213, 204, 198, 195, 230, 211, 221, 197, 208, 0,   0,   0,   85,  90,
+    167, 130, 175, 173, 203, 164, 193, 144, 170, 145, 185, 148, 154, 139, 198,
+    159, 180, 171, 216, 174, 178, 161, 166, 136, 216, 184, 215, 197, 199, 190,
+    228, 195, 208, 51,  117, 0,   0,   0,   0,   0,   140, 51,  135, 154, 188,
+    155, 168, 0,   90,  0,   156, 85,  110, 0,   174, 90,  172, 154, 179, 99,
+    142, 166, 179, 157, 177, 95,  192, 142, 204, 198, 217, 147, 173, 0,   112,
+    0,   0,   0,   0,   0,   0,   0,   110, 0,   107, 0,   160, 0,   148, 95,
+    172, 0,   0,   0,   116, 0,   122, 114, 170, 0,   0,   0,   0,   0,   179,
+    110, 196, 85,  205, 183, 169, 0,   99,  0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   141, 0,   112, 0,   0,   0,   134, 0,   0,   0,   0,
+    0,   0,   0,   139, 0,   0,   0,   0,   112, 186, 78,  163, 0,   169, 128,
+    174, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   95,
+    0,   105, 0,   0,   0,   105, 0,   0,   0,   0,   0,   0,   0,   95,  0,
+    0,   0,   0,   0,   0,   0,   119, 0,   164, 78,  0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   90,  0,   0,   68,
+    117, 0,   0,   0,   0,   0,   0,   0,   148, 0,   0,   0,   0,   0,   0,
+    0,   0,   0,   116, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   51,
+    0,   0,   0,   99,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   99,  0,   0,   0,   0,   0,   0,   0,   0,   0,   78,  0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h
new file mode 100644
index 0000000..178323e
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h
@@ -0,0 +1,23 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_MICRO_FEATURES_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_MICRO_FEATURES_DATA_H_
+
+extern const int g_no_micro_f9643d42_nohash_4_width;
+extern const int g_no_micro_f9643d42_nohash_4_height;
+extern const unsigned char g_no_micro_f9643d42_nohash_4_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_MICRO_FEATURES_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.cc
new file mode 100644
index 0000000..3b3aa19
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.cc
@@ -0,0 +1,51 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h"
+
+#include <string.h>
+
+void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
+  int i;
+  for (i = 0; i < state->num_channels; ++i) {
+    const uint32_t smoothing =
+        ((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing;
+    const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing;
+
+    // Update the estimate of the noise.
+    const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
+    uint32_t estimate =
+        ((static_cast<uint64_t>(signal_scaled_up) * smoothing) +
+         (static_cast<uint64_t>(state->estimate[i]) * one_minus_smoothing)) >>
+        kNoiseReductionBits;
+    state->estimate[i] = estimate;
+
+    // Make sure that (signal_scaled_up - estimate) below can't go negative.
+    if (estimate > signal_scaled_up) {
+      estimate = signal_scaled_up;
+    }
+
+    const uint32_t floor =
+        (static_cast<uint64_t>(signal[i]) * state->min_signal_remaining) >>
+        kNoiseReductionBits;
+    const uint32_t subtracted =
+        (signal_scaled_up - estimate) >> state->smoothing_bits;
+    const uint32_t output = subtracted > floor ? subtracted : floor;
+    signal[i] = output;
+  }
+}
+
+void NoiseReductionReset(struct NoiseReductionState* state) {
+  memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels);
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h
new file mode 100644
index 0000000..6991443
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NOISE_REDUCTION_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NOISE_REDUCTION_H_
+
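+// Fixed-point precision of the smoothing coefficients (Q14).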
+#define kNoiseReductionBits 14
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+
+struct NoiseReductionState {
+  int smoothing_bits;
+  uint16_t even_smoothing;
+  uint16_t odd_smoothing;
+  uint16_t min_signal_remaining;
+  int num_channels;
+  uint32_t estimate[kFeatureSliceSize];
+};
+
+// Removes stationary noise from each channel of the signal using a low pass
+// filter.
+void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal);
+
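+// Resets the noise estimate to zero so it can be re-learned from the signal.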
+void NoiseReductionReset(struct NoiseReductionState* state);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NOISE_REDUCTION_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_test.cc
new file mode 100644
index 0000000..de7181d
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_test.cc
@@ -0,0 +1,83 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h"
+
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+namespace {
+
+const int kNumChannels = 2;
+
+// Test noise reduction using default config values.
+class NoiseReductionTestConfig {
+ public:
+  NoiseReductionTestConfig() {
+    config_.smoothing_bits = 10;
+    config_.even_smoothing = 0.025;
+    config_.odd_smoothing = 0.06;
+    config_.min_signal_remaining = 0.05;
+  }
+
+  struct NoiseReductionConfig config_;
+};
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(NoiseReductionTest_TestNoiseReductionEstimate) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  NoiseReductionTestConfig config;
+  struct NoiseReductionState state;
+  TF_LITE_MICRO_EXPECT(NoiseReductionPopulateState(
+      error_reporter, &config.config_, &state, kNumChannels));
+
+  uint32_t signal[] = {247311, 508620};
+  NoiseReductionApply(&state, signal);
+
+  const uint32_t expected[] = {6321887, 31248341};
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.estimate[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(NoiseReductionTest_TestNoiseReduction) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  NoiseReductionTestConfig config;
+  struct NoiseReductionState state;
+  TF_LITE_MICRO_EXPECT(NoiseReductionPopulateState(
+      error_reporter, &config.config_, &state, kNumChannels));
+
+  uint32_t signal[] = {247311, 508620};
+  NoiseReductionApply(&state, signal);
+
+  const uint32_t expected[] = {241137, 478104};
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(signal[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.cc
new file mode 100644
index 0000000..42a5c21
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.cc
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h"
+
+void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) {
+  config->smoothing_bits = 10;
+  config->even_smoothing = 0.025;
+  config->odd_smoothing = 0.06;
+  config->min_signal_remaining = 0.05;
+}
+
+int NoiseReductionPopulateState(tflite::ErrorReporter* error_reporter,
+                                const struct NoiseReductionConfig* config,
+                                struct NoiseReductionState* state,
+                                int num_channels) {
+  state->smoothing_bits = config->smoothing_bits;
+  state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits);
+  state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits);
+  state->min_signal_remaining =
+      config->min_signal_remaining * (1 << kNoiseReductionBits);
+  state->num_channels = num_channels;
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->estimate, (state->num_channels * sizeof(*state->estimate)));
+  for (int i = 0; i < state->num_channels; ++i) {
+    state->estimate[i] = 0;
+  }
+  return 1;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h
new file mode 100644
index 0000000..60f9de5
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction_util.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NOISE_REDUCTION_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NOISE_REDUCTION_UTIL_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/noise_reduction.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+struct NoiseReductionConfig {
+  // scale the signal up by 2^(smoothing_bits) before reduction
+  int smoothing_bits;
+  // smoothing coefficient for even-numbered channels
+  float even_smoothing;
+  // smoothing coefficient for odd-numbered channels
+  float odd_smoothing;
+  // fraction of signal to preserve (1.0 disables this module)
+  float min_signal_remaining;
+};
+
+// Populates the NoiseReductionConfig with "sane" default values.
+void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config);
+
+// Prepares any buffers.
+int NoiseReductionPopulateState(tflite::ErrorReporter* error_reporter,
+                                const struct NoiseReductionConfig* config,
+                                struct NoiseReductionState* state,
+                                int num_channels);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NOISE_REDUCTION_UTIL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.cc
new file mode 100644
index 0000000..5065675
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.cc
@@ -0,0 +1,62 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h"
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/bits.h"
+
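+// Approximates the gain curve with a piecewise polynomial: each power-of-two
+// interval of x has its own coefficients in the lookup table, applied to the
+// 10-bit fractional position of x within that interval.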
+int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
+  if (x <= 2) {
+    return lut[x];
+  }
+
+  const int16_t interval = MostSignificantBit32(x);
+  lut += 4 * interval - 6;
+
+  const int16_t frac =
+      ((interval < 11) ? (x << (11 - interval)) : (x >> (interval - 11))) &
+      0x3FF;
+
+  int32_t result = (static_cast<int32_t>(lut[2]) * frac) >> 5;
+  result += (static_cast<int32_t>(lut[1])) << 5;
+  result *= frac;
+  result = (result + (1 << 14)) >> 15;
+  result += lut[0];
+  return static_cast<int16_t>(result);
+}
+
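+// Compresses a kPcanSnrBits-wide SNR value down to kPcanOutputBits: quadratic
+// for small inputs, shifting to linear for large ones.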
+uint32_t PcanShrink(const uint32_t x) {
+  if (x < (2 << kPcanSnrBits)) {
+    return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits);
+  } else {
+    return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits);
+  }
+}
+
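+// Per-channel automatic gain control: each channel is scaled by a gain
+// looked up from its noise estimate, then compressed back to
+// kPcanOutputBits by PcanShrink().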
+void PcanGainControlApply(struct PcanGainControlState* state,
+                          uint32_t* signal) {
+  int i;
+  for (i = 0; i < state->num_channels; ++i) {
+    const uint32_t gain =
+        WideDynamicFunction(state->noise_estimate[i], state->gain_lut);
+    const uint32_t snr =
+        (static_cast<uint64_t>(signal[i]) * gain) >> state->snr_shift;
+    signal[i] = PcanShrink(snr);
+  }
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h
new file mode 100644
index 0000000..06d6fc9
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h
@@ -0,0 +1,41 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_PCAN_GAIN_CONTROL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_PCAN_GAIN_CONTROL_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define kPcanSnrBits 12
+#define kPcanOutputBits 6
+
+#define kWideDynamicFunctionBits 32
+#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3)
+
+struct PcanGainControlState {
+  int enable_pcan;
+  uint32_t* noise_estimate;
+  int num_channels;
+  int16_t gain_lut[kWideDynamicFunctionLUTSize];
+  int32_t snr_shift;
+};
+
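+// Evaluates the piecewise-quadratic gain lookup table built by
+// PcanGainControlPopulateState(); see pcan_gain_control.cc for details.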
+int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut);
+
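+// Compresses an SNR value from kPcanSnrBits down to kPcanOutputBits.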
+uint32_t PcanShrink(const uint32_t x);
+
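+// Scales each channel of `signal` in place by a gain derived from its noise
+// estimate, then compresses the result back to kPcanOutputBits.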
+void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_PCAN_GAIN_CONTROL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_test.cc
new file mode 100644
index 0000000..7dee667
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_test.cc
@@ -0,0 +1,66 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h"
+
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+namespace {
+
+const int kNumChannels = 2;
+const int kSmoothingBits = 10;
+const int kCorrectionBits = -1;
+
+// Test config: PCAN enabled, with the remaining fields matching the library
+// defaults.
+class PcanGainControlTestConfig {
+ public:
+  PcanGainControlTestConfig() {
+    config_.enable_pcan = 1;
+    config_.strength = 0.95;
+    config_.offset = 80.0;
+    config_.gain_bits = 21;
+  }
+
+  struct PcanGainControlConfig config_;
+};
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(PcanGainControlTest_TestPcanGainControl) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  uint32_t estimate[] = {6321887, 31248341};
+  PcanGainControlTestConfig config;
+  struct PcanGainControlState state;
+  TF_LITE_MICRO_EXPECT(PcanGainControlPopulateState(
+      error_reporter, &config.config_, &state, estimate, kNumChannels,
+      kSmoothingBits, kCorrectionBits));
+
+  uint32_t signal[] = {241137, 478104};
+  PcanGainControlApply(&state, signal);
+
+  const uint32_t expected[] = {3578, 1533};
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(signal[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.cc
new file mode 100644
index 0000000..e7867ac
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.cc
@@ -0,0 +1,87 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h"
+
+#include <math.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h"
+
+#define kint16max 0x00007FFF
+
+void PcanGainControlFillConfigWithDefaults(
+    struct PcanGainControlConfig* config) {
+  config->enable_pcan = 0;
+  config->strength = 0.95;
+  config->offset = 80.0;
+  config->gain_bits = 21;
+}
+
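+// Evaluates the floating-point gain curve
+//   gain(x) = 2^gain_bits * (x / 2^input_bits + offset)^(-strength),
+// rounded to nearest and clamped to kint16max. Only used at setup time to
+// build the fixed-point lookup table.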
+int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
+                               int32_t input_bits, uint32_t x) {
+  const float x_as_float =
+      (static_cast<float>(x)) / (static_cast<uint32_t>(1) << input_bits);
+  const float gain_as_float =
+      (static_cast<uint32_t>(1) << config->gain_bits) *
+      powf(x_as_float + config->offset, -config->strength);
+
+  if (gain_as_float > kint16max) {
+    return kint16max;
+  }
+  return static_cast<int16_t>(gain_as_float + 0.5f);
+}
+
+int PcanGainControlPopulateState(tflite::ErrorReporter* error_reporter,
+                                 const struct PcanGainControlConfig* config,
+                                 struct PcanGainControlState* state,
+                                 uint32_t* noise_estimate,
+                                 const int num_channels,
+                                 const uint16_t smoothing_bits,
+                                 const int32_t input_correction_bits) {
+  state->enable_pcan = config->enable_pcan;
+  if (!state->enable_pcan) {
+    return 1;
+  }
+  state->noise_estimate = noise_estimate;
+  state->num_channels = num_channels;
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(
+      state->gain_lut, (kWideDynamicFunctionLUTSize * sizeof(int16_t)));
+  state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits;
+
+  const int32_t input_bits = smoothing_bits - input_correction_bits;
+  state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
+  state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
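+  // Each interval >= 2 stores three coefficients at offset 4 * interval - 6;
+  // biasing the base pointer by -6 lets the loop index with 4 * interval.
+  // The coefficients form a quadratic fit through samples of the gain curve
+  // at x0, 1.5 * x0, and 2 * x0 (2 * x0 - 1 for the last interval, to stay
+  // in 32-bit range), matching the interpolation in WideDynamicFunction().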
+  int16_t* temp_gain_lut = state->gain_lut - 6;
+  int interval;
+  for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
+    const uint32_t x0 = static_cast<uint32_t>(1) << (interval - 1);
+    const uint32_t x1 = x0 + (x0 >> 1);
+    const uint32_t x2 =
+        (interval == kWideDynamicFunctionBits) ? x0 + (x0 - 1) : 2 * x0;
+
+    const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
+    const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
+    const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);
+
+    const int32_t diff1 = static_cast<int32_t>(y1) - y0;
+    const int32_t diff2 = static_cast<int32_t>(y2) - y0;
+    const int32_t a1 = 4 * diff1 - diff2;
+    const int32_t a2 = diff2 - a1;
+
+    temp_gain_lut[4 * interval] = y0;
+    temp_gain_lut[4 * interval + 1] = static_cast<int16_t>(a1);
+    temp_gain_lut[4 * interval + 2] = static_cast<int16_t>(a2);
+  }
+  return 1;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h
new file mode 100644
index 0000000..4cc1de7
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control_util.h
@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_PCAN_GAIN_CONTROL_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_PCAN_GAIN_CONTROL_UTIL_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/pcan_gain_control.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+struct PcanGainControlConfig {
+  // Set to false (0) to disable this module.
+  int enable_pcan;
+  // Gain normalization exponent (0.0 disables, 1.0 full strength).
+  float strength;
+  // Positive value added in the normalization denominator.
+  float offset;
+  // Number of fractional bits in the gain.
+  int gain_bits;
+};
+
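+// Fills the config with default values. Note that the defaults leave the
+// module disabled (enable_pcan == 0).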
+void PcanGainControlFillConfigWithDefaults(
+    struct PcanGainControlConfig* config);
+
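+// Floating-point gain curve sampled at setup time to build the lookup table.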
+int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
+                               int32_t input_bits, uint32_t x);
+
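+// Initializes the PCAN state from the config. The state keeps a pointer to
+// `noise_estimate`, so that buffer must outlive the state. `smoothing_bits`
+// and `input_correction_bits` describe the scaling applied by the earlier
+// noise reduction stage. Returns 1 on success and 0 on failure.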
+int PcanGainControlPopulateState(tflite::ErrorReporter* error_reporter,
+                                 const struct PcanGainControlConfig* config,
+                                 struct PcanGainControlState* state,
+                                 uint32_t* noise_estimate,
+                                 const int num_channels,
+                                 const uint16_t smoothing_bits,
+                                 const int32_t input_correction_bits);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_PCAN_GAIN_CONTROL_UTIL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h
new file mode 100644
index 0000000..e2af862
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h
@@ -0,0 +1,32 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_STATIC_ALLOC_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_STATIC_ALLOC_H_
+
+// Checks that the C-style array passed in has a compile-time size of at
+// least the number of bytes requested; otherwise it reports the mismatch
+// through `error_reporter` and returns 0 from the enclosing function, so it
+// must be used inside a function that returns int and has a
+// tflite::ErrorReporter* named `error_reporter` in scope. This doesn't work
+// with raw pointers, since sizeof() doesn't know their actual length, so
+// only use it on statically-allocated arrays with known sizes.
+#define STATIC_ALLOC_ENSURE_ARRAY_SIZE(A, N)                                 \
+  do {                                                                       \
+    if (sizeof(A) < (N)) {                                                   \
+      error_reporter->Report(#A " too small (%d bytes, wanted %d) at %s:%d", \
+                             static_cast<int>(sizeof(A)),                    \
+                             static_cast<int>(N), __FILE__, __LINE__);       \
+      return 0;                                                              \
+    }                                                                        \
+  } while (0)
+
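+// A minimal usage sketch (hedged; FillBuffer and the byte count are
+// illustrative):
+//
+//   int FillBuffer(tflite::ErrorReporter* error_reporter) {
+//     static int16_t buffer[64];
+//     // Reports and returns 0 if buffer is smaller than 128 bytes.
+//     STATIC_ALLOC_ENSURE_ARRAY_SIZE(buffer, 128);
+//     ...
+//     return 1;
+//   }
+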
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_STATIC_ALLOC_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.cc
new file mode 100644
index 0000000..57a32c3
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.cc
@@ -0,0 +1,1541 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Automatically created from a TensorFlow Lite flatbuffer using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_micro_features_model_data.cc
+// See the README for a full description of the creation process.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h"
+
+const unsigned char g_tiny_conv_micro_features_model_data[] = {
+    0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
+    0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xd0, 0x46, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0xb4, 0x41, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+    0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
+    0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0x94, 0x41, 0x00, 0x00,
+    0x74, 0x41, 0x00, 0x00, 0x44, 0x41, 0x00, 0x00, 0xb4, 0x3e, 0x00, 0x00,
+    0xac, 0x3e, 0x00, 0x00, 0xa4, 0x3e, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf0, 0xb9, 0xff, 0xff,
+    0xf4, 0xb9, 0xff, 0xff, 0x52, 0xba, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0x80, 0x3e, 0x00, 0x00, 0x68, 0x95, 0x91, 0x7d, 0x9b, 0x85, 0x85, 0x81,
+    0x77, 0x85, 0x99, 0x89, 0x7e, 0x8a, 0x85, 0x92, 0xa5, 0x7e, 0x93, 0x97,
+    0x97, 0x91, 0xa3, 0x97, 0x88, 0x8b, 0xa6, 0x71, 0x77, 0x85, 0x95, 0x86,
+    0x6b, 0x93, 0xcb, 0x96, 0x7a, 0x9a, 0x7f, 0x85, 0x7a, 0x8e, 0xac, 0x98,
+    0x6d, 0x9d, 0x9b, 0x70, 0x9a, 0x90, 0xba, 0x99, 0x7b, 0x93, 0x6e, 0x68,
+    0x75, 0x86, 0xc4, 0x8b, 0x66, 0x5d, 0x96, 0x7f, 0x92, 0x91, 0xb6, 0x7b,
+    0x96, 0x95, 0x9a, 0x77, 0x9a, 0x96, 0xce, 0x80, 0x88, 0x65, 0x8e, 0x80,
+    0x88, 0x85, 0xb7, 0x9c, 0x7b, 0x93, 0x9d, 0x95, 0x83, 0x92, 0xd0, 0x7e,
+    0x68, 0x88, 0x6c, 0x78, 0x98, 0x81, 0xac, 0x95, 0x9e, 0x98, 0xa2, 0x99,
+    0x8d, 0x7d, 0xb8, 0x81, 0x6e, 0x68, 0xa1, 0x81, 0x9d, 0x99, 0xb4, 0x7d,
+    0x92, 0x86, 0x9d, 0x93, 0xa3, 0xb0, 0xd6, 0x79, 0x93, 0x76, 0x8d, 0x84,
+    0x91, 0x9d, 0xbe, 0x94, 0xb0, 0x70, 0x84, 0x80, 0x85, 0x99, 0x9e, 0xa2,
+    0x86, 0x8a, 0x7a, 0x76, 0x91, 0x8d, 0xa6, 0x76, 0x8d, 0x82, 0x98, 0x8c,
+    0x92, 0x8f, 0x8c, 0xb3, 0x78, 0x75, 0xa5, 0x88, 0x73, 0x8c, 0x91, 0x7c,
+    0x82, 0x7d, 0x93, 0x9e, 0x8b, 0x97, 0x7c, 0x90, 0x84, 0x95, 0x7e, 0x9e,
+    0xa4, 0x52, 0x8a, 0xb4, 0x97, 0x65, 0x7d, 0xb6, 0x83, 0x7d, 0x99, 0x80,
+    0x97, 0x85, 0x96, 0x5f, 0x8e, 0x87, 0x95, 0x6d, 0x76, 0x84, 0x97, 0x8c,
+    0x66, 0x97, 0xae, 0x6b, 0x93, 0xb3, 0xa8, 0x8b, 0xa1, 0x79, 0xa3, 0x94,
+    0x7e, 0xa8, 0x8d, 0xad, 0x78, 0x82, 0xa2, 0x7b, 0x90, 0xa4, 0x7d, 0xb3,
+    0xa0, 0x7b, 0x94, 0x85, 0x9a, 0x8d, 0x76, 0x82, 0x65, 0x73, 0xab, 0xa4,
+    0xaa, 0x74, 0x93, 0x9c, 0x83, 0x66, 0xbf, 0x7a, 0xaa, 0x81, 0x92, 0x89,
+    0x7e, 0x88, 0xa6, 0x66, 0xaf, 0x92, 0x9f, 0x97, 0x6c, 0x89, 0x9c, 0x74,
+    0x7e, 0x82, 0x8e, 0x88, 0xb2, 0x85, 0xba, 0x96, 0x90, 0x78, 0x8d, 0xa7,
+    0x9e, 0x87, 0xbc, 0x7f, 0xb2, 0x8b, 0x77, 0x9b, 0xab, 0x8f, 0xa4, 0x7d,
+    0x6f, 0x77, 0x8c, 0x98, 0x6f, 0x89, 0xb1, 0x9f, 0xa7, 0x94, 0x7d, 0xae,
+    0x88, 0x8a, 0xa9, 0x75, 0x7d, 0x7c, 0x88, 0x99, 0x90, 0x9d, 0x97, 0xa7,
+    0x8d, 0x7f, 0x73, 0xa1, 0xa3, 0x87, 0xa9, 0x92, 0x98, 0x7e, 0x9c, 0x88,
+    0x73, 0x6b, 0x78, 0x8e, 0x7d, 0x86, 0x6c, 0x7c, 0x92, 0x40, 0x86, 0xa7,
+    0x65, 0x93, 0x67, 0x91, 0x67, 0x71, 0x6c, 0xa8, 0x81, 0x70, 0x8e, 0xa8,
+    0x7b, 0x63, 0x89, 0x76, 0x69, 0x90, 0x73, 0x5e, 0x92, 0x78, 0x7e, 0x9d,
+    0x87, 0x86, 0x89, 0x64, 0x66, 0xa9, 0x92, 0x8d, 0x72, 0x7c, 0x63, 0x7f,
+    0x94, 0x5c, 0x92, 0x89, 0x87, 0x9d, 0x8b, 0x75, 0x93, 0x8c, 0x94, 0x68,
+    0x97, 0x87, 0x78, 0x7d, 0x7f, 0x84, 0x84, 0x77, 0x6b, 0x8e, 0x83, 0xab,
+    0x7e, 0x62, 0x90, 0x83, 0x8e, 0x71, 0x7e, 0x9b, 0x96, 0x6d, 0x83, 0x6a,
+    0x76, 0x68, 0x71, 0x90, 0x98, 0x90, 0x9b, 0x68, 0x89, 0x89, 0x95, 0x85,
+    0x6e, 0x75, 0x8e, 0x95, 0x83, 0x7a, 0x91, 0x7f, 0x8b, 0x71, 0x90, 0x7d,
+    0xad, 0x91, 0x6f, 0x74, 0x98, 0x8a, 0xb0, 0xa8, 0x80, 0xa3, 0x8e, 0x7c,
+    0xa5, 0x67, 0xa4, 0x66, 0xa9, 0x7b, 0x85, 0x9d, 0x88, 0xab, 0x7d, 0x81,
+    0x6e, 0x7f, 0x8f, 0x97, 0x97, 0x84, 0x89, 0x74, 0x9d, 0x5f, 0x9c, 0x88,
+    0x6f, 0x74, 0x96, 0x9e, 0x7e, 0x7e, 0xa4, 0x85, 0x94, 0x91, 0xaf, 0x99,
+    0x7a, 0xaa, 0x8c, 0x92, 0x85, 0x9d, 0x6c, 0x79, 0x57, 0x7a, 0x80, 0x84,
+    0x79, 0x79, 0x74, 0xa5, 0x55, 0xab, 0x73, 0x8c, 0x72, 0x9d, 0x72, 0xa9,
+    0x90, 0x73, 0x8f, 0xa0, 0x89, 0x6d, 0x68, 0x66, 0x61, 0x6f, 0x57, 0x7d,
+    0x66, 0x8c, 0x65, 0x87, 0x62, 0x76, 0x83, 0x77, 0x89, 0xa4, 0x73, 0x89,
+    0x7f, 0x70, 0x79, 0x6b, 0x86, 0x6f, 0x8d, 0x96, 0x65, 0x89, 0x66, 0x53,
+    0x73, 0xae, 0x6a, 0x72, 0x88, 0x97, 0x7a, 0x7f, 0x5d, 0xa1, 0x86, 0x88,
+    0x5f, 0x9f, 0x9b, 0x8a, 0x74, 0x9a, 0x7a, 0x7e, 0x8b, 0x71, 0x58, 0x74,
+    0x8f, 0x9b, 0x9b, 0x8d, 0x6b, 0x83, 0x60, 0x7f, 0x75, 0x91, 0x79, 0x93,
+    0x7a, 0x92, 0x8c, 0x7e, 0x7a, 0x95, 0x84, 0x69, 0x8f, 0x8c, 0x7c, 0x6e,
+    0x8b, 0x87, 0x82, 0x62, 0xa6, 0x97, 0x91, 0x65, 0xa2, 0xa4, 0x9b, 0x8b,
+    0x85, 0xa4, 0x84, 0x7b, 0x67, 0x93, 0x96, 0x84, 0x85, 0x75, 0x6d, 0x9e,
+    0x80, 0x80, 0x73, 0x8c, 0x81, 0x70, 0x8a, 0x68, 0x9c, 0x8e, 0x63, 0x91,
+    0x89, 0x79, 0x8d, 0x79, 0xa4, 0x9a, 0x96, 0xa0, 0x83, 0x63, 0x88, 0x8f,
+    0x76, 0xb4, 0xa8, 0x8e, 0x68, 0x8d, 0x8e, 0x95, 0x78, 0xae, 0x5d, 0x89,
+    0x66, 0x7e, 0x7b, 0x8a, 0x75, 0x86, 0x71, 0x97, 0x6d, 0xb3, 0x67, 0x76,
+    0x82, 0x7d, 0x70, 0x79, 0x8a, 0x9c, 0x82, 0xa7, 0x82, 0xab, 0x58, 0x86,
+    0x5c, 0x70, 0x8c, 0x71, 0x61, 0xa6, 0x74, 0xa8, 0x65, 0x78, 0x72, 0x9d,
+    0x6c, 0x92, 0x70, 0x88, 0x88, 0x79, 0x96, 0x6f, 0x68, 0xa4, 0x7a, 0x7b,
+    0x96, 0xac, 0x6d, 0x76, 0x6a, 0xab, 0x82, 0x7d, 0x71, 0x8d, 0x6b, 0x81,
+    0x6c, 0x9d, 0x71, 0x59, 0x5c, 0x71, 0x77, 0x6d, 0x6a, 0x96, 0x76, 0x69,
+    0x80, 0x83, 0x88, 0x70, 0x97, 0xb4, 0x8a, 0x6c, 0xa5, 0x6e, 0x64, 0x75,
+    0x73, 0xa2, 0x7f, 0x97, 0x9e, 0x75, 0x8f, 0x86, 0x68, 0xbb, 0x6b, 0x86,
+    0x8d, 0x80, 0x8e, 0x58, 0x6d, 0xb2, 0x76, 0x99, 0x8f, 0x70, 0x6c, 0x86,
+    0x78, 0x9e, 0x91, 0x90, 0xa2, 0x7c, 0x8c, 0x81, 0x80, 0xb4, 0x77, 0x7a,
+    0x8c, 0x5f, 0x85, 0x56, 0x7a, 0x93, 0x6b, 0x5c, 0x74, 0x59, 0x7e, 0x86,
+    0x8c, 0xae, 0x76, 0x7d, 0x76, 0x7e, 0x81, 0x5f, 0x81, 0x8e, 0x7b, 0x90,
+    0xaa, 0x99, 0x79, 0x89, 0x93, 0xbc, 0x86, 0x91, 0xa2, 0x88, 0x79, 0x82,
+    0x80, 0xb6, 0x4a, 0x93, 0x7b, 0x89, 0x75, 0x8d, 0x7a, 0x8d, 0x66, 0x7c,
+    0x81, 0x9f, 0x6e, 0x86, 0x4d, 0x82, 0x66, 0x88, 0x73, 0x89, 0x7d, 0xac,
+    0x89, 0x9f, 0x58, 0x7f, 0x6b, 0x8c, 0x6a, 0x82, 0x59, 0xb8, 0x83, 0x67,
+    0x8b, 0x8a, 0x84, 0x7b, 0x7f, 0xb5, 0x44, 0x57, 0x5a, 0x73, 0x8b, 0x6d,
+    0x7c, 0x9e, 0x71, 0x72, 0x8d, 0x93, 0x80, 0x60, 0x7f, 0xc5, 0x69, 0x5c,
+    0x67, 0x92, 0x6c, 0x75, 0x66, 0x8f, 0x91, 0x5a, 0x6c, 0x70, 0x90, 0x84,
+    0x88, 0xab, 0x90, 0x66, 0x9c, 0x64, 0x6e, 0x68, 0x92, 0x9e, 0x89, 0x8d,
+    0x82, 0x97, 0x77, 0x75, 0x7f, 0xa7, 0x91, 0x75, 0x8c, 0x89, 0xa4, 0x6b,
+    0x98, 0x99, 0x80, 0x7d, 0x6b, 0x7f, 0x7d, 0x88, 0x79, 0xa1, 0x87, 0x90,
+    0x81, 0x8e, 0x94, 0x96, 0x7d, 0xa8, 0x86, 0x84, 0x86, 0x79, 0x97, 0x6e,
+    0xaa, 0x95, 0x8a, 0x9f, 0x8c, 0x72, 0x99, 0x77, 0x81, 0x94, 0x91, 0x9f,
+    0x6e, 0x67, 0x87, 0x70, 0x7d, 0xad, 0x58, 0x7f, 0x6d, 0x96, 0x8e, 0x82,
+    0x7d, 0xa6, 0x77, 0x99, 0x87, 0x95, 0x89, 0x7e, 0xa6, 0x9e, 0x86, 0xac,
+    0x78, 0x9f, 0x9b, 0x85, 0x76, 0x99, 0x6a, 0x92, 0x66, 0x7b, 0x9a, 0x99,
+    0x83, 0x8b, 0x57, 0x65, 0x75, 0x9f, 0xa6, 0x8a, 0x8d, 0x96, 0x6f, 0x80,
+    0x65, 0x8f, 0x80, 0x9f, 0x82, 0x85, 0x55, 0x75, 0x5c, 0x84, 0x91, 0x86,
+    0x76, 0x96, 0x5a, 0x6c, 0x62, 0x7b, 0x92, 0x88, 0x61, 0xca, 0x75, 0x66,
+    0x70, 0x70, 0x8e, 0x7a, 0x75, 0xb2, 0x66, 0x81, 0x5b, 0x79, 0x92, 0x97,
+    0x94, 0xaf, 0x72, 0x8a, 0x9b, 0x5f, 0x65, 0x96, 0x81, 0xb6, 0x8a, 0x6f,
+    0x94, 0x7a, 0x96, 0x92, 0x79, 0x94, 0x8e, 0x53, 0x9a, 0x73, 0x6a, 0x9d,
+    0xa1, 0xa3, 0xa4, 0x8f, 0x6b, 0xa4, 0x8b, 0x82, 0x96, 0xb1, 0x8c, 0x92,
+    0x7f, 0x91, 0x5f, 0x98, 0x8a, 0xa4, 0x7e, 0x80, 0x97, 0x86, 0x86, 0x86,
+    0x8f, 0xa6, 0x77, 0x9a, 0x82, 0x80, 0x6e, 0x73, 0x83, 0xaf, 0x87, 0x6d,
+    0x77, 0x9a, 0x83, 0x9f, 0x7c, 0xa4, 0x71, 0x6f, 0x7d, 0x75, 0x9d, 0x82,
+    0x83, 0xaf, 0x85, 0x80, 0x8d, 0x7f, 0xa4, 0xa2, 0x88, 0xba, 0x76, 0x76,
+    0x94, 0x6b, 0x76, 0x83, 0x77, 0x96, 0x78, 0x8c, 0xb0, 0x8e, 0x83, 0x87,
+    0xa0, 0xcc, 0x7f, 0xa4, 0x8c, 0x77, 0x84, 0x8c, 0x80, 0xa0, 0x57, 0x76,
+    0x76, 0x71, 0x86, 0x9c, 0x7f, 0x88, 0x57, 0x95, 0x4d, 0x8c, 0x7f, 0x80,
+    0x66, 0x9e, 0x42, 0x8d, 0x6a, 0x8e, 0x8c, 0x80, 0x89, 0x9d, 0x4f, 0x83,
+    0x54, 0x8a, 0x5e, 0x64, 0x70, 0x94, 0x78, 0x90, 0x7d, 0x78, 0x8d, 0x71,
+    0x56, 0x9a, 0x8c, 0x65, 0x8b, 0x62, 0x88, 0x9a, 0x6c, 0x8e, 0x7b, 0x78,
+    0x68, 0x86, 0x64, 0x6b, 0x67, 0xaa, 0x8c, 0x7b, 0x67, 0x75, 0x58, 0x7e,
+    0x6b, 0x97, 0x92, 0x87, 0x9c, 0x79, 0x71, 0x76, 0x7d, 0xbb, 0x89, 0x75,
+    0x83, 0x57, 0x74, 0x98, 0xa1, 0x8f, 0xb0, 0x89, 0x76, 0x88, 0x69, 0x9c,
+    0x74, 0xb0, 0x86, 0x9c, 0x79, 0x6f, 0x84, 0x70, 0x94, 0xa1, 0x6e, 0x7a,
+    0xa3, 0x88, 0xa0, 0x7a, 0x94, 0xa1, 0x82, 0x93, 0x99, 0x95, 0x7f, 0xab,
+    0x97, 0x9d, 0x6e, 0x68, 0x79, 0x73, 0x76, 0x83, 0x76, 0xbd, 0x87, 0x87,
+    0x86, 0x74, 0x8f, 0x6e, 0x65, 0xba, 0x6a, 0x78, 0x91, 0x62, 0x72, 0x67,
+    0x75, 0xbd, 0x8c, 0x5e, 0x85, 0x6d, 0x72, 0x85, 0x7d, 0x96, 0x8f, 0xb9,
+    0x9f, 0x97, 0xa2, 0x8a, 0xa1, 0xc1, 0x8d, 0xbc, 0x85, 0x78, 0x93, 0x97,
+    0x99, 0x9f, 0x3a, 0x98, 0x65, 0x8d, 0x6a, 0x6c, 0x92, 0x85, 0x49, 0x7e,
+    0x6a, 0xaa, 0x8a, 0x94, 0x6b, 0x93, 0x40, 0x8a, 0x8c, 0x9c, 0x6f, 0xad,
+    0x72, 0xb0, 0x58, 0x88, 0x60, 0x8c, 0x86, 0x84, 0x74, 0x96, 0x8f, 0x97,
+    0x5e, 0x6c, 0x79, 0x92, 0x51, 0xa8, 0x92, 0x58, 0x62, 0x6f, 0x6c, 0x76,
+    0x5f, 0x9e, 0x86, 0x71, 0x9c, 0x69, 0x7e, 0x80, 0x8a, 0x97, 0x6f, 0x79,
+    0x8b, 0x6f, 0x6c, 0x88, 0x73, 0x9c, 0x6d, 0x91, 0x77, 0x73, 0x7f, 0x97,
+    0x86, 0xa9, 0xac, 0x71, 0x82, 0x90, 0x83, 0x8a, 0x80, 0x9d, 0xa8, 0x85,
+    0x78, 0x7f, 0x94, 0x99, 0x8e, 0xa3, 0x89, 0x70, 0x87, 0x62, 0x82, 0x87,
+    0x8c, 0x98, 0x7a, 0x88, 0x72, 0x7e, 0x78, 0xa0, 0x78, 0x95, 0x97, 0x8f,
+    0x7b, 0x7c, 0x83, 0x94, 0x93, 0xa7, 0x77, 0x97, 0x90, 0x5e, 0x76, 0x7c,
+    0x68, 0xaa, 0x69, 0x67, 0x76, 0x84, 0x7e, 0x64, 0xa3, 0xbe, 0x7e, 0x8b,
+    0x82, 0x50, 0x8a, 0x82, 0x89, 0xc0, 0x79, 0x78, 0x68, 0x7c, 0x6b, 0x77,
+    0x82, 0x99, 0x7b, 0x83, 0x80, 0x90, 0x96, 0x96, 0x87, 0xb7, 0xa5, 0x94,
+    0x82, 0x99, 0x95, 0x91, 0x7e, 0xa2, 0x49, 0x95, 0x6d, 0x8e, 0xa9, 0x89,
+    0x8e, 0x8f, 0x3d, 0x95, 0x6a, 0x8c, 0x8b, 0x8c, 0x7e, 0x88, 0x63, 0x94,
+    0x69, 0x94, 0x88, 0x92, 0x79, 0xa7, 0x68, 0x60, 0x76, 0x85, 0xa1, 0x6f,
+    0x54, 0x96, 0x63, 0x7a, 0x5c, 0x73, 0x74, 0x6e, 0x53, 0x99, 0x69, 0x76,
+    0x69, 0x57, 0x6a, 0x82, 0x55, 0x93, 0x82, 0x80, 0x65, 0x7f, 0x7b, 0x76,
+    0x72, 0x87, 0x8d, 0x97, 0x98, 0x78, 0x7e, 0x6d, 0x7a, 0x95, 0x78, 0x70,
+    0x90, 0x83, 0x89, 0x80, 0x7f, 0x9d, 0x73, 0x73, 0x84, 0x77, 0x8e, 0x77,
+    0x8e, 0x75, 0x9e, 0xa5, 0x86, 0x68, 0x89, 0x7d, 0x8d, 0x99, 0x79, 0x8f,
+    0x8e, 0x87, 0x87, 0x97, 0x8c, 0x91, 0xa1, 0x96, 0x83, 0x73, 0x87, 0xa9,
+    0x8c, 0xa6, 0x85, 0x8c, 0x96, 0x7d, 0x7f, 0x8e, 0x7e, 0xb0, 0x85, 0x8f,
+    0x7f, 0x7d, 0x95, 0x7d, 0x9c, 0xb3, 0x71, 0x86, 0x81, 0x69, 0x7b, 0x69,
+    0x76, 0xb6, 0x5d, 0x67, 0x8a, 0x68, 0x9c, 0xa6, 0x70, 0xbf, 0x79, 0x60,
+    0x8b, 0x7f, 0x7a, 0x7b, 0x8b, 0xaf, 0x8c, 0xa1, 0x86, 0x92, 0x76, 0x8d,
+    0x89, 0xa2, 0xa8, 0xa3, 0xa0, 0xa2, 0x96, 0x9d, 0x7c, 0x92, 0x3f, 0x9b,
+    0x6d, 0x8a, 0x80, 0x81, 0xa0, 0x92, 0x50, 0x7c, 0x82, 0x99, 0x80, 0xa6,
+    0x8e, 0x8d, 0x4f, 0x8d, 0x65, 0x71, 0x77, 0x81, 0x51, 0xa6, 0x3f, 0x5c,
+    0x63, 0x6f, 0x61, 0x93, 0x5c, 0xaa, 0x77, 0x8f, 0x5d, 0x53, 0x79, 0x74,
+    0x6b, 0x94, 0x86, 0x81, 0x85, 0x48, 0x81, 0x80, 0x6b, 0x85, 0x6c, 0x91,
+    0x92, 0x6a, 0x74, 0x78, 0x72, 0x87, 0x6c, 0x82, 0x88, 0x7b, 0x93, 0x71,
+    0x91, 0x8d, 0x67, 0x83, 0x86, 0x5b, 0x86, 0x79, 0x81, 0x9f, 0x95, 0x8a,
+    0x70, 0x66, 0x9e, 0x6b, 0x72, 0x98, 0x97, 0x95, 0x72, 0x93, 0x84, 0x92,
+    0x8c, 0x96, 0xa2, 0x65, 0x80, 0x75, 0xa2, 0xa7, 0x7d, 0x97, 0x71, 0x8f,
+    0x69, 0x65, 0x8f, 0xae, 0x9c, 0x97, 0x5d, 0xb3, 0x98, 0x83, 0x98, 0xa0,
+    0x5f, 0x7e, 0x7a, 0x7a, 0x87, 0x7c, 0x92, 0xa0, 0x81, 0xa6, 0x71, 0x8e,
+    0x88, 0x52, 0xa3, 0x88, 0x6a, 0x9d, 0x84, 0x82, 0x7c, 0x78, 0x9f, 0x92,
+    0x66, 0xa4, 0x53, 0x6a, 0x7e, 0x84, 0x60, 0x84, 0x92, 0xb0, 0x93, 0x9d,
+    0xa0, 0x5f, 0x95, 0x8c, 0x77, 0xa1, 0x8c, 0x90, 0xa0, 0x9c, 0x9a, 0x95,
+    0x85, 0xa1, 0x22, 0x8f, 0x57, 0x80, 0x96, 0x7d, 0x92, 0x8b, 0x41, 0xa6,
+    0x61, 0xa2, 0x6f, 0x80, 0x5d, 0x91, 0x66, 0xab, 0x6d, 0x7e, 0x88, 0x93,
+    0x5c, 0xa5, 0x75, 0x6e, 0x6c, 0x86, 0x69, 0x73, 0x4e, 0x8e, 0x77, 0x6b,
+    0x6c, 0x60, 0x67, 0x91, 0x75, 0x91, 0x6c, 0x7c, 0x53, 0x6e, 0x75, 0x8e,
+    0x79, 0x8c, 0x8b, 0x74, 0x6b, 0x57, 0x71, 0xa1, 0x7f, 0x83, 0x6c, 0x6b,
+    0x93, 0x99, 0x7a, 0x78, 0x71, 0x8c, 0x78, 0x88, 0x9f, 0x85, 0x77, 0x7b,
+    0x86, 0x85, 0xa1, 0x61, 0x78, 0x65, 0x61, 0x75, 0x82, 0x7d, 0xa9, 0xa2,
+    0x84, 0x82, 0x94, 0x95, 0x90, 0x9f, 0x83, 0x97, 0x76, 0x95, 0x8a, 0x83,
+    0x9b, 0x87, 0x8b, 0x7a, 0x6c, 0x6e, 0x75, 0x95, 0x85, 0x95, 0x84, 0x9e,
+    0x96, 0x74, 0x7d, 0xa5, 0x85, 0x8e, 0x7e, 0x73, 0x85, 0x8d, 0x87, 0x80,
+    0x8a, 0x96, 0x65, 0x87, 0x7c, 0x73, 0x80, 0x96, 0x73, 0x8d, 0x5e, 0x79,
+    0x7e, 0x8d, 0x79, 0x85, 0x63, 0xa0, 0x62, 0x89, 0x9d, 0x8c, 0x74, 0x7b,
+    0x9c, 0xa5, 0x71, 0x8c, 0x83, 0x91, 0x8e, 0x8d, 0x89, 0x8b, 0x8b, 0xa4,
+    0x78, 0x88, 0x9e, 0x85, 0x8b, 0x94, 0x38, 0x84, 0x7b, 0x86, 0x7d, 0xa2,
+    0x73, 0x8f, 0x47, 0x7b, 0x69, 0xb4, 0x85, 0x71, 0x61, 0x9d, 0x59, 0x95,
+    0x74, 0x93, 0x6a, 0x88, 0x62, 0xa2, 0x56, 0x93, 0x8d, 0x68, 0x7e, 0x80,
+    0x6b, 0xb7, 0x63, 0x90, 0x5d, 0x54, 0x6c, 0x90, 0x5a, 0x8e, 0x7e, 0x7d,
+    0x82, 0x73, 0x7f, 0x89, 0x94, 0x8e, 0x7a, 0x70, 0x6c, 0x79, 0x88, 0x88,
+    0x9b, 0x8b, 0x70, 0x81, 0x83, 0x83, 0x8b, 0x86, 0x64, 0x93, 0x82, 0x66,
+    0x66, 0x79, 0x74, 0x91, 0x92, 0x94, 0x7c, 0x87, 0x72, 0x79, 0x8d, 0xaa,
+    0xa2, 0x9e, 0xaf, 0x95, 0xb1, 0x8a, 0x95, 0x8b, 0x94, 0x7e, 0x79, 0x8e,
+    0x99, 0x98, 0x97, 0x9e, 0x94, 0x87, 0x74, 0x72, 0x63, 0x92, 0x92, 0x95,
+    0xb0, 0x94, 0x86, 0x91, 0x77, 0x8f, 0x91, 0x7e, 0x83, 0x88, 0x90, 0xa5,
+    0x79, 0x70, 0x85, 0x8f, 0x67, 0x90, 0x98, 0x8d, 0x8a, 0x5d, 0x8c, 0x9c,
+    0x94, 0x91, 0x80, 0x95, 0x6e, 0x95, 0x73, 0x8d, 0x63, 0x8e, 0x53, 0x8a,
+    0x77, 0x88, 0x8f, 0x6f, 0x87, 0x9e, 0x8b, 0xb7, 0x99, 0xb2, 0x85, 0x82,
+    0xa1, 0x89, 0x9b, 0xa7, 0x80, 0x81, 0xa0, 0x8e, 0x84, 0xa9, 0x27, 0x73,
+    0x5e, 0x85, 0x5f, 0x92, 0x8c, 0xa2, 0x34, 0x8e, 0x6e, 0xb2, 0x7b, 0x8c,
+    0x69, 0x93, 0x47, 0x9e, 0x58, 0x7e, 0x94, 0x86, 0x47, 0xa3, 0x53, 0x6b,
+    0x6e, 0x6a, 0x7f, 0x73, 0x5b, 0x8c, 0x7a, 0x99, 0x6c, 0x5d, 0x82, 0x82,
+    0x62, 0x8a, 0x7a, 0x8e, 0x88, 0x62, 0xa0, 0x8e, 0x5c, 0x9a, 0x72, 0x79,
+    0x66, 0x6b, 0x75, 0x78, 0x82, 0x8a, 0x59, 0x91, 0x93, 0x68, 0x78, 0xb4,
+    0x86, 0x7e, 0x8c, 0x6e, 0x88, 0x7f, 0x96, 0x8e, 0x6e, 0x8b, 0x8c, 0x73,
+    0xab, 0x79, 0x88, 0xa6, 0x86, 0x81, 0x9a, 0x80, 0x9a, 0x9e, 0x8b, 0x6d,
+    0x9a, 0x70, 0x8e, 0x8a, 0x84, 0x7a, 0xaf, 0xb8, 0x9e, 0x90, 0x89, 0xb3,
+    0x9b, 0x85, 0x94, 0xb6, 0x87, 0x8c, 0x6e, 0xa3, 0xac, 0x9e, 0x8c, 0x7c,
+    0x81, 0x83, 0x70, 0x8d, 0x7c, 0x81, 0x77, 0x82, 0x69, 0x8e, 0x5e, 0x80,
+    0x8a, 0x8e, 0x7c, 0x8a, 0x89, 0x90, 0x58, 0x59, 0x85, 0x88, 0x7a, 0x86,
+    0x73, 0x9c, 0x4a, 0x81, 0x8d, 0x89, 0x91, 0x95, 0x72, 0x83, 0x9d, 0x99,
+    0x8d, 0x6b, 0x95, 0x7e, 0x70, 0x94, 0x8c, 0x9f, 0x8a, 0x8f, 0xa7, 0x84,
+    0x87, 0xb6, 0x42, 0x81, 0x63, 0x8a, 0x79, 0x77, 0x74, 0x90, 0x23, 0x85,
+    0x74, 0x8f, 0x87, 0x80, 0x50, 0xa1, 0x4d, 0x9b, 0x55, 0x82, 0x74, 0x8e,
+    0x4a, 0xa7, 0x52, 0x4d, 0x77, 0x67, 0x77, 0x9e, 0x62, 0xa5, 0x7d, 0x96,
+    0x6f, 0x45, 0x80, 0x8c, 0x6c, 0x92, 0x99, 0x6f, 0x5d, 0x56, 0x93, 0xac,
+    0x94, 0x9c, 0x95, 0x92, 0x6e, 0x71, 0x87, 0x8c, 0x7b, 0xa9, 0x7f, 0x7a,
+    0x69, 0x6b, 0x7d, 0x90, 0x6f, 0x81, 0x9f, 0x80, 0x83, 0x67, 0x78, 0x85,
+    0x85, 0x91, 0x8a, 0x80, 0xaa, 0x86, 0x8c, 0x88, 0x8c, 0x8f, 0x9b, 0x85,
+    0x8b, 0x7e, 0x83, 0x82, 0x95, 0x75, 0x6b, 0x8f, 0x85, 0x8b, 0xb0, 0x9f,
+    0xa7, 0x8e, 0x61, 0x9d, 0x72, 0xac, 0x92, 0x87, 0x94, 0x96, 0x68, 0x8f,
+    0x63, 0x85, 0x9c, 0xa8, 0x82, 0x9b, 0x85, 0x9b, 0x6b, 0x72, 0x83, 0x85,
+    0x90, 0x87, 0x74, 0xa4, 0x88, 0x57, 0x63, 0x90, 0x8e, 0x7b, 0x80, 0x81,
+    0x94, 0x74, 0x68, 0x8a, 0x7f, 0x86, 0x78, 0x72, 0x75, 0x67, 0x7a, 0x8a,
+    0x7a, 0x74, 0x8c, 0xad, 0x75, 0xa2, 0x7d, 0x9a, 0x9e, 0x83, 0x92, 0xa2,
+    0xa3, 0x98, 0xa5, 0x91, 0x84, 0xb0, 0x21, 0x9a, 0x5f, 0x8c, 0x7e, 0x86,
+    0x80, 0xa0, 0x16, 0x9b, 0x5b, 0x9c, 0x76, 0x8d, 0x77, 0x9f, 0x62, 0x86,
+    0x6a, 0x6c, 0x6e, 0x8f, 0x4e, 0xc1, 0x61, 0x6f, 0x74, 0x79, 0x80, 0x5f,
+    0x59, 0x9e, 0x7c, 0x87, 0x7f, 0x4b, 0x6c, 0x8b, 0x5a, 0x8f, 0x65, 0x8a,
+    0x62, 0x58, 0x66, 0x8d, 0x83, 0x97, 0x8a, 0x7a, 0x77, 0x79, 0x6c, 0x83,
+    0x8c, 0x93, 0x82, 0x5e, 0x61, 0x8c, 0x82, 0x80, 0x88, 0x88, 0x85, 0x87,
+    0x77, 0x70, 0x8d, 0x7f, 0x7a, 0x89, 0x72, 0x7e, 0xa3, 0x99, 0x6b, 0xaa,
+    0x81, 0x87, 0x90, 0x6f, 0x7f, 0x77, 0x96, 0x83, 0x89, 0x89, 0x6a, 0x77,
+    0xa4, 0x6c, 0x97, 0x7e, 0x95, 0xa4, 0x63, 0x8d, 0x71, 0x96, 0x8a, 0xa4,
+    0x9f, 0x7c, 0x54, 0x94, 0x7a, 0x89, 0x8a, 0x90, 0x7e, 0x9d, 0x53, 0x7c,
+    0x9d, 0x83, 0x90, 0x84, 0xa1, 0x8e, 0x80, 0x74, 0x69, 0x7a, 0x69, 0x93,
+    0x8a, 0x90, 0x83, 0x76, 0x8b, 0x6f, 0x8e, 0x93, 0x82, 0x84, 0x7d, 0x94,
+    0xa1, 0x78, 0x7d, 0x68, 0x79, 0x83, 0x85, 0x9d, 0x89, 0xa0, 0x8a, 0x93,
+    0x90, 0x8c, 0x82, 0x86, 0x80, 0x71, 0xb3, 0xa1, 0x90, 0xb2, 0x27, 0xa3,
+    0x5e, 0xa3, 0xa6, 0x64, 0x75, 0xa0, 0x23, 0x8c, 0x7c, 0xc4, 0x7a, 0x8c,
+    0x4d, 0xa3, 0x4c, 0x93, 0x71, 0x7b, 0x71, 0x8b, 0x34, 0xa5, 0x47, 0x7f,
+    0x4e, 0x73, 0x51, 0x8a, 0x67, 0xa0, 0x9d, 0x7f, 0x65, 0x38, 0x61, 0x70,
+    0x71, 0x8d, 0x6a, 0x7e, 0x7e, 0x4c, 0x7d, 0x8d, 0x81, 0x80, 0xa5, 0x84,
+    0x6f, 0x57, 0x70, 0x91, 0x8b, 0x99, 0x9d, 0x84, 0x77, 0x7f, 0x6b, 0x7f,
+    0x76, 0x8f, 0x90, 0x72, 0x6c, 0x58, 0x6b, 0x85, 0xa6, 0x8a, 0xa2, 0x6d,
+    0x8a, 0x71, 0x71, 0x95, 0x92, 0x7c, 0x88, 0x67, 0x86, 0x6d, 0x8d, 0x95,
+    0x79, 0x8e, 0x65, 0x71, 0x71, 0x91, 0x85, 0x99, 0xa9, 0x87, 0x80, 0x88,
+    0x74, 0x86, 0x75, 0x83, 0x8b, 0x7f, 0x78, 0xb1, 0x90, 0xa8, 0x7b, 0x98,
+    0x8a, 0x7b, 0x5b, 0x99, 0x6f, 0x7f, 0xa0, 0x79, 0xa5, 0x93, 0x8b, 0x7b,
+    0x7e, 0x7a, 0x61, 0x9d, 0x98, 0x8b, 0x82, 0x7c, 0x76, 0x73, 0x81, 0x8a,
+    0x7e, 0x8d, 0x6e, 0x71, 0xa0, 0x65, 0x80, 0x62, 0x7d, 0x8d, 0x5e, 0x9b,
+    0x8f, 0x85, 0x89, 0xad, 0x71, 0x73, 0x7f, 0x89, 0x8d, 0x89, 0xb3, 0xa1,
+    0x7c, 0xaf, 0x43, 0x82, 0x49, 0x92, 0x62, 0x7f, 0x79, 0xa6, 0x23, 0x99,
+    0x6c, 0x9a, 0x8a, 0x90, 0x6c, 0xb9, 0x6f, 0x8a, 0x61, 0x7f, 0x8f, 0x8a,
+    0x57, 0xb9, 0x55, 0x65, 0x4b, 0x51, 0x66, 0x6e, 0x4a, 0xa1, 0x83, 0x8a,
+    0x73, 0x23, 0x8a, 0x6d, 0x46, 0xa7, 0x87, 0x64, 0x84, 0x5f, 0x6f, 0x6f,
+    0x9b, 0x9d, 0x76, 0x83, 0x60, 0x6e, 0x76, 0x8a, 0x9a, 0xa6, 0x75, 0x73,
+    0x86, 0x5b, 0x97, 0x88, 0x7b, 0x8e, 0x82, 0x5c, 0x97, 0x71, 0x74, 0x85,
+    0x83, 0x91, 0x89, 0x6f, 0x93, 0x94, 0x8b, 0xa9, 0x7d, 0x84, 0x80, 0x89,
+    0x97, 0x80, 0x65, 0x92, 0x9a, 0x85, 0x5a, 0x6a, 0x6b, 0x58, 0x6f, 0x8c,
+    0x9a, 0x8b, 0x6e, 0x81, 0x9d, 0xae, 0x8c, 0x86, 0x8d, 0x90, 0x6c, 0xb8,
+    0x91, 0x89, 0x98, 0xbd, 0x8b, 0x78, 0x7d, 0x87, 0x9c, 0x72, 0x73, 0x80,
+    0x9e, 0x92, 0x5d, 0x77, 0x78, 0x4f, 0x87, 0x7b, 0x7a, 0x9e, 0x74, 0x67,
+    0x6a, 0x58, 0x95, 0x80, 0x75, 0x97, 0x81, 0x75, 0x94, 0x75, 0x73, 0x92,
+    0x83, 0x7b, 0x6b, 0x8e, 0x82, 0x6e, 0x7d, 0x9b, 0x91, 0x7f, 0x9e, 0xaa,
+    0x8c, 0xa3, 0xa8, 0x8c, 0x9a, 0xc1, 0x28, 0xac, 0x49, 0x9b, 0x59, 0x8a,
+    0x60, 0xa7, 0x39, 0xa7, 0x75, 0x9b, 0x95, 0x94, 0x76, 0xb3, 0x4a, 0x6b,
+    0x60, 0x6c, 0xa5, 0x71, 0x40, 0xc4, 0x4c, 0x7c, 0x76, 0x7b, 0x67, 0x76,
+    0x76, 0xa4, 0x7b, 0x83, 0x67, 0x4d, 0x87, 0x87, 0x6e, 0x93, 0x84, 0x70,
+    0x78, 0x41, 0x87, 0x9f, 0x7a, 0x8c, 0x87, 0x69, 0x73, 0x6c, 0x93, 0x73,
+    0x77, 0xa2, 0x52, 0x72, 0x5c, 0x75, 0x6c, 0x8f, 0x65, 0x92, 0x87, 0x52,
+    0x67, 0x54, 0x54, 0x75, 0x90, 0x9c, 0x91, 0x6f, 0xa3, 0x86, 0x87, 0x9c,
+    0x99, 0x86, 0x9f, 0x71, 0x8a, 0x7a, 0x7a, 0x97, 0x7a, 0x86, 0x6c, 0x99,
+    0x89, 0x7e, 0x9c, 0x83, 0x98, 0x78, 0x73, 0x7f, 0x91, 0x96, 0x9a, 0x8d,
+    0xb0, 0x9e, 0x6a, 0x80, 0x92, 0x86, 0x95, 0x83, 0x94, 0x92, 0x6f, 0x86,
+    0x8a, 0x52, 0x6e, 0x82, 0x84, 0x8b, 0x77, 0x88, 0x70, 0x54, 0x8f, 0x7f,
+    0x7d, 0x7e, 0x57, 0x89, 0x6d, 0x6f, 0x9c, 0x93, 0x90, 0x93, 0x52, 0x70,
+    0x75, 0x92, 0x73, 0x88, 0x93, 0x77, 0x77, 0x91, 0x89, 0xa2, 0x9d, 0xa6,
+    0xae, 0x84, 0x7d, 0xab, 0x92, 0x7e, 0x9c, 0x98, 0x7b, 0xc3, 0x38, 0x98,
+    0x4f, 0x97, 0x8f, 0x93, 0x62, 0xb8, 0x23, 0xa4, 0x6d, 0x9c, 0x81, 0x8e,
+    0x6f, 0x9d, 0x56, 0x89, 0x50, 0x94, 0x70, 0x77, 0x5d, 0xb7, 0x60, 0x5b,
+    0x72, 0x45, 0x81, 0x8c, 0x66, 0xbc, 0x8f, 0x7f, 0x57, 0x43, 0x85, 0x96,
+    0x5a, 0xb2, 0x91, 0x7d, 0x6c, 0x3a, 0x73, 0x92, 0x63, 0x93, 0x89, 0x90,
+    0x7f, 0x52, 0x7f, 0x7b, 0xa1, 0xa6, 0x8f, 0x60, 0x78, 0x51, 0x5f, 0xac,
+    0x7b, 0x89, 0x88, 0x97, 0x7e, 0x64, 0x57, 0x72, 0x6c, 0x96, 0x74, 0x78,
+    0xab, 0x66, 0x62, 0x8d, 0x6f, 0x86, 0x91, 0x93, 0x7d, 0x74, 0x82, 0x80,
+    0x73, 0x84, 0x9c, 0x8e, 0x68, 0x69, 0x9e, 0xa1, 0x8a, 0x83, 0x7a, 0x87,
+    0x94, 0x8c, 0x83, 0x7e, 0x91, 0x92, 0x82, 0x7b, 0xa0, 0x8e, 0x73, 0x86,
+    0xa9, 0x95, 0x7c, 0xa5, 0x6c, 0x6f, 0x8c, 0x87, 0xa6, 0x8a, 0x77, 0x86,
+    0x7d, 0x79, 0x89, 0x75, 0x8f, 0x82, 0x54, 0x61, 0x82, 0x8e, 0x80, 0x84,
+    0x7b, 0x8e, 0x61, 0x82, 0x86, 0x77, 0x7d, 0x7c, 0x7e, 0x6c, 0x7b, 0xad,
+    0x7b, 0x90, 0x88, 0x80, 0x64, 0x83, 0x7e, 0xa7, 0x83, 0x7e, 0xb5, 0xbb,
+    0x88, 0xd9, 0x21, 0x9a, 0x4d, 0x9f, 0x91, 0x97, 0x64, 0xb5, 0x1c, 0x8a,
+    0x5f, 0xaf, 0x7e, 0x7b, 0x67, 0xad, 0x48, 0x7f, 0x4e, 0x87, 0x8f, 0x7c,
+    0x46, 0xab, 0x70, 0x7f, 0x4b, 0x4e, 0x48, 0x8c, 0x63, 0xc5, 0xa2, 0x7f,
+    0x68, 0x3b, 0x59, 0x7f, 0x53, 0xa1, 0x8e, 0x6e, 0x7a, 0x4a, 0x5f, 0x62,
+    0x5b, 0xa1, 0x62, 0x78, 0x74, 0x57, 0x78, 0x91, 0x7b, 0x9b, 0x75, 0x73,
+    0x73, 0x72, 0x94, 0x92, 0x79, 0xaa, 0x94, 0x75, 0x86, 0x58, 0x8c, 0x71,
+    0x77, 0x91, 0xa5, 0x74, 0x8f, 0x73, 0x89, 0x77, 0x68, 0x8e, 0x90, 0x96,
+    0x9f, 0x79, 0x77, 0x7d, 0x89, 0x9b, 0x8c, 0x94, 0x81, 0x88, 0x91, 0x8f,
+    0x9b, 0x91, 0x78, 0x87, 0x82, 0x72, 0xa7, 0xa2, 0x85, 0x98, 0xa3, 0x91,
+    0x83, 0x75, 0x72, 0x93, 0x80, 0x8f, 0x85, 0x70, 0x97, 0x58, 0x9f, 0x72,
+    0x91, 0x8e, 0x93, 0x74, 0x97, 0x73, 0x74, 0x91, 0x80, 0x84, 0x96, 0x94,
+    0x76, 0x69, 0x66, 0x9e, 0x81, 0x8a, 0x8b, 0x63, 0x65, 0x7c, 0xa1, 0x9a,
+    0x72, 0x84, 0x9e, 0x89, 0x9a, 0x86, 0x98, 0x7f, 0x77, 0x85, 0x82, 0xaa,
+    0xa3, 0x88, 0xac, 0x9e, 0x76, 0xca, 0x2b, 0xa0, 0x40, 0xad, 0x6f, 0x6c,
+    0x66, 0xc8, 0x07, 0x9e, 0x3e, 0x9f, 0x85, 0x9f, 0x5e, 0xb7, 0x53, 0x91,
+    0x56, 0x6d, 0x62, 0x95, 0x4c, 0xc7, 0x46, 0x56, 0x4b, 0x5d, 0x6f, 0x52,
+    0x4d, 0xa3, 0x8c, 0x90, 0x78, 0x4d, 0x58, 0x8d, 0x53, 0x93, 0x8e, 0x68,
+    0x6f, 0x3b, 0x49, 0x86, 0x6e, 0x9d, 0x76, 0x74, 0x5b, 0x44, 0x7b, 0x8c,
+    0x89, 0xb0, 0x64, 0x62, 0x6a, 0x6d, 0x7a, 0xae, 0x84, 0x95, 0x8c, 0x71,
+    0x8b, 0x60, 0x82, 0x9e, 0x8c, 0xa8, 0x90, 0x66, 0xa1, 0x7b, 0x65, 0x82,
+    0x8f, 0x7d, 0x8d, 0x78, 0x8e, 0x5f, 0x75, 0x88, 0x5d, 0x93, 0xa1, 0x93,
+    0x6b, 0x67, 0x7a, 0xa7, 0x92, 0x8c, 0x65, 0x88, 0x95, 0x93, 0x87, 0x81,
+    0x9c, 0x97, 0x62, 0x9d, 0x90, 0x62, 0xa1, 0x9f, 0x87, 0x94, 0x94, 0x99,
+    0x92, 0x8f, 0x71, 0x80, 0x77, 0x82, 0x92, 0x78, 0x67, 0x69, 0x7e, 0x81,
+    0x93, 0x89, 0x80, 0x9b, 0x71, 0x57, 0x63, 0x83, 0x7b, 0x9f, 0x5d, 0x92,
+    0x85, 0x96, 0x7e, 0x92, 0x84, 0x7f, 0x81, 0xa3, 0xa8, 0x96, 0x91, 0x8e,
+    0x8c, 0x8e, 0x7d, 0xb0, 0x86, 0x72, 0x9d, 0x8e, 0x8e, 0xd0, 0x05, 0x77,
+    0x45, 0xad, 0x91, 0x95, 0x71, 0xb8, 0x01, 0x9a, 0x41, 0xb8, 0x94, 0x6e,
+    0x63, 0xd3, 0x58, 0x8c, 0x5a, 0x89, 0x85, 0x83, 0x52, 0xc1, 0x7b, 0x6a,
+    0x65, 0x6e, 0x73, 0x63, 0x68, 0xba, 0x67, 0x78, 0x79, 0x4a, 0x73, 0x8f,
+    0x51, 0xc9, 0x85, 0x8a, 0x6b, 0x45, 0x6a, 0x8f, 0x6c, 0xad, 0x8a, 0x8d,
+    0x6a, 0x6e, 0x6b, 0x7f, 0x86, 0xb4, 0x88, 0x7d, 0xaa, 0x71, 0x5c, 0x69,
+    0x5d, 0xa8, 0x62, 0x7d, 0x6c, 0x6e, 0x6f, 0x6a, 0x7c, 0x9d, 0x7a, 0x83,
+    0x7d, 0x79, 0x7b, 0x9c, 0x73, 0x93, 0x7f, 0x9d, 0x8c, 0x75, 0x78, 0x83,
+    0x85, 0x88, 0x81, 0x81, 0x98, 0x79, 0xa3, 0xae, 0x5b, 0x90, 0x89, 0x9d,
+    0x6d, 0x90, 0xa3, 0x8e, 0x87, 0x96, 0x60, 0xa7, 0x76, 0x82, 0x81, 0x84,
+    0x84, 0x9c, 0x73, 0x8a, 0x6c, 0x58, 0x64, 0x96, 0x89, 0x8b, 0x76, 0x60,
+    0x91, 0x72, 0x7f, 0x86, 0x9a, 0x89, 0x67, 0x7d, 0x77, 0x84, 0x73, 0x5c,
+    0x67, 0x8a, 0x82, 0x8c, 0x8c, 0x94, 0x8a, 0xa2, 0xaa, 0x7e, 0x5f, 0x7f,
+    0x86, 0x90, 0x96, 0xab, 0x8d, 0x91, 0x7c, 0xb6, 0x82, 0x8d, 0xb8, 0xa9,
+    0x92, 0xea, 0x1b, 0x74, 0x25, 0xab, 0x8d, 0x61, 0x81, 0xd8, 0x2c, 0x86,
+    0x2f, 0xcf, 0xa2, 0x84, 0x7f, 0xa4, 0x36, 0x86, 0x47, 0x8d, 0x60, 0x8a,
+    0x62, 0xb1, 0x4a, 0x54, 0x48, 0x73, 0x64, 0x9d, 0x72, 0xb2, 0x76, 0x4c,
+    0x8e, 0x4e, 0x76, 0x94, 0x7c, 0xad, 0x74, 0x6c, 0x6c, 0x54, 0x7f, 0x63,
+    0x97, 0xb3, 0x74, 0x6c, 0x99, 0x5f, 0x86, 0x6a, 0xa3, 0x94, 0x7c, 0x83,
+    0x8d, 0x81, 0x79, 0xac, 0x61, 0x9b, 0x65, 0x7b, 0x66, 0x89, 0x60, 0x76,
+    0x8d, 0x93, 0x8d, 0x84, 0x71, 0x65, 0x82, 0x8c, 0x94, 0xa7, 0x59, 0xa1,
+    0x8b, 0x72, 0x84, 0x65, 0x75, 0x95, 0x62, 0x71, 0x71, 0x7e, 0x7b, 0x97,
+    0x9b, 0x9a, 0x80, 0xb1, 0x77, 0x7a, 0x73, 0x8e, 0x9c, 0x8c, 0x7d, 0x96,
+    0x89, 0x7d, 0x7e, 0x80, 0x8e, 0x93, 0x63, 0x72, 0x6b, 0x57, 0x78, 0x8f,
+    0x90, 0x86, 0x62, 0x75, 0x7e, 0x54, 0x7d, 0x95, 0x85, 0x84, 0x73, 0x7b,
+    0x8f, 0x9e, 0x72, 0x8c, 0x90, 0x96, 0x8e, 0x6c, 0x80, 0x8b, 0x9e, 0x8c,
+    0x87, 0x8e, 0x9b, 0x97, 0x8f, 0x94, 0xa3, 0x6b, 0xad, 0x93, 0x8a, 0x96,
+    0x8d, 0x91, 0xa6, 0x8a, 0x9e, 0xce, 0x6b, 0x98, 0x6d, 0xa9, 0x92, 0x92,
+    0x7c, 0xe2, 0x63, 0x97, 0x42, 0xc8, 0xa3, 0xa0, 0x88, 0xdc, 0x75, 0x9b,
+    0x51, 0x7d, 0x5c, 0x80, 0x89, 0xc0, 0x83, 0x5e, 0x5e, 0xa4, 0x3e, 0x74,
+    0x9b, 0xb6, 0x7f, 0x63, 0x78, 0x7d, 0x74, 0x57, 0x93, 0xa2, 0x83, 0x70,
+    0x5e, 0x7d, 0x60, 0x69, 0x93, 0x9e, 0x79, 0x86, 0x91, 0x67, 0x86, 0x95,
+    0xa2, 0xad, 0x62, 0x74, 0x68, 0x7e, 0x7e, 0x82, 0x8c, 0xb0, 0xa0, 0x63,
+    0x8b, 0x82, 0x8f, 0x8c, 0xa4, 0xa3, 0x76, 0x6c, 0x8e, 0x87, 0x72, 0x85,
+    0xaa, 0xa4, 0x7f, 0x7b, 0x8e, 0x9a, 0x69, 0x91, 0x9d, 0xa0, 0x81, 0x92,
+    0x90, 0x85, 0x66, 0x82, 0xa3, 0xa9, 0x7f, 0x8f, 0x83, 0x9d, 0x8b, 0x8d,
+    0x96, 0xa3, 0x8f, 0x7a, 0x6d, 0x89, 0x74, 0x8a, 0xa9, 0xa9, 0x7b, 0x77,
+    0x93, 0x8b, 0x63, 0x92, 0x99, 0x8b, 0x88, 0x4f, 0x87, 0x7c, 0x67, 0x78,
+    0x83, 0xa5, 0xa5, 0x58, 0x8d, 0x70, 0x86, 0x82, 0x9e, 0xa7, 0xa5, 0x96,
+    0x8d, 0x7b, 0x96, 0x8c, 0x95, 0xa3, 0x8d, 0x9c, 0x92, 0x95, 0x98, 0x94,
+    0x87, 0x90, 0x92, 0x92, 0x95, 0x96, 0xad, 0x6e, 0x97, 0x8c, 0x92, 0x7f,
+    0x95, 0x8b, 0x8a, 0x90, 0x9b, 0x87, 0x9e, 0x86, 0x91, 0xa0, 0x68, 0x82,
+    0x85, 0x8e, 0x82, 0xa8, 0x9f, 0x68, 0x87, 0x75, 0x9b, 0x70, 0x95, 0x91,
+    0x6c, 0x77, 0x8b, 0x7b, 0x95, 0x80, 0x99, 0x65, 0x95, 0x82, 0x92, 0x9a,
+    0x8a, 0x65, 0x70, 0x8c, 0x98, 0x9e, 0x80, 0x7b, 0xa5, 0x9b, 0x93, 0x94,
+    0x84, 0x6a, 0x69, 0x82, 0x80, 0x7a, 0x75, 0x72, 0x94, 0x79, 0xad, 0xb2,
+    0x81, 0x8b, 0x85, 0x6c, 0x86, 0x88, 0x9e, 0x79, 0x86, 0x9e, 0x7e, 0x91,
+    0x7b, 0x6d, 0x93, 0x91, 0x82, 0x97, 0x6b, 0xa6, 0xaa, 0x9f, 0xa8, 0x74,
+    0x94, 0x7f, 0x63, 0x98, 0x90, 0xa1, 0x8c, 0x7f, 0x71, 0x86, 0x89, 0x95,
+    0x88, 0x80, 0x77, 0x67, 0x85, 0x7d, 0x89, 0x6d, 0x9c, 0x76, 0x72, 0x8d,
+    0x96, 0x94, 0x88, 0x98, 0x9f, 0x94, 0x8e, 0x84, 0x7a, 0x88, 0x79, 0x9f,
+    0x81, 0xa1, 0x7c, 0x8b, 0x71, 0x79, 0x7d, 0x9d, 0x7b, 0x6a, 0x8c, 0x66,
+    0x9e, 0x7b, 0x77, 0x7a, 0xb0, 0x74, 0x7f, 0x8d, 0x8d, 0x71, 0x72, 0x84,
+    0x90, 0x98, 0x7b, 0x89, 0x9b, 0x8e, 0x85, 0x7a, 0x67, 0x8a, 0x72, 0x84,
+    0x82, 0x91, 0x91, 0x7a, 0x85, 0x8a, 0xae, 0x8a, 0x9a, 0x9a, 0x7f, 0x85,
+    0x8a, 0x90, 0x69, 0x7b, 0x76, 0x78, 0x98, 0x54, 0x94, 0x7e, 0x6c, 0x72,
+    0x89, 0x88, 0x82, 0x96, 0x59, 0x95, 0x76, 0x91, 0x94, 0x96, 0x83, 0x84,
+    0x72, 0x8d, 0x97, 0x71, 0x68, 0x8e, 0x88, 0x8b, 0x7c, 0xa9, 0x73, 0x8a,
+    0x95, 0x86, 0x87, 0x96, 0x91, 0x77, 0xb1, 0x88, 0x6e, 0x7d, 0x7c, 0x9f,
+    0x8f, 0x82, 0x79, 0x83, 0xa6, 0x81, 0x89, 0x83, 0x85, 0x9b, 0x7c, 0x68,
+    0x6f, 0x84, 0x7c, 0xa1, 0x8e, 0x80, 0x78, 0x8f, 0x96, 0x77, 0x7e, 0x7b,
+    0x8f, 0x81, 0xa5, 0x84, 0x86, 0x91, 0x7b, 0x73, 0x92, 0x85, 0xa3, 0x7e,
+    0x80, 0x95, 0x7d, 0x5f, 0x8c, 0x94, 0x95, 0x73, 0x95, 0x78, 0x87, 0xa1,
+    0x94, 0x6c, 0xac, 0x6c, 0x77, 0x89, 0x86, 0x9c, 0x82, 0x76, 0x99, 0x93,
+    0x92, 0x88, 0x80, 0x80, 0x85, 0x8a, 0xa8, 0x8f, 0x7a, 0x89, 0x9a, 0x7a,
+    0x8f, 0x91, 0x86, 0x82, 0x7f, 0x82, 0x91, 0x95, 0x85, 0x71, 0x7d, 0x8f,
+    0x83, 0x8c, 0x79, 0x97, 0x7a, 0x9b, 0x91, 0x88, 0xa2, 0x86, 0x8a, 0x80,
+    0xa0, 0x96, 0x8b, 0x7d, 0x76, 0x96, 0x9f, 0x8d, 0x95, 0x8a, 0x94, 0xa0,
+    0x80, 0x95, 0x9b, 0x96, 0x81, 0xa8, 0x59, 0x89, 0x92, 0xb2, 0x83, 0x89,
+    0x85, 0x81, 0x7e, 0x64, 0x77, 0x82, 0x90, 0x96, 0x7e, 0x9f, 0xab, 0x8a,
+    0x6e, 0x9b, 0x90, 0x89, 0x6e, 0x7d, 0x81, 0x65, 0x81, 0x86, 0xa1, 0x93,
+    0x8b, 0x83, 0x81, 0x89, 0x8b, 0x90, 0x7e, 0x97, 0x8e, 0x75, 0x7e, 0x7e,
+    0x7b, 0x81, 0x9a, 0x64, 0x90, 0xab, 0x90, 0x82, 0x8a, 0x82, 0x8d, 0xad,
+    0x90, 0x74, 0x7f, 0x9a, 0x88, 0x92, 0x83, 0x97, 0xa6, 0x6e, 0x9d, 0x81,
+    0xa2, 0x98, 0x74, 0x84, 0x93, 0x85, 0x84, 0x7d, 0xa2, 0x92, 0x92, 0x87,
+    0x73, 0x8b, 0x92, 0x74, 0x96, 0x70, 0x83, 0x86, 0x8a, 0x89, 0x86, 0x88,
+    0x87, 0x7c, 0x7d, 0x81, 0x8d, 0x71, 0x8c, 0x89, 0x70, 0x94, 0x8f, 0x9a,
+    0x83, 0x9d, 0x99, 0x78, 0x74, 0x88, 0x84, 0x9a, 0x95, 0x8b, 0x8e, 0x7f,
+    0xa2, 0xa0, 0x76, 0x93, 0x9b, 0x7c, 0x97, 0x81, 0x83, 0x8c, 0xa1, 0x99,
+    0x9d, 0x7f, 0x87, 0x75, 0xa7, 0x75, 0x89, 0x7e, 0x88, 0x80, 0x8f, 0x84,
+    0x9a, 0x77, 0x8d, 0x90, 0x9d, 0x6c, 0x88, 0x8d, 0x8e, 0x81, 0x97, 0x6d,
+    0x81, 0x88, 0x64, 0x8c, 0x77, 0x8e, 0x91, 0x8a, 0x7f, 0x8a, 0x94, 0x7a,
+    0x89, 0x93, 0x8c, 0x69, 0x85, 0x8c, 0x93, 0x61, 0x7e, 0x89, 0x7e, 0x8a,
+    0x65, 0x8a, 0xa9, 0x7f, 0x80, 0x86, 0x82, 0x90, 0x66, 0x7a, 0x99, 0x71,
+    0x7f, 0x73, 0x8d, 0x94, 0x7d, 0x73, 0x7a, 0x7d, 0x87, 0x7a, 0x97, 0x70,
+    0x81, 0x60, 0x61, 0x7a, 0x91, 0x88, 0x93, 0x7a, 0x9e, 0xa6, 0x92, 0x9d,
+    0x92, 0x67, 0x99, 0x9a, 0xae, 0x71, 0x89, 0xa5, 0x9f, 0xa6, 0x98, 0x89,
+    0x97, 0x90, 0x9b, 0x9a, 0xc0, 0x95, 0x8f, 0x9c, 0x95, 0x93, 0x88, 0x95,
+    0x95, 0xa0, 0x8e, 0x8c, 0xa8, 0x94, 0x6e, 0x9e, 0x6f, 0x7b, 0xa5, 0x96,
+    0x98, 0x90, 0x91, 0x89, 0x93, 0x8f, 0x84, 0xb2, 0x7f, 0x5e, 0xc2, 0x75,
+    0x8f, 0x90, 0x9c, 0xbf, 0x8a, 0x84, 0xa6, 0x85, 0x7d, 0x84, 0x8a, 0xad,
+    0x6f, 0x88, 0xac, 0x77, 0x91, 0x8d, 0x94, 0xac, 0x8f, 0x7f, 0xa1, 0xa5,
+    0x8e, 0x6d, 0x8a, 0x82, 0x85, 0x80, 0x9b, 0x7a, 0x9f, 0x60, 0x95, 0x97,
+    0x90, 0x67, 0x8f, 0x91, 0x86, 0x89, 0x88, 0x89, 0x96, 0x6c, 0x8b, 0x94,
+    0x8a, 0x75, 0x84, 0x96, 0x8a, 0x86, 0x7c, 0x91, 0x74, 0x8f, 0x97, 0x89,
+    0x8f, 0x8e, 0x6b, 0x97, 0x93, 0x89, 0x6b, 0x7e, 0x65, 0xa4, 0xa5, 0x63,
+    0x85, 0x88, 0x81, 0xa3, 0x70, 0x9b, 0x9e, 0x8c, 0x62, 0x73, 0x85, 0xb4,
+    0x88, 0x6e, 0x92, 0x6f, 0x91, 0x88, 0x79, 0x91, 0x7f, 0x7d, 0x9a, 0x6b,
+    0x78, 0x93, 0x7e, 0x79, 0x93, 0x7a, 0x74, 0x91, 0x8d, 0x92, 0xb3, 0x61,
+    0xa3, 0x76, 0x81, 0x99, 0x96, 0x8b, 0x93, 0x8f, 0xa7, 0x6f, 0x8f, 0xa6,
+    0xb2, 0x76, 0xa1, 0x83, 0xa8, 0x8b, 0xae, 0x99, 0x90, 0x6a, 0x97, 0x97,
+    0xaa, 0x95, 0x85, 0x7d, 0x97, 0x94, 0x86, 0x94, 0x89, 0xa4, 0xa9, 0x81,
+    0x89, 0x7c, 0x96, 0xb3, 0x92, 0x7d, 0xa4, 0x6f, 0x6d, 0x92, 0x83, 0xb4,
+    0x7b, 0x94, 0x8c, 0x79, 0x61, 0x6f, 0x8f, 0xb7, 0x88, 0x66, 0xaa, 0x7d,
+    0x89, 0x7f, 0x90, 0xbd, 0x99, 0xac, 0xb1, 0x96, 0x9c, 0x7c, 0x92, 0xb7,
+    0x73, 0x94, 0xad, 0x9d, 0x7c, 0x80, 0x87, 0x96, 0x73, 0x8d, 0xa8, 0x88,
+    0xa9, 0x83, 0x7b, 0x84, 0x9d, 0x99, 0x83, 0x89, 0x9d, 0x7f, 0x7e, 0x86,
+    0x75, 0x83, 0x77, 0x7d, 0x8b, 0x7d, 0x80, 0x9d, 0xa2, 0x94, 0x72, 0x92,
+    0x75, 0x95, 0x99, 0xa0, 0x7b, 0x83, 0x99, 0x89, 0x82, 0x92, 0x5b, 0x9e,
+    0x7c, 0x91, 0x95, 0x79, 0x61, 0x86, 0x60, 0xc7, 0x72, 0x91, 0xb5, 0x88,
+    0x71, 0x8d, 0x85, 0x91, 0x83, 0x74, 0xa8, 0x67, 0x79, 0x77, 0x7f, 0x79,
+    0x68, 0x84, 0x95, 0x69, 0x98, 0x88, 0x74, 0x72, 0x9c, 0x86, 0x87, 0x95,
+    0x90, 0x95, 0x9b, 0x8b, 0xc5, 0x7d, 0x81, 0x8f, 0x88, 0x8c, 0xb0, 0x95,
+    0xa8, 0x8c, 0x84, 0xa0, 0xb0, 0x89, 0x9a, 0x90, 0xaa, 0x88, 0x96, 0x9b,
+    0x88, 0xa9, 0x89, 0x99, 0xb7, 0x82, 0x99, 0xa0, 0x85, 0x70, 0x9c, 0x9a,
+    0x94, 0x74, 0x91, 0x81, 0x76, 0x70, 0x8f, 0xc2, 0x8c, 0x91, 0x8f, 0x69,
+    0x74, 0x7e, 0x6d, 0x9a, 0x80, 0x77, 0xa5, 0x94, 0x8b, 0x6d, 0x82, 0xcf,
+    0x8e, 0x74, 0xc4, 0x86, 0x7f, 0x78, 0x72, 0xb3, 0x78, 0x7a, 0xac, 0x9c,
+    0x7d, 0x77, 0x8d, 0xca, 0x67, 0x8c, 0xd5, 0x8f, 0x7f, 0x71, 0x70, 0x82,
+    0x7e, 0x9f, 0xb0, 0x7f, 0x75, 0x90, 0x79, 0x7b, 0x8d, 0x7b, 0xa6, 0x87,
+    0x98, 0x76, 0x84, 0x96, 0x81, 0x6a, 0x96, 0x86, 0x8e, 0x77, 0xa3, 0x83,
+    0x91, 0x83, 0x8a, 0x6c, 0x74, 0x83, 0x99, 0x7d, 0x7c, 0x8a, 0x88, 0x9a,
+    0x6b, 0x86, 0x59, 0xa3, 0x8a, 0x8e, 0xbb, 0x8a, 0x75, 0x78, 0x68, 0xb5,
+    0x9b, 0x7b, 0xa7, 0x93, 0x5b, 0x6c, 0x6b, 0xa0, 0x74, 0x99, 0xc0, 0x73,
+    0x8b, 0x7e, 0x8e, 0x83, 0x64, 0x7c, 0x7d, 0x7a, 0x98, 0x7d, 0x82, 0x7c,
+    0x8f, 0x7e, 0x74, 0x86, 0xa9, 0x84, 0xba, 0x8f, 0xc7, 0x6f, 0x87, 0xae,
+    0x97, 0x91, 0xad, 0x82, 0xb2, 0x70, 0x8a, 0xa0, 0xb0, 0x7d, 0x95, 0x8d,
+    0xc2, 0x85, 0x80, 0xad, 0x9f, 0x85, 0x8b, 0x76, 0xaa, 0xab, 0x8f, 0xa0,
+    0x89, 0x9b, 0x8a, 0xb3, 0xa0, 0x72, 0xbe, 0x8c, 0x93, 0x7a, 0xa0, 0xad,
+    0x99, 0x6f, 0xa2, 0x79, 0x78, 0x8b, 0x6d, 0xae, 0x75, 0x6f, 0xa1, 0x8d,
+    0x68, 0x81, 0x74, 0xb3, 0x8f, 0x81, 0xc6, 0x96, 0x77, 0x68, 0x85, 0xaf,
+    0x86, 0x9f, 0xbb, 0x8a, 0x7e, 0x8a, 0x86, 0xab, 0x8b, 0x87, 0x94, 0x96,
+    0x99, 0x82, 0x6a, 0xaa, 0x7b, 0x81, 0xa6, 0x9b, 0xb6, 0x73, 0x78, 0x9a,
+    0x8f, 0xaa, 0x93, 0x81, 0x97, 0x7a, 0x72, 0x82, 0x79, 0x81, 0x7c, 0x88,
+    0x8e, 0x79, 0x9d, 0x81, 0x9a, 0x75, 0x9b, 0x89, 0x73, 0x6a, 0xa6, 0x84,
+    0x5c, 0x6f, 0xa0, 0x9d, 0x81, 0x84, 0x3e, 0xaf, 0x94, 0xa1, 0xb8, 0x93,
+    0x81, 0x89, 0x68, 0xd4, 0x87, 0x99, 0x99, 0x95, 0x79, 0x72, 0x81, 0xa1,
+    0x78, 0x7d, 0x8f, 0x7e, 0x87, 0x78, 0x8e, 0x97, 0x7e, 0x96, 0x86, 0x86,
+    0x97, 0x74, 0x6f, 0x7d, 0xa5, 0x81, 0x6f, 0x8e, 0x9e, 0x8b, 0xad, 0xac,
+    0xbd, 0x75, 0x84, 0xa2, 0x93, 0x76, 0xc7, 0x9e, 0xb0, 0x75, 0x89, 0xa4,
+    0x95, 0x92, 0xb5, 0xaa, 0xb9, 0x7d, 0x79, 0xa5, 0x88, 0x70, 0x84, 0x70,
+    0xa3, 0x81, 0xa1, 0xa6, 0x8f, 0x96, 0x96, 0x8d, 0xa5, 0x83, 0xb2, 0x8f,
+    0x88, 0x74, 0x96, 0xbc, 0x8b, 0x81, 0xa4, 0x85, 0x7c, 0x87, 0x64, 0xb4,
+    0x80, 0x88, 0x92, 0x90, 0x78, 0x79, 0x77, 0xa5, 0x79, 0x8b, 0xbd, 0x7d,
+    0x84, 0x8c, 0x96, 0xd4, 0x78, 0x81, 0xa4, 0x8c, 0x97, 0x89, 0x78, 0xc4,
+    0x9f, 0x94, 0xb9, 0x83, 0x76, 0x78, 0x89, 0x86, 0x81, 0x8f, 0xbd, 0xa7,
+    0x88, 0x79, 0x8e, 0x92, 0x86, 0x88, 0xad, 0x8a, 0x7b, 0x7f, 0x80, 0xad,
+    0x7a, 0xaf, 0x8a, 0x93, 0xa6, 0x84, 0x92, 0x8e, 0x84, 0x99, 0x80, 0xae,
+    0x74, 0x7c, 0x95, 0x9c, 0x7b, 0x84, 0x84, 0x84, 0xa4, 0x82, 0x57, 0xb5,
+    0x95, 0xc1, 0xb7, 0xa0, 0x85, 0x7b, 0x69, 0xc3, 0xb1, 0x8e, 0xa0, 0x8e,
+    0x81, 0x88, 0x78, 0x9e, 0x81, 0x97, 0xb2, 0x74, 0x81, 0x84, 0x91, 0x87,
+    0x6f, 0x6f, 0x75, 0x78, 0x92, 0x7a, 0x6d, 0x80, 0x9a, 0x7e, 0x81, 0xa1,
+    0xa8, 0x6d, 0xb5, 0x98, 0xb4, 0x7f, 0x9a, 0xa4, 0x9d, 0x7b, 0xba, 0xaa,
+    0xce, 0x93, 0x79, 0xa5, 0x81, 0x95, 0xa6, 0x7f, 0x8c, 0x8b, 0x96, 0xa4,
+    0xa1, 0x8d, 0x91, 0x97, 0xce, 0x8e, 0x8e, 0x9d, 0x86, 0x7f, 0x97, 0xa3,
+    0x99, 0x75, 0xa3, 0xa0, 0x69, 0x6a, 0x87, 0xa0, 0x9a, 0x80, 0xa2, 0x72,
+    0x6d, 0x85, 0x6b, 0x94, 0x8d, 0x77, 0x9f, 0x84, 0x7f, 0x92, 0x64, 0xaa,
+    0x78, 0x82, 0xa7, 0x8f, 0x84, 0x79, 0x84, 0xb9, 0x92, 0x7c, 0xb6, 0x96,
+    0x9c, 0x99, 0x8f, 0xab, 0xab, 0x8a, 0xa2, 0xab, 0x6d, 0x97, 0x7b, 0xb1,
+    0x9e, 0x6c, 0x9a, 0x99, 0xaa, 0xa3, 0x70, 0x80, 0x81, 0x6f, 0xb6, 0x95,
+    0x93, 0x93, 0x8e, 0x80, 0x86, 0xb0, 0x87, 0x91, 0x8f, 0x8c, 0xa4, 0x86,
+    0x89, 0x8f, 0x93, 0x83, 0x75, 0x7d, 0x9b, 0x86, 0x7d, 0x5a, 0x9d, 0x67,
+    0x9f, 0x78, 0x5c, 0xa5, 0x8e, 0xa2, 0xc1, 0x95, 0x89, 0x84, 0x53, 0xd1,
+    0x7d, 0x9b, 0xc0, 0x8f, 0x73, 0x7f, 0x85, 0x9e, 0x8a, 0x7b, 0xa6, 0x84,
+    0x6c, 0x74, 0x95, 0x93, 0x7a, 0x7a, 0x81, 0x7d, 0x89, 0x86, 0x76, 0x8a,
+    0xad, 0x66, 0x90, 0x90, 0x9d, 0x77, 0xb4, 0xad, 0xac, 0x8e, 0xb3, 0xa5,
+    0x9d, 0x91, 0xd7, 0x94, 0xba, 0x8b, 0x72, 0xa4, 0x93, 0x7e, 0xa7, 0x86,
+    0xae, 0x83, 0x63, 0xa6, 0xa0, 0x78, 0x81, 0x8b, 0xc4, 0x82, 0x8f, 0x98,
+    0xa1, 0x8f, 0x79, 0x9a, 0x92, 0x85, 0x9d, 0x91, 0x92, 0x84, 0x8f, 0x84,
+    0x91, 0x6d, 0x7b, 0x69, 0x75, 0x87, 0x5d, 0x99, 0x92, 0x83, 0xab, 0x8f,
+    0x53, 0x90, 0x7b, 0xa0, 0x71, 0x89, 0xc2, 0x7f, 0x6a, 0x7c, 0x86, 0xb2,
+    0x8d, 0x89, 0xaf, 0x9c, 0x81, 0x8c, 0x84, 0xbe, 0x93, 0x9c, 0xa8, 0x97,
+    0x68, 0x9b, 0x84, 0xa3, 0x8a, 0x77, 0xa5, 0x79, 0x7b, 0x87, 0x86, 0xa5,
+    0x80, 0x83, 0x9e, 0x8d, 0xb1, 0x94, 0x7a, 0x8b, 0xa6, 0xa8, 0x80, 0x98,
+    0x8c, 0x73, 0xa9, 0x7b, 0x91, 0x8f, 0x71, 0x82, 0x68, 0x84, 0xa5, 0x96,
+    0x67, 0x63, 0xa6, 0x71, 0xa7, 0x85, 0x57, 0x9f, 0x91, 0xb2, 0xa6, 0x87,
+    0x80, 0x8f, 0x6a, 0xba, 0x9d, 0xb7, 0xb9, 0x8b, 0x75, 0x7c, 0x6f, 0x9f,
+    0x74, 0x8d, 0xaf, 0x6e, 0x7c, 0x65, 0x6c, 0x8a, 0x7c, 0x81, 0x89, 0x77,
+    0x8b, 0x74, 0x65, 0x9b, 0xa5, 0x6b, 0x92, 0x71, 0xbb, 0x70, 0x99, 0xbf,
+    0xb0, 0x7b, 0x92, 0xb4, 0xa4, 0x84, 0xc4, 0x92, 0xa8, 0x94, 0x7e, 0xcd,
+    0x83, 0x87, 0xaf, 0xa0, 0xa5, 0x94, 0x72, 0xb9, 0x90, 0xa6, 0x9e, 0x9e,
+    0x9b, 0x7a, 0x68, 0xc0, 0x8f, 0x89, 0x72, 0x94, 0x9b, 0x81, 0x81, 0x91,
+    0x88, 0x90, 0xa8, 0x8d, 0x90, 0x78, 0x7c, 0x67, 0x64, 0x8e, 0x55, 0xa1,
+    0x6d, 0x86, 0xa3, 0x6f, 0x5c, 0x7d, 0x79, 0xa3, 0x64, 0x71, 0xd4, 0x87,
+    0x73, 0x85, 0x76, 0xc7, 0x72, 0x86, 0xb2, 0x8c, 0x7b, 0x8d, 0x96, 0xc3,
+    0xad, 0x87, 0xac, 0xa8, 0x84, 0x94, 0x7b, 0xbf, 0x83, 0x74, 0x8e, 0x8c,
+    0x9c, 0x99, 0x88, 0x8e, 0x86, 0x88, 0xae, 0x7f, 0x70, 0x96, 0x6f, 0x74,
+    0x8f, 0x85, 0x7c, 0x86, 0x97, 0x83, 0xa0, 0x6a, 0x8b, 0x82, 0x88, 0x90,
+    0x72, 0x84, 0x9b, 0xa1, 0x6f, 0x72, 0xa4, 0x95, 0xa6, 0x7d, 0x65, 0xbd,
+    0x90, 0xb6, 0x9e, 0x98, 0xa1, 0x94, 0x66, 0xb3, 0x9c, 0xb3, 0xa7, 0x7f,
+    0x91, 0x69, 0x6e, 0xb1, 0x68, 0x7a, 0xaa, 0x91, 0x7c, 0x71, 0x9f, 0x95,
+    0x83, 0x86, 0x76, 0x69, 0x9b, 0x7f, 0x8c, 0x94, 0x9c, 0x89, 0x86, 0x93,
+    0xc1, 0x79, 0x98, 0x9e, 0xb1, 0x90, 0x9b, 0xb7, 0xab, 0x86, 0xc6, 0xa1,
+    0xa9, 0xaa, 0x86, 0xb0, 0x8b, 0x79, 0xb9, 0x85, 0xbe, 0x92, 0x60, 0xc0,
+    0x9f, 0x9a, 0x90, 0x8d, 0xb5, 0x77, 0x95, 0xad, 0x8b, 0x93, 0x8a, 0x93,
+    0x93, 0x7e, 0x86, 0xa6, 0x7d, 0x89, 0x6b, 0x81, 0x93, 0x75, 0x7f, 0x86,
+    0x66, 0x8f, 0x56, 0x8f, 0x84, 0x75, 0x9e, 0x77, 0x78, 0x89, 0x62, 0xb3,
+    0x78, 0x76, 0xb5, 0x92, 0x7f, 0x80, 0x7a, 0xb9, 0x7d, 0x80, 0xc2, 0xb9,
+    0x7d, 0x8f, 0x8f, 0x8c, 0xa0, 0x78, 0xa2, 0xaf, 0x68, 0x98, 0x77, 0xac,
+    0x96, 0x77, 0x96, 0x99, 0x84, 0xb1, 0x72, 0x8e, 0x96, 0xa4, 0xa9, 0x8e,
+    0x84, 0x7b, 0x85, 0x8d, 0x8f, 0x83, 0x83, 0x7f, 0x85, 0x6e, 0xa4, 0x98,
+    0xab, 0x83, 0x90, 0x8e, 0x77, 0x8e, 0xab, 0x9c, 0x73, 0x79, 0x8d, 0x6e,
+    0xa0, 0x97, 0x68, 0xa7, 0x8a, 0xbd, 0x95, 0x96, 0x96, 0x8b, 0x72, 0xc7,
+    0x8d, 0x8c, 0xa5, 0x83, 0x9b, 0x8b, 0x6c, 0xac, 0x62, 0x78, 0xae, 0x78,
+    0x71, 0x7a, 0x8d, 0xae, 0x91, 0x87, 0x90, 0x82, 0x9b, 0x83, 0x90, 0x97,
+    0xb0, 0x96, 0x82, 0xa5, 0xa9, 0x76, 0xa5, 0xa0, 0xac, 0xa1, 0x93, 0x94,
+    0xb7, 0x91, 0xbb, 0x9b, 0xa4, 0xa5, 0x8c, 0xb5, 0x95, 0x7b, 0x92, 0x91,
+    0xb0, 0x97, 0x73, 0xb9, 0x86, 0xa7, 0x92, 0x98, 0x9e, 0x70, 0x77, 0xba,
+    0x96, 0x7b, 0xa6, 0x86, 0x97, 0x85, 0x8e, 0xaa, 0x93, 0x97, 0x8f, 0x8b,
+    0x8d, 0x79, 0x84, 0x7e, 0x70, 0x95, 0x52, 0x8f, 0x62, 0x75, 0x8b, 0x8b,
+    0x7b, 0x8b, 0x79, 0xaf, 0x90, 0x6d, 0xc8, 0x8d, 0x84, 0x8c, 0x72, 0xaf,
+    0x70, 0x8d, 0xa5, 0x8a, 0x76, 0x97, 0x87, 0x8e, 0xa9, 0x83, 0xb2, 0x8d,
+    0x7e, 0x9b, 0x76, 0xc2, 0xa2, 0x72, 0xc5, 0x87, 0x75, 0xb7, 0x92, 0x95,
+    0x9e, 0xa0, 0xc3, 0x82, 0x8d, 0x8f, 0x7d, 0x85, 0x90, 0x99, 0x7b, 0x82,
+    0x87, 0x87, 0xa0, 0x87, 0x9a, 0x8b, 0xa2, 0xa4, 0x67, 0x93, 0xa5, 0xbb,
+    0x73, 0x5f, 0x8c, 0x60, 0xa5, 0x7d, 0x6c, 0xb3, 0xb2, 0xb3, 0xa9, 0xa9,
+    0x8d, 0x8d, 0x67, 0xd7, 0x63, 0x99, 0xaa, 0x83, 0x88, 0x6a, 0x6f, 0x9e,
+    0x5e, 0x9e, 0x9d, 0x81, 0x84, 0x6e, 0x98, 0x90, 0x89, 0x7c, 0x95, 0x7d,
+    0x81, 0x8a, 0xa2, 0x8c, 0x92, 0x85, 0x80, 0x92, 0xac, 0x80, 0x9b, 0x9b,
+    0xc3, 0x8c, 0x95, 0xbc, 0xaa, 0x7c, 0xb5, 0x8d, 0xa1, 0xb8, 0x70, 0xb6,
+    0x8c, 0x92, 0xa8, 0x8e, 0xa3, 0x76, 0x6c, 0xbe, 0xa0, 0x8c, 0x92, 0x8e,
+    0xa1, 0x83, 0x76, 0xb2, 0x91, 0x7b, 0x8e, 0x87, 0x7f, 0x89, 0x8a, 0xa1,
+    0x91, 0xa0, 0x7a, 0x95, 0x7b, 0x86, 0x99, 0x92, 0x78, 0x8a, 0x62, 0x9e,
+    0x7b, 0x7b, 0x89, 0x79, 0x78, 0x87, 0x82, 0x94, 0x7d, 0x91, 0x96, 0x79,
+    0x7b, 0x8d, 0x80, 0xa7, 0x88, 0x95, 0xa6, 0x8f, 0x7d, 0x95, 0x79, 0xa2,
+    0x91, 0x9b, 0x9d, 0x90, 0x79, 0xa4, 0x88, 0x98, 0x9b, 0x7a, 0xa5, 0x7f,
+    0x71, 0x9c, 0x87, 0x96, 0x8c, 0x8f, 0xbc, 0x74, 0x95, 0x99, 0x7f, 0x78,
+    0x8c, 0x63, 0x7c, 0x7a, 0x92, 0x8c, 0xa8, 0x78, 0xa8, 0x89, 0x9a, 0x86,
+    0x69, 0x7e, 0xa1, 0xc3, 0x57, 0x68, 0x84, 0x89, 0xa9, 0x8d, 0x6f, 0xa9,
+    0x8a, 0xab, 0xa5, 0xad, 0x94, 0x83, 0x6b, 0xa7, 0x7e, 0x95, 0x9b, 0x7f,
+    0x8b, 0x78, 0x73, 0x90, 0x65, 0x8d, 0xb1, 0x91, 0x84, 0x65, 0x90, 0xb4,
+    0x8c, 0x89, 0x94, 0x7c, 0x99, 0x8b, 0x98, 0xb7, 0xb0, 0x91, 0x9e, 0x88,
+    0xbd, 0xa0, 0xa4, 0xb9, 0xad, 0x96, 0x97, 0xa3, 0xb6, 0x81, 0xba, 0x9b,
+    0xbc, 0xa9, 0x94, 0xb9, 0xa0, 0x85, 0x8e, 0xa1, 0xac, 0x87, 0x65, 0xa6,
+    0x98, 0x8e, 0xaa, 0xa3, 0xa3, 0x7f, 0x79, 0xb4, 0x93, 0x76, 0x90, 0x99,
+    0x8b, 0x90, 0x84, 0xa6, 0x90, 0x8f, 0x88, 0xa6, 0x89, 0x83, 0x86, 0x7a,
+    0x5d, 0x96, 0x71, 0xa5, 0x64, 0x94, 0x9a, 0x85, 0x7c, 0xa1, 0x96, 0x9d,
+    0x76, 0x8f, 0x95, 0xa0, 0x7f, 0x8c, 0x80, 0xc7, 0x6c, 0x7d, 0xb7, 0xb2,
+    0x82, 0x8e, 0x82, 0xbd, 0xb3, 0x82, 0x99, 0x9b, 0x80, 0x94, 0x8c, 0x94,
+    0x94, 0x6b, 0xc6, 0xa9, 0x81, 0x9f, 0x8c, 0x7e, 0x87, 0x88, 0xb3, 0x7d,
+    0x88, 0x8c, 0x81, 0x81, 0x7e, 0x7e, 0x86, 0x87, 0x96, 0x85, 0xb4, 0x87,
+    0xab, 0x91, 0x8f, 0xa1, 0x72, 0x83, 0xa4, 0x89, 0x6b, 0x75, 0x85, 0x7c,
+    0x94, 0x85, 0x6f, 0xad, 0x91, 0xae, 0xa4, 0xa5, 0xa7, 0x8e, 0x6c, 0xb2,
+    0x73, 0x99, 0x96, 0x92, 0x89, 0x81, 0x7d, 0x88, 0x60, 0x8d, 0x94, 0x83,
+    0x99, 0x68, 0x86, 0xa2, 0x94, 0x8e, 0x82, 0x76, 0x89, 0x8d, 0x98, 0x86,
+    0x94, 0x90, 0x83, 0x7d, 0xad, 0x94, 0xa6, 0x90, 0xcb, 0x96, 0xa2, 0xb2,
+    0xb6, 0x89, 0xc4, 0x9d, 0xc7, 0xa5, 0x75, 0xc3, 0x92, 0x8c, 0x8e, 0xad,
+    0x96, 0x94, 0x8e, 0xab, 0x94, 0x90, 0xa8, 0x84, 0xb5, 0x84, 0x66, 0xce,
+    0x74, 0x8c, 0x93, 0x8d, 0x8f, 0x95, 0x8b, 0xa1, 0x7b, 0xa1, 0x79, 0x9e,
+    0x81, 0xa4, 0xa0, 0x98, 0x5f, 0x78, 0x8e, 0x97, 0x6f, 0x81, 0x96, 0x8d,
+    0x70, 0x93, 0x72, 0x9c, 0x7b, 0x98, 0x8b, 0x8a, 0x8f, 0x8b, 0x6c, 0xa9,
+    0x81, 0x99, 0xb3, 0xa3, 0x71, 0x9c, 0x8b, 0x94, 0xa6, 0x8a, 0xb8, 0xa0,
+    0x7b, 0x98, 0x74, 0x9f, 0x92, 0x92, 0xb2, 0x89, 0x81, 0xa8, 0x87, 0x97,
+    0x96, 0x86, 0xa4, 0x7b, 0x63, 0x8e, 0x86, 0x7d, 0x76, 0x81, 0x93, 0x94,
+    0x98, 0x8b, 0xaf, 0x6d, 0xab, 0x9b, 0x85, 0x9b, 0x91, 0x86, 0x95, 0x95,
+    0x65, 0x89, 0x9e, 0x6b, 0xa4, 0x82, 0x68, 0xb5, 0x8b, 0xd1, 0x9d, 0x93,
+    0x7d, 0x67, 0x5e, 0xba, 0x9b, 0x94, 0x93, 0x8d, 0x88, 0x73, 0x7c, 0x8e,
+    0x7d, 0x83, 0x9a, 0x82, 0xa4, 0x62, 0x9a, 0x8d, 0x86, 0xa0, 0x7b, 0x72,
+    0xa9, 0x84, 0xa7, 0x94, 0xb2, 0x98, 0x8f, 0x81, 0xbe, 0x84, 0x9d, 0x94,
+    0x9c, 0x9a, 0x94, 0x8f, 0xb1, 0x82, 0xb1, 0x82, 0xb1, 0xb2, 0x78, 0xa7,
+    0x95, 0x99, 0x8b, 0x8c, 0xb1, 0x81, 0x5b, 0xbb, 0x88, 0x7a, 0x90, 0xa3,
+    0x8d, 0x78, 0x6f, 0xbf, 0x8c, 0x93, 0xa1, 0x8e, 0x9f, 0x98, 0x88, 0xb3,
+    0x7e, 0x82, 0x8a, 0x8e, 0x7d, 0x8a, 0x96, 0x6a, 0x6c, 0x7b, 0x91, 0x94,
+    0x6f, 0x89, 0x9a, 0x84, 0x73, 0x8b, 0x8c, 0x91, 0x7d, 0x8e, 0x9e, 0x80,
+    0x88, 0x81, 0x78, 0xaf, 0x86, 0xa5, 0xa2, 0x8d, 0x6a, 0x8a, 0x75, 0xa1,
+    0x83, 0x87, 0xaf, 0x7d, 0x6c, 0xa3, 0x65, 0x77, 0x89, 0x91, 0x9a, 0xa1,
+    0xa1, 0xaf, 0x78, 0x94, 0x93, 0xb2, 0xaf, 0x92, 0x74, 0x7a, 0xa7, 0x7b,
+    0x8f, 0x9c, 0x86, 0x8d, 0x8f, 0x79, 0xb0, 0xb3, 0x97, 0x82, 0x8e, 0x92,
+    0x92, 0x81, 0xa7, 0xbc, 0x6e, 0x6e, 0x89, 0xa5, 0x9a, 0x8d, 0x84, 0xb6,
+    0x83, 0xae, 0xa5, 0xa7, 0xae, 0x86, 0x6b, 0xb9, 0x89, 0xb0, 0x8f, 0x82,
+    0x8f, 0x6f, 0x83, 0x98, 0x6a, 0x98, 0x9a, 0x85, 0x9f, 0x78, 0x93, 0x8d,
+    0x83, 0x88, 0x88, 0x7e, 0x97, 0x99, 0x8a, 0x9b, 0xb0, 0x90, 0x86, 0x88,
+    0xb5, 0x90, 0xb3, 0xaa, 0xad, 0x96, 0x93, 0xa3, 0x9d, 0x81, 0xa3, 0x9a,
+    0x9f, 0x99, 0x90, 0x9c, 0x9e, 0x8e, 0x88, 0x93, 0xa8, 0x94, 0x62, 0xa6,
+    0x94, 0x92, 0xa1, 0x86, 0xb7, 0x8a, 0x6a, 0xa6, 0x81, 0x7e, 0x7b, 0x80,
+    0x89, 0x8f, 0x74, 0xa6, 0x72, 0x91, 0xa6, 0x9b, 0x73, 0x97, 0x7e, 0x6f,
+    0x70, 0x8d, 0x73, 0x98, 0x80, 0x90, 0x8f, 0x7e, 0x83, 0x77, 0x84, 0x92,
+    0x7f, 0x8c, 0x91, 0xa6, 0x99, 0x90, 0x9d, 0xb1, 0x88, 0x85, 0x89, 0x85,
+    0x7c, 0x9f, 0x7e, 0xb0, 0xaa, 0x84, 0xa0, 0x8e, 0x74, 0x93, 0x78, 0x90,
+    0x9a, 0x8b, 0x8e, 0x97, 0x8f, 0x9f, 0x7c, 0x83, 0x8a, 0x88, 0xa5, 0x8f,
+    0x8b, 0x74, 0x84, 0x9a, 0x7f, 0x91, 0x88, 0x77, 0x9c, 0x91, 0xbc, 0x93,
+    0x9c, 0x82, 0x89, 0x9b, 0x8a, 0x7d, 0xb7, 0xb8, 0x6f, 0x68, 0xb5, 0x8e,
+    0xb4, 0x86, 0x8c, 0xb3, 0x94, 0xb6, 0xa4, 0x93, 0x98, 0x8b, 0x70, 0xb3,
+    0x96, 0xaa, 0x87, 0x89, 0x99, 0x68, 0x74, 0xa4, 0x69, 0x9e, 0x8e, 0x6b,
+    0x9f, 0x6b, 0x95, 0x9c, 0x88, 0x89, 0x8a, 0x86, 0x8d, 0x75, 0x94, 0x88,
+    0xa0, 0x94, 0x77, 0x8c, 0x9c, 0x8d, 0x8e, 0xa4, 0xac, 0xa7, 0x8a, 0x9b,
+    0xa9, 0x81, 0xab, 0xac, 0xaf, 0xaf, 0x87, 0xbb, 0x9b, 0x95, 0x8e, 0x9e,
+    0x9f, 0xa1, 0x6c, 0xb4, 0x98, 0x8f, 0x81, 0x8d, 0x98, 0x8f, 0x78, 0x96,
+    0x89, 0x86, 0x6c, 0x91, 0x8d, 0x9f, 0x95, 0x9f, 0x6b, 0x7f, 0x93, 0x7c,
+    0x96, 0x8e, 0x8a, 0x58, 0x80, 0x8e, 0x7a, 0x93, 0x8b, 0x78, 0x99, 0x92,
+    0x62, 0x8e, 0x83, 0x8e, 0x87, 0x83, 0x86, 0x99, 0x93, 0x92, 0x80, 0x95,
+    0xa2, 0x72, 0xa2, 0x97, 0x78, 0x87, 0x7b, 0xa3, 0x99, 0x78, 0x98, 0x9c,
+    0x80, 0x9b, 0x5e, 0x8a, 0x9c, 0x99, 0xa6, 0x7a, 0x8e, 0x99, 0x7a, 0x8e,
+    0x8b, 0x76, 0x9b, 0x89, 0x80, 0x8e, 0x83, 0x8a, 0x80, 0x7c, 0x80, 0x74,
+    0x95, 0x8c, 0xbf, 0x7e, 0xa8, 0x7a, 0x99, 0x7d, 0x7d, 0x73, 0xb4, 0xae,
+    0x88, 0x76, 0xae, 0x78, 0xaa, 0x65, 0x94, 0xbe, 0x97, 0xaf, 0xa4, 0x91,
+    0x9c, 0x95, 0x6c, 0xbe, 0x82, 0xb1, 0x9b, 0x91, 0x85, 0x7d, 0x66, 0x9c,
+    0x99, 0xbd, 0xa3, 0x88, 0xa8, 0x73, 0x81, 0x94, 0x92, 0x8e, 0x90, 0x8d,
+    0xaf, 0x75, 0x86, 0x9b, 0x8b, 0x8b, 0x8d, 0x74, 0xbd, 0x85, 0x97, 0x8b,
+    0x9d, 0xba, 0x90, 0xa8, 0x9d, 0x72, 0xa5, 0xa8, 0xbf, 0xbb, 0x7b, 0xb6,
+    0xad, 0x94, 0x6f, 0x9a, 0xa7, 0x97, 0x78, 0x9c, 0x98, 0x8d, 0x8c, 0x93,
+    0xb8, 0xa8, 0x7f, 0x9d, 0x98, 0x7f, 0x8f, 0x8a, 0x8d, 0xa8, 0x86, 0x7b,
+    0x5d, 0x89, 0x8a, 0x83, 0x8c, 0x8b, 0x81, 0x56, 0x7c, 0x87, 0x89, 0xa6,
+    0x75, 0x7c, 0x92, 0x74, 0x96, 0x92, 0x78, 0x8d, 0x8d, 0x98, 0xae, 0x7a,
+    0x95, 0x8f, 0x8b, 0x9c, 0x95, 0x9f, 0xae, 0x93, 0x7b, 0x93, 0x8c, 0x9a,
+    0x79, 0x74, 0x94, 0x6e, 0x7e, 0x8f, 0x64, 0x9f, 0x9c, 0x88, 0x8f, 0x8e,
+    0x84, 0x8d, 0x89, 0x95, 0x96, 0x8f, 0x9d, 0x60, 0x85, 0x86, 0x7c, 0x93,
+    0x8d, 0x68, 0x83, 0x7c, 0x94, 0x87, 0xb8, 0xa2, 0x9d, 0x82, 0x8e, 0x84,
+    0x6c, 0x73, 0xa8, 0xbc, 0x84, 0x85, 0xa2, 0x79, 0x92, 0x64, 0x69, 0xa9,
+    0x82, 0xa7, 0x9d, 0x95, 0x8e, 0x6f, 0x9f, 0xa7, 0x97, 0xb1, 0x9d, 0x8e,
+    0xa1, 0x70, 0x80, 0x9e, 0x8e, 0x91, 0xa0, 0xaa, 0x81, 0x5b, 0x98, 0x8f,
+    0xa0, 0xaa, 0x83, 0x7a, 0x91, 0x7a, 0x73, 0x80, 0xa6, 0x9a, 0x80, 0x7d,
+    0x9e, 0x75, 0x7b, 0xa3, 0xad, 0x92, 0x98, 0xc0, 0xa1, 0x80, 0x88, 0xa2,
+    0xa5, 0xa4, 0x7e, 0x9b, 0xa0, 0x80, 0x6e, 0xa0, 0x9f, 0xa3, 0x8a, 0x8f,
+    0xa2, 0x93, 0x86, 0x8d, 0x8f, 0x93, 0x7e, 0x90, 0x98, 0x83, 0x7d, 0x9b,
+    0x9f, 0x9a, 0x97, 0x83, 0x6e, 0x8d, 0x94, 0x6c, 0x7b, 0x7f, 0x73, 0x65,
+    0x6a, 0x93, 0x8a, 0x94, 0x83, 0x89, 0x7d, 0x7b, 0x77, 0x8a, 0x7a, 0x9b,
+    0x8e, 0x8d, 0x94, 0x89, 0x86, 0x83, 0x7c, 0x8e, 0x8b, 0x90, 0xab, 0x99,
+    0x81, 0x8e, 0x77, 0x9c, 0x8c, 0x82, 0x97, 0x8f, 0x78, 0x91, 0x5f, 0xa1,
+    0x8b, 0x83, 0xa9, 0x8d, 0x7b, 0x97, 0x77, 0x80, 0x84, 0x7e, 0x9e, 0x75,
+    0xa3, 0x86, 0x67, 0x7c, 0x80, 0x6d, 0x77, 0x75, 0x88, 0x75, 0xad, 0x7a,
+    0x93, 0x89, 0x8c, 0x87, 0x7a, 0x79, 0xb2, 0xa1, 0x69, 0x80, 0xb5, 0x7a,
+    0xa6, 0x7b, 0x95, 0xac, 0x95, 0xa9, 0x98, 0xa4, 0xad, 0x83, 0x8d, 0xbe,
+    0xa4, 0x98, 0xad, 0x7d, 0x8b, 0x65, 0x65, 0xad, 0x6a, 0xae, 0xa3, 0xa8,
+    0x9c, 0x63, 0x90, 0x91, 0x6d, 0x9a, 0x81, 0x98, 0x86, 0x6a, 0x83, 0x84,
+    0x94, 0x9c, 0x77, 0x86, 0xc2, 0x7f, 0x9b, 0xa9, 0xad, 0xae, 0xa7, 0xa6,
+    0xd4, 0x70, 0x9d, 0xb5, 0xaa, 0xdb, 0x8f, 0xa3, 0xa5, 0x87, 0x88, 0x9e,
+    0xa9, 0x9f, 0x62, 0xa7, 0xa2, 0x8e, 0x7d, 0x8a, 0x9d, 0xa2, 0x6b, 0xa7,
+    0x96, 0x6d, 0x76, 0x8c, 0x9b, 0x8c, 0x86, 0x86, 0x93, 0x7c, 0x9d, 0x7c,
+    0x7e, 0x93, 0x5c, 0x79, 0x76, 0x8c, 0x8a, 0x87, 0x79, 0x97, 0x9a, 0x7a,
+    0x85, 0x8c, 0x7f, 0x85, 0x7a, 0xa1, 0xa7, 0x72, 0x87, 0x7f, 0x96, 0x9e,
+    0x92, 0x92, 0x9e, 0xa0, 0x72, 0x99, 0x7a, 0xb0, 0x8c, 0x8d, 0xa3, 0x9b,
+    0x91, 0xa6, 0x63, 0x94, 0x8b, 0x81, 0xbb, 0x94, 0x79, 0x95, 0x99, 0x9a,
+    0xa0, 0x7a, 0x96, 0x72, 0x82, 0x9a, 0x83, 0x7f, 0x72, 0x7f, 0x6d, 0x75,
+    0x91, 0x7f, 0xbc, 0x84, 0x9a, 0x81, 0x95, 0x69, 0x7d, 0x6d, 0xa2, 0xa8,
+    0x7e, 0x64, 0xac, 0x86, 0x85, 0x6d, 0x99, 0xaa, 0x7e, 0x79, 0x9c, 0xa0,
+    0xa4, 0x77, 0x99, 0xac, 0xa8, 0x8d, 0xb7, 0xa2, 0xa3, 0x61, 0x82, 0x98,
+    0x84, 0x8e, 0xa1, 0x8c, 0x88, 0x82, 0x6f, 0x7d, 0x88, 0x80, 0x7a, 0x8a,
+    0x8c, 0x6d, 0x87, 0x6f, 0xab, 0x8f, 0x8b, 0x76, 0xa0, 0x7d, 0x9f, 0xab,
+    0xb0, 0xb8, 0x9c, 0x8d, 0xb8, 0x81, 0x89, 0x94, 0xa8, 0xc8, 0x92, 0x9b,
+    0x8d, 0x83, 0x7b, 0xaf, 0x97, 0x94, 0x6e, 0xa5, 0x9b, 0x97, 0x89, 0x8d,
+    0xaa, 0x8a, 0x66, 0x88, 0x93, 0x84, 0xa1, 0x88, 0xa0, 0x99, 0x85, 0x89,
+    0x7d, 0x84, 0x8b, 0x6a, 0x92, 0xa1, 0x74, 0x76, 0x73, 0x87, 0x7a, 0x9a,
+    0x77, 0x86, 0x89, 0x5f, 0x7f, 0x8b, 0x7f, 0x8d, 0x7e, 0x81, 0x95, 0x8a,
+    0x7d, 0x85, 0x74, 0x9a, 0x87, 0x8c, 0x9e, 0xae, 0x80, 0x88, 0x7d, 0x8b,
+    0xaa, 0x79, 0x7c, 0x97, 0x79, 0x90, 0x7b, 0x97, 0x97, 0x9f, 0xa1, 0xa2,
+    0xab, 0x97, 0x69, 0x7a, 0x8d, 0x9f, 0x9f, 0x89, 0x90, 0x8c, 0x66, 0x98,
+    0x6e, 0x86, 0x7b, 0x6e, 0x86, 0x8a, 0xb2, 0xa6, 0x93, 0x7d, 0x8c, 0x81,
+    0x7e, 0x84, 0xa6, 0xb6, 0x83, 0x92, 0xa0, 0x88, 0x90, 0x5f, 0x7c, 0x92,
+    0x98, 0x94, 0x92, 0x98, 0xa7, 0x65, 0x90, 0xa2, 0xa2, 0x9b, 0xa6, 0x7d,
+    0x8b, 0x5a, 0x94, 0x95, 0x9b, 0xa5, 0x99, 0xa5, 0x7e, 0x61, 0x9a, 0x7a,
+    0x8b, 0x77, 0x87, 0x76, 0x9d, 0x72, 0x9a, 0x84, 0x98, 0x94, 0x92, 0x73,
+    0xae, 0x78, 0x8e, 0xaa, 0xa0, 0xc3, 0x7a, 0xa4, 0xa0, 0x75, 0xa9, 0xae,
+    0x8c, 0xd6, 0x87, 0x8f, 0x9f, 0x8c, 0x9b, 0x90, 0x99, 0x97, 0x73, 0x8f,
+    0x9b, 0x9c, 0x8c, 0x89, 0xa5, 0x84, 0x8f, 0x7b, 0x8b, 0x7f, 0x97, 0x98,
+    0x8d, 0x7b, 0x94, 0x9d, 0x9c, 0x8e, 0x92, 0x89, 0x88, 0x8d, 0x6c, 0x63,
+    0x73, 0x81, 0x72, 0x8a, 0x88, 0x8a, 0x9f, 0x79, 0x81, 0x82, 0x9a, 0xa9,
+    0x7a, 0x92, 0x7d, 0x76, 0x7b, 0x7a, 0x6a, 0xbe, 0x91, 0x7d, 0x86, 0xad,
+    0x84, 0x86, 0x6c, 0x91, 0x91, 0x9f, 0x92, 0x6b, 0x95, 0x98, 0x84, 0xa0,
+    0x8f, 0x8b, 0x9e, 0x7f, 0x9f, 0x97, 0x7e, 0x87, 0x80, 0x9e, 0x79, 0x8d,
+    0x68, 0x87, 0x88, 0x7d, 0x89, 0x81, 0x6d, 0x85, 0x80, 0x82, 0xa0, 0x97,
+    0xa3, 0x72, 0x94, 0x74, 0x8e, 0x56, 0x96, 0x98, 0x91, 0x6f, 0xa0, 0xae,
+    0x7c, 0x6e, 0x8e, 0xa9, 0x7c, 0x80, 0x87, 0xa3, 0x9e, 0x57, 0x8e, 0xb5,
+    0x87, 0xa6, 0x87, 0x79, 0x8f, 0x55, 0x8a, 0x81, 0x97, 0x6c, 0x9b, 0x99,
+    0x78, 0x5c, 0x82, 0x80, 0x91, 0x76, 0x80, 0x91, 0x8b, 0x65, 0x89, 0x7d,
+    0xa9, 0x95, 0x89, 0x97, 0x96, 0x6a, 0x89, 0xad, 0x92, 0x9f, 0xb6, 0x82,
+    0x88, 0x79, 0x9d, 0xa5, 0x9c, 0xae, 0x9a, 0x93, 0x77, 0x8e, 0x8a, 0xb5,
+    0x84, 0xb0, 0x76, 0xa2, 0x89, 0xa0, 0x96, 0x7a, 0xa5, 0x8e, 0x7e, 0x74,
+    0x8d, 0x89, 0x89, 0x9e, 0x93, 0x95, 0x90, 0x78, 0x93, 0x8f, 0xa5, 0x7c,
+    0x9d, 0x7c, 0x77, 0x85, 0x81, 0x92, 0x7c, 0x87, 0x92, 0x82, 0x98, 0xa3,
+    0x63, 0x76, 0x9b, 0x91, 0x7b, 0x8e, 0x97, 0x7e, 0x66, 0x90, 0x63, 0xb4,
+    0x71, 0x88, 0x86, 0x8e, 0x6f, 0x89, 0x7a, 0x88, 0x93, 0x7f, 0x96, 0xa8,
+    0x7d, 0x88, 0x88, 0x86, 0x7b, 0x91, 0x88, 0x6b, 0xa6, 0x8b, 0x69, 0x78,
+    0x82, 0x80, 0x83, 0x6b, 0xaf, 0x81, 0x7b, 0x64, 0x8f, 0x78, 0x6e, 0x7f,
+    0x86, 0x91, 0x92, 0xa3, 0xa0, 0x97, 0x82, 0x88, 0x92, 0x90, 0x9e, 0x89,
+    0x9d, 0x7b, 0x96, 0x82, 0xa3, 0x8c, 0x7f, 0x84, 0x7a, 0x6c, 0x60, 0x85,
+    0xa9, 0x74, 0x83, 0xa2, 0x89, 0x87, 0x9b, 0x77, 0x9b, 0x9a, 0x99, 0x84,
+    0x7c, 0x9c, 0x8d, 0x90, 0x8d, 0x7b, 0x74, 0x77, 0x93, 0x8c, 0x6c, 0x8b,
+    0x85, 0x78, 0x7f, 0x7d, 0x75, 0x7f, 0x7e, 0x85, 0x8f, 0x7d, 0x62, 0x8c,
+    0x7c, 0xad, 0x7f, 0x83, 0xa1, 0xa1, 0x97, 0x7b, 0x72, 0x82, 0x9d, 0x81,
+    0x94, 0x81, 0x8d, 0x9f, 0x6f, 0x8f, 0x9d, 0x89, 0x6a, 0x7e, 0x7f, 0x7f,
+    0x8d, 0x7e, 0x91, 0x86, 0x7d, 0x8a, 0x7e, 0x70, 0x7b, 0x9b, 0x6e, 0x5f,
+    0xa8, 0x7a, 0x73, 0x8a, 0x7a, 0x71, 0x90, 0x95, 0x8d, 0x78, 0x7b, 0x72,
+    0x5e, 0x89, 0x62, 0xa1, 0x87, 0x7f, 0x83, 0x75, 0x98, 0x7f, 0x76, 0x72,
+    0x8f, 0x9b, 0x7a, 0x8b, 0xa1, 0x7f, 0x60, 0x99, 0x96, 0x6e, 0x67, 0x76,
+    0x88, 0x98, 0x6c, 0x7b, 0x9b, 0x8d, 0x5f, 0x89, 0x7c, 0x81, 0x79, 0x86,
+    0x69, 0x9e, 0x83, 0x65, 0x8e, 0x82, 0x83, 0x89, 0x85, 0x7f, 0x90, 0x80,
+    0xa2, 0x81, 0x85, 0x83, 0x8e, 0x94, 0x94, 0x75, 0x86, 0x87, 0x9a, 0xb2,
+    0x82, 0x99, 0x85, 0x7f, 0x8c, 0x7e, 0x81, 0x9a, 0x81, 0x7d, 0x87, 0x81,
+    0xa3, 0x8c, 0x8d, 0x85, 0x8d, 0x96, 0x86, 0x7c, 0xa7, 0x87, 0x7e, 0x9d,
+    0x63, 0xa8, 0x7c, 0x97, 0xa2, 0xa4, 0x7e, 0x87, 0x93, 0x9e, 0x89, 0x8d,
+    0x6b, 0x6d, 0x9d, 0x9b, 0x78, 0x8a, 0x8e, 0x7f, 0x7b, 0xa5, 0x6e, 0x8c,
+    0x89, 0x88, 0x73, 0x7e, 0x77, 0x9d, 0xa6, 0xa7, 0x77, 0x87, 0x7e, 0x7e,
+    0x97, 0x84, 0x6b, 0x59, 0x60, 0x90, 0x85, 0x76, 0x8f, 0x61, 0x7f, 0x94,
+    0x8f, 0x84, 0x8b, 0x7f, 0x73, 0x77, 0x73, 0x71, 0x8a, 0x9b, 0x7b, 0x89,
+    0x97, 0x8f, 0x76, 0x63, 0xa3, 0xa1, 0x6b, 0x7c, 0x62, 0x95, 0x8e, 0xa3,
+    0x9f, 0x89, 0x8f, 0x7f, 0x92, 0x7c, 0xa2, 0xa4, 0xa6, 0x92, 0x89, 0x93,
+    0x74, 0x73, 0x73, 0x96, 0xad, 0x9b, 0x87, 0xac, 0x91, 0x8a, 0xa0, 0x70,
+    0x70, 0x7e, 0x8f, 0x74, 0x75, 0xaf, 0x8d, 0x82, 0x8e, 0x82, 0x96, 0x7d,
+    0x69, 0x9c, 0x64, 0xa2, 0x82, 0x89, 0x83, 0x9d, 0x83, 0x88, 0x62, 0x92,
+    0x72, 0x89, 0x6d, 0x7f, 0x92, 0x70, 0x8e, 0x80, 0x7e, 0x8d, 0x91, 0x85,
+    0x8d, 0x89, 0x83, 0x96, 0x90, 0x96, 0x9c, 0xa6, 0x8a, 0x73, 0x89, 0x79,
+    0xa9, 0x70, 0x80, 0x78, 0x96, 0x80, 0x7b, 0x85, 0xa5, 0x80, 0x93, 0x95,
+    0xc5, 0x74, 0x81, 0x88, 0xa2, 0x93, 0x86, 0x9c, 0xa3, 0x6d, 0x92, 0x8a,
+    0x92, 0x99, 0x98, 0x65, 0xad, 0x63, 0x9d, 0x95, 0x99, 0x89, 0x7f, 0x7a,
+    0x99, 0x91, 0x7f, 0x78, 0x90, 0x8f, 0x80, 0x85, 0xa1, 0x68, 0x9d, 0x6c,
+    0x83, 0x8f, 0x7c, 0x5e, 0x99, 0x7b, 0x80, 0x91, 0x66, 0x8a, 0x92, 0xb3,
+    0x7a, 0x99, 0x91, 0x7e, 0x7d, 0x96, 0x69, 0x9e, 0x7c, 0x89, 0xad, 0x8f,
+    0x9d, 0x90, 0x85, 0x8e, 0x72, 0xa9, 0x89, 0x83, 0x7c, 0x82, 0x70, 0x82,
+    0x6b, 0x79, 0x75, 0x8d, 0x77, 0x9b, 0x7c, 0x8f, 0x8a, 0x95, 0x87, 0x9f,
+    0x7c, 0x90, 0x87, 0x70, 0x83, 0x83, 0x98, 0x9f, 0x85, 0x86, 0x8d, 0x81,
+    0x87, 0x87, 0x87, 0x9d, 0x8f, 0x9d, 0x7c, 0x98, 0xa2, 0xac, 0x88, 0x93,
+    0x88, 0x7d, 0x9b, 0x76, 0x82, 0x67, 0x69, 0x7f, 0x8c, 0x8d, 0x94, 0x7d,
+    0x7b, 0xae, 0x8c, 0x85, 0x8b, 0xa7, 0x8c, 0x87, 0x96, 0x7d, 0x8b, 0x90,
+    0x90, 0x7c, 0x92, 0xa8, 0x81, 0x87, 0xa4, 0xa4, 0x82, 0x8b, 0x8d, 0x89,
+    0x8f, 0x70, 0x9d, 0x7f, 0xa0, 0x84, 0x99, 0x65, 0x99, 0x78, 0x94, 0x8b,
+    0xc5, 0x8d, 0x8d, 0x55, 0xb3, 0x8d, 0x78, 0x93, 0xb4, 0x6d, 0x84, 0x90,
+    0xd5, 0x76, 0x7a, 0x9e, 0xc8, 0x8f, 0x86, 0x8a, 0xaa, 0x8b, 0x7f, 0x90,
+    0xaa, 0x95, 0x9c, 0x81, 0xb4, 0x6b, 0x64, 0x8a, 0x99, 0x84, 0x74, 0x6e,
+    0x95, 0x75, 0x98, 0x92, 0x9a, 0x91, 0x8c, 0x7d, 0x88, 0x6e, 0x89, 0x7d,
+    0x87, 0x80, 0x8e, 0x86, 0x78, 0x9f, 0x96, 0x75, 0x76, 0x82, 0x84, 0xaf,
+    0x8a, 0xb3, 0x93, 0x97, 0x86, 0x7c, 0x7e, 0x96, 0x7c, 0x6d, 0x90, 0x8e,
+    0x85, 0x88, 0x8a, 0x9f, 0x70, 0x89, 0x9f, 0x99, 0x95, 0x87, 0x91, 0x9d,
+    0x80, 0x74, 0x88, 0x7c, 0x7f, 0xa8, 0x93, 0x77, 0x66, 0xa6, 0x80, 0xa2,
+    0x88, 0xa0, 0xaf, 0x6f, 0x76, 0x70, 0x82, 0x9a, 0x73, 0x89, 0x9a, 0x75,
+    0x75, 0x8e, 0x5f, 0x85, 0x6a, 0x76, 0x98, 0x66, 0x87, 0xa3, 0x7a, 0x73,
+    0x9d, 0xa1, 0x98, 0x8e, 0x78, 0x91, 0x83, 0x8c, 0x82, 0x9e, 0x90, 0x87,
+    0x8f, 0x9b, 0x8b, 0x8f, 0x89, 0x62, 0x74, 0x82, 0x7b, 0x7f, 0x8a, 0x9d,
+    0x89, 0x93, 0x8c, 0x7a, 0x99, 0x77, 0xac, 0x75, 0x9b, 0x7f, 0x7f, 0x56,
+    0x8c, 0x96, 0x70, 0x79, 0xc2, 0x7d, 0x90, 0x64, 0xe9, 0x79, 0x68, 0xb2,
+    0xc2, 0xa6, 0xa7, 0x7e, 0xd9, 0x98, 0x79, 0x87, 0xc0, 0x97, 0x87, 0x66,
+    0xd0, 0x9f, 0x92, 0x82, 0xa4, 0xa8, 0x8d, 0x78, 0xa6, 0xa1, 0x76, 0x7d,
+    0xa4, 0x87, 0x89, 0x51, 0xae, 0x88, 0x5b, 0x76, 0x7d, 0x70, 0x74, 0x93,
+    0x89, 0x74, 0x9e, 0x7a, 0x79, 0x64, 0x9a, 0x94, 0x65, 0x93, 0xb0, 0x8d,
+    0x88, 0x7e, 0x8e, 0xa5, 0x63, 0x94, 0x94, 0x7d, 0x91, 0x87, 0x84, 0x95,
+    0x75, 0x9e, 0x81, 0x99, 0x65, 0x76, 0x82, 0x9c, 0x6a, 0xab, 0x84, 0x85,
+    0x88, 0x72, 0x92, 0x83, 0x82, 0xaf, 0x6d, 0x9d, 0x9e, 0x73, 0x98, 0x7f,
+    0x91, 0xb4, 0x62, 0x8d, 0x74, 0x6e, 0xb4, 0x94, 0x97, 0x9e, 0x6f, 0x9a,
+    0x83, 0x7b, 0xa9, 0x7d, 0x87, 0x97, 0x60, 0xa9, 0x7a, 0x75, 0xad, 0x6c,
+    0x77, 0xa4, 0x88, 0x82, 0x6f, 0x8a, 0x83, 0x74, 0x9a, 0xa7, 0x83, 0x91,
+    0x7c, 0x7c, 0x78, 0x77, 0x83, 0x92, 0x7a, 0x83, 0x90, 0x6f, 0x79, 0x6b,
+    0x9b, 0x8d, 0x99, 0x95, 0x7b, 0x89, 0x8e, 0x6c, 0x8e, 0x6c, 0x9b, 0x91,
+    0x97, 0x80, 0x83, 0x6f, 0xaa, 0x91, 0x66, 0x76, 0xc9, 0x77, 0x82, 0x4d,
+    0xd7, 0x5f, 0x58, 0x9a, 0xb1, 0x7a, 0xb1, 0x6b, 0xe5, 0x9d, 0x76, 0x89,
+    0xb6, 0x94, 0x90, 0x5b, 0xb8, 0x92, 0x7d, 0x90, 0xbd, 0x9a, 0x85, 0x4e,
+    0xb4, 0x84, 0x61, 0x82, 0x94, 0x8e, 0x70, 0x57, 0x90, 0x89, 0x6f, 0x60,
+    0x78, 0x90, 0x78, 0x85, 0x8e, 0x7c, 0x76, 0x74, 0x71, 0x5d, 0x94, 0x93,
+    0x71, 0x8f, 0xc2, 0x80, 0x75, 0x7d, 0x77, 0xa8, 0x70, 0x8f, 0xa6, 0x83,
+    0x74, 0x6b, 0x79, 0x97, 0x76, 0xa2, 0xad, 0x93, 0x5b, 0x8c, 0x7c, 0x7e,
+    0x82, 0x9b, 0xa0, 0x76, 0x71, 0x7a, 0xa3, 0x80, 0x87, 0x90, 0x92, 0xa6,
+    0x85, 0x71, 0x99, 0x91, 0x91, 0x8c, 0x99, 0x9b, 0x92, 0x74, 0xb2, 0x79,
+    0x9c, 0x7c, 0x7b, 0xa8, 0x8c, 0x6f, 0xb5, 0x69, 0x7a, 0x8a, 0x68, 0x9f,
+    0x82, 0x7d, 0xbd, 0x5f, 0xa1, 0x92, 0x83, 0x9f, 0x6f, 0xa1, 0x88, 0x61,
+    0x7b, 0x94, 0x89, 0x83, 0x6f, 0x6e, 0x92, 0x9d, 0x65, 0x7f, 0x97, 0x83,
+    0x87, 0x75, 0x92, 0x8a, 0x82, 0x82, 0x79, 0x92, 0x78, 0x89, 0x92, 0x7a,
+    0x91, 0x64, 0x8a, 0x93, 0x9d, 0x74, 0x78, 0x64, 0xab, 0x57, 0x7a, 0x84,
+    0xcf, 0x7d, 0x95, 0x4f, 0xde, 0x63, 0x78, 0x9a, 0xb7, 0x7a, 0x8b, 0x5b,
+    0xda, 0xa3, 0x94, 0x99, 0xbd, 0x88, 0xa4, 0x53, 0xad, 0x8b, 0x81, 0x96,
+    0xca, 0x8f, 0x76, 0x5e, 0xbd, 0x9d, 0x70, 0x81, 0x9b, 0x7d, 0x8a, 0x44,
+    0xa0, 0x77, 0x52, 0x6e, 0x82, 0x62, 0x6a, 0x6b, 0x9d, 0xaa, 0x81, 0x85,
+    0x7d, 0x5f, 0x7f, 0x9c, 0x65, 0x99, 0x97, 0x81, 0x7f, 0x65, 0x65, 0xa4,
+    0x84, 0x8c, 0xa1, 0x6d, 0x7a, 0x70, 0x79, 0x90, 0x98, 0xaa, 0x76, 0x95,
+    0x7f, 0x91, 0x95, 0x96, 0x6e, 0xa5, 0x95, 0xa2, 0x7d, 0x7e, 0x93, 0x87,
+    0x7d, 0x9b, 0x85, 0x9b, 0x85, 0x79, 0x96, 0x6b, 0x9d, 0x9d, 0x61, 0x99,
+    0x9c, 0x74, 0xcc, 0x7e, 0x9a, 0x83, 0x83, 0x98, 0x6f, 0x6d, 0xc5, 0x69,
+    0xb0, 0xa5, 0x5c, 0x91, 0x6c, 0x7b, 0xcc, 0x72, 0x9a, 0x9d, 0x7e, 0xa3,
+    0x8a, 0x96, 0x8e, 0x74, 0x7b, 0x80, 0x6b, 0x85, 0x84, 0x56, 0x92, 0x83,
+    0x64, 0x90, 0x86, 0x86, 0x88, 0x79, 0x8b, 0xa0, 0x86, 0x72, 0xab, 0x95,
+    0x80, 0x81, 0x96, 0x8f, 0x75, 0x7f, 0x71, 0x92, 0x9e, 0x75, 0x62, 0x5e,
+    0xc3, 0x7a, 0x6c, 0x84, 0xba, 0x81, 0x8f, 0x49, 0xc9, 0x76, 0x54, 0x89,
+    0xc2, 0x8c, 0xa2, 0x54, 0xd8, 0xa4, 0x72, 0x90, 0xb1, 0x91, 0xa0, 0x7a,
+    0xbf, 0x9a, 0x6f, 0x82, 0xbb, 0x81, 0x6a, 0x52, 0xc2, 0x82, 0x52, 0x65,
+    0x8d, 0x8a, 0x84, 0x46, 0xa2, 0x90, 0x45, 0x52, 0x82, 0x61, 0x8c, 0x77,
+    0x92, 0x6d, 0x87, 0x5b, 0x5e, 0x72, 0x76, 0x97, 0x73, 0x8d, 0x8d, 0x70,
+    0x7a, 0x66, 0x76, 0x89, 0x72, 0xbf, 0xb0, 0x84, 0x7d, 0x80, 0x71, 0x8f,
+    0x85, 0xa9, 0xa3, 0x7d, 0x7b, 0x84, 0x83, 0xa1, 0x97, 0xa7, 0xaf, 0x84,
+    0x86, 0x7d, 0x94, 0x78, 0x80, 0x98, 0x71, 0x84, 0x94, 0x73, 0xb0, 0x74,
+    0x99, 0xa2, 0x68, 0xa7, 0x8b, 0x86, 0xe0, 0x75, 0x9e, 0x93, 0x5c, 0xb2,
+    0xa2, 0x68, 0xb8, 0x61, 0x92, 0xa3, 0x68, 0xa4, 0x89, 0x59, 0xd0, 0x77,
+    0x97, 0xa9, 0x6a, 0x9b, 0x7d, 0x69, 0x9b, 0x79, 0x8c, 0x7c, 0x68, 0x8b,
+    0x7a, 0x53, 0x99, 0x9c, 0x7e, 0x8d, 0x89, 0x96, 0x9e, 0x83, 0x89, 0x74,
+    0x7f, 0x94, 0x92, 0x8f, 0x85, 0x8a, 0x8a, 0x80, 0x99, 0x87, 0x7a, 0x7d,
+    0xac, 0x93, 0x74, 0x68, 0xba, 0x87, 0x6a, 0x98, 0xc7, 0x79, 0x91, 0x54,
+    0xeb, 0x80, 0x45, 0x80, 0xc4, 0xb4, 0x94, 0x61, 0xd2, 0xa6, 0x7b, 0x95,
+    0xa4, 0xaa, 0x93, 0x7b, 0xb1, 0x74, 0x53, 0x7c, 0xaa, 0x91, 0x64, 0x51,
+    0xa9, 0x6e, 0x5e, 0x7c, 0x79, 0x82, 0x8b, 0x2e, 0x9d, 0x66, 0x61, 0x5e,
+    0x72, 0x7f, 0x6e, 0x6d, 0x8c, 0x79, 0x7d, 0x60, 0x76, 0x79, 0x68, 0x84,
+    0x4d, 0x8e, 0xa8, 0x8f, 0x78, 0x74, 0x69, 0xa4, 0x6e, 0xa9, 0xb9, 0x59,
+    0x83, 0x7f, 0x7a, 0x93, 0x90, 0x9b, 0x8d, 0x93, 0x78, 0x80, 0x77, 0x8b,
+    0x72, 0xa3, 0x97, 0x73, 0x91, 0x6c, 0x9a, 0x97, 0xa3, 0xad, 0x89, 0x96,
+    0x9e, 0x6d, 0xb5, 0x7c, 0xa4, 0x98, 0x61, 0x8a, 0x93, 0x5f, 0xdc, 0x63,
+    0xba, 0x92, 0x84, 0x94, 0xab, 0x6f, 0xbf, 0x66, 0x98, 0x93, 0x74, 0x85,
+    0x96, 0x63, 0xb8, 0x60, 0x94, 0xbb, 0x79, 0x94, 0x7b, 0x67, 0x8a, 0x64,
+    0x99, 0xac, 0x60, 0x98, 0xb0, 0x65, 0xa2, 0x73, 0x8f, 0x94, 0x8c, 0x92,
+    0x84, 0x84, 0x9b, 0x8f, 0x84, 0x8d, 0x9f, 0x90, 0x91, 0x85, 0x93, 0x74,
+    0x97, 0x66, 0x7f, 0x78, 0xa2, 0x95, 0x73, 0x6b, 0xc5, 0x6f, 0x62, 0x79,
+    0xbd, 0x81, 0x89, 0x4a, 0xbd, 0x93, 0x57, 0x81, 0xba, 0xb0, 0x9b, 0x4c,
+    0xe8, 0xa2, 0x85, 0xa2, 0x96, 0x92, 0x93, 0x62, 0xbe, 0x7a, 0x71, 0x8b,
+    0x8d, 0x97, 0x53, 0x56, 0xb1, 0x5f, 0x67, 0x60, 0x7a, 0x8e, 0x8a, 0x3a,
+    0x86, 0x67, 0x6d, 0x53, 0x6e, 0x91, 0x7b, 0x60, 0x99, 0x6d, 0x71, 0x5d,
+    0x67, 0x65, 0x63, 0x87, 0x71, 0x8a, 0x92, 0x6d, 0x8f, 0x6f, 0x6f, 0xae,
+    0x6c, 0xa2, 0x87, 0x6f, 0x99, 0x88, 0x78, 0x94, 0x8a, 0xb2, 0x93, 0x89,
+    0x90, 0x8d, 0x8c, 0x98, 0x81, 0x86, 0x90, 0x6d, 0xa2, 0x82, 0xa2, 0xa3,
+    0x9d, 0x8f, 0x7a, 0x9f, 0x87, 0x70, 0xbd, 0x8e, 0xa5, 0x99, 0x5d, 0x70,
+    0x8c, 0x60, 0xc7, 0x78, 0x97, 0xb0, 0x6f, 0x94, 0x92, 0x5a, 0xc3, 0x6e,
+    0x8b, 0x9f, 0x79, 0xa3, 0x8c, 0x5e, 0xbf, 0x79, 0x8e, 0x98, 0x76, 0x8e,
+    0x67, 0x31, 0x9b, 0x85, 0x8e, 0x85, 0x71, 0x99, 0x72, 0x77, 0x84, 0x81,
+    0x91, 0x95, 0x80, 0x98, 0x82, 0x6f, 0x90, 0xa0, 0x91, 0x91, 0x8e, 0x75,
+    0x8a, 0x89, 0x93, 0x69, 0x95, 0x7f, 0x9a, 0xa0, 0x9e, 0x9b, 0x88, 0x4e,
+    0xc3, 0x8d, 0x65, 0x74, 0xba, 0x8d, 0x97, 0x4d, 0xd6, 0x94, 0x73, 0xa0,
+    0xb1, 0xb3, 0x8c, 0x67, 0xdd, 0x9f, 0x7f, 0xaa, 0xaf, 0x9a, 0x88, 0x67,
+    0xc2, 0x8f, 0x71, 0x7b, 0x8f, 0x9f, 0x47, 0x52, 0x93, 0x72, 0x5a, 0x52,
+    0x97, 0x9d, 0x67, 0x3c, 0xa9, 0x59, 0x59, 0x5b, 0x88, 0x92, 0x82, 0x57,
+    0x83, 0x67, 0x94, 0x77, 0x52, 0x74, 0x60, 0x9e, 0x52, 0x84, 0xa2, 0x69,
+    0x71, 0x96, 0x73, 0xb0, 0x5e, 0xb0, 0x89, 0x71, 0x94, 0x8a, 0x66, 0xa0,
+    0x75, 0xc1, 0x99, 0x8e, 0x83, 0x8a, 0x91, 0x89, 0x6b, 0xa5, 0x79, 0x82,
+    0x8b, 0x73, 0x95, 0xb0, 0x77, 0x9b, 0x82, 0x7d, 0x8f, 0x60, 0xb9, 0x78,
+    0x8b, 0x8f, 0x7b, 0x74, 0x84, 0x6d, 0xbf, 0x76, 0x8f, 0xa3, 0x91, 0xa1,
+    0x81, 0x59, 0xcb, 0x69, 0xac, 0x90, 0x98, 0x92, 0xa7, 0x5d, 0xb4, 0x8b,
+    0xaa, 0xb1, 0x98, 0x8c, 0xa2, 0x4d, 0xa1, 0x69, 0x7f, 0xa0, 0x7d, 0x8a,
+    0x9b, 0x77, 0x8e, 0x71, 0x82, 0x8a, 0x78, 0x8d, 0x98, 0x78, 0x90, 0x91,
+    0x7e, 0x7f, 0x78, 0x85, 0x97, 0x8a, 0x97, 0x6d, 0xb3, 0x94, 0x89, 0xa3,
+    0xa5, 0x9a, 0x76, 0x6b, 0xbd, 0x79, 0x71, 0x95, 0xce, 0xab, 0x93, 0x1f,
+    0xe9, 0x97, 0x4c, 0x84, 0xd5, 0x9f, 0x98, 0x6e, 0xdd, 0x8d, 0x80, 0x9c,
+    0xa8, 0x9e, 0x8d, 0x75, 0xbc, 0x8c, 0x80, 0x89, 0xa1, 0x89, 0x74, 0x58,
+    0x92, 0x86, 0x55, 0x87, 0x91, 0x8d, 0x70, 0x33, 0xb8, 0x50, 0x63, 0x6b,
+    0x79, 0x99, 0x76, 0x71, 0x75, 0x59, 0x73, 0x6b, 0x62, 0x62, 0x74, 0x85,
+    0x73, 0xa3, 0xac, 0x78, 0x77, 0x88, 0x64, 0xa0, 0x73, 0xa1, 0xa8, 0x73,
+    0x91, 0x8e, 0x5f, 0x9a, 0x68, 0xc9, 0xa1, 0x92, 0x7a, 0x7c, 0x69, 0x77,
+    0x7d, 0x9e, 0x8f, 0x76, 0x88, 0x80, 0x92, 0x93, 0x91, 0x99, 0x8c, 0x85,
+    0x9f, 0x69, 0xa8, 0x9b, 0x9f, 0x9a, 0x64, 0x7a, 0x99, 0x70, 0xc4, 0x6d,
+    0x9a, 0x99, 0x82, 0xa0, 0x8b, 0x59, 0xc8, 0x61, 0x8f, 0x95, 0x72, 0x8c,
+    0x90, 0x63, 0xa9, 0x7e, 0x88, 0x8c, 0x85, 0x78, 0x76, 0x58, 0x8e, 0x72,
+    0xa3, 0x9a, 0x7c, 0xa0, 0x7f, 0x6d, 0xa6, 0x83, 0x7e, 0x8d, 0x83, 0x88,
+    0x86, 0x68, 0x8d, 0x96, 0xaa, 0x78, 0x90, 0xa5, 0x9c, 0x9d, 0x99, 0x88,
+    0xb0, 0x82, 0x6f, 0x7e, 0xad, 0xa9, 0x7b, 0x6a, 0xba, 0x6c, 0x6d, 0x89,
+    0xc1, 0x9e, 0x8e, 0x2f, 0xf2, 0x77, 0x50, 0x73, 0xdb, 0xc4, 0x9c, 0x6c,
+    0xd0, 0x90, 0x88, 0xbe, 0x97, 0xb9, 0x9e, 0x6e, 0xbe, 0x8e, 0x83, 0x8e,
+    0x96, 0x98, 0x4c, 0x4e, 0xa7, 0x8d, 0x43, 0x92, 0x8f, 0x92, 0x6d, 0x27,
+    0x94, 0x73, 0x5f, 0x42, 0x7c, 0xa7, 0x8a, 0x5a, 0x81, 0x60, 0x85, 0x66,
+    0x73, 0x72, 0x74, 0x9d, 0x5a, 0x9e, 0xa3, 0x71, 0x75, 0x91, 0x4f, 0xa2,
+    0x67, 0xa6, 0x91, 0x64, 0x92, 0x7e, 0x95, 0x8d, 0x6e, 0xbe, 0x9b, 0x57,
+    0x9b, 0x82, 0x89, 0x70, 0x6f, 0x9e, 0x7e, 0x86, 0x97, 0x81, 0x85, 0x8e,
+    0x70, 0x96, 0x6c, 0x72, 0xab, 0x6d, 0x9c, 0x91, 0xa0, 0x8a, 0x8d, 0x88,
+    0x9e, 0x75, 0xc6, 0x76, 0x7c, 0xa7, 0x6b, 0xa8, 0x94, 0x72, 0xb6, 0x78,
+    0x8d, 0x90, 0x7b, 0x8c, 0xa6, 0x65, 0xad, 0x9b, 0xaa, 0x94, 0x89, 0x7d,
+    0x90, 0x69, 0xaa, 0x7e, 0x9e, 0xad, 0x7f, 0x94, 0x81, 0x7d, 0xa1, 0x7b,
+    0x6c, 0x65, 0x83, 0x95, 0x89, 0x75, 0x93, 0x87, 0x94, 0x87, 0xa8, 0x92,
+    0x8d, 0xa6, 0x9f, 0x78, 0xaa, 0x72, 0x95, 0x94, 0xac, 0xa6, 0x91, 0x5a,
+    0xdb, 0x82, 0x55, 0xb6, 0xc1, 0xa3, 0x84, 0x4f, 0xc9, 0x88, 0x53, 0x8f,
+    0xbb, 0xae, 0x9b, 0x8a, 0xd8, 0xa9, 0x68, 0xc2, 0xa0, 0xa9, 0x87, 0x6b,
+    0xbd, 0x99, 0x7e, 0x86, 0x88, 0xa7, 0x5e, 0x53, 0xa4, 0x84, 0x6b, 0x6e,
+    0x89, 0x95, 0x84, 0x2d, 0xb5, 0x43, 0x3e, 0x50, 0x71, 0x96, 0x9a, 0x5b,
+    0xa1, 0x60, 0x80, 0x70, 0x6a, 0x73, 0x8f, 0x95, 0x52, 0x9b, 0xae, 0x71,
+    0x76, 0x7d, 0x61, 0x99, 0x5b, 0xc3, 0xa8, 0x76, 0x98, 0x72, 0x7f, 0x8a,
+    0x66, 0xc7, 0xa3, 0x7b, 0x8e, 0x8f, 0x70, 0x74, 0x6a, 0xae, 0x85, 0x83,
+    0x96, 0x7d, 0x98, 0xa7, 0x8f, 0x94, 0x7e, 0x84, 0x96, 0x7a, 0xab, 0x7d,
+    0x83, 0xb1, 0x6f, 0x7d, 0x9f, 0x80, 0xca, 0x8f, 0x9b, 0xa9, 0x69, 0x7a,
+    0x92, 0x73, 0xaa, 0x74, 0x88, 0x98, 0x87, 0x8f, 0xa7, 0x68, 0xa0, 0x74,
+    0x97, 0x95, 0x6e, 0x6f, 0x83, 0x53, 0x9b, 0x79, 0x71, 0x87, 0x7d, 0x8b,
+    0x79, 0x87, 0xa3, 0x75, 0x68, 0x73, 0x7e, 0x89, 0x8f, 0x81, 0x98, 0x7a,
+    0x9a, 0x83, 0x9d, 0x95, 0x90, 0x98, 0x97, 0x57, 0x93, 0x7e, 0xa2, 0x9a,
+    0xa8, 0x8a, 0x85, 0x53, 0xbd, 0x7a, 0x61, 0x8b, 0xca, 0xac, 0x9b, 0x2e,
+    0xe8, 0xa5, 0x66, 0x86, 0xca, 0xa7, 0xa0, 0x85, 0xcf, 0xa4, 0x6a, 0xc2,
+    0xb0, 0xaa, 0x76, 0x76, 0xb6, 0xa2, 0x72, 0xa9, 0xa1, 0xa1, 0x67, 0x67,
+    0xac, 0x90, 0x70, 0x6d, 0x8f, 0xb5, 0x6d, 0x3b, 0x85, 0x64, 0x4a, 0x6e,
+    0x72, 0x9f, 0x98, 0x5b, 0x97, 0x3e, 0x8a, 0x6a, 0x6c, 0x7d, 0x77, 0x98,
+    0x5a, 0x92, 0xa3, 0x81, 0x6f, 0x91, 0x7b, 0xa6, 0x6e, 0x9c, 0x9b, 0x5f,
+    0x9e, 0x7e, 0x77, 0x9d, 0x88, 0xc6, 0x81, 0x5a, 0x93, 0x8b, 0x6c, 0x71,
+    0x63, 0x9e, 0x78, 0x79, 0x70, 0x90, 0x95, 0x9f, 0x71, 0xa9, 0x90, 0x73,
+    0x98, 0x8a, 0xa5, 0x8e, 0x87, 0xb0, 0x79, 0x79, 0x92, 0x7d, 0xcc, 0xa8,
+    0x7a, 0x92, 0x82, 0x91, 0x90, 0x69, 0xa4, 0x9b, 0x97, 0x8f, 0x75, 0x7c,
+    0xa3, 0x69, 0xb5, 0x87, 0x8d, 0x88, 0x7b, 0x94, 0x8b, 0x55, 0xa2, 0x6d,
+    0x89, 0x8e, 0x81, 0x8a, 0x9e, 0x87, 0x86, 0x83, 0x8b, 0x84, 0x87, 0xa7,
+    0x8e, 0x79, 0xa4, 0x9c, 0x99, 0x82, 0xa3, 0x8f, 0x91, 0x9a, 0x95, 0x5b,
+    0x9f, 0x6e, 0x85, 0x93, 0xa6, 0x9a, 0x91, 0x4c, 0xd8, 0x6b, 0x6d, 0x85,
+    0xde, 0xaa, 0x97, 0x51, 0xcf, 0x8c, 0x5f, 0x9a, 0xc2, 0x9d, 0x9a, 0x7c,
+    0xc6, 0xb1, 0x84, 0xac, 0xba, 0xa5, 0x7c, 0x76, 0xbd, 0x93, 0x7f, 0xa0,
+    0x86, 0xae, 0x47, 0x41, 0x88, 0x82, 0x62, 0x62, 0x73, 0xad, 0x6b, 0x23,
+    0xa0, 0x48, 0x5a, 0x5a, 0x8f, 0x98, 0xbd, 0x5c, 0x9c, 0x72, 0x7c, 0x68,
+    0x50, 0x78, 0x91, 0xab, 0x5c, 0xc1, 0xc6, 0x66, 0x87, 0x86, 0x60, 0x99,
+    0x65, 0xac, 0x94, 0x91, 0x7e, 0x8c, 0x7d, 0x9b, 0x70, 0xb2, 0x9a, 0x7d,
+    0x82, 0x91, 0x6b, 0x86, 0x6f, 0xbb, 0x7f, 0x66, 0x7a, 0x79, 0x94, 0x96,
+    0x71, 0xa5, 0x75, 0x73, 0x95, 0x81, 0xa4, 0x8b, 0x87, 0xaa, 0x8e, 0x92,
+    0xa9, 0x82, 0xb0, 0x92, 0x89, 0xa7, 0x83, 0x81, 0x8c, 0x6d, 0xc4, 0x7a,
+    0x89, 0xa5, 0xa1, 0xa2, 0xa4, 0x6b, 0xa4, 0x82, 0x90, 0xb2, 0x8d, 0x72,
+    0x83, 0x60, 0xa7, 0x7a, 0x80, 0x97, 0x65, 0x90, 0x87, 0x85, 0xae, 0x71,
+    0x7d, 0x71, 0x98, 0xa8, 0x90, 0x75, 0xa9, 0x96, 0xa2, 0x91, 0x7b, 0x6b,
+    0xa0, 0x9d, 0x8d, 0x5d, 0xa4, 0x79, 0x8c, 0xa4, 0xad, 0x94, 0x7e, 0x77,
+    0xb6, 0x92, 0x74, 0xaf, 0xb5, 0x9b, 0x99, 0x67, 0xe7, 0x8e, 0x6a, 0x87,
+    0xc1, 0x98, 0x9b, 0x7e, 0xd7, 0x9b, 0x5b, 0xae, 0xc9, 0x94, 0x7a, 0x6d,
+    0x9e, 0xb4, 0x86, 0x8e, 0xa3, 0xa1, 0x5e, 0x5d, 0x8e, 0x8f, 0x6b, 0x59,
+    0xa5, 0xa9, 0x69, 0x20, 0xa4, 0x64, 0x35, 0x61, 0x83, 0x9d, 0x8a, 0x4e,
+    0x8b, 0x6c, 0x5e, 0x5b, 0x68, 0x76, 0x89, 0x94, 0x5f, 0x87, 0x98, 0x7a,
+    0x5d, 0x81, 0x89, 0xa6, 0x54, 0xa3, 0xb4, 0x7b, 0x83, 0x8a, 0x90, 0x8b,
+    0x86, 0xbc, 0x86, 0x59, 0x91, 0x79, 0x71, 0x6b, 0x7c, 0x94, 0x98, 0x7f,
+    0x81, 0x76, 0x85, 0xad, 0x69, 0xa8, 0x83, 0x8c, 0x8f, 0x70, 0x9a, 0x91,
+    0x78, 0xb3, 0x8f, 0x6d, 0x90, 0x86, 0xbd, 0x97, 0x7f, 0xaf, 0x7e, 0x90,
+    0x8f, 0x63, 0xa2, 0x93, 0x6e, 0xab, 0x75, 0x72, 0x8d, 0x74, 0xa1, 0x72,
+    0x82, 0xaa, 0x70, 0x82, 0x8d, 0x67, 0x94, 0x91, 0x92, 0xa5, 0x7f, 0xa5,
+    0x6f, 0x6d, 0xaf, 0x80, 0x89, 0x7d, 0x92, 0x99, 0x92, 0x72, 0x9d, 0x7d,
+    0x92, 0x78, 0xa9, 0x89, 0xa9, 0x9b, 0xa3, 0x73, 0x98, 0x71, 0x98, 0x86,
+    0x9e, 0x97, 0x9e, 0x6a, 0xb9, 0x6a, 0x6e, 0x90, 0xde, 0x94, 0x9a, 0x52,
+    0xdd, 0xa9, 0x6a, 0x79, 0xb9, 0xa3, 0xaa, 0x95, 0xba, 0xa2, 0x75, 0xc2,
+    0xbf, 0xb5, 0x6d, 0x8d, 0xae, 0x9b, 0x8d, 0x9a, 0x92, 0xb4, 0x5e, 0x4b,
+    0x8b, 0x99, 0x4f, 0x65, 0x94, 0xb6, 0x5d, 0x3a, 0xa3, 0x77, 0x51, 0x4e,
+    0x6d, 0xa3, 0x94, 0x59, 0x80, 0x56, 0x8c, 0x67, 0x67, 0x74, 0x99, 0x85,
+    0x57, 0x7b, 0x9e, 0x7e, 0x84, 0x85, 0x94, 0x96, 0x71, 0xbf, 0x97, 0x5f,
+    0x7d, 0x80, 0x93, 0x87, 0x6b, 0xb9, 0x7d, 0x8b, 0x84, 0x84, 0x6b, 0x8c,
+    0x6c, 0xc4, 0x85, 0x82, 0x87, 0x8d, 0x64, 0x90, 0x80, 0xb6, 0x9a, 0x70,
+    0x9c, 0x68, 0xa0, 0x88, 0x81, 0x9d, 0x83, 0x75, 0x9d, 0x84, 0xbf, 0x8f,
+    0x83, 0x9b, 0x75, 0x82, 0x9c, 0x76, 0xa4, 0x9d, 0x8a, 0xa7, 0x8e, 0x96,
+    0x9c, 0x64, 0xc0, 0x95, 0x88, 0xa5, 0x6f, 0x74, 0x7e, 0x5d, 0x9f, 0x7d,
+    0x89, 0x81, 0x71, 0xa8, 0x82, 0x6e, 0x9b, 0x9a, 0x6f, 0xa5, 0x88, 0x89,
+    0xa4, 0x7e, 0xa4, 0x90, 0xa1, 0x83, 0x8b, 0x9c, 0x9a, 0x89, 0xa2, 0x89,
+    0x9d, 0x5d, 0x86, 0xa5, 0xc4, 0x96, 0x9c, 0x85, 0xd6, 0x7c, 0x69, 0x88,
+    0xc9, 0xa5, 0x9b, 0x60, 0xea, 0xab, 0x62, 0x9f, 0xd1, 0xa5, 0x86, 0x7e,
+    0xb3, 0xbd, 0x7a, 0xa1, 0xbd, 0xa0, 0x7c, 0x92, 0xa6, 0xa3, 0x7d, 0xa9,
+    0x98, 0xa6, 0x71, 0x5c, 0x9b, 0x9b, 0x58, 0x6f, 0x8f, 0xaa, 0x5e, 0x3b,
+    0xa6, 0x5f, 0x3a, 0x79, 0x94, 0xa5, 0x84, 0x6f, 0x83, 0x5d, 0x75, 0x65,
+    0x6c, 0x77, 0x86, 0xad, 0x4a, 0x92, 0x8e, 0x8a, 0x8f, 0x7b, 0x72, 0x96,
+    0x79, 0xa6, 0xa8, 0x6d, 0x7b, 0x7b, 0x98, 0xa9, 0x79, 0xb9, 0x9e, 0x8f,
+    0x90, 0x6d, 0x76, 0x82, 0x81, 0xc1, 0x95, 0x7c, 0x97, 0x8d, 0x95, 0xa2,
+    0x7c, 0xa4, 0x7b, 0x9b, 0x7f, 0x6f, 0xac, 0x83, 0x7e, 0xa1, 0x7c, 0x7c,
+    0xa1, 0x7a, 0xa1, 0x6d, 0x95, 0x86, 0x77, 0x98, 0x8e, 0x58, 0xa2, 0x76,
+    0x8e, 0xa8, 0x94, 0x90, 0xa7, 0x62, 0xb8, 0x8a, 0x9f, 0xac, 0x87, 0x91,
+    0x88, 0x50, 0xa7, 0x83, 0x88, 0x65, 0x7a, 0x92, 0x9d, 0x70, 0xa9, 0x99,
+    0x7c, 0x87, 0x8c, 0x96, 0x8e, 0x73, 0xa4, 0xa7, 0x9b, 0x70, 0x99, 0x96,
+    0x8f, 0x88, 0xb4, 0x85, 0xa8, 0x6a, 0x9e, 0x78, 0xb0, 0x82, 0x9f, 0x89,
+    0xc9, 0x8d, 0x71, 0x7f, 0xc0, 0x98, 0xa0, 0x6d, 0xd2, 0x8e, 0x64, 0x9e,
+    0xb2, 0xa9, 0x93, 0x6e, 0xcc, 0xbb, 0x89, 0xb1, 0xc1, 0x9b, 0x86, 0x94,
+    0xb5, 0xb5, 0x95, 0xa0, 0x9c, 0x9b, 0x62, 0x5f, 0x7b, 0x91, 0x69, 0x74,
+    0x9e, 0xa3, 0x81, 0x30, 0x85, 0x59, 0x49, 0x5e, 0x83, 0x85, 0x7d, 0x6a,
+    0x90, 0x51, 0x80, 0x5e, 0x64, 0x6f, 0x99, 0x93, 0x75, 0x9a, 0xa7, 0x72,
+    0x6c, 0x5d, 0xa3, 0x93, 0x87, 0xa7, 0xbd, 0x6f, 0x92, 0x6d, 0x85, 0x98,
+    0x6f, 0xc7, 0xb6, 0x7c, 0x80, 0x71, 0x8a, 0x9f, 0x71, 0xb5, 0x8c, 0x6d,
+    0xac, 0x7b, 0x72, 0xb7, 0x69, 0xa6, 0x9d, 0x66, 0xab, 0x7a, 0x8b, 0x70,
+    0x8c, 0x9e, 0x86, 0x75, 0x96, 0x7b, 0xa3, 0x93, 0x8f, 0xb7, 0x84, 0x8c,
+    0x87, 0x56, 0xae, 0x82, 0x71, 0xa3, 0x8d, 0x93, 0xaf, 0x59, 0xb3, 0x8a,
+    0x97, 0x99, 0x75, 0x73, 0x8e, 0x51, 0xae, 0x84, 0x8b, 0x7a, 0x76, 0x77,
+    0x6e, 0x75, 0xa4, 0x8a, 0x75, 0x8e, 0x8f, 0xa2, 0x96, 0x76, 0x9a, 0x80,
+    0x96, 0x7d, 0x94, 0x71, 0x8a, 0x90, 0xac, 0x82, 0xa5, 0x61, 0xa3, 0x84,
+    0xac, 0x8f, 0x74, 0x5c, 0xb6, 0x77, 0x8b, 0x9b, 0xb5, 0x8b, 0xb6, 0x52,
+    0xd7, 0xaa, 0x4b, 0x8c, 0xbf, 0xb8, 0x9f, 0x6d, 0xcb, 0xa3, 0x6e, 0x97,
+    0xaa, 0x8d, 0x7c, 0x99, 0xc0, 0xd0, 0x9e, 0xb7, 0x93, 0xaa, 0x5a, 0x6a,
+    0x7d, 0x9a, 0x63, 0x71, 0x78, 0x8c, 0x67, 0x43, 0x87, 0x52, 0x64, 0x68,
+    0x68, 0x9c, 0x65, 0x60, 0x7a, 0x35, 0x68, 0x66, 0x63, 0x69, 0x8d, 0x8f,
+    0x72, 0x9b, 0x99, 0x5b, 0x80, 0x67, 0x93, 0xa2, 0x97, 0x9d, 0x8c, 0x68,
+    0x80, 0x86, 0x96, 0x91, 0x64, 0xbf, 0x98, 0x63, 0x83, 0x85, 0x61, 0x97,
+    0x6a, 0xac, 0xb4, 0x99, 0x8d, 0x7b, 0x7b, 0xad, 0x8b, 0xb2, 0x9e, 0x7f,
+    0x9a, 0x73, 0x91, 0x84, 0x89, 0x9f, 0x8a, 0x87, 0x8b, 0x72, 0x8e, 0x79,
+    0x86, 0xa7, 0x77, 0x84, 0x90, 0x58, 0xb2, 0x90, 0x93, 0xa0, 0x7f, 0x8a,
+    0x91, 0x5a, 0xb1, 0x80, 0x99, 0xc1, 0x80, 0x7d, 0x97, 0x5c, 0x9a, 0x8c,
+    0x71, 0x96, 0x7e, 0x7f, 0xad, 0x7b, 0xb9, 0x8a, 0x84, 0x84, 0x81, 0x97,
+    0x94, 0x64, 0x9f, 0x7e, 0x9b, 0x8d, 0x7d, 0x8d, 0x9a, 0x9e, 0xac, 0x72,
+    0xb2, 0x73, 0x81, 0x84, 0xc8, 0x81, 0x88, 0x72, 0xbe, 0x85, 0x86, 0x97,
+    0xd3, 0x8a, 0xc7, 0x75, 0xce, 0x9c, 0x69, 0xa6, 0xb0, 0xa1, 0x8e, 0x64,
+    0xb1, 0xa6, 0x67, 0xaa, 0xcd, 0x95, 0x97, 0xa2, 0xb2, 0xb2, 0x85, 0x9a,
+    0x9d, 0xa3, 0x5e, 0x73, 0x6e, 0xae, 0x50, 0x83, 0x8c, 0xab, 0x92, 0x43,
+    0x6b, 0x66, 0x43, 0x5c, 0x8f, 0x8a, 0x9a, 0x6c, 0x84, 0x48, 0x80, 0x6b,
+    0x8d, 0x82, 0xaf, 0x89, 0x71, 0x9f, 0xa4, 0x9a, 0x7b, 0x68, 0x91, 0xaa,
+    0x6b, 0xa3, 0x9c, 0x62, 0x8d, 0x6d, 0x87, 0x87, 0x81, 0x9a, 0x97, 0x6c,
+    0x9c, 0x76, 0x63, 0xbc, 0x62, 0xbc, 0xb0, 0x97, 0xa7, 0x81, 0x70, 0x8f,
+    0x7d, 0xb2, 0xa6, 0x98, 0xa1, 0x7b, 0x8e, 0x83, 0x8c, 0xa2, 0x7e, 0x73,
+    0x99, 0x65, 0xc1, 0x77, 0x8e, 0xbc, 0x72, 0xa6, 0x8c, 0x55, 0xab, 0x8e,
+    0x7d, 0xa3, 0x79, 0x80, 0x9e, 0x6b, 0xa9, 0x6c, 0x80, 0xb6, 0x81, 0xa6,
+    0x92, 0x5b, 0xb7, 0x99, 0x81, 0x7e, 0x8e, 0x89, 0x97, 0x86, 0x93, 0x86,
+    0x7b, 0x9a, 0x7f, 0x9a, 0x8e, 0x69, 0xa3, 0xa4, 0x9f, 0x8b, 0x96, 0x6f,
+    0x8b, 0x97, 0xb4, 0x74, 0x96, 0x53, 0x99, 0x91, 0xa7, 0xa8, 0x69, 0x72,
+    0xc9, 0x85, 0x99, 0x93, 0xc0, 0x90, 0xaa, 0x7f, 0xc7, 0x71, 0x74, 0x8d,
+    0xb7, 0xab, 0x91, 0x69, 0xb4, 0x9b, 0x7d, 0x95, 0xc3, 0xb0, 0x9b, 0xa9,
+    0xb3, 0x9f, 0x79, 0xa5, 0x9f, 0xad, 0x6b, 0x85, 0x90, 0xad, 0x69, 0x62,
+    0x7e, 0xa6, 0x69, 0x4e, 0x80, 0x7e, 0x52, 0x57, 0x5f, 0x95, 0x72, 0x4c,
+    0x87, 0x4e, 0x5a, 0x62, 0x7d, 0x70, 0x92, 0x98, 0x76, 0x8e, 0x99, 0x7d,
+    0x73, 0x6d, 0x86, 0x8e, 0x6b, 0x80, 0xa7, 0x9d, 0x91, 0x73, 0x95, 0x70,
+    0x80, 0xc3, 0x9f, 0x8b, 0x72, 0x86, 0x6b, 0xad, 0x76, 0xbe, 0xad, 0x8e,
+    0x9c, 0x78, 0x6a, 0xbf, 0x7d, 0xa8, 0x88, 0x8a, 0x8b, 0x8c, 0x9c, 0x8c,
+    0x8a, 0x85, 0x73, 0x92, 0xa2, 0x7b, 0xa5, 0x96, 0x9b, 0xa3, 0x6c, 0x80,
+    0xa6, 0x63, 0xac, 0x98, 0xa3, 0x9a, 0x83, 0x8a, 0x8c, 0x63, 0xb9, 0x8c,
+    0x99, 0xa1, 0x7a, 0x6c, 0x9e, 0x59, 0x90, 0x84, 0x8a, 0x93, 0x8f, 0x87,
+    0x98, 0x84, 0x99, 0xa4, 0x72, 0x6d, 0x95, 0xa2, 0x95, 0x72, 0xc3, 0x88,
+    0x8f, 0x6a, 0x77, 0x7d, 0x8b, 0xae, 0xa3, 0x7c, 0xa8, 0x5d, 0x7c, 0xa8,
+    0xa1, 0x85, 0x7e, 0x8c, 0xac, 0x8d, 0x73, 0x88, 0xc1, 0x89, 0xaa, 0x89,
+    0xb2, 0x92, 0x75, 0x9a, 0x9c, 0x8e, 0xb9, 0xaa, 0xaa, 0xac, 0x78, 0x85,
+    0xbc, 0x9f, 0x6d, 0xb7, 0x89, 0xa6, 0xb3, 0x8e, 0xa5, 0xbb, 0x6b, 0x9d,
+    0x8f, 0x8b, 0x69, 0x7a, 0x82, 0x99, 0x8c, 0x49, 0x87, 0x74, 0x37, 0x63,
+    0x5d, 0x92, 0x77, 0x66, 0x63, 0x56, 0x77, 0x5d, 0x7f, 0x68, 0x97, 0x74,
+    0x84, 0x94, 0x7d, 0x7d, 0x91, 0x78, 0x87, 0x96, 0x7f, 0x97, 0x94, 0x6f,
+    0x89, 0x6c, 0x96, 0x71, 0x83, 0x8f, 0x8a, 0x89, 0x7d, 0x84, 0x8a, 0xa6,
+    0x7b, 0x95, 0x89, 0x77, 0x94, 0x80, 0x7f, 0x93, 0x5e, 0xbb, 0x9c, 0xa8,
+    0xa2, 0x7e, 0xa6, 0x86, 0x7d, 0x8b, 0x92, 0x73, 0xac, 0x78, 0xaa, 0x98,
+    0xb1, 0x94, 0x79, 0x8b, 0x8f, 0x70, 0xa7, 0xae, 0x92, 0xad, 0xb1, 0x8b,
+    0xb0, 0x78, 0xbc, 0xa9, 0xa4, 0xa3, 0x9e, 0x76, 0x89, 0x67, 0xab, 0x98,
+    0x75, 0x8c, 0x86, 0x95, 0x9e, 0x77, 0x96, 0x85, 0x8c, 0x8e, 0x8b, 0x8a,
+    0x8a, 0x4b, 0x71, 0x8a, 0x9b, 0x6d, 0x6e, 0x89, 0x81, 0x82, 0xa7, 0x98,
+    0xa5, 0x66, 0x72, 0x8b, 0x99, 0x9a, 0x8b, 0x8b, 0x9f, 0x87, 0x79, 0x84,
+    0x99, 0x6d, 0x90, 0x7d, 0x9d, 0xa7, 0x81, 0xa3, 0x9d, 0x96, 0x82, 0x86,
+    0xa2, 0x8e, 0x8d, 0x7f, 0x84, 0x8c, 0x98, 0xbc, 0x83, 0xb4, 0xb5, 0x78,
+    0x7d, 0xab, 0x8d, 0x87, 0x71, 0x8d, 0x6e, 0x8f, 0x89, 0xaa, 0x7c, 0x6f,
+    0x71, 0x69, 0x65, 0x60, 0x81, 0x91, 0x94, 0x6d, 0x76, 0x66, 0x74, 0x5e,
+    0x77, 0x7c, 0xa2, 0xa6, 0x70, 0x90, 0xa3, 0x68, 0x83, 0x69, 0x71, 0x72,
+    0x6c, 0xa9, 0x85, 0x71, 0x88, 0x60, 0x90, 0x84, 0x8a, 0xba, 0x8b, 0x8c,
+    0x72, 0x8f, 0x98, 0x84, 0x8b, 0x8a, 0xb1, 0xa2, 0x93, 0x8d, 0x86, 0x99,
+    0xa2, 0x99, 0xb0, 0xa6, 0x92, 0x78, 0x86, 0x87, 0x9c, 0x9d, 0x6f, 0x92,
+    0x9a, 0x8a, 0xbf, 0xaa, 0xa3, 0xa2, 0x71, 0x8d, 0x93, 0x70, 0xb5, 0x9c,
+    0xa8, 0x97, 0xb4, 0x93, 0xa6, 0x75, 0xbb, 0xa3, 0x92, 0x95, 0x95, 0x94,
+    0x90, 0x5b, 0xbf, 0x92, 0x8a, 0x95, 0xa0, 0xa1, 0x68, 0x7e, 0x9a, 0x7f,
+    0x88, 0xa7, 0x93, 0xa1, 0x7a, 0x93, 0x95, 0x8b, 0x96, 0x94, 0x70, 0xa0,
+    0x70, 0x8f, 0x9d, 0x96, 0x8e, 0x9c, 0x90, 0x9f, 0x7e, 0x83, 0x84, 0x9e,
+    0x7f, 0x65, 0x72, 0x84, 0x64, 0x94, 0x75, 0xa7, 0x62, 0xa3, 0x8a, 0x9b,
+    0x82, 0x99, 0x87, 0x70, 0x81, 0x6d, 0xac, 0x7b, 0x74, 0x68, 0x5d, 0x95,
+    0xa0, 0x6e, 0x84, 0xab, 0x79, 0x8e, 0x8b, 0x79, 0x7b, 0x83, 0xa0, 0x7b,
+    0x96, 0x71, 0x5d, 0xad, 0xa4, 0x82, 0x79, 0x96, 0x73, 0x84, 0x7d, 0x98,
+    0x87, 0x93, 0x86, 0xa6, 0x7f, 0x7c, 0x71, 0x9d, 0xa4, 0x9b, 0x8a, 0x7c,
+    0x87, 0x6a, 0x7f, 0x8d, 0x97, 0x92, 0xa0, 0x88, 0x77, 0x7d, 0x70, 0x9c,
+    0x9f, 0xa0, 0x71, 0xa3, 0x73, 0x95, 0x76, 0x79, 0x94, 0x95, 0x83, 0x8b,
+    0x8d, 0x82, 0x7a, 0x77, 0xa6, 0x88, 0x72, 0x7a, 0x90, 0x76, 0x7f, 0x95,
+    0x83, 0x90, 0x9e, 0x7c, 0x8e, 0x9a, 0x6b, 0xa4, 0x98, 0x9f, 0x86, 0x8c,
+    0x76, 0x70, 0x74, 0x97, 0x7e, 0xa4, 0x5f, 0xa3, 0xa7, 0x7f, 0x67, 0x8d,
+    0x82, 0x95, 0x93, 0x99, 0x82, 0x70, 0x75, 0xa8, 0xa1, 0xaf, 0x8a, 0x8a,
+    0xb0, 0x89, 0x88, 0x6b, 0x98, 0xaf, 0x75, 0x7f, 0x86, 0x90, 0x8f, 0x8c,
+    0x84, 0x8d, 0x7f, 0x8b, 0x94, 0x9f, 0x80, 0x8b, 0x93, 0xa2, 0x98, 0xa5,
+    0x83, 0x81, 0x8a, 0xaa, 0x86, 0xa3, 0xb0, 0xac, 0x64, 0x9c, 0x7c, 0x93,
+    0xac, 0x85, 0x7f, 0x88, 0x7a, 0xa5, 0x75, 0x69, 0x94, 0xa8, 0x95, 0xa9,
+    0x6f, 0x9f, 0x85, 0x8a, 0xa5, 0x97, 0x98, 0xa9, 0x76, 0x80, 0x7e, 0x95,
+    0x89, 0xaf, 0x68, 0x7b, 0xb4, 0x8a, 0x6b, 0xa4, 0x7b, 0x90, 0x79, 0xba,
+    0x9f, 0x82, 0x7d, 0x89, 0x85, 0x82, 0x94, 0xa5, 0x78, 0x8f, 0x6f, 0x71,
+    0x62, 0x66, 0x73, 0x98, 0x8c, 0x7d, 0x81, 0xa2, 0x69, 0x7c, 0x76, 0xa4,
+    0x94, 0x8f, 0x6f, 0x8a, 0x94, 0x8e, 0x8a, 0x88, 0x8c, 0xa3, 0x6f, 0xa2,
+    0x7d, 0x90, 0x8f, 0x96, 0x6c, 0x76, 0x6e, 0x8e, 0x82, 0x85, 0x7f, 0x93,
+    0x81, 0x83, 0x7b, 0x9f, 0x91, 0x89, 0x75, 0x9c, 0x9f, 0x86, 0x7a, 0x8c,
+    0x7a, 0x7b, 0x82, 0xae, 0x6a, 0x7d, 0x82, 0x82, 0xa0, 0x85, 0x99, 0x9f,
+    0x88, 0x8b, 0x8c, 0x8f, 0x90, 0x96, 0x8e, 0x98, 0xa3, 0x87, 0x7f, 0x9b,
+    0x94, 0x73, 0x96, 0x86, 0x72, 0x7c, 0x75, 0x7c, 0x90, 0x79, 0x83, 0x80,
+    0x79, 0x9e, 0x9c, 0x8e, 0x99, 0x8c, 0x7a, 0x9c, 0x8d, 0x99, 0x9d, 0x84,
+    0xa5, 0x93, 0x85, 0x96, 0x88, 0x94, 0x80, 0x90, 0x73, 0xa3, 0x7c, 0xa1,
+    0x88, 0xa4, 0x98, 0x9f, 0x9e, 0x92, 0x6c, 0xa0, 0x84, 0x87, 0x8a, 0x83,
+    0x7b, 0x91, 0x8c, 0x9e, 0x73, 0xa6, 0x93, 0xa0, 0x8d, 0x98, 0x74, 0xa1,
+    0x83, 0x9a, 0x80, 0xbc, 0x62, 0x70, 0x9e, 0xad, 0x9e, 0x8f, 0x8f, 0x9e,
+    0x7e, 0xac, 0xb0, 0xa9, 0x79, 0x6f, 0x79, 0x8f, 0x7e, 0x71, 0x8d, 0xab,
+    0x97, 0x76, 0x86, 0xa2, 0x98, 0x95, 0x8b, 0x9b, 0x75, 0x7a, 0x71, 0x85,
+    0x7f, 0x61, 0x76, 0x8e, 0x99, 0x91, 0x88, 0x73, 0x71, 0x65, 0x82, 0xa0,
+    0x9b, 0x8f, 0x79, 0x70, 0x78, 0x66, 0x85, 0x94, 0x8b, 0x91, 0x75, 0x80,
+    0x9c, 0x94, 0x7f, 0xa5, 0x82, 0x91, 0x7d, 0x76, 0x80, 0x78, 0x83, 0x82,
+    0x79, 0x98, 0x83, 0x87, 0x94, 0x71, 0x73, 0x77, 0x71, 0x94, 0x6a, 0xa8,
+    0x9e, 0x8d, 0x90, 0x78, 0x7a, 0x81, 0x9c, 0x91, 0x96, 0x80, 0x79, 0x83,
+    0x92, 0x9f, 0x8a, 0x84, 0x8e, 0x97, 0x8c, 0x81, 0x87, 0x74, 0x8b, 0x8e,
+    0xa7, 0x86, 0x8b, 0x8a, 0x8e, 0x8f, 0x9b, 0x6b, 0x82, 0x8a, 0x9f, 0x7a,
+    0x96, 0x80, 0x91, 0x94, 0xa6, 0x8e, 0x7a, 0x97, 0x8a, 0x6c, 0xad, 0xa1,
+    0x78, 0x95, 0x9d, 0x9d, 0x88, 0x94, 0x99, 0x86, 0x80, 0x9b, 0x7c, 0x9c,
+    0x87, 0x7a, 0xa0, 0xa8, 0x83, 0x74, 0x8e, 0x9b, 0x65, 0x95, 0x83, 0xc2,
+    0x69, 0x88, 0x87, 0xa7, 0x86, 0x98, 0x9f, 0xc6, 0x5c, 0x7f, 0xb9, 0x9c,
+    0x8b, 0x6e, 0x95, 0xbd, 0x72, 0x83, 0xbf, 0xb1, 0x89, 0x6d, 0x89, 0x8e,
+    0x9d, 0x87, 0x95, 0x92, 0x76, 0x8d, 0x7f, 0x7f, 0x6d, 0x9d, 0x7b, 0x95,
+    0x86, 0x69, 0x90, 0xa0, 0x62, 0x7c, 0x56, 0xa0, 0x9c, 0x8b, 0x81, 0x79,
+    0xa6, 0x73, 0x69, 0xaa, 0x7b, 0x87, 0x8b, 0x7e, 0xa1, 0x9f, 0x6d, 0xa6,
+    0x7e, 0x7e, 0x87, 0x7c, 0xa5, 0x84, 0x7b, 0xa2, 0xae, 0x92, 0x8e, 0x67,
+    0x93, 0x88, 0x8b, 0xa2, 0x8d, 0x96, 0x92, 0x8e, 0x71, 0x7a, 0x82, 0x80,
+    0x9e, 0x8b, 0x7b, 0x87, 0x96, 0xa0, 0xa4, 0x92, 0x88, 0x7e, 0x77, 0x8e,
+    0x91, 0x7e, 0x81, 0x77, 0x79, 0x93, 0x8d, 0x9d, 0x8a, 0x71, 0x8d, 0x88,
+    0x9d, 0x89, 0x85, 0x94, 0x99, 0x80, 0x89, 0x8f, 0x87, 0x81, 0x83, 0x74,
+    0x8a, 0x89, 0x68, 0x7e, 0x99, 0x82, 0x8c, 0x76, 0xc6, 0x8f, 0x90, 0x7d,
+    0x6c, 0x68, 0xbd, 0x90, 0x78, 0x9d, 0x7b, 0xa3, 0x99, 0x76, 0xaf, 0x8d,
+    0x7d, 0x84, 0x7f, 0x9f, 0x8b, 0x7a, 0xaa, 0xa8, 0x79, 0x89, 0x8f, 0x8f,
+    0x71, 0x80, 0x7f, 0xaa, 0x85, 0x70, 0xa8, 0x96, 0x6c, 0x8c, 0xaf, 0xeb,
+    0x57, 0x7e, 0xcf, 0x8d, 0x93, 0x72, 0xa6, 0xd2, 0x52, 0xab, 0xbb, 0xa8,
+    0x8d, 0x82, 0x7a, 0xbc, 0x72, 0x95, 0xa3, 0xa7, 0x8b, 0x74, 0x84, 0x85,
+    0x6a, 0x85, 0x92, 0x9f, 0x91, 0x6b, 0x9b, 0x73, 0x77, 0xa2, 0x7f, 0x81,
+    0x8e, 0x8b, 0x71, 0x8c, 0x7f, 0x60, 0x86, 0x81, 0x9c, 0x86, 0x93, 0x65,
+    0x84, 0x84, 0x89, 0xa2, 0x98, 0x67, 0x88, 0x71, 0x92, 0x80, 0x65, 0xa2,
+    0xa5, 0x99, 0x85, 0x95, 0x8f, 0x85, 0x8f, 0x82, 0x7e, 0x9a, 0x8a, 0x74,
+    0x9d, 0x75, 0x88, 0x7e, 0xa2, 0x77, 0x82, 0x9e, 0x78, 0xa1, 0x74, 0x79,
+    0x7f, 0x87, 0x91, 0x8d, 0x7a, 0x73, 0x96, 0xa2, 0xa3, 0x81, 0x7d, 0x8a,
+    0x85, 0x75, 0x84, 0x81, 0x8b, 0x7f, 0x6c, 0x86, 0x8d, 0x7b, 0x79, 0x78,
+    0x89, 0x85, 0x8c, 0x9a, 0xa6, 0x96, 0x7a, 0x78, 0xa2, 0x85, 0x9b, 0x89,
+    0xc8, 0x97, 0xa3, 0x82, 0x8b, 0x7f, 0xe7, 0x8f, 0x8f, 0x74, 0x75, 0x83,
+    0x87, 0x79, 0xb3, 0xab, 0x70, 0x9a, 0x9a, 0xa6, 0x81, 0x7e, 0xb8, 0x91,
+    0x8b, 0x8d, 0x93, 0xa1, 0x79, 0x7d, 0x81, 0xb4, 0x79, 0x94, 0xa5, 0x89,
+    0x8e, 0x7c, 0x9b, 0xe2, 0x50, 0x94, 0xdf, 0xa0, 0x53, 0x5d, 0x90, 0xde,
+    0x67, 0x90, 0xaf, 0x8a, 0x8f, 0x73, 0x7b, 0xcb, 0x64, 0x9f, 0x91, 0x86,
+    0x95, 0x84, 0x83, 0x88, 0x76, 0x8b, 0x8a, 0x8f, 0x9c, 0x9a, 0x92, 0x96,
+    0x7f, 0x8e, 0x79, 0x80, 0x91, 0x6d, 0x86, 0x59, 0x74, 0x8a, 0x53, 0x88,
+    0xae, 0x7b, 0x80, 0x70, 0x87, 0x74, 0x75, 0x91, 0xa4, 0x74, 0x8d, 0x5a,
+    0x83, 0x95, 0x65, 0xa1, 0xb3, 0x74, 0x87, 0x7d, 0xaa, 0x82, 0x79, 0x78,
+    0x9b, 0x7c, 0x78, 0x74, 0x9e, 0x74, 0x92, 0x92, 0xa3, 0x6e, 0x75, 0x92,
+    0x6a, 0x6f, 0xa3, 0x7c, 0x9e, 0x7f, 0x92, 0x6b, 0x96, 0x79, 0x9a, 0x87,
+    0x83, 0x8c, 0x72, 0x79, 0x6a, 0xa3, 0x79, 0x7d, 0x6d, 0x6c, 0x81, 0x96,
+    0x98, 0x7f, 0x94, 0x81, 0x8a, 0x8a, 0xa7, 0x8c, 0x9a, 0x84, 0xa7, 0x89,
+    0x9d, 0x85, 0xa6, 0xa8, 0xd0, 0x92, 0x97, 0x9f, 0x76, 0x86, 0xe6, 0x6f,
+    0x7c, 0x84, 0x98, 0x8d, 0x80, 0x75, 0xc5, 0x86, 0x6b, 0x8d, 0x9e, 0x9e,
+    0x7f, 0x71, 0x97, 0xa1, 0x75, 0x92, 0xa9, 0x9e, 0x91, 0x5e, 0xa2, 0xa2,
+    0x68, 0xad, 0xa5, 0xa0, 0x7e, 0x68, 0xac, 0xdc, 0x50, 0xa2, 0xc1, 0x8a,
+    0x63, 0x74, 0x7e, 0xd9, 0x3f, 0xbb, 0xba, 0x9d, 0x7f, 0x76, 0x5f, 0xb0,
+    0x74, 0x8e, 0xb1, 0x95, 0x9a, 0x81, 0x63, 0x9f, 0x98, 0x74, 0x80, 0x89,
+    0x95, 0x8e, 0x9e, 0x78, 0x87, 0x82, 0x57, 0x87, 0x8d, 0x90, 0x79, 0x80,
+    0x76, 0x7c, 0x7d, 0x8a, 0xa6, 0x82, 0x98, 0x7a, 0x96, 0x97, 0x84, 0x87,
+    0xab, 0x7f, 0x87, 0x57, 0x83, 0x6a, 0x6a, 0x84, 0x9c, 0x8d, 0x74, 0x68,
+    0xa2, 0x92, 0x90, 0x98, 0x98, 0x8b, 0x6d, 0x72, 0x90, 0x8c, 0x7c, 0x7d,
+    0x9b, 0x6e, 0x71, 0x76, 0x6b, 0x7b, 0x63, 0x81, 0xad, 0x71, 0x78, 0x8e,
+    0x74, 0x87, 0x8e, 0x8a, 0xab, 0x8e, 0x83, 0x85, 0x7d, 0xa0, 0x67, 0x7f,
+    0x9c, 0x74, 0x6b, 0x88, 0x66, 0x92, 0x7f, 0x83, 0x94, 0x92, 0xa5, 0x82,
+    0xa1, 0x7b, 0x6f, 0x70, 0xab, 0x72, 0xb5, 0x91, 0xb7, 0x89, 0x91, 0x77,
+    0x77, 0x8a, 0xdb, 0x88, 0x8a, 0x8d, 0x89, 0x6c, 0x7b, 0x83, 0xc8, 0xb5,
+    0x4b, 0x96, 0x8b, 0x92, 0x91, 0x76, 0xa9, 0xae, 0x70, 0xa8, 0x74, 0x9d,
+    0x96, 0x6d, 0xa1, 0xba, 0x86, 0xbc, 0xbc, 0xa2, 0x8d, 0x6c, 0x96, 0xd8,
+    0x71, 0xb1, 0xae, 0xb0, 0x79, 0x7b, 0x71, 0xd8, 0x32, 0xaa, 0xae, 0xa7,
+    0x7c, 0x6b, 0x77, 0xc0, 0x7c, 0x9e, 0x9f, 0x89, 0x92, 0x8a, 0x76, 0xae,
+    0x97, 0x75, 0x87, 0x8c, 0x7f, 0x86, 0x8b, 0x73, 0x6b, 0x64, 0x87, 0x6d,
+    0x99, 0x8f, 0x8d, 0x66, 0x76, 0x87, 0x6d, 0x6e, 0x98, 0x7a, 0x91, 0x92,
+    0x8c, 0x7c, 0x89, 0x9b, 0x9e, 0x83, 0x86, 0x62, 0x90, 0x6e, 0x62, 0x82,
+    0xa3, 0x7e, 0x86, 0x6a, 0x93, 0x9b, 0x73, 0x6c, 0xa8, 0x99, 0x73, 0x99,
+    0x8c, 0x89, 0x85, 0x67, 0x98, 0x78, 0x63, 0x98, 0x77, 0xa6, 0x6e, 0x81,
+    0xa4, 0x64, 0x8f, 0x8a, 0x7f, 0x9b, 0x91, 0x91, 0x94, 0x82, 0x8b, 0x8b,
+    0x76, 0x66, 0x83, 0x81, 0x94, 0x71, 0x82, 0x9e, 0x93, 0x85, 0x80, 0x8c,
+    0xae, 0x94, 0x96, 0x74, 0x91, 0x9a, 0x6f, 0x9e, 0xa9, 0x76, 0xab, 0x8e,
+    0xd6, 0x9c, 0x7d, 0x98, 0x83, 0x6e, 0xfe, 0x83, 0x71, 0x82, 0x9f, 0x93,
+    0x7b, 0x67, 0xcb, 0xb9, 0x66, 0x89, 0x99, 0x8a, 0xac, 0x8c, 0xa0, 0x9c,
+    0x70, 0xaf, 0x81, 0x88, 0x9c, 0x7e, 0xa8, 0xa5, 0x65, 0x8c, 0xa1, 0x8c,
+    0x83, 0x85, 0x9d, 0xcb, 0x4b, 0xc1, 0xb5, 0xa2, 0x75, 0x63, 0x75, 0xbd,
+    0x34, 0xae, 0xca, 0xa2, 0x89, 0x7a, 0x69, 0xb0, 0x70, 0xae, 0x94, 0x76,
+    0x85, 0x93, 0x6a, 0x90, 0x6a, 0x8a, 0xac, 0x71, 0x7e, 0x81, 0xa2, 0x71,
+    0x98, 0x86, 0x99, 0x76, 0x8f, 0x6f, 0x90, 0x93, 0x7c, 0x72, 0x81, 0x8c,
+    0x78, 0x77, 0x97, 0x84, 0x98, 0x70, 0x96, 0x9a, 0x9b, 0x93, 0x92, 0x5f,
+    0xaa, 0x88, 0x5b, 0x74, 0xaa, 0x96, 0x6a, 0x73, 0x87, 0x83, 0x72, 0x89,
+    0xab, 0x8a, 0x5f, 0x71, 0xa4, 0x94, 0x92, 0x60, 0x96, 0x7b, 0x53, 0x88,
+    0x69, 0x8b, 0x5e, 0x7b, 0xa0, 0x83, 0x70, 0x95, 0x6d, 0x9b, 0x6d, 0x98,
+    0x99, 0x86, 0x6e, 0x7a, 0x87, 0x86, 0x68, 0x8a, 0x7e, 0x87, 0x90, 0x7d,
+    0x76, 0x93, 0x80, 0x8a, 0x8f, 0x97, 0xac, 0x71, 0xa2, 0x96, 0x7f, 0x8e,
+    0xc2, 0x71, 0xab, 0xa9, 0xd1, 0x85, 0x8c, 0x74, 0x70, 0x72, 0xff, 0x77,
+    0x6d, 0x77, 0x91, 0x5d, 0x71, 0x5d, 0xb2, 0xb1, 0x38, 0x76, 0xa6, 0x80,
+    0x91, 0x86, 0xa3, 0x9c, 0x85, 0x95, 0x99, 0xab, 0x8a, 0x6e, 0x9f, 0xa6,
+    0x75, 0xa9, 0xb3, 0x97, 0x69, 0x85, 0xa4, 0xc9, 0x59, 0xb4, 0xca, 0x8d,
+    0x5c, 0x67, 0x7d, 0xcd, 0x29, 0xca, 0xdb, 0x8c, 0x86, 0x8c, 0x70, 0xaa,
+    0x5c, 0x9e, 0x98, 0x86, 0x92, 0x7e, 0x6b, 0x8e, 0x8f, 0x6a, 0x84, 0x71,
+    0x9a, 0x76, 0x87, 0x84, 0x8b, 0x7f, 0x7f, 0x6e, 0xa3, 0x83, 0x85, 0x78,
+    0x6f, 0x7c, 0x6f, 0x96, 0x95, 0x8c, 0xa3, 0x72, 0x92, 0x66, 0x7b, 0x99,
+    0x9c, 0x9c, 0x9a, 0x63, 0xaa, 0x81, 0x7f, 0x90, 0x8c, 0xa0, 0x7e, 0x67,
+    0x94, 0x96, 0x7f, 0x8a, 0x95, 0x91, 0x5c, 0x73, 0x88, 0x9b, 0x85, 0x70,
+    0x87, 0x79, 0x56, 0x92, 0x69, 0x95, 0x62, 0x78, 0x93, 0x83, 0x63, 0x98,
+    0x7a, 0xa4, 0x95, 0x7c, 0x8e, 0x69, 0x86, 0x92, 0x7d, 0x6b, 0x69, 0x85,
+    0xa8, 0x90, 0x7c, 0x7b, 0x9e, 0x87, 0x7b, 0x90, 0x98, 0x7a, 0xa4, 0x92,
+    0xad, 0x97, 0xa0, 0x6d, 0xa6, 0x74, 0xb7, 0x7f, 0xb9, 0x94, 0x6c, 0x77,
+    0x65, 0x6f, 0xfc, 0x7d, 0x68, 0x74, 0xa1, 0x6c, 0x71, 0x61, 0xc3, 0xb5,
+    0x60, 0x86, 0x8b, 0x7d, 0x89, 0x8b, 0x93, 0xa4, 0x68, 0xa0, 0x8f, 0x73,
+    0x96, 0x6e, 0x81, 0x99, 0x81, 0x9d, 0xae, 0x93, 0x6a, 0x8b, 0x9a, 0xcb,
+    0x68, 0xaf, 0xca, 0x81, 0x73, 0x6e, 0x70, 0xd7, 0x49, 0xb9, 0xc5, 0x9d,
+    0x87, 0x8d, 0x61, 0xa8, 0x5e, 0xa4, 0xb7, 0xab, 0x96, 0x84, 0x76, 0x98,
+    0x84, 0x99, 0x8f, 0x70, 0x79, 0x94, 0xa5, 0x87, 0x6e, 0x73, 0x63, 0x7e,
+    0x83, 0x8c, 0x88, 0x71, 0x7a, 0x81, 0x7d, 0x94, 0x92, 0x89, 0xab, 0x7a,
+    0x96, 0x66, 0x7b, 0x8b, 0x8f, 0x8e, 0x94, 0x5b, 0xa0, 0x7f, 0x82, 0x84,
+    0x84, 0x80, 0x7d, 0x81, 0x89, 0x7b, 0x97, 0x78, 0x83, 0x93, 0x4c, 0x95,
+    0x7f, 0x93, 0x8e, 0x70, 0x89, 0x81, 0x69, 0x87, 0x76, 0x73, 0x9a, 0x74,
+    0xa2, 0x88, 0x5e, 0xac, 0x74, 0x8e, 0x74, 0x8e, 0x94, 0x85, 0x7b, 0x7a,
+    0x72, 0x82, 0x68, 0x77, 0x96, 0x8a, 0x7b, 0x6c, 0x88, 0x8b, 0x6b, 0x86,
+    0xa4, 0x88, 0xac, 0xa1, 0x90, 0x8e, 0x85, 0x6d, 0xb1, 0x69, 0xb1, 0xa2,
+    0xbe, 0x9a, 0x7c, 0xb4, 0x63, 0x56, 0xf2, 0x90, 0x5e, 0x71, 0xa3, 0x6a,
+    0x8b, 0x67, 0xbe, 0xa8, 0x6e, 0x8b, 0x90, 0x83, 0xa0, 0x78, 0x9f, 0xa5,
+    0x65, 0xa3, 0x8b, 0x94, 0x84, 0x6c, 0xa5, 0x97, 0x7d, 0xa7, 0x9f, 0x9c,
+    0x62, 0x7d, 0xb5, 0xb1, 0x58, 0x98, 0xba, 0x8d, 0x7f, 0x57, 0x86, 0xc5,
+    0x39, 0xb3, 0xc9, 0xa9, 0x89, 0x8e, 0x55, 0xaf, 0x54, 0xb4, 0xb0, 0x8f,
+    0x8b, 0x7c, 0x6e, 0x8e, 0x96, 0x90, 0x8a, 0x83, 0x84, 0x8c, 0x96, 0x7f,
+    0x89, 0x67, 0x99, 0x60, 0x74, 0x8d, 0x9b, 0x82, 0x6f, 0x61, 0x84, 0x9a,
+    0x7c, 0x85, 0x86, 0x7c, 0x9b, 0x5f, 0x81, 0x96, 0x90, 0x9b, 0xa0, 0x58,
+    0xaf, 0x78, 0x81, 0x8f, 0x96, 0x81, 0x77, 0x7d, 0xa2, 0x85, 0x74, 0x84,
+    0x99, 0x8d, 0x5f, 0x77, 0x8a, 0x8c, 0x85, 0x78, 0x8f, 0x80, 0x5c, 0x6f,
+    0x77, 0x73, 0x80, 0x99, 0x83, 0x89, 0x6f, 0x8e, 0x85, 0x7e, 0x6c, 0x81,
+    0x99, 0x89, 0x69, 0x70, 0x8c, 0x8f, 0x6b, 0x89, 0x80, 0x7a, 0x83, 0x7a,
+    0x96, 0x99, 0x73, 0x76, 0x9c, 0x67, 0xab, 0xab, 0xbd, 0x8b, 0x85, 0x90,
+    0xb0, 0x6b, 0xbd, 0x9c, 0xb9, 0xa0, 0x7c, 0x7d, 0x66, 0x78, 0xdb, 0x97,
+    0x55, 0x67, 0x96, 0x69, 0x80, 0x49, 0xc1, 0xbb, 0x6c, 0x91, 0x8a, 0x92,
+    0x9a, 0x98, 0xa5, 0x98, 0x51, 0xa6, 0x99, 0x8e, 0x73, 0x73, 0x9d, 0x9f,
+    0x77, 0xa6, 0xa4, 0x92, 0x64, 0x75, 0xac, 0xb2, 0x5d, 0xa1, 0xab, 0xa4,
+    0x5a, 0x5b, 0xb3, 0xb7, 0x2d, 0xca, 0xc8, 0x76, 0x94, 0x8e, 0x59, 0xb0,
+    0x52, 0x9d, 0xbd, 0x89, 0x97, 0x84, 0x5d, 0x9a, 0x87, 0x9b, 0x94, 0x6c,
+    0x7b, 0xaa, 0x8a, 0x8b, 0x79, 0x5d, 0x90, 0x5c, 0x8b, 0x7b, 0xbe, 0x68,
+    0x84, 0x6f, 0x75, 0x72, 0x98, 0x82, 0x92, 0x7a, 0xa2, 0x6e, 0x7b, 0x7d,
+    0x9c, 0x99, 0x97, 0x5d, 0x9b, 0x69, 0x80, 0xa3, 0x96, 0x8d, 0x7c, 0x82,
+    0xa3, 0x76, 0x95, 0x67, 0x93, 0x8e, 0x62, 0x7b, 0x78, 0x96, 0x69, 0x67,
+    0x84, 0x8f, 0x62, 0x80, 0x88, 0x7e, 0x6c, 0x94, 0xab, 0x8b, 0x82, 0x9e,
+    0x7e, 0x8c, 0x70, 0x83, 0x9c, 0x9c, 0x80, 0x87, 0x8f, 0xa1, 0x7f, 0x81,
+    0x95, 0x83, 0x6d, 0x7a, 0xa0, 0x77, 0x6d, 0x76, 0x91, 0x7e, 0xa3, 0x62,
+    0xa0, 0x93, 0x7e, 0x97, 0xb6, 0x6c, 0xad, 0x72, 0xb2, 0x95, 0x73, 0x83,
+    0x62, 0x56, 0xe2, 0x99, 0x6e, 0x66, 0xb0, 0x6c, 0x75, 0x4e, 0xb2, 0xc7,
+    0x51, 0x98, 0x90, 0x8c, 0x82, 0x63, 0xa8, 0x99, 0x54, 0xc1, 0x87, 0x80,
+    0x79, 0x62, 0xad, 0x81, 0x76, 0x99, 0xa9, 0x9b, 0x4e, 0x8c, 0xaf, 0xb6,
+    0x5d, 0x9b, 0xb4, 0x9f, 0x6d, 0x60, 0xa5, 0xb5, 0x3e, 0xb2, 0xc4, 0x96,
+    0x86, 0x6d, 0x48, 0x99, 0x50, 0xc1, 0xa8, 0x93, 0x8a, 0x92, 0x7d, 0x8f,
+    0x74, 0x87, 0x91, 0x71, 0x8c, 0x87, 0x90, 0x80, 0x80, 0x82, 0x7b, 0x85,
+    0x81, 0x7f, 0xa7, 0x6a, 0x78, 0x4e, 0x90, 0x85, 0x9f, 0x93, 0x91, 0x91,
+    0xa5, 0x6e, 0x9d, 0xa7, 0x9e, 0x7f, 0x9a, 0x66, 0xbe, 0x6f, 0x82, 0x81,
+    0x85, 0x86, 0x89, 0x6c, 0x88, 0x92, 0x6d, 0x6a, 0x8c, 0x95, 0x68, 0x70,
+    0x91, 0x9b, 0x76, 0x59, 0x87, 0x93, 0x6f, 0x79, 0x7a, 0x99, 0x7d, 0x76,
+    0xa3, 0x9c, 0x69, 0x75, 0x8f, 0x8e, 0x7e, 0x7a, 0x80, 0x8b, 0x76, 0x82,
+    0x70, 0x71, 0x77, 0x7a, 0x88, 0xa1, 0x79, 0x75, 0x9e, 0x7e, 0x6d, 0x6f,
+    0xa5, 0x84, 0xb1, 0x77, 0xad, 0x94, 0x98, 0x90, 0xa7, 0x5c, 0xb6, 0x84,
+    0x99, 0x91, 0x71, 0x7b, 0x6d, 0x54, 0xd2, 0x84, 0x5d, 0x75, 0xb4, 0x7e,
+    0x7d, 0x53, 0xc5, 0x98, 0x70, 0xaa, 0x9e, 0x81, 0x7d, 0x68, 0xa7, 0x8d,
+    0x63, 0xab, 0x9b, 0x96, 0x7e, 0x6b, 0xa3, 0x9e, 0x6d, 0x98, 0xaf, 0x9b,
+    0x78, 0x74, 0xae, 0xc7, 0x70, 0x98, 0xd4, 0x9a, 0x6e, 0x75, 0xa2, 0xcd,
+    0x42, 0xb0, 0xc9, 0x89, 0x88, 0x77, 0x6a, 0xa4, 0x66, 0xb5, 0xbc, 0x8a,
+    0x96, 0x87, 0x5e, 0xa5, 0x87, 0x95, 0x91, 0x5d, 0x85, 0x91, 0xaa, 0x8f,
+    0x99, 0x78, 0x79, 0x74, 0x7f, 0x81, 0xa1, 0x74, 0x77, 0x64, 0x6c, 0x94,
+    0xa0, 0x8b, 0x9b, 0x8e, 0xac, 0x6a, 0x98, 0x9c, 0x7a, 0x9f, 0xab, 0x7e,
+    0xa3, 0x8b, 0x68, 0x7f, 0x84, 0x9f, 0x93, 0x77, 0x90, 0x98, 0x8f, 0x87,
+    0x81, 0x8e, 0x76, 0x95, 0x66, 0x78, 0x85, 0x79, 0x95, 0x89, 0x64, 0x8e,
+    0x8a, 0x87, 0x6f, 0x65, 0xa4, 0x98, 0x7a, 0x83, 0x85, 0x7e, 0x6b, 0xaa,
+    0x81, 0x94, 0x7c, 0x6e, 0x78, 0x85, 0x87, 0x6d, 0x7a, 0x92, 0x67, 0x7a,
+    0x8d, 0x95, 0x77, 0x7f, 0x9f, 0x71, 0xb1, 0xa1, 0xb2, 0x91, 0x7f, 0xb0,
+    0xac, 0x5c, 0xaf, 0x6a, 0xae, 0x98, 0x63, 0x7e, 0x67, 0x6f, 0xc4, 0x8a,
+    0x75, 0x61, 0xac, 0x73, 0x86, 0x54, 0xc3, 0xa8, 0x5d, 0xa9, 0xb4, 0x9b,
+    0x80, 0x6d, 0xa1, 0x8d, 0x64, 0xaa, 0x86, 0x96, 0x86, 0x6c, 0x9b, 0x8b,
+    0x73, 0x9f, 0x9a, 0x87, 0x64, 0x6c, 0xad, 0xa6, 0x64, 0x8a, 0xbe, 0x88,
+    0x67, 0x67, 0xaf, 0xb0, 0x71, 0xae, 0xde, 0x95, 0x9f, 0x7c, 0x7d, 0xa1,
+    0x79, 0xb8, 0xaa, 0x9c, 0x84, 0x91, 0x6b, 0xac, 0x74, 0xa1, 0xad, 0x74,
+    0x88, 0x93, 0x94, 0x72, 0x97, 0x7a, 0x78, 0x86, 0x76, 0x93, 0xb1, 0x6f,
+    0x91, 0x44, 0x96, 0x8e, 0x8e, 0xa5, 0x9a, 0x70, 0x99, 0x79, 0x84, 0x82,
+    0x7f, 0x78, 0xac, 0x6f, 0x9c, 0x80, 0x7d, 0x87, 0x7f, 0x9d, 0x6a, 0x71,
+    0x7c, 0x92, 0x78, 0x7a, 0x93, 0x90, 0x55, 0x83, 0x7a, 0x8a, 0x9a, 0x65,
+    0x86, 0x9b, 0x7c, 0x6b, 0xa3, 0x85, 0x86, 0x71, 0xab, 0x9a, 0x86, 0x90,
+    0x86, 0x88, 0x88, 0x88, 0x99, 0x98, 0x77, 0x86, 0x88, 0x90, 0x79, 0x7c,
+    0x6e, 0x9f, 0x76, 0x70, 0x84, 0x67, 0x7e, 0x8b, 0xa5, 0x68, 0xa7, 0x9d,
+    0xb5, 0x9b, 0x8b, 0x8a, 0xc0, 0x60, 0x9e, 0x83, 0xb0, 0xb7, 0x65, 0x7f,
+    0x7a, 0x7e, 0xc3, 0x7b, 0x74, 0x8f, 0xa4, 0x68, 0x5f, 0x47, 0xbb, 0xa4,
+    0x74, 0x95, 0xab, 0x80, 0x70, 0x5c, 0x9a, 0x8a, 0x7d, 0xa5, 0x90, 0x7d,
+    0x86, 0x68, 0xb1, 0x73, 0x6d, 0xad, 0x93, 0x8d, 0x7b, 0x64, 0xbd, 0xae,
+    0x7a, 0x98, 0xcb, 0x97, 0x83, 0x67, 0xab, 0xb0, 0x61, 0xa7, 0xcd, 0x7e,
+    0x87, 0x78, 0x76, 0x95, 0x6a, 0xba, 0xa9, 0x84, 0x8f, 0x95, 0x7c, 0x8b,
+    0x90, 0x89, 0x8b, 0x81, 0x87, 0x8b, 0x76, 0x73, 0x6f, 0x61, 0x94, 0x73,
+    0x83, 0x97, 0xb3, 0x6b, 0x9c, 0x55, 0x7f, 0x96, 0x9a, 0x92, 0x85, 0x52,
+    0xc6, 0x73, 0x88, 0x9c, 0x7c, 0x86, 0x98, 0x6d, 0x99, 0x87, 0x80, 0x7c,
+    0x7d, 0x98, 0x74, 0x7c, 0x89, 0x8a, 0x7d, 0x7b, 0x83, 0x90, 0x7d, 0x81,
+    0x7a, 0xa0, 0x86, 0x5f, 0x74, 0x8e, 0x68, 0x7b, 0x6c, 0x86, 0x90, 0x84,
+    0x7e, 0xae, 0x73, 0x6f, 0x8d, 0x81, 0x7c, 0x93, 0xa0, 0xb3, 0x6b, 0x9a,
+    0x88, 0xab, 0x8a, 0x94, 0x9c, 0x87, 0x9c, 0x75, 0x7d, 0x8f, 0x7c, 0x7f,
+    0x9b, 0x69, 0xa8, 0x99, 0x9d, 0x89, 0x8f, 0x72, 0xba, 0x61, 0xac, 0x91,
+    0xb5, 0xa7, 0x84, 0x99, 0x71, 0x7e, 0xd0, 0x7c, 0x6d, 0x66, 0xb6, 0x72,
+    0x79, 0x61, 0xb6, 0xab, 0x69, 0xa0, 0xaa, 0x7d, 0x74, 0x61, 0x95, 0xa5,
+    0x71, 0xb0, 0x93, 0x95, 0x86, 0x7d, 0x9f, 0x7e, 0x6c, 0x97, 0x85, 0x87,
+    0x72, 0x7b, 0xb4, 0xad, 0x84, 0x7b, 0xcd, 0xa9, 0x7e, 0x6d, 0xc8, 0xc7,
+    0x7e, 0xb7, 0xcf, 0x98, 0x7b, 0x7c, 0x69, 0xaf, 0x64, 0xa6, 0xc1, 0x8e,
+    0x8f, 0x9c, 0x7d, 0x93, 0x7a, 0x96, 0x8a, 0x65, 0x92, 0x95, 0x8d, 0x6f,
+    0x9f, 0x7f, 0x65, 0x69, 0x7a, 0x92, 0x9f, 0x5c, 0x90, 0x4e, 0x69, 0x89,
+    0x8f, 0x9c, 0xa8, 0x7a, 0xb6, 0x7d, 0x84, 0x97, 0x7f, 0x91, 0x8d, 0x71,
+    0xae, 0x86, 0x80, 0x78, 0x81, 0x87, 0x6e, 0x88, 0x87, 0x7f, 0x8f, 0x9d,
+    0x78, 0x91, 0x74, 0x91, 0x7f, 0x7a, 0x80, 0x63, 0x93, 0xa0, 0x7f, 0x6f,
+    0xa3, 0x88, 0x76, 0x5c, 0x6e, 0xa1, 0x6e, 0x7f, 0x84, 0x8b, 0x87, 0x6d,
+    0x87, 0x9f, 0x79, 0x7c, 0x83, 0x89, 0x7e, 0x86, 0xa0, 0x82, 0x80, 0x8e,
+    0x8b, 0x6c, 0x6e, 0x69, 0x9f, 0x79, 0xaa, 0x6e, 0xa2, 0x8f, 0x9d, 0x87,
+    0xb4, 0x5d, 0xba, 0x6c, 0xaf, 0xa0, 0x84, 0x87, 0x8c, 0x89, 0xcb, 0x6f,
+    0x8e, 0x71, 0xae, 0x5d, 0x6c, 0x61, 0xb3, 0xaf, 0x7a, 0x94, 0xb1, 0x8a,
+    0x80, 0x65, 0x8a, 0x9d, 0x61, 0xb6, 0x8b, 0x97, 0x8a, 0x73, 0xa8, 0x82,
+    0x74, 0x8a, 0x9c, 0x73, 0x61, 0x69, 0xb8, 0x9f, 0x76, 0x90, 0xc5, 0xaa,
+    0x6b, 0x5f, 0xb7, 0xce, 0x6d, 0xb7, 0xcc, 0x97, 0x7a, 0x81, 0x95, 0xbe,
+    0x78, 0xb1, 0xb4, 0x97, 0x8e, 0x99, 0x70, 0xa2, 0x72, 0x8d, 0x8e, 0x7d,
+    0x90, 0x9f, 0x7b, 0x63, 0x87, 0x89, 0x7a, 0x5f, 0x81, 0x97, 0x8d, 0x78,
+    0x94, 0x64, 0x95, 0x9d, 0x90, 0x87, 0xb3, 0x6e, 0xc2, 0x80, 0x94, 0x86,
+    0x87, 0x93, 0xb3, 0x57, 0xb8, 0x73, 0x8a, 0x81, 0x6f, 0x95, 0x89, 0x82,
+    0x94, 0x7a, 0x8e, 0x97, 0x8a, 0x91, 0x7f, 0x77, 0x98, 0x72, 0x67, 0x5f,
+    0x7b, 0x8d, 0x78, 0x74, 0x91, 0x82, 0x86, 0x5c, 0x88, 0xa3, 0x73, 0x6f,
+    0x92, 0x78, 0x9c, 0x95, 0x99, 0x9d, 0x70, 0x89, 0x8f, 0xa7, 0x74, 0x89,
+    0x77, 0x90, 0x72, 0x8d, 0x9c, 0x6f, 0x7a, 0x6c, 0x9f, 0x72, 0xad, 0x6c,
+    0xa5, 0x7a, 0x9d, 0x78, 0xa4, 0x52, 0xbd, 0x94, 0xb5, 0x97, 0x75, 0x78,
+    0x86, 0x72, 0xdf, 0x6f, 0x98, 0x81, 0xab, 0x5d, 0x62, 0x65, 0x9d, 0xbc,
+    0x68, 0x8a, 0xc1, 0x7e, 0x67, 0x7f, 0x88, 0x95, 0x7f, 0xbd, 0x9c, 0x77,
+    0x7d, 0x7e, 0x96, 0x7c, 0x7f, 0xa1, 0xa4, 0x90, 0x7c, 0x74, 0xc0, 0xac,
+    0x7d, 0xa1, 0xdb, 0x85, 0x85, 0x51, 0xbc, 0xb1, 0x6c, 0xcb, 0xd1, 0xa7,
+    0x76, 0x70, 0x7d, 0xba, 0x88, 0xb6, 0xaf, 0xa2, 0x9d, 0x9b, 0x71, 0x96,
+    0x80, 0x89, 0xa3, 0x86, 0x89, 0x8f, 0x76, 0x77, 0xa9, 0x82, 0x8f, 0x69,
+    0x7f, 0x9d, 0xac, 0x80, 0x98, 0x6c, 0x70, 0x72, 0x81, 0x8b, 0xaf, 0x80,
+    0xb1, 0x6f, 0x7c, 0x90, 0x91, 0x82, 0xa5, 0x67, 0x9c, 0x76, 0x8c, 0x6b,
+    0x9c, 0x9b, 0x87, 0x8c, 0x8e, 0x8b, 0xb0, 0x9d, 0x89, 0x8f, 0x76, 0x87,
+    0x9b, 0x90, 0x8e, 0x74, 0x73, 0x91, 0x85, 0x80, 0x81, 0x72, 0x99, 0x84,
+    0x87, 0x95, 0x84, 0x8c, 0x8a, 0x6e, 0x8c, 0x82, 0xad, 0x9d, 0x80, 0x7f,
+    0x96, 0x9c, 0x7f, 0x67, 0xb0, 0x98, 0x69, 0x84, 0x94, 0xa9, 0x7e, 0x83,
+    0x9d, 0x62, 0x92, 0x6e, 0x95, 0x88, 0xa4, 0x90, 0x97, 0x4d, 0xae, 0x89,
+    0xb6, 0xa1, 0x88, 0x9f, 0x7a, 0x70, 0xc2, 0x71, 0x7f, 0x83, 0x90, 0x83,
+    0x5e, 0x50, 0xa9, 0x9f, 0x73, 0x8c, 0xb2, 0x80, 0x79, 0x65, 0x7c, 0x90,
+    0x6d, 0x9a, 0x91, 0x8d, 0x6f, 0x65, 0x97, 0x87, 0x82, 0xa0, 0xa4, 0x8c,
+    0x68, 0x76, 0xa8, 0xa2, 0x7f, 0xa4, 0xcd, 0x91, 0x70, 0x54, 0x95, 0xc6,
+    0x6e, 0x9c, 0xe2, 0xa1, 0x86, 0x82, 0x73, 0xbc, 0x89, 0xaa, 0xb2, 0x7d,
+    0x82, 0x84, 0x8b, 0x9e, 0x84, 0x94, 0xa0, 0x7a, 0x98, 0x9d, 0x99, 0x7b,
+    0x7b, 0x89, 0x8f, 0x66, 0x89, 0x9b, 0xa7, 0x8b, 0x9b, 0x62, 0x9b, 0x78,
+    0x8b, 0x95, 0xbd, 0x7a, 0x9e, 0x61, 0x80, 0x84, 0x89, 0x8e, 0xb4, 0x7b,
+    0xb8, 0x70, 0x75, 0x8e, 0x7b, 0x9c, 0x9e, 0x9f, 0x89, 0x86, 0x9b, 0x7a,
+    0x7b, 0x95, 0x83, 0x95, 0x80, 0x94, 0x85, 0x65, 0x8c, 0x81, 0x67, 0x77,
+    0x94, 0x8a, 0x92, 0x74, 0x72, 0x90, 0x6b, 0x74, 0x7e, 0x75, 0x71, 0x84,
+    0x9e, 0xa6, 0x64, 0x80, 0x8d, 0x7a, 0x8c, 0x82, 0x98, 0x96, 0x64, 0x7d,
+    0x8b, 0x82, 0x6a, 0x7f, 0x97, 0x4e, 0x91, 0x74, 0x94, 0x99, 0x6d, 0x6a,
+    0xb3, 0x5a, 0xb8, 0x64, 0xa3, 0x95, 0x5d, 0x95, 0x90, 0x87, 0xcc, 0x72,
+    0x85, 0x85, 0x8f, 0x55, 0x6f, 0x65, 0x84, 0xb6, 0x7b, 0x77, 0xce, 0x79,
+    0x82, 0x59, 0x8a, 0xa2, 0x68, 0x9b, 0xa3, 0x81, 0x9c, 0x7a, 0x97, 0x87,
+    0x6b, 0x8c, 0x9c, 0xaa, 0x5c, 0x69, 0xb8, 0xb7, 0x7c, 0xa0, 0xb5, 0x92,
+    0x8d, 0x67, 0x96, 0xd2, 0x77, 0xa6, 0xd9, 0xad, 0xaa, 0x79, 0x90, 0xc9,
+    0x81, 0xbf, 0xd0, 0x8d, 0x9d, 0x88, 0x9c, 0x91, 0x90, 0x94, 0x89, 0x8a,
+    0x91, 0x9b, 0x89, 0x79, 0x92, 0x80, 0x8f, 0x7b, 0x7e, 0x8b, 0xb1, 0x85,
+    0xa4, 0x5a, 0xb4, 0x7a, 0xa7, 0x8c, 0xa4, 0x75, 0xb9, 0x66, 0x93, 0x86,
+    0x8a, 0x87, 0xad, 0x64, 0xa2, 0x7e, 0x99, 0x9f, 0x81, 0xa2, 0x9b, 0x88,
+    0x9e, 0xa2, 0xb9, 0x8a, 0x78, 0x84, 0x91, 0x8e, 0x8b, 0x90, 0x83, 0x80,
+    0x64, 0x93, 0x77, 0x89, 0x81, 0x86, 0x96, 0x7a, 0x81, 0xab, 0x6d, 0x73,
+    0x7d, 0x7e, 0xaa, 0x85, 0x95, 0xac, 0x8b, 0x89, 0x8b, 0x77, 0xa3, 0x8b,
+    0xa3, 0xa0, 0x87, 0x86, 0x7a, 0x74, 0x6f, 0x7c, 0x90, 0x58, 0xa2, 0x64,
+    0x94, 0x8b, 0xa0, 0x88, 0xab, 0x53, 0xce, 0x67, 0xb7, 0x7f, 0x8d, 0x69,
+    0x84, 0x74, 0xaf, 0x72, 0xab, 0x70, 0x8f, 0x6e, 0x5d, 0x61, 0x96, 0xa1,
+    0x7b, 0x6f, 0xa2, 0x75, 0x8f, 0x5d, 0x93, 0x72, 0x82, 0x97, 0x76, 0x65,
+    0x7e, 0x96, 0xb3, 0x8b, 0x8d, 0x89, 0x8f, 0x7b, 0x6f, 0x71, 0xa1, 0x9e,
+    0x91, 0x7c, 0xc9, 0x9f, 0x7c, 0x71, 0xa1, 0xba, 0x77, 0xa5, 0xd4, 0xa6,
+    0xa0, 0x82, 0x7b, 0x95, 0x9d, 0xb7, 0xaa, 0x8d, 0x71, 0x87, 0x94, 0x7e,
+    0x88, 0x7f, 0x8b, 0x6e, 0x93, 0x9f, 0x82, 0x88, 0x94, 0x8a, 0x97, 0x7f,
+    0x7d, 0x8c, 0xa0, 0x84, 0xb4, 0x7c, 0x8c, 0x7f, 0x71, 0x8c, 0x8e, 0x7f,
+    0xc6, 0x64, 0x81, 0x8d, 0x89, 0x8d, 0xc4, 0x77, 0xaf, 0x75, 0x92, 0x7f,
+    0x84, 0xa1, 0x99, 0x94, 0x9e, 0x82, 0x7a, 0x98, 0x7e, 0x8e, 0x93, 0x8c,
+    0x6b, 0x93, 0x84, 0xaa, 0x7f, 0x8f, 0x6b, 0x94, 0xa3, 0x8a, 0x78, 0x82,
+    0x60, 0x92, 0x8b, 0x8d, 0x75, 0x8c, 0x8e, 0x6e, 0x7e, 0x9d, 0x6d, 0x8e,
+    0x79, 0x8d, 0x80, 0x89, 0xaa, 0x99, 0x7e, 0xa3, 0x83, 0x95, 0x83, 0x85,
+    0x9c, 0x60, 0x99, 0x78, 0x93, 0x8b, 0x80, 0x82, 0x9d, 0x6b, 0xc2, 0x54,
+    0xb9, 0x7a, 0x83, 0x98, 0x88, 0x65, 0xcb, 0x52, 0xa7, 0x8d, 0x7f, 0x81,
+    0x6b, 0x6d, 0x9e, 0x92, 0x85, 0x82, 0x9f, 0x67, 0x6f, 0x74, 0xaa, 0x75,
+    0x99, 0x9f, 0x8a, 0x8b, 0x88, 0x82, 0xb8, 0x6b, 0x85, 0x99, 0x93, 0x90,
+    0x8d, 0x7a, 0xaa, 0x9d, 0x86, 0x7f, 0xbd, 0x91, 0x67, 0x65, 0x8c, 0xb3,
+    0x87, 0x94, 0xa3, 0x9a, 0x7e, 0x73, 0x83, 0xaa, 0x7a, 0xba, 0xaa, 0x9e,
+    0x9e, 0x86, 0x9a, 0x63, 0x9c, 0x98, 0x5e, 0xa0, 0x9c, 0x9e, 0x8b, 0x85,
+    0xa2, 0x74, 0x80, 0x8d, 0x7e, 0x89, 0xc0, 0x75, 0xa5, 0x3f, 0x97, 0xa2,
+    0x8c, 0x8c, 0x9d, 0x88, 0xa4, 0x5e, 0x75, 0x5f, 0x87, 0x82, 0xbc, 0x72,
+    0xa3, 0x77, 0x83, 0x79, 0x82, 0x95, 0x8d, 0x77, 0x73, 0x81, 0x9d, 0x9b,
+    0x6c, 0x87, 0x93, 0x96, 0x83, 0x86, 0x8b, 0x89, 0x72, 0x7d, 0x96, 0x78,
+    0x67, 0xa2, 0x8d, 0x81, 0x6a, 0x98, 0x75, 0x80, 0x8a, 0x80, 0x9e, 0x82,
+    0x76, 0x9b, 0x6c, 0x94, 0x7a, 0x96, 0x74, 0x92, 0x78, 0x91, 0x7a, 0x7c,
+    0x9a, 0x98, 0x70, 0x5d, 0x9c, 0x4b, 0x70, 0x7d, 0xa9, 0x9b, 0x70, 0x96,
+    0xad, 0x59, 0xc4, 0x63, 0xbc, 0x8f, 0x5c, 0x86, 0x8e, 0x97, 0xa0, 0x7c,
+    0xa6, 0x77, 0xaa, 0x93, 0x68, 0x66, 0x93, 0x91, 0x7b, 0x7e, 0xa2, 0x7a,
+    0x98, 0x77, 0x97, 0x59, 0x84, 0x76, 0x9c, 0x7b, 0x8b, 0x76, 0x88, 0x7a,
+    0x8c, 0x7b, 0xa4, 0xae, 0x6e, 0x7d, 0xb3, 0x99, 0x8d, 0x68, 0x9e, 0x7e,
+    0x77, 0x59, 0x80, 0xbe, 0x80, 0x83, 0xd9, 0x9f, 0x7d, 0x60, 0x8b, 0x98,
+    0x7f, 0x9e, 0xa3, 0x8d, 0x7d, 0x81, 0x9e, 0x78, 0x99, 0x94, 0x70, 0x80,
+    0x9b, 0x89, 0x8c, 0x6d, 0x9c, 0x95, 0x76, 0x7c, 0x83, 0x87, 0x97, 0x93,
+    0x89, 0x6d, 0x77, 0x7e, 0x7e, 0x87, 0x8e, 0x7e, 0x94, 0x61, 0x94, 0xa2,
+    0x94, 0x91, 0xa1, 0x64, 0xc1, 0x78, 0x79, 0xaf, 0x67, 0x7a, 0x9b, 0xa1,
+    0x95, 0x8e, 0x97, 0x84, 0x7b, 0x85, 0x80, 0xa1, 0x6f, 0x87, 0x79, 0x83,
+    0x73, 0x9d, 0x81, 0x64, 0x7a, 0x7f, 0x8f, 0x91, 0x73, 0x97, 0x74, 0x8b,
+    0x7e, 0x88, 0x7f, 0x7e, 0x6e, 0xa1, 0x85, 0x8f, 0x77, 0x93, 0x7a, 0x6f,
+    0x7b, 0x91, 0x67, 0x73, 0x8b, 0x97, 0x6d, 0x87, 0x84, 0xf8, 0xff, 0xff,
+    0x88, 0xf8, 0xff, 0xff, 0xe6, 0xf8, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0x80, 0x02, 0x00, 0x00, 0x73, 0x84, 0xbb, 0xa4, 0xa5, 0x44, 0x5c, 0xb1,
+    0x8e, 0x50, 0x82, 0x8b, 0x81, 0x86, 0x48, 0x80, 0xa9, 0x61, 0xa3, 0xa8,
+    0xca, 0x5a, 0x9d, 0x8a, 0x89, 0x7c, 0x65, 0x91, 0x5e, 0x70, 0x84, 0x71,
+    0xbc, 0x36, 0x8e, 0x8b, 0xa6, 0x63, 0xb7, 0x75, 0x92, 0x59, 0x60, 0x7e,
+    0x33, 0x8f, 0x90, 0x7a, 0xa9, 0x27, 0x72, 0x80, 0x62, 0x95, 0x93, 0x7b,
+    0x60, 0x46, 0x40, 0x55, 0x01, 0x9e, 0x8a, 0x6b, 0x58, 0x8a, 0xa6, 0xb7,
+    0x91, 0x39, 0x72, 0xb4, 0x6e, 0x67, 0x83, 0x91, 0x82, 0x7b, 0x64, 0x7a,
+    0x87, 0x6e, 0xb0, 0xa0, 0xd3, 0x53, 0xb7, 0x93, 0x76, 0xa6, 0x68, 0x8a,
+    0x74, 0x6a, 0x96, 0x6e, 0xb3, 0x53, 0xaa, 0x89, 0xf1, 0x76, 0xb8, 0x75,
+    0x8b, 0x66, 0x5f, 0x6e, 0x52, 0x92, 0x6f, 0x82, 0xbe, 0x45, 0x8d, 0x69,
+    0x98, 0x98, 0x80, 0x87, 0x73, 0x7d, 0x4d, 0x42, 0x1f, 0xa5, 0x6a, 0x73,
+    0x47, 0x87, 0x8a, 0xd1, 0x75, 0x30, 0x91, 0xae, 0x60, 0x82, 0x7a, 0x94,
+    0x75, 0x71, 0x6a, 0x7c, 0x74, 0x7a, 0xac, 0xa2, 0xb6, 0x51, 0xc6, 0x97,
+    0x63, 0xa0, 0x67, 0x7f, 0x80, 0x69, 0x88, 0x6b, 0xa5, 0x5e, 0xc2, 0x72,
+    0xf4, 0x6e, 0xaf, 0x76, 0x7f, 0x7c, 0x55, 0x68, 0x67, 0x97, 0x61, 0x7b,
+    0xbe, 0x5e, 0xab, 0x58, 0xca, 0xa2, 0x77, 0x7a, 0x8f, 0x6e, 0x54, 0x33,
+    0x4d, 0xa7, 0x5d, 0x66, 0x47, 0x92, 0x6f, 0xd6, 0x5c, 0x25, 0xa9, 0xbc,
+    0x5c, 0xb8, 0x64, 0x9b, 0x58, 0x6e, 0x77, 0x76, 0x6a, 0x94, 0xb2, 0xac,
+    0x9a, 0x51, 0xd0, 0x94, 0x62, 0xcc, 0x5a, 0x7f, 0x74, 0x6e, 0x7d, 0x71,
+    0x9b, 0x69, 0xd3, 0x64, 0xef, 0x76, 0xaa, 0x75, 0x89, 0x84, 0x50, 0x76,
+    0x72, 0x97, 0x5f, 0x77, 0xc5, 0x66, 0xce, 0x3a, 0xe5, 0xad, 0x5a, 0x81,
+    0x9e, 0x8e, 0x60, 0x3d, 0x6d, 0xa9, 0x46, 0x6b, 0x44, 0x89, 0x4d, 0xd8,
+    0x4c, 0x28, 0xb1, 0xb7, 0x60, 0xc7, 0x57, 0xb5, 0x50, 0x68, 0x88, 0x7c,
+    0x60, 0x98, 0xac, 0x9a, 0x7f, 0x51, 0xce, 0x8a, 0x5e, 0xd8, 0x51, 0x7d,
+    0x68, 0x6e, 0x7f, 0x6e, 0x90, 0x7b, 0xdf, 0x60, 0xda, 0x77, 0x91, 0x6f,
+    0x85, 0xa0, 0x58, 0x73, 0x70, 0x93, 0x51, 0x7d, 0xb9, 0x70, 0xf5, 0x31,
+    0xe9, 0xa3, 0x47, 0x76, 0xa7, 0x9b, 0x72, 0x3d, 0x90, 0xb2, 0x57, 0x64,
+    0x5b, 0x6f, 0x2b, 0xcf, 0x52, 0x28, 0xc1, 0xa7, 0x6a, 0x78, 0x51, 0xad,
+    0x49, 0x70, 0x90, 0x81, 0x5c, 0x7e, 0x9e, 0x99, 0x77, 0x50, 0xc0, 0x94,
+    0x63, 0xb7, 0x4d, 0x71, 0x58, 0x66, 0x76, 0x6d, 0x78, 0x6a, 0xe1, 0x40,
+    0xc7, 0x73, 0x7f, 0x65, 0x7c, 0x7f, 0x4d, 0x80, 0x64, 0x95, 0x57, 0x81,
+    0xb1, 0x5e, 0xff, 0x26, 0xd6, 0xa2, 0x3a, 0x73, 0xa7, 0x81, 0x76, 0x5d,
+    0x92, 0xb1, 0x58, 0x48, 0x4e, 0x5e, 0x1a, 0xc8, 0x58, 0x2c, 0xb6, 0xa7,
+    0x67, 0x89, 0x5e, 0xa0, 0x4f, 0x78, 0x93, 0x8b, 0x57, 0x7b, 0x95, 0x78,
+    0x6e, 0x46, 0xb2, 0x98, 0x55, 0xd3, 0x5e, 0x66, 0x56, 0x68, 0x74, 0x7e,
+    0x72, 0x74, 0xdd, 0x36, 0xa6, 0x64, 0x65, 0x6b, 0x81, 0x98, 0x56, 0x76,
+    0x65, 0x93, 0x58, 0x7d, 0x9b, 0x82, 0xef, 0x44, 0xbf, 0xa4, 0x3d, 0x57,
+    0xa0, 0xa7, 0x7a, 0x74, 0x9f, 0xa8, 0x70, 0x52, 0x55, 0x5f, 0x1a, 0x94,
+    0x64, 0x37, 0xa7, 0xa6, 0x80, 0x7d, 0x6e, 0x99, 0x5d, 0x81, 0x8a, 0x99,
+    0x5c, 0x76, 0x8f, 0x44, 0x68, 0x50, 0x94, 0x97, 0x63, 0xb6, 0x73, 0x56,
+    0x5b, 0x70, 0x66, 0x8b, 0x72, 0x78, 0xcc, 0x31, 0x8b, 0x68, 0x4a, 0x74,
+    0x7d, 0x99, 0x54, 0x91, 0x6a, 0x90, 0x5d, 0x80, 0x8c, 0x82, 0xcd, 0x4f,
+    0xb0, 0x96, 0x63, 0x56, 0x97, 0xb3, 0x7e, 0x97, 0xa4, 0x9d, 0x7a, 0x5d,
+    0x49, 0x36, 0x18, 0x64, 0x60, 0x43, 0x89, 0xa2, 0x6a, 0x49, 0x7f, 0x58,
+    0x6a, 0x83, 0x77, 0x9d, 0x70, 0x3b, 0x83, 0x21, 0x59, 0x52, 0x6d, 0x95,
+    0x48, 0xa8, 0x8a, 0x42, 0x50, 0x6d, 0x44, 0x95, 0x69, 0x50, 0xc1, 0x4b,
+    0x7c, 0x59, 0x42, 0x78, 0x77, 0x7f, 0x5b, 0x98, 0x67, 0x89, 0x55, 0x8b,
+    0x82, 0x47, 0xb7, 0x64, 0x9d, 0x83, 0x5c, 0x53, 0x89, 0x90, 0x79, 0xb2,
+    0x90, 0x98, 0x85, 0x5a, 0x4d, 0x2b, 0x19, 0x1e, 0x52, 0x50, 0x57, 0x8b,
+    0x73, 0x3a, 0x88, 0x1e, 0x65, 0x80, 0x4d, 0x9b, 0x6c, 0x3c, 0x86, 0x26,
+    0x5b, 0x56, 0x36, 0x98, 0x49, 0x87, 0x9f, 0x2a, 0x40, 0x61, 0x27, 0x9d,
+    0x63, 0x40, 0xa8, 0x46, 0x6b, 0x52, 0x52, 0x7f, 0x67, 0x6a, 0x58, 0xa1,
+    0x5d, 0x6d, 0x5f, 0x9a, 0x72, 0x3a, 0x99, 0x63, 0x8c, 0x80, 0x68, 0x58,
+    0x72, 0x6a, 0x7c, 0xbb, 0x7e, 0x78, 0x94, 0x60, 0x72, 0xfb, 0xff, 0xff,
+    0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x96, 0xfe, 0xff, 0xff,
+    0x8f, 0x00, 0x00, 0x00, 0x8f, 0xfc, 0xff, 0xff, 0xb4, 0xfe, 0xff, 0xff,
+    0xc1, 0xfd, 0xff, 0xff, 0x59, 0xff, 0xff, 0xff, 0xbc, 0xfe, 0xff, 0xff,
+    0x09, 0xff, 0xff, 0xff, 0x9e, 0xfb, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0xe9, 0x03, 0x00, 0x00, 0x2b, 0xfd, 0xff, 0xff,
+    0x3b, 0xfd, 0xff, 0xff, 0x91, 0x01, 0x00, 0x00, 0x60, 0xfb, 0xff, 0xff,
+    0x04, 0xfd, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0xf0, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x18, 0x03, 0x00, 0x00, 0x78, 0x03, 0x00, 0x00,
+    0x88, 0x01, 0x00, 0x00, 0xf8, 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x64, 0x02, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00,
+    0xb2, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x24, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73,
+    0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x84, 0xfd, 0xff, 0xff,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x22, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x1c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f,
+    0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0xec, 0xfd, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x7d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x17, 0xb1, 0x3d,
+    0x01, 0x00, 0x00, 0x00, 0x84, 0xdb, 0x33, 0x41, 0x01, 0x00, 0x00, 0x00,
+    0x9d, 0xf0, 0x2c, 0xc1, 0x8e, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x48, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x48, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67,
+    0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x5f, 0x31, 0x2f,
+    0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74,
+    0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x2f,
+    0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0x0f, 0x00, 0x00,
+    0x84, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0xac, 0x5f, 0xf6, 0x39, 0x01, 0x00, 0x00, 0x00, 0x1d, 0xaf, 0x62, 0x3d,
+    0x01, 0x00, 0x00, 0x00, 0x5e, 0x1b, 0x83, 0xbd, 0x22, 0xfe, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x03, 0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0xf4, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x0f, 0x72, 0x3d,
+    0x01, 0x00, 0x00, 0x00, 0x38, 0x1d, 0x71, 0x41, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x28, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68,
+    0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x6c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xc6, 0xd0, 0xd0, 0x3d,
+    0x01, 0x00, 0x00, 0x00, 0xf5, 0xff, 0xcf, 0x41, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x3c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x50, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67,
+    0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x2f, 0x46, 0x61,
+    0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d,
+    0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+    0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7f, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf7, 0x5e, 0x6c, 0x3a,
+    0x01, 0x00, 0x00, 0x00, 0x30, 0x42, 0xec, 0x3d, 0x01, 0x00, 0x00, 0x00,
+    0x42, 0xca, 0xe8, 0xbd, 0xaa, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x02,
+    0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x76,
+    0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0xec, 0xcd, 0xc0, 0x38, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00,
+    0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x25, 0xf5, 0xe8, 0x37, 0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00,
+    0x5c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x09, 0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+    0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+    0x14, 0x00, 0x1c, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x18, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x0c, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xff,
+    0x00, 0x19, 0x06, 0x00, 0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x00, 0x09, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x04};
+const int g_tiny_conv_micro_features_model_data_len = 18208;
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h
new file mode 100644
index 0000000..22c0a97
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h
@@ -0,0 +1,27 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is a standard TensorFlow Lite model file that has been converted into a
+// C data array, so it can be easily compiled into a binary for devices that
+// don't have a file system. It was created using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_micro_features_model_data.cc
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_TINY_CONV_MICRO_FEATURES_MODEL_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_TINY_CONV_MICRO_FEATURES_MODEL_DATA_H_
+
+extern const unsigned char g_tiny_conv_micro_features_model_data[];
+extern const int g_tiny_conv_micro_features_model_data_len;
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_TINY_CONV_MICRO_FEATURES_MODEL_DATA_H_
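For orientation, here is a minimal sketch of how a generated array like this gets consumed. It mirrors the model-mapping step in micro_speech_test.cc further below; the schema and version include paths are assumptions, not part of this patch:

    #include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h"
    #include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
    #include "tensorflow/lite/schema/schema_generated.h"
    #include "tensorflow/lite/version.h"

    // Map the byte array into the FlatBuffer schema. GetModel interprets the
    // bytes in place, so no copying or parsing happens here.
    const tflite::Model* LoadTinyConvModel(tflite::ErrorReporter* error_reporter) {
      const tflite::Model* model =
          ::tflite::GetModel(g_tiny_conv_micro_features_model_data);
      if (model->version() != TFLITE_SCHEMA_VERSION) {
        error_reporter->Report("Unsupported schema version %d.", model->version());
        return nullptr;
      }
      return model;
    }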
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.cc
new file mode 100644
index 0000000..51b7d8b
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.cc
@@ -0,0 +1,70 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h"
+
+#include <string.h>
+
+int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
+                         size_t num_samples, size_t* num_samples_read) {
+  const int size = state->size;
+
+  // Copy as many incoming samples as will fit into our local input buffer.
+  size_t max_samples_to_copy = state->size - state->input_used;
+  if (max_samples_to_copy > num_samples) {
+    max_samples_to_copy = num_samples;
+  }
+  memcpy(state->input + state->input_used, samples,
+         max_samples_to_copy * sizeof(*samples));
+  *num_samples_read = max_samples_to_copy;
+  state->input_used += max_samples_to_copy;
+
+  if (state->input_used < state->size) {
+    // We don't have enough samples to compute a window.
+    return 0;
+  }
+
+  // Apply the window to the input.
+  const int16_t* coefficients = state->coefficients;
+  const int16_t* input = state->input;
+  int16_t* output = state->output;
+  int i;
+  int16_t max_abs_output_value = 0;
+  for (i = 0; i < size; ++i) {
+    int16_t new_value = ((static_cast<int32_t>(*input++)) * *coefficients++) >>
+                        kFrontendWindowBits;
+    *output++ = new_value;
+    if (new_value < 0) {
+      new_value = -new_value;
+    }
+    if (new_value > max_abs_output_value) {
+      max_abs_output_value = new_value;
+    }
+  }
+  // Shuffle the input down by the step size, and update how much we have used.
+  memmove(state->input, state->input + state->step,
+          sizeof(*state->input) * (state->size - state->step));
+  state->input_used -= state->step;
+  state->max_abs_output_value = max_abs_output_value;
+
+  // Indicate that the output buffer is valid for the next stage.
+  return 1;
+}
+
+void WindowReset(struct WindowState* state) {
+  memset(state->input, 0, state->size * sizeof(*state->input));
+  memset(state->output, 0, state->size * sizeof(*state->output));
+  state->input_used = 0;
+  state->max_abs_output_value = 0;
+}
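The loop in WindowProcessSamples amounts to a Q12 fixed-point multiply: each coefficient is a Hann window value scaled by 1 << kFrontendWindowBits (4096), and the 32-bit product is shifted right by 12 bits to return to sample scale. A self-contained sketch of the same arithmetic, using the 25-sample window from the tests below (illustrative values, not from this patch):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const int kWindowBits = 12;  // Mirrors kFrontendWindowBits in window.h.
      const int kSize = 25;        // 25 ms window at the 1 kHz test sample rate.
      // Hann value at the window's center: 0.5 - 0.5*cos(pi) = 1.0, which
      // scales to 1 << 12 = 4096 in fixed point.
      float center =
          0.5f - 0.5f * std::cos(2.0f * 3.14159265f * (12 + 0.5f) / kSize);
      int16_t coefficient =
          static_cast<int16_t>(std::floor(center * (1 << kWindowBits) + 0.5f));
      int16_t sample = 16000;
      // Same operation as the loop body in WindowProcessSamples.
      int16_t windowed =
          (static_cast<int32_t>(sample) * coefficient) >> kWindowBits;
      std::printf("coefficient=%d windowed=%d\n", coefficient, windowed);
      return 0;
    }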
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h
new file mode 100644
index 0000000..b32c059
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_WINDOW_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_WINDOW_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
+
+#define kFrontendWindowBits 12
+
+struct WindowState {
+  size_t size;
+  int16_t coefficients[kMaxAudioSampleSize];
+  size_t step;
+
+  int16_t input[kMaxAudioSampleSize];
+  size_t input_used;
+  int16_t output[kMaxAudioSampleSize];
+  int16_t max_abs_output_value;
+};
+
+// Applies a window to the incoming samples, stepping forward by the given
+// step size. Returns 1 when a full window of output is ready, 0 otherwise.
+int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
+                         size_t num_samples, size_t* num_samples_read);
+
+void WindowReset(struct WindowState* state);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_WINDOW_H_
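A minimal usage sketch of this streaming interface, assuming `state` has already been populated by WindowPopulateState from window_util.h:

    #include <stddef.h>
    #include <stdint.h>

    #include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h"

    // Drains an audio buffer through the window. WindowProcessSamples reports
    // how many samples it consumed, and returns 1 only once a full window of
    // output is available in state->output.
    void FeedAudio(struct WindowState* state, const int16_t* audio,
                   size_t audio_len) {
      while (audio_len > 0) {
        size_t num_read = 0;
        if (WindowProcessSamples(state, audio, audio_len, &num_read)) {
          // state->output now holds state->size windowed samples; hand them to
          // the next pipeline stage here.
        }
        audio += num_read;
        audio_len -= num_read;
      }
    }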
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_test.cc
new file mode 100644
index 0000000..310f84f
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_test.cc
@@ -0,0 +1,183 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h"
+
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+namespace {
+
+const int kSampleRate = 1000;
+const int kWindowSamples = 25;
+const int kStepSamples = 10;
+const int16_t kFakeAudioData[] = {
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768};
+
+// Test window function behaviors using default config values.
+class WindowTestConfig {
+ public:
+  WindowTestConfig() {
+    config_.size_ms = 25;
+    config_.step_size_ms = 10;
+  }
+
+  struct WindowConfig config_;
+};
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(WindowState_CheckCoefficients) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  WindowTestConfig config;
+  struct WindowState state;
+  TF_LITE_MICRO_EXPECT(WindowPopulateState(error_reporter, &config.config_,
+                                           &state, kSampleRate));
+
+  const int16_t expected[] = {16,   144,  391,  743,  1176, 1664, 2177,
+                              2681, 3145, 3541, 3843, 4032, 4096, 4032,
+                              3843, 3541, 3145, 2681, 2177, 1664, 1176,
+                              743,  391,  144,  16};
+  TF_LITE_MICRO_EXPECT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < state.size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.coefficients[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(WindowState_CheckResidualInput) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  WindowTestConfig config;
+  struct WindowState state;
+  TF_LITE_MICRO_EXPECT(WindowPopulateState(error_reporter, &config.config_,
+                                           &state, kSampleRate));
+  size_t num_samples_read;
+
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+
+  int i;
+  for (i = kStepSamples; i < kWindowSamples; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.input[i - kStepSamples], kFakeAudioData[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(WindowState_CheckOutputValues) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  WindowTestConfig config;
+  struct WindowState state;
+  TF_LITE_MICRO_EXPECT(WindowPopulateState(error_reporter, &config.config_,
+                                           &state, kSampleRate));
+  size_t num_samples_read;
+
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+
+  const int16_t expected[] = {
+      0, 1151,   0, -5944, 0, 13311,  0, -21448, 0, 28327, 0, -32256, 0, 32255,
+      0, -28328, 0, 21447, 0, -13312, 0, 5943,   0, -1152, 0};
+  TF_LITE_MICRO_EXPECT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < state.size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.output[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(WindowState_CheckMaxAbsValue) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  WindowTestConfig config;
+  struct WindowState state;
+  TF_LITE_MICRO_EXPECT(WindowPopulateState(error_reporter, &config.config_,
+                                           &state, kSampleRate));
+  size_t num_samples_read;
+
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+
+  TF_LITE_MICRO_EXPECT_EQ(state.max_abs_output_value, 32256);
+}
+
+TF_LITE_MICRO_TEST(WindowState_CheckConsecutiveWindow) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  WindowTestConfig config;
+  struct WindowState state;
+  TF_LITE_MICRO_EXPECT(WindowPopulateState(error_reporter, &config.config_,
+                                           &state, kSampleRate));
+  size_t num_samples_read;
+
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read));
+
+  const int16_t expected[] = {
+      0, -1152, 0, 5943,   0, -13312, 0, 21447, 0, -28328, 0, 32255, 0, -32256,
+      0, 28327, 0, -21448, 0, 13311,  0, -5944, 0, 1151,   0};
+  TF_LITE_MICRO_EXPECT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  int i;
+  for (i = 0; i < state.size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(state.output[i], expected[i]);
+  }
+}
+
+TF_LITE_MICRO_TEST(WindowState_CheckNotEnoughSamples) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  WindowTestConfig config;
+  struct WindowState state;
+  TF_LITE_MICRO_EXPECT(WindowPopulateState(error_reporter, &config.config_,
+                                           &state, kSampleRate));
+  size_t num_samples_read;
+
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read));
+  TF_LITE_MICRO_EXPECT_EQ(
+      false, WindowProcessSamples(
+                 &state, kFakeAudioData + kWindowSamples + kStepSamples,
+                 sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) -
+                     kWindowSamples - kStepSamples,
+                 &num_samples_read));
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      state.input_used,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - 2 * kStepSamples);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.cc
new file mode 100644
index 0000000..618973b
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.cc
@@ -0,0 +1,57 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/static_alloc.h"
+
+// Needed because some platforms don't have M_PI defined.
+#define WINDOW_PI (3.14159265358979323846f)
+
+void WindowFillConfigWithDefaults(struct WindowConfig* config) {
+  config->size_ms = 25;
+  config->step_size_ms = 10;
+}
+
+int WindowPopulateState(tflite::ErrorReporter* error_reporter,
+                        const struct WindowConfig* config,
+                        struct WindowState* state, int sample_rate) {
+  state->size = config->size_ms * sample_rate / 1000;
+  state->step = config->step_size_ms * sample_rate / 1000;
+
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(state->coefficients,
+                                 (state->size * sizeof(*state->coefficients)));
+
+  // Populate the window values.
+  const float arg = WINDOW_PI * 2.0 / (static_cast<float>(state->size));
+  int i;
+  for (i = 0; i < state->size; ++i) {
+    float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
+    // Scale it to fixed point and round it.
+    state->coefficients[i] =
+        floor(float_value * (1 << kFrontendWindowBits) + 0.5);
+  }
+
+  state->input_used = 0;
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(state->input,
+                                 (state->size * sizeof(*state->input)));
+
+  STATIC_ALLOC_ENSURE_ARRAY_SIZE(state->output,
+                                 (state->size * sizeof(*state->output)));
+  return 1;
+}
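The coefficient loop above can be checked against the expected table in window_test.cc: coefficient i for a 25-sample window is floor((0.5 - 0.5 * cos(2 * pi * (i + 0.5) / 25)) * 4096 + 0.5). A small sketch that regenerates that table:

    #include <cmath>
    #include <cstdio>

    int main() {
      const int kSize = 25;  // 25 ms at the 1 kHz rate used by window_test.cc.
      for (int i = 0; i < kSize; ++i) {
        float value =
            0.5f - 0.5f * std::cos(2.0f * 3.14159265f * (i + 0.5f) / kSize);
        // Prints 16 144 391 ... 4096 ... 144 16, matching the test's table.
        std::printf("%d ", static_cast<int>(std::floor(value * 4096 + 0.5f)));
      }
      std::printf("\n");
      return 0;
    }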
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h
new file mode 100644
index 0000000..d0c61c2
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window_util.h
@@ -0,0 +1,40 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_WINDOW_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_WINDOW_UTIL_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/window.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+struct WindowConfig {
+  // Length of the window frame, in milliseconds.
+  size_t size_ms;
+  // Length of the step to the next frame, in milliseconds.
+  size_t step_size_ms;
+};
+
+// Populates the WindowConfig with "sane" default values.
+void WindowFillConfigWithDefaults(struct WindowConfig* config);
+
+// Initializes the state, precomputing the fixed-point window coefficients
+// for the given sample rate. Returns 1 on success.
+int WindowPopulateState(tflite::ErrorReporter* error_reporter,
+                        const struct WindowConfig* config,
+                        struct WindowState* state, int sample_rate);
+
+// Frees any allocated buffers.
+void WindowFreeStateContents(struct WindowState* state);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_WINDOW_UTIL_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.cc
new file mode 100644
index 0000000..48535d1
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.cc
@@ -0,0 +1,24 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See the header for documentation on the meaning of this data.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h"
+
+const uint8_t g_yes_feature_data_slice[g_yes_feature_data_slice_size] = {
+    214, 215, 236, 202, 235, 203, 225, 191, 203, 188, 199, 194, 212, 127,
+    51,  0,   174, 188, 219, 196, 228, 221, 240, 207, 235, 220, 241, 219,
+    237, 207, 212, 142, 95,  0,   139, 78,  162, 177, 197, 183,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h
new file mode 100644
index 0000000..e73a131
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h
@@ -0,0 +1,29 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This data was extracted from the larger feature data held in
+// yes_micro_features_data.cc and consists of the 26th spectrogram slice of 40
+// values. This is the expected result of running the sample data in
+// yes_30ms_sample_data.cc through the preprocessing pipeline.
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_FEATURE_DATA_SLICE_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_FEATURE_DATA_SLICE_H_
+
+#include <cstdint>
+
+constexpr int g_yes_feature_data_slice_size = 40;
+extern const uint8_t g_yes_feature_data_slice[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_FEATURE_DATA_SLICE_H_
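Given the header's description, the slice should line up with one row of the full 49x40 feature array. A sketch, assuming the "26th slice" means zero-based row 26 of the row-major data in yes_micro_features_data.cc:

    #include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h"
    #include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h"

    // Points at the 40 values inside the full feature array that should match
    // g_yes_feature_data_slice element for element.
    const unsigned char* YesSliceInFullData() {
      return g_yes_micro_f2e59fea_nohash_1_data +
             26 * g_yes_micro_f2e59fea_nohash_1_width;
    }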
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.cc
new file mode 100644
index 0000000..2c2ee09
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.cc
@@ -0,0 +1,165 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h"
+
+/* File automatically created by
+ * tensorflow/examples/speech_commands/wav_to_features.py \
+ * --sample_rate=16000 \
+ * --clip_duration_ms=1000 \
+ * --window_size_ms=30 \
+ * --window_stride_ms=20 \
+ * --feature_bin_count=40 \
+ * --quantize=1 \
+ * --preprocess="micro" \
+ * --input_wav="speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav" \
+ * --output_c_file="yes_micro_features_data.cc" \
+ */
+
+const int g_yes_micro_f2e59fea_nohash_1_width = 40;
+const int g_yes_micro_f2e59fea_nohash_1_height = 49;
+const unsigned char g_yes_micro_f2e59fea_nohash_1_data[] = {
+    244, 226, 245, 223, 234, 213, 228, 208, 194, 110, 95,  116, 102, 0,   137,
+    161, 183, 173, 137, 116, 133, 157, 151, 156, 128, 110, 128, 0,   68,  78,
+    78,  90,  68,  68,  78,  102, 95,  78,  95,  78,  210, 188, 209, 183, 204,
+    188, 201, 191, 166, 119, 90,  107, 110, 107, 175, 157, 179, 168, 182, 145,
+    152, 164, 171, 165, 136, 143, 122, 68,  0,   78,  90,  90,  110, 90,  102,
+    99,  90,  68,  78,  68,  223, 186, 179, 123, 182, 110, 196, 171, 159, 110,
+    102, 95,  90,  99,  160, 134, 125, 136, 153, 152, 164, 134, 164, 151, 141,
+    136, 99,  90,  90,  90,  78,  78,  102, 119, 102, 90,  110, 90,  68,  51,
+    177, 175, 211, 172, 183, 0,   95,  68,  129, 102, 68,  85,  114, 105, 110,
+    85,  102, 95,  140, 51,  85,  51,  95,  90,  143, 116, 90,  78,  78,  51,
+    107, 85,  68,  0,   68,  51,  90,  51,  68,  0,   164, 117, 193, 120, 156,
+    0,   138, 51,  90,  0,   51,  0,   51,  85,  0,   0,   51,  0,   0,   0,
+    0,   0,   114, 0,   85,  78,  90,  51,  0,   0,   51,  85,  99,  85,  107,
+    68,  90,  85,  78,  0,   51,  0,   110, 0,   68,  0,   0,   0,   51,  0,
+    51,  0,   0,   0,   68,  90,  107, 0,   68,  0,   0,   0,   68,  0,   51,
+    68,  0,   78,  68,  0,   51,  0,   78,  68,  90,  68,  78,  51,  51,  0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   90,  0,   0,   0,   0,
+    0,   51,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   51,  68,
+    0,   0,   78,  0,   78,  0,   78,  0,   51,  0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   51,  0,   51,  0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   51,  0,   51,
+    0,   51,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   51,
+    0,   0,   0,   0,   51,  78,  0,   0,   51,  51,  0,   0,   0,   78,  0,
+    213, 170, 192, 180, 196, 188, 173, 131, 173, 116, 137, 105, 159, 127, 0,
+    0,   0,   0,   127, 164, 165, 161, 170, 164, 185, 197, 195, 167, 134, 138,
+    159, 134, 136, 105, 51,  0,   99,  0,   51,  0,   228, 215, 229, 218, 237,
+    215, 228, 210, 237, 222, 239, 211, 208, 211, 234, 218, 220, 209, 225, 219,
+    235, 222, 245, 225, 245, 224, 243, 223, 241, 218, 237, 224, 234, 213, 221,
+    193, 197, 164, 157, 128, 227, 188, 232, 196, 220, 220, 240, 219, 234, 213,
+    234, 211, 231, 218, 233, 213, 239, 215, 228, 207, 229, 206, 224, 208, 226,
+    207, 232, 210, 225, 208, 230, 199, 227, 206, 210, 205, 218, 174, 178, 141,
+    235, 208, 220, 206, 225, 203, 233, 203, 225, 167, 205, 199, 208, 190, 221,
+    204, 223, 207, 225, 188, 225, 197, 215, 188, 199, 183, 225, 195, 224, 200,
+    216, 178, 208, 188, 215, 202, 214, 183, 176, 140, 198, 150, 211, 194, 203,
+    120, 175, 188, 204, 189, 219, 192, 223, 202, 216, 186, 203, 185, 210, 182,
+    214, 183, 204, 170, 204, 125, 184, 187, 206, 185, 198, 182, 210, 161, 202,
+    198, 218, 173, 145, 120, 188, 183, 205, 168, 200, 170, 210, 177, 187, 190,
+    209, 193, 193, 166, 210, 162, 175, 119, 174, 147, 182, 161, 181, 134, 176,
+    143, 187, 165, 186, 149, 185, 141, 192, 181, 202, 123, 170, 143, 144, 78,
+    149, 0,   208, 182, 170, 78,  170, 0,   117, 51,  156, 99,  195, 170, 200,
+    130, 152, 68,  175, 141, 173, 134, 194, 132, 189, 164, 198, 134, 173, 117,
+    171, 149, 183, 181, 185, 99,  153, 117, 125, 0,   166, 0,   173, 117, 144,
+    0,   117, 102, 188, 120, 193, 166, 197, 68,  163, 119, 169, 99,  134, 0,
+    162, 0,   164, 68,  171, 116, 126, 0,   120, 68,  68,  0,   105, 0,   159,
+    95,  150, 51,  90,  85,  0,   0,   131, 0,   105, 0,   145, 51,  170, 51,
+    120, 0,   107, 0,   145, 85,  160, 0,   85,  0,   0,   51,  149, 0,   78,
+    0,   0,   0,   0,   0,   0,   0,   90,  0,   112, 0,   78,  102, 122, 0,
+    0,   0,   0,   0,   105, 0,   0,   0,   0,   0,   0,   0,   0,   0,   112,
+    0,   164, 120, 143, 0,   0,   0,   0,   0,   51,  0,   90,  0,   78,  0,
+    0,   0,   0,   0,   110, 0,   139, 0,   112, 51,  0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   102, 0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   107,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   78,  0,   51,  0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   51,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   127, 110, 133, 0,   167, 0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   132, 0,   190,
+    194, 202, 0,   197, 187, 161, 0,   0,   0,   0,   0,   0,   0,   0,   0,
+    214, 213, 223, 203, 218, 189, 200, 122, 78,  0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   191, 210, 231, 197, 226, 217, 238, 216, 236, 207,
+    199, 0,   0,   0,   0,   0,   107, 122, 155, 160, 214, 215, 236, 202, 235,
+    203, 225, 191, 203, 188, 199, 194, 212, 127, 51,  0,   174, 188, 219, 196,
+    228, 221, 240, 207, 235, 220, 241, 219, 237, 207, 212, 142, 95,  0,   139,
+    78,  162, 177, 197, 183, 211, 199, 235, 208, 238, 215, 227, 207, 211, 201,
+    224, 213, 226, 192, 213, 170, 223, 205, 234, 221, 245, 225, 242, 220, 245,
+    221, 239, 221, 238, 213, 226, 180, 159, 112, 176, 159, 208, 202, 213, 191,
+    205, 191, 225, 197, 238, 219, 224, 201, 227, 200, 221, 201, 225, 203, 212,
+    195, 229, 210, 228, 210, 239, 216, 226, 212, 233, 205, 225, 200, 229, 207,
+    222, 151, 147, 119, 179, 185, 230, 218, 223, 192, 202, 136, 205, 177, 223,
+    204, 228, 215, 232, 209, 221, 189, 221, 205, 209, 200, 226, 209, 229, 205,
+    235, 192, 209, 198, 228, 190, 206, 185, 207, 187, 214, 175, 177, 184, 220,
+    195, 214, 207, 230, 184, 205, 159, 208, 184, 189, 169, 224, 213, 219, 199,
+    229, 203, 216, 205, 222, 204, 224, 206, 231, 208, 231, 176, 197, 184, 216,
+    193, 211, 139, 212, 195, 231, 164, 166, 195, 217, 182, 208, 190, 217, 179,
+    205, 68,  182, 119, 195, 168, 182, 136, 204, 179, 193, 158, 182, 140, 188,
+    154, 197, 169, 190, 99,  184, 0,   125, 0,   131, 0,   99,  68,  179, 85,
+    190, 184, 213, 203, 223, 202, 212, 190, 209, 138, 178, 0,   159, 51,  128,
+    51,  105, 0,   139, 51,  179, 125, 185, 114, 171, 128, 175, 132, 181, 174,
+    155, 0,   0,   0,   90,  0,   125, 0,   176, 188, 227, 217, 244, 215, 234,
+    221, 239, 192, 224, 210, 0,   0,   134, 0,   51,  0,   105, 0,   105, 0,
+    143, 90,  192, 119, 175, 147, 141, 51,  184, 110, 85,  0,   0,   0,   0,
+    0,   0,   0,   151, 139, 201, 203, 232, 203, 226, 208, 236, 206, 230, 212,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   169, 0,   119,
+    0,   78,  0,   0,   0,   0,   0,   0,   0,   0,   0,   68,  0,   0,   133,
+    200, 180, 220, 197, 228, 201, 221, 184, 213, 193, 110, 0,   0,   0,   0,
+    0,   0,   0,   0,   0,   78,  0,   164, 0,   0,   0,   0,   0,   107, 0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   150, 164, 202, 182, 224,
+    197, 211, 179, 212, 193, 134, 0,   0,   0,   0,   0,   0,   0,   0,   0,
+    85,  0,   150, 0,   85,  0,   95,  0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   102, 90,  193, 160, 203, 164, 200, 178, 205, 174,
+    116, 0,   0,   0,   0,   0,   0,   0,   0,   0,   120, 114, 123, 0,   114,
+    0,   145, 68,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    102, 68,  199, 170, 195, 180, 208, 176, 200, 164, 0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   110, 0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   142, 102, 172, 110, 186,
+    167, 185, 147, 189, 154, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   177, 0,   158, 136, 197, 155, 189, 166,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    85,  0,   155, 90,  175, 117, 175, 138, 202, 165, 0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   51,  0,   139,
+    0,   120, 68,  51,  123, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   119, 0,   78,  0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h
new file mode 100644
index 0000000..d19bf8f
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h
@@ -0,0 +1,23 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_MICRO_FEATURES_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_MICRO_FEATURES_DATA_H_
+
+extern const int g_yes_micro_f2e59fea_nohash_1_width;
+extern const int g_yes_micro_f2e59fea_nohash_1_height;
+extern const unsigned char g_yes_micro_f2e59fea_nohash_1_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_MICRO_FEATURES_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
index 4e54ff6..6f0c258 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
@@ -13,9 +13,9 @@
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/no_micro_features_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h"
 #include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
 #include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
 #include "tensorflow/lite/experimental/micro/micro_interpreter.h"
@@ -32,7 +32,8 @@
 
   // Map the model into a usable data structure. This doesn't involve any
   // copying or parsing, it's a very lightweight operation.
-  const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data);
+  const tflite::Model* model =
+      ::tflite::GetModel(g_tiny_conv_micro_features_model_data);
   if (model->version() != TFLITE_SCHEMA_VERSION) {
     error_reporter->Report(
         "Model provided is schema version %d not equal "
@@ -61,12 +62,12 @@
   TF_LITE_MICRO_EXPECT_EQ(4, input->dims->size);
   TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]);
   TF_LITE_MICRO_EXPECT_EQ(49, input->dims->data[1]);
-  TF_LITE_MICRO_EXPECT_EQ(43, input->dims->data[2]);
+  TF_LITE_MICRO_EXPECT_EQ(40, input->dims->data[2]);
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, input->type);
 
   // Copy a spectrogram created from a .wav audio file of someone saying "Yes"
   // into the memory area used for the input.
-  const uint8_t* yes_features_data = g_yes_f2e59fea_nohash_1_data;
+  const uint8_t* yes_features_data = g_yes_micro_f2e59fea_nohash_1_data;
   for (int i = 0; i < input->bytes; ++i) {
     input->data.uint8[i] = yes_features_data[i];
   }
@@ -102,7 +103,7 @@
   TF_LITE_MICRO_EXPECT_GT(yes_score, no_score);
 
   // Now test with a different input, from a recording of "No".
-  const uint8_t* no_features_data = g_no_f9643d42_nohash_4_data;
+  const uint8_t* no_features_data = g_no_micro_f9643d42_nohash_4_data;
   for (int i = 0; i < input->bytes; ++i) {
     input->data.uint8[i] = no_features_data[i];
   }
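
Taken together, the hunks above amount to the following flow: map the flatbuffer, check the schema version and the 1 x 49 x 40 uint8 input, copy in the "yes" micro-features, invoke, and compare the "yes" and "no" scores. The condensed sketch below reconstructs that flow from what is visible in this diff; the tensor-arena size, the RunYesExample wrapper, and the interpreter-construction signature (which follows a later TFLite Micro revision) are assumptions rather than code from this PR, and the category indices come from the model_settings.h deleted below.

#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h"
#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/yes_micro_features_data.h"
#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
#include "tensorflow/lite/experimental/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

// Hypothetical standalone wrapper; the real test uses the TF_LITE_MICRO_TEST
// macros instead. Returns 0 when "yes" outscores "no".
int RunYesExample() {
  tflite::MicroErrorReporter micro_error_reporter;
  tflite::ErrorReporter* error_reporter = &micro_error_reporter;

  // Zero-copy mapping of the flatbuffer, as the comment in the test notes.
  const tflite::Model* model =
      ::tflite::GetModel(g_tiny_conv_micro_features_model_data);
  if (model->version() != TFLITE_SCHEMA_VERSION) return 1;

  tflite::ops::micro::AllOpsResolver resolver;
  constexpr int kTensorArenaSize = 10 * 1024;  // Assumed big enough here.
  static uint8_t tensor_arena[kTensorArenaSize];
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kTensorArenaSize, error_reporter);
  if (interpreter.AllocateTensors() != kTfLiteOk) return 1;

  // Per the expectations above, the input is a 1 x 49 x 40 uint8 tensor.
  TfLiteTensor* input = interpreter.input(0);
  for (int i = 0; i < input->bytes; ++i) {
    input->data.uint8[i] = g_yes_micro_f2e59fea_nohash_1_data[i];
  }
  if (interpreter.Invoke() != kTfLiteOk) return 1;

  // One uint8 score per category, ordered {silence, unknown, yes, no} per the
  // model_settings.h deleted below, so "yes" is index 2 and "no" is index 3.
  TfLiteTensor* output = interpreter.output(0);
  return output->data.uint8[2] > output->data.uint8[3] ? 0 : 1;
}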
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc
deleted file mode 100644
index b9b8fb3..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
-
-const char* kCategoryLabels[kCategoryCount] = {
-    "silence",
-    "unknown",
-    "yes",
-    "no",
-};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h b/tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h
deleted file mode 100644
index f48252d..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MODEL_SETTINGS_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MODEL_SETTINGS_H_
-
-// Keeping these as constant expressions allows us to allocate fixed-size
-// arrays on the stack for our working memory.
-
-// The size of the input time series data we pass to the FFT to produce the
-// frequency information. This has to be a power of two; since 30 ms of 16 kHz
-// input comes to 480 samples, 512 is the next power of two above that.
-constexpr int kMaxAudioSampleSize = 512;
-constexpr int kAudioSampleFrequency = 16000;
-
-// All of these values are derived from the values used during model training;
-// if you change your model, you'll need to update these constants.
-constexpr int kAverageWindowSize = 6;
-constexpr int kFeatureSliceSize =
-    ((kMaxAudioSampleSize / 2) + (kAverageWindowSize - 1)) / kAverageWindowSize;
-constexpr int kFeatureSliceCount = 49;
-constexpr int kFeatureElementCount = (kFeatureSliceSize * kFeatureSliceCount);
-constexpr int kFeatureSliceStrideMs = 20;
-constexpr int kFeatureSliceDurationMs = 30;
-
-constexpr int kCategoryCount = 4;
-constexpr int kSilenceIndex = 0;
-constexpr int kUnknownIndex = 1;
-extern const char* kCategoryLabels[kCategoryCount];
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_MODEL_SETTINGS_H_
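
The constants being deleted here also explain the expectation change in micro_speech_test.cc above: the old 43 was the rounded-up 256 / 6 from the FFT front end, while the new 40 is the number of feature channels the micro-features front end produces per 30 ms slice. A minimal compile-time check of that arithmetic (illustrative, not part of this PR):

// Old front end: a 512-point FFT yields 256 frequency bins, averaged in
// windows of 6 with round-up -- ceil(256 / 6) == 43.
static_assert(((512 / 2) + (6 - 1)) / 6 == 43, "old kFeatureSliceSize");
// The micro-features front end emits 40 channels per slice instead, matching
// the new TF_LITE_MICRO_EXPECT_EQ(40, input->dims->data[2]) above.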
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.cc
new file mode 100644
index 0000000..85113a9
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.cc
@@ -0,0 +1,1478 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See the header for documentation on the meaning of this data.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h"
+
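+// 16,000 int16_t samples: 1000 ms of audio at the example's 16 kHz rate.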
+const int g_no_1000ms_sample_data_size = 16000;
+const int16_t g_no_1000ms_sample_data[16000] = {
+    5,     1,     -10,   -16,   -14,   -10,   -4,    -5,    -10,   -15,   -13,
+    -17,   -22,   -21,   -23,   -25,   -22,   -26,   -28,   -31,   -28,   -25,
+    -20,   -24,   -21,   -13,   -7,    -1,    -1,    3,     3,     4,     -4,
+    -6,    -8,    -10,   -13,   -4,    -2,    5,     8,     11,    26,    28,
+    34,    32,    34,    30,    21,    18,    15,    13,    8,     5,     14,
+    13,    7,     8,     4,     -5,    -7,    -4,    -9,    -13,   -17,   -21,
+    -16,   -14,   -12,   -12,   -14,   -11,   -9,    -2,    5,     -1,    2,
+    0,     2,     1,     -3,    -13,   -14,   -16,   -11,   -10,   -9,    -13,
+    -17,   -19,   -25,   -21,   -21,   -20,   -13,   -5,    -3,    0,     3,
+    6,     5,     1,     0,     -1,    -7,    -10,   -11,   -9,    -6,    -7,
+    -11,   -10,   -5,    -14,   -20,   -23,   -22,   -22,   -19,   -15,   -12,
+    -6,    -5,    3,     13,    16,    17,    25,    26,    28,    34,    34,
+    33,    34,    30,    21,    22,    18,    13,    20,    22,    24,    27,
+    26,    23,    21,    18,    9,     5,     -2,    -7,    -8,    -10,   -8,
+    -8,    -4,    2,     2,     -1,    -7,    -10,   -8,    -12,   -13,   -15,
+    -9,    -5,    -4,    -3,    -6,    -11,   -11,   -18,   -16,   -13,   -10,
+    -12,   -6,    0,     -2,    0,     -3,    -4,    -8,    -12,   -19,   -16,
+    -17,   -19,   -23,   -30,   -33,   -36,   -38,   -39,   -40,   -36,   -37,
+    -32,   -27,   -25,   -31,   -38,   -41,   -47,   -52,   -50,   -42,   -32,
+    -16,   -7,    -3,    0,     -1,    -1,    -5,    -16,   -23,   -29,   -34,
+    -33,   -27,   -17,   -11,   1,     4,     10,    18,    21,    24,    24,
+    25,    30,    34,    30,    29,    26,    23,    20,    15,    14,    13,
+    14,    16,    23,    28,    21,    23,    21,    13,    12,    12,    14,
+    17,    21,    26,    27,    30,    30,    26,    20,    15,    15,    9,
+    8,     9,     10,    7,     8,     7,     1,     -2,    -6,    -10,   -10,
+    -12,   -15,   -10,   -7,    -6,    -5,    0,     -3,    -3,    -12,   -25,
+    -35,   -49,   -53,   -49,   -51,   -48,   -46,   -48,   -39,   -33,   -31,
+    -37,   -42,   -47,   -49,   -46,   -47,   -47,   -46,   -42,   -39,   -33,
+    -26,   -23,   -14,   -8,    -9,    -7,    -10,   -11,   -13,   -13,   -19,
+    -20,   -16,   -11,   -9,    7,     16,    21,    29,    27,    29,    28,
+    21,    14,    13,    17,    19,    20,    18,    13,    17,    16,    18,
+    20,    17,    13,    16,    23,    26,    26,    25,    27,    31,    30,
+    31,    34,    32,    35,    32,    36,    31,    26,    23,    27,    27,
+    29,    27,    26,    32,    31,    28,    26,    23,    14,    6,     0,
+    -4,    -7,    -9,    -10,   -8,    -3,    4,     12,    11,    15,    11,
+    8,     2,     -3,    -3,    -4,    -6,    -11,   -14,   -20,   -28,   -32,
+    -38,   -46,   -42,   -44,   -40,   -34,   -26,   -29,   -25,   -23,   -24,
+    -17,   -21,   -26,   -23,   -25,   -19,   -10,   -11,   -10,   -10,   -12,
+    -9,    -3,    0,     -3,    -7,    -10,   -13,   -10,   -14,   -13,   -17,
+    -22,   -22,   -30,   -28,   -29,   -26,   -18,   -6,    -1,    -3,    -4,
+    -6,    -10,   -13,   -10,   -14,   -16,   -11,   -15,   -9,    -3,    -6,
+    -1,    2,     3,     4,     6,     6,     3,     4,     12,    14,    17,
+    21,    19,    20,    16,    17,    15,    21,    21,    22,    20,    17,
+    16,    16,    20,    17,    15,    9,     5,     11,    18,    24,    28,
+    26,    23,    23,    26,    22,    18,    21,    23,    26,    27,    25,
+    27,    29,    26,    20,    10,    7,     11,    8,     16,    25,    33,
+    37,    38,    39,    35,    30,    20,    13,    9,     6,     5,     13,
+    13,    14,    15,    12,    8,     3,     3,     3,     2,     9,     11,
+    10,    5,     5,     0,     -7,    -11,   -12,   -15,   -17,   -12,   -13,
+    -18,   -19,   -21,   -24,   -22,   -27,   -34,   -36,   -36,   -32,   -20,
+    -16,   -15,   -5,    -5,    -9,    -10,   -9,    -17,   -19,   -20,   -14,
+    -13,   -10,   -4,    -7,    -7,    -14,   -19,   -28,   -31,   -30,   -31,
+    -23,   -19,   -20,   -12,   -11,   -14,   -16,   -20,   -18,   -20,   -21,
+    -24,   -29,   -30,   -30,   -34,   -31,   -25,   -21,   -18,   -11,   -4,
+    2,     2,     3,     3,     2,     4,     -1,    -4,    -8,    -3,    -1,
+    7,     15,    18,    22,    20,    20,    16,    16,    14,    13,    21,
+    25,    26,    35,    28,    28,    28,    25,    21,    19,    18,    21,
+    24,    20,    25,    28,    19,    16,    15,    8,     3,     -1,    3,
+    5,     13,    18,    25,    31,    33,    39,    36,    36,    32,    36,
+    37,    39,    42,    36,    32,    27,    30,    24,    18,    15,    10,
+    7,     5,     6,     -1,    -4,    -10,   -17,   -15,   -19,   -15,   -7,
+    -4,    3,     0,     3,     4,     -2,    -7,    -13,   -21,   -23,   -28,
+    -27,   -26,   -25,   -15,   -10,   -4,    -6,    -5,    -9,    -5,    -3,
+    1,     2,     -1,    1,     -4,    -7,    -8,    -17,   -17,   -15,   -14,
+    -9,    -5,    -7,    -6,    -9,    -16,   -15,   -15,   -16,   -16,   -11,
+    -15,   -15,   -6,    -6,    -5,    -2,    0,     -9,    -10,   -12,   -13,
+    -10,   -4,    0,     8,     5,     4,     2,     0,     -5,    -8,    -16,
+    -15,   -12,   -3,    9,     17,    24,    26,    30,    28,    22,    17,
+    14,    9,     8,     9,     8,     11,    12,    12,    15,    14,    18,
+    20,    17,    19,    22,    21,    12,    5,     0,     3,     -3,    -4,
+    -6,    -7,    1,     8,     8,     8,     10,    2,     -3,    -8,    -15,
+    -20,   -24,   -22,   -23,   -13,   -6,    -7,    -5,    -10,   -8,    -15,
+    -19,   -22,   -20,   -17,   -18,   -13,   -10,   -1,    6,     5,     3,
+    1,     -5,    -11,   -10,   -14,   -19,   -15,   -13,   -8,    -2,    -3,
+    -4,    -3,    -4,    -1,    1,     0,     -3,    -4,    -8,    -18,   -21,
+    -25,   -24,   -16,   -9,    -2,    1,     5,     1,     3,     -2,    -7,
+    -10,   -23,   -30,   -29,   -23,   -9,    -3,    4,     11,    11,    6,
+    2,     0,     -12,   -20,   -28,   -24,   -22,   -17,   -22,   -19,   -14,
+    -21,   -17,   -17,   -12,   -8,    -3,    2,     0,     -6,    -5,    -8,
+    -12,   -17,   -27,   -34,   -31,   -30,   -27,   -19,   -14,   -14,   -14,
+    -14,   -19,   -22,   -21,   -19,   -14,   -1,    5,     9,     8,     6,
+    5,     -4,    -2,    -3,    -3,    -1,    -2,    -3,    2,     7,     8,
+    7,     6,     6,     3,     2,     1,     -2,    0,     6,     11,    18,
+    18,    19,    17,    14,    9,     4,     3,     3,     0,     -1,    3,
+    -1,    -5,    0,     -2,    0,     1,     7,     7,     8,     20,    29,
+    33,    31,    24,    14,    5,     -6,    -11,   -8,    -11,   -2,    6,
+    10,    12,    16,    26,    26,    24,    18,    12,    10,    4,     7,
+    6,     -2,    -12,   -17,   -17,   -20,   -23,   -23,   -18,   -8,    1,
+    3,     5,     6,     3,     0,     -6,    -12,   -12,   -15,   -12,   -7,
+    3,     3,     8,     7,     7,     7,     1,     -1,    -1,    4,     11,
+    17,    25,    32,    35,    42,    50,    52,    56,    50,    55,    53,
+    52,    47,    40,    38,    30,    26,    27,    28,    29,    25,    23,
+    23,    28,    30,    25,    26,    21,    19,    14,    9,     16,    22,
+    25,    33,    39,    45,    49,    48,    55,    51,    43,    35,    20,
+    14,    13,    23,    25,    24,    20,    22,    28,    22,    22,    17,
+    16,    13,    10,    10,    10,    9,     9,     14,    11,    10,    10,
+    4,     0,     0,     -2,    -3,    -5,    -7,    -3,    1,     -8,    -8,
+    -9,    -4,    4,     9,     11,    14,    11,    6,     8,     3,     -6,
+    -10,   -19,   -22,   -24,   -27,   -22,   -16,   -21,   -25,   -33,   -33,
+    -32,   -30,   -21,   -13,   -6,    -5,    2,     1,     4,     9,     7,
+    5,     1,     1,     8,     6,     7,     6,     0,     -6,    -15,   -18,
+    -23,   -22,   -23,   -25,   -22,   -21,   -19,   -17,   -13,   -10,   -10,
+    -16,   -17,   -15,   -13,   -8,    -9,    -14,   -13,   -17,   -20,   -26,
+    -28,   -31,   -29,   -26,   -23,   -13,   -10,   -6,    -1,    5,     7,
+    2,     -3,    -7,    -20,   -18,   -16,   -21,   -27,   -33,   -25,   -27,
+    -22,   -22,   -21,   -16,   -11,   -7,    -2,    2,     11,    18,    11,
+    9,     4,     1,     -1,    -6,    -4,    -5,    -9,    -12,   -16,   -25,
+    -29,   -37,   -37,   -38,   -37,   -33,   -23,   -16,   -14,   -7,    -1,
+    -4,    -3,    -4,    -5,    -11,   -14,   -8,    -8,    -8,    -8,    -9,
+    -4,    -14,   -21,   -22,   -21,   -18,   -15,   -2,    3,     -3,    0,
+    -2,    0,     -4,    -7,    -1,    -2,    3,     3,     -3,    -10,   -13,
+    -10,   -16,   -19,   -17,   -17,   -14,   -7,    5,     5,     7,     8,
+    12,    7,     0,     -5,    -13,   -17,   -18,   -14,   -7,    -4,    3,
+    11,    11,    12,    11,    8,     4,     -5,    -5,    -11,   -15,   -17,
+    -23,   -22,   -18,   -14,   -14,   -12,   -6,    -4,    -1,    3,     1,
+    -4,    -10,   -22,   -29,   -30,   -26,   -15,   -2,    6,     16,    21,
+    28,    32,    25,    24,    20,    9,     5,     0,     3,     7,     10,
+    11,    13,    17,    15,    16,    13,    11,    11,    8,     7,     1,
+    1,     -5,    -2,    -2,    -1,    4,     8,     17,    22,    24,    24,
+    26,    23,    20,    17,    16,    9,     4,     6,     5,     8,     2,
+    -1,    -5,    -4,    -10,   -14,   -14,   -17,   -19,   -18,   -16,   -14,
+    -6,    -3,    1,     3,     0,     -4,    -6,    -4,    -1,    -1,    2,
+    5,     3,     8,     7,     7,     14,    13,    20,    24,    29,    24,
+    12,    7,     -1,    -6,    -15,   -22,   -20,   -27,   -22,   -14,   -6,
+    2,     7,     9,     9,     2,     -3,    -7,    -8,    -10,   -9,    -3,
+    -6,    -11,   -12,   -8,    -5,    -4,    -5,    -3,    0,     3,     6,
+    6,     7,     5,     -7,    -10,   -14,   -13,   -14,   -17,   -11,   -7,
+    -4,    1,     1,     4,     -4,    -8,    -18,   -23,   -23,   -25,   -19,
+    -16,   -15,   -9,    3,     10,    19,    25,    30,    31,    26,    27,
+    23,    19,    16,    8,     7,     2,     0,     -1,    -1,    1,     5,
+    6,     6,     1,     3,     -1,    -7,    -11,   -17,   -19,   -19,   -7,
+    0,     3,     11,    12,    18,    20,    16,    9,     -2,    -7,    -14,
+    -19,   -22,   -30,   -33,   -34,   -36,   -26,   -14,   -11,   -9,    -3,
+    0,     -2,    1,     -2,    -3,    -5,    -12,   -15,   -19,   -14,   -9,
+    -8,    -2,    -6,    -13,   -15,   -19,   -22,   -25,   -26,   -21,   -20,
+    -11,   -1,    1,     5,     9,     13,    15,    12,    11,    3,     1,
+    -1,    0,     8,     13,    16,    16,    15,    16,    15,    12,    9,
+    7,     8,     4,     6,     4,     3,     3,     7,     0,     -4,    -8,
+    -11,   -18,   -18,   -15,   -20,   -23,   -21,   -22,   -21,   -27,   -25,
+    -15,   -7,    -2,    8,     9,     8,     8,     3,     3,     7,     8,
+    8,     8,     12,    11,    12,    4,     -1,    -7,    -11,   -15,   -18,
+    -17,   -17,   -20,   -19,   -13,   -11,   -3,    -3,    -1,    1,     -3,
+    1,     1,     8,     10,    15,    24,    26,    29,    34,    36,    26,
+    20,    12,    -2,    -6,    -9,    -7,    -6,    1,     10,    13,    19,
+    22,    22,    18,    21,    24,    28,    35,    37,    34,    33,    34,
+    34,    30,    19,    15,    10,    19,    21,    23,    24,    21,    19,
+    18,    21,    22,    22,    27,    30,    31,    32,    33,    32,    32,
+    24,    18,    10,    8,     10,    10,    6,     2,     -7,    -14,   -22,
+    -29,   -27,   -29,   -32,   -30,   -28,   -23,   -22,   -11,   -11,   -13,
+    -3,    2,     -1,    1,     1,     -3,    -7,    -5,    -7,    -11,   -17,
+    -23,   -25,   -26,   -27,   -26,   -23,   -14,   -5,    -3,    -1,    -2,
+    -2,    -1,    1,     -2,    -7,    -4,    2,     4,     10,    13,    6,
+    3,     -2,    -6,    -7,    -11,   -17,   -21,   -15,   -7,    -2,    11,
+    16,    22,    25,    25,    23,    24,    23,    21,    22,    25,    23,
+    17,    17,    12,    8,     -2,    -4,    1,     0,     4,     9,     8,
+    10,    9,     9,     15,    13,    10,    8,     1,     1,     -3,    1,
+    4,     11,    10,    9,     5,     5,     4,     1,     -1,    -4,    0,
+    8,     7,     4,     3,     3,     0,     -9,    -16,   -19,   -20,   -21,
+    -18,   -16,   -11,   -10,   -9,    -13,   -12,   -19,   -25,   -21,   -15,
+    -5,    8,     14,    21,    24,    18,    20,    17,    6,     1,     -2,
+    -2,    1,     1,     4,     1,     -3,    2,     0,     -3,    -3,    -4,
+    1,     0,     -5,    -11,   -17,   -21,   -20,   -20,   -20,   -14,   -9,
+    -3,    3,     7,     5,     3,     1,     -1,    -3,    -4,    -1,    1,
+    -5,    -1,    -1,    -7,    -11,   -14,   -12,   -14,   -17,   -18,   -23,
+    -29,   -24,   -27,   -19,   -12,   -13,   -2,    -3,    4,     4,     0,
+    -3,    -5,    -2,    -1,    -5,    -6,    -7,    -7,    -7,    -9,    -13,
+    -9,    -4,    1,     1,     1,     -4,    -11,   -8,    -15,   -19,   -19,
+    -12,   -5,    1,     7,     12,    8,     10,    10,    10,    11,    11,
+    19,    12,    9,     9,     2,     -4,    -13,   -22,   -24,   -25,   -24,
+    -26,   -19,   -14,   -10,   -1,    5,     4,     -1,    -4,    -5,    -10,
+    -14,   -11,   -8,    -10,   -8,    -9,    -7,    -8,    -6,    -1,    -5,
+    -10,   -18,   -27,   -29,   -24,   -19,   -11,   -7,    1,     10,    8,
+    8,     5,     2,     -5,    -1,    -1,    0,     2,     2,     -2,    -8,
+    -8,    -14,   -26,   -25,   -23,   -18,   -9,    2,     2,     7,     13,
+    6,     7,     5,     4,     3,     2,     1,     7,     2,     -1,    1,
+    -2,    2,     0,     -2,    -6,    -3,    5,     7,     9,     6,     5,
+    4,     2,     0,     -1,    -3,    3,     7,     6,     14,    18,    22,
+    20,    22,    19,    13,    9,     2,     -8,    -11,   -6,    -2,    -3,
+    -3,    0,     0,     0,     1,     -1,    -2,    1,     7,     11,    10,
+    11,    17,    17,    11,    11,    4,     6,     6,     13,    19,    22,
+    23,    27,    25,    24,    22,    14,    11,    13,    7,     0,     -3,
+    -9,    -11,   -7,    -7,    -6,    -4,    1,     7,     9,     15,    18,
+    18,    10,    5,     3,     -3,    -6,    -5,    -8,    -5,    4,     8,
+    8,     11,    10,    9,     4,     4,     1,     -3,    -10,   -11,   -8,
+    -16,   -20,   -22,   -19,   -12,   -7,    -10,   -10,   -13,   -14,   -11,
+    -11,   -13,   -18,   -21,   -19,   -17,   -22,   -18,   -22,   -22,   -16,
+    -9,    -3,    0,     3,     6,     3,     3,     -3,    -6,    -9,    -14,
+    -1,    14,    21,    30,    37,    33,    27,    26,    19,    15,    14,
+    11,    20,    12,    9,     10,    19,    20,    19,    22,    20,    22,
+    17,    13,    14,    10,    8,     12,    15,    13,    12,    12,    12,
+    9,     10,    11,    11,    9,     6,     4,     5,     -2,    1,     1,
+    -1,    5,     1,     8,     6,     3,     -1,    -4,    -15,   -24,   -27,
+    -26,   -23,   -19,   -9,    -3,    -4,    -9,    -9,    -10,   -16,   -22,
+    -19,   -18,   -15,   -2,    3,     5,     6,     7,     8,     11,    3,
+    1,     2,     1,     1,     0,     -4,    -13,   -18,   -19,   -19,   -20,
+    -23,   -15,   -10,   -5,    -3,    -1,    -1,    -1,    3,     -1,    0,
+    -8,    -11,   -13,   -14,   -13,   -8,    -6,    -3,    1,     1,     0,
+    0,     5,     4,     5,     5,     5,     4,     0,     -1,    -4,    -13,
+    -22,   -21,   -28,   -26,   -22,   -28,   -23,   -23,   -14,   -11,   -10,
+    -7,    -8,    -5,    -4,    1,     9,     10,    15,    19,    21,    17,
+    18,    19,    16,    13,    16,    21,    27,    29,    22,    22,    13,
+    4,     1,     0,     -5,    -6,    -2,    3,     5,     8,     6,     9,
+    10,    2,     -3,    -9,    -8,    -4,    -2,    -7,    -6,    -4,    -8,
+    -6,    -8,    -11,   -8,    -8,    -6,    2,     -2,    -2,    -1,    2,
+    4,     8,     5,     -1,    -8,    -10,   -7,    -6,    -5,    -6,    -5,
+    6,     13,    22,    28,    33,    31,    38,    35,    28,    27,    22,
+    22,    23,    26,    23,    21,    28,    28,    23,    23,    22,    21,
+    20,    14,    6,     -1,    -5,    -8,    -5,    -1,    2,     5,     5,
+    7,     8,     5,     4,     0,     3,     6,     10,    13,    13,    6,
+    4,     4,     0,     -2,    -3,    0,     3,     5,     7,     9,     7,
+    6,     10,    8,     3,     4,     -1,    -4,    -2,    0,     -2,    -2,
+    -2,    -3,    5,     8,     6,     4,     -1,    -7,    -6,    -7,    -12,
+    -18,   -11,   -2,    -1,    -1,    -1,    -2,    -7,    -7,    -3,    -3,
+    -5,    -6,    -6,    -6,    -6,    -6,    -9,    -12,   -9,    -5,    1,
+    3,     5,     5,     8,     7,     3,     -5,    -3,    -2,    2,     3,
+    5,     5,     -1,    -2,    -4,    -8,    -9,    -9,    -7,    -12,   -13,
+    -17,   -19,   -16,   -19,   -21,   -21,   -19,   -11,   -6,    -3,    7,
+    8,     6,     2,     0,     1,     1,     -2,    -5,    0,     -2,    2,
+    1,     2,     0,     -2,    -1,    -10,   -21,   -25,   -24,   -21,   -19,
+    -14,   -8,    -3,    -5,    0,     0,     -5,    -6,    -3,    -6,    -9,
+    -13,   -19,   -20,   -21,   -21,   -24,   -25,   -27,   -27,   -29,   -26,
+    -19,   -14,   -14,   -13,   -8,    -5,    -10,   -10,   -6,    1,     4,
+    14,    22,    23,    24,    20,    20,    18,    14,    11,    9,     6,
+    8,     12,    15,    18,    18,    12,    8,     9,     9,     9,     7,
+    4,     9,     5,     6,     5,     3,     3,     -1,    -1,    -6,    -10,
+    -6,    -8,    -3,    0,     -2,    -3,    -2,    -6,    -6,    -7,    -3,
+    -3,    -3,    -2,    1,     -1,    -10,   -7,    -13,   -21,   -23,   -20,
+    -19,   -18,   -18,   -19,   -15,   -16,   -7,    -6,    -9,    -13,   -12,
+    -6,    -1,    3,     6,     7,     5,     3,     -3,    -11,   -18,   -20,
+    -26,   -29,   -27,   -27,   -24,   -30,   -29,   -28,   -23,   -18,   -21,
+    -18,   -15,   -9,    1,     9,     17,    21,    23,    18,    14,    5,
+    -1,    -2,    -1,    0,     3,     6,     5,     4,     4,     0,     -1,
+    1,     -4,    -9,    -13,   -11,   -20,   -21,   -19,   -14,   -9,    -4,
+    1,     6,     10,    16,    24,    30,    35,    31,    38,    37,    35,
+    39,    36,    36,    32,    30,    33,    31,    24,    19,    12,    4,
+    -1,    -7,    -11,   -7,    -5,    -3,    2,     6,     10,    16,    19,
+    21,    21,    16,    10,    14,    12,    14,    13,    12,    12,    5,
+    6,     2,     0,     1,     3,     4,     6,     9,     6,     2,     -1,
+    -3,    -10,   -15,   -13,   -17,   -19,   -15,   -16,   -15,   -13,   -8,
+    -8,    -7,    -10,   -5,    -2,    1,     5,     5,     11,    10,    12,
+    10,    9,     9,     15,    23,    33,    35,    33,    34,    34,    35,
+    34,    24,    30,    26,    23,    21,    20,    15,    10,    3,     4,
+    0,     -7,    -8,    -9,    -9,    -8,    -4,    0,     5,     5,     2,
+    3,     -2,    0,     0,     -1,    0,     -1,    1,     2,     6,     3,
+    1,     -9,    -5,    -6,    -2,    -8,    -12,   -9,    -10,   -7,    -8,
+    -8,    -6,    -2,    -2,    -1,    0,     -2,    -1,    -8,    -18,   -19,
+    -27,   -37,   -42,   -40,   -39,   -33,   -30,   -23,   -16,   -16,   -9,
+    -13,   -11,   -10,   -10,   -8,    -3,    -1,    2,     0,     -1,    2,
+    6,     4,     8,     10,    17,    21,    28,    31,    33,    28,    20,
+    12,    8,     -3,    -5,    -4,    -3,    2,     6,     9,     8,     2,
+    7,     4,     -6,    -9,    -15,   -13,   -15,   -17,   -14,   -11,   -12,
+    -5,    -6,    -4,    -6,    -11,   -11,   -7,    -4,    -6,    -8,    -13,
+    -10,   -7,    -12,   -11,   -12,   -13,   -12,   -9,    -9,    -10,   -10,
+    -6,    -8,    -8,    -7,    -9,    -9,    -7,    2,     5,     5,     6,
+    3,     4,     6,     3,     -1,    -2,    -2,    -2,    1,     5,     3,
+    4,     2,     -2,    -7,    -9,    -13,   -11,   -8,    2,     12,    23,
+    31,    37,    41,    40,    37,    36,    31,    31,    27,    28,    24,
+    13,    16,    14,    15,    9,     4,     4,     5,     4,     7,     12,
+    16,    14,    11,    13,    6,     -2,    -4,    -1,    -3,    3,     6,
+    6,     9,     7,     9,     7,     5,     0,     1,     -1,    -2,    -4,
+    -1,    0,     0,     -4,    0,     -4,    -9,    -15,   -16,   -18,   -15,
+    -10,   -6,    -8,    -5,    -2,    -2,    0,     4,     7,     0,     -2,
+    -3,    4,     3,     2,     -1,    -3,    -8,    -19,   -19,   -19,   -16,
+    -8,    -5,    0,     1,     2,     1,     -1,    -2,    -10,   -12,   -10,
+    -4,    3,     4,     2,     7,     8,     4,     1,     -5,    -5,    -4,
+    -1,    9,     10,    12,    15,    15,    14,    11,    20,    16,    19,
+    18,    26,    29,    21,    23,    16,    16,    3,     -3,    -4,    -10,
+    -12,   -10,   -6,    -7,    -12,   -17,   -14,   -16,   -19,   -13,   -10,
+    -13,   -13,   -2,    2,     3,     7,     13,    22,    21,    21,    21,
+    24,    27,    23,    22,    20,    17,    17,    16,    13,    11,    5,
+    1,     1,     5,     5,     3,     2,     -1,    2,     -5,    -6,    -3,
+    -11,   -9,    -6,    -5,    -10,   -4,    -1,    1,     2,     -1,    -4,
+    -4,    -9,    -9,    -7,    -3,    3,     -2,    1,     1,     4,     -4,
+    -8,    -8,    -17,   -17,   -13,   -13,   -18,   -18,   -25,   -27,   -21,
+    -22,   -18,   -7,    -1,    5,     9,     11,    11,    11,    15,    11,
+    4,     1,     6,     8,     17,    12,    10,    5,     -2,    -3,    -14,
+    -17,   -25,   -26,   -22,   -20,   -13,   -12,   -12,   -13,   -10,   -4,
+    -6,    -6,    -4,    -6,    -4,    0,     -3,    -7,    -7,    -10,   -17,
+    -14,   -9,    -3,    4,     4,     6,     1,     0,     0,     -6,    -3,
+    -4,    -3,    -6,    -9,    -9,    -5,    0,     1,     2,     -2,    3,
+    -1,    -4,    -5,    -11,   -14,   -17,   -14,   -12,   -14,   -19,   -21,
+    -25,   -35,   -40,   -39,   -31,   -24,   -13,   -4,    -1,    0,     0,
+    2,     -2,    -5,    -8,    -8,    -9,    -6,    -2,    0,     -5,    -6,
+    2,     5,     4,     1,     6,     8,     9,     14,    13,    19,    15,
+    19,    13,    14,    20,    16,    16,    14,    14,    17,    13,    12,
+    11,    6,     -1,    -7,    -9,    -10,   -11,   -2,    8,     12,    12,
+    12,    8,     4,     1,     -3,    -4,    -4,    -3,    1,     9,     14,
+    16,    10,    12,    9,     6,     4,     -1,    8,     6,     3,     6,
+    1,     -11,   -10,   -10,   -13,   -9,    -6,    -2,    -2,    9,     13,
+    17,    17,    19,    17,    16,    9,     -2,    -5,    -5,    -3,    -9,
+    -8,    -8,    -12,   -17,   -16,   -18,   -15,   -9,    -7,    1,     10,
+    17,    18,    23,    25,    23,    20,    15,    17,    18,    23,    33,
+    40,    43,    45,    51,    53,    47,    36,    27,    10,    5,     1,
+    4,     5,     4,     0,     0,     6,     7,     8,     9,     3,     2,
+    1,     0,     -1,    3,     5,     5,     13,    7,     4,     4,     3,
+    11,    17,    21,    31,    31,    31,    31,    28,    26,    23,    19,
+    16,    17,    16,    10,    10,    12,    9,     7,     -1,    -7,    -12,
+    -15,   -15,   -15,   -13,   -13,   -16,   -19,   -19,   -23,   -31,   -34,
+    -38,   -39,   -31,   -30,   -21,   -21,   -18,   -11,   -16,   -20,   -25,
+    -22,   -18,   -14,   -7,    -8,    -3,    2,     10,    13,    12,    10,
+    6,     2,     0,     0,     0,     -6,    -4,    -1,    0,     0,     -1,
+    -2,    1,     3,     8,     9,     3,     6,     2,     -4,    -2,    -3,
+    -7,    -4,    -3,    2,     6,     8,     10,    12,    15,    11,    15,
+    12,    13,    14,    15,    18,    14,    8,     4,     4,     3,     -4,
+    -5,    -4,    -2,    -3,    -2,    4,     9,     13,    18,    21,    20,
+    18,    15,    11,    6,     7,     10,    8,     6,     3,     -3,    -7,
+    -14,   -21,   -29,   -33,   -32,   -26,   -17,   -12,   -11,   -9,    -3,
+    -10,   -13,   -18,   -23,   -21,   -26,   -26,   -24,   -28,   -25,   -29,
+    -30,   -30,   -27,   -17,   -7,    2,     10,    13,    16,    16,    17,
+    18,    17,    19,    19,    20,    15,    14,    16,    14,    10,    5,
+    0,     -4,    -18,   -21,   -25,   -20,   -16,   -13,   -8,    -5,    2,
+    6,     11,    12,    18,    16,    18,    15,    13,    17,    18,    22,
+    21,    25,    26,    25,    26,    28,    31,    27,    20,    10,    3,
+    -6,    -10,   -16,   -19,   -18,   -15,   -13,   -10,   -2,    0,     2,
+    4,     3,     5,     -1,    0,     1,     2,     0,     -2,    -1,    -6,
+    -5,    -7,    -12,   -10,   -9,    -4,    -1,    3,     4,     2,     4,
+    4,     3,     -3,    -6,    -11,   -14,   -15,   -23,   -25,   -29,   -30,
+    -28,   -25,   -22,   -19,   -21,   -19,   -11,   -7,    -7,    -3,    -3,
+    -6,    -8,    -13,   -10,   -10,   -5,    1,     4,     9,     7,     6,
+    6,     4,     -5,    -11,   -8,    -6,    -3,    0,     3,     7,     11,
+    7,     3,     5,     6,     10,    12,    14,    16,    8,     5,     -1,
+    -1,    4,     0,     0,     -3,    -5,    -5,    -4,    -2,    -2,    1,
+    4,     7,     5,     10,    9,     6,     9,     12,    19,    28,    32,
+    32,    33,    31,    29,    20,    17,    16,    14,    15,    6,     -2,
+    -5,    -7,    -10,   -10,   -11,   -9,    -6,    -3,    8,     10,    10,
+    10,    12,    12,    7,     7,     5,     3,     2,     2,     -2,    -5,
+    -4,    -7,    -2,    -6,    -5,    -6,    -11,   -14,   -13,   -10,   -11,
+    -15,   -16,   -11,   -11,   -11,   -10,   -16,   -15,   -15,   -16,   -10,
+    -11,   -11,   -5,    -1,    2,     1,     2,     0,     1,     4,     8,
+    5,     -4,    -2,    -4,    -12,   -18,   -24,   -20,   -25,   -14,   -3,
+    4,     11,    13,    13,    7,     4,     -4,    -9,    -13,   -17,   -10,
+    -6,    -1,    0,     2,     2,     -1,    1,     -8,    -18,   -22,   -19,
+    -19,   -22,   -20,   -22,   -20,   -17,   -12,   -9,    -4,    3,     9,
+    9,     9,     7,     6,     13,    10,    11,    8,     4,     -1,    5,
+    7,     7,     8,     4,     2,     2,     -2,    -8,    -11,   -16,   -18,
+    -12,   -12,   -9,    -2,    3,     3,     5,     5,     6,     9,     11,
+    20,    22,    26,    30,    28,    22,    15,    15,    10,    11,    9,
+    6,     9,     9,     11,    10,    12,    10,    8,     8,     7,     9,
+    4,     3,     9,     5,     1,     2,     0,     -3,    -3,    0,     3,
+    0,     -2,    1,     4,     6,     4,     0,     1,     -4,    -13,   -13,
+    -11,   -20,   -21,   -15,   -17,   -23,   -22,   -24,   -29,   -24,   -29,
+    -32,   -21,   -13,   -11,   -9,    -9,    -8,    -13,   -11,   -11,   -11,
+    -11,   -17,   -17,   -21,   -23,   -27,   -32,   -33,   -32,   -31,   -35,
+    -31,   -26,   -24,   -18,   -10,   -1,    5,     13,    17,    15,    13,
+    8,     4,     6,     9,     10,    13,    11,    12,    13,    9,     5,
+    6,     8,     12,    21,    25,    24,    23,    16,    8,     7,     0,
+    -3,    -8,    -9,    -2,    1,     11,    18,    25,    30,    31,    27,
+    21,    19,    19,    18,    18,    22,    24,    16,    14,    8,     2,
+    -4,    -9,    -7,    -10,   -6,    -8,    -8,    -13,   -14,   -11,   -13,
+    -8,    -7,    6,     9,     10,    15,    17,    11,    11,    9,     2,
+    2,     -2,    2,     -6,    -6,    -7,    -14,   -11,   -12,   -13,   -17,
+    -22,   -25,   -30,   -24,   -16,   -4,    5,     2,     7,     5,     2,
+    -1,    1,     -4,    -4,    4,     8,     8,     5,     6,     6,     2,
+    1,     -2,    -9,    -14,   -17,   -16,   -15,   -14,   -12,   -11,   -6,
+    -6,    -2,    -3,    -3,    6,     13,    18,    27,    27,    26,    24,
+    22,    19,    18,    19,    12,    8,     7,     -2,    0,     -6,    -8,
+    -6,    -4,    -6,    -14,   -16,   -16,   -15,   -12,   -2,    6,     12,
+    16,    18,    14,    16,    13,    12,    17,    16,    17,    17,    12,
+    13,    10,    14,    14,    10,    2,     -1,    -3,    -5,    -10,   -15,
+    -13,   -20,   -21,   -21,   -21,   -19,   -20,   -18,   -8,    -4,    -1,
+    -1,    4,     2,     -3,    0,     -5,    -5,    -3,    -1,    0,     6,
+    5,     6,     7,     7,     3,     2,     1,     -5,    -3,    0,     3,
+    5,     7,     4,     10,    15,    15,    11,    6,     8,     9,     14,
+    19,    18,    14,    12,    16,    15,    11,    9,     9,     5,     4,
+    0,     -7,    -12,   -18,   -22,   -29,   -32,   -36,   -37,   -38,   -39,
+    -32,   -24,   -20,   -14,   -10,   -2,    0,     1,     9,     13,    21,
+    26,    31,    35,    40,    38,    32,    33,    25,    14,    11,    7,
+    1,     -1,    -6,    -5,    -11,   -20,   -22,   -19,   -16,   -9,    2,
+    9,     14,    14,    13,    13,    12,    10,    3,     2,     1,     0,
+    6,     5,     -1,    -4,    -13,   -17,   -21,   -25,   -29,   -30,   -23,
+    -14,   -4,    4,     11,    11,    12,    13,    13,    5,     6,     6,
+    7,     5,     5,     9,     -2,    3,     0,     -2,    -3,    -5,    -1,
+    3,     9,     16,    18,    17,    17,    11,    5,     1,     -4,    -13,
+    -12,   -7,    -7,    1,     6,     4,     2,     3,     1,     1,     0,
+    -1,    -5,    -5,    -3,    -5,    -1,    8,     9,     7,     12,    7,
+    6,     4,     3,     -1,    -1,    -4,    -14,   -16,   -18,   -24,   -34,
+    -44,   -37,   -37,   -36,   -28,   -19,   -15,   -6,    -2,    -3,    2,
+    5,     6,     3,     6,     6,     9,     7,     3,     -4,    -15,   -25,
+    -34,   -37,   -41,   -41,   -38,   -33,   -27,   -22,   -14,   -15,   -18,
+    -18,   -15,   -8,    -7,    -2,    2,     0,     4,     12,    13,    10,
+    17,    20,    16,    17,    23,    24,    22,    24,    22,    28,    26,
+    24,    22,    26,    28,    27,    23,    17,    10,    4,     4,     1,
+    -1,    0,     4,     9,     15,    14,    15,    14,    14,    13,    8,
+    0,     -1,    -11,   -13,   -4,    -3,    -5,    -3,    -1,    -6,    -5,
+    -7,    -4,    -2,    2,     7,     15,    20,    14,    13,    8,     2,
+    -6,    -15,   -23,   -25,   -20,   -22,   -20,   -14,   -10,   -4,    -2,
+    1,     -10,   -15,   -12,   -8,    -8,    -7,    -5,    -10,   -12,   -20,
+    -28,   -26,   -24,   -16,   -8,    -5,    3,     8,     9,     12,    12,
+    12,    14,    13,    12,    10,    13,    23,    29,    28,    33,    36,
+    32,    28,    23,    25,    26,    30,    34,    27,    22,    16,    12,
+    3,     -6,    -13,   -13,   -15,   -14,   -9,    -11,   -13,   -13,   -16,
+    -15,   -20,   -22,   -20,   -32,   -30,   -29,   -24,   -18,   -18,   -18,
+    -13,   -15,   -15,   -16,   -17,   -10,   -11,   -12,   -15,   -17,   -17,
+    -19,   -21,   -22,   -26,   -28,   -21,   -18,   -14,   -5,    2,     6,
+    7,     5,     3,     -2,    0,     -4,    -2,    -3,    -6,    -9,    -12,
+    -11,   -11,   -19,   -23,   -20,   -21,   -16,   -19,   -23,   -22,   -24,
+    -21,   -22,   -17,   -15,   -8,    -1,    4,     14,    18,    23,    24,
+    25,    25,    18,    15,    7,     2,     14,    19,    22,    20,    23,
+    22,    20,    19,    20,    17,    16,    21,    22,    21,    18,    9,
+    3,     -6,    -14,   -19,   -30,   -36,   -40,   -32,   -22,   -21,   -16,
+    -7,    -1,    3,     2,     3,     6,     9,     16,    20,    22,    26,
+    27,    29,    32,    30,    23,    19,    20,    21,    18,    22,    24,
+    15,    14,    9,     9,     7,     6,     9,     9,     16,    22,    20,
+    18,    18,    9,     -1,    -10,   -16,   -19,   -22,   -22,   -20,   -16,
+    -11,   -5,    0,     1,     4,     2,     0,     3,     5,     10,    8,
+    12,    10,    11,    9,     8,     7,     -3,    -4,    -10,   -11,   -5,
+    2,     8,     12,    12,    13,    14,    15,    14,    12,    10,    14,
+    13,    8,     0,     -2,    -3,    -9,    -6,    -13,   -21,   -12,   -12,
+    -8,    -9,    -14,   -16,   -19,   -23,   -22,   -23,   -30,   -26,   -17,
+    -14,   -9,    -2,    3,     11,    16,    17,    17,    11,    12,    13,
+    12,    9,     8,     7,     10,    17,    14,    13,    9,     7,     6,
+    5,     10,    10,    6,     10,    9,     1,     -5,    -10,   -12,   -17,
+    -16,   -14,   -13,   -10,   -6,    -2,    0,     -1,    2,     2,     -1,
+    2,     6,     12,    18,    23,    22,    23,    24,    20,    16,    10,
+    6,     9,     16,    15,    15,    16,    14,    8,     4,     0,     -3,
+    -7,    -4,    -5,    -5,    0,     -4,    1,     1,     1,     -4,    -10,
+    -17,   -25,   -25,   -28,   -28,   -27,   -25,   -20,   -20,   -20,   -22,
+    -14,   -11,   -4,    4,     6,     11,    10,    12,    9,     6,     2,
+    -6,    -10,   -12,   -7,    -1,    -6,    0,     1,     2,     5,     1,
+    -1,    1,     -3,    -6,    -4,    -5,    -4,    -6,    -5,    -7,    -10,
+    -10,   -8,    -11,   -9,    -2,    9,     15,    14,    20,    19,    19,
+    16,    16,    11,    3,     2,     2,     5,     4,     5,     3,     -1,
+    -1,    -6,    -11,   -16,   -18,   -18,   -12,   -17,   -18,   -13,   -15,
+    -5,    -4,    -3,    -1,    2,     6,     7,     11,    14,    17,    17,
+    18,    21,    18,    19,    18,    23,    27,    36,    32,    35,    30,
+    24,    25,    18,    10,    3,     -1,    -4,    -11,   -16,   -21,   -33,
+    -37,   -35,   -36,   -35,   -30,   -26,   -26,   -21,   -10,   -7,    -3,
+    -4,    -3,    -3,    -9,    -12,   -16,   -25,   -22,   -11,   -6,    2,
+    5,     7,     4,     -2,    -8,    -16,   -23,   -30,   -28,   -23,   -20,
+    -11,   -11,   -8,    5,     2,     -3,    -1,    -11,   -15,   -10,   -13,
+    -8,    -8,    -12,   -9,    -10,   -15,   -8,    -4,    -3,    7,     6,
+    13,    20,    25,    24,    25,    27,    28,    25,    23,    22,    27,
+    28,    27,    30,    28,    26,    20,    16,    13,    7,     2,     1,
+    6,     3,     -4,    -6,    -13,   -18,   -19,   -21,   -15,   -3,    -1,
+    10,    16,    17,    20,    24,    28,    28,    26,    26,    28,    27,
+    24,    23,    20,    20,    24,    20,    17,    14,    6,     0,     2,
+    1,     0,     -3,    -7,    -12,   -18,   -29,   -28,   -30,   -32,   -23,
+    -27,   -25,   -20,   -17,   -13,   -11,   -14,   -17,   -21,   -22,   -18,
+    -11,   -12,   -6,    -8,    -9,    -5,    -6,    -10,   -18,   -19,   -16,
+    -13,   -9,    -6,    -7,    -13,   -10,   -14,   -22,   -30,   -37,   -35,
+    -37,   -35,   -34,   -36,   -30,   -23,   -17,   -16,   -16,   -11,   -6,
+    -2,    3,     7,     7,     6,     7,     7,     13,    21,    20,    22,
+    23,    22,    24,    17,    5,     -1,    -2,    -8,    -13,   -14,   -17,
+    -24,   -28,   -23,   -22,   -19,   -12,   -14,   -10,   -14,   -21,   -20,
+    -21,   -22,   -13,   -6,    -1,    6,     4,     10,    11,    8,     10,
+    10,    17,    20,    27,    34,    32,    26,    26,    24,    17,    13,
+    6,     9,     12,    15,    17,    12,    11,    9,     3,     -3,    -3,
+    -8,    -9,    -4,    -2,    -2,    2,     1,     -1,    -3,    -7,    -8,
+    -11,   -15,   -8,    -5,    1,     9,     7,     10,    13,    17,    14,
+    12,    8,     6,     3,     6,     9,     8,     5,     0,     -2,    1,
+    1,     -3,    -6,    -12,   -17,   -17,   -23,   -28,   -33,   -31,   -29,
+    -30,   -35,   -28,   -25,   -17,   -5,    0,     6,     10,    14,    27,
+    31,    26,    31,    30,    32,    41,    42,    42,    43,    34,    32,
+    21,    12,    2,     1,     -3,    -1,    8,     13,    20,    19,    18,
+    19,    13,    8,     5,     7,     6,     7,     6,     4,     3,     -2,
+    0,     2,     -4,    -1,    -3,    2,     12,    22,    33,    32,    31,
+    35,    35,    34,    32,    26,    27,    26,    21,    17,    10,    1,
+    -3,    -14,   -21,   -19,   -21,   -19,   -24,   -24,   -19,   -16,   -13,
+    -16,   -13,   -15,   -17,   -12,   -9,    -4,    7,     19,    27,    33,
+    37,    34,    35,    30,    24,    23,    25,    21,    20,    18,    15,
+    12,    13,    8,     2,     -4,    -12,   -18,   -17,   -14,   -10,   -14,
+    -8,    -14,   -14,   -12,   -14,   -19,   -23,   -31,   -32,   -28,   -30,
+    -22,   -20,   -13,   1,     0,     6,     14,    15,    20,    22,    20,
+    16,    9,     2,     1,     3,     6,     7,     9,     10,    14,    17,
+    16,    14,    4,     -7,    -16,   -31,   -40,   -41,   -40,   -38,   -34,
+    -40,   -37,   -33,   -28,   -22,   -17,   -11,   -10,   -12,   -5,    -5,
+    -8,    -4,    0,     -1,    1,     1,     6,     11,    14,    22,    25,
+    28,    31,    32,    32,    31,    31,    20,    13,    12,    5,     4,
+    4,     2,     0,     -3,    -6,    -8,    -4,    -4,    -4,    -1,    7,
+    9,     10,    13,    13,    16,    10,    7,     3,     6,     8,     8,
+    15,    20,    23,    18,    15,    12,    4,     1,     0,     -4,    -4,
+    -1,    8,     11,    13,    21,    24,    19,    12,    2,     -5,    -11,
+    -15,   -17,   -17,   -19,   -23,   -28,   -34,   -33,   -37,   -29,   -27,
+    -24,   -17,   -13,   -8,    -6,    -2,    5,     3,     4,     -2,    -5,
+    -4,    0,     2,     3,     1,     -5,    -5,    -6,    -11,   -11,   -15,
+    -15,   -19,   -17,   -17,   -21,   -23,   -21,   -22,   -24,   -28,   -27,
+    -25,   -15,   -8,    -1,    2,     2,     3,     3,     2,     -2,    0,
+    1,     -1,    2,     5,     7,     2,     0,     2,     -6,    -9,    -8,
+    -6,    -3,    -3,    3,     0,     5,     0,     0,     -5,    -12,   -13,
+    -20,   -14,   -14,   -6,    -5,    -2,    0,     6,     11,    9,     9,
+    11,    10,    13,    19,    26,    29,    36,    37,    40,    35,    27,
+    20,    13,    6,     3,     -1,    -1,    -1,    -3,    -6,    -8,    -14,
+    -16,   -25,   -28,   -23,   -21,   -24,   -22,   -22,   -22,   -24,   -28,
+    -35,   -43,   -42,   -37,   -29,   -20,   -5,    2,     10,    23,    28,
+    30,    31,    30,    39,    43,    40,    41,    43,    43,    38,    29,
+    18,    14,    12,    3,     6,     3,     3,     0,     -1,    -3,    -5,
+    -5,    -8,    -8,    -10,   -6,    -1,    1,     5,     1,     2,     6,
+    0,     -3,    -7,    -13,   -10,   -7,    -8,    -7,    -3,    -5,    -4,
+    -4,    -4,    -5,    -2,    2,     3,     6,     4,     3,     -1,    -2,
+    -5,    -16,   -22,   -31,   -39,   -38,   -42,   -47,   -42,   -42,   -35,
+    -27,   -30,   -28,   -25,   -26,   -24,   -20,   -19,   -19,   -19,   -19,
+    -14,   -16,   -13,   -9,    -10,   -1,    8,     17,    21,    28,    26,
+    28,    24,    14,    8,     2,     0,     -4,    -4,    -13,   -16,   -16,
+    -13,   -12,   -7,    -5,    0,     -4,    -1,    2,     4,     8,     8,
+    10,    10,    10,    14,    16,    17,    23,    20,    27,    27,    27,
+    21,    14,    11,    0,     -4,    -8,    -8,    -1,    -1,    1,     6,
+    8,     23,    22,    23,    23,    25,    26,    26,    22,    21,    20,
+    22,    17,    12,    8,     3,     -2,    -2,    -4,    -5,    -3,    1,
+    7,     6,     8,     9,     12,    6,     1,     -4,    -8,    -6,    -3,
+    -4,    -5,    -3,    -7,    -6,    -6,    -11,   -11,   -19,   -23,   -26,
+    -28,   -34,   -41,   -41,   -44,   -45,   -47,   -40,   -39,   -33,   -29,
+    -21,   -14,   -16,   -6,    -7,    -3,    1,     6,     8,     11,    14,
+    14,    15,    15,    18,    18,    16,    17,    12,    15,    20,    21,
+    19,    21,    23,    22,    21,    16,    12,    8,     7,     7,     10,
+    13,    13,    16,    16,    16,    16,    15,    15,    12,    14,    14,
+    15,    12,    11,    17,    19,    19,    14,    13,    15,    17,    18,
+    20,    24,    27,    24,    19,    11,    10,    1,     0,     0,     -1,
+    3,     8,     16,    18,    17,    22,    22,    21,    19,    7,     0,
+    1,     -1,    -2,    -1,    -6,    -8,    -12,   -14,   -20,   -21,   -24,
+    -19,   -9,    -4,    -3,    2,     2,     3,     0,     -10,   -19,   -23,
+    -29,   -31,   -35,   -29,   -33,   -28,   -25,   -25,   -19,   -22,   -23,
+    -24,   -21,   -17,   -15,   -17,   -13,   -15,   -12,   -15,   -14,   -14,
+    -12,   -9,    -5,    1,     9,     13,    13,    17,    17,    15,    11,
+    12,    8,     13,    20,    24,    30,    29,    33,    30,    26,    23,
+    13,    9,     4,     3,     3,     5,     3,     2,     5,     3,     2,
+    1,     3,     6,     10,    14,    19,    23,    21,    20,    21,    17,
+    11,    5,     -3,    -7,    -12,   -15,   -16,   -13,   -15,   -13,   -7,
+    -4,    -5,    -5,    -1,    5,     11,    8,     7,     -2,    -2,    -5,
+    -6,    -1,    -2,    0,     2,     8,     13,    15,    17,    15,    16,
+    10,    13,    3,     -1,    -4,    -4,    -4,    0,     8,     13,    15,
+    9,     11,    9,     12,    9,     10,    10,    5,     11,    16,    21,
+    20,    15,    13,    5,     3,     -3,    1,     1,     0,     -4,    -7,
+    -9,    -7,    -9,    -10,   -7,    -6,    -3,    -2,    -3,    -3,    -6,
+    -12,   -16,   -22,   -21,   -26,   -28,   -25,   -24,   -23,   -23,   -28,
+    -32,   -29,   -26,   -26,   -23,   -29,   -23,   -16,   -11,   -7,    -9,
+    -10,   -12,   -18,   -20,   -20,   -26,   -23,   -16,   -17,   -10,   -7,
+    0,     3,     -2,    0,     -4,    -7,    -8,    -6,    -3,    -7,    -5,
+    -5,    1,     0,     -3,    -2,    -3,    5,     7,     10,    19,    17,
+    22,    21,    20,    16,    8,     9,     10,    12,    20,    28,    31,
+    28,    28,    26,    21,    14,    8,     5,     4,     5,     8,     9,
+    9,     13,    17,    16,    14,    20,    17,    13,    16,    17,    18,
+    18,    15,    11,    5,     -2,    -8,    -15,   -17,   -17,   -24,   -24,
+    -23,   -18,   -13,   -13,   -9,    -7,    -4,    0,     3,     6,     2,
+    2,     -4,    -5,    -5,    -4,    -4,    -2,    2,     6,     10,    7,
+    4,     2,     -2,    -3,    -8,    -10,   -14,   -27,   -29,   -37,   -36,
+    -29,   -27,   -19,   -7,    -3,    0,     -2,    2,     8,     13,    18,
+    15,    10,    10,    6,     1,     -5,    -12,   -17,   -20,   -23,   -23,
+    -22,   -19,   -17,   -10,   -6,    -3,    2,     0,     4,     11,    14,
+    19,    16,    6,     7,     3,     3,     4,     1,     7,     8,     7,
+    3,     -2,    0,     0,     0,     -1,    -2,    0,     4,     3,     5,
+    9,     9,     12,    7,     5,     0,     0,     1,     0,     2,     -6,
+    -10,   -9,    -13,   -15,   -19,   -15,   -18,   -16,   -17,   -9,    -5,
+    -2,    2,     2,     3,     7,     2,     -3,    -8,    -13,   -8,    1,
+    8,     12,    15,    17,    17,    11,    7,     0,     -4,    -8,    -8,
+    -3,    -1,    -4,    -6,    -6,    -13,   -12,   -12,   -13,   -12,   -8,
+    -9,    -5,    -4,    -2,    0,     -1,    -6,    -7,    -6,    -10,   -10,
+    -8,    -6,    1,     5,     6,     15,    18,    16,    12,    12,    12,
+    10,    13,    7,     0,     -9,    -10,   -11,   -6,    -8,    -8,    -4,
+    0,     6,     10,    11,    15,    15,    15,    12,    10,    6,     6,
+    11,    12,    20,    25,    23,    25,    18,    12,    6,     -1,    -4,
+    -10,   -12,   -9,    -13,   -16,   -15,   -18,   -18,   -22,   -22,   -17,
+    -14,   -12,   -8,    -3,    1,     4,     11,    13,    7,     0,     -8,
+    -11,   -11,   -13,   -14,   -12,   -11,   -9,    -6,    -5,    -2,    1,
+    5,     6,     10,    18,    17,    15,    13,    11,    12,    13,    10,
+    9,     13,    16,    16,    13,    11,    6,     5,     0,     -5,    -4,
+    -3,    2,     6,     5,     6,     11,    14,    20,    23,    28,    27,
+    22,    24,    23,    22,    16,    17,    12,    7,     -1,    -9,    -10,
+    -9,    -9,    -13,   -11,   -9,    -2,    -2,    -7,    -8,    -6,    -7,
+    -12,   -12,   -10,   0,     5,     11,    13,    11,    10,    7,     3,
+    0,     0,     3,     10,    14,    16,    18,    19,    21,    14,    15,
+    12,    7,     6,     7,     9,     7,     11,    6,     4,     4,     -1,
+    -9,    -12,   -12,   -14,   -9,    -9,    -6,    -5,    -4,    -6,    -7,
+    -12,   -15,   -17,   -27,   -23,   -20,   -19,   -19,   -18,   -24,   -20,
+    -25,   -28,   -33,   -31,   -29,   -27,   -15,   -12,   -7,    -3,    1,
+    -3,    -3,    -5,    -8,    -6,    0,     13,    17,    24,    25,    23,
+    24,    18,    8,     -3,    -4,    -4,    -7,    -3,    1,     4,     7,
+    9,     10,    14,    14,    20,    28,    35,    38,    42,    43,    43,
+    39,    30,    27,    19,    15,    8,     10,    12,    19,    25,    26,
+    27,    23,    22,    15,    10,    6,     8,     4,     6,     6,     3,
+    7,     7,     15,    11,    7,     6,     5,     9,     6,     0,     -3,
+    -14,   -21,   -21,   -30,   -39,   -42,   -40,   -37,   -37,   -36,   -32,
+    -30,   -24,   -21,   -22,   -23,   -24,   -28,   -31,   -31,   -29,   -27,
+    -30,   -31,   -31,   -31,   -34,   -33,   -34,   -26,   -21,   -15,   -10,
+    -5,    -3,    -2,    -3,    -6,    -5,    -11,   -14,   -10,   -5,    0,
+    9,     10,    18,    21,    19,    21,    11,    7,     4,     6,     6,
+    7,     3,     -6,    -9,    -16,   -23,   -24,   -23,   -26,   -18,   -16,
+    -11,   -8,    0,     6,     5,     6,     10,    8,     8,     16,    24,
+    24,    23,    24,    24,    24,    18,    9,     4,     -3,    -11,   -16,
+    -15,   -18,   -14,   -12,   -9,    -3,    -4,    -1,    8,     11,    10,
+    19,    21,    21,    23,    20,    22,    15,    9,     7,     5,     3,
+    1,     12,    13,    10,    18,    23,    31,    37,    40,    36,    38,
+    40,    40,    38,    27,    24,    21,    14,    12,    12,    7,     7,
+    15,    18,    19,    18,    17,    18,    14,    12,    11,    7,     5,
+    7,     9,     9,     15,    14,    15,    18,    16,    7,     0,     -5,
+    -6,    -6,    -6,    -1,    7,     9,     12,    6,     4,     4,     2,
+    -1,    2,     3,     3,     5,     4,     -1,    -13,   -19,   -29,   -34,
+    -39,   -43,   -49,   -54,   -53,   -55,   -55,   -56,   -59,   -58,   -49,
+    -41,   -32,   -19,   -10,   -2,    -4,    -1,    -6,    -19,   -27,   -26,
+    -27,   -27,   -21,   -22,   -20,   -26,   -26,   -20,   -20,   -20,   -21,
+    -17,   -18,   -7,    -6,    -6,    -5,    -1,    7,     18,    10,    16,
+    25,    24,    31,    30,    32,    30,    26,    24,    22,    23,    21,
+    23,    21,    24,    19,    17,    13,    12,    15,    6,     2,     -5,
+    -9,    -13,   -10,   -5,    1,     10,    13,    17,    13,    8,     5,
+    5,     6,     5,     13,    19,    16,    14,    12,    7,     15,    18,
+    19,    16,    4,     -1,    0,     -1,    -2,    -9,    -15,   -19,   -21,
+    -13,   -13,   -10,   -7,    -7,    -7,    -6,    -11,   -22,   -18,   -19,
+    -22,   -22,   -19,   -18,   -10,   -7,    -9,    -7,    -12,   -16,   -20,
+    -27,   -35,   -37,   -37,   -33,   -24,   -14,   -4,    8,     14,    19,
+    19,    16,    12,    6,     2,     -5,    -6,    -11,   -17,   -16,   -14,
+    -13,   -12,   -17,   -21,   -22,   -24,   -18,   -14,   -12,   -1,    4,
+    9,     17,    14,    9,     13,    14,    13,    14,    14,    12,    11,
+    15,    11,    16,    21,    20,    20,    22,    31,    30,    26,    15,
+    13,    6,     8,     5,     1,     -5,    -3,    2,     9,     14,    13,
+    16,    17,    18,    13,    10,    8,     7,     9,     12,    21,    23,
+    23,    21,    19,    16,    14,    5,     -4,    -12,   -15,   -16,   -12,
+    -9,    -12,   -14,   -17,   -16,   -15,   -14,   -15,   -28,   -27,   -24,
+    -12,   -8,    -3,    3,     9,     15,    18,    25,    25,    31,    32,
+    35,    36,    33,    36,    24,    13,    2,     -11,   -19,   -18,   -18,
+    -10,   -6,    -4,    0,     -3,    -3,    -15,   -18,   -17,   -9,    -7,
+    2,     5,     7,     6,     2,     -2,    -12,   -16,   -16,   -9,    -3,
+    6,     8,     15,    17,    16,    18,    11,    5,     -4,    -8,    -17,
+    -16,   -22,   -24,   -25,   -28,   -23,   -19,   -11,   -3,    5,     11,
+    22,    26,    29,    24,    14,    12,    7,     6,     -2,    -1,    2,
+    10,    23,    33,    36,    32,    31,    16,    3,     -4,    -3,    -3,
+    1,     8,     11,    13,    12,    8,     3,     5,     3,     1,     -1,
+    4,     2,     3,     8,     5,     5,     1,     -2,    -1,    -3,    -1,
+    5,     8,     10,    17,    17,    15,    19,    27,    18,    21,    23,
+    19,    20,    15,    1,     -7,    -18,   -24,   -24,   -33,   -28,   -32,
+    -30,   -30,   -30,   -30,   -29,   -30,   -41,   -43,   -50,   -51,   -49,
+    -42,   -32,   -19,   -10,   0,     4,     -2,    5,     9,     8,     12,
+    19,    17,    10,    9,     3,     1,     -4,    -8,    -4,    0,     5,
+    7,     10,    9,     12,    0,     -6,    -7,    -13,   -16,   -10,   -10,
+    -9,    -1,    -1,    -2,    -6,    -11,   -14,   -17,   -18,   -10,   -3,
+    -3,    0,     6,     1,     6,     4,     3,     3,     9,     16,    22,
+    28,    27,    32,    18,    21,    25,    20,    21,    18,    18,    22,
+    23,    15,    8,     -3,    -9,    -10,   -13,   -8,    3,     7,     18,
+    26,    23,    26,    30,    17,    11,    9,     -1,    0,     2,     2,
+    12,    15,    6,     1,     0,     -5,    2,     1,     -3,    -1,    -6,
+    -2,    -4,    -11,   -18,   -30,   -38,   -36,   -33,   -32,   -27,   -19,
+    -18,   -14,   -13,   -16,   -11,   -12,   -12,   -4,    0,     7,     13,
+    13,    10,    11,    6,     3,     3,     3,     4,     10,    4,     -1,
+    -3,    -11,   -21,   -27,   -34,   -33,   -31,   -33,   -28,   -22,   -21,
+    -14,   -8,    -13,   -10,   -8,    -12,   -7,    -11,   -3,    3,     5,
+    7,     7,     -1,    -12,   -13,   -17,   -21,   -8,    -2,    4,     7,
+    13,    18,    18,    16,    15,    13,    11,    15,    13,    12,    17,
+    18,    15,    15,    11,    -3,    -1,    2,     11,    15,    10,    18,
+    13,    10,    12,    9,     2,     2,     4,     -1,    6,     9,     11,
+    5,     7,     13,    8,     9,     10,    11,    9,     7,     11,    5,
+    3,     1,     -9,    -19,   -31,   -40,   -42,   -33,   -27,   -24,   -22,
+    -20,   -25,   -20,   -12,   -17,   -23,   -23,   -25,   -25,   -20,   -18,
+    -17,   -19,   -15,   -22,   -20,   -19,   -13,   -8,    -12,   0,     2,
+    -6,    -1,    -5,    -15,   -10,   -12,   -19,   -8,    -6,    -3,    9,
+    5,     12,    22,    10,    9,     12,    5,     8,     28,    13,    20,
+    25,    11,    16,    19,    10,    15,    14,    6,     23,    19,    18,
+    32,    17,    12,    19,    -1,    -8,    11,    -4,    -8,    9,     -4,
+    -6,    0,     -10,   -7,    -3,    -8,    -11,   -11,   -23,   -7,    -4,
+    -4,    14,    6,     4,     9,     3,     -4,    4,     2,     9,     26,
+    19,    26,    33,    22,    22,    24,    13,    20,    18,    18,    28,
+    28,    19,    24,    16,    -1,    1,     -12,   -34,   -28,   -25,   -27,
+    -13,   6,     8,     21,    25,    22,    19,    3,     4,     0,     -5,
+    6,     8,     1,     6,     8,     -4,    -3,    -10,   -23,   -17,   -9,
+    -10,   3,     6,     -1,    3,     -10,   -22,   -28,   -49,   -49,   -36,
+    -29,   -10,   8,     -1,    4,     14,    -3,    -14,   -5,    -16,   -10,
+    8,     7,     21,    24,    17,    25,    15,    -4,    13,    -7,    -23,
+    0,     -7,    -14,   12,    1,     -18,   -10,   -27,   -43,   -31,   -34,
+    -19,   -3,    -10,   15,    20,    -7,    10,    9,     -20,   7,     28,
+    14,    42,    54,    32,    34,    24,    5,     10,    -11,   -13,   11,
+    -6,    -4,    31,    7,     0,     34,    3,     -9,    5,     -24,   -33,
+    -14,   -11,   -1,    8,     0,     10,    7,     -7,    11,    10,    -6,
+    17,    16,    0,     10,    3,     -26,   -23,   -33,   -39,   -26,   -29,
+    -18,   -6,    -9,    -1,    5,     -11,   -6,    7,     -6,    1,     13,
+    8,     1,     3,     -13,   -23,   -25,   -33,   -28,   -21,   -9,    2,
+    4,     1,     8,     4,     -13,   -5,    -12,   -14,   3,     14,    18,
+    26,    30,    21,    20,    15,    15,    10,    5,     13,    11,    20,
+    25,    29,    18,    19,    9,     -10,   -15,   -13,   -12,   1,     16,
+    20,    30,    39,    37,    21,    15,    3,     -7,    -9,    -1,    2,
+    -6,    -7,    -10,   -20,   -19,   -19,   -31,   -25,   -12,   -15,   -13,
+    -17,   -18,   -14,   -24,   -24,   -18,   -28,   -24,   -3,    1,     17,
+    46,    48,    43,    46,    34,    12,    6,     -14,   -19,   -10,   -14,
+    3,     15,    3,     7,     7,     -13,   4,     9,     -2,    3,     22,
+    19,    25,    41,    48,    46,    36,    42,    40,    24,    33,    50,
+    29,    30,    57,    35,    13,    29,    17,    -9,    5,     15,    7,
+    13,    38,    47,    40,    56,    72,    42,    29,    40,    18,    14,
+    36,    52,    50,    58,    55,    42,    22,    20,    13,    -8,    8,
+    32,    26,    41,    70,    48,    51,    65,    36,    27,    23,    4,
+    5,     1,     -3,    2,     -8,    -23,   -6,    -30,   -46,   -24,   -40,
+    -45,   -22,   -32,   -35,   -24,   -50,   -41,   -35,   -56,   -38,   -29,
+    -55,   -25,   -7,    -40,   -26,   -25,   -63,   -51,   -40,   -61,   -47,
+    -38,   -38,   -5,    2,     3,     26,    -1,    -7,    8,     -20,   -17,
+    10,    -14,   -6,    41,    24,    27,    52,    26,    13,    25,    5,
+    -6,    2,     -7,    -2,    10,    4,     29,    36,    30,    74,    93,
+    91,    131,   150,   132,   167,   177,   158,   189,   188,   178,   200,
+    199,   187,   212,   202,   188,   210,   188,   173,   187,   175,   183,
+    215,   218,   236,   264,   253,   279,   296,   275,   290,   288,   261,
+    261,   261,   230,   216,   199,   157,   160,   147,   115,   108,   84,
+    50,    32,    7,     -30,   -56,   -96,   -130,  -146,  -179,  -199,  -223,
+    -255,  -280,  -293,  -326,  -341,  -352,  -391,  -410,  -429,  -464,  -489,
+    -507,  -538,  -559,  -577,  -602,  -634,  -656,  -679,  -696,  -702,  -700,
+    -699,  -700,  -687,  -666,  -665,  -656,  -634,  -626,  -609,  -572,  -539,
+    -518,  -484,  -462,  -444,  -418,  -390,  -364,  -336,  -295,  -245,  -210,
+    -175,  -127,  -97,   -63,   -28,   10,    45,    83,    121,   167,   222,
+    272,   324,   369,   396,   439,   485,   502,   536,   571,   585,   618,
+    656,   676,   705,   729,   744,   767,   776,   786,   798,   796,   813,
+    849,   855,   865,   883,   862,   843,   834,   794,   781,   778,   767,
+    746,   744,   721,   702,   681,   638,   607,   562,   521,   490,   447,
+    398,   361,   313,   255,   204,   123,   20,    -59,   -143,  -217,  -270,
+    -328,  -400,  -462,  -529,  -607,  -666,  -737,  -797,  -854,  -906,  -936,
+    -944,  -955,  -965,  -976,  -993,  -1003, -1007, -1032, -1040, -1045, -1055,
+    -1039, -1016, -1003, -990,  -995,  -1026, -1046, -1070, -1079, -1058, -1060,
+    -1062, -1028, -1010, -1006, -991,  -1000, -1004, -987,  -981,  -958,  -921,
+    -890,  -852,  -798,  -754,  -713,  -681,  -682,  -658,  -617,  -585,  -524,
+    -452,  -404,  -332,  -258,  -224,  -183,  -144,  -132,  -94,   -64,   -31,
+    37,    99,    147,   219,   280,   329,   389,   439,   483,   563,   632,
+    702,   799,   884,   965,   1050,  1107,  1150,  1209,  1260,  1308,  1383,
+    1446,  1514,  1582,  1632,  1679,  1727,  1770,  1804,  1837,  1872,  1916,
+    1961,  1999,  2038,  2071,  2089,  2097,  2107,  2091,  2084,  2072,  2051,
+    2021,  1998,  1940,  1868,  1814,  1734,  1641,  1559,  1480,  1395,  1305,
+    1213,  1115,  1015,  901,   785,   667,   520,   381,   256,   110,   -26,
+    -141,  -284,  -417,  -528,  -670,  -805,  -935,  -1080, -1206, -1324, -1438,
+    -1527, -1622, -1725, -1798, -1879, -1956, -2006, -2063, -2128, -2166, -2201,
+    -2238, -2257, -2292, -2316, -2337, -2357, -2356, -2362, -2382, -2375, -2368,
+    -2367, -2358, -2337, -2329, -2318, -2296, -2273, -2240, -2195, -2140, -2095,
+    -2044, -1990, -1932, -1872, -1803, -1737, -1673, -1602, -1520, -1428, -1325,
+    -1219, -1112, -1006, -896,  -780,  -681,  -591,  -481,  -388,  -294,  -189,
+    -85,   30,    148,   252,   348,   466,   579,   692,   811,   918,   1041,
+    1162,  1271,  1389,  1507,  1611,  1735,  1864,  1965,  2085,  2203,  2312,
+    2436,  2536,  2614,  2697,  2760,  2812,  2886,  2956,  3010,  3066,  3088,
+    3098,  3120,  3110,  3101,  3106,  3108,  3130,  3149,  3139,  3122,  3085,
+    3016,  2951,  2874,  2770,  2671,  2559,  2435,  2315,  2198,  2059,  1915,
+    1761,  1570,  1387,  1185,  984,   787,   601,   413,   224,   40,    -158,
+    -348,  -560,  -760,  -960,  -1147, -1312, -1471, -1621, -1779, -1925, -2069,
+    -2206, -2333, -2463, -2570, -2664, -2743, -2811, -2860, -2886, -2934, -2976,
+    -3015, -3057, -3074, -3076, -3079, -3060, -3032, -2998, -2950, -2920, -2893,
+    -2863, -2837, -2806, -2761, -2715, -2662, -2607, -2554, -2486, -2402, -2325,
+    -2264, -2190, -2127, -2063, -1989, -1932, -1862, -1788, -1724, -1640, -1545,
+    -1455, -1346, -1234, -1112, -984,  -859,  -735,  -610,  -494,  -384,  -280,
+    -176,  -68,   40,    140,   244,   363,   478,   596,   739,   876,   1001,
+    1128,  1240,  1352,  1474,  1595,  1717,  1853,  1972,  2093,  2215,  2328,
+    2432,  2533,  2641,  2744,  2855,  2949,  3055,  3157,  3242,  3329,  3415,
+    3479,  3528,  3569,  3588,  3617,  3649,  3676,  3708,  3747,  3751,  3753,
+    3744,  3693,  3640,  3576,  3470,  3369,  3248,  3098,  2976,  2838,  2690,
+    2557,  2395,  2222,  2055,  1872,  1675,  1488,  1279,  1057,  851,   623,
+    393,   180,   -74,   -315,  -537,  -771,  -979,  -1161, -1373, -1558, -1729,
+    -1932, -2110, -2294, -2478, -2636, -2785, -2917, -3007, -3094, -3183, -3247,
+    -3319, -3402, -3450, -3510, -3564, -3595, -3622, -3635, -3627, -3635, -3639,
+    -3620, -3620, -3610, -3596, -3581, -3535, -3495, -3455, -3410, -3361, -3323,
+    -3265, -3202, -3141, -3078, -3001, -2919, -2830, -2739, -2640, -2540, -2430,
+    -2320, -2192, -2057, -1909, -1761, -1603, -1422, -1244, -1059, -887,  -726,
+    -570,  -425,  -256,  -92,   69,    238,   411,   557,   728,   910,   1066,
+    1229,  1403,  1561,  1727,  1895,  2050,  2208,  2352,  2492,  2638,  2765,
+    2893,  3025,  3145,  3263,  3387,  3496,  3595,  3707,  3804,  3884,  3975,
+    4046,  4105,  4167,  4204,  4220,  4237,  4243,  4247,  4260,  4255,  4251,
+    4246,  4201,  4143,  4092,  3996,  3885,  3772,  3604,  3435,  3283,  3086,
+    2923,  2742,  2535,  2341,  2130,  1887,  1649,  1411,  1137,  915,   659,
+    398,   163,   -81,   -351,  -580,  -814,  -1069, -1262, -1476, -1689, -1850,
+    -2043, -2237, -2395, -2591, -2763, -2918, -3095, -3224, -3319, -3435, -3508,
+    -3582, -3698, -3772, -3858, -3950, -4008, -4047, -4088, -4093, -4085, -4098,
+    -4064, -4052, -4057, -4033, -4028, -4018, -3991, -3971, -3933, -3865, -3802,
+    -3727, -3633, -3562, -3477, -3392, -3300, -3210, -3115, -3018, -2924, -2819,
+    -2721, -2606, -2490, -2381, -2246, -2111, -1963, -1810, -1638, -1460, -1293,
+    -1132, -980,  -828,  -666,  -496,  -322,  -125,  72,    264,   470,   676,
+    879,   1087,  1280,  1457,  1633,  1799,  1970,  2152,  2327,  2501,  2678,
+    2840,  3007,  3165,  3301,  3434,  3558,  3667,  3791,  3912,  4023,  4140,
+    4257,  4359,  4475,  4554,  4614,  4656,  4682,  4697,  4726,  4749,  4775,
+    4810,  4812,  4812,  4810,  4768,  4697,  4620,  4502,  4368,  4210,  4031,
+    3860,  3663,  3472,  3291,  3076,  2849,  2642,  2392,  2140,  1890,  1610,
+    1325,  1064,  782,   494,   231,   -50,   -329,  -593,  -861,  -1112, -1345,
+    -1588, -1812, -2022, -2257, -2467, -2682, -2924, -3126, -3317, -3495, -3630,
+    -3737, -3855, -3941, -4031, -4128, -4200, -4281, -4348, -4388, -4427, -4449,
+    -4444, -4450, -4458, -4452, -4464, -4460, -4451, -4444, -4425, -4384, -4344,
+    -4289, -4234, -4160, -4076, -4000, -3917, -3837, -3753, -3669, -3558, -3460,
+    -3354, -3230, -3111, -2966, -2824, -2665, -2495, -2333, -2151, -1951, -1752,
+    -1554, -1367, -1222, -1053, -882,  -716,  -520,  -331,  -141,  62,    270,
+    476,   707,   923,   1133,  1349,  1534,  1735,  1943,  2124,  2317,  2511,
+    2668,  2839,  3002,  3140,  3317,  3481,  3615,  3771,  3920,  4050,  4196,
+    4319,  4430,  4556,  4657,  4765,  4868,  4945,  4999,  5057,  5075,  5100,
+    5123,  5133,  5134,  5127,  5104,  5084,  5058,  4968,  4896,  4750,  4575,
+    4381,  4179,  3971,  3776,  3590,  3394,  3209,  2991,  2800,  2535,  2269,
+    1972,  1654,  1319,  998,   697,   384,   105,   -187,  -476,  -759,  -1047,
+    -1316, -1579, -1841, -2085, -2317, -2550, -2745, -2938, -3145, -3326, -3523,
+    -3706, -3859, -3998, -4124, -4218, -4288, -4346, -4386, -4437, -4495, -4550,
+    -4619, -4680, -4732, -4779, -4813, -4820, -4842, -4825, -4791, -4773, -4742,
+    -4715, -4709, -4683, -4652, -4605, -4527, -4428, -4315, -4194, -4086, -3978,
+    -3872, -3779, -3685, -3569, -3458, -3313, -3121, -2921, -2693, -2454, -2230,
+    -1998, -1783, -1588, -1414, -1240, -1069, -886,  -690,  -473,  -256,  -36,
+    170,   384,   594,   797,   1015,  1235,  1449,  1664,  1882,  2098,  2311,
+    2504,  2681,  2843,  3019,  3171,  3337,  3534,  3709,  3885,  4072,  4235,
+    4380,  4524,  4641,  4746,  4864,  4979,  5087,  5213,  5308,  5393,  5450,
+    5468,  5475,  5472,  5452,  5462,  5467,  5453,  5451,  5425,  5342,  5255,
+    5113,  4914,  4725,  4512,  4273,  4053,  3866,  3632,  3436,  3205,  2955,
+    2705,  2420,  2095,  1794,  1503,  1195,  941,   639,   342,   56,    -269,
+    -601,  -894,  -1208, -1499, -1736, -1994, -2239, -2426, -2652, -2891, -3099,
+    -3361, -3588, -3793, -4013, -4183, -4302, -4439, -4523, -4613, -4734, -4809,
+    -4891, -4999, -5056, -5090, -5131, -5092, -5061, -5044, -4987, -4954, -4955,
+    -4924, -4911, -4873, -4809, -4755, -4673, -4555, -4440, -4316, -4187, -4088,
+    -3986, -3881, -3802, -3717, -3605, -3495, -3359, -3207, -3063, -2889, -2698,
+    -2504, -2306, -2088, -1861, -1627, -1415, -1201, -1000, -799,  -593,  -410,
+    -220,  -7,    203,   412,   634,   865,   1126,  1367,  1602,  1838,  2052,
+    2257,  2474,  2659,  2863,  3076,  3255,  3429,  3617,  3773,  3939,  4102,
+    4222,  4358,  4501,  4611,  4733,  4846,  4939,  5056,  5147,  5217,  5301,
+    5357,  5388,  5428,  5417,  5400,  5430,  5422,  5406,  5442,  5446,  5431,
+    5437,  5381,  5304,  5212,  5057,  4874,  4683,  4465,  4249,  4026,  3767,
+    3545,  3304,  3021,  2741,  2450,  2113,  1807,  1490,  1151,  841,   544,
+    212,   -102,  -439,  -788,  -1091, -1413, -1730, -2033, -2336, -2627, -2854,
+    -3118, -3350, -3560, -3781, -4008, -4194, -4376, -4524, -4640, -4757, -4865,
+    -4945, -5016, -5083, -5131, -5170, -5184, -5198, -5208, -5211, -5210, -5209,
+    -5192, -5174, -5154, -5108, -5052, -5002, -4932, -4854, -4780, -4704, -4604,
+    -4514, -4421, -4309, -4208, -4111, -4004, -3880, -3751, -3622, -3496, -3367,
+    -3210, -3047, -2867, -2654, -2430, -2177, -1897, -1651, -1417, -1182, -983,
+    -793,  -593,  -406,  -211,  17,    232,   461,   716,   958,   1197,  1441,
+    1674,  1899,  2130,  2355,  2573,  2788,  3004,  3220,  3419,  3612,  3809,
+    3973,  4120,  4277,  4433,  4573,  4742,  4902,  5037,  5165,  5282,  5377,
+    5460,  5539,  5596,  5654,  5716,  5741,  5759,  5770,  5776,  5762,  5751,
+    5737,  5706,  5675,  5644,  5550,  5446,  5324,  5169,  4974,  4767,  4530,
+    4289,  4067,  3823,  3621,  3391,  3145,  2878,  2575,  2228,  1890,  1525,
+    1149,  807,   473,   145,   -152,  -454,  -769,  -1057, -1374, -1703, -2033,
+    -2372, -2701, -2977, -3258, -3495, -3694, -3897, -4089, -4270, -4483, -4668,
+    -4840, -5015, -5140, -5225, -5304, -5334, -5350, -5390, -5398, -5403, -5428,
+    -5438, -5449, -5472, -5463, -5441, -5401, -5333, -5252, -5151, -5051, -4974,
+    -4880, -4805, -4729, -4626, -4526, -4403, -4248, -4088, -3939, -3778, -3617,
+    -3464, -3308, -3173, -3027, -2852, -2669, -2461, -2233, -1979, -1713, -1455,
+    -1216, -996,  -796,  -610,  -397,  -198,  21,    272,   517,   775,   1037,
+    1295,  1544,  1790,  2007,  2211,  2423,  2634,  2848,  3081,  3319,  3551,
+    3792,  4000,  4171,  4303,  4418,  4518,  4596,  4679,  4807,  4913,  5044,
+    5172,  5288,  5405,  5518,  5609,  5664,  5713,  5735,  5735,  5737,  5701,
+    5691,  5656,  5633,  5611,  5552,  5475,  5394,  5293,  5177,  5064,  4924,
+    4737,  4599,  4420,  4237,  4048,  3828,  3623,  3413,  3183,  2915,  2622,
+    2308,  1980,  1657,  1261,  901,   549,   205,   -85,   -383,  -688,  -969,
+    -1246, -1530, -1850, -2206, -2561, -2915, -3224, -3482, -3713, -3921, -4107,
+    -4287, -4470, -4660, -4850, -5057, -5239, -5395, -5540, -5619, -5697, -5724,
+    -5697, -5675, -5633, -5590, -5579, -5530, -5486, -5442, -5426, -5391, -5348,
+    -5276, -5197, -5124, -5039, -4925, -4808, -4677, -4581, -4479, -4343, -4218,
+    -4087, -3970, -3858, -3729, -3570, -3384, -3206, -3020, -2839, -2636, -2453,
+    -2287, -2185, -2154, -1926, -1562, -1223, -758,  -473,  -64,   395,   599,
+    880,   814,   938,   1172,  1498,  1928,  2127,  2422,  2608,  2841,  2937,
+    2886,  2815,  2985,  3324,  3757,  4152,  4481,  4652,  4917,  4965,  4766,
+    4583,  4328,  4503,  4815,  5118,  5408,  5682,  5956,  6082,  6055,  5744,
+    5426,  5341,  5427,  5606,  5882,  6065,  6226,  6428,  6477,  6385,  6009,
+    5728,  5552,  5439,  5339,  5200,  5008,  4947,  4835,  4614,  4330,  3887,
+    3521,  3111,  2460,  1983,  1297,  650,   279,   -353,  -720,  -1044, -1518,
+    -1668, -2117, -2496, -2743, -3266, -3607, -3790, -4149, -4075, -4042, -4096,
+    -3981, -4138, -4226, -4214, -4503, -4455, -4577, -4642, -4346, -4351, -4270,
+    -4263, -4522, -4521, -4673, -4814, -4731, -4950, -5011, -5004, -5288, -5341,
+    -5566, -5833, -5783, -5929, -5847, -5765, -5828, -5644, -5613, -5615, -5428,
+    -5291, -5014, -4554, -4277, -3964, -3854, -3829, -3612, -3603, -3438, -3137,
+    -2831, -2164, -1438, -939,  -330,  -156,  46,    242,   73,    242,   220,
+    239,   542,   565,   739,   872,   801,   857,   676,   543,   586,   567,
+    828,   1142,  1490,  1985,  2508,  2982,  3438,  3699,  3939,  4069,  4178,
+    4420,  4622,  4917,  5338,  5801,  6285,  6658,  6963,  7213,  7233,  7328,
+    7176,  7038,  7031,  6860,  6957,  6767,  6599,  6523,  6212,  6147,  6063,
+    5860,  6020,  6015,  6033,  6184,  5722,  5607,  5016,  4337,  4063,  3229,
+    3080,  3006,  2804,  3035,  2541,  2136,  1879,  1012,  401,   -575,  -1584,
+    -1930, -2278, -2485, -2477, -2712, -2747, -2766, -3320, -3592, -4188, -4669,
+    -4672, -4939, -4789, -4426, -4203, -3674, -3563, -3656, -3759, -4067, -4257,
+    -4522, -4970, -5204, -5237, -5139, -4907, -4911, -4917, -4921, -5007, -5230,
+    -5654, -6122, -6464, -6733, -6948, -7067, -6972, -6800, -6520, -6132, -5830,
+    -5382, -5091, -4797, -4546, -4472, -4362, -4350, -4235, -3851, -3454, -3144,
+    -2735, -2341, -1845, -1262, -958,  -549,  -166,  66,    382,   366,   352,
+    341,   85,    -13,   -176,  -303,  -235,  -341,  -309,  -227,  -249,  -50,
+    143,   384,   874,   1149,  1552,  2155,  2767,  3499,  3994,  4460,  4920,
+    5288,  5569,  5704,  5881,  6094,  6461,  6653,  6803,  7115,  7311,  7521,
+    7612,  7443,  7380,  7124,  6742,  6495,  5964,  5656,  5415,  5167,  5656,
+    5813,  6027,  6401,  6351,  6787,  7019,  6581,  6512,  5965,  5308,  5140,
+    4336,  4147,  3899,  3398,  3360,  2830,  2624,  1968,  1026,  395,   -699,
+    -1424, -2327, -3006, -3192, -3435, -3337, -3686, -3513, -3350, -3502, -3261,
+    -3878, -4005, -4063, -4187, -3767, -3598, -3384, -3300, -3094, -2857, -3023,
+    -3274, -3851, -4352, -4523, -4943, -5477, -5612, -5682, -5733, -5714, -5965,
+    -6110, -5950, -6158, -6548, -6897, -7165, -7281, -7352, -7258, -7185, -6659,
+    -5946, -5470, -4738, -4046, -3707, -3210, -3108, -3270, -3227, -3222, -3218,
+    -3017, -2943, -2668, -2296, -1593, -1061, -811,  -403,  -513,  -361,  -128,
+    -595,  -633,  -991,  -1205, -1159, -1284, -1330, -1164, -999,  -729,  -538,
+    -336,  27,    350,   794,   1245,  1646,  2446,  3210,  4017,  4835,  5271,
+    5739,  6028,  6140,  6212,  6161,  6066,  5984,  6081,  5995,  6152,  6301,
+    6278,  6424,  6377,  6396,  6362,  6152,  5788,  5309,  5071,  4860,  4704,
+    4804,  4919,  5258,  5869,  6121,  6365,  6694,  6692,  6694,  6532,  6187,
+    5808,  5704,  5302,  4816,  4611,  4043,  3775,  3249,  2600,  1933,  982,
+    336,   -848,  -1538, -2242, -3103, -3374, -3756, -3975, -4017, -4061, -3972,
+    -3749, -3609, -3853, -3850, -3714, -3760, -3736, -3914, -3923, -3830, -3541,
+    -3649, -3757, -3661, -3913, -4038, -4231, -4594, -4769, -5009, -5273, -5588,
+    -5676, -5937, -5997, -6060, -6164, -6414, -6623, -6765, -6857, -6771, -6921,
+    -6914, -6535, -6187, -5626, -5206, -4742, -4189, -3618, -3120, -2823, -2606,
+    -2550, -2703, -2736, -2626, -2498, -2406, -2133, -1852, -1348, -753,  -318,
+    162,   330,   524,   375,   9,     -204,  -866,  -1249, -1532, -1669, -1455,
+    -1235, -723,  -283,  262,   535,   862,   1340,  1712,  2316,  2625,  3171,
+    4015,  4698,  5516,  6006,  6452,  6838,  6921,  7003,  6735,  6339,  6138,
+    5768,  5575,  5593,  5568,  5728,  6041,  6233,  6260,  6175,  6048,  5728,
+    5366,  4931,  4340,  4194,  4174,  4330,  4743,  5028,  5754,  6250,  6598,
+    7120,  7114,  6962,  6675,  6157,  5373,  4797,  4081,  3237,  3153,  2588,
+    2143,  1639,  1021,  681,   -149,  -816,  -1987, -3003, -3493, -4138, -4420,
+    -4607, -4841, -4725, -4254, -4033, -3845, -3842, -4063, -4035, -4099, -4582,
+    -4718, -4779, -4689, -4437, -4327, -4352, -4119, -3881, -4061, -4345, -4768,
+    -5248, -5610, -5920, -6383, -6779, -6731, -6673, -6677, -6597, -6659, -6619,
+    -6417, -6516, -6862, -7017, -7069, -6944, -6715, -6376, -6000, -5162, -4333,
+    -3577, -2884, -2355, -1807, -1366, -1380, -1590, -1869, -1962, -1945, -2006,
+    -2141, -1960, -1516, -1025, -471,  -135,  85,    348,   239,   -8,    -475,
+    -951,  -1245, -1520, -1569, -1448, -1188, -517,  134,   827,   1585,  2114,
+    2792,  3214,  3651,  4230,  4546,  4894,  5321,  5588,  6105,  6583,  6877,
+    7014,  7087,  7068,  6876,  6695,  6280,  5684,  5385,  5205,  5064,  5033,
+    5028,  5080,  5322,  5510,  5461,  5390,  5541,  5494,  5443,  5306,  5065,
+    5193,  5338,  5513,  5818,  5911,  6345,  6506,  6514,  6543,  5981,  5703,
+    5082,  4228,  3517,  2424,  1880,  1245,  562,   -130,  -864,  -1156, -1561,
+    -1970, -2597, -3357, -3707, -4189, -4521, -4975, -5477, -5478, -5585, -5445,
+    -5353, -5327, -4971, -4580, -4431, -4469, -4432, -4422, -4275, -4227, -4507,
+    -4745, -4758, -4752, -4845, -4933, -5118, -5117, -5124, -5324, -5673, -5971,
+    -6152, -6366, -6702, -6970, -7159, -7136, -6929, -6917, -6703, -6520, -6302,
+    -5794, -5484, -5123, -4694, -4254, -3722, -3334, -2917, -2410, -1721, -1010,
+    -584,  -312,  27,    321,   327,   214,   -17,   -363,  -402,  -550,  -638,
+    -469,  -315,  -86,   142,   242,   387,   448,   458,   423,   321,   194,
+    285,   417,   717,   1176,  1673,  2402,  3144,  3985,  4764,  5406,  6056,
+    6507,  6783,  6891,  6868,  6850,  6717,  6532,  6359,  6248,  6303,  6279,
+    6140,  6071,  5927,  5687,  5480,  5146,  4835,  4572,  4447,  4481,  4578,
+    4840,  4936,  5246,  5659,  5732,  5856,  5658,  5403,  5282,  5004,  4949,
+    4843,  4681,  4884,  4886,  4967,  5108,  4781,  4647,  4240,  3443,  2768,
+    1830,  983,   309,   -769,  -1382, -1987, -2553, -2750, -3346, -3555, -4052,
+    -4400, -4599, -5196, -5437, -5945, -6340, -6343, -6554, -6611, -6381, -6184,
+    -5681, -5398, -5098, -4751, -4529, -4138, -4100, -4088, -4044, -4186, -4189,
+    -4263, -4453, -4465, -4598, -4651, -4726, -4919, -4926, -5142, -5286, -5490,
+    -5831, -6002, -6341, -6492, -6562, -6710, -6553, -6506, -6219, -5766, -5521,
+    -5008, -4556, -4002, -3293, -2769, -2069, -1467, -824,  -34,   509,   1034,
+    1385,  1560,  1650,  1664,  1419,  1016,  834,   511,   353,   381,   299,
+    523,   833,   956,   1280,  1492,  1425,  1547,  1350,  1143,  1114,  931,
+    1054,  1217,  1583,  2217,  2917,  4017,  4965,  5827,  6816,  7393,  7875,
+    8197,  8175,  7924,  7578,  7040,  6566,  6242,  5746,  5530,  5334,  5222,
+    5237,  5074,  5146,  5011,  4902,  4753,  4442,  4482,  4254,  4247,  4319,
+    4187,  4516,  4690,  4935,  5193,  5229,  5350,  5332,  5486,  5386,  5143,
+    4999,  4494,  4304,  3961,  3421,  2781,  2032,  1404,  614,   -88,   -956,
+    -1714, -2155, -2684, -3038, -3237, -3368, -3423, -3569, -3809, -4213, -4533,
+    -4973, -5514, -6011, -6663, -7084, -7258, -7158, -6947, -6639, -6111, -5548,
+    -4887, -4362, -4043, -3895, -3940, -4107, -4452, -4836, -5143, -5500, -5532,
+    -5510, -5485, -5096, -4739, -4375, -4065, -4063, -4094, -4252, -4576, -4904,
+    -5431, -5837, -6190, -6402, -6310, -6292, -5992, -5516, -5025, -4342, -3899,
+    -3386, -2697, -2077, -1493, -994,  -392,  232,   931,   1608,  1988,  2360,
+    2589,  2639,  2623,  2471,  2121,  1708,  1478,  1181,  1167,  1296,  1279,
+    1648,  1859,  2107,  2368,  2359,  2390,  2122,  1904,  1629,  1418,  1502,
+    1524,  1859,  2357,  3041,  3909,  4810,  5751,  6449,  7128,  7534,  7767,
+    7908,  7699,  7460,  7032,  6647,  6301,  5876,  5556,  5190,  4948,  4762,
+    4576,  4464,  4370,  4338,  4275,  4287,  4265,  4320,  4221,  4066,  3947,
+    3514,  3379,  3003,  2635,  2534,  2078,  2040,  1950,  1958,  2152,  2085,
+    2390,  2321,  2319,  2359,  1851,  1643,  877,   168,   -527,  -1245, -1704,
+    -2519, -2739, -3251, -3382, -3236, -3527, -3294, -3523, -3732, -3916, -4434,
+    -4888, -5615, -6161, -6729, -7283, -7543, -7920, -7865, -7660, -7430, -7034,
+    -6758, -6224, -5866, -5441, -5076, -4998, -4760, -4673, -4539, -4410, -4308,
+    -4131, -3992, -3791, -3611, -3448, -3213, -3070, -3046, -3048, -3168, -3244,
+    -3354, -3607, -3834, -4170, -4439, -4648, -4864, -4892, -4928, -4821, -4524,
+    -4211, -3576, -2819, -1968, -929,  -19,   1029,  2064,  2949,  3716,  4159,
+    4450,  4536,  4503,  4301,  3968,  3655,  3242,  2979,  2856,  2744,  2750,
+    2771,  2749,  2859,  2850,  2793,  2702,  2402,  2179,  1877,  1672,  1581,
+    1543,  1769,  1967,  2485,  3089,  3783,  4662,  5406,  6246,  6950,  7542,
+    8016,  8200,  8245,  8027,  7584,  6958,  6241,  5494,  4710,  3974,  3255,
+    2653,  2274,  2038,  1986,  1964,  2141,  2321,  2513,  2772,  2756,  2743,
+    2636,  2406,  2125,  1836,  1456,  1247,  1145,  995,   1077,  1140,  1290,
+    1561,  1685,  1762,  1609,  1391,  1147,  544,   84,    -754,  -1546, -2107,
+    -2806, -3137, -3522, -3732, -3826, -3834, -3609, -3493, -3340, -3254, -3499,
+    -3621, -3981, -4455, -4859, -5513, -6080, -6626, -7061, -7372, -7556, -7573,
+    -7515, -7366, -7091, -6799, -6366, -5887, -5484, -5098, -4746, -4334, -3941,
+    -3558, -3269, -3053, -2844, -2663, -2497, -2314, -2227, -2185, -2141, -2139,
+    -2070, -2037, -2031, -2062, -2205, -2348, -2544, -2774, -2979, -3298, -3520,
+    -3647, -3622, -3395, -3054, -2513, -1829, -948,  64,    1090,  2169,  3127,
+    3987,  4712,  5229,  5560,  5754,  5741,  5619,  5401,  5005,  4666,  4287,
+    3967,  3734,  3476,  3322,  3203,  3147,  3144,  3116,  3080,  3011,  2871,
+    2735,  2544,  2363,  2245,  2075,  2032,  2118,  2263,  2688,  3066,  3605,
+    4244,  4746,  5384,  5819,  6151,  6319,  6194,  5938,  5495,  4929,  4305,
+    3581,  2924,  2279,  1713,  1372,  1086,  1006,  983,   1006,  1146,  1249,
+    1349,  1360,  1231,  1084,  794,   502,   264,   -85,   -238,  -411,  -504,
+    -394,  -322,  -51,   188,   420,   589,   624,   666,   573,   338,   -86,
+    -564,  -1056, -1560, -1925, -2434, -2806, -3017, -3341, -3320, -3375, -3480,
+    -3410, -3567, -3553, -3595, -3805, -3919, -4284, -4482, -4754, -5190, -5354,
+    -5806, -6050, -6136, -6387, -6343, -6330, -6206, -5851, -5468, -4960, -4549,
+    -4080, -3542, -3150, -2698, -2440, -2318, -2132, -2067, -2081, -2017, -2099,
+    -2151, -2060, -2067, -1916, -1823, -1718, -1523, -1386, -1221, -1189, -1141,
+    -1014, -1008, -966,  -996,  -1015, -916,  -809,  -648,  -467,  -128,  237,
+    735,   1358,  1969,  2697,  3399,  4060,  4732,  5295,  5720,  6077,  6169,
+    6139,  5928,  5614,  5292,  4766,  4247,  3705,  3262,  3030,  2827,  2702,
+    2684,  2728,  2887,  3092,  3216,  3310,  3313,  3214,  3098,  2873,  2620,
+    2343,  2031,  1799,  1589,  1491,  1537,  1645,  1913,  2210,  2548,  2922,
+    3295,  3650,  3951,  4100,  4099,  3972,  3740,  3421,  2948,  2427,  1762,
+    1136,  574,   44,    -330,  -642,  -846,  -852,  -751,  -520,  -229,  44,
+    272,   446,   502,   443,   329,   66,    -191,  -492,  -841,  -1002, -1240,
+    -1237, -1199, -1177, -936,  -867,  -660,  -456,  -508,  -464,  -706,  -997,
+    -1265, -1780, -2178, -2724, -3270, -3735, -4142, -4378, -4609, -4666, -4749,
+    -4575, -4355, -4137, -3767, -3563, -3218, -2970, -2834, -2630, -2716, -2776,
+    -2920, -3210, -3363, -3764, -4023, -4125, -4268, -4194, -4223, -4005, -3639,
+    -3258, -2891, -2644, -2297, -1987, -1751, -1587, -1570, -1485, -1415, -1342,
+    -1194, -1100, -889,  -613,  -267,  161,   482,   865,   1269,  1639,  2005,
+    2202,  2381,  2549,  2628,  2700,  2625,  2559,  2481,  2357,  2319,  2192,
+    2142,  2199,  2283,  2514,  2670,  2919,  3214,  3510,  3830,  3971,  4080,
+    4073,  3911,  3700,  3359,  2954,  2549,  2094,  1766,  1556,  1442,  1462,
+    1560,  1808,  2070,  2357,  2606,  2730,  2831,  2737,  2582,  2309,  1931,
+    1585,  1178,  834,   529,   288,   214,   218,   302,   470,   679,   944,
+    1211,  1420,  1562,  1674,  1631,  1548,  1355,  1072,  776,   375,   25,
+    -320,  -614,  -818,  -992,  -991,  -906,  -755,  -525,  -291,  -17,   225,
+    447,   528,   546,   466,   270,   96,    -205,  -536,  -861,  -1148, -1383,
+    -1586, -1688, -1814, -1783, -1772, -1745, -1630, -1611, -1505, -1488, -1462,
+    -1409, -1519, -1489, -1609, -1723, -1755, -1977, -2042, -2132, -2215, -2184,
+    -2268, -2205, -2170, -2107, -1978, -1990, -1909, -1886, -1943, -1997, -2152,
+    -2326, -2500, -2762, -2987, -3227, -3392, -3522, -3630, -3579, -3469, -3262,
+    -2916, -2555, -2103, -1581, -1090, -531,  -20,   457,   873,   1228,  1561,
+    1809,  1999,  2105,  2139,  2196,  2201,  2149,  2113,  2038,  1990,  1913,
+    1787,  1705,  1595,  1490,  1372,  1201,  1113,  998,   917,   917,   894,
+    961,   1007,  1098,  1321,  1470,  1681,  1882,  2067,  2317,  2465,  2626,
+    2750,  2777,  2783,  2694,  2569,  2431,  2142,  1843,  1597,  1306,  1069,
+    824,   622,   532,   430,   388,   357,   377,   438,   414,   481,   468,
+    431,   454,   383,   374,   305,   207,   187,   133,   157,   115,   113,
+    206,   244,   382,   475,   591,   753,   821,   916,   908,   855,   754,
+    577,   399,   123,   -159,  -399,  -647,  -784,  -923,  -1010, -965,  -918,
+    -806,  -647,  -504,  -355,  -253,  -179,  -130,  -138,  -156,  -262,  -339,
+    -401,  -552,  -600,  -671,  -697,  -662,  -673,  -616,  -597,  -522,  -495,
+    -513,  -490,  -624,  -701,  -804,  -961,  -1073, -1328, -1503, -1656, -1798,
+    -1801, -1913, -1863, -1785, -1720, -1453, -1309, -1051, -846,  -715,  -487,
+    -457,  -357,  -331,  -400,  -427,  -627,  -765,  -873,  -1021, -1105, -1255,
+    -1312, -1357, -1370, -1288, -1261, -1165, -1139, -1062, -917,  -808,  -680,
+    -597,  -452,  -277,  -104,  122,   312,   558,   771,   919,   1110,  1205,
+    1312,  1355,  1302,  1280,  1151,  1049,  946,   818,   733,   569,   451,
+    429,   388,   408,   387,   376,   426,   463,   542,   576,   632,   666,
+    673,   740,   766,   791,   845,   829,   857,   841,   822,   835,   796,
+    773,   671,   600,   560,   484,   460,   371,   311,   284,   242,   277,
+    261,   261,   277,   273,   358,   380,   410,   433,   435,   471,   432,
+    414,   386,   330,   294,   194,   149,   108,   69,    84,    69,    92,
+    83,    75,    88,    53,    12,    -96,   -194,  -269,  -369,  -438,  -523,
+    -553,  -528,  -500,  -392,  -277,  -136,  53,    240,   466,   678,   870,
+    1050,  1178,  1294,  1336,  1310,  1247,  1080,  916,   677,   387,   120,
+    -182,  -471,  -740,  -972,  -1148, -1273, -1343, -1402, -1363, -1263, -1129,
+    -922,  -724,  -518,  -288,  -79,   111,   250,   364,   405,   405,   395,
+    284,   199,   83,    -43,   -126,  -244,  -313,  -400,  -451,  -497,  -610,
+    -672,  -807,  -951,  -1087, -1325, -1517, -1736, -1929, -2086, -2260, -2318,
+    -2356, -2271, -2125, -1967, -1685, -1379, -1000, -598,  -238,  149,   481,
+    790,   1042,  1185,  1287,  1274,  1195,  1068,  868,   654,   386,   138,
+    -65,   -273,  -450,  -598,  -665,  -670,  -669,  -620,  -553,  -425,  -288,
+    -179,  -72,   15,    122,   205,   263,   324,   357,   435,   518,   603,
+    709,   779,   892,   1006,  1107,  1170,  1183,  1190,  1173,  1116,  1016,
+    890,   750,   628,   488,   331,   197,   95,    43,    25,    1,     22,
+    97,    209,   363,   495,   615,   724,   833,   937,   984,   990,   933,
+    884,   851,   747,   678,   573,   497,   469,   401,   391,   352,   339,
+    352,   337,   354,   361,   370,   402,   411,   418,   440,   468,   526,
+    576,   619,   683,   766,   857,   965,   1038,  1114,  1159,  1172,  1167,
+    1106,  1006,  840,   644,   426,   177,   -110,  -390,  -665,  -929,  -1160,
+    -1375, -1497, -1550, -1592, -1553, -1507, -1394, -1201, -1084, -863,  -685,
+    -540,  -322,  -234,  -68,   29,    59,    160,   141,   170,   140,   79,
+    77,    -11,   -53,   -179,  -274,  -327,  -480,  -564,  -736,  -884,  -995,
+    -1185, -1300, -1461, -1617, -1711, -1832, -1831, -1863, -1865, -1776, -1691,
+    -1516, -1353, -1168, -954,  -729,  -490,  -305,  -93,   81,    211,   322,
+    364,   392,   384,   332,   264,   146,   29,    -101,  -230,  -357,  -486,
+    -616,  -705,  -752,  -801,  -809,  -788,  -750,  -654,  -546,  -456,  -328,
+    -200,  -78,   45,    137,   232,   316,   388,   447,   485,   528,   578,
+    630,   697,   760,   835,   910,   988,   1068,  1124,  1154,  1157,  1166,
+    1163,  1116,  1070,  1024,  994,   986,   988,   1030,  1110,  1212,  1303,
+    1411,  1498,  1551,  1599,  1587,  1565,  1481,  1336,  1212,  1028,  847,
+    669,   466,   330,   187,   61,    -9,    -54,   -55,   -20,   11,    69,
+    133,   195,   244,   253,   225,   182,   133,   62,    -11,   -96,   -168,
+    -199,  -214,  -213,  -197,  -167,  -127,  -105,  -86,   -83,   -109,  -140,
+    -217,  -323,  -448,  -588,  -717,  -854,  -971,  -1086, -1185, -1211, -1227,
+    -1180, -1135, -1099, -992,  -918,  -788,  -704,  -651,  -562,  -542,  -470,
+    -421,  -431,  -391,  -429,  -386,  -344,  -336,  -260,  -257,  -162,  -61,
+    -6,    100,   120,   178,   215,   179,   132,   15,    -106,  -238,  -416,
+    -595,  -765,  -929,  -1066, -1170, -1252, -1278, -1290, -1258, -1173, -1114,
+    -1012, -945,  -868,  -741,  -695,  -612,  -547,  -494,  -388,  -332,  -225,
+    -110,  22,    182,   318,   496,   677,   835,   992,   1104,  1162,  1166,
+    1133,  1054,  916,   709,   430,   164,   -90,   -340,  -600,  -853,  -1033,
+    -1135, -1177, -1146, -1079, -946,  -746,  -500,  -208,  83,    377,   673,
+    950,   1183,  1356,  1503,  1627,  1707,  1735,  1708,  1678,  1668,  1645,
+    1588,  1494,  1419,  1354,  1291,  1194,  1052,  900,   718,   524,   325,
+    110,   -114,  -330,  -500,  -630,  -729,  -803,  -834,  -795,  -727,  -627,
+    -492,  -325,  -125,  54,    238,   393,   528,   642,   691,   706,   661,
+    585,   504,   380,   245,   87,    -61,   -195,  -320,  -435,  -556,  -663,
+    -742,  -814,  -883,  -952,  -1009, -1038, -1047, -1067, -1063, -1050, -1020,
+    -949,  -888,  -795,  -698,  -574,  -405,  -257,  -70,   68,    203,   381,
+    479,   580,   619,   623,   645,   565,   492,   364,   206,   106,   -71,
+    -191,  -331,  -460,  -469,  -527,  -471,  -441,  -386,  -222,  -123,  60,
+    168,   245,   404,   470,   596,   605,   581,   633,   548,   562,   468,
+    355,   334,   192,   161,   62,    -36,   -39,   -146,  -121,  -167,  -243,
+    -229,  -302,  -276,  -327,  -415,  -419,  -444,  -396,  -433,  -455,  -407,
+    -357,  -244,  -221,  -158,  -63,   36,    172,   210,   296,   326,   351,
+    424,   367,   369,   300,   224,   235,   124,   54,    -39,   -122,  -118,
+    -239,  -304,  -360,  -403,  -361,  -418,  -427,  -394,  -342,  -259,  -232,
+    -176,  -110,  -48,   27,    48,    78,    90,    86,    91,    76,    57,
+    -1,    -34,   -53,   -103,  -151,  -209,  -239,  -261,  -319,  -354,  -372,
+    -382,  -385,  -411,  -432,  -428,  -431,  -446,  -471,  -496,  -512,  -532,
+    -562,  -570,  -567,  -543,  -499,  -457,  -379,  -290,  -204,  -94,   -11,
+    78,    155,   196,   234,   222,   198,   160,   113,   64,    5,     -57,
+    -108,  -136,  -175,  -186,  -196,  -184,  -125,  -90,   -25,   58,    146,
+    271,   372,   472,   562,   636,   709,   741,   760,   752,   730,   710,
+    688,   655,   608,   595,   570,   556,   540,   517,   513,   511,   497,
+    481,   449,   417,   401,   347,   325,   295,   248,   261,   238,   250,
+    294,   295,   367,   380,   416,   454,   430,   479,   443,   431,   430,
+    386,   397,   333,   292,   238,   176,   153,   54,    24,    -37,   -84,
+    -109,  -172,  -155,  -199,  -220,  -219,  -261,  -227,  -255,  -280,  -266,
+    -293,  -277,  -273,  -243,  -214,  -221,  -179,  -153,  -130,  -109,  -154,
+    -149,  -151,  -155,  -186,  -243,  -253,  -311,  -326,  -358,  -434,  -427,
+    -491,  -533,  -554,  -598,  -596,  -655,  -668,  -679,  -714,  -671,  -694,
+    -643,  -607,  -602,  -532,  -496,  -409,  -408,  -377,  -309,  -289,  -211,
+    -223,  -196,  -145,  -147,  -104,  -157,  -123,  -125,  -177,  -152,  -229,
+    -192,  -204,  -243,  -213,  -259,  -194,  -190,  -172,  -98,   -123,  -43,
+    -12,   41,    103,   87,    148,   150,   166,   154,   113,   118,   80,
+    54,    8,     4,     25,    12,    59,    70,    162,   260,   305,   387,
+    427,   501,   549,   564,   571,   517,   488,   423,   355,   294,   206,
+    165,   113,   92,    77,    62,    115,   116,   154,   162,   171,   218,
+    210,   221,   208,   192,   215,   176,   169,   114,   89,    89,    52,
+    62,    29,    35,    73,    98,    167,   195,   261,   325,   349,   401,
+    382,   393,   368,   302,   254,   174,   104,   6,     -78,   -136,  -203,
+    -229,  -291,  -303,  -284,  -294,  -241,  -235,  -222,  -186,  -187,  -156,
+    -160,  -149,  -122,  -114,  -71,   -44,   -28,   6,     20,    47,    57,
+    54,    52,    55,    53,    23,    9,     -16,   -59,   -86,   -158,  -223,
+    -292,  -372,  -421,  -498,  -532,  -561,  -570,  -531,  -512,  -456,  -367,
+    -297,  -206,  -125,  -37,   26,    88,    147,   157,   188,   169,   152,
+    152,   131,   99,    62,    44,    46,    53,    61,    61,    79,    110,
+    159,   175,   185,   237,   220,   278,   276,   239,   264,   203,   190,
+    138,   70,    34,    -9,    18,    1,     10,    71,    115,   191,   220,
+    255,   265,   296,   319,   270,   266,   214,   189,   187,   155,   145,
+    123,   149,   166,   172,   186,   179,   195,   213,   201,   182,   161,
+    150,   116,   76,    41,    -29,   -58,   -101,  -183,  -209,  -269,  -314,
+    -342,  -385,  -379,  -380,  -348,  -304,  -273,  -197,  -144,  -88,   -28,
+    -5,    11,    20,    27,    -5,    -24,   -22,   -61,   -73,   -87,   -124,
+    -118,  -133,  -150,  -160,  -198,  -196,  -219,  -228,  -239,  -281,  -276,
+    -275,  -288,  -277,  -305,  -324,  -302,  -294,  -292,  -266,  -261,  -224,
+    -203,  -210,  -190,  -198,  -176,  -180,  -201,  -196,  -198,  -175,  -166,
+    -151,  -127,  -114,  -59,   -48,   -8,    39,    75,    126,   131,   168,
+    160,   152,   142,   82,    36,    -13,   -49,   -81,   -105,  -105,  -103,
+    -65,   -38,   -16,   19,    33,    67,    82,    95,    110,   98,    111,
+    98,    87,    67,    54,    66,    52,    49,    53,    71,    106,   139,
+    186,   224,   270,   320,   361,   413,   433,   462,   473,   478,   480,
+    459,   441,   391,   339,   298,   239,   206,   159,   149,   120,   114,
+    117,   95,    106,   81,    67,    61,    30,    11,    -29,   -42,   -76,
+    -97,   -98,   -124,  -107,  -107,  -103,  -69,   -71,   -36,   -12,   23,
+    69,    86,    129,   152,   158,   162,   152,   127,   81,    48,    -9,
+    -80,   -120,  -172,  -201,  -225,  -276,  -297,  -311,  -330,  -339,  -361,
+    -375,  -389,  -376,  -365,  -374,  -378,  -375,  -370,  -358,  -347,  -355,
+    -338,  -314,  -289,  -244,  -212,  -168,  -129,  -80,   -26,   -12,   47,
+    79,    92,    105,   105,   113,   99,    85,    29,    -18,   -53,   -110,
+    -133,  -167,  -186,  -196,  -199,  -176,  -177,  -150,  -122,  -106,  -73,
+    -61,   -30,   -34,   -29,   -40,   -68,   -63,   -85,   -84,   -71,   -65,
+    -40,   -16,   23,    56,    87,    144,   167,   196,   206,   221,   243,
+    226,   233,   210,   192,   190,   150,   140,   110,   91,    77,    43,
+    27,    -10,   -5,    -5,    -22,   -9,    -7,    27,    48,    59,    64,
+    70,    87,    104,   139,   151,   188,   239,   270,   317,   311,   336,
+    349,   341,   330,   274,   254,   223,   195,   163,   102,   81,    43,
+    20,    8,     -37,   -28,   -31,   -29,   -21,   -39,   -16,   -22,   -11,
+    -21,   -41,   -32,   -47,   -39,   -60,   -75,   -71,   -94,   -98,   -131,
+    -147,  -139,  -145,  -146,  -165,  -150,  -136,  -112,  -90,   -106,  -86,
+    -91,   -87,   -98,   -136,  -121,  -135,  -124,  -132,  -144,  -114,  -108,
+    -87,   -74,   -75,   -50,   -30,   -5,    -18,   -24,   -3,    -3,    -6,
+    -41,   -76,   -98,   -127,  -159,  -215,  -257,  -263,  -268,  -266,  -262,
+    -237,  -194,  -144,  -113,  -99,   -61,   -28,   12,    21,    46,    76,
+    92,    130,   115,   123,   132,   135,   149,   134,   133,   132,   135,
+    138,   94,    76,    51,    19,    -15,   -72,   -98,   -125,  -135,  -154,
+    -174,  -171,  -164,  -139,  -130,  -99,   -74,   -40,   9,     34,    86,
+    129,   176,   214,   226,   245,   250,   280,   271,   256,   250,   226,
+    234,   212,   187,   178,   148,   144,   104,   79,    64,    37,    36,
+    9,     -10,   -23,   -38,   -35,   -62,   -67,   -67,   -82,   -70,   -80,
+    -75,   -59,   -34,   -3,    9,     48,    76,    101,   120,   120,   123,
+    126,   131,   112,   92,    77,    61,    54,    32,    3,     -18,   -28,
+    -39,   -56,   -71,   -91,   -92,   -100,  -124,  -134,  -142,  -144,  -155,
+    -177,  -178,  -175,  -171,  -168,  -160,  -141,  -123,  -89,   -73,   -64,
+    -46,   -39,   -18,   -19,   -34,   -32,   -46,   -51,   -63,   -74,   -73,
+    -81,   -70,   -83,   -71,   -49,   -39,   -12,   -1,    30,    48,    65,
+    94,    100,   125,   136,   148,   156,   138,   140,   124,   115,   86,
+    58,    57,    32,    43,    40,    44,    63,    60,    83,    90,    99,
+    115,   113,   135,   140,   148,   164,   172,   187,   182,   190,   183,
+    171,   171,   146,   139,   121,   105,   94,    61,    46,    17,    -6,
+    -34,   -70,   -89,   -121,  -138,  -158,  -178,  -190,  -206,  -206,  -210,
+    -214,  -204,  -196,  -173,  -154,  -128,  -97,   -81,   -58,   -51,   -46,
+    -38,   -47,   -49,   -57,   -58,   -57,   -59,   -49,   -58,   -58,   -54,
+    -60,   -48,   -65,   -72,   -72,   -78,   -70,   -77,   -73,   -76,   -79,
+    -76,   -90,   -90,   -91,   -88,   -76,   -67,   -43,   -16,   6,     27,
+    39,    55,    69,    71,    74,    65,    56,    60,    47,    37,    27,
+    8,     -5,    -29,   -50,   -71,   -89,   -96,   -114,  -111,  -113,  -115,
+    -105,  -112,  -90,   -78,   -68,   -49,   -46,   -26,   -14,   5,     18,
+    10,    14,    3,     5,     -9,    -20,   -15,   -30,   -26,   -33,   -31,
+    -23,   -23,   -12,   -21,   -20,   -16,   -23,   -20,   -13,   -7,    6,
+    28,    47,    69,    96,    115,   134,   147,   154,   166,   174,   186,
+    196,   202,   204,   198,   193,   181,   164,   144,   125,   113,   102,
+    96,    90,    92,    91,    96,    99,    99,    100,   99,    99,    93,
+    94,    86,    68,    55,    44,    36,    22,    13,    15,    13,    15,
+    21,    16,    11,    3,     -15,   -31,   -50,   -75,   -105,  -125,  -145,
+    -154,  -155,  -164,  -178,  -189,  -186,  -177,  -174,  -169,  -152,  -134,
+    -114,  -93,   -65,   -42,   -23,   -4,    -1,    6,     6,     2,     -4,
+    -18,   -26,   -25,   -25,   -23,   -32,   -31,   -33,   -39,   -50,   -68,
+    -69,   -74,   -79,   -78,   -83,   -85,   -85,   -77,   -71,   -61,   -42,
+    -27,   -3,    28,    59,    95,    123,   146,   155,   160,   162,   144,
+    130,   112,   94,    82,    67,    60,    46,    35,    35,    22,    4,
+    -14,   -27,   -35,   -45,   -52,   -61,   -62,   -65,   -68,   -55,   -52,
+    -43,   -38,   -34,   -20,   -8,    8,     18,    24,    34,    36,    37,
+    42,    46,    51,    50,    58,    76,    75,    70,    67,    58,    53,
+    48,    36,    23,    18,    10,    3,     9,     14,    24,    39,    43,
+    53,    62,    63,    66,    62,    66,    64,    59,    51,    25,    19,
+    6,     -10,   -19,   -26,   -35,   -43,   -44,   -37,   -47,   -43,   -50,
+    -54,   -60,   -69,   -75,   -84,   -91,   -93,   -98,   -96,   -99,   -91,
+    -87,   -91,   -88,   -84,   -80,   -75,   -61,   -48,   -44,   -40,   -37,
+    -34,   -45,   -52,   -58,   -72,   -82,   -84,   -78,   -68,   -65,   -63,
+    -51,   -42,   -27,   -22,   -13,   -3,    8,     20,    26,    31,    31,
+    37,    33,    29,    33,    31,    32,    31,    34,    44,    55,    68,
+    74,    69,    75,    73,    72,    65,    63,    67,    70,    83,    81,
+    81,    85,    84,    80,    75,    69,    53,    44,    36,    27,    20,
+    11,    1,     -4,    -19,   -26,   -27,   -25,   -21,   -14,   -12,   -12,
+    -14,   -9,    -21,   -29,   -40,   -50,   -50,   -54,   -46,   -35,   -17,
+    -4,    -1,    7,     20,    28,    26,    22,    23,    21,    23,    18,
+    13,    12,    7,     6,     3,     2,     -1,    -1,    4,     6,     17,
+    29,    35,    34,    34,    32,    28,    33,    26,    22,    16,    16,
+    22,    20,    13,    -1,    -1,    -7,    -15,   -20,   -30,   -32,   -38,
+    -39,   -45,   -45,   -53,   -63,   -70,   -83,   -96,   -107,  -113,  -122,
+    -122,  -118,  -114,  -114,  -113,  -112,  -111,  -110,  -107,  -103,  -102,
+    -94,   -80,   -71,   -58,   -52,   -47,   -40,   -43,   -47,   -48,   -50,
+    -39,   -46,   -44,   -44,   -44,   -43,   -45,   -41,   -40,   -34,   -32,
+    -23,   -12,   -6,    -1,    -1,    6,     12,    18,    20,    22,    32,
+    48,    65,    80,    93,    109,   122,   128,   131,   135,   135,   129,
+    126,   130,   127,   124,   125,   121,   122,   115,   118,   122,   128,
+    137,   143,   143,   141,   142,   134,   131,   121,   109,   105,   97,
+    93,    99,    96,    96,    94,    83,    84,    80,    77,    66,    59,
+    46,    42,    44,    32,    28,    20,    12,    8,     4,     4,     5,
+    3,     -4,    -7,    -6,    -14,   -19,   -24,   -34,   -40,   -45,   -52,
+    -61,   -62,   -60,   -57,   -57,   -61,   -63,   -61,   -65,   -73,   -81,
+    -89,   -94,   -93,   -89,   -87,   -82,   -82,   -84,   -81,   -86,   -82,
+    -84,   -86,   -90,   -86,   -83,   -82,   -81,   -80,   -80,   -76,   -75,
+    -76,   -70,   -69,   -68,   -61,   -53,   -50,   -43,   -38,   -42,   -43,
+    -41,   -41,   -39,   -34,   -27,   -21,   -16,   -20,   -22,   -27,   -36,
+    -39,   -38,   -40,   -37,   -35,   -28,   -14,   -6,    -3,    -2,    2,
+    4,     5,     15,    18,    25,    35,    36,    41,    45,    48,    52,
+    54,    52,    50,    60,    67,    76,    85,    85,    90,    86,    83,
+    84,    77,    77,    72,    77,    81,    89,    91,    93,    99,    101,
+    102,   98,    94,    87,    77,    70,    69,    63,    62,    55,    59,
+    58,    54,    51,    53,    57,    62,    65,    60,    54,    48,    45,
+    40,    29,    17,    8,     -3,    -14,   -17,   -18,   -20,   -25,   -34,
+    -40,   -44,   -53,   -56,   -63,   -71,   -71,   -69,   -66,   -62,   -66,
+    -67,   -68,   -71,   -75,   -79,   -79,   -73,   -67,   -60,   -49,   -46,
+    -45,   -45,   -46,   -55,   -64,   -67,   -72,   -74,   -70,   -68,   -67,
+    -69,   -70,   -64,   -56,   -55,   -54,   -51,   -41,   -30,   -26,   -28,
+    -29,   -30,   -28,   -25,   -27,   -20,   -12,   -5,    -2,    2,     3,
+    -3,    0,     -7,    -8,    -14,   -15,   -9,    -7,    4,     12,    24,
+    36,    41,    52,    58,    59,    51,    45,    48,    44,    46,    43,
+    40,    42,    47,    53,    52,    52,    63,    69,    74,    75,    80,
+    78,    69,    68,    59,    60,    54,    54,    54,    58,    66,    71,
+    78,    78,    75,    78,    72,    71,    61,    55,    53,    42,    36,
+    31,    28,    29,    23,    19,    25,    27,    27,    23,    29,    29,
+    20,    11,    5,     -4,    -10,   -31,   -38,   -39,   -36,   -33,   -27,
+    -17,   -15,   -14,   -17,   -13,   -14,   -25,   -33,   -44,   -51,   -61,
+    -63,   -63,   -65,   -67,   -66,   -63,   -59,   -52,   -48,   -45,   -44,
+    -50,   -62,   -74,   -84,   -89,   -100,  -101,  -102,  -96,   -95,   -85,
+    -76,   -78,   -72,   -71,   -66,   -61,   -63,   -60,   -62,   -72,   -69,
+    -69,   -58,   -56,   -50,   -37,   -28,   -17,   -17,   -16,   -17,   -18,
+    -18,   -13,   -7,    -4,    6,     17,    23,    25,    28,    24,    21,
+    17,    21,    27,    30,    33,    35,    46,    49,    48,    54,    56,
+    57,    58,    60,    64,    62,    64,    66,    67,    64,    70,    77,
+    83,    82,    84,    88,    89,    95,    86,    75,    64,    51,    36,
+    29,    26,    21,    26,    31,    38,    40,    55,    63,    65,    65,
+    64,    60,    54,    54,    49,    41,    34,    26,    21,    9,     6,
+    6,     5,     -1,    3,     5,     3,     2,     -4,    -13,   -13,   -24,
+    -32,   -33,   -36,   -33,   -24,   -18,   -15,   -9,    -5,    -5,    -14,
+    -17,   -24,   -34,   -36,   -42,   -43,   -36,   -42,   -43,   -43,   -38,
+    -36,   -27,   -20,   -23,   -21,   -28,   -25,   -22,   -24,   -25,   -23,
+    -22,   -30,   -31,   -26,   -25,   -20,   -15,   -8,    -10,   -11,   -13,
+    -18,   -22,   -30,   -36,   -35,   -39,   -35,   -34,   -27,   -24,   -19,
+    -15,   -7,    -6,    -7,    -2,    0,     7,     12,    14,    19,    20,
+    26,    26,    24,    16,    10,    4,     1,     3,     2,     9,     11,
+    17,    19,    27,    31,    31,    32,    30,    27,    25,    28,    27,
+    25,    22,    23,    23,    20,    21,    25,    36,    38,    40,    43,
+    40,    32,    27,    20,    9,     4,     1,     12,    27,    37,    49,
+    63,    73,    72,    73,    70,    67,    53,    39,    33,    26,    23,
+    13,    9,     6,     0,     -2,    -3,    0,     -1,    0,     -1,    -4,
+    -9,    -16,   -22,   -21,   -24,   -21,   -19,   -12,   -3,    0,     12,
+    14,    13,    3,     -6,    -13,   -27,   -34,   -42,   -41,   -44,   -42,
+    -43,   -46,   -42,   -40,   -39,   -36,   -31,   -29,   -30,   -22,   -19,
+    -21,   -20,   -17,   -17,   -22,   -31,   -41,   -45,   -54,   -65,   -64,
+    -68,   -70,   -74,   -70,   -64,   -62,   -61,   -60,   -58,   -52,   -46,
+    -43,   -37,   -35,   -40,   -41,   -47,   -52,   -58,   -62,   -61,   -53,
+    -54,   -46,   -41,   -40,   -34,   -29,   -20,   -15,   -8,    2,     12,
+    28,    35,    41,    42,    42,    43,    41,    43,    39,    45,    44,
+    46,    55,    54,    55,    55,    51,    48,    42,    43,    39,    40,
+    46,    54,    65,    70,    76,    81,    86,    89,    79,    73,    70,
+    62,    56,    52,    39,    32,    28,    17,    18,    19,    18,    15,
+    19,    20,    15,    13,    13,    10,    6,     5,     12,    10,    15,
+    20,    24,    30,    31,    28,    22,    17,    2,     -15,   -24,   -39,
+    -52,   -53,   -55,   -46,   -40,   -34,   -26,   -21,   -22,   -31,   -32,
+    -38,   -36,   -35,   -32,   -33,   -34,   -30,   -28,   -27,   -35,   -40,
+    -42,   -45,   -44,   -45,   -44,   -52,   -54,   -57,   -57,   -53,   -60,
+    -63,   -63,   -65,   -51,   -45,   -40,   -40,   -39,   -39,   -43,   -44,
+    -46,   -52,   -46,   -51,   -49,   -45,   -45,   -47,   -47,   -45,   -50,
+    -47,   -40,   -35,   -32,   -24,   -17,   -19,   -14,   -13,   -9,    -7,
+    -7,    -7,    -9,    0,     3,     7,     13,    12,    14,    15,    13,
+    6,     -1,    -3,    -9,    -10,   -5,    -2,    6,     9,     11,    12,
+    15,    19,    24,    37,    47,    47,    56,    53,    51,    52,    52,
+    47,    39,    38,    40,    41,    43,    44,    42,    43,    42,    41,
+    43,    40,    41,    35,    37,    39,    40,    41,    38,    30,    21,
+    14,    5,     2,     -1,    -2,    1,     -2,    6,     2,     4,     2,
+    -1,    -11,   -16,   -23,   -25,   -20,   -18,   -25,   -27,   -32,   -27,
+    -24,   -16,   -15,   -11,   -9,    -3,    -4,    -2,    -9,    -10,   -18,
+    -28,   -33,   -38,   -37,   -41,   -41,   -33,   -24,   -22,   -25,   -25,
+    -25,   -24,   -33,   -38,   -42,   -52,   -57,   -55,   -50,   -51,   -53,
+    -52,   -48,   -49,   -49,   -53,   -55,   -58,   -51,   -34,   -19,   -12,
+    -12,   -5,    1,     1,     0,     -6,    -2,    -10,   -11,   -11,   -6,
+    0,     -6,    2,     -2,    -6,    2,     5,     16,    18,    18,    21,
+    16,    18,    18,    20,    20,    13,    18,    9,     7,     12,    7,
+    8,     10,    16,    17,    18,    23,    26,    36,    44,    51,    55,
+    60,    64,    69,    68,    71,    70,    62,    58,    52,    44,    35,
+    31,    34,    32,    33,    36,    37,    38,    41,    47,    55,    56,
+    58,    60,    60,    57,    48,    41,    29,    19,    7,     4,     8,
+    9,     10,    8,     13,    15,    13,    8,     8,     6,     4,     10,
+    8,     -4,    -6,    -9,    -20,   -28,   -39,   -38,   -27,   -24,   -22,
+    -19,   -23,   -32,   -35,   -36,   -41,   -48,   -51,   -50,   -52,   -55,
+    -60,   -67,   -72,   -76,   -84,   -82,   -80,   -81,   -75,   -64,   -50,
+    -36,   -28,   -18,   -14,   -12,   -15,   -12,   -18,   -24,   -21,   -22,
+    -19,   -21,   -19,   -22,   -20,   -18,   -16,   -17,   -19,   -15,   -7,
+    1,     0,     0,     9,     14,    20,    24,    20,    16,    17,    20,
+    20,    25,    27,    26,    32,    33,    35,    38,    42,    38,    37,
+    39,    46,    44,    43,    45,    45,    42,    37,    34,    25,    21,
+    22,    33,    44,    49,    54,    53,    58,    54,    51,    46,    40,
+    37,    37,    39,    34,    37,    39,    31,    39,    38,    36,    35,
+    32,    33,    33,    32,    28,    23,    18,    22,    28,    31,    27,
+    18,    3,     4,     0,     -4,    -7,    -15,   -18,   -24,   -32,   -34,
+    -39,   -42,   -36,   -31,   -24,   -12,   -10,   -10,   -13,   -20,   -28,
+    -34,   -44,   -49,   -50,   -53,   -56,   -54,   -52,   -53,   -47,   -43,
+    -41,   -45,   -41,   -38,   -38,   -33,   -32,   -34,   -35,   -33,   -40,
+    -45,   -53,   -62,   -61,   -67,   -72,   -70,   -67,   -68,   -59,   -51,
+    -47,   -38,   -31,   -20,   -13,   -13,   -13,   -14,   -17,   -21,   -22,
+    -29,   -31,   -27,   -23,   -13,   -6,    4,     12,    17,    25,    23,
+    23,    25,    30,    30,    32,    31,    28,    27,    18,    14,    13,
+    3,     5,     7,     19,    35,    47,    61,    70,    84,    90,    95,
+    92,    94,    89,    77,    71,    66,    59,    50,    51,    50,    51,
+    53,    56,    65,    67,    69,    75,    74,    69,    67,    56,    51,
+    44,    34,    25,    17,    10,    6,     7,     7,     4,     6,     -1,
+    -1,    -2,    -9,    -9,    -9,    -7,    -5,    1,     -2,    -5,    -11,
+    -19,   -27,   -39,   -38,   -44,   -45,   -48,   -48,   -54,   -59,   -53,
+    -51,   -49,   -52,   -50,   -50,   -47,   -42,   -32,   -28,   -28,   -26,
+    -27,   -34,   -40,   -40,   -36,   -37,   -37,   -34,   -37,   -36,   -41,
+    -36,   -40,   -46,   -48,   -52,   -47,   -44,   -40,   -40,   -38,   -43,
+    -43,   -47,   -59,   -62,   -59,   -59,   -51,   -41,   -29,   -19,   -8,
+    -2,    1,     1,     -4,    -9,    -19,   -23,   -29,   -29,   -25,   -23,
+    -15,   -7,    -2,    6,     8,     15,    27,    35,    43,    40,    36,
+    35,    32,    25,    22,    19,    17,    13,    13,    21,    25,    28,
+    36,    44,    50,    57,    56,    58,    59,    62,    66,    70,    73,
+    69,    66,    66,    66,    62,    53,    48,    44,    38,    39,    44,
+    52,    51,    55,    57,    52,    49,    44,    36,    26,    16,    13,
+    13,    14,    14,    17,    14,    10,    6,     -5,    -14,   -23,   -24,
+    -21,   -28,   -25,   -27,   -29,   -29,   -33,   -33,   -39,   -42,   -43,
+    -41,   -40,   -43,   -46,   -45,   -43,   -42,   -41,   -41,   -46,   -46,
+    -52,   -52,   -52,   -59,   -63,   -70,   -68,   -73,   -77,   -73,   -68,
+    -66,   -62,   -64,   -66,   -58,   -54,   -51,   -52,   -48,   -47,   -43,
+    -40,   -39,   -33,   -26,   -19,   -17,   -16,   -17,   -14,   -9,    -10,
+    -3,    5,     5,     9,     5,     9,     8,     4,     3,     0,     -5,
+    -10,   -3,    2,     8,     14,    16,    20,    27,    39,    40,    44,
+    48,    43,    39,    34,    29,    22,    12,    8,     5,     0,     -2,
+    -3,    5,     12,    16,    19,    22,    25,    28,    35,    28,    30,
+    31,    30,    39,    43,    47,    43,    42,    41,    41,    41,    37,
+    37,    39,    37,    38,    43,    44,    41,    43,    34,    28,    25,
+    23,    30,    34,    32,    33,    29,    21,    18,    13,    14,    11,
+    3,     2,     1,     3,     1,     -1,    0,     -3,    -1,    -3,    -8,
+    -9,    -7,    -9,    -2,    0,     -3,    0,     1,     5,     0,     -1,
+    -9,    -13,   -8,    -11,   -18,   -23,   -25,   -29,   -29,   -26,   -27,
+    -29,   -25,   -24,   -23,   -18,   -19,   -18,   -17,   -21,   -22,   -30,
+    -38,   -42,   -42,   -42,   -40,   -41,   -43,   -39,   -38,   -37,   -36,
+    -33,   -31,   -28,   -27,   -18,   -15,   -7,    -8,    -8,    -1,    1,
+    3,     -5,    0,     -4,    -5,    -4,    -8,    -10,   -14,   -21,   -24,
+    -25,   -20,   -11,   -4,    3,     6,     13,    15,    12,    17,    16,
+    17,    17,    15,    21,    28,    33,    36,    35,    35,    29,    31,
+    29,    28,    23,    21,    14,    15,    27,    36,    40,    40,    43,
+    51,    56,    62,    69,    77,    80,    88,    88,    88,    82,    76,
+    63,    52,    44,    36,    26,    23,    25,    24,    27,    26,    31,
+    21,    13,    8,     -8,    -8,    -11,   -14,   -18,   -28,   -28,   -30,
+    -32,   -29,   -26,   -26,   -27,   -24,   -20,   -14,   -8,    -6,    -8,
+    -5,    -10,   -14,   -18,   -26,   -34,   -36,   -38,   -44,   -51,   -57,
+    -66,   -64,   -68,   -72,   -75,   -75,   -70,   -68,   -65,   -64,   -62,
+    -68,   -63,   -60,   -65,   -65,   -69,   -68,   -67,   -57,   -46,   -41,
+    -38,   -34,   -31,   -39,   -40,   -45,   -45,   -48,   -47,   -40,   -39,
+    -32,   -26,   -24,   -14,   -9,    -7,    -3,    -2,    3,     4,     0,
+    -2,    -2,    -2,    1,     3,     2,     3,     8,     13,    20,    25,
+    29,    31,    26,    17,    11,    3,     -5,    2,     6,     9,     11,
+    19,    26,    40,    51,    61,    60,    58,    61,    55,    55,    57,
+    60,    54,    40,    42,    38,    34,    38,    37,    34,    32,    35,
+    36,    35,    41,    36,    32,    29,    23,    22,    23,    22,    14,
+    13,    19,    19,    20,    22,    22,    17,    13,    6,     9,     13,
+    15,    17,    19,    11,    15,    8,     4,     6,     -1,    -3,    3,
+    7,     11,    8,     10,    7,     6,     4,     -4,    -5,    -11,   -9,
+    -16,   -14,   -14,   -16,   -16,   -22,   -19,   -19,   -13,   -9,    -4,
+    1,     1,     2,     -6,    -14,   -25,   -32,   -41,   -46,   -50,   -49,
+    -42,   -39,   -34,   -24,   -14,   -18,   -15,   -17,   -21,   -23,   -21,
+    -19,   -21,   -20,   -19,   -20,   -19,   -16,   -17,   -19,   -20,   -20,
+    -20,   -20,   -22,   -22,   -23,   -22,   -22,   -14,   -5,    5,     8,
+    13,    16,    19,    23,    19,    21,    16,    16,    18,    13,    18,
+    13,    15,    18,    12,    12,    6,     11,    8,     5,     5,     9,
+    17,    14,    15,    14,    16,    14,    14,    12,    9,     7,     9,
+    11,    13,    15,    15,    19,    17,    14,    8,     7,     4,     0,
+    3,     8,     10,    7,     8,     19,    15,    19,    18,    19,    17,
+    9,     14,    10,    4,     -3,    -11,   -19,   -25,   -31,   -35,   -36,
+    -28,   -21,   -8,    5,     8,     11,    13,    7,     4,     1,     -7,
+    -15,   -17,   -17,   -21,   -28,   -33,   -37,   -40,   -39,   -41,   -45,
+    -46,   -44,   -40,   -41,   -36,   -31,   -41,   -40,   -42,   -44,   -47,
+    -50,   -49,   -55,   -52,   -52,   -52,   -45,   -50,   -52,   -56,   -58,
+    -60,   -69,   -75,   -82,   -86,   -91,   -87,   -80,   -80,   -72,   -58,
+    -52,   -45,   -33,   -21,   -13,   -12,   -10,   -6,    -1,    -2,    -7,
+    -7,    -5,    -6,    -3,    9,     15,    25,    36,    35,    39,    28,
+    16,    11,    8,     11,    17,    27,    34,    36,    47,    49,    52,
+    52,    42,    46,    49,    55,    65,    66,    67,    62,    56,    53,
+    49,    50,    55,    53,    62,    69,    72,    73,    68,    61,    54,
+    46,    43,    38,    34,    39,    43,    42,    39,    36,    31,    26,
+    24,    17,    13,    14,    14,    21,    26,    29,    28,    26,    24,
+    18,    19,    16,    11,    6,     2,     -2,    1,     3,     2,     -4,
+    -3,    -1,    -3,    -2,    -2,    -5,    -3,    0,     3,     -3,    -6,
+    -6,    -15,   -19,   -25,   -30,   -35,   -39,   -34,   -34,   -34,   -31,
+    -17,   -17,   -8,    -2,    -2,    8,     14,    25,    24,    26,    22,
+    16,    10,    2,     -3,    -5,    -12,   -15,   -11,   -14,   -16,   -17,
+    -17,   -16,   -21,   -18,   -18,   -21,   -23,   -21,   -15,   -11,   -4,
+    -2,    3,     8,     10,    17,    18,    25,    24,    24,    24,    21,
+    24,    23,    24,    22,    23,    31,    39,    49,    58,    64,    67,
+    63,    57,    53,    52,    44,    45,    43,    40,    45,    42,    49,
+    50,    49,    52,    51,    48,    46,    38,    37,    35,    36,    37,
+    37,    37,    44,    45,    47,    42,    42,    36,    35,    44,    40,
+    40,    28,    24,    23,    18,    12,    9,     8,     10,    17,    17,
+    18,    12,    5,     -2,    -12,   -16,   -20,   -27,   -29,   -29,   -26,
+    -22,   -17,   -16,   -15,   -14,   -15,   -11,   -11,   -15,   -19,   -15,
+    -20,   -22,   -24,   -37,   -52,   -62,   -63,   -68,   -64,   -59,   -51,
+    -43,   -42,   -36,   -32,   -33,   -33,   -33,   -41,   -48,   -51,   -49,
+    -48,   -47,   -42,   -45,   -42,   -41,   -40,   -39,   -33,   -29,   -25,
+    -14,   -1,    -4,    -6,    -11,   -16,   -19,   -26,   -29,   -28,   -25,
+    -17,   -10,   -1,    -1,    3,     7,     -1,    -3,    -8,    -18,   -20,
+    -20,   -16,   -13,   -11,   -8,    0,     6,     8,     11,    14,    15,
+    20,    26,    26,    26,    24,    23,    24,    30,    34,    41,    52,
+    61,    70,    80,    85,    86,    89,    84,    87,    79,    67,    60,
+    57,    59,    63,    68,    74,    78,    84,    89,    91,    87,    81,
+    74,    69,    63,    59,    59,    56,    58,    60,    60,    59,    54,
+    49,    41,    40,    34,    25,    19,    11,    1,     0,     -1,    -4,
+    -8,    -12,   -12,   -17,   -22,   -31,   -44,   -54,   -58,   -68,   -74,
+    -80,   -80,   -73,   -65,   -61,   -61,   -55,   -50,   -50,   -59,   -65,
+    -69,   -73,   -73,   -78,   -79,   -83,   -87,   -87,   -88,   -94,   -103,
+    -107,  -107,  -109,  -106,  -113,  -115,  -110,  -105,  -100,  -100,  -92,
+    -78,   -62,   -49,   -39,   -35,   -27,   -26,   -25,   -24,   -22,   -23,
+    -28,   -26,   -22,   -15,   -11,   -4,    4,     13,    21,    32,    31,
+    28,    30,    30,    28,    23,    25,    23,    21,    25,    21,    26,
+    27,    32,    40,    48,    53,    55,    54,    55,    55,    54,    48,
+    44,    47,    48,    54,    60,    71,    79,    79,    74,    72,    59,
+    48,    42,    32,    26,    22,    21,    23,    22,    31,    42,    44,
+    41,    36,    30,    30,    33,    38,    35,    30,    28,    20,    15,
+    8,     4,     6,     9,     16,    26,    27,    23,    19,    16,    10,
+    4,     -4,    -12,   -12,   -16,   -16,   -19,   -24,   -23,   -23,   -31,
+    -34,   -38,   -40,   -41,   -39,   -39,   -36,   -36,   -40,   -45,   -48,
+    -53,   -66,   -73,   -76,   -76,   -78,   -75,   -71,   -65,   -59,   -58,
+    -59,   -56,   -60,   -62,   -62,   -62,   -64,   -68,   -73,   -79,   -80,
+    -85,   -87,   -85,   -78,   -72,   -66,   -56,   -48,   -42,   -37,   -35,
+    -32,   -33,   -31,   -25,   -26,   -27,   -16,   -18,   -18,   -13,   -14,
+    -17,   -22,   -24,   -25,   -23,   -19,   -14,   -12,   -11,   -7,    -4,
+    -1,    2,     5,     8,     10,    10,    18,    28,    29,    25,    22,
+    29,    21,    20,    21,    22,    30,    32,    41,    41,    45,    46,
+    49,    52,    57,    59,    58,    52,    46,    47,    56,    58,    49,
+    49,    46,    40,    33,    23,    14,    11,    16,    29,    34,    37,
+    41,    42,    48,    54,    60,    61,    62,    62,    69,    79,    76,
+    71,    72,    71,    64,    59,    54,    49,    40,    42,    34,    23,
+    27,    18,    13,    9,     3,     -4,    -8,    -16,   -18,   -20,   -26,
+    -28,   -30,   -32,   -29,   -32,   -35,   -39,   -41,   -38,   -34,   -31,
+    -26,   -18,   -21,   -20,   -22,   -28,   -35,   -34,   -31,   -33,   -31,
+    -31,   -40,   -43,   -45,   -53,   -64,   -67,   -74,   -75,   -74,   -75,
+    -70,   -61,   -56,   -45,   -37,   -30,   -33,   -35,   -32,   -31,   -27,
+    -25,   -19,   -17,   -14,   -9,    -4,    -1,    -3,    -4,    1,     8,
+    14,    20,    24,    25,    18,    11,    7,     -3,    -9,    -3,    4,
+    15,    30,    29,    33,    33,    36,    35,    31,    33,    34,    42,
+    43,    42,    47,    49,    53,    61,    69,    73,    74,    79,    81,
+    84,    76,    69,    62,    47,    39,    31,    19,    8,     2,     -6,
+    -5,    -3,    -3,    -1,    1,     -2,    -3,    -3,    -6,    -12,   -13,
+    -15,   -11,   -5,    -4,    -8,    -14,   -9,    -3,    0,     -3,    -4,
+    0,     3,     0,     -6,    -14,   -23,   -33,   -38,   -41,   -38,   -38,
+    -34,   -30,   -29,   -29,   -26,   -31,   -33,   -41,   -49,   -50,   -56,
+    -57,   -58,   -54,   -46,   -39,   -39,   -34,   -31,   -28,   -30,   -30,
+    -31,   -29,   -27,   -16,   -18,   -17,   -15,   -13,   -15,   -12,   -7,
+    -11,   -9,    -9,    -4,    -11,   -7,    -7,    -8,    -9,    -10,   -7,
+    -9,    1,     9,     15,    12,    19,    19,    18,    17,    13,    11,
+    8,     6,     10,    17,    20,    26,    28,    33,    39,    30,    25,
+    25,    18,    16,    21,    26,    30,    33,    32,    36,    42,    49,
+    46,    39,    44,    44,    37,    35,    30,    24,    22,    23,    26,
+    23,    25,    21,    24,    24,    22,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h
new file mode 100644
index 0000000..4cc8030
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h
@@ -0,0 +1,29 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This data was created from the PCM data in a WAV file held in v2 of the
+// Speech Commands test dataset, at the path:
+// speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav
+// This should contain all 16,000 samples from the one-second file.
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_1000MS_SAMPLE_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_1000MS_SAMPLE_DATA_H_
+
+#include <cstdint>
+
+extern const int g_no_1000ms_sample_data_size;
+extern const int16_t g_no_1000ms_sample_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_1000MS_SAMPLE_DATA_H_
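The header above only declares the buffer, so it is worth pinning down the size relationship its comment promises. A minimal consistency check, assuming the array really holds one second of 16 kHz audio as stated (the main() scaffold is the editor's illustration, not part of the change):

#include <cassert>

#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_1000ms_sample_data.h"

int main() {
  // One second at 16 kHz should yield exactly 16,000 int16_t samples.
  assert(g_no_1000ms_sample_data_size == 16000);
  return 0;
}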
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc
deleted file mode 100644
index e98c84f..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc
+++ /dev/null
@@ -1,152 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.h"
-
-/* File automatically created by
- * tensorflow/examples/speech_commands/wav_to_features.py \
- * --sample_rate=16000 \
- * --clip_duration_ms=1000 \
- * --window_size_ms=30 \
- * --window_stride_ms=20 \
- * --feature_bin_count=40 \
- * --quantize \
- * --preprocess="average" \
- * --input_wav="speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav" \
- * --output_c_file="no_features_data.cc" \
- */
-
-const int g_no_f9643d42_nohash_4_width = 43;
-const int g_no_f9643d42_nohash_4_height = 49;
-const unsigned char g_no_f9643d42_nohash_4_data[] = {
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   5,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   67, 2,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   139, 2,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   195, 2,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   230, 2,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  255, 7,
-    6, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 255, 7,  16, 1,   1,   0,  2, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 255, 7,   22, 0,  1,   0,
-    1, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 238, 5,   20, 3, 4,   1,  1,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  144, 4,   19, 3, 5,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  42, 6,   3,
-    1, 3,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  3, 1,   5,  0,  1,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  5, 1,   3,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    1, 0,   1,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
-    0,
-};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.h
deleted file mode 100644
index e2ee0c4..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_
-
-extern const int g_no_f9643d42_nohash_4_width;
-extern const int g_no_f9643d42_nohash_4_height;
-extern const unsigned char g_no_f9643d42_nohash_4_data[];
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc
deleted file mode 100644
index c4fc5c3..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// See the header for documentation on the meaning of this data.
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.h"
-
-const uint8_t g_no_power_spectrum_data[g_no_power_spectrum_data_size] = {
-    255, 7, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0,   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.h
deleted file mode 100644
index fa39d3c..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This data was extracted from the larger feature data held in
-// no_features_data.cc and consists of the 29th spectrogram slice of 43 values.
-// This is the expected result of running the sample data in
-// no_30ms_sample_data.cc through the preprocessing pipeline.
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_POWER_SPECTRUM_DATA_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_POWER_SPECTRUM_DATA_H_
-
-#include <cstdint>
-
-constexpr int g_no_power_spectrum_data_size = 43;
-extern const uint8_t g_no_power_spectrum_data[];
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_POWER_SPECTRUM_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/osx/audio_provider.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/osx/audio_provider.cc
index 892757e..6468c1a 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/osx/audio_provider.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/osx/audio_provider.cc
@@ -17,7 +17,7 @@
 
 #include <AudioToolbox/AudioToolbox.h>
 
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h"
 
 namespace {
 
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc
deleted file mode 100644
index f8858aa..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// Reference implementation of the preprocessing pipeline, with the same
-// results as the audio tutorial at
-// https://www.tensorflow.org/tutorials/sequences/audio_recognition
-// This module takes 30ms of PCM-encoded signed 16-bit audio samples (at 16 kHz,
-// so 480 values), and extracts a power spectrum of frequencies. There are 43
-// frequency bands in the result, derived from the original 256 output from the
-// discrete Fourier transform, and averaged together in groups of 6.
-// It's expected that most platforms will have optimized versions of the
-// functions used here, for example replacing the DFT with an FFT, so this
-// version shouldn't be used where performance is critical.
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
-
-#include <cmath>
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
-
-namespace {
-
-// Needed because some platforms don't have M_PI defined.
-constexpr float kPi = 3.14159265358979323846f;
-
-// Performs a discrete Fourier transform on the real inputs. This corresponds to
-// rdft() in the FFT package at http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html,
-// and to kiss_fftr() in KISSFFT at https://github.com/mborgerding/kissfft.
-// It takes in an array of float real values, and returns a result of the same
-// length with float real and imaginary components interleaved, so
-// fourier_output[0] is the first real value, fourier_output[1] is the first
-// imaginary, fourier_output[2] is the second real, and so on.
-// The calling function should ensure that the array passed in as fourier_output
-// is at least time_series_size in length. Most optimized FFT implementations
-// require the length to be a power of two as well, but this version doesn't
-// enforce that.
-void CalculateDiscreteFourierTransform(float* time_series, int time_series_size,
-                                       float* fourier_output) {
-  for (int i = 0; i < time_series_size / 2; ++i) {
-    float real = 0;
-    for (int j = 0; j < time_series_size; ++j) {
-      real += time_series[j] * cos(j * i * kPi * 2 / time_series_size);
-    }
-    float imaginary = 0;
-    for (int j = 0; j < time_series_size; ++j) {
-      imaginary -= time_series[j] * sin(j * i * kPi * 2 / time_series_size);
-    }
-    fourier_output[(i * 2) + 0] = real;
-    fourier_output[(i * 2) + 1] = imaginary;
-  }
-}
-
-// Produces a simple sine curve that is used to ensure frequencies at the center
-// of the current sample window are weighted more heavily than those at the end.
-void CalculatePeriodicHann(int window_length, float* window_function) {
-  for (int i = 0; i < window_length; ++i) {
-    window_function[i] = 0.5 - 0.5 * cos((2 * kPi * i) / window_length);
-  }
-}
-
-}  // namespace
-
-TfLiteStatus Preprocess(tflite::ErrorReporter* error_reporter,
-                        const int16_t* input, int input_size, int output_size,
-                        uint8_t* output) {
-  // Ensure our input and output data arrays are valid.
-  if (input_size > kMaxAudioSampleSize) {
-    error_reporter->Report("Input size %d larger than %d", input_size,
-                           kMaxAudioSampleSize);
-    return kTfLiteError;
-  }
-  if (output_size != kFeatureSliceSize) {
-    error_reporter->Report("Requested output size %d doesn't match %d",
-                           output_size, kFeatureSliceSize);
-    return kTfLiteError;
-  }
-
-  // Pre-calculate the window function we'll be applying to the input data.
-  // In a real application, we'd calculate this table once in an initialization
-  // function and store it for repeated reuse.
-  float window_function[kMaxAudioSampleSize];
-  CalculatePeriodicHann(input_size, window_function);
-
-  // Apply the window function to our time series input, and pad it with zeroes
-  // to the next power of two.
-  float float_input[kMaxAudioSampleSize];
-  for (int i = 0; i < kMaxAudioSampleSize; ++i) {
-    if (i < input_size) {
-      float_input[i] =
-          (input[i] * window_function[i]) / static_cast<float>(1 << 15);
-    } else {
-      float_input[i] = 0.0f;
-    }
-  }
-
-  // Pull the frequency data from the time series sample.
-  float fourier_values[kMaxAudioSampleSize];
-  CalculateDiscreteFourierTransform(float_input, kMaxAudioSampleSize,
-                                    fourier_values);
-
-  // We have the complex numbers giving us information about each frequency
-  // band, but all we want to know is how strong each frequency is, so calculate
-  // the squared magnitude by adding together the squares of each component.
-  float power_spectrum[kMaxAudioSampleSize / 2];
-  for (int i = 0; i < (kMaxAudioSampleSize / 2); ++i) {
-    const float real = fourier_values[(i * 2) + 0];
-    const float imaginary = fourier_values[(i * 2) + 1];
-    power_spectrum[i] = (real * real) + (imaginary * imaginary);
-  }
-
-  // Finally, reduce the size of the output by averaging together six adjacent
-  // frequencies into each slot, producing an array of 43 values.
-  for (int i = 0; i < kFeatureSliceSize; ++i) {
-    float total = 0.0f;
-    for (int j = 0; j < kAverageWindowSize; ++j) {
-      const int index = (i * kAverageWindowSize) + j;
-      if (index < (kMaxAudioSampleSize / 2)) {
-        total += power_spectrum[index];
-      }
-    }
-    const float average = total / kAverageWindowSize;
-    // Quantize the result into eight bits, effectively multiplying by two.
-    // The 127.5 constant here has to match the features_max value defined in
-    // tensorflow/examples/speech_commands/input_data.py, and this also assumes
-    // that features_min is zero. If it wasn't, we'd have to subtract it first.
-    int quantized_average = roundf(average * (255.0f / 127.5f));
-    if (quantized_average < 0) {
-      quantized_average = 0;
-    }
-    if (quantized_average > 255) {
-      quantized_average = 255;
-    }
-    output[i] = quantized_average;
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus Preprocess_1sec(tflite::ErrorReporter* error_reporter,
-                             const int16_t* input, uint8_t* output) {
-  int i;
-  for (i = 0; i < 49; i++) {
-    Preprocess(error_reporter, input + i * 320, 480, 43, output + i * 43);
-  }
-  return kTfLiteOk;
-}
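For readers tracing the removed Preprocess_1sec loop above: each of its 49 iterations reads 480 samples at a 320-sample stride, which fits exactly inside a one-second 16 kHz clip. A small check of that window arithmetic (constants copied from the deleted code; the scaffold is illustrative):

#include <cassert>

int main() {
  const int kSampleCount = 16000;  // one second at 16 kHz
  const int kWindowSize = 480;     // 30ms of samples per slice
  const int kStride = 320;         // 20ms advance between slices
  const int kSliceCount = 49;      // loop count in Preprocess_1sec
  // The last slice starts at 48 * 320 = 15360 and ends at 15840,
  // comfortably inside the 16,000-sample clip.
  assert((kSliceCount - 1) * kStride + kWindowSize <= kSampleCount);
  return 0;
}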
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h
deleted file mode 100644
index d710bee..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_PREPROCESSOR_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_PREPROCESSOR_H_
-
-#include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
-
-// Converts audio sample data into a more compact form that's appropriate for
-// feeding into a neural network. There are reference implementations that use
-// both floating point and fixed point available, but because the calculations
-// involved can be time-consuming, it's recommended that you use or write
-// specialized versions for your platform.
-TfLiteStatus Preprocess(tflite::ErrorReporter* error_reporter,
-                        const int16_t* input, int input_size, int output_size,
-                        uint8_t* output);
-
-TfLiteStatus Preprocess_1sec(tflite::ErrorReporter* error_reporter,
-                             const int16_t* input, uint8_t* output);
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_PREPROCESSOR_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc
deleted file mode 100644
index e8b49f6..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
-#include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.h"
-#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
-#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
-
-TF_LITE_MICRO_TESTS_BEGIN
-
-TF_LITE_MICRO_TEST(TestPreprocessor) {
-  tflite::MicroErrorReporter micro_error_reporter;
-  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
-
-  uint8_t yes_calculated_data[g_yes_power_spectrum_data_size];
-  TfLiteStatus yes_status = Preprocess(
-      error_reporter, g_yes_30ms_sample_data, g_yes_30ms_sample_data_size,
-      g_yes_power_spectrum_data_size, yes_calculated_data);
-  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, yes_status);
-
-  for (int i = 0; i < g_yes_power_spectrum_data_size; ++i) {
-    TF_LITE_MICRO_EXPECT_EQ(g_yes_power_spectrum_data[i],
-                            yes_calculated_data[i]);
-    if (g_yes_power_spectrum_data[i] != yes_calculated_data[i]) {
-      error_reporter->Report("Expected value %d but found %d",
-                             g_yes_power_spectrum_data[i],
-                             yes_calculated_data[i]);
-    }
-  }
-
-  uint8_t no_calculated_data[g_yes_power_spectrum_data_size];
-  TfLiteStatus no_status = Preprocess(
-      error_reporter, g_no_30ms_sample_data, g_no_30ms_sample_data_size,
-      g_no_power_spectrum_data_size, no_calculated_data);
-  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, no_status);
-
-  for (int i = 0; i < g_no_power_spectrum_data_size; ++i) {
-    TF_LITE_MICRO_EXPECT_EQ(g_no_power_spectrum_data[i], no_calculated_data[i]);
-    if (g_no_power_spectrum_data[i] != no_calculated_data[i]) {
-      error_reporter->Report("Expected value %d but found %d",
-                             g_no_power_spectrum_data[i],
-                             no_calculated_data[i]);
-    }
-  }
-}
-
-TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc
index 9366dc7..8187962 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.cc
@@ -28,8 +28,8 @@
       suppression_ms_(suppression_ms),
       minimum_count_(minimum_count),
       previous_results_(error_reporter) {
-  previous_top_label_ = "_silence_";
-  previous_top_label_time_ = 0;
+  previous_top_label_ = "silence";
+  previous_top_label_time_ = std::numeric_limits<int32_t>::min();
 }
 
 TfLiteStatus RecognizeCommands::ProcessLatestResults(
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h b/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h
index adefffe..292cd3e 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h
@@ -19,7 +19,7 @@
 #include <cstdint>
 
 #include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
 #include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
 
 // Partial implementation of std::deque, just providing the functionality
@@ -129,8 +129,8 @@
   // help reduce spurious recognitions.
   explicit RecognizeCommands(tflite::ErrorReporter* error_reporter,
                              int32_t average_window_duration_ms = 1000,
-                             uint8_t detection_threshold = 51,
-                             int32_t suppression_ms = 500,
+                             uint8_t detection_threshold = 200,
+                             int32_t suppression_ms = 1500,
                              int32_t minimum_count = 3);
 
   // Call this with the results of running a model on sample data.
@@ -149,8 +149,6 @@
 
   // Working variables
   PreviousResultsQueue previous_results_;
-  int previous_results_head_;
-  int previous_results_tail_;
   const char* previous_top_label_;
   int32_t previous_top_label_time_;
 };
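Because this hunk raises the default detection_threshold from 51 to 200 and suppression_ms from 500 to 1500, here is a construction sketch showing what callers now get implicitly (hypothetical usage; only the constructor visible in this hunk is assumed):

#include "tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands.h"
#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"

void ExampleSetup() {
  static tflite::MicroErrorReporter micro_error_reporter;
  // Defaults after this change: 1000ms averaging window, detection
  // threshold 200, 1500ms suppression, minimum of 3 results per window.
  static RecognizeCommands recognizer(&micro_error_reporter);
}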
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands_test.cc
index f0cc73f..6582c94 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands_test.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/recognize_commands_test.cc
@@ -118,7 +118,9 @@
     }
   }
   TF_LITE_MICRO_EXPECT(has_found_new_command);
-  TF_LITE_MICRO_EXPECT_EQ(0, tflite::testing::TestStrcmp("yes", new_command));
+  if (has_found_new_command) {
+    TF_LITE_MICRO_EXPECT_EQ(0, tflite::testing::TestStrcmp("yes", new_command));
+  }
 
   TfLiteTensor no_results = tflite::testing::CreateQuantizedTensor(
       {0, 0, 0, 255}, tflite::testing::IntArrayFromInitializer({2, 1, 4}),
@@ -141,8 +143,10 @@
     }
   }
   TF_LITE_MICRO_EXPECT(has_found_new_command);
-  TF_LITE_MICRO_EXPECT_EQ(231, score);
-  TF_LITE_MICRO_EXPECT_EQ(0, tflite::testing::TestStrcmp("no", new_command));
+  if (has_found_new_command) {
+    TF_LITE_MICRO_EXPECT_EQ(231, score);
+    TF_LITE_MICRO_EXPECT_EQ(0, tflite::testing::TestStrcmp("no", new_command));
+  }
 }
 
 TF_LITE_MICRO_TEST(RecognizeCommandsTestBadInputLength) {
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/CMSIS/simple_features_generator.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/CMSIS/simple_features_generator.cc
new file mode 100644
index 0000000..403976e
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/CMSIS/simple_features_generator.cc
@@ -0,0 +1,95 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h"
+
+extern "C" {
+#define IFFT_FLAG_R 0
+#define BIT_REVERSE_FLAG 1
+#define FFT_SIZE 512
+#define FFT_SIZE_DIV2 256
+#include <arm_math.h>
+#include "arm_cmplx_mag_squared_q10p6.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/CMSIS/hanning.h"
+}
+
+void quantize(q15_t* bufA, q15_t* bufB, uint8_t* output);
+
+q15_t bufA[FFT_SIZE];
+q15_t bufB[FFT_SIZE];
+arm_rfft_instance_q15 S_arm_fft;
+arm_status arm_math_status;
+
+namespace {
+// These constants allow us to allocate fixed-sized arrays on the stack for our
+// working memory.
+constexpr int kInputSize = 512;
+constexpr int kAverageWindowSize = 6;
+constexpr int kOutputSize =
+    ((kInputSize / 2) + (kAverageWindowSize - 1)) / kAverageWindowSize;
+}  // namespace
+
+TfLiteStatus GenerateSimpleFeatures(tflite::ErrorReporter* error_reporter,
+                                    const int16_t* input, int input_size,
+                                    int output_size, uint8_t* output) {
+  if (input_size > kInputSize) {
+    error_reporter->Report("Input size %d larger than %d", input_size,
+                           kInputSize);
+    return kTfLiteError;
+  }
+  if (output_size != kOutputSize) {
+    error_reporter->Report("Requested output size %d doesn't match %d",
+                           output_size, kOutputSize);
+    return kTfLiteError;
+  }
+
+  // 30ms at 16 kHz = 480 samples
+  // We want to pad the rest of the 512-sample buffer with zeros
+  arm_mult_q15((q15_t*)input, g_hanning, bufB, 480);
+  int i;
+  for (i = 480; i < 512; i++) {
+    bufB[i] = 0;
+  }
+
+  // Should move this init code outside of GenerateSimpleFeatures().
+  arm_math_status =
+      arm_rfft_init_q15(&S_arm_fft, FFT_SIZE, IFFT_FLAG_R, BIT_REVERSE_FLAG);
+  arm_rfft_q15(&S_arm_fft, bufB, bufA);
+
+  // The rfft function packs data as follows:
+  // {real[0], real[N/2], real[1], imag[1], ..., real[N/2-1], imag[N/2-1]}
+  // Below we pack as follows:
+// {real[0], 0, real[1], imag[1], ..., real[N/2-1], imag[N/2-1], real[N/2], 0}
+  bufA[FFT_SIZE_DIV2] = bufA[1];
+  bufA[FFT_SIZE_DIV2 + 1] = 0;
+  bufA[1] = 0;
+  arm_cmplx_mag_squared_q10p6(bufA, bufB, FFT_SIZE_DIV2 + 1);
+
+  quantize(bufA, bufB, output);
+
+  return kTfLiteOk;
+}
+
+void quantize(q15_t* bufA, q15_t* bufB, uint8_t* output) {
+  int i;
+  for (i = 0; i < 42; i++) {
+    arm_mean_q15(bufB + 6 * i, 6, bufA + i);
+  }
+  arm_mean_q15(bufB + 252, 5, bufA + 42);
+
+  for (i = 0; i < 43; i++) {
+    output[i] = (uint8_t)(bufA[i] >> 5);
+  }
+}
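The bin bookkeeping in quantize() above is easy to misread: the 257 squared-magnitude values (FFT_SIZE_DIV2 + 1) are reduced to 43 features as 42 averages over 6 bins each (bins 0..251) plus one final average over the remaining 5 bins (252..256). A quick arithmetic check of that partition (editor's sketch):

#include <cassert>

int main() {
  const int kMagnitudeBins = 256 + 1;  // FFT_SIZE_DIV2 + 1 values
  const int kFullGroups = 42;          // groups averaged over 6 bins
  const int kLastGroupSize = 5;        // final group starting at bin 252
  assert(kFullGroups * 6 + kLastGroupSize == kMagnitudeBins);
  assert(kFullGroups + 1 == 43);       // matches the 43 feature outputs
  return 0;
}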
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/fixed_point/simple_features_generator.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/fixed_point/simple_features_generator.cc
new file mode 100644
index 0000000..ad11684
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/fixed_point/simple_features_generator.cc
@@ -0,0 +1,212 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Reference implementation of the preprocessing pipeline, with the same
+// results as the audio tutorial at
+// https://www.tensorflow.org/tutorials/sequences/audio_recognition
+// This module takes 30ms of PCM-encoded signed 16-bit audio samples (at 16 kHz,
+// so 480 values), and extracts a power spectrum of frequencies. There are 43
+// frequency bands in the result, derived from the original 256 output from the
+// discrete Fourier transform, and averaged together in groups of 6.
+// It's expected that most platforms will have optimized versions of the
+// functions used here, for example replacing the DFT with an FFT, so this
+// version shouldn't be used where performance is critical.
+// This implementation uses fixed point for any non-constant calculations,
+// instead of floating point, to help show how this can work on platforms that
+// don't have good float support.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h"
+
+#include <cmath>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h"
+
+namespace {
+
+// q format notation: qx.y => 1 sign bit, x-1 integer bits, y fraction bits.
+// Use standard (non-saturating) arithmetic with signed ints of size x+y bits.
+// Sacrifice some precision to avoid use of 64-bit ints.
+
+// q1.15 * q1.15 => q2.30
+inline int32_t Q1_15_FixedMultiply_Q2_30(int16_t a, int16_t b) {
+  int32_t big_a = a;
+  int32_t big_b = b;
+  return big_a * big_b;
+}
+
+// q2.30 * q2.30 => q10.22
+inline int32_t Q2_30_FixedMultiply_Q10_22(int32_t a, int32_t b) {
+  // q2.30 result
+  int32_t tmp = (a >> 15) * (b >> 15);
+  // q10.22 result
+  return tmp >> 8;
+}
+
+// q10.22 * q10.22 => q10.22
+// Will overflow if product is >= 512.
+// Largest product in small test set is 465.25
+inline int32_t Q10_22_FixedMultiply_Q10_22(int32_t a, int32_t b) {
+  // q10.22 result
+  return (a >> 11) * (b >> 11);
+}
+
+// float => q2.30
+// No checking for saturation.  Only used for inputs in range [-1, 1].
+inline int32_t FloatToFixed_Q2_30(float input) {
+  return static_cast<int32_t>(roundf(input * (1 << 30)));
+}
+
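A worked instance of the q-format convention above may help before the transform code (values are the editor's illustration): in q1.15, 0.5 is stored as 0.5 * 2^15 = 16384, and multiplying two q1.15 halves gives 2^28, which reads back as 0.25 in q2.30.

#include <cassert>
#include <cstdint>

int main() {
  const int16_t half_q1_15 = 1 << 14;  // 0.5 encoded in q1.15
  // Same widening multiply as Q1_15_FixedMultiply_Q2_30 above.
  const int32_t product_q2_30 =
      static_cast<int32_t>(half_q1_15) * half_q1_15;
  assert(product_q2_30 == (INT32_C(1) << 28));  // 0.25 in q2.30
  return 0;
}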
+// Performs a discrete Fourier transform on the real inputs. This corresponds to
+// rdft() in the FFT package at http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html,
+// and to kiss_fftr() in KISSFFT at https://github.com/mborgerding/kissfft.
+// It takes in an array of float real values, and returns a result of the same
+// length with q10.22 fixed point real and imaginary components interleaved, so
+// fourier_output[0] is the first real value, fourier_output[1] is the first
+// imaginary, fourier_output[2] is the second real, and so on.
+// The calling function should ensure that the array passed in as fourier_output
+// is at least time_series_size in length. Most optimized FFT implementations
+// require the length to be a power of two as well, but this version doesn't
+// enforce that.
+
+// input: q2.30 fixed point.  output: q10.22 fixed point.
+// Outputs interpreted as q10.22 fixed point are un-scaled.
+void CalculateDiscreteFourierTransform(int32_t* time_series,
+                                       int time_series_size,
+                                       int32_t* fourier_output) {
+  for (int i = 0; i < time_series_size / 2; ++i) {
+    int32_t real = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      const int32_t real_scale =
+          FloatToFixed_Q2_30(cos(j * i * M_PI * 2 / time_series_size));
+      real += Q2_30_FixedMultiply_Q10_22(time_series[j], real_scale);
+    }
+    int32_t imaginary = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      const int32_t imaginary_scale =
+          FloatToFixed_Q2_30(sin(j * i * M_PI * 2 / time_series_size));
+      imaginary -= Q2_30_FixedMultiply_Q10_22(time_series[j], imaginary_scale);
+    }
+    fourier_output[(i * 2) + 0] = real;
+    fourier_output[(i * 2) + 1] = imaginary;
+  }
+}
+
+// Produces a simple sine curve that is used to ensure frequencies at the center
+// of the current sample window are weighted more heavily than those at the end.
+// q1.15 output format.
+void CalculatePeriodicHann(int window_length, int16_t* window_function) {
+  for (int i = 0; i < window_length; ++i) {
+    const float real_value = (0.5 - 0.5 * cos((2 * M_PI * i) / window_length));
+    int tmp = static_cast<int32_t>(roundf(real_value * (1 << 15)));
+    // Saturate the 0x8000 value to 0x7fff
+    if (tmp > 0x7fff) tmp = 0x7fff;
+    window_function[i] = tmp;
+  }
+}
+
+}  // namespace
+
+TfLiteStatus GenerateSimpleFeatures(tflite::ErrorReporter* error_reporter,
+                                    const int16_t* input, int input_size,
+                                    int output_size, uint8_t* output) {
+  // Ensure our input and output data arrays are valid.
+  if (input_size > kMaxAudioSampleSize) {
+    error_reporter->Report("Input size %d larger than %d", input_size,
+                           kMaxAudioSampleSize);
+    return kTfLiteError;
+  }
+  if (output_size != kFeatureSliceSize) {
+    error_reporter->Report("Requested output size %d doesn't match %d",
+                           output_size, kFeatureSliceSize);
+    return kTfLiteError;
+  }
+
+  // Pre-calculate the window function we'll be applying to the input data.
+  // In a real application, we'd calculate this table once in an initialization
+  // function and store it for repeated reuse.
+  // q1.15 format.
+  int16_t window_function[kMaxAudioSampleSize];
+  CalculatePeriodicHann(input_size, window_function);
+
+  // Apply the window function to our time series input, and pad it with zeroes
+  // to the next power of two.
+  int32_t fixed_input[kMaxAudioSampleSize];
+  for (int i = 0; i < kMaxAudioSampleSize; ++i) {
+    if (i < input_size) {
+      // input is int16_t.  Treat as q1.15 fixed point value in range [-1,1)
+      // window_function is also q1.15 fixed point number
+      fixed_input[i] = Q1_15_FixedMultiply_Q2_30(input[i], window_function[i]);
+    } else {
+      fixed_input[i] = 0;
+    }
+  }
+
+  // Pull the frequency data from the time series sample.
+  // Calculated in q10.22 format from q2.30 inputs.
+  int32_t fourier_values[kMaxAudioSampleSize];
+  CalculateDiscreteFourierTransform(fixed_input, kMaxAudioSampleSize,
+                                    fourier_values);
+
+  // We have the complex numbers giving us information about each frequency
+  // band, but all we want to know is how strong each frequency is, so calculate
+  // the squared magnitude by adding together the squares of each component.
+  int32_t power_spectrum[kMaxAudioSampleSize / 2];
+  for (int i = 0; i < (kMaxAudioSampleSize / 2); ++i) {
+    const int32_t real = fourier_values[(i * 2) + 0];
+    const int32_t imaginary = fourier_values[(i * 2) + 1];
+    // q10.22 results
+    power_spectrum[i] = Q10_22_FixedMultiply_Q10_22(real, real) +
+                        Q10_22_FixedMultiply_Q10_22(imaginary, imaginary);
+  }
+
+  // Finally, reduce the size of the output by averaging together six adjacent
+  // frequencies into each slot, producing an array of 43 values.
+  // Power_spectrum numbers are q10.22.  Divide by kAverageWindowSize inside
+  // loop to prevent overflow.
+  for (int i = 0; i < kFeatureSliceSize; ++i) {
+    int32_t average = 0;
+    for (int j = 0; j < kAverageWindowSize; ++j) {
+      const int index = (i * kAverageWindowSize) + j;
+      if (index < (kMaxAudioSampleSize / 2)) {
+        average += power_spectrum[index] / kAverageWindowSize;
+      }
+    }
+    // Quantize the result into eight bits, effectively multiplying by two.
+    // The 127.5 constant here has to match the features_max value defined in
+    // tensorflow/examples/speech_commands/input_data.py, and this also assumes
+    // that features_min is zero.
+    //
+    // q10.22 input
+    // integer output
+    //
+    // output = (input - features_min) *
+    //     (output_max - output_min) / (features_max - features_min)
+    // == (input) * (255) / (127.5)
+    // == input * 2
+    // == input << 1
+    // Also want to round to nearest integer and only keep integer bits
+    // => ((input << 1) + 0x200000) >> 22
+    // == (input + 0x100000) >> 21
+    int32_t quantized_average = (average + 0x100000) >> 21;
+    if (quantized_average < 0) {
+      quantized_average = 0;
+    }
+    if (quantized_average > 255) {
+      quantized_average = 255;
+    }
+    output[i] = quantized_average;
+  }
+  return kTfLiteOk;
+}
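The rounding identity in the quantization comment above checks out on a concrete value: a q10.22 input encoding 100.0 quantizes to 200, i.e. input * 2 rounded to the nearest integer. A small verification sketch (editor's values, not part of the change):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t input_q10_22 = 100 << 22;  // 100.0 encoded in q10.22
  // Same expression as the quantization step above: multiply by two and
  // round to nearest while dropping the 22 fraction bits.
  const int32_t quantized = (input_q10_22 + 0x100000) >> 21;
  assert(quantized == 200);
  return 0;
}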
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.cc
new file mode 100644
index 0000000..0b20f2f
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.cc
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See the header for documentation on the meaning of this data.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h"
+
+const uint8_t g_no_power_spectrum_data[g_no_power_spectrum_data_size] = {
+    255, 7, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h
new file mode 100644
index 0000000..9693950
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h
@@ -0,0 +1,29 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This data was extracted from the larger feature data held in
+// no_features_data.cc and consists of the 29th spectrogram slice of 43 values.
+// This is the expected result of running the sample data in
+// no_30ms_sample_data.cc through the preprocessing pipeline.
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_POWER_SPECTRUM_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_POWER_SPECTRUM_DATA_H_
+
+#include <cstdint>
+
+constexpr int g_no_power_spectrum_data_size = 43;
+extern const uint8_t g_no_power_spectrum_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_POWER_SPECTRUM_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_simple_features_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_simple_features_data.cc
new file mode 100644
index 0000000..3d3a953
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_simple_features_data.cc
@@ -0,0 +1,152 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_simple_features_data.h"
+
+/* File automatically created by
+ * tensorflow/examples/speech_commands/wav_to_features.py \
+ * --sample_rate=16000 \
+ * --clip_duration_ms=1000 \
+ * --window_size_ms=30 \
+ * --window_stride_ms=20 \
+ * --feature_bin_count=40 \
+ * --quantize=1 \
+ * --preprocess="average" \
+ * --input_wav="speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav" \
+ * --output_c_file="no_simple_features_data.cc" \
+ */
+
+const int g_no_simple_f9643d42_nohash_4_width = 43;
+const int g_no_simple_f9643d42_nohash_4_height = 49;
+const unsigned char g_no_simple_f9643d42_nohash_4_data[] = {
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   5,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   67, 2,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   139, 2,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   195, 2,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   230, 2,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  255, 7,
+    6, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 255, 7,  16, 1,   1,   0,  2, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 255, 7,   22, 0,  1,   0,
+    1, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 238, 5,   20, 3, 4,   1,  1,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  144, 4,   19, 3, 5,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  42, 6,   3,
+    1, 3,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  3, 1,   5,  0,  1,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  5, 1,   3,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    1, 0,   1,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_simple_features_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_simple_features_data.h
new file mode 100644
index 0000000..30332b3
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_simple_features_data.h
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_SIMPLE_FEATURES_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_SIMPLE_FEATURES_DATA_H_
+
+extern const int g_no_simple_f9643d42_nohash_4_width;
+extern const int g_no_simple_f9643d42_nohash_4_height;
+extern const unsigned char g_no_simple_f9643d42_nohash_4_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_SIMPLE_FEATURES_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.cc
new file mode 100644
index 0000000..3aa05b7
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.cc
@@ -0,0 +1,148 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Reference implementation of the preprocessing pipeline, with the same
+// results as the audio tutorial at
+// https://www.tensorflow.org/tutorials/sequences/audio_recognition
+// This module takes 30ms of PCM-encoded signed 16-bit audio samples (at 16KHz,
+// so 480 values) and extracts a power spectrum of frequencies. There are 43
+// frequency bands in the result, derived from the original 256 outputs of the
+// discrete Fourier transform by averaging them together in groups of six.
+// It's expected that most platforms will have optimized versions of the
+// functions used here, for example replacing the DFT with an FFT, so this
+// version shouldn't be used where performance is critical.
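+// (The padded 512-sample input yields 256 complex frequency bins, and
+// ceil(256 / 6) == 43, which is where the 43 bands come from.)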
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h"
+
+#include <cmath>
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h"
+
+namespace {
+
+// Needed because some platforms don't have M_PI defined.
+constexpr float kPi = 3.14159265358979323846f;
+
+// Performs a discrete Fourier transform on the real inputs. This corresponds to
+// rdft() in the FFT package at http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html,
+// and to kiss_fftr() in KISSFFT at https://github.com/mborgerding/kissfft.
+// It takes in an array of float real values, and returns a result of the same
+// length with float real and imaginary components interleaved, so
+// fourier_output[0] is the first real value, fourier_output[1] is the first
+// imaginary, fourier_output[2] is the second real, and so on.
+// The calling function should ensure that the array passed in as fourier_output
+// is at least time_series_size in length. Most optimized FFT implementations
+// require the length to be a power of two as well, but this version doesn't
+// enforce that.
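+// Concretely, with n = time_series_size, each output bin k < n / 2 is the
+// real-input DFT evaluated directly in O(n^2) time:
+//   X[k] = sum_j x[j] * cos(2*pi*j*k/n) - i * sum_j x[j] * sin(2*pi*j*k/n)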
+void CalculateDiscreteFourierTransform(float* time_series, int time_series_size,
+                                       float* fourier_output) {
+  for (int i = 0; i < time_series_size / 2; ++i) {
+    float real = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      real += time_series[j] * cos(j * i * kPi * 2 / time_series_size);
+    }
+    float imaginary = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      imaginary -= time_series[j] * sin(j * i * kPi * 2 / time_series_size);
+    }
+    fourier_output[(i * 2) + 0] = real;
+    fourier_output[(i * 2) + 1] = imaginary;
+  }
+}
+
+// Produces a simple sine curve that is used to ensure samples at the center of
+// the current window are weighted more heavily than those at the edges.
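+// The formula below is the periodic Hann window
+//   w(i) = 0.5 * (1 - cos(2*pi*i/N)),
+// so w(0) == 0 and the weight rises to 1.0 at the center of the window.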
+void CalculatePeriodicHann(int window_length, float* window_function) {
+  for (int i = 0; i < window_length; ++i) {
+    window_function[i] = 0.5 - 0.5 * cos((2 * kPi * i) / window_length);
+  }
+}
+
+}  // namespace
+
+TfLiteStatus GenerateSimpleFeatures(tflite::ErrorReporter* error_reporter,
+                                    const int16_t* input, int input_size,
+                                    int output_size, uint8_t* output) {
+  // Ensure our input and output data arrays are valid.
+  if (input_size > kMaxAudioSampleSize) {
+    error_reporter->Report("Input size %d larger than %d", input_size,
+                           kMaxAudioSampleSize);
+    return kTfLiteError;
+  }
+  if (output_size != kFeatureSliceSize) {
+    error_reporter->Report("Requested output size %d doesn't match %d",
+                           output_size, kFeatureSliceSize);
+    return kTfLiteError;
+  }
+
+  // Pre-calculate the window function we'll be applying to the input data.
+  // In a real application, we'd calculate this table once in an initialization
+  // function and store it for repeated reuse.
+  float window_function[kMaxAudioSampleSize];
+  CalculatePeriodicHann(input_size, window_function);
+
+  // Apply the window function to our time series input, and pad it with zeroes
+  // to the next power of two.
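+  // Dividing by 1 << 15 scales the int16 samples from [-32768, 32767] into
+  // roughly [-1.0, 1.0).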
+  float float_input[kMaxAudioSampleSize];
+  for (int i = 0; i < kMaxAudioSampleSize; ++i) {
+    if (i < input_size) {
+      float_input[i] =
+          (input[i] * window_function[i]) / static_cast<float>(1 << 15);
+    } else {
+      float_input[i] = 0.0f;
+    }
+  }
+
+  // Pull the frequency data from the time series sample.
+  float fourier_values[kMaxAudioSampleSize];
+  CalculateDiscreteFourierTransform(float_input, kMaxAudioSampleSize,
+                                    fourier_values);
+
+  // We have the complex numbers giving us information about each frequency
+  // band, but all we want to know is how strong each frequency is, so calculate
+  // the squared magnitude by adding together the squares of each component.
+  float power_spectrum[kMaxAudioSampleSize / 2];
+  for (int i = 0; i < (kMaxAudioSampleSize / 2); ++i) {
+    const float real = fourier_values[(i * 2) + 0];
+    const float imaginary = fourier_values[(i * 2) + 1];
+    power_spectrum[i] = (real * real) + (imaginary * imaginary);
+  }
+
+  // Finally, reduce the size of the output by averaging together six adjacent
+  // frequencies into each slot, producing an array of 43 values.
+  for (int i = 0; i < kFeatureSliceSize; ++i) {
+    float total = 0.0f;
+    for (int j = 0; j < kAverageWindowSize; ++j) {
+      const int index = (i * kAverageWindowSize) + j;
+      if (index < (kMaxAudioSampleSize / 2)) {
+        total += power_spectrum[index];
+      }
+    }
+    const float average = total / kAverageWindowSize;
+    // Quantize the result into eight bits, effectively multiplying by two.
+    // The 127.5 constant here has to match the features_max value defined in
+    // tensorflow/examples/speech_commands/input_data.py, and this also assumes
+    // that features_min is zero. If it wasn't, we'd have to subtract it first.
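+    // For example, an average of 127.5 maps to roundf(127.5f * 2.0f) == 255.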
+    int quantized_average = roundf(average * (255.0f / 127.5f));
+    if (quantized_average < 0) {
+      quantized_average = 0;
+    }
+    if (quantized_average > 255) {
+      quantized_average = 255;
+    }
+    output[i] = quantized_average;
+  }
+  return kTfLiteOk;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h
new file mode 100644
index 0000000..f4e86b1
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_SIMPLE_FEATURES_GENERATOR_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_SIMPLE_FEATURES_GENERATOR_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+
+// Converts audio sample data into a more compact form that's appropriate for
+// feeding into a neural network. Reference implementations are available in
+// both floating point and fixed point, but because the calculations involved
+// can be time-consuming, it's recommended that you use or write specialized
+// versions for your platform.
+TfLiteStatus GenerateSimpleFeatures(tflite::ErrorReporter* error_reporter,
+                                    const int16_t* input, int input_size,
+                                    int output_size, uint8_t* output);
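+
+// For illustration, a typical call on a 30ms, 16KHz clip (480 samples in a
+// hypothetical audio_samples buffer) might look like:
+//   uint8_t features[kFeatureSliceSize];  // kFeatureSliceSize == 43
+//   GenerateSimpleFeatures(error_reporter, audio_samples, 480,
+//                          kFeatureSliceSize, features);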
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_SIMPLE_FEATURES_GENERATOR_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc
new file mode 100644
index 0000000..65e5263
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc
@@ -0,0 +1,63 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_features_generator.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h"
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.h"
+#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestSimpleFeaturesGenerator) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  uint8_t yes_calculated_data[g_yes_power_spectrum_data_size];
+  TfLiteStatus yes_status = GenerateSimpleFeatures(
+      error_reporter, g_yes_30ms_sample_data, g_yes_30ms_sample_data_size,
+      g_yes_power_spectrum_data_size, yes_calculated_data);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, yes_status);
+
+  for (int i = 0; i < g_yes_power_spectrum_data_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_yes_power_spectrum_data[i],
+                            yes_calculated_data[i]);
+    if (g_yes_power_spectrum_data[i] != yes_calculated_data[i]) {
+      error_reporter->Report("Expected value %d but found %d",
+                             g_yes_power_spectrum_data[i],
+                             yes_calculated_data[i]);
+    }
+  }
+
+  uint8_t no_calculated_data[g_no_power_spectrum_data_size];
+  TfLiteStatus no_status = GenerateSimpleFeatures(
+      error_reporter, g_no_30ms_sample_data, g_no_30ms_sample_data_size,
+      g_no_power_spectrum_data_size, no_calculated_data);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, no_status);
+
+  for (int i = 0; i < g_no_power_spectrum_data_size; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(g_no_power_spectrum_data[i], no_calculated_data[i]);
+    if (g_no_power_spectrum_data[i] != no_calculated_data[i]) {
+      error_reporter->Report("Expected value %d but found %d",
+                             g_no_power_spectrum_data[i],
+                             no_calculated_data[i]);
+    }
+  }
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.cc
new file mode 100644
index 0000000..4842f8d
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.cc
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h"
+
+const char* kCategoryLabels[kCategoryCount] = {
+    "silence",
+    "unknown",
+    "yes",
+    "no",
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h
new file mode 100644
index 0000000..d31d6b3
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/simple_model_settings.h
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_SIMPLE_MODEL_SETTINGS_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_SIMPLE_MODEL_SETTINGS_H_
+
+// Keeping these as constant expressions allows us to allocate fixed-sized
+// arrays on the stack for our working memory.
+
+// The size of the input time series data we pass to the FFT to produce the
+// frequency information. This has to be a power of two, and since we're
+// dealing with 30ms of 16KHz inputs, which means 480 samples, 512 is the next
+// value up.
+constexpr int kMaxAudioSampleSize = 512;
+constexpr int kAudioSampleFrequency = 16000;
+
+// All of these values are derived from the values used during model training;
+// if you change your model, you'll need to update these constants.
+constexpr int kAverageWindowSize = 6;
+constexpr int kFeatureSliceSize =
+    ((kMaxAudioSampleSize / 2) + (kAverageWindowSize - 1)) / kAverageWindowSize;
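+// With the defaults above this works out to ((512 / 2) + 5) / 6 == 43, a
+// rounding-up integer division of the 256 frequency bins by the averaging
+// window size.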
+constexpr int kFeatureSliceCount = 49;
+constexpr int kFeatureElementCount = (kFeatureSliceSize * kFeatureSliceCount);
+constexpr int kFeatureSliceStrideMs = 20;
+constexpr int kFeatureSliceDurationMs = 30;
+
+constexpr int kCategoryCount = 4;
+constexpr int kSilenceIndex = 0;
+constexpr int kUnknownIndex = 1;
+extern const char* kCategoryLabels[kCategoryCount];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_SIMPLE_MODEL_SETTINGS_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.cc
new file mode 100644
index 0000000..a14412e
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.cc
@@ -0,0 +1,1673 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Automatically created from a TensorFlow Lite flatbuffer using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_simple_features_model_data.cc
+// See the README for a full description of the creation process.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.h"
+
+const unsigned char g_tiny_conv_simple_features_model_data[] = {
+    0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
+    0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x4d, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+    0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
+    0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00,
+    0xb4, 0x47, 0x00, 0x00, 0xe4, 0x02, 0x00, 0x00, 0xb4, 0x02, 0x00, 0x00,
+    0xac, 0x02, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff,
+    0xbc, 0xb3, 0xff, 0xff, 0xc0, 0xb3, 0xff, 0xff, 0x1e, 0xb4, 0xff, 0xff,
+    0x04, 0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x89, 0xa5, 0xe8, 0xc1,
+    0xb1, 0x89, 0x5b, 0xc6, 0x4f, 0x9b, 0xd3, 0x74, 0x93, 0x88, 0xff, 0xaf,
+    0x89, 0xff, 0xf4, 0x70, 0xcc, 0x75, 0x78, 0xbf, 0x92, 0xcd, 0xa9, 0xa8,
+    0xd6, 0x6a, 0x6f, 0x7b, 0x7f, 0xd8, 0xa8, 0xb1, 0xe6, 0x32, 0x21, 0x70,
+    0xa0, 0x9c, 0x6f, 0xc8, 0xc6, 0x59, 0x67, 0x93, 0x97, 0xca, 0x3f, 0xde,
+    0xcb, 0x74, 0x7c, 0xb5, 0xa4, 0xd9, 0x66, 0xc6, 0x87, 0x98, 0xa5, 0xd0,
+    0xbb, 0xb9, 0xc2, 0xb2, 0xaa, 0x79, 0x25, 0xb9, 0x6d, 0x5a, 0xc8, 0x7f,
+    0x70, 0x85, 0x79, 0xbc, 0x6a, 0x9b, 0xd1, 0x9a, 0x9c, 0x51, 0x53, 0x71,
+    0x89, 0xc0, 0xb4, 0xac, 0xae, 0x47, 0x67, 0x70, 0x79, 0xd2, 0x81, 0xa5,
+    0xd2, 0x09, 0x38, 0x82, 0x74, 0xc9, 0x5d, 0xaf, 0xc1, 0x4f, 0x53, 0x99,
+    0xcb, 0xb7, 0x3a, 0xba, 0xe8, 0x7f, 0x76, 0xb9, 0xb3, 0xd3, 0x60, 0xc0,
+    0x93, 0x9f, 0x87, 0xbd, 0xd0, 0xb8, 0xca, 0xc1, 0xb6, 0x6c, 0x01, 0xc1,
+    0x5c, 0x5d, 0xb2, 0x82, 0x76, 0x77, 0x39, 0xbc, 0x72, 0x6a, 0xc3, 0xb4,
+    0x79, 0x21, 0x48, 0x42, 0x86, 0xa6, 0xbd, 0xaf, 0xae, 0x23, 0x9c, 0x69,
+    0x78, 0xc3, 0x6b, 0xb3, 0xab, 0x43, 0xb2, 0x88, 0x71, 0xc6, 0x6b, 0xbe,
+    0xc3, 0x75, 0xc2, 0xc3, 0xa5, 0xcf, 0x32, 0xbe, 0xcb, 0xb0, 0xb8, 0xc1,
+    0x9c, 0xcf, 0x64, 0xc4, 0xb4, 0x96, 0xa8, 0xb9, 0xcb, 0xc0, 0xc0, 0xb8,
+    0xb8, 0x77, 0x65, 0xc0, 0xc4, 0xb3, 0xc5, 0x77, 0x9b, 0x61, 0xd4, 0xac,
+    0x7e, 0x36, 0xb1, 0xae, 0x36, 0x36, 0xb8, 0x39, 0x6b, 0x70, 0x9c, 0xb5,
+    0x88, 0x5c, 0xb3, 0x6a, 0xad, 0xc5, 0x7b, 0xb4, 0xad, 0xaa, 0xc4, 0x84,
+    0x5e, 0xc4, 0x67, 0xc1, 0xde, 0xba, 0xcf, 0xbd, 0xa0, 0xd3, 0x35, 0xb3,
+    0xe7, 0xc8, 0xb8, 0xb8, 0xaf, 0xb4, 0x59, 0xb8, 0xb4, 0xac, 0xac, 0xaa,
+    0xc7, 0xad, 0xc8, 0xb6, 0xac, 0x99, 0xa0, 0xcb, 0xc1, 0xc8, 0xcb, 0x89,
+    0xc3, 0xac, 0xca, 0x8b, 0x97, 0x1f, 0xbd, 0xbf, 0x13, 0xad, 0xc8, 0x41,
+    0x56, 0x3c, 0x86, 0xb2, 0x61, 0xc4, 0xbb, 0x71, 0xba, 0x92, 0x8d, 0xc3,
+    0x86, 0xcb, 0xc5, 0x8d, 0x88, 0xc8, 0x6a, 0xbf, 0x9c, 0xcd, 0xcd, 0xc0,
+    0x81, 0xb1, 0x47, 0xb5, 0xf0, 0xce, 0xb1, 0xc1, 0xaa, 0xa8, 0x54, 0xcb,
+    0xbc, 0xc7, 0xc5, 0x8e, 0xc3, 0xce, 0xc7, 0xb9, 0xb9, 0xa1, 0xc5, 0xbd,
+    0xb8, 0xb8, 0xb7, 0x81, 0xb6, 0xba, 0xd2, 0x90, 0xbc, 0x96, 0xbe, 0xba,
+    0x53, 0xb5, 0xc7, 0x3c, 0x3c, 0x1f, 0x90, 0xaa, 0x5a, 0xb8, 0xba, 0x7e,
+    0xbc, 0x9e, 0xc2, 0xb1, 0x6e, 0xc0, 0xc4, 0x91, 0xf0, 0xb5, 0x60, 0xad,
+    0x73, 0xba, 0xcd, 0xba, 0x6e, 0x94, 0x39, 0xb5, 0xe4, 0xbe, 0xb4, 0xb5,
+    0xa0, 0xa9, 0x51, 0xac, 0xbc, 0xc2, 0xb3, 0x8a, 0xbd, 0x9a, 0xca, 0xb3,
+    0xbf, 0xaf, 0xb5, 0x9a, 0xb9, 0xc3, 0xb6, 0x92, 0xb5, 0xc1, 0xb0, 0x95,
+    0xd6, 0xcc, 0xbb, 0xbb, 0xa9, 0xb9, 0xac, 0x4a, 0x62, 0x27, 0xa7, 0xa7,
+    0x30, 0xbd, 0xb1, 0x73, 0xa1, 0x74, 0xc2, 0xb7, 0x58, 0xc0, 0xae, 0x8f,
+    0xe1, 0xac, 0x4e, 0xb0, 0x55, 0xc9, 0xc8, 0x9f, 0x83, 0x8e, 0x3e, 0xd5,
+    0xb5, 0xbe, 0xcd, 0xb2, 0xa6, 0xc8, 0x64, 0xac, 0xc0, 0xc8, 0xaf, 0x99,
+    0xc5, 0x9e, 0xb8, 0xbd, 0xa9, 0xc2, 0xb3, 0x81, 0xb4, 0xc2, 0xb4, 0x8f,
+    0xbc, 0xb8, 0x9c, 0x88, 0xbe, 0xc6, 0xbf, 0xba, 0xc8, 0xb4, 0xab, 0x5b,
+    0x92, 0x51, 0xb1, 0x9a, 0x44, 0xb9, 0xab, 0x80, 0xa5, 0x3e, 0xc0, 0xa5,
+    0x5c, 0xb6, 0xa8, 0xa2, 0xb3, 0x9a, 0x6b, 0xb3, 0x34, 0xc6, 0x7e, 0x96,
+    0xcb, 0x88, 0x48, 0xc6, 0xa3, 0xbb, 0xd2, 0xa2, 0xaf, 0xd0, 0x6e, 0xae,
+    0xb4, 0xce, 0xc8, 0x8f, 0xd7, 0xad, 0xc8, 0xb0, 0xae, 0xb7, 0xb2, 0x70,
+    0xb9, 0xad, 0xc1, 0xa0, 0xcb, 0xa2, 0xb0, 0x9b, 0xbe, 0xd3, 0xca, 0xb6,
+    0xbd, 0xaf, 0xa9, 0x82, 0xa1, 0xd7, 0xbc, 0x9b, 0x8b, 0xac, 0xaa, 0xac,
+    0xad, 0x37, 0xb7, 0xb6, 0x46, 0xae, 0xa9, 0xbd, 0x6b, 0x90, 0x5e, 0xcd,
+    0x23, 0xa4, 0x76, 0xa1, 0xc4, 0x96, 0x50, 0xcc, 0x95, 0x99, 0x93, 0xa7,
+    0xb2, 0xe1, 0x7c, 0xbd, 0xbd, 0xb5, 0xbf, 0x9a, 0xca, 0x80, 0xd7, 0xae,
+    0x79, 0xa8, 0xaa, 0xb2, 0xbc, 0x51, 0xda, 0xa3, 0x80, 0x8b, 0xa2, 0xc8,
+    0xd1, 0x94, 0xe1, 0xc4, 0xbd, 0xae, 0xae, 0xcc, 0xb3, 0xca, 0xd5, 0xa1,
+    0xd5, 0xa7, 0xaf, 0xd2, 0xb4, 0x8d, 0xcc, 0xc8, 0x63, 0xa3, 0xa4, 0xdf,
+    0x6f, 0x7e, 0x98, 0xdf, 0x1b, 0x7b, 0x43, 0x99, 0xb0, 0x99, 0x71, 0xdb,
+    0x63, 0x7b, 0x69, 0x9c, 0xba, 0xcd, 0x90, 0xd0, 0xb6, 0xa6, 0x9e, 0x95,
+    0x50, 0xb6, 0xff, 0xff, 0xae, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xc7, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00,
+    0xda, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0xc0, 0x44, 0x00, 0x00,
+    0x2c, 0x30, 0x38, 0x5a, 0x3d, 0x4c, 0x44, 0x3b, 0x48, 0x48, 0x44, 0x57,
+    0x3f, 0x43, 0x45, 0x3a, 0x24, 0x32, 0x21, 0x5c, 0x3f, 0x3a, 0x38, 0x3a,
+    0x35, 0x35, 0x2f, 0x51, 0x3c, 0x3a, 0x45, 0x3a, 0x3b, 0x41, 0x39, 0x55,
+    0x3c, 0x41, 0x39, 0x44, 0x3a, 0x40, 0x37, 0x48, 0x33, 0x47, 0x36, 0x3e,
+    0x3c, 0x41, 0x3f, 0x3e, 0x3e, 0x47, 0x36, 0x3e, 0x41, 0x33, 0x3e, 0x3b,
+    0x3a, 0x46, 0x45, 0x40, 0x48, 0x3a, 0x35, 0x4b, 0x45, 0x4d, 0x3c, 0x49,
+    0x42, 0x44, 0x3c, 0x4c, 0x3e, 0x3c, 0x44, 0x32, 0x33, 0x41, 0x36, 0x4b,
+    0x38, 0x3b, 0x3c, 0x38, 0x3b, 0x45, 0x34, 0x46, 0x40, 0x4e, 0x44, 0x35,
+    0x43, 0x36, 0x3d, 0x40, 0x3e, 0x48, 0x40, 0x34, 0x3a, 0x46, 0x45, 0x43,
+    0x45, 0x3f, 0x47, 0x37, 0x36, 0x35, 0x44, 0x3a, 0x3e, 0x37, 0x39, 0x40,
+    0x3a, 0x3f, 0x3f, 0x4c, 0x3e, 0x41, 0x43, 0x35, 0x3f, 0x3d, 0x3d, 0x4c,
+    0x3c, 0x4a, 0x46, 0x3c, 0x3a, 0x41, 0x40, 0x4e, 0x36, 0x47, 0x40, 0x3b,
+    0x47, 0x42, 0x38, 0x4d, 0x48, 0x47, 0x3c, 0x3c, 0x33, 0x3b, 0x3e, 0x42,
+    0x3f, 0x3e, 0x3a, 0x3d, 0x32, 0x39, 0x41, 0x46, 0x3a, 0x3a, 0x3e, 0x3e,
+    0x47, 0x48, 0x4e, 0x36, 0x44, 0x40, 0x41, 0x45, 0x3a, 0x3c, 0x38, 0x55,
+    0x2e, 0x26, 0x2f, 0x32, 0x3f, 0x41, 0x3e, 0x4c, 0x45, 0x36, 0x40, 0x31,
+    0x17, 0x2e, 0x14, 0x53, 0x34, 0x30, 0x34, 0x3f, 0x2e, 0x44, 0x2b, 0x4e,
+    0x34, 0x3e, 0x34, 0x43, 0x3d, 0x35, 0x3f, 0x46, 0x39, 0x40, 0x38, 0x3e,
+    0x35, 0x3b, 0x35, 0x45, 0x3d, 0x40, 0x38, 0x37, 0x40, 0x3e, 0x32, 0x3e,
+    0x41, 0x39, 0x30, 0x41, 0x3a, 0x32, 0x3e, 0x3d, 0x39, 0x31, 0x33, 0x3e,
+    0x41, 0x47, 0x40, 0x47, 0x35, 0x33, 0x3c, 0x32, 0x40, 0x3c, 0x42, 0x49,
+    0x34, 0x38, 0x39, 0x37, 0x39, 0x35, 0x40, 0x4d, 0x37, 0x43, 0x42, 0x3e,
+    0x3f, 0x3c, 0x3e, 0x51, 0x36, 0x37, 0x42, 0x41, 0x36, 0x31, 0x43, 0x3d,
+    0x46, 0x43, 0x37, 0x46, 0x32, 0x45, 0x42, 0x36, 0x3f, 0x42, 0x42, 0x41,
+    0x3d, 0x46, 0x39, 0x41, 0x3c, 0x3f, 0x38, 0x3c, 0x43, 0x43, 0x3d, 0x3c,
+    0x3d, 0x41, 0x38, 0x42, 0x3a, 0x3d, 0x43, 0x42, 0x41, 0x40, 0x39, 0x36,
+    0x3a, 0x3c, 0x3c, 0x4f, 0x44, 0x36, 0x39, 0x35, 0x46, 0x46, 0x36, 0x4a,
+    0x3a, 0x42, 0x43, 0x39, 0x3f, 0x3d, 0x3c, 0x47, 0x38, 0x3f, 0x43, 0x40,
+    0x36, 0x3c, 0x45, 0x3b, 0x33, 0x36, 0x3b, 0x39, 0x3c, 0x35, 0x40, 0x38,
+    0x40, 0x3e, 0x3f, 0x48, 0x3f, 0x34, 0x40, 0x53, 0x26, 0x2c, 0x29, 0x39,
+    0x2a, 0x38, 0x3f, 0x45, 0x32, 0x31, 0x4a, 0x37, 0x1c, 0x28, 0x09, 0x43,
+    0x35, 0x3b, 0x33, 0x3c, 0x32, 0x3f, 0x28, 0x41, 0x36, 0x35, 0x3a, 0x37,
+    0x41, 0x39, 0x32, 0x3c, 0x40, 0x3c, 0x3c, 0x32, 0x38, 0x39, 0x37, 0x44,
+    0x3a, 0x33, 0x41, 0x36, 0x37, 0x3c, 0x35, 0x3a, 0x3d, 0x30, 0x3d, 0x41,
+    0x37, 0x3c, 0x45, 0x3a, 0x37, 0x2f, 0x36, 0x3c, 0x3a, 0x3d, 0x39, 0x48,
+    0x46, 0x33, 0x3a, 0x3e, 0x40, 0x3d, 0x3b, 0x52, 0x38, 0x45, 0x34, 0x47,
+    0x39, 0x36, 0x37, 0x56, 0x42, 0x3f, 0x33, 0x36, 0x38, 0x3f, 0x40, 0x53,
+    0x3e, 0x37, 0x3d, 0x3c, 0x48, 0x3a, 0x3d, 0x33, 0x39, 0x40, 0x3e, 0x35,
+    0x3d, 0x46, 0x38, 0x36, 0x37, 0x43, 0x3a, 0x3c, 0x40, 0x38, 0x39, 0x3b,
+    0x39, 0x3a, 0x42, 0x3d, 0x34, 0x3f, 0x35, 0x43, 0x3a, 0x35, 0x46, 0x3a,
+    0x48, 0x38, 0x3b, 0x48, 0x3c, 0x35, 0x42, 0x3d, 0x3a, 0x3d, 0x38, 0x42,
+    0x3e, 0x3c, 0x33, 0x39, 0x34, 0x30, 0x42, 0x44, 0x41, 0x3d, 0x3c, 0x39,
+    0x3c, 0x3a, 0x39, 0x41, 0x3d, 0x44, 0x3c, 0x40, 0x3f, 0x3e, 0x42, 0x3f,
+    0x37, 0x40, 0x39, 0x3b, 0x42, 0x43, 0x49, 0x37, 0x39, 0x46, 0x35, 0x3c,
+    0x3e, 0x39, 0x45, 0x52, 0x24, 0x2d, 0x38, 0x35, 0x3a, 0x3a, 0x3c, 0x44,
+    0x39, 0x32, 0x51, 0x3f, 0x16, 0x34, 0x0a, 0x49, 0x39, 0x38, 0x39, 0x3e,
+    0x2f, 0x36, 0x24, 0x3f, 0x37, 0x34, 0x38, 0x3b, 0x34, 0x34, 0x30, 0x3b,
+    0x3d, 0x36, 0x35, 0x42, 0x33, 0x40, 0x37, 0x35, 0x43, 0x3f, 0x3f, 0x39,
+    0x3a, 0x43, 0x36, 0x3e, 0x39, 0x3d, 0x3f, 0x3d, 0x47, 0x3b, 0x39, 0x37,
+    0x35, 0x42, 0x3f, 0x3b, 0x41, 0x3a, 0x42, 0x4b, 0x3d, 0x3f, 0x3d, 0x3e,
+    0x38, 0x3b, 0x34, 0x4e, 0x3f, 0x39, 0x36, 0x43, 0x39, 0x35, 0x41, 0x4d,
+    0x3c, 0x39, 0x43, 0x33, 0x37, 0x3b, 0x41, 0x48, 0x3c, 0x3f, 0x39, 0x32,
+    0x35, 0x3d, 0x42, 0x35, 0x3d, 0x3e, 0x37, 0x3b, 0x38, 0x3a, 0x44, 0x36,
+    0x42, 0x35, 0x48, 0x40, 0x3a, 0x44, 0x44, 0x39, 0x43, 0x41, 0x3c, 0x37,
+    0x47, 0x3b, 0x42, 0x42, 0x45, 0x3a, 0x40, 0x46, 0x35, 0x3f, 0x3a, 0x48,
+    0x35, 0x44, 0x3f, 0x37, 0x33, 0x3e, 0x45, 0x49, 0x39, 0x43, 0x47, 0x37,
+    0x3f, 0x3f, 0x3b, 0x44, 0x38, 0x3d, 0x39, 0x42, 0x37, 0x3e, 0x40, 0x45,
+    0x3b, 0x3f, 0x40, 0x34, 0x42, 0x3f, 0x43, 0x3c, 0x43, 0x41, 0x38, 0x38,
+    0x38, 0x41, 0x55, 0x33, 0x33, 0x39, 0x39, 0x3c, 0x35, 0x39, 0x38, 0x42,
+    0x27, 0x26, 0x32, 0x41, 0x41, 0x32, 0x3f, 0x47, 0x3a, 0x38, 0x48, 0x37,
+    0x11, 0x27, 0x08, 0x49, 0x35, 0x42, 0x3c, 0x2e, 0x34, 0x43, 0x25, 0x3b,
+    0x3a, 0x33, 0x37, 0x30, 0x3c, 0x36, 0x2d, 0x3c, 0x3b, 0x39, 0x3b, 0x40,
+    0x46, 0x3a, 0x30, 0x42, 0x35, 0x32, 0x36, 0x3a, 0x3a, 0x34, 0x34, 0x33,
+    0x3d, 0x30, 0x3b, 0x42, 0x41, 0x3f, 0x3d, 0x3b, 0x44, 0x3d, 0x41, 0x41,
+    0x3d, 0x3f, 0x40, 0x51, 0x42, 0x42, 0x36, 0x45, 0x30, 0x40, 0x32, 0x4f,
+    0x3a, 0x3c, 0x40, 0x39, 0x3d, 0x3b, 0x3e, 0x4b, 0x3d, 0x37, 0x42, 0x46,
+    0x40, 0x40, 0x47, 0x3d, 0x35, 0x3c, 0x3f, 0x46, 0x37, 0x37, 0x3a, 0x2e,
+    0x3d, 0x3c, 0x3a, 0x46, 0x3a, 0x44, 0x3c, 0x3a, 0x32, 0x44, 0x31, 0x41,
+    0x43, 0x36, 0x49, 0x39, 0x3d, 0x37, 0x3f, 0x41, 0x3b, 0x3b, 0x3c, 0x42,
+    0x3c, 0x34, 0x3f, 0x3b, 0x40, 0x3e, 0x48, 0x47, 0x3e, 0x3c, 0x38, 0x39,
+    0x3f, 0x35, 0x39, 0x3f, 0x3e, 0x3e, 0x3b, 0x43, 0x41, 0x40, 0x43, 0x41,
+    0x3f, 0x37, 0x39, 0x41, 0x46, 0x32, 0x3d, 0x41, 0x36, 0x3f, 0x3e, 0x3f,
+    0x36, 0x48, 0x43, 0x3d, 0x43, 0x3f, 0x34, 0x3d, 0x34, 0x35, 0x4f, 0x32,
+    0x3c, 0x3f, 0x3d, 0x3f, 0x39, 0x3c, 0x3d, 0x47, 0x23, 0x36, 0x33, 0x45,
+    0x37, 0x2e, 0x42, 0x42, 0x39, 0x34, 0x4f, 0x3f, 0x19, 0x2b, 0x01, 0x50,
+    0x35, 0x3f, 0x37, 0x3c, 0x33, 0x35, 0x25, 0x32, 0x38, 0x3e, 0x40, 0x40,
+    0x2f, 0x38, 0x35, 0x3d, 0x31, 0x42, 0x44, 0x3c, 0x3a, 0x3d, 0x2d, 0x3e,
+    0x3b, 0x3e, 0x3d, 0x31, 0x3b, 0x37, 0x35, 0x31, 0x36, 0x35, 0x34, 0x31,
+    0x41, 0x3a, 0x33, 0x32, 0x3c, 0x31, 0x3e, 0x3d, 0x40, 0x3b, 0x34, 0x45,
+    0x36, 0x39, 0x3e, 0x3f, 0x3c, 0x45, 0x37, 0x4b, 0x42, 0x3d, 0x33, 0x43,
+    0x3e, 0x40, 0x35, 0x4e, 0x38, 0x36, 0x3a, 0x33, 0x38, 0x44, 0x3f, 0x3c,
+    0x3f, 0x40, 0x3a, 0x3c, 0x3c, 0x3c, 0x44, 0x29, 0x3a, 0x40, 0x35, 0x3a,
+    0x3d, 0x48, 0x3b, 0x30, 0x45, 0x41, 0x45, 0x40, 0x37, 0x32, 0x3a, 0x35,
+    0x3f, 0x38, 0x3b, 0x43, 0x3b, 0x3f, 0x33, 0x40, 0x3b, 0x40, 0x38, 0x33,
+    0x39, 0x3c, 0x3c, 0x3f, 0x43, 0x33, 0x43, 0x40, 0x43, 0x3d, 0x33, 0x42,
+    0x40, 0x32, 0x3e, 0x36, 0x40, 0x38, 0x43, 0x40, 0x44, 0x38, 0x34, 0x3c,
+    0x3e, 0x39, 0x47, 0x43, 0x40, 0x3b, 0x3f, 0x3f, 0x3c, 0x3b, 0x4b, 0x33,
+    0x36, 0x49, 0x32, 0x41, 0x48, 0x45, 0x57, 0x3a, 0x40, 0x42, 0x40, 0x46,
+    0x36, 0x35, 0x3c, 0x46, 0x22, 0x2e, 0x33, 0x3e, 0x3c, 0x39, 0x44, 0x4d,
+    0x3f, 0x41, 0x51, 0x44, 0x15, 0x2e, 0x02, 0x4e, 0x39, 0x3a, 0x3c, 0x35,
+    0x30, 0x38, 0x1e, 0x31, 0x40, 0x3b, 0x39, 0x3d, 0x3a, 0x37, 0x35, 0x36,
+    0x46, 0x36, 0x3c, 0x3e, 0x39, 0x3e, 0x32, 0x40, 0x3b, 0x35, 0x42, 0x41,
+    0x41, 0x38, 0x41, 0x35, 0x42, 0x36, 0x3c, 0x42, 0x3d, 0x41, 0x35, 0x31,
+    0x3f, 0x44, 0x3e, 0x41, 0x3f, 0x35, 0x42, 0x4b, 0x3e, 0x36, 0x37, 0x34,
+    0x36, 0x3d, 0x40, 0x49, 0x41, 0x3e, 0x3d, 0x3b, 0x38, 0x37, 0x40, 0x47,
+    0x35, 0x32, 0x43, 0x38, 0x36, 0x3b, 0x33, 0x47, 0x33, 0x34, 0x3d, 0x47,
+    0x3c, 0x37, 0x3d, 0x2b, 0x3a, 0x36, 0x3b, 0x3d, 0x43, 0x38, 0x35, 0x32,
+    0x32, 0x37, 0x43, 0x36, 0x3f, 0x48, 0x38, 0x30, 0x3a, 0x3c, 0x42, 0x34,
+    0x37, 0x3c, 0x37, 0x40, 0x48, 0x3e, 0x35, 0x3b, 0x3f, 0x38, 0x39, 0x3e,
+    0x37, 0x35, 0x36, 0x3d, 0x3b, 0x3c, 0x40, 0x3d, 0x34, 0x40, 0x46, 0x42,
+    0x3f, 0x3c, 0x3c, 0x3e, 0x40, 0x40, 0x3d, 0x3f, 0x3f, 0x44, 0x46, 0x41,
+    0x32, 0x43, 0x40, 0x41, 0x3c, 0x42, 0x39, 0x38, 0x48, 0x44, 0x3d, 0x38,
+    0x34, 0x40, 0x4e, 0x31, 0x3c, 0x42, 0x39, 0x48, 0x3c, 0x33, 0x3e, 0x40,
+    0x20, 0x27, 0x39, 0x45, 0x45, 0x36, 0x47, 0x4c, 0x35, 0x3e, 0x4a, 0x36,
+    0x16, 0x2f, 0x04, 0x4f, 0x3a, 0x35, 0x36, 0x3a, 0x2d, 0x36, 0x21, 0x34,
+    0x3b, 0x32, 0x3d, 0x3c, 0x3c, 0x3f, 0x3b, 0x3b, 0x41, 0x46, 0x40, 0x3d,
+    0x3b, 0x44, 0x33, 0x42, 0x34, 0x33, 0x3e, 0x45, 0x3f, 0x46, 0x39, 0x33,
+    0x3b, 0x37, 0x37, 0x37, 0x42, 0x47, 0x3c, 0x35, 0x31, 0x41, 0x44, 0x3a,
+    0x3b, 0x33, 0x39, 0x44, 0x42, 0x33, 0x3d, 0x3f, 0x43, 0x33, 0x41, 0x4a,
+    0x35, 0x46, 0x36, 0x3e, 0x39, 0x41, 0x41, 0x4c, 0x34, 0x3d, 0x38, 0x33,
+    0x3c, 0x3f, 0x43, 0x44, 0x37, 0x35, 0x35, 0x3c, 0x43, 0x34, 0x3e, 0x2d,
+    0x3f, 0x35, 0x38, 0x3c, 0x33, 0x35, 0x43, 0x2a, 0x40, 0x33, 0x34, 0x40,
+    0x3d, 0x38, 0x36, 0x2d, 0x36, 0x3c, 0x43, 0x3d, 0x37, 0x3d, 0x39, 0x38,
+    0x3b, 0x3e, 0x3c, 0x46, 0x35, 0x35, 0x43, 0x44, 0x39, 0x40, 0x34, 0x39,
+    0x3d, 0x34, 0x40, 0x45, 0x38, 0x35, 0x3e, 0x39, 0x3c, 0x44, 0x48, 0x44,
+    0x41, 0x3e, 0x3c, 0x45, 0x3a, 0x3c, 0x3c, 0x46, 0x3a, 0x40, 0x39, 0x43,
+    0x35, 0x35, 0x3e, 0x45, 0x3a, 0x34, 0x3c, 0x39, 0x46, 0x3a, 0x4f, 0x35,
+    0x32, 0x3d, 0x36, 0x41, 0x32, 0x38, 0x3f, 0x45, 0x2d, 0x34, 0x2a, 0x35,
+    0x43, 0x3f, 0x41, 0x49, 0x41, 0x3c, 0x4b, 0x3f, 0x17, 0x31, 0x02, 0x4f,
+    0x30, 0x38, 0x39, 0x40, 0x33, 0x3a, 0x25, 0x38, 0x35, 0x3c, 0x39, 0x35,
+    0x34, 0x41, 0x34, 0x43, 0x40, 0x40, 0x46, 0x3d, 0x40, 0x38, 0x3f, 0x3b,
+    0x35, 0x39, 0x3c, 0x39, 0x34, 0x38, 0x3f, 0x36, 0x3a, 0x38, 0x44, 0x3f,
+    0x3f, 0x38, 0x3c, 0x33, 0x41, 0x42, 0x38, 0x33, 0x3c, 0x3b, 0x3c, 0x46,
+    0x38, 0x3b, 0x3f, 0x33, 0x3f, 0x48, 0x3b, 0x49, 0x3f, 0x3a, 0x3d, 0x3f,
+    0x47, 0x3d, 0x30, 0x45, 0x36, 0x42, 0x3d, 0x36, 0x43, 0x38, 0x3b, 0x3d,
+    0x3c, 0x30, 0x3b, 0x43, 0x3d, 0x41, 0x34, 0x2e, 0x43, 0x3d, 0x43, 0x46,
+    0x43, 0x3c, 0x3c, 0x2e, 0x3c, 0x43, 0x34, 0x43, 0x3e, 0x43, 0x3f, 0x2b,
+    0x45, 0x40, 0x3a, 0x43, 0x36, 0x39, 0x3f, 0x3d, 0x3a, 0x3c, 0x35, 0x3b,
+    0x36, 0x3f, 0x45, 0x3e, 0x45, 0x40, 0x3f, 0x36, 0x45, 0x42, 0x35, 0x3e,
+    0x3a, 0x3a, 0x3f, 0x40, 0x3e, 0x3c, 0x39, 0x46, 0x43, 0x3e, 0x3f, 0x3f,
+    0x40, 0x3c, 0x40, 0x4b, 0x41, 0x35, 0x3b, 0x3e, 0x49, 0x32, 0x3e, 0x41,
+    0x31, 0x37, 0x3d, 0x3b, 0x3f, 0x45, 0x50, 0x3a, 0x3f, 0x3c, 0x44, 0x36,
+    0x43, 0x37, 0x3d, 0x4b, 0x29, 0x39, 0x2f, 0x38, 0x45, 0x36, 0x40, 0x4e,
+    0x39, 0x3f, 0x48, 0x43, 0x23, 0x3c, 0x06, 0x51, 0x37, 0x3b, 0x3e, 0x3b,
+    0x28, 0x45, 0x2b, 0x37, 0x3f, 0x33, 0x3f, 0x41, 0x31, 0x36, 0x33, 0x3a,
+    0x3a, 0x35, 0x3b, 0x33, 0x3e, 0x36, 0x35, 0x40, 0x3a, 0x34, 0x3a, 0x38,
+    0x34, 0x3a, 0x3a, 0x34, 0x42, 0x45, 0x40, 0x3e, 0x40, 0x38, 0x39, 0x34,
+    0x38, 0x37, 0x3f, 0x3e, 0x3c, 0x32, 0x3f, 0x46, 0x3f, 0x44, 0x3b, 0x3e,
+    0x44, 0x45, 0x36, 0x3e, 0x36, 0x3f, 0x3b, 0x40, 0x39, 0x34, 0x38, 0x41,
+    0x42, 0x3e, 0x3d, 0x47, 0x3e, 0x45, 0x33, 0x40, 0x3e, 0x3a, 0x44, 0x3d,
+    0x3c, 0x3a, 0x3a, 0x2c, 0x3a, 0x3d, 0x35, 0x45, 0x3c, 0x41, 0x36, 0x30,
+    0x32, 0x32, 0x3a, 0x3b, 0x35, 0x3c, 0x43, 0x2d, 0x35, 0x3f, 0x41, 0x37,
+    0x3f, 0x46, 0x34, 0x39, 0x3c, 0x43, 0x40, 0x3e, 0x3e, 0x36, 0x3e, 0x3c,
+    0x37, 0x3a, 0x3d, 0x3a, 0x3c, 0x38, 0x44, 0x41, 0x3f, 0x3b, 0x3c, 0x47,
+    0x40, 0x3b, 0x41, 0x47, 0x3e, 0x45, 0x39, 0x3e, 0x37, 0x45, 0x4b, 0x4c,
+    0x37, 0x37, 0x37, 0x3c, 0x3c, 0x3d, 0x40, 0x38, 0x39, 0x3e, 0x43, 0x3f,
+    0x38, 0x45, 0x51, 0x3c, 0x31, 0x34, 0x3b, 0x48, 0x46, 0x41, 0x40, 0x40,
+    0x2c, 0x39, 0x32, 0x42, 0x3c, 0x2e, 0x49, 0x4d, 0x3c, 0x3f, 0x45, 0x38,
+    0x20, 0x38, 0x03, 0x55, 0x33, 0x3e, 0x32, 0x39, 0x32, 0x3b, 0x24, 0x2b,
+    0x42, 0x35, 0x45, 0x32, 0x2e, 0x3b, 0x2f, 0x3f, 0x3c, 0x37, 0x39, 0x3b,
+    0x34, 0x34, 0x3d, 0x36, 0x3d, 0x39, 0x3b, 0x30, 0x3c, 0x3e, 0x40, 0x32,
+    0x3d, 0x3c, 0x3c, 0x3e, 0x33, 0x33, 0x3f, 0x3a, 0x33, 0x3e, 0x46, 0x36,
+    0x3a, 0x3d, 0x40, 0x40, 0x3f, 0x41, 0x3a, 0x42, 0x34, 0x32, 0x34, 0x46,
+    0x3b, 0x31, 0x40, 0x37, 0x37, 0x32, 0x3e, 0x47, 0x3f, 0x3b, 0x3e, 0x43,
+    0x49, 0x45, 0x3a, 0x3d, 0x3e, 0x44, 0x40, 0x31, 0x39, 0x3e, 0x3b, 0x2d,
+    0x3b, 0x3a, 0x33, 0x3d, 0x39, 0x37, 0x3e, 0x32, 0x41, 0x3c, 0x3a, 0x37,
+    0x3b, 0x40, 0x39, 0x2f, 0x3e, 0x3f, 0x47, 0x32, 0x3e, 0x3b, 0x3e, 0x3e,
+    0x40, 0x3e, 0x40, 0x3c, 0x41, 0x39, 0x38, 0x46, 0x45, 0x32, 0x47, 0x31,
+    0x36, 0x47, 0x37, 0x49, 0x3a, 0x3f, 0x47, 0x3a, 0x41, 0x3b, 0x3c, 0x4f,
+    0x3e, 0x36, 0x3b, 0x47, 0x35, 0x39, 0x41, 0x4e, 0x3d, 0x3e, 0x3b, 0x46,
+    0x38, 0x39, 0x3b, 0x45, 0x3e, 0x3f, 0x44, 0x42, 0x44, 0x3f, 0x55, 0x3b,
+    0x41, 0x3d, 0x43, 0x43, 0x37, 0x3f, 0x3d, 0x4c, 0x28, 0x3d, 0x36, 0x3c,
+    0x3e, 0x3e, 0x48, 0x50, 0x3e, 0x39, 0x45, 0x41, 0x22, 0x37, 0x07, 0x4f,
+    0x2e, 0x33, 0x38, 0x3f, 0x31, 0x3a, 0x1b, 0x36, 0x34, 0x38, 0x3c, 0x37,
+    0x37, 0x3e, 0x36, 0x35, 0x36, 0x3b, 0x3d, 0x38, 0x42, 0x48, 0x3d, 0x40,
+    0x40, 0x44, 0x3d, 0x39, 0x37, 0x3b, 0x3d, 0x33, 0x3d, 0x35, 0x42, 0x3c,
+    0x39, 0x3e, 0x43, 0x2d, 0x3c, 0x40, 0x43, 0x43, 0x45, 0x35, 0x3c, 0x44,
+    0x34, 0x3c, 0x3d, 0x31, 0x39, 0x40, 0x39, 0x3d, 0x3e, 0x34, 0x3e, 0x3b,
+    0x40, 0x38, 0x42, 0x4a, 0x40, 0x3b, 0x35, 0x3d, 0x36, 0x38, 0x35, 0x42,
+    0x3c, 0x3c, 0x3d, 0x3b, 0x38, 0x39, 0x45, 0x28, 0x3a, 0x37, 0x37, 0x35,
+    0x3a, 0x3d, 0x35, 0x2a, 0x3c, 0x3f, 0x37, 0x34, 0x37, 0x3f, 0x3e, 0x2b,
+    0x39, 0x43, 0x3b, 0x45, 0x35, 0x36, 0x36, 0x42, 0x33, 0x38, 0x3b, 0x35,
+    0x31, 0x3f, 0x41, 0x41, 0x3c, 0x41, 0x45, 0x42, 0x3b, 0x3c, 0x39, 0x46,
+    0x3c, 0x3e, 0x3a, 0x41, 0x39, 0x3d, 0x41, 0x4b, 0x40, 0x3f, 0x43, 0x3d,
+    0x39, 0x39, 0x44, 0x44, 0x37, 0x42, 0x3f, 0x44, 0x3e, 0x37, 0x42, 0x35,
+    0x44, 0x3f, 0x40, 0x42, 0x3f, 0x3a, 0x47, 0x3d, 0x38, 0x3a, 0x3b, 0x3a,
+    0x42, 0x36, 0x3a, 0x97, 0x32, 0x31, 0x30, 0x36, 0x47, 0x3e, 0x46, 0x51,
+    0x42, 0x34, 0x50, 0x34, 0x26, 0x3b, 0x06, 0x55, 0x3c, 0x3b, 0x2d, 0x3a,
+    0x37, 0x37, 0x1b, 0x32, 0x39, 0x3d, 0x36, 0x40, 0x3b, 0x3f, 0x33, 0x33,
+    0x3d, 0x37, 0x35, 0x37, 0x44, 0x3f, 0x35, 0x39, 0x33, 0x3c, 0x43, 0x39,
+    0x3f, 0x42, 0x3e, 0x34, 0x38, 0x38, 0x39, 0x3c, 0x48, 0x3c, 0x2f, 0x30,
+    0x40, 0x3c, 0x41, 0x3e, 0x3f, 0x3e, 0x36, 0x43, 0x40, 0x3c, 0x36, 0x43,
+    0x43, 0x38, 0x3a, 0x47, 0x3e, 0x37, 0x39, 0x3a, 0x43, 0x45, 0x38, 0x43,
+    0x3b, 0x45, 0x37, 0x44, 0x36, 0x45, 0x3a, 0x3e, 0x3e, 0x3e, 0x3d, 0x33,
+    0x39, 0x36, 0x48, 0x33, 0x30, 0x42, 0x33, 0x39, 0x37, 0x3a, 0x3f, 0x34,
+    0x34, 0x40, 0x40, 0x40, 0x3f, 0x3d, 0x3f, 0x33, 0x41, 0x40, 0x3b, 0x43,
+    0x3b, 0x3a, 0x40, 0x3a, 0x38, 0x3e, 0x38, 0x3b, 0x38, 0x42, 0x40, 0x40,
+    0x41, 0x35, 0x37, 0x38, 0x3b, 0x3c, 0x39, 0x4b, 0x32, 0x39, 0x42, 0x3c,
+    0x36, 0x3d, 0x32, 0x52, 0x3a, 0x31, 0x40, 0x40, 0x3a, 0x43, 0x3d, 0x46,
+    0x3c, 0x3e, 0x3e, 0x33, 0x3f, 0x41, 0x4d, 0x37, 0x39, 0x39, 0x3e, 0x3b,
+    0x40, 0x39, 0x53, 0x2d, 0x46, 0x3c, 0x32, 0x42, 0x3d, 0x40, 0x40, 0x4d,
+    0x2e, 0x34, 0x39, 0x3b, 0x46, 0x3b, 0x42, 0x4f, 0x3d, 0x39, 0x4e, 0x36,
+    0x1a, 0x31, 0x0e, 0x56, 0x36, 0x42, 0x38, 0x44, 0x36, 0x3a, 0x20, 0x30,
+    0x36, 0x34, 0x37, 0x38, 0x40, 0x41, 0x2a, 0x35, 0x3b, 0x3b, 0x3a, 0x38,
+    0x33, 0x39, 0x36, 0x41, 0x43, 0x39, 0x35, 0x3d, 0x37, 0x3d, 0x33, 0x31,
+    0x45, 0x33, 0x3f, 0x3b, 0x44, 0x38, 0x39, 0x34, 0x38, 0x39, 0x38, 0x3d,
+    0x3a, 0x3a, 0x41, 0x40, 0x44, 0x3e, 0x3f, 0x45, 0x34, 0x31, 0x34, 0x43,
+    0x3b, 0x34, 0x42, 0x3c, 0x3c, 0x43, 0x35, 0x45, 0x36, 0x38, 0x3d, 0x3c,
+    0x3f, 0x3d, 0x3e, 0x45, 0x41, 0x43, 0x35, 0x3f, 0x40, 0x3f, 0x3a, 0x34,
+    0x3d, 0x32, 0x41, 0x3d, 0x48, 0x42, 0x37, 0x2a, 0x3c, 0x3a, 0x3e, 0x49,
+    0x38, 0x36, 0x38, 0x2e, 0x36, 0x37, 0x34, 0x3e, 0x3c, 0x43, 0x43, 0x39,
+    0x39, 0x3b, 0x44, 0x46, 0x44, 0x43, 0x37, 0x46, 0x43, 0x34, 0x3b, 0x35,
+    0x42, 0x41, 0x3f, 0x3d, 0x3d, 0x3a, 0x42, 0x3e, 0x38, 0x47, 0x3d, 0x49,
+    0x45, 0x49, 0x3a, 0x3c, 0x3e, 0x37, 0x40, 0x46, 0x41, 0x33, 0x45, 0x36,
+    0x37, 0x44, 0x49, 0x3b, 0x44, 0x40, 0x33, 0x46, 0x37, 0x39, 0x4e, 0x3a,
+    0x43, 0x38, 0x3a, 0x42, 0x3a, 0x3d, 0x45, 0x50, 0x26, 0x34, 0x3b, 0x3c,
+    0x46, 0x46, 0x4c, 0x54, 0x3f, 0x35, 0x4e, 0x47, 0x21, 0x39, 0x0e, 0x54,
+    0x3a, 0x3a, 0x2f, 0x40, 0x2d, 0x3a, 0x1f, 0x31, 0x31, 0x42, 0x34, 0x45,
+    0x37, 0x36, 0x30, 0x3b, 0x3a, 0x3a, 0x36, 0x40, 0x32, 0x36, 0x3c, 0x3c,
+    0x37, 0x42, 0x35, 0x3e, 0x39, 0x47, 0x36, 0x32, 0x41, 0x30, 0x42, 0x39,
+    0x39, 0x44, 0x37, 0x30, 0x41, 0x3b, 0x3d, 0x3d, 0x43, 0x3b, 0x38, 0x45,
+    0x3b, 0x3a, 0x39, 0x3a, 0x31, 0x33, 0x43, 0x46, 0x3f, 0x41, 0x44, 0x3f,
+    0x3b, 0x44, 0x3a, 0x4c, 0x33, 0x33, 0x33, 0x3e, 0x37, 0x3e, 0x45, 0x45,
+    0x36, 0x42, 0x3e, 0x43, 0x40, 0x34, 0x36, 0x31, 0x38, 0x34, 0x41, 0x3b,
+    0x32, 0x38, 0x3e, 0x29, 0x47, 0x33, 0x37, 0x45, 0x3c, 0x3d, 0x43, 0x2c,
+    0x36, 0x3a, 0x3c, 0x40, 0x3d, 0x46, 0x3c, 0x37, 0x40, 0x44, 0x37, 0x38,
+    0x3e, 0x41, 0x3c, 0x40, 0x33, 0x3f, 0x44, 0x32, 0x44, 0x3a, 0x43, 0x42,
+    0x3e, 0x38, 0x44, 0x3b, 0x41, 0x48, 0x3f, 0x4e, 0x3f, 0x44, 0x35, 0x45,
+    0x34, 0x3f, 0x42, 0x4b, 0x37, 0x37, 0x3e, 0x45, 0x46, 0x45, 0x46, 0x3d,
+    0x3e, 0x39, 0x3b, 0x3a, 0x46, 0x3a, 0x56, 0x35, 0x46, 0x3d, 0x40, 0x3b,
+    0x36, 0x39, 0x3f, 0x54, 0x27, 0x2b, 0x34, 0x3c, 0x48, 0x3d, 0x49, 0x4c,
+    0x3e, 0x3d, 0x4e, 0x42, 0x25, 0x3b, 0x10, 0x4d, 0x30, 0x36, 0x3e, 0x36,
+    0x2e, 0x31, 0x1d, 0x37, 0x3a, 0x39, 0x33, 0x3f, 0x39, 0x38, 0x2e, 0x36,
+    0x44, 0x3e, 0x41, 0x37, 0x3b, 0x30, 0x3b, 0x48, 0x31, 0x39, 0x41, 0x3e,
+    0x37, 0x37, 0x34, 0x2f, 0x35, 0x3b, 0x3a, 0x3e, 0x45, 0x3e, 0x3f, 0x35,
+    0x39, 0x39, 0x3b, 0x44, 0x43, 0x3c, 0x3e, 0x46, 0x40, 0x3a, 0x36, 0x45,
+    0x41, 0x40, 0x36, 0x44, 0x3a, 0x37, 0x47, 0x47, 0x3d, 0x36, 0x43, 0x4e,
+    0x3b, 0x38, 0x40, 0x48, 0x44, 0x43, 0x45, 0x3f, 0x43, 0x3c, 0x3b, 0x37,
+    0x43, 0x41, 0x39, 0x2f, 0x3d, 0x45, 0x3e, 0x3e, 0x42, 0x40, 0x41, 0x2f,
+    0x47, 0x38, 0x3a, 0x48, 0x3e, 0x35, 0x37, 0x2a, 0x34, 0x38, 0x41, 0x3b,
+    0x3d, 0x37, 0x3b, 0x35, 0x38, 0x3e, 0x41, 0x3c, 0x41, 0x43, 0x3d, 0x46,
+    0x47, 0x47, 0x3d, 0x35, 0x48, 0x41, 0x3d, 0x3e, 0x34, 0x47, 0x38, 0x38,
+    0x39, 0x3e, 0x38, 0x4d, 0x43, 0x36, 0x42, 0x40, 0x3e, 0x41, 0x3f, 0x4c,
+    0x3e, 0x3e, 0x37, 0x44, 0x3e, 0x3b, 0x47, 0x3e, 0x3f, 0x3b, 0x39, 0x3c,
+    0x3c, 0x3c, 0x53, 0x3b, 0x3b, 0x32, 0x3e, 0x3f, 0x32, 0x3c, 0x37, 0x4b,
+    0x33, 0x30, 0x2f, 0x41, 0x47, 0x42, 0x49, 0x4f, 0x3b, 0x42, 0x4c, 0x44,
+    0x1f, 0x37, 0x16, 0x4e, 0x3b, 0x3f, 0x30, 0x36, 0x35, 0x38, 0x26, 0x36,
+    0x32, 0x3b, 0x38, 0x3c, 0x30, 0x3e, 0x34, 0x3e, 0x3d, 0x34, 0x39, 0x3c,
+    0x36, 0x47, 0x34, 0x41, 0x31, 0x39, 0x44, 0x3e, 0x39, 0x41, 0x32, 0x36,
+    0x3b, 0x3f, 0x32, 0x3d, 0x36, 0x3e, 0x40, 0x3d, 0x45, 0x32, 0x45, 0x42,
+    0x38, 0x43, 0x40, 0x42, 0x34, 0x3a, 0x43, 0x38, 0x47, 0x3f, 0x41, 0x47,
+    0x34, 0x44, 0x41, 0x39, 0x3c, 0x46, 0x36, 0x4f, 0x41, 0x3e, 0x38, 0x38,
+    0x3a, 0x3b, 0x43, 0x44, 0x37, 0x3f, 0x35, 0x43, 0x34, 0x3d, 0x40, 0x32,
+    0x3a, 0x3b, 0x3d, 0x34, 0x35, 0x43, 0x31, 0x2c, 0x3b, 0x36, 0x38, 0x41,
+    0x3c, 0x38, 0x3d, 0x31, 0x45, 0x46, 0x42, 0x41, 0x33, 0x3f, 0x3f, 0x3a,
+    0x36, 0x3f, 0x3c, 0x3c, 0x3c, 0x3e, 0x39, 0x3e, 0x40, 0x37, 0x47, 0x3e,
+    0x35, 0x39, 0x3d, 0x3d, 0x37, 0x36, 0x3e, 0x45, 0x38, 0x3d, 0x45, 0x43,
+    0x3a, 0x32, 0x3b, 0x3a, 0x32, 0x3c, 0x3d, 0x43, 0x3d, 0x33, 0x3b, 0x3d,
+    0x46, 0x3a, 0x44, 0x45, 0x3b, 0x3e, 0x3c, 0x42, 0x37, 0x37, 0x52, 0x2a,
+    0x3a, 0x35, 0x35, 0x3f, 0x40, 0x38, 0x40, 0x5b, 0x35, 0x32, 0x2b, 0x3d,
+    0x4a, 0x3c, 0x46, 0x56, 0x44, 0x30, 0x4d, 0x39, 0x20, 0x32, 0x0f, 0x4f,
+    0x33, 0x3c, 0x35, 0x35, 0x3a, 0x45, 0x29, 0x3b, 0x31, 0x38, 0x34, 0x38,
+    0x42, 0x45, 0x37, 0x3e, 0x37, 0x2e, 0x36, 0x43, 0x3f, 0x38, 0x2f, 0x41,
+    0x3f, 0x41, 0x3c, 0x31, 0x37, 0x36, 0x37, 0x39, 0x41, 0x3a, 0x3a, 0x40,
+    0x3e, 0x47, 0x3d, 0x37, 0x3c, 0x38, 0x35, 0x39, 0x3a, 0x43, 0x3f, 0x42,
+    0x42, 0x38, 0x3e, 0x40, 0x3c, 0x3a, 0x45, 0x48, 0x37, 0x3a, 0x3e, 0x35,
+    0x3a, 0x3d, 0x45, 0x4a, 0x3d, 0x37, 0x38, 0x3a, 0x3d, 0x46, 0x46, 0x41,
+    0x37, 0x41, 0x40, 0x48, 0x37, 0x34, 0x3b, 0x2c, 0x39, 0x34, 0x37, 0x35,
+    0x3a, 0x43, 0x39, 0x2e, 0x39, 0x3f, 0x40, 0x3e, 0x40, 0x40, 0x3c, 0x2d,
+    0x3e, 0x3c, 0x37, 0x39, 0x3c, 0x3b, 0x3d, 0x3f, 0x41, 0x48, 0x3b, 0x3d,
+    0x3b, 0x41, 0x45, 0x3e, 0x3a, 0x38, 0x3f, 0x3c, 0x3d, 0x3e, 0x40, 0x42,
+    0x46, 0x38, 0x43, 0x34, 0x35, 0x47, 0x3d, 0x46, 0x3f, 0x3e, 0x32, 0x3f,
+    0x3e, 0x3d, 0x47, 0x46, 0x38, 0x41, 0x45, 0x3f, 0x34, 0x3f, 0x41, 0x43,
+    0x3e, 0x3e, 0x44, 0x3b, 0x3b, 0x36, 0x51, 0x32, 0x37, 0x3c, 0x42, 0x43,
+    0x33, 0x39, 0x42, 0x61, 0x2c, 0x3b, 0x2e, 0x39, 0x42, 0x39, 0x42, 0x54,
+    0x3c, 0x3a, 0x48, 0x35, 0x26, 0x34, 0x15, 0x51, 0x35, 0x40, 0x36, 0x3c,
+    0x2d, 0x37, 0x25, 0x38, 0x33, 0x3d, 0x3d, 0x39, 0x3e, 0x3b, 0x2e, 0x4b,
+    0x3d, 0x3b, 0x42, 0x37, 0x37, 0x40, 0x37, 0x40, 0x35, 0x45, 0x37, 0x37,
+    0x3f, 0x41, 0x36, 0x39, 0x3c, 0x32, 0x3e, 0x38, 0x41, 0x40, 0x3e, 0x3f,
+    0x3b, 0x3c, 0x43, 0x35, 0x3e, 0x3d, 0x44, 0x44, 0x3a, 0x36, 0x39, 0x3f,
+    0x3a, 0x31, 0x42, 0x4d, 0x40, 0x33, 0x40, 0x45, 0x44, 0x3d, 0x40, 0x49,
+    0x41, 0x3f, 0x42, 0x3a, 0x34, 0x46, 0x38, 0x46, 0x42, 0x34, 0x3a, 0x40,
+    0x40, 0x41, 0x3d, 0x32, 0x35, 0x48, 0x35, 0x3e, 0x44, 0x41, 0x40, 0x2c,
+    0x46, 0x38, 0x38, 0x3f, 0x36, 0x40, 0x38, 0x2a, 0x43, 0x41, 0x3e, 0x35,
+    0x46, 0x3a, 0x45, 0x46, 0x46, 0x42, 0x3a, 0x3b, 0x40, 0x38, 0x35, 0x43,
+    0x38, 0x3d, 0x3b, 0x41, 0x36, 0x44, 0x3f, 0x3f, 0x34, 0x3e, 0x3c, 0x3d,
+    0x49, 0x36, 0x37, 0x4b, 0x38, 0x3c, 0x43, 0x37, 0x3a, 0x3f, 0x31, 0x45,
+    0x3b, 0x39, 0x3f, 0x40, 0x37, 0x3c, 0x42, 0x3f, 0x3c, 0x33, 0x40, 0x3b,
+    0x32, 0x3c, 0x52, 0x31, 0x3d, 0x44, 0x3b, 0x31, 0x46, 0x38, 0x40, 0x60,
+    0x2b, 0x3c, 0x37, 0x34, 0x43, 0x38, 0x45, 0x57, 0x37, 0x39, 0x49, 0x33,
+    0x2d, 0x3f, 0x18, 0x4e, 0x39, 0x39, 0x32, 0x3b, 0x34, 0x3b, 0x2c, 0x45,
+    0x33, 0x37, 0x45, 0x42, 0x3d, 0x37, 0x2a, 0x4c, 0x3d, 0x3f, 0x3c, 0x36,
+    0x37, 0x3c, 0x39, 0x47, 0x3d, 0x44, 0x3d, 0x40, 0x3d, 0x41, 0x34, 0x3e,
+    0x40, 0x34, 0x3b, 0x3a, 0x41, 0x36, 0x37, 0x40, 0x3e, 0x3f, 0x3a, 0x36,
+    0x3e, 0x35, 0x3b, 0x48, 0x41, 0x40, 0x3c, 0x42, 0x34, 0x41, 0x3f, 0x44,
+    0x34, 0x39, 0x33, 0x39, 0x39, 0x47, 0x40, 0x48, 0x38, 0x3a, 0x43, 0x43,
+    0x48, 0x3a, 0x3f, 0x46, 0x35, 0x3a, 0x33, 0x36, 0x32, 0x3c, 0x40, 0x34,
+    0x40, 0x3a, 0x42, 0x3a, 0x39, 0x38, 0x41, 0x35, 0x3a, 0x3f, 0x35, 0x40,
+    0x3f, 0x39, 0x39, 0x36, 0x38, 0x40, 0x3e, 0x3e, 0x3a, 0x31, 0x32, 0x44,
+    0x40, 0x47, 0x3a, 0x3c, 0x43, 0x43, 0x46, 0x48, 0x40, 0x35, 0x3d, 0x37,
+    0x44, 0x37, 0x33, 0x44, 0x3b, 0x3e, 0x3f, 0x37, 0x36, 0x3a, 0x38, 0x47,
+    0x3a, 0x44, 0x36, 0x42, 0x3e, 0x44, 0x34, 0x46, 0x33, 0x43, 0x44, 0x3e,
+    0x30, 0x48, 0x37, 0x38, 0x33, 0x3c, 0x46, 0x42, 0x38, 0x3d, 0x50, 0x39,
+    0x33, 0x38, 0x3e, 0x40, 0x3b, 0x2b, 0x3b, 0x5f, 0x2b, 0x32, 0x2f, 0x37,
+    0x3f, 0x3a, 0x40, 0x4e, 0x34, 0x38, 0x47, 0x37, 0x27, 0x2b, 0x1b, 0x4f,
+    0x36, 0x38, 0x3a, 0x3a, 0x3b, 0x38, 0x2e, 0x3f, 0x3f, 0x42, 0x42, 0x42,
+    0x36, 0x3e, 0x3c, 0x55, 0x39, 0x40, 0x44, 0x43, 0x3e, 0x33, 0x3c, 0x43,
+    0x38, 0x44, 0x3b, 0x46, 0x3f, 0x45, 0x34, 0x38, 0x3c, 0x41, 0x42, 0x3d,
+    0x42, 0x36, 0x43, 0x3f, 0x3c, 0x39, 0x3e, 0x39, 0x39, 0x42, 0x33, 0x47,
+    0x36, 0x3d, 0x3f, 0x3b, 0x40, 0x39, 0x3b, 0x49, 0x36, 0x40, 0x3d, 0x41,
+    0x40, 0x34, 0x3b, 0x4e, 0x3b, 0x36, 0x3b, 0x45, 0x40, 0x32, 0x3b, 0x49,
+    0x37, 0x38, 0x3a, 0x47, 0x37, 0x40, 0x3e, 0x38, 0x40, 0x3f, 0x3c, 0x3a,
+    0x47, 0x41, 0x42, 0x30, 0x40, 0x3c, 0x42, 0x3f, 0x31, 0x44, 0x39, 0x38,
+    0x3b, 0x38, 0x42, 0x43, 0x41, 0x35, 0x3a, 0x39, 0x3e, 0x38, 0x39, 0x3e,
+    0x3c, 0x42, 0x3d, 0x49, 0x47, 0x3c, 0x3f, 0x35, 0x41, 0x3a, 0x36, 0x43,
+    0x43, 0x3b, 0x39, 0x3b, 0x36, 0x43, 0x43, 0x4e, 0x3e, 0x35, 0x37, 0x3b,
+    0x3f, 0x37, 0x41, 0x48, 0x32, 0x44, 0x43, 0x32, 0x38, 0x39, 0x45, 0x39,
+    0x3e, 0x3d, 0x35, 0x39, 0x35, 0x39, 0x50, 0x37, 0x39, 0x40, 0x43, 0x47,
+    0x32, 0x2a, 0x40, 0x62, 0x24, 0x30, 0x36, 0x3e, 0x41, 0x32, 0x47, 0x58,
+    0x39, 0x36, 0x44, 0x34, 0x26, 0x34, 0x1e, 0x50, 0x3c, 0x3b, 0x3f, 0x42,
+    0x35, 0x3d, 0x2a, 0x4e, 0x40, 0x38, 0x36, 0x31, 0x3a, 0x30, 0x37, 0x4b,
+    0x3c, 0x3b, 0x3b, 0x41, 0x3b, 0x3c, 0x2e, 0x45, 0x44, 0x3f, 0x3b, 0x35,
+    0x3e, 0x33, 0x37, 0x3d, 0x40, 0x39, 0x39, 0x37, 0x40, 0x3e, 0x3a, 0x3e,
+    0x3c, 0x3c, 0x45, 0x40, 0x3c, 0x3f, 0x3a, 0x51, 0x47, 0x3a, 0x34, 0x39,
+    0x3b, 0x34, 0x44, 0x4c, 0x36, 0x3d, 0x3a, 0x35, 0x34, 0x36, 0x38, 0x4b,
+    0x3f, 0x40, 0x3f, 0x3e, 0x40, 0x41, 0x47, 0x43, 0x32, 0x38, 0x46, 0x44,
+    0x46, 0x43, 0x43, 0x37, 0x39, 0x49, 0x37, 0x36, 0x3e, 0x3d, 0x37, 0x3c,
+    0x39, 0x37, 0x34, 0x43, 0x45, 0x32, 0x3a, 0x3a, 0x38, 0x43, 0x3b, 0x40,
+    0x3b, 0x3f, 0x3d, 0x41, 0x40, 0x3d, 0x3a, 0x3b, 0x48, 0x37, 0x3d, 0x41,
+    0x40, 0x3e, 0x38, 0x41, 0x3d, 0x3a, 0x38, 0x49, 0x40, 0x3c, 0x42, 0x41,
+    0x3a, 0x38, 0x38, 0x4c, 0x3e, 0x41, 0x40, 0x3b, 0x3d, 0x3e, 0x3c, 0x46,
+    0x3e, 0x42, 0x41, 0x38, 0x42, 0x42, 0x41, 0x3e, 0x3e, 0x37, 0x3c, 0x43,
+    0x43, 0x3b, 0x54, 0x2b, 0x45, 0x3b, 0x43, 0x41, 0x41, 0x26, 0x3f, 0x60,
+    0x25, 0x2b, 0x2e, 0x3a, 0x40, 0x31, 0x40, 0x49, 0x40, 0x31, 0x46, 0x3c,
+    0x1e, 0x2a, 0x1a, 0x47, 0x33, 0x37, 0x37, 0x34, 0x31, 0x36, 0x25, 0x41,
+    0x2e, 0x36, 0x35, 0x33, 0x33, 0x34, 0x31, 0x45, 0x3a, 0x3f, 0x3d, 0x40,
+    0x3c, 0x41, 0x30, 0x3c, 0x3f, 0x46, 0x37, 0x3c, 0x3a, 0x3c, 0x36, 0x3a,
+    0x47, 0x3d, 0x31, 0x3f, 0x40, 0x3e, 0x36, 0x44, 0x41, 0x3d, 0x36, 0x3f,
+    0x37, 0x3f, 0x34, 0x4b, 0x31, 0x47, 0x43, 0x3e, 0x3e, 0x3a, 0x3b, 0x4b,
+    0x37, 0x32, 0x38, 0x3d, 0x37, 0x47, 0x46, 0x4d, 0x36, 0x3c, 0x3f, 0x3a,
+    0x41, 0x31, 0x47, 0x43, 0x3d, 0x3d, 0x3e, 0x35, 0x3d, 0x46, 0x49, 0x2a,
+    0x37, 0x3c, 0x39, 0x3d, 0x47, 0x3c, 0x34, 0x2c, 0x3e, 0x38, 0x47, 0x32,
+    0x36, 0x36, 0x41, 0x38, 0x35, 0x44, 0x48, 0x3b, 0x39, 0x3e, 0x38, 0x3e,
+    0x40, 0x36, 0x37, 0x46, 0x39, 0x3b, 0x34, 0x45, 0x40, 0x3b, 0x48, 0x36,
+    0x34, 0x44, 0x37, 0x46, 0x3f, 0x42, 0x33, 0x36, 0x43, 0x3c, 0x41, 0x46,
+    0x31, 0x42, 0x43, 0x44, 0x44, 0x3e, 0x42, 0x3b, 0x3b, 0x3a, 0x3c, 0x37,
+    0x42, 0x41, 0x46, 0x38, 0x41, 0x3b, 0x40, 0x44, 0x37, 0x3c, 0x4c, 0x2e,
+    0x3a, 0x3e, 0x3b, 0x36, 0x33, 0x27, 0x37, 0x5d, 0x27, 0x34, 0x32, 0x41,
+    0x41, 0x3f, 0x40, 0x5d, 0x40, 0x3d, 0x48, 0x39, 0x2e, 0x30, 0x1f, 0x3f,
+    0x38, 0x3f, 0x40, 0x33, 0x40, 0x38, 0x31, 0x3f, 0x42, 0x3e, 0x3b, 0x3a,
+    0x42, 0x36, 0x3a, 0x42, 0x3c, 0x3b, 0x3d, 0x41, 0x3d, 0x40, 0x40, 0x3e,
+    0x36, 0x41, 0x47, 0x3d, 0x33, 0x32, 0x33, 0x44, 0x3e, 0x3a, 0x3e, 0x3d,
+    0x45, 0x3f, 0x38, 0x3f, 0x40, 0x3a, 0x3c, 0x46, 0x32, 0x42, 0x3c, 0x51,
+    0x33, 0x38, 0x3a, 0x38, 0x41, 0x34, 0x45, 0x4e, 0x35, 0x3c, 0x42, 0x3e,
+    0x3f, 0x45, 0x44, 0x4e, 0x39, 0x47, 0x3a, 0x33, 0x3e, 0x3b, 0x45, 0x42,
+    0x37, 0x3a, 0x3e, 0x33, 0x41, 0x48, 0x32, 0x2a, 0x3b, 0x37, 0x3f, 0x3d,
+    0x3a, 0x42, 0x41, 0x2f, 0x34, 0x3e, 0x49, 0x3b, 0x38, 0x3e, 0x3d, 0x3a,
+    0x37, 0x3c, 0x44, 0x41, 0x39, 0x42, 0x3f, 0x39, 0x40, 0x35, 0x3d, 0x41,
+    0x3b, 0x45, 0x44, 0x48, 0x3d, 0x42, 0x36, 0x33, 0x3e, 0x44, 0x3f, 0x41,
+    0x42, 0x40, 0x49, 0x34, 0x48, 0x41, 0x3f, 0x40, 0x3c, 0x45, 0x47, 0x34,
+    0x41, 0x37, 0x47, 0x3e, 0x41, 0x41, 0x39, 0x42, 0x3f, 0x3a, 0x46, 0x33,
+    0x39, 0x41, 0x38, 0x38, 0x3e, 0x42, 0x41, 0x38, 0x35, 0x32, 0x33, 0x38,
+    0x3a, 0x3f, 0x45, 0x66, 0x33, 0x47, 0x38, 0x3c, 0x41, 0x2f, 0x48, 0x55,
+    0x33, 0x3e, 0x49, 0x3b, 0x3c, 0x30, 0x24, 0x45, 0x3c, 0x44, 0x43, 0x32,
+    0x3d, 0x3f, 0x35, 0x3b, 0x3e, 0x36, 0x38, 0x3a, 0x36, 0x37, 0x3b, 0x41,
+    0x38, 0x42, 0x3e, 0x43, 0x39, 0x3f, 0x3c, 0x40, 0x37, 0x43, 0x3e, 0x3b,
+    0x3d, 0x35, 0x35, 0x3d, 0x43, 0x3f, 0x3a, 0x35, 0x37, 0x3c, 0x31, 0x47,
+    0x44, 0x45, 0x40, 0x32, 0x44, 0x36, 0x38, 0x51, 0x3c, 0x41, 0x45, 0x37,
+    0x39, 0x44, 0x3e, 0x4f, 0x3c, 0x3a, 0x38, 0x40, 0x3f, 0x34, 0x39, 0x4e,
+    0x3d, 0x39, 0x45, 0x3f, 0x3e, 0x3c, 0x3b, 0x42, 0x3b, 0x3b, 0x34, 0x3d,
+    0x41, 0x44, 0x39, 0x2e, 0x37, 0x44, 0x45, 0x37, 0x3d, 0x41, 0x3f, 0x33,
+    0x3f, 0x3e, 0x3e, 0x40, 0x44, 0x3f, 0x37, 0x32, 0x35, 0x3e, 0x43, 0x41,
+    0x39, 0x37, 0x35, 0x3f, 0x48, 0x3d, 0x43, 0x49, 0x38, 0x35, 0x3f, 0x48,
+    0x3b, 0x3a, 0x34, 0x3f, 0x3c, 0x44, 0x3a, 0x40, 0x36, 0x35, 0x44, 0x36,
+    0x44, 0x3b, 0x3d, 0x38, 0x3c, 0x44, 0x47, 0x3a, 0x3b, 0x45, 0x41, 0x3a,
+    0x39, 0x35, 0x44, 0x3a, 0x49, 0x36, 0x48, 0x31, 0x42, 0x43, 0x42, 0x34,
+    0x41, 0x40, 0x4d, 0x36, 0x3e, 0x35, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x39,
+    0x3c, 0x44, 0x3f, 0x39, 0x3a, 0x36, 0x3d, 0x36, 0x3a, 0x3a, 0x34, 0x3b,
+    0x38, 0x2f, 0x40, 0x34, 0x32, 0x4d, 0x43, 0x45, 0x4e, 0x3f, 0x48, 0x35,
+    0x3b, 0x4d, 0x4f, 0x39, 0x42, 0x36, 0x46, 0x36, 0x4a, 0x3c, 0x37, 0x41,
+    0x40, 0x43, 0x50, 0x36, 0x3e, 0x39, 0x44, 0x40, 0x36, 0x47, 0x3f, 0x36,
+    0x45, 0x40, 0x45, 0x41, 0x3b, 0x37, 0x41, 0x39, 0x3b, 0x48, 0x37, 0x34,
+    0x41, 0x45, 0x49, 0x3f, 0x39, 0x49, 0x3f, 0x3a, 0x42, 0x34, 0x38, 0x37,
+    0x44, 0x34, 0x3c, 0x3d, 0x40, 0x47, 0x3a, 0x36, 0x3f, 0x3c, 0x41, 0x3e,
+    0x47, 0x46, 0x46, 0x43, 0x3f, 0x38, 0x3b, 0x40, 0x3f, 0x48, 0x3b, 0x4c,
+    0x3d, 0x4b, 0x34, 0x3b, 0x44, 0x43, 0x3c, 0x49, 0x38, 0x42, 0x41, 0x36,
+    0x33, 0x36, 0x40, 0x46, 0x40, 0x3a, 0x42, 0x3c, 0x3d, 0x35, 0x3c, 0x52,
+    0x3e, 0x40, 0x43, 0x43, 0x41, 0x3b, 0x3e, 0x44, 0x3f, 0x40, 0x40, 0x43,
+    0x3d, 0x3f, 0x36, 0x42, 0x3f, 0x3c, 0x34, 0x3d, 0x33, 0x41, 0x3c, 0x39,
+    0x34, 0x43, 0x3f, 0x34, 0x3c, 0x3a, 0x3a, 0x37, 0x42, 0x41, 0x40, 0x3e,
+    0x3d, 0x3c, 0x41, 0x3c, 0x38, 0x33, 0x49, 0x46, 0x40, 0x40, 0x3a, 0x46,
+    0x38, 0x3c, 0x37, 0x34, 0x3e, 0x3d, 0x32, 0x38, 0x3c, 0x4c, 0x3a, 0x34,
+    0x35, 0x32, 0x39, 0x40, 0x3a, 0x58, 0x40, 0x46, 0x42, 0x33, 0x45, 0x39,
+    0x34, 0x4f, 0x53, 0x45, 0x43, 0x3e, 0x41, 0x36, 0x3e, 0x3f, 0x40, 0x47,
+    0x4e, 0x3d, 0x53, 0x2b, 0x41, 0x36, 0x3e, 0x38, 0x47, 0x41, 0x3f, 0x34,
+    0x47, 0x40, 0x38, 0x39, 0x3d, 0x42, 0x3f, 0x3c, 0x48, 0x3a, 0x35, 0x3c,
+    0x45, 0x49, 0x3c, 0x33, 0x33, 0x3f, 0x3c, 0x46, 0x43, 0x3f, 0x45, 0x31,
+    0x35, 0x43, 0x46, 0x3a, 0x45, 0x3c, 0x37, 0x3a, 0x37, 0x36, 0x35, 0x3f,
+    0x38, 0x49, 0x34, 0x3f, 0x3c, 0x42, 0x49, 0x3e, 0x3e, 0x3c, 0x39, 0x49,
+    0x3e, 0x3c, 0x3b, 0x43, 0x44, 0x45, 0x39, 0x4b, 0x47, 0x47, 0x3e, 0x33,
+    0x3c, 0x31, 0x34, 0x4f, 0x45, 0x43, 0x40, 0x3d, 0x42, 0x3b, 0x43, 0x50,
+    0x3c, 0x3b, 0x37, 0x42, 0x47, 0x42, 0x3e, 0x4a, 0x3f, 0x3a, 0x48, 0x3d,
+    0x48, 0x45, 0x3e, 0x40, 0x3a, 0x3c, 0x3d, 0x39, 0x41, 0x42, 0x3c, 0x42,
+    0x43, 0x3c, 0x3b, 0x3d, 0x47, 0x49, 0x38, 0x3c, 0x46, 0x3a, 0x3c, 0x3f,
+    0x3a, 0x46, 0x3a, 0x3b, 0x3d, 0x3a, 0x49, 0x46, 0x38, 0x40, 0x3e, 0x38,
+    0x37, 0x32, 0x40, 0x3c, 0x42, 0x3d, 0x3b, 0x40, 0x3a, 0x38, 0x49, 0x33,
+    0x40, 0x38, 0x2b, 0x3a, 0x3c, 0x4f, 0x4d, 0x3e, 0x35, 0x3d, 0x3b, 0x40,
+    0x3a, 0x54, 0x3e, 0x3e, 0x43, 0x30, 0x47, 0x3d, 0x3b, 0x53, 0x52, 0x4a,
+    0x43, 0x41, 0x49, 0x37, 0x3b, 0x35, 0x44, 0x3c, 0x45, 0x40, 0x4f, 0x36,
+    0x4b, 0x42, 0x41, 0x3a, 0x41, 0x44, 0x47, 0x32, 0x43, 0x35, 0x3f, 0x37,
+    0x43, 0x41, 0x43, 0x36, 0x3f, 0x3b, 0x3d, 0x38, 0x3d, 0x40, 0x42, 0x36,
+    0x44, 0x3a, 0x39, 0x47, 0x37, 0x34, 0x42, 0x3a, 0x37, 0x38, 0x37, 0x3f,
+    0x36, 0x3b, 0x45, 0x3f, 0x3f, 0x3d, 0x39, 0x3d, 0x39, 0x41, 0x37, 0x3f,
+    0x3f, 0x3d, 0x3f, 0x41, 0x43, 0x41, 0x45, 0x43, 0x41, 0x3c, 0x3e, 0x40,
+    0x40, 0x39, 0x41, 0x4f, 0x47, 0x42, 0x46, 0x48, 0x3b, 0x3b, 0x3c, 0x46,
+    0x47, 0x3e, 0x46, 0x37, 0x38, 0x3d, 0x38, 0x52, 0x36, 0x46, 0x3c, 0x3a,
+    0x3b, 0x37, 0x48, 0x4b, 0x3f, 0x42, 0x3c, 0x36, 0x40, 0x37, 0x33, 0x4c,
+    0x39, 0x34, 0x41, 0x34, 0x3f, 0x3b, 0x35, 0x4b, 0x3b, 0x45, 0x43, 0x31,
+    0x3e, 0x39, 0x30, 0x3d, 0x32, 0x43, 0x44, 0x3c, 0x3e, 0x38, 0x43, 0x41,
+    0x3e, 0x37, 0x41, 0x39, 0x39, 0x44, 0x43, 0x38, 0x3f, 0x37, 0x48, 0x3f,
+    0x3b, 0x44, 0x37, 0x3f, 0x3a, 0x3f, 0x3b, 0x33, 0x42, 0x3e, 0x2f, 0x42,
+    0x44, 0x4f, 0x52, 0x3c, 0x34, 0x33, 0x39, 0x46, 0x31, 0x55, 0x43, 0x4e,
+    0x49, 0x38, 0x4d, 0x48, 0x34, 0x4d, 0x5c, 0x4d, 0x49, 0x37, 0x4f, 0x40,
+    0x3c, 0x3d, 0x41, 0x42, 0x3f, 0x51, 0x4b, 0x2f, 0x46, 0x35, 0x39, 0x3c,
+    0x49, 0x3d, 0x4e, 0x32, 0x43, 0x47, 0x31, 0x3e, 0x42, 0x4a, 0x4c, 0x39,
+    0x43, 0x46, 0x3e, 0x3f, 0x44, 0x3c, 0x42, 0x30, 0x3e, 0x34, 0x3b, 0x3b,
+    0x3a, 0x3c, 0x42, 0x3d, 0x3d, 0x48, 0x48, 0x36, 0x3a, 0x45, 0x38, 0x40,
+    0x3c, 0x41, 0x3f, 0x49, 0x42, 0x41, 0x38, 0x3d, 0x3d, 0x44, 0x3b, 0x3d,
+    0x35, 0x48, 0x43, 0x3b, 0x32, 0x41, 0x3e, 0x3a, 0x46, 0x41, 0x40, 0x54,
+    0x38, 0x3f, 0x3c, 0x36, 0x3b, 0x36, 0x43, 0x50, 0x38, 0x3c, 0x44, 0x3b,
+    0x43, 0x47, 0x32, 0x50, 0x3d, 0x46, 0x3d, 0x3b, 0x39, 0x37, 0x3b, 0x4a,
+    0x47, 0x43, 0x46, 0x3d, 0x3d, 0x41, 0x43, 0x45, 0x3b, 0x3c, 0x39, 0x47,
+    0x43, 0x42, 0x39, 0x4c, 0x34, 0x41, 0x45, 0x3b, 0x38, 0x3e, 0x37, 0x3f,
+    0x45, 0x43, 0x39, 0x42, 0x3c, 0x3d, 0x3d, 0x3c, 0x48, 0x39, 0x3b, 0x3a,
+    0x46, 0x45, 0x3d, 0x3a, 0x3f, 0x3a, 0x45, 0x36, 0x3d, 0x43, 0x36, 0x43,
+    0x42, 0x3d, 0x41, 0x3f, 0x3a, 0x3f, 0x31, 0x37, 0x48, 0x4f, 0x4e, 0x36,
+    0x30, 0x3a, 0x3e, 0x3e, 0x38, 0x57, 0x40, 0x47, 0x47, 0x38, 0x4f, 0x46,
+    0x3d, 0x4a, 0x50, 0x4c, 0x42, 0x3b, 0x4d, 0x3d, 0x3d, 0x33, 0x40, 0x41,
+    0x48, 0x4b, 0x46, 0x39, 0x4d, 0x30, 0x45, 0x38, 0x48, 0x3c, 0x48, 0x3b,
+    0x4d, 0x40, 0x3b, 0x40, 0x46, 0x41, 0x51, 0x34, 0x40, 0x43, 0x3f, 0x42,
+    0x45, 0x42, 0x3e, 0x35, 0x3d, 0x38, 0x37, 0x3a, 0x42, 0x40, 0x43, 0x3c,
+    0x3c, 0x3d, 0x43, 0x40, 0x45, 0x3a, 0x3e, 0x3a, 0x3e, 0x40, 0x43, 0x35,
+    0x37, 0x3f, 0x3f, 0x3e, 0x39, 0x3f, 0x47, 0x38, 0x3e, 0x44, 0x3b, 0x3c,
+    0x3b, 0x32, 0x40, 0x3e, 0x42, 0x45, 0x3a, 0x52, 0x3a, 0x3e, 0x45, 0x40,
+    0x41, 0x48, 0x3f, 0x4e, 0x3e, 0x42, 0x3d, 0x39, 0x3a, 0x33, 0x3f, 0x4b,
+    0x3e, 0x38, 0x36, 0x3e, 0x31, 0x41, 0x3a, 0x40, 0x3b, 0x37, 0x3f, 0x3e,
+    0x3e, 0x3f, 0x35, 0x44, 0x3d, 0x42, 0x3d, 0x44, 0x42, 0x3f, 0x3e, 0x44,
+    0x3e, 0x45, 0x37, 0x3a, 0x3b, 0x42, 0x3f, 0x41, 0x3b, 0x3f, 0x41, 0x41,
+    0x3e, 0x34, 0x47, 0x39, 0x46, 0x46, 0x37, 0x39, 0x3f, 0x45, 0x39, 0x39,
+    0x3a, 0x40, 0x38, 0x3a, 0x31, 0x34, 0x3a, 0x41, 0x38, 0x41, 0x3a, 0x41,
+    0x44, 0x37, 0x2d, 0x41, 0x43, 0x4d, 0x4b, 0x3b, 0x2c, 0x30, 0x42, 0x3b,
+    0x31, 0x56, 0x43, 0x47, 0x47, 0x38, 0x50, 0x44, 0x40, 0x52, 0x5a, 0x50,
+    0x44, 0x3f, 0x4b, 0x35, 0x3a, 0x36, 0x41, 0x44, 0x47, 0x4e, 0x52, 0x36,
+    0x45, 0x39, 0x38, 0x3c, 0x42, 0x44, 0x40, 0x3b, 0x4b, 0x38, 0x35, 0x35,
+    0x3f, 0x40, 0x4f, 0x39, 0x3d, 0x37, 0x34, 0x3e, 0x41, 0x4c, 0x40, 0x37,
+    0x3d, 0x3b, 0x37, 0x37, 0x40, 0x42, 0x35, 0x39, 0x41, 0x42, 0x3d, 0x34,
+    0x3c, 0x37, 0x3a, 0x3d, 0x46, 0x46, 0x46, 0x3f, 0x44, 0x3d, 0x3c, 0x40,
+    0x3c, 0x3a, 0x3d, 0x3b, 0x3b, 0x41, 0x47, 0x3a, 0x43, 0x43, 0x43, 0x3b,
+    0x3e, 0x3e, 0x42, 0x46, 0x36, 0x37, 0x45, 0x35, 0x3c, 0x3b, 0x31, 0x4b,
+    0x3c, 0x3e, 0x3a, 0x3a, 0x42, 0x42, 0x34, 0x47, 0x37, 0x34, 0x41, 0x3d,
+    0x3e, 0x39, 0x43, 0x47, 0x31, 0x3b, 0x40, 0x3b, 0x42, 0x3d, 0x44, 0x44,
+    0x37, 0x39, 0x44, 0x3b, 0x40, 0x3a, 0x3d, 0x44, 0x3c, 0x40, 0x42, 0x3b,
+    0x40, 0x3e, 0x32, 0x3d, 0x3c, 0x3e, 0x44, 0x3e, 0x47, 0x3d, 0x3f, 0x2e,
+    0x3e, 0x3d, 0x3f, 0x3b, 0x3b, 0x43, 0x43, 0x3c, 0x3a, 0x3c, 0x3a, 0x36,
+    0x38, 0x46, 0x30, 0x3e, 0x3f, 0x35, 0x3e, 0x34, 0x3c, 0x34, 0x32, 0x4a,
+    0x41, 0x48, 0x48, 0x3f, 0x34, 0x37, 0x42, 0x43, 0x36, 0x59, 0x42, 0x3f,
+    0x4b, 0x3d, 0x5d, 0x45, 0x3b, 0x51, 0x51, 0x4c, 0x41, 0x40, 0x4d, 0x36,
+    0x3f, 0x34, 0x39, 0x3d, 0x4a, 0x4b, 0x4f, 0x33, 0x48, 0x32, 0x3c, 0x32,
+    0x48, 0x4c, 0x4d, 0x3a, 0x49, 0x3a, 0x3a, 0x2e, 0x4b, 0x44, 0x4f, 0x33,
+    0x3a, 0x48, 0x34, 0x43, 0x38, 0x45, 0x44, 0x35, 0x3b, 0x3f, 0x40, 0x37,
+    0x35, 0x34, 0x38, 0x3e, 0x41, 0x3e, 0x3b, 0x47, 0x41, 0x47, 0x3c, 0x3c,
+    0x39, 0x40, 0x3e, 0x45, 0x36, 0x41, 0x3f, 0x3f, 0x3c, 0x44, 0x3f, 0x43,
+    0x3d, 0x3c, 0x49, 0x42, 0x3e, 0x3f, 0x48, 0x37, 0x43, 0x37, 0x43, 0x3d,
+    0x32, 0x42, 0x44, 0x39, 0x36, 0x37, 0x40, 0x46, 0x47, 0x3d, 0x3a, 0x42,
+    0x3f, 0x38, 0x37, 0x48, 0x39, 0x40, 0x3c, 0x37, 0x33, 0x38, 0x38, 0x40,
+    0x41, 0x3c, 0x3f, 0x3b, 0x40, 0x3a, 0x47, 0x46, 0x3a, 0x37, 0x42, 0x47,
+    0x3b, 0x3f, 0x3b, 0x40, 0x33, 0x3f, 0x3a, 0x3c, 0x38, 0x3a, 0x36, 0x38,
+    0x36, 0x40, 0x48, 0x42, 0x48, 0x3c, 0x43, 0x36, 0x32, 0x3b, 0x34, 0x39,
+    0x38, 0x46, 0x37, 0x3b, 0x44, 0x34, 0x36, 0x38, 0x3c, 0x43, 0x33, 0x3c,
+    0x3b, 0x45, 0x38, 0x38, 0x44, 0x33, 0x36, 0x4a, 0x46, 0x4c, 0x4a, 0x34,
+    0x36, 0x37, 0x43, 0x42, 0x33, 0x58, 0x43, 0x48, 0x44, 0x38, 0x5f, 0x3f,
+    0x3c, 0x4d, 0x53, 0x52, 0x43, 0x47, 0x52, 0x3e, 0x3b, 0x2d, 0x3b, 0x3a,
+    0x4b, 0x49, 0x53, 0x38, 0x4c, 0x2f, 0x38, 0x31, 0x42, 0x40, 0x48, 0x3f,
+    0x44, 0x3c, 0x3c, 0x34, 0x46, 0x3f, 0x49, 0x3a, 0x43, 0x3d, 0x34, 0x42,
+    0x36, 0x47, 0x51, 0x3c, 0x3d, 0x39, 0x39, 0x3a, 0x3b, 0x35, 0x35, 0x41,
+    0x47, 0x3c, 0x3b, 0x43, 0x3f, 0x45, 0x3e, 0x40, 0x3c, 0x3f, 0x3c, 0x42,
+    0x3b, 0x3e, 0x38, 0x3f, 0x3f, 0x41, 0x39, 0x39, 0x3d, 0x43, 0x4f, 0x3d,
+    0x48, 0x3b, 0x44, 0x45, 0x3d, 0x3b, 0x49, 0x43, 0x44, 0x3d, 0x37, 0x3b,
+    0x3c, 0x45, 0x46, 0x44, 0x35, 0x3e, 0x32, 0x35, 0x34, 0x3b, 0x40, 0x43,
+    0x3e, 0x45, 0x37, 0x3d, 0x3f, 0x43, 0x36, 0x3f, 0x3f, 0x43, 0x39, 0x44,
+    0x3e, 0x3e, 0x45, 0x40, 0x3e, 0x44, 0x3b, 0x3e, 0x42, 0x42, 0x3b, 0x3d,
+    0x3a, 0x40, 0x39, 0x3a, 0x32, 0x36, 0x41, 0x30, 0x39, 0x46, 0x33, 0x3f,
+    0x46, 0x40, 0x3c, 0x31, 0x41, 0x3a, 0x3f, 0x3f, 0x3b, 0x36, 0x3f, 0x38,
+    0x36, 0x3e, 0x35, 0x35, 0x3b, 0x3d, 0x3f, 0x39, 0x46, 0x37, 0x3a, 0x47,
+    0x37, 0x39, 0x2c, 0x55, 0x40, 0x4b, 0x4a, 0x39, 0x35, 0x42, 0x3d, 0x40,
+    0x3a, 0x54, 0x41, 0x48, 0x51, 0x3b, 0x61, 0x3e, 0x3e, 0x4d, 0x51, 0x52,
+    0x3e, 0x43, 0x52, 0x41, 0x48, 0x2d, 0x35, 0x35, 0x4b, 0x44, 0x4d, 0x3c,
+    0x54, 0x33, 0x39, 0x27, 0x4a, 0x44, 0x4a, 0x41, 0x3c, 0x3a, 0x31, 0x2f,
+    0x3d, 0x42, 0x48, 0x3f, 0x42, 0x40, 0x44, 0x3b, 0x40, 0x3e, 0x49, 0x3a,
+    0x3c, 0x35, 0x30, 0x3e, 0x3e, 0x3d, 0x36, 0x3a, 0x3e, 0x3a, 0x4a, 0x3e,
+    0x3d, 0x49, 0x40, 0x43, 0x3e, 0x45, 0x3f, 0x3c, 0x3b, 0x42, 0x3a, 0x39,
+    0x3b, 0x47, 0x3f, 0x39, 0x49, 0x46, 0x3d, 0x34, 0x32, 0x44, 0x46, 0x42,
+    0x47, 0x39, 0x49, 0x48, 0x3b, 0x38, 0x45, 0x45, 0x37, 0x38, 0x46, 0x46,
+    0x37, 0x42, 0x35, 0x34, 0x45, 0x42, 0x35, 0x43, 0x3b, 0x3a, 0x43, 0x43,
+    0x40, 0x42, 0x35, 0x3f, 0x38, 0x3f, 0x3a, 0x3a, 0x3b, 0x3f, 0x3e, 0x36,
+    0x3f, 0x3c, 0x48, 0x3b, 0x3a, 0x41, 0x41, 0x35, 0x33, 0x3f, 0x3b, 0x45,
+    0x48, 0x36, 0x40, 0x38, 0x47, 0x3d, 0x35, 0x40, 0x41, 0x42, 0x41, 0x37,
+    0x41, 0x3e, 0x36, 0x48, 0x3e, 0x3c, 0x32, 0x39, 0x41, 0x40, 0x38, 0x3f,
+    0x46, 0x43, 0x33, 0x40, 0x43, 0x43, 0x3a, 0x49, 0x3f, 0x35, 0x2c, 0x5d,
+    0x43, 0x49, 0x52, 0x3b, 0x3c, 0x41, 0x40, 0x4a, 0x33, 0x50, 0x41, 0x46,
+    0x52, 0x41, 0x68, 0x48, 0x44, 0x53, 0x54, 0x55, 0x42, 0x42, 0x57, 0x44,
+    0x47, 0x35, 0x35, 0x3e, 0x4b, 0x44, 0x4e, 0x38, 0x55, 0x2f, 0x36, 0x2d,
+    0x40, 0x48, 0x4b, 0x41, 0x48, 0x36, 0x32, 0x32, 0x44, 0x42, 0x47, 0x42,
+    0x48, 0x3d, 0x3d, 0x39, 0x3e, 0x35, 0x4b, 0x39, 0x38, 0x3a, 0x39, 0x46,
+    0x38, 0x3f, 0x3a, 0x42, 0x4b, 0x45, 0x3e, 0x32, 0x46, 0x43, 0x3b, 0x40,
+    0x45, 0x41, 0x3e, 0x43, 0x37, 0x3d, 0x43, 0x3b, 0x46, 0x48, 0x42, 0x3b,
+    0x3d, 0x48, 0x4a, 0x3c, 0x3b, 0x42, 0x40, 0x3c, 0x3a, 0x42, 0x38, 0x47,
+    0x3b, 0x3b, 0x3d, 0x41, 0x3f, 0x38, 0x3f, 0x4a, 0x44, 0x3f, 0x47, 0x3a,
+    0x47, 0x44, 0x43, 0x43, 0x34, 0x3d, 0x3a, 0x3c, 0x47, 0x3f, 0x3e, 0x39,
+    0x42, 0x4a, 0x40, 0x36, 0x40, 0x41, 0x42, 0x3f, 0x3f, 0x43, 0x39, 0x38,
+    0x3c, 0x3b, 0x4c, 0x2f, 0x41, 0x39, 0x40, 0x42, 0x3f, 0x42, 0x40, 0x36,
+    0x3b, 0x45, 0x41, 0x41, 0x44, 0x45, 0x42, 0x37, 0x3d, 0x3a, 0x33, 0x3e,
+    0x3b, 0x3b, 0x3c, 0x3d, 0x38, 0x49, 0x44, 0x39, 0x3f, 0x48, 0x3d, 0x41,
+    0x42, 0x43, 0x44, 0x3e, 0x41, 0x3d, 0x32, 0x59, 0x45, 0x4b, 0x4b, 0x38,
+    0x37, 0x3d, 0x48, 0x42, 0x3d, 0x52, 0x43, 0x46, 0x54, 0x48, 0x67, 0x4d,
+    0x45, 0x4e, 0x49, 0x52, 0x45, 0x45, 0x58, 0x3b, 0x41, 0x38, 0x3f, 0x3f,
+    0x49, 0x44, 0x4f, 0x48, 0x57, 0x31, 0x3c, 0x2a, 0x3e, 0x4c, 0x41, 0x40,
+    0x47, 0x3f, 0x33, 0x34, 0x3f, 0x42, 0x48, 0x43, 0x4b, 0x38, 0x39, 0x3d,
+    0x3f, 0x3e, 0x4b, 0x3f, 0x35, 0x36, 0x3c, 0x46, 0x3c, 0x45, 0x37, 0x3b,
+    0x3c, 0x39, 0x41, 0x40, 0x41, 0x43, 0x44, 0x41, 0x45, 0x4f, 0x44, 0x43,
+    0x44, 0x3c, 0x45, 0x34, 0x42, 0x45, 0x3f, 0x46, 0x3f, 0x43, 0x3d, 0x3a,
+    0x39, 0x47, 0x45, 0x3d, 0x3f, 0x3b, 0x3d, 0x42, 0x38, 0x48, 0x48, 0x3b,
+    0x3c, 0x3a, 0x3f, 0x41, 0x44, 0x4b, 0x44, 0x48, 0x41, 0x3c, 0x3d, 0x3c,
+    0x3e, 0x3a, 0x4a, 0x3b, 0x49, 0x35, 0x3a, 0x3d, 0x41, 0x3f, 0x49, 0x39,
+    0x44, 0x37, 0x3f, 0x3c, 0x42, 0x40, 0x4a, 0x46, 0x39, 0x38, 0x46, 0x37,
+    0x41, 0x46, 0x41, 0x45, 0x40, 0x3b, 0x3b, 0x33, 0x3b, 0x39, 0x3c, 0x43,
+    0x37, 0x3c, 0x44, 0x3d, 0x46, 0x39, 0x3c, 0x3c, 0x44, 0x48, 0x41, 0x44,
+    0x41, 0x43, 0x46, 0x3b, 0x47, 0x41, 0x31, 0x41, 0x44, 0x40, 0x43, 0x42,
+    0x3e, 0x43, 0x34, 0x65, 0x4f, 0x50, 0x4d, 0x3a, 0x37, 0x43, 0x4d, 0x4a,
+    0x3d, 0x54, 0x40, 0x42, 0x5b, 0x3b, 0x71, 0x49, 0x44, 0x4f, 0x54, 0x56,
+    0x48, 0x40, 0x52, 0x41, 0x42, 0x38, 0x3c, 0x49, 0x4a, 0x45, 0x51, 0x35,
+    0x54, 0x2f, 0x35, 0x25, 0x4d, 0x3f, 0x4d, 0x43, 0x49, 0x33, 0x32, 0x3a,
+    0x46, 0x48, 0x48, 0x3d, 0x43, 0x3a, 0x3c, 0x3a, 0x48, 0x40, 0x4b, 0x3b,
+    0x45, 0x3b, 0x3f, 0x38, 0x37, 0x41, 0x31, 0x3b, 0x41, 0x43, 0x43, 0x37,
+    0x48, 0x3f, 0x48, 0x37, 0x40, 0x4a, 0x43, 0x45, 0x3d, 0x39, 0x37, 0x37,
+    0x3c, 0x3f, 0x47, 0x48, 0x43, 0x3e, 0x41, 0x3f, 0x3e, 0x38, 0x3e, 0x37,
+    0x45, 0x45, 0x35, 0x44, 0x38, 0x3a, 0x49, 0x43, 0x40, 0x41, 0x40, 0x44,
+    0x3c, 0x3e, 0x40, 0x38, 0x42, 0x41, 0x3c, 0x41, 0x3a, 0x3b, 0x3c, 0x3a,
+    0x49, 0x3c, 0x42, 0x44, 0x3f, 0x39, 0x45, 0x32, 0x45, 0x43, 0x45, 0x39,
+    0x43, 0x41, 0x4b, 0x39, 0x32, 0x3c, 0x3c, 0x36, 0x39, 0x3f, 0x46, 0x32,
+    0x39, 0x35, 0x4f, 0x32, 0x3e, 0x40, 0x3d, 0x3e, 0x3a, 0x39, 0x4c, 0x38,
+    0x43, 0x38, 0x49, 0x3b, 0x33, 0x39, 0x3b, 0x36, 0x36, 0x43, 0x3b, 0x3c,
+    0x32, 0x3c, 0x3a, 0x45, 0x31, 0x3d, 0x37, 0x40, 0x3f, 0x3f, 0x35, 0xff,
+    0x49, 0x4e, 0x4c, 0x3c, 0x36, 0x43, 0x46, 0x45, 0x41, 0x59, 0x44, 0x4a,
+    0x53, 0x44, 0x71, 0x4a, 0x39, 0x4f, 0x50, 0x4b, 0x47, 0x42, 0x5a, 0x3c,
+    0x45, 0x38, 0x3e, 0x42, 0x53, 0x43, 0x52, 0x3a, 0x52, 0x34, 0x31, 0x20,
+    0x49, 0x4e, 0x46, 0x43, 0x4b, 0x3d, 0x2b, 0x27, 0x46, 0x46, 0x47, 0x41,
+    0x42, 0x37, 0x39, 0x38, 0x45, 0x3f, 0x51, 0x3d, 0x48, 0x3f, 0x33, 0x3f,
+    0x38, 0x45, 0x31, 0x38, 0x41, 0x3d, 0x47, 0x39, 0x42, 0x40, 0x4c, 0x3f,
+    0x40, 0x42, 0x41, 0x41, 0x41, 0x42, 0x39, 0x35, 0x3f, 0x46, 0x45, 0x36,
+    0x3f, 0x43, 0x3b, 0x39, 0x41, 0x38, 0x43, 0x37, 0x3d, 0x44, 0x3b, 0x40,
+    0x36, 0x3d, 0x42, 0x41, 0x41, 0x3d, 0x38, 0x4a, 0x40, 0x4a, 0x4c, 0x38,
+    0x3f, 0x40, 0x45, 0x3c, 0x3f, 0x4b, 0x43, 0x41, 0x43, 0x3e, 0x43, 0x3f,
+    0x36, 0x40, 0x40, 0x39, 0x3f, 0x3a, 0x3a, 0x30, 0x41, 0x3c, 0x3c, 0x34,
+    0x46, 0x38, 0x43, 0x34, 0x3a, 0x42, 0x43, 0x42, 0x40, 0x41, 0x49, 0x34,
+    0x35, 0x40, 0x47, 0x3d, 0x3d, 0x3e, 0x4c, 0x33, 0x3c, 0x3b, 0x39, 0x43,
+    0x3a, 0x3e, 0x3b, 0x37, 0x3f, 0x42, 0x31, 0x3d, 0x41, 0x3e, 0x32, 0x47,
+    0x34, 0x41, 0x3d, 0x35, 0x39, 0x40, 0x38, 0x69, 0x4f, 0x4a, 0x49, 0x37,
+    0x37, 0x44, 0x43, 0x46, 0x40, 0x58, 0x43, 0x48, 0x54, 0x46, 0x6c, 0x50,
+    0x3a, 0x50, 0x50, 0x57, 0x47, 0x46, 0x5c, 0x40, 0x40, 0x39, 0x3e, 0x46,
+    0x53, 0x46, 0x5c, 0x36, 0x4f, 0x32, 0x30, 0x2d, 0x4a, 0x48, 0x41, 0x45,
+    0x47, 0x2f, 0x32, 0x2b, 0x43, 0x40, 0x43, 0x3c, 0x40, 0x44, 0x3e, 0x37,
+    0x39, 0x3e, 0x48, 0x42, 0x45, 0x36, 0x47, 0x3f, 0x3b, 0x41, 0x35, 0x35,
+    0x3b, 0x3e, 0x35, 0x43, 0x3e, 0x41, 0x3d, 0x36, 0x41, 0x3c, 0x40, 0x44,
+    0x3d, 0x40, 0x35, 0x32, 0x48, 0x3e, 0x39, 0x42, 0x44, 0x3d, 0x39, 0x3b,
+    0x3b, 0x45, 0x40, 0x4a, 0x3f, 0x41, 0x43, 0x39, 0x42, 0x44, 0x4c, 0x3c,
+    0x3f, 0x3e, 0x3f, 0x43, 0x40, 0x42, 0x4c, 0x3b, 0x3e, 0x3d, 0x49, 0x42,
+    0x40, 0x44, 0x40, 0x34, 0x36, 0x40, 0x45, 0x39, 0x42, 0x40, 0x3e, 0x44,
+    0x45, 0x37, 0x3c, 0x38, 0x3e, 0x49, 0x3e, 0x3c, 0x41, 0x3d, 0x42, 0x32,
+    0x40, 0x45, 0x3e, 0x36, 0x44, 0x3a, 0x4e, 0x38, 0x43, 0x38, 0x40, 0x38,
+    0x49, 0x42, 0x40, 0x3d, 0x42, 0x48, 0x48, 0x3d, 0x41, 0x3a, 0x3f, 0x41,
+    0x38, 0x3c, 0x44, 0x39, 0x3a, 0x32, 0x3a, 0x3e, 0x3d, 0x3b, 0x39, 0x38,
+    0x3a, 0x43, 0x3a, 0x6b, 0x45, 0x50, 0x47, 0x33, 0x38, 0x48, 0x4d, 0x4f,
+    0x39, 0x4b, 0x46, 0x4a, 0x4f, 0x42, 0x6f, 0x4b, 0x40, 0x55, 0x54, 0x50,
+    0x42, 0x47, 0x5e, 0x46, 0x40, 0x34, 0x40, 0x47, 0x52, 0x46, 0x55, 0x3b,
+    0x4f, 0x2b, 0x35, 0x33, 0x4c, 0x44, 0x44, 0x48, 0x47, 0x37, 0x35, 0x27,
+    0x4a, 0x3b, 0x41, 0x40, 0x40, 0x3e, 0x36, 0x39, 0x3e, 0x3c, 0x45, 0x3f,
+    0x4d, 0x41, 0x3d, 0x48, 0x47, 0x46, 0x33, 0x3d, 0x3d, 0x3e, 0x34, 0x3f,
+    0x3e, 0x3a, 0x41, 0x35, 0x3b, 0x3e, 0x42, 0x3c, 0x42, 0x42, 0x40, 0x31,
+    0x37, 0x40, 0x36, 0x42, 0x48, 0x39, 0x3d, 0x3c, 0x3a, 0x43, 0x39, 0x3d,
+    0x47, 0x49, 0x43, 0x3d, 0x45, 0x39, 0x44, 0x37, 0x3e, 0x4d, 0x3d, 0x40,
+    0x3d, 0x4c, 0x4d, 0x44, 0x3c, 0x3d, 0x46, 0x41, 0x41, 0x42, 0x40, 0x40,
+    0x41, 0x3a, 0x3c, 0x3b, 0x3c, 0x44, 0x40, 0x34, 0x44, 0x38, 0x3b, 0x33,
+    0x45, 0x45, 0x44, 0x3f, 0x3e, 0x3a, 0x3b, 0x3b, 0x43, 0x39, 0x3a, 0x45,
+    0x3b, 0x3a, 0x4b, 0x39, 0x3d, 0x38, 0x41, 0x39, 0x42, 0x45, 0x43, 0x40,
+    0x3e, 0x35, 0x44, 0x3f, 0x45, 0x41, 0x40, 0x3e, 0x43, 0x42, 0x37, 0x3a,
+    0x38, 0x35, 0x3a, 0x48, 0x3e, 0x3b, 0x40, 0x38, 0x3c, 0x3c, 0x3b, 0x6a,
+    0x48, 0x4d, 0x4d, 0x34, 0x38, 0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x41, 0x4b,
+    0x58, 0x46, 0x71, 0x49, 0x3d, 0x53, 0x44, 0x52, 0x42, 0x3e, 0x57, 0x4c,
+    0x4c, 0x38, 0x40, 0x3b, 0x5c, 0x4c, 0x52, 0x3e, 0x4c, 0x2d, 0x32, 0x37,
+    0x49, 0x3f, 0x41, 0x47, 0x4a, 0x3b, 0x2f, 0x26, 0x45, 0x40, 0x47, 0x42,
+    0x3d, 0x39, 0x2d, 0x2c, 0x3f, 0x45, 0x46, 0x44, 0x48, 0x43, 0x42, 0x48,
+    0x40, 0x41, 0x3b, 0x3b, 0x41, 0x3b, 0x39, 0x40, 0x3b, 0x47, 0x3f, 0x38,
+    0x3f, 0x49, 0x3b, 0x35, 0x40, 0x45, 0x38, 0x35, 0x36, 0x34, 0x3e, 0x3d,
+    0x46, 0x3e, 0x33, 0x38, 0x43, 0x48, 0x3f, 0x45, 0x31, 0x44, 0x38, 0x35,
+    0x3c, 0x41, 0x4b, 0x44, 0x3d, 0x43, 0x38, 0x48, 0x3c, 0x39, 0x4a, 0x42,
+    0x3d, 0x43, 0x3f, 0x49, 0x3e, 0x47, 0x49, 0x41, 0x3b, 0x3c, 0x47, 0x3a,
+    0x3d, 0x40, 0x4a, 0x38, 0x3d, 0x3b, 0x47, 0x3a, 0x36, 0x47, 0x42, 0x46,
+    0x3c, 0x3d, 0x45, 0x3b, 0x48, 0x3f, 0x38, 0x36, 0x39, 0x46, 0x43, 0x3a,
+    0x41, 0x3d, 0x39, 0x39, 0x46, 0x37, 0x3f, 0x3f, 0x3a, 0x46, 0x3f, 0x39,
+    0x49, 0x44, 0x42, 0x3a, 0x3a, 0x43, 0x3e, 0x42, 0x3d, 0x3d, 0x43, 0x40,
+    0x43, 0x3c, 0x3f, 0x43, 0x40, 0x42, 0x3b, 0x57, 0x4a, 0x4f, 0x4a, 0x2d,
+    0x3b, 0x48, 0x45, 0x42, 0x34, 0x4c, 0x3e, 0x4f, 0x4d, 0x40, 0x6c, 0x4b,
+    0x3b, 0x4d, 0x4c, 0x57, 0x49, 0x3d, 0x5d, 0x44, 0x43, 0x29, 0x42, 0x3f,
+    0x5b, 0x47, 0x4f, 0x3e, 0x54, 0x2e, 0x34, 0x34, 0x4b, 0x47, 0x46, 0x46,
+    0x4b, 0x34, 0x36, 0x28, 0x3e, 0x3f, 0x42, 0x40, 0x3b, 0x38, 0x39, 0x42,
+    0x49, 0x3d, 0x49, 0x47, 0x47, 0x3b, 0x43, 0x34, 0x39, 0x36, 0x42, 0x3d,
+    0x37, 0x40, 0x37, 0x38, 0x46, 0x42, 0x49, 0x37, 0x44, 0x3f, 0x38, 0x3e,
+    0x36, 0x32, 0x33, 0x38, 0x40, 0x46, 0x42, 0x34, 0x41, 0x42, 0x3e, 0x38,
+    0x44, 0x3e, 0x3f, 0x43, 0x3f, 0x43, 0x35, 0x3f, 0x4d, 0x3b, 0x43, 0x39,
+    0x40, 0x47, 0x3f, 0x4a, 0x3a, 0x3f, 0x45, 0x45, 0x48, 0x42, 0x3b, 0x47,
+    0x42, 0x4b, 0x47, 0x3e, 0x3c, 0x42, 0x46, 0x39, 0x41, 0x3f, 0x48, 0x33,
+    0x45, 0x34, 0x3d, 0x30, 0x40, 0x4c, 0x40, 0x40, 0x39, 0x37, 0x40, 0x33,
+    0x49, 0x42, 0x45, 0x38, 0x3c, 0x43, 0x45, 0x35, 0x37, 0x33, 0x34, 0x3b,
+    0x3b, 0x38, 0x39, 0x41, 0x42, 0x40, 0x3e, 0x3e, 0x41, 0x33, 0x3a, 0x36,
+    0x40, 0x3a, 0x3c, 0x45, 0x43, 0x3c, 0x40, 0x41, 0x49, 0x47, 0x35, 0x34,
+    0x3a, 0x3d, 0x3a, 0x68, 0x4f, 0x48, 0x43, 0x36, 0x37, 0x3e, 0x45, 0x49,
+    0x3a, 0x4d, 0x41, 0x3d, 0x46, 0x45, 0x65, 0x46, 0x38, 0x4d, 0x4a, 0x53,
+    0x43, 0x41, 0x5d, 0x47, 0x41, 0x34, 0x39, 0x43, 0x4e, 0x48, 0x50, 0x38,
+    0x53, 0x32, 0x30, 0x2e, 0x49, 0x4c, 0x4d, 0x3f, 0x46, 0x38, 0x34, 0x2b,
+    0x44, 0x44, 0x41, 0x41, 0x36, 0x40, 0x3f, 0x32, 0x46, 0x38, 0x50, 0x45,
+    0x3f, 0x3d, 0x3b, 0x36, 0x3b, 0x43, 0x3a, 0x34, 0x36, 0x3f, 0x39, 0x35,
+    0x3c, 0x40, 0x40, 0x37, 0x3c, 0x39, 0x3d, 0x36, 0x48, 0x3d, 0x43, 0x34,
+    0x3b, 0x46, 0x43, 0x41, 0x33, 0x3e, 0x44, 0x3d, 0x44, 0x44, 0x4c, 0x3c,
+    0x37, 0x49, 0x42, 0x35, 0x45, 0x3a, 0x3c, 0x41, 0x3a, 0x45, 0x46, 0x41,
+    0x3c, 0x48, 0x46, 0x36, 0x36, 0x42, 0x3b, 0x46, 0x42, 0x45, 0x44, 0x47,
+    0x3f, 0x44, 0x3a, 0x35, 0x37, 0x46, 0x40, 0x38, 0x40, 0x3d, 0x36, 0x2c,
+    0x34, 0x47, 0x40, 0x38, 0x3f, 0x3f, 0x44, 0x2d, 0x3b, 0x3d, 0x3e, 0x44,
+    0x3c, 0x40, 0x3e, 0x33, 0x3c, 0x3a, 0x49, 0x40, 0x42, 0x42, 0x3a, 0x3b,
+    0x33, 0x3d, 0x3c, 0x43, 0x3e, 0x3d, 0x3a, 0x3a, 0x48, 0x3e, 0x3c, 0x39,
+    0x3f, 0x44, 0x37, 0x40, 0x3f, 0x3c, 0x3e, 0x3d, 0x38, 0x42, 0x34, 0x62,
+    0x51, 0x47, 0x44, 0x3f, 0x32, 0x3c, 0x3f, 0x46, 0x3d, 0x46, 0x3e, 0x45,
+    0x4a, 0x3e, 0x5d, 0x43, 0x45, 0x49, 0x4a, 0x55, 0x41, 0x3c, 0x5a, 0x44,
+    0x43, 0x3b, 0x3c, 0x3a, 0x4b, 0x4e, 0x4d, 0x42, 0x49, 0x30, 0x3b, 0x38,
+    0x42, 0x44, 0x51, 0x40, 0x48, 0x33, 0x3f, 0x2b, 0x3c, 0x41, 0x3c, 0x45,
+    0x35, 0x39, 0x42, 0x37, 0x40, 0x46, 0x46, 0x3f, 0x41, 0x45, 0x42, 0x3d,
+    0x43, 0x38, 0x3e, 0x38, 0x3c, 0x39, 0x40, 0x38, 0x37, 0x36, 0x3d, 0x3d,
+    0x38, 0x47, 0x45, 0x3b, 0x45, 0x44, 0x42, 0x2e, 0x37, 0x40, 0x42, 0x42,
+    0x3c, 0x36, 0x3b, 0x39, 0x44, 0x4d, 0x42, 0x3f, 0x3a, 0x3e, 0x45, 0x34,
+    0x3c, 0x43, 0x47, 0x43, 0x3f, 0x48, 0x3b, 0x44, 0x3d, 0x44, 0x43, 0x3e,
+    0x40, 0x4a, 0x31, 0x42, 0x42, 0x43, 0x48, 0x45, 0x3a, 0x42, 0x36, 0x2f,
+    0x3c, 0x3e, 0x3b, 0x3b, 0x44, 0x3f, 0x3a, 0x2c, 0x47, 0x3f, 0x4a, 0x40,
+    0x40, 0x40, 0x3c, 0x2a, 0x3e, 0x44, 0x40, 0x43, 0x3a, 0x42, 0x39, 0x34,
+    0x49, 0x3e, 0x36, 0x42, 0x3f, 0x42, 0x33, 0x3b, 0x3c, 0x45, 0x39, 0x3f,
+    0x3e, 0x3f, 0x41, 0x3d, 0x32, 0x3b, 0x31, 0x40, 0x3f, 0x44, 0x3c, 0x3f,
+    0x40, 0x46, 0x45, 0x36, 0x36, 0x42, 0x30, 0x57, 0x47, 0x44, 0x48, 0x3f,
+    0x35, 0x37, 0x3f, 0x3f, 0x38, 0x4a, 0x41, 0x46, 0x50, 0x3d, 0x5b, 0x41,
+    0x3e, 0x3c, 0x4a, 0x54, 0x45, 0x41, 0x5b, 0x46, 0x3d, 0x3b, 0x43, 0x33,
+    0x45, 0x4e, 0x43, 0x3b, 0x44, 0x37, 0x37, 0x32, 0x4c, 0x3d, 0x4c, 0x3f,
+    0x49, 0x3b, 0x37, 0x3a, 0x33, 0x43, 0x3f, 0x40, 0x44, 0x36, 0x3b, 0x44,
+    0x45, 0x40, 0x3c, 0x3c, 0x41, 0x44, 0x3b, 0x3d, 0x33, 0x37, 0x3c, 0x35,
+    0x3d, 0x3f, 0x39, 0x38, 0x33, 0x43, 0x3e, 0x39, 0x3b, 0x3e, 0x41, 0x35,
+    0x40, 0x46, 0x43, 0x35, 0x41, 0x3d, 0x32, 0x39, 0x3c, 0x40, 0x3e, 0x3f,
+    0x42, 0x38, 0x3b, 0x45, 0x3a, 0x3d, 0x40, 0x36, 0x3a, 0x40, 0x46, 0x44,
+    0x48, 0x45, 0x3f, 0x3a, 0x45, 0x45, 0x3c, 0x3b, 0x40, 0x4c, 0x39, 0x3a,
+    0x38, 0x39, 0x46, 0x3a, 0x3e, 0x4b, 0x34, 0x39, 0x3d, 0x3f, 0x40, 0x39,
+    0x45, 0x31, 0x45, 0x29, 0x3f, 0x38, 0x3a, 0x3f, 0x38, 0x3b, 0x36, 0x2d,
+    0x43, 0x3d, 0x45, 0x3c, 0x46, 0x3f, 0x40, 0x3c, 0x3a, 0x3e, 0x3d, 0x38,
+    0x3f, 0x3c, 0x3f, 0x42, 0x35, 0x3f, 0x3a, 0x43, 0x3d, 0x43, 0x3d, 0x33,
+    0x3d, 0x48, 0x42, 0x3d, 0x45, 0x46, 0x3d, 0x35, 0x32, 0x44, 0x42, 0x37,
+    0x3d, 0x40, 0x3c, 0x47, 0x4a, 0x45, 0x47, 0x2f, 0x33, 0x36, 0x3f, 0x42,
+    0x38, 0x43, 0x3e, 0x3a, 0x41, 0x3f, 0x5f, 0x3f, 0x48, 0x3a, 0x44, 0x47,
+    0x41, 0x3e, 0x57, 0x42, 0x41, 0x33, 0x34, 0x39, 0x42, 0x44, 0x42, 0x3c,
+    0x49, 0x34, 0x37, 0x33, 0x47, 0x38, 0x43, 0x3d, 0x43, 0x3e, 0x3e, 0x36,
+    0x41, 0x41, 0x37, 0x40, 0x39, 0x3e, 0x3b, 0x3b, 0x3e, 0x41, 0x3d, 0x3b,
+    0x43, 0x3e, 0x39, 0x43, 0x2f, 0x3e, 0x33, 0x40, 0x45, 0x47, 0x30, 0x46,
+    0x3f, 0x3f, 0x37, 0x42, 0x3d, 0x42, 0x43, 0x37, 0x38, 0x3c, 0x35, 0x34,
+    0x41, 0x43, 0x3e, 0x3e, 0x3f, 0x49, 0x35, 0x35, 0x38, 0x36, 0x3a, 0x43,
+    0x38, 0x46, 0x48, 0x36, 0x3f, 0x39, 0x3b, 0x3e, 0x48, 0x47, 0x41, 0x34,
+    0x3b, 0x3c, 0x37, 0x3e, 0x40, 0x41, 0x3b, 0x3d, 0x43, 0x42, 0x3a, 0x39,
+    0x3b, 0x43, 0x38, 0x2b, 0x43, 0x41, 0x48, 0x35, 0x44, 0x44, 0x3e, 0x2c,
+    0x46, 0x40, 0x3e, 0x41, 0x38, 0x34, 0x35, 0x37, 0x34, 0x3f, 0x3d, 0x46,
+    0x33, 0x3c, 0x3c, 0x2e, 0x3b, 0x45, 0x3d, 0x3e, 0x3a, 0x42, 0x3c, 0x36,
+    0x3a, 0x42, 0x39, 0x43, 0x35, 0x39, 0x40, 0x44, 0x47, 0x41, 0x44, 0x3d,
+    0x41, 0x3e, 0x38, 0x39, 0x45, 0x3a, 0x35, 0x43, 0x3f, 0x44, 0x41, 0x49,
+    0x47, 0x3f, 0x44, 0x40, 0x38, 0x43, 0x40, 0x3e, 0x39, 0x42, 0x32, 0x3b,
+    0x42, 0x47, 0x57, 0x37, 0x36, 0x38, 0x43, 0x49, 0x3b, 0x34, 0x54, 0x42,
+    0x3d, 0x3f, 0x3e, 0x3b, 0x38, 0x41, 0x43, 0x3a, 0x44, 0x39, 0x34, 0x2c,
+    0x38, 0x43, 0x4b, 0x3f, 0x40, 0x3e, 0x32, 0x33, 0x3d, 0x44, 0x45, 0x44,
+    0x3e, 0x35, 0x37, 0x39, 0x40, 0x3e, 0x40, 0x3c, 0x34, 0x43, 0x37, 0x40,
+    0x39, 0x3e, 0x3d, 0x43, 0x3a, 0x44, 0x43, 0x44, 0x3d, 0x3b, 0x45, 0x3b,
+    0x3a, 0x3a, 0x3f, 0x37, 0x43, 0x3b, 0x33, 0x35, 0x40, 0x47, 0x3e, 0x3c,
+    0x39, 0x3c, 0x34, 0x29, 0x3c, 0x3e, 0x46, 0x3e, 0x3c, 0x38, 0x3f, 0x2d,
+    0x3d, 0x3d, 0x3f, 0x3f, 0x3d, 0x45, 0x3b, 0x32, 0x39, 0x3f, 0x41, 0x38,
+    0x36, 0x3e, 0x3a, 0x35, 0x40, 0x3f, 0x3b, 0x32, 0x3c, 0x39, 0x3e, 0x35,
+    0x3e, 0x45, 0x34, 0x38, 0x44, 0x39, 0x3f, 0x31, 0x34, 0x39, 0x3f, 0x38,
+    0x44, 0x42, 0x3f, 0x3b, 0x39, 0x3d, 0x39, 0x3b, 0x44, 0x46, 0x38, 0x3d,
+    0x45, 0x37, 0x40, 0x3a, 0x3a, 0x39, 0x35, 0x3c, 0x39, 0x40, 0x47, 0x3e,
+    0x38, 0x42, 0x41, 0x3b, 0x48, 0x3f, 0x3a, 0x3e, 0x3d, 0x3f, 0x32, 0x3b,
+    0x3f, 0x3d, 0x3e, 0x44, 0x43, 0x41, 0x44, 0x47, 0x48, 0x41, 0x41, 0x36,
+    0x3a, 0x33, 0x3c, 0x3c, 0x37, 0x3e, 0x40, 0x34, 0x3f, 0x42, 0x53, 0x40,
+    0x3f, 0x35, 0x3e, 0x46, 0x3a, 0x3e, 0x4b, 0x41, 0x46, 0x32, 0x39, 0x36,
+    0x3b, 0x4f, 0x36, 0x3c, 0x40, 0x3a, 0x40, 0x40, 0x47, 0x3e, 0x49, 0x37,
+    0x3f, 0x31, 0x3e, 0x40, 0x3b, 0x3f, 0x43, 0x44, 0x3a, 0x3d, 0x31, 0x41,
+    0x41, 0x33, 0x43, 0x40, 0x3c, 0x3a, 0x41, 0x40, 0x37, 0x3f, 0x34, 0x3e,
+    0x44, 0x42, 0x3d, 0x3f, 0x3f, 0x34, 0x36, 0x34, 0x31, 0x41, 0x32, 0x39,
+    0x3e, 0x3d, 0x42, 0x35, 0x3e, 0x3a, 0x41, 0x47, 0x3d, 0x42, 0x33, 0x32,
+    0x43, 0x42, 0x36, 0x41, 0x3e, 0x39, 0x46, 0x39, 0x35, 0x3d, 0x3d, 0x40,
+    0x38, 0x44, 0x3d, 0x31, 0x44, 0x39, 0x3a, 0x45, 0x42, 0x41, 0x3d, 0x36,
+    0x3f, 0x3c, 0x39, 0x3d, 0x32, 0x39, 0x42, 0x34, 0x3f, 0x38, 0x44, 0x3c,
+    0x43, 0x45, 0x41, 0x2d, 0x44, 0x42, 0x3d, 0x3f, 0x44, 0x38, 0x3d, 0x35,
+    0x3a, 0x48, 0x40, 0x3b, 0x3d, 0x36, 0x3b, 0x40, 0x3f, 0x3a, 0x3a, 0x3f,
+    0x3c, 0x33, 0x39, 0x3c, 0x3c, 0x38, 0x47, 0x36, 0x3d, 0x41, 0x46, 0x41,
+    0x34, 0x46, 0x48, 0x46, 0x3d, 0x3c, 0x40, 0x43, 0x3d, 0x41, 0x37, 0x3e,
+    0x39, 0x47, 0x3f, 0x39, 0x46, 0x43, 0x3f, 0x41, 0x45, 0x37, 0x40, 0x3a,
+    0x3d, 0x44, 0x3f, 0x3b, 0x3b, 0x40, 0x4f, 0x3d, 0x3d, 0x41, 0x3c, 0x43,
+    0x3e, 0x46, 0x4e, 0x40, 0x3f, 0x34, 0x48, 0x29, 0x45, 0x44, 0x46, 0x41,
+    0x45, 0x32, 0x3e, 0x38, 0x39, 0x3a, 0x3e, 0x3e, 0x4c, 0x34, 0x3c, 0x40,
+    0x4a, 0x44, 0x3d, 0x46, 0x3b, 0x3e, 0x42, 0x42, 0x3a, 0x41, 0x43, 0x41,
+    0x39, 0x3f, 0x3e, 0x3c, 0x36, 0x48, 0x3f, 0x3e, 0x3e, 0x37, 0x3f, 0x3f,
+    0x3b, 0x40, 0x3e, 0x35, 0x32, 0x35, 0x3f, 0x33, 0x3f, 0x38, 0x43, 0x37,
+    0x49, 0x38, 0x37, 0x3c, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x46, 0x37, 0x34,
+    0x34, 0x3b, 0x3d, 0x2f, 0x3a, 0x38, 0x3d, 0x46, 0x3d, 0x3b, 0x3d, 0x38,
+    0x35, 0x37, 0x44, 0x3c, 0x3d, 0x3e, 0x40, 0x3a, 0x40, 0x33, 0x3e, 0x38,
+    0x40, 0x3e, 0x45, 0x37, 0x3f, 0x3b, 0x3c, 0x40, 0x3b, 0x3c, 0x3b, 0x33,
+    0x41, 0x3f, 0x3b, 0x42, 0x31, 0x3b, 0x3a, 0x39, 0x3d, 0x41, 0x39, 0x40,
+    0x43, 0x45, 0x39, 0x3b, 0x3a, 0x42, 0x43, 0x3d, 0x3f, 0x40, 0x47, 0x39,
+    0x37, 0x3f, 0x47, 0x3f, 0x45, 0x41, 0x39, 0x3a, 0x41, 0x38, 0x3c, 0x3c,
+    0x39, 0x40, 0x39, 0x3b, 0x3b, 0x3e, 0x38, 0x3b, 0x37, 0x48, 0x41, 0x3f,
+    0x3e, 0x37, 0x3d, 0x44, 0x3c, 0x3e, 0x40, 0x39, 0x41, 0x42, 0x3d, 0x45,
+    0x3b, 0x3e, 0x4c, 0x3b, 0x3a, 0x3a, 0x3e, 0x47, 0x3c, 0x3f, 0x48, 0x3f,
+    0x46, 0x3f, 0x39, 0x25, 0x44, 0x3a, 0x3b, 0x40, 0x41, 0x39, 0x39, 0x47,
+    0x3b, 0x32, 0x49, 0x42, 0x41, 0x3a, 0x43, 0x41, 0x3e, 0x35, 0x37, 0x3d,
+    0x49, 0x40, 0x45, 0x3b, 0x3c, 0x38, 0x48, 0x3c, 0x3c, 0x35, 0x3f, 0x41,
+    0x41, 0x4c, 0x36, 0x39, 0x37, 0x3d, 0x3b, 0x3e, 0x44, 0x32, 0x3d, 0x3f,
+    0x3a, 0x3b, 0x3a, 0x47, 0x38, 0x42, 0x36, 0x34, 0x43, 0x3f, 0x3e, 0x40,
+    0x34, 0x31, 0x36, 0x33, 0x42, 0x37, 0x41, 0x41, 0x40, 0x3d, 0x3d, 0x37,
+    0x43, 0x3a, 0x3e, 0x44, 0x43, 0x3c, 0x35, 0x38, 0x38, 0x3c, 0x43, 0x36,
+    0x3a, 0x38, 0x40, 0x3f, 0x3d, 0x3e, 0x37, 0x3b, 0x41, 0x3a, 0x3b, 0x3d,
+    0x3c, 0x41, 0x3c, 0x41, 0x47, 0x3f, 0x3f, 0x3b, 0x3d, 0x3f, 0x3b, 0x45,
+    0x38, 0x38, 0x40, 0x38, 0x46, 0x42, 0x39, 0x3d, 0x3d, 0x3b, 0x42, 0x36,
+    0x42, 0x41, 0x3e, 0x3e, 0x36, 0x3f, 0x37, 0x3f, 0x36, 0x48, 0x3b, 0x39,
+    0x3d, 0x3f, 0x43, 0x3e, 0x3c, 0x40, 0x48, 0x46, 0x43, 0x36, 0x42, 0x39,
+    0x46, 0x3c, 0x37, 0x38, 0x49, 0x37, 0x36, 0x39, 0x3e, 0x42, 0x48, 0x3a,
+    0x3c, 0x3e, 0x42, 0x30, 0x3e, 0x34, 0x39, 0x3b, 0x46, 0x61, 0x46, 0x1e,
+    0x4c, 0x3b, 0x40, 0x2d, 0x3c, 0x42, 0x32, 0x30, 0x49, 0x3e, 0x39, 0x34,
+    0x30, 0x40, 0x31, 0x38, 0x40, 0x3d, 0x3c, 0x35, 0x3a, 0x36, 0x40, 0x3b,
+    0x41, 0x40, 0x3b, 0x39, 0x37, 0x37, 0x3f, 0x3b, 0x3c, 0x3a, 0x40, 0x3a,
+    0x36, 0x3c, 0x42, 0x39, 0x3e, 0x36, 0x40, 0x42, 0x39, 0x40, 0x3b, 0x34,
+    0x37, 0x33, 0x36, 0x3f, 0x43, 0x33, 0x33, 0x27, 0x3d, 0x46, 0x40, 0x31,
+    0x38, 0x3e, 0x41, 0x20, 0x3f, 0x39, 0x42, 0x35, 0x35, 0x45, 0x40, 0x1e,
+    0x32, 0x35, 0x32, 0x3c, 0x35, 0x44, 0x46, 0x29, 0x3a, 0x3d, 0x37, 0x42,
+    0x3b, 0x45, 0x3a, 0x26, 0x38, 0x40, 0x30, 0x37, 0x41, 0x40, 0x39, 0x2b,
+    0x49, 0x3f, 0x43, 0x43, 0x40, 0x3a, 0x38, 0x29, 0x43, 0x3a, 0x37, 0x40,
+    0x3f, 0x35, 0x3a, 0x28, 0x36, 0x3e, 0x3f, 0x43, 0x3c, 0x39, 0x42, 0x2c,
+    0x38, 0x42, 0x38, 0x3d, 0x42, 0x38, 0x35, 0x2d, 0x34, 0x38, 0x3d, 0x43,
+    0x46, 0x3e, 0x3c, 0x27, 0x3e, 0x40, 0x46, 0x39, 0x35, 0x3d, 0x42, 0x35,
+    0x42, 0x36, 0x40, 0x3e, 0x3a, 0x3e, 0x3c, 0x37, 0x3a, 0x3c, 0x48, 0x48,
+    0x48, 0x37, 0x3d, 0x38, 0x4b, 0x40, 0x43, 0x3b, 0x41, 0x46, 0x3c, 0x34,
+    0x46, 0x3c, 0x3c, 0x3c, 0x4b, 0x64, 0x4a, 0x22, 0x52, 0x41, 0x42, 0x3b,
+    0x42, 0x4a, 0x34, 0x37, 0x4b, 0x44, 0x3b, 0x4a, 0x38, 0x3f, 0x38, 0x3a,
+    0x40, 0x41, 0x42, 0x3c, 0x33, 0x3e, 0x3c, 0x42, 0x2c, 0x4e, 0x47, 0x3f,
+    0x38, 0x33, 0x39, 0x3f, 0x3b, 0x45, 0x37, 0x3a, 0x42, 0x42, 0x44, 0x3f,
+    0x3c, 0x3c, 0x3e, 0x3d, 0x3c, 0x3c, 0x40, 0x2c, 0x3c, 0x3d, 0x42, 0x39,
+    0x3a, 0x37, 0x43, 0x2a, 0x3d, 0x40, 0x41, 0x41, 0x46, 0x46, 0x42, 0x28,
+    0x39, 0x3c, 0x37, 0x44, 0x46, 0x41, 0x47, 0x2b, 0x44, 0x33, 0x39, 0x3f,
+    0x3f, 0x43, 0x3d, 0x23, 0x3a, 0x43, 0x41, 0x3b, 0x41, 0x42, 0x33, 0x1f,
+    0x43, 0x3e, 0x3d, 0x40, 0x37, 0x33, 0x42, 0x28, 0x3b, 0x38, 0x37, 0x3c,
+    0x34, 0x40, 0x44, 0x2a, 0x3c, 0x3a, 0x41, 0x37, 0x45, 0x3f, 0x3e, 0x26,
+    0x41, 0x40, 0x35, 0x3d, 0x45, 0x3e, 0x3d, 0x29, 0x3c, 0x39, 0x3f, 0x3c,
+    0x3d, 0x39, 0x38, 0x2d, 0x39, 0x38, 0x38, 0x44, 0x3c, 0x3e, 0x38, 0x26,
+    0x40, 0x36, 0x39, 0x38, 0x3f, 0x32, 0x39, 0x35, 0x3d, 0x3e, 0x35, 0x3a,
+    0x3f, 0x3f, 0x31, 0x35, 0x34, 0x45, 0x3e, 0x43, 0x48, 0x3b, 0x37, 0x39,
+    0x4d, 0x46, 0x54, 0x40, 0x41, 0x4e, 0x3d, 0x38, 0x4d, 0x38, 0x3a, 0x3b,
+    0x49, 0x5a, 0x4a, 0x1e, 0x5e, 0x39, 0x38, 0x37, 0x3a, 0x51, 0x3a, 0x3c,
+    0x50, 0x3f, 0x40, 0x42, 0x33, 0x3b, 0x2e, 0x4a, 0x3f, 0x4a, 0x3b, 0x43,
+    0x36, 0x3e, 0x3d, 0x42, 0x39, 0x46, 0x4b, 0x3c, 0x3b, 0x3b, 0x35, 0x3e,
+    0x3d, 0x4b, 0x3f, 0x41, 0x3f, 0x3b, 0x42, 0x42, 0x38, 0x3a, 0x41, 0x3d,
+    0x36, 0x41, 0x37, 0x2f, 0x38, 0x37, 0x3f, 0x34, 0x35, 0x35, 0x45, 0x30,
+    0x31, 0x42, 0x31, 0x3a, 0x3a, 0x3e, 0x3d, 0x23, 0x3f, 0x43, 0x3b, 0x41,
+    0x35, 0x3b, 0x40, 0x25, 0x45, 0x3e, 0x42, 0x3b, 0x31, 0x40, 0x36, 0x28,
+    0x43, 0x42, 0x30, 0x42, 0x32, 0x32, 0x36, 0x2c, 0x35, 0x3a, 0x3d, 0x3a,
+    0x3c, 0x36, 0x3e, 0x30, 0x41, 0x42, 0x38, 0x41, 0x41, 0x3e, 0x3c, 0x23,
+    0x37, 0x40, 0x3c, 0x3e, 0x3e, 0x3a, 0x37, 0x2b, 0x36, 0x40, 0x41, 0x42,
+    0x3e, 0x38, 0x44, 0x22, 0x46, 0x38, 0x33, 0x3b, 0x3a, 0x3a, 0x3a, 0x24,
+    0x36, 0x3b, 0x38, 0x44, 0x34, 0x38, 0x40, 0x28, 0x38, 0x3d, 0x36, 0x44,
+    0x31, 0x3e, 0x37, 0x37, 0x36, 0x3f, 0x47, 0x38, 0x3b, 0x3e, 0x2c, 0x4c,
+    0x36, 0x3c, 0x3b, 0x41, 0x4c, 0x3d, 0x3d, 0x40, 0x49, 0x44, 0x52, 0x3f,
+    0x3b, 0x4d, 0x3c, 0x3a, 0x4f, 0x3b, 0x36, 0x3b, 0x4a, 0x5f, 0x4e, 0x1f,
+    0x57, 0x3c, 0x3d, 0x3d, 0x46, 0x59, 0x42, 0x45, 0x52, 0x3d, 0x3a, 0x41,
+    0x31, 0x39, 0x39, 0x4f, 0x43, 0x4e, 0x3e, 0x37, 0x3a, 0x37, 0x33, 0x47,
+    0x32, 0x45, 0x47, 0x43, 0x31, 0x33, 0x38, 0x43, 0x3e, 0x47, 0x3d, 0x32,
+    0x3b, 0x39, 0x3c, 0x42, 0x3d, 0x47, 0x42, 0x40, 0x3d, 0x3f, 0x3c, 0x34,
+    0x3b, 0x3e, 0x42, 0x3d, 0x43, 0x35, 0x42, 0x2c, 0x35, 0x3d, 0x3c, 0x3d,
+    0x3a, 0x3c, 0x46, 0x25, 0x43, 0x35, 0x3d, 0x39, 0x3a, 0x3c, 0x40, 0x2b,
+    0x33, 0x40, 0x3d, 0x46, 0x45, 0x37, 0x3c, 0x36, 0x43, 0x37, 0x3e, 0x3a,
+    0x3c, 0x47, 0x3f, 0x38, 0x36, 0x3e, 0x3a, 0x42, 0x3c, 0x42, 0x33, 0x39,
+    0x3c, 0x3a, 0x3c, 0x40, 0x48, 0x3b, 0x40, 0x32, 0x37, 0x47, 0x34, 0x38,
+    0x33, 0x3d, 0x49, 0x2d, 0x36, 0x42, 0x3d, 0x3e, 0x47, 0x3c, 0x42, 0x2c,
+    0x3b, 0x31, 0x3f, 0x3c, 0x3d, 0x3c, 0x3f, 0x2b, 0x41, 0x35, 0x33, 0x43,
+    0x47, 0x39, 0x34, 0x2a, 0x3a, 0x3a, 0x40, 0x3d, 0x44, 0x3c, 0x39, 0x34,
+    0x43, 0x40, 0x33, 0x3a, 0x3b, 0x42, 0x38, 0x3b, 0x34, 0x35, 0x40, 0x43,
+    0x4b, 0x41, 0x3d, 0x38, 0x49, 0x44, 0x4d, 0x37, 0x3a, 0x4b, 0x40, 0x39,
+    0x4e, 0x3b, 0x30, 0x38, 0x47, 0x5d, 0x50, 0x1f, 0x54, 0x35, 0x3a, 0x39,
+    0x40, 0x4c, 0x46, 0x42, 0x52, 0x39, 0x39, 0x45, 0x41, 0x3c, 0x30, 0x5b,
+    0x43, 0x4d, 0x4a, 0x3e, 0x31, 0x39, 0x41, 0x4c, 0x36, 0x44, 0x4c, 0x39,
+    0x32, 0x41, 0x47, 0x3e, 0x34, 0x49, 0x45, 0x3b, 0x34, 0x3a, 0x3b, 0x47,
+    0x43, 0x3e, 0x43, 0x32, 0x40, 0x3e, 0x3e, 0x38, 0x37, 0x3e, 0x37, 0x3a,
+    0x3a, 0x40, 0x48, 0x2f, 0x3e, 0x3e, 0x46, 0x3a, 0x3e, 0x35, 0x49, 0x30,
+    0x3a, 0x41, 0x3e, 0x39, 0x34, 0x45, 0x3d, 0x34, 0x48, 0x43, 0x43, 0x42,
+    0x33, 0x39, 0x3b, 0x3f, 0x30, 0x46, 0x41, 0x39, 0x48, 0x3a, 0x3c, 0x3e,
+    0x3f, 0x36, 0x40, 0x3d, 0x43, 0x40, 0x3e, 0x39, 0x44, 0x40, 0x44, 0x3b,
+    0x43, 0x42, 0x39, 0x38, 0x3a, 0x3f, 0x3b, 0x3f, 0x38, 0x3d, 0x34, 0x30,
+    0x34, 0x3d, 0x3f, 0x42, 0x44, 0x3e, 0x34, 0x32, 0x37, 0x46, 0x44, 0x38,
+    0x3c, 0x45, 0x39, 0x2b, 0x41, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x3c, 0x32,
+    0x45, 0x42, 0x3d, 0x46, 0x38, 0x3b, 0x34, 0x35, 0x38, 0x43, 0x3d, 0x34,
+    0x42, 0x3b, 0x38, 0x3d, 0x37, 0x43, 0x3f, 0x39, 0x4e, 0x39, 0x40, 0x3f,
+    0x4d, 0x43, 0x49, 0x3f, 0x36, 0x41, 0x44, 0x39, 0x48, 0x3a, 0x35, 0x39,
+    0x48, 0x59, 0x4e, 0x25, 0x58, 0x39, 0x42, 0x35, 0x43, 0x4e, 0x42, 0x3f,
+    0x4a, 0x43, 0x3b, 0x3f, 0x3b, 0x37, 0x2b, 0x5a, 0x3d, 0x44, 0x3b, 0x40,
+    0x31, 0x38, 0x37, 0x44, 0x32, 0x3e, 0x41, 0x3d, 0x2c, 0x42, 0x42, 0x3c,
+    0x37, 0x45, 0x41, 0x41, 0x3d, 0x39, 0x41, 0x40, 0x3a, 0x46, 0x41, 0x40,
+    0x40, 0x3d, 0x38, 0x31, 0x37, 0x3f, 0x42, 0x38, 0x3f, 0x3c, 0x48, 0x30,
+    0x3e, 0x39, 0x3f, 0x3d, 0x3d, 0x44, 0x52, 0x35, 0x3b, 0x32, 0x42, 0x32,
+    0x3a, 0x43, 0x39, 0x3b, 0x31, 0x43, 0x36, 0x3c, 0x3c, 0x3c, 0x41, 0x45,
+    0x42, 0x49, 0x41, 0x3b, 0x42, 0x3e, 0x41, 0x44, 0x36, 0x41, 0x3f, 0x3c,
+    0x3e, 0x47, 0x45, 0x41, 0x38, 0x41, 0x3f, 0x43, 0x35, 0x32, 0x41, 0x39,
+    0x36, 0x47, 0x35, 0x42, 0x44, 0x3b, 0x3f, 0x34, 0x48, 0x41, 0x43, 0x42,
+    0x36, 0x3e, 0x3c, 0x3d, 0x3d, 0x3b, 0x42, 0x44, 0x3a, 0x44, 0x36, 0x2a,
+    0x41, 0x39, 0x3a, 0x41, 0x46, 0x3c, 0x44, 0x2f, 0x36, 0x39, 0x3b, 0x3f,
+    0x38, 0x45, 0x3c, 0x3c, 0x3e, 0x41, 0x3c, 0x39, 0x3e, 0x40, 0x2f, 0x45,
+    0x3b, 0x41, 0x40, 0x3c, 0x4e, 0x38, 0x3e, 0x48, 0x46, 0x40, 0x48, 0x44,
+    0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x39, 0x37, 0x3a, 0x4e, 0x59, 0x5c, 0x22,
+    0x58, 0x32, 0x38, 0x34, 0x40, 0x4b, 0x43, 0x43, 0x4f, 0x3e, 0x39, 0x40,
+    0x37, 0x3e, 0x2f, 0x55, 0x3f, 0x40, 0x38, 0x3f, 0x3a, 0x33, 0x37, 0x3d,
+    0x34, 0x4c, 0x37, 0x3f, 0x32, 0x39, 0x45, 0x34, 0x44, 0x4c, 0x3f, 0x3b,
+    0x3c, 0x36, 0x36, 0x43, 0x36, 0x47, 0x41, 0x46, 0x41, 0x3e, 0x41, 0x3a,
+    0x43, 0x3a, 0x48, 0x42, 0x42, 0x3e, 0x4c, 0x36, 0x3d, 0x39, 0x43, 0x46,
+    0x3d, 0x42, 0x42, 0x3b, 0x45, 0x43, 0x3c, 0x40, 0x39, 0x37, 0x34, 0x45,
+    0x3f, 0x40, 0x34, 0x38, 0x43, 0x3f, 0x36, 0x47, 0x3f, 0x3b, 0x49, 0x3c,
+    0x3a, 0x3a, 0x42, 0x4c, 0x37, 0x3e, 0x3b, 0x32, 0x47, 0x40, 0x45, 0x4d,
+    0x39, 0x3b, 0x39, 0x40, 0x3e, 0x3c, 0x3d, 0x3a, 0x3d, 0x3b, 0x3e, 0x43,
+    0x3e, 0x3f, 0x3a, 0x3c, 0x41, 0x40, 0x39, 0x3c, 0x3a, 0x38, 0x39, 0x37,
+    0x36, 0x33, 0x43, 0x45, 0x3f, 0x45, 0x41, 0x30, 0x3b, 0x34, 0x3c, 0x39,
+    0x3b, 0x45, 0x37, 0x2e, 0x36, 0x34, 0x36, 0x44, 0x3d, 0x40, 0x3a, 0x3c,
+    0x3d, 0x3b, 0x38, 0x41, 0x42, 0x3a, 0x32, 0x4b, 0x38, 0x3e, 0x41, 0x46,
+    0x57, 0x3a, 0x44, 0x48, 0x47, 0x45, 0x47, 0x3e, 0x43, 0x42, 0x45, 0x3b,
+    0x50, 0x39, 0x37, 0x3f, 0x47, 0x51, 0x5e, 0x22, 0x59, 0x33, 0x3c, 0x37,
+    0x43, 0x50, 0x49, 0x47, 0x46, 0x42, 0x39, 0x44, 0x44, 0x3d, 0x2f, 0x53,
+    0x35, 0x41, 0x40, 0x3d, 0x2d, 0x35, 0x2f, 0x3e, 0x3f, 0x37, 0x38, 0x3e,
+    0x30, 0x45, 0x46, 0x38, 0x33, 0x3c, 0x3e, 0x3b, 0x44, 0x42, 0x47, 0x49,
+    0x43, 0x40, 0x3d, 0x3c, 0x38, 0x43, 0x3e, 0x38, 0x3d, 0x40, 0x36, 0x43,
+    0x43, 0x3e, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x3d, 0x41, 0x39, 0x3e, 0x45,
+    0x39, 0x3d, 0x39, 0x40, 0x42, 0x40, 0x3b, 0x4a, 0x40, 0x41, 0x3f, 0x37,
+    0x43, 0x41, 0x37, 0x4c, 0x3f, 0x3d, 0x38, 0x3a, 0x42, 0x46, 0x43, 0x4d,
+    0x3c, 0x3a, 0x43, 0x3e, 0x3b, 0x3d, 0x46, 0x4a, 0x38, 0x3d, 0x3d, 0x39,
+    0x3e, 0x3c, 0x3b, 0x3e, 0x3a, 0x40, 0x40, 0x34, 0x41, 0x3f, 0x3e, 0x3f,
+    0x47, 0x3c, 0x32, 0x3a, 0x3c, 0x44, 0x3f, 0x42, 0x41, 0x43, 0x3e, 0x3a,
+    0x3b, 0x42, 0x41, 0x39, 0x39, 0x37, 0x39, 0x3e, 0x3d, 0x33, 0x3e, 0x35,
+    0x44, 0x37, 0x40, 0x35, 0x3f, 0x47, 0x37, 0x41, 0x35, 0x38, 0x47, 0x40,
+    0x43, 0x44, 0x2e, 0x48, 0x35, 0x44, 0x41, 0x3c, 0x47, 0x3d, 0x3d, 0x52,
+    0x48, 0x41, 0x44, 0x41, 0x42, 0x4b, 0x3e, 0x3d, 0x4e, 0x32, 0x34, 0x47,
+    0x55, 0x57, 0x5f, 0x22, 0x57, 0x33, 0x40, 0x37, 0x40, 0x4a, 0x4d, 0x47,
+    0x48, 0x38, 0x3e, 0x46, 0x37, 0x42, 0x28, 0x57, 0x38, 0x42, 0x36, 0x43,
+    0x35, 0x37, 0x39, 0x39, 0x42, 0x39, 0x38, 0x3c, 0x35, 0x3c, 0x3c, 0x3a,
+    0x3c, 0x4c, 0x45, 0x3f, 0x43, 0x3d, 0x45, 0x45, 0x40, 0x47, 0x3e, 0x3e,
+    0x3d, 0x4b, 0x49, 0x35, 0x43, 0x3c, 0x36, 0x46, 0x3c, 0x46, 0x42, 0x44,
+    0x3c, 0x42, 0x3d, 0x42, 0x44, 0x3c, 0x4a, 0x40, 0x40, 0x3c, 0x3b, 0x3c,
+    0x35, 0x34, 0x2e, 0x46, 0x38, 0x3d, 0x38, 0x44, 0x41, 0x40, 0x3c, 0x52,
+    0x3b, 0x3d, 0x3b, 0x3f, 0x42, 0x47, 0x44, 0x52, 0x44, 0x44, 0x39, 0x3f,
+    0x43, 0x35, 0x3c, 0x4d, 0x39, 0x3d, 0x3b, 0x37, 0x3e, 0x38, 0x3e, 0x49,
+    0x3a, 0x37, 0x3c, 0x49, 0x40, 0x41, 0x3c, 0x40, 0x3d, 0x38, 0x39, 0x3f,
+    0x44, 0x3e, 0x42, 0x3e, 0x47, 0x40, 0x34, 0x46, 0x48, 0x37, 0x45, 0x3e,
+    0x46, 0x3f, 0x35, 0x39, 0x38, 0x3f, 0x36, 0x2c, 0x40, 0x38, 0x3e, 0x3c,
+    0x32, 0x3c, 0x46, 0x3a, 0x3f, 0x41, 0x36, 0x49, 0x42, 0x38, 0x36, 0x43,
+    0x3d, 0x41, 0x46, 0x35, 0x4f, 0x3a, 0x41, 0x5c, 0x4a, 0x42, 0x4e, 0x42,
+    0x46, 0x54, 0x3f, 0x45, 0x4c, 0x30, 0x33, 0x44, 0x56, 0x5d, 0x68, 0x26,
+    0x60, 0x33, 0x3e, 0x3a, 0x42, 0x49, 0x52, 0x47, 0x51, 0x46, 0x40, 0x47,
+    0x41, 0x3b, 0x1b, 0x4f, 0x3c, 0x45, 0x3d, 0x3d, 0x32, 0x2f, 0x3e, 0x3c,
+    0x3c, 0x3f, 0x3b, 0x3c, 0x2c, 0x3a, 0x41, 0x3c, 0x35, 0x3e, 0x3e, 0x3c,
+    0x3d, 0x3f, 0x3e, 0x40, 0x40, 0x44, 0x42, 0x3c, 0x3c, 0x3c, 0x41, 0x3c,
+    0x3c, 0x3d, 0x3e, 0x3d, 0x3c, 0x3d, 0x4a, 0x46, 0x3f, 0x35, 0x33, 0x43,
+    0x42, 0x41, 0x4d, 0x48, 0x48, 0x44, 0x3e, 0x41, 0x41, 0x36, 0x3c, 0x4c,
+    0x34, 0x47, 0x42, 0x39, 0x3e, 0x43, 0x3a, 0x53, 0x3b, 0x3b, 0x42, 0x3d,
+    0x41, 0x3c, 0x3e, 0x52, 0x3a, 0x44, 0x34, 0x43, 0x3d, 0x3d, 0x3a, 0x50,
+    0x3e, 0x33, 0x41, 0x40, 0x3f, 0x38, 0x43, 0x42, 0x3b, 0x37, 0x3e, 0x43,
+    0x3f, 0x3c, 0x41, 0x49, 0x40, 0x32, 0x40, 0x3e, 0x3b, 0x3e, 0x44, 0x3c,
+    0x35, 0x37, 0x3d, 0x41, 0x34, 0x3f, 0x3a, 0x3c, 0x47, 0x32, 0x41, 0x3d,
+    0x3c, 0x3a, 0x4a, 0x31, 0x43, 0x38, 0x45, 0x37, 0x49, 0x3c, 0x34, 0x3f,
+    0x3d, 0x3d, 0x3d, 0x45, 0x47, 0x3e, 0x37, 0x48, 0x40, 0x3b, 0x45, 0x3d,
+    0x4e, 0x42, 0x3f, 0x57, 0x4b, 0x43, 0x4b, 0x3d, 0x3f, 0x47, 0x4a, 0x43,
+    0x4e, 0x30, 0x38, 0x45, 0x59, 0x60, 0x64, 0x2d, 0x5a, 0x2d, 0x34, 0x35,
+    0x47, 0x54, 0x4e, 0x3f, 0x44, 0x45, 0x3c, 0x43, 0x3d, 0x40, 0x1c, 0x5a,
+    0x36, 0x3f, 0x3a, 0x39, 0x37, 0x3c, 0x32, 0x3b, 0x2d, 0x4a, 0x42, 0x35,
+    0x30, 0x41, 0x43, 0x3d, 0x3d, 0x45, 0x38, 0x36, 0x3e, 0x40, 0x3a, 0x4a,
+    0x34, 0x3d, 0x44, 0x3c, 0x39, 0x3b, 0x52, 0x38, 0x40, 0x3b, 0x3f, 0x3f,
+    0x35, 0x37, 0x46, 0x48, 0x38, 0x3b, 0x40, 0x36, 0x3d, 0x3a, 0x4f, 0x45,
+    0x35, 0x3a, 0x35, 0x33, 0x37, 0x43, 0x42, 0x52, 0x37, 0x3b, 0x3d, 0x42,
+    0x44, 0x3d, 0x48, 0x58, 0x33, 0x3f, 0x41, 0x44, 0x44, 0x3f, 0x3b, 0x52,
+    0x47, 0x39, 0x32, 0x3b, 0x38, 0x35, 0x48, 0x50, 0x34, 0x30, 0x39, 0x43,
+    0x42, 0x40, 0x3b, 0x4b, 0x43, 0x3d, 0x34, 0x44, 0x33, 0x39, 0x44, 0x4b,
+    0x45, 0x3e, 0x3c, 0x3f, 0x3a, 0x3e, 0x3c, 0x45, 0x36, 0x3e, 0x3d, 0x40,
+    0x43, 0x46, 0x37, 0x3d, 0x3b, 0x42, 0x43, 0x3f, 0x3a, 0x41, 0x48, 0x2f,
+    0x3e, 0x39, 0x3a, 0x39, 0x3f, 0x3a, 0x41, 0x40, 0x40, 0x3c, 0x3b, 0x3b,
+    0x3f, 0x40, 0x3e, 0x42, 0x38, 0x3f, 0x38, 0x3c, 0x49, 0x45, 0x3f, 0x62,
+    0x55, 0x47, 0x4c, 0x3c, 0x3c, 0x4a, 0x4c, 0x46, 0x4f, 0x39, 0x3a, 0x3b,
+    0x5e, 0x58, 0x6f, 0x2b, 0x5a, 0x2f, 0x3a, 0x35, 0x4b, 0x47, 0x4a, 0x46,
+    0x45, 0x3e, 0x38, 0x4f, 0x3b, 0x3d, 0x21, 0x4b, 0x3d, 0x40, 0x37, 0x40,
+    0x2d, 0x2c, 0x43, 0x3f, 0x2b, 0x3e, 0x3d, 0x39, 0x2f, 0x39, 0x44, 0x3c,
+    0x39, 0x39, 0x43, 0x3b, 0x3d, 0x3b, 0x44, 0x39, 0x42, 0x42, 0x3e, 0x40,
+    0x3b, 0x42, 0x53, 0x40, 0x32, 0x3d, 0x35, 0x3f, 0x3d, 0x45, 0x48, 0x46,
+    0x3d, 0x43, 0x3c, 0x36, 0x35, 0x39, 0x3d, 0x4a, 0x39, 0x39, 0x3e, 0x41,
+    0x38, 0x36, 0x3b, 0x53, 0x3c, 0x36, 0x32, 0x3b, 0x43, 0x3d, 0x42, 0x57,
+    0x35, 0x2f, 0x38, 0x40, 0x2f, 0x3d, 0x3c, 0x4c, 0x40, 0x2f, 0x3a, 0x36,
+    0x39, 0x3c, 0x3a, 0x51, 0x3d, 0x37, 0x39, 0x3c, 0x42, 0x40, 0x43, 0x52,
+    0x3e, 0x42, 0x3e, 0x45, 0x36, 0x34, 0x42, 0x4b, 0x3a, 0x38, 0x37, 0x3f,
+    0x36, 0x41, 0x3a, 0x45, 0x3e, 0x38, 0x35, 0x41, 0x35, 0x34, 0x37, 0x3c,
+    0x3f, 0x31, 0x3c, 0x35, 0x33, 0x43, 0x36, 0x28, 0x44, 0x42, 0x3e, 0x42,
+    0x3a, 0x41, 0x43, 0x35, 0x3d, 0x3f, 0x40, 0x3e, 0x3d, 0x33, 0x31, 0x41,
+    0x3d, 0x40, 0x3b, 0x40, 0x51, 0x40, 0x3f, 0xfb, 0x51, 0x49, 0x4c, 0x3d,
+    0x44, 0x4e, 0x47, 0x42, 0x50, 0x39, 0x39, 0x40, 0x59, 0x5d, 0x70, 0x2c,
+    0x59, 0x39, 0x38, 0x2f, 0x46, 0x50, 0x51, 0x47, 0x4c, 0x3c, 0x39, 0x48,
+    0x44, 0x3a, 0x1a, 0x51, 0x35, 0x3e, 0x34, 0x3a, 0x3d, 0x2b, 0x41, 0x39,
+    0x37, 0x4d, 0x3e, 0x43, 0x38, 0x3b, 0x3a, 0x35, 0x36, 0x3a, 0x43, 0x39,
+    0x39, 0x3a, 0x46, 0x3b, 0x39, 0x3c, 0x46, 0x36, 0x3e, 0x3d, 0x4b, 0x3d,
+    0x3b, 0x46, 0x3a, 0x41, 0x31, 0x3c, 0x44, 0x4a, 0x37, 0x42, 0x39, 0x43,
+    0x43, 0x3e, 0x40, 0x47, 0x3c, 0x3e, 0x3b, 0x43, 0x34, 0x3a, 0x43, 0x53,
+    0x3f, 0x37, 0x39, 0x37, 0x3e, 0x3b, 0x46, 0x59, 0x37, 0x37, 0x33, 0x3d,
+    0x38, 0x42, 0x36, 0x58, 0x2e, 0x32, 0x2b, 0x45, 0x32, 0x33, 0x36, 0x50,
+    0x41, 0x3f, 0x37, 0x3d, 0x3f, 0x3d, 0x46, 0x49, 0x41, 0x38, 0x33, 0x3d,
+    0x33, 0x32, 0x3a, 0x49, 0x41, 0x41, 0x3d, 0x33, 0x3b, 0x3b, 0x3a, 0x46,
+    0x34, 0x44, 0x3f, 0x3b, 0x2f, 0x3f, 0x32, 0x3c, 0x3f, 0x43, 0x3e, 0x45,
+    0x3a, 0x3c, 0x43, 0x26, 0x46, 0x37, 0x38, 0x3e, 0x36, 0x31, 0x3e, 0x34,
+    0x39, 0x3a, 0x38, 0x42, 0x38, 0x3e, 0x32, 0x42, 0x37, 0x37, 0x3c, 0x3a,
+    0x48, 0x44, 0x3a, 0x68, 0x56, 0x46, 0x4d, 0x47, 0x40, 0x4e, 0x42, 0x46,
+    0x51, 0x40, 0x38, 0x43, 0x58, 0x5d, 0x6a, 0x31, 0x57, 0x32, 0x3c, 0x36,
+    0x49, 0x56, 0x52, 0x48, 0x4b, 0x41, 0x2f, 0x4d, 0x31, 0x43, 0x1b, 0x4c,
+    0x30, 0x44, 0x33, 0x36, 0x2c, 0x3d, 0x45, 0x3a, 0x35, 0x46, 0x3d, 0x39,
+    0x2e, 0x38, 0x3f, 0x37, 0x41, 0x44, 0x46, 0x31, 0x33, 0x46, 0x37, 0x37,
+    0x3f, 0x41, 0x45, 0x30, 0x46, 0x3b, 0x50, 0x3b, 0x40, 0x39, 0x42, 0x43,
+    0x35, 0x37, 0x40, 0x44, 0x3b, 0x41, 0x3d, 0x37, 0x3a, 0x41, 0x3d, 0x46,
+    0x36, 0x41, 0x38, 0x41, 0x38, 0x3d, 0x45, 0x58, 0x3d, 0x3a, 0x3d, 0x44,
+    0x45, 0x38, 0x48, 0x5c, 0x3d, 0x39, 0x43, 0x45, 0x41, 0x3e, 0x4a, 0x56,
+    0x40, 0x33, 0x30, 0x31, 0x42, 0x39, 0x38, 0x56, 0x30, 0x3a, 0x35, 0x3e,
+    0x3f, 0x38, 0x36, 0x47, 0x3c, 0x3a, 0x3d, 0x3f, 0x37, 0x35, 0x3b, 0x4d,
+    0x43, 0x36, 0x39, 0x37, 0x3e, 0x42, 0x3d, 0x3f, 0x40, 0x3f, 0x34, 0x3b,
+    0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a, 0x3a, 0x3c, 0x34, 0x3f, 0x3c, 0x2a,
+    0x49, 0x3b, 0x36, 0x3c, 0x35, 0x46, 0x38, 0x3b, 0x3c, 0x39, 0x38, 0x42,
+    0x39, 0x36, 0x2e, 0x4a, 0x3d, 0x39, 0x3f, 0x3f, 0x4b, 0x45, 0x3e, 0x67,
+    0x4b, 0x4b, 0x49, 0x3e, 0x3f, 0x53, 0x4c, 0x55, 0x47, 0x32, 0x3b, 0x39,
+    0x54, 0x5b, 0x6f, 0x29, 0x5a, 0x34, 0x3e, 0x26, 0x45, 0x52, 0x59, 0x44,
+    0x59, 0x39, 0x3c, 0x47, 0x36, 0x46, 0x16, 0x50, 0x32, 0x46, 0x34, 0x35,
+    0x35, 0x2d, 0x39, 0x38, 0x2c, 0x42, 0x43, 0x3b, 0x32, 0x3f, 0x37, 0x2f,
+    0x34, 0x43, 0x46, 0x3b, 0x3b, 0x41, 0x3c, 0x37, 0x3e, 0x43, 0x4b, 0x36,
+    0x3e, 0x3c, 0x4c, 0x42, 0x40, 0x3f, 0x49, 0x40, 0x3c, 0x40, 0x3c, 0x48,
+    0x35, 0x42, 0x3f, 0x42, 0x44, 0x40, 0x45, 0x4f, 0x3f, 0x3f, 0x40, 0x42,
+    0x3b, 0x3d, 0x49, 0x55, 0x42, 0x39, 0x41, 0x3b, 0x3f, 0x38, 0x44, 0x60,
+    0x34, 0x40, 0x3b, 0x3b, 0x35, 0x3d, 0x41, 0x4e, 0x35, 0x33, 0x30, 0x3a,
+    0x3a, 0x32, 0x42, 0x4f, 0x33, 0x34, 0x2f, 0x38, 0x49, 0x38, 0x40, 0x4c,
+    0x35, 0x38, 0x3e, 0x46, 0x3f, 0x3a, 0x3a, 0x45, 0x3b, 0x34, 0x2e, 0x39,
+    0x32, 0x3e, 0x40, 0x48, 0x35, 0x44, 0x3a, 0x34, 0x3f, 0x35, 0x3b, 0x32,
+    0x40, 0x43, 0x3e, 0x38, 0x3b, 0x43, 0x3c, 0x2b, 0x46, 0x43, 0x40, 0x32,
+    0x42, 0x3b, 0x49, 0x2e, 0x3b, 0x3a, 0x3e, 0x41, 0x3c, 0x3f, 0x31, 0x3b,
+    0x41, 0x33, 0x41, 0x3c, 0x4d, 0x40, 0x38, 0x68, 0x4c, 0x4c, 0x4e, 0x3f,
+    0x3f, 0x54, 0x4a, 0x3d, 0x4c, 0x33, 0x3b, 0x3a, 0x5d, 0x60, 0x71, 0x2b,
+    0x59, 0x33, 0x3c, 0x2c, 0x47, 0x52, 0x4f, 0x51, 0x56, 0x3d, 0x39, 0x44,
+    0x35, 0x41, 0x1b, 0x4a, 0x35, 0x41, 0x37, 0x35, 0x2c, 0x35, 0x37, 0x35,
+    0x38, 0x41, 0x38, 0x3e, 0x3c, 0x40, 0x3c, 0x2f, 0x38, 0x3e, 0x3f, 0x45,
+    0x40, 0x3d, 0x3c, 0x35, 0x3c, 0x46, 0x43, 0x39, 0x37, 0x42, 0x4e, 0x3c,
+    0x42, 0x46, 0x37, 0x33, 0x43, 0x3f, 0x47, 0x4a, 0x3d, 0x3e, 0x40, 0x40,
+    0x40, 0x3f, 0x4b, 0x54, 0x36, 0x3f, 0x37, 0x40, 0x39, 0x39, 0x47, 0x51,
+    0x3d, 0x39, 0x36, 0x36, 0x40, 0x40, 0x41, 0x5a, 0x38, 0x39, 0x42, 0x38,
+    0x40, 0x39, 0x43, 0x50, 0x3a, 0x3a, 0x32, 0x3c, 0x3c, 0x35, 0x44, 0x4a,
+    0x37, 0x35, 0x36, 0x3c, 0x35, 0x30, 0x48, 0x4b, 0x3c, 0x33, 0x37, 0x3e,
+    0x42, 0x3c, 0x42, 0x4e, 0x41, 0x32, 0x3e, 0x33, 0x49, 0x39, 0x3e, 0x42,
+    0x3d, 0x39, 0x37, 0x36, 0x35, 0x41, 0x3e, 0x37, 0x37, 0x3e, 0x3d, 0x38,
+    0x3a, 0x3c, 0x41, 0x29, 0x3c, 0x3b, 0x39, 0x40, 0x43, 0x3d, 0x3e, 0x33,
+    0x3f, 0x3f, 0x3e, 0x43, 0x43, 0x38, 0x38, 0x41, 0x3b, 0x38, 0x35, 0x3a,
+    0x4b, 0x44, 0x44, 0x55, 0x4e, 0x44, 0x4d, 0x49, 0x3e, 0x53, 0x45, 0x3f,
+    0x45, 0x3d, 0x36, 0x36, 0x4f, 0x5b, 0x6b, 0x28, 0x59, 0x34, 0x39, 0x34,
+    0x4f, 0x4d, 0x52, 0x3e, 0x51, 0x34, 0x35, 0x4a, 0x3b, 0x3f, 0x21, 0x45,
+    0x36, 0x3f, 0x38, 0x33, 0x2c, 0x37, 0x32, 0x2f, 0x2b, 0x44, 0x47, 0x3f,
+    0x38, 0x3a, 0x3f, 0x2e, 0x41, 0x3f, 0x3d, 0x41, 0x35, 0x48, 0x43, 0x40,
+    0x33, 0x44, 0x40, 0x38, 0x47, 0x44, 0x4c, 0x3d, 0x41, 0x3b, 0x39, 0x36,
+    0x3e, 0x44, 0x49, 0x48, 0x3c, 0x3b, 0x34, 0x34, 0x3f, 0x3c, 0x42, 0x52,
+    0x43, 0x41, 0x3c, 0x3c, 0x3d, 0x43, 0x48, 0x54, 0x39, 0x35, 0x39, 0x3c,
+    0x43, 0x3c, 0x44, 0x5f, 0x39, 0x3d, 0x38, 0x3f, 0x36, 0x3d, 0x43, 0x58,
+    0x33, 0x3d, 0x43, 0x33, 0x3f, 0x36, 0x39, 0x54, 0x3a, 0x37, 0x2d, 0x46,
+    0x43, 0x41, 0x47, 0x46, 0x3e, 0x42, 0x34, 0x49, 0x3a, 0x3f, 0x38, 0x50,
+    0x3a, 0x3b, 0x42, 0x3a, 0x3e, 0x3c, 0x3b, 0x40, 0x42, 0x45, 0x37, 0x3b,
+    0x2f, 0x3b, 0x46, 0x30, 0x42, 0x3b, 0x3b, 0x44, 0x3b, 0x3e, 0x40, 0x1e,
+    0x33, 0x40, 0x40, 0x3d, 0x39, 0x3a, 0x41, 0x33, 0x45, 0x3e, 0x3c, 0x3f,
+    0x3f, 0x38, 0x31, 0x46, 0x3b, 0x35, 0x42, 0x39, 0x49, 0x3e, 0x3d, 0x66,
+    0x53, 0x3f, 0x44, 0x40, 0x43, 0x45, 0x48, 0x45, 0x49, 0x2d, 0x3e, 0x3a,
+    0x4f, 0x5a, 0x62, 0x27, 0x54, 0x37, 0x35, 0x34, 0x42, 0x50, 0x54, 0x43,
+    0x4d, 0x38, 0x39, 0x48, 0x38, 0x4c, 0x21, 0x3f, 0x40, 0x3a, 0x3a, 0x2f,
+    0x37, 0x2f, 0x29, 0x2c, 0x36, 0x47, 0x3f, 0x41, 0x31, 0x33, 0x3e, 0x32,
+    0x3e, 0x40, 0x42, 0x40, 0x42, 0x3a, 0x46, 0x33, 0x44, 0x40, 0x3c, 0x43,
+    0x3d, 0x41, 0x4d, 0x3d, 0x3c, 0x47, 0x46, 0x43, 0x42, 0x3e, 0x44, 0x4e,
+    0x41, 0x3a, 0x44, 0x38, 0x45, 0x3b, 0x49, 0x4c, 0x40, 0x3f, 0x37, 0x3e,
+    0x3e, 0x46, 0x41, 0x51, 0x3f, 0x39, 0x30, 0x40, 0x3e, 0x38, 0x43, 0x5b,
+    0x33, 0x3e, 0x31, 0x42, 0x3d, 0x2f, 0x49, 0x57, 0x37, 0x31, 0x46, 0x44,
+    0x3e, 0x35, 0x40, 0x55, 0x36, 0x35, 0x3d, 0x3c, 0x38, 0x33, 0x42, 0x52,
+    0x3b, 0x39, 0x34, 0x31, 0x45, 0x34, 0x3c, 0x51, 0x33, 0x39, 0x3c, 0x40,
+    0x36, 0x36, 0x42, 0x3e, 0x37, 0x3e, 0x3b, 0x40, 0x3d, 0x36, 0x41, 0x30,
+    0x42, 0x45, 0x40, 0x49, 0x3d, 0x32, 0x46, 0x26, 0x40, 0x44, 0x3a, 0x3f,
+    0x3d, 0x46, 0x45, 0x31, 0x33, 0x34, 0x3e, 0x37, 0x46, 0x3b, 0x32, 0x3a,
+    0x3d, 0x31, 0x3c, 0x36, 0x50, 0x41, 0x3b, 0x5d, 0x53, 0x42, 0x44, 0x37,
+    0x3e, 0x4d, 0x41, 0x4b, 0x49, 0x2f, 0x35, 0x3a, 0x4e, 0x59, 0x5d, 0x27,
+    0x5c, 0x30, 0x3d, 0x3a, 0x46, 0x50, 0x57, 0x4a, 0x4c, 0x36, 0x37, 0x46,
+    0x48, 0x41, 0x24, 0x49, 0x36, 0x3e, 0x41, 0x45, 0x37, 0x38, 0x2e, 0x2e,
+    0x34, 0x3c, 0x38, 0x41, 0x36, 0x3d, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x3b,
+    0x42, 0x3c, 0x43, 0x38, 0x3e, 0x3d, 0x41, 0x48, 0x47, 0x4c, 0x45, 0x3b,
+    0x37, 0x41, 0x38, 0x41, 0x3d, 0x41, 0x46, 0x4e, 0x36, 0x45, 0x38, 0x39,
+    0x42, 0x42, 0x37, 0x4c, 0x34, 0x46, 0x3c, 0x44, 0x4a, 0x39, 0x45, 0x53,
+    0x3c, 0x3f, 0x41, 0x35, 0x3c, 0x45, 0x4c, 0x5a, 0x44, 0x41, 0x30, 0x35,
+    0x40, 0x39, 0x42, 0x5a, 0x36, 0x36, 0x3a, 0x3b, 0x43, 0x35, 0x3c, 0x56,
+    0x35, 0x38, 0x2b, 0x4a, 0x3c, 0x40, 0x45, 0x54, 0x37, 0x37, 0x3a, 0x44,
+    0x42, 0x3b, 0x3d, 0x4a, 0x3f, 0x37, 0x3b, 0x35, 0x34, 0x3f, 0x40, 0x48,
+    0x45, 0x3e, 0x37, 0x38, 0x41, 0x41, 0x3d, 0x37, 0x43, 0x3d, 0x3d, 0x45,
+    0x3a, 0x38, 0x3f, 0x23, 0x4a, 0x37, 0x42, 0x3c, 0x3f, 0x43, 0x42, 0x33,
+    0x37, 0x39, 0x35, 0x3b, 0x41, 0x36, 0x2f, 0x3b, 0x41, 0x3a, 0x44, 0x3d,
+    0x3e, 0x45, 0x44, 0x50, 0x47, 0x47, 0x48, 0x3c, 0x3f, 0x45, 0x43, 0x3f,
+    0x4a, 0x33, 0x3c, 0x3a, 0x52, 0x52, 0x5a, 0x23, 0x58, 0x31, 0x3b, 0x3b,
+    0x47, 0x44, 0x54, 0x45, 0x42, 0x38, 0x38, 0x40, 0x43, 0x3f, 0x2a, 0x46,
+    0x3b, 0x46, 0x3b, 0x46, 0x35, 0x37, 0x29, 0x35, 0x38, 0x41, 0x3a, 0x31,
+    0x44, 0x41, 0x39, 0x36, 0x45, 0x41, 0x40, 0x3e, 0x40, 0x44, 0x47, 0x37,
+    0x3f, 0x42, 0x49, 0x34, 0x46, 0x3d, 0x4b, 0x3d, 0x42, 0x3b, 0x42, 0x3e,
+    0x41, 0x3b, 0x3f, 0x43, 0x47, 0x45, 0x47, 0x41, 0x40, 0x3a, 0x3d, 0x45,
+    0x40, 0x36, 0x3b, 0x3b, 0x44, 0x37, 0x46, 0x55, 0x35, 0x42, 0x3f, 0x3a,
+    0x41, 0x41, 0x44, 0x5c, 0x31, 0x44, 0x3d, 0x46, 0x39, 0x38, 0x46, 0x59,
+    0x41, 0x3b, 0x3d, 0x39, 0x33, 0x3e, 0x41, 0x58, 0x33, 0x44, 0x34, 0x31,
+    0x48, 0x3e, 0x4d, 0x56, 0x36, 0x3c, 0x37, 0x46, 0x46, 0x38, 0x45, 0x53,
+    0x35, 0x3d, 0x3a, 0x31, 0x42, 0x48, 0x45, 0x44, 0x3b, 0x3b, 0x3c, 0x41,
+    0x3d, 0x42, 0x3f, 0x2f, 0x38, 0x3c, 0x3e, 0x41, 0x44, 0x3a, 0x4a, 0x24,
+    0x37, 0x3e, 0x37, 0x48, 0x40, 0x3f, 0x46, 0x3c, 0x47, 0x4a, 0x38, 0x47,
+    0x34, 0x45, 0x31, 0x42, 0x43, 0x44, 0x3f, 0x3f, 0x49, 0x40, 0x3c, 0x41,
+    0x4d, 0x43, 0x42, 0x39, 0x39, 0x48, 0x41, 0x38, 0x47, 0x3c, 0x3c, 0x42,
+    0x44, 0x55, 0x62, 0x2a, 0x5c, 0x32, 0x3a, 0x37, 0x4c, 0x44, 0x4f, 0x3e,
+    0x4e, 0x42, 0x3a, 0x42, 0x41, 0x4a, 0x35, 0x44, 0x45, 0x3b, 0x43, 0x41,
+    0x33, 0x38, 0x28, 0x36, 0x40, 0x47, 0x3e, 0x3e, 0x3e, 0x39, 0x3a, 0x37,
+    0x44, 0x44, 0x3f, 0x3b, 0x41, 0x3c, 0x45, 0x36, 0x38, 0x3a, 0x3c, 0x42,
+    0x42, 0x3f, 0x59, 0x3c, 0x47, 0x3d, 0x38, 0x3a, 0x42, 0x44, 0x41, 0x46,
+    0x3f, 0x43, 0x48, 0x42, 0x44, 0x35, 0x3f, 0x45, 0x36, 0x3f, 0x38, 0x3a,
+    0x44, 0x3d, 0x3d, 0x4e, 0x3e, 0x45, 0x40, 0x42, 0x3c, 0x33, 0x43, 0x5a,
+    0x38, 0x3e, 0x45, 0x3a, 0x3e, 0x42, 0x45, 0x52, 0x3c, 0x42, 0x3a, 0x38,
+    0x3d, 0x3b, 0x4a, 0x57, 0x38, 0x37, 0x47, 0x44, 0x3e, 0x3c, 0x38, 0x48,
+    0x36, 0x41, 0x3f, 0x41, 0x3a, 0x3a, 0x46, 0x47, 0x42, 0x40, 0x32, 0x33,
+    0x43, 0x37, 0x41, 0x43, 0x3e, 0x40, 0x3d, 0x3a, 0x3e, 0x38, 0x42, 0x30,
+    0x3e, 0x40, 0x46, 0x42, 0x40, 0x44, 0x42, 0x23, 0x31, 0x40, 0x3f, 0x3d,
+    0x3b, 0x33, 0x40, 0x33, 0x41, 0x33, 0x43, 0x41, 0x3a, 0x3e, 0x36, 0x40,
+    0x40, 0x45, 0x37, 0x42, 0x46, 0x42, 0x39, 0x48, 0x44, 0x40, 0x40, 0x45,
+    0x3c, 0x49, 0x41, 0x3f, 0x4c, 0x3d, 0x2f, 0x3f, 0x47, 0x52, 0x54, 0x2c,
+    0x55, 0x42, 0x44, 0x3b, 0x46, 0x4f, 0x48, 0x3c, 0x45, 0x39, 0x3f, 0x4b,
+    0x3f, 0x3f, 0x36, 0x42, 0x41, 0x48, 0x44, 0x44, 0x36, 0x3b, 0x37, 0x40,
+    0x39, 0x49, 0x3a, 0x35, 0x3e, 0x48, 0x31, 0x30, 0x44, 0x38, 0x4c, 0x3c,
+    0x41, 0x3e, 0x46, 0x32, 0x44, 0x3b, 0x42, 0x3c, 0x38, 0x3a, 0x47, 0x3f,
+    0x3a, 0x42, 0x3a, 0x43, 0x40, 0x4b, 0x47, 0x3c, 0x42, 0x46, 0x45, 0x42,
+    0x3c, 0x46, 0x3d, 0x3f, 0x3e, 0x36, 0x38, 0x3e, 0x46, 0x3c, 0x4d, 0x43,
+    0x49, 0x41, 0x48, 0x3c, 0x3d, 0x39, 0x43, 0x58, 0x3a, 0x41, 0x3f, 0x38,
+    0x37, 0x3f, 0x46, 0x5d, 0x3c, 0x3c, 0x39, 0x36, 0x3d, 0x46, 0x43, 0x50,
+    0x3a, 0x47, 0x39, 0x36, 0x41, 0x3f, 0x3e, 0x51, 0x31, 0x36, 0x3e, 0x3c,
+    0x3c, 0x3a, 0x48, 0x41, 0x3a, 0x43, 0x49, 0x3e, 0x42, 0x46, 0x3f, 0x41,
+    0x49, 0x33, 0x42, 0x41, 0x45, 0x40, 0x3d, 0x2b, 0x3d, 0x38, 0x40, 0x37,
+    0x3a, 0x31, 0x45, 0x26, 0x33, 0x3d, 0x3f, 0x39, 0x36, 0x3c, 0x38, 0x33,
+    0x34, 0x3f, 0x35, 0x44, 0x3a, 0x39, 0x32, 0x41, 0x35, 0x40, 0x3c, 0x3b,
+    0x4a, 0x3f, 0x3e, 0x3e, 0x4a, 0x3e, 0x42, 0x35, 0x38, 0x43, 0x3c, 0x37,
+    0x3d, 0x3c, 0x39, 0x43, 0x3f, 0x4e, 0x54, 0x33, 0x4b, 0x37, 0x43, 0x3b,
+    0x43, 0x48, 0x43, 0x42, 0x3d, 0x46, 0x45, 0x49, 0x3a, 0x39, 0x36, 0x4a,
+    0x48, 0x48, 0x37, 0x4b, 0x42, 0x47, 0x34, 0x34, 0x43, 0x42, 0x3a, 0x3d,
+    0x3c, 0x46, 0x34, 0x39, 0x40, 0x3b, 0x3e, 0x3e, 0x37, 0x3d, 0x53, 0x3b,
+    0x48, 0x3c, 0x43, 0x44, 0x3b, 0x3f, 0x57, 0x3d, 0x39, 0x3c, 0x39, 0x3a,
+    0x3e, 0x3f, 0x43, 0x3e, 0x41, 0x47, 0x3c, 0x41, 0x40, 0x41, 0x37, 0x3f,
+    0x3b, 0x43, 0x35, 0x3e, 0x45, 0x40, 0x47, 0x59, 0x41, 0x49, 0x3b, 0x3f,
+    0x47, 0x49, 0x4b, 0x61, 0x39, 0x48, 0x39, 0x3e, 0x44, 0x34, 0x3b, 0x59,
+    0x3c, 0x42, 0x45, 0x35, 0x42, 0x41, 0x39, 0x52, 0x42, 0x3c, 0x3d, 0x3e,
+    0x3d, 0x4a, 0x4a, 0x4d, 0x3c, 0x34, 0x44, 0x3c, 0x41, 0x34, 0x38, 0x46,
+    0x38, 0x45, 0x40, 0x45, 0x40, 0x3a, 0x3d, 0x44, 0x3a, 0x37, 0x3a, 0x3a,
+    0x3b, 0x42, 0x40, 0x34, 0x3b, 0x3c, 0x42, 0x40, 0x3d, 0x32, 0x40, 0x27,
+    0x37, 0x39, 0x37, 0x46, 0x48, 0x31, 0x40, 0x30, 0x42, 0x42, 0x3a, 0x40,
+    0x3d, 0x37, 0x2a, 0x40, 0x41, 0x37, 0x3c, 0x4a, 0x46, 0x45, 0x3d, 0x34,
+    0x48, 0x41, 0x42, 0x3e, 0x3f, 0x39, 0x3c, 0x3a, 0x4f, 0x3b, 0x32, 0x3e,
+    0x43, 0x51, 0x4f, 0x2a, 0x46, 0x3a, 0x3d, 0x3b, 0x40, 0x3d, 0x4c, 0x3c,
+    0x48, 0x40, 0x36, 0x4a, 0x3a, 0x38, 0x42, 0x43, 0x4c, 0x3d, 0x47, 0x47,
+    0x33, 0x3f, 0x2d, 0x37, 0x4a, 0x43, 0x38, 0x3e, 0x49, 0x42, 0x42, 0x3d,
+    0x43, 0x47, 0x41, 0x38, 0x46, 0x37, 0x46, 0x38, 0x47, 0x42, 0x49, 0x3d,
+    0x3b, 0x37, 0x4c, 0x3c, 0x3a, 0x45, 0x3f, 0x37, 0x36, 0x3d, 0x3c, 0x40,
+    0x3e, 0x45, 0x46, 0x41, 0x41, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x37, 0x3f,
+    0x3e, 0x3a, 0x3a, 0x4b, 0x3a, 0x36, 0x3d, 0x3f, 0x38, 0x3f, 0x3c, 0x58,
+    0x40, 0x49, 0x3d, 0x42, 0x38, 0x3a, 0x47, 0x50, 0x3b, 0x49, 0x40, 0x44,
+    0x3e, 0x3c, 0x38, 0x52, 0x3a, 0x3e, 0x44, 0x3c, 0x35, 0x44, 0x3a, 0x47,
+    0x3e, 0x49, 0x3f, 0x47, 0x45, 0x39, 0x3b, 0x46, 0x44, 0x3e, 0x41, 0x46,
+    0x40, 0x41, 0x40, 0x40, 0x3a, 0x35, 0x3e, 0x36, 0x3e, 0x3e, 0x3d, 0x35,
+    0x3b, 0x3c, 0x38, 0x46, 0x3b, 0x3c, 0x41, 0x2c, 0x3f, 0x42, 0x38, 0x3b,
+    0x36, 0x3b, 0x39, 0x40, 0x40, 0x38, 0x36, 0x33, 0x34, 0x42, 0x2f, 0x44,
+    0x41, 0x40, 0x39, 0x35, 0x3b, 0x44, 0x42, 0x2c, 0x41, 0x3b, 0x44, 0x41,
+    0x35, 0x44, 0x3b, 0x34, 0x44, 0x49, 0x36, 0x39, 0x3a, 0x52, 0x4d, 0x2b,
+    0x40, 0x40, 0x3e, 0x39, 0x48, 0x42, 0x3c, 0x44, 0x46, 0x49, 0x3f, 0x54,
+    0x43, 0x40, 0x2e, 0x40, 0x4f, 0x36, 0x3e, 0x3f, 0x38, 0x48, 0x44, 0x3c,
+    0x44, 0x43, 0x41, 0x47, 0x40, 0x46, 0x40, 0x37, 0x41, 0x34, 0x3a, 0x41,
+    0x41, 0x3b, 0x49, 0x39, 0x42, 0x38, 0x3d, 0x39, 0x34, 0x35, 0x43, 0x36,
+    0x3e, 0x44, 0x3f, 0x40, 0x43, 0x40, 0x40, 0x3a, 0x47, 0x42, 0x3e, 0x42,
+    0x46, 0x35, 0x3a, 0x46, 0x3c, 0x3c, 0x3c, 0x3d, 0x3f, 0x40, 0x43, 0x4c,
+    0x3a, 0x37, 0x3f, 0x43, 0x47, 0x38, 0x42, 0x58, 0x42, 0x3b, 0x34, 0x37,
+    0x3e, 0x48, 0x3c, 0x57, 0x44, 0x3c, 0x3d, 0x3a, 0x36, 0x48, 0x3c, 0x51,
+    0x3d, 0x48, 0x45, 0x45, 0x38, 0x45, 0x40, 0x3f, 0x3b, 0x35, 0x3d, 0x3f,
+    0x38, 0x47, 0x39, 0x3b, 0x36, 0x49, 0x43, 0x40, 0x3f, 0x46, 0x38, 0x40,
+    0x3f, 0x3e, 0x39, 0x32, 0x47, 0x42, 0x35, 0x33, 0x39, 0x47, 0x3c, 0x36,
+    0x3b, 0x37, 0x43, 0x35, 0x3b, 0x3b, 0x34, 0x3b, 0x38, 0x3d, 0x3e, 0x3a,
+    0x35, 0x49, 0x38, 0x40, 0x3f, 0x3f, 0x3e, 0x37, 0x43, 0x3b, 0x3e, 0x3e,
+    0x3b, 0x40, 0x44, 0x39, 0x3d, 0x3f, 0x31, 0x42, 0x42, 0x3b, 0x41, 0x3d,
+    0x3e, 0x3c, 0x37, 0x34, 0x48, 0x3d, 0x49, 0x4a, 0x47, 0x36, 0x3a, 0x34,
+    0x37, 0x36, 0x3e, 0x38, 0x33, 0x45, 0x39, 0x44, 0x34, 0x49, 0x3a, 0x3d,
+    0x34, 0x31, 0x31, 0x3d, 0x34, 0x3d, 0x41, 0x3e, 0x49, 0x41, 0x34, 0x3f,
+    0x3a, 0x42, 0x3e, 0x40, 0x3f, 0x33, 0x46, 0x3f, 0x34, 0x39, 0x37, 0x46,
+    0x3e, 0x32, 0x3f, 0x45, 0x45, 0x41, 0x3b, 0x4b, 0x35, 0x35, 0x3b, 0x4a,
+    0x3d, 0x43, 0x3b, 0x44, 0x3c, 0x38, 0x31, 0x43, 0x39, 0x35, 0x41, 0x45,
+    0x37, 0x3e, 0x43, 0x47, 0x39, 0x40, 0x41, 0x41, 0x40, 0x32, 0x37, 0x3e,
+    0x3d, 0x39, 0x3b, 0x49, 0x33, 0x35, 0x38, 0x41, 0x45, 0x37, 0x3c, 0x49,
+    0x3b, 0x34, 0x34, 0x41, 0x3a, 0x3f, 0x3e, 0x47, 0x39, 0x3c, 0x34, 0x3a,
+    0x38, 0x44, 0x40, 0x51, 0x3a, 0x37, 0x3b, 0x3f, 0x3d, 0x3a, 0x45, 0x48,
+    0x3f, 0x46, 0x35, 0x43, 0x38, 0x43, 0x35, 0x4c, 0x42, 0x47, 0x44, 0x3d,
+    0x40, 0x3a, 0x39, 0x4e, 0x3d, 0x37, 0x3c, 0x42, 0x40, 0x48, 0x44, 0x4c,
+    0x31, 0x40, 0x42, 0x3b, 0x45, 0x45, 0x3f, 0x3e, 0x3d, 0x44, 0x3f, 0x31,
+    0x3f, 0x44, 0x45, 0x37, 0x3e, 0x3d, 0x35, 0x3b, 0x2d, 0x44, 0x4a, 0x3a,
+    0x2b, 0x37, 0x38, 0x46, 0x41, 0x39, 0x3c, 0x3c, 0x46, 0x33, 0x36, 0x3c,
+    0x4b, 0x34, 0x49, 0x50, 0x30, 0x3c, 0x33, 0x41, 0x44, 0x33, 0x43, 0x39,
+    0x36, 0x45, 0x33, 0x3b, 0x3d, 0x36, 0x47, 0x30, 0x42, 0x37, 0x49, 0x3e,
+    0x3b, 0x49, 0x3d, 0x3b, 0x3a, 0x41, 0x38, 0x44, 0x42, 0x3b, 0x3f, 0x40,
+    0x46, 0x35, 0x38, 0x3c, 0x48, 0x3a, 0x46, 0x41, 0x36, 0x36, 0x41, 0x3e,
+    0x43, 0x3e, 0x32, 0x39, 0x3a, 0x41, 0x30, 0x3e, 0x40, 0x3e, 0x36, 0x3a,
+    0x45, 0x45, 0x3a, 0x3c, 0x31, 0x3b, 0x47, 0x3f, 0x36, 0x3a, 0x3c, 0x41,
+    0x3b, 0x41, 0x39, 0x46, 0x3f, 0x3c, 0x34, 0x3e, 0x41, 0x45, 0x41, 0x42,
+    0x39, 0x40, 0x40, 0x44, 0x45, 0x42, 0x34, 0x3f, 0x3e, 0x31, 0x3b, 0x41,
+    0x33, 0x43, 0x37, 0x44, 0x44, 0x3a, 0x36, 0x36, 0x48, 0x3c, 0x37, 0x47,
+    0x39, 0x3e, 0x3e, 0x3c, 0x3c, 0x41, 0x3c, 0x44, 0x3b, 0x42, 0x3f, 0x3a,
+    0x43, 0x3b, 0x3e, 0x48, 0x36, 0x3f, 0x3d, 0x34, 0x40, 0x43, 0x35, 0x4f,
+    0x34, 0x39, 0x3b, 0x41, 0x40, 0x39, 0x37, 0x4c, 0x39, 0x36, 0x39, 0x39,
+    0x47, 0x41, 0x43, 0x3f, 0x3f, 0x33, 0x42, 0x3f, 0x42, 0x40, 0x37, 0x40,
+    0x3f, 0x34, 0x45, 0x3d, 0x2d, 0x3c, 0x44, 0x3b, 0x43, 0x37, 0x26, 0x50,
+    0x43, 0x44, 0x3d, 0x43, 0x42, 0x2d, 0x3c, 0x33, 0x4a, 0x32, 0x4a, 0x53,
+    0x33, 0x38, 0x27, 0x36, 0x42, 0x30, 0x47, 0x3d, 0x36, 0x45, 0x46, 0x36,
+    0x3b, 0x3b, 0x40, 0x33, 0x37, 0x36, 0x44, 0x46, 0x3d, 0x35, 0x40, 0x38,
+    0x3b, 0x40, 0x36, 0x3c, 0x3d, 0x37, 0x31, 0x41, 0x33, 0x3c, 0x38, 0x3f,
+    0x43, 0x3a, 0x40, 0x49, 0x38, 0x39, 0x38, 0x3d, 0x43, 0x3d, 0x39, 0x3b,
+    0x3d, 0x3f, 0x38, 0x42, 0x34, 0x43, 0x33, 0x3e, 0x43, 0x3e, 0x40, 0x42,
+    0x3b, 0x45, 0x37, 0x44, 0x43, 0x39, 0x3c, 0x3d, 0x37, 0x44, 0x3a, 0x3b,
+    0x47, 0x3f, 0x3a, 0x3c, 0x3a, 0x3b, 0x3f, 0x43, 0x3e, 0x3d, 0x46, 0x3e,
+    0x37, 0x36, 0x3f, 0x40, 0x42, 0x42, 0x37, 0x36, 0x48, 0x35, 0x44, 0x44,
+    0x39, 0x3c, 0x3b, 0x41, 0x44, 0x49, 0x3a, 0x40, 0x41, 0x36, 0x33, 0x3a,
+    0x3c, 0x3d, 0x40, 0x3f, 0x43, 0x36, 0x3c, 0x3a, 0x3f, 0x4b, 0x32, 0x49,
+    0x49, 0x3e, 0x3a, 0x3e, 0x3f, 0x41, 0x3c, 0x47, 0x40, 0x41, 0x45, 0x3e,
+    0x47, 0x47, 0x3f, 0x47, 0x45, 0x3e, 0x31, 0x43, 0x4a, 0x44, 0x36, 0x40,
+    0x41, 0x47, 0x3e, 0x42, 0x37, 0x40, 0x3b, 0x46, 0x37, 0x41, 0x3e, 0x3c,
+    0x27, 0x40, 0x49, 0x42, 0x42, 0x39, 0x30, 0x49, 0x43, 0x38, 0x3d, 0x42,
+    0x43, 0x2f, 0x3b, 0x37, 0x4b, 0x2d, 0x4f, 0x52, 0x30, 0x31, 0x2f, 0x3a,
+    0x49, 0x38, 0x4f, 0x45, 0x2e, 0x47, 0x3a, 0x32, 0x33, 0x3f, 0x4a, 0x2e,
+    0x33, 0x3b, 0x3e, 0x3e, 0x49, 0x45, 0x44, 0x38, 0x3c, 0x35, 0x45, 0x47,
+    0x41, 0x3b, 0x3c, 0x48, 0x46, 0x39, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x42,
+    0x3d, 0x46, 0x33, 0x41, 0x36, 0x3f, 0x3f, 0x3c, 0x33, 0x3e, 0x3e, 0x40,
+    0x44, 0x40, 0x3c, 0x38, 0x46, 0x3a, 0x40, 0x36, 0x42, 0x35, 0x3f, 0x3b,
+    0x3b, 0x43, 0x3c, 0x40, 0x40, 0x49, 0x2e, 0x39, 0x40, 0x3f, 0x45, 0x41,
+    0x3f, 0x30, 0x42, 0x3d, 0x40, 0x3c, 0x3a, 0x3b, 0x3b, 0x40, 0x39, 0x42,
+    0x3a, 0x3f, 0x3f, 0x3e, 0x35, 0x3b, 0x38, 0x45, 0x47, 0x35, 0x44, 0x3e,
+    0x3b, 0x3f, 0x3f, 0x40, 0x3a, 0x35, 0x30, 0x49, 0x45, 0x35, 0x3b, 0x39,
+    0x3b, 0x48, 0x3f, 0x37, 0x39, 0x40, 0x43, 0x45, 0x3d, 0x40, 0x41, 0x3a,
+    0x33, 0x3d, 0x3a, 0x4b, 0x40, 0x42, 0x40, 0x42, 0x43, 0x39, 0x3c, 0x49,
+    0x3e, 0x47, 0x3e, 0x44, 0x3f, 0x3a, 0x40, 0x41, 0x3f, 0x42, 0x42, 0x37,
+    0x3e, 0x3b, 0x36, 0x3e, 0x3b, 0x3c, 0x48, 0x43, 0x2d, 0x46, 0x4a, 0x38,
+    0x45, 0x3a, 0x29, 0x46, 0x40, 0x3c, 0x40, 0x44, 0x40, 0x33, 0x2f, 0x33,
+    0x48, 0x2e, 0x51, 0x4f, 0x2b, 0x32, 0x2e, 0x2d, 0x45, 0x33, 0x4d, 0x41,
+    0x29, 0x4b, 0x41, 0x39, 0x2f, 0x3a, 0x49, 0x31, 0x37, 0x40, 0x47, 0x4c,
+    0x3e, 0x31, 0x41, 0x3f, 0x43, 0x37, 0x45, 0x4f, 0x41, 0x3c, 0x30, 0x4a,
+    0x37, 0x37, 0x36, 0x39, 0x31, 0x3d, 0x36, 0x4b, 0x37, 0x44, 0x3c, 0x43,
+    0x44, 0x36, 0x3f, 0x3b, 0x34, 0x3e, 0x3a, 0x35, 0x38, 0x3f, 0x33, 0x37,
+    0x3b, 0x3d, 0x46, 0x38, 0x3b, 0x37, 0x38, 0x3b, 0x31, 0x3e, 0x3d, 0x3b,
+    0x3d, 0x39, 0x35, 0x33, 0x33, 0x3c, 0x39, 0x39, 0x48, 0x39, 0x39, 0x3f,
+    0x3e, 0x36, 0x47, 0x3a, 0x44, 0x40, 0x32, 0x3c, 0x37, 0x35, 0x40, 0x3f,
+    0x3a, 0x38, 0x3b, 0x3d, 0x46, 0x45, 0x36, 0x43, 0x40, 0x3d, 0x41, 0x41,
+    0x47, 0x3a, 0x3d, 0x3e, 0x43, 0x42, 0x32, 0x36, 0x41, 0x37, 0x3b, 0x35,
+    0x36, 0x44, 0x36, 0x3c, 0x43, 0x32, 0x3e, 0x3e, 0x42, 0x45, 0x32, 0x3c,
+    0x3a, 0x3b, 0x35, 0x43, 0x41, 0x3d, 0x44, 0x50, 0x43, 0x31, 0x3e, 0x44,
+    0x44, 0x41, 0x3a, 0x44, 0x36, 0x39, 0x3b, 0x3c, 0x32, 0x38, 0x3b, 0x45,
+    0x38, 0x43, 0x40, 0x42, 0x33, 0x3e, 0x4a, 0x42, 0x45, 0x39, 0x2f, 0x42,
+    0x39, 0x35, 0x44, 0x3e, 0x39, 0x2f, 0x34, 0x33, 0x49, 0x29, 0x50, 0x4f,
+    0x2b, 0x36, 0x34, 0x2d, 0x47, 0x33, 0x49, 0x3c, 0x33, 0x51, 0x49, 0x3f,
+    0x34, 0x39, 0x4a, 0x2c, 0x34, 0x45, 0x4f, 0x47, 0x34, 0x42, 0x3a, 0x3d,
+    0x36, 0x4a, 0x3b, 0x43, 0x36, 0x3f, 0x39, 0x4b, 0x38, 0x3a, 0x31, 0x3d,
+    0x32, 0x42, 0x3a, 0x47, 0x48, 0x3e, 0x44, 0x3f, 0x39, 0x3e, 0x44, 0x35,
+    0x41, 0x3c, 0x45, 0x3a, 0x3e, 0x3b, 0x3d, 0x2f, 0x37, 0x40, 0x3e, 0x43,
+    0x39, 0x39, 0x33, 0x3b, 0x37, 0x3b, 0x37, 0x37, 0x37, 0x39, 0x36, 0x31,
+    0x39, 0x3b, 0x41, 0x39, 0x3b, 0x40, 0x36, 0x37, 0x42, 0x39, 0x3a, 0x46,
+    0x3f, 0x30, 0x38, 0x39, 0x35, 0x32, 0x3e, 0x3a, 0x43, 0x43, 0x3e, 0x33,
+    0x42, 0x3f, 0x41, 0x3c, 0x46, 0x34, 0x34, 0x40, 0x43, 0x37, 0x32, 0x43,
+    0x3c, 0x37, 0x36, 0x33, 0x3d, 0x36, 0x3a, 0x40, 0x39, 0x38, 0x32, 0x3e,
+    0x32, 0x3d, 0x37, 0x49, 0x42, 0x47, 0x41, 0x3b, 0x3d, 0x3c, 0x3a, 0x37,
+    0x3c, 0x45, 0x3a, 0x45, 0x36, 0x44, 0x3a, 0x3a, 0x3a, 0x3c, 0x43, 0x3b,
+    0x3b, 0x35, 0x38, 0x47, 0x36, 0x40, 0x32, 0x43, 0x3e, 0x39, 0x42, 0x40,
+    0x2c, 0x3c, 0x4c, 0x4c, 0x43, 0x3b, 0x37, 0x4a, 0x3f, 0x3c, 0x45, 0x44,
+    0x3f, 0x30, 0x36, 0x31, 0x4f, 0x2f, 0x5d, 0x4b, 0x34, 0x34, 0x2d, 0x2b,
+    0x44, 0x31, 0x4e, 0x40, 0x2e, 0x4d, 0x48, 0x3e, 0x37, 0x2b, 0x49, 0x25,
+    0x31, 0x49, 0x44, 0x49, 0x39, 0x39, 0x4b, 0x3a, 0x3a, 0x41, 0x3e, 0x42,
+    0x3c, 0x36, 0x36, 0x4a, 0x32, 0x44, 0x3e, 0x48, 0x3e, 0x3c, 0x37, 0x49,
+    0x3d, 0x34, 0x3f, 0x37, 0x33, 0x36, 0x46, 0x3a, 0x3a, 0x31, 0x45, 0x3f,
+    0x3a, 0x31, 0x3b, 0x33, 0x41, 0x42, 0x35, 0x39, 0x38, 0x44, 0x36, 0x3a,
+    0x3f, 0x3b, 0x37, 0x3e, 0x3b, 0x38, 0x2f, 0x32, 0x44, 0x3d, 0x44, 0x41,
+    0x39, 0x36, 0x3a, 0x34, 0x39, 0x38, 0x34, 0x3f, 0x3b, 0x37, 0x34, 0x34,
+    0x40, 0x3d, 0x34, 0x3a, 0x46, 0x42, 0x3f, 0x34, 0x38, 0x33, 0x39, 0x44,
+    0x3f, 0x41, 0x3c, 0x31, 0x40, 0x32, 0x3f, 0x37, 0x37, 0x41, 0x3e, 0x35,
+    0x37, 0x48, 0x3b, 0x41, 0x3d, 0x3a, 0x3f, 0x35, 0x33, 0x3c, 0x36, 0x3b,
+    0x3a, 0x48, 0x33, 0x42, 0x37, 0x33, 0x39, 0x41, 0x3c, 0x3d, 0x3b, 0x4d,
+    0x39, 0x3a, 0x3e, 0x44, 0x3d, 0x41, 0x3b, 0x38, 0x49, 0x41, 0x3a, 0x38,
+    0x34, 0x38, 0x38, 0x3c, 0x45, 0x3c, 0x37, 0x3b, 0x36, 0x3e, 0x4a, 0x4b,
+    0x42, 0x3f, 0x32, 0x45, 0x46, 0x35, 0x46, 0x41, 0x38, 0x33, 0x39, 0x37,
+    0x44, 0x2b, 0x60, 0x4a, 0x2a, 0x2e, 0x35, 0x2d, 0x43, 0x37, 0x51, 0x47,
+    0x2f, 0x4d, 0x50, 0x3e, 0x3a, 0x33, 0x4f, 0x2a, 0x35, 0x45, 0x4a, 0x4c,
+    0x3b, 0x3d, 0x43, 0x44, 0x3d, 0x3f, 0x4a, 0x3e, 0x49, 0x37, 0x2e, 0x4f,
+    0x39, 0x3f, 0x32, 0x3c, 0x37, 0x3b, 0x39, 0x4d, 0x34, 0x3f, 0x46, 0x44,
+    0x3d, 0x40, 0x3f, 0x40, 0x39, 0x33, 0x39, 0x3e, 0x3d, 0x40, 0x31, 0x30,
+    0x35, 0x3d, 0x3e, 0x3a, 0x3e, 0x32, 0x31, 0x3e, 0x48, 0x3c, 0x40, 0x43,
+    0x3f, 0x3f, 0x34, 0x2e, 0x3a, 0x3e, 0x3b, 0x43, 0x45, 0x32, 0x3a, 0x31,
+    0x37, 0x38, 0x31, 0x35, 0x34, 0x3d, 0x42, 0x36, 0x46, 0x37, 0x32, 0x47,
+    0x41, 0x3c, 0x35, 0x35, 0x36, 0x41, 0x3a, 0x3b, 0x42, 0x44, 0x36, 0x31,
+    0x3c, 0x3d, 0x34, 0x34, 0x3b, 0x40, 0x40, 0x2e, 0x40, 0x46, 0x3b, 0x43,
+    0x3f, 0x40, 0x3b, 0x3a, 0x32, 0x40, 0x46, 0x39, 0x3c, 0x49, 0x2f, 0x3d,
+    0x49, 0x3e, 0x44, 0x3c, 0x3e, 0x35, 0x3f, 0x44, 0x41, 0x40, 0x3e, 0x47,
+    0x3d, 0x40, 0x3f, 0x41, 0x3b, 0x41, 0x41, 0x3f, 0x40, 0x3f, 0x3e, 0x3e,
+    0x3f, 0x43, 0x35, 0x40, 0x2b, 0x42, 0x45, 0x56, 0x40, 0x3c, 0x2f, 0x44,
+    0x44, 0x3d, 0x3e, 0x3d, 0x40, 0x2d, 0x39, 0x31, 0x54, 0x2f, 0x61, 0x48,
+    0x2e, 0x37, 0x37, 0x32, 0x3e, 0x2d, 0x52, 0x4d, 0x2d, 0x4d, 0x4c, 0x3a,
+    0x3a, 0x31, 0x4e, 0x2d, 0x31, 0x48, 0x47, 0x54, 0x45, 0x38, 0x3b, 0x3d,
+    0x42, 0x41, 0x44, 0x4a, 0x48, 0x42, 0x2f, 0x4d, 0x31, 0x34, 0x3a, 0x46,
+    0x37, 0x44, 0x2c, 0x45, 0x46, 0x43, 0x40, 0x3f, 0x34, 0x33, 0x40, 0x39,
+    0x32, 0x35, 0x3a, 0x40, 0x3f, 0x3f, 0x36, 0x32, 0x3f, 0x3d, 0x35, 0x48,
+    0x3c, 0x48, 0x37, 0x39, 0x35, 0x3f, 0x37, 0x3d, 0x44, 0x46, 0x2d, 0x2a,
+    0x47, 0x38, 0x3a, 0x39, 0x45, 0x3b, 0x40, 0x2d, 0x37, 0x33, 0x41, 0x3c,
+    0x40, 0x35, 0x3f, 0x32, 0x3a, 0x36, 0x40, 0x41, 0x3a, 0x3c, 0x33, 0x31,
+    0x42, 0x3f, 0x41, 0x3a, 0x41, 0x46, 0x38, 0x2f, 0x3c, 0x3d, 0x3d, 0x39,
+    0x3b, 0x46, 0x41, 0x31, 0x46, 0x36, 0x40, 0x48, 0x3c, 0x33, 0x42, 0x32,
+    0x3b, 0x40, 0x3f, 0x36, 0x37, 0x44, 0x34, 0x35, 0x32, 0x32, 0x37, 0x38,
+    0x33, 0x3b, 0x37, 0x4a, 0x3f, 0x46, 0x3a, 0x41, 0x32, 0x37, 0x30, 0x3e,
+    0x40, 0x35, 0x41, 0x40, 0x37, 0x41, 0x2b, 0x40, 0x3d, 0x3d, 0x32, 0x38,
+    0x34, 0x3e, 0x47, 0x61, 0x43, 0x3b, 0x3c, 0x42, 0x46, 0x3d, 0x40, 0x4a,
+    0x3c, 0x2d, 0x33, 0x35, 0x55, 0x38, 0x69, 0x4f, 0x33, 0x37, 0x30, 0x39,
+    0x44, 0x2e, 0x58, 0x4b, 0x2a, 0x51, 0x4b, 0x3c, 0x39, 0x2e, 0x51, 0x2d,
+    0x30, 0x4a, 0x42, 0x53, 0x3f, 0x39, 0x3e, 0x44, 0x3b, 0x40, 0x47, 0x44,
+    0x47, 0x3e, 0x39, 0x4b, 0x40, 0x3d, 0x42, 0x39, 0x3b, 0x39, 0x32, 0x42,
+    0x36, 0x36, 0x36, 0x42, 0x44, 0x34, 0x33, 0x40, 0x40, 0x40, 0x3a, 0x3a,
+    0x41, 0x3f, 0x31, 0x30, 0x3f, 0x31, 0x30, 0x39, 0x46, 0x36, 0x35, 0x34,
+    0x40, 0x43, 0x3c, 0x41, 0x31, 0x46, 0x35, 0x26, 0x44, 0x32, 0x3d, 0x35,
+    0x3d, 0x3c, 0x36, 0x32, 0x39, 0x3a, 0x30, 0x40, 0x48, 0x3e, 0x38, 0x37,
+    0x44, 0x3b, 0x3d, 0x42, 0x3d, 0x3c, 0x32, 0x2b, 0x3f, 0x41, 0x39, 0x3d,
+    0x3e, 0x3f, 0x35, 0x2f, 0x46, 0x3d, 0x3d, 0x3b, 0x45, 0x37, 0x31, 0x35,
+    0x44, 0x40, 0x3a, 0x45, 0x3a, 0x3c, 0x39, 0x31, 0x3b, 0x3d, 0x3b, 0x3a,
+    0x43, 0x44, 0x39, 0x47, 0x44, 0x36, 0x3e, 0x39, 0x48, 0x3f, 0x39, 0x4b,
+    0x3c, 0x36, 0x3d, 0x44, 0x44, 0x3f, 0x39, 0x43, 0x3f, 0x37, 0x3f, 0x37,
+    0x3b, 0x3b, 0x38, 0x3b, 0x3f, 0x40, 0x31, 0x44, 0x30, 0x44, 0x46, 0x5b,
+    0x46, 0x3f, 0x39, 0x40, 0x40, 0x37, 0x4a, 0x46, 0x3f, 0x36, 0x40, 0x39,
+    0x59, 0x3e, 0x66, 0x57, 0x32, 0x34, 0x2e, 0x33, 0x46, 0x31, 0x58, 0x44,
+    0x26, 0x4c, 0x4b, 0x3c, 0x39, 0x2e, 0x4d, 0x35, 0x32, 0x46, 0x52, 0x52,
+    0x3e, 0x40, 0x39, 0x3c, 0x39, 0x3d, 0x53, 0x48, 0x41, 0x3c, 0x3b, 0x4d,
+    0x3c, 0x3e, 0x38, 0x44, 0x3a, 0x3a, 0x29, 0x4a, 0x3c, 0x37, 0x36, 0x38,
+    0x3a, 0x31, 0x37, 0x39, 0x3a, 0x40, 0x46, 0x32, 0x42, 0x38, 0x32, 0x2e,
+    0x3a, 0x45, 0x44, 0x34, 0x34, 0x38, 0x32, 0x2e, 0x35, 0x40, 0x3a, 0x41,
+    0x42, 0x3d, 0x37, 0x2c, 0x3f, 0x37, 0x3c, 0x3d, 0x3a, 0x36, 0x33, 0x35,
+    0x3c, 0x34, 0x3c, 0x39, 0x3c, 0x3a, 0x37, 0x30, 0x30, 0x3e, 0x3d, 0x3a,
+    0x44, 0x37, 0x36, 0x32, 0x36, 0x37, 0x36, 0x3a, 0x3c, 0x41, 0x3a, 0x35,
+    0x36, 0x3a, 0x34, 0x40, 0x39, 0x40, 0x3e, 0x32, 0x34, 0x46, 0x33, 0x3f,
+    0x36, 0x45, 0x3e, 0x35, 0x3f, 0x38, 0x3f, 0x3e, 0x3b, 0x3a, 0x36, 0x3b,
+    0x36, 0x38, 0x32, 0x3f, 0x44, 0x3c, 0x35, 0x48, 0x38, 0x39, 0x31, 0x49,
+    0x3d, 0x43, 0x36, 0x3f, 0x31, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x39, 0x3b,
+    0x40, 0x42, 0x3c, 0x43, 0x36, 0x4a, 0x48, 0x67, 0x4e, 0x43, 0x36, 0x46,
+    0x44, 0x3f, 0x4b, 0x4b, 0x3f, 0x38, 0x3c, 0x3c, 0x5e, 0x38, 0x70, 0x52,
+    0x38, 0x32, 0x3b, 0x36, 0x4a, 0x2c, 0x52, 0x46, 0x29, 0x4f, 0x48, 0x42,
+    0x2d, 0x2e, 0x4f, 0x28, 0x28, 0x45, 0x4d, 0x52, 0x42, 0x3e, 0x3f, 0x41,
+    0x3c, 0x3a, 0x47, 0x50, 0x44, 0x45, 0x33, 0x4b, 0x3e, 0x3f, 0x42, 0x3d,
+    0x43, 0x34, 0x27, 0x3f, 0x42, 0x3e, 0x43, 0x3e, 0x3a, 0x3c, 0x37, 0x3b,
+    0x3f, 0x30, 0x3a, 0x3e, 0x3c, 0x34, 0x37, 0x24, 0x3d, 0x43, 0x40, 0x44,
+    0x40, 0x46, 0x31, 0x2f, 0x43, 0x38, 0x38, 0x39, 0x3c, 0x34, 0x2d, 0x2a,
+    0x38, 0x31, 0x43, 0x3b, 0x39, 0x3b, 0x32, 0x34, 0x3e, 0x39, 0x41, 0x3b,
+    0x3e, 0x33, 0x3a, 0x2a, 0x41, 0x3f, 0x3c, 0x43, 0x3b, 0x3e, 0x35, 0x2c,
+    0x38, 0x41, 0x33, 0x31, 0x3e, 0x3f, 0x3a, 0x3c, 0x3b, 0x35, 0x3f, 0x3d,
+    0x42, 0x3a, 0x3c, 0x35, 0x3f, 0x40, 0x3c, 0x3e, 0x37, 0x41, 0x3d, 0x38,
+    0x34, 0x31, 0x36, 0x3d, 0x3d, 0x47, 0x36, 0x44, 0x3f, 0x45, 0x3c, 0x3c,
+    0x35, 0x36, 0x31, 0x4f, 0x46, 0x3a, 0x41, 0x42, 0x40, 0x32, 0x33, 0x41,
+    0x34, 0x40, 0x3d, 0x43, 0x3b, 0x3a, 0x32, 0x3c, 0x42, 0x42, 0x3d, 0x43,
+    0x37, 0x45, 0x45, 0xff, 0x4b, 0x45, 0x3b, 0x40, 0x43, 0x3e, 0x47, 0x49,
+    0x3d, 0x3b, 0x3e, 0x33, 0x58, 0x35, 0x71, 0x54, 0x2f, 0x38, 0x38, 0x33,
+    0x47, 0x35, 0x5b, 0x46, 0x2c, 0x4c, 0x43, 0x37, 0x36, 0x39, 0x4f, 0x30,
+    0x26, 0x48, 0x51, 0x48, 0x46, 0x45, 0x3b, 0x39, 0x42, 0x50, 0x47, 0x4c,
+    0x4b, 0x3b, 0x3d, 0x4d, 0x41, 0x34, 0x40, 0x44, 0x38, 0x32, 0x2d, 0x43,
+    0x39, 0x36, 0x3b, 0x3b, 0x40, 0x3d, 0x37, 0x3c, 0x44, 0x39, 0x42, 0x37,
+    0x38, 0x38, 0x32, 0x2f, 0x41, 0x40, 0x3f, 0x3a, 0x37, 0x35, 0x3b, 0x2a,
+    0x37, 0x30, 0x3c, 0x37, 0x40, 0x38, 0x3a, 0x27, 0x44, 0x3d, 0x43, 0x40,
+    0x35, 0x3f, 0x3e, 0x32, 0x3e, 0x3c, 0x40, 0x39, 0x39, 0x3a, 0x41, 0x31,
+    0x3b, 0x3f, 0x34, 0x43, 0x3a, 0x38, 0x42, 0x2a, 0x47, 0x46, 0x3b, 0x38,
+    0x47, 0x45, 0x39, 0x31, 0x43, 0x40, 0x37, 0x3a, 0x3d, 0x3e, 0x39, 0x30,
+    0x36, 0x37, 0x3a, 0x43, 0x3f, 0x32, 0x31, 0x41, 0x45, 0x3e, 0x43, 0x38,
+    0x3f, 0x37, 0x3c, 0x49, 0x3b, 0x33, 0x3d, 0x3a, 0x37, 0x44, 0x32, 0x50,
+    0x39, 0x44, 0x3e, 0x3f, 0x3d, 0x41, 0x3e, 0x3e, 0x42, 0x44, 0x45, 0x3f,
+    0x36, 0x3f, 0x37, 0x39, 0x3b, 0x3d, 0x3b, 0x3b, 0x2f, 0x46, 0x40, 0x6d,
+    0x50, 0x45, 0x3b, 0x45, 0x46, 0x3b, 0x42, 0x48, 0x42, 0x3c, 0x39, 0x37,
+    0x57, 0x3b, 0x6c, 0x5b, 0x32, 0x35, 0x3d, 0x39, 0x48, 0x31, 0x5c, 0x46,
+    0x29, 0x4c, 0x3f, 0x3e, 0x37, 0x33, 0x58, 0x32, 0x2a, 0x43, 0x4c, 0x50,
+    0x3b, 0x44, 0x3c, 0x41, 0x39, 0x48, 0x55, 0x4c, 0x42, 0x38, 0x3b, 0x51,
+    0x3f, 0x38, 0x44, 0x46, 0x36, 0x3b, 0x38, 0x4a, 0x3f, 0x37, 0x36, 0x3c,
+    0x31, 0x3d, 0x32, 0x39, 0x3b, 0x3f, 0x3e, 0x35, 0x38, 0x3f, 0x34, 0x2b,
+    0x37, 0x36, 0x39, 0x40, 0x37, 0x41, 0x32, 0x27, 0x36, 0x33, 0x40, 0x3a,
+    0x3f, 0x44, 0x3f, 0x25, 0x38, 0x34, 0x42, 0x3c, 0x3a, 0x40, 0x38, 0x31,
+    0x49, 0x3e, 0x33, 0x3d, 0x31, 0x36, 0x39, 0x2b, 0x44, 0x2f, 0x43, 0x34,
+    0x34, 0x37, 0x39, 0x33, 0x3b, 0x34, 0x42, 0x3c, 0x40, 0x45, 0x36, 0x31,
+    0x43, 0x47, 0x3e, 0x3f, 0x40, 0x3a, 0x33, 0x34, 0x41, 0x44, 0x3a, 0x43,
+    0x3e, 0x38, 0x36, 0x31, 0x42, 0x44, 0x40, 0x41, 0x44, 0x43, 0x33, 0x42,
+    0x3d, 0x41, 0x3d, 0x3e, 0x3c, 0x39, 0x3e, 0x4f, 0x3f, 0x37, 0x31, 0x40,
+    0x3b, 0x38, 0x35, 0x3b, 0x44, 0x41, 0x41, 0x37, 0x40, 0x42, 0x2d, 0x3d,
+    0x39, 0x48, 0x44, 0x3e, 0x34, 0x48, 0x49, 0x6d, 0x45, 0x4b, 0x3a, 0x44,
+    0x49, 0x40, 0x4d, 0x51, 0x3f, 0x34, 0x3b, 0x40, 0x52, 0x34, 0x6f, 0x56,
+    0x33, 0x3e, 0x40, 0x39, 0x41, 0x32, 0x5d, 0x45, 0x2e, 0x51, 0x48, 0x3c,
+    0x2e, 0x2e, 0x51, 0x39, 0x32, 0x45, 0x4a, 0x4c, 0x3b, 0x40, 0x40, 0x3b,
+    0x36, 0x41, 0x54, 0x4e, 0x4a, 0x49, 0x3b, 0x4d, 0x3c, 0x41, 0x38, 0x47,
+    0x3d, 0x3c, 0x37, 0x48, 0x3f, 0x42, 0x3e, 0x36, 0x39, 0x46, 0x37, 0x3e,
+    0x3b, 0x38, 0x40, 0x3b, 0x39, 0x32, 0x3e, 0x29, 0x37, 0x35, 0x3c, 0x3d,
+    0x37, 0x3b, 0x35, 0x2f, 0x32, 0x3b, 0x37, 0x3c, 0x40, 0x3e, 0x39, 0x27,
+    0x3b, 0x38, 0x37, 0x36, 0x39, 0x37, 0x37, 0x35, 0x42, 0x3e, 0x3b, 0x43,
+    0x41, 0x3c, 0x37, 0x2a, 0x3a, 0x3e, 0x38, 0x40, 0x36, 0x3e, 0x44, 0x2e,
+    0x3e, 0x3a, 0x37, 0x3b, 0x3e, 0x41, 0x3d, 0x30, 0x3b, 0x3f, 0x41, 0x45,
+    0x3a, 0x48, 0x37, 0x2f, 0x3a, 0x37, 0x34, 0x43, 0x42, 0x3d, 0x38, 0x41,
+    0x3b, 0x3c, 0x39, 0x3c, 0x39, 0x47, 0x2e, 0x41, 0x42, 0x40, 0x32, 0x36,
+    0x43, 0x40, 0x3d, 0x4c, 0x38, 0x3e, 0x3b, 0x41, 0x3d, 0x3b, 0x34, 0x43,
+    0x43, 0x3f, 0x44, 0x3c, 0x3a, 0x33, 0x39, 0x42, 0x43, 0x3f, 0x33, 0x3d,
+    0x33, 0x3e, 0x48, 0x6b, 0x48, 0x43, 0x36, 0x47, 0x49, 0x44, 0x4a, 0x49,
+    0x3c, 0x31, 0x35, 0x3e, 0x5c, 0x34, 0x73, 0x53, 0x33, 0x3c, 0x32, 0x3b,
+    0x43, 0x27, 0x59, 0x4e, 0x2b, 0x51, 0x4f, 0x37, 0x36, 0x34, 0x56, 0x34,
+    0x32, 0x4f, 0x46, 0x50, 0x40, 0x40, 0x3c, 0x3e, 0x34, 0x37, 0x50, 0x49,
+    0x43, 0x47, 0x3e, 0x52, 0x44, 0x38, 0x3b, 0x4f, 0x3a, 0x3d, 0x2b, 0x4c,
+    0x40, 0x38, 0x3a, 0x35, 0x3a, 0x3a, 0x3d, 0x38, 0x3d, 0x3b, 0x37, 0x48,
+    0x3d, 0x3d, 0x32, 0x30, 0x3a, 0x34, 0x3f, 0x3a, 0x3b, 0x3e, 0x35, 0x2f,
+    0x3b, 0x3a, 0x45, 0x3d, 0x42, 0x33, 0x33, 0x24, 0x44, 0x39, 0x3c, 0x3d,
+    0x41, 0x3c, 0x37, 0x2c, 0x3b, 0x36, 0x34, 0x41, 0x3d, 0x3f, 0x39, 0x32,
+    0x3c, 0x40, 0x44, 0x3d, 0x41, 0x3d, 0x3a, 0x29, 0x3e, 0x3e, 0x43, 0x33,
+    0x3f, 0x3e, 0x3e, 0x31, 0x38, 0x3a, 0x34, 0x3d, 0x3f, 0x3e, 0x3a, 0x3d,
+    0x3e, 0x48, 0x45, 0x3d, 0x44, 0x37, 0x33, 0x3d, 0x45, 0x39, 0x40, 0x40,
+    0x42, 0x3f, 0x3f, 0x3d, 0x3a, 0x3b, 0x41, 0x33, 0x41, 0x3c, 0x32, 0x55,
+    0x43, 0x3a, 0x32, 0x40, 0x3c, 0x3e, 0x40, 0x43, 0x37, 0x3f, 0x40, 0x38,
+    0x43, 0x41, 0x36, 0x42, 0x44, 0x3c, 0x32, 0x3f, 0x38, 0x42, 0x46, 0x59,
+    0x4c, 0x41, 0x39, 0x47, 0x46, 0x46, 0x44, 0x44, 0x35, 0x42, 0x32, 0x39,
+    0x4f, 0x34, 0x6d, 0x55, 0x31, 0x3b, 0x3a, 0x3f, 0x44, 0x2c, 0x5d, 0x43,
+    0x26, 0x4a, 0x4f, 0x40, 0x36, 0x32, 0x4d, 0x33, 0x2f, 0x50, 0x4d, 0x57,
+    0x3b, 0x40, 0x42, 0x44, 0x41, 0x3f, 0x52, 0x4e, 0x35, 0x41, 0x44, 0x52,
+    0x40, 0x35, 0x39, 0x4b, 0x45, 0x34, 0x2c, 0x4a, 0x3b, 0x41, 0x31, 0x33,
+    0x3f, 0x3a, 0x36, 0x3c, 0x3c, 0x33, 0x30, 0x38, 0x43, 0x3f, 0x32, 0x2d,
+    0x3f, 0x3a, 0x38, 0x41, 0x39, 0x45, 0x36, 0x2e, 0x3c, 0x38, 0x45, 0x3f,
+    0x40, 0x3f, 0x3e, 0x26, 0x41, 0x37, 0x3c, 0x44, 0x3f, 0x3f, 0x35, 0x37,
+    0x46, 0x34, 0x37, 0x3e, 0x48, 0x38, 0x36, 0x34, 0x33, 0x39, 0x40, 0x3c,
+    0x42, 0x3d, 0x3b, 0x31, 0x38, 0x3b, 0x44, 0x42, 0x45, 0x38, 0x41, 0x30,
+    0x3d, 0x42, 0x36, 0x3f, 0x3b, 0x45, 0x37, 0x32, 0x3c, 0x37, 0x3d, 0x42,
+    0x38, 0x3d, 0x2f, 0x31, 0x39, 0x40, 0x3f, 0x44, 0x3a, 0x41, 0x44, 0x46,
+    0x3d, 0x3a, 0x32, 0x3b, 0x34, 0x47, 0x36, 0x4c, 0x47, 0x35, 0x3c, 0x33,
+    0x3b, 0x3c, 0x30, 0x43, 0x43, 0x3f, 0x31, 0x40, 0x3a, 0x37, 0x30, 0x46,
+    0x39, 0x3b, 0x42, 0x40, 0x2d, 0x3f, 0x3e, 0x6a, 0x50, 0x3b, 0x31, 0x54,
+    0x47, 0x3d, 0x48, 0x4e, 0x3b, 0x41, 0x3a, 0x39, 0x49, 0x36, 0x64, 0x4e,
+    0x32, 0x39, 0x3d, 0x37, 0x42, 0x2c, 0x5c, 0x43, 0x2a, 0x4b, 0x4b, 0x46,
+    0x30, 0x29, 0x52, 0x31, 0x35, 0x44, 0x4a, 0x4b, 0x3d, 0x3b, 0x4e, 0x42,
+    0x3d, 0x39, 0x42, 0x52, 0x3f, 0x36, 0x3e, 0x50, 0x3f, 0x32, 0x35, 0x3a,
+    0x40, 0x39, 0x35, 0x48, 0x3b, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x2f, 0x36,
+    0x38, 0x34, 0x3f, 0x44, 0x32, 0x3f, 0x37, 0x33, 0x33, 0x35, 0x2e, 0x41,
+    0x37, 0x3e, 0x38, 0x28, 0x49, 0x30, 0x46, 0x39, 0x3b, 0x30, 0x38, 0x28,
+    0x3b, 0x3d, 0x3a, 0x43, 0x3f, 0x34, 0x43, 0x36, 0x39, 0x3c, 0x3e, 0x3e,
+    0x39, 0x3b, 0x39, 0x32, 0x3c, 0x36, 0x3e, 0x38, 0x34, 0x3c, 0x3a, 0x2a,
+    0x46, 0x3d, 0x40, 0x37, 0x3b, 0x39, 0x3b, 0x34, 0x38, 0x31, 0x43, 0x46,
+    0x3b, 0x43, 0x39, 0x2b, 0x38, 0x40, 0x3e, 0x39, 0x35, 0x3d, 0x2c, 0x36,
+    0x37, 0x40, 0x36, 0x40, 0x41, 0x38, 0x32, 0x3f, 0x36, 0x46, 0x34, 0x31,
+    0x40, 0x3e, 0x3c, 0x4e, 0x42, 0x3d, 0x36, 0x3f, 0x42, 0x3f, 0x33, 0x40,
+    0x34, 0x37, 0x3c, 0x3b, 0x31, 0x47, 0x32, 0x3c, 0x34, 0x3d, 0x42, 0x3b,
+    0x37, 0x41, 0x3b, 0x64, 0x52, 0x40, 0x36, 0x4e, 0x46, 0x3f, 0x3f, 0x47,
+    0x3c, 0x3a, 0x3a, 0x41, 0x4a, 0x32, 0x5e, 0x50, 0x2d, 0x39, 0x3a, 0x38,
+    0x3d, 0x2c, 0x5a, 0x3e, 0x2e, 0x47, 0x3e, 0x3e, 0x33, 0x29, 0x4c, 0x35,
+    0x30, 0x4d, 0x4d, 0x4d, 0x38, 0x42, 0x51, 0x47, 0x39, 0x3c, 0x43, 0x4b,
+    0x42, 0x3f, 0x3a, 0x4b, 0x44, 0x3f, 0x3a, 0x44, 0x3e, 0x37, 0x30, 0x45,
+    0x3d, 0x36, 0x34, 0x3f, 0x36, 0x35, 0x37, 0x36, 0x43, 0x3b, 0x37, 0x3e,
+    0x35, 0x3e, 0x32, 0x34, 0x32, 0x38, 0x3c, 0x3a, 0x3a, 0x3c, 0x30, 0x2b,
+    0x31, 0x37, 0x30, 0x42, 0x36, 0x37, 0x36, 0x2c, 0x3c, 0x31, 0x41, 0x37,
+    0x44, 0x41, 0x3b, 0x37, 0x41, 0x3f, 0x38, 0x3b, 0x3a, 0x3a, 0x3c, 0x2f,
+    0x47, 0x41, 0x3e, 0x33, 0x42, 0x3a, 0x32, 0x34, 0x44, 0x40, 0x43, 0x3d,
+    0x34, 0x41, 0x38, 0x35, 0x35, 0x3b, 0x45, 0x38, 0x32, 0x37, 0x3c, 0x2e,
+    0x39, 0x40, 0x30, 0x3e, 0x42, 0x35, 0x3d, 0x36, 0x3e, 0x3d, 0x39, 0x46,
+    0x3f, 0x36, 0x37, 0x49, 0x41, 0x39, 0x3d, 0x3d, 0x33, 0x44, 0x42, 0x50,
+    0x3d, 0x3c, 0x3e, 0x3f, 0x42, 0x42, 0x3b, 0x3d, 0x41, 0x31, 0x39, 0x3a,
+    0x44, 0x34, 0x38, 0x47, 0x44, 0x38, 0x3b, 0x42, 0x30, 0x42, 0x44, 0x57,
+    0x49, 0x3a, 0x39, 0x4f, 0x41, 0x3e, 0x40, 0x43, 0x37, 0x42, 0x3b, 0x48,
+    0x50, 0x29, 0x5b, 0x44, 0x2c, 0x40, 0x3f, 0x3c, 0x46, 0x34, 0x5c, 0x41,
+    0x2c, 0x48, 0x46, 0x46, 0x35, 0x32, 0x4c, 0x35, 0x2f, 0x3b, 0x48, 0x44,
+    0x41, 0x41, 0x49, 0x45, 0x34, 0x37, 0x44, 0x45, 0x43, 0x3b, 0x42, 0x44,
+    0x3a, 0x37, 0x48, 0x49, 0x34, 0x39, 0x33, 0x4a, 0x40, 0x3d, 0x33, 0x39,
+    0x39, 0x3b, 0x30, 0x31, 0x3d, 0x47, 0x3c, 0x3a, 0x34, 0x3c, 0x3a, 0x2b,
+    0x3a, 0x34, 0x41, 0x40, 0x42, 0x36, 0x44, 0x2c, 0x40, 0x47, 0x3b, 0x37,
+    0x38, 0x42, 0x44, 0x29, 0x36, 0x3d, 0x3d, 0x36, 0x42, 0x3b, 0x35, 0x36,
+    0x43, 0x39, 0x41, 0x3d, 0x45, 0x41, 0x31, 0x32, 0x40, 0x3d, 0x3c, 0x41,
+    0x3e, 0x3d, 0x35, 0x34, 0x32, 0x38, 0x36, 0x3f, 0x3b, 0x3d, 0x39, 0x36,
+    0x40, 0x3e, 0x3d, 0x3a, 0x3a, 0x3b, 0x3c, 0x32, 0x40, 0x34, 0x3a, 0x36,
+    0x42, 0x47, 0x3e, 0x33, 0x3a, 0x44, 0x30, 0x39, 0x40, 0x3a, 0x36, 0x44,
+    0x3c, 0x3b, 0x3f, 0x33, 0x3e, 0x3c, 0x35, 0x53, 0x43, 0x3c, 0x3f, 0x43,
+    0x3d, 0x44, 0x33, 0x47, 0x42, 0x40, 0x37, 0x3b, 0x43, 0x3f, 0x33, 0x41,
+    0x38, 0x42, 0x44, 0x3d, 0x2d, 0x3f, 0x46, 0x49, 0x4e, 0x3f, 0x36, 0x45,
+    0x45, 0x39, 0x40, 0x42, 0x39, 0x39, 0x3a, 0x42, 0x45, 0x2c, 0x61, 0x44,
+    0x30, 0x45, 0x38, 0x3a, 0x40, 0x37, 0x58, 0x39, 0x31, 0x3e, 0x3a, 0x3e,
+    0x37, 0x32, 0x4a, 0x39, 0x2e, 0x47, 0x3e, 0x4e, 0x3f, 0x3e, 0x48, 0x45,
+    0x3f, 0x48, 0x3a, 0x3f, 0x40, 0x36, 0x3a, 0x44, 0x36, 0x3e, 0x3d, 0x41,
+    0x45, 0x36, 0x36, 0x4b, 0x3a, 0x3d, 0x45, 0x48, 0x38, 0x45, 0x39, 0x38,
+    0x38, 0x3a, 0x42, 0x34, 0x3f, 0x34, 0x39, 0x34, 0x32, 0x3f, 0x3c, 0x3d,
+    0x3d, 0x47, 0x3a, 0x2f, 0x3c, 0x3e, 0x3f, 0x39, 0x35, 0x42, 0x3c, 0x2a,
+    0x3b, 0x35, 0x42, 0x44, 0x46, 0x39, 0x38, 0x39, 0x43, 0x3a, 0x38, 0x42,
+    0x3d, 0x3a, 0x40, 0x35, 0x34, 0x39, 0x3a, 0x38, 0x43, 0x42, 0x42, 0x2d,
+    0x31, 0x3b, 0x33, 0x40, 0x3b, 0x47, 0x35, 0x30, 0x3a, 0x3c, 0x3b, 0x47,
+    0x3a, 0x3c, 0x38, 0x35, 0x3c, 0x35, 0x3e, 0x3e, 0x39, 0x3d, 0x39, 0x40,
+    0x37, 0x33, 0x49, 0x38, 0x3c, 0x43, 0x34, 0x40, 0x39, 0x42, 0x3c, 0x3b,
+    0x3e, 0x45, 0x3e, 0x51, 0x3d, 0x3f, 0x3b, 0x34, 0x37, 0x3c, 0x40, 0x47,
+    0x3c, 0x41, 0x3f, 0x41, 0x37, 0x3e, 0x36, 0x3c, 0x42, 0x40, 0x3f, 0x3a,
+    0x3b, 0x42, 0x44, 0x4b, 0x4b, 0x37, 0x41, 0x4d, 0x41, 0x45, 0x40, 0x41,
+    0x40, 0x38, 0x37, 0x40, 0x42, 0x2c, 0x57, 0x43, 0x2d, 0x49, 0x3a, 0x3e,
+    0x37, 0x2f, 0x52, 0x37, 0x31, 0x42, 0x3b, 0x3f, 0x39, 0x38, 0x48, 0x3c,
+    0x37, 0x3d, 0x3a, 0x39, 0x3a, 0x45, 0x4b, 0x49, 0x3e, 0x44, 0x48, 0x49,
+    0x3d, 0x39, 0x3c, 0x41, 0x41, 0x38, 0x45, 0x38, 0x33, 0x3d, 0x37, 0x47,
+    0x34, 0x3f, 0x3b, 0x3d, 0x39, 0x34, 0x30, 0x39, 0x44, 0x36, 0x34, 0x3c,
+    0x37, 0x38, 0x45, 0x34, 0x40, 0x33, 0x41, 0x3a, 0x3e, 0x3c, 0x3b, 0x3a,
+    0x40, 0x3f, 0x3b, 0x3d, 0x3b, 0x46, 0x41, 0x2a, 0x3a, 0x3c, 0x42, 0x46,
+    0x33, 0x3f, 0x2d, 0x3a, 0x45, 0x45, 0x38, 0x3b, 0x44, 0x34, 0x35, 0x3f,
+    0x34, 0x43, 0x38, 0x3e, 0x41, 0x3b, 0x42, 0x38, 0x3d, 0x3f, 0x38, 0x45,
+    0x3b, 0x35, 0x39, 0x3c, 0x43, 0x43, 0x38, 0x34, 0x44, 0x43, 0x2e, 0x39,
+    0x39, 0x40, 0x39, 0x41, 0x41, 0x34, 0x3e, 0x44, 0x3d, 0x43, 0x3a, 0x3a,
+    0x3b, 0x3b, 0x36, 0x45, 0x3c, 0x43, 0x3d, 0x48, 0x36, 0x36, 0x39, 0x55,
+    0x35, 0x40, 0x3e, 0x49, 0x40, 0x3a, 0x3d, 0x3d, 0x34, 0x47, 0x40, 0x41,
+    0x40, 0x47, 0x39, 0x3e, 0x3b, 0x38, 0x3c, 0x3a, 0x35, 0x3e, 0x41, 0x4a,
+    0x4b, 0x3f, 0x36, 0x3d, 0x40, 0x3c, 0x39, 0x32, 0x33, 0x36, 0x30, 0x42,
+    0x42, 0x36, 0x54, 0x48, 0x2e, 0x4c, 0x34, 0x3c, 0x39, 0x36, 0x4e, 0x37,
+    0x2f, 0x3e, 0x30, 0x3d, 0x36, 0x3b, 0x45, 0x36, 0x37, 0x3e, 0x41, 0x4b,
+    0x3b, 0x36, 0x45, 0x3b, 0x38, 0x45, 0x3e, 0x43, 0x48, 0x46, 0x44, 0x44,
+    0x3e, 0x3b, 0x37, 0x3b, 0x3a, 0x3f, 0x3d, 0x44, 0x39, 0x38, 0x45, 0x43,
+    0x3d, 0x35, 0x39, 0x2c, 0x44, 0x41, 0x36, 0x40, 0x3d, 0x39, 0x3d, 0x2f,
+    0x3d, 0x39, 0x42, 0x3d, 0x36, 0x46, 0x43, 0x2c, 0x41, 0x3a, 0x30, 0x45,
+    0x3f, 0x41, 0x35, 0x2b, 0x3b, 0x38, 0x3a, 0x44, 0x32, 0x32, 0x39, 0x3c,
+    0x3a, 0x3a, 0x3c, 0x3a, 0x35, 0x40, 0x3b, 0x31, 0x36, 0x33, 0x35, 0x34,
+    0x3c, 0x3b, 0x3d, 0x36, 0x48, 0x3b, 0x3f, 0x42, 0x3e, 0x33, 0x2f, 0x3a,
+    0x49, 0x41, 0x39, 0x3e, 0x3c, 0x44, 0x3c, 0x39, 0x33, 0x39, 0x36, 0x35,
+    0x3d, 0x42, 0x34, 0x3e, 0x38, 0x45, 0x40, 0x45, 0x3d, 0x48, 0x42, 0x4a,
+    0x3f, 0x45, 0x38, 0x42, 0x44, 0x40, 0x34, 0x49, 0x44, 0x3d, 0x3a, 0x39,
+    0x3e, 0x3a, 0x42, 0x3e, 0x48, 0x42, 0x3e, 0x3a, 0x3f, 0x3f, 0x32, 0x3b,
+    0x38, 0x41, 0x3c, 0x39, 0x33, 0x45, 0x44, 0x3c, 0x48, 0x41, 0x41, 0x3d,
+    0x3a, 0x3c, 0x37, 0x33, 0x41, 0x3f, 0x38, 0x3a, 0x3f, 0x37, 0x51, 0x3c,
+    0x37, 0x3a, 0x43, 0x37, 0x40, 0x31, 0x4f, 0x34, 0x3b, 0x44, 0x45, 0x39,
+    0x40, 0x33, 0x49, 0x33, 0x3e, 0x35, 0x44, 0x3d, 0x3b, 0x3f, 0x43, 0x41,
+    0x43, 0x43, 0x48, 0x44, 0x46, 0x3b, 0x43, 0x3f, 0x3c, 0x3f, 0x3e, 0x3d,
+    0x3b, 0x41, 0x3c, 0x43, 0x30, 0x34, 0x39, 0x33, 0x3f, 0x38, 0x36, 0x2e,
+    0x33, 0x3f, 0x3c, 0x40, 0x3d, 0x3b, 0x3b, 0x31, 0x36, 0x41, 0x3b, 0x38,
+    0x46, 0x36, 0x34, 0x31, 0x42, 0x44, 0x33, 0x35, 0x3f, 0x36, 0x3c, 0x30,
+    0x3f, 0x31, 0x39, 0x3e, 0x3f, 0x47, 0x3e, 0x34, 0x36, 0x36, 0x34, 0x39,
+    0x37, 0x46, 0x40, 0x33, 0x3b, 0x3a, 0x3f, 0x41, 0x37, 0x44, 0x3a, 0x3f,
+    0x34, 0x45, 0x37, 0x33, 0x3f, 0x47, 0x41, 0x36, 0x39, 0x3e, 0x40, 0x38,
+    0x41, 0x3d, 0x3d, 0x36, 0x40, 0x3a, 0x3b, 0x3b, 0x41, 0x3b, 0x3a, 0x3f,
+    0x3f, 0x3b, 0x35, 0x42, 0x46, 0x3a, 0x30, 0x45, 0x40, 0x37, 0x39, 0x39,
+    0x3d, 0x38, 0x3f, 0x45, 0x3f, 0x31, 0x32, 0x3b, 0x35, 0x3e, 0x3b, 0x38,
+    0x3b, 0x44, 0x37, 0x39, 0x37, 0x42, 0x3f, 0x44, 0x38, 0x36, 0x37, 0x44,
+    0x45, 0x46, 0x41, 0x3b, 0x46, 0x42, 0x43, 0x43, 0x3a, 0x4b, 0x37, 0x35,
+    0x3b, 0x40, 0x32, 0x38, 0x41, 0x38, 0x4f, 0x3e, 0x36, 0x3f, 0x47, 0x3b,
+    0x47, 0x3b, 0x4a, 0x2e, 0x3d, 0x45, 0x3b, 0x46, 0x3e, 0x38, 0x43, 0x38,
+    0x41, 0x48, 0x3a, 0x39, 0x40, 0x45, 0x3b, 0x43, 0x40, 0x3e, 0x43, 0x41,
+    0x41, 0x3e, 0x39, 0x3f, 0x35, 0x42, 0x33, 0x3f, 0x3d, 0x32, 0x45, 0x3c,
+    0x41, 0x31, 0x45, 0x38, 0x43, 0x45, 0x41, 0x35, 0x35, 0x40, 0x44, 0x36,
+    0x3a, 0x3b, 0x3c, 0x2c, 0x3e, 0x41, 0x33, 0x3d, 0x46, 0x34, 0x3b, 0x30,
+    0x30, 0x42, 0x43, 0x3d, 0x3d, 0x3d, 0x43, 0x31, 0x3f, 0x40, 0x3a, 0x3f,
+    0x48, 0x3e, 0x3b, 0x39, 0x44, 0x43, 0x3b, 0x3a, 0x42, 0x38, 0x38, 0x3b,
+    0x3f, 0x44, 0x37, 0x3e, 0x45, 0x40, 0x41, 0x3b, 0x3c, 0x3a, 0x38, 0x37,
+    0x3b, 0x33, 0x3f, 0x35, 0x43, 0x3d, 0x33, 0x41, 0x3b, 0x46, 0x39, 0x32,
+    0x39, 0x3f, 0x3b, 0x39, 0x47, 0x3c, 0x3f, 0x39, 0x34, 0x3d, 0x3c, 0x46,
+    0x3f, 0x3e, 0x3e, 0x44, 0x34, 0x40, 0x3f, 0x39, 0x3c, 0x38, 0x36, 0x45,
+    0x42, 0x46, 0x3b, 0x44, 0x3a, 0x3d, 0x3b, 0x42, 0x3b, 0x3b, 0x3c, 0x45,
+    0x42, 0x3d, 0x36, 0x37, 0x3d, 0x43, 0x3f, 0x48, 0xa6, 0xfb, 0xff, 0xff,
+    0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xb3, 0x00, 0x00, 0x00,
+    0x39, 0xff, 0xff, 0xff, 0xe5, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+    0x68, 0xfb, 0xff, 0xff, 0xbc, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x70, 0x02, 0x00, 0x00,
+    0x70, 0x03, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xf0, 0x01, 0x00, 0x00,
+    0x80, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00,
+    0xa4, 0x02, 0x00, 0x00, 0xba, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x24, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65,
+    0x6c, 0x73, 0x5f, 0x73, 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x3c, 0xfd, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x80, 0x3b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xfd, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x03, 0x1c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x61, 0x64, 0x64, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xfd, 0xff, 0xff,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x97, 0xf5, 0x3f,
+    0x01, 0x00, 0x00, 0x00, 0x87, 0x35, 0xa0, 0x43, 0x01, 0x00, 0x00, 0x00,
+    0xd6, 0xd7, 0x28, 0xc3, 0x92, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x1c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75,
+    0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x14, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x05, 0x80, 0xbf, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x85, 0xc0, 0xbe, 0x43,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x03, 0x3c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
+    0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e,
+    0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57,
+    0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72,
+    0x73, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0xa4, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x17, 0xac, 0x6e, 0x3a, 0x01, 0x00, 0x00, 0x00,
+    0x20, 0x4e, 0x97, 0x3d, 0x01, 0x00, 0x00, 0x00, 0xaf, 0x27, 0x21, 0xbe,
+    0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x20, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+    0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f,
+    0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x1c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x42,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x43, 0x6f, 0x6e, 0x76, 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xfc, 0xfe, 0xff, 0xff,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x17, 0xac, 0xee, 0x39, 0x5a, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x54, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67,
+    0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x5f, 0x31, 0x2f,
+    0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74,
+    0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x2f,
+    0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00,
+    0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x9d, 0xaf, 0xd0, 0x3a, 0x01, 0x00, 0x00, 0x00,
+    0xe7, 0x29, 0x9e, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x5b, 0x91, 0xc3, 0xbd,
+    0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00,
+    0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+    0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x28, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d,
+    0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x62, 0x1b, 0x1c, 0x3b,
+    0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,
+    0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00,
+    0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+    0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00,
+    0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+    0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+    0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xff, 0x00, 0x19, 0x06, 0x00,
+    0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x09, 0x06, 0x00,
+    0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04};
+const int g_tiny_conv_simple_features_model_data_len = 19800;
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.h
new file mode 100644
index 0000000..cadf7d0
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.h
@@ -0,0 +1,27 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is a standard TensorFlow Lite model file that has been converted into a
+// C data array, so it can be easily compiled into a binary for devices that
+// don't have a file system. It was created using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_simple_features_model_data.cc
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_TINY_CONV_SIMPLE_FEATURES_MODEL_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_TINY_CONV_SIMPLE_FEATURES_MODEL_DATA_H_
+
+extern const unsigned char g_tiny_conv_simple_features_model_data[];
+extern const int g_tiny_conv_simple_features_model_data_len;
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_TINY_CONV_SIMPLE_FEATURES_MODEL_DATA_H_
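
Editor's note on the header above: xxd -i derives its identifier from the input filename (it would emit tiny_conv_tflite), so the g_-prefixed names declared here were evidently renamed after conversion. Below is a minimal sketch of how such an in-memory model is typically consumed on devices without a file system; LoadTinyConvModel is a hypothetical wrapper name, while tflite::GetModel and TFLITE_SCHEMA_VERSION are the standard FlatBuffer accessors of the era.

    #include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/tiny_conv_simple_features_model_data.h"
    #include "tensorflow/lite/schema/schema_generated.h"
    #include "tensorflow/lite/version.h"

    // Map the byte array in place as a FlatBuffer; nothing is copied, so the
    // model data can stay in read-only flash.
    const tflite::Model* LoadTinyConvModel() {
      const tflite::Model* model =
          ::tflite::GetModel(g_tiny_conv_simple_features_model_data);
      // Reject models serialized with an incompatible schema version.
      if (model->version() != TFLITE_SCHEMA_VERSION) {
        return nullptr;
      }
      return model;
    }
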
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.cc
new file mode 100644
index 0000000..cd46408
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.cc
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See the header for documentation on the meaning of this data.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h"
+
+const uint8_t g_yes_power_spectrum_data[g_yes_power_spectrum_data_size] = {
+    8, 89, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 13, 1, 6, 23, 20, 6, 4, 0, 0, 0,
+    0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h
new file mode 100644
index 0000000..77e52d5
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h
@@ -0,0 +1,29 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This data was extracted from the larger feature data held in
+// yes_features_data.cc and consists of the 26th spectrogram slice of 43 values.
+// This is the expected result of running the sample data in
+// yes_30ms_sample_data.cc through the preprocessing pipeline.
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_POWER_SPECTRUM_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_POWER_SPECTRUM_DATA_H_
+
+#include <cstdint>
+
+constexpr int g_yes_power_spectrum_data_size = 43;
+extern const uint8_t g_yes_power_spectrum_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_POWER_SPECTRUM_DATA_H_
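
Editor's note: a hedged sketch of how a test might consume the golden slice declared above — MatchesGoldenSlice and its generated argument are illustrative names, not part of this change, and the actual test code may compare differently.

    #include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h"

    // Compare a freshly generated power spectrum slice against the expected
    // values; generated must point at g_yes_power_spectrum_data_size bytes.
    bool MatchesGoldenSlice(const uint8_t* generated) {
      for (int i = 0; i < g_yes_power_spectrum_data_size; ++i) {
        if (generated[i] != g_yes_power_spectrum_data[i]) {
          return false;  // Any mismatch flags a preprocessing regression.
        }
      }
      return true;
    }
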
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.cc
new file mode 100644
index 0000000..2d660bb
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.cc
@@ -0,0 +1,158 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.h"
+
+/* File automatically created by
+ * tensorflow/examples/speech_commands/wav_to_features.py \
+ * --sample_rate=16000 \
+ * --clip_duration_ms=1000 \
+ * --window_size_ms=30 \
+ * --window_stride_ms=20 \
+ * --feature_bin_count=40 \
+ * --quantize=1 \
+ * --preprocess="average" \
+ * --input_wav="speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav" \
+ * --output_c_file="yes_simple_features_data.cc" \
+ */
+
+const int g_yes_simple_f2e59fea_nohash_1_width = 43;
+const int g_yes_simple_f2e59fea_nohash_1_height = 49;
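+// Shape note (editorial, hedged): with the flags above, the row count is
+// 1 + floor((1000 ms - 30 ms) / 20 ms) = 49, matching the height, and each
+// row holds the 43 averaged spectrum bins produced by --preprocess="average",
+// matching the width.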
+const unsigned char g_yes_simple_f2e59fea_nohash_1_data[] = {
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  1,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  1,   1,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  4,   5,   1,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   2,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   1,  19, 1,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   1,   0,  1,  3,   3,   1,  1,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   8,   89, 8,   0,   0,  0,  0,   0,   0,  0,  0,   4,  13,
+    1,  6,  23,  20,  6,   4,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  19, 177, 42, 1,
+    1,  0,  0,   0,   0,   2,  3,   119, 51, 5,  139, 92,  58, 58, 15,  2,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   13, 165, 176, 3,  1,  1,   0,   0,  1,  1,   32, 214,
+    26, 19, 113, 103, 28,  22, 27,  3,   1,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  12,  55, 128,
+    27, 1,  1,   0,   1,   4,  2,   52,  93, 10, 28,  156, 10, 21, 21,  3,  3,
+    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  14,  99,  32, 65, 7,   1,   2,  2,  6,   13, 121,
+    36, 15, 11,  112, 125, 14, 5,   13,  4,  4,  2,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   24, 25,
+    32, 5,  1,   0,   0,   0,  1,   0,   7,  5,  1,   1,   3,  3,  0,   3,  3,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   13,  13, 5,  1,   0,   0,  0,  0,   0,  3,
+    4,  1,  0,   1,   2,   3,  1,   1,   1,  4,  8,   1,   2,  1,  3,   1,  1,
+    0,  1,  1,   3,   1,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
+    8,  2,  1,   0,   0,   0,  0,   0,   1,  1,  0,   0,   1,  1,  2,   0,  2,
+    1,  0,  2,   0,   2,   2,  3,   1,   1,  0,  1,   1,   4,  5,  1,   0,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  1,   1,   1,  0,  1,   2,   1,  0,  1,   3,  1,
+    1,  3,  1,   1,   6,   2,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  2,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  1,   1,   0,  1,  2,   6,   2,  4,  2,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  3,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
+    0,  0,  1,   2,   1,   1,  2,   1,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  4,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   2,  1,  0,   0,   2,  3,  5,   2,  0,
+    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   1,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   1,   2,  2,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  1,  0,   0,   0,  0,  1,   2,  3,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   1,  1,   1,   1,  0,  0,   0,   1,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.h
new file mode 100644
index 0000000..87ea4a4
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.h
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_SIMPLE_FEATURES_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_SIMPLE_FEATURES_DATA_H_
+
+extern const int g_yes_simple_f2e59fea_nohash_1_width;
+extern const int g_yes_simple_f2e59fea_nohash_1_height;
+extern const unsigned char g_yes_simple_f2e59fea_nohash_1_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_SIMPLE_FEATURES_DATA_H_
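
Note on the header above: it only declares the generated "yes" feature arrays; the matching yes_simple_features_data.cc added in this change supplies the definitions. A minimal consumption sketch follows — the main() harness, the non-zero count, and the assumed row-major height-by-width layout are illustrative assumptions, not code from this PR:

    #include <cstdio>

    #include "tensorflow/lite/experimental/micro/examples/micro_speech/simple_features/yes_simple_features_data.h"

    // Hypothetical smoke check: walk the captured "yes" feature grid and count
    // non-zero cells. Row-major indexing (height rows of width columns) is an
    // assumption about the generator's output layout, not something this diff
    // states explicitly.
    int main() {
      const int width = g_yes_simple_f2e59fea_nohash_1_width;
      const int height = g_yes_simple_f2e59fea_nohash_1_height;
      int nonzero = 0;
      for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
          if (g_yes_simple_f2e59fea_nohash_1_data[y * width + x] != 0) {
            ++nonzero;
          }
        }
      }
      std::printf("%d x %d features, %d non-zero cells\n", width, height, nonzero);
      return 0;
    }

Compiled against the generated .cc, a sketch like this gives a quick sanity signal that the committed data is non-empty before it is wired into the micro_speech tests.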
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
deleted file mode 100644
index 62e4359..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
+++ /dev/null
@@ -1,1673 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// Automatically created from a TensorFlow Lite flatbuffer using the command:
-// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc
-// See the README for a full description of the creation process.
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
-
-const unsigned char g_tiny_conv_model_data[] = {
-    0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
-    0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,
-    0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x4d, 0x00, 0x00,
-    0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
-    0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
-    0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00,
-    0xb4, 0x47, 0x00, 0x00, 0xe4, 0x02, 0x00, 0x00, 0xb4, 0x02, 0x00, 0x00,
-    0xac, 0x02, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff,
-    0xbc, 0xb3, 0xff, 0xff, 0xc0, 0xb3, 0xff, 0xff, 0x1e, 0xb4, 0xff, 0xff,
-    0x04, 0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x89, 0xa5, 0xe8, 0xc1,
-    0xb1, 0x89, 0x5b, 0xc6, 0x4f, 0x9b, 0xd3, 0x74, 0x93, 0x88, 0xff, 0xaf,
-    0x89, 0xff, 0xf4, 0x70, 0xcc, 0x75, 0x78, 0xbf, 0x92, 0xcd, 0xa9, 0xa8,
-    0xd6, 0x6a, 0x6f, 0x7b, 0x7f, 0xd8, 0xa8, 0xb1, 0xe6, 0x32, 0x21, 0x70,
-    0xa0, 0x9c, 0x6f, 0xc8, 0xc6, 0x59, 0x67, 0x93, 0x97, 0xca, 0x3f, 0xde,
-    0xcb, 0x74, 0x7c, 0xb5, 0xa4, 0xd9, 0x66, 0xc6, 0x87, 0x98, 0xa5, 0xd0,
-    0xbb, 0xb9, 0xc2, 0xb2, 0xaa, 0x79, 0x25, 0xb9, 0x6d, 0x5a, 0xc8, 0x7f,
-    0x70, 0x85, 0x79, 0xbc, 0x6a, 0x9b, 0xd1, 0x9a, 0x9c, 0x51, 0x53, 0x71,
-    0x89, 0xc0, 0xb4, 0xac, 0xae, 0x47, 0x67, 0x70, 0x79, 0xd2, 0x81, 0xa5,
-    0xd2, 0x09, 0x38, 0x82, 0x74, 0xc9, 0x5d, 0xaf, 0xc1, 0x4f, 0x53, 0x99,
-    0xcb, 0xb7, 0x3a, 0xba, 0xe8, 0x7f, 0x76, 0xb9, 0xb3, 0xd3, 0x60, 0xc0,
-    0x93, 0x9f, 0x87, 0xbd, 0xd0, 0xb8, 0xca, 0xc1, 0xb6, 0x6c, 0x01, 0xc1,
-    0x5c, 0x5d, 0xb2, 0x82, 0x76, 0x77, 0x39, 0xbc, 0x72, 0x6a, 0xc3, 0xb4,
-    0x79, 0x21, 0x48, 0x42, 0x86, 0xa6, 0xbd, 0xaf, 0xae, 0x23, 0x9c, 0x69,
-    0x78, 0xc3, 0x6b, 0xb3, 0xab, 0x43, 0xb2, 0x88, 0x71, 0xc6, 0x6b, 0xbe,
-    0xc3, 0x75, 0xc2, 0xc3, 0xa5, 0xcf, 0x32, 0xbe, 0xcb, 0xb0, 0xb8, 0xc1,
-    0x9c, 0xcf, 0x64, 0xc4, 0xb4, 0x96, 0xa8, 0xb9, 0xcb, 0xc0, 0xc0, 0xb8,
-    0xb8, 0x77, 0x65, 0xc0, 0xc4, 0xb3, 0xc5, 0x77, 0x9b, 0x61, 0xd4, 0xac,
-    0x7e, 0x36, 0xb1, 0xae, 0x36, 0x36, 0xb8, 0x39, 0x6b, 0x70, 0x9c, 0xb5,
-    0x88, 0x5c, 0xb3, 0x6a, 0xad, 0xc5, 0x7b, 0xb4, 0xad, 0xaa, 0xc4, 0x84,
-    0x5e, 0xc4, 0x67, 0xc1, 0xde, 0xba, 0xcf, 0xbd, 0xa0, 0xd3, 0x35, 0xb3,
-    0xe7, 0xc8, 0xb8, 0xb8, 0xaf, 0xb4, 0x59, 0xb8, 0xb4, 0xac, 0xac, 0xaa,
-    0xc7, 0xad, 0xc8, 0xb6, 0xac, 0x99, 0xa0, 0xcb, 0xc1, 0xc8, 0xcb, 0x89,
-    0xc3, 0xac, 0xca, 0x8b, 0x97, 0x1f, 0xbd, 0xbf, 0x13, 0xad, 0xc8, 0x41,
-    0x56, 0x3c, 0x86, 0xb2, 0x61, 0xc4, 0xbb, 0x71, 0xba, 0x92, 0x8d, 0xc3,
-    0x86, 0xcb, 0xc5, 0x8d, 0x88, 0xc8, 0x6a, 0xbf, 0x9c, 0xcd, 0xcd, 0xc0,
-    0x81, 0xb1, 0x47, 0xb5, 0xf0, 0xce, 0xb1, 0xc1, 0xaa, 0xa8, 0x54, 0xcb,
-    0xbc, 0xc7, 0xc5, 0x8e, 0xc3, 0xce, 0xc7, 0xb9, 0xb9, 0xa1, 0xc5, 0xbd,
-    0xb8, 0xb8, 0xb7, 0x81, 0xb6, 0xba, 0xd2, 0x90, 0xbc, 0x96, 0xbe, 0xba,
-    0x53, 0xb5, 0xc7, 0x3c, 0x3c, 0x1f, 0x90, 0xaa, 0x5a, 0xb8, 0xba, 0x7e,
-    0xbc, 0x9e, 0xc2, 0xb1, 0x6e, 0xc0, 0xc4, 0x91, 0xf0, 0xb5, 0x60, 0xad,
-    0x73, 0xba, 0xcd, 0xba, 0x6e, 0x94, 0x39, 0xb5, 0xe4, 0xbe, 0xb4, 0xb5,
-    0xa0, 0xa9, 0x51, 0xac, 0xbc, 0xc2, 0xb3, 0x8a, 0xbd, 0x9a, 0xca, 0xb3,
-    0xbf, 0xaf, 0xb5, 0x9a, 0xb9, 0xc3, 0xb6, 0x92, 0xb5, 0xc1, 0xb0, 0x95,
-    0xd6, 0xcc, 0xbb, 0xbb, 0xa9, 0xb9, 0xac, 0x4a, 0x62, 0x27, 0xa7, 0xa7,
-    0x30, 0xbd, 0xb1, 0x73, 0xa1, 0x74, 0xc2, 0xb7, 0x58, 0xc0, 0xae, 0x8f,
-    0xe1, 0xac, 0x4e, 0xb0, 0x55, 0xc9, 0xc8, 0x9f, 0x83, 0x8e, 0x3e, 0xd5,
-    0xb5, 0xbe, 0xcd, 0xb2, 0xa6, 0xc8, 0x64, 0xac, 0xc0, 0xc8, 0xaf, 0x99,
-    0xc5, 0x9e, 0xb8, 0xbd, 0xa9, 0xc2, 0xb3, 0x81, 0xb4, 0xc2, 0xb4, 0x8f,
-    0xbc, 0xb8, 0x9c, 0x88, 0xbe, 0xc6, 0xbf, 0xba, 0xc8, 0xb4, 0xab, 0x5b,
-    0x92, 0x51, 0xb1, 0x9a, 0x44, 0xb9, 0xab, 0x80, 0xa5, 0x3e, 0xc0, 0xa5,
-    0x5c, 0xb6, 0xa8, 0xa2, 0xb3, 0x9a, 0x6b, 0xb3, 0x34, 0xc6, 0x7e, 0x96,
-    0xcb, 0x88, 0x48, 0xc6, 0xa3, 0xbb, 0xd2, 0xa2, 0xaf, 0xd0, 0x6e, 0xae,
-    0xb4, 0xce, 0xc8, 0x8f, 0xd7, 0xad, 0xc8, 0xb0, 0xae, 0xb7, 0xb2, 0x70,
-    0xb9, 0xad, 0xc1, 0xa0, 0xcb, 0xa2, 0xb0, 0x9b, 0xbe, 0xd3, 0xca, 0xb6,
-    0xbd, 0xaf, 0xa9, 0x82, 0xa1, 0xd7, 0xbc, 0x9b, 0x8b, 0xac, 0xaa, 0xac,
-    0xad, 0x37, 0xb7, 0xb6, 0x46, 0xae, 0xa9, 0xbd, 0x6b, 0x90, 0x5e, 0xcd,
-    0x23, 0xa4, 0x76, 0xa1, 0xc4, 0x96, 0x50, 0xcc, 0x95, 0x99, 0x93, 0xa7,
-    0xb2, 0xe1, 0x7c, 0xbd, 0xbd, 0xb5, 0xbf, 0x9a, 0xca, 0x80, 0xd7, 0xae,
-    0x79, 0xa8, 0xaa, 0xb2, 0xbc, 0x51, 0xda, 0xa3, 0x80, 0x8b, 0xa2, 0xc8,
-    0xd1, 0x94, 0xe1, 0xc4, 0xbd, 0xae, 0xae, 0xcc, 0xb3, 0xca, 0xd5, 0xa1,
-    0xd5, 0xa7, 0xaf, 0xd2, 0xb4, 0x8d, 0xcc, 0xc8, 0x63, 0xa3, 0xa4, 0xdf,
-    0x6f, 0x7e, 0x98, 0xdf, 0x1b, 0x7b, 0x43, 0x99, 0xb0, 0x99, 0x71, 0xdb,
-    0x63, 0x7b, 0x69, 0x9c, 0xba, 0xcd, 0x90, 0xd0, 0xb6, 0xa6, 0x9e, 0x95,
-    0x50, 0xb6, 0xff, 0xff, 0xae, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0xc7, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00,
-    0xda, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0xc0, 0x44, 0x00, 0x00,
-    0x2c, 0x30, 0x38, 0x5a, 0x3d, 0x4c, 0x44, 0x3b, 0x48, 0x48, 0x44, 0x57,
-    0x3f, 0x43, 0x45, 0x3a, 0x24, 0x32, 0x21, 0x5c, 0x3f, 0x3a, 0x38, 0x3a,
-    0x35, 0x35, 0x2f, 0x51, 0x3c, 0x3a, 0x45, 0x3a, 0x3b, 0x41, 0x39, 0x55,
-    0x3c, 0x41, 0x39, 0x44, 0x3a, 0x40, 0x37, 0x48, 0x33, 0x47, 0x36, 0x3e,
-    0x3c, 0x41, 0x3f, 0x3e, 0x3e, 0x47, 0x36, 0x3e, 0x41, 0x33, 0x3e, 0x3b,
-    0x3a, 0x46, 0x45, 0x40, 0x48, 0x3a, 0x35, 0x4b, 0x45, 0x4d, 0x3c, 0x49,
-    0x42, 0x44, 0x3c, 0x4c, 0x3e, 0x3c, 0x44, 0x32, 0x33, 0x41, 0x36, 0x4b,
-    0x38, 0x3b, 0x3c, 0x38, 0x3b, 0x45, 0x34, 0x46, 0x40, 0x4e, 0x44, 0x35,
-    0x43, 0x36, 0x3d, 0x40, 0x3e, 0x48, 0x40, 0x34, 0x3a, 0x46, 0x45, 0x43,
-    0x45, 0x3f, 0x47, 0x37, 0x36, 0x35, 0x44, 0x3a, 0x3e, 0x37, 0x39, 0x40,
-    0x3a, 0x3f, 0x3f, 0x4c, 0x3e, 0x41, 0x43, 0x35, 0x3f, 0x3d, 0x3d, 0x4c,
-    0x3c, 0x4a, 0x46, 0x3c, 0x3a, 0x41, 0x40, 0x4e, 0x36, 0x47, 0x40, 0x3b,
-    0x47, 0x42, 0x38, 0x4d, 0x48, 0x47, 0x3c, 0x3c, 0x33, 0x3b, 0x3e, 0x42,
-    0x3f, 0x3e, 0x3a, 0x3d, 0x32, 0x39, 0x41, 0x46, 0x3a, 0x3a, 0x3e, 0x3e,
-    0x47, 0x48, 0x4e, 0x36, 0x44, 0x40, 0x41, 0x45, 0x3a, 0x3c, 0x38, 0x55,
-    0x2e, 0x26, 0x2f, 0x32, 0x3f, 0x41, 0x3e, 0x4c, 0x45, 0x36, 0x40, 0x31,
-    0x17, 0x2e, 0x14, 0x53, 0x34, 0x30, 0x34, 0x3f, 0x2e, 0x44, 0x2b, 0x4e,
-    0x34, 0x3e, 0x34, 0x43, 0x3d, 0x35, 0x3f, 0x46, 0x39, 0x40, 0x38, 0x3e,
-    0x35, 0x3b, 0x35, 0x45, 0x3d, 0x40, 0x38, 0x37, 0x40, 0x3e, 0x32, 0x3e,
-    0x41, 0x39, 0x30, 0x41, 0x3a, 0x32, 0x3e, 0x3d, 0x39, 0x31, 0x33, 0x3e,
-    0x41, 0x47, 0x40, 0x47, 0x35, 0x33, 0x3c, 0x32, 0x40, 0x3c, 0x42, 0x49,
-    0x34, 0x38, 0x39, 0x37, 0x39, 0x35, 0x40, 0x4d, 0x37, 0x43, 0x42, 0x3e,
-    0x3f, 0x3c, 0x3e, 0x51, 0x36, 0x37, 0x42, 0x41, 0x36, 0x31, 0x43, 0x3d,
-    0x46, 0x43, 0x37, 0x46, 0x32, 0x45, 0x42, 0x36, 0x3f, 0x42, 0x42, 0x41,
-    0x3d, 0x46, 0x39, 0x41, 0x3c, 0x3f, 0x38, 0x3c, 0x43, 0x43, 0x3d, 0x3c,
-    0x3d, 0x41, 0x38, 0x42, 0x3a, 0x3d, 0x43, 0x42, 0x41, 0x40, 0x39, 0x36,
-    0x3a, 0x3c, 0x3c, 0x4f, 0x44, 0x36, 0x39, 0x35, 0x46, 0x46, 0x36, 0x4a,
-    0x3a, 0x42, 0x43, 0x39, 0x3f, 0x3d, 0x3c, 0x47, 0x38, 0x3f, 0x43, 0x40,
-    0x36, 0x3c, 0x45, 0x3b, 0x33, 0x36, 0x3b, 0x39, 0x3c, 0x35, 0x40, 0x38,
-    0x40, 0x3e, 0x3f, 0x48, 0x3f, 0x34, 0x40, 0x53, 0x26, 0x2c, 0x29, 0x39,
-    0x2a, 0x38, 0x3f, 0x45, 0x32, 0x31, 0x4a, 0x37, 0x1c, 0x28, 0x09, 0x43,
-    0x35, 0x3b, 0x33, 0x3c, 0x32, 0x3f, 0x28, 0x41, 0x36, 0x35, 0x3a, 0x37,
-    0x41, 0x39, 0x32, 0x3c, 0x40, 0x3c, 0x3c, 0x32, 0x38, 0x39, 0x37, 0x44,
-    0x3a, 0x33, 0x41, 0x36, 0x37, 0x3c, 0x35, 0x3a, 0x3d, 0x30, 0x3d, 0x41,
-    0x37, 0x3c, 0x45, 0x3a, 0x37, 0x2f, 0x36, 0x3c, 0x3a, 0x3d, 0x39, 0x48,
-    0x46, 0x33, 0x3a, 0x3e, 0x40, 0x3d, 0x3b, 0x52, 0x38, 0x45, 0x34, 0x47,
-    0x39, 0x36, 0x37, 0x56, 0x42, 0x3f, 0x33, 0x36, 0x38, 0x3f, 0x40, 0x53,
-    0x3e, 0x37, 0x3d, 0x3c, 0x48, 0x3a, 0x3d, 0x33, 0x39, 0x40, 0x3e, 0x35,
-    0x3d, 0x46, 0x38, 0x36, 0x37, 0x43, 0x3a, 0x3c, 0x40, 0x38, 0x39, 0x3b,
-    0x39, 0x3a, 0x42, 0x3d, 0x34, 0x3f, 0x35, 0x43, 0x3a, 0x35, 0x46, 0x3a,
-    0x48, 0x38, 0x3b, 0x48, 0x3c, 0x35, 0x42, 0x3d, 0x3a, 0x3d, 0x38, 0x42,
-    0x3e, 0x3c, 0x33, 0x39, 0x34, 0x30, 0x42, 0x44, 0x41, 0x3d, 0x3c, 0x39,
-    0x3c, 0x3a, 0x39, 0x41, 0x3d, 0x44, 0x3c, 0x40, 0x3f, 0x3e, 0x42, 0x3f,
-    0x37, 0x40, 0x39, 0x3b, 0x42, 0x43, 0x49, 0x37, 0x39, 0x46, 0x35, 0x3c,
-    0x3e, 0x39, 0x45, 0x52, 0x24, 0x2d, 0x38, 0x35, 0x3a, 0x3a, 0x3c, 0x44,
-    0x39, 0x32, 0x51, 0x3f, 0x16, 0x34, 0x0a, 0x49, 0x39, 0x38, 0x39, 0x3e,
-    0x2f, 0x36, 0x24, 0x3f, 0x37, 0x34, 0x38, 0x3b, 0x34, 0x34, 0x30, 0x3b,
-    0x3d, 0x36, 0x35, 0x42, 0x33, 0x40, 0x37, 0x35, 0x43, 0x3f, 0x3f, 0x39,
-    0x3a, 0x43, 0x36, 0x3e, 0x39, 0x3d, 0x3f, 0x3d, 0x47, 0x3b, 0x39, 0x37,
-    0x35, 0x42, 0x3f, 0x3b, 0x41, 0x3a, 0x42, 0x4b, 0x3d, 0x3f, 0x3d, 0x3e,
-    0x38, 0x3b, 0x34, 0x4e, 0x3f, 0x39, 0x36, 0x43, 0x39, 0x35, 0x41, 0x4d,
-    0x3c, 0x39, 0x43, 0x33, 0x37, 0x3b, 0x41, 0x48, 0x3c, 0x3f, 0x39, 0x32,
-    0x35, 0x3d, 0x42, 0x35, 0x3d, 0x3e, 0x37, 0x3b, 0x38, 0x3a, 0x44, 0x36,
-    0x42, 0x35, 0x48, 0x40, 0x3a, 0x44, 0x44, 0x39, 0x43, 0x41, 0x3c, 0x37,
-    0x47, 0x3b, 0x42, 0x42, 0x45, 0x3a, 0x40, 0x46, 0x35, 0x3f, 0x3a, 0x48,
-    0x35, 0x44, 0x3f, 0x37, 0x33, 0x3e, 0x45, 0x49, 0x39, 0x43, 0x47, 0x37,
-    0x3f, 0x3f, 0x3b, 0x44, 0x38, 0x3d, 0x39, 0x42, 0x37, 0x3e, 0x40, 0x45,
-    0x3b, 0x3f, 0x40, 0x34, 0x42, 0x3f, 0x43, 0x3c, 0x43, 0x41, 0x38, 0x38,
-    0x38, 0x41, 0x55, 0x33, 0x33, 0x39, 0x39, 0x3c, 0x35, 0x39, 0x38, 0x42,
-    0x27, 0x26, 0x32, 0x41, 0x41, 0x32, 0x3f, 0x47, 0x3a, 0x38, 0x48, 0x37,
-    0x11, 0x27, 0x08, 0x49, 0x35, 0x42, 0x3c, 0x2e, 0x34, 0x43, 0x25, 0x3b,
-    0x3a, 0x33, 0x37, 0x30, 0x3c, 0x36, 0x2d, 0x3c, 0x3b, 0x39, 0x3b, 0x40,
-    0x46, 0x3a, 0x30, 0x42, 0x35, 0x32, 0x36, 0x3a, 0x3a, 0x34, 0x34, 0x33,
-    0x3d, 0x30, 0x3b, 0x42, 0x41, 0x3f, 0x3d, 0x3b, 0x44, 0x3d, 0x41, 0x41,
-    0x3d, 0x3f, 0x40, 0x51, 0x42, 0x42, 0x36, 0x45, 0x30, 0x40, 0x32, 0x4f,
-    0x3a, 0x3c, 0x40, 0x39, 0x3d, 0x3b, 0x3e, 0x4b, 0x3d, 0x37, 0x42, 0x46,
-    0x40, 0x40, 0x47, 0x3d, 0x35, 0x3c, 0x3f, 0x46, 0x37, 0x37, 0x3a, 0x2e,
-    0x3d, 0x3c, 0x3a, 0x46, 0x3a, 0x44, 0x3c, 0x3a, 0x32, 0x44, 0x31, 0x41,
-    0x43, 0x36, 0x49, 0x39, 0x3d, 0x37, 0x3f, 0x41, 0x3b, 0x3b, 0x3c, 0x42,
-    0x3c, 0x34, 0x3f, 0x3b, 0x40, 0x3e, 0x48, 0x47, 0x3e, 0x3c, 0x38, 0x39,
-    0x3f, 0x35, 0x39, 0x3f, 0x3e, 0x3e, 0x3b, 0x43, 0x41, 0x40, 0x43, 0x41,
-    0x3f, 0x37, 0x39, 0x41, 0x46, 0x32, 0x3d, 0x41, 0x36, 0x3f, 0x3e, 0x3f,
-    0x36, 0x48, 0x43, 0x3d, 0x43, 0x3f, 0x34, 0x3d, 0x34, 0x35, 0x4f, 0x32,
-    0x3c, 0x3f, 0x3d, 0x3f, 0x39, 0x3c, 0x3d, 0x47, 0x23, 0x36, 0x33, 0x45,
-    0x37, 0x2e, 0x42, 0x42, 0x39, 0x34, 0x4f, 0x3f, 0x19, 0x2b, 0x01, 0x50,
-    0x35, 0x3f, 0x37, 0x3c, 0x33, 0x35, 0x25, 0x32, 0x38, 0x3e, 0x40, 0x40,
-    0x2f, 0x38, 0x35, 0x3d, 0x31, 0x42, 0x44, 0x3c, 0x3a, 0x3d, 0x2d, 0x3e,
-    0x3b, 0x3e, 0x3d, 0x31, 0x3b, 0x37, 0x35, 0x31, 0x36, 0x35, 0x34, 0x31,
-    0x41, 0x3a, 0x33, 0x32, 0x3c, 0x31, 0x3e, 0x3d, 0x40, 0x3b, 0x34, 0x45,
-    0x36, 0x39, 0x3e, 0x3f, 0x3c, 0x45, 0x37, 0x4b, 0x42, 0x3d, 0x33, 0x43,
-    0x3e, 0x40, 0x35, 0x4e, 0x38, 0x36, 0x3a, 0x33, 0x38, 0x44, 0x3f, 0x3c,
-    0x3f, 0x40, 0x3a, 0x3c, 0x3c, 0x3c, 0x44, 0x29, 0x3a, 0x40, 0x35, 0x3a,
-    0x3d, 0x48, 0x3b, 0x30, 0x45, 0x41, 0x45, 0x40, 0x37, 0x32, 0x3a, 0x35,
-    0x3f, 0x38, 0x3b, 0x43, 0x3b, 0x3f, 0x33, 0x40, 0x3b, 0x40, 0x38, 0x33,
-    0x39, 0x3c, 0x3c, 0x3f, 0x43, 0x33, 0x43, 0x40, 0x43, 0x3d, 0x33, 0x42,
-    0x40, 0x32, 0x3e, 0x36, 0x40, 0x38, 0x43, 0x40, 0x44, 0x38, 0x34, 0x3c,
-    0x3e, 0x39, 0x47, 0x43, 0x40, 0x3b, 0x3f, 0x3f, 0x3c, 0x3b, 0x4b, 0x33,
-    0x36, 0x49, 0x32, 0x41, 0x48, 0x45, 0x57, 0x3a, 0x40, 0x42, 0x40, 0x46,
-    0x36, 0x35, 0x3c, 0x46, 0x22, 0x2e, 0x33, 0x3e, 0x3c, 0x39, 0x44, 0x4d,
-    0x3f, 0x41, 0x51, 0x44, 0x15, 0x2e, 0x02, 0x4e, 0x39, 0x3a, 0x3c, 0x35,
-    0x30, 0x38, 0x1e, 0x31, 0x40, 0x3b, 0x39, 0x3d, 0x3a, 0x37, 0x35, 0x36,
-    0x46, 0x36, 0x3c, 0x3e, 0x39, 0x3e, 0x32, 0x40, 0x3b, 0x35, 0x42, 0x41,
-    0x41, 0x38, 0x41, 0x35, 0x42, 0x36, 0x3c, 0x42, 0x3d, 0x41, 0x35, 0x31,
-    0x3f, 0x44, 0x3e, 0x41, 0x3f, 0x35, 0x42, 0x4b, 0x3e, 0x36, 0x37, 0x34,
-    0x36, 0x3d, 0x40, 0x49, 0x41, 0x3e, 0x3d, 0x3b, 0x38, 0x37, 0x40, 0x47,
-    0x35, 0x32, 0x43, 0x38, 0x36, 0x3b, 0x33, 0x47, 0x33, 0x34, 0x3d, 0x47,
-    0x3c, 0x37, 0x3d, 0x2b, 0x3a, 0x36, 0x3b, 0x3d, 0x43, 0x38, 0x35, 0x32,
-    0x32, 0x37, 0x43, 0x36, 0x3f, 0x48, 0x38, 0x30, 0x3a, 0x3c, 0x42, 0x34,
-    0x37, 0x3c, 0x37, 0x40, 0x48, 0x3e, 0x35, 0x3b, 0x3f, 0x38, 0x39, 0x3e,
-    0x37, 0x35, 0x36, 0x3d, 0x3b, 0x3c, 0x40, 0x3d, 0x34, 0x40, 0x46, 0x42,
-    0x3f, 0x3c, 0x3c, 0x3e, 0x40, 0x40, 0x3d, 0x3f, 0x3f, 0x44, 0x46, 0x41,
-    0x32, 0x43, 0x40, 0x41, 0x3c, 0x42, 0x39, 0x38, 0x48, 0x44, 0x3d, 0x38,
-    0x34, 0x40, 0x4e, 0x31, 0x3c, 0x42, 0x39, 0x48, 0x3c, 0x33, 0x3e, 0x40,
-    0x20, 0x27, 0x39, 0x45, 0x45, 0x36, 0x47, 0x4c, 0x35, 0x3e, 0x4a, 0x36,
-    0x16, 0x2f, 0x04, 0x4f, 0x3a, 0x35, 0x36, 0x3a, 0x2d, 0x36, 0x21, 0x34,
-    0x3b, 0x32, 0x3d, 0x3c, 0x3c, 0x3f, 0x3b, 0x3b, 0x41, 0x46, 0x40, 0x3d,
-    0x3b, 0x44, 0x33, 0x42, 0x34, 0x33, 0x3e, 0x45, 0x3f, 0x46, 0x39, 0x33,
-    0x3b, 0x37, 0x37, 0x37, 0x42, 0x47, 0x3c, 0x35, 0x31, 0x41, 0x44, 0x3a,
-    0x3b, 0x33, 0x39, 0x44, 0x42, 0x33, 0x3d, 0x3f, 0x43, 0x33, 0x41, 0x4a,
-    0x35, 0x46, 0x36, 0x3e, 0x39, 0x41, 0x41, 0x4c, 0x34, 0x3d, 0x38, 0x33,
-    0x3c, 0x3f, 0x43, 0x44, 0x37, 0x35, 0x35, 0x3c, 0x43, 0x34, 0x3e, 0x2d,
-    0x3f, 0x35, 0x38, 0x3c, 0x33, 0x35, 0x43, 0x2a, 0x40, 0x33, 0x34, 0x40,
-    0x3d, 0x38, 0x36, 0x2d, 0x36, 0x3c, 0x43, 0x3d, 0x37, 0x3d, 0x39, 0x38,
-    0x3b, 0x3e, 0x3c, 0x46, 0x35, 0x35, 0x43, 0x44, 0x39, 0x40, 0x34, 0x39,
-    0x3d, 0x34, 0x40, 0x45, 0x38, 0x35, 0x3e, 0x39, 0x3c, 0x44, 0x48, 0x44,
-    0x41, 0x3e, 0x3c, 0x45, 0x3a, 0x3c, 0x3c, 0x46, 0x3a, 0x40, 0x39, 0x43,
-    0x35, 0x35, 0x3e, 0x45, 0x3a, 0x34, 0x3c, 0x39, 0x46, 0x3a, 0x4f, 0x35,
-    0x32, 0x3d, 0x36, 0x41, 0x32, 0x38, 0x3f, 0x45, 0x2d, 0x34, 0x2a, 0x35,
-    0x43, 0x3f, 0x41, 0x49, 0x41, 0x3c, 0x4b, 0x3f, 0x17, 0x31, 0x02, 0x4f,
-    0x30, 0x38, 0x39, 0x40, 0x33, 0x3a, 0x25, 0x38, 0x35, 0x3c, 0x39, 0x35,
-    0x34, 0x41, 0x34, 0x43, 0x40, 0x40, 0x46, 0x3d, 0x40, 0x38, 0x3f, 0x3b,
-    0x35, 0x39, 0x3c, 0x39, 0x34, 0x38, 0x3f, 0x36, 0x3a, 0x38, 0x44, 0x3f,
-    0x3f, 0x38, 0x3c, 0x33, 0x41, 0x42, 0x38, 0x33, 0x3c, 0x3b, 0x3c, 0x46,
-    0x38, 0x3b, 0x3f, 0x33, 0x3f, 0x48, 0x3b, 0x49, 0x3f, 0x3a, 0x3d, 0x3f,
-    0x47, 0x3d, 0x30, 0x45, 0x36, 0x42, 0x3d, 0x36, 0x43, 0x38, 0x3b, 0x3d,
-    0x3c, 0x30, 0x3b, 0x43, 0x3d, 0x41, 0x34, 0x2e, 0x43, 0x3d, 0x43, 0x46,
-    0x43, 0x3c, 0x3c, 0x2e, 0x3c, 0x43, 0x34, 0x43, 0x3e, 0x43, 0x3f, 0x2b,
-    0x45, 0x40, 0x3a, 0x43, 0x36, 0x39, 0x3f, 0x3d, 0x3a, 0x3c, 0x35, 0x3b,
-    0x36, 0x3f, 0x45, 0x3e, 0x45, 0x40, 0x3f, 0x36, 0x45, 0x42, 0x35, 0x3e,
-    0x3a, 0x3a, 0x3f, 0x40, 0x3e, 0x3c, 0x39, 0x46, 0x43, 0x3e, 0x3f, 0x3f,
-    0x40, 0x3c, 0x40, 0x4b, 0x41, 0x35, 0x3b, 0x3e, 0x49, 0x32, 0x3e, 0x41,
-    0x31, 0x37, 0x3d, 0x3b, 0x3f, 0x45, 0x50, 0x3a, 0x3f, 0x3c, 0x44, 0x36,
-    0x43, 0x37, 0x3d, 0x4b, 0x29, 0x39, 0x2f, 0x38, 0x45, 0x36, 0x40, 0x4e,
-    0x39, 0x3f, 0x48, 0x43, 0x23, 0x3c, 0x06, 0x51, 0x37, 0x3b, 0x3e, 0x3b,
-    0x28, 0x45, 0x2b, 0x37, 0x3f, 0x33, 0x3f, 0x41, 0x31, 0x36, 0x33, 0x3a,
-    0x3a, 0x35, 0x3b, 0x33, 0x3e, 0x36, 0x35, 0x40, 0x3a, 0x34, 0x3a, 0x38,
-    0x34, 0x3a, 0x3a, 0x34, 0x42, 0x45, 0x40, 0x3e, 0x40, 0x38, 0x39, 0x34,
-    0x38, 0x37, 0x3f, 0x3e, 0x3c, 0x32, 0x3f, 0x46, 0x3f, 0x44, 0x3b, 0x3e,
-    0x44, 0x45, 0x36, 0x3e, 0x36, 0x3f, 0x3b, 0x40, 0x39, 0x34, 0x38, 0x41,
-    0x42, 0x3e, 0x3d, 0x47, 0x3e, 0x45, 0x33, 0x40, 0x3e, 0x3a, 0x44, 0x3d,
-    0x3c, 0x3a, 0x3a, 0x2c, 0x3a, 0x3d, 0x35, 0x45, 0x3c, 0x41, 0x36, 0x30,
-    0x32, 0x32, 0x3a, 0x3b, 0x35, 0x3c, 0x43, 0x2d, 0x35, 0x3f, 0x41, 0x37,
-    0x3f, 0x46, 0x34, 0x39, 0x3c, 0x43, 0x40, 0x3e, 0x3e, 0x36, 0x3e, 0x3c,
-    0x37, 0x3a, 0x3d, 0x3a, 0x3c, 0x38, 0x44, 0x41, 0x3f, 0x3b, 0x3c, 0x47,
-    0x40, 0x3b, 0x41, 0x47, 0x3e, 0x45, 0x39, 0x3e, 0x37, 0x45, 0x4b, 0x4c,
-    0x37, 0x37, 0x37, 0x3c, 0x3c, 0x3d, 0x40, 0x38, 0x39, 0x3e, 0x43, 0x3f,
-    0x38, 0x45, 0x51, 0x3c, 0x31, 0x34, 0x3b, 0x48, 0x46, 0x41, 0x40, 0x40,
-    0x2c, 0x39, 0x32, 0x42, 0x3c, 0x2e, 0x49, 0x4d, 0x3c, 0x3f, 0x45, 0x38,
-    0x20, 0x38, 0x03, 0x55, 0x33, 0x3e, 0x32, 0x39, 0x32, 0x3b, 0x24, 0x2b,
-    0x42, 0x35, 0x45, 0x32, 0x2e, 0x3b, 0x2f, 0x3f, 0x3c, 0x37, 0x39, 0x3b,
-    0x34, 0x34, 0x3d, 0x36, 0x3d, 0x39, 0x3b, 0x30, 0x3c, 0x3e, 0x40, 0x32,
-    0x3d, 0x3c, 0x3c, 0x3e, 0x33, 0x33, 0x3f, 0x3a, 0x33, 0x3e, 0x46, 0x36,
-    0x3a, 0x3d, 0x40, 0x40, 0x3f, 0x41, 0x3a, 0x42, 0x34, 0x32, 0x34, 0x46,
-    0x3b, 0x31, 0x40, 0x37, 0x37, 0x32, 0x3e, 0x47, 0x3f, 0x3b, 0x3e, 0x43,
-    0x49, 0x45, 0x3a, 0x3d, 0x3e, 0x44, 0x40, 0x31, 0x39, 0x3e, 0x3b, 0x2d,
-    0x3b, 0x3a, 0x33, 0x3d, 0x39, 0x37, 0x3e, 0x32, 0x41, 0x3c, 0x3a, 0x37,
-    0x3b, 0x40, 0x39, 0x2f, 0x3e, 0x3f, 0x47, 0x32, 0x3e, 0x3b, 0x3e, 0x3e,
-    0x40, 0x3e, 0x40, 0x3c, 0x41, 0x39, 0x38, 0x46, 0x45, 0x32, 0x47, 0x31,
-    0x36, 0x47, 0x37, 0x49, 0x3a, 0x3f, 0x47, 0x3a, 0x41, 0x3b, 0x3c, 0x4f,
-    0x3e, 0x36, 0x3b, 0x47, 0x35, 0x39, 0x41, 0x4e, 0x3d, 0x3e, 0x3b, 0x46,
-    0x38, 0x39, 0x3b, 0x45, 0x3e, 0x3f, 0x44, 0x42, 0x44, 0x3f, 0x55, 0x3b,
-    0x41, 0x3d, 0x43, 0x43, 0x37, 0x3f, 0x3d, 0x4c, 0x28, 0x3d, 0x36, 0x3c,
-    0x3e, 0x3e, 0x48, 0x50, 0x3e, 0x39, 0x45, 0x41, 0x22, 0x37, 0x07, 0x4f,
-    0x2e, 0x33, 0x38, 0x3f, 0x31, 0x3a, 0x1b, 0x36, 0x34, 0x38, 0x3c, 0x37,
-    0x37, 0x3e, 0x36, 0x35, 0x36, 0x3b, 0x3d, 0x38, 0x42, 0x48, 0x3d, 0x40,
-    0x40, 0x44, 0x3d, 0x39, 0x37, 0x3b, 0x3d, 0x33, 0x3d, 0x35, 0x42, 0x3c,
-    0x39, 0x3e, 0x43, 0x2d, 0x3c, 0x40, 0x43, 0x43, 0x45, 0x35, 0x3c, 0x44,
-    0x34, 0x3c, 0x3d, 0x31, 0x39, 0x40, 0x39, 0x3d, 0x3e, 0x34, 0x3e, 0x3b,
-    0x40, 0x38, 0x42, 0x4a, 0x40, 0x3b, 0x35, 0x3d, 0x36, 0x38, 0x35, 0x42,
-    0x3c, 0x3c, 0x3d, 0x3b, 0x38, 0x39, 0x45, 0x28, 0x3a, 0x37, 0x37, 0x35,
-    0x3a, 0x3d, 0x35, 0x2a, 0x3c, 0x3f, 0x37, 0x34, 0x37, 0x3f, 0x3e, 0x2b,
-    0x39, 0x43, 0x3b, 0x45, 0x35, 0x36, 0x36, 0x42, 0x33, 0x38, 0x3b, 0x35,
-    0x31, 0x3f, 0x41, 0x41, 0x3c, 0x41, 0x45, 0x42, 0x3b, 0x3c, 0x39, 0x46,
-    0x3c, 0x3e, 0x3a, 0x41, 0x39, 0x3d, 0x41, 0x4b, 0x40, 0x3f, 0x43, 0x3d,
-    0x39, 0x39, 0x44, 0x44, 0x37, 0x42, 0x3f, 0x44, 0x3e, 0x37, 0x42, 0x35,
-    0x44, 0x3f, 0x40, 0x42, 0x3f, 0x3a, 0x47, 0x3d, 0x38, 0x3a, 0x3b, 0x3a,
-    0x42, 0x36, 0x3a, 0x97, 0x32, 0x31, 0x30, 0x36, 0x47, 0x3e, 0x46, 0x51,
-    0x42, 0x34, 0x50, 0x34, 0x26, 0x3b, 0x06, 0x55, 0x3c, 0x3b, 0x2d, 0x3a,
-    0x37, 0x37, 0x1b, 0x32, 0x39, 0x3d, 0x36, 0x40, 0x3b, 0x3f, 0x33, 0x33,
-    0x3d, 0x37, 0x35, 0x37, 0x44, 0x3f, 0x35, 0x39, 0x33, 0x3c, 0x43, 0x39,
-    0x3f, 0x42, 0x3e, 0x34, 0x38, 0x38, 0x39, 0x3c, 0x48, 0x3c, 0x2f, 0x30,
-    0x40, 0x3c, 0x41, 0x3e, 0x3f, 0x3e, 0x36, 0x43, 0x40, 0x3c, 0x36, 0x43,
-    0x43, 0x38, 0x3a, 0x47, 0x3e, 0x37, 0x39, 0x3a, 0x43, 0x45, 0x38, 0x43,
-    0x3b, 0x45, 0x37, 0x44, 0x36, 0x45, 0x3a, 0x3e, 0x3e, 0x3e, 0x3d, 0x33,
-    0x39, 0x36, 0x48, 0x33, 0x30, 0x42, 0x33, 0x39, 0x37, 0x3a, 0x3f, 0x34,
-    0x34, 0x40, 0x40, 0x40, 0x3f, 0x3d, 0x3f, 0x33, 0x41, 0x40, 0x3b, 0x43,
-    0x3b, 0x3a, 0x40, 0x3a, 0x38, 0x3e, 0x38, 0x3b, 0x38, 0x42, 0x40, 0x40,
-    0x41, 0x35, 0x37, 0x38, 0x3b, 0x3c, 0x39, 0x4b, 0x32, 0x39, 0x42, 0x3c,
-    0x36, 0x3d, 0x32, 0x52, 0x3a, 0x31, 0x40, 0x40, 0x3a, 0x43, 0x3d, 0x46,
-    0x3c, 0x3e, 0x3e, 0x33, 0x3f, 0x41, 0x4d, 0x37, 0x39, 0x39, 0x3e, 0x3b,
-    0x40, 0x39, 0x53, 0x2d, 0x46, 0x3c, 0x32, 0x42, 0x3d, 0x40, 0x40, 0x4d,
-    0x2e, 0x34, 0x39, 0x3b, 0x46, 0x3b, 0x42, 0x4f, 0x3d, 0x39, 0x4e, 0x36,
-    0x1a, 0x31, 0x0e, 0x56, 0x36, 0x42, 0x38, 0x44, 0x36, 0x3a, 0x20, 0x30,
-    0x36, 0x34, 0x37, 0x38, 0x40, 0x41, 0x2a, 0x35, 0x3b, 0x3b, 0x3a, 0x38,
-    0x33, 0x39, 0x36, 0x41, 0x43, 0x39, 0x35, 0x3d, 0x37, 0x3d, 0x33, 0x31,
-    0x45, 0x33, 0x3f, 0x3b, 0x44, 0x38, 0x39, 0x34, 0x38, 0x39, 0x38, 0x3d,
-    0x3a, 0x3a, 0x41, 0x40, 0x44, 0x3e, 0x3f, 0x45, 0x34, 0x31, 0x34, 0x43,
-    0x3b, 0x34, 0x42, 0x3c, 0x3c, 0x43, 0x35, 0x45, 0x36, 0x38, 0x3d, 0x3c,
-    0x3f, 0x3d, 0x3e, 0x45, 0x41, 0x43, 0x35, 0x3f, 0x40, 0x3f, 0x3a, 0x34,
-    0x3d, 0x32, 0x41, 0x3d, 0x48, 0x42, 0x37, 0x2a, 0x3c, 0x3a, 0x3e, 0x49,
-    0x38, 0x36, 0x38, 0x2e, 0x36, 0x37, 0x34, 0x3e, 0x3c, 0x43, 0x43, 0x39,
-    0x39, 0x3b, 0x44, 0x46, 0x44, 0x43, 0x37, 0x46, 0x43, 0x34, 0x3b, 0x35,
-    0x42, 0x41, 0x3f, 0x3d, 0x3d, 0x3a, 0x42, 0x3e, 0x38, 0x47, 0x3d, 0x49,
-    0x45, 0x49, 0x3a, 0x3c, 0x3e, 0x37, 0x40, 0x46, 0x41, 0x33, 0x45, 0x36,
-    0x37, 0x44, 0x49, 0x3b, 0x44, 0x40, 0x33, 0x46, 0x37, 0x39, 0x4e, 0x3a,
-    0x43, 0x38, 0x3a, 0x42, 0x3a, 0x3d, 0x45, 0x50, 0x26, 0x34, 0x3b, 0x3c,
-    0x46, 0x46, 0x4c, 0x54, 0x3f, 0x35, 0x4e, 0x47, 0x21, 0x39, 0x0e, 0x54,
-    0x3a, 0x3a, 0x2f, 0x40, 0x2d, 0x3a, 0x1f, 0x31, 0x31, 0x42, 0x34, 0x45,
-    0x37, 0x36, 0x30, 0x3b, 0x3a, 0x3a, 0x36, 0x40, 0x32, 0x36, 0x3c, 0x3c,
-    0x37, 0x42, 0x35, 0x3e, 0x39, 0x47, 0x36, 0x32, 0x41, 0x30, 0x42, 0x39,
-    0x39, 0x44, 0x37, 0x30, 0x41, 0x3b, 0x3d, 0x3d, 0x43, 0x3b, 0x38, 0x45,
-    0x3b, 0x3a, 0x39, 0x3a, 0x31, 0x33, 0x43, 0x46, 0x3f, 0x41, 0x44, 0x3f,
-    0x3b, 0x44, 0x3a, 0x4c, 0x33, 0x33, 0x33, 0x3e, 0x37, 0x3e, 0x45, 0x45,
-    0x36, 0x42, 0x3e, 0x43, 0x40, 0x34, 0x36, 0x31, 0x38, 0x34, 0x41, 0x3b,
-    0x32, 0x38, 0x3e, 0x29, 0x47, 0x33, 0x37, 0x45, 0x3c, 0x3d, 0x43, 0x2c,
-    0x36, 0x3a, 0x3c, 0x40, 0x3d, 0x46, 0x3c, 0x37, 0x40, 0x44, 0x37, 0x38,
-    0x3e, 0x41, 0x3c, 0x40, 0x33, 0x3f, 0x44, 0x32, 0x44, 0x3a, 0x43, 0x42,
-    0x3e, 0x38, 0x44, 0x3b, 0x41, 0x48, 0x3f, 0x4e, 0x3f, 0x44, 0x35, 0x45,
-    0x34, 0x3f, 0x42, 0x4b, 0x37, 0x37, 0x3e, 0x45, 0x46, 0x45, 0x46, 0x3d,
-    0x3e, 0x39, 0x3b, 0x3a, 0x46, 0x3a, 0x56, 0x35, 0x46, 0x3d, 0x40, 0x3b,
-    0x36, 0x39, 0x3f, 0x54, 0x27, 0x2b, 0x34, 0x3c, 0x48, 0x3d, 0x49, 0x4c,
-    0x3e, 0x3d, 0x4e, 0x42, 0x25, 0x3b, 0x10, 0x4d, 0x30, 0x36, 0x3e, 0x36,
-    0x2e, 0x31, 0x1d, 0x37, 0x3a, 0x39, 0x33, 0x3f, 0x39, 0x38, 0x2e, 0x36,
-    0x44, 0x3e, 0x41, 0x37, 0x3b, 0x30, 0x3b, 0x48, 0x31, 0x39, 0x41, 0x3e,
-    0x37, 0x37, 0x34, 0x2f, 0x35, 0x3b, 0x3a, 0x3e, 0x45, 0x3e, 0x3f, 0x35,
-    0x39, 0x39, 0x3b, 0x44, 0x43, 0x3c, 0x3e, 0x46, 0x40, 0x3a, 0x36, 0x45,
-    0x41, 0x40, 0x36, 0x44, 0x3a, 0x37, 0x47, 0x47, 0x3d, 0x36, 0x43, 0x4e,
-    0x3b, 0x38, 0x40, 0x48, 0x44, 0x43, 0x45, 0x3f, 0x43, 0x3c, 0x3b, 0x37,
-    0x43, 0x41, 0x39, 0x2f, 0x3d, 0x45, 0x3e, 0x3e, 0x42, 0x40, 0x41, 0x2f,
-    0x47, 0x38, 0x3a, 0x48, 0x3e, 0x35, 0x37, 0x2a, 0x34, 0x38, 0x41, 0x3b,
-    0x3d, 0x37, 0x3b, 0x35, 0x38, 0x3e, 0x41, 0x3c, 0x41, 0x43, 0x3d, 0x46,
-    0x47, 0x47, 0x3d, 0x35, 0x48, 0x41, 0x3d, 0x3e, 0x34, 0x47, 0x38, 0x38,
-    0x39, 0x3e, 0x38, 0x4d, 0x43, 0x36, 0x42, 0x40, 0x3e, 0x41, 0x3f, 0x4c,
-    0x3e, 0x3e, 0x37, 0x44, 0x3e, 0x3b, 0x47, 0x3e, 0x3f, 0x3b, 0x39, 0x3c,
-    0x3c, 0x3c, 0x53, 0x3b, 0x3b, 0x32, 0x3e, 0x3f, 0x32, 0x3c, 0x37, 0x4b,
-    0x33, 0x30, 0x2f, 0x41, 0x47, 0x42, 0x49, 0x4f, 0x3b, 0x42, 0x4c, 0x44,
-    0x1f, 0x37, 0x16, 0x4e, 0x3b, 0x3f, 0x30, 0x36, 0x35, 0x38, 0x26, 0x36,
-    0x32, 0x3b, 0x38, 0x3c, 0x30, 0x3e, 0x34, 0x3e, 0x3d, 0x34, 0x39, 0x3c,
-    0x36, 0x47, 0x34, 0x41, 0x31, 0x39, 0x44, 0x3e, 0x39, 0x41, 0x32, 0x36,
-    0x3b, 0x3f, 0x32, 0x3d, 0x36, 0x3e, 0x40, 0x3d, 0x45, 0x32, 0x45, 0x42,
-    0x38, 0x43, 0x40, 0x42, 0x34, 0x3a, 0x43, 0x38, 0x47, 0x3f, 0x41, 0x47,
-    0x34, 0x44, 0x41, 0x39, 0x3c, 0x46, 0x36, 0x4f, 0x41, 0x3e, 0x38, 0x38,
-    0x3a, 0x3b, 0x43, 0x44, 0x37, 0x3f, 0x35, 0x43, 0x34, 0x3d, 0x40, 0x32,
-    0x3a, 0x3b, 0x3d, 0x34, 0x35, 0x43, 0x31, 0x2c, 0x3b, 0x36, 0x38, 0x41,
-    0x3c, 0x38, 0x3d, 0x31, 0x45, 0x46, 0x42, 0x41, 0x33, 0x3f, 0x3f, 0x3a,
-    0x36, 0x3f, 0x3c, 0x3c, 0x3c, 0x3e, 0x39, 0x3e, 0x40, 0x37, 0x47, 0x3e,
-    0x35, 0x39, 0x3d, 0x3d, 0x37, 0x36, 0x3e, 0x45, 0x38, 0x3d, 0x45, 0x43,
-    0x3a, 0x32, 0x3b, 0x3a, 0x32, 0x3c, 0x3d, 0x43, 0x3d, 0x33, 0x3b, 0x3d,
-    0x46, 0x3a, 0x44, 0x45, 0x3b, 0x3e, 0x3c, 0x42, 0x37, 0x37, 0x52, 0x2a,
-    0x3a, 0x35, 0x35, 0x3f, 0x40, 0x38, 0x40, 0x5b, 0x35, 0x32, 0x2b, 0x3d,
-    0x4a, 0x3c, 0x46, 0x56, 0x44, 0x30, 0x4d, 0x39, 0x20, 0x32, 0x0f, 0x4f,
-    0x33, 0x3c, 0x35, 0x35, 0x3a, 0x45, 0x29, 0x3b, 0x31, 0x38, 0x34, 0x38,
-    0x42, 0x45, 0x37, 0x3e, 0x37, 0x2e, 0x36, 0x43, 0x3f, 0x38, 0x2f, 0x41,
-    0x3f, 0x41, 0x3c, 0x31, 0x37, 0x36, 0x37, 0x39, 0x41, 0x3a, 0x3a, 0x40,
-    0x3e, 0x47, 0x3d, 0x37, 0x3c, 0x38, 0x35, 0x39, 0x3a, 0x43, 0x3f, 0x42,
-    0x42, 0x38, 0x3e, 0x40, 0x3c, 0x3a, 0x45, 0x48, 0x37, 0x3a, 0x3e, 0x35,
-    0x3a, 0x3d, 0x45, 0x4a, 0x3d, 0x37, 0x38, 0x3a, 0x3d, 0x46, 0x46, 0x41,
-    0x37, 0x41, 0x40, 0x48, 0x37, 0x34, 0x3b, 0x2c, 0x39, 0x34, 0x37, 0x35,
-    0x3a, 0x43, 0x39, 0x2e, 0x39, 0x3f, 0x40, 0x3e, 0x40, 0x40, 0x3c, 0x2d,
-    0x3e, 0x3c, 0x37, 0x39, 0x3c, 0x3b, 0x3d, 0x3f, 0x41, 0x48, 0x3b, 0x3d,
-    0x3b, 0x41, 0x45, 0x3e, 0x3a, 0x38, 0x3f, 0x3c, 0x3d, 0x3e, 0x40, 0x42,
-    0x46, 0x38, 0x43, 0x34, 0x35, 0x47, 0x3d, 0x46, 0x3f, 0x3e, 0x32, 0x3f,
-    0x3e, 0x3d, 0x47, 0x46, 0x38, 0x41, 0x45, 0x3f, 0x34, 0x3f, 0x41, 0x43,
-    0x3e, 0x3e, 0x44, 0x3b, 0x3b, 0x36, 0x51, 0x32, 0x37, 0x3c, 0x42, 0x43,
-    0x33, 0x39, 0x42, 0x61, 0x2c, 0x3b, 0x2e, 0x39, 0x42, 0x39, 0x42, 0x54,
-    0x3c, 0x3a, 0x48, 0x35, 0x26, 0x34, 0x15, 0x51, 0x35, 0x40, 0x36, 0x3c,
-    0x2d, 0x37, 0x25, 0x38, 0x33, 0x3d, 0x3d, 0x39, 0x3e, 0x3b, 0x2e, 0x4b,
-    0x3d, 0x3b, 0x42, 0x37, 0x37, 0x40, 0x37, 0x40, 0x35, 0x45, 0x37, 0x37,
-    0x3f, 0x41, 0x36, 0x39, 0x3c, 0x32, 0x3e, 0x38, 0x41, 0x40, 0x3e, 0x3f,
-    0x3b, 0x3c, 0x43, 0x35, 0x3e, 0x3d, 0x44, 0x44, 0x3a, 0x36, 0x39, 0x3f,
-    0x3a, 0x31, 0x42, 0x4d, 0x40, 0x33, 0x40, 0x45, 0x44, 0x3d, 0x40, 0x49,
-    0x41, 0x3f, 0x42, 0x3a, 0x34, 0x46, 0x38, 0x46, 0x42, 0x34, 0x3a, 0x40,
-    0x40, 0x41, 0x3d, 0x32, 0x35, 0x48, 0x35, 0x3e, 0x44, 0x41, 0x40, 0x2c,
-    0x46, 0x38, 0x38, 0x3f, 0x36, 0x40, 0x38, 0x2a, 0x43, 0x41, 0x3e, 0x35,
-    0x46, 0x3a, 0x45, 0x46, 0x46, 0x42, 0x3a, 0x3b, 0x40, 0x38, 0x35, 0x43,
-    0x38, 0x3d, 0x3b, 0x41, 0x36, 0x44, 0x3f, 0x3f, 0x34, 0x3e, 0x3c, 0x3d,
-    0x49, 0x36, 0x37, 0x4b, 0x38, 0x3c, 0x43, 0x37, 0x3a, 0x3f, 0x31, 0x45,
-    0x3b, 0x39, 0x3f, 0x40, 0x37, 0x3c, 0x42, 0x3f, 0x3c, 0x33, 0x40, 0x3b,
-    0x32, 0x3c, 0x52, 0x31, 0x3d, 0x44, 0x3b, 0x31, 0x46, 0x38, 0x40, 0x60,
-    0x2b, 0x3c, 0x37, 0x34, 0x43, 0x38, 0x45, 0x57, 0x37, 0x39, 0x49, 0x33,
-    0x2d, 0x3f, 0x18, 0x4e, 0x39, 0x39, 0x32, 0x3b, 0x34, 0x3b, 0x2c, 0x45,
-    0x33, 0x37, 0x45, 0x42, 0x3d, 0x37, 0x2a, 0x4c, 0x3d, 0x3f, 0x3c, 0x36,
-    0x37, 0x3c, 0x39, 0x47, 0x3d, 0x44, 0x3d, 0x40, 0x3d, 0x41, 0x34, 0x3e,
-    0x40, 0x34, 0x3b, 0x3a, 0x41, 0x36, 0x37, 0x40, 0x3e, 0x3f, 0x3a, 0x36,
-    0x3e, 0x35, 0x3b, 0x48, 0x41, 0x40, 0x3c, 0x42, 0x34, 0x41, 0x3f, 0x44,
-    0x34, 0x39, 0x33, 0x39, 0x39, 0x47, 0x40, 0x48, 0x38, 0x3a, 0x43, 0x43,
-    0x48, 0x3a, 0x3f, 0x46, 0x35, 0x3a, 0x33, 0x36, 0x32, 0x3c, 0x40, 0x34,
-    0x40, 0x3a, 0x42, 0x3a, 0x39, 0x38, 0x41, 0x35, 0x3a, 0x3f, 0x35, 0x40,
-    0x3f, 0x39, 0x39, 0x36, 0x38, 0x40, 0x3e, 0x3e, 0x3a, 0x31, 0x32, 0x44,
-    0x40, 0x47, 0x3a, 0x3c, 0x43, 0x43, 0x46, 0x48, 0x40, 0x35, 0x3d, 0x37,
-    0x44, 0x37, 0x33, 0x44, 0x3b, 0x3e, 0x3f, 0x37, 0x36, 0x3a, 0x38, 0x47,
-    0x3a, 0x44, 0x36, 0x42, 0x3e, 0x44, 0x34, 0x46, 0x33, 0x43, 0x44, 0x3e,
-    0x30, 0x48, 0x37, 0x38, 0x33, 0x3c, 0x46, 0x42, 0x38, 0x3d, 0x50, 0x39,
-    0x33, 0x38, 0x3e, 0x40, 0x3b, 0x2b, 0x3b, 0x5f, 0x2b, 0x32, 0x2f, 0x37,
-    0x3f, 0x3a, 0x40, 0x4e, 0x34, 0x38, 0x47, 0x37, 0x27, 0x2b, 0x1b, 0x4f,
-    0x36, 0x38, 0x3a, 0x3a, 0x3b, 0x38, 0x2e, 0x3f, 0x3f, 0x42, 0x42, 0x42,
-    0x36, 0x3e, 0x3c, 0x55, 0x39, 0x40, 0x44, 0x43, 0x3e, 0x33, 0x3c, 0x43,
-    0x38, 0x44, 0x3b, 0x46, 0x3f, 0x45, 0x34, 0x38, 0x3c, 0x41, 0x42, 0x3d,
-    0x42, 0x36, 0x43, 0x3f, 0x3c, 0x39, 0x3e, 0x39, 0x39, 0x42, 0x33, 0x47,
-    0x36, 0x3d, 0x3f, 0x3b, 0x40, 0x39, 0x3b, 0x49, 0x36, 0x40, 0x3d, 0x41,
-    0x40, 0x34, 0x3b, 0x4e, 0x3b, 0x36, 0x3b, 0x45, 0x40, 0x32, 0x3b, 0x49,
-    0x37, 0x38, 0x3a, 0x47, 0x37, 0x40, 0x3e, 0x38, 0x40, 0x3f, 0x3c, 0x3a,
-    0x47, 0x41, 0x42, 0x30, 0x40, 0x3c, 0x42, 0x3f, 0x31, 0x44, 0x39, 0x38,
-    0x3b, 0x38, 0x42, 0x43, 0x41, 0x35, 0x3a, 0x39, 0x3e, 0x38, 0x39, 0x3e,
-    0x3c, 0x42, 0x3d, 0x49, 0x47, 0x3c, 0x3f, 0x35, 0x41, 0x3a, 0x36, 0x43,
-    0x43, 0x3b, 0x39, 0x3b, 0x36, 0x43, 0x43, 0x4e, 0x3e, 0x35, 0x37, 0x3b,
-    0x3f, 0x37, 0x41, 0x48, 0x32, 0x44, 0x43, 0x32, 0x38, 0x39, 0x45, 0x39,
-    0x3e, 0x3d, 0x35, 0x39, 0x35, 0x39, 0x50, 0x37, 0x39, 0x40, 0x43, 0x47,
-    0x32, 0x2a, 0x40, 0x62, 0x24, 0x30, 0x36, 0x3e, 0x41, 0x32, 0x47, 0x58,
-    0x39, 0x36, 0x44, 0x34, 0x26, 0x34, 0x1e, 0x50, 0x3c, 0x3b, 0x3f, 0x42,
-    0x35, 0x3d, 0x2a, 0x4e, 0x40, 0x38, 0x36, 0x31, 0x3a, 0x30, 0x37, 0x4b,
-    0x3c, 0x3b, 0x3b, 0x41, 0x3b, 0x3c, 0x2e, 0x45, 0x44, 0x3f, 0x3b, 0x35,
-    0x3e, 0x33, 0x37, 0x3d, 0x40, 0x39, 0x39, 0x37, 0x40, 0x3e, 0x3a, 0x3e,
-    0x3c, 0x3c, 0x45, 0x40, 0x3c, 0x3f, 0x3a, 0x51, 0x47, 0x3a, 0x34, 0x39,
-    0x3b, 0x34, 0x44, 0x4c, 0x36, 0x3d, 0x3a, 0x35, 0x34, 0x36, 0x38, 0x4b,
-    0x3f, 0x40, 0x3f, 0x3e, 0x40, 0x41, 0x47, 0x43, 0x32, 0x38, 0x46, 0x44,
-    0x46, 0x43, 0x43, 0x37, 0x39, 0x49, 0x37, 0x36, 0x3e, 0x3d, 0x37, 0x3c,
-    0x39, 0x37, 0x34, 0x43, 0x45, 0x32, 0x3a, 0x3a, 0x38, 0x43, 0x3b, 0x40,
-    0x3b, 0x3f, 0x3d, 0x41, 0x40, 0x3d, 0x3a, 0x3b, 0x48, 0x37, 0x3d, 0x41,
-    0x40, 0x3e, 0x38, 0x41, 0x3d, 0x3a, 0x38, 0x49, 0x40, 0x3c, 0x42, 0x41,
-    0x3a, 0x38, 0x38, 0x4c, 0x3e, 0x41, 0x40, 0x3b, 0x3d, 0x3e, 0x3c, 0x46,
-    0x3e, 0x42, 0x41, 0x38, 0x42, 0x42, 0x41, 0x3e, 0x3e, 0x37, 0x3c, 0x43,
-    0x43, 0x3b, 0x54, 0x2b, 0x45, 0x3b, 0x43, 0x41, 0x41, 0x26, 0x3f, 0x60,
-    0x25, 0x2b, 0x2e, 0x3a, 0x40, 0x31, 0x40, 0x49, 0x40, 0x31, 0x46, 0x3c,
-    0x1e, 0x2a, 0x1a, 0x47, 0x33, 0x37, 0x37, 0x34, 0x31, 0x36, 0x25, 0x41,
-    0x2e, 0x36, 0x35, 0x33, 0x33, 0x34, 0x31, 0x45, 0x3a, 0x3f, 0x3d, 0x40,
-    0x3c, 0x41, 0x30, 0x3c, 0x3f, 0x46, 0x37, 0x3c, 0x3a, 0x3c, 0x36, 0x3a,
-    0x47, 0x3d, 0x31, 0x3f, 0x40, 0x3e, 0x36, 0x44, 0x41, 0x3d, 0x36, 0x3f,
-    0x37, 0x3f, 0x34, 0x4b, 0x31, 0x47, 0x43, 0x3e, 0x3e, 0x3a, 0x3b, 0x4b,
-    0x37, 0x32, 0x38, 0x3d, 0x37, 0x47, 0x46, 0x4d, 0x36, 0x3c, 0x3f, 0x3a,
-    0x41, 0x31, 0x47, 0x43, 0x3d, 0x3d, 0x3e, 0x35, 0x3d, 0x46, 0x49, 0x2a,
-    0x37, 0x3c, 0x39, 0x3d, 0x47, 0x3c, 0x34, 0x2c, 0x3e, 0x38, 0x47, 0x32,
-    0x36, 0x36, 0x41, 0x38, 0x35, 0x44, 0x48, 0x3b, 0x39, 0x3e, 0x38, 0x3e,
-    0x40, 0x36, 0x37, 0x46, 0x39, 0x3b, 0x34, 0x45, 0x40, 0x3b, 0x48, 0x36,
-    0x34, 0x44, 0x37, 0x46, 0x3f, 0x42, 0x33, 0x36, 0x43, 0x3c, 0x41, 0x46,
-    0x31, 0x42, 0x43, 0x44, 0x44, 0x3e, 0x42, 0x3b, 0x3b, 0x3a, 0x3c, 0x37,
-    0x42, 0x41, 0x46, 0x38, 0x41, 0x3b, 0x40, 0x44, 0x37, 0x3c, 0x4c, 0x2e,
-    0x3a, 0x3e, 0x3b, 0x36, 0x33, 0x27, 0x37, 0x5d, 0x27, 0x34, 0x32, 0x41,
-    0x41, 0x3f, 0x40, 0x5d, 0x40, 0x3d, 0x48, 0x39, 0x2e, 0x30, 0x1f, 0x3f,
-    0x38, 0x3f, 0x40, 0x33, 0x40, 0x38, 0x31, 0x3f, 0x42, 0x3e, 0x3b, 0x3a,
-    0x42, 0x36, 0x3a, 0x42, 0x3c, 0x3b, 0x3d, 0x41, 0x3d, 0x40, 0x40, 0x3e,
-    0x36, 0x41, 0x47, 0x3d, 0x33, 0x32, 0x33, 0x44, 0x3e, 0x3a, 0x3e, 0x3d,
-    0x45, 0x3f, 0x38, 0x3f, 0x40, 0x3a, 0x3c, 0x46, 0x32, 0x42, 0x3c, 0x51,
-    0x33, 0x38, 0x3a, 0x38, 0x41, 0x34, 0x45, 0x4e, 0x35, 0x3c, 0x42, 0x3e,
-    0x3f, 0x45, 0x44, 0x4e, 0x39, 0x47, 0x3a, 0x33, 0x3e, 0x3b, 0x45, 0x42,
-    0x37, 0x3a, 0x3e, 0x33, 0x41, 0x48, 0x32, 0x2a, 0x3b, 0x37, 0x3f, 0x3d,
-    0x3a, 0x42, 0x41, 0x2f, 0x34, 0x3e, 0x49, 0x3b, 0x38, 0x3e, 0x3d, 0x3a,
-    0x37, 0x3c, 0x44, 0x41, 0x39, 0x42, 0x3f, 0x39, 0x40, 0x35, 0x3d, 0x41,
-    0x3b, 0x45, 0x44, 0x48, 0x3d, 0x42, 0x36, 0x33, 0x3e, 0x44, 0x3f, 0x41,
-    0x42, 0x40, 0x49, 0x34, 0x48, 0x41, 0x3f, 0x40, 0x3c, 0x45, 0x47, 0x34,
-    0x41, 0x37, 0x47, 0x3e, 0x41, 0x41, 0x39, 0x42, 0x3f, 0x3a, 0x46, 0x33,
-    0x39, 0x41, 0x38, 0x38, 0x3e, 0x42, 0x41, 0x38, 0x35, 0x32, 0x33, 0x38,
-    0x3a, 0x3f, 0x45, 0x66, 0x33, 0x47, 0x38, 0x3c, 0x41, 0x2f, 0x48, 0x55,
-    0x33, 0x3e, 0x49, 0x3b, 0x3c, 0x30, 0x24, 0x45, 0x3c, 0x44, 0x43, 0x32,
-    0x3d, 0x3f, 0x35, 0x3b, 0x3e, 0x36, 0x38, 0x3a, 0x36, 0x37, 0x3b, 0x41,
-    0x38, 0x42, 0x3e, 0x43, 0x39, 0x3f, 0x3c, 0x40, 0x37, 0x43, 0x3e, 0x3b,
-    0x3d, 0x35, 0x35, 0x3d, 0x43, 0x3f, 0x3a, 0x35, 0x37, 0x3c, 0x31, 0x47,
-    0x44, 0x45, 0x40, 0x32, 0x44, 0x36, 0x38, 0x51, 0x3c, 0x41, 0x45, 0x37,
-    0x39, 0x44, 0x3e, 0x4f, 0x3c, 0x3a, 0x38, 0x40, 0x3f, 0x34, 0x39, 0x4e,
-    0x3d, 0x39, 0x45, 0x3f, 0x3e, 0x3c, 0x3b, 0x42, 0x3b, 0x3b, 0x34, 0x3d,
-    0x41, 0x44, 0x39, 0x2e, 0x37, 0x44, 0x45, 0x37, 0x3d, 0x41, 0x3f, 0x33,
-    0x3f, 0x3e, 0x3e, 0x40, 0x44, 0x3f, 0x37, 0x32, 0x35, 0x3e, 0x43, 0x41,
-    0x39, 0x37, 0x35, 0x3f, 0x48, 0x3d, 0x43, 0x49, 0x38, 0x35, 0x3f, 0x48,
-    0x3b, 0x3a, 0x34, 0x3f, 0x3c, 0x44, 0x3a, 0x40, 0x36, 0x35, 0x44, 0x36,
-    0x44, 0x3b, 0x3d, 0x38, 0x3c, 0x44, 0x47, 0x3a, 0x3b, 0x45, 0x41, 0x3a,
-    0x39, 0x35, 0x44, 0x3a, 0x49, 0x36, 0x48, 0x31, 0x42, 0x43, 0x42, 0x34,
-    0x41, 0x40, 0x4d, 0x36, 0x3e, 0x35, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x39,
-    0x3c, 0x44, 0x3f, 0x39, 0x3a, 0x36, 0x3d, 0x36, 0x3a, 0x3a, 0x34, 0x3b,
-    0x38, 0x2f, 0x40, 0x34, 0x32, 0x4d, 0x43, 0x45, 0x4e, 0x3f, 0x48, 0x35,
-    0x3b, 0x4d, 0x4f, 0x39, 0x42, 0x36, 0x46, 0x36, 0x4a, 0x3c, 0x37, 0x41,
-    0x40, 0x43, 0x50, 0x36, 0x3e, 0x39, 0x44, 0x40, 0x36, 0x47, 0x3f, 0x36,
-    0x45, 0x40, 0x45, 0x41, 0x3b, 0x37, 0x41, 0x39, 0x3b, 0x48, 0x37, 0x34,
-    0x41, 0x45, 0x49, 0x3f, 0x39, 0x49, 0x3f, 0x3a, 0x42, 0x34, 0x38, 0x37,
-    0x44, 0x34, 0x3c, 0x3d, 0x40, 0x47, 0x3a, 0x36, 0x3f, 0x3c, 0x41, 0x3e,
-    0x47, 0x46, 0x46, 0x43, 0x3f, 0x38, 0x3b, 0x40, 0x3f, 0x48, 0x3b, 0x4c,
-    0x3d, 0x4b, 0x34, 0x3b, 0x44, 0x43, 0x3c, 0x49, 0x38, 0x42, 0x41, 0x36,
-    0x33, 0x36, 0x40, 0x46, 0x40, 0x3a, 0x42, 0x3c, 0x3d, 0x35, 0x3c, 0x52,
-    0x3e, 0x40, 0x43, 0x43, 0x41, 0x3b, 0x3e, 0x44, 0x3f, 0x40, 0x40, 0x43,
-    0x3d, 0x3f, 0x36, 0x42, 0x3f, 0x3c, 0x34, 0x3d, 0x33, 0x41, 0x3c, 0x39,
-    0x34, 0x43, 0x3f, 0x34, 0x3c, 0x3a, 0x3a, 0x37, 0x42, 0x41, 0x40, 0x3e,
-    0x3d, 0x3c, 0x41, 0x3c, 0x38, 0x33, 0x49, 0x46, 0x40, 0x40, 0x3a, 0x46,
-    0x38, 0x3c, 0x37, 0x34, 0x3e, 0x3d, 0x32, 0x38, 0x3c, 0x4c, 0x3a, 0x34,
-    0x35, 0x32, 0x39, 0x40, 0x3a, 0x58, 0x40, 0x46, 0x42, 0x33, 0x45, 0x39,
-    0x34, 0x4f, 0x53, 0x45, 0x43, 0x3e, 0x41, 0x36, 0x3e, 0x3f, 0x40, 0x47,
-    0x4e, 0x3d, 0x53, 0x2b, 0x41, 0x36, 0x3e, 0x38, 0x47, 0x41, 0x3f, 0x34,
-    0x47, 0x40, 0x38, 0x39, 0x3d, 0x42, 0x3f, 0x3c, 0x48, 0x3a, 0x35, 0x3c,
-    0x45, 0x49, 0x3c, 0x33, 0x33, 0x3f, 0x3c, 0x46, 0x43, 0x3f, 0x45, 0x31,
-    0x35, 0x43, 0x46, 0x3a, 0x45, 0x3c, 0x37, 0x3a, 0x37, 0x36, 0x35, 0x3f,
-    0x38, 0x49, 0x34, 0x3f, 0x3c, 0x42, 0x49, 0x3e, 0x3e, 0x3c, 0x39, 0x49,
-    0x3e, 0x3c, 0x3b, 0x43, 0x44, 0x45, 0x39, 0x4b, 0x47, 0x47, 0x3e, 0x33,
-    0x3c, 0x31, 0x34, 0x4f, 0x45, 0x43, 0x40, 0x3d, 0x42, 0x3b, 0x43, 0x50,
-    0x3c, 0x3b, 0x37, 0x42, 0x47, 0x42, 0x3e, 0x4a, 0x3f, 0x3a, 0x48, 0x3d,
-    0x48, 0x45, 0x3e, 0x40, 0x3a, 0x3c, 0x3d, 0x39, 0x41, 0x42, 0x3c, 0x42,
-    0x43, 0x3c, 0x3b, 0x3d, 0x47, 0x49, 0x38, 0x3c, 0x46, 0x3a, 0x3c, 0x3f,
-    0x3a, 0x46, 0x3a, 0x3b, 0x3d, 0x3a, 0x49, 0x46, 0x38, 0x40, 0x3e, 0x38,
-    0x37, 0x32, 0x40, 0x3c, 0x42, 0x3d, 0x3b, 0x40, 0x3a, 0x38, 0x49, 0x33,
-    0x40, 0x38, 0x2b, 0x3a, 0x3c, 0x4f, 0x4d, 0x3e, 0x35, 0x3d, 0x3b, 0x40,
-    0x3a, 0x54, 0x3e, 0x3e, 0x43, 0x30, 0x47, 0x3d, 0x3b, 0x53, 0x52, 0x4a,
-    0x43, 0x41, 0x49, 0x37, 0x3b, 0x35, 0x44, 0x3c, 0x45, 0x40, 0x4f, 0x36,
-    0x4b, 0x42, 0x41, 0x3a, 0x41, 0x44, 0x47, 0x32, 0x43, 0x35, 0x3f, 0x37,
-    0x43, 0x41, 0x43, 0x36, 0x3f, 0x3b, 0x3d, 0x38, 0x3d, 0x40, 0x42, 0x36,
-    0x44, 0x3a, 0x39, 0x47, 0x37, 0x34, 0x42, 0x3a, 0x37, 0x38, 0x37, 0x3f,
-    0x36, 0x3b, 0x45, 0x3f, 0x3f, 0x3d, 0x39, 0x3d, 0x39, 0x41, 0x37, 0x3f,
-    0x3f, 0x3d, 0x3f, 0x41, 0x43, 0x41, 0x45, 0x43, 0x41, 0x3c, 0x3e, 0x40,
-    0x40, 0x39, 0x41, 0x4f, 0x47, 0x42, 0x46, 0x48, 0x3b, 0x3b, 0x3c, 0x46,
-    0x47, 0x3e, 0x46, 0x37, 0x38, 0x3d, 0x38, 0x52, 0x36, 0x46, 0x3c, 0x3a,
-    0x3b, 0x37, 0x48, 0x4b, 0x3f, 0x42, 0x3c, 0x36, 0x40, 0x37, 0x33, 0x4c,
-    0x39, 0x34, 0x41, 0x34, 0x3f, 0x3b, 0x35, 0x4b, 0x3b, 0x45, 0x43, 0x31,
-    0x3e, 0x39, 0x30, 0x3d, 0x32, 0x43, 0x44, 0x3c, 0x3e, 0x38, 0x43, 0x41,
-    0x3e, 0x37, 0x41, 0x39, 0x39, 0x44, 0x43, 0x38, 0x3f, 0x37, 0x48, 0x3f,
-    0x3b, 0x44, 0x37, 0x3f, 0x3a, 0x3f, 0x3b, 0x33, 0x42, 0x3e, 0x2f, 0x42,
-    0x44, 0x4f, 0x52, 0x3c, 0x34, 0x33, 0x39, 0x46, 0x31, 0x55, 0x43, 0x4e,
-    0x49, 0x38, 0x4d, 0x48, 0x34, 0x4d, 0x5c, 0x4d, 0x49, 0x37, 0x4f, 0x40,
-    0x3c, 0x3d, 0x41, 0x42, 0x3f, 0x51, 0x4b, 0x2f, 0x46, 0x35, 0x39, 0x3c,
-    0x49, 0x3d, 0x4e, 0x32, 0x43, 0x47, 0x31, 0x3e, 0x42, 0x4a, 0x4c, 0x39,
-    0x43, 0x46, 0x3e, 0x3f, 0x44, 0x3c, 0x42, 0x30, 0x3e, 0x34, 0x3b, 0x3b,
-    0x3a, 0x3c, 0x42, 0x3d, 0x3d, 0x48, 0x48, 0x36, 0x3a, 0x45, 0x38, 0x40,
-    0x3c, 0x41, 0x3f, 0x49, 0x42, 0x41, 0x38, 0x3d, 0x3d, 0x44, 0x3b, 0x3d,
-    0x35, 0x48, 0x43, 0x3b, 0x32, 0x41, 0x3e, 0x3a, 0x46, 0x41, 0x40, 0x54,
-    0x38, 0x3f, 0x3c, 0x36, 0x3b, 0x36, 0x43, 0x50, 0x38, 0x3c, 0x44, 0x3b,
-    0x43, 0x47, 0x32, 0x50, 0x3d, 0x46, 0x3d, 0x3b, 0x39, 0x37, 0x3b, 0x4a,
-    0x47, 0x43, 0x46, 0x3d, 0x3d, 0x41, 0x43, 0x45, 0x3b, 0x3c, 0x39, 0x47,
-    0x43, 0x42, 0x39, 0x4c, 0x34, 0x41, 0x45, 0x3b, 0x38, 0x3e, 0x37, 0x3f,
-    0x45, 0x43, 0x39, 0x42, 0x3c, 0x3d, 0x3d, 0x3c, 0x48, 0x39, 0x3b, 0x3a,
-    0x46, 0x45, 0x3d, 0x3a, 0x3f, 0x3a, 0x45, 0x36, 0x3d, 0x43, 0x36, 0x43,
-    0x42, 0x3d, 0x41, 0x3f, 0x3a, 0x3f, 0x31, 0x37, 0x48, 0x4f, 0x4e, 0x36,
-    0x30, 0x3a, 0x3e, 0x3e, 0x38, 0x57, 0x40, 0x47, 0x47, 0x38, 0x4f, 0x46,
-    0x3d, 0x4a, 0x50, 0x4c, 0x42, 0x3b, 0x4d, 0x3d, 0x3d, 0x33, 0x40, 0x41,
-    0x48, 0x4b, 0x46, 0x39, 0x4d, 0x30, 0x45, 0x38, 0x48, 0x3c, 0x48, 0x3b,
-    0x4d, 0x40, 0x3b, 0x40, 0x46, 0x41, 0x51, 0x34, 0x40, 0x43, 0x3f, 0x42,
-    0x45, 0x42, 0x3e, 0x35, 0x3d, 0x38, 0x37, 0x3a, 0x42, 0x40, 0x43, 0x3c,
-    0x3c, 0x3d, 0x43, 0x40, 0x45, 0x3a, 0x3e, 0x3a, 0x3e, 0x40, 0x43, 0x35,
-    0x37, 0x3f, 0x3f, 0x3e, 0x39, 0x3f, 0x47, 0x38, 0x3e, 0x44, 0x3b, 0x3c,
-    0x3b, 0x32, 0x40, 0x3e, 0x42, 0x45, 0x3a, 0x52, 0x3a, 0x3e, 0x45, 0x40,
-    0x41, 0x48, 0x3f, 0x4e, 0x3e, 0x42, 0x3d, 0x39, 0x3a, 0x33, 0x3f, 0x4b,
-    0x3e, 0x38, 0x36, 0x3e, 0x31, 0x41, 0x3a, 0x40, 0x3b, 0x37, 0x3f, 0x3e,
-    0x3e, 0x3f, 0x35, 0x44, 0x3d, 0x42, 0x3d, 0x44, 0x42, 0x3f, 0x3e, 0x44,
-    0x3e, 0x45, 0x37, 0x3a, 0x3b, 0x42, 0x3f, 0x41, 0x3b, 0x3f, 0x41, 0x41,
-    0x3e, 0x34, 0x47, 0x39, 0x46, 0x46, 0x37, 0x39, 0x3f, 0x45, 0x39, 0x39,
-    0x3a, 0x40, 0x38, 0x3a, 0x31, 0x34, 0x3a, 0x41, 0x38, 0x41, 0x3a, 0x41,
-    0x44, 0x37, 0x2d, 0x41, 0x43, 0x4d, 0x4b, 0x3b, 0x2c, 0x30, 0x42, 0x3b,
-    0x31, 0x56, 0x43, 0x47, 0x47, 0x38, 0x50, 0x44, 0x40, 0x52, 0x5a, 0x50,
-    0x44, 0x3f, 0x4b, 0x35, 0x3a, 0x36, 0x41, 0x44, 0x47, 0x4e, 0x52, 0x36,
-    0x45, 0x39, 0x38, 0x3c, 0x42, 0x44, 0x40, 0x3b, 0x4b, 0x38, 0x35, 0x35,
-    0x3f, 0x40, 0x4f, 0x39, 0x3d, 0x37, 0x34, 0x3e, 0x41, 0x4c, 0x40, 0x37,
-    0x3d, 0x3b, 0x37, 0x37, 0x40, 0x42, 0x35, 0x39, 0x41, 0x42, 0x3d, 0x34,
-    0x3c, 0x37, 0x3a, 0x3d, 0x46, 0x46, 0x46, 0x3f, 0x44, 0x3d, 0x3c, 0x40,
-    0x3c, 0x3a, 0x3d, 0x3b, 0x3b, 0x41, 0x47, 0x3a, 0x43, 0x43, 0x43, 0x3b,
-    0x3e, 0x3e, 0x42, 0x46, 0x36, 0x37, 0x45, 0x35, 0x3c, 0x3b, 0x31, 0x4b,
-    0x3c, 0x3e, 0x3a, 0x3a, 0x42, 0x42, 0x34, 0x47, 0x37, 0x34, 0x41, 0x3d,
-    0x3e, 0x39, 0x43, 0x47, 0x31, 0x3b, 0x40, 0x3b, 0x42, 0x3d, 0x44, 0x44,
-    0x37, 0x39, 0x44, 0x3b, 0x40, 0x3a, 0x3d, 0x44, 0x3c, 0x40, 0x42, 0x3b,
-    0x40, 0x3e, 0x32, 0x3d, 0x3c, 0x3e, 0x44, 0x3e, 0x47, 0x3d, 0x3f, 0x2e,
-    0x3e, 0x3d, 0x3f, 0x3b, 0x3b, 0x43, 0x43, 0x3c, 0x3a, 0x3c, 0x3a, 0x36,
-    0x38, 0x46, 0x30, 0x3e, 0x3f, 0x35, 0x3e, 0x34, 0x3c, 0x34, 0x32, 0x4a,
-    0x41, 0x48, 0x48, 0x3f, 0x34, 0x37, 0x42, 0x43, 0x36, 0x59, 0x42, 0x3f,
-    0x4b, 0x3d, 0x5d, 0x45, 0x3b, 0x51, 0x51, 0x4c, 0x41, 0x40, 0x4d, 0x36,
-    0x3f, 0x34, 0x39, 0x3d, 0x4a, 0x4b, 0x4f, 0x33, 0x48, 0x32, 0x3c, 0x32,
-    0x48, 0x4c, 0x4d, 0x3a, 0x49, 0x3a, 0x3a, 0x2e, 0x4b, 0x44, 0x4f, 0x33,
-    0x3a, 0x48, 0x34, 0x43, 0x38, 0x45, 0x44, 0x35, 0x3b, 0x3f, 0x40, 0x37,
-    0x35, 0x34, 0x38, 0x3e, 0x41, 0x3e, 0x3b, 0x47, 0x41, 0x47, 0x3c, 0x3c,
-    0x39, 0x40, 0x3e, 0x45, 0x36, 0x41, 0x3f, 0x3f, 0x3c, 0x44, 0x3f, 0x43,
-    0x3d, 0x3c, 0x49, 0x42, 0x3e, 0x3f, 0x48, 0x37, 0x43, 0x37, 0x43, 0x3d,
-    0x32, 0x42, 0x44, 0x39, 0x36, 0x37, 0x40, 0x46, 0x47, 0x3d, 0x3a, 0x42,
-    0x3f, 0x38, 0x37, 0x48, 0x39, 0x40, 0x3c, 0x37, 0x33, 0x38, 0x38, 0x40,
-    0x41, 0x3c, 0x3f, 0x3b, 0x40, 0x3a, 0x47, 0x46, 0x3a, 0x37, 0x42, 0x47,
-    0x3b, 0x3f, 0x3b, 0x40, 0x33, 0x3f, 0x3a, 0x3c, 0x38, 0x3a, 0x36, 0x38,
-    0x36, 0x40, 0x48, 0x42, 0x48, 0x3c, 0x43, 0x36, 0x32, 0x3b, 0x34, 0x39,
-    0x38, 0x46, 0x37, 0x3b, 0x44, 0x34, 0x36, 0x38, 0x3c, 0x43, 0x33, 0x3c,
-    0x3b, 0x45, 0x38, 0x38, 0x44, 0x33, 0x36, 0x4a, 0x46, 0x4c, 0x4a, 0x34,
-    0x36, 0x37, 0x43, 0x42, 0x33, 0x58, 0x43, 0x48, 0x44, 0x38, 0x5f, 0x3f,
-    0x3c, 0x4d, 0x53, 0x52, 0x43, 0x47, 0x52, 0x3e, 0x3b, 0x2d, 0x3b, 0x3a,
-    0x4b, 0x49, 0x53, 0x38, 0x4c, 0x2f, 0x38, 0x31, 0x42, 0x40, 0x48, 0x3f,
-    0x44, 0x3c, 0x3c, 0x34, 0x46, 0x3f, 0x49, 0x3a, 0x43, 0x3d, 0x34, 0x42,
-    0x36, 0x47, 0x51, 0x3c, 0x3d, 0x39, 0x39, 0x3a, 0x3b, 0x35, 0x35, 0x41,
-    0x47, 0x3c, 0x3b, 0x43, 0x3f, 0x45, 0x3e, 0x40, 0x3c, 0x3f, 0x3c, 0x42,
-    0x3b, 0x3e, 0x38, 0x3f, 0x3f, 0x41, 0x39, 0x39, 0x3d, 0x43, 0x4f, 0x3d,
-    0x48, 0x3b, 0x44, 0x45, 0x3d, 0x3b, 0x49, 0x43, 0x44, 0x3d, 0x37, 0x3b,
-    0x3c, 0x45, 0x46, 0x44, 0x35, 0x3e, 0x32, 0x35, 0x34, 0x3b, 0x40, 0x43,
-    0x3e, 0x45, 0x37, 0x3d, 0x3f, 0x43, 0x36, 0x3f, 0x3f, 0x43, 0x39, 0x44,
-    0x3e, 0x3e, 0x45, 0x40, 0x3e, 0x44, 0x3b, 0x3e, 0x42, 0x42, 0x3b, 0x3d,
-    0x3a, 0x40, 0x39, 0x3a, 0x32, 0x36, 0x41, 0x30, 0x39, 0x46, 0x33, 0x3f,
-    0x46, 0x40, 0x3c, 0x31, 0x41, 0x3a, 0x3f, 0x3f, 0x3b, 0x36, 0x3f, 0x38,
-    0x36, 0x3e, 0x35, 0x35, 0x3b, 0x3d, 0x3f, 0x39, 0x46, 0x37, 0x3a, 0x47,
-    0x37, 0x39, 0x2c, 0x55, 0x40, 0x4b, 0x4a, 0x39, 0x35, 0x42, 0x3d, 0x40,
-    0x3a, 0x54, 0x41, 0x48, 0x51, 0x3b, 0x61, 0x3e, 0x3e, 0x4d, 0x51, 0x52,
-    0x3e, 0x43, 0x52, 0x41, 0x48, 0x2d, 0x35, 0x35, 0x4b, 0x44, 0x4d, 0x3c,
-    0x54, 0x33, 0x39, 0x27, 0x4a, 0x44, 0x4a, 0x41, 0x3c, 0x3a, 0x31, 0x2f,
-    0x3d, 0x42, 0x48, 0x3f, 0x42, 0x40, 0x44, 0x3b, 0x40, 0x3e, 0x49, 0x3a,
-    0x3c, 0x35, 0x30, 0x3e, 0x3e, 0x3d, 0x36, 0x3a, 0x3e, 0x3a, 0x4a, 0x3e,
-    0x3d, 0x49, 0x40, 0x43, 0x3e, 0x45, 0x3f, 0x3c, 0x3b, 0x42, 0x3a, 0x39,
-    0x3b, 0x47, 0x3f, 0x39, 0x49, 0x46, 0x3d, 0x34, 0x32, 0x44, 0x46, 0x42,
-    0x47, 0x39, 0x49, 0x48, 0x3b, 0x38, 0x45, 0x45, 0x37, 0x38, 0x46, 0x46,
-    0x37, 0x42, 0x35, 0x34, 0x45, 0x42, 0x35, 0x43, 0x3b, 0x3a, 0x43, 0x43,
-    0x40, 0x42, 0x35, 0x3f, 0x38, 0x3f, 0x3a, 0x3a, 0x3b, 0x3f, 0x3e, 0x36,
-    0x3f, 0x3c, 0x48, 0x3b, 0x3a, 0x41, 0x41, 0x35, 0x33, 0x3f, 0x3b, 0x45,
-    0x48, 0x36, 0x40, 0x38, 0x47, 0x3d, 0x35, 0x40, 0x41, 0x42, 0x41, 0x37,
-    0x41, 0x3e, 0x36, 0x48, 0x3e, 0x3c, 0x32, 0x39, 0x41, 0x40, 0x38, 0x3f,
-    0x46, 0x43, 0x33, 0x40, 0x43, 0x43, 0x3a, 0x49, 0x3f, 0x35, 0x2c, 0x5d,
-    0x43, 0x49, 0x52, 0x3b, 0x3c, 0x41, 0x40, 0x4a, 0x33, 0x50, 0x41, 0x46,
-    0x52, 0x41, 0x68, 0x48, 0x44, 0x53, 0x54, 0x55, 0x42, 0x42, 0x57, 0x44,
-    0x47, 0x35, 0x35, 0x3e, 0x4b, 0x44, 0x4e, 0x38, 0x55, 0x2f, 0x36, 0x2d,
-    0x40, 0x48, 0x4b, 0x41, 0x48, 0x36, 0x32, 0x32, 0x44, 0x42, 0x47, 0x42,
-    0x48, 0x3d, 0x3d, 0x39, 0x3e, 0x35, 0x4b, 0x39, 0x38, 0x3a, 0x39, 0x46,
-    [~830 further lines of hex byte literals elided: this hunk deletes the remainder of a large embedded binary data array; the raw 0x.. values carry no readable content and are collapsed here for brevity]
-    0x26, 0x4a, 0x4f, 0x40, 0x36, 0x32, 0x4d, 0x33, 0x2f, 0x50, 0x4d, 0x57,
-    0x3b, 0x40, 0x42, 0x44, 0x41, 0x3f, 0x52, 0x4e, 0x35, 0x41, 0x44, 0x52,
-    0x40, 0x35, 0x39, 0x4b, 0x45, 0x34, 0x2c, 0x4a, 0x3b, 0x41, 0x31, 0x33,
-    0x3f, 0x3a, 0x36, 0x3c, 0x3c, 0x33, 0x30, 0x38, 0x43, 0x3f, 0x32, 0x2d,
-    0x3f, 0x3a, 0x38, 0x41, 0x39, 0x45, 0x36, 0x2e, 0x3c, 0x38, 0x45, 0x3f,
-    0x40, 0x3f, 0x3e, 0x26, 0x41, 0x37, 0x3c, 0x44, 0x3f, 0x3f, 0x35, 0x37,
-    0x46, 0x34, 0x37, 0x3e, 0x48, 0x38, 0x36, 0x34, 0x33, 0x39, 0x40, 0x3c,
-    0x42, 0x3d, 0x3b, 0x31, 0x38, 0x3b, 0x44, 0x42, 0x45, 0x38, 0x41, 0x30,
-    0x3d, 0x42, 0x36, 0x3f, 0x3b, 0x45, 0x37, 0x32, 0x3c, 0x37, 0x3d, 0x42,
-    0x38, 0x3d, 0x2f, 0x31, 0x39, 0x40, 0x3f, 0x44, 0x3a, 0x41, 0x44, 0x46,
-    0x3d, 0x3a, 0x32, 0x3b, 0x34, 0x47, 0x36, 0x4c, 0x47, 0x35, 0x3c, 0x33,
-    0x3b, 0x3c, 0x30, 0x43, 0x43, 0x3f, 0x31, 0x40, 0x3a, 0x37, 0x30, 0x46,
-    0x39, 0x3b, 0x42, 0x40, 0x2d, 0x3f, 0x3e, 0x6a, 0x50, 0x3b, 0x31, 0x54,
-    0x47, 0x3d, 0x48, 0x4e, 0x3b, 0x41, 0x3a, 0x39, 0x49, 0x36, 0x64, 0x4e,
-    0x32, 0x39, 0x3d, 0x37, 0x42, 0x2c, 0x5c, 0x43, 0x2a, 0x4b, 0x4b, 0x46,
-    0x30, 0x29, 0x52, 0x31, 0x35, 0x44, 0x4a, 0x4b, 0x3d, 0x3b, 0x4e, 0x42,
-    0x3d, 0x39, 0x42, 0x52, 0x3f, 0x36, 0x3e, 0x50, 0x3f, 0x32, 0x35, 0x3a,
-    0x40, 0x39, 0x35, 0x48, 0x3b, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x2f, 0x36,
-    0x38, 0x34, 0x3f, 0x44, 0x32, 0x3f, 0x37, 0x33, 0x33, 0x35, 0x2e, 0x41,
-    0x37, 0x3e, 0x38, 0x28, 0x49, 0x30, 0x46, 0x39, 0x3b, 0x30, 0x38, 0x28,
-    0x3b, 0x3d, 0x3a, 0x43, 0x3f, 0x34, 0x43, 0x36, 0x39, 0x3c, 0x3e, 0x3e,
-    0x39, 0x3b, 0x39, 0x32, 0x3c, 0x36, 0x3e, 0x38, 0x34, 0x3c, 0x3a, 0x2a,
-    0x46, 0x3d, 0x40, 0x37, 0x3b, 0x39, 0x3b, 0x34, 0x38, 0x31, 0x43, 0x46,
-    0x3b, 0x43, 0x39, 0x2b, 0x38, 0x40, 0x3e, 0x39, 0x35, 0x3d, 0x2c, 0x36,
-    0x37, 0x40, 0x36, 0x40, 0x41, 0x38, 0x32, 0x3f, 0x36, 0x46, 0x34, 0x31,
-    0x40, 0x3e, 0x3c, 0x4e, 0x42, 0x3d, 0x36, 0x3f, 0x42, 0x3f, 0x33, 0x40,
-    0x34, 0x37, 0x3c, 0x3b, 0x31, 0x47, 0x32, 0x3c, 0x34, 0x3d, 0x42, 0x3b,
-    0x37, 0x41, 0x3b, 0x64, 0x52, 0x40, 0x36, 0x4e, 0x46, 0x3f, 0x3f, 0x47,
-    0x3c, 0x3a, 0x3a, 0x41, 0x4a, 0x32, 0x5e, 0x50, 0x2d, 0x39, 0x3a, 0x38,
-    0x3d, 0x2c, 0x5a, 0x3e, 0x2e, 0x47, 0x3e, 0x3e, 0x33, 0x29, 0x4c, 0x35,
-    0x30, 0x4d, 0x4d, 0x4d, 0x38, 0x42, 0x51, 0x47, 0x39, 0x3c, 0x43, 0x4b,
-    0x42, 0x3f, 0x3a, 0x4b, 0x44, 0x3f, 0x3a, 0x44, 0x3e, 0x37, 0x30, 0x45,
-    0x3d, 0x36, 0x34, 0x3f, 0x36, 0x35, 0x37, 0x36, 0x43, 0x3b, 0x37, 0x3e,
-    0x35, 0x3e, 0x32, 0x34, 0x32, 0x38, 0x3c, 0x3a, 0x3a, 0x3c, 0x30, 0x2b,
-    0x31, 0x37, 0x30, 0x42, 0x36, 0x37, 0x36, 0x2c, 0x3c, 0x31, 0x41, 0x37,
-    0x44, 0x41, 0x3b, 0x37, 0x41, 0x3f, 0x38, 0x3b, 0x3a, 0x3a, 0x3c, 0x2f,
-    0x47, 0x41, 0x3e, 0x33, 0x42, 0x3a, 0x32, 0x34, 0x44, 0x40, 0x43, 0x3d,
-    0x34, 0x41, 0x38, 0x35, 0x35, 0x3b, 0x45, 0x38, 0x32, 0x37, 0x3c, 0x2e,
-    0x39, 0x40, 0x30, 0x3e, 0x42, 0x35, 0x3d, 0x36, 0x3e, 0x3d, 0x39, 0x46,
-    0x3f, 0x36, 0x37, 0x49, 0x41, 0x39, 0x3d, 0x3d, 0x33, 0x44, 0x42, 0x50,
-    0x3d, 0x3c, 0x3e, 0x3f, 0x42, 0x42, 0x3b, 0x3d, 0x41, 0x31, 0x39, 0x3a,
-    0x44, 0x34, 0x38, 0x47, 0x44, 0x38, 0x3b, 0x42, 0x30, 0x42, 0x44, 0x57,
-    0x49, 0x3a, 0x39, 0x4f, 0x41, 0x3e, 0x40, 0x43, 0x37, 0x42, 0x3b, 0x48,
-    0x50, 0x29, 0x5b, 0x44, 0x2c, 0x40, 0x3f, 0x3c, 0x46, 0x34, 0x5c, 0x41,
-    0x2c, 0x48, 0x46, 0x46, 0x35, 0x32, 0x4c, 0x35, 0x2f, 0x3b, 0x48, 0x44,
-    0x41, 0x41, 0x49, 0x45, 0x34, 0x37, 0x44, 0x45, 0x43, 0x3b, 0x42, 0x44,
-    0x3a, 0x37, 0x48, 0x49, 0x34, 0x39, 0x33, 0x4a, 0x40, 0x3d, 0x33, 0x39,
-    0x39, 0x3b, 0x30, 0x31, 0x3d, 0x47, 0x3c, 0x3a, 0x34, 0x3c, 0x3a, 0x2b,
-    0x3a, 0x34, 0x41, 0x40, 0x42, 0x36, 0x44, 0x2c, 0x40, 0x47, 0x3b, 0x37,
-    0x38, 0x42, 0x44, 0x29, 0x36, 0x3d, 0x3d, 0x36, 0x42, 0x3b, 0x35, 0x36,
-    0x43, 0x39, 0x41, 0x3d, 0x45, 0x41, 0x31, 0x32, 0x40, 0x3d, 0x3c, 0x41,
-    0x3e, 0x3d, 0x35, 0x34, 0x32, 0x38, 0x36, 0x3f, 0x3b, 0x3d, 0x39, 0x36,
-    0x40, 0x3e, 0x3d, 0x3a, 0x3a, 0x3b, 0x3c, 0x32, 0x40, 0x34, 0x3a, 0x36,
-    0x42, 0x47, 0x3e, 0x33, 0x3a, 0x44, 0x30, 0x39, 0x40, 0x3a, 0x36, 0x44,
-    0x3c, 0x3b, 0x3f, 0x33, 0x3e, 0x3c, 0x35, 0x53, 0x43, 0x3c, 0x3f, 0x43,
-    0x3d, 0x44, 0x33, 0x47, 0x42, 0x40, 0x37, 0x3b, 0x43, 0x3f, 0x33, 0x41,
-    0x38, 0x42, 0x44, 0x3d, 0x2d, 0x3f, 0x46, 0x49, 0x4e, 0x3f, 0x36, 0x45,
-    0x45, 0x39, 0x40, 0x42, 0x39, 0x39, 0x3a, 0x42, 0x45, 0x2c, 0x61, 0x44,
-    0x30, 0x45, 0x38, 0x3a, 0x40, 0x37, 0x58, 0x39, 0x31, 0x3e, 0x3a, 0x3e,
-    0x37, 0x32, 0x4a, 0x39, 0x2e, 0x47, 0x3e, 0x4e, 0x3f, 0x3e, 0x48, 0x45,
-    0x3f, 0x48, 0x3a, 0x3f, 0x40, 0x36, 0x3a, 0x44, 0x36, 0x3e, 0x3d, 0x41,
-    0x45, 0x36, 0x36, 0x4b, 0x3a, 0x3d, 0x45, 0x48, 0x38, 0x45, 0x39, 0x38,
-    0x38, 0x3a, 0x42, 0x34, 0x3f, 0x34, 0x39, 0x34, 0x32, 0x3f, 0x3c, 0x3d,
-    0x3d, 0x47, 0x3a, 0x2f, 0x3c, 0x3e, 0x3f, 0x39, 0x35, 0x42, 0x3c, 0x2a,
-    0x3b, 0x35, 0x42, 0x44, 0x46, 0x39, 0x38, 0x39, 0x43, 0x3a, 0x38, 0x42,
-    0x3d, 0x3a, 0x40, 0x35, 0x34, 0x39, 0x3a, 0x38, 0x43, 0x42, 0x42, 0x2d,
-    0x31, 0x3b, 0x33, 0x40, 0x3b, 0x47, 0x35, 0x30, 0x3a, 0x3c, 0x3b, 0x47,
-    0x3a, 0x3c, 0x38, 0x35, 0x3c, 0x35, 0x3e, 0x3e, 0x39, 0x3d, 0x39, 0x40,
-    0x37, 0x33, 0x49, 0x38, 0x3c, 0x43, 0x34, 0x40, 0x39, 0x42, 0x3c, 0x3b,
-    0x3e, 0x45, 0x3e, 0x51, 0x3d, 0x3f, 0x3b, 0x34, 0x37, 0x3c, 0x40, 0x47,
-    0x3c, 0x41, 0x3f, 0x41, 0x37, 0x3e, 0x36, 0x3c, 0x42, 0x40, 0x3f, 0x3a,
-    0x3b, 0x42, 0x44, 0x4b, 0x4b, 0x37, 0x41, 0x4d, 0x41, 0x45, 0x40, 0x41,
-    0x40, 0x38, 0x37, 0x40, 0x42, 0x2c, 0x57, 0x43, 0x2d, 0x49, 0x3a, 0x3e,
-    0x37, 0x2f, 0x52, 0x37, 0x31, 0x42, 0x3b, 0x3f, 0x39, 0x38, 0x48, 0x3c,
-    0x37, 0x3d, 0x3a, 0x39, 0x3a, 0x45, 0x4b, 0x49, 0x3e, 0x44, 0x48, 0x49,
-    0x3d, 0x39, 0x3c, 0x41, 0x41, 0x38, 0x45, 0x38, 0x33, 0x3d, 0x37, 0x47,
-    0x34, 0x3f, 0x3b, 0x3d, 0x39, 0x34, 0x30, 0x39, 0x44, 0x36, 0x34, 0x3c,
-    0x37, 0x38, 0x45, 0x34, 0x40, 0x33, 0x41, 0x3a, 0x3e, 0x3c, 0x3b, 0x3a,
-    0x40, 0x3f, 0x3b, 0x3d, 0x3b, 0x46, 0x41, 0x2a, 0x3a, 0x3c, 0x42, 0x46,
-    0x33, 0x3f, 0x2d, 0x3a, 0x45, 0x45, 0x38, 0x3b, 0x44, 0x34, 0x35, 0x3f,
-    0x34, 0x43, 0x38, 0x3e, 0x41, 0x3b, 0x42, 0x38, 0x3d, 0x3f, 0x38, 0x45,
-    0x3b, 0x35, 0x39, 0x3c, 0x43, 0x43, 0x38, 0x34, 0x44, 0x43, 0x2e, 0x39,
-    0x39, 0x40, 0x39, 0x41, 0x41, 0x34, 0x3e, 0x44, 0x3d, 0x43, 0x3a, 0x3a,
-    0x3b, 0x3b, 0x36, 0x45, 0x3c, 0x43, 0x3d, 0x48, 0x36, 0x36, 0x39, 0x55,
-    0x35, 0x40, 0x3e, 0x49, 0x40, 0x3a, 0x3d, 0x3d, 0x34, 0x47, 0x40, 0x41,
-    0x40, 0x47, 0x39, 0x3e, 0x3b, 0x38, 0x3c, 0x3a, 0x35, 0x3e, 0x41, 0x4a,
-    0x4b, 0x3f, 0x36, 0x3d, 0x40, 0x3c, 0x39, 0x32, 0x33, 0x36, 0x30, 0x42,
-    0x42, 0x36, 0x54, 0x48, 0x2e, 0x4c, 0x34, 0x3c, 0x39, 0x36, 0x4e, 0x37,
-    0x2f, 0x3e, 0x30, 0x3d, 0x36, 0x3b, 0x45, 0x36, 0x37, 0x3e, 0x41, 0x4b,
-    0x3b, 0x36, 0x45, 0x3b, 0x38, 0x45, 0x3e, 0x43, 0x48, 0x46, 0x44, 0x44,
-    0x3e, 0x3b, 0x37, 0x3b, 0x3a, 0x3f, 0x3d, 0x44, 0x39, 0x38, 0x45, 0x43,
-    0x3d, 0x35, 0x39, 0x2c, 0x44, 0x41, 0x36, 0x40, 0x3d, 0x39, 0x3d, 0x2f,
-    0x3d, 0x39, 0x42, 0x3d, 0x36, 0x46, 0x43, 0x2c, 0x41, 0x3a, 0x30, 0x45,
-    0x3f, 0x41, 0x35, 0x2b, 0x3b, 0x38, 0x3a, 0x44, 0x32, 0x32, 0x39, 0x3c,
-    0x3a, 0x3a, 0x3c, 0x3a, 0x35, 0x40, 0x3b, 0x31, 0x36, 0x33, 0x35, 0x34,
-    0x3c, 0x3b, 0x3d, 0x36, 0x48, 0x3b, 0x3f, 0x42, 0x3e, 0x33, 0x2f, 0x3a,
-    0x49, 0x41, 0x39, 0x3e, 0x3c, 0x44, 0x3c, 0x39, 0x33, 0x39, 0x36, 0x35,
-    0x3d, 0x42, 0x34, 0x3e, 0x38, 0x45, 0x40, 0x45, 0x3d, 0x48, 0x42, 0x4a,
-    0x3f, 0x45, 0x38, 0x42, 0x44, 0x40, 0x34, 0x49, 0x44, 0x3d, 0x3a, 0x39,
-    0x3e, 0x3a, 0x42, 0x3e, 0x48, 0x42, 0x3e, 0x3a, 0x3f, 0x3f, 0x32, 0x3b,
-    0x38, 0x41, 0x3c, 0x39, 0x33, 0x45, 0x44, 0x3c, 0x48, 0x41, 0x41, 0x3d,
-    0x3a, 0x3c, 0x37, 0x33, 0x41, 0x3f, 0x38, 0x3a, 0x3f, 0x37, 0x51, 0x3c,
-    0x37, 0x3a, 0x43, 0x37, 0x40, 0x31, 0x4f, 0x34, 0x3b, 0x44, 0x45, 0x39,
-    0x40, 0x33, 0x49, 0x33, 0x3e, 0x35, 0x44, 0x3d, 0x3b, 0x3f, 0x43, 0x41,
-    0x43, 0x43, 0x48, 0x44, 0x46, 0x3b, 0x43, 0x3f, 0x3c, 0x3f, 0x3e, 0x3d,
-    0x3b, 0x41, 0x3c, 0x43, 0x30, 0x34, 0x39, 0x33, 0x3f, 0x38, 0x36, 0x2e,
-    0x33, 0x3f, 0x3c, 0x40, 0x3d, 0x3b, 0x3b, 0x31, 0x36, 0x41, 0x3b, 0x38,
-    0x46, 0x36, 0x34, 0x31, 0x42, 0x44, 0x33, 0x35, 0x3f, 0x36, 0x3c, 0x30,
-    0x3f, 0x31, 0x39, 0x3e, 0x3f, 0x47, 0x3e, 0x34, 0x36, 0x36, 0x34, 0x39,
-    0x37, 0x46, 0x40, 0x33, 0x3b, 0x3a, 0x3f, 0x41, 0x37, 0x44, 0x3a, 0x3f,
-    0x34, 0x45, 0x37, 0x33, 0x3f, 0x47, 0x41, 0x36, 0x39, 0x3e, 0x40, 0x38,
-    0x41, 0x3d, 0x3d, 0x36, 0x40, 0x3a, 0x3b, 0x3b, 0x41, 0x3b, 0x3a, 0x3f,
-    0x3f, 0x3b, 0x35, 0x42, 0x46, 0x3a, 0x30, 0x45, 0x40, 0x37, 0x39, 0x39,
-    0x3d, 0x38, 0x3f, 0x45, 0x3f, 0x31, 0x32, 0x3b, 0x35, 0x3e, 0x3b, 0x38,
-    0x3b, 0x44, 0x37, 0x39, 0x37, 0x42, 0x3f, 0x44, 0x38, 0x36, 0x37, 0x44,
-    0x45, 0x46, 0x41, 0x3b, 0x46, 0x42, 0x43, 0x43, 0x3a, 0x4b, 0x37, 0x35,
-    0x3b, 0x40, 0x32, 0x38, 0x41, 0x38, 0x4f, 0x3e, 0x36, 0x3f, 0x47, 0x3b,
-    0x47, 0x3b, 0x4a, 0x2e, 0x3d, 0x45, 0x3b, 0x46, 0x3e, 0x38, 0x43, 0x38,
-    0x41, 0x48, 0x3a, 0x39, 0x40, 0x45, 0x3b, 0x43, 0x40, 0x3e, 0x43, 0x41,
-    0x41, 0x3e, 0x39, 0x3f, 0x35, 0x42, 0x33, 0x3f, 0x3d, 0x32, 0x45, 0x3c,
-    0x41, 0x31, 0x45, 0x38, 0x43, 0x45, 0x41, 0x35, 0x35, 0x40, 0x44, 0x36,
-    0x3a, 0x3b, 0x3c, 0x2c, 0x3e, 0x41, 0x33, 0x3d, 0x46, 0x34, 0x3b, 0x30,
-    0x30, 0x42, 0x43, 0x3d, 0x3d, 0x3d, 0x43, 0x31, 0x3f, 0x40, 0x3a, 0x3f,
-    0x48, 0x3e, 0x3b, 0x39, 0x44, 0x43, 0x3b, 0x3a, 0x42, 0x38, 0x38, 0x3b,
-    0x3f, 0x44, 0x37, 0x3e, 0x45, 0x40, 0x41, 0x3b, 0x3c, 0x3a, 0x38, 0x37,
-    0x3b, 0x33, 0x3f, 0x35, 0x43, 0x3d, 0x33, 0x41, 0x3b, 0x46, 0x39, 0x32,
-    0x39, 0x3f, 0x3b, 0x39, 0x47, 0x3c, 0x3f, 0x39, 0x34, 0x3d, 0x3c, 0x46,
-    0x3f, 0x3e, 0x3e, 0x44, 0x34, 0x40, 0x3f, 0x39, 0x3c, 0x38, 0x36, 0x45,
-    0x42, 0x46, 0x3b, 0x44, 0x3a, 0x3d, 0x3b, 0x42, 0x3b, 0x3b, 0x3c, 0x45,
-    0x42, 0x3d, 0x36, 0x37, 0x3d, 0x43, 0x3f, 0x48, 0xa6, 0xfb, 0xff, 0xff,
-    0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xb3, 0x00, 0x00, 0x00,
-    0x39, 0xff, 0xff, 0xff, 0xe5, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
-    0x68, 0xfb, 0xff, 0xff, 0xbc, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00,
-    0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x70, 0x02, 0x00, 0x00,
-    0x70, 0x03, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xf0, 0x01, 0x00, 0x00,
-    0x80, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00,
-    0xa4, 0x02, 0x00, 0x00, 0xba, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
-    0x24, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65,
-    0x6c, 0x73, 0x5f, 0x73, 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x3c, 0xfd, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x80, 0x3b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f,
-    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xfd, 0xff, 0xff,
-    0x00, 0x00, 0x00, 0x03, 0x1c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
-    0x61, 0x64, 0x64, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xfd, 0xff, 0xff,
-    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x97, 0xf5, 0x3f,
-    0x01, 0x00, 0x00, 0x00, 0x87, 0x35, 0xa0, 0x43, 0x01, 0x00, 0x00, 0x00,
-    0xd6, 0xd7, 0x28, 0xc3, 0x92, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
-    0x1c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75,
-    0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x14, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x05, 0x80, 0xbf, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x85, 0xc0, 0xbe, 0x43,
-    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, 0xff,
-    0x00, 0x00, 0x00, 0x03, 0x3c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
-    0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e,
-    0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57,
-    0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72,
-    0x73, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0xa4, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
-    0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x17, 0xac, 0x6e, 0x3a, 0x01, 0x00, 0x00, 0x00,
-    0x20, 0x4e, 0x97, 0x3d, 0x01, 0x00, 0x00, 0x00, 0xaf, 0x27, 0x21, 0xbe,
-    0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x20, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f,
-    0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x1c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x42,
-    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff, 0xff,
-    0x00, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x43, 0x6f, 0x6e, 0x76, 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xfc, 0xfe, 0xff, 0xff,
-    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x17, 0xac, 0xee, 0x39, 0x5a, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
-    0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x54, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67,
-    0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x5f, 0x31, 0x2f,
-    0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74,
-    0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x2f,
-    0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00,
-    0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
-    0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
-    0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x9d, 0xaf, 0xd0, 0x3a, 0x01, 0x00, 0x00, 0x00,
-    0xe7, 0x29, 0x9e, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x5b, 0x91, 0xc3, 0xbd,
-    0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00,
-    0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
-    0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x28, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d,
-    0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x62, 0x1b, 0x1c, 0x3b,
-    0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,
-    0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00,
-    0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
-    0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-    0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00,
-    0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
-    0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
-    0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xff, 0x00, 0x19, 0x06, 0x00,
-    0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x09, 0x06, 0x00,
-    0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04};
-const int g_tiny_conv_model_data_len = 19800;
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
deleted file mode 100644
index a465dbf..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This is a standard TensorFlow Lite model file that has been converted into a
-// C data array, so it can be easily compiled into a binary for devices that
-// don't have a file system. It was created using the command:
-// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
-
-extern const unsigned char g_tiny_conv_model_data[];
-extern const int g_tiny_conv_model_data_len;
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
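The header deleted above documents the workflow this example relies on: a .tflite
FlatBuffer is dumped to a C array with xxd -i so the model can be linked straight
into a firmware image on devices with no file system. As a minimal sketch of how
such an array is consumed (assuming the pre-deletion extern declarations and the
standard TFLite FlatBuffer accessors; LoadTinyConvModel is an illustrative name,
not a function in this tree):

/* Sketch: reading an xxd-generated model array through the TFLite FlatBuffer
   accessors. GetModel comes from the generated schema header and
   TFLITE_SCHEMA_VERSION from tensorflow/lite/version.h. */
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

extern const unsigned char g_tiny_conv_model_data[];  // the xxd dump
extern const int g_tiny_conv_model_data_len;

const tflite::Model* LoadTinyConvModel() {
  // GetModel maps the byte array onto the FlatBuffer schema in place: no
  // parsing or copying, which is what makes this approach viable without
  // a file system.
  const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data);
  if (model->version() != TFLITE_SCHEMA_VERSION) {
    // Schema mismatch: regenerate the array from a compatible .tflite file.
    return nullptr;
  }
  return model;
}

In the micro_speech example, a check along these lines runs once at startup,
before the model is handed to the interpreter.
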
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.cc
new file mode 100644
index 0000000..e5f6ceb
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.cc
@@ -0,0 +1,1800 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See the header for documentation on the meaning of this data.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h"
+
+const int g_yes_1000ms_sample_data_size = 16000;
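+// 16000 samples spanning 1000 ms implies a 16 kHz mono capture, the input
+// format the micro_speech example expects.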
+const int16_t g_yes_1000ms_sample_data[16000] = {
+    -7,     -12,    -18,    -20,    -20,    -21,    -21,    -25,    -29,
+    -31,    -31,    -30,    -30,    -29,    -30,    -30,    -29,    -28,
+    -24,    -22,    -17,    -12,    -8,     -7,     -6,     -1,     2,
+    5,      7,      8,      11,     15,     18,     19,     23,     24,
+    24,     27,     27,     26,     25,     28,     30,     32,     33,
+    31,     29,     27,     28,     30,     28,     26,     26,     24,
+    22,     17,     16,     15,     13,     10,     5,      0,      -4,
+    -4,     -7,     -9,     -12,    -14,    -14,    -13,    -11,    -10,
+    -8,     -6,     -3,     3,      7,      8,      12,     15,     18,
+    21,     19,     19,     21,     23,     24,     23,     22,     19,
+    17,     11,     5,      -3,     -12,    -22,    -28,    -35,    -45,
+    -54,    -62,    -69,    -76,    -84,    -92,    -100,   -109,   -116,
+    -117,   -120,   -120,   -120,   -122,   -124,   -126,   -123,   -121,
+    -116,   -113,   -107,   -97,    -88,    -75,    -61,    -50,    -41,
+    -27,    -12,    4,      21,     37,     58,     76,     93,     108,
+    121,    137,    156,    172,    184,    196,    205,    215,    224,
+    235,    242,    245,    242,    240,    238,    231,    223,    214,
+    205,    195,    178,    158,    135,    112,    90,     69,     46,
+    19,     -11,    -45,    -76,    -105,   -133,   -159,   -186,   -211,
+    -236,   -260,   -280,   -294,   -308,   -320,   -331,   -336,   -338,
+    -335,   -326,   -316,   -301,   -286,   -267,   -246,   -225,   -203,
+    -180,   -154,   -124,   -91,    -59,    -34,    -8,     19,     42,
+    64,     87,     103,    119,    134,    148,    162,    174,    182,
+    188,    190,    189,    187,    184,    180,    177,    171,    162,
+    154,    144,    137,    129,    118,    106,    95,     81,     69,
+    58,     48,     37,     26,     14,     3,      -7,     -22,    -31,
+    -42,    -52,    -62,    -69,    -75,    -79,    -82,    -87,    -88,
+    -92,    -94,    -91,    -87,    -85,    -81,    -74,    -70,    -64,
+    -55,    -47,    -40,    -33,    -25,    -19,    -12,    -6,     -4,
+    -1,     1,      1,      -2,     -9,     -15,    -17,    -18,    -20,
+    -22,    -22,    -26,    -31,    -33,    -35,    -31,    -26,    -17,
+    -4,     8,      19,     31,     44,     54,     64,     71,     79,
+    86,     92,     102,    109,    111,    109,    104,    96,     84,
+    70,     60,     51,     38,     27,     13,     4,      -3,     -9,
+    -13,    -18,    -26,    -33,    -32,    -27,    -20,    -10,    -4,
+    2,      6,      10,     14,     16,     21,     25,     29,     31,
+    33,     35,     37,     33,     22,     15,     13,     11,     12,
+    9,      5,      2,      1,      -3,     -9,     -17,    -27,    -32,
+    -35,    -36,    -36,    -42,    -50,    -56,    -66,    -77,    -85,
+    -96,    -100,   -106,   -113,   -118,   -121,   -119,   -117,   -119,
+    -122,   -124,   -123,   -112,   -94,    -77,    -64,    -51,    -37,
+    -22,    -3,     17,     37,     54,     68,     86,     100,    114,
+    134,    154,    167,    174,    178,    182,    189,    189,    187,
+    185,    179,    177,    174,    171,    157,    138,    123,    108,
+    94,     76,     50,     25,     6,      -8,     -20,    -37,    -59,
+    -86,    -110,   -132,   -147,   -159,   -169,   -178,   -191,   -203,
+    -213,   -217,   -215,   -208,   -199,   -194,   -195,   -190,   -178,
+    -165,   -155,   -144,   -134,   -123,   -103,   -80,    -56,    -35,
+    -18,    -4,     11,     23,     36,     50,     65,     78,     93,
+    111,    122,    129,    132,    131,    127,    125,    126,    126,
+    128,    127,    125,    122,    118,    111,    108,    104,    99,
+    93,     89,     90,     87,     82,     78,     75,     68,     65,
+    67,     69,     66,     61,     54,     39,     28,     15,     3,
+    -7,     -18,    -25,    -29,    -35,    -42,    -52,    -66,    -78,
+    -83,    -85,    -86,    -86,    -82,    -83,    -84,    -83,    -81,
+    -75,    -62,    -57,    -53,    -49,    -46,    -41,    -34,    -26,
+    -16,    -10,    -7,     -2,     2,      6,      12,     15,     19,
+    18,     15,     17,     21,     24,     30,     33,     27,     22,
+    21,     20,     23,     24,     21,     15,     13,     8,      3,
+    1,      -1,     -3,     -4,     -6,     -9,     -11,    -11,    -8,
+    -10,    -13,    -15,    -19,    -17,    -11,    -2,     1,      2,
+    6,      9,      10,     12,     13,     9,      8,      10,     13,
+    20,     18,     13,     10,     4,      1,      -2,     -6,     -11,
+    -13,    -16,    -18,    -15,    -18,    -21,    -21,    -22,    -23,
+    -25,    -23,    -22,    -20,    -19,    -16,    -12,    -10,    -9,
+    -11,    -15,    -19,    -22,    -19,    -14,    -11,    -9,     -11,
+    -17,    -20,    -18,    -19,    -15,    -11,    -8,     -2,     8,
+    19,     30,     36,     37,     36,     38,     45,     57,     69,
+    77,     81,     79,     75,     76,     74,     69,     66,     60,
+    53,     45,     36,     28,     22,     17,     10,     0,      -5,
+    -11,    -15,    -18,    -26,    -31,    -33,    -34,    -34,    -35,
+    -37,    -37,    -35,    -28,    -24,    -29,    -37,    -45,    -46,
+    -41,    -36,    -31,    -32,    -33,    -37,    -37,    -36,    -36,
+    -34,    -27,    -19,    -14,    -11,    -8,     -1,     6,      14,
+    19,     21,     25,     30,     34,     38,     38,     33,     26,
+    22,     19,     20,     18,     17,     15,     10,     2,      -3,
+    -5,     -10,    -13,    -13,    -13,    -16,    -16,    -16,    -15,
+    -13,    -14,    -13,    -16,    -19,    -20,    -18,    -17,    -18,
+    -16,    -16,    -24,    -28,    -28,    -28,    -23,    -21,    -21,
+    -20,    -24,    -27,    -23,    -18,    -14,    -7,     4,      11,
+    15,     19,     21,     25,     33,     39,     41,     45,     47,
+    50,     56,     58,     57,     59,     59,     55,     50,     47,
+    39,     34,     30,     24,     18,     11,     8,      3,      0,
+    -3,     -8,     -14,    -15,    -13,    -13,    -12,    -14,    -17,
+    -17,    -12,    -10,    -4,     -7,     -12,    -10,    -14,    -17,
+    -17,    -19,    -25,    -28,    -27,    -29,    -30,    -31,    -35,
+    -38,    -43,    -47,    -51,    -52,    -50,    -49,    -48,    -47,
+    -45,    -39,    -32,    -30,    -31,    -35,    -35,    -31,    -24,
+    -17,    -12,    -11,    -14,    -15,    -17,    -16,    -9,     -5,
+    -3,     -1,     0,      1,      0,      3,      12,     21,     26,
+    33,     35,     38,     45,     50,     53,     53,     54,     58,
+    61,     64,     69,     67,     66,     64,     58,     54,     51,
+    46,     44,     45,     41,     35,     31,     27,     25,     27,
+    25,     20,     13,     12,     16,     17,     17,     12,     7,
+    3,      2,      -2,     -4,     -8,     -14,    -19,    -25,    -29,
+    -38,    -49,    -60,    -69,    -73,    -71,    -74,    -82,    -89,
+    -98,    -103,   -104,   -103,   -99,    -98,    -98,    -98,    -99,
+    -97,    -94,    -91,    -85,    -82,    -78,    -74,    -74,    -71,
+    -68,    -61,    -54,    -52,    -47,    -41,    -36,    -32,    -21,
+    -12,    -3,     11,     26,     36,     44,     48,     55,     64,
+    77,     92,     100,    108,    117,    120,    122,    128,    130,
+    129,    130,    127,    124,    122,    121,    118,    114,    110,
+    102,    92,     85,     80,     77,     68,     55,     46,     39,
+    36,     34,     31,     27,     15,     5,      -1,     -5,     -11,
+    -20,    -29,    -37,    -43,    -46,    -47,    -54,    -61,    -65,
+    -74,    -82,    -84,    -91,    -94,    -96,    -104,   -109,   -111,
+    -111,   -112,   -113,   -111,   -112,   -110,   -104,   -99,    -96,
+    -93,    -89,    -87,    -81,    -71,    -63,    -54,    -45,    -43,
+    -37,    -30,    -24,    -17,    -12,    -8,     -2,     2,      15,
+    23,     28,     35,     41,     42,     44,     52,     58,     66,
+    74,     78,     80,     82,     85,     88,     90,     92,     92,
+    88,     87,     87,     79,     73,     69,     64,     62,     55,
+    50,     45,     41,     36,     29,     24,     20,     16,     12,
+    8,      5,      2,      1,      1,      0,      1,      -4,     -4,
+    -4,     -4,     -1,     1,      2,      1,      -3,     -6,     -1,
+    5,      6,      7,      8,      4,      2,      0,      -2,     -3,
+    0,      -3,     -4,     -3,     -4,     -5,     -8,     -15,    -20,
+    -25,    -28,    -32,    -37,    -38,    -39,    -43,    -48,    -55,
+    -62,    -69,    -75,    -75,    -78,    -81,    -83,    -89,    -89,
+    -92,    -91,    -91,    -89,    -83,    -81,    -74,    -66,    -63,
+    -54,    -45,    -39,    -31,    -23,    -15,    -4,     6,      14,
+    23,     29,     35,     41,     45,     49,     55,     61,     69,
+    75,     75,     76,     75,     74,     74,     73,     74,     72,
+    69,     69,     65,     62,     57,     52,     44,     35,     33,
+    29,     24,     14,     7,      3,      -4,     -12,    -17,    -20,
+    -22,    -27,    -32,    -34,    -39,    -42,    -43,    -42,    -43,
+    -40,    -38,    -36,    -36,    -37,    -36,    -33,    -31,    -27,
+    -24,    -23,    -22,    -17,    -11,    -7,     -7,     -7,     -3,
+    5,      13,     19,     25,     27,     25,     27,     35,     40,
+    40,     41,     45,     47,     50,     54,     52,     50,     45,
+    43,     44,     40,     34,     28,     24,     18,     11,     6,
+    -2,     -9,     -14,    -21,    -27,    -35,    -39,    -43,    -50,
+    -57,    -62,    -66,    -68,    -71,    -72,    -73,    -74,    -76,
+    -76,    -77,    -75,    -75,    -74,    -67,    -61,    -55,    -49,
+    -45,    -40,    -30,    -21,    -11,    -4,     4,      13,     23,
+    34,     44,     52,     59,     65,     70,     77,     84,     87,
+    88,     90,     91,     90,     89,     85,     80,     75,     72,
+    71,     64,     56,     48,     41,     34,     27,     21,     12,
+    1,      -11,    -19,    -28,    -33,    -39,    -46,    -50,    -53,
+    -58,    -63,    -66,    -71,    -73,    -76,    -76,    -74,    -73,
+    -71,    -67,    -65,    -62,    -60,    -55,    -51,    -45,    -39,
+    -35,    -31,    -27,    -20,    -13,    -6,     -3,     1,      8,
+    12,     18,     24,     26,     30,     35,     38,     44,     47,
+    47,     51,     53,     52,     53,     52,     50,     51,     49,
+    50,     51,     50,     48,     48,     45,     43,     42,     37,
+    34,     31,     31,     30,     26,     24,     21,     15,     12,
+    11,     7,      4,      1,      -3,     -5,     -7,     -9,     -15,
+    -21,    -26,    -28,    -31,    -35,    -39,    -46,    -48,    -49,
+    -53,    -58,    -63,    -67,    -69,    -71,    -72,    -74,    -75,
+    -77,    -77,    -73,    -72,    -69,    -65,    -60,    -55,    -50,
+    -47,    -43,    -38,    -30,    -25,    -20,    -12,    -4,     4,
+    9,      16,     20,     24,     28,     35,     43,     50,     58,
+    61,     65,     72,     74,     74,     76,     79,     78,     76,
+    78,     76,     76,     74,     70,     64,     59,     52,     46,
+    41,     33,     26,     19,     12,     5,      -2,     -8,     -15,
+    -20,    -26,    -31,    -37,    -39,    -41,    -44,    -44,    -47,
+    -51,    -52,    -52,    -48,    -45,    -46,    -48,    -45,    -42,
+    -40,    -36,    -32,    -27,    -24,    -22,    -18,    -16,    -11,
+    -10,    -5,     0,      3,      8,      11,     16,     18,     21,
+    23,     25,     26,     27,     28,     30,     31,     31,     30,
+    29,     27,     26,     23,     19,     17,     13,     10,     6,
+    0,      -2,     -5,     -10,    -12,    -15,    -19,    -23,    -26,
+    -29,    -30,    -30,    -32,    -33,    -34,    -35,    -34,    -31,
+    -29,    -29,    -28,    -28,    -23,    -19,    -17,    -12,    -12,
+    -10,    -5,     -2,     3,      7,      10,     13,     14,     19,
+    22,     26,     31,     34,     34,     35,     36,     39,     43,
+    45,     47,     47,     48,     49,     51,     48,     47,     50,
+    45,     41,     41,     38,     34,     34,     30,     23,     17,
+    11,     7,      4,      -4,     -9,     -15,    -23,    -28,    -32,
+    -35,    -39,    -45,    -46,    -49,    -53,    -52,    -53,    -55,
+    -56,    -56,    -55,    -54,    -53,    -53,    -51,    -47,    -44,
+    -42,    -40,    -37,    -33,    -28,    -25,    -23,    -18,    -15,
+    -8,     -6,     -2,     3,      8,      15,     18,     23,     26,
+    27,     32,     36,     36,     36,     39,     38,     38,     40,
+    39,     35,     31,     29,     25,     23,     19,     15,     11,
+    7,      5,      3,      1,      -1,     -6,     -8,     -7,     -10,
+    -9,     -10,    -11,    -10,    -7,     -6,     -8,     -6,     -5,
+    -4,     1,      2,      4,      7,      7,      9,      11,     11,
+    9,      9,      10,     11,     13,     17,     15,     15,     15,
+    17,     19,     17,     17,     17,     15,     15,     13,     11,
+    12,     8,      7,      5,      3,      0,      -4,     -4,     -6,
+    -9,     -12,    -14,    -15,    -15,    -16,    -20,    -19,    -20,
+    -20,    -20,    -18,    -18,    -21,    -22,    -21,    -21,    -23,
+    -20,    -20,    -23,    -24,    -23,    -25,    -25,    -25,    -25,
+    -26,    -24,    -23,    -23,    -23,    -23,    -22,    -19,    -18,
+    -15,    -14,    -10,    -8,     -4,     -1,     1,      3,      6,
+    8,      9,      14,     19,     22,     24,     26,     29,     32,
+    31,     34,     39,     42,     42,     46,     49,     50,     50,
+    52,     53,     52,     49,     49,     48,     48,     46,     45,
+    40,     34,     30,     25,     21,     17,     13,     10,     6,
+    2,      -4,     -9,     -12,    -15,    -18,    -21,    -26,    -28,
+    -31,    -32,    -33,    -35,    -35,    -38,    -37,    -36,    -34,
+    -35,    -35,    -33,    -33,    -34,    -30,    -26,    -27,    -25,
+    -23,    -22,    -18,    -15,    -16,    -12,    -9,     -9,     -6,
+    -1,     2,      3,      5,      8,      7,      9,      12,     15,
+    17,     18,     18,     19,     18,     20,     19,     18,     21,
+    20,     19,     18,     16,     15,     15,     15,     14,     12,
+    9,      9,      10,     8,      6,      4,      2,      1,      -1,
+    -3,     -1,     -3,     -2,     -4,     -5,     -5,     -8,     -8,
+    -10,    -10,    -8,     -8,     -8,     -7,     -8,     -8,     -8,
+    -9,     -11,    -12,    -11,    -9,     -7,     -8,     -8,     -8,
+    -10,    -8,     -7,     -8,     -7,     -6,     -7,     -5,     -3,
+    -3,     -3,     -3,     -2,     0,      3,      3,      5,      7,
+    10,     11,     10,     10,     12,     13,     16,     16,     16,
+    17,     15,     16,     17,     16,     14,     16,     13,     11,
+    11,     9,      9,      6,      4,      4,      3,      0,      -2,
+    -4,     -7,     -7,     -7,     -13,    -15,    -13,    -14,    -16,
+    -15,    -15,    -17,    -16,    -16,    -18,    -19,    -19,    -20,
+    -19,    -16,    -15,    -13,    -12,    -10,    -7,     -6,     -4,
+    -4,     -2,     0,      2,      6,      8,      10,     12,     14,
+    15,     14,     13,     13,     13,     15,     15,     17,     17,
+    17,     18,     17,     16,     15,     15,     14,     11,     9,
+    8,      8,      9,      8,      5,      5,      3,      -1,     -1,
+    -4,     -5,     -7,     -8,     -8,     -8,     -9,     -10,    -8,
+    -11,    -12,    -12,    -12,    -12,    -13,    -11,    -11,    -9,
+    -8,     -7,     -8,     -7,     -6,     -7,     -6,     -5,     -4,
+    -4,     -2,     -2,     -3,     -2,     -2,     -3,     0,      -1,
+    -3,     1,      1,      2,      4,      3,      5,      6,      3,
+    3,      4,      3,      3,      4,      5,      4,      6,      7,
+    7,      7,      6,      3,      3,      5,      3,      3,      6,
+    6,      7,      6,      4,      5,      2,      1,      1,      0,
+    0,      2,      1,      1,      1,      -1,     -2,     -3,     -5,
+    -4,     -5,     -4,     -4,     -6,     -4,     -4,     -4,     -5,
+    -6,     -5,     -6,     -5,     -4,     -5,     -4,     -3,     -4,
+    0,      2,      2,      2,      2,      2,      2,      3,      3,
+    5,      6,      6,      5,      6,      7,      6,      8,      6,
+    5,      5,      5,      6,      6,      6,      5,      5,      2,
+    2,      1,      2,      0,      -1,     -1,     -1,     -1,     0,
+    -1,     -4,     -6,     -8,     -8,     -9,     -8,     -7,     -6,
+    -5,     -5,     -6,     -3,     -4,     -5,     -4,     -7,     -6,
+    -4,     -2,     -1,     -1,     1,      1,      1,      1,      1,
+    2,      2,      1,      3,      4,      4,      6,      6,      6,
+    6,      4,      4,      4,      4,      3,      2,      2,      2,
+    2,      1,      1,      1,      0,      1,      1,      0,      -2,
+    -2,     -3,     -3,     -3,     -3,     -5,     -4,     -3,     -5,
+    -5,     -3,     -5,     -4,     -4,     -2,     -2,     -2,     -1,
+    -3,     -2,     -2,     -1,     -3,     -2,     -1,     -2,     -2,
+    -2,     0,      0,      0,      0,      0,      1,      0,      0,
+    1,      2,      3,      3,      3,      4,      5,      4,      3,
+    4,      5,      5,      7,      7,      6,      9,      8,      6,
+    7,      8,      6,      5,      7,      8,      8,      8,      7,
+    6,      5,      4,      4,      4,      5,      4,      2,      1,
+    2,      1,      0,      -2,     -3,     -2,     -4,     -6,     -6,
+    -7,     -7,     -8,     -9,     -9,     -9,     -9,     -9,     -9,
+    -9,     -10,    -10,    -10,    -8,     -7,     -8,     -6,     -5,
+    -4,     -3,     -5,     -2,     -2,     -2,     -1,     -1,     0,
+    1,      1,      2,      3,      2,      4,      3,      3,      5,
+    3,      3,      5,      4,      5,      6,      5,      4,      5,
+    3,      2,      2,      3,      4,      4,      4,      4,      4,
+    3,      4,      4,      4,      3,      2,      2,      2,      2,
+    2,      2,      2,      2,      1,      1,      1,      2,      1,
+    1,      2,      1,      1,      2,      1,      1,      1,      -1,
+    0,      1,      0,      -1,     1,      -1,     -1,     -1,     -2,
+    -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,     -2,
+    -1,     0,      -1,     -1,     1,      1,      2,      0,      -1,
+    0,      -1,     -1,     0,      0,      1,      2,      2,      2,
+    1,      1,      0,      0,      0,      0,      1,      1,      0,
+    0,      0,      0,      0,      -1,     -2,     -1,     -3,     -4,
+    -4,     -4,     -4,     -4,     -4,     -4,     -3,     -3,     -5,
+    -6,     -4,     -2,     -2,     -1,     -1,     -1,     -2,     1,
+    -1,     1,      0,      0,      1,      1,      1,      1,      2,
+    1,      2,      2,      3,      3,      3,      3,      4,      5,
+    5,      5,      5,      5,      5,      5,      5,      6,      6,
+    5,      5,      5,      6,      6,      5,      3,      6,      5,
+    4,      5,      3,      2,      2,      2,      2,      1,      1,
+    2,      0,      -1,     0,      -1,     -1,     -1,     -1,     -1,
+    -1,     -1,     -3,     -3,     -3,     -3,     -4,     -4,     -5,
+    -6,     -6,     -6,     -6,     -6,     -6,     -5,     -5,     -6,
+    -5,     -4,     -4,     -4,     -4,     -2,     -2,     -2,     -1,
+    -2,     0,      1,      0,      1,      3,      4,      4,      4,
+    4,      4,      4,      5,      4,      4,      4,      5,      7,
+    5,      4,      4,      4,      4,      3,      2,      2,      2,
+    2,      2,      0,      1,      1,      0,      1,      1,      -1,
+    0,      -1,     -2,     -1,     -3,     -4,     -4,     -3,     -5,
+    -5,     -5,     -5,     -5,     -5,     -4,     -3,     -3,     -2,
+    -3,     -2,     -2,     -5,     -3,     -3,     -3,     -2,     0,
+    1,      1,      1,      1,      1,      1,      1,      1,      3,
+    3,      4,      4,      4,      4,      5,      5,      2,      3,
+    4,      3,      5,      4,      3,      4,      3,      3,      5,
+    5,      3,      4,      2,      1,      1,      3,      4,      3,
+    1,      3,      2,      1,      2,      1,      0,      1,      0,
+    1,      0,      1,      1,      1,      1,      0,      -1,     0,
+    0,      -1,     -1,     -2,     -1,     -1,     -2,     0,      -1,
+    -2,     -1,     -1,     -2,     -2,     -1,     -3,     -3,     -3,
+    -3,     -3,     -4,     -3,     -5,     -6,     -4,     -4,     -5,
+    -4,     -3,     -5,     -6,     -4,     -5,     -6,     -4,     -3,
+    -5,     -4,     -3,     -4,     -3,     -2,     -2,     -2,     0,
+    0,      1,      1,      0,      0,      0,      1,      1,      3,
+    3,      3,      4,      3,      3,      3,      3,      3,      3,
+    3,      3,      3,      3,      3,      3,      3,      3,      3,
+    1,      1,      1,      1,      1,      1,      1,      0,      0,
+    0,      1,      -2,     -1,     1,      0,      -1,     -2,     -2,
+    0,      1,      0,      1,      1,      1,      1,      0,      0,
+    1,      0,      0,      2,      1,      0,      1,      1,      1,
+    1,      3,      3,      3,      4,      3,      3,      4,      2,
+    2,      2,      2,      2,      2,      2,      1,      2,      2,
+    2,      2,      -1,     -1,     -1,     -1,     -1,     -1,     -1,
+    -1,     -1,     -3,     -3,     -3,     -5,     -4,     -5,     -5,
+    -5,     -5,     -7,     -7,     -7,     -8,     -7,     -8,     -7,
+    -8,     -8,     -7,     -8,     -8,     -8,     -8,     -7,     -6,
+    -6,     -6,     -7,     -6,     -6,     -5,     -5,     -3,     -2,
+    -2,     -1,     0,      -1,     0,      1,      2,      2,      3,
+    3,      3,      6,      7,      7,      7,      8,      9,      8,
+    10,     10,     9,      10,     11,     9,      10,     12,     11,
+    10,     9,      9,      9,      9,      10,     9,      6,      6,
+    5,      5,      6,      3,      1,      1,      0,      1,      0,
+    0,      1,      -1,     -2,     -2,     -1,     -3,     -3,     -2,
+    -4,     -4,     -3,     -2,     -4,     -4,     -4,     -5,     -3,
+    -3,     -5,     -3,     -3,     -5,     -4,     -2,     -2,     -3,
+    -3,     -1,     0,      -1,     0,      0,      0,      -2,     -1,
+    0,      -1,     -2,     -2,     -2,     -2,     -1,     -3,     -2,
+    -3,     -4,     -3,     -3,     -3,     -3,     -3,     -3,     -3,
+    -2,     -4,     -6,     -5,     -3,     -2,     -4,     -3,     -2,
+    -4,     -4,     -4,     -3,     -4,     -5,     -4,     -5,     -3,
+    -2,     -5,     -2,     -4,     -4,     -3,     -2,     -1,     -1,
+    -1,     0,      2,      2,      1,      1,      3,      3,      3,
+    3,      4,      4,      5,      6,      5,      5,      6,      7,
+    7,      7,      8,      8,      7,      9,      9,      9,      9,
+    10,     9,      9,      9,      9,      9,      9,      8,      7,
+    9,      9,      6,      7,      5,      2,      3,      2,      1,
+    1,      0,      -2,     -2,     -2,     -3,     -3,     -2,     -2,
+    -4,     -5,     -4,     -4,     -4,     -4,     -5,     -4,     -4,
+    -5,     -4,     -5,     -4,     -5,     -6,     -4,     -4,     -5,
+    -5,     -5,     -5,     -6,     -4,     -4,     -4,     -3,     -2,
+    -3,     -3,     -2,     -2,     -1,     -2,     -3,     -1,     0,
+    -1,     0,      0,      0,      0,      1,      0,      0,      0,
+    0,      -1,     1,      1,      1,      0,      -2,     -2,     -3,
+    -3,     -4,     -4,     -6,     -7,     -5,     -4,     -5,     -5,
+    -4,     -6,     -8,     -7,     -6,     -5,     -5,     -5,     -4,
+    -4,     -5,     -4,     -3,     -3,     0,      0,      -2,     -1,
+    0,      0,      1,      1,      2,      2,      2,      2,      2,
+    4,      5,      5,      5,      6,      7,      7,      9,      10,
+    10,     10,     12,     12,     13,     14,     14,     14,     15,
+    15,     15,     15,     15,     15,     14,     15,     15,     12,
+    13,     13,     12,     10,     11,     11,     11,     10,     8,
+    6,      5,      7,      6,      6,      4,      3,      4,      5,
+    3,      2,      2,      1,      1,      2,      3,      1,      0,
+    0,      1,      0,      -2,     -1,     -2,     -3,     -3,     -3,
+    -3,     -4,     -6,     -8,     -9,     -9,     -10,    -12,    -14,
+    -15,    -18,    -21,    -21,    -21,    -21,    -22,    -24,    -26,
+    -26,    -27,    -27,    -28,    -26,    -25,    -26,    -28,    -27,
+    -24,    -23,    -23,    -24,    -21,    -17,    -17,    -15,    -12,
+    -12,    -12,    -12,    -9,     -7,     -6,     -5,     -3,     -3,
+    -2,     0,      0,      1,      3,      7,      6,      4,      6,
+    7,      8,      11,     10,     10,     13,     15,     14,     13,
+    18,     20,     18,     19,     21,     23,     24,     23,     22,
+    24,     26,     26,     26,     27,     25,     23,     25,     27,
+    28,     28,     28,     23,     19,     23,     24,     20,     20,
+    21,     15,     13,     15,     16,     14,     11,     8,      7,
+    8,      11,     11,     6,      4,      8,      7,      6,      7,
+    6,      4,      7,      13,     12,     7,      8,      8,      4,
+    1,      1,      1,      2,      -4,     -12,    -18,    -24,    -25,
+    -25,    -32,    -41,    -55,    -59,    -61,    -75,    -87,    -96,
+    -109,   -122,   -133,   -141,   -148,   -157,   -168,   -180,   -191,
+    -198,   -202,   -207,   -206,   -207,   -211,   -211,   -208,   -203,
+    -189,   -171,   -153,   -132,   -114,   -96,    -75,    -54,    -30,
+    -5,     19,     43,     61,     77,     93,     106,    123,    143,
+    161,    182,    198,    202,    201,    209,    229,    242,    240,
+    235,    239,    249,    258,    255,    242,    233,    245,    268,
+    278,    256,    223,    223,    253,    263,    235,    198,    178,
+    188,    215,    230,    200,    143,    113,    128,    158,    158,
+    128,    99,     90,     82,     70,     56,     32,     7,      14,
+    46,     36,     -23,    -71,    -76,    -54,    -36,    -39,    -74,
+    -118,   -134,   -122,   -101,   -104,   -129,   -164,   -174,   -129,
+    -86,    -109,   -184,   -219,   -191,   -147,   -141,   -183,   -249,
+    -290,   -269,   -236,   -266,   -346,   -394,   -366,   -325,   -353,
+    -431,   -472,   -406,   -313,   -316,   -398,   -449,   -401,   -287,
+    -194,   -164,   -193,   -245,   -212,   -55,    75,     67,     26,
+    67,     165,    237,    269,    293,    319,    333,    368,    414,
+    432,    463,    488,    448,    404,    391,    377,    361,    365,
+    376,    308,    197,    150,    129,    73,     53,     91,     43,
+    -107,   -165,   -54,    1,      -148,   -312,   -273,   -125,   -62,
+    -128,   -258,   -294,   -141,   70,     57,     -217,   -378,   -145,
+    198,    289,    169,    -47,    -219,   -101,   264,    458,    217,
+    -163,   -199,   13,     121,    101,    -51,    -293,   -319,   -62,
+    24,     -274,   -474,   -296,   -170,   -336,   -422,   -285,   -248,
+    -302,   -130,   98,     -11,    -257,   -146,   184,    278,    264,
+    331,    192,    -35,    235,    805,    830,    315,    82,     322,
+    503,    522,    619,    557,    242,    163,    399,    507,    489,
+    618,    602,    156,    -164,   112,    476,    406,    94,     -154,
+    -242,   -132,   56,     5,      -325,   -566,   -527,   -478,   -624,
+    -692,   -561,   -551,   -744,   -836,   -671,   -520,   -626,   -736,
+    -647,   -581,   -639,   -687,   -702,   -739,   -665,   -383,   -236,
+    -414,   -513,   -321,   -114,   -43,    32,     65,     -98,    -236,
+    34,     608,    924,    680,    218,    56,     329,    847,    1214,
+    1006,   341,    11,     340,    667,    553,    353,    355,    415,
+    416,    364,    257,    108,    6,      113,    293,    233,    46,
+    4,      25,     -10,    -12,    55,     40,     -65,    -56,    -26,
+    -101,   -61,    143,    229,    78,     -161,   -210,   103,    424,
+    377,    86,     -274,   -491,   -328,   -37,    60,     128,    188,
+    -105,   -625,   -823,   -464,   138,    389,    111,    -343,   -526,
+    -306,   13,     205,    250,    -35,    -554,   -764,   -498,   -42,
+    167,    -210,   -639,   -448,   -101,   -110,   -171,   -74,    -39,
+    47,     424,    616,    324,    98,     367,    853,    942,    416,
+    -184,   -130,   339,    472,    369,    239,    -165,   -418,   101,
+    742,    659,    325,    365,    476,    233,    -14,    270,    785,
+    719,    -29,    -533,   -220,   237,    305,    179,    -190,   -644,
+    -610,   -380,   -526,   -601,   -237,   48,     -36,    -124,   -49,
+    -6,     23,     117,    55,     -199,   -428,   -512,   -338,   -238,
+    -424,   -323,   -135,   -464,   -657,   -189,   100,    -379,   -964,
+    -893,   -346,   -64,    -322,   -650,   -480,   32,     238,    201,
+    386,    616,    611,    400,    195,    357,    842,    1051,   832,
+    712,    829,    1070,   1307,   1081,   551,    363,    544,    623,
+    239,    -374,   -609,   -230,   375,    486,    -52,    -446,   -270,
+    181,    645,    601,    -135,   -654,   -256,   567,    840,    380,
+    -54,    18,     334,    386,    21,     -214,   83,     243,    -316,
+    -937,   -1074,  -1006,  -896,   -674,   -424,   -331,   -354,   -380,
+    -481,   -392,   80,     358,    171,    -170,   -624,   -796,   -130,
+    706,    803,    381,    152,    367,    620,    685,    655,    347,
+    36,     180,    417,    412,    358,    288,    189,    150,    16,
+    -240,   -428,   -428,   -266,   -335,   -819,   -1150,  -946,   -587,
+    -437,   -580,   -961,   -1218,  -1065,  -704,   -431,   -350,   -315,
+    -214,   -162,   -81,    26,     -8,     -52,    -117,   -226,   -40,
+    285,    241,    -2,     -69,    57,     207,    81,     -144,   -69,
+    65,     84,     49,     -168,   -248,   126,    502,    472,    192,
+    120,    442,    667,    551,    512,    634,    814,    1014,   1098,
+    1156,   1112,   974,    1144,   1330,   1099,   825,    847,    877,
+    555,    2,      -243,   -102,   -196,   -471,   -377,   -235,   -439,
+    -622,   -547,   -470,   -495,   -431,   -197,   -21,    21,     -9,
+    -246,   -438,   -238,   -31,    0,      96,     137,    -25,    -211,
+    -181,   -149,   -350,   -368,   -33,    21,     -308,   -323,   32,
+    379,    605,    531,    85,     -374,   -367,   9,      277,    147,
+    -356,   -698,   -494,   -140,   -126,   -354,   -549,   -673,   -642,
+    -428,   -269,   -273,   -246,   -216,   -349,   -323,   -16,    32,
+    -387,   -742,   -662,   -434,   -223,   41,     140,    -58,    -227,
+    -80,    93,     20,     -166,   -360,   -536,   -555,   -305,   -33,
+    -23,    -86,    -75,    -9,     82,     -1,     -156,   24,     532,
+    916,    956,    835,    901,    1127,   1279,   1417,   1435,   1144,
+    822,    862,    1214,   1352,   1001,   611,    539,    532,    369,
+    189,    170,    308,    465,    430,    232,    64,     14,     51,
+    -37,    -244,   -321,   -276,   -144,   57,     77,     -215,   -467,
+    -335,   -186,   -245,   -133,   -81,    -588,   -1130,  -959,   -520,
+    -631,   -1122,  -1270,  -971,   -873,   -1118,  -1157,  -1078,  -1296,
+    -1365,  -1010,  -873,   -1138,  -1061,  -379,   89,     51,     177,
+    372,    185,    -14,    63,     197,    125,    -123,   -60,    243,
+    195,    88,     201,    115,    -63,    -12,    -79,    -492,   -751,
+    -489,   49,     163,    -293,   -424,   -52,    229,    302,    212,
+    217,    315,    70,     -207,   -210,   -173,   129,    619,    556,
+    213,    181,    170,    112,    167,    322,    451,    206,    -136,
+    58,     426,    526,    524,    394,    387,    568,    481,    297,
+    164,    8,      263,    664,    777,    943,    989,    934,    1283,
+    1495,   1153,   861,    738,    582,    614,    692,    655,    629,
+    432,    127,    -119,   -338,   -313,   -138,   -204,   -561,   -994,
+    -1168,  -948,   -700,   -658,   -788,   -1053,  -1027,  -684,   -566,
+    -528,   -355,   -335,   -323,   -28,    206,    87,     56,     387,
+    585,    296,    24,     261,    492,    248,    -132,   -469,   -674,
+    -502,   -235,   -255,   -517,   -847,   -1038,  -965,   -707,   -630,
+    -767,   -639,   -298,   -193,   -290,   -310,   -118,   74,     -77,
+    -337,   -324,   -120,   187,    323,    -72,    -552,   -454,   -14,
+    29,     -427,   -803,   -735,   -586,   -762,   -918,   -783,   -649,
+    -723,   -857,   -786,   -626,   -591,   -417,   -83,    167,    262,
+    49,     -161,   157,    842,    1298,   1356,   1206,   1041,   1194,
+    1461,   1323,   1070,   1221,   1687,   2051,   2002,   1673,   1464,
+    1550,   1851,   1907,   1531,   1327,   1399,   1342,   1287,   1264,
+    1152,   1030,   878,    716,    601,    454,    264,    264,    352,
+    151,    -193,   -296,   -161,   -93,    -215,   -423,   -617,   -668,
+    -547,   -416,   -464,   -807,   -1175,  -1174,  -1045,  -1076,  -1023,
+    -829,   -710,   -745,   -1069,  -1443,  -1417,  -1099,  -939,   -1165,
+    -1307,  -1056,  -843,   -638,   -304,   -190,   -334,   -578,   -770,
+    -705,   -675,   -947,   -957,   -565,   -437,   -617,   -843,   -1015,
+    -813,   -489,   -584,   -904,   -1054,  -797,   -229,   -26,    -208,
+    -66,    398,    710,    644,    390,    413,    726,    992,    1204,
+    1337,   1234,   1104,   1038,   1001,   1043,   982,    847,    885,
+    1024,   1098,   1138,   1108,   1038,   966,    885,    882,    878,
+    929,    1005,   944,    1008,   1284,   1415,   1289,   1007,   760,
+    812,    947,    806,    455,    111,    -72,    -290,   -611,   -626,
+    -559,   -765,   -1034,  -1375,  -1632,  -1565,  -1588,  -1728,  -1585,
+    -1477,  -1547,  -1533,  -1371,  -1103,  -995,   -1090,  -1102,  -947,
+    -686,   -403,   -295,   -250,   -107,   -86,    -171,   -150,   12,
+    234,    283,    185,    300,    461,    393,    382,    434,    378,
+    306,    202,    195,    253,    -8,     -307,   -105,   264,    342,
+    212,    34,     -57,    78,     435,    571,    180,    -165,   -51,
+    339,    705,    683,    464,    658,    958,    825,    579,    465,
+    390,    241,    61,     202,    429,    128,    -122,   241,    406,
+    39,     -167,   -60,    15,     -31,    -68,    146,    402,    344,
+    227,    208,    87,     -25,    -31,    -66,    -169,   -249,   -87,
+    75,     -181,   -438,   -249,   49,     87,     -40,    -16,    53,
+    -86,    -74,    98,     78,     110,    169,    -84,    -323,   -251,
+    -102,   -172,   -513,   -750,   -675,   -568,   -587,   -583,   -523,
+    -450,   -302,   -245,   -356,   -480,   -590,   -495,   -183,   -105,
+    -191,   -215,   -308,   -206,   39,     4,      -77,    -21,    74,
+    186,    218,    356,    611,    489,    83,     13,     246,    371,
+    348,    240,    61,     -66,    -107,   -170,   -205,   -74,    200,
+    277,    45,     -11,    180,    263,    100,    -74,    102,    246,
+    6,      -154,   -162,   -197,   -128,   -189,   -227,   -49,    -238,
+    -490,   -333,   -188,   1,      215,    150,    144,    128,    -33,
+    187,    532,    676,    911,    773,    283,    351,    673,    620,
+    349,    105,    205,    425,    325,    295,    372,    340,    511,
+    628,    394,    224,    187,    91,     -174,   -556,   -482,   -37,
+    -9,     -226,   -382,   -568,   -466,   -208,   -241,   -426,   -656,
+    -814,   -788,   -902,   -1065,  -946,   -860,   -896,   -831,   -744,
+    -672,   -685,   -743,   -723,   -783,   -813,   -570,   -341,   -239,
+    -57,    137,    348,    576,    593,    454,    429,    503,    449,
+    238,    173,    350,    423,    419,    530,    501,    272,    156,
+    207,    295,    404,    568,    676,    419,    30,     113,    463,
+    550,    473,    349,    126,    33,     144,    207,    193,    267,
+    304,    81,     -252,   -401,   -368,   -347,   -404,   -452,   -408,
+    -272,   -40,    234,    281,    48,     -72,    -18,    54,     208,
+    309,    285,    245,    164,    38,     -20,    148,    430,    563,
+    655,    679,    453,    300,    319,    219,    25,     -15,    54,
+    -117,   -444,   -431,   -135,   -147,   -468,   -667,   -722,   -593,
+    -301,   -217,   -428,   -642,   -598,   -400,   -422,   -602,   -628,
+    -554,   -509,   -501,   -541,   -488,   -250,   -129,   -284,   -441,
+    -358,   -161,   -82,    4,      134,    157,    290,    516,    582,
+    702,    859,    871,    858,    759,    615,    616,    754,    839,
+    725,    464,    259,    187,    127,    150,    280,    238,    92,
+    78,     5,      -86,    6,      67,     -14,    -92,    -143,   -211,
+    -89,    213,    300,    107,    -91,    -154,   -153,   -238,   -355,
+    -314,   -227,   -168,   -92,    -142,   -219,   -156,   -47,    53,
+    -15,    -195,   -161,   -186,   -382,   -395,   -297,   -238,   -240,
+    -390,   -502,   -336,   -97,    -29,    -116,   -290,   -289,   -67,
+    74,     112,    119,    182,    358,    382,    315,    341,    290,
+    218,    190,    101,    -51,    -168,   -132,   -41,    -39,    -15,
+    104,    186,    151,    68,     89,     154,    67,     10,     143,
+    120,    -185,   -382,   -365,   -263,   -145,   -111,   -159,   -190,
+    -53,    151,    177,    179,    384,    553,    502,    490,    572,
+    600,    573,    442,    119,    -212,   -260,   -166,   -318,   -506,
+    -413,   -279,   -285,   -354,   -390,   -278,   -142,   -85,    -18,
+    -19,    -121,   -143,   -32,    88,     118,    42,     -96,    -187,
+    -167,   -113,   -172,   -270,   -256,   -178,   -192,   -249,   -128,
+    103,    132,    -47,    -147,   -104,   -56,    -9,     45,     35,
+    109,    315,    381,    326,    336,    457,    667,    786,    675,
+    489,    460,    569,    595,    470,    303,    272,    448,    620,
+    545,    226,    -92,    -128,   91,     172,    -98,    -385,   -378,
+    -264,   -284,   -362,   -314,   -148,   -72,    -198,   -350,   -353,
+    -344,   -389,   -353,   -292,   -327,   -413,   -473,   -519,   -588,
+    -577,   -546,   -737,   -989,   -1030,  -997,   -1010,  -861,   -683,
+    -731,   -690,   -419,   -197,   -47,    112,    167,    74,     41,
+    176,    309,    438,    671,    781,    793,    868,    904,    991,
+    1099,   987,    812,    816,    869,    766,    605,    633,    728,
+    592,    424,    460,    405,    170,    75,     30,     -105,   -58,
+    63,     -58,    -242,   -359,   -415,   -255,   -44,    -127,   -266,
+    -191,   -187,   -296,   -273,   -260,   -341,   -345,   -324,   -384,
+    -467,   -421,   -233,   -125,   -227,   -341,   -256,   -168,   -217,
+    -249,   -302,   -447,   -425,   -274,   -289,   -299,   -229,   -275,
+    -272,   -103,   -57,    -117,   -106,   -162,   -256,   -184,   -31,
+    51,     69,     31,     -19,    72,     256,    318,    331,    254,
+    28,     -7,     121,    48,     -64,    58,     183,    152,    161,
+    201,    167,    190,    287,    278,    157,    56,     103,    332,
+    460,    299,    166,    238,    308,    374,    508,    509,    373,
+    275,    270,    298,    229,    185,    192,    23,     -160,   -80,
+    67,     31,     -170,   -378,   -384,   -330,   -500,   -648,   -615,
+    -686,   -716,   -510,   -510,   -771,   -752,   -475,   -434,   -556,
+    -480,   -403,   -515,   -464,   -255,   -177,   -105,   29,     95,
+    152,    210,    190,    180,    279,    408,    325,    225,    462,
+    607,    537,    759,    1022,   973,    945,    964,    846,    818,
+    952,    907,    584,    313,    302,    428,    533,    479,    260,
+    178,    262,    185,    18,     -77,    -263,   -370,   -208,   -240,
+    -589,   -739,   -572,   -444,   -405,   -357,   -475,   -738,   -771,
+    -542,   -441,   -529,   -651,   -803,   -823,   -556,   -285,   -227,
+    -233,   -202,   -168,   -110,   -78,    -220,   -302,   -56,    129,
+    -60,    -149,   54,     130,    169,    324,    231,    24,     89,
+    269,    320,    262,    231,    225,    138,    67,     153,    310,
+    399,    269,    -21,    -197,   -183,   -59,    144,    234,    -13,
+    -274,   -168,   32,     -37,    -277,   -417,   -441,   -416,   -324,
+    -312,   -467,   -540,   -373,   -166,   -161,   -297,   -365,   -341,
+    -246,   -69,    81,     99,     -3,     11,     305,    540,    449,
+    394,    586,    667,    606,    685,    665,    425,    410,    585,
+    509,    360,    424,    538,    583,    482,    250,    159,    310,
+    423,    217,    -131,   -280,   -204,   -51,    -12,    -204,   -338,
+    -232,   -143,   -201,   -306,   -374,   -336,   -229,   -257,   -453,
+    -576,   -497,   -379,   -326,   -302,   -372,   -504,   -453,   -229,
+    -133,   -226,   -328,   -326,   -261,   -151,   -6,     97,     143,
+    164,    143,    138,    267,    433,    500,    470,    297,    143,
+    279,    504,    556,    475,    333,    233,    225,    228,    198,
+    128,    24,     -17,    4,      -55,    -187,   -251,   -213,   -119,
+    -94,    -214,   -357,   -349,   -246,   -195,   -183,   -261,   -440,
+    -533,   -476,   -341,   -213,   -170,   -220,   -299,   -220,   -8,
+    51,     -11,    19,     172,    292,    189,    9,      -6,     102,
+    238,    384,    477,    448,    353,    304,    354,    473,    543,
+    400,    229,    275,    380,    425,    415,    371,    398,    460,
+    377,    202,    154,    199,    110,    -123,   -365,   -524,   -524,
+    -360,   -134,   -47,    -182,   -348,   -453,   -542,   -503,   -376,
+    -398,   -521,   -595,   -621,   -560,   -439,   -284,   -115,   -80,
+    -123,   -57,    28,     -15,    -60,    -9,     47,     119,    203,
+    288,    435,    571,    635,    706,    750,    627,    436,    345,
+    330,    398,    460,    368,    213,    127,    140,    215,    202,
+    58,     -99,    -244,   -387,   -470,   -527,   -637,   -754,   -791,
+    -768,   -742,   -739,   -735,   -704,   -649,   -552,   -479,   -491,
+    -494,   -454,   -433,   -422,   -398,   -315,   -115,   75,     175,
+    244,    307,    360,    398,    460,    532,    529,    446,    422,
+    497,    541,    504,    541,    702,    803,    744,    645,    621,
+    727,    877,    873,    734,    593,    513,    523,    516,    412,
+    336,    334,    274,    199,    163,    123,    125,    117,    107,
+    140,    72,     -73,    -114,   -68,    -15,    13,     -122,   -338,
+    -367,   -325,   -386,   -497,   -608,   -634,   -546,   -477,   -427,
+    -377,   -412,   -464,   -436,   -343,   -276,   -327,   -390,   -313,
+    -149,   -17,    2,      -93,    -146,   -104,   -76,    -87,    -131,
+    -224,   -280,   -194,   -46,    12,     -76,    -189,   -151,   18,
+    160,    200,    99,     -81,    -149,   -95,    -31,    -6,     -45,
+    -93,    -97,    -71,    0,      73,     34,     -82,    -129,   -102,
+    -84,    -96,    -107,   -69,    -5,     6,      18,     48,     35,
+    27,     32,     -4,     -71,    -30,    119,    205,    266,    352,
+    325,    237,    282,    352,    358,    342,    265,    203,    200,
+    159,    120,    159,    195,    185,    133,    37,     20,     152,
+    312,    363,    316,    255,    251,    259,    211,    160,    86,
+    -4,     -30,    -79,    -154,   -213,   -271,   -243,   -146,   -147,
+    -211,   -283,   -319,   -219,   -157,   -207,   -237,   -252,   -245,
+    -136,   0,      42,     -22,    -108,   -82,    34,     130,    179,
+    152,    98,     105,    110,    116,    180,    175,    66,     -9,
+    -9,     36,     82,     75,     12,     -39,    -14,    23,     1,
+    12,     31,     -61,    -155,   -184,   -158,   -86,    -60,    -67,
+    -63,    -84,    -100,   -81,    -115,   -171,   -157,   -150,   -179,
+    -191,   -209,   -245,   -217,   -128,   -54,    -42,    -73,    -100,
+    -88,    -10,    104,    199,    249,    227,    201,    204,    151,
+    83,     75,     87,     84,     67,     34,     18,     44,     110,
+    218,    275,    232,    190,    209,    263,    294,    256,    174,
+    108,    37,     -54,    -110,   -129,   -179,   -293,   -360,   -339,
+    -282,   -190,   -135,   -188,   -239,   -234,   -227,   -182,   -127,
+    -89,    -51,    -73,    -136,   -151,   -85,    0,      72,     129,
+    122,    65,     44,     103,    202,    272,    252,    170,    148,
+    167,    152,    130,    127,    79,     14,     70,     157,    142,
+    109,    70,     -25,    -57,    -6,     46,     98,     135,    135,
+    82,     16,     10,     68,     87,     -20,    -120,   -116,   -98,
+    -102,   -129,   -204,   -271,   -282,   -252,   -216,   -215,   -221,
+    -156,   -70,    -66,    -120,   -156,   -146,   -126,   -84,    -15,
+    -21,    -76,    -8,     131,    146,    86,     42,     12,     44,
+    110,    169,    171,    91,     68,     173,    262,    248,    160,
+    36,     -90,    -109,   -24,    -12,    -57,    -64,    -78,    -89,
+    -75,    -87,    -101,   -82,    -72,    -76,    -81,    -63,    -34,
+    -4,     61,     87,     46,     23,     -1,     -8,     40,     63,
+    46,     45,     39,     14,     -11,    -25,    -16,    36,     78,
+    85,     110,    120,    132,    189,    228,    217,    154,    89,
+    57,     14,     -14,    -6,     0,      13,     8,      -50,    -68,
+    -60,    -107,   -140,   -126,   -122,   -151,   -147,   -118,   -105,
+    -85,    -83,    -100,   -139,   -195,   -194,   -168,   -183,   -173,
+    -148,   -166,   -168,   -123,   -59,    -11,    20,     64,     98,
+    80,     58,     83,     111,    143,    176,    171,    152,    146,
+    165,    174,    143,    93,     30,     5,      21,     42,     35,
+    -37,    -94,    -61,    -12,    -5,     -27,    -58,    -85,    -81,
+    -11,    79,     65,     -14,    -17,    15,     -4,     -2,     39,
+    20,     -29,    -19,    3,      -11,    -39,    -62,    -43,    -34,
+    -60,    -77,    -119,   -163,   -128,   -5,     87,     73,     51,
+    116,    189,    217,    240,    234,    177,    192,    295,    344,
+    313,    263,    236,    240,    230,    179,    99,     19,     -25,
+    -16,    -9,     -35,    -66,    -53,    -16,    -40,    -70,    -81,
+    -102,   -86,    -87,    -156,   -225,   -228,   -145,   -52,    -22,
+    -57,    -171,   -255,   -247,   -208,   -165,   -187,   -242,   -275,
+    -261,   -168,   -75,    -13,    8,      -62,    -125,   -136,   -133,
+    -81,    -11,    -17,    -80,    -115,   -103,   -27,    71,     134,
+    137,    44,     -48,    -24,    69,     156,    194,    175,    112,
+    55,     54,     101,    148,    157,    142,    100,    44,     27,
+    63,     106,    107,    89,     67,     37,     17,     30,     63,
+    69,     61,     21,     -37,    -55,    -72,    -53,    -26,    -53,
+    -77,    -87,    -109,   -119,   -80,    -36,    -29,    -38,    -48,
+    -57,    -65,    -16,    52,     83,     83,     24,     -27,    -14,
+    9,      27,     52,     50,     45,     90,     132,    117,    75,
+    16,     -1,     60,     95,     55,     25,     26,     20,     61,
+    119,    89,     1,      -61,    -68,    -46,    -36,    -40,    -39,
+    -49,    -58,    -16,    30,     13,     -12,    18,     35,     6,
+    3,      30,     22,     25,     52,     32,     12,     9,      -5,
+    -16,    -25,    -33,    -38,    -44,    -76,    -118,   -118,   -96,
+    -54,    -3,     9,      -31,    -82,    -84,    -35,    18,     25,
+    -26,    -72,    -48,    8,      25,     8,      -20,    -66,    -105,
+    -102,   -80,    -73,    -79,    -80,    -70,    -59,    -55,    -82,
+    -113,   -85,    -51,    -59,    -57,    -38,    -13,    -7,     -18,
+    -6,     20,     51,     55,     18,     -8,     -7,     24,     78,
+    119,    137,    135,    139,    153,    144,    155,    179,    166,
+    128,    56,     8,      38,     85,     94,     72,     20,     -32,
+    -9,     25,     17,     -15,    -84,    -123,   -106,   -82,    -62,
+    -60,    -43,    -4,     -12,    -45,    -68,    -108,   -100,   -47,
+    -49,    -64,    -50,    -9,     37,     59,     68,     62,     53,
+    49,     25,     13,     32,     40,     60,     109,    82,     18,
+    10,     -1,     21,     102,    111,    40,     -10,    -9,     20,
+    31,     0,      -51,    -108,   -135,   -89,    -21,    1,      -54,
+    -125,   -129,   -113,   -144,   -205,   -227,   -167,   -118,   -114,
+    -100,   -71,    5,      34,     -51,    -119,   -120,   -72,    10,
+    56,     51,     58,     65,     98,     135,    84,     20,     -3,
+    -1,     57,     135,    137,    90,     88,     107,    102,    45,
+    -4,     9,      48,     95,     99,     65,     42,     44,     78,
+    80,     29,     11,     39,     27,     0,      7,      19,     10,
+    -45,    -99,    -86,    -77,    -74,    -57,    -74,    -84,    -92,
+    -134,   -114,   -65,    -73,    -76,    -96,    -105,   -50,    -31,
+    -17,    17,     9,      18,     62,     75,     55,     63,     76,
+    61,     61,     80,     103,    107,    110,    131,    134,    120,
+    94,     66,     70,     78,     59,     52,     57,     53,     72,
+    76,     31,     -18,    -53,    -57,    -35,    -17,    -9,     -27,
+    -34,    -7,     -17,    -26,    -13,    -60,    -86,    -53,    -42,
+    -36,    -36,    -46,    -13,    19,     -16,    -47,    -15,    11,
+    -9,     -18,    -26,    -24,    14,     8,      -53,    -54,    15,
+    43,     15,     -9,     -5,     5,      -12,    -40,    -57,    -74,
+    -94,    -105,   -91,    -20,    30,     -10,    -50,    -58,    -52,
+    -42,    -47,    -54,    -61,    -83,    -64,    -30,    -3,     31,
+    9,      -35,    -43,    -31,    6,      50,     54,     55,     67,
+    53,     43,     30,     27,     62,     37,     -26,    -52,    -54,
+    -29,    3,      -12,    -23,    11,     26,     23,     31,     57,
+    66,     46,     32,     35,     83,     124,    111,    124,    157,
+    143,    101,    80,     60,     27,     11,     21,     22,     9,
+    -4,     -26,    -41,    -35,    -50,    -103,   -138,   -116,   -90,
+    -89,    -90,    -79,    -74,    -58,    -18,    -12,    -29,    -36,
+    -17,    22,     30,     -1,     -8,     8,      10,     19,     31,
+    36,     38,     41,     28,     -7,     -14,    -6,     -20,    -30,
+    -11,    -2,     -9,     0,      25,     56,     78,     68,     40,
+    34,     47,     50,     40,     37,     26,     28,     53,     61,
+    57,     25,     -35,    -75,    -65,    -48,    -65,    -81,    -67,
+    -53,    -41,    3,      19,     -3,     -9,     -2,     -1,     -24,
+    -36,    -23,    -26,    -29,    -9,     0,      -15,    -17,    -9,
+    12,     50,     45,     14,     19,     37,     24,     9,      16,
+    13,     -16,    -19,    3,      -3,     -12,    -10,    -23,    -43,
+    -47,    -38,    -46,    -44,    -7,     3,      -19,    -13,    -26,
+    -52,    -29,    -19,    -32,    0,      11,     -26,    -24,    -20,
+    -41,    -30,    -24,    -53,    -67,    -26,    23,     20,     9,
+    6,      -8,     3,      16,     7,      3,      -5,     2,      33,
+    53,     72,     94,     86,     69,     96,     118,    95,     91,
+    78,     32,     26,     48,     48,     37,     21,     7,      -6,
+    -8,     8,      1,      -17,    -2,     18,     1,      -28,    -51,
+    -84,    -93,    -74,    -46,    -18,    -19,    -31,    -10,    10,
+    10,     7,      -5,     -30,    -39,    -28,    -9,     10,     17,
+    11,     14,     20,     -1,     2,      18,     7,      15,     40,
+    40,     32,     27,     23,     31,     43,     33,     7,      -3,
+    18,     51,     53,     31,     21,     14,     16,     14,     4,
+    11,     16,     1,      -24,    -38,    -33,    -27,    -50,    -74,
+    -70,    -60,    -54,    -44,    -22,    -22,    -43,    -33,    -16,
+    -35,    -36,    -18,    -27,    -42,    -46,    -36,    -17,    -15,
+    -22,    -21,    -20,    -2,     15,     12,     22,     27,     22,
+    41,     57,     60,     63,     54,     56,     65,     62,     68,
+    58,     34,     53,     70,     58,     60,     51,     33,     41,
+    39,     16,     -3,     -16,    -18,    -15,    -18,    -32,    -76,
+    -85,    -62,    -82,    -87,    -68,    -84,    -75,    -40,    -48,
+    -55,    -45,    -42,    -24,    -14,    -1,     27,     23,     -1,
+    -2,     12,     15,     32,     55,     52,     55,     82,     81,
+    58,     62,     59,     37,     24,     20,     17,     18,     19,
+    15,     14,     5,      -18,    -27,    -20,    -19,    -34,    -39,
+    -29,    -30,    -27,    -27,    -48,    -52,    -54,    -77,    -48,
+    -18,    -36,    -34,    -13,    -21,    -38,    -28,    -15,    -7,
+    -6,     -20,    -18,    2,      4,      -11,    -5,     7,      1,
+    1,      12,     -2,     -17,    7,      15,     2,      15,     34,
+    48,     78,     94,     82,     66,     66,     64,     47,     44,
+    57,     64,     74,     65,     34,     26,     31,     32,     33,
+    18,     5,      -1,     -18,    -22,    -31,    -54,    -37,    -32,
+    -74,    -89,    -77,    -73,    -65,    -72,    -75,    -39,    -21,
+    -31,    -31,    -24,    -19,    -8,     -4,     7,      26,     22,
+    15,     13,     11,     28,     47,     42,     35,     28,     5,
+    18,     55,     55,     45,     44,     18,     9,      18,     -2,
+    -5,     6,      -15,    -16,    -12,    -20,    -4,     4,      -15,
+    -18,    -10,    -5,     -2,     -16,    -24,    -14,    -7,     -14,
+    -33,    -33,    -20,    -17,    -17,    -18,    -30,    -37,    -35,
+    -34,    -13,    -3,     -28,    -28,    -10,    -21,    -17,    -4,
+    -12,    -16,    -20,    -27,    -16,    -8,     -4,     14,     24,
+    11,     17,     30,     27,     14,     7,      28,     30,     22,
+    45,     47,     23,     31,     23,     -5,     10,     17,     -5,
+    2,      15,     9,      20,     29,     11,     -9,     -8,     8,
+    10,     -1,     -14,    -30,    -30,    -8,     -9,     -20,    -17,
+    -17,    -12,    1,      6,      -7,     -18,    -6,     10,     -6,
+    -7,     29,     35,     21,     16,     9,      25,     44,     26,
+    21,     34,     28,     40,     41,     9,      -2,     1,      12,
+    34,     18,     -12,    -10,    -16,    -29,    -24,    -25,    -20,
+    -17,    -35,    -29,    -12,    -29,    -39,    -32,    -30,    -17,
+    -12,    -28,    -20,    -5,     -4,     7,      14,     10,     3,
+    -3,     0,      19,     27,     4,      -21,    -18,    -7,     -4,
+    0,      1,      -6,     -17,    -30,    -24,    -11,    -9,     0,
+    -1,     0,      -3,     -12,    1,      15,     -2,     3,      16,
+    -3,     -8,     7,      3,      13,     32,     23,     10,     -6,
+    -11,    8,      4,      -12,    -9,     3,      12,     -2,     -31,
+    -36,    -33,    -37,    -17,    -5,     -20,    -14,    4,      5,
+    4,      6,      17,     31,     27,     23,     16,     -1,     -4,
+    15,     24,     21,     18,     7,      -7,     -14,    18,     41,
+    25,     14,     13,     2,      5,      12,     8,      15,     10,
+    2,      13,     10,     3,      5,      -1,     0,      11,     10,
+    6,      2,      7,      10,     -4,     -3,     2,      -13,    -4,
+    14,     -4,     -17,    -11,    -4,     8,      3,      -8,     -1,
+    -7,     -20,    -4,     23,     23,     8,      5,      24,     21,
+    -5,     -2,     7,      -9,     -15,    -8,     -6,     6,      2,
+    -26,    -19,    1,      -19,    -31,    -27,    -34,    -41,    -47,
+    -39,    -12,    -12,    -29,    -32,    -41,    -36,    -26,    -36,
+    -35,    -33,    -29,    -1,     5,      -13,    -21,    -21,    -3,
+    12,     1,      -7,     -1,     2,      12,     9,      -1,     15,
+    21,     18,     25,     4,      -13,    5,      12,     16,     33,
+    33,     19,     21,     26,     30,     30,     24,     23,     19,
+    22,     34,     39,     28,     15,     14,     24,     24,     18,
+    12,     10,     4,      8,      28,     29,     2,      -7,     6,
+    8,      10,     2,      -13,    -8,     -2,     0,      12,     13,
+    -1,     3,      21,     26,     24,     17,     11,     15,     19,
+    19,     19,     11,     1,      3,      3,      0,      -5,     -11,
+    -16,    -26,    -18,    3,      -5,     -17,    2,      10,     6,
+    6,      -8,     -11,    4,      -3,     -17,    -10,    -17,    -37,
+    -31,    -17,    -26,    -37,    -42,    -53,    -49,    -34,    -40,
+    -39,    -21,    -17,    -23,    -23,    -25,    -30,    -24,    -13,
+    -10,    -10,    1,      1,      -7,     7,      19,     11,     4,
+    -3,     -8,     1,      6,      7,      25,     22,     -5,     3,
+    20,     7,      -1,     14,     17,     18,     20,     12,     25,
+    41,     23,     19,     37,     39,     21,     17,     23,     17,
+    6,      9,      15,     4,      -15,    -8,     8,      7,      1,
+    -12,    -18,    -14,    -15,    -10,    0,      -3,     3,      13,
+    -8,     -21,    -8,     -26,    -29,    -1,     -9,     -24,    -19,
+    -22,    -24,    -18,    -25,    -27,    -28,    -34,    -26,    -9,
+    -14,    -14,    -8,     -8,     -5,     4,      4,      -10,    -12,
+    -7,     -8,     -10,    -15,    -19,    -10,    -5,     -9,     -9,
+    -19,    -33,    -27,    -14,    -15,    -14,    -16,    -25,    -10,
+    5,      -7,     -11,    2,      3,      7,      17,     28,     33,
+    32,     33,     39,     49,     57,     63,     62,     64,     67,
+    59,     55,     67,     71,     58,     53,     53,     44,     38,
+    44,     51,     51,     45,     35,     34,     46,     55,     48,
+    36,     21,     3,      -5,     2,      7,      0,      -17,    -30,
+    -34,    -48,    -62,    -64,    -66,    -66,    -62,    -79,    -90,
+    -85,    -88,    -88,    -85,    -88,    -103,   -112,   -112,   -102,
+    -99,    -102,   -103,   -110,   -100,   -80,    -60,    -57,    -68,
+    -59,    -45,    -35,    -6,     9,      -3,     2,      32,     45,
+    48,     51,     40,     51,     78,     85,     83,     87,     94,
+    101,    104,    105,    100,    86,     82,     96,     102,    96,
+    85,     68,     63,     65,     55,     50,     46,     28,     32,
+    43,     33,     30,     27,     8,      18,     36,     27,     20,
+    13,     -14,    -19,    8,      12,     0,      -1,     -12,    -24,
+    -20,    -27,    -39,    -39,    -39,    -44,    -38,    -32,    -42,
+    -38,    -33,    -43,    -55,    -57,    -60,    -61,    -56,    -57,
+    -55,    -43,    -46,    -58,    -55,    -50,    -50,    -51,    -48,
+    -46,    -44,    -36,    -26,    -20,    -13,    -11,    -8,     1,
+    5,      0,      8,      21,     31,     42,     39,     43,     56,
+    48,     37,     45,     45,     47,     52,     46,     40,     26,
+    18,     28,     30,     22,     14,     0,      -3,     8,      0,
+    -7,     0,      -10,    -13,    -9,     -13,    -13,    -18,    -33,
+    -32,    -26,    -37,    -41,    -32,    -26,    -30,    -34,    -31,
+    -38,    -40,    -24,    -25,    -29,    -15,    -18,    -23,    -4,
+    2,      -7,     0,      5,      10,     22,     23,     25,     31,
+    33,     37,     38,     39,     43,     46,     41,     44,     46,
+    37,     35,     46,     63,     67,     52,     38,     30,     35,
+    41,     41,     41,     29,     15,     16,     4,      -4,     3,
+    -12,    -18,    -13,    -27,    -39,    -47,    -55,    -44,    -43,
+    -53,    -45,    -36,    -37,    -37,    -38,    -40,    -49,    -57,
+    -41,    -24,    -28,    -31,    -26,    -20,    -15,    -21,    -23,
+    -18,    -19,    -14,    -10,    -11,    1,      -6,     -26,    -14,
+    -1,     -7,     -10,    -11,    -9,     0,      -4,     -9,     3,
+    8,      0,      -2,     1,      16,     20,     7,      9,      10,
+    8,      18,     12,     11,     17,     -6,     -19,    0,      0,
+    -10,    -6,     -12,    -14,    -11,    -9,     -2,     -10,    -19,
+    -9,     -11,    -4,     18,     7,      -3,     9,      17,     23,
+    28,     25,     19,     19,     24,     33,     37,     30,     28,
+    35,     44,     43,     33,     31,     30,     26,     33,     39,
+    35,     31,     27,     19,     23,     24,     19,     13,     0,
+    0,      2,      -7,     -9,     -10,    -13,    -6,     -6,     -23,
+    -28,    -15,    -9,     -20,    -34,    -30,    -15,    -12,    -11,
+    -3,     -4,     -4,     6,      15,     9,      -11,    -20,    3,
+    26,     23,     1,      -16,    -3,     12,     2,      -22,    -36,
+    -35,    -28,    -20,    -13,    -19,    -38,    -43,    -29,    -11,
+    -5,     -15,    -37,    -40,    -9,     12,     -1,     -23,    -30,
+    -16,    12,     21,     -1,     -25,    -21,    4,      34,     55,
+    34,     -12,    -11,    47,     99,     107,    58,     0,      8,
+    78,     148,    151,    56,     -40,    -2,     142,    215,    99,
+    -67,    -64,    76,     153,    99,     -21,    -107,   -92,    -1,
+    106,    107,    -123,   -395,   -334,   60,     274,    -69,    -597,
+    -626,   -126,   238,    18,     -447,   -577,   -312,   -34,    20,
+    -89,    -242,   -332,   -222,   74,     262,    64,     -285,   -232,
+    259,    563,    294,    -138,   -130,   312,    642,    515,    189,
+    57,     187,    415,    538,    467,    277,    109,    134,    334,
+    441,    299,    59,     -7,     128,    228,    146,    -20,    -99,
+    -34,    60,     24,     -108,   -188,   -147,   -57,    -48,    -142,
+    -224,   -210,   -144,   -122,   -175,   -212,   -176,   -150,   -199,
+    -256,   -210,   -100,   -79,    -195,   -298,   -248,   -107,   -48,
+    -110,   -192,   -224,   -189,   -112,   -40,    -31,    -124,   -238,
+    -193,   -3,     87,     -53,    -221,   -165,   48,     132,    -2,
+    -150,   -109,   61,     147,    83,     -20,    -60,    -13,    85,
+    157,    130,    17,     -68,    -10,    147,    217,    116,    -20,
+    -21,    103,    200,    158,    52,     35,     105,    155,    132,
+    81,     74,     110,    114,    74,     48,     68,     100,    77,
+    27,     30,     48,     19,     -15,    7,      63,     53,     -56,
+    -123,   -41,    81,     75,     -61,    -154,   -84,    45,     68,
+    -24,    -105,   -76,    22,     53,     -13,    -63,    -21,    54,
+    59,     -1,     -34,    16,     80,     81,     48,     37,     61,
+    89,     88,     101,    134,    132,    100,    83,     125,    188,
+    173,    101,    95,     172,    214,    149,    68,     94,     181,
+    177,    103,    83,     132,    165,    122,    83,     140,    191,
+    153,    92,     106,    198,    226,    138,    85,     146,    215,
+    187,    110,    77,     115,    146,    115,    91,     96,     78,
+    27,     -3,     42,     102,    71,     -23,    -46,    30,     95,
+    63,     -18,    -25,    77,     174,    138,    13,     -25,    96,
+    218,    181,    34,     -70,    -45,    17,     2,      -67,    -174,
+    -346,   -516,   -553,   -446,   -455,   -789,   -1213,  -1308,  -1046,
+    -878,   -1179,  -1691,  -1839,  -1528,  -1219,  -1292,  -1623,  -1772,
+    -1538,  -1147,  -921,   -951,   -1038,  -929,   -549,   -95,    155,
+    127,    97,     387,    931,    1339,   1380,   1234,   1276,   1661,
+    2102,   2223,   2027,   1848,   1942,   2198,   2295,   2119,   1856,
+    1725,   1745,   1752,   1601,   1335,   1102,   993,    952,    830,
+    570,    286,    139,    133,    85,     -135,   -436,   -638,   -645,
+    -571,   -620,   -835,   -1064,  -1151,  -1069,  -951,   -964,   -1109,
+    -1209,  -1162,  -1044,  -961,   -944,   -977,   -1001,  -912,   -687,
+    -517,   -623,   -887,   -897,   -469,   10,     -35,    -590,   -934,
+    -545,   184,    427,    -53,    -619,   -563,   40,     489,    339,
+    -128,   -306,   -6,     403,    497,    232,    -55,    0,      388,
+    704,    584,    145,    -76,    260,    816,    942,    485,    2,
+    65,     575,    923,    744,    290,    76,     276,    596,    662,
+    419,    134,    92,     280,    434,    344,    88,     -66,    8,
+    151,    126,    -81,    -239,   -176,   -29,    -74,    -351,   -574,
+    -487,   -208,   -132,   -426,   -780,   -797,   -577,   -595,   -978,
+    -1169,  -667,   -36,    -548,   -2285,  -3281,  -1756,  927,    1236,
+    -1911,  -5006,  -4073,  -66,    2017,   -295,   -3701,  -3797,  -892,
+    975,    -165,   -1978,  -1636,  374,    1482,   679,    -567,   -591,
+    706,    2337,   3224,   2743,   1269,   287,    1221,   3597,   5083,
+    4106,   1858,   972,    2334,   4096,   4167,   2806,   1916,   2383,
+    3045,   2508,   1220,   820,    1784,   2669,   1981,   204,    -876,
+    -470,   510,    803,    170,    -787,   -1568,  -1893,  -1598,  -1027,
+    -992,   -1803,  -2610,  -2484,  -1905,  -2113,  -3113,  -3399,  -2267,
+    -1261,  -2007,  -3637,  -3909,  -2340,  -893,   -1158,  -2272,  -2486,
+    -1639,  -915,   -777,   -596,   -91,    196,    85,     210,    875,
+    1373,   1247,   1219,   1958,   2718,   2328,   1196,   1008,   2350,
+    3677,   3269,   1503,   366,    922,    2264,   2810,   1996,   608,
+    -168,   75,     680,    811,    395,    -56,    -318,   -607,   -966,
+    -1108,  -925,   -613,   -368,   -369,   -919,   -1926,  -2460,  -1685,
+    -300,   155,    -611,   -1524,  -2204,  -3227,  -3859,  -2037,  1622,
+    2382,   -2583,  -8448,  -7544,  -84,    4814,   915,    -6423,  -7558,
+    -1746,  2515,   -59,    -4587,  -3858,  1260,   3625,   187,    -4148,
+    -3500,  1542,   5467,   4780,   1256,   -1127,  -403,   2481,   5332,
+    6346,   5014,   2536,   1216,   2467,   5039,   6238,   5070,   3381,
+    3269,   4173,   3905,   2248,   1586,   3299,   5240,   4362,   1004,
+    -1382,  -489,   2113,   3168,   1620,   -742,   -1824,  -1435,  -897,
+    -1058,  -1500,  -1545,  -1398,  -1965,  -3266,  -4136,  -3756,  -2609,
+    -1804,  -1986,  -3087,  -4599,  -5296,  -4051,  -1731,  -781,   -2228,
+    -4092,  -3977,  -2325,  -1353,  -1568,  -1490,  -428,   178,    -672,
+    -1650,  -1058,  749,    2039,   2079,   1540,   897,    310,    572,
+    2266,   4265,   4265,   1869,   -231,   559,    3332,   4752,   3229,
+    768,    101,    1364,   2463,   1984,   819,    411,    723,    675,
+    -162,   -923,   -743,   -32,    185,    -516,   -1653,  -2359,  -2103,
+    -986,   42,     -205,   -1702,  -2870,  -2337,  -809,   -221,   -982,
+    -1544,  -946,   -598,   -2117,  -4291,  -4100,  -857,   1948,   338,
+    -4799,  -7972,  -5403,  173,    2371,   -1063,  -5533,  -5578,  -1777,
+    605,    -985,   -3249,  -2213,  1184,   2691,   560,    -2356,  -2288,
+    1233,   5244,   6441,   4004,   370,    -663,   2555,   7404,   9282,
+    6573,   2612,   1836,   4662,   7467,   7393,   5421,   4262,   4741,
+    5362,   4705,   3163,   2397,   3337,   4887,   4810,   2254,   -749,
+    -1316,  772,    2706,   2016,   -573,   -2552,  -2746,  -2012,  -1647,
+    -1978,  -2579,  -3105,  -3473,  -3911,  -4484,  -4891,  -4795,  -4163,
+    -3543,  -3538,  -4275,  -5356,  -5743,  -4637,  -2614,  -1301,  -1825,
+    -3341,  -4011,  -2937,  -751,   1007,   1245,   235,    -639,   -61,
+    1626,   2864,   2967,   2734,   3013,   3329,   2914,   2312,   2666,
+    3839,   4308,   3162,   1453,   768,    1255,   1887,   2006,   1715,
+    1031,   -297,   -1660,  -1690,  -277,   813,    -30,    -2137,  -3370,
+    -2854,  -1553,  -593,   -413,   -1146,  -2567,  -3440,  -2369,  -205,
+    379,    -1258,  -2315,  -812,   262,    -3205,  -8576,  -7894,  738,
+    7492,   1951,   -11595, -17098, -6934,  7139,   8065,   -4575,  -14199,
+    -8946,  3606,   7504,   -547,   -8242,  -5113,  4406,   8113,   2134,
+    -5040,  -4089,  4157,   10934,  10158,  4167,   -565,   -192,   4428,
+    9765,   12201,  9861,   4512,   1225,   3451,   8483,   10133,  6497,
+    2574,   3333,   6806,   6986,   2487,   -1214,  623,    5416,   6647,
+    2204,   -3289,  -4556,  -1565,  1544,   1525,   -1236,  -4293,  -5695,
+    -5174,  -3995,  -3403,  -3449,  -3750,  -4505,  -6014,  -7296,  -6523,
+    -3849,  -2096,  -3288,  -5722,  -6004,  -3581,  -1497,  -1960,  -3330,
+    -2800,  -434,   964,    -111,   -1739,  -1136,  1736,   4151,   3736,
+    1274,   -451,   469,    3386,   5833,   5898,   3646,   1085,   272,
+    1743,   4061,   5108,   3837,   1490,   246,    967,    1866,   859,
+    -1069,  -974,   1542,   2835,   47,     -4285,  -5068,  -1567,  1781,
+    1223,   -1997,  -4227,  -3747,  -1720,  41,     245,    -1228,  -2972,
+    -2673,  22,     1980,   -930,   -7721,  -11271, -5725,  4974,   8484,
+    -2007,  -16979, -19255, -4670,  11057,  9690,   -6417,  -17537, -10841,
+    4262,   9292,   206,    -9128,  -6224,  4828,   10018,  3699,   -5183,
+    -5121,  4702,   14279,  14466,  5778,   -2633,  -2185,  7036,   16118,
+    16305,  8081,   390,    499,    6580,   11150,  10036,  5704,   2902,
+    3378,   4664,   3786,   863,    -796,   1216,   4609,   4493,   -338,
+    -5670,  -6486,  -2751,  884,    571,    -3095,  -6446,  -6997,  -5770,
+    -5041,  -5016,  -4216,  -2579,  -2468,  -5088,  -8129,  -7964,  -4228,
+    -323,   497,    -1556,  -3653,  -3615,  -1718,  464,    1808,   2386,
+    2832,   3085,   2905,   2676,   3473,   5501,   7094,   6442,   3929,
+    1663,   1436,   3254,   5807,   7100,   5044,   -34,    -4091,  -2992,
+    2149,   5333,   2562,   -3067,  -5877,  -4480,  -2080,  -1793,  -3026,
+    -3838,  -3735,  -3663,  -4472,  -5756,  -5753,  -3576,  -640,   -274,
+    -3965,  -7787,  -6757,  -717,   4380,   3595,   -1553,  -5936,  -8603,
+    -10223, -8952,  -922,   9700,   9355,   -7788,  -25795, -22413, 2268,
+    20887,  12133,  -11291, -20129, -5899,  10236,  8585,   -3645,  -6300,
+    4667,   14216,  9346,   -3593,  -8558,  715,    15085,  21179,  14887,
+    3733,   -2703,  -675,   7170,   15131,  18360,  13959,  4205,   -2825,
+    -656,   7594,   11845,  7182,   319,    -439,   3255,   3213,   -3299,
+    -8972,  -6318,  2300,   7190,   2254,   -9247,  -17334, -15064, -4452,
+    5160,   5127,   -4268,  -14501, -17256, -11145, -1830,  3786,   2984,
+    -2498,  -8101,  -9587,  -5703,  622,    4570,   4035,   1442,   729,
+    2493,   3534,   2433,   2239,   5944,   11438,  12371,  6496,   -211,
+    -156,   7092,   13566,  11979,  3928,   -2545,  -2226,  2713,   6150,
+    5117,   1270,   -1851,  -2859,  -2376,  -1909,  -2364,  -3401,  -4183,
+    -3897,  -2875,  -3205,  -5503,  -7822,  -7501,  -3934,  -942,   -1572,
+    -4262,  -5939,  -4671,  -2353,  -1387,  -1159,  -1270,  -1328,  -606,
+    474,    1044,   -2647,  -11603, -17081, -10374, 5922,   14849,  2056,
+    -22033, -31238, -14612, 11094,  17910,  1778,   -15538, -15417, -2045,
+    6690,   2855,   -2559,  473,    8823,   11423,  3782,   -4649,  -2775,
+    9111,   20847,  21610,  11572,  962,    -1465,  5731,   15559,  20008,
+    16950,  9230,   2204,   114,    3088,   8130,   10523,  7643,   2045,
+    -2107,  -2945,  -2538,  -3593,  -5210,  -4403,  -857,   1328,   -2497,
+    -11667, -18881, -16866, -6286,  3400,   2835,   -7811,  -18322, -19279,
+    -10025, 1525,   6930,   3766,   -4647,  -11401, -9904,  -322,   10100,
+    12428,  5874,   -274,   926,    6762,   9360,   6778,   5904,   10509,
+    15077,  12681,  3846,   -1653,  2460,   11036,  14737,  8967,   -1021,
+    -6168,  -3899,  2328,   6041,   3404,   -2878,  -7672,  -6869,  -1918,
+    801,    -2188,  -7419,  -8083,  -2687,  1898,   -692,   -8121,  -11198,
+    -5642,  2830,   5915,   1120,   -5666,  -8314,  -5770,  118,    4614,
+    4713,   1482,   -2544,  -3331,  -3779,  -8931,  -13840, -10273, 3355,
+    13432,  2906,   -20058, -30890, -17080, 7759,   16047,  2886,   -12525,
+    -15117, -5998,  1614,   2294,   2684,   4610,   6236,   5486,   2514,
+    1346,   1962,   4564,   11022,  17438,  18182,  10179,  -796,   -3019,
+    5456,   15942,  18468,  11176,  2796,   -143,   1670,   3922,   3836,
+    3337,   3330,   1623,   -2609,  -7177,  -7654,  -4250,  -2210,  -3491,
+    -5312,  -4380,  -3103,  -6738,  -13209, -14278, -6529,  3346,   4931,
+    -2861,  -11176, -12097, -5552,  2679,   7102,   6050,   1301,   -3350,
+    -3378,  1785,   7413,   9059,   7013,   5043,   5331,   5197,   3143,
+    1862,   3790,   8037,   10159,  7236,   1450,   -3393,  -3980,  598,
+    6251,   7410,   1502,   -7144,  -10260, -5116,  2386,   4197,   -894,
+    -6255,  -6026,  -1493,  873,    -1639,  -4426,  -2720,  2252,   4206,
+    158,    -4631,  -4466,  537,    4709,   4528,   1691,   -828,   -1394,
+    -455,   756,    2662,   3101,   1730,   -3579,  -12987, -18531, -12998,
+    1944,   11963,  1503,   -19826, -29919, -18138, 2254,   7644,   -1829,
+    -9260,  -6516,  134,    -793,   -5234,  -2336,  6264,   12828,  11829,
+    6589,   3429,   2592,   4795,   11433,  19490,  21681,  13136,  379,
+    -4138,  3585,   14812,  17633,  10124,  623,    -2287,  696,    2273,
+    -926,   -5000,  -4391,  -386,   139,    -4657,  -11003, -13946, -11930,
+    -7460,  -1932,  1277,   -2311,  -10543, -16920, -14512, -4039,  4987,
+    7518,   3175,   -4213,  -7535,  -4747,  3590,   12231,  13419,  8429,
+    2377,   1080,   5563,   8497,   7304,   5331,   5656,   8235,   6997,
+    998,    -3131,  -1857,  3017,   5883,   3744,   -408,   -4503,  -6489,
+    -4796,  -374,   3254,   1651,   -2830,  -5206,  -3690,  -681,   -969,
+    -2819,  -2616,  19,     3379,   2359,   -2476,  -6413,  -6111,  -463,
+    4664,   4106,   -565,   -4801,  -4960,  -1242,  2479,   3706,   2168,
+    -1104,  -3048,  -1563,  1217,   2013,   -5714,  -17921, -21743, -10839,
+    7751,   13091,  -4648,  -26509, -29653, -9872,  10100,  9523,   -4335,
+    -12121, -5509,  4923,   6380,   1839,   -508,   3312,   10704,  14545,
+    12317,  5508,   -243,   2421,   11485,  19096,  18306,  8626,   -1357,
+    -5542,  -1695,  7815,   13549,  10229,  -23,    -8373,  -7496,  -2775,
+    -1016,  -2900,  -4868,  -4103,  -4535,  -6851,  -8099,  -8137,  -6414,
+    -4023,  -1790,  -45,    -1513,  -4791,  -6160,  -4105,  1060,   5970,
+    7099,   3934,   -996,   -2213,  1973,   6975,   7927,   4726,   2474,
+    3951,   5221,   2642,   -2359,  -3579,  1362,   6614,   6282,   116,
+    -5643,  -5733,  -1884,  2107,   3418,   2566,   684,    -2319,  -3803,
+    -2133,  1512,   2943,   475,    -1004,  753,    3095,   1652,   -3074,
+    -4562,  -932,   3815,   4486,   -22,    -4199,  -4666,  -2201,  284,
+    316,    -914,   -2297,  -2441,  -1538,  -435,   909,    626,    -1222,
+    -1534,  -429,   1711,   2386,   -1786,  -10676, -18200, -16272, -3805,
+    9505,   8238,   -9397,  -24577, -22256, -4907,  8659,   5940,   -3701,
+    -6764,  40,     6190,   4239,   208,    238,    7081,   14458,  15143,
+    10726,  3479,   -706,   1700,   9131,   17577,  17708,  7959,   -5009,
+    -11508, -5347,  5635,   10789,  6499,   -3121,  -9303,  -9814,  -6625,
+    -3333,  -3193,  -4349,  -5615,  -6188,  -5123,  -4441,  -4550,  -4074,
+    -2769,  -61,    2441,   2881,   1395,   -578,   -341,   2509,   6034,
+    8202,   6377,   2696,   1272,   2589,   4787,   4611,   2378,   2124,
+    3911,   4872,   2049,   -3374,  -5770,  -2705,  3179,   5905,   2589,
+    -2792,  -5419,  -3176,  1056,   2875,   2483,   1205,   605,    856,
+    1012,   892,    105,    -411,   707,    2924,   4184,   1755,   -2553,
+    -4857,  -3556,  401,    2466,   945,    -2315,  -5556,  -5549,  -2241,
+    534,    601,    -1774,  -3034,  -1962,  -886,   -448,   -720,   -467,
+    864,    760,    -22,    -2546,  -10211, -17121, -15877, -4803,  7993,
+    7254,   -6563,  -18374, -17755, -6143,  3291,   4322,   1822,   416,
+    2788,   5190,   4256,   2627,   2590,   6398,   12709,  15757,  12829,
+    5542,   -667,   167,    7241,   14346,  14826,  6392,   -3516,  -7434,
+    -4607,  1054,   2988,   847,    -1549,  -2641,  -3046,  -5363,  -8256,
+    -9130,  -6906,  -1460,  2260,   1568,   -2911,  -8580,  -9418,  -3675,
+    5021,   10127,  7909,   1478,   -4015,  -3331,  2450,   7291,   7632,
+    2567,   -2022,  -899,   3418,   5544,   1349,   -4117,  -3409,  1758,
+    6000,   3526,   -3975,  -7331,  -3931,  2747,   7037,   4962,   -21,
+    -2902,  -2008,  1306,   4461,   6364,   5956,   3623,   1734,   793,
+    44,     -893,   -1041,  1633,   5264,   4870,   -943,   -7404,  -8611,
+    -4974,  -1192,  185,    -1334,  -3672,  -4910,  -5132,  -4387,  -3532,
+    -3233,  -2430,  -469,   1245,   892,    -969,   -2441,  -2140,  320,
+    4999,   5954,   -4638,  -20056, -24424, -8954,  13558,  16089,  -3145,
+    -20665, -19447, -4802,  4488,   3733,   943,    683,    3109,   6219,
+    9247,   7736,   782,    -1410,  8024,   20877,  20174,  4723,   -7148,
+    -2758,  11240,  17896,  11462,  414,    -6134,  -4913,  113,    2818,
+    98,     -5900,  -8369,  -4446,  924,    1657,   -3389,  -10569, -13223,
+    -7690,  2339,   7741,   1634,   -9014,  -10982, -1172,  9642,   9098,
+    1310,   -2795,  -1040,  2790,   3808,   3559,   3064,   -527,   -3160,
+    -1391,  3120,   5224,   -144,   -6714,  -6416,  -719,   5630,   7253,
+    2735,   -2973,  -4325,  679,    7146,   8220,   4055,   -42,    814,
+    5288,   7658,   6592,   3051,   -746,   -541,   3401,   6030,   1953,
+    -6340,  -8619,  -2689,  4076,   3217,   -4875,  -9612,  -7826,  -4293,
+    -2441,  -4080,  -5740,  -5529,  -3656,  -506,   -1035,  -5787,  -9518,
+    -7034,  2323,   9287,   6495,   -1853,  -6110,  -3281,  -1708,  -8958,
+    -19544, -18870, -2771,  13029,  10762,  -7491,  -21837, -18923, -4183,
+    8733,   12580,  9779,   4597,   738,    1460,   6302,   9711,   8375,
+    8143,   12512,  15808,  11272,  389,    -5554,  161,    11080,  15851,
+    10426,  692,    -6372,  -6808,  -2525,  652,    827,    -219,   -349,
+    -622,   -3328,  -7883,  -11020, -8961,  -3240,  1884,   4155,   1995,
+    -3530,  -7816,  -6444,  -218,   6086,   9279,   7901,   3113,   -2352,
+    -5757,  -3836,  2022,   4572,   894,    -3519,  -3311,  -534,   -618,
+    -3716,  -5515,  -3290,  1495,   4374,   4455,   2961,   -645,   -3247,
+    -656,   5273,   9838,   9751,   5755,   1863,   158,    1457,   4585,
+    6390,   5379,   2894,   2284,   1867,   -2279,  -7051,  -6578,  70,
+    4745,   1660,   -4524,  -8007,  -7088,  -5690,  -5467,  -4178,  -2679,
+    -2218,  -3422,  -4167,  -4313,  -6105,  -6633,  -4202,  864,    5119,
+    4084,   -163,   -5331,  -8699,  -8710,  -7313,  -4649,  -2471,  -1419,
+    -1136,  -3199,  -6428,  -8048,  -4902,  1089,   4681,   5723,   5535,
+    5146,   4006,   2052,   2314,   5274,   8680,   9907,   8776,   6722,
+    2548,   -2403,  -3303,  1224,   7406,   9468,   5089,   -1197,  -4384,
+    -3570,  -298,   1776,   2005,   2041,   1326,   971,    -180,   -2334,
+    -1170,  1913,   4281,   4732,   2874,   1174,   -1341,  -3384,  -2503,
+    368,    4031,   3270,   -986,   -3519,  -5360,  -6004,  -5576,  -3603,
+    208,    708,    -2137,  -4940,  -5349,  -3588,  -2796,  -1399,  1017,
+    3144,   4196,   2483,   828,    338,    919,    3842,   6202,   7189,
+    7499,   6330,   4847,   3252,   2136,   3698,   5845,   5566,   3019,
+    267,    -55,    -1091,  -4220,  -5041,  -3430,  -280,   171,    -4649,
+    -8723,  -9280,  -5975,  -3192,  -3974,  -3912,  -4053,  -3748,  -3570,
+    -5871,  -5499,  -3552,  -1691,  320,    341,    748,    -313,   -3436,
+    -4687,  -3681,  21,     2550,   643,    -2123,  -3254,  -2226,  -1044,
+    -1617,  -1510,  183,    1250,   726,    -1662,  -3388,  -1759,  933,
+    3817,   5242,   3025,   248,    -1339,  -514,   2022,   3410,   3970,
+    3324,   2632,   2603,   2240,   2166,   1271,   487,    1076,   2039,
+    3296,   3836,   3610,   2913,   2718,   4213,   5555,   6023,   4769,
+    2442,   2067,   2173,   1623,   1201,   348,    52,     -124,   -1528,
+    -2834,  -3604,  -3463,  -2357,  -2564,  -3775,  -3801,  -1929,  -465,
+    -2109,  -3743,  -2657,  200,    2580,   954,    -1304,  -95,    1549,
+    2303,   1795,   1633,   3356,   3699,   2361,   792,    1148,   4045,
+    4820,   3851,   3197,   2449,   2704,   1722,   -652,   -1154,  -393,
+    113,    -1010,  -3328,  -4342,  -3939,  -3345,  -3697,  -5115,  -5610,
+    -4202,  -3639,  -5088,  -5351,  -3216,  -862,   -414,   -1839,  -3996,
+    -4831,  -2467,  147,    1055,   1288,   -247,   -2225,  -2233,  -1562,
+    -1278,  -936,   -961,   -935,   -367,   -323,   -459,   -1940,  -3974,
+    -2262,  -13,    2,      -401,   -1825,  -2308,  -1124,  448,    2154,
+    2434,   1300,   -812,   -1337,  1325,   3374,   3466,   2500,   2156,
+    3439,   3549,   2068,   1392,   1986,   3025,   3944,   3898,   3259,
+    4467,   6347,   5356,   2893,   1690,   2072,   4136,   5313,   2776,
+    -236,   -1063,  -794,   524,    802,    -1377,  -2879,  -2167,  -1439,
+    -1595,  -1539,  -1666,  -2495,  -2375,  -1253,  -515,   -187,   -1409,
+    -2847,  -511,   2411,   1761,   492,    -18,    607,    2350,   3288,
+    3505,   2741,   1099,   699,    2017,   3214,   3333,   1567,   33,
+    1260,   1925,   808,    -377,   -2558,  -3781,  -1677,  164,    -580,
+    -1727,  -2619,  -3421,  -3586,  -3957,  -4562,  -3646,  -2285,  -3437,
+    -5293,  -4792,  -4128,  -4012,  -2920,  -2249,  -2439,  -3737,  -5607,
+    -4427,  -1259,  71,     609,    555,    -1039,  -3354,  -5388,  -3760,
+    415,    2513,   2513,   819,    -1436,  -2780,  -2740,  -501,   2727,
+    3936,   1491,   -965,   -766,   -484,   -223,   361,    695,    1771,
+    1130,   -1839,  -1764,  797,    -31,    -2549,  -1790,  2108,   4043,
+    887,    -154,   2411,   2605,   2012,   1977,   3923,   6630,   4176,
+    107,    -311,   1731,   1910,   1011,   3119,   3219,   998,    -1282,
+    -2832,  -1645,  -685,   945,    2574,   2543,   -267,   -5015,  -3819,
+    -342,   1228,   2055,   -619,   -1233,  2069,   2896,   1095,   62,
+    1365,   3366,   4584,   4956,   3323,   -19,    -50,    4024,   5222,
+    3695,   3118,   1933,   1256,   1443,   128,    -119,   2043,   2477,
+    1823,   1324,   30,     -1363,  -3023,  -3074,  -188,   621,    -1775,
+    -2806,  -2961,  -2753,  -4359,  -5350,  -1220,  -116,   -4157,  -4811,
+    -2793,  -1040,  -1957,  -2862,  -1901,  -3192,  -3720,  -2357,  -1727,
+    -387,   -2131,  -5011,  -3650,  -454,   596,    -1298,  -3716,  -3122,
+    496,    136,    -2415,  -1675,  -811,   -837,   140,    -1243,  -187,
+    -1431,  -5320,  -2121,  100,    -467,   2465,   681,    -2093,  1224,
+    1632,   1428,   1776,   648,    2480,   3622,   876,    259,    1403,
+    2139,   3117,   497,    -763,   -170,   279,    1769,   342,    -871,
+    -25,    -1549,  -2290,  290,    1042,   -796,   -4291,  -3895,  159,
+    1264,   -540,   -2328,  -702,   1972,   852,    -2274,  -798,   1126,
+    -579,   -480,   3481,   3833,   1004,   901,    1536,   1809,   3103,
+    2521,   3183,   5220,   1800,   -266,   4663,   4230,   -790,   159,
+    2274,   5114,   4304,   -1998,  344,    4921,   -343,   -2048,  1180,
+    2112,   3109,   -10,    -1818,  552,    -1360,  -2889,  -1302,  -1918,
+    -37,    1406,   -1762,  -3054,  -1446,  -2073,  -4292,  -3214,  1163,
+    2333,   -712,   -2583,  -2058,  -1034,  -600,   -3796,  -2395,  2137,
+    -1122,  -1927,  702,    -2196,  -4374,  -3257,  -1558,  -256,   -728,
+    -395,   -176,   -1529,  -2772,  -1121,  -340,   -1147,  -250,   -4079,
+    -473,   4241,   -2818,  -3523,  3255,   2355,   -2550,  -1082,  1197,
+    2213,   -94,    -237,   3123,   1314,   -1075,  977,    1081,   2045,
+    2966,   -1328,  -1069,  -741,   -524,   -380,   -2766,  -986,   926,
+    -3281,  -1554,  2554,   -3620,  -6394,  -1680,  -321,   2889,   243,
+    -1567,  2276,   -1294,  -525,   2010,   -4883,  -1495,  6778,   2085,
+    -873,   2496,   418,    -1156,  -1179,  1604,   6173,   1190,   -2381,
+    5788,   2431,   -4941,  -242,   1248,   1023,   4426,   3399,   2726,
+    1388,   -922,   595,    392,    1414,   6260,   2673,   -973,   2237,
+    1776,   -2393,  -757,   4158,   2842,   -2327,  505,    1230,   -3623,
+    -917,   336,    -1400,  -1018,  1771,   2696,   -570,   -2435,  886,
+    2309,   -2865,  -1328,  2077,   -1967,  -3486,  -411,   961,    -1661,
+    -1979,  1179,   -493,   -2597,  1995,   284,    -3300,  -2213,  184,
+    312,    -1665,  -641,   -1325,  -1276,  90,     69,     476,    -778,
+    -1099,  853,    1515,   1630,   1188,   -877,   -1751,  702,    2983,
+    -201,   664,    4018,   -352,   -1864,  875,    2367,   813,    -2463,
+    -702,   886,    -2204,  -2216,  399,    -1729,  -2408,  1412,   -2757,
+    -3530,  449,    -2554,  -3910,  906,    697,    -1696,  566,    -1360,
+    -1991,  81,     -1756,  -159,   1180,   -667,   -584,   -359,   183,
+    1943,   -412,   -1747,  1659,   1961,   280,    294,    222,    2000,
+    2076,   829,    -43,    -880,   3353,   3615,   1279,   1746,   -1031,
+    1301,   3477,   -777,   2567,   1215,   -2344,  3556,   561,    -2166,
+    1119,   2377,   -391,   -1825,  -2359,  49,     1764,   391,    -291,
+    325,    1223,   1443,   -624,   -2828,  1381,   2438,   28,     -652,
+    -166,   581,    -2039,  -374,   -20,    -2459,  -1149,  1505,   2008,
+    -1798,  -3848,  -1796,  -2208,  -2224,  -878,   728,    -154,   -534,
+    1061,   538,    -1465,  73,     1147,   82,     -119,   3800,   4797,
+    -873,   784,    1458,   -148,   3180,   1319,   908,    4951,   584,
+    -57,    2394,   -967,   586,    405,    -1601,  3566,   -285,   -3949,
+    -1301,  -1953,  -1223,  -1831,  -3477,  -779,   -389,   -3169,  -1828,
+    -1496,  -1451,  -556,   -3327,  -209,   534,    -4908,  131,    -386,
+    -5232,  1373,   2129,   -1740,  -1957,  -1102,  76,     396,    -1426,
+    -179,   1357,   -3276,  -1420,  3819,   -44,    56,     2777,   -1202,
+    1908,   1410,   2031,   3495,   -2197,  -163,   1565,   239,    2803,
+    480,    -1636,  1180,   616,    1206,   1166,   -1579,  1572,   814,
+    -774,   2310,   740,    -2606,  1234,   -603,   -362,   1562,   -2134,
+    652,    -777,   -2353,  5464,   377,    -2490,  1012,   157,    680,
+    -1389,  -1898,  1135,   -1,     -1730,  1800,   -1466,  -1687,  -1469,
+    -3250,  -1081,  1381,   -81,    -204,   -26,    353,    1941,   174,
+    104,    2009,   1032,   -871,   3280,   3398,   -651,   -154,   3309,
+    1964,   448,    812,    -17,    887,    2405,   3295,   -54,    -2396,
+    1410,   1380,   -1156,  296,    -1706,  -1729,  401,    -970,   -878,
+    -723,   -2285,  1259,   1320,   -1960,  -1039,  -211,   -661,   -763,
+    -1599,  -43,    308,    -1841,  72,     -2075,  -3010,  -497,   506,
+    -377,   247,    1932,   -1788,  -2419,  257,    208,    -2176,  488,
+    2827,   -1720,  -1649,  -619,   520,    1103,   -1231,  -1327,  2162,
+    1535,   -383,   315,    -1488,  -235,   1761,   -27,    -232,   515,
+    127,    -2239,  654,    2871,   -379,   -1274,  2445,   874,    -2444,
+    514,    -206,   -1289,  1314,   1869,   1316,   1878,   -1454,  -982,
+    476,    359,    2084,   -708,   405,    -246,   -1071,  1757,   -866,
+    -2331,  783,    501,    -853,   896,    36,     -2468,  -1138,  1445,
+    -613,   -687,   1999,   -449,   -731,   1478,   384,    -45,    96,
+    1530,   1919,   186,    -94,    1347,   -329,   -348,   1631,   574,
+    1062,   735,    -1652,  675,    244,    1241,   1137,   -2469,  621,
+    45,     -612,   1308,   -2015,  -208,   2392,   -1646,  -67,    77,
+    -1558,  113,    1263,   -236,   -971,   -333,   -733,   -555,   2024,
+    -135,   -3817,  -398,   1696,   -1179,  -1473,  1175,   -166,   618,
+    1132,   -2504,  -575,   146,    -688,   1323,   150,    -2021,  15,
+    1673,   347,    -1535,  -106,   235,    -32,    1167,   -471,   -503,
+    -1260,  416,    -13,    -1082,  1036,   -790,   -1676,  487,    985,
+    77,     57,     -1175,  1146,   2023,   -1706,  -404,   3249,   -739,
+    -979,   3044,   -514,   -168,   2201,   -2863,  1009,   1833,   -2309,
+    1565,   476,    -1698,  1667,   -496,   -2193,  1686,   532,    336,
+    -1095,  -1655,  578,    -909,   -1263,  2569,   -2833,  -1808,  2860,
+    -822,   27,     1098,   -1371,  1585,   -284,   -1074,  2944,   -764,
+    -2871,  2484,   1179,   -1213,  -670,   -1226,  1112,   1837,   -299,
+    -388,   -51,    1,      992,    -723,   -361,   1723,   -1115,  -2012,
+    1261,   -9,     -127,   -510,   -1550,  1448,   957,    -1930,  171,
+    776,    -2104,  14,     764,    -599,   -745,   -438,   -371,   -659,
+    1075,   282,    -3116,  684,    3747,   22,     -2139,  816,    1413,
+    -333,   458,    906,    483,    -1084,  797,    1039,   -467,   -377,
+    1386,   -1182,  610,    1787,   -1354,  -2800,  2638,   424,    -2372,
+    1153,   -51,    -689,   290,    -2199,  818,    3755,   -2674,  -1689,
+    3497,   -507,   -1978,  1729,   1413,   215,    -76,    53,     759,
+    371,    -1529,  1005,   -770,   -685,   1754,   -908,   -653,   1047,
+    -1066,  -784,   -199,   -526,   86,     -1750,  -916,   1839,   580,
+    -1884,  319,    226,    -977,   212,    202,    -741,   -1013,  2057,
+    69,     -2961,  974,    1964,   -512,   -224,   1554,   -79,    -1142,
+    1853,   -71,    1009,   1174,   -718,   2040,   -158,   -1508,  1042,
+    0,      -1219,  1212,   448,    -208,   -47,    -779,   -867,   1924,
+    -254,   -1085,  -221,   -1283,  1543,   -584,   -951,   225,    -1089,
+    -464,   -853,   -615,   1576,   -2313,  -1214,  950,    -2548,  -314,
+    1201,   -1527,  952,    764,    -1915,  528,    169,    -1676,  1742,
+    425,    -2346,  932,    290,    109,    492,    -379,   932,    70,
+    582,    135,    769,    1665,   -1751,  576,    1013,   366,    2339,
+    71,     637,    1500,   576,    111,    494,    765,    1170,   1421,
+    -5,     -892,   2054,   -640,   160,    1426,   -651,   348,    -841,
+    -558,   1563,   277,    -408,   -1468,  482,    -1538,  -2255,  968,
+    -1307,  -454,   1306,   -3085,  -1680,  2624,   -2191,  -1719,  1891,
+    -3826,  -1441,  2736,   -3694,  -266,   1897,   -4468,  841,    2828,
+    -4060,  -318,   2305,   -1662,  528,    3056,   -2429,  -156,   2045,
+    -753,   475,    419,    -597,   1100,   1845,   504,    1067,   -402,
+    -824,   1807,   1192,   459,    200,    1728,   50,     -497,   678,
+    -355,   938,    1239,   -1223,  360,    1251,   -95,    981,    1029,
+    -1940,  260,    1627,   -2387,  3426,   519,    -3141,  1822,   -506,
+    -1471,  1101,   -2137,  1069,   885,    -2618,  1673,   -463,   -1558,
+    1439,   -386,   -1923,  1538,   -1313,  -1735,  540,    -1433,  -915,
+    494,    -839,   -1527,  -1143,  480,    -1081,  27,     1732,   -1285,
+    -1833,  1952,   -667,   -1626,  1819,   -1293,  -1323,  2139,   -376,
+    -1392,  1277,   -1172,  -240,   2907,   -1875,  -238,   2573,   -1068,
+    -471,   2065,   -686,   -1315,  2575,   233,    -1005,  1135,   706,
+    534,    278,    -182,   1091,   -21,    -222,   1413,   -371,   -54,
+    1108,   -103,   382,    -70,    787,    894,    -108,   1308,   1113,
+    -1412,  574,    1140,   -2032,  500,    569,    -1251,  951,    -50,
+    -1398,  772,    -474,   -1536,  1297,   251,    -2321,  109,    -703,
+    -425,   40,     -1354,  -773,   -225,   -1743,  -1839,  1244,   261,
+    -3082,  -424,   1162,   -937,   123,    -322,   -407,   -561,   -331,
+    1369,   -1142,  -1050,  1024,   1116,   -213,   -752,   1521,   -383,
+    -415,   1011,   947,    -713,   743,    1945,   -237,   881,    600,
+    -757,   885,    -835,   756,    2454,   -1985,  699,    1572,   -1652,
+    673,    232,    -42,    1975,   -736,   -270,   1660,   -704,   -96,
+    1264,   -428,   278,    774,    -954,   -1325,  756,    1275,   -594,
+    -353,   204,    -1130,  -782,   -432,   -979,   268,    378,    20,
+    -870,   405,    -357,   -1661,  637,    473,    293,    -314,   -895,
+    3,      -175,   -1016,  -643,   204,    -588,   -1007,  -131,   401,
+    -849,   -476,   271,    320,    -198,   533,    -25,    -1994,  1421,
+    525,    -1611,  1261,   507,    -488,   1093,   361,    -1814,  2230,
+    312,    -196,   3242,   -803,   -962,   1714,   -1479,  1426,   1612,
+    -1953,  1376,   -581,   -669,   1370,   -1251,  426,    1274,   -470,
+    1757,   807,    -589,   1275,   126,    -871,   1025,   -1331,  287,
+    1258,   -1813,  146,    -839,   -1471,  828,    -402,   -281,   1704,
+    -1341,  -231,   939,    -1035,  -472,   -197,   -764,   -380,   -816,
+    -266,   382,    -497,   -1708,  -591,   1119,   -1941,  178,    969,
+    -1656,  685,    1004,   -1114,  -127,   -1473,  -678,   1610,   -1253,
+    277,    1807,   -1642,  -461,   2033,   -1449,  392,    98,     -157,
+    1525,   -860,   2455,   413,    -2159,  2457,   475,    -374,   1532,
+    -981,   843,    973,    324,    1168,   225,    -407,   1487,   681,
+    -680,   1098,   117,    245,    1238,   -223,   1076,   -428,   -466,
+    2593,   -663,   -1225,  1303,   -933,   -561,   1190,   -1071,  -1229,
+    406,    -284,   -13,    198,    -1494,  -637,   352,    -1960,  420,
+    49,     -1472,  -761,   -234,   -2213,  -1750,  -521,   -1554,  -813,
+    662,    -633,   -1388,  -15,    -947,   -391,   -152,   -894,   631,
+    -461,   -885,   633,    -51,    -1063,  218,    1149,   -61,    -274,
+    988,    -140,   7,      1774,   1558,   -623,   755,    1352,   -511,
+    1106,   744,    17,     2640,   -91,    697,    1547,   -1757,  1832,
+    1859,   -206,   1505,   575,    -444,   556,    250,    1786,   792,
+    -125,   -266,   407,    501,    798,    -536,   -1214,  58,     6,
+    354,    -685,   613,    99,     -2022,  -116,   -236,   -182,   263,
+    -824,   -1187,  -142,   -138,   -1228,  -1008,  786,    -1421,  -1127,
+    -269,   -2278,  841,    222,    -2423,  678,    -1153,  -2082,  574,
+    -570,   -729,   180,    -777,   212,    270,    -274,   1077,   -493,
+    118,    804,    -1260,  349,    799,    545,    481,    971,    1099,
+    1146,   -273,   34,     1728,   1128,   411,    758,    308,    -808,
+    950,    1490,   209,    -265,   1154,   -11,    -460,   2644,   -122,
+    -728,   2033,   -1100,  -305,   1774,   -208,   -1567,  -57,    -140,
+    -670,   -454,   -1390,  -80,    978,    -438,   -731,   -684,   344,
+    -458,   -199,   -126,   -1663,  -883,   642,    -1517,  -1144,  -375,
+    -422,   -452,   -1815,  -791,   763,    -1502,  -205,   684,    -1641,
+    448,    1399,   -2160,  804,    1088,   -2214,  1030,   1585,   -1093,
+    -11,    1718,   -360,   -81,    1294,   398,    218,    1225,   644,
+    505,    2090,   -385,   526,    2111,   -303,   -316,   1550,   1323,
+    -459,   881,    1874,   -1256,  1429,   2485,   -1003,  -552,   14,
+    432,    952,    471,    -633,   408,    -358,   140,    554,    -1260,
+    -404,   245,    -2572,  954,    1005,   -1621,  -82,    -175,   -957,
+    112,    106,    -1117,  -819,   -62,    -785,   71,     93,     -1296,
+    -1680,  242,    -956,   -2696,  302,    -204,   -1404,  254,    -558,
+    -201,   -630,   16,     -436,   -1647,  1649,   -1096,  -1267,  2273,
+    -1270,  20,     1749,   -2509,  780,    942,    -1859,  2762,   304,
+    -300,   2617,   -947,   861,    2601,   -1153,  754,    1629,   -681,
+    686,    1443,   -235,   1900,   5,      -565,   1559,   285,    -170,
+    757,    480,    547,    752,    -427,   50,     839,    -95,    -791,
+    -1698,  -291,   -62,    -1730,  524,    1008,   -2176,  -369,   165,
+    -749,   -972,   -287,   889,    -1218,  -1712,  833,    -855,   -995,
+    -14,    -793,   -1815,  605,    -607,   -1890,  769,    -781,   230,
+    1155,   -2000,  876,    1835,   -1617,  9,      1058,   -1232,  859,
+    1486,   -1301,  1595,   501,    -951,   2935,   -921,   -634,   2826,
+    -793,   655,    2660,   -232,   235,    1879,   481,    -51,    804,
+    987,    -360,   -331,   2099,   -302,   -149,   1966,   -1233,  -12,
+    1330,   -2265,  1256,   -116,   -1394,  2937,   -995,   -1572,  2964,
+    -2257,  -2587,  1820,   -2132,  -1609,  778,    -1596,  -486,   560,
+    -1749,  274,    -706,   -1714,  1304,   -360,   -2657,  1833,   -750,
+    -1729,  433,    -1461,  -794,   -1545,  -892,   385,    -891,   -374,
+    1261,   -589,   235,    815,    -773,   -669,   636,    -471,   136,
+    871,    -392,   782,    677,    -472,   1130,   1029,   -1262,  1070,
+    2171,   575,    675,    600,    2104,   1077,   -182,   2621,   -604,
+    -30,    3302,   -1331,  599,    742,    291,    1329,   -551,   1043,
+    1729,   -1754,  1220,   1113,   -2174,  1281,   743,    -2027,  851,
+    -205,   -1576,  214,    -1629,  -605,   -394,   -1508,  -254,   -63,
+    -489,   -847,   -26,    -997,   -1065,  -120,   -376,   -1283,  -1393,
+    83,     -212,   -1610,  419,    -1120,  -590,   395,    -1210,  -21,
+    -273,   -622,   899,    -196,   -1059,  1130,   616,    -529,   -166,
+    794,    22,     -216,   862,    664,    -390,   980,    228,    789,
+    182,    402,    2149,   -1133,  799,    2637,   -799,   176,    1306,
+    905,    -93,    677,    338,    121,    483,    297,    339,    347,
+    249,    731,    40,     66,     112,    -889,   -128,   582,    -1191,
+    -67,    -1364,  -233,   488,    -1734,  -634,   1517,   -1657,  -1015,
+    594,    -1422,  1396,   -1357,  -1617,  1254,   -1596,  -941,   789,
+    -1860,  -77,    245,    -327,   569,    -723,   104,    905,    -543,
+    -918,   1387,   -42,    -440,   619,    68,     45,     1364,   -880,
+    19,     1491,   -561,   1174,   1403,   -1411,  1351,   1222,   -612,
+    864,    877,    -658,   382,    864,    -552,   1286,   309,    -105,
+    1083,   -170,   -289,   1049,   -248,   -537,   625,    -48,    337,
+    -385,   532,    -315,   -1398,  588,    -628,   -1192,  649,    -806,
+    -170,   541,    -2267,  1052,   274,    -1970,  833,    253,    -1345,
+    -290,   -120,   -959,   -94,    -189,   -1397,  -136,   -155,   -654,
+    207,    -706,   617,    415,    -1962,  1169,   670,    -1132,  319,
+    297,    -589,   100,    510,    -620,   610,    -153,   -15,    1327,
+    -99,    229,    281,    169,    1015,   -106,   1197,   577,    -698,
+    577,    931,    -964,   1605,   505,    -1713,  2369,   115,    -1585,
+    1839,   664,    -1411,  867,    620,    329,    491,    -1119,  420,
+    266,    -1708,  499,    -69,    -1037,  795,    -321,   -959,   32,
+    235,    -1748,  295,    -249,   -230,   485,    -1185,  -97,    489,
+    -2036,  711,    405,    -2800,  593,    434,    -1038,  536,    347,
+    -570,   705,    -806,   -290,   818,    -999,   53,     1585,   -756,
+    -657,   1180,   115,    -364,   217,    -226,   1033,   347,    -20,
+    611,    658,    590,    -128,   -451,   1676,   -660,   -21,    805,
+    -880,   1481,   412,    -1534,  1522,   221,    -132,   662,    -407,
+    613,    1132,   -551,   -187,   1184,   -577,   -444,   953,    -1034,
+    -472,   461,    -865,   -99,    637,    -572,   300,    450,    -591,
+    137,    404,    -972,   306,    -524,   -1167,  433,    124,    -1326,
+    -368,   -305,   -917,   452,    -626,   -695,   656,    258,    -1401,
+    270,    446,    -1045,  636,    -357,   -1072,  913,    512,    -1732,
+    489,    952,    -747,   58,     673,    -453,   1125,   -488,   46,
+    1723,   -1244,  417,    1803,   -1215,  623,    659,    -560,   676,
+    -9,     92,     701,    1100,   -623,   142,    283,    -512,   547,
+    576,    -525,   -155,   1143,   -1286,  -329,   1959,   -1302,  -459,
+    1188,   -1199,  1020,   -118,   -1303,  956,    -905,   -647,   595,
+    -356,   -1354,  -74,    750,    -791,   -335,   56,     -862,   -36,
+    276,    -279,   46,     -485,   -181,   196,    -584,   -238,   259,
+    -314,   -77,    383,    509,    -386,   -180,   859,    -542,   955,
+    372,    -362,   1458,   113,    -106,   1495,   -534,   63,     1295,
+    -505,   846,    983,    -1097,  1764,   320,    -185,   1061,   -525,
+    115,    217,    -328,   326,    312,    374,    179,    -683,   485,
+    -1286,  147,    -583,   -979,   888,    -504,   -1235,  715,    -1050,
+    -1111,  848,    -828,   -1043,  -115,   -327,   22,     -451,   -1008,
+    98,     -262,   -545,   -363,   -48,    -257,   -731,   878,    96,
+    -1186,  426,    359,    -1101,  1074,   -267,   521,    -375,   -166,
+    1398,   -994,   780,    550,    124,    -298,   581,    236,    305,
+    -111,   396,    741,    -10,    662,    155,    271,    563,    65,
+    -318,   812,    -483,   843,    75,     -714,   1152,   -26,    -190,
+    -97,    533,    -111,   -564,   724,    -24,    -820,   835,    -473,
+    -632,   154,    -104,   -932,   919,    -606,   -619,   496,    -310,
+    -271,   -360,   120,    -630,   126,    65,     -931,   548,    -207,
+    -455,   410,    -282,   -931,   944,    -354,   69,     412,    -661,
+    1068,   -969,   -443,   1894,   -1281,  -442,   2003,   -1640,  713,
+    852,    -1344,  1338,   -457,   243,    498,    -697,   -129,   993,
+    -388,   -76,    1039,   -768,   492,    -104,   -58,    951,    -854,
+    181,    1093,   -1111,  491,    544,    -1061,  118,    586,    -477,
+    -411,   392,    233,    91,     -908,   532,    218,    -1176,  670,
+    -74,    -674,   696,    -801,   194,    592,    -1790,  762,    -564,
+    -791,   595,    -145,   -727,   228,    434,    -246,   -232,   -169,
+    281,    -324,   289,    -120,   -270,   -49,    282,    250,    -56,
+    -405,   507,    27,     -1060,  1329,   -203,   -204,   1677,   -767,
+    -313,   1272,   -968,   717,    183,    -1652,  2157,   -75,    -1906,
+    2590,   -428,   -1614,  2564,   -1511,  -240,   1421,   -1911,  1420,
+    396,    -1397,  1691,   -694,   -1500,  1942,   -823,   -784,   841,
+    -635,   759,    -447,   351,    44,     -946,   227,    441,    -564,
+    155,    -719,   182,    509,    -320,   -300,   205,    -662,   726,
+    469,    -1240,  191,    664,    -269,   -152,   -18,    214,    -149,
+    -257,   347,    76,     -79,    -384,   874,    -387,   -269,   892,
+    -783,   537,    46,     27,     251,    -332,   133,    377,    -522,
+    232,    626,    -362,   -499,   1112,   -342,   -522,   362,    -187,
+    547,    -384,   -155,   517,    -551,   227,    651,    -825,   -88,
+    579,    -758,   -40,    456,    -774,   542,    -164,   -482,   968,
+    -1000,  -394,   1094,   -885,   431,    74,     -348,   403,    -959,
+    831,    -465,   -330,   762,    -717,   -645,   1342,   -499,   -416,
+    944,    -417,   -438,   737,    -368,   -42,    740,    -1234,  689,
+    29,     -106,   619,    -824,   -10,    1047,   -824,   146,    -59,
+    210,    163,    -43,    522,    -352,   213,    460,    -1049,  599,
+    308,    -843,   632,    223,    -504,   296,    530,    -931,   751,
+    -176,   -524,   379,    236,    -626,   66,     662,    -575,   191,
+    -175,   -619,   660,    -424,   -217,   704,    -498,   200,    62,
+    -543,   280,    91,     -378,   54,     168,    -554,   670,    -215,
+    -1097,  1805,   -1015,  -617,   1642,   -1560,  727,    61,     7,
+    -48,    -659,   1308,   -752,   -613,   914,    160,    -469,   164,
+    -167,   274,    326,    -667,   497,    333,    -757,   1252,   -481,
+    -1257,  2019,   -949,   -719,   1676,   -1078,  250,    323,    -1100,
+    1550,   145,    -1697,  972,    522,    -966,   374,    -365,   846,
+    -276,   -756,   629,    -278,   302,    -151,   -243,   -363,   841,
+    -7,     -1092,  476,    45,     201,    -378,   -456,   1113,   -926,
+    97,     178,    -240,   326,    -597,   472,    -10,    -190,   394,
+    -501,   -259,   307,    133,    240,    -433,   -192,   472,    -190,
+    12,     398,    -191,   -605,   1295,   -576,   -154,   474,    -661,
+    866,    -968,   172,    887,    -736,   36,     259,    -201,   265,
+    460,    -859,   622,    102,    -690,   776,    -80,    -745,   919,
+    140,    -750,   224,    134,    -236,   -196,   456,    409,    -1069,
+    600,    239,    -306,   -383,   541,    -213,   -323,   -121,   700,
+    -735,   179,    222,    -613,   653,    -711,   -81,    592,    -694,
+    117,    703,    -772,   -264,   644,    -117,   -422,   276,    64,
+    -355,   -430,   800,    -74,    -619,   1207,   -1057,  4,      960,
+    -1219,  977,    -78,    -1186,  1536,   267,    -1388,  1144,   -90,
+    -1052,  1889,   -1255,  -387,   1815,   -1763,  1037,   421,    -1003,
+    767,    -24,    -277,   -54,    759,    -285,   -1015,  1422,   -581,
+    -121,   547,    -687,   288,    440,    -626,   -623,   1261,   -248,
+    -1133,  1204,   -714,   382,    219,    -851,   240,    -161,   672,
+    -261,   -855,   1043,   -599,   111,    -362,   225,    641,    -913,
+    -122,   1075,   -1165,  432,    131,    -803,   978,    33,     -1291,
+    992,    224,    -1054,  789,    -121,   -215,   262,    -11,    89,
+    -174,   365,    -240,   114,    406,    -813,   291,    233,    158,
+    -377,   194,    216,    -477,   635,    -228,   -512,   599,    23,
+    -273,   71,     258,    10,     -155,   -198,   354,    61,     -749,
+    768,    -19,    -709,   596,    97,     -276,   164,    69,     -144,
+    -20,    529,    -897,   188,    480,    -703,   836,    -874,   259,
+    917,    -1044,  -7,     566,    -97,    -439,   256,    -466,   998,
+    -360,   -1134,  1619,   -762,   -752,   1446,   -707,   -177,   652,
+    -899,   579,    253,    -410,   146,    -262,   275,    353,    -610,
+    52,     671,    -862,   419,    -140,   273,    247,    -1062,  1005,
+    -175,   -497,   772,    -431,   -101,   450,    -598,   266,    428,
+    -842,   477,    -11,    -554,   642,    17,     -787,   544,    445,
+    -625,   -205,   796,    -222,   -733,   764,    -572,   423,    166,
+    -994,   931,    -228,   -303,   362,    -214,   104,    448,    -1091,
+    722,    570,    -1311,  773,    259,    -648,   477,    193,    -682,
+    302,    459,    -464,   -383,   1120,   -561,   -564,   1083,   -372,
+    -354,   864,    -586,   -200,   502,    -331,   27,     446,    -657,
+    281,    571,    -888,   502,    251,    -423,   116,    277,    -263,
+    118,    -170,   168,    367,    -723,   202,    438,    -793,   451,
+    -30,    -292,   202,    38,     -188,   -66,    221,    -90,    -105,
+    7,      346,    -578,   337,    247,    -371,   -14,    22,     36,
+    151,    -322,   -244,   692,    -556,   -5,     550,    -560,   200,
+    161,    -347,   191,    258,    -520,   441,    -212,   -215,   584,
+    -428,   -251,   213,    90,     -187,   109,    138,    -211,   -17,
+    191,    111,    -259,   161,    -141,   232,    -175,   0,      154,
+    -369,   539,    -171,   -438,   484,    43,     -375,   -37,    249,
+    196,    -328,   -106,   541,    -531,   103,    240,    -191,   186,
+    -363,   40,     585,    -573,   258,    170,    -593,   515,    -261,
+    -86,    407,    -339,   164,    -214,   -34,    464,    -377,   -206,
+    336,    -230,   239,    -85,    -69,    322,    -503,   322,    142,
+    -748,   867,    -160,   -753,   836,    -249,   -362,   750,    -374,
+    -222,   448,    -82,    -246,   399,    13,     -429,   441,    -47,
+    -127,   -29,    337,    -502,   318,    132,    -457,   498,    -145,
+    -91,    98,     208,    -179,   54,     62,     -260,   237,    96,
+    -161,   32,     -150,   93,     21,     -31,    74,     75,     -322,
+    164,    168,    -191,   119,    -121,   -66,    -195,   296,    -128,
+    -251,   381,    -56,    -338,   281,    -29,    -472,   664,    -301,
+    -275,   423,    -285,   -77,    258,    -82,    -139,   160,    -54,
+    -26,    27,     75,     -49,    -196,   305,    -131,   -187,   262,
+    -37,    -206,   65,     269,    -240,   -144,   261,    54,     -338,
+    355,    3,      -503,   535,    -253,   -210,   433,    -290,   -33,
+    381,    -546,   173,    252,    -364,   271,    -329,   166,    266,
+    -564,   507,    -32,    -648,   861,    -400,   -357,   819,    -519,
+    -74,    392,    -423,   426,    -306,   -93,    691,    -991,   537,
+    467,    -992,   614,    426,    -823,   491,    182,    -371,   174,
+    84,     -64,    98,     -96,    23,     182,    -69,    -211,   226,
+    18,     -134,   334,    -514,   352,    378,    -623,   363,    266,
+    -592,   493,    -46,    -369,   594,    -440,   -10,    295,    -368,
+    326,    -192,   -140,   306,    -305,   140,    198,    -396,   202,
+    154,    -341,   208,    -8,     -169,   -76,    106,    20,     -347,
+    233,    30,     -193,   117,    -9,     -165,   182,    -4,     -195,
+    96,     131,    -188,   -106,   166,    -71,    -99,    57,     4,
+    -31,    -131,   101,    63,     -199,   225,    -25,    -281,   342,
+    -247,   -170,   516,    -289,   -263,   422,    -158,   -148,   363,
+    -192,   -138,   122,    62,     -105,   7,      194,    -53,    -224,
+    83,     173,    -182,   20,     178,    -274,   182,    74,     -109,
+    -5,     319,    -303,   -72,    428,    -371,   50,     271,    -204,
+    17,     161,    -256,   169,    93,     -169,   94,     -89,    139,
+    80,     -199,   325,    -67,    -83,    202,    -154,   16,     202,
+    -325,   162,    61,     -93,    201,    -278,   236,    108,    -477,
+    594,    -145,   -370,   647,    -261,   -356,   669,    -369,   -181,
+    420,    -266,   -154,   159,    -25,    53,     -40,    -22,    68,
+    -203,   144,    -2,     -173,   88,     -3,     -62,    2,      75,
+    55,     -95,    -130,   219,    -142,   -191,   164,    -170,   44,
+    0,      -246,   249,    -27,    -413,   461,    27,     -490,   292,
+    19,     -145,   13,     99,     91,     -466,   209,    295,    -773,
+    465,    210,    -680,   410,    163,    -358,   399,    -201,   87,
+    23,     -212,   270,    -230,   86,     159,    -353,   381,    -73,
+    -456,   726,    -353,   -357,   754,    -367,   -344,   657,    -59,
+    -417,   432,    35,     -309,   153,    97,     -69,    89,     -101,
+    63,     107,    -127,   106,    112,    -26,    -236,   376,    43,
+    -479,   544,    -57,    -407,   447,    -148,   -103,   195,    -198,
+    80,     156,    -228,   35,     145,    -77,    -55,    130,    -33,
+    -190,   123,    41,     -170,   74,     114,    -241,   67,     192,
+    -195,   -76,    186,    -136,   -133,   213,    -105,   -110,   144,
+    -51,    -126,   154,    -59,    -124,   147,    -49,    -132,   82,
+    26,     -130,   63,     68,     -211,   97,     131,    -224,   59,
+    184,    -250,   59,     205,    -225,   -67,    163,    -135,   -24,
+    74,     -22,    -4,     -81,    21,     71,     -137,   71,     47,
+    -120,   71,     34,     -65,    138,    -6,     -116,   112,    -47,
+    -39,    20,     -75,    64,     -7,     2,      35,     52,     -61,
+    -29,    81,     -61,    -30,    195,    -91,    -136,   261,    -11,
+    -186,   162,    -86,    -35,    152,    -106,   -32,    126,    -4,
+    49,     33,     -9,     -11,    46,     111,    -132,   -3,     204,
+    -175,   -10,    281,    -146,   -94,    226,    -126,   -36,    58,
+    -14,    61,     -172,   48,     193,    -221,   83,     149,    -279,
+    195,    130,    -357,   226,    102,    -260,   191,    16,     -223,
+    124,    14,     -144,   90,     -31,    -81,    -66,    54,     103,
+    -181,   29,     174,    -281,   92,     81,     -226,   139,    -133,
+    -41,    167,    -147,   44,     27,     -132,   107,    -34,    -122,
+    105,    -54,    17,     52,     -131,   138,    33,     -206,   158,
+    43,     -80,    24,     10,     -27,    33,     43,     -71,    15,
+    71,     -42,    14,     18,     0,      -3,     -14,    -14,    58,
+    46,     -99,    122,    105,    -202,   125,    119,    -238,   112,
+    133,    -242,   113,    129,    -301,   52,     161,    -177,   82,
+    73,     -139,   46,     122,    -119,   22,     155,    -230,   23,
+    242,    -211,   -12,    182,    -184,   -57,    190,    -34,    -101,
+    58,     -20,    6,      103,    -61,    -78,    12,     18,     12,
+    86,     -71,    -27,    43,     -24,    8,      39,     -109,   21,
+    -4,     -44,    66,     13,     -59,    61,     -39,    35,     113,
+    -179,   19,     171,    -158,   14,     112,    -133,   26,     9,
+    -43,    -9,     6,      41,     -77,    22,     80,     -61,    -63,
+    65,     -32,    -32,    125,    -105,   -11,    114,    -120,   42,
+    42,     -92,    45,     -56,    -25,    131,    -83,    -24,    97,
+    -51,    -5,     67,     -69,    7,      41,     -27,    8,      3,
+    -10,    8,      -3,     -87,    -28,    122,    -33,    -58,    124,
+    -53,    -50,    67,     -115,   -17,    111,    -112,   -30,    101,
+    -24,    -13,    41,     3,      45,     -13,    -34,    23,     23,
+    -19,    13,     -49,    -49,    68,     -68,    -32,    91,     -58,
+    -18,    73,     -19,    -27,    17,     -33,    -35,    99,     -38,
+    -99,    78,     -31,    -62,    95,     -71,    -124,   184,    -15,
+    -146,   160,    -27,    -109,   140,    -25,    -63,    84,     -34,
+    -18,    58,     -68,    -16,    22,     -87,    86,     23,     -130,
+    61,     62,     -132,   51,     168,    -139,   35,     133,    -121,
+    50,     102,    -120,   40,     126,    -87,    -40,    119,    -14,
+    -59,    78,     11,     -68,    41,     24,     -25,    55,     -2,
+    15,     21,     -73,    56,     88,     -74,    -41,    4,      -10,
+    -4,     5,      7,      -39,    -3,     -4,     -39,    94,     52,
+    -135,   42,     90,     -86,    12,     21,     -55,    -70,    -37,
+    55,     -63,    -35,    50,     -100,   21,     84,     -151,   24,
+    87,     -94,    51,     2,      -58,    104,    -61,    -70,    60,
+    -25,    -42,    -31,    55,     35,     -129,   47,     69,     -65,
+    77,     2,      -60,    110,    -32,    -69,    84,     -54,    -26,
+    98,     -28,    -7,     49,     -49,    -19,    119,    -11,    -157,
+    20,     106,    29,     -8,     -38,    -30,    72,     30,     -3,
+    1,      -32,    -11,    -9,     52,     46,     -144,   -38,    86,
+    -31,    -9,     -42,    -75,    142,    34,     -64,    79,     -109,
+    -55,    195,    -69,    -80,    48,     -49,    62,     25,     -111,
+    -42,    52,     19,     -41,    1,      -16,    -33,    44,     30,
+    -21,    17,     -2,     -30,    111,    34,     -111,   83,     55,
+    -119,   66,     62,     -89,    63,     -39,    -143,   168,    21,
+    -158,   158,    32,     -132,   134,    -3,     -77,    88,     -45,
+    -18,    117,    -51,    -71,    10,     30,     35,     -27,    -63,
+    13,     34,     23,     -23,    19,     -4,     -92,    34,     74,
+    -69,    -15,    20,     -36,    56,     -36,    -96,    69,     -34,
+    -122,   32,     31,     -51,    -3,     -21,    4,      43,     -44,
+    6,      81,     -39,    -35,    26,     -38,    -24,    29,     -16,
+    -47,    -6,     19,     -7,     -9,     41,     32,     13,     -2,
+    -21,    3,      24,     49,     -3,     -66,    14,     95,     -7,
+    -52,    80,     68,     -72,    -14,    39,     2,      24,     -6,
+    -53,    86,     21,     -78,    67,     28,     -34,    16,     -23,
+    -1,     70,     -3,     -58,    45,     33,     -94,    -34,    62,
+    41,     -11,    -27,    27,     46,     14,     -33,    -12,    44,
+    -16,    -59,    6,      45,     -3,     -42,    2,      13,     19,
+    -1,     -71,    3,      42,     -36,    6,      17,     26,     5,
+    -46,    6,      -68,    -75,    86,     -20,    -90,    80,     4,
+    -86,    5,      2,      -33,    -15,    -2,     -8,     -18,    15,
+    -7,     -25,    27,     -28,    -88,    39,     -2,     -85,    58,
+    40,     -45,    3,      17,     0,      11,     -4,     -3,     84,
+    22,     -113,   8,      94,     10,     9,      28,     6,      -3,
+    5,      -2,     23,     23,     -1,     -40,    20,     48,     -40,
+    -21,    72,     7,      -40,    -1,     27,     16,     30,     31,
+    -16,    11,     9,      -71,    -7,     62,     21,     -61,    -19,
+    78,     -2,     -22,    67,     -42,    -12,    75,     -79,    47,
+    86,     -124,   -42,    21,     4,      23,     -32,    -7,     19,
+    1,      -13,    -46,    2,      32,     -43,    -7,     86,     -16,
+    -22,    46,     -61,    -35,    11,     -64,    -38,    17,     -12,
+    -27,    20,     41,     6,      -58,    -61,    58,     -51,    -77,
+    36,     -25,    19,     93,     -76,    1,      72,     -92,    15,
+    40,     -56,    65,     13,     -29,    82,     -9,     -21,    24,
+    -83,    -5,     4,      -63,    77,     80,     -58,    -6,     -19,
+    -43,    100,    5,      -36,    63,     33,     -26,    -48,    26,
+    -18,    -75,    34,     24,     -45,    -1,     6,      -35,    -24,
+    -23,    -22,    47,     -15,    -46,    31,     -40,    -41,    74,
+    -32,    -73,    59,     -51,    -26,    143,    -29,    -42,    93,
+    -44,    -21,    56,     -7,     55,     51,     -61,    74,     111,
+    -71,    35,     124,    -123,   -3,     62,     -79,    100,    49,
+    -122,   143,    79,     -137,   72,     30,     -82,    75,     -10,
+    -48,    35,     -23,    -25,    34,     0,      -54,    -6,     34,
+    -46,    -59,    -7,     -72,    -6,     70,     -41,    -39,    23,
+    -33,    11,     104,    -44,    -30,    54,     -69,    -20,    62,
+    -75,    1,      45,     -69,    1,      40,     -59,    -15,    18,
+    -16,    38,     -1,     -52,    8,      14,     -32,    11,     -15,
+    -58,    18,     -22,    -44,    69,     40,     -50,    -21,    1,
+    -35,    -3,     -5,     -20,    40,     36,     -41,    -36,    -43,
+    -11,    48,     -34,    -40,    51,     -10,    -9,     30,     10,
+    12,     51,     51,     -8,     -16,    32,     -6,     31,     24,
+    -38,    43,     18,     -15,    53,     -10,    -55,    9,      8,
+    -28,    21,     10,     -26,    21,     10,     -9,     5,      -29,
+    -13,    38,     -1,     -11,    49,     0,      -41,    10,     23,
+    -25,    -35,    -2,     -32,    -10,    58,     -6,     -18,    16,
+    -9,     4,      11,     17,     21,     21,     12,     -2,     49,
+    -16,    -128,   21,     75,     -32,    22,     34,     -59,    48,
+    75,     -69,    -11,    -2,     -65,    39,     57,     -54,    -79,
+    -11,    -20,    -13,    38,     4,      -9,     -22,    -22,    33,
+    -7,     -52,    10,     -10,    -19,    54,     47,     -21,    -35,
+    -6,     -4,     11,     8,      -28,    1,      8,      -4,     30,
+    1,      -22,    26,     -7,     -24,    56,     25,     -45,    13,
+    24,     -32,    13,     22,     -46,    -2,     15,     -39,    28,
+    32,     -69,    0,      27,     -69,    0,      39,     -40,    28,
+    55,     -27,    -13,    0,      -14,    37,     25,     -25,    34,
+    -3,     -69,    26,     39,     -41,    -6,     29,     -7,     5,
+    66,     41,     -27,    -17,    6,      -14,    -21,    0,      29,
+    -9,     -26,    32,     -5,     -34,    60,     15,     -60,    20,
+    13,     11,     43,     -48,    -15,    88,     -13,    -55,    26,
+    -32,    -46,    35,     14,     -37,    -11,    12,     -20,    11,
+    9,      -64,    -16,    17,     5,      38,     7,      -30,    -9,
+    -49,    -11,    52,     -15,    -38,    -27,    -12,    36,     53,
+    1,      -37,    -17,    -12,    0,      31,     1,      13,     40,
+    -15,    2,      47,     -15,    -17,    28,     -2,     -4,     25,
+    -6,     -12,    2,      -17,    -9,     5,      -15,    17,     21,
+    -28,    0,      15,     -43,    -63,    -6,     -14,    -8,     37,
+    -34,    -40,    30,     -12,    -14,    37,     -13,    -16,    26,
+    -15,    -2,     13,     -37,    -13,    32,     13,     -8,     -2,
+    -12,    -8,     9,      9,      -3,     4,      13,     34,     -2,
+    -22,    40,     19,     29,     25,     -48,    -17,    23,     17,
+    7,      3,      0,      12,     37,     -1,     -25,    30,     41,
+    -7,     7,      29,     -31,    -31,    -23,    -27,    5,      2,
+    -18,    -2,     22,     9,      -6,     5,      -7,     -24,    9,
+    0,      -28,    19,     61,     -11,    -45,    21,     -28,    -65,
+    28,     33,     -44,    -27,    -6,     -26,    -8,     4,      5,
+    9,      -10,    -46,    -20,    20,     -7,     -7,     -33,    -26,
+    50,     9,      -65,    -22,    -3,     -20,    15,     21,     20,
+    24,     -16,    -27,    -13,    14,     21,     -38,    -48,    9,
+    35,     28,     21,     3,      -31,    -8,     57,     32,     -35,
+    -22,    20,     14,     12,     28,     39,     0,      -18,    44,
+    -2,     -17,    53,     0,      -27,    33,     43,     5,      -10,
+    25,     47,     -3,     -4,     36,     15,     -12,    -3,     29,
+    41,     23,     23,     -8,     -32,    15,     37,     0,      3,
+    22,     31,     1,      -20,    27,     2,      -50,    0,      33,
+    16,     -16,    -17,    18,     -26,    -34,    31,     -27,    -84,
+    -33,    4,      -5,     -22,    -17,    -28,    -66,    -24,    8,
+    -16,    -25,    -51,    -13,    45,     -11,    -49,    -26,    -49,
+    -38,    21,     10,     -52,    -58,    -19,    -4,     9,      -31,
+    -29,    55,     2,      -45,    29,     10,     -22,    49,     33,
+    -27,    -19,    -5,     30,     47,     11,     -11,    -2,     8,
+    5,      17,     8,      3,      57,     63,     28,     24,     11,
+    2,      14,     22,     7,      7,      2,      23,     33,     -2,
+    -8,     14,     7,      20,     57,     32,     -5,     12,     23,
+    10,     17,     26,     -18,    -72,    -6,     74,     61,     13,
+    -17,    -21,    -7,     29,     45,     5,      -52,    -49,    1,
+    10,     35,     40,     -46,    -66,    7,      31,     -27,    -44,
+    -12,    -41,    -22,    32,     -12,    -32,    -3,     -17,    -22,
+    -22,    -31,    -30,    -23,    -13,    3,      0,      -21,    -19,
+    -7,     -17,    -9,     18,     -40,    -64,    1,      4,      -4,
+    8,      -17,    -28,    -1,     9,      -7,     -9,     27,     6,
+    -63,    -32,    52,     25,     -46,    -23,    -6,     -11,    35,
+    29,     -50,    -44,    17,     -6,     -12,    53,     28,     -17,
+    -9,     28,     34,     -20,    -18,    22,     43,     28,     -6,
+    8,      14,     19,     28,     14,     27,     26,     12,     76,
+    66,     -18,    -2,     18,     -12,    -1,     -2,     -1,     51,
+    30,     -18,    5,      14,     -12,    2,      13,     -25,    -9,
+    32,     7,      -5,     15,     -12,    -33,    -18,    -13,    6,
+    0,      -25,    -12,    1,      -17,    0,      13,     -24,    -27,
+    4,      35,     14,     -22,    5,      13,     -18,    -30,    -10,
+    -7,     -7,     31,     23,     -27,    -26,    9,      47,     6,
+    -50,    -11,    19,     1,      11,     12,     -19,    -43,    -18,
+    10,     -6,     -3,     12,     2,      -12,    -16,    10,     9,
+    -25,    -21,    -10,    -13,    0,      8,      -1,     -9,     10,
+    4,      -34,    14,     46,     5,      18,     24,     -15,    -7,
+    20,     -1,     -13,    7,      11,     14,     11,     -2,     8,
+    27,     10,     -1,     13,     -2,     -7,     48,     44,     -15,
+    -16,    -6,     3,      7,      -35,    -25,    8,      -31,    -16,
+    30,     36,     22,     -13,    -21,    -10,    8,      2,      -58,
+    -37,    32,     25,     -1,     -25,    -21,    3,      3,      -6,
+    -11,    -3,     2,      4,      34,     22,     -25,    -19,    0,
+    -6,     -10,    -8,     -35,    -32,    8,      -3,     -20,    -11,
+    -6,     3,      8,      -8,     3,      25,     23,     -7,     -35,
+    -15,    8,      -20,    -6,     15,     -44,    -29,    19,     -5,
+    -1,     18,     28,     6,      -21,    9,      11,     -20,    -10,
+    18,     22,     6,      -2,     12,     6,      23,     34,     -20,
+    -19,    1,      -10,    34,     41,     13,     6,      3,      22,
+    11,     -4,     4,      -12,    -8,     17,     18,     12,     -1,
+    5,      9,      -6,     -2,     4,      1,      3,      2,      -6,
+    -32,    -25,    9,      18,     27,     -4,     -54,    -29,    2,
+    -3,     -18,    -38,    -28,    -10,    9,      20,     5,      -9,
+    -15,    -3,     2,      -14,    -15,    -6,     5,      10,     6,
+    3,      -11,    -9,     -5,     -20,    -13,    8,      3,      -14,
+    6,      20,     -15,    -21,    9,      19,     21,     12,     -4,
+    -21,    -17,    16,     27,     -4,     -28,    -2,     26,     9,
+    -12,    -16,    -28,    -28,    -4,     4,      -15,    -9,     3,
+    -10,    -16,    2,      17,     -10,    -26,    3,      16,     26,
+    17,     -12,    -9,     2,      -2,     -5,     -11,    5,      28,
+    1,      -14,    13,     14,     5,      18,     6,      -17,    -5,
+    7,      2,      -3,     11,     10,     -1,     50,     36,     -28,
+    21,     39,     -9,     -6,     2,      10,     36,     20,     -2,
+    -3,     -11,    -10,    -6,     -5,     -4,     -8,     2,      17,
+    1,      -13,    11,     -13,    -36,    11,     14,     -19,    -6,
+    3,      0,      20,     -5,     -24,    12,     7,      -11,    2,
+    -15,    -28,    -1,     6,      -14,    -31,    -39,    -19,    19,
+    37,     3,      -32,    -27,    -6,     13,     31,     15,     -41,
+    -41,    25,     35,     -3,     -16,    -25,    -19,    -10,    -3,
+    19,     10,     -4,     7,      -4,     -19,    -12,    -13,    -9,
+    6,      2,      -12,    -6,     12,     6,      -1,     -5,     -19,
+    -7,     7,      40,     56,     -3,     -13,    21,     24,     7,
+    -11,    -9,     -3,     24,     28,     -10,    1,      12,     21,
+    24,     -16,    -15,    4,      -7,     -2,     19,     13,     -11,
+    -7,     -8,     15,     41,     5,      -16,    -18,    -11,    26,
+    26,     -5,     -12,    -14,    -6,     10,     8,      -8,     -16,
+    -16,    -3,     10,     1,      -3,     -3,     -2,     -15,    -18,
+    6,      -4,     -4,     21,     4,      -2,     15,     13,     0,
+    -2,     12,     7,      -15,    -9,     1,      -2,     2,      -1,
+    -9,     -15,    -17,    -14,    -10,    1,      -4,     -16,    -17,
+    -1,     18,     8,      1,      22,     11,     -19,    -10,    4,
+    -23,    -29,    0,      -2,     -14,    -6,     13,     7,      -23,
+    -13,    10,     9,      11,     10,     4,      -4,     -4,     1,
+    6,      14,     9,      2,      0,      2,      6,      4,      -9,
+    -18,    -8,     8,      18,     8,      13,     9,      -27,    -22,
+    -10,    -24,    -9,     17,     11,     2,      9,      3,      -13,
+    -10,    -1,     -7,     -1,     10,     -4,     1,      16,     12,
+    -6,     -14,    -2,     -5,     -1,     0,      -1,     6,      -9,
+    -3,     12,     4,      1,      -2,     2,      17,     24,     22,
+    9,      8,      21,     14,     -2,     -2,     4,      -1,     -7,
+    -7,     -6,     -1,     -6,     17,     30,     -7,     -10,    -3,
+    -19,    -18,    2,      21,     4,      -20,    -6,     -1,     -18,
+    -14,    -6,     -7,     -1,     6,      10,     8,      -5,     0,
+    10,     -22,    -40,    -22,    4,      34,     16,     -19,    -16,
+    -12,    -17,    -16,    -17,    -29,    -28,    -4,     10,     16,
+    22,     13,     4,      -1,     -5,     16,     15,     -11,    -6,
+    9,      3,      -14,    -22,    -19,    -12,    5,      -5,     -15,
+    3,      9,      27,     17,     -4,     8,      -2,     1,      16,
+    11,     9,      9,      8,      -14,    -16,    7,      -5,     -15,
+    -11,    -5,     19,     25,     25,     43,     21,     -9,     -9,
+    -19,    -10,    14,     -11,    -19,    8,      3,      1,      11,
+    -1,     -24,    -20,    -1,     2,      7,      24,     22,     11,
+    8,      6,      -2,     -11,    -3,     -2,     -4,     0,      -7,
+    0,      6,      -1,     -16,    -35,    -8,     8,      -11,    -6,
+    6,      18,     16,     7,      12,     5,      -2,     -3,     -10,
+    -21,    -27,    -10,    -3,     -3,     8,      0,      -9,     -10,
+    -3,     0,      -5,     6,      9,      19,     23,     8,      -5,
+    -19,    -16,    -5,     -6,     -27,    -22,    1,      6,      8,
+    2,      -9,     -13,    -15,    -18,    -13,    4,      25,     29,
+    26,     -2,     -22,    1,      8,      1,      -6,     -6,     -7,
+    -20,    0,      13,     -14,    -24,    -24,    -21,    2,      14,
+    16,     23,     15,     10,     10,     5,      0,      -26,    -32,
+    3,      19,     5,      -8,     -7,     -8,     -3,     17,     27,
+    -7,     -28,    10,     32,     10,     1,      10,     3,      -4,
+    22,     24,     -31,    -40,    0,      6,      5,      17,     17,
+    1,      10,     30,     8,      -12,    -6,     9,      6,      -12,
+    -5,     1,      -4,     6,      11,     0,      -9,     -4,     -3,
+    -4,     -3,     2,      0,      -2,     -9,     -27,    -23,    2,
+    13,     -6,     -9,     -3,     -12,    -2,     10,     6,      -7,
+    -19,    -31,    -13,    16,     11,     -3,     -13,    -15,    0,
+    7,      -3,     -7,     -1,     -4,     7,      15,     0,      -12,
+    -8,     -1,     -7,     -12,    -21,    -17,    5,      30,     25,
+    -6,     -6,     0,      -12,    -8,     2,      13,     11,     1,
+    5,      4,      4,      10,     -1,     -20,    -12,    -4,     3,
+    15,     11,     -7,     -24,    -4,     8,      -2,     -14,    -25,
+    -17,    7,      21,     14,     1,      0,      12,     17,     13,
+    6,      1,      6,      14,     11,     -10,    -21,    -12,    -4,
+    3,      -2,     -21,    -24,    -2,     12,     14,     17,     4,
+    -2,     11,     11,     11,     1,      -34,    -32,    -5,     10,
+    7,      -11,    -12,    6,      7,      -4,     -10,    -15,    -5,
+    17,     21,     0,      -15,    -15,    -1,     5,      -18,    -18,
+    -10,    -9,     24,     27,     -9,     -14,    0,      9,      25,
+    22,     1,      -7,     -2,     16,     13,     -14,    -10,    7,
+    0,      2,      15,     2,      -9,     5,      10,     -5,     -3,
+    10,     3,      0,      15,     15,     -1,     -3,     8,      6,
+    -7,     -7,     2,      0,      -4,     5,      -8,     -37,    -28,
+    -1,     8,      6,      10,     -1,     -12,    12,     28,     8,
+    -17,    -16,    -15,    -17,    1,      6,      -4,     -8,     -4,
+    -15,    -15,    6,      -9,     -15,    10,     9,      -13,    -8,
+    5,      -2,     -10,    5,      12,     -27,    -33,    9,      8,
+    -16,    -3,     16,     -3,     -7,     22,     22,     10,     5,
+    -11,    -16,    -4,     9,      12,     6,      -3,     2,      2,
+    -1,     4,      -7,     -8,     1,      8,      19,
+};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h
new file mode 100644
index 0000000..33aeea5
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h
@@ -0,0 +1,29 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This data was created from the PCM data in a WAV file held in v2 of the
+// Speech Commands test dataset, at the path:
+// speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav
+// This should contain all 16,000 samples from the one-second file.
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_1000MS_SAMPLE_DATA_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_1000MS_SAMPLE_DATA_H_
+
+#include <cstdint>
+
+extern const int g_yes_1000ms_sample_data_size;
+extern const int16_t g_yes_1000ms_sample_data[];
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_1000MS_SAMPLE_DATA_H_
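
Below is a minimal host-side sketch (not part of this change) of how the one-second capture declared in the new header above might be sanity-checked. Only g_yes_1000ms_sample_data and g_yes_1000ms_sample_data_size are taken from the diff; the 16,000-sample expectation comes from the header's own comment, and the standalone main() is hypothetical test scaffolding.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_1000ms_sample_data.h"

int main() {
  // One second of 16 kHz PCM should yield exactly 16,000 samples.
  if (g_yes_1000ms_sample_data_size != 16000) {
    std::fprintf(stderr, "unexpected sample count: %d\n",
                 g_yes_1000ms_sample_data_size);
    return EXIT_FAILURE;
  }
  // Track the peak absolute amplitude as a cheap check that the
  // capture is not silence.
  int32_t peak = 0;
  for (int i = 0; i < g_yes_1000ms_sample_data_size; ++i) {
    int32_t magnitude = static_cast<int32_t>(g_yes_1000ms_sample_data[i]);
    if (magnitude < 0) magnitude = -magnitude;
    if (magnitude > peak) peak = magnitude;
  }
  std::printf("16000 samples, peak amplitude %d\n", static_cast<int>(peak));
  return EXIT_SUCCESS;
}
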
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
deleted file mode 100644
index 2eb737f..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.h"
-
-/* File automatically created by
- * tensorflow/examples/speech_commands/wav_to_features.py \
- * --sample_rate=16000 \
- * --clip_duration_ms=1000 \
- * --window_size_ms=30 \
- * --window_stride_ms=20 \
- * --feature_bin_count=40 \
- * --quantize \
- * --preprocess="average" \
- * --input_wav="speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav" \
- * --output_c_file="yes_features_data.cc" \
- */
-
-const int g_yes_f2e59fea_nohash_1_width = 43;
-const int g_yes_f2e59fea_nohash_1_height = 49;
-const unsigned char g_yes_f2e59fea_nohash_1_data[] = {
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  1,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  1,   1,  1,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  4,   5,   1,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   2,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   1,  19, 1,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   1,   0,  1,  3,   3,   1,  1,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   8,   89, 8,   0,   0,  0,  0,   0,   0,  0,  0,   4,  13,
-    1,  6,  23,  20,  6,   4,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  19, 177, 42, 1,
-    1,  0,  0,   0,   0,   2,  3,   119, 51, 5,  139, 92,  58, 58, 15,  2,  1,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   13, 165, 176, 3,  1,  1,   0,   0,  1,  1,   32, 214,
-    26, 19, 113, 103, 28,  22, 27,  3,   1,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  12,  55, 128,
-    27, 1,  1,   0,   1,   4,  2,   52,  93, 10, 28,  156, 10, 21, 21,  3,  3,
-    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  14,  99,  32, 65, 7,   1,   2,  2,  6,   13, 121,
-    36, 15, 11,  112, 125, 14, 5,   13,  4,  4,  2,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   24, 25,
-    32, 5,  1,   0,   0,   0,  1,   0,   7,  5,  1,   1,   3,  3,  0,   3,  3,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   13,  13, 5,  1,   0,   0,  0,  0,   0,  3,
-    4,  1,  0,   1,   2,   3,  1,   1,   1,  4,  8,   1,   2,  1,  3,   1,  1,
-    0,  1,  1,   3,   1,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
-    8,  2,  1,   0,   0,   0,  0,   0,   1,  1,  0,   0,   1,  1,  2,   0,  2,
-    1,  0,  2,   0,   2,   2,  3,   1,   1,  0,  1,   1,   4,  5,  1,   0,  1,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  1,   1,   1,  0,  1,   2,   1,  0,  1,   3,  1,
-    1,  3,  1,   1,   6,   2,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  2,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  1,   1,   0,  1,  2,   6,   2,  4,  2,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  3,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
-    0,  0,  1,   2,   1,   1,  2,   1,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  4,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   2,  1,  0,   0,   2,  3,  5,   2,  0,
-    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   1,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   1,   2,  2,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  1,  0,   0,   0,  0,  1,   2,  3,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   1,  1,   1,   1,  0,  0,   0,   1,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
-    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,
-};
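
As a cross-check on the feature table deleted above: its 49-row height follows from the generation parameters recorded in the file's comment, since a 1000 ms clip scanned with a 30 ms window at a 20 ms stride yields (1000 - 30) / 20 + 1 = 49 frames, each carrying the 43 per-frame values declared as g_yes_f2e59fea_nohash_1_width.
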
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.h
deleted file mode 100644
index 39a3bb9..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_
-
-extern const int g_yes_f2e59fea_nohash_1_width;
-extern const int g_yes_f2e59fea_nohash_1_height;
-extern const unsigned char g_yes_f2e59fea_nohash_1_data[];
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc
deleted file mode 100644
index 9a34a20..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// See the header for documentation on the meaning of this data.
-
-#include "tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.h"
-
-const uint8_t g_yes_power_spectrum_data[g_yes_power_spectrum_data_size] = {
-    8, 89, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 13, 1, 6, 23, 20, 6, 4, 0, 0, 0,
-    0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0,
-};
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.h b/tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.h
deleted file mode 100644
index 5c8c00a..0000000
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This data was extracted from the larger feature data held in
-// no_features_data.cc and consists of the 26th spectrogram slice of 43 values.
-// This is the expected result of running the sample data in
-// yes_30ms_sample_data.cc through the preprocessing pipeline.
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_POWER_SPECTRUM_DATA_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_POWER_SPECTRUM_DATA_H_
-
-#include <cstdint>
-
-constexpr int g_yes_power_spectrum_data_size = 43;
-extern const uint8_t g_yes_power_spectrum_data[];
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_POWER_SPECTRUM_DATA_H_
diff --git a/tensorflow/lite/experimental/micro/testing/micro_test.h b/tensorflow/lite/experimental/micro/testing/micro_test.h
index 2f20dd5..32e9a57 100644
--- a/tensorflow/lite/experimental/micro/testing/micro_test.h
+++ b/tensorflow/lite/experimental/micro/testing/micro_test.h
@@ -107,13 +107,13 @@
     }                                                                          \
   } while (false)
 
-#define TF_LITE_MICRO_EXPECT_EQ(x, y)                                         \
-  do {                                                                        \
-    if ((x) != (y)) {                                                         \
-      micro_test::reporter->Report(#x " == " #y " failed at %s:%d", __FILE__, \
-                                   __LINE__);                                 \
-      micro_test::did_test_fail = true;                                       \
-    }                                                                         \
+#define TF_LITE_MICRO_EXPECT_EQ(x, y)                                          \
+  do {                                                                         \
+    if ((x) != (y)) {                                                          \
+      micro_test::reporter->Report(#x " == " #y " failed at %s:%d (%d vs %d)", \
+                                   __FILE__, __LINE__, (x), (y));              \
+      micro_test::did_test_fail = true;                                        \
+    }                                                                          \
   } while (false)
 
 #define TF_LITE_MICRO_EXPECT_NE(x, y)                                         \
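
The reworked macro above now prints both operands on failure. A minimal sketch of what that looks like in practice follows; the test name and values are hypothetical, and the added `%d` conversions assume int-sized operands:

```cc
// Hypothetical micro test illustrating the improved failure report of
// TF_LITE_MICRO_EXPECT_EQ; on a mismatch it now logs both operands, e.g.
// "x == y failed at my_test.cc:10 (3 vs 4)".
#include "tensorflow/lite/experimental/micro/testing/micro_test.h"

TF_LITE_MICRO_TESTS_BEGIN

TF_LITE_MICRO_TEST(ReportsOperandsOnMismatch) {
  const int x = 3;
  const int y = 4;
  TF_LITE_MICRO_EXPECT_EQ(x, y);  // Fails, printing "(3 vs 4)".
}

TF_LITE_MICRO_TESTS_END
```
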
diff --git a/tensorflow/lite/experimental/micro/tools/ci_build/ci_build_micro_projects.sh b/tensorflow/lite/experimental/micro/tools/ci_build/ci_build_micro_projects.sh
new file mode 100755
index 0000000..dcec726
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/tools/ci_build/ci_build_micro_projects.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Creates the project file distributions for the TensorFlow Lite Micro test and
+# example targets aimed at embedded platforms.
+#
+# Usage: ci_build_micro_projects.sh <TARGET OS> <TAGS>
+#
+# For example:
+# ci_build_micro_projects.sh mbed "CMSIS disco_f746ng"
+
+set -e
+set -x
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT_DIR=${SCRIPT_DIR}/../../../../../..
+cd ${ROOT_DIR}
+pwd
+
+tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh
+
+make -f tensorflow/lite/experimental/micro/tools/make/Makefile \
+  TARGET=${1} \
+  TAGS="${2}" \
+  generate_projects
diff --git a/tensorflow/lite/experimental/micro/tools/make/Makefile b/tensorflow/lite/experimental/micro/tools/make/Makefile
index 3457faf..1179b28 100644
--- a/tensorflow/lite/experimental/micro/tools/make/Makefile
+++ b/tensorflow/lite/experimental/micro/tools/make/Makefile
@@ -126,12 +126,20 @@
 
 MAKE_PROJECT_FILES := \
   README_MAKE.md \
-  Makefile
+  Makefile \
+  .vscode/tasks.json
 
 MBED_PROJECT_FILES := \
   README_MBED.md \
   mbed-os.lib \
-  mbed_app.json
+  mbed_app.json \
+  .vscode/tasks.json
+
+ALL_PROJECT_TARGETS :=
 
 KEIL_PROJECT_FILES := \
   README_KEIL.md \
@@ -209,12 +217,22 @@
 $(BINDIR)%.test_target: $(BINDIR)%_test
 	$(TEST_SCRIPT) $< '~~~ALL TESTS PASSED~~~'
 
+# Add the %.bin rule here since BINDIR is now defined.
+# These are microcontroller-specific rules for converting the ELF output
+# of the linker into a binary image that can be loaded directly.
+OBJCOPY := $(TARGET_TOOLCHAIN_PREFIX)objcopy
+$(BINDIR)%.bin: $(BINDIR)%
+	@mkdir -p $(dir $@)
+	$(OBJCOPY) $< $@ -O binary
+
 # Generate standalone makefile projects for all of the test targets.
 $(foreach TEST_TARGET,$(MICROLITE_TEST_SRCS),\
 $(eval $(call microlite_test,$(notdir $(basename $(TEST_TARGET))),$(TEST_TARGET))))
 
 test: $(MICROLITE_TEST_TARGETS)
 
+generate_projects: $(ALL_PROJECT_TARGETS)
+
 # Gets rid of all generated files.
 clean:
 	rm -rf $(MAKEFILE_DIR)/gen
diff --git a/tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh b/tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh
index 82c15e3..639f002 100755
--- a/tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh
+++ b/tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh
@@ -35,9 +35,11 @@
 STM32_BARE_LIB_URL="https://github.com/google/stm32_bare_lib/archive/c07d611fb0af58450c5a3e0ab4d52b47f99bc82d.zip"
 SIFIVE_FE310_LIB_URL="https://github.com/sifive/freedom-e-sdk/archive/baeeb8fd497a99b3c141d7494309ec2e64f19bdf.zip"
 RISCV_TOOLCHAIN_URL="https://static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-20181030-x86_64-linux-ubuntu14.tar.gz"
+AM_SDK_URL="http://s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.0.0.zip"
 AP3_URL="https://github.com/AmbiqMicro/TFLiteMicro_Apollo3/archive/dfbcef9a57276c087d95aab7cb234f1d4c9eaaba.zip"
 CUST_CMSIS_URL="https://github.com/AmbiqMicro/TFLiteMicro_CustCMSIS/archive/8f63966c5692e6a3a83956efd2e4aed77c4c9949.zip"
 GCC_EMBEDDED_URL="https://developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-linux.tar.bz2"
+KISSFFT_URL="http://downloads.sourceforge.net/project/kissfft/kissfft/v1_3_0/kiss_fft130.zip"
 
 download_and_extract() {
   local usage="Usage: download_and_extract URL DIR"
@@ -72,37 +74,54 @@
   find "${dir}" -type f -name '*BUILD' -delete
 }
 
-patch_apollo3_sdk() {
-  local ap3_dir="${1}"
-  if [ ! -f ${ap3_dir}/VERSION.txt ]; then
-    echo "Could not find ${ap3_dir}, skipping Apollo3 SDK";
+patch_am_sdk() {
+  local am_dir="${1}"
+  if [ ! -f ${am_dir}/VERSION.txt ]; then
+    echo "Could not find ${am_dir}, skipping AmbiqMicro SDK patch";
     return;
   fi
-  local src_dir=${ap3_dir}/boards/apollo3_evb/examples/hello_world/gcc
-  local dest_dir=${ap3_dir}/boards/apollo3_evb/examples/hello_world/gcc_patched
+
+  local src_dir=${am_dir}/boards/apollo3_evb/examples/hello_world/gcc
+  local dest_dir=${am_dir}/boards/apollo3_evb/examples/hello_world/gcc_patched
+
   rm -rf ${dest_dir}
   mkdir ${dest_dir}
+
   cp "${src_dir}/startup_gcc.c" "${dest_dir}/startup_gcc.c"
   cp "${src_dir}/hello_world.ld" "${dest_dir}/apollo3evb.ld"
-  sed -i -e '131s/1024/1024\*20/g' "${dest_dir}/startup_gcc.c"
+
+  sed -i -e '114s/1024/1024\*20/g' "${dest_dir}/startup_gcc.c"
   sed -i -e 's/main/_main/g' "${dest_dir}/startup_gcc.c"
+
   sed -i -e '3s/hello_world.ld/apollo3evb.ld/g' "${dest_dir}/apollo3evb.ld"
   sed -i -e '3s/startup_gnu/startup_gcc/g' "${dest_dir}/apollo3evb.ld"
-  sed -i -e '6s/am_reset_isr/Reset_Handler/g' "${dest_dir}/apollo3evb.ld"
   sed -i -e '22s/\*(.text\*)/\*(.text\*)\n\n\t\/\* These are the C++ global constructors.  Stick them all here and\n\t \* then walk through the array in main() calling them all.\n\t \*\/\n\t_init_array_start = .;\n\tKEEP (\*(SORT(.init_array\*)))\n\t_init_array_end = .;\n\n\t\/\* XXX Currently not doing anything for global destructors. \*\/\n/g' "${dest_dir}/apollo3evb.ld"
   sed -i -e "70s/} > SRAM/} > SRAM\n    \/\* Add this to satisfy reference to symbol 'end' from libnosys.a(sbrk.o)\n     \* to denote the HEAP start.\n     \*\/\n   end = .;/g" "${dest_dir}/apollo3evb.ld"
+
   echo "Finished preparing Apollo3 files"
 }
 
+patch_kissfft() {
+  sed -i -E "s@#ifdef FIXED_POINT@// Patched automatically by download_dependencies.sh so default is 16 bit.\n#ifndef FIXED_POINT\n#define FIXED_POINT (16)\n#endif\n// End patch.\n\n#ifdef FIXED_POINT@g" tensorflow/lite/experimental/micro/tools/make/downloads/kissfft/kiss_fft.h
+  sed -i -E "s@#define KISS_FFT_MALLOC malloc@#define KISS_FFT_MALLOC(X) (void*)(0) /* Patched. */@g" tensorflow/lite/experimental/micro/tools/make/downloads/kissfft/kiss_fft.h
+  sed -i -E "s@#define KISS_FFT_FREE free@#define KISS_FFT_FREE(X) /* Patched. */@g" tensorflow/lite/experimental/micro/tools/make/downloads/kissfft/kiss_fft.h
+  sed -i -E "s@(fprintf.*\);)@/* \1 */@g" tensorflow/lite/experimental/micro/tools/make/downloads/kissfft/tools/kiss_fftr.c
+  sed -i -E "s@(exit.*\);)@return; /* \1 */@g" tensorflow/lite/experimental/micro/tools/make/downloads/kissfft/tools/kiss_fftr.c
+  echo "Finished patching kissfft"
+}
+
 download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp"
 download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers"
 download_and_extract "${CMSIS_URL}" "${DOWNLOADS_DIR}/cmsis"
 download_and_extract "${STM32_BARE_LIB_URL}" "${DOWNLOADS_DIR}/stm32_bare_lib"
 download_and_extract "${SIFIVE_FE310_LIB_URL}" "${DOWNLOADS_DIR}/sifive_fe310_lib"
 download_and_extract "${RISCV_TOOLCHAIN_URL}" "${DOWNLOADS_DIR}/riscv_toolchain"
+download_and_extract "${AM_SDK_URL}" "${DOWNLOADS_DIR}/AmbiqSuite-Rel2.0.0"
+patch_am_sdk "${DOWNLOADS_DIR}/AmbiqSuite-Rel2.0.0"
 download_and_extract "${AP3_URL}" "${DOWNLOADS_DIR}/apollo3_ext"
-patch_apollo3_sdk "${DOWNLOADS_DIR}/Apollo3-SDK-2018.08.13"
 download_and_extract "${CUST_CMSIS_URL}" "${DOWNLOADS_DIR}/CMSIS_ext"
 download_and_extract "${GCC_EMBEDDED_URL}" "${DOWNLOADS_DIR}/gcc_embedded"
+download_and_extract "${KISSFFT_URL}" "${DOWNLOADS_DIR}/kissfft"
+patch_kissfft "${DOWNLOADS_DIR}/kissfft"
 
 echo "download_dependencies.sh completed successfully." >&2
diff --git a/tensorflow/lite/experimental/micro/tools/make/helper_functions.inc b/tensorflow/lite/experimental/micro/tools/make/helper_functions.inc
index 28f7618..89f473b 100644
--- a/tensorflow/lite/experimental/micro/tools/make/helper_functions.inc
+++ b/tensorflow/lite/experimental/micro/tools/make/helper_functions.inc
@@ -78,7 +78,13 @@
         --input_template=$$< --output_file=$$@ --executable=$(3) \
         --srcs="$(4)" --hdrs="$(5)" --include_paths="$$(PROJECT_INCLUDES)"
 
+$(PRJDIR)$(3)/$(1)/.vscode/tasks.json : tensorflow/lite/experimental/micro/tools/make/templates/tasks.json.$(1).tpl
+	@mkdir -p $$(dir $$@)
+	cp $$< $$@
+
 generate_$(3)_$(1)_project: $(addprefix $(PRJDIR)$(3)/$(1)/, $(4) $(5) $(2))
+
+ALL_PROJECT_TARGETS += generate_$(3)_$(1)_project
 endef
 
 # Specialized version of generate_project for TF Lite Micro test targets that
diff --git a/tensorflow/lite/experimental/micro/tools/make/targets/apollo3evb_makefile.inc b/tensorflow/lite/experimental/micro/tools/make/targets/apollo3evb_makefile.inc
index 6ed402a..fa9dc9c 100644
--- a/tensorflow/lite/experimental/micro/tools/make/targets/apollo3evb_makefile.inc
+++ b/tensorflow/lite/experimental/micro/tools/make/targets/apollo3evb_makefile.inc
@@ -5,9 +5,9 @@
   TARGET_TOOLCHAIN_PREFIX := arm-none-eabi-
   # Download the Ambiq Apollo3 SDK and set this variable to find the header
   # files:
-  APOLLO3_SDK := $(MAKEFILE_DIR)/downloads/Apollo3-SDK-2018.08.13
+  APOLLO3_SDK := $(MAKEFILE_DIR)/downloads/AmbiqSuite-Rel2.0.0
   # Need a pointer to the GNU ARM toolchain for crtbegin.o for the fp functions
-  # with the softfp interfaces.
+  # with the hard-float ABI.
   GCC_ARM := $(MAKEFILE_DIR)/downloads/gcc_embedded/
 
   PLATFORM_FLAGS = \
@@ -31,7 +31,7 @@
     -mcpu=cortex-m4 \
     -mthumb \
     -mfpu=fpv4-sp-d16 \
-    -mfloat-abi=softfp \
+    -mfloat-abi=hard \
     -std=gnu++11 \
     -Wvla \
     -Wall \
@@ -49,7 +49,7 @@
   CXXFLAGS += $(PLATFORM_FLAGS)
   CCFLAGS += $(PLATFORM_FLAGS)
   LDFLAGS += \
-    -mthumb -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mfloat-abi=softfp \
+    -mthumb -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mfloat-abi=hard \
     -nostartfiles -static \
     -Wl,--gc-sections -Wl,--entry,Reset_Handler \
     -Wl,--start-group -lm -lc -lgcc -Wl,--end-group \
@@ -61,7 +61,7 @@
   MICROLITE_LIBS := \
     $(APOLLO3_SDK)/boards/apollo3_evb/bsp/gcc/bin/libam_bsp.a \
     $(APOLLO3_SDK)/mcu/apollo3/hal/gcc/bin/libam_hal.a \
-    $(GCC_ARM)/lib/gcc/arm-none-eabi/7.3.1/thumb/v7e-m/fpv4-sp/softfp/crtbegin.o \
+    $(GCC_ARM)/lib/gcc/arm-none-eabi/7.3.1/thumb/v7e-m/fpv4-sp/hard/crtbegin.o \
     -lm
   INCLUDES += \
     -isystem$(MAKEFILE_DIR)/downloads/cmsis/CMSIS/Core/Include/ \
diff --git a/tensorflow/lite/experimental/micro/tools/make/targets/ecm3531/README.md b/tensorflow/lite/experimental/micro/tools/make/targets/ecm3531/README.md
index 77ea4e0..3e339fe 100644
--- a/tensorflow/lite/experimental/micro/tools/make/targets/ecm3531/README.md
+++ b/tensorflow/lite/experimental/micro/tools/make/targets/ecm3531/README.md
@@ -1,8 +1,9 @@
-Compiling instructions here  https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro
-
+Compiling instructions here
+https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro
 
 CONTACT INFORMATION:
 
-Contact info@etacompute.com  for more information on obtaining the Eta Compute SDK and evalution board.
+Contact info@etacompute.com for more information on obtaining the Eta Compute
+SDK and evaluation board.
 
 www.etacompute.com
diff --git a/tensorflow/lite/experimental/micro/tools/make/templates/tasks.json.make.tpl b/tensorflow/lite/experimental/micro/tools/make/templates/tasks.json.make.tpl
new file mode 100644
index 0000000..141994d
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/tools/make/templates/tasks.json.make.tpl
@@ -0,0 +1,16 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "Make Build",
+            "type": "shell",
+            "command": "make",
+            "group": {
+                "kind": "build",
+                "isDefault": true
+                }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tensorflow/lite/experimental/micro/tools/make/templates/tasks.json.mbed.tpl b/tensorflow/lite/experimental/micro/tools/make/templates/tasks.json.mbed.tpl
new file mode 100644
index 0000000..616f3b2
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/tools/make/templates/tasks.json.mbed.tpl
@@ -0,0 +1,39 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "Mbed Config Root",
+            "type": "shell",
+            "command": "mbed config root .",
+        },
+        {
+            "label": "Mbed Deploy",
+            "type": "shell",
+            "command": "mbed deploy",
+        },
+        {
+            "label": "Mbed Patch C++11",
+            "type": "shell",
+            "command": "python",
+            "args": [
+                "-c",
+                "import fileinput, glob;\nfor filename in glob.glob(\"mbed-os/tools/profiles/*.json\"):\n  for line in fileinput.input(filename, inplace=True):\n    print line.replace(\"\\\"-std=gnu++98\\\"\",\"\\\"-std=c++11\\\", \\\"-fpermissive\\\"\")"
+            ]
+        },
+        {
+            "label": "Mbed Init",
+            "dependsOn": ["Mbed Config Root", "Mbed Deploy", "Mbed Patch C++11"]
+        },
+        {
+            "label": "Mbed build",
+            "type": "shell",
+            "command": "mbed compile -m auto -t GCC_ARM",
+            "group": {
+                "kind": "build",
+                "isDefault": true
+                }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tensorflow/lite/experimental/microfrontend/lib/BUILD b/tensorflow/lite/experimental/microfrontend/lib/BUILD
index a055e52..8dd42fc 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/BUILD
+++ b/tensorflow/lite/experimental/microfrontend/lib/BUILD
@@ -6,6 +6,11 @@
 
 licenses(["notice"])  # Apache 2.0
 
+load(
+    "//tensorflow/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
 cc_library(
     name = "bits",
     hdrs = ["bits.h"],
@@ -117,72 +122,65 @@
     ],
 )
 
-cc_test(
+tflite_micro_cc_test(
     name = "fft_test",
-    size = "small",
     srcs = ["fft_test.cc"],
     deps = [
         ":fft",
-        "@com_google_googletest//:gtest_main",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
 
-cc_test(
+tflite_micro_cc_test(
     name = "filterbank_test",
-    size = "small",
     srcs = ["filterbank_test.cc"],
     deps = [
         ":filterbank",
-        "@com_google_googletest//:gtest_main",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
 
-cc_test(
+tflite_micro_cc_test(
     name = "frontend_test",
-    size = "small",
     srcs = ["frontend_test.cc"],
     deps = [
         ":frontend",
-        "@com_google_googletest//:gtest_main",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
 
-cc_test(
+tflite_micro_cc_test(
     name = "log_scale_test",
-    size = "small",
     srcs = ["log_scale_test.cc"],
     deps = [
         ":log_scale",
-        "@com_google_googletest//:gtest_main",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
 
-cc_test(
+tflite_micro_cc_test(
     name = "noise_reduction_test",
-    size = "small",
     srcs = ["noise_reduction_test.cc"],
     deps = [
         ":noise_reduction",
-        "@com_google_googletest//:gtest_main",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
 
-cc_test(
+tflite_micro_cc_test(
     name = "pcan_gain_control_test",
-    size = "small",
     srcs = ["pcan_gain_control_test.cc"],
     deps = [
         ":pcan_gain_control",
-        "@com_google_googletest//:gtest_main",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
 
-cc_test(
+tflite_micro_cc_test(
     name = "window_test",
-    size = "small",
     srcs = ["window_test.cc"],
     deps = [
         ":window",
-        "@com_google_googletest//:gtest_main",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
     ],
 )
diff --git a/tensorflow/lite/experimental/microfrontend/lib/bits.h b/tensorflow/lite/experimental/microfrontend/lib/bits.h
index bf15466..04b3ba6 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/bits.h
+++ b/tensorflow/lite/experimental/microfrontend/lib/bits.h
@@ -63,14 +63,14 @@
 
 static inline int CountLeadingZeros64(uint64_t n) {
 #if defined(_MSC_VER) && defined(_M_X64)
-  // MSVC does not have __buitin_clzll. Use _BitScanReverse64.
+  // MSVC does not have __builtin_clzll. Use _BitScanReverse64.
   unsigned long result = 0;  // NOLINT(runtime/int)
   if (_BitScanReverse64(&result, n)) {
     return 63 - result;
   }
   return 64;
 #elif defined(_MSC_VER)
-  // MSVC does not have __buitin_clzll. Compose two calls to _BitScanReverse
+  // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse
   unsigned long result = 0;  // NOLINT(runtime/int)
   if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
     return 31 - result;
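
Both MSVC branches above emulate __builtin_clzll with _BitScanReverse, which reports the index of the highest set bit. For comparison, a hypothetical portable reference (not part of bits.h) that computes the same result bit by bit:

```cc
// Hypothetical portable reference for CountLeadingZeros64: walks down from
// bit 63 until the first set bit, returning 64 for a zero input, matching
// the _BitScanReverse-based branches above.
#include <cstdint>

static inline int ReferenceCountLeadingZeros64(uint64_t n) {
  int count = 0;
  for (uint64_t mask = UINT64_C(1) << 63; mask != 0; mask >>= 1) {
    if (n & mask) break;
    ++count;
  }
  return count;
}
```
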
diff --git a/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc b/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc
index 1b754c1..ec1f247 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc
@@ -15,8 +15,7 @@
 #include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
 #include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
 
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
 namespace {
 
@@ -25,9 +24,13 @@
     0, -28328, 0, 21447, 0, -13312, 0, 5943,   0, -1152, 0};
 const int kScaleShift = 0;
 
-TEST(FftTest, CheckOutputValues) {
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(FftTest_CheckOutputValues) {
   struct FftState state;
-  ASSERT_TRUE(
+  TF_LITE_MICRO_EXPECT(
       FftPopulateState(&state, sizeof(kFakeWindow) / sizeof(kFakeWindow[0])));
 
   FftInit(&state);
@@ -37,14 +40,15 @@
       {0, 0},    {-10, 9},     {-20, 0},   {-9, -10},     {0, 25},  {-119, 119},
       {-887, 0}, {3000, 3000}, {0, -6401}, {-3000, 3000}, {886, 0}, {118, 119},
       {0, 25},   {9, -10},     {19, 0},    {9, 9},        {0, 0}};
-  ASSERT_EQ(state.fft_size / 2 + 1, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.fft_size / 2 + 1,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i <= state.fft_size / 2; ++i) {
-    EXPECT_EQ(state.output[i].real, expected[i].real);
-    EXPECT_EQ(state.output[i].imag, expected[i].imag);
+    TF_LITE_MICRO_EXPECT_EQ(state.output[i].real, expected[i].real);
+    TF_LITE_MICRO_EXPECT_EQ(state.output[i].imag, expected[i].imag);
   }
 
   FftFreeStateContents(&state);
 }
 
-}  // namespace
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc b/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc
index 41f0064..16257aa 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc
@@ -17,8 +17,7 @@
 
 #include <cstring>
 
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
 namespace {
 
@@ -33,9 +32,9 @@
 const int kScaleShift = 0;
 
 // Test filterbank generation using scaled-down defaults.
-class FilterbankTest : public ::testing::Test {
- protected:
-  FilterbankTest() {
+class FilterbankTestConfig {
+ public:
+  FilterbankTestConfig() {
     config_.num_channels = 2;
     config_.lower_band_limit = 8.0;
     config_.upper_band_limit = 450.0;
@@ -44,110 +43,124 @@
   struct FilterbankConfig config_;
 };
 
-TEST_F(FilterbankTest, CheckStartIndex) {
-  struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+}  // namespace
 
-  EXPECT_EQ(state.start_index, kStartIndex);
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(FilterbankTest_CheckStartIndex) {
+  FilterbankTestConfig config;
+  struct FilterbankState state;
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
+
+  TF_LITE_MICRO_EXPECT_EQ(state.start_index, kStartIndex);
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckEndIndex) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckEndIndex) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
 
-  EXPECT_EQ(state.end_index, kEndIndex);
+  TF_LITE_MICRO_EXPECT_EQ(state.end_index, kEndIndex);
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckChannelFrequencyStarts) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckChannelFrequencyStarts) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
 
   const int16_t expected[] = {0, 4, 8};
-  ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i <= state.num_channels; ++i) {
-    EXPECT_EQ(state.channel_frequency_starts[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.channel_frequency_starts[i], expected[i]);
   }
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckChannelWeightStarts) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckChannelWeightStarts) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
 
   const int16_t expected[] = {0, 8, 16};
-  ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i <= state.num_channels; ++i) {
-    EXPECT_EQ(state.channel_weight_starts[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.channel_weight_starts[i], expected[i]);
   }
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckChannelWidths) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckChannelWidths) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
 
   const int16_t expected[] = {8, 8, 8};
-  ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i <= state.num_channels; ++i) {
-    EXPECT_EQ(state.channel_widths[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.channel_widths[i], expected[i]);
   }
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckWeights) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckWeights) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
 
   const int16_t expected[] = {0, 3277, 2217, 1200, 222,  0,   0,   0,
                               0, 3376, 2468, 1591, 744,  0,   0,   0,
                               0, 4020, 3226, 2456, 1708, 983, 277, 0};
-  ASSERT_EQ(state.channel_weight_starts[state.num_channels] +
-                state.channel_widths[state.num_channels],
-            sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.channel_weight_starts[state.num_channels] +
+                              state.channel_widths[state.num_channels],
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
-    EXPECT_EQ(state.weights[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.weights[i], expected[i]);
   }
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckUnweights) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckUnweights) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
 
   const int16_t expected[] = {0, 819, 1879, 2896, 3874, 0,    0,    0,
                               0, 720, 1628, 2505, 3352, 0,    0,    0,
                               0, 76,  870,  1640, 2388, 3113, 3819, 0};
-  ASSERT_EQ(state.channel_weight_starts[state.num_channels] +
-                state.channel_widths[state.num_channels],
-            sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.channel_weight_starts[state.num_channels] +
+                              state.channel_widths[state.num_channels],
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
-    EXPECT_EQ(state.unweights[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.unweights[i], expected[i]);
   }
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckConvertFftComplexToEnergy) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckConvertFftComplexToEnergy) {
   struct FilterbankState state;
   state.start_index = kStartIndex;
   state.end_index = kEndIndex;
@@ -161,42 +174,46 @@
 
   int i;
   for (i = state.start_index; i < state.end_index; ++i) {
-    EXPECT_EQ(energy[i], kEnergy[i]);
+    TF_LITE_MICRO_EXPECT_EQ(energy[i], kEnergy[i]);
   }
 }
 
-TEST_F(FilterbankTest, CheckAccumulateChannels) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckAccumulateChannels) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
 
   FilterbankAccumulateChannels(&state, kEnergy);
 
-  ASSERT_EQ(state.num_channels + 1, sizeof(kWork) / sizeof(kWork[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels + 1,
+                          sizeof(kWork) / sizeof(kWork[0]));
   int i;
   for (i = 0; i <= state.num_channels; ++i) {
-    EXPECT_EQ(state.work[i], kWork[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.work[i], kWork[i]);
   }
 
   FilterbankFreeStateContents(&state);
 }
 
-TEST_F(FilterbankTest, CheckSqrt) {
+TF_LITE_MICRO_TEST(FilterbankTest_CheckSqrt) {
+  FilterbankTestConfig config;
   struct FilterbankState state;
-  ASSERT_TRUE(
-      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  TF_LITE_MICRO_EXPECT(FilterbankPopulateState(&config.config_, &state,
+                                               kSampleRate, kSpectrumSize));
   std::memcpy(state.work, kWork, sizeof(kWork));
 
   uint32_t* scaled_filterbank = FilterbankSqrt(&state, kScaleShift);
 
   const uint32_t expected[] = {247311, 508620};
-  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < state.num_channels; ++i) {
-    EXPECT_EQ(scaled_filterbank[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(scaled_filterbank[i], expected[i]);
   }
 
   FilterbankFreeStateContents(&state);
 }
 
-}  // namespace
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc b/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc
index a6faa1f..568484f 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc
@@ -15,8 +15,7 @@
 #include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
 #include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
 
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
 namespace {
 
@@ -29,9 +28,9 @@
     0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768};
 
 // Test end-to-end frontend behaviors.
-class FrontendTest : public ::testing::Test {
- protected:
-  FrontendTest() {
+class FrontendTestConfig {
+ public:
+  FrontendTestConfig() {
     config_.window.size_ms = 25;
     config_.window.step_size_ms = 10;
     config_.noise_reduction.smoothing_bits = 10;
@@ -53,9 +52,15 @@
   struct FrontendConfig config_;
 };
 
-TEST_F(FrontendTest, CheckOutputValues) {
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(FrontendTest_CheckOutputValues) {
+  FrontendTestConfig config;
   struct FrontendState state;
-  ASSERT_TRUE(FrontendPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      FrontendPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
   struct FrontendOutput output = FrontendProcessSamples(
@@ -63,18 +68,20 @@
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read);
 
   const uint16_t expected[] = {479, 425};
-  ASSERT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < output.size; ++i) {
-    EXPECT_EQ(output.values[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(output.values[i], expected[i]);
   }
 
   FrontendFreeStateContents(&state);
 }
 
-TEST_F(FrontendTest, CheckConsecutiveWindow) {
+TF_LITE_MICRO_TEST(FrontendTest_CheckConsecutiveWindow) {
+  FrontendTestConfig config;
   struct FrontendState state;
-  ASSERT_TRUE(FrontendPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      FrontendPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
   FrontendProcessSamples(&state, kFakeAudioData,
@@ -86,18 +93,20 @@
       &num_samples_read);
 
   const int16_t expected[] = {436, 378};
-  ASSERT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < output.size; ++i) {
-    EXPECT_EQ(output.values[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(output.values[i], expected[i]);
   }
 
   FrontendFreeStateContents(&state);
 }
 
-TEST_F(FrontendTest, CheckNotEnoughSamples) {
+TF_LITE_MICRO_TEST(FrontendTest_CheckNotEnoughSamples) {
+  FrontendTestConfig config;
   struct FrontendState state;
-  ASSERT_TRUE(FrontendPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      FrontendPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
   FrontendProcessSamples(&state, kFakeAudioData,
@@ -113,10 +122,10 @@
           kStepSamples,
       &num_samples_read);
 
-  EXPECT_EQ(output.size, 0);
-  EXPECT_EQ(output.values, nullptr);
+  TF_LITE_MICRO_EXPECT_EQ(output.size, 0);
+  TF_LITE_MICRO_EXPECT_EQ(output.values, nullptr);
 
   FrontendFreeStateContents(&state);
 }
 
-}  // namespace
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc b/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc
index 1ea0842..be52fd4 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc
@@ -15,15 +15,18 @@
 #include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
 #include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
 
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
 namespace {
 
 const int kScaleShift = 6;
 const int kCorrectionBits = -1;
 
-TEST(LogScaleTest, CheckOutputValues) {
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(LogScaleTest_CheckOutputValues) {
   struct LogScaleState state;
   state.enable_log = true;
   state.scale_shift = kScaleShift;
@@ -36,11 +39,11 @@
   const uint16_t expected[] = {479, 425};
   int i;
   for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
-    EXPECT_EQ(output[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(output[i], expected[i]);
   }
 }
 
-TEST(LogScaleTest, CheckOutputValuesNoLog) {
+TF_LITE_MICRO_TEST(LogScaleTest_CheckOutputValuesNoLog) {
   struct LogScaleState state;
   state.enable_log = false;
   state.scale_shift = kScaleShift;
@@ -53,8 +56,8 @@
   const uint16_t expected[] = {65535, 45998};
   int i;
   for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
-    EXPECT_EQ(output[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(output[i], expected[i]);
   }
 }
 
-}  // namespace
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc b/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc
index 13d58b2..ba864c4 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc
@@ -15,17 +15,16 @@
 #include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
 #include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
 
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
 namespace {
 
 const int kNumChannels = 2;
 
 // Test noise reduction using default config values.
-class NoiseReductionTest : public ::testing::Test {
- protected:
-  NoiseReductionTest() {
+class NoiseReductionTestConfig {
+ public:
+  NoiseReductionTestConfig() {
     config_.smoothing_bits = 10;
     config_.even_smoothing = 0.025;
     config_.odd_smoothing = 0.06;
@@ -35,38 +34,48 @@
   struct NoiseReductionConfig config_;
 };
 
-TEST_F(NoiseReductionTest, TestNoiseReductionEstimate) {
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(NoiseReductionTest_TestNoiseReductionEstimate) {
+  NoiseReductionTestConfig config;
   struct NoiseReductionState state;
-  ASSERT_TRUE(NoiseReductionPopulateState(&config_, &state, kNumChannels));
+  TF_LITE_MICRO_EXPECT(
+      NoiseReductionPopulateState(&config.config_, &state, kNumChannels));
 
   uint32_t signal[] = {247311, 508620};
   NoiseReductionApply(&state, signal);
 
   const uint32_t expected[] = {6321887, 31248341};
-  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < state.num_channels; ++i) {
-    EXPECT_EQ(state.estimate[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.estimate[i], expected[i]);
   }
 
   NoiseReductionFreeStateContents(&state);
 }
 
-TEST_F(NoiseReductionTest, TestNoiseReduction) {
+TF_LITE_MICRO_TEST(NoiseReductionTest_TestNoiseReduction) {
+  NoiseReductionTestConfig config;
   struct NoiseReductionState state;
-  ASSERT_TRUE(NoiseReductionPopulateState(&config_, &state, kNumChannels));
+  TF_LITE_MICRO_EXPECT(
+      NoiseReductionPopulateState(&config.config_, &state, kNumChannels));
 
   uint32_t signal[] = {247311, 508620};
   NoiseReductionApply(&state, signal);
 
   const uint32_t expected[] = {241137, 478104};
-  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < state.num_channels; ++i) {
-    EXPECT_EQ(signal[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(signal[i], expected[i]);
   }
 
   NoiseReductionFreeStateContents(&state);
 }
 
-}  // namespace
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
index 7c92d2d..93d7a8b 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
@@ -15,8 +15,7 @@
 #include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
 #include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
 
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
 namespace {
 
@@ -25,9 +24,9 @@
 const int kCorrectionBits = -1;
 
 // Test pcan auto gain control using default config values.
-class PcanGainControlTest : public ::testing::Test {
- protected:
-  PcanGainControlTest() {
+class PcanGainControlTestConfig {
+ public:
+  PcanGainControlTestConfig() {
     config_.enable_pcan = 1;
     config_.strength = 0.95;
     config_.offset = 80.0;
@@ -37,24 +36,30 @@
   struct PcanGainControlConfig config_;
 };
 
-TEST_F(PcanGainControlTest, TestPcanGainControl) {
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(PcanGainControlTest_TestPcanGainControl) {
   uint32_t estimate[] = {6321887, 31248341};
+  PcanGainControlTestConfig config;
   struct PcanGainControlState state;
-  ASSERT_TRUE(PcanGainControlPopulateState(&config_, &state, estimate,
-                                           kNumChannels, kSmoothingBits,
-                                           kCorrectionBits));
+  TF_LITE_MICRO_EXPECT(PcanGainControlPopulateState(
+      &config.config_, &state, estimate, kNumChannels, kSmoothingBits,
+      kCorrectionBits));
 
   uint32_t signal[] = {241137, 478104};
   PcanGainControlApply(&state, signal);
 
   const uint32_t expected[] = {3578, 1533};
-  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.num_channels,
+                          sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < state.num_channels; ++i) {
-    EXPECT_EQ(signal[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(signal[i], expected[i]);
   }
 
   PcanGainControlFreeStateContents(&state);
 }
 
-}  // namespace
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/microfrontend/lib/window_test.cc b/tensorflow/lite/experimental/microfrontend/lib/window_test.cc
index 60f1144..cf9df52 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/window_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/window_test.cc
@@ -15,8 +15,7 @@
 #include "tensorflow/lite/experimental/microfrontend/lib/window.h"
 #include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
 
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
 
 namespace {
 
@@ -29,9 +28,9 @@
     0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768};
 
 // Test window function behaviors using default config values.
-class WindowTest : public ::testing::Test {
- protected:
-  WindowTest() {
+class WindowTestConfig {
+ public:
+  WindowTestConfig() {
     config_.size_ms = 25;
     config_.step_size_ms = 10;
   }
@@ -39,84 +38,98 @@
   struct WindowConfig config_;
 };
 
-TEST_F(WindowTest, CheckCoefficients) {
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(WindowState_CheckCoefficients) {
+  WindowTestConfig config;
   struct WindowState state;
-  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      WindowPopulateState(&config.config_, &state, kSampleRate));
 
   const int16_t expected[] = {16,   144,  391,  743,  1176, 1664, 2177,
                               2681, 3145, 3541, 3843, 4032, 4096, 4032,
                               3843, 3541, 3145, 2681, 2177, 1664, 1176,
                               743,  391,  144,  16};
-  ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < state.size; ++i) {
-    EXPECT_EQ(state.coefficients[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.coefficients[i], expected[i]);
   }
 
   WindowFreeStateContents(&state);
 }
 
-TEST_F(WindowTest, CheckResidualInput) {
+TF_LITE_MICRO_TEST(WindowState_CheckResidualInput) {
+  WindowTestConfig config;
   struct WindowState state;
-  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      WindowPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
-  ASSERT_TRUE(WindowProcessSamples(
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
       &state, kFakeAudioData,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
 
   int i;
   for (i = kStepSamples; i < kWindowSamples; ++i) {
-    EXPECT_EQ(state.input[i - kStepSamples], kFakeAudioData[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.input[i - kStepSamples], kFakeAudioData[i]);
   }
 
   WindowFreeStateContents(&state);
 }
 
-TEST_F(WindowTest, CheckOutputValues) {
+TF_LITE_MICRO_TEST(WindowState_CheckOutputValues) {
+  WindowTestConfig config;
   struct WindowState state;
-  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      WindowPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
-  ASSERT_TRUE(WindowProcessSamples(
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
       &state, kFakeAudioData,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
 
   const int16_t expected[] = {
       0, 1151,   0, -5944, 0, 13311,  0, -21448, 0, 28327, 0, -32256, 0, 32255,
       0, -28328, 0, 21447, 0, -13312, 0, 5943,   0, -1152, 0};
-  ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < state.size; ++i) {
-    EXPECT_EQ(state.output[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.output[i], expected[i]);
   }
 
   WindowFreeStateContents(&state);
 }
 
-TEST_F(WindowTest, CheckMaxAbsValue) {
+TF_LITE_MICRO_TEST(WindowState_CheckMaxAbsValue) {
+  WindowTestConfig config;
   struct WindowState state;
-  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      WindowPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
-  ASSERT_TRUE(WindowProcessSamples(
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
       &state, kFakeAudioData,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
 
-  EXPECT_EQ(state.max_abs_output_value, 32256);
+  TF_LITE_MICRO_EXPECT_EQ(state.max_abs_output_value, 32256);
 
   WindowFreeStateContents(&state);
 }
 
-TEST_F(WindowTest, CheckConsecutiveWindow) {
+TF_LITE_MICRO_TEST(WindowState_CheckConsecutiveWindow) {
+  WindowTestConfig config;
   struct WindowState state;
-  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      WindowPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
-  ASSERT_TRUE(WindowProcessSamples(
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
       &state, kFakeAudioData,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
-  ASSERT_TRUE(WindowProcessSamples(
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
       &state, kFakeAudioData + kWindowSamples,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
       &num_samples_read));
@@ -124,38 +137,41 @@
   const int16_t expected[] = {
       0, -1152, 0, 5943,   0, -13312, 0, 21447, 0, -28328, 0, 32255, 0, -32256,
       0, 28327, 0, -21448, 0, 13311,  0, -5944, 0, 1151,   0};
-  ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  TF_LITE_MICRO_EXPECT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
   int i;
   for (i = 0; i < state.size; ++i) {
-    EXPECT_EQ(state.output[i], expected[i]);
+    TF_LITE_MICRO_EXPECT_EQ(state.output[i], expected[i]);
   }
 
   WindowFreeStateContents(&state);
 }
 
-TEST_F(WindowTest, CheckNotEnoughSamples) {
+TF_LITE_MICRO_TEST(WindowState_CheckNotEnoughSamples) {
+  WindowTestConfig config;
   struct WindowState state;
-  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  TF_LITE_MICRO_EXPECT(
+      WindowPopulateState(&config.config_, &state, kSampleRate));
   size_t num_samples_read;
 
-  ASSERT_TRUE(WindowProcessSamples(
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
       &state, kFakeAudioData,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
-  ASSERT_TRUE(WindowProcessSamples(
+  TF_LITE_MICRO_EXPECT(WindowProcessSamples(
       &state, kFakeAudioData + kWindowSamples,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
       &num_samples_read));
-  ASSERT_FALSE(WindowProcessSamples(
-      &state, kFakeAudioData + kWindowSamples + kStepSamples,
-      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples -
-          kStepSamples,
-      &num_samples_read));
+  TF_LITE_MICRO_EXPECT_EQ(
+      false, WindowProcessSamples(
+                 &state, kFakeAudioData + kWindowSamples + kStepSamples,
+                 sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) -
+                     kWindowSamples - kStepSamples,
+                 &num_samples_read));
 
-  EXPECT_EQ(
+  TF_LITE_MICRO_EXPECT_EQ(
       state.input_used,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - 2 * kStepSamples);
 
   WindowFreeStateContents(&state);
 }
 
-}  // namespace
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/objc/BUILD.apple b/tensorflow/lite/experimental/objc/BUILD.apple
new file mode 100644
index 0000000..3317c33
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/BUILD.apple
@@ -0,0 +1,109 @@
+# TensorFlow Lite Objective-C API.
+
+package(default_visibility = ["//visibility:private"])
+
+licenses(["notice"])  # Apache 2.0
+
+load("@build_bazel_rules_apple//apple:ios.bzl", "ios_unit_test")
+
+SOURCES = glob([
+    "sources/*.h",
+    "sources/*.m",
+    "sources/*.mm",
+])
+
+API_HEADERS = glob([
+    "apis/*.h",
+])
+
+MINIMUM_OS_VERSION = "9.0"
+
+# Compiler flags for building regular non-test libraries.
+RELEASE_COPTS = [
+    # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++.
+    "-Wall",
+    # Warns if functions, variables, and types marked with the deprecated attribute are being used.
+    "-Wdeprecated-declarations",
+    # Warns for errors in documentation.
+    "-Wdocumentation",
+    # Turns all warnings into errors.
+    "-Werror",
+    # Enables extra warning flags that are not enabled by -Wall.
+    "-Wextra",
+    # Warns if a global function is defined without a previous prototype declaration.
+    "-Wmissing-prototypes",
+    # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison.
+    "-Wno-sign-compare",
+    # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks.
+    "-Wno-unused-parameter",
+    # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable.
+    "-Wshadow",
+    # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of ().
+    "-Wstrict-prototypes",
+    # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet.
+    "-Wundeclared-selector",
+    # Turns off warnings for headers that are not part of the TensorFlow Lite Objective-C API.
+    "--system-header-prefix=tensorflow/lite/experimental/c/",
+]
+
+# Compiler flags for building test libraries.
+TEST_COPTS = RELEASE_COPTS + [
+    # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument.
+    "-Wno-nonnull",
+    # Disables warning when a global or local variable or type declaration shadows another.
+    "-Wno-shadow",
+]
+
+# Default tags for filtering targets. Targets in this file are restricted to Apple platforms.
+DEFAULT_TAGS = [
+    "apple",
+    "manual",
+]
+
+objc_library(
+    name = "TensorFlowLite",
+    srcs = SOURCES,
+    hdrs = API_HEADERS,
+    copts = RELEASE_COPTS,
+    tags = DEFAULT_TAGS,
+    deps = [
+        "//tensorflow/lite/experimental/c:c_api",
+    ],
+    alwayslink = 1,
+)
+
+ios_unit_test(
+    name = "TensorFlowLiteTests",
+    size = "small",
+    minimum_os_version = MINIMUM_OS_VERSION,
+    tags = DEFAULT_TAGS + [
+        # Sanitizer tests are not supported by the iOS build toolchain (b/74292221),
+        # so disable them for iOS test targets.
+        "noasan",
+        "notsan",
+        "nomsan",
+    ],
+    deps = [":TensorFlowLiteTestsLib"],
+)
+
+objc_library(
+    name = "TensorFlowLiteTestsLib",
+    testonly = 1,
+    srcs = glob([
+        "tests/*.m",
+    ]),
+    hdrs = glob([
+        "apis/*.h",
+        "sources/*.h",
+        "tests/*.h",
+    ]),
+    copts = TEST_COPTS,
+    resources = [
+        "//tensorflow/lite:testdata/add.bin",
+        "//tensorflow/lite:testdata/add_quantized.bin",
+    ],
+    tags = DEFAULT_TAGS,
+    deps = [
+        ":TensorFlowLite",
+    ],
+)
diff --git a/tensorflow/lite/experimental/objc/README.md b/tensorflow/lite/experimental/objc/README.md
new file mode 100644
index 0000000..2940e05
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/README.md
@@ -0,0 +1,52 @@
+# TensorFlow Lite Objective-C Library
+
+[TensorFlow Lite](https://www.tensorflow.org/lite/) is TensorFlow's lightweight
+solution for on-device machine learning. This Objective-C library enables
+low-latency inference of on-device machine learning models with a small binary
+size and fast performance, including support for hardware acceleration.
+
+## Getting Started
+
+### Bazel
+
+In your `BUILD` file, add the `TensorFlowLite` dependency:
+
+```python
+objc_library(
+  deps = [
+      "//tensorflow/lite/experimental/objc:TensorFlowLite",
+  ],
+)
+```
+
+To build the Objective-C TensorFlow Lite library using Bazel on Apple platforms,
+clone or download the [TensorFlow GitHub repo](https://github.com/tensorflow/tensorflow),
+navigate to the root `tensorflow` directory, and run the `configure.py` script:
+
+```shell
+python configure.py
+```
+
+Follow the prompts, and when asked whether to configure the Bazel rules for
+Apple platforms, enter `y`.
+
+Build the `TensorFlowLite` Objective-C library target:
+
+```shell
+bazel build tensorflow/lite/experimental/objc:TensorFlowLite
+```
+
+Build and run the `TensorFlowLiteTests` target:
+
+```shell
+bazel test tensorflow/lite/experimental/objc:TensorFlowLiteTests
+```
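+
+Once built, a typical inference pass creates an interpreter from a model file,
+allocates memory for tensors, copies input data into the input tensor, invokes
+the interpreter, and reads the output back out. The snippet below is a minimal
+sketch of that flow using this library's API; the model path and the input
+values are placeholders for your own model, and error handling is elided for
+brevity:
+
+```objectivec
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h"
+#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h"
+
+NSError *error;
+
+// Create an interpreter with the default options for a model stored on the device.
+TFLInterpreter *interpreter =
+    [[TFLInterpreter alloc] initWithModelPath:@"/path/to/model.tflite" error:&error];
+
+// Allocate memory for the input and output tensors.
+[interpreter allocateTensorsWithError:&error];
+
+// Copy input data into the first input tensor. The byte size of the data must
+// match what the tensor expects; two 32-bit floats are assumed here.
+float input[2] = {1.f, 3.f};
+NSData *inputData = [NSData dataWithBytes:input length:sizeof(input)];
+TFLTensor *inputTensor = [interpreter inputTensorAtIndex:0 error:&error];
+[inputTensor copyData:inputData error:&error];
+
+// Run inference.
+[interpreter invokeWithError:&error];
+
+// Retrieve a copy of the data in the first output tensor.
+TFLTensor *outputTensor = [interpreter outputTensorAtIndex:0 error:&error];
+NSData *outputData = [outputTensor dataWithError:&error];
+```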
+
+### Tulsi
+
+Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by
+running the following command in Terminal from the root source directory:
+
+```shell
+generate_xcodeproj.sh --genconfig tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj
+```
diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
new file mode 100644
index 0000000..091ef4e
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
@@ -0,0 +1,60 @@
+{
+  "sourceFilters" : [
+    "tensorflow/lite",
+    "tensorflow/lite/experimental/c",
+    "tensorflow/lite/experimental/objc",
+    "tensorflow/lite/experimental/objc/apis",
+    "tensorflow/lite/experimental/objc/sources",
+    "tensorflow/lite/experimental/objc/tests",
+    "tensorflow/lite/kernels",
+    "tensorflow/lite/kernels/internal",
+    "tensorflow/lite/nnapi",
+    "tensorflow/lite/schema",
+  ],
+  "buildTargets" : [
+    "//tensorflow/lite/experimental/objc:TensorFlowLite",
+    "//tensorflow/lite/experimental/objc:TensorFlowLiteTests",
+  ],
+  "projectName" : "TensorFlowLiteObjC",
+  "optionSet" : {
+    "LaunchActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "EnvironmentVariables" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "CommandlineArguments" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "LaunchActionPostActionScript" : {
+      "p" : "$(inherited)"
+    }
+  },
+  "additionalFilePaths" : [
+    "tensorflow/lite/experimental/objc/BUILD",
+  ]
+}
diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
new file mode 100644
index 0000000..0b6fedf
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
@@ -0,0 +1,17 @@
+{
+  "configDefaults" : {
+    "optionSet" : {
+      "BazelBuildOptionsDebug" : {
+
+      },
+      "BazelBuildOptionsRelease" : {
+
+      },
+    }
+  },
+  "projectName" : "TensorFlowLiteObjC",
+  "packages" : [
+    "tensorflow/lite/experimental/objc"
+  ],
+  "workspaceRoot" : "../../../../.."
+}
diff --git a/tensorflow/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/lite/experimental/objc/apis/TFLInterpreter.h
new file mode 100644
index 0000000..3c06a4b
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/apis/TFLInterpreter.h
@@ -0,0 +1,179 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+@class TFLInterpreterOptions;
+@class TFLTensor;
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLInterpreterErrorCode
+ * This enum specifies various error codes related to `TFLInterpreter`.
+ */
+typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) {
+  /** Provided tensor index is invalid. */
+  TFLInterpreterErrorCodeInvalidTensorIndex,
+
+  /** Input data has invalid byte size. */
+  TFLInterpreterErrorCodeInvalidInputByteSize,
+
+  /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */
+  TFLInterpreterErrorCodeInvalidShape,
+
+  /** Provided model cannot be loaded. */
+  TFLInterpreterErrorCodeFailedToLoadModel,
+
+  /** Failed to create `TFLInterpreter`. */
+  TFLInterpreterErrorCodeFailedToCreateInterpreter,
+
+  /** Failed to invoke `TFLInterpreter`. */
+  TFLInterpreterErrorCodeFailedToInvoke,
+
+  /** Failed to retrieve a tensor. */
+  TFLInterpreterErrorCodeFailedToGetTensor,
+
+  /** Invalid tensor. */
+  TFLInterpreterErrorCodeInvalidTensor,
+
+  /** Failed to resize an input tensor. */
+  TFLInterpreterErrorCodeFailedToResizeInputTensor,
+
+  /** Failed to copy data into an input tensor. */
+  TFLInterpreterErrorCodeFailedToCopyDataToInputTensor,
+
+  /** Copying data into an output tensor is not allowed. */
+  TFLInterpreterErrorCodeCopyDataToOutputTensorNotAllowed,
+
+  /** Failed to get data from a tensor. */
+  TFLInterpreterErrorCodeFailedToGetDataFromTensor,
+
+  /** Failed to allocate memory for tensors. */
+  TFLInterpreterErrorCodeFailedToAllocateTensors,
+
+  /** Operation not allowed without allocating memory for tensors first. */
+  TFLInterpreterErrorCodeAllocateTensorsRequired,
+
+  /** Operation not allowed without invoking the interpreter first. */
+  TFLInterpreterErrorCodeInvokeInterpreterRequired,
+};
+
+/**
+ * A TensorFlow Lite model interpreter.
+ */
+@interface TFLInterpreter : NSObject
+
+/** The total number of input tensors. 0 if the interpreter creation failed. */
+@property(nonatomic, readonly) NSUInteger inputTensorCount;
+
+/** The total number of output tensors. 0 if the interpreter creation failed. */
+@property(nonatomic, readonly) NSUInteger outputTensorCount;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+/**
+ * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the
+ * default interpreter options.
+ *
+ * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ * @param error An optional error parameter populated when there is an error in initializing the
+ *     interpreter.
+ *
+ * @return A new instance of `TFLInterpreter` with the given model and the default interpreter
+ *     options. `nil` if there is an error in initializing the interpreter.
+ */
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error;
+
+/**
+ * Initializes a new TensorFlow Lite interpreter instance with the given model file path and
+ * options.
+ *
+ * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ * @param options Options to use for configuring the TensorFlow Lite interpreter.
+ * @param error An optional error parameter populated when there is an error in initializing the
+ *     interpreter.
+ *
+ * @return A new instance of `TFLInterpreter` with the given model and options. `nil` if there is an
+ *     error in initializing the interpreter.
+ */
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath
+                                   options:(TFLInterpreterOptions *)options
+                                     error:(NSError **)error NS_DESIGNATED_INITIALIZER;
+
+/**
+ * Invokes the interpreter to run inference.
+ *
+ * @param error An optional error parameter populated when there is an error in invoking the
+ *     interpreter.
+ *
+ * @return Whether the invocation is successful. Returns NO if an error occurred.
+ */
+- (BOOL)invokeWithError:(NSError **)error;
+
+/**
+ * Returns the input tensor at the given index.
+ *
+ * @param index The index of an input tensor.
+ * @param error An optional error parameter populated when there is an error in looking up the input
+ *     tensor.
+ *
+ * @return The input tensor at the given index. `nil` if there is an error. See the `TFLTensor`
+ *     class documentation for more details on the lifetime relationship between the returned
+ *     tensor and this interpreter.
+ */
+- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Returns the output tensor at the given index.
+ *
+ * @param index The index of an output tensor.
+ * @param error An optional error parameter populated when there is an error in looking up the
+ *     output tensor.
+ *
+ * @return The output tensor at the given index. `nil` if there is an error. See the `TFLTensor`
+ *     class documentation for more details on the lifetime relationship between the returned
+ *     tensor and this interpreter.
+ */
+- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned
+ * integers).
+ *
+ * @param index The index of an input tensor.
+ * @param shape Shape that the given input tensor should be resized to. It should be an array of
+ *     positive unsigned integer(s) containing the size of each dimension.
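+ *     For example, pass `@[ @2 ]` to resize the tensor to one dimension with 2 elements.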
+ * @param error An optional error parameter populated when there is an error in resizing the input
+ *     tensor.
+ *
+ * @return Whether the input tensor was resized successfully. Returns NO if an error occurred.
+ */
+- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
+                         toShape:(NSArray<NSNumber *> *)shape
+                           error:(NSError **)error;
+
+/**
+ * Allocates memory for tensors.
+ *
+ * @param error An optional error parameter populated when there is an error in allocating memory.
+ *
+ * @return Whether memory allocation is successful. Returns NO if an error occurred.
+ */
+- (BOOL)allocateTensorsWithError:(NSError **)error;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h
new file mode 100644
index 0000000..6461fbf
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h
@@ -0,0 +1,37 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Custom configuration options for a TensorFlow Lite interpreter. */
+@interface TFLInterpreterOptions : NSObject
+
+/**
+ * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified,
+ * letting TensorFlow Lite optimize the threading decision).
+ */
+@property(nonatomic) NSUInteger numberOfThreads;
+
+/**
+ * Initializes a new instance of `TFLInterpreterOptions`.
+ *
+ * @return A new instance of `TFLInterpreterOptions`.
+ */
+- (instancetype)init NS_DESIGNATED_INITIALIZER;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h
new file mode 100644
index 0000000..3d5cf79
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h
@@ -0,0 +1,36 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * Parameters for asymmetric quantization. Quantized values can be converted to float values using:
+ * `realValue = scale * (quantizedValue - zeroPoint)`.
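+ *
+ * For example, with `scale = 0.003922` and `zeroPoint = 0`, the quantized value `255` converts to
+ * `0.003922 * (255 - 0)`, which is approximately `1.0`.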
+ */
+@interface TFLQuantizationParameters : NSObject
+
+/** Scale of asymmetric quantization. */
+@property(nonatomic, readonly) float scale;
+
+/** Zero point of asymmetric quantization. */
+@property(nonatomic, readonly) int32_t zeroPoint;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/lite/experimental/objc/apis/TFLTensor.h
new file mode 100644
index 0000000..dc710ab
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/apis/TFLTensor.h
@@ -0,0 +1,111 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+@class TFLQuantizationParameters;
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLTensorDataType
+ * This enum specifies supported TensorFlow Lite tensor data types.
+ */
+typedef NS_ENUM(NSUInteger, TFLTensorDataType) {
+  /** Tensor data type not available. This indicates an error with the model. */
+  TFLTensorDataTypeNoType,
+
+  /** 32-bit single precision floating point. */
+  TFLTensorDataTypeFloat32,
+
+  /** 32-bit signed integer. */
+  TFLTensorDataTypeInt32,
+
+  /** 8-bit unsigned integer. */
+  TFLTensorDataTypeUInt8,
+
+  /** 64-bit signed integer. */
+  TFLTensorDataTypeInt64,
+
+  /** Boolean. */
+  TFLTensorDataTypeBool,
+
+  /** 16-bit signed integer. */
+  TFLTensorDataTypeInt16,
+
+  /** 8-bit signed integer. */
+  TFLTensorDataTypeInt8,
+};
+
+/**
+ * An input or output tensor in a TensorFlow Lite model.
+ *
+ * @warning Each `TFLTensor` instance is associated with a `TFLInterpreter` instance. Multiple
+ *     `TFLTensor` instances of the same TensorFlow Lite model are associated with the same
+ *     `TFLInterpreter` instance. As long as a `TFLTensor` instance is still in use, its associated
+ *     `TFLInterpreter` instance will not be deallocated.
+ */
+@interface TFLTensor : NSObject
+
+/** Name of the tensor. */
+@property(nonatomic, readonly, copy) NSString *name;
+
+/** Data type of the tensor. */
+@property(nonatomic, readonly) TFLTensorDataType dataType;
+
+/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */
+@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+/**
+ * Copies the given data into an input tensor. This is allowed only for an input tensor and only
+ * before the interpreter is invoked; otherwise an error will be returned.
+ *
+ * @param data The data to set. The byte size of the data must match what's required by the input
+ *     tensor.
+ * @param error An optional error parameter populated when there is an error in copying the data.
+ *
+ * @return Whether the data was copied into the input tensor successfully. Returns NO if an error
+ *     occurred.
+ */
+- (BOOL)copyData:(NSData *)data error:(NSError **)error;
+
+/**
+ * Retrieves a copy of data in the tensor. For an output tensor, the data is only available after
+ * the interpreter invocation has successfully completed; otherwise an error will be returned.
+ *
+ * @param error An optional error parameter populated when there is an error in retrieving the data.
+ *
+ * @return A copy of data in the tensor. `nil` if there is an error in retrieving the data or the
+ *     data is not available.
+ */
+- (nullable NSData *)dataWithError:(NSError **)error;
+
+/**
+ * Retrieves the shape of the tensor, an array of positive unsigned integers containing the size
+ * of each dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is
+ * [2, 2, 3] (i.e. an array of 2 arrays of 2 arrays of 3 numbers).
+ *
+ * @param error An optional error parameter populated when there is an error in retrieving the
+ *     shape.
+ *
+ * @return The shape of the tensor. `nil` if there is an error in retrieving the shape.
+ */
+- (nullable NSArray<NSNumber *> *)shapeWithError:(NSError **)error;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.h
new file mode 100644
index 0000000..ce8d50c
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.h
@@ -0,0 +1,40 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Helper utility for error reporting. */
+@interface TFLErrorUtil : NSObject
+
+/**
+ * Creates and saves an interpreter error with the given error code and description.
+ *
+ * @param code Error code.
+ * @param description Error description.
+ * @param error Pointer to where to save the created error. If `nil`, no error will be saved.
+ */
++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                         description:(NSString *)description
+                               error:(NSError **)error;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.m
new file mode 100644
index 0000000..aa973c7
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.m
@@ -0,0 +1,38 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "TFLErrorUtil.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Error domain of TensorFlow Lite interpreter related errors. */
+static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter";
+
+@implementation TFLErrorUtil
+
+#pragma mark - Public
+
++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                         description:(NSString *)description
+                               error:(NSError **)error {
+  if (error) {
+    *error = [NSError errorWithDomain:TFLInterpreterErrorDomain
+                                 code:code
+                             userInfo:@{NSLocalizedDescriptionKey : description}];
+  }
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLInterpreter+Internal.h b/tensorflow/lite/experimental/objc/sources/TFLInterpreter+Internal.h
new file mode 100644
index 0000000..9b900c4
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLInterpreter+Internal.h
@@ -0,0 +1,63 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h"
+
+@class TFLTensor;
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface TFLInterpreter (Internal)
+
+/**
+ * Copies the given data into the input tensor at the given index. This is allowed only before the
+ * interpreter is invoked.
+ *
+ * @param data The data to set. The byte size of the data must match what's required by the input
+ *     tensor at the given index.
+ * @param index An input tensor index.
+ * @param error An optional error parameter populated when there is an error in setting the data.
+ *
+ * @return Whether the data was copied into the input tensor at the given index successfully.
+ *     Returns NO if an error occurred.
+ */
+- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Retrieves a copy of the data from the given tensor. For an output tensor, the interpreter
+ * invocation has to complete before the data can be retrieved.
+ *
+ * @param tensor A tensor.
+ * @param error An optional error parameter populated when there is an error in getting the data.
+ *
+ * @return The data of the given tensor. `nil` if there is an error or data is not available.
+ */
+- (nullable NSData *)dataFromTensor:(TFLTensor *)tensor error:(NSError **)error;
+
+/**
+ * Retrieves the shape of the given tensor, an array of positive unsigned integer(s) containing the
+ * size of each dimension. For example: shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is
+ * [2, 2, 3].
+ *
+ * @param tensor An input or output tensor.
+ * @param error An optional error parameter populated when there is an error in retrieving the
+ *     shape.
+ *
+ * @return The shape of the tensor. `nil` if there is an error in retrieving the shape.
+ */
+- (nullable NSArray<NSNumber *> *)shapeOfTensor:(TFLTensor *)tensor error:(NSError **)error;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm
new file mode 100644
index 0000000..a8ca982
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm
@@ -0,0 +1,407 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h"
+
+#import "TFLErrorUtil.h"
+#import "TFLQuantizationParameters+Internal.h"
+#import "TFLTensor+Internal.h"
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h"
+
+#include "tensorflow/lite/experimental/c/c_api.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * Error reporter for TFLInterpreter.
+ *
+ * @param user_data User data. Not used.
+ * @param format Error message which may contain argument formatting specifiers.
+ * @param args Values of the arguments in the error message.
+ */
+static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) {
+  NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]);
+}
+
+@interface TFLInterpreter ()
+
+/** TFL_Interpreter backed by C API. */
+@property(nonatomic, nullable) TFL_Interpreter *interpreter;
+
+@end
+
+@implementation TFLInterpreter
+
+#pragma mark - NSObject
+
+- (void)dealloc {
+  TFL_DeleteInterpreter(_interpreter);
+}
+
+#pragma mark - Public
+
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error {
+  return [self initWithModelPath:modelPath
+                         options:[[TFLInterpreterOptions alloc] init]
+                           error:error];
+}
+
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath
+                                   options:(TFLInterpreterOptions *)options
+                                     error:(NSError **)error {
+  self = [super init];
+
+  if (self != nil) {
+    TFL_Model *model = nullptr;
+    TFL_InterpreterOptions *cOptions = nullptr;
+
+    @try {
+      const char *modelPathCString = modelPath.UTF8String;
+      NSString *pathErrorString =
+          [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath];
+      if (modelPathCString == nullptr) {
+        [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
+                                       description:pathErrorString
+                                             error:error];
+        return nil;
+      }
+
+      model = TFL_NewModelFromFile(modelPathCString);
+      if (model == nullptr) {
+        [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
+                                       description:pathErrorString
+                                             error:error];
+        return nil;
+      }
+
+      cOptions = TFL_NewInterpreterOptions();
+      if (cOptions == nullptr) {
+        [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                       description:@"Failed to create the interpreter."
+                                             error:error];
+        return nil;
+      }
+
+      if (options.numberOfThreads > 0) {
+        TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads);
+      }
+      TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr);
+
+      _interpreter = TFL_NewInterpreter(model, cOptions);
+      if (_interpreter == nullptr) {
+        [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                       description:@"Failed to create the interpreter."
+                                             error:error];
+        return nil;
+      }
+
+      _inputTensorCount = (NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter);
+      _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter);
+      if (_inputTensorCount <= 0 || _outputTensorCount <= 0) {
+        [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                       description:@"Failed to create the interpreter."
+                                             error:error];
+        return nil;
+      }
+    } @finally {
+      TFL_DeleteInterpreterOptions(cOptions);
+      TFL_DeleteModel(model);
+    }
+  }
+
+  return self;
+}
+
+- (BOOL)invokeWithError:(NSError **)error {
+  if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke
+                                   description:@"Failed to invoke the interpreter."
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return nil;
+  }
+
+  return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error];
+}
+
+- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
+    return nil;
+  }
+
+  return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error];
+}
+
+- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
+                         toShape:(NSArray<NSNumber *> *)shape
+                           error:(NSError **)error {
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return NO;
+  }
+
+  if (shape.count == 0) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
+                                   description:@"Invalid shape. Must not be empty."
+                                         error:error];
+    return NO;
+  }
+
+  // One C array entry per dimension of the requested shape.
+  int cDimensions[shape.count];
+  for (int dimIndex = 0; dimIndex < shape.count; ++dimIndex) {
+    int dimension = shape[dimIndex].intValue;
+    if (dimension <= 0) {
+      NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers.";
+      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
+                                     description:errorDescription
+                                           error:error];
+      return NO;
+    }
+    cDimensions[dimIndex] = dimension;
+  }
+
+  if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions,
+                                       (int32_t)shape.count) != kTfLiteOk) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (BOOL)allocateTensorsWithError:(NSError **)error {
+  if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors
+                                   description:@"Failed to allocate memory for tensors."
+                                         error:error];
+    return NO;
+  }
+  return YES;
+}
+
+#pragma mark - TFLInterpreter (Internal)
+
+- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  const TFL_Tensor *cTensor = [self cTensorOfType:TFLTensorTypeInput atIndex:index error:error];
+  if (cTensor == nullptr) {
+    return NO;
+  }
+
+  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(cTensor);
+  if (data.length != byteSize) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).",
+                         (unsigned long)index, (unsigned long)byteSize,
+                         (unsigned long)data.length];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  if (TFL_TensorCopyFromBuffer((TFL_Tensor *)cTensor, data.bytes, data.length) != kTfLiteOk) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).",
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (nullable NSData *)dataFromTensor:(TFLTensor *)tensor error:(NSError **)error {
+  const TFL_Tensor *cTensor = [self cTensorOfType:tensor.type atIndex:tensor.index error:error];
+  if (cTensor == nullptr) {
+    return nil;
+  }
+
+  void *bytes = TFL_TensorData(cTensor);
+  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(cTensor);
+  if (bytes == nullptr || byteSize == 0) {
+    NSString *tensorType = [TFLTensor stringForTensorType:tensor.type];
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to get data from %@ tensor at index (%lu).", tensorType,
+                                   (unsigned long)tensor.index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+
+  return [NSData dataWithBytes:bytes length:byteSize];
+}
+
+- (nullable NSArray<NSNumber *> *)shapeOfTensor:(TFLTensor *)tensor error:(NSError **)error {
+  const TFL_Tensor *cTensor = [self cTensorOfType:tensor.type atIndex:tensor.index error:error];
+  if (cTensor == nullptr) {
+    return nil;
+  }
+
+  NSString *tensorType = [TFLTensor stringForTensorType:tensor.type];
+  int32_t rank = TFL_TensorNumDims(cTensor);
+  if (rank <= 0) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType,
+                                   (unsigned long)tensor.index, rank];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank];
+  for (int32_t dimIndex = 0; dimIndex < rank; dimIndex++) {
+    int32_t dimension = TFL_TensorDim(cTensor, dimIndex);
+    if (dimension <= 0) {
+      NSString *errorDescription =
+          [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).",
+                                     tensorType, (unsigned long)tensor.index, dimIndex, dimension];
+      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensor
+                                     description:errorDescription
+                                           error:error];
+      return nil;
+    }
+    shape[dimIndex] = @((NSUInteger)dimension);
+  }
+
+  return shape;
+}
+
+#pragma mark - Private
+
+- (const TFL_Tensor *)cTensorOfType:(TFLTensorType)type
+                            atIndex:(NSUInteger)index
+                              error:(NSError **)error {
+  const TFL_Tensor *tensor = nullptr;
+
+  switch (type) {
+    case TFLTensorTypeInput:
+      tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
+      break;
+    case TFLTensorTypeOutput:
+      tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
+      break;
+  }
+
+  if (tensor == nullptr) {
+    NSString *tensorType = [TFLTensor stringForTensorType:type];
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType,
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                   description:errorDescription
+                                         error:error];
+  }
+
+  return tensor;
+}
+
+- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type
+                             atIndex:(NSUInteger)index
+                               error:(NSError **)error {
+  const TFL_Tensor *tensor = [self cTensorOfType:type atIndex:index error:error];
+
+  if (tensor == nullptr) {
+    return nil;
+  }
+
+  NSString *tensorType = [TFLTensor stringForTensorType:type];
+  const char *cName = TFL_TensorName(tensor);
+  if (cName == nullptr) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType,
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+  NSString *name = [NSString stringWithUTF8String:cName];
+
+  TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)];
+
+  TFL_QuantizationParams cParams = TFL_TensorQuantizationParams(tensor);
+  TFLQuantizationParameters *quantizationParams;
+
+  // TODO(b/119735362): Update this check once the TFL_QuantizationParams struct has a mode.
+  if (cParams.scale != 0.0) {
+    quantizationParams = [[TFLQuantizationParameters alloc] initWithScale:cParams.scale
+                                                                zeroPoint:cParams.zero_point];
+  }
+
+  return [[TFLTensor alloc] initWithInterpreter:self
+                                           type:type
+                                          index:index
+                                           name:name
+                                       dataType:dataType
+                         quantizationParameters:quantizationParams];
+}
+
+- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType {
+  switch (cTensorType) {
+    case kTfLiteFloat32:
+      return TFLTensorDataTypeFloat32;
+    case kTfLiteInt32:
+      return TFLTensorDataTypeInt32;
+    case kTfLiteUInt8:
+      return TFLTensorDataTypeUInt8;
+    case kTfLiteInt8:
+      return TFLTensorDataTypeInt8;
+    case kTfLiteInt64:
+      return TFLTensorDataTypeInt64;
+    case kTfLiteBool:
+      return TFLTensorDataTypeBool;
+    case kTfLiteInt16:
+      return TFLTensorDataTypeInt16;
+    case kTfLiteNoType:
+    case kTfLiteString:
+    case kTfLiteComplex64:
+      // kTfLiteString and kTfLiteComplex64 are not supported by the TensorFlow Lite Objective-C API.
+      return TFLTensorDataTypeNoType;
+  }
+}
+
+- (BOOL)isValidTensorIndex:(NSUInteger)index
+                belowLimit:(NSUInteger)totalTensorCount
+                     error:(NSError **)error {
+  if (index >= totalTensorCount) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Invalid tensor index (%lu) exceeds max (%lu).",
+                                   (unsigned long)index, (unsigned long)(totalTensorCount - 1)];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/lite/experimental/objc/sources/TFLInterpreterOptions.m
new file mode 100644
index 0000000..d129bef
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLInterpreterOptions.m
@@ -0,0 +1,30 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@implementation TFLInterpreterOptions
+
+#pragma mark - Public
+
+- (instancetype)init {
+  self = [super init];
+  return self;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h
new file mode 100644
index 0000000..37d9ef0
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h
@@ -0,0 +1,33 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface TFLQuantizationParameters (Internal)
+
+/**
+ * Initializes a `TFLQuantizationParameters` instance with the given scale and zero point.
+ *
+ * @param scale Scale of asymmetric quantization.
+ * @param zeroPoint Zero point of asymmetric quantization.
+ *
+ * @return A new instance of `TFLQuantizationParameters` with the given scale and zero point.
+ */
+- (instancetype)initWithScale:(float)scale zeroPoint:(int32_t)zeroPoint;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters.m
new file mode 100644
index 0000000..44cb90d
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters.m
@@ -0,0 +1,36 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h"
+
+#import "TFLQuantizationParameters+Internal.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@implementation TFLQuantizationParameters
+
+#pragma mark - TFLQuantizationParameters (Internal)
+
+- (instancetype)initWithScale:(float)scale zeroPoint:(int32_t)zeroPoint {
+  self = [super init];
+  if (self != nil) {
+    _scale = scale;
+    _zeroPoint = zeroPoint;
+  }
+  return self;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/lite/experimental/objc/sources/TFLTensor+Internal.h
new file mode 100644
index 0000000..3d5c51c
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLTensor+Internal.h
@@ -0,0 +1,74 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h"
+
+@class TFLInterpreter;
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLTensorType
+ * This enum specifies input or output tensor types.
+ */
+typedef NS_ENUM(NSUInteger, TFLTensorType) {
+  /** Input tensor type. */
+  TFLTensorTypeInput,
+
+  /** Output tensor type. */
+  TFLTensorTypeOutput,
+};
+
+@interface TFLTensor (Internal)
+
+/** Input or output tensor type. */
+@property(nonatomic, readonly) TFLTensorType type;
+
+/** Index of the tensor. */
+@property(nonatomic, readonly) NSUInteger index;
+
+/**
+ * Initializes a `TFLTensor` with the given interpreter, tensor type, index, name, data type, and
+ * quantization parameters.
+ *
+ * @param interpreter Interpreter backing the tensor.
+ * @param type Input or output tensor type.
+ * @param index Index of the tensor.
+ * @param name Name of the tensor.
+ * @param dataType Data type of the tensor.
+ * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not
+ *     use quantization.
+ *
+ * @return A new instance of `TFLTensor` with the given type, index, name, data type, and
+ *     quantization parameters.
+ */
+- (instancetype)initWithInterpreter:(TFLInterpreter *)interpreter
+                               type:(TFLTensorType)type
+                              index:(NSUInteger)index
+                               name:(NSString *)name
+                           dataType:(TFLTensorDataType)dataType
+             quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters;
+
+/**
+ * Returns the string name of the given input or output tensor type.
+ *
+ * @param type Input or output tensor type.
+ *
+ * @return The string name of the given input or output tensor type.
+ */
++ (NSString *)stringForTensorType:(TFLTensorType)type;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/lite/experimental/objc/sources/TFLTensor.m
new file mode 100644
index 0000000..2eaebfd
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/sources/TFLTensor.m
@@ -0,0 +1,103 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h"
+
+#import "TFLErrorUtil.h"
+#import "TFLInterpreter+Internal.h"
+#import "TFLTensor+Internal.h"
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+// String names of input or output tensor types.
+static NSString *const kTFLInputTensorTypeString = @"input";
+static NSString *const kTFLOutputTensorTypeString = @"output";
+
+@interface TFLTensor ()
+
+// Redefines readonly properties.
+@property(nonatomic) TFLTensorType type;
+@property(nonatomic) NSUInteger index;
+@property(nonatomic, copy) NSString *name;
+@property(nonatomic) TFLTensorDataType dataType;
+@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters;
+
+/**
+ * The backing interpreter. It's a strong reference to ensure that the interpreter is never released
+ * before this tensor is released.
+ *
+ * @warning Never let the interpreter hold a strong reference to the tensor to avoid retain cycles.
+ */
+@property(nonatomic) TFLInterpreter *interpreter;
+
+@end
+
+@implementation TFLTensor
+
+#pragma mark - Public
+
+- (BOOL)copyData:(NSData *)data error:(NSError **)error {
+  if (self.type == TFLTensorTypeOutput) {
+    [TFLErrorUtil
+        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeCopyDataToOutputTensorNotAllowed
+                         description:@"Cannot copy data into an output tensor."
+                               error:error];
+    return NO;
+  }
+
+  return [self.interpreter copyData:data toInputTensorAtIndex:self.index error:error];
+}
+
+- (nullable NSData *)dataWithError:(NSError **)error {
+  return [self.interpreter dataFromTensor:self error:error];
+}
+
+- (nullable NSArray<NSNumber *> *)shapeWithError:(NSError **)error {
+  return [self.interpreter shapeOfTensor:self error:error];
+}
+
+#pragma mark - TFLTensor (Internal)
+
+- (instancetype)initWithInterpreter:(TFLInterpreter *)interpreter
+                               type:(TFLTensorType)type
+                              index:(NSUInteger)index
+                               name:(NSString *)name
+                           dataType:(TFLTensorDataType)dataType
+             quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters {
+  self = [super init];
+  if (self != nil) {
+    _interpreter = interpreter;
+    _type = type;
+    _index = index;
+    _name = [name copy];
+    _dataType = dataType;
+    _quantizationParameters = quantizationParameters;
+  }
+  return self;
+}
+
++ (NSString *)stringForTensorType:(TFLTensorType)type {
+  switch (type) {
+    case TFLTensorTypeInput:
+      return kTFLInputTensorTypeString;
+    case TFLTensorTypeOutput:
+      return kTFLOutputTensorTypeString;
+  }
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
new file mode 100644
index 0000000..00b800d
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
@@ -0,0 +1,49 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+
+#import <XCTest/XCTest.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * Unit tests for TFLInterpreterOptions.
+ */
+@interface TFLInterpreterOptionsTests : XCTestCase
+@end
+
+@implementation TFLInterpreterOptionsTests
+
+#pragma mark - Tests
+
+- (void)testInit {
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  XCTAssertNotNil(options);
+  XCTAssertEqual(options.numberOfThreads, 0);
+}
+
+- (void)testSetNumberOfThreads {
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  options.numberOfThreads = 2;
+  XCTAssertEqual(options.numberOfThreads, 2);
+  options.numberOfThreads = 0;
+  XCTAssertEqual(options.numberOfThreads, 0);
+  options.numberOfThreads = 3;
+  XCTAssertEqual(options.numberOfThreads, 3);
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/lite/experimental/objc/tests/TFLInterpreterTests.m
new file mode 100644
index 0000000..eefa9b9
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/tests/TFLInterpreterTests.m
@@ -0,0 +1,358 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h"
+
+#import <XCTest/XCTest.h>
+
+#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h"
+#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Float model resource name. */
+static NSString *const kAddFloatModelResourceName = @"add";
+
+/** Quantized model resource name. */
+static NSString *const kAddQuantizedModelResourceName = @"add_quantized";
+
+/** Model resource type. */
+static NSString *const kAddModelResourceType = @"bin";
+
+/** Rank of the input and output tensor in the Add model. */
+static const NSUInteger kAddModelTensorRank = 1U;
+
+/** Size of the first (and only) dimension of the input and output tensor in the Add model. */
+static const NSUInteger kAddModelTensorFirstDimensionSize = 2U;
+
+/** Quantization scale of the quantized model. */
+static const float kAddQuantizedModelScale = 0.003922F;
+
+/** Quantization zero point of the quantized model. */
+static const int32_t kAddQuantizedModelZeroPoint = 0;
+
+/** Invalid input tensor index. */
+static const NSUInteger kInvalidInputTensorIndex = 1U;
+
+/** Invalid output tensor index. */
+static const NSUInteger kInvalidOutputTensorIndex = 1U;
+
+/** Accuracy used in comparing floating point numbers. */
+static const float kTestAccuracy = 1E-5F;
+
+/**
+ * Unit tests for TFLInterpreter.
+ */
+@interface TFLInterpreterTests : XCTestCase
+
+/** Absolute path of the Add float model resource. */
+@property(nonatomic, nullable) NSString *floatModelPath;
+
+/** Default interpreter using the Add model. */
+@property(nonatomic, nullable) TFLInterpreter *interpreter;
+
+@end
+
+@implementation TFLInterpreterTests
+
+#pragma mark - XCTestCase
+
+- (void)setUp {
+  [super setUp];
+
+  NSBundle *bundle = [NSBundle bundleForClass:[self class]];
+  self.floatModelPath = [bundle pathForResource:kAddFloatModelResourceName
+                                         ofType:kAddModelResourceType];
+  NSError *error;
+  self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.floatModelPath error:&error];
+  XCTAssertNil(error);
+  XCTAssertNotNil(self.interpreter);
+  XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]);
+}
+
+- (void)tearDown {
+  self.floatModelPath = nil;
+  self.interpreter = nil;
+
+  [super tearDown];
+}
+
+#pragma mark - Tests
+
+- (void)testSuccessfulFullRunAddFloatModel {
+  // Shape for both the input and output tensors.
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+
+  // Creates the interpreter options.
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  XCTAssertNotNil(options);
+  options.numberOfThreads = 2;
+
+  // Creates the interpreter.
+  NSError *error;
+  TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.floatModelPath
+                                                                        options:options
+                                                                          error:&error];
+  XCTAssertNil(error);
+  XCTAssertNotNil(customInterpreter);
+
+  // Allocates memory for tensors.
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies input and output tensor counts.
+  XCTAssertEqual(customInterpreter.inputTensorCount, 1);
+  XCTAssertEqual(customInterpreter.outputTensorCount, 1);
+
+  // Resizes the input tensor.
+  XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertNil(error);
+
+  // Re-allocates memory for tensors.
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the input tensor.
+  TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(inputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([inputTensor.name isEqualToString:@"input"]);
+  XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32);
+  NSArray *inputTensorShape = [inputTensor shapeWithError:&error];
+  XCTAssertNil(error);
+  XCTAssertTrue([shape isEqualToArray:inputTensorShape]);
+
+  // Copies the input data.
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:sizeof(float)];
+  XCTAssertTrue([inputTensor copyData:inputData error:&error]);
+  XCTAssertNil(error);
+
+  // Invokes the interpreter.
+  XCTAssertTrue([customInterpreter invokeWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the output tensor.
+  TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(outputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([outputTensor.name isEqualToString:@"output"]);
+  XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32);
+  NSArray *outputTensorShape = [outputTensor shapeWithError:&error];
+  XCTAssertNil(error);
+  XCTAssertTrue([shape isEqualToArray:outputTensorShape]);
+
+  // Tries to query an invalid output tensor index.
+  TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex
+                                                                    error:&error];
+  XCTAssertNil(invalidOutputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+
+  // Gets the output tensor data.
+  error = nil;
+  NSData *outputData = [outputTensor dataWithError:&error];
+  XCTAssertNotNil(outputData);
+  XCTAssertNil(error);
+  float output[kAddModelTensorFirstDimensionSize];
+  [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)];
+  XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy);
+  XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy);
+}
+
+- (void)testSuccessfulFullRunQuantizedModel {
+  // Shape for both the input and output tensors.
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+
+  // Creates the interpreter options.
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  XCTAssertNotNil(options);
+  options.numberOfThreads = 2;
+
+  NSBundle *bundle = [NSBundle bundleForClass:[self class]];
+  NSString *quantizedModelPath = [bundle pathForResource:kAddQuantizedModelResourceName
+                                                  ofType:kAddModelResourceType];
+
+  // Creates the interpreter.
+  NSError *error;
+  TFLInterpreter *customInterpreter =
+      [[TFLInterpreter alloc] initWithModelPath:quantizedModelPath options:options error:&error];
+  XCTAssertNil(error);
+  XCTAssertNotNil(customInterpreter);
+
+  // Allocates memory for tensors.
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies input and output tensor counts.
+  XCTAssertEqual(customInterpreter.inputTensorCount, 1);
+  XCTAssertEqual(customInterpreter.outputTensorCount, 1);
+
+  // Resizes the input tensor.
+  XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertNil(error);
+
+  // Re-allocates memory for tensors.
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the input tensor.
+  TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(inputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([inputTensor.name isEqualToString:@"input"]);
+  XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeUInt8);
+  XCTAssertEqualWithAccuracy(inputTensor.quantizationParameters.scale, kAddQuantizedModelScale,
+                             kTestAccuracy);
+  XCTAssertEqual(inputTensor.quantizationParameters.zeroPoint, kAddQuantizedModelZeroPoint);
+  NSArray *inputTensorShape = [inputTensor shapeWithError:&error];
+  XCTAssertNil(error);
+  XCTAssertTrue([shape isEqualToArray:inputTensorShape]);
+
+  // Copies the input data.
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  uint8_t one = 1;
+  uint8_t three = 3;
+  [inputData appendBytes:&one length:sizeof(uint8_t)];
+  [inputData appendBytes:&three length:sizeof(uint8_t)];
+  XCTAssertTrue([inputTensor copyData:inputData error:&error]);
+  XCTAssertNil(error);
+
+  // Invokes the interpreter.
+  XCTAssertTrue([customInterpreter invokeWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the output tensor.
+  TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(outputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([outputTensor.name isEqualToString:@"output"]);
+  XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeUInt8);
+  XCTAssertEqualWithAccuracy(outputTensor.quantizationParameters.scale, kAddQuantizedModelScale,
+                             kTestAccuracy);
+  XCTAssertEqual(outputTensor.quantizationParameters.zeroPoint, kAddQuantizedModelZeroPoint);
+  NSArray *outputTensorShape = [outputTensor shapeWithError:&error];
+  XCTAssertNil(error);
+  XCTAssertTrue([shape isEqualToArray:outputTensorShape]);
+
+  // Tries to query an invalid output tensor index.
+  TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex
+                                                                    error:&error];
+  XCTAssertNil(invalidOutputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+
+  // Gets the output tensor data.
+  error = nil;
+  NSData *outputData = [outputTensor dataWithError:&error];
+  XCTAssertNotNil(outputData);
+  XCTAssertNil(error);
+  uint8_t output[kAddModelTensorFirstDimensionSize];
+  [outputData getBytes:output length:(sizeof(uint8_t) * kAddModelTensorFirstDimensionSize)];
+  XCTAssertEqual(output[0], 3);
+  XCTAssertEqual(output[1], 9);
+}
+
+- (void)testInitWithModelPath_invalidPath {
+  // Creates the interpreter.
+  NSError *error;
+  TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath"
+                                                                          error:&error];
+  XCTAssertNil(brokenInterpreter);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+}
+
+- (void)testInvoke_beforeAllocation {
+  NSError *error;
+  TFLInterpreter *interpreterWithoutAllocation =
+      [[TFLInterpreter alloc] initWithModelPath:self.floatModelPath error:&error];
+  XCTAssertNotNil(interpreterWithoutAllocation);
+  XCTAssertNil(error);
+
+  XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke);
+}
+
+- (void)testInputTensorAtIndex_invalidIndex {
+  NSError *error;
+  TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex
+                                                          error:&error];
+  XCTAssertNil(inputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+}
+
+- (void)testResizeInputTensorAtIndex_invalidIndex {
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex
+                                                    toShape:shape
+                                                      error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+}
+
+- (void)testResizeInputTensorAtIndex_emptyShape {
+  NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
+}
+
+- (void)testResizeInputTensorAtIndex_zeroDimensionSize {
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:0];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
+}
+
+- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize {
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:(sizeof(float) - 1)];
+  NSError *error;
+  TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(inputTensor);
+  XCTAssertNil(error);
+  XCTAssertFalse([inputTensor copyData:inputData error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize);
+}
+
+- (void)testCopyDataToOutputTensorAtIndex_notAllowed {
+  NSMutableData *data = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [data appendBytes:&one length:sizeof(float)];
+  [data appendBytes:&three length:(sizeof(float) - 1)];
+  NSError *error;
+  TFLTensor *outputTensor = [self.interpreter outputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(outputTensor);
+  XCTAssertNil(error);
+  XCTAssertFalse([outputTensor copyData:data error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeCopyDataToOutputTensorNotAllowed);
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/objc/tests/TFLQuantizationParametersTests.m b/tensorflow/lite/experimental/objc/tests/TFLQuantizationParametersTests.m
new file mode 100644
index 0000000..239e0bc
--- /dev/null
+++ b/tensorflow/lite/experimental/objc/tests/TFLQuantizationParametersTests.m
@@ -0,0 +1,48 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h"
+
+#import <XCTest/XCTest.h>
+
+#import "tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Test scale of quantization parameters. */
+static const float kTestScale = 2.0;
+
+/** Test zero point of quantization parameters. */
+static const int32_t kTestZeroPoint = 128;
+
+/**
+ * Unit tests for TFLQuantizationParameters.
+ */
+@interface TFLQuantizationParametersTests : XCTestCase
+@end
+
+@implementation TFLQuantizationParametersTests
+
+#pragma mark - Tests
+
+- (void)testInitWithScaleAndZeroPoint {
+  TFLQuantizationParameters *params =
+      [[TFLQuantizationParameters alloc] initWithScale:kTestScale zeroPoint:kTestZeroPoint];
+  XCTAssertEqual(params.scale, kTestScale);
+  XCTAssertEqual(params.zeroPoint, kTestZeroPoint);
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/lite/experimental/swift/BUILD.apple b/tensorflow/lite/experimental/swift/BUILD.apple
new file mode 100644
index 0000000..35875bb
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/BUILD.apple
@@ -0,0 +1,106 @@
+# TensorFlow Lite for Swift.
+
+package(default_visibility = ["//visibility:private"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("@build_bazel_rules_apple//apple:ios.bzl", "ios_application", "ios_unit_test")
+load("@build_bazel_rules_swift//swift:swift.bzl", "swift_library")
+
+MINIMUM_OS_VERSION = "9.0"
+
+SWIFT_COPTS = [
+    "-wmo",
+]
+
+# Default tags for filtering targets. Targets in this file are restricted to Apple platforms.
+DEFAULT_TAGS = [
+    "apple",
+    "manual",
+]
+
+swift_library(
+    name = "TensorFlowLite",
+    srcs = glob(["Sources/*.swift"]),
+    copts = SWIFT_COPTS,
+    module_name = "TensorFlowLite",
+    tags = DEFAULT_TAGS,
+    deps = [
+        "//tensorflow/lite/experimental/c:c_api",
+    ],
+)
+
+ios_unit_test(
+    name = "TensorFlowLiteTests",
+    size = "small",
+    minimum_os_version = MINIMUM_OS_VERSION,
+    tags = DEFAULT_TAGS + [
+        # DISABLED: The following sanitizers are not supported by iOS test targets.
+        "noasan",
+        "nomsan",
+        "notsan",
+    ],
+    deps = [":TensorFlowLiteTestsLib"],
+)
+
+swift_library(
+    name = "TensorFlowLiteTestsLib",
+    testonly = 1,
+    srcs = glob(["Tests/*.swift"]),
+    copts = SWIFT_COPTS,
+    tags = DEFAULT_TAGS,
+    deps = [
+        ":TensorFlowLite",
+        ":TestResources",
+    ],
+)
+
+objc_library(
+    name = "TestResources",
+    resources = [
+        "//tensorflow/lite:testdata/add.bin",
+        "//tensorflow/lite:testdata/add_quantized.bin",
+        "//tensorflow/lite:testdata/multi_add.bin",
+    ],
+    tags = DEFAULT_TAGS,
+)
+
+ios_application(
+    name = "TensorFlowLiteApp",
+    app_icons = glob(["TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/**"]),
+    bundle_id = "com.tensorflow.lite.swift.TensorFlowLite",
+    families = [
+        "ipad",
+        "iphone",
+    ],
+    infoplists = ["TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist"],
+    launch_storyboard = "TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard",
+    minimum_os_version = MINIMUM_OS_VERSION,
+    sdk_frameworks = [
+        "CoreGraphics",
+    ],
+    tags = DEFAULT_TAGS,
+    deps = [":TensorFlowLiteAppLib"],
+)
+
+swift_library(
+    name = "TensorFlowLiteAppLib",
+    srcs = glob(["TestApps/TensorFlowLiteApp/TensorFlowLiteApp/*.swift"]),
+    module_name = "TensorFlowLiteAppLib",
+    tags = DEFAULT_TAGS,
+    deps = [
+        ":TensorFlowLite",
+        ":TensorFlowLiteAppResources",
+    ],
+)
+
+objc_library(
+    name = "TensorFlowLiteAppResources",
+    storyboards = glob([
+        "TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/*.storyboard",
+    ]),
+    tags = DEFAULT_TAGS,
+    deps = [":TestResources"],
+)
diff --git a/tensorflow/lite/experimental/swift/LICENSE b/tensorflow/lite/experimental/swift/LICENSE
new file mode 100644
index 0000000..d6456956
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/tensorflow/lite/experimental/swift/README.md b/tensorflow/lite/experimental/swift/README.md
new file mode 100644
index 0000000..cf7eeac
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/README.md
@@ -0,0 +1,78 @@
+# TensorFlow Lite for Swift
+
+[TensorFlow Lite](https://www.tensorflow.org/lite/) is TensorFlow's lightweight
+solution for Swift developers. It enables low-latency inference of on-device
+machine learning models with a small binary size, fast performance, and support
+for hardware acceleration.
+
+## Getting Started
+
+### Bazel
+
+In your `BUILD` file, add the `TensorFlowLite` dependency:
+
+```python
+swift_library(
+  name = "MySwiftLib",  # Placeholder target name.
+  deps = [
+      "//tensorflow/lite/experimental/swift:TensorFlowLite",
+  ],
+)
+```
+
+In your Swift files, import the module:
+
+```swift
+import TensorFlowLite
+```
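+
+For example, inference can be run end to end as follows (a minimal sketch; the
+model path and input values are illustrative, and the API is the one defined in
+`Sources/Interpreter.swift`):
+
+```swift
+import TensorFlowLite
+
+do {
+  // Load the model and allocate its input and output tensors.
+  let interpreter = try Interpreter(modelPath: "path/to/model.tflite")
+  try interpreter.allocateTensors()
+
+  // Copy Float32 input data into the first input tensor.
+  let input: [Float32] = [1.0, 3.0]
+  let inputData = input.withUnsafeBufferPointer { Data(buffer: $0) }
+  try interpreter.copy(inputData, toInputAt: 0)
+
+  // Run inference and read back the first output tensor.
+  try interpreter.invoke()
+  let output = try interpreter.output(at: 0)
+  print(output.shape.dimensions, output.data)
+} catch {
+  print("Inference failed: \(error)")
+}
+```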
+
+If you would like to build the Swift TensorFlow Lite library using Bazel on Apple
+platforms, clone or download the [TensorFlow GitHub repo](https://github.com/tensorflow/tensorflow),
+then navigate to the root `tensorflow` directory and execute the `configure.py` script:
+
+```shell
+python configure.py
+```
+
+Follow the prompts and when asked to configure the Bazel rules for Apple
+platforms, enter `y`.
+
+Build the `TensorFlowLite` Swift library target:
+
+```shell
+bazel build tensorflow/lite/experimental/swift:TensorFlowLite
+```
+
+Run the `TensorFlowLiteTests` target:
+
+```shell
+bazel test tensorflow/lite/experimental/swift:TensorFlowLiteTests --swiftcopt=-enable-testing
+```
+
+Note that `--swiftcopt=-enable-testing` is required for optimized builds (`-c opt`).
+
+### Tulsi
+
+Open the `TensorFlowLite.tulsiproj` using the [TulsiApp](https://github.com/bazelbuild/tulsi) or by
+running the [`generate_xcodeproj.sh`](https://github.com/bazelbuild/tulsi/blob/master/src/tools/generate_xcodeproj.sh)
+script:
+
+```shell
+generate_xcodeproj.sh --genconfig tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj:TensorFlowLite --outputfolder ~/path/to/generated/TensorFlowLite.xcodeproj
+```
+
+### CocoaPods
+
+Add the following to your `Podfile`:
+
+```ruby
+use_frameworks!
+pod 'TensorFlowLiteSwift'
+```
+
+Then, run `pod install`.
+
+In your Swift files, import the module:
+
+```swift
+import TensorFlowLite
+```
diff --git a/tensorflow/lite/experimental/swift/Sources/Interpreter.swift b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift
new file mode 100644
index 0000000..a14b596
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift
@@ -0,0 +1,265 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+import TensorFlowLiteCAPI
+
+/// A TensorFlow Lite interpreter that performs inference from a given model.
+public final class Interpreter {
+
+  /// The `TFL_Interpreter` C pointer type represented as an `UnsafePointer<TFL_Interpreter>`.
+  private typealias CInterpreter = OpaquePointer
+
+  /// Total number of input tensors associated with the model.
+  public var inputTensorCount: Int {
+    return Int(TFL_InterpreterGetInputTensorCount(cInterpreter))
+  }
+
+  /// Total number of output tensors associated with the model.
+  public var outputTensorCount: Int {
+    return Int(TFL_InterpreterGetOutputTensorCount(cInterpreter))
+  }
+
+  /// The underlying `TFL_Interpreter` C pointer.
+  private var cInterpreter: CInterpreter?
+
+  /// Creates a new model interpreter instance.
+  ///
+  /// - Parameters:
+  ///   - modelPath: Local file path to a TensorFlow Lite model.
+  ///   - options: Custom configurations for the interpreter. The default is `nil`, indicating
+  ///       that the interpreter will determine the configuration options.
+  /// - Throws: An error if the model could not be loaded or the interpreter could not be created.
+  public init(modelPath: String, options: InterpreterOptions? = nil) throws {
+    guard let model = Model(filePath: modelPath) else { throw InterpreterError.failedToLoadModel }
+
+    let cInterpreterOptions: OpaquePointer? = try options.map { options in
+      guard let cOptions = TFL_NewInterpreterOptions() else {
+        throw InterpreterError.failedToCreateInterpreter
+      }
+      if let threadCount = options.threadCount, threadCount > 0 {
+        TFL_InterpreterOptionsSetNumThreads(cOptions, Int32(threadCount))
+      }
+      if options.isErrorLoggingEnabled {
+        TFL_InterpreterOptionsSetErrorReporter(
+          cOptions,
+          { (_, format, arguments) in
+            guard let cFormat = format,
+                  let message = String(cFormat: cFormat, arguments: arguments)
+            else {
+              return
+            }
+            print(String(describing: InterpreterError.tensorFlowLiteError(message)))
+          },
+          nil
+        )
+      }
+      return cOptions
+    }
+    defer { TFL_DeleteInterpreterOptions(cInterpreterOptions) }
+
+    guard let cInterpreter = TFL_NewInterpreter(model.cModel, cInterpreterOptions) else {
+      throw InterpreterError.failedToCreateInterpreter
+    }
+    self.cInterpreter = cInterpreter
+  }
+
+  deinit {
+    TFL_DeleteInterpreter(cInterpreter)
+  }
+
+  /// Invokes the interpreter to perform inference from the loaded graph.
+  ///
+  /// - Throws: An error if the model was not ready because tensors were not allocated.
+  public func invoke() throws {
+    guard TFL_InterpreterInvoke(cInterpreter) == kTfLiteOk else {
+      // TODO(b/117510052): Determine which error to throw.
+      throw InterpreterError.allocateTensorsRequired
+    }
+  }
+
+  /// Returns the input tensor at the given index.
+  ///
+  /// - Parameters:
+  ///   - index: The index for the input tensor.
+  /// - Throws: An error if the index is invalid or the tensors have not been allocated.
+  /// - Returns: The input tensor at the given index.
+  public func input(at index: Int) throws -> Tensor {
+    let maxIndex = inputTensorCount - 1
+    guard case 0...maxIndex = index else {
+      throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex)
+    }
+    guard let cTensor = TFL_InterpreterGetInputTensor(cInterpreter, Int32(index)),
+          let bytes = TFL_TensorData(cTensor),
+          let nameCString = TFL_TensorName(cTensor)
+    else {
+      throw InterpreterError.allocateTensorsRequired
+    }
+    guard let dataType = TensorDataType(type: TFL_TensorType(cTensor)) else {
+      throw InterpreterError.invalidTensorDataType
+    }
+
+    let name = String(cString: nameCString)
+    let rank = TFL_TensorNumDims(cTensor)
+    let dimensions = (0..<rank).map { Int(TFL_TensorDim(cTensor, $0)) }
+    let shape = TensorShape(dimensions)
+    let byteCount = TFL_TensorByteSize(cTensor)
+    let data = Data(bytes: bytes, count: byteCount)
+    let cQuantizationParams = TFL_TensorQuantizationParams(cTensor)
+    let scale = cQuantizationParams.scale
+    let zeroPoint = Int(cQuantizationParams.zero_point)
+    var quantizationParameters: QuantizationParameters? = nil
+    if scale != 0.0 {
+      // TODO(b/117510052): Update this check once the TfLiteQuantizationParams struct has a mode.
+      quantizationParameters = QuantizationParameters(scale: scale, zeroPoint: zeroPoint)
+    }
+    let tensor = Tensor(
+      name: name,
+      dataType: dataType,
+      shape: shape,
+      data: data,
+      quantizationParameters: quantizationParameters
+    )
+    return tensor
+  }
+
+  /// Returns the output tensor at the given index.
+  ///
+  /// - Parameters:
+  ///   - index: The index for the output tensor.
+  /// - Throws: An error if the index is invalid, tensors haven't been allocated, or interpreter
+  ///     hasn't been invoked for models that dynamically compute output tensors based on the values
+  ///     of its input tensors.
+  /// - Returns: The output tensor at the given index.
+  public func output(at index: Int) throws -> Tensor {
+    let maxIndex = outputTensorCount - 1
+    guard case 0...maxIndex = index else {
+      throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex)
+    }
+    guard let cTensor = TFL_InterpreterGetOutputTensor(cInterpreter, Int32(index)),
+          let bytes = TFL_TensorData(cTensor),
+          let nameCString = TFL_TensorName(cTensor)
+    else {
+      // TODO(b/117510052): Determine which error to throw.
+      throw InterpreterError.invokeInterpreterRequired
+    }
+    guard let dataType = TensorDataType(type: TFL_TensorType(cTensor)) else {
+      throw InterpreterError.invalidTensorDataType
+    }
+
+    let name = String(cString: nameCString)
+    let rank = TFL_TensorNumDims(cTensor)
+    let dimensions = (0..<rank).map { Int(TFL_TensorDim(cTensor, $0)) }
+    let shape = TensorShape(dimensions)
+    let byteCount = TFL_TensorByteSize(cTensor)
+    let data = Data(bytes: bytes, count: byteCount)
+    let cQuantizationParams = TFL_TensorQuantizationParams(cTensor)
+    let scale = cQuantizationParams.scale
+    let zeroPoint = Int(cQuantizationParams.zero_point)
+    var quantizationParameters: QuantizationParameters? = nil
+    if scale != 0.0 {
+      // TODO(b/117510052): Update this check once the TfLiteQuantizationParams struct has a mode.
+      quantizationParameters = QuantizationParameters(scale: scale, zeroPoint: zeroPoint)
+    }
+    let tensor = Tensor(
+      name: name,
+      dataType: dataType,
+      shape: shape,
+      data: data,
+      quantizationParameters: quantizationParameters
+    )
+    return tensor
+  }
+
+  /// Resizes the input tensor at the given index to the specified tensor shape.
+  ///
+  /// - Note: After resizing an input tensor, the client **must** explicitly call
+  ///     `allocateTensors()` before attempting to access the resized tensor data or invoking the
+  ///     interpreter to perform inference.
+  /// - Parameters:
+  ///   - index: The index for the input tensor.
+  ///   - shape: The shape that the input tensor should be resized to.
+  /// - Throws: An error if the input tensor at the given index could not be resized.
+  public func resizeInput(at index: Int, to shape: TensorShape) throws {
+    let maxIndex = inputTensorCount - 1
+    guard case 0...maxIndex = index else {
+      throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex)
+    }
+    guard TFL_InterpreterResizeInputTensor(
+            cInterpreter,
+            Int32(index),
+            shape.int32Dimensions,
+            Int32(shape.rank)
+          ) == kTfLiteOk
+    else {
+      throw InterpreterError.failedToResizeInputTensor(index: index)
+    }
+  }
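+
+  // Usage sketch (illustrative): after a resize, re-allocate before copying data or invoking.
+  //
+  //   try interpreter.resizeInput(at: 0, to: [2])
+  //   try interpreter.allocateTensors()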
+
+  /// Copies the given data to the input tensor at the given index.
+  ///
+  /// - Parameters:
+  ///   - data: The data to be copied to the input tensor's data buffer.
+  ///   - index: The index for the input tensor.
+  /// - Throws: An error if `data.count` does not match the byte count of the input tensor or if
+  ///     the given index is invalid.
+  /// - Returns: The input tensor with the copied data.
+  @discardableResult
+  public func copy(_ data: Data, toInputAt index: Int) throws -> Tensor {
+    let maxIndex = inputTensorCount - 1
+    guard case 0...maxIndex = index else {
+      throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex)
+    }
+    guard let cTensor = TFL_InterpreterGetInputTensor(cInterpreter, Int32(index)) else {
+      throw InterpreterError.allocateTensorsRequired
+    }
+
+    let byteCount = TFL_TensorByteSize(cTensor)
+    guard data.count == byteCount else {
+      throw InterpreterError.invalidTensorDataCount(provided: data.count, required: byteCount)
+    }
+
+    let status = data.withUnsafeBytes { TFL_TensorCopyFromBuffer(cTensor, $0, data.count) }
+    guard status == kTfLiteOk else { throw InterpreterError.failedToCopyDataToInputTensor }
+    return try input(at: index)
+  }
+
+  /// Allocates memory for all input tensors based on their `TensorShape`s.
+  ///
+  /// - Note: This is a relatively expensive operation and should only be called after creating the
+  ///     interpreter and/or resizing any input tensors.
+  /// - Throws: An error if memory could not be allocated for the input tensors.
+  public func allocateTensors() throws {
+    guard TFL_InterpreterAllocateTensors(cInterpreter) == kTfLiteOk else {
+      throw InterpreterError.failedToAllocateTensors
+    }
+  }
+}
+
+// MARK: - Extensions
+
+extension String {
+  /// Returns a new `String` initialized by using the given format C array as a template into which
+  /// the remaining argument values are substituted according to the user’s default locale.
+  ///
+  /// - Note: Returns `nil` if a new `String` could not be constructed from the given values.
+  /// - Parameters:
+  ///   - cFormat: The format C array as a template for substituting values.
+  ///   - arguments: A C pointer to a `va_list` of arguments to substitute into `cFormat`.
+  init?(cFormat: UnsafePointer<CChar>, arguments: CVaListPointer) {
+    var buffer: UnsafeMutablePointer<CChar>?
+    guard vasprintf(&buffer, cFormat, arguments) >= 0, let cString = buffer else { return nil }
+    // Free the buffer allocated by vasprintf; String(validatingUTF8:) copies the bytes.
+    defer { free(cString) }
+    self.init(validatingUTF8: cString)
+  }
+}
diff --git a/tensorflow/lite/experimental/swift/Sources/InterpreterError.swift b/tensorflow/lite/experimental/swift/Sources/InterpreterError.swift
new file mode 100644
index 0000000..5de58b9
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Sources/InterpreterError.swift
@@ -0,0 +1,99 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// TensorFlow Lite interpreter errors.
+public enum InterpreterError: Error {
+  case invalidTensorIndex(index: Int, maxIndex: Int)
+  case invalidTensorDataCount(provided: Int, required: Int)
+  case invalidTensorDataType
+  case failedToLoadModel
+  case failedToCreateInterpreter
+  case failedToResizeInputTensor(index: Int)
+  case failedToCopyDataToInputTensor
+  case failedToAllocateTensors
+  case allocateTensorsRequired
+  case invokeInterpreterRequired
+  case tensorFlowLiteError(String)
+}
+
+// MARK: - Extensions
+
+extension InterpreterError: LocalizedError {
+  /// Localized description of the interpreter error.
+  public var errorDescription: String? {
+    switch self {
+    case .invalidTensorIndex(let index, let maxIndex):
+      return "Invalid tensor index \(index), max index is \(maxIndex)."
+    case .invalidTensorDataCount(let providedCount, let requiredCount):
+      return "Provided data count \(providedCount) must match the required count \(requiredCount)."
+    case .invalidTensorDataType:
+      return "Tensor data type is unsupported or could not be determined because of a model error."
+    case .failedToLoadModel:
+      return "Failed to load the given model."
+    case .failedToCreateInterpreter:
+      return "Failed to create the interpreter."
+    case .failedToResizeInputTensor(let index):
+      return "Failed to resize input tesnor at index \(index)."
+    case .failedToCopyDataToInputTensor:
+      return "Failed to copy data to input tensor."
+    case .failedToAllocateTensors:
+      return "Failed to allocate memory for input tensors."
+    case .allocateTensorsRequired:
+      return "Must call allocateTensors()."
+    case .invokeInterpreterRequired:
+      return "Must call invoke()."
+    case .tensorFlowLiteError(let message):
+      return "TensorFlow Lite Error: \(message)"
+    }
+  }
+}
+
+extension InterpreterError: CustomStringConvertible {
+  /// Textual representation of the TensorFlow Lite interpreter error.
+  public var description: String {
+    return errorDescription ?? "Unknown error."
+  }
+}
+
+#if swift(>=4.2)
+extension InterpreterError: Equatable {}
+#else
+extension InterpreterError: Equatable {
+  public static func == (lhs: InterpreterError, rhs: InterpreterError) -> Bool {
+    switch (lhs, rhs) {
+    case (.invalidTensorDataType, .invalidTensorDataType),
+         (.failedToLoadModel, .failedToLoadModel),
+         (.failedToCreateInterpreter, .failedToCreateInterpreter),
+         (.failedToAllocateTensors, .failedToAllocateTensors),
+         (.allocateTensorsRequired, .allocateTensorsRequired),
+         (.invokeInterpreterRequired, .invokeInterpreterRequired):
+      return true
+    case (.invalidTensorIndex(let lhsIndex, let lhsMaxIndex),
+          .invalidTensorIndex(let rhsIndex, let rhsMaxIndex)):
+      return lhsIndex == rhsIndex && lhsMaxIndex == rhsMaxIndex
+    case (.invalidTensorDataCount(let lhsProvidedCount, let lhsRequiredCount),
+          .invalidTensorDataCount(let rhsProvidedCount, let rhsRequiredCount)):
+      return lhsProvidedCount == rhsProvidedCount && lhsRequiredCount == rhsRequiredCount
+    case (.failedToResizeInputTensor(let lhsIndex), .failedToResizeInputTensor(let rhsIndex)):
+      return lhsIndex == rhsIndex
+    case (.tensorFlowLiteError(let lhsMessage), .tensorFlowLiteError(let rhsMessage)):
+      return lhsMessage == rhsMessage
+    default:
+      return false
+    }
+  }
+}
+#endif  // swift(>=4.2)
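+
+// Usage sketch (illustrative): callers can pattern-match specific failures thrown by the
+// `Interpreter`:
+//
+//   do {
+//     try interpreter.invoke()
+//   } catch InterpreterError.allocateTensorsRequired {
+//     try interpreter.allocateTensors()
+//     try interpreter.invoke()
+//   }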
diff --git a/tensorflow/lite/experimental/swift/Sources/InterpreterOptions.swift b/tensorflow/lite/experimental/swift/Sources/InterpreterOptions.swift
new file mode 100644
index 0000000..2365fd7
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Sources/InterpreterOptions.swift
@@ -0,0 +1,29 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// Custom configuration options for a TensorFlow Lite interpreter.
+public struct InterpreterOptions: Equatable {
+
+  /// Maximum number of CPU threads that the interpreter should run on. The default is `nil`,
+  /// which indicates that the `Interpreter` will decide the number of threads to use.
+  public var threadCount: Int? = nil
+
+  /// Whether error logging to the console is enabled. The default is `false`.
+  public var isErrorLoggingEnabled = false
+
+  /// Creates a new instance of interpreter options.
+  public init() {}
+}
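+
+// Usage sketch (illustrative values):
+//
+//   var options = InterpreterOptions()
+//   options.threadCount = 2
+//   options.isErrorLoggingEnabled = true
+//   let interpreter = try Interpreter(modelPath: modelPath, options: options)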
diff --git a/tensorflow/lite/experimental/swift/Sources/Model.swift b/tensorflow/lite/experimental/swift/Sources/Model.swift
new file mode 100644
index 0000000..e8c49ff
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Sources/Model.swift
@@ -0,0 +1,40 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+import TensorFlowLiteCAPI
+
+/// A TensorFlow Lite model used by the `Interpreter` to perform inference.
+final class Model {
+
+  /// The `TFL_Model` C pointer type represented as an `UnsafePointer<TFL_Model>`.
+  typealias CModel = OpaquePointer
+
+  /// The underlying `TFL_Model` C pointer.
+  let cModel: CModel?
+
+  /// Creates a new model instance.
+  ///
+  /// - Precondition: Initialization can fail if the given `filePath` is invalid.
+  /// - Parameters:
+  ///   - filePath: Local file path to a TensorFlow Lite model.
+  init?(filePath: String) {
+    guard !filePath.isEmpty, let cModel = TFL_NewModelFromFile(filePath) else { return nil }
+    self.cModel = cModel
+  }
+
+  deinit {
+    TFL_DeleteModel(cModel)
+  }
+}
diff --git a/tensorflow/lite/experimental/swift/Sources/QuantizationParameters.swift b/tensorflow/lite/experimental/swift/Sources/QuantizationParameters.swift
new file mode 100644
index 0000000..f367875
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Sources/QuantizationParameters.swift
@@ -0,0 +1,38 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// Parameters that determine the mapping of quantized values to real values. Quantized values can
+/// be mapped to float values using the following conversion:
+/// `realValue = scale * (quantizedValue - zeroPoint)`.
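+///
+/// For example (an illustrative calculation using the scale and zero point of the quantized Add
+/// test model): with `scale = 0.003922` and `zeroPoint = 0`, a quantized value of `3` maps to the
+/// real value `0.003922 * (3 - 0) ≈ 0.011766`.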
+public struct QuantizationParameters {
+
+  /// The difference between real values corresponding to consecutive quantized values that differ
+  /// by 1. For example, quantized values for the `UInt8` data type span the range [0, 255].
+  public let scale: Float
+
+  /// Quantized value that corresponds to the real 0 value.
+  public let zeroPoint: Int
+
+  /// Creates a new quantization parameters instance.
+  ///
+  /// - Parameters:
+  ///   - scale: Scale value for asymmetric quantization.
+  ///   - zeroPoint: Zero point for asymmetric quantization.
+  init(scale: Float, zeroPoint: Int) {
+    self.scale = scale
+    self.zeroPoint = zeroPoint
+  }
+}
diff --git a/tensorflow/lite/experimental/swift/Sources/Tensor.swift b/tensorflow/lite/experimental/swift/Sources/Tensor.swift
new file mode 100644
index 0000000..b738d87
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Sources/Tensor.swift
@@ -0,0 +1,138 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+import TensorFlowLiteCAPI
+
+/// An input or output tensor in a TensorFlow Lite graph.
+public struct Tensor {
+
+  /// Name of the tensor.
+  public let name: String
+
+  /// Data type of the tensor.
+  public let dataType: TensorDataType
+
+  /// Shape of the tensor.
+  public let shape: TensorShape
+
+  /// Data in the input or output tensor.
+  public let data: Data
+
+  /// Quantization parameters for the tensor if using a quantized model.
+  public let quantizationParameters: QuantizationParameters?
+
+  /// Creates a new input or output tensor instance.
+  ///
+  /// - Parameters:
+  ///   - name: Name of the tensor.
+  ///   - dataType: Data type of the tensor.
+  ///   - shape: Shape of the tensor.
+  ///   - data: Data in the input or output tensor.
+  ///   - quantizationParameters: Quantization parameters for the tensor if using a quantized
+  ///       model. The default is `nil`.
+  init(
+    name: String,
+    dataType: TensorDataType,
+    shape: TensorShape,
+    data: Data,
+    quantizationParameters: QuantizationParameters? = nil
+  ) {
+    self.name = name
+    self.dataType = dataType
+    self.shape = shape
+    self.data = data
+    self.quantizationParameters = quantizationParameters
+  }
+}
+
+/// Supported TensorFlow Lite tensor data types.
+public enum TensorDataType: Equatable {
+  /// 32-bit single precision floating point tensor data type.
+  case float32
+  /// 8-bit unsigned integer tensor data type.
+  case uInt8
+  /// 16-bit signed integer tensor data type.
+  case int16
+  /// 32-bit signed integer tensor data type.
+  case int32
+  /// 64-bit signed integer tensor data type.
+  case int64
+  /// Boolean tensor data type.
+  case bool
+
+  /// Creates a new tensor data type from the given `TFL_Type` or `nil` if the data type is
+  /// unsupported or could not be determined because there was an error.
+  ///
+  /// - Parameter type: A data type supported by a tensor.
+  init?(type: TFL_Type) {
+    switch type {
+    case kTfLiteFloat32:
+      self = .float32
+    case kTfLiteUInt8:
+      self = .uInt8
+    case kTfLiteInt16:
+      self = .int16
+    case kTfLiteInt32:
+      self = .int32
+    case kTfLiteInt64:
+      self = .int64
+    case kTfLiteBool:
+      self = .bool
+    case kTfLiteNoType:
+      fallthrough
+    default:
+      return nil
+    }
+  }
+}
+
+/// The shape of a TensorFlow Lite tensor.
+public struct TensorShape {
+
+  /// The number of dimensions of the tensor.
+  public let rank: Int
+
+  /// Array of dimensions for the tensor.
+  public let dimensions: [Int]
+
+  /// Array of `Int32` dimensions for the tensor.
+  var int32Dimensions: [Int32] { return dimensions.map(Int32.init) }
+
+  /// Creates a new tensor shape instance with the given array of dimensions.
+  ///
+  /// - Parameters:
+  ///   - dimensions: Dimensions for the tensor.
+  public init(_ dimensions: [Int]) {
+    self.rank = dimensions.count
+    self.dimensions = dimensions
+  }
+
+  /// Creates a new tensor shape instance with the given elements representing the dimensions.
+  ///
+  /// - Parameters:
+  ///   - elements: Dimensions for the tensor.
+  public init(_ elements: Int...) {
+    self.init(elements)
+  }
+}
+
+extension TensorShape: ExpressibleByArrayLiteral {
+  /// Creates a new tensor shape instance with the given array literal representing the dimensions.
+  ///
+  /// - Parameters:
+  ///   - arrayLiteral: Dimensions for the tensor.
+  public init(arrayLiteral: Int...) {
+    self.init(arrayLiteral)
+  }
+}
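+
+// Usage sketch: all three forms below describe the same 1 x 2 x 3 shape.
+//
+//   let a = TensorShape([1, 2, 3])
+//   let b = TensorShape(1, 2, 3)
+//   let c: TensorShape = [1, 2, 3]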
diff --git a/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/Configs/TensorFlowLite.tulsigen b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/Configs/TensorFlowLite.tulsigen
new file mode 100644
index 0000000..16bc6cb
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/Configs/TensorFlowLite.tulsigen
@@ -0,0 +1,57 @@
+{
+  "sourceFilters" : [
+    "tensorflow/lite/experimental/c",
+    "tensorflow/lite/experimental/swift",
+    "tensorflow/lite/experimental/swift/Sources",
+    "tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp",
+    "tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj",
+    "tensorflow/lite/experimental/swift/Tests",
+  ],
+  "buildTargets" : [
+    "//tensorflow/lite/experimental/swift:TensorFlowLite",
+    "//tensorflow/lite/experimental/swift:TensorFlowLiteApp",
+    "//tensorflow/lite/experimental/swift:TensorFlowLiteTests",
+  ],
+  "projectName" : "TensorFlowLite",
+  "optionSet" : {
+    "LaunchActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "EnvironmentVariables" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "CommandlineArguments" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "LaunchActionPostActionScript" : {
+      "p" : "$(inherited)"
+    }
+  },
+  "additionalFilePaths" : [
+    "tensorflow/lite/experimental/swift/BUILD"
+  ]
+}
diff --git a/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/project.tulsiconf b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/project.tulsiconf
new file mode 100644
index 0000000..82ac8aa
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/project.tulsiconf
@@ -0,0 +1,14 @@
+{
+  "configDefaults" : {
+    "optionSet" : {
+      "ProjectPrioritizesSwift" : {
+        "p" : "YES"
+      }
+    }
+  },
+  "projectName" : "TensorFlowLite",
+  "packages" : [
+    "tensorflow/lite/experimental/swift"
+  ],
+  "workspaceRoot" : "../../../../.."
+}
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp.xcodeproj/project.pbxproj b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp.xcodeproj/project.pbxproj
new file mode 100644
index 0000000..fbbf9a1
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp.xcodeproj/project.pbxproj
@@ -0,0 +1,345 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		4A7304B421500B8400C90B21 /* Data+TensorFlowLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4A7304B321500B8300C90B21 /* Data+TensorFlowLite.swift */; };
+		4AA72B732146ED64006C3AEF /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA72B722146ED64006C3AEF /* AppDelegate.swift */; };
+		4AA72B752146ED64006C3AEF /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA72B742146ED64006C3AEF /* ViewController.swift */; };
+		4AA72B782146ED64006C3AEF /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4AA72B762146ED64006C3AEF /* Main.storyboard */; };
+		4AA72B7A2146ED66006C3AEF /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4AA72B792146ED66006C3AEF /* Assets.xcassets */; };
+		4AA72B7D2146ED66006C3AEF /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4AA72B7B2146ED66006C3AEF /* LaunchScreen.storyboard */; };
+		4ADDE0CE2176600E00FF07A2 /* Array+TensorFlowLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4ADDE0CD2176600900FF07A2 /* Array+TensorFlowLite.swift */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		4A7304B321500B8300C90B21 /* Data+TensorFlowLite.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Data+TensorFlowLite.swift"; sourceTree = "<group>"; };
+		4AA72B6F2146ED64006C3AEF /* TensorFlowLiteApp.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = TensorFlowLiteApp.app; sourceTree = BUILT_PRODUCTS_DIR; };
+		4AA72B722146ED64006C3AEF /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
+		4AA72B742146ED64006C3AEF /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
+		4AA72B772146ED64006C3AEF /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		4AA72B792146ED66006C3AEF /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+		4AA72B7C2146ED66006C3AEF /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		4AA72B7E2146ED66006C3AEF /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		4ADDE0CD2176600900FF07A2 /* Array+TensorFlowLite.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Array+TensorFlowLite.swift"; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		4AA72B6C2146ED64006C3AEF /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		4AA72B662146ED64006C3AEF = {
+			isa = PBXGroup;
+			children = (
+				4AA72B712146ED64006C3AEF /* TensorFlowLiteApp */,
+				4AA72B702146ED64006C3AEF /* Products */,
+			);
+			sourceTree = "<group>";
+		};
+		4AA72B702146ED64006C3AEF /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				4AA72B6F2146ED64006C3AEF /* TensorFlowLiteApp.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		4AA72B712146ED64006C3AEF /* TensorFlowLiteApp */ = {
+			isa = PBXGroup;
+			children = (
+				4AA72B722146ED64006C3AEF /* AppDelegate.swift */,
+				4ADDE0CD2176600900FF07A2 /* Array+TensorFlowLite.swift */,
+				4A7304B321500B8300C90B21 /* Data+TensorFlowLite.swift */,
+				4AA72B742146ED64006C3AEF /* ViewController.swift */,
+				4AA72B762146ED64006C3AEF /* Main.storyboard */,
+				4AA72B792146ED66006C3AEF /* Assets.xcassets */,
+				4AA72B7B2146ED66006C3AEF /* LaunchScreen.storyboard */,
+				4AA72B7E2146ED66006C3AEF /* Info.plist */,
+			);
+			path = TensorFlowLiteApp;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		4AA72B6E2146ED64006C3AEF /* TensorFlowLiteApp */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 4AA72B812146ED66006C3AEF /* Build configuration list for PBXNativeTarget "TensorFlowLiteApp" */;
+			buildPhases = (
+				4AA72B6B2146ED64006C3AEF /* Sources */,
+				4AA72B6C2146ED64006C3AEF /* Frameworks */,
+				4AA72B6D2146ED64006C3AEF /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = TensorFlowLiteApp;
+			productName = TensorFlowLiteApp;
+			productReference = 4AA72B6F2146ED64006C3AEF /* TensorFlowLiteApp.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		4AA72B672146ED64006C3AEF /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastSwiftUpdateCheck = 0940;
+				LastUpgradeCheck = 0940;
+				ORGANIZATIONNAME = Google;
+				TargetAttributes = {
+					4AA72B6E2146ED64006C3AEF = {
+						CreatedOnToolsVersion = 9.4.1;
+					};
+				};
+			};
+			buildConfigurationList = 4AA72B6A2146ED64006C3AEF /* Build configuration list for PBXProject "TensorFlowLiteApp" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = 4AA72B662146ED64006C3AEF;
+			productRefGroup = 4AA72B702146ED64006C3AEF /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				4AA72B6E2146ED64006C3AEF /* TensorFlowLiteApp */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		4AA72B6D2146ED64006C3AEF /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				4AA72B7D2146ED66006C3AEF /* LaunchScreen.storyboard in Resources */,
+				4AA72B7A2146ED66006C3AEF /* Assets.xcassets in Resources */,
+				4AA72B782146ED64006C3AEF /* Main.storyboard in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		4AA72B6B2146ED64006C3AEF /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				4AA72B732146ED64006C3AEF /* AppDelegate.swift in Sources */,
+				4ADDE0CE2176600E00FF07A2 /* Array+TensorFlowLite.swift in Sources */,
+				4A7304B421500B8400C90B21 /* Data+TensorFlowLite.swift in Sources */,
+				4AA72B752146ED64006C3AEF /* ViewController.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		4AA72B762146ED64006C3AEF /* Main.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				4AA72B772146ED64006C3AEF /* Base */,
+			);
+			name = Main.storyboard;
+			sourceTree = "<group>";
+		};
+		4AA72B7B2146ED66006C3AEF /* LaunchScreen.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				4AA72B7C2146ED66006C3AEF /* Base */,
+			);
+			name = LaunchScreen.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		4AA72B7F2146ED66006C3AEF /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.4;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+			};
+			name = Debug;
+		};
+		4AA72B802146ED66006C3AEF /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.4;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		4AA72B822146ED66006C3AEF /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_STYLE = Automatic;
+				INFOPLIST_FILE = TensorFlowLiteApp/Info.plist;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.tensorflow.lite.swift.TensorFlowLite;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		4AA72B832146ED66006C3AEF /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_STYLE = Automatic;
+				INFOPLIST_FILE = TensorFlowLiteApp/Info.plist;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.tensorflow.lite.swift.TensorFlowLite;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		4AA72B6A2146ED64006C3AEF /* Build configuration list for PBXProject "TensorFlowLiteApp" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4AA72B7F2146ED66006C3AEF /* Debug */,
+				4AA72B802146ED66006C3AEF /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4AA72B812146ED66006C3AEF /* Build configuration list for PBXNativeTarget "TensorFlowLiteApp" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4AA72B822146ED66006C3AEF /* Debug */,
+				4AA72B832146ED66006C3AEF /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 4AA72B672146ED64006C3AEF /* Project object */;
+}
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/AppDelegate.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/AppDelegate.swift
new file mode 100644
index 0000000..ffa90a0
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/AppDelegate.swift
@@ -0,0 +1,23 @@
+import UIKit
+
+@UIApplicationMain
+final class AppDelegate: UIResponder, UIApplicationDelegate {
+
+  /// The main window of the app.
+  var window: UIWindow?
+
+  func application(
+    _ application: UIApplication,
+    didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]? = nil
+  ) -> Bool {
+    return true
+  }
+}
+
+// MARK: - Extensions
+
+#if !swift(>=4.2)
+extension UIApplication {
+  typealias LaunchOptionsKey = UIApplicationLaunchOptionsKey
+}
+#endif  // !swift(>=4.2)
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Array+TensorFlowLite.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Array+TensorFlowLite.swift
new file mode 100644
index 0000000..56df1ce
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Array+TensorFlowLite.swift
@@ -0,0 +1,22 @@
+import Foundation
+
+extension Array {
+  /// Creates a new array from the bytes of the given unsafe data.
+  ///
+  /// - Warning: The array's `Element` type must be trivial in that it can be copied bit for bit
+  ///     with no indirection or reference-counting operations; otherwise, copying the raw bytes
+  ///     in `unsafeData`'s buffer to a new array produces an unsafe copy.
+  /// - Note: Returns `nil` if `unsafeData.count` is not a multiple of
+  ///     `MemoryLayout<Element>.stride`.
+  /// - Parameter unsafeData: The data containing the bytes to turn into an array.
+  init?(unsafeData: Data) {
+    guard unsafeData.count % MemoryLayout<Element>.stride == 0 else { return nil }
+    let elements = unsafeData.withUnsafeBytes {
+      UnsafeBufferPointer<Element>(
+        start: $0,
+        count: unsafeData.count / MemoryLayout<Element>.stride
+      )
+    }
+    self.init(elements)
+  }
+}
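
A hedged sketch of this initializer in isolation; it relies only on Foundation plus the
`Array(unsafeData:)` extension defined above:

    import Foundation

    let floats: [Float32] = [1.5, 2.5, 3.5]
    // Copy the array's 12 bytes into a Data buffer, then decode them back.
    let raw = floats.withUnsafeBufferPointer { Data(buffer: $0) }
    let decoded = [Float32](unsafeData: raw)  // Optional([1.5, 2.5, 3.5])
    // 11 bytes is not a multiple of MemoryLayout<Float32>.stride (4), so this is nil.
    let misaligned = [Float32](unsafeData: Data(raw.prefix(11)))
    print(decoded as Any, misaligned as Any)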
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/Contents.json b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/Contents.json
new file mode 100644
index 0000000..d8db8d6
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/Contents.json
@@ -0,0 +1,98 @@
+{
+  "images" : [
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "83.5x83.5",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ios-marketing",
+      "size" : "1024x1024",
+      "scale" : "1x"
+    }
+  ],
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/Contents.json b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/Contents.json
new file mode 100644
index 0000000..da4a164
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/Contents.json
@@ -0,0 +1,6 @@
+{
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard
new file mode 100644
index 0000000..a07a132
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14109" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
+    <device id="retina4_7" orientation="portrait">
+        <adaptation id="fullscreen"/>
+    </device>
+    <dependencies>
+        <deployment identifier="iOS"/>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14088"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="EHf-IW-A2E">
+            <objects>
+                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
+                    <layoutGuides>
+                        <viewControllerLayoutGuide type="top" id="Llm-lL-Icb"/>
+                        <viewControllerLayoutGuide type="bottom" id="xb3-aO-Qok"/>
+                    </layoutGuides>
+                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <subviews>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="TensorFlowLite" textAlignment="center" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="3Gq-PV-hia">
+                                <rect key="frame" x="16" y="315" width="343" height="38.5"/>
+                                <fontDescription key="fontDescription" type="boldSystem" pointSize="32"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                        </subviews>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <constraints>
+                            <constraint firstItem="3Gq-PV-hia" firstAttribute="leading" secondItem="Ze5-6b-2t3" secondAttribute="leading" constant="16" id="aXL-9T-5Pf"/>
+                            <constraint firstItem="3Gq-PV-hia" firstAttribute="centerY" secondItem="Ze5-6b-2t3" secondAttribute="centerY" id="cDf-Go-1FR"/>
+                            <constraint firstAttribute="trailing" secondItem="3Gq-PV-hia" secondAttribute="trailing" constant="16" id="fB9-BX-A3B"/>
+                        </constraints>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="52" y="374.66266866566718"/>
+        </scene>
+    </scenes>
+</document>
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/Main.storyboard b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/Main.storyboard
new file mode 100644
index 0000000..10cae6e
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/Main.storyboard
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
+    <device id="retina4_7" orientation="portrait">
+        <adaptation id="fullscreen"/>
+    </device>
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14460.20"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="tne-QT-ifu">
+            <objects>
+                <viewController storyboardIdentifier="viewController" useStoryboardIdentifierAsRestorationIdentifier="YES" id="BYZ-38-t0r" customClass="ViewController" customModule="TensorFlowLiteAppLib" sceneMemberID="viewController">
+                    <layoutGuides>
+                        <viewControllerLayoutGuide type="top" id="y3c-jy-aDJ"/>
+                        <viewControllerLayoutGuide type="bottom" id="wfy-db-euE"/>
+                    </layoutGuides>
+                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <subviews>
+                            <textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" editable="NO" selectable="NO" translatesAutoresizingMaskIntoConstraints="NO" id="7Mj-sL-hrd">
+                                <rect key="frame" x="0.0" y="367" width="375" height="300"/>
+                                <color key="backgroundColor" red="0.0" green="0.47843137250000001" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="300" id="YUb-MC-D5w"/>
+                                </constraints>
+                                <color key="textColor" cocoaTouchSystemColor="tableCellGroupedBackgroundColor"/>
+                                <fontDescription key="fontDescription" type="system" pointSize="14"/>
+                                <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
+                            </textView>
+                            <toolbar opaque="NO" clearsContextBeforeDrawing="NO" contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="Qwg-EP-bd6" userLabel="Bottom Toolbar">
+                                <rect key="frame" x="0.0" y="323" width="375" height="44"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="44" id="jhT-Q0-E9N"/>
+                                </constraints>
+                                <items>
+                                    <barButtonItem style="plain" systemItem="flexibleSpace" id="P3q-uA-YUa"/>
+                                    <barButtonItem title="Invoke Interpreter" id="A4J-Mg-nmd" userLabel="Invoke Button">
+                                        <connections>
+                                            <action selector="invokeInterpreter:" destination="BYZ-38-t0r" id="lZU-x7-PsJ"/>
+                                        </connections>
+                                    </barButtonItem>
+                                    <barButtonItem style="plain" systemItem="flexibleSpace" id="Qad-Pa-ySg"/>
+                                </items>
+                            </toolbar>
+                            <toolbar opaque="NO" clearsContextBeforeDrawing="NO" contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="Gkb-TR-PCB" userLabel="Top Toolbar">
+                                <rect key="frame" x="0.0" y="28" width="375" height="44"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="44" id="hSD-2q-fUE"/>
+                                </constraints>
+                                <items>
+                                    <barButtonItem style="plain" id="LKw-TX-bbH">
+                                        <segmentedControl key="customView" opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="left" contentVerticalAlignment="top" segmentControlStyle="bar" selectedSegmentIndex="0" id="rhA-nW-xzT">
+                                            <rect key="frame" x="16" y="7" width="343" height="30"/>
+                                            <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                            <segments>
+                                                <segment title="Add"/>
+                                                <segment title="AddQuantized"/>
+                                                <segment title="MultiAdd"/>
+                                            </segments>
+                                            <connections>
+                                                <action selector="modelChanged:" destination="BYZ-38-t0r" eventType="valueChanged" id="YnG-Ov-B5D"/>
+                                            </connections>
+                                        </segmentedControl>
+                                    </barButtonItem>
+                                </items>
+                            </toolbar>
+                        </subviews>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <constraints>
+                            <constraint firstAttribute="trailing" secondItem="Gkb-TR-PCB" secondAttribute="trailing" id="4Cr-Sf-I7n"/>
+                            <constraint firstItem="7Mj-sL-hrd" firstAttribute="bottom" secondItem="wfy-db-euE" secondAttribute="top" id="6ot-zD-sze"/>
+                            <constraint firstItem="7Mj-sL-hrd" firstAttribute="top" secondItem="Qwg-EP-bd6" secondAttribute="bottom" id="ELA-C6-NiG"/>
+                            <constraint firstAttribute="trailing" secondItem="7Mj-sL-hrd" secondAttribute="trailing" id="HDO-xr-mBl"/>
+                            <constraint firstItem="Gkb-TR-PCB" firstAttribute="leading" secondItem="8bC-Xf-vdC" secondAttribute="leading" id="Kmo-6K-gS4"/>
+                            <constraint firstItem="Qwg-EP-bd6" firstAttribute="leading" secondItem="8bC-Xf-vdC" secondAttribute="leading" id="hGu-lm-fMG"/>
+                            <constraint firstAttribute="trailing" secondItem="Qwg-EP-bd6" secondAttribute="trailing" id="iXR-LK-nTO"/>
+                            <constraint firstItem="7Mj-sL-hrd" firstAttribute="leading" secondItem="8bC-Xf-vdC" secondAttribute="leading" id="nr7-jW-ZYf"/>
+                            <constraint firstItem="Gkb-TR-PCB" firstAttribute="top" secondItem="y3c-jy-aDJ" secondAttribute="bottom" constant="8" id="uCF-VW-rR0"/>
+                        </constraints>
+                    </view>
+                    <connections>
+                        <outlet property="invokeButton" destination="A4J-Mg-nmd" id="UxZ-Ft-E45"/>
+                        <outlet property="modelControl" destination="rhA-nW-xzT" id="KKf-TT-BQ2"/>
+                        <outlet property="resultsTextView" destination="7Mj-sL-hrd" id="T4I-z4-tYA"/>
+                    </connections>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="125.59999999999999" y="133.5832083958021"/>
+        </scene>
+    </scenes>
+</document>
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Data+TensorFlowLite.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Data+TensorFlowLite.swift
new file mode 100644
index 0000000..bc8a70c
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Data+TensorFlowLite.swift
@@ -0,0 +1,13 @@
+import Foundation
+
+extension Data {
+  /// Creates a new buffer by copying the buffer pointer of the given array.
+  ///
+  /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit
+  ///     for bit with no indirection or reference-counting operations; otherwise, reinterpreting
+  ///     data from the resulting buffer has undefined behavior.
+  /// - Parameter array: An array with elements of type `T`.
+  init<T>(copyingBufferOf array: [T]) {
+    self = array.withUnsafeBufferPointer(Data.init)
+  }
+}
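
Taken together with the `Array(unsafeData:)` initializer earlier in this change, this
gives a byte-level round trip for trivial element types. A short sketch, assuming both
extensions are in scope:

    import Foundation

    let input: [Float32] = [1, 3]
    let buffer = Data(copyingBufferOf: input)           // 8 bytes: two Float32 values
    let restored = [Float32](unsafeData: buffer) ?? []  // [1.0, 3.0]
    // Non-trivial element types (e.g. [String]) would copy pointer words rather
    // than character data, so they must not be used with either extension.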
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist
new file mode 100644
index 0000000..3ca3875
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>en</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>0.0.1</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>NSCameraUsageDescription</key>
+	<string>NSCameraUsageDescription</string>
+	<key>NSPhotoLibraryUsageDescription</key>
+	<string>Select a photo to detect objects in.</string>
+	<key>UILaunchStoryboardName</key>
+	<string>LaunchScreen</string>
+	<key>UIMainStoryboardFile</key>
+	<string>Main</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+	</array>
+</dict>
+</plist>
diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/ViewController.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/ViewController.swift
new file mode 100644
index 0000000..73c74fd
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/ViewController.swift
@@ -0,0 +1,299 @@
+import TensorFlowLite
+import UIKit
+
+class ViewController: UIViewController {
+
+  // MARK: - Properties
+
+  /// TensorFlowLite interpreter object for performing inference from a given model.
+  private var interpreter: Interpreter?
+
+  /// Serial dispatch queue for managing `Interpreter` calls.
+  private let interpreterQueue = DispatchQueue(
+    label: Constant.dispatchQueueLabel,
+    qos: .userInitiated
+  )
+
+  /// The currently selected model.
+  private var currentModel: Model {
+    guard let currentModel = Model(rawValue: modelControl.selectedSegmentIndex) else {
+      preconditionFailure("Invalid model for selected segment index.")
+    }
+    return currentModel
+  }
+
+  /// A description of the current model.
+  private var modelDescription: String {
+    guard let interpreter = interpreter else { return "" }
+    let inputCount = interpreter.inputTensorCount
+    let outputCount = interpreter.outputTensorCount
+    let inputTensors = (0..<inputCount).map { index in
+      var tensorInfo = "  Input \(index + 1): "
+      do {
+        let tensor = try interpreter.input(at: index)
+        tensorInfo += "\(tensor)"
+      } catch let error {
+        tensorInfo += "\(error.localizedDescription)"
+      }
+      return tensorInfo
+    }.joined(separator: "\n")
+    let outputTensors = (0..<outputCount).map { index in
+      var tensorInfo = "  Output \(index + 1): "
+      do {
+        let tensor = try interpreter.output(at: index)
+        tensorInfo += "\(tensor)"
+      } catch let error {
+        tensorInfo += "\(error.localizedDescription)"
+      }
+      return tensorInfo
+    }.joined(separator: "\n")
+    return "Model Description:\n" +
+             "  Input Tensor Count = \(inputCount)\n\(inputTensors)\n\n" +
+             "  Output Tensor Count = \(outputCount)\n\(outputTensors)"
+  }
+
+  // MARK: - IBOutlets
+
+  /// A segmented control for changing models. See the `Model` enum for available models.
+  @IBOutlet private var modelControl: UISegmentedControl!
+
+  @IBOutlet private var resultsTextView: UITextView!
+  @IBOutlet private var invokeButton: UIBarButtonItem!
+
+  // MARK: - UIViewController
+
+  override func viewDidLoad() {
+    super.viewDidLoad()
+
+    invokeButton.isEnabled = false
+    loadModel()
+  }
+
+  // MARK: - IBActions
+
+  @IBAction func modelChanged(_ sender: Any) {
+    invokeButton.isEnabled = false
+    updateResultsText("Switched to the \(currentModel.description) model.")
+    loadModel()
+  }
+
+  @IBAction func invokeInterpreter(_ sender: Any) {
+    switch currentModel {
+    case .add:
+      invokeAdd()
+    case .addQuantized:
+      invokeAddQuantized()
+    case .multiAdd:
+      invokeMultiAdd()
+    }
+  }
+
+  // MARK: - Private
+
+  private func loadModel() {
+    let fileInfo = currentModel.fileInfo
+    guard let modelPath = Bundle.main.path(forResource: fileInfo.name, ofType: fileInfo.extension)
+    else {
+      updateResultsText("Failed to load the \(currentModel.description) model.")
+      return
+    }
+    setUpInterpreter(withModelPath: modelPath)
+  }
+
+  private func setUpInterpreter(withModelPath modelPath: String) {
+    interpreterQueue.async {
+      do {
+        var options = InterpreterOptions()
+        options.isErrorLoggingEnabled = true
+        self.interpreter = try Interpreter(modelPath: modelPath, options: options)
+      } catch let error {
+        self.updateResultsText(
+          "Failed to create the interpreter with error: \(error.localizedDescription)"
+        )
+        return
+      }
+      safeDispatchOnMain { self.invokeButton.isEnabled = true }
+    }
+  }
+
+  private func invokeAdd() {
+    interpreterQueue.async {
+      guard let interpreter = self.interpreter else {
+        self.updateResultsText(Constant.nilInterpreterErrorMessage)
+        return
+      }
+      do {
+        try interpreter.resizeInput(at: 0, to: [2])
+        try interpreter.allocateTensors()
+        let input: [Float32] = [1, 3]
+        let resultsText = self.modelDescription + "\n\n" +
+          "Performing 2 add operations on input \(input.description) equals: "
+        self.updateResultsText(resultsText)
+        let data = Data(copyingBufferOf: input)
+        try interpreter.copy(data, toInputAt: 0)
+        try interpreter.invoke()
+        let outputTensor = try interpreter.output(at: 0)
+        let results: () -> String = {
+          guard let results = [Float32](unsafeData: outputTensor.data) else { return "No results." }
+          return resultsText + results.description
+        }
+        self.updateResultsText(results())
+      } catch let error {
+        self.updateResultsText(
+          "Failed to invoke the interpreter with error: \(error.localizedDescription)"
+        )
+        return
+      }
+    }
+  }
+
+  private func invokeAddQuantized() {
+    interpreterQueue.async {
+      guard let interpreter = self.interpreter else {
+        self.updateResultsText(Constant.nilInterpreterErrorMessage)
+        return
+      }
+      do {
+        try interpreter.resizeInput(at: 0, to: [2])
+        try interpreter.allocateTensors()
+        let input: [UInt8] = [1, 3]
+        let resultsText = self.modelDescription + "\n\n" +
+          "Performing 2 add operations on quantized input \(input.description) equals: "
+        self.updateResultsText(resultsText)
+        let data = Data(input)
+        try interpreter.copy(data, toInputAt: 0)
+        try interpreter.invoke()
+        let outputTensor = try interpreter.output(at: 0)
+        let results: () -> String = {
+          guard let quantizationParameters = outputTensor.quantizationParameters else {
+            return "No results."
+          }
+          let quantizedResults = [UInt8](outputTensor.data)
+          let dequantizedResults = quantizedResults.map {
+            quantizationParameters.scale * Float(Int($0) - quantizationParameters.zeroPoint)
+          }
+          return resultsText + quantizedResults.description +
+                   ", dequantized results: " + dequantizedResults.description
+        }
+        self.updateResultsText(results())
+      } catch let error {
+        self.updateResultsText(
+          "Failed to invoke the interpreter with error: \(error.localizedDescription)"
+        )
+        return
+      }
+    }
+  }
+
+  private func invokeMultiAdd() {
+    interpreterQueue.async {
+      guard let interpreter = self.interpreter else {
+        self.updateResultsText(Constant.nilInterpreterErrorMessage)
+        return
+      }
+      do {
+        let shape = TensorShape(2)
+        try (0..<interpreter.inputTensorCount).forEach { index in
+          try interpreter.resizeInput(at: index, to: shape)
+        }
+        try interpreter.allocateTensors()
+        let inputs = try (0..<interpreter.inputTensorCount).map { index -> [Float32] in
+          let input = [Float32(index + 1), Float32(index + 2)]
+          let data = Data(copyingBufferOf: input)
+          try interpreter.copy(data, toInputAt: index)
+          return input
+        }
+        let resultsText = self.modelDescription + "\n\n" +
+          "Performing 3 add operations on inputs \(inputs.description) equals: "
+        self.updateResultsText(resultsText)
+        try interpreter.invoke()
+        let results = try (0..<interpreter.outputTensorCount).map { index -> [Float32] in
+          let tensor = try interpreter.output(at: index)
+          return [Float32](unsafeData: tensor.data) ?? []
+        }
+        self.updateResultsText(resultsText + results.description)
+      } catch let error {
+        self.updateResultsText(
+          "Failed to invoke the interpreter with error: \(error.localizedDescription)"
+        )
+        return
+      }
+    }
+  }
+
+  private func updateResultsText(_ text: String? = nil) {
+    safeDispatchOnMain { self.resultsTextView.text = text }
+  }
+}
+
+// MARK: - Constants
+
+private enum Constant {
+  static let dispatchQueueLabel = "TensorFlowLiteInterpreterQueue"
+  static let nilInterpreterErrorMessage =
+    "Failed to invoke the interpreter because the interpreter was nil."
+}
+
+/// Models that can be loaded by the TensorFlow Lite `Interpreter`.
+private enum Model: Int, CustomStringConvertible {
+  /// A float model that performs two add operations on one input tensor and returns the result in
+  /// one output tensor.
+  case add = 0
+  /// A quantized model that performs two add operations on one input tensor and returns the result
+  /// in one output tensor.
+  case addQuantized = 1
+  /// A float model that performs three add operations on four input tensors and returns the
+  /// results in two output tensors.
+  case multiAdd = 2
+
+  var fileInfo: (name: String, extension: String) {
+    switch self {
+    case .add:
+      return Add.fileInfo
+    case .addQuantized:
+      return AddQuantized.fileInfo
+    case .multiAdd:
+      return MultiAdd.fileInfo
+    }
+  }
+
+  // MARK: - CustomStringConvertible
+
+  var description: String {
+    switch self {
+    case .add:
+      return Add.name
+    case .addQuantized:
+      return AddQuantized.name
+    case .multiAdd:
+      return MultiAdd.name
+    }
+  }
+}
+
+/// Values for the `Add` model.
+private enum Add {
+  static let name = "Add"
+  static let fileInfo = (name: "add", extension: "bin")
+}
+
+/// Values for the `AddQuantized` model.
+private enum AddQuantized {
+  static let name = "AddQuantized"
+  static let fileInfo = (name: "add_quantized", extension: "bin")
+}
+
+/// Values for the `MultiAdd` model.
+private enum MultiAdd {
+  static let name = "MultiAdd"
+  static let fileInfo = (name: "multi_add", extension: "bin")
+}
+
+// MARK: - Fileprivate
+
+/// Safely dispatches the given block on the main queue. If the current thread is `main`, the
+/// block is executed synchronously; otherwise, it is dispatched asynchronously to the main queue.
+fileprivate func safeDispatchOnMain(_ block: @escaping () -> Void) {
+  if Thread.isMainThread { block(); return }
+  DispatchQueue.main.async { block() }
+}
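
Stripped of the UI and dispatch-queue plumbing, the flow that `invokeAdd()` performs
reduces to the sketch below. The model path is a hypothetical placeholder (the app
resolves the real path from the bundle in `loadModel()`), and the byte-copy helpers
defined earlier in this change are assumed to be in scope:

    import TensorFlowLite

    do {
      var options = InterpreterOptions()
      options.isErrorLoggingEnabled = true
      // Hypothetical path to the float add model bundled with the app.
      let interpreter = try Interpreter(modelPath: "/path/to/add.bin", options: options)

      try interpreter.resizeInput(at: 0, to: [2])  // two Float32 elements
      try interpreter.allocateTensors()
      try interpreter.copy(Data(copyingBufferOf: [Float32(1), Float32(3)]), toInputAt: 0)
      try interpreter.invoke()

      let output = try interpreter.output(at: 0)
      let results = [Float32](unsafeData: output.data) ?? []
      print(results)  // [3.0, 9.0]: the model adds the input to itself twice.
    } catch {
      print("Inference failed: \(error.localizedDescription)")
    }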
diff --git a/tensorflow/lite/experimental/swift/Tests/InterpreterOptionsTests.swift b/tensorflow/lite/experimental/swift/Tests/InterpreterOptionsTests.swift
new file mode 100644
index 0000000..54b4f59
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Tests/InterpreterOptionsTests.swift
@@ -0,0 +1,54 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+@testable import TensorFlowLite
+import XCTest
+
+class InterpreterOptionsTests: XCTestCase {
+
+  func testInterpreterOptions_InitWithDefaultValues() {
+    let options = InterpreterOptions()
+    XCTAssertNil(options.threadCount)
+    XCTAssertFalse(options.isErrorLoggingEnabled)
+  }
+
+  func testInterpreterOptions_InitWithCustomValues() {
+    var options = InterpreterOptions()
+    options.threadCount = 2
+    XCTAssertEqual(options.threadCount, 2)
+    options.isErrorLoggingEnabled = true
+    XCTAssertTrue(options.isErrorLoggingEnabled)
+  }
+
+  func testInterpreterOptions_Equatable() {
+    var options1 = InterpreterOptions()
+    var options2 = InterpreterOptions()
+    XCTAssertEqual(options1, options2)
+
+    options1.threadCount = 2
+    options2.threadCount = 2
+    XCTAssertEqual(options1, options2)
+
+    options2.threadCount = 3
+    XCTAssertNotEqual(options1, options2)
+    options2.threadCount = 2
+
+    options1.isErrorLoggingEnabled = true
+    options2.isErrorLoggingEnabled = true
+    XCTAssertEqual(options1, options2)
+
+    options2.isErrorLoggingEnabled = false
+    XCTAssertNotEqual(options1, options2)
+  }
+}
diff --git a/tensorflow/lite/experimental/swift/Tests/InterpreterTests.swift b/tensorflow/lite/experimental/swift/Tests/InterpreterTests.swift
new file mode 100644
index 0000000..e98da5f
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Tests/InterpreterTests.swift
@@ -0,0 +1,315 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+@testable import TensorFlowLite
+import XCTest
+
+class InterpreterTests: XCTestCase {
+
+  var interpreter: Interpreter!
+
+  override func setUp() {
+    super.setUp()
+
+    interpreter = try! Interpreter(modelPath: AddModel.path)
+  }
+
+  override func tearDown() {
+    interpreter = nil
+
+    super.tearDown()
+  }
+
+  func testInterpreter_InitWithModelPath() {
+    XCTAssertNoThrow(try Interpreter(modelPath: AddModel.path))
+  }
+
+  func testInterpreter_Init_ThrowsFailedToLoadModel() {
+    XCTAssertThrowsError(try Interpreter(modelPath: "/invalid/path")) { error in
+      self.assertEqualErrors(actual: error, expected: .failedToLoadModel)
+    }
+  }
+
+  func testInterpreter_InitWithModelPathAndOptions() {
+    var options = InterpreterOptions()
+    options.threadCount = 2
+    XCTAssertNoThrow(try Interpreter(modelPath: AddModel.path, options: options))
+  }
+
+  func testInterpreter_InputTensorCount() {
+    XCTAssertEqual(interpreter.inputTensorCount, AddModel.inputTensorCount)
+  }
+
+  func testInterpreter_OutputTensorCount() {
+    XCTAssertEqual(interpreter.outputTensorCount, AddModel.outputTensorCount)
+  }
+
+  func testInterpreter_Invoke() throws {
+    try interpreter.allocateTensors()
+    XCTAssertNoThrow(try interpreter.invoke())
+  }
+
+  func testInterpreter_Invoke_ThrowsAllocateTensorsRequired_ModelNotReady() {
+    XCTAssertThrowsError(try interpreter.invoke()) { error in
+      self.assertEqualErrors(actual: error, expected: .allocateTensorsRequired)
+    }
+  }
+
+  func testInterpreter_InputTensorAtIndex() throws {
+    try setUpAddModelInputTensor()
+    let inputTensor = try interpreter.input(at: AddModel.validIndex)
+    XCTAssertEqual(inputTensor, AddModel.inputTensor)
+  }
+
+  func testInterpreter_InputTensorAtIndex_QuantizedModel() throws {
+    interpreter = try Interpreter(modelPath: AddQuantizedModel.path)
+    try setUpAddQuantizedModelInputTensor()
+    let inputTensor = try interpreter.input(at: AddQuantizedModel.inputOutputIndex)
+    XCTAssertEqual(inputTensor, AddQuantizedModel.inputTensor)
+  }
+
+  func testInterpreter_InputTensorAtIndex_ThrowsInvalidIndex() throws {
+    try interpreter.allocateTensors()
+    XCTAssertThrowsError(try interpreter.input(at: AddModel.invalidIndex)) { error in
+      let maxIndex = AddModel.inputTensorCount - 1
+      self.assertEqualErrors(
+        actual: error,
+        expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex)
+      )
+    }
+  }
+
+  func testInterpreter_InputTensorAtIndex_ThrowsAllocateTensorsRequired() {
+    XCTAssertThrowsError(try interpreter.input(at: AddModel.validIndex)) { error in
+      self.assertEqualErrors(actual: error, expected: .allocateTensorsRequired)
+    }
+  }
+
+  func testInterpreter_OutputTensorAtIndex() throws {
+    try setUpAddModelInputTensor()
+    try interpreter.invoke()
+    let outputTensor = try interpreter.output(at: AddModel.validIndex)
+    XCTAssertEqual(outputTensor, AddModel.outputTensor)
+    let expectedResults = [Float32](unsafeData: outputTensor.data)
+    XCTAssertEqual(expectedResults, AddModel.results)
+  }
+
+  func testInterpreter_OutputTensorAtIndex_QuantizedModel() throws {
+    interpreter = try Interpreter(modelPath: AddQuantizedModel.path)
+    try setUpAddQuantizedModelInputTensor()
+    try interpreter.invoke()
+    let outputTensor = try interpreter.output(at: AddQuantizedModel.inputOutputIndex)
+    XCTAssertEqual(outputTensor, AddQuantizedModel.outputTensor)
+    let expectedResults = [UInt8](outputTensor.data)
+    XCTAssertEqual(expectedResults, AddQuantizedModel.results)
+  }
+
+  func testInterpreter_OutputTensorAtIndex_ThrowsInvalidIndex() throws {
+    try interpreter.allocateTensors()
+    try interpreter.invoke()
+    XCTAssertThrowsError(try interpreter.output(at: AddModel.invalidIndex)) { error in
+      let maxIndex = AddModel.outputTensorCount - 1
+      self.assertEqualErrors(
+        actual: error,
+        expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex)
+      )
+    }
+  }
+
+  func testInterpreter_OutputTensorAtIndex_ThrowsInvokeInterpreterRequired() {
+    XCTAssertThrowsError(try interpreter.output(at: AddModel.validIndex)) { error in
+      self.assertEqualErrors(actual: error, expected: .invokeInterpreterRequired)
+    }
+  }
+
+  func testInterpreter_ResizeInputTensorAtIndexToShape() {
+    XCTAssertNoThrow(try interpreter.resizeInput(at: AddModel.validIndex, to: [2, 2, 3]))
+    XCTAssertNoThrow(try interpreter.allocateTensors())
+  }
+
+  func testInterpreter_ResizeInputTensorAtIndexToShape_ThrowsInvalidIndex() {
+    XCTAssertThrowsError(try interpreter.resizeInput(
+      at: AddModel.invalidIndex,
+      to: [2, 2, 3]
+    )) { error in
+      let maxIndex = AddModel.inputTensorCount - 1
+      self.assertEqualErrors(
+        actual: error,
+        expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex)
+      )
+    }
+  }
+
+  func testInterpreter_CopyDataToInputTensorAtIndex() throws {
+    try interpreter.resizeInput(at: AddModel.validIndex, to: AddModel.shape)
+    try interpreter.allocateTensors()
+    let inputTensor = try interpreter.copy(AddModel.inputData, toInputAt: AddModel.validIndex)
+    XCTAssertEqual(inputTensor.data, AddModel.inputData)
+  }
+
+  func testInterpreter_CopyDataToInputTensorAtIndex_ThrowsInvalidIndex() {
+    XCTAssertThrowsError(try interpreter.copy(
+      AddModel.inputData,
+      toInputAt: AddModel.invalidIndex
+    )) { error in
+      let maxIndex = AddModel.inputTensorCount - 1
+      self.assertEqualErrors(
+        actual: error,
+        expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex)
+      )
+    }
+  }
+
+  func testInterpreter_CopyDataToInputTensorAtIndex_ThrowsInvalidDataCount() throws {
+    try interpreter.resizeInput(at: AddModel.validIndex, to: AddModel.shape)
+    try interpreter.allocateTensors()
+    let invalidData = Data(count: AddModel.dataCount - 1)
+    XCTAssertThrowsError(try interpreter.copy(
+      invalidData,
+      toInputAt: AddModel.validIndex
+    )) { error in
+      self.assertEqualErrors(
+        actual: error,
+        expected: .invalidTensorDataCount(provided: invalidData.count, required: AddModel.dataCount)
+      )
+    }
+  }
+
+  func testInterpreter_AllocateTensors() {
+    XCTAssertNoThrow(try interpreter.allocateTensors())
+  }
+
+  // MARK: - Private
+
+  private func setUpAddModelInputTensor() throws {
+    precondition(interpreter != nil)
+    try interpreter.resizeInput(at: AddModel.validIndex, to: AddModel.shape)
+    try interpreter.allocateTensors()
+    try interpreter.copy(AddModel.inputData, toInputAt: AddModel.validIndex)
+  }
+
+  private func setUpAddQuantizedModelInputTensor() throws {
+    precondition(interpreter != nil)
+    try interpreter.resizeInput(at: AddQuantizedModel.inputOutputIndex, to: AddQuantizedModel.shape)
+    try interpreter.allocateTensors()
+    try interpreter.copy(AddQuantizedModel.inputData, toInputAt: AddQuantizedModel.inputOutputIndex)
+  }
+
+  private func assertEqualErrors(actual: Error, expected: InterpreterError) {
+    guard let actual = actual as? InterpreterError else {
+      XCTFail("Actual error should be of type InterpreterError.")
+      return
+    }
+    XCTAssertEqual(actual, expected)
+  }
+}
+
+// MARK: - Constants
+
+/// Values for the `add.bin` model.
+private enum AddModel {
+  static let info = (name: "add", extension: "bin")
+  static let inputTensorCount = 1
+  static let outputTensorCount = 1
+  static let invalidIndex = 1
+  static let validIndex = 0
+  static let shape: TensorShape = [2]
+  static let dataCount = inputData.count
+  static let inputData = Data(copyingBufferOf: [Float32(1.0), Float32(3.0)])
+  static let outputData = Data(copyingBufferOf: [Float32(3.0), Float32(9.0)])
+  static let results = [Float32(3.0), Float32(9.0)]
+
+  static let inputTensor = Tensor(
+    name: "input",
+    dataType: .float32,
+    shape: shape,
+    data: inputData
+  )
+  static let outputTensor = Tensor(
+    name: "output",
+    dataType: .float32,
+    shape: shape,
+    data: outputData
+  )
+
+  static var path: String = {
+    let bundle = Bundle(for: InterpreterTests.self)
+    guard let path = bundle.path(forResource: info.name, ofType: info.extension) else { return "" }
+    return path
+  }()
+}
+
+/// Values for the `add_quantized.bin` model.
+private enum AddQuantizedModel {
+  static let info = (name: "add_quantized", extension: "bin")
+  static let inputOutputIndex = 0
+  static let shape: TensorShape = [2]
+  static let inputData = Data([1, 3])
+  static let outputData = Data([3, 9])
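+  // Note: scale 0.003922 is about 1/255; with zeroPoint 0 the uint8 values
+  // [0, 255] map onto the float range [0.0, 1.0].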
+  static let quantizationParameters = QuantizationParameters(scale: 0.003922, zeroPoint: 0)
+  static let results: [UInt8] = [3, 9]
+
+  static let inputTensor = Tensor(
+    name: "input",
+    dataType: .uInt8,
+    shape: shape,
+    data: inputData,
+    quantizationParameters: quantizationParameters
+  )
+  static let outputTensor = Tensor(
+    name: "output",
+    dataType: .uInt8,
+    shape: shape,
+    data: outputData,
+    quantizationParameters: quantizationParameters
+  )
+
+  static var path: String = {
+    let bundle = Bundle(for: InterpreterTests.self)
+    guard let path = bundle.path(forResource: info.name, ofType: info.extension) else { return "" }
+    return path
+  }()
+}
+
+// MARK: - Extensions
+
+extension Array {
+  /// Creates a new array from the bytes of the given unsafe data.
+  ///
+  /// - Note: Returns `nil` if `unsafeData.count` is not a multiple of
+  ///     `MemoryLayout<Element>.stride`.
+  /// - Parameter unsafeData: The data containing the bytes to turn into an array.
+  init?(unsafeData: Data) {
+    guard unsafeData.count % MemoryLayout<Element>.stride == 0 else { return nil }
+    let elements = unsafeData.withUnsafeBytes {
+      UnsafeBufferPointer<Element>(
+        start: $0,
+        count: unsafeData.count / MemoryLayout<Element>.stride
+      )
+    }
+    self.init(elements)
+  }
+}
+
+extension Data {
+  /// Creates a new buffer by copying the buffer pointer of the given array.
+  ///
+  /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit
+  ///     for bit with no indirection or reference-counting operations; otherwise, reinterpreting
+  ///     data from the resulting buffer has undefined behavior.
+  /// - Parameter array: An array with elements of type `T`.
+  init<T>(copyingBufferOf array: [T]) {
+    self = array.withUnsafeBufferPointer(Data.init)
+  }
+}
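+
+// Usage sketch (illustrative only): round-tripping a trivial element type
+// through the two helpers above.
+//
+//   let floats: [Float32] = [1.0, 3.0]
+//   let data = Data(copyingBufferOf: floats)   // 8 bytes (2 x 4-byte floats)
+//   let restored = [Float32](unsafeData: data) // Optional([1.0, 3.0])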
diff --git a/tensorflow/lite/experimental/swift/Tests/ModelTests.swift b/tensorflow/lite/experimental/swift/Tests/ModelTests.swift
new file mode 100644
index 0000000..025db18
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Tests/ModelTests.swift
@@ -0,0 +1,59 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+@testable import TensorFlowLite
+import XCTest
+
+class ModelTests: XCTestCase {
+
+  var modelPath: String!
+
+  override func setUp() {
+    super.setUp()
+
+    let bundle = Bundle(for: type(of: self))
+    guard let modelPath = bundle.path(
+            forResource: Constant.modelInfo.name,
+            ofType: Constant.modelInfo.extension)
+    else {
+      XCTFail("Failed to get the model file path.")
+      return
+    }
+    self.modelPath = modelPath
+  }
+
+  override func tearDown() {
+    modelPath = nil
+
+    super.tearDown()
+  }
+
+  func testModel_InitWithFilePath() {
+    XCTAssertNotNil(Model(filePath: modelPath))
+  }
+
+  func testModel_InitWithEmptyFilePath_FailsInitialization() {
+    XCTAssertNil(Model(filePath: ""))
+  }
+
+  func testModel_InitWithInvalidFilePath_FailsInitialization() {
+    XCTAssertNil(Model(filePath: "invalid/path"))
+  }
+}
+
+// MARK: - Constants
+
+private enum Constant {
+  static let modelInfo = (name: "add", extension: "bin")
+}
diff --git a/tensorflow/lite/experimental/swift/Tests/QuantizationParametersTests.swift b/tensorflow/lite/experimental/swift/Tests/QuantizationParametersTests.swift
new file mode 100644
index 0000000..65648c2
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Tests/QuantizationParametersTests.swift
@@ -0,0 +1,43 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+@testable import TensorFlowLite
+import XCTest
+
+class QuantizationParametersTests: XCTestCase {
+
+  func testQuantizationParameters_InitWithCustomValues() {
+    let parameters = QuantizationParameters(scale: 0.5, zeroPoint: 1)
+    XCTAssertEqual(parameters.scale, 0.5)
+    XCTAssertEqual(parameters.zeroPoint, 1)
+  }
+
+  func testQuantizationParameters_Equatable() {
+    let parameters1 = QuantizationParameters(scale: 0.5, zeroPoint: 1)
+    let parameters2 = QuantizationParameters(scale: 0.5, zeroPoint: 1)
+    XCTAssertEqual(parameters1, parameters2)
+
+    let parameters3 = QuantizationParameters(scale: 0.4, zeroPoint: 1)
+    XCTAssertNotEqual(parameters1, parameters3)
+    XCTAssertNotEqual(parameters2, parameters3)
+  }
+}
+
+// MARK: - Extensions
+
+extension QuantizationParameters: Equatable {
+  public static func == (lhs: QuantizationParameters, rhs: QuantizationParameters) -> Bool {
+    return lhs.scale == rhs.scale && lhs.zeroPoint == rhs.zeroPoint
+  }
+}
diff --git a/tensorflow/lite/experimental/swift/Tests/TensorTests.swift b/tensorflow/lite/experimental/swift/Tests/TensorTests.swift
new file mode 100644
index 0000000..4540043
--- /dev/null
+++ b/tensorflow/lite/experimental/swift/Tests/TensorTests.swift
@@ -0,0 +1,83 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+@testable import TensorFlowLite
+import XCTest
+
+class TensorTests: XCTestCase {
+
+  // MARK: - Tensor
+
+  func testTensor_Init() {
+    let name = "InputTensor"
+    let dataType: TensorDataType = .uInt8
+    let shape = TensorShape(Constant.dimensions)
+    guard let data = name.data(using: .utf8) else { XCTFail("Data should not be nil."); return }
+    let quantizationParameters = QuantizationParameters(scale: 0.5, zeroPoint: 1)
+    let inputTensor = Tensor(
+      name: name,
+      dataType: dataType,
+      shape: shape,
+      data: data,
+      quantizationParameters: quantizationParameters
+    )
+    XCTAssertEqual(inputTensor.name, name)
+    XCTAssertEqual(inputTensor.dataType, dataType)
+    XCTAssertEqual(inputTensor.shape, shape)
+    XCTAssertEqual(inputTensor.data, data)
+    XCTAssertEqual(inputTensor.quantizationParameters, quantizationParameters)
+  }
+
+  // MARK: - TensorShape
+
+  func testTensorShape_InitWithArray() {
+    let shape = TensorShape(Constant.dimensions)
+    XCTAssertEqual(shape.rank, Constant.dimensions.count)
+    XCTAssertEqual(shape.dimensions, Constant.dimensions)
+  }
+
+  func testTensorShape_InitWithElements() {
+    let shape = TensorShape(2, 2, 3)
+    XCTAssertEqual(shape.rank, Constant.dimensions.count)
+    XCTAssertEqual(shape.dimensions, Constant.dimensions)
+  }
+
+  func testTensorShape_InitWithArrayLiteral() {
+    let shape: TensorShape = [2, 2, 3]
+    XCTAssertEqual(shape.rank, Constant.dimensions.count)
+    XCTAssertEqual(shape.dimensions, Constant.dimensions)
+  }
+}
+
+// MARK: - Constants
+
+private enum Constant {
+  /// Array of 2 arrays of 2 arrays of 3 numbers: [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]].
+  static let dimensions = [2, 2, 3]
+}
+
+// MARK: - Extensions
+
+extension TensorShape: Equatable {
+  public static func == (lhs: TensorShape, rhs: TensorShape) -> Bool {
+    return lhs.rank == rhs.rank && lhs.dimensions == rhs.dimensions
+  }
+}
+
+extension Tensor: Equatable {
+  public static func == (lhs: Tensor, rhs: Tensor) -> Bool {
+    return lhs.name == rhs.name && lhs.dataType == rhs.dataType && lhs.shape == rhs.shape &&
+           lhs.data == rhs.data && lhs.quantizationParameters == rhs.quantizationParameters
+  }
+}
diff --git a/tensorflow/lite/experimental/writer/enum_mapping.h b/tensorflow/lite/experimental/writer/enum_mapping.h
index cb6ec3e..949a255 100644
--- a/tensorflow/lite/experimental/writer/enum_mapping.h
+++ b/tensorflow/lite/experimental/writer/enum_mapping.h
@@ -112,5 +112,29 @@
   }
 }
 
+inline MirrorPadMode MirrorPaddingModeToSchema(TfLiteMirrorPaddingMode mode) {
+  switch (mode) {
+    case kTfLiteMirrorPaddingUnknown:
+      return MirrorPadMode_REFLECT;  // TODO(aselle): consider an error
+    case kTfLiteMirrorPaddingReflect:
+      return MirrorPadMode_REFLECT;
+    case kTfLiteMirrorPaddingSymmetric:
+      return MirrorPadMode_SYMMETRIC;
+  }
+}
+
+inline CombinerType CombinerTypeToSchema(TfLiteCombinerType type) {
+  switch (type) {
+    case kTfLiteCombinerTypeSum:
+      return CombinerType_SUM;
+    case kTfLiteCombinerTypeMean:
+      return CombinerType_MEAN;
+    case kTfLiteCombinerTypeSqrtn:
+      return CombinerType_SQRTN;
+  }
+}
+
 }  // namespace tflite
 #endif  // TENSORFLOW_LITE_EXPERIMENTAL_WRITER_ENUM_MAPPING_H_
diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc
index 7381344..264a238 100644
--- a/tensorflow/lite/experimental/writer/option_writer_generator.cc
+++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc
@@ -22,54 +22,59 @@
 namespace tflite {
 namespace {
 // This is generated by grepping
-//  cat  third_party/tensorflow/lite/builtin_op_data.h
-//| grep "^} TfLite" | sed 's/^} TfLite\(.*\)Params;/\1Params/g' | grep -v "^}"
-static const char* param_structs[] = {"TfLiteConvParams",
-                                      "TfLitePoolParams",
-                                      "TfLiteDepthwiseConvParams",
-                                      "TfLiteSVDFParams",
-                                      "TfLiteRNNParams",
-                                      "TfLiteSequenceRNNParams",
-                                      "TfLiteFullyConnectedParams",
-                                      "TfLiteLSHProjectionParams",
-                                      "TfLiteSoftmaxParams",
-                                      "TfLiteConcatenationParams",
-                                      "TfLiteAddParams",
-                                      "TfLiteSpaceToBatchNDParams",
+//  cat  third_party/tensorflow/lite/c/builtin_op_data.h | grep "^} TfLite" |
+//  sed 's/^} \(TfLite.*\)Params;/\1Params/g' | grep -v "^}" | sed
+//  's/\(.*\)/"\1",/g' | sort
+static const char* param_structs[] = {"TfLiteAddParams",
+                                      "TfLiteArgMaxParams",
+                                      "TfLiteArgMinParams",
                                       "TfLiteBatchToSpaceNDParams",
-                                      "TfLiteMulParams",
-                                      "TfLiteSubParams",
+                                      "TfLiteBidirectionalSequenceLSTMParams",
+                                      "TfLiteBidirectionalSequenceRNNParams",
+                                      "TfLiteCastParams",
+                                      "TfLiteConcatenationParams",
+                                      "TfLiteConvParams",
+                                      "TfLiteDepthwiseConvParams",
                                       "TfLiteDivParams",
+                                      "TfLiteEmbeddingLookupSparseParams",
+                                      "TfLiteFakeQuantParams",
+                                      "TfLiteFullyConnectedParams",
+                                      "TfLiteGatherParams",
                                       "TfLiteL2NormParams",
+                                      "TfLiteLeakyReluParams",
                                       "TfLiteLocalResponseNormParams",
+                                      "TfLiteLSHProjectionParams",
                                       "TfLiteLSTMParams",
-                                      "TfLiteResizeBilinearParams",
-                                      "TfLiteResizeNearestNeighborParams",
+                                      "TfLiteMirrorPaddingParams",
+                                      "TfLiteMulParams",
+                                      "TfLiteOneHotParams",
+                                      "TfLitePackParams",
                                       "TfLitePadParams",
                                       "TfLitePadV2Params",
-                                      "TfLiteReshapeParams",
-                                      "TfLiteSkipGramParams",
-                                      "TfLiteSpaceToDepthParams",
-                                      "TfLiteCastParams",
-                                      "TfLiteEmbeddingLookupSparseParams",
-                                      "TfLiteGatherParams",
-                                      "TfLiteTransposeParams",
+                                      "TfLitePoolParams",
                                       "TfLiteReducerParams",
+                                      "TfLiteReshapeParams",
+                                      "TfLiteResizeBilinearParams",
+                                      "TfLiteResizeNearestNeighborParams",
+                                      "TfLiteRNNParams",
+                                      "TfLiteSequenceRNNParams",
+                                      "TfLiteShapeParams",
+                                      "TfLiteSkipGramParams",
+                                      "TfLiteSoftmaxParams",
+                                      "TfLiteSpaceToBatchNDParams",
+                                      "TfLiteSpaceToDepthParams",
+                                      "TfLiteSparseToDenseParams",
                                       "TfLiteSplitParams",
                                       "TfLiteSplitVParams",
                                       "TfLiteSqueezeParams",
                                       "TfLiteStridedSliceParams",
-                                      "TfLiteArgMaxParams",
-                                      "TfLiteArgMinParams",
+                                      "TfLiteSubParams",
+                                      "TfLiteSVDFParams",
                                       "TfLiteTransposeConvParams",
-                                      "TfLiteSparseToDenseParams",
-                                      "TfLiteShapeParams",
-                                      "TfLiteFakeQuantParams",
-                                      "TfLitePackParams",
-                                      "TfLiteOneHotParams",
-                                      "TfLiteLeakyReluParams",
-                                      "TfLiteMirrorPaddingParams",
+                                      "TfLiteTransposeParams",
+                                      "TfLiteUnidirectionalSequenceLSTMParams",
                                       "TfLiteUniqueParams",
+                                      "TfLiteUnpackParams",
                                       nullptr};
 }  // namespace
 
@@ -142,7 +147,6 @@
     op_to_option_["REDUCE_MAX"] = "ReducerOptions";
     op_to_option_["REDUCE_MIN"] = "ReducerOptions";
     op_to_option_["REDUCE_ANY"] = "ReducerOptions";
-    op_to_option_["UNPACK"] = "";
     op_to_option_["SUM"] = "ReducerOptions";
     op_to_option_["REDUCE_MAX"] = "ReducerOptions";
     op_to_option_["REDUCE_PROD"] = "ReducerOptions";
@@ -151,30 +155,26 @@
     op_to_option_["AVERAGE_POOL_2D"] = "Pool2DOptions";
     op_to_option_["MAX_POOL_2D"] = "Pool2DOptions";
     op_to_option_["L2_NORMALIZATION"] = "L2NormOptions";
-    op_to_option_["BIDIRECTIONAL_SEQUENCE_LSTM"] = "LSTMOptions";
-    op_to_option_["UNIDIRECTIONAL_SEQUENCE_LSTM"] = "LSTMOptions";
-    op_to_option_["BIDIRECTIONAL_SEQUENCE_RNN"] = "SequenceRNNOptions";
     op_to_option_["UNIDIRECTIONAL_SEQUENCE_RNN"] = "SequenceRNNOptions";
-    op_to_option_["UNIDIRECTIONAL_SEQUENCE_RNN"] = "SequenceRNNOptions";
-    op_to_option_["MIRROR_PAD"] = "";  // TODO(karimnosseir): MirrorPadOptions.
-    op_to_option_["UNIQUE"] = "";      // TODO(karimnosseir): UniqueOptions.
-    // Manually specified mappings between ops and options (none)
-    op_to_option_["EMBEDDING_LOOKUP"] =
-        "";  // TODO(aselle): maybe something else.
+    op_to_option_["MAXIMUM"] = "MaximumMinimumOptions";
+    op_to_option_["MINIMUM"] = "MaximumMinimumOptions";
+    op_to_option_["CUSTOM"] = "";    // TODO(aselle): maybe something else.
+    op_to_option_["DELEGATE"] = "";  // TODO(aselle): maybe something else.
+
+    // Manually specified mappings from ops to "none" options -- these are
+    // ops without a corresponding Options message in the schema as yet. If
+    // these ops do get assigned an Options message in the future, the
+    // mappings here need to be updated as well.
+    op_to_option_["EMBEDDING_LOOKUP"] = "";
     op_to_option_["FLOOR"] = "";
     op_to_option_["CEIL"] = "";
-    op_to_option_["HASHTABLE_LOOKUP"] =
-        "";  // TODO(aselle): maybe something else.
+    op_to_option_["HASHTABLE_LOOKUP"] = "";
     op_to_option_["LOGISTIC"] = "";
     op_to_option_["RELU"] = "";
     op_to_option_["RELU_N1_TO_1"] = "";
     op_to_option_["RELU6"] = "";
     op_to_option_["TANH"] = "";
-    op_to_option_["CUSTOM"] = "";    // TODO(aselle): maybe something else.
-    op_to_option_["DELEGATE"] = "";  // TODO(aselle): maybe something else.
     op_to_option_["PRELU"] = "";
-    op_to_option_["MAXIMUM"] = "";  // TODO(aselle): MaximumMinimumOptions
-    op_to_option_["MINIMUM"] = "";  // TODO(aselle): MaximumMinimumOptions
     op_to_option_["SIN"] = "";
     op_to_option_["LOG"] = "";
     op_to_option_["SQRT"] = "";
@@ -186,6 +186,7 @@
     option_to_struct_["DepthwiseConv2DOptions"] = "TfLiteDepthwiseConvParams";
     option_to_struct_["LocalResponseNormalizationOptions"] =
         "TfLiteLocalResponseNormParams";
+    option_to_struct_["MirrorPadOptions"] = "TfLiteMirrorPaddingParams";
     // Now for every op, try to find an option.
     bool fatal = false;
     for (auto op_name : ops_) {
@@ -225,13 +226,15 @@
           if (!param_struct_found) {
             std::cerr << "Failed to get param struct for option " << option_name
                       << std::endl;
-            fatal = true;
           } else {
             option_to_struct_.insert(std::make_pair(option_name, params_guess));
           }
         }
       }
     }
+    if (fatal) {
+      exit(1);
+    }
   }
 
  private:
@@ -242,16 +245,28 @@
       option_to_type_function_;
 };
 
+void GenerateImportForResizeBilinearOp(FILE* fp) {
+  fprintf(fp,
+          "  case BuiltinOperator_RESIZE_BILINEAR:  {\n"
+          "    const auto* params = reinterpret_cast<const "
+          "TfLiteResizeBilinearParams*>(builtin_op_data);\n"
+          "    auto union_type = CreateResizeBilinearOptions(*fbb, "
+          "params->align_corners).Union();\n"
+          "    return std::make_pair(BuiltinOptions_ResizeBilinearOptions, "
+          "union_type);\n"
+          "  }\n  break;\n");
+}
+
 void GenerateImportForOp(FILE* fp, const std::string& op_name,
                          const std::string& option_name,
                          const std::string& option_type,
                          const flatbuffers::TypeTable* options,
                          const std::string& struct_name) {
-  // Skip tricky ones for now
-  if (struct_name == "TfLiteResizeBilinearParams") return;
-  if (struct_name == "TfLiteSqueezeParams") return;
-  if (struct_name == "TfLiteEmbeddingLookupSparseParams") return;
-  if (struct_name == "TfLiteReshapeParams") return;
+  // Special-case ResizeBilinear which has some deprecated fields.
+  if (struct_name == "TfLiteResizeBilinearParams") {
+    GenerateImportForResizeBilinearOp(fp);
+    return;
+  }
 
   fprintf(fp, "  case BuiltinOperator_%s:  {\n", op_name.c_str());
   fprintf(fp,
@@ -261,6 +276,9 @@
 
   for (size_t i = 0; i < options->num_elems; i++) {
     std::string elem_name = options->names[i];
+    bool is_int_vector = false;
+    std::string vector_name = elem_name;
+    std::string vector_size;
     // TODO(aselle): Irregular naming in builtins
     if (elem_name == "fused_activation_function")
       elem_name = "activation";
@@ -272,8 +290,26 @@
       elem_name = "dilation_height_factor";
     else if (elem_name == "dilation_w_factor")
       elem_name = "dilation_width_factor";
-    else if (elem_name == "new_shape")
-      elem_name = "shape";
+    else if (elem_name == "idx_out_type")
+      elem_name = "index_out_type";
+
+    // Vector fields treated specially.
+    if (elem_name == "new_shape") {
+      is_int_vector = true;
+      vector_name = "shape";
+      vector_size = "num_dimensions";
+    } else if (elem_name == "squeeze_dims") {
+      is_int_vector = true;
+      vector_size = "num_squeeze_dims";
+    }
+
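+    // For instance, for TfLiteReshapeParams's new_shape field this path
+    // emits (roughly):
+    //   auto val0 = fbb->CreateVector(std::vector<int>(
+    //       params->shape, params->shape + params->num_dimensions));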
+    if (is_int_vector) {
+      fprintf(fp,
+              "    auto val%zu = fbb->CreateVector("
+              "std::vector<int>(params->%s, params->%s + params->%s));\n",
+              i, vector_name.c_str(), vector_name.c_str(), vector_size.c_str());
+      continue;
+    }
 
     flatbuffers::TypeCode code = options->type_codes[i];
     auto contained_type = code.sequence_ref != -1
@@ -292,6 +328,10 @@
       mapper = "LSTMKernelTypeToSchema";
     } else if (contained_type == LSHProjectionTypeTypeTable) {
       mapper = "LSHProjectionTypeToSchema";
+    } else if (contained_type == MirrorPadModeTypeTable) {
+      mapper = "MirrorPaddingModeToSchema";
+    } else if (contained_type == CombinerTypeTypeTable) {
+      mapper = "CombinerTypeToSchema";
     }
 
     fprintf(fp,
diff --git a/tensorflow/lite/experimental/writer/writer_lib.cc b/tensorflow/lite/experimental/writer/writer_lib.cc
index a0ce4b7..2bdc41b 100644
--- a/tensorflow/lite/experimental/writer/writer_lib.cc
+++ b/tensorflow/lite/experimental/writer/writer_lib.cc
@@ -219,6 +219,11 @@
   std::vector<int> output;
   output.reserve(input.size());
   for (int x : input) {
+    // -1 (kOptionalTensor) is a special value representing an optional
+    // tensor that is not present; pass it through unchanged.
+    if (x == -1) {
+      output.push_back(x);
+      continue;
+    }
     if (tensor_to_written_tensor_[x] != -1) {
       output.push_back(tensor_to_written_tensor_[x]);
     }
diff --git a/tensorflow/lite/g3doc/devguide.md b/tensorflow/lite/g3doc/devguide.md
index cbad036..7e23f1d 100644
--- a/tensorflow/lite/g3doc/devguide.md
+++ b/tensorflow/lite/g3doc/devguide.md
@@ -92,7 +92,7 @@
 
 TensorFlow models may be saved as a .pb or .pbtxt `tf.GraphDef` file. In order
 to convert the `tf.GraphDef` file to TensorFlow Lite, the model must first be
-frozen. This process invovles several file formats including the `frozen
+frozen. This process involves several file formats including the `frozen
 GraphDef`:
 
 *   `tf.GraphDef` (.pb or .pbtxt) — A protobuf that represents the TensorFlow
@@ -166,7 +166,7 @@
 troubleshooting help, and if that doesn't help, please
 [file an issue](https://github.com/tensorflow/tensorflow/issues).
 
-### Graph vizualization tool
+### Graph visualization tool
 
 The [development repo](https://github.com/tensorflow/tensorflow) contains a tool
 to visualize TensorFlow Lite models after conversion. To build the
diff --git a/tensorflow/lite/g3doc/models/_index.yaml b/tensorflow/lite/g3doc/models/_index.yaml
index f4d8bc4..1c6f41b 100644
--- a/tensorflow/lite/g3doc/models/_index.yaml
+++ b/tensorflow/lite/g3doc/models/_index.yaml
@@ -4,7 +4,7 @@
 landing_page:
   body_class: tfo-hide-page-nav
   custom_css_path: /site-assets/css/style.css
-  show_side_navs: true
+  nav: both
   rows:
 
   # Hero
diff --git a/tensorflow/lite/g3doc/tf_ops_compatibility.md b/tensorflow/lite/g3doc/tf_ops_compatibility.md
index d7c71df..4f5def9 100644
--- a/tensorflow/lite/g3doc/tf_ops_compatibility.md
+++ b/tensorflow/lite/g3doc/tf_ops_compatibility.md
@@ -414,6 +414,18 @@
 }
 ```
 
+**GATHER_ND**
+
+```
+Inputs {
+  0: params tensor
+  1: indices tensor
+}
+Outputs {
+  0: a tensor with the same type as the params tensor.
+}
+```
+
 **GREATER**
 
 ```
@@ -713,6 +725,17 @@
 }
 ```
 
+**RANK**
+
+```
+Inputs {
+  0: a tensor
+}
+Outputs {
+  0: a 0-D int32 tensor representing the rank of the input
+}
+```
+
 **RELU**
 
 ```
@@ -1000,6 +1023,22 @@
 }
 ```
 
+**WHERE**
+
+```
+Inputs {
+  0: condition, a tensor of type bool.
+  1: x, a tensor which may have the same shape as condition. If condition is
+     rank 1, x may have higher rank, but its first dimension must match the
+     size of condition.
+  2: y, a tensor with the same shape and type as x.
+}
+Outputs {
+  0: a tensor with the same type and shape as x and y if they are non-None,
+     or a tensor with shape (num_true, dim_size(condition)).
+}
+```
+
 **ZEROS_LIKE**
 
 ```
diff --git a/tensorflow/lite/g3doc/using_select_tf_ops.md b/tensorflow/lite/g3doc/using_select_tf_ops.md
index 1c5f23b..7c1ad20 100644
--- a/tensorflow/lite/g3doc/using_select_tf_ops.md
+++ b/tensorflow/lite/g3doc/using_select_tf_ops.md
@@ -49,7 +49,7 @@
 limitations.
 
 The following example shows how to use `target_ops` in the
-[`TFLiteConverter`](./convert/python_api) Python API.
+[`TFLiteConverter`](./convert/python_api.md) Python API.
 
 ```
 import tensorflow as tf
@@ -150,8 +150,8 @@
 added to
 `tensorflow/lite/examples/ios/camera/tflite_camera_example_with_select_tf_ops.xcodeproj`.
 
-To use this feature in a your own project, either clone the example project or
-set the project settings for a new or existing project to the following:
+To use this feature in your own project, either clone the example project or set
+the project settings for a new or existing project to the following:
 
 *   In Build Phases -> Link Binary With Libraries, add the static libraries
     under `tensorflow/contrib/makefile/gen/lib/` directory:
diff --git a/tensorflow/lite/graph_info.cc b/tensorflow/lite/graph_info.cc
index 1cec0d0..a909192 100644
--- a/tensorflow/lite/graph_info.cc
+++ b/tensorflow/lite/graph_info.cc
@@ -14,6 +14,7 @@
 ==============================================================================*/
 #include "tensorflow/lite/graph_info.h"
 #include <algorithm>
+#include "tensorflow/lite/c/c_api_internal.h"
 
 namespace tflite {
 
@@ -94,6 +95,10 @@
     // been identified.
     for (int output_index : info_->outputs()) {
       int output_epoch = tensor_epochs_[output_index];
+      if (output_epoch == kEpochAlwaysReady) {
+        // This happens when an input of the subgraph is also an output of
+        // the subgraph.
+        continue;
+      }
       NodeSubset& output_subset = (*node_subsets_)[output_epoch];
       output_subset.output_tensors.push_back(output_index);
     }
@@ -138,7 +143,8 @@
     // See if all dependencies of this node are already assigned to a
     // node sub set.
     for (int input_tensor_index : TfLiteIntArrayView(node.inputs)) {
-      if (tensor_epochs_[input_tensor_index] == kEpochNotReady) {
+      if (input_tensor_index != kOptionalTensor &&
+          tensor_epochs_[input_tensor_index] == kEpochNotReady) {
         return false;
       }
     }
@@ -162,6 +168,9 @@
       // Look at our inputs one more time to update that tensor's
       // epochs' outputs
       for (int input_tensor_index : TfLiteIntArrayView(node.inputs)) {
+        if (input_tensor_index == kOptionalTensor) {
+          continue;
+        }
         int input_epoch = tensor_epochs_[input_tensor_index];
         int node_epoch = current_epoch;
         if (input_epoch != node_epoch) {
diff --git a/tensorflow/lite/graph_info_test.cc b/tensorflow/lite/graph_info_test.cc
index 4d8bbdc..b72728a 100644
--- a/tensorflow/lite/graph_info_test.cc
+++ b/tensorflow/lite/graph_info_test.cc
@@ -101,7 +101,7 @@
 }
 
 // Test an empty trivial graph with no partitions.
-TEST(PartitionTest, Nodes0_PartitionNodes0) {
+TEST(PartitionTest, Nodes0PartitionNodes0) {
   SimpleTestGraph graph;
   std::vector<int> nodes_to_partition = {};
   std::vector<NodeSubset> generated_subgraphs;
@@ -109,6 +109,20 @@
   CheckPartitionSubgraphs(generated_subgraphs, {});
 }
 
+// Test a trivial graph with no nodes and only 1 tensor,
+// where that tensor is both an input and an output of the graph.
+// This is a regression test to ensure the partitioning logic
+// handles this case without crashing.
+TEST(PartitionTest, Nodes0PartitionNodes0Tensors1) {
+  SimpleTestGraph graph;
+  graph.AddTensors(1);
+  graph.SetInputsAndOutputs({0}, {0});
+  std::vector<int> nodes_to_partition = {};
+  std::vector<NodeSubset> generated_subgraphs;
+  PartitionGraph(graph, nodes_to_partition, &generated_subgraphs);
+  CheckPartitionSubgraphs(generated_subgraphs, {});
+}
+
 // Test a 1 node graph with no partitions.
 // Input: tensor(0) -> node(0) -> tensor(1), nodes_to_partition=[]
 // Output: [kTfNoPartition, tensor(0) -> node(0) -> tensor(1)]
diff --git a/tensorflow/lite/interpreter.cc b/tensorflow/lite/interpreter.cc
index c840c9a..75a23de 100644
--- a/tensorflow/lite/interpreter.cc
+++ b/tensorflow/lite/interpreter.cc
@@ -223,8 +223,6 @@
 }
 
 TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
-  // TODO(ycling): It seems Flex delegate doesn't work on non-primary subgraphs.
-  // Need to investigate.
   for (auto& subgraph : subgraphs_) {
     TF_LITE_ENSURE_OK(context_, subgraph->ModifyGraphWithDelegate(delegate));
   }
diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
index 725bb32..16cca45 100644
--- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
+++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
@@ -107,7 +107,7 @@
       throw new IllegalArgumentException(
           "Null inputs are allowed only if the Tensor is bound to a buffer handle.");
     }
-    throwExceptionIfTypeIsIncompatible(src);
+    throwIfDataIsIncompatible(src);
     if (isByteBuffer(src)) {
       ByteBuffer srcBuffer = (ByteBuffer) src;
       // For direct ByteBuffer instances we support zero-copy. Note that this assumes the caller
@@ -138,7 +138,7 @@
       throw new IllegalArgumentException(
           "Null outputs are allowed only if the Tensor is bound to a buffer handle.");
     }
-    throwExceptionIfTypeIsIncompatible(dst);
+    throwIfDataIsIncompatible(dst);
     if (dst instanceof ByteBuffer) {
       ByteBuffer dstByteBuffer = (ByteBuffer) dst;
       dstByteBuffer.put(buffer());
@@ -159,6 +159,7 @@
     if (isByteBuffer(input)) {
       return null;
     }
+    throwIfTypeIsIncompatible(input);
     int[] inputShape = computeShapeOf(input);
     if (Arrays.equals(shapeCopy, inputShape)) {
       return null;
@@ -243,7 +244,27 @@
     }
   }
 
-  private void throwExceptionIfTypeIsIncompatible(Object o) {
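+  /** Throws an IllegalArgumentException if {@code o}'s type or shape is incompatible. */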
+  private void throwIfDataIsIncompatible(Object o) {
+    throwIfTypeIsIncompatible(o);
+    throwIfShapeIsIncompatible(o);
+  }
+
+  private void throwIfTypeIsIncompatible(Object o) {
+    // ByteBuffer payloads can map to any type, so exempt it from the check.
+    if (isByteBuffer(o)) {
+      return;
+    }
+    DataType oType = dataTypeOf(o);
+    if (oType != dtype) {
+      throw new IllegalArgumentException(
+          String.format(
+              "Cannot convert between a TensorFlowLite tensor with type %s and a Java "
+                  + "object of type %s (which is compatible with the TensorFlowLite type %s).",
+              dtype, o.getClass().getName(), oType));
+    }
+  }
+
+  private void throwIfShapeIsIncompatible(Object o) {
     if (isByteBuffer(o)) {
       ByteBuffer oBuffer = (ByteBuffer) o;
       if (oBuffer.capacity() != numBytes()) {
@@ -255,15 +276,6 @@
       }
       return;
     }
-    DataType oType = dataTypeOf(o);
-    if (oType != dtype) {
-      throw new IllegalArgumentException(
-          String.format(
-              "Cannot convert between a TensorFlowLite tensor with type %s and a Java "
-                  + "object of type %s (which is compatible with the TensorFlowLite type %s).",
-              dtype, o.getClass().getName(), oType));
-    }
-
     int[] oShape = computeShapeOf(o);
     if (!Arrays.equals(oShape, shapeCopy)) {
       throw new IllegalArgumentException(
diff --git a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index c5496e3..ff33256 100644
--- a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -21,6 +21,7 @@
 import java.io.File;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
 import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
 import java.nio.file.Files;
@@ -248,6 +249,18 @@
   }
 
   @Test
+  public void testRunWithUnsupportedInputType() {
+    FloatBuffer floatBuffer = FloatBuffer.allocate(10);
+    float[][][][] parsedOutputs = new float[2][8][8][3];
+    try (Interpreter interpreter = new Interpreter(MODEL_FILE)) {
+      interpreter.run(floatBuffer, parsedOutputs);
+      fail();
+    } catch (IllegalArgumentException e) {
+      assertThat(e).hasMessageThat().contains("DataType error: cannot resolve DataType of");
+    }
+  }
+
+  @Test
   public void testRunWithWrongOutputType() {
     Interpreter interpreter = new Interpreter(MODEL_FILE);
     float[] oneD = {1.23f, 6.54f, 7.81f};
diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD
index 90e4d82..c24b6ed 100644
--- a/tensorflow/lite/kernels/BUILD
+++ b/tensorflow/lite/kernels/BUILD
@@ -180,6 +180,7 @@
         "floor_mod.cc",
         "fully_connected.cc",
         "gather.cc",
+        "gather_nd.cc",
         "hashtable_lookup.cc",
         "if.cc",
         "l2norm.cc",
@@ -198,6 +199,7 @@
         "pooling.cc",
         "pow.cc",
         "range.cc",
+        "rank.cc",
         "reduce.cc",
         "reshape.cc",
         "resize_bilinear.cc",
@@ -225,6 +227,8 @@
         "unidirectional_sequence_rnn.cc",
         "unique.cc",
         "unpack.cc",
+        "where.cc",
+        "while.cc",
         "zeros_like.cc",
     ],
     hdrs = [
@@ -716,6 +720,19 @@
 )
 
 tf_cc_test(
+    name = "gather_nd_test",
+    size = "small",
+    srcs = ["gather_nd_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+tf_cc_test(
     name = "topk_v2_test",
     size = "small",
     srcs = ["topk_v2_test.cc"],
@@ -1082,6 +1099,19 @@
 )
 
 tf_cc_test(
+    name = "rank_test",
+    size = "small",
+    srcs = ["rank_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+tf_cc_test(
     name = "pow_test",
     size = "small",
     srcs = ["pow_test.cc"],
@@ -1159,6 +1189,19 @@
 )
 
 tf_cc_test(
+    name = "where_test",
+    size = "small",
+    srcs = ["where_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/lite:builtin_op_data",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+tf_cc_test(
     name = "zeros_like_test",
     size = "small",
     srcs = ["zeros_like_test.cc"],
@@ -1218,6 +1261,24 @@
     deps = [
         ":builtin_ops",
         ":kernel_util",
+        ":subgraph_test_util",
+        ":test_util",
+        "//tensorflow/lite:builtin_op_data",
+        "//tensorflow/lite:framework",
+        "@com_google_googletest//:gtest",
+        "@flatbuffers",
+    ],
+)
+
+tf_cc_test(
+    name = "while_test",
+    size = "small",
+    srcs = ["while_test.cc"],
+    tags = ["tflite_not_portable_ios"],
+    deps = [
+        ":builtin_ops",
+        ":kernel_util",
+        ":subgraph_test_util",
         "//tensorflow/lite:builtin_op_data",
         "//tensorflow/lite:framework",
         "//tensorflow/lite/kernels:test_util",
@@ -1285,3 +1346,31 @@
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+cc_library(
+    name = "subgraph_test_util",
+    testonly = 1,
+    srcs = ["subgraph_test_util.cc"],
+    hdrs = ["subgraph_test_util.h"],
+    deps = [
+        ":builtin_ops",
+        ":kernel_util",
+        ":test_util",
+        "//tensorflow/lite:builtin_op_data",
+        "//tensorflow/lite:framework",
+        "@com_google_googletest//:gtest",
+        "@flatbuffers",
+    ],
+)
+
+tf_cc_test(
+    name = "subgraph_test_util_test",
+    size = "small",
+    srcs = ["subgraph_test_util_test.cc"],
+    deps = [
+        ":subgraph_test_util",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc
index 2b35cc4..7ef99cd 100644
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@@ -23,7 +23,10 @@
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
 #include "tensorflow/lite/kernels/internal/tensor.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
@@ -118,7 +121,7 @@
   TfLiteTensor* output = GetOutput(context, node, 0);
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
 
-  if (input->type == kTfLiteUInt8) {
+  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
     static constexpr int kInputIntegerBits = 4;
 
     const double input_real_multiplier =
@@ -177,8 +180,15 @@
   TfLiteTensor* output = GetOutput(context, node, 0);
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
 
-  if (input->type == kTfLiteUInt8) {
-    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
+    if (input->type == kTfLiteUInt8) {
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point,
+                        std::numeric_limits<uint8_t>::min());
+    }
+    if (input->type == kTfLiteInt8) {
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point,
+                        std::numeric_limits<int8_t>::min());
+    }
     TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
 
     static constexpr int kInputIntegerBits = 4;
@@ -261,8 +271,13 @@
   TfLiteTensor* output = GetOutput(context, node, 0);
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
 
-  if (input->type == kTfLiteUInt8) {
-    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 255);
+  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
+    if (input->type == kTfLiteUInt8) {
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 255);
+    }
+    if (input->type == kTfLiteInt8) {
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 127);
+    }
     TF_LITE_ENSURE_EQ(context, output->params.scale, 16.0 / 256);
 
     static const double kBeta = 1.0;
@@ -353,6 +368,24 @@
   }
 }
 
+namespace {
+template <typename T>
+void QuantizedRelu6(const TfLiteTensor* input, TfLiteTensor* output) {
+  ActivationParams params;
+  params.activation_type = FusedActivationFunctionType::kRelu6;
+  params.quantized_activation_min =
+      std::max(static_cast<int32_t>(std::numeric_limits<T>::min()),
+               output->params.zero_point +
+                   static_cast<int32>(roundf(0.f / output->params.scale)));
+  params.quantized_activation_max =
+      std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
+               output->params.zero_point +
+                   static_cast<int32>(roundf(6.f / output->params.scale)));
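+  // E.g. for int8 with scale = 1/16 and zero_point = 0 (as in the unit
+  // tests), this clamps activations to [max(-128, 0), min(127, 96)] = [0, 96].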
+  optimized_ops::ReluX(params, GetTensorShape(input), GetTensorData<T>(input),
+                       GetTensorShape(output), GetTensorData<T>(output));
+}
+}  // namespace
+
 TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
@@ -365,23 +398,16 @@
       for (; in < in_end; in++, out++) *out = std::min(std::max(0.f, *in), 6.f);
       return kTfLiteOk;
     } break;
-    case kTfLiteUInt8: {
-      ActivationParams params;
-      params.activation_type = FusedActivationFunctionType::kRelu6;
-      params.quantized_activation_min = std::max(
-          0, output->params.zero_point +
-                 static_cast<int32>(roundf(0.f / output->params.scale)));
-      params.quantized_activation_max = std::min(
-          255, output->params.zero_point +
-                   static_cast<int32>(roundf(6.f / output->params.scale)));
-      optimized_ops::ReluX(params, GetTensorShape(input),
-                           GetTensorData<uint8>(input), GetTensorShape(output),
-                           GetTensorData<uint8>(output));
+    case kTfLiteUInt8:
+      QuantizedRelu6<uint8_t>(input, output);
+      return kTfLiteOk;
+    case kTfLiteInt8: {
+      QuantizedRelu6<int8_t>(input, output);
       return kTfLiteOk;
     } break;
     default:
       context->ReportError(
-          context, "Only float32 and uint8 supported currently, got %s.",
+          context, "Only float32, uint8 and int8 supported currently, got %s.",
           TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
@@ -436,6 +462,16 @@
       }
       return kTfLiteOk;
     } break;
+    case kTfLiteInt8: {
+      const auto input_shape = GetTensorShape(input);
+      const auto output_shape = GetTensorShape(output);
+      const int size = MatchingFlatSize(input_shape, output_shape);
+      reference_integer_ops::Tanh(
+          input->params.zero_point, data->input_range_radius,
+          data->input_multiplier, data->input_left_shift, size,
+          GetTensorData<int8_t>(input), GetTensorData<int8_t>(output));
+      return kTfLiteOk;
+    } break;
     default:
       context->ReportError(context, "Only float32 supported currently, got %s.",
                            TfLiteTypeGetName(input->type));
@@ -493,6 +529,15 @@
       }
       break;
     }
+    case kTfLiteInt8: {
+      const int input_size =
+          MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
+      reference_integer_ops::Logistic(
+          input->params.zero_point, data->input_range_radius,
+          data->input_multiplier, data->input_left_shift, input_size,
+          GetTensorData<int8_t>(input), GetTensorData<int8_t>(output));
+      break;
+    }
     default:
       context->ReportError(context, "Only float32 supported currently, got %s.",
                            TfLiteTypeGetName(input->type));
@@ -815,6 +860,21 @@
       }
       return kTfLiteOk;
     }
+    case kTfLiteInt8: {
+      const auto input_shape = GetTensorShape(input);
+      const auto output_shape = GetTensorShape(output);
+      const int trailing_dim = input_shape.DimensionsCount() - 1;
+      const int outer_size =
+          MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+      const int depth =
+          MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+      reference_integer_ops::LogSoftmax(
+          data->input_multiplier, data->input_left_shift,
+          data->reverse_scaling_divisor, data->reverse_scaling_right_shift,
+          data->diff_min, outer_size, depth, GetTensorData<int8_t>(input),
+          GetTensorData<int8_t>(output));
+      return kTfLiteOk;
+    }
     default:
       context->ReportError(context, "Only float32 supported currently, got %s",
                            TfLiteTypeGetName(input->type));
diff --git a/tensorflow/lite/kernels/activations_test.cc b/tensorflow/lite/kernels/activations_test.cc
index 5e3c56e..ccce4b0 100644
--- a/tensorflow/lite/kernels/activations_test.cc
+++ b/tensorflow/lite/kernels/activations_test.cc
@@ -32,6 +32,8 @@
     input_ = AddInput(input);
     if (input.type == TensorType_UINT8) {
       output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
+    } else if (input.type == TensorType_INT8) {
+      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
     } else {
       output_ = AddOutput({input.type, {}});
     }
@@ -172,7 +174,7 @@
                              })));
 }
 
-TEST(QuantizedActivationsOpTest, Relu6) {
+TEST(QuantizedActivationsOpTest, Relu6Uint8) {
   const float kMin = -1;
   const float kMax = 127.f / 128.f;
   QuantizedActivationsOpModel m(
@@ -195,7 +197,29 @@
               ElementsAreArray({128, 128, 160, 192, 176, 128, 224, 144}));
 }
 
-TEST(QuantizedActivationsOpTest, Tanh) {
+TEST(QuantizedActivationsOpTest, Relu6Int8) {
+  const float kMin = -1;
+  const float kMax = 127.f / 128.f;
+  QuantizedActivationsOpModel m(
+      BuiltinOperator_RELU6,
+      /*input=*/{TensorType_INT8, {1, 2, 4, 1}, 8 * kMin, 8 * kMax},
+      /*output=*/{TensorType_INT8, {1, 2, 4, 1}, 8 * kMin, 8 * kMax});
+  m.SetInput<int8_t>({
+      0, -6, 2, 4,   //
+      3, -2, 10, 1,  //
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(), ElementsAreArray(ArrayFloatNear(
+                                                    {
+                                                        0, 0, 2, 4,  //
+                                                        3, 0, 6, 1,  //
+                                                    },
+                                                    kQuantizedTolerance)));
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray({0, 0, 32, 64, 48, 0, 96, 16}));
+}
+
+TEST(QuantizedActivationsOpTest, TanhUint8) {
   const float kMin = -1;
   const float kMax = 127.f / 128.f;
   QuantizedActivationsOpModel m(
@@ -218,6 +242,29 @@
               ElementsAreArray({128, 0, 251, 255, 0, 5, 255, 225}));
 }
 
+TEST(QuantizedActivationsOpTest, TanhInt8) {
+  const float kMin = -1;
+  const float kMax = 127.f / 128.f;
+  QuantizedActivationsOpModel m(
+      BuiltinOperator_TANH,
+      /*input=*/{TensorType_INT8, {1, 2, 4, 1}, 8 * kMin, 8 * kMax},
+      /*output=*/{TensorType_INT8, {1, 2, 4, 1}, kMin, kMax});
+  m.SetInput<int8_t>({
+      0, -6, 2, 4,   //
+      -4, -2, 8, 1,  //
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      0.0, -0.999987, 0.964027, 0.999329,     //
+                      -0.999329, -0.96402, 0.99999, 0.76159,  //
+                  },
+                  kQuantizedTolerance)));
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray({0, -128, 123, 127, -128, -123, 127, 97}));
+}
+
 TEST(QuantizedActivationsOpTest, TanhInt16) {
   const float kMin = -1;
   const float kMax = 32767.f / 32768.f;
@@ -253,7 +300,7 @@
                              })));
 }
 
-TEST(QuantizedActivationsOpTest, Sigmoid) {
+TEST(QuantizedActivationsOpTest, SigmoidUint8) {
   QuantizedActivationsOpModel m(
       BuiltinOperator_LOGISTIC,
       /*input=*/{TensorType_UINT8, {1, 2, 4, 1}, -10, 10});
@@ -273,6 +320,26 @@
               ElementsAreArray({128, 1, 227, 251, 244, 32, 255, 188}));
 }
 
+TEST(QuantizedActivationsOpTest, SigmoidInt8) {
+  QuantizedActivationsOpModel m(
+      BuiltinOperator_LOGISTIC,
+      /*input=*/{TensorType_INT8, {1, 2, 4, 1}, -10, 10});
+  m.SetInput<int8_t>({
+      0, -6, 2, 4,   //
+      3, -2, 10, 1,  //
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      0.5, 0.002473, 0.880797, 0.982014,       //
+                      0.952574, 0.119203, 0.999955, 0.731059,  //
+                  },
+                  kQuantizedTolerance)));
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray({0, -127, 99, 123, 116, -99, 127, 60}));
+}
+
 TEST(QuantizedActivationsOpTest, SigmoidInt16) {
   const float kMin = -1;
   const float kMax = 32767.f / 32768.f;
@@ -705,7 +772,7 @@
                               })));
 }
 
-TEST(QuantizedActivationsOpTest, LogSoftmax) {
+TEST(QuantizedActivationsOpTest, LogSoftmaxUint8) {
   const float kLogSoftmaxQuantizedTolerance = 16 / 256.0;
   QuantizedActivationsOpModel m(
       BuiltinOperator_LOG_SOFTMAX,
@@ -727,6 +794,30 @@
               ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
 }
 
+TEST(QuantizedActivationsOpTest, LogSoftmaxInt8) {
+  const float kLogSoftmaxQuantizedTolerance = 0.06355;
+  QuantizedActivationsOpModel m(
+      BuiltinOperator_LOG_SOFTMAX,
+      /*input=*/{TensorType_INT8, {2, 4}, -10, 10},
+      /*output=*/{TensorType_INT8, {}, 0, 0, 16. / 256, 127});
+  m.SetInput<int8_t>({
+      0, -6, 2, 4,   //
+      3, -2, 10, 1,  //
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      -4.14297, -10.14297, -2.14297, -.142971,    //
+                      -7.00104, -12.00104, -.00104087, -9.00104,  //
+                  },
+                  kLogSoftmaxQuantizedTolerance)));
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray({
+                                         61, -36, 93, 125,   //
+                                         15, -65, 127, -16,  //
+                                     }));
+}
+
 // A base class of PRelu op model. It provides the constructor for
 // FloatPReluOpModel and QuantizedPReluOpModel.
 class BasePReluOpModel : public SingleOpModel {
diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc
index 9867cc5..4cfe435 100644
--- a/tensorflow/lite/kernels/add.cc
+++ b/tensorflow/lite/kernels/add.cc
@@ -12,6 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
@@ -92,7 +93,7 @@
     output_size = TfLiteIntArrayCopy(input1->dims);
   }
 
-  if (output->type == kTfLiteUInt8) {
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
     // 8bit -> 8bit general quantized path, with general rescalings
     data->input1_offset = -input1->params.zero_point;
     data->input2_offset = -input2->params.zero_point;
@@ -117,10 +118,15 @@
     QuantizeMultiplierSmallerThanOneExp(
         real_output_multiplier, &data->output_multiplier, &data->output_shift);
 
-    CalculateActivationRangeUint8(params->activation, output,
-                                  &data->output_activation_min,
-                                  &data->output_activation_max);
-
+    if (output->type == kTfLiteUInt8) {
+      CalculateActivationRangeUint8(params->activation, output,
+                                    &data->output_activation_min,
+                                    &data->output_activation_max);
+    } else {
+      CalculateActivationRangeInt8(params->activation, output,
+                                   &data->output_activation_min,
+                                   &data->output_activation_max);
+    }
   } else if (output->type == kTfLiteInt16) {
     // 16bit -> 16bit special quantized path, supporting only a rather
     // narrow case of quantization parameters: zero_points must all be 0
@@ -219,7 +225,7 @@
                               const TfLiteTensor* input1,
                               const TfLiteTensor* input2,
                               TfLiteTensor* output) {
-  if (output->type == kTfLiteUInt8) {
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
     tflite::ArithmeticParams op_params;
     op_params.left_shift = data->left_shift;
     op_params.input1_offset = data->input1_offset;
@@ -235,25 +241,33 @@
                         data->output_activation_max, &op_params);
     bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
         GetTensorShape(input1), GetTensorShape(input2), &op_params);
-#define TF_LITE_ADD(type, opname)                                      \
-  type::opname(op_params, GetTensorShape(input1),                      \
-               GetTensorData<uint8_t>(input1), GetTensorShape(input2), \
-               GetTensorData<uint8_t>(input2), GetTensorShape(output), \
-               GetTensorData<uint8_t>(output));
-    if (kernel_type == kReference) {
+#define TF_LITE_ADD(type, opname, dtype)                             \
+  type::opname(op_params, GetTensorShape(input1),                    \
+               GetTensorData<dtype>(input1), GetTensorShape(input2), \
+               GetTensorData<dtype>(input2), GetTensorShape(output), \
+               GetTensorData<dtype>(output));
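+    // Note: int8 currently dispatches only to the reference integer kernels;
+    // the optimized paths below remain uint8-only.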
+    if (output->type == kTfLiteInt8) {
       if (need_broadcast) {
-        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow);
+        TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
       } else {
-        TF_LITE_ADD(reference_ops, Add);
+        TF_LITE_ADD(reference_integer_ops, Add, int8_t);
       }
     } else {
-      if (op_params.broadcast_category ==
-          BroadcastableOpCategory::kGenericBroadcast) {
-        TF_LITE_ADD(optimized_ops, BroadcastAdd4DSlow);
-      } else if (need_broadcast) {
-        TF_LITE_ADD(optimized_ops, BroadcastAddFivefold);
+      if (kernel_type == kReference) {
+        if (need_broadcast) {
+          TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
+        } else {
+          TF_LITE_ADD(reference_ops, Add, uint8_t);
+        }
       } else {
-        TF_LITE_ADD(optimized_ops, Add);
+        if (op_params.broadcast_category ==
+            BroadcastableOpCategory::kGenericBroadcast) {
+          TF_LITE_ADD(optimized_ops, BroadcastAdd4DSlow, uint8_t);
+        } else if (need_broadcast) {
+          TF_LITE_ADD(optimized_ops, BroadcastAddFivefold, uint8_t);
+        } else {
+          TF_LITE_ADD(optimized_ops, Add, uint8_t);
+        }
       }
     }
 #undef TF_LITE_ADD
@@ -292,7 +306,8 @@
 
   if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
     EvalAdd<kernel_type>(context, node, params, data, input1, input2, output);
-  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
+  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
+             output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context,
                       EvalAddQuantized<kernel_type>(context, node, params, data,
                                                     input1, input2, output));
diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc
index 16045d4..2904f4a 100644
--- a/tensorflow/lite/kernels/add_test.cc
+++ b/tensorflow/lite/kernels/add_test.cc
@@ -63,9 +63,10 @@
  public:
   using BaseAddOpModel::BaseAddOpModel;
 
+  template <typename integer_dtype>
   std::vector<float> GetDequantizedOutput() {
-    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+    return Dequantize<integer_dtype>(ExtractVector<integer_dtype>(output_),
+                                     GetScale(output_), GetZeroPoint(output_));
   }
 
   std::vector<float> GetDequantizedOutputInt16() {
@@ -74,17 +75,15 @@
   }
 };
 
-// for quantized Add, the error shouldn't exceed 2*step
+// For quantized Add, the error shouldn't exceed one quantization step.
 float GetTolerance(float min, float max) {
   float kQuantizedStep = (max - min) / 255.0;
-  float kQuantizedTolerance = 2.0 * kQuantizedStep;
-  return kQuantizedTolerance;
+  return kQuantizedStep;
 }
 
 float GetToleranceInt16(float min, float max) {
   float kQuantizedStep = (max - min) / 32767.f;
-  float kQuantizedTolerance = 2.0 * kQuantizedStep;
-  return kQuantizedTolerance;
+  return kQuantizedStep;
 }
 
 TEST(FloatAddOpModel, NoActivation) {
@@ -191,7 +190,8 @@
   }
 }
 
-TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) {
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedTestsNoActivation() {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
@@ -200,19 +200,28 @@
   std::vector<std::vector<float>> results = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {}, -1.0, 1.0},
+    QuantizedAddOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {}, -1.0, 1.0},
                           ActivationFunctionType_NONE);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), inputs1[i]);
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), inputs2[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(), inputs1[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), inputs2[i]);
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
-                                              results[i], kQuantizedTolerance)))
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }
 
+TEST(QuantizedAddOpModel, QuantizedTestsNoActivationUInt8) {
+  QuantizedTestsNoActivation<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt8) {
+  QuantizedTestsNoActivation<TensorType_INT8, int8_t>();
+}
+
 TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
   const float kMin = -1.f;
   const float kMax = 32767.f / 32768.f;
@@ -238,7 +247,8 @@
   }
 }
 
-TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedTestsActivationRELU_N1_TO_1() {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.3}};
@@ -247,53 +257,74 @@
   std::vector<std::vector<float>> results = {{-0.2, 0.6, 1.0, -0.1},
                                              {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {}, -1.0, 1.0},
+    QuantizedAddOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {}, -1.0, 1.0},
                           ActivationFunctionType_RELU_N1_TO_1);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), inputs1[i]);
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), inputs2[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(), inputs1[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), inputs2[i]);
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
-                                              results[i], kQuantizedTolerance)))
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }
 
-TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
+TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1UInt8) {
+  QuantizedTestsActivationRELU_N1_TO_1<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1Int8) {
+  QuantizedTestsActivationRELU_N1_TO_1<TensorType_INT8, int8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedVariousInputShapes() {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-                          {TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-                          {TensorType_UINT8, {}, -3.0, 3.0},
+    QuantizedAddOpModel m({tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, {}, -3.0, 3.0},
                           ActivationFunctionType_NONE);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1});
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
+                                         {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(),
+                                         {0.1, 0.3, 0.3, 0.5, 1.1, 0.1});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(),
+    EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
                 ElementsAreArray(ArrayFloatNear({-1.9, 0.5, 1.0, 1.3, 2.2, 2.1},
                                                 kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }
 
-TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcast) {
+TEST(QuantizedAddOpModel, QuantizedVariousInputShapesUInt8) {
+  QuantizedVariousInputShapes<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedAddOpModel, QuantizedVariousInputShapesInt8) {
+  QuantizedVariousInputShapes<TensorType_INT8, int8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedWithScalarBroadcast() {
   float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel model_fixture(
-        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
-        {TensorType_UINT8, {}, -3.f, 3.f}, {TensorType_UINT8, {}, -3.f, 3.f},
-        ActivationFunctionType_NONE);
-    model_fixture.QuantizeAndPopulate<uint8_t>(
+        {tensor_type, test_shapes[i], -3.f, 3.f}, {tensor_type, {}, -3.f, 3.f},
+        {tensor_type, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
         model_fixture.input1(), {-2.0f, 0.2f, 0.7f, 0.8f, 1.1f, 2.0f});
-    model_fixture.QuantizeAndPopulate<uint8_t>(model_fixture.input2(), {0.1f});
+    model_fixture.QuantizeAndPopulate<integer_dtype>(model_fixture.input2(),
+                                                     {0.1f});
     model_fixture.Invoke();
     EXPECT_THAT(
-        model_fixture.GetDequantizedOutput(),
+        model_fixture.GetDequantizedOutput<integer_dtype>(),
         ElementsAreArray(ArrayFloatNear({-1.9f, 0.3f, 0.8f, 0.9f, 1.2f, 2.1f},
                                         kQuantizedTolerance)))
         << "With shape number " << i;
@@ -301,22 +332,31 @@
   // Re-run with exchanged inputs.
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel model_fixture(
-        {TensorType_UINT8, {}, -3.f, 3.f},
-        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
-        {TensorType_UINT8, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
-    model_fixture.QuantizeAndPopulate<uint8_t>(model_fixture.input1(), {0.1f});
-    model_fixture.QuantizeAndPopulate<uint8_t>(
+        {tensor_type, {}, -3.f, 3.f}, {tensor_type, test_shapes[i], -3.f, 3.f},
+        {tensor_type, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<integer_dtype>(model_fixture.input1(),
+                                                     {0.1f});
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
         model_fixture.input2(), {-2.0f, 0.2f, 0.7f, 0.8f, 1.1f, 2.0f});
     model_fixture.Invoke();
     EXPECT_THAT(
-        model_fixture.GetDequantizedOutput(),
+        model_fixture.GetDequantizedOutput<integer_dtype>(),
         ElementsAreArray(ArrayFloatNear({-1.9f, 0.3f, 0.8f, 0.9f, 1.2f, 2.1f},
                                         kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }
 
-TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcast) {
+TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastUInt8) {
+  QuantizedWithScalarBroadcast<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt8) {
+  QuantizedWithScalarBroadcast<TensorType_INT8, int8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedWithMixedBroadcast() {
   float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
   const std::vector<int> base_shape = {2, 3, 1, 2};
   std::vector<std::vector<int>> test_shapes = {
@@ -335,40 +375,48 @@
       {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f,
        -1.3f}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedAddOpModel model_fixture(
-        {TensorType_UINT8, base_shape, -3.f, 3.f},
-        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
-        {TensorType_UINT8, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
-    model_fixture.QuantizeAndPopulate<uint8_t>(
+    QuantizedAddOpModel model_fixture({tensor_type, base_shape, -3.f, 3.f},
+                                      {tensor_type, test_shapes[i], -3.f, 3.f},
+                                      {tensor_type, {}, -3.f, 3.f},
+                                      ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
         model_fixture.input1(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
                                  2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
-    model_fixture.QuantizeAndPopulate<uint8_t>(
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
         model_fixture.input2(), {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
     model_fixture.Invoke();
     EXPECT_THAT(
-        model_fixture.GetDequantizedOutput(),
+        model_fixture.GetDequantizedOutput<integer_dtype>(),
         ElementsAreArray(ArrayFloatNear(test_outputs[i], kQuantizedTolerance)))
         << "With shape number " << i;
   }
   // Re-run with exchanged inputs.
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedAddOpModel model_fixture(
-        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
-        {TensorType_UINT8, base_shape, -3.f, 3.f},
-        {TensorType_UINT8, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
-    model_fixture.QuantizeAndPopulate<uint8_t>(
+    QuantizedAddOpModel model_fixture({tensor_type, test_shapes[i], -3.f, 3.f},
+                                      {tensor_type, base_shape, -3.f, 3.f},
+                                      {tensor_type, {}, -3.f, 3.f},
+                                      ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
         model_fixture.input1(), {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
-    model_fixture.QuantizeAndPopulate<uint8_t>(
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
         model_fixture.input2(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
                                  2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
     model_fixture.Invoke();
     EXPECT_THAT(
-        model_fixture.GetDequantizedOutput(),
+        model_fixture.GetDequantizedOutput<integer_dtype>(),
         ElementsAreArray(ArrayFloatNear(test_outputs[i], kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }
 
+TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastUInt8) {
+  QuantizedWithMixedBroadcast<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) {
+  QuantizedWithMixedBroadcast<TensorType_INT8, int8_t>();
+}
+
 }  // namespace
 }  // namespace tflite
 int main(int argc, char** argv) {
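
Note on the templated tests above: GetTolerance now allows one quantization
step, (max - min) / 255 for 8-bit types, and GetDequantizedOutput is
templated so the same cases run for uint8_t and int8_t. The Dequantize<T>
helper the tests compare against is assumed to implement the standard affine
mapping real = scale * (q - zero_point); a free-standing sketch:

    #include <cstdint>
    #include <vector>

    // Assumed behavior of the test-side Dequantize<T> helper.
    template <typename T>
    std::vector<float> DequantizeSketch(const std::vector<T>& q, float scale,
                                        int32_t zero_point) {
      std::vector<float> out;
      out.reserve(q.size());
      for (T v : q)
        out.push_back(scale * (static_cast<int32_t>(v) - zero_point));
      return out;
    }
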
diff --git a/tensorflow/lite/kernels/arg_min_max.cc b/tensorflow/lite/kernels/arg_min_max.cc
index f9adf6b..e5223ba 100644
--- a/tensorflow/lite/kernels/arg_min_max.cc
+++ b/tensorflow/lite/kernels/arg_min_max.cc
@@ -80,13 +80,14 @@
   switch (input->type) {
     case kTfLiteFloat32:
     case kTfLiteUInt8:
+    case kTfLiteInt8:
     case kTfLiteInt32:
       break;
 
     default:
       context->ReportError(
           context,
-          "Unkonwn input type: %d, only float32 and int types are supported",
+          "Unknown input type: %d, only float32 and int types are supported",
           input->type);
       return kTfLiteError;
   }
@@ -135,6 +136,9 @@
           case kTfLiteUInt8:
             TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
             break;
+          case kTfLiteInt8:
+            TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
+            break;
           case kTfLiteInt32:
             TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t);
             break;
@@ -150,6 +154,9 @@
           case kTfLiteUInt8:
             TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
             break;
+          case kTfLiteInt8:
+            TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int64_t);
+            break;
           case kTfLiteInt32:
             TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t);
             break;
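
Note on the arg_min_max change above: TF_LITE_ARG_MIN_MAX expands to a
reduction templated on input type and output index type, so int8 support is
one more case label per output type. The comparison it performs, sketched
over a flat vector (the kernel walks the chosen axis of an N-D tensor):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative 1-D argmax; e.g. ArgMax1D<int8_t, int32_t>(values).
    template <typename T, typename OutT>
    OutT ArgMax1D(const std::vector<T>& v) {
      OutT best = 0;
      for (std::size_t i = 1; i < v.size(); ++i) {
        if (v[i] > v[best]) best = static_cast<OutT>(i);
      }
      return best;
    }
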
diff --git a/tensorflow/lite/kernels/arg_min_max_test.cc b/tensorflow/lite/kernels/arg_min_max_test.cc
index 1b1000f..01ea923 100644
--- a/tensorflow/lite/kernels/arg_min_max_test.cc
+++ b/tensorflow/lite/kernels/arg_min_max_test.cc
@@ -86,6 +86,28 @@
   EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1}));
 }
 
+TEST(ArgMaxOpTest, GetMaxArgUInt8) {
+  ArgMaxOpModel<int32_t> model({1, 1, 1, 4}, TensorType_UINT8, TensorType_INT32,
+                               TensorType_INT32);
+  model.PopulateTensor<uint8_t>(model.input(), {1, 9, 7, 3});
+  model.PopulateTensor<int>(model.axis(), {3});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({1}));
+  EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1}));
+}
+
+TEST(ArgMaxOpTest, GetMaxArgInt8) {
+  ArgMaxOpModel<int32_t> model({1, 1, 1, 4}, TensorType_INT8, TensorType_INT32,
+                               TensorType_INT32);
+  model.PopulateTensor<int8_t>(model.input(), {-1, -9, 7, 3});
+  model.PopulateTensor<int>(model.axis(), {3});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({2}));
+  EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1}));
+}
+
 TEST(ArgMaxOpTest, GetMaxArgInt) {
   ArgMaxOpModel<int32_t> model({1, 1, 1, 4}, TensorType_INT32, TensorType_INT32,
                                TensorType_INT32);
diff --git a/tensorflow/lite/kernels/batch_to_space_nd.cc b/tensorflow/lite/kernels/batch_to_space_nd.cc
index 34fdf34..ce85aed 100644
--- a/tensorflow/lite/kernels/batch_to_space_nd.cc
+++ b/tensorflow/lite/kernels/batch_to_space_nd.cc
@@ -148,6 +148,13 @@
         TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, uint8_t);
       }
       break;
+    case kTfLiteInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, int8_t);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, int8_t);
+      }
+      break;
     case kTfLiteInt32:
       if (kernel_type == kReference) {
         TF_LITE_BATCH_TO_SPACE_ND(reference_ops, int32_t);
diff --git a/tensorflow/lite/kernels/batch_to_space_nd_test.cc b/tensorflow/lite/kernels/batch_to_space_nd_test.cc
index f330895..bd806b5 100644
--- a/tensorflow/lite/kernels/batch_to_space_nd_test.cc
+++ b/tensorflow/lite/kernels/batch_to_space_nd_test.cc
@@ -26,8 +26,9 @@
 
 class BatchToSpaceNDOpModel : public SingleOpModel {
  public:
-  void SetInput(std::initializer_list<float> data) {
-    PopulateTensor<float>(input_, data);
+  template <typename T>
+  void SetInput(std::initializer_list<T> data) {
+    PopulateTensor<T>(input_, data);
   }
 
   void SetBlockShape(std::initializer_list<int> data) {
@@ -38,7 +39,10 @@
     PopulateTensor<int>(crops_, data);
   }
 
-  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  template <typename T>
+  std::vector<T> GetOutput() {
+    return ExtractVector<T>(output_);
+  }
   std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
 
  protected:
@@ -58,11 +62,12 @@
  public:
   BatchToSpaceNDOpConstModel(std::initializer_list<int> input_shape,
                              std::initializer_list<int> block_shape,
-                             std::initializer_list<int> crops) {
-    input_ = AddInput(TensorType_FLOAT32);
+                             std::initializer_list<int> crops,
+                             const TensorType& type = TensorType_FLOAT32) {
+    input_ = AddInput(type);
     block_shape_ = AddConstInput(TensorType_INT32, block_shape, {2});
     crops_ = AddConstInput(TensorType_INT32, crops, {2, 2});
-    output_ = AddOutput(TensorType_FLOAT32);
+    output_ = AddOutput(type);
 
     SetBuiltinOp(BuiltinOperator_BATCH_TO_SPACE_ND,
                  BuiltinOptions_BatchToSpaceNDOptions,
@@ -81,11 +86,12 @@
 //    m.Invoke();
 class BatchToSpaceNDOpDynamicModel : public BatchToSpaceNDOpModel {
  public:
-  BatchToSpaceNDOpDynamicModel(std::initializer_list<int> input_shape) {
-    input_ = AddInput(TensorType_FLOAT32);
+  BatchToSpaceNDOpDynamicModel(std::initializer_list<int> input_shape,
+                               const TensorType& type = TensorType_FLOAT32) {
+    input_ = AddInput(type);
     block_shape_ = AddInput(TensorType_INT32);
     crops_ = AddInput(TensorType_INT32);
-    output_ = AddOutput(TensorType_FLOAT32);
+    output_ = AddOutput(type);
 
     SetBuiltinOp(BuiltinOperator_BATCH_TO_SPACE_ND,
                  BuiltinOptions_BatchToSpaceNDOptions,
@@ -96,22 +102,47 @@
 
 TEST(BatchToSpaceNDOpTest, SimpleConstTest) {
   BatchToSpaceNDOpConstModel m({4, 2, 2, 1}, {2, 2}, {0, 0, 0, 0});
-  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.SetInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
-  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 5, 2, 6, 9, 13, 10, 14, 3, 7,
-                                               4, 8, 11, 15, 12, 16}));
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray(
+                  {1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16}));
+}
+
+TEST(BatchToSpaceNDOpTest, SimpleConstTestInt8) {
+  BatchToSpaceNDOpConstModel m({4, 2, 2, 1}, {2, 2}, {0, 0, 0, 0},
+                               TensorType_INT8);
+  m.SetInput<int8_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray(
+                  {1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16}));
 }
 
 TEST(BatchToSpaceNDOpTest, SimpleDynamicTest) {
   BatchToSpaceNDOpDynamicModel m({4, 2, 2, 1});
-  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.SetInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
   m.SetBlockShape({2, 2});
   m.SetCrops({0, 0, 0, 0});
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
-  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 5, 2, 6, 9, 13, 10, 14, 3, 7,
-                                               4, 8, 11, 15, 12, 16}));
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray(
+                  {1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16}));
+}
+
+TEST(BatchToSpaceNDOpTest, SimpleDynamicTestInt8) {
+  BatchToSpaceNDOpDynamicModel m({4, 2, 2, 1}, TensorType_INT8);
+  m.SetInput<int8_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.SetBlockShape({2, 2});
+  m.SetCrops({0, 0, 0, 0});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray(
+                  {1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16}));
 }
 
 #ifdef GTEST_HAS_DEATH_TEST
@@ -127,7 +158,7 @@
 
 TEST(BatchToSpaceNDOpTest, InvalidCropsDynamicTest) {
   BatchToSpaceNDOpDynamicModel m({4, 2, 2, 1});
-  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.SetInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
   m.SetBlockShape({2, 2});
   m.SetCrops({0, 0, -1, 0});
   EXPECT_DEATH(m.Invoke(), "crops.2. >= 0 was not true.");
diff --git a/tensorflow/lite/kernels/comparisons.cc b/tensorflow/lite/kernels/comparisons.cc
index a914449..d492419 100644
--- a/tensorflow/lite/kernels/comparisons.cc
+++ b/tensorflow/lite/kernels/comparisons.cc
@@ -59,11 +59,12 @@
 
 // TODO(ruic): optimize macros below to use template functions.
 #define TF_LITE_QUANTIZE_COMPARISON(opname)                                    \
+  template <typename input_dtype>                                              \
   void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node,         \
                              const TfLiteTensor* input1,                       \
                              const TfLiteTensor* input2, TfLiteTensor* output, \
                              bool requires_broadcast) {                        \
-    if (input1->type == kTfLiteUInt8) {                                        \
+    if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {         \
       auto input1_offset = -input1->params.zero_point;                         \
       auto input2_offset = -input2->params.zero_point;                         \
       const int left_shift = 8;                                                \
@@ -87,14 +88,16 @@
       op_params.input2_shift = input2_shift;                                   \
       if (requires_broadcast) {                                                \
         reference_ops::Broadcast4DSlow##opname##WithScaling(                   \
-            op_params, GetTensorShape(input1), GetTensorData<uint8_t>(input1), \
-            GetTensorShape(input2), GetTensorData<uint8_t>(input2),            \
-            GetTensorShape(output), GetTensorData<bool>(output));              \
+            op_params, GetTensorShape(input1),                                 \
+            GetTensorData<input_dtype>(input1), GetTensorShape(input2),        \
+            GetTensorData<input_dtype>(input2), GetTensorShape(output),        \
+            GetTensorData<bool>(output));                                      \
       } else {                                                                 \
         reference_ops::opname##WithScaling(                                    \
-            op_params, GetTensorShape(input1), GetTensorData<uint8_t>(input1), \
-            GetTensorShape(input2), GetTensorData<uint8_t>(input2),            \
-            GetTensorShape(output), GetTensorData<bool>(output));              \
+            op_params, GetTensorShape(input1),                                 \
+            GetTensorData<input_dtype>(input1), GetTensorShape(input2),        \
+            GetTensorData<input_dtype>(input2), GetTensorShape(output),        \
+            GetTensorData<bool>(output));                                      \
       }                                                                        \
     }                                                                          \
   }
@@ -136,8 +139,12 @@
       TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
       break;
     case kTfLiteUInt8:
-      EvalQuantizedEqual(context, node, input1, input2, output,
-                         requires_broadcast);
+      EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
+                                  requires_broadcast);
+      break;
+    case kTfLiteInt8:
+      EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
+                                 requires_broadcast);
       break;
     default:
       context->ReportError(context,
@@ -165,8 +172,12 @@
       TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
       break;
     case kTfLiteUInt8:
-      EvalQuantizedNotEqual(context, node, input1, input2, output,
-                            requires_broadcast);
+      EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
+                                     requires_broadcast);
+      break;
+    case kTfLiteInt8:
+      EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
+                                    requires_broadcast);
       break;
     default:
       context->ReportError(context,
@@ -193,8 +204,12 @@
       TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
       break;
     case kTfLiteUInt8:
-      EvalQuantizedGreater(context, node, input1, input2, output,
-                           requires_broadcast);
+      EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
+                                    requires_broadcast);
+      break;
+    case kTfLiteInt8:
+      EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
+                                   requires_broadcast);
       break;
     default:
       context->ReportError(context,
@@ -221,8 +236,12 @@
       TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
       break;
     case kTfLiteUInt8:
-      EvalQuantizedGreaterEqual(context, node, input1, input2, output,
-                                requires_broadcast);
+      EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
+                                         requires_broadcast);
+      break;
+    case kTfLiteInt8:
+      EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
+                                        requires_broadcast);
       break;
     default:
       context->ReportError(context,
@@ -249,8 +268,12 @@
       TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
       break;
     case kTfLiteUInt8:
-      EvalQuantizedLess(context, node, input1, input2, output,
-                        requires_broadcast);
+      EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
+                                 requires_broadcast);
+      break;
+    case kTfLiteInt8:
+      EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
+                                requires_broadcast);
       break;
     default:
       context->ReportError(context,
@@ -277,8 +300,12 @@
       TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
       break;
     case kTfLiteUInt8:
-      EvalQuantizedLessEqual(context, node, input1, input2, output,
-                             requires_broadcast);
+      EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
+                                      requires_broadcast);
+      break;
+    case kTfLiteInt8:
+      EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
+                                     requires_broadcast);
       break;
     default:
       context->ReportError(context,
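
Note on the quantized comparisons above: because the two inputs may carry
different scales and zero points, EvalQuantized##opname first rescales both
into a common left-shifted domain (left_shift = 8 plus per-input
multipliers) before comparing, which is why the same macro body now serves
uint8_t and int8_t. A scalar sketch of the same idea in plain floating
point, with illustrative names:

    #include <cstdint>

    // Real-valued equivalent of one quantized Greater comparison.
    bool QuantizedGreater(int8_t a, float scale_a, int32_t zp_a,
                          int8_t b, float scale_b, int32_t zp_b) {
      return scale_a * (a - zp_a) > scale_b * (b - zp_b);
    }
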
diff --git a/tensorflow/lite/kernels/comparisons_test.cc b/tensorflow/lite/kernels/comparisons_test.cc
index ab10c95..6ec1f09 100644
--- a/tensorflow/lite/kernels/comparisons_test.cc
+++ b/tensorflow/lite/kernels/comparisons_test.cc
@@ -363,7 +363,7 @@
   EXPECT_THAT(model.GetOutputShape(), ElementsAre(1, 1, 2, 4));
 }
 
-TEST(QuantizedComparisonsTest, EqualQuantized) {
+TEST(QuantizedComparisonsTest, EqualUInt8Quantized) {
   const float kMin = -1.f;
   const float kMax = 128.f;
   ComparisonOpModel model({TensorType_UINT8, {1, 2, 2, 1}, kMin, kMax},
@@ -376,7 +376,20 @@
   EXPECT_THAT(model.GetOutput(), ElementsAre(true, false, true, false));
 }
 
-TEST(QuantizedComparisonsTest, NotEqualQuantized) {
+TEST(QuantizedComparisonsTest, EqualInt8Quantized) {
+  const float kMin = -127.f;
+  const float kMax = 127.f;
+  ComparisonOpModel model({TensorType_INT8, {1, 2, 2, 1}, kMin, kMax},
+                          {TensorType_INT8, {1, 2, 2, 1}, kMin, kMax},
+                          TensorType_INT8, BuiltinOperator_EQUAL);
+  model.QuantizeAndPopulate<int8_t>(model.input1(), {1, -9, 7, 3});
+  model.QuantizeAndPopulate<int8_t>(model.input2(), {-1, 2, 7, 5});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAre(false, false, true, false));
+}
+
+TEST(QuantizedComparisonsTest, NotEqualUInt8Quantized) {
   const float kMin = -1.f;
   const float kMax = 128.f;
   ComparisonOpModel model({TensorType_UINT8, {1, 2, 2, 1}, kMin, kMax},
@@ -389,6 +402,19 @@
   EXPECT_THAT(model.GetOutput(), ElementsAre(false, true, false, true));
 }
 
+TEST(QuantizedComparisonsTest, NotEqualInt8Quantized) {
+  const float kMin = -127.f;
+  const float kMax = 127.f;
+  ComparisonOpModel model({TensorType_INT8, {1, 2, 2, 1}, kMin, kMax},
+                          {TensorType_INT8, {1, 2, 2, 1}, kMin, kMax},
+                          TensorType_INT8, BuiltinOperator_NOT_EQUAL);
+  model.QuantizeAndPopulate<int8_t>(model.input1(), {1, -9, 7, 3});
+  model.QuantizeAndPopulate<int8_t>(model.input2(), {1, 2, 7, 5});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAre(false, true, false, true));
+}
+
 TEST(ComparisonsTest, GreaterQuantized) {
   const float kMin = -1.f;
   const float kMax = 128.f;
@@ -470,7 +496,7 @@
   }
 }
 
-TEST(ComparisonsTest, QuantizedNotEqualWithBroadcast) {
+TEST(ComparisonsTest, QuantizedUInt8NotEqualWithBroadcast) {
   const float kMin = -1.f;
   const float kMax = 128.f;
   std::vector<std::vector<int>> test_shapes = {
@@ -488,7 +514,25 @@
   }
 }
 
-TEST(ComparisonsTest, QuantizedGreaterWithBroadcast) {
+TEST(ComparisonsTest, QuantizedInt8NotEqualWithBroadcast) {
+  const float kMin = -127.f;
+  const float kMax = 127.f;
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    ComparisonOpModel model({TensorType_INT8, test_shapes[i], kMin, kMax},
+                            {TensorType_INT8, {}, kMin, kMax}, TensorType_INT8,
+                            BuiltinOperator_NOT_EQUAL);
+    model.QuantizeAndPopulate<int8_t>(model.input1(), {-20, 2, 7, -8, 11, 20});
+    model.QuantizeAndPopulate<int8_t>(model.input2(), {2});
+    model.Invoke();
+    EXPECT_THAT(model.GetOutput(),
+                ElementsAre(true, false, true, true, true, true))
+        << "With shape number " << i;
+  }
+}
+
+TEST(ComparisonsTest, QuantizedUInt8GreaterWithBroadcast) {
   const float kMin = -1.f;
   const float kMax = 128.f;
   std::vector<std::vector<int>> test_shapes = {
@@ -506,7 +550,25 @@
   }
 }
 
-TEST(ComparisonsTest, QuantizedGreaterEqualWithBroadcast) {
+TEST(ComparisonsTest, QuantizedInt8GreaterWithBroadcast) {
+  const float kMin = -127.f;
+  const float kMax = 127.f;
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    ComparisonOpModel model({TensorType_INT8, test_shapes[i], kMin, kMax},
+                            {TensorType_INT8, {}, kMin, kMax}, TensorType_INT8,
+                            BuiltinOperator_GREATER);
+    model.QuantizeAndPopulate<int8_t>(model.input1(), {20, -2, -71, 8, 11, 20});
+    model.QuantizeAndPopulate<int8_t>(model.input2(), {8});
+    model.Invoke();
+    EXPECT_THAT(model.GetOutput(),
+                ElementsAre(true, false, false, false, true, true))
+        << "With shape number " << i;
+  }
+}
+
+TEST(ComparisonsTest, QuantizedUInt8GreaterEqualWithBroadcast) {
   const float kMin = -1.f;
   const float kMax = 128.f;
   std::vector<std::vector<int>> test_shapes = {
@@ -524,7 +586,25 @@
   }
 }
 
-TEST(ComparisonsTest, QuantizedLessWithBroadcast) {
+TEST(ComparisonsTest, QuantizedInt8GreaterEqualWithBroadcast) {
+  const float kMin = -127.f;
+  const float kMax = 127.f;
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    ComparisonOpModel model({TensorType_INT8, test_shapes[i], kMin, kMax},
+                            {TensorType_INT8, {}, kMin, kMax}, TensorType_INT8,
+                            BuiltinOperator_GREATER_EQUAL);
+    model.QuantizeAndPopulate<int8_t>(model.input1(), {20, -2, -71, 8, 11, 20});
+    model.QuantizeAndPopulate<int8_t>(model.input2(), {8});
+    model.Invoke();
+    EXPECT_THAT(model.GetOutput(),
+                ElementsAre(true, false, false, true, true, true))
+        << "With shape number " << i;
+  }
+}
+
+TEST(ComparisonsTest, QuantizedUInt8LessWithBroadcast) {
   const float kMin = -1.f;
   const float kMax = 128.f;
   std::vector<std::vector<int>> test_shapes = {
@@ -542,7 +622,25 @@
   }
 }
 
-TEST(ComparisonsTest, QuantizedLessEqualWithBroadcast) {
+TEST(ComparisonsTest, QuantizedInt8LessWithBroadcast) {
+  const float kMin = -127.f;
+  const float kMax = 127.f;
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    ComparisonOpModel model({TensorType_INT8, test_shapes[i], kMin, kMax},
+                            {TensorType_INT8, {}, kMin, kMax}, TensorType_INT8,
+                            BuiltinOperator_LESS);
+    model.QuantizeAndPopulate<int8_t>(model.input1(), {20, -2, -71, 8, 11, 20});
+    model.QuantizeAndPopulate<int8_t>(model.input2(), {8});
+    model.Invoke();
+    EXPECT_THAT(model.GetOutput(),
+                ElementsAre(false, true, true, false, false, false))
+        << "With shape number " << i;
+  }
+}
+
+TEST(ComparisonsTest, QuantizedUInt8LessEqualWithBroadcast) {
   const float kMin = -1.f;
   const float kMax = 128.f;
   std::vector<std::vector<int>> test_shapes = {
@@ -560,6 +658,24 @@
   }
 }
 
+TEST(ComparisonsTest, QuantizedInt8LessEqualWithBroadcast) {
+  const float kMin = -127.f;
+  const float kMax = 127.f;
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    ComparisonOpModel model({TensorType_INT8, test_shapes[i], kMin, kMax},
+                            {TensorType_INT8, {}, kMin, kMax}, TensorType_INT8,
+                            BuiltinOperator_LESS_EQUAL);
+    model.QuantizeAndPopulate<int8_t>(model.input1(), {20, -2, -71, 8, 11, 20});
+    model.QuantizeAndPopulate<int8_t>(model.input2(), {8});
+    model.Invoke();
+    EXPECT_THAT(model.GetOutput(),
+                ElementsAre(false, true, true, true, false, false))
+        << "With shape number " << i;
+  }
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/elementwise.cc b/tensorflow/lite/kernels/elementwise.cc
index a79388b..1cc188a 100644
--- a/tensorflow/lite/kernels/elementwise.cc
+++ b/tensorflow/lite/kernels/elementwise.cc
@@ -83,6 +83,10 @@
   return EvalNumeric(context, node, std::sin);
 }
 
+TfLiteStatus CosEval(TfLiteContext* context, TfLiteNode* node) {
+  return EvalNumeric(context, node, std::cos);
+}
+
 TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) {
   return EvalNumeric(context, node, std::log);
 }
@@ -122,6 +126,14 @@
   return &r;
 }
 
+TfLiteRegistration* Register_COS() {
+  static TfLiteRegistration r = {
+      /*init=*/nullptr, /*free=*/nullptr,
+      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
+      elementwise::CosEval};
+  return &r;
+}
+
 TfLiteRegistration* Register_LOG() {
   static TfLiteRegistration r = {
       /*init=*/nullptr, /*free=*/nullptr,
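
Note on the new Cos kernel above: it reuses the generic unary pattern, where
EvalNumeric applies one std:: function to every element of a float tensor.
A free-standing sketch of that core loop, with the tensor plumbing elided:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Apply a unary float function elementwise, as CosEval/SinEval/LogEval do.
    std::vector<float> EvalUnary(const std::vector<float>& in,
                                 float (*fn)(float)) {
      std::vector<float> out(in.size());
      for (std::size_t i = 0; i < in.size(); ++i) out[i] = fn(in[i]);
      return out;
    }

    // Usage: EvalUnary(values, std::cos);  // resolves to the float overload.
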
diff --git a/tensorflow/lite/kernels/elementwise_test.cc b/tensorflow/lite/kernels/elementwise_test.cc
index 7d24320..89f2a50 100644
--- a/tensorflow/lite/kernels/elementwise_test.cc
+++ b/tensorflow/lite/kernels/elementwise_test.cc
@@ -65,6 +65,15 @@
   EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 1, 4, 1}));
 }
 
+TEST(ElementWise, Cos) {
+  ElementWiseOpFloatModel m(BuiltinOperator_COS, {1, 1, 4, 1});
+  m.PopulateTensor<float>(m.input(), {0, 3.1415926, -3.1415926, 1});
+  m.Invoke();
+  EXPECT_THAT(m.ExtractVector<float>(m.output()),
+              ElementsAreArray(ArrayFloatNear({1, -1, -1, 0.54030})));
+  EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 1, 4, 1}));
+}
+
 TEST(ElementWise, Log) {
   ElementWiseOpFloatModel m(BuiltinOperator_LOG, {1, 1, 4, 1});
   m.PopulateTensor<float>(m.input(), {1, 3.1415926, 1, 1});
diff --git a/tensorflow/lite/kernels/fully_connected.cc b/tensorflow/lite/kernels/fully_connected.cc
index 7b4d29c..7ed77c4 100644
--- a/tensorflow/lite/kernels/fully_connected.cc
+++ b/tensorflow/lite/kernels/fully_connected.cc
@@ -26,6 +26,7 @@
 #include "tensorflow/lite/kernels/gemm_support.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
 #include "tensorflow/lite/kernels/internal/tensor.h"
 #include "tensorflow/lite/kernels/internal/tensor_utils.h"
@@ -288,6 +289,27 @@
     macro_name(target_namespace, kRelu6);                            \
   }
 
+namespace {
+void FullyConnectedInt8(const OpData* data, const TfLiteTensor* input,
+                        const TfLiteTensor* filter, const TfLiteTensor* bias,
+                        TfLiteTensor* output,
+                        gemmlowp::GemmContext* gemm_context) {
+  FullyConnectedParams op_params;
+  op_params.input_offset = -input->params.zero_point;
+  op_params.weights_offset = -filter->params.zero_point;
+  op_params.output_offset = output->params.zero_point;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = -data->output_shift;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+  reference_integer_ops::FullyConnected(
+      op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
+      GetTensorShape(filter), GetTensorData<int8_t>(filter),
+      GetTensorShape(bias), GetTensorData<int32_t>(bias),
+      GetTensorShape(output), GetTensorData<int8_t>(output), gemm_context);
+}
+}  // namespace
+
 template <KernelType kernel_type>
 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                            TfLiteFullyConnectedParams* params, OpData* data,
@@ -321,6 +343,9 @@
       case kTfLiteUInt8:
         TF_LITE_FULLY_CONNECTED(reference_ops, uint8_t);
         break;
+      case kTfLiteInt8:
+        FullyConnectedInt8(data, input, filter, bias, output, gemm_context);
+        break;
       case kTfLiteInt16:
         TF_LITE_FULLY_CONNECTED(reference_ops, int16_t);
         break;
@@ -341,6 +366,9 @@
       case kTfLiteUInt8:
         TF_LITE_FULLY_CONNECTED(optimized_ops, uint8_t);
         break;
+      case kTfLiteInt8:
+        FullyConnectedInt8(data, input, filter, bias, output, gemm_context);
+        break;
       case kTfLiteInt16:
         TF_LITE_FULLY_CONNECTED(optimized_ops, int16_t);
         break;
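
Note on FullyConnectedInt8 above: under int8 quantization, weights are
typically symmetric (zero point 0), and the kernel accumulates in 32 bits
before requantizing to int8. A per-output-unit sketch using a single float
effective_scale (input_scale * filter_scale / output_scale) in place of the
kernel's fixed-point output_multiplier/output_shift pair; names are
illustrative:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    int8_t FullyConnectedUnit(const std::vector<int8_t>& input,
                              int32_t input_zp,
                              const std::vector<int8_t>& weights,
                              int32_t bias, float effective_scale,
                              int32_t output_zp) {
      int32_t acc = bias;  // Bias is pre-quantized to the accumulator scale.
      for (std::size_t i = 0; i < input.size(); ++i)
        acc += (input[i] - input_zp) * weights[i];  // Weights assumed symmetric.
      const int32_t q =
          static_cast<int32_t>(std::round(acc * effective_scale)) + output_zp;
      return static_cast<int8_t>(std::min(127, std::max(-128, q)));
    }
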
diff --git a/tensorflow/lite/kernels/fully_connected_test.cc b/tensorflow/lite/kernels/fully_connected_test.cc
index 31aa3f3..ae8e2ac 100644
--- a/tensorflow/lite/kernels/fully_connected_test.cc
+++ b/tensorflow/lite/kernels/fully_connected_test.cc
@@ -221,9 +221,12 @@
   void SetBias(const std::vector<float>& data) {
     QuantizeAndPopulate<int32_t>(bias_, data);
   }
+  template <typename T>
   void SetWeights(const std::vector<float>& data) {
-    QuantizeAndPopulate<uint8_t>(weights_, data);
+    QuantizeAndPopulate<T>(weights_, data);
   }
+
+  template <typename T>
   void ShuffleAndSetWeights(const std::vector<float>& data, int input_depth,
                             int output_depth) {
     std::vector<float> shuffled_data(data.size());
@@ -242,15 +245,17 @@
     }
     TfLiteTensor* t = interpreter_->tensor(weights_);
     auto quantized_data =
-        Quantize<uint8_t>(shuffled_data, t->params.scale, t->params.zero_point);
-    for (uint8_t& q : quantized_data) {
+        Quantize<T>(shuffled_data, t->params.scale, t->params.zero_point);
+    for (T& q : quantized_data) {
       q ^= 0x80;
     }
     PopulateTensor(weights_, 0, quantized_data.data(),
                    quantized_data.data() + quantized_data.size());
   }
+
+  template <typename T>
   void SetInput(const std::vector<float>& data) {
-    QuantizeAndPopulate<uint8_t>(input_, data);
+    QuantizeAndPopulate<T>(input_, data);
   }
 
   template <typename T>
@@ -423,21 +428,21 @@
   EXPECT_THAT(m.GetOutput(), ElementsAre(10, 8));
 }
 
-TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantized) {
+TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantizedUint8) {
   QuantizedFullyConnectedOpModel m(
       GetRegistration(), /*units=*/3, /*batches*/ 2,
       /*input=*/{TensorType_UINT8, {2, 10}, -63.5, 64},
       /*output=*/{TensorType_UINT8, {}, -127, 128});
 
   // input_product_scale < output_scale was not true.
-  m.SetWeights({
+  m.SetWeights<uint8_t>({
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
   });
   m.SetBias({1, 2, 3});
 
-  m.SetInput({
+  m.SetInput<uint8_t>({
       1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
       1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
   });
@@ -453,22 +458,48 @@
               ElementsAre(151, 152, 153, 185, 186, 187));
 }
 
-TEST_P(QuantizedFullyConnectedOpTest,
-       SimpleTestQuantizedOutputMultiplierGreaterThan1) {
-  // real_multiplier = 2.
+TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantizedInt8) {
   QuantizedFullyConnectedOpModel m(
-      GetRegistration(), /*units=*/3, /*batches*/ 2,
-      /*input=*/{TensorType_UINT8, {2, 10}, -127, 128},
-      /*output=*/{TensorType_UINT8, {}, -63.5, 64});
+      ops::builtin::Register_FULLY_CONNECTED_REF(), /*units=*/3, /*batches*/ 2,
+      /*input=*/{TensorType_INT8, {2, 10}, -63.5, 64},
+      /*output=*/{TensorType_INT8, {}, -127, 128});
 
-  m.SetWeights({
+  // input_product_scale < output_scale was not true.
+  m.SetWeights<int8_t>({
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
   });
   m.SetBias({1, 2, 3});
 
-  m.SetInput({
+  m.SetInput<int8_t>({
+      1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+  });
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({24, 25, 26, 58, 59, 60})));
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAre(23, 24, 25, 57, 58, 59));
+}
+
+TEST_P(QuantizedFullyConnectedOpTest,
+       SimpleTestQuantizedOutputMultiplierGreaterThan1Uint8) {
+  // real_multiplier = 2.
+  QuantizedFullyConnectedOpModel m(
+      GetRegistration(), /*units=*/3, /*batches*/ 2,
+      /*input=*/{TensorType_UINT8, {2, 10}, -127, 128},
+      /*output=*/{TensorType_UINT8, {}, -63.5, 64});
+
+  m.SetWeights<uint8_t>({
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+  });
+  m.SetBias({1, 2, 3});
+
+  m.SetInput<uint8_t>({
       1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
       1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
   });
@@ -484,6 +515,36 @@
               ElementsAre(175, 177, 179, 243, 245, 247));
 }
 
+TEST_P(QuantizedFullyConnectedOpTest,
+       SimpleTestQuantizedOutputMultiplierGreaterThan1Int8) {
+  // real_multiplier = 2.
+  QuantizedFullyConnectedOpModel m(
+      ops::builtin::Register_FULLY_CONNECTED_REF(), /*units=*/3, /*batches*/ 2,
+      /*input=*/{TensorType_INT8, {2, 10}, -127, 128},
+      /*output=*/{TensorType_INT8, {}, -63.5, 64});
+
+  m.SetWeights<int8_t>({
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+  });
+  m.SetBias({1, 2, 3});
+
+  m.SetInput<int8_t>({
+      1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+  });
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({
+                  24, 25, 26,  // first batch
+                  58, 59, 60,  // second batch
+              })));
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAre(47, 49, 51, 115, 117, 119));
+}
+
 void SimpleTestQuantizedInt16OutputCase(
     TfLiteRegistration* registration, int input_depth, int output_depth,
     int batches, FullyConnectedOptionsWeightsFormat weights_format) {
@@ -519,7 +580,7 @@
   // and set the (possibly shuffled) weights.
   switch (weights_format) {
     case FullyConnectedOptionsWeightsFormat_DEFAULT:
-      m.SetWeights(weights_data);
+      m.SetWeights<uint8_t>(weights_data);
       break;
     case FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8:
       // The shuffled path currently supports only a restrictive subset of
@@ -527,7 +588,7 @@
       CHECK_EQ(input_depth % 16, 0);
       CHECK_EQ(output_depth % 4, 0);
       CHECK(batches == 1 || batches == 4);
-      m.ShuffleAndSetWeights(weights_data, input_depth, output_depth);
+      m.ShuffleAndSetWeights<uint8_t>(weights_data, input_depth, output_depth);
       break;
     default:
       LOG(FATAL) << "Unhandled weights format";
@@ -549,7 +610,7 @@
   }
 
   m.SetBias(bias_data);
-  m.SetInput(input_data);
+  m.SetInput<uint8_t>(input_data);
 
   m.Invoke();
 
@@ -691,21 +752,21 @@
                              }));
 }
 
-TEST_P(QuantizedFullyConnectedOpTest, SimpleTest4dInputQuantized) {
+TEST_P(QuantizedFullyConnectedOpTest, SimpleTest4dInputQuantizedUint8) {
   QuantizedFullyConnectedOpModel m(
       GetRegistration(), /*units=*/3, /*batches=*/2,
       /*input=*/{TensorType_UINT8, {4, 1, 5, 1}, -63.5, 64},
       /*output=*/{TensorType_UINT8, {}, -127, 128});
 
   // input_product_scale < output_scale was not true.
-  m.SetWeights({
+  m.SetWeights<uint8_t>({
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
   });
   m.SetBias({1, 2, 3});
 
-  m.SetInput({
+  m.SetInput<uint8_t>({
       1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
       1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
   });
@@ -722,21 +783,21 @@
 }
 
 TEST_P(QuantizedFullyConnectedOpTest,
-       SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1) {
+       SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8) {
   // real_multiplier = 2.
   QuantizedFullyConnectedOpModel m(
       GetRegistration(), /*units=*/3, /*batches=*/2,
       /*input=*/{TensorType_UINT8, {4, 1, 5, 1}, -127, 128},
       /*output=*/{TensorType_UINT8, {}, -63.5, 64});
 
-  m.SetWeights({
+  m.SetWeights<uint8_t>({
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
   });
   m.SetBias({1, 2, 3});
 
-  m.SetInput({
+  m.SetInput<uint8_t>({
       1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
       1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
   });
diff --git a/tensorflow/lite/kernels/gather_nd.cc b/tensorflow/lite/kernels/gather_nd.cc
new file mode 100644
index 0000000..20e9865
--- /dev/null
+++ b/tensorflow/lite/kernels/gather_nd.cc
@@ -0,0 +1,154 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace gather_nd {
+constexpr int kParams = 0;
+constexpr int kIndices = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  const TfLiteTensor* params = GetInput(context, node, kParams);
+  const TfLiteTensor* indices = GetInput(context, node, kIndices);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  switch (params->type) {
+    case kTfLiteFloat32:
+    case kTfLiteUInt8:
+    case kTfLiteInt8:
+    case kTfLiteInt64:
+    case kTfLiteInt32:
+      break;
+    default:
+      context->ReportError(
+          context, "Params of type '%s' are not supported by gather_nd.",
+          TfLiteTypeGetName(params->type));
+      return kTfLiteError;
+  }
+  switch (indices->type) {
+    case kTfLiteInt64:
+    case kTfLiteInt32:
+      break;
+    default:
+      context->ReportError(
+          context, "Indices of type '%s' are not supported by gather_nd.",
+          TfLiteTypeGetName(indices->type));
+      return kTfLiteError;
+  }
+
+  const int params_rank = NumDimensions(params);
+  const int indices_rank = NumDimensions(indices);
+  const int indices_nd = SizeOfDimension(indices, indices_rank - 1);
+  if (params_rank < 1) {
+    context->ReportError(context, "Params must be at least a vector.");
+    return kTfLiteError;
+  }
+  if (indices_rank < 1) {
+    context->ReportError(context, "Indices must be at least a vector.");
+    return kTfLiteError;
+  }
+  if (indices_nd > params_rank) {
+    context->ReportError(
+        context, "Index innermost dimension length must be <= params rank.");
+    return kTfLiteError;
+  }
+
+  // Assign to output the input type.
+  output->type = params->type;
+
+  // The result shape is
+  // indices.shape[:-1] + params.shape[indices.shape[-1]:]
+  const int output_rank = indices_rank + params_rank - indices_nd - 1;
+  TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_rank);
+  int output_index = 0;
+  for (int i = 0; i < indices_rank - 1; ++i) {
+    output_shape->data[output_index++] = indices->dims->data[i];
+  }
+  for (int i = indices_nd; i < params_rank; ++i) {
+    output_shape->data[output_index++] = params->dims->data[i];
+  }
+  return context->ResizeTensor(context, output, output_shape);
+}
+
+template <typename ParamsT, typename IndicesT>
+TfLiteStatus GatherNd(const TfLiteTensor* params, const TfLiteTensor* indices,
+                      TfLiteTensor* output) {
+  reference_ops::GatherNd(
+      GetTensorShape(params), GetTensorData<ParamsT>(params),
+      GetTensorShape(indices), GetTensorData<IndicesT>(indices),
+      GetTensorShape(output), GetTensorData<ParamsT>(output));
+  return kTfLiteOk;
+}
+
+template <typename IndicesT>
+TfLiteStatus EvalGatherNd(TfLiteContext* context, const TfLiteTensor* params,
+                          const TfLiteTensor* indices, TfLiteTensor* output) {
+  switch (params->type) {
+    case kTfLiteFloat32:
+      return GatherNd<float, IndicesT>(params, indices, output);
+    case kTfLiteUInt8:
+      return GatherNd<uint8_t, IndicesT>(params, indices, output);
+    case kTfLiteInt8:
+      return GatherNd<int8_t, IndicesT>(params, indices, output);
+    case kTfLiteInt32:
+      return GatherNd<int32_t, IndicesT>(params, indices, output);
+    case kTfLiteInt64:
+      return GatherNd<int64_t, IndicesT>(params, indices, output);
+    default:
+      context->ReportError(context,
+                           "Params type '%s' are not supported by gather_nd.",
+                           TfLiteTypeGetName(params->type));
+      return kTfLiteError;
+  }
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* params = GetInput(context, node, kParams);
+  const TfLiteTensor* indices = GetInput(context, node, kIndices);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  switch (indices->type) {
+    case kTfLiteInt32:
+      return EvalGatherNd<int32_t>(context, params, indices, output);
+    case kTfLiteInt64:
+      return EvalGatherNd<int64_t>(context, params, indices, output);
+    default:
+      context->ReportError(
+          context, "Indices of type '%s' are not supported by gather_nd.",
+          TfLiteTypeGetName(indices->type));
+      return kTfLiteError;
+  }
+}
+}  // namespace gather_nd
+
+TfLiteRegistration* Register_GATHER_ND() {
+  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
+                                 gather_nd::Prepare, gather_nd::Eval};
+  return &r;
+}
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
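
Note on the new GATHER_ND kernel above: the last dimension of indices
selects coordinates into the leading dimensions of params, and the output
shape is indices.shape[:-1] + params.shape[indices_nd:], as computed in
Prepare. A reference sketch for the simplest case, 2-D params with indices
of shape [n, 2]; the kernel generalizes this to arbitrary ranks:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Gather scalar elements from a row-major matrix with `cols` columns.
    template <typename T>
    std::vector<T> GatherNd2D(const std::vector<T>& params, int cols,
                              const std::vector<int32_t>& indices) {
      std::vector<T> out;
      for (std::size_t i = 0; i + 1 < indices.size(); i += 2)
        out.push_back(params[indices[i] * cols + indices[i + 1]]);
      return out;
    }
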
diff --git a/tensorflow/lite/kernels/gather_nd_test.cc b/tensorflow/lite/kernels/gather_nd_test.cc
new file mode 100644
index 0000000..a5e93ef
--- /dev/null
+++ b/tensorflow/lite/kernels/gather_nd_test.cc
@@ -0,0 +1,323 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class GatherNdOpModel : public SingleOpModel {
+ public:
+  GatherNdOpModel(const TensorData& params, const TensorData& indices) {
+    params_ = AddInput(params);
+    indices_ = AddInput(indices);
+    output_ = AddOutput(params.type);
+    SetBuiltinOp(BuiltinOperator_GATHER_ND, BuiltinOptions_GatherNdOptions,
+                 CreateGatherNdOptions(builder_).Union());
+    BuildInterpreter({GetShape(params_), GetShape(indices_)});
+  }
+
+  template <typename T>
+  void SetInput(std::initializer_list<T> data) {
+    PopulateTensor<T>(params_, data);
+  }
+
+  template <typename T>
+  void SetPositions(std::initializer_list<T> data) {
+    PopulateTensor<T>(indices_, data);
+  }
+
+  template <typename T>
+  std::vector<T> GetOutput() {
+    return ExtractVector<T>(output_);
+  }
+
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ protected:
+  int params_;
+  int indices_;
+  int output_;
+};
+
+TEST(GatherNdOpTest, ElementIndexingIntoMatrix) {
+  GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 2}});
+  m.SetInput<float>({1.1, 1.2, 2.1, 2.2});
+  m.SetPositions<int32_t>({0, 0, 1, 1});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({1.1, 2.2}));
+}
+
+TEST(GatherNdOpTest, SliceIndexingIntoMatrix) {
+  GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 1}});
+  m.SetInput<float>({1.1, 1.2, 2.1, 2.2});
+  m.SetPositions<int32_t>({1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({2.1, 2.2, 1.1, 1.2}));
+}
+
+TEST(GatherNdOpTest, BatchedIndexingIntoMatrix1) {
+  GatherNdOpModel m({TensorType_FLOAT32, {2, 2}},
+                    {TensorType_INT32, {2, 1, 1}});
+  m.SetInput<float>({1.1, 1.2, 2.1, 2.2});
+  m.SetPositions<int32_t>({1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({2.1, 2.2, 1.1, 1.2}));
+}
+
+TEST(GatherNdOpTest, BatchedIndexingIntoMatrix2) {
+  GatherNdOpModel m({TensorType_FLOAT32, {2, 2}},
+                    {TensorType_INT32, {2, 1, 2}});
+  m.SetInput<float>({1.1, 1.2, 2.1, 2.2});
+  m.SetPositions<int32_t>({0, 0, 1, 1});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({1.1, 2.2}));
+}
+
+TEST(GatherNdOpTest, DuplicateIndexingIntoMatrix) {
+  GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 2}});
+  m.SetInput<float>({1.1, 1.2, 2.1, 2.2});
+  m.SetPositions<int32_t>({0, 0, 0, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({1.1, 1.1}));
+}
+
+TEST(GatherNdOpTest, ElementIndexingIntoRank3Tensor) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {1, 2, 3}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({0, 0, 1, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({-1.2, -4.1}));
+}
+
+TEST(GatherNdOpTest, SliceIndexingIntoRank3Tensor) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {2, 1}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({0, 2});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray({1.1, -1.2, 1.3, -2.1, 2.2, 2.3, 5.1, -5.2, 5.3,
+                                6.1, -6.2, 6.3}));
+}
+
+TEST(GatherNdOpTest, BatchedIndexingIntoRank3Tensor1) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {2, 1, 3}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({0, 0, 1, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({-1.2, -4.1}));
+}
+
+TEST(GatherNdOpTest, BatchedIndexingIntoRank3Tensor2) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {2, 1, 1}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray({3.1, 3.2, -3.3, -4.1, -4.2, 4.3, 1.1, -1.2, 1.3,
+                                -2.1, 2.2, 2.3}));
+}
+
+TEST(GatherNdOpTest, BatchedIndexingIntoRank3Tensor3) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {2, 2, 2}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({0, 1, 1, 0, 0, 0, 2, 1});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray({-2.1, 2.2, 2.3, 3.1, 3.2, -3.3, 1.1, -1.2, 1.3,
+                                6.1, -6.2, 6.3}));
+}
+
+TEST(GatherNdOpTest, BatchedIndexingIntoRank3Tensor4) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {2, 2, 3}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({0, 0, 1, 1, 0, 1, 1, 1, 2, 2, 1, 2});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray({-1.2, 3.2, 4.3, 6.3}));
+}
+
+TEST(GatherNdOpTest, DuplicateIndexingIntoRank3Tensor) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {2, 2}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({0, 1, 0, 1});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray({-2.1, 2.2, 2.3, -2.1, 2.2, 2.3}));
+}
+
+TEST(GatherNdOpTest, Float32Int32) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT32, {2, 2}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int32_t>({0, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray({-2.1, 2.2, 2.3, 3.1, 3.2, -3.3}));
+}
+
+TEST(GatherNdOpTest, Float32Int64) {
+  GatherNdOpModel m({TensorType_FLOAT32, {3, 2, 3}},
+                    {TensorType_INT64, {2, 2}});
+  m.SetInput<float>({1.1, -1.2, 1.3, -2.1, 2.2, 2.3,   //
+                     3.1, 3.2, -3.3, -4.1, -4.2, 4.3,  //
+                     5.1, -5.2, 5.3, 6.1, -6.2, 6.3});
+  m.SetPositions<int64_t>({0LL, 1LL, 1LL, 0LL});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<float>(),
+              ElementsAreArray({-2.1, 2.2, 2.3, 3.1, 3.2, -3.3}));
+}
+
+TEST(GatherNdOpTest, Int32Int32) {
+  GatherNdOpModel m({TensorType_INT32, {3, 2, 3}}, {TensorType_INT32, {2, 2}});
+  m.SetInput<int32_t>({1, -1, 1, -2, 2, 2,   //
+                       3, 3, -3, -4, -4, 4,  //
+                       5, -5, 5, 6, -6, 6});
+  m.SetPositions<int32_t>({0, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int32_t>(), ElementsAreArray({-2, 2, 2, 3, 3, -3}));
+}
+
+TEST(GatherNdOpTest, Int32Int64) {
+  GatherNdOpModel m({TensorType_INT32, {3, 2, 3}}, {TensorType_INT64, {2, 2}});
+  m.SetInput<int32_t>({1, -1, 1, -2, 2, 2,   //
+                       3, 3, -3, -4, -4, 4,  //
+                       5, -5, 5, 6, -6, 6});
+  m.SetPositions<int64_t>({0LL, 1LL, 1LL, 0LL});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int32_t>(), ElementsAreArray({-2, 2, 2, 3, 3, -3}));
+}
+
+TEST(GatherNdOpTest, Uint8Int32) {
+  GatherNdOpModel m({TensorType_UINT8, {3, 2, 3}}, {TensorType_INT32, {2, 2}});
+  m.SetInput<uint8_t>({1, 1, 1, 2, 2, 2,  //
+                       3, 3, 3, 4, 4, 4,  //
+                       5, 5, 5, 6, 6, 6});
+  m.SetPositions<int32_t>({0, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<uint8_t>(), ElementsAreArray({2, 2, 2, 3, 3, 3}));
+}
+
+TEST(GatherNdOpTest, Uint8Int64) {
+  GatherNdOpModel m({TensorType_UINT8, {3, 2, 3}}, {TensorType_INT64, {2, 2}});
+  m.SetInput<uint8_t>({1, 1, 1, 2, 2, 2,  //
+                       3, 3, 3, 4, 4, 4,  //
+                       5, 5, 5, 6, 6, 6});
+  m.SetPositions<int64_t>({0, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<uint8_t>(), ElementsAreArray({2, 2, 2, 3, 3, 3}));
+}
+
+TEST(GatherNdOpTest, Int8Int32) {
+  GatherNdOpModel m({TensorType_INT8, {3, 2, 3}}, {TensorType_INT32, {2, 2}});
+  m.SetInput<int8_t>({1, -1, 1, -2, 2, 2,   //
+                      3, 3, -3, -4, -4, 4,  //
+                      5, -5, 5, 6, -6, 6});
+  m.SetPositions<int32_t>({0, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray({-2, 2, 2, 3, 3, -3}));
+}
+
+TEST(GatherNdOpTest, Int8Int64) {
+  GatherNdOpModel m({TensorType_INT8, {3, 2, 3}}, {TensorType_INT64, {2, 2}});
+  m.SetInput<int8_t>({1, -1, 1, -2, 2, 2,   //
+                      3, 3, -3, -4, -4, 4,  //
+                      5, -5, 5, 6, -6, 6});
+  m.SetPositions<int64_t>({0LL, 1LL, 1LL, 0LL});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray({-2, 2, 2, 3, 3, -3}));
+}
+
+TEST(GatherNdOpTest, Int64Int32) {
+  GatherNdOpModel m({TensorType_INT64, {3, 2, 3}}, {TensorType_INT32, {2, 2}});
+  m.SetInput<int64_t>({1LL, -1LL, 1LL, -2LL, 2LL, 2LL,   //
+                       3LL, 3LL, -3LL, -4LL, -4LL, 4LL,  //
+                       5LL, -5LL, 5LL, 6LL, -6LL, 6LL});
+  m.SetPositions<int32_t>({0, 1, 1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int64_t>(),
+              ElementsAreArray({-2LL, 2LL, 2LL, 3LL, 3LL, -3LL}));
+}
+
+TEST(GatherNdOpTest, Int64Int64) {
+  GatherNdOpModel m({TensorType_INT64, {3, 2, 3}}, {TensorType_INT64, {2, 2}});
+  m.SetInput<int64_t>({1LL, -1LL, 1LL, -2LL, 2LL, 2LL,   //
+                       3LL, 3LL, -3LL, -4LL, -4LL, 4LL,  //
+                       5LL, -5LL, 5LL, 6LL, -6LL, 6LL});
+  m.SetPositions<int64_t>({0LL, 1LL, 1LL, 0LL});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int64_t>(),
+              ElementsAreArray({-2LL, 2LL, 2LL, 3LL, 3LL, -3LL}));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/kernels/if.cc b/tensorflow/lite/kernels/if.cc
index a814ac9..1bd394e 100644
--- a/tensorflow/lite/kernels/if.cc
+++ b/tensorflow/lite/kernels/if.cc
@@ -153,6 +153,10 @@
   // in `Prepare`, so we don't need to do it here again.
   TF_LITE_ENSURE_OK(context, active_branch_subgraph.Invoke());
 
+  // Make the active subgraph's outputs readable before copying them to this
+  // node's outputs; their data may still live in delegate-owned buffers.
+  for (int tensor_index : active_branch_subgraph.outputs()) {
+    active_branch_subgraph.EnsureTensorDataIsReadable(tensor_index);
+  }
+
   bool has_dynamic_output_tensors = false;
   for (int i = 0; i < node->outputs->size; ++i) {
     TfLiteTensor* output = GetOutput(context, node, i);
diff --git a/tensorflow/lite/kernels/if_test.cc b/tensorflow/lite/kernels/if_test.cc
index 6593a58..0f90db1 100644
--- a/tensorflow/lite/kernels/if_test.cc
+++ b/tensorflow/lite/kernels/if_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,207 +12,42 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+
 #include <gtest/gtest.h>
 #include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/subgraph_test_util.h"
 #include "tensorflow/lite/kernels/test_util.h"
 #include "tensorflow/lite/model.h"
 
 namespace tflite {
-namespace ops {
-namespace builtin {
-// ADD and MUL are used to test simple branch.
-TfLiteRegistration* Register_ADD();
-TfLiteRegistration* Register_MUL();
-// Pad is used to test dynamic sized subgraphs.
-TfLiteRegistration* Register_PAD();
-}  // namespace builtin
-namespace custom {
-TfLiteRegistration* Register_IF();
-}  // namespace custom
-}  // namespace ops
+
+using subgraph_test_util::CheckIntTensor;
+using subgraph_test_util::ControlFlowOpTest;
+using subgraph_test_util::FillIntTensor;
 
 namespace {
 
-void SetupTensor(Subgraph* subgraph, int tensor_index,
-                 const std::vector<int>& shape, TfLiteType type) {
-  ASSERT_EQ(subgraph->SetTensorParametersReadWrite(
-                tensor_index, type, "", shape.size(), shape.data(), {}, false),
-            kTfLiteOk);
-}
-
-// TODO(ycling): Consider to move all the test helper functions to another
-// build target (e.g. subgraph_test_util).
-// Build a subgraph with an add op. Helper function for testing.
-void BuildAddSubgraph(Subgraph* subgraph) {
-  int first_new_tensor_index;
-  ASSERT_EQ(subgraph->AddTensors(3, &first_new_tensor_index), kTfLiteOk);
-  ASSERT_EQ(first_new_tensor_index, 0);
-  ASSERT_EQ(subgraph->SetInputs({0, 1}), kTfLiteOk);
-  ASSERT_EQ(subgraph->SetOutputs({2}), kTfLiteOk);
-
-  SetupTensor(subgraph, 0, {2}, kTfLiteInt32);
-  SetupTensor(subgraph, 1, {1, 2}, kTfLiteInt32);
-  // Intentionally set the wrong output size for testing. This should be
-  // overridden by Prepare function.
-  SetupTensor(subgraph, 2, {100}, kTfLiteInt32);
-
-  TfLiteAddParams* params =
-      reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
-  params->activation = kTfLiteActNone;
-  int node_index;
-  subgraph->AddNodeWithParameters({0, 1}, {2}, nullptr, 0, params,
-                                  ::tflite::ops::builtin::Register_ADD(),
-                                  &node_index);
-}
-
-// Build a subgraph with an mul op. Helper function for testing.
-void BuildMulSubgraph(Subgraph* subgraph) {
-  int first_new_tensor_index;
-  ASSERT_EQ(subgraph->AddTensors(3, &first_new_tensor_index), kTfLiteOk);
-  ASSERT_EQ(first_new_tensor_index, 0);
-  ASSERT_EQ(subgraph->SetInputs({0, 1}), kTfLiteOk);
-  ASSERT_EQ(subgraph->SetOutputs({2}), kTfLiteOk);
-
-  SetupTensor(subgraph, 0, {2}, kTfLiteInt32);
-  SetupTensor(subgraph, 1, {1, 2}, kTfLiteInt32);
-  // Intentionally set the wrong output size for testing. This should be
-  // overridden by Prepare function.
-  SetupTensor(subgraph, 2, {100}, kTfLiteInt32);
-
-  TfLiteMulParams* params =
-      reinterpret_cast<TfLiteMulParams*>(malloc(sizeof(TfLiteMulParams)));
-  params->activation = kTfLiteActNone;
-  int node_index;
-  subgraph->AddNodeWithParameters({0, 1}, {2}, nullptr, 0, params,
-                                  ::tflite::ops::builtin::Register_MUL(),
-                                  &node_index);
-}
-
-// Build a subgraph with a pad op. Helper function for testing.
-void BuildPadSubgraph(Subgraph* subgraph) {
-  int first_new_tensor_index;
-  ASSERT_EQ(subgraph->AddTensors(3, &first_new_tensor_index), kTfLiteOk);
-  ASSERT_EQ(first_new_tensor_index, 0);
-  ASSERT_EQ(subgraph->SetInputs({0, 1}), kTfLiteOk);
-  ASSERT_EQ(subgraph->SetOutputs({2}), kTfLiteOk);
-
-  SetupTensor(subgraph, 0, {2}, kTfLiteInt32);
-  SetupTensor(subgraph, 1, {1, 2}, kTfLiteInt32);
-  // Intentionally set the wrong output size for testing. This should be
-  // overridden by Prepare function.
-  SetupTensor(subgraph, 2, {100}, kTfLiteInt32);
-
-  TfLitePadParams* params =
-      reinterpret_cast<TfLitePadParams*>(malloc(sizeof(TfLitePadParams)));
-  int node_index;
-  subgraph->AddNodeWithParameters({0, 1}, {2}, nullptr, 0, params,
-                                  ::tflite::ops::builtin::Register_PAD(),
-                                  &node_index);
-}
-
-void BuildIfSubgraph(Subgraph* subgraph) {
-  int first_new_tensor_index;
-  ASSERT_EQ(subgraph->AddTensors(4, &first_new_tensor_index), kTfLiteOk);
-  ASSERT_EQ(first_new_tensor_index, 0);
-  ASSERT_EQ(subgraph->SetInputs({0, 1, 2}), kTfLiteOk);
-  ASSERT_EQ(subgraph->SetOutputs({3}), kTfLiteOk);
-
-  SetupTensor(subgraph, 0, {1}, kTfLiteBool);
-  SetupTensor(subgraph, 1, {2}, kTfLiteInt32);
-  SetupTensor(subgraph, 2, {1, 2}, kTfLiteInt32);
-  // Intentionally set the wrong output size for testing. This should be
-  // overridden by Prepare function.
-  SetupTensor(subgraph, 3, {100}, kTfLiteInt32);
-
-  flexbuffers::Builder fbb;
-  fbb.Map([&]() {
-    fbb.Int("then_subgraph_index", 1);
-    fbb.Int("else_subgraph_index", 2);
-  });
-  fbb.Finish();
-  const auto& buffer = fbb.GetBuffer();
-
-  int node_index;
-  subgraph->AddNodeWithParameters(
-      {0, 1, 2}, {3}, reinterpret_cast<const char*>(buffer.data()),
-      buffer.size(), nullptr, ::tflite::ops::custom::Register_IF(),
-      &node_index);
-}
-
-void FillIntTensor(TfLiteTensor* tensor, const std::vector<int32_t>& data) {
-  int count = NumElements(tensor);
-  ASSERT_EQ(count, data.size());
-  for (int i = 0; i < count; ++i) {
-    tensor->data.i32[i] = data[i];
-  }
-}
-
-void CheckIntTensor(const TfLiteTensor* tensor, const std::vector<int>& shape,
-                    const std::vector<int32_t>& data) {
-  ASSERT_EQ(tensor->dims->size, shape.size());
-  for (int i = 0; i < tensor->dims->size; ++i) {
-    ASSERT_EQ(tensor->dims->data[i], shape[i]);
-  }
-  ASSERT_EQ(tensor->type, kTfLiteInt32);
-  int count = NumElements(tensor);
-  ASSERT_EQ(count, data.size());
-  for (int i = 0; i < count; ++i) {
-    EXPECT_EQ(tensor->data.i32[i], data[i]);
-  }
-}
-
-// TestHelperfunctionTest tests the helper functions defined in this file.
-TEST(TestHelperfunctionTest, TestBuildAddSubgraph) {
-  std::unique_ptr<Interpreter> interpreter(new Interpreter);
-  BuildAddSubgraph(&interpreter->primary_subgraph());
-  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
-  FillIntTensor(interpreter->tensor(interpreter->inputs()[0]), {5, 7});
-  FillIntTensor(interpreter->tensor(interpreter->inputs()[1]), {1, 2});
-  ASSERT_EQ(interpreter->Invoke(), kTfLiteOk);
-  TfLiteTensor* output = interpreter->tensor(interpreter->outputs()[0]);
-  CheckIntTensor(output, {1, 2}, {6, 9});
-}
-
-TEST(TestHelperfunctionTest, TestBuildMulSubgraph) {
-  std::unique_ptr<Interpreter> interpreter(new Interpreter);
-  BuildMulSubgraph(&interpreter->primary_subgraph());
-  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
-  FillIntTensor(interpreter->tensor(interpreter->inputs()[0]), {5, 7});
-  FillIntTensor(interpreter->tensor(interpreter->inputs()[1]), {1, 2});
-  ASSERT_EQ(interpreter->Invoke(), kTfLiteOk);
-  TfLiteTensor* output = interpreter->tensor(interpreter->outputs()[0]);
-  CheckIntTensor(output, {1, 2}, {5, 14});
-}
-
-TEST(TestHelperfunctionTest, TestBuildPadSubgraph) {
-  std::unique_ptr<Interpreter> interpreter(new Interpreter);
-  BuildPadSubgraph(&interpreter->primary_subgraph());
-  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
-  FillIntTensor(interpreter->tensor(interpreter->inputs()[0]), {5, 7});
-  FillIntTensor(interpreter->tensor(interpreter->inputs()[1]), {1, 2});
-  ASSERT_EQ(interpreter->Invoke(), kTfLiteOk);
-  TfLiteTensor* output = interpreter->tensor(interpreter->outputs()[0]);
-  CheckIntTensor(output, {5}, {0, 5, 7, 0, 0});
-}
-
 // A simple test that performs `ADD` if condition is true, and `MUL` otherwise.
 // The computation is: `cond ? a + b : a * b`.
-class SimpleIfTest : public ::testing::Test {
+class SimpleIfTest : public ControlFlowOpTest {
  protected:
   void SetUp() override {
-    interpreter_.reset(new Interpreter);
     interpreter_->AddSubgraphs(2);
-    BuildAddSubgraph(interpreter_->subgraph(1));
-    BuildMulSubgraph(interpreter_->subgraph(2));
-    BuildIfSubgraph(&interpreter_->primary_subgraph());
+    builder_->BuildAddSubgraph(interpreter_->subgraph(1));
+    builder_->BuildMulSubgraph(interpreter_->subgraph(2));
+    builder_->BuildIfSubgraph(&interpreter_->primary_subgraph());
+
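+    // Size the condition and the two data inputs before allocating tensors,
+    // matching the data filled in below.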
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {1});
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {2});
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[2], {1, 2});
     ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
     FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {5, 7});
     FillIntTensor(interpreter_->tensor(interpreter_->inputs()[2]), {1, 2});
   }
-  std::unique_ptr<Interpreter> interpreter_;
 };
 
 TEST_F(SimpleIfTest, TestIfTrue) {
@@ -231,19 +66,22 @@
 
 // Test IF op using subgraphs with dynamically sized outputs.
 // The computation is: `cond ? a + b : pad(a, b)`.
-class DynamicSubgraphIfTest : public ::testing::Test {
+class DynamicSubgraphIfTest : public ControlFlowOpTest {
  protected:
   void SetUp() override {
-    interpreter_.reset(new Interpreter);
     interpreter_->AddSubgraphs(2);
-    BuildAddSubgraph(interpreter_->subgraph(1));
-    BuildPadSubgraph(interpreter_->subgraph(2));
-    BuildIfSubgraph(&interpreter_->primary_subgraph());
+    builder_->BuildAddSubgraph(interpreter_->subgraph(1));
+    builder_->BuildPadSubgraph(interpreter_->subgraph(2));
+    builder_->BuildIfSubgraph(&interpreter_->primary_subgraph());
+
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {1});
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {2});
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[2], {1, 2});
     ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
     FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {5, 7});
     FillIntTensor(interpreter_->tensor(interpreter_->inputs()[2]), {1, 2});
   }
-  std::unique_ptr<Interpreter> interpreter_;
 };
 
 TEST_F(DynamicSubgraphIfTest, TestIfTrue) {
diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD
index 97b3225..816b88d 100644
--- a/tensorflow/lite/kernels/internal/BUILD
+++ b/tensorflow/lite/kernels/internal/BUILD
@@ -306,11 +306,17 @@
         "reference/depthwiseconv_float.h",
         "reference/depthwiseconv_uint8.h",
         "reference/fully_connected.h",
+        "reference/integer_ops/add.h",
         "reference/integer_ops/conv.h",
         "reference/integer_ops/depthwise_conv.h",
         "reference/integer_ops/dequantize.h",
+        "reference/integer_ops/fully_connected.h",
+        "reference/integer_ops/log_softmax.h",
+        "reference/integer_ops/logistic.h",
+        "reference/integer_ops/mul.h",
         "reference/integer_ops/pooling.h",
         "reference/integer_ops/softmax.h",
+        "reference/integer_ops/tanh.h",
         "reference/reference_ops.h",
         "reference/softmax.h",
     ],
@@ -584,7 +590,10 @@
 
 cc_test(
     name = "depthwiseconv_quantized_test",
-    srcs = ["depthwiseconv_quantized_test.cc"],
+    srcs = [
+        "depthwiseconv_quantized_test.cc",
+        "optimized/depthwiseconv_uint8_transitional.h",
+    ],
     shard_count = 2,
     deps = [
         ":optimized_base",
@@ -593,6 +602,7 @@
         ":types",
         "@com_google_absl//absl/strings",
         "@com_google_googletest//:gtest_main",
+        "@gemmlowp",
     ],
 )
 
@@ -643,7 +653,7 @@
     srcs = [
         "logsoftmax_quantized_test.cc",
     ],
-    shard_count = 3,
+    shard_count = 4,
     tags = [
         # TODO(b/122242739): Reenable after fixing the flakiness?
         "nomac",
diff --git a/tensorflow/lite/kernels/internal/common.h b/tensorflow/lite/kernels/internal/common.h
index bc30ac9..e00a3f4 100644
--- a/tensorflow/lite/kernels/internal/common.h
+++ b/tensorflow/lite/kernels/internal/common.h
@@ -131,6 +131,221 @@
 #endif
 }
 
+// TODO(b/77858996): Add these to gemmlowp.
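+// The unspecialized templates below are compile-time guards: their
+// static_assert fires for any type without an explicit specialization.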
+template <typename IntegerType>
+IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
+  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
+  return a;
+}
+
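+// The int32 specialization widens to 64 bits and clamps the sum to the int32
+// range; e.g. SaturatingAddNonGemmlowp(INT32_MAX, 1) returns INT32_MAX rather
+// than wrapping.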
+template <>
+inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
+  std::int64_t a64 = a;
+  std::int64_t b64 = b;
+  std::int64_t sum = a64 + b64;
+  return static_cast<std::int32_t>(std::min(
+      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
+      std::max(
+          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
+          sum)));
+}
+
+template <typename tRawType, int tIntegerBits>
+gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
+  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
+      SaturatingAddNonGemmlowp(a.raw(), b.raw()));
+}
+
+template <typename IntegerType>
+IntegerType SaturatingSub(IntegerType a, IntegerType b) {
+  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
+  return a;
+}
+
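+// The specializations below widen, subtract, then clamp to the narrow type's
+// range; e.g. SaturatingSub(std::int16_t{-32768}, std::int16_t{1}) returns
+// -32768 rather than wrapping around.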
+template <>
+inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
+  std::int32_t a32 = a;
+  std::int32_t b32 = b;
+  std::int32_t diff = a32 - b32;
+  return static_cast<std::int16_t>(std::min(32767, std::max(-32768, diff)));
+}
+
+template <>
+inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
+  std::int64_t a64 = a;
+  std::int64_t b64 = b;
+  std::int64_t diff = a64 - b64;
+  return static_cast<std::int32_t>(std::min(
+      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
+      std::max(
+          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
+          diff)));
+}
+
+template <typename tRawType, int tIntegerBits>
+gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
+  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
+      SaturatingSub(a.raw(), b.raw()));
+}
+// End section to be moved to gemmlowp.
+
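+// Saturating left shift by a runtime-variable exponent: inputs whose magnitude
+// would overflow the shift are clamped to the underlying scalar type's
+// min/max. Works for gemmlowp scalar and SIMD register types alike.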
+template <typename IntegerType>
+IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
+  if (exponent == 0) {
+    return x;
+  }
+  using ScalarIntegerType =
+      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
+  const IntegerType min =
+      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
+  const IntegerType max =
+      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
+  const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
+
+  const std::int32_t threshold =
+      ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
+  const IntegerType positive_mask =
+      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
+  const IntegerType negative_mask =
+      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
+
+  IntegerType result = gemmlowp::ShiftLeft(x, exponent);
+  result = gemmlowp::SelectUsingMask(positive_mask, max, result);
+  result = gemmlowp::SelectUsingMask(negative_mask, min, result);
+  return result;
+}
+
+// If we want to leave IntegerBits fixed, then multiplication
+// by a power of two has to be saturating/rounding, not exact anymore.
+template <typename tRawType, int tIntegerBits>
+gemmlowp::FixedPoint<tRawType, tIntegerBits>
+SaturatingRoundingMultiplyByPOTParam(
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
+  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
+      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
+}
+
+// Minimum output bits to accommodate log of maximum input range.  It actually
+// does not matter if one considers, say, [-64,64] or [-64,64).
+//
+// For example, run this through Octave:
+// [0:127; ...
+//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
+//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
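+//
+// For instance, input_bits = 31 gives |log(2^31)| ~= 21.5, and
+// ceil(log2(21.5 + 1)) = 5 output integer bits, matching the
+// "input_bits > 21" branch below.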
+constexpr int min_log_x_output_bits(int input_bits) {
+  return input_bits > 90
+             ? 7
+             : input_bits > 44
+                   ? 6
+                   : input_bits > 21
+                         ? 5
+                         : input_bits > 10
+                               ? 4
+                               : input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
+}
+
+// Although the name of this function says that it cannot handle values less
+// than 1, in practice it can handle inputs as low as 1/x_max, where x_max is
+// the largest representable input.  In other words, the output range is
+// symmetric.
+template <int OutputIntegerBits, int InputIntegerBits>
+inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
+log_x_for_x_greater_than_or_equal_to_1_impl(
+    gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
+  // assert(__builtin_clz(0u) >= std::numeric_limits<uint32>::digits - 1);
+  // assert(__builtin_clz(0u) <= std::numeric_limits<uint32>::digits);
+  using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
+  // The reason for accumulating the result with an extra bit of headroom is
+  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
+  // recip_denom will otherwise introduce an error.
+  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
+  using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
+
+  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1488522236, std::log(2.0));
+  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
+  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1518500250, std::sqrt(0.5));
+  const FixedPoint0 one_quarter =
+      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
+
+  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
+  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
+  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1057819769,
+      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
+  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
+
+  const FixedPointAccum shifted_quarter =
+      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
+
+  // Reinterpret the input value as Q0.31, because we will figure out the
+  // required shift "ourselves" instead of using, say, Rescale.
+  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
+  // z_a_pow_2 = input_integer_bits - z_a_headroom;
+  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
+  FixedPoint0 r_a_tmp =
+      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
+  const int32 r_a_raw =
+      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
+  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
+  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
+  //                   InputIntegerBits - z_b_headroom - 0.25);
+  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
+      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
+          InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
+      shifted_quarter);
+
+  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
+  FixedPoint0 z_b = z_a * sqrt_half;
+  int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
+  const int32 r_b_raw =
+      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
+  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
+      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
+          InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
+      shifted_quarter);
+
+  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
+  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
+      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
+
+  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
+  FixedPoint0 q = r - sqrt_sqrt_half;
+  q = q + q;
+
+  const FixedPoint0 common_sq = q * q;
+  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
+  const FixedPoint0 denom_minus_one_0 =
+      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
+  const FixedPoint0 recip_denom =
+      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
+
+  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
+  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
+                                              num_scaled * recip_denom);
+}
+
+template <int OutputIntegerBits, int InputIntegerBits>
+inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
+log_x_for_x_greater_than_or_equal_to_1(
+    gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
+  static_assert(
+      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
+      "Output integer bits must be sufficent to accommodate logs of inputs.");
+  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
+                                                     InputIntegerBits>(
+      input_val);
+}
+
 inline int32 GetReciprocal(int32 x, int x_integer_digits,
                            int* num_bits_over_unit) {
   int headroom_plus_one = CountLeadingZeros(static_cast<uint32>(x));
diff --git a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc
index b396e62..5a05390 100644
--- a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc
+++ b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc
@@ -30,11 +30,13 @@
 #include "absl/strings/substitute.h"
 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h"
+#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h"
 #include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
 
 namespace tflite {
 namespace {
 
+using optimized_ops::depthwise_conv::DotProduct3x3KernelType;
 using ::testing::Bool;
 using ::testing::Values;
 
@@ -57,7 +59,7 @@
 
 // The TestParam structure below is the preferred parameterization of tests. A
 // tuple version is defined in order to support value-parameterized tests.
-typedef std::tuple<DepthwiseConvInvocation, int, bool, bool, bool,
+typedef std::tuple<DepthwiseConvImplementation, int, bool, bool, bool,
                    DepthwiseConvOutputRounding, bool>
     TestParamTuple;
 
@@ -82,7 +84,8 @@
                             param.test_depth_multiplier);
   }
 
-  DepthwiseConvInvocation forced_invocation = DepthwiseConvInvocation::kNone;
+  DepthwiseConvImplementation forced_invocation =
+      DepthwiseConvImplementation::kNone;
   int tests_to_run = 0;
   bool test_stride = false;
   bool test_pad = false;
@@ -99,7 +102,7 @@
     const RuntimeShape& bias_shape, const int32* bias_data,
     const RuntimeShape& output_shape, uint8* output_data) {
   switch (test_param.forced_invocation) {
-    case DepthwiseConvInvocation::kUseNeon3x3: {
+    case DepthwiseConvImplementation::kUseNeon3x3: {
 // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
 // Jetson TX-2. This compiler does not support the offsetof() macro.
 #if defined(__aarch64__) && !defined(GOOGLE_L4T)
@@ -114,7 +117,7 @@
 
       // Check that parameter combination is supported.
       const bool basic_3x3_kernel_supported =
-          optimized_ops::Fast3x3FilterKernelSupported(
+          optimized_ops::depthwise_conv::Fast3x3FilterKernelSupported(
               input_shape, filter_shape, stride_width, stride_height,
               dilation_width_factor, dilation_height_factor, pad_width,
               pad_height, depth_multiplier, output_shape, output_shift);
@@ -127,7 +130,7 @@
           << " output_height = " << output_shape.Dims(1);
 
       // Call kernel optimized for depthwise convolutions using 3x3 filters.
-      optimized_ops::DepthwiseConv3x3Filter(
+      optimized_ops::depthwise_conv::DepthwiseConv3x3Filter(
           params, input_shape, input_data, filter_shape, filter_data,
           bias_shape, bias_data, output_shape, output_data);
       return;
@@ -135,23 +138,41 @@
       break;
 #endif
     }
-    case DepthwiseConvInvocation::kUseNeon3x3DotProduct:
-    case DepthwiseConvInvocation::kUseCModel3x3DotProduct:
-    case DepthwiseConvInvocation::kUseUnwound3x3DotProduct:
-    case DepthwiseConvInvocation::kUseIntrinsics3x3DotProduct:
+    case DepthwiseConvImplementation::kUseNeon3x3DotProduct:
+    case DepthwiseConvImplementation::kUseUnwound3x3DotProduct:
+    case DepthwiseConvImplementation::kUseIntrinsics3x3DotProduct:
       // TODO(b/118426582) Placeholder for future dispatches.
       break;
-    case DepthwiseConvInvocation::kUseGenericKernel: {
-      optimized_ops::DepthwiseConvGeneral(params, input_shape, input_data,
-                                          filter_shape, filter_data, bias_shape,
-                                          bias_data, output_shape, output_data);
+    case DepthwiseConvImplementation::kUseCModel3x3DotProduct: {
+      DotProduct3x3KernelType kernel_type =
+          optimized_ops::depthwise_conv::CategorizeDotProductKernel(params);
+
+      ASSERT_TRUE(
+          kernel_type == DotProduct3x3KernelType::kPlain ||
+          kernel_type == DotProduct3x3KernelType::kStride2 ||
+          kernel_type ==
+              DotProduct3x3KernelType::kWithDepthMultiplicationStride1 ||
+          kernel_type ==
+              DotProduct3x3KernelType::kWithDepthMultiplicationStride2)
+          << "Kernel type = " << static_cast<int>(kernel_type);
+
+      optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3<
+          DepthwiseConvImplementation::kUseCModel3x3DotProduct>(
+          params, input_shape, input_data, filter_shape, filter_data,
+          bias_shape, bias_data, output_shape, output_data);
       return;
     }
-    case DepthwiseConvInvocation::kNone:
+    case DepthwiseConvImplementation::kUseGenericKernel: {
+      optimized_ops::depthwise_conv::DepthwiseConvGeneral(
+          params, input_shape, input_data, filter_shape, filter_data,
+          bias_shape, bias_data, output_shape, output_data);
+      return;
+    }
+    case DepthwiseConvImplementation::kNone:
     default:
       break;
   }
-  EXPECT_EQ(test_param.forced_invocation, DepthwiseConvInvocation::kNone)
+  EXPECT_EQ(test_param.forced_invocation, DepthwiseConvImplementation::kNone)
       << "TODO(b/118426582) requested kernel was not invoked / available yet";
   optimized_ops::DepthwiseConv(params, input_shape, input_data, filter_shape,
                                filter_data, bias_shape, bias_data, output_shape,
@@ -191,7 +212,7 @@
   op_params.output_shift = -output_shift;
   switch (test_param.output_rounding) {
     case DepthwiseConvOutputRounding::kUpward:
-      reference_ops::DepthwiseConvBasicKernel<
+      reference_ops::depthwise_conv::DepthwiseConvBasicKernel<
           DepthwiseConvOutputRounding::kAwayFromZero>::Run(op_params,
                                                            input_shape,
                                                            input_data,
@@ -449,7 +470,7 @@
       UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid;
 
   // Adjust for, or reject, special cases.
-  if (test_param.forced_invocation != DepthwiseConvInvocation::kNone) {
+  if (test_param.forced_invocation != DepthwiseConvImplementation::kNone) {
     // With stride == 2 and SAME, padding width and height are the left and top
     // padding amounts. When there is an even input dimension, padding + 1 is
     // required on the right / bottom. This is not handled by these kernels, so
@@ -509,7 +530,7 @@
       dilation_width_factor, dilation_height_factor, padding_type);
 }
 
-void TestOneDepthwiseConv(DepthwiseConvInvocation forced_invocation,
+void TestOneDepthwiseConv(DepthwiseConvImplementation forced_invocation,
                           DepthwiseConvOutputRounding output_rounding) {
   TestParam test_param;
   test_param.forced_invocation = forced_invocation;
@@ -519,7 +540,7 @@
 }
 
 void TestOneDepthwiseConv3x3Filter(
-    DepthwiseConvInvocation forced_invocation,
+    DepthwiseConvImplementation forced_invocation,
     DepthwiseConvOutputRounding output_rounding) {
   TestParam test_param;
   test_param.forced_invocation = forced_invocation;
@@ -537,7 +558,7 @@
 TEST(TestDepthwiseConv, TestDepthwiseConv) {
   const int kTestsToRun = 10 * 1000;
   for (int i = 0; i < kTestsToRun; i++) {
-    TestOneDepthwiseConv(DepthwiseConvInvocation::kNone,
+    TestOneDepthwiseConv(DepthwiseConvImplementation::kNone,
                          DepthwiseConvOutputRounding::kAwayFromZero);
   }
 }
@@ -546,7 +567,7 @@
 TEST(TestDepthwiseConv, TestGenericKernel) {
   const int kTestsToRun = 10 * 1000;
   for (int i = 0; i < kTestsToRun; i++) {
-    TestOneDepthwiseConv(DepthwiseConvInvocation::kUseGenericKernel,
+    TestOneDepthwiseConv(DepthwiseConvImplementation::kUseGenericKernel,
                          DepthwiseConvOutputRounding::kAwayFromZero);
   }
 }
@@ -554,7 +575,7 @@
 TEST(TestDepthwiseConv, TestKernel3x3Filter) {
   const int kTestsToRun = 1000;
   for (int i = 0; i < kTestsToRun; i++) {
-    TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kNone,
+    TestOneDepthwiseConv3x3Filter(DepthwiseConvImplementation::kNone,
                                   DepthwiseConvOutputRounding::kAwayFromZero);
   }
 }
@@ -564,8 +585,9 @@
 TEST(TestDepthwiseConv, TestGenericKernel3x3Filter) {
   const int kTestsToRun = 100;
   for (int i = 0; i < kTestsToRun; i++) {
-    TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseGenericKernel,
-                                  DepthwiseConvOutputRounding::kAwayFromZero);
+    TestOneDepthwiseConv3x3Filter(
+        DepthwiseConvImplementation::kUseGenericKernel,
+        DepthwiseConvOutputRounding::kAwayFromZero);
   }
 }
 
@@ -573,7 +595,7 @@
 TEST(TestDepthwiseConv, TestNeon3x3Filter) {
   const int kTestsToRun = 3 * 1000;
   for (int i = 0; i < kTestsToRun; i++) {
-    TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseNeon3x3,
+    TestOneDepthwiseConv3x3Filter(DepthwiseConvImplementation::kUseNeon3x3,
                                   DepthwiseConvOutputRounding::kAwayFromZero);
   }
 }
@@ -592,11 +614,11 @@
 INSTANTIATE_TEST_SUITE_P(
     Neon3x3Kernel, DepthwiseConvTest,
     testing::Combine(
-        Values(DepthwiseConvInvocation::kUseNeon3x3),  // forced_invocation
-        Values(1000),                                  // tests_to_run
-        Bool(),                                        // test_stride
-        Values(false),                                 // test_pad
-        Values(false),                                 // test_depth_multiplier
+        Values(DepthwiseConvImplementation::kUseNeon3x3),  // forced_invocation
+        Values(1000),                                      // tests_to_run
+        Bool(),                                            // test_stride
+        Values(false),                                     // test_pad
+        Values(false),  // test_depth_multiplier
         Values(DepthwiseConvOutputRounding::kAwayFromZero),  // output_rounding
         Values(false)                                        // loose_tolerance
         ),
@@ -608,11 +630,25 @@
 INSTANTIATE_TEST_SUITE_P(
     GenericKernel, DepthwiseConvTest,
     testing::Combine(
-        Values(
-            DepthwiseConvInvocation::kUseGenericKernel),  // forced_invocation
-        Values(100),                                      // tests_to_run
-        Bool(),                                           // test_stride
-        Bool(),                                           // test_pad
+        Values(DepthwiseConvImplementation::
+                   kUseGenericKernel),                 // forced_invocation
+        Values(100),                                   // tests_to_run
+        Bool(),                                        // test_stride
+        Bool(),                                        // test_pad
+        Bool(),                                        // test_depth_multiplier
+        Values(DepthwiseConvOutputRounding::kUpward),  // output_rounding
+        Values(false)                                  // loose_tolerance
+        ),
+    TestParam::TestNameSuffix);
+
+INSTANTIATE_TEST_SUITE_P(
+    CModel, DepthwiseConvTest,
+    testing::Combine(
+        Values(DepthwiseConvImplementation::
+                   kUseCModel3x3DotProduct),           // forced_invocation
+        Values(1000),                                  // tests_to_run
+        Bool(),                                        // test_stride
+        Bool(),                                        // test_pad
         Bool(),                                        // test_depth_multiplier
         Values(DepthwiseConvOutputRounding::kUpward),  // output_rounding
         Values(false)                                  // loose_tolerance
diff --git a/tensorflow/lite/kernels/internal/log_quantized_test.cc b/tensorflow/lite/kernels/internal/log_quantized_test.cc
index 8c39350..c31c8e3 100644
--- a/tensorflow/lite/kernels/internal/log_quantized_test.cc
+++ b/tensorflow/lite/kernels/internal/log_quantized_test.cc
@@ -121,8 +121,7 @@
                    const string& check_label, int tolerance) {
   const int n = test_input.size();
   std::vector<int32> float_gen_output(n, 0);
-  std::vector<int32> reference_output(n, 0);
-  std::vector<int32> optimized_output(n, 0);
+  std::vector<int32> quantized_output(n, 0);
 
   // Work around quirky compiler behavior: the result of __builtin_clz(0u) is
   // undefined and may equal 31 instead of 32.
@@ -132,45 +131,21 @@
   }
 
   for (int i = 0; i < n; ++i) {
-    reference_output[i] =
-        tflite::reference_ops::log_x_for_x_greater_than_or_equal_to_1_impl<
-            OutputIntegerBits, InputIntegerBits>(
-            gemmlowp::FixedPoint<int32, InputIntegerBits>::FromRaw(
-                fudged_input[i]))
-            .raw();
-    optimized_output[i] =
-        tflite::optimized_ops::log_x_for_x_greater_than_or_equal_to_1_impl<
-            OutputIntegerBits, InputIntegerBits>(
+    quantized_output[i] =
+        tflite::log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
+                                                            InputIntegerBits>(
             gemmlowp::FixedPoint<int32, InputIntegerBits>::FromRaw(
                 fudged_input[i]))
             .raw();
     float_gen_output[i] = LogPositiveValuesViaFloat(
         fudged_input[i], InputIntegerBits, OutputIntegerBits);
   }
-  // Note that first check is intolerant.
-  {
-    std::ostringstream label;
-    label << check_label << " / optimized vs reference / InputIntegerBits="
-          << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits;
-    CheckOutputData(
-        optimized_output, reference_output, test_input, label.str(),
-        InputIntegerBits, OutputIntegerBits, 0);
-  }
   {
     std::ostringstream label;
     label << check_label << " / reference vs float-gen / InputIntegerBits="
           << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits;
-    CheckOutputData(
-        reference_output, float_gen_output, test_input, label.str(),
-        InputIntegerBits, OutputIntegerBits, tolerance);
-  }
-  {
-    std::ostringstream label;
-    label << check_label << " optimized vs float-gen / InputIntegerBits="
-          << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits;
-    CheckOutputData(
-        optimized_output, float_gen_output, test_input, label.str(),
-        InputIntegerBits, OutputIntegerBits, tolerance);
+    CheckOutputData(quantized_output, float_gen_output, test_input, label.str(),
+                    InputIntegerBits, OutputIntegerBits, tolerance);
   }
 }
 
diff --git a/tensorflow/lite/kernels/internal/logsoftmax_quantized_test.cc b/tensorflow/lite/kernels/internal/logsoftmax_quantized_test.cc
index 945300d..d0d2654 100644
--- a/tensorflow/lite/kernels/internal/logsoftmax_quantized_test.cc
+++ b/tensorflow/lite/kernels/internal/logsoftmax_quantized_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -25,6 +25,8 @@
 #include <gtest/gtest.h>
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
 #include "tensorflow/lite/kernels/internal/test_util.h"
 #include "tensorflow/lite/string.h"
@@ -61,7 +63,42 @@
   }
 }
 
-void CheckOutputData(const uint8* test_output, const uint8* reference_output,
+// Same as above except for the following changes:
+// - input and output data types
+// - Dequantize function
+// - clamping values
+void RunLogSoftmaxFloatReference(const int8* input_data,
+                                 const RuntimeShape& shape_common,
+                                 int32 input_offset, const double input_scale,
+                                 int stride, float beta,
+                                 int8* reference_output_data) {
+  const int ref_buffer_size = shape_common.FlatSize();
+  std::vector<float> reference_dequant_data(ref_buffer_size);
+  std::vector<float> reference_output_float_data(ref_buffer_size);
+
+  // Reference data is generated by dequantizing the input to float and then
+  // applying the float LogSoftmax.
+  DequantizationParams dq_params;
+  dq_params.zero_point = input_offset;
+  dq_params.scale = input_scale;
+  reference_integer_ops::Dequantize(dq_params, shape_common, input_data,
+                                    shape_common,
+                                    reference_dequant_data.data());
+  SoftmaxParams sm_params;
+  optimized_ops::LogSoftmax(sm_params, shape_common,
+                            reference_dequant_data.data(), shape_common,
+                            reference_output_float_data.data());
+  // Work with quantized scaling for LogSoftmax, under which 127 represents 0,
+  // and -16 gets nudged up to -128.
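+  // For example, a float log-softmax value of -1.0f maps to
+  // 127 + round(16 * -1.0) = 111.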
+  for (int i = 0; i < ref_buffer_size; i++) {
+    reference_output_data[i] = std::max(
+        -128, static_cast<int>(
+                  127 + std::round(16.0f * reference_output_float_data[i])));
+  }
+}
+
+template <typename T>
+void CheckOutputData(const T* test_output, const T* reference_output,
                      const RuntimeShape& shape_common,
                      const string& check_label, bool be_exacting) {
   const int buffer_size = shape_common.FlatSize();
@@ -144,15 +181,58 @@
   reference_ops::LogSoftmax(params, shape_common, input_data, shape_common,
                             reference_quant_logsoftmax_output.data());
 
-  CheckOutputData(optimized_logsoftmax_output.data(),
-                  reference_float_logsoftmax_output.data(), shape_common,
-                  "Optimized vs float reference", false);
-  CheckOutputData(optimized_logsoftmax_output.data(),
-                  reference_quant_logsoftmax_output.data(), shape_common,
-                  "Optimized vs quant reference", true);
-  CheckOutputData(reference_quant_logsoftmax_output.data(),
-                  reference_float_logsoftmax_output.data(), shape_common,
-                  "Quant reference vs float reference", false);
+  CheckOutputData<uint8_t>(optimized_logsoftmax_output.data(),
+                           reference_float_logsoftmax_output.data(),
+                           shape_common, "Optimized vs float reference", false);
+  CheckOutputData<uint8_t>(optimized_logsoftmax_output.data(),
+                           reference_quant_logsoftmax_output.data(),
+                           shape_common, "Optimized vs quant reference", true);
+  CheckOutputData<uint8_t>(reference_quant_logsoftmax_output.data(),
+                           reference_float_logsoftmax_output.data(),
+                           shape_common, "Quant reference vs float reference",
+                           false);
+}
+
+// Runs the int8 quantized LogSoftmax reference implementation and compares it
+// against the float reference implementation.
+void RunOneLogSoftmaxTest(const int8* input_data,
+                          const RuntimeShape& shape_common, int32 input_offset,
+                          const double input_scale, int stride, float beta) {
+  const int buffer_size = shape_common.FlatSize();
+  std::vector<int8> quantized_logsoftmax_reference_implementation(buffer_size);
+  std::vector<int8> float_logsoftmax_optimized_implementation(buffer_size);
+
+  RunLogSoftmaxFloatReference(input_data, shape_common, input_offset,
+                              input_scale, stride, beta,
+                              float_logsoftmax_optimized_implementation.data());
+
+  int32 input_beta_multiplier;
+  int input_beta_left_shift;
+  int32 reverse_scaling_divisor;
+  int reverse_scaling_right_shift;
+  static const int kScaledDiffIntegerBits = 5;
+  tflite::PreprocessLogSoftmaxScalingExp(
+      beta, input_scale, kScaledDiffIntegerBits, &input_beta_multiplier,
+      &input_beta_left_shift, &reverse_scaling_divisor,
+      &reverse_scaling_right_shift);
+  reverse_scaling_right_shift *= -1;
+  // diff_min has a negative value, and is used to limit the maximum magnitude
+  // of the diffs, which are <= 0.
+  const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits,
+                                                     input_beta_left_shift);
+
+  const int outer_size =
+      shape_common.Dims(0) * shape_common.Dims(1) * shape_common.Dims(2);
+  const int inner_size = shape_common.Dims(3);
+  reference_integer_ops::LogSoftmax(
+      input_beta_multiplier, input_beta_left_shift, reverse_scaling_divisor,
+      reverse_scaling_right_shift, diff_min, outer_size, inner_size, input_data,
+      quantized_logsoftmax_reference_implementation.data());
+
+  CheckOutputData<int8_t>(quantized_logsoftmax_reference_implementation.data(),
+                          float_logsoftmax_optimized_implementation.data(),
+                          shape_common, "Quant reference vs float reference",
+                          false);
 }
 
 // This function picks some random LogSoftmax params, which are checked for
@@ -161,6 +241,7 @@
 // to loop until a test has been run.
 //
 // Currently we do not reject for any reason.
+template <typename T>
 bool TryOneUniformLogSoftmax() {
   // We pick mostly positive values, on the whole emphasizing smaller values and
   // therefore faster tests.  We test a wider range of depths.  In the case of
@@ -178,7 +259,7 @@
       RuntimeShape({batch, input_height, input_width, input_depth});
   const int buffer_size = shape_common.FlatSize();
 
-  std::vector<uint8> input_data(buffer_size);
+  std::vector<T> input_data(buffer_size);
   FillRandom(&input_data);
   RunOneLogSoftmaxTest(input_data.data(), shape_common, input_offset,
                        input_scale, stride, beta);
@@ -224,15 +305,23 @@
   return true;
 }
 
-TEST(TestQuantizedLogSoftmax, UniformLogSoftmaxTests) {
+TEST(TestQuantizedLogSoftmax, UniformLogSoftmaxUint8Tests) {
   const int kTestsToRun = 100;
   for (int i = 0; i < kTestsToRun; i++) {
-    while (!TryOneUniformLogSoftmax()) {
+    while (!TryOneUniformLogSoftmax<uint8_t>()) {
     }
   }
 }
 
-TEST(TestQuantizedLogSoftmax, SkyscraperLogSoftmaxTests) {
+TEST(TestQuantizedLogSoftmax, UniformLogSoftmaxInt8Tests) {
+  const int kTestsToRun = 100;
+  for (int i = 0; i < kTestsToRun; i++) {
+    while (!TryOneUniformLogSoftmax<int8_t>()) {
+    }
+  }
+}
+
+TEST(TestQuantizedLogSoftmax, SkyscraperLogSoftmaxUint8Tests) {
   const int kTestsToRun = 100;
   for (int i = 0; i < kTestsToRun; i++) {
     while (!TryOneSkyscraperLogSoftmax(false)) {
@@ -240,7 +329,7 @@
   }
 }
 
-TEST(TestQuantizedLogSoftmax, SmallSkyscraperLogSoftmaxTests) {
+TEST(TestQuantizedLogSoftmax, SmallSkyscraperLogSoftmaxUint8Tests) {
   const int kTestsToRun = 100;
   for (int i = 0; i < kTestsToRun; i++) {
     while (!TryOneSkyscraperLogSoftmax(true)) {
diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index 1362949..84d7016 100644
--- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -24,6 +24,7 @@
 
 namespace tflite {
 namespace optimized_ops {
+namespace depthwise_conv {
 
 // Implementation of quantized DepthwiseConv
 
@@ -1946,6 +1947,8 @@
   }
 }
 
+}  // namespace depthwise_conv
+
 inline void DepthwiseConv(
     const DepthwiseParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
@@ -1980,23 +1983,23 @@
 
   // Call kernel optimized for depthwise convolutions using 3x3 filters if
   // parameters are supported.
-  if (Fast3x3FilterKernelSupported(
+  if (depthwise_conv::Fast3x3FilterKernelSupported(
           input_shape, filter_shape, stride_width, stride_height,
           dilation_width_factor, dilation_height_factor, pad_width, pad_height,
           depth_multiplier, output_shape, output_shift)) {
     gemmlowp::ScopedProfilingLabel specialized_label("DepthwiseConv/8bit/3x3");
-    DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
-                           filter_data, bias_shape, bias_data, output_shape,
-                           output_data);
+    depthwise_conv::DepthwiseConv3x3Filter(
+        params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+        bias_data, output_shape, output_data);
     return;
   }
 #endif
 
   gemmlowp::ScopedProfilingLabel specialized_label(
       "DepthwiseConv/8bit/General");
-  DepthwiseConvGeneral(params, input_shape, input_data, filter_shape,
-                       filter_data, bias_shape, bias_data, output_shape,
-                       output_data);
+  depthwise_conv::DepthwiseConvGeneral(params, input_shape, input_data,
+                                       filter_shape, filter_data, bias_shape,
+                                       bias_data, output_shape, output_data);
 }
 
 }  // namespace optimized_ops
diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
index b7993c3..f43c2b3 100644
--- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
+++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
@@ -23,6 +23,10 @@
 
 namespace tflite {
 namespace optimized_ops {
+namespace depthwise_conv {
+
+constexpr int kDepthwiseConvScratchWorkspaceSize = 10 * 10 * 64;
+constexpr int kDepthwiseConvAdjustedBiasLimit = 256;
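
Replacing the old workspace-size macro with typed, namespaced constants also removes a textual-expansion hazard. A minimal sketch of the difference (the names here are illustrative, not from the library):

    #include <cassert>

    #define WORKSPACE_SIZE 10 * 10 * 64           // expands as unparenthesized tokens
    constexpr int kWorkspaceSize = 10 * 10 * 64;  // a single typed value, 6400

    int main() {
      const int x = 6401;
      assert(x % WORKSPACE_SIZE == 640);  // parses as ((6401 % 10) * 10) * 64
      assert(x % kWorkspaceSize == 1);    // 6401 % 6400, as intended
      return 0;
    }
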
 
 // See CategorizeDotProductKernel for definitive taxonomy.
 enum class DotProduct3x3KernelType {
@@ -61,13 +65,14 @@
   }
 }
 
+#define STR(s) STR_UNEXPANDED(s)
+#define STR_UNEXPANDED(s) #s
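
These two defines are the standard two-step stringification idiom: the inner macro stringizes its argument as-is, while the outer one forces the argument to be macro-expanded first. A small self-contained illustration (OFFSET_INPUT_DEPTH below is a stand-in for the offset macros the asm blocks use):

    #define STR(s) STR_UNEXPANDED(s)
    #define STR_UNEXPANDED(s) #s
    #define OFFSET_INPUT_DEPTH 0  // stand-in for the real offset macros

    // STR expands the macro first, then stringizes; STR_UNEXPANDED does not.
    static_assert(sizeof(STR(OFFSET_INPUT_DEPTH)) == sizeof("0"), "");
    static_assert(sizeof(STR_UNEXPANDED(OFFSET_INPUT_DEPTH)) ==
                      sizeof("OFFSET_INPUT_DEPTH"), "");
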
+
 // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
 // Jetson TX-2. This compiler does not support the offsetof() macro.
 #if defined(__aarch64__) && !defined(GOOGLE_L4T)
 #include <stddef.h>
 
-#define DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE 10 * 10 * 64
-
 // Encapsulates constant parameters used in DepthwiseConv.
 // 64-bit is used for types that will be added to 64-bit addresses in asm.
 struct DepthwiseConvParams {
@@ -91,9 +96,6 @@
   int32 output_height;
 };
 
-#define STR(s) STR_UNEXPANDED(s)
-#define STR_UNEXPANDED(s) #s
-
 // Represents the number of bytes offset from the start of the
 // DepthwiseConvParams struct. This is used in the asm to load parameters.
 // Keep these values in sync with the static_asserts below.
@@ -168,7 +170,46 @@
 static_assert(offsetof(DepthwiseConvParams, output_height) ==
                   OFFSET_OUTPUT_HEIGHT,
               "");
+#endif
 
+// Encapsulates constant parameters used in DepthwiseConv using dot-product ops.
+// 64-bit is used for types that will be added to 64-bit addresses in asm.
+//
+// This structure is specifically designed for use in asm.
+struct DepthwiseConvDotProdParams {
+  int64_t input_depth;
+  int64_t output_depth;
+  int32 workspace_height_stride;
+  int32 input_width_overall_micro_repeats;
+  int32 input_width_micro_repeats;
+  int32 depth_micro_repeats;
+  int32 inbound_block_height;
+  int32 residual_width;
+  int32 input_height_stride;
+  int32 stride;
+  int32 output_width_overall_micro_repeats;
+  int32 output_width_micro_repeats;
+  int32 output_residual_width;
+  int32 output_height_stride;
+  int32 bias_increment;
+  int32 padding_left;
+  int32 padding_right;
+  int32 padding_top;
+  int32 padding_bottom;
+  int32 height_macro_count;
+  int32 width_macro_count;
+  int32 outbound_block_height;
+  int32 workspace_width_micro_repeats;
+  int32 input_offset;
+  int32 output_offset;
+  int32 output_multiplier;
+  int32 output_shift;
+  int32 quantized_activation_min;
+  int32 quantized_activation_max;
+  int32 four_over_stride;
+};
+
+#if defined(__aarch64__) && !defined(GOOGLE_L4T)
 template <int32 kDepth, int32 kStrideWidth, int32 kStrideHeight>
 struct DepthwiseConvWindow {};
 
@@ -2964,8 +3005,6 @@
 #undef OFFSET_INPUT_HEIGHT
 #undef OFFSET_OUTPUT_WIDTH
 #undef OFFSET_OUTPUT_HEIGHT
-#undef STR
-#undef STR_UNEXPANDED
 
 // Copies a subset of the input designated by |input_ptr| into |output_ptr|
 // with the specified output dimensions. Supports output depths of 64 only as
@@ -3048,7 +3087,7 @@
         get_shuffle_input_size(kStrideWidth, shuffle_params.output_width));
     TFLITE_DCHECK(64 * shuffle_params.input_width *
                       shuffle_params.input_height <=
-                  DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE);
+                  kDepthwiseConvScratchWorkspaceSize);
 
     int32 out_x = start_x;
 
@@ -3376,7 +3415,7 @@
   // allocated on the stack. Eventually we will want to move it to the heap
   // and have it allocated outside of this function, like the im2col_array
   // used in gemmlowp.
-  uint8 shuffle_workspace[DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE];
+  uint8 shuffle_workspace[kDepthwiseConvScratchWorkspaceSize];
 
   for (int32 b = 0; b < batches; ++b) {
     const uint8* input_ptr = input_data + b * input_batch_size;
@@ -3455,9 +3494,12 @@
     }
   }
 }
-
 #endif  // __aarch64__
 
+#undef STR
+#undef STR_UNEXPANDED
+
+}  // namespace depthwise_conv
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h
new file mode 100644
index 0000000..a00544f
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h
@@ -0,0 +1,1393 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_TRANSITIONAL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_TRANSITIONAL_H_
+
+// This file provides kernel implementations that are not used in shipped
+// inference code, but rather serve to (a) show how the C++ model code is
+// designed and then transformed into asm code, and (b) aid with maintenance
+// and later development of variations. Many projects (even including, say,
+// the classic NAG libraries) develop highly optimized code, but do not
+// maintain intermediate versions.
+// Often the result is incomprehensible final-version code.
+
+#include <algorithm>
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
+#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h"
+#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace optimized_ops {
+namespace depthwise_conv {
+
+// Permute filter data, and adjust bias data to account for symmetric input
+// offset. Details are provided in the implementation of the
+// kUseCModel3x3DotProduct version.
+//
+// See the comments preceding DepthwiseConvDotProduct3x3() for further notes.
+template <DepthwiseConvImplementation implementation>
+struct ProcessPerDepth {
+  // Routine is contained in a static Run() method. No default template version
+  // is supplied, so that all implementations are deliberate choices of template
+  // specialization.
+  //
+  // Note that the signature of the Run() method will be designed for the asm
+  // implementation rather than conforming to style.
+};
+
+template <>
+struct ProcessPerDepth<DepthwiseConvImplementation::kUseCModel3x3DotProduct> {
+  // Filter data is provided as filter_block[3][3][depth/8][2][4]: height 3,
+  // width 3, depth/8 blocks, sub-block 0 or 1, depth 4. Filter data is
+  // written as
+  // filter_bank[3][2][4][4]; height 3, sub-block, depth 4, width 4.
+  //
+  // Note that this rearrangement is much like that performed on input data when
+  // filling the workspace, and optimized versions will be similar.
+  static inline void FillFilterBank(int depth, const uint8* filter_block,
+                                    int8 filter_bank[3][2][4][4]) {
+    constexpr int kSymmetricZeroPoint = 128;
+    // Load filter data in, 8 bytes down the depth / sub-block at a time.
+    //
+    // loaded_filter has dimensions height 3, width 4, sub-block 0 or 1,
+    // depth 4.
+    uint8 loaded_filter[3][4][2][4];
+    for (int y = 0; y < 3; ++y) {
+      for (int x = 0; x < 3; ++x) {
+        memcpy(loaded_filter[y][x][0], &filter_block[3 * y * depth + x * depth],
+               8);
+      }
+      // Pad the filter with the symmetric representation of 0, so that the
+      // values become 0 when the zero point is subtracted below. Thus these
+      // filter taps are effectively disregarded in later filtering.
+      memset(loaded_filter[y][3][0], kSymmetricZeroPoint, 8);
+    }
+    for (int y = 0; y < 3; ++y) {
+      for (int z = 0; z < 4; ++z) {
+        for (int x = 0; x < 4; ++x) {
+          filter_bank[y][0][z][x] =
+              loaded_filter[y][x][0][z] - kSymmetricZeroPoint;
+          filter_bank[y][1][z][x] =
+              loaded_filter[y][x][1][z] - kSymmetricZeroPoint;
+        }
+      }
+    }
+  }
+
+  // Adjust the bias (weights) data according to the input offset.
+  //
+  // The output calculation is
+  // out[h][w][d] = bias[d] + sum_ij (in[h+i][w+j][d] + in_offset) *
+  //                                 (filter[i][j][d] + filter_offset)
+  // (where offsets are expressed as differences from 128).
+  //
+  // Since we cannot efficiently handle varying offsets / bias across the image,
+  // we insist on filter_offset = 0.
+  //
+  // This function calculates
+  // adjusted_bias[d] = bias[d] + sum_ij in_offset * filter[i][j][d]
+  // which accounts for the input offset. Even if the bias is constant over
+  // the depth, the adjusted bias will generally vary.
+  static inline void AdjustBias(int32 input_offset,
+                                const int8 filter_bank[3][2][4][4],
+                                const int32* bias_data,
+                                int32 adjusted_bias_block[2][4]) {
+    constexpr int kSymmetricZeroPoint = 128;
+    TFLITE_DCHECK_GE(input_offset, -255);
+    TFLITE_DCHECK_LE(input_offset, 0);
+    // For instance, if input_offset == -128, no adjustment is needed.
+    const int32 input_offset_difference = input_offset + kSymmetricZeroPoint;
+
+    for (int s = 0; s < 2; ++s) {
+      for (int z = 0; z < 4; ++z) {
+        adjusted_bias_block[s][z] = bias_data[4 * s + z];
+        for (int i = 0; i < 9; ++i) {
+          adjusted_bias_block[s][z] +=
+              input_offset_difference * filter_bank[i % 3][s][z][i / 3];
+        }
+      }
+    }
+  }
+
+  static void Run(const uint8* filter_data, const int32* bias_data,
+                  int8* shuffled_filter_data, int32* adjusted_bias_data,
+                  const DepthwiseConvDotProdParams* function_params) {
+    constexpr int shuffled_filter_increment = 2 * 3 * 4 * 4;
+    const int depth = function_params->output_depth;
+    const int bias_increment = function_params->bias_increment;
+    const int32 input_offset = function_params->input_offset;
+
+    int8 filter_bank[3][2][4][4];
+    int32 adjusted_bias_block[2][4];
+
+    for (int j_depth = 0; j_depth < (depth >> 3); ++j_depth) {
+      FillFilterBank(depth, filter_data + 8 * j_depth, filter_bank);
+      AdjustBias(input_offset, filter_bank,
+                 bias_data + 2 * j_depth * bias_increment, adjusted_bias_block);
+
+      memcpy(shuffled_filter_data, filter_bank[0][0][0],
+             shuffled_filter_increment);
+      shuffled_filter_data += shuffled_filter_increment;
+      memcpy(adjusted_bias_data, adjusted_bias_block[0],
+             8 * sizeof(adjusted_bias_block[0][0]));
+      adjusted_bias_data += 8;
+    }
+  }
+};
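
As a concrete check of the AdjustBias arithmetic above, here is a standalone sketch with made-up quantization values; it computes adjusted_bias = bias + (input_offset + 128) * (sum of filter taps) for one channel:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t input_offset = -100;                           // assumed
      const int32_t input_offset_difference = input_offset + 128;  // 28
      const int32_t bias = 500;                                    // assumed
      const int8_t filter[9] = {3, -2, 7, 0, 1, -5, 4, 2, -1};     // assumed taps
      int32_t filter_sum = 0;
      for (int i = 0; i < 9; ++i) filter_sum += filter[i];  // sums to 9
      assert(bias + input_offset_difference * filter_sum == 752);
      return 0;
    }
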
+
+// Copy a macro block of data from the input buffer into the workspace,
+// permuting data within each micro block.
+//
+// (a) Copy a macro block of data, padding as required along the width and
+//     height.
+// (b) Transpose the data within each micro block.
+//
+// See the comments preceding DepthwiseConvDotProduct3x3() for further notes.
+template <DepthwiseConvImplementation implementation,
+          DepthwiseConvDepthMultiplication depth_multiplication,
+          int32 max_padding>
+struct PackMacroBlock {
+  // Routine is contained in a static Run() method. No default template version
+  // is supplied, so that all implementations are deliberate choices of template
+  // specialization.
+  //
+  // Note that the signature of the Run() method will be designed for the asm
+  // implementation rather than conforming to style.
+};
+
+template <int32 max_padding>
+struct PackMacroBlock<DepthwiseConvImplementation::kUseCModel3x3DotProduct,
+                      DepthwiseConvDepthMultiplication::kNoMultiplication,
+                      max_padding> {
+  // A straight copy of a macro block of input data into a scratch buffer.
+  //
+  // Requirement: depth_micro_repeats > 0.
+  static inline void CopyMacroBlock(
+      int32 height_block_number, int32 width_block_number,
+      const DepthwiseConvDotProdParams& function_params,
+      const uint8* input_block_data, int8* scratch_block_data) {
+    TFLITE_DCHECK_LE(max_padding, 1);
+
+    // Strides.
+    // The input depth and count of micro blocks provide the width strides.
+    const int input_height_stride = function_params.input_height_stride;
+    const int workspace_height_stride = function_params.workspace_height_stride;
+    const int input_depth = function_params.input_depth;
+    const int depth_micro_repeats = function_params.depth_micro_repeats;
+    TFLITE_DCHECK_GT(depth_micro_repeats, 0);
+
+    // Remaining iteration and dimension parameters.
+    //
+    // If width_overall_micro_repeats = input_width_micro_repeats + 1, then the
+    // final micro block is incomplete.
+    const int width_overall_micro_repeats =
+        function_params.input_width_overall_micro_repeats;
+    int input_width_micro_repeats = function_params.input_width_micro_repeats;
+    const int residual_width = function_params.residual_width;
+    const int block_height = function_params.inbound_block_height;
+
+    const int padding_left = function_params.padding_left;
+    const int padding_right = function_params.padding_right;
+    const int padding_top = function_params.padding_top;
+    const int padding_bottom = function_params.padding_bottom;
+
+    const bool leading_width_padding =
+        padding_left > 0 && width_block_number == 0;
+    const bool trailing_width_padding =
+        padding_right > 0 &&
+        width_block_number == (function_params.width_macro_count - 1);
+    const bool leading_height_padding =
+        padding_top > 0 && height_block_number < 0;
+    const bool trailing_height_padding =
+        padding_bottom > 0 &&
+        height_block_number == (function_params.height_macro_count - 1);
+
+    // Modify the trailing case to reflect the input width.
+    int input_residual_width =
+        input_width_micro_repeats < width_overall_micro_repeats ? residual_width
+                                                                : 4;
+    if (trailing_width_padding) {
+      input_residual_width -= 1;
+      input_width_micro_repeats = width_overall_micro_repeats - 1;
+    }
+
+    constexpr int kSymmetricZeroPoint = 128;
+    const int32 input_offset_difference =
+        function_params.input_offset + kSymmetricZeroPoint;
+
+    // We load data into a temporary buffer and then save, to match subsequent
+    // processing. This will make it easier to combine stages into one ASM
+    // routine.
+    int8 tmp_load[4][2][4];
+
+    int copy_block_height = block_height;
+    if (leading_height_padding) {
+      memset(scratch_block_data, -input_offset_difference,
+             workspace_height_stride);
+      scratch_block_data += workspace_height_stride;
+      input_block_data += input_height_stride;
+      copy_block_height -= 1;
+    }
+    if (trailing_height_padding) {
+      copy_block_height -= 1;
+    }
+
+    // The outer 3 loops go through all the micro blocks in a macro block.
+    for (int k_height = 0; k_height < copy_block_height; ++k_height) {
+      for (int j_width = 0; j_width < width_overall_micro_repeats; ++j_width) {
+        // Figure out division of work (available input vs trailing padding).
+        int adjusted_residual_width =
+            j_width == input_width_micro_repeats ? input_residual_width : 4;
+
+        int start_width = 0;
+        if (leading_width_padding && j_width == 0) {
+          start_width = 1;
+          memset(tmp_load[0][0], -input_offset_difference, 8);
+        }
+        if (adjusted_residual_width < 4) {
+          for (int x = adjusted_residual_width; x < 4; ++x) {
+            memset(tmp_load[x][0], -input_offset_difference, 8);
+          }
+        }
+
+        for (int i_depth = 0; i_depth < depth_micro_repeats; ++i_depth) {
+          // The inner 3 loops go through the sub-block, depth and width within
+          // each micro block.
+
+          // Load, and apply symmetric offset.
+          int8* scratch_data =
+              scratch_block_data + k_height * workspace_height_stride +
+              j_width * 4 * 8 + i_depth * 4 * 8 * width_overall_micro_repeats;
+          const uint8* input_data = input_block_data +
+                                    k_height * input_height_stride +
+                                    j_width * 4 * input_depth + i_depth * 8;
+          // Full-size micro blocks are 2*4*4 = 32 bytes.
+          for (int x = start_width; x < adjusted_residual_width; ++x) {
+            for (int s = 0; s < 2; ++s) {
+              for (int d = 0; d < 4; ++d) {
+                tmp_load[x][s][d] = input_data[x * input_depth + 4 * s + d] -
+                                    kSymmetricZeroPoint;
+              }
+            }
+          }
+
+          // Save results.
+          memcpy(&scratch_data[0], tmp_load[0][0], 8);
+          memcpy(&scratch_data[8], tmp_load[1][0], 8);
+          memcpy(&scratch_data[16], tmp_load[2][0], 8);
+          memcpy(&scratch_data[24], tmp_load[3][0], 8);
+        }
+      }
+    }
+
+    if (trailing_height_padding) {
+      memset(scratch_block_data + copy_block_height * workspace_height_stride,
+             -input_offset_difference, workspace_height_stride);
+    }
+  }
+
+  // Transpose 4x4 blocks within each sub-micro-block.
+  //
+  // Implemented somewhat like NEON register manipulation, so that we can see
+  // equivalence of the two approaches.
+  static inline void MicroTransposeBlocks(
+      const DepthwiseConvDotProdParams& function_params,
+      int8* scratch_block_data) {
+    const int workspace_height_stride = function_params.workspace_height_stride;
+    const int width_overall_micro_repeats =
+        function_params.input_width_overall_micro_repeats;
+    const int depth_micro_repeats = function_params.depth_micro_repeats;
+    const int block_height = function_params.inbound_block_height;
+
+    // Transpositions are 4x4, but doing 2 at a time is more efficient in the
+    // NEON code we are simulating.
+    int8 tmp_load[4][2][4];         // [width][sub-block][depth]
+    int8 tmp_transposed[4][2][4];   // [depth][sub-block][width]
+    int8 tmp_interleaved[2][4][4];  // [sub-block][depth][width]
+
+    // The outer 3 loops go through all the micro blocks in a macro block.
+    for (int k_height = 0; k_height < block_height; ++k_height) {
+      for (int j_width = 0; j_width < width_overall_micro_repeats; ++j_width) {
+        for (int i_depth = 0; i_depth < depth_micro_repeats; ++i_depth) {
+          int8* scratch_data =
+              scratch_block_data + k_height * workspace_height_stride +
+              j_width * 4 * 8 + i_depth * 4 * 8 * width_overall_micro_repeats;
+          // A. Load data
+          memcpy(tmp_load[0][0], &scratch_data[0], 8);
+          memcpy(tmp_load[1][0], &scratch_data[8], 8);
+          memcpy(tmp_load[2][0], &scratch_data[16], 8);
+          memcpy(tmp_load[3][0], &scratch_data[24], 8);
+
+          // B. Simulate between-register transposition.
+          for (int x = 0; x < 4; ++x) {
+            for (int y = 0; y < 4; ++y) {
+              tmp_transposed[x][0][y] = tmp_load[y][0][x];
+              tmp_transposed[x][1][y] = tmp_load[y][1][x];
+            }
+          }
+
+          // C. Simulate between-register interleaving.
+          for (int x = 0; x < 4; ++x) {
+            for (int y = 0; y < 4; ++y) {
+              tmp_interleaved[0][x][y] = tmp_transposed[x][0][y];
+              tmp_interleaved[1][x][y] = tmp_transposed[x][1][y];
+            }
+          }
+          // D. Simulate mangled storage arrangement.
+          memcpy(&scratch_data[0], tmp_interleaved[0][0], 16);
+          memcpy(&scratch_data[16], tmp_interleaved[1][0], 16);
+        }
+      }
+    }
+  }
+
+  static inline void Run(int32 height_block_number, int32 width_block_number,
+                         const uint8* input_block_data,
+                         int8* scratch_block_data,
+                         const DepthwiseConvDotProdParams* function_params) {
+    CopyMacroBlock(height_block_number, width_block_number, *function_params,
+                   input_block_data, scratch_block_data);
+    MicroTransposeBlocks(*function_params, scratch_block_data);
+  }
+};
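
The effect of MicroTransposeBlocks is easiest to see on a single sub-block. This reduced sketch (synthetic values, sub-block interleaving omitted) shows the [width][depth] to [depth][width] rearrangement that places each depth channel's four width positions contiguously, the layout the dot-product kernels consume:

    #include <cstdint>

    int main() {
      int8_t in[4][4];   // [width][depth], as written by CopyMacroBlock
      int8_t out[4][4];  // [depth][width], as consumed by the kernels
      for (int x = 0; x < 4; ++x)
        for (int d = 0; d < 4; ++d) in[x][d] = static_cast<int8_t>(4 * x + d);
      for (int d = 0; d < 4; ++d)
        for (int x = 0; x < 4; ++x) out[d][x] = in[x][d];
      // out[d] now holds depth channel d's four width positions contiguously.
      return 0;
    }
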
+
+template <int32 max_padding>
+struct PackMacroBlock<DepthwiseConvImplementation::kUseCModel3x3DotProduct,
+                      DepthwiseConvDepthMultiplication::kUnitInputDepth,
+                      max_padding> {
+  static inline void Run(int32 height_block_number, int32 width_block_number,
+                         const uint8* input_block_data,
+                         int8* scratch_block_data,
+                         const DepthwiseConvDotProdParams* function_params) {
+    // Currently, support for padding is limited to 1 on any side.
+    TFLITE_DCHECK_LE(max_padding, 1);
+
+    // Strides.
+    // The count of micro blocks (below) provides the width strides.
+    const int input_height_stride = function_params->input_height_stride;
+    const int workspace_height_stride =
+        function_params->workspace_height_stride;
+
+    // Remaining iteration and dimension parameters.
+    //
+    // If width_overall_micro_repeats = input_width_micro_repeats + 1, then the
+    // final micro block is incomplete.
+    const int width_overall_micro_repeats =
+        function_params->input_width_overall_micro_repeats;
+    const int input_width_micro_repeats =
+        function_params->input_width_micro_repeats;
+    const int residual_width = function_params->residual_width;
+    const int block_height = function_params->inbound_block_height;
+    TFLITE_DCHECK_GE(workspace_height_stride, 4 * width_overall_micro_repeats);
+
+    const int padding_left = function_params->padding_left;
+    const int padding_right = function_params->padding_right;
+    const int padding_top = function_params->padding_top;
+    const int padding_bottom = function_params->padding_bottom;
+
+    const bool leading_width_padding =
+        padding_left > 0 && width_block_number == 0;
+    const bool trailing_width_padding =
+        padding_right > 0 &&
+        width_block_number == (function_params->width_macro_count - 1);
+    const bool leading_height_padding =
+        padding_top > 0 && height_block_number < 0;
+    const bool trailing_height_padding =
+        padding_bottom > 0 &&
+        height_block_number == (function_params->height_macro_count - 1);
+
+    constexpr int kSymmetricZeroPoint = 128;
+    const int32 input_offset_difference =
+        function_params->input_offset + kSymmetricZeroPoint;
+
+    int copy_block_height = block_height;
+    if (leading_height_padding) {
+      memset(scratch_block_data, -input_offset_difference,
+             workspace_height_stride);
+      scratch_block_data += workspace_height_stride;
+      input_block_data += input_height_stride;
+      copy_block_height -= 1;
+    }
+    if (trailing_height_padding) {
+      copy_block_height -= 1;
+    }
+
+    int adjusted_residual_width =
+        input_width_micro_repeats < width_overall_micro_repeats ? residual_width
+                                                                : 4;
+
+    if (trailing_width_padding) {
+      adjusted_residual_width -= 1;
+    }
+    int start_width = 0;
+    if (leading_width_padding) {
+      start_width = 1;
+      input_block_data += 1;
+    }
+
+    const int copy_size = (width_overall_micro_repeats - 1) * 4 +
+                          adjusted_residual_width - start_width;
+
+    TFLITE_DCHECK_LE(
+        copy_size,
+        input_height_stride - width_block_number * input_width_micro_repeats);
+    // We may drop up to stride-1 of trailing input.
+    TFLITE_DCHECK_GE(copy_size, input_height_stride - 1);
+
+    // When there is unit input depth, the micro-block iteration need only be
+    // through the height. The micro blocks are contiguous across the width.
+    for (int k_height = 0; k_height < copy_block_height; ++k_height) {
+      const uint8* input_data =
+          input_block_data + k_height * input_height_stride;
+      int8* scratch_data =
+          scratch_block_data + k_height * workspace_height_stride;
+
+      // Handle leading padding. This is overwritten if there is no padding.
+      scratch_data[0] = -input_offset_difference;
+
+      memcpy(&scratch_data[start_width], input_data, copy_size);
+      for (int i = 0; i < copy_size; ++i) {
+        scratch_data[start_width + i] += -kSymmetricZeroPoint;
+      }
+
+      // Handle trailing padding, and fill in remainder of micro block.
+      memset(&scratch_data[start_width + copy_size], -input_offset_difference,
+             4 - adjusted_residual_width);
+    }
+
+    if (trailing_height_padding) {
+      memset(scratch_block_data + copy_block_height * workspace_height_stride,
+             -input_offset_difference, workspace_height_stride);
+    }
+  }
+};
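
A note on the fill value used for padding above: a real input byte u enters the int8 workspace as u - 128, and the bias was pre-adjusted by input_offset + 128, so a padding byte of -(input_offset + 128) behaves exactly like a zero-valued input. A minimal check with an assumed offset:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t input_offset = -100;  // assumed quantization offset
      const int32_t input_offset_difference = input_offset + 128;
      // A stored input byte contributes u + input_offset to the accumulator
      // once the bias pre-adjustment folds input_offset_difference back in.
      const int32_t u = 77;
      assert((u - 128) + input_offset_difference == u + input_offset);
      // A padding byte of -input_offset_difference contributes exactly 0.
      assert(-input_offset_difference + input_offset_difference == 0);
      return 0;
    }
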
+
+// Apply filter to macro block of input data and store results. Details are
+// provided in the implementation of the kUseCModel3x3DotProduct version.
+//
+// Parameters for repeats and residual sizes are in terms of outputs.
+//
+// See the comments preceding DepthwiseConvDotProduct3x3() for further notes.
+template <DepthwiseConvImplementation implementation,
+          DepthwiseConvDepthMultiplication depth_multiplication, int32 stride>
+struct KernelMacroBlock {
+  // Routine is contained in a static Run() method. No default template version
+  // is supplied, so that all implementations are deliberate choices of template
+  // specialization.
+  //
+  // Note that the signature of the Run() method will be designed for the asm
+  // implementation rather than conforming to style.
+};
+
+// Apply filter to macro block of input data and store results.
+//
+// Requirement: depth_micro_repeats > 0 || residual_depth > 0.
+template <int32 stride>
+struct KernelMacroBlock<DepthwiseConvImplementation::kUseCModel3x3DotProduct,
+                        DepthwiseConvDepthMultiplication::kNoMultiplication,
+                        stride> {
+  // Construct a width-shifted combination of two input sub-blocks, effectively
+  // concatenating them.
+  //
+  // The filter is applied using sub-blocks. These are in the needed form for
+  // the first (width) offset. For subsequent offsets, the filter is applied to
+  // shifted and combined data. The concatenation and shifting herein are
+  // fairly straightforward, but in the optimized code this is an area of
+  // design creativity, because NEON instructions do not directly support the
+  // required between-register permutation.
+  //
+  // In NEON optimized code, input data is grouped in 4-byte blocks. In order to
+  // move along the width for each output point calculation, data is shifted, in
+  // essence between two such blocks.
+  //
+  // selected_data has format height 3, depth 4, width 4.
+  //
+  // When the micro block is trailing (the last across the macro-block width),
+  // it would be illegal to load the right (next) block, and the no_right_block
+  // parameter indicates this scenario.
+  static inline void ConcatenateInputSubBlocks(int offset, int sub_block,
+                                               int workspace_height_stride,
+                                               int width_micro_stride,
+                                               bool no_right_block,
+                                               const int8* input_block,
+                                               int8 selected_data[3][4][4]) {
+    TFLITE_DCHECK_GE(offset, 0);
+    TFLITE_DCHECK_LT(offset, 4);
+
+    // The input banks have the same format as selected_data.
+    int8 left_bank[3][4][4];
+    int8 right_bank[3][4][4];
+
+    // Work through one slice at a time, row by row.
+    for (int k_height = 0; k_height < 3; ++k_height) {
+      // Simulate demangling of mangled storage arrangement.
+      const int8* left_input_block =
+          &input_block[k_height * workspace_height_stride + sub_block * 2 * 8];
+      memcpy(left_bank[k_height][0], left_input_block, 16);
+      if (no_right_block) {
+        memset(right_bank[k_height][0], 0, 16);
+      } else {
+        const int8* right_input_block =
+            &input_block[k_height * workspace_height_stride +
+                         sub_block * 2 * 8 + width_micro_stride];
+        memcpy(right_bank[k_height][0], right_input_block, 16);
+      }
+      for (int depth_index = 0; depth_index < 4; ++depth_index) {
+        memcpy(selected_data[k_height][depth_index],
+               &left_bank[k_height][depth_index][offset], 4 - offset);
+        memcpy(&selected_data[k_height][depth_index][4 - offset],
+               right_bank[k_height][depth_index], offset);
+      }
+    }
+  }
+
+  // Straight implementation of 3x3 filter within sub-micro block.
+  static inline void Calculate3x3FilterOutput(
+      const DepthwiseConvDotProdParams& params, int sub_block,
+      const int8 selected_data[3][4][4], const int8 filter_bank[3][2][4][4],
+      const int32* bias_data, uint8 output_values[4]) {
+    const int32 output_activation_min = params.quantized_activation_min;
+    const int32 output_activation_max = params.quantized_activation_max;
+    const int32 output_multiplier = params.output_multiplier;
+    const int32 output_shift = params.output_shift;
+    const int32 output_offset = params.output_offset;
+    for (int d = 0; d < 4; ++d) {
+      int32 acc = 0;
+      for (int y = 0; y < 3; ++y) {
+        for (int x = 0; x < 4; ++x) {
+          int32 input_val = selected_data[y][d][x];
+          int32 filter_val = filter_bank[y][sub_block][d][x];
+          acc += filter_val * input_val;
+        }
+      }
+      acc += bias_data[d];
+      acc = reference_ops::depthwise_conv::DepthwiseConvRound<
+          DepthwiseConvOutputRounding::kUpward>(acc, output_multiplier,
+                                                output_shift);
+      acc += output_offset;
+      acc = std::max(acc, output_activation_min);
+      acc = std::min(acc, output_activation_max);
+      output_values[d] = static_cast<uint8>(acc);
+    }
+  }
+
+  static inline void Run(const int8* scratch_block_data,
+                         const int8* filter_workspace, const int32* bias_data,
+                         uint8* output_block_data,
+                         const DepthwiseConvDotProdParams* function_params) {
+    const int workspace_height_stride =
+        function_params->workspace_height_stride;
+    const int input_width_overall_micro_repeats =
+        function_params->input_width_overall_micro_repeats;
+    const int output_width_micro_repeats =
+        function_params->output_width_micro_repeats;
+    const int depth_micro_repeats = function_params->depth_micro_repeats;
+    const int depth = function_params->input_depth;
+    const int stride_val = function_params->stride;
+    const int four_over_stride = function_params->four_over_stride;
+
+    const int output_width_overall_micro_repeats =
+        function_params->output_width_overall_micro_repeats;
+    const int block_height = function_params->outbound_block_height;
+    const int residual_width = function_params->output_residual_width;
+    const int output_height_stride = function_params->output_height_stride;
+    constexpr int bias_increment = 4;
+    TFLITE_DCHECK_EQ(function_params->bias_increment, bias_increment);
+
+    TFLITE_DCHECK(depth_micro_repeats > 0);
+    const int width_micro_stride = 4 * 8;
+    const int depth_micro_stride =
+        width_micro_stride * input_width_overall_micro_repeats;
+
+    constexpr int shuffled_filter_increment = 2 * 3 * 4 * 4;
+
+    // Simulate NEON-register transposition of subset of filter.
+    int8 filter_bank[3][2][4][4];  // Height 3, sub-block, depth 4, width 4.
+    // Simulate NEON-register input data concatenation + sub-selection.
+    int8 sub_selected_input_data[3][4][4];  // Height 3, depth 4, width 4.
+    uint8 output_values[4];                 // Depth 4.
+
+    // The outer 3 loops go through all the micro blocks in a macro block, and
+    // separately treat the two sub-blocks within each micro block.
+    for (int j_depth = 0; j_depth < depth_micro_repeats; ++j_depth) {
+      memcpy(filter_bank[0][0][0],
+             filter_workspace + j_depth * shuffled_filter_increment,
+             shuffled_filter_increment);
+
+      for (int s = 0; s < 2; ++s) {
+        for (int k_height = 0; k_height < block_height; ++k_height) {
+          const int8* scratch_data =
+              scratch_block_data +
+              workspace_height_stride * k_height * stride_val +
+              depth_micro_stride * j_depth;
+          uint8* output_data =
+              output_block_data + output_height_stride * k_height + 8 * j_depth;
+
+          for (int i_width = 0; i_width < output_width_overall_micro_repeats;
+               ++i_width) {
+            const int output_width = i_width == output_width_micro_repeats
+                                         ? residual_width
+                                         : four_over_stride;
+            const bool no_right_block = (output_width - 1) * stride_val < 2;
+            TFLITE_DCHECK_LE(output_width * stride_val, 4);
+            const int8* input_data =
+                scratch_data + width_micro_stride * i_width;
+            // Iterate over input width shifts within sub-micro blocks.
+            for (int x = 0; x < output_width; ++x) {
+              ConcatenateInputSubBlocks(x * stride_val, s,
+                                        workspace_height_stride,
+                                        width_micro_stride, no_right_block,
+                                        input_data, sub_selected_input_data);
+              Calculate3x3FilterOutput(
+                  *function_params, s, sub_selected_input_data, filter_bank,
+                  bias_data + (2 * j_depth + s) * bias_increment,
+                  output_values);
+              for (int d = 0; d < 4; ++d) {
+                output_data[depth * (four_over_stride * i_width + x) + 4 * s +
+                            d] = output_values[d];
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+};
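
ConcatenateInputSubBlocks above is essentially a NEON EXT-style extraction: it selects a 4-wide window at a given offset spanning two adjacent 4-byte groups. Reduced to one row and one depth channel (synthetic values):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const int8_t left[4] = {10, 11, 12, 13};   // current 4-byte group
      const int8_t right[4] = {14, 15, 16, 17};  // next 4-byte group
      const int offset = 2;                      // width shift within the block
      int8_t selected[4];
      std::memcpy(selected, &left[offset], 4 - offset);
      std::memcpy(&selected[4 - offset], right, offset);
      assert(selected[0] == 12 && selected[3] == 15);  // {12, 13, 14, 15}
      return 0;
    }
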
+
+// Apply filter to macro block of input data and store results.
+//
+// Parameters for repeats and residual sizes are in terms of outputs.
+//
+// Requirement: depth_micro_repeats > 0 || residual_depth > 0.
+template <int32 stride>
+struct KernelMacroBlock<DepthwiseConvImplementation::kUseCModel3x3DotProduct,
+                        DepthwiseConvDepthMultiplication::kUnitInputDepth,
+                        stride> {
+  // Construct a width-shifted combination of two input sub-blocks, effectively
+  // concatenating them.
+  //
+  // The filter is applied using sub-blocks. These are in the needed form for
+  // the first (width) offset. For subsequent offsets, the filter is applied to
+  // shifted and combined data. The concatenation and shifting herein are
+  // fairly straightforward, but in the optimized code this is an area of
+  // design creativity, because NEON instructions do not directly support the
+  // required between-register permutation.
+  //
+  // In NEON optimized code, input data is grouped in 4-byte blocks. In order to
+  // move along the width for each output point calculation, data is shifted, in
+  // essence between two such blocks.
+  //
+  // selected_data has format height 3, width 4.
+  //
+  // When the micro block is trailing (the last across the macro-block width),
+  // it would be illegal to load the right (next) block, and the no_right_block
+  // parameter indicates this scenario.
+  static inline void ConcatenateInputSubBlocks(int offset,
+                                               int workspace_height_stride,
+                                               bool no_right_block,
+                                               const int8* input_block,
+                                               int8 selected_data[3][4]) {
+    TFLITE_DCHECK_GE(offset, 0);
+    TFLITE_DCHECK_LT(offset, 4);
+    if (no_right_block) {
+      for (int k_height = 0; k_height < 3; ++k_height) {
+        memcpy(selected_data[k_height],
+               &input_block[k_height * workspace_height_stride + offset],
+               4 - offset);
+      }
+    } else {
+      for (int k_height = 0; k_height < 3; ++k_height) {
+        memcpy(selected_data[k_height],
+               &input_block[k_height * workspace_height_stride + offset], 4);
+      }
+    }
+  }
+
+  // Straight implementation of 3x3 filter within sub-micro block.
+  static inline void Calculate3x3FilterOutput(
+      const DepthwiseConvDotProdParams& function_params, int sub_block,
+      const int8 selected_data[3][4], const int8 filter_bank[3][2][4][4],
+      const int32* bias_data, uint8 output_values[4]) {
+    const int32 output_activation_min =
+        function_params.quantized_activation_min;
+    const int32 output_activation_max =
+        function_params.quantized_activation_max;
+    const int32 output_multiplier = function_params.output_multiplier;
+    const int32 output_shift = function_params.output_shift;
+    const int32 output_offset = function_params.output_offset;
+    for (int d = 0; d < 4; ++d) {
+      int32 acc = 0;
+      for (int y = 0; y < 3; ++y) {
+        for (int x = 0; x < 4; ++x) {
+          int32 input_val = selected_data[y][x];
+          int32 filter_val = filter_bank[y][sub_block][d][x];
+          acc += filter_val * input_val;
+        }
+      }
+      acc += bias_data[d];
+      acc = reference_ops::depthwise_conv::DepthwiseConvRound<
+          DepthwiseConvOutputRounding::kUpward>(acc, output_multiplier,
+                                                output_shift);
+      acc += output_offset;
+      acc = std::max(acc, output_activation_min);
+      acc = std::min(acc, output_activation_max);
+      output_values[d] = static_cast<uint8>(acc);
+    }
+  }
+
+  static inline void Run(const int8* scratch_block_data,
+                         const int8* filter_workspace, const int32* bias_data,
+                         uint8* output_block_data,
+                         const DepthwiseConvDotProdParams* function_params) {
+    const int workspace_height_stride =
+        function_params->workspace_height_stride;
+    const int output_width_micro_repeats =
+        function_params->output_width_micro_repeats;
+    const int depth_micro_repeats = function_params->depth_micro_repeats;
+    const int depth = function_params->output_depth;
+    const int stride_val = function_params->stride;
+    const int four_over_stride = function_params->four_over_stride;
+
+    const int workspace_width_micro_repeats =
+        function_params->workspace_width_micro_repeats;
+    const int output_width_overall_micro_repeats =
+        function_params->output_width_overall_micro_repeats;
+    const int block_height = function_params->outbound_block_height;
+    const int residual_width = function_params->output_residual_width;
+    const int output_height_stride = function_params->output_height_stride;
+    constexpr int bias_increment = 4;
+    TFLITE_DCHECK_EQ(function_params->bias_increment, bias_increment);
+
+    TFLITE_DCHECK(depth_micro_repeats > 0);
+
+    constexpr int shuffled_filter_increment = 2 * 3 * 4 * 4;
+
+    // Simulate NEON-register transposition of subset of filter.
+    int8 filter_bank[3][2][4][4];  // Height 3, sub-block, depth 4, width 4.
+    // Simulate NEON-register input data concatenation + sub-selection.
+    int8 sub_selected_input_data[3][4];  // Height 3, width 4.
+    uint8 output_values[4];              // Depth 4.
+
+    // The outer 3 loops go through all the micro blocks in a macro block, and
+    // separately treat the two sub-blocks within each micro block.
+    for (int j_depth = 0; j_depth < depth_micro_repeats; ++j_depth) {
+      memcpy(filter_bank[0][0][0],
+             filter_workspace + j_depth * shuffled_filter_increment,
+             shuffled_filter_increment);
+
+      for (int s = 0; s < 2; ++s) {
+        for (int k_height = 0; k_height < block_height; ++k_height) {
+          const int8* scratch_data =
+              scratch_block_data +
+              workspace_height_stride * k_height * stride_val;
+          uint8* output_data =
+              output_block_data + output_height_stride * k_height + 8 * j_depth;
+
+          for (int i_width = 0; i_width < output_width_overall_micro_repeats;
+               ++i_width) {
+            const int output_width = i_width == output_width_micro_repeats
+                                         ? residual_width
+                                         : four_over_stride;
+            const bool no_right_block = i_width == output_width_micro_repeats &&
+                                        output_width_overall_micro_repeats ==
+                                            workspace_width_micro_repeats;
+            TFLITE_DCHECK_LE(output_width * stride_val, 4);
+            const int8* input_data = scratch_data + 4 * i_width;
+            // Iterate over input width shifts within 4x4 blocks.
+            for (int x = 0; x < output_width; ++x) {
+              ConcatenateInputSubBlocks(x * stride_val, workspace_height_stride,
+                                        no_right_block, input_data,
+                                        sub_selected_input_data);
+              Calculate3x3FilterOutput(
+                  *function_params, s, sub_selected_input_data, filter_bank,
+                  bias_data + (2 * j_depth + s) * bias_increment,
+                  output_values);
+              for (int d = 0; d < 4; ++d) {
+                output_data[depth * (four_over_stride * i_width + x) + 4 * s +
+                            d] = output_values[d];
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+};
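
The output_width_micro_repeats / output_width_overall_micro_repeats pairs consumed by the Run() methods above follow the trailing-iteration scheme described under OVERALL COUNTS in the comment block that follows. A sketch of that characterization for a block size of 4, reproducing the f(s) examples given there:

    #include <cassert>

    // Characterize size s as (full 4-wide repeats, residual, overall repeats).
    void Characterize(int s, int* repeats, int* residual, int* overall) {
      *repeats = s / 4;
      *residual = s % 4;
      *overall = *repeats + (*residual > 0 ? 1 : 0);
    }

    int main() {
      int r, res, o;
      Characterize(11, &r, &res, &o);
      assert(r == 2 && res == 3 && o == 3);
      Characterize(12, &r, &res, &o);
      assert(r == 3 && res == 0 && o == 3);  // no special trailing iteration
      Characterize(13, &r, &res, &o);
      assert(r == 3 && res == 1 && o == 4);
      return 0;
    }
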
+
+// Top-level implementation function for 3x3 depthwise convolution using
+// NEON dot-product instructions.
+//
+// MACRO & MICRO BLOCKS
+//
+// The task is divided into macro blocks. Data is copied first into a macro
+// block in a workspace. This has two purposes: (a) bringing data into
+// cache, and (b) permuting data so that it can be used much more easily in
+// a dot-product filter.
+//
+// When there is no depth multiplication:
+//
+// The permutations required for dot-products are local, within 4 data points
+// down the depth and 4 across the width. We want to pull in input data at
+// least 8 bytes at a time, down the depth, and so we divide the macro blocks
+// into 1x4x8 (height, width, depth) micro blocks, and further divide the
+// micro blocks into sub-blocks with shape (1x4x4).
+//
+// Each macro-block is constructed from micro-blocks that are internally
+// rearranged during loading into the macro-block workspace.
+//
+// In other words, the micro-block shape is
+//     {1, 1, 4, 8}.
+// Each macro block is typically of shape
+//     {1, height_block_size, 4 * workspace_width_micro_repeats, 64}
+// and workspace_width_micro_repeats is chosen so it fits into the
+// workspace.
+//
+// However, if depth < 64, we decrease the macro block depth, enabling us to
+// increase the macro-block width.
+//
+// When there is depth multiplication:
+//
+// We require input-depth = 1 and exploit that instead.  Note that output data
+// is still full-depth, *as are the filter and bias data after certain
+// adjustments*, and so the filter stage in this case still proceeds in
+// terms of sub-blocks.
+//
+// The magic of these numbers:
+//     4 is the number of input elements used in each dot-product.
+//     8 is the number of inputs we load at a time into a register.
+//     64 is the minimum amount of data loaded in a stretch (when possible).
+//
+// FILTER DATA PREPARATION
+//
+// Filter data needs to be permuted in a fashion like that of input data, and
+// this is done in a preprocessing stage. In addition, this stage extends the
+// filter in the direction of width from 3 to 4. The extra filter taps are set
+// to zero so that input data does not have to be zeroed before applying
+// dot-products.
+//
+// OVERALL COUNTS: HANDLING TRAILING ITERATION
+//
+// Often it is necessary to handle the last iteration in a loop differently,
+// generally because the final item is shorter. The logic to detect the
+// special case can be a bit expensive. We use a scheme in which there are
+// two counts, in a pattern like xxx_yyy_repeats and
+// xxx_overall_yyy_repeats. The first gives the count of "normal"
+// iterations. The loop iterates over the second count, and the induction
+// variable is checked to see if it reaches xxx_yyy_repeats. If there is no
+// special trailing iteration, xxx_yyy_repeats = xxx_overall_yyy_repeats,
+// and the special code is not executed.
+//
+// Example:
+// Suppose that we characterize a size s as
+// f(s) -> (block-4-repetitions, remainder, overall_repetitions):
+// f(11) -> (2, 3, 3)
+// f(12) -> (3, 0, 3)
+// f(13) -> (3, 1, 4)
+//
+// POINTING OUTSIDE OF INPUT ARRAY.
+//
+// When there is padding, the input data pointer passed to the fill routines
+// points outside of the input array and into a kind-of virtual padded
+// margin. It turns out that this simplifies the code and removes
+// conditional statements. It is hard to explain why without comparing two
+// versions of the code. In summary, this way the adjustment into the margin
+// can be made unconditionally, and the correction back into the input array
+// is done where there is a conditional already.
+//
+// OVERLAP
+//
+// Since this is *depthwise* conv, neither the batch nor the depth has overlap.
+// The height and width overlap by (filter_size - 1). Thus some data is used
+// twice on the borders of macro blocks.
+//
+template <DepthwiseConvImplementation implementation>
+inline void DepthwiseConvDotProduct3x3(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data) {
+  // Check kernel restrictions.
+  constexpr int filter_size = 3;
+  constexpr int kSymmetricZeroPoint = 128;
+  constexpr int kMaxStride = 2;
+  constexpr int kMaxPadding = 1;
+  TFLITE_DCHECK_EQ(params.weights_offset, -kSymmetricZeroPoint);
+  TFLITE_DCHECK_LE(params.stride_width, kMaxStride);
+  TFLITE_DCHECK_EQ(params.stride_height, params.stride_width);
+  TFLITE_DCHECK_EQ(params.dilation_width_factor, 1);
+  TFLITE_DCHECK_EQ(params.dilation_height_factor, 1);
+  TFLITE_DCHECK_LE(params.padding_values.width, kMaxPadding);
+  TFLITE_DCHECK_LE(params.padding_values.height, kMaxPadding);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+
+  // Key kernel parameters (along with padding handled later).
+  const int stride = params.stride_width;
+  const int depth_multiplier = params.depth_multiplier;
+  const bool has_depth_multiplication = depth_multiplier > 1;
+
+  // Extract task dimensions.
+  const int input_depth = input_shape.Dims(3);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  TFLITE_DCHECK(!has_depth_multiplication || input_depth == 1);
+  TFLITE_DCHECK(has_depth_multiplication || input_depth == output_depth);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  TFLITE_DCHECK_EQ(input_depth * depth_multiplier, output_depth);
+  TFLITE_DCHECK_EQ(MatchingDim(filter_shape, 1, filter_shape, 2), filter_size);
+
+  // Return now if nothing to do.
+  if (output_width == 0 || output_height == 0) {
+    return;
+  }
+
+  // Kernel parameter structure: set basic fields.
+  //
+  // In asm it is easier to pass a structure than to pass more than, say,
+  // 8 parameters.
+  DepthwiseConvDotProdParams function_params;
+  function_params.input_depth = input_depth;
+  function_params.output_depth = output_depth;
+  function_params.input_offset = params.input_offset;
+  function_params.output_offset = params.output_offset;
+  function_params.output_multiplier = params.output_multiplier;
+  function_params.output_shift = params.output_shift;
+  function_params.quantized_activation_min = params.quantized_activation_min;
+  function_params.quantized_activation_max = params.quantized_activation_max;
+  function_params.stride = stride;
+
+  // Handle inbound bias data.
+  //
+  // Note that this data is adjusted in a per-depth process before the main
+  // filters. The adjustment accounts for a non-symmetric input offset.
+  //
+  // Kernel subroutines need to be able to operate consistently on a bias
+  // array. Where there is no bias, we provide one filled with zeros.
+  constexpr int kMinBiasLoad = 8;
+  int32 zero_bias_data[kMinBiasLoad];
+  if (bias_data) {
+    function_params.bias_increment = 4;
+  } else {
+    memset(zero_bias_data, 0, sizeof(zero_bias_data));
+    bias_data = &zero_bias_data[0];
+    function_params.bias_increment = 0;
+  }
+  TFLITE_DCHECK_LE(2 * function_params.bias_increment, kMinBiasLoad);
+
+  // Process padding.
+  //
+  // Whether "correct" or not, this matches ComputeConvSizes. When there is
+  // stride > 1 there can be padding on the bottom or top, and therefore
+  // we need to consider padding. This is true even if one or other of the
+  // padding_values is 0.
+  const int padded_width = (output_width - 1) * stride + filter_size;
+  {
+    const int padding_left = params.padding_values.width;
+    // Right padding would be -1 if discarding input because of stride.
+    const int padding_right =
+        std::max(padded_width - input_width - padding_left, 0);
+    const int padding_top = params.padding_values.height;
+    const int padded_height = (output_height - 1) * stride + filter_size;
+    const int padding_bottom =
+        std::max(padded_height - input_height - padding_top, 0);
+
+    function_params.padding_left = padding_left;
+    function_params.padding_right = padding_right;
+    function_params.padding_top = padding_top;
+    function_params.padding_bottom = padding_bottom;
+
+    TFLITE_DCHECK_LE(padding_left, padding_right);
+    TFLITE_DCHECK_LE(padding_top, padding_bottom);
+  }
+  // When stride == 1, it may be that only the left or top padding is non-zero.
+  // This happens when padding is specified but not needed on a trailing
+  // dimension. When stride == 2, it may be that only the right or bottom
+  // padding is non-zero; this is a result of the details of the padding
+  // calculations.
+  const bool padding_required =
+      params.padding_type == tflite::PaddingType::kSame ||
+      function_params.padding_right > 0 || function_params.padding_bottom > 0;
+
+  // Choose parameter-specific kernel subroutines.
+  //
+  // The main part of the kernel has two stages. First, a temporary workspace is
+  // filled with padded and permuted data. Second, the filter is applied to the
+  // workspace data to generate output.
+  //
+  // The workspace fill stage handles padding so that the filter stage does not
+  // need to account for it. The workspace fill stage does not need to
+  // understand striding, and implicitly handles striding through the parameters
+  // that it is given.
+  using pack_macro_block_func_t = decltype(
+      &PackMacroBlock<implementation,
+                      DepthwiseConvDepthMultiplication::kNoMultiplication,
+                      0>::Run);
+  using kernel_macro_block_func_t = decltype(
+      &KernelMacroBlock<implementation,
+                        DepthwiseConvDepthMultiplication::kNoMultiplication,
+                        1>::Run);
+  pack_macro_block_func_t pack_macro_block_func;
+  kernel_macro_block_func_t kernel_macro_block_func;
+  {
+    if (has_depth_multiplication) {
+      if (padding_required) {
+        pack_macro_block_func =
+            PackMacroBlock<implementation,
+                           DepthwiseConvDepthMultiplication::kUnitInputDepth,
+                           /*max_padding=*/1>::Run;
+      } else {
+        pack_macro_block_func =
+            PackMacroBlock<implementation,
+                           DepthwiseConvDepthMultiplication::kUnitInputDepth,
+                           /*max_padding=*/0>::Run;
+      }
+      if (stride == 1) {
+        kernel_macro_block_func =
+            KernelMacroBlock<implementation,
+                             DepthwiseConvDepthMultiplication::kUnitInputDepth,
+                             /*stride=*/1>::Run;
+      } else {
+        kernel_macro_block_func =
+            KernelMacroBlock<implementation,
+                             DepthwiseConvDepthMultiplication::kUnitInputDepth,
+                             /*stride=*/2>::Run;
+      }
+    } else {
+      if (padding_required) {
+        pack_macro_block_func =
+            PackMacroBlock<implementation,
+                           DepthwiseConvDepthMultiplication::kNoMultiplication,
+                           /*max_padding=*/1>::Run;
+      } else {
+        pack_macro_block_func =
+            PackMacroBlock<implementation,
+                           DepthwiseConvDepthMultiplication::kNoMultiplication,
+                           /*max_padding=*/0>::Run;
+      }
+      if (stride == 1) {
+        kernel_macro_block_func = KernelMacroBlock<
+            implementation, DepthwiseConvDepthMultiplication::kNoMultiplication,
+            /*stride=*/1>::Run;
+      } else {
+        kernel_macro_block_func = KernelMacroBlock<
+            implementation, DepthwiseConvDepthMultiplication::kNoMultiplication,
+            /*stride=*/2>::Run;
+      }
+    }
+  }
+
+  // Stride-only variables.
+  //
+  // stride == 1 ? 4 : 2:
+  const int output_height_per_macro = 6 - 2 * stride;
+  // output_height_per_macro * stride:
+  constexpr int input_height_per_macro = 4;
+  // Number of rows per micro block (= rows per macro block) is
+  //   (output_height_per_macro - 1) * stride + 1 + (filter_size - 1)
+  //   = stride == 1 ? 3 + filter_size : 2 + filter_size:
+  const int height_block_size = 4 + filter_size - stride;
+  const int input_height_overlap = filter_size - stride;
+  // stride == 1 ? 4 : 2:
+  function_params.four_over_stride = output_height_per_macro;
+
+  TFLITE_DCHECK_EQ(stride * function_params.four_over_stride, 4);
+  TFLITE_DCHECK_EQ(height_block_size,
+                   input_height_per_macro + input_height_overlap);
+
+  // Create workspaces.
+  //
+  // Filter workspace is for shuffle: only the first depth/8 is used.
+  // Indexed as [depth/8][height][sub-block][depth][width].
+  TFLITE_DCHECK_LE(output_depth, kDepthwiseConvAdjustedBiasLimit);
+  TFLITE_DCHECK_EQ(kDepthwiseConvAdjustedBiasLimit % 8, 0);
+  int8 macroblock_workspace[kDepthwiseConvScratchWorkspaceSize];
+  int32 adjusted_bias_data[kDepthwiseConvAdjustedBiasLimit];
+  int8 filter_workspace[kDepthwiseConvAdjustedBiasLimit >> 3][3][2][4][4];
+
+  // Output depth characterization.
+  //
+  const int depth_macro_count = output_depth / 64;
+  const int depth_overall_macro_count = (output_depth + 63) / 64;
+  // Number of micro blocks down the depth in a final incomplete macro block.
+  const int depth_trailing_micro_repeats = output_depth / 8 % 8;
+  // The output_depth must be a multiple of 8, so there is no remainder.
+  TFLITE_DCHECK_EQ(output_depth,
+                   64 * depth_macro_count + 8 * depth_trailing_micro_repeats);
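+  // For example, output_depth == 72 gives depth_macro_count == 1,
+  // depth_overall_macro_count == 2 and depth_trailing_micro_repeats == 1:
+  // one full 64-depth macro block plus one trailing micro block of 8.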
+
+  // Characterize the first macro block depth, the largest.
+  //
+  // We base treatment of the width on the trailing macro block if there are
+  // no full blocks, in order to do more work together (that is, increase
+  // workspace_width_micro_repeats when largest_macro_depth < 64).
+  const int largest_macro_depth =
+      has_depth_multiplication
+          ? 1
+          : (depth_macro_count > 0 ? 64 : 8 * depth_trailing_micro_repeats);
+
+  // Characterize width, consumption of input and generation of output.
+  //
+  // In the case of depth multiplication, we ensure that some of the workspace
+  // at the end remains unused. This enables the filter routines to load the
+  // "next" data, of at least 16 bytes, even when at the end of the workspace.
+  // It is relatively expensive to detect the end micro block. It is also very
+  // difficult to test for (to trigger) erroneous reads (past end of array) in
+  // the depth multiplication case.
+  int workspace_width_micro_repeats =
+      (has_depth_multiplication ? kDepthwiseConvScratchWorkspaceSize - 16
+                                : kDepthwiseConvScratchWorkspaceSize) /
+      (4 * largest_macro_depth * height_block_size);
+  // When there is no depth multiplication, the workspace depth is a multiple of
+  // 8, which ensures that workspace rows are 16-byte aligned. (Actually 32,
+  // because of the micro width of 4.) This is not necessarily the case under
+  // depth multiplication, so we adjust now to impose this restriction.
+  if (has_depth_multiplication) {
+    workspace_width_micro_repeats = (workspace_width_micro_repeats / 4) * 4;
+  }
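+  // (For example, a computed value of 10 is rounded down to 8 by the
+  // adjustment above.)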
+  TFLITE_DCHECK_EQ((workspace_width_micro_repeats * largest_macro_depth) % 4,
+                   0);
+  // Discount 1 of the micro-block repeats in each macro block to account for
+  // overlap.
+  const int consumed_width_per_macro_block =
+      4 * (workspace_width_micro_repeats - 1);
+  const int output_width_per_macro_block =
+      function_params.four_over_stride * (workspace_width_micro_repeats - 1);
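+  // For example, with workspace_width_micro_repeats == 9 and stride == 2, each
+  // macro block consumes 32 columns of padded input and produces 16 columns
+  // of output.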
+  TFLITE_DCHECK_GT(workspace_width_micro_repeats, 1);
+  TFLITE_DCHECK_EQ(output_width_per_macro_block * stride,
+                   consumed_width_per_macro_block);
+
+  // Width repetitions and residuals.
+  //
+  // Use of the workspace is characterized primarily in terms of *padded input*.
+  // Striding only matters in a few places.
+  //
+  // Simplifications: We require that there always be at least one full
+  // micro-block across the width. Since the maximum padding is 1, the trailing
+  // padding cannot span two micro blocks.
+  const int residual_micro_width = padded_width % 4;
+  // We base the count of macro blocks on the amount of padded input data each
+  // one consumes.
+  int width_overall_macro_count = (padded_width - residual_micro_width +
+                                   consumed_width_per_macro_block - 1) /
+                                  consumed_width_per_macro_block;
+  // Recall that we left a micro block at the end of each macro block for use as
+  // overlap. There is a special case in which we can use one fewer macro
+  // block, with the last one consuming extra input. (But not if the
+  // calculation thinks that we can use zero blocks.)
+  if (padded_width <=
+      ((width_overall_macro_count - 1) * consumed_width_per_macro_block + 4)) {
+    width_overall_macro_count -= 1;
+  }
+  width_overall_macro_count = std::max(width_overall_macro_count, 1);
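+  // For example, with padded_width == 36 and consumed_width_per_macro_block ==
+  // 32, the nominal count of 2 collapses to 1, since all 9 micro blocks of
+  // input fit in the workspace at once.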
+  // We always have to treat the final macro block along width as trailing,
+  // because even if it is full in terms of padded input, it will be incomplete
+  // in terms of output.
+  const int width_macro_count = width_overall_macro_count - 1;
+  // Micro blocks are traversed in terms of input in fill routines.
+  const int width_trailing_micro_repeats =
+      (padded_width - consumed_width_per_macro_block * width_macro_count) / 4;
+  const int width_overall_trailing_micro_repeats =
+      (padded_width - consumed_width_per_macro_block * width_macro_count + 3) /
+      4;
+  // Micro blocks are traversed in terms of output in filtering routines.
+  const int residual_output_micro_width =
+      (output_width - 1) % function_params.four_over_stride + 1;
+  const int output_width_trailing_micro_repeats =
+      residual_micro_width > (filter_size - 1)
+          ? width_trailing_micro_repeats
+          : width_trailing_micro_repeats - 1;
+  // Check results.
+  TFLITE_DCHECK_GT(width_overall_trailing_micro_repeats, 0);
+  TFLITE_DCHECK_EQ(padded_width,
+                   residual_micro_width +
+                       consumed_width_per_macro_block * width_macro_count +
+                       4 * width_trailing_micro_repeats);
+  TFLITE_DCHECK_LE(width_overall_macro_count, width_macro_count + 1);
+  TFLITE_DCHECK_GE(width_overall_macro_count, width_macro_count);
+
+  // Height repetitions and residuals.
+  //
+  const int height_macro_count = output_height / output_height_per_macro;
+  const int residual_output_height = output_height % output_height_per_macro;
+  const int height_overall_macro_count =
+      (output_height + output_height_per_macro - 1) / output_height_per_macro;
+  TFLITE_DCHECK_EQ(
+      output_height,
+      residual_output_height + output_height_per_macro * height_macro_count);
+  TFLITE_DCHECK_LE(height_overall_macro_count, height_macro_count + 1);
+  TFLITE_DCHECK_GE(height_overall_macro_count, height_macro_count);
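+  // For example, output_height == 7 with output_height_per_macro == 2 gives
+  // height_macro_count == 3, residual_output_height == 1 and
+  // height_overall_macro_count == 4.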
+
+  // Data strides.
+  //
+  const int input_height_stride = input_width * input_depth;
+  const int output_height_stride = output_width * output_depth;
+  const int input_batch_stride = input_height_stride * input_height;
+  const int output_batch_stride = output_height_stride * output_height;
+  const int input_depth_macro_stride = has_depth_multiplication ? 0 : 64;
+  const int input_width_macro_stride =
+      input_depth * consumed_width_per_macro_block;
+  const int output_width_macro_stride =
+      output_depth * output_width_per_macro_block;
+
+  // Store parameters that do not vary across macro blocks.
+  //
+  function_params.workspace_width_micro_repeats = workspace_width_micro_repeats;
+  function_params.height_macro_count = height_overall_macro_count;
+  function_params.width_macro_count = width_overall_macro_count;
+  function_params.input_height_stride = input_height_stride;
+  function_params.output_height_stride = output_height_stride;
+  function_params.residual_width = residual_micro_width;
+
+  // Preprocess filter and bias data.
+  //
+  ProcessPerDepth<implementation>::Run(filter_data, bias_data,
+                                       filter_workspace[0][0][0][0],
+                                       adjusted_bias_data, &function_params);
+  function_params.bias_increment = 4;  // Adjusted bias data always spans depth.
+
+  // Main process.
+  //
+  // Most kernels are nested batch-height-width-depth. Here we proceed over
+  // macro blocks batch-width-depth-height.
+  //
+  // Example of handling of trailing iteration: when there is trailing depth,
+  // depth_overall_macro_count = depth_macro_count + 1, so we can adjust the
+  // dimensions for trailing macro blocks by looking for
+  // j_depth == depth_macro_count.
+  for (int b = 0; b < batches; ++b) {
+    for (int k_width = 0; k_width < width_overall_macro_count; ++k_width) {
+      // Figure out the work to be done for this macro block. If it trails in
+      // any dimension, the work in that dimension is adjusted.
+      // The work to be done across widths has 3 cases:
+      // (a) A full macro block,
+      // (b) Partial terminal macro block, with input and output ending in
+      //     same micro block, and
+      // (c) Partial terminal macro block, with output corresponding to one
+      //     fewer micro blocks, because filter extends across micro-block
+      //     boundary.
+      if (k_width != width_macro_count) {
+        function_params.output_residual_width = 0;
+        function_params.input_width_micro_repeats =
+            workspace_width_micro_repeats;
+        function_params.input_width_overall_micro_repeats =
+            workspace_width_micro_repeats;
+        function_params.output_width_micro_repeats =
+            workspace_width_micro_repeats - 1;
+      } else {
+        function_params.output_residual_width = residual_output_micro_width;
+        function_params.input_width_micro_repeats =
+            width_trailing_micro_repeats;
+        function_params.input_width_overall_micro_repeats =
+            width_overall_trailing_micro_repeats;
+        function_params.output_width_micro_repeats =
+            output_width_trailing_micro_repeats;
+      }
+      function_params.output_width_overall_micro_repeats =
+          function_params.output_residual_width == 0
+              ? function_params.output_width_micro_repeats
+              : function_params.output_width_micro_repeats + 1;
+
+      for (int j_depth = 0; j_depth < depth_overall_macro_count; ++j_depth) {
+        const uint8* input_data_block =
+            input_data + b * input_batch_stride +
+            j_depth * input_depth_macro_stride +
+            k_width * input_width_macro_stride -
+            function_params.padding_left * input_depth -
+            function_params.padding_top * input_height_stride;
+        uint8* output_data_block = output_data + b * output_batch_stride +
+                                   j_depth * 64 +
+                                   k_width * output_width_macro_stride;
+
+        function_params.depth_micro_repeats =
+            j_depth == depth_macro_count ? depth_trailing_micro_repeats : 8;
+        // Under depth multiplication the workspace_height_stride does not have
+        // to depend on input_width_overall_micro_repeats, but this improves the
+        // compactness of workspace use.
+        const int workspace_height_stride =
+            has_depth_multiplication
+                ? 16 * ((function_params.input_width_overall_micro_repeats +
+                         3) >>
+                        2)
+                : 4 * function_params.input_width_overall_micro_repeats * 8 *
+                      function_params.depth_micro_repeats;
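+        // For example, without depth multiplication,
+        // input_width_overall_micro_repeats == 5 and depth_micro_repeats == 8
+        // give a stride of 4 * 5 * 8 * 8 = 1280 bytes, a multiple of 16.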
+        TFLITE_DCHECK_EQ(workspace_height_stride % 16, 0);
+        function_params.workspace_height_stride = workspace_height_stride;
+
+        // For the first macro block of output rows we fill in the first few
+        // rows. After this we will copy them (see below in the loop).
+        function_params.inbound_block_height = input_height_overlap;
+        pack_macro_block_func(-1, k_width, input_data_block,
+                              macroblock_workspace, &function_params);
+        input_data_block += input_height_stride * input_height_overlap;
+
+        for (int i_height = 0; i_height < height_overall_macro_count;
+             ++i_height) {
+          if (i_height != height_macro_count) {
+            function_params.inbound_block_height = input_height_per_macro;
+            function_params.outbound_block_height = output_height_per_macro;
+          } else {
+            function_params.inbound_block_height =
+                residual_output_height * stride;
+            function_params.outbound_block_height = residual_output_height;
+          }
+          TFLITE_DCHECK_LT(i_height * output_height_per_macro, output_height);
+          TFLITE_DCHECK_LT(i_height * input_height_per_macro, input_height);
+          TFLITE_DCHECK_LT(k_width * output_width_per_macro_block,
+                           output_width);
+          TFLITE_DCHECK_LT(k_width * consumed_width_per_macro_block,
+                           input_width);
+
+          // Macro blocks overlap by input_height_overlap rows, so we copy
+          // those instead of filling in afresh.  The first macro block across
+          // output rows was filled in outside of the loop (above).
+          if (i_height > 0) {
+            memcpy(macroblock_workspace,
+                   macroblock_workspace +
+                       input_height_per_macro * workspace_height_stride,
+                   input_height_overlap * workspace_height_stride);
+          }
+
+          pack_macro_block_func(
+              i_height, k_width, input_data_block,
+              macroblock_workspace +
+                  input_height_overlap * workspace_height_stride,
+              &function_params);
+
+          kernel_macro_block_func(macroblock_workspace,
+                                  filter_workspace[8 * j_depth][0][0][0],
+                                  adjusted_bias_data + 64 * j_depth,
+                                  output_data_block, &function_params);
+
+          input_data_block += input_height_stride * input_height_per_macro;
+          output_data_block += output_height_stride * output_height_per_macro;
+        }
+      }
+    }
+  }
+}
+
+}  // namespace depthwise_conv
+}  // namespace optimized_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_TRANSITIONAL_H_
diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
index 5dc03e7..3927540cc3 100644
--- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
@@ -182,45 +182,6 @@
   return MatrixMap<Scalar>(data, rows, cols);
 }
 
-// This is like the template-parameter version, except that the power-of-two is
-// passed as a function parameter. The template version is to be preferred,
-// since some target hardware optimizations depend on the range of the exponent.
-template <typename IntegerType>
-IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
-  if (exponent == 0) {
-    return x;
-  }
-  using ScalarIntegerType =
-      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
-  const IntegerType min =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
-  const IntegerType max =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
-  const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
-
-  const std::int32_t threshold =
-      ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
-  const IntegerType positive_mask =
-      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
-  const IntegerType negative_mask =
-      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
-
-  IntegerType result = gemmlowp::ShiftLeft(x, exponent);
-  result = gemmlowp::SelectUsingMask(positive_mask, max, result);
-  result = gemmlowp::SelectUsingMask(negative_mask, min, result);
-  return result;
-}
-
-// This is like the template-parameter version, except that the power-of-two is
-// passed as a function parameter. See raw-integer version for further comments.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits>
-SaturatingRoundingMultiplyByPOTParam(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
-}
-
 inline void AddBiasAndEvalActivationFunction(float output_activation_min,
                                              float output_activation_max,
                                              const RuntimeShape& bias_shape,
@@ -841,24 +802,21 @@
 }
 
 #ifdef USE_NEON
-inline void FullyConnectedAsGEMV(
+inline void FullyConnectedAsGEMVWorkerImpl(
     const RuntimeShape& input_shape, const uint8* input_data,
     int32 input_offset, const RuntimeShape& filter_shape,
     const uint8* filter_data, int32 filter_offset,
     const RuntimeShape& bias_shape, const int32* bias_data, int32 output_offset,
     int32 output_multiplier, int output_shift, int32 output_activation_min,
     int32 output_activation_max, const RuntimeShape& output_shape,
-    uint8* output_data) {
+    uint8* output_data, int row_start, int row_end) {
   gemmlowp::ScopedProfilingLabel label("FullyConnectedAsGEMV/8bit");
   TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
   TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
   TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
   const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
   TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_shape, output_dim_count - 1), 1);
   const int input_size = FlatSizeSkipDim(input_shape, 0);
-  const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
-                                      output_shape, output_dim_count - 1);
   static constexpr int kPeel = 4;
   const bool shift_left = (output_shift > 0);
   for (int k = 0; k < input_size; k += 64) {
@@ -867,81 +825,139 @@
   for (int k = 0; k < kPeel * input_size; k += 64) {
     optimized_ops_preload_l1_stream(filter_data + k);
   }
-  TFLITE_DCHECK(!(output_size % kPeel));
-  const int32* bias_ptr = bias_data;
-  uint8* output_ptr = output_data;
-  for (int out = 0; out < output_size; out += kPeel) {
-    int32x4_t acc[kPeel];
-    for (int k = 0; k < kPeel; k++) {
-      acc[k] = vdupq_n_s32(0);
-    }
+
+  TFLITE_DCHECK_GE(row_end - row_start, kPeel);
+
+  for (int out = row_start; out < row_end; out += kPeel) {
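+    // For the last, possibly partial, group of rows, back up so that a full
+    // kPeel rows are processed; rows near the range boundary may simply be
+    // computed twice. This is why row_end - row_start must be at least kPeel.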
+    out = std::min(out, row_end - kPeel);
+    int32x4_t acc0 = vdupq_n_s32(0);
+    int32x4_t acc1 = acc0;
+    int32x4_t acc2 = acc0;
+    int32x4_t acc3 = acc0;
     const int16x8_t input_offset_vec = vdupq_n_s16(input_offset);
     const int16x8_t filter_offset_vec = vdupq_n_s16(filter_offset);
     int in = 0;
     for (; in <= input_size - 16; in += 16) {
       const uint8x16_t input_val_u8 = vld1q_u8(input_data + in);
-      uint8x16_t filter_val_u8[kPeel];
-      for (int k = 0; k < kPeel; k++) {
-        const uint8* filter_ptr = filter_data + in + (out + k) * input_size;
-        filter_val_u8[k] = vld1q_u8(filter_ptr);
-        optimized_ops_preload_l1_stream(filter_ptr + 64);
-      }
-      int16x8_t input_val[2];
-      const uint8x8_t low = vget_low_u8(input_val_u8);
-      const uint8x8_t high = vget_high_u8(input_val_u8);
-      input_val[0] = vreinterpretq_s16_u16(vmovl_u8(low));
-      input_val[1] = vreinterpretq_s16_u16(vmovl_u8(high));
-      input_val[0] = vaddq_s16(input_val[0], input_offset_vec);
-      input_val[1] = vaddq_s16(input_val[1], input_offset_vec);
-      int16x8_t filter_val[kPeel][2];
-      for (int k = 0; k < kPeel; k++) {
-        const uint8x8_t low = vget_low_u8(filter_val_u8[k]);
-        const uint8x8_t high = vget_high_u8(filter_val_u8[k]);
-        filter_val[k][0] = vreinterpretq_s16_u16(vmovl_u8(low));
-        filter_val[k][1] = vreinterpretq_s16_u16(vmovl_u8(high));
-        filter_val[k][0] = vaddq_s16(filter_val[k][0], filter_offset_vec);
-        filter_val[k][1] = vaddq_s16(filter_val[k][1], filter_offset_vec);
-      }
-      for (int p = 0; p < 2; p++) {
-        for (int k = 0; k < kPeel; k++) {
-          acc[k] = vmlal_s16(acc[k], vget_low_s16(filter_val[k][p]),
-                             vget_low_s16(input_val[p]));
-        }
-        for (int k = 0; k < kPeel; k++) {
-          acc[k] = vmlal_s16(acc[k], vget_high_s16(filter_val[k][p]),
-                             vget_high_s16(input_val[p]));
-        }
-      }
+      const uint8* filter_ptr = filter_data + in + out * input_size;
+      uint8x16_t filter_val_u8_0 = vld1q_u8(filter_ptr);
+      optimized_ops_preload_l1_stream(filter_ptr + 64);
+      filter_ptr += input_size;
+      uint8x16_t filter_val_u8_1 = vld1q_u8(filter_ptr);
+      optimized_ops_preload_l1_stream(filter_ptr + 64);
+      filter_ptr += input_size;
+      uint8x16_t filter_val_u8_2 = vld1q_u8(filter_ptr);
+      optimized_ops_preload_l1_stream(filter_ptr + 64);
+      filter_ptr += input_size;
+      uint8x16_t filter_val_u8_3 = vld1q_u8(filter_ptr);
+      optimized_ops_preload_l1_stream(filter_ptr + 64);
+      int16x8_t input_val_0, input_val_1;
+      uint8x8_t low = vget_low_u8(input_val_u8);
+      uint8x8_t high = vget_high_u8(input_val_u8);
+      input_val_0 = vreinterpretq_s16_u16(vmovl_u8(low));
+      input_val_1 = vreinterpretq_s16_u16(vmovl_u8(high));
+      input_val_0 = vaddq_s16(input_val_0, input_offset_vec);
+      input_val_1 = vaddq_s16(input_val_1, input_offset_vec);
+      low = vget_low_u8(filter_val_u8_0);
+      high = vget_high_u8(filter_val_u8_0);
+      int16x8_t filter_val_0_0 = vreinterpretq_s16_u16(vmovl_u8(low));
+      int16x8_t filter_val_0_1 = vreinterpretq_s16_u16(vmovl_u8(high));
+      filter_val_0_0 = vaddq_s16(filter_val_0_0, filter_offset_vec);
+      filter_val_0_1 = vaddq_s16(filter_val_0_1, filter_offset_vec);
+      low = vget_low_u8(filter_val_u8_1);
+      high = vget_high_u8(filter_val_u8_1);
+      int16x8_t filter_val_1_0 = vreinterpretq_s16_u16(vmovl_u8(low));
+      int16x8_t filter_val_1_1 = vreinterpretq_s16_u16(vmovl_u8(high));
+      filter_val_1_0 = vaddq_s16(filter_val_1_0, filter_offset_vec);
+      filter_val_1_1 = vaddq_s16(filter_val_1_1, filter_offset_vec);
+      low = vget_low_u8(filter_val_u8_2);
+      high = vget_high_u8(filter_val_u8_2);
+      int16x8_t filter_val_2_0 = vreinterpretq_s16_u16(vmovl_u8(low));
+      int16x8_t filter_val_2_1 = vreinterpretq_s16_u16(vmovl_u8(high));
+      filter_val_2_0 = vaddq_s16(filter_val_2_0, filter_offset_vec);
+      filter_val_2_1 = vaddq_s16(filter_val_2_1, filter_offset_vec);
+      low = vget_low_u8(filter_val_u8_3);
+      high = vget_high_u8(filter_val_u8_3);
+      int16x8_t filter_val_3_0 = vreinterpretq_s16_u16(vmovl_u8(low));
+      int16x8_t filter_val_3_1 = vreinterpretq_s16_u16(vmovl_u8(high));
+      filter_val_3_0 = vaddq_s16(filter_val_3_0, filter_offset_vec);
+      filter_val_3_1 = vaddq_s16(filter_val_3_1, filter_offset_vec);
+      acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0_0),
+                       vget_low_s16(input_val_0));
+      acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1_0),
+                       vget_low_s16(input_val_0));
+      acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2_0),
+                       vget_low_s16(input_val_0));
+      acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3_0),
+                       vget_low_s16(input_val_0));
+      acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0_1),
+                       vget_low_s16(input_val_1));
+      acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1_1),
+                       vget_low_s16(input_val_1));
+      acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2_1),
+                       vget_low_s16(input_val_1));
+      acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3_1),
+                       vget_low_s16(input_val_1));
+      acc0 = vmlal_s16(acc0, vget_high_s16(filter_val_0_0),
+                       vget_high_s16(input_val_0));
+      acc1 = vmlal_s16(acc1, vget_high_s16(filter_val_1_0),
+                       vget_high_s16(input_val_0));
+      acc2 = vmlal_s16(acc2, vget_high_s16(filter_val_2_0),
+                       vget_high_s16(input_val_0));
+      acc3 = vmlal_s16(acc3, vget_high_s16(filter_val_3_0),
+                       vget_high_s16(input_val_0));
+      acc0 = vmlal_s16(acc0, vget_high_s16(filter_val_0_1),
+                       vget_high_s16(input_val_1));
+      acc1 = vmlal_s16(acc1, vget_high_s16(filter_val_1_1),
+                       vget_high_s16(input_val_1));
+      acc2 = vmlal_s16(acc2, vget_high_s16(filter_val_2_1),
+                       vget_high_s16(input_val_1));
+      acc3 = vmlal_s16(acc3, vget_high_s16(filter_val_3_1),
+                       vget_high_s16(input_val_1));
     }
     for (; in <= input_size - 8; in += 8) {
       const uint8x8_t input_val_u8 = vld1_u8(input_data + in);
-      uint8x8_t filter_val_u8[kPeel];
-      for (int k = 0; k < kPeel; k++) {
-        const uint8* filter_ptr = filter_data + in + (out + k) * input_size;
-        filter_val_u8[k] = vld1_u8(filter_ptr);
-      }
-      int16x8_t input_val;
-      input_val = vreinterpretq_s16_u16(vmovl_u8(input_val_u8));
+      const uint8* filter_ptr = filter_data + in + out * input_size;
+      uint8x8_t filter_val_u8_0 = vld1_u8(filter_ptr);
+      filter_ptr += input_size;
+      uint8x8_t filter_val_u8_1 = vld1_u8(filter_ptr);
+      filter_ptr += input_size;
+      uint8x8_t filter_val_u8_2 = vld1_u8(filter_ptr);
+      filter_ptr += input_size;
+      uint8x8_t filter_val_u8_3 = vld1_u8(filter_ptr);
+      int16x8_t input_val = vreinterpretq_s16_u16(vmovl_u8(input_val_u8));
       input_val = vaddq_s16(input_val, input_offset_vec);
-      int16x8_t filter_val[kPeel];
-      for (int k = 0; k < kPeel; k++) {
-        filter_val[k] = vreinterpretq_s16_u16(vmovl_u8(filter_val_u8[k]));
-        filter_val[k] = vaddq_s16(filter_val[k], filter_offset_vec);
-      }
-      for (int k = 0; k < kPeel; k++) {
-        acc[k] = vmlal_s16(acc[k], vget_low_s16(filter_val[k]),
-                           vget_low_s16(input_val));
-      }
-      for (int k = 0; k < kPeel; k++) {
-        acc[k] = vmlal_s16(acc[k], vget_high_s16(filter_val[k]),
-                           vget_high_s16(input_val));
-      }
+      int16x8_t filter_val_0 = vreinterpretq_s16_u16(vmovl_u8(filter_val_u8_0));
+      filter_val_0 = vaddq_s16(filter_val_0, filter_offset_vec);
+      int16x8_t filter_val_1 = vreinterpretq_s16_u16(vmovl_u8(filter_val_u8_1));
+      filter_val_1 = vaddq_s16(filter_val_1, filter_offset_vec);
+      int16x8_t filter_val_2 = vreinterpretq_s16_u16(vmovl_u8(filter_val_u8_2));
+      filter_val_2 = vaddq_s16(filter_val_2, filter_offset_vec);
+      int16x8_t filter_val_3 = vreinterpretq_s16_u16(vmovl_u8(filter_val_u8_3));
+      filter_val_3 = vaddq_s16(filter_val_3, filter_offset_vec);
+      acc0 =
+          vmlal_s16(acc0, vget_low_s16(filter_val_0), vget_low_s16(input_val));
+      acc1 =
+          vmlal_s16(acc1, vget_low_s16(filter_val_1), vget_low_s16(input_val));
+      acc2 =
+          vmlal_s16(acc2, vget_low_s16(filter_val_2), vget_low_s16(input_val));
+      acc3 =
+          vmlal_s16(acc3, vget_low_s16(filter_val_3), vget_low_s16(input_val));
+      acc0 = vmlal_s16(acc0, vget_high_s16(filter_val_0),
+                       vget_high_s16(input_val));
+      acc1 = vmlal_s16(acc1, vget_high_s16(filter_val_1),
+                       vget_high_s16(input_val));
+      acc2 = vmlal_s16(acc2, vget_high_s16(filter_val_2),
+                       vget_high_s16(input_val));
+      acc3 = vmlal_s16(acc3, vget_high_s16(filter_val_3),
+                       vget_high_s16(input_val));
     }
     if (in < input_size) {
-      int32 buf[4 * kPeel];
-      for (int k = 0; k < 4; k++) {
-        vst1q_s32(buf + 4 * k, acc[k]);
-      }
+      int32 buf[16];
+      vst1q_s32(buf + 0, acc0);
+      vst1q_s32(buf + 4, acc1);
+      vst1q_s32(buf + 8, acc2);
+      vst1q_s32(buf + 12, acc3);
       for (; in < input_size; in++) {
         int lane = (in + 8 - input_size) % 4;
         const int32 input_val = input_data[in] + input_offset;
@@ -951,26 +967,28 @@
           buf[lane + 4 * k] += filter_val * input_val;
         }
       }
-      for (int k = 0; k < 4; k++) {
-        acc[k] = vld1q_s32(buf + 4 * k);
-      }
+      acc0 = vld1q_s32(buf + 0);
+      acc1 = vld1q_s32(buf + 4);
+      acc2 = vld1q_s32(buf + 8);
+      acc3 = vld1q_s32(buf + 12);
     }
 
     // Horizontally reduce accumulators
-    int32x2_t pairwise_reduced_acc[kPeel];
-    for (int k = 0; k < kPeel; k++) {
-      pairwise_reduced_acc[k] =
-          vpadd_s32(vget_low_s32(acc[k]), vget_high_s32(acc[k]));
-    }
-    static_assert(kPeel == 4, "the code below currently assumes kPeel = 4");
+    int32x2_t pairwise_reduced_acc_0 =
+        vpadd_s32(vget_low_s32(acc0), vget_high_s32(acc0));
+    int32x2_t pairwise_reduced_acc_1 =
+        vpadd_s32(vget_low_s32(acc1), vget_high_s32(acc1));
+    int32x2_t pairwise_reduced_acc_2 =
+        vpadd_s32(vget_low_s32(acc2), vget_high_s32(acc2));
+    int32x2_t pairwise_reduced_acc_3 =
+        vpadd_s32(vget_low_s32(acc3), vget_high_s32(acc3));
     const int32x2_t reduced_lo =
-        vpadd_s32(pairwise_reduced_acc[0], pairwise_reduced_acc[1]);
+        vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1);
     const int32x2_t reduced_hi =
-        vpadd_s32(pairwise_reduced_acc[2], pairwise_reduced_acc[3]);
+        vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3);
     int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi);
     // Add bias values.
-    int32x4_t bias_vec = vld1q_s32(bias_ptr);
-    bias_ptr += 4;
+    int32x4_t bias_vec = vld1q_s32(bias_data + out);
     reduced = vaddq_s32(reduced, bias_vec);
     if (shift_left) {
       const int32 multiplier_power_of_two = 1 << output_shift;
@@ -993,12 +1011,117 @@
     // Apply the clamping from the activation function
     res8 = vmax_u8(res8, vdup_n_u8(output_activation_min));
     res8 = vmin_u8(res8, vdup_n_u8(output_activation_max));
-    // Store results to destination. Assumes 32bit alignment.
-    vst1_lane_u32(reinterpret_cast<uint32*>(output_ptr),
-                  vreinterpret_u32_u8(res8), 0);
-    output_ptr += kPeel;
+    // Store results to destination, one lane at a time: after the row clamp
+    // above, "out" is not guaranteed to be 4-byte aligned, so a single 32-bit
+    // store could be misaligned.
+    vst1_lane_u8(output_data + out + 0, res8, 0);
+    vst1_lane_u8(output_data + out + 1, res8, 1);
+    vst1_lane_u8(output_data + out + 2, res8, 2);
+    vst1_lane_u8(output_data + out + 3, res8, 3);
   }
 }
+
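+// Task that runs FullyConnectedAsGEMVWorkerImpl over the row range
+// [row_start_, row_end_) on a gemmlowp worker thread. Note that the shape
+// members are held by reference, so the caller must keep the shapes alive
+// until the task has executed.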
+struct FullyConnectedAsGEMVWorkerTask : public gemmlowp::Task {
+  FullyConnectedAsGEMVWorkerTask(const RuntimeShape& input_shape,
+                                 const uint8* input_data, int32 input_offset,
+                                 const RuntimeShape& filter_shape,
+                                 const uint8* filter_data, int32 filter_offset,
+                                 const RuntimeShape& bias_shape,
+                                 const int32* bias_data, int32 output_offset,
+                                 int32 output_multiplier, int output_shift,
+                                 int32 output_activation_min,
+                                 int32 output_activation_max,
+                                 const RuntimeShape& output_shape,
+                                 uint8* output_data, int row_start, int row_end)
+      : input_shape_(input_shape),
+        input_data_(input_data),
+        input_offset_(input_offset),
+        filter_shape_(filter_shape),
+        filter_data_(filter_data),
+        filter_offset_(filter_offset),
+        bias_shape_(bias_shape),
+        bias_data_(bias_data),
+        output_offset_(output_offset),
+        output_multiplier_(output_multiplier),
+        output_shift_(output_shift),
+        output_activation_min_(output_activation_min),
+        output_activation_max_(output_activation_max),
+        output_shape_(output_shape),
+        output_data_(output_data),
+        row_start_(row_start),
+        row_end_(row_end) {}
+
+  void Run() override {
+    FullyConnectedAsGEMVWorkerImpl(
+        input_shape_, input_data_, input_offset_, filter_shape_, filter_data_,
+        filter_offset_, bias_shape_, bias_data_, output_offset_,
+        output_multiplier_, output_shift_, output_activation_min_,
+        output_activation_max_, output_shape_, output_data_, row_start_,
+        row_end_);
+  }
+
+  const RuntimeShape& input_shape_;
+  const uint8* input_data_;
+  int32 input_offset_;
+  const RuntimeShape& filter_shape_;
+  const uint8* filter_data_;
+  int32 filter_offset_;
+  const RuntimeShape& bias_shape_;
+  const int32* bias_data_;
+  int32 output_offset_;
+  int32 output_multiplier_;
+  int output_shift_;
+  int32 output_activation_min_;
+  int32 output_activation_max_;
+  const RuntimeShape& output_shape_;
+  uint8* output_data_;
+  int row_start_;
+  int row_end_;
+};
+
+inline void FullyConnectedAsGEMV(
+    const RuntimeShape& input_shape, const uint8* input_data,
+    int32 input_offset, const RuntimeShape& filter_shape,
+    const uint8* filter_data, int32 filter_offset,
+    const RuntimeShape& bias_shape, const int32* bias_data, int32 output_offset,
+    int32 output_multiplier, int output_shift, int32 output_activation_min,
+    int32 output_activation_max, const RuntimeShape& output_shape,
+    uint8* output_data, gemmlowp::GemmContext* gemm_context) {
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_rows = output_shape.Dims(output_dim_count - 1);
+  const int input_size = FlatSizeSkipDim(input_shape, 0);
+  static constexpr int kKernelRows = 4;
+  const int thread_count = gemmlowp::HowManyThreads<kKernelRows>(
+      gemm_context->max_num_threads(), output_rows, batches, input_size);
+  if (thread_count == 1) {
+    // Single-thread case: do the computation on the current thread rather
+    // than using the threadpool.
+    FullyConnectedAsGEMVWorkerImpl(
+        input_shape, input_data, input_offset, filter_shape, filter_data,
+        filter_offset, bias_shape, bias_data, output_offset, output_multiplier,
+        output_shift, output_activation_min, output_activation_max,
+        output_shape, output_data, 0, output_rows);
+    return;
+  }
+
+  // Multi-threaded case: use the gemmlowp context's threadpool.
+  TFLITE_DCHECK_GT(thread_count, 1);
+  std::vector<gemmlowp::Task*> tasks(thread_count);
+  const int kRowsPerWorker =
+      gemmlowp::RoundUp<kKernelRows>(output_rows / thread_count);
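+  // For example, with output_rows == 1000 and thread_count == 4,
+  // kRowsPerWorker == 252 and the workers cover rows [0, 252), [252, 504),
+  // [504, 756) and [756, 1000).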
+  int row_start = 0;
+  for (int i = 0; i < thread_count; ++i) {
+    int row_end = std::min(output_rows, row_start + kRowsPerWorker);
+    tasks[i] = new FullyConnectedAsGEMVWorkerTask(
+        input_shape, input_data, input_offset, filter_shape, filter_data,
+        filter_offset, bias_shape, bias_data, output_offset, output_multiplier,
+        output_shift, output_activation_min, output_activation_max,
+        output_shape, output_data, row_start, row_end);
+    row_start = row_end;
+  }
+  TFLITE_DCHECK_EQ(row_start, output_rows);
+  gemm_context->workers_pool()->Execute(tasks);
+}
 #endif  // USE_NEON
 
 struct GemmlowpOutputPipeline {
@@ -1054,14 +1177,16 @@
   const int filter_dim_count = filter_shape.DimensionsCount();
   const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
 #ifdef USE_NEON
-  const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
-                                      output_shape, output_dim_count - 1);
-  if (batches == 1 && !(output_size % 4)) {
-    return FullyConnectedAsGEMV(
-        input_shape, input_data, input_offset, filter_shape, filter_data,
-        filter_offset, bias_shape, bias_data, output_offset, output_multiplier,
-        output_shift, output_activation_min, output_activation_max,
-        output_shape, output_data);
+  if (batches == 1) {
+    const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
+                                        output_shape, output_dim_count - 1);
+    if (output_size >= 4) {
+      return FullyConnectedAsGEMV(
+          input_shape, input_data, input_offset, filter_shape, filter_data,
+          filter_offset, bias_shape, bias_data, output_offset,
+          output_multiplier, output_shift, output_activation_min,
+          output_activation_max, output_shape, output_data, gemm_context);
+    }
   }
 #endif  // USE_NEON
   const int filter_rows = filter_shape.Dims(filter_dim_count - 2);
@@ -2084,6 +2209,21 @@
   TFLITE_DCHECK_EQ(output_cols, gemm_input_cols);
   TFLITE_DCHECK_EQ(filter_cols, gemm_input_rows);
   TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_rows);
+
+#ifdef USE_NEON
+  if (gemm_input_cols == 1 && output_rows >= 4) {
+    RuntimeShape fc_filter_shape{
+        filter_shape.Dims(0),
+        filter_shape.Dims(filter_shape.DimensionsCount() - 1)};
+
+    return FullyConnectedAsGEMV(
+        *gemm_input_shape, gemm_input_data, input_offset, fc_filter_shape,
+        filter_data, filter_offset, bias_shape, bias_data, output_offset,
+        output_multiplier, output_shift, output_activation_min,
+        output_activation_max, output_shape, output_data, gemm_context);
+  }
+#endif
+
   gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::RowMajor> filter_matrix(
       filter_data, filter_rows, filter_cols);
   gemmlowp::MatrixMap<const uint8, gemmlowp::MapOrder::ColMajor> input_matrix(
@@ -2679,7 +2819,7 @@
     // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
     // dimension.
     for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8* input2_data_ptr;
+      const uint8* input2_data_ptr = nullptr;
       for (int i1 = 0; i1 < y1; ++i1) {
         input2_data_ptr = input2_data_reset;
         for (int i2 = 0; i2 < y2; ++i2) {
@@ -2708,7 +2848,7 @@
     // for y4 == 1 and the loop over y3 is contained within the
     // AddScalarBroadcast function.
     for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8* input2_data_ptr;
+      const uint8* input2_data_ptr = nullptr;
       for (int i1 = 0; i1 < y1; ++i1) {
         input2_data_ptr = input2_data_reset;
         for (int i2 = 0; i2 < y2; ++i2) {
@@ -3065,7 +3205,7 @@
   int y4 = params.broadcast_shape[4];
   if (y4 > 1) {
     for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8* input2_data_ptr;
+      const uint8* input2_data_ptr = nullptr;
       for (int i1 = 0; i1 < y1; ++i1) {
         input2_data_ptr = input2_data_reset;
         for (int i2 = 0; i2 < y2; ++i2) {
@@ -3082,7 +3222,7 @@
     }
   } else {
     for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8* input2_data_ptr;
+      const uint8* input2_data_ptr = nullptr;
       for (int i1 = 0; i1 < y1; ++i1) {
         input2_data_ptr = input2_data_reset;
         for (int i2 = 0; i2 < y2; ++i2) {
@@ -3710,6 +3850,14 @@
                         const uint8* input_data,
                         const RuntimeShape& output_shape, uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label("AveragePool/8bit");
+
+  // Here, and in other pooling ops, in order to maintain locality of reference,
+  // to minimize some recalculations, and to load into NEON vector registers, we
+  // use an inner loop down the depth. Since depths can be large, and the
+  // temporary storage would otherwise need to be arbitrarily large, we divide
+  // the work into depth tranches just within the batch loop.
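+  // For example, a depth of 600 is processed as tranches of 256, 256 and 88.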
+  static constexpr int kPoolingAccTrancheSize = 256;
+
   TFLITE_DCHECK_LE(params.quantized_activation_min,
                    params.quantized_activation_max);
   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@@ -3722,69 +3870,76 @@
   const int output_width = output_shape.Dims(2);
   const int stride_height = params.stride_height;
   const int stride_width = params.stride_width;
+
+  uint16 acc[kPoolingAccTrancheSize];
   for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin =
-            (out_x * stride_width) - params.padding_values.width;
-        const int in_y_origin =
-            (out_y * stride_height) - params.padding_values.height;
-        const int filter_x_start = std::max(0, -in_x_origin);
-        const int filter_x_end =
-            std::min(params.filter_width, input_width - in_x_origin);
-        const int filter_y_start = std::max(0, -in_y_origin);
-        const int filter_y_end =
-            std::min(params.filter_height, input_height - in_y_origin);
-        const int filter_count =
-            (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
-        // 2560 is required by MobileNetV2 with depth multiplier 2.
-        static constexpr int kAccBufferMaxSize = 4096;
-        TFLITE_DCHECK_LE(depth, kAccBufferMaxSize);
-        uint16 acc[kAccBufferMaxSize];
-        memset(acc, 0, depth * sizeof(acc[0]));
-        const uint8* input_ptr =
-            input_data +
-            depth * (in_x_origin +
-                     input_width * (in_y_origin + input_height * batch));
-        for (int fy = filter_y_start; fy < filter_y_end; fy++) {
-          const uint8* input_row_ptr =
-              input_ptr + depth * (fy * input_width + filter_x_start);
-          for (int fx = filter_x_start; fx < filter_x_end; fx++) {
-            int channel = 0;
+    // We proceed through the depth in tranches (see comment above). The
+    // depth_base is the depth at the beginning of the tranche. The
+    // tranche_depth is the depth dimension of the tranche.
+    for (int depth_base = 0; depth_base < depth;
+         depth_base += kPoolingAccTrancheSize) {
+      const int tranche_depth =
+          std::min(depth - depth_base, kPoolingAccTrancheSize);
+      for (int out_y = 0; out_y < output_height; ++out_y) {
+        for (int out_x = 0; out_x < output_width; ++out_x) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          const int filter_count =
+              (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+          memset(acc, 0, tranche_depth * sizeof(acc[0]));
+          const uint8* input_ptr =
+              input_data + depth_base +
+              depth * (in_x_origin +
+                       input_width * (in_y_origin + input_height * batch));
+          for (int fy = filter_y_start; fy < filter_y_end; fy++) {
+            const uint8* input_row_ptr =
+                input_ptr + depth * (fy * input_width + filter_x_start);
+            for (int fx = filter_x_start; fx < filter_x_end; fx++) {
+              const uint8* input_channel_ptr = input_row_ptr;
+              int channel = 0;
 #ifdef USE_NEON
-            for (; channel <= depth - 16; channel += 16) {
-              uint16x8_t acc_reg[2];
-              for (int i = 0; i < 2; i++) {
-                acc_reg[i] = vld1q_u16(acc + channel + 8 * i);
+              for (; channel <= tranche_depth - 16; channel += 16) {
+                uint16x8_t acc_reg[2];
+                for (int i = 0; i < 2; i++) {
+                  acc_reg[i] = vld1q_u16(acc + channel + 8 * i);
+                }
+                uint8x16_t input_reg = vld1q_u8(input_channel_ptr);
+                input_channel_ptr += 16;
+                acc_reg[0] = vaddw_u8(acc_reg[0], vget_low_u8(input_reg));
+                acc_reg[1] = vaddw_u8(acc_reg[1], vget_high_u8(input_reg));
+                for (int i = 0; i < 2; i++) {
+                  vst1q_u16(acc + channel + 8 * i, acc_reg[i]);
+                }
               }
-              uint8x16_t input_reg = vld1q_u8(input_row_ptr);
-              input_row_ptr += 16;
-              acc_reg[0] = vaddw_u8(acc_reg[0], vget_low_u8(input_reg));
-              acc_reg[1] = vaddw_u8(acc_reg[1], vget_high_u8(input_reg));
-              for (int i = 0; i < 2; i++) {
-                vst1q_u16(acc + channel + 8 * i, acc_reg[i]);
+              for (; channel <= tranche_depth - 8; channel += 8) {
+                uint16x8_t acc_reg = vld1q_u16(acc + channel);
+                uint8x8_t input_reg = vld1_u8(input_channel_ptr);
+                input_channel_ptr += 8;
+                acc_reg = vaddw_u8(acc_reg, input_reg);
+                vst1q_u16(acc + channel, acc_reg);
               }
-            }
-            for (; channel <= depth - 8; channel += 8) {
-              uint16x8_t acc_reg = vld1q_u16(acc + channel);
-              uint8x8_t input_reg = vld1_u8(input_row_ptr);
-              input_row_ptr += 8;
-              acc_reg = vaddw_u8(acc_reg, input_reg);
-              vst1q_u16(acc + channel, acc_reg);
-            }
 #endif
-            for (; channel < depth; ++channel) {
-              acc[channel] += *input_row_ptr++;
+              for (; channel < tranche_depth; ++channel) {
+                acc[channel] += *input_channel_ptr++;
+              }
+              input_row_ptr += depth;
             }
           }
-        }
-        uint8* output_ptr =
-            output_data + Offset(output_shape, batch, out_y, out_x, 0);
-        int channel = 0;
+          uint8* output_ptr = output_data + Offset(output_shape, batch, out_y,
+                                                   out_x, depth_base);
+          int channel = 0;
 #ifdef USE_NEON
 #define AVGPOOL_DIVIDING_BY(FILTER_COUNT)                               \
   if (filter_count == FILTER_COUNT) {                                   \
-    for (; channel <= depth - 8; channel += 8) {                        \
+    for (; channel <= tranche_depth - 8; channel += 8) {                \
       uint16 buf[8];                                                    \
       for (int i = 0; i < 8; i++) {                                     \
         buf[i] = (acc[channel + i] + FILTER_COUNT / 2) / FILTER_COUNT;  \
@@ -3795,25 +3950,26 @@
       vst1_u8(output_ptr + channel, buf8);                              \
     }                                                                   \
   }
-        AVGPOOL_DIVIDING_BY(9)
-        AVGPOOL_DIVIDING_BY(15)
+          AVGPOOL_DIVIDING_BY(9)
+          AVGPOOL_DIVIDING_BY(15)
 #undef AVGPOOL_DIVIDING_BY
-        for (; channel <= depth - 8; channel += 8) {
-          uint16 buf[8];
-          for (int i = 0; i < 8; i++) {
-            buf[i] = (acc[channel + i] + filter_count / 2) / filter_count;
+          for (; channel <= tranche_depth - 8; channel += 8) {
+            uint16 buf[8];
+            for (int i = 0; i < 8; i++) {
+              buf[i] = (acc[channel + i] + filter_count / 2) / filter_count;
+            }
+            uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf));
+            buf8 = vmin_u8(buf8, vdup_n_u8(params.quantized_activation_max));
+            buf8 = vmax_u8(buf8, vdup_n_u8(params.quantized_activation_min));
+            vst1_u8(output_ptr + channel, buf8);
           }
-          uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf));
-          buf8 = vmin_u8(buf8, vdup_n_u8(params.quantized_activation_max));
-          buf8 = vmax_u8(buf8, vdup_n_u8(params.quantized_activation_min));
-          vst1_u8(output_ptr + channel, buf8);
-        }
 #endif
-        for (; channel < depth; ++channel) {
-          uint16 a = (acc[channel] + filter_count / 2) / filter_count;
-          a = std::max<uint16>(a, params.quantized_activation_min);
-          a = std::min<uint16>(a, params.quantized_activation_max);
-          output_ptr[channel] = static_cast<uint8>(a);
+          for (; channel < tranche_depth; ++channel) {
+            uint16 a = (acc[channel] + filter_count / 2) / filter_count;
+            a = std::max<uint16>(a, params.quantized_activation_min);
+            a = std::min<uint16>(a, params.quantized_activation_max);
+            output_ptr[channel] = static_cast<uint8>(a);
+          }
         }
       }
     }
@@ -3878,6 +4034,14 @@
                     const uint8* input_data, const RuntimeShape& output_shape,
                     uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label("MaxPool/8bit");
+
+  // Here, and in other pooling ops, in order to maintain locality of reference,
+  // to minimize some recalculations, and to load into NEON vector registers, we
+  // use an inner loop down the depth. Since depths can be large, and the
+  // temporary storage would otherwise need to be arbitrarily large, we divide
+  // the work into depth tranches just within the batch loop.
+  static constexpr int kPoolingAccTrancheSize = 256;
+
   TFLITE_DCHECK_LE(params.quantized_activation_min,
                    params.quantized_activation_max);
   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@@ -3890,77 +4054,85 @@
   const int output_width = output_shape.Dims(2);
   const int stride_height = params.stride_height;
   const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin =
-            (out_x * stride_width) - params.padding_values.width;
-        const int in_y_origin =
-            (out_y * stride_height) - params.padding_values.height;
-        const int filter_x_start = std::max(0, -in_x_origin);
-        const int filter_x_end =
-            std::min(params.filter_width, input_width - in_x_origin);
-        const int filter_y_start = std::max(0, -in_y_origin);
-        const int filter_y_end =
-            std::min(params.filter_height, input_height - in_y_origin);
-        // 2560 is required by MobileNetV2 with depth multiplier 2.
-        static constexpr int kAccBufferMaxSize = 4096;
-        TFLITE_DCHECK_LE(depth, kAccBufferMaxSize);
-        uint8 acc[kAccBufferMaxSize];
-        memset(acc, 0, depth * sizeof(acc[0]));
-        const uint8* input_ptr =
-            input_data +
-            depth * (in_x_origin +
-                     input_width * (in_y_origin + input_height * batch));
-        for (int fy = filter_y_start; fy < filter_y_end; fy++) {
-          const uint8* input_row_ptr =
-              input_ptr + depth * (fy * input_width + filter_x_start);
-          for (int fx = filter_x_start; fx < filter_x_end; fx++) {
-            int channel = 0;
-#ifdef USE_NEON
-            for (; channel <= depth - 16; channel += 16) {
-              uint8x16_t acc_reg = vld1q_u8(acc + channel);
-              uint8x16_t input_reg = vld1q_u8(input_row_ptr);
-              input_row_ptr += 16;
-              acc_reg = vmaxq_u8(acc_reg, input_reg);
-              vst1q_u8(acc + channel, acc_reg);
-            }
 
-            for (; channel <= depth - 8; channel += 8) {
-              uint8x8_t acc_reg = vld1_u8(acc + channel);
-              uint8x8_t input_reg = vld1_u8(input_row_ptr);
-              input_row_ptr += 8;
-              acc_reg = vmax_u8(acc_reg, input_reg);
-              vst1_u8(acc + channel, acc_reg);
-            }
+  uint8 acc[kPoolingAccTrancheSize];
+  for (int batch = 0; batch < batches; ++batch) {
+    // We proceed through the depth in tranches (see comment above). The
+    // depth_base is the depth at the beginning of the tranche. The
+    // tranche_depth is the depth dimension of the tranche.
+    for (int depth_base = 0; depth_base < depth;
+         depth_base += kPoolingAccTrancheSize) {
+      const int tranche_depth =
+          std::min(depth - depth_base, kPoolingAccTrancheSize);
+      for (int out_y = 0; out_y < output_height; ++out_y) {
+        for (int out_x = 0; out_x < output_width; ++out_x) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          memset(acc, 0, tranche_depth * sizeof(acc[0]));
+          const uint8* input_ptr =
+              input_data + depth_base +
+              depth * (in_x_origin +
+                       input_width * (in_y_origin + input_height * batch));
+          for (int fy = filter_y_start; fy < filter_y_end; fy++) {
+            const uint8* input_row_ptr =
+                input_ptr + depth * (fy * input_width + filter_x_start);
+            for (int fx = filter_x_start; fx < filter_x_end; fx++) {
+              const uint8* input_channel_ptr = input_row_ptr;
+              int channel = 0;
+#ifdef USE_NEON
+              for (; channel <= tranche_depth - 16; channel += 16) {
+                uint8x16_t acc_reg = vld1q_u8(acc + channel);
+                uint8x16_t input_reg = vld1q_u8(input_channel_ptr);
+                input_channel_ptr += 16;
+                acc_reg = vmaxq_u8(acc_reg, input_reg);
+                vst1q_u8(acc + channel, acc_reg);
+              }
+
+              for (; channel <= tranche_depth - 8; channel += 8) {
+                uint8x8_t acc_reg = vld1_u8(acc + channel);
+                uint8x8_t input_reg = vld1_u8(input_channel_ptr);
+                input_channel_ptr += 8;
+                acc_reg = vmax_u8(acc_reg, input_reg);
+                vst1_u8(acc + channel, acc_reg);
+              }
 #endif
-            for (; channel < depth; ++channel) {
-              acc[channel] = std::max(acc[channel], *input_row_ptr++);
+              for (; channel < tranche_depth; ++channel) {
+                acc[channel] = std::max(acc[channel], *input_channel_ptr++);
+              }
+              input_row_ptr += depth;
             }
           }
-        }
-        uint8* output_ptr =
-            output_data + Offset(output_shape, batch, out_y, out_x, 0);
-        int channel = 0;
+          uint8* output_ptr = output_data + Offset(output_shape, batch, out_y,
+                                                   out_x, depth_base);
+          int channel = 0;
 #ifdef USE_NEON
-        for (; channel <= depth - 16; channel += 16) {
-          uint8x16_t a = vld1q_u8(acc + channel);
-          a = vminq_u8(a, vdupq_n_u8(params.quantized_activation_max));
-          a = vmaxq_u8(a, vdupq_n_u8(params.quantized_activation_min));
-          vst1q_u8(output_ptr + channel, a);
-        }
-        for (; channel <= depth - 8; channel += 8) {
-          uint8x8_t a = vld1_u8(acc + channel);
-          a = vmin_u8(a, vdup_n_u8(params.quantized_activation_max));
-          a = vmax_u8(a, vdup_n_u8(params.quantized_activation_min));
-          vst1_u8(output_ptr + channel, a);
-        }
+          for (; channel <= tranche_depth - 16; channel += 16) {
+            uint8x16_t a = vld1q_u8(acc + channel);
+            a = vminq_u8(a, vdupq_n_u8(params.quantized_activation_max));
+            a = vmaxq_u8(a, vdupq_n_u8(params.quantized_activation_min));
+            vst1q_u8(output_ptr + channel, a);
+          }
+          for (; channel <= tranche_depth - 8; channel += 8) {
+            uint8x8_t a = vld1_u8(acc + channel);
+            a = vmin_u8(a, vdup_n_u8(params.quantized_activation_max));
+            a = vmax_u8(a, vdup_n_u8(params.quantized_activation_min));
+            vst1_u8(output_ptr + channel, a);
+          }
 #endif
-        for (; channel < depth; ++channel) {
-          uint8 a = acc[channel];
-          a = std::max<uint8>(a, params.quantized_activation_min);
-          a = std::min<uint8>(a, params.quantized_activation_max);
-          output_ptr[channel] = static_cast<uint8>(a);
+          for (; channel < tranche_depth; ++channel) {
+            uint8 a = acc[channel];
+            a = std::max<uint8>(a, params.quantized_activation_min);
+            a = std::min<uint8>(a, params.quantized_activation_max);
+            output_ptr[channel] = static_cast<uint8>(a);
+          }
         }
       }
     }
@@ -4346,119 +4518,6 @@
   }
 }
 
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1_impl(
-    gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
-  // assert(__builtin_clz(0u) >= std::numeric_limits<uint32>::digits - 1);
-  // assert(__builtin_clz(0u) <= std::numeric_limits<uint32>::digits);
-  using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
-  // The reason for accumulating the result with an extra bit of headroom is
-  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
-  // recip_denom will otherwise introduce an error.
-  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
-  using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
-
-  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1488522236, std::log(2.0));
-  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
-  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1518500250, std::sqrt(0.5));
-  const FixedPoint0 one_quarter =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
-
-  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1057819769,
-      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
-
-  const FixedPointAccum shifted_quarter =
-      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
-
-  // Reinterpret the input value as Q0.31, because we will figure out the
-  // required shift "ourselves" instead of using, say, Rescale.
-  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
-  // z_a_pow_2 = input_integer_bits - z_a_headroom;
-  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
-  FixedPoint0 r_a_tmp =
-      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
-  const int32 r_a_raw =
-      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
-  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
-  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
-  //                   InputIntegerBits - z_b_headroom - 0.25);
-  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
-  FixedPoint0 z_b = z_a * sqrt_half;
-  int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
-  const int32 r_b_raw =
-      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
-  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
-  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
-      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
-
-  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
-  FixedPoint0 q = r - sqrt_sqrt_half;
-  q = q + q;
-
-  const FixedPoint0 common_sq = q * q;
-  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
-  const FixedPoint0 denom_minus_one_0 =
-      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
-  const FixedPoint0 recip_denom =
-      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
-
-  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
-  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
-                                              num_scaled * recip_denom);
-}
-
-// Minimum output bits to accommodate log of maximum input range.  It actually
-// does not matter if one considers, say, [-64,64] or [-64,64).
-//
-// For example, run this through Octave:
-// [0:127; ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
-constexpr int min_log_x_output_bits(int input_bits) {
-  return input_bits > 90
-             ? 7
-             : input_bits > 44
-                   ? 6
-                   : input_bits > 21
-                         ? 5
-                         : input_bits > 10
-                               ? 4
-                               : input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
-}
-
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1(
-    gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
-  static_assert(
-      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
-      "Output integer bits must be sufficent to accommodate logs of inputs.");
-  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
-                                                     InputIntegerBits>(
-      input_val);
-}
-
 // Currently just a copy of the reference code.
 inline void LogSoftmax(const SoftmaxParams& params,
                        const RuntimeShape& input_shape, const uint8* input_data,
diff --git a/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
index 7cc5679..c38f374 100644
--- a/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -27,7 +27,7 @@
 // Used in tests and template parameters to control which version of depthwise
 // convolution is called. Primarily for reference code, and specializations
 // forced in tests.
-enum class DepthwiseConvInvocation {
+enum class DepthwiseConvImplementation {
   // Run all tests against kUseStandardEntry even if also testing another
   // kernel, since we need to be sure that the main DepthwiseConv() function in
   // optimized_ops.h dispatches to a correctly-executing kernel.
@@ -59,6 +59,7 @@
 };
 
 namespace reference_ops {
+namespace depthwise_conv {
 
 template <DepthwiseConvOutputRounding output_rounding>
 inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
@@ -172,13 +173,15 @@
   }
 };
 
+}  // namespace depthwise_conv
+
 inline void DepthwiseConv(
     const DepthwiseParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
     const uint8* filter_data, const RuntimeShape& bias_shape,
     const int32* bias_data, const RuntimeShape& output_shape,
     uint8* output_data) {
-  return DepthwiseConvBasicKernel<
+  return depthwise_conv::DepthwiseConvBasicKernel<
       DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
                                                        input_data, filter_shape,
                                                        filter_data, bias_shape,
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
new file mode 100644
index 0000000..a694ba2
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
@@ -0,0 +1,144 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
+
+#include <limits>
+#include "public/gemmlowp.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+// Element-wise add that can often be used for the inner loop of broadcast add
+// as well as for the non-broadcast add.
+inline void AddElementwise(int size, const ArithmeticParams& params,
+                           const int8_t* input1_data, const int8_t* input2_data,
+                           int8_t* output_data) {
+  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
+  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
+  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
+
+  for (int i = 0; i < size; ++i) {
+    const int32 input1_val = params.input1_offset + input1_data[i];
+    const int32 input2_val = params.input2_offset + input2_data[i];
+    const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32 scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32 scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32 raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32 raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sum, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32 clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<int8_t>(clamped_output);
+  }
+}
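+// Minimal usage sketch (hypothetical values; in the TFLite ADD kernel these
+// fields are derived in the Prepare step):
+//   ArithmeticParams params;
+//   params.left_shift = 20;  // Headroom so the scaled sum cannot overflow.
+//   params.input1_offset = -input1_zero_point;  // Offsets negate zero points.
+//   params.input2_offset = -input2_zero_point;
+//   // ... set the per-input and output multipliers/shifts and the quantized
+//   // activation range, then:
+//   AddElementwise(flat_size, params, input1_data, input2_data, output_data);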
+
+inline void Add(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int8_t* input1_data,
+                const RuntimeShape& input2_shape, const int8_t* input2_data,
+                const RuntimeShape& output_shape, int8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  const int flat_size =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+
+  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
+  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
+  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
+  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
+                               const RuntimeShape& input1_shape,
+                               const int8_t* input1_data,
+                               const RuntimeShape& input2_shape,
+                               const int8_t* input2_data,
+                               const RuntimeShape& output_shape,
+                               int8_t* output_data) {
+  gemmlowp::ScopedProfilingLabel label("BroadcastAdd4DSlow/int8");
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), and with the
+  // trailing dimension changing most rapidly (the channel dimension has the
+  // smallest stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed: the
+  // first dimension has the smallest stride.
+  //
+  // We name our variables by their TensorFlow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride, for
+  // the best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          const int32_t input1_val =
+              params.input1_offset +
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+          const int32_t input2_val =
+              params.input2_offset +
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+          const int32_t shifted_input1_val =
+              input1_val * (1 << params.left_shift);
+          const int32_t shifted_input2_val =
+              input2_val * (1 << params.left_shift);
+          const int32_t scaled_input1_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input1_val, params.input1_multiplier,
+                  params.input1_shift);
+          const int32_t scaled_input2_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input2_val, params.input2_multiplier,
+                  params.input2_shift);
+          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+          const int32_t raw_output =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  raw_sum, params.output_multiplier, params.output_shift) +
+              params.output_offset;
+          const int32_t clamped_output =
+              std::min(params.quantized_activation_max,
+                       std::max(params.quantized_activation_min, raw_output));
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              static_cast<int8_t>(clamped_output);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
new file mode 100644
index 0000000..36b349f
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
@@ -0,0 +1,69 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const int8_t* input_data, const RuntimeShape& filter_shape,
+    const int8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    int8_t* output_data, void* gemm_context) {
+  (void)gemm_context;  // only used in optimized code.
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = output_shape.Dims(0);
+  const int output_depth = output_shape.Dims(1);
+  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      int32 acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32 input_val = input_data[b * accum_depth + d];
+        int32 filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc += output_offset;
+      acc = std::max(acc, output_activation_min);
+      acc = std::min(acc, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
+    }
+  }
+}
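+// Dimension sketch for the loop nest above: input_data is read as
+// [batches, accum_depth], filter_data as [output_depth, accum_depth]
+// (row-major), and output_data as [batches, output_depth]; each output
+// element is an offset-corrected dot product over accum_depth.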
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h b/tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h
new file mode 100644
index 0000000..f22bb4f
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h
@@ -0,0 +1,111 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOG_SOFTMAX_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOG_SOFTMAX_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void LogSoftmax(int32_t input_multiplier, int32_t input_shift,
+                       int32_t reverse_multiplier, int32_t reverse_shift,
+                       int32_t diff_min, int32_t outer_size, int32_t depth,
+                       const int8* input_data, int8* output_data) {
+  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
+  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+  static constexpr int32_t kMinInt32 = std::numeric_limits<int32_t>::min();
+
+  // [-16, 0] is mapped to [-128, 127] with 1/16 as scale and 127 as zero
+  // point. This nudges the output to [-255/16, 0].
+  static constexpr int32_t kOutputZeroPoint = 127;
+
+  // All IntegerBits must agree with the Prepare() function.
+  // Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible.
+  static constexpr int kInputIntegerBits = 5;
+  static constexpr int kAccumulationIntegerBits = 12;
+  static constexpr int kOutputIntegerBits = 4;
+  using F5 = gemmlowp::FixedPoint<int32, kInputIntegerBits>;
+  using F12 = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
+
+  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
+    int8 max_in_row = kMinInt8;
+    for (int inner_index = 0; inner_index < depth; ++inner_index) {
+      max_in_row =
+          std::max(max_in_row, input_data[outer_index * depth + inner_index]);
+    }
+
+    // Accumulator "sum_of_exps_in_q12" is safe from overflow for up to 2^12
+    // accumulation steps.
+    F12 sum_of_exps_in_q12 = F12::FromRaw(0);
+    for (int inner_index = 0; inner_index < depth; ++inner_index) {
+      int32_t input_diff =
+          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
+          max_in_row;
+      if (input_diff >= diff_min) {
+        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
+            input_diff, input_multiplier, input_shift);
+        sum_of_exps_in_q12 =
+            sum_of_exps_in_q12 +
+            gemmlowp::Rescale<kAccumulationIntegerBits>(
+                exp_on_negative_values(F5::FromRaw(input_diff_in_q5)));
+      }
+    }
+
+    const int32_t log_sum_of_exps_in_q5 =
+        log_x_for_x_greater_than_or_equal_to_1<kInputIntegerBits>(
+            sum_of_exps_in_q12)
+            .raw();
+
+    // Potentially reduce the valid range: shifted_log_sum_of_exps_in_q5 is
+    // the smallest value representable in Q5.26 plus log_sum_of_exps_in_q5.
+    const int32_t shifted_log_sum_of_exps_in_q5 =
+        log_sum_of_exps_in_q5 + kMinInt32;
+    const int32_t adjusted_diff_min = std::max(
+        diff_min - 1,
+        MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5,
+                                      reverse_multiplier, -reverse_shift));
+
+    for (int inner_index = 0; inner_index < depth; ++inner_index) {
+      int32_t input_diff =
+          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
+          max_in_row;
+      // Note use of > below instead of >= above.
+      if (input_diff > adjusted_diff_min) {
+        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
+            input_diff, input_multiplier, input_shift);
+
+        // Rescale and downcast.
+        int32_t output_in_q27 =
+            gemmlowp::RoundingDivideByPOT(
+                (input_diff_in_q5 - log_sum_of_exps_in_q5),
+                31 - kInputIntegerBits - kOutputIntegerBits) +
+            kOutputZeroPoint;
+
+        output_in_q27 =
+            std::max(std::min(output_in_q27, static_cast<int32_t>(kMaxInt8)),
+                     static_cast<int32_t>(kMinInt8));
+        output_data[outer_index * depth + inner_index] =
+            static_cast<int8_t>(output_in_q27);
+      } else {
+        output_data[outer_index * depth + inner_index] = kMinInt8;
+      }
+    }
+  }
+}
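+// The output encoding above, spelled out: with scale 1/16 and zero point 127,
+// a quantized value q represents (q - 127) / 16, so the int8 range
+// [-128, 127] covers the real interval [-255/16, 0] noted at
+// kOutputZeroPoint.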
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOG_SOFTMAX_H_
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h b/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
new file mode 100644
index 0000000..8277c3b
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
@@ -0,0 +1,64 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
+
+#include <limits>
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
+                     int32_t input_multiplier, int32_t input_left_shift,
+                     int32_t input_size, const int8_t* input_data,
+                     int8_t* output_data) {
+  // Integer bits must be in sync with the Prepare() function.
+  static constexpr int32_t kInputIntegerBits = 4;
+  static constexpr int32_t kOutputIntegerBits = 8;
+  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
+  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+  static constexpr int32_t kOutputZeroPoint = -128;
+
+  for (int i = 0; i < input_size; ++i) {
+    const int32_t input =
+        static_cast<int32_t>(input_data[i]) - input_zero_point;
+    if (input <= -input_range_radius) {
+      output_data[i] = kMinInt8;
+    } else if (input >= input_range_radius) {
+      output_data[i] = kMaxInt8;
+    } else {
+      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
+          input, input_multiplier, input_left_shift);
+      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
+      const int32_t output_in_q0 =
+          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
+
+      // Rescale and downcast.
+      using gemmlowp::RoundingDivideByPOT;
+      int32_t output_in_q23 =
+          RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
+      output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
+                                        static_cast<int32_t>(kMinInt8)),
+                               static_cast<int32_t>(kMaxInt8));
+      output_data[i] = static_cast<int8_t>(output_in_q23);
+    }
+  }
+}
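+// The rescaling above, spelled out: gemmlowp::logistic returns Q0.31 in
+// [0, 1); RoundingDivideByPOT by 2^(31 - kOutputIntegerBits) = 2^23 maps the
+// raw value onto [0, 256), and adding kOutputZeroPoint = -128 re-centers it
+// onto the int8 range before the clamp.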
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
new file mode 100644
index 0000000..5e33d08
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
@@ -0,0 +1,130 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
+
+#include "public/gemmlowp.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void MulElementwise(int size, const ArithmeticParams& params,
+                           const int8_t* input1_data, const int8_t* input2_data,
+                           int8_t* output_data) {
+  for (int i = 0; i < size; ++i) {
+    const int32 input1_val = params.input1_offset + input1_data[i];
+    const int32 input2_val = params.input2_offset + input2_data[i];
+    const int32 unclamped_result =
+        params.output_offset +
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(input1_val * input2_val,
+                                                       params.output_multiplier,
+                                                       params.output_shift);
+    const int32 clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, unclamped_result));
+    output_data[i] = static_cast<int8_t>(clamped_output);
+  }
+}
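+// In real-number terms (illustrative): with real = scale * (q - zero_point),
+// the product of the two re-centered inputs is rescaled by
+// output_multiplier/output_shift, which together encode
+// input1_scale * input2_scale / output_scale, before the offset and clamp.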
+
+inline void Mul(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int8_t* input1_data,
+                const RuntimeShape& input2_shape, const int8_t* input2_data,
+                const RuntimeShape& output_shape, int8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  gemmlowp::ScopedProfilingLabel label("Mul/8bit");
+  const int flat_size =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+
+  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+// Mul with 16-bit inputs and int8_t outputs.
+inline void Mul(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int16* input1_data,
+                const RuntimeShape& input2_shape, const int16* input2_data,
+                const RuntimeShape& output_shape, int8_t* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Mul/Int16Int8");
+  int32 output_offset = params.output_offset;
+  int32 output_activation_min = params.quantized_activation_min;
+  int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  const int flat_size =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++) {
+    // F0 uses 0 integer bits, range [-1, 1).
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result =
+        F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    int16 rescaled_result =
+        gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+    int16 clamped_result =
+        std::min<int16>(output_activation_max - output_offset, rescaled_result);
+    clamped_result =
+        std::max<int16>(output_activation_min - output_offset, clamped_result);
+    output_data[i] = output_offset + clamped_result;
+  }
+}
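+// Scaling note for this 16-bit path: F0 raw values span [-32768, 32767] for
+// reals in [-1, 1), so RoundingDivideByPOT(..., 8) rescales the raw product
+// onto roughly [-128, 128] before the offset and clamp narrow it to int8.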
+
+inline void BroadcastMul4DSlow(const ArithmeticParams& params,
+                               const RuntimeShape& input1_shape,
+                               const int8_t* input1_data,
+                               const RuntimeShape& input2_shape,
+                               const int8_t* input2_data,
+                               const RuntimeShape& output_shape,
+                               int8_t* output_data) {
+  gemmlowp::ScopedProfilingLabel label("BroadcastMul4DSlow/8bit");
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  // The input shapes are extended as part of NdArrayDesc initialization.
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          const int32 input1_val =
+              params.input1_offset +
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+          const int32 input2_val =
+              params.input2_offset +
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+          const int32 unclamped_result =
+              params.output_offset +
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  input1_val * input2_val, params.output_multiplier,
+                  params.output_shift);
+          const int32 clamped_output = std::min(
+              params.quantized_activation_max,
+              std::max(params.quantized_activation_min, unclamped_result));
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              static_cast<int8_t>(clamped_output);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h b/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
index 22750bc..2762bec 100644
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
@@ -15,6 +15,7 @@
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
 
+#include <limits>
 #include "tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
@@ -77,6 +78,63 @@
   }
 }
 
+inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
+                    const int8* input_data, const RuntimeShape& output_shape,
+                    int8* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  TFLITE_DCHECK_GE(params.quantized_activation_min,
+                   std::numeric_limits<int8_t>::min());
+  TFLITE_DCHECK_LE(params.quantized_activation_max,
+                   std::numeric_limits<int8_t>::max());
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          int8_t max = std::numeric_limits<int8_t>::lowest();
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              max = std::max(
+                  max,
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
+            }
+          }
+          max = std::max<int8_t>(max, params.quantized_activation_min);
+          max = std::min<int8_t>(max, params.quantized_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              static_cast<int8_t>(max);
+        }
+      }
+    }
+  }
+}
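+// Edge handling above, by example (hypothetical geometry): with a 3x3 filter,
+// stride 1 and one pixel of padding, the output at out_x = 0 has
+// in_x_origin = -1, so filter_x_start becomes 1 and the window reads only
+// in-bounds columns; padded positions never enter the max.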
+
 }  // namespace reference_integer_ops
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
new file mode 100644
index 0000000..081928b
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
@@ -0,0 +1,63 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
+
+#include <limits>
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
+                 int32_t input_multiplier, int32_t input_shift,
+                 int32_t input_size, const int8_t* input_data,
+                 int8_t* output_data) {
+  // Integer bits must be in sync with the Prepare() function.
+  static constexpr int32_t kInputIntegerBits = 4;
+  static constexpr int32_t kOutputScale = 7;
+  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
+  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
+
+  for (int i = 0; i < input_size; ++i) {
+    const int32_t input =
+        static_cast<int32_t>(input_data[i]) - input_zero_point;
+    if (input <= -input_range_radius) {
+      output_data[i] = kMinInt8;
+    } else if (input >= input_range_radius) {
+      output_data[i] = kMaxInt8;
+    } else {
+      const int32_t input_in_q4 =
+          MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
+      const int32_t output_in_q0 =
+          gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
+
+      // Rescale and downcast.
+      using gemmlowp::RoundingDivideByPOT;
+      int32_t output_in_q24 =
+          RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
+      output_in_q24 =
+          std::min(std::max(output_in_q24, static_cast<int32_t>(kMinInt8)),
+                   static_cast<int32_t>(kMaxInt8));
+      output_data[i] = static_cast<int8_t>(output_in_q24);
+    }
+  }
+}
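+// The rescaling above, spelled out: gemmlowp::tanh returns Q0.31 in (-1, 1);
+// RoundingDivideByPOT by 2^(31 - kOutputScale) = 2^24 maps the raw value
+// onto (-128, 128), which the min/max then clamps to the int8 range.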
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h
index 84f62b1..66f18d2 100644
--- a/tensorflow/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h
@@ -36,68 +36,6 @@
 
 namespace tflite {
 
-// TODO(b/77858996): Add these to gemmlowp.
-template <typename IntegerType>
-IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t sum = a64 + b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          sum)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingAddNonGemmlowp(a.raw(), b.raw()));
-}
-
-template <typename IntegerType>
-IntegerType SaturatingSub(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
-  std::int32_t a32 = a;
-  std::int32_t b32 = b;
-  std::int32_t diff = a32 - b32;
-  return static_cast<std::int16_t>(std::min(32767, std::max(-32768, diff)));
-}
-
-template <>
-inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t diff = a64 - b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          diff)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingSub(a.raw(), b.raw()));
-}
-// End section to be moved to gemmlowp.
-
 namespace reference_ops {
 
 // Return true for broadcast case, false otherwise.
@@ -192,59 +130,6 @@
   return true;
 }
 
-template <typename T>
-int CountLeadingZeros(T integer_input) {
-  static_assert(std::is_unsigned<T>::value,
-                "Only unsigned integer types handled.");
-  if (integer_input == 0) {
-    return std::numeric_limits<T>::digits;
-  }
-  const T one_in_leading_positive = static_cast<T>(1)
-                                    << (std::numeric_limits<T>::digits - 1);
-  int leading_zeros = 0;
-  while (integer_input < one_in_leading_positive) {
-    integer_input <<= 1;
-    ++leading_zeros;
-  }
-  return leading_zeros;
-}
-
-template <typename IntegerType>
-IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
-  if (exponent == 0) {
-    return x;
-  }
-  using ScalarIntegerType =
-      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
-  const IntegerType min =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
-  const IntegerType max =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
-  const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
-
-  const std::int32_t threshold =
-      ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
-  const IntegerType positive_mask =
-      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
-  const IntegerType negative_mask =
-      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
-
-  IntegerType result = gemmlowp::ShiftLeft(x, exponent);
-  result = gemmlowp::SelectUsingMask(positive_mask, max, result);
-  result = gemmlowp::SelectUsingMask(negative_mask, min, result);
-  return result;
-}
-
-// If we want to leave IntegerBits fixed, then multiplication
-// by a power of two has to be saturating/rounding, not exact anymore.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits>
-SaturatingRoundingMultiplyByPOTParam(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
-}
-
 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                  const float* input_data, const RuntimeShape& filter_shape,
                  const float* filter_data, const RuntimeShape& bias_shape,
@@ -543,16 +428,17 @@
   }
 }
 
+template <typename T>
 inline void ReluX(const tflite::ActivationParams& params,
-                  const RuntimeShape& input_shape, const uint8* input_data,
-                  const RuntimeShape& output_shape, uint8* output_data) {
+                  const RuntimeShape& input_shape, const T* input_data,
+                  const RuntimeShape& output_shape, T* output_data) {
   gemmlowp::ScopedProfilingLabel label("Quantized ReluX (not fused)");
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  const uint8 max_value = params.quantized_activation_max;
-  const uint8 min_value = params.quantized_activation_min;
+  const T max_value = params.quantized_activation_max;
+  const T min_value = params.quantized_activation_min;
   for (int i = 0; i < flat_size; ++i) {
-    const uint8 val = input_data[i];
-    const uint8 clamped =
+    const T val = input_data[i];
+    const T clamped =
         val > max_value ? max_value : val < min_value ? min_value : val;
     output_data[i] = clamped;
   }
@@ -1901,11 +1787,17 @@
   const int outputs_count = params.num_split;
 
   int outer_size = 1;
-  for (int i = 0; i < params.axis; i++) {
+  int axis = params.axis;
+  if (axis < 0) {
+    axis += dimensions;
+  }
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, dimensions);
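+  // For example, axis = -1 on a 4-D input is normalized to axis = 3 above.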
+  for (int i = 0; i < axis; ++i) {
     outer_size *= input_shape.Dims(i);
   }
   int copy_size = 1;
-  for (int i = params.axis + 1; i < dimensions; i++) {
+  for (int i = axis + 1; i < dimensions; ++i) {
     copy_size *= input_shape.Dims(i);
   }
   TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
@@ -2745,121 +2637,6 @@
   }
 }
 
-// Although currently the name of this function says that it cannot handle
-// values less than 1, in practice it can handle as low as 1/x_max, where
-// x_max is the largest representable input.  In other words, the output range
-// is symmetric.
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1_impl(
-    gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
-  using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
-  // The reason for accumulating the result with an extra bit of headroom is
-  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
-  // recip_denom will otherwise introduce an error.
-  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
-  using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
-
-  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1488522236, std::log(2.0));
-  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
-  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1518500250, std::sqrt(0.5));
-  const FixedPoint0 one_quarter =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
-
-  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1057819769,
-      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
-
-  const FixedPointAccum shifted_quarter =
-      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
-
-  // Reinterpret the input value as Q0.31, because we will figure out the
-  // required shift "ourselves" instead of using, say, Rescale.
-  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
-  // z_a_pow_2 = input_integer_bits - z_a_headroom;
-  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
-  FixedPoint0 r_a_tmp =
-      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
-  const int32 r_a_raw =
-      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
-  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
-  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
-  //                   InputIntegerBits - z_b_headroom - 0.25);
-  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
-  FixedPoint0 z_b = z_a * sqrt_half;
-  int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
-  const int32 r_b_raw =
-      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
-  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
-  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
-      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
-
-  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
-  FixedPoint0 q = r - sqrt_sqrt_half;
-  q = q + q;
-
-  const FixedPoint0 common_sq = q * q;
-  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
-  const FixedPoint0 denom_minus_one_0 =
-      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
-  const FixedPoint0 recip_denom =
-      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
-
-  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
-  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
-                                              num_scaled * recip_denom);
-}
-
-// Minimum output bits to accommodate log of maximum input range.  It actually
-// does not matter if one considers, say, [-64,64] or [-64,64).
-//
-// For example, run this through Octave:
-// [0:127; ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
-constexpr int min_log_x_output_bits(int input_bits) {
-  return input_bits > 90
-             ? 7
-             : input_bits > 44
-                   ? 6
-                   : input_bits > 21
-                         ? 5
-                         : input_bits > 10
-                               ? 4
-                               : input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
-}
-
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1(
-    gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
-  static_assert(
-      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
-      "Output integer bits must be sufficent to accommodate logs of inputs.");
-  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
-                                                     InputIntegerBits>(
-      input_val);
-}
-
 inline void LogSoftmax(const SoftmaxParams& params,
                        const RuntimeShape& input_shape, const uint8* input_data,
                        const RuntimeShape& output_shape, uint8* output_data) {
@@ -3233,6 +3010,43 @@
   }
 }
 
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape& params_shape,
+                     const ParamsT* params_data,
+                     const RuntimeShape& indices_shape,
+                     const IndicesT* indices_data,
+                     const RuntimeShape& output_shape, ParamsT* output_data) {
+  gemmlowp::ScopedProfilingLabel label("GatherNd");
+
+  int n_slices = 1;
+  int slice_size = 1;
+  const int indices_dims = indices_shape.DimensionsCount();
+  const int indices_nd = indices_shape.Dims(indices_dims - 1);
+  const int params_dims = params_shape.DimensionsCount();
+  for (int i = 0; i < indices_dims - 1; ++i) {
+    n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = indices_nd; i < params_dims; ++i) {
+    slice_size *= params_shape.Dims(i);
+  }
+
+  int remain_flat_size = params_shape.FlatSize();
+  std::vector<int> dims_to_count(indices_nd, 0);
+  for (int i = 0; i < indices_nd; ++i) {
+    dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+    remain_flat_size = dims_to_count[i];
+  }
+
+  for (int i = 0; i < n_slices; ++i) {
+    int from_pos = 0;
+    for (int j = 0; j < indices_nd; ++j) {
+      from_pos += indices_data[i * indices_nd + j] * dims_to_count[j];
+    }
+    std::memcpy(output_data + i * slice_size, params_data + from_pos,
+                sizeof(ParamsT) * slice_size);
+  }
+}
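+// Worked example (hypothetical shapes): params of shape [3, 4] with indices
+// of shape [2, 2] gives indices_nd = 2, n_slices = 2, slice_size = 1 and
+// dims_to_count = {4, 1}, so an index pair {2, 3} copies the flat element at
+// 2 * 4 + 3 * 1 = 11.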
+
 template <typename T>
 inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
                            const RuntimeShape& unextended_input_shape,
@@ -4548,6 +4362,34 @@
   }
 }
 
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape& input_condition_shape,
+                      const D* input_condition_data, T* output_data) {
+  const size_t size = input_condition_shape.FlatSize();
+  const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+  std::vector<int> dims_to_count(cond_rank, 0);
+  int cur_flat_size = size;
+  for (int i = 0; i < cond_rank; ++i) {
+    dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
+    cur_flat_size = dims_to_count[i];
+  }
+
+  int output_index = 0;
+  for (int i = 0; i < size; ++i) {
+    if (input_condition_data[i]) {
+      // Insert the coordinate of the current item (row major) into output.
+      int flat_index = i;
+      for (int j = 0; j < cond_rank; ++j) {
+        int coord_j = flat_index / dims_to_count[j];
+        output_data[output_index * cond_rank + j] = coord_j;
+        flat_index %= dims_to_count[j];
+      }
+      output_index++;
+    }
+  }
+}
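+// Worked example (hypothetical input): a [2, 2] condition holding
+// {true, false, false, true} yields dims_to_count = {2, 1} and writes the
+// row-major coordinates {0, 0} and {1, 1} into output_data.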
+
 // For easy implementation, the indices is always a vector of size-4 vectors.
 template <typename T, typename TI>
 inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
diff --git a/tensorflow/lite/kernels/kernel_util.cc b/tensorflow/lite/kernels/kernel_util.cc
index eafc1e4..d6984e9 100644
--- a/tensorflow/lite/kernels/kernel_util.cc
+++ b/tensorflow/lite/kernels/kernel_util.cc
@@ -149,6 +149,9 @@
   if (output->type == kTfLiteUInt8) {
     qmin = std::numeric_limits<uint8_t>::min();
     qmax = std::numeric_limits<uint8_t>::max();
+  } else if (output->type == kTfLiteInt8) {
+    qmin = std::numeric_limits<int8_t>::min();
+    qmax = std::numeric_limits<int8_t>::max();
   } else if (output->type == kTfLiteInt16) {
     qmin = std::numeric_limits<int16_t>::min();
     qmax = std::numeric_limits<int16_t>::max();
diff --git a/tensorflow/lite/kernels/maximum_minimum.cc b/tensorflow/lite/kernels/maximum_minimum.cc
index 3bcaabf..0e15254 100644
--- a/tensorflow/lite/kernels/maximum_minimum.cc
+++ b/tensorflow/lite/kernels/maximum_minimum.cc
@@ -108,6 +108,9 @@
       case kTfLiteUInt8:
         TFLiteOperation<uint8_t, OpType>(context, node, op_context);
         break;
+      case kTfLiteInt8:
+        TFLiteOperation<int8_t, OpType>(context, node, op_context);
+        break;
       case kTfLiteInt32:
        TFLiteOperation<int32_t, OpType>(context, node, op_context);
         break;
diff --git a/tensorflow/lite/kernels/maximum_minimum_test.cc b/tensorflow/lite/kernels/maximum_minimum_test.cc
index acb74e0..6567c8f 100644
--- a/tensorflow/lite/kernels/maximum_minimum_test.cc
+++ b/tensorflow/lite/kernels/maximum_minimum_test.cc
@@ -112,6 +112,17 @@
                      {0, 0, 1, 11, 2, 1});
 }
 
+TEST(MaxMinOpTest, Int8Test) {
+  std::initializer_list<int8_t> data1 = {1, 0, 2, 11, 2, 23};
+  std::initializer_list<int8_t> data2 = {0, 0, 1, 12, 123, 1};
+  TestModel<int8_t>(BuiltinOperator_MAXIMUM, {TensorType_INT8, {3, 1, 2}},
+                    {TensorType_INT8, {3, 1, 2}}, {TensorType_INT8, {3, 1, 2}},
+                    data1, data2, {1, 0, 2, 12, 123, 23});
+  TestModel<int8_t>(BuiltinOperator_MINIMUM, {TensorType_INT8, {3, 1, 2}},
+                    {TensorType_INT8, {3, 1, 2}}, {TensorType_INT8, {3, 1, 2}},
+                    data1, data2, {0, 0, 1, 11, 2, 1});
+}
+
 TEST(MaximumOpTest, FloatWithBroadcastTest) {
   std::initializer_list<float> data1 = {1.0, 0.0, -1.0, -2.0, -1.44, 11.0};
   std::initializer_list<float> data2 = {0.5, 2.0};
diff --git a/tensorflow/lite/kernels/mirror_pad.cc b/tensorflow/lite/kernels/mirror_pad.cc
index 65a98ef..c0924da 100644
--- a/tensorflow/lite/kernels/mirror_pad.cc
+++ b/tensorflow/lite/kernels/mirror_pad.cc
@@ -37,10 +37,6 @@
   // Note: This is not owned by default. It will point to the value
   // in the input tensor.
   const void* value = nullptr;
-  // The start index of the values of this tensor in the output buffer.
-  int start = -1;
-  // The end index of the values of this tensor in the output buffer.
-  int end = -1;
   // If this tensor is not a single value, then this vector holds
   // all the tensors that belong to it.
   // Pointers are not owned.
@@ -70,6 +66,8 @@
 struct OpData {
   // Holds intermediate data structure of the padded tensor.
   std::vector<PaddedTensor> pad_tensor_buffer;
+  // Total number of intermediate elements in the pad_tensor_buffer.
+  int num_elements;
 };
 
 // Util method to initialize the memory of the padded tensor.
@@ -81,7 +79,6 @@
   // nodes in the next level, and swap while moving on dimensions of the tensor.
   std::vector<PaddedTensor*> current_nodes, next_level;
   current_nodes.push_back(&(*padded_tensor_buffer)[element_index]);
-  current_nodes[0]->start = current_nodes[0]->end = -1;
   element_index++;
   int next_level_size = 1;
   while (!current_nodes.empty() && dimension_index < dims_size) {
@@ -93,7 +90,6 @@
       padded_tensor->values.resize(dims->data[dimension_index]);
       for (int i = 0; i < dims->data[dimension_index]; ++i) {
         padded_tensor->values[i] = &(*padded_tensor_buffer)[element_index];
-        padded_tensor->values[i]->start = padded_tensor->values[i]->end = -1;
         next_level[index++] = padded_tensor->values[i];
         element_index++;
       }
@@ -242,40 +238,25 @@
 // Fills 'output_data' with data from 'padded_tensor'.
 // The function does this recursively: the left padding first, then the
 // original data, followed by the right padding.
-// The functions returns the index in 'output_data' to be filled with data.
 template <typename T>
-int FillOutput(PaddedTensor* padded_tensor, T* output_data,
+int FillOutput(const PaddedTensor* padded_tensor, T* output_data,
                int index_in_output) {
   if (padded_tensor == nullptr || output_data == nullptr) {
     return -1;
   }
-  // Check if this tensor value was computed and written in the output
-  // already. If yes, just copy the values.
-  if (padded_tensor->start != -1) {
-    const int size = padded_tensor->end - padded_tensor->start + 1;
-    memcpy(output_data + index_in_output, output_data + padded_tensor->start,
-           size * sizeof(T));
-    return index_in_output + size;
-  }
-  // Record the start index in the output.
-  padded_tensor->start = index_in_output;
-  // Check for single value.
   if (padded_tensor->value != nullptr) {
     output_data[index_in_output] = *static_cast<const T*>(padded_tensor->value);
-    padded_tensor->end = index_in_output;
     return index_in_output + 1;
   }
-  for (auto* tensor : padded_tensor->left_pad_ptrs) {
+  for (const auto* tensor : padded_tensor->left_pad_ptrs) {
     index_in_output = FillOutput(tensor, output_data, index_in_output);
   }
-  for (auto& tensor : padded_tensor->values) {
+  for (const auto& tensor : padded_tensor->values) {
     index_in_output = FillOutput(tensor, output_data, index_in_output);
   }
-  for (auto* tensor : padded_tensor->right_pad_ptrs) {
+  for (const auto* tensor : padded_tensor->right_pad_ptrs) {
     index_in_output = FillOutput(tensor, output_data, index_in_output);
   }
-  // Record the end index in the output.
-  padded_tensor->end = index_in_output - 1;
   return index_in_output;
 }
 
@@ -392,6 +373,7 @@
     num_elements += extra_nodes;
   }
   op_data->pad_tensor_buffer.resize(num_elements);
+  op_data->num_elements = num_elements;
 
   if (!IsConstantTensor(padding_matrix)) {
     SetTensorToDynamic(output_tensor);
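With the start/end memoization gone, FillOutput is a plain pre-order walk: left padding, then the original values, then right padding, each recursively. The effect is easiest to see in one dimension; a small sketch of the two mirror modes (not the kernel's PaddedTensor machinery):

    #include <iostream>
    #include <vector>

    // 1-D mirror pad. SYMMETRIC repeats the edge value (offset 1);
    // REFLECT does not (offset 0).
    std::vector<int> MirrorPad1D(const std::vector<int>& in, int left, int right,
                                 bool symmetric) {
      const int n = static_cast<int>(in.size());
      const int offset = symmetric ? 1 : 0;
      std::vector<int> out;
      for (int i = left; i >= 1; --i) out.push_back(in[i - offset]);
      out.insert(out.end(), in.begin(), in.end());
      for (int i = 1; i <= right; ++i) out.push_back(in[n - i - 1 + offset]);
      return out;
    }

    int main() {
      // Matches the SYMMETRIC expectation in mirror_pad_test.cc below:
      // {1, 2, 3} with right padding 2 -> {1, 2, 3, 3, 2}.
      for (int v : MirrorPad1D({1, 2, 3}, 0, 2, /*symmetric=*/true))
        std::cout << v << ' ';
      std::cout << '\n';
    }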
diff --git a/tensorflow/lite/kernels/mirror_pad_test.cc b/tensorflow/lite/kernels/mirror_pad_test.cc
index 91e48fa..fd09e6e 100644
--- a/tensorflow/lite/kernels/mirror_pad_test.cc
+++ b/tensorflow/lite/kernels/mirror_pad_test.cc
@@ -185,18 +185,5 @@
   EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 2, 3, 3, 2}));
 }
 
-TEST(MirrorPadTest, Pad_1D_Symmetric_Multiple_Invoke) {
-  BaseMirrorPadOpModel<int> model(
-      {TensorType_INT32, {3}}, {TensorType_INT32, {1, 2}},
-      {TensorType_INT32, {}}, tflite::MirrorPadMode_SYMMETRIC);
-  model.PopulateTensor<int>(model.input_tensor_id(), {1, 2, 3});
-  model.PopulateTensor<int>(model.padding_matrix_tensor_id(), {0, 2});
-  model.Invoke();
-  EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 2, 3, 3, 2}));
-  model.PopulateTensor<int>(model.input_tensor_id(), {4, 5, 6});
-  model.Invoke();
-  EXPECT_THAT(model.GetOutput(), ElementsAreArray({4, 5, 6, 6, 5}));
-}
-
 }  // namespace
 }  // namespace tflite
diff --git a/tensorflow/lite/kernels/mul.cc b/tensorflow/lite/kernels/mul.cc
index 01039a7..e0ff672 100644
--- a/tensorflow/lite/kernels/mul.cc
+++ b/tensorflow/lite/kernels/mul.cc
@@ -12,6 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
@@ -87,8 +88,14 @@
                                   &data->output_activation_min,
                                   &data->output_activation_max);
   }
+  if (output->type == kTfLiteInt8) {
+    CalculateActivationRangeInt8(params->activation, output,
+                                 &data->output_activation_min,
+                                 &data->output_activation_max);
+  }
 
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
+      output->type == kTfLiteInt16) {
     double real_multiplier =
         input1->params.scale * input2->params.scale / output->params.scale;
     QuantizeMultiplierSmallerThanOneExp(
@@ -151,8 +158,8 @@
                            TfLiteMulParams* params, const OpData* data,
                            const TfLiteTensor* input1,
                            const TfLiteTensor* input2, TfLiteTensor* output) {
-  if (input1->type == kTfLiteUInt8 && input2->type == kTfLiteUInt8 &&
-      output->type == kTfLiteUInt8) {
+  if (input1->type == input2->type && input1->type == output->type &&
+      (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8)) {
     tflite::ArithmeticParams op_params;
     SetActivationParams(data->output_activation_min,
                         data->output_activation_max, &op_params);
@@ -163,23 +170,31 @@
     op_params.output_shift = data->output_shift;
     bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
         GetTensorShape(input1), GetTensorShape(input2), &op_params);
-#define TF_LITE_MUL(type, opname)                                      \
-  type::opname(op_params, GetTensorShape(input1),                      \
-               GetTensorData<uint8_t>(input1), GetTensorShape(input2), \
-               GetTensorData<uint8_t>(input2), GetTensorShape(output), \
-               GetTensorData<uint8_t>(output))
-
-    if (kernel_type == kReference) {
+#define TF_LITE_MUL(type, opname, dtype)                             \
+  type::opname(op_params, GetTensorShape(input1),                    \
+               GetTensorData<dtype>(input1), GetTensorShape(input2), \
+               GetTensorData<dtype>(input2), GetTensorShape(output), \
+               GetTensorData<dtype>(output))
+    if (input1->type == kTfLiteInt8) {
       if (need_broadcast) {
-        TF_LITE_MUL(reference_ops, BroadcastMul4DSlow);
+        TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
       } else {
-        TF_LITE_MUL(reference_ops, Mul);
+        TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
       }
     } else {
-      if (need_broadcast) {
-        TF_LITE_MUL(optimized_ops, BroadcastMulFivefold);
+      // type == kTfLiteUInt8
+      if (kernel_type == kReference) {
+        if (need_broadcast) {
+          TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
+        } else {
+          TF_LITE_MUL(reference_ops, Mul, uint8_t);
+        }
       } else {
-        TF_LITE_MUL(optimized_ops, Mul);
+        if (need_broadcast) {
+          TF_LITE_MUL(optimized_ops, BroadcastMulFivefold, uint8_t);
+        } else {
+          TF_LITE_MUL(optimized_ops, Mul, uint8_t);
+        }
       }
     }
 #undef TF_LITE_MUL
@@ -198,8 +213,8 @@
     }
 #undef TF_LITE_MUL
   } else if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
-             output->type == kTfLiteUInt8) {
-#define TF_LITE_MUL(type, opname)                                      \
+             (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8)) {
+#define TF_LITE_MUL(type, opname, output_dtype)                        \
   tflite::ArithmeticParams op_params;                                  \
   SetActivationParams(data->output_activation_min,                     \
                       data->output_activation_max, &op_params);        \
@@ -207,11 +222,15 @@
   type::opname(op_params, GetTensorShape(input1),                      \
                GetTensorData<int16_t>(input1), GetTensorShape(input2), \
                GetTensorData<int16_t>(input2), GetTensorShape(output), \
-               GetTensorData<uint8_t>(output))
-    if (kernel_type == kReference) {
-      TF_LITE_MUL(reference_ops, Mul);
+               GetTensorData<output_dtype>(output))
+    if (output->type == kTfLiteInt8) {
+      TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
     } else {
-      TF_LITE_MUL(optimized_ops, Mul);
+      if (kernel_type == kReference) {
+        TF_LITE_MUL(reference_ops, Mul, uint8_t);
+      } else {
+        TF_LITE_MUL(optimized_ops, Mul, uint8_t);
+      }
     }
 #undef TF_LITE_MUL
   } else {
@@ -233,14 +252,15 @@
 
   if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
     EvalMul<kernel_type>(context, node, params, data, input1, input2, output);
-  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
+  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
+             output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(
         context, EvalQuantized<kernel_type>(context, node, params, data, input1,
                                             input2, output));
   } else {
     context->ReportError(context,
-                         "Mul only supports FLOAT32, INT32 and quantized UINT8 "
-                         "and INT16 now, got %d.",
+                         "Mul only supports FLOAT32, INT32 and quantized UINT8,"
+                         " INT8 and INT16 now, got %d.",
                          output->type);
     return kTfLiteError;
   }
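The Prepare path above folds the three scales into one real multiplier, real_multiplier = s1 * s2 / s_out, and QuantizeMultiplierSmallerThanOneExp re-expresses it as a Q31 significand plus a right shift so Eval can stay in integer arithmetic. A hedged sketch of that decomposition via std::frexp (mimicking the idea, not the helper's exact rounding and edge-case handling):

    #include <cmath>
    #include <cstdint>
    #include <iostream>

    // Decompose 0 < m < 1 as q * 2^shift with q in [0.5, 1), storing q as a
    // rounded Q31 integer. Edge cases (q rounding up to 2^31) are ignored.
    void QuantizeMultiplier(double m, int32_t* quantized, int* shift) {
      const double q = std::frexp(m, shift);  // m == q * 2^shift
      *quantized = static_cast<int32_t>(std::round(q * (1LL << 31)));
    }

    int main() {
      // E.g. input scales 0.02 and 0.02 with output scale 0.04 -> m = 0.01.
      int32_t q;
      int shift;
      QuantizeMultiplier(0.01, &q, &shift);
      std::cout << q << " * 2^" << shift - 31 << '\n';
      // prints: 1374389535 * 2^-37, i.e. 0.01 ~= (q / 2^31) * 2^-6
    }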
diff --git a/tensorflow/lite/kernels/mul_test.cc b/tensorflow/lite/kernels/mul_test.cc
index 200cc26..96f5a8a 100644
--- a/tensorflow/lite/kernels/mul_test.cc
+++ b/tensorflow/lite/kernels/mul_test.cc
@@ -73,9 +73,10 @@
  public:
   using BaseMulOpModel::BaseMulOpModel;
 
+  template <typename integer_dtype>
   std::vector<float> GetDequantizedOutput() {
-    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+    return Dequantize<integer_dtype>(ExtractVector<integer_dtype>(output_),
+                                     GetScale(output_), GetZeroPoint(output_));
   }
 
   std::vector<float> GetDequantizedOutputInt16() {
@@ -191,19 +192,28 @@
   }
 }
 
-TEST(QuantizedMulOpTest, NoActivation) {
-  QuantizedMulOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                        {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                        {TensorType_UINT8, {}, -1.0, 1.0},
+template <TensorType tensor_type, typename integer_dtype>
+void NoActivation() {
+  QuantizedMulOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                        {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                        {tensor_type, {}, -1.0, 1.0},
                         ActivationFunctionType_NONE);
-  m.QuantizeAndPopulate<uint8_t>(m.input1(), {-0.8, 0.2, 0.9, 0.7});
-  m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.6, 0.4, 0.9, 0.8});
+  m.QuantizeAndPopulate<integer_dtype>(m.input1(), {-0.8, 0.2, 0.9, 0.7});
+  m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.6, 0.4, 0.9, 0.8});
   m.Invoke();
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
               ElementsAreArray(ArrayFloatNear({-0.48, 0.08, 0.81, 0.56},
                                               kQuantizedTolerance)));
 }
 
+TEST(QuantizedMulOpTest, NoActivationUInt8) {
+  NoActivation<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedMulOpTest, NoActivationInt8) {
+  NoActivation<TensorType_INT8, int8_t>();
+}
+
 TEST(QuantizedMulOpTest, NoActivationInt16) {
   const float kMin = -1.f;
   const float kMax = 32767.f / 32768.f;
@@ -219,23 +229,32 @@
                                               kQuantizedToleranceInt16)));
 }
 
-TEST(QuantizedMulOpTest, NoActivationInt16WithUint8Output) {
+template <TensorType tensor_type, typename integer_dtype>
+void NoActivationInt16With8BitOutput() {
   const float kMinInt16 = -1.f;
   const float kMaxInt16 = 32767.f / 32768.f;
   const float kMinUint8 = -1.f;
   const float kMaxUint8 = 127.f / 128.f;
   QuantizedMulOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMinInt16, kMaxInt16},
                         {TensorType_INT16, {1, 2, 2, 1}, kMinInt16, kMaxInt16},
-                        {TensorType_UINT8, {}, kMinUint8, kMaxUint8},
+                        {tensor_type, {}, kMinUint8, kMaxUint8},
                         ActivationFunctionType_NONE);
   m.QuantizeAndPopulate<int16_t>(m.input1(), {-0.8, 0.2, 0.9, 0.7});
   m.QuantizeAndPopulate<int16_t>(m.input2(), {0.6, 0.4, 0.9, 0.8});
   m.Invoke();
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
               ElementsAreArray(ArrayFloatNear({-0.48, 0.08, 0.81, 0.56},
                                               kQuantizedTolerance)));
 }
 
+TEST(QuantizedMulOpTest, NoActivationInt16WithUint8Output) {
+  NoActivationInt16With8BitOutput<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedMulOpTest, NoActivationInt16WithInt8Output) {
+  NoActivationInt16With8BitOutput<TensorType_INT8, int8_t>();
+}
+
 // For quantized Mul, the error shouldn't exceed 2*step.
 float GetTolerance(int min, int max) {
   float kQuantizedStep = (max - min) / 255.0;
@@ -243,25 +262,35 @@
   return kQuantizedTolerance;
 }
 
-TEST(QuantizedMulOpTest, WithBroadcast) {
+template <TensorType tensor_type, typename integer_dtype>
+void WithBroadcast() {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedMulOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-                          {TensorType_UINT8, {}, -3.0, 3.0},  // always a scalar
-                          {TensorType_UINT8, {}, -3.0, 3.0},
+    QuantizedMulOpModel m({tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, {}, -3.0, 3.0},  // always a scalar
+                          {tensor_type, {}, -3.0, 3.0},
                           ActivationFunctionType_NONE);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.1});
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
+                                         {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.1});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(),
+    EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
                 ElementsAreArray(ArrayFloatNear(
                     {-0.2, 0.02, 0.07, 0.08, 0.11, 0.2}, kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }
 
+TEST(QuantizedMulOpTest, WithBroadcastUInt8) {
+  WithBroadcast<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedMulOpTest, WithBroadcastInt8) {
+  WithBroadcast<TensorType_INT8, int8_t>();
+}
+
 }  // namespace
 }  // namespace tflite
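The templated tests above lean on the QuantizeAndPopulate / Dequantize pair from test_util. A minimal sketch of that round trip under affine quantization, with the scale and zero_point chosen by hand here (the test helpers derive them from the declared float range):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    // Affine quantization: q = clamp(round(f / scale) + zero_point).
    int8_t Quantize(float f, float scale, int zero_point) {
      const int q = static_cast<int>(std::round(f / scale)) + zero_point;
      return static_cast<int8_t>(std::min(127, std::max(-128, q)));
    }

    float Dequantize(int8_t q, float scale, int zero_point) {
      return scale * (q - zero_point);
    }

    int main() {
      const float scale = 2.0f / 256.0f;  // range [-1, 1) over int8
      const int zero_point = 0;
      const int8_t q = Quantize(-0.8f, scale, zero_point);
      std::cout << static_cast<int>(q) << ' '
                << Dequantize(q, scale, zero_point) << '\n';
      // prints: -102 -0.796875, within the ~1/128 tolerance the tests allow
    }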
 
diff --git a/tensorflow/lite/kernels/pack.cc b/tensorflow/lite/kernels/pack.cc
index d15a5a0..e26abaa 100644
--- a/tensorflow/lite/kernels/pack.cc
+++ b/tensorflow/lite/kernels/pack.cc
@@ -39,8 +39,8 @@
   // TODO(renjieliu): Support negative axis.
   TF_LITE_ENSURE(context, data->axis >= 0);
   if (input0->type != kTfLiteInt32 && input0->type != kTfLiteFloat32 &&
-      input0->type != kTfLiteUInt8 && input0->type != kTfLiteInt16 &&
-      input0->type != kTfLiteInt64) {
+      input0->type != kTfLiteUInt8 && input0->type != kTfLiteInt8 &&
+      input0->type != kTfLiteInt16 && input0->type != kTfLiteInt64) {
     context->ReportError(context, "Type '%s' is not supported by pack.",
                          TfLiteTypeGetName(input0->type));
     return kTfLiteError;
@@ -106,6 +106,10 @@
       PackImpl<uint8_t>(context, node, output, data->values_count, data->axis);
       break;
     }
+    case kTfLiteInt8: {
+      PackImpl<int8_t>(context, node, output, data->values_count, data->axis);
+      break;
+    }
     case kTfLiteInt32: {
       PackImpl<int32_t>(context, node, output, data->values_count, data->axis);
       break;
diff --git a/tensorflow/lite/kernels/pack_test.cc b/tensorflow/lite/kernels/pack_test.cc
index 530cc2e..f441115 100644
--- a/tensorflow/lite/kernels/pack_test.cc
+++ b/tensorflow/lite/kernels/pack_test.cc
@@ -191,6 +191,37 @@
               ElementsAreArray({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
 }
 
+// int8
+TEST(PackOpTest, Int8ThreeInputs) {
+  PackOpModel<int8_t> model({TensorType_INT8, {2}}, 0, 3);
+  model.SetInput(0, {1, 4});
+  model.SetInput(1, {2, 5});
+  model.SetInput(2, {3, 6});
+  model.Invoke();
+  EXPECT_THAT(model.GetOutputShape(), ElementsAre(3, 2));
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 4, 2, 5, 3, 6}));
+}
+
+TEST(PackOpTest, Int8ThreeInputsDifferentAxis) {
+  PackOpModel<int8_t> model({TensorType_INT8, {2}}, 1, 3);
+  model.SetInput(0, {1, 4});
+  model.SetInput(1, {2, 5});
+  model.SetInput(2, {3, 6});
+  model.Invoke();
+  EXPECT_THAT(model.GetOutputShape(), ElementsAre(2, 3));
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6}));
+}
+
+TEST(PackOpTest, Int8MultiDimensions) {
+  PackOpModel<int8_t> model({TensorType_INT8, {2, 3}}, 1, 2);
+  model.SetInput(0, {1, 2, 3, 4, 5, 6});
+  model.SetInput(1, {7, 8, 9, 10, 11, 12});
+  model.Invoke();
+  EXPECT_THAT(model.GetOutputShape(), ElementsAre(2, 2, 3));
+  EXPECT_THAT(model.GetOutput(),
+              ElementsAreArray({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+}
+
 }  // namespace
 }  // namespace tflite
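Pack stacks N same-shape tensors along a new axis, so the new int8 tests only differ from the uint8 ones in dtype. A small sketch of the axis-0 versus axis-1 layouts those tests assert (illustrative, not the kernel's PackImpl):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Pack n inputs of shape {2} along `axis`: {n, 2} for axis 0, {2, n} for
    // axis 1, matching Int8ThreeInputs / Int8ThreeInputsDifferentAxis above.
    std::vector<int8_t> Pack(const std::vector<std::vector<int8_t>>& inputs,
                             int axis) {
      const int n = static_cast<int>(inputs.size());
      const int len = static_cast<int>(inputs[0].size());
      std::vector<int8_t> out(n * len);
      for (int i = 0; i < n; ++i) {
        for (int j = 0; j < len; ++j) {
          if (axis == 0) {
            out[i * len + j] = inputs[i][j];  // rows are inputs
          } else {
            out[j * n + i] = inputs[i][j];  // columns are inputs
          }
        }
      }
      return out;
    }

    int main() {
      const std::vector<std::vector<int8_t>> in = {{1, 4}, {2, 5}, {3, 6}};
      for (int v : Pack(in, 0)) std::cout << v << ' ';  // 1 4 2 5 3 6
      std::cout << '\n';
      for (int v : Pack(in, 1)) std::cout << v << ' ';  // 1 2 3 4 5 6
      std::cout << '\n';
    }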
 
diff --git a/tensorflow/lite/kernels/pad.cc b/tensorflow/lite/kernels/pad.cc
index 8e6ed6e..b60b3dd 100644
--- a/tensorflow/lite/kernels/pad.cc
+++ b/tensorflow/lite/kernels/pad.cc
@@ -214,6 +214,31 @@
         }
       }
     } break;
+    case kTfLiteInt8: {
+      int8_t pad_value;
+      if (op_context.constant_values == nullptr) {
+        // Quantized Pad requires that 0 is represented in the quantized
+        // range.
+        TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
+                                    std::numeric_limits<int8_t>::min());
+        TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
+                                    std::numeric_limits<int8_t>::max());
+        pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
+      } else {
+        // Quantized Pad requires that 'constant_values' is represented in the
+        // same quantized range as the input and output tensors.
+        TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
+                          op_context.constant_values->params.zero_point);
+        TF_LITE_ENSURE_EQ(context, op_context.output->params.scale,
+                          op_context.constant_values->params.scale);
+        pad_value = *GetTensorData<int8_t>(op_context.constant_values);
+      }
+      if (op_context.resizing_category == ResizingCategory::kImageStyle) {
+        TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
+      } else {
+        TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
+      }
+    } break;
     case kTfLiteInt32: {
       int32_t pad_value =
           op_context.constant_values == nullptr
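The int8 case above mirrors the uint8 logic for choosing pad_value: with no constant_values input the pad must be real 0, which is exactly the zero_point in the quantized domain; otherwise the constant must share the output's scale and zero_point. A worked sketch of the default case, with illustrative parameters:

    #include <cstdint>
    #include <iostream>

    int main() {
      // Suppose the output is quantized with scale 0.0625 and zero_point -128,
      // so real values span [0, 15.9375].
      const float scale = 0.0625f;
      const int32_t zero_point = -128;
      // With no constant_values tensor, quantized Pad writes real 0, which is
      // the zero_point in the quantized domain:
      const int8_t pad_value = static_cast<int8_t>(zero_point);
      std::cout << static_cast<int>(pad_value) << " dequantizes to "
                << scale * (pad_value - zero_point) << '\n';  // -128 -> 0
    }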
diff --git a/tensorflow/lite/kernels/pad_test.cc b/tensorflow/lite/kernels/pad_test.cc
index 3caa406..97f9526 100644
--- a/tensorflow/lite/kernels/pad_test.cc
+++ b/tensorflow/lite/kernels/pad_test.cc
@@ -24,31 +24,34 @@
 using ::testing::ElementsAreArray;
 using ::testing::Matcher;
 
-template <typename T>
+template <typename RegularInputOutput, typename QuantizedInputOutput>
 class PadOpModel : public SingleOpModel {
  public:
-  void SetInput(std::initializer_list<T> data) {
-    PopulateTensor<T>(input_, data);
+  void SetInput(std::initializer_list<RegularInputOutput> data) {
+    PopulateTensor<RegularInputOutput>(input_, data);
   }
 
   void SetQuantizedInput(std::initializer_list<float> data) {
-    QuantizeAndPopulate<uint8_t>(input_, data);
+    QuantizeAndPopulate<QuantizedInputOutput>(input_, data);
   }
 
   void SetQuantizedPadValue(float data) {
-    QuantizeAndPopulate<uint8_t>(constant_values_, {data});
+    QuantizeAndPopulate<QuantizedInputOutput>(constant_values_, {data});
   }
 
   void SetPaddings(std::initializer_list<int> paddings) {
     PopulateTensor<int>(paddings_, paddings);
   }
 
-  std::vector<T> GetOutput() { return ExtractVector<T>(output_); }
+  std::vector<RegularInputOutput> GetOutput() {
+    return ExtractVector<RegularInputOutput>(output_);
+  }
   std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
 
   std::vector<float> GetDequantizedOutput() {
-    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+    return Dequantize<QuantizedInputOutput>(
+        ExtractVector<QuantizedInputOutput>(output_), GetScale(output_),
+        GetZeroPoint(output_));
   }
 
  protected:
@@ -59,18 +62,18 @@
 };
 
 // Tests the case where paddings is a const tensor. T1 and T2 are the dtypes.
-template <typename T>
-class PadV2OpConstModel : public PadOpModel<T> {
+template <typename T1, typename T2>
+class PadV2OpConstModel : public PadOpModel<T1, T2> {
  public:
   PadV2OpConstModel(const TensorData& input,
                     std::initializer_list<int> paddings_shape,
-                    std::initializer_list<int> paddings, T constant_values,
+                    std::initializer_list<int> paddings, T1 constant_values,
                     const TensorData& output) {
     this->input_ = this->AddInput(input);
     this->paddings_ =
         this->AddConstInput(TensorType_INT32, paddings, paddings_shape);
     this->constant_values_ =
-        this->AddConstInput(GetTensorType<T>(), {constant_values}, {1});
+        this->AddConstInput(GetTensorType<T1>(), {constant_values}, {1});
 
     this->output_ = this->AddOutput(output);
 
@@ -103,7 +106,7 @@
 //    PadOpDynamicModel m(input_shape, paddings_shape, paddings_data);
 //    m.SetInput(input_data);
 //    m.Invoke();
-class PadOpConstModel : public PadOpModel<float> {
+class PadOpConstModel : public PadOpModel<float, uint8_t> {
  public:
   PadOpConstModel(const TensorData& input,
                   std::initializer_list<int> paddings_shape,
@@ -121,16 +124,18 @@
 };
 
 // Test case where paddings is a non-const tensor.
-template <typename T>
-class PadV2OpDynamicModel : public PadOpModel<T> {
+template <typename RegularInputOutput, typename QuantizedInputOutput>
+class PadV2OpDynamicModel
+    : public PadOpModel<RegularInputOutput, QuantizedInputOutput> {
  public:
   PadV2OpDynamicModel(const TensorData& input,
                       std::initializer_list<int> paddings_shape,
-                      T constant_values, const TensorData& output) {
+                      RegularInputOutput constant_values,
+                      const TensorData& output) {
     this->input_ = this->AddInput(input);
     this->paddings_ = this->AddInput(TensorType_INT32);
-    this->constant_values_ =
-        this->AddConstInput(GetTensorType<T>(), {constant_values}, {1});
+    this->constant_values_ = this->AddConstInput(
+        GetTensorType<RegularInputOutput>(), {constant_values}, {1});
     this->output_ = this->AddOutput(output);
 
     this->SetBuiltinOp(BuiltinOperator_PADV2, BuiltinOptions_PadV2Options,
@@ -159,7 +164,7 @@
 //    m.SetInput(input_data);
 //    m.SetPaddings(paddings_data);
 //    m.Invoke();
-class PadOpDynamicModel : public PadOpModel<float> {
+class PadOpDynamicModel : public PadOpModel<float, uint8_t> {
  public:
   PadOpDynamicModel(const TensorData& input,
                     std::initializer_list<int> paddings_shape,
@@ -377,34 +382,34 @@
 
 #ifdef GTEST_HAS_DEATH_TEST
 TEST(PadV2OpTest, TooManyDimensions) {
-  EXPECT_DEATH(PadV2OpConstModel<float>(
-                   {TensorType_FLOAT32, {1, 2, 3, 4, 5, 6, 7, 8, 9}}, {9, 2},
-                   {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, 0.0,
-                   {TensorType_FLOAT32}),
+  typedef PadV2OpConstModel<float, uint8_t> f;
+  EXPECT_DEATH(f({TensorType_FLOAT32, {1, 2, 3, 4, 5, 6, 7, 8, 9}}, {9, 2},
+                 {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, 0.0,
+                 {TensorType_FLOAT32}),
                "dims <= 4");
 }
 
 TEST(PadV2OpTest, UnequalDimensions) {
-  EXPECT_DEATH(
-      PadV2OpConstModel<float>({TensorType_FLOAT32, {1, 1, 2, 1}}, {3, 2},
-                               {1, 1, 2, 2, 3, 3}, 0.0, {TensorType_FLOAT32}),
-      "3 != 4");
+  typedef PadV2OpConstModel<float, uint8_t> f;
+  EXPECT_DEATH(f({TensorType_FLOAT32, {1, 1, 2, 1}}, {3, 2}, {1, 1, 2, 2, 3, 3},
+                 0.0, {TensorType_FLOAT32}),
+               "3 != 4");
 }
 
 TEST(PadV2OpTest, InvalidPadValue) {
-  EXPECT_DEATH(PadV2OpConstModel<float>({TensorType_FLOAT32, {1, 1, 2, 1}},
-                                        {4, 2}, {0, 0, 1, -1, 2, -1, 0, 0}, 0.0,
-                                        {TensorType_FLOAT32}),
+  typedef PadV2OpConstModel<float, uint8_t> f;
+  EXPECT_DEATH(f({TensorType_FLOAT32, {1, 1, 2, 1}}, {4, 2},
+                 {0, 0, 1, -1, 2, -1, 0, 0}, 0.0, {TensorType_FLOAT32}),
                "Pad value has to be greater than equal to 0.");
 }
 #endif
 
-TEST(PadV2OpTest, SimpleConstTest) {
+TEST(PadV2OpTest, SimpleConstTestUint8) {
   // Padding is represented as four 2-D lists representing above padding and
   // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
-  PadV2OpConstModel<float> m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2},
-                             {0, 0, 1, 1, 1, 1, 0, 0}, 0.0,
-                             {TensorType_FLOAT32});
+  PadV2OpConstModel<float, uint8_t> m({TensorType_FLOAT32, {1, 2, 2, 1}},
+                                      {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 0.0,
+                                      {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4,
@@ -412,11 +417,38 @@
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
 }
 
-TEST(PadV2OpTest, SimpleConstFloat32ValuedTest) {
+TEST(PadV2OpTest, SimpleConstTestInt8) {
   // Padding is represented as four 2-D lists representing above padding and
   // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
-  PadV2OpConstModel<float> m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2},
-                             {0, 0, 1, 1, 1, 1, 0, 0}, 5, {TensorType_FLOAT32});
+  PadV2OpConstModel<float, int8_t> m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2},
+                                     {0, 0, 1, 1, 1, 1, 0, 0}, 0.0,
+                                     {TensorType_FLOAT32});
+  m.SetInput({1, 2, 3, 4});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4,
+                                               0, 0, 0, 0, 0}));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+}
+
+TEST(PadV2OpTest, SimpleConstFloat32ValuedTestUint8) {
+  // Padding is represented as four 2-D lists representing above padding and
+  // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
+  PadV2OpConstModel<float, uint8_t> m({TensorType_FLOAT32, {1, 2, 2, 1}},
+                                      {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 5,
+                                      {TensorType_FLOAT32});
+  m.SetInput({1, 2, 3, 4});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4,
+                                               5, 5, 5, 5, 5}));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+}
+
+TEST(PadV2OpTest, SimpleConstFloat32ValuedTestInt8) {
+  // Padding is represented as four 2-D lists representing above padding and
+  // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
+  PadV2OpConstModel<float, int8_t> m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2},
+                                     {0, 0, 1, 1, 1, 1, 0, 0}, 5,
+                                     {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4,
@@ -427,8 +459,9 @@
 TEST(PadV2OpTest, Simple4DConstFloat32ValuedTest) {
   // Padding is represented as four 2-D lists representing above padding and
   // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
-  PadV2OpConstModel<float> m({TensorType_FLOAT32, {1, 1, 2, 1}}, {4, 2},
-                             {0, 1, 0, 0, 0, 0, 0, 1}, 5, {TensorType_FLOAT32});
+  PadV2OpConstModel<float, uint8_t> m({TensorType_FLOAT32, {1, 1, 2, 1}},
+                                      {4, 2}, {0, 1, 0, 0, 0, 0, 0, 1}, 5,
+                                      {TensorType_FLOAT32});
   m.SetInput({3, 3});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 5, 3, 5, 5, 5, 5, 5}));
@@ -438,8 +471,9 @@
 TEST(PadV2OpTest, SimpleConstInt32ValuedTest) {
   // Padding is represented as four 2-D lists representing above padding and
   // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
-  PadV2OpConstModel<int32_t> m({TensorType_INT32, {1, 2, 2, 1}}, {4, 2},
-                               {0, 0, 1, 1, 1, 1, 0, 0}, 5, {TensorType_INT32});
+  PadV2OpConstModel<int32_t, uint8_t> m({TensorType_INT32, {1, 2, 2, 1}},
+                                        {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 5,
+                                        {TensorType_INT32});
   m.SetInput({1, 2, 3, 4});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4,
@@ -448,8 +482,8 @@
 }
 
 TEST(PadV2OpTest, SimpleDynamicTest) {
-  PadV2OpDynamicModel<float> m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, 0.0,
-                               {TensorType_FLOAT32});
+  PadV2OpDynamicModel<float, uint8_t> m({TensorType_FLOAT32, {1, 2, 2, 1}},
+                                        {4, 2}, 0.0, {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4});
   m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0});
   m.Invoke();
@@ -459,8 +493,8 @@
 }
 
 TEST(PadV2OpTest, SimpleDynamicValuedTest) {
-  PadV2OpDynamicModel<float> m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, 5,
-                               {TensorType_FLOAT32});
+  PadV2OpDynamicModel<float, uint8_t> m({TensorType_FLOAT32, {1, 2, 2, 1}},
+                                        {4, 2}, 5, {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4});
   m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0});
   m.Invoke();
@@ -470,8 +504,9 @@
 }
 
 TEST(PadV2OpTest, AdvancedConstTest) {
-  PadV2OpConstModel<float> m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2},
-                             {0, 0, 0, 2, 1, 3, 0, 0}, 0, {TensorType_FLOAT32});
+  PadV2OpConstModel<float, uint8_t> m({TensorType_FLOAT32, {1, 2, 3, 1}},
+                                      {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0}, 0,
+                                      {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4, 5, 6});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(),
@@ -481,8 +516,8 @@
 }
 
 TEST(PadV2OpTest, AdvancedDynamicTest) {
-  PadV2OpDynamicModel<float> m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2}, 0,
-                               {TensorType_FLOAT32});
+  PadV2OpDynamicModel<float, uint8_t> m({TensorType_FLOAT32, {1, 2, 3, 1}},
+                                        {4, 2}, 0, {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4, 5, 6});
   m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0});
   m.Invoke();
@@ -505,21 +540,20 @@
 TEST_F(QuantizedPadV2OpTest, ZeroNotInQuantizationRange) {
   // The test_util and actual quantization code currently ensure that the range
   // must include zero, but if that ever changes, this test will catch it.
-  EXPECT_DEATH(
-      PadV2OpConstModel<float> m({TensorType_UINT8, {1, 2, 2, 1}, 1.0, 2.0},
-                                 {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 0,
-                                 {TensorType_UINT8, {}, 1.0, 2.0}),
-      ".*Check failed: f_min <= 0.*");
+  typedef PadV2OpConstModel<float, uint8_t> f;
+  EXPECT_DEATH(f({TensorType_UINT8, {1, 2, 2, 1}, 1.0, 2.0}, {4, 2},
+                 {0, 0, 1, 1, 1, 1, 0, 0}, 0, {TensorType_UINT8, {}, 1.0, 2.0}),
+               ".*Check failed: f_min <= 0.*");
 }
 #endif
 
 TEST_F(QuantizedPadV2OpTest, SimpleConstTest) {
   // Padding is represented as four 2-D lists representing above padding and
   // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
-  PadV2OpConstModel<uint8_t> m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                               {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0},
-                               {TensorType_UINT8, {1}, -1.0, 1.0},
-                               {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpConstModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2},
+      {0, 0, 1, 1, 1, 1, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0},
+      {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7});
   m.SetQuantizedPadValue(0);
   m.Invoke();
@@ -531,9 +565,9 @@
 }
 
 TEST_F(QuantizedPadV2OpTest, SimpleDynamicTest) {
-  PadV2OpDynamicModel<uint8_t> m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                                 {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0},
-                                 {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpDynamicModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2},
+      {TensorType_UINT8, {1}, -1.0, 1.0}, {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7});
   m.SetQuantizedPadValue(0);
   m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0});
@@ -546,10 +580,10 @@
 }
 
 TEST_F(QuantizedPadV2OpTest, AdvancedConstTest) {
-  PadV2OpConstModel<uint8_t> m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0},
-                               {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0},
-                               {TensorType_UINT8, {1}, -1.0, 1.0},
-                               {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpConstModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2},
+      {0, 0, 0, 2, 1, 3, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0},
+      {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3});
   m.SetQuantizedPadValue(0);
   m.Invoke();
@@ -562,9 +596,9 @@
 }
 
 TEST_F(QuantizedPadV2OpTest, AdvancedDynamicTest) {
-  PadV2OpDynamicModel<uint8_t> m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0},
-                                 {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0},
-                                 {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpDynamicModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2},
+      {TensorType_UINT8, {1}, -1.0, 1.0}, {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3});
   m.SetQuantizedPadValue(0);
   m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0});
@@ -580,10 +614,10 @@
 TEST_F(QuantizedPadV2OpTest, SimpleConstValuedTest) {
   // Padding is represented as four 2-D lists representing above padding and
   // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
-  PadV2OpConstModel<uint8_t> m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                               {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0},
-                               {TensorType_UINT8, {1}, -1.0, 1.0},
-                               {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpConstModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2},
+      {0, 0, 1, 1, 1, 1, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0},
+      {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7});
   m.SetQuantizedPadValue(-0.5);
   m.Invoke();
@@ -596,9 +630,9 @@
 }
 
 TEST_F(QuantizedPadV2OpTest, SimpleDynamicValuedTest) {
-  PadV2OpDynamicModel<uint8_t> m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                                 {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0},
-                                 {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpDynamicModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2},
+      {TensorType_UINT8, {1}, -1.0, 1.0}, {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7});
   m.SetQuantizedPadValue(-0.5);
   m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0});
@@ -612,10 +646,10 @@
 }
 
 TEST_F(QuantizedPadV2OpTest, AdvancedConstValuedTest) {
-  PadV2OpConstModel<uint8_t> m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0},
-                               {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0},
-                               {TensorType_UINT8, {1}, -1.0, 1.0},
-                               {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpConstModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2},
+      {0, 0, 0, 2, 1, 3, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0},
+      {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3});
   m.SetQuantizedPadValue(-0.5);
   m.Invoke();
@@ -629,9 +663,9 @@
 }
 
 TEST_F(QuantizedPadV2OpTest, AdvancedDynamicValuedTest) {
-  PadV2OpDynamicModel<uint8_t> m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0},
-                                 {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0},
-                                 {TensorType_UINT8, {}, -1.0, 1.0});
+  PadV2OpDynamicModel<uint8_t, uint8_t> m(
+      {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2},
+      {TensorType_UINT8, {1}, -1.0, 1.0}, {TensorType_UINT8, {}, -1.0, 1.0});
   m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3});
   m.SetQuantizedPadValue(-0.5);
   m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0});
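The ZeroNotInQuantizationRange death test in this file encodes the constraint quantized Pad relies on: real 0 must fall inside [min, max], otherwise the zero_point lands outside the integer range. A quick illustration using the nominal affine formulas (test_util's exact range nudging may differ):

    #include <iostream>

    int main() {
      // Range [1.0, 2.0] over uint8: scale = (max - min) / 255.
      const float scale = (2.0f - 1.0f) / 255.0f;
      // The nominal zero_point solves min == scale * (qmin - zp), qmin == 0:
      const float zero_point = 0.0f - 1.0f / scale;
      std::cout << zero_point << '\n';  // -255: far outside [0, 255]
    }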
diff --git a/tensorflow/lite/kernels/pooling.cc b/tensorflow/lite/kernels/pooling.cc
index e6155fc..bdf736d 100644
--- a/tensorflow/lite/kernels/pooling.cc
+++ b/tensorflow/lite/kernels/pooling.cc
@@ -226,9 +226,9 @@
 }
 
 template <KernelType kernel_type>
-void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                      TfLitePoolParams* params, OpData* data,
-                      const TfLiteTensor* input, TfLiteTensor* output) {
+void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
+                           TfLitePoolParams* params, OpData* data,
+                           const TfLiteTensor* input, TfLiteTensor* output) {
   int32_t activation_min;
   int32_t activation_max;
   CalculateActivationRangeUint8(params->activation, output, &activation_min,
@@ -255,6 +255,31 @@
 }
 
 template <KernelType kernel_type>
+void MaxEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
+                          TfLitePoolParams* params, OpData* data,
+                          const TfLiteTensor* input, TfLiteTensor* output) {
+  int32_t activation_min;
+  int32_t activation_max;
+  CalculateActivationRangeInt8(params->activation, output, &activation_min,
+                               &activation_max);
+#define TF_LITE_MAX_POOL(type)                                        \
+  tflite::PoolParams op_params;                                       \
+  op_params.stride_height = params->stride_height;                    \
+  op_params.stride_width = params->stride_width;                      \
+  op_params.filter_height = params->filter_height;                    \
+  op_params.filter_width = params->filter_width;                      \
+  op_params.padding_values.height = data->padding.height;             \
+  op_params.padding_values.width = data->padding.width;               \
+  op_params.quantized_activation_min = activation_min;                \
+  op_params.quantized_activation_max = activation_max;                \
+  type::MaxPool(op_params, GetTensorShape(input),                     \
+                GetTensorData<int8_t>(input), GetTensorShape(output), \
+                GetTensorData<int8_t>(output))
+  TF_LITE_MAX_POOL(reference_integer_ops);
+#undef TF_LITE_MAX_POOL
+}
+
+template <KernelType kernel_type>
 void L2EvalFloat(TfLiteContext* context, TfLiteNode* node,
                  TfLitePoolParams* params, OpData* data,
                  const TfLiteTensor* input, TfLiteTensor* output) {
@@ -321,7 +346,12 @@
       MaxEvalFloat<kernel_type>(context, node, params, data, input, output);
       break;
     case kTfLiteUInt8:
-      MaxEvalQuantized<kernel_type>(context, node, params, data, input, output);
+      MaxEvalQuantizedUInt8<kernel_type>(context, node, params, data, input,
+                                         output);
+      break;
+    case kTfLiteInt8:
+      MaxEvalQuantizedInt8<kernel_type>(context, node, params, data, input,
+                                        output);
       break;
     default:
       context->ReportError(context, "Type %d not currently supported.",
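MaxEvalQuantizedInt8 above hands reference_integer_ops::MaxPool the precomputed activation clamp; the inner loop is just a running max over each window followed by that clamp. A minimal sketch for a {2, 4} plane with a 2x2 filter and stride 2 (illustrative; the real kernels also handle padding and arbitrary strides):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Max pool a {2, width} int8 plane with a 2x2 filter and stride 2, then
    // clamp with the activation range.
    std::vector<int8_t> MaxPool2x2(const std::vector<int8_t>& in, int width,
                                   int8_t act_min, int8_t act_max) {
      std::vector<int8_t> out;
      for (int x = 0; x + 1 < width; x += 2) {
        const int8_t m = std::max(std::max(in[x], in[x + 1]),
                                  std::max(in[width + x], in[width + x + 1]));
        out.push_back(std::min(act_max, std::max(act_min, m)));
      }
      return out;
    }

    int main() {
      // Same layout as the QuantizedInt8PoolingOpTest.MaxPool input below.
      const std::vector<int8_t> in = {0, -6, 2,   4,  //
                                      3, 2,  -10, 7};
      for (int v : MaxPool2x2(in, 4, -128, 127)) std::cout << v << ' ';
      std::cout << '\n';  // prints: 3 7
    }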
diff --git a/tensorflow/lite/kernels/pooling_test.cc b/tensorflow/lite/kernels/pooling_test.cc
index e1b7934..4627d7a 100644
--- a/tensorflow/lite/kernels/pooling_test.cc
+++ b/tensorflow/lite/kernels/pooling_test.cc
@@ -97,6 +97,24 @@
   }
 };
 
+// Replicate each entry in a vector n times along depth (innermost dimension).
+// The values are incremented by delta, creating ramps offset by each input
+// value. This is used to create simple and predictable variation.
+std::vector<float> ReplicateDepthRamp(const std::vector<float>& image_plane,
+                                      int n, float delta) {
+  const int size = image_plane.size();
+  std::vector<float> ramped_data(n * size);
+  // The input is treated as 1-D even if it is logically multi-dimensional.
+  for (int input_index = 0; input_index < size; ++input_index) {
+    for (int depth = 0; depth < n; ++depth) {
+      ramped_data[n * input_index + depth] =
+          image_plane[input_index] + depth * delta;
+    }
+  }
+
+  return ramped_data;
+}
+
 TEST(FloatPoolingOpTest, AveragePool) {
   FloatPoolingOpModel m(BuiltinOperator_AVERAGE_POOL_2D,
                         /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}},
@@ -147,6 +165,31 @@
   EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear({16})));
 }
 
+TEST(QuantizedPoolingOpTest, AveragePoolLargeDepth) {
+  // Test with a larger depth that is not a multiple of the tranche size, nor
+  // of register-oriented widths such as 8 and 16.
+  constexpr int depth = 1999;  // Prime number.
+  QuantizedPoolingOpModel m(
+      BuiltinOperator_AVERAGE_POOL_2D,
+      /*input=*/{TensorType_UINT8, {1, 2, 4, depth}, 0, 15.9375},
+      /*filter_width=*/2, /*filter_height=*/2,
+      /*output=*/{TensorType_UINT8, {}, 0, 15.9375});
+
+  std::vector<float> input_image_plane({
+      0.f, 6.f, 2.f, 4.f,   //
+      3.f, 2.f, 10.f, 7.f,  //
+  });
+  std::vector<float> output_image_plane({2.75f, 5.75f});
+
+  m.SetInput(ReplicateDepthRamp(input_image_plane, depth, 1.f / 512.f));
+  m.Invoke();
+
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(ArrayFloatNear(
+                  ReplicateDepthRamp(output_image_plane, depth, 1.f / 512.f),
+                  1. / 32.f)));
+}
+
 // Test quantized AveragePool with int8 input and output. The input is the same
 // as the uint8 test QuantizedPoolingOpTest.AveragePool. The float output is
 // identical to uint8 test and quantized output is identical to uint8 test with
@@ -204,7 +247,7 @@
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({6, 10}));
 }
 
-TEST(QuantizedPoolingOpTest, MaxPool) {
+TEST(QuantizedUInt8PoolingOpTest, MaxPool) {
   // Choose the input ranges carefully so that the dequantized output matches
   // the results of the float model above.
   QuantizedPoolingOpModel m(
@@ -223,6 +266,50 @@
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({96, 160}));
 }
 
+TEST(QuantizedPoolingOpTest, MaxPoolLargeDepth) {
+  // Test with a larger depth that is not a multiple of the tranche size, nor
+  // of register-oriented widths such as 8 and 16.
+  constexpr int depth = 1999;  // Prime number.
+  QuantizedPoolingOpModel m(
+      BuiltinOperator_MAX_POOL_2D,
+      /*input=*/{TensorType_UINT8, {1, 2, 4, depth}, 0, 15.9375},
+      /*filter_width=*/2, /*filter_height=*/2,
+      /*output=*/{TensorType_UINT8, {}, 0, 15.9375});
+
+  std::vector<float> input_image_plane({
+      0.f, 6.f, 2.f, 4.f,   //
+      3.f, 2.f, 10.f, 7.f,  //
+  });
+  std::vector<float> output_image_plane({6.f, 10.f});
+
+  m.SetInput(ReplicateDepthRamp(input_image_plane, depth, 1.f / 512.f));
+  m.Invoke();
+
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(ArrayFloatNear(
+                  ReplicateDepthRamp(output_image_plane, depth, 1.f / 512.f),
+                  1. / 32.f)));
+}
+
+TEST(QuantizedInt8PoolingOpTest, MaxPool) {
+  // Choose the input ranges carefully so that the dequantized output matches
+  // the results of the float model above.
+  SymmetricQuantizedPoolingOpModel m(
+      BuiltinOperator_MAX_POOL_2D,
+      /*input=*/{TensorType_INT8, {1, 2, 4, 1}, 0, 15.9375},
+      /*filter_width=*/2, /*filter_height=*/2,
+      /*output=*/{TensorType_INT8, {}, 0, 15.9375});
+  m.SetInput({
+      0, -6, 2, 4,   //
+      3, 2, -10, 7,  //
+  });
+  m.Invoke();
+
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(ArrayFloatNear({3, 7})));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-80, -16}));
+}
+
 TEST(FloatPoolingOpTest, L2Pool) {
   FloatPoolingOpModel m(BuiltinOperator_L2_POOL_2D,
                         /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}},
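The raw int8 expectations in QuantizedInt8PoolingOpTest.MaxPool above ({-80, -16}) follow from the declared range [0, 15.9375]: scale = 15.9375 / 255 = 0.0625 and zero_point = -128, assuming test_util's usual affine mapping. A quick check:

    #include <cmath>
    #include <iostream>

    int main() {
      const float scale = 15.9375f / 255.0f;  // 0.0625
      const int zero_point = -128;            // real 0 sits at the int8 minimum
      for (float f : {3.0f, 7.0f}) {
        const int q = static_cast<int>(std::round(f / scale)) + zero_point;
        std::cout << f << " -> " << q << '\n';  // 3 -> -80, 7 -> -16
      }
    }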
diff --git a/tensorflow/lite/kernels/rank.cc b/tensorflow/lite/kernels/rank.cc
new file mode 100644
index 0000000..8cef1f5
--- /dev/null
+++ b/tensorflow/lite/kernels/rank.cc
@@ -0,0 +1,65 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace rank {
+
+constexpr int kInputTensor = 0;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  output->type = kTfLiteInt32;
+
+  // Rank produces a 0-D int32 Tensor representing the rank of the input.
+  TfLiteIntArray* output_size = TfLiteIntArrayCreate(0);
+  return context->ResizeTensor(context, output, output_size);
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(output), 0);
+
+  if (output->type == kTfLiteInt32) {
+    int32_t* output_data = GetTensorData<int32_t>(output);
+    *output_data = NumDimensions(input);
+  } else {
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace rank
+
+TfLiteRegistration* Register_RANK() {
+  static TfLiteRegistration r = {nullptr, nullptr, rank::Prepare, rank::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/kernels/rank_test.cc b/tensorflow/lite/kernels/rank_test.cc
new file mode 100644
index 0000000..3c31fc5
--- /dev/null
+++ b/tensorflow/lite/kernels/rank_test.cc
@@ -0,0 +1,91 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <initializer_list>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class RankOpModel : public SingleOpModel {
+ public:
+  RankOpModel(std::initializer_list<int> input_shape, TensorType input_type) {
+    TensorType output_type = TensorType_INT32;
+    input_ = AddInput(input_type);
+    output_ = AddOutput(output_type);
+    SetBuiltinOp(BuiltinOperator_RANK, BuiltinOptions_RankOptions,
+                 CreateRankOptions(builder_).Union());
+    BuildInterpreter({input_shape});
+  }
+
+  TfLiteStatus InvokeWithResult() { return interpreter_->Invoke(); }
+
+  int input() { return input_; }
+
+  std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST(RankOpTest, InputTypeFloat) {
+  RankOpModel model({1, 3, 1, 3, 5}, TensorType_FLOAT32);
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({5}));
+  EXPECT_TRUE(model.GetOutputShape().empty());
+}
+
+TEST(RankOpTest, InputTypeInt) {
+  RankOpModel model({1, 3, 1, 3, 5}, TensorType_INT32);
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({5}));
+  EXPECT_TRUE(model.GetOutputShape().empty());
+}
+
+TEST(RankOpTest, ScalarTensor) {
+  RankOpModel model({}, TensorType_FLOAT32);
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({0}));
+  EXPECT_TRUE(model.GetOutputShape().empty());
+}
+
+TEST(RankOpTest, EmptyTensor) {
+  RankOpModel model({1, 0}, TensorType_FLOAT32);
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({2}));
+  EXPECT_TRUE(model.GetOutputShape().empty());
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc
index 1193f84..3fb2715 100644
--- a/tensorflow/lite/kernels/reduce.cc
+++ b/tensorflow/lite/kernels/reduce.cc
@@ -180,6 +180,9 @@
     case kTfLiteUInt8:
       temp_sum->type = kTfLiteInt32;
       break;
+    case kTfLiteInt8:
+      temp_sum->type = kTfLiteInt32;
+      break;
     case kTfLiteBool:
       temp_sum->type = kTfLiteBool;
       break;
@@ -465,6 +468,9 @@
     case kTfLiteUInt8:
       return EvalType<uint8_t>(context, node, &op_context, reduce_type);
       break;
+    case kTfLiteInt8:
+      return EvalType<int8_t>(context, node, &op_context, reduce_type);
+      break;
     case kTfLiteBool:
       return EvalType<bool>(context, node, &op_context, reduce_type);
       break;
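Routing the 8-bit types to an int32 temp_sum matters because a reduction may accumulate thousands of values of magnitude up to 127 or 255; an 8-bit accumulator would wrap almost immediately. A sketch of the wide-accumulator mean pattern:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Mean of int8 data via an int32 accumulator; summing in int8 would wrap
    // after only a handful of elements.
    int8_t MeanInt8(const std::vector<int8_t>& data) {
      int32_t sum = 0;
      for (int8_t v : data) sum += v;  // widened accumulation
      return static_cast<int8_t>(sum / static_cast<int32_t>(data.size()));
    }

    int main() {
      const std::vector<int8_t> data(100, 100);  // sum = 10000, beyond int8
      std::cout << static_cast<int>(MeanInt8(data)) << '\n';  // prints: 100
    }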
diff --git a/tensorflow/lite/kernels/reduce_test.cc b/tensorflow/lite/kernels/reduce_test.cc
index 16c5939..373fa56 100644
--- a/tensorflow/lite/kernels/reduce_test.cc
+++ b/tensorflow/lite/kernels/reduce_test.cc
@@ -38,9 +38,10 @@
     return ExtractVector<T>(output_);
   }
 
+  template <typename T>
   std::vector<float> GetDequantizedOutput() {
-    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+    return Dequantize<T>(ExtractVector<T>(output_), GetScale(output_),
+                         GetZeroPoint(output_));
   }
 
   std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
@@ -280,7 +281,7 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 3}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(ArrayFloatNear({0.25098, 0.25098, 0.25098},
                                               kQuantizedTolerance)));
 }
@@ -294,7 +295,7 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 2}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({0.235294, 0.313726}, kQuantizedTolerance)));
 }
@@ -361,8 +362,9 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
-  EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
-                                            {0.4, 0.4}, kQuantizedTolerance)));
+  EXPECT_THAT(
+      m.GetDequantizedOutput<uint8_t>(),
+      ElementsAreArray(ArrayFloatNear({0.4, 0.4}, kQuantizedTolerance)));
 }
 
 TEST(ConstUint8MeanOpTest, KeepDims) {
@@ -374,7 +376,7 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
       ElementsAreArray(ArrayFloatNear({0.3, 0.35, 0.55}, kQuantizedTolerance)));
 }
 
@@ -390,7 +392,7 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
       ElementsAreArray(ArrayFloatNear({-1.75, -1.68}, kQuantizedTolerance)));
 }
 
@@ -406,7 +408,7 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
       ElementsAreArray(ArrayFloatNear({9.2815, 0.3695}, kQuantizedTolerance)));
 }
 
@@ -420,7 +422,7 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), IsEmpty());
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(ArrayFloatNear({0.643}, kQuantizedTolerance)));
 }
 
@@ -433,7 +435,7 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
       ElementsAreArray(ArrayFloatNear({0.3, 0.35, 0.55}, kQuantizedTolerance)));
 }
 
@@ -526,7 +528,7 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({-0.823529, -0.815686}, kQuantizedTolerance)));
 }
@@ -539,8 +541,9 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
-  EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
-                                            {1.2, 1.2}, kQuantizedTolerance)));
+  EXPECT_THAT(
+      m.GetDequantizedOutput<uint8_t>(),
+      ElementsAreArray(ArrayFloatNear({1.2, 1.2}, kQuantizedTolerance)));
 }
 
 TEST(ConstUint8SumOpTest, KeepDims) {
@@ -551,7 +554,7 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(ArrayFloatNear({-0.407843, -0.313726, 0.0941177},
                                               kQuantizedTolerance)));
 }
@@ -567,7 +570,7 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({1.48235, 1.64706}, kQuantizedTolerance)));
 }
@@ -584,7 +587,7 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
       ElementsAreArray(ArrayFloatNear({6.47059, 10.698}, kQuantizedTolerance)));
 }
 
@@ -741,7 +744,20 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(
+                  ArrayFloatNear({0.501961, 0.603922}, kQuantizedTolerance)));
+}
+
+TEST(ConstInt8MaxOpTest, NotKeepDims) {
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+  MaxOpConstModel m({TensorType_INT8, {1, 3, 2}, -1.0, 1.0},
+                    {TensorType_INT8, {2}, -1.0, 1.0}, {1}, {1}, false);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({0.501961, 0.603922}, kQuantizedTolerance)));
 }
@@ -754,7 +770,20 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(
+                  ArrayFloatNear({0.4, 0.4, 0.603922}, kQuantizedTolerance)));
+}
+
+TEST(ConstInt8MaxOpTest, KeepDims) {
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+  MaxOpConstModel m({TensorType_INT8, {3, 2}, -1.0, 1.0},
+                    {TensorType_INT8, {3}, -1.0, 1.0}, {1}, {1}, true);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({0.4, 0.4, 0.603922}, kQuantizedTolerance)));
 }
@@ -770,7 +799,23 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(
+                  ArrayFloatNear({1.2902, 0.247059}, kQuantizedTolerance)));
+}
+
+TEST(DynamicInt8MaxOpTest, NotKeepDims) {
+  float kQuantizedTolerance = GetTolerance(-5.0, 2.0);
+  std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
+  MaxOpDynamicModel m({TensorType_INT8, {2, 2}, -5.0, 2.0},
+                      {TensorType_INT8, {2}, -5.0, 2.0},
+                      {TensorType_INT32, {1}}, false);
+  std::vector<int> axis = {1};
+  m.SetAxis(axis);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({1.2902, 0.247059}, kQuantizedTolerance)));
 }
@@ -786,7 +831,23 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(
+                  ArrayFloatNear({11.1294, 0.862745}, kQuantizedTolerance)));
+}
+
+TEST(DynamicInt8MaxOpTest, KeepDims) {
+  float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
+  std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
+  MaxOpDynamicModel m({TensorType_INT8, {2, 2}, -10.0, 12.0},
+                      {TensorType_INT8, {2}, -10.0, 12.0},
+                      {TensorType_INT32, {1}}, true);
+  std::vector<int> axis = {0};
+  m.SetAxis(axis);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({11.1294, 0.862745}, kQuantizedTolerance)));
 }
@@ -801,7 +862,21 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), IsEmpty());
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
+}
+
+TEST(DynamicInt8MaxOpTest, Scalar) {
+  float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
+  std::vector<float> data = {11.14};
+  MaxOpDynamicModel m({TensorType_INT8, {}, -10.0, 12.0},
+                      {TensorType_INT8, {}, -10.0, 12.0},
+                      {TensorType_INT32, {1}}, true);
+  std::vector<int> axis = {0};
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), IsEmpty());
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
 }
 
@@ -883,7 +958,20 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
+      ElementsAreArray(ArrayFloatNear({0.294117, 0.2}, kQuantizedTolerance)));
+}
+
+TEST(ConstInt8MinOpTest, NotKeepDims) {
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+  MinOpConstModel m({TensorType_INT8, {1, 3, 2}, -1.0, 1.0},
+                    {TensorType_INT8, {2}, -1.0, 1.0}, {1}, {1}, false);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
+  EXPECT_THAT(
+      m.GetDequantizedOutput<int8_t>(),
       ElementsAreArray(ArrayFloatNear({0.294117, 0.2}, kQuantizedTolerance)));
 }
 
@@ -896,7 +984,20 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
+      ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance)));
+}
+
+TEST(ConstInt8MinOpTest, KeepDims) {
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+  MinOpConstModel m({TensorType_INT8, {3, 2}, -1.0, 1.0},
+                    {TensorType_INT8, {3}, -1.0, 1.0}, {1}, {1}, true);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
+  EXPECT_THAT(
+      m.GetDequantizedOutput<int8_t>(),
       ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance)));
 }
 
@@ -912,7 +1013,23 @@
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
   EXPECT_THAT(
-      m.GetDequantizedOutput(),
+      m.GetDequantizedOutput<uint8_t>(),
+      ElementsAreArray(ArrayFloatNear({-4.807843, -3.6}, kQuantizedTolerance)));
+}
+
+TEST(DynamicInt8MinOpTest, NotKeepDims) {
+  float kQuantizedTolerance = GetTolerance(-5.0, 2.0);
+  std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
+  MinOpDynamicModel m({TensorType_INT8, {2, 2}, -5.0, 2.0},
+                      {TensorType_INT8, {2}, -5.0, 2.0},
+                      {TensorType_INT32, {1}}, false);
+  std::vector<int> axis = {1};
+  m.SetAxis(axis);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
+  EXPECT_THAT(
+      m.GetDequantizedOutput<int8_t>(),
       ElementsAreArray(ArrayFloatNear({-4.807843, -3.6}, kQuantizedTolerance)));
 }
 
@@ -927,7 +1044,23 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(
+                  ArrayFloatNear({7.427451, -0.164706}, kQuantizedTolerance)));
+}
+
+TEST(DynamicInt8MinOpTest, KeepDims) {
+  float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
+  std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
+  MinOpDynamicModel m({TensorType_INT8, {2, 2}, -10.0, 12.0},
+                      {TensorType_INT8, {2}, -10.0, 12.0},
+                      {TensorType_INT32, {1}}, true);
+  std::vector<int> axis = {0};
+  m.SetAxis(axis);
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(
                   ArrayFloatNear({7.427451, -0.164706}, kQuantizedTolerance)));
 }
@@ -942,7 +1075,21 @@
   m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), IsEmpty());
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
+}
+
+TEST(DynamicInt8MinOpTest, Scalar) {
+  float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
+  std::vector<float> data = {11.14};
+  MinOpDynamicModel m({TensorType_INT8, {}, -10.0, 12.0},
+                      {TensorType_INT8, {}, -10.0, 12.0},
+                      {TensorType_INT32, {1}}, true);
+  std::vector<int> axis = {0};
+  m.QuantizeAndPopulate<int8_t>(m.Input(), data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), IsEmpty());
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
 }
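
A note for readers of the reduce_test.cc hunks above: GetDequantizedOutput() is now templated on the storage type because uint8 and int8 tensors carry different zero points, so the raw bytes must be interpreted per type before mapping back to float. A minimal sketch of what such a helper computes, assuming the usual affine quantization scheme (DequantizeSketch and its parameters are illustrative names, not the test harness's real helper):

#include <cstdint>
#include <vector>

// real_value = scale * (quantized_value - zero_point)
template <typename T>
std::vector<float> DequantizeSketch(const std::vector<T>& values, float scale,
                                    int zero_point) {
  std::vector<float> result;
  result.reserve(values.size());
  for (T v : values) {
    // uint8 zero points typically sit near 128 while int8 zero points sit
    // near 0, which is why the extraction type can no longer be hard-coded.
    result.push_back(scale * (static_cast<int>(v) - zero_point));
  }
  return result;
}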
 
diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc
index aad6deb..aa03580 100644
--- a/tensorflow/lite/kernels/register.cc
+++ b/tensorflow/lite/kernels/register.cc
@@ -25,6 +25,7 @@
 TfLiteRegistration* Register_MFCC();
 TfLiteRegistration* Register_DETECTION_POSTPROCESS();
 TfLiteRegistration* Register_IF();
+TfLiteRegistration* Register_WHILE();
 
 }  // namespace custom
 
@@ -104,6 +105,7 @@
 TfLiteRegistration* Register_SELECT();
 TfLiteRegistration* Register_SLICE();
 TfLiteRegistration* Register_SIN();
+TfLiteRegistration* Register_COS();
 TfLiteRegistration* Register_TRANSPOSE_CONV();
 TfLiteRegistration* Register_EXPAND_DIMS();
 TfLiteRegistration* Register_SPARSE_TO_DENSE();
@@ -112,6 +114,7 @@
 TfLiteRegistration* Register_SQRT();
 TfLiteRegistration* Register_RSQRT();
 TfLiteRegistration* Register_SHAPE();
+TfLiteRegistration* Register_RANK();
 TfLiteRegistration* Register_POW();
 TfLiteRegistration* Register_FAKE_QUANT();
 TfLiteRegistration* Register_PACK();
@@ -132,6 +135,8 @@
 TfLiteRegistration* Register_UNIQUE();
 TfLiteRegistration* Register_REVERSE_V2();
 TfLiteRegistration* Register_ADD_N();
+TfLiteRegistration* Register_GATHER_ND();
+TfLiteRegistration* Register_WHERE();
 
 TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) {
   context->ReportError(
@@ -164,13 +169,19 @@
   AddBuiltin(BuiltinOperator_ABS, Register_ABS());
   AddBuiltin(BuiltinOperator_RELU, Register_RELU());
   AddBuiltin(BuiltinOperator_RELU_N1_TO_1, Register_RELU_N1_TO_1());
-  AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
-  AddBuiltin(BuiltinOperator_TANH, Register_TANH());
-  AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC());
+  AddBuiltin(BuiltinOperator_RELU6, Register_RELU6(), /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_TANH, Register_TANH(), /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(),
              /* min_version */ 1,
              /* max_version */ 2);
-  AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D());
+  AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_L2_POOL_2D, Register_L2_POOL_2D());
   AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(),
              /* min_version */ 1,
@@ -199,16 +210,22 @@
              Register_EMBEDDING_LOOKUP_SPARSE());
   AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(),
              /* min_version */ 1,
-             /* max_version */ 3);
+             /* max_version */ 4);
   AddBuiltin(BuiltinOperator_LSH_PROJECTION, Register_LSH_PROJECTION());
   AddBuiltin(BuiltinOperator_HASHTABLE_LOOKUP, Register_HASHTABLE_LOOKUP());
   AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(),
              /* min_version */ 1,
              /* max_version */ 2);
   AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION());
-  AddBuiltin(BuiltinOperator_ADD, Register_ADD());
-  AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND());
-  AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND());
+  AddBuiltin(BuiltinOperator_ADD, Register_ADD(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_MUL, Register_MUL());
   AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
   AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
@@ -221,65 +238,115 @@
   AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
              Register_UNIDIRECTIONAL_SEQUENCE_LSTM(), /* min_version */ 1,
              /* max_version */ 2);
-  AddBuiltin(BuiltinOperator_PAD, Register_PAD());
-  AddBuiltin(BuiltinOperator_PADV2, Register_PADV2());
+  AddBuiltin(BuiltinOperator_PAD, Register_PAD(), /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
-  AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR());
+  AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
-             Register_RESIZE_NEAREST_NEIGHBOR());
+             Register_RESIZE_NEAREST_NEIGHBOR(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM());
-  AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH());
+  AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_GATHER, Register_GATHER(),
              /* min_version */ 1,
              /* max_version */ 2);
-  AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE());
+  AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
   AddBuiltin(BuiltinOperator_DIV, Register_DIV());
-  AddBuiltin(BuiltinOperator_SUB, Register_SUB());
-  AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT());
+  AddBuiltin(BuiltinOperator_SUB, Register_SUB(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), /* min_version */ 1,
+             /* max_version */ 3);
   AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V());
   AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE());
-  AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
+  AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_EXP, Register_EXP());
-  AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2());
+  AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_LOG, Register_LOG());
-  AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX());
+  AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_CAST, Register_CAST());
   AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(),
              /* min_version */ 1,
              /* max_version */ 2);
   AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
-  AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
-  AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
-  AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
-  AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
-  AddBuiltin(BuiltinOperator_GREATER, Register_GREATER());
-  AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL());
-  AddBuiltin(BuiltinOperator_LESS, Register_LESS());
-  AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL());
+  AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_LESS, Register_LESS(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
   AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
   AddBuiltin(BuiltinOperator_NEG, Register_NEG());
-  AddBuiltin(BuiltinOperator_SELECT, Register_SELECT());
-  AddBuiltin(BuiltinOperator_SLICE, Register_SLICE());
+  AddBuiltin(BuiltinOperator_SELECT, Register_SELECT(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_SLICE, Register_SLICE(), /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_SIN, Register_SIN());
+  AddBuiltin(BuiltinOperator_COS, Register_COS());
   AddBuiltin(BuiltinOperator_TRANSPOSE_CONV, Register_TRANSPOSE_CONV());
   AddBuiltin(BuiltinOperator_TILE, Register_TILE());
   AddBuiltin(BuiltinOperator_SUM, Register_SUM());
   AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD());
-  AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX());
-  AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN());
+  AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY());
   AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS());
   AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE());
-  AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL());
-  AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL());
+  AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
   AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
   AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE());
+  AddBuiltin(BuiltinOperator_RANK, Register_RANK());
   AddBuiltin(BuiltinOperator_POW, Register_POW());
   AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT(), 1, 2);
-  AddBuiltin(BuiltinOperator_PACK, Register_PACK());
+  AddBuiltin(BuiltinOperator_PACK, Register_PACK(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_ONE_HOT, Register_ONE_HOT());
   AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
   AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
@@ -297,6 +364,8 @@
   AddBuiltin(BuiltinOperator_UNIQUE, Register_UNIQUE());
   AddBuiltin(BuiltinOperator_REVERSE_V2, Register_REVERSE_V2());
   AddBuiltin(BuiltinOperator_ADD_N, Register_ADD_N());
+  AddBuiltin(BuiltinOperator_GATHER_ND, Register_GATHER_ND());
+  AddBuiltin(BuiltinOperator_WHERE, Register_WHERE());
 
   // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that
   // custom ops aren't always included by default.
@@ -308,6 +377,7 @@
 
   // WARNING: Control flow ops are experimental and subject to change.
   AddCustom("Experimental_If", tflite::ops::custom::Register_IF());
+  AddCustom("Experimental_While", tflite::ops::custom::Register_WHILE());
 }
 
 }  // namespace builtin
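
For context on the /* min_version */ and /* max_version */ arguments threaded through register.cc above: registering an op across a version range lets one runtime serve both older models and models re-exported with the newer (often int8-capable) kernel versions. A rough sketch of how a resolver keyed on (op, version) might store this, using hypothetical names rather than the real MutableOpResolver internals:

#include <map>
#include <utility>

struct Registration {};  // kernel entry points elided

class VersionedRegistry {
 public:
  // Register the same kernel for every version in [min_version, max_version].
  void AddBuiltin(int op, const Registration& r, int min_version = 1,
                  int max_version = 1) {
    for (int v = min_version; v <= max_version; ++v) {
      registrations_[{op, v}] = r;
    }
  }
  // A model asks for the exact (op, version) pair recorded in its flatbuffer.
  const Registration* Find(int op, int version) const {
    auto it = registrations_.find({op, version});
    return it == registrations_.end() ? nullptr : &it->second;
  }

 private:
  std::map<std::pair<int, int>, Registration> registrations_;
};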
diff --git a/tensorflow/lite/kernels/resize_bilinear.cc b/tensorflow/lite/kernels/resize_bilinear.cc
index d42cb18..7383d03 100644
--- a/tensorflow/lite/kernels/resize_bilinear.cc
+++ b/tensorflow/lite/kernels/resize_bilinear.cc
@@ -109,6 +109,9 @@
     if (kernel_type == kGenericOptimized || kernel_type == kNeonOptimized) {
       TF_LITE_RESIZE_BILINEAR(optimized_ops, uint8_t);
     }
+  } else if (output->type == kTfLiteInt8) {
+    TF_LITE_RESIZE_BILINEAR(reference_ops, int8_t);
 #undef TF_LITE_RESIZE_BILINEAR
   } else {
-    context->ReportError(context, "Output type is %d, requires float.",
+    context->ReportError(context,
+                         "Output type is %d, requires float, uint8 or int8.",
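
One way to see why the new int8 branch above can run the same TF_LITE_RESIZE_BILINEAR macro directly on int8_t data (assuming, as these kernels do, that input and output share quantization parameters): bilinear interpolation is linear, so interpolating the stored integers and dequantizing afterwards agrees with dequantizing first, up to rounding. A toy check; Dequant and the constants are illustrative, not the kernel's code:

#include <cassert>
#include <cmath>
#include <cstdint>

float Dequant(int8_t q, float scale, int zero_point) {
  return scale * (q - zero_point);
}

int main() {
  const float scale = 0.5f;
  const int zero_point = -1;
  const int8_t q0 = 4, q1 = 10;
  const float a = 0.25f;  // interpolation weight
  // Interpolate in quantized space, then dequantize...
  float in_q = Dequant(
      static_cast<int8_t>(std::lround((1 - a) * q0 + a * q1)), scale,
      zero_point);
  // ...versus dequantize first, then interpolate.
  float in_f = (1 - a) * Dequant(q0, scale, zero_point) +
               a * Dequant(q1, scale, zero_point);
  assert(std::fabs(in_q - in_f) <= scale);  // equal up to one quantum
  return 0;
}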
diff --git a/tensorflow/lite/kernels/resize_bilinear_test.cc b/tensorflow/lite/kernels/resize_bilinear_test.cc
index d3f4837..a5ead9c 100644
--- a/tensorflow/lite/kernels/resize_bilinear_test.cc
+++ b/tensorflow/lite/kernels/resize_bilinear_test.cc
@@ -78,7 +78,7 @@
               ElementsAreArray(ArrayFloatNear({3, 5, 6})));
 }
 
-TEST(ResizeBilinearOpTest, HorizontalResize8Bit) {
+TEST(ResizeBilinearOpTest, HorizontalResizeUInt8) {
   ResizeBilinearOpModel m({TensorType_UINT8, {1, 1, 2, 1}});
   m.SetInput<uint8>({3, 6});
   m.SetSize({1, 3});
@@ -93,6 +93,21 @@
               ElementsAreArray(ArrayFloatNear({3, 5, 6})));
 }
 
+TEST(ResizeBilinearOpTest, HorizontalResizeInt8) {
+  ResizeBilinearOpModel m({TensorType_INT8, {1, 1, 2, 1}});
+  m.SetInput<int8_t>({3, 6});
+  m.SetSize({1, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({3, 5, 6})));
+
+  ResizeBilinearOpModel const_m({TensorType_INT8, {1, 1, 2, 1}}, {1, 3});
+  const_m.SetInput<int8_t>({3, 6});
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({3, 5, 6})));
+}
+
 TEST(ResizeBilinearOpTest, VerticalResize) {
   ResizeBilinearOpModel m({TensorType_FLOAT32, {1, 2, 1, 1}});
   m.SetInput<float>({3, 9});
@@ -108,7 +123,7 @@
               ElementsAreArray(ArrayFloatNear({3, 7, 9})));
 }
 
-TEST(ResizeBilinearOpTest, VerticalResize8Bit) {
+TEST(ResizeBilinearOpTest, VerticalResizeUInt8) {
   ResizeBilinearOpModel m({TensorType_UINT8, {1, 2, 1, 1}});
   m.SetInput<uint8>({3, 9});
   m.SetSize({3, 1});
@@ -123,6 +138,21 @@
               ElementsAreArray(ArrayFloatNear({3, 7, 9})));
 }
 
+TEST(ResizeBilinearOpTest, VerticalResizeInt8) {
+  ResizeBilinearOpModel m({TensorType_INT8, {1, 2, 1, 1}});
+  m.SetInput<int8_t>({3, 9});
+  m.SetSize({3, 1});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({3, 7, 9})));
+
+  ResizeBilinearOpModel const_m({TensorType_INT8, {1, 2, 1, 1}}, {3, 1});
+  const_m.SetInput<int8_t>({3, 9});
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({3, 7, 9})));
+}
+
 TEST(ResizeBilinearOpTest, TwoDimensionalResize) {
   ResizeBilinearOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}});
   m.SetInput<float>({
@@ -150,7 +180,7 @@
                                           })));
 }
 
-TEST(ResizeBilinearOpTest, TwoDimensionalResize8Bit) {
+TEST(ResizeBilinearOpTest, TwoDimensionalResizeUInt8) {
   ResizeBilinearOpModel m({TensorType_UINT8, {1, 2, 2, 1}});
   m.SetInput<uint8>({
       3, 6,  //
@@ -177,6 +207,33 @@
                                           })));
 }
 
+TEST(ResizeBilinearOpTest, TwoDimensionalResizeInt8) {
+  ResizeBilinearOpModel m({TensorType_INT8, {1, 2, 2, 1}});
+  m.SetInput<int8_t>({
+      3, 6,  //
+      9, 12  //
+  });
+  m.SetSize({3, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                         3, 5, 6,    //
+                                         7, 9, 10,   //
+                                         9, 11, 12,  //
+                                     })));
+
+  ResizeBilinearOpModel const_m({TensorType_INT8, {1, 2, 2, 1}}, {3, 3});
+  const_m.SetInput<int8_t>({
+      3, 6,  //
+      9, 12  //
+  });
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                               3, 5, 6,    //
+                                               7, 9, 10,   //
+                                               9, 11, 12,  //
+                                           })));
+}
+
 TEST(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches) {
   ResizeBilinearOpModel m({TensorType_FLOAT32, {2, 2, 2, 1}});
   m.SetInput<float>({
@@ -241,7 +298,7 @@
                                           })));
 }
 
-TEST(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches8Bit) {
+TEST(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatchesUInt8) {
   ResizeBilinearOpModel m({TensorType_UINT8, {2, 2, 2, 1}});
   m.SetInput<uint8>({
       3, 6,   //
@@ -278,7 +335,44 @@
                                           })));
 }
 
-TEST(ResizeBilinearOpTest, ThreeDimensionalResize8Bit) {
+TEST(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatchesInt8) {
+  ResizeBilinearOpModel m({TensorType_INT8, {2, 2, 2, 1}});
+  m.SetInput<int8_t>({
+      3, 6,   //
+      9, 12,  //
+      4, 10,  //
+      12, 16  //
+  });
+  m.SetSize({3, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                         3, 5, 6,     //
+                                         7, 9, 10,    //
+                                         9, 11, 12,   //
+                                         4, 8, 10,    //
+                                         9, 12, 13,   //
+                                         12, 14, 16,  //
+                                     })));
+
+  ResizeBilinearOpModel const_m({TensorType_INT8, {2, 2, 2, 1}}, {3, 3});
+  const_m.SetInput<int8_t>({
+      3, 6,   //
+      9, 12,  //
+      4, 10,  //
+      12, 16  //
+  });
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                               3, 5, 6,     //
+                                               7, 9, 10,    //
+                                               9, 11, 12,   //
+                                               4, 8, 10,    //
+                                               9, 12, 13,   //
+                                               12, 14, 16,  //
+                                           })));
+}
+
+TEST(ResizeBilinearOpTest, ThreeDimensionalResizeUInt8) {
   ResizeBilinearOpModel m({TensorType_UINT8, {1, 2, 2, 2}});
   m.SetInput<uint8>({
       3, 4, 6, 10,     //
@@ -304,6 +398,33 @@
                                               10, 12, 12, 14, 14, 16,  //
                                           })));
 }
+
+TEST(ResizeBilinearOpTest, ThreeDimensionalResizeInt8) {
+  ResizeBilinearOpModel m({TensorType_INT8, {1, 2, 2, 2}});
+  m.SetInput<int8_t>({
+      3, 4, 6, 10,     //
+      10, 12, 14, 16,  //
+  });
+  m.SetSize({3, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                         3, 4, 5, 8, 6, 10,       //
+                                         7, 9, 10, 12, 11, 13,    //
+                                         10, 12, 12, 14, 14, 16,  //
+                                     })));
+
+  ResizeBilinearOpModel const_m({TensorType_INT8, {1, 2, 2, 2}}, {3, 3});
+  const_m.SetInput<int8_t>({
+      3, 4, 6, 10,     //
+      10, 12, 14, 16,  //
+  });
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                               3, 4, 5, 8, 6, 10,       //
+                                               7, 9, 10, 12, 11, 13,    //
+                                               10, 12, 12, 14, 14, 16,  //
+                                           })));
+}
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/resize_nearest_neighbor.cc b/tensorflow/lite/kernels/resize_nearest_neighbor.cc
index a48d800..3030a4f 100644
--- a/tensorflow/lite/kernels/resize_nearest_neighbor.cc
+++ b/tensorflow/lite/kernels/resize_nearest_neighbor.cc
@@ -106,8 +106,14 @@
           GetTensorShape(size), GetTensorData<int32>(size),
           GetTensorShape(output), GetTensorData<uint8_t>(output));
     }
+  } else if (output->type == kTfLiteInt8) {
+    reference_ops::ResizeNearestNeighbor(
+        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
+        GetTensorShape(size), GetTensorData<int32>(size),
+        GetTensorShape(output), GetTensorData<int8_t>(output));
   } else {
-    context->ReportError(context, "Output type is %d, requires float or uint8.",
+    context->ReportError(context,
+                         "Output type is %d, requires float, uint8 or int8.",
                          output->type);
     return kTfLiteError;
   }
diff --git a/tensorflow/lite/kernels/resize_nearest_neighbor_test.cc b/tensorflow/lite/kernels/resize_nearest_neighbor_test.cc
index 03e2eff..63b4f13 100644
--- a/tensorflow/lite/kernels/resize_nearest_neighbor_test.cc
+++ b/tensorflow/lite/kernels/resize_nearest_neighbor_test.cc
@@ -79,7 +79,7 @@
               ElementsAreArray(ArrayFloatNear({3, 3, 6})));
 }
 
-TEST(ResizeNearestNeighborOpTest, HorizontalResize8Bit) {
+TEST(ResizeNearestNeighborOpTest, HorizontalResizeUInt8) {
   ResizeNearestNeighborOpModel m({TensorType_UINT8, {1, 1, 2, 1}});
   m.SetInput<uint8>({3, 6});
   m.SetSize({1, 3});
@@ -95,6 +95,21 @@
               ElementsAreArray(ArrayFloatNear({3, 3, 6})));
 }
 
+TEST(ResizeNearestNeighborOpTest, HorizontalResizeInt8) {
+  ResizeNearestNeighborOpModel m({TensorType_INT8, {1, 1, 2, 1}});
+  m.SetInput<int8_t>({-3, 6});
+  m.SetSize({1, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({-3, -3, 6})));
+
+  ResizeNearestNeighborOpModel const_m({TensorType_INT8, {1, 1, 2, 1}}, {1, 3});
+  const_m.SetInput<int8_t>({-3, 6});
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({-3, -3, 6})));
+}
+
 TEST(ResizeNearestNeighborOpTest, VerticalResize) {
   ResizeNearestNeighborOpModel m({TensorType_FLOAT32, {1, 2, 1, 1}});
   m.SetInput<float>({3, 9});
@@ -111,7 +126,7 @@
               ElementsAreArray(ArrayFloatNear({3, 3, 9})));
 }
 
-TEST(ResizeNearestNeighborOpTest, VerticalResize8Bit) {
+TEST(ResizeNearestNeighborOpTest, VerticalResizeUInt8) {
   ResizeNearestNeighborOpModel m({TensorType_UINT8, {1, 2, 1, 1}});
   m.SetInput<uint8>({3, 9});
   m.SetSize({3, 1});
@@ -127,6 +142,21 @@
               ElementsAreArray(ArrayFloatNear({3, 3, 9})));
 }
 
+TEST(ResizeNearestNeighborOpTest, VerticalResizeInt8) {
+  ResizeNearestNeighborOpModel m({TensorType_INT8, {1, 2, 1, 1}});
+  m.SetInput<int8_t>({3, -9});
+  m.SetSize({3, 1});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({3, 3, -9})));
+
+  ResizeNearestNeighborOpModel const_m({TensorType_INT8, {1, 2, 1, 1}}, {3, 1});
+  const_m.SetInput<int8_t>({3, -9});
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear({3, 3, -9})));
+}
+
 TEST(ResizeNearestNeighborOpTest, TwoDimensionalResize) {
   ResizeNearestNeighborOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}});
   m.SetInput<float>({
@@ -155,7 +185,7 @@
                                           })));
 }
 
-TEST(ResizeNearestNeighborOpTest, TwoDimensionalResize8Bit) {
+TEST(ResizeNearestNeighborOpTest, TwoDimensionalResizeUInt8) {
   ResizeNearestNeighborOpModel m({TensorType_UINT8, {1, 2, 2, 1}});
   m.SetInput<uint8>({
       3, 6,  //
@@ -183,6 +213,33 @@
                                           })));
 }
 
+TEST(ResizeNearestNeighborOpTest, TwoDimensionalResizeInt8) {
+  ResizeNearestNeighborOpModel m({TensorType_INT8, {1, 2, 2, 1}});
+  m.SetInput<int8_t>({
+      3, -6,  //
+      9, 12   //
+  });
+  m.SetSize({3, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                         3, 3, -6,  //
+                                         3, 3, -6,  //
+                                         9, 9, 12,  //
+                                     })));
+
+  ResizeNearestNeighborOpModel const_m({TensorType_INT8, {1, 2, 2, 1}}, {3, 3});
+  const_m.SetInput<int8_t>({
+      3, -6,  //
+      9, 12   //
+  });
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                               3, 3, -6,  //
+                                               3, 3, -6,  //
+                                               9, 9, 12,  //
+                                           })));
+}
+
 TEST(ResizeNearestNeighborOpTest, TwoDimensionalResizeWithTwoBatches) {
   ResizeNearestNeighborOpModel m({TensorType_FLOAT32, {2, 2, 2, 1}});
   m.SetInput<float>({
@@ -249,7 +306,7 @@
                                           })));
 }
 
-TEST(ResizeNearestNeighborOpTest, TwoDimensionalResizeWithTwoBatches8Bit) {
+TEST(ResizeNearestNeighborOpTest, TwoDimensionalResizeWithTwoBatchesUInt8) {
   ResizeNearestNeighborOpModel m({TensorType_UINT8, {2, 2, 2, 1}});
   m.SetInput<uint8>({
       3, 6,   //
@@ -287,7 +344,44 @@
                                           })));
 }
 
-TEST(ResizeNearestNeighborOpTest, ThreeDimensionalResize8Bit) {
+TEST(ResizeNearestNeighborOpTest, TwoDimensionalResizeWithTwoBatchesInt8) {
+  ResizeNearestNeighborOpModel m({TensorType_INT8, {2, 2, 2, 1}});
+  m.SetInput<int8_t>({
+      3, 6,    //
+      9, -12,  //
+      -4, 10,  //
+      12, 16   //
+  });
+  m.SetSize({3, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                         3, 3, 6,     //
+                                         3, 3, 6,     //
+                                         9, 9, -12,   //
+                                         -4, -4, 10,  //
+                                         -4, -4, 10,  //
+                                         12, 12, 16,  //
+                                     })));
+
+  ResizeNearestNeighborOpModel const_m({TensorType_INT8, {2, 2, 2, 1}}, {3, 3});
+  const_m.SetInput<int8_t>({
+      3, 6,    //
+      9, -12,  //
+      -4, 10,  //
+      12, 16   //
+  });
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                               3, 3, 6,     //
+                                               3, 3, 6,     //
+                                               9, 9, -12,   //
+                                               -4, -4, 10,  //
+                                               -4, -4, 10,  //
+                                               12, 12, 16,  //
+                                           })));
+}
+
+TEST(ResizeNearestNeighborOpTest, ThreeDimensionalResizeUInt8) {
   ResizeNearestNeighborOpModel m({TensorType_UINT8, {1, 2, 2, 2}});
   m.SetInput<uint8>({
       3, 4, 6, 10,     //
@@ -315,6 +409,33 @@
                                           })));
 }
 
+TEST(ResizeNearestNeighborOpTest, ThreeDimensionalResizeInt8) {
+  ResizeNearestNeighborOpModel m({TensorType_INT8, {1, 2, 2, 2}});
+  m.SetInput<int8_t>({
+      3, 4, -6, 10,     //
+      10, 12, -14, 16,  //
+  });
+  m.SetSize({3, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                         3, 4, 3, 4, -6, 10,       //
+                                         3, 4, 3, 4, -6, 10,       //
+                                         10, 12, 10, 12, -14, 16,  //
+                                     })));
+
+  ResizeNearestNeighborOpModel const_m({TensorType_INT8, {1, 2, 2, 2}}, {3, 3});
+  const_m.SetInput<int8_t>({
+      3, 4, -6, 10,     //
+      10, 12, -14, 16,  //
+  });
+  const_m.Invoke();
+  EXPECT_THAT(const_m.GetOutput<int8_t>(), ElementsAreArray(ArrayFloatNear({
+                                               3, 4, 3, 4, -6, 10,       //
+                                               3, 4, 3, 4, -6, 10,       //
+                                               10, 12, 10, 12, -14, 16,  //
+                                           })));
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/select.cc b/tensorflow/lite/kernels/select.cc
index 4687ab4..d1c63d8 100644
--- a/tensorflow/lite/kernels/select.cc
+++ b/tensorflow/lite/kernels/select.cc
@@ -89,6 +89,9 @@
     case kTfLiteUInt8:                                                         \
       TF_LITE_SELECT(uint8_t, op);                                             \
       break;                                                                   \
+    case kTfLiteInt8:                                                          \
+      TF_LITE_SELECT(int8_t, op);                                              \
+      break;                                                                   \
     case kTfLiteInt16:                                                         \
       TF_LITE_SELECT(int16_t, op);                                             \
       break;                                                                   \
diff --git a/tensorflow/lite/kernels/select_test.cc b/tensorflow/lite/kernels/select_test.cc
index 5111300..d7cadeb 100644
--- a/tensorflow/lite/kernels/select_test.cc
+++ b/tensorflow/lite/kernels/select_test.cc
@@ -96,6 +96,19 @@
   EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4}));
 }
 
+TEST(SelectOpTest, SelectInt8) {
+  SelectOpModel model({1, 1, 1, 4}, {1, 1, 1, 4}, {1, 1, 1, 4},
+                      TensorType_INT8);
+
+  model.PopulateTensor<bool>(model.input1(), {false, true, false, false});
+  model.PopulateTensor<int8_t>(model.input2(), {1, -2, 3, 4});
+  model.PopulateTensor<int8_t>(model.input3(), {5, 6, 7, -8});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput<int8_t>(), ElementsAreArray({5, -2, 7, -8}));
+  EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4}));
+}
+
 TEST(SelectOpTest, SelectInt16) {
   SelectOpModel model({1, 1, 1, 4}, {1, 1, 1, 4}, {1, 1, 1, 4},
                       TensorType_INT16);
diff --git a/tensorflow/lite/kernels/slice.cc b/tensorflow/lite/kernels/slice.cc
index 5fca7a3..650c65d 100644
--- a/tensorflow/lite/kernels/slice.cc
+++ b/tensorflow/lite/kernels/slice.cc
@@ -204,6 +204,9 @@
     case kTfLiteInt64:
       TF_LITE_SLICE(int64_t, kernel_type);
       break;
+    case kTfLiteInt8:
+      TF_LITE_SLICE(int8_t, kernel_type);
+      break;
     case kTfLiteUInt8:
       TF_LITE_SLICE(uint8_t, kernel_type);
       break;
diff --git a/tensorflow/lite/kernels/slice_test.cc b/tensorflow/lite/kernels/slice_test.cc
index 563329d..102218b 100644
--- a/tensorflow/lite/kernels/slice_test.cc
+++ b/tensorflow/lite/kernels/slice_test.cc
@@ -163,6 +163,28 @@
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 3, 5, 5, 5}));
 }
 
+TEST(SliceOpTest, SliceUint8) {
+  SliceOpModel<uint8_t, int32_t> m({3, 2, 3, 1}, {4}, {4}, TensorType_INT32,
+                                   TensorType_UINT8);
+  m.SetInput({1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6});
+  m.SetBegin({1, 0, 0, 0});
+  m.SetSize({2, 1, -1, 1});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 1, 3, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 3, 5, 5, 5}));
+}
+
+TEST(SliceOpTest, SliceInt8) {
+  SliceOpModel<int8_t, int32_t> m({3, 2, 3, 1}, {4}, {4}, TensorType_INT32,
+                                  TensorType_INT8);
+  m.SetInput({1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6});
+  m.SetBegin({1, 0, 0, 0});
+  m.SetSize({2, 1, -1, 1});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 1, 3, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 3, 5, 5, 5}));
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/space_to_batch_nd.cc b/tensorflow/lite/kernels/space_to_batch_nd.cc
index 1c61b2e..2fb7198 100644
--- a/tensorflow/lite/kernels/space_to_batch_nd.cc
+++ b/tensorflow/lite/kernels/space_to_batch_nd.cc
@@ -141,6 +141,15 @@
                                   op_context.output->params.zero_point);
       }
       break;
+    case kTfLiteInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_SPACE_TO_BATCH_ND(reference_ops, int8_t,
+                                  op_context.output->params.zero_point);
+      } else {
+        TF_LITE_SPACE_TO_BATCH_ND(optimized_ops, int8_t,
+                                  op_context.output->params.zero_point);
+      }
+      break;
     case kTfLiteInt32:
       if (kernel_type == kReference) {
         TF_LITE_SPACE_TO_BATCH_ND(reference_ops, int32_t, 0);
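
Note the third macro argument in the hunk above: the quantized uint8/int8 cases pass the output's zero_point as the padding value, while the int32 case passes a literal 0. Padded cells must hold the zero point so they dequantize back to 0.0. A toy check, assuming the usual affine scheme (Dequant is an illustrative helper, not the kernel's):

#include <cassert>
#include <cstdint>

// real_value = scale * (q - zero_point)
float Dequant(uint8_t q, float scale, int zero_point) {
  return scale * (static_cast<int>(q) - zero_point);
}

int main() {
  const float scale = 2.0f / 255.0f;  // e.g. uint8 quantization of [-1.0, 1.0]
  const int zero_point = 128;
  // Padding with the zero point keeps padded cells at real value 0.0.
  assert(Dequant(static_cast<uint8_t>(zero_point), scale, zero_point) == 0.0f);
  // Padding with raw 0 would instead read back as roughly -1.0.
  assert(Dequant(0, scale, zero_point) < -0.99f);
  return 0;
}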
diff --git a/tensorflow/lite/kernels/space_to_batch_nd_test.cc b/tensorflow/lite/kernels/space_to_batch_nd_test.cc
index c5d6e9a..52a7798 100644
--- a/tensorflow/lite/kernels/space_to_batch_nd_test.cc
+++ b/tensorflow/lite/kernels/space_to_batch_nd_test.cc
@@ -31,8 +31,9 @@
     PopulateTensor<float>(input_, data);
   }
 
+  template <typename T>
   void SetQuantizedInput(std::initializer_list<float> data) {
-    QuantizeAndPopulate<uint8_t>(input_, data);
+    QuantizeAndPopulate<T>(input_, data);
   }
 
   void SetBlockShape(std::initializer_list<int> data) {
@@ -46,9 +47,10 @@
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
   std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
 
+  template <typename T>
   std::vector<float> GetDequantizedOutput() {
-    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+    return Dequantize<T>(ExtractVector<T>(output_), GetScale(output_),
+                         GetZeroPoint(output_));
   }
 
  protected:
@@ -233,29 +235,62 @@
 }
 #endif
 
-TEST_F(QuantizedSpaceToBatchNDOpTest, SimplePaddingConstTest) {
+TEST_F(QuantizedSpaceToBatchNDOpTest, SimplePaddingConstTestUint8) {
   SpaceToBatchNDOpConstModel m({TensorType_UINT8, {1, 5, 2, 1}, -1.0, 1.0},
                                {3, 2}, {1, 0, 2, 0},
                                {TensorType_UINT8, {}, -1.0, 1.0});
-  m.SetQuantizedInput({-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 0.1});
+  m.SetQuantizedInput<uint8_t>(
+      {-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 0.1});
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 2, 1}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(DequantizedArrayNear(
                   {0, 0,   0, -0.5, 0, 0,    0, 0.6,  0, -0.1, 0, -0.7,
                    0, 0.2, 0, 0.8,  0, -0.3, 0, -0.9, 0, 0.4,  0, 0.1},
                   -1.0, 1.0)));
 }
 
-TEST_F(QuantizedSpaceToBatchNDOpTest, SimplePaddingDynamicTest) {
+TEST_F(QuantizedSpaceToBatchNDOpTest, SimplePaddingConstTestInt8) {
+  SpaceToBatchNDOpConstModel m({TensorType_INT8, {1, 5, 2, 1}, -1.0, 1.0},
+                               {3, 2}, {1, 0, 2, 0},
+                               {TensorType_INT8, {}, -1.0, 1.0});
+  m.SetQuantizedInput<int8_t>(
+      {-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 0.1});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 2, 1}));
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
+              ElementsAreArray(DequantizedArrayNear(
+                  {0, 0,   0, -0.5, 0, 0,    0, 0.6,  0, -0.1, 0, -0.7,
+                   0, 0.2, 0, 0.8,  0, -0.3, 0, -0.9, 0, 0.4,  0, 0.1},
+                  -1.0, 1.0)));
+}
+
+TEST_F(QuantizedSpaceToBatchNDOpTest, SimplePaddingDynamicTestUint8) {
   SpaceToBatchNDOpDynamicModel m({TensorType_UINT8, {1, 5, 2, 1}, -1.0, 1.0},
                                  {TensorType_UINT8, {}, -1.0, 1.0});
-  m.SetQuantizedInput({-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 0.1});
+  m.SetQuantizedInput<uint8_t>(
+      {-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 0.1});
   m.SetBlockShape({3, 2});
   m.SetPaddings({1, 0, 2, 0});
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 2, 1}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(DequantizedArrayNear(
+                  {0, 0,   0, -0.5, 0, 0,    0, 0.6,  0, -0.1, 0, -0.7,
+                   0, 0.2, 0, 0.8,  0, -0.3, 0, -0.9, 0, 0.4,  0, 0.1},
+                  -1.0, 1.0)));
+}
+
+TEST_F(QuantizedSpaceToBatchNDOpTest, SimplePaddingDynamicTestInt8) {
+  SpaceToBatchNDOpDynamicModel m({TensorType_INT8, {1, 5, 2, 1}, -1.0, 1.0},
+                                 {TensorType_INT8, {}, -1.0, 1.0});
+  m.SetQuantizedInput<int8_t>(
+      {-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 0.1});
+  m.SetBlockShape({3, 2});
+  m.SetPaddings({1, 0, 2, 0});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 2, 1}));
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
               ElementsAreArray(DequantizedArrayNear(
                   {0, 0,   0, -0.5, 0, 0,    0, 0.6,  0, -0.1, 0, -0.7,
                    0, 0.2, 0, 0.8,  0, -0.3, 0, -0.9, 0, 0.4,  0, 0.1},
@@ -266,10 +301,10 @@
   SpaceToBatchNDOpConstModel m({TensorType_UINT8, {1, 4, 2, 1}, -1.0, 1.0},
                                {3, 2}, {1, 1, 2, 4},
                                {TensorType_UINT8, {}, -1.0, 1.0});
-  m.SetQuantizedInput({-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8});
+  m.SetQuantizedInput<uint8_t>({-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8});
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 4, 1}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(DequantizedArrayNear(
                   {
                       0, 0,    0, 0, 0, -0.5, 0, 0, 0, 0,   0, 0, 0, 0.6, 0, 0,
@@ -282,12 +317,12 @@
 TEST_F(QuantizedSpaceToBatchNDOpTest, ComplexPaddingDynamicTest) {
   SpaceToBatchNDOpDynamicModel m({TensorType_UINT8, {1, 4, 2, 1}, -1.0, 1.0},
                                  {TensorType_UINT8, {}, -1.0, 1.0});
-  m.SetQuantizedInput({-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8});
+  m.SetQuantizedInput<uint8_t>({-0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8});
   m.SetBlockShape({3, 2});
   m.SetPaddings({1, 1, 2, 4});
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 4, 1}));
-  EXPECT_THAT(m.GetDequantizedOutput(),
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
               ElementsAreArray(DequantizedArrayNear(
                   {
                       0, 0,    0, 0, 0, -0.5, 0, 0, 0, 0,   0, 0, 0, 0.6, 0, 0,
diff --git a/tensorflow/lite/kernels/space_to_depth.cc b/tensorflow/lite/kernels/space_to_depth.cc
index 79e28bf..cf6b0bd 100644
--- a/tensorflow/lite/kernels/space_to_depth.cc
+++ b/tensorflow/lite/kernels/space_to_depth.cc
@@ -50,7 +50,8 @@
   auto data_type = output->type;
   TF_LITE_ENSURE(context,
                  data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8 ||
-                     data_type == kTfLiteInt32 || data_type == kTfLiteInt64);
+                     data_type == kTfLiteInt8 || data_type == kTfLiteInt32 ||
+                     data_type == kTfLiteInt64);
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
 
   const int block_size = params->block_size;
@@ -100,6 +101,13 @@
         TF_LITE_SPACE_TO_DEPTH(optimized_ops, uint8_t);
       }
       break;
+    case kTfLiteInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_SPACE_TO_DEPTH(reference_ops, int8_t);
+      } else {
+        TF_LITE_SPACE_TO_DEPTH(optimized_ops, int8_t);
+      }
+      break;
     case kTfLiteInt32:
       if (kernel_type == kReference) {
         TF_LITE_SPACE_TO_DEPTH(reference_ops, int32_t);
diff --git a/tensorflow/lite/kernels/space_to_depth_test.cc b/tensorflow/lite/kernels/space_to_depth_test.cc
index 3fa8d86..58665fc 100644
--- a/tensorflow/lite/kernels/space_to_depth_test.cc
+++ b/tensorflow/lite/kernels/space_to_depth_test.cc
@@ -74,6 +74,14 @@
   EXPECT_THAT(m.GetOutputShape(), ElementsAre(1, 1, 1, 4));
 }
 
+TEST(SpaceToDepthOpModel, Int8) {
+  SpaceToDepthOpModel m({TensorType_INT8, {1, 2, 2, 1}}, 2);
+  m.SetInput<int8_t>({1, 2, 3, 4});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray({1, 2, 3, 4}));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAre(1, 1, 1, 4));
+}
+
 TEST(SpaceToDepthOpModel, Int32) {
   SpaceToDepthOpModel m({TensorType_INT32, {1, 2, 2, 3}}, 2);
   m.SetInput<int32_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
diff --git a/tensorflow/lite/kernels/split.cc b/tensorflow/lite/kernels/split.cc
index 7902ed2..c0f701f 100644
--- a/tensorflow/lite/kernels/split.cc
+++ b/tensorflow/lite/kernels/split.cc
@@ -76,9 +76,10 @@
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), op_context.params->num_splits);
 
   auto input_type = op_context.input->type;
-  TF_LITE_ENSURE(context, input_type == kTfLiteFloat32 ||
-                              input_type == kTfLiteUInt8 ||
-                              input_type == kTfLiteInt16);
+  TF_LITE_ENSURE(context,
+                 input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
+                     input_type == kTfLiteInt8 || input_type == kTfLiteInt16 ||
+                     input_type == kTfLiteInt32);
   for (int i = 0; i < NumOutputs(node); ++i) {
     GetOutput(context, node, i)->type = input_type;
   }
@@ -137,15 +138,23 @@
       TF_LITE_SPLIT(uint8_t);
       break;
     }
+    case kTfLiteInt8: {
+      TF_LITE_SPLIT(int8_t);
+      break;
+    }
     case kTfLiteInt16: {
       TF_LITE_SPLIT(int16_t);
       break;
     }
+    case kTfLiteInt32: {
+      TF_LITE_SPLIT(int32_t);
+      break;
+    }
     default:
-      context->ReportError(
-          context,
-          "Only float32, uint8 and int16 are currently supported, got %d.",
-          op_context.input->type);
+      context->ReportError(context,
+                           "Only float32, uint8, int8, int16 and int32 are "
+                           "currently supported, got %d.",
+                           op_context.input->type);
       return kTfLiteError;
   }
 #undef TF_LITE_SPLIT
diff --git a/tensorflow/lite/kernels/split_test.cc b/tensorflow/lite/kernels/split_test.cc
index f3d9ea3..fa313d4 100644
--- a/tensorflow/lite/kernels/split_test.cc
+++ b/tensorflow/lite/kernels/split_test.cc
@@ -47,13 +47,15 @@
     }
   }
 
-  void SetInput(std::initializer_list<float> data) {
+  template <typename T>
+  void SetInput(std::initializer_list<T> data) {
     PopulateTensor(input_, data);
   }
   void SetAxis(int axis) { PopulateTensor(axis_, {axis}); }
 
-  std::vector<float> GetOutput(int i) {
-    return ExtractVector<float>(outputs_[i]);
+  template <typename T>
+  std::vector<T> GetOutput(int i) {
+    return ExtractVector<T>(outputs_[i]);
   }
   std::vector<int> GetOutputShape(int i) { return GetTensorShape(outputs_[i]); }
 
@@ -63,33 +65,34 @@
   std::vector<int> outputs_;
 };
 
-using TensorValues = std::initializer_list<float>;
-
+template <typename T>
 void Check(int axis, int num_splits, std::initializer_list<int> input_shape,
            std::initializer_list<int> output_shape,
-           const TensorValues& input_data,
-           const std::vector<TensorValues>& output_data) {
+           const std::initializer_list<T>& input_data,
+           const std::vector<std::initializer_list<T>>& output_data,
+           const TensorType& type = TensorType_FLOAT32) {
   auto debug = [&](int i) {
     std::stringstream ss;
     ss << "for output tensor " << i << " axis=" << axis
        << " and num_splits=" << num_splits;
     return ss.str();
   };
-  SplitOpModel m({TensorType_FLOAT32, input_shape}, num_splits);
+  SplitOpModel m({type, input_shape}, num_splits);
   m.SetInput(input_data);
   m.SetAxis(axis);
   m.Invoke();
   for (int i = 0; i < num_splits; ++i) {
-    EXPECT_THAT(m.GetOutput(i), ElementsAreArray(output_data[i])) << debug(i);
+    EXPECT_THAT(m.GetOutput<T>(i), ElementsAreArray(output_data[i]))
+        << debug(i);
     EXPECT_THAT(m.GetOutputShape(i), ElementsAreArray(output_shape))
         << debug(i);
   }
 
-  SplitOpModel const_m({TensorType_FLOAT32, input_shape}, num_splits, axis);
+  SplitOpModel const_m({type, input_shape}, num_splits, axis);
   const_m.SetInput(input_data);
   const_m.Invoke();
   for (int i = 0; i < num_splits; ++i) {
-    EXPECT_THAT(const_m.GetOutput(i), ElementsAreArray(output_data[i]))
+    EXPECT_THAT(const_m.GetOutput<T>(i), ElementsAreArray(output_data[i]))
         << debug(i);
     EXPECT_THAT(const_m.GetOutputShape(i), ElementsAreArray(output_shape))
         << debug(i);
@@ -97,44 +100,106 @@
 }
 
 TEST(SplitOpTest, FourDimensional) {
-  Check(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
-        {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
-        {
-            {1, 2, 3, 4, 5, 6, 7, 8},
-            {9, 10, 11, 12, 13, 14, 15, 16},
-        });
-  Check(/*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
-        {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
-        {
-            {1, 2, 3, 4, 9, 10, 11, 12},
-            {5, 6, 7, 8, 13, 14, 15, 16},
-        });
-  Check(/*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
-        {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
-        {
-            {1, 2, 5, 6, 9, 10, 13, 14},
-            {3, 4, 7, 8, 11, 12, 15, 16},
-        });
-  Check(/*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
-        {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
-        {
-            {1, 3, 5, 7, 9, 11, 13, 15},
-            {2, 4, 6, 8, 10, 12, 14, 16},
-        });
+  Check<float>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+               {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+               {
+                   {1, 2, 3, 4, 5, 6, 7, 8},
+                   {9, 10, 11, 12, 13, 14, 15, 16},
+               });
+  Check<float>(/*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+               {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+               {
+                   {1, 2, 3, 4, 9, 10, 11, 12},
+                   {5, 6, 7, 8, 13, 14, 15, 16},
+               });
+  Check<float>(/*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+               {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+               {
+                   {1, 2, 5, 6, 9, 10, 13, 14},
+                   {3, 4, 7, 8, 11, 12, 15, 16},
+               });
+  Check<float>(/*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+               {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+               {
+                   {1, 3, 5, 7, 9, 11, 13, 15},
+                   {2, 4, 6, 8, 10, 12, 14, 16},
+               });
+}
+
+TEST(SplitOpTest, FourDimensionalInt8) {
+  Check<int8_t>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+                {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                {
+                    {1, 2, 3, 4, 5, 6, 7, 8},
+                    {9, 10, 11, 12, 13, 14, 15, 16},
+                },
+                TensorType_INT8);
+  Check<int8_t>(/*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+                {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                {
+                    {1, 2, 3, 4, 9, 10, 11, 12},
+                    {5, 6, 7, 8, 13, 14, 15, 16},
+                },
+                TensorType_INT8);
+  Check<int8_t>(/*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+                {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                {
+                    {1, 2, 5, 6, 9, 10, 13, 14},
+                    {3, 4, 7, 8, 11, 12, 15, 16},
+                },
+                TensorType_INT8);
+  Check<int8_t>(/*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+                {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                {
+                    {1, 3, 5, 7, 9, 11, 13, 15},
+                    {2, 4, 6, 8, 10, 12, 14, 16},
+                },
+                TensorType_INT8);
+}
+
+TEST(SplitOpTest, FourDimensionalInt32) {
+  Check<int32_t>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+                 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                 {
+                     {1, 2, 3, 4, 5, 6, 7, 8},
+                     {9, 10, 11, 12, 13, 14, 15, 16},
+                 },
+                 TensorType_INT32);
+  Check<int32_t>(/*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+                 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                 {
+                     {1, 2, 3, 4, 9, 10, 11, 12},
+                     {5, 6, 7, 8, 13, 14, 15, 16},
+                 },
+                 TensorType_INT32);
+  Check<int32_t>(/*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+                 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                 {
+                     {1, 2, 5, 6, 9, 10, 13, 14},
+                     {3, 4, 7, 8, 11, 12, 15, 16},
+                 },
+                 TensorType_INT32);
+  Check<int32_t>(/*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+                 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                 {
+                     {1, 3, 5, 7, 9, 11, 13, 15},
+                     {2, 4, 6, 8, 10, 12, 14, 16},
+                 },
+                 TensorType_INT32);
 }
 
 TEST(SplitOpTest, OneDimensional) {
-  Check(/*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
-        {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
+  Check<float>(/*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+               {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
 }
 
 TEST(SplitOpTest, NegativeAxis) {
-  Check(/*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
-        {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
-        {
-            {1, 2, 3, 4, 5, 6, 7, 8},
-            {9, 10, 11, 12, 13, 14, 15, 16},
-        });
+  Check<float>(/*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+               {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+               {
+                   {1, 2, 3, 4, 5, 6, 7, 8},
+                   {9, 10, 11, 12, 13, 14, 15, 16},
+               });
 }
 
 }  // namespace
diff --git a/tensorflow/lite/kernels/strided_slice.cc b/tensorflow/lite/kernels/strided_slice.cc
index c797a98..8c25ffa 100644
--- a/tensorflow/lite/kernels/strided_slice.cc
+++ b/tensorflow/lite/kernels/strided_slice.cc
@@ -234,6 +234,11 @@
         TF_LITE_STRIDED_SLICE(reference_ops, uint8_t);
       }
       break;
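+    // StridedSlice only copies elements, so the int8 path can reuse the same
+    // reference kernel template with a narrower element type.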
+    case kTfLiteInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_STRIDED_SLICE(reference_ops, int8_t);
+      }
+      break;
     default:
       context->ReportError(context,
                            "Type %d is currently not supported "
diff --git a/tensorflow/lite/kernels/strided_slice_test.cc b/tensorflow/lite/kernels/strided_slice_test.cc
index 34875bf..cac9e16 100644
--- a/tensorflow/lite/kernels/strided_slice_test.cc
+++ b/tensorflow/lite/kernels/strided_slice_test.cc
@@ -577,6 +577,18 @@
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 2}));
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6}));
 }
+
+TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis1int8) {
+  StridedSliceOpModel<int8_t, TensorType_INT8> m({2, 3, 2}, {3}, {3}, {3}, 0, 0,
+                                                 0, 0, 1);
+  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+  m.SetBegin({0, 0, 0});
+  m.SetEnd({1, 3, 2});
+  m.SetStrides({1, 1, 1});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 2}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6}));
+}
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc
index 9144144..8bd6052 100644
--- a/tensorflow/lite/kernels/sub.cc
+++ b/tensorflow/lite/kernels/sub.cc
@@ -12,10 +12,12 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <limits>
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
 #include "tensorflow/lite/kernels/internal/tensor.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
@@ -68,21 +70,39 @@
   delete reinterpret_cast<OpData*>(buffer);
 }
 
-TfLiteStatus PrepareUint8SubOp(TfLiteContext* context,
-                               const TfLiteTensor* input_1,
-                               const TfLiteTensor* input_2,
-                               TfLiteTensor* output, TfLiteSubParams* params,
-                               OpData* op_params, int op_sign) {
+TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
+                              const TfLiteTensor* input_1,
+                              const TfLiteTensor* input_2, TfLiteTensor* output,
+                              TfLiteSubParams* params, OpData* op_params,
+                              int op_sign) {
+  TF_LITE_ENSURE(context,
+                 output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
   const auto& input1_quantization_params = input_1->params;
   const auto& input2_quantization_params = input_2->params;
   const auto& output_quantization_params = output->params;
+  int32_t integer_type_min = 0;
+  int32_t integer_type_max = 0;
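+  // Pick the representable range of the 8-bit type in use ([0, 255] for
+  // uint8, [-128, 127] for int8); the zero points checked below must lie
+  // within it.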
+  if (output->type == kTfLiteUInt8) {
+    integer_type_min = std::numeric_limits<uint8_t>::min();
+    integer_type_max = std::numeric_limits<uint8_t>::max();
+  } else {
+    // output->type == kTfLiteInt8
+    integer_type_min = std::numeric_limits<int8_t>::min();
+    integer_type_max = std::numeric_limits<int8_t>::max();
+  }
 
-  TF_LITE_ENSURE(context, input1_quantization_params.zero_point >= 0);
-  TF_LITE_ENSURE(context, input1_quantization_params.zero_point <= 255);
-  TF_LITE_ENSURE(context, input2_quantization_params.zero_point >= 0);
-  TF_LITE_ENSURE(context, input2_quantization_params.zero_point <= 255);
-  TF_LITE_ENSURE(context, output_quantization_params.zero_point >= 0);
-  TF_LITE_ENSURE(context, output_quantization_params.zero_point <= 255);
+  TF_LITE_ENSURE(context,
+                 input1_quantization_params.zero_point >= integer_type_min);
+  TF_LITE_ENSURE(context,
+                 input1_quantization_params.zero_point <= integer_type_max);
+  TF_LITE_ENSURE(context,
+                 input2_quantization_params.zero_point >= integer_type_min);
+  TF_LITE_ENSURE(context,
+                 input2_quantization_params.zero_point <= integer_type_max);
+  TF_LITE_ENSURE(context,
+                 output_quantization_params.zero_point >= integer_type_min);
+  TF_LITE_ENSURE(context,
+                 output_quantization_params.zero_point <= integer_type_max);
 
   op_params->input1_offset = -input1_quantization_params.zero_point;
   op_params->input2_offset = -input2_quantization_params.zero_point;
@@ -109,10 +129,15 @@
   tflite::QuantizeMultiplierSmallerThanOneExp(real_output_multiplier,
                                               &op_params->output_multiplier,
                                               &op_params->output_shift);
-
-  CalculateActivationRangeUint8(params->activation, output,
-                                &op_params->output_activation_min,
-                                &op_params->output_activation_max);
+  if (output->type == kTfLiteUInt8) {
+    CalculateActivationRangeUint8(params->activation, output,
+                                  &op_params->output_activation_min,
+                                  &op_params->output_activation_max);
+  } else {
+    CalculateActivationRangeInt8(params->activation, output,
+                                 &op_params->output_activation_min,
+                                 &op_params->output_activation_max);
+  }
   return kTfLiteOk;
 }
 
@@ -186,9 +211,9 @@
     output_size = TfLiteIntArrayCopy(input1->dims);
   }
 
-  if (output->type == kTfLiteUInt8) {
-    TF_LITE_ENSURE_OK(context, PrepareUint8SubOp(context, input1, input2,
-                                                 output, params, data, -1));
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output,
+                                                params, data, -1));
   } else if (output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context, PrepareInt16SubOp(context, input1, input2,
                                                  output, params, data));
@@ -271,9 +296,15 @@
                GetTensorData<data_type>(input1), GetTensorShape(input2), \
                GetTensorData<data_type>(input2), GetTensorShape(output), \
                GetTensorData<data_type>(output))
-  if (output->type == kTfLiteUInt8) {
     // NOTE: We are using the add kernels. This is possible because the second
     // value's multiplier is negated before being passed down.
+  if (output->type == kTfLiteInt8) {
+    if (need_broadcast) {
+      TF_LITE_SUB(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
+    } else {
+      TF_LITE_SUB(reference_integer_ops, Add, int8_t);
+    }
+  } else if (output->type == kTfLiteUInt8) {
     if (kernel_type == kReference) {
       if (need_broadcast) {
         TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, uint8_t);
@@ -319,7 +350,8 @@
 
   if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
     EvalSub<kernel_type>(context, node, params, data, input1, input2, output);
-  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
+  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
+             output->type == kTfLiteInt16) {
     EvalQuantized<kernel_type>(context, node, params, data, input1, input2,
                                output);
   } else {
diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc
index 23927c6..3c19678 100644
--- a/tensorflow/lite/kernels/sub_test.cc
+++ b/tensorflow/lite/kernels/sub_test.cc
@@ -63,9 +63,10 @@
  public:
   using BaseSubOpModel::BaseSubOpModel;
 
+  template <typename integer_dtype>
   std::vector<float> GetDequantizedOutput() {
-    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+    return Dequantize<integer_dtype>(ExtractVector<integer_dtype>(output_),
+                                     GetScale(output_), GetZeroPoint(output_));
   }
 
   std::vector<float> GetDequantizedOutputInt16() {
@@ -74,17 +75,15 @@
   }
 };
 
-// for quantized Sub, the error shouldn't exceed 2*step
+// For quantized Sub, the error shouldn't exceed one quantization step.
 float GetTolerance(int min, int max) {
   float kQuantizedStep = (max - min) / 255.0;
-  float kQuantizedTolerance = 2.0 * kQuantizedStep;
-  return kQuantizedTolerance;
+  return kQuantizedStep;
 }
 
 float GetToleranceInt16(float min, float max) {
   float kQuantizedStep = (max - min) / std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = 2.0 * kQuantizedStep;
-  return kQuantizedTolerance;
+  return kQuantizedStep;
 }
 
 TEST(FloatSubOpModel, NoActivation) {
@@ -194,7 +193,8 @@
   }
 }
 
-TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) {
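+// Shared test body, parameterized on the schema tensor type and the matching
+// C++ storage type; instantiated below for uint8 and int8.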
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedTestsNoActivation() {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}};
@@ -204,20 +204,30 @@
                                              {-0.8, -0.2, -0.1, 0.9},
                                              {-0.61, -0.2, 0.88, -0.2}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {}, -1.0, 1.0},
+    QuantizedSubOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {}, -1.0, 1.0},
                           ActivationFunctionType_NONE);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), inputs1[i]);
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), inputs2[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(), inputs1[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), inputs2[i]);
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
-                                              results[i], kQuantizedTolerance)))
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }
 
-TEST(QuantizedSubOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
+TEST(QuantizedSubOpModel, QuantizedTestsNoActivationUInt8) {
+  QuantizedTestsNoActivation<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) {
+  QuantizedTestsNoActivation<TensorType_INT8, int8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedTestsActivationRELU_N1_TO_1() {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.5}};
@@ -226,57 +236,85 @@
   std::vector<std::vector<float>> results = {{-1.0, -0.2, 0.0, 1.0},
                                              {-1.0, -0.2, 1.0, 0.2}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
-                          {TensorType_UINT8, {}, -1.0, 1.0},
+    QuantizedSubOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {}, -1.0, 1.0},
                           ActivationFunctionType_RELU_N1_TO_1);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), inputs1[i]);
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), inputs2[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(), inputs1[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), inputs2[i]);
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
-                                              results[i], kQuantizedTolerance)))
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }
+TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1UInt8) {
+  QuantizedTestsActivationRELU_N1_TO_1<TensorType_UINT8, uint8_t>();
+}
 
-TEST(QuantizedSubOpModel, QuantizedVariousInputShapes) {
+TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1Int8) {
+  QuantizedTestsActivationRELU_N1_TO_1<TensorType_INT8, int8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedVariousInputShapes() {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-                          {TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-                          {TensorType_UINT8, {}, -3.0, 3.0},
+    QuantizedSubOpModel m({tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, {}, -3.0, 3.0},
                           ActivationFunctionType_NONE);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1});
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
+                                         {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(),
+                                         {0.1, 0.3, 0.3, 0.5, 1.1, 0.1});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(),
+    EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
                 ElementsAreArray(ArrayFloatNear(
                     {-2.1, -0.1, 0.4, 0.3, 0.0, 1.9}, kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }
 
-TEST(QuantizedSubOpModel, QuantizedWithBroadcast) {
+TEST(QuantizedSubOpModel, QuantizedVariousInputShapesUInt8) {
+  QuantizedVariousInputShapes<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) {
+  QuantizedVariousInputShapes<TensorType_INT8, int8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedWithBroadcast() {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-                          {TensorType_UINT8, {}, -3.0, 3.0},
-                          {TensorType_UINT8, {}, -3.0, 3.0},
-                          ActivationFunctionType_NONE);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.7});
+    QuantizedSubOpModel m(
+        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -3.0, 3.0},
+        {tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
+                                         {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.7});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(),
+    EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
                 ElementsAreArray(ArrayFloatNear(
                     {-2.7, -0.5, 0.0, 0.1, 0.4, 1.3}, kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }
 
+TEST(QuantizedSubOpModel, QuantizedWithBroadcastUInt8) {
+  QuantizedWithBroadcast<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) {
+  QuantizedWithBroadcast<TensorType_INT8, int8_t>();
+}
+
 TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) {
   const float kMin = -1.f;
   const float kMax =
diff --git a/tensorflow/lite/kernels/subgraph_test_util.cc b/tensorflow/lite/kernels/subgraph_test_util.cc
new file mode 100644
index 0000000..e712be1
--- /dev/null
+++ b/tensorflow/lite/kernels/subgraph_test_util.cc
@@ -0,0 +1,409 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/subgraph_test_util.h"
+
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
+#include "tensorflow/lite/core/subgraph.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/model.h"
+
+namespace tflite {
+
+namespace ops {
+namespace builtin {
+// ADD and MUL are used to test simple branch.
+TfLiteRegistration* Register_ADD();
+TfLiteRegistration* Register_MUL();
+// ADD and MUL are used to test dynamic sized subgraphs.
+TfLiteRegistration* Register_PAD();
+TfLiteRegistration* Register_LESS_EQUAL();
+}  // namespace builtin
+namespace custom {
+TfLiteRegistration* Register_IF();
+TfLiteRegistration* Register_WHILE();
+}  // namespace custom
+}  // namespace ops
+
+namespace subgraph_test_util {
+
+namespace {
+
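+// Register `tensor_index` as a read-write tensor of `type` with no static
+// shape; tests resize it through the interpreter before allocation.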
+void SetupTensor(Subgraph* subgraph, int tensor_index, TfLiteType type) {
+  ASSERT_EQ(subgraph->SetTensorParametersReadWrite(tensor_index, type, "", 0,
+                                                   nullptr, {}, false),
+            kTfLiteOk);
+}
+
+}  // namespace
+
+SubgraphBuilder::~SubgraphBuilder() {
+  for (auto buffer : buffers_) {
+    free(buffer);
+  }
+}
+
+void SubgraphBuilder::BuildAddSubgraph(Subgraph* subgraph) {
+  const int kInput1 = 0;
+  const int kInput2 = 1;
+  const int kOutput = 2;
+  const int kTensorCount = 3;
+  // kInput1(0) --> +---+
+  //                |ADD| --> kOutput(2)
+  // kInput2(1) --> +---+
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kInput1, kInput2}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutput}), kTfLiteOk);
+
+  SetupTensor(subgraph, kInput1, kTfLiteInt32);
+  SetupTensor(subgraph, kInput2, kTfLiteInt32);
+  SetupTensor(subgraph, kOutput, kTfLiteInt32);
+
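+  // The subgraph takes ownership of the malloc'd params and releases them
+  // with free() when the node is destroyed, so they are not freed here.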
+  TfLiteAddParams* params =
+      reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
+  params->activation = kTfLiteActNone;
+  int node_index;
+  subgraph->AddNodeWithParameters(
+      {kInput1, kInput2}, {kOutput}, nullptr, 0, params,
+      ::tflite::ops::builtin::Register_ADD(), &node_index);
+}
+
+// Build a subgraph with a single Mul op. Helper function for testing.
+void SubgraphBuilder::BuildMulSubgraph(Subgraph* subgraph) {
+  const int kInput1 = 0;
+  const int kInput2 = 1;
+  const int kOutput = 2;
+  const int kTensorCount = 3;
+  // kInput1(0) --> +---+
+  //                |MUL| --> kOutput(2)
+  // kInput2(1) --> +---+
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kInput1, kInput2}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutput}), kTfLiteOk);
+
+  SetupTensor(subgraph, kInput1, kTfLiteInt32);
+  SetupTensor(subgraph, kInput2, kTfLiteInt32);
+  SetupTensor(subgraph, kOutput, kTfLiteInt32);
+
+  TfLiteMulParams* params =
+      reinterpret_cast<TfLiteMulParams*>(malloc(sizeof(TfLiteMulParams)));
+  params->activation = kTfLiteActNone;
+  int node_index;
+  subgraph->AddNodeWithParameters(
+      {kInput1, kInput2}, {kOutput}, nullptr, 0, params,
+      ::tflite::ops::builtin::Register_MUL(), &node_index);
+}
+
+// Build a subgraph with a pad op. Helper function for testing.
+void SubgraphBuilder::BuildPadSubgraph(Subgraph* subgraph) {
+  const int kInput1 = 0;
+  const int kInput2 = 1;
+  const int kOutput = 2;
+  const int kTensorCount = 3;
+  // kInput1(0) --> +---+
+  //                |PAD| --> kOutput(2)
+  // kInput2(1) --> +---+
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kInput1, kInput2}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutput}), kTfLiteOk);
+
+  SetupTensor(subgraph, kInput1, kTfLiteInt32);
+  SetupTensor(subgraph, kInput2, kTfLiteInt32);
+  SetupTensor(subgraph, kOutput, kTfLiteInt32);
+
+  TfLitePadParams* params =
+      reinterpret_cast<TfLitePadParams*>(malloc(sizeof(TfLitePadParams)));
+  int node_index;
+  subgraph->AddNodeWithParameters(
+      {kInput1, kInput2}, {kOutput}, nullptr, 0, params,
+      ::tflite::ops::builtin::Register_PAD(), &node_index);
+}
+
+void SubgraphBuilder::BuildIfSubgraph(Subgraph* subgraph) {
+  const int kCondInput = 0;
+  const int kInput1 = 1;
+  const int kInput2 = 2;
+  const int kOutput = 3;
+  const int kTensorCount = 4;
+
+  // kCondInput(0) --> +----+
+  // kInput1(1)  ----> | IF | --> kOutput(3)
+  // kInput2(2)  ----> +----+
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kCondInput, kInput1, kInput2}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutput}), kTfLiteOk);
+
+  SetupTensor(subgraph, kCondInput, kTfLiteBool);
+  SetupTensor(subgraph, kInput1, kTfLiteInt32);
+  SetupTensor(subgraph, kInput2, kTfLiteInt32);
+  SetupTensor(subgraph, kOutput, kTfLiteInt32);
+
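+  // The custom IF op reads its branch targets from a FlexBuffer map in the
+  // custom options: subgraph 1 is the then-branch, subgraph 2 the else-branch.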
+  flexbuffers::Builder fbb;
+  fbb.Map([&]() {
+    fbb.Int("then_subgraph_index", 1);
+    fbb.Int("else_subgraph_index", 2);
+  });
+  fbb.Finish();
+  const auto& buffer = fbb.GetBuffer();
+
+  int node_index;
+  subgraph->AddNodeWithParameters(
+      {kCondInput, kInput1, kInput2}, {kOutput},
+      reinterpret_cast<const char*>(buffer.data()), buffer.size(), nullptr,
+      ::tflite::ops::custom::Register_IF(), &node_index);
+}
+
+void SubgraphBuilder::BuildLessEqualCondSubgraph(Subgraph* subgraph, int rhs) {
+  const int kInput1 = 0;
+  const int kInput2 = 1;
+  const int kOutput = 2;
+  const int kConstRhs = 3;
+  const int kTensorCount = 4;
+
+  // kInput1(0) ----> +------------+
+  //                  | LESS_EQUAL | --> kOutput(2)
+  // kConstRhs(3) --> +------------+
+  //
+  // kInput2(1) --> (unused)
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kInput1, kInput2}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutput}), kTfLiteOk);
+
+  SetupTensor(subgraph, kInput1, kTfLiteInt32);
+  SetupTensor(subgraph, kInput2, kTfLiteInt32);
+  SetupTensor(subgraph, kOutput, kTfLiteBool);
+
+  CreateConstantInt32Tensor(subgraph, kConstRhs, {1}, {rhs});
+  int node_index;
+  subgraph->AddNodeWithParameters(
+      {kInput1, kConstRhs}, {kOutput}, nullptr, 0, nullptr,
+      ::tflite::ops::builtin::Register_LESS_EQUAL(), &node_index);
+}
+
+void SubgraphBuilder::BuildAccumulateLoopBodySubgraph(Subgraph* subgraph) {
+  const int kInputCounter = 0;
+  const int kInputValue = 1;
+  const int kOutputCounter = 2;
+  const int kOutputValue = 3;
+  const int kConstStep = 4;
+  const int kTensorCount = 5;
+
+  // kInputCounter(0) --> +-----+
+  //                      | ADD | --> kOutputCounter(2)
+  // kConstStep(4) -----> +-----+            |
+  //                                         |
+  //                                         v
+  //                                      +-----+
+  //                                      | ADD | --> kOutputValue(3)
+  // kInputValue(1) ----------------------+-----+
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kInputCounter, kInputValue}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutputCounter, kOutputValue}), kTfLiteOk);
+
+  SetupTensor(subgraph, kInputCounter, kTfLiteInt32);
+  SetupTensor(subgraph, kInputValue, kTfLiteInt32);
+  SetupTensor(subgraph, kOutputCounter, kTfLiteInt32);
+  SetupTensor(subgraph, kOutputValue, kTfLiteInt32);
+  CreateConstantInt32Tensor(subgraph, kConstStep, {1}, {1});
+
+  int node_index;
+  TfLiteAddParams* params =
+      reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
+  params->activation = kTfLiteActNone;
+  subgraph->AddNodeWithParameters({0, 4}, {2}, nullptr, 0, params,
+                                  ::tflite::ops::builtin::Register_ADD(),
+                                  &node_index);
+  params = reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
+  params->activation = kTfLiteActNone;
+  subgraph->AddNodeWithParameters({2, 1}, {3}, nullptr, 0, params,
+                                  ::tflite::ops::builtin::Register_ADD(),
+                                  &node_index);
+}
+
+void SubgraphBuilder::BuildPadLoopBodySubgraph(Subgraph* subgraph,
+                                               const std::vector<int> padding) {
+  const int kInputCounter = 0;
+  const int kInputValue = 1;
+  const int kOutputCounter = 2;
+  const int kOutputValue = 3;
+  const int kConstStep = 4;
+  const int kConstPadding = 5;
+  const int kTensorCount = 6;
+
+  // kInputCounter(0) --> +-----+
+  //                      | ADD | --> kOutputCounter(2)
+  // kConstStep(4) -----> +-----+
+  //
+  // kInputValue(1) ----> +-----+
+  //                      | PAD | --> kOutputValue(3)
+  // kConstPadding(5) --> +-----+
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kInputCounter, kInputValue}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutputCounter, kOutputValue}), kTfLiteOk);
+
+  SetupTensor(subgraph, kInputCounter, kTfLiteInt32);
+  SetupTensor(subgraph, kInputValue, kTfLiteInt32);
+  SetupTensor(subgraph, kOutputCounter, kTfLiteInt32);
+  SetupTensor(subgraph, kOutputValue, kTfLiteInt32);
+
+  CreateConstantInt32Tensor(subgraph, kConstStep, {1}, {1});
+  ASSERT_EQ(padding.size() % 2, 0);
+  int padding_dims = padding.size();
+  CreateConstantInt32Tensor(subgraph, kConstPadding, {1, padding_dims},
+                            padding);
+
+  int node_index;
+  TfLiteAddParams* add_params =
+      reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
+  add_params->activation = kTfLiteActNone;
+  subgraph->AddNodeWithParameters(
+      {kInputCounter, kConstStep}, {kOutputCounter}, nullptr, 0, add_params,
+      ::tflite::ops::builtin::Register_ADD(), &node_index);
+  TfLitePadParams* pad_params =
+      reinterpret_cast<TfLitePadParams*>(malloc(sizeof(TfLitePadParams)));
+  subgraph->AddNodeWithParameters(
+      {kInputValue, kConstPadding}, {kOutputValue}, nullptr, 0, pad_params,
+      ::tflite::ops::builtin::Register_PAD(), &node_index);
+}
+
+void SubgraphBuilder::BuildWhileSubgraph(Subgraph* subgraph) {
+  const int kInput1 = 0;
+  const int kInput2 = 1;
+  const int kOutput1 = 2;
+  const int kOutput2 = 3;
+  const int kTensorCount = 4;
+
+  // kInput1(0) --> +-------+ --> kOutput1(2)
+  //                | WHILE |
+  // kInput2(1) --> +-------+ --> kOutput2(3)
+
+  int first_new_tensor_index;
+  ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index),
+            kTfLiteOk);
+  ASSERT_EQ(first_new_tensor_index, 0);
+  ASSERT_EQ(subgraph->SetInputs({kInput1, kInput2}), kTfLiteOk);
+  ASSERT_EQ(subgraph->SetOutputs({kOutput1, kOutput2}), kTfLiteOk);
+
+  SetupTensor(subgraph, kInput1, kTfLiteInt32);
+  SetupTensor(subgraph, kInput2, kTfLiteInt32);
+  SetupTensor(subgraph, kOutput1, kTfLiteInt32);
+  SetupTensor(subgraph, kOutput2, kTfLiteInt32);
+
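+  // The custom WHILE op resolves its subgraphs from the custom options:
+  // subgraph 1 is the condition and subgraph 2 the loop body.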
+  flexbuffers::Builder fbb;
+  fbb.Map([&]() {
+    fbb.Int("cond_subgraph_index", 1);
+    fbb.Int("body_subgraph_index", 2);
+  });
+  fbb.Finish();
+  const auto& buffer = fbb.GetBuffer();
+
+  int node_index;
+  subgraph->AddNodeWithParameters(
+      {0, 1}, {2, 3}, reinterpret_cast<const char*>(buffer.data()),
+      buffer.size(), nullptr, ::tflite::ops::custom::Register_WHILE(),
+      &node_index);
+}
+
+void SubgraphBuilder::CreateConstantInt32Tensor(Subgraph* subgraph,
+                                                int tensor_index,
+                                                const std::vector<int>& shape,
+                                                const std::vector<int>& data) {
+  ASSERT_GT(shape.size(), 0);
+  int num_elements = 1;
+  for (int dim : shape) {
+    num_elements *= dim;
+  }
+  ASSERT_EQ(data.size(), num_elements);
+  size_t size_in_bytes = sizeof(int32_t) * num_elements;
+  // malloc returns suitably aligned storage; the subgraph reads the buffer in
+  // place, so it is kept alive in buffers_ until the builder is destroyed.
+  int32_t* buffer = reinterpret_cast<int32_t*>(malloc(size_in_bytes));
+  for (int i = 0; i < num_elements; ++i) {
+    buffer[i] = data[i];
+  }
+  buffers_.push_back(buffer);
+  ASSERT_EQ(subgraph->SetTensorParametersReadOnly(
+                tensor_index, kTfLiteInt32, "", shape, {},
+                reinterpret_cast<const char*>(buffer), size_in_bytes),
+            kTfLiteOk);
+}
+
+void FillIntTensor(TfLiteTensor* tensor, const std::vector<int32_t>& data) {
+  int count = NumElements(tensor);
+  ASSERT_EQ(count, data.size());
+  for (int i = 0; i < count; ++i) {
+    tensor->data.i32[i] = data[i];
+  }
+}
+
+void CheckIntTensor(const TfLiteTensor* tensor, const std::vector<int>& shape,
+                    const std::vector<int32_t>& data) {
+  ASSERT_EQ(tensor->dims->size, shape.size());
+  for (int i = 0; i < tensor->dims->size; ++i) {
+    ASSERT_EQ(tensor->dims->data[i], shape[i]);
+  }
+  ASSERT_EQ(tensor->type, kTfLiteInt32);
+  int count = NumElements(tensor);
+  ASSERT_EQ(count, data.size());
+  for (int i = 0; i < count; ++i) {
+    EXPECT_EQ(tensor->data.i32[i], data[i]);
+  }
+}
+
+void CheckBoolTensor(const TfLiteTensor* tensor, const std::vector<int>& shape,
+                     const std::vector<bool>& data) {
+  ASSERT_EQ(tensor->dims->size, shape.size());
+  for (int i = 0; i < tensor->dims->size; ++i) {
+    ASSERT_EQ(tensor->dims->data[i], shape[i]);
+  }
+  ASSERT_EQ(tensor->type, kTfLiteBool);
+  int count = NumElements(tensor);
+  ASSERT_EQ(count, data.size());
+  for (int i = 0; i < count; ++i) {
+    EXPECT_EQ(tensor->data.b[i], data[i]);
+  }
+}
+
+}  // namespace subgraph_test_util
+}  // namespace tflite
diff --git a/tensorflow/lite/kernels/subgraph_test_util.h b/tensorflow/lite/kernels/subgraph_test_util.h
new file mode 100644
index 0000000..972f138
--- /dev/null
+++ b/tensorflow/lite/kernels/subgraph_test_util.h
@@ -0,0 +1,123 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This module provides helper functions for testing the interaction between
+// control flow ops and subgraphs.
+// For convenience, this module mostly uses `kTfLiteInt32` tensors.
+
+#ifndef TENSORFLOW_LITE_KERNELS_SUBGRAPH_TEST_UTIL_H_
+#define TENSORFLOW_LITE_KERNELS_SUBGRAPH_TEST_UTIL_H_
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/core/subgraph.h"
+#include "tensorflow/lite/interpreter.h"
+
+namespace tflite {
+namespace subgraph_test_util {
+
+// TODO(ycling): This file should be renamed to
+// `control_flow_test_util` to avoid confusion. I'll do it immediately
+// in a separate change.
+class SubgraphBuilder {
+ public:
+  ~SubgraphBuilder();
+
+  // Build a subgraph with a single Add op.
+  // 2 inputs. 1 output.
+  void BuildAddSubgraph(Subgraph* subgraph);
+
+  // Build a subgraph with a single Mul op.
+  // 2 inputs. 1 output.
+  void BuildMulSubgraph(Subgraph* subgraph);
+
+  // Build a subgraph with a single Pad op.
+  // 2 inputs. 1 output.
+  void BuildPadSubgraph(Subgraph* subgraph);
+
+  // Build a subgraph with a single If op.
+  // 3 inputs:
+  //   The 1st input is the condition, with boolean type.
+  //   The 2nd and 3rd inputs are fed as inputs to the branch subgraphs.
+  // 1 output.
+  void BuildIfSubgraph(Subgraph* subgraph);
+
+  // Build a subgraph with a single LessEqual op.
+  // The subgraph is used as the condition subgraph for testing `While` op.
+  // 2 inputs:
+  //   The 1st input is a counter with `kTfLiteInt32` type.
+  //   The 2nd input is ignored in this subgraph.
+  // 1 output with `kTfLiteBool` type.
+  //   Equivalent to (input <= rhs).
+  void BuildLessEqualCondSubgraph(Subgraph* subgraph, int rhs);
+
+  // An accumulate loop body subgraph. Used to produce the triangular number
+  // sequence. 2 inputs and 2 outputs.
+  //   Equivalent to (counter, value) -> (counter + 1, counter + 1 + value).
+  void BuildAccumulateLoopBodySubgraph(Subgraph* subgraph);
+
+  // A pad loop body subgraph. When used in a loop, it repeatedly enlarges the
+  // tensor.
+  // 2 inputs and 2 outputs.
+  //   Equivalent to (counter, value) -> (counter + 1, tf.pad(value, padding))
+  // Note the padding is created as a constant tensor.
+  void BuildPadLoopBodySubgraph(Subgraph* subgraph,
+                                const std::vector<int> padding);
+
+  // Build a subgraph with a single While op.
+  // 2 inputs, 2 outputs.
+  void BuildWhileSubgraph(Subgraph* subgraph);
+
+ private:
+  void CreateConstantInt32Tensor(Subgraph* subgraph, int tensor_index,
+                                 const std::vector<int>& shape,
+                                 const std::vector<int>& data);
+  std::vector<void*> buffers_;
+};
+
+class ControlFlowOpTest : public ::testing::Test {
+ public:
+  ControlFlowOpTest()
+      : interpreter_(new Interpreter), builder_(new SubgraphBuilder) {}
+
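+  // Destroy the interpreter before the builder: constant tensors created by
+  // the builder are backed by builder-owned buffers.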
+  ~ControlFlowOpTest() override {
+    interpreter_.reset();
+    builder_.reset();
+  }
+
+ protected:
+  std::unique_ptr<Interpreter> interpreter_;
+  std::unique_ptr<SubgraphBuilder> builder_;
+};
+
+// Fill a `TfLiteTensor` with a 32-bit integer vector.
+// Preconditions:
+// * The tensor must have `kTfLiteInt32` type.
+// * The tensor must be allocated.
+// * The element count of the tensor must be equal to the length of
+//   the vector.
+void FillIntTensor(TfLiteTensor* tensor, const std::vector<int32_t>& data);
+
+// Check that the shape and int32 data of a tensor are as expected.
+void CheckIntTensor(const TfLiteTensor* tensor, const std::vector<int>& shape,
+                    const std::vector<int32_t>& data);
+// Check that the shape and bool data of a tensor are as expected.
+void CheckBoolTensor(const TfLiteTensor* tensor, const std::vector<int>& shape,
+                     const std::vector<bool>& data);
+
+}  // namespace subgraph_test_util
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_SUBGRAPH_TEST_UTIL_H_
diff --git a/tensorflow/lite/kernels/subgraph_test_util_test.cc b/tensorflow/lite/kernels/subgraph_test_util_test.cc
new file mode 100644
index 0000000..04e5118
--- /dev/null
+++ b/tensorflow/lite/kernels/subgraph_test_util_test.cc
@@ -0,0 +1,157 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/subgraph_test_util.h"
+#include <gtest/gtest.h>
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/test_util.h"
+
+namespace tflite {
+
+namespace subgraph_test_util {
+
+namespace {
+
+class SubgraphBuilderTest : public ::testing::Test {
+ public:
+  SubgraphBuilderTest()
+      : interpreter_(new Interpreter), builder_(new SubgraphBuilder) {}
+
+  ~SubgraphBuilderTest() override {
+    interpreter_.reset();
+    builder_.reset();
+  }
+
+ protected:
+  void TestAccumulateLoopBody(int input1, int input2, int output1,
+                              int output2) {
+    interpreter_.reset(new Interpreter);
+    builder_->BuildAccumulateLoopBodySubgraph(
+        &interpreter_->primary_subgraph());
+
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {1});
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {1});
+    ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+    FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {input1});
+    FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {input2});
+
+    ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+    TfLiteTensor* output_tensor1 =
+        interpreter_->tensor(interpreter_->outputs()[0]);
+    CheckIntTensor(output_tensor1, {1}, {output1});
+    TfLiteTensor* output_tensor2 =
+        interpreter_->tensor(interpreter_->outputs()[1]);
+    CheckIntTensor(output_tensor2, {1}, {output2});
+  }
+
+  std::unique_ptr<Interpreter> interpreter_;
+  std::unique_ptr<SubgraphBuilder> builder_;
+};
+
+TEST_F(SubgraphBuilderTest, TestBuildAddSubgraph) {
+  builder_->BuildAddSubgraph(&interpreter_->primary_subgraph());
+
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {2});
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {1, 2});
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {5, 7});
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {1, 2});
+  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+
+  TfLiteTensor* output = interpreter_->tensor(interpreter_->outputs()[0]);
+  CheckIntTensor(output, {1, 2}, {6, 9});
+}
+
+TEST_F(SubgraphBuilderTest, TestBuildMulSubgraph) {
+  builder_->BuildMulSubgraph(&interpreter_->primary_subgraph());
+
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {2});
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {1, 2});
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {5, 7});
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {1, 2});
+  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+
+  TfLiteTensor* output = interpreter_->tensor(interpreter_->outputs()[0]);
+  CheckIntTensor(output, {1, 2}, {5, 14});
+}
+
+TEST_F(SubgraphBuilderTest, TestBuildPadSubgraph) {
+  builder_->BuildPadSubgraph(&interpreter_->primary_subgraph());
+
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {2});
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {1, 2});
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {5, 7});
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {1, 2});
+  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+
+  TfLiteTensor* output = interpreter_->tensor(interpreter_->outputs()[0]);
+  CheckIntTensor(output, {5}, {0, 5, 7, 0, 0});
+}
+
+TEST_F(SubgraphBuilderTest, TestBuildLessEqualCondSubgraph) {
+  builder_->BuildLessEqualCondSubgraph(&interpreter_->primary_subgraph(), 3);
+
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {5});
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {10, 10});
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+  // Test [1, 2, 3, 4, 5] <= 3 == [true, true, true, false, false]
+  // (with broadcasting).
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]),
+                {1, 2, 3, 4, 5});
+  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+  TfLiteTensor* output = interpreter_->tensor(interpreter_->outputs()[0]);
+  CheckBoolTensor(output, {5}, {true, true, true, false, false});
+}
+
+TEST_F(SubgraphBuilderTest, TestBuildAccumulateLoopBodySubgraph) {
+  TestAccumulateLoopBody(1, 1, 2, 3);
+  TestAccumulateLoopBody(2, 3, 3, 6);
+  TestAccumulateLoopBody(3, 6, 4, 10);
+}
+
+TEST_F(SubgraphBuilderTest, TestBuildPadLoopBodySubgraph) {
+  builder_->BuildPadLoopBodySubgraph(&interpreter_->primary_subgraph(), {1, 2});
+
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {1});
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {5});
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {1});
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]),
+                {0, 5, 7, 0, 0});
+
+  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+  TfLiteTensor* output1 = interpreter_->tensor(interpreter_->outputs()[0]);
+  CheckIntTensor(output1, {1}, {2});
+  TfLiteTensor* output2 = interpreter_->tensor(interpreter_->outputs()[1]);
+  CheckIntTensor(output2, {8}, {0, 0, 5, 7, 0, 0, 0, 0});
+}
+
+}  // namespace
+}  // namespace subgraph_test_util
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/kernels/topk_v2.cc b/tensorflow/lite/kernels/topk_v2.cc
index 444b01e..64973d7 100644
--- a/tensorflow/lite/kernels/topk_v2.cc
+++ b/tensorflow/lite/kernels/topk_v2.cc
@@ -207,6 +207,10 @@
       TopK(row_size, num_rows, input->data.uint8, k, output_indexes->data.i32,
            output_values->data.uint8);
       break;
+    case kTfLiteInt8:
+      TopK(row_size, num_rows, input->data.int8, k, output_indexes->data.i32,
+           output_values->data.int8);
+      break;
     case kTfLiteInt32:
       TopK(row_size, num_rows, input->data.i32, k, output_indexes->data.i32,
            output_values->data.i32);
diff --git a/tensorflow/lite/kernels/topk_v2_test.cc b/tensorflow/lite/kernels/topk_v2_test.cc
index 108b812..0097ae2 100644
--- a/tensorflow/lite/kernels/topk_v2_test.cc
+++ b/tensorflow/lite/kernels/topk_v2_test.cc
@@ -46,6 +46,10 @@
     PopulateTensor<uint8_t>(input_, data);
   }
 
+  void SetInputInt8(std::initializer_list<int8_t> data) {
+    PopulateTensor<int8_t>(input_, data);
+  }
+
   void SetInputInt32(std::initializer_list<int32_t> data) {
     PopulateTensor<int32_t>(input_, data);
   }
@@ -66,6 +70,10 @@
     return ExtractVector<uint8_t>(output_values_);
   }
 
+  std::vector<int8_t> GetValuesInt8() {
+    return ExtractVector<int8_t>(output_values_);
+  }
+
   std::vector<int32_t> GetValuesInt32() {
     return ExtractVector<int32_t>(output_values_);
   }
@@ -128,6 +136,14 @@
   EXPECT_THAT(m.GetValuesUInt8(), ElementsAreArray({3, 2, 251, 250}));
 }
 
+TEST(TopKV2OpTest, TypeInt8) {
+  TopKV2OpModel m({2, 3}, TensorType_INT8, 2);
+  m.SetInputInt8({1, 2, 3, -126, 125, -24});
+  m.Invoke();
+  EXPECT_THAT(m.GetIndexes(), ElementsAreArray({2, 1, 1, 2}));
+  EXPECT_THAT(m.GetValuesInt8(), ElementsAreArray({3, 2, 125, -24}));
+}
+
 // Check that int32_t works.
 TEST(TopKV2OpTest, TypeInt32) {
   TopKV2OpModel m({2, 3}, TensorType_INT32, 2);
diff --git a/tensorflow/lite/kernels/transpose.cc b/tensorflow/lite/kernels/transpose.cc
index 7a6d320..0ef4972 100644
--- a/tensorflow/lite/kernels/transpose.cc
+++ b/tensorflow/lite/kernels/transpose.cc
@@ -117,6 +117,11 @@
         TF_LITE_TRANSPOSE(reference_ops, uint8_t);
       }
       break;
+    case kTfLiteInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_TRANSPOSE(reference_ops, int8_t);
+      }
+      break;
     case kTfLiteInt32:
       if (kernel_type == kReference) {
         TF_LITE_TRANSPOSE(reference_ops, int32_t);
diff --git a/tensorflow/lite/kernels/transpose_test.cc b/tensorflow/lite/kernels/transpose_test.cc
index 93df2c8..7164415 100644
--- a/tensorflow/lite/kernels/transpose_test.cc
+++ b/tensorflow/lite/kernels/transpose_test.cc
@@ -25,16 +25,17 @@
 
 using ::testing::ElementsAreArray;
 
+template <typename T>
 void RunTestPermutation(const std::vector<int>& shape,
                         const std::vector<int>& perms,
-                        std::vector<float>* input_transposed) {
+                        std::vector<T>* input_transposed) {
   // Count elements and allocate output.
   int count = 1;
   for (auto factor : shape) count *= factor;
   input_transposed->resize(count);
 
   // Create the dummy data
-  std::vector<float> input(count);
+  std::vector<T> input(count);
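+  // Sequential values 0, 1, ..., count - 1. The int8 test below uses shape
+  // {2, 3, 4, 5} (120 elements), so these values also fit in int8_t.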
   for (int i = 0; i < input.size(); i++) {
     input[i] = i;
   }
@@ -64,8 +65,8 @@
     params.perm[i] = perms[i];
   }
 
-  reference_ops::Transpose<float>(params, input_shape, input.data(),
-                                  output_shape, input_transposed->data());
+  reference_ops::Transpose<T>(params, input_shape, input.data(), output_shape,
+                              input_transposed->data());
 }
 
 TEST(TransposeTest, TestRefOps1D) {
@@ -125,6 +126,28 @@
   ASSERT_EQ(out, ref);
 }
 
+TEST(TransposeTest, TestRefOps4DInt8) {
+  std::vector<int8_t> out;
+  // Basic 4d.
+  RunTestPermutation({2, 3, 4, 5}, {2, 0, 1, 3}, &out);
+  ASSERT_EQ(
+      out,
+      std::vector<int8_t>(
+          {0,  1,  2,  3,  4,  20, 21, 22, 23, 24, 40,  41,  42,  43,  44,
+           60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+           5,  6,  7,  8,  9,  25, 26, 27, 28, 29, 45,  46,  47,  48,  49,
+           65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+           10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50,  51,  52,  53,  54,
+           70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+           15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55,  56,  57,  58,  59,
+           75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}));
+  RunTestPermutation({2, 3, 4, 5}, {0, 1, 2, 3}, &out);
+  // Basic identity.
+  std::vector<int8_t> ref(out.size());
+  for (int k = 0; k < ref.size(); k++) ref[k] = k;
+  ASSERT_EQ(out, ref);
+}
+
 class TransposeOpModel : public SingleOpModel {
  public:
   void SetInput(std::initializer_list<float> data) {
diff --git a/tensorflow/lite/kernels/unpack.cc b/tensorflow/lite/kernels/unpack.cc
index 1caffe1..99ad4bb 100644
--- a/tensorflow/lite/kernels/unpack.cc
+++ b/tensorflow/lite/kernels/unpack.cc
@@ -52,9 +52,11 @@
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
   TF_LITE_ENSURE(context, NumDimensions(input) > 1);
-  TF_LITE_ENSURE(context, NumDimensions(input) > data->axis);
-  // TODO(renjieliu): Support negative axis.
-  TF_LITE_ENSURE(context, data->axis >= 0);
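+  // Negative axis values count from the back (e.g. -1 is the last
+  // dimension); normalize before validating.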
+  int axis = data->axis;
+  if (axis < 0) {
+    axis += NumDimensions(input);
+  }
+  TF_LITE_ENSURE(context, 0 <= axis && axis < NumDimensions(input));
   if (input->type != kTfLiteInt32 && input->type != kTfLiteFloat32) {
     context->ReportError(context,
                          "Currently pack only supports int32 and float32.");
@@ -67,12 +69,12 @@
   TfLiteIntArray* output_shape = TfLiteIntArrayCreate(NumDimensions(input) - 1);
   int o = 0;
   for (int index = 0; index < NumDimensions(input); ++index) {
-    if (index != data->axis) {
+    if (index != axis) {
       output_shape->data[o++] = input_shape->data[index];
     }
   }
 
-  TF_LITE_ENSURE_EQ(context, data->num, input_shape->data[data->axis]);
+  TF_LITE_ENSURE_EQ(context, data->num, input_shape->data[axis]);
   for (int i = 0; i < data->num; ++i) {
     TfLiteIntArray* copied_output_shape = TfLiteIntArrayCopy(output_shape);
     TfLiteTensor* output = GetOutput(context, node, i);
diff --git a/tensorflow/lite/kernels/unpack_test.cc b/tensorflow/lite/kernels/unpack_test.cc
index 9b60cce..76f7dff 100644
--- a/tensorflow/lite/kernels/unpack_test.cc
+++ b/tensorflow/lite/kernels/unpack_test.cc
@@ -28,14 +28,16 @@
 class UnpackOpModel : public SingleOpModel {
  public:
   UnpackOpModel(const TensorData& input, int axis) {
-    CHECK_LE(axis, input.shape.size());
+    if (axis < 0) {
+      axis += input.shape.size();
+    }
     const int num_outputs = input.shape[axis];
     input_ = AddInput(input);
     for (int i = 0; i < num_outputs; ++i) {
       outputs_.push_back(AddOutput(input.type));
     }
     SetBuiltinOp(BuiltinOperator_UNPACK, BuiltinOptions_UnpackOptions,
-                 CreatePackOptions(builder_, num_outputs, axis).Union());
+                 CreateUnpackOptions(builder_, num_outputs, axis).Union());
     BuildInterpreter({GetShape(input_)});
   }
 
@@ -104,6 +106,44 @@
   EXPECT_THAT(output_datas[1], ElementsAre(2, 4, 6));
 }
 
+TEST(UnpackOpTest, FloatThreeOutputsNegativeAxisOne) {
+  UnpackOpModel<float> model({TensorType_FLOAT32, {3, 2}}, -1);
+  model.SetInput({1, 2, 3, 4, 5, 6});
+  model.Invoke();
+
+  // Check outputs shapes.
+  const std::vector<std::vector<int>>& output_shapes = model.GetOutputShapes();
+  EXPECT_EQ(output_shapes.size(), 2);
+  EXPECT_THAT(output_shapes[0], ElementsAre(3));
+  EXPECT_THAT(output_shapes[1], ElementsAre(3));
+
+  // Check outputs values.
+  const std::vector<std::vector<float>>& output_datas = model.GetOutputDatas();
+  EXPECT_EQ(output_datas.size(), 2);
+  EXPECT_THAT(output_datas[0], ElementsAre(1, 3, 5));
+  EXPECT_THAT(output_datas[1], ElementsAre(2, 4, 6));
+}
+
+TEST(UnpackOpTest, FloatThreeOutputsNegativeAxisTwo) {
+  UnpackOpModel<float> model({TensorType_FLOAT32, {3, 2}}, -2);
+  model.SetInput({1, 2, 3, 4, 5, 6});
+  model.Invoke();
+
+  // Check outputs shapes.
+  const std::vector<std::vector<int>>& output_shapes = model.GetOutputShapes();
+  EXPECT_EQ(output_shapes.size(), 3);
+  EXPECT_THAT(output_shapes[0], ElementsAre(2));
+  EXPECT_THAT(output_shapes[1], ElementsAre(2));
+  EXPECT_THAT(output_shapes[2], ElementsAre(2));
+
+  // Check outputs values.
+  const std::vector<std::vector<float>>& output_datas = model.GetOutputDatas();
+  EXPECT_EQ(output_datas.size(), 3);
+  EXPECT_THAT(output_datas[0], ElementsAre(1, 2));
+  EXPECT_THAT(output_datas[1], ElementsAre(3, 4));
+  EXPECT_THAT(output_datas[2], ElementsAre(5, 6));
+}
+
 TEST(UnpackOpTest, FloatOneOutput) {
   UnpackOpModel<float> model({TensorType_FLOAT32, {1, 6}}, 0);
   model.SetInput({1, 2, 3, 4, 5, 6});
diff --git a/tensorflow/lite/kernels/where.cc b/tensorflow/lite/kernels/where.cc
new file mode 100644
index 0000000..96ee36f
--- /dev/null
+++ b/tensorflow/lite/kernels/where.cc
@@ -0,0 +1,105 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace where {
+
+constexpr int kInputConditionTensor = 0;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus ResizeOutputTensor(TfLiteContext* context,
+                                const TfLiteTensor* cond_tensor,
+                                TfLiteTensor* output_tensor) {
+  // The output tensor should have shape (num_true, cond_rank), where num_true
+  // denotes the number of true values in the condition.
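+  // E.g. a [3] condition with values [true, false, true] yields an output of
+  // shape [2, 1] containing the rows [0] and [2].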
+  const RuntimeShape& cond_shape = GetTensorShape(cond_tensor);
+  const int size = cond_shape.FlatSize();
+  const int cond_rank = cond_shape.DimensionsCount();
+  const bool* cond_data = GetTensorData<bool>(cond_tensor);
+
+  int true_count = 0;
+  for (int i = 0; i < size; ++i) {
+    if (cond_data[i]) {
+      true_count++;
+    }
+  }
+  TfLiteIntArray* output_dims = TfLiteIntArrayCreate(2);
+  output_dims->data[0] = true_count;
+  output_dims->data[1] = cond_rank;
+  return context->ResizeTensor(context, output_tensor, output_dims);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  const TfLiteTensor* cond_tensor =
+      GetInput(context, node, kInputConditionTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  if (cond_tensor->type != kTfLiteBool) {
+    context->ReportError(context,
+                         "Condition tensor must be of type bool, but saw '%s'.",
+                         TfLiteTypeGetName(cond_tensor->type));
+    return kTfLiteError;
+  }
+
+  // As the output will be a 2D tensor of indices, we use int32 as the data
+  // type.
+  output->type = kTfLiteInt32;
+
+  // Exit early if cond is a non-const tensor. Set output tensor to dynamic so
+  // output size can be determined in Eval.
+  if (!IsConstantTensor(cond_tensor)) {
+    SetTensorToDynamic(output);
+    return kTfLiteOk;
+  }
+  return ResizeOutputTensor(context, cond_tensor, output);
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* cond_tensor =
+      GetInput(context, node, kInputConditionTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  if (IsDynamicTensor(output)) {
+    TF_LITE_ENSURE_OK(context,
+                      ResizeOutputTensor(context, cond_tensor, output));
+  }
+
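+  // SelectTrueCoords writes, for each true element of the condition, that
+  // element's coordinates as one row of the output, in row-major order.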
+  reference_ops::SelectTrueCoords(GetTensorShape(cond_tensor),
+                                  GetTensorData<bool>(cond_tensor),
+                                  GetTensorData<int32_t>(output));
+  return kTfLiteOk;
+}
+}  // namespace where
+
+TfLiteRegistration* Register_WHERE() {
+  static TfLiteRegistration r = {/*init*/ nullptr, /*free*/ nullptr,
+                                 where::Prepare, where::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/kernels/where_test.cc b/tensorflow/lite/kernels/where_test.cc
new file mode 100644
index 0000000..89bd7c4
--- /dev/null
+++ b/tensorflow/lite/kernels/where_test.cc
@@ -0,0 +1,161 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseWhereOpModel : public SingleOpModel {
+ public:
+  BaseWhereOpModel(const TensorData& input, const TensorData& output) {
+    input_ = AddInput(input);
+    output_ = AddOutput(output);
+    SetBuiltinOp(BuiltinOperator_WHERE, BuiltinOptions_WhereOptions,
+                 CreateWhereOptions(builder_).Union());
+    BuildInterpreter({GetShape(input_)});
+  }
+
+  int input() { return input_; }
+
+ protected:
+  int input_;
+  int output_;
+};
+
+class IntegerWhereOpModel : public BaseWhereOpModel {
+ public:
+  using BaseWhereOpModel::BaseWhereOpModel;
+
+  std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
+};
+
+TEST(WhereOpTest, SelectFromVectorNoResult) {
+  IntegerWhereOpModel m({TensorType_BOOL, {3}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {false, false, false});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput().size(), 0);
+}
+
+TEST(WhereOpTest, SelectFromVector) {
+  IntegerWhereOpModel m({TensorType_BOOL, {3}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {true, false, true});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 2}));
+}
+
+TEST(WhereOpTest, SelectFromMatrixNoResult) {
+  IntegerWhereOpModel m({TensorType_BOOL, {3, 3}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {false, false, false,  //
+                                     false, false, false,  //
+                                     false, false, false});
+  m.Invoke();
+  EXPECT_EQ(m.GetOutput().size(), 0);
+}
+
+TEST(WhereOpTest, SelectFromMatrix1) {
+  IntegerWhereOpModel m({TensorType_BOOL, {3, 1}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {true, false, true});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0,  //
+                                               2, 0}));
+}
+
+TEST(WhereOpTest, SelectFromMatrix2) {
+  IntegerWhereOpModel m({TensorType_BOOL, {3, 3}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {true, true, false,   //
+                                     true, false, false,  //
+                                     true, false, true});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0,  //
+                                               0, 1,  //
+                                               1, 0,  //
+                                               2, 0,  //
+                                               2, 2}));
+}
+
+TEST(WhereOpTest, SelectFromMatrix3) {
+  IntegerWhereOpModel m({TensorType_BOOL, {3, 5}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {true, false, false, true, true,   //
+                                     false, true, true, false, false,  //
+                                     true, false, true, false, false});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0,  //
+                                               0, 3,  //
+                                               0, 4,  //
+                                               1, 1,  //
+                                               1, 2,  //
+                                               2, 0,  //
+                                               2, 2}));
+}
+
+TEST(WhereOpTest, SelectFromRank3TensorNoResult) {
+  IntegerWhereOpModel m({TensorType_BOOL, {2, 2, 2}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {false, false, false, false,  //
+                                     false, false, false, false});
+  m.Invoke();
+  EXPECT_EQ(m.GetOutput().size(), 0);
+}
+
+TEST(WhereOpTest, SelectFromRank3Tensor1) {
+  IntegerWhereOpModel m({TensorType_BOOL, {2, 1, 3}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {true, false, true,  //
+                                     false, false, true});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0,  //
+                                               0, 0, 2,  //
+                                               1, 0, 2}));
+}
+
+TEST(WhereOpTest, SelectFromRank3Tensor2) {
+  IntegerWhereOpModel m({TensorType_BOOL, {2, 2, 2}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {true, true, false, true,  //
+                                     false, false, true, true});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0,  //
+                                               0, 0, 1,  //
+                                               0, 1, 1,  //
+                                               1, 1, 0,  //
+                                               1, 1, 1}));
+}
+
+TEST(WhereOpTest, SelectFromRank3Tensor3) {
+  IntegerWhereOpModel m({TensorType_BOOL, {2, 3, 2}}, {TensorType_INT32, {}});
+  m.PopulateTensor<bool>(m.input(), {true, true, false, true, false, false,  //
+                                     false, false, true, false, true, true});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0,  //
+                                               0, 0, 1,  //
+                                               0, 1, 1,  //
+                                               1, 1, 0,  //
+                                               1, 2, 0,  //
+                                               1, 2, 1}));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/kernels/while.cc b/tensorflow/lite/kernels/while.cc
new file mode 100644
index 0000000..07a48e6
--- /dev/null
+++ b/tensorflow/lite/kernels/while.cc
@@ -0,0 +1,312 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/context_util.h"
+#include "tensorflow/lite/core/subgraph.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace custom {
+namespace while_kernel {
+
+namespace {
+
+// Propagate tensor shapes and types from `src_tensor_indices` in `src_subgraph`
+// to `dst_tensor_indices` in `dst_subgraph`.
+template <typename SrcVector, typename DstVector>
+TfLiteStatus CopyTensorsShapeAndType(TfLiteContext* context,
+                                     Subgraph* src_subgraph,
+                                     const SrcVector& src_tensor_indices,
+                                     Subgraph* dst_subgraph,
+                                     const DstVector& dst_tensor_indices) {
+  TF_LITE_ENSURE_EQ(context, src_tensor_indices.size(),
+                    dst_tensor_indices.size());
+  for (int i = 0; i < src_tensor_indices.size(); ++i) {
+    const TfLiteTensor* src_tensor =
+        src_subgraph->tensor(src_tensor_indices[i]);
+    std::vector<int> dims(src_tensor->dims->data,
+                          src_tensor->dims->data + src_tensor->dims->size);
+    dst_subgraph->ResizeInputTensor(dst_tensor_indices[i], dims);
+    TfLiteTensor* dst_tensor = dst_subgraph->tensor(dst_tensor_indices[i]);
+    dst_tensor->type = src_tensor->type;
+  }
+  return kTfLiteOk;
+}
+
+// Copy the tensors' data from tensors `src_tensor_indices` in `src_subgraph`
+// to `dst_tensor_indices` in `dst_subgraph`.
+template <typename SrcVector, typename DstVector>
+TfLiteStatus CopyTensorsData(TfLiteContext* context, Subgraph* src_subgraph,
+                             const SrcVector& src_tensor_indices,
+                             Subgraph* dst_subgraph,
+                             const DstVector& dst_tensor_indices) {
+  TF_LITE_ENSURE_EQ(context, src_tensor_indices.size(),
+                    dst_tensor_indices.size());
+  for (int i = 0; i < src_tensor_indices.size(); ++i) {
+    const TfLiteTensor* src_tensor =
+        src_subgraph->tensor(src_tensor_indices[i]);
+    TfLiteTensor* dst_tensor = dst_subgraph->tensor(dst_tensor_indices[i]);
+    TF_LITE_ENSURE_EQ(context, src_tensor->bytes, dst_tensor->bytes);
+    memcpy(dst_tensor->data.raw, src_tensor->data.raw, src_tensor->bytes);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus CheckCondOutput(TfLiteContext* context,
+                             const TfLiteTensor* cond_output) {
+  // The condition output must be a single boolean value.
+  TF_LITE_ENSURE_EQ(context, cond_output->type, kTfLiteBool);
+  if (cond_output->dims->size == 0) {
+    // It's okay if it's a 0D scalar.
+    return kTfLiteOk;
+  }
+  // Otherwise it must be 1D with shape [1].
+  TF_LITE_ENSURE_EQ(context, cond_output->dims->size, 1);
+  TF_LITE_ENSURE_EQ(context, cond_output->dims->data[0], 1);
+  return kTfLiteOk;
+}
+
+}  // namespace
+
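+// Parameters of the WHILE op, read from the op's flexbuffer options in Init(),
+// plus flags computed during Prepare().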
+struct OpData {
+  int cond_subgraph_index;
+  int body_subgraph_index;
+  bool cond_has_dynamic_output_tensors;
+  bool body_has_dynamic_output_tensors;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  auto* op_data = new OpData;
+  const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
+  const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
+  op_data->cond_subgraph_index = m["cond_subgraph_index"].AsInt32();
+  op_data->body_subgraph_index = m["body_subgraph_index"].AsInt32();
+  op_data->cond_has_dynamic_output_tensors = false;
+  op_data->body_has_dynamic_output_tensors = false;
+  return op_data;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<OpData*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  int num_inputs = node->inputs->size;
+  // The number of outputs should be the same as the number of inputs.
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, num_inputs);
+
+  // Check subgraph indices and get subgraphs.
+  Subgraph* this_subgraph = reinterpret_cast<Subgraph*>(context->impl_);
+  auto* subgraphs = this_subgraph->GetSubgraphs();
+  TF_LITE_ENSURE(context, op_data->cond_subgraph_index < subgraphs->size());
+  TF_LITE_ENSURE(context, op_data->body_subgraph_index < subgraphs->size());
+
+  Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get();
+  Subgraph* body_subgraph = (*subgraphs)[op_data->body_subgraph_index].get();
+
+  // Check input & output count of the condition subgraph.
+  TF_LITE_ENSURE_EQ(context, cond_subgraph->inputs().size(), num_inputs);
+  TF_LITE_ENSURE_EQ(context, cond_subgraph->outputs().size(), 1);
+
+  // Check input & output count of the body subgraph.
+  TF_LITE_ENSURE_EQ(context, body_subgraph->inputs().size(), num_inputs);
+  TF_LITE_ENSURE_EQ(context, body_subgraph->outputs().size(), num_inputs);
+
+  // Prepare and check the condition subgraph.
+  TF_LITE_ENSURE_OK(
+      context, CopyTensorsShapeAndType(context, this_subgraph,
+                                       TfLiteIntArrayView(node->inputs),
+                                       cond_subgraph, cond_subgraph->inputs()));
+  TF_LITE_ENSURE_OK(context, cond_subgraph->AllocateTensors());
+  TfLiteTensor* cond_output =
+      cond_subgraph->tensor(cond_subgraph->outputs()[0]);
+  // TODO(ycling): Handle the case where the cond subgraph has dynamic tensor
+  // outputs. This should rarely happen. In most cases the output is static
+  // with shape [1]. However, theoretically, intermediate tensors in the cond
+  // subgraph can be dynamic.
+  if (IsDynamicTensor(cond_output)) {
+    op_data->cond_has_dynamic_output_tensors = true;
+  } else {
+    TF_LITE_ENSURE_STATUS(CheckCondOutput(context, cond_output));
+  }
+
+  // Prepare and check the body subgraph.
+  TF_LITE_ENSURE_OK(
+      context, CopyTensorsShapeAndType(context, this_subgraph,
+                                       TfLiteIntArrayView(node->inputs),
+                                       body_subgraph, body_subgraph->inputs()));
+  TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors());
+  if (body_subgraph->HasDynamicTensors()) {
+    op_data->body_has_dynamic_output_tensors = true;
+  } else {
+    for (int i = 0; i < num_inputs; ++i) {
+      TfLiteTensor* body_input =
+          body_subgraph->tensor(body_subgraph->inputs()[i]);
+      TfLiteTensor* body_output =
+          body_subgraph->tensor(body_subgraph->outputs()[i]);
+      TF_LITE_ENSURE_EQ(context, body_input->type, body_output->type);
+
+      // TODO(ycling): Support dynamic sized body subgraph.
+      TF_LITE_ENSURE(context, !IsDynamicTensor(body_output));
+      if (!TfLiteIntArrayEqual(body_input->dims, body_output->dims)) {
+        // If the output shape of the body subgraph is static w.r.t. a fixed
+        // input size, but differs from the input size, it's still considered
+        // dynamic. For example: if a subgraph keeps padding its input with a
+        // fixed padding, the output shape is static w.r.t. the input shape and
+        // padding, but running it in a loop will keep bloating the tensor.
+        op_data->body_has_dynamic_output_tensors = true;
+        break;
+      }
+    }
+  }
+  for (int i = 0; i < num_inputs; ++i) {
+    TfLiteTensor* output = GetOutput(context, node, i);
+    if (op_data->body_has_dynamic_output_tensors) {
+      SetTensorToDynamic(output);
+    } else {
+      TfLiteTensor* body_output =
+          body_subgraph->tensor(body_subgraph->outputs()[i]);
+      TfLiteIntArray* output_size = TfLiteIntArrayCopy(body_output->dims);
+      TF_LITE_ENSURE_OK(context,
+                        context->ResizeTensor(context, output, output_size));
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  Subgraph* this_subgraph = reinterpret_cast<Subgraph*>(context->impl_);
+  auto* subgraphs = this_subgraph->GetSubgraphs();
+  Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get();
+  Subgraph* body_subgraph = (*subgraphs)[op_data->body_subgraph_index].get();
+
+  // The following graph illustrates the current implementation.
+  //
+  // This Subgraph          Cond Subgraph         Body Subgraph
+  // +-----------+   (1)   +------------+   (3)   +------------+
+  // |   WHILE   |-------->|  SUBGRAPH  |-------->|  SUBGRAPH  |
+  // |   INPUT   |        /|   INPUT    |<-----   |   INPUT    |
+  // +-----------+       / +------------+      \  +------------+
+  //                    /        |              \       |
+  //               (6) /         | (2)       (5) \      | (4)
+  //                  /          v                \     v
+  // +-----------+   /     +------------+         +------------+
+  // |   WHILE   |<--      |  SUBGRAPH  |         |  SUBGRAPH  |
+  // |   OUTPUT  |         |   OUTPUT   |         |   OUTPUT   |
+  // +-----------+         +------------+         +------------+
+  //
+  // (1) Copy the inputs of WHILE op to the inputs of condition subgraph.
+  // (2) Invoke condition subgraph.
+  //     Jump to step 5 if result is false.
+  // (3) Copy the inputs of condition subgraph to the inputs of body subgraph.
+  // (4) Invoke body subgraph.
+  // (5) Copy the outputs of the body subgraph to the inputs of the condition
+  //     subgraph.
+  //     Jump back to step 2!
+  // (6) Copy the inputs of condition subgraph to the outputs of WHILE op.
+  //
+  // If the body subgraph has dynamically sized outputs, the tensors must be
+  // resized before copying in steps 1, 3, 4 and 6.
+  //
+  // Note the flow is carefully designed to handle the dynamically sized output
+  // case. The loop invariant is: the newest value is in the inputs of the
+  // condition subgraph. This is always true before step 2.
+  //
+  // This is the best we can do without sharing tensor buffers across subgraph
+  // boundaries. Currently we copy the inputs / outputs between the subgraphs.
+  // This isn't optimized yet and a lot of redundant copies are made.
+  // TODO(b/120234921): Optimize and avoid copying tensors between subgraphs.
+  TF_LITE_ENSURE_OK(
+      context,
+      CopyTensorsData(context, this_subgraph, TfLiteIntArrayView(node->inputs),
+                      cond_subgraph, cond_subgraph->inputs()));
+
+  while (true) {
+    TF_LITE_ENSURE_OK(context, cond_subgraph->Invoke());
+    int cond_subgraph_output_index = cond_subgraph->outputs()[0];
+    cond_subgraph->EnsureTensorDataIsReadable(cond_subgraph_output_index);
+    TfLiteTensor* cond_output =
+        cond_subgraph->tensor(cond_subgraph_output_index);
+    if (op_data->cond_has_dynamic_output_tensors) {
+      TF_LITE_ENSURE_STATUS(CheckCondOutput(context, cond_output));
+    }
+
+    if (!cond_output->data.b[0]) {
+      break;
+    }
+    if (op_data->body_has_dynamic_output_tensors) {
+      TF_LITE_ENSURE_OK(context,
+                        CopyTensorsShapeAndType(
+                            context, cond_subgraph, cond_subgraph->inputs(),
+                            body_subgraph, body_subgraph->inputs()));
+      TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors());
+    }
+
+    TF_LITE_ENSURE_OK(
+        context,
+        CopyTensorsData(context, cond_subgraph, cond_subgraph->inputs(),
+                        body_subgraph, body_subgraph->inputs()));
+
+    TF_LITE_ENSURE_OK(context, body_subgraph->Invoke());
+
+    for (int tensor_index : body_subgraph->outputs()) {
+      body_subgraph->EnsureTensorDataIsReadable(tensor_index);
+    }
+
+    if (op_data->body_has_dynamic_output_tensors) {
+      TF_LITE_ENSURE_OK(context,
+                        CopyTensorsShapeAndType(
+                            context, body_subgraph, body_subgraph->outputs(),
+                            cond_subgraph, cond_subgraph->inputs()));
+      TF_LITE_ENSURE_OK(context, cond_subgraph->AllocateTensors());
+    }
+
+    TF_LITE_ENSURE_OK(
+        context,
+        CopyTensorsData(context, body_subgraph, body_subgraph->outputs(),
+                        cond_subgraph, cond_subgraph->inputs()));
+  }
+
+  // Note that copying from the body's outputs would fail if the body is never
+  // invoked, so we copy from the cond subgraph's inputs instead.
+  // TODO(b/120234921): Optimize and avoid copying tensors between subgraphs.
+  if (op_data->body_has_dynamic_output_tensors) {
+    TF_LITE_ENSURE_OK(
+        context, CopyTensorsShapeAndType(context, cond_subgraph,
+                                         cond_subgraph->inputs(), this_subgraph,
+                                         TfLiteIntArrayView(node->outputs)));
+  }
+
+  TF_LITE_ENSURE_OK(
+      context,
+      CopyTensorsData(context, cond_subgraph, cond_subgraph->inputs(),
+                      this_subgraph, TfLiteIntArrayView(node->outputs)));
+  return kTfLiteOk;
+}
+
+}  // namespace while_kernel
+
+TfLiteRegistration* Register_WHILE() {
+  static TfLiteRegistration r = {while_kernel::Init, while_kernel::Free,
+                                 while_kernel::Prepare, while_kernel::Eval};
+  return &r;
+}
+
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/kernels/while_test.cc b/tensorflow/lite/kernels/while_test.cc
new file mode 100644
index 0000000..9946b4a
--- /dev/null
+++ b/tensorflow/lite/kernels/while_test.cc
@@ -0,0 +1,94 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/subgraph_test_util.h"
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/model.h"
+
+namespace tflite {
+
+using subgraph_test_util::CheckIntTensor;
+using subgraph_test_util::ControlFlowOpTest;
+using subgraph_test_util::FillIntTensor;
+
+namespace {
+
+class WhileTest : public ControlFlowOpTest {};
+
+// The test builds a model that produces the i-th number of the
+// triangular number sequence.
+//
+// TODO(ycling): Consider improving this test case by adding a
+// concat into the body subgraph.
+TEST_F(WhileTest, TestTriangularNumberSequence) {
+  const std::vector<int> expected = {1, 3, 6, 10, 15, 21, 28};
+  for (int i = 0; i < expected.size(); ++i) {
+    interpreter_.reset(new Interpreter);
+    interpreter_->AddSubgraphs(2);
+    builder_->BuildLessEqualCondSubgraph(interpreter_->subgraph(1), i);
+    builder_->BuildAccumulateLoopBodySubgraph(interpreter_->subgraph(2));
+    builder_->BuildWhileSubgraph(&interpreter_->primary_subgraph());
+
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {1});
+    interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {1});
+    ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+    FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {1});
+    FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {1});
+
+    ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+    TfLiteTensor* output1 = interpreter_->tensor(interpreter_->outputs()[0]);
+    CheckIntTensor(output1, {1}, {i + 1});
+    TfLiteTensor* output2 = interpreter_->tensor(interpreter_->outputs()[1]);
+    CheckIntTensor(output2, {1}, {expected[i]});
+  }
+}
+
+// This requires dynamically sized subgraphs, which aren't supported right now.
+// TODO(ycling): Support dynamically sized subgraphs.
+TEST_F(WhileTest, TestPadLoop) {
+  interpreter_.reset(new Interpreter);
+  interpreter_->AddSubgraphs(2);
+  builder_->BuildLessEqualCondSubgraph(interpreter_->subgraph(1), 3);
+  builder_->BuildPadLoopBodySubgraph(interpreter_->subgraph(2), {1, 2});
+  builder_->BuildWhileSubgraph(&interpreter_->primary_subgraph());
+
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {1});
+  interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {2});
+  // This is not supported yet. The test ensures that it doesn't crash and
+  // raises an error properly.
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {1});
+  FillIntTensor(interpreter_->tensor(interpreter_->inputs()[1]), {5, 7});
+
+  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
+  TfLiteTensor* output1 = interpreter_->tensor(interpreter_->outputs()[0]);
+  CheckIntTensor(output1, {1}, {4});
+  TfLiteTensor* output2 = interpreter_->tensor(interpreter_->outputs()[1]);
+  CheckIntTensor(output2, {11}, {0, 0, 0, 5, 7, 0, 0, 0, 0, 0, 0});
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc
index 36301f1..bbc0c86 100644
--- a/tensorflow/lite/nnapi/nnapi_implementation.cc
+++ b/tensorflow/lite/nnapi/nnapi_implementation.cc
@@ -45,6 +45,19 @@
       }
       result = result * 10 + digit;
     }
+    // TODO(levp): remove once the SDK gets updated to level 29.
+    // Upgrade the SDK version for pre-release Q to be able to test
+    // functionality available from SDK level 29.
+    if (result == 28) {
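+      // Pre-release Android Q devices report SDK level 28 while the codename
+      // property is set to 'Q'; treat those devices as level 29.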
+      char versionCodename[PROP_VALUE_MAX];
+      const char* versionCodenameProp = "ro.build.version.codename";
+      length = __system_property_get(versionCodenameProp, versionCodename);
+      if (length != 0) {
+        if (versionCodename[0] == 'Q') {
+          return 29;
+        }
+      }
+    }
     return result;
   }
   return 0;
diff --git a/tensorflow/lite/nnapi_delegate.cc b/tensorflow/lite/nnapi_delegate.cc
index 7c67468..8d90ec9 100644
--- a/tensorflow/lite/nnapi_delegate.cc
+++ b/tensorflow/lite/nnapi_delegate.cc
@@ -610,6 +610,7 @@
       case tflite::BuiltinOperator_SPLIT:
       case tflite::BuiltinOperator_STRIDED_SLICE:
       case tflite::BuiltinOperator_EXP:
+      case tflite::BuiltinOperator_COS:
       case tflite::BuiltinOperator_LOG_SOFTMAX:
       case tflite::BuiltinOperator_DEQUANTIZE:
       case tflite::BuiltinOperator_DELEGATE:
@@ -665,6 +666,9 @@
       case tflite::BuiltinOperator_CEIL:
       case tflite::BuiltinOperator_REVERSE_V2:
       case tflite::BuiltinOperator_ADD_N:
+      case tflite::BuiltinOperator_GATHER_ND:
+      case tflite::BuiltinOperator_WHERE:
+      case tflite::BuiltinOperator_RANK:
         logError("Op code %d is currently not delegated to NNAPI", builtin);
         return kTfLiteError;
         break;
diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD
index aa0ffe9..dd5a760 100644
--- a/tensorflow/lite/python/BUILD
+++ b/tensorflow/lite/python/BUILD
@@ -13,7 +13,6 @@
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/lite/python/interpreter_wrapper:tensorflow_wrap_interpreter_wrapper",
-        "//tensorflow/python:util",
         "//third_party/py/numpy",
     ],
 )
@@ -40,6 +39,14 @@
     srcs = ["tflite_convert.py"],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
+    deps = [":tflite_convert_main_lib"],
+)
+
+py_library(
+    name = "tflite_convert_main_lib",
+    srcs = ["tflite_convert.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
     deps = [":tflite_convert_lib"],
 )
 
@@ -64,6 +71,7 @@
         ":interpreter",
         ":lite_constants",
         ":op_hint",
+        "//tensorflow/lite/python/optimize:calibrator",
         "//tensorflow/python:graph_util",
         "//tensorflow/python:tf_optimizer",
         "//tensorflow/python/keras",
@@ -89,6 +97,21 @@
 )
 
 py_test(
+    name = "lite_v2_test",
+    srcs = ["lite_v2_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_windows",
+    ],
+    deps = [
+        ":lite",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
     name = "lite_flex_test",
     srcs = ["lite_flex_test.py"],
     srcs_version = "PY2AND3",
diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py
index 9c60399..4d38ee9 100644
--- a/tensorflow/lite/python/convert.py
+++ b/tensorflow/lite/python/convert.py
@@ -443,7 +443,7 @@
   return data
 
 
-@_tf_export("lite.toco_convert")
+@_tf_export(v1=["lite.toco_convert"])
 @deprecation.deprecated(None, "Use `lite.TFLiteConverter` instead.")
 def toco_convert(input_data, input_tensors, output_tensors, *args, **kwargs):
   """Convert a model using TOCO.
diff --git a/tensorflow/lite/python/interpreter.py b/tensorflow/lite/python/interpreter.py
index a1325f0..9b9516f 100644
--- a/tensorflow/lite/python/interpreter.py
+++ b/tensorflow/lite/python/interpreter.py
@@ -216,7 +216,8 @@
   def get_tensor(self, tensor_index):
     """Gets the value of the input tensor (get a copy).
 
-    If you wish to avoid the copy, use `tensor()`.
+    If you wish to avoid the copy, use `tensor()`. This function cannot be used
+    to read intermediate results.
 
     Args:
       tensor_index: Tensor index of tensor to get. This value can be gotten from
@@ -233,7 +234,8 @@
     This allows reading and writing to this tensors w/o copies. This more
     closely mirrors the C++ Interpreter class interface's tensor() member, hence
     the name. Be careful to not hold these output references through calls
-    to `allocate_tensors()` and `invoke()`.
+    to `allocate_tensors()` and `invoke()`. This function cannot be used to read
+    intermediate results.
 
     Usage:
 
diff --git a/tensorflow/lite/python/interpreter_test.py b/tensorflow/lite/python/interpreter_test.py
index 7ec56a2..b217792 100644
--- a/tensorflow/lite/python/interpreter_test.py
+++ b/tensorflow/lite/python/interpreter_test.py
@@ -91,6 +91,41 @@
     output_data = interpreter.get_tensor(output_details[0]['index'])
     self.assertTrue((expected_output == output_data).all())
 
+  def testString(self):
+    interpreter = interpreter_wrapper.Interpreter(
+        model_path=resource_loader.get_path_to_datafile(
+            'testdata/gather_string.tflite'))
+    interpreter.allocate_tensors()
+
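+    # gather_string.tflite gathers 3 elements of a 10-string input tensor
+    # using int64 indices, exercising string I/O through the wrapper.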
+    input_details = interpreter.get_input_details()
+    self.assertEqual(2, len(input_details))
+    self.assertEqual('input', input_details[0]['name'])
+    self.assertEqual(np.string_, input_details[0]['dtype'])
+    self.assertTrue(([10] == input_details[0]['shape']).all())
+    self.assertEqual((0.0, 0), input_details[0]['quantization'])
+    self.assertEqual('indices', input_details[1]['name'])
+    self.assertEqual(np.int64, input_details[1]['dtype'])
+    self.assertTrue(([3] == input_details[1]['shape']).all())
+    self.assertEqual((0.0, 0), input_details[1]['quantization'])
+
+    output_details = interpreter.get_output_details()
+    self.assertEqual(1, len(output_details))
+    self.assertEqual('output', output_details[0]['name'])
+    self.assertEqual(np.string_, output_details[0]['dtype'])
+    self.assertTrue(([3] == output_details[0]['shape']).all())
+    self.assertEqual((0.0, 0), output_details[0]['quantization'])
+
+    test_input = np.array([1, 2, 3], dtype=np.int64)
+    interpreter.set_tensor(input_details[1]['index'], test_input)
+
+    test_input = np.array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'])
+    expected_output = np.array([b'b', b'c', b'd'])
+    interpreter.set_tensor(input_details[0]['index'], test_input)
+    interpreter.invoke()
+
+    output_data = interpreter.get_tensor(output_details[0]['index'])
+    self.assertTrue((expected_output == output_data).all())
+
 
 class InterpreterTestErrorPropagation(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/lite/python/interpreter_wrapper/BUILD b/tensorflow/lite/python/interpreter_wrapper/BUILD
index 6de6fb4..6ec7ce4 100644
--- a/tensorflow/lite/python/interpreter_wrapper/BUILD
+++ b/tensorflow/lite/python/interpreter_wrapper/BUILD
@@ -7,13 +7,25 @@
 load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
 
 cc_library(
+    name = "numpy",
+    srcs = ["numpy.cc"],
+    hdrs = ["numpy.h"],
+    deps = [
+        "//third_party/py/numpy:headers",
+        "//third_party/python_runtime:headers",
+    ],
+)
+
+cc_library(
     name = "interpreter_wrapper_lib",
     srcs = ["interpreter_wrapper.cc"],
     hdrs = ["interpreter_wrapper.h"],
     deps = [
+        ":numpy",
         ":python_error_reporter",
         ":python_utils",
         "//tensorflow/lite:framework",
+        "//tensorflow/lite:string_util",
         "//tensorflow/lite/kernels:builtin_ops",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
@@ -36,7 +48,9 @@
     srcs = ["python_utils.cc"],
     hdrs = ["python_utils.h"],
     deps = [
+        ":numpy",
         "//tensorflow/lite:framework",
+        "//tensorflow/lite:string_util",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
     ],
diff --git a/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc
index 9ccaabb..6023587 100644
--- a/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc
+++ b/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc
@@ -21,24 +21,10 @@
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/model.h"
+#include "tensorflow/lite/python/interpreter_wrapper/numpy.h"
 #include "tensorflow/lite/python/interpreter_wrapper/python_error_reporter.h"
 #include "tensorflow/lite/python/interpreter_wrapper/python_utils.h"
-
-// Disallow Numpy 1.7 deprecated symbols.
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
-
-#include <Python.h>
-
-#include "numpy/arrayobject.h"
-#include "numpy/ufuncobject.h"
-
-#if PY_MAJOR_VERSION >= 3
-#define PY_TO_CPPSTRING PyBytes_AsStringAndSize
-#define CPP_TO_PYSTRING PyBytes_FromStringAndSize
-#else
-#define PY_TO_CPPSTRING PyString_AsStringAndSize
-#define CPP_TO_PYSTRING PyString_FromStringAndSize
-#endif
+#include "tensorflow/lite/string_util.h"
 
 #define TFLITE_PY_CHECK(x)               \
   if ((x) != kTfLiteOk) {                \
@@ -64,11 +50,7 @@
 
 namespace {
 
-// Calls PyArray's initialization to initialize all the API pointers. Note that
-// this usage implies only this translation unit can use the pointers. See
-// tensorflow/python/core/numpy.cc for a strategy if we ever need to extend
-// this further.
-void ImportNumpy() { import_array1(); }
+using python_utils::PyDecrefDeleter;
 
 std::unique_ptr<tflite::Interpreter> CreateInterpreter(
     const tflite::FlatBufferModel* model,
@@ -77,7 +59,7 @@
     return nullptr;
   }
 
-  ImportNumpy();
+  ::tflite::python::ImportNumpy();
 
   std::unique_ptr<tflite::Interpreter> interpreter;
   if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk) {
@@ -86,10 +68,6 @@
   return interpreter;
 }
 
-struct PyDecrefDeleter {
-  void operator()(PyObject* p) const { Py_DECREF(p); }
-};
-
 PyObject* PyArrayFromIntVector(const int* data, npy_intp size) {
   void* pydata = malloc(size * sizeof(int));
   memcpy(pydata, data, size * sizeof(int));
@@ -267,7 +245,7 @@
   }
 
   PyArrayObject* array = reinterpret_cast<PyArrayObject*>(array_safe.get());
-  const TfLiteTensor* tensor = interpreter_->tensor(i);
+  TfLiteTensor* tensor = interpreter_->tensor(i);
 
   if (python_utils::TfLiteTypeFromPyArray(array) != tensor->type) {
     PyErr_Format(PyExc_ValueError,
@@ -279,26 +257,41 @@
   }
 
   if (PyArray_NDIM(array) != tensor->dims->size) {
-    PyErr_SetString(PyExc_ValueError, "Cannot set tensor: Dimension mismatch");
+    PyErr_Format(PyExc_ValueError,
+                 "Cannot set tensor: Dimension mismatch."
+                 " Got %d"
+                 " but expected %d for input %d.",
+                 PyArray_NDIM(array), tensor->dims->size, i);
     return nullptr;
   }
 
   for (int j = 0; j < PyArray_NDIM(array); j++) {
     if (tensor->dims->data[j] != PyArray_SHAPE(array)[j]) {
-      PyErr_SetString(PyExc_ValueError,
-                      "Cannot set tensor: Dimension mismatch");
+      PyErr_Format(PyExc_ValueError,
+                   "Cannot set tensor: Dimension mismatch."
+                   " Got %ld"
+                   " but expected %d for dimension %d of input %d.",
+                   PyArray_SHAPE(array)[j], tensor->dims->data[j], j, i);
       return nullptr;
     }
   }
 
-  size_t size = PyArray_NBYTES(array);
-  if (size != tensor->bytes) {
-    PyErr_Format(PyExc_ValueError,
-                 "numpy array had %zu bytes but expected %zu bytes.", size,
-                 tensor->bytes);
-    return nullptr;
+  if (tensor->type != kTfLiteString) {
+    size_t size = PyArray_NBYTES(array);
+    if (size != tensor->bytes) {
+      PyErr_Format(PyExc_ValueError,
+                   "numpy array had %zu bytes but expected %zu bytes.", size,
+                   tensor->bytes);
+      return nullptr;
+    }
+    memcpy(tensor->data.raw, PyArray_DATA(array), size);
+  } else {
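+    // String tensors can't be written with a raw memcpy: each element is
+    // appended to a DynamicBuffer, which then packs offsets and bytes into
+    // the tensor via WriteToTensor().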
+    DynamicBuffer dynamic_buffer;
+    if (!python_utils::FillStringBufferWithPyArray(value, &dynamic_buffer)) {
+      return nullptr;
+    }
+    dynamic_buffer.WriteToTensor(tensor, nullptr);
   }
-  memcpy(tensor->data.raw, PyArray_DATA(array), size);
   Py_RETURN_NONE;
 }
 
@@ -345,19 +338,51 @@
 
   std::vector<npy_intp> dims(tensor->dims->data,
                              tensor->dims->data + tensor->dims->size);
-  // Make a buffer copy but we must tell Numpy It owns that data or else
-  // it will leak.
-  void* data = malloc(tensor->bytes);
-  if (!data) {
-    PyErr_SetString(PyExc_ValueError, "Malloc to copy tensor failed.");
-    return nullptr;
+  if (tensor->type != kTfLiteString) {
+    // Make a buffer copy, but we must tell Numpy it owns that data or else
+    // it will leak.
+    void* data = malloc(tensor->bytes);
+    if (!data) {
+      PyErr_SetString(PyExc_ValueError, "Malloc to copy tensor failed.");
+      return nullptr;
+    }
+    memcpy(data, tensor->data.raw, tensor->bytes);
+    PyObject* np_array =
+        PyArray_SimpleNewFromData(dims.size(), dims.data(), type_num, data);
+    PyArray_ENABLEFLAGS(reinterpret_cast<PyArrayObject*>(np_array),
+                        NPY_ARRAY_OWNDATA);
+    return PyArray_Return(reinterpret_cast<PyArrayObject*>(np_array));
+  } else {
+    // Create a C-order array so the data is contiguous in memory.
+    const int32_t kCOrder = 0;
+    PyObject* py_object =
+        PyArray_EMPTY(dims.size(), dims.data(), NPY_OBJECT, kCOrder);
+
+    if (py_object == nullptr) {
+      PyErr_SetString(PyExc_MemoryError, "Failed to allocate PyArray.");
+      return nullptr;
+    }
+
+    PyArrayObject* py_array = reinterpret_cast<PyArrayObject*>(py_object);
+    PyObject** data = reinterpret_cast<PyObject**>(PyArray_DATA(py_array));
+    auto num_strings = GetStringCount(tensor->data.raw);
+    for (int j = 0; j < num_strings; ++j) {
+      auto ref = GetString(tensor->data.raw, j);
+
+      PyObject* bytes = PyBytes_FromStringAndSize(ref.str, ref.len);
+      if (bytes == nullptr) {
+        Py_DECREF(py_object);
+        PyErr_Format(PyExc_ValueError,
+                     "Could not create PyBytes from string %d of input %d.", j,
+                     i);
+        return nullptr;
+      }
+      // PyArray_EMPTY produces an array full of Py_None, which we must decref.
+      Py_DECREF(data[j]);
+      data[j] = bytes;
+    }
+    return py_object;
   }
-  memcpy(data, tensor->data.raw, tensor->bytes);
-  PyObject* np_array =
-      PyArray_SimpleNewFromData(dims.size(), dims.data(), type_num, data);
-  PyArray_ENABLEFLAGS(reinterpret_cast<PyArrayObject*>(np_array),
-                      NPY_ARRAY_OWNDATA);
-  return PyArray_Return(reinterpret_cast<PyArrayObject*>(np_array));
 }
 
 PyObject* InterpreterWrapper::tensor(PyObject* base_object, int i) {
@@ -394,7 +419,8 @@
   char * buf = nullptr;
   Py_ssize_t length;
   std::unique_ptr<PythonErrorReporter> error_reporter(new PythonErrorReporter);
-  if (PY_TO_CPPSTRING(data, &buf, &length) == -1) {
+
+  if (python_utils::ConvertFromPyString(data, &buf, &length) == -1) {
     return nullptr;
   }
   std::unique_ptr<tflite::FlatBufferModel> model =
diff --git a/tensorflow/lite/python/interpreter_wrapper/numpy.cc b/tensorflow/lite/python/interpreter_wrapper/numpy.cc
new file mode 100644
index 0000000..ff5403d
--- /dev/null
+++ b/tensorflow/lite/python/interpreter_wrapper/numpy.cc
@@ -0,0 +1,25 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define TFLITE_IMPORT_NUMPY  // See numpy.h for explanation.
+#include "tensorflow/lite/python/interpreter_wrapper/numpy.h"
+
+namespace tflite {
+namespace python {
+
+void ImportNumpy() { import_array1(); }
+
+}  // namespace python
+}  // namespace tflite
diff --git a/tensorflow/lite/python/interpreter_wrapper/numpy.h b/tensorflow/lite/python/interpreter_wrapper/numpy.h
new file mode 100644
index 0000000..a3b013f
--- /dev/null
+++ b/tensorflow/lite/python/interpreter_wrapper/numpy.h
@@ -0,0 +1,62 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_PYTHON_INTERPRETER_WRAPPER_NUMPY_H_
+#define TENSORFLOW_LITE_PYTHON_INTERPRETER_WRAPPER_NUMPY_H_
+
+#ifdef PyArray_Type
+#error "Numpy cannot be included before numpy.h."
+#endif
+
+// Disallow Numpy 1.7 deprecated symbols.
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+
+// To handle PyArray_* calls, numpy defines a static lookup table called
+// PyArray_API, or PY_ARRAY_UNIQUE_SYMBOL, if defined. This causes the
+// PyArray_* pointers to be different for different translation units, unless
+// we take care of selectively defining NO_IMPORT_ARRAY.
+//
+// Virtually every usage will define NO_IMPORT_ARRAY, and will have access to
+// the lookup table via:
+//   extern void **PyArray_API;
+// In numpy.cc we will define TFLITE_IMPORT_NUMPY, effectively disabling that
+// and instead using:
+//   void **PyArray_API;
+// which is initialized when ImportNumpy() is called.
+//
+// If we don't define PY_ARRAY_UNIQUE_SYMBOL then PyArray_API is a static
+// variable, which causes strange crashes when the pointers are used across
+// translation unit boundaries.
+//
+// For more info see https://sourceforge.net/p/numpy/mailman/message/5700519
+// See also tensorflow/python/lib/core/numpy.h for a similar approach.
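+//
+// Typical usage: include this header (never the numpy headers directly, so
+// NO_IMPORT_ARRAY and PY_ARRAY_UNIQUE_SYMBOL stay consistent) and call
+// tflite::python::ImportNumpy() once before any PyArray_* call, as
+// interpreter_wrapper.cc does when creating the interpreter.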
+#define PY_ARRAY_UNIQUE_SYMBOL _tensorflow_numpy_api
+#ifndef TFLITE_IMPORT_NUMPY
+#define NO_IMPORT_ARRAY
+#endif
+
+#include <Python.h>
+
+#include "numpy/arrayobject.h"
+#include "numpy/ufuncobject.h"
+
+namespace tflite {
+namespace python {
+
+void ImportNumpy();
+
+}  // namespace python
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_PYTHON_INTERPRETER_WRAPPER_NUMPY_H_
diff --git a/tensorflow/lite/python/interpreter_wrapper/python_utils.cc b/tensorflow/lite/python/interpreter_wrapper/python_utils.cc
index 2dc6043..e3d7136 100644
--- a/tensorflow/lite/python/interpreter_wrapper/python_utils.cc
+++ b/tensorflow/lite/python/interpreter_wrapper/python_utils.cc
@@ -15,9 +15,19 @@
 
 #include "tensorflow/lite/python/interpreter_wrapper/python_utils.h"
 
+#include <memory>
+
+#include "tensorflow/lite/python/interpreter_wrapper/numpy.h"
+
 namespace tflite {
 namespace python_utils {
 
+struct PyObjectDereferencer {
+  void operator()(PyObject* py_object) const { Py_DECREF(py_object); }
+};
+
+using UniquePyObjectRef = std::unique_ptr<PyObject, PyObjectDereferencer>;
+
 int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) {
   switch (tf_lite_type) {
     case kTfLiteFloat32:
@@ -33,7 +43,7 @@
     case kTfLiteInt64:
       return NPY_INT64;
     case kTfLiteString:
-      return NPY_OBJECT;
+      return NPY_STRING;
     case kTfLiteBool:
       return NPY_BOOL;
     case kTfLiteComplex64:
@@ -73,5 +83,98 @@
   return kTfLiteNoType;
 }
 
+#if PY_VERSION_HEX >= 0x03030000
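+// Python 3.3+ provides PyUnicode_AsUTF8AndSize(), which avoids creating an
+// intermediate bytes object.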
+bool FillStringBufferFromPyUnicode(PyObject* value,
+                                   DynamicBuffer* dynamic_buffer) {
+  Py_ssize_t len = -1;
+  const char* buf = PyUnicode_AsUTF8AndSize(value, &len);
+  if (buf == NULL) {
+    PyErr_SetString(PyExc_ValueError, "PyUnicode_AsUTF8AndSize() failed.");
+    return false;
+  }
+  dynamic_buffer->AddString(buf, len);
+  return true;
+}
+#else
+bool FillStringBufferFromPyUnicode(PyObject* value,
+                                   DynamicBuffer* dynamic_buffer) {
+  UniquePyObjectRef utemp(PyUnicode_AsUTF8String(value));
+  if (!utemp) {
+    PyErr_SetString(PyExc_ValueError, "PyUnicode_AsUTF8String() failed.");
+    return false;
+  }
+  char* buf = nullptr;
+  Py_ssize_t len = -1;
+  if (PyBytes_AsStringAndSize(utemp.get(), &buf, &len) == -1) {
+    PyErr_SetString(PyExc_ValueError, "PyBytes_AsStringAndSize() failed.");
+    return false;
+  }
+  dynamic_buffer->AddString(buf, len);
+  return true;
+}
+#endif
+
+bool FillStringBufferFromPyString(PyObject* value,
+                                  DynamicBuffer* dynamic_buffer) {
+  if (PyUnicode_Check(value)) {
+    return FillStringBufferFromPyUnicode(value, dynamic_buffer);
+  }
+
+  char* buf = nullptr;
+  Py_ssize_t len = -1;
+  if (PyBytes_AsStringAndSize(value, &buf, &len) == -1) {
+    PyErr_SetString(PyExc_ValueError, "PyBytes_AsStringAndSize() failed.");
+    return false;
+  }
+  dynamic_buffer->AddString(buf, len);
+  return true;
+}
+
+bool FillStringBufferWithPyArray(PyObject* value,
+                                 DynamicBuffer* dynamic_buffer) {
+  PyArrayObject* array = reinterpret_cast<PyArrayObject*>(value);
+  switch (PyArray_TYPE(array)) {
+    case NPY_OBJECT:
+    case NPY_STRING:
+    case NPY_UNICODE: {
+      UniquePyObjectRef iter(PyArray_IterNew(value));
+      while (PyArray_ITER_NOTDONE(iter.get())) {
+        UniquePyObjectRef item(PyArray_GETITEM(
+            array, reinterpret_cast<char*>(PyArray_ITER_DATA(iter.get()))));
+
+        if (!FillStringBufferFromPyString(item.get(), dynamic_buffer)) {
+          return false;
+        }
+
+        PyArray_ITER_NEXT(iter.get());
+      }
+      return true;
+    }
+    default:
+      break;
+  }
+
+  PyErr_Format(PyExc_ValueError,
+               "Cannot use numpy array of type %d for string tensor.",
+               PyArray_TYPE(array));
+  return false;
+}
+
+int ConvertFromPyString(PyObject* obj, char** data, Py_ssize_t* length) {
+#if PY_MAJOR_VERSION >= 3
+  return PyBytes_AsStringAndSize(obj, data, length);
+#else
+  return PyString_AsStringAndSize(obj, data, length);
+#endif
+}
+
+PyObject* ConvertToPyString(const char* data, size_t length) {
+#if PY_MAJOR_VERSION >= 3
+  return PyBytes_FromStringAndSize(data, length);
+#else
+  return PyString_FromStringAndSize(data, length);
+#endif
+}
+
 }  // namespace python_utils
 }  // namespace tflite
diff --git a/tensorflow/lite/python/interpreter_wrapper/python_utils.h b/tensorflow/lite/python/interpreter_wrapper/python_utils.h
index 30a4422..f467737 100644
--- a/tensorflow/lite/python/interpreter_wrapper/python_utils.h
+++ b/tensorflow/lite/python/interpreter_wrapper/python_utils.h
@@ -17,22 +17,26 @@
 #define TENSORFLOW_LITE_PYTHON_INTERPRETER_WRAPPER_PYTHON_UTILS_H_
 
 #include "tensorflow/lite/context.h"
-
-// Disallow Numpy 1.7 deprecated symbols.
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
-
-#include <Python.h>
-
-#include "numpy/arrayobject.h"
-#include "numpy/ufuncobject.h"
+#include "tensorflow/lite/python/interpreter_wrapper/numpy.h"
+#include "tensorflow/lite/string_util.h"
 
 namespace tflite {
 namespace python_utils {
 
+struct PyDecrefDeleter {
+  void operator()(PyObject* p) const { Py_DECREF(p); }
+};
+
 int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type);
 
 TfLiteType TfLiteTypeFromPyArray(PyArrayObject* array);
 
+bool FillStringBufferWithPyArray(PyObject* value,
+                                 DynamicBuffer* dynamic_buffer);
+
+int ConvertFromPyString(PyObject* obj, char** data, Py_ssize_t* length);
+PyObject* ConvertToPyString(const char* data, size_t length);
+
 }  // namespace python_utils
 }  // namespace tflite
 #endif  // TENSORFLOW_LITE_PYTHON_INTERPRETER_WRAPPER_PYTHON_UTILS_H_
diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py
index 3b0aa02b..e609a2cb 100644
--- a/tensorflow/lite/python/lite.py
+++ b/tensorflow/lite/python/lite.py
@@ -12,27 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TensorFlow Lite tooling helper functionality.
+"""TensorFlow Lite tooling helper functionality."""
 
-EXPERIMENTAL: APIs here are unstable and likely to change without notice.
-
-@@TocoConverter
-@@TFLiteConverter
-@@toco_convert
-@@toco_convert_protos
-@@Interpreter
-@@OpHint
-@@convert_op_hints_to_stubs
-@@build_toco_convert_protos
-
-@@TFLITE
-@@GRAPHVIZ_DOT
-
-"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import warnings
+import enum
 from six import PY3
 
 from google.protobuf import text_format as _text_format
@@ -52,12 +39,17 @@
 from tensorflow.lite.python.interpreter import Interpreter  # pylint: disable=unused-import
 from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs  # pylint: disable=unused-import
 from tensorflow.lite.python.op_hint import OpHint  # pylint: disable=unused-import
+from tensorflow.lite.python.optimize import calibrator as _calibrator
 from tensorflow.core.framework import graph_pb2 as _graph_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2 as _rewriter_config_pb2
 from tensorflow.core.protobuf import config_pb2 as _config_pb2
 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2
 from tensorflow.python import keras as _keras
 from tensorflow.python.client import session as _session
+from tensorflow.python.eager import def_function as _def_function
+from tensorflow.python.eager import function as _function
+from tensorflow.python.framework import convert_to_constants as _convert_to_constants
+from tensorflow.python.framework import dtypes as _dtypes
 from tensorflow.python.framework import graph_util as _tf_graph_util
 from tensorflow.python.framework import ops as _ops
 from tensorflow.python.framework.errors_impl import NotFoundError as _NotFoundError
@@ -71,18 +63,20 @@
 from tensorflow.python.util.tf_export import tf_export as _tf_export
 
 
-def _run_graph_optimizations(graph_def, input_arrays, output_arrays):
+def _run_graph_optimizations(graph_def, input_arrays, output_arrays,
+                             graph=None):
   """Apply standard TensorFlow optimizations to the graph_def.
 
   Args:
     graph_def: Frozen GraphDef to be optimized.
     input_arrays: List of arrays that are considered inputs of the graph.
     output_arrays: List of arrays that are considered outputs of the graph.
+    graph: TensorFlow Graph. Required when Eager mode is enabled. (default None)
 
   Returns:
     A new, optimized GraphDef.
   """
-  meta_graph = _export_meta_graph(graph_def=graph_def)
+  meta_graph = _export_meta_graph(graph_def=graph_def, graph=graph)
 
   # We need to add a collection called 'train_op' so that grappler
   # knows what the outputs are.
@@ -100,6 +94,196 @@
   return _tf_optimizer.OptimizeGraph(config, meta_graph)
 
 
+@_tf_export("lite.Optimize")
+class Optimize(enum.Enum):
+  """Enum defining the optimizations to apply when generating tflite graphs.
+
+  Some optimizations may come at the cost of accuracy.
+  """
+
+  # Optimize for size.
+  #
+  # Optimizations that reduce the size of the model.
+  # The model size will be reduced. Optimizations can include quantizing the
+  # weights of the floating point model.
+  OPTIMIZE_FOR_SIZE = "OPTIMIZE_FOR_SIZE"
+
+  # Optimize for latency.
+  #
+  # Optimizations that reduce the latency of the model.
+  # The model latency will be reduced. Optimizations can include quantizing the
+  # weights of the floating point model.
+  OPTIMIZE_FOR_LATENCY = "OPTIMIZE_FOR_LATENCY"
+
+  def __str__(self):
+    return self.value
+
+
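As a sketch of how this enum is meant to be consumed (assuming a build that includes this change, so `tf.lite.Optimize` resolves to the class above): the converter reduces `optimizations` to a set-membership test, so ordering is not yet significant — see the TODO in `convert()` below.

```python
import tensorflow as tf

optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]

# convert() only checks membership for now, so list order is ignored.
post_training = bool(
    set(optimizations) &
    {tf.lite.Optimize.OPTIMIZE_FOR_LATENCY, tf.lite.Optimize.OPTIMIZE_FOR_SIZE})
assert post_training
assert str(tf.lite.Optimize.OPTIMIZE_FOR_SIZE) == "OPTIMIZE_FOR_SIZE"
```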
+@_tf_export("lite.RepresentativeDataset")
+class RepresentativeDataset(object):
+  """Representative dataset to evaluate optimizations.
+
+  A representative dataset that can be used to evaluate optimizations by the
+  converter. E.g. the converter can use these examples to estimate (min, max)
+  ranges by calibrating the model on inputs. This can allow the converter to
+  quantize a converted floating point model.
+  """
+
+  def __init__(self, input_gen, output_gen=None):
+    """Creates a representative dataset.
+
+    Args:
+      input_gen: An input generator that can be used to generate input samples
+        for the model. This must be a callable object that returns an object
+        that supports the `iter()` protocol (e.g. a generator function). The
+        elements generated must have the same type and shape as the inputs to
+        the model.
+      output_gen: (optional) An output generator that can be used to generate
+        output samples for the model. This must be a callable object that
+        returns an object that supports the `iter()` protocol (e.g. a generator
+        function). The elements generated must have the same type and shape as
+        the outputs of the model. (default None)
+    """
+    self.input_gen = input_gen
+    self.output_gen = output_gen
+
+
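A minimal sketch of wrapping a generator function, following the `calibration_gen` pattern used in `lite_test.py` below; the input shape and dtype here are illustrative placeholders, not requirements of the API:

```python
import numpy as np
import tensorflow as tf

def representative_gen():
  # Yield a few samples with the same shape/dtype as the model's inputs.
  for _ in range(10):
    yield np.random.uniform(-1, 1, size=(1, 5, 5, 3)).astype(np.float32)

# The constructor just stores the callable; the converter iterates it later
# during calibration.
dataset = tf.lite.RepresentativeDataset(representative_gen)
```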
+class TFLiteConverterV2(object):
+  """Converts a TensorFlow model into TensorFlow Lite model.
+
+  Attributes:
+    allow_custom_ops: Boolean indicating whether to allow custom operations.
+      When false any unknown operation is an error. When true, custom ops are
+      created for any op that is unknown. The developer will need to provide
+      these to the TensorFlow Lite runtime with a custom resolver. (default
+      False)
+    target_ops: Experimental flag, subject to change. Set of OpsSet options
+      indicating which converter to use. (default set([OpsSet.TFLITE_BUILTINS]))
+    optimizations: Experimental flag, subject to change. A list of
+      optimizations to apply when converting the model. The converter applies
+      the optimizations by giving priority to the optimizations specified
+      earlier in the list. E.g. `[Optimize.OPTIMIZE_FOR_SIZE,
+      Optimize.OPTIMIZE_FOR_LATENCY]` requires the converter to do both size
+      and latency optimizations, giving priority to size optimizations over
+      latency optimizations.
+    representative_dataset: A representative dataset that can be used to
+      generate input and output samples for the model. The converter can use
+      the dataset to evaluate different optimizations.
+
+  Example usage:
+
+    ```python
+    # Converting a ConcreteFunction to a TensorFlow Lite model.
+    converter = lite.TFLiteConverterV2.from_concrete_function(func)
+    tflite_model = converter.convert()
+    open("converted_model.tflite", "wb").write(tflite_model)
+    ```
+  """
+
+  def __init__(self, func):
+    """Constructor for TFLiteConverter.
+
+    Args:
+      func: TensorFlow ConcreteFunction.
+    """
+    self._func = func
+    self.allow_custom_ops = False
+    self.target_ops = set([OpsSet.TFLITE_BUILTINS])
+    self.representative_dataset = None
+    self.optimizations = []
+
+  @classmethod
+  def from_concrete_function(cls, func):
+    """Creates a TFLiteConverter class from a ConcreteFunction.
+
+    Args:
+      func: TensorFlow ConcreteFunction.
+
+    Returns:
+      TFLiteConverterV2 object.
+    """
+    if not isinstance(func, _function.ConcreteFunction):
+      message = "This function takes in a ConcreteFunction."
+      if isinstance(func, _def_function.Function):
+        message += (" To get the ConcreteFunction from a Function,"
+                    " call from_concrete_function.")
+      raise ValueError(message)
+    return cls(func)
+
+  def convert(self):
+    """Converts a TensorFlow GraphDef based on instance variables.
+
+    Returns:
+      The converted data in serialized format.
+
+    Raises:
+      ValueError:
+        Input shape is not specified.
+        None value for dimension in input_tensor (other than the batch
+        dimension).
+    """
+    graph_def = _convert_to_constants.convert_variables_to_constants_v2(
+        self._func)
+    input_tensors = [
+        tensor for tensor in self._func.inputs
+        if tensor.dtype != _dtypes.resource
+    ]
+    output_tensors = self._func.outputs
+
+    # Run a Grappler pass.
+    graph_def = _run_graph_optimizations(graph_def, input_tensors,
+                                         output_tensors, self._func.graph)
+
+    # Checks dimensions in input tensor.
+    for tensor in input_tensors:
+      # Note that shape_list might be empty for scalar shapes.
+      shape_list = tensor.get_shape().as_list()
+      if None in shape_list[1:]:
+        raise ValueError(
+            "None is only supported in the 1st dimension. Tensor '{0}' has "
+            "invalid shape '{1}'.".format(_tensor_name(tensor), shape_list))
+      elif shape_list and shape_list[0] is None:
+        self._set_batch_size(batch_size=1)
+
+    if self.representative_dataset:
+      if not isinstance(self.representative_dataset, RepresentativeDataset):
+        raise TypeError("representative_dataset must be an instance of "
+                        "RepresentativeDataset")
+      if self.representative_dataset.input_gen is None:
+        raise ValueError(
+            "Provide an input generator for representative_dataset")
+
+    # TODO(shashishekhar): For now the order of optimizations is ignored.
+    # Both size and latency optimizations decide whether to apply post
+    # training optimizations.
+    post_training_optimize = bool(
+        len(
+            set(self.optimizations)
+            & set([Optimize.OPTIMIZE_FOR_LATENCY, Optimize.OPTIMIZE_FOR_SIZE])))
+    # Do weights-only quantization if there is no dataset for calibration.
+    weights_only_quantize_flag = (
+        post_training_optimize and (self.representative_dataset is None))
+
+    converter_kwargs = {
+        "input_format": constants.TENSORFLOW_GRAPHDEF,
+        "allow_custom_ops": self.allow_custom_ops,
+        "post_training_quantize": weights_only_quantize_flag,
+        "target_ops": self.target_ops,
+    }
+
+    # Converts model.
+    result = _toco_convert_impl(
+        input_data=graph_def,
+        input_tensors=input_tensors,
+        output_tensors=output_tensors,
+        **converter_kwargs)
+
+    if self.representative_dataset and post_training_optimize:
+      calibrate_quantize = _calibrator.Calibrator(result)
+      result = calibrate_quantize.calibrate_and_quantize(
+          self.representative_dataset.input_gen)
+
+    return result
+
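An end-to-end sketch of the new 2.x flow (note that `TFLiteConverterV2` carries no `_tf_export` decorator yet, so it is reached through the lite module, as in `lite_v2_test.py` below; eager mode is assumed):

```python
from tensorflow.lite.python import lite
from tensorflow.python.eager import def_function
from tensorflow.python.framework import constant_op

@def_function.function
def mul(x):
  return 3. * x

input_data = constant_op.constant(1., shape=[1])
concrete_func = mul.get_concrete_function(input_data)

# from_concrete_function requires a ConcreteFunction; passing `mul` itself
# raises the ValueError exercised in lite_v2_test.py.
converter = lite.TFLiteConverterV2.from_concrete_function(concrete_func)
tflite_model = converter.convert()
with open("converted_model.tflite", "wb") as f:
  f.write(tflite_model)
```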
+
 @_tf_export("lite.TFLiteConverter")
 class TFLiteConverter(object):
   """Convert a TensorFlow model into `output_format` using TOCO.
@@ -141,10 +325,11 @@
       created for any op that is unknown. The developer will need to provide
       these to the TensorFlow Lite runtime with a custom resolver.
       (default False)
-    post_training_quantize: Boolean indicating whether to quantize the weights
-      of the converted float model. Model size will be reduced and there will be
-      latency improvements (at the cost of accuracy).
-      (default False)
+    post_training_quantize: Deprecated, please specify
+      `[Optimize.OPTIMIZE_FOR_SIZE]` for `optimizations` instead. Boolean
+      indicating whether to quantize the weights of the converted float model.
+      Model size will be reduced and there will be latency improvements
+      (at the cost of accuracy). (default False)
     dump_graphviz_dir: Full filepath of folder to dump the graphs at various
       stages of processing GraphViz .dot files. Preferred over
       --output_format=GRAPHVIZ_DOT in order to keep the requirements of the
@@ -154,6 +339,16 @@
     target_ops: Experimental flag, subject to change. Set of OpsSet
       options indicating which converter to use.
       (default set([OpsSet.TFLITE_BUILTINS]))
+    optimizations: Experimental flag, subject to change. A list of
+      optimizations to apply when converting the model. The converter applies
+      the optimizations by giving priority to the optimizations specified
+      earlier in the list. E.g.
+      `[Optimize.OPTIMIZE_FOR_SIZE, Optimize.OPTIMIZE_FOR_LATENCY]` requires
+      the converter to do both size and latency optimizations, giving priority
+      to size optimizations over latency optimizations.
+    representative_dataset: A representative dataset that can be used to
+      generate input and output samples for the model. The converter can use
+      the dataset to evaluate different optimizations.
 
   Example usage:
 
@@ -216,10 +411,12 @@
     self.reorder_across_fake_quant = False
     self.change_concat_input_ranges = False
     self.allow_custom_ops = False
-    self.post_training_quantize = False
+    self._post_training_quantize = False
     self.dump_graphviz_dir = None
     self.dump_graphviz_video = False
     self.target_ops = set([OpsSet.TFLITE_BUILTINS])
+    self.representative_dataset = None
+    self.optimizations = []
 
     # Attributes are used by models that cannot be loaded into TensorFlow.
     if not self._has_valid_tensors():
@@ -419,6 +616,27 @@
     graph_def = _freeze_graph(sess, output_tensors)
     return cls(graph_def, input_tensors, output_tensors)
 
+  def __setattr__(self, name, value):
+    if name == "post_training_quantize":
+      warnings.warn("Property %s is deprecated, "
+                    "please use optimizations=[Optimize.OPTIMIZE_FOR_SIZE]"
+                    " instead." % name)
+      if value:
+        # Use OPTIMIZE_FOR_SIZE for post training for now.
+        self.optimizations = [Optimize.OPTIMIZE_FOR_SIZE]
+      else:
+        self.optimizations = []
+      return
+    object.__setattr__(self, name, value)
+
+  def __getattribute__(self, name):
+    if name == "post_training_quantize":
+      warnings.warn("Property %s is deprecated, "
+                    "please use optimizations=[Optimize.OPTIMIZE_FOR_SIZE]"
+                    " instead." % name)
+      return Optimize.OPTIMIZE_FOR_SIZE in set(self.optimizations)
+    return object.__getattribute__(self, name)
+
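A sketch of the shim's observable behavior (TF 1.x graph mode assumed): both reading and writing the deprecated attribute warn, and the value is rerouted through `optimizations`.

```python
import warnings

from tensorflow.lite.python import lite
from tensorflow.python.client import session
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops

in_tensor = array_ops.placeholder(shape=[1, 4], dtype=dtypes.float32)
out_tensor = in_tensor + in_tensor
sess = session.Session()
converter = lite.TFLiteConverter.from_session(sess, [in_tensor], [out_tensor])

with warnings.catch_warnings(record=True) as caught:
  warnings.simplefilter("always")
  converter.post_training_quantize = True  # rerouted via __setattr__
  assert converter.optimizations == [lite.Optimize.OPTIMIZE_FOR_SIZE]
  assert converter.post_training_quantize  # derived via __getattribute__
assert any("deprecated" in str(w.message) for w in caught)
```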
   def convert(self):
     """Converts a TensorFlow GraphDef based on instance variables.
 
@@ -463,6 +681,24 @@
                          "tensors '{0}'.".format(",".join(invalid_stats)))
     else:
       quantized_stats = None
+    if self.representative_dataset:
+      if not isinstance(self.representative_dataset, RepresentativeDataset):
+        raise TypeError(
+            "representative_dataset must be an instance of "
+            "RepresentativeDataset")
+      if self.representative_dataset.input_gen is None:
+        raise ValueError(
+            "Provide an input generator for representative_dataset")
+
+    # TODO(shashishekhar): For now the order of optimizations is ignored.
+    # Both size and latency optimizations decide whether to apply post
+    # training optimizations.
+    post_training_optimize = bool(
+        len(set(self.optimizations) & set([Optimize.OPTIMIZE_FOR_LATENCY,
+                                           Optimize.OPTIMIZE_FOR_SIZE])))
+    # Do weights-only quantization if there is no dataset for calibration.
+    weights_only_quantize_flag = (
+        post_training_optimize and (self.representative_dataset is None))
 
     converter_kwargs = {
         "inference_type": self.inference_type,
@@ -475,7 +711,7 @@
         "reorder_across_fake_quant": self.reorder_across_fake_quant,
         "change_concat_input_ranges": self.change_concat_input_ranges,
         "allow_custom_ops": self.allow_custom_ops,
-        "post_training_quantize": self.post_training_quantize,
+        "post_training_quantize": weights_only_quantize_flag,
         "target_ops": self.target_ops,
         "dump_graphviz_dir": self.dump_graphviz_dir,
         "dump_graphviz_video": self.dump_graphviz_video
@@ -504,6 +740,12 @@
           input_arrays_with_shape=self._input_arrays_with_shape,
           output_arrays=self._output_arrays,
           **converter_kwargs)
+
+    if self.representative_dataset and post_training_optimize:
+      calibrate_quantize = _calibrator.Calibrator(result)
+      result = calibrate_quantize.calibrate_and_quantize(
+          self.representative_dataset.input_gen)
+
     return result
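Restating the decision logic above as a sketch, reusing `sess`, `inp`, `output`, and `calibration_gen` exactly as defined in `testPostTrainingCalibrateAndQuantize` below: with `optimizations` set but no dataset, `convert()` performs weights-only quantization; adding a `representative_dataset` switches it to the calibrate-and-quantize path.

```python
converter = lite.TFLiteConverter.from_session(sess, [inp], [output])
converter.optimizations = [lite.Optimize.OPTIMIZE_FOR_SIZE]

# No representative_dataset: weights-only quantization
# (post_training_quantize=True is passed to TOCO).
weights_only_model = converter.convert()

# With a representative_dataset: the TOCO output is additionally calibrated
# and quantized by _calibrator.Calibrator.
converter.representative_dataset = lite.RepresentativeDataset(calibration_gen)
calibrated_model = converter.convert()
```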
 
   def get_input_arrays(self):
@@ -545,7 +787,7 @@
       tensor.set_shape(shape)
 
 
-@_tf_export("lite.TocoConverter")
+@_tf_export(v1=["lite.TocoConverter"])
 class TocoConverter(object):
   """Convert a TensorFlow model into `output_format` using TOCO.
 
diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py
index ff91d5e..d41b7a7 100644
--- a/tensorflow/lite/python/lite_test.py
+++ b/tensorflow/lite/python/lite_test.py
@@ -32,6 +32,7 @@
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops.variables import global_variables_initializer as _global_variables_initializer
 from tensorflow.python.platform import gfile
@@ -131,13 +132,13 @@
     input_details = interpreter.get_input_details()
     self.assertEqual(1, len(input_details))
     self.assertEqual('Placeholder', input_details[0]['name'])
-    self.assertEqual(np.object_, input_details[0]['dtype'])
+    self.assertEqual(np.string_, input_details[0]['dtype'])
     self.assertTrue(([4] == input_details[0]['shape']).all())
 
     output_details = interpreter.get_output_details()
     self.assertEqual(1, len(output_details))
     self.assertEqual('Reshape', output_details[0]['name'])
-    self.assertEqual(np.object_, output_details[0]['dtype'])
+    self.assertEqual(np.string_, output_details[0]['dtype'])
     self.assertTrue(([2, 2] == output_details[0]['shape']).all())
     # TODO(b/122659643): Test setting/getting string data via the python
     # interpreter API after support has been added.
@@ -481,6 +482,29 @@
     self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all())
     self.assertTrue(output_details[0]['quantization'][0] > 0)  # scale
 
+  def testPostTrainingQuantizeDeprecatedAttribute(self):
+    in_tensor_1 = array_ops.placeholder(
+        shape=[33, 33], dtype=dtypes.float32, name='inputA')
+    in_tensor_2 = constant_op.constant(
+        np.random.uniform(low=-10., high=10., size=(33, 33)),
+        shape=[33, 33],
+        dtype=dtypes.float32,
+        name='inputB')
+    out_tensor = math_ops.matmul(in_tensor_1, in_tensor_2, name='output')
+    sess = session.Session()
+
+    quantized_converter = lite.TFLiteConverter.from_session(
+        sess, [in_tensor_1], [out_tensor])
+    self.assertFalse(quantized_converter.post_training_quantize)
+
+    quantized_converter.post_training_quantize = True
+    self.assertTrue(quantized_converter.post_training_quantize)
+    self.assertEqual(quantized_converter.optimizations,
+                     [lite.Optimize.OPTIMIZE_FOR_SIZE])
+
+    quantized_tflite = quantized_converter.convert()
+    self.assertTrue(quantized_tflite)
+
   def testPostTrainingQuantize(self):
     np.random.seed(0)
     # We need the tensor to have more than 1024 elements for quantize_weights
@@ -504,7 +528,53 @@
     # Convert quantized weights model.
     quantized_converter = lite.TFLiteConverter.from_session(
         sess, [in_tensor_1], [out_tensor])
-    quantized_converter.post_training_quantize = True
+    quantized_converter.optimizations = [lite.Optimize.OPTIMIZE_FOR_SIZE]
+    quantized_tflite = quantized_converter.convert()
+    self.assertTrue(quantized_tflite)
+
+    # Ensure that the quantized weights tflite model is smaller.
+    self.assertTrue(len(quantized_tflite) < len(float_tflite))
+
+  def testPostTrainingCalibrateAndQuantize(self):
+    np.random.seed(0)
+    # Create a mobilenet like model.
+    output_channel = 16
+    depth_multiplier = 1
+    inp = array_ops.placeholder(dtype=dtypes.float32, shape=(1, 5, 5, 3))
+    conv = nn_ops.conv2d(
+        inp,
+        filter=array_ops.zeros([3, 3, 3, output_channel]),
+        strides=[1, 1, 1, 1],
+        padding='SAME')
+    dconv = nn_ops.depthwise_conv2d_native(
+        conv,
+        filter=array_ops.zeros(
+            [16, 16, output_channel, output_channel * depth_multiplier]),
+        strides=[1, 1, 1, 1],
+        padding='SAME')
+    pool = nn_ops.pool(
+        dconv, window_shape=[2, 2], pooling_type='AVG', padding='SAME')
+    max_pool = nn_ops.pool(
+        pool, window_shape=[2, 2], pooling_type='MAX', padding='SAME')
+    output = nn_ops.softmax(max_pool)
+
+    def calibration_gen():
+      for _ in range(10):
+        yield np.random.uniform(-1, 1, size=(1, 5, 5, 3)).astype(np.float32)
+
+    sess = session.Session()
+
+    # Convert float model.
+    float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output])
+    float_tflite = float_converter.convert()
+    self.assertTrue(float_tflite)
+
+    # Convert quantized weights model.
+    quantized_converter = lite.TFLiteConverter.from_session(
+        sess, [inp], [output])
+    quantized_converter.optimizations = [lite.Optimize.OPTIMIZE_FOR_SIZE]
+    quantized_converter.representative_dataset = lite.RepresentativeDataset(
+        calibration_gen)
     quantized_tflite = quantized_converter.convert()
     self.assertTrue(quantized_tflite)
 
diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py
new file mode 100644
index 0000000..126a778
--- /dev/null
+++ b/tensorflow/lite/python/lite_v2_test.py
@@ -0,0 +1,182 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for lite.py functionality related to TensorFlow 2.0."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.lite.python import lite
+from tensorflow.lite.python.interpreter import Interpreter
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model.load import load
+from tensorflow.python.saved_model.save import save
+from tensorflow.python.training.tracking import tracking
+
+
+class FromConcreteFunctionTest(test_util.TensorFlowTestCase):
+
+  def _evaluateTFLiteModel(self, tflite_model, input_data):
+    """Evaluates the model on the `input_data`."""
+    interpreter = Interpreter(model_content=tflite_model)
+    interpreter.allocate_tensors()
+
+    input_details = interpreter.get_input_details()
+    output_details = interpreter.get_output_details()
+
+    for input_tensor, tensor_data in zip(input_details, input_data):
+      interpreter.set_tensor(input_tensor['index'], tensor_data.numpy())
+    interpreter.invoke()
+    return interpreter.get_tensor(output_details[0]['index'])
+
+  @test_util.run_v2_only
+  def testTypeInvalid(self):
+    root = tracking.AutoTrackable()
+    root.v1 = variables.Variable(3.)
+    root.v2 = variables.Variable(2.)
+    root.f = def_function.function(lambda x: root.v1 * root.v2 * x)
+
+    with self.assertRaises(ValueError) as error:
+      _ = lite.TFLiteConverterV2.from_concrete_function(root.f)
+    self.assertIn('call get_concrete_function', str(error.exception))
+
+  @test_util.run_v2_only
+  def testFloat(self):
+    input_data = constant_op.constant(1., shape=[1])
+    root = tracking.AutoTrackable()
+    root.v1 = variables.Variable(3.)
+    root.v2 = variables.Variable(2.)
+    root.f = def_function.function(lambda x: root.v1 * root.v2 * x)
+    concrete_func = root.f.get_concrete_function(input_data)
+
+    # Convert model.
+    converter = lite.TFLiteConverterV2.from_concrete_function(concrete_func)
+    tflite_model = converter.convert()
+
+    # Check values from converted model.
+    expected_value = root.f(input_data)
+    actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+  @test_util.run_v2_only
+  def testSizeNone(self):
+    # Test with a shape of None
+    input_data = constant_op.constant(1., shape=None)
+    root = tracking.AutoTrackable()
+    root.v1 = variables.Variable(3.)
+    root.f = def_function.function(lambda x: root.v1 * x)
+    concrete_func = root.f.get_concrete_function(input_data)
+
+    # Convert model.
+    converter = lite.TFLiteConverterV2.from_concrete_function(concrete_func)
+    tflite_model = converter.convert()
+
+    # Check values from converted model.
+    expected_value = root.f(input_data)
+    actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+  @test_util.run_v2_only
+  def testConstSavedModel(self):
+    """Test a basic model with functions to make sure functions are inlined."""
+    self.skipTest('b/124205572')
+    input_data = constant_op.constant(1., shape=[1])
+    root = tracking.AutoTrackable()
+    root.f = def_function.function(lambda x: 2. * x)
+    to_save = root.f.get_concrete_function(input_data)
+
+    save_dir = os.path.join(self.get_temp_dir(), 'saved_model')
+    save(root, save_dir, to_save)
+    saved_model = load(save_dir)
+    concrete_func = saved_model.signatures['serving_default']
+
+    # Convert model and ensure model is not None.
+    converter = lite.TFLiteConverterV2.from_concrete_function(concrete_func)
+    tflite_model = converter.convert()
+
+    # Check values from converted model.
+    expected_value = root.f(input_data)
+    actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+  @test_util.run_v2_only
+  def testVariableSavedModel(self):
+    """Test a basic model with Variables with saving/loading the SavedModel."""
+    self.skipTest('b/124205572')
+    input_data = constant_op.constant(1., shape=[1])
+    root = tracking.AutoTrackable()
+    root.v1 = variables.Variable(3.)
+    root.v2 = variables.Variable(2.)
+    root.f = def_function.function(lambda x: root.v1 * root.v2 * x)
+    to_save = root.f.get_concrete_function(input_data)
+
+    save_dir = os.path.join(self.get_temp_dir(), 'saved_model')
+    save(root, save_dir, to_save)
+    saved_model = load(save_dir)
+    concrete_func = saved_model.signatures['serving_default']
+
+    # Convert model and ensure model is not None.
+    converter = lite.TFLiteConverterV2.from_concrete_function(concrete_func)
+    tflite_model = converter.convert()
+
+    # Check values from converted model.
+    expected_value = root.f(input_data)
+    actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+  @test_util.run_v2_only
+  def testMultiFunctionModel(self):
+    """Test a basic model with Variables."""
+
+    class BasicModel(tracking.AutoTrackable):
+
+      def __init__(self):
+        self.y = None
+        self.z = None
+
+      @def_function.function
+      def add(self, x):
+        if self.y is None:
+          self.y = variables.Variable(2.)
+        return x + self.y
+
+      @def_function.function
+      def sub(self, x):
+        if self.z is None:
+          self.z = variables.Variable(3.)
+        return x - self.z
+
+    input_data = constant_op.constant(1., shape=[1])
+    root = BasicModel()
+    concrete_func = root.add.get_concrete_function(input_data)
+
+    # Convert model and ensure model is not None.
+    converter = lite.TFLiteConverterV2.from_concrete_function(concrete_func)
+    tflite_model = converter.convert()
+
+    # Check values from converted model.
+    expected_value = root.add(input_data)
+    actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/lite/python/optimize/BUILD b/tensorflow/lite/python/optimize/BUILD
index 74b573b..51310af 100644
--- a/tensorflow/lite/python/optimize/BUILD
+++ b/tensorflow/lite/python/optimize/BUILD
@@ -13,11 +13,12 @@
     deps = [
         "//tensorflow/lite:framework",
         "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/python/interpreter_wrapper:numpy",
         "//tensorflow/lite/python/interpreter_wrapper:python_error_reporter",
         "//tensorflow/lite/python/interpreter_wrapper:python_utils",
-        "//tensorflow/lite/tools/optimize:calibration_reader",
-        "//tensorflow/lite/tools/optimize:calibrator_lib",
         "//tensorflow/lite/tools/optimize:quantize_model",
+        "//tensorflow/lite/tools/optimize/calibration:calibration_reader",
+        "//tensorflow/lite/tools/optimize/calibration:calibrator_lib",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
         "@com_google_absl//absl/memory",
diff --git a/tensorflow/lite/python/optimize/calibration_wrapper.cc b/tensorflow/lite/python/optimize/calibration_wrapper.cc
index d6fe1fb..5a481d4 100644
--- a/tensorflow/lite/python/optimize/calibration_wrapper.cc
+++ b/tensorflow/lite/python/optimize/calibration_wrapper.cc
@@ -22,28 +22,13 @@
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/model.h"
+#include "tensorflow/lite/python/interpreter_wrapper/numpy.h"
 #include "tensorflow/lite/python/interpreter_wrapper/python_error_reporter.h"
 #include "tensorflow/lite/python/interpreter_wrapper/python_utils.h"
-#include "tensorflow/lite/tools/optimize/calibration_reader.h"
-#include "tensorflow/lite/tools/optimize/calibrator.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_reader.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibrator.h"
 #include "tensorflow/lite/tools/optimize/quantize_model.h"
 
-// Disallow Numpy 1.7 deprecated symbols.
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
-
-#include <Python.h>
-
-#include "numpy/arrayobject.h"
-#include "numpy/ufuncobject.h"
-
-#if PY_MAJOR_VERSION >= 3
-#define PY_TO_CPPSTRING PyBytes_AsStringAndSize
-#define CPP_TO_PYSTRING PyBytes_FromStringAndSize
-#else
-#define PY_TO_CPPSTRING PyString_AsStringAndSize
-#define CPP_TO_PYSTRING PyString_FromStringAndSize
-#endif
-
 #define TFLITE_PY_CHECK(x)               \
   if ((x) != kTfLiteOk) {                \
     return error_reporter_->exception(); \
@@ -60,9 +45,7 @@
 
 namespace {
 
-struct PyDecrefDeleter {
-  void operator()(PyObject* p) const { Py_DECREF(p); }
-};
+using python_utils::PyDecrefDeleter;
 
 std::unique_ptr<tflite::ModelT> CreateMutableModel(const tflite::Model& model) {
   std::unique_ptr<tflite::ModelT> copied_model =
@@ -185,7 +168,8 @@
     error_reporter_->exception();
     return nullptr;
   }
-  return CPP_TO_PYSTRING(
+
+  return python_utils::ConvertToPyString(
       reinterpret_cast<const char*>(builder.GetCurrentBufferPointer()),
       builder.GetSize());
 }
@@ -196,7 +180,8 @@
   char* buf = nullptr;
   Py_ssize_t length;
   std::unique_ptr<PythonErrorReporter> error_reporter(new PythonErrorReporter);
-  if (PY_TO_CPPSTRING(data, &buf, &length) == -1) {
+
+  if (python_utils::ConvertFromPyString(data, &buf, &length) == -1) {
     return nullptr;
   }
   std::unique_ptr<tflite::FlatBufferModel> model =
diff --git a/tensorflow/lite/python/testdata/BUILD b/tensorflow/lite/python/testdata/BUILD
index 4689c31..2fa08e5 100644
--- a/tensorflow/lite/python/testdata/BUILD
+++ b/tensorflow/lite/python/testdata/BUILD
@@ -32,9 +32,20 @@
     ],
 )
 
+tf_to_tflite(
+    name = "gather_string",
+    src = "gather.pbtxt",
+    out = "gather_string.tflite",
+    options = [
+        "--input_arrays=input,indices",
+        "--output_arrays=output",
+    ],
+)
+
 filegroup(
     name = "interpreter_test_data",
     srcs = [
+        ":gather_string",
         ":permute_float",
         ":permute_uint8",
     ],
diff --git a/tensorflow/lite/python/testdata/gather.pbtxt b/tensorflow/lite/python/testdata/gather.pbtxt
new file mode 100644
index 0000000..0b1193c
--- /dev/null
+++ b/tensorflow/lite/python/testdata/gather.pbtxt
@@ -0,0 +1,93 @@
+node {
+  name: "input"
+  op: "Placeholder"
+  device: "/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 10
+        }
+      }
+    }
+  }
+}
+node {
+  name: "indices"
+  op: "Placeholder"
+  device: "/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+      }
+    }
+  }
+}
+node {
+  name: "axis"
+  op: "Const"
+  device: "/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "output"
+  op: "GatherV2"
+  input: "input"
+  input: "indices"
+  input: "axis"
+  device: "/device:CPU:0"
+  attr {
+    key: "Taxis"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_STRING
+    }
+  }
+}
+versions {
+  producer: 27
+}
diff --git a/tensorflow/lite/schema/BUILD b/tensorflow/lite/schema/BUILD
index ea51676..e554191 100644
--- a/tensorflow/lite/schema/BUILD
+++ b/tensorflow/lite/schema/BUILD
@@ -9,6 +9,12 @@
 
 py_binary(
     name = "upgrade_schema",
+    srcs = ["upgrade_schema.py"],
+    deps = [":upgrade_schema_main_lib"],
+)
+
+py_library(
+    name = "upgrade_schema_main_lib",
     srcs = [
         "upgrade_schema.py",
     ],
@@ -39,7 +45,7 @@
         "notap",
     ],
     deps = [
-        ":upgrade_schema",
+        ":upgrade_schema_main_lib",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_test_lib",
     ],
diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs
index 098b7c7..5df56cc 100644
--- a/tensorflow/lite/schema/schema.fbs
+++ b/tensorflow/lite/schema/schema.fbs
@@ -220,6 +220,10 @@
   CEIL = 104,
   REVERSE_V2 = 105,
   ADD_N = 106,
+  GATHER_ND = 107,
+  COS = 108,
+  WHERE = 109,
+  RANK = 110,
 }
 
 // Options for the builtin operators.
@@ -306,6 +310,10 @@
   UniqueOptions,
   ReverseV2Options,
   AddNOptions,
+  GatherNdOptions,
+  CosOptions,
+  WhereOptions,
+  RankOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -549,6 +557,9 @@
 table ExpOptions {
 }
 
+table CosOptions {
+}
+
 table ReducerOptions {
   keep_dims: bool;
 }
@@ -643,6 +654,9 @@
   out_type : TensorType;
 }
 
+table RankOptions {
+}
+
 table PowOptions {
 }
 
@@ -729,6 +743,12 @@
 table AddNOptions {
 }
 
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h
index f0a6b00..e33f0a3 100755
--- a/tensorflow/lite/schema/schema_generated.h
+++ b/tensorflow/lite/schema/schema_generated.h
@@ -139,6 +139,9 @@
 struct ExpOptions;
 struct ExpOptionsT;
 
+struct CosOptions;
+struct CosOptionsT;
+
 struct ReducerOptions;
 struct ReducerOptionsT;
 
@@ -214,6 +217,9 @@
 struct ShapeOptions;
 struct ShapeOptionsT;
 
+struct RankOptions;
+struct RankOptionsT;
+
 struct PowOptions;
 struct PowOptionsT;
 
@@ -277,6 +283,12 @@
 struct AddNOptions;
 struct AddNOptionsT;
 
+struct GatherNdOptions;
+struct GatherNdOptionsT;
+
+struct WhereOptions;
+struct WhereOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -533,11 +545,15 @@
   BuiltinOperator_CEIL = 104,
   BuiltinOperator_REVERSE_V2 = 105,
   BuiltinOperator_ADD_N = 106,
+  BuiltinOperator_GATHER_ND = 107,
+  BuiltinOperator_COS = 108,
+  BuiltinOperator_WHERE = 109,
+  BuiltinOperator_RANK = 110,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_ADD_N
+  BuiltinOperator_MAX = BuiltinOperator_RANK
 };
 
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[106] {
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[110] {
   static const BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -644,7 +660,11 @@
     BuiltinOperator_UNIQUE,
     BuiltinOperator_CEIL,
     BuiltinOperator_REVERSE_V2,
-    BuiltinOperator_ADD_N
+    BuiltinOperator_ADD_N,
+    BuiltinOperator_GATHER_ND,
+    BuiltinOperator_COS,
+    BuiltinOperator_WHERE,
+    BuiltinOperator_RANK
   };
   return values;
 }
@@ -758,6 +778,10 @@
     "CEIL",
     "REVERSE_V2",
     "ADD_N",
+    "GATHER_ND",
+    "COS",
+    "WHERE",
+    "RANK",
     nullptr
   };
   return names;
@@ -852,11 +876,15 @@
   BuiltinOptions_UniqueOptions = 80,
   BuiltinOptions_ReverseV2Options = 81,
   BuiltinOptions_AddNOptions = 82,
+  BuiltinOptions_GatherNdOptions = 83,
+  BuiltinOptions_CosOptions = 84,
+  BuiltinOptions_WhereOptions = 85,
+  BuiltinOptions_RankOptions = 86,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_AddNOptions
+  BuiltinOptions_MAX = BuiltinOptions_RankOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[83] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[87] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -940,7 +968,11 @@
     BuiltinOptions_SplitVOptions,
     BuiltinOptions_UniqueOptions,
     BuiltinOptions_ReverseV2Options,
-    BuiltinOptions_AddNOptions
+    BuiltinOptions_AddNOptions,
+    BuiltinOptions_GatherNdOptions,
+    BuiltinOptions_CosOptions,
+    BuiltinOptions_WhereOptions,
+    BuiltinOptions_RankOptions
   };
   return values;
 }
@@ -1030,6 +1062,10 @@
     "UniqueOptions",
     "ReverseV2Options",
     "AddNOptions",
+    "GatherNdOptions",
+    "CosOptions",
+    "WhereOptions",
+    "RankOptions",
     nullptr
   };
   return names;
@@ -1372,6 +1408,22 @@
   static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
 };
 
+template<> struct BuiltinOptionsTraits<GatherNdOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
+};
+
+template<> struct BuiltinOptionsTraits<CosOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
+};
+
+template<> struct BuiltinOptionsTraits<WhereOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
+};
+
+template<> struct BuiltinOptionsTraits<RankOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -2059,6 +2111,38 @@
     return type == BuiltinOptions_AddNOptions ?
       reinterpret_cast<const AddNOptionsT *>(value) : nullptr;
   }
+  GatherNdOptionsT *AsGatherNdOptions() {
+    return type == BuiltinOptions_GatherNdOptions ?
+      reinterpret_cast<GatherNdOptionsT *>(value) : nullptr;
+  }
+  const GatherNdOptionsT *AsGatherNdOptions() const {
+    return type == BuiltinOptions_GatherNdOptions ?
+      reinterpret_cast<const GatherNdOptionsT *>(value) : nullptr;
+  }
+  CosOptionsT *AsCosOptions() {
+    return type == BuiltinOptions_CosOptions ?
+      reinterpret_cast<CosOptionsT *>(value) : nullptr;
+  }
+  const CosOptionsT *AsCosOptions() const {
+    return type == BuiltinOptions_CosOptions ?
+      reinterpret_cast<const CosOptionsT *>(value) : nullptr;
+  }
+  WhereOptionsT *AsWhereOptions() {
+    return type == BuiltinOptions_WhereOptions ?
+      reinterpret_cast<WhereOptionsT *>(value) : nullptr;
+  }
+  const WhereOptionsT *AsWhereOptions() const {
+    return type == BuiltinOptions_WhereOptions ?
+      reinterpret_cast<const WhereOptionsT *>(value) : nullptr;
+  }
+  RankOptionsT *AsRankOptions() {
+    return type == BuiltinOptions_RankOptions ?
+      reinterpret_cast<RankOptionsT *>(value) : nullptr;
+  }
+  const RankOptionsT *AsRankOptions() const {
+    return type == BuiltinOptions_RankOptions ?
+      reinterpret_cast<const RankOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -4991,6 +5075,46 @@
 
 flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct CosOptionsT : public flatbuffers::NativeTable {
+  typedef CosOptions TableType;
+  CosOptionsT() {
+  }
+};
+
+struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef CosOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  CosOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CosOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CosOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  CosOptionsBuilder &operator=(const CosOptionsBuilder &);
+  flatbuffers::Offset<CosOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CosOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CosOptions> CreateCosOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  CosOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct ReducerOptionsT : public flatbuffers::NativeTable {
   typedef ReducerOptions TableType;
   bool keep_dims;
@@ -6237,6 +6361,46 @@
 
 flatbuffers::Offset<ShapeOptions> CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct RankOptionsT : public flatbuffers::NativeTable {
+  typedef RankOptions TableType;
+  RankOptionsT() {
+  }
+};
+
+struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef RankOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  RankOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<RankOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RankOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  RankOptionsBuilder &operator=(const RankOptionsBuilder &);
+  flatbuffers::Offset<RankOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RankOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RankOptions> CreateRankOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  RankOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct PowOptionsT : public flatbuffers::NativeTable {
   typedef PowOptions TableType;
   PowOptionsT() {
@@ -7235,6 +7399,86 @@
 
 flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct GatherNdOptionsT : public flatbuffers::NativeTable {
+  typedef GatherNdOptions TableType;
+  GatherNdOptionsT() {
+  }
+};
+
+struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef GatherNdOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  GatherNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GatherNdOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GatherNdOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
+  flatbuffers::Offset<GatherNdOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GatherNdOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  GatherNdOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct WhereOptionsT : public flatbuffers::NativeTable {
+  typedef WhereOptions TableType;
+  WhereOptionsT() {
+  }
+};
+
+struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef WhereOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  WhereOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<WhereOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct WhereOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
+  flatbuffers::Offset<WhereOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<WhereOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  WhereOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
@@ -7614,6 +7858,18 @@
   const AddNOptions *builtin_options_as_AddNOptions() const {
     return builtin_options_type() == BuiltinOptions_AddNOptions ? static_cast<const AddNOptions *>(builtin_options()) : nullptr;
   }
+  const GatherNdOptions *builtin_options_as_GatherNdOptions() const {
+    return builtin_options_type() == BuiltinOptions_GatherNdOptions ? static_cast<const GatherNdOptions *>(builtin_options()) : nullptr;
+  }
+  const CosOptions *builtin_options_as_CosOptions() const {
+    return builtin_options_type() == BuiltinOptions_CosOptions ? static_cast<const CosOptions *>(builtin_options()) : nullptr;
+  }
+  const WhereOptions *builtin_options_as_WhereOptions() const {
+    return builtin_options_type() == BuiltinOptions_WhereOptions ? static_cast<const WhereOptions *>(builtin_options()) : nullptr;
+  }
+  const RankOptions *builtin_options_as_RankOptions() const {
+    return builtin_options_type() == BuiltinOptions_RankOptions ? static_cast<const RankOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -7973,6 +8229,22 @@
   return builtin_options_as_AddNOptions();
 }
 
+template<> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const {
+  return builtin_options_as_GatherNdOptions();
+}
+
+template<> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const {
+  return builtin_options_as_CosOptions();
+}
+
+template<> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const {
+  return builtin_options_as_WhereOptions();
+}
+
+template<> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const {
+  return builtin_options_as_RankOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -9518,6 +9790,29 @@
       _fbb);
 }
 
+inline CosOptionsT *CosOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new CosOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void CosOptions::UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<CosOptions> CosOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateCosOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CosOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateCosOptions(
+      _fbb);
+}
+
 inline ReducerOptionsT *ReducerOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new ReducerOptionsT();
   UnPackTo(_o, _resolver);
@@ -10147,6 +10442,29 @@
       _out_type);
 }
 
+inline RankOptionsT *RankOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new RankOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void RankOptions::UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<RankOptions> RankOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateRankOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RankOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateRankOptions(
+      _fbb);
+}
+
 inline PowOptionsT *PowOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new PowOptionsT();
   UnPackTo(_o, _resolver);
@@ -10666,6 +10984,52 @@
       _fbb);
 }
 
+inline GatherNdOptionsT *GatherNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new GatherNdOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void GatherNdOptions::UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<GatherNdOptions> GatherNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateGatherNdOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GatherNdOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateGatherNdOptions(
+      _fbb);
+}
+
+inline WhereOptionsT *WhereOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new WhereOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void WhereOptions::UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<WhereOptions> WhereOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateWhereOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhereOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateWhereOptions(
+      _fbb);
+}
+
 inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
@@ -11252,6 +11616,22 @@
       auto ptr = reinterpret_cast<const AddNOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_GatherNdOptions: {
+      auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CosOptions: {
+      auto ptr = reinterpret_cast<const CosOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_WhereOptions: {
+      auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RankOptions: {
+      auto ptr = reinterpret_cast<const RankOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -11598,6 +11978,22 @@
       auto ptr = reinterpret_cast<const AddNOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_GatherNdOptions: {
+      auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_CosOptions: {
+      auto ptr = reinterpret_cast<const CosOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_WhereOptions: {
+      auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_RankOptions: {
+      auto ptr = reinterpret_cast<const RankOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -11932,6 +12328,22 @@
       auto ptr = reinterpret_cast<const AddNOptionsT *>(value);
       return CreateAddNOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_GatherNdOptions: {
+      auto ptr = reinterpret_cast<const GatherNdOptionsT *>(value);
+      return CreateGatherNdOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_CosOptions: {
+      auto ptr = reinterpret_cast<const CosOptionsT *>(value);
+      return CreateCosOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_WhereOptions: {
+      auto ptr = reinterpret_cast<const WhereOptionsT *>(value);
+      return CreateWhereOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_RankOptions: {
+      auto ptr = reinterpret_cast<const RankOptionsT *>(value);
+      return CreateRankOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -12266,6 +12678,22 @@
       value = new AddNOptionsT(*reinterpret_cast<AddNOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_GatherNdOptions: {
+      value = new GatherNdOptionsT(*reinterpret_cast<GatherNdOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_CosOptions: {
+      value = new CosOptionsT(*reinterpret_cast<CosOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_WhereOptions: {
+      value = new WhereOptionsT(*reinterpret_cast<WhereOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_RankOptions: {
+      value = new RankOptionsT(*reinterpret_cast<RankOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -12683,6 +13111,26 @@
       delete ptr;
       break;
     }
+    case BuiltinOptions_GatherNdOptions: {
+      auto ptr = reinterpret_cast<GatherNdOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_CosOptions: {
+      auto ptr = reinterpret_cast<CosOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_WhereOptions: {
+      auto ptr = reinterpret_cast<WhereOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_RankOptions: {
+      auto ptr = reinterpret_cast<RankOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
diff --git a/tensorflow/lite/testdata/test_input.csv b/tensorflow/lite/testdata/test_input.csv
new file mode 100644
index 0000000..33894d3
--- /dev/null
+++ b/tensorflow/lite/testdata/test_input.csv
@@ -0,0 +1 @@
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
\ No newline at end of file
diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD
index 3840b52..68512b9 100644
--- a/tensorflow/lite/testing/BUILD
+++ b/tensorflow/lite/testing/BUILD
@@ -10,6 +10,7 @@
     "generated_test_models_all",
 )
 load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite")
+load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_binary",
@@ -78,6 +79,7 @@
     srcs_version = "PY2AND3",
     deps = [
         ":generate_examples_report",
+        ":string_util_wrapper",
         "//tensorflow:tensorflow_py",
         "//tensorflow/python:graph_util",
         "//third_party/py/numpy",
@@ -159,6 +161,7 @@
     srcs = ["tflite_driver.cc"],
     hdrs = ["tflite_driver.h"],
     deps = [
+        ":join",
         ":split",
         ":test_runner",
         "//tensorflow/lite:builtin_op_data",
@@ -391,4 +394,29 @@
     ],
 )
 
+cc_library(
+    name = "string_util_lib",
+    srcs = ["string_util.cc"],
+    hdrs = ["string_util.h"],
+    deps = [
+        "//tensorflow/lite:string_util",
+        "//tensorflow/lite/python/interpreter_wrapper:numpy",
+        "//tensorflow/lite/python/interpreter_wrapper:python_utils",
+        "//third_party/py/numpy:headers",
+        "//third_party/python_runtime:headers",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+tf_py_wrap_cc(
+    name = "string_util_wrapper",
+    srcs = [
+        "string_util.i",
+    ],
+    deps = [
+        ":string_util_lib",
+        "//third_party/python_runtime:headers",
+    ],
+)
+
 tflite_portable_test_suite()
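
The new `string_util_wrapper` target exposes `string_util_lib` to Python through the SWIG interface `string_util.i`. A minimal sketch of how the wrapper is consumed (the entry point `SerializeAsHexString` and the import path are taken from the `generate_examples.py` changes below; the tensor contents are illustrative):

```python
import numpy as np

from tensorflow.lite.testing import string_util_wrapper

# Serialize a small string tensor into the hex form used in the test specs.
t = np.array([b"AB", b"C"], dtype=np.object_)
hex_blob = string_util_wrapper.SerializeAsHexString(t.flatten())
```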
diff --git a/tensorflow/lite/testing/generate_examples.py b/tensorflow/lite/testing/generate_examples.py
index 7ebc595..a7e16e6 100644
--- a/tensorflow/lite/testing/generate_examples.py
+++ b/tensorflow/lite/testing/generate_examples.py
@@ -36,6 +36,7 @@
 import os
 import random
 import re
+import string
 import sys
 import tempfile
 import traceback
@@ -52,6 +53,7 @@
 from google.protobuf import text_format
 # TODO(aselle): switch to TensorFlow's resource_loader
 from tensorflow.lite.testing import generate_examples_report as report_lib
+from tensorflow.lite.testing import string_util_wrapper
 from tensorflow.python.framework import graph_util as tf_graph_util
 from tensorflow.python.ops import rnn
 
@@ -163,6 +165,16 @@
   return s
 
 
+def format_result(t):
+  """Convert a tensor to a format that can be used in test specs."""
+  if t.dtype.kind not in [np.dtype(np.string_).kind, np.dtype(np.object_).kind]:
+    # Output 9 digits after the decimal point to ensure good precision.
+    values = ["{:.9f}".format(value) for value in list(t.flatten())]
+    return ",".join(values)
+  else:
+    return string_util_wrapper.SerializeAsHexString(t.flatten())
+
+
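
A quick sanity check of the numeric branch of `format_result` (a sketch, not part of the diff): floats are flattened, printed with nine digits after the decimal point, and comma-joined.

```python
import numpy as np

t = np.array([[0.5, 1.0]], dtype=np.float32)
values = ["{:.9f}".format(v) for v in t.flatten()]
assert ",".join(values) == "0.500000000,1.000000000"
```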
 def write_examples(fp, examples):
   """Given a list `examples`, write a text format representation.
 
@@ -179,9 +191,7 @@
     """Write tensor in file format supported by TFLITE example."""
     fp.write("dtype,%s\n" % x.dtype)
     fp.write("shape," + ",".join(map(str, x.shape)) + "\n")
-    # Output 9 digits after the point to ensure the precision is good enough.
-    values = ["{:.9f}".format(value) for value in list(x.flatten())]
-    fp.write("values," + ",".join(values) + "\n")
+    fp.write("values," + format_result(x) + "\n")
 
   fp.write("test_cases,%d\n" % len(examples))
   for example in examples:
@@ -214,11 +224,9 @@
     fp.write("invoke {\n")
 
     for t in example["inputs"]:
-      values = ["{:.9f}".format(value) for value in list(t.flatten())]
-      fp.write("  input: \"" + ",".join(values) + "\"\n")
+      fp.write("  input: \"" + format_result(t) + "\"\n")
     for t in example["outputs"]:
-      values = ["{:.9f}".format(value) for value in list(t.flatten())]
-      fp.write("  output: \"" + ",".join(values) + "\"\n")
+      fp.write("  output: \"" + format_result(t) + "\"\n")
     fp.write("}\n")
 
 
@@ -230,6 +238,7 @@
     tf.int16: (np.int16, "QUANTIZED_INT16"),
     tf.int64: (np.int64, "INT64"),
     tf.bool: (np.bool, "BOOL"),
+    tf.string: (np.string_, "STRING"),
 }
 
 
@@ -245,6 +254,10 @@
     value = np.random.randint(min_value, max_value+1, shape)
   elif dtype == tf.bool:
     value = np.random.choice([True, False], size=shape)
+  elif dtype == np.string_:
+    # Not the best strings, but they will do for some basic testing.
+    letters = list(string.ascii_uppercase)
+    return np.random.choice(letters, size=shape).astype(dtype)
   return np.dtype(dtype).type(value) if np.isscalar(value) else value.astype(
       dtype)
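
The new `tf.string` branch fills tensors with random single uppercase letters. A standalone sketch of that logic (the seed is only there to make the illustration deterministic):

```python
import string

import numpy as np

np.random.seed(0)
letters = list(string.ascii_uppercase)
value = np.random.choice(letters, size=[2, 3]).astype(np.string_)
print(value.shape, value.dtype)  # (2, 3), one-byte strings ('S1')
```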
 
@@ -300,8 +313,13 @@
 
   extra_toco_options = ExtraTocoOptions()
   extra_toco_options.drop_control_dependency = True
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs,
-                    extra_toco_options)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      extra_toco_options,
+      expected_tf_failures=3)
 
 
 def toco_convert(graph_def_str, input_tensors, output_tensors,
@@ -369,7 +387,7 @@
                       make_test_inputs,
                       extra_toco_options=ExtraTocoOptions(),
                       use_frozen_graph=False,
-                      expected_tf_success=None):
+                      expected_tf_failures=0):
   """Helper to make a zip file of a bunch of TensorFlow models.
 
   This does a Cartesian product of the dictionary of test_parameters and
@@ -389,8 +407,9 @@
       `output_tensors` and returns tuple `(input_values, output_values)`.
     extra_toco_options: Additional toco options.
     use_frozen_graph: Whether or not freeze graph before toco converter.
-    expected_tf_success: Number of times tensorflow is supposed to succeed in
-      executing the input graphs. `None` means "unknown".
+    expected_tf_failures: Number of times TensorFlow is expected to fail in
+      executing the input graphs. In some cases it is OK for TensorFlow to
+      fail because one or more combinations of parameters are invalid.
 
   Raises:
     RuntimeError: if there are toco errors that can't be ignored.
@@ -551,10 +570,17 @@
                    " and %d TOCO converted graphs (%.1f%%"), zip_path,
                   total_conversions, tf_success, toco_success, percent)
 
-  if expected_tf_success is not None and tf_success != expected_tf_success:
-    raise RuntimeError(
-        "Expected TF to succeed %d times, but that happened %d times" %
-        (expected_tf_success, tf_success))
+  tf_failures = parameter_count - tf_success
+
+  if tf_failures / parameter_count > 0.8:
+    raise RuntimeError(("Test for '%s' is not very useful. "
+                        "TensorFlow fails in %d percent of the cases.") %
+                       (zip_path, int(100 * tf_failures / parameter_count)))
+
+  if tf_failures != expected_tf_failures:
+    raise RuntimeError(("Expected TF to fail %d times while generating '%s', "
+                        "but that happened %d times") % (expected_tf_failures,
+                                                         zip_path, tf_failures))
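
Reduced to its core, the new accounting budgets failures instead of successes (a sketch with the error messages elided):

```python
def check_failure_budget(parameter_count, tf_success, expected_tf_failures):
  tf_failures = parameter_count - tf_success
  # Reject a test set where TF fails on more than 80% of the combinations.
  if tf_failures / parameter_count > 0.8:
    raise RuntimeError("test set is not very useful")
  # The observed failure count must match the declared budget exactly.
  if tf_failures != expected_tf_failures:
    raise RuntimeError("unexpected number of TF failures")
```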
 
   if not FLAGS.ignore_toco_errors and toco_errors > 0:
     raise RuntimeError(
@@ -573,11 +599,12 @@
 
   pool_op = pool_op_in
 
-  def f(zip_path):
+  def f(zip_path, expected_tf_failures=0):
     """Actual function that generates examples.
 
     Args:
       zip_path: path to write zip to.
+      expected_tf_failures: number of expected tensorflow failures.
     """
 
     # Choose a set of parameters
@@ -606,20 +633,26 @@
       return [input_values], sess.run(
           outputs, feed_dict=dict(zip(inputs, [input_values])))
 
-    make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+    make_zip_of_tests(
+        zip_path,
+        test_parameters,
+        build_graph,
+        build_inputs,
+        expected_tf_failures=expected_tf_failures)
+
   return f
 
 
 def make_l2_pool_tests(zip_path):
-  make_pool_tests(make_l2_pool)(zip_path)
+  make_pool_tests(make_l2_pool)(zip_path, expected_tf_failures=80)
 
 
 def make_avg_pool_tests(zip_path):
-  make_pool_tests(tf.nn.avg_pool)(zip_path)
+  make_pool_tests(tf.nn.avg_pool)(zip_path, expected_tf_failures=80)
 
 
 def make_max_pool_tests(zip_path):
-  make_pool_tests(tf.nn.max_pool)(zip_path)
+  make_pool_tests(tf.nn.max_pool)(zip_path, expected_tf_failures=80)
 
 
 def make_abs_tests(zip_path):
@@ -834,11 +867,10 @@
         parameters["input_shape"], dtype=_TF_TYPE_INFO[parameters["dtype"]][0])
     return [dummy_input], sess.run(outputs, feed_dict={inputs[0]: dummy_input})
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs,
-                    expected_tf_success=20)
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
-def make_binary_op_tests(zip_path, binary_operator):
+def make_binary_op_tests(zip_path, binary_operator, expected_tf_failures=0):
   """Make a set of tests to do binary ops with and without broadcast."""
 
   test_parameters = [
@@ -908,7 +940,12 @@
             inputs[1]: input2
         })
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=expected_tf_failures)
 
 
 def make_reduce_tests(reduce_op,
@@ -1074,6 +1111,34 @@
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_cos_tests(zip_path):
+  """Make a set of tests to do cos."""
+
+  test_parameters = [{
+      "input_dtype": [tf.float32],
+      "input_shape": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3]],
+  }]
+
+  def build_graph(parameters):
+    """Build the cos op testing graph."""
+    input_tensor = tf.placeholder(
+        dtype=parameters["input_dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+
+    out = tf.cos(input_tensor)
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    values = [
+        create_tensor_data(parameters["input_dtype"], parameters["input_shape"],
+                           min_value=-np.pi, max_value=np.pi)
+    ]
+    return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
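
The inputs for `make_cos_tests` are drawn from [-pi, pi], covering a full period of cosine; a quick numpy check of that range choice (sketch):

```python
import numpy as np

x = np.random.uniform(-np.pi, np.pi, size=[4, 2, 3]).astype(np.float32)
# Outputs of cos over this range stay within [-1, 1].
assert np.all(np.abs(np.cos(x)) <= 1.0)
```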
 def make_log_softmax_tests(zip_path):
   """Make a set of tests to do log_softmax."""
 
@@ -1137,7 +1202,12 @@
     ]
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=8)
 
 
 def make_minimum_tests(zip_path):
@@ -1172,7 +1242,12 @@
     ]
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=8)
 
 
 def make_binary_op_tests_func(binary_operator):
@@ -1242,7 +1317,7 @@
 
 
 def make_pow_tests(zip_path):
-  make_binary_op_tests(zip_path, tf.pow)
+  make_binary_op_tests(zip_path, tf.pow, expected_tf_failures=7)
 
 
 def make_floor_div_tests(zip_path):
@@ -1260,16 +1335,23 @@
 def make_gather_tests(zip_path):
   """Make a set of tests to do gather."""
 
-  test_parameters = [{
-      # TODO(mgubin): add string tests when they are supported by Toco.
-      # TODO(mgubin): add tests for Nd indices when they are supported by
-      # TfLite.
-      "params_dtype": [tf.float32, tf.int32, tf.int64],
-      "params_shape": [[10], [1, 2, 20]],
-      "indices_dtype": [tf.int32, tf.int64],
-      "indices_shape": [[3], [5]],
-      "axis": [-1, 0, 1],
-  }]
+  test_parameters = [
+      {
+          "params_dtype": [tf.float32, tf.int32, tf.int64],
+          "params_shape": [[10], [1, 2, 20]],
+          "indices_dtype": [tf.int32, tf.int64],
+          "indices_shape": [[3], [5]],
+          "axis": [-1, 0, 1],
+      },
+      {
+          # TODO(b/123895910): add Nd support for strings.
+          "params_dtype": [tf.string],
+          "params_shape": [[8]],
+          "indices_dtype": [tf.int32],
+          "indices_shape": [[3]],
+          "axis": [0],
+      }
+  ]
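
What the new 1-D string case exercises, restated in plain numpy (a sketch of gather-on-axis-0 semantics for strings):

```python
import numpy as np

params = np.array(list("ABCDEFGH"), dtype=np.string_)
indices = np.array([0, 2, 7], dtype=np.int32)
# Equivalent to tf.gather(params, indices, axis=0).
assert list(params[indices]) == [b"A", b"C", b"H"]
```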
 
   def build_graph(parameters):
     """Build the gather op testing graph."""
@@ -1300,7 +1382,56 @@
       test_parameters,
       build_graph,
       build_inputs,
-      expected_tf_success=60)
+      expected_tf_failures=12)
+
+
+def make_gather_nd_tests(zip_path):
+  """Make a set of tests to do gather_nd."""
+
+  test_parameters = [
+      {
+          "params_dtype": [tf.float32, tf.int32, tf.int64],
+          "params_shape": [[5, 1]],
+          "indices_dtype": [tf.int32, tf.int64],
+          "indices_shape": [[1, 1]],
+      },
+      {
+          "params_dtype": [tf.float32, tf.int32, tf.int64],
+          "params_shape": [[5, 5]],
+          "indices_dtype": [tf.int32, tf.int64],
+          "indices_shape": [[2, 1], [2, 2]],
+      },
+      {
+          "params_dtype": [tf.float32, tf.int32, tf.int64],
+          "params_shape": [[5, 5, 10]],
+          "indices_dtype": [tf.int32, tf.int64],
+          "indices_shape": [[3, 1], [2, 2], [2, 3], [2, 1, 3]],
+      },
+  ]
+
+  def build_graph(parameters):
+    """Build the gather_nd op testing graph."""
+    params = tf.placeholder(
+        dtype=parameters["params_dtype"],
+        name="params",
+        shape=parameters["params_shape"])
+    indices = tf.placeholder(
+        dtype=parameters["indices_dtype"],
+        name="indices",
+        shape=parameters["indices_shape"])
+    out = tf.gather_nd(params, indices)
+    return [params, indices], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    params = create_tensor_data(parameters["params_dtype"],
+                                parameters["params_shape"])
+    indices = create_tensor_data(parameters["indices_dtype"],
+                                 parameters["indices_shape"], 0,
+                                 parameters["params_shape"][0] - 1)
+    return [params, indices], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [params, indices])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
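
The gather_nd semantics being tested, restated in numpy (sketch): each row of `indices` addresses one element or slice of `params`.

```python
import numpy as np

params = np.arange(25).reshape(5, 5)
indices = np.array([[0, 1], [2, 3]])
# Equivalent to tf.gather_nd(params, indices): picks params[0, 1], params[2, 3].
out = params[tuple(indices.T)]
assert (out == np.array([1, 13])).all()
```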
 
 
 def make_gather_with_constant_tests(zip_path):
@@ -1329,8 +1460,7 @@
     return [reference_values], sess.run(
         outputs, feed_dict={inputs[0]: reference_values})
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs,
-                    expected_tf_success=2)
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
 def make_global_batch_norm_tests(zip_path):
@@ -1466,7 +1596,12 @@
       values.append(create_tensor_data(np.float32, filter_shape))
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=40)
 
 
 # Note: This is a regression test for a bug (b/122651451) that Toco incorrectly
@@ -1708,7 +1843,12 @@
       values.append(create_tensor_data(np.float32, filter_shape))
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=4)
 
 
 def make_split_tests(zip_path):
@@ -1731,7 +1871,12 @@
     values = [create_tensor_data(np.float32, parameters["input_shape"])]
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=112)
 
 
 def make_splitv_tests(zip_path):
@@ -1754,7 +1899,12 @@
     values = [create_tensor_data(np.float32, parameters["input_shape"])]
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=158)
 
 
 def make_concat_tests(zip_path):
@@ -1796,7 +1946,12 @@
     return all_values, sess.run(
         outputs, feed_dict=dict(zip(inputs, all_values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=60)
 
 
 def make_fully_connected_tests(zip_path):
@@ -1857,7 +2012,12 @@
       values.append(create_tensor_data(np.float32, parameters["shape2"]))
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=10)
 
 
 def make_l2norm_tests(zip_path):
@@ -1887,7 +2047,12 @@
     return [input_values], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_values])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=9)
 
 
 def make_local_response_norm_tests(zip_path):
@@ -2113,6 +2278,29 @@
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_rank_tests(zip_path):
+  """Make a set of tests to do rank."""
+
+  test_parameters = [{
+      "input_dtype": [tf.float32, tf.int32],
+      "input_shape": [[], [0], [1, 1, 1, 3], [2, 3, 4, 5], [5, 5], [10]],
+  }]
+
+  def build_graph(parameters):
+    """Build the rank op testing graph."""
+    input_value = tf.placeholder(dtype=parameters["input_dtype"], name="input")
+    out = tf.rank(input_value)
+    return [input_value], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_value = create_tensor_data(parameters["input_dtype"],
+                                     parameters["input_shape"])
+    return [input_value], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_value])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
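
`tf.rank` returns the number of dimensions as a scalar; since the placeholder above is created without a static shape, the rank is computed at run time. In numpy terms (sketch):

```python
import numpy as np

x = np.zeros([2, 3, 4, 5], dtype=np.float32)
assert np.ndim(x) == 4  # tf.rank would evaluate to 4 here
```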
 def make_one_hot_tests(zip_path):
   """Make a set of tests to do one_hot."""
 
@@ -2372,7 +2560,12 @@
       values.append(np.array(parameters["paddings"]))
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=56)
 
 
 def make_batch_to_space_nd_tests(zip_path):
@@ -2485,7 +2678,12 @@
       values.append(np.array(parameters["perm"]))
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=9)
 
 
 def make_squeeze_tests(zip_path):
@@ -2523,10 +2721,16 @@
     return [input_values], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_values])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=12)
 
 
-def _make_strided_slice_tests(zip_path, test_parameters):
+def _make_strided_slice_tests(zip_path, test_parameters,
+                              expected_tf_failures=0):
   """Utility function to make strided_slice_tests based on parameters."""
 
   def build_graph(parameters):
@@ -2586,7 +2790,12 @@
 
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=expected_tf_failures)
 
 
 def make_strided_slice_tests(zip_path):
@@ -2660,7 +2869,7 @@
           "constant_indices": [False],
       },
   ]
-  _make_strided_slice_tests(zip_path, test_parameters)
+  _make_strided_slice_tests(zip_path, test_parameters, expected_tf_failures=2)
 
 
 def make_strided_slice_1d_exhaustive_tests(zip_path):
@@ -2862,7 +3071,12 @@
     return [input_value], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=4)
 
 
 def make_equal_tests(zip_path):
@@ -2897,7 +3111,12 @@
     return [input_value1, input_value2], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=3)
 
 
 def make_not_equal_tests(zip_path):
@@ -2931,7 +3150,12 @@
     return [input_value1, input_value2], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=3)
 
 
 def make_greater_tests(zip_path):
@@ -2965,7 +3189,12 @@
     return [input_value1, input_value2], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=3)
 
 
 def make_greater_equal_tests(zip_path):
@@ -2999,7 +3228,12 @@
     return [input_value1, input_value2], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=3)
 
 
 def make_less_tests(zip_path):
@@ -3033,7 +3267,12 @@
     return [input_value1, input_value2], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=3)
 
 
 def make_less_equal_tests(zip_path):
@@ -3067,7 +3306,12 @@
     return [input_value1, input_value2], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=3)
 
 
 def make_floor_tests(zip_path):
@@ -3319,7 +3563,12 @@
 
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=18)
 
 
 def make_conv2d_transpose_tests(zip_path):
@@ -3356,12 +3605,7 @@
     ]
     return values, sess.run(outputs, feed_dict=dict(zip(inputs, values)))
 
-  make_zip_of_tests(
-      zip_path,
-      test_parameters,
-      build_graph,
-      build_inputs,
-      expected_tf_success=4)
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
 # Since compute output_shape is fairly complicated for
@@ -3611,7 +3855,12 @@
     return all_values, sess.run(
         outputs, feed_dict=dict(zip(inputs, all_values)))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=72)
 
 
 def make_unpack_tests(zip_path):
@@ -3702,13 +3951,18 @@
     return [input1, input2], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input1, input2])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+  make_zip_of_tests(
+      zip_path,
+      test_parameters,
+      build_graph,
+      build_inputs,
+      expected_tf_failures=12)
 
 
 def _make_logical_tests(op):
   """Make a set of tests to do logical operations."""
 
-  def logical(zip_path):
+  def logical(zip_path, expected_tf_failures=0):
     """Generate examples."""
     test_parameters = [{
         "input_shape_pair": [([], []), ([1, 1, 1, 3], [1, 1, 1, 3]),
@@ -3733,19 +3987,24 @@
       return [input_value1, input_value2], sess.run(
           outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2])))
 
-    make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+    make_zip_of_tests(
+        zip_path,
+        test_parameters,
+        build_graph,
+        build_inputs,
+        expected_tf_failures=expected_tf_failures)
 
   return logical
 
 
 def make_logical_or_tests(zip_path):
   """Make a set of tests to do logical_or."""
-  return _make_logical_tests(tf.logical_or)(zip_path)
+  return _make_logical_tests(tf.logical_or)(zip_path, expected_tf_failures=1)
 
 
 def make_logical_and_tests(zip_path):
   """Make a set of tests to do logical_and."""
-  return _make_logical_tests(tf.logical_and)(zip_path)
+  return _make_logical_tests(tf.logical_and)(zip_path, expected_tf_failures=1)
 
 
 def make_logical_xor_tests(zip_path):
@@ -3753,7 +4012,7 @@
 
     Test logical_not as well.
   """
-  return _make_logical_tests(tf.logical_xor)(zip_path)
+  return _make_logical_tests(tf.logical_xor)(zip_path, expected_tf_failures=1)
 
 
 def make_mirror_pad_tests(zip_path):
@@ -3836,18 +4095,23 @@
     return input_values, sess.run(
         outputs, feed_dict=dict(zip(inputs, input_values)))
 
-  make_zip_of_tests(
-      zip_path,
-      test_parameters,
-      build_graph,
-      build_inputs,
-      expected_tf_success=8)
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
 def make_unroll_batch_matmul_tests(zip_path):
   """Make a set of tests to test unroll_batch_matmul."""
 
-  test_parameters = [{"dtype": [tf.float32], "shape": [[(2, 2, 3), (2, 3, 2)]]}]
+  test_parameters = [{
+      "dtype": [tf.float32],
+      "shape": [[(2, 2, 3), (2, 3, 2), False, False],
+                [(2, 2, 3), (2, 3, 2), True, True],
+                [(2, 2, 3), (2, 2, 3), False, True],
+                [(2, 2, 3), (2, 2, 3), True, False],
+                [(4, 2, 2, 3), (4, 2, 3, 2), False, False],
+                [(4, 2, 2, 3), (4, 2, 3, 2), True, True],
+                [(4, 2, 2, 3), (4, 2, 2, 3), False, True],
+                [(4, 2, 2, 3), (4, 2, 2, 3), True, False]]
+  }]
 
   def build_graph(parameters):
     """Build the batch_matmul op testing graph."""
@@ -3856,7 +4120,11 @@
     input_tensor2 = tf.placeholder(
         dtype=parameters["dtype"], shape=parameters["shape"][1])
     # Should be unrolled and replaced with fully_connected ops in the end.
-    out = tf.matmul(input_tensor1, input_tensor2)
+    out = tf.matmul(
+        input_tensor1,
+        input_tensor2,
+        transpose_a=parameters["shape"][2],
+        transpose_b=parameters["shape"][3])
     return [input_tensor1, input_tensor2], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
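
A shape check for the new (shape, transpose_a, transpose_b) tuples above (sketch): with both transposes enabled, (2, 2, 3) x (2, 3, 2) is still a valid batch matmul.

```python
import numpy as np

a = np.zeros([2, 2, 3], dtype=np.float32)
b = np.zeros([2, 3, 2], dtype=np.float32)
# tf.matmul(a, b, transpose_a=True, transpose_b=True) transposes the last
# two axes of each operand before multiplying.
out = np.matmul(a.transpose(0, 2, 1), b.transpose(0, 2, 1))
assert out.shape == (2, 3, 3)
```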
@@ -3893,8 +4161,7 @@
     return [input_value], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_value])))
 
-  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs,
-                    expected_tf_success=3)
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
 def make_unique_tests(zip_path):
@@ -3939,12 +4206,7 @@
     return input_values, sess.run(
         outputs, feed_dict=dict(zip(inputs, input_values)))
 
-  make_zip_of_tests(
-      zip_path,
-      test_parameters,
-      build_graph,
-      build_inputs,
-      expected_tf_success=9)
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
 def make_reverse_v2_tests(zip_path):
diff --git a/tensorflow/lite/testing/kernel_test/BUILD b/tensorflow/lite/testing/kernel_test/BUILD
new file mode 100644
index 0000000..c46e80c
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/BUILD
@@ -0,0 +1,124 @@
+package(default_visibility = [
+    "//visibility:public",
+])
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cc_binary",
+    "tf_cc_test",
+)
+
+cc_library(
+    name = "util",
+    hdrs = ["util.h"],
+    deps = [
+        ":input_generator",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/testing:split",
+        "//tensorflow/lite/testing:tflite_driver",
+    ] + select({
+        "//conditions:default": [
+            "//tensorflow/core:framework_internal",
+            "//tensorflow/core:lib",
+        ],
+        "//tensorflow:android": [
+            "//tensorflow/core:android_tensorflow_lib",
+        ],
+    }),
+)
+
+tf_cc_test(
+    name = "util_test",
+    size = "small",
+    srcs = ["util_test.cc"],
+    data = [
+        "//tensorflow/lite:testdata/add.bin",
+        "//tensorflow/lite:testdata/test_input.csv",
+    ],
+    tags = [
+        "no_oss",
+    ],
+    deps = [
+        ":util",
+        "//tensorflow/lite/testing:tflite_driver",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+tf_cc_binary(
+    name = "tflite_kernel_runner",
+    srcs = ["tflite_kernel_runner.cc"],
+    deps = [
+        ":util",
+    ],
+)
+
+tf_cc_binary(
+    name = "generate_diff_report",
+    srcs = ["generate_diff_report.cc"],
+    deps = [
+        ":diff_analyzer",
+        "//tensorflow/core:framework_internal",
+    ],
+)
+
+cc_library(
+    name = "input_generator",
+    srcs = ["input_generator.cc"],
+    hdrs = ["input_generator.h"],
+    deps = [
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:string",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/testing:join",
+        "//tensorflow/lite/testing:split",
+    ],
+)
+
+tf_cc_test(
+    name = "input_generator_test",
+    size = "small",
+    srcs = ["input_generator_test.cc"],
+    data = [
+        "//tensorflow/lite:testdata/multi_add.bin",
+        "//tensorflow/lite:testdata/test_input.csv",
+    ],
+    tags = [
+        "no_oss",
+    ],
+    deps = [
+        ":input_generator",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_library(
+    name = "diff_analyzer",
+    srcs = ["diff_analyzer.cc"],
+    hdrs = ["diff_analyzer.h"],
+    deps = [
+        "//tensorflow/lite:string",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/testing:split",
+    ],
+)
+
+tf_cc_test(
+    name = "diff_analyzer_test",
+    size = "small",
+    srcs = ["diff_analyzer_test.cc"],
+    data = [
+        "//tensorflow/lite:testdata/test_input.csv",
+    ],
+    tags = [
+        "no_oss",
+    ],
+    deps = [
+        ":diff_analyzer",
+        "//tensorflow/core:lib",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/tensorflow/lite/testing/kernel_test/diff_analyzer.cc b/tensorflow/lite/testing/kernel_test/diff_analyzer.cc
new file mode 100644
index 0000000..7d6fcc8
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/diff_analyzer.cc
@@ -0,0 +1,115 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/testing/kernel_test/diff_analyzer.h"
+
+#include <cmath>
+#include <fstream>
+#include "tensorflow/lite/testing/split.h"
+
+namespace tflite {
+namespace testing {
+
+namespace {
+float CalculateNormalizedMaxDiff(const std::vector<float>& base,
+                                 const std::vector<float>& test) {
+  float diff = 0;
+  // For numerical stability in case the tensor is all 0.
+  float base_max = 1e-6;
+
+  for (int i = 0; i < base.size(); i++) {
+    diff = std::max(diff, std::abs(base[i] - test[i]));
+    base_max = std::max(base_max, base[i]);
+  }
+
+  return diff / base_max;
+}
+
+float CalculateNormalizedL2Norm(const std::vector<float>& base,
+                                const std::vector<float>& test) {
+  float l2_error = 0;
+  // For numerical stability in case the tensor is all 0.
+  float base_max = 1e-6;
+
+  for (int i = 0; i < base.size(); i++) {
+    float diff = base[i] - test[i];
+    l2_error += diff * diff;
+    base_max = std::max(base_max, base[i]);
+  }
+
+  l2_error /= base.size();
+
+  return std::sqrt(l2_error) / base_max;
+}
+
+TfLiteStatus Populate(const string& filename,
+                      std::vector<std::vector<float>>* tensors) {
+  if (filename.empty()) {
+    fprintf(stderr, "Empty input file name.");
+    return kTfLiteError;
+  }
+
+  std::ifstream file(filename);
+  string content;
+  while (std::getline(file, content, '\n')) {
+    tensors->push_back(Split<float>(content, ","));
+  }
+
+  file.close();
+  return kTfLiteOk;
+}
+}  // namespace
+
+TfLiteStatus DiffAnalyzer::ReadFiles(const string& base, const string& test) {
+  TF_LITE_ENSURE_STATUS(Populate(base, &base_tensors_));
+  TF_LITE_ENSURE_STATUS(Populate(test, &test_tensors_));
+
+  if (base_tensors_.size() != test_tensors_.size()) {
+    fprintf(stderr, "Golden and test tensor dimensions don't match.");
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus DiffAnalyzer::WriteReport(const string& filename) {
+  if (filename.empty()) {
+    fprintf(stderr, "Empty output file name.");
+    return kTfLiteError;
+  }
+
+  std::ofstream output_file;
+  output_file.open(filename, std::fstream::out | std::fstream::trunc);
+  if (!output_file) {
+    fprintf(stderr, "Failed to open output file %s.", filename.c_str());
+    return kTfLiteError;
+  }
+
+  output_file << "Normalized L2 Error"
+              << ","
+              << "Normalized Max Diff"
+              << "\n";
+  for (int i = 0; i < base_tensors_.size(); i++) {
+    float l2_error =
+        CalculateNormalizedL2Norm(base_tensors_[i], test_tensors_[i]);
+    float max_diff =
+        CalculateNormalizedMaxDiff(base_tensors_[i], test_tensors_[i]);
+    output_file << l2_error << "," << max_diff << "\n";
+  }
+
+  output_file.close();
+  return kTfLiteOk;
+}
+}  // namespace testing
+}  // namespace tflite
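
The two diff metrics above, restated in numpy (a sketch mirroring `CalculateNormalizedMaxDiff` and `CalculateNormalizedL2Norm`, including the 1e-6 floor used when the base tensor is all zeros):

```python
import numpy as np

def normalized_max_diff(base, test):
  base_max = max(1e-6, base.max())
  return np.abs(base - test).max() / base_max

def normalized_l2_norm(base, test):
  base_max = max(1e-6, base.max())
  return np.sqrt(np.mean((base - test) ** 2)) / base_max
```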
diff --git a/tensorflow/lite/testing/kernel_test/diff_analyzer.h b/tensorflow/lite/testing/kernel_test/diff_analyzer.h
new file mode 100644
index 0000000..aecbaea
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/diff_analyzer.h
@@ -0,0 +1,42 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TESTING_KERNEL_TEST_DIFF_ANALYZER_H_
+#define TENSORFLOW_LITE_TESTING_KERNEL_TEST_DIFF_ANALYZER_H_
+
+#include <vector>
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/string.h"
+
+namespace tflite {
+namespace testing {
+
+// Reads the baseline and test files with output tensor values, and calculates
+// the diff metrics.
+class DiffAnalyzer {
+ public:
+  DiffAnalyzer() = default;
+  TfLiteStatus ReadFiles(const string& base, const string& test);
+  TfLiteStatus WriteReport(const string& filename);
+
+ private:
+  std::vector<std::vector<float>> base_tensors_;
+  std::vector<std::vector<float>> test_tensors_;
+};
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_TESTING_KERNEL_TEST_DIFF_ANALYZER_H_
diff --git a/tensorflow/lite/testing/kernel_test/diff_analyzer_test.cc b/tensorflow/lite/testing/kernel_test/diff_analyzer_test.cc
new file mode 100644
index 0000000..f92a3e6
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/diff_analyzer_test.cc
@@ -0,0 +1,47 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/testing/kernel_test/diff_analyzer.h"
+
+#include <fstream>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/core/lib/io/path.h"
+
+namespace tflite {
+namespace testing {
+
+namespace {
+
+TEST(DiffAnalyzerTest, ZeroDiff) {
+  DiffAnalyzer diff_analyzer;
+  string filename = "third_party/tensorflow/lite/testdata/test_input.csv";
+  ASSERT_EQ(diff_analyzer.ReadFiles(filename, filename), kTfLiteOk);
+
+  string output_file =
+      tensorflow::io::JoinPath(FLAGS_test_tmpdir, "diff_report.csv");
+  ASSERT_EQ(diff_analyzer.WriteReport(output_file), kTfLiteOk);
+
+  std::string content;
+  std::ifstream file(output_file);
+  std::getline(file, content);
+  std::getline(file, content);
+  ASSERT_EQ(content, "0,0");
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace tflite
diff --git a/tensorflow/lite/testing/kernel_test/generate_diff_report.cc b/tensorflow/lite/testing/kernel_test/generate_diff_report.cc
new file mode 100644
index 0000000..afa6a9a
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/generate_diff_report.cc
@@ -0,0 +1,34 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/lite/testing/kernel_test/diff_analyzer.h"
+
+int main(int argc, char** argv) {
+  string base, test, output;
+  std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("base", &base, "Path to the base serialized tensor."),
+      tensorflow::Flag("test", &test, "Path to the test serialized tensor."),
+      tensorflow::Flag("output", &output, "Path to the output file."),
+  };
+  tensorflow::Flags::Parse(&argc, argv, flag_list);
+
+  tflite::testing::DiffAnalyzer diff_analyzer;
+  diff_analyzer.ReadFiles(base, test);
+  diff_analyzer.WriteReport(output);
+  return 0;
+}
diff --git a/tensorflow/lite/testing/kernel_test/input_generator.cc b/tensorflow/lite/testing/kernel_test/input_generator.cc
new file mode 100644
index 0000000..897e185
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/input_generator.cc
@@ -0,0 +1,208 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/testing/kernel_test/input_generator.h"
+
+#include <fstream>
+#include <limits>
+#include <random>
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/testing/join.h"
+#include "tensorflow/lite/testing/split.h"
+
+namespace tflite {
+namespace testing {
+
+namespace {
+
+template <typename T>
+std::vector<T> GenerateRandomTensor(TfLiteIntArray* dims,
+                                    const std::function<T(int)>& random_func) {
+  int64_t num_elements = 1;
+  for (int i = 0; i < dims->size; i++) {
+    num_elements *= dims->data[i];
+  }
+
+  std::vector<T> result(num_elements);
+  for (int i = 0; i < num_elements; i++) {
+    result[i] = random_func(i);
+  }
+  return result;
+}
+
+template <typename T>
+std::vector<T> GenerateUniform(TfLiteIntArray* dims, float min, float max) {
+  auto random_float = [](float min, float max) {
+    // TODO(yunluli): Change seed for each invocation if needed.
+    static unsigned int seed;
+    return min + (max - min) * static_cast<float>(rand_r(&seed)) / RAND_MAX;
+  };
+
+  std::function<T(int)> random_t = [&](int) {
+    return static_cast<T>(random_float(min, max));
+  };
+  std::vector<T> data = GenerateRandomTensor(dims, random_t);
+  return data;
+}
+
+template <typename T>
+std::vector<T> GenerateGaussian(TfLiteIntArray* dims, float min, float max) {
+  auto random_float = [](float min, float max) {
+    static std::default_random_engine generator;
+    // We generate a float number within [0, 1) following a normal distribution
+    // with mean = 0.5 and stddev = 1/3, and use it to scale the final random
+    // number into the desired range.
+    static std::normal_distribution<double> distribution(0.5, 1.0 / 3);
+    auto rand_n = distribution(generator);
+    while (rand_n < 0 || rand_n >= 1) {
+      rand_n = distribution(generator);
+    }
+
+    return min + (max - min) * static_cast<float>(rand_n);
+  };
+
+  std::function<T(int)> random_t = [&](int) {
+    return static_cast<T>(random_float(min, max));
+  };
+  std::vector<T> data = GenerateRandomTensor(dims, random_t);
+  return data;
+}
+
+}  // namespace
+
+TfLiteStatus InputGenerator::LoadModel(const string& model_dir) {
+  model_ = FlatBufferModel::BuildFromFile(model_dir.c_str());
+  if (!model_) {
+    fprintf(stderr, "Cannot load model %s", model_dir.c_str());
+    return kTfLiteError;
+  }
+
+  ::tflite::ops::builtin::BuiltinOpResolver builtin_ops;
+  InterpreterBuilder(*model_, builtin_ops)(&interpreter_);
+  if (!interpreter_) {
+    fprintf(stderr, "Failed to build interpreter.");
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus InputGenerator::ReadInputsFromFile(const string& filename) {
+  if (filename.empty()) {
+    fprintf(stderr, "Empty input file name.");
+    return kTfLiteError;
+  }
+
+  std::ifstream input_file(filename);
+  string input;
+  while (std::getline(input_file, input, '\n')) {
+    inputs_.push_back(input);
+  }
+  input_file.close();
+  return kTfLiteOk;
+}
+
+TfLiteStatus InputGenerator::WriteInputsToFile(const string& filename) {
+  if (filename.empty()) {
+    fprintf(stderr, "Empty input file name.");
+    return kTfLiteError;
+  }
+
+  std::ofstream output_file;
+  output_file.open(filename, std::fstream::out | std::fstream::trunc);
+  if (!output_file) {
+    fprintf(stderr, "Failed to open output file %s.", filename.c_str());
+    return kTfLiteError;
+  }
+
+  for (const auto& input : inputs_) {
+    output_file << input << "\n";
+  }
+  output_file.close();
+
+  return kTfLiteOk;
+}
+
+// TODO(yunluli): Support more tensor types when needed.
+TfLiteStatus InputGenerator::GenerateInput(const string& distribution) {
+  auto input_tensor_ids = interpreter_->inputs();
+  for (auto id : input_tensor_ids) {
+    auto* tensor = interpreter_->tensor(id);
+    if (distribution == "UNIFORM") {
+      switch (tensor->type) {
+        case kTfLiteInt8: {
+          auto data = GenerateUniform<int8_t>(
+              tensor->dims, std::numeric_limits<int8_t>::min(),
+              std::numeric_limits<int8_t>::max());
+          inputs_.push_back(Join(data.data(), data.size(), ","));
+          break;
+        }
+        case kTfLiteUInt8: {
+          auto data = GenerateUniform<uint8_t>(
+              tensor->dims, std::numeric_limits<uint8_t>::min(),
+              std::numeric_limits<uint8_t>::max());
+          inputs_.push_back(Join(data.data(), data.size(), ","));
+          break;
+        }
+        case kTfLiteFloat32: {
+          auto data = GenerateUniform<float>(tensor->dims, -1, 1);
+          inputs_.push_back(JoinDefault(data.data(), data.size(), ","));
+          break;
+        }
+        default:
+          fprintf(stderr, "Unsupported input tensor type %s.",
+                  TfLiteTypeGetName(tensor->type));
+          break;
+      }
+    } else if (distribution == "GAUSSIAN") {
+      switch (tensor->type) {
+        case kTfLiteInt8: {
+          auto data = GenerateGaussian<int8_t>(
+              tensor->dims, std::numeric_limits<int8_t>::min(),
+              std::numeric_limits<int8_t>::max());
+          inputs_.push_back(Join(data.data(), data.size(), ","));
+          break;
+        }
+        case kTfLiteUInt8: {
+          auto data = GenerateGaussian<uint8_t>(
+              tensor->dims, std::numeric_limits<uint8_t>::min(),
+              std::numeric_limits<uint8_t>::max());
+          inputs_.push_back(Join(data.data(), data.size(), ","));
+          break;
+        }
+        case kTfLiteFloat32: {
+          auto data = GenerateGaussian<float>(tensor->dims, -1, 1);
+          inputs_.push_back(JoinDefault(data.data(), data.size(), ","));
+          break;
+        }
+        default:
+          fprintf(stderr, "Unsupported input tensor type %s.",
+                  TfLiteTypeGetName(tensor->type));
+          break;
+      }
+    } else {
+      fprintf(stderr, "Unsupported distribution %s.", distribution.c_str());
+      return kTfLiteError;
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+std::vector<string> InputGenerator::GetInputs() { return inputs_; }
+
+}  // namespace testing
+}  // namespace tflite
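
The GAUSSIAN branch above draws from N(0.5, (1/3)^2), rejects samples outside [0, 1), and scales the survivor into the requested range; the same sampling scheme in Python (sketch):

```python
import random

def gaussian_in_range(lo, hi):
  r = random.gauss(0.5, 1.0 / 3)
  # Reject samples outside [0, 1) so the scaled value stays in [lo, hi).
  while r < 0 or r >= 1:
    r = random.gauss(0.5, 1.0 / 3)
  return lo + (hi - lo) * r
```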
diff --git a/tensorflow/lite/testing/kernel_test/input_generator.h b/tensorflow/lite/testing/kernel_test/input_generator.h
new file mode 100644
index 0000000..859c706
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/input_generator.h
@@ -0,0 +1,50 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TESTING_KERNEL_TEST_INPUT_GENERATOR_H_
+#define TENSORFLOW_LITE_TESTING_KERNEL_TEST_INPUT_GENERATOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/string.h"
+
+namespace tflite {
+namespace testing {
+
+// Generates random inputs, or reads inputs from a file, for the kernel diff
+// test. It needs to load the tflite graph to get information such as tensor
+// shape and data type.
+class InputGenerator {
+ public:
+  InputGenerator() = default;
+  TfLiteStatus LoadModel(const string& model_dir);
+  TfLiteStatus ReadInputsFromFile(const string& filename);
+  TfLiteStatus GenerateInput(const string& distribution);
+  std::vector<string> GetInputs();
+  TfLiteStatus WriteInputsToFile(const string& filename);
+
+ private:
+  std::unique_ptr<FlatBufferModel> model_;
+  std::unique_ptr<Interpreter> interpreter_;
+  std::vector<string> inputs_;
+};
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_TESTING_KERNEL_TEST_INPUT_GENERATOR_H_
diff --git a/tensorflow/lite/testing/kernel_test/input_generator_test.cc b/tensorflow/lite/testing/kernel_test/input_generator_test.cc
new file mode 100644
index 0000000..b6bd8b9
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/input_generator_test.cc
@@ -0,0 +1,81 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/testing/kernel_test/input_generator.h"
+
+#include <fstream>
+#include <map>
+
+#include <gmock/gmock.h>
+#include "testing/base/public/googletest.h"
+#include <gtest/gtest.h>
+
+namespace tflite {
+namespace testing {
+
+namespace {
+
+TEST(InputGeneratorTest, LoadModel) {
+  InputGenerator input_generator;
+  ASSERT_EQ(input_generator.LoadModel(
+                "third_party/tensorflow/lite/testdata/multi_add.bin"),
+            kTfLiteOk);
+}
+
+TEST(InputGeneratorTest, ReadWriteSimpleFile) {
+  InputGenerator input_generator;
+  ASSERT_EQ(input_generator.ReadInputsFromFile(
+                "third_party/tensorflow/lite/testdata/test_input.csv"),
+            kTfLiteOk);
+
+  std::vector<string> inputs;
+  std::string content = "1";
+  for (int i = 0; i < 1 * 8 * 8 * 3 - 1; i++) {
+    content.append(",1");
+  }
+  inputs.push_back(content);
+  ASSERT_EQ(input_generator.GetInputs(), inputs);
+
+  auto output_filename = FLAGS_test_tmpdir + "/out.csv";
+  ASSERT_EQ(input_generator.WriteInputsToFile(output_filename), kTfLiteOk);
+
+  std::ifstream in(output_filename);
+  std::string out;
+  std::getline(in, out, '\n');
+  ASSERT_EQ(out, content);
+}
+
+TEST(InputGeneratorTest, GenerateUniformInput) {
+  InputGenerator input_generator;
+  ASSERT_EQ(input_generator.LoadModel(
+                "third_party/tensorflow/lite/testdata/multi_add.bin"),
+            kTfLiteOk);
+  input_generator.GenerateInput("UNIFORM");
+  auto inputs = input_generator.GetInputs();
+  ASSERT_EQ(inputs.size(), 4);
+}
+
+TEST(InputGeneratorTest, GenerateGaussianInput) {
+  InputGenerator input_generator;
+  ASSERT_EQ(input_generator.LoadModel(
+                "third_party/tensorflow/lite/testdata/multi_add.bin"),
+            kTfLiteOk);
+  input_generator.GenerateInput("GAUSSIAN");
+  auto inputs = input_generator.GetInputs();
+  ASSERT_EQ(inputs.size(), 4);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tflite
diff --git a/tensorflow/lite/testing/kernel_test/tflite_kernel_runner.cc b/tensorflow/lite/testing/kernel_test/tflite_kernel_runner.cc
new file mode 100644
index 0000000..34c1728
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/tflite_kernel_runner.cc
@@ -0,0 +1,32 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/testing/kernel_test/util.h"
+
+int main(int argc, char** argv) {
+  tflite::testing::kernel_test::TestOptions options =
+      tflite::testing::kernel_test::ParseTfliteKernelTestFlags(&argc, argv);
+  const bool run_reference_kernel = options.kernel_type == "REFERENCE";
+  const bool use_nnapi = options.kernel_type == "NNAPI";
+
+  auto runner = absl::make_unique<tflite::testing::TfLiteDriver>(
+      use_nnapi, "", run_reference_kernel);
+  if (tflite::testing::kernel_test::RunKernelTest(options, runner.get()) ==
+      kTfLiteOk) {
+    return 0;
+  }
+
+  return -1;
+}
diff --git a/tensorflow/lite/testing/kernel_test/util.h b/tensorflow/lite/testing/kernel_test/util.h
new file mode 100644
index 0000000..d940e5a
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/util.h
@@ -0,0 +1,122 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TESTING_KERNEL_TEST_UTIL_H_
+#define TENSORFLOW_LITE_TESTING_KERNEL_TEST_UTIL_H_
+
+#include <fstream>
+
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/testing/kernel_test/input_generator.h"
+#include "tensorflow/lite/testing/split.h"
+#include "tensorflow/lite/testing/tflite_driver.h"
+
+namespace tflite {
+namespace testing {
+namespace kernel_test {
+
+struct TestOptions {
+  // Path of tensorflow lite model.
+  string tflite_model;
+  // Path of the input file. If empty, generate at runtime.
+  string read_input_from_file;
+  // Path to dump the input file.
+  string dump_input_to_file;
+  // Path to dump the output.
+  string dump_output_to_file;
+  // Input distribution.
+  string input_distribution;
+  // Kernel type.
+  string kernel_type;
+};
+
+TestOptions ParseTfliteKernelTestFlags(int* argc, char** argv) {
+  TestOptions options;
+  std::vector<tensorflow::Flag> flags = {
+      tensorflow::Flag("tflite_model", &options.tflite_model,
+                       "Path of tensorflow lite model."),
+      tensorflow::Flag("read_input_from_file", &options.read_input_from_file,
+                       "File to read input data from. If empty, generates "
+                       "input at runtime."),
+      tensorflow::Flag("dump_input_to_file", &options.dump_input_to_file,
+                       "File to dump randomly generated input."),
+      tensorflow::Flag("dump_output_to_file", &options.dump_output_to_file,
+                       "File to dump output."),
+      tensorflow::Flag("input_distribution", &options.input_distribution,
+                       "Input distribution. Default: Gaussian."),
+      tensorflow::Flag("kernel_type", &options.kernel_type, "Kernel type."),
+  };
+
+  tensorflow::Flags::Parse(argc, argv, flags);
+
+  return options;
+}
+
+TfLiteStatus RunKernelTest(const kernel_test::TestOptions& options,
+                           TestRunner* runner) {
+  InputGenerator input_generator;
+
+  if (options.read_input_from_file.empty()) {
+    TF_LITE_ENSURE_STATUS(input_generator.LoadModel(options.tflite_model));
+    TF_LITE_ENSURE_STATUS(
+        input_generator.GenerateInput(options.input_distribution));
+  } else {
+    TF_LITE_ENSURE_STATUS(
+        input_generator.ReadInputsFromFile(options.read_input_from_file));
+  }
+
+  runner->LoadModel(options.tflite_model);
+  runner->AllocateTensors();
+  if (!runner->IsValid()) return kTfLiteError;
+  auto input_tensor_ids = runner->GetInputs();
+  auto inputs = input_generator.GetInputs();
+  if (inputs.size() != input_tensor_ids.size()) {
+    fprintf(stderr,
+            "Number of input tensors generated doesn't match what the model "
+            "asks for.\n");
+    return kTfLiteError;
+  }
+  for (int i = 0; i < inputs.size(); i++) {
+    runner->SetInput(input_tensor_ids[i], inputs[i]);
+  }
+
+  runner->Invoke();
+
+  if (!options.dump_input_to_file.empty()) {
+    TF_LITE_ENSURE_STATUS(
+        input_generator.WriteInputsToFile(options.dump_input_to_file));
+  }
+
+  if (!options.dump_output_to_file.empty()) {
+    std::ofstream output_file;
+    output_file.open(options.dump_output_to_file,
+                     std::fstream::out | std::fstream::trunc);
+    if (!output_file) {
+      return kTfLiteError;
+    }
+
+    for (auto id : runner->GetOutputs()) {
+      output_file << runner->ReadOutput(id) << "\n";
+    }
+    output_file.close();
+  }
+
+  return kTfLiteOk;
+}
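+
+// Illustrative end-to-end wiring of the two helpers above (mirrors
+// util_test.cc; the TfLiteDriver constructor arguments are the ones used
+// there, not the only valid choice):
+//
+//   TestOptions options = ParseTfliteKernelTestFlags(&argc, argv);
+//   std::unique_ptr<TestRunner> runner(new TfLiteDriver(false, "", true));
+//   TfLiteStatus status = RunKernelTest(options, runner.get());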
+
+}  // namespace kernel_test
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_TESTING_KERNEL_TEST_UTIL_H_
diff --git a/tensorflow/lite/testing/kernel_test/util_test.cc b/tensorflow/lite/testing/kernel_test/util_test.cc
new file mode 100644
index 0000000..751c77e
--- /dev/null
+++ b/tensorflow/lite/testing/kernel_test/util_test.cc
@@ -0,0 +1,52 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/testing/kernel_test/util.h"
+
+#include <fstream>
+#include <memory>
+
+#include <gmock/gmock.h>
+#include "testing/base/public/googletest.h"
+#include <gtest/gtest.h>
+#include "tensorflow/lite/testing/tflite_driver.h"
+
+namespace tflite {
+namespace testing {
+namespace kernel_test {
+namespace {
+
+TEST(UtilTest, SimpleE2ETest) {
+  TestOptions options;
+  options.tflite_model = "third_party/tensorflow/lite/testdata/add.bin";
+  options.read_input_from_file =
+      "third_party/tensorflow/lite/testdata/test_input.csv";
+  options.dump_output_to_file =
+      ::tensorflow::testing::TmpDir() + "/test_out.csv";
+  options.kernel_type = "REFERENCE";
+  std::unique_ptr<TestRunner> runner(new TfLiteDriver(false, "", true));
+  ASSERT_EQ(RunKernelTest(options, runner.get()), kTfLiteOk);
+  std::string expected = "3";
+  for (int i = 0; i < 1 * 8 * 8 * 3 - 1; i++) {
+    expected.append(",3");
+  }
+  std::string content;
+  std::ifstream file(options.dump_output_to_file);
+  std::getline(file, content);
+  EXPECT_EQ(content, expected);
+}
+
+}  // namespace
+}  // namespace kernel_test
+}  // namespace testing
+}  // namespace tflite
diff --git a/tensorflow/lite/testing/string_util.cc b/tensorflow/lite/testing/string_util.cc
new file mode 100644
index 0000000..cf9d508
--- /dev/null
+++ b/tensorflow/lite/testing/string_util.cc
@@ -0,0 +1,45 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/testing/string_util.h"
+
+#include <memory>
+
+#include "absl/strings/escaping.h"
+#include "tensorflow/lite/python/interpreter_wrapper/numpy.h"
+#include "tensorflow/lite/python/interpreter_wrapper/python_utils.h"
+#include "tensorflow/lite/string_util.h"
+
+namespace tflite {
+namespace testing {
+namespace python {
+
+PyObject* SerializeAsHexString(PyObject* value) {
+  DynamicBuffer dynamic_buffer;
+  if (!python_utils::FillStringBufferWithPyArray(value, &dynamic_buffer)) {
+    return nullptr;
+  }
+
+  char* char_buffer = nullptr;
+  size_t size = dynamic_buffer.WriteToBuffer(&char_buffer);
+  string s = absl::BytesToHexString({char_buffer, size});
+  free(char_buffer);
+
+  return python_utils::ConvertToPyString(s.data(), s.size());
+}
+
+}  // namespace python
+}  // namespace testing
+}  // namespace tflite
diff --git a/tensorflow/lite/testing/string_util.h b/tensorflow/lite/testing/string_util.h
new file mode 100644
index 0000000..56c024d
--- /dev/null
+++ b/tensorflow/lite/testing/string_util.h
@@ -0,0 +1,33 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TESTING_STRING_UTIL_H_
+#define TENSORFLOW_LITE_TESTING_STRING_UTIL_H_
+
+#include <Python.h>
+#include <string>
+
+namespace tflite {
+namespace testing {
+namespace python {
+
+// Takes a Python string array, converts it to the TF Lite dynamic buffer
+// format and serializes it as a hex string.
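+//
+// Illustrative only, assuming the standard TF Lite string buffer layout
+// (count, offset table, packed payload): a single-element array holding
+// "AB" would serialize to the hex string "010000000c0000000e0000004142".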
+PyObject* SerializeAsHexString(PyObject* value);
+
+}  // namespace python
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_TESTING_STRING_UTIL_H_
diff --git a/tensorflow/lite/testing/string_util.i b/tensorflow/lite/testing/string_util.i
new file mode 100644
index 0000000..574abb7
--- /dev/null
+++ b/tensorflow/lite/testing/string_util.i
@@ -0,0 +1,31 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+%{
+
+#define SWIG_FILE_WITH_INIT
+#include "tensorflow/lite/testing/string_util.h"
+
+%}
+
+namespace tflite {
+namespace testing {
+namespace python {
+
+PyObject* SerializeAsHexString(PyObject* value);
+
+}  // namespace python
+}  // namespace testing
+}  // namespace tflite
diff --git a/tensorflow/lite/testing/tflite_driver.cc b/tensorflow/lite/testing/tflite_driver.cc
index a637dc8..5567085 100644
--- a/tensorflow/lite/testing/tflite_driver.cc
+++ b/tensorflow/lite/testing/tflite_driver.cc
@@ -22,6 +22,7 @@
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/kernels/register_ref.h"
 #include "tensorflow/lite/string_util.h"
+#include "tensorflow/lite/testing/join.h"
 #include "tensorflow/lite/testing/split.h"
 
 namespace tflite {
@@ -383,5 +384,34 @@
   interpreter_->ResetVariableTensors();
 }
 
+string TfLiteDriver::ReadOutput(int id) {
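+  // Renders tensor `id` as comma-separated values, e.g. (illustrative) a
+  // float tensor holding {0.1, 0.2} reads back as "0.1,0.2".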
+  auto* tensor = interpreter_->tensor(id);
+  int num_elements = 1;
+
+  for (int i = 0; i < tensor->dims->size; ++i) {
+    num_elements *= tensor->dims->data[i];
+  }
+
+  switch (tensor->type) {
+    case kTfLiteFloat32:
+      return JoinDefault(tensor->data.f, num_elements, ",");
+    case kTfLiteInt32:
+      return JoinDefault(tensor->data.i32, num_elements, ",");
+    case kTfLiteInt64:
+      return JoinDefault(tensor->data.i64, num_elements, ",");
+    case kTfLiteUInt8:
+      return Join(tensor->data.uint8, num_elements, ",");
+    case kTfLiteInt8:
+      return JoinDefault(tensor->data.int8, num_elements, ",");
+    case kTfLiteBool:
+      return JoinDefault(tensor->data.b, num_elements, ",");
+    default:
+      Invalidate(absl::StrCat("Unsupported tensor type ",
+                              TfLiteTypeGetName(tensor->type),
+                              " in TfLiteDriver::ReadOutput"));
+      return "";
+  }
+}
+
 }  // namespace testing
 }  // namespace tflite
diff --git a/tensorflow/lite/testing/tflite_driver.h b/tensorflow/lite/testing/tflite_driver.h
index 537f20d..3cce6c4 100644
--- a/tensorflow/lite/testing/tflite_driver.h
+++ b/tensorflow/lite/testing/tflite_driver.h
@@ -49,7 +49,7 @@
   void SetExpectation(int id, const string& csv_values) override;
   void Invoke() override;
   bool CheckResults() override;
-  string ReadOutput(int id) override { return "no-op"; }
+  string ReadOutput(int id) override;
 
  private:
   void DeallocateStringTensor(TfLiteTensor* t) {
diff --git a/tensorflow/lite/testing/tflite_driver_test.cc b/tensorflow/lite/testing/tflite_driver_test.cc
index 81bf670..e80816b 100644
--- a/tensorflow/lite/testing/tflite_driver_test.cc
+++ b/tensorflow/lite/testing/tflite_driver_test.cc
@@ -54,6 +54,8 @@
   ASSERT_TRUE(runner->IsValid());
 
   ASSERT_TRUE(runner->CheckResults());
+  EXPECT_EQ(runner->ReadOutput(5), "0.101,0.202,0.303,0.404");
+  EXPECT_EQ(runner->ReadOutput(6), "0.011,0.022,0.033,0.044");
 }
 
 TEST(TfliteDriverTest, SingleAddOpTest) {
@@ -88,6 +90,8 @@
   ASSERT_TRUE(runner->IsValid());
 
   ASSERT_TRUE(runner->CheckResults());
+  EXPECT_EQ(runner->ReadOutput(5), "0.101,0.202,0.303,0.404");
+  EXPECT_EQ(runner->ReadOutput(6), "0.011,0.022,0.033,0.044");
 }
 
 }  // namespace
diff --git a/tensorflow/lite/toco/BUILD b/tensorflow/lite/toco/BUILD
index 3150c48..c477e2f 100644
--- a/tensorflow/lite/toco/BUILD
+++ b/tensorflow/lite/toco/BUILD
@@ -378,6 +378,7 @@
         ":types_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/lite/kernels/internal:types",
+        "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_googlesource_code_re2//:re2",
         "@protobuf_archive//:protobuf_headers",
diff --git a/tensorflow/lite/toco/dump_graphviz.cc b/tensorflow/lite/toco/dump_graphviz.cc
index 8896893..ad69e4f 100644
--- a/tensorflow/lite/toco/dump_graphviz.cc
+++ b/tensorflow/lite/toco/dump_graphviz.cc
@@ -15,17 +15,21 @@
 #include "tensorflow/lite/toco/dump_graphviz.h"
 
 #include <cmath>
+#include <functional>
 #include <memory>
 #include <vector>
 
+#include "absl/memory/memory.h"
 #include "absl/strings/str_replace.h"
+#include "absl/strings/str_split.h"
 #include "absl/strings/strip.h"
+#include "re2/re2.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/lite/toco/model_flags.pb.h"
 #include "tensorflow/lite/toco/toco_graphviz_dump_options.h"
 #include "tensorflow/lite/toco/toco_port.h"
 #include "tensorflow/lite/toco/toco_types.h"
 #include "tensorflow/lite/toco/tooling_util.h"
-#include "tensorflow/core/platform/logging.h"
 
 using toco::port::AppendF;
 using toco::port::StringF;
@@ -33,72 +37,158 @@
 namespace toco {
 namespace {
 
+// 'nslimit' is a graphviz (dot) parameter that limits the iterations during
+// the layout phase. Omitting it allows infinite iterations, causing some
+// complex graphs to never finish. A value of 125 produces good graphs
+// while allowing complex graphs to finish.
+constexpr char kGraphFmt[] = R"CODE(digraph Computegraph { tooltip = "/"
+    nslimit=125 margin=36 ranksep = 2 labelloc="t" label=%s
+)CODE";
+// Note: tooltips are only supported on SVGs in Chrome.
+constexpr char kSubgraphFmt[] =
+    R"CODE(    subgraph "cluster_%s" { style=rounded bgcolor="%s" penwidth=0.0 label=%s
+)CODE";
+constexpr char kArrayNodeFmt[] =
+    R"CODE(        "%s" [label=%s tooltip="%s" shape=%s style=filled fillcolor="%s" fontcolor="%sDD"];
+)CODE";
+constexpr char kOpNodeFmt[] =
+    R"CODE(        %s [label=%s tooltip=" " shape=box margin=0 style=filled fillcolor="%s" fontcolor="%sDD"];
+)CODE";
+constexpr char kInputEdgeFmt[] =
+    R"CODE(        "%s"%s -> %s:i%d:n [penwidth=%f weight=%f];
+)CODE";
+constexpr char kOutputEdgeFmt[] =
+    R"CODE(        %s:o%d:s -> "%s"%s [penwidth=%f weight=%f];
+)CODE";
+constexpr char kRNNBackEdgeFmt[] =
+    R"CODE(        "%s":s -> "%s":n [color="#0F9D58" constraint=false];
+)CODE";
+constexpr char kUnicodeMult[] = "\u00D7";
+constexpr char kUnicodeEllipsis[] = " \u2026 ";
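+
+// Illustrative expansion of kOpNodeFmt (values made up): an op node line
+// comes out as
+//     op00042 [label=<...> tooltip=" " shape=box margin=0 style=filled
+//     fillcolor="#C53929" fontcolor="#FFFFFFDD"];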
+
 class Color {
  public:
   Color() {}
   Color(uint8 r, uint8 g, uint8 b) : r_(r), g_(g), b_(b) {}
+  explicit Color(uint32 word)
+      : r_((word & 0x00FF0000) >> 16),
+        g_((word & 0x0000FF00) >> 8),
+        b_((word & 0x000000FF) >> 0) {}
+
   // Returns the string serialization of this color in graphviz format,
   // for use as 'fillcolor' in boxes.
-  string FillColorString() const { return StringF("%.2X%.2X%.2X", r_, g_, b_); }
+  string AsHexString() const { return StringF("#%.2X%.2X%.2X", r_, g_, b_); }
+  // A suitable, different color will be chosen as the 'fontcolor' for the
+  // inside text label; see Color::TextColorString.
   // Returns the serialization in graphviz format of a suitable color to use
   // as 'fontcolor' in the same boxes: black or white, whichever offers
-  // the better contrast from FillColorString().
+  // the better contrast from AsHexString().
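+  // e.g. (illustrative arithmetic) for Color(0xC5, 0x39, 0x29) the luminance
+  // is 0.2126*197 + 0.7152*57 + 0.0722*41 ~= 85.6 < 128, so white
+  // ("#FFFFFF") text is chosen.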
   string TextColorString() const {
     // https://en.wikipedia.org/wiki/Relative_luminance
     const float luminance = 0.2126f * r_ + 0.7152f * g_ + 0.0722f * b_;
     const uint8 l = luminance > 128.f ? 0 : 255;
-    return StringF("%.2X%.2X%.2X", l, l, l);
+    return StringF("#%.2X%.2X%.2X", l, l, l);
   }
 
  private:
   uint8 r_ = 0, g_ = 0, b_ = 0;
 };
 
-struct NodeProperties {
-  // The text to display inside the box for this node.
-  string label;
-  // The color to use for this node; will be used as 'fillcolor'
-  // for its box. See Color::FillColorString. A suitable, different
-  // color will be chosen for the 'fontcolor' for the inside text
-  // label, see Color::TextColorString.
-  Color color;
-  float log2_buffer_size;
-};
+Color HashStringToColor(string s) {
+  // Returns a unique color for a name.
+  //
+  // This function removes TensorFlow anti-collision suffixes (e.g. "_2"),
+  // hashes the string to a uint32, then twiddles some bits to get a light and
+  // subtle color. This seems to be a good heuristic for keeping enough of the
+  // name to hash to a unique color while still revealing structure through
+  // naming similarities.
+  //
+  // The regular expression "_\d+" matches any underscore followed by numbers,
+  // which we strip out. Examples:
+  //
+  //     "Conv"       -> "Conv"
+  //     "Conv_2"     -> "Conv"
+  //     "Conv_72"    -> "Conv"
+  //     "Pad_1_bias" -> "Pad_bias"
+  //     "Conv_abc"   -> "Conv_abc"
 
-// All colors in this file are from:
-// https://material.io/guidelines/style/color.html
+  RE2::GlobalReplace(&s, R"CODE(_\d+)CODE", "");
+  uint32 color_word = std::hash<std::string>{}(s);
+  color_word |= 0x00E0E0E0;
+  return Color(color_word);
+}
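+
+// Illustrative: the OR with 0x00E0E0E0 above forces every channel to at
+// least 0xE0, so a hash word such as 0x1234ABCD becomes 0x12F4EBED, i.e. the
+// light pastel Color(0xF4, 0xEB, 0xED).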
 
-Color GetColorForArray(const Model& model, const string& array_name) {
+void GetArrayColorAndShape(const Model& model, const string& array_name,
+                           Color* color, string* shape) {
+  // All colors in this file are from:
+  // https://material.io/guidelines/style/color.html
   // Arrays involved in RNN back-edges have a different color
   for (const auto& rnn_state : model.flags.rnn_states()) {
     // RNN state, fed by a back-edge. Bold color.
     if (array_name == rnn_state.state_array()) {
-      return Color(0x0F, 0x9D, 0x58);
+      *color = Color(0x0F, 0x9D, 0x58);
+      *shape = "invhouse";
+      return;
     }
     // RNN back-edge source, feeding a RNN state.
     // Light tone of the same color as RNN states.
     if (array_name == rnn_state.back_edge_source_array()) {
-      return Color(0xB7, 0xE1, 0xCD);
+      *color = Color(0xB7, 0xE1, 0xCD);
+      *shape = "house";
+      return;
     }
   }
   // Constant parameter arrays have their own bold color
   if (model.GetArray(array_name).buffer) {
-    return Color(0x42, 0x85, 0xF4);
+    *color = Color(0x42, 0x85, 0xF4);
+    *shape = "cylinder";
+    return;
   }
   // Remaining arrays are activations.
   // We use gray colors for them because they are the majority
   // of arrays so we want to highlight other arrays instead of them.
   // First, we use a bolder gray for input/output arrays:
   if (IsInputArray(model, array_name)) {
-    return Color(0x9E, 0x9E, 0x9E);
+    *color = Color(0x9E, 0x9E, 0x9E);
+    *shape = "invhouse";
+    return;
   }
   if (IsOutputArray(model, array_name)) {
-    return Color(0x9E, 0x9E, 0x9E);
+    *color = Color(0x9E, 0x9E, 0x9E);
+    *shape = "house";
+    return;
   }
   // Remaining arrays are intermediate activation arrays.
   // Lighter tone of the same grey as for input/output arrays:
   // We want these to be very discrete.
-  return Color(0xF5, 0xF5, 0xF5);
+  *color = Color(0xF5, 0xF5, 0xF5);
+  *shape = "box";
+}
+
+string GetArrayCompassPt(const Model& model, const string& array_name) {
+  // The "compass point" is the point on the node where edge connections are
+  // made. For most arrays we don't care, but inputs and outputs look better
+  // connected at the tip of the "house" and "invhouse" shapes used. So we
+  // append ":n" and ":s" respectively for those.
+  for (const auto& rnn_state : model.flags.rnn_states()) {
+    // RNN state is essentially an input
+    if (array_name == rnn_state.state_array()) {
+      return ":s";
+    }
+    // RNN back-edge source is essentially an output
+    if (array_name == rnn_state.back_edge_source_array()) {
+      return ":n";
+    }
+  }
+  if (IsInputArray(model, array_name)) {
+    return ":s";
+  }
+  if (IsOutputArray(model, array_name)) {
+    return ":n";
+  }
+  return "";
 }
 
 void AppendArrayVal(string* string, Array const& array, int index) {
@@ -141,239 +231,550 @@
   }
 }
 
-NodeProperties GetPropertiesForArray(const Model& model,
-                                     const string& array_name) {
-  NodeProperties node_properties;
-  node_properties.color = GetColorForArray(model, array_name);
-  node_properties.label = absl::StrReplaceAll(array_name, {{"/", "/\\n"}});
-  node_properties.log2_buffer_size = 0.0f;
+typedef std::map<string, string> Attributes;
 
-  // Append array shape to the label.
-  auto& array = model.GetArray(array_name);
-  AppendF(&node_properties.label, "\\nType: %s",
-          ArrayDataTypeName(array.data_type));
-
-  if (array.has_shape()) {
-    auto& array_shape = array.shape();
-    node_properties.label += "\\n[";
-    for (int id = 0; id < array_shape.dimensions_count(); id++) {
-      if (id == 0) {
-        AppendF(&node_properties.label, "%d", array_shape.dims(id));
-      } else {
-        // 0x00D7 is the unicode multiplication symbol
-        AppendF(&node_properties.label, "\u00D7%d", array_shape.dims(id));
-      }
-    }
-    node_properties.label += "]";
-
-    int buffer_size = 0;
-    if (IsNonEmpty(array.shape())) {
-      buffer_size = RequiredBufferSizeForShape(array.shape());
-      node_properties.log2_buffer_size =
-          std::log2(static_cast<float>(buffer_size));
-    }
-
-    if (array.buffer) {
-      const auto& array = model.GetArray(array_name);
-      if (buffer_size <= 4) {
-        AppendF(&node_properties.label, " = ");
-        if (array.shape().dimensions_count() > 0) {
-          AppendF(&node_properties.label, "{");
-        }
-        for (int i = 0; i < buffer_size; i++) {
-          AppendArrayVal(&node_properties.label, array, i);
-          if (i + 1 < buffer_size) {
-            AppendF(&node_properties.label, ", ");
-          }
-        }
-      } else {
-        AppendF(&node_properties.label, "\\n = ");
-        if (array.shape().dimensions_count() > 0) {
-          AppendF(&node_properties.label, "{");
-        }
-        AppendArrayVal(&node_properties.label, array, 0);
-        AppendF(&node_properties.label, ", ");
-        AppendArrayVal(&node_properties.label, array, 1);
-        // 0x2026 is the unicode ellipsis symbol
-        AppendF(&node_properties.label, " \u2026 ");
-        AppendArrayVal(&node_properties.label, array, buffer_size - 2);
-        AppendF(&node_properties.label, ", ");
-        AppendArrayVal(&node_properties.label, array, buffer_size - 1);
-      }
-      if (array.shape().dimensions_count() > 0) {
-        AppendF(&node_properties.label, "}");
-      }
-    }
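+// Renders each attribute as one two-cell HTML table row. Illustrative:
+// {"stride": "2x2"} becomes
+//     <TR><TD CELLPADDING="1" ALIGN="RIGHT">stride:</TD>
+//     <TD CELLPADDING="1" ALIGN="LEFT">2x2</TD></TR>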
+string AttributesToHtml(Attributes attributes) {
+  string html;
+  for (const auto& attr : attributes) {
+    html += R"CODE(<TR><TD CELLPADDING="1" ALIGN="RIGHT">)CODE";
+    html += attr.first;
+    html += R"CODE(:</TD><TD CELLPADDING="1" ALIGN="LEFT">)CODE";
+    html += attr.second;
+    html += "</TD></TR>";
   }
-
-  if (array.minmax) {
-    AppendF(&node_properties.label, "\\nMinMax: [%.7g, %.7g]",
-            array.minmax->min, array.minmax->max);
-  }
-
-  if (array.quantization_params) {
-    AppendF(&node_properties.label, "\\nQuantization: %7g * (x - %d)",
-            array.quantization_params->scale,
-            array.quantization_params->zero_point);
-  }
-
-  if (array.alloc) {
-    AppendF(&node_properties.label, "\\nTransient Alloc: [%d, %d)",
-            array.alloc->start, array.alloc->end);
-  }
-
-  return node_properties;
+  return html;
 }
 
-NodeProperties GetPropertiesForOperator(const Operator& op) {
-  NodeProperties node_properties;
-  if (op.type == OperatorType::kUnsupported) {
-    node_properties.label =
-        static_cast<const TensorFlowUnsupportedOperator&>(op).tensorflow_op;
-  } else {
-    node_properties.label =
-        string(absl::StripPrefix(OperatorTypeName(op.type), "TensorFlow"));
+string GetArrayLabel(const Model& model, const string& array_id) {
+  string html;
+
+  // Use HTML-like labels (http://www.graphviz.org/doc/info/shapes.html#html)
+  html += "<";
+
+  // Begin Table
+  html += R"CODE(<FONT POINT-SIZE="10" FACE="Courier">)CODE";
+  html += R"CODE(<TABLE BORDER="0" CELLSPACING="2" CELLPADDING="0">)CODE";
+
+  auto& array = model.GetArray(array_id);
+  if (array.buffer) {
+    // "cylinder" shapes require some extra head room.
+    html += R"CODE(<TR><TD COLSPAN="2"> </TD></TR>)CODE";
   }
+
+  // "Primary" name of array (last non-slash delimited group of characters).
+  html += R"CODE(<TR><TD COLSPAN="2" ALIGN="CENTER">)CODE";
+  html += R"CODE(<FONT POINT-SIZE="16" FACE="Helvetica"><I>)CODE";
+  AppendF(&html, R"CODE(%s)CODE",
+          std::vector<string>(absl::StrSplit(array_id, '/')).back());
+  html += R"CODE(</I></FONT>)CODE";
+  html += "</TD></TR>";
+
+  // Array data type and dimensions
+  html += R"CODE(<TR><TD COLSPAN="2" ALIGN="CENTER">)CODE";
+  html += R"CODE(<FONT POINT-SIZE="14" FACE="Courier"><B>)CODE";
+  // Type
+  html += ArrayDataTypeName(array.data_type);
+  // Shape
+  if (array.has_shape()) {
+    auto& array_shape = array.shape();
+    html += "[";
+    for (int dim = 0; dim < array_shape.dimensions_count(); dim++) {
+      AppendF(&html, "%d", array_shape.dims(dim));
+      if (dim + 1 < array_shape.dimensions_count()) {
+        html += kUnicodeMult;
+      }
+    }
+    html += "]";
+  }
+
+  // Small buffer sample
+  int buffer_size = 0;
+  if (array.buffer) {
+    buffer_size = RequiredBufferSizeForShape(array.shape());
+  }
+  if ((buffer_size > 0) && (buffer_size <= 4)) {
+    html += " = ";
+    if (array.shape().dimensions_count() > 0) {
+      html += "{";
+    }
+    for (int i = 0; i < buffer_size; i++) {
+      AppendArrayVal(&html, array, i);
+      if (i + 1 < buffer_size) {
+        html += ", ";
+      }
+    }
+    if (array.shape().dimensions_count() > 0) {
+      html += "}";
+    }
+  }
+  html += R"CODE(</B></FONT>)CODE";
+  html += "</TD></TR>";
+
+  // Large buffer samples get their own line
+  if (buffer_size > 4) {
+    html += R"CODE(<TR><TD COLSPAN="2" ALIGN="CENTER"> = {)CODE";
+    AppendArrayVal(&html, array, 0);
+    html += ", ";
+    AppendArrayVal(&html, array, 1);
+    html += kUnicodeEllipsis;
+    AppendArrayVal(&html, array, buffer_size - 2);
+    html += ", ";
+    AppendArrayVal(&html, array, buffer_size - 1);
+    html += "}</TD></TR>";
+  }
+
+  // Other array properties
+  Attributes attrs;
+  if (array.minmax) {
+    attrs["minmax"] =
+        StringF("[%.7g, %.7g]", array.minmax->min, array.minmax->max);
+  }
+  if (array.quantization_params) {
+    attrs["quant"] = StringF("%7g\u00B7(x-%d)",  // Unicode "cdot"
+                             array.quantization_params->scale,
+                             array.quantization_params->zero_point);
+  }
+  if (array.alloc) {
+    attrs["alloc"] = StringF("[%d, %d)", array.alloc->start, array.alloc->end);
+  }
+  html += AttributesToHtml(attrs);
+
+  // Output array_id in ultra-small font so it can be searched and copied.
+  html += R"CODE(<TR><TD COLSPAN="2" ALIGN="CENTER">)CODE";
+  html += R"CODE(<FONT POINT-SIZE="3" FACE="">)CODE";
+  AppendF(&html, R"CODE("%s")CODE", array_id);
+  html += R"CODE(</FONT>)CODE";
+  html += "</TD></TR>";
+
+  // End Table and HTML-like label
+  html += R"CODE(</TABLE></FONT>)CODE";
+  html += ">";
+  return html;
+}
+
+Attributes GetOpAttributes(const Model& model, const Operator& op) {
+  Attributes attrs;
   switch (op.fused_activation_function) {
     case FusedActivationFunctionType::kRelu:
-      AppendF(&node_properties.label, "\\nReLU");
+      attrs["func"] = "ReLU";
       break;
     case FusedActivationFunctionType::kRelu6:
-      AppendF(&node_properties.label, "\\nReLU6");
+      attrs["func"] = "ReLU6";
       break;
     case FusedActivationFunctionType::kRelu1:
-      AppendF(&node_properties.label, "\\nReLU1");
+      attrs["func"] = "ReLU1";
       break;
     default:
       break;
   }
-  // Additional information for some of the operators.
+  // Report member variables of derived operator types as attributes.
   switch (op.type) {
     case OperatorType::kConv: {
       const auto& conv_op = static_cast<const ConvOperator&>(op);
-      node_properties.color = Color(0xC5, 0x39, 0x29);  // Bolder color
-      AppendF(&node_properties.label, "\\n%dx%d/%s", conv_op.stride_width,
-              conv_op.stride_height,
-              conv_op.padding.type == PaddingType::kSame ? "S" : "V");
+      string stride;
+      AppendF(&stride, "%d", conv_op.stride_width);
+      stride += kUnicodeMult;
+      AppendF(&stride, "%d", conv_op.stride_height);
+      attrs["stride"] = stride;
+      attrs["padding"] =
+          (conv_op.padding.type == PaddingType::kSame) ? "same" : "valid";
       break;
     }
     case OperatorType::kDepthwiseConv: {
-      const auto& conv_op = static_cast<const DepthwiseConvOperator&>(op);
-      node_properties.color = Color(0xC5, 0x39, 0x29);  // Bolder color
-      AppendF(&node_properties.label, "\\n%dx%d/%s", conv_op.stride_width,
-              conv_op.stride_height,
-              conv_op.padding.type == PaddingType::kSame ? "S" : "V");
-      break;
-    }
-    case OperatorType::kFullyConnected: {
-      node_properties.color = Color(0xC5, 0x39, 0x29);  // Bolder color
+      const auto& depthconv_op =
+          static_cast<const DepthwiseConvOperator&>(op);
+      string stride;
+      AppendF(&stride, "%d", depthconv_op.stride_width);
+      stride += kUnicodeMult;
+      AppendF(&stride, "%d", depthconv_op.stride_height);
+      attrs["stride"] = stride;
+      attrs["padding"] =
+          (depthconv_op.padding.type == PaddingType::kSame) ? "same" : "valid";
       break;
     }
     case OperatorType::kFakeQuant: {
       const auto& fakequant_op = static_cast<const FakeQuantOperator&>(op);
-      node_properties.color = Color(0xC5, 0x39, 0x29);  // Bolder color
+      attrs["bits"] = StringF("%d", fakequant_op.num_bits);
       if (fakequant_op.minmax) {
-        AppendF(&node_properties.label, "\\n%dbit [%g,%g]",
-                fakequant_op.num_bits, fakequant_op.minmax->min,
-                fakequant_op.minmax->max);
+        attrs["range"] = StringF("[%g,%g]", fakequant_op.minmax->min,
+                                 fakequant_op.minmax->max);
       } else {
-        AppendF(&node_properties.label, "\\n%dbit [?,?]",
-                fakequant_op.num_bits);
+        attrs["range"] = "[?,?]";
       }
       break;
     }
     default:
-      node_properties.color = Color(0xDB, 0x44, 0x37);
       break;
   }
+  int64 math_ops_count;
+  if (EstimateArithmeticOpsCount(model, op, &math_ops_count) &&
+      (math_ops_count != 0)) {
+    attrs["math"] = FormattedNumber(math_ops_count) + "ops";
+  }
 
-  return node_properties;
+  return attrs;
 }
 
-}  // namespace
-
-void DumpGraphviz(const Model& model, string* output_file_contents) {
-  AppendF(output_file_contents, "digraph Computegraph {\n");
-  // 'nslimit' is a graphviz (dot) paramater that limits the iterations during
-  // the layout phase. Omitting it allows infinite iterations, causing some
-  // complex graphs to never finish. A value of 125 produces good graphs
-  // while allowing complex graphs to finish.
-  AppendF(output_file_contents, "\t nslimit=125;\n");
-
-  constexpr char kNodeFormat[] =
-      "\t \"%s\" [label=\"%s\", shape=%s, style=filled, fillcolor=\"#%s\", "
-      "fontcolor = \"#%sDD\"];\n";
-
-  constexpr char kEdgeFormat[] =
-      "\t \"%s\" -> \"%s\" [penwidth=%f, weight=%f];\n";
-
-  constexpr char kRNNBackEdgeFormat[] =
-      "\t \"%s\" -> \"%s\" [color=\"#0F9D58\"];\n";
-
-  for (const auto& array_kv : model.GetArrayMap()) {
-    // Add node for array.
-    const string& array_name = array_kv.first;
-    const auto& array_properties = GetPropertiesForArray(model, array_name);
-    AppendF(output_file_contents, kNodeFormat, array_name,
-            array_properties.label, "octagon",
-            array_properties.color.FillColorString().c_str(),
-            array_properties.color.TextColorString().c_str());
+Color GetOpColor(const Operator& op) {
+  if ((op.type == OperatorType::kDepthwiseConv) ||
+      (op.type == OperatorType::kConv) ||
+      (op.type == OperatorType::kFullyConnected) ||
+      (op.type == OperatorType::kFakeQuant)) {
+    // Give some ops a bolder red
+    return Color(0xC5, 0x39, 0x29);
+  } else {
+    return Color(0xDB, 0x44, 0x37);
   }
+}
+
+string GetOpLabel(const Model& model, const Operator& op) {
+  // Use HTML-like labels (http://www.graphviz.org/doc/info/shapes.html#html)
+  string html;
+  html += "<";
+
+  // Begin Table
+  html += R"CODE(<FONT POINT-SIZE="10" FACE="Courier">)CODE";
+  html +=
+      R"CODE(<TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="0">)CODE";
+
+  // Input Ports
+  if (!op.inputs.empty()) {
+    html += R"CODE(<TR><TD COLSPAN="2" ALIGN="CENTER">)CODE";
+    // Distribute evenly using a sub-table
+    html += R"CODE(<TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0">)CODE";
+    html += R"CODE(<TR>)CODE";
+    for (int i = 0; i < op.inputs.size(); i++) {
+      html += R"CODE(<TD PORT=")CODE";
+      AppendF(&html, "i%d", i);
+      html += R"CODE(">)CODE";
+      if (op.inputs.size() > 1) {
+        // Only number inputs when op has two or more inputs
+        AppendF(&html, "%d", i);
+      }
+      html += "</TD>";
+    }
+    html += "</TR>";
+    html += R"CODE(</TABLE></TD></TR>)CODE";
+  }
+
+  // Name
+  html += R"CODE(<TR><TD COLSPAN="2" CELLPADDING="3" ALIGN="CENTER">)CODE";
+  html += R"CODE(<FONT POINT-SIZE="16" FACE="Helvetica"><B>)CODE";
+  if (op.type == OperatorType::kUnsupported) {
+    html += static_cast<const TensorFlowUnsupportedOperator&>(op).tensorflow_op;
+  } else {
+    html += string(absl::StripPrefix(OperatorTypeName(op.type), "TensorFlow"));
+  }
+  html += R"CODE(</B></FONT>)CODE";
+  html += "</TD></TR>";
+
+  // Attributes
+  Attributes attrs = GetOpAttributes(model, op);
+  html += AttributesToHtml(attrs);
+
+  // Output Ports
+  if (!op.outputs.empty()) {
+    html += R"CODE(<TR><TD COLSPAN="2" ALIGN="CENTER">)CODE";
+    // Distribute evenly using a sub-table
+    html += R"CODE(<TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0">)CODE";
+    html += R"CODE(<TR>)CODE";
+    for (int i = 0; i < op.outputs.size(); i++) {
+      html += R"CODE(<TD PORT=")CODE";
+      AppendF(&html, "o%d", i);
+      html += R"CODE(">)CODE";
+      if (op.outputs.size() > 1) {
+        // Only number outputs when op has two or more outputs
+        AppendF(&html, "%d", i);
+      }
+      html += "</TD>";
+    }
+    html += "</TR>";
+    html += R"CODE(</TABLE></TD></TR>)CODE";
+  }
+
+  // End Table and HTML-like label
+  html += R"CODE(</TABLE></FONT>)CODE";
+  html += ">";
+
+  return html;
+}
+
+float GetLog2BufferSize(const Model& model, const string& array_id) {
+  auto& array = model.GetArray(array_id);
+  if (array.has_shape()) {
+    int buffer_size = 0;
+    if (IsNonEmpty(array.shape())) {
+      buffer_size = RequiredBufferSizeForShape(array.shape());
+      return std::log2(static_cast<float>(buffer_size));
+    }
+  }
+  return 0.0f;
+}
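+
+// Illustrative: a [1,8,8,3] array has 192 elements, so GetLog2BufferSize
+// returns log2(192) ~= 7.6; DumpOperatorEdges below then draws its edges
+// with penwidth ~= 2.5 and weight ~= 7.6.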
+
+string GetOpId(int op_index) { return StringF("op%05d", op_index); }
+
+void DumpOperator(const Model& model, string* output_file, int op_index) {
+  // Dump node for operator.
+  const Operator& op = *model.operators[op_index];
+  Color color = GetOpColor(op);
+  string label = GetOpLabel(model, op);
+  string op_id = GetOpId(op_index);
+  AppendF(output_file, kOpNodeFmt, op_id, label, color.AsHexString(),
+          color.TextColorString());
+}
+
+void DumpOperatorEdges(const Model& model, string* output_file, int op_index) {
+  // Inputs
+  const Operator& op = *model.operators[op_index];
+  string op_id = GetOpId(op_index);
+  for (int i = 0; i < op.inputs.size(); i++) {
+    const auto& input = op.inputs[i];
+    if (!model.HasArray(input)) {
+      // Connected arrays should _always_ exist. Except, perhaps, during
+      // development.
+      continue;
+    }
+    float log2_buffer_size = GetLog2BufferSize(model, input);
+    // Draw lines that transport more data thicker (otherwise, where would
+    // the data fit, right?).
+    float line_width = std::max(0.5f, log2_buffer_size / 3.0f);
+    // Keep edges that transport more data shorter than those with less.
+    float weight = std::max(1.0f, log2_buffer_size);
+    if (!IsInputArray(model, input) &&
+        GetOpWithOutput(model, input) == nullptr) {
+      // Give the main line of data flow a straighter path by penalizing edges
+      // to standalone buffers. Weights are generally very large buffers that
+      // would otherwise skew the layout.
+      weight = 1.0f;
+    }
+    string compass_pt = GetArrayCompassPt(model, input);
+    AppendF(output_file, kInputEdgeFmt, input, compass_pt, op_id, i, line_width,
+            weight);
+  }
+  // Outputs
+  for (int i = 0; i < op.outputs.size(); i++) {
+    const auto& output = op.outputs[i];
+    if (!model.HasArray(output)) {
+      continue;
+    }
+    float log2_buffer_size = GetLog2BufferSize(model, output);
+    // See comments above regarding weight and line_width calculations.
+    float line_width = std::max(0.5f, log2_buffer_size / 3.0f);
+    float weight = std::max(1.0f, log2_buffer_size);
+    if (!IsArrayConsumed(model, output)) {
+      weight = 1.0f;
+    }
+    string compass_pt = GetArrayCompassPt(model, output);
+    AppendF(output_file, kOutputEdgeFmt, op_id, i, output, compass_pt,
+            line_width, weight);
+  }
+}
+
+struct Node {
+  Node() : math_ops(0) {}
+  // Name used as a key in the model's array map
+  string array_id;
+
+  // Estimated number of math ops incurred by this node (the count for the op
+  // that has this array as its first output, plus all child nodes).
+  int64 math_ops;
+
+  // A map of child nodes keyed by name.
+  std::map<string, std::unique_ptr<Node>> children;
+};
+
+string GetSubgraphLabel(Node const& node, const string& subgraph) {
+  // Use HTML-like labels (http://www.graphviz.org/doc/info/shapes.html#html)
+  string html;
+  html += "<";
+
+  // Begin Table
+  html += R"CODE(<FONT POINT-SIZE="12" FACE="Courier">)CODE";
+  html +=
+      R"CODE(<TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="0">)CODE";
+
+  // Name
+  html += R"CODE(<TR><TD COLSPAN="2" CELLPADDING="3" ALIGN="CENTER">)CODE";
+  html += R"CODE(<FONT POINT-SIZE="18" FACE="Helvetica"><I>)CODE";
+  html += subgraph;
+  html += R"CODE(</I></FONT>)CODE";
+  html += "</TD></TR>";
+
+  // Attributes
+  Attributes attrs;
+  if (node.math_ops > 0) {
+    attrs["math"] = FormattedNumber(node.math_ops) + "ops";
+  }
+  html += AttributesToHtml(attrs);
+
+  // End Table and HTML-like label
+  html += R"CODE(</TABLE></FONT>)CODE";
+  html += ">";
+
+  return html;
+}
+
+void DumpSubgraphHeader(string* output_file, Node const& node,
+                        const string& node_name) {
+  Color color = HashStringToColor(node_name);
+  string label = GetSubgraphLabel(node, node_name);
+  AppendF(output_file, kSubgraphFmt, node_name, color.AsHexString(), label);
+}
+
+void DumpArray(const Model& model, string* output_file,
+               const string& array_id) {
+  Color color;
+  string shape;
+  GetArrayColorAndShape(model, array_id, &color, &shape);
+  string label = GetArrayLabel(model, array_id);
+  AppendF(output_file, kArrayNodeFmt, array_id, label, array_id, shape,
+          color.AsHexString(), color.TextColorString());
+
+  // Ops are placed in the same subgraph as their first output.
   for (int op_index = 0; op_index < model.operators.size(); op_index++) {
     const Operator& op = *model.operators[op_index];
-    // Add node for operator.
-    auto op_properties = GetPropertiesForOperator(op);
-    string operator_id = StringF("op%05d", op_index);
-    AppendF(output_file_contents, kNodeFormat, operator_id, op_properties.label,
-            "box", op_properties.color.FillColorString().c_str(),
-            op_properties.color.TextColorString().c_str());
-    // Add edges for all inputs of the operator.
-    for (const auto& input : op.inputs) {
-      if (!model.HasArray(input)) {
-        // Arrays should _always_ exist. Except, perhaps, during development.
-        continue;
-      }
-      auto array_properties = GetPropertiesForArray(model, input);
-      // Draw lines that transport more data thicker (Otherwise, where would the
-      // data fit? right?).
-      float line_width =
-          std::max(0.5f, array_properties.log2_buffer_size / 3.0f);
-      // Keep edges that transport more data shorter than those with less.
-      float weight = std::max(1.0f, array_properties.log2_buffer_size);
-      if (!IsInputArray(model, input) &&
-          GetOpWithOutput(model, input) == nullptr) {
-        // Give the main line of data flow a straighter path by penalizing edges
-        // to standalone buffers. Weights are generally very large buffers that
-        // otherwise skew the layout without this.
-        weight = 1.0f;
-      }
-      AppendF(output_file_contents, kEdgeFormat, input, operator_id, line_width,
-              weight);
-    }
-    // Add edges for all outputs of the operator.
-    for (const auto& output : op.outputs) {
-      if (!model.HasArray(output)) {
-        // Arrays should _always_ exist. Except, perhaps, during development.
-        continue;
-      }
-      auto array_properties = GetPropertiesForArray(model, output);
-      // See comments above regarding weight and line_width calculations.
-      float line_width =
-          std::max(0.5f, array_properties.log2_buffer_size / 3.0f);
-      float weight = std::max(1.0f, array_properties.log2_buffer_size);
-      if (!IsArrayConsumed(model, output)) {
-        weight = 1.0f;
-      }
-      AppendF(output_file_contents, kEdgeFormat, operator_id, output,
-              line_width, weight);
+    if (!op.outputs.empty() && (op.outputs[0] == array_id)) {
+      DumpOperator(model, output_file, op_index);
     }
   }
+}
 
+void DumpNode(const Model& model, string* output_file, const string& node_name,
+              Node const& node) {
+  bool not_root = !node_name.empty();
+  if (not_root) {
+    DumpSubgraphHeader(output_file, node, node_name);
+  }
+
+  for (const auto& child : node.children) {
+    if (!child.second->array_id.empty()) {
+      // Dump array if this node possesses one.
+      DumpArray(model, output_file, child.second->array_id);
+    }
+    // Note that it is always possible to have children. Unlike a filesystem,
+    // the existence of array "foo/bar" does _not_ prevent other arrays, such
+    // as "foo/bar/baz", from being nested beneath it.
+    DumpNode(model, output_file, child.first, *child.second);
+  }
+
+  if (not_root) {
+    // End subgraph
+    AppendF(output_file, "    }\n");
+  }
+}
+
+int64 GetArithmeticOpsCount(const Model& model, const string& array_id) {
+  for (const auto& op : model.operators) {
+    if (!op->outputs.empty() && op->outputs[0] == array_id) {
+      int64 count;
+      if (EstimateArithmeticOpsCount(model, *op, &count)) {
+        return count;
+      } else {
+        return 0;
+      }
+    }
+  }
+  return 0;
+}
+
+void InsertNode(const Model& model, const string& array_id, Node* node,
+                std::vector<string> prefixes, int64* math_ops) {
+  if (prefixes.empty()) {
+    // Base case: store array in this node.
+    node->array_id = array_id;
+    *math_ops = GetArithmeticOpsCount(model, array_id);
+  } else {
+    // Insert into the sub-tree for that prefix.
+    string prefix = prefixes.back();
+    prefixes.pop_back();
+    if (node->children.count(prefix) == 0) {
+      // Create a new node if this prefix is unseen.
+      node->children[prefix] = absl::make_unique<Node>();
+    }
+    InsertNode(model, array_id, node->children[prefix].get(), prefixes,
+               math_ops);
+  }
+  // Sum estimated math ops into all nodes.
+  node->math_ops += *math_ops;
+}
+
+void BuildArrayTree(const Model& model, Node* tree) {
+  // Delimit array names by path "/", then place into a tree based on this path.
+  for (const auto& array_id : model.GetArrayMap()) {
+    std::vector<string> prefixes = absl::StrSplit(array_id.first, '/');
+    std::reverse(prefixes.begin(), prefixes.end());
+    int64 math_ops = 0;  // Temporary storage for math ops used in recursion.
+    InsertNode(model, array_id.first, tree, prefixes, &math_ops);
+  }
+}
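+
+// Illustrative: arrays "foo/bar" and "foo/bar/baz" yield root -> "foo" ->
+// "bar" (holding "foo/bar"), with a further child "baz" (holding
+// "foo/bar/baz") nested beneath it, as noted in DumpNode above.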
+
+string GetGraphLabel(const Model& model, const string& graph_name) {
+  // Use HTML-like labels (http://www.graphviz.org/doc/info/shapes.html#html)
+  string html;
+  html += "<";
+
+  // Begin Table
+  html += R"CODE(<FONT POINT-SIZE="36" FACE="Courier">)CODE";
+  html +=
+      R"CODE(<TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="0">)CODE";
+
+  // Name
+  html += R"CODE(<TR><TD COLSPAN="2" CELLPADDING="3" ALIGN="CENTER">)CODE";
+  html += R"CODE(<FONT POINT-SIZE="64" FACE="Helvetica"><B><I>)CODE";
+  html += graph_name;
+  html += R"CODE(</I></B></FONT>)CODE";
+  html += "</TD></TR>";
+
+  // Attributes
+  Attributes attrs;
+  attrs["arrays"] = StringF("%d", model.GetArrayMap().size());
+  if (!model.optional_arrays.empty()) {
+    attrs["optional arrays"] = StringF("%d", model.optional_arrays.size());
+  }
+  attrs["operators"] = StringF("%d", model.operators.size());
+  int64 ops_count;
+  if (EstimateArithmeticOpsCount(model, &ops_count) && (ops_count > 0)) {
+    attrs["math"] = FormattedNumber(ops_count) + "ops";
+  }
+  if (model.transient_data_size > 0) {
+    attrs["transient data size"] =
+        StringF("%d KiB", model.transient_data_size / 1024);
+  }
+  if (model.transient_data_alignment > 0) {
+    attrs["transient data alignment"] =
+        StringF("%d bytes", model.transient_data_alignment);
+  }
+  html += AttributesToHtml(attrs);
+
+  // End Table and HTML-like label
+  html += R"CODE(</TABLE></FONT>)CODE";
+  html += ">";
+
+  return html;
+}
+}  // namespace
+
+void DumpGraphviz(const Model& model, string* output_file,
+                  const string& graph_name) {
+  // Start graphviz format
+  AppendF(output_file, kGraphFmt, GetGraphLabel(model, graph_name));
+
+  // Organize arrays into a tree for subgraphing
+  Node tree;
+  BuildArrayTree(model, &tree);
+  DumpNode(model, output_file, "", tree);
+
+  // Dump edges outside all subgraphs (otherwise the referred-to nodes are
+  // implicitly included in that subgraph).
+  for (int op_index = 0; op_index < model.operators.size(); op_index++) {
+    DumpOperatorEdges(model, output_file, op_index);
+  }
+
+  // Dump RNN Backedges
   for (const auto& rnn_state : model.flags.rnn_states()) {
-    AppendF(output_file_contents, kRNNBackEdgeFormat,
-            rnn_state.back_edge_source_array(), rnn_state.state_array());
+    AppendF(output_file, kRNNBackEdgeFmt, rnn_state.back_edge_source_array(),
+            rnn_state.state_array());
   }
-
-  AppendF(output_file_contents, "}\n");
+  // End graphviz format
+  AppendF(output_file, "}\n");
 }
 }  // namespace toco
diff --git a/tensorflow/lite/toco/dump_graphviz.h b/tensorflow/lite/toco/dump_graphviz.h
index 9697bd6..9bb74da 100644
--- a/tensorflow/lite/toco/dump_graphviz.h
+++ b/tensorflow/lite/toco/dump_graphviz.h
@@ -21,7 +21,8 @@
 
 namespace toco {
 
-void DumpGraphviz(const Model& model, string* output_file_contents);
+void DumpGraphviz(const Model& model, string* output_file_contents,
+                  const string& graph_name);
 
 }  // namespace toco
 
diff --git a/tensorflow/lite/toco/export_tensorflow.cc b/tensorflow/lite/toco/export_tensorflow.cc
index 50a30f5..c2952c7 100644
--- a/tensorflow/lite/toco/export_tensorflow.cc
+++ b/tensorflow/lite/toco/export_tensorflow.cc
@@ -1305,7 +1305,8 @@
       GetTensorFlowDataType(model, src_op.outputs[0]));
 }
 
-void ConvertRankOperator(const Model& model, const RankOperator& src_op,
+void ConvertRankOperator(const Model& model,
+                         const TensorFlowRankOperator& src_op,
                          GraphDef* tensorflow_graph) {
   tensorflow::NodeDef* rank_op = tensorflow_graph->add_node();
   rank_op->set_op("Rank");
@@ -2274,7 +2275,8 @@
         model, static_cast<const TensorFlowShapeOperator&>(src_op),
         tensorflow_graph);
   } else if (src_op.type == OperatorType::kRank) {
-    ConvertRankOperator(model, static_cast<const RankOperator&>(src_op),
+    ConvertRankOperator(model,
+                        static_cast<const TensorFlowRankOperator&>(src_op),
                         tensorflow_graph);
   } else if (src_op.type == OperatorType::kRange) {
     ConvertRangeOperator(model, static_cast<const RangeOperator&>(src_op),
diff --git a/tensorflow/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/lite/toco/graph_transformations/graph_transformations.cc
index a0260e2..e4eb769 100644
--- a/tensorflow/lite/toco/graph_transformations/graph_transformations.cc
+++ b/tensorflow/lite/toco/graph_transformations/graph_transformations.cc
@@ -128,7 +128,8 @@
 }
 
 bool GraphTransformationsPass(int increment, Model* model,
-                              const GraphTransformationsSet& transformations) {
+                              const GraphTransformationsSet& transformations,
+                              tensorflow::Status* status) {
   CHECK(increment == 1 || increment == -1);
   bool changed = false;
   if (model->operators.empty()) {
@@ -142,7 +143,10 @@
     for (const auto& transformation : transformations) {
       CHECK(!changed_now);
       CHECK(transformation->Messages().empty());
-      CHECK(transformation->Run(model, op_index, &changed_now).ok());
+      *status = transformation->Run(model, op_index, &changed_now);
+      if (!status->ok()) {
+        return false;
+      }
       const char* made_a_change_msg =
           changed_now ? "made a change" : "did NOT make a change";
       const int log_level =
@@ -186,18 +190,21 @@
 
 }  // namespace
 
-void RunGraphTransformations(Model* model, const string& msg,
-                             const GraphTransformationsSet& transformations) {
+tensorflow::Status RunGraphTransformationsWithStatus(
+    Model* model, const string& msg,
+    const GraphTransformationsSet& transformations) {
   PrintModelStats(toco::port::StringF("Before %s", msg), *model);
   int pass_index = 0;
+  tensorflow::Status status;
   while (GraphTransformationsPass((pass_index % 2) ? -1 : 1, model,
-                                  transformations)) {
+                                  transformations, &status)) {
     pass_index++;
     const auto& label =
         toco::port::StringF("After %s pass %d", msg, pass_index);
     PrintModelStats(label, *model);
     CheckInvariants(*model);
   }
+  return status;
 }
 
 }  // namespace toco
diff --git a/tensorflow/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/lite/toco/graph_transformations/graph_transformations.h
index 4008bbd..491a3e7 100644
--- a/tensorflow/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/lite/toco/graph_transformations/graph_transformations.h
@@ -102,8 +102,16 @@
 // construct GraphTransformation objects by using 'new', pass us
 // the resulting raw pointers, and this RunGraphTransformations
 // takes care of delete'ing these pointers.
-void RunGraphTransformations(Model* model, const string& message,
-                             const GraphTransformationsSet& transformations);
+tensorflow::Status RunGraphTransformationsWithStatus(
+    Model* model, const string& msg,
+    const GraphTransformationsSet& transformations);
+
+inline void RunGraphTransformations(
+    Model* model, const string& msg,
+    const GraphTransformationsSet& transformations) {
+  auto s = RunGraphTransformationsWithStatus(model, msg, transformations);
+  CHECK(s.ok()) << s.error_message();
+}
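+
+// Callers that can propagate failures should prefer the WithStatus variant,
+// e.g. (illustrative):
+//
+//   TF_RETURN_IF_ERROR(
+//       RunGraphTransformationsWithStatus(model, msg, transformations));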
 
 #define DECLARE_GRAPH_TRANSFORMATION(GTName)                     \
   class GTName : public GraphTransformation {                    \
diff --git a/tensorflow/lite/toco/graph_transformations/group_bidirectional_sequence_ops.cc b/tensorflow/lite/toco/graph_transformations/group_bidirectional_sequence_ops.cc
index 10e817b..6efdd8f 100644
--- a/tensorflow/lite/toco/graph_transformations/group_bidirectional_sequence_ops.cc
+++ b/tensorflow/lite/toco/graph_transformations/group_bidirectional_sequence_ops.cc
@@ -68,7 +68,7 @@
 
   while (op_it->type == operator_type) {
     sequence_ops->push(op_it);
-    // Check the first input of the unidirectional squence lstm op.
+    // Check the first input of the unidirectional sequence lstm op.
     op_it = GetOpWithOutput(model, op_it->inputs[0]);
     if (op_it == nullptr) {
       return false;
@@ -340,7 +340,7 @@
   *modified = false;
   // Bidirectional sequence lstm will generate two separate unidirectional
   // sequence lstm ops, for static bidirectional sequence lstm, there will be
-  // a concatenation op at very end; for dynamic bidirectional squence lstm,
+  // a concatenation op at very end; for dynamic bidirectional sequence lstm,
   // it is not guaranteed, but currently we do not support that.
   auto op_it = model->operators.begin() + op_index;
   Operator* final_concat_op = op_it->get();
@@ -421,7 +421,7 @@
   *modified = false;
   // Bidirectional sequence rnn will generate two separate unidirectional
   // sequence rnn ops, for static bidirectional sequence rnn, there will be
-  // a concatenation op at very end; for dynamic bidirectional squence rnn,
+  // a concatenation op at very end; for dynamic bidirectional sequence rnn,
   // it is not guaranteed, but currently we do not support that.
   auto op_it = model->operators.begin() + op_index;
   Operator* final_concat_op = op_it->get();
diff --git a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 329ef92..6f65d52 100644
--- a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -1400,6 +1400,38 @@
   }
 }
 
+void ProcessGatherNdOperator(Model* model, GatherNdOperator* op) {
+  const auto& input_array = model->GetArray(op->inputs[0]);
+  const auto& indices_array = model->GetArray(op->inputs[1]);
+  auto& output_array = model->GetArray(op->outputs[0]);
+
+  // Bail if we already know the output shape.
+  if (output_array.has_shape()) {
+    return;
+  }
+
+  // Yield until input dims have been resolved.
+  if (!input_array.has_shape() || !indices_array.has_shape()) {
+    return;
+  }
+
+  const auto& input_shape = input_array.shape();
+  const auto& indices_shape = indices_array.shape();
+  QCHECK_GE(input_shape.dimensions_count(), 1);
+  QCHECK_GE(indices_shape.dimensions_count(), 1);
+  const int indices_nd =
+      indices_shape.dims(indices_shape.dimensions_count() - 1);
+  QCHECK_LE(indices_nd, input_shape.dimensions_count());
+
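+  // The output shape is indices.shape[:-1] + input.shape[indices_nd:], e.g.
+  // (illustrative) input [4,5,6] with indices [2,3,2] gives indices_nd = 2
+  // and output shape [2,3,6].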
+  auto output_dims = output_array.mutable_shape()->mutable_dims();
+  for (int dim = 0; dim < indices_shape.dimensions_count() - 1; ++dim) {
+    output_dims->push_back(indices_shape.dims(dim));
+  }
+  for (int dim = indices_nd; dim < input_shape.dimensions_count(); ++dim) {
+    output_dims->push_back(input_shape.dims(dim));
+  }
+}
+
 void ProcessTopkV2Operator(Model* model, TopKV2Operator* op) {
   const auto& input_values = model->GetArray(op->inputs[0]);
   const auto& input_k = model->GetArray(op->inputs[1]);
@@ -1485,7 +1517,7 @@
   output_array.copy_shape(output_shape);
 }
 
-void ProcessRankOperator(Model* model, RankOperator* op) {
+void ProcessRankOperator(Model* model, TensorFlowRankOperator* op) {
   CHECK_GE(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
@@ -2057,6 +2089,7 @@
     case OperatorType::kCeil:
     case OperatorType::kExp:
     case OperatorType::kSin:
+    case OperatorType::kCos:
     case OperatorType::kLogicalAnd:
     case OperatorType::kLogicalNot:
     case OperatorType::kLogicalOr:
@@ -2067,6 +2100,9 @@
     case OperatorType::kGather:
       ProcessGatherOperator(model, static_cast<GatherOperator*>(op));
       break;
+    case OperatorType::kGatherNd:
+      ProcessGatherNdOperator(model, static_cast<GatherNdOperator*>(op));
+      break;
     case OperatorType::kTopK_V2:
       ProcessTopkV2Operator(model, static_cast<TopKV2Operator*>(op));
       break;
@@ -2183,7 +2219,7 @@
       ProcessRangeOperator(model, static_cast<RangeOperator*>(op));
       break;
     case OperatorType::kRank:
-      ProcessRankOperator(model, static_cast<RankOperator*>(op));
+      ProcessRankOperator(model, static_cast<TensorFlowRankOperator*>(op));
       break;
     case OperatorType::kShape:
       ProcessShapeOperator(model, static_cast<TensorFlowShapeOperator*>(op));
@@ -2305,6 +2341,11 @@
     case OperatorType::kUnique:
       ProcessUniqueOperator(model, static_cast<UniqueOperator*>(op));
       break;
+    case OperatorType::kWhere:
+      // The size of the output can only be known after evaluating the cond
+      // tensor. Ignore shape propagation here and defer that to the
+      // interpreter.
+      break;
     default:
       // Unimplemented, another graph transformation should drop it.
       LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(op->type);
diff --git a/tensorflow/lite/toco/graph_transformations/quantize.cc b/tensorflow/lite/toco/graph_transformations/quantize.cc
index ee65f92..c7836f6 100644
--- a/tensorflow/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/lite/toco/graph_transformations/quantize.cc
@@ -489,20 +489,20 @@
     }
   }
   if (!SupportsQuantization(op)) {
-    LOG(FATAL) << "Unimplemented: this graph contains an operator of type "
-               << HelpfulOperatorTypeName(op)
-               << " for which the quantized form is not yet implemented. "
-                  "Sorry, and patches welcome (that's a relatively fun patch "
-                  "to write, mostly providing the actual quantized arithmetic "
-                  "code for this op).";
+    return tensorflow::errors::InvalidArgument(
+        "Unimplemented: this graph contains an operator of type ",
+        HelpfulOperatorTypeName(op),
+        " for which the quantized form is not yet implemented. Sorry, and "
+        "patches welcome (that's a relatively fun patch to write, mostly "
+        "providing the actual quantized arithmetic code for this op).");
   }
 
   for (const auto& input : op.inputs) {
     const auto& array = model->GetArray(input);
     if (array.data_type == ArrayDataType::kFloat) {
       if (!array.minmax && !array.buffer) {
-        LOG(ERROR) << "Can't quantize input array " << input
-                   << " because it lacks min/max info";
+        LOG(WARNING) << "Can't quantize input array " << input
+                     << " because it lacks min/max info";
         return ::tensorflow::Status::OK();
       }
       const auto* other_op = GetOpWithOutput(*model, input);
diff --git a/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc b/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc
index fdd411c..77803d5 100644
--- a/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc
+++ b/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc
@@ -218,6 +218,7 @@
   CHECK_EQ(input_dims.size(), new_perm.size());
 
   auto& transpose_array = model->GetOrCreateArray(transpose_op->inputs[1]);
+  transpose_array.data_type = ArrayDataType::kInt32;
   transpose_array.GetMutableBuffer<ArrayDataType::kInt32>().data = new_perm;
   *(transpose_array.mutable_shape()->mutable_dims()) = {
       static_cast<int>(new_perm.size())};
diff --git a/tensorflow/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc b/tensorflow/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
index c0becaf..2c860c3 100644
--- a/tensorflow/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
+++ b/tensorflow/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
@@ -61,11 +61,11 @@
   minmax.max = max_array.GetBuffer<ArrayDataType::kFloat>().data[0];
   // We always want [min, max] to contain 0.
   if (minmax.min > 0 || minmax.max < 0) {
-    LOG(ERROR) << "For " << LogName(*fakequant_op) << " the MinMax range "
-               << "[" << minmax.min << ", " << minmax.max
-               << "] does not contain 0. "
-               << "Proceeding by tweaking it to contain 0, which will result "
-                  "in poor accuracy.";
+    LOG(WARNING) << "For " << LogName(*fakequant_op) << " the MinMax range "
+                 << "[" << minmax.min << ", " << minmax.max
+                 << "] does not contain 0. "
+                 << "Proceeding by tweaking it to contain 0, which will result "
+                    "in poor accuracy.";
   }
   minmax.min = std::min(minmax.min, 0.);
   minmax.max = std::max(minmax.max, 0.);
diff --git a/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc b/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc
index 41a7353..7492f3e 100644
--- a/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc
+++ b/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc
@@ -13,17 +13,192 @@
 limitations under the License.
 ==============================================================================*/
 #include <memory>
+#include <numeric>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/lite/toco/graph_transformations/graph_transformations.h"
 #include "tensorflow/lite/toco/model.h"
 #include "tensorflow/lite/toco/tooling_util.h"
-#include "tensorflow/core/platform/logging.h"
 
 namespace toco {
 
+namespace {
+
+void UnrollBatchMatMul3D(
+    const string& input_lhs, const string& input_rhs,
+    const BatchMatMulOperator* batch_op, const std::vector<int>& batch,
+    Model* model, std::vector<std::unique_ptr<Operator>>::iterator* tail_it,
+    std::vector<string>* pack_inputs) {
+  const std::string batch_name =
+      absl::StrCat(batch_op->outputs[0], "_b", absl::StrJoin(batch, "-"));
+  const auto& input_array_a = model->GetArray(input_lhs);
+  const auto& input_array_b = model->GetArray(input_rhs);
+  const int dims_count = input_array_a.shape().dimensions_count();
+
+  // tf.slice(a, ...).
+  std::vector<int> begin_indices_a = batch;
+  begin_indices_a.resize(dims_count);
+  std::vector<int> slice_size_a = input_array_a.shape().dims();
+  for (int i = 0; i < batch.size(); ++i) {
+    slice_size_a[i] = 1;
+  }
+  auto* slice_a_op = new SliceOperator;
+  slice_a_op->inputs = {
+      input_lhs,
+      CreateInt32Array(model, batch_name + "/slice_a/slice/begin",
+                       begin_indices_a),
+      CreateInt32Array(model, batch_name + "/slice_a/slice/size", slice_size_a),
+  };
+  slice_a_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_a")};
+  auto& slice_a_op_output = model->GetOrCreateArray(slice_a_op->outputs[0]);
+  slice_a_op_output.data_type = input_array_a.data_type;
+  *tail_it = model->operators.emplace(*tail_it, slice_a_op) + 1;
+
+  // Reshape to remove the first dimension ([1,M,N] -> [M,N]).
+  auto* slice_a_reshape_op = new TensorFlowReshapeOperator;
+  slice_a_reshape_op->inputs = {
+      slice_a_op->outputs[0],
+      CreateInt32Array(model, batch_name + "/slice_a/reshape/shape",
+                       {-1, input_array_a.shape().dims(dims_count - 1)})};
+  slice_a_reshape_op->outputs = {
+      AvailableArrayName(*model, batch_name + "/slice_a/reshape")};
+  auto& slice_a_reshape_op_output =
+      model->GetOrCreateArray(slice_a_reshape_op->outputs[0]);
+  slice_a_reshape_op_output.data_type = input_array_a.data_type;
+  *tail_it = model->operators.emplace(*tail_it, slice_a_reshape_op) + 1;
+
+  // tf.slice(b, ...).
+  std::vector<int> begin_indices_b = batch;
+  begin_indices_b.resize(dims_count);
+  std::vector<int> slice_size_b = input_array_b.shape().dims();
+  for (int i = 0; i < batch.size(); ++i) {
+    slice_size_b[i] = 1;
+  }
+  auto* slice_b_op = new SliceOperator;
+  slice_b_op->inputs = {
+      input_rhs,
+      CreateInt32Array(model, batch_name + "/slice_b/slice/begin",
+                       begin_indices_b),
+      CreateInt32Array(model, batch_name + "/slice_b/slice/size", slice_size_b),
+  };
+  slice_b_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_b")};
+  auto& slice_b_op_output = model->GetOrCreateArray(slice_b_op->outputs[0]);
+  slice_b_op_output.data_type = input_array_b.data_type;
+  *tail_it = model->operators.emplace(*tail_it, slice_b_op) + 1;
+
+  // Reshape to remove the first dimension ([1,M,N] -> [M,N]).
+  auto* slice_b_reshape_op = new TensorFlowReshapeOperator;
+  slice_b_reshape_op->inputs = {
+      slice_b_op->outputs[0],
+      CreateInt32Array(model, batch_name + "/slice_b/reshape/shape",
+                       {-1, input_array_b.shape().dims(dims_count - 1)})};
+  slice_b_reshape_op->outputs = {
+      AvailableArrayName(*model, batch_name + "/slice_b/reshape")};
+  auto& slice_b_reshape_op_output =
+      model->GetOrCreateArray(slice_b_reshape_op->outputs[0]);
+  slice_b_reshape_op_output.data_type = input_array_b.data_type;
+  *tail_it = model->operators.emplace(*tail_it, slice_b_reshape_op) + 1;
+
+  // tf.matmul(slice_a, slice_b).
+  auto* matmul_op = new TensorFlowMatMulOperator;
+  matmul_op->inputs = {slice_a_reshape_op->outputs[0],
+                       slice_b_reshape_op->outputs[0]};
+  matmul_op->outputs = {AvailableArrayName(*model, batch_name)};
+  auto& matmul_op_output = model->GetOrCreateArray(matmul_op->outputs[0]);
+  matmul_op_output.data_type = input_array_a.data_type;
+  *tail_it = model->operators.emplace(*tail_it, matmul_op) + 1;
+
+  // Add to stack.
+  pack_inputs->push_back(matmul_op->outputs[0]);
+}
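+
+// Sketch of what UnrollBatchMatMul3D emits for a hypothetical batch {1, 2}
+// over [2, 3, M, K] x [2, 3, K, N] inputs: batch_name ends in "_b1-2",
+// slice begins are {1, 2, 0, 0}, slice sizes are {1, 1, M, K} and
+// {1, 1, K, N}, and the reshapes collapse the slices to [M, K] and [K, N]
+// before the plain MatMul.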
+
+std::vector<string> UnrollBatchMatMulRecursion(
+    const string& input_lhs, const string& input_rhs,
+    const BatchMatMulOperator* batch_op, Model* model,
+    std::vector<std::unique_ptr<Operator>>::iterator* tail_it,
+    const std::vector<int>& batch_prefix) {
+  const auto& input_array_a = model->GetArray(input_lhs);
+  const auto& dims_vec = input_array_a.shape().dims();
+  const int current_dim_size = dims_vec[batch_prefix.size()];
+  std::vector<string> batch_pack_inputs;
+
+  if (batch_prefix.size() + 3 == dims_vec.size()) {
+    // Base case
+    for (int batch = 0; batch < current_dim_size; ++batch) {
+      std::vector<int> new_batch_prefix = batch_prefix;
+      new_batch_prefix.emplace_back(batch);
+      UnrollBatchMatMul3D(input_lhs, input_rhs, batch_op, new_batch_prefix,
+                          model, tail_it, &batch_pack_inputs);
+    }
+  } else {
+    // Recursion
+    for (int batch = 0; batch < current_dim_size; ++batch) {
+      std::vector<int> new_batch_prefix = batch_prefix;
+      new_batch_prefix.emplace_back(batch);
+      std::vector<string> pack_inputs = UnrollBatchMatMulRecursion(
+          input_lhs, input_rhs, batch_op, model, tail_it, new_batch_prefix);
+
+      // The pack that will join all the individual matmul results together.
+      auto* pack_op = new PackOperator;
+      std::string batch_name = absl::StrCat(
+          batch_op->outputs[0], "_b", absl::StrJoin(new_batch_prefix, "-"));
+      pack_op->inputs = pack_inputs;
+      pack_op->outputs = {AvailableArrayName(*model, batch_name + "/pack")};
+      auto& pack_op_output = model->GetOrCreateArray(pack_op->outputs[0]);
+      pack_op_output.data_type = input_array_a.data_type;
+      pack_op->axis = 0;
+      pack_op->values_count = pack_inputs.size();
+      *tail_it = model->operators.emplace(*tail_it, pack_op) + 1;
+
+      batch_pack_inputs.push_back(pack_op->outputs[0]);
+    }
+  }
+  return batch_pack_inputs;
+}
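+
+// For a hypothetical rank-4 [2, 3, M, K] input, the base case fires once
+// batch_prefix holds one element: each of the 3 inner batches becomes a
+// MatMul, a Pack joins those 3 results, and the 2 pack outputs are returned
+// to the caller for the final outer Pack.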
+
+std::vector<int32> GetTransposePerm(const Array& input_array) {
+  const int32 dims = input_array.shape().dimensions_count();
+  std::vector<int32> perm_array_val(dims);
+  for (int i = 0; i < dims; ++i) {
+    perm_array_val[i] = i;
+  }
+  perm_array_val[dims - 2] = dims - 1;
+  perm_array_val[dims - 1] = dims - 2;
+  return perm_array_val;
+}
+
+std::vector<int32> GetTransposeShape(const Shape& input_shape,
+                                     const std::vector<int32>& perm_array_val) {
+  const int32 dims = input_shape.dimensions_count();
+  std::vector<int32> output_shape(dims);
+  for (int i = 0; i < dims; ++i) {
+    output_shape[i] = input_shape.dims(perm_array_val[i]);
+  }
+  return output_shape;
+}
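+
+// Example (hypothetical rank-3 input): GetTransposePerm on a [2, M, K] array
+// yields {0, 2, 1}, swapping the two innermost dims, so GetTransposeShape
+// maps [2, M, K] to [2, K, M].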
+
+TransposeOperator* TransposeInput(const string& input, Model* model) {
+  const auto& input_array = model->GetArray(input);
+  const auto perm_array = GetTransposePerm(input_array);
+  const string perm_array_name = CreateInt32Array(
+      model, AvailableArrayName(*model, input + "/transpose/perm"), perm_array);
+  auto* transpose_op = new TransposeOperator;
+  transpose_op->inputs = {input, perm_array_name};
+  transpose_op->outputs = {AvailableArrayName(*model, input + "/transpose")};
+  auto& transpose_array = model->GetOrCreateArray(transpose_op->outputs[0]);
+  *transpose_array.mutable_shape()->mutable_dims() =
+      GetTransposeShape(input_array.shape(), perm_array);
+  return transpose_op;
+}
+
+}  // namespace
+
 // Unrolls a BatchMatMul on the batch dimension.
 // We need to slice each batch out of the inputs, matmul them individually, then
 // stack them all back together at the end.
@@ -46,115 +221,67 @@
   const auto* batch_op =
       static_cast<const BatchMatMulOperator*>(batch_op_it->get());
 
-  // We must have the shape of at least one input to know our batch size.
-  const auto& input_array_a = model->GetArray(batch_op->inputs[0]);
-  const auto& input_array_b = model->GetArray(batch_op->inputs[1]);
-  if (!input_array_a.has_shape() || !input_array_b.has_shape())
+  auto& tail_it = batch_op_it;
+
+  string input_lhs = batch_op->inputs[0];
+  string input_rhs = batch_op->inputs[1];
+  const auto& input_lhs_array = model->GetArray(input_lhs);
+  const auto& input_rhs_array = model->GetArray(input_rhs);
+  if (!input_lhs_array.has_shape() || !input_rhs_array.has_shape())
     return ::tensorflow::Status::OK();
 
-  // We only support the rank 3 case. If you are batching on rank > 3 you'll
-  // have to figure that out.
-  CHECK_EQ(input_array_a.shape().dimensions_count(),
-           input_array_b.shape().dimensions_count())
-      << "Input dimensions must have the same rank";
-  if (input_array_a.shape().dimensions_count() == 2) {
+  // Transpose LHS input if necessary.
+  if (batch_op->adj_x) {
+    TransposeOperator* transpose_op = TransposeInput(input_lhs, model);
+    tail_it = model->operators.emplace(tail_it, transpose_op) + 1;
+    input_lhs = transpose_op->outputs[0];
+  }
+  const auto& input_array_a = model->GetArray(input_lhs);
+
+  // Transpose RHS input if necessary.
+  if (batch_op->adj_y) {
+    TransposeOperator* transpose_op = TransposeInput(input_rhs, model);
+    tail_it = model->operators.emplace(tail_it, transpose_op) + 1;
+    input_rhs = transpose_op->outputs[0];
+  }
+  const auto& input_array_b = model->GetArray(input_rhs);
+
+  const int dims = input_array_a.shape().dimensions_count();
+  for (int i = 0; i < dims - 2; ++i) {
+    CHECK_EQ(input_array_a.shape().dims(i), input_array_b.shape().dims(i))
+        << "input array not consistent at index " << i;
+  }
+  CHECK_EQ(input_array_a.shape().dims(dims - 1),
+           input_array_b.shape().dims(dims - 2))
+      << "Input dimensions must be compatible for multipication. shape a = ["
+      << absl::StrJoin(input_array_a.shape().dims(), ", ") << "], shape b = ["
+      << absl::StrJoin(input_array_b.shape().dims(), ", ") << "]";
+
+  if (dims == 2) {
     // This is really just a MatMul. This likely means that someone hand-crafted
     // a graphdef with a BatchMatMul when they really wanted a MatMul.
     AddMessageF("Replacing non-batch BatchMatMul %s by a MatMul operator",
                 LogName(*batch_op));
     auto* matmul_op = new TensorFlowMatMulOperator;
-    matmul_op->inputs = batch_op->inputs;
+    matmul_op->inputs = {input_lhs, input_rhs};
     matmul_op->outputs = batch_op->outputs;
-    const auto matmul_op_it = model->operators.emplace(batch_op_it, matmul_op);
-    batch_op_it = matmul_op_it + 1;
-    CHECK_EQ(batch_op_it->get(), batch_op);
-    model->operators.erase(batch_op_it);
+    tail_it = model->operators.emplace(tail_it, matmul_op) + 1;
+    CHECK_EQ(tail_it->get(), batch_op);
+    model->operators.erase(tail_it);
     *modified = true;
     return ::tensorflow::Status::OK();
   }
-  CHECK_EQ(input_array_a.shape().dimensions_count(), 3)
-      << "Input arrays must have rank 3";
 
-  // Perform the matmul for each slice of the batch.
-  int batch_count = input_array_a.shape().dims(0);
+  CHECK_GE(input_array_a.shape().dimensions_count(), 3)
+      << "Input arrays must have rank >= 3";
+
+  const auto& dims_vec = input_array_a.shape().dims();
   AddMessageF("Unrolling BatchMatMul %s %d times", LogName(*batch_op),
-              batch_count);
-  auto tail_it = batch_op_it;
-  std::vector<string> pack_inputs;
-  for (int batch = 0; batch < batch_count; ++batch) {
-    std::string batch_name =
-        std::string(batch_op->outputs[0]) + "_b" + std::to_string(batch);
+              std::accumulate(dims_vec.begin(), dims_vec.end() - 2, 1,
+                              std::multiplies<int>()));
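+  // E.g. a hypothetical [2, 3, M, K] input reports 2 * 3 = 6 unrolled
+  // MatMuls in the message above.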
 
-    // tf.slice(a, ...).
-    auto* slice_a_op = new SliceOperator;
-    slice_a_op->inputs = {
-        batch_op->inputs[0],
-        CreateInt32Array(model, batch_name + "/slice_a/slice/begin",
-                         {batch, 0, 0}),
-        CreateInt32Array(
-            model, batch_name + "/slice_a/slice/size",
-            {1, input_array_a.shape().dims(1), input_array_a.shape().dims(2)}),
-    };
-    slice_a_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_a")};
-    auto& slice_a_op_output = model->GetOrCreateArray(slice_a_op->outputs[0]);
-    slice_a_op_output.data_type = input_array_a.data_type;
-    tail_it = model->operators.emplace(tail_it, slice_a_op) + 1;
-
-    // Reshape to remove the first dimension ([1,M,N] -> [M,N]).
-    auto* slice_a_reshape_op = new TensorFlowReshapeOperator;
-    slice_a_reshape_op->inputs = {
-        slice_a_op->outputs[0],
-        CreateInt32Array(model, batch_name + "/slice_a/reshape/shape",
-                         {-1, input_array_a.shape().dims(2)})};
-    slice_a_reshape_op->outputs = {
-        AvailableArrayName(*model, batch_name + "/slice_a/reshape")};
-    auto& slice_a_reshape_op_output =
-        model->GetOrCreateArray(slice_a_reshape_op->outputs[0]);
-    slice_a_reshape_op_output.data_type = input_array_a.data_type;
-    tail_it = model->operators.emplace(tail_it, slice_a_reshape_op) + 1;
-
-    // tf.slice(b, ...).
-    auto* slice_b_op = new SliceOperator;
-    slice_b_op->inputs = {
-        batch_op->inputs[1],
-        CreateInt32Array(model, batch_name + "/slice_b/slice/begin",
-                         {batch, 0, 0}),
-        CreateInt32Array(
-            model, batch_name + "/slice_b/slice/size",
-            {1, input_array_b.shape().dims(1), input_array_b.shape().dims(2)}),
-    };
-    slice_b_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_b")};
-    auto& slice_b_op_output = model->GetOrCreateArray(slice_b_op->outputs[0]);
-    slice_b_op_output.data_type = input_array_b.data_type;
-    tail_it = model->operators.emplace(tail_it, slice_b_op) + 1;
-
-    // Reshape to remove the first dimension ([1,M,N] -> [M,N]).
-    auto* slice_b_reshape_op = new TensorFlowReshapeOperator;
-    slice_b_reshape_op->inputs = {
-        slice_b_op->outputs[0],
-        CreateInt32Array(model, batch_name + "/slice_b/reshape/shape",
-                         {-1, input_array_b.shape().dims(2)})};
-    slice_b_reshape_op->outputs = {
-        AvailableArrayName(*model, batch_name + "/slice_b/reshape")};
-    auto& slice_b_reshape_op_output =
-        model->GetOrCreateArray(slice_b_reshape_op->outputs[0]);
-    slice_b_reshape_op_output.data_type = input_array_b.data_type;
-    tail_it = model->operators.emplace(tail_it, slice_b_reshape_op) + 1;
-
-    // tf.matmul(slice_a, slice_b).
-    auto* matmul_op = new TensorFlowMatMulOperator;
-    matmul_op->inputs = {slice_a_reshape_op->outputs[0],
-                         slice_b_reshape_op->outputs[0]};
-    matmul_op->outputs = {AvailableArrayName(*model, batch_name)};
-    auto& matmul_op_output = model->GetOrCreateArray(matmul_op->outputs[0]);
-    matmul_op_output.data_type = input_array_a.data_type;
-    tail_it = model->operators.emplace(tail_it, matmul_op) + 1;
-
-    // Add to stack.
-    pack_inputs.push_back(matmul_op->outputs[0]);
-  }
-
-  // The pack that will join all the individual matmul results together.
+  std::vector<string> pack_inputs = UnrollBatchMatMulRecursion(
+      input_lhs, input_rhs, batch_op, model, &tail_it, {});
   auto* pack_op = new PackOperator;
   pack_op->inputs = pack_inputs;
   pack_op->outputs = {batch_op->outputs[0]};
diff --git a/tensorflow/lite/toco/import_tensorflow.cc b/tensorflow/lite/toco/import_tensorflow.cc
index 813e439..927fa2a 100644
--- a/tensorflow/lite/toco/import_tensorflow.cc
+++ b/tensorflow/lite/toco/import_tensorflow.cc
@@ -1092,11 +1092,14 @@
     Model* model) {
   TF_QCHECK_OK(CheckInputsCount(node, tf_import_flags, 2));
 
-  // https://www.tensorflow.org/versions/r0.12/api_docs/python/math_ops/matrix_math_functions
-  CHECK(!HasAttr(node, "adj_a") || (GetBoolAttr(node, "adj_a") == false));
-  CHECK(!HasAttr(node, "adj_b") || (GetBoolAttr(node, "adj_b") == false));
-
   auto* batch_matmul = new BatchMatMulOperator;
+  // https://www.tensorflow.org/versions/r0.12/api_docs/python/math_ops/matrix_math_functions
+  if (HasAttr(node, "adj_x")) {
+    batch_matmul->adj_x = GetBoolAttr(node, "adj_x");
+  }
+  if (HasAttr(node, "adj_y")) {
+    batch_matmul->adj_y = GetBoolAttr(node, "adj_y");
+  }
   batch_matmul->inputs = {node.input(0), node.input(1)};
   batch_matmul->outputs = {node.name()};
 
@@ -1346,7 +1349,7 @@
   }
 
   // Parse outputs. Name them after the node's name, plus an ordinal suffix.
-  // Note that some outputs are to be multipled by a named attribute.
+  // Note that some outputs are to be multiplied by a named attribute.
   const tensorflow::OpDef* op_def = nullptr;
   if (tensorflow::OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
     GetOutputNamesFromNodeDef(node, *op_def, op);
@@ -1480,7 +1483,7 @@
   if (node.attr().count("shape")) {
     const auto& shape = GetShapeAttr(node, "shape");
     auto num_dims = shape.dim_size();
-    // TODO(b/62716978): This logic needs to be revisted.  During dims
+    // TODO(b/62716978): This logic needs to be revisited.  During dims
     // refactoring it is an interim fix.
     if (num_dims > 0 && !HasWildcardDimension(shape)) {
       auto& dst_array_dims = *array.mutable_shape()->mutable_dims();
@@ -1572,6 +1575,21 @@
   return tensorflow::Status::OK();
 }
 
+tensorflow::Status ConvertGatherNdOperator(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
+  CHECK_EQ(node.op(), "GatherNd");
+  TF_QCHECK_OK(CheckInputsCount(node, tf_import_flags, 2));
+  const auto indices_data_type = GetDataTypeAttr(node, "Tindices");
+  CHECK(indices_data_type == DT_INT32 || indices_data_type == DT_INT64);
+  auto* op = new GatherNdOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+  return tensorflow::Status::OK();
+}
+
 template <typename Op>
 tensorflow::Status ConvertArgMinMaxOperator(
     const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
@@ -2396,6 +2414,7 @@
       {"Const", ConvertConstOperator},
       {"Conv2D", ConvertConvOperator},
       {"Conv2DBackpropInput", ConvertTransposeConvOperator},
+      {"Cos", ConvertSimpleOperator<CosOperator, 1, 1>},
       {"CTCBeamSearchDecoder", ConvertCTCBeamSearchDecoderOperator},
       {"DepthToSpace", ConvertDepthToSpaceOperator},
       {"DepthwiseConv2dNative", ConvertDepthwiseConvOperator},
@@ -2414,6 +2433,7 @@
       {"FusedBatchNorm", ConvertFusedBatchNormOperator},
       {"Gather", ConvertGatherOperator},
       {"GatherV2", ConvertGatherOperator},
+      {"GatherNd", ConvertGatherNdOperator},
       {"Greater", ConvertSimpleOperator<TensorFlowGreaterOperator, 2, 1>},
       {"GreaterEqual",
        ConvertSimpleOperator<TensorFlowGreaterEqualOperator, 2, 1>},
@@ -2452,7 +2472,7 @@
       {"Prod", ConvertReduceOperator<TensorFlowProdOperator>},
       {"RandomUniform", ConvertRandomUniform},
       {"Range", ConvertRangeOperator},
-      {"Rank", ConvertSimpleOperator<RankOperator, 1, 1>},
+      {"Rank", ConvertSimpleOperator<TensorFlowRankOperator, 1, 1>},
       {"RealDiv", ConvertSimpleOperator<DivOperator, 2, 1>},
       {"Relu", ConvertSimpleOperator<ReluOperator, 1, 1>},
       {"Relu6", ConvertSimpleOperator<Relu6Operator, 1, 1>},
@@ -2494,6 +2514,7 @@
       {"UnidirectionalSequenceRnn", ConvertUnidirectionalSequenceRnn},
       {"MirrorPad", ConvertMirrorPadOperator},
       {"Unique", ConvertSimpleOperator<UniqueOperator, 1, 2>},
+      {"Where", ConvertSimpleOperator<WhereOperator, 1, 1>},
   });
 }
 
diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h
index b99cb74..e38f50e 100644
--- a/tensorflow/lite/toco/model.h
+++ b/tensorflow/lite/toco/model.h
@@ -24,11 +24,11 @@
 #include <vector>
 
 #include "absl/types/optional.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/lite/toco/model_flags.pb.h"
 #include "tensorflow/lite/toco/runtime/types.h"
 #include "tensorflow/lite/toco/toco_port.h"
 #include "tensorflow/lite/toco/toco_types.h"
-#include "tensorflow/core/platform/logging.h"
 
 namespace toco {
 
@@ -45,6 +45,7 @@
   kCeil,
   kConv,
   kConcatenation,
+  kCos,
   kDepthwiseConv,
   kDepthToSpace,
   kSpaceToDepth,
@@ -163,7 +164,9 @@
   kUnidirectionalSequenceRnn,
   kBidirectionalSequenceLstm,
   kReverseV2,
-  kBidirectionalSequenceRnn
+  kBidirectionalSequenceRnn,
+  kGatherNd,
+  kWhere
 };
 
 // Helper to deal with TensorFlow arrays using a different ordering of
@@ -964,6 +967,8 @@
 // TensorFlow equivalent: MatMul
 struct BatchMatMulOperator : Operator {
   BatchMatMulOperator() : Operator(OperatorType::kBatchMatMul) {}
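+  // When set, the corresponding input is transposed on its two innermost
+  // dims before the multiplication (mirroring TensorFlow's adj_x/adj_y
+  // attributes).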
+  bool adj_x = false;
+  bool adj_y = false;
 };
 
 // General matrix multiplication operator. We don't want to support general
@@ -1166,6 +1171,17 @@
   ExpOperator() : Operator(OperatorType::kExp) {}
 };
 
+// Given a tensor input, this operation computes the element-wise cosine
+// (y = cos(x)).
+//
+// Inputs:
+//   inputs[0]: required: input tensor
+//
+// TensorFlow equivalent: Cos
+struct CosOperator : Operator {
+  CosOperator() : Operator(OperatorType::kCos) {}
+};
+
 // Given a tensor input, this operation inserts a dimension of 1 at the
 // dimension index axis of input's shape. The dimension index axis starts at
 // zero; if you specify a negative number for axis it is counted backward from
@@ -1244,13 +1260,12 @@
 // Inputs:
 //   inputs[0]: required: the input array
 //
-// This operation outputs a 0-D integer tensor representing the rank of
-// the input.
+// This operation outputs a 0-D int32 Tensor representing the rank of the input.
 //
-// TensorFlow equivalent: Rank.  We currently assume that the output is int32
-// and not int64.  The output type could be stored herein.
-struct RankOperator : Operator {
-  RankOperator() : Operator(OperatorType::kRank) {}
+// TensorFlow equivalent: Rank.
+struct TensorFlowRankOperator : Operator {
+  TensorFlowRankOperator() : Operator(OperatorType::kRank) {}
+  ArrayDataType output_data_type = ArrayDataType::kInt32;
 };
 
 // Element-wise negation (-x) operator.
@@ -1707,6 +1722,17 @@
   int input_rank = 0;
 };
 
+// GatherNd operator. It gathers slices from params according to indices.
+//
+// Inputs:
+//   inputs[0]: required: the params array
+//   inputs[1]: required: the indices to gather
+//
+// TensorFlow equivalent: GatherNd
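+//
+// Example (hypothetical values): params = [[a, b], [c, d]] with
+// indices = [[1, 0]] gathers params[1][0], so the output is [c].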
+struct GatherNdOperator : Operator {
+  GatherNdOperator() : Operator(OperatorType::kGatherNd) {}
+};
+
 // ArgMax operator. It returns the index of the maximum value along axis.
 //
 // Inputs:
@@ -2011,6 +2037,18 @@
   FusedActivationFunctionType fused_activation_function;
 };
 
+// Where operator:
+// Returns the coordinates of the true values in the condition tensor, in
+// row-major order.
+//
+// Inputs:
+//  inputs[0]: required: boolean condition tensor
+//
+//  TensorFlow equivalent: Where
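+//
+//  Example (hypothetical): condition = [[true, false], [false, true]] yields
+//  the coordinates [[0, 0], [1, 1]].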
+struct WhereOperator : Operator {
+  WhereOperator() : Operator(OperatorType::kWhere) {}
+};
+
 // Alloc's are used for transient arrays only. An Alloc specifies which interval
 // of the "transient_data" workspace buffer passed to inference functions, is to
 // be used for the transient array at hand. The 'start' and 'end' values are
diff --git a/tensorflow/lite/toco/python/BUILD b/tensorflow/lite/toco/python/BUILD
index 6da4833..2f5654c 100644
--- a/tensorflow/lite/toco/python/BUILD
+++ b/tensorflow/lite/toco/python/BUILD
@@ -25,6 +25,7 @@
     deps = [
         "//third_party/python_runtime:headers",
         "//tensorflow/core:lib",
+        "//tensorflow/lite/python/interpreter_wrapper:python_utils",
         "//tensorflow/lite/toco:model_flags_proto_cc",
         "//tensorflow/lite/toco:toco_flags_proto_cc",
         "//tensorflow/lite/toco:toco_graphviz_dump_options",
diff --git a/tensorflow/lite/toco/python/toco_python_api.cc b/tensorflow/lite/toco/python/toco_python_api.cc
index ce8e3c9..6fad092 100644
--- a/tensorflow/lite/toco/python/toco_python_api.cc
+++ b/tensorflow/lite/toco/python/toco_python_api.cc
@@ -16,6 +16,7 @@
 #include <vector>
 #include "tensorflow/core/platform/logging.h"
 
+#include "tensorflow/lite/python/interpreter_wrapper/python_utils.h"
 #include "tensorflow/lite/toco/model_flags.pb.h"
 #include "tensorflow/lite/toco/python/toco_python_api.h"
 #include "tensorflow/lite/toco/toco_flags.pb.h"
@@ -26,14 +27,6 @@
 
 namespace toco {
 
-#if PY_MAJOR_VERSION >= 3
-#define TOCO_PY_TO_CPPSTRING PyBytes_AsStringAndSize
-#define TOCO_FROM_CPPSTRING_TO_PY PyBytes_FromStringAndSize
-#else
-#define TOCO_PY_TO_CPPSTRING PyString_AsStringAndSize
-#define TOCO_FROM_CPPSTRING_TO_PY PyString_FromStringAndSize
-#endif
-
 // NOTE(aselle): We are using raw PyObject's here because we want to make
 // sure we input and output bytes rather than unicode strings for Python3.
 PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw,
@@ -44,7 +37,7 @@
   auto ConvertArg = [&](PyObject* obj, bool* error) {
     char* buf;
     Py_ssize_t len;
-    if (TOCO_PY_TO_CPPSTRING(obj, &buf, &len) == -1) {
+    if (::tflite::python_utils::ConvertFromPyString(obj, &buf, &len) == -1) {
       *error = true;
       return std::string();
     } else {
@@ -96,15 +89,15 @@
     PyObject* dict = PyDict_New();
     PyDict_SetItemString(
         dict, "flatbuffer",
-        TOCO_FROM_CPPSTRING_TO_PY(output_file_contents_txt.data(),
-                                  output_file_contents_txt.size()));
+        ::tflite::python_utils::ConvertToPyString(
+            output_file_contents_txt.data(), output_file_contents_txt.size()));
     PyDict_SetItemString(dict, "arithmetic_ops",
                          PyLong_FromLong(model->ArithmeticOpsCount()));
     return dict;
   }
   // Convert arguments back to byte (py3) or str (py2)
-  return TOCO_FROM_CPPSTRING_TO_PY(output_file_contents_txt.data(),
-                                   output_file_contents_txt.size());
+  return ::tflite::python_utils::ConvertToPyString(
+      output_file_contents_txt.data(), output_file_contents_txt.size());
 }
 
 }  // namespace toco
diff --git a/tensorflow/lite/toco/tflite/export.cc b/tensorflow/lite/toco/tflite/export.cc
index 53f5ece..1ce37d2 100644
--- a/tensorflow/lite/toco/tflite/export.cc
+++ b/tensorflow/lite/toco/tflite/export.cc
@@ -63,12 +63,12 @@
   return false;
 }
 
-// Check if a TensorFlow Op is unsupportred by the Flex runtime.
+// Check if a TensorFlow Op is unsupported by the Flex runtime.
 bool IsUnsupportedFlexOp(const string& tensorflow_op) {
   if (IsControlFlowOp(tensorflow_op)) {
     return true;
   }
-  // `HashTableV2` isn't supported for now since it requires an additinonal
+  // `HashTableV2` isn't supported for now since it requires an additional
   // initialization step.
   // TODO(b/117651199): Support `HashTableV2` with Flex runtime.
   if (tensorflow_op == "HashTableV2") {
@@ -157,7 +157,7 @@
         string(::tflite::kFlexCustomCodePrefix) + flex_tensorflow_op_;
   } else {
     // If Flex is disabled or the original TensorFlow NodeDef isn't available,
-    // we produce a custom op. This gives developers a chance to implemenr
+    // we produce a custom op. This gives developers a chance to implement
     // custom ops.
     custom_code_ = name;
   }
@@ -222,7 +222,7 @@
 
     std::vector<int> shape;
     if (array.has_shape()) {
-      for (int d : array.shape().dims()) {
+      for (const auto& d : array.shape().dims()) {
         shape.push_back(d);
       }
     }
diff --git a/tensorflow/lite/toco/tflite/export_test.cc b/tensorflow/lite/toco/tflite/export_test.cc
index 58cfb49..fb640f7 100644
--- a/tensorflow/lite/toco/tflite/export_test.cc
+++ b/tensorflow/lite/toco/tflite/export_test.cc
@@ -51,9 +51,27 @@
         output_array.data_type = ArrayDataType::kFloat;
         input_model_.operators.emplace_back(op);
       } else if (name == "Add") {
-        input_model_.operators.emplace_back(new AddOperator);
+        auto* op = new AddOperator;
+        op->inputs = {"input1", "input2"};
+        op->outputs = {"output"};
+        Array& input1_array = input_model_.GetOrCreateArray(op->inputs[0]);
+        Array& input2_array = input_model_.GetOrCreateArray(op->inputs[1]);
+        Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]);
+        input1_array.data_type = ArrayDataType::kFloat;
+        input2_array.data_type = ArrayDataType::kFloat;
+        output_array.data_type = ArrayDataType::kFloat;
+        input_model_.operators.emplace_back(op);
       } else if (name == "Sub") {
-        input_model_.operators.emplace_back(new SubOperator);
+        auto* op = new SubOperator;
+        op->inputs = {"input1", "input2"};
+        op->outputs = {"output"};
+        Array& input1_array = input_model_.GetOrCreateArray(op->inputs[0]);
+        Array& input2_array = input_model_.GetOrCreateArray(op->inputs[1]);
+        Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]);
+        input1_array.data_type = ArrayDataType::kFloat;
+        input2_array.data_type = ArrayDataType::kFloat;
+        output_array.data_type = ArrayDataType::kFloat;
+        input_model_.operators.emplace_back(op);
       } else if (name == "Assert") {
         auto* op = new TensorFlowAssertOperator;
 
@@ -114,7 +132,18 @@
       output_array.data_type = ArrayDataType::kFloat;
       input_model_.operators.emplace_back(op);
     }
-    input_model_.operators.emplace_back(new AddOperator);
+    {
+      auto* op = new AddOperator;
+      op->inputs = {"input1", "input2"};
+      op->outputs = {"output"};
+      Array& input1_array = input_model_.GetOrCreateArray(op->inputs[0]);
+      Array& input2_array = input_model_.GetOrCreateArray(op->inputs[1]);
+      Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]);
+      input1_array.data_type = ArrayDataType::kFloat;
+      input2_array.data_type = ArrayDataType::kFloat;
+      output_array.data_type = ArrayDataType::kFloat;
+      input_model_.operators.emplace_back(op);
+    }
   }
 
   std::vector<string> ExportAndSummarizeOperators(const ExportParams& params) {
@@ -445,7 +474,7 @@
   auto* model = ::tflite::GetModel(result.data());
   auto operator_codes = model->operator_codes();
 
-  // Verify that 2 operator codes are populdated. Both are CONV_2D but with
+  // Verify that 2 operator codes are populated. Both are CONV_2D but with
   // different versions.
   EXPECT_EQ(2, operator_codes->size());
   EXPECT_EQ(::tflite::BuiltinOperator_CONV_2D,
diff --git a/tensorflow/lite/toco/tflite/import_test.cc b/tensorflow/lite/toco/tflite/import_test.cc
index 93ab514..b00c412 100644
--- a/tensorflow/lite/toco/tflite/import_test.cc
+++ b/tensorflow/lite/toco/tflite/import_test.cc
@@ -60,7 +60,7 @@
                                builder_.CreateString("tensor_one"), q);
     auto t2 =
         ::tflite::CreateTensor(builder_, builder_.CreateVector<int>({2, 1}),
-                               ::tflite::TensorType_FLOAT32, 2,
+                               ::tflite::TensorType_FLOAT32, 0,
                                builder_.CreateString("tensor_two"), q);
     return builder_.CreateVector(
         std::vector<Offset<::tflite::Tensor>>({t1, t2}));
diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc
index 4ce3aa9..93f7b99 100644
--- a/tensorflow/lite/toco/tflite/operator.cc
+++ b/tensorflow/lite/toco/tflite/operator.cc
@@ -19,6 +19,7 @@
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/util/ptr_util.h"
+
 // TODO(ycling): Consider refactoring to extract the LSTM definition out of
 // graph_transformation module.
 #include "tensorflow/lite/schema/schema_generated.h"
@@ -199,6 +200,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -239,6 +246,12 @@
                    TocoOperator* op) const override {}
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -263,6 +276,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -308,6 +327,12 @@
                    TocoOperator* op) const override {}
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -446,18 +471,29 @@
     }
   }
 
+  // +-----------------+--------------------+--------------------------+
+  // |                 |    Weight::Default | Weight::Shuffled4x16Int8 |
+  // +-----------------+--------------------+--------------------------+
+  // | Float           |                  1 |                        2 |
+  // | Quantized Uint8 |                  1 |                        2 |
+  // | Hybrid          |                  3 |                        3 |
+  // | Quantized Int8  |                  4 |                        4 |
+  // +-----------------+--------------------+--------------------------+
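+  // For example (hypothetical graphs): a float FC with default weights maps
+  // to version 1, a hybrid FC (float input/output, int8 weights) to version
+  // 3 in either weights format, and a fully-int8 FC to version 4.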
   int GetVersion(const OperatorSignature& op_signature) const override {
     const auto& fc_op =
         static_cast<const FullyConnectedOperator&>(*op_signature.op);
-    if (fc_op.weights_format == FullyConnectedWeightsFormat::kDefault) {
-      return 1;
-    }
     const string& input_name = op_signature.op->inputs[0];
     const string& weights_name = op_signature.op->inputs[1];
     const string& output_name = op_signature.op->outputs[0];
     const Array& input_array = op_signature.model->GetArray(input_name);
     const Array& weights_array = op_signature.model->GetArray(weights_name);
     const Array& output_array = op_signature.model->GetArray(output_name);
+    // The fully-int8 fixed-point kernel is at version 4.
+    if (input_array.data_type == ArrayDataType::kInt8 &&
+        weights_array.data_type == ArrayDataType::kInt8 &&
+        output_array.data_type == ArrayDataType::kInt8) {
+      return 4;
+    }
     // If the op is a signed int8 hybrid operation, we need to return
     // version 3.
     if (input_array.data_type == ArrayDataType::kFloat &&
@@ -465,7 +501,15 @@
         output_array.data_type == ArrayDataType::kFloat) {
       return 3;
     }
-    return 2;
+    // For float and uint8 fixed-point kernels, if the weights format is
+    // Shuffled4x16Int8, it is version 2.
+    if (fc_op.weights_format ==
+        FullyConnectedWeightsFormat::kShuffled4x16Int8) {
+      return 2;
+    }
+
+    // Otherwise (default weights format), the version is 1.
+    return 1;
   }
 };
 
@@ -496,6 +540,26 @@
   }
 };
 
+class GatherNd
+    : public BuiltinOperator<GatherNdOperator, ::tflite::GatherNdOptions,
+                             ::tflite::BuiltinOptions_GatherNdOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    return ::tflite::CreateGatherNdOptions(*builder);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {}
+
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
+};
+
 class Svdf : public BuiltinOperator<SvdfOperator, ::tflite::SVDFOptions,
                                     ::tflite::BuiltinOptions_SVDFOptions> {
  public:
@@ -643,6 +707,39 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class Maximum : public SimpleOperator<TensorFlowMaximumOperator> {
+ public:
+  explicit Maximum() : SimpleOperator("MAXIMUM", OperatorType::kMaximum) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class Minimum : public SimpleOperator<TensorFlowMinimumOperator> {
+ public:
+  explicit Minimum() : SimpleOperator("MINIMUM", OperatorType::kMinimum) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -667,6 +764,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -686,6 +789,12 @@
                    TocoOperator* op) const override {}
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -723,6 +832,12 @@
                    TocoOperator* op) const override {}
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -796,6 +911,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -815,6 +936,12 @@
                    TocoOperator* op) const override {}
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1104,6 +1231,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1125,6 +1258,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1171,6 +1310,20 @@
   }
 };
 
+class Relu6 : public SimpleOperator<Relu6Operator> {
+ public:
+  explicit Relu6() : SimpleOperator("RELU6", OperatorType::kRelu6) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
 class ResizeBilinear
     : public BuiltinOperator<ResizeBilinearOperator,
                              ::tflite::ResizeBilinearOptions,
@@ -1189,6 +1342,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1212,6 +1371,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1259,6 +1424,14 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2; for int32, it is version 3.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    } else if (input_array.data_type == ArrayDataType::kInt32) {
+      return 3;
+    }
     return 1;
   }
 };
@@ -1309,6 +1482,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1327,6 +1506,11 @@
                    TocoOperator* op) const override {}
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1348,6 +1532,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+
     return 1;
   }
 };
@@ -1369,6 +1559,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+
     return 1;
   }
 };
@@ -1461,6 +1657,12 @@
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // If the op takes int8 input, it is version 2.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1487,6 +1689,34 @@
   }
 };
 
+class Slice : public SimpleOperator<SliceOperator> {
+ public:
+  explicit Slice() : SimpleOperator("SLICE", OperatorType::kSlice) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class Tanh : public SimpleOperator<TanhOperator> {
+ public:
+  explicit Tanh() : SimpleOperator("TANH", OperatorType::kTanh) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
 class OneHot : public BuiltinOperator<OneHotOperator, ::tflite::OneHotOptions,
                                       ::tflite::BuiltinOptions_OneHotOptions> {
  public:
@@ -1569,6 +1799,35 @@
   }
 };
 
+class Logistic : public SimpleOperator<LogisticOperator> {
+ public:
+  explicit Logistic() : SimpleOperator("LOGISTIC", OperatorType::kLogistic) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class LogSoftmax : public SimpleOperator<LogSoftmaxOperator> {
+ public:
+  explicit LogSoftmax()
+      : SimpleOperator("LOG_SOFTMAX", OperatorType::kLogSoftmax) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
 class SquaredDifference
     : public BuiltinOperator<
           SquaredDifferenceOperator, ::tflite::SquaredDifferenceOptions,
@@ -1673,6 +1932,25 @@
   }
 };
 
+class Where : public BuiltinOperator<WhereOperator, ::tflite::WhereOptions,
+                                     ::tflite::BuiltinOptions_WhereOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    return ::tflite::CreateWhereOptions(*builder);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {}
+
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
+};
+
 std::unique_ptr<flexbuffers::Builder> WriteFlexOpOptions(
     const string& tensorflow_node_def) {
   auto fbb = absl::make_unique<flexbuffers::Builder>();
@@ -1913,6 +2191,106 @@
   }
 };
 
+class Equal : public SimpleOperator<TensorFlowEqualOperator> {
+ public:
+  explicit Equal() : SimpleOperator("EQUAL", OperatorType::kEqual) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class NotEqual : public SimpleOperator<TensorFlowNotEqualOperator> {
+ public:
+  explicit NotEqual() : SimpleOperator("NOT_EQUAL", OperatorType::kNotEqual) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class Greater : public SimpleOperator<TensorFlowGreaterOperator> {
+ public:
+  explicit Greater() : SimpleOperator("GREATER", OperatorType::kGreater) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class GreaterEqual : public SimpleOperator<TensorFlowGreaterEqualOperator> {
+ public:
+  explicit GreaterEqual()
+      : SimpleOperator("GREATER_EQUAL", OperatorType::kGreaterEqual) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class Less : public SimpleOperator<TensorFlowLessOperator> {
+ public:
+  explicit Less() : SimpleOperator("LESS", OperatorType::kLess) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class LessEqual : public SimpleOperator<TensorFlowLessEqualOperator> {
+ public:
+  explicit LessEqual()
+      : SimpleOperator("LESS_EQUAL", OperatorType::kLessEqual) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
+class Select : public SimpleOperator<SelectOperator> {
+ public:
+  explicit Select() : SimpleOperator("SELECT", OperatorType::kSelect) {}
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
+    return 1;
+  }
+};
+
 namespace {
 // Build a vector containing all the known operators.
 std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
@@ -1950,6 +2328,8 @@
                                  OperatorType::kFullyConnected));
   ops.push_back(MakeUnique<Gather>(::tflite::BuiltinOperator_GATHER,
                                    OperatorType::kGather));
+  ops.push_back(MakeUnique<GatherNd>(::tflite::BuiltinOperator_GATHER_ND,
+                                     OperatorType::kGatherNd));
   ops.push_back(
       MakeUnique<L2Normalization>(::tflite::BuiltinOperator_L2_NORMALIZATION,
                                   OperatorType::kL2Normalization));
@@ -2052,6 +2432,8 @@
   ops.push_back(MakeUnique<UnidirectionalSequenceRnn>(
       ::tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
       OperatorType::kUnidirectionalSequenceRnn));
+  ops.push_back(
+      MakeUnique<Where>(::tflite::BuiltinOperator_WHERE, OperatorType::kWhere));
 
   // Custom Operators.
   ops.push_back(
@@ -2075,40 +2457,28 @@
       MakeUnique<SimpleOperator<ReluOperator>>("RELU", OperatorType::kRelu));
   ops.push_back(MakeUnique<SimpleOperator<Relu1Operator>>(
       "RELU_N1_TO_1", OperatorType::kRelu1));
-  ops.push_back(
-      MakeUnique<SimpleOperator<Relu6Operator>>("RELU6", OperatorType::kRelu6));
+  ops.push_back(MakeUnique<Relu6>());
   ops.push_back(
       MakeUnique<SimpleOperator<PReluOperator>>("PRELU", OperatorType::kPRelu));
-  ops.push_back(MakeUnique<SimpleOperator<LogisticOperator>>(
-      "LOGISTIC", OperatorType::kLogistic));
-  ops.push_back(
-      MakeUnique<SimpleOperator<TanhOperator>>("TANH", OperatorType::kTanh));
+  ops.push_back(MakeUnique<Logistic>());
+  ops.push_back(MakeUnique<Tanh>());
   ops.push_back(
       MakeUnique<SimpleOperator<ExpOperator>>("EXP", OperatorType::kExp));
-  ops.push_back(MakeUnique<SimpleOperator<LogSoftmaxOperator>>(
-      "LOG_SOFTMAX", OperatorType::kLogSoftmax));
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowMaximumOperator>>(
-      "MAXIMUM", OperatorType::kMaximum));  //  Element-wise Maximum
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowMinimumOperator>>(
-      "MINIMUM", OperatorType::kMinimum));  //  Element-wise Minimum
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowGreaterOperator>>(
-      "GREATER", OperatorType::kGreater));
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowGreaterEqualOperator>>(
-      "GREATER_EQUAL", OperatorType::kGreaterEqual));
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowLessOperator>>(
-      "LESS", OperatorType::kLess));
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowLessEqualOperator>>(
-      "LESS_EQUAL", OperatorType::kLessEqual));
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowEqualOperator>>(
-      "EQUAL", OperatorType::kEqual));
-  ops.push_back(MakeUnique<SimpleOperator<TensorFlowNotEqualOperator>>(
-      "NOT_EQUAL", OperatorType::kNotEqual));
+  ops.push_back(
+      MakeUnique<SimpleOperator<CosOperator>>("COS", OperatorType::kCos));
+  ops.push_back(MakeUnique<LogSoftmax>());
+  ops.push_back(MakeUnique<Maximum>());  // Element-wise Maximum
+  ops.push_back(MakeUnique<Minimum>());  // Element-wise Minimum
+  ops.push_back(MakeUnique<Greater>());
+  ops.push_back(MakeUnique<GreaterEqual>());
+  ops.push_back(MakeUnique<Less>());
+  ops.push_back(MakeUnique<LessEqual>());
+  ops.push_back(MakeUnique<Equal>());
+  ops.push_back(MakeUnique<NotEqual>());
   ops.push_back(
       MakeUnique<SimpleOperator<NegOperator>>("NEG", OperatorType::kNeg));
-  ops.push_back(MakeUnique<SimpleOperator<SelectOperator>>(
-      "SELECT", OperatorType::kSelect));
-  ops.push_back(
-      MakeUnique<SimpleOperator<SliceOperator>>("SLICE", OperatorType::kSlice));
+  ops.push_back(MakeUnique<Select>());
+  ops.push_back(MakeUnique<Slice>());
   ops.push_back(
       MakeUnique<SimpleOperator<PowOperator>>("POW", OperatorType::kPow));
   ops.push_back(MakeUnique<SimpleOperator<LogicalOrOperator>>(
@@ -2142,6 +2512,8 @@
       MakeUnique<SimpleOperator<FillOperator>>("FILL", OperatorType::kFill));
   ops.push_back(MakeUnique<SimpleOperator<ReverseV2Operator>>(
       "REVERSE_V2", OperatorType::kReverseV2));
+  ops.push_back(MakeUnique<SimpleOperator<TensorFlowRankOperator>>(
+      "RANK", OperatorType::kRank));
   return ops;
 }
 }  // namespace
diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc
index 43b52c4..3e78d3a 100644
--- a/tensorflow/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/lite/toco/tflite/operator_test.cc
@@ -119,6 +119,7 @@
   CheckSimpleOperator<LogisticOperator>("LOGISTIC", OperatorType::kLogistic);
   CheckSimpleOperator<TanhOperator>("TANH", OperatorType::kTanh);
   CheckSimpleOperator<ExpOperator>("EXP", OperatorType::kExp);
+  CheckSimpleOperator<CosOperator>("COS", OperatorType::kCos);
   CheckSimpleOperator<LogSoftmaxOperator>("LOG_SOFTMAX",
                                           OperatorType::kLogSoftmax);
   CheckSimpleOperator<TensorFlowMaximumOperator>(
@@ -153,6 +154,7 @@
   CheckSimpleOperator<FillOperator>("FILL", OperatorType::kFill);
   CheckSimpleOperator<ReverseV2Operator>("REVERSE_V2",
                                          OperatorType::kReverseV2);
+  CheckSimpleOperator<TensorFlowRankOperator>("RANK", OperatorType::kRank);
 }
 
 TEST_F(OperatorTest, BuiltinAdd) {
@@ -239,6 +241,20 @@
   ASSERT_NE(nullptr, output_toco_op.get());
 }
 
+TEST_F(OperatorTest, BuiltinGatherNd) {
+  GatherNdOperator op;
+  auto output_toco_op = SerializeAndDeserialize(
+      GetOperator("GATHER_ND", OperatorType::kGatherNd), op);
+  ASSERT_NE(output_toco_op.get(), nullptr);
+}
+
+TEST_F(OperatorTest, BuiltinWhere) {
+  WhereOperator op;
+  auto output_toco_op =
+      SerializeAndDeserialize(GetOperator("WHERE", OperatorType::kWhere), op);
+  ASSERT_NE(output_toco_op.get(), nullptr);
+}
+
 TEST_F(OperatorTest, BuiltinL2Pool) {
   L2PoolOperator op;
   op.stride_width = 123;
@@ -287,6 +303,44 @@
   EXPECT_EQ(op.kheight, output_toco_op->kheight);
 }
 
+TEST_F(OperatorTest, VersioningMaxTest) {
+  TensorFlowMaximumOperator max_op;
+  max_op.inputs = {"input1"};
+  auto operator_by_type_map = BuildOperatorByTypeMap(false /*enable_flex_ops*/);
+  const BaseOperator* op = operator_by_type_map.at(max_op.type).get();
+
+  Model uint8_model;
+  Array& uint8_array = uint8_model.GetOrCreateArray(max_op.inputs[0]);
+  uint8_array.data_type = ArrayDataType::kUint8;
+  OperatorSignature uint8_signature = {.model = &uint8_model, .op = &max_op};
+  EXPECT_EQ(op->GetVersion(uint8_signature), 1);
+
+  Model int8_model;
+  Array& int8_array = int8_model.GetOrCreateArray(max_op.inputs[0]);
+  int8_array.data_type = ArrayDataType::kInt8;
+  OperatorSignature int8_signature = {.model = &int8_model, .op = &max_op};
+  EXPECT_EQ(op->GetVersion(int8_signature), 2);
+}
+
+TEST_F(OperatorTest, VersioningMinTest) {
+  TensorFlowMinimumOperator min_op;
+  min_op.inputs = {"input1"};
+  auto operator_by_type_map = BuildOperatorByTypeMap(false /*enable_flex_ops*/);
+  const BaseOperator* op = operator_by_type_map.at(min_op.type).get();
+
+  Model uint8_model;
+  Array& uint8_array = uint8_model.GetOrCreateArray(min_op.inputs[0]);
+  uint8_array.data_type = ArrayDataType::kUint8;
+  OperatorSignature uint8_signature = {.model = &uint8_model, .op = &min_op};
+  EXPECT_EQ(op->GetVersion(uint8_signature), 1);
+
+  Model int8_model;
+  Array& int8_array = int8_model.GetOrCreateArray(min_op.inputs[0]);
+  int8_array.data_type = ArrayDataType::kInt8;
+  OperatorSignature int8_signature = {.model = &int8_model, .op = &min_op};
+  EXPECT_EQ(op->GetVersion(int8_signature), 2);
+}
+
 TEST_F(OperatorTest, BuiltinReshape) {
   TensorFlowReshapeOperator op;
   op.shape = {1, 2, 4, 5, 8};
@@ -548,6 +602,26 @@
   EXPECT_EQ(op.alpha, output_toco_op->alpha);
 }
 
+TEST_F(OperatorTest, VersioningLogisticTest) {
+  LogisticOperator logistic_op;
+  logistic_op.inputs = {"input1"};
+  auto operator_by_type_map = BuildOperatorByTypeMap(false /*enable_flex_ops*/);
+  const BaseOperator* op = operator_by_type_map.at(logistic_op.type).get();
+
+  Model uint8_model;
+  Array& uint8_array = uint8_model.GetOrCreateArray(logistic_op.inputs[0]);
+  uint8_array.data_type = ArrayDataType::kUint8;
+  OperatorSignature uint8_signature = {.model = &uint8_model,
+                                       .op = &logistic_op};
+  EXPECT_EQ(op->GetVersion(uint8_signature), 1);
+
+  Model int8_model;
+  Array& int8_array = int8_model.GetOrCreateArray(logistic_op.inputs[0]);
+  int8_array.data_type = ArrayDataType::kInt8;
+  OperatorSignature int8_signature = {.model = &int8_model, .op = &logistic_op};
+  EXPECT_EQ(op->GetVersion(int8_signature), 2);
+}
+
 TEST_F(OperatorTest, BuiltinSquaredDifference) {
   SquaredDifferenceOperator op;
   auto output_toco_op = SerializeAndDeserialize(
@@ -677,6 +751,141 @@
   EXPECT_EQ(output_toco_op->idx_out_type, op.idx_out_type);
 }
 
+// Tests versioning for a simple op with two versions, where the input type
+// controls the version.
+template <typename Op>
+void SimpleVersioningTest() {
+  Op op;
+  op.inputs = {"input1"};
+  auto operator_by_type_map = BuildOperatorByTypeMap(false /*enable_flex_ops*/);
+  const BaseOperator* base_op = operator_by_type_map.at(op.type).get();
+
+  Model uint8_model;
+  Array& uint8_array = uint8_model.GetOrCreateArray(op.inputs[0]);
+  uint8_array.data_type = ArrayDataType::kUint8;
+  OperatorSignature uint8_signature = {.model = &uint8_model, .op = &op};
+  EXPECT_EQ(base_op->GetVersion(uint8_signature), 1);
+
+  Model int8_model;
+  Array& int8_array = int8_model.GetOrCreateArray(op.inputs[0]);
+  int8_array.data_type = ArrayDataType::kInt8;
+  OperatorSignature int8_signature = {.model = &int8_model, .op = &op};
+  EXPECT_EQ(base_op->GetVersion(int8_signature), 2);
+}
+
+TEST_F(OperatorTest, VersioningEqualTest) {
+  SimpleVersioningTest<TensorFlowEqualOperator>();
+}
+
+TEST_F(OperatorTest, VersioningNotEqualTest) {
+  SimpleVersioningTest<TensorFlowNotEqualOperator>();
+}
+
+TEST_F(OperatorTest, VersioningLessTest) {
+  SimpleVersioningTest<TensorFlowLessOperator>();
+}
+
+TEST_F(OperatorTest, VersioningLessEqualTest) {
+  SimpleVersioningTest<TensorFlowLessEqualOperator>();
+}
+
+TEST_F(OperatorTest, VersioningGreaterTest) {
+  SimpleVersioningTest<TensorFlowGreaterOperator>();
+}
+
+TEST_F(OperatorTest, VersioningGreaterEqualTest) {
+  SimpleVersioningTest<TensorFlowGreaterEqualOperator>();
+}
+
+TEST_F(OperatorTest, VersioningSpaceToBatchNDTest) {
+  SimpleVersioningTest<SpaceToBatchNDOperator>();
+}
+
+TEST_F(OperatorTest, VersioningLogSoftmaxTest) {
+  SimpleVersioningTest<LogSoftmaxOperator>();
+}
+
+TEST_F(OperatorTest, VersioningPackTest) {
+  SimpleVersioningTest<PackOperator>();
+}
+
+TEST_F(OperatorTest, VersioningBatchToSpaceNDTest) {
+  SimpleVersioningTest<BatchToSpaceNDOperator>();
+}
+
+TEST_F(OperatorTest, VersioningTanhTest) {
+  SimpleVersioningTest<TanhOperator>();
+}
+
+TEST_F(OperatorTest, VersioningStridedSliceTest) {
+  SimpleVersioningTest<StridedSliceOperator>();
+}
+
+TEST_F(OperatorTest, VersioningSpaceToDepthTest) {
+  SimpleVersioningTest<SpaceToDepthOperator>();
+}
+
+TEST_F(OperatorTest, VersioningSliceTest) {
+  SimpleVersioningTest<SliceOperator>();
+}
+
+TEST_F(OperatorTest, VersioningAddTest) { SimpleVersioningTest<AddOperator>(); }
+
+TEST_F(OperatorTest, VersioningSubTest) { SimpleVersioningTest<SubOperator>(); }
+
+TEST_F(OperatorTest, VersioningMulTest) { SimpleVersioningTest<MulOperator>(); }
+
+TEST_F(OperatorTest, VersioningPadTest) { SimpleVersioningTest<PadOperator>(); }
+
+TEST_F(OperatorTest, VersioningPadV2Test) {
+  SimpleVersioningTest<PadV2Operator>();
+}
+
+TEST_F(OperatorTest, VersioningSelectTest) {
+  SimpleVersioningTest<SelectOperator>();
+}
+
+TEST_F(OperatorTest, VersioningRelu6Test) {
+  SimpleVersioningTest<Relu6Operator>();
+}
+
+TEST_F(OperatorTest, VersioningFullyConnectedTest) {
+  FullyConnectedOperator fully_connected_op;
+  fully_connected_op.inputs = {"input", "weight"};
+  fully_connected_op.outputs = {"output"};
+  auto operator_by_type_map = BuildOperatorByTypeMap(false /*enable_flex_ops*/);
+  const BaseOperator* op =
+      operator_by_type_map.at(fully_connected_op.type).get();
+
+  Model uint8_model;
+  Array& input_uint8_array =
+      uint8_model.GetOrCreateArray(fully_connected_op.inputs[0]);
+  input_uint8_array.data_type = ArrayDataType::kUint8;
+  Array& weight_uint8_array =
+      uint8_model.GetOrCreateArray(fully_connected_op.inputs[1]);
+  weight_uint8_array.data_type = ArrayDataType::kUint8;
+  Array& output_uint8_array =
+      uint8_model.GetOrCreateArray(fully_connected_op.outputs[0]);
+  output_uint8_array.data_type = ArrayDataType::kUint8;
+  OperatorSignature uint8_signature = {.model = &uint8_model,
+                                       .op = &fully_connected_op};
+  EXPECT_EQ(op->GetVersion(uint8_signature), 1);
+
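+  // FULLY_CONNECTED jumps straight to version 4 for the full-int8 case;
+  // the intermediate versions presumably cover other features (e.g. the
+  // shuffled-weights format).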
+  Model int8_model;
+  Array& input_int8_array =
+      int8_model.GetOrCreateArray(fully_connected_op.inputs[0]);
+  input_int8_array.data_type = ArrayDataType::kInt8;
+  Array& weight_int8_array =
+      int8_model.GetOrCreateArray(fully_connected_op.inputs[1]);
+  weight_int8_array.data_type = ArrayDataType::kInt8;
+  Array& output_int8_array =
+      int8_model.GetOrCreateArray(fully_connected_op.outputs[0]);
+  output_int8_array.data_type = ArrayDataType::kInt8;
+  OperatorSignature int8_signature = {.model = &int8_model,
+                                      .op = &fully_connected_op};
+  EXPECT_EQ(op->GetVersion(int8_signature), 4);
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/toco/toco.cc b/tensorflow/lite/toco/toco.cc
index 4a3d6a5..aa7e433 100644
--- a/tensorflow/lite/toco/toco.cc
+++ b/tensorflow/lite/toco/toco.cc
@@ -49,5 +49,10 @@
   }
   toco::port::InitGoogle(argv[0], effective_argc, &effective_argv, true);
   auto status = toco::Convert(parsed_toco_flags, parsed_model_flags);
-  return status.ok() ? 0 : -1;
+  if (!status.ok()) {
+    fprintf(stderr, "%s\n", status.error_message().c_str());
+    fflush(stderr);
+    return 1;
+  }
+  return 0;
 }
diff --git a/tensorflow/lite/toco/toco_convert.cc b/tensorflow/lite/toco/toco_convert.cc
index 28e7b10..2adfc1d 100644
--- a/tensorflow/lite/toco/toco_convert.cc
+++ b/tensorflow/lite/toco/toco_convert.cc
@@ -77,7 +77,7 @@
                            string* output_file_contents) {
   std::unique_ptr<Model> model =
       Import(toco_flags, model_flags, graph_def_contents);
-  Transform(toco_flags, model.get());
+  TF_RETURN_IF_ERROR(TransformWithStatus(toco_flags, model.get()));
   return Export(toco_flags, *model, toco_flags.allow_custom_ops(),
                 output_file_contents);
 }
diff --git a/tensorflow/lite/toco/toco_tooling.cc b/tensorflow/lite/toco/toco_tooling.cc
index 69d7a7a..229be2f 100644
--- a/tensorflow/lite/toco/toco_tooling.cc
+++ b/tensorflow/lite/toco/toco_tooling.cc
@@ -236,7 +236,8 @@
   return model;
 }
 
-void Transform(const TocoFlags& toco_flags, Model* model) {
+tensorflow::Status TransformWithStatus(const TocoFlags& toco_flags,
+                                       Model* model) {
   const FileFormat output_format = toco_flags.output_format();
   const IODataType inference_type = toco_flags.inference_type();
 
@@ -258,8 +259,8 @@
   // stop optimizations from crossing the input/output boundaries. For example,
   // this will stop BatchNorm fusing if the output node is between a conv and
   // a BatchNorm layer.
-  RunGraphTransformations(model, "Removing unused ops",
-                          {new toco::RemoveUnusedOp});
+  TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+      model, "Removing unused ops", {new toco::RemoveUnusedOp}));
 
   GraphTransformationsSet transformations;
   MakeGeneralGraphTransformationsSet(&transformations);
@@ -307,20 +308,21 @@
     identify_dilated_conv->set_identify_depthwise_conv(false);
   }
   transformations.Add(identify_dilated_conv);
-  RunGraphTransformations(model, "general graph transformations",
-                          transformations);
+  TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+      model, "general graph transformations", transformations));
 
   if (quantize_output) {
     if (toco_flags.propagate_fake_quant_num_bits()) {
-      RunGraphTransformations(model,
-                              "fake quant propagation graph transformations",
-                              {new PropagateFakeQuantNumBits});
+      TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+          model, "fake quant propagation graph transformations",
+          {new PropagateFakeQuantNumBits}));
     }
-    RunGraphTransformations(model, "pre-quantization graph transformations",
-                            {
-                                new HardcodeMinMax,
-                                new DropFakeQuant,
-                            });
+    TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+        model, "pre-quantization graph transformations",
+        {
+            new HardcodeMinMax,
+            new DropFakeQuant,
+        }));
   }
 
   // Try to merge bidirectional sequence lstm or rnn if present.
@@ -328,8 +330,9 @@
   bidirectional_transformations.Add(new RemoveUnusedOp);
   bidirectional_transformations.Add(new toco::GroupBidirectionalSequenceLstm);
   bidirectional_transformations.Add(new toco::GroupBidirectionalSequenceRnn);
-  RunGraphTransformations(model, "Group bidirectional sequence lstm/rnn",
-                          bidirectional_transformations);
+  TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+      model, "Group bidirectional sequence lstm/rnn",
+      bidirectional_transformations));
 
   // Fix any issues with IO edges. This must happen after any transform that
   // may modify the structure of the edges.
@@ -357,12 +360,12 @@
           toco_flags.default_int16_ranges_max());
     }
     if (propagate_default_min_max->has_any_ranges_defined()) {
-      RunGraphTransformations(
+      TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
           model, "default min-max range propagation graph transformations",
           {
               propagate_default_min_max.release(),
               new HardcodeMinMax,
-          });
+          }));
     }
 
     CheckIsReadyForQuantization(*model);
@@ -372,17 +375,18 @@
         toco_flags.allow_nudging_weights_to_use_fast_gemm_kernel());
     ensure_safe_for_int8_kernels->set_has_default_ranges_flag(
         has_default_ranges_flag);
-    RunGraphTransformations(model, "quantization graph transformations",
-                            {
-                                new RemoveTrivialQuantizedActivationFunc,
-                                new RemoveTrivialQuantizedMinMax,
-                                new Quantize,
-                                new RemoveFinalDequantizeOp,
-                                ensure_safe_for_int8_kernels,
-                            });
+    TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+        model, "quantization graph transformations",
+        {
+            new RemoveTrivialQuantizedActivationFunc,
+            new RemoveTrivialQuantizedMinMax,
+            new Quantize,
+            new RemoveFinalDequantizeOp,
+            ensure_safe_for_int8_kernels,
+        }));
     if (SupportsShuffledFCWeights(output_format)) {
-      RunGraphTransformations(model, "shuffling of FC weights",
-                              {new ShuffleFCWeights});
+      TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+          model, "shuffling of FC weights", {new ShuffleFCWeights}));
     }
   } else {
     GraphTransformationsSet dequantization_transformations{new Dequantize};
@@ -392,8 +396,9 @@
       dequantization_transformations.Add(new DropFakeQuant);
     }
 
-    RunGraphTransformations(model, "dequantization graph transformations",
-                            dequantization_transformations);
+    TF_RETURN_IF_ERROR(RunGraphTransformationsWithStatus(
+        model, "dequantization graph transformations",
+        dequantization_transformations));
   }
 
   if (output_format == TENSORFLOW_GRAPHDEF) {
@@ -425,6 +430,7 @@
               << " billion (note that a multiply-add is counted as 2 ops).";
   }
   model->ops_count = ops_count;
+  return tensorflow::Status::OK();
 }
 
 tensorflow::Status Export(const TocoFlags& toco_flags, const Model& model,
@@ -448,7 +454,7 @@
       return status;
     } break;
     case GRAPHVIZ_DOT:
-      DumpGraphviz(model, output_file_contents);
+      DumpGraphviz(model, output_file_contents, "Computation Graph");
       break;
     default:
       LOG(FATAL) << "Unhandled output_format='"
diff --git a/tensorflow/lite/toco/toco_tooling.h b/tensorflow/lite/toco/toco_tooling.h
index 742e376..3699615 100644
--- a/tensorflow/lite/toco/toco_tooling.h
+++ b/tensorflow/lite/toco/toco_tooling.h
@@ -31,7 +31,12 @@
 
 // Transforms a Model. The resulting Model is ready to be passed
 // to Export with the exact same toco_flags.
-void Transform(const TocoFlags& toco_flags, Model* model);
+tensorflow::Status TransformWithStatus(const TocoFlags& toco_flags,
+                                       Model* model);
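+// Backwards-compatible wrapper that CHECK-fails on a non-OK status.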
+inline void Transform(const TocoFlags& toco_flags, Model* model) {
+  auto s = TransformWithStatus(toco_flags, model);
+  CHECK(s.ok()) << s.error_message();
+}
 
 // Exports the Model, which must be of the 'lowered' form returned by
 // Transform, to a file of the format given by
diff --git a/tensorflow/lite/toco/tooling_util.cc b/tensorflow/lite/toco/tooling_util.cc
index 6978dde..ccd8008 100644
--- a/tensorflow/lite/toco/tooling_util.cc
+++ b/tensorflow/lite/toco/tooling_util.cc
@@ -66,29 +66,29 @@
 string ArrayDataTypeName(ArrayDataType data_type) {
   switch (data_type) {
     case ArrayDataType::kFloat:
-      return "Float";
+      return "float";
     case ArrayDataType::kInt8:
-      return "Int8";
+      return "int8";
     case ArrayDataType::kUint8:
-      return "Uint8";
+      return "uint8";
     case ArrayDataType::kInt16:
-      return "Int16";
+      return "int16";
     case ArrayDataType::kUint16:
-      return "Uint16";
+      return "uint16";
     case ArrayDataType::kInt32:
-      return "Int32";
+      return "int32";
     case ArrayDataType::kUint32:
-      return "Uint32";
+      return "uint32";
     case ArrayDataType::kInt64:
-      return "Int64";
+      return "int64";
     case ArrayDataType::kUint64:
-      return "Uint64";
+      return "uint64";
     case ArrayDataType::kString:
-      return "String";
+      return "string";
     case ArrayDataType::kBool:
-      return "Bool";
+      return "bool";
     case ArrayDataType::kComplex64:
-      return "Complex64";
+      return "complex64";
     case ArrayDataType::kNone:
       return "None";
     default:
@@ -387,6 +387,7 @@
     HANDLE_OPERATORTYPENAME_CASE(Floor)
     HANDLE_OPERATORTYPENAME_CASE(Ceil)
     HANDLE_OPERATORTYPENAME_CASE(Gather)
+    HANDLE_OPERATORTYPENAME_CASE(GatherNd)
     HANDLE_OPERATORTYPENAME_CASE(ResizeBilinear)
     HANDLE_OPERATORTYPENAME_CASE(SpaceToBatchND)
     HANDLE_OPERATORTYPENAME_CASE(BatchToSpaceND)
@@ -422,6 +423,8 @@
     HANDLE_OPERATORTYPENAME_CASE(Unique)
     HANDLE_OPERATORTYPENAME_CASE(UnidirectionalSequenceRnn)
     HANDLE_OPERATORTYPENAME_CASE(ReverseV2)
+    HANDLE_OPERATORTYPENAME_CASE(Cos)
+    HANDLE_OPERATORTYPENAME_CASE(Where)
     default:
       LOG(FATAL) << "Unhandled op type";
 #undef HANDLE_OPERATORTYPENAME_CASE
@@ -538,7 +541,8 @@
   static int dump_id = 0;
   static std::unordered_set<std::size_t> dump_hashes;
   string graphviz_dump;
-  DumpGraphviz(model, &graphviz_dump);
+  DumpGraphviz(model, &graphviz_dump,
+               toco::port::StringF("VIDEO frame:%05d", dump_id));
   std::size_t hash = std::hash<string>{}(graphviz_dump);
   if (!dump_hashes.count(hash)) {
     LOG(INFO) << "DUMPING GRAPHVIZ VIDEO FRAME: " << dump_id;
@@ -561,7 +565,7 @@
   if (!dump_options.dump_graphviz.empty()) {
     string graphviz_dump;
 
-    DumpGraphviz(model, &graphviz_dump);
+    DumpGraphviz(model, &graphviz_dump, message);
     const auto result = port::file::SetContents(
         port::file::JoinPath(
             dump_options.dump_graphviz,
@@ -1863,119 +1867,140 @@
   return param_array_name;
 }
 
+bool EstimateArithmeticOpsCount(const Model& model, const Operator& op,
+                                int64* result) {
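+  // Estimates the cost of a single operator in arithmetic ops; returns false
+  // when the shapes needed for the estimate are not yet known.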
+  switch (op.type) {
+    case OperatorType::kFullyConnected:
+    case OperatorType::kConv:
+    case OperatorType::kDepthwiseConv: {
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      const auto& weights_array = model.GetArray(op.inputs[1]);
+      if (!output_array.has_shape() || !weights_array.has_shape()) {
+        return false;
+      }
+      int64 cols = 1;
+      for (int i = 0; i < output_array.shape().dimensions_count() - 1; i++) {
+        cols *= output_array.shape().dims(i);
+      }
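+      // Each output "column" consumes every weight once, at one multiply and
+      // one add per weight element, hence the factor of 2.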
+      const int64 cost_per_col =
+          2 * RequiredBufferSizeForShape(weights_array.shape());
+      *result = cost_per_col * cols;
+      if (op.inputs.size() > 2) {
+        // There is a bias vector. One more op per output value.
+        *result += RequiredBufferSizeForShape(output_array.shape());
+      }
+      break;
+    }
+    case OperatorType::kAdd:
+    case OperatorType::kSub:
+    case OperatorType::kMul: {
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      if (!output_array.has_shape()) {
+        return false;
+      }
+      *result = RequiredBufferSizeForShape(output_array.shape());
+      break;
+    }
+    case OperatorType::kAddN: {
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      if (!output_array.has_shape()) {
+        return false;
+      }
+      // AddN costs roughly the same as N-1 Adds.
+      const int64 num_adds = op.inputs.size() - 1;
+      *result = num_adds * RequiredBufferSizeForShape(output_array.shape());
+      break;
+    }
+    case OperatorType::kLogistic:
+    case OperatorType::kSoftmax:
+    case OperatorType::kLogSoftmax:
+    case OperatorType::kTanh: {
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      if (!output_array.has_shape()) {
+        return false;
+      }
+      // As a very rough ballpark, the cost of evaluating a math function
+      // such as tanh or logistic is about 32 multiplications, and about as
+      // many additions/subtractions. (Just a power-of-two order-of-magnitude
+      // from looking at actual implementations that we use in runtime/ code).
+      *result = 64 * RequiredBufferSizeForShape(output_array.shape());
+      break;
+    }
+    case OperatorType::kMaxPool: {
+      const auto& maxpool = *static_cast<const MaxPoolOperator*>(&op);
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      if (!output_array.has_shape()) {
+        return false;
+      }
+      *result = RequiredBufferSizeForShape(output_array.shape()) *
+                maxpool.kheight * maxpool.kwidth;
+      break;
+    }
+    case OperatorType::kAveragePool: {
+      const auto& avgpool = *static_cast<const AveragePoolOperator*>(&op);
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      if (!output_array.has_shape()) {
+        return false;
+      }
+      *result = RequiredBufferSizeForShape(output_array.shape()) *
+                avgpool.kheight * avgpool.kwidth;
+      break;
+    }
+    case OperatorType::kL2Pool: {
+      const auto* maxpool = static_cast<const MaxPoolOperator*>(&op);
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      if (!output_array.has_shape()) {
+        return false;
+      }
+      // The sum of squares requires (kheight*kwidth) multiply-adds,
+      // and then there is the sqrt which we ballpark at 32 ops.
+      const int64 cost_per_val = 2 * maxpool->kheight * maxpool->kwidth + 32;
+      *result = RequiredBufferSizeForShape(output_array.shape()) * cost_per_val;
+      break;
+    }
+    case OperatorType::kL2Normalization: {
+      const auto& output_array = model.GetArray(op.outputs[0]);
+      if (!output_array.has_shape()) {
+        return false;
+      }
+      // Computing the squared L2 norm is N multiply-adds so 2N ops,
+      // then the single inverse-sqrt is negligible, then we multiply each
+      // value by the resulting multiplier, so an extra N ops: 3N ops in total.
+      *result = 3 * RequiredBufferSizeForShape(output_array.shape());
+      break;
+    }
+    default:
+      *result = 0;
+      break;
+  }
+  return true;
+}
+
 bool EstimateArithmeticOpsCount(const Model& model, int64* result) {
   int64 total = 0;
   for (const auto& op : model.operators) {
-    switch (op->type) {
-      case OperatorType::kFullyConnected:
-      case OperatorType::kConv:
-      case OperatorType::kDepthwiseConv: {
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        const auto& weights_array = model.GetArray(op->inputs[1]);
-        if (!output_array.has_shape() || !weights_array.has_shape()) {
-          return false;
-        }
-        int cols = 1;
-        for (int i = 0; i < output_array.shape().dimensions_count() - 1; i++) {
-          cols *= output_array.shape().dims(i);
-        }
-        const int64 cost_per_col =
-            2 * RequiredBufferSizeForShape(weights_array.shape());
-        total += cost_per_col * cols;
-        if (op->inputs.size() > 2) {
-          // There is a bias vector. One more op per output value.
-          total += RequiredBufferSizeForShape(output_array.shape());
-        }
-        break;
-      }
-      case OperatorType::kAdd:
-      case OperatorType::kSub:
-      case OperatorType::kMul: {
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        if (!output_array.has_shape()) {
-          return false;
-        }
-        total += RequiredBufferSizeForShape(output_array.shape());
-        break;
-      }
-      case OperatorType::kAddN: {
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        if (!output_array.has_shape()) {
-          return false;
-        }
-        // AddN cost is roughly the same cost as N-1 Adds.
-        const int num_adds = op->inputs.size() - 1;
-        total += num_adds * RequiredBufferSizeForShape(output_array.shape());
-        break;
-      }
-      case OperatorType::kLogistic:
-      case OperatorType::kSoftmax:
-      case OperatorType::kLogSoftmax:
-      case OperatorType::kTanh: {
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        if (!output_array.has_shape()) {
-          return false;
-        }
-        // As a very rough ballpark, the cost of evaluating a math function
-        // such as tanh or logistic is about 32 multiplications, and about as
-        // many additions/subtractions. (Just a power-of-two order-of-magnitude
-        // from looking at actual implementations that we use in runtime/ code).
-        total += 64 * RequiredBufferSizeForShape(output_array.shape());
-        break;
-      }
-      case OperatorType::kMaxPool: {
-        const auto& maxpool = *static_cast<const MaxPoolOperator*>(op.get());
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        if (!output_array.has_shape()) {
-          return false;
-        }
-        total += RequiredBufferSizeForShape(output_array.shape()) *
-                 maxpool.kheight * maxpool.kwidth;
-        break;
-      }
-      case OperatorType::kAveragePool: {
-        const auto& avgpool =
-            *static_cast<const AveragePoolOperator*>(op.get());
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        if (!output_array.has_shape()) {
-          return false;
-        }
-        total += RequiredBufferSizeForShape(output_array.shape()) *
-                 avgpool.kheight * avgpool.kwidth;
-        break;
-      }
-      case OperatorType::kL2Pool: {
-        const auto* maxpool = static_cast<const MaxPoolOperator*>(op.get());
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        if (!output_array.has_shape()) {
-          return false;
-        }
-        // The sum of squares requires (kheight*kwidth) multiply-adds,
-        // and then there is the sqrt which we ballpark at 32 ops.
-        const int64 cost_per_val = 2 * maxpool->kheight * maxpool->kwidth + 32;
-        total +=
-            RequiredBufferSizeForShape(output_array.shape()) * cost_per_val;
-        break;
-      }
-      case OperatorType::kL2Normalization: {
-        const auto& output_array = model.GetArray(op->outputs[0]);
-        if (!output_array.has_shape()) {
-          return false;
-        }
-        // Computing the squared L2 norm is N multiply-adds so 2N ops,
-        // then the single inverse-sqrt is negligible, then we multiply each
-        // value by the resulting multiplier, so an extra N ops. Total 3N ops.
-        total += 3 * RequiredBufferSizeForShape(output_array.shape());
-        break;
-      }
-      default:
-        break;
+    int64 num_ops;
+    if (!EstimateArithmeticOpsCount(model, *op, &num_ops)) {
+      return false;
     }
+    total += num_ops;
   }
   *result = total;
   return true;
 }
 
+string FormattedNumber(int64 x) {
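+  // E.g., given the thresholds below: 42 -> "42 ", 12345678 -> "12.346 M",
+  // 3000000000 -> "3.000 G".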
+  const int64 million = 1000000;
+  const int64 billion = 1000000000;
+  if (x < 10000) {
+    return toco::port::StringF("%d ", x);
+  } else if (x < billion) {
+    return toco::port::StringF("%.3f M", static_cast<double>(x) / million);
+  } else {
+    return toco::port::StringF("%.3f G", static_cast<double>(x) / billion);
+  }
+}
+
 void GetShuffleShape(AxesOrder input_axes_order, AxesOrder output_axes_order,
                      std::vector<int>* shuffle) {
   CHECK_EQ(AxesCount(input_axes_order), AxesCount(output_axes_order));
diff --git a/tensorflow/lite/toco/tooling_util.h b/tensorflow/lite/toco/tooling_util.h
index 517da78..fc4aac7 100644
--- a/tensorflow/lite/toco/tooling_util.h
+++ b/tensorflow/lite/toco/tooling_util.h
@@ -267,7 +267,10 @@
 string CreateInt32Array(Model* model, const string& param_name,
                         const std::vector<int>& value);
 
+bool EstimateArithmeticOpsCount(const Model& model, const Operator& op,
+                                int64* result);
 bool EstimateArithmeticOpsCount(const Model& model, int64* result);
+string FormattedNumber(int64 x);
 
 int AxesCount(AxesOrder axes_order);
 
diff --git a/tensorflow/lite/tools/BUILD b/tensorflow/lite/tools/BUILD
index 1d141b5..f67b3f9 100644
--- a/tensorflow/lite/tools/BUILD
+++ b/tensorflow/lite/tools/BUILD
@@ -70,7 +70,9 @@
         "//tensorflow/lite:framework",
         "//tensorflow/lite:schema_fbs_version",
         "//tensorflow/lite:string_util",
+        "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/schema:schema_fbs",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -86,6 +88,7 @@
         "//tensorflow/core:framework_lite",
         "//tensorflow/lite:framework",
         "//tensorflow/lite:schema_fbs_version",
+        "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/schema:schema_fbs",
         "//tensorflow/lite/testing:util",
         "@com_google_googletest//:gtest",
diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
index 0bc7565..83e0ff1 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -356,11 +356,23 @@
     }
   }
 
-  if (interpreter->AllocateTensors() != kTfLiteOk) {
+  // Don't allocate tensors if we have delegates.
+  if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
     TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
   }
 }
 
+void BenchmarkTfLiteModel::ApplyDelegates() {
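+  // ModifyGraphWithDelegate presumably (re)allocates tensors while applying
+  // each delegate, which is why Init() skips AllocateTensors() when delegates
+  // are present.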
+  for (int i = 0; i < delegates_.size(); ++i) {
+    if (interpreter->ModifyGraphWithDelegate(delegates_[i].get()) !=
+        kTfLiteOk) {
+      TFLITE_LOG(FATAL) << "Failed to apply delegate # " << i;
+    } else {
+      TFLITE_LOG(INFO) << "Applied delegate # " << i;
+    }
+  }
+}
+
 void BenchmarkTfLiteModel::RunImpl() {
   if (interpreter->Invoke() != kTfLiteOk) {
     TFLITE_LOG(FATAL) << "Failed to invoke!";
diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
index 83599e6..3532d2a 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
@@ -70,6 +70,9 @@
   uint64_t ComputeInputBytes() override;
   void Init() override;
   void RunImpl() override;
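+  // Takes ownership of |delegates|; ApplyDelegates() installs them on the
+  // interpreter during initialization.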
+  void SetDelegates(std::vector<std::unique_ptr<TfLiteDelegate>> delegates) {
+    delegates_ = std::move(delegates);
+  }
 
   struct InputLayerInfo {
     std::string name;
@@ -81,7 +84,7 @@
   void PrepareInputsAndOutputs() override;
 
   // Allows installation of custom delegates during initialization
-  virtual void ApplyDelegates() {}
+  virtual void ApplyDelegates();
 
   std::unique_ptr<tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> interpreter;
@@ -90,6 +93,7 @@
   std::vector<InputLayerInfo> inputs;
   ProfilingListener profiling_listener_;
   GemmlowpProfilingListener gemmlowp_profiling_listener_;
+  std::vector<std::unique_ptr<TfLiteDelegate>> delegates_;
 };
 
 }  // namespace benchmark
diff --git a/tensorflow/lite/tools/benchmark/ios/README.md b/tensorflow/lite/tools/benchmark/ios/README.md
index 8142f48..ee880f0 100644
--- a/tensorflow/lite/tools/benchmark/ios/README.md
+++ b/tensorflow/lite/tools/benchmark/ios/README.md
@@ -27,9 +27,9 @@
 tensorflow/lite/tools/make/build_ios_universal_lib.sh
 ```
 
-will also build `tensorflow/lite/gen/lib/benchmark-lib.a` .
+will also build `tensorflow/lite/tools/make/gen/lib/benchmark-lib.a`.
 
-- Now copy the downloaded model file to `benchmark_data` directory. 
+- Now copy the downloaded model file to the `benchmark_data` directory.
 
 - Modify `benchmark_params.json` to change the `input_layer`,
 `input_layer_shape`, and other benchmark parameters.
@@ -37,8 +37,8 @@
 - Change `Build Phases -> Copy Bundle Resources` and add the model file to the
 resources that need to be copied.
 
-- Ensure that `Build Phases -> Link Binary With Library` contains the 
-`Accelerate framework` and `tensorflow/lite/gen/lib/benchmark-lib.a`.
+- Ensure that `Build Phases -> Link Binary With Library` contains the
+`Accelerate framework` and `tensorflow/lite/tools/make/gen/lib/benchmark-lib.a`.
 
 - Now try running the app. The app has a single button that runs the benchmark
   on the model and displays results in a text view below.
@@ -48,7 +48,7 @@
 If you want detailed profiling, use the following command:
 
 ```bash
-tensorflow/lite/build_ios_universal_lib.sh -p
+tensorflow/lite/tools/make/build_ios_universal_lib.sh -p
 ```
 
 Then follow the same steps above and run the benchmark app. You will see the
diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile
index 5c4bb4d..4a22d76 100644
--- a/tensorflow/lite/tools/make/Makefile
+++ b/tensorflow/lite/tools/make/Makefile
@@ -109,7 +109,7 @@
 $(wildcard tensorflow/lite/*/*test.cc) \
 $(wildcard tensorflow/lite/*/*/*test.cc) \
 $(wildcard tensorflow/lite/*/*/*/*test.cc) \
-$(wildcard tensorflow/lite/kernels/test_util.cc) \
+$(wildcard tensorflow/lite/kernels/*test_util.cc) \
 $(MINIMAL_SRCS)
 
 ifeq ($(BUILD_TYPE),micro)
diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD
index ecc48f8..cf99cad 100644
--- a/tensorflow/lite/tools/optimize/BUILD
+++ b/tensorflow/lite/tools/optimize/BUILD
@@ -56,134 +56,6 @@
     ],
 )
 
-cc_library(
-    name = "calibrator_lib",
-    srcs = ["calibrator.cc"],
-    hdrs = ["calibrator.h"],
-    deps = [
-        ":calibration_common",
-        ":calibration_logger",
-        ":calibration_reader",
-        ":logging_op_resolver",
-        ":node_info_delegate",
-        "//tensorflow/lite:framework",
-        "//tensorflow/lite:string_util",
-        "//tensorflow/lite/c:c_api_internal",
-        "//tensorflow/lite/core/api",
-        "//tensorflow/lite/kernels:builtin_ops",
-        "//tensorflow/lite/schema:schema_fbs",
-        "@com_google_absl//absl/memory",
-        "@flatbuffers",
-    ],
-)
-
-tf_cc_test(
-    name = "calibrator_test",
-    srcs = ["calibrator_test.cc"],
-    args = [
-        "--test_model_file=$(location //tensorflow/lite:testdata/multi_add.bin)",
-    ],
-    data = [
-        "//tensorflow/lite:testdata/multi_add.bin",
-    ],
-    tags = [
-        "tflite_not_portable_android",
-        "tflite_not_portable_ios",
-    ],
-    deps = [
-        ":calibrator_lib",
-        "//tensorflow/core:framework_internal",
-        "//tensorflow/core:lib",
-        "//tensorflow/lite:framework",
-        "//tensorflow/lite/kernels:builtin_ops",
-        "@com_google_googletest//:gtest",
-    ],
-)
-
-cc_library(
-    name = "logging_op_resolver",
-    srcs = ["logging_op_resolver.cc"],
-    hdrs = ["logging_op_resolver.h"],
-    deps = [
-        ":calibration_common",
-        "//tensorflow/lite:framework",
-        "//tensorflow/lite/core/api",
-        "@com_google_absl//absl/memory",
-    ],
-)
-
-cc_test(
-    name = "logging_op_resolver_test",
-    srcs = ["logging_op_resolver_test.cc"],
-    deps = [
-        ":logging_op_resolver",
-        "//tensorflow/lite:framework",
-        "//tensorflow/lite/kernels:builtin_ops",
-        "@com_google_googletest//:gtest",
-    ],
-)
-
-cc_library(
-    name = "calibration_reader",
-    srcs = ["calibration_reader.cc"],
-    hdrs = ["calibration_reader.h"],
-    deps = [
-        ":calibration_logger",
-        "//tensorflow/lite:framework",
-        "@com_google_absl//absl/memory",
-    ],
-)
-
-cc_library(
-    name = "calibration_logger",
-    hdrs = ["calibration_logger.h"],
-    deps = [
-        "//tensorflow/lite/c:c_api_internal",
-    ],
-)
-
-cc_library(
-    name = "calibration_common",
-    hdrs = ["calibration_common.h"],
-    deps = [
-        "//tensorflow/lite:framework",
-    ],
-)
-
-cc_library(
-    name = "node_info_delegate",
-    srcs = ["node_info_delegate.cc"],
-    hdrs = ["node_info_delegate.h"],
-    deps = [
-        ":calibration_common",
-        "//tensorflow/lite:framework",
-    ],
-)
-
-tf_cc_test(
-    name = "node_info_delegate_test",
-    srcs = ["node_info_delegate_test.cc"],
-    args = [
-        "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)",
-    ],
-    data = [
-        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
-    ],
-    tags = [
-        "tflite_not_portable_android",
-        "tflite_not_portable_ios",
-    ],
-    deps = [
-        ":node_info_delegate",
-        ":test_util",
-        "//tensorflow/core:framework_internal",
-        "//tensorflow/core:lib",
-        "//tensorflow/lite:framework",
-        "//tensorflow/lite/kernels:builtin_ops",
-        "@com_google_googletest//:gtest",
-    ],
-)
-
 tf_cc_test(
     name = "quantize_weights_test",
     srcs = ["quantize_weights_test.cc"],
diff --git a/tensorflow/lite/tools/optimize/calibration/BUILD b/tensorflow/lite/tools/optimize/calibration/BUILD
new file mode 100644
index 0000000..c1d2ad2
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/BUILD
@@ -0,0 +1,138 @@
+load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite")
+load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+
+package(default_visibility = [
+    "//visibility:public",
+])
+
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "calibrator_lib",
+    srcs = ["calibrator.cc"],
+    hdrs = ["calibrator.h"],
+    deps = [
+        ":calibration_common",
+        ":calibration_logger",
+        ":calibration_reader",
+        ":logging_op_resolver",
+        ":node_info_delegate",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:string_util",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/core/api",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/schema:schema_fbs",
+        "@com_google_absl//absl/memory",
+        "@flatbuffers",
+    ],
+)
+
+tf_cc_test(
+    name = "calibrator_test",
+    srcs = ["calibrator_test.cc"],
+    args = [
+        "--test_model_file=$(location //tensorflow/lite:testdata/multi_add.bin)",
+    ],
+    data = [
+        "//tensorflow/lite:testdata/multi_add.bin",
+    ],
+    tags = [
+        "tflite_not_portable_android",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":calibrator_lib",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+cc_library(
+    name = "logging_op_resolver",
+    srcs = ["logging_op_resolver.cc"],
+    hdrs = ["logging_op_resolver.h"],
+    deps = [
+        ":calibration_common",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/core/api",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
+cc_test(
+    name = "logging_op_resolver_test",
+    srcs = ["logging_op_resolver_test.cc"],
+    deps = [
+        ":logging_op_resolver",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+cc_library(
+    name = "calibration_reader",
+    srcs = ["calibration_reader.cc"],
+    hdrs = ["calibration_reader.h"],
+    deps = [
+        ":calibration_logger",
+        "//tensorflow/lite:framework",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
+cc_library(
+    name = "calibration_logger",
+    hdrs = ["calibration_logger.h"],
+    deps = [
+        "//tensorflow/lite/c:c_api_internal",
+    ],
+)
+
+cc_library(
+    name = "calibration_common",
+    hdrs = ["calibration_common.h"],
+    deps = [
+        "//tensorflow/lite:framework",
+    ],
+)
+
+cc_library(
+    name = "node_info_delegate",
+    srcs = ["node_info_delegate.cc"],
+    hdrs = ["node_info_delegate.h"],
+    deps = [
+        ":calibration_common",
+        "//tensorflow/lite:framework",
+    ],
+)
+
+tf_cc_test(
+    name = "node_info_delegate_test",
+    srcs = ["node_info_delegate_test.cc"],
+    args = [
+        "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)",
+    ],
+    data = [
+        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
+    ],
+    tags = [
+        "tflite_not_portable_android",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":node_info_delegate",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/tools/optimize:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+tflite_portable_test_suite()
diff --git a/tensorflow/lite/tools/optimize/calibration_common.h b/tensorflow/lite/tools/optimize/calibration/calibration_common.h
similarity index 100%
rename from tensorflow/lite/tools/optimize/calibration_common.h
rename to tensorflow/lite/tools/optimize/calibration/calibration_common.h
diff --git a/tensorflow/lite/tools/optimize/calibration_logger.h b/tensorflow/lite/tools/optimize/calibration/calibration_logger.h
similarity index 100%
rename from tensorflow/lite/tools/optimize/calibration_logger.h
rename to tensorflow/lite/tools/optimize/calibration/calibration_logger.h
diff --git a/tensorflow/lite/tools/optimize/calibration/calibration_reader.cc b/tensorflow/lite/tools/optimize/calibration/calibration_reader.cc
new file mode 100644
index 0000000..69e9c5a
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/calibration_reader.cc
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/tools/optimize/calibration/calibration_reader.h"
+
+#include "absl/memory/memory.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+TfLiteStatus CalibrationReader::GetTensorStatsAsMap(
+    std::unordered_map<int, CalibrationStats>* tensor_id_to_stats_map) const {
+  tensor_id_to_stats_map->clear();
+  for (const auto& tensorid_stat : logger_->GetCalibrationValues()) {
+    auto minmax = tensorid_stat.second;
+    CalibrationReader::CalibrationStats stats;
+    TF_LITE_ENSURE_STATUS(minmax.Get(&stats.min, &stats.max));
+    tensor_id_to_stats_map->insert({tensorid_stat.first, stats});
+  }
+
+  return kTfLiteOk;
+}
+
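+// Note: only the first subgraph is annotated; each recorded min/max pair is
+// written into the corresponding tensor's quantization parameters.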
+TfLiteStatus CalibrationReader::AddCalibrationToModel(ModelT* model) const {
+  if (!model || model->subgraphs.empty()) {
+    return kTfLiteError;
+  }
+  const auto& subgraph = model->subgraphs[0];
+  for (const auto& tensorid_stat : logger_->GetCalibrationValues()) {
+    auto minmax = tensorid_stat.second;
+    float min, max;
+    TF_LITE_ENSURE_STATUS(minmax.Get(&min, &max));
+    auto quant_params = absl::make_unique<tflite::QuantizationParametersT>();
+    quant_params->min.push_back(min);
+    quant_params->max.push_back(max);
+    subgraph->tensors[tensorid_stat.first]->quantization =
+        std::move(quant_params);
+  }
+
+  return kTfLiteOk;
+}
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
diff --git a/tensorflow/lite/tools/optimize/calibration/calibration_reader.h b/tensorflow/lite/tools/optimize/calibration/calibration_reader.h
new file mode 100644
index 0000000..0120d84
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/calibration_reader.h
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_READER_H_
+#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_READER_H_
+
+#include <unordered_map>
+
+#include "tensorflow/lite/context.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_logger.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+// Warning: This is not a public API and is subject to change.
+//
+// Reads calibrator data collected by running the interpreter through
+// a calibration set.
+class CalibrationReader {
+ public:
+  struct CalibrationStats {
+    float min;
+    float max;
+  };
+  explicit CalibrationReader(const Logger* logger) : logger_(logger) {}
+
+  // Gets a map from tensor index to recorded calibration values.
+  virtual TfLiteStatus GetTensorStatsAsMap(
+      std::unordered_map<int, CalibrationStats>* tensor_id_to_stats_map) const;
+
+  // Annotates the tensors in the given model with statistics captured during
+  // calibration.
+  virtual TfLiteStatus AddCalibrationToModel(ModelT* model) const;
+
+  virtual ~CalibrationReader() {}
+
+ private:
+  const Logger* logger_;
+};
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_READER_H_
diff --git a/tensorflow/lite/tools/optimize/calibration/calibrator.cc b/tensorflow/lite/tools/optimize/calibration/calibrator.cc
new file mode 100644
index 0000000..eead4e5
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/calibrator.cc
@@ -0,0 +1,347 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/tools/optimize/calibration/calibrator.h"
+
+#include <fstream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "tensorflow/lite/core/api/error_reporter.h"
+#include "tensorflow/lite/core/api/op_resolver.h"
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/op_resolver.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/string_util.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_common.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_logger.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_reader.h"
+#include "tensorflow/lite/tools/optimize/calibration/logging_op_resolver.h"
+#include "tensorflow/lite/tools/optimize/calibration/node_info_delegate.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+
+namespace {
+
+// Calibrator is used to hold information that can be accessed during kernel
+// invocations.
+// TfLite kernel invocations are C functions and cannot look at the global
+// structure of the graph. Calibrator allows the kernel invoke functions to
+// access the global structure of the graph and know which node is currently
+// being executed. This also allows us to write a simple kernel invoke wrapper
+// (see LoggingEval) that can work for most builtin ops.
+class Calibrator {
+ public:
+  Calibrator(const std::unordered_map<const TfLiteNode*, OperatorInfo>&
+                 node_ptr_opinfo_map,
+             std::unique_ptr<LoggingOpResolver> logging_op_resolver)
+      : node_ptr_opinfo_map_(node_ptr_opinfo_map),
+        logging_op_resolver_(std::move(logging_op_resolver)) {
+    logger_ = absl::make_unique<Logger>();
+  }
+
+  // Returns the wrapped kernel invoke function |TfLiteRegistration.invoke|.
+  KernelEvalFuncPtr GetKernelInvoke(const TfLiteNode* node) const;
+
+  // Gets the instance of logger associated with the current context.
+  Logger* GetLogger() const { return logger_.get(); }
+
+  // Gets the operator information about the given TfLiteNode.
+  const OperatorInfo& GetOpInfo(const TfLiteNode* node) const {
+    return node_ptr_opinfo_map_.at(node);
+  }
+
+ private:
+  std::unordered_map<const TfLiteNode*, OperatorInfo> node_ptr_opinfo_map_;
+  std::unique_ptr<LoggingOpResolver> logging_op_resolver_;
+  const std::unordered_map<int, OperatorInfo> index_opinfo_;
+  std::unique_ptr<Logger> logger_;
+};
+
+KernelEvalFuncPtr Calibrator::GetKernelInvoke(const TfLiteNode* node) const {
+  auto op_info = node_ptr_opinfo_map_.at(node);
+  return logging_op_resolver_->GetWrappedKernelInvoke(op_info.builtin_op_code,
+                                                      1);
+}
+
+// A registry of |Calibrator| objects per |TfLiteContext|.
+// This global registry is needed to access |Calibrator| objects in the kernel
+// invoke functions i.e. |TfLiteRegistration.invoke|.
+// Kernel invoke functions are C functions that have limited access to
+// |TfLiteContext|. Kernel invoke functions don't have access to the global
+// state of the graph, so during a kernel invocation the function cannot know
+// which node it was invoked for. E.g., for a model with a |Conv| op at two
+// locations, there is no easy way for the Conv.invoke function to
+// disambiguate the two calls.
+//
+// For calibration we solve this problem by creating a map of calibrators
+// per |TfLiteContext|. This map is |GlobalCalibrationRegistry|.
+//
+// This registry is then accessed using a global getter function:
+// |GetCalibratorRegistry|.
+// E.g.
+// TfLiteStatus SomeKernelInvokeFn(TfLiteContext* context, TfLiteNode* node) {
+//   .... code ....
+//   auto registry = GetCalibratorRegistry();
+//   auto calibrator = registry->GetCalibrator(context);
+//   ..... code ....
+//  }
+//
+// This way the kernel invoke functions can get access to the Calibrator
+// object associated with the |TfLiteContext|.
+class GlobalCalibratorRegistry {
+ public:
+  // Gets the |Calibrator| associated with the given context; returns null if
+  // no calibrator is registered for that context.
+  Calibrator* GetCalibrator(const TfLiteContext* context) const {
+    if (calibrator_registry_.find(context) == calibrator_registry_.cend()) {
+      return nullptr;
+    }
+    return calibrator_registry_.at(context).get();
+  }
+
+  // Removes the association between calibrator and context.
+  // Note: This deletes the calibrator as well.
+  void RemoveCalibrator(const TfLiteContext* context) {
+    calibrator_registry_.erase(context);
+  }
+
+  // Creates an instance of |Calibrator|.
+  // Registry owns the |Calibrator| object which can be deleted by calling
+  // |RemoveCalibrator|.
+  TfLiteStatus CreateCalibrator(
+      const TfLiteContext* context,
+      const std::unordered_map<const TfLiteNode*, OperatorInfo>& node_to_opinfo,
+      std::unique_ptr<LoggingOpResolver> logging_op_resolver,
+      Calibrator** calibrator_ptr, ErrorReporter* reporter) {
+    if (calibrator_registry_.find(context) != calibrator_registry_.cend()) {
+      reporter->Report(
+          "Failed to create calibrator, context already registered.");
+      return kTfLiteError;
+    }
+    std::unique_ptr<Calibrator> calibrator = absl::make_unique<Calibrator>(
+        node_to_opinfo, std::move(logging_op_resolver));
+    calibrator_registry_[context] = std::move(calibrator);
+    *calibrator_ptr = calibrator_registry_.at(context).get();
+    return kTfLiteOk;
+  }
+
+ private:
+  std::unordered_map<const TfLiteContext*, std::unique_ptr<Calibrator>>
+      calibrator_registry_;
+};
+
+GlobalCalibratorRegistry* GetCalibratorRegistry() {
+  static GlobalCalibratorRegistry* registry = new GlobalCalibratorRegistry();
+  return registry;
+}
+
+// A wrapper implementation for |TfLiteRegistration.invoke| that logs inputs,
+// invokes the wrapped implementation and then logs the outputs.
+TfLiteStatus LoggingEval(TfLiteContext* context, TfLiteNode* node) {
+  Calibrator* calibrator = GetCalibratorRegistry()->GetCalibrator(context);
+
+  if (!calibrator) {
+    context->ReportError(context, "No calibrator found for context.");
+    return kTfLiteError;
+  }
+
+  auto kernel_invoke = calibrator->GetKernelInvoke(node);
+  auto logger = calibrator->GetLogger();
+  auto op_info = calibrator->GetOpInfo(node);
+
+  for (int i : op_info.loggable_inputs) {
+    auto tensor = context->tensors[i];
+    logger->LogTensorValue(i, tensor.data.f, tensor.bytes / sizeof(float));
+  }
+
+  auto status = kernel_invoke(context, node);
+  // TODO(shashishekhar): An intermediate tensor in the graph will get logged
+  // twice: once as an input and a second time as an output. This doesn't
+  // change the min/max values but is inefficient.
+  // Using a moving average would also break this.
+
+  for (int i : op_info.loggable_outputs) {
+    auto tensor = context->tensors[i];
+    logger->LogTensorValue(i, tensor.data.f, tensor.bytes / sizeof(float));
+  }
+
+  return status;
+}
+
+// Returns the loggable tensors. Not all inputs and outputs need to be logged.
+// For example, const weight tensors, which have buffers associated with them,
+// don't need to be logged.
+std::vector<int> GetLoggableTensorIndices(
+    const std::vector<int>& tensor_indices,
+    const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors,
+    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* tensor_buffers) {
+  std::vector<int> loggable;
+  for (auto tensor_index : tensor_indices) {
+    auto tensor = tensors->Get(tensor_index);
+    auto buffer_index = tensor->buffer();
+    const bool has_no_buffer =
+        (tensor_buffers->Get(buffer_index) == nullptr) ||
+        (tensor_buffers->Get(buffer_index)->data() == nullptr) ||
+        (tensor_buffers->Get(buffer_index)->data()->size() == 0);
+    if (has_no_buffer && tensor->type() == tflite::TensorType_FLOAT32) {
+      loggable.push_back(tensor_index);
+    }
+  }
+  return loggable;
+}
+
+// Creates a mapping between the static model graph and the runtime TfLiteNode*
+// nodes in the graph for the given context.
+// This is done by querying the TfLiteContext for node and registrations using
+// the |NodeInfoDelegateObserver|.
+TfLiteStatus GetNodeOpInfoMapAndContext(
+    const std::unordered_map<int, OperatorInfo>& node_to_opinfo,
+    tflite::Interpreter* const interpreter,
+    std::unordered_map<const TfLiteNode*, OperatorInfo>* node_ptr_opinfo_map,
+    const TfLiteContext** context) {
+  NodeInfoDelegateObserver delegate_observer(node_to_opinfo,
+                                             node_ptr_opinfo_map);
+  NodeInfoDelegateParams delegate_params;
+  delegate_params.delegate_observer = &delegate_observer;
+  TfLiteDelegate logging_delegate = CreateNodeInfoDelegate(&delegate_params);
+
+  auto modify_status = interpreter->ModifyGraphWithDelegate(&logging_delegate);
+  if (modify_status != kTfLiteOk) {
+    return kTfLiteError;
+  }
+  *context = delegate_observer.GetContext();
+  return kTfLiteOk;
+}
+
+string GetOpName(const tflite::OperatorCode& opcode) {
+  if (opcode.custom_code() != nullptr) {
+    return opcode.custom_code()->str();
+  }
+  return tflite::EnumNamesBuiltinOperator()[opcode.builtin_code()];
+}
+
+// A |CalibrationReader| that owns the Calibrator.
+class Reader : public CalibrationReader {
+ public:
+  Reader(const TfLiteContext* context, const Logger* logger)
+      : CalibrationReader(logger), context_(context) {}
+
+  ~Reader() override { GetCalibratorRegistry()->RemoveCalibrator(context_); }
+
+ private:
+  const TfLiteContext* context_;
+};
+
+}  // namespace
+
+TfLiteStatus BuildLoggingInterpreter(
+    const FlatBufferModel& model, const OpResolver& op_resolver,
+    std::unique_ptr<Interpreter>* interpreter,
+    std::unique_ptr<CalibrationReader>* calibration_reader) {
+  auto tflite_model = model.GetModel();
+  auto subgraphs = tflite_model->subgraphs();
+  auto tensor_buffers = tflite_model->buffers();
+
+  if (subgraphs->size() != 1) {
+    model.error_reporter()->Report(
+        "Only models with a single subgraph are supported, model had %d "
+        "subgraphs",
+        subgraphs->size());
+    return kTfLiteError;
+  }
+
+  // Populate the node index to operator info map.
+  // We collect this information up front so that at runtime we know which
+  // inputs and outputs to log.
+  // At runtime TFLite kernel invoke functions can only look into their
+  // own node in the graph (TfLiteNode*) and some limited context information.
+  auto primary_subgraph = subgraphs->Get(0);
+  auto operator_codes = tflite_model->operator_codes();
+  auto operators = primary_subgraph->operators();
+  auto tensors = primary_subgraph->tensors();
+  std::unordered_map<int, OperatorInfo> node_to_opinfo;
+  BuiltinOpsSet op_and_versions;
+
+  for (size_t i = 0; i < operators->size(); i++) {
+    OperatorInfo op_info;
+    op_info.node_index = i;
+    auto op = operators->Get(i);
+    auto operator_code = operator_codes->Get(op->opcode_index());
+    op_info.builtin_op_code = operator_code->builtin_code();
+    op_info.name = GetOpName(*operator_code);
+    op_info.is_custom_op = operator_code->custom_code() != nullptr;
+
+    auto op_inputs = op->inputs();
+    auto op_outputs = op->outputs();
+    op_info.inputs = std::vector<int>(op_inputs->begin(), op_inputs->end());
+    op_info.outputs = std::vector<int>(op_outputs->begin(), op_outputs->end());
+    op_info.loggable_inputs =
+        GetLoggableTensorIndices(op_info.inputs, tensors, tensor_buffers);
+    op_info.loggable_outputs =
+        GetLoggableTensorIndices(op_info.outputs, tensors, tensor_buffers);
+    if (!op_info.is_custom_op) {
+      op_info.registration = op_resolver.FindOp(operator_code->builtin_code(),
+                                                operator_code->version());
+    } else {
+      op_info.registration =
+          op_resolver.FindOp(op_info.name.c_str(), operator_code->version());
+    }
+    node_to_opinfo[i] = op_info;
+    op_and_versions.insert({op_info.builtin_op_code, operator_code->version()});
+  }
+
+  // Prepare the logging op resolver to use |LoggingEval| for kernel
+  // invocations.
+  auto logging_op_resolver = absl::make_unique<LoggingOpResolver>(
+      op_and_versions, op_resolver, LoggingEval);
+  tflite::InterpreterBuilder(model, *logging_op_resolver)(interpreter);
+
+  if (!(*interpreter)) {
+    model.error_reporter()->Report("Failed to construct interpreter");
+    return kTfLiteError;
+  }
+
+  // Compute the mapping between runtime and static graph structure, i.e.
+  // (TfLiteContext, TfLiteNode) -> OperatorInfo
+  std::unordered_map<const TfLiteNode*, OperatorInfo> node_ptr_opinfo_map;
+  const TfLiteContext* context = nullptr;
+  TF_LITE_ENSURE_STATUS(GetNodeOpInfoMapAndContext(
+      node_to_opinfo, interpreter->get(), &node_ptr_opinfo_map, &context));
+
+  Calibrator* calibrator = nullptr;
+  // Register a calibrator object for the context. This can be accessed
+  // during invocations by the logging kernels.
+  TF_LITE_ENSURE_STATUS(GetCalibratorRegistry()->CreateCalibrator(
+      context, node_ptr_opinfo_map, std::move(logging_op_resolver), &calibrator,
+      model.error_reporter()));
+  *calibration_reader = std::unique_ptr<CalibrationReader>(
+      new Reader(context, calibrator->GetLogger()));
+
+  return kTfLiteOk;
+}
+
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
diff --git a/tensorflow/lite/tools/optimize/calibration/calibrator.h b/tensorflow/lite/tools/optimize/calibration/calibrator.h
new file mode 100644
index 0000000..fb7e03f
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/calibrator.h
@@ -0,0 +1,64 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_CALIBRATOR_H_
+#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_CALIBRATOR_H_
+
+#include <unordered_map>
+
+#include "flatbuffers/flatbuffers.h"  // TF:flatbuffers
+#include "tensorflow/lite/core/api/op_resolver.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_reader.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+
+// Warning: This is not a public API and subject to change.
+
+// Builds an interpreter that logs the calibration data in memory.
+// The calibration data can be recovered using |calibration_reader|.
+//
+// Sample usage:
+// std::unique_ptr<Interpreter> interpreter;
+// std::unique_ptr<CalibrationReader> calibration_reader;
+// BuiltinOpResolver resolver = ...
+// FlatBufferModel model = ...
+//
+// BuildLoggingInterpreter(model, resolver, &interpreter,
+//                         &calibration_reader);
+//
+// * Allocate tensors...
+// * Call interpreter->Invoke on the calibration dataset, as sketched below.
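+//
+// A concrete feed loop might look like this (a sketch; |calibration_inputs|
+// is a hypothetical container holding one std::vector<float> per example):
+//   interpreter->AllocateTensors();
+//   for (const std::vector<float>& example : calibration_inputs) {
+//     float* input = interpreter->typed_input_tensor<float>(0);
+//     std::memcpy(input, example.data(), example.size() * sizeof(float));
+//     interpreter->Invoke();
+//   }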
+//
+// Calibration data can be read either directly, by calling
+// std::unordered_map<int, CalibrationStats> tensor_index_to_stats;
+// calibration_reader->GetTensorStatsAsMap(&tensor_index_to_stats);
+//
+// or by adding the calibration data to the model itself:
+// ModelT* original_floating_point_model = ...
+// calibration_reader->AddCalibrationToModel(original_floating_point_model);
+//
+TfLiteStatus BuildLoggingInterpreter(
+    const FlatBufferModel& model, const OpResolver& op_resolver,
+    std::unique_ptr<Interpreter>* interpreter,
+    std::unique_ptr<CalibrationReader>* calibration_reader);
+
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_CALIBRATOR_H_
diff --git a/tensorflow/lite/tools/optimize/calibration/calibrator_test.cc b/tensorflow/lite/tools/optimize/calibration/calibrator_test.cc
new file mode 100644
index 0000000..60e652e
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/calibrator_test.cc
@@ -0,0 +1,212 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <cstring>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibrator.h"
+
+namespace {
+tensorflow::string* g_test_model_file = nullptr;
+}  // namespace
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+namespace {
+
+std::unique_ptr<FlatBufferModel> ReadModel() {
+  if (g_test_model_file) {
+    return FlatBufferModel::BuildFromFile(g_test_model_file->c_str());
+  }
+  return nullptr;
+}
+
+TEST(CalibratorTest, CalibrationStatsAreCollected) {
+  auto model = ReadModel();
+  ASSERT_TRUE(model);
+  std::unique_ptr<Interpreter> interpreter;
+  std::unique_ptr<CalibrationReader> reader;
+  auto status = BuildLoggingInterpreter(
+      *model, ops::builtin::BuiltinOpResolver{}, &interpreter, &reader);
+  EXPECT_EQ(kTfLiteOk, status);
+
+  ASSERT_TRUE(interpreter);
+  ASSERT_TRUE(reader);
+  std::unordered_map<int, CalibrationReader::CalibrationStats> stats;
+  status = reader->GetTensorStatsAsMap(&stats);
+  EXPECT_EQ(kTfLiteOk, status);
+  EXPECT_TRUE(stats.empty());
+
+  status = interpreter->AllocateTensors();
+  ASSERT_EQ(kTfLiteOk, status);
+  // Model does the following:
+  // 0        1       2        3
+  // |        |__ ____|        |
+  // |           |             |
+  // |          Add(tensor:4)  |
+  // |____ ______|______ ______|
+  //      |             |
+  //      Add          Add
+  //      |             |
+  //    Output:5      Output:6
+
+  const size_t tensor_size = 1 * 8 * 8 * 3;
+
+  // Fill input tensor i with the value i+1, i.e. input[0] = 1.0f,
+  // input[1] = 2.0f, ..., input[3] = 4.0f.
+  for (size_t i = 0; i < interpreter->inputs().size(); i++) {
+    int input_tensor_idx = interpreter->inputs()[i];
+    TfLiteTensor* tensor = interpreter->tensor(input_tensor_idx);
+    ASSERT_EQ(tensor->bytes, tensor_size * sizeof(float));
+    for (size_t j = 0; j < tensor_size; j++) {
+      tensor->data.f[j] = i + 1;
+    }
+  }
+  status = interpreter->Invoke();
+  ASSERT_EQ(kTfLiteOk, status);
+  const float eps = 1e-6f;
+  // Verify that output tensor 5 is all 6s and output tensor 6 is all 9s.
+  TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
+  for (size_t i = 0; i < tensor_size; i++) {
+    EXPECT_NEAR(tensor->data.f[i], 6.0f, eps);
+  }
+  tensor = interpreter->tensor(interpreter->outputs()[1]);
+  for (size_t i = 0; i < tensor_size; i++) {
+    EXPECT_NEAR(tensor->data.f[i], 9.0f, eps);
+  }
+
+  // Verify the min/max stats of the tensors.
+  status = reader->GetTensorStatsAsMap(&stats);
+  EXPECT_EQ(kTfLiteOk, status);
+  EXPECT_EQ(7, stats.size());
+  // Check inputs
+  for (int tensor_idx = 0; tensor_idx < 4; tensor_idx++) {
+    EXPECT_NEAR(stats.at(tensor_idx).min, tensor_idx + 1, eps);
+    EXPECT_NEAR(stats.at(tensor_idx).max, tensor_idx + 1, eps);
+  }
+  // Check tensor 4 (Add(1, 2)) min/max.
+  EXPECT_NEAR(stats.at(4).min, 5, eps);
+  EXPECT_NEAR(stats.at(4).max, 5, eps);
+
+  // Check outputs
+  EXPECT_NEAR(stats.at(5).min, 6, eps);
+  EXPECT_NEAR(stats.at(5).max, 6, eps);
+
+  EXPECT_NEAR(stats.at(6).min, 9, eps);
+  EXPECT_NEAR(stats.at(6).max, 9, eps);
+}
+
+TEST(CalibratorTest, MultipleInvokes) {
+  auto model = ReadModel();
+  ASSERT_TRUE(model);
+  std::unique_ptr<Interpreter> interpreter;
+  std::unique_ptr<CalibrationReader> reader;
+  auto status = BuildLoggingInterpreter(
+      *model, ops::builtin::BuiltinOpResolver{}, &interpreter, &reader);
+  EXPECT_EQ(kTfLiteOk, status);
+
+  ASSERT_TRUE(interpreter);
+  ASSERT_TRUE(reader);
+  status = interpreter->AllocateTensors();
+
+  EXPECT_EQ(kTfLiteOk, status);
+  const size_t tensor_size = 1 * 8 * 8 * 3;
+  // Fill input tensor i with the value i+1, i.e. input[0] = 1.0f,
+  // input[1] = 2.0f, ..., input[3] = 4.0f.
+  for (size_t i = 0; i < interpreter->inputs().size(); i++) {
+    int input_tensor_idx = interpreter->inputs()[i];
+    TfLiteTensor* tensor = interpreter->tensor(input_tensor_idx);
+    ASSERT_EQ(tensor->bytes, tensor_size * sizeof(float));
+    for (size_t j = 0; j < tensor_size; j++) {
+      tensor->data.f[j] = i + 1;
+    }
+  }
+  status = interpreter->Invoke();
+  ASSERT_EQ(kTfLiteOk, status);
+  const float eps = 1e-6f;
+  // Verify the min/max stats of the tensors.
+  std::unordered_map<int, CalibrationReader::CalibrationStats> stats;
+  status = reader->GetTensorStatsAsMap(&stats);
+  EXPECT_EQ(kTfLiteOk, status);
+  EXPECT_EQ(7, stats.size());
+  const float expected_values[7] = {
+      1.0f,  // input 0
+      2.0f,  // input 1
+      3.0f,  // input 2
+      4.0f,  // input 3
+      5.0f,  // Add(1, 2)
+      6.0f,  // Output 5: Add(0, Add(1,2))
+      9.0f,  // Output 6: Add(Add(1,2), 3)
+  };
+  for (int tensor_idx = 0; tensor_idx < 7; tensor_idx++) {
+    EXPECT_NEAR(stats.at(tensor_idx).min, expected_values[tensor_idx], eps);
+    EXPECT_NEAR(stats.at(tensor_idx).max, expected_values[tensor_idx], eps);
+  }
+  // Set input[0][0] = 1.5 and input[0][1] = 0.5. This should change the stats
+  // only for input 0 and output 5.
+  TfLiteTensor* input0 = interpreter->tensor(0);
+  input0->data.f[0] = 1.5f;
+  input0->data.f[1] = 0.5f;
+  status = interpreter->Invoke();
+  ASSERT_EQ(kTfLiteOk, status);
+  status = reader->GetTensorStatsAsMap(&stats);
+  EXPECT_EQ(kTfLiteOk, status);
+  EXPECT_EQ(7, stats.size());
+  EXPECT_NEAR(stats.at(0).min, 0.5f, eps);
+  EXPECT_NEAR(stats.at(0).max, 1.5f, eps);
+
+  for (int tensor_idx = 1; tensor_idx < 5; tensor_idx++) {
+    EXPECT_NEAR(stats.at(tensor_idx).min, expected_values[tensor_idx], eps);
+    EXPECT_NEAR(stats.at(tensor_idx).max, expected_values[tensor_idx], eps);
+  }
+
+  EXPECT_NEAR(stats.at(5).min, 5.5f, eps);
+  EXPECT_NEAR(stats.at(5).max, 6.5f, eps);
+
+  EXPECT_NEAR(stats.at(6).min, 9.0f, eps);
+  EXPECT_NEAR(stats.at(6).max, 9.0f, eps);
+}
+
+}  // namespace
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  tensorflow::string model_file;
+  const std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("test_model_file", &model_file,
+                       "Path to test tflite model file."),
+  };
+
+  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
+  if (!parse_result) {
+    std::cerr << "Required test_model_file\n";
+    std::abort();
+  }
+  g_test_model_file = new tensorflow::string(model_file);
+  ::tensorflow::port::InitMain(argv[0], &argc, &argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/tools/optimize/calibration/logging_op_resolver.cc b/tensorflow/lite/tools/optimize/calibration/logging_op_resolver.cc
new file mode 100644
index 0000000..d2a09e8
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/logging_op_resolver.cc
@@ -0,0 +1,61 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/tools/optimize/calibration/logging_op_resolver.h"
+
+#include "absl/memory/memory.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+
+LoggingOpResolver::LoggingOpResolver(const BuiltinOpsSet& ops_to_replace,
+                                     const OpResolver& base_resolver,
+                                     KernelEvalFuncPtr logging_eval_fn) {
+  for (const auto& op_and_version : ops_to_replace) {
+    const TfLiteRegistration* base_registration =
+        base_resolver.FindOp(op_and_version.first, op_and_version.second);
+    BuiltinOperatorKey key = op_and_version;
+    builtin_op_evalfn_map_[key] = base_registration->invoke;
+    std::unique_ptr<TfLiteRegistration> logging_registration =
+        absl::make_unique<TfLiteRegistration>(*base_registration);
+    logging_registration->invoke = logging_eval_fn;
+    builtin_op_registration_map_[key] = std::move(logging_registration);
+  }
+}
+
+const TfLiteRegistration* LoggingOpResolver::FindOp(BuiltinOperator op,
+                                                    int version) const {
+  BuiltinOperatorKey key = {op, version};
+  if (builtin_op_registration_map_.find(key) !=
+      builtin_op_registration_map_.end()) {
+    return builtin_op_registration_map_.at(key).get();
+  }
+
+  return nullptr;
+}
+
+KernelEvalFuncPtr LoggingOpResolver::GetWrappedKernelInvoke(BuiltinOperator op,
+                                                            int version) const {
+  return builtin_op_evalfn_map_.at({op, version});
+}
+
+const TfLiteRegistration* LoggingOpResolver::FindOp(const char* op,
+                                                    int version) const {
+  // TODO(b/121374947): Support custom ops as well.
+  return nullptr;
+}
+
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
diff --git a/tensorflow/lite/tools/optimize/calibration/logging_op_resolver.h b/tensorflow/lite/tools/optimize/calibration/logging_op_resolver.h
new file mode 100644
index 0000000..af4127e
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/logging_op_resolver.h
@@ -0,0 +1,59 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_LOGGING_OP_RESOLVER_H_
+#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_LOGGING_OP_RESOLVER_H_
+
+#include <set>
+#include <unordered_map>
+
+#include "tensorflow/lite/core/api/op_resolver.h"
+#include "tensorflow/lite/mutable_op_resolver.h"
+#include "tensorflow/lite/op_resolver.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_common.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+// A resolver that replaces the kernel invocations with a wrapper
+// eval function.
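+//
+// Sample usage (a sketch, mirroring the unit tests; |MyLoggingEval| stands
+// for any function with the KernelEvalFuncPtr signature):
+//   MutableOpResolver base_resolver = ...;
+//   BuiltinOpsSet ops_to_replace = {{BuiltinOperator_ADD, /*version=*/1}};
+//   LoggingOpResolver resolver(ops_to_replace, base_resolver, MyLoggingEval);
+//   // FindOp now returns registrations whose invoke is MyLoggingEval; the
+//   // original invoke functions are available via GetWrappedKernelInvoke.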
+class LoggingOpResolver : public OpResolver {
+ public:
+  // Creates an instance of |LoggingOpResolver|.
+  // All |TfLiteRegistration.invoke| functions are replaced by
+  // |logging_eval_fn|.
+  // TODO(shashishekhar): This interface needs to change for custom ops and
+  // BuiltinOps that need special logging implementations.
+  LoggingOpResolver(const BuiltinOpsSet& ops_to_replace,
+                    const OpResolver& base_resolver,
+                    KernelEvalFuncPtr logging_eval_fn);
+
+  const TfLiteRegistration* FindOp(BuiltinOperator op,
+                                   int version) const override;
+
+  KernelEvalFuncPtr GetWrappedKernelInvoke(BuiltinOperator op,
+                                           int version) const;
+  const TfLiteRegistration* FindOp(const char* op, int version) const override;
+
+ private:
+  BuiltinOpsMap<std::unique_ptr<TfLiteRegistration>>
+      builtin_op_registration_map_;
+  BuiltinOpsMap<KernelEvalFuncPtr> builtin_op_evalfn_map_;
+};
+
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_LOGGING_OP_RESOLVER_H_
diff --git a/tensorflow/lite/tools/optimize/calibration/logging_op_resolver_test.cc b/tensorflow/lite/tools/optimize/calibration/logging_op_resolver_test.cc
new file mode 100644
index 0000000..d8d29ad
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/logging_op_resolver_test.cc
@@ -0,0 +1,141 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/tools/optimize/calibration/logging_op_resolver.h"
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/lite/mutable_op_resolver.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+namespace {
+
+TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus WrappingInvoke(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TEST(LoggingOpResolverTest, KernelInvokesAreReplaced) {
+  MutableOpResolver base_resolver;
+  TfLiteRegistration conv_registration = {};
+  conv_registration.prepare = ConvPrepare;
+  conv_registration.invoke = ConvEval;
+
+  base_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &conv_registration);
+
+  TfLiteRegistration add_registration = {};
+  add_registration.prepare = AddPrepare;
+  add_registration.invoke = AddEval;
+
+  base_resolver.AddBuiltin(BuiltinOperator_ADD, &add_registration);
+  BuiltinOpsSet ops_to_replace = {
+      {BuiltinOperator_CONV_2D, /*version*/ 1},
+      {BuiltinOperator_ADD, /*version*/ 1},
+  };
+
+  LoggingOpResolver resolver(ops_to_replace, base_resolver, WrappingInvoke);
+
+  auto reg = resolver.FindOp(BuiltinOperator_CONV_2D, 1);
+
+  EXPECT_EQ(reg->builtin_code, BuiltinOperator_CONV_2D);
+  EXPECT_TRUE(reg->prepare == ConvPrepare);
+  EXPECT_TRUE(reg->invoke == WrappingInvoke);
+
+  reg = resolver.FindOp(BuiltinOperator_ADD, 1);
+
+  EXPECT_EQ(reg->builtin_code, BuiltinOperator_ADD);
+  EXPECT_TRUE(reg->prepare == AddPrepare);
+  EXPECT_TRUE(reg->invoke == WrappingInvoke);
+}
+
+TEST(LoggingOpResolverTest, OriginalKernelInvokesAreRetained) {
+  MutableOpResolver base_resolver;
+  TfLiteRegistration conv_registration = {};
+  conv_registration.prepare = ConvPrepare;
+  conv_registration.invoke = ConvEval;
+
+  base_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &conv_registration);
+
+  TfLiteRegistration add_registration = {};
+  add_registration.prepare = AddPrepare;
+  add_registration.invoke = AddEval;
+
+  base_resolver.AddBuiltin(BuiltinOperator_ADD, &add_registration);
+  BuiltinOpsSet ops_to_replace = {
+      {BuiltinOperator_CONV_2D, /*version*/ 1},
+      {BuiltinOperator_ADD, /*version*/ 1},
+  };
+
+  LoggingOpResolver resolver(ops_to_replace, base_resolver, WrappingInvoke);
+  auto kernel_invoke =
+      resolver.GetWrappedKernelInvoke(BuiltinOperator_CONV_2D, 1);
+  EXPECT_TRUE(kernel_invoke == ConvEval);
+  kernel_invoke = resolver.GetWrappedKernelInvoke(BuiltinOperator_ADD, 1);
+  EXPECT_TRUE(kernel_invoke == AddEval);
+}
+
+TEST(LoggingOpResolverTest, OnlyOpsInReplacementSetAreReplaced) {
+  MutableOpResolver base_resolver;
+  TfLiteRegistration conv_registration = {};
+  conv_registration.prepare = ConvPrepare;
+  conv_registration.invoke = ConvEval;
+
+  base_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &conv_registration);
+
+  TfLiteRegistration add_registration = {};
+  add_registration.prepare = AddPrepare;
+  add_registration.invoke = AddEval;
+
+  base_resolver.AddBuiltin(BuiltinOperator_ADD, &add_registration);
+  // Only replace conv2d
+  BuiltinOpsSet ops_to_replace = {
+      {BuiltinOperator_CONV_2D, /*version*/ 1},
+  };
+
+  LoggingOpResolver resolver(ops_to_replace, base_resolver, WrappingInvoke);
+  auto reg = resolver.FindOp(BuiltinOperator_CONV_2D, 1);
+  EXPECT_EQ(reg->builtin_code, BuiltinOperator_CONV_2D);
+  EXPECT_TRUE(reg->prepare == ConvPrepare);
+  EXPECT_TRUE(reg->invoke == WrappingInvoke);
+
+  reg = resolver.FindOp(BuiltinOperator_ADD, 1);
+  EXPECT_EQ(nullptr, reg);
+}
+
+}  // namespace
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  // On Linux, add: FLAGS_logtostderr = true;
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/tools/optimize/calibration/node_info_delegate.cc b/tensorflow/lite/tools/optimize/calibration/node_info_delegate.cc
new file mode 100644
index 0000000..2b91974
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/node_info_delegate.cc
@@ -0,0 +1,66 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/tools/optimize/calibration/node_info_delegate.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+
+namespace {
+// The prepare function for the delegate; it forwards the prepare call to the
+// delegate observer in the node info delegate params by calling the
+// observer's |OnDelegatePrepareCalled| method.
+TfLiteStatus NodeInfoDelegatePrepare(TfLiteContext* context,
+                                     TfLiteDelegate* delegate) {
+  if (delegate == nullptr) return TfLiteStatus::kTfLiteError;
+
+  NodeInfoDelegateParams* params =
+      reinterpret_cast<NodeInfoDelegateParams*>(delegate->data_);
+  return params->delegate_observer->OnDelegatePrepareCalled(context);
+}
+}  // namespace
+
+TfLiteDelegate CreateNodeInfoDelegate(NodeInfoDelegateParams* params) {
+  return {/*data_=*/params,
+          /*Prepare=*/NodeInfoDelegatePrepare,
+          /*CopyFromBufferHandle=*/nullptr,
+          /*CopyToBufferHandle=*/nullptr,
+          /*FreeBufferHandle=*/nullptr};
+}
+
+TfLiteStatus NodeInfoDelegateObserver::OnDelegatePrepareCalled(
+    TfLiteContext* context) {
+  context_ = context;
+  const size_t num_nodes = node_index_opinfo_map_.size();
+  for (size_t node_index = 0; node_index < num_nodes; node_index++) {
+    TfLiteNode* node = nullptr;
+    TfLiteRegistration* reg = nullptr;
+    TF_LITE_ENSURE_STATUS(
+        context->GetNodeAndRegistration(context, node_index, &node, &reg));
+    auto op_info = node_index_opinfo_map_.at(node_index);
+    op_info.registration = reg;
+    node_ptr_opinfo_map_->insert({node, op_info});
+  }
+
+  if (node_ptr_opinfo_map_->size() != node_index_opinfo_map_.size()) {
+    // Something went wrong: not every node in the graph was visited.
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
diff --git a/tensorflow/lite/tools/optimize/calibration/node_info_delegate.h b/tensorflow/lite/tools/optimize/calibration/node_info_delegate.h
new file mode 100644
index 0000000..56f6141
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/node_info_delegate.h
@@ -0,0 +1,67 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_NODE_INFO_DELEGATE_H_
+#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_NODE_INFO_DELEGATE_H_
+
+#include <unordered_map>
+
+#include "tensorflow/lite/context.h"
+#include "tensorflow/lite/tools/optimize/calibration/calibration_common.h"
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+
+// An interface for a delegate observer that listens to
+// TfLiteDelegate::Prepare calls.
+class DelegateObserver {
+ public:
+  virtual TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) = 0;
+  virtual ~DelegateObserver() {}
+};
+
+// The parameters for the node info delegate.
+struct NodeInfoDelegateParams {
+  DelegateObserver* delegate_observer;
+};
+
+// Creates a delegate with the given |params|.
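+//
+// Sample usage (a sketch, mirroring the unit tests; |observer| is any
+// DelegateObserver implementation and |interpreter| an already-built
+// tflite::Interpreter):
+//   NodeInfoDelegateParams params;
+//   params.delegate_observer = &observer;
+//   TfLiteDelegate delegate = CreateNodeInfoDelegate(&params);
+//   interpreter->ModifyGraphWithDelegate(&delegate);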
+TfLiteDelegate CreateNodeInfoDelegate(NodeInfoDelegateParams* params);
+
+// A delegate observer that can construct the map from TfLiteNode* ->
+// OperatorInfo.
+class NodeInfoDelegateObserver : public DelegateObserver {
+ public:
+  NodeInfoDelegateObserver(
+      const std::unordered_map<int, OperatorInfo>& node_index_to_op,
+      std::unordered_map<const TfLiteNode*, OperatorInfo>* node_ptr_opinfo_map)
+      : node_index_opinfo_map_(node_index_to_op),
+        node_ptr_opinfo_map_(node_ptr_opinfo_map) {}
+
+  TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) override;
+
+  // Returns the context that was passed to the prepare method.
+  const TfLiteContext* GetContext() const { return context_; }
+
+ private:
+  const TfLiteContext* context_ = nullptr;
+  const std::unordered_map<int, OperatorInfo>& node_index_opinfo_map_;
+  std::unordered_map<const TfLiteNode*, OperatorInfo>* node_ptr_opinfo_map_;
+};
+
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_NODE_INFO_DELEGATE_H_
diff --git a/tensorflow/lite/tools/optimize/calibration/node_info_delegate_test.cc b/tensorflow/lite/tools/optimize/calibration/node_info_delegate_test.cc
new file mode 100644
index 0000000..b110174
--- /dev/null
+++ b/tensorflow/lite/tools/optimize/calibration/node_info_delegate_test.cc
@@ -0,0 +1,178 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <unordered_map>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/tools/optimize/calibration/node_info_delegate.h"
+#include "tensorflow/lite/tools/optimize/test_util.h"
+
+namespace {
+tensorflow::string* g_test_model_dir = nullptr;
+}  // namespace
+
+namespace tflite {
+namespace optimize {
+namespace calibration {
+namespace {
+
+std::unique_ptr<FlatBufferModel> ReadModel(const char* model) {
+  auto model_path = tensorflow::io::JoinPath(*g_test_model_dir, model);
+  return FlatBufferModel::BuildFromFile(model_path.c_str());
+}
+
+std::unique_ptr<FlatBufferModel> ReadModel() {
+  return ReadModel(internal::kConvModelWith0Plus10Weights);
+}
+
+class TestDelegateObserver : public DelegateObserver {
+ public:
+  explicit TestDelegateObserver(TfLiteStatus status_to_return)
+      : status_to_return_(status_to_return) {}
+
+  TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) override {
+    num_times_called_++;
+    return status_to_return_;
+  }
+  int num_times_called() { return num_times_called_; }
+
+ private:
+  int num_times_called_ = 0;
+  TfLiteStatus status_to_return_;
+};
+
+TEST(NodeInfoDelegateTest, DelegateObserverIsCalled) {
+  TestDelegateObserver observer(kTfLiteOk);
+  NodeInfoDelegateParams params;
+  params.delegate_observer = &observer;
+  auto model = ReadModel();
+  ASSERT_TRUE(model);
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+  EXPECT_EQ(0, observer.num_times_called());
+  TfLiteDelegate delegate = CreateNodeInfoDelegate(&params);
+
+  auto status = interpreter->ModifyGraphWithDelegate(&delegate);
+  EXPECT_EQ(kTfLiteOk, status);
+  EXPECT_EQ(1, observer.num_times_called());
+}
+
+TEST(NodeInfoDelegateTest, ObserverErrorCausesModifyGraphFailure) {
+  // Observer returns error
+  TestDelegateObserver observer(kTfLiteError);
+  NodeInfoDelegateParams params;
+  params.delegate_observer = &observer;
+  auto model = ReadModel();
+  ASSERT_TRUE(model);
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+  TfLiteDelegate delegate = CreateNodeInfoDelegate(&params);
+
+  auto status = interpreter->ModifyGraphWithDelegate(&delegate);
+  EXPECT_EQ(kTfLiteError, status);
+}
+
+TEST(NodeInfoDelegateTest, NodeInfoDelegateObserver) {
+  auto model = ReadModel();
+  ASSERT_TRUE(model);
+
+  std::unordered_map<int, OperatorInfo> index_to_opinfo;
+  auto primary_subgraph = model->GetModel()->subgraphs()->Get(0);
+  auto operators = primary_subgraph->operators();
+  auto subgraph_tensors = primary_subgraph->tensors();
+  for (size_t i = 0; i < operators->size(); i++) {
+    OperatorInfo info;
+    auto op_inputs = operators->Get(i)->inputs();
+    auto op_outputs = operators->Get(i)->outputs();
+    info.inputs = std::vector<int>(op_inputs->begin(), op_inputs->end());
+    info.outputs = std::vector<int>(op_outputs->begin(), op_outputs->end());
+    index_to_opinfo[i] = info;
+  }
+
+  std::unordered_map<const TfLiteNode*, OperatorInfo> node_to_opinfo;
+  NodeInfoDelegateObserver observer(index_to_opinfo, &node_to_opinfo);
+  NodeInfoDelegateParams params;
+  params.delegate_observer = &observer;
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+
+  TfLiteDelegate delegate = CreateNodeInfoDelegate(&params);
+
+  auto status = interpreter->ModifyGraphWithDelegate(&delegate);
+  EXPECT_EQ(kTfLiteOk, status);
+  EXPECT_EQ(index_to_opinfo.size(), node_to_opinfo.size());
+  EXPECT_EQ(interpreter->nodes_size(), node_to_opinfo.size());
+
+  for (const auto& node_and_opinfo : node_to_opinfo) {
+    const TfLiteNode* tflite_node = node_and_opinfo.first;
+    const OperatorInfo& info = node_and_opinfo.second;
+    ASSERT_EQ(tflite_node->inputs->size, info.inputs.size());
+    ASSERT_EQ(tflite_node->outputs->size, info.outputs.size());
+
+    for (size_t input_index = 0; input_index < info.inputs.size();
+         input_index++) {
+      const TfLiteTensor* tflite_tensor =
+          interpreter->tensor(tflite_node->inputs->data[input_index]);
+      EXPECT_EQ(tflite_tensor->name,
+                subgraph_tensors->Get(info.inputs[input_index])->name()->str());
+    }
+
+    for (size_t output_index = 0; output_index < info.outputs.size();
+         output_index++) {
+      const TfLiteTensor* tflite_tensor =
+          interpreter->tensor(tflite_node->outputs->data[output_index]);
+      EXPECT_EQ(
+          tflite_tensor->name,
+          subgraph_tensors->Get(info.outputs[output_index])->name()->str());
+    }
+  }
+}
+
+}  // namespace
+}  // namespace calibration
+}  // namespace optimize
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  tensorflow::string model_file;
+  const std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("test_model_file", &model_file,
+                       "Path to test tflite model file."),
+  };
+
+  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
+  if (!parse_result) {
+    std::cerr << "Required test_model_file\n";
+    std::abort();
+  }
+  g_test_model_dir =
+      new tensorflow::string(tensorflow::io::Dirname(model_file));
+  ::tensorflow::port::InitMain(argv[0], &argc, &argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/tools/optimize/calibration_reader.cc b/tensorflow/lite/tools/optimize/calibration_reader.cc
deleted file mode 100644
index b01a62b..0000000
--- a/tensorflow/lite/tools/optimize/calibration_reader.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/tools/optimize/calibration_reader.h"
-
-#include "absl/memory/memory.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-TfLiteStatus CalibrationReader::GetTensorStatsAsMap(
-    std::unordered_map<int, CalibrationStats>* tensor_id_to_stats_map) const {
-  tensor_id_to_stats_map->clear();
-  for (const auto& tensorid_stat : logger_->GetCalibrationValues()) {
-    auto minmax = tensorid_stat.second;
-    CalibrationReader::CalibrationStats stats;
-    TF_LITE_ENSURE_STATUS(minmax.Get(&stats.min, &stats.max));
-    tensor_id_to_stats_map->insert({tensorid_stat.first, stats});
-  }
-
-  return kTfLiteOk;
-}
-
-TfLiteStatus CalibrationReader::AddCalibrationToModel(ModelT* model) const {
-  if (!model || model->subgraphs.empty()) {
-    return kTfLiteError;
-  }
-  const auto& subgraph = model->subgraphs[0];
-  for (const auto& tensorid_stat : logger_->GetCalibrationValues()) {
-    auto minmax = tensorid_stat.second;
-    float min, max;
-    TF_LITE_ENSURE_STATUS(minmax.Get(&min, &max));
-    auto quant_params = absl::make_unique<tflite::QuantizationParametersT>();
-    quant_params->min.push_back(min);
-    quant_params->max.push_back(max);
-    subgraph->tensors[tensorid_stat.first]->quantization =
-        std::move(quant_params);
-  }
-
-  return kTfLiteOk;
-}
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
diff --git a/tensorflow/lite/tools/optimize/calibration_reader.h b/tensorflow/lite/tools/optimize/calibration_reader.h
deleted file mode 100644
index af0da1b..0000000
--- a/tensorflow/lite/tools/optimize/calibration_reader.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_READER_H_
-#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_READER_H_
-
-#include <unordered_map>
-
-#include "tensorflow/lite/context.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/tools/optimize/calibration_logger.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-// Warning: This is not a public API and subject to change.
-//
-// Reads calibrator data collected by running the interpreter through
-// a calibration set.
-class CalibrationReader {
- public:
-  struct CalibrationStats {
-    float min;
-    float max;
-  };
-  explicit CalibrationReader(const Logger* logger) : logger_(logger) {}
-
-  // Gets a map from tensor index to recorded calibration values.
-  virtual TfLiteStatus GetTensorStatsAsMap(
-      std::unordered_map<int, CalibrationStats>* tensor_id_to_stats_map) const;
-
-  // Annotates the tensors in the given model with statistics captured during
-  // calibration.
-  virtual TfLiteStatus AddCalibrationToModel(ModelT* model) const;
-
-  virtual ~CalibrationReader() {}
-
- private:
-  const Logger* logger_;
-};
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATION_READER_H_
diff --git a/tensorflow/lite/tools/optimize/calibrator.cc b/tensorflow/lite/tools/optimize/calibrator.cc
deleted file mode 100644
index ccb55c3..0000000
--- a/tensorflow/lite/tools/optimize/calibrator.cc
+++ /dev/null
@@ -1,347 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/tools/optimize/calibrator.h"
-
-#include <fstream>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-#include <vector>
-
-#include "absl/memory/memory.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/core/api/op_resolver.h"
-#include "tensorflow/lite/interpreter.h"
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/op_resolver.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-#include "tensorflow/lite/string_util.h"
-#include "tensorflow/lite/tools/optimize/calibration_common.h"
-#include "tensorflow/lite/tools/optimize/calibration_logger.h"
-#include "tensorflow/lite/tools/optimize/calibration_reader.h"
-#include "tensorflow/lite/tools/optimize/logging_op_resolver.h"
-#include "tensorflow/lite/tools/optimize/node_info_delegate.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-
-namespace {
-
-// Calibrator is used to hold information that can be accessed during kernel
-// invocations.
-// TfLite kernel invocations are C functions and cannot look at the global
-// structure of the graph. Calibrator allows the kernel invoke functions to
-// access the global structure of graph and know which node is currently being
-// executed. This also allows us to write a simple kernel invoke wrapper
-// (see LoggingEval) that can work for most builtin ops.
-class Calibrator {
- public:
-  Calibrator(const std::unordered_map<const TfLiteNode*, OperatorInfo>&
-                 node_ptr_opinfo_map,
-             std::unique_ptr<LoggingOpResolver> logging_op_resolver)
-      : node_ptr_opinfo_map_(node_ptr_opinfo_map),
-        logging_op_resolver_(std::move(logging_op_resolver)) {
-    logger_ = absl::make_unique<Logger>();
-  }
-
-  // Returns the wrapped kernel invoke function |TfLiteRegistration.invoke|.
-  KernelEvalFuncPtr GetKernelInvoke(const TfLiteNode* node) const;
-
-  // Gets the instance of logger associated with the current context.
-  Logger* GetLogger() const { return logger_.get(); }
-
-  // Gets the operator information about the given TfLiteNode.
-  const OperatorInfo& GetOpInfo(const TfLiteNode* node) const {
-    return node_ptr_opinfo_map_.at(node);
-  }
-
- private:
-  std::unordered_map<const TfLiteNode*, OperatorInfo> node_ptr_opinfo_map_;
-  std::unique_ptr<LoggingOpResolver> logging_op_resolver_;
-  const std::unordered_map<int, OperatorInfo> index_opinfo_;
-  std::unique_ptr<Logger> logger_;
-};
-
-KernelEvalFuncPtr Calibrator::GetKernelInvoke(const TfLiteNode* node) const {
-  auto op_info = node_ptr_opinfo_map_.at(node);
-  return logging_op_resolver_->GetWrappedKernelInvoke(op_info.builtin_op_code,
-                                                      1);
-}
-
-// A registry of |Calibrator| objects per |TfLiteContext|.
-// This global registry is needed to access |Calibrator| objects in the kernel
-// invoke functions i.e. |TfLiteRegistration.invoke|.
-// Kernel invoke functions are C functions that have limited access to
-// |TfLiteContext|. Kernel invoke functions don't have access to global state of
-// graph. That means during a kernel invocation, the function cannot know which
-// node it was invoked for. E.g. in case of a model with |Conv| op at two
-// locations, there is no easy way for the Conv.invoke function to disambiguate
-// the calls.
-//
-// For calibration we solve this problem by creating a map of calibrators
-// per |TfLiteContext|. This map is |GlobalCalibrationRegistry|.
-//
-// This registry is then accessed using a global getter function:
-// |GetCalibratorRegistry|.
-// E.g.
-// TfLiteStatus SomeKernelInvokeFn(TfLiteContext* context, TfLiteNode* node) {
-//   .... code ....
-//   auto registry = GetCalibratorRegistry();
-//   auto calibrator = registry->GetCalibrator(context);
-//   ..... code ....
-//  }
-//
-// This way the kernel invoke functions can get the access to the Calibrator
-// object associated with the |TfLiteContext|.
-class GlobalCalibratorRegistry {
- public:
-  // Get the |Calibrator| associated with given context, returns null if no
-  // calibrator is associated with the given context.
-  Calibrator* GetCalibrator(const TfLiteContext* context) const {
-    if (calibrator_registry_.find(context) == calibrator_registry_.cend()) {
-      return nullptr;
-    }
-    return calibrator_registry_.at(context).get();
-  }
-
-  // Removes the association between calibrator and context.
-  // Note: This deletes the calibrator as well.
-  void RemoveCalibrator(const TfLiteContext* context) {
-    calibrator_registry_.erase(context);
-  }
-
-  // Creates an instance of |Calibrator|.
-  // Registry owns the |Calibrator| object which can be deleted by calling
-  // |RemoveCalibrator|.
-  TfLiteStatus CreateCalibrator(
-      const TfLiteContext* context,
-      const std::unordered_map<const TfLiteNode*, OperatorInfo>& node_to_opinfo,
-      std::unique_ptr<LoggingOpResolver> logging_op_resolver,
-      Calibrator** calibrator_ptr, ErrorReporter* reporter) {
-    if (calibrator_registry_.find(context) != calibrator_registry_.cend()) {
-      reporter->Report(
-          "Failed to create calibrator, context already registered.");
-      return kTfLiteError;
-    }
-    std::unique_ptr<Calibrator> calibrator = absl::make_unique<Calibrator>(
-        node_to_opinfo, std::move(logging_op_resolver));
-    calibrator_registry_[context] = std::move(calibrator);
-    *calibrator_ptr = calibrator_registry_.at(context).get();
-    return kTfLiteOk;
-  }
-
- private:
-  std::unordered_map<const TfLiteContext*, std::unique_ptr<Calibrator>>
-      calibrator_registry_;
-};
-
-GlobalCalibratorRegistry* GetCalibratorRegistry() {
-  static GlobalCalibratorRegistry* registry = new GlobalCalibratorRegistry();
-  return registry;
-}
-
-// A wrapper implementation for |TfLiteRegistration.invoke| that logs inputs,
-// invokes the wrapped implementation and then logs the outputs.
-TfLiteStatus LoggingEval(TfLiteContext* context, TfLiteNode* node) {
-  Calibrator* calibrator = GetCalibratorRegistry()->GetCalibrator(context);
-
-  if (!calibrator) {
-    context->ReportError(context, "No calibrator found for context.");
-    return kTfLiteError;
-  }
-
-  auto kernel_invoke = calibrator->GetKernelInvoke(node);
-  auto logger = calibrator->GetLogger();
-  auto op_info = calibrator->GetOpInfo(node);
-
-  for (int i : op_info.loggable_inputs) {
-    auto tensor = context->tensors[i];
-    logger->LogTensorValue(i, tensor.data.f, tensor.bytes / sizeof(float));
-  }
-
-  auto status = kernel_invoke(context, node);
-  // TODO(shashishekhar): An intermediate tensor in graph will get logged twice
-  // once as an input and second time as output. This doesn't change the min max
-  // values but is inefficient.
-  // Using moving average will also break this.
-
-  for (int i : op_info.loggable_outputs) {
-    auto tensor = context->tensors[i];
-    logger->LogTensorValue(i, tensor.data.f, tensor.bytes / sizeof(float));
-  }
-
-  return status;
-}
-
-// Returns the loggable tensors. Not all inputs and outputs need to be logged.
-// For example, const weight tensors which have buffers associated with them
-// don't need to be logged.
-std::vector<int> GetLoggableTensorIndices(
-    const std::vector<int>& tensor_indices,
-    const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors,
-    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* tensor_buffers) {
-  std::vector<int> loggable;
-  for (auto tensor_index : tensor_indices) {
-    auto tensor = tensors->Get(tensor_index);
-    auto buffer_index = tensor->buffer();
-    const bool has_no_buffer =
-        (tensor_buffers->Get(buffer_index) == nullptr) ||
-        (tensor_buffers->Get(buffer_index)->data() == nullptr) ||
-        (tensor_buffers->Get(buffer_index)->data()->size() == 0);
-    if (has_no_buffer && tensor->type() == tflite::TensorType_FLOAT32) {
-      loggable.push_back(tensor_index);
-    }
-  }
-  return loggable;
-}
-
-// Creates a mapping between the static model graph and the runtime TfLiteNode*
-// nodes in the graph for the given context.
-// This is done by querying the TfLiteContext for node and registrations using
-// the |NodeInfoDelegateObserver|.
-TfLiteStatus GetNodeOpInfoMapAndContext(
-    const std::unordered_map<int, OperatorInfo>& node_to_opinfo,
-    tflite::Interpreter* const interpreter,
-    std::unordered_map<const TfLiteNode*, OperatorInfo>* node_ptr_opinfo_map,
-    const TfLiteContext** context
-
-) {
-  NodeInfoDelegateObserver delegate_observer(node_to_opinfo,
-                                             node_ptr_opinfo_map);
-  NodeInfoDelegateParams delegate_params;
-  delegate_params.delegate_observer = &delegate_observer;
-  TfLiteDelegate logging_delegate = CreateNodeInfoDelegate(&delegate_params);
-
-  auto modify_status = interpreter->ModifyGraphWithDelegate(&logging_delegate);
-  if (modify_status != kTfLiteOk) {
-    return kTfLiteError;
-  }
-  *context = delegate_observer.GetContext();
-  return kTfLiteOk;
-}
-
-string GetOpName(const tflite::OperatorCode& opcode) {
-  if (opcode.custom_code() != nullptr) {
-    return opcode.custom_code()->str();
-  }
-  return tflite::EnumNamesBuiltinOperator()[opcode.builtin_code()];
-}
-
-// A |CalibrationReader| that owns the Calibrator.
-class Reader : public CalibrationReader {
- public:
-  Reader(const TfLiteContext* context, const Logger* logger)
-      : CalibrationReader(logger), context_(context) {}
-
-  ~Reader() override { GetCalibratorRegistry()->RemoveCalibrator(context_); }
-
- private:
-  const TfLiteContext* context_;
-};
-
-}  // namespace
-
-TfLiteStatus BuildLoggingInterpreter(
-    const FlatBufferModel& model, const OpResolver& op_resolver,
-    std::unique_ptr<Interpreter>* interpreter,
-    std::unique_ptr<CalibrationReader>* calibration_reader) {
-  auto tflite_model = model.GetModel();
-  auto subgraphs = tflite_model->subgraphs();
-  auto tensor_buffers = tflite_model->buffers();
-
-  if (subgraphs->size() != 1) {
-    model.error_reporter()->Report(
-        "Only models with a single subgraph are supported, model had %d "
-        "subgraphs",
-        subgraphs->size());
-    return kTfLiteError;
-  }
-
-  // Populate the node index to operator info map.
-  // We want to collect this information so we can use it at runtime to
-  // log details of each op's inputs and outputs.
-  // At runtime, TFLite kernel invoke functions can only look into their
-  // own node in the graph (TfLiteNode*) and some limited context information.
-  auto primary_subgraph = subgraphs->Get(0);
-  auto operator_codes = tflite_model->operator_codes();
-  auto operators = primary_subgraph->operators();
-  auto tensors = primary_subgraph->tensors();
-  std::unordered_map<int, OperatorInfo> node_to_opinfo;
-  BuiltinOpsSet op_and_versions;
-
-  for (size_t i = 0; i < operators->size(); i++) {
-    OperatorInfo op_info;
-    op_info.node_index = i;
-    auto op = operators->Get(i);
-    auto operator_code = operator_codes->Get(op->opcode_index());
-    op_info.builtin_op_code = operator_code->builtin_code();
-    op_info.name = GetOpName(*operator_code);
-    op_info.is_custom_op = operator_code->custom_code() != nullptr;
-
-    auto op_inputs = op->inputs();
-    auto op_outputs = op->outputs();
-    op_info.inputs = std::vector<int>(op_inputs->begin(), op_inputs->end());
-    op_info.outputs = std::vector<int>(op_outputs->begin(), op_outputs->end());
-    op_info.loggable_inputs =
-        GetLoggableTensorIndices(op_info.inputs, tensors, tensor_buffers);
-    op_info.loggable_outputs =
-        GetLoggableTensorIndices(op_info.outputs, tensors, tensor_buffers);
-    if (!op_info.is_custom_op) {
-      op_info.registration = op_resolver.FindOp(operator_code->builtin_code(),
-                                                operator_code->version());
-    } else {
-      op_info.registration =
-          op_resolver.FindOp(op_info.name.c_str(), operator_code->version());
-    }
-    node_to_opinfo[i] = op_info;
-    op_and_versions.insert({op_info.builtin_op_code, operator_code->version()});
-  }
-
-  // Prepare the logging op resolver to use |LoggingEval| for kernel
-  // invocations.
-  auto logging_op_resolver = absl::make_unique<LoggingOpResolver>(
-      op_and_versions, op_resolver, LoggingEval);
-  tflite::InterpreterBuilder(model, *logging_op_resolver)(interpreter);
-
-  if (!(*interpreter)) {
-    model.error_reporter()->Report("Failed to construct interpreter");
-    return kTfLiteError;
-  }
-
-  // Compute the mapping between runtime and static graph structure, i.e.
-  // (TfLiteContext, TfLiteNode) -> OperatorInfo
-  std::unordered_map<const TfLiteNode*, OperatorInfo> node_ptr_opinfo_map;
-  const TfLiteContext* context = nullptr;
-  TF_LITE_ENSURE_STATUS(GetNodeOpInfoMapAndContext(
-      node_to_opinfo, interpreter->get(), &node_ptr_opinfo_map, &context));
-
-  Calibrator* calibrator = nullptr;
-  // Register a calibrator object for the context. This can be accessed
-  // during invocations by the logging kernels.
-  TF_LITE_ENSURE_STATUS(GetCalibratorRegistry()->CreateCalibrator(
-      context, node_ptr_opinfo_map, std::move(logging_op_resolver), &calibrator,
-      model.error_reporter()));
-  *calibration_reader = std::unique_ptr<CalibrationReader>(
-      new Reader(context, calibrator->GetLogger()));
-
-  return kTfLiteOk;
-}
-
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
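
The removed LoggingEval above is an instance of a simple wrap-and-forward pattern: log the inputs, call the wrapped kernel, log the outputs, propagate the status. A minimal self-contained sketch of that pattern in plain C++ (the types and names here are illustrative stand-ins, not TFLite APIs):

    #include <functional>
    #include <iostream>
    #include <vector>

    // Illustrative stand-in for a kernel eval function over float tensors.
    using EvalFn = std::function<int(const std::vector<float>& in, std::vector<float>& out)>;

    // Wraps a kernel so inputs are logged before the call and outputs after it,
    // mirroring the structure of LoggingEval.
    EvalFn MakeLoggingEval(EvalFn wrapped) {
      return [wrapped](const std::vector<float>& in, std::vector<float>& out) {
        for (float v : in) std::cout << "input: " << v << "\n";    // log inputs
        int status = wrapped(in, out);                             // run the real kernel
        for (float v : out) std::cout << "output: " << v << "\n";  // log outputs
        return status;
      };
    }

    int main() {
      EvalFn add_one = [](const std::vector<float>& in, std::vector<float>& out) {
        out.clear();
        for (float v : in) out.push_back(v + 1.0f);
        return 0;
      };
      std::vector<float> out;
      MakeLoggingEval(add_one)({1.0f, 2.0f}, out);  // logs 1, 2 then 2, 3
      return 0;
    }

Keeping the wrapper stateless and resolving the real kernel through the calibrator registry is what lets the same LoggingEval serve every wrapped builtin op.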
diff --git a/tensorflow/lite/tools/optimize/calibrator.h b/tensorflow/lite/tools/optimize/calibrator.h
deleted file mode 100644
index ab3cb27..0000000
--- a/tensorflow/lite/tools/optimize/calibrator.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATOR_H_
-#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATOR_H_
-
-#include <unordered_map>
-
-#include "flatbuffers/flatbuffers.h"  // TF:flatbuffers
-#include "tensorflow/lite/core/api/op_resolver.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/tools/optimize/calibration_reader.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-
-// Warning: This is not a public API and subject to change.
-
-// Builds an interpreter that logs the calibration data in memory.
-// The calibration data can be recovered using |calibration_reader|.
-//
-// Sample usage:
-// std::unique_ptr<Interpreter> interpreter;
-// std::unique_ptr<CalibrationReader> calibration_reader;
-// BuiltinOpResolver resolver = ...
-// FlatBufferModel model = ..
-//
-// BuildLoggingInterpreter(model, resolver, &interpreter,
-//  &calibration_reader);
-//
-//
-// * Allocate tensors...
-// * Call interpreter->invoke on calibration dataset.
-//
-// Calibration data can be read either directly by calling
-// std::unordered_map<int, CalibrationReader::CalibrationStats> tensor_index_to_stats;
-// calibration_reader->GetTensorStatsAsMap(&tensor_index_to_stats);
-//
-// or by adding the calibration data to the model itself.
-// ModelT* original_floating_point_model = ...
-// calibration_reader->AddCalibrationToModel(original_floating_point_model);
-//
-TfLiteStatus BuildLoggingInterpreter(
-    const FlatBufferModel& model, const OpResolver& op_resolver,
-    std::unique_ptr<Interpreter>* interpreter,
-    std::unique_ptr<CalibrationReader>* calibration_reader);
-
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_CALIBRATOR_H_
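
The min/max statistics that CalibrationReader exposes are exactly what post-training quantization consumes. As an illustration of the downstream math (not part of this header; this is the standard asymmetric 8-bit mapping, shown here only as a sketch), scale and zero point can be derived from a calibrated range like so:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Illustrative only: derive asymmetric uint8 quantization params from a
    // calibrated [min, max] range, nudging the range so it contains zero.
    void QuantParamsFromMinMax(float min, float max, float* scale, int32_t* zero_point) {
      min = std::min(min, 0.0f);  // the representable range must include zero
      max = std::max(max, 0.0f);
      *scale = (max - min) / 255.0f;
      if (*scale == 0.0f) {  // degenerate all-zero range
        *zero_point = 0;
        return;
      }
      const float zp = -min / *scale;  // quantized value representing real 0.0
      *zero_point = static_cast<int32_t>(
          std::round(std::min(std::max(zp, 0.0f), 255.0f)));
    }

    int main() {
      float scale;
      int32_t zp;
      QuantParamsFromMinMax(0.5f, 6.5f, &scale, &zp);  // a calibrated range
      std::printf("scale=%f zero_point=%d\n", scale, zp);
      return 0;
    }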
diff --git a/tensorflow/lite/tools/optimize/calibrator_test.cc b/tensorflow/lite/tools/optimize/calibrator_test.cc
deleted file mode 100644
index 1a415ad..0000000
--- a/tensorflow/lite/tools/optimize/calibrator_test.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include <cstring>
-
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/platform/init_main.h"
-#include "tensorflow/core/util/command_line_flags.h"
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/tools/optimize/calibrator.h"
-
-namespace {
-tensorflow::string* g_test_model_file = nullptr;
-}  // namespace
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-namespace {
-
-std::unique_ptr<FlatBufferModel> ReadModel() {
-  if (g_test_model_file) {
-    return FlatBufferModel::BuildFromFile(g_test_model_file->c_str());
-  }
-  return nullptr;
-}
-
-TEST(CalibratorTest, CalibrationStatsAreCollected) {
-  auto model = ReadModel();
-  ASSERT_TRUE(model);
-  std::unique_ptr<Interpreter> interpreter;
-  std::unique_ptr<CalibrationReader> reader;
-  auto status = BuildLoggingInterpreter(
-      *model, ops::builtin::BuiltinOpResolver{}, &interpreter, &reader);
-  EXPECT_EQ(kTfLiteOk, status);
-
-  ASSERT_TRUE(interpreter);
-  ASSERT_TRUE(reader);
-  std::unordered_map<int, CalibrationReader::CalibrationStats> stats;
-  status = reader->GetTensorStatsAsMap(&stats);
-  EXPECT_EQ(kTfLiteOk, status);
-  EXPECT_TRUE(stats.empty());
-
-  status = interpreter->AllocateTensors();
-  ASSERT_EQ(kTfLiteOk, status);
-  // Model does the following:
-  // 0        1       2        3
-  // |        |__ ____|        |
-  // |           |             |
-  // |          Add(tensor:4)  |
-  // |____ ______|______ ______|
-  //      |             |
-  //      Add          Add
-  //      |             |
-  //    Output:5      Output:6
-
-  const size_t tensor_size = 1 * 8 * 8 * 3;
-
-  std::vector<float> ones(tensor_size, 1.0f);
-  // Fill input tensor i with i+1, i.e. input[0] = 1.0f, input[1] = 2.0f,
-  // input[2] = 3.0f, input[3] = 4.0f.
-
-  for (size_t i = 0; i < interpreter->inputs().size(); i++) {
-    int input_tensor_idx = interpreter->inputs()[i];
-    TfLiteTensor* tensor = interpreter->tensor(input_tensor_idx);
-    ASSERT_EQ(tensor->bytes, tensor_size * sizeof(float));
-    for (size_t j = 0; j < tensor_size; j++) {
-      tensor->data.f[j] = i + 1;
-    }
-  }
-  status = interpreter->Invoke();
-  ASSERT_EQ(kTfLiteOk, status);
-  const float eps = 1e-6f;
-  // Verify that output tensor 5 is 6 and output tensor 6 is 9.
-  TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
-  for (size_t i = 0; i < tensor_size; i++) {
-    EXPECT_NEAR(tensor->data.f[i], 6.0f, eps);
-  }
-  tensor = interpreter->tensor(interpreter->outputs()[1]);
-  for (size_t i = 0; i < tensor_size; i++) {
-    EXPECT_NEAR(tensor->data.f[i], 9.0f, eps);
-  }
-
-  // Verify the min/max stats of the tensors.
-  status = reader->GetTensorStatsAsMap(&stats);
-  EXPECT_EQ(kTfLiteOk, status);
-  EXPECT_EQ(7, stats.size());
-  // Check inputs
-  for (int tensor_idx = 0; tensor_idx < 4; tensor_idx++) {
-    EXPECT_NEAR(stats.at(tensor_idx).min, tensor_idx + 1, eps);
-    EXPECT_NEAR(stats.at(tensor_idx).max, tensor_idx + 1, eps);
-  }
-  // Check tensor 4 min/max.
-  EXPECT_NEAR(stats.at(4).min, 5, eps);
-  EXPECT_NEAR(stats.at(4).max, 5, eps);
-
-  // Check outputs
-  EXPECT_NEAR(stats.at(5).min, 6, eps);
-  EXPECT_NEAR(stats.at(5).max, 6, eps);
-
-  EXPECT_NEAR(stats.at(6).min, 9, eps);
-  EXPECT_NEAR(stats.at(6).max, 9, eps);
-}
-
-TEST(CalibratorTest, MultipleInvokes) {
-  auto model = ReadModel();
-  ASSERT_TRUE(model);
-  std::unique_ptr<Interpreter> interpreter;
-  std::unique_ptr<CalibrationReader> reader;
-  auto status = BuildLoggingInterpreter(
-      *model, ops::builtin::BuiltinOpResolver{}, &interpreter, &reader);
-  EXPECT_EQ(kTfLiteOk, status);
-
-  ASSERT_TRUE(interpreter);
-  ASSERT_TRUE(reader);
-  status = interpreter->AllocateTensors();
-
-  EXPECT_EQ(kTfLiteOk, status);
-  const size_t tensor_size = 1 * 8 * 8 * 3;
-  // Fill input tensor i with i+1, i.e. input[0] = 1.0f, input[1] = 2.0f,
-  // input[2] = 3.0f, input[3] = 4.0f.
-
-  for (size_t i = 0; i < interpreter->inputs().size(); i++) {
-    int input_tensor_idx = interpreter->inputs()[i];
-    TfLiteTensor* tensor = interpreter->tensor(input_tensor_idx);
-    ASSERT_EQ(tensor->bytes, tensor_size * sizeof(float));
-    for (size_t j = 0; j < tensor_size; j++) {
-      tensor->data.f[j] = i + 1;
-    }
-  }
-  status = interpreter->Invoke();
-  ASSERT_EQ(kTfLiteOk, status);
-  const float eps = 1e-6f;
-  // Verify the min/max stats of the tensors.
-  std::unordered_map<int, CalibrationReader::CalibrationStats> stats;
-  status = reader->GetTensorStatsAsMap(&stats);
-  EXPECT_EQ(kTfLiteOk, status);
-  EXPECT_EQ(7, stats.size());
-  const float expected_values[7] = {
-      1.0f,  // input 0
-      2.0f,  // input 1
-      3.0f,  // input 2
-      4.0f,  // input 3
-      5.0f,  // Add(1, 2)
-      6.0f,  // Output 5: Add(0, Add(1,2))
-      9.0f,  // Output 6: Add(Add(1,2), 3)
-  };
-  for (int tensor_idx = 0; tensor_idx < 7; tensor_idx++) {
-    EXPECT_NEAR(stats.at(tensor_idx).min, expected_values[tensor_idx], eps);
-    EXPECT_NEAR(stats.at(tensor_idx).max, expected_values[tensor_idx], eps);
-  }
-  // Set input[0][0] = 1.5 and input[0][1] = 0.5; this should change the stats
-  // only for input 0 and output 5 (tensors 1-4 and output 6 are unaffected).
-  TfLiteTensor* input0 = interpreter->tensor(0);
-  input0->data.f[0] = 1.5f;
-  input0->data.f[1] = 0.5f;
-  status = interpreter->Invoke();
-  ASSERT_EQ(kTfLiteOk, status);
-  status = reader->GetTensorStatsAsMap(&stats);
-  EXPECT_EQ(kTfLiteOk, status);
-  EXPECT_EQ(7, stats.size());
-  EXPECT_NEAR(stats.at(0).min, 0.5f, eps);
-  EXPECT_NEAR(stats.at(0).max, 1.5f, eps);
-
-  for (int tensor_idx = 1; tensor_idx < 5; tensor_idx++) {
-    EXPECT_NEAR(stats.at(tensor_idx).min, expected_values[tensor_idx], eps);
-    EXPECT_NEAR(stats.at(tensor_idx).max, expected_values[tensor_idx], eps);
-  }
-
-  EXPECT_NEAR(stats.at(5).min, 5.5f, eps);
-  EXPECT_NEAR(stats.at(5).max, 6.5f, eps);
-
-  EXPECT_NEAR(stats.at(6).min, 9.0f, eps);
-  EXPECT_NEAR(stats.at(6).max, 9.0f, eps);
-}
-
-}  // namespace
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
-
-int main(int argc, char** argv) {
-  tensorflow::string model_file;
-  const std::vector<tensorflow::Flag> flag_list = {
-      tensorflow::Flag("test_model_file", &model_file,
-                       "Path to test tflite model file."),
-  };
-
-  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
-  if (!parse_result) {
-    std::cerr << "Required test_model_file\n";
-    std::abort();
-  }
-  g_test_model_file = new tensorflow::string(model_file);
-  ::tensorflow::port::InitMain(argv[0], &argc, &argv);
-  return RUN_ALL_TESTS();
-}
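
For the graph sketched in the comments above, the expected stats follow from one pass of arithmetic; a quick standalone check, independent of TFLite:

    #include <cassert>

    int main() {
      // Inputs are filled with constants 1, 2, 3, 4 (tensor index + 1).
      const float t0 = 1, t1 = 2, t2 = 3, t3 = 4;
      const float t4 = t1 + t2;  // hidden Add     -> 5
      const float t5 = t0 + t4;  // output 5       -> 6
      const float t6 = t4 + t3;  // output 6       -> 9
      assert(t4 == 5 && t5 == 6 && t6 == 9);
      return 0;
    }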
diff --git a/tensorflow/lite/tools/optimize/logging_op_resolver.cc b/tensorflow/lite/tools/optimize/logging_op_resolver.cc
deleted file mode 100644
index 7633ebb..0000000
--- a/tensorflow/lite/tools/optimize/logging_op_resolver.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/tools/optimize/logging_op_resolver.h"
-
-#include "absl/memory/memory.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-
-LoggingOpResolver::LoggingOpResolver(const BuiltinOpsSet& ops_to_replace,
-                                     const OpResolver& base_resolver,
-                                     KernelEvalFuncPtr logging_eval_fn) {
-  for (const auto& op_and_version : ops_to_replace) {
-    const TfLiteRegistration* base_registration =
-        base_resolver.FindOp(op_and_version.first, op_and_version.second);
-    BuiltinOperatorKey key = op_and_version;
-    builtin_op_evalfn_map_[key] = base_registration->invoke;
-    std::unique_ptr<TfLiteRegistration> logging_registration =
-        absl::make_unique<TfLiteRegistration>(*base_registration);
-    logging_registration->invoke = logging_eval_fn;
-    builtin_op_registration_map_[key] = std::move(logging_registration);
-  }
-}
-
-const TfLiteRegistration* LoggingOpResolver::FindOp(BuiltinOperator op,
-                                                    int version) const {
-  BuiltinOperatorKey key = {op, version};
-  if (builtin_op_registration_map_.find(key) !=
-      builtin_op_registration_map_.end()) {
-    return builtin_op_registration_map_.at(key).get();
-  }
-
-  return nullptr;
-}
-
-KernelEvalFuncPtr LoggingOpResolver::GetWrappedKernelInvoke(BuiltinOperator op,
-                                                            int version) const {
-  return builtin_op_evalfn_map_.at({op, version});
-}
-
-const TfLiteRegistration* LoggingOpResolver::FindOp(const char* op,
-                                                    int version) const {
-  // TODO(b/121374947): Support custom ops as well.
-  return nullptr;
-}
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
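
The constructor above copies each base TfLiteRegistration wholesale and swaps only its invoke pointer, so prepare/init/free behavior is untouched and memory planning is unaffected. A stripped-down sketch of that clone-and-swap idea (an illustrative struct, not the real TfLiteRegistration):

    #include <cstdio>
    #include <memory>

    // Illustrative stand-in for TfLiteRegistration: a bundle of function pointers.
    struct Registration {
      int (*prepare)();
      int (*invoke)();
    };

    int RealPrepare() { return 0; }
    int RealInvoke() { std::puts("real kernel"); return 0; }
    int LoggingInvoke() { std::puts("logging wrapper"); return 0; }

    int main() {
      Registration base{RealPrepare, RealInvoke};
      // Clone the registration, then swap only the invoke pointer -- prepare
      // (and, in the real type, init/free) stay untouched, as the resolver does.
      auto logging = std::make_unique<Registration>(base);
      logging->invoke = LoggingInvoke;
      logging->prepare();  // still the original prepare
      logging->invoke();   // now the wrapper
      return 0;
    }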
diff --git a/tensorflow/lite/tools/optimize/logging_op_resolver.h b/tensorflow/lite/tools/optimize/logging_op_resolver.h
deleted file mode 100644
index 58a3a0f..0000000
--- a/tensorflow/lite/tools/optimize/logging_op_resolver.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_LOGGING_OP_RESOLVER_H_
-#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_LOGGING_OP_RESOLVER_H_
-
-#include <set>
-#include <unordered_map>
-
-#include "tensorflow/lite/core/api/op_resolver.h"
-#include "tensorflow/lite/mutable_op_resolver.h"
-#include "tensorflow/lite/op_resolver.h"
-#include "tensorflow/lite/tools/optimize/calibration_common.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-// A resolver that replaces the kernel invocations with a wrapper
-// eval function.
-class LoggingOpResolver : public OpResolver {
- public:
-  // Creates an instance of |LoggingOpResolver|.
-  // All |TfLiteRegistration.invoke| functions are replaced by
-  // |logging_eval_fn|.
-  // TODO(shashishekhar): This interface needs to change for custom ops and
-  // BuiltinOps that need special logging implementations.
-  LoggingOpResolver(const BuiltinOpsSet& ops_to_replace,
-                    const OpResolver& base_resolver,
-                    KernelEvalFuncPtr logging_eval_fn);
-
-  const TfLiteRegistration* FindOp(BuiltinOperator op,
-                                   int version) const override;
-
-  KernelEvalFuncPtr GetWrappedKernelInvoke(BuiltinOperator op,
-                                           int version) const;
-  const TfLiteRegistration* FindOp(const char* op, int version) const override;
-
- private:
-  BuiltinOpsMap<std::unique_ptr<TfLiteRegistration>>
-      builtin_op_registration_map_;
-  BuiltinOpsMap<KernelEvalFuncPtr> builtin_op_evalfn_map_;
-};
-
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_LOGGING_OP_RESOLVER_H_
diff --git a/tensorflow/lite/tools/optimize/logging_op_resolver_test.cc b/tensorflow/lite/tools/optimize/logging_op_resolver_test.cc
deleted file mode 100644
index c7fe2d3..0000000
--- a/tensorflow/lite/tools/optimize/logging_op_resolver_test.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/tools/optimize/logging_op_resolver.h"
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
-#include "tensorflow/lite/mutable_op_resolver.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-namespace {
-
-TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
-  return kTfLiteOk;
-}
-
-TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
-  return kTfLiteOk;
-}
-
-TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
-  return kTfLiteOk;
-}
-
-TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
-  return kTfLiteOk;
-}
-
-TfLiteStatus WrappingInvoke(TfLiteContext* context, TfLiteNode* node) {
-  return kTfLiteOk;
-}
-
-TEST(LoggingOpResolverTest, KernelInvokesAreReplaced) {
-  MutableOpResolver base_resolver;
-  TfLiteRegistration conv_registration = {};
-  conv_registration.prepare = ConvPrepare;
-  conv_registration.invoke = ConvEval;
-
-  base_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &conv_registration);
-
-  TfLiteRegistration add_registration = {};
-  add_registration.prepare = AddPrepare;
-  add_registration.invoke = AddEval;
-
-  base_resolver.AddBuiltin(BuiltinOperator_ADD, &add_registration);
-  BuiltinOpsSet ops_to_replace = {
-      {BuiltinOperator_CONV_2D, /*version*/ 1},
-      {BuiltinOperator_ADD, /*version*/ 1},
-  };
-
-  LoggingOpResolver resolver(ops_to_replace, base_resolver, WrappingInvoke);
-
-  auto reg = resolver.FindOp(BuiltinOperator_CONV_2D, 1);
-
-  EXPECT_EQ(reg->builtin_code, BuiltinOperator_CONV_2D);
-  EXPECT_TRUE(reg->prepare == ConvPrepare);
-  EXPECT_TRUE(reg->invoke == WrappingInvoke);
-
-  reg = resolver.FindOp(BuiltinOperator_ADD, 1);
-
-  EXPECT_EQ(reg->builtin_code, BuiltinOperator_ADD);
-  EXPECT_TRUE(reg->prepare == AddPrepare);
-  EXPECT_TRUE(reg->invoke == WrappingInvoke);
-}
-
-TEST(LoggingOpResolverTest, OriginalKernelInvokesAreRetained) {
-  MutableOpResolver base_resolver;
-  TfLiteRegistration conv_registration = {};
-  conv_registration.prepare = ConvPrepare;
-  conv_registration.invoke = ConvEval;
-
-  base_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &conv_registration);
-
-  TfLiteRegistration add_registration = {};
-  add_registration.prepare = AddPrepare;
-  add_registration.invoke = AddEval;
-
-  base_resolver.AddBuiltin(BuiltinOperator_ADD, &add_registration);
-  BuiltinOpsSet ops_to_replace = {
-      {BuiltinOperator_CONV_2D, /*version*/ 1},
-      {BuiltinOperator_ADD, /*version*/ 1},
-  };
-
-  LoggingOpResolver resolver(ops_to_replace, base_resolver, WrappingInvoke);
-  auto kernel_invoke =
-      resolver.GetWrappedKernelInvoke(BuiltinOperator_CONV_2D, 1);
-  EXPECT_TRUE(kernel_invoke == ConvEval);
-  kernel_invoke = resolver.GetWrappedKernelInvoke(BuiltinOperator_ADD, 1);
-  EXPECT_TRUE(kernel_invoke == AddEval);
-}
-
-TEST(LoggingOpResolverTest, OnlyOpsInReplacementSetAreReplaced) {
-  MutableOpResolver base_resolver;
-  TfLiteRegistration conv_registration = {};
-  conv_registration.prepare = ConvPrepare;
-  conv_registration.invoke = ConvEval;
-
-  base_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &conv_registration);
-
-  TfLiteRegistration add_registration = {};
-  add_registration.prepare = AddPrepare;
-  add_registration.invoke = AddEval;
-
-  base_resolver.AddBuiltin(BuiltinOperator_ADD, &add_registration);
-  // Only replace conv2d
-  BuiltinOpsSet ops_to_replace = {
-      {BuiltinOperator_CONV_2D, /*version*/ 1},
-  };
-
-  LoggingOpResolver resolver(ops_to_replace, base_resolver, WrappingInvoke);
-  auto reg = resolver.FindOp(BuiltinOperator_CONV_2D, 1);
-  EXPECT_EQ(reg->builtin_code, BuiltinOperator_CONV_2D);
-  EXPECT_TRUE(reg->prepare == ConvPrepare);
-  EXPECT_TRUE(reg->invoke == WrappingInvoke);
-
-  reg = resolver.FindOp(BuiltinOperator_ADD, 1);
-  EXPECT_EQ(nullptr, reg);
-}
-
-}  // namespace
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
-
-int main(int argc, char** argv) {
-  // On Linux, add: FLAGS_logtostderr = true;
-  ::testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/tensorflow/lite/tools/optimize/node_info_delegate.cc b/tensorflow/lite/tools/optimize/node_info_delegate.cc
deleted file mode 100644
index ccaa693..0000000
--- a/tensorflow/lite/tools/optimize/node_info_delegate.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/tools/optimize/node_info_delegate.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-
-namespace {
-// The prepare function for the delegate that forwards the prepare call to the
-// delegate observer in the node info delegate params.
-// The function simply calls the delegate observer's OnDelegatePrepareCalled
-// method.
-TfLiteStatus NodeInfoDelegatePrepare(TfLiteContext* context,
-                                     TfLiteDelegate* delegate) {
-  if (delegate == nullptr) return TfLiteStatus::kTfLiteError;
-
-  NodeInfoDelegateParams* params =
-      reinterpret_cast<NodeInfoDelegateParams*>(delegate->data_);
-  return params->delegate_observer->OnDelegatePrepareCalled(context);
-}
-}  // namespace
-
-TfLiteDelegate CreateNodeInfoDelegate(NodeInfoDelegateParams* params) {
-  return {.data_ = params,
-          .Prepare = NodeInfoDelegatePrepare,
-          .CopyFromBufferHandle = nullptr,
-          .CopyToBufferHandle = nullptr,
-          .FreeBufferHandle = nullptr};
-}
-
-TfLiteStatus NodeInfoDelegateObserver::OnDelegatePrepareCalled(
-    TfLiteContext* context) {
-  context_ = context;
-  const size_t num_nodes = node_index_opinfo_map_.size();
-  for (size_t node_index = 0; node_index < num_nodes; node_index++) {
-    TfLiteNode* node = nullptr;
-    TfLiteRegistration* reg = nullptr;
-    TF_LITE_ENSURE_STATUS(
-        context->GetNodeAndRegistration(context, node_index, &node, &reg));
-    auto op_info = node_index_opinfo_map_.at(node_index);
-    op_info.registration = reg;
-    node_ptr_opinfo_map_->insert({node, op_info});
-  }
-
-  if (node_ptr_opinfo_map_->size() != node_index_opinfo_map_.size()) {
-    // Something went wrong: the runtime node count doesn't match the model.
-    return kTfLiteError;
-  }
-  return kTfLiteOk;
-}
-
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
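
The delegate above never claims any nodes; it exists only so that ModifyGraphWithDelegate will hand its Prepare callback the live TfLiteContext, which the observer then records. A minimal stand-alone rendering of that observer hook (illustrative types, not the TFLite delegate API):

    #include <cstdio>

    struct Context { int num_nodes; };  // stand-in for TfLiteContext

    // A "delegate" whose only job is to observe the context it is prepared with.
    struct ObservingDelegate {
      void* data;
      int (*Prepare)(Context*, ObservingDelegate*);
    };

    struct Observer {
      Context* seen = nullptr;
      static int OnPrepare(Context* ctx, ObservingDelegate* d) {
        static_cast<Observer*>(d->data)->seen = ctx;  // record, claim nothing
        return 0;
      }
    };

    int main() {
      Observer observer;
      ObservingDelegate delegate{&observer, &Observer::OnPrepare};
      Context ctx{3};
      delegate.Prepare(&ctx, &delegate);  // what ModifyGraphWithDelegate would do
      std::printf("observed context with %d nodes\n", observer.seen->num_nodes);
      return 0;
    }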
diff --git a/tensorflow/lite/tools/optimize/node_info_delegate.h b/tensorflow/lite/tools/optimize/node_info_delegate.h
deleted file mode 100644
index 8ee2ce1..0000000
--- a/tensorflow/lite/tools/optimize/node_info_delegate.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_NODE_INFO_DELEGATE_H_
-#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_NODE_INFO_DELEGATE_H_
-
-#include <unordered_map>
-
-#include "tensorflow/lite/context.h"
-#include "tensorflow/lite/tools/optimize/calibration_common.h"
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-
-// An interface for a delegate observer that can listen to
-// TfLiteDelegate::Prepare calls.
-class DelegateObserver {
- public:
-  virtual TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) = 0;
-  virtual ~DelegateObserver() {}
-};
-
-// The parameters for the node info delegate.
-struct NodeInfoDelegateParams {
-  DelegateObserver* delegate_observer;
-};
-
-// Creates a delegate with the given |params|.
-TfLiteDelegate CreateNodeInfoDelegate(NodeInfoDelegateParams* params);
-
-// A delegate observer that can construct the map from TfLiteNode* ->
-// OperatorInfo.
-class NodeInfoDelegateObserver : public DelegateObserver {
- public:
-  NodeInfoDelegateObserver(
-      const std::unordered_map<int, OperatorInfo>& node_index_to_op,
-      std::unordered_map<const TfLiteNode*, OperatorInfo>* node_ptr_opinfo_map)
-      : node_index_opinfo_map_(node_index_to_op),
-        node_ptr_opinfo_map_(node_ptr_opinfo_map) {}
-
-  TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) override;
-
-  // Returns the context that was used to call the prepare method.
-  const TfLiteContext* GetContext() const { return context_; }
-
- private:
-  const TfLiteContext* context_ = nullptr;
-  const std::unordered_map<int, OperatorInfo>& node_index_opinfo_map_;
-  std::unordered_map<const TfLiteNode*, OperatorInfo>* node_ptr_opinfo_map_;
-};
-
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_NODE_INFO_DELEGATE_H_
diff --git a/tensorflow/lite/tools/optimize/node_info_delegate_test.cc b/tensorflow/lite/tools/optimize/node_info_delegate_test.cc
deleted file mode 100644
index 05332c5..0000000
--- a/tensorflow/lite/tools/optimize/node_info_delegate_test.cc
+++ /dev/null
@@ -1,178 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include <unordered_map>
-
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/platform/init_main.h"
-#include "tensorflow/core/util/command_line_flags.h"
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/tools/optimize/node_info_delegate.h"
-#include "tensorflow/lite/tools/optimize/test_util.h"
-
-namespace {
-tensorflow::string* g_test_model_dir = nullptr;
-}  // namespace
-
-namespace tflite {
-namespace optimize {
-namespace calibration {
-namespace {
-
-std::unique_ptr<FlatBufferModel> ReadModel(const char* model) {
-  auto model_path = tensorflow::io::JoinPath(*g_test_model_dir, model);
-  return FlatBufferModel::BuildFromFile(model_path.c_str());
-}
-
-std::unique_ptr<FlatBufferModel> ReadModel() {
-  return ReadModel(internal::kConvModelWith0Plus10Weights);
-}
-
-class TestDelegateObserver : public DelegateObserver {
- public:
-  explicit TestDelegateObserver(TfLiteStatus status_to_return)
-      : status_to_return_(status_to_return) {}
-
-  TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) override {
-    num_times_called_++;
-    return status_to_return_;
-  }
-  int num_times_called() { return num_times_called_; }
-
- private:
-  int num_times_called_ = 0;
-  TfLiteStatus status_to_return_;
-};
-
-TEST(NodeInfoDelegateTest, DelegateObserverIsCalled) {
-  TestDelegateObserver observer(kTfLiteOk);
-  NodeInfoDelegateParams params;
-  params.delegate_observer = &observer;
-  auto model = ReadModel();
-  ASSERT_TRUE(model);
-  std::unique_ptr<Interpreter> interpreter;
-  ASSERT_EQ(InterpreterBuilder(*model,
-                               ops::builtin::BuiltinOpResolver{})(&interpreter),
-            kTfLiteOk);
-  ASSERT_TRUE(interpreter);
-  EXPECT_EQ(0, observer.num_times_called());
-  TfLiteDelegate delegate = CreateNodeInfoDelegate(&params);
-
-  auto status = interpreter->ModifyGraphWithDelegate(&delegate);
-  EXPECT_EQ(kTfLiteOk, status);
-  EXPECT_EQ(1, observer.num_times_called());
-}
-
-TEST(NodeInfoDelegateTest, ObserverErrorCausesModifyGraphFailure) {
-  // Observer returns error
-  TestDelegateObserver observer(kTfLiteError);
-  NodeInfoDelegateParams params;
-  params.delegate_observer = &observer;
-  auto model = ReadModel();
-  ASSERT_TRUE(model);
-  std::unique_ptr<Interpreter> interpreter;
-  ASSERT_EQ(InterpreterBuilder(*model,
-                               ops::builtin::BuiltinOpResolver{})(&interpreter),
-            kTfLiteOk);
-  ASSERT_TRUE(interpreter);
-  TfLiteDelegate delegate = CreateNodeInfoDelegate(&params);
-
-  auto status = interpreter->ModifyGraphWithDelegate(&delegate);
-  EXPECT_EQ(kTfLiteError, status);
-}
-
-TEST(NodeInfoDelegateTest, NodeInfoDelegateObserver) {
-  auto model = ReadModel();
-  ASSERT_TRUE(model);
-
-  std::unordered_map<int, OperatorInfo> index_to_opinfo;
-  auto primary_subgraph = model->GetModel()->subgraphs()->Get(0);
-  auto operators = primary_subgraph->operators();
-  auto subgraph_tensors = primary_subgraph->tensors();
-  for (size_t i = 0; i < operators->size(); i++) {
-    OperatorInfo info;
-    auto op_inputs = operators->Get(i)->inputs();
-    auto op_outputs = operators->Get(i)->outputs();
-    info.inputs = std::vector<int>(op_inputs->begin(), op_inputs->end());
-    info.outputs = std::vector<int>(op_outputs->begin(), op_outputs->end());
-    index_to_opinfo[i] = info;
-  }
-
-  std::unordered_map<const TfLiteNode*, OperatorInfo> node_to_opinfo;
-  NodeInfoDelegateObserver observer(index_to_opinfo, &node_to_opinfo);
-  NodeInfoDelegateParams params;
-  params.delegate_observer = &observer;
-  std::unique_ptr<Interpreter> interpreter;
-  ASSERT_EQ(InterpreterBuilder(*model,
-                               ops::builtin::BuiltinOpResolver{})(&interpreter),
-            kTfLiteOk);
-  ASSERT_TRUE(interpreter);
-
-  TfLiteDelegate delegate = CreateNodeInfoDelegate(&params);
-
-  auto status = interpreter->ModifyGraphWithDelegate(&delegate);
-  EXPECT_EQ(kTfLiteOk, status);
-  EXPECT_EQ(index_to_opinfo.size(), node_to_opinfo.size());
-  EXPECT_EQ(interpreter->nodes_size(), node_to_opinfo.size());
-
-  for (const auto& node_and_opinfo : node_to_opinfo) {
-    const TfLiteNode* tflite_node = node_and_opinfo.first;
-    const OperatorInfo& info = node_and_opinfo.second;
-    ASSERT_EQ(tflite_node->inputs->size, info.inputs.size());
-    ASSERT_EQ(tflite_node->outputs->size, info.outputs.size());
-
-    for (size_t input_index = 0; input_index < info.inputs.size();
-         input_index++) {
-      const TfLiteTensor* tflite_tensor =
-          interpreter->tensor(tflite_node->inputs->data[input_index]);
-      EXPECT_EQ(tflite_tensor->name,
-                subgraph_tensors->Get(info.inputs[input_index])->name()->str());
-    }
-
-    for (size_t output_index = 0; output_index < info.outputs.size();
-         output_index++) {
-      const TfLiteTensor* tflite_tensor =
-          interpreter->tensor(tflite_node->outputs->data[output_index]);
-      EXPECT_EQ(
-          tflite_tensor->name,
-          subgraph_tensors->Get(info.outputs[output_index])->name()->str());
-    }
-  }
-}
-
-}  // namespace
-}  // namespace calibration
-}  // namespace optimize
-}  // namespace tflite
-
-int main(int argc, char** argv) {
-  tensorflow::string model_file;
-  const std::vector<tensorflow::Flag> flag_list = {
-      tensorflow::Flag("test_model_file", &model_file,
-                       "Path to test tflite model file."),
-  };
-
-  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
-  if (!parse_result) {
-    std::cerr << "Required test_model_file\n";
-    std::abort();
-  }
-  g_test_model_dir =
-      new tensorflow::string(tensorflow::io::Dirname(model_file));
-  ::tensorflow::port::InitMain(argv[0], &argc, &argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/tensorflow/lite/tools/optimize/subgraph_quantizer.cc b/tensorflow/lite/tools/optimize/subgraph_quantizer.cc
index c1ff444..05115e8 100644
--- a/tensorflow/lite/tools/optimize/subgraph_quantizer.cc
+++ b/tensorflow/lite/tools/optimize/subgraph_quantizer.cc
@@ -67,17 +67,17 @@
 
 const OpWithBiasTensors* GetInfoForOpWithBiasTensor(BuiltinOperator op_code) {
   if (op_code == BuiltinOperator_CONV_2D) {
-    static OpWithBiasTensors op_info = {.activation_input_index = 0,
-                                        .weights_input_index = 1,
-                                        .bias_input_index = 2,
-                                        .index_for_channel_in_weights = 0};
+    static OpWithBiasTensors op_info = {/* activation_input_index */ 0,
+                                        /* weights_input_index */ 1,
+                                        /* bias_input_index */ 2,
+                                        /* index_for_channel_in_weights */ 0};
     return &op_info;
   }
   if (op_code == BuiltinOperator_DEPTHWISE_CONV_2D) {
-    static OpWithBiasTensors op_info = {.activation_input_index = 0,
-                                        .weights_input_index = 1,
-                                        .bias_input_index = 2,
-                                        .index_for_channel_in_weights = 3};
+    static OpWithBiasTensors op_info = {/* activation_input_index */ 0,
+                                        /* weights_input_index */ 1,
+                                        /* bias_input_index */ 2,
+                                        /* index_for_channel_in_weights */ 3};
     return &op_info;
   }
 
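This hunk swaps designated initializers (a compiler extension before C++20) for positional initialization annotated with /* name */ comments. Such comments are documentation only; the compiler cannot verify that they match the field order, so they deserve the same review scrutiny as code. A short illustration of the trade-off (plain C++, assuming only the struct layout shown in this file):

    #include <cstdio>

    struct OpWithBiasTensors {
      int activation_input_index;
      int weights_input_index;
      int bias_input_index;
      int index_for_channel_in_weights;
    };

    int main() {
      // Positional aggregate initialization: values bind by field order, and
      // the /* name */ comments are unchecked documentation.
      OpWithBiasTensors positional = {/* activation_input_index */ 0,
                                      /* weights_input_index */ 1,
                                      /* bias_input_index */ 2,
                                      /* index_for_channel_in_weights */ 3};
      // With C++20 (or the extension this hunk removes), designated
      // initializers make the binding explicit and compiler-checked:
      //   OpWithBiasTensors designated = {.activation_input_index = 0, ...};
      std::printf("%d\n", positional.index_for_channel_in_weights);  // prints 3
      return 0;
    }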
diff --git a/tensorflow/lite/tools/verifier.cc b/tensorflow/lite/tools/verifier.cc
index 8631bef..713f152 100644
--- a/tensorflow/lite/tools/verifier.cc
+++ b/tensorflow/lite/tools/verifier.cc
@@ -15,6 +15,7 @@
 
 #include "tensorflow/lite/tools/verifier.h"
 #include <climits>
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 #include "tensorflow/lite/string_util.h"
 #include "tensorflow/lite/version.h"
@@ -53,7 +54,7 @@
 
 // Verifies string tensor has legit buffer contents that follow the schema
 // defined in lite/string_util.h
-bool VerifyStringTensorBuffer(const Buffer& buffer,
+bool VerifyStringTensorBuffer(const Tensor& tensor, const Buffer& buffer,
                               ErrorReporter* error_reporter) {
   uint32_t buffer_size = buffer.data()->size();
   const char* buffer_ptr = reinterpret_cast<const char*>(buffer.data()->data());
@@ -61,7 +62,8 @@
   uint32_t num_strings = *GetIntPtr(buffer_ptr);
   if (num_strings > kMaxNumString) {
     ReportError(error_reporter,
-                "String tensor has invalid num of string set: %d", num_strings);
+                "String tensor %s has invalid num of string set: %d",
+                tensor.name()->c_str(), num_strings);
     return false;
   }
   uint32_t header_offsets =
@@ -69,9 +71,9 @@
 
   if (buffer_size < header_offsets) {
     ReportError(error_reporter,
-                "String tensor buffer requires at least %d bytes, but is "
+                "String tensor %s buffer requires at least %d bytes, but is "
                 "allocated with %d bytes",
-                header_offsets, buffer_size);
+                tensor.name()->c_str(), header_offsets, buffer_size);
     return false;
   }
 
@@ -80,22 +82,24 @@
 
   if (*GetIntPtr(buffer_ptr + offset) != header_offsets) {
     ReportError(error_reporter,
-                "String tensor buffer initial offset must be: %d",
-                header_offsets);
+                "String tensor %s buffer initial offset must be: %d",
+                tensor.name()->c_str(), header_offsets);
     return false;
   }
   offset += sizeof(int32_t);
   for (int i = 1; i <= num_strings; i++, offset += sizeof(int32_t)) {
     int string_offset = *GetIntPtr(buffer_ptr + offset);
     if (string_offset < prev_ptr || string_offset > buffer_size) {
-      ReportError(error_reporter, "String tensor buffer is invalid: index %d",
-                  i);
+      ReportError(error_reporter,
+                  "String tensor %s buffer is invalid: index %d",
+                  tensor.name()->c_str(), i);
       return false;
     }
   }
   if (*GetIntPtr(buffer_ptr + offset - sizeof(int32_t)) != buffer_size) {
-    ReportError(error_reporter, "String tensor buffer last offset must be %d",
-                buffer_size);
+    ReportError(error_reporter,
+                "String tensor %s buffer last offset must be %d",
+                tensor.name()->c_str(), buffer_size);
     return false;
   }
   return true;
@@ -106,13 +110,15 @@
                                ErrorReporter* error_reporter) {
   uint64_t bytes_required = 1;
   if (!tensor.shape()) {
-    ReportError(error_reporter, "Tensor shape is empty");
+    ReportError(error_reporter, "Tensor %s shape is empty",
+                tensor.name()->c_str());
     return false;
   }
   for (int dim : *tensor.shape()) {
     bytes_required *= dim;
     if (bytes_required > UINT_MAX) {
-      ReportError(error_reporter, "Tensor dimension overflow");
+      ReportError(error_reporter, "Tensor %s dimension overflow",
+                  tensor.name()->c_str());
       return false;
     }
   }
@@ -120,31 +126,36 @@
     case TensorType_FLOAT32:
       bytes_required *= sizeof(float);
       break;
-    case TensorType_INT32:
-      bytes_required *= sizeof(int32_t);
+    case TensorType_INT8:
+      bytes_required *= sizeof(int8_t);
       break;
     case TensorType_UINT8:
       bytes_required *= sizeof(uint8_t);
       break;
+    case TensorType_INT32:
+      bytes_required *= sizeof(int32_t);
+      break;
     case TensorType_INT64:
       bytes_required *= sizeof(int64_t);
       break;
     case TensorType_FLOAT16:
       // FALLTHROUGH_INTENDED;
     default:
-      ReportError(error_reporter, "Invalid tensor type: %d", tensor.type());
+      ReportError(error_reporter, "Tensor %s invalid type: %d",
+                  tensor.name()->c_str(), tensor.type());
       return false;
   }
   if (bytes_required > UINT_MAX) {
-    ReportError(error_reporter, "Tensor dimension overflow");
+    ReportError(error_reporter, "Tensor %s dimension overflow",
+                tensor.name()->c_str());
     return false;
   }
 
   if (bytes_required != buffer.data()->size()) {
     ReportError(
         error_reporter,
-        "Tensor requires %d bytes, but is allocated with %d bytes buffer",
-        bytes_required, buffer.data()->size());
+        "Tensor %s requires %d bytes, but is allocated with %d bytes buffer",
+        tensor.name()->c_str(), bytes_required, buffer.data()->size());
     return false;
   }
   return true;
@@ -170,6 +181,86 @@
   return true;
 }
 
+bool IsConstantTensor(const Tensor& tensor, const Model& model) {
+  if (!tensor.buffer() || !model.buffers()) return false;
+  if (tensor.buffer() > 0 && tensor.buffer() < model.buffers()->size()) {
+    auto* buffer = model.buffers()->Get(tensor.buffer());
+    if (buffer && buffer->data()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Performs basic consistency checks on a subgraph.
+bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph,
+                               ErrorReporter* error_reporter) {
+  absl::flat_hash_set<int> subgraph_input_tensors, constant_tensors,
+      variable_tensors, output_tensors;
+  for (int i = 0; i < subgraph.tensors()->Length(); ++i) {
+    const auto* tensor = subgraph.tensors()->Get(i);
+    if (IsConstantTensor(*tensor, model)) {
+      constant_tensors.insert(i);
+    } else if (tensor->is_variable()) {
+      variable_tensors.insert(i);
+    }
+  }
+  for (const int tensor_idx : *subgraph.inputs()) {
+    subgraph_input_tensors.insert(tensor_idx);
+  }
+
+  for (int op_idx = 0; op_idx < subgraph.operators()->Length(); ++op_idx) {
+    const auto* op = subgraph.operators()->Get(op_idx);
+    const auto& opcode = model.operator_codes()->Get(op->opcode_index());
+    // Check for invalid inputs: every input must be a constant, a variable,
+    // a subgraph input, or the output of an earlier op.
+    for (const int input_idx : *op->inputs()) {
+      if (input_idx == kOptionalTensor) continue;
+      if (constant_tensors.find(input_idx) == constant_tensors.end() &&
+          variable_tensors.find(input_idx) == variable_tensors.end() &&
+          subgraph_input_tensors.find(input_idx) ==
+              subgraph_input_tensors.end() &&
+          output_tensors.find(input_idx) == output_tensors.end()) {
+        ReportError(error_reporter,
+                    "Input tensor %d to op %d (%s) is not produced", input_idx,
+                    op_idx, EnumNameBuiltinOperator(opcode->builtin_code()));
+        return false;
+      }
+    }
+    // Check for cycles/invalid outputs: no output may be a constant, a
+    // variable, a subgraph input, or a tensor already produced by another op.
+    for (const int output_idx : *op->outputs()) {
+      if (constant_tensors.find(output_idx) != constant_tensors.end()) {
+        ReportError(error_reporter,
+                    "Output tensor %d to op %d (%s) is a constant", output_idx,
+                    op_idx, EnumNameBuiltinOperator(opcode->builtin_code()));
+        return false;
+      } else if (variable_tensors.find(output_idx) != variable_tensors.end()) {
+        ReportError(error_reporter,
+                    "Output tensor %d to op %d (%s) is a variable", output_idx,
+                    op_idx, EnumNameBuiltinOperator(opcode->builtin_code()));
+        return false;
+      } else if (subgraph_input_tensors.find(output_idx) !=
+                 subgraph_input_tensors.end()) {
+        ReportError(error_reporter,
+                    "Output tensor %d to op %d (%s) is a subgraph input",
+                    output_idx, op_idx,
+                    EnumNameBuiltinOperator(opcode->builtin_code()));
+        return false;
+      } else if (output_tensors.find(output_idx) != output_tensors.end()) {
+        ReportError(error_reporter,
+                    "Output tensor %d to op %d (%s) is an output from "
+                    "another op. There is a cycle in the graph",
+                    output_idx, op_idx,
+                    EnumNameBuiltinOperator(opcode->builtin_code()));
+        return false;
+      }
+      // This can be an input to a subsequent op.
+      output_tensors.insert(output_idx);
+    }
+  }
+  return true;
+}
+
 bool VerifySubGraphs(const Model& model, ErrorReporter* error_reporter) {
   if (!model.subgraphs()) {
     ReportError(error_reporter, "Missing 'subgraphs' section.");
@@ -184,6 +275,10 @@
     if (!VerifyOperators(*subgraph->operators(), error_reporter)) {
       return false;
     }
+
+    if (!VerifySubGraphConsistency(model, *subgraph, error_reporter)) {
+      return false;
+    }
   }
   return true;
 }
@@ -207,14 +302,14 @@
         continue;
       }
       if (tensor->buffer() >= model.buffers()->size()) {
-        ReportError(error_reporter, "Invalid tensor buffer index: %d",
-                    tensor->buffer());
+        ReportError(error_reporter, "Tensor %s invalid buffer index: %d",
+                    tensor->name(), tensor->buffer());
         return false;
       }
       auto* buffer = model.buffers()->Get(tensor->buffer());
       if (!buffer) {
-        ReportError(error_reporter, "Tensor buffer %d not set",
-                    tensor->buffer());
+        ReportError(error_reporter, "Tensor %s buffer %d not set",
+                    tensor->name(), tensor->buffer());
         return false;
       }
 
@@ -222,7 +317,7 @@
       // buffers will be allocated by the interpreter at run-time.
       if (buffer->data()) {
         if (tensor->type() == TensorType_STRING) {
-          if (!VerifyStringTensorBuffer(*buffer, error_reporter)) {
+          if (!VerifyStringTensorBuffer(*tensor, *buffer, error_reporter)) {
             return false;
           }
         } else {
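
With VerifySubGraphConsistency wired into VerifySubGraphs above, the verifier's public entry point now also rejects models whose dataflow is inconsistent. A sketch of driving it; the Verify signature and DefaultErrorReporter are assumptions taken from the surrounding tree (tensorflow/lite/tools/verifier.h at this revision), not a pinned API:

    #include <cstddef>

    #include "tensorflow/lite/kernels/register.h"
    #include "tensorflow/lite/model.h"
    #include "tensorflow/lite/tools/verifier.h"

    // Assumed entry point: tflite::Verify(buf, len, resolver, error_reporter).
    // Returns true only if the flatbuffer, the tensors/buffers, the ops, and
    // (with this change) the subgraph dataflow all pass verification.
    bool ModelLooksValid(const char* data, size_t length) {
      tflite::ops::builtin::BuiltinOpResolver resolver;
      return tflite::Verify(data, length, resolver,
                            tflite::DefaultErrorReporter());
    }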
diff --git a/tensorflow/lite/tools/verifier_test.cc b/tensorflow/lite/tools/verifier_test.cc
index 083f00d..a3cffef 100644
--- a/tensorflow/lite/tools/verifier_test.cc
+++ b/tensorflow/lite/tools/verifier_test.cc
@@ -72,7 +72,8 @@
   }
 
   void AddTensor(const std::vector<int>& shape, tflite::TensorType type,
-                 const std::vector<uint8_t>& buffer, const char* name) {
+                 const std::vector<uint8_t>& buffer, const char* name,
+                 const bool is_variable = false) {
     int buffer_index = 0;
     if (!buffer.empty()) {
       buffer_index = buffers_.size();
@@ -81,11 +82,12 @@
     if (shape.empty()) {
       tensors_.push_back(CreateTensorDirect(builder_, /*shape=*/nullptr, type,
                                             buffer_index, name,
-                                            /*quantization=*/0));
+                                            /*quantization=*/0, is_variable));
       return;
     }
     tensors_.push_back(CreateTensorDirect(builder_, &shape, type, buffer_index,
-                                          name, /*quantization=*/0));
+                                          name, /*quantization=*/0,
+                                          is_variable));
   }
 
   void AddOperator(const std::vector<int32_t>& inputs,
@@ -148,7 +150,7 @@
 
 TEST(VerifyModel, TestEmptyShape) {
   TfLiteFlatbufferModelBuilder builder({}, {"test"});
-  builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "test");
+  builder.AddOperator({0, 1}, {3}, BuiltinOperator_CUSTOM, "test");
   builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "input");
   builder.AddTensor({}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "inputtwo");
   builder.AddTensor(
@@ -156,10 +158,10 @@
       {2, 0, 0, 0, 16, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 'A', 'B', 'C'},
       "data");
   builder.AddTensor({2, 3}, TensorType_INT32, {}, "output");
-  builder.FinishModel({0, 1}, {2});
+  builder.FinishModel({0, 1}, {3});
   ASSERT_FALSE(builder.Verify());
   EXPECT_THAT(builder.GetErrorString(),
-              ::testing::ContainsRegex("Tensor shape is empty"));
+              ::testing::ContainsRegex("Tensor inputtwo shape is empty"));
 }
 
 TEST(VerifyModel, TestSimpleModel) {
@@ -219,10 +221,9 @@
   builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4}, "input");
   builder.FinishModel({}, {});
   ASSERT_FALSE(builder.Verify());
-  EXPECT_THAT(
-      builder.GetErrorString(),
-      ::testing::ContainsRegex(
-          "Tensor requires 6 bytes, but is allocated with 4 bytes buffer"));
+  EXPECT_THAT(builder.GetErrorString(),
+              ::testing::ContainsRegex("Tensor input requires 6 bytes, but is "
+                                       "allocated with 4 bytes buffer"));
 }
 
 TEST(VerifyModel, TestIntTensorShapeIsSmallerThanBuffer) {
@@ -230,10 +231,9 @@
   builder.AddTensor({2, 1}, TensorType_UINT8, {1, 2, 3, 4}, "input");
   builder.FinishModel({}, {});
   ASSERT_FALSE(builder.Verify());
-  EXPECT_THAT(
-      builder.GetErrorString(),
-      ::testing::ContainsRegex(
-          "Tensor requires 2 bytes, but is allocated with 4 bytes buffer"));
+  EXPECT_THAT(builder.GetErrorString(),
+              ::testing::ContainsRegex("Tensor input requires 2 bytes, but is "
+                                       "allocated with 4 bytes buffer"));
 }
 
 TEST(VerifyModel, TestIntTensorShapeOverflow) {
@@ -243,7 +243,7 @@
   builder.FinishModel({}, {});
   ASSERT_FALSE(builder.Verify());
   EXPECT_THAT(builder.GetErrorString(),
-              ::testing::ContainsRegex("Tensor dimension overflow"));
+              ::testing::ContainsRegex("Tensor input dimension overflow"));
 }
 
 TEST(VerifyModel, TensorBufferIsNotValid) {
@@ -282,10 +282,11 @@
       "input");
   builder.FinishModel({}, {});
   ASSERT_FALSE(builder.Verify());
-  EXPECT_THAT(builder.GetErrorString(),
-              ::testing::ContainsRegex(
-                  "String tensor buffer requires at least -2147483640 bytes, "
-                  "but is allocated with 18 bytes"));
+  EXPECT_THAT(
+      builder.GetErrorString(),
+      ::testing::ContainsRegex(
+          "String tensor input buffer requires at least -2147483640 bytes, "
+          "but is allocated with 18 bytes"));
 }
 
 TEST(VerifyModel, StringTensorOffsetTooSmall) {
@@ -297,7 +298,7 @@
   ASSERT_FALSE(builder.Verify());
   EXPECT_THAT(builder.GetErrorString(),
               ::testing::ContainsRegex(
-                  "String tensor buffer initial offset must be: 16"));
+                  "String tensor input buffer initial offset must be: 16"));
 }
 
 TEST(VerifyModel, StringTensorOffsetOutOfRange) {
@@ -307,9 +308,9 @@
       {2, 0, 0, 0, 16, 0, 0, 0, 17, 0, 0, 0, 22, 0, 0, 0, 'A', 'B'}, "input");
   builder.FinishModel({}, {});
   ASSERT_FALSE(builder.Verify());
-  EXPECT_THAT(
-      builder.GetErrorString(),
-      ::testing::ContainsRegex("String tensor buffer is invalid: index 2"));
+  EXPECT_THAT(builder.GetErrorString(),
+              ::testing::ContainsRegex(
+                  "String tensor input buffer is invalid: index 2"));
 }
 
 TEST(VerifyModel, StringTensorIsLargerThanRequired) {
@@ -320,18 +321,19 @@
       "input");
   builder.FinishModel({}, {});
   ASSERT_FALSE(builder.Verify());
-  EXPECT_THAT(
-      builder.GetErrorString(),
-      ::testing::ContainsRegex("String tensor buffer last offset must be 19"));
+  EXPECT_THAT(builder.GetErrorString(),
+              ::testing::ContainsRegex(
+                  "String tensor input buffer last offset must be 19"));
 }
 
 TEST(VerifyModel, AllOpsAreSupported) {
   TfLiteFlatbufferModelBuilder builder({BuiltinOperator_ADD}, {"CustomOp"});
   builder.AddTensor({2, 2}, TensorType_UINT8, {1, 2, 3, 4}, "input1");
   builder.AddTensor({2, 2}, TensorType_UINT8, {1, 2, 3, 4}, "input2");
-  builder.AddTensor({2, 2}, TensorType_UINT8, {}, "output");
+  builder.AddTensor({2, 2}, TensorType_UINT8, {}, "output1");
+  builder.AddTensor({2, 2}, TensorType_UINT8, {}, "output2");
   builder.AddOperator({0, 1}, {2}, BuiltinOperator_ADD, nullptr);
-  builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "CustomOp");
+  builder.AddOperator({0, 1}, {3}, BuiltinOperator_CUSTOM, "CustomOp");
   builder.FinishModel({}, {});
   ASSERT_TRUE(builder.Verify());
   EXPECT_EQ("", builder.GetErrorString());
@@ -363,6 +365,102 @@
                   "Unsupported custom op: Not supported, version: 1"));
 }
 
+TEST(VerifyModel, UnpopulatedInputToOp) {
+  TfLiteFlatbufferModelBuilder builder({}, {"test"});
+  builder.AddOperator({1, 2}, {3}, BuiltinOperator_CUSTOM, "test");
+  builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "input");
+  // This tensor will never be populated.
+  builder.AddTensor({2, 3}, TensorType_UINT8, {}, "invalid_input");
+  builder.AddTensor(
+      {2}, TensorType_STRING,
+      {2, 0, 0, 0, 16, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 'A', 'B', 'C'},
+      "data");
+  builder.AddTensor({2, 3}, TensorType_INT32, {}, "output");
+  builder.FinishModel({0, 2}, {3});
+  ASSERT_FALSE(builder.Verify());
+  EXPECT_EQ("Input tensor 1 to op 0 (CUSTOM) is not produced",
+            builder.GetErrorString());
+}
+
+TEST(VerifyModel, MultipleOpsOutputToSameTensor) {
+  TfLiteFlatbufferModelBuilder builder({BuiltinOperator_ADD}, {"CustomOp"});
+  builder.AddTensor({2, 2}, TensorType_UINT8, {1, 2, 3, 4}, "input1");
+  builder.AddTensor({2, 2}, TensorType_UINT8, {1, 2, 3, 4}, "input2");
+  builder.AddTensor({2, 2}, TensorType_UINT8, {}, "output1");
+  builder.AddOperator({0, 1}, {2}, BuiltinOperator_ADD, nullptr);
+  // This can't output to "output1", since the first operator does that.
+  builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "CustomOp");
+  builder.FinishModel({}, {});
+  ASSERT_FALSE(builder.Verify());
+  EXPECT_EQ(
+      "Output tensor 2 to op 1 (CUSTOM) is an output from another op. "
+      "There is a cycle in the graph",
+      builder.GetErrorString());
+}
+
+TEST(VerifyModel, OutputIsAConstantTensor) {
+  TfLiteFlatbufferModelBuilder builder({}, {"test"});
+  builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "test");
+  builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "input");
+  builder.AddTensor(
+      {2}, TensorType_STRING,
+      {2, 0, 0, 0, 16, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 'A', 'B', 'C'},
+      "data");
+  // Output shouldn't be populated with a constant value.
+  builder.AddTensor({2, 3}, TensorType_INT32, {1, 2, 3, 4, 5, 6}, "output");
+  builder.FinishModel({0, 1}, {2});
+  ASSERT_FALSE(builder.Verify());
+  EXPECT_EQ("Output tensor 2 to op 0 (CUSTOM) is a constant",
+            builder.GetErrorString());
+}
+
+TEST(VerifyModel, OutputIsSubgraphInput) {
+  TfLiteFlatbufferModelBuilder builder({}, {"test"});
+  builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "test");
+  builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "input");
+  builder.AddTensor(
+      {2}, TensorType_STRING,
+      {2, 0, 0, 0, 16, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 'A', 'B', 'C'},
+      "data");
+  builder.AddTensor({2, 3}, TensorType_INT32, {}, "output");
+  // Output shouldn't be a subgraph input.
+  builder.FinishModel({0, 1, 2}, {2});
+  ASSERT_FALSE(builder.Verify());
+  EXPECT_EQ("Output tensor 2 to op 0 (CUSTOM) is a subgraph input",
+            builder.GetErrorString());
+}
+
+TEST(VerifyModel, OutputIsAVariable) {
+  TfLiteFlatbufferModelBuilder builder({}, {"test"});
+  builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "test");
+  builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "input");
+  builder.AddTensor(
+      {2}, TensorType_STRING,
+      {2, 0, 0, 0, 16, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 'A', 'B', 'C'},
+      "data");
+  // Output shouldn't be a variable.
+  builder.AddTensor({2, 3}, TensorType_INT32, {}, "output", /*variable*/ true);
+  builder.FinishModel({0, 1}, {2});
+  ASSERT_FALSE(builder.Verify());
+  EXPECT_EQ("Output tensor 2 to op 0 (CUSTOM) is a variable",
+            builder.GetErrorString());
+}
+
+TEST(VerifyModel, OpWithOptionalTensor) {
+  TfLiteFlatbufferModelBuilder builder({}, {"test"});
+  builder.AddOperator({kOptionalTensor, 0, 1}, {2}, BuiltinOperator_CUSTOM,
+                      "test");
+  builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "input");
+  builder.AddTensor(
+      {2}, TensorType_STRING,
+      {2, 0, 0, 0, 16, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 'A', 'B', 'C'},
+      "data");
+  builder.AddTensor({2, 3}, TensorType_INT32, {}, "output");
+  builder.FinishModel({0, 1}, {2});
+  ASSERT_TRUE(builder.Verify());
+  EXPECT_EQ("", builder.GetErrorString());
+}
+
 // TODO(yichengfan): make up malicious files to test with.
 
 }  // namespace tflite
diff --git a/tensorflow/lite/tutorials/post_training_quant.ipynb b/tensorflow/lite/tutorials/post_training_quant.ipynb
index 394ab07..8bc02ee 100644
--- a/tensorflow/lite/tutorials/post_training_quant.ipynb
+++ b/tensorflow/lite/tutorials/post_training_quant.ipynb
@@ -301,7 +301,7 @@
         "id": "7BONhYtYocQY"
       },
       "source": [
-        "To quantize the model on export, set the `post_training_quantize` flag:"
+        "To quantize the model on export, set the `optimizations` flag to optimize for size:"
       ]
     },
     {
@@ -313,11 +313,11 @@
         "id": "g8PUvLWDlmmz"
       },
       "outputs": [],
-      "source": [
+     "source": [
         "# Note: If you don't have a recent tf-nightly installed, the\n",
-        "# \"post_training_quantize\" line will have no effect.\n",
+        "# \"optimizations\" line will have no effect.\n",
         "tf.logging.set_verbosity(tf.logging.INFO)\n",
-        "converter.post_training_quantize = True\n",
+        "converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n",
         "tflite_quant_model = converter.convert()\n",
         "tflite_model_quant_file = tflite_models_dir/\"mnist_model_quant.tflite\"\n",
         "tflite_model_quant_file.write_bytes(tflite_quant_model)"
@@ -329,8 +329,8 @@
         "colab_type": "text",
         "id": "PhMmUTl4sbkz"
       },
-      "source": [
-        "Note how the resulting file, with `post_training_quantize` set, is approximately `1/4` the size."
+    "source": [
+        "Note how the resulting file, is approximately `1/4` the size."
       ]
     },
     {
@@ -383,7 +383,7 @@
       "source": [
         "import numpy as np\n",
         "mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()\n",
-        "images, labels = tf.to_float(mnist_test[0])/255.0, mnist_test[1]\n",
+        "images, labels = tf.cast(mnist_test[0], tf.float32)/255.0, mnist_test[1]\n",
         "\n",
         "# Note: If you change the batch size, then use \n",
         "# `tf.lite.Interpreter.resize_tensor_input` to also change it for\n",
@@ -489,7 +489,7 @@
         "plt.imshow(img[0])\n",
         "template = \"True:{true}, predicted:{predict}\"\n",
         "_ = plt.title(template.format(true= str(label[0].numpy()),\n",
-        "                              predict=str(predictions[0,0])))\n",
+        "                              predict=str(predictions[0])))\n",
         "plt.grid(False)"
       ]
     },
@@ -650,7 +650,7 @@
         "output_arrays = [\"output\"]\n",
         "converter = tf.lite.TFLiteConverter.from_frozen_graph(\n",
         "  str(graph_def_file), input_arrays, output_arrays, input_shapes={\"input\":[1,299,299,3]})\n",
-        "converter.post_training_quantize = True\n",
+        "converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n",
         "resnet_tflite_file = graph_def_file.parent/\"resnet_v2_101_quantized.tflite\"\n",
         "resnet_tflite_file.write_bytes(converter.convert())\n"
       ]
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 0312114..9264939 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -120,8 +120,8 @@
 tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/ThreadPool
 tensorflow/third_party/eigen3/unsupported/Eigen/SpecialFunctions
 tensorflow/third_party/eigen3/unsupported/Eigen/MatrixFunctions
+tensorflow/third_party/eigen3/gpu_packet_math.patch
 tensorflow/third_party/eigen3/LICENSE
-tensorflow/third_party/eigen3/gebp_neon.patch
 tensorflow/third_party/eigen3/BUILD
 tensorflow/third_party/systemlibs/build_defs.bzl.tpl
 tensorflow/third_party/systemlibs/absl_py.BUILD
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index ca6a09c..3b2311a 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -143,7 +143,6 @@
         ":map_fn",
         ":math_ops",
         ":metrics",
-        ":mode_keys",
         ":nccl_ops",
         ":nn",
         ":ops",
@@ -174,6 +173,7 @@
         "//tensorflow/lite/python:lite",
         "//tensorflow/python/compat",
         "//tensorflow/python/compat:v2_compat",
+        "//tensorflow/python/compiler",
         "//tensorflow/python/data",
         "//tensorflow/python/distribute",
         "//tensorflow/python/distribute:estimator_training",
@@ -679,6 +679,7 @@
     deps = [
         ":common_shapes",
         ":composite_tensor",
+        ":convert_to_constants",
         ":cpp_shape_inference_proto_py",
         ":errors",
         ":framework_fast_tensor_util",
@@ -855,6 +856,22 @@
 )
 
 py_library(
+    name = "convert_to_constants",
+    srcs = [
+        "framework/convert_to_constants.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dtypes",
+        ":framework_ops",
+        ":platform",
+        ":tensor_util",
+        ":tf_optimizer",
+        "//tensorflow/core:protos_all_py",
+    ],
+)
+
+py_library(
     name = "kernels",
     srcs = [
         "framework/kernels.py",
@@ -971,6 +988,7 @@
     additional_deps = [
         ":auto_control_deps",
         ":client_testlib",
+        "//tensorflow/python/keras",
     ],
 )
 
@@ -1310,8 +1328,8 @@
     ],
 )
 
-cuda_py_tests(
-    name = "framework_function_test",
+cuda_py_test(
+    name = "function_test",
     size = "medium",
     srcs = ["framework/function_test.py"],
     additional_deps = [
@@ -1341,6 +1359,7 @@
         "noasan",
         "optonly",
     ],
+    xla_enable_strict_auto_jit = True,
 )
 
 tf_py_test(
@@ -1813,7 +1832,7 @@
     visibility = [
         "//learning/brain/python/ops:__pkg__",
         "//tensorflow/python/kernel_tests:__pkg__",
-        "//tensorflow/python/training/checkpointable:__pkg__",
+        "//tensorflow/python/training/tracking:__pkg__",
     ],
 )
 
@@ -1986,6 +2005,27 @@
     ],
 )
 
+tf_gen_op_wrapper_private_py(
+    name = "tpu_ops_gen",
+    visibility = [
+        "//smartass/brain/configure/python:__pkg__",
+        "//tensorflow/contrib/tpu:__pkg__",
+        "//tensorflow/python/tpu:__pkg__",
+    ],
+    deps = [
+        "//tensorflow/core:tpu_configuration_ops_op_lib",
+        "//tensorflow/core:tpu_cross_replica_ops_op_lib",
+        "//tensorflow/core:tpu_embedding_ops_op_lib",
+        "//tensorflow/core:tpu_functional_ops_op_lib",
+        "//tensorflow/core:tpu_heartbeat_ops_op_lib",
+        "//tensorflow/core:tpu_host_compute_ops_op_lib",
+        "//tensorflow/core:tpu_infeed_ops_op_lib",
+        "//tensorflow/core:tpu_ordinal_selector_ops_op_lib",
+        "//tensorflow/core:tpu_outfeed_ops_op_lib",
+        "//tensorflow/core:tpu_replication_ops_op_lib",
+    ],
+)
+
 py_library(
     name = "array_grad",
     srcs = ["ops/array_grad.py"],
@@ -2738,6 +2778,22 @@
 )
 
 py_library(
+    name = "critical_section_ops",
+    srcs = ["ops/critical_section_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":array_ops",
+        ":control_flow_ops",
+        ":dtypes",
+        ":framework_ops",
+        ":resource_variable_ops_gen",
+        ":tensor_array_ops",
+        ":util",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
+py_library(
     name = "list_ops",
     srcs = ["ops/list_ops.py"],
     srcs_version = "PY2AND3",
@@ -3148,6 +3204,7 @@
         ":clip_ops",
         ":confusion_matrix",
         ":control_flow_ops",
+        ":critical_section_ops",
         ":cudnn_rnn_grad",
         ":data_flow_grad",
         ":data_flow_ops",
@@ -3332,7 +3389,7 @@
         ":util",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
     ],
 )
 
@@ -3385,6 +3442,7 @@
         ":framework_test_lib",
     ],
     tags = ["no_windows"],
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_test(
@@ -3503,6 +3561,7 @@
         ":image_ops",
         "//third_party/py/numpy",
     ],
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_test(
@@ -3528,6 +3587,7 @@
     ],
     data = ["//tensorflow/core:image_testdata"],
     shard_count = 5,
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_test(
@@ -3627,6 +3687,7 @@
         "//third_party/py/numpy",
     ],
     shard_count = 16,
+    tags = ["no_rocm"],
 )
 
 cuda_py_test(
@@ -3647,6 +3708,7 @@
         "//third_party/py/numpy",
     ],
     tags = ["no_windows"],
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_test(
@@ -3677,6 +3739,7 @@
         "//third_party/py/numpy",
     ],
     tags = ["no_windows_gpu"],
+    xla_enable_strict_auto_jit = True,
 )
 
 py_library(
@@ -3685,7 +3748,7 @@
         ["training/**/*.py"],
         exclude = [
             "**/*test*",
-            "training/checkpointable/**/*.py",
+            "training/tracking/**/*.py",
             "training/saving/**/*.py",
             # The following targets have their own build rules (same name as the
             # file):
@@ -3746,8 +3809,8 @@
         "//tensorflow/python/eager:context",
         "//tensorflow/python/keras/optimizer_v2:learning_rate_schedule",
         "//tensorflow/python/ops/losses",
-        "//tensorflow/python/training/checkpointable:base",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:base",
+        "//tensorflow/python/training/tracking:util",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
@@ -3827,9 +3890,9 @@
         ":variables",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/checkpointable:base",
         "//tensorflow/python/training/saving:saveable_object",
         "//tensorflow/python/training/saving:saveable_object_util",
+        "//tensorflow/python/training/tracking:base",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
@@ -3910,6 +3973,7 @@
     srcs_version = "PY2AND3",
     visibility = visibility + [
         "//tensorflow:__pkg__",
+        "//third_party/py/tf_agents:__subpackages__",
     ],
     deps = [
         "//third_party/py/numpy",
@@ -4702,6 +4766,7 @@
         ":math_ops",
         "//tensorflow/core:protos_all_py",
     ],
+    xla_enable_strict_auto_jit = False,  # Graph structure is different with autojit
 )
 
 cuda_py_test(
@@ -4747,6 +4812,17 @@
 )
 
 tf_py_test(
+    name = "convert_to_constants_test",
+    size = "small",
+    srcs = ["framework/convert_to_constants_test.py"],
+    additional_deps = [
+        ":convert_to_constants",
+        "client_testlib",
+        "framework_test_lib",
+    ],
+)
+
+tf_py_test(
     name = "bfloat16_test",
     size = "small",
     srcs = ["lib/core/bfloat16_test.py"],
@@ -5777,6 +5853,8 @@
         "grappler",
         "no_pip",  # tf_optimizer is not available in pip.
     ],
+    # This test will not run on XLA because it primarily tests the TF Classic flow.
+    xla_enable_strict_auto_jit = False,
 )
 
 py_library(
@@ -5917,6 +5995,8 @@
     tags = [
         "grappler",
     ],
+    # This test will not run on XLA because it primarily tests the TF Classic flow.
+    xla_enable_strict_auto_jit = False,
 )
 
 py_library(
@@ -6057,29 +6137,6 @@
     ],
 )
 
-py_library(
-    name = "mode_keys",
-    srcs = [
-        "training/mode_keys.py",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":util",
-    ],
-)
-
-tf_py_test(
-    name = "mode_keys_test",
-    size = "small",
-    srcs = [
-        "training/mode_keys_test.py",
-    ],
-    additional_deps = [
-        ":client_testlib",
-        ":mode_keys",
-    ],
-)
-
 pyx_library(
     name = "framework_fast_tensor_util",
     srcs = ["framework/fast_tensor_util.pyx"],
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index 398fb37..8538f8c 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -84,6 +84,7 @@
 from tensorflow.python.layers import layers
 from tensorflow.python.module import module
 from tensorflow.python.ops import bitwise_ops as bitwise
+from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import image_ops as image
 from tensorflow.python.ops import manip_ops as manip
 from tensorflow.python.ops import metrics
diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index 04439ba..a35ff16 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -26,6 +26,7 @@
 
 from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.pyct import anno
+from tensorflow.python.autograph.pyct import ast_util
 from tensorflow.python.autograph.pyct import parser
 from tensorflow.python.autograph.pyct import templates
 
@@ -33,16 +34,35 @@
 # TODO(mdan): Rename to FunctionCallsTransformer.
 
 
+class _Function(object):
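+  """Tracks nesting of function definitions (one state frame per def)."""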
+
+  no_root = True
+
+
 class CallTreeTransformer(converter.Base):
   """Transforms the call tree by renaming transformed symbols."""
 
   def visit_FunctionDef(self, node):
+    self.state[_Function].enter()
     node.args = self.visit(node.args)
     node.body = self.visit_block(node.body)
-    # TODO(mdan): Is this correct for local functions?
-    node.decorator_list = []
+
+    if self.state[_Function].level < 2:
+      # Top-level functions lose their decorator because the conversion is
+      # always just-in-time and, by the time it happens, the decorators have
+      # already been applied.
+      node.decorator_list = []
+    else:
+      # Inner functions are converted already, so we insert a decorator to
+      # prevent double conversion. Double conversion would work too, but this
+      # saves the overhead.
+      node.decorator_list.append(
+          parser.parse_expression('ag__.do_not_convert_internal'))
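+      # Illustrative effect: an inner function
+      #   def inner(x): ...
+      # is emitted as
+      #   @ag__.do_not_convert_internal
+      #   def inner(x): ...
+      # so converted_call leaves it untouched at runtime.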
+
     if node.returns:
       node.returns = self.visit(node.returns)
+
+    self.state[_Function].exit()
     return node
 
   def visit_With(self, node):
@@ -62,9 +82,6 @@
         not self.ctx.program.options.uses(converter.Feature.BUILTIN_FUNCTIONS)):
       return self.generic_visit(node)
 
-    template = """
-      ag__.converted_call(func, owner, options, args)
-    """
     if isinstance(node.func, gast.Attribute):
       func = gast.Str(node.func.attr)
       owner = node.func.value
@@ -72,6 +89,41 @@
       func = node.func
       owner = parser.parse_expression('None')
 
+    starred_arg = None
+    normal_args = []
+    for a in node.args:
+      if isinstance(a, gast.Starred):
+        assert starred_arg is None, 'Multiple *args should be impossible.'
+        starred_arg = a
+      else:
+        normal_args.append(a)
+    if starred_arg is None:
+      args = templates.replace_as_expression('(args,)', args=normal_args)
+    else:
+      args = templates.replace_as_expression(
+          '(args,) + tuple(stararg)',
+          stararg=starred_arg.value,
+          args=normal_args)
+
+    kwargs_arg = None
+    normal_keywords = []
+    for k in node.keywords:
+      if k.arg is None:
+        assert kwargs_arg is None, 'Multiple **kwargs should be impossible.'
+        kwargs_arg = k
+      else:
+        normal_keywords.append(k)
+    if kwargs_arg is None:
+      kwargs = ast_util.keywords_to_dict(normal_keywords)
+    else:
+      kwargs = templates.replace_as_expression(
+          'dict(kwargs, **keywords)',
+          kwargs=kwargs_arg.value,
+          keywords=ast_util.keywords_to_dict(normal_keywords))
+
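+    # At this point a call like f(a, *b, c=1, **d) has been repacked into
+    # args = (a,) + tuple(b) and kwargs = dict(d, c=1), matching the
+    # converted_call(func, owner, options, args, kwargs) signature.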
+    template = """
+      ag__.converted_call(func, owner, options, args, kwargs)
+    """
     new_call = templates.replace_as_expression(
         template,
         func=func,
@@ -79,9 +131,8 @@
         options=self.ctx.program.options.to_ast(
             self.ctx,
             internal_convert_user_code=self.ctx.program.options.recursive),
-        args=node.args)
-    # TODO(mdan): Improve the template mechanism to better support this.
-    new_call.keywords = node.keywords
+        args=args,
+        kwargs=kwargs)
 
     return new_call
 
diff --git a/tensorflow/python/autograph/converters/call_trees_test.py b/tensorflow/python/autograph/converters/call_trees_test.py
index 6ee56bf..654682e 100644
--- a/tensorflow/python/autograph/converters/call_trees_test.py
+++ b/tensorflow/python/autograph/converters/call_trees_test.py
@@ -34,7 +34,55 @@
       self.assertEquals(
           result.test_fn(None),
           converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 3)
-      self.assertListEqual(self.dynamic_calls, [()])
+      self.assertListEqual(self.dynamic_calls, [((), {})])
+
+  def test_function_with_kwarg(self):
+
+    def test_fn(f, a, b):
+      return f(a, c=b) + 3
+
+    with self.converted(test_fn, call_trees, {}) as result:
+      self.assertEquals(
+          result.test_fn(None, 1, 2),
+          converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 3)
+      self.assertListEqual(self.dynamic_calls, [((1,), {'c': 2})])
+
+  def test_function_with_kwargs_starargs(self):
+
+    def test_fn(f, a, *args, **kwargs):
+      return f(a, *args, **kwargs) + 5
+
+    with self.converted(test_fn, call_trees, {}) as result:
+      self.assertEquals(
+          result.test_fn(None, 1, *[2, 3], **{'b': 4, 'c': 5}),
+          converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 5)
+      self.assertListEqual(self.dynamic_calls, [((1, 2, 3), {'b': 4, 'c': 5})])
+
+  def test_function_with_kwargs_starargs_only(self):
+
+    def f(*unused_args):  # Will not be called.
+      pass
+
+    def test_fn():
+      args = [1, 2, 3]
+      return f(*args) + 11
+
+    with self.converted(test_fn, call_trees, {'f': f}) as result:
+      self.assertEquals(
+          result.test_fn(),
+          converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 11)
+      self.assertListEqual(self.dynamic_calls, [((1, 2, 3), {})])
+
+  def test_function_with_kwargs_keywords(self):
+
+    def test_fn(f, a, b, **kwargs):
+      return f(a, b=b, **kwargs) + 5
+
+    with self.converted(test_fn, call_trees, {}) as result:
+      self.assertEquals(
+          result.test_fn(None, 1, 2, **{'c': 3}),
+          converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 5)
+      self.assertListEqual(self.dynamic_calls, [((1,), {'b': 2, 'c': 3})])
 
   def test_class_method(self):
 
@@ -47,7 +95,7 @@
     with self.converted(TestClass.test_method, call_trees, {}) as result:
       self.assertEquals(converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 1,
                         result.test_method(tc, 1))
-      self.assertListEqual(self.dynamic_calls, [(1,)])
+      self.assertListEqual(self.dynamic_calls, [((1,), {})])
 
   def test_object_method(self):
 
@@ -60,7 +108,7 @@
     with self.converted(tc.test_method, call_trees, {}) as result:
       self.assertEquals(converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 1,
                         result.test_method(tc, 1))
-      self.assertListEqual(self.dynamic_calls, [(1,)])
+      self.assertListEqual(self.dynamic_calls, [((1,), {})])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/converters/continue_statements.py b/tensorflow/python/autograph/converters/continue_statements.py
index 725e053..780f837 100644
--- a/tensorflow/python/autograph/converters/continue_statements.py
+++ b/tensorflow/python/autograph/converters/continue_statements.py
@@ -29,11 +29,17 @@
   def __init__(self):
     self.used = False
     self.control_var_name = None
-    self.create_guard = False
-    self.guard_created = False
 
   def __repr__(self):
-    return 'used: %s, var: %s' % (self.used, self.control_var_name)
+    return '<_Continue(used: {}, var: {})>'.format(self.used,
+                                                   self.control_var_name)
+
+
+class _Block(object):
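+  """Tracks whether a continue guard was created in the current block."""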
+
+  def __init__(self):
+    self.guard_created = False
+    self.create_guard = False
 
 
 class ContinueCanonicalizationTransformer(converter.Base):
@@ -68,15 +74,15 @@
     #    |                #         created if node)
 
     if self.state[_Continue].used:
-      if self.state[_Continue].guard_created:
+      if self.state[_Block].guard_created:
         return node, None
 
-      elif not self.state[_Continue].create_guard:
-        self.state[_Continue].create_guard = True
+      elif not self.state[_Block].create_guard:
+        self.state[_Block].create_guard = True
         return node, None
 
       else:
-        self.state[_Continue].guard_created = True
+        self.state[_Block].guard_created = True
         template = """
           if ag__.not_(var_name):
             original_node
@@ -90,6 +96,7 @@
 
   def _visit_loop_body(self, node, nodes):
     self.state[_Continue].enter()
+    self.state[_Block].enter()
     scope = anno.getanno(node, NodeAnno.BODY_SCOPE)
     continue_var = self.ctx.namer.new_symbol('continue_', scope.referenced)
     self.state[_Continue].control_var_name = continue_var
@@ -103,14 +110,21 @@
       control_var_init = templates.replace(template, var_name=continue_var)
       nodes = control_var_init + nodes
 
+    self.state[_Block].exit()
     self.state[_Continue].exit()
     return nodes
 
+  def _visit_non_loop_body(self, nodes):
+    self.state[_Block].enter()
+    nodes = self.visit_block(nodes, after_visit=self._postprocess_statement)
+    self.state[_Block].exit()
+    return nodes
+
   def visit_While(self, node):
     node.test = self.visit(node.test)
     node.body = self._visit_loop_body(node, node.body)
     # A continue in the else clause applies to the containing scope.
-    node.orelse = self.visit_block(node.orelse)
+    node.orelse = self._visit_non_loop_body(node.orelse)
     return node
 
   def visit_For(self, node):
@@ -118,29 +132,29 @@
     node.iter = self.generic_visit(node.iter)
     node.body = self._visit_loop_body(node, node.body)
     # A continue in the else clause applies to the containing scope.
-    node.orelse = self.visit_block(node.orelse)
+    node.orelse = self._visit_non_loop_body(node.orelse)
+    return node
+
+  def visit_If(self, node):
+    node.body = self.visit_block(node.body)
+    node.orelse = self._visit_non_loop_body(node.orelse)
     return node
 
   def visit_With(self, node):
     node.items = self.visit_block(node.items)
-    node.body = self.visit_block(node.body,
-                                 after_visit=self._postprocess_statement)
+    node.body = self._visit_non_loop_body(node.body)
     return node
 
   def visit_Try(self, node):
-    node.body = self.visit_block(node.body,
-                                 after_visit=self._postprocess_statement)
-    node.orelse = self.visit_block(node.orelse,
-                                   after_visit=self._postprocess_statement)
+    node.body = self._visit_non_loop_body(node.body)
+    node.orelse = self._visit_non_loop_body(node.orelse)
     # In Python 3.8 and later continue is allowed in finally blocks
-    node.finalbody = self.visit_block(node.finalbody,
-                                      after_visit=self._postprocess_statement)
+    node.finalbody = self._visit_non_loop_body(node.finalbody)
     node.handlers = self.visit_block(node.handlers)
     return node
 
   def visit_ExceptHandler(self, node):
-    node.body = self.visit_block(node.body,
-                                 after_visit=self._postprocess_statement)
+    node.body = self._visit_non_loop_body(node.body)
     return node
 
 
diff --git a/tensorflow/python/autograph/converters/continue_statements_test.py b/tensorflow/python/autograph/converters/continue_statements_test.py
index d6aaa50..5a1828e 100644
--- a/tensorflow/python/autograph/converters/continue_statements_test.py
+++ b/tensorflow/python/autograph/converters/continue_statements_test.py
@@ -20,15 +20,15 @@
 
 from tensorflow.python.autograph.converters import continue_statements
 from tensorflow.python.autograph.core import converter_testing
-from tensorflow.python.eager import context as tfe_ctx
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
 class ContinueCanonicalizationTest(converter_testing.TestCase):
 
   def assertTransformedEquivalent(self, test_fn, *inputs):
-    with self.converted(test_fn, continue_statements, {},
+    with self.converted(test_fn, continue_statements, {'ops': ops},
                         constant_op.constant) as result:
       self.assertEqual(test_fn(*inputs), result.test_fn(*inputs))
 
@@ -43,11 +43,10 @@
         v.append(x)
       return v
 
-    with tfe_ctx.eager_mode():
-      self.assertTransformedEquivalent(test_fn, 0)
-      self.assertTransformedEquivalent(test_fn, 1)
-      self.assertTransformedEquivalent(test_fn, 3)
-      self.assertTransformedEquivalent(test_fn, 4)
+    self.assertTransformedEquivalent(test_fn, 0)
+    self.assertTransformedEquivalent(test_fn, 1)
+    self.assertTransformedEquivalent(test_fn, 3)
+    self.assertTransformedEquivalent(test_fn, 4)
 
   def test_for_loop(self):
 
@@ -60,11 +59,89 @@
         v.append(x)
       return v
 
-    with tfe_ctx.eager_mode():
-      self.assertTransformedEquivalent(test_fn, [])
-      self.assertTransformedEquivalent(test_fn, [1])
-      self.assertTransformedEquivalent(test_fn, [2])
-      self.assertTransformedEquivalent(test_fn, [1, 2, 3])
+    self.assertTransformedEquivalent(test_fn, [])
+    self.assertTransformedEquivalent(test_fn, [1])
+    self.assertTransformedEquivalent(test_fn, [2])
+    self.assertTransformedEquivalent(test_fn, [1, 2, 3])
+
+  def test_nested_with(self):
+
+    def test_fn(x):
+      v = []
+      while x > 0:
+        x -= 1
+        with ops.name_scope(''):
+          if x % 2 == 0:
+            continue
+        v.append(x)
+      return v
+
+    self.assertTransformedEquivalent(test_fn, 0)
+    self.assertTransformedEquivalent(test_fn, 1)
+    self.assertTransformedEquivalent(test_fn, 3)
+    self.assertTransformedEquivalent(test_fn, 4)
+
+  def test_nested_multiple_withs(self):
+
+    def test_fn(x):
+      v = []
+      while x > 0:
+        x -= 1
+        with ops.name_scope(''):
+          if x % 2 == 0:
+            continue
+        with ops.name_scope(''):
+          v.append(x)
+        v.append(x)
+      return v
+
+    self.assertTransformedEquivalent(test_fn, 0)
+    self.assertTransformedEquivalent(test_fn, 1)
+    self.assertTransformedEquivalent(test_fn, 3)
+    self.assertTransformedEquivalent(test_fn, 4)
+
+  def test_nested_multiple_withs_and_statements(self):
+
+    def test_fn(x):
+      v = []
+      while x > 0:
+        x -= 1
+        with ops.name_scope(''):
+          if x % 2 == 0:
+            continue
+          v.append(x)
+        v.append(x)
+        with ops.name_scope(''):
+          v.append(x)
+        v.append(x)
+      return v
+
+    self.assertTransformedEquivalent(test_fn, 0)
+    self.assertTransformedEquivalent(test_fn, 1)
+    self.assertTransformedEquivalent(test_fn, 3)
+    self.assertTransformedEquivalent(test_fn, 4)
+
+  def test_nested_multiple_withs_and_nested_withs(self):
+
+    def test_fn(x):
+      v = []
+      while x > 0:
+        x -= 1
+        with ops.name_scope(''):
+          if x % 2 == 0:
+            continue
+          with ops.name_scope(''):
+            v.append(x)
+        v.append(x)
+        with ops.name_scope(''):
+          v.append(x)
+        v.append(x)
+      return v
+
+    self.assertTransformedEquivalent(test_fn, 0)
+    self.assertTransformedEquivalent(test_fn, 1)
+    self.assertTransformedEquivalent(test_fn, 3)
+    self.assertTransformedEquivalent(test_fn, 4)
 
   def test_nested(self):
 
@@ -83,11 +160,10 @@
         v.append(x)
       return v, u, w
 
-    with tfe_ctx.eager_mode():
-      self.assertTransformedEquivalent(test_fn, 0)
-      self.assertTransformedEquivalent(test_fn, 1)
-      self.assertTransformedEquivalent(test_fn, 3)
-      self.assertTransformedEquivalent(test_fn, 4)
+    self.assertTransformedEquivalent(test_fn, 0)
+    self.assertTransformedEquivalent(test_fn, 1)
+    self.assertTransformedEquivalent(test_fn, 3)
+    self.assertTransformedEquivalent(test_fn, 4)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py
index 0416820..90432e8 100644
--- a/tensorflow/python/autograph/converters/control_flow.py
+++ b/tensorflow/python/autograph/converters/control_flow.py
@@ -260,9 +260,12 @@
     assignments = []
     for s in undefined_symbols:
       template = '''
-        var = ag__.UNDEFINED
+        var = ag__.Undefined(symbol_name)
       '''
-      assignments += templates.replace(template, var=s)
+      assignments += templates.replace(
+          template,
+          var=s,
+          symbol_name=gast.Str(s.ssf()))
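+      # e.g. an undefined symbol `x` yields the assignment: x = ag__.Undefined('x')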
     return assignments
 
   def _get_loop_state(self, node):
@@ -278,26 +281,15 @@
     # the loop state, regardless of whether they are later used or not.
     loop_state = body_scope.modified & live_in
 
-    undefined_lives = loop_state - defined_in
+    # Variables that are used or defined inside the loop, but not defined
+    # before entering the loop.
+    undefined_lives = ((loop_state - defined_in)
+                       | ((body_scope.modified - live_in) & live_out))
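+    # Illustrative example: in
+    #   while n > 0:
+    #     s = n
+    #     n -= 1
+    #   return s
+    # `s` is modified inside the loop, undefined before it and live after it,
+    # so it is reported as possibly undefined instead of raising an error.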
     # Only simple variables must be defined. The composite ones will be
     # implicitly checked at runtime.
     undefined_simple_lives = {v for v in undefined_lives if v.is_simple()}
-    if undefined_simple_lives:
-      raise NameError(
-          'cannot convert loop: it includes symbols that are undefined'
-          ' when entering the loop: {}'.format(
-              self._fmt_symbols(undefined_simple_lives)))
 
-    live_defs_in_loop = (body_scope.modified - live_in) & live_out
-    if live_defs_in_loop:
-      # TODO(mdan): Include reference to explanation why.
-      raise NotImplementedError(
-          'cannot convert loop: it includes symbols that are defined'
-          ' inside the loop, but used later: {}. To fix, initialize'
-          ' these symbols before the loop'.format(
-              self._fmt_symbols(live_defs_in_loop)))
-
-    return loop_state, reserved_symbols
+    return loop_state, reserved_symbols, undefined_simple_lives
 
   def _state_constructs(self, loop_state, reserved_symbols):
     loop_state = list(loop_state)
@@ -321,7 +313,7 @@
   def visit_While(self, node):
     self.generic_visit(node)
 
-    loop_state, reserved_symbols = self._get_loop_state(node)
+    loop_state, reserved_symbols, possibly_undef = self._get_loop_state(node)
 
     # Note: one might expect we can dispatch based on the loop condition.
     # But because that is dependent on the state, it cannot be evaluated ahead
@@ -340,7 +332,6 @@
     cond_closure = set()
     for s in cond_scope.read:
       cond_closure |= s.support_set
-    cond_closure -= loop_state
 
     loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs(
         loop_state, reserved_symbols)
@@ -386,12 +377,13 @@
           extra_deps=tuple(s.ast() for s in cond_closure),
       )
 
-    return node
+    undefined_assigns = self._create_undefined_assigns(possibly_undef)
+    return undefined_assigns + node
 
   def visit_For(self, node):
     self.generic_visit(node)
 
-    loop_state, reserved_symbols = self._get_loop_state(node)
+    loop_state, reserved_symbols, possibly_undef = self._get_loop_state(node)
     loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs(
         loop_state, reserved_symbols)
     node_body = ast_util.rename_symbols(node.body, ssf_map)
@@ -446,7 +438,8 @@
           body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
           body=node_body)
 
-    return node
+    undefined_assigns = self._create_undefined_assigns(possibly_undef)
+    return undefined_assigns + node
 
 
 def transform(node, ctx):
diff --git a/tensorflow/python/autograph/converters/control_flow_test.py b/tensorflow/python/autograph/converters/control_flow_test.py
index 1e73dca..37ea4c2 100644
--- a/tensorflow/python/autograph/converters/control_flow_test.py
+++ b/tensorflow/python/autograph/converters/control_flow_test.py
@@ -33,8 +33,7 @@
       inputs = (inputs,)
     with self.converted(test_fn, control_flow, {},
                         constant_op.constant) as result:
-      with self.cached_session() as sess:
-        self.assertEqual(sess.run(result.test_fn(*inputs)), expected)
+      self.assertEqual(self.evaluate(result.test_fn(*inputs)), expected)
 
   @test_util.run_deprecated_v1
   def test_while_basic(self):
@@ -78,16 +77,32 @@
 
     self.assertTransformedResult(test_fn, constant_op.constant(5), 0)
 
-  def test_while_variable_defined_in_body(self):
-    def bad_while_loop(n):
+  @test_util.run_deprecated_v1
+  def test_while_dispatches_by_cond_only(self):
+
+    class TensorIncompatibleNumeric(object):
+      """Works in arithmetic expression, but errors out with TF ops."""
+
+      def __init__(self, val):
+        self.val = val
+
+      def __add__(self, other):
+        return TensorIncompatibleNumeric(self.val + other)
+
+    def test_fn(n, s):
       while n > 0:
         n -= 1
-        s = n
+        s += n
       return s
 
-    node, ctx = self.prepare(bad_while_loop, {})
-    with self.assertRaises(NameError):
-      control_flow.transform(node, ctx)
+    self.assertTransformedResult(test_fn, (constant_op.constant(5), 0), 10)
+    with self.converted(test_fn, control_flow, {}) as result:
+      # n alone controls the staging. When the loop is not staged, Python
+      # knows how to add the two objects. But when staged, tf.while_loop will
+      # not know how to deal with the TensorIncompatibleNumeric object.
+      self.assertEqual(result.test_fn(5, TensorIncompatibleNumeric(0)).val, 10)
+      with self.assertRaises(TypeError):
+        result.test_fn(constant_op.constant(5), TensorIncompatibleNumeric(0))
 
   @test_util.run_deprecated_v1
   def test_if_basic(self):
@@ -123,11 +138,10 @@
       return obj
 
     with self.converted(test_fn, control_flow, {}) as result:
-      with self.cached_session() as sess:
-        res_obj = result.test_fn(constant_op.constant(1), TestClass(0, 0))
-        self.assertEqual(sess.run((res_obj.a, res_obj.b)), (-1, 0))
-        res_obj = result.test_fn(constant_op.constant(-1), TestClass(0, 0))
-        self.assertEqual(sess.run((res_obj.a, res_obj.b)), (0, -2))
+      res_obj = result.test_fn(constant_op.constant(1), TestClass(0, 0))
+      self.assertEqual(self.evaluate((res_obj.a, res_obj.b)), (-1, 0))
+      res_obj = result.test_fn(constant_op.constant(-1), TestClass(0, 0))
+      self.assertEqual(self.evaluate((res_obj.a, res_obj.b)), (0, -2))
 
   @test_util.run_deprecated_v1
   def test_if_single_output(self):
@@ -225,16 +239,6 @@
       self.assertEqual(result.test_fn(5), 10)
       self.assertEqual(eval_count[0], 1)
 
-  def test_for_variable_defined_in_body(self):
-    def bad_for_loop(n):
-      for i in range(n):
-        s = i
-      return s
-
-    node, ctx = self.prepare(bad_for_loop, {})
-    with self.assertRaises(NameError):
-      control_flow.transform(node, ctx)
-
   @test_util.run_deprecated_v1
   def test_for_tuple_unpacking(self):
     def test_fn(x_list):
@@ -244,5 +248,7 @@
       return z
 
     self.assertTransformedResult(test_fn, [3, 3], 7)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/converters/return_statements.py b/tensorflow/python/autograph/converters/return_statements.py
index 723acab..3173e67 100644
--- a/tensorflow/python/autograph/converters/return_statements.py
+++ b/tensorflow/python/autograph/converters/return_statements.py
@@ -408,6 +408,5 @@
   transformer = ReturnStatementsTransformer(
       ctx, default_to_null_return=default_to_null_return)
   node = transformer.visit(node)
-  transformer.debug_print_src(node)
 
   return node
diff --git a/tensorflow/python/autograph/converters/return_statements_test.py b/tensorflow/python/autograph/converters/return_statements_test.py
index 2942555d..b2d3d1b 100644
--- a/tensorflow/python/autograph/converters/return_statements_test.py
+++ b/tensorflow/python/autograph/converters/return_statements_test.py
@@ -197,6 +197,25 @@
     self.assertTransformedEquivalent(test_fn, 4)
     self.assertTransformedEquivalent(test_fn, 5)
 
+  def test_nested_multiple_withs(self):
+
+    def test_fn(x):
+      v = []
+      while x > 0:
+        x -= 1
+        with ops.name_scope(''):
+          if x % 2 == 0:
+            return v
+        with ops.name_scope(''):
+          v.append(x)
+        v.append(x)
+      return v
+
+    self.assertTransformedEquivalent(test_fn, 0)
+    self.assertTransformedEquivalent(test_fn, 1)
+    self.assertTransformedEquivalent(test_fn, 3)
+    self.assertTransformedEquivalent(test_fn, 4)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/converters/side_effect_guards.py b/tensorflow/python/autograph/converters/side_effect_guards.py
index d7c0951..7e556d9 100644
--- a/tensorflow/python/autograph/converters/side_effect_guards.py
+++ b/tensorflow/python/autograph/converters/side_effect_guards.py
@@ -125,6 +125,10 @@
     node.orelse = self._visit_and_reindent(node.orelse)
     return node
 
+  # TODO(b/123995141) Remove once ExceptionHandlers are in the CFG
+  def visit_ExceptHandler(self, node):
+    return node
+
   def visit_Expr(self, node):
     self.generic_visit(node)
     if isinstance(node.value, gast.Call):
diff --git a/tensorflow/python/autograph/core/config.py b/tensorflow/python/autograph/core/config.py
index 5dce3e6..f038704 100644
--- a/tensorflow/python/autograph/core/config.py
+++ b/tensorflow/python/autograph/core/config.py
@@ -29,14 +29,32 @@
 }
 
 
-def internal_module_name(name):
-  full_name = utils.__name__
-  name_start = full_name.find(name)
-  name_end = name_start + len(name) + 1
-  return full_name[:name_end]
+def _internal_name(name):
+  """This function correctly resolves internal and external names."""
+  reference_name = utils.__name__
+
+  reference_root = 'tensorflow.'
+  # If the TF module is foo.tensorflow, then all other modules are assumed
+  # to be prefixed by 'foo'.
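+  # For example (illustrative): if utils.__name__ is
+  # 'foo.tensorflow.python.autograph.utils', then
+  # _internal_name('tensorflow_probability') == 'foo.tensorflow_probability'.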
+
+  if reference_name.startswith(reference_root):
+    return name
+
+  reference_begin = reference_name.find('.' + reference_root)
+  assert reference_begin > 0
+
+  root_prefix = reference_name[:reference_begin]
+  return root_prefix + '.' + name
 
 
-DEFAULT_UNCOMPILED_MODULES = set(((internal_module_name('tensorflow'),),))
+DEFAULT_UNCOMPILED_MODULES = set((
+    ('tensorflow',),
+    (_internal_name('tensorflow'),),
+    # TODO(mdan): Remove once the conversion process is optimized.
+    ('tensorflow_probability',),
+    (_internal_name('tensorflow_probability'),),
+))
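+# Each entry names a module prefix; functions defined under one of these
+# prefixes are assumed graph-compatible and are not converted.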
+
 
 COMPILED_IMPORT_STATEMENTS = (
     'from __future__ import print_function',
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index c8db2ca..3a08483 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -233,8 +233,7 @@
     """
 
     def as_qualified_name(o):
-      name = inspect_utils.getqualifiedname(
-          ctx.info.namespace, o, max_depth=1)
+      name = inspect_utils.getqualifiedname(ctx.info.namespace, o, max_depth=1)
       if not name:
         if isinstance(o, weakref.ref):
           # `o` might already be a weak reference, if this object was
@@ -252,9 +251,7 @@
 
     def list_of_features(values):
       return parser.parse_expression('({})'.format(', '.join(
-          'ag__.{}'.format(v)
-          for v in Feature.__members__.values()
-          if v in values)))
+          'ag__.{}'.format(str(v)) for v in values)))
 
     if internal_convert_user_code is None:
       internal_convert_user_code = self.internal_convert_user_code
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index abe0f25..7d91859 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -90,7 +90,7 @@
               verbose=verbose,
               force_conversion=True,
               optional_features=optional_features,
-          ), *args, **kwargs)
+          ), args, kwargs)
 
     wrapper = tf_decorator.make_decorator(f, wrapper)
 
@@ -116,6 +116,12 @@
   PY_FUNC = 2
 
 
+def do_not_convert_internal(f):
+  """Decorator that marks internal functions which do not need conversion."""
+  setattr(f, '__ag_compiled', True)
+  return f
+
+
 def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
   """Decorator that suppresses the conversion of a function.
 
@@ -154,15 +160,13 @@
     else:
       raise ValueError('unknown value for run_as: %s' % run_as)
 
-    # Sometimes the decorator is just desugared, making it impossible to detect.
-    # This attribute makes detection easier.
     setattr(wrapper, '__ag_compiled', True)
     return wrapper
 
   return decorator
 
 
-def converted_call(f, owner, options, *args, **kwargs):
+def converted_call(f, owner, options, args, kwargs):
   """Compiles a function call inline. For internal use only."""
   logging.log(1,
               'Converted call: %s; owner: %s\n    args: %s\n    kwargs: %s\n',
@@ -196,6 +200,13 @@
     logging.log(2, 'Permanently whitelisted: %s: wrapt decorated', f)
     return f(*args, **kwargs)
 
+  # Constructors are permanently whitelisted.
+  # TODO(mdan): Toggle as experimental feature instead.
+  # TODO(b/124016764): Remove this limitation.
+  if tf_inspect.isclass(f):
+    logging.log(2, 'Permanently whitelisted: %s: constructor', f)
+    return f(*args, **kwargs)
+
   # Other built-in modules are permanently whitelisted.
   # TODO(mdan): Figure out how to do this consistently for all stdlib modules.
   if (f in collections.__dict__.values() or f in pdb.__dict__.values() or
@@ -276,6 +287,9 @@
 
     elif tf_inspect.isclass(f):
       # Constructors
+      # Note: Until we support class constructors and enable whole-class
+      # conversion with an experimental flag, this branch is dead code.
+      # TODO(mdan): Consider removing unless there is a compelling use case.
       target_entity = f
       arg_map_target = f.__init__
       effective_args = args
@@ -337,8 +351,8 @@
     logging.warn(
         'Entity %s could not be transformed and will be staged without change.'
         ' Error details can be found in the logs when running with the env'
-        ' variable AUTOGRAPH_VERBOSITY=5. Please report this to the AutoGraph'
-        ' team. Cause: %s', target_entity, e)
+        ' variable AUTOGRAPH_VERBOSITY >= 1. Please report this to the'
+        ' AutoGraph team. Cause: %s', target_entity, e)
 
     return f(*args, **kwargs)
 
diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py
index 5192809..ebc6cc4 100644
--- a/tensorflow/python/autograph/impl/api_test.py
+++ b/tensorflow/python/autograph/impl/api_test.py
@@ -41,6 +41,9 @@
 tf = utils.fake_tf()
 
 
+testing_global_numeric = 2
+
+
 class TestResource(str):
   pass
 
@@ -192,18 +195,17 @@
       def test_method(self, x, s, a):
         while tf.reduce_sum(x) > s:
           x //= api.converted_call(self.called_member, None,
-                                   converter.ConversionOptions(), self, a)
+                                   converter.ConversionOptions(), (self, a), {})
         return x
 
     tc = TestClass()
-    with self.cached_session() as sess:
-      x = tc.test_method(
-          constant_op.constant([2, 4]), constant_op.constant(1),
-          constant_op.constant(-2))
-      self.assertListEqual([0, 1], self.evaluate(x).tolist())
+    x = tc.test_method(
+        constant_op.constant([2, 4]), constant_op.constant(1),
+        constant_op.constant(-2))
+    self.assertListEqual([0, 1], self.evaluate(x).tolist())
 
   def test_converted_call_builtin(self):
-    x = api.converted_call(range, None, converter.ConversionOptions(), 3)
+    x = api.converted_call(range, None, converter.ConversionOptions(), (3,), {})
     self.assertEqual((0, 1, 2), tuple(x))
 
   def test_converted_call_function(self):
@@ -213,10 +215,9 @@
         return -x
       return x
 
-    with self.cached_session() as sess:
-      x = api.converted_call(test_fn, None, converter.ConversionOptions(),
-                             constant_op.constant(-1))
-      self.assertEqual(1, self.evaluate(x))
+    x = api.converted_call(test_fn, None, converter.ConversionOptions(),
+                           (constant_op.constant(-1),), {})
+    self.assertEqual(1, self.evaluate(x))
 
   @test_util.run_v1_only('b/120545219')
   def test_converted_call_functools_partial(self):
@@ -227,16 +228,14 @@
       return x, y, z
 
     x = api.converted_call(
-        functools.partial(test_fn, constant_op.constant(-1), z=-3),
-        None, converter.ConversionOptions(),
-        constant_op.constant(-2))
+        functools.partial(test_fn, constant_op.constant(-1), z=-3), None,
+        converter.ConversionOptions(), (constant_op.constant(-2),), {})
     self.assertEqual((1, 2, 3), self.evaluate(x))
 
     x = api.converted_call(
         functools.partial(
-            functools.partial(test_fn, constant_op.constant(-1)), z=-3),
-        None, converter.ConversionOptions(),
-        constant_op.constant(-2))
+            functools.partial(test_fn, constant_op.constant(-1)), z=-3), None,
+        converter.ConversionOptions(), (constant_op.constant(-2),), {})
     self.assertEqual((1, 2, 3), self.evaluate(x))
 
   def test_converted_call_method_explicit_owner(self):
@@ -259,11 +258,10 @@
           return -self.x
         return self.x
 
-    with self.cached_session() as sess:
-      tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc.test_method, None,
-                             converter.ConversionOptions(), tc)
-      self.assertEqual(1, self.evaluate(x))
+    tc = TestClass(constant_op.constant(-1))
+    x = api.converted_call(tc.test_method, None, converter.ConversionOptions(),
+                           (tc,), {})
+    self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_method_converts_recursively(self):
 
@@ -282,7 +280,8 @@
 
     tc = TestClass(constant_op.constant(-1))
     x = api.converted_call(tc.test_method, None,
-                           converter.ConversionOptions(recursive=True), tc)
+                           converter.ConversionOptions(recursive=True), (tc,),
+                           {})
     self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_method_by_class(self):
@@ -297,11 +296,10 @@
           return -self.x
         return self.x
 
-    with self.cached_session() as sess:
-      tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(TestClass.test_method, None,
-                             converter.ConversionOptions(), tc)
-      self.assertEqual(1, self.evaluate(x))
+    tc = TestClass(constant_op.constant(-1))
+    x = api.converted_call(TestClass.test_method, None,
+                           converter.ConversionOptions(), (tc,), {})
+    self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_callable_object(self):
 
@@ -315,11 +313,11 @@
           return -self.x
         return self.x
 
-    with self.cached_session() as sess:
-      tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc, None, converter.ConversionOptions())
-      self.assertEqual(1, self.evaluate(x))
+    tc = TestClass(constant_op.constant(-1))
+    x = api.converted_call(tc, None, converter.ConversionOptions(), (), {})
+    self.assertEqual(1, self.evaluate(x))
 
+  @test_util.run_deprecated_v1
   def test_converted_call_constructor(self):
 
     class TestClass(object):
@@ -332,27 +330,28 @@
           return -self.x
         return self.x
 
-    with self.cached_session() as sess:
-      tc = api.converted_call(TestClass, None, converter.ConversionOptions(),
-                              constant_op.constant(-1))
-      # tc is now a converted object.
-      x = tc.test_method()
-      self.assertEqual(1, self.evaluate(x))
+    tc = api.converted_call(TestClass, None, converter.ConversionOptions(),
+                            (constant_op.constant(-1),), {})
+    # tc is still a TestClass - constructors are whitelisted.
+    # TODO(b/124016764): Support this use case.
+    # The error below is specific to the `if` statement not being converted.
+    with self.assertRaisesRegex(
+        TypeError, 'Using a `tf.Tensor` as a Python `bool`'):
+      tc.test_method()
 
   def test_converted_call_already_converted(self):
 
     def f(x):
       return x == 0
 
-    with self.cached_session() as sess:
-      x = api.converted_call(f, None, converter.ConversionOptions(),
-                             constant_op.constant(0))
-      self.assertTrue(self.evaluate(x))
+    x = api.converted_call(f, None, converter.ConversionOptions(),
+                           (constant_op.constant(0),), {})
+    self.assertTrue(self.evaluate(x))
 
-      converted_f = api.to_graph(f)
-      x = api.converted_call(converted_f, None, converter.ConversionOptions(),
-                             constant_op.constant(0))
-      self.assertTrue(self.evaluate(x))
+    converted_f = api.to_graph(f)
+    x = api.converted_call(converted_f, None, converter.ConversionOptions(),
+                           (constant_op.constant(0),), {})
+    self.assertTrue(self.evaluate(x))
 
   def test_converted_call_then_already_converted_dynamic(self):
 
@@ -367,7 +366,7 @@
       return g(x)
 
     x = api.converted_call(f, None, converter.ConversionOptions(),
-                           g, constant_op.constant(1))
+                           (g, constant_op.constant(1)), {})
     self.assertEqual(self.evaluate(x), 1)
 
   @test_util.run_deprecated_v1
@@ -381,10 +380,10 @@
     # f should not be converted, causing len to error out.
     with self.assertRaisesRegexp(Exception,
                                  'object of type \'Tensor\' has no len()'):
-      api.converted_call(f, None, opts, constant_op.constant([0]))
+      api.converted_call(f, None, opts, (constant_op.constant([0]),), {})
 
     # len on the other hand should work fine.
-    x = api.converted_call(len, None, opts, constant_op.constant([0]))
+    x = api.converted_call(len, None, opts, (constant_op.constant([0]),), {})
     # The constant has static shape so the result is a primitive not a Tensor.
     self.assertEqual(x, 1)
 
@@ -397,11 +396,10 @@
     ])
 
     x = api.converted_call(model.call, None, opts,
-                           constant_op.constant([[0.0]]), training=True)
+                           (constant_op.constant([[0.0]]),), {'training': True})
 
-    with self.cached_session() as sess:
-      self.evaluate(variables.global_variables_initializer())
-      self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
+    self.evaluate(variables.global_variables_initializer())
+    self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
 
   def test_converted_call_whitelisted_method_extra_self(self):
 
@@ -412,11 +410,11 @@
     ])
 
     x = api.converted_call(model.call, None, opts,
-                           model, constant_op.constant([[0.0]]), training=True)
+                           (model, constant_op.constant([[0.0]])),
+                           {'training': True})
 
-    with self.cached_session() as sess:
-      self.evaluate(variables.global_variables_initializer())
-      self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
+    self.evaluate(variables.global_variables_initializer())
+    self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
 
   def test_converted_call_whitelisted_method_via_owner(self):
 
@@ -427,18 +425,17 @@
     ])
 
     x = api.converted_call('call', model, opts,
-                           constant_op.constant([[0.0]]), training=True)
+                           (constant_op.constant([[0.0]]),), {'training': True})
 
-    with self.cached_session() as sess:
-      self.evaluate(variables.global_variables_initializer())
-      self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
+    self.evaluate(variables.global_variables_initializer())
+    self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
 
   def test_converted_call_namedtuple(self):
 
     opts = converter.ConversionOptions()
 
     x = api.converted_call(collections.namedtuple, None, opts,
-                           'TestNamedtuple', ('a', 'b'))
+                           ('TestNamedtuple', ('a', 'b')), {})
 
     self.assertTrue(inspect_utils.isnamedtuple(x))
 
@@ -446,8 +443,8 @@
 
     opts = converter.ConversionOptions()
 
-    x = api.converted_call('namedtuple', collections, opts,
-                           'TestNamedtuple', ('a', 'b'))
+    x = api.converted_call('namedtuple', collections, opts, ('TestNamedtuple',
+                                                             ('a', 'b')), {})
 
     self.assertTrue(inspect_utils.isnamedtuple(x))
 
@@ -457,11 +454,10 @@
 
     l = lambda x: x == 0
 
-    x = api.converted_call(l, None, opts, constant_op.constant(0))
+    x = api.converted_call(l, None, opts, (constant_op.constant(0),), {})
 
-    with self.cached_session() as sess:
-      self.evaluate(variables.global_variables_initializer())
-      self.assertAllEqual(True, self.evaluate(x))
+    self.evaluate(variables.global_variables_initializer())
+    self.assertAllEqual(True, self.evaluate(x))
 
   @test_util.run_deprecated_v1
   def test_to_graph_basic(self):
@@ -493,6 +489,33 @@
       x = compiled_fn(constant_op.constant([4, 8]))
       self.assertListEqual([1, 2], self.evaluate(x).tolist())
 
+  def test_to_graph_with_globals(self):
+
+    def test_fn(x):
+      global testing_global_numeric
+      testing_global_numeric = x + testing_global_numeric
+      return testing_global_numeric
+
+    compiled_fn = api.to_graph(test_fn)
+
+    x = compiled_fn(constant_op.constant(3))
+    self.assertEqual(5, self.evaluate(x))
+    # TODO(b/122368197): This should be the constant 5!
+    self.assertEqual(2, testing_global_numeric)
+
+  def test_to_graph_with_kwargs_clashing_converted_call(self):
+
+    def called_fn(**kwargs):
+      return kwargs['f'] + kwargs['owner']
+
+    def test_fn():
+      # These arg names intentionally match converted_call's
+      return called_fn(f=1, owner=2)
+
+    compiled_fn = api.to_graph(test_fn)
+
+    self.assertEqual(compiled_fn(), 3)
+
   def test_to_code_basic(self):
 
     def test_fn(x, s):
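
Editor's note: the hunks above track an API change in `api.converted_call`: positional arguments are now passed as one explicit tuple and keyword arguments as one explicit dict, instead of being splatted through `*args`/`**kwargs`. A before/after sketch drawn directly from the call sites above (only the inline comments are ours):

```python
# Old convention: caller kwargs were forwarded through **kwargs, so names like
# `f` or `owner` could collide with converted_call's own parameters.
x = api.converted_call(model.call, None, opts,
                       constant_op.constant([[0.0]]), training=True)

# New convention: args as one tuple, kwargs as one dict; no collision possible
# (see test_to_graph_with_kwargs_clashing_converted_call above).
x = api.converted_call(model.call, None, opts,
                       (constant_op.constant([[0.0]]),), {'training': True})
```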
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index 1ac2e33..dda98ca 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -53,6 +53,7 @@
 from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.pyct import origin_info
 from tensorflow.python.autograph.pyct import parser
+from tensorflow.python.autograph.pyct import pretty_printer
 from tensorflow.python.autograph.pyct import qual_names
 from tensorflow.python.autograph.pyct import templates
 from tensorflow.python.autograph.pyct import transformer
@@ -219,7 +220,8 @@
                 compiler.ast_to_source(node))
   if logging.has_verbosity(4):
     for n in node:
-      logging.log(4, 'Compiled AST of %s:\n\n%s\n', o, gast.dump(n))
+      logging.log(4, 'Compiled AST of %s:\n\n%s\n\n', o,
+                  pretty_printer.fmt(n, color=False))
 
   if program_ctx.options.recursive:
     while True:
@@ -354,7 +356,7 @@
   """Specialization of `entity_to_graph` for callable functions."""
 
   node, source = parser.parse_entity(f)
-  logging.log(3, 'Source code of %s:\n%s', f, source)
+  logging.log(3, 'Source code of %s:\n\n%s\n', f, source)
   node = node.body[0]
 
   # In general, the output of inspect.getsource is inexact for lambdas because
diff --git a/tensorflow/python/autograph/operators/__init__.py b/tensorflow/python/autograph/operators/__init__.py
index 58ed72b..5b3f45d 100644
--- a/tensorflow/python/autograph/operators/__init__.py
+++ b/tensorflow/python/autograph/operators/__init__.py
@@ -71,4 +71,5 @@
 from tensorflow.python.autograph.operators.slices import get_item
 from tensorflow.python.autograph.operators.slices import GetItemOpts
 from tensorflow.python.autograph.operators.slices import set_item
-from tensorflow.python.autograph.operators.special_values import UNDEFINED
+from tensorflow.python.autograph.operators.special_values import is_undefined
+from tensorflow.python.autograph.operators.special_values import Undefined
diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py
index f046000..414d533 100644
--- a/tensorflow/python/autograph/operators/control_flow.py
+++ b/tensorflow/python/autograph/operators/control_flow.py
@@ -19,6 +19,7 @@
 from __future__ import print_function
 
 from tensorflow.python.autograph.operators import py_builtins
+from tensorflow.python.autograph.operators import special_values
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import control_flow_ops
@@ -62,6 +63,17 @@
   if tensor_util.is_tensor(iter_):
     return _known_len_for_stmt(iter_, extra_test, body, init_state)
   elif isinstance(iter_, dataset_ops.DatasetV2):
+    # Check for undefined symbols and report an error. This prevents the error
+    # from propagating into the TF runtime. We have more information here and
+    # can provide a clearer error message.
+    undefined_symbols = _filter_undefined(init_state)
+
+    if undefined_symbols:
+      raise ValueError(
+          'TensorFlow requires that the following symbols be initialized '
+          'to a Tensor, Variable or TensorArray before the loop: {}'
+          .format(tuple(undefined_symbols)))
+
     return _dataset_for_stmt(iter_, extra_test, body, init_state)
   else:
     return _py_for_stmt(iter_, extra_test, body, init_state)
@@ -153,13 +165,32 @@
   # TODO(mdan): Consider adding a generic mechanism for dynamic dispatch.
   # That could be something as simple as a collection of dispatch rules, with
   # some prioritization.
-  if any(tensor_util.is_tensor(v)
-         for v in nest.flatten(init_state + extra_deps)):
+  if any(tensor_util.is_tensor(v) for v in nest.flatten(extra_deps)):
+    # Check for undefined symbols and report an error. This prevents the error
+    # from propagating into the TF runtime. We have more information here and
+    # can provide a clearer error message.
+    undefined_symbols = _filter_undefined(init_state)
+
+    if undefined_symbols:
+      raise ValueError(
+          'TensorFlow requires that the following symbols be initialized '
+          'to a Tensor, Variable or TensorArray before the loop: {}'
+          .format(tuple(undefined_symbols)))
     return _tf_while_stmt(test, body, init_state, opts)
   else:
     return _py_while_stmt(test, body, init_state, opts)
 
 
+def _filter_undefined(all_symbols):
+  """Returns the names of undefined symbols contained in all_symbols."""
+  undefined_symbols = [
+      s.symbol_name
+      for s in all_symbols
+      if special_values.is_undefined(s)
+  ]
+  return undefined_symbols
+
+
 def _tf_while_stmt(test, body, init_state, opts):
   """Overload of while_stmt that stages a TF while_stmt."""
   if opts is None:
@@ -203,7 +234,33 @@
 
 def tf_if_stmt(cond, body, orelse):
   """Overload of if_stmt that stages a TF cond."""
-  return control_flow_ops.cond(cond, body, orelse)
+  protected_body = _wrap_in_protection_from_undefined(body, branch_name='if')
+  protected_orelse = _wrap_in_protection_from_undefined(orelse,
+                                                        branch_name='else')
+
+  return control_flow_ops.cond(cond, protected_body, protected_orelse)
+
+
+def _wrap_in_protection_from_undefined(func, branch_name):
+  """Wraps function to raise useful error when it returns undefined symbols."""
+  def protected_func():
+    """Calls function and raises an error if undefined symbols are returned."""
+    results = func()
+    undefined_symbols = None
+    if isinstance(results, tuple):
+      undefined_symbols = _filter_undefined(results)
+    elif special_values.is_undefined(results):
+      # Single return value
+      undefined_symbols = results.symbol_name
+
+    if undefined_symbols:
+      message = ('The following symbols must also be initialized in the {} '
+                 'branch: {}. Alternatively, you may initialize them before '
+                 'the if statement.').format(branch_name, undefined_symbols)
+      raise ValueError(message)
+    return results
+  return protected_func
 
 
 def _py_if_stmt(cond, body, orelse):
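
Editor's note: the branch-protection wrapper above exists to surface a clearer error for user code like the following. A hedged sketch (the function is illustrative; the quoted error text is the one built by `_wrap_in_protection_from_undefined`):

```python
def user_fn(x):
  if x > 0:
    y = x * 2  # `y` is assigned only on the `if` branch
  return y

# AutoGraph stages this roughly as:
#   y = Undefined('y')
#   y = tf_if_stmt(x > 0, lambda: x * 2, lambda: y)
# The protected else branch then returns an Undefined sentinel, producing:
#   ValueError: The following symbols must also be initialized in the else
#   branch: y. Alternatively, you may initialize them before the if statement.
```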
diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py
index 590b518..316748f 100644
--- a/tensorflow/python/autograph/operators/control_flow_test.py
+++ b/tensorflow/python/autograph/operators/control_flow_test.py
@@ -65,30 +65,41 @@
   def test_tensor(self):
     n = constant_op.constant(5)
     results = control_flow.while_stmt(
-        test=lambda i, sum: i < n,
-        body=lambda i, sum: (i + 1, sum + i,),
+        test=lambda i, s: i < n,
+        body=lambda i, s: (i + 1, s + i,),
         init_state=(0, 0),
         extra_deps=(n,))
-    with self.cached_session():
-      self.assertEqual((5, 10), self.evaluate(results))
+    self.assertEqual((5, 10), self.evaluate(results))
 
   @test_util.run_deprecated_v1
-  def test_tensor_dict_state(self):
+  def test_python_with_tensor_state(self):
     n = 5
-    init_state = {'i': constant_op.constant(0), 'sum': constant_op.constant(0)}
     results = control_flow.while_stmt(
-        test=lambda s: s['i'] < n,
-        body=lambda s: ({'i': s['i'] + 1, 'sum': s['sum'] + s['i']},),
-        init_state=(init_state,),
+        test=lambda i, s: i < n,
+        body=lambda i, s: (i + 1, s + i),
+        init_state=(0, constant_op.constant(0)),
         extra_deps=())
-    with self.cached_session():
-      self.assertEqual(({'i': 5, 'sum': 10},), self.evaluate(results))
+    result_i, result_s = results
+    self.assertEqual(5, result_i)
+    self.assertEqual(10, self.evaluate(result_s))
+
+  @test_util.run_deprecated_v1
+  def test_python_due_to_hidden_cond_type(self):
+    n = 5
+
+    # TODO(b/124002646): Improve the error message.
+    with self.assertRaises(Exception):
+      control_flow.while_stmt(
+          test=lambda i, s: i < n,
+          body=lambda i, s: (i + 1, s + i),
+          init_state=(constant_op.constant(0), constant_op.constant(0)),
+          extra_deps=())
 
   def test_python(self):
     n = 5
     results = control_flow.while_stmt(
-        test=lambda i, sum: i < n,
-        body=lambda i, sum: (i + 1, sum + i),
+        test=lambda i, s: i < n,
+        body=lambda i, s: (i + 1, s + i),
         init_state=(0, 0),
         extra_deps=(n,))
     self.assertEqual((5, 10), results)
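
Editor's note: seen from user code, the new `_filter_undefined` check in `while_stmt` turns a confusing TF-runtime failure into an upfront error. A hedged sketch (function name illustrative; error text matches the check above):

```python
def count_down(n):  # n: a scalar Tensor
  while n > 0:
    n = n - 1
    s = n           # `s` has no value if the loop body never executes
  return s

# When staged as a TF while loop, while_stmt now raises immediately:
#   ValueError: TensorFlow requires that the following symbols be initialized
#   to a Tensor, Variable or TensorArray before the loop: ('s',)
```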
diff --git a/tensorflow/python/autograph/operators/special_values.py b/tensorflow/python/autograph/operators/special_values.py
index 08a1013..4c1b3d1 100644
--- a/tensorflow/python/autograph/operators/special_values.py
+++ b/tensorflow/python/autograph/operators/special_values.py
@@ -19,5 +19,47 @@
 from __future__ import print_function
 
 
-# Used to reify undefined Python symbols so they can be used during staging.
-UNDEFINED = object()
+class Undefined(object):
+  """Represents an undefined symbol in Python.
+
+  This is used to reify undefined symbols, which is required to use the
+  functional form of loops.
+  Example:
+
+    while n > 0:
+      n = n - 1
+      s = n
+    return s  # Runtime error if n <= 0 (the loop body never runs)
+
+  This is valid Python code and will not result in an error as long as n
+  is positive. The use of this class is to stay as close to Python semantics
+  as possible for staged code of this nature.
+
+  Converted version of the above showing the possible usage of this class:
+
+    s = Undefined('s')
+    init_state = (s,)
+    s = while_loop(cond, body, init_state)
+    return s  # s is an instance of Undefined if the loop never runs
+
+  Attributes:
+    symbol_name: Text, identifier for the undefined symbol
+  """
+
+  def __init__(self, symbol_name):
+    self.symbol_name = symbol_name
+
+
+def is_undefined(value):
+  """Checks whether Autograph has determined that a given value is undefined.
+
+  This only works in places where Autograph reifies undefined symbols. Note that
+  if this function is passed a truly undefined symbol the call-site will raise
+  NameError.
+
+  Args:
+    value: Value to test for undefinedness.
+  Returns:
+    Boolean, whether the input value is undefined.
+  """
+  return isinstance(value, Undefined)
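
Editor's note: a minimal usage sketch of the new `Undefined`/`is_undefined` API that replaces the module-level `UNDEFINED` singleton (interpreter output shown in comments):

```python
from tensorflow.python.autograph.operators import special_values

s = special_values.Undefined('s')
print(special_values.is_undefined(s))   # True
print(s.symbol_name)                    # 's'

# Unlike the old UNDEFINED singleton, each sentinel is a distinct object that
# remembers which symbol it stands for:
assert special_values.Undefined('s') is not special_values.Undefined('s')
```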
diff --git a/tensorflow/python/autograph/operators/special_values_test.py b/tensorflow/python/autograph/operators/special_values_test.py
index 45fcf45..2e1e087 100644
--- a/tensorflow/python/autograph/operators/special_values_test.py
+++ b/tensorflow/python/autograph/operators/special_values_test.py
@@ -22,11 +22,17 @@
 from tensorflow.python.platform import test
 
 
-class PythonLangUtilsTest(test.TestCase):
+class SpecialValuesTest(test.TestCase):
 
   def test_undefined(self):
-    self.assertIs(special_values.UNDEFINED, special_values.UNDEFINED)
+    undefined_symbol = special_values.Undefined('name')
+    self.assertEqual(undefined_symbol.symbol_name, 'name')
 
+    undefined_symbol2 = special_values.Undefined('name')
+    self.assertNotEqual(undefined_symbol, undefined_symbol2)
+
+    self.assertTrue(special_values.is_undefined(undefined_symbol))
+    self.assertTrue(special_values.is_undefined(undefined_symbol2))
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/pyct/ast_util.py b/tensorflow/python/autograph/pyct/ast_util.py
index 3496c51..b091285 100644
--- a/tensorflow/python/autograph/pyct/ast_util.py
+++ b/tensorflow/python/autograph/pyct/ast_util.py
@@ -283,13 +283,18 @@
     n = node_stack.pop()
     o = other_stack.pop()
 
-    if (not isinstance(n, (ast.AST, gast.AST)) or
-        not isinstance(o, (ast.AST, gast.AST)) or
+    if (not isinstance(n, (ast.AST, gast.AST, str)) or
+        not isinstance(o, (ast.AST, gast.AST, str)) or
         n.__class__.__name__ != o.__class__.__name__):
-      raise ValueError('inconsistent nodes: {} and {}'.format(n, o))
+      raise ValueError('inconsistent nodes: {} ({}) and {} ({})'.format(
+          n, n.__class__.__name__, o, o.__class__.__name__))
 
     yield n, o
 
+    if isinstance(n, str):
+      assert isinstance(o, str), 'The check above should have ensured this'
+      continue
+
     for f in n._fields:
       n_child = getattr(n, f, None)
       o_child = getattr(o, f, None)
diff --git a/tensorflow/python/autograph/pyct/ast_util_test.py b/tensorflow/python/autograph/pyct/ast_util_test.py
index 00ef7f5..c6c1132 100644
--- a/tensorflow/python/autograph/pyct/ast_util_test.py
+++ b/tensorflow/python/autograph/pyct/ast_util_test.py
@@ -159,11 +159,20 @@
     })
 
   def test_parallel_walk(self):
-    node = parser.parse_str(
-        textwrap.dedent("""
+    src = """
       def f(a):
         return a + 1
-    """))
+    """
+    node = parser.parse_str(textwrap.dedent(src))
+    for child_a, child_b in ast_util.parallel_walk(node, node):
+      self.assertEqual(child_a, child_b)
+
+  def test_parallel_walk_string_leaves(self):
+    src = """
+      def f(a):
+        global g
+    """
+    node = parser.parse_str(textwrap.dedent(src))
     for child_a, child_b in ast_util.parallel_walk(node, node):
       self.assertEqual(child_a, child_b)
 
diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py
index fdfcd4d..0cedfa8 100644
--- a/tensorflow/python/autograph/pyct/cfg.py
+++ b/tensorflow/python/autograph/pyct/cfg.py
@@ -393,6 +393,8 @@
   def _connect_jump_to_finally_sections(self, node):
     """Connects a jump node to the finally sections protecting it."""
     cursor = set((node,))
+    if node not in self.finally_sections:
+      return cursor
     for guard_section_id in self.finally_sections[node]:
       guard_begin, guard_ends = self.finally_section_subgraphs[guard_section_id]
       self._connect_nodes(cursor, guard_begin)
@@ -620,10 +622,10 @@
     leaving_node = self.lexical_scopes.pop()
     assert node == leaving_node
 
-  def _get_enclosing_scopes(self, include, stop_at):
+  def _get_enclosing_finally_scopes(self, stop_at):
     included = []
     for node in reversed(self.lexical_scopes):
-      if isinstance(node, include):
+      if isinstance(node, gast.Try) and node.finalbody:
         included.append(node)
       if isinstance(node, stop_at):
         return node, included
@@ -635,10 +637,8 @@
 
   def _process_exit_statement(self, node, *exits_nodes_of_type):
     # Note: this is safe because we process functions separately.
-    try_node, guards = self._get_enclosing_scopes(
-        include=(gast.Try,),
-        stop_at=tuple(exits_nodes_of_type),
-    )
+    try_node, guards = self._get_enclosing_finally_scopes(
+        tuple(exits_nodes_of_type))
     if try_node is None:
       raise ValueError(
           '%s that is not enclosed by any of %s' % (node, exits_nodes_of_type))
@@ -646,10 +646,8 @@
 
   def _process_continue_statement(self, node, *loops_to_nodes_of_type):
     # Note: this is safe because we process functions separately.
-    try_node, guards = self._get_enclosing_scopes(
-        include=(gast.Try,),
-        stop_at=tuple(loops_to_nodes_of_type),
-    )
+    try_node, guards = self._get_enclosing_finally_scopes(
+        tuple(loops_to_nodes_of_type))
     if try_node is None:
       raise ValueError('%s that is not enclosed by any of %s' %
                        (node, loops_to_nodes_of_type))
@@ -698,10 +696,7 @@
     self._process_basic_statement(node)
 
   def visit_Raise(self, node):
-    try_node, guards = self._get_enclosing_scopes(
-        include=(gast.Try,),
-        stop_at=(gast.FunctionDef,),
-    )
+    try_node, guards = self._get_enclosing_finally_scopes((gast.FunctionDef,))
     if try_node is None:
       raise ValueError('%s that is not enclosed by any FunctionDef' % node)
     self.builder.add_error_node(node, guards)
@@ -797,16 +792,13 @@
     for stmt in node.orelse:
       self.visit(stmt)
 
-    if node.handlers:
-      # TODO(mdan): Should we still support bare try/except? Might be confusing.
-      raise NotImplementedError('exceptions are not yet supported')
-
     self._exit_lexical_scope(node)
 
-    self.builder.enter_finally_section(node)
-    for stmt in node.finalbody:
-      self.visit(stmt)
-    self.builder.exit_finally_section(node)
+    if node.finalbody:
+      self.builder.enter_finally_section(node)
+      for stmt in node.finalbody:
+        self.visit(stmt)
+      self.builder.exit_finally_section(node)
 
   def visit_With(self, node):
     # TODO(mdan): Mark the context manager's exit call as exit guard.
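
Editor's note: with the `NotImplementedError` for `except` handlers removed and `finally` sections made optional, the CFG builder now accepts functions like the sketch below (handler bodies are still skipped by the static analyses, per the `visit_ExceptHandler` TODOs further down):

```python
def f(x):
  try:
    x = x + 1
  except ValueError:  # previously: NotImplementedError('exceptions are not yet supported')
    x = 0
  finally:            # finally section entered only when a finalbody exists
    x = x - 1
  return x
```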
diff --git a/tensorflow/python/autograph/pyct/errors.py b/tensorflow/python/autograph/pyct/errors.py
index 3de3fad..2f2049c 100644
--- a/tensorflow/python/autograph/pyct/errors.py
+++ b/tensorflow/python/autograph/pyct/errors.py
@@ -44,5 +44,5 @@
   raise AutoGraphError(
       'Unexpected error transforming %s. If you believe this is due to a bug,'
       ' please set the verbosity to 10 (on Linux, `export '
-      'AUTOGRAPH_VERBOSITY=1`) and attach the full output when filing the bug '
-      'report. Caused by %s' % (entity, exception))
+      'AUTOGRAPH_VERBOSITY=10`) and attach the full output when filing the bug '
+      'report. Caused by: %s' % (entity, exception))
diff --git a/tensorflow/python/autograph/pyct/parser.py b/tensorflow/python/autograph/pyct/parser.py
index 8b73440..f6b2a78 100644
--- a/tensorflow/python/autograph/pyct/parser.py
+++ b/tensorflow/python/autograph/pyct/parser.py
@@ -111,7 +111,7 @@
   """Returns the AST of given piece of code."""
   # TODO(mdan): This should exclude the module things are autowrapped in.
 
-  if six.PY2 and re.search('\\Wprint\\(', src):
+  if six.PY2 and re.search('\\Wprint\\s*\\(', src):
     # This special treatment is required because gast.parse is not aware of
     # whether print_function was present in the original context.
     src = 'from __future__ import print_function\n' + src
diff --git a/tensorflow/python/autograph/pyct/parser_test.py b/tensorflow/python/autograph/pyct/parser_test.py
index cc01234..e7fa3c7 100644
--- a/tensorflow/python/autograph/pyct/parser_test.py
+++ b/tensorflow/python/autograph/pyct/parser_test.py
@@ -51,6 +51,15 @@
     """))
     self.assertEqual('f', mod.body[0].name)
 
+  def test_parse_str_weird_print(self):
+    mod = parser.parse_str(
+        textwrap.dedent("""
+            def f(x):
+              print (x)
+              return x + 1
+    """))
+    self.assertEqual('f', mod.body[0].name)
+
   def test_parse_comments(self):
     def f():
 # unindented comment
diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness.py b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
index f8b8d7f..691b786 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/liveness.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
@@ -219,6 +219,10 @@
                  frozenset(self.current_analyzer.out[cfg_node]))
     return node
 
+  def visit_ExceptHandler(self, node):
+    # TODO(b/123995141): Add exception handlers to the CFG.
+    return node
+
 
 def resolve(node, source_info, graphs):
   """Resolves the live symbols at the exit of control flow statements.
diff --git a/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py b/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py
index d1587d8..6f0f09e 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py
@@ -223,6 +223,10 @@
   def visit_global(self, node):
     raise NotImplementedError()
 
+  def visit_ExceptHandler(self, node):
+    # TODO(b/123995141): Add exception handlers to the CFG.
+    return node
+
   def visit_Name(self, node):
     if self.current_analyzer is None:
       # Names may appear outside function defs - for example in class
@@ -232,7 +236,8 @@
     analyzer = self.current_analyzer
     cfg_node = self.current_cfg_node
 
-    assert cfg_node is not None, 'name node outside of any statement?'
+    assert cfg_node is not None, ('name node, %s, outside of any statement?'
+                                  % node.id)
 
     qn = anno.getanno(node, anno.Basic.QN)
     if isinstance(node.ctx, gast.Load):
diff --git a/tensorflow/python/autograph/pyct/templates.py b/tensorflow/python/autograph/pyct/templates.py
index 831eb6d..b682a21 100644
--- a/tensorflow/python/autograph/pyct/templates.py
+++ b/tensorflow/python/autograph/pyct/templates.py
@@ -92,6 +92,14 @@
     return self.generic_visit(node)
 
   def visit_comprehension(self, node):
+    # We may be able to override some of these, but for now it's simpler
+    # to just assert that they're set.
+    self._ctx_override = None
+    return self.generic_visit(node)
+
+  def visit_Lambda(self, node):
+    # We may be able to override some of these, but for now it's simpler
+    # to just assert that they're set.
     self._ctx_override = None
     return self.generic_visit(node)
 
diff --git a/tensorflow/python/autograph/pyct/templates_test.py b/tensorflow/python/autograph/pyct/templates_test.py
index bd6b451..4762aaf 100644
--- a/tensorflow/python/autograph/pyct/templates_test.py
+++ b/tensorflow/python/autograph/pyct/templates_test.py
@@ -248,6 +248,16 @@
     self.assertIsInstance(arg_node.generators[0].target.ctx, gast.Store)
     self.assertIsInstance(arg_node.elt.ctx, gast.Load)
 
+  def test_lambda_in_function_call(self):
+    template = """
+      a = foo(arg)
+    """
+    source = parser.parse_expression('[lambda i: i]')
+    node = templates.replace(template, arg=source)
+    lambda_arg = node[0].value.args[0].elts[0]
+    self.assertIsInstance(lambda_arg.args.args[0].ctx, gast.Param)
+    self.assertIsInstance(lambda_arg.body.ctx, gast.Load)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/utils/ag_logging.py b/tensorflow/python/autograph/utils/ag_logging.py
index cd737a8..8229b82 100644
--- a/tensorflow/python/autograph/utils/ag_logging.py
+++ b/tensorflow/python/autograph/utils/ag_logging.py
@@ -46,8 +46,11 @@
   more in-depth debugging.
 
   There are two controls that control the logging verbosity:
+
    * The `set_verbosity` function
+
    * The `AUTOGRAPH_VERBOSITY` environment variable
+
   `set_verbosity` takes precedence over the environment variable.
 
   For example:
@@ -133,6 +136,8 @@
 
 def warn(msg, *args, **kwargs):
   logging.warn(msg, *args, **kwargs)
+  if echo_log_to_stdout:
+    print('WARNING:', msg % args)
 
 
 def warn_first_n(msg, *args, **kwargs):
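
Editor's note: a hedged sketch of the two verbosity controls described in the docstring above. The `log`/`has_verbosity`/`set_verbosity` names come from this module; the optional stdout-echo argument to `set_verbosity` is an assumption, mirroring the `echo_log_to_stdout` flag used by `warn()` above:

```python
import os

# Lower-precedence control: the AUTOGRAPH_VERBOSITY environment variable.
os.environ['AUTOGRAPH_VERBOSITY'] = '5'

from tensorflow.python.autograph.utils import ag_logging

# Higher-precedence control: set_verbosity overrides the environment variable.
ag_logging.set_verbosity(10)

ag_logging.log(4, 'Compiled AST of %s', 'some_fn')    # emitted at verbosity >= 4
ag_logging.warn('conversion issue in %s', 'some_fn')  # also echoed to stdout
```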
diff --git a/tensorflow/python/build_defs.bzl b/tensorflow/python/build_defs.bzl
index b9056f8..244820f 100644
--- a/tensorflow/python/build_defs.bzl
+++ b/tensorflow/python/build_defs.bzl
@@ -12,22 +12,26 @@
 # consumers of the tf_gen_op_wrapper_py rule would be simplified if we don't
 # hard code the ops/ directory.
 
-def tf_gen_op_wrapper_private_py(name, out=None, deps=[],
-                                 require_shape_functions=True,
-                                 visibility=[]):
-  if not name.endswith("_gen"):
-    fail("name must end in _gen")
-  if not visibility:
-    visibility = ["//visibility:private"]
-  bare_op_name = name[:-4] # Strip off the _gen
-  tf_gen_op_wrapper_py(name=bare_op_name,
-    out=out,
-    visibility=visibility,
-    deps=deps,
-    require_shape_functions=require_shape_functions,
-    generated_target_name=name,
-    api_def_srcs = [
-        "//tensorflow/core/api_def:base_api_def",
-        "//tensorflow/core/api_def:python_api_def",
-    ],
-  )
+def tf_gen_op_wrapper_private_py(
+        name,
+        out = None,
+        deps = [],
+        require_shape_functions = True,
+        visibility = []):
+    if not name.endswith("_gen"):
+        fail("name must end in _gen")
+    if not visibility:
+        visibility = ["//visibility:private"]
+    bare_op_name = name[:-4]  # Strip off the _gen
+    tf_gen_op_wrapper_py(
+        name = bare_op_name,
+        out = out,
+        visibility = visibility,
+        deps = deps,
+        require_shape_functions = require_shape_functions,
+        generated_target_name = name,
+        api_def_srcs = [
+            "//tensorflow/core/api_def:base_api_def",
+            "//tensorflow/core/api_def:python_api_def",
+        ],
+    )
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index bdca7de..4f3eb61 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -1590,7 +1590,21 @@
     self._default_session_context_manager = None
     self._default_graph_context_manager = None
 
-    self.close()
+    # If we are closing due to an exception, set a time limit on our Close() to
+    # avoid blocking forever.
+    # TODO(b/120204635) remove this when deadlock is fixed.
+    if exec_type:
+      close_thread = threading.Thread(
+          name='SessionCloseThread', target=self.close)
+      close_thread.daemon = True
+      close_thread.start()
+      close_thread.join(30.0)
+      if close_thread.is_alive():
+        logging.error(
+            'Session failed to close after 30 seconds. Continuing after this '
+            'point may leave your program in an undefined state.')
+    else:
+      self.close()
 
   @staticmethod
   def reset(target, containers=None, config=None):
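
Editor's note: the `__exit__` change above bounds a potentially deadlocking `close()` with a 30-second daemon-thread join. The same pattern applies to any blocking call; a standalone sketch (all names are ours):

```python
import threading
import time

def run_with_timeout(fn, timeout_sec=30.0):
  """Runs fn on a daemon thread; returns False if it missed the deadline."""
  t = threading.Thread(name='BoundedCallThread', target=fn)
  t.daemon = True          # never blocks interpreter shutdown
  t.start()
  t.join(timeout_sec)      # waits at most timeout_sec seconds
  return not t.is_alive()  # True iff fn returned in time

if not run_with_timeout(lambda: time.sleep(60), timeout_sec=0.1):
  print('call timed out; program state may be undefined')
```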
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index ef7527d..3b9677b 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -604,6 +604,27 @@
   }
 }
 
+// $input is a Python list of wrapped TF_Operations
+%typemap(in) (const std::vector<TF_Operation*>* control_outputs)
+    (std::vector<TF_Operation*> control_outputs) {
+  if ($input != Py_None) {
+    if (!PyList_Check($input)) {
+      SWIG_exception_fail(SWIG_TypeError, "$symname: expected list");
+    }
+    Py_ssize_t size = PyList_Size($input);
+    for (Py_ssize_t i = 0; i < size; ++i) {
+      PyObject* item = PyList_GetItem($input, i);
+      TF_Operation* oper_ptr;
+      SWIG_ConvertPtr(item, reinterpret_cast<void**>(&oper_ptr),
+                      $descriptor(TF_Operation*), 0);
+      control_outputs.push_back(oper_ptr);
+    }
+    $1 = &control_outputs;
+  } else {
+    $1 = nullptr;
+  }
+}
+
 // Typemaps for TF_GraphGetTensorShapeHelper.
 
 // Convert from C++ integer vector to Python list of ints.
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index dc0c10b..56b4eec 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -590,7 +590,9 @@
     const TF_Graph* fn_body, const char* fn_name, bool append_hash_to_fn_name,
     const std::vector<TF_Operation*>* opers,
     const std::vector<TF_Output>& inputs, const std::vector<TF_Output>& outputs,
-    const NameVector& output_names, const TF_FunctionOptions* opts,
+    const NameVector& output_names,
+    const std::vector<TF_Operation*>* control_outputs,
+    const NameVector& control_output_names, const TF_FunctionOptions* opts,
     const char* description, TF_Status* out_status) {
   if (!output_names.empty() && output_names.size() != outputs.size()) {
     Set_TF_Status_from_Status(
@@ -613,10 +615,18 @@
       output_names.empty() ? nullptr
                            : const_cast<const char**>(output_names.data());
 
-  return TF_GraphToFunction(fn_body, fn_name, append_hash_to_fn_name, nopers,
-                            opers_array, inputs.size(), inputs.data(),
-                            outputs.size(), outputs.data(), output_names_ptr,
-                            opts, description, out_status);
+  const char** control_output_names_ptr =
+      control_output_names.empty()
+          ? nullptr
+          : const_cast<const char**>(control_output_names.data());
+
+  return TF_GraphToFunctionWithControlOutputs(
+      fn_body, fn_name, append_hash_to_fn_name, nopers, opers_array,
+      inputs.size(), inputs.data(), outputs.size(), outputs.data(),
+      output_names_ptr,
+      control_outputs == nullptr ? 0 : control_outputs->size(),
+      control_outputs == nullptr ? nullptr : control_outputs->data(),
+      control_output_names_ptr, opts, description, out_status);
 }
 
 void TF_GraphSetOutputHandleShapesAndTypes_wrapper(
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index dab7e71..d2c7dc3 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -208,7 +208,9 @@
     const TF_Graph* fn_body, const char* fn_name, bool append_hash_to_fn_name,
     const std::vector<TF_Operation*>* opers,
     const std::vector<TF_Output>& inputs, const std::vector<TF_Output>& outputs,
-    const NameVector& output_names, const TF_FunctionOptions* opts,
+    const NameVector& output_names,
+    const std::vector<TF_Operation*>* control_outputs,
+    const NameVector& control_output_names, const TF_FunctionOptions* opts,
     const char* description, TF_Status* status);
 
 // Set the shapes and types for the output's handle.
diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 3ba5468..38e6e37 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -27,7 +27,7 @@
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 6)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 20)
 
 
 @tf_export("compat.forward_compatible")
diff --git a/tensorflow/python/compiler/BUILD b/tensorflow/python/compiler/BUILD
new file mode 100644
index 0000000..07209a9
--- /dev/null
+++ b/tensorflow/python/compiler/BUILD
@@ -0,0 +1,19 @@
+# Description:
+# Python APIs for various TensorFlow backends.
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "if_not_windows")
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "compiler",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = if_not_windows([
+        "//tensorflow/python/compiler/tensorrt:init_py",
+    ]),
+)
diff --git a/tensorflow/python/compiler/__init__.py b/tensorflow/python/compiler/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tensorflow/python/compiler/__init__.py
diff --git a/tensorflow/python/compiler/tensorrt/BUILD b/tensorflow/python/compiler/tensorrt/BUILD
index a382579..b9fe768 100644
--- a/tensorflow/python/compiler/tensorrt/BUILD
+++ b/tensorflow/python/compiler/tensorrt/BUILD
@@ -32,8 +32,6 @@
     deps = [
         ":tf_trt_integration_test_base",
         ":trt_convert_py",
-        ":trt_ops_py",
-        "//tensorflow/python:errors",
     ],
 )
 
@@ -51,6 +49,7 @@
     srcs = ["trt_convert.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":trt_ops_py",
         ":wrap_conversion",
         "//tensorflow/python:graph_util",
         "//tensorflow/python:session",
@@ -83,7 +82,6 @@
     srcs = ["test/tf_trt_integration_test_base.py"],
     deps = [
         ":trt_convert_py",
-        ":trt_ops_py",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_test_lib",
     ],
@@ -112,6 +110,7 @@
         "no_windows",
         "nomac",
     ],
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_tests(
@@ -149,6 +148,7 @@
         "no_windows",
         "nomac",
     ],
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_test(
@@ -173,4 +173,5 @@
         "no_windows",
         "nomac",
     ],
+    xla_enable_strict_auto_jit = True,
 )
diff --git a/tensorflow/python/compiler/tensorrt/__init__.py b/tensorflow/python/compiler/tensorrt/__init__.py
index 88fb691..db3540b 100644
--- a/tensorflow/python/compiler/tensorrt/__init__.py
+++ b/tensorflow/python/compiler/tensorrt/__init__.py
@@ -18,25 +18,6 @@
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import errors
-
-# pylint: disable=unused-import,g-import-not-at-top,line-too-long
-try:
-  from tensorflow.compiler.tf2tensorrt.python.ops import trt_ops
-  from tensorflow.python.compiler.tensorrt.trt_convert import add_test_value
-  from tensorflow.python.compiler.tensorrt.trt_convert import calib_graph_to_infer_graph
-  from tensorflow.python.compiler.tensorrt.trt_convert import clear_test_values
-  from tensorflow.python.compiler.tensorrt.trt_convert import create_inference_graph
-  from tensorflow.python.compiler.tensorrt.trt_convert import enable_test_value
-  from tensorflow.python.compiler.tensorrt.trt_convert import get_test_value
-  from tensorflow.python.compiler.tensorrt.trt_convert import is_tensorrt_enabled
-except errors.NotFoundError as e:
-  no_trt_message = (
-      '**** Failed to initialize TensorRT. This is either because the TensorRT'
-      ' installation path is not in LD_LIBRARY_PATH, or because you do not have'
-      ' it installed. If not installed, please go to'
-      ' https://developer.nvidia.com/tensorrt to download and install'
-      ' TensorRT ****')
-  print(no_trt_message)
-  raise e
-# pylint: enable=unused-import,g-import-not-at-top,line-too-long
+# pylint: disable=unused-import,line-too-long
+from tensorflow.python.compiler.tensorrt.trt_convert import create_inference_graph
+# pylint: enable=unused-import,line-too-long
diff --git a/tensorflow/python/compiler/tensorrt/test/base_test.py b/tensorflow/python/compiler/tensorrt/test/base_test.py
index cc31099..a1199c5 100644
--- a/tensorflow/python/compiler/tensorrt/test/base_test.py
+++ b/tensorflow/python/compiler/tensorrt/test/base_test.py
@@ -20,8 +20,9 @@
 
 import numpy as np
 
-from tensorflow.python.compiler.tensorrt import trt_convert
 from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test
+from tensorflow.python.compiler.tensorrt.wrap_conversion import add_test_value
+from tensorflow.python.compiler.tensorrt.wrap_conversion import clear_test_values
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -154,7 +155,7 @@
     """Setup method."""
     super(PartiallyConvertedTestA, self).setUp()
     # Let it fail to build the second engine.
-    trt_convert.add_test_value("TRTEngineOp_1:CreateTRTNode", "fail")
+    add_test_value("TRTEngineOp_1:CreateTRTNode", "fail")
 
   def GetParams(self):
     """Create a graph containing two segment."""
@@ -209,8 +210,8 @@
     """Setup method."""
     super(PartiallyConvertedTestB, self).setUp()
     # Let it fail to build the first engine.
-    trt_convert.clear_test_values("")
-    trt_convert.add_test_value("TRTEngineOp_0:CreateTRTNode", "fail")
+    clear_test_values("")
+    add_test_value("TRTEngineOp_0:CreateTRTNode", "fail")
 
   def ExpectedEnginesToBuild(self, run_params):
     """Return the expected engines to build."""
diff --git a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py
index 1d7792c..cdd25e3 100644
--- a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py
+++ b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py
@@ -18,13 +18,12 @@
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import
-from tensorflow.compiler.tf2tensorrt.python.ops import trt_ops
-# pylint: enable=unused-import
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import data
 from tensorflow.python import keras
 from tensorflow.python.compiler.tensorrt import trt_convert
+from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
+from tensorflow.python.compiler.tensorrt.wrap_conversion import is_tensorrt_enabled
 from tensorflow.python.estimator.estimator import Estimator
 from tensorflow.python.estimator.model_fn import EstimatorSpec
 from tensorflow.python.estimator.model_fn import ModeKeys
@@ -139,9 +138,9 @@
     if use_trt:
       logging.info('Number of nodes before TF-TRT conversion: %d',
                    len(graph_def.node))
-      graph_def = trt_convert.create_inference_graph(
-          graph_def,
-          outputs=[OUTPUT_NODE_NAME],
+      converter = trt_convert.TrtGraphConverter(
+          input_graph_def=graph_def,
+          nodes_blacklist=[OUTPUT_NODE_NAME],
           max_batch_size=max_batch_size,
           precision_mode='INT8',
           # There is a 2GB GPU memory limit for each test, so we set
@@ -151,6 +150,7 @@
           minimum_segment_size=2,
           use_calibration=False,
       )
+      graph_def = converter.convert()
       logging.info('Number of nodes after TF-TRT conversion: %d',
                    len(graph_def.node))
       num_engines = len(
@@ -263,7 +263,7 @@
   #     num_epochs=100,
   #     model_dir=model_dir)
   def testEval(self):
-    if not trt_convert.is_tensorrt_enabled():
+    if not is_tensorrt_enabled():
       return
     model_dir = test.test_src_dir_path('python/compiler/tensorrt/test/testdata')
 
@@ -274,9 +274,9 @@
         num_epochs=None,
         model_dir=model_dir)['accuracy']
     logging.info('accuracy_tf_native: %f', accuracy_tf_native)
-    self.assertAllClose(0.9662, accuracy_tf_native, rtol=1e-3, atol=1e-3)
+    self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)
 
-    if trt_convert.get_linked_tensorrt_version()[0] < 5:
+    if get_linked_tensorrt_version()[0] < 5:
       return
 
     accuracy_tf_trt = self._Run(
diff --git a/tensorflow/python/compiler/tensorrt/test/quantization_test.py b/tensorflow/python/compiler/tensorrt/test/quantization_test.py
index 086e070..3e1c9ff 100644
--- a/tensorflow/python/compiler/tensorrt/test/quantization_test.py
+++ b/tensorflow/python/compiler/tensorrt/test/quantization_test.py
@@ -20,8 +20,8 @@
 
 import numpy as np
 
-from tensorflow.python.compiler.tensorrt import trt_convert
 from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test
+from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -72,7 +72,7 @@
     return _GetParams(add_quantization_nodes=False)
 
   def ShouldRunTest(self, run_params):
-    if trt_convert.get_linked_tensorrt_version()[0] < 5:
+    if get_linked_tensorrt_version()[0] < 5:
       return False
     # Only test static engine mode, with or without calibration.
     return (trt_test.IsQuantizationMode(run_params.precision_mode) and
@@ -96,7 +96,7 @@
     return _GetParams(add_quantization_nodes=True)
 
   def ShouldRunTest(self, run_params):
-    if trt_convert.get_linked_tensorrt_version()[0] < 5:
+    if get_linked_tensorrt_version()[0] < 5:
       return False
     # Test static/dynamic engine with/without calibration.
     return (trt_test.IsQuantizationMode(run_params.precision_mode) and
diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py
index 28563f0..3a14a1c 100644
--- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py
+++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py
@@ -25,12 +25,13 @@
 import numpy as np
 import six
 
-# pylint: disable=unused-import
-from tensorflow.compiler.tf2tensorrt.python.ops import trt_ops
-# pylint: enable=unused-import
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.compiler.tensorrt import trt_convert
+from tensorflow.python.compiler.tensorrt.wrap_conversion import clear_test_values
+from tensorflow.python.compiler.tensorrt.wrap_conversion import enable_test_value
+from tensorflow.python.compiler.tensorrt.wrap_conversion import get_test_value
+from tensorflow.python.compiler.tensorrt.wrap_conversion import is_tensorrt_enabled
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_io
 from tensorflow.python.framework import importer
@@ -110,7 +111,7 @@
 
   @property
   def trt_incompatible_op(self):
-    return math_ops.sin
+    return math_ops.erf
 
   @property
   def precision_modes(self):
@@ -151,7 +152,7 @@
   def setUpClass(cls):
     """Setup method for the module."""
     super(TfTrtIntegrationTestBase, cls).setUpClass()
-    trt_convert.enable_test_value()
+    enable_test_value()
 
   def __init__(self, methodName="runTest"):  # pylint: disable=invalid-name
     super(TfTrtIntegrationTestBase, self).__init__(methodName)
@@ -161,7 +162,7 @@
     """Setup method."""
     super(TfTrtIntegrationTestBase, self).setUp()
     warnings.simplefilter("always")
-    trt_convert.clear_test_values("")
+    clear_test_values("")
 
   def GetParams(self):
     """Return a TfTrtIntegrationTestParams for test, implemented by subclass."""
@@ -246,9 +247,9 @@
   def _PrepareRun(self, graph_state):
     """Set up necessary testing environment before calling sess.run()."""
     # Clear test values added by TRTEngineOp.
-    trt_convert.clear_test_values("TRTEngineOp_.*:ExecuteTrtEngine")
-    trt_convert.clear_test_values("TRTEngineOp_.*:ExecuteCalibration")
-    trt_convert.clear_test_values("TRTEngineOp_.*:ExecuteNativeSegment")
+    clear_test_values("TRTEngineOp_.*:ExecuteTrtEngine")
+    clear_test_values("TRTEngineOp_.*:ExecuteCalibration")
+    clear_test_values("TRTEngineOp_.*:ExecuteNativeSegment")
 
   def _GetGPUOptions(self):
     gpu_options = config_pb2.GPUOptions()
@@ -258,7 +259,7 @@
   def _GetConfigProto(self, run_params, graph_state):
     """Get config proto based on specific settings."""
     conversion_params = self.GetConversionParams(run_params)
-    if graph_state != GraphState.ORIGINAL and run_params.use_optimizer:
+    if graph_state == GraphState.INFERENCE and run_params.use_optimizer:
       rewriter_cfg = trt_convert.TrtGraphConverter.get_tensorrt_rewriter_config(
           conversion_params.rewriter_config, conversion_params.max_batch_size,
           conversion_params.max_workspace_size_bytes,
@@ -282,7 +283,7 @@
 
   def _ExpectTestValue(self, engine_name, method, expected_value):
     label = "%s:%s" % (engine_name, method)
-    actual_value = trt_convert.get_test_value(label)
+    actual_value = get_test_value(label)
     self.assertEqual(
         expected_value,
         actual_value,
@@ -298,6 +299,23 @@
   def _ExpectNativeSegment(self, engine_name, value):
     self._ExpectTestValue(engine_name, "ExecuteNativeSegment", value)
 
+  def _GetFeedNames(self):
+    params = self._GetParamsCached()
+    # Construct the feeds tensor names by appending :0 to the node names.
+    return [input_name + ":0" for input_name in params.input_names]
+
+  def _GetFetchNames(self):
+    params = self._GetParamsCached()
+    # Construct the fetches tensor names by appending :0 to the node names.
+    return [output_name + ":0" for output_name in params.output_names]
+
+  def _GetFeedDict(self, inputs_data, input_shape_index):
+    assert input_shape_index < len(inputs_data)
+    feeds = self._GetFeedNames()
+    return {
+        feeds[i]: inputs_data[input_shape_index][i] for i in range(len(feeds))
+    }
+
   def _RunGraph(self,
                 run_params,
                 gdef,
@@ -307,32 +325,23 @@
                 num_runs=2):
     """Run given graphdef multiple times."""
     params = self._GetParamsCached()
-    for current_input_data in inputs_data:
-      assert len(params.input_names) == len(current_input_data)
+    for data in inputs_data:
+      assert len(params.input_names) == len(data)
 
-    vals = []
+    fetches = self._GetFetchNames()
     g = ops.Graph()
     with g.as_default():
-      io_ops = importer.import_graph_def(
-          graph_def=gdef,
-          return_elements=params.input_names + params.output_names,
-          name="")
-      inputs = [op.outputs[0] for op in io_ops[:len(params.input_names)]]
-      for current_input_data in inputs_data:
-        assert len(inputs) == len(current_input_data)
-      outputs = [op.outputs[0] for op in io_ops[len(params.input_names):]]
-      with self.test_session(
+      importer.import_graph_def(graph_def=gdef, name="")
+      with self.session(
           graph=g, config=config, use_gpu=True, force_gpu=True) as sess:
+        vals = []
         # Run for each input(s) shape
         for shape_index in range(len(inputs_data)):
           val = None
-          # Defaults to 2 runs to verify result across multiple runs is same.
           for _ in range(num_runs):
             self._PrepareRun(graph_state)
-            new_val = sess.run(outputs, {
-                inputs[i]: inputs_data[shape_index][i]
-                for i in range(len(inputs))
-            })
+            new_val = sess.run(fetches,
+                               self._GetFeedDict(inputs_data, shape_index))
             output_len = len(params.expected_output_dims[shape_index])
             self.assertEqual(output_len, len(new_val))
             for i in range(output_len):
@@ -344,25 +353,15 @@
             val = new_val
             self.VerifyRun(run_params, graph_state)
           vals.append(val)
-    return vals
+        return vals
 
-  # Use real data that is representative of the inference dataset
-  # for calibration. For this test script it is random data.
-  def _RunCalibration(self, run_params, gdef, inputs_data, config):
-    """Run calibration on given graph."""
-    return self._RunGraph(
-        run_params, gdef, inputs_data, config, GraphState.CALIBRATE, num_runs=5)
-
-  def _GetTrtGraphDef(self, run_params, graph_state, gdef):
-    """Return trt converted graphdef."""
+  def _CreateConverter(self, gdef, session_config, conversion_params):
+    """Return a TrtGraphConverter."""
     params = self._GetParamsCached()
-    conversion_params = self.GetConversionParams(run_params)
-    logging.info(conversion_params)
-
-    config_for_trt = self._GetConfigProto(run_params, graph_state)
-    return trt_convert.create_inference_graph(
+    converter = trt_convert.TrtGraphConverter(
         input_graph_def=gdef,
-        outputs=params.input_names + params.output_names,
+        nodes_blacklist=params.input_names + params.output_names,
+        session_config=session_config,
         max_batch_size=conversion_params.max_batch_size,
         max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
         precision_mode=conversion_params.precision_mode,
@@ -370,8 +369,42 @@
         is_dynamic_op=conversion_params.is_dynamic_op,
         maximum_cached_engines=conversion_params.maximum_cached_engines,
         cached_engine_batches=conversion_params.cached_engine_batches,
-        use_calibration=conversion_params.use_calibration,
-        session_config=config_for_trt)
+        use_calibration=conversion_params.use_calibration)
+    return converter
+
+  def _GetCalibratedInferGraph(self, run_params, gdef, inputs_data):
+    """Return trt converted graphdef in INT8 mode."""
+    conversion_params = self.GetConversionParams(run_params)
+    logging.info(conversion_params)
+    assert conversion_params.precision_mode == "INT8"
+    assert conversion_params.is_dynamic_op
+    assert conversion_params.maximum_cached_engines == 1
+    assert not conversion_params.cached_engine_batches
+    assert conversion_params.use_calibration
+    assert len(inputs_data) == 1  # We only support calibrating a single engine.
+
+    session_config = self._GetConfigProto(run_params, GraphState.CALIBRATE)
+    logging.info("Running calibration graph, config:\n%s", str(session_config))
+
+    converter = self._CreateConverter(gdef, session_config, conversion_params)
+    int8_gdef = converter.convert()
+    self._VerifyGraphDef(run_params, int8_gdef, GraphState.CALIBRATE)
+
+    return converter.calibrate(
+        fetch_names=self._GetFetchNames(),
+        num_runs=5,
+        feed_dict_fn=lambda: self._GetFeedDict(inputs_data, 0))
+
+  def _GetInferGraph(self, run_params, gdef):
+    """Return trt converted graphdef."""
+    conversion_params = self.GetConversionParams(run_params)
+    logging.info(conversion_params)
+
+    session_config = self._GetConfigProto(run_params, GraphState.INFERENCE)
+    logging.info("Creating TRT graph for inference, config\n%s",
+                 str(session_config))
+    converter = self._CreateConverter(gdef, session_config, conversion_params)
+    return converter.convert()
 
   def _WriteGraph(self, run_params, gdef, graph_state):
     if graph_state == GraphState.ORIGINAL:
@@ -454,13 +487,14 @@
 
     expected_engines = self.ExpectedEnginesToBuild(run_params)
     num_engines = 0
+    functions = [f.signature.name for f in gdef.library.function]
     for node in gdef.node:
       if node.op == "TRTEngineOp":
         logging.info("Found TRTEngineOp: " + node.name)
-    for node in gdef.node:
-      if node.op == "TRTEngineOp":
         num_engines += 1
-        self.assertTrue(node.name in expected_engines, node.name)
+        function_name = node.name + "_native_segment"
+        self.assertIn(function_name, functions)
+        self.assertIn(node.name, expected_engines)
         self.assertTrue(len(node.attr["serialized_segment"].s), node.name)
         self.assertTrue(len(node.attr["segment_funcdef_name"].s), node.name)
         self.assertEqual(
@@ -521,9 +555,10 @@
             (scale * np.random.random_sample(dims)).astype(dtype))
       inputs_data.append(current_input_data)
 
+    # Verify original graph.
     self._VerifyGraphDef(run_params, input_gdef, GraphState.ORIGINAL)
 
-    # Get reference result without running trt.
+    # Run original graph without trt to get reference result.
     config_no_trt = self._GetConfigProto(run_params, GraphState.ORIGINAL)
     logging.info("Running original graph w/o trt, config:\n%s",
                  str(config_no_trt))
@@ -533,27 +568,12 @@
     # Run calibration if necessary.
     if (IsQuantizationMode(run_params.precision_mode) and
         run_params.use_calibration):
-
-      calib_config = self._GetConfigProto(run_params, GraphState.CALIBRATE)
-      logging.info("Running calibration graph, config:\n%s", str(calib_config))
-      if run_params.use_optimizer:
-        result = self._RunCalibration(run_params, input_gdef, inputs_data,
-                                      calib_config)
-      else:
-        calib_gdef = self._GetTrtGraphDef(run_params, GraphState.CALIBRATE,
-                                          input_gdef)
-        self._VerifyGraphDef(run_params, calib_gdef, GraphState.CALIBRATE)
-        result = self._RunCalibration(run_params, calib_gdef, inputs_data,
-                                      calib_config)
-      infer_gdef = trt_convert.calib_graph_to_infer_graph(
-          calib_gdef, run_params.dynamic_engine)
+      infer_gdef = self._GetCalibratedInferGraph(run_params, input_gdef,
+                                                 inputs_data)
       self._VerifyGraphDef(run_params, infer_gdef, GraphState.INFERENCE)
-
-      self.assertAllClose(
-          ref_result,
-          result,
-          atol=self.ExpectedAbsoluteTolerance(run_params),
-          rtol=self.ExpectedRelativeTolerance(run_params))
+    elif not run_params.use_optimizer:
+      infer_gdef = self._GetInferGraph(run_params, input_gdef)
+      self._VerifyGraphDef(run_params, infer_gdef, GraphState.INFERENCE)
     else:
       infer_gdef = input_gdef
 
@@ -561,11 +581,6 @@
     infer_config = self._GetConfigProto(run_params, GraphState.INFERENCE)
     logging.info("Running final inference graph, config:\n%s",
                  str(infer_config))
-    if not run_params.use_optimizer:
-      infer_gdef = self._GetTrtGraphDef(run_params, GraphState.INFERENCE,
-                                        infer_gdef)
-      self._VerifyGraphDef(run_params, infer_gdef, GraphState.INFERENCE)
-
     result = self._RunGraph(run_params, infer_gdef, inputs_data, infer_config,
                             GraphState.INFERENCE)
     self.assertAllClose(
@@ -610,9 +625,8 @@
   for (use_optimizer, precision_mode, dynamic_engine, use_calibration) in opts:
     if IsQuantizationMode(precision_mode):
       if use_optimizer:
-        # TODO(aaroey): if use_optimizer is True we need to get the inference
-        # graphdef using custom python wrapper class, which is not currently
-        # supported yet.
+        # We ignore the use_optimizer option and always use TrtGraphConverter
+        # for INT8 mode, so no need to run it twice.
         continue
       if use_calibration and not dynamic_engine:
         # Static engine with use_calibration=False will be static, so we want to
@@ -639,5 +653,5 @@
     setattr(test_class, "testTfTrt_" + test_name, _GetTest(run_params))
 
 
-if trt_convert.is_tensorrt_enabled():
+if is_tensorrt_enabled():
   _AddTests(TfTrtIntegrationTestBase)
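
Editor's note: the INT8 flow above condenses to the following converter usage; a hedged sketch where `graph_def`, `output_node_names`, `'input:0'`, and `calibration_batch` are illustrative placeholders:

```python
converter = trt_convert.TrtGraphConverter(
    input_graph_def=graph_def,
    nodes_blacklist=output_node_names,
    precision_mode='INT8',
    is_dynamic_op=True,            # required for the calibration path above
    maximum_cached_engines=1,
    use_calibration=True)
calib_gdef = converter.convert()   # graph with calibration-mode TRTEngineOps
infer_gdef = converter.calibrate(  # feeds data num_runs times, then finalizes
    fetch_names=[name + ':0' for name in output_node_names],
    num_runs=5,
    feed_dict_fn=lambda: {'input:0': calibration_batch})
```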
diff --git a/tensorflow/python/compiler/tensorrt/trt_conversion.i b/tensorflow/python/compiler/tensorrt/trt_conversion.i
index c12895c..35a6fa1 100644
--- a/tensorflow/python/compiler/tensorrt/trt_conversion.i
+++ b/tensorflow/python/compiler/tensorrt/trt_conversion.i
@@ -17,38 +17,10 @@
 %{
 #define SWIG_FILE_WITH_INIT
 %}
-%include "std_pair.i"
+%include "std_string.i"
 %include "tensorflow/python/platform/base.i"
 
 %{
-PyObject* pair_helper(std::pair<string, string>* in) {
-  PyObject *first(nullptr), *second(nullptr), *tuple(nullptr);
-  first = PyBytes_FromStringAndSize(in->first.data(), in->first.length());
-  if (!first) {
-    if (!PyErr_Occurred()) {
-      PyErr_SetString(PyExc_TypeError, "Pair conversion first argument failed");
-    }
-    return NULL;
-  }
-  second = PyBytes_FromStringAndSize(in->second.data(), in->second.length());
-  if (!second) {
-    if (!PyErr_Occurred()) {
-      PyErr_SetString(PyExc_TypeError,
-                      "Pair conversion second argument failed");
-    }
-    return NULL;
-  }
-  tuple = Py_BuildValue("(OO)", first, second);
-  if (!tuple) {
-    if (!PyErr_Occurred()) {
-      PyErr_SetString(PyExc_TypeError,
-                      "Tuple creation from pair<string,string> failed!");
-    }
-    return NULL;
-  }
-  return tuple;
-}
-
 struct version_struct{
   int vmajor;
   int vminor;
@@ -67,6 +39,7 @@
   }
   return tuple;
 }
+
 /* Define converters for vector<int> */
 template<>
 bool _PyObjAs(PyObject *pyobj, int* dest) {
@@ -83,12 +56,6 @@
 
 _LIST_OUTPUT_TYPEMAP(int, PyLong_FromLong);
 
-%typemap(out) std::pair<string, string> {
-  PyObject *tuple = pair_helper(&$1);
-  if (!tuple) SWIG_fail;
-  $result = tuple;
-}
-
 %typemap(out) version_struct {
   PyObject *tuple = version_helper(&$1);
   if (!tuple) SWIG_fail;
@@ -96,9 +63,6 @@
 }
 
 %{
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/util/stat_summarizer.h"
 #include "tensorflow/compiler/tf2tensorrt/convert/convert_graph.h"
 #include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
 #include "tensorflow/compiler/tf2tensorrt/utils/test_utils.h"
@@ -106,7 +70,6 @@
 
 %ignoreall
 %unignore tensorflow;
-%unignore calib_convert;
 %unignore get_linked_tensorrt_version;
 %unignore get_loaded_tensorrt_version;
 %unignore is_tensorrt_enabled;
@@ -117,52 +80,6 @@
 
 %{
 
-std::pair<string, string> calib_convert(
-    string graph_def_string, bool is_dyn_op
-    // unfortunately we can't use TF_Status here since it
-    // is in c/c_api and brings in a lot of other libraries
-    // which in turn declare ops. These ops are included
-    // statically in our library and cause an abort when
-    // module is loaded due to double registration
-    // until Tensorflow properly exposes these headers
-    // we have to work around this by returning a string
-    // and converting it to exception on python side.
-    //,TF_Status* out_status) {
-) {
-#if GOOGLE_CUDA && GOOGLE_TENSORRT
-  string out_status;
-
-  tensorflow::GraphDef graph_def;
-  if (!graph_def.ParseFromString(graph_def_string)) {
-    out_status = "InvalidArgument;Couldn't interpret input as a GraphDef";
-    return std::pair<string, string>{out_status, ""};
-  }
-  graph_def_string.resize(0);
-  tensorflow::GraphDef out_graph;
-  tensorflow::Status conversion_status =
-      tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(
-          graph_def, &out_graph, is_dyn_op);
-  if (!conversion_status.ok()) {
-    auto retCode = (int)conversion_status.code();
-    char buff[2000];
-    snprintf(buff, 2000, "%d;%s", retCode,
-             conversion_status.error_message().c_str());
-    out_status = buff;
-    return std::pair<string, string>{out_status, ""};
-  }
-  string result;
-  if (!out_graph.SerializeToString(&result)) {
-    out_status = "InvalidArgument;Couldn't serialize output as a GraphDef";
-    return std::pair<string, string>{out_status, ""};
-  }
-  out_status = "OK;All good!";
-  return std::pair<string, string>{out_status, result};
-#else
-  // Returns FAILED_PRECONDITION.
-  return std::pair<string, string>{"9;TensorRT is not enabled!", ""};
-#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
-}
-
 version_struct get_linked_tensorrt_version() {
   // Return the version at the link time.
   version_struct s;
@@ -221,8 +138,6 @@
 
 %}
 
-std::pair<string, string> calib_convert(
-    string graph_def_string, bool is_dyn_op);
 version_struct get_linked_tensorrt_version();
 version_struct get_loaded_tensorrt_version();
 bool is_tensorrt_enabled();
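The slimmed-down interface above now exposes only the version and capability queries. For orientation, a minimal sketch of how a caller might use them (illustrative only; it assumes the versions come back as `(major, minor, patch)` tuples, as `version_helper` suggests, and the actual compatibility policy lives in `TrtGraphConverter.__init__` below):

```python
# Illustrative sketch of the remaining wrap_conversion surface.
from tensorflow.python.compiler.tensorrt.wrap_conversion import (
    get_linked_tensorrt_version, get_loaded_tensorrt_version,
    is_tensorrt_enabled)

if is_tensorrt_enabled():
  linked = get_linked_tensorrt_version()  # version linked at build time
  loaded = get_loaded_tensorrt_version()  # version found at runtime
  print("linked TensorRT %s, loaded TensorRT %s" %
        (".".join(str(x) for x in linked),
         ".".join(str(x) for x in loaded)))
```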
diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py
index 33b5e50..0caa2bf 100644
--- a/tensorflow/python/compiler/tensorrt/trt_convert.py
+++ b/tensorflow/python/compiler/tensorrt/trt_convert.py
@@ -19,27 +19,17 @@
 from __future__ import print_function
 
 import six as _six
-# pylint: disable=unused-import,line-too-long
 from tensorflow.compiler.tf2tensorrt.python.ops import trt_ops
-from tensorflow.python.compiler.tensorrt.wrap_conversion import add_test_value
-from tensorflow.python.compiler.tensorrt.wrap_conversion import calib_convert
-from tensorflow.python.compiler.tensorrt.wrap_conversion import clear_test_values
-from tensorflow.python.compiler.tensorrt.wrap_conversion import enable_test_value
-from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
-from tensorflow.python.compiler.tensorrt.wrap_conversion import get_loaded_tensorrt_version
-from tensorflow.python.compiler.tensorrt.wrap_conversion import get_test_value
-from tensorflow.python.compiler.tensorrt.wrap_conversion import is_tensorrt_enabled
-# pylint: enable=unused-import,line-too-long
-from tensorflow.core.framework import graph_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
-from tensorflow.python.framework import errors_impl as _impl
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
 from tensorflow.python.framework import importer
 from tensorflow.python.framework import ops
 from tensorflow.python.grappler import tf_optimizer
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging
 from tensorflow.python.saved_model import builder
 from tensorflow.python.saved_model import loader
@@ -86,7 +76,20 @@
   my_converter.save(output_saved_model_dir)  # Optional
   ```
 
-  TODO(laigd): add calibration support.
+  To run the conversion with quantization calibration:
+
+  ```python
+  my_converter = MyGraphConverter(input_saved_model_dir="my_dir")
+  my_converter.convert()
+
+  # Run calibration 10 times.
+  converted_graph_def = my_converter.calibrate(
+      fetch_names=['output:0'],
+      num_runs=10,
+      feed_dict_fn=lambda: {'input:0': my_next_data()})
+
+  my_converter.save(output_saved_model_dir)  # Optional
+  ```
   """
 
   def __init__(self,
@@ -130,6 +133,11 @@
         input_saved_model_tags or [tag_constants.SERVING])
     self._session_config = session_config or config_pb2.ConfigProto()
 
+    # For calibration usage.
+    self._calibration_graph = None
+    self._calibration_sess = None
+    self._calibration_data_collected = False
+
   def get_rewriter_config(self, rewriter_config_template=None):
     """Returns a RewriterConfig proto for TRT transformation.
 
@@ -249,6 +257,63 @@
       self._convert_saved_model()
     return self._converted_graph_def
 
+  def calibrate(self,
+                fetch_names,
+                num_runs,
+                feed_dict_fn=None,
+                input_map_fn=None):
+    """Run the calibration and return the calibrated GraphDef.
+
+    Args:
+      fetch_names: a list of output tensor names to fetch during calibration.
+      num_runs: number of runs of the graph during calibration.
+      feed_dict_fn: a function that returns a dictionary mapping input names (as
+        strings) in the GraphDef to be calibrated to values (e.g. Python lists,
+        numpy arrays, etc.). One and only one of `feed_dict_fn` and
+        `input_map_fn` should be specified.
+      input_map_fn: a function that returns a dictionary mapping input names (as
+        strings) in the GraphDef to be calibrated to Tensor objects. The values
+        of the named input tensors in the GraphDef to be calibrated will be
+        re-mapped to the respective `Tensor` values during calibration. One and
+        only one of `feed_dict_fn` and `input_map_fn` should be specified.
+
+    Raises:
+      ValueError: if the input combination is invalid.
+
+    Returns:
+      The GraphDef after the calibration.
+    """
+    assert self._converted
+    assert not self._calibration_sess
+    if (feed_dict_fn and input_map_fn) or (not feed_dict_fn and
+                                           not input_map_fn):
+      raise ValueError(
+          "Should specify one and only one of feed_dict_fn and input_map_fn.")
+
+    self._calibration_graph = ops.Graph()
+    with self._calibration_graph.as_default():
+      fetches = importer.import_graph_def(
+          self._converted_graph_def,
+          input_map=input_map_fn() if input_map_fn else None,
+          return_elements=fetch_names,
+          name="")
+    self._calibration_sess = session.Session(
+        graph=self._calibration_graph, config=self._session_config)
+
+    for _ in range(num_runs):
+      self._calibration_sess.run(
+          fetches, feed_dict=feed_dict_fn() if feed_dict_fn else None)
+
+    self.finalize_calibration()
+    return self._converted_graph_def
+
+  def finalize_calibration(self):
+    """Clean up calibration resources and finalize the calibration.
+
+    Implementations need to close self._calibration_sess before returning.
+    """
+    raise NotImplementedError("finalize_calibration")
+
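`GraphConverter` leaves `finalize_calibration` abstract; `TrtGraphConverter` supplies the real implementation further down in this diff. For orientation, a minimal hypothetical subclass only needs to pull its calibration state out of the session and then honor the close contract:

```python
# Hypothetical subclass, for illustration only.
class MyConverter(GraphConverter):

  def finalize_calibration(self):
    # ... fetch whatever calibration state is needed from
    # self._calibration_sess here ...
    self._calibration_data_collected = True
    self._calibration_sess.close()  # required by the docstring contract
```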
   def save(self, output_saved_model_dir):
     """Save the converted graph as a SavedModel.
 
@@ -323,8 +388,7 @@
       max_batch_size: max size for the input batch
       max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
         engine can use at execution time. This corresponds to the
-        'workspaceSize'
-        parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
+        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
       precision_mode: one of TrtPrecisionMode.supported_precision_modes().
       minimum_segment_size: the minimum number of nodes required for a subgraph
         to be replaced by TRTEngineOp.
@@ -357,6 +421,14 @@
       TypeError: if any of the parameters are of unexpected type.
       ValueError: if any of the parameters are of unexpected value.
     """
+    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
+    # even if it cannot find the TensorRT library.
+    trt_ops.load_trt_ops()
+    # pylint: disable=g-import-not-at-top,unused-import,line-too-long,unused-variable
+    # Import a random symbol to trigger loading of TRT library.
+    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
+    # pylint: enable=g-import-not-at-top,unused-import,line-too-long,unused-variable
+
     if rewriter_config_template is not None and not isinstance(
         rewriter_config_template, rewriter_config_pb2.RewriterConfig):
       raise TypeError(
@@ -419,8 +491,7 @@
       max_batch_size: max size for the input batch.
       max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
         engine can use at execution time. This corresponds to the
-        'workspaceSize'
-        parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
+        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
       precision_mode: one of TrtPrecisionMode.supported_precision_modes().
       minimum_segment_size: the minimum number of nodes required for a subgraph
         to be replaced by TRTEngineOp.
@@ -457,6 +528,14 @@
         nodes_blacklist=nodes_blacklist,
         session_config=session_config)
 
+    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
+    # even if it cannot find the TensorRT library.
+    trt_ops.load_trt_ops()
+    # pylint: disable=g-import-not-at-top,line-too-long
+    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
+    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_loaded_tensorrt_version
+    # pylint: enable=g-import-not-at-top,line-too-long
+
     # Check compatibility of TensorRT version.
     compiled_version = get_linked_tensorrt_version()
     loaded_version = get_loaded_tensorrt_version()
@@ -479,12 +558,11 @@
         version_mismatch = True
         break
     if not version_mismatch:
-      tf_logging.info("Running against TensorRT version %s" % ".".join(
-          [str(x) for x in loaded_version]))
+      tf_logging.info("Running against TensorRT version %s" %
+                      ".".join([str(x) for x in loaded_version]))
 
     # Check input arguments.
-    if precision_mode.upper() not in TrtPrecisionMode.supported_precision_modes(
-    ):
+    if precision_mode not in TrtPrecisionMode.supported_precision_modes():
       raise ValueError(("precision mode '{}' is not supported."
                         "It should be one of {}").format(
                             precision_mode,
@@ -497,6 +575,9 @@
         raise ValueError("cached_engine_batches should not contain more than "
                          "maximum_cached_engines items.")
 
+    self._need_calibration = (
+        precision_mode == TrtPrecisionMode.INT8 and use_calibration)
+
     # TODO(laigd):
     # - Get rid of is_dynamic_op option, it should always be True, and it should
     #   accept N shapes as input.
@@ -510,7 +591,6 @@
     self._is_dynamic_op = is_dynamic_op
     self._maximum_cached_engines = maximum_cached_engines
     self._cached_engine_batches = cached_engine_batches
-    self._use_calibration = use_calibration
 
   def get_rewriter_config(self, rewriter_config_template=None):
     return TrtGraphConverter.get_tensorrt_rewriter_config(
@@ -522,7 +602,65 @@
         is_dynamic_op=self._is_dynamic_op,
         maximum_cached_engines=self._maximum_cached_engines,
         cached_engine_batches=self._cached_engine_batches,
-        use_calibration=self._use_calibration)
+        use_calibration=self._need_calibration)
+
+  def finalize_calibration(self):
+    assert self._need_calibration
+    assert self._converted
+    assert not self._calibration_data_collected
+
+    # Lazily load the op, since it's not available in CPU-only builds. Importing
+    # this at the top would cause tests that import TF-TRT to fail when built
+    # and run without CUDA/GPU.
+    # pylint: disable=g-import-not-at-top,line-too-long
+    from tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops import get_serialized_resource_op
+    # pylint: enable=g-import-not-at-top,line-too-long
+
+    # TODO(laigd): a better way would be to use self._calibration_sess to list
+    # all the devices, add one get_serialized_resource_op for each device, and
+    # fetch each such op for every resource until it's found. This can work
+    # even when the device of the TRTEngineOp is empty or not fully specified.
+
+    # Maps device name to the corresponding get_serialized_resource_op.
+    device_to_get_resource_op_map = {}
+
+    with self._calibration_graph.as_default():
+      container_input = array_ops.placeholder(dtypes.string)
+      resource_name_input = array_ops.placeholder(dtypes.string)
+
+      for node in self._converted_graph_def.node:
+        if node.op == "TRTEngineOp":
+          # Adds the get_serialized_resource_op for the device if not done
+          # before. We only add one such op for each device.
+          # TODO(laigd): What if the device is empty?
+          if node.device not in device_to_get_resource_op_map:
+            with self._calibration_graph.device(node.device):
+              serialized_resources_output = (
+                  get_serialized_resource_op(container_input,
+                                             resource_name_input))
+            device_to_get_resource_op_map[node.device] = (
+                serialized_resources_output)
+
+          # Get the calibration resource.
+          calibration_result = self._calibration_sess.run(
+              device_to_get_resource_op_map[node.device],
+              feed_dict={
+                  container_input:
+                      TrtGraphConverter
+                      ._TRT_CALIBRATION_RESOURCE_CONTAINER_NAME,
+                  resource_name_input:
+                      node.name
+              })
+          node.attr["calibration_data"].s = calibration_result
+
+    self._calibration_data_collected = True
+    self._calibration_sess.close()
+
+  def save(self, output_saved_model_dir):
+    """Save the converted graph as a SavedModel."""
+    if self._need_calibration:
+      assert self._calibration_data_collected
+    super(TrtGraphConverter, self).save(output_saved_model_dir)
 
 
 def create_inference_graph(
@@ -628,45 +766,3 @@
   if output_saved_model_dir:
     trt_converter.save(output_saved_model_dir)
   return converted_graph_def
-
-
-def calib_graph_to_infer_graph(calibration_graph_def, is_dynamic_op=False):
-  """Convert an existing calibration graph to inference graph.
-
-  Args:
-    calibration_graph_def: the calibration GraphDef object with calibration data
-    is_dynamic_op: whether to create dynamic static engines from calibration
-
-  Returns:
-    New GraphDef with TRTEngineOps placed in graph replacing calibration nodes.
-  Raises:
-    RuntimeError: if the returned status message is malformed.
-  """
-
-  is_calib_graph = False
-  for n in calibration_graph_def.node:
-    if n.op == "TRTEngineOp":
-      is_calib_graph = is_calib_graph or not n.attr["calibration_data"].s
-  if not is_calib_graph:
-    tf_logging.error(
-        "Not a calib graph. Doesn't seem to contain any calibration nodes.")
-    return None
-  graph_str = calibration_graph_def.SerializeToString()
-  out = calib_convert(graph_str, is_dynamic_op)
-  status = _to_string(out[0])
-  output_graph_def_string = out[1]
-  del graph_str  # Save some memory
-  if len(status) < 2:
-    raise _impl.UnknownError(None, None, status)
-  if status[:2] != "OK":
-    msg = status.split(";")
-    if len(msg) == 1:
-      raise RuntimeError("Status message is malformed {}".format(status))
-    # pylint: disable=protected-access
-    raise _impl._make_specific_exception(None, None, ";".join(msg[1:]),
-                                         int(msg[0]))
-    # pylint: enable=protected-access
-  output_graph_def = graph_pb2.GraphDef()
-  output_graph_def.ParseFromString(output_graph_def_string)
-  del output_graph_def_string  # Save some memory
-  return output_graph_def
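With `calib_graph_to_infer_graph` and its status-string plumbing gone, INT8 calibration goes through the converter API added above. A sketch of the replacement flow (argument values, `my_input_fn`, and the tensor names are placeholders):

```python
# Sketch of the new-style calibration flow; values are illustrative.
converter = TrtGraphConverter(
    input_saved_model_dir="my_dir",
    precision_mode=TrtPrecisionMode.INT8,
    use_calibration=True)
converter.convert()
calibrated_graph_def = converter.calibrate(
    fetch_names=["output:0"],
    num_runs=10,
    feed_dict_fn=lambda: {"input:0": my_input_fn()})
```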
diff --git a/tensorflow/python/compiler/tensorrt/trt_convert_test.py b/tensorflow/python/compiler/tensorrt/trt_convert_test.py
index 0dbc5c1..97dea1b 100644
--- a/tensorflow/python/compiler/tensorrt/trt_convert_test.py
+++ b/tensorflow/python/compiler/tensorrt/trt_convert_test.py
@@ -20,9 +20,10 @@
 
 import os
 
-# pylint: disable=unused-import
-from tensorflow.compiler.tf2tensorrt.python.ops import trt_ops
-# pylint: enable=unused-import
+from tensorflow.python.compiler.tensorrt.wrap_conversion import clear_test_values
+from tensorflow.python.compiler.tensorrt.wrap_conversion import enable_test_value
+from tensorflow.python.compiler.tensorrt.wrap_conversion import get_test_value
+from tensorflow.python.compiler.tensorrt.wrap_conversion import is_tensorrt_enabled
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
@@ -53,6 +54,8 @@
 
   def testGetTensorrtRewriterConfig(self):
     """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
+    if not is_tensorrt_enabled():
+      return
     rewriter_cfg = trt_convert.TrtGraphConverter.get_tensorrt_rewriter_config(
         rewriter_config_template=None,
         max_batch_size=128,
@@ -98,6 +101,17 @@
 
   def _GetGraph(self):
     """Get the graph for testing."""
+    # The graph computes (input+1)^2; it looks like:
+    #
+    # input (Placeholder)  v1 (Variable)
+    #               |   \ /
+    #                \   +
+    #                 \ / \
+    #                  *   |
+    #                   \ /
+    #                    +
+    #                    |
+    #                 output (Identity)
     g = ops.Graph()
     with g.as_default():
       with g.device("/GPU:0"):
@@ -144,19 +158,61 @@
           signature_def_map={"mypredict": signature_def})
     saved_model_builder.save()
 
-  def _TestCreateInferenceGraph(self,
-                                input_saved_model_dir=None,
-                                output_saved_model_dir=None):
-    """General method to test trt_convert.create_inference_graph()."""
-    input_graph_def = None if input_saved_model_dir else self._GetGraphDef()
-    output_graph_def = trt_convert.create_inference_graph(
-        input_graph_def, ["output"],
+  def _ConvertGraph(self,
+                    input_saved_model_dir=None,
+                    output_saved_model_dir=None,
+                    need_calibration=False,
+                    max_batch_size=1,
+                    minimum_segment_size=3,
+                    is_dynamic_op=False,
+                    maximum_cached_engines=1):
+    """Helper method to convert a GraphDef or SavedModel using TF-TRT."""
+    converter = trt_convert.TrtGraphConverter(
+        input_saved_model_dir=input_saved_model_dir,
+        input_graph_def=None if input_saved_model_dir else self._GetGraphDef(),
+        nodes_blacklist=["output"],
+        session_config=self._GetConfigProto(),
+        max_batch_size=max_batch_size,
         max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
+        precision_mode=(trt_convert.TrtPrecisionMode.INT8 if need_calibration
+                        else trt_convert.TrtPrecisionMode.FP32),
+        minimum_segment_size=minimum_segment_size,
+        is_dynamic_op=is_dynamic_op,
+        maximum_cached_engines=maximum_cached_engines)
+    output_graph_def = converter.convert()
+
+    if need_calibration:
+
+      class CalibrationData(object):
+
+        def __init__(self):
+          self._data = 0
+
+        def next(self):
+          self._data += 1
+          return {"input:0": [[[self._data]]]}
+
+      output_graph_def = converter.calibrate(
+          fetch_names=["output:0"],
+          num_runs=10,
+          feed_dict_fn=CalibrationData().next)
+
+    if output_saved_model_dir is not None:
+      converter.save(output_saved_model_dir=output_saved_model_dir)
+    return output_graph_def
+
+  def _TestTrtGraphConverter(self,
+                             input_saved_model_dir=None,
+                             output_saved_model_dir=None,
+                             need_calibration=False):
+    """General method to test trt_convert.TrtGraphConverter()."""
+    output_graph_def = self._ConvertGraph(
         input_saved_model_dir=input_saved_model_dir,
         output_saved_model_dir=output_saved_model_dir,
-        session_config=self._GetConfigProto())
+        need_calibration=need_calibration)
     graph_defs_to_verify = [output_graph_def]
-    if output_saved_model_dir is not None:
+
+    if output_saved_model_dir:
       saved_model_graph_def = saved_model_utils.get_meta_graph_def(
           output_saved_model_dir, tag_constants.SERVING).graph_def
       self.assertTrue(isinstance(saved_model_graph_def, graph_pb2.GraphDef))
@@ -170,43 +226,60 @@
           "output": "Identity"
       }, node_name_to_op)
 
-  def testCreateInferenceGraph_BasicConversion(self):
-    """Test case for trt_convert.create_inference_graph()."""
-    if not trt_convert.is_tensorrt_enabled():
+      if need_calibration:
+        trt_engine_nodes = [
+            node for node in graph_def.node if node.op == "TRTEngineOp"
+        ]
+        self.assertNotEmpty(trt_engine_nodes)
+        for node in trt_engine_nodes:
+          self.assertTrue(len(node.attr["calibration_data"].s))
+        # Run the calibrated graph.
+        # TODO(laigd): consider having some input where the answer is different.
+        with ops.Graph().as_default():
+          importer.import_graph_def(graph_def, name="")
+          with self.session(config=self._GetConfigProto()) as sess:
+            for test_data in range(10):
+              self.assertEqual((test_data + 1.0)**2,
+                               sess.run(
+                                   "output:0",
+                                   feed_dict={"input:0": [[[test_data]]]}))
+
+  def testTrtGraphConverter_BasicConversion(self):
+    """Test case for trt_convert.TrtGraphConverter()."""
+    if not is_tensorrt_enabled():
       return
 
-    # Use GraphDef as input.
-    self._TestCreateInferenceGraph()
-
-    # Use SavedModel as input.
     tmp_dir = self.get_temp_dir()
     input_saved_model_dir = os.path.join(tmp_dir, "in_dir1")
-    output_saved_model_dir = os.path.join(tmp_dir, "out_dir1")
     self._WriteInputSavedModel(input_saved_model_dir)
-    self._TestCreateInferenceGraph(input_saved_model_dir,
-                                   output_saved_model_dir)
+
+    for need_calibration in [False, True]:
+      # Use GraphDef as input.
+      self._TestTrtGraphConverter()
+
+      # Use SavedModel as input.
+      output_saved_model_dir = os.path.join(
+          tmp_dir, "out_dir1%s" % ("_int8" if need_calibration else ""))
+      self._TestTrtGraphConverter(
+          input_saved_model_dir=input_saved_model_dir,
+          output_saved_model_dir=output_saved_model_dir,
+          need_calibration=need_calibration)
 
   def _TestRun(self, sess, batch_size, expect_engine_is_run):
-    trt_convert.clear_test_values("")
+    clear_test_values("")
     result = sess.run("output:0", feed_dict={"input:0": [[[1.0]]] * batch_size})
     self.assertAllEqual([[[4.0]]] * batch_size, result)
     execute_engine_test_value = ("done" if expect_engine_is_run else "")
     execute_native_segment_test_value = ("" if expect_engine_is_run else "done")
-    self.assertEqual(
-        execute_engine_test_value,
-        trt_convert.get_test_value("TRTEngineOp_0:ExecuteTrtEngine"))
-    self.assertEqual(
-        execute_native_segment_test_value,
-        trt_convert.get_test_value("TRTEngineOp_0:ExecuteNativeSegment"))
+    self.assertEqual(execute_engine_test_value,
+                     get_test_value("TRTEngineOp_0:ExecuteTrtEngine"))
+    self.assertEqual(execute_native_segment_test_value,
+                     get_test_value("TRTEngineOp_0:ExecuteNativeSegment"))
 
-  def testCreateInferenceGraph_MinimumSegmentSize(self):
-    if not trt_convert.is_tensorrt_enabled():
+  def testTrtGraphConverter_MinimumSegmentSize(self):
+    if not is_tensorrt_enabled():
       return
-    output_graph_def = trt_convert.create_inference_graph(
-        self._GetGraphDef(), ["output"],
-        minimum_segment_size=5,
-        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
-        is_dynamic_op=False)
+    output_graph_def = self._ConvertGraph(minimum_segment_size=5)
     node_name_to_op = {node.name: node.op for node in output_graph_def.node}
     self.assertEqual({
         "v1/read": "Const",
@@ -217,29 +290,25 @@
         "output": "Identity"
     }, node_name_to_op)
 
-  def testCreateInferenceGraph_DynamicOp(self):
-    if not trt_convert.is_tensorrt_enabled():
+  def testTrtGraphConverter_DynamicOp(self):
+    if not is_tensorrt_enabled():
       return
-    trt_convert.enable_test_value()
+    enable_test_value()
 
     tmp_dir = self.get_temp_dir()
     input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
     output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
     self._WriteInputSavedModel(input_saved_model_dir)
-    output_graph_def = trt_convert.create_inference_graph(
-        None,
-        None,
-        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
-        is_dynamic_op=True,
-        maximum_cached_engines=2,
+    output_graph_def = self._ConvertGraph(
         input_saved_model_dir=input_saved_model_dir,
         output_saved_model_dir=output_saved_model_dir,
-        session_config=self._GetConfigProto())
+        is_dynamic_op=True,
+        maximum_cached_engines=2)
 
     # Test the output GraphDef.
     with ops.Graph().as_default():
       importer.import_graph_def(output_graph_def, name="")
-      with self.test_session(config=self._GetConfigProto()) as sess:
+      with self.session(config=self._GetConfigProto()) as sess:
         # Run with batch size 1, a new engine is created and cached.
         self._TestRun(sess, 1, True)
         # Run with batch size 2, a new engine is created and cached.
@@ -250,7 +319,7 @@
 
     # Test the output SavedModel
     with ops.Graph().as_default():
-      with self.test_session(config=self._GetConfigProto()) as sess:
+      with self.session(config=self._GetConfigProto()) as sess:
         loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
         # Run with batch size 1, a new engine is created and cached.
         self._TestRun(sess, 1, True)
@@ -260,30 +329,24 @@
         # the max, it should evict an old engine and create a new one.
         self._TestRun(sess, 3, True)
 
-  def testCreateInferenceGraph_StaticOp(self):
-    if not trt_convert.is_tensorrt_enabled():
+  def testTrtGraphConverter_StaticOp(self):
+    if not is_tensorrt_enabled():
       return
-    trt_convert.enable_test_value()
+    enable_test_value()
 
     tmp_dir = self.get_temp_dir()
     input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
     output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
     self._WriteInputSavedModel(input_saved_model_dir)
-    output_graph_def = trt_convert.create_inference_graph(
-        None,
-        None,
-        max_batch_size=1,
-        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
-        is_dynamic_op=False,
-        maximum_cached_engines=2,  # This is noop, added just for testing.
+    output_graph_def = self._ConvertGraph(
         input_saved_model_dir=input_saved_model_dir,
         output_saved_model_dir=output_saved_model_dir,
-        session_config=self._GetConfigProto())
+        maximum_cached_engines=2)  # This is a no-op, added just for testing.
 
     # Test the output GraphDef.
     with ops.Graph().as_default():
       importer.import_graph_def(output_graph_def, name="")
-      with self.test_session(config=self._GetConfigProto()) as sess:
+      with self.session(config=self._GetConfigProto()) as sess:
         # Run with batch size 1, the default engine embedded in the graphdef
         # will be used.
         self._TestRun(sess, 1, True)
@@ -293,7 +356,7 @@
 
     # Test the output SavedModel
     with ops.Graph().as_default():
-      with self.test_session(config=self._GetConfigProto()) as sess:
+      with self.session(config=self._GetConfigProto()) as sess:
         loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
         # Run with batch size 1, the default engine embedded in the graphdef
         # will be used.
diff --git a/tensorflow/python/data/experimental/__init__.py b/tensorflow/python/data/experimental/__init__.py
index 3c1d798..275bdf7 100644
--- a/tensorflow/python/data/experimental/__init__.py
+++ b/tensorflow/python/data/experimental/__init__.py
@@ -58,6 +58,7 @@
 @@make_csv_dataset
 @@make_saveable_from_iterator
 @@map_and_batch
+@@map_and_batch_with_legacy_function
 @@parallel_interleave
 @@parse_example_dataset
 @@prefetch_to_device
@@ -82,6 +83,7 @@
 
 from tensorflow.python.data.experimental.ops.batching import dense_to_sparse_batch
 from tensorflow.python.data.experimental.ops.batching import map_and_batch
+from tensorflow.python.data.experimental.ops.batching import map_and_batch_with_legacy_function
 from tensorflow.python.data.experimental.ops.batching import unbatch
 from tensorflow.python.data.experimental.ops.cardinality import cardinality
 from tensorflow.python.data.experimental.ops.cardinality import INFINITE as INFINITE_CARDINALITY
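The new export is call-compatible with `map_and_batch`; it differs only in tracing `map_func` as a legacy (V1) function. A usage sketch (the map function and batch size are arbitrary):

```python
from tensorflow.python.data.experimental.ops import batching
from tensorflow.python.data.ops import dataset_ops

ds = dataset_ops.Dataset.range(100)
# Preferred fused transformation:
fused = ds.apply(batching.map_and_batch(lambda x: x * 2, batch_size=10))
# Escape hatch for map functions that only work as legacy (V1) functions:
legacy = ds.apply(batching.map_and_batch_with_legacy_function(
    lambda x: x * 2, batch_size=10))
```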
diff --git a/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py b/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py
index 50e3a5c..8f8fbe8 100644
--- a/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py
@@ -159,7 +159,7 @@
 
   def benchmarkCast(self):
     self._benchmark_helper(
-        lambda *args: [math_ops.cast(x, dtypes.float64) for x in args], "cast")
+        lambda *args: [math_ops.cast(x, dtypes.float32) for x in args], "cast")
 
   def benchmarkReshape(self):
     self._benchmark_helper(
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index d0e5abc..1733b98 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -322,6 +322,8 @@
         "//tensorflow/python:functional_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:session",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python/data/experimental/ops:map_defun",
         "//tensorflow/python/data/kernel_tests:test_base",
     ],
@@ -481,6 +483,7 @@
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
@@ -603,7 +606,7 @@
 
 py_test(
     name = "sql_dataset_test",
-    size = "small",
+    size = "medium",
     srcs = ["sql_dataset_test.py"],
     srcs_version = "PY2AND3",
     tags = ["no_pip"],
diff --git a/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
index 3e2cf77..f65740c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
@@ -22,10 +22,12 @@
 from tensorflow.python.data.experimental.ops import get_single_element
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import function
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -71,6 +73,52 @@
     self.assertDatasetProduces(
         dataset, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
 
+  def testSideEffect(self):
+    counter_var = variables.Variable(0)
+
+    def increment_fn(x):
+      counter_var.assign_add(1)
+      return x
+
+    def dataset_fn():
+      return dataset_ops.Dataset.range(1).map(increment_fn)
+
+    @function.defun
+    def fn():
+      _ = get_single_element.get_single_element(dataset_fn())
+      return "hello"
+
+    self.evaluate(counter_var.initializer)
+    self.assertEqual(self.evaluate(fn()), b"hello")
+    self.assertEqual(self.evaluate(counter_var), 1)
+
+  def testAutomaticControlDependencies(self):
+    counter_var = variables.Variable(1)
+
+    def increment_fn(x):
+      counter_var.assign(counter_var + 1)
+      return x
+
+    def multiply_fn(x):
+      counter_var.assign(counter_var * 2)
+      return x
+
+    def dataset1_fn():
+      return dataset_ops.Dataset.range(1).map(increment_fn)
+
+    def dataset2_fn():
+      return dataset_ops.Dataset.range(1).map(multiply_fn)
+
+    @function.defun
+    def fn():
+      _ = get_single_element.get_single_element(dataset1_fn())
+      _ = get_single_element.get_single_element(dataset2_fn())
+      return "hello"
+
+    self.evaluate(counter_var.initializer)
+    self.assertEqual(self.evaluate(fn()), b"hello")
+    self.assertEqual(self.evaluate(counter_var), 4)
+
 
 if __name__ == "__main__":
   test.main()
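Both new tests hinge on automatic control dependencies inside `function.defun`: stateful ops in the traced function execute in program order even when nothing data-depends on them, which is why the second test deterministically yields (1 + 1) * 2 = 4 rather than some interleaving. A minimal sketch of the same mechanism without tf.data (assumes eager execution is enabled):

```python
from tensorflow.python.eager import function
from tensorflow.python.ops import variables

v = variables.Variable(1)

@function.defun
def ordered_updates():
  v.assign(v + 1)  # traced first ...
  v.assign(v * 2)  # ... then this; auto control deps preserve the order
  return v.read_value()  # yields (1 + 1) * 2 = 4
```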
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index a48f080..4e99189 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -27,12 +27,14 @@
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
@@ -254,6 +256,70 @@
     expected = x + c
     self.assertAllEqual(self.evaluate(expected), self.evaluate(map_defun_op))
 
+  def testMapDefunWithVariantTensor(self):
+
+    @function.defun(
+        input_signature=[tensor_spec.TensorSpec([], dtypes.variant)])
+    def fn(x):
+      return x
+
+    st = sparse_tensor.SparseTensor(
+        indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
+
+    serialized = sparse_ops.serialize_sparse_v2(st, out_type=dtypes.variant)
+    serialized = array_ops.stack([serialized, serialized])
+    map_defun_op = map_defun.map_defun(fn, [serialized], [dtypes.variant],
+                                       [None])[0]
+    deserialized = sparse_ops.deserialize_sparse(map_defun_op, dtypes.int32)
+    expected = sparse_tensor.SparseTensorValue(
+        indices=[[0, 0, 0], [0, 1, 2], [1, 0, 0], [1, 1, 2]],
+        values=[1, 2, 1, 2],
+        dense_shape=[2, 3, 4])
+    actual = self.evaluate(deserialized)
+    self.assertSparseValuesEqual(expected, actual)
+
+  def testMapDefunWithVariantTensorAsCaptured(self):
+
+    st = sparse_tensor.SparseTensor(
+        indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
+    serialized = sparse_ops.serialize_sparse_v2(st, out_type=dtypes.variant)
+
+    @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.int32)])
+    def fn(x):
+      del x
+      return serialized
+
+    x = constant_op.constant([0, 0])
+    map_defun_op = map_defun.map_defun(fn, [x], [dtypes.variant], [None])[0]
+    deserialized = sparse_ops.deserialize_sparse(map_defun_op, dtypes.int32)
+    expected = sparse_tensor.SparseTensorValue(
+        indices=[[0, 0, 0], [0, 1, 2], [1, 0, 0], [1, 1, 2]],
+        values=[1, 2, 1, 2],
+        dense_shape=[2, 3, 4])
+    actual = self.evaluate(deserialized)
+    self.assertSparseValuesEqual(expected, actual)
+
+  def testMapDefunWithStrTensor(self):
+
+    @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)])
+    def fn(x):
+      return x
+
+    st = sparse_tensor.SparseTensor(
+        indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
+
+    serialized = sparse_ops.serialize_sparse_v2(st, out_type=dtypes.string)
+    serialized = array_ops.stack([serialized, serialized])
+    map_defun_op = map_defun.map_defun(fn, [serialized], [dtypes.string],
+                                       [None])[0]
+    deserialized = sparse_ops.deserialize_sparse(map_defun_op, dtypes.int32)
+    expected = sparse_tensor.SparseTensorValue(
+        indices=[[0, 0, 0], [0, 1, 2], [1, 0, 0], [1, 1, 2]],
+        values=[1, 2, 1, 2],
+        dense_shape=[2, 3, 4])
+    actual = self.evaluate(deserialized)
+    self.assertSparseValuesEqual(expected, actual)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index d69043c..0e14f75 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -541,6 +541,25 @@
     optimized = unoptimized.with_options(options)
     self.assertDatasetsEqual(unoptimized, optimized)
 
+  def testOptimizationWithSparseTensor(self):
+    base_dataset = dataset_ops.Dataset.from_tensors(0)
+
+    def map_fn(x):
+      del x
+      return sparse_tensor.SparseTensor(
+          indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
+
+    # Datasets with sparse tensors have unknown output shapes.
+    unoptimized = base_dataset.apply(batching.map_and_batch(map_fn, 2))
+    options = dataset_ops.Options()
+    options.experimental_optimization.apply_default_optimizations = False
+    unoptimized = unoptimized.with_options(options)
+
+    options = dataset_ops.Options()
+    options.experimental_optimization.map_vectorization = True
+    optimized = unoptimized.with_options(options)
+    self.assertDatasetsEqual(unoptimized, optimized)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py
index 0dcbd56..28f9aad 100644
--- a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py
@@ -17,44 +17,304 @@
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
+
 from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import scan_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
+@parameterized.named_parameters(("WithDropRemainder", True),
+                                ("WithoutDropRemainder", False))
 @test_util.run_all_in_graph_and_eager_modes
 class RebatchDatasetTest(test_base.DatasetTestBase):
 
-  def testBasic(self):
-    dataset = dataset_ops.Dataset.range(1024).batch(32, drop_remainder=True)
+  def testBasic(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(1024).batch(
+        32, drop_remainder=drop_remainder)
     rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
     self.assertEqual(
-        [[32]], [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+        [[32 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
     self.assertEqual(
-        [[8]],
+        [[8 if drop_remainder else None]],
         [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
 
     expected_output = [[k for k in range(i, i + 8)] for i in range(0, 1024, 8)]  # pylint: disable=g-complex-comprehension
     self.assertDatasetProduces(rebatched_dataset, expected_output)
 
-  def testScalarInputError(self):
+  def testScalarInputError(self, _):
     dataset = dataset_ops.Dataset.range(1024)
     with self.assertRaisesRegexp(ValueError, "at least one dimension"):
       batching._RebatchDataset(dataset, num_workers=4)
 
-  def testUnknownBatchSizeError(self):
-    dataset = dataset_ops.Dataset.range(1024).batch(32)
-    with self.assertRaisesRegexp(ValueError, "unknown batch size datasets"):
-      batching._RebatchDataset(dataset, num_workers=4)
+  def testNotDivisibleError(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(1024).batch(
+        32, drop_remainder=drop_remainder)
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 "not divisible by"):
+      rebatched_dataset = batching._RebatchDataset(dataset, num_workers=5)
+      next_element = self.getNext(rebatched_dataset)
+      self.evaluate(next_element())
 
-  def testNotDivisibleError(self):
-    dataset = dataset_ops.Dataset.range(1024).batch(32, drop_remainder=True)
-    with self.assertRaisesRegexp(ValueError, "not divisible by"):
-      batching._RebatchDataset(dataset, num_workers=5)
+  def testTupleOutput(self, drop_remainder):
+    dataset = (
+        dataset_ops.Dataset.range(1024).map(lambda x: (x, x)).batch(
+            32, drop_remainder=drop_remainder))
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    expected_output = [([k for k in range(i, i + 8)],  # pylint: disable=g-complex-comprehension
+                        [k for k in range(i, i + 8)])
+                       for i in range(0, 1024, 8)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
 
+  def testNestedDictionaryOutput(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(1024).map(
+        lambda x: {"a": x, "b": {"c": x}}).batch(
+            32, drop_remainder=drop_remainder)
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    expected_output = [{"a": [k for k in range(i, i + 8)],  # pylint: disable=g-complex-comprehension
+                        "b": {"c": [k for k in range(i, i + 8)]}}
+                       for i in range(0, 1024, 8)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testFinalPartialBatchOriginal(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(1032).batch(
+        32, drop_remainder=drop_remainder)
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[32 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    self.assertEqual(
+        [[8 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+
+    expected_output = [[k for k in range(i, i + 8)] for i in range(0, 1032, 8)]  # pylint: disable=g-complex-comprehension
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testFinalPartialBatchAfterRebatch(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(34).batch(
+        32, drop_remainder=drop_remainder)
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[32 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    self.assertEqual(
+        [[8 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+
+    expected_output = [[k for k in range(i, i + 8)] for i in range(0, 32, 8)]  # pylint: disable=g-complex-comprehension
+    if not drop_remainder:
+      expected_output += [[32, 33]]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testMultipleBatches(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(128).batch(
+        4, drop_remainder=drop_remainder)
+    dataset = dataset.batch(8, drop_remainder=drop_remainder)
+    self.assertEqual(
+        [[8, 4]] if drop_remainder else [[None, None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    # Each element is a list of 8 elements where each element is a list of 4.
+    expected_output = [[[j, j + 1, j + 2, j + 3]  # pylint: disable=g-complex-comprehension
+                        for j in range(i, i + 32, 4)]  # generates 8 elements
+                       for i in range(0, 128, 32)]
+    self.assertDatasetProduces(dataset, expected_output)
+
+    rebatched_dataset = batching._RebatchDataset(dataset, 4)
+    self.assertEqual(
+        [[2, 4]] if drop_remainder else [[None, None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    # Each element is a list of 2 elements where each element is a list of 4.
+    expected_output = [[[j, j + 1, j + 2, j + 3]  # pylint: disable=g-complex-comprehension
+                        for j in range(i, i + 8, 4)]  # generates 2 elements
+                       for i in range(0, 128, 8)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testMapAndBatch(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(1024).apply(
+        batching.map_and_batch(
+            math_ops.square, 32, drop_remainder=drop_remainder))
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[32 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    self.assertEqual(
+        [[8 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    expected_output = [[k**2 for k in range(i, i + 8)]  # pylint: disable=g-complex-comprehension
+                       for i in range(0, 1024, 8)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testPaddedBatch(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(128).batch(4).padded_batch(
+        8, padded_shapes=[5], drop_remainder=drop_remainder)
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[8, 5]] if drop_remainder else [[None, 5]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    # Each element is a list of 8 elements in which each element is a list of 5
+    # elements; the first four are numbers and the last one is a padded zero.
+    expected_output = [[[j, j + 1, j + 2, j + 3, 0]  # pylint: disable=g-complex-comprehension
+                        for j in range(i, i + 32, 4)]  # generates 8 elements
+                       for i in range(0, 128, 32)]
+    self.assertDatasetProduces(dataset, expected_output)
+    self.assertEqual(
+        [[2, 5]] if drop_remainder else [[None, 5]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    # Each element is a list of 2 elements in which each element is a list of 5
+    # elements; the first four are numbers and the last one is a padded zero.
+    expected_output = [[[j, j + 1, j + 2, j + 3, 0]  # pylint: disable=g-complex-comprehension
+                        for j in range(i, i + 8, 4)]  # generates 2 elements
+                       for i in range(0, 128, 8)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testConcatenate(self, drop_remainder):
+    dataset1 = dataset_ops.Dataset.range(64).batch(
+        8, drop_remainder=drop_remainder)
+    dataset2 = dataset_ops.Dataset.range(32).batch(
+        8, drop_remainder=drop_remainder)
+    dataset = dataset1.concatenate(dataset2)
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[8 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    self.assertEqual(
+        [[2 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    expected_output = ([[i, i + 1] for i in range(0, 64, 2)] +
+                       [[i, i + 1] for i in range(0, 32, 2)])
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testConcatenateDifferentShapes(self, drop_remainder):
+    dataset1 = dataset_ops.Dataset.range(64).batch(
+        16, drop_remainder=drop_remainder)
+    dataset2 = dataset_ops.Dataset.range(32).batch(
+        8, drop_remainder=drop_remainder)
+    dataset = dataset1.concatenate(dataset2)
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[None]], [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    self.assertEqual(
+        [[None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    expected_output = ([[i, i + 1, i + 2, i + 3] for i in range(0, 64, 4)] +
+                       [[i, i + 1] for i in range(0, 32, 2)])
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testZip(self, drop_remainder):
+    dataset1 = dataset_ops.Dataset.range(64).batch(
+        8, drop_remainder=drop_remainder)
+    dataset2 = dataset_ops.Dataset.range(32).batch(
+        8, drop_remainder=drop_remainder)
+    dataset = dataset_ops.Dataset.zip((dataset1, dataset2))
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[8], [8]] if drop_remainder else [[None], [None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    self.assertEqual(
+        [[2], [2]] if drop_remainder else [[None], [None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    expected_output = [([i, i + 1], [i, i + 1]) for i in range(0, 32, 2)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testZipDifferentShapes(self, drop_remainder):
+    dataset1 = dataset_ops.Dataset.range(64).batch(
+        16, drop_remainder=drop_remainder)
+    dataset2 = dataset_ops.Dataset.range(32).batch(
+        8, drop_remainder=drop_remainder)
+    dataset = dataset_ops.Dataset.zip((dataset1, dataset2))
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[16], [8]] if drop_remainder else [[None], [None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    self.assertEqual(
+        [[4], [2]] if drop_remainder else [[None], [None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    expected_output = [([2 * i, 2 * i + 1, 2 * i + 2, 2 * i + 3], [i, i + 1])
+                       for i in range(0, 32, 2)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testUnsupportedTransformError(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(1024).batch(
+        32, drop_remainder=drop_remainder).apply(
+            scan_ops.scan([0], lambda _, a: ([0], a)))
+    with self.assertRaises(errors.InvalidArgumentError):
+      rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+      next_element = self.getNext(rebatched_dataset)
+      self.evaluate(next_element())
+
+  def testFlatMapBatching(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(
+        2).flat_map(lambda _: dataset_ops.Dataset.range(32).batch(  # pylint: disable=g-long-lambda
+            32, drop_remainder=drop_remainder))
+    self.assertEqual(
+        [[32 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    # Two elements where each element is range(32)
+    expected_output = [[k for k in range(32)] for _ in range(2)]  # pylint: disable=g-complex-comprehension
+    self.assertDatasetProduces(dataset, expected_output)
+
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[8 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    # For each of the two input elements, four batches of 8 numbers each.
+    expected_output = [[k for k in range(i, i + 8)]  # pylint: disable=g-complex-comprehension
+                       for _ in range(2)
+                       for i in range(0, 32, 8)]  # generates 4 elements
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testInterleaveBatching(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(
+        2).interleave(lambda _: dataset_ops.Dataset.range(32).batch(  # pylint: disable=g-long-lambda
+            32, drop_remainder=drop_remainder), cycle_length=2)
+    self.assertEqual(
+        [[32 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    # Two elements where each element is range(32)
+    expected_output = [[k for k in range(32)] for _ in range(2)]  # pylint: disable=g-complex-comprehension
+    self.assertDatasetProduces(dataset, expected_output)
+
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[8 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    # Four batches of 8 covering 0..31, with each batch repeated twice:
+    # [0..7], [0..7], [8..15], [8..15], etc.
+    expected_output = [[k for k in range(i, i + 8)]  # pylint: disable=g-complex-comprehension
+                       for i in range(0, 32, 8)  # generates 4 elements
+                       for _ in range(2)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
+
+  def testParallelInterleaveBatching(self, drop_remainder):
+    dataset = dataset_ops.Dataset.range(
+        2).interleave(lambda _: dataset_ops.Dataset.range(32).batch(  # pylint: disable=g-long-lambda
+            32, drop_remainder=drop_remainder), cycle_length=2,
+                      num_parallel_calls=2)
+    self.assertEqual(
+        [[32 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(dataset.output_shapes)])
+    # Two elements where each element is range(32)
+    expected_output = [[k for k in range(32)] for _ in range(2)]  # pylint: disable=g-complex-comprehension
+    self.assertDatasetProduces(dataset, expected_output)
+
+    rebatched_dataset = batching._RebatchDataset(dataset, num_workers=4)
+    self.assertEqual(
+        [[8 if drop_remainder else None]],
+        [ts.as_list() for ts in nest.flatten(rebatched_dataset.output_shapes)])
+    # Four batches of 8 covering 0..31, with each batch repeated twice in
+    # collated fashion, i.e. [0..7], [0..7], [8..15], [8..15], etc.
+    expected_output = [[k for k in range(i, i + 8)]  # pylint: disable=g-complex-comprehension
+                       for i in range(0, 32, 8)  # generates 4 elements
+                       for _ in range(2)]
+    self.assertDatasetProduces(rebatched_dataset, expected_output)
 
 if __name__ == "__main__":
   test.main()
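Taken together, these tests pin down the `_RebatchDataset` contract: a dataset whose elements are batches of size B is rewritten so that each of `num_workers` workers sees batches of B / num_workers, divisibility is now enforced lazily at iteration time rather than at construction, and a final partial batch passes through when `drop_remainder` is False. A minimal sketch:

```python
from tensorflow.python.data.experimental.ops import batching
from tensorflow.python.data.ops import dataset_ops

ds = dataset_ops.Dataset.range(64).batch(8, drop_remainder=True)
rebatched = batching._RebatchDataset(ds, num_workers=4)
# Yields batches of 2: [0, 1], [2, 3], ..., [62, 63]
```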
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
index ed4a1da..aaa46ba 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
@@ -34,6 +34,20 @@
 
     self.run_core_tests(lambda: build_dataset(200, 10), None, 20)
 
+  def testWithNewFunction(self):
+    """Tests that optimized datasets with new functions work."""
+
+    def build_dataset():
+      dataset = dataset_ops.Dataset.range(100)
+      dataset = dataset.map(lambda x: x)
+      dataset = dataset.batch(5)
+      # map_vectorization adds a new vectorized function to the function
+      # library.
+      dataset = dataset.apply(optimization.optimize(["map_vectorization"]))
+      return dataset
+
+    self.run_core_tests(build_dataset, None, 20)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
index 14a4241..783b2e6 100644
--- a/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
@@ -23,6 +23,7 @@
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
+from tensorflow.python.eager import function
 from tensorflow.python.framework import test_util
 from tensorflow.python.lib.io import python_io
 from tensorflow.python.lib.io import tf_record
@@ -94,6 +95,20 @@
     with self.assertRaises(TypeError):
       writers.TFRecordWriter(self._outputFilename(), "").write(input_dataset)
 
+  def testSideEffect(self):
+    def writer_fn():
+      input_dataset = readers.TFRecordDataset(self._createFile())
+      return writers.TFRecordWriter(self._outputFilename()).write(input_dataset)
+
+    @function.defun
+    def fn():
+      _ = writer_fn()
+      return "hello"
+
+    self.assertEqual(self.evaluate(fn()), b"hello")
+    for i, r in enumerate(tf_record.tf_record_iterator(self._outputFilename())):
+      self.assertAllEqual(self._record(i), r)
+
 
 if __name__ == "__main__":
   test.main()
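
The new `testSideEffect` checks that `TFRecordWriter.write` runs even when its returned op is discarded inside a `defun`, thanks to automatic control dependencies. A condensed sketch, assuming eager execution; the output path is hypothetical:

```python
from tensorflow.python.data.experimental.ops import writers
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.eager import function
from tensorflow.python.ops import string_ops

@function.defun
def write_records():
  # TFRecordWriter expects a dataset of scalar strings.
  dataset = dataset_ops.Dataset.range(10).map(string_ops.as_string)
  # The returned write op is unused; automatic control dependencies still
  # ensure the records are written before the function returns.
  writers.TFRecordWriter("/tmp/out.tfrecord").write(dataset)
  return "done"

write_records()
```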
diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
index 39cb0a6..983f764 100644
--- a/tensorflow/python/data/experimental/ops/batching.py
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -27,6 +27,7 @@
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
@@ -38,6 +39,7 @@
 from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -549,12 +551,15 @@
   """A `Dataset` that maps a function over a batch of elements."""
 
   def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls,
-               drop_remainder):
+               drop_remainder, use_legacy_function=False):
     """See `Dataset.map()` for details."""
     self._input_dataset = input_dataset
 
     self._map_func = dataset_ops.StructuredFunctionWrapper(
-        map_func, "tf.data.experimental.map_and_batch()", dataset=input_dataset)
+        map_func,
+        "tf.data.experimental.map_and_batch()",
+        dataset=input_dataset,
+        use_legacy_function=use_legacy_function)
     self._batch_size_t = ops.convert_to_tensor(
         batch_size, dtype=dtypes.int64, name="batch_size")
     self._num_parallel_calls_t = ops.convert_to_tensor(
@@ -589,6 +594,62 @@
     return self._structure
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.map_and_batch()`")
+@tf_export(v1=["data.experimental.map_and_batch_with_legacy_function"])
+def map_and_batch_with_legacy_function(map_func,
+                                       batch_size,
+                                       num_parallel_batches=None,
+                                       drop_remainder=False,
+                                       num_parallel_calls=None):
+  """Fused implementation of `map` and `batch`.
+
+  NOTE: This is an escape hatch for existing uses of `map_and_batch` that do
+  not work with V2 functions. New uses are strongly discouraged and existing
+  uses should migrate to `map_and_batch` as this method will be removed in V2.
+
+  Args:
+    map_func: A function mapping a nested structure of tensors to another
+      nested structure of tensors.
+    batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
+      consecutive elements of this dataset to combine in a single batch.
+    num_parallel_batches: (Optional.) A `tf.int64` scalar `tf.Tensor`,
+      representing the number of batches to create in parallel. On one hand,
+      higher values can help mitigate the effect of stragglers. On the other
+      hand, higher values can increase contention if CPU is scarce.
+    drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
+      whether the last batch should be dropped in case its size is smaller than
+      desired; the default behavior is not to drop the smaller batch.
+    num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
+      representing the number of elements to process in parallel. If not
+      specified, `batch_size * num_parallel_batches` elements will be processed
+      in parallel. If the value `tf.data.experimental.AUTOTUNE` is used, then
+      the number of parallel calls is set dynamically based on available CPU.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: If both `num_parallel_batches` and `num_parallel_calls` are
+      specified.
+  """
+
+  if num_parallel_batches is None and num_parallel_calls is None:
+    num_parallel_calls = batch_size
+  elif num_parallel_batches is not None and num_parallel_calls is None:
+    num_parallel_calls = batch_size * num_parallel_batches
+  elif num_parallel_batches is not None and num_parallel_calls is not None:
+    raise ValueError("The `num_parallel_batches` and `num_parallel_calls` "
+                     "arguments are mutually exclusive.")
+
+  def _apply_fn(dataset):
+    return _MapAndBatchDataset(dataset, map_func, batch_size,
+                               num_parallel_calls, drop_remainder,
+                               use_legacy_function=True)
+
+  return _apply_fn
+
+
 @tf_export("data.experimental.map_and_batch")
 def map_and_batch(map_func,
                   batch_size,
@@ -652,18 +713,24 @@
 
   def __init__(self, input_dataset, num_workers):
     self._input_dataset = input_dataset
-    output_shapes = input_dataset.output_shapes
-    if len(output_shapes) < 1:
-      raise ValueError("Input shape should have at least one dimension.")
-    if not output_shapes.dims[0].value:
-      raise ValueError("Cannot rebatch unknown batch size datasets.")
-    if output_shapes.dims[0].value % num_workers != 0:
-      raise ValueError(
-          "First dim of input shape: %d is not divisible by num_workers: %d" %
-          (output_shapes[0], num_workers))
-    output_dims = [d for d in output_shapes.dims]
-    output_dims[0] = output_dims[0] // num_workers
-    output_shapes = tensor_shape.TensorShapeV1(output_dims)
+
+    def recalculate_output_shapes(output_shapes):
+      """Recalculates the output_shapes after dividing it by num_workers."""
+      if len(output_shapes) < 1:
+        raise ValueError("Input shape should have at least one dimension.")
+      if (tensor_shape.dimension_value(output_shapes[0]) and
+          tensor_shape.dimension_value(output_shapes[0]) % num_workers != 0):
+        raise errors.InvalidArgumentError(
+            None, None,
+            "First dim of input shape: %d is not divisible by num_workers: %d" %
+            (output_shapes[0], num_workers))
+      output_dims = [d for d in output_shapes.dims]
+      output_dims[0] = output_dims[0] // num_workers
+      return tensor_shape.TensorShape(output_dims)
+
+    output_shapes = nest.map_structure(recalculate_output_shapes,
+                                       input_dataset.output_shapes)
+
     self._structure = structure.convert_legacy_structure(
         self._input_dataset.output_types, output_shapes,
         self._input_dataset.output_classes)
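
The argument resolution above means callers may pass `num_parallel_batches` or `num_parallel_calls`, but not both; when only the former is given, parallelism defaults to `batch_size * num_parallel_batches`. A sketch of the new V1 escape hatch in use (TF 1.x; values are illustrative):

```python
import tensorflow as tf

dataset = tf.data.Dataset.range(100)
# Fused map+batch traced with the legacy Defun machinery.
dataset = dataset.apply(
    tf.data.experimental.map_and_batch_with_legacy_function(
        map_func=lambda x: x * 2,
        batch_size=10,
        num_parallel_batches=2))  # resolves to num_parallel_calls = 10 * 2
```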
diff --git a/tensorflow/python/data/experimental/ops/readers.py b/tensorflow/python/data/experimental/ops/readers.py
index 177886e..24a399a 100644
--- a/tensorflow/python/data/experimental/ops/readers.py
+++ b/tensorflow/python/data/experimental/ops/readers.py
@@ -24,6 +24,7 @@
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import error_ops
 from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.experimental.ops import parsing_ops
@@ -328,6 +329,7 @@
     sloppy=False,
     num_rows_for_inference=100,
     compression_type=None,
+    ignore_errors=False,
 ):
   """Reads CSV files into a dataset.
 
@@ -402,6 +404,10 @@
       the files. Defaults to 100.
     compression_type: (Optional.) A `tf.string` scalar evaluating to one of
       `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no compression.
+    ignore_errors: (Optional.) If `True`, ignores errors with CSV file parsing,
+      such as malformed data or empty lines, and moves on to the next valid
+      CSV record. Otherwise, the dataset raises an error and stops processing
+      when encountering any invalid records. Defaults to `False`.
 
   Returns:
     A dataset, where each element is a (features, labels) tuple that corresponds
@@ -457,7 +463,7 @@
     raise ValueError("`label_name` provided must be one of the columns.")
 
   def filename_to_dataset(filename):
-    return CsvDataset(
+    dataset = CsvDataset(
         filename,
         record_defaults=column_defaults,
         field_delim=field_delim,
@@ -465,8 +471,11 @@
         na_value=na_value,
         select_cols=select_columns,
         header=header,
-        compression_type=compression_type,
+        compression_type=compression_type
     )
+    if ignore_errors:
+      dataset = dataset.apply(error_ops.ignore_errors())
+    return dataset
 
   def map_fn(*columns):
     """Organizes columns into a features dictionary.
@@ -528,13 +537,14 @@
     sloppy=False,
     num_rows_for_inference=100,
     compression_type=None,
+    ignore_errors=False,
 ):  # pylint: disable=missing-docstring
   return dataset_ops.DatasetV1Adapter(make_csv_dataset_v2(
       file_pattern, batch_size, column_names, column_defaults, label_name,
       select_columns, field_delim, use_quote_delim, na_value, header,
       num_epochs, shuffle, shuffle_buffer_size, shuffle_seed,
       prefetch_buffer_size, num_parallel_reads, sloppy, num_rows_for_inference,
-      compression_type))
+      compression_type, ignore_errors))
 make_csv_dataset_v1.__doc__ = make_csv_dataset_v2.__doc__
 
 
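With the new `ignore_errors` flag, each per-file `CsvDataset` is wrapped in `error_ops.ignore_errors()`, so malformed records are skipped instead of aborting the pipeline. A sketch of the intended call; the file name and column are hypothetical:

```python
import tensorflow as tf

# Malformed rows (e.g. wrong field counts) are silently skipped.
dataset = tf.data.experimental.make_csv_dataset(
    "data.csv",
    batch_size=4,
    label_name="label",
    num_epochs=1,
    ignore_errors=True)
```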
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index b134a8d..af1c6ab 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -108,8 +108,26 @@
     size = "small",
     srcs = ["filter_test.py"],
     additional_deps = [
+        ":filter_test_base",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+tf_py_test(
+    name = "filter_with_legacy_function_test",
+    size = "small",
+    srcs = ["filter_with_legacy_function_test.py"],
+    additional_deps = [
+        ":filter_test_base",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_library(
+    name = "filter_test_base",
+    srcs = ["filter_test_base.py"],
+    deps = [
         ":test_base",
-        "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -118,6 +136,7 @@
         "//tensorflow/python:math_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
     ],
 )
 
@@ -272,7 +291,7 @@
         ":test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:util",
         "//tensorflow/python:checkpoint_management",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -325,7 +344,7 @@
         "//tensorflow/python/data/ops:iterator_ops",
         "//tensorflow/python/data/util:structure",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking:util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/python/data/kernel_tests/filter_test.py b/tensorflow/python/data/kernel_tests/filter_test.py
index cd8f526..b81e9a8 100644
--- a/tensorflow/python/data/kernel_tests/filter_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_test.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,111 +17,16 @@
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.data.kernel_tests import filter_test_base
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import map_fn
-from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
 @test_util.run_all_in_graph_and_eager_modes
-class FilterTest(test_base.DatasetTestBase):
+class FilterTest(filter_test_base.FilterTestBase):
 
-  def testFilterDataset(self):
-    components = (
-        np.arange(7, dtype=np.int64),
-        np.array([[1, 2, 3]], dtype=np.int64) * np.arange(
-            7, dtype=np.int64)[:, np.newaxis],
-        np.array(37.0, dtype=np.float64) * np.arange(7)
-    )
-    def _map_fn(x, y, z):
-      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-
-    def do_test(count, modulus):
-      dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
-          _map_fn).repeat(count).filter(
-              lambda x, _y, _z: math_ops.equal(math_ops.mod(x, modulus), 0))
-      self.assertEqual([c.shape[1:] for c in components],
-                       [shape for shape in dataset.output_shapes])
-      get_next = self.getNext(dataset)
-      for _ in range(count):
-        for i in [x for x in range(7) if x**2 % modulus == 0]:
-          result = self.evaluate(get_next())
-          for component, result_component in zip(components, result):
-            self.assertAllEqual(component[i]**2, result_component)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next())
-
-    do_test(14, 2)
-    do_test(4, 18)
-
-    # Test an empty dataset.
-    do_test(0, 1)
-
-  def testFilterRange(self):
-    dataset = dataset_ops.Dataset.range(4).filter(
-        lambda x: math_ops.not_equal(math_ops.mod(x, 3), 2))
-    self.assertDatasetProduces(dataset, expected_output=[0, 1, 3])
-
-  def testFilterDict(self):
-    dataset = dataset_ops.Dataset.range(10).map(
-        lambda x: {"foo": x * 2, "bar": x ** 2}).filter(
-            lambda d: math_ops.equal(d["bar"] % 2, 0)).map(
-                lambda d: d["foo"] + d["bar"])
-    self.assertDatasetProduces(
-        dataset,
-        expected_output=[(i * 2 + i**2) for i in range(10) if not (i**2) % 2])
-
-  def testUseStepContainerInFilter(self):
-    input_data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64)
-
-    # Define a predicate that returns true for the first element of
-    # the sequence and not the second, and uses `tf.map_fn()`.
-    def _predicate(xs):
-      squared_xs = map_fn.map_fn(lambda x: x * x, xs)
-      summed = math_ops.reduce_sum(squared_xs)
-      return math_ops.equal(summed, 1 + 4 + 9)
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(
-        [[1, 2, 3], [4, 5, 6]]).filter(_predicate)
-    self.assertDatasetProduces(dataset, expected_output=[input_data[0]])
-
-  def testSparse(self):
-
-    def _map_fn(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=np.array([[0, 0]]),
-          values=(i * np.array([1])),
-          dense_shape=np.array([1, 1])), i
-
-    def _filter_fn(_, i):
-      return math_ops.equal(i % 2, 0)
-
-    dataset = dataset_ops.Dataset.range(10).map(_map_fn).filter(_filter_fn).map(
-        lambda x, i: x)
-    self.assertDatasetProduces(
-        dataset, expected_output=[_map_fn(i * 2)[0] for i in range(5)])
-
-  def testShortCircuit(self):
-    dataset = dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.range(10),
-         dataset_ops.Dataset.from_tensors(True).repeat(None)
-        )).filter(lambda x, y: y)
-    self.assertDatasetProduces(
-        dataset, expected_output=[(i, True) for i in range(10)])
-
-  def testParallelFilters(self):
-    dataset = dataset_ops.Dataset.range(10).filter(
-        lambda x: math_ops.equal(x % 2, 0))
-    next_elements = [self.getNext(dataset) for _ in range(10)]
-    self.assertEqual([0 for _ in range(10)],
-                     self.evaluate(
-                         [next_element() for next_element in next_elements]))
+  def apply_filter(self, input_dataset, predicate):
+    return input_dataset.filter(predicate)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/filter_test_base.py b/tensorflow/python/data/kernel_tests/filter_test_base.py
new file mode 100644
index 0000000..4c53fa3
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/filter_test_base.py
@@ -0,0 +1,134 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.Dataset.filter()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import map_fn
+from tensorflow.python.ops import math_ops
+
+
+class FilterTestBase(test_base.DatasetTestBase):
+  """Base class for FilterDataset tests."""
+
+  def apply_filter(self, input_dataset, predicate):
+    raise NotImplementedError("FilterTestBase.apply_filter")
+
+  def testFilterDataset(self):
+    components = (
+        np.arange(7, dtype=np.int64),
+        np.array([[1, 2, 3]], dtype=np.int64) * np.arange(
+            7, dtype=np.int64)[:, np.newaxis],
+        np.array(37.0, dtype=np.float64) * np.arange(7)
+    )
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    def do_test(count, modulus):  # pylint: disable=missing-docstring
+      dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
+          _map_fn).repeat(count)
+      # pylint: disable=g-long-lambda
+      dataset = self.apply_filter(
+          dataset, lambda x, _y, _z: math_ops.equal(
+              math_ops.mod(x, modulus), 0))
+      # pylint: enable=g-long-lambda
+      self.assertEqual([c.shape[1:] for c in components],
+                       [shape for shape in dataset.output_shapes])
+      get_next = self.getNext(dataset)
+      for _ in range(count):
+        for i in [x for x in range(7) if x**2 % modulus == 0]:
+          result = self.evaluate(get_next())
+          for component, result_component in zip(components, result):
+            self.assertAllEqual(component[i]**2, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
+
+    do_test(14, 2)
+    do_test(4, 18)
+
+    # Test an empty dataset.
+    do_test(0, 1)
+
+  def testFilterRange(self):
+    dataset = dataset_ops.Dataset.range(4)
+    dataset = self.apply_filter(
+        dataset, lambda x: math_ops.not_equal(math_ops.mod(x, 3), 2))
+    self.assertDatasetProduces(dataset, expected_output=[0, 1, 3])
+
+  def testFilterDict(self):
+    dataset = dataset_ops.Dataset.range(10).map(
+        lambda x: {"foo": x * 2, "bar": x ** 2})
+    dataset = self.apply_filter(
+        dataset, lambda d: math_ops.equal(d["bar"] % 2, 0))
+    dataset = dataset.map(lambda d: d["foo"] + d["bar"])
+    self.assertDatasetProduces(
+        dataset,
+        expected_output=[(i * 2 + i**2) for i in range(10) if not (i**2) % 2])
+
+  def testUseStepContainerInFilter(self):
+    input_data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64)
+
+    # Define a predicate that returns true for the first element of
+    # the sequence and not the second, and uses `tf.map_fn()`.
+    def _predicate(xs):
+      squared_xs = map_fn.map_fn(lambda x: x * x, xs)
+      summed = math_ops.reduce_sum(squared_xs)
+      return math_ops.equal(summed, 1 + 4 + 9)
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(
+        [[1, 2, 3], [4, 5, 6]])
+    dataset = self.apply_filter(dataset, _predicate)
+    self.assertDatasetProduces(dataset, expected_output=[input_data[0]])
+
+  def testSparse(self):
+
+    def _map_fn(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1])), i
+
+    def _filter_fn(_, i):
+      return math_ops.equal(i % 2, 0)
+
+    dataset = dataset_ops.Dataset.range(10).map(_map_fn)
+    dataset = self.apply_filter(dataset, _filter_fn)
+    dataset = dataset.map(lambda x, i: x)
+    self.assertDatasetProduces(
+        dataset, expected_output=[_map_fn(i * 2)[0] for i in range(5)])
+
+  def testShortCircuit(self):
+    dataset = dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.range(10),
+         dataset_ops.Dataset.from_tensors(True).repeat(None)
+        ))
+    dataset = self.apply_filter(dataset, lambda x, y: y)
+    self.assertDatasetProduces(
+        dataset, expected_output=[(i, True) for i in range(10)])
+
+  def testParallelFilters(self):
+    dataset = dataset_ops.Dataset.range(10)
+    dataset = self.apply_filter(dataset, lambda x: math_ops.equal(x % 2, 0))
+    next_elements = [self.getNext(dataset) for _ in range(10)]
+    self.assertEqual([0 for _ in range(10)],
+                     self.evaluate(
+                         [next_element() for next_element in next_elements]))
diff --git a/tensorflow/python/data/kernel_tests/filter_with_legacy_function_test.py b/tensorflow/python/data/kernel_tests/filter_with_legacy_function_test.py
new file mode 100644
index 0000000..9a283fb
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/filter_with_legacy_function_test.py
@@ -0,0 +1,33 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.Dataset.filter_with_legacy_function()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.kernel_tests import filter_test_base
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+
+
+@test_util.run_v1_only
+class FilterWithLegacyFunctionTest(filter_test_base.FilterTestBase):
+
+  def apply_filter(self, input_dataset, predicate):
+    return input_dataset.filter_with_legacy_function(predicate)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/kernel_tests/iterator_checkpoint_test.py b/tensorflow/python/data/kernel_tests/iterator_checkpoint_test.py
index 91b3566..dfb54b5 100644
--- a/tensorflow/python/data/kernel_tests/iterator_checkpoint_test.py
+++ b/tensorflow/python/data/kernel_tests/iterator_checkpoint_test.py
@@ -28,7 +28,7 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -43,7 +43,7 @@
     ) else dataset_ops.make_one_shot_iterator(dataset)
     get_next = iterator.get_next if context.executing_eagerly(
     ) else functools.partial(self.evaluate, iterator.get_next())
-    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
+    checkpoint = trackable_utils.Checkpoint(iterator=iterator)
     self.assertAllEqual([1, 4], get_next())
     save_path = checkpoint.save(checkpoint_prefix)
     self.assertAllEqual([9, 16], get_next())
@@ -73,7 +73,7 @@
     ) else dataset_ops.make_one_shot_iterator(dataset_2)
     get_next_3 = iterator_3.get_next if context.executing_eagerly(
     ) else functools.partial(self.evaluate, iterator_3.get_next())
-    checkpoint = checkpointable_utils.Checkpoint(
+    checkpoint = trackable_utils.Checkpoint(
         iterator_1=iterator_1, iterator_2=iterator_2, iterator_3=iterator_3)
     self.assertAllEqual([1, 4], get_next_1())
     self.assertAllEqual(0, get_next_3())
@@ -96,7 +96,7 @@
     ) else dataset_ops.make_one_shot_iterator(dataset)
     get_next = iterator.get_next if context.executing_eagerly(
     ) else functools.partial(self.evaluate, iterator.get_next())
-    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
+    checkpoint = trackable_utils.Checkpoint(iterator=iterator)
     self.assertAllEqual(0, get_next())
     self.assertAllEqual(1, get_next())
     save_path = checkpoint.save(checkpoint_prefix)
@@ -115,7 +115,7 @@
     iterator = iter(dataset) if context.executing_eagerly(
     ) else dataset_ops.make_initializable_iterator(dataset)
     get_next = iterator.get_next
-    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
+    checkpoint = trackable_utils.Checkpoint(iterator=iterator)
     for i in range(5):
       checkpoint.restore(
           checkpoint_management.latest_checkpoint(
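
The `checkpointable` → `tracking` rename is mechanical; saving and restoring iterator state works as before. A minimal sketch with the renamed module, assuming eager execution and a hypothetical checkpoint prefix:

```python
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.training.tracking import util as trackable_utils

dataset = dataset_ops.Dataset.range(4)
iterator = iter(dataset)
checkpoint = trackable_utils.Checkpoint(iterator=iterator)

assert next(iterator).numpy() == 0
save_path = checkpoint.save("/tmp/iterator_ckpt")
assert next(iterator).numpy() == 1

# Restoring rewinds the iterator to its state at save time.
checkpoint.restore(save_path)
assert next(iterator).numpy() == 1
```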
diff --git a/tensorflow/python/data/kernel_tests/map_test.py b/tensorflow/python/data/kernel_tests/map_test.py
index 1cd1ddc..4badcff 100644
--- a/tensorflow/python/data/kernel_tests/map_test.py
+++ b/tensorflow/python/data/kernel_tests/map_test.py
@@ -84,6 +84,7 @@
   return dataset, coordination_events
 
 
+# TODO(jsimsa): Add tests for `map_with_legacy_function`.
 @test_util.run_all_in_graph_and_eager_modes
 class MapTest(test_base.DatasetTestBase, parameterized.TestCase):
 
@@ -393,7 +394,7 @@
     with self.assertRaises(errors.OutOfRangeError):
       self.evaluate(get_next())
 
-  # TODO(b/121264236): add eager mode coverage when we have mutli-device setup.
+  # TODO(b/121264236): add eager mode coverage when we have multi-device setup.
   @test_util.run_v1_only("b/121264236")
   def testSkipEagerCaptureConstantsWithConflictingDevices(self):
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
@@ -410,9 +411,8 @@
       expected_output = [8.0] * 10
       self.assertDatasetProduces(dataset, expected_output=expected_output)
 
-  # TODO(b/121264236): add eager mode coverage when we have mutli-device setup.
-  @test_util.run_v1_only(
-      "defun will convert RefVariables to ResourceVariables.")
+  # TODO(b/121264236): add eager mode coverage when we have multi-device setup.
+  @test_util.run_v1_only("b/121264236")
   def testSkipEagerRefVariablesWithConflictingDevices(self):
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.cached_session(config=config):
@@ -424,7 +424,10 @@
           b = variables.VariableV1(5.0)
         return math_ops.add(a, b)
 
-      dataset = dataset_ops.Dataset.from_tensors(0).repeat(10).map(func)
+      # Use the legacy function implementation as eager function will convert
+      # RefVariables to ResourceVariables.
+      dataset = dataset_ops.Dataset.from_tensors(0).repeat(10)
+      dataset = dataset.map_with_legacy_function(func)
       self.evaluate(variables.global_variables_initializer())
       expected_output = [8.0] * 10
       self.assertDatasetProduces(
@@ -432,7 +435,7 @@
           expected_output=expected_output,
           requires_initialization=True)
 
-  # TODO(b/121264236): add eager mode coverage when we have mutli-device setup.
+  # TODO(b/121264236): add eager mode coverage when we have multi-device setup.
   @test_util.run_v1_only("b/121264236")
   def testSkipEagerResourceVariablesWithConflictingDevices(self):
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
@@ -451,7 +454,8 @@
       dataset = dataset_ops.Dataset.from_tensors(0).repeat(10).map(func)
       expected_error = (
           errors.InvalidArgumentError,
-          "Could not colocate node with its resource and reference inputs")
+          "Cannot place the graph because a reference or resource edge "
+          "connects colocation groups with incompatible assigned devices")
       self.assertDatasetProduces(
           dataset, expected_error=expected_error, requires_initialization=True)
 
@@ -1027,6 +1031,18 @@
     with self.assertRaises(errors.InvalidArgumentError):
       self.evaluate(get_next())
 
+  # NOTE: collection test is specific to graph mode only, no eager coverage.
+  @test_util.run_v1_only("graph specific test")
+  def testSkipEagerCollectionCopy(self):
+    w = variable_scope.get_variable("w", [])
+    self.assertIn(w, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
+
+    def func(x):
+      self.assertIn(w, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
+      return x
+
+    dataset = dataset_ops.Dataset.from_tensors(constant_op.constant(1.0))
+    dataset.map(func)
 
 if __name__ == "__main__":
   test.main()
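
The `RefVariable` test above now routes through the new escape hatch, since the eager function path converts `RefVariable`s to `ResourceVariable`s. A sketch of the V1-only method, assuming graph mode:

```python
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.ops import variables

v = variables.VariableV1(3.0)  # a RefVariable in graph mode

dataset = dataset_ops.Dataset.from_tensors(1.0).repeat(5)
# `map` would trace with V2 functions; the legacy variant keeps Defun
# semantics and so preserves the RefVariable capture.
dataset = dataset.map_with_legacy_function(lambda x: x + v)
```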
diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
index e6dfc33..a74f08f 100644
--- a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -115,8 +115,9 @@
       del multi_device_iterator
       del dataset
 
+    # TODO(b/123316347): Reduce threshold once bug is fixed.
     self.assertNotIncreasingMemory(
-        f, num_iters=100, increase_threshold_absolute_mb=175)
+        f, num_iters=100, increase_threshold_absolute_mb=500)
 
   @test_util.run_v1_only("b/121264236")
   def testOneOnSameDevice(self):
diff --git a/tensorflow/python/data/kernel_tests/reduce_test.py b/tensorflow/python/data/kernel_tests/reduce_test.py
index 93acc15..846d9a6 100644
--- a/tensorflow/python/data/kernel_tests/reduce_test.py
+++ b/tensorflow/python/data/kernel_tests/reduce_test.py
@@ -22,12 +22,14 @@
 
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -123,6 +125,71 @@
       self.assertEqual(((i + 1) * i) // 2, result["dense"])
       self.assertSparseValuesEqual(make_sparse_fn(i), result["sparse"])
 
+  def testDatasetSideEffect(self):
+    counter_var = variables.Variable(0)
+
+    def increment_fn(x):
+      counter_var.assign_add(1)
+      return x
+
+    def dataset_fn():
+      return dataset_ops.Dataset.range(10).map(increment_fn)
+
+    def reduce_fn(state, value):
+      return state + value
+
+    @function.defun
+    def fn():
+      _ = dataset_fn().reduce(np.int64(0), reduce_fn)
+      return "hello"
+
+    self.evaluate(counter_var.initializer)
+    self.assertEqual(self.evaluate(fn()), b"hello")
+    self.assertEqual(self.evaluate(counter_var), 10)
+
+  def testSideEffect(self):
+    counter_var = variables.Variable(0)
+
+    def dataset_fn():
+      return dataset_ops.Dataset.range(10)
+
+    def reduce_fn(state, value):
+      counter_var.assign_add(1)
+      return state + value
+
+    @function.defun
+    def fn():
+      _ = dataset_fn().reduce(np.int64(0), reduce_fn)
+      return "hello"
+
+    self.evaluate(counter_var.initializer)
+    self.assertEqual(self.evaluate(fn()), b"hello")
+    self.assertEqual(self.evaluate(counter_var), 10)
+
+  def testAutomaticControlDependencies(self):
+    counter_var = variables.Variable(1)
+
+    def dataset_fn():
+      return dataset_ops.Dataset.range(1)
+
+    def reduce1_fn(state, value):
+      counter_var.assign(counter_var + 1)
+      return state + value
+
+    def reduce2_fn(state, value):
+      counter_var.assign(counter_var * 2)
+      return state + value
+
+    @function.defun
+    def fn():
+      _ = dataset_fn().reduce(np.int64(0), reduce1_fn)
+      _ = dataset_fn().reduce(np.int64(0), reduce2_fn)
+      return "hello"
+
+    self.evaluate(counter_var.initializer)
+    self.assertEqual(self.evaluate(fn()), b"hello")
+    self.assertEqual(self.evaluate(counter_var), 4)
+
 
 if __name__ == "__main__":
   test.main()
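
The final test hinges on automatic control dependencies inside `defun`: the two reductions execute in program order, so the counter becomes (1 + 1) * 2 = 4 rather than 1 * 2 + 1 = 3. A condensed sketch, assuming eager execution:

```python
import numpy as np

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.eager import function
from tensorflow.python.ops import variables

counter = variables.Variable(1)

def add_one(state, value):
  counter.assign(counter + 1)
  return state + value

def double(state, value):
  counter.assign(counter * 2)
  return state + value

@function.defun
def fn():
  # Automatic control dependencies sequence the two stateful reductions.
  dataset_ops.Dataset.range(1).reduce(np.int64(0), add_one)
  dataset_ops.Dataset.range(1).reduce(np.int64(0), double)

fn()
print(counter.numpy())  # 4, i.e. (1 + 1) * 2
```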
diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index 112aa92..a911d8c 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -74,7 +74,7 @@
         "//tensorflow/python/data/util:sparse",
         "//tensorflow/python/data/util:structure",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
     ],
 )
 
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 9694fc2..e185ba7 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -39,6 +39,7 @@
 from tensorflow.python.data.util import structure as structure_lib
 from tensorflow.python.data.util import traverse
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
@@ -47,6 +48,7 @@
 from tensorflow.python.framework import smart_cond
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -85,12 +87,12 @@
     Args:
       variant_tensor: A DT_VARIANT tensor that represents the dataset.
     """
-    self._dataset_variant_tensor = variant_tensor
+    self._variant_tensor_attr = variant_tensor
     self._graph_attr = ops.get_default_graph()
 
   @property
   def _variant_tensor(self):
-    return self._dataset_variant_tensor
+    return self._variant_tensor_attr
 
   @_variant_tensor.setter
   def _variant_tensor(self, _):
@@ -142,6 +144,8 @@
     return any(
         [input_dataset._has_captured_ref() for input_dataset in self._inputs()])  # pylint: disable=protected-access
 
+  # TODO(jsimsa): Change this to be the transitive closure of functions used
+  # by this dataset and its inputs.
   def _functions(self):
     """Returns a list of functions associated with this dataset.
 
@@ -170,12 +174,12 @@
     options = self.options()
     if options.experimental_threading is not None:
       t_options = options.experimental_threading
-      if t_options.private_threadpool_size is not None:
-        dataset = _PrivateThreadPoolDataset(dataset,
-                                            t_options.private_threadpool_size)
       if t_options.max_intra_op_parallelism is not None:
         dataset = _MaxIntraOpParallelismDataset(
             dataset, t_options.max_intra_op_parallelism)
+      if t_options.private_threadpool_size is not None:
+        dataset = _PrivateThreadPoolDataset(dataset,
+                                            t_options.private_threadpool_size)
     static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
     if static_optimizations:
       if self._has_captured_ref():
@@ -746,6 +750,12 @@
     elements. For perfect shuffling, a buffer size greater than or equal to the
     full size of the dataset is required.
 
+    For instance, if your dataset contains 10,000 elements but `buffer_size` is
+    set to 1,000, then `shuffle` will initially select a random element from
+    only the first 1,000 elements in the buffer. Once an element is selected,
+    its space in the buffer is replaced by the next (i.e. 1,001st) element,
+    maintaining the 1,000 element buffer.
+
     Args:
       buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
         number of elements from this dataset from which the new
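
As a worked illustration of the buffer semantics documented in the new `shuffle` paragraph above, a short sketch (sizes are illustrative):

```python
import tensorflow as tf

# The first output element is drawn uniformly from elements 0..999 only;
# element 1000 then refills the vacated buffer slot before the next draw.
dataset = tf.data.Dataset.range(10000).shuffle(buffer_size=1000)
```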
@@ -1152,6 +1162,18 @@
   def filter(self, predicate):
     """Filters this dataset according to `predicate`.
 
+    ```python
+    d = tf.data.Dataset.from_tensor_slices([1, 2, 3])
+
+    d = d.filter(lambda x: x < 3)  # [1, 2]
+
+    # `tf.math.equal(x, y)` is required for equality comparison.
+    def filter_fn(x):
+      return tf.math.equal(x, 1)
+
+    d = d.filter(filter_fn)  # [1]
+    ```
+
     Args:
       predicate: A function mapping a nested structure of tensors (having shapes
         and types defined by `self.output_shapes` and `self.output_types`) to a
@@ -1630,6 +1652,43 @@
           ParallelMapDataset(
               self, map_func, num_parallel_calls, preserve_cardinality=False))
 
+  @deprecation.deprecated(None, "Use `tf.data.Dataset.map()`")
+  def map_with_legacy_function(self, map_func, num_parallel_calls=None):
+    """Maps `map_func` across the elements of this dataset.
+
+    NOTE: This is an escape hatch for existing uses of `map` that do not work
+    with V2 functions. New uses are strongly discouraged and existing uses
+    should migrate to `map` as this method will be removed in V2.
+
+    Args:
+      map_func: A function mapping a nested structure of tensors (having shapes
+        and types defined by `self.output_shapes` and `self.output_types`) to
+        another nested structure of tensors.
+      num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
+        representing the number of elements to process asynchronously in
+        parallel. If not specified, elements will be processed sequentially.
+        If the value `tf.data.experimental.AUTOTUNE` is used, then the number
+        of parallel calls is set dynamically based on available CPU.
+
+    Returns:
+      Dataset: A `Dataset`.
+    """
+    if num_parallel_calls is None:
+      return DatasetV1Adapter(
+          MapDataset(
+              self,
+              map_func,
+              preserve_cardinality=False,
+              use_legacy_function=True))
+    else:
+      return DatasetV1Adapter(
+          ParallelMapDataset(
+              self,
+              map_func,
+              num_parallel_calls,
+              preserve_cardinality=False,
+              use_legacy_function=True))
+
   @functools.wraps(DatasetV2.flat_map)
   def flat_map(self, map_func):
     return DatasetV1Adapter(super(DatasetV1, self).flat_map(map_func))
@@ -1647,6 +1706,25 @@
   def filter(self, predicate):
     return DatasetV1Adapter(super(DatasetV1, self).filter(predicate))
 
+  @deprecation.deprecated(None, "Use `tf.data.Dataset.filter()`")
+  def filter_with_legacy_function(self, predicate):
+    """Filters this dataset according to `predicate`.
+
+    NOTE: This is an escape hatch for existing uses of `filter` that do not work
+    with V2 functions. New uses are strongly discouraged and existing uses
+    should migrate to `filter` as this method will be removed in V2.
+
+    Args:
+      predicate: A function mapping a nested structure of tensors (having shapes
+        and types defined by `self.output_shapes` and `self.output_types`) to a
+        scalar `tf.bool` tensor.
+
+    Returns:
+      Dataset: The `Dataset` containing the elements of this dataset for which
+          `predicate` is `True`.
+    """
+    return FilterDataset(self, predicate, use_legacy_function=True)
+
   @functools.wraps(DatasetV2.apply)
   def apply(self, transformation_func):
     return DatasetV1Adapter(super(DatasetV1, self).apply(transformation_func))
@@ -2058,9 +2136,9 @@
 
 
 class StructuredFunctionWrapper(object):
-  """A wrapper for `Defun` that supports structured arguments and return values.
-  """
+  """A function wrapper that supports structured arguments and return values."""
 
+  # pylint: disable=protected-access
   def __init__(self,
                func,
                transformation_name,
@@ -2070,6 +2148,7 @@
                input_types=None,
                input_structure=None,
                add_to_graph=True,
+               use_legacy_function=False,
                defun_kwargs=None):
     """Creates a new `StructuredFunctionWrapper` for the given function.
 
@@ -2091,9 +2170,12 @@
         defines the element types and structure for `func` arguments.
       add_to_graph: (Optional.) If `True`, the function will be added to the
         default graph.
+      use_legacy_function: (Optional.) A boolean that determines whether the
+        function will be created using
+        `tensorflow.python.eager.function.defun` (default behavior) or
+        `tensorflow.python.framework.function.Defun` (legacy behavior).
       defun_kwargs: (Optional.) A dictionary mapping string argument names to
-        values. If supplied, will be passed to `function.Defun()` as keyword
-        arguments.
+        values. If supplied, will be passed to `function` as keyword arguments.
 
     Raises:
       ValueError: If an invalid combination of `dataset`, `input_classes`,
@@ -2113,7 +2195,7 @@
           raise ValueError("Either `dataset`, `input_structure` or all of "
                            "`input_classes`, `input_shapes`, and `input_types` "
                            "must be specified.")
-        self._input_structure = dataset._element_structure  # pylint: disable=protected-access
+        self._input_structure = dataset._element_structure
     else:
       if not (dataset is None and input_classes is None and input_shapes is None
               and input_types is None):
@@ -2122,24 +2204,38 @@
                          "must be specified.")
       self._input_structure = input_structure
 
-    self._transformation_name = transformation_name
-    readable_transformation_name = transformation_name.replace(
-        ".", "_")[:-2] if len(transformation_name) > 2 else ""
-    self._func_name = "_".join([
-        readable_transformation_name,
-        function_utils.get_func_name(func),
-        str(ops.uid())
-    ])
-
     if defun_kwargs is None:
       defun_kwargs = {}
 
-    @function.Defun(
-        *self._input_structure._flat_types, func_name=self._func_name,  # pylint: disable=protected-access
-        **defun_kwargs)
-    def tf_data_structured_function_wrapper(*args):
+    readable_transformation_name = transformation_name.replace(
+        ".", "_")[:-2] if len(transformation_name) > 2 else ""
+
+    func_name = "_".join(
+        [readable_transformation_name,
+         function_utils.get_func_name(func)])
+
+    def _warn_if_collections(transformation_name, graph, initial_length):
+      """Prints a warning if the given graph uses common graph collections.
+
+      NOTE(mrry): Currently a warning is only generated for lookup tables. Any
+      variables created will be automatically hoisted out to the outermost scope
+      using `init_scope()`. Some collections (such as for control-flow contexts)
+      are benign and should not generate a warning.
+
+      Args:
+        transformation_name: A human-readable name for the transformation.
+        graph: The graph to check for collections.
+        initial_length: The initial length of the lookup table collection.
+      """
+      length = len(graph.get_collection(ops.GraphKeys.TABLE_INITIALIZERS))
+      if length != initial_length:
+        warnings.warn("Creating lookup tables inside a function passed to %s "
+                      "is not supported. Create each table outside the "
+                      "function, and capture it inside the function to use it."
+                      % transformation_name)
+
+    def _wrapper_helper(*args):
       """Wrapper for passing nested structures to and from tf.data functions."""
-      # pylint: disable=protected-access
       nested_args = self._input_structure._from_compatible_tensor_list(args)
       if not _should_unpack_args(nested_args):
         nested_args = (nested_args,)
@@ -2163,18 +2259,53 @@
       except (ValueError, TypeError):
         raise TypeError("Unsupported return value from function passed to "
                         "%s: %s." % (transformation_name, ret))
+      return ret
 
-      _warn_if_collections(transformation_name)
-      return self._output_structure._to_tensor_list(ret)
+    if use_legacy_function:
+      func_name = func_name + "_" + str(ops.uid())
 
-    self._function = tf_data_structured_function_wrapper
-    if add_to_graph:
-      self._function.add_to_graph(ops.get_default_graph())
+      @function.Defun(
+          *self._input_structure._flat_types,
+          func_name=func_name,
+          **defun_kwargs)
+      def wrapper_fn(*args):
+        ret = _wrapper_helper(*args)
+        _warn_if_collections(transformation_name, ops.get_default_graph(), 0)
+        return self._output_structure._to_tensor_list(ret)
+
+      self._function = wrapper_fn
+      if add_to_graph:
+        self._function.add_to_graph(ops.get_default_graph())
+      else:
+        # Use the private method that will execute `wrapper_fn` but delay adding
+        # it to the graph in case (e.g.) we need to rerun the function.
+        self._function._create_definition_if_needed()
     else:
-      # Use the private method that will execute
-      # `tf_data_structured_function_wrapper` but delay adding it to the graph
-      # in case (e.g.) we need to rerun the function.
-      self._function._create_definition_if_needed()  # pylint: disable=protected-access
+      defun_kwargs.update({"func_name": func_name})
+
+      @eager_function.defun_with_attributes(
+          input_signature=[
+              tensor_spec.TensorSpec(input_shape, input_type)  # pylint: disable=g-complex-comprehension
+              for input_shape, input_type in zip(
+                  self._input_structure._flat_shapes,
+                  self._input_structure._flat_types)
+          ],
+          attributes=defun_kwargs)
+      def wrapper_fn(*args):  # pylint: disable=missing-docstring
+        ret = _wrapper_helper(*args)
+        ret = self._output_structure._to_tensor_list(ret)
+        return [ops.convert_to_tensor(t) for t in ret]
+
+      initial_length = len(ops.get_default_graph().get_collection(
+          ops.GraphKeys.TABLE_INITIALIZERS))
+
+      self._function = wrapper_fn._get_concrete_function_internal()
+      if add_to_graph:
+        self._function.add_to_graph(ops.get_default_graph())
+
+      _warn_if_collections(transformation_name, self._function.graph,
+                           initial_length)
+  # pylint: enable=protected-access
 
   @property
   def output_structure(self):
@@ -2747,24 +2878,6 @@
   return type(args) is tuple  # pylint: disable=unidiomatic-typecheck
 
 
-def _warn_if_collections(transformation_name):
-  """Prints warning message if the current graph uses common graph collections.
-
-  NOTE(mrry): Currently a warning is only generated for lookup tables. Any
-  variables created will be automatically hoisted out to the outermost scope
-  using `init_scope()`. Some collections (such as for control-flow contexts)
-  are benign and should not generate a warning.
-
-  Args:
-    transformation_name: A human-readable name for the transformation.
-  """
-  if ops.get_default_graph().get_collection(ops.GraphKeys.TABLE_INITIALIZERS):
-    warnings.warn("Creating lookup tables inside a function passed to %s is not"
-                  " supported. Create each table outside the function, and "
-                  "capture it inside the function to use it."
-                  % transformation_name)
-
-
 class MapDataset(UnaryDataset):
   """A `Dataset` that maps a function over elements in its input."""
 
@@ -2772,13 +2885,17 @@
                input_dataset,
                map_func,
                use_inter_op_parallelism=True,
-               preserve_cardinality=False):
+               preserve_cardinality=False,
+               use_legacy_function=False):
     """See `Dataset.map()` for details."""
     self._input_dataset = input_dataset
     self._use_inter_op_parallelism = use_inter_op_parallelism
     self._preserve_cardinality = preserve_cardinality
     self._map_func = StructuredFunctionWrapper(
-        map_func, self._transformation_name(), dataset=input_dataset)
+        map_func,
+        self._transformation_name(),
+        dataset=input_dataset,
+        use_legacy_function=use_legacy_function)
     variant_tensor = gen_dataset_ops.map_dataset(
         input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._map_func.function.captured_inputs,
@@ -2807,12 +2924,16 @@
                map_func,
                num_parallel_calls,
                use_inter_op_parallelism=True,
-               preserve_cardinality=False):
+               preserve_cardinality=False,
+               use_legacy_function=False):
     """See `Dataset.map()` for details."""
     self._input_dataset = input_dataset
     self._use_inter_op_parallelism = use_inter_op_parallelism
     self._map_func = StructuredFunctionWrapper(
-        map_func, self._transformation_name(), dataset=input_dataset)
+        map_func,
+        self._transformation_name(),
+        dataset=input_dataset,
+        use_legacy_function=use_legacy_function)
     self._num_parallel_calls = ops.convert_to_tensor(
         num_parallel_calls, dtype=dtypes.int32, name="num_parallel_calls")
     self._preserve_cardinality = preserve_cardinality
@@ -2947,11 +3068,14 @@
 class FilterDataset(UnaryUnchangedStructureDataset):
   """A `Dataset` that filters its input according to a predicate function."""
 
-  def __init__(self, input_dataset, predicate):
+  def __init__(self, input_dataset, predicate, use_legacy_function=False):
     """See `Dataset.filter()` for details."""
     self._input_dataset = input_dataset
     wrapped_func = StructuredFunctionWrapper(
-        predicate, self._transformation_name(), dataset=input_dataset)
+        predicate,
+        self._transformation_name(),
+        dataset=input_dataset,
+        use_legacy_function=use_legacy_function)
     if not wrapped_func.output_structure.is_compatible_with(
         structure_lib.TensorStructure(dtypes.bool, [])):
       raise ValueError("`predicate` must return a scalar boolean tensor.")
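
Like the map variant, the filter escape hatch simply threads `use_legacy_function=True` into `StructuredFunctionWrapper`. A sketch of its use, assuming TF 1.x graph mode:

```python
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.ops import math_ops

dataset = dataset_ops.Dataset.range(10)
# The predicate is traced with framework.function.Defun rather than the
# eager defun path.
dataset = dataset.filter_with_legacy_function(
    lambda x: math_ops.equal(x % 2, 0))
```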
diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index d6fb738..efa9020 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -31,8 +31,8 @@
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.training.saver import BaseSaverBuilder
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -68,7 +68,7 @@
 
 
 @tf_export(v1=["data.Iterator"])
-class Iterator(checkpointable.Checkpointable):
+class Iterator(trackable.Trackable):
   """Represents the state of iterating through a `Dataset`."""
 
   def __init__(self, iterator_resource, initializer, output_types,
@@ -491,7 +491,7 @@
   return "{}{}".format(prefix, uid)
 
 
-class EagerIterator(checkpointable.Checkpointable):
+class EagerIterator(trackable.Trackable):
   """An iterator producing tf.Tensor objects from a tf.data.Dataset."""
 
   def __init__(self, dataset):
@@ -641,7 +641,7 @@
     return {"ITERATOR": _saveable_factory}
 
 
-# TODO(b/71645805): Expose checkpointable stateful objects from dataset
+# TODO(b/71645805): Expose trackable stateful objects from dataset
 # attributes(potential).
 class _IteratorSaveable(BaseSaverBuilder.SaveableObject):
   """SaveableObject for saving/restoring iterator state."""
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index dbe738b..3b1b214 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -569,6 +569,7 @@
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 py_test(
@@ -657,6 +658,7 @@
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 py_test(
@@ -816,6 +818,7 @@
         "//tensorflow/python:variables",
     ],
     tags = ["notsan"],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 cuda_py_test(
@@ -833,6 +836,7 @@
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 cuda_py_test(
@@ -850,6 +854,7 @@
         "//tensorflow/python:variables",
     ],
     tags = ["no_windows_gpu"],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 py_test(
@@ -975,6 +980,7 @@
         "//tensorflow/python:variables",
     ],
     tags = ["no_windows"],  # TODO: needs investigation on Windows
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 py_test(
@@ -1021,6 +1027,7 @@
         "no_windows",
         "notap",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 cuda_py_test(
@@ -1048,6 +1055,7 @@
         "notsan",
         "oss_serial",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 cuda_py_test(
@@ -1073,6 +1081,7 @@
         "optonly",  # Test flaky (b/80130873)
         "oss_serial",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 # TODO(cais): Run the test in OSS, perhaps through a sh_test.
@@ -1101,6 +1110,7 @@
         "no_windows",
         "notsan",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 py_test(
diff --git a/tensorflow/python/debug/lib/source_remote_test.py b/tensorflow/python/debug/lib/source_remote_test.py
index 29add42..dce400c 100644
--- a/tensorflow/python/debug/lib/source_remote_test.py
+++ b/tensorflow/python/debug/lib/source_remote_test.py
@@ -48,7 +48,8 @@
     test_util.TensorFlowTestCase.setUpClass()
     (cls._server_port, cls._debug_server_url, cls._server_dump_dir,
      cls._server_thread,
-     cls._server) = grpc_debug_test_server.start_server_on_separate_thread()
+     cls._server) = grpc_debug_test_server.start_server_on_separate_thread(
+         poll_server=True)
     cls._server_address = "localhost:%d" % cls._server_port
     (cls._server_port_2, cls._debug_server_url_2, cls._server_dump_dir_2,
      cls._server_thread_2,
diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD
index 9bcdcce..3df97e1 100644
--- a/tensorflow/python/distribute/BUILD
+++ b/tensorflow/python/distribute/BUILD
@@ -60,6 +60,7 @@
         "//tensorflow/python:platform",
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python/eager:context",
+        "//tensorflow/tools/docs:doc_controls",
         "@six_archive//:six",
     ],
 )
@@ -110,8 +111,10 @@
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":cross_device_ops",
         ":distribute_lib",
         ":mirrored_strategy",
+        ":one_device_strategy",
     ],
 )
 
@@ -457,7 +460,7 @@
         "//tensorflow/python:training",
         "//tensorflow/python:util",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/distribute/__init__.py b/tensorflow/python/distribute/__init__.py
index 54e539e..87f746f 100644
--- a/tensorflow/python/distribute/__init__.py
+++ b/tensorflow/python/distribute/__init__.py
@@ -20,7 +20,9 @@
 
 # pylint: disable=unused-import
 from tensorflow.python.distribute import cluster_resolver
+from tensorflow.python.distribute import cross_device_ops
 from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.distribute import mirrored_strategy
+from tensorflow.python.distribute import one_device_strategy
 # pylint: enable=unused-import
diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
index 48bfc60..0cd8239 100644
--- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
@@ -77,17 +77,17 @@
   def task_type(self):
     if self._task_type is None:
       task_info = _get_value_in_tfconfig(_TASK_KEY, {})
-      return task_info['type'] if 'type' in task_info else None
+      return str(task_info['type']) if 'type' in task_info else None
     else:
-      return self._task_type
+      return str(self._task_type)
 
   @property
   def task_id(self):
     if self._task_type is None:
       task_info = _get_value_in_tfconfig(_TASK_KEY, {})
-      return task_info['index'] if 'index' in task_info else None
+      return int(task_info['index']) if 'index' in task_info else None
     else:
-      return self._task_id
+      return int(self._task_id)
 
   @task_type.setter
   def task_type(self, task_type):
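
The two casts above normalize TF_CONFIG payloads whose JSON types vary (a
numeric task type, a string index). A standalone sketch of the same
normalization, using a hypothetical TF_CONFIG payload rather than one taken
from this patch:

import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {"worker": ["worker0:2222"]},
    "task": {"type": 123456, "index": "1"},
})

task = json.loads(os.environ["TF_CONFIG"]).get("task", {})
task_type = str(task["type"]) if "type" in task else None  # -> '123456'
task_id = int(task["index"]) if "index" in task else None  # -> 1
assert task_type == "123456" and task_id == 1
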
diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
index 97a5eb6..65e75d4 100644
--- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
+++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
@@ -183,6 +183,40 @@
     self.assertEqual(1, cluster_resolver.task_id)
     self.assertEqual('test', cluster_resolver.rpc_layer)
 
+  def testTaskTypeCastToString(self):
+    os.environ['TF_CONFIG'] = """
+    {
+      "cluster": {
+        "123456": ["ps0:2222", "ps1:2222"],
+        "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
+      },
+      "rpc_layer": "grpc",
+      "task": {
+        "type": 123456,
+        "index": 0
+      }
+    }
+    """
+    cluster_resolver = TFConfigClusterResolver()
+    self.assertEqual('123456', cluster_resolver.task_type)
+
+  def testTaskIndexCastToInteger(self):
+    os.environ['TF_CONFIG'] = """
+    {
+      "cluster": {
+        "ps": ["ps0:2222", "ps1:2222"],
+        "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
+      },
+      "rpc_layer": "grpc",
+      "task": {
+        "type": "ps",
+        "index": "1"
+      }
+    }
+    """
+    cluster_resolver = TFConfigClusterResolver()
+    self.assertEqual(1, cluster_resolver.task_id)
+
   def testZeroItemsInClusterSpecMasterRead(self):
     os.environ['TF_CONFIG'] = """
     {}
diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py
index 9729302..afb7a35 100644
--- a/tensorflow/python/distribute/cross_device_ops.py
+++ b/tensorflow/python/distribute/cross_device_ops.py
@@ -32,6 +32,8 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util.tf_export import tf_export
+from tensorflow.tools.docs import doc_controls
 
 
 def check_destinations(destinations):
@@ -114,6 +116,9 @@
 def _normalize_value_destination_pairs(value_destination_pairs):
   """Converts each tensor into a PerReplica object in the input list."""
   result = []
+
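+  # Materialize iterator arguments (such as `zip` objects) so the pairs can
+  # be validated and traversed below.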
+  value_destination_pairs = list(value_destination_pairs)
+
   if not isinstance(value_destination_pairs, (list, tuple)):
     raise ValueError("`value_destination_pairs` should be a list or tuple")
   for pair in value_destination_pairs:
@@ -215,6 +220,7 @@
   return reduced
 
 
+@tf_export("distribute.CrossDeviceOps")
 class CrossDeviceOps(object):
   """Base class for cross-device reduction and broadcasting algorithms."""
 
@@ -244,7 +250,8 @@
       per_replica_value = _make_tensor_into_per_replica(per_replica_value)
 
     validate_destinations(destinations)
-    return self._reduce(reduce_op, per_replica_value, destinations)
+    return self.reduce_implementation(reduce_op, per_replica_value,
+                                      destinations)
 
   def batch_reduce(self, reduce_op, value_destination_pairs):
     """Reduce PerReplica objects in a batch.
@@ -265,6 +272,8 @@
       ValueError: if `value_destination_pairs` is not a list or a tuple of
         tuples of PerReplica objects and destinations
     """
+    # TODO(yuefengz): if destinations are different, split into several
+    # `batch_reduce_implementation` invocations.
     if not _validate_value_destination_pairs(value_destination_pairs):
       # If the first element of each pair is a tensor, we try to turn it into a
       # PerReplica object.
@@ -274,7 +283,7 @@
     for _, d in value_destination_pairs:
       validate_destinations(d)
 
-    return self._batch_reduce(reduce_op, value_destination_pairs)
+    return self.batch_reduce_implementation(reduce_op, value_destination_pairs)
 
   def broadcast(self, tensor, destinations):
     """Broadcast the `tensor` to destinations.
@@ -287,41 +296,93 @@
       a Mirrored object.
     """
     validate_destinations(destinations)
-    return self._broadcast(tensor, destinations)
+    return self.broadcast_implementation(tensor, destinations)
 
-  def _reduce(self, reduce_op, per_replica_value, destinations):
+  @doc_controls.for_subclass_implementers
+  def reduce_implementation(self, reduce_op, per_replica_value, destinations):
+    """The implementation of reduce of `per_replica_value` to `destinations`.
+
+    It runs the reduction operation defined by `reduce_op` and put the
+    result on `destinations`.
+
+    Args:
+      reduce_op: Indicates how per_replica_value will be reduced. Accepted
+        values are `tf.distribute.ReduceOp.SUM`, `tf.distribute.ReduceOp.MEAN`.
+      per_replica_value: a PerReplica object or a tensor with device set.
+      destinations: the reduction destinations.
+
+    Returns:
+      a Mirrored object.
+
+    Raises:
+      ValueError: if per_replica_value can't be converted to a PerReplica
+        object.
+    """
     raise NotImplementedError(
         "_reduce method must be implemented in descendants.")
 
-  def _batch_reduce(self, reduce_op, value_destination_pairs):
+  @doc_controls.for_subclass_implementers
+  def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
+    """Implementation of reduce PerReplica objects in a batch.
+
+    Reduce each first element in `value_destination_pairs` to each second
+    element which indicates the destinations.
+
+    Args:
+      reduce_op: Indicates how `per_replica_value` will be reduced. Accepted
+        values are `tf.distribute.ReduceOp.SUM` and
+        `tf.distribute.ReduceOp.MEAN`.
+      value_destination_pairs: a list or a tuple of tuples of PerReplica objects
+        (or tensors with device set if there is one device) and destinations.
+
+    Returns:
+      a list of Mirrored objects.
+
+    Raises:
+      ValueError: if `value_destination_pairs` is not a list or a tuple of
+        tuples of PerReplica objects and destinations.
+    """
     raise NotImplementedError(
         "batch_reduce_implementation method must be implemented in "
         "descendants.")
 
-  def _broadcast(self, tensor, destinations):
+  @doc_controls.for_subclass_implementers
+  def broadcast_implementation(self, tensor, destinations):
+    """Implementation of broadcast the `tensor` to destinations.
+
+    Args:
+      tensor: the tensor to broadcast.
+      destinations: the broadcast destinations.
+
+    Returns:
+      a Mirrored object.
+    """
     return simple_broadcast(tensor, destinations, always_mirrored=True)
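
The renames above follow a template-method split: the public entry points
(`reduce`, `batch_reduce`, `broadcast`) keep the validation and delegate to
overridable `*_implementation` hooks. A minimal sketch of the pattern with
made-up names, not the real TensorFlow classes:

class CrossDeviceOpsSketch(object):
  def reduce(self, reduce_op, value, destinations):
    if not destinations:  # validation stays in the base class
      raise ValueError("destinations must be non-empty")
    return self.reduce_implementation(reduce_op, value, destinations)

  def reduce_implementation(self, reduce_op, value, destinations):
    raise NotImplementedError("implemented by subclasses")

class SumToFirstDevice(CrossDeviceOpsSketch):
  def reduce_implementation(self, reduce_op, value, destinations):
    return destinations[0], sum(value)  # "reduce to one device"

print(SumToFirstDevice().reduce("sum", [1.0, 2.0, 3.0], ["gpu:0", "gpu:1"]))
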
 
 
-class ReductionToOneDeviceCrossDeviceOps(CrossDeviceOps):
+@tf_export("distribute.ReductionToOneDevice")
+class ReductionToOneDevice(CrossDeviceOps):
   """Always do reduction to one device first and then do broadcasting.
 
   Batch reduction is done by reducing each element one by one.
   """
 
-  def __init__(self, reduce_to_device=None, accumulation_fn=math_ops.add_n):
+  def __init__(self, reduce_to_device=None, accumulation_fn=None):
     """Constructor.
 
     Args:
       reduce_to_device: the intermediate device to reduce to. If None, reduce
         to the first device in `destinations` of the reduce() method.
-      accumulation_fn: a function that does accumulation.
+      accumulation_fn: a function that does accumulation.  If None, then
+        `tf.math.add_n` is used.
     """
     self.reduce_to_device = reduce_to_device
-    self.accumulation_fn = accumulation_fn
-    super(ReductionToOneDeviceCrossDeviceOps, self).__init__()
+    self.accumulation_fn = accumulation_fn or math_ops.add_n
+    super(ReductionToOneDevice, self).__init__()
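
Replacing the callable default with `None` and resolving it in the body keeps
`tf.math.add_n` out of the public signature (and out of generated API docs).
The idiom in isolation, with the built-in `sum` standing in for `add_n`:

def make_accumulator(accumulation_fn=None):
  return accumulation_fn or sum  # resolve the default at call time

assert make_accumulator()([1, 2, 3]) == 6
assert make_accumulator(max)([1, 2, 3]) == 3
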
 
-  def _reduce(self, reduce_op, per_replica_value, destinations):
-    assert check_destinations(destinations)
-    devices = get_devices_from(destinations)
+  def reduce_implementation(self, reduce_op, per_replica_value, destinations):
+    if check_destinations(destinations):
+      devices = get_devices_from(destinations)
+    else:
+      devices = get_devices_from(per_replica_value)
     reduce_to_device = self.reduce_to_device or devices[0]
     logging.log_first_n(
         logging.INFO,
@@ -330,9 +391,9 @@
                              self.accumulation_fn, reduce_op)
     return self.broadcast(reduced, destinations)
 
-  def _batch_reduce(self, reduce_op, value_destination_pairs):
+  def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
     return [
-        self._reduce(reduce_op, t, destinations=v)
+        self.reduce_implementation(reduce_op, t, destinations=v)
         for t, v in value_destination_pairs
     ]
 
@@ -399,11 +460,11 @@
   return [value_lib.Mirrored(device_map, v, logical_device) for v in index]
 
 
-class ConcatAndSplitPacker(object):
+class _ConcatAndSplitPacker(object):
   """Concatenate and split tensors for reduction."""
 
   def __init__(self, num_packs=1):
-    """Initialize the ConcatAndSplitPacker object.
+    """Initialize the _ConcatAndSplitPacker object.
 
     Args:
       num_packs: specifies the number of split packs that will be
@@ -503,13 +564,13 @@
     return aggregated_device_grads
 
 
-class AggregateSmallTensorPacker(object):
+class _AggregateSmallTensorPacker(object):
   """Concatenate small gradient tensors together for reduction."""
 
   def __init__(self,
                agg_small_grads_max_bytes=1048576,
                agg_small_grads_max_group=16):
-    """Initialize the AggregateSmallTensorPacker object.
+    """Initialize the _AggregateSmallTensorPacker object.
 
     Args:
       agg_small_grads_max_bytes: largest tensor eligible for aggregation,
@@ -549,11 +610,11 @@
                   agg_small_grads_max_group=0):
   """Pack tensors if specified."""
   if num_packs > 0:
-    tensor_packer = ConcatAndSplitPacker(num_packs)
+    tensor_packer = _ConcatAndSplitPacker(num_packs)
     device_grad_packs = tensor_packer.pack(device_grads)
   elif agg_small_grads_max_bytes > 0 and agg_small_grads_max_group > 0:
-    tensor_packer = AggregateSmallTensorPacker(agg_small_grads_max_bytes,
-                                               agg_small_grads_max_group)
+    tensor_packer = _AggregateSmallTensorPacker(agg_small_grads_max_bytes,
+                                                agg_small_grads_max_group)
     device_grad_packs = tensor_packer.pack(device_grads)
   else:
     tensor_packer = None
@@ -569,7 +630,7 @@
 
 
 class AllReduceCrossDeviceOps(CrossDeviceOps):
-  """Reduction using all reduce."""
+  """Reduction using all-reduce."""
 
   def __init__(self,
                all_reduce_alg="nccl",
@@ -594,37 +655,22 @@
       num_packs: see above.
       agg_small_grads_max_bytes: see above.
       agg_small_grads_max_group: see above.
-        tensors.
     """
     self._all_reduce_alg = all_reduce_alg
     self._num_packs = num_packs
     self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
     self._agg_small_grads_max_group = agg_small_grads_max_group
+    self._simple_cross_replica_ops = ReductionToOneDevice()
     super(AllReduceCrossDeviceOps, self).__init__()
 
-  def _reduce(self, reduce_op, per_replica_value, destinations):
-    contains_indexed_slices = cross_device_utils.contains_indexed_slices(
-        per_replica_value)
-    if (_devices_match(per_replica_value, destinations)
-        and not context.executing_eagerly()
-        and not contains_indexed_slices):
+  def reduce_implementation(self, reduce_op, per_replica_value, destinations):
+    if _devices_match(per_replica_value, destinations):
       return self._batch_all_reduce(reduce_op, [per_replica_value])[0]
     else:
-      if contains_indexed_slices:
-        logging.log_first_n(
-            logging.WARN,
-            "Efficient allreduce is not supported for IndexedSlices.", 10)
+      return self._simple_cross_replica_ops.reduce(reduce_op, per_replica_value,
+                                                   destinations)
 
-      if check_destinations(destinations):
-        devices = get_devices_from(destinations)
-      else:
-        devices = get_devices_from(per_replica_value)
-      reduce_to_device = devices[0]
-      reduced = _simple_reduce(per_replica_value, reduce_to_device,
-                               math_ops.add_n, reduce_op)
-      return self.broadcast(reduced, destinations)
-
-  def _batch_reduce(self, reduce_op, value_destination_pairs):
+  def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
     all_devices_match = _all_devices_match(value_destination_pairs)
     contains_indexed_slices = cross_device_utils.contains_indexed_slices(
         value_destination_pairs)
@@ -640,20 +686,37 @@
                             10)
 
       return [
-          self._reduce(reduce_op, t, destinations=v)
+          self.reduce_implementation(reduce_op, t, destinations=v)
           for t, v in value_destination_pairs
       ]
 
   def _batch_all_reduce(self, reduce_op, per_replica_values):
-    """All reduce algorithm in a batch."""
+    """All-reduce algorithm in a batch."""
+    dense_values, dense_indices, sparse_values, sparse_indices = (
+        cross_device_utils.split_by_sparsity(per_replica_values))
+    if dense_values:
+      dense_results = self._do_batch_all_reduce(reduce_op, dense_values)
+    else:
+      dense_results = []
+    if sparse_values:
+      sparse_results = self._do_batch_all_reduce_sparse(reduce_op,
+                                                        sparse_values)
+    else:
+      sparse_results = []
+    return cross_device_utils.stitch_values(((dense_results, dense_indices),
+                                             (sparse_results, sparse_indices)))
+
+  def _do_batch_all_reduce(self, reduce_op, dense_values):
+    """Run batch all-reduces."""
     logging.log_first_n(
         logging.INFO, "batch_all_reduce invoked for batches size = %d with "
         "algorithm = %s, num_packs = %d, agg_small_grads_max_bytes = %d and "
         "agg_small_grads_max_group = %d" %
-        (len(per_replica_values), self._all_reduce_alg, self._num_packs,
+        (len(dense_values), self._all_reduce_alg, self._num_packs,
          self._agg_small_grads_max_bytes, self._agg_small_grads_max_group), 10)
-    destinations = per_replica_values[0].devices
-    grouped = _group_value_by_device(per_replica_values)
+
+    destinations = dense_values[0].devices
+    grouped = _group_value_by_device(dense_values)
 
     device_grad_packs, tensor_packer = _pack_tensors(
         grouped, self._num_packs, self._agg_small_grads_max_bytes,
@@ -674,7 +737,18 @@
               destinations, device_grad_packs))
 
     reduced = _unpack_tensors(reduced, tensor_packer)
-    return _ungroup_and_make_mirrored(reduced, per_replica_values[0], reduce_op)
+    return _ungroup_and_make_mirrored(reduced, dense_values[0], reduce_op)
+
+  def _do_batch_all_reduce_sparse(self, reduce_op, sparse_values):
+    """Run batch all-reduce for sparse values."""
+    logging.log_first_n(
+        logging.WARN,
+        "Efficient allreduce is not supported for %d IndexedSlices" %
+        len(sparse_values), 10)
+    # Use `sparse_values` as destinations to do all-reduces. It is effectively
+    # an allgather under the hood but not an efficient one.
+    return self._simple_cross_replica_ops.batch_reduce(
+        reduce_op, zip(sparse_values, sparse_values))
 
 
 # For compatibility with code using the old name of `AllReduceCrossDeviceOps`.
@@ -685,6 +759,49 @@
                                             "alg shards limit")
 
 
+@tf_export("distribute.NcclAllReduce")
+class NcclAllReduce(AllReduceCrossDeviceOps):
+  """Reduction using NCCL all-reduce."""
+
+  def __init__(self, num_packs=1):
+    """NCCL all-reduce implementation of CrossDeviceOps.
+
+    Before performing all-reduce, tensors will be repacked or aggregated for
+    more efficient cross-device transportation.
+
+    Args:
+      num_packs: values will be packed into this many splits. `num_packs`
+        should be greater than 0.
+    """
+    assert num_packs > 0, (
+        "NCCL all-reduce requires num_packs > 0, but {} was specified".format(
+            num_packs))
+    super(NcclAllReduce, self).__init__(
+        all_reduce_alg="nccl", num_packs=num_packs)
+
+
+@tf_export("distribute.HierarchicalCopyAllReduce")
+class HierarchicalCopyAllReduce(AllReduceCrossDeviceOps):
+  """Reduction using hierarchical copy all-reduce.
+
+  This is a good reduction for configurations like Nvidia DGX-1.
+  """
+
+  def __init__(self, num_packs=1):
+    """Hierarchical copy all-reduce implementation of CrossDeviceOps.
+
+    Before performing all-reduce, tensors will be repacked or aggregated for
+    more efficient cross-device transportation.
+
+    Args:
+      num_packs: values will be packed into this many splits. `num_packs`
+        should be greater than 0.
+    """
+    super(HierarchicalCopyAllReduce, self).__init__(
+        all_reduce_alg="hierarchical_copy",
+        num_packs=num_packs)
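
A hedged example of how the newly exported classes would be selected by an
end user on a build containing this patch (`cross_device_ops` being
MirroredStrategy's hook for choosing a reduction algorithm):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy(
    devices=["/gpu:0", "/gpu:1"],
    cross_device_ops=tf.distribute.HierarchicalCopyAllReduce(num_packs=2))
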
+
+
 class MultiWorkerAllReduce(AllReduceCrossDeviceOps):
   """All-reduce algorithms for distributed TensorFlow."""
 
@@ -758,7 +875,7 @@
       ]
 
   def _batch_all_reduce(self, reduce_op, per_replica_values):
-    """All reduce algorithm in a batch."""
+    """All-reduce algorithm in a batch."""
     logging.log_first_n(
         logging.INFO,
         "distributed batch_all_reduce invoked for batches size = %d with "
@@ -769,7 +886,7 @@
 
     device_grads = _group_value_by_device(per_replica_values)
 
-    # The all reduce library requires fully defined shapes.
+    # The all-reduce library requires fully defined shapes.
     # TODO(yuefengz): when tensor sharding is not needed, static shapes are not
     # required as well.
     for device_grad in device_grads:
@@ -838,7 +955,7 @@
     super(CollectiveAllReduce, self).__init__()
 
   # TODO(yuefengz, tucker): is indexed slices supported by collective ops?
-  def _reduce(self, reduce_op, per_replica_value, destinations):
+  def reduce_implementation(self, reduce_op, per_replica_value, destinations):
     if cross_device_utils.contains_indexed_slices(per_replica_value):
       raise ValueError(
           "`IndexSlices` is not supported for Collective All-Reduce.")
@@ -861,7 +978,7 @@
 
     return value_lib.Mirrored(device_map, index, logical_device)
 
-  def _batch_reduce(self, reduce_op, value_destination_pairs):
+  def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
     if cross_device_utils.contains_indexed_slices(value_destination_pairs):
       raise ValueError(
           "`IndexSlices` is not supported for Collective All-Reduce.")
@@ -877,7 +994,7 @@
             "destinations are different.", 10)
 
       return [
-          self._reduce(reduce_op, t, destinations=v)
+          self.reduce_implementation(reduce_op, t, destinations=v)
           for t, v in value_destination_pairs
       ]
 
@@ -930,7 +1047,7 @@
 def _has_dgx1_like_links(gpu_links):
   if not gpu_links:
     return False
-  # TODO(yuefengz): figure out the right topology for hierarchial copy if
+  # TODO(yuefengz): figure out the right topology for hierarchical copy if
   # the number of GPUs is less than 8.
   if len(gpu_links) < 8:
     return False
@@ -943,10 +1060,9 @@
 
 def _choose_all_reduce_algorithm(device_links):
   if _has_dgx1_like_links(device_links):
-    return AllReduceCrossDeviceOps(
-        "hierarchical_copy", num_packs=len(device_links))
+    return HierarchicalCopyAllReduce(num_packs=len(device_links))
   else:
-    return AllReduceCrossDeviceOps("nccl", num_packs=1)
+    return NcclAllReduce(num_packs=1)
 
 
 def choose_the_best(devices, session_config=None):
@@ -973,12 +1089,12 @@
   if len(using_devices) != len(requested_devices):
     logging.warning("Not all devices in `tf.distribute.Strategy` are visible "
                     "to TensorFlow.")
-    return ReductionToOneDeviceCrossDeviceOps()
+    return ReductionToOneDevice()
 
   if any(d.device_type.lower() != "gpu" for d in using_devices):
     logging.warning("Not all devices in `tf.distribute.Strategy` are visible "
                     "to TensorFlow.")
-    return ReductionToOneDeviceCrossDeviceOps()
+    return ReductionToOneDevice()
 
   device_links = [[] for _ in range(len(using_devices))]
   for i, device in enumerate(using_devices):
diff --git a/tensorflow/python/distribute/cross_device_utils.py b/tensorflow/python/distribute/cross_device_utils.py
index cb5417e..a28628d 100644
--- a/tensorflow/python/distribute/cross_device_utils.py
+++ b/tensorflow/python/distribute/cross_device_utils.py
@@ -681,3 +681,58 @@
     return contains_indexed_slices(value.values)
   else:
     return False
+
+
+def is_indexed_slices(value):
+  """Returns True if `value` is (a DistributedValues of) IndexedSlices."""
+  if isinstance(value, ops.IndexedSlices):
+    return True
+  assert isinstance(value, value_lib.DistributedValues)
+  return all(isinstance(v, ops.IndexedSlices) for v in value.values)
+
+
+def split_by_sparsity(values):
+  """Split values into dense and sparse values.
+
+  Args:
+    values: a list of tensors or `PerReplica`s.
+
+  Returns:
+    Four lists:
+      a list of dense values, a list of their indices in `values` and
+      a list of sparse values, a list of their indices in `values`.
+  """
+  dense_values = []
+  dense_indices = []
+  sparse_values = []
+  sparse_indices = []
+  for i, v in enumerate(values):
+    if is_indexed_slices(v):
+      sparse_values.append(v)
+      sparse_indices.append(i)
+    else:
+      dense_values.append(v)
+      dense_indices.append(i)
+  return dense_values, dense_indices, sparse_values, sparse_indices
+
+
+def stitch_values(values_and_indices_list):
+  """Stitch values together according to their indices.
+
+  Args:
+    values_and_indices_list: a list of (values, indices) tuples, where the
+      indices give each value's position in the returned list.
+
+  Returns:
+    a stitched list of values.
+  """
+  length = 0
+  for values_and_indices in values_and_indices_list:
+    length += len(values_and_indices[0])
+
+  result = [None] * length
+  for values_and_indices in values_and_indices_list:
+    if values_and_indices and values_and_indices[0]:
+      for v, i in zip(*values_and_indices):
+        assert result[i] is None
+        result[i] = v
+  return result
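
A self-contained round trip of the split/stitch idea above, with plain Python
values in place of tensors and a toy predicate standing in for
`is_indexed_slices` ("sparse" here is any string, purely for illustration):

def split(values, is_sparse):
  dense, d_idx, sparse, s_idx = [], [], [], []
  for i, v in enumerate(values):
    if is_sparse(v):
      sparse.append(v)
      s_idx.append(i)
    else:
      dense.append(v)
      d_idx.append(i)
  return dense, d_idx, sparse, s_idx

def stitch(values_and_indices_list):
  result = [None] * sum(len(vs) for vs, _ in values_and_indices_list)
  for vs, idx in values_and_indices_list:
    for v, i in zip(vs, idx):
      result[i] = v
  return result

vals = [1.0, "sparse-a", 2.0, "sparse-b"]
dense, d_idx, sparse, s_idx = split(vals, lambda v: isinstance(v, str))
assert stitch([(dense, d_idx), (sparse, s_idx)]) == vals
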
diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py
index 6b13db3..b6adad0 100644
--- a/tensorflow/python/distribute/input_lib.py
+++ b/tensorflow/python/distribute/input_lib.py
@@ -246,7 +246,7 @@
     """
     assert isinstance(input_workers, InputWorkers)
     if split_batch_by:
-      dataset = _split_dataset_batch(dataset, split_batch_by)
+      dataset = batching._RebatchDataset(dataset, split_batch_by)  # pylint: disable=protected-access
 
     iterators = []
     for i, worker in enumerate(input_workers.worker_devices):
@@ -255,6 +255,7 @@
         cloned_dataset = dataset
         if not context.executing_eagerly():
           cloned_dataset = input_ops._clone_dataset(dataset)  # pylint: disable=protected-access
+          cloned_dataset = cloned_dataset.with_options(dataset.options())
         iterator = _SingleWorkerDatasetIterator(cloned_dataset, worker,
                                                 worker_devices)
         iterators.append(iterator)
@@ -352,7 +353,8 @@
 
   if isinstance(d, (dataset_ops.BatchDataset, batching._MapAndBatchDataset)):
     return d
-  elif isinstance(d, dataset_ops.PrefetchDataset):
+  elif isinstance(d, (dataset_ops.PrefetchDataset,
+                      dataset_ops._OptionsDataset)):
     return _get_batched_dataset(d._input_dataset)
 
   raise ValueError(
@@ -405,24 +407,6 @@
   return batch_size, drop_remainder, prefetch_buffer
 
 
-def _split_dataset_batch(dataset, split_batch_by):
-  """Divide a batch-ed dataset's batches into smaller batches."""
-  batch_size, drop_remainder, prefetch_buffer = (
-      _get_dataset_attributes(dataset))
-
-  if batch_size % split_batch_by:
-    raise ValueError(
-        "Batch size %s cannot be sharded evenly across replicas %s" % (
-            batch_size, split_batch_by))
-  new_batch_size = batch_size // split_batch_by
-
-  dataset = dataset.apply(batching.unbatch())
-  dataset = dataset.batch(new_batch_size, drop_remainder=drop_remainder)
-  if prefetch_buffer is not None:
-    dataset = dataset.prefetch(prefetch_buffer)
-  return dataset
-
-
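
The deleted helper's effect is now provided by the private `_RebatchDataset`
transformation. A rough public-API approximation, assuming a dataset batched
by 8 split across 2 replicas (the private op additionally preserves options
and handles uneven batches):

import tensorflow as tf

ds = tf.data.Dataset.range(16).batch(8)        # global batch of 8
ds = ds.apply(tf.data.experimental.unbatch())  # undo the batching
ds = ds.batch(8 // 2)                          # per-replica batch of 4
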
 class MultiStepContext(object):
   """A context object that can be used to capture things when running steps.
 
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index df96f83..96c7191 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -454,7 +454,7 @@
     assert devices, "Must specify at least one device."
     devices = tuple(device_util.resolve(d) for d in devices)
     assert len(set(devices)) == len(devices), (
-        "No duplicates allowed in `devices` argument: %s" % devices)
+        "No duplicates allowed in `devices` argument: %s" % (devices,))
     # TODO(josh11b): Require at least 2 devices?
     self._device_map = values.ReplicaDeviceMap(devices)
     self._input_workers = input_lib.InputWorkers(self._device_map)
diff --git a/tensorflow/python/distribute/one_device_strategy.py b/tensorflow/python/distribute/one_device_strategy.py
index ff9f616..9a255b2 100644
--- a/tensorflow/python/distribute/one_device_strategy.py
+++ b/tensorflow/python/distribute/one_device_strategy.py
@@ -29,11 +29,13 @@
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import tf_export
 
 
 # TODO(josh11b): Replace asserts in this file with if ...: raise ...
 
 
+@tf_export("distribute.OneDeviceStrategy")
 class OneDeviceStrategy(distribute_lib.DistributionStrategy):
   """A distribution strategy for running on a single device."""
   # TODO(josh11b): Do we wrap values in types to generate errors if you are
diff --git a/tensorflow/python/distribute/parameter_server_strategy.py b/tensorflow/python/distribute/parameter_server_strategy.py
index 269c741..7490eec 100644
--- a/tensorflow/python/distribute/parameter_server_strategy.py
+++ b/tensorflow/python/distribute/parameter_server_strategy.py
@@ -101,8 +101,7 @@
 
     # We typically don't need to do all-reduce in this strategy.
     self._cross_device_ops = (
-        cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps(
-            reduce_to_device=_LOCAL_CPU))
+        cross_device_ops_lib.ReductionToOneDevice(reduce_to_device=_LOCAL_CPU))
 
   def _initialize_strategy(self, cluster_resolver):
     if cluster_resolver.cluster_spec().as_dict():
@@ -318,7 +317,8 @@
           if kwargs.get("trainable", True):
             collections.append(ops.GraphKeys.TRAINABLE_VARIABLES)
             l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES)
-            l.remove(v)
+            if v in l:
+              l.remove(v)
           g.add_to_collections(collections, wrapped)
         elif ops.GraphKeys.GLOBAL_STEP in collections:
           ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, wrapped)
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index 585ae1b..c2e3b40 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -37,7 +37,7 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 
 
@@ -630,7 +630,7 @@
 
 
 class MirroredVariable(DistributedVariable, Mirrored,
-                       checkpointable.Checkpointable):
+                       trackable.Trackable):
   """Holds a map from device to variables whose values are kept in sync."""
 
   def __init__(
@@ -710,7 +710,7 @@
     return self.get()._as_graph_element()
 
   def _gather_saveables_for_checkpoint(self):
-    """Overrides CheckpointableBase method.
+    """Overrides Trackable method.
 
     This allows both name-based and object-based save and restore of
     MirroredVariables.
@@ -720,7 +720,7 @@
     """
     def _saveable_factory(name=self._common_name):
       return _MirroredSaveable(self, self.primary, name)
-    return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory}
+    return {trackable.VARIABLE_VALUE_KEY: _saveable_factory}
 
 
 # Register a conversion function which reads the value of the variable,
@@ -752,7 +752,7 @@
 # tpu.replicate() because it assumes that you're in a device context where you
 # can operate on a single version of the variable, but a tpu.replicate()
 # operates on all variables and is replicated during a rewrite pass.
-class TPUMirroredVariable(checkpointable.Checkpointable):
+class TPUMirroredVariable(trackable.Trackable):
   """Holds a map from device to TPU variables whose values are kept in sync."""
 
   def __init__(
@@ -1085,7 +1085,7 @@
     return self._read_variable_op()
 
   def _gather_saveables_for_checkpoint(self):
-    """Overrides CheckpointableBase method.
+    """Overrides Trackable method.
 
     This allows both name-based and object-based save and restore of
     MirroredVariables.
@@ -1095,7 +1095,7 @@
     """
     def _saveable_factory(name=self._common_name):
       return _MirroredSaveable(self, self.primary, name)
-    return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory}
+    return {trackable.VARIABLE_VALUE_KEY: _saveable_factory}
 
   def _should_act_as_resource_variable(self):
     """Pass resource_variable_ops.is_resource_variable check."""
@@ -1205,7 +1205,7 @@
 
 
 class ReplicaLocalVariable(DistributedVariable, PerReplica,
-                           checkpointable.Checkpointable):
+                           trackable.Trackable):
   """Holds a map from device to variables whose values are reduced on save."""
 
   def __init__(
@@ -1256,7 +1256,7 @@
     return self.get()._as_graph_element()
 
   def _gather_saveables_for_checkpoint(self):
-    """Overrides CheckpointableBase method.
+    """Overrides Trackable method.
 
     This allows both name-based and object-based save and restore of
     ReplicaLocalVariables.
@@ -1266,7 +1266,7 @@
     """
     def _saveable_factory(name=self._common_name):
       return _ReplicaLocalSaveable(self, name)
-    return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory}
+    return {trackable.VARIABLE_VALUE_KEY: _saveable_factory}
 
 
 # Register a conversion function for ReplicaLocalVariable which allows as_ref to
@@ -1436,7 +1436,7 @@
 
 
 # TODO(josh11b): Descend from Variable.
-class AggregatingVariable(checkpointable.Checkpointable):
+class AggregatingVariable(trackable.Trackable):
   """A wrapper around a variable that aggregates updates across replicas."""
 
   def __init__(self, strategy, v, aggregation):
@@ -1514,7 +1514,7 @@
 
   # TODO(josh11b): Test saving & restoring.
   def _gather_saveables_for_checkpoint(self):
-    return {checkpointable.VARIABLE_VALUE_KEY: self._v}
+    return {trackable.VARIABLE_VALUE_KEY: self._v}
 
   # pylint: disable=multiple-statements
   def __add__(self, o): return self._v + o
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index aba342b..abadce4 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -110,7 +110,7 @@
         ":profiler",
         ":test",
         "//tensorflow/python:constant_op",
-        "//tensorflow/contrib/tpu/profiler:trace_events_proto_py",
+        "//tensorflow/core/profiler:protos_all_py",
     ],
 )
 
@@ -151,6 +151,7 @@
         "//tensorflow/python:nn_grad",
         "//tensorflow/python:training",
     ],
+    tags = ["no_rocm"],
 )
 
 cuda_py_test(
@@ -384,6 +385,7 @@
         ":context",
         ":function",
         ":test",
+        ":profiler",
         "//third_party/py/numpy",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:pywrap_tensorflow",
@@ -498,7 +500,7 @@
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:while_v2",  # TODO(b/118513001): Imported via control_flow_ops; remove.
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
     ],
 )
 
@@ -529,6 +531,7 @@
     srcs = ["def_function_xla_test.py"],
     tags = [
         "no_pip",
+        "no_rocm",
         "nomac",
     ],
     deps = [
@@ -552,7 +555,7 @@
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:template",
         "//tensorflow/python:variable_scope",
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
     ],
 )
 
diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py
index 16b52c9..f44185d 100644
--- a/tensorflow/python/eager/benchmarks_test.py
+++ b/tensorflow/python/eager/benchmarks_test.py
@@ -38,6 +38,7 @@
 from tensorflow.python.eager import context
 from tensorflow.python.eager import core
 from tensorflow.python.eager import function
+from tensorflow.python.eager import profiler
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -815,10 +816,26 @@
       model = make_keras_model(initializer="glorot_uniform")
       self._benchmark_keras_model_fit(model)
 
+  def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self):
+    profiler.start()
+    with context.graph_mode():
+      model = make_keras_model(initializer="glorot_uniform")
+      self._benchmark_keras_model_fit(model)
+    result = profiler.stop()
+    assert result is not None
+
   def benchmark_keras_model_functional_fit_run_model_eagerly(self):
     model = make_keras_model(initializer="glorot_uniform")
     self._benchmark_keras_model_fit(model, run_eagerly=True)
 
+  def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler(
+      self):
+    profiler.start()
+    model = make_keras_model(initializer="glorot_uniform")
+    self._benchmark_keras_model_fit(model, run_eagerly=True)
+    result = profiler.stop()
+    assert result is not None
+
   def benchmark_keras_model_sequential_fit(self):
     model = make_sequential_keras_model(initializer="glorot_uniform")
     self._benchmark_keras_model_fit(model)
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index fd9be06..15ab3a3 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -44,6 +44,7 @@
 # Note that we do not protect this with a lock and instead rely on python's GIL
 # and the idempotent nature of writes to provide thread safety.
 _device_parsing_cache = {}
+_starting_device_spec = pydev.DeviceSpec.from_string("")
 
 _MAXINT32 = 2**31 - 1
 
@@ -135,26 +136,52 @@
 
   def __init__(self, config=None):
     super(_EagerContext, self).__init__()
-    self.device_spec = pydev.DeviceSpec.from_string("")
-    self.device_name = self.device_spec.to_string()
+    self.device_spec = _starting_device_spec
+    self.device_name = ""
     self.mode = default_execution_mode
     self.is_eager = default_execution_mode == EAGER_MODE
     self.scope_name = ""
-    self.recording_summaries = False
     self.summary_writer_resource = None
+    self.recording_summaries = None
     self.scalar_cache = {}
-    self.ones_rank_cache = _EagerTensorCache()
-    self.zeros_cache = _EagerTensorCache()
+    self._ones_rank_cache = None
+    self._zeros_cache = None
     self.execution_mode = None
 
     # Default rewriter config corresponds to turning all default grappler
     # optimizations on.
-    base_config = config_pb2.ConfigProto()
+    self._config = config
 
-    if config is not None:
-      base_config.MergeFrom(config)
+    self._function_call_options = None
 
-    self.function_call_options = FunctionCallOptions(config_proto=base_config)
+  @property
+  def function_call_options(self):
+    if self._function_call_options is None:
+      base_config = config_pb2.ConfigProto()
+      if self._config is not None:
+        base_config.MergeFrom(self._config)
+      self._config = None
+      self._function_call_options = FunctionCallOptions(
+          config_proto=base_config)
+
+    return self._function_call_options
+
+  @function_call_options.setter
+  def function_call_options(self, function_call_options):
+    self._function_call_options = function_call_options
+    self._config = None
+
+  @property
+  def ones_rank_cache(self):
+    if not self._ones_rank_cache:
+      self._ones_rank_cache = _EagerTensorCache()
+    return self._ones_rank_cache
+
+  @property
+  def zeros_cache(self):
+    if not self._zeros_cache:
+      self._zeros_cache = _EagerTensorCache()
+    return self._zeros_cache
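
The properties above defer cache construction to first use, which keeps
per-thread `_EagerContext` creation cheap. The idiom in isolation (written
with an explicit `is None` test; the patch tests truthiness, which behaves
the same for a never-assigned cache):

class ThreadLocalCaches(object):
  def __init__(self):
    self._cache = None  # built lazily, not in __init__

  @property
  def cache(self):
    if self._cache is None:
      self._cache = {}
    return self._cache
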
 
 
 ContextSwitch = collections.namedtuple(
@@ -494,6 +521,16 @@
     self._eager_context.summary_writer_resource = resource
 
   @property
+  def recording_summaries(self):
+    """Returns summary recording condition."""
+    return self._eager_context.recording_summaries
+
+  @recording_summaries.setter
+  def recording_summaries(self, condition):
+    """Sets summary recording condition."""
+    self._eager_context.recording_summaries = condition
+
+  @property
   def device_name(self):
     """Returns the device name for the current thread."""
     return self._eager_context.device_name
@@ -688,14 +725,6 @@
     """Get the list of post-execution callbacks added to the context."""
     return self._post_execution_callbacks
 
-  def enable_run_metadata(self):
-    """Enables tracing of op execution via RunMetadata.
-
-    To retrieve the accumulated metadata call context.export_run_metadata()
-    and to stop tracing call context.disable_run_metadata().
-    """
-    pywrap_tensorflow.TFE_ContextEnableRunMetadata(self._handle)
-
   @tf_contextlib.contextmanager
   def device_policy(self, policy):
     handle = self._handle
@@ -708,12 +737,34 @@
       pywrap_tensorflow.TFE_ContextSetThreadLocalDevicePlacementPolicy(
           handle, old)
 
+  def enable_run_metadata(self):
+    """Enables tracing of op execution via RunMetadata.
+
+    To retrieve the accumulated metadata call context.export_run_metadata()
+    and to stop tracing call context.disable_run_metadata().
+    """
+    pywrap_tensorflow.TFE_ContextEnableRunMetadata(self._handle)
+
   def disable_run_metadata(self):
     """Disables tracing of op execution via RunMetadata."""
     if not self._context_handle:
       return
     pywrap_tensorflow.TFE_ContextDisableRunMetadata(self._context_handle)
 
+  def enable_graph_collection(self):
+    """Enables graph collection of executed functions.
+
+    To retrieve the accumulated graphs call context.export_run_metadata()
+    and to stop collecting graphs call context.disable_graph_collection().
+    """
+    pywrap_tensorflow.TFE_ContextEnableGraphCollection(self._handle)
+
+  def disable_graph_collection(self):
+    """Disables graph collections of executed functions."""
+    if not self._context_handle:
+      return
+    pywrap_tensorflow.TFE_ContextDisableGraphCollection(self._context_handle)
+
   def export_run_metadata(self):
     """Returns a RunMetadata proto with accumulated information.
 
@@ -941,6 +992,20 @@
   context().disable_run_metadata()
 
 
+def enable_graph_collection():
+  """Enables tracing of op execution via RunMetadata.
+
+  To retrieve the accumulated metadata call context.export_run_metadata()
+  and to stop tracing call context.disable_run_metadata().
+  """
+  context().enable_graph_collection()
+
+
+def disable_graph_collection():
+  """Disables tracing of op execution via RunMetadata."""
+  context().disable_graph_collection()
+
+
 def export_run_metadata():
   """Returns a RunMetadata proto with accumulated information.
 
diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 59c6608..23067cf 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -31,7 +31,7 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util.tf_export import tf_export
@@ -96,7 +96,7 @@
         shape and `validate_shape` is `True`.
       RuntimeError: If called outside of a function definition.
     """
-    if context.executing_eagerly():
+    if not ops.inside_function():
       # If we've been init_scope()d out of the function definition nothing to do
       # here; we can't really do the capturing or conditional logic.
       resource_variable_ops.ResourceVariable.__init__(
@@ -113,8 +113,8 @@
     if constraint is not None and not callable(constraint):
       raise ValueError("The `constraint` argument must be a callable.")
 
-    if isinstance(initial_value, checkpointable.CheckpointInitialValue):
-      self._maybe_initialize_checkpointable()
+    if isinstance(initial_value, trackable.CheckpointInitialValue):
+      self._maybe_initialize_trackable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
       initial_value = initial_value.wrapped_value
 
@@ -156,8 +156,14 @@
       if self._in_graph_mode:
         with ops.init_scope():
           outer_graph = ops.get_default_graph()
+        func_graph = ops.get_default_graph()
+        function_placeholders = (
+            func_graph.inputs + func_graph.internal_captures)
+        placeholder_ops = set(
+            [tensor.op for tensor in function_placeholders])
         lifted_initializer = lift_to_graph.lift_to_graph(
-            initial_value, outer_graph)[initial_value]
+            [initial_value], outer_graph,
+            disallowed_placeholders=placeholder_ops)[initial_value]
         with ops.init_scope():
           self._initial_value = lifted_initializer
           with ops.name_scope("IsInitialized"):
@@ -204,6 +210,29 @@
     self._cached_shape_as_list = None
 
 
+RUN_FUNCTIONS_EAGERLY = False
+
+
+@tf_export("config.experimental_run_functions_eagerly")
+def run_functions_eagerly(run_eagerly):
+  """Enables / disables eager execution of `tf.function`s.
+
+  After calling `tf.config.experimental_run_functions_eagerly(True)` all
+  invocations of `tf.function` will run eagerly instead of running as a
+  traced graph function.
+
+  This can be useful for debugging or profiling.
+
+  Similarly, calling `tf.config.experimental_run_functions_eagerly(False)` will
+  revert the behavior of all functions to graph functions.
+
+  Args:
+    run_eagerly: Boolean. Whether to run functions eagerly.
+  """
+  global RUN_FUNCTIONS_EAGERLY
+  RUN_FUNCTIONS_EAGERLY = bool(run_eagerly)
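
Hypothetical end-user view of the switch, on a build that includes this
export:

import tensorflow as tf

@tf.function
def square(x):
  return x * x

tf.config.experimental_run_functions_eagerly(True)   # pure-Python execution
print(square(tf.constant(3.0)))                      # steppable in a debugger
tf.config.experimental_run_functions_eagerly(False)  # back to traced graphs
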
+
+
 class FunctionDeleter(object):
 
   def __init__(self, func_graph):
@@ -349,7 +378,35 @@
     self._stateless_fn = self._defun_with_scope(invalid_creator_scope)
     self._stateless_fn._name = self._name  # pylint: disable=protected-access
 
+  def _decorate(self, decorator):
+    """Allows the captured Python function to be decorated in place.
+
+    This method is only safe to call when the Function has not been called by a
+    user. It makes sense to use this method to push a decorator into the
+    function rather than wrapping the function in the decorator.
+
+    We use this in tf.Module to allow user-annotated `tf.function`s to remain
+    as `Function` objects while still automatically entering the Module
+    name_scope when they are evaluated, like all other methods.
+
+    Args:
+      decorator: A callable accepting a single argument which is the function
+        to decorate and returning a callable result.
+
+    Raises:
+      ValueError: If the function has already been called.
+    """
+    if self._stateful_fn is not None or self._stateless_fn is not None:
+      raise ValueError(
+          "Functions cannot be decorated after they have been traced.")
+
+    self._python_function = decorator(self._python_function)
+    self._function_spec = function_lib.FunctionSpec.from_function_and_signature(
+        self._python_function, self._input_signature)
+
   def __call__(self, *args, **kwds):
     """Calls the graph function."""
+    if RUN_FUNCTIONS_EAGERLY:
+      return self._python_function(*args, **kwds)
     if self._created_variables:
       # In this case we have created variables on the first call, so we run the
@@ -465,7 +522,7 @@
             # Ignore variables which are already initialized at trace time.
             continue
         v.assign(lift_to_graph.lift_to_graph(
-            init, ops.get_default_graph())[init])
+            [init], ops.get_default_graph())[init])
 
     with ops.init_scope():
       return initialize_variables.get_concrete_function()()
@@ -506,7 +563,7 @@
     def initialize_variables():
       for v, init in initializer_map.items():
         v.assign(lift_to_graph.lift_to_graph(
-            init, ops.get_default_graph())[init])
+            [init], ops.get_default_graph())[init])
 
     return initialize_variables.get_concrete_function()
 
@@ -726,6 +783,9 @@
       l.append(i)                           # Caution! Doesn't work.
   ```
 
+  Note that unlike other TensorFlow operations, we don't convert Python
+  numerical inputs to tensors.
+
   _Referencing `tf.Variable`s_
 
   The Python function `func` may reference stateful objects (such as
@@ -826,8 +886,8 @@
   def f(x): return tf.add(x, 1.)
   ```
 
-  When an `input_signature` is specified, the callable will only accept `Tensor`
-  (or NumPy `ndarray`) objects as arguments.
+  When an `input_signature` is specified, the callable will convert the inputs
+  to the specified TensorSpecs.
 
   _Tracing and staging_
 
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index b49b165..fdf054a 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -26,11 +26,14 @@
 from tensorflow.python.eager import lift_to_graph
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.layers import core
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -211,7 +214,8 @@
           state.append(variables.Variable(2.0 * x))
         return state[0] * x
 
-      with self.assertRaises(lift_to_graph.UnliftableError):
+      with self.assertRaisesRegexp(
+          lift_to_graph.UnliftableError, r'transitively.* mul .* x'):
         fn(constant_op.constant(3.0))
 
   def testMethod(self):
@@ -271,6 +275,38 @@
                      (tensor_spec.TensorSpec(
                          None, dtypes.float32, name='x'),))
 
+  def test_error_inner_capture(self):
+
+    @def_function.function
+    def f(inputs):
+      num_steps, _ = inputs.shape[:2]
+      outputs = []
+      for t in math_ops.range(num_steps):
+        outputs.append(inputs[t])
+      return outputs
+
+    with self.assertRaisesRegexp(ValueError, 'inner'):
+      f(array_ops.zeros(shape=(8, 42, 3)))
+
+  def testRuntimeErrorNotSticky(self):
+
+    @def_function.function
+    def fail(i):
+      control_flow_ops.Assert(math_ops.equal(i, 0), ['ick'])
+
+    fail(constant_op.constant(0))  # OK
+    with self.assertRaises(errors.InvalidArgumentError):
+      fail(constant_op.constant(1))  # InvalidArgument: "ick"
+    fail(constant_op.constant(0))  # OK
+
+  def testUnderscoreName(self):
+
+    @def_function.function
+    def f(_):
+      return _ + _
+
+    self.assertAllEqual(2.0, f(constant_op.constant(1.0)))
+
   def test_serialization_signature_cache(self):
 
     @def_function.function
@@ -429,6 +465,39 @@
     created_variable_read = create_variable()
     self.assertRegexpMatches(created_variable_read.device, "CPU")
 
+  def testDecorate(self):
+    func = def_function.function(lambda: 1)
+    def decorator(f):
+      return lambda: 1 + f()
+
+    func._decorate(decorator)
+    self.assertEqual(func().numpy(), 2)
+
+  def testLiftPlaceholderInitializedVariable(self):
+    with ops.Graph().as_default():
+      var_list = []
+
+      @def_function.function
+      def use_variable():
+        if not var_list:
+          initial_value = array_ops.placeholder(shape=[], dtype=dtypes.float32)
+          v = variables.Variable(initial_value)
+          var_list.append(v)
+        return var_list[0] + 1.
+
+      var_plus_one = use_variable()
+      with self.session() as session:
+        init_op = var_list[0].initializer
+        session.run(init_op, feed_dict={init_op.inputs[1]: 2.})
+        self.assertEqual(3., session.run(var_plus_one))
+
+  def testDecorate_rejectedAfterTrace(self):
+    func = def_function.function(lambda: 1)
+    self.assertEqual(func().numpy(), 1)
+    msg = 'Functions cannot be decorated after they have been traced.'
+    with self.assertRaisesRegexp(ValueError, msg):
+      func._decorate(lambda f: f)
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 04f1999..d9a63bc 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -21,7 +21,6 @@
 
 import collections
 import functools
-import re
 import threading
 import types as types_lib
 import weakref
@@ -61,13 +60,6 @@
 FORWARD_FUNCTION_ATTRIBUTE_NAME = "forward_function_name"
 BACKWARD_FUNCTION_ATTRIBUTE_NAME = "backward_function_name"
 
-# TODO(scottzhu): Update this to allow arbitrary attribute names in future.
-WHITELIST_FUNCTION_ATTRIBUTE_REGEX = [
-    "experimental_.*",
-    FORWARD_FUNCTION_ATTRIBUTE_NAME,
-    BACKWARD_FUNCTION_ATTRIBUTE_NAME
-]
-
 CacheKey = collections.namedtuple("CacheKey", [
     "input_signature", "parent_graph", "device_functions", "colocation_stack",
     "uses_xla"
@@ -108,12 +100,6 @@
   """
   attrs = {}
   for key, value in attributes.items():
-    if not any(re.match(reg, key)
-               for reg in WHITELIST_FUNCTION_ATTRIBUTE_REGEX):
-      raise ValueError("Attribute name is not whitelisted. "
-                       "Whitelisted: prefix %s, got: %s" %
-                       (WHITELIST_FUNCTION_ATTRIBUTE_REGEX, key))
-
     if isinstance(value, attr_value_pb2.AttrValue):
       attrs[key] = value
     # bool type check has to happen before int since bool is a subclass of int.
@@ -219,6 +205,8 @@
         [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
         [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
         [],
+        [o._as_c_op for o in graph.control_outputs],
+        [],  # control_output_names
         None,
         compat.as_str(""))
 
@@ -431,13 +419,24 @@
                args))
     args = list(args)
     for keyword in self._arg_keywords[len(args):]:
-      args.append(kwargs.pop(compat.as_str(keyword)))
+      try:
+        args.append(kwargs.pop(compat.as_str(keyword)))
+      except KeyError:
+        specified_keywords = (list(self._arg_keywords[:len(args)])
+                              + list(kwargs.keys()))
+        raise TypeError(
+            "Expected argument names {} but got values for {}. Missing: {}."
+            .format(
+                list(self._arg_keywords),
+                specified_keywords,
+                list(set(self._arg_keywords) - set(specified_keywords))))
     if kwargs:
       positional_arg_keywords = set(self._arg_keywords[:len(args)])
       for unused_key in kwargs:
         if unused_key in positional_arg_keywords:
           raise TypeError("Got two values for keyword '{}'.".format(unused_key))
-      raise TypeError("Keyword arguments {} unknown.".format(kwargs.keys()))
+      raise TypeError("Keyword arguments {} unknown. Expected {}.".format(
+          list(kwargs.keys()), list(self._arg_keywords)))
     return self._call_flat(args)
 
   def _filtered_call(self, args, kwargs):
@@ -847,6 +846,10 @@
       python_function_to_inspect = python_function.func
       args_to_prepend = python_function.args or tuple()
       kwargs_to_include = python_function.keywords or {}
+      if input_signature is not None:
+        # TODO(b/124441704): Add support for input_signature + partial.
+        raise NotImplementedError(
+            "Missing support for input_signature when using partial functions.")
     else:
       python_function_to_inspect = python_function
       args_to_prepend = tuple()
@@ -948,6 +951,21 @@
         argument when an input signature is specified, or when the inputs
         do not conform to the input signature.
     """
+    if self._input_signature is not None:
+      if len(args) > len(self._input_signature):
+        raise TypeError(
+            "When input_signature is provided, only pass arguments "
+            "covered by it. Received %d argument(s)." % len(args))
+      for arg in six.iterkeys(kwargs):
+        index = self._args_to_indices.get(arg, None)
+        if index is None:
+          raise TypeError(
+              "Function got an unexpected keyword argument %s" % arg)
+        if index >= len(self._input_signature):
+          raise TypeError(
+              "When input_signature is provided, only pass arguments "
+              "covered by it. Received argument %s." % arg)
+
     args = self._args_to_prepend + args
     kwargs = dict(kwargs, **self._kwargs_to_include)
     if not kwargs:
@@ -979,42 +997,82 @@
         # opposed to named arguments called in a keyword-like fashion.
         kwargs.pop(arg)
       inputs = args + _deterministic_dict_values(arg_indices_to_values)
-    flat_inputs = nest.flatten(inputs)
 
-    # Check for NumPy arrays in arguments and convert them to Tensors.
-    # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps
-    # finding a way to store them directly in the cache key (currently not
-    # possible since ndarrays are not hashable).
-    need_packing = False
-    for index, value in enumerate(flat_inputs):
-      if type(value) == np.ndarray:
-        flat_inputs[index] = constant_op.constant(value)
-        need_packing = True
-    if need_packing:
-      inputs = nest.pack_sequence_as(
-          structure=inputs, flat_sequence=flat_inputs)
     if self._input_signature is None:
+      inputs = _convert_numpy_inputs(inputs)
       return inputs, kwargs
     else:
       assert not kwargs
-      signature_relevant_inputs = inputs[:len(self._input_signature)]
-      if not is_same_structure(self._input_signature,
-                               signature_relevant_inputs):
-        raise ValueError("Structure of Python function inputs does not match "
-                         "input_signature.")
-      signature_inputs_flat = nest.flatten(signature_relevant_inputs)
-      if any(
-          not pywrap_tensorflow.IsTensor(arg) for arg in signature_inputs_flat):
-        raise ValueError("When input_signature is provided, all inputs to "
-                         "the Python function must be Tensors.")
-      if any(not spec.is_compatible_with(other) for spec, other in zip(
-          self._flat_input_signature, signature_inputs_flat)):
-        raise ValueError("Python inputs incompatible with input_signature: "
-                         "inputs (%s), input_signature (%s)" %
-                         (str(inputs), str(self._input_signature)))
+      inputs = _convert_inputs_to_signature(
+          inputs,
+          self._input_signature,
+          self._flat_input_signature)
       return inputs, {}
 
 
+def _convert_numpy_inputs(inputs):
+  """Convert numpy array inputs to tensors."""
+  flat_inputs = nest.flatten(inputs)
+
+  # Check for NumPy arrays in arguments and convert them to Tensors.
+  # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps
+  # finding a way to store them directly in the cache key (currently not
+  # possible since ndarrays are not hashable).
+  need_packing = False
+  for index, value in enumerate(flat_inputs):
+    if type(value) == np.ndarray:
+      flat_inputs[index] = constant_op.constant(value)
+      need_packing = True
+  if need_packing:
+    return nest.pack_sequence_as(
+        structure=inputs, flat_sequence=flat_inputs)
+  else:
+    return inputs
+
+
+def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature):
+  """Convert inputs to pass into a function with an explicit signature."""
+  try:
+    # TODO(b/124370185): Use all elements as inputs to throw an error if there
+    # are ignored arguments. Calling with arguments that are not part of the
+    # signature should throw an error.
+    flatten_inputs = nest.flatten_up_to(
+        input_signature,
+        inputs[:len(input_signature)])
+  except ValueError:
+    raise ValueError("Structure of Python function inputs does not match "
+                     "input_signature. Inputs (%s), input_signature(%s)." %
+                     (str(inputs), str(input_signature)))
+
+  need_packing = False
+  for index, (value, spec) in enumerate(zip(flatten_inputs,
+                                            flat_input_signature)):
+    if not pywrap_tensorflow.IsTensor(value):
+      try:
+        flatten_inputs[index] = ops.convert_to_tensor(
+            value, dtype_hint=spec.dtype)
+        need_packing = True
+      except ValueError:
+        raise ValueError("When input_signature is provided, all inputs to "
+                         "the Python function must be convertible to tensors."
+                         "Inputs (%s), input_signature(%s)." %
+                         (str(inputs), str(input_signature)))
+
+  if any(not spec.is_compatible_with(other) for spec, other in zip(
+      flat_input_signature,
+      flatten_inputs)):
+    raise ValueError("Python inputs incompatible with input_signature: "
+                     "inputs (%s), input_signature (%s)" %
+                     (str(inputs), str(input_signature)))
+
+  if need_packing:
+    inputs = nest.pack_sequence_as(
+        structure=input_signature,
+        flat_sequence=flatten_inputs)
+
+  return inputs
+
+
 class Function(object):
   """Wrapper class for the graph functions defined for a Python function.
 
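
With the conversion factored into _convert_inputs_to_signature, compatible
plain-Python values are now converted to tensors instead of being rejected.
A sketch mirroring testInputSignatureWithCompatibleInputs below:

    from tensorflow.python.eager import function
    from tensorflow.python.framework import dtypes
    from tensorflow.python.framework import tensor_spec
    from tensorflow.python.ops import array_ops

    spec = tensor_spec.TensorSpec(shape=(None, None), dtype=dtypes.float32)

    @function.defun(input_signature=[spec])
    def shape_of(a):
      return array_ops.shape(a)

    shape_of([[0.], [1.], [2.]])  # Converted to a float32 tensor: [3, 1].
    shape_of([0., 1., 2.])        # ValueError: ...incompatible with
                                  # input_signature...
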
@@ -1033,7 +1091,8 @@
                input_signature=None,
                attributes=None,
                autograph=True,
-               autograph_options=None):
+               autograph_options=None,
+               capture_by_value=None):
     """Initializes a `Function`.
 
     Args:
@@ -1050,6 +1109,9 @@
       autograph_options: Experimental knobs to control behavior when
         `autograph=True`. See https://www.tensorflow.org/guide/autograph
         for more information.
+      capture_by_value: Experimental. Whether to capture resource variables by
+        value or reference. If None, will inherit from a parent context or
+        default to False.
 
     Raises:
       ValueError: if `input_signature` is not None and the `python_function`'s
@@ -1067,6 +1129,7 @@
     self._function_cache = collections.OrderedDict()
     self._garbage_collector = _FunctionGarbageCollector(self._function_cache)
     self._function_attributes = attributes or {}
+    self._capture_by_value = capture_by_value
 
     self._lock = threading.Lock()
     # _descriptor_cache maps an instance of a class to an instance-specific
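
A sketch of the capture_by_value semantics, mirroring
testFuncGraphCaptureByValue in the test changes below (the name
'frozen_read' is illustrative):

    from tensorflow.python.eager import function
    from tensorflow.python.ops import variables

    v = variables.Variable(1.0)
    frozen = function.Function(
        lambda: v.read_value(), 'frozen_read', capture_by_value=True)

    frozen()       # 1.0
    v.assign(2.0)
    frozen()       # Still 1.0: the read was captured by value at trace time.
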
@@ -1329,13 +1392,15 @@
                 self._input_signature,
                 autograph=self._autograph,
                 autograph_options=self._autograph_options,
-                arg_names=arg_names), self._function_attributes)
+                arg_names=arg_names,
+                capture_by_value=self._capture_by_value),
+            self._function_attributes)
         # pylint: disable=protected-access
         # Tell the ConcreteFunction to clean up its graph once it goes out of
         # scope. ConcreteFunction does not do this in its constructor since it
         # gets used in some places (like Keras) where the FuncGraph lives
         # longer than the ConcreteFunction.
-        graph_function._garbage_collector = _ConcreteFunctionGarbageCollector(
+        graph_function._garbage_collector = ConcreteFunctionGarbageCollector(
             graph_function.graph)
         # pylint: enable=protected-access
         self._function_cache[cache_key] = graph_function
@@ -1858,7 +1923,7 @@
       pass
 
 
-class _ConcreteFunctionGarbageCollector(object):
+class ConcreteFunctionGarbageCollector(object):
   """Cleans up reference cycles when a `ConcreteFunction` goes out of scope."""
 
   def __init__(self, func_graph):
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 33ae062..fef649c 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -49,6 +49,7 @@
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gen_random_ops
 from tensorflow.python.ops import gen_resource_variable_ops
 from tensorflow.python.ops import init_ops
@@ -381,13 +382,13 @@
     x = random_ops.random_uniform([2, 2]).numpy()
     defined = function.defun(f)
     defined(x)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     x = random_ops.random_uniform([2, 2]).numpy()
     defined(x)
     # A NumPy array with different values but the same shape and dtype
     # shouldn't trigger another function definition.
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     # Test that the numpy array is properly an argument to the graph function.
     self.assertEqual(1., defined(numpy.ones([])).numpy())
@@ -482,6 +483,34 @@
     self.assertAllEqual(5., self.evaluate(concrete()))
     self.assertAllEqual(5., self.evaluate(tensor_init()))
 
+  def testFuncGraphCaptureByValue(self):
+    v = variables.Variable(1.0)
+
+    def trivial_function():
+      return v.read_value()
+
+    graph_function = function.Function(
+        trivial_function, 'test', capture_by_value=True)
+
+    self.assertAllEqual(graph_function(), 1.0)
+    v.assign(2.0)
+    self.assertAllEqual(graph_function(), 1.0)
+
+  def testFuncGraphCaptureByValueNested(self):
+    v = variables.Variable(1.0)
+
+    def trivial_function():
+      return control_flow_ops.cond(
+          array_ops.placeholder_with_default(True, ()),
+          v.read_value, v.read_value)
+
+    graph_function = function.Function(
+        trivial_function, 'test', capture_by_value=True)
+
+    self.assertAllEqual(graph_function(), 1.0)
+    v.assign(2.0)
+    self.assertAllEqual(graph_function(), 1.0)
+
   def testDefunShapeInferenceWithCapturedResourceVariable(self):
     v = resource_variable_ops.ResourceVariable([[1, 2], [3, 4]])
 
@@ -500,6 +529,43 @@
     var_t = resource_variable_ops.read_variable_op(var_handle, dtype=v.dtype)
     self.assertEqual(var_t.shape, tensor_shape.TensorShape([2, 2]))
 
+  def testShapeInferenceForMoreSpecificInput(self):
+    self.skipTest('b/124219898')
+
+    def f(a):
+      return array_ops.reshape(a, [-1, 3])
+
+    signature = [tensor_spec.TensorSpec(None, dtypes.float32)]
+    compiled = def_function.function(f, input_signature=signature)
+
+    with ops.Graph().as_default():
+      inputs = array_ops.zeros([10, 10, 3])
+      self.assertAllEqual(f(inputs).shape, compiled(inputs).shape)
+
+  def testFuncListAttr(self):
+
+    @function.defun
+    def test_function(val):
+
+      def fn1():
+        return array_ops.ones([10])
+
+      fn2 = lambda: array_ops.ones([10]) * 2
+
+      def fn3(x=2):
+        return array_ops.ones([10]) * x
+      fn3 = functools.partial(fn3, x=3)
+
+      return gen_functional_ops.case(val, [], [dtypes.float32],
+                                     [function.defun(f).get_concrete_function()
+                                      for f in (fn1, fn2, fn3)])
+
+    ones = array_ops.ones([10])
+    self.assertAllEqual([ones], test_function(0))
+    self.assertAllEqual([ones * 2], test_function(1))
+    self.assertAllEqual([ones * 3], test_function(2))
+    self.assertAllEqual([ones * 3], test_function(22))  # default branch
+
   @test_util.enable_control_flow_v2
   def testVariableInLoopInFunction(self):
 
@@ -595,7 +661,7 @@
     run_metadata = context.export_run_metadata()
     context.disable_run_metadata()
     step_stats = run_metadata.step_stats
-    self.assertGreater(len(step_stats.dev_stats), 0)
+    self.assertNotEmpty(step_stats.dev_stats)
     cpu_stats = step_stats.dev_stats[0]
     self.assertEqual('/job:localhost/replica:0/task:0/device:CPU:0',
                      cpu_stats.device)
@@ -604,10 +670,10 @@
     # arbitrarily many (placeholders, return identities, etc.) might be
     # included or not in the future, so shouldn't be tested for exactly.
     self.assertGreaterEqual(len(cpu_stats.node_stats), 2)
-    self.assertEqual(len(run_metadata.partition_graphs), 1)
+    self.assertLen(run_metadata.partition_graphs, 1)
 
   def testGraphModeCaptureVariable(self):
-    with context.graph_mode(), self.cached_session() as sess:
+    with context.graph_mode(), self.cached_session():
 
       class HasAVar(object):
 
@@ -823,8 +889,9 @@
       return None
 
     with self.assertRaisesRegexp(
-        errors.InvalidArgumentError, 'Could not colocate node with its '
-        'resource and reference inputs.*'):
+        errors.InvalidArgumentError,
+        'Cannot place the graph because a reference or resource edge connects '
+        'colocation groups with incompatible assigned devices'):
       if not context.executing_eagerly():
         self.evaluate(variables.global_variables_initializer())
       self.evaluate(resource_apply_adam())
@@ -931,7 +998,7 @@
                                   constant_op.constant(4)],
         constant_op.constant(5)
     ])
-    self.assertEqual(len(ret), 2)
+    self.assertLen(ret, 2)
     self.assertAllEqual(ret[0][0], 2)
     self.assertAllEqual(ret[0][1][0][0], 8)
     self.assertAllEqual(ret[0][1][0][1], 4)
@@ -1043,7 +1110,7 @@
 
     defined = function.defun(multi_device_fn)
     outputs = self.evaluate(defined())
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
     self.assertIn(compat.as_bytes('CPU:0'), outputs[0])
     self.assertIn(compat.as_bytes('CPU:1'), outputs[1])
     self.assertIn(compat.as_bytes('CPU:2'), outputs[2])
@@ -1051,7 +1118,7 @@
     with ops.device('/cpu:3'):
       outputs = self.evaluate(defined())
     # All function definitions are agnostic to call site devices.
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
     self.assertIn(compat.as_bytes('CPU:0'), outputs[0])
     self.assertIn(compat.as_bytes('CPU:1'), outputs[1])
     self.assertIn(compat.as_bytes('CPU:2'), outputs[2])
@@ -1059,7 +1126,7 @@
 
     with ops.device('/cpu:0'):
       outputs = self.evaluate(defined())
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
     self.assertIn(compat.as_bytes('CPU:0'), outputs[0])
     self.assertIn(compat.as_bytes('CPU:1'), outputs[1])
     self.assertIn(compat.as_bytes('CPU:2'), outputs[2])
@@ -1144,10 +1211,38 @@
 
     defined = function.defun(func)
     defined(Foo())
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     defined(Foo())
-    self.assertEqual(len(defined._function_cache), 2)
+    self.assertLen(defined._function_cache, 2)
+
+  def testCacheTensorDtypeCollision(self):
+
+    def func(t):
+      return t + t
+
+    defined = function.defun(func)
+    t = constant_op.constant([[1.0]], dtype=dtypes.complex64)
+    defined(t)
+    self.assertLen(defined._function_cache, 1)
+
+    t = constant_op.constant([[1.0]], dtype=dtypes.complex128)
+    defined(t)
+    self.assertLen(defined._function_cache, 2)
+
+  def testCacheTensorShapeCollision(self):
+
+    def func(t):
+      return t + t
+
+    defined = function.defun(func)
+    t = constant_op.constant([[1.0]], dtype=dtypes.complex64)
+    defined(t)
+    self.assertLen(defined._function_cache, 1)
+
+    t = constant_op.constant([1.0], dtype=dtypes.complex64)
+    defined(t)
+    self.assertLen(defined._function_cache, 2)
 
   def testCacheTensorShapeDtypeCollision(self):
 
@@ -1157,11 +1252,11 @@
     defined = function.defun(func)
     t = constant_op.constant([[1.0]], dtype=dtypes.complex64)
     defined(t)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     t = constant_op.constant([1.0], dtype=dtypes.complex128)
     defined(t)
-    self.assertEqual(len(defined._function_cache), 2)
+    self.assertLen(defined._function_cache, 2)
 
   def testCacheTensorUnknownShapesCollision(self):
 
@@ -1173,19 +1268,19 @@
 
       p = array_ops.placeholder(dtype=dtypes.float32, shape=None)
       defined(p)
-      self.assertEqual(len(defined._function_cache), 1)
+      self.assertLen(defined._function_cache, 1)
 
       p = array_ops.placeholder(dtype=dtypes.float32, shape=[None])
       defined(p)
-      self.assertEqual(len(defined._function_cache), 2)
+      self.assertLen(defined._function_cache, 2)
 
       p = array_ops.placeholder(dtype=dtypes.float32, shape=[None, None])
       defined(p)
-      self.assertEqual(len(defined._function_cache), 3)
+      self.assertLen(defined._function_cache, 3)
 
       t = constant_op.constant(1.0, dtype=dtypes.float32)
       defined(t)
-      self.assertEqual(len(defined._function_cache), 4)
+      self.assertLen(defined._function_cache, 4)
 
   def testPythonFunctionWithDefaultArgs(self):
 
@@ -1203,32 +1298,33 @@
       return tuple(key[0] for key in defined._function_cache)
 
     # `True` corresponds to the fact that we're executing eagerly
-    self.assertIn(('URRR', (0, 1, 20)), cache_keys())
+    self.assertIn(('URRRu', (0, 1, 20)), cache_keys())
 
     defined(1)  # bar=1, baz=2
-    self.assertIn(('URRR', (1, 1, 2)), cache_keys())
+    self.assertIn(('URRRu', (1, 1, 2)), cache_keys())
 
     # This matches the previous call.
     defined(foo=1)
-    self.assertEqual(len(defined._function_cache), 2)
+    self.assertLen(defined._function_cache, 2)
 
     defined(1, 2, 3)
-    self.assertIn(('URRR', (1, 2, 3)), cache_keys())
+    self.assertLen(defined._function_cache, 3)
+    self.assertIn(('URRRu', (1, 2, 3)), cache_keys())
 
     # This matches the previous call.
     defined(1, bar=2, baz=3)
-    self.assertEqual(len(defined._function_cache), 3)
+    self.assertLen(defined._function_cache, 3)
 
     # This matches the previous call.
     defined(1, baz=3, bar=2)
-    self.assertEqual(len(defined._function_cache), 3)
+    self.assertLen(defined._function_cache, 3)
 
   def testFunctoolsPartialUnwrappedCorrectly(self):
 
     def full_function(a, b, c=3):
       return a, b, c
 
-    partial = functools.partial(full_function, 1, c=3)
+    partial = functools.partial(full_function, 1, c=4)
     a, b, c = partial(2)
 
     defined = function.defun(partial)
@@ -1237,7 +1333,7 @@
     self.assertEqual(func_b.numpy(), b)
     self.assertEqual(func_c.numpy(), c)
 
-  def testInputSignatureWithCompatibleInputs(self):
+  def testInputSignatureWithMatchingInputs(self):
 
     def foo(a):
       self.assertEqual(a.shape, (2,))
@@ -1247,12 +1343,12 @@
     defined = function.defun(foo, input_signature=signature)
     a = array_ops.ones([2])
     self.assertAllEqual(a, defined(a))
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
     self.assertAllEqual(a, defined.get_concrete_function()(a))
     self.assertAllEqual(a, defined.get_concrete_function(a)(a))
     self.assertAllEqual(a, defined.get_concrete_function(
         tensor_spec.TensorSpec((2,), dtype=dtypes.float32))(a))
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     def bar(a):
       self.assertEqual(a._shape_tuple(), (2, None))
@@ -1262,31 +1358,55 @@
     defined = function.defun(bar, input_signature=signature)
     a = array_ops.ones([2, 1])
     out = defined(a)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
     self.assertAllEqual(out, a)
 
     # Changing the second dimension shouldn't create a new function.
     b = array_ops.ones([2, 3])
     out = defined(b)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
     self.assertAllEqual(out, b)
 
+  def testInputSignatureWithCompatibleInputs(self):
+
+    rank2_spec = tensor_spec.TensorSpec(shape=(None, None),
+                                        dtype=dtypes.float32)
+
+    @function.defun(input_signature=[rank2_spec])
+    def func(a):
+      self.assertEqual([None, None], a.shape.as_list())
+      return array_ops.shape(a)
+
+    self.assertAllEqual([3, 1], func([[0], [1.0], [1]]))
+    self.assertAllEqual([2, 2], func(numpy.array([[1, 1], [2, 2]])))
+
+    with self.assertRaisesRegexp(ValueError, 'incompatible'):
+      func([0.0, 1.0, 2.0])  # Wrong shape.
+
+    with self.assertRaisesRegexp(ValueError, 'incompatible'):
+      func([['wrong dtype']])
+
   def testNestedInputSignatures(self):
 
+    def expected_foo(a, b):
+      return [a, b]
+
+    @function.defun(input_signature=[
+        [tensor_spec.TensorSpec((2, None), dtypes.float32)] * 2,
+        tensor_spec.TensorSpec((1,), dtypes.float32),
+    ])
     def foo(a, b):
       self.assertEqual(a[0]._shape_tuple(), (2, None))
       self.assertEqual(a[1]._shape_tuple(), (2, None))
       self.assertEqual(b._shape_tuple(), (1,))
       return [a, b]
 
-    signature = [[tensor_spec.TensorSpec((2, None), dtypes.float32)] * 2,
-                 tensor_spec.TensorSpec((1,), dtypes.float32)]
-    defined = function.defun(foo, input_signature=signature)
     a = array_ops.ones([2, 1])
     b = array_ops.ones([1])
-    out = defined([a, a], b)
-    self.assertEqual(len(defined._function_cache), 1)
-    nest.assert_same_structure(out, [[a, a], b])
+    expected = expected_foo([a, a], b)
+    out = foo([a, a], b)
+    self.assertLen(foo._function_cache, 1)
+    nest.assert_same_structure(out, expected)
     self.assertAllEqual(out[0][0], a)
     self.assertAllEqual(out[0][1], a)
     self.assertAllEqual(out[1], b)
@@ -1295,33 +1415,58 @@
     a = array_ops.ones([2, 3])
     b = array_ops.ones([2, 5])
     c = array_ops.ones([1])
-    out = defined([a, b], c)
-    self.assertEqual(len(defined._function_cache), 1)
-    nest.assert_same_structure(out, [[a, b], c])
+    expected = expected_foo([a, b], c)
+    out = foo([a, b], c)
+    self.assertLen(foo._function_cache, 1)
+    nest.assert_same_structure(out, expected)
     self.assertAllEqual(out[0][0], a)
     self.assertAllEqual(out[0][1], b)
     self.assertAllEqual(out[1], c)
 
+    # Passing compatible inputs should work.
+    a = a.numpy().tolist()
+    b = b.numpy().tolist()
+    c = c.numpy().tolist()
+    out = foo([a, b], c)
+    self.assertLen(foo._function_cache, 1)
+    nest.assert_same_structure(out, expected)
+    self.assertAllEqual(out[0][0], a)
+    self.assertAllEqual(out[0][1], b)
+    self.assertAllEqual(out[1], c)
+
+  def testNestedInputSignaturesWithDict(self):
+    def expected_bar(a):
+      return a
+
+    @function.defun(input_signature=[{
+        'a': tensor_spec.TensorSpec((2, None), dtypes.float32),
+        'b': tensor_spec.TensorSpec((2, None), dtypes.float32),
+        'c': tensor_spec.TensorSpec((1,), dtypes.float32)}])
     def bar(a):
       self.assertEqual(a['a']._shape_tuple(), (2, None))
       self.assertEqual(a['b']._shape_tuple(), (2, None))
       self.assertEqual(a['c']._shape_tuple(), (1,))
       return a
 
-    signature = [{
-        'a': tensor_spec.TensorSpec((2, None), dtypes.float32),
-        'b': tensor_spec.TensorSpec((2, None), dtypes.float32),
-        'c': tensor_spec.TensorSpec((1,), dtypes.float32)
-    }]
     a = array_ops.ones([2, 3])
     b = array_ops.ones([1])
     inputs = {'a': a, 'b': a, 'c': b}
-    defined = def_function.function(bar, input_signature=signature)
-    out = defined(inputs)
-    nest.assert_same_structure(out, inputs)
-    self.assertAllEqual(out['a'], inputs['a'])
-    self.assertAllEqual(out['b'], inputs['b'])
-    self.assertAllEqual(out['c'], inputs['c'])
+    expected = expected_bar(inputs)
+    out = bar(inputs)
+    nest.assert_same_structure(out, expected)
+    self.assertAllEqual(out['a'], expected['a'])
+    self.assertAllEqual(out['b'], expected['b'])
+    self.assertAllEqual(out['c'], expected['c'])
+
+    # Passing compatible inputs should work.
+    a = a.numpy().tolist()
+    b = b.numpy().tolist()
+    inputs = {'a': a, 'b': a, 'c': b}
+    out = bar(inputs)
+    nest.assert_same_structure(out, expected)
+    self.assertAllEqual(out['a'], expected['a'])
+    self.assertAllEqual(out['b'], expected['b'])
+    self.assertAllEqual(out['c'], expected['c'])
 
   def testInputSignatureMustBeSequenceOfTensorSpecs(self):
 
@@ -1357,9 +1502,7 @@
       defined(array_ops.ones([2, 1]))
 
     # Wrong number of arguments.
-    with self.assertRaisesRegexp(
-        ValueError,
-        'Arguments and signature arguments do not match.*'):
+    with self.assertRaisesRegexp(TypeError, r'Received 2 argument\(s\)'):
       defined(array_ops.ones([2]), array_ops.ones([2]))
     with self.assertRaisesRegexp(ValueError,
                                  'Structure of Python function inputs.*'):
@@ -1370,7 +1513,60 @@
       defined.get_concrete_function(
           tensor_spec.TensorSpec(shape=(3,), dtype=dtypes.float32))
 
-  def testInputSignatureForFunctionWithNonTensorInputsNotAllowed(self):
+  def testInputsIncompatibleWithNestedSignatureRaisesError(self):
+
+    def foo(a, b):
+      return [a, b]
+
+    signature = [[tensor_spec.TensorSpec((1,), dtypes.float32)] * 2,
+                 [tensor_spec.TensorSpec((1,), dtypes.float32)] * 2]
+    defined = function.defun(foo, input_signature=signature)
+    a = array_ops.ones([1])
+
+    with self.assertRaisesRegexp(ValueError,
+                                 'Structure of Python function inputs.*'):
+      defined([a, a, a], [a])
+
+    with self.assertRaisesRegexp(ValueError,
+                                 'Structure of Python function inputs.*'):
+      defined([a], [a, a, a])
+    defined([a, a], [a, a])
+
+  def testUnderspecifiedInputSignature(self):
+    @function.defun(input_signature=[
+        tensor_spec.TensorSpec([], dtypes.float32),
+    ])
+    def foo(a, training=True):
+      if training:
+        return a
+      else:
+        return -1.0 * a
+
+    x = constant_op.constant(1.0)
+    with self.assertRaisesRegexp(TypeError, 'only pass arguments'):
+      foo(x, training=True)
+
+    with self.assertRaisesRegexp(TypeError, 'only pass arguments'):
+      foo(x, training=False)
+
+    self.assertAllEqual(x.numpy(), foo(x).numpy())
+
+  def testInputSignatureWithPartialFunction(self):
+    self.skipTest('b/124441704')
+    def full_function(a, b, c=3.0):
+      return a, b, c
+
+    partial = functools.partial(full_function, 1, c=4)
+    a, b, c = partial(2.0)
+    signature = [tensor_spec.TensorSpec([], dtypes.float32)]
+    defined = function.defun(partial, input_signature=signature)
+    x = constant_op.constant(2.0)
+    func_a, func_b, func_c = defined(x)
+    self.assertEqual(func_a.numpy(), a)
+    self.assertEqual(func_b.numpy(), b)
+    self.assertEqual(func_c.numpy(), c)
+
+  def testInputSignatureConversionWithDefaultArg(self):
 
     def foo(a, training=True):
       if training:
@@ -1384,11 +1580,9 @@
     ]
     defined = def_function.function(foo, input_signature=signature)
     a = constant_op.constant(1.0)
-    with self.assertRaisesRegexp(
-        ValueError,
-        'When input_signature is provided, all inputs to '
-        'the Python function must be Tensors.'):
-      defined(a, training=True)
+    self.assertAllEqual(a.numpy(), defined(a))
+    self.assertAllEqual(a.numpy(), defined(a, training=True))
+    self.assertAllEqual(-a.numpy(), defined(a, training=False))
 
   def testInputSignatureWithKeywordPositionalArgs(self):
 
@@ -1403,22 +1597,22 @@
     integer = constant_op.constant(2, dtypes.int64)
 
     out1, out2 = foo(flt, integer)
-    self.assertEqual(len(foo._function_cache), 1)
+    self.assertLen(foo._function_cache, 1)
     self.assertEqual(out1.numpy(), 1.0)
     self.assertEqual(out2.numpy(), 2)
 
     out1, out2 = foo(flt=flt, integer=integer)
-    self.assertEqual(len(foo._function_cache), 1)
+    self.assertLen(foo._function_cache, 1)
     self.assertEqual(out1.numpy(), 1.0)
     self.assertEqual(out2.numpy(), 2)
 
     out1, out2 = foo(integer=integer, flt=flt)
-    self.assertEqual(len(foo._function_cache), 1)
+    self.assertLen(foo._function_cache, 1)
     self.assertEqual(out1.numpy(), 1.0)
     self.assertEqual(out2.numpy(), 2)
 
     out1, out2 = foo(flt, integer=integer)
-    self.assertEqual(len(foo._function_cache), 1)
+    self.assertLen(foo._function_cache, 1)
     self.assertEqual(out1.numpy(), 1.0)
     self.assertEqual(out2.numpy(), 2)
 
@@ -1448,27 +1642,27 @@
     a = constant_op.constant(2.0)
     b = constant_op.constant([1.0, 2.0])
     one = defined(a, b)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     two = defined(a=a, b=b)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     three = defined(b=b, a=a)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     four = defined(a, b=b)
-    self.assertEqual(len(defined._function_cache), 1)
+    self.assertLen(defined._function_cache, 1)
 
     # The next call corresponds to a new input signature, hence
     # we expect another function to be defined.
     five = defined(b, a)
-    self.assertEqual(len(defined._function_cache), 2)
+    self.assertLen(defined._function_cache, 2)
 
     six = defined(a=b, b=a)
-    self.assertEqual(len(defined._function_cache), 2)
+    self.assertLen(defined._function_cache, 2)
 
     seven = defined(b=a, a=b)
-    self.assertEqual(len(defined._function_cache), 2)
+    self.assertLen(defined._function_cache, 2)
 
     self.assertAllEqual(one, [1.0, 2.0])
     self.assertAllEqual(two, [1.0, 2.0])
@@ -1550,35 +1744,24 @@
 
         graph = ops.get_default_graph()
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 2)
+        self.assertLen(graph._functions, 2)
         functions = list(graph._functions.values())
         self.assertRegexpMatches(
             functions[0].definition.signature.name, '.*matmul.*')
         attrs = functions[0].definition.attr
-        self.assertEqual(len(attrs), 2)
+        self.assertLen(attrs, 2)
         self.assertEqual(attrs['experimental_1'].s, b'value1')
         self.assertEqual(attrs['experimental_2'].i, 2)
 
         self.assertRegexpMatches(
             functions[1].definition.signature.name, '.*add.*')
         attrs = functions[1].definition.attr
-        self.assertEqual(len(attrs), 2)
+        self.assertLen(attrs, 2)
         self.assertEqual(attrs['experimental_3'].b, True)
         self.assertEqual(attrs['experimental_4'].f, 1.0)
         # pylint: enable=protected-access
 
   def testFunctionWithInvalidAttribute(self):
-    @function.defun_with_attributes(attributes={'attr1': 'value1'})
-    def matmul(x, y):
-      return math_ops.matmul(x, y)
-
-    with self.assertRaisesRegexp(ValueError,
-                                 '.*Attribute name is not whitelisted.*'):
-      with context.graph_mode(), self.cached_session():
-        with ops.get_default_graph().as_default():
-          t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
-          matmul(t, t)
-
     @function.defun_with_attributes(attributes={'experimental_1': ['value1']})
     def add(x, y):
       return math_ops.add(x, y)
@@ -1608,7 +1791,7 @@
 
         graph = ops.get_default_graph()
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 6)
+        self.assertLen(graph._functions, 6)
         # two sets of functions, each consisting of (inference, forward, backward)
         functions = list(graph._functions.values())
         captured_function_names = [
@@ -1647,7 +1830,7 @@
         self.assertAllEqual(double.eval().reshape(-1), [2, 4, 6, 8])
         # Make sure the pre-registered function is used, and no other function
         # is added.
-        self.assertEqual(len(graph._functions), 6)
+        self.assertLen(graph._functions, 6)
         functions = list(graph._functions.values())
         for i in range(len(functions)):
           self.assertEqual(captured_function_names[i],
@@ -1684,7 +1867,7 @@
 
         graph = ops.get_default_graph()
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 6)
+        self.assertLen(graph._functions, 6)
         # two sets of functions, each consisting of (inference, forward, backward)
         functions = list(graph._functions.values())
         captured_function_names = [
@@ -1710,7 +1893,7 @@
         self.assertAllEqual([[1, 2], [3, 4]], self.evaluate(composite_t))
         # Make sure the pre-registered function is used, and no other function
         # is added.
-        self.assertEqual(len(graph._functions), 6)
+        self.assertLen(graph._functions, 6)
 
   def testRegisterFunctionWithInputSignature(self):
     def matmul(x, y):
@@ -1728,12 +1911,12 @@
 
         graph = ops.get_default_graph()
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 3)
+        self.assertLen(graph._functions, 3)
 
         # Test register function with cache, note inputs are ignored.
         function.register(defun_matmul)
         graph = ops.get_default_graph()
-        self.assertEqual(len(graph._functions), 3)
+        self.assertLen(graph._functions, 3)
 
   def testRegisterFunctionWithCache(self):
     def matmul(x, y):
@@ -1750,7 +1933,7 @@
         graph = ops.get_default_graph()
         # Only one function is registered since the input params have the same type
         # pylint: disable=protected-access
-        self.assertEqual(len(graph._functions), 3)
+        self.assertLen(graph._functions, 3)
 
   def testCallingFunctionWithDifferentVariables(self):
 
@@ -1761,8 +1944,8 @@
 
     v = resource_variable_ops.ResourceVariable(0.0)
     graph_function = foo.get_concrete_function(v)
-    self.assertEqual(len(graph_function.inputs), 1)
-    self.assertEqual(len(graph_function.captured_inputs), 0)
+    self.assertLen(graph_function.inputs, 1)
+    self.assertEmpty(graph_function.captured_inputs)
 
     self.assertEqual(float(graph_function(v)), 1.0)
     self.assertEqual(float(graph_function(v)), 2.0)
@@ -1790,34 +1973,30 @@
       graph_function('Not a Tensor.')
 
   def testSwapImplementationWithGrapplerPlugin(self):
+    # Set min_graph_nodes to -1 since the graph in this test is too small and
+    # would otherwise be ignored by grappler.
     rewrites = rewriter_config_pb2.RewriterConfig()
-    # function_optimizer has to be turn off, otherwise it will delete the
-    # registered function if it does not get called.
-    # TODO(scottzhu): Move the ExperimentalImplementationSelector to be called
-    # before function_optimizer in future.
-    rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
-    customer_optimizer = rewrites.custom_optimizers.add()
-    customer_optimizer.name = 'ExperimentalImplementationSelector'
+    rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON
     rewrites.min_graph_nodes = -1
     graph_options = config_pb2.GraphOptions(
         rewrite_options=rewrites, build_cost_model=1)
     config = config_pb2.ConfigProto(graph_options=graph_options)
 
     with context.graph_mode(), self.cached_session(
-        config=config, graph=ops.Graph(), use_gpu=True) as sess:
+        config=config, graph=ops.Graph(), use_gpu=True):
 
       @function.defun_with_attributes(
           attributes={
-              'experimental_api_implements': 'random_boost',
-              'experimental_api_preferred_device': 'CPU'
+              'api_implements': 'random_boost',
+              'api_preferred_device': 'CPU'
           })
       def cpu_boost(x):
         return math_ops.add(x, 2.0)
 
       @function.defun_with_attributes(
           attributes={
-              'experimental_api_implements': 'random_boost',
-              'experimental_api_preferred_device': 'GPU'
+              'api_implements': 'random_boost',
+              'api_preferred_device': 'GPU'
           })
       def gpu_boost(x):
         return math_ops.add(x, 4.0)
@@ -1851,18 +2030,43 @@
       with ops.Graph().as_default():
         x = constant_op.constant(11)
         maybe_add(x, True)
-        self.assertEqual(len(maybe_add._function_cache), 1)
-        self.assertEqual(len(add._function_cache), 1)
+        self.assertLen(maybe_add._function_cache, 1)
+        self.assertLen(add._function_cache, 1)
 
         maybe_add(x, False)
-        self.assertEqual(len(maybe_add._function_cache), 2)
-        self.assertEqual(len(add._function_cache), 1)
+        self.assertLen(maybe_add._function_cache, 2)
+        self.assertLen(add._function_cache, 1)
 
       with ops.Graph().as_default():
         x = constant_op.constant(11)
         maybe_add(x, True)
-        self.assertEqual(len(maybe_add._function_cache), 3)
-        self.assertEqual(len(add._function_cache), 2)
+        self.assertLen(maybe_add._function_cache, 3)
+        self.assertLen(add._function_cache, 2)
+
+  def testCacheKeyOverlappingShapes(self):
+    @function.defun
+    def defined(t):
+      return t
+
+    defined(array_ops.zeros([12, 1]))
+    self.assertLen(defined._function_cache, 1)
+
+    defined(array_ops.zeros([1, 21]))
+    self.assertLen(defined._function_cache, 2)
+
+  def testCacheKeyNestedLists(self):
+    @function.defun
+    def defined(l):
+      return l
+
+    a = constant_op.constant(1.)
+    b = constant_op.constant(2.)
+    c = constant_op.constant(3.)
+    defined([[a], b, c])
+    self.assertLen(defined._function_cache, 1)
+
+    defined([[a, b], c])
+    self.assertLen(defined._function_cache, 2)
 
   def testDecoratedMethod(self):
     m = DefunnedMiniModel()
@@ -2084,7 +2288,7 @@
     m = DefunnedMiniModel()
     m(array_ops.ones([1, 2]))
     weak_variables = weakref.WeakSet(m.variables)
-    self.assertEqual(2, len(weak_variables))
+    self.assertLen(weak_variables, 2)
     del m
     self.assertEqual([], list(weak_variables))
 
@@ -2139,13 +2343,12 @@
     def fn(x):
       return fn2(x)
 
-    try:
+    with self.assertRaises(errors.InvalidArgumentError) as cm:
       fn(2)
-      self.assertFail()
-    except errors.InvalidArgumentError as e:
-      self.assertIn('fn -> fn2', e.message)
-      self.assertIn('node assert_equal/Assert/Assert (defined at', e.message)
-      self.assertNotIn('fn3', e.message)
+    e = cm.exception
+    self.assertIn('fn -> fn2', e.message)
+    self.assertIn('node assert_equal/Assert/Assert (defined at', e.message)
+    self.assertNotIn('fn3', e.message)
 
   def testFunctionIsNotPinned(self):
     """Tests that functions aren't pinned to the CPU by the eager runtime."""
diff --git a/tensorflow/python/eager/lift_to_graph.py b/tensorflow/python/eager/lift_to_graph.py
index ad62e6d..2ed2d58 100644
--- a/tensorflow/python/eager/lift_to_graph.py
+++ b/tensorflow/python/eager/lift_to_graph.py
@@ -21,8 +21,10 @@
 
 import collections
 
+from tensorflow.python.framework import func_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import resource_variable_ops
 
 
 def _graph_inputs(op):
@@ -40,35 +42,226 @@
   pass
 
 
-def lift_to_graph(init_tensor, graph, sources=None):
-  """Copies the tensor and all its inputs recursively to the outer graph."""
-  # Check that the initializer does not depend on any placeholders.
-  if sources is None:
-    sources = set([])
+def _constant_inputs(op_or_tensor):
+  return all(_as_operation(i).type == u"Const"
+             and not _as_operation(i).control_inputs
+             for i in _graph_inputs(_as_operation(op_or_tensor)))
+
+
+def _path_from(from_op, tensor, sources):
+  """Find one path from `from_op` to `tensor`, ignoring `sources`.
+
+  Args:
+    from_op: A `tf.Operation`.
+    tensor: A `tf.Operation` or `tf.Tensor`.
+    sources: A list of `tf.Tensor`.
+
+  Returns:
+    A python string containing the path, or "??" if none is found.
+  """
   visited_ops = set([x.op for x in sources])
-  ops_to_visit = [_as_operation(init_tensor)]
-  op_outputs = collections.defaultdict(set)
+  ops_to_visit = [_as_operation(tensor)]
+  some_op_output = {}
   while ops_to_visit:
     op = ops_to_visit.pop()
     if op in visited_ops:
       continue
     visited_ops.add(op)
-    # TODO(apassos) distinguish arg placeholders, capture placeholders,
-    # and placeholders the user might directly use to initialize
-    # variables.
-    if op.type == "Placeholder":
+    if op == from_op:
+      path_op = op
+      path = [path_op]
+      final_op = _as_operation(tensor)
+      while path_op != final_op:
+        path_op = some_op_output[path_op]
+        path.append(path_op)
+      return " <- ".join(["%s (%s)" % (x.name, x.type) for x in reversed(path)])
+    else:
+      for inp in _graph_inputs(op):
+        if inp not in visited_ops and inp not in sources:
+          some_op_output[inp] = op
+          ops_to_visit.append(inp)
+  return "??"
+
+
+def _map_subgraph(init_tensor, sources, disallowed_placeholders, visited_ops,
+                  op_outputs, add_sources):
+  """Walk a Graph and capture the subgraph between init_tensor and sources.
+
+  Note: This function mutates visited_ops and op_outputs.
+
+  Args:
+    init_tensor:  A Tensor or Operation where the subgraph terminates.
+    sources:  A set of Tensors where subgraph extraction should stop.
+    disallowed_placeholders: An optional set of ops which may not appear in the
+      lifted graph. Defaults to all placeholders.
+    visited_ops: A set of operations which were visited in a prior pass.
+    op_outputs: A defaultdict containing the outputs of an op which are to be
+      copied into the new subgraph.
+    add_sources: A boolean indicating whether placeholders which are not in
+      sources should be allowed.
+
+  Returns:
+    The set of placeholders upon which init_tensor depends that are not in
+    sources.
+
+  Raises:
+    UnliftableError: if init_tensor depends on a placeholder which is not in
+      sources and add_sources is False.
+  """
+  ops_to_visit = [_as_operation(init_tensor)]
+  extra_sources = set()
+  while ops_to_visit:
+    op = ops_to_visit.pop()
+    if op in visited_ops:
+      continue
+    visited_ops.add(op)
+
+    should_raise = False
+    if disallowed_placeholders is not None and op in disallowed_placeholders:
+      should_raise = True
+    elif op.type == "Placeholder":
+      if disallowed_placeholders is None and not add_sources:
+        should_raise = True
+      extra_sources.update(op.outputs)
+
+    if should_raise:
       raise UnliftableError(
-          "Unable to lift tensor", init_tensor,
-          "because it depends transitively on placeholder ", op)
+          "Unable to lift tensor %s because it depends transitively on "
+          "placeholder %s via at least one path, e.g.: %s"
+          % (repr(init_tensor), repr(op), _path_from(op, init_tensor, sources)))
     for inp in _graph_inputs(op):
       op_outputs[inp].add(op)
-      if inp not in visited_ops and inp not in sources:
+      if inp not in visited_ops and inp not in (sources or extra_sources):
         ops_to_visit.append(inp)
+
+  return extra_sources
+
+
+def _copy_non_source(op, graph, op_map):
+  """Copy an op directly to a given graph.
+
+  This function assumes that all of the inputs to an op have already been
+  copied.
+
+  Args:
+    op: The op to be copied.
+    graph: The destination graph.
+    op_map: A dict mapping ops and tensors in the old graph to the new one.
+  """
+  copied_inputs = [op_map[x] for x in op.inputs]
+  copied_control_inputs = [op_map[x] for x in op.control_inputs]
+  with ops.control_dependencies(copied_control_inputs), ops.device(op.device):
+    copied_op = graph.create_op(
+        op_type=op.type,
+        inputs=copied_inputs,
+        dtypes=[x.dtype for x in op.outputs],
+        attrs=op.node_def.attr,
+        name=op.name)
+  op_map[op] = copied_op
+  for i, o in enumerate(op.outputs):
+    op_map[o] = copied_op.outputs[i]
+
+
+def _copy_source(s, graph, op_map, handle_captures, inverse_captures):
+  """Create a source in a graph based on a Tensor from a different graph.
+
+  This function creates a placeholder analog of `s` in a graph with the
+  following behavior:
+
+  1) If s is a captured Tensor or Variable and handle_captures is set to True,
+     simply capture it in the new graph as well.
+
+  2) If s is a PlaceholderWithDefault whose default is a constant, preserve
+     said default in the new graph.
+
+  3) When applicable, copy resource variable metadata from `s` to the newly
+     created placeholder.
+
+  Args:
+    s: The source of interest.
+    graph: The destination graph.
+    op_map: A dict mapping ops and tensors in the old graph to the new one.
+    handle_captures: A boolean indicating whether to re-capture s in the new
+      graph or simply create a vanilla placeholder.
+    inverse_captures: A dict mapping s back to the Tensor or Variable that it
+      captures.
+  """
+  if handle_captures and s in inverse_captures:
+    copied_placeholder = graph.capture(inverse_captures[s], name=s.op.name)
+  elif s.op.type == "PlaceholderWithDefault" and _constant_inputs(s):
+    # Copy the default value to the graph.
+    default_value = s.op.inputs[0]
+    _copy_non_source(op=default_value.op, graph=graph, op_map=op_map)
+
+    with ops.device(s.op.device):
+      copied_placeholder = array_ops.placeholder_with_default(
+          input=op_map[default_value], shape=s.shape, name=s.op.name)
+  else:
+    with ops.device(s.op.device):
+      copied_placeholder = array_ops.placeholder(
+          dtype=s.dtype, shape=s.shape, name=s.op.name)
+
+  base_handle = resource_variable_ops.get_resource_handle_data(s)
+  if base_handle.shape_and_type:
+    resource_variable_ops._set_handle_shapes_and_types(  # pylint: disable=protected-access
+        copied_placeholder,
+        base_handle,
+        graph_mode=True)
+
+  op_map[s] = copied_placeholder
+
+
+def lift_to_graph(init_tensors, graph, sources=None,
+                  disallowed_placeholders=None, add_sources=False,
+                  handle_captures=False, base_graph=None):
+  """Copies the tensor and all its inputs recursively to the outer graph.
+
+  Args:
+    init_tensors: The Tensors to lift.
+    graph: The graph to lift to.
+    sources: Optional sequence of nodes to start from. If omitted, the whole
+      subgraph which feeds into `init_tensors` is lifted.
+    disallowed_placeholders: An optional set of ops which may not appear in the
+      lifted graph. Defaults to all placeholders.
+    add_sources: A boolean indicating whether placeholders which are not in
+      sources should be allowed.
+    handle_captures: A boolean indicating whether to re-capture captured
+      tensors in the new graph or simply create vanilla placeholders.
+    base_graph: The graph from which to lift ops. This will be inferred if not
+      specified.
+
+  Returns:
+    A mapping from ops and tensors in `base_graph` to their copies in `graph`.
+
+  Raises:
+    UnliftableError: If a placeholder blocks lifting.
+  """
+  variable_init_tensors = {i for i in init_tensors if isinstance(
+      i, resource_variable_ops.ResourceVariable)}
+  init_tensors = set(init_tensors).difference(variable_init_tensors)
+  base_graph = base_graph or list(init_tensors)[0].graph
+
+  # Check that the initializer does not depend on any placeholders.
+  sources = set(sources or [])
+  visited_ops = set([x.op for x in sources])
+  op_outputs = collections.defaultdict(set)
+
+  # First we extract the subgraph between init_tensors and sources.
+  for init_tensor in init_tensors:
+    sources.update(_map_subgraph(
+        init_tensor=init_tensor,
+        sources=sources,
+        disallowed_placeholders=disallowed_placeholders,
+        visited_ops=visited_ops,
+        op_outputs=op_outputs,
+        add_sources=add_sources))
+
   # Topologically sort the nodes we've extracted. Now we know how many of their
   # outputs are part of this subgraph.
   ops_to_copy = []
   marked_ops = set([])
-  ops_to_visit = [_as_operation(init_tensor)]
+  ops_to_visit = [_as_operation(t) for t in init_tensors
+                  if not op_outputs[_as_operation(t)]]
   while ops_to_visit:
     op = ops_to_visit.pop()
     if op in marked_ops:
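
For orientation, a minimal sketch of the extended entry point: it now takes a
collection of init_tensors and returns the op/tensor mapping (plain Graphs
here are illustrative; FuncGraph-to-FuncGraph lifting additionally
re-captures tensors when handle_captures=True):

    from tensorflow.python.eager import lift_to_graph
    from tensorflow.python.framework import constant_op
    from tensorflow.python.framework import ops

    inner = ops.Graph()
    with inner.as_default():
      x = constant_op.constant(1.0)
      y = x + 1.0

    outer = ops.Graph()
    op_map = lift_to_graph.lift_to_graph([y], outer)
    lifted_y = op_map[y]  # The copy of `y` living in `outer`.
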
@@ -76,27 +269,34 @@
     marked_ops.add(op)
     ops_to_copy.append(op)
     for inp in _graph_inputs(op):
-      if all(x in marked_ops for x in op_outputs[inp]) and inp not in sources:
+      if (all(x in marked_ops for x in op_outputs[inp]) and
+          inp not in sources):
         ops_to_visit.append(inp)
+
+  # When lifting from one FuncGraph to another, we will need to capture the
+  # relevant tensors as well.
+  captures = collections.OrderedDict()
+  if (isinstance(base_graph, func_graph.FuncGraph) and
+      isinstance(graph, func_graph.FuncGraph)):
+    captures = base_graph.captures
+  inverse_captures = {v: k for k, v in captures.items()}
+
   # ops_to_copy now holds a reverse topologically sorted list of ops which
   # ends in the initializer. We copy those to the outermost graph and
   # build the initialization op there.
   with graph.as_default():
-    op_map = {}
+    op_map = {i: i for i in variable_init_tensors}  # Pass through variables.
     source_ops = set()
     for s in sources:
       source_ops.add(s.op)
-      op_map[s] = array_ops.placeholder(dtype=s.dtype, shape=s.shape)
+      _copy_source(s=s, graph=graph, op_map=op_map,
+                   handle_captures=handle_captures,
+                   inverse_captures=inverse_captures)
+
     for op in reversed(ops_to_copy):
       if op in source_ops:
         continue
-      copied_inputs = [op_map[x] for x in op.inputs]
-      copied_control_inputs = [op_map[x] for x in op.control_inputs]
-      with ops.control_dependencies(copied_control_inputs):
-        copied_op = graph.create_op(
-            op.type, copied_inputs, [x.dtype for x in op.outputs],
-            attrs=op.node_def.attr)
-      op_map[op] = copied_op
-      for i, o in enumerate(op.outputs):
-        op_map[o] = copied_op.outputs[i]
+
+      _copy_non_source(op=op, graph=graph, op_map=op_map)
+
     return op_map
diff --git a/tensorflow/python/eager/profiler.py b/tensorflow/python/eager/profiler.py
index a883122..659c0cc 100644
--- a/tensorflow/python/eager/profiler.py
+++ b/tensorflow/python/eager/profiler.py
@@ -25,6 +25,7 @@
 from tensorflow.python.eager import context
 from tensorflow.python.framework import c_api_util
 from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging as logging
 
 LOGDIR_PLUGIN = 'plugins/profile'
 
@@ -45,7 +46,17 @@
   if _profiler is not None:
     raise AssertionError('Another profiler is running.')
   with _profiler_lock:
-    _profiler = pywrap_tensorflow.TFE_NewProfiler(context.context()._handle)  # pylint: disable=protected-access
+    profiler_context = pywrap_tensorflow.TFE_NewProfilerContext()
+    if context.default_execution_mode == context.EAGER_MODE:
+      pywrap_tensorflow.TFE_ProfilerContextSetEagerContext(
+          profiler_context,
+          context.context()._handle)  # pylint: disable=protected-access
+    _profiler = pywrap_tensorflow.TFE_NewProfiler(profiler_context)
+    pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context)
+    if not pywrap_tensorflow.TFE_ProfilerIsOk(_profiler):
+      logging.warning('Another profiler session is running, probably created '
+                      'by the profiler server. Please avoid using the profiler '
+                      'server and profiler APIs at the same time.')
 
 
 def stop():
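
The Python-level contract is unchanged by the context plumbing: start() opens
a profiling session and stop() closes it, returning the serialized profile.
Sketch:

    from tensorflow.python.eager import profiler

    profiler.start()
    # ... run the eager ops to be profiled ...
    result = profiler.stop()  # Serialized profile bytes.
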
@@ -79,7 +90,8 @@
   """Start a profiler grpc server that listens to given port.
 
   The profiler server will keep the program running even after training finishes.
-  Please shutdown the server with CTRL-C. The service defined in
+  Please shut down the server with CTRL-C. It can be used in both eager and
+  graph mode. The service is defined in
   tensorflow/contrib/tpu/profiler/tpu_profiler.proto. Please use
   tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture a traceable
   file, following https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace
@@ -87,9 +99,13 @@
   Args:
     port: port profiler server listens to.
   """
-  pywrap_tensorflow.TFE_StartProfilerServer(
-      context.context()._handle,  # pylint: disable=protected-access
-      port)
+  profiler_context = pywrap_tensorflow.TFE_NewProfilerContext()
+  if context.default_execution_mode == context.EAGER_MODE:
+    pywrap_tensorflow.TFE_ProfilerContextSetEagerContext(
+        profiler_context,
+        context.context()._handle)  # pylint: disable=protected-access
+  pywrap_tensorflow.TFE_StartProfilerServer(profiler_context, port)
+  pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context)
 
 
 class Profiler(object):
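
And the server entry point, now usable in graph mode as well (the port number
is illustrative):

    from tensorflow.python.eager import profiler

    # Keeps the process alive serving profiling RPCs; stop with CTRL-C.
    profiler.start_profiler_server(6009)
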
diff --git a/tensorflow/python/eager/profiler_test.py b/tensorflow/python/eager/profiler_test.py
index 6940f56..ba19e17 100644
--- a/tensorflow/python/eager/profiler_test.py
+++ b/tensorflow/python/eager/profiler_test.py
@@ -18,7 +18,7 @@
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.tpu.profiler import trace_events_pb2
+from tensorflow.core.profiler import trace_events_pb2
 from tensorflow.python.eager import profiler
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index eb2f28d..350c8d4 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -16,7 +16,6 @@
 #include <cstring>
 #include <thread>
 
-#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/python/eager/pywrap_tfe.h"
 
 #include "absl/strings/str_cat.h"
@@ -25,6 +24,7 @@
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/eager/c_api_internal.h"
 #include "tensorflow/c/eager/tape.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
@@ -264,7 +264,8 @@
 }
 
 bool SetOpAttrList(
-    TFE_Op* op, const char* key, PyObject* py_list, TF_AttrType type,
+    TFE_Context* ctx, TFE_Op* op, const char* key, PyObject* py_list,
+    TF_AttrType type,
     tensorflow::gtl::FlatMap<string, tensorflow::int64>* attr_list_sizes,
     TF_Status* status) {
   if (!PySequence_Check(py_list)) {
@@ -369,6 +370,40 @@
     TFE_OpSetAttrShapeList(op, key, dims.get(), num_dims.get(), num_values,
                            status);
     if (TF_GetCode(status) != TF_OK) return false;
+  } else if (type == TF_ATTR_FUNC) {
+    std::unique_ptr<const TFE_Op*[]> funcs(new const TFE_Op*[num_values]);
+    for (int i = 0; i < num_values; ++i) {
+      tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i));
+      // Allow:
+      // (1) String function name, OR
+      // (2) A Python object with a .name attribute
+      //     (A crude test for being a
+      //     tensorflow.python.framework.function._DefinedFunction)
+      //     (which is what the various "defun" or "Defun" decorators do).
+      // And in the future also allow an object that can encapsulate
+      // the function name and its attribute values.
+      tensorflow::StringPiece func_name;
+      if (!ParseStringValue(key, py_value.get(), status, &func_name)) {
+        PyObject* name_attr = PyObject_GetAttrString(py_value.get(), "name");
+        if (name_attr == nullptr ||
+            !ParseStringValue(key, name_attr, status, &func_name)) {
+          TF_SetStatus(
+              status, TF_INVALID_ARGUMENT,
+              tensorflow::strings::StrCat(
+                  "unable to set function value attribute from a ",
+                  py_value.get()->ob_type->tp_name,
+                  " object. If you think this is an error, please file an "
+                  "issue at "
+                  "https://github.com/tensorflow/tensorflow/issues/new")
+                  .c_str());
+          return false;
+        }
+      }
+      funcs[i] = TFE_NewOp(ctx, func_name.data(), status);
+      if (TF_GetCode(status) != TF_OK) return false;
+    }
+    TFE_OpSetAttrFunctionList(op, key, funcs.get(), num_values);
+    if (TF_GetCode(status) != TF_OK) return false;
   } else {
     TF_SetStatus(status, TF_UNIMPLEMENTED,
                  tensorflow::strings::StrCat("Attr ", key,
@@ -619,7 +654,8 @@
     const TF_AttrType type = TFE_OpGetAttrType(op, key, &is_list, out_status);
     if (TF_GetCode(out_status) != TF_OK) return;
     if (is_list != 0) {
-      if (!SetOpAttrList(op, key, py_value, type, nullptr, out_status)) return;
+      if (!SetOpAttrList(ctx, op, key, py_value, type, nullptr, out_status))
+        return;
     } else {
       if (!SetOpAttrScalar(ctx, op, key, py_value, type, nullptr, out_status))
         return;
@@ -649,7 +685,8 @@
     }
   } else {
     if (is_list != 0) {
-      SetOpAttrList(op, attr_name, attr_value, type, attr_list_sizes, status);
+      SetOpAttrList(ctx, op, attr_name, attr_value, type, attr_list_sizes,
+                    status);
     } else {
       SetOpAttrScalar(ctx, op, attr_name, attr_value, type, attr_list_sizes,
                       status);
@@ -1011,8 +1048,18 @@
   void MarkAsResult(PyObject* gradient) const final { Py_INCREF(gradient); }
 
   PyObject* Zeros(const PyTapeTensor& tensor) const final {
+    if (PyErr_Occurred()) {
+      return nullptr;
+    }
     PyObject* py_shape = tensor.GetShape();
+    if (PyErr_Occurred()) {
+      return nullptr;
+    }
     PyObject* py_dtype = tensor.GetDType();
+    if (PyErr_Occurred()) {
+      Py_DECREF(py_shape);
+      return nullptr;
+    }
     PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype);
     PyObject* result = PyEval_CallObject(zeros_fn_, arg_list);
     Py_DECREF(arg_list);
@@ -1022,6 +1069,9 @@
   }
 
   PyObject* Ones(const PyTapeTensor& tensor) const final {
+    if (PyErr_Occurred()) {
+      return nullptr;
+    }
     PyObject* py_shape = tensor.GetShape();
     PyObject* py_dtype = tensor.GetDType();
     PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype);
@@ -2086,6 +2136,9 @@
         PyBackwardFunction* function =
             new PyBackwardFunction([op_name, attrs, num_inputs, op_inputs,
                                     op_outputs](PyObject* output_grads) {
+              if (PyErr_Occurred()) {
+                return static_cast<PyObject*>(nullptr);
+              }
               tensorflow::Safe_PyObjectPtr callback_args(
                   Py_BuildValue("OOOOOO", op_name, attrs, num_inputs, op_inputs,
                                 op_outputs, output_grads));
@@ -2411,14 +2464,14 @@
 
 bool RunCallbacks(
     const FastPathOpExecInfo& op_exec_info, PyObject* args,
-    const std::vector<tensorflow::Safe_PyObjectPtr>& flattened_inputs,
-    const std::vector<tensorflow::Safe_PyObjectPtr>& flattened_attrs,
+    const std::vector<tensorflow::Safe_PyObjectPtr>* const flattened_inputs,
+    const std::vector<tensorflow::Safe_PyObjectPtr>* const flattened_attrs,
     PyObject* flattened_result) {
   if (!op_exec_info.run_callbacks) return true;
 
-  tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(flattened_inputs.size()));
-  for (int i = 0; i < flattened_inputs.size(); i++) {
-    PyObject* input = flattened_inputs[i].get();
+  tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(flattened_inputs->size()));
+  for (int i = 0; i < flattened_inputs->size(); i++) {
+    PyObject* input = (*flattened_inputs)[i].get();
     Py_INCREF(input);
     PyTuple_SET_ITEM(inputs.get(), i, input);
   }
@@ -2426,7 +2479,7 @@
   int num_non_inferred_attrs = PyTuple_GET_SIZE(args) -
                                op_exec_info.op_def->input_arg_size() -
                                kFastPathExecuteInputStartIndex;
-  int num_attrs = flattened_attrs.size() + num_non_inferred_attrs;
+  int num_attrs = flattened_attrs->size() + num_non_inferred_attrs;
   tensorflow::Safe_PyObjectPtr attrs(PyTuple_New(num_attrs));
 
   for (int i = 0; i < num_non_inferred_attrs; i++) {
@@ -2438,7 +2491,7 @@
   }
   for (int i = num_non_inferred_attrs; i < num_attrs; i++) {
     PyObject* attr_or_name =
-        flattened_attrs.at(i - num_non_inferred_attrs).get();
+        flattened_attrs->at(i - num_non_inferred_attrs).get();
     Py_INCREF(attr_or_name);
     PyTuple_SET_ITEM(attrs.get(), i, attr_or_name);
   }
@@ -2676,9 +2729,10 @@
       for (Py_ssize_t j = 0; j < len; j++) {
         PyObject* py_input = PySequence_Fast_GET_ITEM(input, j);
         tensorflow::Safe_PyObjectPtr py_eager_tensor;
-        if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor,
-                             []() { Py_RETURN_NONE; },
-                             [](const TF_DataType& dtype) {}, status)) {
+        if (!ConvertToTensor(
+                op_exec_info, py_input, &py_eager_tensor,
+                []() { Py_RETURN_NONE; }, [](const TF_DataType& dtype) {},
+                status)) {
           return nullptr;
         }
 
@@ -2757,8 +2811,8 @@
     PyList_SET_ITEM(flat_result.get(), i, EagerTensorFromHandle(retvals[i]));
   }
 
-  if (!RunCallbacks(op_exec_info, args, *flattened_inputs, *flattened_attrs,
-                    flat_result.get())) {
+  if (!RunCallbacks(op_exec_info, args, flattened_inputs.get(),
+                    flattened_attrs.get(), flat_result.get())) {
     return nullptr;
   }
 
@@ -2823,10 +2877,13 @@
 const char kTensor[] = "T";
 const char kIndexedSlices[] = "I";
 const char kList[] = "L";
+const char kListEnd[] = "l";
 const char kTuple[] = "U";
+const char kTupleEnd[] = "u";
 const char kDict[] = "D";
 const char kRaw[] = "R";
 const char kShape[] = "s";
+const char kShapeDelim[] = "-";
 const char kDType[] = "d";
 const char kNone[] = "n";
 
@@ -2866,7 +2923,7 @@
 
     absl::StrAppend(&result->str, kShape);
     for (tensorflow::int64 dim_size : tensor_shape.dim_sizes()) {
-      absl::StrAppend(&result->str, dim_size);
+      absl::StrAppend(&result->str, dim_size, kShapeDelim);
     }
 
     return tensorflow::Status::OK();
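Appending `kShapeDelim` after every dimension makes the shape encoding injective; without it, distinct shapes can collide in the cache key. A small self-contained check of that property, mirroring the loop above in Python:

```python
# Why the "-" delimiter matters: concatenated dim sizes are ambiguous.
def encode_shape(dim_sizes, delim=''):
    return 's' + ''.join(str(d) + delim for d in dim_sizes)

# Without a delimiter, (1, 12) and (11, 2) produce the same key.
assert encode_shape([1, 12]) == encode_shape([11, 2]) == 's112'
# With kShapeDelim = '-', the encodings stay distinct.
assert encode_shape([1, 12], '-') == 's1-12-'
assert encode_shape([11, 2], '-') == 's11-2-'
```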
@@ -2928,6 +2985,7 @@
 
 // This function doesn't set the type of the sequence before encoding it.
 tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
+                                         const char* end_type,
                                          EncodeResult* result) {
   tensorflow::Safe_PyObjectPtr arg_seq(
       PySequence_Fast(arg, "unable to create seq from list/tuple"));
@@ -2942,6 +3000,7 @@
       TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(item, result));
     }
   }
+  absl::StrAppend(&result->str, end_type);
 
   return tensorflow::Status::OK();
 }
@@ -2980,9 +3039,9 @@
       TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(dense_shape.get(), result));
     }
   } else if (PyList_Check(arg)) {
-    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kList, result));
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kList, kListEnd, result));
   } else if (PyTuple_Check(arg)) {
-    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kTuple, result));
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kTuple, kTupleEnd, result));
   } else if (PyDict_Check(arg)) {
     tensorflow::Safe_PyObjectPtr keys(PyDict_Keys(arg));
     if (PyList_Sort(keys.get()) == -1) {
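The `kListEnd`/`kTupleEnd` markers close each sequence in the signature encoding, which disambiguates nesting the same way the shape delimiter disambiguates dimensions. A hedged sketch, simplified to lists of raw leaves:

```python
# Simplified model of TFE_Py_EncodeSequence: 'L' opens a list, kRaw ('R')
# marks a leaf, and the end marker ('l') closes the list.
def encode(arg, end=''):
    if isinstance(arg, list):
        return 'L' + ''.join(encode(x, end) for x in arg) + end
    return 'R'

# Without end markers, different nestings collide...
assert encode([['x'], 'y']) == encode([['x', 'y']]) == 'LLRR'
# ...but with kListEnd appended they remain distinct.
assert encode([['x'], 'y'], 'l') == 'LLRlRl'
assert encode([['x', 'y']], 'l') == 'LLRRll'
```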
diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py
index 0ee2ff6..0d8845b 100644
--- a/tensorflow/python/eager/tensor_test.py
+++ b/tensorflow/python/eager/tensor_test.py
@@ -339,6 +339,24 @@
   def testConvertToTensorAllowsOverflow(self):
     _ = ops.convert_to_tensor(123456789, dtype=dtypes.uint8)
 
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  @test_util.run_in_graph_and_eager_modes
+  def testConvertToTensorNumpyZeroDim(self):
+    for np_type, dtype in [(np.int32, dtypes.int32),
+                           (np.half, dtypes.half),
+                           (np.float32, dtypes.float32)]:
+      x = ops.convert_to_tensor([np.array(65, dtype=np_type),
+                                 np.array(16, dtype=np_type)])
+      self.assertEqual(x.dtype, dtype)
+      self.assertAllEqual(x, [65, 16])
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  @test_util.run_in_graph_and_eager_modes
+  def testConvertToTensorNumpyScalar(self):
+    x = ops.convert_to_tensor([np.asscalar(np.array(321, dtype=np.int)),
+                               np.asscalar(np.array(16, dtype=np.int))])
+    self.assertAllEqual(x, [321, 16])
+
   def testEagerTensorError(self):
     with self.assertRaisesRegexp(
         TypeError,
@@ -347,7 +365,6 @@
       _ = ops.convert_to_tensor(1., dtype=dtypes.int32)
 
 
-
 class TFETensorUtilTest(test_util.TensorFlowTestCase):
 
   def testListOfThree(self):
diff --git a/tensorflow/python/eager/wrap_function.py b/tensorflow/python/eager/wrap_function.py
index f8fbda8..8eb0200 100644
--- a/tensorflow/python/eager/wrap_function.py
+++ b/tensorflow/python/eager/wrap_function.py
@@ -19,12 +19,15 @@
 from __future__ import division
 from __future__ import print_function
 
+import weakref
+
+from tensorflow.python.eager import def_function
 from tensorflow.python.eager import function
 from tensorflow.python.eager import lift_to_graph
 from tensorflow.python.framework import func_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
@@ -59,7 +62,7 @@
       return self._fn(*args, **kwargs)
 
 
-# TODO(allenl): make this checkpointable
+# TODO(allenl): make this trackable
 class WrappedFunction(function.ConcreteFunction):
   """Wraps a tf V1 piece of code in a function."""
 
@@ -67,6 +70,54 @@
     super(WrappedFunction, self).__init__(
         fn_graph, attrs=attrs, signature=signature)
     self._variable_holder = variable_holder
+    if ops.executing_eagerly_outside_functions():
+      # TODO(allenl): Make this work in 1.x?
+      self._lift_unlifted_variables()
+
+  def _lift_unlifted_variables(self):
+    """Finds resource variables and lifts them into the outer context.
+
+    When we import a GraphDef inside a wrap_function, no Python graph building
+    code runs. This means we get VarHandleOps which create variable resources,
+    but no corresponding Python objects. Leaving them like this works but gives
+    the user no way to interact with or modify the variables outside the graph.
+
+    This method searches for variables and lifts them out as regular variable
+    objects when possible, indicating to the FuncGraph that they are captures.
+    """
+    with self.graph.as_default():
+      collection_variables = (
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+          + ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))
+      existing_captures = set(self.graph.internal_captures)
+      lifted_variables = {}
+      for old_variable in collection_variables:
+        if (old_variable._in_graph_mode  # pylint: disable=protected-access
+            and isinstance(old_variable,
+                           resource_variable_ops.ResourceVariable)):
+          if old_variable.handle in existing_captures:
+            continue
+          new_variable = def_function.UnliftedInitializerVariable(
+              array_ops.placeholder(
+                  name="unused_{}_initializer".format(old_variable.op.name),
+                  shape=old_variable.shape,
+                  dtype=old_variable.dtype),
+              name=old_variable.op.name,
+              trainable=old_variable.trainable)
+          self.graph.captures[new_variable.handle] = old_variable.handle
+          existing_captures.add(old_variable.handle)
+          lifted_variables[old_variable] = new_variable
+          # pylint: disable=protected-access
+          self._variable_holder._variables.append(new_variable)
+          self.graph._weak_variables.append(weakref.ref(new_variable))
+          # pylint: enable=protected-access
+      # Update the graph's collections, partly for the user and partly so this
+      # function is idempotent when it runs again in prune() calls.
+      for collection_name in [ops.GraphKeys.GLOBAL_VARIABLES,
+                              ops.GraphKeys.LOCAL_VARIABLES]:
+        mutable_collection = ops.get_collection_ref(collection_name)
+        for index, current in enumerate(mutable_collection):
+          mutable_collection[index] = lifted_variables.get(current, current)
 
   def prune(self, feeds, fetches):
     flat_feeds, flat_fetches = nest.flatten(feeds), nest.flatten(fetches)
@@ -103,15 +154,16 @@
           sink_tensor = identity_fetches[0]
         else:
           identity_fetches = []
-          sink_tensor = control_flow_ops.no_op()
+          sink_tensor = array_ops.zeros([])
     lift_map = lift_to_graph.lift_to_graph(
-        sink_tensor, pruned_graph,
-        sources=flat_feeds + internal_captures)
+        [sink_tensor], pruned_graph, sources=flat_feeds + internal_captures)
     for original_fetch, identity_fetch in zip(
         tensor_fetches, identity_fetches):
       lift_map[original_fetch] = lift_map[identity_fetch]
     pruned_graph.outputs.extend(
         lift_map[x] for x in flat_fetches if isinstance(x, ops.Tensor))
+    if not tensor_fetches:
+      pruned_graph.outputs.append(lift_map[sink_tensor])
     for external_capture, internal_capture in self.graph.captures.items():
       pruned_graph.captures[external_capture] = lift_map[internal_capture]
     pruned_graph.inputs.extend(lift_map[x] for x in flat_feeds)
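The effect of `_lift_unlifted_variables` is easiest to see from the caller's side: variables created inside the wrapped v1-style function become ordinary Python objects on the wrapped graph. A hedged usage sketch, assuming the 1.x-era `wrap_function` API of this change behaves as its docstrings describe:

```python
# A minimal sketch (API usage assumed, not guaranteed by this patch):
# variables defined inside the wrapped function get lifted and show up
# as Python variable objects instead of bare VarHandleOps.
from tensorflow.python.eager import wrap_function
from tensorflow.python.ops import variable_scope


def make_counter():
  v = variable_scope.get_variable('count', shape=[], use_resource=True)
  return v.assign_add(1.)

f = wrap_function.wrap_function(make_counter, signature=[])
# After lifting, the graph exposes real variable objects.
print([v.name for v in f.graph.variables])
```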
diff --git a/tensorflow/python/eager/wrap_function_test.py b/tensorflow/python/eager/wrap_function_test.py
index bb2b288..6225b13 100644
--- a/tensorflow/python/eager/wrap_function_test.py
+++ b/tensorflow/python/eager/wrap_function_test.py
@@ -206,7 +206,7 @@
         fetches=(f_wrapped.graph.get_operation_by_name('increment'),
                  f_wrapped.graph.get_tensor_by_name('other:0')))
     first_output, second_output = increments(constant_op.constant(2))
-    self.assertEqual(['Placeholder:0', 'Placeholder_1:0'],
+    self.assertEqual(['step:0', 'increment/resource:0'],
                      [t.name for t in increments.inputs])
     self.assertIs(None, first_output)
     self.assertEqual(1, second_output.numpy())
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index 789887e53..8caf46e 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -13,6 +13,7 @@
     deps = [
         ":feature_column",
         ":feature_column_v2",
+        ":sequence_feature_column",
         "//tensorflow/python:util",
     ],
 )
@@ -164,3 +165,59 @@
         "no_windows",
     ],
 )
+
+py_library(
+    name = "sequence_feature_column",
+    srcs = ["sequence_feature_column.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":feature_column_v2",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:check_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:variable_scope",
+    ],
+)
+
+tf_py_test(
+    name = "sequence_feature_column_test",
+    srcs = ["sequence_feature_column_test.py"],
+    additional_deps = [
+        ":feature_column_v2",
+        ":feature_column_v2_test",
+        ":sequence_feature_column",
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:training",
+    ],
+    tags = ["no_pip"],
+)
+
+py_test(
+    name = "sequence_feature_column_integration_test",
+    srcs = ["sequence_feature_column_integration_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":feature_column_v2",
+        ":sequence_feature_column",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:util",
+        "//tensorflow/python/keras:layers",
+    ],
+)
diff --git a/tensorflow/python/feature_column/feature_column_lib.py b/tensorflow/python/feature_column/feature_column_lib.py
index 68a2712..1595040 100644
--- a/tensorflow/python/feature_column/feature_column_lib.py
+++ b/tensorflow/python/feature_column/feature_column_lib.py
@@ -21,4 +21,5 @@
 # pylint: disable=unused-import,line-too-long,wildcard-import
 from tensorflow.python.feature_column.feature_column import *
 from tensorflow.python.feature_column.feature_column_v2 import *
+from tensorflow.python.feature_column.sequence_feature_column import *
 # pylint: enable=unused-import,line-too-long
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index edc3719..3b9f527 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -141,11 +141,11 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.engine.base_layer import Layer
 # TODO(b/118385027): Dependency on keras can be problematic if Keras moves out
 # of the main repo.
 from tensorflow.python.keras import utils
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
@@ -162,7 +162,7 @@
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import keras_export
@@ -380,7 +380,7 @@
     return array_ops.concat(output_tensors, -1)
 
 
-@keras_export('keras.layers.DenseFeatures', v1=[])
+@keras_export('keras.layers.DenseFeatures')
 class DenseFeatures(_BaseFeaturesLayer):
   """A layer that produces a dense `Tensor` based on given `feature_columns`.
 
@@ -3192,7 +3192,7 @@
         sparse_tensors,
         weight_collections=weight_collections,
         trainable=trainable)
-    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+    sequence_length = _sequence_length_from_sparse_tensor(
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
@@ -3228,7 +3228,7 @@
                    '`DenseFeatures` or `LinearModel` instead.')
 
 
-class SharedEmbeddingColumnCreator(tracking.AutoCheckpointable):
+class SharedEmbeddingColumnCreator(tracking.AutoTrackable):
 
   def __init__(self,
                dimension,
@@ -3376,7 +3376,7 @@
                                                    state_manager)
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+    sequence_length = _sequence_length_from_sparse_tensor(
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
@@ -4426,7 +4426,7 @@
     dense_tensor = transformation_cache.get(self, state_manager)
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+    sequence_length = _sequence_length_from_sparse_tensor(
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
@@ -4455,7 +4455,7 @@
     # representation created by _transform_feature.
     dense_tensor = inputs.get(self)
     sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
-    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+    sequence_length = _sequence_length_from_sparse_tensor(
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
@@ -4509,6 +4509,31 @@
                 expected_batch_size, batch_size))
 
 
+def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1):
+  """Returns a [batch_size] Tensor with per-example sequence length."""
+  with ops.name_scope(None, 'sequence_length') as name_scope:
+    row_ids = sp_tensor.indices[:, 0]
+    column_ids = sp_tensor.indices[:, 1]
+    # Add one so the max column index in each row equals that row's length.
+    column_ids += array_ops.ones_like(column_ids)
+    # Get the number of elements we will have per example/row
+    seq_length = math_ops.segment_max(column_ids, segment_ids=row_ids)
+
+    # The raw values are grouped according to num_elements;
+    # how many entities will we have after grouping?
+    # Example: orig tensor [[1, 2], [3]], col_ids = (0, 1, 0),
+    # row_ids = (0, 0, 1), seq_length = [2, 1]. If num_elements = 2,
+    # these will get grouped, and the final seq_length is [1, 1].
+    seq_length = math_ops.cast(
+        math_ops.ceil(seq_length / num_elements), dtypes.int64)
+
+    # If the last n rows do not have ids, seq_length will have shape
+    # [batch_size - n]. Pad the remaining values with zeros.
+    n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1]
+    padding = array_ops.zeros(n_pad, dtype=seq_length.dtype)
+    return array_ops.concat([seq_length, padding], axis=0, name=name_scope)
+
+
 class SequenceCategoricalColumn(
     CategoricalColumn,
     fc_old._SequenceCategoricalColumn,  # pylint: disable=protected-access
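Worked numerically, `_sequence_length_from_sparse_tensor` reproduces the example in its comments. A short eager-style check, mirroring the function body with plain ops:

```python
# Checking the documented example: orig tensor [[1, 2], [3]] has sparse
# indices (0,0), (0,1), (1,0), so col_ids = (0, 1, 0), row_ids = (0, 0, 1).
import tensorflow as tf

indices = tf.constant([[0, 0], [0, 1], [1, 0]], dtype=tf.int64)
row_ids = indices[:, 0]
col_ids = indices[:, 1] + 1              # lengths, not indices
seq_length = tf.math.segment_max(col_ids, segment_ids=row_ids)  # [2, 1]
grouped = tf.cast(tf.math.ceil(seq_length / 2), tf.int64)       # [1, 1]
```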
diff --git a/tensorflow/python/feature_column/sequence_feature_column.py b/tensorflow/python/feature_column/sequence_feature_column.py
new file mode 100644
index 0000000..bc58c41
--- /dev/null
+++ b/tensorflow/python/feature_column/sequence_feature_column.py
@@ -0,0 +1,588 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""This API defines FeatureColumn for sequential input.
+
+NOTE: This API is a work in progress and will likely be changing frequently.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import collections
+
+
+from tensorflow.python.feature_column import feature_column_v2 as fc
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util.tf_export import keras_export
+from tensorflow.python.util.tf_export import tf_export
+
+# pylint: disable=protected-access
+
+
+@keras_export('keras.experimental.SequenceFeatures')
+class SequenceFeatures(fc._BaseFeaturesLayer):
+  """A layer for sequence input.
+
+    All `feature_columns` must be sequence dense columns with the same
+    `sequence_length`. The output of this method can be fed into sequence
+    networks, such as RNN.
+
+    The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
+    `T` is the maximum sequence length for this batch, which could differ from
+    batch to batch.
+
+    If multiple `feature_columns` are given, each with `num_elements` `Di`,
+    their outputs are concatenated, so the final `Tensor` has shape
+    `[batch_size, T, D0 + D1 + ... + Dn]`.
+
+    Example:
+
+    ```python
+    rating = sequence_numeric_column('rating')
+    watches = sequence_categorical_column_with_identity(
+        'watches', num_buckets=1000)
+    watches_embedding = embedding_column(watches, dimension=10)
+    columns = [rating, watches_embedding]
+
+    sequence_input_layer = SequenceFeatures(columns)
+    features = tf.parse_example(..., features=make_parse_example_spec(columns))
+    sequence_input, sequence_length = sequence_input_layer(features)
+    sequence_length_mask = tf.sequence_mask(sequence_length)
+
+    rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
+    rnn_layer = tf.keras.layers.RNN(rnn_cell)
+    outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
+    ```
+  """
+
+  def __init__(
+      self,
+      feature_columns,
+      trainable=True,
+      name=None,
+      **kwargs):
+    """"Constructs a SequenceFeatures layer.
+
+    Args:
+      feature_columns: An iterable of dense sequence columns. Valid columns are
+        - `embedding_column` that wraps a `sequence_categorical_column_with_*`
+        - `sequence_numeric_column`.
+      trainable: Boolean, whether the layer's variables will be updated via
+        gradient descent during training.
+      name: Name to give to the SequenceFeatures.
+      **kwargs: Keyword arguments to construct a layer.
+
+    Raises:
+      ValueError: If any of the `feature_columns` is not a
+        `SequenceDenseColumn`.
+    """
+    super(SequenceFeatures, self).__init__(
+        feature_columns=feature_columns,
+        trainable=trainable,
+        name=name,
+        expected_column_type=fc.SequenceDenseColumn,
+        **kwargs)
+
+  def _target_shape(self, input_shape, total_elements):
+    return (input_shape[0], input_shape[1], total_elements)
+
+  def call(self, features):
+    """Returns sequence input corresponding to the `feature_columns`.
+
+    Args:
+      features: A dict mapping keys to tensors.
+
+    Returns:
+      An `(input_layer, sequence_length)` tuple where:
+      - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
+          `T` is the maximum sequence length for this batch, which could differ
+          from batch to batch. `D` is the sum of `num_elements` for all
+          `feature_columns`.
+      - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
+          length for each example.
+
+    Raises:
+      ValueError: If features are not a dictionary.
+    """
+    if not isinstance(features, dict):
+      raise ValueError(
+          'features should be a dictionary. Given: {}'.format(features))
+    transformation_cache = fc.FeatureTransformationCache(features)
+    output_tensors = []
+    sequence_lengths = []
+
+    for column in self._feature_columns:
+      with ops.name_scope(column.name):
+        dense_tensor, sequence_length = column.get_sequence_dense_tensor(
+            transformation_cache, self._state_manager)
+        # Flattens the final dimension to produce a 3D Tensor.
+        output_tensors.append(self._process_dense_tensor(column, dense_tensor))
+        sequence_lengths.append(sequence_length)
+
+    # Check and process sequence lengths.
+    fc._verify_static_batch_size_equality(sequence_lengths,
+                                          self._feature_columns)
+    sequence_length = _assert_all_equal_and_return(sequence_lengths)
+
+    return self._verify_and_concat_tensors(output_tensors), sequence_length
+
+
+def concatenate_context_input(context_input, sequence_input):
+  """Replicates `context_input` across all timesteps of `sequence_input`.
+
+  Expands dimension 1 of `context_input` then tiles it `sequence_length` times.
+  This value is appended to `sequence_input` on dimension 2 and the result is
+  returned.
+
+  Args:
+    context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
+    sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size,
+      padded_length, d0]`.
+
+  Returns:
+    A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
+    d0 + d1]`.
+
+  Raises:
+    ValueError: If `sequence_input` does not have rank 3 or `context_input` does
+      not have rank 2.
+  """
+  seq_rank_check = check_ops.assert_rank(
+      sequence_input,
+      3,
+      message='sequence_input must have rank 3',
+      data=[array_ops.shape(sequence_input)])
+  seq_type_check = check_ops.assert_type(
+      sequence_input,
+      dtypes.float32,
+      message='sequence_input must have dtype float32; got {}.'.format(
+          sequence_input.dtype))
+  ctx_rank_check = check_ops.assert_rank(
+      context_input,
+      2,
+      message='context_input must have rank 2',
+      data=[array_ops.shape(context_input)])
+  ctx_type_check = check_ops.assert_type(
+      context_input,
+      dtypes.float32,
+      message='context_input must have dtype float32; got {}.'.format(
+          context_input.dtype))
+  with ops.control_dependencies(
+      [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
+    padded_length = array_ops.shape(sequence_input)[1]
+    tiled_context_input = array_ops.tile(
+        array_ops.expand_dims(context_input, 1),
+        array_ops.concat([[1], [padded_length], [1]], 0))
+  return array_ops.concat([sequence_input, tiled_context_input], 2)
+
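+# A hedged shape illustration for concatenate_context_input (not executed;
+# names as defined above): a [batch, d1] context tiled across a
+# [batch, padded_length, d0] sequence yields [batch, padded_length, d0 + d1].
+#
+#   ctx = array_ops.zeros([2, 4])        # [batch=2, d1=4]
+#   seq = array_ops.zeros([2, 3, 5])     # [batch=2, padded_length=3, d0=5]
+#   concatenate_context_input(ctx, seq)  # shape [2, 3, 9]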
+
+@tf_export('feature_column.sequence_categorical_column_with_identity')
+def sequence_categorical_column_with_identity(
+    key, num_buckets, default_value=None):
+  """Returns a feature column that represents sequences of integers.
+
+  Pass this to `embedding_column` or `indicator_column` to convert sequence
+  categorical data into a dense representation for input to a sequence NN,
+  such as an RNN.
+
+  Example:
+
+  ```python
+  watches = sequence_categorical_column_with_identity(
+      'watches', num_buckets=1000)
+  watches_embedding = embedding_column(watches, dimension=10)
+  columns = [watches_embedding]
+
+  features = tf.parse_example(..., features=make_parse_example_spec(columns))
+  sequence_feature_layer = SequenceFeatures(columns)
+  sequence_input, sequence_length = sequence_feature_layer(features)
+  sequence_length_mask = tf.sequence_mask(sequence_length)
+
+  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
+  rnn_layer = tf.keras.layers.RNN(rnn_cell)
+  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
+  ```
+
+  Args:
+    key: A unique string identifying the input feature.
+    num_buckets: Range of inputs. Namely, inputs are expected to be in the
+      range `[0, num_buckets)`.
+    default_value: If `None`, this column's graph operations will fail for
+      out-of-range inputs. Otherwise, this value must be in the range
+      `[0, num_buckets)`, and will replace out-of-range inputs.
+
+  Returns:
+    A `SequenceCategoricalColumn`.
+
+  Raises:
+    ValueError: if `num_buckets` is less than one.
+    ValueError: if `default_value` is not in range `[0, num_buckets)`.
+  """
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_identity(
+          key=key,
+          num_buckets=num_buckets,
+          default_value=default_value))
+
+
+@tf_export('feature_column.sequence_categorical_column_with_hash_bucket')
+def sequence_categorical_column_with_hash_bucket(
+    key, hash_bucket_size, dtype=dtypes.string):
+  """A sequence of categorical terms where ids are set by hashing.
+
+  Pass this to `embedding_column` or `indicator_column` to convert sequence
+  categorical data into a dense representation for input to a sequence NN,
+  such as an RNN.
+
+  Example:
+
+  ```python
+  tokens = sequence_categorical_column_with_hash_bucket(
+      'tokens', hash_bucket_size=1000)
+  tokens_embedding = embedding_column(tokens, dimension=10)
+  columns = [tokens_embedding]
+
+  features = tf.parse_example(..., features=make_parse_example_spec(columns))
+  sequence_feature_layer = SequenceFeatures(columns)
+  sequence_input, sequence_length = sequence_feature_layer(features)
+  sequence_length_mask = tf.sequence_mask(sequence_length)
+
+  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
+  rnn_layer = tf.keras.layers.RNN(rnn_cell)
+  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
+  ```
+
+  Args:
+    key: A unique string identifying the input feature.
+    hash_bucket_size: An int > 1. The number of buckets.
+    dtype: The type of features. Only string and integer types are supported.
+
+  Returns:
+    A `SequenceCategoricalColumn`.
+
+  Raises:
+    ValueError: `hash_bucket_size` is not greater than 1.
+    ValueError: `dtype` is neither string nor integer.
+  """
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_hash_bucket(
+          key=key,
+          hash_bucket_size=hash_bucket_size,
+          dtype=dtype))
+
+
+@tf_export('feature_column.sequence_categorical_column_with_vocabulary_file')
+def sequence_categorical_column_with_vocabulary_file(
+    key, vocabulary_file, vocabulary_size=None, num_oov_buckets=0,
+    default_value=None, dtype=dtypes.string):
+  """A sequence of categorical terms where ids use a vocabulary file.
+
+  Pass this to `embedding_column` or `indicator_column` to convert sequence
+  categorical data into a dense representation for input to a sequence NN,
+  such as an RNN.
+
+  Example:
+
+  ```python
+  states = sequence_categorical_column_with_vocabulary_file(
+      key='states', vocabulary_file='/us/states.txt', vocabulary_size=50,
+      num_oov_buckets=5)
+  states_embedding = embedding_column(states, dimension=10)
+  columns = [states_embedding]
+
+  features = tf.parse_example(..., features=make_parse_example_spec(columns))
+  sequence_feature_layer = SequenceFeatures(columns)
+  sequence_input, sequence_length = sequence_feature_layer(features)
+  sequence_length_mask = tf.sequence_mask(sequence_length)
+
+  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
+  rnn_layer = tf.keras.layers.RNN(rnn_cell)
+  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
+  ```
+
+  Args:
+    key: A unique string identifying the input feature.
+    vocabulary_file: The vocabulary file name.
+    vocabulary_size: Number of elements in the vocabulary. This must be no
+      greater than the length of `vocabulary_file`; if it is less, later
+      values are ignored. If `None`, it is set to the length of
+      `vocabulary_file`.
+    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
+      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
+      `[vocabulary_size, vocabulary_size+num_oov_buckets)` based on a hash of
+      the input value. A positive `num_oov_buckets` can not be specified with
+      `default_value`.
+    default_value: The integer ID value to return for out-of-vocabulary feature
+      values, defaults to `-1`. This can not be specified with a positive
+      `num_oov_buckets`.
+    dtype: The type of features. Only string and integer types are supported.
+
+  Returns:
+    A `SequenceCategoricalColumn`.
+
+  Raises:
+    ValueError: `vocabulary_file` is missing or cannot be opened.
+    ValueError: `vocabulary_size` is missing or < 1.
+    ValueError: `num_oov_buckets` is a negative integer.
+    ValueError: `num_oov_buckets` and `default_value` are both specified.
+    ValueError: `dtype` is neither string nor integer.
+  """
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_vocabulary_file(
+          key=key,
+          vocabulary_file=vocabulary_file,
+          vocabulary_size=vocabulary_size,
+          num_oov_buckets=num_oov_buckets,
+          default_value=default_value,
+          dtype=dtype))
+
+
+@tf_export('feature_column.sequence_categorical_column_with_vocabulary_list')
+def sequence_categorical_column_with_vocabulary_list(
+    key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0):
+  """A sequence of categorical terms where ids use an in-memory list.
+
+  Pass this to `embedding_column` or `indicator_column` to convert sequence
+  categorical data into a dense representation for input to a sequence NN,
+  such as an RNN.
+
+  Example:
+
+  ```python
+  colors = sequence_categorical_column_with_vocabulary_list(
+      key='colors', vocabulary_list=('R', 'G', 'B', 'Y'),
+      num_oov_buckets=2)
+  colors_embedding = embedding_column(colors, dimension=3)
+  columns = [colors_embedding]
+
+  features = tf.parse_example(..., features=make_parse_example_spec(columns))
+  sequence_feature_layer = SequenceFeatures(columns)
+  sequence_input, sequence_length = sequence_feature_layer(features)
+  sequence_length_mask = tf.sequence_mask(sequence_length)
+
+  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
+  rnn_layer = tf.keras.layers.RNN(rnn_cell)
+  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
+  ```
+
+  Args:
+    key: A unique string identifying the input feature.
+    vocabulary_list: An ordered iterable defining the vocabulary. Each feature
+      is mapped to the index of its value (if present) in `vocabulary_list`.
+      Must be castable to `dtype`.
+    dtype: The type of features. Only string and integer types are supported.
+      If `None`, it will be inferred from `vocabulary_list`.
+    default_value: The integer ID value to return for out-of-vocabulary feature
+      values, defaults to `-1`. This can not be specified with a positive
+      `num_oov_buckets`.
+    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
+      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
+      `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a
+      hash of the input value. A positive `num_oov_buckets` can not be specified
+      with `default_value`.
+
+  Returns:
+    A `SequenceCategoricalColumn`.
+
+  Raises:
+    ValueError: if `vocabulary_list` is empty, or contains duplicate keys.
+    ValueError: `num_oov_buckets` is a negative integer.
+    ValueError: `num_oov_buckets` and `default_value` are both specified.
+    ValueError: if `dtype` is not integer or string.
+  """
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_vocabulary_list(
+          key=key,
+          vocabulary_list=vocabulary_list,
+          dtype=dtype,
+          default_value=default_value,
+          num_oov_buckets=num_oov_buckets))
+
+
+@tf_export('feature_column.sequence_numeric_column')
+def sequence_numeric_column(
+    key,
+    shape=(1,),
+    default_value=0.,
+    dtype=dtypes.float32,
+    normalizer_fn=None):
+  """Returns a feature column that represents sequences of numeric data.
+
+  Example:
+
+  ```python
+  temperature = sequence_numeric_column('temperature')
+  columns = [temperature]
+
+  features = tf.parse_example(..., features=make_parse_example_spec(columns))
+  sequence_feature_layer = SequenceFeatures(columns)
+  sequence_input, sequence_length = sequence_feature_layer(features)
+  sequence_length_mask = tf.sequence_mask(sequence_length)
+
+  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
+  rnn_layer = tf.keras.layers.RNN(rnn_cell)
+  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
+  ```
+
+  Args:
+    key: A unique string identifying the input features.
+    shape: The shape of the input data per sequence id. E.g. if `shape=(2,)`,
+      each example must contain `2 * sequence_length` values.
+    default_value: A single value compatible with `dtype` that is used for
+      padding the sparse data into a dense `Tensor`.
+    dtype: The type of values.
+    normalizer_fn: If not `None`, a function that can be used to normalize the
+      value of the tensor after `default_value` is applied for parsing.
+      The normalizer function takes the input `Tensor` as its argument and
+      returns the output `Tensor` (e.g. lambda x: (x - 3.0) / 4.2). Note that
+      even though the most common use case of this function is normalization,
+      it can be used for any kind of TensorFlow transformation.
+
+  Returns:
+    A `SequenceNumericColumn`.
+
+  Raises:
+    TypeError: if any dimension in shape is not an int.
+    ValueError: if any dimension in shape is not a positive integer.
+    ValueError: if `dtype` is not convertible to `tf.float32`.
+  """
+  shape = fc._check_shape(shape=shape, key=key)
+  if not (dtype.is_integer or dtype.is_floating):
+    raise ValueError('dtype must be convertible to float. '
+                     'dtype: {}, key: {}'.format(dtype, key))
+  if normalizer_fn is not None and not callable(normalizer_fn):
+    raise TypeError(
+        'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn))
+
+  return SequenceNumericColumn(
+      key,
+      shape=shape,
+      default_value=default_value,
+      dtype=dtype,
+      normalizer_fn=normalizer_fn)
+
+
+def _assert_all_equal_and_return(tensors, name=None):
+  """Asserts that all tensors are equal and returns the first one."""
+  with ops.name_scope(name, 'assert_all_equal', values=tensors):
+    if len(tensors) == 1:
+      return tensors[0]
+    assert_equal_ops = []
+    for t in tensors[1:]:
+      assert_equal_ops.append(check_ops.assert_equal(tensors[0], t))
+    with ops.control_dependencies(assert_equal_ops):
+      return array_ops.identity(tensors[0])
+
+
+class SequenceNumericColumn(
+    fc.SequenceDenseColumn,
+    collections.namedtuple(
+        'SequenceNumericColumn',
+        ('key', 'shape', 'default_value', 'dtype', 'normalizer_fn'))):
+  """Represents sequences of numeric data."""
+
+  @property
+  def _is_v2_column(self):
+    return True
+
+  @property
+  def name(self):
+    """See `FeatureColumn` base class."""
+    return self.key
+
+  @property
+  def parse_example_spec(self):
+    """See `FeatureColumn` base class."""
+    return {self.key: parsing_ops.VarLenFeature(self.dtype)}
+
+  def transform_feature(self, transformation_cache, state_manager):
+    """See `FeatureColumn` base class.
+
+    In this case, we apply the `normalizer_fn` to the input tensor.
+
+    Args:
+      transformation_cache: A `FeatureTransformationCache` object to access
+        features.
+      state_manager: A `StateManager` to create / access resources such as
+        lookup tables.
+
+    Returns:
+      Normalized input tensor.
+    """
+    input_tensor = transformation_cache.get(self.key, state_manager)
+    if self.normalizer_fn is not None:
+      input_tensor = self.normalizer_fn(input_tensor)
+    return input_tensor
+
+  @property
+  def variable_shape(self):
+    """Returns a `TensorShape` representing the shape of sequence input."""
+    return tensor_shape.TensorShape(self.shape)
+
+  def get_sequence_dense_tensor(self, transformation_cache, state_manager):
+    """Returns a `TensorSequenceLengthPair`.
+
+    Args:
+      transformation_cache: A `FeatureTransformationCache` object to access
+        features.
+      state_manager: A `StateManager` to create / access resources such as
+        lookup tables.
+    """
+    sp_tensor = transformation_cache.get(self, state_manager)
+    dense_tensor = sparse_ops.sparse_tensor_to_dense(
+        sp_tensor, default_value=self.default_value)
+    # Reshape into [batch_size, T, variable_shape].
+    dense_shape = array_ops.concat(
+        [array_ops.shape(dense_tensor)[:1], [-1], self.variable_shape],
+        axis=0)
+    dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)
+
+    # Get the number of timesteps per example.
+    # For the 2D case, the raw values are grouped according to num_elements;
+    # for the 3D case, the grouping happens in the third dimension, and
+    # sequence length is not affected.
+    if sp_tensor.shape.ndims == 2:
+      num_elements = self.variable_shape.num_elements()
+    else:
+      num_elements = 1
+    seq_length = fc._sequence_length_from_sparse_tensor(
+        sp_tensor, num_elements=num_elements)
+
+    return fc.SequenceDenseColumn.TensorSequenceLengthPair(
+        dense_tensor=dense_tensor, sequence_length=seq_length)
+
+  # TODO(b/119409767): Implement parents, _{get,from}_config.
+  @property
+  def parents(self):
+    """See 'FeatureColumn` base class."""
+    raise NotImplementedError()
+
+  def _get_config(self):
+    """See 'FeatureColumn` base class."""
+    raise NotImplementedError()
+
+  @classmethod
+  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+    """See 'FeatureColumn` base class."""
+    raise NotImplementedError()
+
+# pylint: enable=protected-access
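The `num_elements` grouping in `get_sequence_dense_tensor` determines how raw values map to timesteps. A hedged end-to-end sketch, assuming this module is importable as `sfc` and eager execution:

```python
# With shape=(2,), six raw values per example form three timesteps of two
# elements each, so sequence_length reports 3 rather than 6.
import tensorflow as tf
from tensorflow.python.feature_column import sequence_feature_column as sfc

col = sfc.sequence_numeric_column('temperature', shape=(2,))
sp = tf.SparseTensor(indices=[[0, i] for i in range(6)],
                     values=[1., 2., 3., 4., 5., 6.],
                     dense_shape=[1, 6])
# The dense tensor is reshaped to [batch=1, T=3, 2]; the sequence length is
# ceil(6 / num_elements) = ceil(6 / 2) = 3.
```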
diff --git a/tensorflow/python/feature_column/sequence_feature_column_integration_test.py b/tensorflow/python/feature_column/sequence_feature_column_integration_test.py
new file mode 100644
index 0000000..b7c67945
--- /dev/null
+++ b/tensorflow/python/feature_column/sequence_feature_column_integration_test.py
@@ -0,0 +1,283 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Integration test for sequence feature columns with SequenceExamples."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import string
+import tempfile
+
+from google.protobuf import text_format
+
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.feature_column import feature_column_v2 as fc
+from tensorflow.python.feature_column import sequence_feature_column as sfc
+from tensorflow.python.keras.layers import recurrent
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class SequenceFeatureColumnIntegrationTest(test.TestCase):
+
+  def _make_sequence_example(self):
+    example = example_pb2.SequenceExample()
+    example.context.feature['int_ctx'].int64_list.value.extend([5])
+    example.context.feature['float_ctx'].float_list.value.extend([123.6])
+    for val in range(0, 10, 2):
+      feat = feature_pb2.Feature()
+      feat.int64_list.value.extend([val] * val)
+      example.feature_lists.feature_list['int_list'].feature.extend([feat])
+    for val in range(1, 11, 2):
+      feat = feature_pb2.Feature()
+      feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val)
+      example.feature_lists.feature_list['str_list'].feature.extend([feat])
+
+    return example
+
+  def _build_feature_columns(self):
+    col = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
+    ctx_cols = [
+        fc.embedding_column(col, dimension=10),
+        fc.numeric_column('float_ctx')
+    ]
+
+    identity_col = sfc.sequence_categorical_column_with_identity(
+        'int_list', num_buckets=10)
+    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
+        'bytes_list', hash_bucket_size=100)
+    seq_cols = [
+        fc.embedding_column(identity_col, dimension=10),
+        fc.embedding_column(bucket_col, dimension=20)
+    ]
+
+    return ctx_cols, seq_cols
+
+  def test_sequence_example_into_input_layer(self):
+    examples = [_make_sequence_example().SerializeToString()] * 100
+    ctx_cols, seq_cols = self._build_feature_columns()
+
+    def _parse_example(example):
+      ctx, seq = parsing_ops.parse_single_sequence_example(
+          example,
+          context_features=fc.make_parse_example_spec_v2(ctx_cols),
+          sequence_features=fc.make_parse_example_spec_v2(seq_cols))
+      ctx.update(seq)
+      return ctx
+
+    ds = dataset_ops.Dataset.from_tensor_slices(examples)
+    ds = ds.map(_parse_example)
+    ds = ds.batch(20)
+
+    # Test on a single batch
+    features = ds.make_one_shot_iterator().get_next()
+
+    # Tile the context features across the sequence features
+    sequence_input_layer = sfc.SequenceFeatures(seq_cols)
+    seq_layer, _ = sequence_input_layer(features)
+    input_layer = fc.DenseFeatures(ctx_cols)
+    ctx_layer = input_layer(features)
+    input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)
+
+    rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
+    output = rnn_layer(input_layer)
+
+    with self.cached_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      features_r = sess.run(features)
+      self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
+
+      output_r = sess.run(output)
+      self.assertAllEqual(output_r.shape, [20, 10])
+
+
+class SequenceExampleParsingTest(test.TestCase):
+
+  def test_seq_ex_in_sequence_categorical_column_with_identity(self):
+    self._test_parsed_sequence_example(
+        'int_list', sfc.sequence_categorical_column_with_identity,
+        10, [3, 6], [2, 4, 6])
+
+  def test_seq_ex_in_sequence_categorical_column_with_hash_bucket(self):
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_hash_bucket,
+        10, [3, 4], [compat.as_bytes(x) for x in 'acg'])
+
+  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_list(self):
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_list,
+        list(string.ascii_lowercase), [3, 4],
+        [compat.as_bytes(x) for x in 'acg'])
+
+  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_file(self):
+    _, fname = tempfile.mkstemp()
+    with open(fname, 'w') as f:
+      f.write(string.ascii_lowercase)
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_file,
+        fname, [3, 4], [compat.as_bytes(x) for x in 'acg'])
+
+  def _test_parsed_sequence_example(
+      self, col_name, col_fn, col_arg, shape, values):
+    """Helper function to check that each FeatureColumn parses correctly.
+
+    Args:
+      col_name: string, name to give to the feature column. Should match
+        the name that the column will parse out of the features dict.
+      col_fn: function used to create the feature column. For example,
+        sequence_numeric_column.
+      col_arg: second arg that the target feature column is expecting.
+      shape: the expected dense_shape of the feature after parsing into
+        a SparseTensor.
+      values: the expected values at index [0, 2, 6] of the feature
+        after parsing into a SparseTensor.
+    """
+    example = _make_sequence_example()
+    columns = [
+        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
+        fc.numeric_column('float_ctx'),
+        col_fn(col_name, col_arg)
+    ]
+    context, seq_features = parsing_ops.parse_single_sequence_example(
+        example.SerializeToString(),
+        context_features=fc.make_parse_example_spec_v2(columns[:2]),
+        sequence_features=fc.make_parse_example_spec_v2(columns[2:]))
+
+    with self.cached_session() as sess:
+      ctx_result, seq_result = sess.run([context, seq_features])
+      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
+      self.assertEqual(
+          list(seq_result[col_name].values[[0, 2, 6]]), values)
+      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
+      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
+      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
+      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
+
+
+_SEQ_EX_PROTO = """
+context {
+  feature {
+    key: "float_ctx"
+    value {
+      float_list {
+        value: 123.6
+      }
+    }
+  }
+  feature {
+    key: "int_ctx"
+    value {
+      int64_list {
+        value: 5
+      }
+    }
+  }
+}
+feature_lists {
+  feature_list {
+    key: "bytes_list"
+    value {
+      feature {
+        bytes_list {
+          value: "a"
+        }
+      }
+      feature {
+        bytes_list {
+          value: "b"
+          value: "c"
+        }
+      }
+      feature {
+        bytes_list {
+          value: "d"
+          value: "e"
+          value: "f"
+          value: "g"
+        }
+      }
+    }
+  }
+  feature_list {
+    key: "float_list"
+    value {
+      feature {
+        float_list {
+          value: 1.0
+        }
+      }
+      feature {
+        float_list {
+          value: 3.0
+          value: 3.0
+          value: 3.0
+        }
+      }
+      feature {
+        float_list {
+          value: 5.0
+          value: 5.0
+          value: 5.0
+          value: 5.0
+          value: 5.0
+        }
+      }
+    }
+  }
+  feature_list {
+    key: "int_list"
+    value {
+      feature {
+        int64_list {
+          value: 2
+          value: 2
+        }
+      }
+      feature {
+        int64_list {
+          value: 4
+          value: 4
+          value: 4
+          value: 4
+        }
+      }
+      feature {
+        int64_list {
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+        }
+      }
+    }
+  }
+}
+"""
+
+
+def _make_sequence_example():
+  example = example_pb2.SequenceExample()
+  return text_format.Parse(_SEQ_EX_PROTO, example)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/feature_column/sequence_feature_column_test.py b/tensorflow/python/feature_column/sequence_feature_column_test.py
new file mode 100644
index 0000000..0c8f37b
--- /dev/null
+++ b/tensorflow/python/feature_column/sequence_feature_column_test.py
@@ -0,0 +1,1535 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for sequential_feature_column."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.feature_column import feature_column_v2 as fc
+from tensorflow.python.feature_column import feature_column_v2_test as fc_test
+from tensorflow.python.feature_column import sequence_feature_column as sfc
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine.base_layer import Layer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import test
+
+
+class SequenceFeaturesTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_args_a': {
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'sparse_input_args_b': {
+           # example 0, ids [1]
+           # example 1, ids [2, 0]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 2, 0),
+           'dense_shape': (2, 2)},
+       'expected_input_layer': [
+           # example 0, ids_a [2], ids_b [1]
+           [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
+           # example 1, ids_a [0, 1], ids_b [2, 0]
+           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'sparse_input_args_a': {
+           # feature 0, ids [[2], [0, 1]]
+           # feature 1, ids [[0, 0], [1]]
+           'indices': (
+               (0, 0, 0), (0, 1, 0), (0, 1, 1),
+               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 0, 0, 1),
+           'dense_shape': (2, 2, 2)},
+       'sparse_input_args_b': {
+           # feature 0, ids [[1, 1], [1]]
+           # feature 1, ids [[2], [0]]
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (1, 1, 1, 2, 0),
+           'dense_shape': (2, 2, 2)},
+       'expected_input_layer': [
+           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
+           [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
+           # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -]
+           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  @test_util.run_in_graph_and_eager_modes
+  def test_embedding_column(
+      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
+      expected_sequence_length):
+    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
+    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
+    vocabulary_size = 3
+    embedding_dimension_a = 2
+    embedding_values_a = (
+        (1., 2.),  # id 0
+        (3., 4.),  # id 1
+        (5., 6.)  # id 2
+    )
+    embedding_dimension_b = 3
+    embedding_values_b = (
+        (11., 12., 13.),  # id 0
+        (14., 15., 16.),  # id 1
+        (17., 18., 19.)  # id 2
+    )
+    def _get_initializer(embedding_dimension, embedding_values):
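+      """Returns a deterministic initializer that also checks shape and dtype."""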
+      def _initializer(shape, dtype, partition_info):
+        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+        self.assertEqual(dtypes.float32, dtype)
+        self.assertIsNone(partition_info)
+        return embedding_values
+      return _initializer
+
+    categorical_column_a = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column_a = fc.embedding_column(
+        categorical_column_a,
+        dimension=embedding_dimension_a,
+        initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
+    categorical_column_b = sfc.sequence_categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_b = fc.embedding_column(
+        categorical_column_b,
+        dimension=embedding_dimension_b,
+        initializer=_get_initializer(embedding_dimension_b, embedding_values_b))
+
+    # Test that columns are reordered alphabetically.
+    sequence_input_layer = sfc.SequenceFeatures(
+        [embedding_column_b, embedding_column_a])
+    input_layer, sequence_length = sequence_input_layer({
+        'aaa': sparse_input_a, 'bbb': sparse_input_b})
+
+    self.evaluate(variables_lib.global_variables_initializer())
+    weights = sequence_input_layer.weights
+    self.assertCountEqual(
+        ('sequence_features/aaa_embedding/embedding_weights:0',
+         'sequence_features/bbb_embedding/embedding_weights:0'),
+        tuple([v.name for v in weights]))
+    self.assertAllEqual(embedding_values_a, self.evaluate(weights[0]))
+    self.assertAllEqual(embedding_values_b, self.evaluate(weights[1]))
+    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
+    self.assertAllEqual(
+        expected_sequence_length, self.evaluate(sequence_length))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_embedding_column_with_non_sequence_categorical(self):
+    """Tests that error is raised for non-sequence embedding column."""
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        indices=((0, 0), (1, 0), (1, 1)),
+        values=(2, 0, 1),
+        dense_shape=(2, 2))
+
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column_a = fc.embedding_column(
+        categorical_column_a, dimension=2)
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'In embedding_column: aaa_embedding\. categorical_column must be of '
+        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
+      sequence_input_layer = sfc.SequenceFeatures([embedding_column_a])
+      _, _ = sequence_input_layer({'aaa': sparse_input})
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_shared_embedding_column(self):
+    with ops.Graph().as_default():
+      vocabulary_size = 3
+      sparse_input_a = sparse_tensor.SparseTensorValue(
+          # example 0, ids [2]
+          # example 1, ids [0, 1]
+          indices=((0, 0), (1, 0), (1, 1)),
+          values=(2, 0, 1),
+          dense_shape=(2, 2))
+      sparse_input_b = sparse_tensor.SparseTensorValue(
+          # example 0, ids [1]
+          # example 1, ids [2, 0]
+          indices=((0, 0), (1, 0), (1, 1)),
+          values=(1, 2, 0),
+          dense_shape=(2, 2))
+
+      embedding_dimension = 2
+      embedding_values = (
+          (1., 2.),  # id 0
+          (3., 4.),  # id 1
+          (5., 6.)  # id 2
+      )
+
+      def _get_initializer(embedding_dimension, embedding_values):
+
+        def _initializer(shape, dtype, partition_info):
+          self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+          self.assertEqual(dtypes.float32, dtype)
+          self.assertIsNone(partition_info)
+          return embedding_values
+
+        return _initializer
+
+      expected_input_layer = [
+          # example 0, ids_a [2], ids_b [1]
+          [[5., 6., 3., 4.], [0., 0., 0., 0.]],
+          # example 1, ids_a [0, 1], ids_b [2, 0]
+          [[1., 2., 5., 6.], [3., 4., 1., 2.]],
+      ]
+      expected_sequence_length = [1, 2]
+
+      categorical_column_a = sfc.sequence_categorical_column_with_identity(
+          key='aaa', num_buckets=vocabulary_size)
+      categorical_column_b = sfc.sequence_categorical_column_with_identity(
+          key='bbb', num_buckets=vocabulary_size)
+      # Test that columns are reordered alphabetically.
+      shared_embedding_columns = fc.shared_embedding_columns_v2(
+          [categorical_column_b, categorical_column_a],
+          dimension=embedding_dimension,
+          initializer=_get_initializer(embedding_dimension, embedding_values))
+
+      sequence_input_layer = sfc.SequenceFeatures(shared_embedding_columns)
+      input_layer, sequence_length = sequence_input_layer({
+          'aaa': sparse_input_a, 'bbb': sparse_input_b})
+
+      global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+      self.assertCountEqual(
+          ('aaa_bbb_shared_embedding:0',),
+          tuple([v.name for v in global_vars]))
+      with fc_test._initialized_session() as sess:
+        self.assertAllEqual(embedding_values,
+                            global_vars[0].eval(session=sess))
+        self.assertAllEqual(expected_input_layer,
+                            input_layer.eval(session=sess))
+        self.assertAllEqual(
+            expected_sequence_length, sequence_length.eval(session=sess))
+
+  @test_util.run_deprecated_v1
+  def test_shared_embedding_column_with_non_sequence_categorical(self):
+    """Tests that error is raised for non-sequence shared embedding column."""
+    vocabulary_size = 3
+    sparse_input_a = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        indices=((0, 0), (1, 0), (1, 1)),
+        values=(2, 0, 1),
+        dense_shape=(2, 2))
+    sparse_input_b = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        indices=((0, 0), (1, 0), (1, 1)),
+        values=(2, 0, 1),
+        dense_shape=(2, 2))
+
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    shared_embedding_columns = fc.shared_embedding_columns_v2(
+        [categorical_column_a, categorical_column_b], dimension=2)
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'In embedding_column: aaa_shared_embedding\. categorical_column must '
+        r'be of type SequenceCategoricalColumn to use SequenceFeatures\.'):
+      sequence_input_layer = sfc.SequenceFeatures(shared_embedding_columns)
+      _, _ = sequence_input_layer({'aaa': sparse_input_a,
+                                   'bbb': sparse_input_b})
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_args_a': {
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'sparse_input_args_b': {
+           # example 0, ids [1]
+           # example 1, ids [1, 0]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 1, 0),
+           'dense_shape': (2, 2)},
+       'expected_input_layer': [
+           # example 0, ids_a [2], ids_b [1]
+           [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
+           # example 1, ids_a [0, 1], ids_b [1, 0]
+           [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'sparse_input_args_a': {
+           # feature 0, ids [[2], [0, 1]]
+           # feature 1, ids [[0, 0], [1]]
+           'indices': (
+               (0, 0, 0), (0, 1, 0), (0, 1, 1),
+               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 0, 0, 1),
+           'dense_shape': (2, 2, 2)},
+       'sparse_input_args_b': {
+           # feature 0, ids [[1, 1], [1]]
+           # feature 1, ids [[1], [0]]
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (1, 1, 1, 1, 0),
+           'dense_shape': (2, 2, 2)},
+       'expected_input_layer': [
+           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
+           [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
+           # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -]
+           [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  @test_util.run_in_graph_and_eager_modes
+  def test_indicator_column(
+      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
+      expected_sequence_length):
+    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
+    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
+
+    vocabulary_size_a = 3
+    vocabulary_size_b = 2
+
+    categorical_column_a = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size_a)
+    indicator_column_a = fc.indicator_column(categorical_column_a)
+    categorical_column_b = sfc.sequence_categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size_b)
+    indicator_column_b = fc.indicator_column(categorical_column_b)
+    # Test that columns are reordered alphabetically.
+    sequence_input_layer = sfc.SequenceFeatures(
+        [indicator_column_b, indicator_column_a])
+    input_layer, sequence_length = sequence_input_layer({
+        'aaa': sparse_input_a, 'bbb': sparse_input_b})
+
+    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
+    self.assertAllEqual(
+        expected_sequence_length, self.evaluate(sequence_length))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_indicator_column_with_non_sequence_categorical(self):
+    """Tests that error is raised for non-sequence categorical column."""
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        indices=((0, 0), (1, 0), (1, 1)),
+        values=(2, 0, 1),
+        dense_shape=(2, 2))
+
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    indicator_column_a = fc.indicator_column(categorical_column_a)
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'In indicator_column: aaa_indicator\. categorical_column must be of '
+        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
+      sequence_input_layer = sfc.SequenceFeatures([indicator_column_a])
+      _, _ = sequence_input_layer({'aaa': sparse_input})
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_args': {
+           # example 0, values [0., 1.]
+           # example 1, [10.]
+           'indices': ((0, 0), (0, 1), (1, 0)),
+           'values': (0., 1., 10.),
+           'dense_shape': (2, 2)},
+       'expected_input_layer': [
+           [[0.], [1.]],
+           [[10.], [0.]]],
+       'expected_sequence_length': [2, 1]},
+      {'testcase_name': '3D',
+       'sparse_input_args': {
+           # example 0, values [[20., 3.], [5.]]
+           # example 1, values [[3.], [8.]]
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (20., 3., 5., 3., 8.),
+           'dense_shape': (2, 2, 2)},
+       'expected_input_layer': [
+           [[20.], [3.], [5.], [0.]],
+           [[3.], [0.], [8.], [0.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  @test_util.run_in_graph_and_eager_modes
+  def test_numeric_column(
+      self, sparse_input_args, expected_input_layer, expected_sequence_length):
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+
+    numeric_column = sfc.sequence_numeric_column('aaa')
+
+    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
+    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})
+
+    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
+    self.assertAllEqual(
+        expected_sequence_length, self.evaluate(sequence_length))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_args': {
+           # example 0, values [0., 1.,  2., 3., 4., 5., 6., 7.]
+           # example 1, [10., 11., 12., 13.]
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
+       'expected_input_layer': [
+           # SequenceFeatures flattens each step of the multi-dim input.
+           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
+           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
+       'expected_sequence_length': [2, 1]},
+      {'testcase_name': '3D',
+       'sparse_input_args': {
+           # example 0, values [[0., 1., 2., 3.], [4., 5., 6., 7.]]
+           # example 1, [[10., 11., 12., 13.], []]
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
+                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 4)},
+       'expected_input_layer': [
+           # SequenceFeatures flattens each step of the multi-dim input.
+           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
+           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
+       'expected_sequence_length': [2, 1]},
+      )
+  @test_util.run_in_graph_and_eager_modes
+  def test_numeric_column_multi_dim(
+      self, sparse_input_args, expected_input_layer, expected_sequence_length):
+    """Tests SequenceFeatures for multi-dimensional numeric_column."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
+
+    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
+    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})
+
+    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
+    self.assertAllEqual(
+        expected_sequence_length, self.evaluate(sequence_length))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_sequence_length_not_equal(self):
+    """Tests that an error is raised when sequence lengths are not equal."""
+    # Input a with sequence_length = [2, 1]
+    sparse_input_a = sparse_tensor.SparseTensorValue(
+        indices=((0, 0), (0, 1), (1, 0)),
+        values=(0., 1., 10.),
+        dense_shape=(2, 2))
+    # Input b with sequence_length = [1, 1]
+    sparse_input_b = sparse_tensor.SparseTensorValue(
+        indices=((0, 0), (1, 0)),
+        values=(1., 10.),
+        dense_shape=(2, 2))
+    numeric_column_a = sfc.sequence_numeric_column('aaa')
+    numeric_column_b = sfc.sequence_numeric_column('bbb')
+
+    sequence_input_layer = sfc.SequenceFeatures(
+        [numeric_column_a, numeric_column_b])
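+    # SequenceFeatures checks that all of its columns report the same sequence
+    # lengths, so evaluating the output below raises.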
+
+    with self.assertRaisesRegexp(
+        errors.InvalidArgumentError, r'Condition x == y did not hold.*'):
+      _, sequence_length = sequence_input_layer({
+          'aaa': sparse_input_a, 'bbb': sparse_input_b})
+      self.evaluate(sequence_length)
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_args': {
+           # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
+           # example 1, [[[10., 11.],  [12., 13.]]]
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
+       'expected_shape': [2, 2, 4]},
+      {'testcase_name': '3D',
+       'sparse_input_args': {
+           # example 0, values [[0., 1., 2., 3.], [4., 5., 6., 7.]]
+           # example 1, [[10., 11., 12., 13.], []]
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
+                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 4)},
+       'expected_shape': [2, 2, 4]},
+      )
+  @test_util.run_in_graph_and_eager_modes
+  def test_static_shape_from_tensors_numeric(
+      self, sparse_input_args, expected_shape):
+    """Tests that we return a known static shape when we have one."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
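+    # With shape=(2, 2), each step holds 4 values, so the fully-defined input
+    # shapes above yield a static output shape of [2, 2, 4].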
+
+    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
+    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
+    shape = input_layer.get_shape()
+    self.assertEqual(shape, expected_shape)
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_args': {
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
+       'expected_shape': [4, 2, 3]},
+      {'testcase_name': '3D',
+       'sparse_input_args': {
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [0, 2]]
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 0, 2),
+           'dense_shape': (4, 2, 2)},
+       'expected_shape': [4, 2, 3]}
+      )
+  @test_util.run_in_graph_and_eager_modes
+  def test_static_shape_from_tensors_indicator(
+      self, sparse_input_args, expected_shape):
+    """Tests that we return a known static shape when we have one."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    indicator_column = fc.indicator_column(categorical_column)
+
+    sequence_input_layer = sfc.SequenceFeatures([indicator_column])
+    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
+    shape = input_layer.get_shape()
+    self.assertEqual(shape, expected_shape)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_compute_output_shape(self):
+    price1 = sfc.sequence_numeric_column('price1', shape=2)
+    price2 = sfc.sequence_numeric_column('price2')
+    features = {
+        'price1': sparse_tensor.SparseTensor(
+            indices=[[0, 0, 0], [0, 0, 1],
+                     [0, 1, 0], [0, 1, 1],
+                     [1, 0, 0], [1, 0, 1],
+                     [2, 0, 0], [2, 0, 1],
+                     [3, 0, 0], [3, 0, 1]],
+            values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.],
+            dense_shape=(4, 3, 2)),
+        'price2': sparse_tensor.SparseTensor(
+            indices=[[0, 0],
+                     [0, 1],
+                     [1, 0],
+                     [2, 0],
+                     [3, 0]],
+            values=[10., 11., 20., 30., 40.],
+            dense_shape=(4, 3))}
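+    # price1 contributes 2 units per step and price2 contributes 1, so the
+    # concatenated output's last dimension is 3.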
+    sequence_features = sfc.SequenceFeatures([price1, price2])
+    seq_input, seq_len = sequence_features(features)
+    self.assertEqual(
+        sequence_features.compute_output_shape((None, None)),
+        (None, None, 3))
+    self.evaluate(variables_lib.global_variables_initializer())
+    self.evaluate(lookup_ops.tables_initializer())
+
+    self.assertAllClose([[[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
+                         [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
+                         [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
+                         [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]]],
+                        self.evaluate(seq_input))
+    self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
+  """Tests the utility fn concatenate_context_input."""
+
+  def test_concatenate_context_input(self):
+    seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2))
+    context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    input_layer = sfc.concatenate_context_input(context_input, seq_input)
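+    # Each example's 5-dim context vector is tiled across its 3 steps and
+    # appended to the 2-dim sequence input, yielding shape (2, 3, 7).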
+
+    expected = np.array([
+        [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]],
+        [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]]
+    ], dtype=np.float32)
+    output = self.evaluate(input_layer)
+    self.assertAllEqual(expected, output)
+
+  @parameterized.named_parameters(
+      {'testcase_name': 'rank_lt_3',
+       'seq_input_arg': np.arange(100).reshape(10, 10)},
+      {'testcase_name': 'rank_gt_3',
+       'seq_input_arg': np.arange(100).reshape(5, 5, 2, 2)}
+      )
+  def test_sequence_input_throws_error(self, seq_input_arg):
+    seq_input = ops.convert_to_tensor(seq_input_arg)
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(ValueError, 'sequence_input must have rank 3'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  @parameterized.named_parameters(
+      {'testcase_name': 'rank_lt_2',
+       'context_input_arg': np.arange(100)},
+      {'testcase_name': 'rank_gt_2',
+       'context_input_arg': np.arange(100).reshape(5, 5, 4)}
+      )
+  def test_context_input_throws_error(self, context_input_arg):
+    context_input = ops.convert_to_tensor(context_input_arg)
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(ValueError, 'context_input must have rank 2'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  def test_integer_seq_input_throws_error(self):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(
+        TypeError, 'sequence_input must have dtype float32'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  def test_integer_context_input_throws_error(self):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(
+        TypeError, 'context_input must have dtype float32'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class DenseFeaturesTest(test.TestCase):
+  """Tests DenseFeatures with sequence feature columns."""
+
+  def test_embedding_column(self):
+    """Tests that error is raised for sequence embedding column."""
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        indices=((0, 0), (1, 0), (1, 1)),
+        values=(2, 0, 1),
+        dense_shape=(2, 2))
+
+    categorical_column_a = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column_a = fc.embedding_column(
+        categorical_column_a, dimension=2)
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'In embedding_column: aaa_embedding\. categorical_column must not be '
+        r'of type SequenceCategoricalColumn\.'):
+      input_layer = fc.DenseFeatures([embedding_column_a])
+      _ = input_layer({'aaa': sparse_input})
+
+  def test_indicator_column(self):
+    """Tests that error is raised for sequence indicator column."""
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        indices=((0, 0), (1, 0), (1, 1)),
+        values=(2, 0, 1),
+        dense_shape=(2, 2))
+
+    categorical_column_a = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    indicator_column_a = fc.indicator_column(categorical_column_a)
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'In indicator_column: aaa_indicator\. categorical_column must not be '
+        r'of type SequenceCategoricalColumn\.'):
+      input_layer = fc.DenseFeatures([indicator_column_a])
+      _ = input_layer({'aaa': sparse_input})
+
+
+def _assert_sparse_tensor_value(test_case, expected, actual):
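+  """Asserts that actual matches expected in indices, dense_shape and values."""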
+  _assert_sparse_tensor_indices_shape(test_case, expected, actual)
+
+  test_case.assertEqual(
+      np.array(expected.values).dtype, np.array(actual.values).dtype)
+  test_case.assertAllEqual(expected.values, actual.values)
+
+
+def _assert_sparse_tensor_indices_shape(test_case, expected, actual):
+  test_case.assertEqual(np.int64, np.array(actual.indices).dtype)
+  test_case.assertAllEqual(expected.indices, actual.indices)
+
+  test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype)
+  test_case.assertAllEqual(expected.dense_shape, actual.dense_shape)
+
+
+def _get_sequence_dense_tensor(column, features):
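+  """Returns the column's sequence dense tensor without a state manager."""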
+  return column.get_sequence_dense_tensor(
+      fc.FeatureTransformationCache(features), None)
+
+
+def _get_sequence_dense_tensor_state(column, features):
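+  """Creates column state, then returns (dense_tensor, lengths, manager)."""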
+  state_manager = fc._StateManagerImpl(Layer(), trainable=True)
+  column.create_state(state_manager)
+  dense_tensor, lengths = column.get_sequence_dense_tensor(
+      fc.FeatureTransformationCache(features), state_manager)
+  return dense_tensor, lengths, state_manager
+
+
+def _get_sparse_tensors(column, features):
+  return column.get_sparse_tensors(
+      fc.FeatureTransformationCache(features), None)
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class SequenceCategoricalColumnWithIdentityTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 2, 0),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((1, 2, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': (6, 7, 8),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': (6, 7, 8),
+           'dense_shape': (2, 2, 2)}}
+      )
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
+    column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
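+    # 2D input gains a size-1 inner dimension so ids are always rank 3, while
+    # 3D input passes through unchanged.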
+
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
+
+    self.assertIsNone(id_weight_pair.weight_tensor)
+    _assert_sparse_tensor_value(
+        self, expected, self.evaluate(id_weight_pair.id_tensor))
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class SequenceCategoricalColumnWithHashBucketTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('omar', 'stringer', 'marlo'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           # Ignored to avoid hash dependence in test.
+           'values': np.array((0, 0, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'stringer', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           # Ignored to avoid hash dependence in test.
+           'values': np.array((0, 0, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
+      )
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
+    column = sfc.sequence_categorical_column_with_hash_bucket(
+        'aaa', hash_bucket_size=10)
+
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
+
+    self.assertIsNone(id_weight_pair.weight_tensor)
+    _assert_sparse_tensor_indices_shape(
+        self, expected, self.evaluate(id_weight_pair.id_tensor))
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class SequenceCategoricalColumnWithVocabularyFileTest(
+    test.TestCase, parameterized.TestCase):
+
+  def _write_vocab(self, vocab_strings, file_name):
+    vocab_file = os.path.join(self.get_temp_dir(), file_name)
+    with open(vocab_file, 'w') as f:
+      f.write('\n'.join(vocab_strings))
+    return vocab_file
+
+  def setUp(self):
+    super(SequenceCategoricalColumnWithVocabularyFileTest, self).setUp()
+
+    vocab_strings = ['omar', 'stringer', 'marlo']
+    self._wire_vocabulary_file_name = self._write_vocab(vocab_strings,
+                                                        'wire_vocabulary.txt')
+    self._wire_vocabulary_size = 3
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('marlo', 'skywalker', 'omar'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((2, -1, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'skywalker', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': np.array((0, -1, 2), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
+      )
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
+    column = sfc.sequence_categorical_column_with_vocabulary_file(
+        key='aaa',
+        vocabulary_file=self._wire_vocabulary_file_name,
+        vocabulary_size=self._wire_vocabulary_size)
+
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
+
+    self.assertIsNone(id_weight_pair.weight_tensor)
+    self.evaluate(variables_lib.global_variables_initializer())
+    self.evaluate(lookup_ops.tables_initializer())
+    _assert_sparse_tensor_value(
+        self, expected, self.evaluate(id_weight_pair.id_tensor))
+
+  def test_get_sparse_tensors_dynamic_zero_length(self):
+    """Tests _get_sparse_tensors with a dynamic sequence length."""
+    with ops.Graph().as_default():
+      inputs = sparse_tensor.SparseTensorValue(
+          indices=np.zeros((0, 2)), values=[], dense_shape=(2, 0))
+      expected = sparse_tensor.SparseTensorValue(
+          indices=np.zeros((0, 3)),
+          values=np.array((), dtype=np.int64),
+          dense_shape=(2, 0, 1))
+      column = sfc.sequence_categorical_column_with_vocabulary_file(
+          key='aaa',
+          vocabulary_file=self._wire_vocabulary_file_name,
+          vocabulary_size=self._wire_vocabulary_size)
+      input_placeholder_shape = list(inputs.dense_shape)
+      # Make second dimension (sequence length) dynamic.
+      input_placeholder_shape[1] = None
+      input_placeholder = array_ops.sparse_placeholder(
+          dtypes.string, shape=input_placeholder_shape)
+      id_weight_pair = _get_sparse_tensors(column, {'aaa': input_placeholder})
+
+      self.assertIsNone(id_weight_pair.weight_tensor)
+      with fc_test._initialized_session() as sess:
+        result = id_weight_pair.id_tensor.eval(
+            session=sess, feed_dict={input_placeholder: inputs})
+        _assert_sparse_tensor_value(
+            self, expected, result)
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class SequenceCategoricalColumnWithVocabularyListTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('marlo', 'skywalker', 'omar'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((2, -1, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'skywalker', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': np.array((0, -1, 2), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
+      )
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
+    column = sfc.sequence_categorical_column_with_vocabulary_list(
+        key='aaa',
+        vocabulary_list=('omar', 'stringer', 'marlo'))
+
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
+
+    self.assertIsNone(id_weight_pair.weight_tensor)
+    self.evaluate(variables_lib.global_variables_initializer())
+    self.evaluate(lookup_ops.tables_initializer())
+    _assert_sparse_tensor_value(
+        self, expected, self.evaluate(id_weight_pair.id_tensor))
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class SequenceEmbeddingColumnTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
+       'expected': [
+           # example 0, ids [2]
+           [[7., 11.], [0., 0.]],
+           # example 1, ids [0, 1]
+           [[1., 2.], [3., 5.]],
+           # example 2, ids []
+           [[0., 0.], [0., 0.]],
+           # example 3, ids [1]
+           [[3., 5.], [0., 0.]]]},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [0, 2]]
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 0, 2),
+           'dense_shape': (4, 2, 2)},
+       'expected': [
+           # example 0, ids [[2]]
+           [[7., 11.], [0., 0.]],
+           # example 1, ids [[0, 1], [2]]
+           [[2, 3.5], [7., 11.]],
+           # example 2, ids []
+           [[0., 0.], [0., 0.]],
+           # example 3, ids [[1], [0, 2]]
+           [[3., 5.], [4., 6.5]]]}
+      )
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    vocabulary_size = 3
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
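+    # fc.embedding_column defaults to the 'mean' combiner, so a 3D step with
+    # ids [0, 1] looks up ((1., 2.) + (3., 5.)) / 2 = (2., 3.5).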
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=embedding_dimension,
+        initializer=_initializer)
+
+    embedding_lookup, _, state_manager = _get_sequence_dense_tensor_state(
+        embedding_column, {'aaa': inputs})
+
+    variables = state_manager._layer.weights
+    self.evaluate(variables_lib.global_variables_initializer())
+    self.assertCountEqual(
+        ('embedding_weights:0',), tuple([v.name for v in variables]))
+    self.assertAllEqual(embedding_values, self.evaluate(variables[0]))
+    self.assertAllEqual(expected, self.evaluate(embedding_lookup))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 2),
+           'dense_shape': (2, 2, 2)},
+       'expected_sequence_length': [1, 2]}
+      )
+  def test_sequence_length(self, inputs_args, expected_sequence_length):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    vocabulary_size = 3
+
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=2)
+
+    _, sequence_length, _ = _get_sequence_dense_tensor_state(
+        embedding_column, {'aaa': inputs})
+
+    sequence_length = self.evaluate(sequence_length)
+    self.assertAllEqual(expected_sequence_length, sequence_length)
+    self.assertEqual(np.int64, sequence_length.dtype)
+
+  def test_sequence_length_with_empty_rows(self):
+    """Tests _sequence_length when some examples do not have ids."""
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids []
+        # example 1, ids [2]
+        # example 2, ids [0, 1]
+        # example 3, ids []
+        # example 4, ids [1]
+        # example 5, ids []
+        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(6, 2))
+    expected_sequence_length = [0, 1, 2, 0, 1, 0]
+
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=2)
+
+    _, sequence_length, _ = _get_sequence_dense_tensor_state(
+        embedding_column, {'aaa': sparse_input})
+
+    self.assertAllEqual(
+        expected_sequence_length, self.evaluate(sequence_length))
+
+
+class SequenceSharedEmbeddingColumnTest(test.TestCase):
+
+  @test_util.run_deprecated_v1
+  def test_get_sequence_dense_tensor(self):
+    vocabulary_size = 3
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    sparse_input_a = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 2))
+    sparse_input_b = sparse_tensor.SparseTensorValue(
+        # example 0, ids [1]
+        # example 1, ids [0, 2]
+        # example 2, ids [0]
+        # example 3, ids []
+        indices=((0, 0), (1, 0), (1, 1), (2, 0)),
+        values=(1, 0, 2, 0),
+        dense_shape=(4, 2))
+
+    expected_lookups_a = [
+        # example 0, ids [2]
+        [[7., 11.], [0., 0.]],
+        # example 1, ids [0, 1]
+        [[1., 2.], [3., 5.]],
+        # example 2, ids []
+        [[0., 0.], [0., 0.]],
+        # example 3, ids [1]
+        [[3., 5.], [0., 0.]],
+    ]
+
+    expected_lookups_b = [
+        # example 0, ids [1]
+        [[3., 5.], [0., 0.]],
+        # example 1, ids [0, 2]
+        [[1., 2.], [7., 11.]],
+        # example 2, ids [0]
+        [[1., 2.], [0., 0.]],
+        # example 3, ids []
+        [[0., 0.], [0., 0.]],
+    ]
+
+    categorical_column_a = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = sfc.sequence_categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    shared_embedding_columns = fc.shared_embedding_columns_v2(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    embedding_lookup_a = _get_sequence_dense_tensor(
+        shared_embedding_columns[0], {'aaa': sparse_input_a})[0]
+    embedding_lookup_b = _get_sequence_dense_tensor(
+        shared_embedding_columns[1], {'bbb': sparse_input_b})[0]
+
+    self.evaluate(variables_lib.global_variables_initializer())
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertCountEqual(('aaa_bbb_shared_embedding:0',),
+                          tuple([v.name for v in global_vars]))
+    self.assertAllEqual(embedding_values, self.evaluate(global_vars[0]))
+    self.assertAllEqual(
+        expected_lookups_a, self.evaluate(embedding_lookup_a))
+    self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
+
+  def test_sequence_length(self):
+    with ops.Graph().as_default():
+      vocabulary_size = 3
+
+      sparse_input_a = sparse_tensor.SparseTensorValue(
+          # example 0, ids [2]
+          # example 1, ids [0, 1]
+          indices=((0, 0), (1, 0), (1, 1)),
+          values=(2, 0, 1),
+          dense_shape=(2, 2))
+      expected_sequence_length_a = [1, 2]
+      categorical_column_a = sfc.sequence_categorical_column_with_identity(
+          key='aaa', num_buckets=vocabulary_size)
+
+      sparse_input_b = sparse_tensor.SparseTensorValue(
+          # example 0, ids [0, 2]
+          # example 1, ids [1]
+          indices=((0, 0), (0, 1), (1, 0)),
+          values=(0, 2, 1),
+          dense_shape=(2, 2))
+      expected_sequence_length_b = [2, 1]
+      categorical_column_b = sfc.sequence_categorical_column_with_identity(
+          key='bbb', num_buckets=vocabulary_size)
+      shared_embedding_columns = fc.shared_embedding_columns_v2(
+          [categorical_column_a, categorical_column_b], dimension=2)
+
+      sequence_length_a = _get_sequence_dense_tensor(
+          shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
+      sequence_length_b = _get_sequence_dense_tensor(
+          shared_embedding_columns[1], {'bbb': sparse_input_b})[1]
+
+      with fc_test._initialized_session() as sess:
+        sequence_length_a = sess.run(sequence_length_a)
+        self.assertAllEqual(expected_sequence_length_a, sequence_length_a)
+        self.assertEqual(np.int64, sequence_length_a.dtype)
+        sequence_length_b = sess.run(sequence_length_b)
+        self.assertAllEqual(expected_sequence_length_b, sequence_length_b)
+        self.assertEqual(np.int64, sequence_length_b.dtype)
+
+  def test_sequence_length_with_empty_rows(self):
+    """Tests _sequence_length when some examples do not have ids."""
+    with ops.Graph().as_default():
+      vocabulary_size = 3
+      sparse_input_a = sparse_tensor.SparseTensorValue(
+          # example 0, ids []
+          # example 1, ids [2]
+          # example 2, ids [0, 1]
+          # example 3, ids []
+          # example 4, ids [1]
+          # example 5, ids []
+          indices=((1, 0), (2, 0), (2, 1), (4, 0)),
+          values=(2, 0, 1, 1),
+          dense_shape=(6, 2))
+      expected_sequence_length_a = [0, 1, 2, 0, 1, 0]
+      categorical_column_a = sfc.sequence_categorical_column_with_identity(
+          key='aaa', num_buckets=vocabulary_size)
+
+      sparse_input_b = sparse_tensor.SparseTensorValue(
+          # example 0, ids [2]
+          # example 1, ids []
+          # example 2, ids []
+          # example 3, ids []
+          # example 4, ids [1]
+          # example 5, ids [0, 1]
+          indices=((0, 0), (4, 0), (5, 0), (5, 1)),
+          values=(2, 1, 0, 1),
+          dense_shape=(6, 2))
+      expected_sequence_length_b = [1, 0, 0, 0, 1, 2]
+      categorical_column_b = sfc.sequence_categorical_column_with_identity(
+          key='bbb', num_buckets=vocabulary_size)
+
+      shared_embedding_columns = fc.shared_embedding_columns_v2(
+          [categorical_column_a, categorical_column_b], dimension=2)
+
+      sequence_length_a = _get_sequence_dense_tensor(
+          shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
+      sequence_length_b = _get_sequence_dense_tensor(
+          shared_embedding_columns[1], {'bbb': sparse_input_b})[1]
+
+      with fc_test._initialized_session() as sess:
+        self.assertAllEqual(
+            expected_sequence_length_a, sequence_length_a.eval(session=sess))
+        self.assertAllEqual(
+            expected_sequence_length_b, sequence_length_b.eval(session=sess))
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
+       'expected': [
+           # example 0, ids [2]
+           [[0., 0., 1.], [0., 0., 0.]],
+           # example 1, ids [0, 1]
+           [[1., 0., 0.], [0., 1., 0.]],
+           # example 2, ids []
+           [[0., 0., 0.], [0., 0., 0.]],
+           # example 3, ids [1]
+           [[0., 1., 0.], [0., 0., 0.]]]},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [2, 2]]
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 2, 2),
+           'dense_shape': (4, 2, 2)},
+       'expected': [
+           # example 0, ids [[2]]
+           [[0., 0., 1.], [0., 0., 0.]],
+           # example 1, ids [[0, 1], [2]]
+           [[1., 1., 0.], [0., 0., 1.]],
+           # example 2, ids []
+           [[0., 0., 0.], [0., 0., 0.]],
+           # example 3, ids [[1], [2, 2]]
+           [[0., 1., 0.], [0., 0., 2.]]]}
+      )
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    vocabulary_size = 3
+
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    indicator_column = fc.indicator_column(categorical_column)
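+    # Indicator output counts ids per step: the repeated id 2 in example 3,
+    # step 1 of the 3D case therefore appears as a count of 2.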
+
+    indicator_tensor, _ = _get_sequence_dense_tensor(
+        indicator_column, {'aaa': inputs})
+
+    self.assertAllEqual(expected, self.evaluate(indicator_tensor))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 2),
+           'dense_shape': (2, 2, 2)},
+       'expected_sequence_length': [1, 2]}
+      )
+  def test_sequence_length(self, inputs_args, expected_sequence_length):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    vocabulary_size = 3
+
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    indicator_column = fc.indicator_column(categorical_column)
+
+    _, sequence_length = _get_sequence_dense_tensor(
+        indicator_column, {'aaa': inputs})
+
+    sequence_length = self.evaluate(sequence_length)
+    self.assertAllEqual(expected_sequence_length, sequence_length)
+    self.assertEqual(np.int64, sequence_length.dtype)
+
+  def test_sequence_length_with_empty_rows(self):
+    """Tests _sequence_length when some examples do not have ids."""
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids []
+        # example 1, ids [2]
+        # example 2, ids [0, 1]
+        # example 3, ids []
+        # example 4, ids [1]
+        # example 5, ids []
+        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(6, 2))
+    expected_sequence_length = [0, 1, 2, 0, 1, 0]
+
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    indicator_column = fc.indicator_column(categorical_column)
+
+    _, sequence_length = _get_sequence_dense_tensor(
+        indicator_column, {'aaa': sparse_input})
+
+    self.assertAllEqual(
+        expected_sequence_length, self.evaluate(sequence_length))
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
+
+  def test_defaults(self):
+    a = sfc.sequence_numeric_column('aaa')
+    self.assertEqual('aaa', a.key)
+    self.assertEqual('aaa', a.name)
+    self.assertEqual((1,), a.shape)
+    self.assertEqual(0., a.default_value)
+    self.assertEqual(dtypes.float32, a.dtype)
+    self.assertIsNone(a.normalizer_fn)
+
+  def test_shape_saved_as_tuple(self):
+    a = sfc.sequence_numeric_column('aaa', shape=[1, 2])
+    self.assertEqual((1, 2), a.shape)
+
+  def test_shape_must_be_positive_integer(self):
+    with self.assertRaisesRegexp(TypeError, 'shape dimensions must be integer'):
+      sfc.sequence_numeric_column('aaa', shape=[1.0])
+
+    with self.assertRaisesRegexp(
+        ValueError, 'shape dimensions must be greater than 0'):
+      sfc.sequence_numeric_column('aaa', shape=[0])
+
+  def test_dtype_is_convertible_to_float(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'dtype must be convertible to float'):
+      sfc.sequence_numeric_column('aaa', dtype=dtypes.string)
+
+  def test_normalizer_fn_must_be_callable(self):
+    with self.assertRaisesRegexp(TypeError, 'must be a callable'):
+      sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           # example 0, values [0., 1.]
+           # example 1, [10.]
+           'indices': ((0, 0), (0, 1), (1, 0)),
+           'values': (0., 1., 10.),
+           'dense_shape': (2, 2)},
+       'expected': [
+           [[0.], [1.]],
+           [[10.], [0.]]]},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           # example 0, values [[20., 3.], [5.]]
+           # example 1, values [[3.], [8.]]
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (20., 3., 5., 3., 8.),
+           'dense_shape': (2, 2, 2)},
+       'expected': [
+           [[20.], [3.], [5.], [0.]],
+           [[3.], [0.], [8.], [0.]]]},
+      )
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    numeric_column = sfc.sequence_numeric_column('aaa')
+
+    dense_tensor, _ = _get_sequence_dense_tensor(
+        numeric_column, {'aaa': inputs})
+    self.assertAllEqual(expected, self.evaluate(dense_tensor))
+
+  def test_get_sequence_dense_tensor_with_normalizer_fn(self):
+
+    def _increment_two(input_sparse_tensor):
+      return sparse_ops.sparse_add(
+          input_sparse_tensor,
+          sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2))
+      )
+
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, values [[0.], [1]]
+        # example 1, [[10.]]
+        indices=((0, 0), (0, 1), (1, 0)),
+        values=(0., 1., 10.),
+        dense_shape=(2, 2))
+
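+    # The normalizer_fn is applied to the SparseTensor itself, so sparse_add
+    # introduces a value at (1, 1), a position absent from the input.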
+    # Before _increment_two:
+    #   [[0.], [1.]],
+    #   [[10.], [0.]],
+    # After _increment_two:
+    #   [[2.], [1.]],
+    #   [[10.], [2.]],
+    expected_dense_tensor = [
+        [[2.], [1.]],
+        [[10.], [2.]],
+    ]
+    numeric_column = sfc.sequence_numeric_column(
+        'aaa', normalizer_fn=_increment_two)
+
+    dense_tensor, _ = _get_sequence_dense_tensor(
+        numeric_column, {'aaa': sparse_input})
+
+    self.assertAllEqual(
+        expected_dense_tensor, self.evaluate(dense_tensor))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_args': {
+           # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
+           # example 1, [[[10., 11.],  [12., 13.]]]
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
+       'expected_dense_tensor': [
+           [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
+           [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]},
+      {'testcase_name': '3D',
+       'sparse_input_args': {
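+           # Values occupy only the even positions of the inner dimension, so
+           # the odd positions default to 0 in the dense tensor.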
+           'indices': ((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
+                       (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
+                       (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 8)},
+       'expected_dense_tensor': [
+           [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
+            [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
+           [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]],
+            [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]},
+      )
+  def test_get_dense_tensor_multi_dim(
+      self, sparse_input_args, expected_dense_tensor):
+    """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
+
+    dense_tensor, _ = _get_sequence_dense_tensor(
+        numeric_column, {'aaa': sparse_input})
+
+    self.assertAllEqual(
+        expected_dense_tensor, self.evaluate(dense_tensor))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs_args': {
+           # example 0, values [2.]
+           # example 1, values [0., 1.]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2., 0., 1.),
+           'dense_shape': (2, 2)},
+       'expected_sequence_length': [1, 2],
+       'shape': (1,)},
+      {'testcase_name': '3D',
+       'inputs_args': {
+           # example 0, values [[2.]]
+           # example 1, values [[0., 1.], [2.]]
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2., 0., 1., 2.),
+           'dense_shape': (2, 2, 2)},
+       'expected_sequence_length': [1, 2],
+       'shape': (1,)},
+      {'testcase_name': '2D_with_shape',
+       'inputs_args': {
+           # example 0, values [2.]
+           # example 1, values [0., 1.]
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2., 0., 1.),
+           'dense_shape': (2, 2)},
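+       # With shape=(2,), every two values form one timestep, so each example
+       # has a sequence length of 1.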
+       'expected_sequence_length': [1, 1],
+       'shape': (2,)},
+      {'testcase_name': '3D_with_shape',
+       'inputs_args': {
+           # example 0, values [[2.]]
+           # example 1, values [[0., 1.], [2.]]
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2., 0., 1., 2.),
+           'dense_shape': (2, 2, 2)},
+       'expected_sequence_length': [1, 2],
+       'shape': (2,)},
+      )
+  def test_sequence_length(self, inputs_args, expected_sequence_length, shape):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=shape)
+
+    _, sequence_length = _get_sequence_dense_tensor(
+        numeric_column, {'aaa': inputs})
+
+    sequence_length = self.evaluate(sequence_length)
+    self.assertAllEqual(expected_sequence_length, sequence_length)
+    self.assertEqual(np.int64, sequence_length.dtype)
+
+  def test_sequence_length_with_empty_rows(self):
+    """Tests _sequence_length when some examples do not have ids."""
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, values []
+        # example 1, values [[0.], [1.]]
+        # example 2, values [[2.]]
+        # example 3, values []
+        # example 4, values [[3.]]
+        # example 5, values []
+        indices=((1, 0), (1, 1), (2, 0), (4, 0)),
+        values=(0., 1., 2., 3.),
+        dense_shape=(6, 2))
+    expected_sequence_length = [0, 2, 1, 0, 1, 0]
+    numeric_column = sfc.sequence_numeric_column('aaa')
+
+    _, sequence_length = _get_sequence_dense_tensor(
+        numeric_column, {'aaa': sparse_input})
+
+    self.assertAllEqual(
+        expected_sequence_length, self.evaluate(sequence_length))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/framework/auto_control_deps.py b/tensorflow/python/framework/auto_control_deps.py
index 6210010..a8ba4ea 100644
--- a/tensorflow/python/framework/auto_control_deps.py
+++ b/tensorflow/python/framework/auto_control_deps.py
@@ -32,12 +32,65 @@
 # Op types that should not run in program order, e.g. because they need to run
 # asynchronously to avoid deadlock.
 ASYNC_STATEFUL_OPS = [
+    "CollectiveGather",
     "CollectiveReduce",
     "CollectiveBcastSend",
     "CollectiveBcastRecv",
     "NcclAllReduce",
 ]
 
+LEGACY_RANDOM_OPS = [
+    # These may be used in variable initializers -- thus their execution should
+    # not depend on other stateful operations.  Although tf.Variables may be
+    # created in sequence according to program order, their initialization
+    # happens outside of it (in graph mode, by calling a grouped initializer
+    # operation; in eager mode, by lifting initialization out of the
+    # tf.function and executing it the first time the function is executed).
+    #
+    # Unless there is a specific dependency between the initializers
+    # themselves (e.g. one initializer depends on a Variable whose value depends
+    # on another initializer), the initialization can happen in any order so
+    # long as it's before the associated Variable read operations.
+    #
+    # Note that in general the determinism of legacy random operations is only
+    # guaranteed by providing a graph-level and op-level seed (and the ordering
+    # of the same op across multiple iterations of a while_loop is specifically
+    # not guaranteed; see the discussion below).
+    #
+    # There is a possible race condition inside while_loop where the same
+    # random OpKernel instantiation is reused across multiple steps of the
+    # loop.  Since legacy random OpKernels have internal RNG state, automatic
+    # dependency tracking across loop steps would likely fix this race, and in
+    # that case this blacklist would be problematic.  However, since automatic
+    # dependency tracking inside while loops is not currently supported, and
+    # there are no other examples of OpKernel reuse (each OpKernel is
+    # associated with a unique op in graph mode), this blacklist has no effect
+    # on the aforementioned behavior.
+    #
+    # TODO(ebrevdo,skyewm): Modify the check against this blacklist to
+    # only occur when the op is inside a "variable initialization scope"; and
+    # add proper autodeps inside while_loops that respects this updated check.
+    "RandomUniform",
+    "RandomUniformInt",
+    "RandomStandardNormal",
+    "ParameterizedTruncatedNormal",
+    "TruncatedNormal",
+    "RandomShuffle",
+    "Multinomial",
+    "RandomGamma",
+    "RandomGammaGrad",
+    "RandomPoisson",
+    "RandomPoissonV2",
+]
+
+_ALL_BLACKLISTED_OPS = set(ASYNC_STATEFUL_OPS) | set(LEGACY_RANDOM_OPS)
+
+
+def op_is_stateful(op_def):
+  return op_def.is_stateful and op_def.name not in _ALL_BLACKLISTED_OPS
+
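+# A rough sketch of the intended behavior (the OpDef values below are
+# illustrative stand-ins, not real op registrations):
+#
+#   op_is_stateful(OpDef(name="MatMul", is_stateful=False))      # -> False
+#   op_is_stateful(OpDef(name="VarHandleOp", is_stateful=True))  # -> True
+#   op_is_stateful(OpDef(name="RandomUniform", is_stateful=True))
+#   # -> False, since RandomUniform is in LEGACY_RANDOM_OPS above.
+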
 
 class AutomaticControlDependencies(object):
   """Context manager to automatically add control dependencies.
@@ -45,7 +98,7 @@
   Code under this context manager will act as if a sensible set of control
   dependencies were present. More specifically:
     1. All stateful ops in the scope will execute (with the exception of ops in
-       ASYNC_STATEFUL_OPS)
+       ASYNC_STATEFUL_OPS and LEGACY_RANDOM_OPS)
     2. Stateful ops which modify the same resource will execute in program order
 
   Note: creating variables in an automatic control dependencies context is not
@@ -57,6 +110,7 @@
 
   def __init__(self):
     self._returned_tensors = set()
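+    # Stateful ops that must run, exposed so that callers (e.g. FuncGraph) can
+    # record them as control outputs of the function being built.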
+    self.ops_which_must_run = set()
 
   def mark_as_return(self, tensor):
     """Acts like identity but marks the `Tensor` as a return value.
@@ -233,8 +287,7 @@
       control_inputs = set()
       # Ensure stateful ops run
       if (op.type not in self._graph._registered_ops  # pylint: disable=protected-access
-          or (self._graph._registered_ops[op.type].is_stateful   # pylint: disable=protected-access
-              and op.type not in ASYNC_STATEFUL_OPS)):
+          or op_is_stateful(self._graph._registered_ops[op.type])):  # pylint: disable=protected-access
         ops_which_must_run.add(op)
       # Ignore switches (they're handled separately)
       if op.type == "Switch" and op.inputs[0].dtype == dtypes_module.resource:
@@ -271,8 +324,8 @@
         if inp in merge_for_resource:
           merge_for_resource[inp]._add_control_input(op)  # pylint: disable=protected-access
         last_op_using_resource_tensor[inp] = op
-      if (op.op_def.is_stateful and op.type not in ASYNC_STATEFUL_OPS
-          and not found_resource and op._control_flow_context is None):  # pylint: disable=protected-access
+      if (op_is_stateful(op.op_def) and not found_resource
+          and op._control_flow_context is None):  # pylint: disable=protected-access
         if None in last_op_using_resource_tensor:
           op._add_control_input(last_op_using_resource_tensor[None])  # pylint: disable=protected-access
         last_op_using_resource_tensor[None] = op
@@ -281,10 +334,11 @@
       op._add_control_inputs(control_inputs)  # pylint: disable=protected-access
 
     # Ensure all ops which must run do run
+    self.ops_which_must_run.update(ops_which_must_run)
     for r in self._returned_tensors:
-      if ops_which_must_run:
+      if self.ops_which_must_run:
         r.op._add_control_inputs(  # pylint: disable=protected-access
-            [o for o in ops_which_must_run
+            [o for o in self.ops_which_must_run
              if o._control_flow_context is r.op._control_flow_context])  # pylint: disable=protected-access
 
 
diff --git a/tensorflow/python/framework/auto_control_deps_test.py b/tensorflow/python/framework/auto_control_deps_test.py
index 2c25ab1..d9df96f 100644
--- a/tensorflow/python/framework/auto_control_deps_test.py
+++ b/tensorflow/python/framework/auto_control_deps_test.py
@@ -25,7 +25,9 @@
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras.layers import core as keras_core
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -296,6 +298,30 @@
 
     self.assertEqual(self.evaluate(outer()), 2.0)
 
+  def testVariableInitializersCanBeLifted(self):
+    # The initializer is a stateful op, but using it inside a function should
+    # *not* create additional dependencies.  That's what we're testing.
+    layer = keras_core.Dense(1, kernel_initializer="glorot_uniform")
+
+    @def_function.function
+    def fn(x):
+      # Stateful operation
+      control_flow_ops.Assert(x, ["Error"])
+      # Variable initialization should be lifted.  Prior to the change that
+      # added this test, the lifting would crash because of an auto control dep
+      # added on `x`.  Note that the error did not happen if we manually
+      # created a tf.Variable outside of the function and used it here.
+      # Alternatively, creating a tf.Variable inside fn() causes a different
+      # sort of error that is out of scope for this test.
+      return layer(ops.convert_to_tensor([[1.0, 1.0]]))
+
+    true = ops.convert_to_tensor(True)
+
+    concrete = fn.get_concrete_function(
+        tensor_spec.TensorSpec(shape=(), dtype=dtypes.bool))
+    self.evaluate(concrete(true))
+    self.evaluate(fn(True))
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
diff --git a/tensorflow/python/framework/convert_to_constants.py b/tensorflow/python/framework/convert_to_constants.py
new file mode 100644
index 0000000..f96e599
--- /dev/null
+++ b/tensorflow/python/framework/convert_to_constants.py
@@ -0,0 +1,142 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Helpers to convert variables to constants in TensorFlow 2.0."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.core.framework import graph_pb2
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.grappler import tf_optimizer
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training.saver import export_meta_graph
+
+
+def _run_inline_graph_optimization(func):
+  """Apply function inline optimization to the graph.
+
+  Returns the GraphDef after Grappler's function inlining optimization is
+  applied. This optimization does not work on models with control flow.
+
+  Args:
+    func: ConcreteFunction.
+
+  Returns:
+    GraphDef
+  """
+  meta_graph = export_meta_graph(
+      graph_def=func.graph.as_graph_def(), graph=func.graph)
+
+  # Add a collection 'train_op' so that Grappler knows the outputs.
+  fetch_collection = meta_graph_pb2.CollectionDef()
+  for array in func.inputs + func.outputs:
+    fetch_collection.node_list.value.append(array.name)
+  meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)
+
+  # Initialize RewriterConfig with everything disabled except function inlining.
+  config = config_pb2.ConfigProto()
+  rewrite_options = config.graph_options.rewrite_options
+  rewrite_options.optimizers.append("function")
+  return tf_optimizer.OptimizeGraph(config, meta_graph)
+
+
+def convert_variables_to_constants_v2(func):
+  """Replaces all the variables in a graph with constants of the same values.
+
+  TensorFlow 2.0 function for converting all Variable ops into Const ops holding
+  the same values. This makes it possible to describe the network fully with a
+  single GraphDef file, and allows the removal of a lot of ops related to
+  loading and saving the variables. This function runs Grappler's function
+  inlining optimization in order to return a single subgraph.
+
+  The current implementation only works for graphs that do not contain any
+  control flow or embedding related ops.
+
+  Args:
+    func: ConcreteFunction.
+
+  Returns:
+    GraphDef containing a simplified version of the original.
+  """
+  # TODO(nupurgarg): Replace ResourceGather with Gather.
+  # TODO(nupurgarg): Change attr for Variables in control flow and functions.
+  graph_def = _run_inline_graph_optimization(func)
+
+  # Identify the ReadVariableOps.
+  get_name = lambda name: name.split(":")[0]
+  map_name_to_node = {get_name(node.name): node for node in graph_def.node}
+
+  variables_generator = iter(func.graph.variables)
+  resource_identities = {}
+  resource_placeholders = {}
+  for node in graph_def.node:
+    if node.op == "ReadVariableOp":
+      # Get the name of the Placeholder op associated with the ReadVariableOp.
+      # There can be one or more Identity ops between the ReadVariableOp and
+      # the Placeholder. Store those Identity ops with their dtypes.
+      input_name = get_name(node.input[0])
+      while map_name_to_node[input_name].op == "Identity":
+        resource_identities[input_name] = node.attr["dtype"]
+        input_name = get_name(map_name_to_node[input_name].input[0])
+      if map_name_to_node[input_name].op != "Placeholder":
+        raise ValueError("Cannot find the Placeholder op that is an input "
+                         "to the ReadVariableOp.")
+      # Build a map of Placeholder ops that are inputs to ReadVariableOps to the
+      # variable's dtype and data.
+      # TODO(nupurgarg): Confirm relationship between variables in
+      # `func.graph.variables` and ReadVariableOps in `graph_def.nodes`.
+      resource_placeholders[input_name] = {
+          "dtype": node.attr["dtype"],
+          "data": next(variables_generator).numpy(),
+      }
+
+  # Reconstruct the graph with constants in place of variables.
+  output_graph_def = graph_pb2.GraphDef()
+  how_many_converted = 0
+
+  for input_node in graph_def.node:
+    output_node = output_graph_def.node.add()
+    # Convert Placeholder ops that are inputs to ReadVariableOps into Const ops.
+    if input_node.name in resource_placeholders:
+      dtype = resource_placeholders[input_node.name]["dtype"]
+      data = resource_placeholders[input_node.name]["data"]
+
+      output_node.op = "Const"
+      output_node.name = input_node.name
+      output_node.attr["dtype"].CopyFrom(dtype)
+      output_node.attr["value"].tensor.CopyFrom(
+          tensor_util.make_tensor_proto(
+              data, dtype=dtype.type, shape=data.shape))
+      how_many_converted += 1
+    # Change the dtype for Identity ops that are inputs to ReadVariableOps.
+    elif input_node.name in resource_identities:
+      output_node.CopyFrom(input_node)
+      output_node.attr["T"].CopyFrom(resource_identities[input_node.name])
+    # Convert ReadVariableOps into Identity ops.
+    elif input_node.op == "ReadVariableOp":
+      output_node.op = "Identity"
+      output_node.name = input_node.name
+      output_node.input.extend([input_node.input[0]])
+      output_node.attr["T"].CopyFrom(input_node.attr["dtype"])
+      if "_class" in input_node.attr:
+        output_node.attr["_class"].CopyFrom(input_node.attr["_class"])
+    else:
+      output_node.CopyFrom(input_node)
+
+  logging.info("Converted %d variables to const ops.", how_many_converted)
+  return output_graph_def
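+
+
+# A minimal usage sketch (illustrative; it mirrors the tests in
+# convert_to_constants_test.py below and assumes the same imports):
+#
+#   root = tracking.AutoTrackable()
+#   root.v = variables.Variable(3.)
+#   root.f = def_function.function(lambda x: root.v * x)
+#   concrete_func = root.f.get_concrete_function(
+#       constant_op.constant(1., shape=[1]))
+#   frozen_graph_def = convert_variables_to_constants_v2(concrete_func)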
diff --git a/tensorflow/python/framework/convert_to_constants_test.py b/tensorflow/python/framework/convert_to_constants_test.py
new file mode 100644
index 0000000..bd28dba
--- /dev/null
+++ b/tensorflow/python/framework/convert_to_constants_test.py
@@ -0,0 +1,191 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for convert_to_constants.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.python.client import session
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import convert_to_constants
+from tensorflow.python.framework import importer
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model.load import load
+from tensorflow.python.saved_model.save import save
+from tensorflow.python.training.tracking import tracking
+
+
+class VariablesToConstantsTest(test.TestCase):
+
+  def _hasStatefulPartitionedCallOp(self, graph_def):
+    """Determines if a StatefulPartitionedCall op exists in the graph."""
+    for node in graph_def.node:
+      if node.op == "StatefulPartitionedCall":
+        return True
+    return False
+
+  def _getNumVariables(self, graph_def):
+    """Returns the number of ReadVariableOp in the graph."""
+    return sum(node.op == "ReadVariableOp" for node in graph_def.node)
+
+  def _getTensors(self, sess, tensor_list):
+    """Returns a list of Tensor objects from the Session."""
+    return [
+        sess.graph.get_tensor_by_name(tensor.name) for tensor in tensor_list
+    ]
+
+  def _evaluateGraphDef(self, graph_def, func, input_data):
+    """Evaluates the GraphDef using Sessions."""
+    with ops.Graph().as_default() as graph:
+      importer.import_graph_def(graph_def, name="")
+      func.add_to_graph(graph)
+      sess = session.Session(graph=graph)
+
+    input_tensors = self._getTensors(sess, func.inputs)
+    output_tensors = self._getTensors(sess, func.outputs)
+    return sess.run(
+        output_tensors, feed_dict=dict(zip(input_tensors, input_data)))
+
+  @test_util.run_v2_only
+  def testConstSavedModel(self):
+    """Test a basic model with functions to make sure functions are inlined."""
+    input_data = constant_op.constant(1., shape=[1])
+    root = tracking.AutoTrackable()
+    root.f = def_function.function(lambda x: 2. * x)
+    to_save = root.f.get_concrete_function(input_data)
+
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    save(root, save_dir, to_save)
+    saved_model = load(save_dir)
+    concrete_func = saved_model.signatures["serving_default"]
+
+    variable_graph_def = concrete_func.graph.as_graph_def()
+    self.assertEqual(0, self._getNumVariables(variable_graph_def))
+    self.assertTrue(variable_graph_def.library.function)
+
+    constant_graph_def = convert_to_constants.convert_variables_to_constants_v2(
+        concrete_func)
+    self.assertEqual(0, self._getNumVariables(constant_graph_def))
+    self.assertFalse(constant_graph_def.library.function)
+
+    # Check value.
+    expected_value = root.f(input_data)
+    actual_value = self._evaluateGraphDef(constant_graph_def, concrete_func,
+                                          [input_data.numpy()])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+  @test_util.run_v2_only
+  def testVariableModel(self):
+    """Test a basic model with Variables."""
+    input_data = constant_op.constant(1., shape=[1])
+    root = tracking.AutoTrackable()
+    root.v1 = variables.Variable(3.)
+    root.v2 = variables.Variable(2.)
+    root.f = def_function.function(lambda x: root.v1 * root.v2 * x)
+    concrete_func = root.f.get_concrete_function(input_data)
+
+    variable_graph_def = concrete_func.graph.as_graph_def()
+    self.assertEqual(2, self._getNumVariables(variable_graph_def))
+
+    constant_graph_def = convert_to_constants.convert_variables_to_constants_v2(
+        concrete_func)
+    self.assertEqual(0, self._getNumVariables(constant_graph_def))
+    self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def))
+
+    # Check value.
+    expected_value = root.f(input_data)
+    actual_value = self._evaluateGraphDef(constant_graph_def, concrete_func,
+                                          [input_data.numpy()])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+  @test_util.run_v2_only
+  def testVariableSavedModel(self):
+    """Test a basic model with Variables with saving/loading the SavedModel."""
+    input_data = constant_op.constant(1., shape=[1])
+    root = tracking.AutoTrackable()
+    root.v1 = variables.Variable(3.)
+    root.v2 = variables.Variable(2.)
+    root.f = def_function.function(lambda x: root.v1 * root.v2 * x)
+    to_save = root.f.get_concrete_function(input_data)
+
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    save(root, save_dir, to_save)
+    saved_model = load(save_dir)
+    concrete_func = saved_model.signatures["serving_default"]
+
+    variable_graph_def = concrete_func.graph.as_graph_def()
+    self.assertTrue(self._hasStatefulPartitionedCallOp(variable_graph_def))
+
+    constant_graph_def = convert_to_constants.convert_variables_to_constants_v2(
+        concrete_func)
+    self.assertEqual(0, self._getNumVariables(constant_graph_def))
+    self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def))
+
+    # Check value.
+    expected_value = root.f(input_data)
+    actual_value = self._evaluateGraphDef(constant_graph_def, concrete_func,
+                                          [input_data.numpy()])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+  @test_util.run_v2_only
+  def testMultiFunctionModel(self):
+    """Test a basic model with Variables."""
+
+    class BasicModel(tracking.AutoTrackable):
+
+      def __init__(self):
+        self.y = None
+        self.z = None
+
+      @def_function.function
+      def add(self, x):
+        if self.y is None:
+          self.y = variables.Variable(2.)
+        return x + self.y
+
+      @def_function.function
+      def sub(self, x):
+        if self.z is None:
+          self.z = variables.Variable(3.)
+        return x - self.z
+
+    input_data = constant_op.constant(1., shape=[1])
+    root = BasicModel()
+    concrete_func = root.add.get_concrete_function(input_data)
+
+    variable_graph_def = concrete_func.graph.as_graph_def()
+    self.assertEqual(1, self._getNumVariables(variable_graph_def))
+
+    constant_graph_def = convert_to_constants.convert_variables_to_constants_v2(
+        concrete_func)
+    self.assertEqual(0, self._getNumVariables(constant_graph_def))
+    self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def))
+
+    # Check value.
+    expected_value = root.add(input_data)
+    actual_value = self._evaluateGraphDef(constant_graph_def, concrete_func,
+                                          [input_data.numpy()])
+    self.assertEqual(expected_value.numpy(), actual_value)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/framework/error_interpolation.py b/tensorflow/python/framework/error_interpolation.py
index 2d1b210..b671dfb 100644
--- a/tensorflow/python/framework/error_interpolation.py
+++ b/tensorflow/python/framework/error_interpolation.py
@@ -41,6 +41,8 @@
 _BAD_FILE_SUBSTRINGS = [
     os.path.join("tensorflow", "python"),
     os.path.join("tensorflow", "contrib"),
+    os.path.join("tensorflow_estimator", "python"),
+    os.path.join("tensorflow_estimator", "contrib"),
     "<embedded",
 ]
 
@@ -222,16 +224,25 @@
 
   Returns:
-    A list of line name and lineno pairs. Below is an example of returned list:
-    [("tool_utils.py", "124"), ("tool_utils.py", "21"), ....]
+    A list of (filename, lineno, function name, code) tuples. For example:
+    [("tool_utils.py", "124", "func1", "a={}"), ("tool_utils.py", "21", "func2",
+    "for i in range(10):"), ....]
   """
   defining_frame_index = _find_index_of_defining_frame_for_op(op)
   stack_trace = []
-  # the stack trace is collected from the defining (included) to the outermost.
-  for index in reversed(range(defining_frame_index + 1)):
+  # The stack trace is collected from the defining frame (included) out to the
+  # outermost frame, with at most `frame_num` frames.  Two frames from inside
+  # the TensorFlow library are included to show the node definition.
+  frame_num = 10
+  innermost_excluded = min(defining_frame_index + 2 + 1, len(op.traceback))
+  outermost_included = max(innermost_excluded - frame_num, 0)
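+  # For example (illustrative): with defining_frame_index=5 and a deep enough
+  # traceback, frames 7 down to 0 are collected -- two TF-internal frames, the
+  # defining frame, and the user frames leading to it.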
+  for index in reversed(range(outermost_included, innermost_excluded)):
     frame = op.traceback[index]
     filename = frame[tf_stack.TB_FILENAME]
     lineno = frame[tf_stack.TB_LINENO]
-    stack_trace.append((filename, lineno))
+    func = frame[tf_stack.TB_FUNCNAME]
+    code = frame[tf_stack.TB_CODEDICT]
+    stack_trace.append((filename, lineno, func, code))
   return stack_trace
 
 
@@ -312,21 +323,18 @@
   return os.path.split(os.path.commonprefix(list(files)))[0]
 
 
-def _sources_for_node(name, graph):
-  """Gets the top-level root input nodes for 'name' node.
-
-  We recursively traverse the graph from 'name' node to its inputs and collect
-  all the nodes which don't have any inputs.
+def _sources_for_node(node, graph):
+  """Gets the input op nodes for 'node'.
 
   Args:
-    name: The name of the node.
+    node: The node.
     graph: The graph containing the node.
 
   Returns:
-    The unique top-level root input nodes.
+    The unique input nodes.
   """
-  def _helper(name, graph, seen_names, inputs):
-    """Recursive helper. 'seen_names' and 'inputs' are mutated."""
+  inputs = set()
+  for name in node.node_def.input:
     if name.startswith("^"):
       name = name[1:]
     try:
@@ -336,20 +344,9 @@
       try:
         op = graph.get_operation_by_name(name)
       except KeyError:
-        return
-    name = op.name
-    if name in seen_names:
-      return
-    seen_names.add(name)
-    if not op.node_def.input:
-      inputs.add(op)
-      return
-    for n in op.node_def.input:
-      _helper(n, graph, seen_names, inputs)
+        continue
+    inputs.add(op)
 
-  names = set()
-  inputs = set()
-  _helper(name, graph, names, inputs)
   return list(inputs)
 
 
@@ -413,7 +410,7 @@
     if op is None:
       tagged_ops.append(None)
     else:
-      tagged_ops.append([op] + _sources_for_node(op.name, graph))
+      tagged_ops.append([op] + _sources_for_node(op, graph))
 
   common_prefix = traceback_files_common_prefix(tagged_ops)
   for tag, ops in zip(tags, tagged_ops):
diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py
index 3f81466..c9bc913 100644
--- a/tensorflow/python/framework/func_graph.py
+++ b/tensorflow/python/framework/func_graph.py
@@ -23,6 +23,7 @@
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.eager import context
+from tensorflow.python.eager import execute
 from tensorflow.python.eager import tape
 from tensorflow.python.eager.graph_only_ops import graph_placeholder
 from tensorflow.python.framework import dtypes
@@ -135,9 +136,11 @@
     captures: Maps external tensor -> internal tensor (i.e. input placeholder).
       The entries are in the order they were captured.
     seed: The graph-level random seed.
+    capture_by_value: If True, the func graph will capture Variables by value
+      instead of by reference.
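+    control_outputs: Ops that must be executed when the function is called,
+      e.g. ops added by automatic control dependencies.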
   """
 
-  def __init__(self, name, collections=None):
+  def __init__(self, name, collections=None, capture_by_value=None):
     """Construct a new FuncGraph.
 
     The graph will inherit its graph key, collections, seed, and distribution
@@ -152,17 +155,29 @@
         The current whitelisted collections are the global variables, the
         local variables, and the trainable variables.
         Defaults to None.
+      capture_by_value: An optional boolean. If True, the func graph will
+        capture Variables by value instead of by reference. By default this is
+        inherited from outer graphs, and failing that it defaults to False.
     """
     super(FuncGraph, self).__init__()
 
     self.name = name
     self.inputs = []
     self.outputs = []
+    self.control_outputs = []
     self.structured_input_signature = None
     self.structured_outputs = None
     self._weak_variables = []
     self.outer_graph = ops.get_default_graph()
     self.captures = py_collections.OrderedDict()
+    # Inherit capture-by-value from outer graph.
+    if capture_by_value is not None:
+      self.capture_by_value = capture_by_value
+    elif self.outer_graph is not None and isinstance(
+        self.outer_graph, FuncGraph):
+      self.capture_by_value = self.outer_graph.capture_by_value
+    else:
+      self.capture_by_value = False
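+    # Illustrative consequence: a FuncGraph built while an outer FuncGraph
+    # with capture_by_value=True is active will also capture by value, unless
+    # the flag is passed explicitly.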
 
     self._building_function = True
     # Map from resource tensor name to last op (in program order) which uses
@@ -195,6 +210,9 @@
     else:
       self._collections = collections
 
+  def __str__(self):
+    return "FuncGraph(name=%s, id=%s)" % (self.name, id(self))
+
   def as_default(self):
     outer_cm = super(FuncGraph, self).as_default()
 
@@ -278,11 +296,39 @@
   def variables(self, var_list):
     self._weak_variables = [weakref.ref(v) for v in var_list]
 
+  def _capture_by_value(
+      self,
+      op_type,
+      inputs,
+      dtypes,  # pylint: disable=redefined-outer-name
+      input_types=None,
+      name=None,
+      attrs=None,
+      op_def=None,
+      compute_shapes=True,
+      compute_device=True):
+    # When capturing by value, perform the read outside the function graph
+    # (under an init scope) and capture the resulting value rather than the
+    # resource handle.
+    reverse_captures = dict((v, k) for k, v in self.captures.items())
+    uncaptured_inputs = [reverse_captures.get(t, t) for t in inputs]
+    with ops.init_scope():
+      if context.executing_eagerly():
+        attr_list = ("dtype", int(attrs["dtype"].type))
+        value, = execute.execute(
+            compat.as_bytes(op_type), 1, uncaptured_inputs, attr_list,
+            context.context())
+      else:
+        op = ops.get_default_graph().create_op(
+            op_type, uncaptured_inputs, dtypes, input_types, name, attrs,
+            op_def, compute_shapes, compute_device)
+        value = op.outputs[0]
+    captured_value = self.capture(value)
+    return captured_value.op
+
   def create_op(
       self,
       op_type,
       inputs,
-      dtypes,
+      dtypes,  # pylint: disable=redefined-outer-name
       input_types=None,
       name=None,
       attrs=None,
@@ -321,6 +367,12 @@
     Returns:
       An `Operation` object.
     """
+    if self.capture_by_value and op_type in ["ReadVariableOp",
+                                             "ResourceGather"]:
+      return self._capture_by_value(
+          op_type, inputs, dtypes, input_types, name, attrs, op_def,
+          compute_shapes, compute_device)
+
     # This capturing logic interacts poorly with control flow contexts which
     # want to replace inputs of ops far too late in the process. This can lead
     # the context to get confused and try to create an Enter for an Enter. We
@@ -366,6 +418,19 @@
     if tensor.graph is not self:
       if name is None:
         name = tensor.op.name
+      inner_graph = tensor.graph
+      while inner_graph is not None and isinstance(inner_graph, FuncGraph):
+        if inner_graph is self:
+          raise ValueError(
+              "Trying to capture a tensor from an inner function. This can be "
+              "caused by accessing a tensor defined inside a loop or "
+              "conditional body, or a subfunction, from a calling function, "
+              "without going through the proper return value mechanism. "
+              "Consider using TensorFlow mechanisms such as TensorArrays "
+              "to return tensors from inner functions or loop / conditional "
+              "bodies. Tensor: %s; tensor graph: %s; this graph: %s"
+              % (tensor, tensor.graph, self))
+        inner_graph = inner_graph.outer_graph
       return self._capture_helper(tensor, name)
     return tensor
 
@@ -402,7 +467,8 @@
                             add_control_dependencies=True,
                             arg_names=None,
                             op_return_value=None,
-                            collections=None):
+                            collections=None,
+                            capture_by_value=None):
   """Returns a `FuncGraph` generated from `python_func`.
 
   Args:
@@ -438,6 +504,9 @@
       The current whitelisted collections are the global variables, the
       local variables, and the trainable variables.
       Defaults to None.
+    capture_by_value: An optional boolean. If True, the func graph will capture
+      Variables by value instead of by reference. By default this is inherited
+      from outer graphs, and failing that it defaults to False.
 
   Returns:
     A FuncGraph.
@@ -449,7 +518,8 @@
   if op_return_value is not None:
     assert isinstance(op_return_value, ops.Tensor), op_return_value
   if func_graph is None:
-    func_graph = FuncGraph(name, collections=collections)
+    func_graph = FuncGraph(name, collections=collections,
+                           capture_by_value=capture_by_value)
   assert isinstance(func_graph, FuncGraph)
   if add_control_dependencies:
     control_manager = AutomaticControlDependencies
@@ -476,12 +546,19 @@
         convert_structure_to_signature(func_args, arg_names),
         convert_structure_to_signature(func_kwargs))
 
+    flat_func_args = nest.flatten(func_args)
+    flat_func_kwargs = nest.flatten(func_kwargs)
+    # Temporarily set inputs to allow graph building code to inspect
+    # them. Reassigned below.
+    func_graph.inputs = [arg for arg in flat_func_args + flat_func_kwargs
+                         if isinstance(arg, ops.Tensor)]
+
     # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
     # Variables to help check whether mutation happens in calling the function
     # Copy the recursive list, tuple and map structure, but not base objects
-    func_args_before = nest.pack_sequence_as(func_args, nest.flatten(func_args))
+    func_args_before = nest.pack_sequence_as(func_args, flat_func_args)
     func_kwargs_before = nest.pack_sequence_as(
-        func_kwargs, nest.flatten(func_kwargs))
+        func_kwargs, flat_func_kwargs)
 
     def convert(x):
       """Converts a function output to a Tensor."""
@@ -527,7 +604,7 @@
                   strip_decorators=(def_function.function,),
                   optional_features=autograph_options,
                   force_conversion=True,
-              ), *args, **kwargs)
+              ), args, kwargs)
 
         # Wrapping around a decorator allows checks like tf_inspect.getargspec
         # to be accurate.
@@ -572,6 +649,8 @@
         func_graph.capture(x)
         for x in flatten(func_graph.structured_outputs)
         if x is not None)
+    if add_control_dependencies:
+      func_graph.control_outputs.extend(a.ops_which_must_run)
 
     func_graph.variables = variables
 
@@ -709,9 +788,15 @@
           requested_name = arg.name
         else:
           requested_name = name
-        placeholder = graph_placeholder(
-            arg.dtype, arg.shape,
-            name=requested_name)
+
+        try:
+          placeholder = graph_placeholder(
+              arg.dtype, arg.shape,
+              name=requested_name)
+        except ValueError:
+          # Sometimes parameter names are not valid op names, so fall back to
+          # unnamed placeholders.
+          placeholder = graph_placeholder(arg.dtype, arg.shape)
         if name is not None:
           # Record the requested/user-specified name in case it's different than
           # the uniquified name, for validation when exporting signatures.
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 291986a..7002f16 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -129,13 +129,15 @@
   def __call__(self, func):
     # Various sanity checks on the callable func.
     if not callable(func):
-      raise ValueError("func %s must be callable" % func)
+      raise ValueError("function %s must be callable" % func)
 
     # Func should not use kwargs and defaults.
     argspec = tf_inspect.getargspec(func)
     if argspec.keywords or argspec.defaults:
-      raise ValueError("Functions with argument defaults or keywords "
-                       "arguments are not supported.")
+      raise ValueError(
+          "function with argument defaults or keywords arguments are not"
+          " supported. {} has defaults {} and keywords {}.".format(
+              func, argspec.defaults, argspec.keywords))
 
     # Computes how many arguments 'func' has.
     min_args = len(argspec.args)
@@ -412,6 +414,8 @@
           [t._as_tf_output() for t in temp_graph.inputs],
           [t._as_tf_output() for t in temp_graph.outputs],
           output_names,
+          [],  # control_outputs
+          [],  # control_output_names
           None,  # opts
           description)
       self._c_func = c_api_util.ScopedTFFunction(c_func)
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 7543376..cd62322 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -284,6 +284,7 @@
         out, = sess.run(dlogits, {logits: x, labels: y})
       self.assertAllClose(out, np.exp(prob - y))
 
+  @test_util.disable_xla("b/124286351")  # No error is raised
   def testCustomGradientError(self):
     dtype = dtypes.float32
 
diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py
index a46fccc..f0cd164 100644
--- a/tensorflow/python/framework/graph_util_impl.py
+++ b/tensorflow/python/framework/graph_util_impl.py
@@ -197,7 +197,7 @@
 
 @deprecation.deprecated(
     date=None,
-    instructions="Use tf.compat.v1.graph_util.remove_training_nodes")
+    instructions="Use tf.compat.v1.graph_util.tensor_shape_from_node_def_name")
 @tf_export(v1=["graph_util.tensor_shape_from_node_def_name"])
 def tensor_shape_from_node_def_name(graph, input_name):
   """Convenience function to get a shape from a NodeDef's input string."""
@@ -353,19 +353,27 @@
     nodes_after_removal.append(new_node)
 
   types_to_splice = {"Identity": True}
+  control_input_names = set()
+  node_names_with_control_input = set()
+  for node in nodes_after_removal:
+    for node_input in node.input:
+      if "^" in node_input:
+        control_input_names.add(node_input.replace("^", ""))
+        node_names_with_control_input.add(node.name)
+
   names_to_splice = {}
   for node in nodes_after_removal:
     if node.op in types_to_splice and node.name not in protected_nodes:
       # We don't want to remove nodes that have control edge inputs, because
       # they might be involved in subtle dependency issues that removing them
       # will jeopardize.
-      has_control_edge = False
-      for input_name in node.input:
-        if re.match(r"^\^", input_name):
-          has_control_edge = True
-      if not has_control_edge:
+      if node.name not in node_names_with_control_input:
         names_to_splice[node.name] = node.input[0]
 
+  # We also don't want to remove nodes which are used as control edge inputs.
+  names_to_splice = {name: value for name, value in names_to_splice.items()
+                     if name not in control_input_names}
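+  # For example (illustrative): given Base -> I (an Identity) and a Const node
+  # C with control input ^I, splicing out I would orphan C's control edge, so
+  # I is kept (see testRemoveIdentityUsedAsControlInputInConst).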
+
   nodes_after_splicing = []
   for node in nodes_after_removal:
     if node.name in names_to_splice:
diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py
index dd26b8a..78777dc 100644
--- a/tensorflow/python/framework/graph_util_test.py
+++ b/tensorflow/python/framework/graph_util_test.py
@@ -308,8 +308,9 @@
       new_node.input.extend([input_name])
     return new_node
 
-  def create_constant_node_def(self, name, value, dtype, shape=None):
-    node = self.create_node_def("Const", name, [])
+  def create_constant_node_def(self, name, value, dtype,
+                               shape=None, inputs=None):
+    node = self.create_node_def("Const", name, inputs or [])
     self.set_attr_dtype(node, "dtype", dtype)
     self.set_attr_tensor(node, "value", value, dtype, shape)
     return node
@@ -393,6 +394,18 @@
     self.assertProtoEquals(expected_graph_def,
                            graph_util.remove_training_nodes(graph_def))
 
+  def testRemoveIdentityUsedAsControlInputInConst(self):
+    """Check that Identity nodes used as control inputs are not removed."""
+    graph_def = graph_pb2.GraphDef()
+    graph_def.node.extend([
+        self.create_constant_node_def("C", 1, dtypes.float32, inputs=["^I"]),
+        self.create_node_def("Identity", "I", ["Base"]),
+        self.create_node_def("BaseOp", "Base", [])
+    ])
+
+    self.assertProtoEquals(graph_def,
+                           graph_util.remove_training_nodes(graph_def))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py
index 0febab5..fc566ce 100644
--- a/tensorflow/python/framework/meta_graph.py
+++ b/tensorflow/python/framework/meta_graph.py
@@ -538,7 +538,7 @@
       all_file_names.add(trace[0])
 
   # Sets the `files` field in the GraphDebugInfo proto
-  graph_debug_info_def.files.extend(sorted(all_file_names))
+  graph_debug_info_def.files.extend(all_file_names)
 
   # Builds a mapping between file names and index of the `files` field, so we
   # only store the indexes for the nodes in the GraphDebugInfo.
@@ -550,9 +550,10 @@
   # save the storage space.
   for node_name, trace in node_to_trace.items():
     trace_def = graph_debug_info_def.traces[node_name]
-    for file_line in trace:
-      file_index = file_to_index[file_line[0]]
-      trace_def.file_line_cols.add(file_index=file_index, line=file_line[1])
+    for file_name, line, func, code in trace:
+      file_index = file_to_index[file_name]
+      trace_def.file_line_cols.add(
+          file_index=file_index, line=line, func=func, code=code)
 
   return graph_debug_info_def
 
@@ -972,8 +973,11 @@
 
   Raises:
     ValueError: When the `GraphDef` is larger than 2GB.
+    ValueError: When executing in Eager mode and either `graph_def` or `graph`
+      is undefined.
   """
-  if context.executing_eagerly():
+  if context.executing_eagerly() and (graph_def is None or graph is None):
     raise ValueError("Exporting/importing meta graphs is not supported when "
                      "Eager Execution is enabled.")
   graph = graph or ops.get_default_graph()
@@ -1061,11 +1065,12 @@
       name, _ = os.path.splitext(filename)
       debug_filename = "{name}{ext}".format(name=name, ext=".debug")
 
-      # Gets the operation from the graph by the name.
-      ops_to_export = {}
+      # Gets the operation from the graph by name. Excludes variable nodes, so
+      # only the nodes in the frozen model are included.
+      ops_to_export = []
       for node in scoped_meta_graph_def.graph_def.node:
         scoped_op_name = ops.prepend_name_scope(node.name, export_scope)
-        ops_to_export.add(graph.get_operation_by_name(scoped_op_name))
+        ops_to_export.append(graph.get_operation_by_name(scoped_op_name))
 
       graph_debug_info = create_graph_debug_info_def(ops_to_export)
 
diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py
index 2318b32..372763a 100644
--- a/tensorflow/python/framework/op_def_library.py
+++ b/tensorflow/python/framework/op_def_library.py
@@ -212,6 +212,22 @@
       (repr(v), arg_name))
 
 
+def _MakeFunc(v, arg_name):
+  """Ensure v is a func."""
+  if isinstance(v, attr_value_pb2.NameAttrList):
+    return v
+  fn_attr = attr_value_pb2.NameAttrList()
+  if isinstance(v, compat.bytes_or_text_types):
+    fn_attr.name = v
+  elif hasattr(v, "add_to_graph"):
+    v.add_to_graph(ops.get_default_graph())
+    fn_attr.name = v.name
+  else:
+    raise TypeError("Don't know how to convert {} to a func for "
+                    "argument {}".format(v, arg_name))
+  return fn_attr
+
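+# A rough sketch of the accepted inputs (illustrative only):
+#
+#   _MakeFunc("MyFn", "f")       # a function name given as a string
+#   _MakeFunc(defined_fn, "f")   # e.g. a function.Defun, via add_to_graph()
+#   _MakeFunc(3, "f")            # raises TypeError
+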
+
 class _OpInfo(object):
   """All per-Op state we would like to precompute/validate."""
 
@@ -515,9 +531,9 @@
             else:
               raise TypeError(
                   "Expected %s passed to parameter '%s' of op '%s', got %s of "
-                  "type '%s' instead." %
+                  "type '%s' instead. Error: %s" %
                   (dtypes.as_dtype(dtype).name, input_arg.name, op_type_name,
-                   repr(values), type(values).__name__))
+                   repr(values), type(values).__name__, err))
           except ValueError:
             # What type does convert_to_tensor think it has?
             try:
@@ -733,13 +749,9 @@
           attr_value.list.tensor.extend(
               [_MakeTensor(x, key) for x in value])
         elif attr_def.type == "func":
-          if isinstance(value, attr_value_pb2.NameAttrList):
-            attr_value.func.CopyFrom(value)
-          elif isinstance(value, compat.bytes_or_text_types):
-            attr_value.func.name = value
-          else:
-            value.add_to_graph(ops.get_default_graph())
-            attr_value.func.name = value.name
+          attr_value.func.CopyFrom(_MakeFunc(value, key))
+        elif attr_def.type == "list(func)":
+          attr_value.list.func.extend([_MakeFunc(x, key) for x in value])
         else:
           raise TypeError("Unrecognized Attr type " + attr_def.type)
 
diff --git a/tensorflow/python/framework/op_def_library_test.py b/tensorflow/python/framework/op_def_library_test.py
index 66cfe21..71d708d 100644
--- a/tensorflow/python/framework/op_def_library_test.py
+++ b/tensorflow/python/framework/op_def_library_test.py
@@ -24,6 +24,7 @@
 from tensorflow.core.framework import op_def_pb2
 from tensorflow.core.framework import tensor_shape_pb2
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_ops
@@ -140,40 +141,43 @@
     with ops.Graph().as_default():
       with self.assertRaises(TypeError) as cm:
         self._lib.apply_op("Simple", a="Bad string")
-      self.assertEqual(str(cm.exception),
-                       "Expected int32 passed to parameter 'a' of op 'Simple', "
-                       "got 'Bad string' of type 'str' instead.")
+      self.assertTrue(
+          "Expected int32 passed to parameter 'a' of op 'Simple', "
+          "got 'Bad string' of type 'str' instead." in str(cm.exception))
 
       with self.assertRaises(TypeError) as cm:
         self._lib.apply_op("Simple", a=self.Tensor(dtypes.string))
-      self.assertEqual(str(cm.exception),
-                       "Input 'a' of 'Simple' Op has type string "
-                       "that does not match expected type of int32.")
+      self.assertTrue(
+          "Input 'a' of 'Simple' Op has type string "
+          "that does not match expected type of int32." in str(cm.exception))
 
       with self.assertRaises(TypeError) as cm:
         self._lib.apply_op("Simple", a=6, extra="bogus")
-      self.assertEqual(str(cm.exception),
-                       "apply_op() got unexpected keyword arguments: extra")
+      self.assertTrue(
+          "apply_op() got unexpected keyword arguments: extra"
+          in str(cm.exception))
 
       with self.assertRaises(TypeError) as cm:
         self._lib.apply_op("Simple", a=6, extra1="bogus", extra2="also_bogus")
-      self.assertEqual(str(cm.exception),
-                       "apply_op() got unexpected keyword arguments: extra1, "
-                       "extra2")
+      self.assertTrue(
+          "apply_op() got unexpected keyword arguments: extra1, "
+          "extra2" in str(cm.exception))
 
       with self.assertRaises(TypeError) as cm:
         self._lib.apply_op("Simple")
-      self.assertEqual(str(cm.exception), "No argument for input a")
+      self.assertTrue(
+          "No argument for input a" in str(cm.exception))
 
       with self.assertRaises(TypeError) as cm:
         self._lib.apply_op("Simple", wrong=7)
-      self.assertEqual(str(cm.exception), "No argument for input a")
+      self.assertTrue(
+          "No argument for input a" in str(cm.exception))
 
       with self.assertRaises(TypeError) as cm:
         self._lib.apply_op("Simple", a={"label": 1})
-      self.assertEqual(str(cm.exception),
-                       "Expected int32 passed to parameter 'a' of op 'Simple', "
-                       "got {'label': 1} of type 'dict' instead.")
+      self.assertTrue(
+          "Expected int32 passed to parameter 'a' of op 'Simple', "
+          "got {'label': 1} of type 'dict' instead." in str(cm.exception))
 
   def testReservedInput(self):
     with ops.Graph().as_default():
@@ -268,19 +272,13 @@
         attr { key: 'T' value { type: DT_STRING } }
         """, out.op.node_def)
 
-      with self.assertRaises(TypeError) as cm:
+      with self.assertRaises(TypeError):
         self._lib.apply_op("Binary", a="left", b=12)
-      self.assertEqual(str(cm.exception),
-                       "Expected string passed to parameter 'b' of op 'Binary',"
-                       " got 12 of type 'int' instead.")
 
-      with self.assertRaises(TypeError) as cm:
+      with self.assertRaises(TypeError):
         self._lib.apply_op("Binary",
                            a=self.Tensor(dtypes.string),
                            b=self.Tensor(dtypes.int32))
-      self.assertEqual(str(cm.exception),
-                       "Input 'b' of 'Binary' Op has type int32 "
-                       "that does not match type string of argument 'a'.")
 
   def testRestrict(self):
     with ops.Graph().as_default():
@@ -466,6 +464,46 @@
       self.assertEqual(str(cm.exception),
                        "Expected float for argument 'a' not 'bad'.")
 
+  def testAttrFunc(self):
+    with ops.Graph().as_default():
+      @function.Defun(dtypes.float32, func_name="MyFn")
+      def fn(x):
+        return 2 + x
+      op = self._lib.apply_op("FuncAttr", f=fn, name="t")
+      self.assertProtoEquals("""
+        name: 't' op: 'FuncAttr' attr { key: 'f'
+                                        value { func { name: 'MyFn' } } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("FuncAttr", f=3)
+      self.assertEqual(str(cm.exception),
+                       "Don't know how to convert 3 to a func for argument f")
+
+  def testAttrFuncList(self):
+    with ops.Graph().as_default():
+      @function.Defun(dtypes.float32, func_name="MyFn")
+      def fn1(x):
+        return 2 + x
+      @function.Defun(dtypes.int32, dtypes.float32, func_name="MyFn2")
+      def fn2(x, y):
+        return 2 + x, y * 3
+      @function.Defun(dtypes.int32, func_name="MyFn3")
+      def fn3(y):
+        return 2 + y
+      op = self._lib.apply_op("FuncListAttr", f=[fn1, fn2, fn3], name="t")
+      self.assertProtoEquals("""
+        name: 't' op: 'FuncListAttr'
+        attr { key: 'f' value { list { func { name: 'MyFn' }
+                                       func { name: 'MyFn2' }
+                                       func { name: 'MyFn3' } } } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("FuncListAttr", f=[fn1, 3, fn2])
+      self.assertEqual(str(cm.exception),
+                       "Don't know how to convert 3 to a func for argument f")
+
   def testAttrBool(self):
     with ops.Graph().as_default():
       op = self._lib.apply_op("AttrBool", a=True, name="t")
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 6e64622..41d6bde 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -5048,6 +5048,48 @@
     self._thread_local._distribution_strategy_stack = (  # pylint: disable=protected-access
         _distribution_strategy_stack)
 
+  @property
+  def _auto_cast_variable_read_dtype(self):
+    """The dtype that instances of `AutoCastVariable` will be casted to.
+
+    This is None if `AutoCastVariables` should not be casted.
+
+    See `AutoCastVariable` for more information.
+
+    Returns:
+      The dtype that instances of `AutoCastVariable` will be casted to.
+    """
+    if not hasattr(self._thread_local, "_auto_cast_variable_read_dtype"):
+      self._thread_local._auto_cast_variable_read_dtype = None  # pylint: disable=protected-access
+    return self._thread_local._auto_cast_variable_read_dtype  # pylint: disable=protected-access
+
+  @_auto_cast_variable_read_dtype.setter
+  def _auto_cast_variable_read_dtype(self, _auto_cast_variable_read_dtype):
+    self._thread_local._auto_cast_variable_read_dtype = (  # pylint: disable=protected-access
+        _auto_cast_variable_read_dtype)
+
+  @tf_contextlib.contextmanager
+  def _enable_auto_casting_variables(self, dtype):
+    """Context manager to automatically cast AutoCastVariables.
+
+    If an AutoCastVariable `var` is used under this context manager, it will be
+    casted to `dtype` before being used.
+
+    See `AutoCastVariable` for more information.
+
+    Args:
+      dtype: The dtype that AutoCastVariables should be casted to.
+
+    Yields:
+      Nothing.
+    """
+    prev_read_dtype = self._auto_cast_variable_read_dtype
+    try:
+      self._auto_cast_variable_read_dtype = dtype
+      yield
+    finally:
+      self._auto_cast_variable_read_dtype = prev_read_dtype
+
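+  # A minimal usage sketch (illustrative; assumes `g` is a Graph instance and
+  # `var` is an AutoCastVariable created elsewhere):
+  #
+  #   with g._enable_auto_casting_variables(dtypes.float16):
+  #     y = var + 1.0  # reads of `var` are casted to float16 in this block
+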
   def _mutation_lock(self):
     """Returns a lock to guard code that creates & mutates ops.
 
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 8872c9f..7d9799a 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -1587,6 +1587,8 @@
     self.assertSequenceEqual(g.collections, ["key"])
     g.add_to_collection("other", "foo")
     self.assertSequenceEqual(sorted(g.collections), ["key", "other"])
+    self.assertSequenceEqual(
+        sorted(g.get_all_collection_keys()), ["key", "other"])
 
   def test_add_to_collection(self):
     g = ops.Graph()
@@ -2414,17 +2416,22 @@
 
   def testDefaultGraph(self):
     orig = ops.get_default_graph()
+    self.assertFalse(ops.has_default_graph())
     self._AssertDefault(orig)
     g0 = ops.Graph()
+    self.assertFalse(ops.has_default_graph())
     self._AssertDefault(orig)
     context_manager_0 = g0.as_default()
+    self.assertFalse(ops.has_default_graph())
     self._AssertDefault(orig)
     with context_manager_0 as g0:
       self._AssertDefault(g0)
       with ops.Graph().as_default() as g1:
+        self.assertTrue(ops.has_default_graph())
         self._AssertDefault(g1)
       self._AssertDefault(g0)
     self._AssertDefault(orig)
+    self.assertFalse(ops.has_default_graph())
 
   def testPreventFeeding(self):
     g = ops.Graph()
diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc
index b0b9ce7..c27f014 100644
--- a/tensorflow/python/framework/python_op_gen.cc
+++ b/tensorflow/python/framework/python_op_gen.cc
@@ -547,7 +547,7 @@
       strings::StrAppend(function_setup, indentation, attr_api_name,
                          " = [_execute.make_tensor(_t, \"", attr_api_name,
                          "\") for _t in ", attr_api_name, "]\n");
-    } else if (attr_type != "func") {
+    } else if (attr_type != "func" && attr_type != "list(func)") {
       *function_setup =
           strings::StrCat("# No definition for ", function_name_,
                           " since we don't support attrs with type\n"
diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py
index a7537bb..0dc3dde 100644
--- a/tensorflow/python/framework/tensor_shape.py
+++ b/tensorflow/python/framework/tensor_shape.py
@@ -74,9 +74,8 @@
   # in `tensor_shape[i]`, but they would not be.
   ```
   """
-  global _TENSORSHAPE_V2_OVERRIDE, TensorShape  # pylint: disable=invalid-name
+  global _TENSORSHAPE_V2_OVERRIDE  # pylint: disable=invalid-name
   _TENSORSHAPE_V2_OVERRIDE = True
-  TensorShape = TensorShapeV2
 
 
 @tf_export(v1=["disable_v2_tensorshape"])
@@ -85,9 +84,8 @@
 
   See docstring for `enable_v2_tensorshape` for details about the new behavior.
   """
-  global _TENSORSHAPE_V2_OVERRIDE, TensorShape  # pylint: disable=invalid-name
+  global _TENSORSHAPE_V2_OVERRIDE  # pylint: disable=invalid-name
   _TENSORSHAPE_V2_OVERRIDE = False
-  TensorShape = TensorShapeV1
 
 
 @tf_export("compat.dimension_value",
@@ -470,6 +468,54 @@
     """
     return self // other
 
+  def __rdiv__(self, other):
+    """Use `__floordiv__` via `x // y` instead.
+
+    This function exists only to provide a better error message. Instead of
+    the generic `TypeError: unsupported operand type(s) for /: 'int' and
+    'Dimension'`, it explicitly directs the caller to use `//` instead.
+
+    Args:
+      other: Another `Dimension`.
+
+    Raises:
+      TypeError.
+    """
+    raise TypeError("unsupported operand type(s) for /: '{}' and 'Dimension', "
+                    "please use // instead".format(type(other).__name__))
+
+  def __truediv__(self, other):
+    """Use `__floordiv__` via `x // y` instead.
+
+    This function exists only to provide a better error message. Instead of
+    the generic `TypeError: unsupported operand type(s) for /: 'Dimension'
+    and 'int'`, it explicitly directs the caller to use `//` instead.
+
+    Args:
+      other: Another `Dimension`.
+
+    Raises:
+      TypeError.
+    """
+    raise TypeError("unsupported operand type(s) for /: 'Dimension' and '{}', "
+                    "please use // instead".format(type(other).__name__))
+
+  def __rtruediv__(self, other):
+    """Use `__floordiv__` via `x // y` instead.
+
+    This function exists only to provide a better error message. Instead of
+    the generic `TypeError: unsupported operand type(s) for /: 'int' and
+    'Dimension'`, it explicitly directs the caller to use `//` instead.
+
+    Args:
+      other: Another `Dimension`.
+
+    Raises:
+      TypeError.
+    """
+    raise TypeError("unsupported operand type(s) for /: '{}' and 'Dimension', "
+                    "please use // instead".format(type(other).__name__))
+
   def __mod__(self, other):
     """Returns `self` modulo `other`.
 
@@ -635,8 +681,8 @@
     return Dimension(value)
 
 
-@tf_export(v1=["TensorShape"])
-class TensorShapeV1(object):
+@tf_export("TensorShape")
+class TensorShape(object):
   """Represents the shape of a `Tensor`.
 
   A `TensorShape` represents a possibly-partial shape specification for a
@@ -695,7 +741,7 @@
   @property
   def _v2_behavior(self):
     if _TENSORSHAPE_V2_OVERRIDE is None:
-      return False
+      return tf2.enabled()
     return _TENSORSHAPE_V2_OVERRIDE
 
   def __repr__(self):
@@ -1151,22 +1197,6 @@
     return TensorShape([Dimension(None)] * rank)
 
 
-@tf_export("TensorShape", v1=[])
-class TensorShapeV2(TensorShapeV1):
-
-  @property
-  def _v2_behavior(self):
-    if _TENSORSHAPE_V2_OVERRIDE is None:
-      return True
-    return _TENSORSHAPE_V2_OVERRIDE
-
-
-if tf2.enabled():
-  TensorShape = TensorShapeV2
-else:
-  TensorShape = TensorShapeV1
-
-
 def scalar():
   """Returns a shape representing a scalar."""
   return TensorShape([])
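With `TensorShapeV2` folded back into a single `TensorShape` class, the v1/v2
split is now decided at call time through `tf2.enabled()` (unless overridden
via `enable_v2_tensorshape`/`disable_v2_tensorshape`), so the enable/disable
functions no longer need to rebind the class. The net effect on `Dimension`
division, as a short sketch:

    from tensorflow.python.framework import tensor_shape

    d = tensor_shape.Dimension(6)
    print(d // 2)  # Dimension(3): integer division is still supported.
    try:
      d / 2        # True division now fails fast with a pointer to `//`.
    except TypeError as e:
      print(e)     # unsupported operand type(s) for /: ... use // instead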
diff --git a/tensorflow/python/framework/tensor_shape_div_test.py b/tensorflow/python/framework/tensor_shape_div_test.py
index 8e63d7f5..5160c75 100644
--- a/tensorflow/python/framework/tensor_shape_div_test.py
+++ b/tensorflow/python/framework/tensor_shape_div_test.py
@@ -35,6 +35,16 @@
         for y in values:
           self.assertEqual((x / y).value, (x // y).value)
 
+  def testRDivFail(self):
+    """Without `from __future__ import division`, `__rdiv__` is used."""
+    # Note: This test is related to GitHub issue 25790.
+    if six.PY2:  # Old division exists only in Python 2
+      two = tensor_shape.Dimension(2)
+      message = (r"unsupported operand type\(s\) for /: "
+                 r"'int' and 'Dimension', please use // instead")
+      with self.assertRaisesRegexp(TypeError, message):
+        _ = 6 / two
+
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/python/framework/tensor_shape_test.py b/tensorflow/python/framework/tensor_shape_test.py
index 7d85e0a..b4a37c0 100644
--- a/tensorflow/python/framework/tensor_shape_test.py
+++ b/tensorflow/python/framework/tensor_shape_test.py
@@ -205,6 +205,23 @@
     reconstructed = ctor(*args)
     self.assertEquals(reconstructed, dim)
 
+  def testDiv(self):
+    # Note: This test is related to GitHub issue 25790.
+    six = tensor_shape.Dimension(6)
+    two = tensor_shape.Dimension(2)
+    message = (r"unsupported operand type\(s\) for /: "
+               r"'Dimension' and 'Dimension', please use // instead")
+    with self.assertRaisesRegexp(TypeError, message):
+      _ = six / two
+    message = (r"unsupported operand type\(s\) for /: "
+               r"'Dimension' and 'int', please use // instead")
+    with self.assertRaisesRegexp(TypeError, message):
+      _ = six / 2
+    message = (r"unsupported operand type\(s\) for /: "
+               r"'int' and 'Dimension', please use // instead")
+    with self.assertRaisesRegexp(TypeError, message):
+      _ = 6 / two
+
 
 class ShapeTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 21ded1a..af943f0 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -22,6 +22,7 @@
 
 from tensorflow.core.framework import tensor_pb2
 from tensorflow.core.framework import tensor_shape_pb2
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.framework import composite_tensor
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -808,6 +809,8 @@
   """
   if isinstance(tensor, ops.EagerTensor):
     return tensor.numpy()
+  if not pywrap_tensorflow.IsTensor(tensor):
+    return tensor
   ret = _ConstantValue(tensor, partial)
   if ret is not None:
     # The caller may now depend on the constant value of `tensor`, so we
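A short sketch of the new passthrough behavior, assuming the internal
`pywrap_tensorflow.IsTensor` check returns False for plain Python and NumPy
values:

    import numpy as np
    from tensorflow.python.framework import tensor_util

    arr = np.array([1, 2, 3])
    # Not a tf.Tensor, so `constant_value` returns it unchanged instead of
    # attempting to evaluate it as a graph constant.
    assert tensor_util.constant_value(arr) is arr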
diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc
index 1d0145f..5d1386c 100644
--- a/tensorflow/python/framework/test_ops.cc
+++ b/tensorflow/python/framework/test_ops.cc
@@ -406,6 +406,10 @@
     .Attr("f: func")
     .SetShapeFn(shape_inference::UnknownShape);
 
+REGISTER_OP("FuncListAttr")
+    .Attr("f: list(func)")
+    .SetShapeFn(shape_inference::UnknownShape);
+
 REGISTER_OP("Simple")
     .Input("a: int32")
     .Output("out: float")
diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 593d382..0d4f7de 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -272,7 +272,6 @@
     if (!s.ok()) {
       continue;
     }
-    int i = 0;
     for (const auto& arg : op_def->input_arg()) {
       if (!arg.is_ref()) {
         continue;
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index d904193..931a5d6 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -63,6 +63,7 @@
         ":pil_for_keras",
         ":saving",
         "//tensorflow/python:training",
+        "//tensorflow/python/keras/mixed_precision/experimental:autocast_variable",
         "//tensorflow/python/keras/optimizer_v2",
         "//tensorflow/python/saved_model",
         "@keras_applications_archive//:keras_applications",
@@ -156,10 +157,12 @@
         ":activations",
         ":backend",
         ":callbacks",
+        ":callbacks_v1",
         ":constraints",
         ":engine_utils",
         ":initializers",
         ":losses",
+        ":mode_keys",
         ":optimizers",
         ":regularizers",
         ":saving",
@@ -168,7 +171,7 @@
         "//tensorflow/python/distribute:distribute_lib",
         "//tensorflow/python/distribute:input_lib",
         "//tensorflow/python/distribute:reduce_util",
-        "//tensorflow/python/training/checkpointable:data_structures",
+        "//tensorflow/python/training/tracking:data_structures",
         "//tensorflow/tools/docs:doc_controls",
         "@six_archive//:six",
     ],
@@ -187,9 +190,9 @@
     deps = [
         ":backend",
         ":engine_utils",
+        ":mode_keys",
         ":optimizers",
         "//tensorflow/python:lib",
-        "//tensorflow/python:mode_keys",
         "//tensorflow/python:saver",
         "//tensorflow/python/saved_model",
         "//tensorflow/python/saved_model/model_utils",
@@ -217,6 +220,19 @@
     deps = [
         ":backend",
         ":engine_utils",
+        ":mode_keys",
+    ],
+)
+
+py_library(
+    name = "callbacks_v1",
+    srcs = [
+        "callbacks_v1.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":backend",
+        ":engine_utils",
     ],
 )
 
@@ -356,6 +372,17 @@
     ],
 )
 
+py_library(
+    name = "mode_keys",
+    srcs = [
+        "utils/mode_keys.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python/saved_model/model_utils:mode_keys",
+    ],
+)
+
 tf_py_test(
     name = "integration_test",
     size = "medium",
@@ -368,10 +395,7 @@
         "//tensorflow/python:nn_ops",
     ],
     shard_count = 12,
-    tags = [
-        "no_oss",  # b/123899138
-        "notsan",
-    ],
+    tags = ["notsan"],
 )
 
 tf_py_test(
@@ -397,9 +421,6 @@
         "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
     ],
-    tags = [
-        "no_oss",  # b/123899138
-    ],
 )
 
 tf_py_test(
@@ -505,14 +526,14 @@
 
 tf_py_test(
     name = "applications_test",
-    size = "enormous",
+    size = "medium",
     srcs = ["applications/applications_test.py"],
     additional_deps = [
         ":keras",
         "@absl_py//absl/testing:parameterized",
         "//tensorflow/python:client_testlib",
     ],
-    shard_count = 2,
+    shard_count = 11,
 )
 
 tf_py_test(
@@ -562,6 +583,11 @@
         "//tensorflow/python:client_testlib",
     ],
     shard_count = 8,
+    tags = [
+        "manual",  # b/124471597
+        "notap",  # b/124471597
+    ],
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_test(
@@ -587,7 +613,10 @@
         "//tensorflow/python:client_testlib",
     ],
     shard_count = 4,
-    tags = ["no_windows_gpu"],
+    tags = [
+        "no_rocm",
+        "no_windows_gpu",
+    ],
 )
 
 tf_py_test(
@@ -758,6 +787,7 @@
         "//tensorflow/python:client_testlib",
     ],
     shard_count = 8,
+    tags = ["no_rocm"],
 )
 
 cuda_py_test(
@@ -771,6 +801,7 @@
         "//tensorflow/python:client_testlib",
     ],
     shard_count = 8,
+    tags = ["no_rocm"],
 )
 
 tf_py_test(
@@ -822,7 +853,6 @@
     ],
     shard_count = 4,
     tags = [
-        "no_oss",  # b/123899138
         "noasan",  # http://b/78599823
         "notsan",
     ],
@@ -883,6 +913,27 @@
 )
 
 tf_py_test(
+    name = "composite_tensor_support_test",
+    size = "medium",
+    srcs = ["utils/composite_tensor_support_test.py"],
+    additional_deps = [
+        ":engine",
+        ":layers",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python/ops/ragged:ragged_tensor",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+    ],
+)
+
+tf_py_test(
     name = "io_utils_test",
     size = "small",
     srcs = ["utils/io_utils_test.py"],
@@ -1007,6 +1058,20 @@
         "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
     ],
+    shard_count = 4,
+    tags = ["notsan"],
+)
+
+tf_py_test(
+    name = "callbacks_v1_test",
+    size = "medium",
+    srcs = ["callbacks_v1_test.py"],
+    additional_deps = [
+        ":keras",
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+    ],
     tags = ["notsan"],
 )
 
@@ -1148,6 +1213,7 @@
         "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
     ],
+    tags = ["no_rocm"],
 )
 
 tf_py_test(
diff --git a/tensorflow/python/keras/__init__.py b/tensorflow/python/keras/__init__.py
index f024b9b..b7ec638 100644
--- a/tensorflow/python/keras/__init__.py
+++ b/tensorflow/python/keras/__init__.py
@@ -25,6 +25,7 @@
 from tensorflow.python.keras import applications
 from tensorflow.python.keras import backend
 from tensorflow.python.keras import callbacks
+from tensorflow.python.keras import callbacks_v1
 from tensorflow.python.keras import constraints
 from tensorflow.python.keras import datasets
 from tensorflow.python.keras import estimator
diff --git a/tensorflow/python/keras/applications/applications_test.py b/tensorflow/python/keras/applications/applications_test.py
index b15ca59..ad6b589 100644
--- a/tensorflow/python/keras/applications/applications_test.py
+++ b/tensorflow/python/keras/applications/applications_test.py
@@ -32,13 +32,11 @@
     (applications.InceptionV3, 2048),
     (applications.InceptionResNetV2, 1536),
     (applications.MobileNet, 1024),
-    # TODO(fchollet): enable MobileNetV2 tests when a new TensorFlow test image
-    # is released with keras_applications upgraded to 1.0.5 or above.
+    (applications.MobileNetV2, 1280),
     (applications.DenseNet121, 1024),
     (applications.DenseNet169, 1664),
     (applications.DenseNet201, 1920),
     (applications.NASNetMobile, 1056),
-    (applications.NASNetLarge, 4032),
 ]
 
 
@@ -47,7 +45,8 @@
   @parameterized.parameters(*MODEL_LIST)
   def test_feature_extration_model(self, model_fn, output_dim):
     model = model_fn(include_top=False, weights=None)
-    self.assertEqual(model.output_shape, (None, None, None, output_dim))
+    self.assertLen(model.output_shape, 4)
+    self.assertEqual(model.output_shape[-1], output_dim)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 32820fe..0a8c5bb 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -37,6 +37,7 @@
 from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function as eager_function
+from tensorflow.python.eager import lift_to_graph
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_module
 from tensorflow.python.framework import func_graph
@@ -79,6 +80,9 @@
 # while executing eagerly (such as the functional API for model-building).
 _GRAPH = None
 
+# A graph which is used for constructing functions in eager mode.
+_CURRENT_SCRATCH_GRAPH = None
+
 # This is a thread local object that will hold the default internal TF session
 # used by Keras. It can be set manually via `set_session(sess)`.
 _SESSION = threading.local()
@@ -389,16 +393,26 @@
     _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH] = previous_value
 
 
-def _get_session():
+def _current_graph(op_input_list):
+  """Return the graph members of `op_input_list`, or the current graph."""
+  return ops._get_graph_from_inputs(op_input_list)
+
+
+def _get_session(op_input_list=()):
   """Returns the session object for the current thread."""
   global _SESSION
   default_session = ops.get_default_session()
   if default_session is not None:
     session = default_session
   else:
-    if getattr(_SESSION, 'session', None) is None:
-      # We are creating the Session inside a Distribution
-      # Strategy scope.
+    if ops.inside_function():
+      raise RuntimeError('Cannot get session inside TensorFlow graph function.')
+    # If we don't have a session, or that session does not match the current
+    # graph, create and cache a new session.
+    if (getattr(_SESSION, 'session', None) is None or
+        _SESSION.session.graph is not _current_graph(op_input_list)):
+      # If we are creating the Session inside a tf.distribute.Strategy scope,
+      # we ask the strategy for the right session options to use.
       if distribution_strategy_context.has_strategy():
         configure_and_create_distributed_session(
             distribution_strategy_context.get_strategy())
@@ -410,12 +424,13 @@
 
 
 @keras_export(v1=['keras.backend.get_session'])
-def get_session():
+def get_session(op_input_list=()):
   """Returns the TF session to be used by the backend.
 
   If a default TensorFlow session is available, we will return it.
 
-  Else, we will return the global Keras session.
+  Else, we will return the global Keras session assuming it matches
+  the current graph.
 
   If no global Keras session exists at this point:
   we will create a new global session.
@@ -423,10 +438,15 @@
   Note that you can manually set the global session
   via `K.set_session(sess)`.
 
+  Arguments:
+      op_input_list: An optional sequence of tensors or ops, which will be
+        used to determine the current graph. Otherwise the default graph
+        will be used.
+
   Returns:
       A TensorFlow session.
   """
-  session = _get_session()
+  session = _get_session(op_input_list)
   if not _MANUAL_VAR_INIT:
     with session.graph.as_default():
       _initialize_variables(session)
@@ -443,6 +463,40 @@
     return ops.get_default_graph()
 
 
+@tf_contextlib.contextmanager
+def _scratch_graph(graph=None):
+  """Retrieve a shared and temporary func graph.
+
+  The eager execution path lifts a subgraph from the Keras global graph into
+  a scratch graph in order to create a function. DistributionStrategies, in
+  turn, construct multiple functions as well as a final combined function. In
+  order for that logic to work correctly, all of the functions need to be
+  created on the same scratch FuncGraph.
+
+  Args:
+    graph: A graph to be used as the current scratch graph. If not set, a
+      scratch graph will either be retrieved or created.
+
+  Yields:
+    The current scratch graph.
+  """
+  global _CURRENT_SCRATCH_GRAPH
+  if (_CURRENT_SCRATCH_GRAPH is not None and graph is not None and
+      _CURRENT_SCRATCH_GRAPH is not graph):
+    raise ValueError('Multiple scratch graphs specified.')
+
+  if _CURRENT_SCRATCH_GRAPH:
+    yield _CURRENT_SCRATCH_GRAPH
+    return
+
+  graph = graph or func_graph.FuncGraph('keras_scratch_graph')
+  try:
+    _CURRENT_SCRATCH_GRAPH = graph
+    yield graph
+  finally:
+    _CURRENT_SCRATCH_GRAPH = None
+
+
 @keras_export('keras.backend.set_session')
 def set_session(session):
   """Sets the global TensorFlow session.
@@ -956,10 +1010,10 @@
       # Keras variable
       >>> kvar = K.variable(np.array([[1, 2], [3, 4]]))
       >>> K.dtype(kvar)
-      'float32_ref'
+      'float32'
       >>> kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32')
       >>> K.dtype(kvar)
-      'float32_ref'
+      'float32'
   ```
   """
   return x.dtype.base_dtype.name
@@ -1590,7 +1644,7 @@
           the reduced dimension is retained with length 1.
 
   Returns:
-      A tensor with miminum values of `x`.
+      A tensor with minimum values of `x`.
   """
   return math_ops.reduce_min(x, axis, keepdims)
 
@@ -2753,7 +2807,7 @@
       return x.numpy()
   elif ops.inside_function():
     raise RuntimeError('Cannot get value inside Tensorflow graph function.')
-  return x.eval(session=get_session())
+  return x.eval(session=get_session((x,)))
 
 
 @keras_export('keras.backend.batch_get_value')
@@ -2774,7 +2828,7 @@
   elif ops.inside_function():  # pylint: disable=protected-access
     raise RuntimeError('Cannot get value inside Tensorflow graph function.')
   if tensors:
-    return get_session().run(tensors)
+    return get_session(tensors).run(tensors)
   else:
     return []
 
@@ -2992,7 +3046,7 @@
   def __call__(self, inputs):
     inputs = nest.flatten(inputs)
 
-    session = get_session()
+    session = get_session(inputs)
     feed_arrays = []
     array_vals = []
     feed_symbols = []
@@ -3049,47 +3103,79 @@
   """
 
   def __init__(self, inputs, outputs, updates=None, name=None):
+    self.name = name
+    self._outputs_structure = outputs
+    inputs = nest.flatten(inputs)
+    outputs = nest.flatten(outputs)
+
     updates = updates or []
     if not isinstance(updates, (list, tuple)):
       raise TypeError('`updates` in a Keras backend function '
                       'should be a list or tuple.')
-    self.name = name
-    self.inputs = nest.flatten(inputs)
-    self._outputs_structure = outputs
-    graph = get_graph()
-    # Consolidate updates
-    with graph.as_default():
-      self.outputs = cast_variables_to_tensor(nest.flatten(outputs))
-      with ops.control_dependencies(self.outputs):
-        # In general, updates should be run after the outputs have been
-        # computed. However, we can only ensure this when we create
-        # the updates here (i.e. when updates are passed as tuples).
-        # We cannot modify the control dependencies of preexisting update ops.
-        updates_ops = []
-        for update in updates:
-          # For legacy reasons it is allowed to pass an update as a tuple
-          # `(variable, new_value)` (this maps to an assign op).
-          if isinstance(update, tuple):
-            p, new_p = update
-            updates_ops.append(state_ops.assign(p, new_p))
-          else:
-            # Assumed already an op -- we cannot control its execution order.
-            updates_ops.append(update)
 
-      # We set the update ops to run at the end by conditioning it on output[0]
-      if updates and not self.outputs:
-        # Edge case; never happens in practice
-        raise ValueError('Cannot create a Keras backend function with updates'
-                         ' but no outputs during eager execution.')
+    if updates and not outputs:
+      # Edge case; never happens in practice
+      raise ValueError('Cannot create a Keras backend function with updates'
+                       ' but no outputs during eager execution.')
+
+    graphs = {i.graph for i in nest.flatten([inputs, outputs, updates])
+              if hasattr(i, 'graph')}
+    if len(graphs) > 1:
+      raise ValueError('Cannot create an execution function which is composed '
+                       'of elements from multiple graphs.')
+
+    source_graph = graphs.pop()
+    global_graph = get_graph()
+
+    updates_ops = []
+    legacy_update_ops = []
+    for update in updates:
+      # For legacy reasons it is allowed to pass an update as a tuple
+      # `(variable, new_value)` (this maps to an assign op). Otherwise it
+      # is assumed to already be an op -- we cannot control its execution
+      # order.
+      if isinstance(update, tuple):
+        legacy_update_ops.append(update)
+      else:
+        if hasattr(update, 'op'):
+          update = update.op
+        updates_ops.append(update)
+
+    with _scratch_graph() as exec_graph:
+      global_graph = get_graph()
+      if source_graph not in (exec_graph, global_graph):
+        raise ValueError('Unknown graph. Aborting.')
+
+      if source_graph is global_graph and exec_graph is not global_graph:
+        init_tensors = (
+            outputs + updates_ops + [p for [p, _] in legacy_update_ops] +
+            [p_new for [_, p_new] in legacy_update_ops
+             if isinstance(p_new, ops.Tensor)])
+        lifted_map = lift_to_graph.lift_to_graph(
+            init_tensors=init_tensors, graph=exec_graph, sources=inputs,
+            add_sources=True, handle_captures=True, base_graph=source_graph)
+
+        inputs = [lifted_map[i] for i in inputs]
+        outputs = [lifted_map[i] for i in outputs]
+        updates_ops = [lifted_map[i] for i in updates_ops]
+        legacy_update_ops = [(lifted_map[p], lifted_map.get(p_new, p_new))
+                             for p, p_new in legacy_update_ops]
+
+    # Consolidate updates
+    with exec_graph.as_default():
+      outputs = cast_variables_to_tensor(outputs)
+      with ops.control_dependencies(outputs):
+        for p, p_new in legacy_update_ops:
+          updates_ops.append(state_ops.assign(p, p_new))
+
+      self.inputs, self.outputs = inputs, outputs
       with ops.control_dependencies(updates_ops):
         self.outputs[0] = array_ops.identity(self.outputs[0])
 
-    # Prepare graph function
-    # TODO(fchollet): can we restrict `captures` to variables actually used in
-    # the relevant subgraph?
-    graph.inputs = self.inputs + list(graph.captures.values())
-    graph.outputs = self.outputs
-    graph_fn = eager_function.ConcreteFunction(graph)
+      exec_graph.inputs = self.inputs + list(exec_graph.captures.values())
+      exec_graph.outputs = self.outputs
+      graph_fn = eager_function.ConcreteFunction(exec_graph)
+
     graph_fn._num_positional_args = len(self.inputs)
     graph_fn._arg_keywords = []
     self._graph_fn = graph_fn
@@ -3097,7 +3183,7 @@
     # Handle placeholders with default
     # (treated as required placeholder by graph functions)
     self._placeholder_default_values = {}
-    with graph.as_default():
+    with exec_graph.as_default():
       for x in self.inputs:
         if x.op.type == 'PlaceholderWithDefault':
           self._placeholder_default_values[x] = tensor_util.constant_value(
@@ -4365,6 +4451,7 @@
   Raises:
       ValueError: if `data_format` is neither `channels_last` or
       `channels_first`.
+      ValueError: if `strides` is not a tuple of 2 integers.
   """
   if data_format is None:
     data_format = image_data_format()
@@ -4572,6 +4659,8 @@
   Raises:
       ValueError: if `data_format` is neither `"channels_last"` or
       `"channels_first"`.
+      ValueError: if `pool_size` is not a tuple of 2 integers.
+      ValueError: if `strides` is not a tuple of 2 integers.
       ValueError: if `pool_mode` is neither `"max"` or `"avg"`.
   """
   if data_format is None:
@@ -5221,11 +5310,16 @@
 def configure_and_create_distributed_session(distribution_strategy):
   """Configure session config and create a session with it."""
 
-  # TODO(priyag): Throw error if a session already exists.
   def _create_session(distribution_strategy):
     """Create the Distributed Strategy session."""
     session_config = get_default_session_config()
 
+    # If a session already exists, merge in its config; in case of a
+    # conflict, the existing config's values take precedence.
+    global _SESSION
+    if getattr(_SESSION, 'session', None) and _SESSION.session._config:
+      session_config.MergeFrom(_SESSION.session._config)
+
     if is_tpu_strategy(distribution_strategy):
       # TODO(priyag, yuefengz): Remove this workaround when Distribute
       # Coordinator is integrated with keras and we can create a session from
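A sketch of the per-graph session caching this enables, mirroring the new
backend test below:

    from tensorflow.python.framework import ops
    from tensorflow.python.keras import backend as K

    with ops.Graph().as_default():
      x = K.constant(1)
      session = K.get_session()
    with ops.Graph().as_default():
      # Passing `x` lets the backend recover the cached session matching
      # x's graph; a bare call returns a session for the new default graph.
      assert session is K.get_session((x,))
      assert session is not K.get_session()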
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 263a1dc..cd78216 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -1741,6 +1741,16 @@
         x, mean, var, beta, gamma, axis=1, epsilon=1e-3)
     self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5])
 
+  def test_get_session_different_graphs(self):
+    with ops.Graph().as_default():
+      x = keras.backend.constant(1)
+      session = keras.backend.get_session()
+      self.assertIs(session, keras.backend.get_session((x,)))
+      self.assertIs(session, keras.backend.get_session())
+    with ops.Graph().as_default():
+      self.assertIs(session, keras.backend.get_session((x,)))
+      self.assertIsNot(session, keras.backend.get_session())
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 41fedbb..79bafb9 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -32,18 +32,14 @@
 
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.utils.data_utils import Sequence
 from tensorflow.python.keras.utils.generic_utils import Progbar
+from tensorflow.python.keras.utils.mode_keys import ModeKeys
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import summary_ops_v2
-from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.summary import summary as tf_summary
-from tensorflow.python.training import saver
-from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util.tf_export import keras_export
 
 try:
@@ -52,7 +48,6 @@
   requests = None
 
 
-# pylint: disable=protected-access
 def configure_callbacks(callbacks,
                         model,
                         do_validation=False,
@@ -91,20 +86,57 @@
   # Add additional callbacks during training.
   if mode == ModeKeys.TRAIN:
     model.history = History()
-    stateful_metric_names = None
-    if hasattr(model, 'metrics_names'):
-      stateful_metric_names = model.metrics_names[1:]  # Exclude `loss`
-    callbacks = [BaseLogger(stateful_metrics=stateful_metric_names)
-                ] + (callbacks or []) + [model.history]
+    callbacks = [BaseLogger()] + (callbacks or []) + [model.history]
     if verbose:
-      callbacks.append(
-          ProgbarLogger(count_mode, stateful_metrics=stateful_metric_names))
+      callbacks.append(ProgbarLogger(count_mode))
   callback_list = CallbackList(callbacks)
 
   # Set callback model
-  callback_model = model._get_callback_model()
+  callback_model = model._get_callback_model()  # pylint: disable=protected-access
   callback_list.set_model(callback_model)
 
+  set_callback_parameters(
+      callback_list,
+      model,
+      do_validation=do_validation,
+      batch_size=batch_size,
+      epochs=epochs,
+      steps_per_epoch=steps_per_epoch,
+      samples=samples,
+      verbose=verbose,
+      mode=mode)
+
+  callback_list.model.stop_training = False
+  return callback_list
+
+
+def set_callback_parameters(callback_list,
+                            model,
+                            do_validation=False,
+                            batch_size=None,
+                            epochs=None,
+                            steps_per_epoch=None,
+                            samples=None,
+                            verbose=1,
+                            mode=ModeKeys.TRAIN):
+  """Sets callback parameters.
+
+  Arguments:
+      callback_list: CallbackList instance.
+      model: Model being trained.
+      do_validation: Whether or not validation loop will be run.
+      batch_size: Number of samples per batch.
+      epochs: Number of epochs to train.
+      steps_per_epoch: Number of batches to run per training epoch.
+      samples: Number of training samples.
+      verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger.
+      mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT.
+        Which loop mode to configure callbacks for.
+  """
+  for cbk in callback_list:
+    if isinstance(cbk, (BaseLogger, ProgbarLogger)):
+      cbk.stateful_metrics = model.metrics_names[1:]  # Exclude `loss`
+
   # Set callback parameters
   callback_metrics = []
   # When we have deferred build scenario with iterator input, we will compile
@@ -123,9 +155,6 @@
       'metrics': callback_metrics,
   }
   callback_list.set_params(callback_params)
-  callback_list.model.stop_training = False
-  return callback_list
-# pylint: enable=protected-access
 
 
 def _is_generator_like(data):
@@ -395,7 +424,7 @@
   take as argument will contain keys for quantities relevant to
   the current batch or epoch.
 
-  Currently, the `.fit()` method of the `Sequential` model class
+  Currently, the `.fit()` method of the `Model` class
   will include the following quantities in the `logs` that
   it passes to its callbacks:
 
@@ -412,6 +441,10 @@
   def __init__(self):
     self.validation_data = None
     self.model = None
+    # Whether this Callback should only run on the chief worker in a
+    # Multi-Worker setting.
+    # TODO(omalleyt): Make this attr public once solution is stable.
+    self._chief_worker_only = None
 
   def set_params(self, params):
     self.params = params
@@ -815,6 +848,17 @@
         self.monitor_op = np.less
         self.best = np.Inf
 
+    # Only the chief worker writes model checkpoints.
+    self._chief_worker_only = True
+
+  def set_model(self, model):
+    self.model = model
+    # Use name matching rather than `isinstance` to avoid circular dependencies.
+    if (not self.save_weights_only and
+        not model._is_graph_network and  # pylint: disable=protected-access
+        model.__class__.__name__ != 'Sequential'):
+      self.save_weights_only = True
+
   def on_epoch_end(self, epoch, logs=None):
     logs = logs or {}
     self.epochs_since_last_save += 1
@@ -1054,10 +1098,10 @@
     logs['lr'] = K.get_value(self.model.optimizer.lr)
 
 
-@keras_export('keras.callbacks.TensorBoard')
+@keras_export('keras.callbacks.TensorBoard', v1=[])
 class TensorBoard(Callback):
   # pylint: disable=line-too-long
-  """Tensorboard basic visualizations.
+  """TensorBoard basic visualizations.
 
   This callback writes a log for TensorBoard, which allows
   you to visualize dynamic graphs of your training and test
@@ -1077,51 +1121,25 @@
   [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard).
 
   Arguments:
-      log_dir: the path of the directory where to save the log
-          files to be parsed by TensorBoard.
-      histogram_freq: frequency (in epochs) at which to compute activation
-          and weight histograms for the layers of the model. If set to 0,
-          histograms won't be computed. Validation data (or split) must be
-          specified for histogram visualizations.
-      write_graph: whether to visualize the graph in TensorBoard.
-          The log file can become quite large when
-          write_graph is set to True.
-      write_grads: whether to visualize gradient histograms in TensorBoard.
-          `histogram_freq` must be greater than 0.
-      batch_size: size of batch of inputs to feed to the network
-          for histograms computation.
-      write_images: whether to write model weights to visualize as
-          image in TensorBoard.
-      embeddings_freq: frequency (in epochs) at which selected embedding
-          layers will be saved. If set to 0, embeddings won't be computed.
-          Data to be visualized in TensorBoard's Embedding tab must be passed
-          as `embeddings_data`.
-      embeddings_layer_names: a list of names of layers to keep eye on. If
-          None or empty list all the embedding layer will be watched.
-      embeddings_metadata: a dictionary which maps layer name to a file name
-          in which metadata for this embedding layer is saved. See the
-          [details](https://www.tensorflow.org/how_tos/embedding_viz/#metadata_optional)
-          about metadata files format. In case if the same metadata file is
-          used for all embedding layers, string can be passed.
-      embeddings_data: data to be embedded at layers specified in
-          `embeddings_layer_names`. Numpy array (if the model has a single
-          input) or list of Numpy arrays (if the model has multiple inputs).
-          Learn [more about embeddings](https://www.tensorflow.org/programmers_guide/embedding)
+      log_dir: the path of the directory where to save the log files to be
+        parsed by TensorBoard.
+      histogram_freq: frequency (in epochs) at which to compute activation and
+        weight histograms for the layers of the model. If set to 0, histograms
+        won't be computed. Validation data (or split) must be specified for
+        histogram visualizations.
+      write_graph: whether to visualize the graph in TensorBoard. The log file
+        can become quite large when write_graph is set to True.
+      write_images: whether to write model weights to visualize as image in
+        TensorBoard.
       update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`,
-          writes the losses and metrics to TensorBoard after each batch.
-          The same applies for `'epoch'`. If using an integer, let's say `1000`,
-          the callback will write the metrics and losses to TensorBoard every
-          1000 samples. Note that writing too frequently to TensorBoard
-          can slow down your training.
+        writes the losses and metrics to TensorBoard after each batch. The same
+        applies for `'epoch'`. If using an integer, let's say `1000`, the
+        callback will write the metrics and losses to TensorBoard every 1000
+        samples. Note that writing too frequently to TensorBoard can slow down
+        your training.
 
   Raises:
       ValueError: If histogram_freq is set and no validation data is provided.
-
-  @compatibility(eager)
-  Using the `TensorBoard` callback will work when eager execution is enabled,
-  with the restriction that outputting histogram summaries of weights and
-  gradients is not supported. Consequently, `histogram_freq` will be ignored.
-  @end_compatibility
   """
 
   # pylint: enable=line-too-long
@@ -1129,316 +1147,194 @@
   def __init__(self,
                log_dir='./logs',
                histogram_freq=0,
-               batch_size=32,
                write_graph=True,
-               write_grads=False,
                write_images=False,
-               embeddings_freq=0,
-               embeddings_layer_names=None,
-               embeddings_metadata=None,
-               embeddings_data=None,
-               update_freq='epoch'):
+               update_freq='epoch',
+               **kwargs):
     super(TensorBoard, self).__init__()
+    self._validate_kwargs(kwargs)
+
     self.log_dir = log_dir
     self.histogram_freq = histogram_freq
-    if self.histogram_freq and context.executing_eagerly():
-      logging.warning(
-          UserWarning('Weight and gradient histograms not supported for eager'
-                      'execution, setting `histogram_freq` to `0`.'))
-      self.histogram_freq = 0
-    self.merged = None
     self.write_graph = write_graph
-    self.write_grads = write_grads
     self.write_images = write_images
-    self.batch_size = batch_size
-    self._current_batch = 0
-    self._total_batches_seen = 0
-    self._total_val_batches_seen = 0
-    self.embeddings_freq = embeddings_freq
-    self.embeddings_layer_names = embeddings_layer_names
-    self.embeddings_metadata = embeddings_metadata
-    self.embeddings_data = embeddings_data
     if update_freq == 'batch':
       self.update_freq = 1
     else:
       self.update_freq = update_freq
+
     self._samples_seen = 0
     self._samples_seen_at_last_write = 0
+    self._current_batch = 0
+    self._total_batches_seen = 0
+    self._total_val_batches_seen = 0
 
-  def _init_writer(self, model):
-    """Sets file writer."""
-    if context.executing_eagerly():
-      self.writer = summary_ops_v2.create_file_writer(self.log_dir)
-      if not model.run_eagerly and self.write_graph:
-        with self.writer.as_default():
-          summary_ops_v2.graph(K.get_graph())
-    elif self.write_graph:
-      self.writer = tf_summary.FileWriter(self.log_dir, K.get_graph())
-    else:
-      self.writer = tf_summary.FileWriter(self.log_dir)
+    self._writers = []  # file writers to be closed
+    self._train_writer = None  # set in `_initialize_writers`
+    self._validation_writer = None  # set in `_initialize_writers`
 
-  def _make_histogram_ops(self, model):
-    """Defines histogram ops when histogram_freq > 0."""
-    # only make histogram summary op if it hasn't already been made
-    if self.histogram_freq and self.merged is None:
-      for layer in self.model.layers:
-        for weight in layer.weights:
-          mapped_weight_name = weight.name.replace(':', '_')
-          tf_summary.histogram(mapped_weight_name, weight)
-          if self.write_images:
-            w_img = array_ops.squeeze(weight)
-            shape = K.int_shape(w_img)
-            if len(shape) == 2:  # dense layer kernel case
-              if shape[0] > shape[1]:
-                w_img = array_ops.transpose(w_img)
-                shape = K.int_shape(w_img)
-              w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
-            elif len(shape) == 3:  # convnet case
-              if K.image_data_format() == 'channels_last':
-                # switch to channels_first to display
-                # every kernel as a separate image
-                w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
-                shape = K.int_shape(w_img)
-              w_img = array_ops.reshape(w_img,
-                                        [shape[0], shape[1], shape[2], 1])
-            elif len(shape) == 1:  # bias case
-              w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
-            else:
-              # not possible to handle 3D convnets etc.
-              continue
+    # TensorBoard should only write summaries on the chief when in a
+    # Multi-Worker setting.
+    self._chief_worker_only = True
 
-            shape = K.int_shape(w_img)
-            assert len(shape) == 4 and shape[-1] in [1, 3, 4]
-            tf_summary.image(mapped_weight_name, w_img)
+  def _validate_kwargs(self, kwargs):
+    """Handle arguments were supported in V1."""
+    if kwargs.get('write_grads', False):
+      logging.warning('`write_grads` will be ignored in TensorFlow 2.0 '
+                      'for the `TensorBoard` Callback.')
+    if kwargs.get('embeddings_freq', False):
+      logging.warning('Embeddings will be ignored in TensorFlow 2.0 '
+                      'for the `TensorBoard` Callback.')
 
-        if self.write_grads:
-          for weight in layer.trainable_weights:
-            mapped_weight_name = weight.name.replace(':', '_')
-            grads = model.optimizer.get_gradients(model.total_loss, weight)
+    unrecognized_kwargs = set(kwargs.keys()) - {
+        'write_grads', 'embeddings_freq', 'embeddings_layer_names',
+        'embeddings_metadata', 'embeddings_data'
+    }
 
-            def is_indexed_slices(grad):
-              return type(grad).__name__ == 'IndexedSlices'
-
-            grads = [
-                grad.values if is_indexed_slices(grad) else grad
-                for grad in grads
-            ]
-            tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)
-
-        if hasattr(layer, 'output'):
-          if isinstance(layer.output, list):
-            for i, output in enumerate(layer.output):
-              tf_summary.histogram('{}_out_{}'.format(layer.name, i), output)
-          else:
-            tf_summary.histogram('{}_out'.format(layer.name), layer.output)
+    # Only allow kwargs that were supported in V1.
+    if unrecognized_kwargs:
+      raise ValueError('Unrecognized arguments in `TensorBoard` '
+                       'Callback: ' + str(unrecognized_kwargs))
 
   def set_model(self, model):
-    """Sets Keras model and creates summary ops."""
-
+    """Sets Keras model and writes graph if specified."""
     self.model = model
-    self._init_writer(model)
-    # histogram summaries only enabled in graph mode
-    if not context.executing_eagerly():
-      self._make_histogram_ops(model)
-      self.merged = tf_summary.merge_all()
+    with context.eager_mode():
+      self._initialize_writers()
+      if self.write_graph:
+        if model.run_eagerly:
+          logging.warning('TensorBoard Callback will ignore '
+                          '`write_graph=True` when `Model.run_eagerly=True`.')
+        else:
+          with self._train_writer.as_default():
+            with summary_ops_v2.always_record_summaries():
+              summary_ops_v2.graph(K.get_graph())
 
-    # If both embedding_freq and embeddings_data are available, we will
-    # visualize embeddings.
-    if self.embeddings_freq and self.embeddings_data is not None:
-      # Avoid circular dependency.
-      from tensorflow.python.keras.engine import training_utils  # pylint: disable=g-import-not-at-top
-      self.embeddings_data = training_utils.standardize_input_data(
-          self.embeddings_data, model.input_names)
+  def _close_writers(self):
+    """Close all remaining open file writers owned by this callback.
 
-      # If embedding_layer_names are not provided, get all of the embedding
-      # layers from the model.
-      embeddings_layer_names = self.embeddings_layer_names
-      if not embeddings_layer_names:
-        embeddings_layer_names = [
-            layer.name
-            for layer in self.model.layers
-            if type(layer).__name__ == 'Embedding'
-        ]
-
-      self.assign_embeddings = []
-      embeddings_vars = {}
-
-      self.batch_id = batch_id = array_ops.placeholder(dtypes.int32)
-      self.step = step = array_ops.placeholder(dtypes.int32)
-
-      for layer in self.model.layers:
-        if layer.name in embeddings_layer_names:
-          embedding_input = self.model.get_layer(layer.name).output
-          embedding_size = np.prod(embedding_input.shape[1:])
-          embedding_input = array_ops.reshape(embedding_input,
-                                              (step, int(embedding_size)))
-          shape = (self.embeddings_data[0].shape[0], int(embedding_size))
-          embedding = variables.Variable(
-              array_ops.zeros(shape), name=layer.name + '_embedding')
-          embeddings_vars[layer.name] = embedding
-          batch = state_ops.assign(embedding[batch_id:batch_id + step],
-                                   embedding_input)
-          self.assign_embeddings.append(batch)
-
-      self.saver = saver.Saver(list(embeddings_vars.values()))
-
-      # Create embeddings_metadata dictionary
-      if isinstance(self.embeddings_metadata, str):
-        embeddings_metadata = {
-            layer_name: self.embeddings_metadata
-            for layer_name in embeddings_vars.keys()
-        }
-      else:
-        # If embedding_metadata is already a dictionary
-        embeddings_metadata = self.embeddings_metadata
-
-      try:
-        from tensorboard.plugins import projector
-      except ImportError:
-        raise ImportError('Failed to import TensorBoard. Please make sure that '
-                          'TensorBoard integration is complete."')
-
-      # TODO(psv): Add integration tests to test embedding visualization
-      # with TensorBoard callback. We are unable to write a unit test for this
-      # because TensorBoard dependency assumes TensorFlow package is installed.
-      config = projector.ProjectorConfig()
-      for layer_name, tensor in embeddings_vars.items():
-        embedding = config.embeddings.add()
-        embedding.tensor_name = tensor.name
-
-        if (embeddings_metadata is not None and
-            layer_name in embeddings_metadata):
-          embedding.metadata_path = embeddings_metadata[layer_name]
-
-      projector.visualize_embeddings(self.writer, config)
-
-  def _fetch_callback(self, summary):
-    self.writer.add_summary(summary, self._total_val_batches_seen)
-    self._total_val_batches_seen += 1
-
-  def _write_custom_summaries(self, step, logs=None):
-    """Writes metrics out as custom scalar summaries.
-
-    Arguments:
-        step: the global step to use for Tensorboard.
-        logs: dict. Keys are scalar summary names, values are
-            NumPy scalars.
-
+    If there are no such file writers, this is a no-op.
     """
-    logs = logs or {}
-    if context.executing_eagerly():
-      # use v2 summary ops
-      with self.writer.as_default(), summary_ops_v2.always_record_summaries():
-        for name, value in logs.items():
-          if isinstance(value, np.ndarray):
-            value = value.item()
-          summary_ops_v2.scalar(name, value, step=step)
-    else:
-      # use FileWriter from v1 summary
-      for name, value in logs.items():
-        if isinstance(value, np.ndarray):
-          value = value.item()
-        summary = tf_summary.Summary()
-        summary_value = summary.value.add()
-        summary_value.simple_value = value
-        summary_value.tag = name
-        self.writer.add_summary(summary, step)
-    self.writer.flush()
+    with context.eager_mode():
+      for writer in self._writers:
+        writer.close()
+      del self._writers[:]
+
+  def _initialize_writers(self):
+    """Create all file writers needed and validation writers.
+
+    This updates `self._train_writer` and `self._validation_writer`, and
+    populates the `self._writers` list to be cleaned up by
+    `_close_writers`.
+    """
+    self._close_writers()
+
+    def create_writer(subdir):
+      path = os.path.join(self.log_dir, subdir)
+      return summary_ops_v2.create_file_writer(path)
+
+    self._train_writer = create_writer('train')
+    self._writers.append(self._train_writer)
+    self._validation_writer = create_writer('validation')
+    self._writers.append(self._validation_writer)
 
   def on_batch_end(self, batch, logs=None):
     """Writes scalar summaries for metrics on every training batch."""
-    # Don't output batch_size and batch number as Tensorboard summaries
+    # Don't output batch_size and batch number as TensorBoard summaries
     logs = logs or {}
     self._samples_seen += logs.get('size', 1)
     samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
     if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
-      batch_logs = {('batch_' + k): v
-                    for k, v in logs.items()
-                    if k not in ['batch', 'size', 'num_steps']}
-      self._write_custom_summaries(self._total_batches_seen, batch_logs)
+      self._log_metrics(logs, prefix='batch_', step=self._total_batches_seen)
       self._samples_seen_at_last_write = self._samples_seen
     self._total_batches_seen += 1
 
-  def on_epoch_begin(self, epoch, logs=None):
-    """Add histogram op to Model eval_function callbacks, reset batch count."""
-
-    # check if histogram summary should be run for this epoch
-    if self.histogram_freq and epoch % self.histogram_freq == 0:
-      self._epoch = epoch
-      # pylint: disable=protected-access
-      # add the histogram summary op if it should run this epoch
-      self.model._make_eval_function()
-      if self.merged not in self.model._eval_function.fetches:
-        self.model._eval_function.fetches.append(self.merged)
-        self.model._eval_function.fetch_callbacks[
-            self.merged] = self._fetch_callback
-      # pylint: enable=protected-access
-
   def on_epoch_end(self, epoch, logs=None):
-    """Checks if summary ops should run next epoch, logs scalar summaries."""
+    """Runs metrics and histogram summaries at epoch end."""
+    step = epoch if self.update_freq == 'epoch' else self._samples_seen
+    self._log_metrics(logs, prefix='epoch_', step=step)
 
-    # don't output batch_size and
-    # batch number as Tensorboard summaries
-    logs = {('epoch_' + k): v
-            for k, v in logs.items()
-            if k not in ['batch', 'size', 'num_steps']}
-    if self.update_freq == 'epoch':
-      step = epoch
-    else:
-      step = self._samples_seen
-    self._write_custom_summaries(step, logs)
-
-    # pop the histogram summary op after each epoch
-    if self.histogram_freq:
-      # pylint: disable=protected-access
-      if self.merged in self.model._eval_function.fetches:
-        self.model._eval_function.fetches.remove(self.merged)
-      if self.merged in self.model._eval_function.fetch_callbacks:
-        self.model._eval_function.fetch_callbacks.pop(self.merged)
-      # pylint: enable=protected-access
-
-    if self.embeddings_data is None and self.embeddings_freq:
-      raise ValueError('To visualize embeddings, embeddings_data must '
-                       'be provided.')
-
-    if self.embeddings_freq and self.embeddings_data is not None:
-      if epoch % self.embeddings_freq == 0:
-        # We need a second forward-pass here because we're passing
-        # the `embeddings_data` explicitly. This design allows to pass
-        # arbitrary data as `embeddings_data` and results from the fact
-        # that we need to know the size of the `tf.Variable`s which
-        # hold the embeddings in `set_model`. At this point, however,
-        # the `validation_data` is not yet set.
-
-        embeddings_data = self.embeddings_data
-        n_samples = embeddings_data[0].shape[0]
-        i = 0
-        while i < n_samples:
-          step = min(self.batch_size, n_samples - i)
-          batch = slice(i, i + step)
-
-          if isinstance(self.model.input, list):
-            feed_dict = {
-                model_input: embeddings_data[idx][batch]
-                for idx, model_input in enumerate(self.model.input)
-            }
-          else:
-            feed_dict = {self.model.input: embeddings_data[0][batch]}
-
-          feed_dict.update({self.batch_id: i, self.step: step})
-
-          if not isinstance(K.learning_phase(), int):
-            feed_dict[K.learning_phase()] = False
-
-          self.sess.run(self.assign_embeddings, feed_dict=feed_dict)
-          self.saver.save(self.sess,
-                          os.path.join(self.log_dir, 'keras_embedding.ckpt'),
-                          epoch)
-
-          i += self.batch_size
+    if self.histogram_freq and epoch % self.histogram_freq == 0:
+      self._log_weights(epoch)
 
   def on_train_end(self, logs=None):
-    self.writer.close()
+    self._close_writers()
+
+  def _log_metrics(self, logs, prefix, step):
+    """Writes metrics out as custom scalar summaries.
+
+    Arguments:
+        logs: Dict. Keys are scalar summary names, values are NumPy scalars.
+        prefix: String. The prefix to apply to the scalar summary names.
+        step: Int. The global step to use for TensorBoard.
+    """
+    if logs is None:
+      logs = {}
+
+    # Group metrics by their associated file writer. Values are lists of
+    # metrics, as (name, scalar_value) pairs.
+    logs_by_writer = {
+        self._train_writer: [],
+        self._validation_writer: [],
+    }
+    validation_prefix = 'val_'
+    for (name, value) in logs.items():
+      if name in ('batch', 'size', 'num_steps'):
+        # Scrub non-metric items.
+        continue
+      if name.startswith(validation_prefix):
+        name = name[len(validation_prefix):]
+        writer = self._validation_writer
+      else:
+        writer = self._train_writer
+      name = prefix + name  # assign batch or epoch prefix
+      logs_by_writer[writer].append((name, value))
+
+    with context.eager_mode():
+      with summary_ops_v2.always_record_summaries():
+        for writer in logs_by_writer:
+          with writer.as_default():
+            for (name, value) in logs_by_writer[writer]:
+              summary_ops_v2.scalar(name, value, step=step)
+
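To make the routing in `_log_metrics` concrete, here is a minimal, framework-free sketch; the writers are stand-in string labels rather than real summary file writers, and the values are illustrative:

```python
# Hypothetical illustration of _log_metrics' routing: scrub non-metric
# keys, strip the 'val_' prefix, and bucket entries by writer.
logs = {'loss': 0.5, 'val_loss': 0.7, 'size': 32, 'batch': 3}
logs_by_writer = {'train': [], 'validation': []}
for name, value in logs.items():
    if name in ('batch', 'size', 'num_steps'):
        continue  # scrub non-metric items
    if name.startswith('val_'):
        logs_by_writer['validation'].append(('epoch_' + name[len('val_'):], value))
    else:
        logs_by_writer['train'].append(('epoch_' + name, value))

assert logs_by_writer == {'train': [('epoch_loss', 0.5)],
                          'validation': [('epoch_loss', 0.7)]}
```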
+  def _log_weights(self, epoch):
+    """Logs the weights of the Model to TensorBoard."""
+    with context.eager_mode(), \
+          self._train_writer.as_default(), \
+          summary_ops_v2.always_record_summaries():
+      for layer in self.model.layers:
+        for weight in layer.weights:
+          weight_name = weight.name.replace(':', '_')
+          with ops.init_scope():
+            weight = K.get_value(weight)
+          summary_ops_v2.histogram(weight_name, weight, step=epoch)
+          if self.write_images:
+            self._log_weight_as_image(weight, weight_name, epoch)
+      self._train_writer.flush()
+
+  def _log_weight_as_image(self, weight, weight_name, epoch):
+    """Logs a weight as a TensorBoard image."""
+    w_img = array_ops.squeeze(weight)
+    shape = K.int_shape(w_img)
+    if len(shape) == 1:  # Bias case
+      w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
+    elif len(shape) == 2:  # Dense layer kernel case
+      if shape[0] > shape[1]:
+        w_img = array_ops.transpose(w_img)
+        shape = K.int_shape(w_img)
+      w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
+    elif len(shape) == 3:  # ConvNet case
+      if K.image_data_format() == 'channels_last':
+        # Switch to channels_first to display every kernel as a separate
+        # image.
+        w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
+        shape = K.int_shape(w_img)
+      w_img = array_ops.reshape(w_img, [shape[0], shape[1], shape[2], 1])
+
+    shape = K.int_shape(w_img)
+    # Not possible to handle 3D convnets etc.
+    if len(shape) == 4 and shape[-1] in [1, 3, 4]:
+      summary_ops_v2.image(weight_name, w_img, step=epoch)
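A shape-only NumPy sketch of the transform in `_log_weight_as_image`, assuming `channels_last` data format for the 3D (conv kernel) case; only the reshaping arithmetic is reproduced:

```python
import numpy as np

def to_image_shape(w):
    """Mirrors the coercion of a weight into [batch, h, w, channels]."""
    w = np.squeeze(w)
    if w.ndim == 1:                     # bias: one wide, 1-pixel-tall image
        w = w.reshape(1, w.shape[0], 1, 1)
    elif w.ndim == 2:                   # dense kernel: one grayscale image
        if w.shape[0] > w.shape[1]:
            w = w.T
        w = w.reshape(1, w.shape[0], w.shape[1], 1)
    elif w.ndim == 3:                   # conv kernel, channels_last assumed
        w = np.transpose(w, (2, 0, 1))  # one image per output channel
        w = w.reshape(w.shape[0], w.shape[1], w.shape[2], 1)
    return w.shape

assert to_image_shape(np.zeros(5)) == (1, 5, 1, 1)
assert to_image_shape(np.zeros((8, 3))) == (1, 3, 8, 1)
assert to_image_shape(np.zeros((3, 3, 4))) == (4, 3, 3, 1)
```

Anything that does not end up rank 4 with 1, 3, or 4 channels (e.g. 3D convolution kernels) is skipped by the final guard.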
 
 
 @keras_export('keras.callbacks.ReduceLROnPlateau')
diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index 6aba08b..1bd24aa 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -23,22 +23,25 @@
 import os
 import re
 import shutil
-import tempfile
+import sys
 import threading
 import unittest
 
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.core.framework import summary_pb2
 from tensorflow.python import keras
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
 from tensorflow.python.framework import random_seed
-from tensorflow.python.framework import test_util
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import summary_ops_v2
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import adam
+from tensorflow.python.util import tf_contextlib
 
 try:
   import h5py  # pylint:disable=g-import-not-at-top
@@ -218,173 +221,222 @@
         })
 
 
-class KerasCallbacksTest(test.TestCase):
+class KerasCallbacksTest(keras_parameterized.TestCase):
 
+  def _get_model(self, input_shape=None):
+    layers = [
+        keras.layers.Dense(3, activation='relu'),
+        keras.layers.Dense(2, activation='softmax')
+    ]
+    model = testing_utils.get_model_from_layers(layers, input_shape=input_shape)
+    model.compile(
+        loss='mse',
+        optimizer='rmsprop',
+        metrics=[keras.metrics.CategoricalAccuracy(name='my_acc')],
+        run_eagerly=testing_utils.should_run_eagerly())
+    return model
+
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
+  def test_progbar_logging(self):
+    model = self._get_model(input_shape=(3,))
+
+    x = array_ops.ones((50, 3))
+    y = array_ops.zeros((50, 2))
+    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(10)
+    expected_log = r'(.*- loss:.*- my_acc:.*)+'
+
+    with self.captureWritesToStream(sys.stdout) as printed:
+      model.fit(dataset, epochs=2, steps_per_epoch=10)
+      self.assertRegexpMatches(printed.contents(), expected_log)
+
+  @keras_parameterized.run_with_all_model_types(exclude_models='functional')
+  @keras_parameterized.run_all_keras_modes
+  def test_progbar_logging_deferred_model_build(self):
+    model = self._get_model()
+    self.assertFalse(model.built)
+
+    x = array_ops.ones((50, 3))
+    y = array_ops.zeros((50, 2))
+    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(10)
+    expected_log = r'(.*- loss:.*- my_acc:.*)+'
+
+    with self.captureWritesToStream(sys.stdout) as printed:
+      model.fit(dataset, epochs=2, steps_per_epoch=10)
+      self.assertRegexpMatches(printed.contents(), expected_log)
+
+  @keras_parameterized.run_with_all_model_types
   def test_ModelCheckpoint(self):
     if h5py is None:
       return  # Skip test if models cannot be saved.
 
-    with self.cached_session():
-      np.random.seed(1337)
+    layers = [
+        keras.layers.Dense(NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'),
+        keras.layers.Dense(NUM_CLASSES, activation='softmax')
+    ]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(10,))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer='rmsprop',
+                  metrics=['accuracy'])
 
-      temp_dir = self.get_temp_dir()
-      self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
 
-      filepath = os.path.join(temp_dir, 'checkpoint.h5')
-      (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
-          train_samples=TRAIN_SAMPLES,
-          test_samples=TEST_SAMPLES,
-          input_shape=(INPUT_DIM,),
-          num_classes=NUM_CLASSES)
-      y_test = keras.utils.to_categorical(y_test)
-      y_train = keras.utils.to_categorical(y_train)
-      # case 1
-      monitor = 'val_loss'
-      save_best_only = False
-      mode = 'auto'
+    filepath = os.path.join(temp_dir, 'checkpoint')
+    (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
+        train_samples=TRAIN_SAMPLES,
+        test_samples=TEST_SAMPLES,
+        input_shape=(INPUT_DIM,),
+        num_classes=NUM_CLASSES)
+    y_test = keras.utils.to_categorical(y_test)
+    y_train = keras.utils.to_categorical(y_train)
+    # case 1
+    monitor = 'val_loss'
+    save_best_only = False
+    mode = 'auto'
 
-      model = keras.models.Sequential()
-      model.add(
-          keras.layers.Dense(
-              NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
-      model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
-      model.compile(
-          loss='categorical_crossentropy',
-          optimizer='rmsprop',
-          metrics=['accuracy'])
+    model = keras.models.Sequential()
+    model.add(
+        keras.layers.Dense(
+            NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
+    model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer='rmsprop',
+        metrics=['accuracy'])
 
-      cbks = [
-          keras.callbacks.ModelCheckpoint(
-              filepath,
-              monitor=monitor,
-              save_best_only=save_best_only,
-              mode=mode)
-      ]
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=1,
-          verbose=0)
-      assert os.path.exists(filepath)
-      os.remove(filepath)
+    cbks = [
+        keras.callbacks.ModelCheckpoint(
+            filepath,
+            monitor=monitor,
+            save_best_only=save_best_only,
+            mode=mode)
+    ]
+    model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH_SIZE,
+        validation_data=(x_test, y_test),
+        callbacks=cbks,
+        epochs=1,
+        verbose=0)
+    assert os.path.exists(filepath)
+    os.remove(filepath)
 
-      # case 2
-      mode = 'min'
-      cbks = [
-          keras.callbacks.ModelCheckpoint(
-              filepath,
-              monitor=monitor,
-              save_best_only=save_best_only,
-              mode=mode)
-      ]
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=1,
-          verbose=0)
-      assert os.path.exists(filepath)
-      os.remove(filepath)
+    # case 2
+    mode = 'min'
+    cbks = [
+        keras.callbacks.ModelCheckpoint(
+            filepath,
+            monitor=monitor,
+            save_best_only=save_best_only,
+            mode=mode)
+    ]
+    model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH_SIZE,
+        validation_data=(x_test, y_test),
+        callbacks=cbks,
+        epochs=1,
+        verbose=0)
+    assert os.path.exists(filepath)
+    os.remove(filepath)
 
-      # case 3
-      mode = 'max'
-      monitor = 'val_acc'
-      cbks = [
-          keras.callbacks.ModelCheckpoint(
-              filepath,
-              monitor=monitor,
-              save_best_only=save_best_only,
-              mode=mode)
-      ]
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=1,
-          verbose=0)
-      assert os.path.exists(filepath)
-      os.remove(filepath)
+    # case 3
+    mode = 'max'
+    monitor = 'val_acc'
+    cbks = [
+        keras.callbacks.ModelCheckpoint(
+            filepath,
+            monitor=monitor,
+            save_best_only=save_best_only,
+            mode=mode)
+    ]
+    model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH_SIZE,
+        validation_data=(x_test, y_test),
+        callbacks=cbks,
+        epochs=1,
+        verbose=0)
+    assert os.path.exists(filepath)
+    os.remove(filepath)
 
-      # case 4
-      save_best_only = True
-      cbks = [
-          keras.callbacks.ModelCheckpoint(
-              filepath,
-              monitor=monitor,
-              save_best_only=save_best_only,
-              mode=mode)
-      ]
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=1,
-          verbose=0)
-      assert os.path.exists(filepath)
-      os.remove(filepath)
+    # case 4
+    save_best_only = True
+    cbks = [
+        keras.callbacks.ModelCheckpoint(
+            filepath,
+            monitor=monitor,
+            save_best_only=save_best_only,
+            mode=mode)
+    ]
+    model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH_SIZE,
+        validation_data=(x_test, y_test),
+        callbacks=cbks,
+        epochs=1,
+        verbose=0)
+    assert os.path.exists(filepath)
+    os.remove(filepath)
 
-      # Case: metric not available.
-      cbks = [
-          keras.callbacks.ModelCheckpoint(
-              filepath,
-              monitor='unknown',
-              save_best_only=True)
-      ]
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=1,
-          verbose=0)
-      # File won't be written.
-      assert not os.path.exists(filepath)
+    # Case: metric not available.
+    cbks = [
+        keras.callbacks.ModelCheckpoint(
+            filepath,
+            monitor='unknown',
+            save_best_only=True)
+    ]
+    model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH_SIZE,
+        validation_data=(x_test, y_test),
+        callbacks=cbks,
+        epochs=1,
+        verbose=0)
+    # File won't be written.
+    assert not os.path.exists(filepath)
 
-      # case 5
-      save_best_only = False
-      period = 2
-      mode = 'auto'
+    # case 5
+    save_best_only = False
+    period = 2
+    mode = 'auto'
 
-      filepath = os.path.join(temp_dir, 'checkpoint.{epoch:02d}.h5')
-      cbks = [
-          keras.callbacks.ModelCheckpoint(
-              filepath,
-              monitor=monitor,
-              save_best_only=save_best_only,
-              mode=mode,
-              period=period)
-      ]
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=4,
-          verbose=1)
-      assert os.path.exists(filepath.format(epoch=2))
-      assert os.path.exists(filepath.format(epoch=4))
-      os.remove(filepath.format(epoch=2))
-      os.remove(filepath.format(epoch=4))
-      assert not os.path.exists(filepath.format(epoch=1))
-      assert not os.path.exists(filepath.format(epoch=3))
+    filepath = os.path.join(temp_dir, 'checkpoint.{epoch:02d}.h5')
+    cbks = [
+        keras.callbacks.ModelCheckpoint(
+            filepath,
+            monitor=monitor,
+            save_best_only=save_best_only,
+            mode=mode,
+            period=period)
+    ]
+    model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH_SIZE,
+        validation_data=(x_test, y_test),
+        callbacks=cbks,
+        epochs=4,
+        verbose=1)
+    assert os.path.exists(filepath.format(epoch=2))
+    assert os.path.exists(filepath.format(epoch=4))
+    os.remove(filepath.format(epoch=2))
+    os.remove(filepath.format(epoch=4))
+    assert not os.path.exists(filepath.format(epoch=1))
+    assert not os.path.exists(filepath.format(epoch=3))
 
-      # Invalid use: this will raise a warning but not an Exception.
-      keras.callbacks.ModelCheckpoint(
-          filepath,
-          monitor=monitor,
-          save_best_only=save_best_only,
-          mode='unknown')
+    # Invalid use: this will raise a warning but not an Exception.
+    keras.callbacks.ModelCheckpoint(
+        filepath,
+        monitor=monitor,
+        save_best_only=save_best_only,
+        mode='unknown')
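Case 5 above leans on `ModelCheckpoint`'s `period` bookkeeping and the 1-based epoch numbering in the filename template; a framework-free sketch of that cadence (simplified from the callback's actual logic):

```python
filepath = 'checkpoint.{epoch:02d}.h5'
period, epochs_since_last_save, saved = 2, 0, []
for epoch in range(4):                  # on_epoch_end sees epochs 0..3
    epochs_since_last_save += 1
    if epochs_since_last_save >= period:
        epochs_since_last_save = 0
        saved.append(filepath.format(epoch=epoch + 1))

assert saved == ['checkpoint.02.h5', 'checkpoint.04.h5']
```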
 
   def test_EarlyStopping(self):
     with self.cached_session():
@@ -836,310 +888,6 @@
       self.assertEqual(len(loss), 1)
       self.assertEqual(loss[0], np.inf)
 
-  @test_util.run_deprecated_v1
-  def test_TensorBoard(self):
-    np.random.seed(1337)
-
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
-
-    (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
-        train_samples=TRAIN_SAMPLES,
-        test_samples=TEST_SAMPLES,
-        input_shape=(INPUT_DIM,),
-        num_classes=NUM_CLASSES)
-    y_test = keras.utils.to_categorical(y_test)
-    y_train = keras.utils.to_categorical(y_train)
-
-    def data_generator(train):
-      if train:
-        max_batch_index = len(x_train) // BATCH_SIZE
-      else:
-        max_batch_index = len(x_test) // BATCH_SIZE
-      i = 0
-      while 1:
-        if train:
-          yield (x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
-                 y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE])
-        else:
-          yield (x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
-                 y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE])
-        i += 1
-        i %= max_batch_index
-
-    # case: Sequential
-    with self.cached_session():
-      model = keras.models.Sequential()
-      model.add(
-          keras.layers.Dense(
-              NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
-      # non_trainable_weights: moving_variance, moving_mean
-      model.add(keras.layers.BatchNormalization())
-      model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
-      model.compile(
-          loss='categorical_crossentropy',
-          optimizer='sgd',
-          metrics=['accuracy'])
-      tsb = keras.callbacks.TensorBoard(
-          log_dir=temp_dir, histogram_freq=1, write_images=True,
-          write_grads=True, batch_size=5)
-      cbks = [tsb]
-
-      # fit with validation data
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=3,
-          verbose=0)
-
-      # fit with validation data and accuracy
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=2,
-          verbose=0)
-
-      # fit generator with validation data
-      model.fit_generator(
-          data_generator(True),
-          len(x_train),
-          epochs=2,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          verbose=0)
-
-      # fit generator without validation data
-      # histogram_freq must be zero
-      tsb.histogram_freq = 0
-      model.fit_generator(
-          data_generator(True),
-          len(x_train),
-          epochs=2,
-          callbacks=cbks,
-          verbose=0)
-
-      # fit generator with validation data and accuracy
-      tsb.histogram_freq = 1
-      model.fit_generator(
-          data_generator(True),
-          len(x_train),
-          epochs=2,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          verbose=0)
-
-      # fit generator without validation data and accuracy
-      tsb.histogram_freq = 0
-      model.fit_generator(
-          data_generator(True), len(x_train), epochs=2, callbacks=cbks)
-      assert os.path.exists(temp_dir)
-
-  @test_util.run_deprecated_v1
-  def test_TensorBoard_multi_input_output(self):
-    np.random.seed(1337)
-    tmpdir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
-
-    with self.cached_session():
-      filepath = os.path.join(tmpdir, 'logs')
-
-      (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
-          train_samples=TRAIN_SAMPLES,
-          test_samples=TEST_SAMPLES,
-          input_shape=(INPUT_DIM,),
-          num_classes=NUM_CLASSES)
-      y_test = keras.utils.to_categorical(y_test)
-      y_train = keras.utils.to_categorical(y_train)
-
-      def data_generator(train):
-        if train:
-          max_batch_index = len(x_train) // BATCH_SIZE
-        else:
-          max_batch_index = len(x_test) // BATCH_SIZE
-        i = 0
-        while 1:
-          if train:
-            # simulate multi-input/output models
-            yield ([x_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2,
-                   [y_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2)
-          else:
-            yield ([x_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2,
-                   [y_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2)
-          i += 1
-          i %= max_batch_index
-
-      inp1 = keras.Input((INPUT_DIM,))
-      inp2 = keras.Input((INPUT_DIM,))
-      inp = keras.layers.add([inp1, inp2])
-      hidden = keras.layers.Dense(2, activation='relu')(inp)
-      hidden = keras.layers.Dropout(0.1)(hidden)
-      output1 = keras.layers.Dense(NUM_CLASSES, activation='softmax')(hidden)
-      output2 = keras.layers.Dense(NUM_CLASSES, activation='softmax')(hidden)
-      model = keras.models.Model([inp1, inp2], [output1, output2])
-      model.compile(loss='categorical_crossentropy',
-                    optimizer='sgd',
-                    metrics=['accuracy'])
-
-      # we must generate new callbacks for each test, as they aren't stateless
-      def callbacks_factory(histogram_freq):
-        return [keras.callbacks.TensorBoard(log_dir=filepath,
-                                            histogram_freq=histogram_freq,
-                                            write_images=True, write_grads=True,
-                                            batch_size=5)]
-
-      # fit without validation data
-      model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE,
-                callbacks=callbacks_factory(histogram_freq=0), epochs=3)
-
-      # fit with validation data and accuracy
-      model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE,
-                validation_data=([x_test] * 2, [y_test] * 2),
-                callbacks=callbacks_factory(histogram_freq=1), epochs=2)
-
-      # fit generator without validation data
-      model.fit_generator(data_generator(True), len(x_train), epochs=2,
-                          callbacks=callbacks_factory(histogram_freq=0))
-
-      # fit generator with validation data and accuracy
-      model.fit_generator(data_generator(True), len(x_train), epochs=2,
-                          validation_data=([x_test] * 2, [y_test] * 2),
-                          callbacks=callbacks_factory(histogram_freq=1))
-      assert os.path.isdir(filepath)
-
-  @test_util.run_deprecated_v1
-  def test_Tensorboard_histogram_summaries_in_test_function(self):
-
-    class FileWriterStub(object):
-
-      def __init__(self, logdir, graph=None):
-        self.logdir = logdir
-        self.graph = graph
-        self.steps_seen = []
-
-      def add_summary(self, summary, global_step):
-        summary_obj = summary_pb2.Summary()
-
-        # ensure a valid Summary proto is being sent
-        if isinstance(summary, bytes):
-          summary_obj.ParseFromString(summary)
-        else:
-          assert isinstance(summary, summary_pb2.Summary)
-          summary_obj = summary
-
-        # keep track of steps seen for the merged_summary op,
-        # which contains the histogram summaries
-        if len(summary_obj.value) > 1:
-          self.steps_seen.append(global_step)
-
-      def flush(self):
-        pass
-
-      def close(self):
-        pass
-
-    def _init_writer(obj, _):
-      obj.writer = FileWriterStub(obj.log_dir)
-
-    np.random.seed(1337)
-    tmpdir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
-    (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
-        train_samples=TRAIN_SAMPLES,
-        test_samples=TEST_SAMPLES,
-        input_shape=(INPUT_DIM,),
-        num_classes=NUM_CLASSES)
-    y_test = keras.utils.to_categorical(y_test)
-    y_train = keras.utils.to_categorical(y_train)
-
-    with self.cached_session():
-      model = keras.models.Sequential()
-      model.add(
-          keras.layers.Dense(
-              NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
-      # non_trainable_weights: moving_variance, moving_mean
-      model.add(keras.layers.BatchNormalization())
-      model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
-      model.compile(
-          loss='categorical_crossentropy',
-          optimizer='sgd',
-          metrics=['accuracy'])
-      keras.callbacks.TensorBoard._init_writer = _init_writer
-      tsb = keras.callbacks.TensorBoard(
-          log_dir=tmpdir,
-          histogram_freq=1,
-          write_images=True,
-          write_grads=True,
-          batch_size=5)
-      cbks = [tsb]
-
-      # fit with validation data
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=3,
-          verbose=0)
-
-      self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5])
-
-  @test_util.run_deprecated_v1
-  def test_Tensorboard_histogram_summaries_with_generator(self):
-    np.random.seed(1337)
-    tmpdir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
-
-    def generator():
-      x = np.random.randn(10, 100).astype(np.float32)
-      y = np.random.randn(10, 10).astype(np.float32)
-      while True:
-        yield x, y
-
-    with self.cached_session():
-      model = testing_utils.get_small_sequential_mlp(
-          num_hidden=10, num_classes=10, input_dim=100)
-      model.compile(
-          loss='categorical_crossentropy',
-          optimizer='sgd',
-          metrics=['accuracy'])
-      tsb = keras.callbacks.TensorBoard(
-          log_dir=tmpdir,
-          histogram_freq=1,
-          write_images=True,
-          write_grads=True,
-          batch_size=5)
-      cbks = [tsb]
-
-      # fit with validation generator
-      model.fit_generator(
-          generator(),
-          steps_per_epoch=2,
-          epochs=2,
-          validation_data=generator(),
-          validation_steps=2,
-          callbacks=cbks,
-          verbose=0)
-
-      with self.assertRaises(ValueError):
-        # fit with validation generator but no
-        # validation_steps
-        model.fit_generator(
-            generator(),
-            steps_per_epoch=2,
-            epochs=2,
-            validation_data=generator(),
-            callbacks=cbks,
-            verbose=0)
-
-      self.assertTrue(os.path.exists(tmpdir))
-
   @unittest.skipIf(
       os.name == 'nt',
       'use_multiprocessing=True does not work on windows properly.')
@@ -1187,209 +935,6 @@
       t.join()
       assert not t.is_alive()
 
-  def test_TensorBoard_with_ReduceLROnPlateau(self):
-    with self.cached_session():
-      temp_dir = self.get_temp_dir()
-      self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
-
-      (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
-          train_samples=TRAIN_SAMPLES,
-          test_samples=TEST_SAMPLES,
-          input_shape=(INPUT_DIM,),
-          num_classes=NUM_CLASSES)
-      y_test = keras.utils.to_categorical(y_test)
-      y_train = keras.utils.to_categorical(y_train)
-
-      model = testing_utils.get_small_sequential_mlp(
-          num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM)
-      model.compile(
-          loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
-
-      cbks = [
-          keras.callbacks.ReduceLROnPlateau(
-              monitor='val_loss', factor=0.5, patience=4, verbose=1),
-          keras.callbacks.TensorBoard(log_dir=temp_dir)
-      ]
-
-      model.fit(
-          x_train,
-          y_train,
-          batch_size=BATCH_SIZE,
-          validation_data=(x_test, y_test),
-          callbacks=cbks,
-          epochs=2,
-          verbose=0)
-
-      assert os.path.exists(temp_dir)
-
-  @test_util.run_deprecated_v1
-  def test_Tensorboard_batch_logging(self):
-
-    class FileWriterStub(object):
-
-      def __init__(self, logdir, graph=None):
-        self.logdir = logdir
-        self.graph = graph
-        self.batches_logged = []
-        self.summary_values = []
-        self.summary_tags = []
-
-      def add_summary(self, summary, step):
-        self.summary_values.append(summary.value[0].simple_value)
-        self.summary_tags.append(summary.value[0].tag)
-        self.batches_logged.append(step)
-
-      def flush(self):
-        pass
-
-      def close(self):
-        pass
-
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
-
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
-    tb_cbk.writer = FileWriterStub(temp_dir)
-
-    for batch in range(5):
-      tb_cbk.on_batch_end(batch, {'acc': batch})
-    self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4])
-    self.assertEqual(tb_cbk.writer.summary_values, [0., 1., 2., 3., 4.])
-    self.assertEqual(tb_cbk.writer.summary_tags, ['batch_acc'] * 5)
-
-  @test_util.run_deprecated_v1
-  def test_Tensorboard_epoch_and_batch_logging(self):
-
-    class FileWriterStub(object):
-
-      def __init__(self, logdir, graph=None):
-        self.logdir = logdir
-        self.graph = graph
-
-      def add_summary(self, summary, step):
-        if 'batch_' in summary.value[0].tag:
-          self.batch_summary = (step, summary)
-        elif 'epoch_' in summary.value[0].tag:
-          self.epoch_summary = (step, summary)
-
-      def flush(self):
-        pass
-
-      def close(self):
-        pass
-
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
-
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
-    tb_cbk.writer = FileWriterStub(temp_dir)
-
-    tb_cbk.on_batch_end(0, {'acc': 5.0})
-    batch_step, batch_summary = tb_cbk.writer.batch_summary
-    self.assertEqual(batch_step, 0)
-    self.assertEqual(batch_summary.value[0].simple_value, 5.0)
-
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch')
-    tb_cbk.writer = FileWriterStub(temp_dir)
-    tb_cbk.on_epoch_end(0, {'acc': 10.0})
-    epoch_step, epoch_summary = tb_cbk.writer.epoch_summary
-    self.assertEqual(epoch_step, 0)
-    self.assertEqual(epoch_summary.value[0].simple_value, 10.0)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_Tensorboard_eager(self):
-    temp_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
-
-    (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
-        train_samples=TRAIN_SAMPLES,
-        test_samples=TEST_SAMPLES,
-        input_shape=(INPUT_DIM,),
-        num_classes=NUM_CLASSES)
-    y_test = keras.utils.to_categorical(y_test)
-    y_train = keras.utils.to_categorical(y_train)
-
-    model = testing_utils.get_small_sequential_mlp(
-        num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM)
-    model.compile(
-        loss='binary_crossentropy',
-        optimizer=adam.AdamOptimizer(0.01),
-        metrics=['accuracy'])
-
-    cbks = [keras.callbacks.TensorBoard(log_dir=temp_dir)]
-
-    model.fit(
-        x_train,
-        y_train,
-        batch_size=BATCH_SIZE,
-        validation_data=(x_test, y_test),
-        callbacks=cbks,
-        epochs=2,
-        verbose=0)
-
-    self.assertTrue(os.path.exists(temp_dir))
-
-  @test_util.run_deprecated_v1
-  def test_TensorBoard_update_freq(self):
-
-    class FileWriterStub(object):
-
-      def __init__(self, logdir, graph=None):
-        self.logdir = logdir
-        self.graph = graph
-        self.batch_summaries = []
-        self.epoch_summaries = []
-
-      def add_summary(self, summary, step):
-        if 'batch_' in summary.value[0].tag:
-          self.batch_summaries.append((step, summary))
-        elif 'epoch_' in summary.value[0].tag:
-          self.epoch_summaries.append((step, summary))
-
-      def flush(self):
-        pass
-
-      def close(self):
-        pass
-
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
-
-    # Epoch mode
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch')
-    tb_cbk.writer = FileWriterStub(temp_dir)
-
-    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
-    self.assertEqual(tb_cbk.writer.batch_summaries, [])
-    tb_cbk.on_epoch_end(0, {'acc': 10.0, 'size': 1})
-    self.assertEqual(len(tb_cbk.writer.epoch_summaries), 1)
-
-    # Batch mode
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
-    tb_cbk.writer = FileWriterStub(temp_dir)
-
-    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
-    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
-    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
-    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
-    self.assertFalse(tb_cbk.writer.epoch_summaries)
-
-    # Integer mode
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq=20)
-    tb_cbk.writer = FileWriterStub(temp_dir)
-
-    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
-    self.assertFalse(tb_cbk.writer.batch_summaries)
-    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
-    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
-    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
-    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
-    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
-    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
-    tb_cbk.on_batch_end(0, {'acc': 10.0, 'size': 10})
-    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
-    self.assertFalse(tb_cbk.writer.epoch_summaries)
-
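The removed "Integer mode" assertions above pin down sample-counting behavior that survives in `on_batch_end` of the new `callbacks_v1.py` (later in this diff); a standalone sketch of the bookkeeping:

```python
update_freq = 20                        # write every 20 samples
samples_seen = samples_at_last_write = writes = 0
for _ in range(5):                      # five batches of size 10, as tested
    samples_seen += 10
    if samples_seen - samples_at_last_write >= update_freq:
        writes += 1
        samples_at_last_write = samples_seen

assert writes == 2                      # after the 2nd and 4th batches
```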
   def test_RemoteMonitorWithJsonPayload(self):
     if requests is None:
       self.skipTest('`requests` required to run this test')
@@ -1422,5 +967,217 @@
             epochs=1)
 
 
+# A summary that was emitted during a test. Fields:
+#   logdir: str. The logdir of the FileWriter to which the summary was
+#     written.
+#   tag: str. The name of the summary.
+_ObservedSummary = collections.namedtuple('_ObservedSummary', ('logdir', 'tag'))
+
+
+class _MockSummaryFile(object):
+  """Record summary tag names and the files to which they're written.
+
+  Fields `scalars`, `images`, and `histograms` are sets containing
+  `_ObservedSummary` values.
+  """
+
+  def __init__(self):
+    self.scalars = set()
+    self.images = set()
+    self.histograms = set()
+
+
+@tf_contextlib.contextmanager
+def _mock_summary_api():
+  summary_file = _MockSummaryFile()
+
+  # Keep track of the logdir associated with each created resource.
+  # (There doesn't seem to be an easy way to get this information after
+  # the fact.)
+  resource_logdirs = {}
+  real_create_file_writer = summary_ops_v2.create_file_writer
+
+  def mock_create_file_writer(logdir, *args, **kwargs):
+    writer = real_create_file_writer(logdir, *args, **kwargs)
+    resource = writer._resource
+    assert resource is not None
+    assert resource not in resource_logdirs, (resource, resource_logdirs)
+    resource_logdirs[resource] = logdir
+    return writer
+
+  def make_mock_summary(summary_set):
+
+    def mock_summary(tag, *args, **kwargs):
+      del args  # unused
+      del kwargs  # unused
+      resource = context.context().summary_writer_resource
+      logdir = resource_logdirs[resource]
+      summary_set.add(_ObservedSummary(logdir=logdir, tag=tag))
+
+    return mock_summary
+
+  with test.mock.patch.object(summary_ops_v2,
+                              'create_file_writer',
+                              mock_create_file_writer), \
+        test.mock.patch.object(summary_ops_v2,
+                               'scalar',
+                               make_mock_summary(summary_file.scalars)), \
+        test.mock.patch.object(summary_ops_v2,
+                               'histogram',
+                               make_mock_summary(summary_file.histograms)), \
+        test.mock.patch.object(summary_ops_v2,
+                               'image',
+                               make_mock_summary(summary_file.images)):
+    yield summary_file
+
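`_mock_summary_api` combines a context manager with `mock.patch.object` so the tests can observe summary calls without reading event files back from disk. A generic sketch of the same pattern, with illustrative names and `math.sqrt` standing in for the patched API:

```python
import contextlib
import math
from unittest import mock

@contextlib.contextmanager
def record_calls(module, name):
    """Patch module.name, recording every call while delegating through."""
    calls = []
    real = getattr(module, name)

    def wrapper(*args, **kwargs):
        calls.append((args, kwargs))
        return real(*args, **kwargs)

    with mock.patch.object(module, name, wrapper):
        yield calls

with record_calls(math, 'sqrt') as calls:
    math.sqrt(4.0)
assert calls == [((4.0,), {})]
```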
+
+@keras_parameterized.run_with_all_model_types
+@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
+class TestTensorBoardV2(keras_parameterized.TestCase):
+
+  def setUp(self):
+    super(TestTensorBoardV2, self).setUp()
+    self.logdir = os.path.join(self.get_temp_dir(), 'tb')
+    self.train_dir = os.path.join(self.logdir, 'train')
+    self.validation_dir = os.path.join(self.logdir, 'validation')
+
+  def _get_model(self):
+    layers = [
+        keras.layers.Conv2D(8, (3, 3)),
+        keras.layers.Flatten(),
+        keras.layers.Dense(1)
+    ]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(10, 10, 1))
+    model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly())
+    return model
+
+  def test_TensorBoard_basic(self):
+    model = self._get_model()
+    x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1))
+    tb_cbk = keras.callbacks.TensorBoard(self.logdir)
+
+    with _mock_summary_api() as summary_file:
+      model.fit(
+          x,
+          y,
+          batch_size=2,
+          epochs=2,
+          validation_data=(x, y),
+          callbacks=[tb_cbk])
+
+    self.assertEqual(
+        summary_file.scalars, {
+            _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'),
+            _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'),
+        })
+
+  def test_TensorBoard_batch_metrics(self):
+    model = self._get_model()
+    x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1))
+    tb_cbk = keras.callbacks.TensorBoard(self.logdir, update_freq=1)
+
+    with _mock_summary_api() as summary_file:
+      model.fit(
+          x,
+          y,
+          batch_size=2,
+          epochs=2,
+          validation_data=(x, y),
+          callbacks=[tb_cbk])
+
+    self.assertEqual(
+        summary_file.scalars,
+        {
+            _ObservedSummary(logdir=self.train_dir, tag='batch_loss'),
+            _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'),
+            _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'),
+        },
+    )
+
+  def test_TensorBoard_weight_histograms(self):
+    model = self._get_model()
+    x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1))
+    tb_cbk = keras.callbacks.TensorBoard(self.logdir, histogram_freq=1)
+
+    with _mock_summary_api() as summary_file:
+      model.fit(
+          x,
+          y,
+          batch_size=2,
+          epochs=2,
+          validation_data=(x, y),
+          callbacks=[tb_cbk])
+
+    self.assertEqual(
+        summary_file.scalars,
+        {
+            _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'),
+            _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'),
+        },
+    )
+    self.assertEqual(
+        self._strip_layer_names(summary_file.histograms),
+        {
+            _ObservedSummary(logdir=self.train_dir, tag='bias_0'),
+            _ObservedSummary(logdir=self.train_dir, tag='kernel_0'),
+        },
+    )
+
+  def test_TensorBoard_weight_images(self):
+    model = self._get_model()
+    x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1))
+    tb_cbk = keras.callbacks.TensorBoard(
+        self.logdir, histogram_freq=1, write_images=True)
+
+    with _mock_summary_api() as summary_file:
+      model.fit(
+          x,
+          y,
+          batch_size=2,
+          epochs=2,
+          validation_data=(x, y),
+          callbacks=[tb_cbk])
+
+    self.assertEqual(
+        summary_file.scalars,
+        {
+            _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'),
+            _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'),
+        },
+    )
+    self.assertEqual(
+        self._strip_layer_names(summary_file.histograms),
+        {
+            _ObservedSummary(logdir=self.train_dir, tag='bias_0'),
+            _ObservedSummary(logdir=self.train_dir, tag='kernel_0'),
+        },
+    )
+    self.assertEqual(
+        self._strip_layer_names(summary_file.images),
+        {
+            _ObservedSummary(logdir=self.train_dir, tag='bias_0'),
+            _ObservedSummary(logdir=self.train_dir, tag='kernel_0'),
+        },
+    )
+
+  def _strip_layer_names(self, summaries):
+    """Deduplicate summary names modulo layer suffix.
+
+    Args:
+      summaries: A `set` of `_ObservedSummary` values.
+
+    Returns:
+      A new `set` of `_ObservedSummary` values with layer suffixes
+      removed.
+    """
+    return {s._replace(tag=s.tag[s.tag.rfind('/') + 1:]) for s in summaries}
+
+  def test_TensorBoard_invalid_argument(self):
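+    # Note: `wwrite_images` below is deliberately misspelled to exercise
+    # the unknown-keyword-argument check.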
+    with self.assertRaisesRegexp(ValueError, 'Unrecognized arguments'):
+      keras.callbacks.TensorBoard(wwrite_images=True)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/callbacks_v1.py b/tensorflow/python/keras/callbacks_v1.py
new file mode 100644
index 0000000..980eacd
--- /dev/null
+++ b/tensorflow/python/keras/callbacks_v1.py
@@ -0,0 +1,424 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# pylint: disable=g-import-not-at-top
+"""Callbacks: utilities called at certain points during model training.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import numpy as np
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import callbacks
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import summary_ops_v2
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.summary import summary as tf_summary
+from tensorflow.python.training import saver
+from tensorflow.python.util.tf_export import keras_export
+
+
+@keras_export(v1=['keras.callbacks.TensorBoard'])
+class TensorBoard(callbacks.Callback):
+  # pylint: disable=line-too-long
+  """TensorBoard basic visualizations.
+
+  This callback writes a log for TensorBoard, which allows
+  you to visualize dynamic graphs of your training and test
+  metrics, as well as activation histograms for the different
+  layers in your model.
+
+  TensorBoard is a visualization tool provided with TensorFlow.
+
+  If you have installed TensorFlow with pip, you should be able
+  to launch TensorBoard from the command line:
+
+  ```sh
+  tensorboard --logdir=/full_path_to_your_logs
+  ```
+
+  You can find more information about TensorBoard
+  [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard).
+
+  Arguments:
+      log_dir: the path of the directory where the log files to be parsed by
+        TensorBoard are saved.
+      histogram_freq: frequency (in epochs) at which to compute activation and
+        weight histograms for the layers of the model. If set to 0, histograms
+        won't be computed. Validation data (or split) must be specified for
+        histogram visualizations.
+      write_graph: whether to visualize the graph in TensorBoard. The log file
+        can become quite large when write_graph is set to True.
+      write_grads: whether to visualize gradient histograms in TensorBoard.
+        `histogram_freq` must be greater than 0.
+      batch_size: size of batch of inputs to feed to the network for histograms
+        computation.
+      write_images: whether to write model weights to visualize as image in
+        TensorBoard.
+      embeddings_freq: frequency (in epochs) at which selected embedding layers
+        will be saved. If set to 0, embeddings won't be computed. Data to be
+        visualized in TensorBoard's Embedding tab must be passed as
+        `embeddings_data`.
+      embeddings_layer_names: a list of names of layers to keep an eye on. If
+        None or an empty list, all the embedding layers will be watched.
+      embeddings_metadata: a dictionary which maps layer names to file names in
+        which metadata for each embedding layer is saved. See the
+        [details](https://www.tensorflow.org/how_tos/embedding_viz/#metadata_optional)
+        about the metadata file format. If the same metadata file is to be used
+        for all embedding layers, a single string can be passed.
+      embeddings_data: data to be embedded at the layers specified in
+        `embeddings_layer_names`. NumPy array (if the model has a single input)
+        or list of NumPy arrays (if the model has multiple inputs). Learn [more
+        about embeddings](https://www.tensorflow.org/programmers_guide/embedding).
+      update_freq: `'batch'`, `'epoch'`, or an integer. When using `'batch'`,
+        writes the losses and metrics to TensorBoard after each batch. The same
+        applies for `'epoch'`. If using an integer, say `1000`, the callback
+        will write the metrics and losses to TensorBoard every 1000 samples.
+        Note that writing too frequently to TensorBoard can slow down your
+        training.
+
+  Raises:
+      ValueError: If histogram_freq is set and no validation data is provided.
+
+  @compatibility(eager)
+  Using the `TensorBoard` callback will work when eager execution is enabled,
+  with the restriction that outputting histogram summaries of weights and
+  gradients is not supported. Consequently, `histogram_freq` will be ignored.
+  @end_compatibility
+  """
+
+  # pylint: enable=line-too-long
+
+  def __init__(self,
+               log_dir='./logs',
+               histogram_freq=0,
+               batch_size=32,
+               write_graph=True,
+               write_grads=False,
+               write_images=False,
+               embeddings_freq=0,
+               embeddings_layer_names=None,
+               embeddings_metadata=None,
+               embeddings_data=None,
+               update_freq='epoch'):
+    super(TensorBoard, self).__init__()
+    self.log_dir = log_dir
+    self.histogram_freq = histogram_freq
+    if self.histogram_freq and context.executing_eagerly():
+      logging.warning(
+          UserWarning('Weight and gradient histograms not supported for eager '
+                      'execution, setting `histogram_freq` to `0`.'))
+      self.histogram_freq = 0
+    self.merged = None
+    self.write_graph = write_graph
+    self.write_grads = write_grads
+    self.write_images = write_images
+    self.batch_size = batch_size
+    self._current_batch = 0
+    self._total_batches_seen = 0
+    self._total_val_batches_seen = 0
+    self.embeddings_freq = embeddings_freq
+    self.embeddings_layer_names = embeddings_layer_names
+    self.embeddings_metadata = embeddings_metadata
+    self.embeddings_data = embeddings_data
+    if update_freq == 'batch':
+      self.update_freq = 1
+    else:
+      self.update_freq = update_freq
+    self._samples_seen = 0
+    self._samples_seen_at_last_write = 0
+
+  def _init_writer(self, model):
+    """Sets file writer."""
+    if context.executing_eagerly():
+      self.writer = summary_ops_v2.create_file_writer(self.log_dir)
+      if not model.run_eagerly and self.write_graph:
+        with self.writer.as_default():
+          summary_ops_v2.graph(K.get_graph())
+    elif self.write_graph:
+      self.writer = tf_summary.FileWriter(self.log_dir, K.get_graph())
+    else:
+      self.writer = tf_summary.FileWriter(self.log_dir)
+
+  def _make_histogram_ops(self, model):
+    """Defines histogram ops when histogram_freq > 0."""
+    # Only make the histogram summary op if it hasn't already been made.
+    if self.histogram_freq and self.merged is None:
+      for layer in self.model.layers:
+        for weight in layer.weights:
+          mapped_weight_name = weight.name.replace(':', '_')
+          tf_summary.histogram(mapped_weight_name, weight)
+          if self.write_images:
+            w_img = array_ops.squeeze(weight)
+            shape = K.int_shape(w_img)
+            if len(shape) == 2:  # dense layer kernel case
+              if shape[0] > shape[1]:
+                w_img = array_ops.transpose(w_img)
+                shape = K.int_shape(w_img)
+              w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
+            elif len(shape) == 3:  # convnet case
+              if K.image_data_format() == 'channels_last':
+                # switch to channels_first to display
+                # every kernel as a separate image
+                w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
+                shape = K.int_shape(w_img)
+              w_img = array_ops.reshape(w_img,
+                                        [shape[0], shape[1], shape[2], 1])
+            elif len(shape) == 1:  # bias case
+              w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
+            else:
+              # not possible to handle 3D convnets etc.
+              continue
+
+            shape = K.int_shape(w_img)
+            assert len(shape) == 4 and shape[-1] in [1, 3, 4]
+            tf_summary.image(mapped_weight_name, w_img)
+
+        if self.write_grads:
+          for weight in layer.trainable_weights:
+            mapped_weight_name = weight.name.replace(':', '_')
+            grads = model.optimizer.get_gradients(model.total_loss, weight)
+
+            def is_indexed_slices(grad):
+              return type(grad).__name__ == 'IndexedSlices'
+
+            grads = [
+                grad.values if is_indexed_slices(grad) else grad
+                for grad in grads
+            ]
+            tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)
+
+        if hasattr(layer, 'output'):
+          if isinstance(layer.output, list):
+            for i, output in enumerate(layer.output):
+              tf_summary.histogram('{}_out_{}'.format(layer.name, i), output)
+          else:
+            tf_summary.histogram('{}_out'.format(layer.name), layer.output)
+
+  def set_model(self, model):
+    """Sets Keras model and creates summary ops."""
+
+    self.model = model
+    self._init_writer(model)
+    # histogram summaries only enabled in graph mode
+    if not context.executing_eagerly():
+      self._make_histogram_ops(model)
+      self.merged = tf_summary.merge_all()
+
+    # If both embedding_freq and embeddings_data are available, we will
+    # visualize embeddings.
+    if self.embeddings_freq and self.embeddings_data is not None:
+      # Avoid circular dependency.
+      from tensorflow.python.keras.engine import training_utils  # pylint: disable=g-import-not-at-top
+      self.embeddings_data = training_utils.standardize_input_data(
+          self.embeddings_data, model.input_names)
+
+      # If embedding_layer_names are not provided, get all of the embedding
+      # layers from the model.
+      embeddings_layer_names = self.embeddings_layer_names
+      if not embeddings_layer_names:
+        embeddings_layer_names = [
+            layer.name
+            for layer in self.model.layers
+            if type(layer).__name__ == 'Embedding'
+        ]
+
+      self.assign_embeddings = []
+      embeddings_vars = {}
+
+      self.batch_id = batch_id = array_ops.placeholder(dtypes.int32)
+      self.step = step = array_ops.placeholder(dtypes.int32)
+
+      for layer in self.model.layers:
+        if layer.name in embeddings_layer_names:
+          embedding_input = self.model.get_layer(layer.name).output
+          embedding_size = np.prod(embedding_input.shape[1:])
+          embedding_input = array_ops.reshape(embedding_input,
+                                              (step, int(embedding_size)))
+          shape = (self.embeddings_data[0].shape[0], int(embedding_size))
+          embedding = variables.Variable(
+              array_ops.zeros(shape), name=layer.name + '_embedding')
+          embeddings_vars[layer.name] = embedding
+          batch = state_ops.assign(embedding[batch_id:batch_id + step],
+                                   embedding_input)
+          self.assign_embeddings.append(batch)
+
+      self.saver = saver.Saver(list(embeddings_vars.values()))
+
+      # Create embeddings_metadata dictionary
+      if isinstance(self.embeddings_metadata, str):
+        embeddings_metadata = {
+            layer_name: self.embeddings_metadata
+            for layer_name in embeddings_vars.keys()
+        }
+      else:
+        # `embeddings_metadata` is already a dictionary; use it as-is.
+        embeddings_metadata = self.embeddings_metadata
+
+      try:
+        from tensorboard.plugins import projector
+      except ImportError:
+        raise ImportError('Failed to import TensorBoard. Please make sure that '
+                          'TensorBoard integration is complete.')
+
+      # TODO(psv): Add integration tests to test embedding visualization
+      # with TensorBoard callback. We are unable to write a unit test for this
+      # because TensorBoard dependency assumes TensorFlow package is installed.
+      config = projector.ProjectorConfig()
+      for layer_name, tensor in embeddings_vars.items():
+        embedding = config.embeddings.add()
+        embedding.tensor_name = tensor.name
+
+        if (embeddings_metadata is not None and
+            layer_name in embeddings_metadata):
+          embedding.metadata_path = embeddings_metadata[layer_name]
+
+      projector.visualize_embeddings(self.writer, config)
+
+  def _fetch_callback(self, summary):
+    self.writer.add_summary(summary, self._total_val_batches_seen)
+    self._total_val_batches_seen += 1
+
+  def _write_custom_summaries(self, step, logs=None):
+    """Writes metrics out as custom scalar summaries.
+
+    Arguments:
+        step: the global step to use for TensorBoard.
+        logs: dict. Keys are scalar summary names, values are
+            NumPy scalars.
+
+    """
+    logs = logs or {}
+    if context.executing_eagerly():
+      # use v2 summary ops
+      with self.writer.as_default(), summary_ops_v2.always_record_summaries():
+        for name, value in logs.items():
+          if isinstance(value, np.ndarray):
+            value = value.item()
+          summary_ops_v2.scalar(name, value, step=step)
+    else:
+      # use FileWriter from v1 summary
+      for name, value in logs.items():
+        if isinstance(value, np.ndarray):
+          value = value.item()
+        summary = tf_summary.Summary()
+        summary_value = summary.value.add()
+        summary_value.simple_value = value
+        summary_value.tag = name
+        self.writer.add_summary(summary, step)
+    self.writer.flush()
+
+  def on_batch_end(self, batch, logs=None):
+    """Writes scalar summaries for metrics on every training batch."""
+    # Don't output batch_size and batch number as TensorBoard summaries
+    logs = logs or {}
+    self._samples_seen += logs.get('size', 1)
+    samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
+    if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
+      batch_logs = {('batch_' + k): v
+                    for k, v in logs.items()
+                    if k not in ['batch', 'size', 'num_steps']}
+      self._write_custom_summaries(self._total_batches_seen, batch_logs)
+      self._samples_seen_at_last_write = self._samples_seen
+    self._total_batches_seen += 1
+
+  def on_epoch_begin(self, epoch, logs=None):
+    """Add histogram op to Model eval_function callbacks, reset batch count."""
+
+    # check if histogram summary should be run for this epoch
+    if self.histogram_freq and epoch % self.histogram_freq == 0:
+      self._epoch = epoch
+      # pylint: disable=protected-access
+      # add the histogram summary op if it should run this epoch
+      self.model._make_eval_function()
+      if self.merged not in self.model._eval_function.fetches:
+        self.model._eval_function.fetches.append(self.merged)
+        self.model._eval_function.fetch_callbacks[
+            self.merged] = self._fetch_callback
+      # pylint: enable=protected-access
+
+  def on_epoch_end(self, epoch, logs=None):
+    """Checks if summary ops should run next epoch, logs scalar summaries."""
+
+    # don't output batch_size and
+    # batch number as TensorBoard summaries
+    logs = {('epoch_' + k): v
+            for k, v in logs.items()
+            if k not in ['batch', 'size', 'num_steps']}
+    if self.update_freq == 'epoch':
+      step = epoch
+    else:
+      step = self._samples_seen
+    self._write_custom_summaries(step, logs)
+
+    # pop the histogram summary op after each epoch
+    if self.histogram_freq:
+      # pylint: disable=protected-access
+      if self.merged in self.model._eval_function.fetches:
+        self.model._eval_function.fetches.remove(self.merged)
+      if self.merged in self.model._eval_function.fetch_callbacks:
+        self.model._eval_function.fetch_callbacks.pop(self.merged)
+      # pylint: enable=protected-access
+
+    if self.embeddings_data is None and self.embeddings_freq:
+      raise ValueError('To visualize embeddings, embeddings_data must '
+                       'be provided.')
+
+    if self.embeddings_freq and self.embeddings_data is not None:
+      if epoch % self.embeddings_freq == 0:
+        # We need a second forward-pass here because we're passing
+        # the `embeddings_data` explicitly. This design allows passing
+        # arbitrary data as `embeddings_data`, and follows from the fact
+        # that we need to know the size of the `tf.Variable`s which
+        # hold the embeddings in `set_model`. At that point, however,
+        # the `validation_data` is not yet set.
+
+        embeddings_data = self.embeddings_data
+        n_samples = embeddings_data[0].shape[0]
+        i = 0
+        while i < n_samples:
+          step = min(self.batch_size, n_samples - i)
+          batch = slice(i, i + step)
+
+          if isinstance(self.model.input, list):
+            feed_dict = {
+                model_input: embeddings_data[idx][batch]
+                for idx, model_input in enumerate(self.model.input)
+            }
+          else:
+            feed_dict = {self.model.input: embeddings_data[0][batch]}
+
+          feed_dict.update({self.batch_id: i, self.step: step})
+
+          if not isinstance(K.learning_phase(), int):
+            feed_dict[K.learning_phase()] = False
+
+          self.sess.run(self.assign_embeddings, feed_dict=feed_dict)
+          self.saver.save(self.sess,
+                          os.path.join(self.log_dir, 'keras_embedding.ckpt'),
+                          epoch)
+
+          i += self.batch_size
+
+  def on_train_end(self, logs=None):
+    self.writer.close()
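For context, a minimal standalone sketch (not part of this patch) of the sample-count gating that `on_batch_end` implements above; the numbers mirror the integer-mode test added below (`update_freq=20`, batches of `size=10`), so a write happens on every second batch:

samples_seen = 0
samples_seen_at_last_write = 0
update_freq = 20  # number of samples between summary writes

writes = []
for batch in range(5):
  samples_seen += 10  # the batch 'size' reported in `logs`
  if samples_seen - samples_seen_at_last_write >= update_freq:
    writes.append(batch)
    samples_seen_at_last_write = samples_seen

print(writes)  # [1, 3]: summaries after the 2nd and 4th batches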
diff --git a/tensorflow/python/keras/callbacks_v1_test.py b/tensorflow/python/keras/callbacks_v1_test.py
new file mode 100644
index 0000000..fdb6af9
--- /dev/null
+++ b/tensorflow/python/keras/callbacks_v1_test.py
@@ -0,0 +1,562 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Keras callbacks."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+import tempfile
+
+import numpy as np
+
+from tensorflow.core.framework import summary_pb2
+from tensorflow.python import keras
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import callbacks_v1
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.platform import test
+from tensorflow.python.training import adam
+
+
+TRAIN_SAMPLES = 10
+TEST_SAMPLES = 10
+NUM_CLASSES = 2
+INPUT_DIM = 3
+NUM_HIDDEN = 5
+BATCH_SIZE = 5
+
+
+class TestTensorBoardV1(test.TestCase):
+
+  @test_util.run_deprecated_v1
+  def test_TensorBoard(self):
+    np.random.seed(1337)
+
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
+        train_samples=TRAIN_SAMPLES,
+        test_samples=TEST_SAMPLES,
+        input_shape=(INPUT_DIM,),
+        num_classes=NUM_CLASSES)
+    y_test = keras.utils.to_categorical(y_test)
+    y_train = keras.utils.to_categorical(y_train)
+
+    def data_generator(train):
+      if train:
+        max_batch_index = len(x_train) // BATCH_SIZE
+      else:
+        max_batch_index = len(x_test) // BATCH_SIZE
+      i = 0
+      while 1:
+        if train:
+          yield (x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
+                 y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE])
+        else:
+          yield (x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
+                 y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE])
+        i += 1
+        i %= max_batch_index
+
+    # case: Sequential
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(
+          keras.layers.Dense(
+              NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
+      # non_trainable_weights: moving_variance, moving_mean
+      model.add(keras.layers.BatchNormalization())
+      model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
+      model.compile(
+          loss='categorical_crossentropy',
+          optimizer='sgd',
+          metrics=['accuracy'])
+      tsb = callbacks_v1.TensorBoard(
+          log_dir=temp_dir,
+          histogram_freq=1,
+          write_images=True,
+          write_grads=True,
+          batch_size=5)
+      cbks = [tsb]
+
+      # fit with validation data
+      model.fit(
+          x_train,
+          y_train,
+          batch_size=BATCH_SIZE,
+          validation_data=(x_test, y_test),
+          callbacks=cbks,
+          epochs=3,
+          verbose=0)
+
+      # fit with validation data and accuracy
+      model.fit(
+          x_train,
+          y_train,
+          batch_size=BATCH_SIZE,
+          validation_data=(x_test, y_test),
+          callbacks=cbks,
+          epochs=2,
+          verbose=0)
+
+      # fit generator with validation data
+      model.fit_generator(
+          data_generator(True),
+          len(x_train),
+          epochs=2,
+          validation_data=(x_test, y_test),
+          callbacks=cbks,
+          verbose=0)
+
+      # fit generator without validation data
+      # histogram_freq must be zero
+      tsb.histogram_freq = 0
+      model.fit_generator(
+          data_generator(True),
+          len(x_train),
+          epochs=2,
+          callbacks=cbks,
+          verbose=0)
+
+      # fit generator with validation data and accuracy
+      tsb.histogram_freq = 1
+      model.fit_generator(
+          data_generator(True),
+          len(x_train),
+          epochs=2,
+          validation_data=(x_test, y_test),
+          callbacks=cbks,
+          verbose=0)
+
+      # fit generator without validation data and accuracy
+      tsb.histogram_freq = 0
+      model.fit_generator(
+          data_generator(True), len(x_train), epochs=2, callbacks=cbks)
+      assert os.path.exists(temp_dir)
+
+  @test_util.run_deprecated_v1
+  def test_TensorBoard_multi_input_output(self):
+    np.random.seed(1337)
+    tmpdir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
+
+    with self.cached_session():
+      filepath = os.path.join(tmpdir, 'logs')
+
+      (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
+          train_samples=TRAIN_SAMPLES,
+          test_samples=TEST_SAMPLES,
+          input_shape=(INPUT_DIM,),
+          num_classes=NUM_CLASSES)
+      y_test = keras.utils.to_categorical(y_test)
+      y_train = keras.utils.to_categorical(y_train)
+
+      def data_generator(train):
+        if train:
+          max_batch_index = len(x_train) // BATCH_SIZE
+        else:
+          max_batch_index = len(x_test) // BATCH_SIZE
+        i = 0
+        while 1:
+          if train:
+            # simulate multi-input/output models
+            yield ([x_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2,
+                   [y_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2)
+          else:
+            yield ([x_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2,
+                   [y_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2)
+          i += 1
+          i %= max_batch_index
+
+      inp1 = keras.Input((INPUT_DIM,))
+      inp2 = keras.Input((INPUT_DIM,))
+      inp = keras.layers.add([inp1, inp2])
+      hidden = keras.layers.Dense(2, activation='relu')(inp)
+      hidden = keras.layers.Dropout(0.1)(hidden)
+      output1 = keras.layers.Dense(NUM_CLASSES, activation='softmax')(hidden)
+      output2 = keras.layers.Dense(NUM_CLASSES, activation='softmax')(hidden)
+      model = keras.models.Model([inp1, inp2], [output1, output2])
+      model.compile(loss='categorical_crossentropy',
+                    optimizer='sgd',
+                    metrics=['accuracy'])
+
+      # we must generate new callbacks for each test, as they aren't stateless
+      def callbacks_factory(histogram_freq):
+        return [
+            callbacks_v1.TensorBoard(
+                log_dir=filepath,
+                histogram_freq=histogram_freq,
+                write_images=True,
+                write_grads=True,
+                batch_size=5)
+        ]
+
+      # fit without validation data
+      model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE,
+                callbacks=callbacks_factory(histogram_freq=0), epochs=3)
+
+      # fit with validation data and accuracy
+      model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE,
+                validation_data=([x_test] * 2, [y_test] * 2),
+                callbacks=callbacks_factory(histogram_freq=1), epochs=2)
+
+      # fit generator without validation data
+      model.fit_generator(data_generator(True), len(x_train), epochs=2,
+                          callbacks=callbacks_factory(histogram_freq=0))
+
+      # fit generator with validation data and accuracy
+      model.fit_generator(data_generator(True), len(x_train), epochs=2,
+                          validation_data=([x_test] * 2, [y_test] * 2),
+                          callbacks=callbacks_factory(histogram_freq=1))
+      assert os.path.isdir(filepath)
+
+  @test_util.run_deprecated_v1
+  def test_Tensorboard_histogram_summaries_in_test_function(self):
+
+    class FileWriterStub(object):
+
+      def __init__(self, logdir, graph=None):
+        self.logdir = logdir
+        self.graph = graph
+        self.steps_seen = []
+
+      def add_summary(self, summary, global_step):
+        summary_obj = summary_pb2.Summary()
+
+        # ensure a valid Summary proto is being sent
+        if isinstance(summary, bytes):
+          summary_obj.ParseFromString(summary)
+        else:
+          assert isinstance(summary, summary_pb2.Summary)
+          summary_obj = summary
+
+        # keep track of steps seen for the merged_summary op,
+        # which contains the histogram summaries
+        if len(summary_obj.value) > 1:
+          self.steps_seen.append(global_step)
+
+      def flush(self):
+        pass
+
+      def close(self):
+        pass
+
+    def _init_writer(obj, _):
+      obj.writer = FileWriterStub(obj.log_dir)
+
+    np.random.seed(1337)
+    tmpdir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
+    (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
+        train_samples=TRAIN_SAMPLES,
+        test_samples=TEST_SAMPLES,
+        input_shape=(INPUT_DIM,),
+        num_classes=NUM_CLASSES)
+    y_test = keras.utils.to_categorical(y_test)
+    y_train = keras.utils.to_categorical(y_train)
+
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(
+          keras.layers.Dense(
+              NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
+      # non_trainable_weights: moving_variance, moving_mean
+      model.add(keras.layers.BatchNormalization())
+      model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
+      model.compile(
+          loss='categorical_crossentropy',
+          optimizer='sgd',
+          metrics=['accuracy'])
+      callbacks_v1.TensorBoard._init_writer = _init_writer
+      tsb = callbacks_v1.TensorBoard(
+          log_dir=tmpdir,
+          histogram_freq=1,
+          write_images=True,
+          write_grads=True,
+          batch_size=5)
+      cbks = [tsb]
+
+      # fit with validation data
+      model.fit(
+          x_train,
+          y_train,
+          batch_size=BATCH_SIZE,
+          validation_data=(x_test, y_test),
+          callbacks=cbks,
+          epochs=3,
+          verbose=0)
+
+      self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5])
+
+  @test_util.run_deprecated_v1
+  def test_Tensorboard_histogram_summaries_with_generator(self):
+    np.random.seed(1337)
+    tmpdir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
+
+    def generator():
+      x = np.random.randn(10, 100).astype(np.float32)
+      y = np.random.randn(10, 10).astype(np.float32)
+      while True:
+        yield x, y
+
+    with self.cached_session():
+      model = testing_utils.get_small_sequential_mlp(
+          num_hidden=10, num_classes=10, input_dim=100)
+      model.compile(
+          loss='categorical_crossentropy',
+          optimizer='sgd',
+          metrics=['accuracy'])
+      tsb = callbacks_v1.TensorBoard(
+          log_dir=tmpdir,
+          histogram_freq=1,
+          write_images=True,
+          write_grads=True,
+          batch_size=5)
+      cbks = [tsb]
+
+      # fit with validation generator
+      model.fit_generator(
+          generator(),
+          steps_per_epoch=2,
+          epochs=2,
+          validation_data=generator(),
+          validation_steps=2,
+          callbacks=cbks,
+          verbose=0)
+
+      with self.assertRaises(ValueError):
+        # fit with validation generator but no
+        # validation_steps
+        model.fit_generator(
+            generator(),
+            steps_per_epoch=2,
+            epochs=2,
+            validation_data=generator(),
+            callbacks=cbks,
+            verbose=0)
+
+      self.assertTrue(os.path.exists(tmpdir))
+
+  def test_TensorBoard_with_ReduceLROnPlateau(self):
+    with self.cached_session():
+      temp_dir = self.get_temp_dir()
+      self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+      (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
+          train_samples=TRAIN_SAMPLES,
+          test_samples=TEST_SAMPLES,
+          input_shape=(INPUT_DIM,),
+          num_classes=NUM_CLASSES)
+      y_test = keras.utils.to_categorical(y_test)
+      y_train = keras.utils.to_categorical(y_train)
+
+      model = testing_utils.get_small_sequential_mlp(
+          num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM)
+      model.compile(
+          loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
+
+      cbks = [
+          keras.callbacks.ReduceLROnPlateau(
+              monitor='val_loss', factor=0.5, patience=4, verbose=1),
+          callbacks_v1.TensorBoard(log_dir=temp_dir)
+      ]
+
+      model.fit(
+          x_train,
+          y_train,
+          batch_size=BATCH_SIZE,
+          validation_data=(x_test, y_test),
+          callbacks=cbks,
+          epochs=2,
+          verbose=0)
+
+      assert os.path.exists(temp_dir)
+
+  @test_util.run_deprecated_v1
+  def test_Tensorboard_batch_logging(self):
+
+    class FileWriterStub(object):
+
+      def __init__(self, logdir, graph=None):
+        self.logdir = logdir
+        self.graph = graph
+        self.batches_logged = []
+        self.summary_values = []
+        self.summary_tags = []
+
+      def add_summary(self, summary, step):
+        self.summary_values.append(summary.value[0].simple_value)
+        self.summary_tags.append(summary.value[0].tag)
+        self.batches_logged.append(step)
+
+      def flush(self):
+        pass
+
+      def close(self):
+        pass
+
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='batch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    for batch in range(5):
+      tb_cbk.on_batch_end(batch, {'acc': batch})
+    self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4])
+    self.assertEqual(tb_cbk.writer.summary_values, [0., 1., 2., 3., 4.])
+    self.assertEqual(tb_cbk.writer.summary_tags, ['batch_acc'] * 5)
+
+  @test_util.run_deprecated_v1
+  def test_Tensorboard_epoch_and_batch_logging(self):
+
+    class FileWriterStub(object):
+
+      def __init__(self, logdir, graph=None):
+        self.logdir = logdir
+        self.graph = graph
+
+      def add_summary(self, summary, step):
+        if 'batch_' in summary.value[0].tag:
+          self.batch_summary = (step, summary)
+        elif 'epoch_' in summary.value[0].tag:
+          self.epoch_summary = (step, summary)
+
+      def flush(self):
+        pass
+
+      def close(self):
+        pass
+
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='batch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0})
+    batch_step, batch_summary = tb_cbk.writer.batch_summary
+    self.assertEqual(batch_step, 0)
+    self.assertEqual(batch_summary.value[0].simple_value, 5.0)
+
+    tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='epoch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+    tb_cbk.on_epoch_end(0, {'acc': 10.0})
+    epoch_step, epoch_summary = tb_cbk.writer.epoch_summary
+    self.assertEqual(epoch_step, 0)
+    self.assertEqual(epoch_summary.value[0].simple_value, 10.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_Tensorboard_eager(self):
+    temp_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
+        train_samples=TRAIN_SAMPLES,
+        test_samples=TEST_SAMPLES,
+        input_shape=(INPUT_DIM,),
+        num_classes=NUM_CLASSES)
+    y_test = keras.utils.to_categorical(y_test)
+    y_train = keras.utils.to_categorical(y_train)
+
+    model = testing_utils.get_small_sequential_mlp(
+        num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM)
+    model.compile(
+        loss='binary_crossentropy',
+        optimizer=adam.AdamOptimizer(0.01),
+        metrics=['accuracy'])
+
+    cbks = [callbacks_v1.TensorBoard(log_dir=temp_dir)]
+
+    model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH_SIZE,
+        validation_data=(x_test, y_test),
+        callbacks=cbks,
+        epochs=2,
+        verbose=0)
+
+    self.assertTrue(os.path.exists(temp_dir))
+
+  @test_util.run_deprecated_v1
+  def test_TensorBoard_update_freq(self):
+
+    class FileWriterStub(object):
+
+      def __init__(self, logdir, graph=None):
+        self.logdir = logdir
+        self.graph = graph
+        self.batch_summaries = []
+        self.epoch_summaries = []
+
+      def add_summary(self, summary, step):
+        if 'batch_' in summary.value[0].tag:
+          self.batch_summaries.append((step, summary))
+        elif 'epoch_' in summary.value[0].tag:
+          self.epoch_summaries.append((step, summary))
+
+      def flush(self):
+        pass
+
+      def close(self):
+        pass
+
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    # Epoch mode
+    tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='epoch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(tb_cbk.writer.batch_summaries, [])
+    tb_cbk.on_epoch_end(0, {'acc': 10.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.epoch_summaries), 1)
+
+    # Batch mode
+    tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq='batch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    self.assertFalse(tb_cbk.writer.epoch_summaries)
+
+    # Integer mode
+    tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq=20)
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertFalse(tb_cbk.writer.batch_summaries)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    tb_cbk.on_batch_end(0, {'acc': 10.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    self.assertFalse(tb_cbk.writer.epoch_summaries)
+
+
+if __name__ == '__main__':
+  test.main()
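For reference, a minimal sketch (independent of this patch) of building a v1 `Summary` proto by hand, as `_write_custom_summaries` does in its graph-mode branch; `summary_pb2` is imported exactly as in the test file above:

from tensorflow.core.framework import summary_pb2

summary = summary_pb2.Summary()
value = summary.value.add()
value.tag = 'batch_acc'
value.simple_value = 0.75
# A real `FileWriter` would receive this via `add_summary(summary, step)`.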
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index 4eb0be5..f4ba35d 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -46,8 +46,9 @@
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables as tf_variables
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.training.checkpointable import layer_utils as checkpointable_layer_utils
+from tensorflow.python.training.tracking import base as trackable
+from tensorflow.python.training.tracking import data_structures
+from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils
 from tensorflow.python.util import function_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
@@ -57,7 +58,7 @@
 
 
 @keras_export('keras.layers.Layer')
-class Layer(checkpointable.Checkpointable):
+class Layer(trackable.Trackable):
   """Base layer class.
 
   This is the class from which all layers inherit.
@@ -110,7 +111,7 @@
       constraints on inputs that can be accepted by the layer.
   """
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def __init__(self, trainable=True, name=None, dtype=None, dynamic=False,
                **kwargs):
     # These properties should be set by the user via keyword arguments.
@@ -272,7 +273,7 @@
         marked as non-trainable. `trainable` defaults to `True` unless
         `synchronization` is set to `ON_READ`.
       constraint: constraint instance (callable).
-      partitioner: Partitioner to be passed to the `Checkpointable` API.
+      partitioner: Partitioner to be passed to the `Trackable` API.
       use_resource: Whether to use `ResourceVariable`.
       synchronization: Indicates when a distributed variable will be
         aggregated. Accepted values are constants defined in the class
@@ -345,9 +346,9 @@
         name=name,
         shape=shape,
         # TODO(allenl): a `make_variable` equivalent should be added as a
-        # `Checkpointable` method.
+        # `Trackable` method.
         getter=getter or base_layer_utils.make_variable,
-        # Manage errors in Layer rather than Checkpointable.
+        # Manage errors in Layer rather than Trackable.
         overwrite=True,
         initializer=initializer,
         dtype=dtype,
@@ -535,8 +536,6 @@
       # framework.
       if base_layer_utils.needs_keras_history(inputs):
         base_layer_utils.create_keras_history(inputs)
-      # Do not track these Tensors in any sublayers invoked during `call`.
-      base_layer_utils.mark_checked(inputs)
 
     # Handle Keras mask propagation from previous layer to current layer.
     previous_mask = None
@@ -551,62 +550,77 @@
         # pass to __call__, hence we set previous_mask as the default value.
         kwargs['mask'] = previous_mask
 
-    # Check input assumptions set after layer building, e.g. input shape.
-    if build_graph:
-      # Symbolic execution on symbolic tensors. We will attempt to build
-      # the corresponding TF subgraph inside `backend.get_graph()`
-      input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
-      graph = backend.get_graph()
-      with graph.as_default(), ops.name_scope(self._name_scope()):
-        # Build layer if applicable (if the `build` method has been overridden).
-        self._maybe_build(inputs)
-        if not self.dynamic:
-          try:
-            outputs = self.call(inputs, *args, **kwargs)
-          except TypeError as e:
-            messages = ('`tf.Tensor` as a Python `bool` is not allowed',
-                        'Tensor objects are only iterable when eager')
-            exception_str = str(e)
-            for msg in messages:
-              if msg in exception_str:
-                raise TypeError('You are attempting to use Python control '
-                                'flow in a layer that was not declared to be '
-                                'dynamic. Pass `dynamic=True` to the class '
-                                'constructor.\nEncountered error:\n"""\n' +
-                                exception_str + '\n"""')
-            raise
-        else:
-          # We will use static shape inference to return symbolic tensors
-          # matching the specifications of the layer outputs.
-          # Since `self.dynamic` is True, we will never attempt to
-          # run the underlying TF graph (which is disconnected).
-          # TODO(fchollet): consider py_func as an alternative, which
-          # would enable us to run the underlying graph if needed.
-          outputs = self._symbolic_call(inputs)
+    with base_layer_utils.call_context():
+      # Check input assumptions set after layer building, e.g. input shape.
+      if build_graph:
+        # Symbolic execution on symbolic tensors. We will attempt to build
+        # the corresponding TF subgraph inside `backend.get_graph()`
+        input_spec.assert_input_compatibility(self.input_spec, inputs,
+                                              self.name)
+        graph = backend.get_graph()
+        with graph.as_default(), ops.name_scope(self._name_scope()):
+          # Build layer if applicable (if the `build` method has been
+          # overridden).
+          self._maybe_build(inputs)
+          # Explicitly pass the learning phase placeholder to `call` if
+          # the `training` argument was left unspecified by the user.
+          # This behavior is restricted to the managed Keras FuncGraph.
+          learning_phase_passed_by_framework = False
+          if (self._expects_training_arg and
+              not base_layer_utils.training_arg_passed_to_call(
+                  tf_inspect.getfullargspec(self.call), args, kwargs) and
+              getattr(graph, 'name', None) == 'keras_graph'):
+            learning_phase_passed_by_framework = True
+            kwargs['training'] = backend.learning_phase()
+          if not self.dynamic:
+            try:
+              outputs = self.call(inputs, *args, **kwargs)
+            except TypeError as e:
+              messages = ('`tf.Tensor` as a Python `bool` is not allowed',
+                          'Tensor objects are only iterable when eager')
+              exception_str = str(e)
+              for msg in messages:
+                if msg in exception_str:
+                  raise TypeError('You are attempting to use Python control '
+                                  'flow in a layer that was not declared to be '
+                                  'dynamic. Pass `dynamic=True` to the class '
+                                  'constructor.\nEncountered error:\n"""\n' +
+                                  exception_str + '\n"""')
+              raise
+          else:
+            # We will use static shape inference to return symbolic tensors
+            # matching the specifications of the layer outputs.
+            # Since `self.dynamic` is True, we will never attempt to
+            # run the underlying TF graph (which is disconnected).
+            # TODO(fchollet): consider py_func as an alternative, which
+            # would enable us to run the underlying graph if needed.
+            outputs = self._symbolic_call(inputs)
 
-        if outputs is None:
-          raise ValueError('A layer\'s `call` method should return a '
-                           'Tensor or a list of Tensors, not None '
-                           '(layer: ' + self.name + ').')
-        if base_layer_utils.have_all_keras_metadata(inputs):
-          inputs, outputs = self._set_connectivity_metadata_(
-              inputs, outputs, args, kwargs)
-        self._handle_activity_regularization(inputs, outputs)
-        self._set_mask_metadata(inputs, outputs, previous_mask)
-        if hasattr(self, '_set_inputs') and not self.inputs:
-          # Subclassed network: explicitly set metadata normally set by
-          # a call to self._set_inputs().
-          # TODO(b/120997007): This should be done in Eager as well, but
-          # causes garbage collection issues because of the placeholders
-          # created on the default Keras graph.
-          self._set_inputs(inputs, outputs)
-    else:
-      # Eager execution on data tensors.
-      with ops.name_scope(self._name_scope()):
-        self._maybe_build(inputs)
-        outputs = self.call(inputs, *args, **kwargs)
-        self._handle_activity_regularization(inputs, outputs)
-        self._set_mask_metadata(inputs, outputs, previous_mask)
+          if outputs is None:
+            raise ValueError('A layer\'s `call` method should return a '
+                             'Tensor or a list of Tensors, not None '
+                             '(layer: ' + self.name + ').')
+          if base_layer_utils.have_all_keras_metadata(inputs):
+            if learning_phase_passed_by_framework:
+              kwargs.pop('training')
+            inputs, outputs = self._set_connectivity_metadata_(
+                inputs, outputs, args, kwargs)
+          self._handle_activity_regularization(inputs, outputs)
+          self._set_mask_metadata(inputs, outputs, previous_mask)
+          if hasattr(self, '_set_inputs') and not self.inputs:
+            # Subclassed network: explicitly set metadata normally set by
+            # a call to self._set_inputs().
+            # TODO(b/120997007): This should be done in Eager as well, but
+            # causes garbage collection issues because of the placeholders
+            # created on the default Keras graph.
+            self._set_inputs(inputs, outputs)
+      else:
+        # Eager execution on data tensors.
+        with ops.name_scope(self._name_scope()):
+          self._maybe_build(inputs)
+          outputs = self.call(inputs, *args, **kwargs)
+          self._handle_activity_regularization(inputs, outputs)
+          self._set_mask_metadata(inputs, outputs, previous_mask)
 
     if not context.executing_eagerly():
       # Optionally load weight values specified at layer instantiation.
@@ -683,7 +697,12 @@
       A list of tensors.
     """
     collected_losses = []
-    if context.executing_eagerly():
+
+    # If any eager losses are present, we assume the model to be part of an
+    # eager training loop (either a custom one or the one used when
+    # `run_eagerly=True`), and so we always return just the eager losses in that
+    # case.
+    if self._eager_losses:
       collected_losses.extend(self._eager_losses)
     else:
       collected_losses.extend(self._losses)
@@ -714,6 +733,7 @@
     Arguments:
       losses: Loss tensor, or list/tuple of tensors. Rather than tensors, losses
         may also be zero-argument callables which create a loss tensor.
+        Other types of input are ignored.
       inputs: Ignored when executing eagerly. If anything other than None is
         passed, it signals the losses are conditional on some of the layer's
         inputs, and thus they should only be run where these inputs are
@@ -739,10 +759,13 @@
         self._callable_losses.append(
             functools.partial(_tag_unconditional, loss))
       else:
-        if context.executing_eagerly():
-          self._eager_losses.append(_tag_unconditional(loss))
-        else:
+        if not tensor_util.is_tensor(loss):
+          # Ignore constant values, as they do not affect the gradients.
+          return
+        if tf_utils.is_symbolic_tensor(loss):
           self._losses.append(_tag_unconditional(loss))
+        else:
+          self._eager_losses.append(_tag_unconditional(loss))
 
   @doc_controls.for_subclass_implementers
   def add_metric(self, value, aggregation=None, name=None):
@@ -755,7 +778,7 @@
         already. eg, `model.add_metric(BinaryAccuracy(name='acc')(y_true,
         y_pred))`. If aggregation='mean', the given metric tensor will be
         sample-wise reduced using `mean` function. eg, `model.add_metric(
-        tf.reduce_mean(outputs), name='output_mean', aggregation='mean')`.
+        tf.reduce_sum(outputs), name='output_mean', aggregation='mean')`.
       name: String metric name.
 
     Raises:
@@ -766,8 +789,25 @@
           'We currently support only `mean` sample-wise metric aggregation. '
           'You provided aggregation=`%s`' % aggregation)
 
-    if tf_utils.is_symbolic_tensor(value):
-      self._symbolic_add_metric(value, aggregation, name)
+    is_symbolic = tf_utils.is_symbolic_tensor(value)
+    if name is None and (not is_symbolic or not hasattr(value, '_metric_obj')):
+      # E.g. `self.add_metric(math_ops.reduce_sum(x), aggregation='mean')`
+      # In eager mode, we use the metric name to look up a metric. Without a
+      # name, a new Mean metric wrapper will be created on every model/layer
+      # call. So, we raise an error when no name is provided.
+      # We will do the same for symbolic mode for consistency, although a
+      # name will be generated if none is provided.
+
+      # We will not raise this error in the following use case for the sake
+      # of consistency, as the name is provided in the metric constructor:
+      # model.add_metric(metrics.Mean(name='my_metric')(outputs))
+      raise ValueError('Please provide a name for your metric like '
+                       '`self.add_metric(tf.reduce_sum(inputs), '
+                       'name=\'mean_activation\', aggregation=\'mean\')`')
+
+    if is_symbolic:
+      with backend.get_graph().as_default():
+        self._symbolic_add_metric(value, aggregation, name)
     else:
       self._eager_add_metric(value, aggregation, name)
 
@@ -1289,9 +1329,10 @@
       match(value)  # Update the metric state.
       return
     else:
-      if aggregation is None:
-        raise ValueError('We do not support adding an aggregated metric tensor '
-                         'in `call` in eager execution.')
+      # Aggregation will always be set in this use case. If it is not, an
+      # error is raised on model/layer call in graph function mode when the
+      # model/layer is created.
+      assert aggregation is not None
       metric_obj, _ = base_layer_utils.create_mean_metric(value, name)
       self._metrics.append(metric_obj)
 
@@ -1310,10 +1351,20 @@
         else:
           raise ValueError(
               'We currently do not support reusing a metric instance.')
-      else:
+      elif hasattr(value, '_metric_obj'):
         # We track the instance using the metadata on the result tensor.
         result_tensor = value
         metric_obj = result_tensor._metric_obj
+      else:
+        raise ValueError(
+            'We do not support adding an aggregated metric result tensor that '
+            'is not the output of a `tf.keras.metrics.Metric` instance. '
+            'Without having access to the metric instance we cannot reset the '
+            'state of a metric after every epoch during training. You can '
+            'create a `tf.keras.metrics.Metric` instance and pass the result '
+            'here or pass an un-aggregated result with `aggregation` parameter '
+            'set as `mean`. For example: `self.add_metric(tf.reduce_sum(inputs)'
+            ', name=\'mean_activation\', aggregation=\'mean\')`')
     else:
       # If a non-aggregated tensor is given as input (ie. `aggregation` is
       # explicitly set to `mean`), we wrap the tensor in `Mean` metric.
@@ -1353,32 +1404,35 @@
           self.add_loss(mean_activity_loss, inputs=inputs)
 
   def _set_mask_metadata(self, inputs, outputs, previous_mask):
-    if getattr(self, '_compute_output_and_mask_jointly', False):
-      # Mask is already computed for Keras Graph Networks.
-      return
-
     flat_outputs = nest.flatten(outputs)
-    if all(getattr(x, '_keras_mask', None) is not None for x in flat_outputs):
-      # Mask is already computed by sublayers.
-      return
+    mask_already_computed = (
+        getattr(self, '_compute_output_and_mask_jointly', False) or
+        all(getattr(x, '_keras_mask', None) is not None for x in flat_outputs))
 
-    if hasattr(self, 'compute_mask'):
-      output_masks = self.compute_mask(inputs, previous_mask)
-      # `compute_mask` can return a single `None` even when a Layer
-      # has multiple outputs.
-      if output_masks is None:
-        flat_masks = [None for _ in flat_outputs]
+    if not mask_already_computed:
+      if hasattr(self, 'compute_mask'):
+        output_masks = self.compute_mask(inputs, previous_mask)
+        # `compute_mask` can return a single `None` even when a Layer
+        # has multiple outputs.
+        if output_masks is None:
+          flat_masks = [None for _ in flat_outputs]
+        else:
+          flat_masks = nest.flatten(output_masks)
       else:
-        flat_masks = nest.flatten(output_masks)
-    else:
-      flat_masks = [None for _ in flat_outputs]
+        flat_masks = [None for _ in flat_outputs]
 
-    for output, mask in zip(flat_outputs, flat_masks):
-      try:
-        output._keras_mask = mask
-      except AttributeError:
-        # C Type such as np.ndarray.
-        pass
+      for output, mask in zip(flat_outputs, flat_masks):
+        try:
+          output._keras_mask = mask
+        except AttributeError:
+          # C Type such as np.ndarray.
+          pass
+
+    if tf_utils.are_all_symbolic_tensors(flat_outputs):
+      for output in flat_outputs:
+        if getattr(output, '_keras_mask', None) is not None:
+          # Do not track masks for `TensorFlowOpLayer` construction.
+          output._keras_mask._keras_history_checked = True
 
   def _set_connectivity_metadata_(self, inputs, outputs, args, kwargs):
     call_convention = getattr(
@@ -1611,12 +1665,16 @@
       super(Layer, self).__setattr__(name, value)
       return
 
+    # Keep track of trackable objects, for use by `Network.save_weights`.
+    value = data_structures.sticky_attribute_assignment(
+        trackable=self, value=value, name=name)
+
     # Append value to self._layers if relevant
     if (isinstance(value, Layer) or
-        checkpointable_layer_utils.has_weights(value)):
+        trackable_layer_utils.has_weights(value)):
       # Initialize `_layers` here in case `__init__` has not yet been called.
       if not hasattr(self, '_layers'):
-        self._layers = []
+        super(Layer, self).__setattr__('_layers', [])
       # We need to check object identity to avoid de-duplicating empty
       # container types which compare equal.
       if not any((layer is value for layer in self._layers)):
@@ -1631,9 +1689,9 @@
       # Users may add extra weights/variables
       # simply by assigning them to attributes (invalid for graph networks)
       if not hasattr(self, '_trainable_weights'):
-        self._trainable_weights = []
+        super(Layer, self).__setattr__('_trainable_weights', [])
       if not hasattr(self, '_non_trainable_weights'):
-        self._non_trainable_weights = []
+        super(Layer, self).__setattr__('_non_trainable_weights', [])
       if value not in self._trainable_weights + self._non_trainable_weights:
         if value.trainable:
           self._trainable_weights.append(value)
@@ -1650,7 +1708,7 @@
     return []
 
   # This is a hack so that the is_layer (within
-  # training/checkpointable/layer_utils.py) check doesn't get the weights attr.
+  # training/tracking/layer_utils.py) check doesn't get the weights attr.
   # TODO(b/110718070): Remove when fixed.
   def _is_layer(self):
     return True
@@ -1816,6 +1874,9 @@
         name=name, trainable=trainable, dtype=dtype)
     self.node_def = node_def_pb2.NodeDef.FromString(node_def)
     self.constants = constants or {}
+    # Layer uses original op unless it is called on new inputs.
+    # This means `built` is not set in `__call__`.
+    self.built = True
 
   def call(self, inputs):
     if context.executing_eagerly():
@@ -1831,6 +1892,10 @@
         inputs.insert(index, constant)
 
       self.node_def.name = graph.unique_name(self.node_def.name)
+      # Check for case where first input should be a list of Tensors.
+      if 'N' in self.node_def.attr:
+        num_tensors = self.node_def.attr['N'].i
+        inputs = [inputs[:num_tensors]] + inputs[num_tensors:]
       c_op = ops._create_c_op(graph, self.node_def, inputs, control_inputs=[])
       op = graph._create_op_from_tf_operation(c_op)
 
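The `add_metric` change above now rejects unnamed values that are not produced by a `Metric` instance. A small sketch of the supported call style, assuming the public `tf.keras` API mirrors the internal `Layer` patched here:

import tensorflow as tf

class ActivationStats(tf.keras.layers.Layer):

  def call(self, inputs):
    # An un-aggregated tensor needs both `name` and `aggregation`,
    # exactly as the new error message prescribes.
    self.add_metric(tf.reduce_sum(inputs), name='mean_activation',
                    aggregation='mean')
    return inputs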
diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py
index 109fc1f..b5d76f6 100644
--- a/tensorflow/python/keras/engine/base_layer_test.py
+++ b/tensorflow/python/keras/engine/base_layer_test.py
@@ -25,6 +25,7 @@
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
@@ -32,6 +33,7 @@
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import base_layer
 from tensorflow.python.keras.optimizer_v2 import rmsprop
+from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
@@ -279,6 +281,70 @@
     keras.backend.set_learning_phase(0)
     self.assertEqual(get_learning_phase_value(), 0)
 
+  # Cannot be enabled with `run_eagerly=True`, see b/123904578
+  @test_util.run_all_in_graph_and_eager_modes
+  def test_layer_can_return_variable(self):
+
+    class ComputeSum(keras.layers.Layer):
+
+      def __init__(self):
+        super(ComputeSum, self).__init__()
+        self.total = variables.Variable(
+            initial_value=array_ops.zeros((1, 1)), trainable=False)
+        if not context.executing_eagerly():
+          keras.backend.get_session().run(self.total.initializer)
+
+      def call(self, inputs):
+        self.total.assign_add(inputs)
+        return self.total
+
+    inputs = keras.Input(shape=(1,))
+    model = keras.Model(inputs, ComputeSum()(inputs))
+    model.predict(np.ones((1, 1)))
+
+  def _get_layer_with_training_arg(self):
+
+    class TrainingLayer(keras.layers.Layer):
+      """A layer with a `training` argument in a defuned `call`."""
+
+      @def_function.function
+      def call(self, inputs, training=None):
+        if training is None:
+          training = keras.backend.learning_phase()
+        return tf_utils.smart_cond(training,
+                                   lambda: array_ops.ones_like(inputs),
+                                   lambda: array_ops.zeros_like(inputs))
+
+    return TrainingLayer()
+
+  @keras_parameterized.run_with_all_model_types
+  # b/124459427: can't test with `run_eagerly=True` for now.
+  @test_util.run_in_graph_and_eager_modes
+  def test_training_arg_in_defun(self):
+    layer = self._get_layer_with_training_arg()
+    model = testing_utils.get_model_from_layers([layer], input_shape=(1,))
+    model.compile(rmsprop.RMSprop(0.),
+                  loss='mae')
+    history = model.fit(np.zeros((1, 1)), np.zeros((1, 1)))
+    self.assertEqual(history.history['loss'][0], 1.)
+    loss = model.evaluate(np.zeros((1, 1)), np.zeros((1, 1)))
+    self.assertEqual(loss, 0.)
+
+    # Test that the argument injection performed in `call` is not active
+    # when the argument is passed explicitly.
+    layer = self._get_layer_with_training_arg()
+    inputs = keras.Input(shape=(1,))
+    # Pass `training` by name
+    outputs = layer(inputs, training=False)
+    model = keras.Model(inputs, outputs)
+    model.compile(rmsprop.RMSprop(0.),
+                  loss='mae')
+    history = model.fit(np.zeros((1, 1)), np.zeros((1, 1)))
+    self.assertEqual(history.history['loss'][0], 0.)
+
+
+class SymbolicSupportTest(test.TestCase):
+
   def test_using_symbolic_tensors_with_tf_ops(self):
     # Single-input.
     x = keras.Input((3,))
@@ -369,27 +435,6 @@
       function_name = last_entry[2]
       self.assertEqual(function_name, 'easily_identifiable_name')
 
-  # Cannot be enabled with `run_eagerly=True`, see b/123904578
-  @test_util.run_all_in_graph_and_eager_modes
-  def test_layer_can_return_variable(self):
-
-    class ComputeSum(keras.layers.Layer):
-
-      def __init__(self):
-        super(ComputeSum, self).__init__()
-        self.total = variables.Variable(
-            initial_value=array_ops.zeros((1, 1)), trainable=False)
-        if not context.executing_eagerly():
-          keras.backend.get_session().run(self.total.initializer)
-
-      def call(self, inputs):
-        self.total.assign_add(inputs)
-        return self.total
-
-    inputs = keras.Input(shape=(1,))
-    model = keras.Model(inputs, ComputeSum()(inputs))
-    model.predict(np.ones((1, 1)))
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class NestedTrackingTest(test.TestCase):
@@ -433,6 +478,9 @@
     self.assertEqual(len(layer.weights), 8)
     self.assertEqual(len(layer.trainable_weights), 0)
     self.assertEqual(len(layer.non_trainable_weights), 8)
+    self.assertEqual(
+        set([layer.dense1, layer.dense2, layer.v1, layer.v2]),
+        set([obj for unused_name, obj in layer._checkpoint_dependencies]))
 
   def test_nested_layer_updates_losses_tracking(self):
     # Test that updates and losses from nested sublayers are
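The new `_checkpoint_dependencies` assertion above checks that plain attribute assignment now creates checkpoint dependencies. A minimal sketch of the user-visible effect, assuming the public `tf.train.Checkpoint` and `tf.keras` APIs behave like the internal classes patched here:

import tensorflow as tf

class Block(tf.keras.layers.Layer):

  def __init__(self):
    super(Block, self).__init__()
    # Assigning these attributes registers them as checkpoint
    # dependencies of `Block`, per the tracking change above.
    self.dense = tf.keras.layers.Dense(4)
    self.v = tf.Variable(1.)

block = Block()
# `dense` and `v` are saved transitively through `block`.
ckpt = tf.train.Checkpoint(block=block)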
diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py
index 211e848..19143db 100644
--- a/tensorflow/python/keras/engine/base_layer_utils.py
+++ b/tensorflow/python/keras/engine/base_layer_utils.py
@@ -18,6 +18,7 @@
 from __future__ import print_function
 
 import collections as collections_lib
 import enum
+import threading
 
 from tensorflow.python.framework import dtypes
@@ -28,6 +29,9 @@
 from tensorflow.python.ops import init_ops_v2
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.util import nest
+from tensorflow.python.util import tf_contextlib
+
+_call_context = threading.local()
 
 
 class CallConvention(enum.Enum):
@@ -72,7 +76,7 @@
   that has fewer constraints (`variable_scope.variable()`).
 
   In the longer term, it seems like a similar "default variable creator" method
-  should exist in `CheckpointableBase` instead. When this happens, we can get
+  should exist in `Trackable` instead. When this happens, we can get
   rid of this temporary solution.
 
   TODO(fchollet): remove this method when no longer needed.
@@ -277,7 +281,7 @@
           # a constant (Variables currently have `Placeholder` op type
           # when originating from an eager context,
           # so they can't be supported).
-          constants[i] = backend.function([], [op_input])([])
+          constants[i] = backend.function([], op_input)([])
       processed_ops = _create_keras_history_helper(layer_inputs, processed_ops)
       name = op.name
       node_def = op.node_def.SerializeToString()
@@ -292,6 +296,11 @@
 def needs_keras_history(tensors):
   """Check if any Tensors need to be wrapped in TensorFlowOpLayers.
 
+  This will never return True inside a sublayer, because sublayers
+  do not need to create Keras History. Otherwise, this returns True
+  if one or more of `tensors` originates from a `keras.Input` and
+  does not have `_keras_history` set.
+
   Arguments:
     tensors: An arbitrary nested structure of Tensors.
 
@@ -299,7 +308,7 @@
     Bool, whether at least one Tensor needs to be wrapped.
   """
   input_tensors = nest.flatten(tensors)
-  if all(
+  if getattr(_call_context, 'in_call', False) or all(
       getattr(tensor, '_keras_history', None) is not None
       for tensor in input_tensors):
     # KerasHistory already set.
@@ -362,3 +371,22 @@
     tensor._keras_history_checked = True  # pylint: disable=protected-access
 
   nest.map_structure(_mark_checked, tensors)
+
+
+@tf_contextlib.contextmanager
+def call_context():
+  """Scope that marks when we are currently inside a Layer/Model's `call`."""
+  was_in_call = getattr(_call_context, 'in_call', False)
+  _call_context.in_call = True
+  try:
+    yield
+  finally:
+    _call_context.in_call = was_in_call
+
+
+def training_arg_passed_to_call(argspec, args, kwargs):
+  """Returns whether a user passed the `training` argument in `__call__`."""
+  # `argspec.args` starts with ['self', 'inputs']
+  full_args = dict(zip(argspec.args[2:], args))
+  full_args.update(kwargs)
+  return 'training' in full_args
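A self-contained sketch (separate from the patch) of the thread-local, re-entrant flag pattern that `call_context` implements above; `_state` and the asserts are illustrative only:

import contextlib
import threading

_state = threading.local()

@contextlib.contextmanager
def call_context():
  # Remember the previous value so nested calls restore it correctly.
  was_in_call = getattr(_state, 'in_call', False)
  _state.in_call = True
  try:
    yield
  finally:
    _state.in_call = was_in_call

with call_context():
  assert getattr(_state, 'in_call', False)  # inside a call
assert not getattr(_state, 'in_call', False)  # restored on exit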
diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py
index 7df9c2f..0c9fdbb 100644
--- a/tensorflow/python/keras/engine/distributed_training_utils.py
+++ b/tensorflow/python/keras/engine/distributed_training_utils.py
@@ -32,12 +32,13 @@
 from tensorflow.python.keras import callbacks
 from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras import optimizers
+from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.keras.utils.mode_keys import ModeKeys
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_contextlib
 
@@ -67,7 +68,7 @@
     weights = weights[num_param:]
 
   if not ops.executing_eagerly_outside_functions():
-    K.get_session().run(assign_ops)
+    K.get_session(assign_ops).run(assign_ops)
 
 
 def unwrap_values(distribution_strategy, grouped_inputs, grouped_outputs,
@@ -196,14 +197,14 @@
       # features of the callback that involve accessing model attributes and
       # running ops.
       if isinstance(callback, callbacks.TensorBoard):
-        if callback.__getattribute__('histogram_freq'):
+        if getattr(callback, 'histogram_freq', False):
           logging.warning(
               UserWarning(
                   '`histogram_freq` in the TensorBoard callback is not '
                   'supported when using DistributionStrategy. Setting '
                   '`histogram_freq` to `0`.'))
           callback.histogram_freq = 0
-        if callback.__getattribute__('write_grads'):
+        if getattr(callback, 'write_grads', False):
           logging.warning(
               UserWarning(
                   '`write_grads` in the TensorBoard callback is not supported '
@@ -522,7 +523,7 @@
   with distribution_strategy.scope():
     init_op = control_flow_ops.group(iterator.initialize())
     if not context.executing_eagerly():
-      K.get_session().run(init_op)
+      K.get_session((init_op,)).run(init_op)
 
 
 def _get_input_from_iterator(iterator, model):
@@ -563,6 +564,11 @@
   inputs, targets, sample_weights = _get_input_from_iterator(inputs, model)
   inputs = flatten_perdevice_values(strategy, inputs)
   targets = flatten_perdevice_values(strategy, targets)
+  # Expand 1-dimensional inputs.
+  # TODO(b/124535720): Remove once this data standardization logic is
+  # shared with the main flow.
+  inputs, targets = nest.map_structure(training_utils.standardize_single_array,
+                                       (inputs, targets))
   if mode == ModeKeys.PREDICT:
     sample_weights = []
     targets = []
@@ -723,21 +729,46 @@
 
 
 def _make_execution_function(model, mode):
-  """Makes function to run one step of distributed model execution."""
-  if context.executing_eagerly():
-    return _make_eager_execution_function(model, mode)
-
+  """Makes or reuses function to run one step of distributed model execution."""
   strategy = model._distribution_strategy
-  if not get_distributed_model(model, mode):
-    if model._compile_distribution:
-      clone_model_on_replicas(model, strategy, mode)
-    else:
-      _build_distributed_network(model, strategy, mode)
+
+  distributed_model = get_distributed_model(model, mode)
+  # If the distributed model for a particular `mode` is already built, use
+  # the `_distributed_function` on that distributed model.
+  if distributed_model:
+    return distributed_model._distributed_function
+
+  # If distributed_model is not built, create one for `mode`.
+  if model._compile_distribution:
+    clone_model_on_replicas(model, strategy, mode)
+  else:
+    _build_distributed_network(model, strategy, mode)
+
+  # We've just created the distributed model. So `distributed_model` should
+  # not be None.
+  distributed_model = get_distributed_model(model, mode)
+  assert distributed_model
+
+  # Also create an execution function on that distributed model.
+  if context.executing_eagerly():
+    distributed_function = _make_eager_execution_function(model, mode)
+  else:
+    distributed_function = _make_graph_execution_function(model, mode)
+
+  # We cache the distributed execution function on the model since creating
+  # distributed models and execution functions is expensive.
+  distributed_model._distributed_function = distributed_function
+  return distributed_function
+
+
+def _make_graph_execution_function(model, mode):
+  """Makes function to run one step of distributed model in graph mode."""
 
   def _per_device_function(model):
     f = model._make_execution_function(mode)
     return (f.inputs, f.outputs, f.updates_op, f.session_kwargs)
 
+  strategy = model._distribution_strategy
   with strategy.scope():
     # Create train ops on each of the devices when we call
     # `_per_device_fit_function`.
@@ -773,35 +804,38 @@
 
 def _make_eager_execution_function(model, mode):
   """Makes function to run one step of distributed model eager execution."""
-  strategy = model._distribution_strategy
-  if not get_distributed_model(model, mode):
-    if model._compile_distribution:
-      clone_model_on_replicas(model, strategy, mode)
-    else:
-      _build_distributed_network(model, strategy, mode)
-
   def _per_device_function(model):
     f = model._make_execution_function(mode)
     return (f.inputs, f.outputs)
 
   # NOTE(priyag): Try creating a new FuncGraph within DS scope instead of using
   # the global one.
-  with K.get_graph().as_default(), strategy.scope():
-    # Create train ops on each of the devices when we call
-    # `_per_device_fit_function`.
-    (grouped_inputs, grouped_outputs) = strategy.extended.call_for_each_replica(
-        _per_device_function, args=(get_distributed_model(model, mode),))
+  strategy = model._distribution_strategy
+  global_graph = K.get_graph()
 
-    # Unwrap all the per device values returned from `call_for_each_replica`.
-    # Unwrapping per device values gives you a list of values that can be
-    # used to construct a new train function that is composed of inptus/outputs
-    # on all the devices over which the model is distributed.
-    (all_inputs, all_outputs, _, _) = unwrap_values(
-        strategy,
-        grouped_inputs,
-        grouped_outputs,
-        with_loss_tensor=(mode != ModeKeys.PREDICT))
+  with global_graph.as_default(), strategy.scope():
+    # First we gather the relevant portions of the model across all replicas.
+    # `K._scratch_graph(global_graph)` signals to Keras that it should not
+    # lift to a separate graph when creating the per-replica functions.
+    with K._scratch_graph(global_graph):
+      # Create train ops on each of the devices when we call
+      # `_per_device_fit_function`.
+      grouped = strategy.extended.call_for_each_replica(
+          _per_device_function, args=(get_distributed_model(model, mode),))
+      grouped_inputs, grouped_outputs = grouped
 
+      # Unwrap all the per device values returned from `call_for_each_replica`.
+      # Unwrapping per device values gives you a list of values that can be
+      # used to construct a new train function that is composed of
+      # inputs/outputs on all the devices over which the model is distributed.
+      (all_inputs, all_outputs, _, _) = unwrap_values(
+          strategy,
+          grouped_inputs,
+          grouped_outputs,
+          with_loss_tensor=(mode != ModeKeys.PREDICT))
+
+    # Finally, a joint Keras function is created; it will live in a separate
+    # FuncGraph.
     return K.function(
         all_inputs,
         all_outputs,
@@ -869,3 +903,38 @@
 def distributed_scope(strategy, learning_phase):
   with strategy.scope(), K.learning_phase_scope(learning_phase):
     yield
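+# Illustrative usage: `with distributed_scope(strategy, 1):` runs the body
+# under both the strategy's scope and `learning_phase=1`, i.e. training mode.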
+
+
+def filter_distributed_callbacks(callbacks_list):
+  """Filter Callbacks based on the worker context when running multi-worker.
+
+  Arguments:
+    callbacks_list: A list of `Callback` instances.
+
+  Returns:
+    The list of `Callback` instances that should be run on this worker.
+  """
+
+  if not K.in_multi_worker_mode():
+    raise ValueError(
+        'filter_distributed_callbacks() should only be called when Keras '
+        'is in multi-worker mode.')
+
+  worker_context = dc_context.get_current_worker_context()
+  callbacks_list = callbacks_list or []
+  if not any(
+      isinstance(c, callbacks.ModelCheckpoint) for c in callbacks_list):
+    # TODO(rchao): Consider providing a ModelCheckpoint here if the user
+    # fails to.
+    logging.warning('ModelCheckpoint callback is not provided. '
+                    'Workers will need to restart training if any worker '
+                    'fails.')
+  # TODO(rchao): Add similar warning for restoring callback (to be designed).
+
+  if worker_context.is_chief:
+    return callbacks_list
+
+  # Some Callbacks should only run on the chief worker.
+  return [
+      callback for callback in callbacks_list
+      if not callback._chief_worker_only  # pylint: disable=protected-access
+  ]
diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 559df56..a6fdfad 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -44,10 +44,10 @@
 from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.training.checkpointable import data_structures
-from tensorflow.python.training.checkpointable import layer_utils as checkpointable_layer_utils
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import base as trackable
+from tensorflow.python.training.tracking import data_structures
+from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_inspect
 
@@ -152,7 +152,7 @@
   # empty lists shouldn't cause issues; adding or removing them will not break
   # checkpoints, but may cause "all Python objects matched" assertions to fail
   # (in which case less strict assertions may be substituted if necessary).
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def _base_init(self, name=None):
     # The following are implemented as property functions:
     # self.trainable_weights
@@ -170,7 +170,7 @@
     self._is_compiled = False
     self._expects_training_arg = False
 
-    # This is True for Sequential networks and graph networks.
+    # This is True for Sequential networks and Functional networks.
     self._compute_output_and_mask_jointly = False
 
     self.supports_masking = False
@@ -206,10 +206,10 @@
     self._outbound_nodes = []
     self._inbound_nodes = []
 
-    self._checkpointable_saver = (
-        checkpointable_utils.saver_with_op_caching(self))
+    self._trackable_saver = (
+        trackable_utils.saver_with_op_caching(self))
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def _init_graph_network(self, inputs, outputs, name=None):
     self._call_convention = (base_layer_utils
                              .CallConvention.EXPLICIT_INPUTS_ARGUMENT)
@@ -238,6 +238,9 @@
     self._compute_output_and_mask_jointly = True
     self._is_graph_network = True
     self._dynamic = False
+    # `_expects_training_arg` is True since the `training` argument is always
+    # present in the signature of the `call` method of a graph network.
+    self._expects_training_arg = True
 
     self._input_layers = []
     self._output_layers = []
@@ -306,7 +309,7 @@
     for layer in self._output_layers:
       self.output_names.append(layer.name)
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def _init_subclassed_network(self, name=None, dynamic=False):
     self._base_init(name=name)
     self._is_graph_network = False
@@ -367,20 +370,20 @@
       return base_layer_utils.CallConvention.POSITIONAL_ARGUMENTS_ARE_INPUTS
 
   def _track_layers(self, layers):
-    """Add Checkpointable dependencies on a list of Layers."""
+    """Add Trackable dependencies on a list of Layers."""
     weight_layer_index = 0
     for layer_index, layer in enumerate(layers):
       if layer.weights:
         # Keep a separate index for layers which have weights. This allows users
         # to insert Layers without weights anywhere in the network without
         # breaking checkpoints.
-        self._track_checkpointable(
+        self._track_trackable(
             layer, name='layer_with_weights-%d' % weight_layer_index,
             overwrite=True)
         weight_layer_index += 1
       # Even if it doesn't have weights, we should still track everything in
-      # case it has/will have Checkpointable dependencies.
-      self._track_checkpointable(
+      # case it has/will have Trackable dependencies.
+      self._track_trackable(
           layer, name='layer-%d' % layer_index, overwrite=True)
 
   def __setattr__(self, name, value):
@@ -390,18 +393,15 @@
 
     if all(
         isinstance(v, (base_layer.Layer,
-                       data_structures.CheckpointableDataStructure)) or
-        checkpointable_layer_utils.has_weights(v) for v in nest.flatten(value)):
+                       data_structures.TrackableDataStructure)) or
+        trackable_layer_utils.has_weights(v) for v in nest.flatten(value)):
       try:
         self._is_graph_network
       except AttributeError:
         raise RuntimeError('It looks like you are subclassing `Model` and you '
                            'forgot to call `super(YourClass, self).__init__()`.'
                            ' Always start with this line.')
-    # Keep track of checkpointable objects,
-    # for the needs of `self.save/save_weights`.
-    value = data_structures.sticky_attribute_assignment(
-        checkpointable=self, value=value, name=name)
+
     super(Network, self).__setattr__(name, value)
 
     # Keep track of metric instance created in subclassed model/layer.
@@ -478,7 +478,7 @@
 
   @property
   def layers(self):
-    return checkpointable_layer_utils.filter_empty_layer_containers(
+    return trackable_layer_utils.filter_empty_layer_containers(
         self._layers)
 
   def get_layer(self, name=None, index=None):
@@ -528,7 +528,12 @@
   @property
   def _unfiltered_losses(self):
     losses = []
-    if context.executing_eagerly():
+
+    # If any eager losses are present, we assume the model to be part of an
+    # eager training loop (either a custom one or the one used when
+    # `run_eagerly=True`), and so we always return just the eager losses in that
+    # case.
+    if self._eager_losses:
       losses.extend(self._eager_losses)
     else:
       losses.extend(self._losses)
@@ -539,7 +544,7 @@
         losses += layer.losses
     return losses
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def _clear_losses(self):
     """Used every step in eager to reset losses."""
     self._eager_losses = []
@@ -679,14 +684,14 @@
 
   @property
   def trainable_weights(self):
-    return checkpointable_layer_utils.gather_trainable_weights(
+    return trackable_layer_utils.gather_trainable_weights(
         trainable=self.trainable,
         sub_layers=self._layers,
         extra_variables=self._trainable_weights)
 
   @property
   def non_trainable_weights(self):
-    return checkpointable_layer_utils.gather_non_trainable_weights(
+    return trackable_layer_utils.gather_non_trainable_weights(
         trainable=self.trainable,
         sub_layers=self._layers,
         extra_variables=self._non_trainable_weights + self._trainable_weights)
@@ -723,7 +728,7 @@
         A list of `InputSpec` instances (one per input to the model)
             or a single instance if the model has only one input.
     """
-    # If not a graph network, can't assume anything.
+    # For a subclassed model, we can't assume anything.
     if not self._is_graph_network:
       return None
 
@@ -1298,7 +1303,10 @@
     """
     if not self._is_graph_network:
       raise NotImplementedError(
-          'Currently `save` requires model to be a graph network. Consider '
+          'The `save` method requires the model to be a Functional model or a '
+          'Sequential model. It does not work for subclassed models, '
+          'because such models are defined via the body of a Python method, '
+          'which isn\'t safely serializable. Consider '
           'using `save_weights`, in order to save the weights of the model.')
 
     from tensorflow.python.keras.models import save_model  # pylint: disable=g-import-not-at-top
@@ -1391,7 +1399,7 @@
         session = backend.get_session()
       optimizer = getattr(self, 'optimizer', None)
       if (optimizer
-          and not isinstance(optimizer, checkpointable.Checkpointable)):
+          and not isinstance(optimizer, trackable.Trackable)):
         logging.warning(
             ('This model was compiled with a Keras optimizer (%s) but is being '
              'saved in TensorFlow format with `save_weights`. The model\'s '
@@ -1399,7 +1407,7 @@
              'the TensorFlow format the optimizer\'s state will not be '
              'saved.\n\nConsider using a TensorFlow optimizer from `tf.train`.')
             % (optimizer,))
-      self._checkpointable_saver.save(filepath, session=session)
+      self._trackable_saver.save(filepath, session=session)
       # Record this checkpoint so it's visible from tf.train.latest_checkpoint.
       checkpoint_management.update_checkpoint_state_internal(
           save_dir=os.path.dirname(filepath),
@@ -1458,7 +1466,7 @@
         # The checkpoint is not readable in TensorFlow format. Try HDF5.
         save_format = 'h5'
     if save_format == 'tf':
-      status = self._checkpointable_saver.restore(filepath)
+      status = self._trackable_saver.restore(filepath)
       if by_name:
         raise NotImplementedError(
             'Weights may only be loaded based on topology into Models when '
@@ -1468,7 +1476,7 @@
         session = backend.get_session()
         # Restore existing variables (if any) immediately, and set up a
         # streaming restore for any variables created in the future.
-        checkpointable_utils.streaming_restore(status=status, session=session)
+        trackable_utils.streaming_restore(status=status, session=session)
       status.assert_nontrivial_match()
       return status
     if h5py is None:
@@ -1829,3 +1837,4 @@
                        str(all_names.count(name)) + ' times in the model. '
                        'All layer names should be unique.')
   return network_nodes, nodes_by_depth, layers, layers_by_depth
+
diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py
index 671bfe9..6c8f5c2 100644
--- a/tensorflow/python/keras/engine/sequential.py
+++ b/tensorflow/python/keras/engine/sequential.py
@@ -28,7 +28,7 @@
 from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils import layer_utils
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import keras_export
@@ -93,7 +93,7 @@
   ```
   """
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def __init__(self, layers=None, name=None):
     super(Sequential, self).__init__(name=name)
     self.supports_masking = True
@@ -112,7 +112,7 @@
     # Historically, `sequential.layers` only returns layers that were added
     # via `add`, and omits the auto-generated `InputLayer` that comes at the
     # bottom of the stack.
-    # `CheckpointableBase` manages the `_layers` attributes and does filtering
+    # `Trackable` manages the `_layers` attribute and does filtering
     # over it.
     layers = super(Sequential, self).layers
     if layers and isinstance(layers[0], input_layer.InputLayer):
@@ -123,7 +123,7 @@
   def dynamic(self):
     return any(layer.dynamic for layer in self.layers)
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def add(self, layer):
     """Adds a layer instance on top of the layer stack.
 
@@ -138,6 +138,14 @@
             multiple output tensors, or is already connected
             somewhere else (forbidden in `Sequential` models).
     """
+    # If we are passed a Keras tensor created by keras.Input(), we can extract
+    # the input layer from its keras history and use that without any loss of
+    # generality.
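+    # For example, `model.add(keras.Input(shape=(2,)))` is treated the same
+    # as adding the underlying `InputLayer` directly.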
+    if hasattr(layer, '_keras_history'):
+      origin_layer = layer._keras_history[0]
+      if isinstance(origin_layer, input_layer.InputLayer):
+        layer = origin_layer
+
     if not isinstance(layer, base_layer.Layer):
       raise TypeError('The added layer must be '
                       'an instance of class Layer. '
@@ -193,7 +201,7 @@
 
     self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call)
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def pop(self):
     """Removes the last layer in the model.
 
diff --git a/tensorflow/python/keras/engine/sequential_test.py b/tensorflow/python/keras/engine/sequential_test.py
index a65c200..afd7d23 100644
--- a/tensorflow/python/keras/engine/sequential_test.py
+++ b/tensorflow/python/keras/engine/sequential_test.py
@@ -48,6 +48,18 @@
     self.assertEqual(model.get_layer(name='dp').name, 'dp')
 
   @keras_parameterized.run_all_keras_modes
+  def test_input_defined_first_layer(self):
+    model = keras.models.Sequential()
+    model.add(keras.Input(shape=(2,), name='input_layer'))
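+    # The Input above resolves to an InputLayer, which is omitted from
+    # `model.layers`; the two Dense layers below contribute 2 * 2 weights
+    # (a kernel and a bias each).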
+    model.add(keras.layers.Dense(1))
+    model.add(keras.layers.Dropout(0.3, name='dp'))
+    model.add(keras.layers.Dense(2, kernel_regularizer='l2',
+                                 kernel_constraint='max_norm'))
+    self.assertLen(model.layers, 3)
+    self.assertLen(model.weights, 2 * 2)
+    self.assertEqual(model.get_layer(name='dp').name, 'dp')
+
+  @keras_parameterized.run_all_keras_modes
   def test_sequential_pop(self):
     num_hidden = 5
     input_dim = 3
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 6530b08..97bee58 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -41,17 +41,14 @@
 from tensorflow.python.keras.engine import training_generator
 from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.engine.network import Network
-from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.keras.saving import saving_utils
 from tensorflow.python.keras.utils import data_utils
+from tensorflow.python.keras.utils import losses_utils
 from tensorflow.python.keras.utils.generic_utils import slice_arrays
-from tensorflow.python.keras.utils.losses_utils import squeeze_or_expand_dimensions
+from tensorflow.python.keras.utils.mode_keys import ModeKeys
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.losses import losses_impl
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import optimizer as tf_optimizer_module
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.training.mode_keys import ModeKeys
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import keras_export
 
@@ -144,7 +141,7 @@
         return super(Model, self).get_weights()
     return super(Model, self).get_weights()
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def compile(self,
               optimizer,
               loss=None,
@@ -160,17 +157,20 @@
     Arguments:
         optimizer: String (name of optimizer) or optimizer instance.
             See `tf.keras.optimizers`.
-        loss: String (name of objective function) or objective function.
-            See `tf.losses`. If the model has multiple outputs, you can use a
-            different loss on each output by passing a dictionary or a list of
-            losses. The loss value that will be minimized by the model
-            will then be the sum of all individual losses.
-        metrics: List of metrics to be evaluated by the model
-            during training and testing.
-            Typically you will use `metrics=['accuracy']`.
+        loss: String (name of objective function), objective function or
+            `tf.losses.Loss` instance. See `tf.losses`. If the model has
+            multiple outputs, you can use a different loss on each output by
+            passing a dictionary or a list of losses. The loss value that will
+            be minimized by the model will then be the sum of all individual
+            losses.
+        metrics: List of metrics to be evaluated by the model during training
+            and testing. Typically you will use `metrics=['accuracy']`.
             To specify different metrics for different outputs of a
-            multi-output model, you could also pass a dictionary,
-            such as `metrics={'output_a': 'accuracy'}`.
+            multi-output model, you could also pass a dictionary, such as
+            `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
+            You can also pass a list (len = len(outputs)) of lists of metrics
+            such as `metrics=[['accuracy'], ['accuracy', 'mse']]` or
+            `metrics=['accuracy', ['accuracy', 'mse']]`.
         loss_weights: Optional list or dictionary specifying scalar
             coefficients (Python floats) to weight the loss contributions
             of different model outputs.
@@ -231,12 +231,6 @@
     # Validate that arguments passed by the user to `compile` are supported by
     # DistributionStrategy.
     if self._distribution_strategy:
-      if not isinstance(optimizer,
-                        (tf_optimizer_module.Optimizer, optimizers.TFOptimizer,
-                         optimizer_v2.OptimizerV2)):
-        raise NotImplementedError(
-            'optimizer must be an instance of '
-            'tf.train.Optimizer, not a %s' % type(optimizer))
       if sample_weight_mode:
         raise NotImplementedError('sample_weight_mode is not supported with '
                                   'DistributionStrategy.')
@@ -248,19 +242,12 @@
                          'DistributionStrategy.')
 
     loss = loss or {}
-    if self.run_eagerly and not isinstance(
-        optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer,
-                    optimizer_v2.OptimizerV2)):
-      raise ValueError(
-          'When running a model in eager execution, the optimizer must be an '
-          'instance of tf.train.Optimizer. Received: '
-          '%s' % optimizer)
 
     self.optimizer = optimizer
     # We've disabled automatic dependency tracking for this method, but do want
-    # to add a checkpoint dependency on the optimizer if it's checkpointable.
-    if isinstance(self.optimizer, checkpointable.Checkpointable):
-      self._track_checkpointable(
+    # to add a checkpoint dependency on the optimizer if it's trackable.
+    if isinstance(self.optimizer, trackable.Trackable):
+      self._track_trackable(
           self.optimizer, name='optimizer', overwrite=True)
     self.loss = loss
     self._compile_metrics = metrics or []
@@ -289,79 +276,30 @@
       return
     self._is_compiled = True
 
-    # Prepare loss functions.
-    if isinstance(loss, dict):
-      for name in loss:
-        if name not in self.output_names:
-          raise ValueError(
-              'Unknown entry in loss '
-              'dictionary: "' + name + '". '
-              'Only expected the following keys: ' + str(self.output_names))
-      loss_functions = []
-      for name in self.output_names:
-        if name not in loss:
-          logging.warning(
-              'Output "' + name +
-              '" missing from loss dictionary. We assume '
-              'this was done on purpose. The fit and evaluate APIs will not be '
-              'expecting any data to be passed to "' + name + '".')
-        loss_functions.append(training_utils.get_loss_function(loss.get(name)))
-    elif isinstance(loss, list):
-      if len(loss) != len(self.outputs):
-        raise ValueError('When passing a list as loss, '
-                         'it should have one entry per model outputs. '
-                         'The model has ' + str(len(self.outputs)) +
-                         ' outputs, but you passed loss=' + str(loss))
-      loss_functions = [training_utils.get_loss_function(l) for l in loss]
-    else:
-      loss_functions = [
-          training_utils.get_loss_function(loss)
-          for _ in range(len(self.outputs))
-      ]
-    self.loss_functions = loss_functions
+    # Prepare a list of loss functions, one per model output.
+    self.loss_functions = training_utils.prepare_loss_functions(
+        loss, self.output_names)
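+    # For example (illustrative): with two outputs, `loss='mse'` yields two
+    # mse functions, while `loss={'out_a': 'mse'}` leaves the loss for any
+    # missing output as None.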
 
-    skip_target_indices = []
-    skip_target_weighing_indices = []
     self._feed_outputs = []
     self._feed_output_names = []
     self._feed_output_shapes = []
     self._feed_loss_fns = []
-    for i in range(len(loss_functions)):
-      if loss_functions[i] is None:
+    # If a loss function is None, the corresponding output will be skipped
+    # during total loss calculation and feed target preparation.
+    skip_target_indices = []
+    skip_target_weighing_indices = []
+    for i, loss_function in enumerate(self.loss_functions):
+      if loss_function is None:
         skip_target_indices.append(i)
         skip_target_weighing_indices.append(i)
 
     # Prepare output masks.
     if not self.run_eagerly:
       masks = [getattr(x, '_keras_mask', None) for x in self.outputs]
-      if not isinstance(masks, list):
-        masks = [masks]
 
-    # Prepare loss weights.
-    if loss_weights is None:
-      loss_weights_list = [1. for _ in range(len(self.outputs))]
-    elif isinstance(loss_weights, dict):
-      for name in loss_weights:
-        if name not in self.output_names:
-          raise ValueError(
-              'Unknown entry in loss_weights '
-              'dictionary: "' + name + '". '
-              'Only expected the following keys: ' + str(self.output_names))
-      loss_weights_list = []
-      for name in self.output_names:
-        loss_weights_list.append(loss_weights.get(name, 1.))
-    elif isinstance(loss_weights, list):
-      if len(loss_weights) != len(self.outputs):
-        raise ValueError(
-            'When passing a list as loss_weights, '
-            'it should have one entry per model output. '
-            'The model has ' + str(len(self.outputs)) +
-            ' outputs, but you passed loss_weights=' + str(loss_weights))
-      loss_weights_list = loss_weights
-    else:
-      raise TypeError('Could not interpret loss_weights argument: ' +
-                      str(loss_weights) + ' - expected a list of dicts.')
-    self.loss_weights_list = loss_weights_list
+    # Prepare a list of loss weights, one per model output.
+    self.loss_weights_list = training_utils.prepare_loss_weights(
+        self.output_names, loss_weights)
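+    # For example (illustrative): `loss_weights=None` becomes `[1., 1., ...]`
+    # (one weight per output), and a dict may omit outputs, which then
+    # default to a weight of 1.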
 
     # Initialization for Eager mode execution.
     if self.run_eagerly:
@@ -375,15 +313,9 @@
         raise ValueError('target_tensors are not currently supported in Eager '
                          'mode.')
       self.total_loss = None
-      for i in range(len(self.outputs)):
-        if len(self.outputs) > 1:
-          self._compile_metrics_names.append(self.output_names[i] + '_loss')
 
       # Set metric attributes on model.
-      self._set_metric_attributes(
-          self.outputs,
-          skip_target_indices=skip_target_indices,
-      )
+      self._set_metric_attributes(skip_target_indices=skip_target_indices)
 
       self.targets = []
       for i in range(len(self.outputs)):
@@ -456,70 +388,9 @@
       # Save all metric attributes per output of the model.
       self._cache_output_metric_attributes(metrics, weighted_metrics)
 
-      # Compute total loss.
-      total_loss = None
-      with K.name_scope('loss'):
-        for i in range(len(self.outputs)):
-          if i in skip_target_indices:
-            continue
-          y_true = self.targets[i]
-          y_pred = self.outputs[i]
-          loss_fn = loss_functions[i]
-          sample_weight = self.sample_weights[i]
-          mask = masks[i]
-          loss_weight = loss_weights_list[i]
-          with K.name_scope(self.output_names[i] + '_loss'):
-            if mask is not None:
-              mask = math_ops.cast(mask, y_pred.dtype)
-              # Update weights with mask.
-              if sample_weight is None:
-                sample_weight = mask
-              else:
-                # Update dimensions of weights to match with mask if possible.
-                mask, _, sample_weight = squeeze_or_expand_dimensions(
-                    mask, None, sample_weight)
-                sample_weight *= mask
-
-            output_loss = loss_fn(y_true, y_pred, sample_weight=sample_weight)
-
-          if len(self.outputs) > 1:
-            # Keep track of the un-aggregated loss result tensor.
-            self._compile_metrics_tensors[self.output_names[i] +
-                                          '_loss'] = output_loss
-
-            # Keep track of stateful result tensor and function for the loss.
-            # Reset reduction here as metric wrapper will take care of that.
-            loss_fn.reduction = losses_impl.ReductionV2.NONE
-            output_loss_metric = metrics_module.SumOverBatchSizeMetricWrapper(
-                loss_fn, name=loss_fn.name)
-            result_tensor = self._call_metric_fn(output_loss_metric, y_true,
-                                                 y_pred, sample_weight, mask)
-            self._compile_stateful_metrics_tensors[self.output_names[i] +
-                                                   '_loss'] = result_tensor
-            self._compile_stateful_metric_functions.append(output_loss_metric)
-
-            self._compile_metrics_names.append(self.output_names[i] + '_loss')
-          if total_loss is None:
-            total_loss = loss_weight * output_loss
-          else:
-            total_loss += loss_weight * output_loss
-        if total_loss is None:
-          if not self.losses:
-            raise ValueError('The model cannot be compiled '
-                             'because it has no loss to optimize.')
-          else:
-            total_loss = 0.
-
-        # Add regularization penalties
-        # and other layer-specific losses.
-        for loss_tensor in self.losses:
-          total_loss += loss_tensor
-
       # Set metric attributes on model.
-      self._set_metric_attributes(
-          self.outputs,
-          skip_target_indices=skip_target_indices,
-      )
+      self._set_metric_attributes(skip_target_indices=skip_target_indices)
+
       # Invoke metric functions for all the outputs.
       self._handle_metrics(
           self.outputs,
@@ -528,8 +399,12 @@
           skip_target_indices=skip_target_indices,
           sample_weights=self.sample_weights)
 
-      # Prepare gradient updates and state updates.
-      self.total_loss = total_loss
+      # Compute total loss.
+      # Used to keep track of the total loss value (stateless).
+      # e.g., total_loss = loss_weight_1 * output_1_loss_fn(...) +
+      #                   loss_weight_2 * output_2_loss_fn(...) +
+      #                   layer losses.
+      self.total_loss = self._prepare_total_loss(skip_target_indices, masks)
 
       # Functions for train, test and predict will
       # be compiled lazily when required.
@@ -538,6 +413,7 @@
 
       self._fit_function = None
       self._eval_function = None
+      self._predict_function = None
       self.train_function = None
       self.test_function = None
       self.predict_function = None
@@ -798,6 +674,8 @@
         # servers via the Distribute Coordinator.
         def _worker_fn(_):
           """Run training inside the distributed coordinator."""
+          filtered_callbacks = (
+              distributed_training_utils.filter_distributed_callbacks(
+                  callbacks))
           return training_distributed.fit_distributed(
               self,
               x=x,
@@ -805,7 +683,7 @@
               batch_size=batch_size,
               epochs=epochs,
               verbose=verbose,
-              callbacks=callbacks,
+              callbacks=filtered_callbacks,
               validation_split=validation_split,
               validation_data=validation_data,
               shuffle=shuffle,
@@ -1058,6 +936,8 @@
         # servers via the Distribute Coordinator.
         def _worker_fn(_):
           """Run evaluation inside the distributed coordinator."""
+          filtered_callbacks = (
+              distributed_training_utils.filter_distributed_callbacks(
+                  callbacks))
           return training_distributed.evaluate_distributed(
               self,
               x=x,
@@ -1066,7 +946,7 @@
               verbose=verbose,
               sample_weight=sample_weight,
               steps=steps,
-              callbacks=callbacks)
+              callbacks=filtered_callbacks)
 
         # Independent worker only for now.
         return dc.run_distribute_coordinator(
@@ -1260,8 +1140,16 @@
     if hasattr(self, 'metrics'):
       for m in self.metrics:
         m.reset_states()
-      if self._distribution_strategy:
-        distributed_training_utils._reset_metrics(self)  # pylint: disable=protected-access
+
+    # Reset the state of loss metric wrappers.
+    if hasattr(
+        self, '_output_loss_metrics') and self._output_loss_metrics is not None:
+      for m in self._output_loss_metrics:
+        m.reset_states()
+
+    # Reset metrics on all the distributed (cloned) models.
+    if self._distribution_strategy:
+      distributed_training_utils._reset_metrics(self)  # pylint: disable=protected-access
 
   def train_on_batch(self,
                      x,
@@ -1320,7 +1208,12 @@
 
     if self.run_eagerly:
       outputs = training_eager.train_on_batch(
-          self, x, y, sample_weights=sample_weights)
+          self,
+          x,
+          y,
+          sample_weights=sample_weights,
+          reset_metrics=reset_metrics,
+          output_loss_metrics=self._output_loss_metrics)
     else:
       if not isinstance(K.symbolic_learning_phase(), int):
         ins = x + y + sample_weights + [True]
@@ -1389,7 +1282,12 @@
 
     if self.run_eagerly:
       outputs = training_eager.test_on_batch(
-          self, x, y, sample_weights=sample_weights)
+          self,
+          x,
+          y,
+          sample_weights=sample_weights,
+          reset_metrics=reset_metrics,
+          output_loss_metrics=self._output_loss_metrics)
     else:
       inputs = x + y + sample_weights
       if reset_metrics:
@@ -1705,6 +1603,105 @@
         verbose=verbose,
         callbacks=callbacks)
 
+  def _prepare_total_loss(self, skip_target_indices=None, masks=None):
+    """Computes total loss from loss functions.
+
+    Arguments:
+        skip_target_indices: A list of indices of model outputs whose loss
+          function is `None`.
+        masks: List of mask values corresponding to each model output.
+
+    Returns:
+        A tensor holding the total loss of the model (the weighted sum of the
+        per-output losses, plus regularization losses).
+
+    Raises:
+        TypeError: If the model is compiled with `run_eagerly=True`.
+    """
+    if self.run_eagerly:
+      raise TypeError('The total loss cannot be computed when the model is '
+                      'compiled with `run_eagerly=True`.')
+    skip_target_indices = skip_target_indices or []
+    total_loss = None
+    with K.name_scope('loss'):
+      zipped_inputs = zip(self.targets, self.outputs, self.loss_functions,
+                          self.sample_weights, masks, self.loss_weights_list)
+      for i, (y_true, y_pred, loss_fn, sample_weight, mask,
+              loss_weight) in enumerate(zipped_inputs):
+        if i in skip_target_indices:
+          continue
+        loss_name = self.output_names[i] + '_loss'
+        with K.name_scope(loss_name):
+          if mask is not None:
+            mask = math_ops.cast(mask, y_pred.dtype)
+            # Update weights with mask.
+            if sample_weight is None:
+              sample_weight = mask
+            else:
+              # Update dimensions of weights to match with mask if possible.
+              mask, _, sample_weight = (
+                  losses_utils.squeeze_or_expand_dimensions(
+                      mask, None, sample_weight))
+              sample_weight *= mask
+
+          # Reset reduction on the loss so that we can get the per sample loss
+          # value. We use this to get both the stateless and stateful loss
+          # values without having to compute the underlying loss function
+          # twice.
+          weighted_losses = None
+          if hasattr(loss_fn, 'reduction'):
+            current_loss_reduction = loss_fn.reduction
+            loss_fn.reduction = losses_utils.ReductionV2.NONE
+            weighted_losses = loss_fn(
+                y_true, y_pred, sample_weight=sample_weight)
+            loss_fn.reduction = current_loss_reduction
+
+            # Compute the stateless loss value.
+            output_loss = losses_utils.reduce_weighted_loss(
+                weighted_losses, reduction=current_loss_reduction)
+          else:
+            # Compute the stateless loss value for a custom loss class.
+            # Here we assume that the class takes care of loss reduction,
+            # because if it returned a vector we could not distinguish a
+            # custom optimizer that expects a vector loss from one that
+            # expects an unreduced per-sample loss.
+            output_loss = loss_fn(y_true, y_pred, sample_weight=sample_weight)
+
+        if len(self.outputs) > 1:
+          # Keep track of the un-aggregated loss result tensor.
+          self._compile_metrics_tensors[loss_name] = output_loss
+
+          # Keep track of stateful result tensor and function for the loss.
+          # Compute the stateful loss value.
+          if weighted_losses is not None:
+            # TODO(b/120571621): Directly call metric when the bug is fixed.
+            aggregated_output_loss = self._call_fn_for_each_replica(
+                self._output_loss_metrics[i], weighted_losses)
+          else:
+            # Custom loss class.
+            aggregated_output_loss = self._call_metric_fn(
+                self._output_loss_metrics[i], y_true, y_pred, sample_weight)
+          self._compile_stateful_metrics_tensors[
+              loss_name] = aggregated_output_loss
+          self._compile_stateful_metric_functions.append(
+              self._output_loss_metrics[i])
+
+        if total_loss is None:
+          total_loss = loss_weight * output_loss
+        else:
+          total_loss += loss_weight * output_loss
+      if total_loss is None:
+        if not self.losses:
+          raise ValueError('The model cannot be compiled '
+                           'because it has no loss to optimize.')
+        else:
+          total_loss = 0.
+
+      # Add regularization penalties and other layer-specific losses.
+      if self.losses:
+        total_loss += losses_utils.scale_loss_for_distribution(
+            math_ops.add_n(self.losses))
+    return total_loss
+
   def _get_callback_model(self):
     """Returns the Callback Model for this Model."""
 
@@ -1817,10 +1814,13 @@
         output_shapes.append(output.shape.as_list())
     self._per_output_metrics = training_utils.collect_per_output_metric_info(
         metrics, self.output_names, output_shapes, self.loss_functions)
-    self._per_output_weighted_metrics = \
+    self._per_output_weighted_metrics = (
         training_utils.collect_per_output_metric_info(
-            weighted_metrics, self.output_names, output_shapes,
-            self.loss_functions, is_weighted=True)
+            weighted_metrics,
+            self.output_names,
+            output_shapes,
+            self.loss_functions,
+            is_weighted=True))
 
   def _add_unique_metric_name(self, metric_name, output_index):
     """Makes the metric name unique and adds it to the model's metric name list.
@@ -1867,19 +1867,19 @@
 
   def _init_metric_attributes(self):
     """Initialized model metric attributes."""
-    # List of all metric names in the model.
+    # List of all metric names in the model. This includes loss metrics.
     self._compile_metrics_names = ['loss']
     # List of stateful metric functions. Used for resetting metric state during
-    # training/eval.
-    # This includes loss functions when there are multiple outputs.
+    # training/eval. This includes loss metric functions.
     self._compile_stateful_metric_functions = []
     # Dict of all aggregated metric result tensors. This includes aggregated
-    # loss result tensors when there are multiple outputs.
+    # loss result tensors.
     self._compile_stateful_metrics_tensors = {}
     # Dict of all metric result tensors (aggregated or not - based on the
-    # values given in compile.). This includes aggregated loss result tensors
-    # when there are multiple outputs.
+    # values given in `compile`). This includes aggregated loss result
+    # tensors.
     self._compile_metrics_tensors = {}
+    # List of metric wrappers on output losses.
+    self._output_loss_metrics = None
 
   def _set_per_output_metric_attributes(self, metrics_dict, output_index):
     """Sets the metric attributes on the model for the given output.
@@ -1901,12 +1901,21 @@
       self._compile_stateful_metric_functions.append(stateful_metric_fn)
     return updated_metrics_dict
 
-  def _set_metric_attributes(self, outputs, skip_target_indices=None):
+  def _set_metric_attributes(self, skip_target_indices=None):
     """Sets the metric attributes on the model for all the model outputs."""
+    # Add loss metric names to the model metric names list.
+    if len(self.outputs) > 1:
+      output_names = [
+          self.output_names[i] + '_loss'
+          for i in range(len(self.outputs))
+          if i not in skip_target_indices
+      ]
+      self._compile_metrics_names.extend(output_names)
+
     skip_target_indices = skip_target_indices or []
     updated_per_output_metrics = []
     updated_per_output_weighted_metrics = []
-    for i in range(len(outputs)):
+    for i in range(len(self.outputs)):
       if i in skip_target_indices:
         updated_per_output_metrics.append(self._per_output_metrics[i])
         updated_per_output_weighted_metrics.append(
@@ -1919,11 +1928,29 @@
           self._set_per_output_metric_attributes(
               self._per_output_weighted_metrics[i], i))
 
+    # Create a metric wrapper for each output loss.
+    if len(self.outputs) > 1:
+      self._output_loss_metrics = [
+          metrics_module.SumOverBatchSize() if hasattr(loss_fn, 'reduction')
+          else metrics_module.SumOverBatchSizeMetricWrapper(loss_fn)
+          for loss_fn in self.loss_functions
+      ]
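+      # For example, a built-in `Loss` instance (which has a `reduction`
+      # attribute) is tracked with a plain `SumOverBatchSize` accumulator,
+      # while a bare loss callable is wrapped so it can be invoked per batch.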
+
     self._per_output_metrics = updated_per_output_metrics
     self._per_output_weighted_metrics = updated_per_output_weighted_metrics
 
-  def _call_metric_fn(self, fn, y_true, y_pred, weights, mask):
+  def _call_metric_fn(self, metric_fn, y_true, y_pred, weights, mask=None):
+    # TODO(b/120571621): Remove this function when the bug is fixed.
     """Helper function to call metric function with distribution strategy."""
+    return self._call_fn_for_each_replica(
+        training_utils.call_metric_function,
+        metric_fn,
+        y_true,
+        y_pred,
+        weights=weights,
+        mask=mask)
+
+  def _call_fn_for_each_replica(self, fn, *args, **kwargs):
     # TODO(b/120571621): We want to avoid metric reductions here since
     # TPUStrategy does not implement replica-local variables.
     # Remove this hack once we support TPUReplicaLocalVariables.
@@ -1933,10 +1960,8 @@
         distribution_strategy_context.in_cross_replica_context()):
       with self._distribution_strategy.scope():
         return self._distribution_strategy.extended.call_for_each_replica(
-            training_utils.call_metric_function,
-            (fn, y_true, y_pred, weights, mask))
-    return training_utils.call_metric_function(
-        fn, y_true, y_pred, weights=weights, mask=mask)
+            fn, args, kwargs)
+    return fn(*args, **kwargs)
 
   def _handle_per_output_metrics(self,
                                  metrics_dict,
@@ -2071,7 +2096,7 @@
           ' trainable weights, did you set `model.trainable`'
           ' without calling `model.compile` afterwards?', 1)
 
-  def _make_train_function_helper(self, fn_name, outputs, metric_updates=None):
+  def _make_train_function_helper(self, fn_name, outputs):
     if not self._is_compiled:
       raise RuntimeError('You must compile your model before using it.')
     self._check_trainable_weights_consistency()
@@ -2092,9 +2117,6 @@
       updates += self.get_updates_for(None)
       # Conditional updates relevant to this model
       updates += self.get_updates_for(self.inputs)
-      # Add stateful metrics updates.
-      if metric_updates is not None:
-        updates += metric_updates
 
       with K.name_scope('training'):
         # Gets loss and metrics. Updates weights at each call.
@@ -2120,7 +2142,7 @@
     self._make_train_function_helper(
         '_fit_function', [self.total_loss] + metrics_tensors)
 
-  def _make_test_function_helper(self, fn_name, outputs, metric_updates=None):
+  def _make_test_function_helper(self, fn_name, outputs):
     if not self._is_compiled:
       raise RuntimeError('You must compile your model before using it.')
     if getattr(self, fn_name) is None:
@@ -2130,9 +2152,6 @@
 
       with K.name_scope('evaluation'):
         updates = self.state_updates
-        # Add stateful metrics updates.
-        if metric_updates is not None:
-          updates += metric_updates
         # Return loss and metrics, no gradient updates.
         # Does update the network states.
         fn = K.function(
@@ -2190,9 +2209,6 @@
                                           sample_weight=None,
                                           class_weight=None,
                                           batch_size=None,
-                                          check_steps=False,
-                                          steps_name='steps',
-                                          steps=None,
                                           validation_split=0,
                                           shuffle=False,
                                           repeat=True,
@@ -2212,11 +2228,6 @@
         to, as conveyed by `y`.
       batch_size: Integer batch size. If provided, it is used to run additional
         validation checks on stateful models.
-      check_steps: boolean, True if we want to check for validity of `steps` and
-        False, otherwise.
-      steps_name: The public API's parameter name for `steps`.
-      steps: Integer or `None`. Total number of steps (batches of samples) to
-        execute.
       validation_split: Float between 0 and 1.
         Fraction of the training data to be used as validation data.
       shuffle: Boolean whether to shuffle the training data before each epoch.
@@ -2265,18 +2276,15 @@
       if shuffle:
         training_utils.verify_dataset_shuffled(x)
 
-      if check_steps and steps is None:
-        raise ValueError('When using Datasets as input, '
-                         'you should specify the `{steps_name}` argument.'
-                         .format(steps_name=steps_name))
-
-    if ops.executing_eagerly_outside_functions():
-      session = None
-    else:
-      session = K.get_session()
-
     strategy = self._distribution_strategy
     with strategy.scope():
+      # We should be sure to call get_session() inside the strategy.scope()
+      # so the strategy can affect the session options.
+      if ops.executing_eagerly_outside_functions():
+        session = None
+      else:
+        session = K.get_session()
+
       first_x_value = nest.flatten(x)[0]
       if isinstance(first_x_value, np.ndarray):
         x = distributed_training_utils.list_to_tuple(x)
@@ -2655,7 +2663,7 @@
           'However we received `validation_data=%s`' % validation_data)
     return val_x, val_y, val_sample_weight
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def _set_inputs(self, inputs, outputs=None, training=None):
     """Set model's input and output specs based on the input data received.
 
diff --git a/tensorflow/python/keras/engine/training_arrays.py b/tensorflow/python/keras/engine/training_arrays.py
index 68b8f61..e609f5d 100644
--- a/tensorflow/python/keras/engine/training_arrays.py
+++ b/tensorflow/python/keras/engine/training_arrays.py
@@ -33,8 +33,8 @@
 from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils.generic_utils import make_batches
 from tensorflow.python.keras.utils.generic_utils import slice_arrays
+from tensorflow.python.keras.utils.mode_keys import ModeKeys
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.mode_keys import ModeKeys
 
 try:
   from scipy.sparse import issparse  # pylint: disable=g-import-not-at-top
@@ -252,8 +252,28 @@
           actual_inputs = ins() if callable(ins) else ins
           batch_outs = f(actual_inputs)
         except errors.OutOfRangeError:
-          if not is_dataset:
+          if is_dataset:
+            # The dataset passed by the user ran out of batches.
+            # Now we know the cardinality of the dataset.
+            # If steps_per_epoch was specified, then running out of data is
+            # unexpected, so we stop training and inform the user.
+            if steps_per_epoch:
+              callbacks.model.stop_training = True
+              logging.warning(
+                  'Your dataset ran out of data; interrupting training. '
+                  'Make sure that your dataset can generate at least '
+                  '`%s * epochs` batches (in this case, %d batches). '
+                  'You may need to use the repeat() function when '
+                  'building your dataset.'
+                  % (steps_name, steps_per_epoch * epochs))
+            elif step > 0:
+              steps_per_epoch = step
+              aggregator.num_samples_or_steps = steps_per_epoch
+              progbar.params['steps'] = steps_per_epoch
+              progbar.progbar.target = steps_per_epoch
+          else:
             # We ran out of batches while the user passed an iterator (legacy).
+            callbacks.model.stop_training = True
             logging.warning(
                 'Your dataset iterator ran out of data; '
                 'interrupting training. Make sure that your iterator '
@@ -261,15 +281,6 @@
                 'batches (in this case, %d batches). You may need to '
                 'use the repeat() function when building your '
                 'dataset.' % (steps_name, steps_per_epoch * epochs))
-            callbacks.model.stop_training = True
-          else:
-            # The dataset passed by the user ran out of batches.
-            # Now we know the cardinality of the dataset.
-            if step > 0:
-              steps_per_epoch = step
-              aggregator.num_samples_or_steps = steps_per_epoch
-              progbar.params['steps'] = steps_per_epoch
-              progbar.progbar.target = steps_per_epoch
           break
 
         if not isinstance(batch_outs, list):
diff --git a/tensorflow/python/keras/engine/training_dataset_test.py b/tensorflow/python/keras/engine/training_dataset_test.py
index 4d2d68c..bc37b08 100644
--- a/tensorflow/python/keras/engine/training_dataset_test.py
+++ b/tensorflow/python/keras/engine/training_dataset_test.py
@@ -395,8 +395,7 @@
   @keras_parameterized.run_all_keras_modes
   def test_finite_dataset_known_cardinality_no_steps_arg(self):
     model = testing_utils.get_small_mlp(1, 4, input_dim=3)
-    optimizer = 'rmsprop'
-    model.compile(optimizer, 'mse',
+    model.compile('rmsprop', 'mse',
                   run_eagerly=testing_utils.should_run_eagerly())
 
     inputs = np.zeros((100, 3), dtype=np.float32)
@@ -407,7 +406,7 @@
     batch_counter = BatchCounterCallback()
     history = model.fit(dataset, epochs=2, verbose=1, callbacks=[batch_counter])
 
-    self.assertEqual(len(history.history['loss']), 2)
+    self.assertLen(history.history['loss'], 2)
     self.assertEqual(batch_counter.batch_count, 20)
     model.evaluate(dataset)
     out = model.predict(dataset)
@@ -417,8 +416,7 @@
   @keras_parameterized.run_all_keras_modes
   def test_finite_dataset_unknown_cardinality_no_steps_arg(self):
     model = testing_utils.get_small_mlp(1, 4, input_dim=3)
-    optimizer = 'rmsprop'
-    model.compile(optimizer, 'mse',
+    model.compile('rmsprop', 'mse',
                   run_eagerly=testing_utils.should_run_eagerly())
 
     inputs = np.zeros((100, 3), dtype=np.float32)
@@ -431,12 +429,50 @@
     batch_counter = BatchCounterCallback()
     history = model.fit(dataset, epochs=2, verbose=1, callbacks=[batch_counter])
 
-    self.assertEqual(len(history.history['loss']), 2)
+    self.assertLen(history.history['loss'], 2)
     self.assertEqual(batch_counter.batch_count, 20)
     model.evaluate(dataset)
     out = model.predict(dataset)
     self.assertEqual(out.shape[0], 100)
 
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
+  def test_finite_dataset_unknown_cardinality_out_of_data(self):
+    model = testing_utils.get_small_mlp(1, 4, input_dim=3)
+    model.compile('rmsprop', 'mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
+
+    inputs = np.zeros((100, 3), dtype=np.float32)
+    targets = np.random.randint(0, 4, size=100, dtype=np.int32)
+    dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+    dataset = dataset.filter(lambda x, y: True).batch(10)
+    self.assertEqual(
+        keras.backend.get_value(cardinality.cardinality(dataset)),
+        cardinality.UNKNOWN)
+
+    batch_counter = BatchCounterCallback()
+    with test.mock.patch.object(logging, 'warning') as mock_log:
+      # steps_per_epoch (200) is greater than the number of batches in the
+      # dataset (10). As this is unexpected, training will stop and not make
+      # it to the second epoch.
+      history = model.fit(
+          dataset,
+          epochs=2,
+          verbose=1,
+          callbacks=[batch_counter],
+          steps_per_epoch=200)
+      self.assertIn(
+          'Your dataset ran out of data; interrupting training. '
+          'Make sure that your dataset can generate at least '
+          '`steps_per_epoch * epochs` batches (in this case, 400 batches). '
+          'You may need to use the repeat() function when '
+          'building your dataset.', str(mock_log.call_args))
+
+    self.assertLen(history.history['loss'], 1)
+    self.assertEqual(batch_counter.batch_count, 10)
+    model.evaluate(dataset)
+    out = model.predict(dataset)
+    self.assertEqual(out.shape[0], 100)
+
 
 class TestMetricsWithDatasetIterators(keras_parameterized.TestCase):
 
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 4a2a337..3df58af 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -22,7 +22,6 @@
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import batching
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.distribute import input_lib
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.framework import constant_op
@@ -35,9 +34,9 @@
 from tensorflow.python.keras.engine import training_arrays
 from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils.generic_utils import Progbar
+from tensorflow.python.keras.utils.mode_keys import ModeKeys
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util import nest
 
 
@@ -73,18 +72,11 @@
             batch_size, mode=ModeKeys.TRAIN))
   batch_size = model._validate_or_infer_batch_size(
       batch_size, steps_per_epoch, x)
-  steps_name = 'steps_per_epoch'
-  if isinstance(x, dataset_ops.DatasetV2):
-    steps_per_epoch = training_utils.infer_steps_for_dataset(
-        x, steps_per_epoch, steps_name=steps_name)
   dataset = model._distribution_standardize_user_data(
       x, y,
       sample_weight=sample_weight,
       class_weight=class_weight,
       batch_size=batch_size,
-      check_steps=True,
-      steps_name=steps_name,
-      steps=steps_per_epoch,
       validation_split=validation_split,
       shuffle=shuffle)
 
@@ -99,18 +91,11 @@
       validation_steps, _ = distributed_training_utils.get_input_params(
           model._distribution_strategy, first_valx_value, validation_steps,
           batch_size)
-    steps_name = 'validation_steps'
-    if isinstance(val_x, dataset_ops.DatasetV2):
-      validation_steps = training_utils.infer_steps_for_dataset(
-          val_x, validation_steps, steps_name=steps_name)
     val_dataset = model._distribution_standardize_user_data(
         val_x, val_y,
         sample_weight=val_sample_weights,
         class_weight=None,
         batch_size=batch_size,
-        check_steps=True,
-        steps_name=steps_name,
-        steps=validation_steps,
         validation_split=validation_split,
         shuffle=shuffle)
   elif validation_split:
@@ -128,7 +113,7 @@
         initial_epoch=initial_epoch,
         steps_per_epoch=steps_per_epoch,
         validation_steps=validation_steps,
-        validation_freq=1)
+        validation_freq=validation_freq)
   else:
     return training_arrays.fit_loop(
         model,
@@ -142,7 +127,8 @@
         initial_epoch=initial_epoch,
         steps_per_epoch=steps_per_epoch,
         validation_steps=validation_steps,
-        validation_freq=validation_freq)
+        validation_freq=validation_freq,
+        steps_name='steps_per_epoch')
 
 
 def evaluate_distributed(model,
@@ -160,18 +146,10 @@
     steps, batch_size = distributed_training_utils.get_input_params(
         model._distribution_strategy, first_x_value, steps, batch_size)
   batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
-  steps_name = 'steps'
-
-  if isinstance(x, dataset_ops.DatasetV2):
-    steps = training_utils.infer_steps_for_dataset(x, steps,
-                                                   steps_name=steps_name)
   dataset = model._distribution_standardize_user_data(
       x, y,
       sample_weight=sample_weight,
-      batch_size=batch_size,
-      check_steps=True,
-      steps_name=steps_name,
-      steps=steps)
+      batch_size=batch_size)
 
   if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
     return experimental_tpu_test_loop(
@@ -201,16 +179,9 @@
         model._distribution_strategy, first_x_value, steps,
         batch_size, mode=ModeKeys.PREDICT)
   batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
-  steps_name = 'steps'
-  if isinstance(x, dataset_ops.DatasetV2):
-    steps = training_utils.infer_steps_for_dataset(x, steps,
-                                                   steps_name=steps_name)
   dataset = model._distribution_standardize_user_data(
       x,
       batch_size=batch_size,
-      check_steps=True,
-      steps_name=steps_name,
-      steps=steps,
       repeat=False,
       allow_partial_batch=True)
   if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
@@ -271,6 +242,13 @@
   # TODO(fchollet): add support for `steps_per_epoch=None` in TPU loops.
   current_strategy = model._distribution_strategy
   iterator = distributed_training_utils.get_iterator(dataset, current_strategy)
+  steps_per_epoch = training_utils.infer_steps_for_dataset(
+      dataset, steps_per_epoch, epochs, steps_name='steps_per_epoch')
+  if (current_strategy.extended.steps_per_run != 1 and
+      steps_per_epoch is None):
+    raise ValueError('`steps_per_epoch` should be specified when calling '
+                     '`fit` on the model with TPUStrategy if '
+                     '`steps_per_run` != 1.')
 
   scope = distributed_training_utils.distributed_scope(
       strategy=current_strategy, learning_phase=1)
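Note: training_utils.infer_steps_for_dataset is now called inside the TPU fit loop instead of at the call sites removed above. A minimal sketch of the contract the loop relies on, assuming eager mode and a TF build where tf.data.experimental.cardinality is available (the real helper may differ):

    import tensorflow as tf

    def infer_steps_sketch(dataset, steps, steps_name='steps'):
      """Return `steps` if given; otherwise try to infer it from `dataset`."""
      if steps is not None:
        return steps
      size = tf.data.experimental.cardinality(dataset).numpy()
      if size >= 0:
        return int(size)  # cardinality is statically known and finite
      # UNKNOWN or INFINITE cardinality: return None so the run loop
      # falls back to catching errors.OutOfRangeError at runtime.
      return None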
@@ -330,18 +308,20 @@
     tensor = model._all_stateful_metrics_tensors[name]
     initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)
 
-  if steps_per_epoch is None:
-    raise ValueError('`steps_per_epoch` should be specified when calling '
-                     '`fit` on the model.')
+  use_steps = steps_per_epoch is not None
+  if use_steps:
+    iteration_value = min(steps_per_epoch,
+                          current_strategy.extended.steps_per_run)
+  else:
+    iteration_value = current_strategy.extended.steps_per_run
+
   steps_per_run = K.variable(
-      value=min(steps_per_epoch, current_strategy.extended.steps_per_run),
+      value=iteration_value,
       dtype='int32',
       name='steps_per_run')
-
   ctx = current_strategy.extended.experimental_run_steps_on_iterator(
       step_fn, iterator, iterations=steps_per_run,
       initial_loop_values=initial_loop_values)
-
   train_op = ctx.run_op
   output_tensors = ctx.last_step_outputs
 
@@ -361,11 +341,16 @@
       mode=mode)
 
   # Calculate the steps each time on the device.
-  steps_to_run = [current_strategy.extended.steps_per_run] * (
-      steps_per_epoch // current_strategy.extended.steps_per_run)
-  if steps_per_epoch % current_strategy.extended.steps_per_run:
-    steps_to_run.append(
-        steps_per_epoch % current_strategy.extended.steps_per_run)
+  if use_steps:
+    steps_to_run = ([current_strategy.extended.steps_per_run] *
+                    (steps_per_epoch //
+                     current_strategy.extended.steps_per_run))
+    if steps_per_epoch % current_strategy.extended.steps_per_run:
+      steps_to_run.append(
+          steps_per_epoch % current_strategy.extended.steps_per_run)
+    target_steps = len(steps_to_run)
+  else:
+    target_steps = np.inf
 
   callbacks._call_begin_hook(mode)
   for epoch in range(initial_epoch, epochs):
@@ -374,25 +359,36 @@
     epoch_logs = {}
     step_index = 0
     prev_step_count = None
-    for step_count in steps_to_run:
+    current_step = 0
+    while current_step < target_steps:
+      step_count = steps_to_run[current_step] if use_steps else 1
       batch_logs = {'batch': step_index, 'size': 1, 'num_steps': step_count}
       callbacks._call_batch_hook(mode, 'begin', step_index, batch_logs)
       if prev_step_count is None or step_count != prev_step_count:
         steps_per_run.load(step_count, K.get_session())
         prev_step_count = step_count
       try:
-        _, outputs = K.get_session().run([train_op, output_tensors])
+        _, outputs = K.batch_get_value([train_op, output_tensors])
       except errors.OutOfRangeError:
-        logging.warning('Your dataset iterator ran out of data; '
-                        'interrupting training. Make sure that your dataset '
-                        'can generate at least `steps_per_epoch * epochs` '
-                        'batches (in this case, %d batches).' %
-                        steps_per_epoch * epochs)
+        if use_steps:
+          logging.warning('Your dataset iterator ran out of data; '
+                          'interrupting training. Make sure that your dataset '
+                          'can generate at least `steps_per_epoch * epochs` '
+                          'batches (in this case, %d batches).' %
+                          (steps_per_epoch * epochs))
+        else:
+          target_steps = current_step
+          logging.info('Dataset iterator ran out of data. Inferring the '
+                       'value of `steps_per_epoch` as %s.' % target_steps)
+          distributed_training_utils.initialize_iterator(iterator,
+                                                         current_strategy)
         break
 
       batch_logs.update(outputs)
       callbacks._call_batch_hook(mode, 'end', step_index, batch_logs)
       step_index = step_index + step_count
+      current_step += 1
+
       if callbacks.model.stop_training:
         break
 
@@ -455,7 +451,11 @@
   """
   mode = ModeKeys.TEST
   current_strategy = model._distribution_strategy
-  iterator = distributed_training_utils.get_iterator(dataset, current_strategy)
+  iterator = distributed_training_utils.get_iterator(dataset,
+                                                     current_strategy)
+  steps = training_utils.infer_steps_for_dataset(dataset, steps,
+                                                 steps_name='steps')
+
   scope = distributed_training_utils.distributed_scope(
       strategy=current_strategy, learning_phase=0)
   scope.__enter__()
@@ -539,12 +539,29 @@
       mode=ModeKeys.TEST)
   callbacks._call_begin_hook(mode)
 
-  assert steps is not None
   outs = [0.] * len(model.metrics_names)
-  for step in range(steps):
-    batch_logs = {'batch': step, 'size': 1}
-    callbacks._call_batch_hook(mode, 'begin', step, batch_logs)
-    _, batch_outs = K.get_session().run([test_op, output_tensors])
+  if steps is not None:
+    target_steps = steps
+  else:
+    target_steps = np.inf
+
+  current_step = 0
+  while current_step < target_steps:
+    batch_logs = {'batch': current_step, 'size': 1}
+    callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs)
+    try:
+      _, batch_outs = K.batch_get_value([test_op, output_tensors])
+    except errors.OutOfRangeError:
+      if steps is not None:
+        warning_msg = ('Make sure that your dataset can generate at least '
+                       '`steps` batches (in this case, %d batches).' % steps)
+      else:
+        warning_msg = 'Number of steps ran: {} steps'.format(current_step)
+
+      logging.warning('Your dataset iterator ran out of data; '
+                      'interrupting evaluation. ' + warning_msg)
+      target_steps = current_step
+      break
     for i, label in enumerate(model.metrics_names):
       if i == 0:
         # Loss is stateless metrics.
@@ -554,15 +571,16 @@
         outs[i] = batch_outs[label]
 
     batch_logs = cbks.make_logs(model, batch_logs, outs, mode)
-    callbacks._call_batch_hook(mode, 'end', step, batch_logs)
+    callbacks._call_batch_hook(mode, 'end', current_step, batch_logs)
     if verbose >= 1:
-      progbar.update(step + 1)
+      progbar.update(current_step + 1)
+    current_step += 1
 
   callbacks._call_end_hook(mode)
 
   scope.__exit__(None, None, None)
   if len(outs) >= 0:
-    outs[0] /= (steps)
+    outs[0] /= target_steps
 
   if len(outs) == 1:
     return outs[0]
@@ -591,6 +609,8 @@
       (if the model has multiple outputs).
   """
   mode = ModeKeys.PREDICT
+  steps = training_utils.infer_steps_for_dataset(dataset, steps,
+                                                 steps_name='steps')
   dataset_fully_shaped = (distributed_training_utils.
                           is_dataset_shape_fully_defined(dataset))
   padding_handler = None
@@ -697,22 +717,40 @@
       mode=mode)
   callbacks._call_begin_hook(mode)
 
-  assert steps is not None
   # Since we do not know how many samples we will see, we cannot pre-allocate
   # the returned Numpy arrays. Instead, we store one array per batch seen
   # and concatenate them upon returning.
   unconcatenated_outs = [[] for _ in model.outputs]
-  for step in range(steps):
-    batch_logs = {'batch': step, 'size': 1}
-    callbacks._call_batch_hook(mode, 'begin', step, batch_logs)
-    _, batch_outs = K.get_session().run([predict_op, output_tensors])
+  if steps is not None:
+    target_steps = steps
+  else:
+    target_steps = np.inf
+
+  current_step = 0
+  while current_step < target_steps:
+    batch_logs = {'batch': current_step, 'size': 1}
+    callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs)
+    try:
+      _, batch_outs = K.batch_get_value([predict_op, output_tensors])
+    except errors.OutOfRangeError:
+      if steps is not None:
+        warning_msg = ('Make sure that your dataset can generate at least '
+                       '`steps` batches (in this case, %d batches).' % steps)
+      else:
+        warning_msg = 'Number of steps ran: {} steps'.format(current_step)
+
+      logging.warning('Your dataset iterator ran out of data; '
+                      'interrupting prediction. ' + warning_msg)
+      break
+
     # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy.
     for i, label in enumerate(model.output_names):
       unconcatenated_outs[i].extend(batch_outs[label])
     batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
-    callbacks._call_batch_hook(mode, 'end', step, batch_logs)
+    callbacks._call_batch_hook(mode, 'end', current_step, batch_logs)
     if verbose >= 1:
-      progbar.update(step + 1)
+      progbar.update(current_step + 1)
+    current_step += 1
 
   callbacks._call_end_hook(mode)
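Note: the recurring change in this file is one pattern: the old `assert steps is not None` plus `for step in range(steps)` loops become while-loops that run to a known target or until the iterator is exhausted, at which point the observed count becomes the step target. A self-contained sketch of the pattern (plain Python; the names are illustrative, not the module's API):

    import numpy as np

    def run_until_exhausted(next_batch, steps=None):
      """Run `next_batch()` `steps` times, or until the data runs out."""
      target_steps = steps if steps is not None else np.inf
      outputs = []
      current_step = 0
      while current_step < target_steps:
        try:
          outputs.append(next_batch())
        except StopIteration:  # stands in for errors.OutOfRangeError
          target_steps = current_step  # pin the inferred step count
          break
        current_step += 1
      return outputs, target_steps

    # With no step count given, three batches yield target_steps == 3.
    batches, inferred = run_until_exhausted(iter([1, 2, 3]).__next__)
    assert (batches, inferred) == ([1, 2, 3], 3)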
 
diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py
index 7f80f94..6fdb19c 100644
--- a/tensorflow/python/keras/engine/training_eager.py
+++ b/tensorflow/python/keras/engine/training_eager.py
@@ -28,7 +28,7 @@
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend
 from tensorflow.python.keras.engine import training_utils
-from tensorflow.python.keras.utils.losses_utils import squeeze_or_expand_dimensions
+from tensorflow.python.keras.utils import losses_utils
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
@@ -96,6 +96,10 @@
      regularization losses and applies masking and sample weighting
      to the loss value.
   """
+  # Used to keep track of the total loss value (stateless).
+  # e.g., total_loss = loss_weight_1 * output_1_loss_fn(...) +
+  #                    loss_weight_2 * output_2_loss_fn(...) +
+  #                    layer losses.
   total_loss = 0
   kwargs = {}
   if model._expects_training_arg:
@@ -116,14 +120,14 @@
   masks = [t._keras_mask for t in outs]
   targets = nest.flatten(targets)
 
-  loss_metrics = []
-  aggregated_loss_metrics = []
+  # Used to keep track of individual output losses (stateless).
+  output_losses = []
+  # Used to keep track of individual output losses (stateful).
+  aggregated_output_losses = []
+
   with backend.name_scope('loss'):
     for i, loss_fn in enumerate(model.loss_functions):
-      if sample_weights:
-        weights = sample_weights[i]
-      else:
-        weights = None
+      weights = sample_weights[i] if sample_weights else None
       mask = masks[i]
       with backend.name_scope(model.output_names[i] + '_loss'):
         if mask is not None:
@@ -133,26 +137,47 @@
             weights = mask
           else:
             # Update dimensions of weights to match with mask if possible.
-            mask, _, weights = squeeze_or_expand_dimensions(mask, None, weights)
+            mask, _, weights = (
+                losses_utils.squeeze_or_expand_dimensions(mask, None, weights))
             weights *= mask
-        output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
+
+        # Reset reduction on the loss so that we can get the per sample loss
+        # value. We use this to get both the stateless and stateful loss
+        # values without having to compute the underlying loss function
+        # twice.
+        weighted_losses = None
+        if hasattr(loss_fn, 'reduction'):
+          current_loss_reduction = loss_fn.reduction
+          loss_fn.reduction = losses_utils.ReductionV2.NONE
+          weighted_losses = loss_fn(targets[i], outs[i], sample_weight=weights)
+          loss_fn.reduction = current_loss_reduction
+
+          # Compute the stateless loss value.
+          output_loss = losses_utils.reduce_weighted_loss(weighted_losses)
+        else:
+          # Compute the stateless loss value for a custom loss class.
+          # Here we assume that the class takes care of loss reduction
+          # because if this class returns a vector value we cannot
+          # differentiate between the case where a custom optimizer
+          # expects a vector loss value and an unreduced per-sample loss.
+          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
 
       # If the number of outputs is 1 then we don't append the loss metric
       # associated with each model output. When there are multiple outputs
       # associated with a model, each output's loss is calculated and returned
       # as part of the loss_metrics.
       if len(model.outputs) > 1:
-        loss_metrics.append(backend.mean(output_loss))
-
+        output_losses.append(backend.mean(output_loss))
         if output_loss_metrics is not None:
-          # Keep track of the stateful loss result.
-          aggregated_loss_metrics.append(
-              training_utils.call_metric_function(
-                  output_loss_metrics[i],
-                  targets[i],
-                  outs[i],
-                  weights=weights,
-                  mask=mask))
+          # Compute the stateful loss value.
+          if weighted_losses is not None:
+            aggregated_output_loss = output_loss_metrics[i](weighted_losses)
+          else:
+            # Custom loss class.
+            aggregated_output_loss = training_utils.call_metric_function(
+                output_loss_metrics[i], targets[i], outs[i], weights=weights)
+          # Keep track of the stateful output loss result.
+          aggregated_output_losses.append(aggregated_output_loss)
 
       loss_weight = model.loss_weights_list[i]
       if total_loss is None:
@@ -164,10 +189,11 @@
     # Add regularization losses
     custom_losses = model.losses
     if custom_losses:
-      total_loss += math_ops.add_n(custom_losses)
+      total_loss += losses_utils.scale_loss_for_distribution(
+          math_ops.add_n(custom_losses))
     model._clear_losses()
 
-  return outs, total_loss, loss_metrics, aggregated_loss_metrics, masks
+  return outs, total_loss, output_losses, aggregated_output_losses, masks
 
 
 def _process_single_batch(model,
@@ -200,15 +226,15 @@
   """
   with backend.eager_learning_phase_scope(1 if training else 0):
     with GradientTape() as tape:
-      outs, loss, loss_metrics, aggregated_loss_metrics, masks\
-        = _model_loss(
-            model,
-            inputs,
-            targets,
-            output_loss_metrics=output_loss_metrics,
-            sample_weights=sample_weights,
-            training=training)
-      if loss is None:
+      outs, total_loss, output_losses, aggregated_output_losses, masks = (
+          _model_loss(
+              model,
+              inputs,
+              targets,
+              output_loss_metrics=output_loss_metrics,
+              sample_weights=sample_weights,
+              training=training))
+      if total_loss is None:
         raise ValueError('The model cannot be run '
                          'because it has no loss to optimize.')
     if training:
@@ -217,13 +243,18 @@
                         ' you are not setting model.trainable to False before '
                         'compiling the model.')
       else:
-        grads = tape.gradient(loss, model._collected_trainable_weights)
+        grads = tape.gradient(total_loss, model._collected_trainable_weights)
         model.optimizer.apply_gradients(zip(grads,
                                             model._collected_trainable_weights))
-    return outs, loss, loss_metrics, aggregated_loss_metrics, masks
+    return outs, total_loss, output_losses, aggregated_output_losses, masks
 
 
-def train_on_batch(model, inputs, targets, sample_weights=None):
+def train_on_batch(model,
+                   inputs,
+                   targets,
+                   sample_weights=None,
+                   reset_metrics=True,
+                   output_loss_metrics=None):
   """Calculates the loss and gradient updates for one input batch.
 
   Arguments:
@@ -231,6 +262,11 @@
       inputs: Input batch data.
       targets: Target batch data.
       sample_weights: Sample weight batch data.
+      reset_metrics: If `True`, the metrics returned will be only for this
+        batch. If `False`, the metrics will be statefully accumulated across
+        batches.
+      output_loss_metrics: List of metrics that are used to aggregate output
+        loss values.
 
   Returns:
       total loss and the loss associated with each output.
@@ -240,20 +276,24 @@
       inputs = training_utils.cast_if_floating_dtype(inputs)
       targets = training_utils.cast_if_floating_dtype(targets)
     else:
-      inputs = training_utils.cast_if_floating_dtype([
-          ops.convert_to_tensor(val) for val in inputs
-      ])
-      targets = training_utils.cast_if_floating_dtype([
-          ops.convert_to_tensor(val) for val in targets
-      ])
+      inputs = training_utils.cast_if_floating_dtype(
+          [ops.convert_to_tensor(val) for val in inputs])
+      targets = training_utils.cast_if_floating_dtype(
+          [ops.convert_to_tensor(val) for val in targets])
   if sample_weights:
     sample_weights = [
         training_utils.cast_if_floating_dtype(ops.convert_to_tensor(val))
         if val is not None else None for val in sample_weights
     ]
 
-  outs, loss, loss_metrics, _, masks = _process_single_batch(
-      model, inputs, targets, sample_weights=sample_weights, training=True)
+  outs, total_loss, output_losses, aggregated_output_losses, masks = (
+      _process_single_batch(
+          model,
+          inputs,
+          targets,
+          sample_weights=sample_weights,
+          training=True,
+          output_loss_metrics=output_loss_metrics))
   if not isinstance(outs, list):
     outs = [outs]
   metrics_results = _eager_metrics_fn(
@@ -262,16 +302,23 @@
       targets,
       sample_weights=sample_weights,
       masks=masks,
-      return_stateful_result=True)
-  loss = nest.flatten(loss)
+      return_stateful_result=not reset_metrics)
+  total_loss = nest.flatten(total_loss)
+  if reset_metrics:
+    final_output_losses = output_losses
+  else:
+    final_output_losses = aggregated_output_losses
+  results = total_loss + final_output_losses + metrics_results
 
-  return [
-      tensor_util.constant_value(v)
-      for v in loss + loss_metrics + metrics_results
-  ]
+  return [tensor_util.constant_value(v) for v in results]
 
 
-def test_on_batch(model, inputs, targets, sample_weights=None):
+def test_on_batch(model,
+                  inputs,
+                  targets,
+                  sample_weights=None,
+                  reset_metrics=True,
+                  output_loss_metrics=None):
   """Calculates the loss for one input batch.
 
   Arguments:
@@ -279,6 +326,11 @@
       inputs: Input batch data.
       targets: Target batch data.
       sample_weights: Sample weight batch data.
+      reset_metrics: If `True`, the metrics returned will be only for this
+        batch. If `False`, the metrics will be statefully accumulated across
+        batches.
+      output_loss_metrics: List of metrics that are used to aggregate output
+        loss values.
 
   Returns:
       total loss, loss and metrics associated with each output.
@@ -288,19 +340,23 @@
       inputs = training_utils.cast_if_floating_dtype(inputs)
       targets = training_utils.cast_if_floating_dtype(targets)
     else:
-      inputs = training_utils.cast_if_floating_dtype([
-          ops.convert_to_tensor(val) for val in inputs
-      ])
-      targets = training_utils.cast_if_floating_dtype([
-          ops.convert_to_tensor(val) for val in targets
-      ])
+      inputs = training_utils.cast_if_floating_dtype(
+          [ops.convert_to_tensor(val) for val in inputs])
+      targets = training_utils.cast_if_floating_dtype(
+          [ops.convert_to_tensor(val) for val in targets])
   if sample_weights:
     sample_weights = [
         training_utils.cast_if_floating_dtype(ops.convert_to_tensor(val))
         if val is not None else None for val in sample_weights
     ]
-  outs, loss, loss_metrics, _, masks = _model_loss(
-      model, inputs, targets, sample_weights=sample_weights, training=False)
+  outs, total_loss, output_losses, aggregated_output_losses, masks = (
+      _model_loss(
+          model,
+          inputs,
+          targets,
+          sample_weights=sample_weights,
+          training=False,
+          output_loss_metrics=output_loss_metrics))
   if not isinstance(outs, list):
     outs = [outs]
   metrics_results = _eager_metrics_fn(
@@ -309,10 +365,12 @@
       targets,
       sample_weights=sample_weights,
       masks=masks,
-      return_stateful_result=True)
-  loss = nest.flatten(loss)
+      return_stateful_result=not reset_metrics)
+  total_loss = nest.flatten(total_loss)
+  if reset_metrics:
+    final_output_losses = output_losses
+  else:
+    final_output_losses = aggregated_output_losses
+  results = total_loss + final_output_losses + metrics_results
 
-  return [
-      tensor_util.constant_value(v)
-      for v in loss + loss_metrics + metrics_results
-  ]
+  return [tensor_util.constant_value(v) for v in results]
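Note: the reduction-swap in _model_loss is the heart of this file's change: compute the unreduced per-sample loss tensor once, then derive the stateless batch value from it and feed the same tensor to a stateful Mean metric. A sketch using the public TF 2.x-style names (the diff itself uses losses_utils.ReductionV2; treat this as illustrative):

    import tensorflow as tf

    loss_fn = tf.keras.losses.MeanSquaredError()
    running_loss = tf.keras.metrics.Mean(name='loss_tracker')

    y_true = tf.constant([[0.0], [1.0]])
    y_pred = tf.constant([[0.5], [0.5]])

    # Temporarily disable reduction to get the per-sample losses.
    saved_reduction = loss_fn.reduction
    loss_fn.reduction = tf.keras.losses.Reduction.NONE
    per_sample = loss_fn(y_true, y_pred)  # shape [2], unreduced
    loss_fn.reduction = saved_reduction

    stateless = tf.reduce_mean(per_sample)  # this batch only
    stateful = running_loss(per_sample)     # accumulates across batches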
diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py
index 7c63161..a9fdb07 100644
--- a/tensorflow/python/keras/engine/training_generator.py
+++ b/tensorflow/python/keras/engine/training_generator.py
@@ -33,8 +33,8 @@
 from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils import data_utils
 from tensorflow.python.keras.utils import generic_utils
+from tensorflow.python.keras.utils.mode_keys import ModeKeys
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util import nest
 
 
@@ -212,8 +212,28 @@
     while step < target_steps:
       batch_data = _get_next_batch(generator, mode)
       if batch_data is None:
-        if not is_dataset:
+        if is_dataset:
+          # The dataset passed by the user ran out of batches.
+          # Now we know the cardinality of the dataset.
+          # If steps_per_epoch was specified, then running out of data is
+          # unexpected, so we stop training and inform the user.
+          if steps_per_epoch:
+            callbacks.model.stop_training = True
+            logging.warning(
+                'Your dataset ran out of data; interrupting training. '
+                'Make sure that your dataset can generate at least '
+                '`%s * epochs` batches (in this case, %d batches). '
+                'You may need to use the repeat() function when '
+                'building your dataset.'
+                % (steps_name, steps_per_epoch * epochs))
+          elif step > 0:
+            steps_per_epoch = step
+            aggregator.num_samples_or_steps = steps_per_epoch
+            progbar.params['steps'] = steps_per_epoch
+            progbar.progbar.target = steps_per_epoch
+        else:
           # We ran out of batches while the user passed an iterator (legacy).
+          callbacks.model.stop_training = True
           logging.warning(
               'Your dataset iterator ran out of data; '
               'interrupting training. Make sure that your iterator '
@@ -221,16 +241,6 @@
               'batches (in this case, %d batches). You may need to'
               'use the repeat() function when building your '
               'dataset.' % (steps_name, steps_per_epoch * epochs))
-          callbacks.model.stop_training = True
-        else:
-          # The dataset passed by the user ran out of batches.
-          # Now we know the cardinality of the dataset.
-          # assert steps_per_epoch is None
-          if step > 0:
-            steps_per_epoch = step
-            aggregator.num_samples_or_steps = steps_per_epoch
-            progbar.params['steps'] = steps_per_epoch
-            progbar.progbar.target = steps_per_epoch
         break
 
       # `batch_size` used for validation data if validation
@@ -242,13 +252,32 @@
       callbacks._call_batch_hook(mode, 'begin', step, batch_logs)
       progbar.on_batch_begin(step, batch_logs)
 
+      is_deferred = not model._is_compiled
       batch_outs = batch_function(*batch_data)
       if not isinstance(batch_outs, list):
         batch_outs = [batch_outs]
 
-      # Aggregate results.
       if step == 0:
         aggregator.create(batch_outs)
+
+        if is_deferred:
+          # Set callbacks params. We do this here when model is compiled only
+          # in the first iteration of this loop (deferred build scenario).
+          cbks.set_callback_parameters(
+              callbacks,
+              model,
+              do_validation=do_validation,
+              batch_size=batch_size,
+              epochs=epochs,
+              steps_per_epoch=steps_per_epoch,
+              samples=num_samples_or_steps,
+              verbose=verbose,
+              mode=mode)
+
+          progbar.params = callbacks.params
+          progbar.params['verbose'] = verbose
+
+      # Aggregate results.
       aggregator.aggregate(batch_outs)
 
       # Callbacks batch end.
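Note: the reordered branch above separates the two exhaustion cases: when the caller fixed steps_per_epoch, running dry is an error worth a warning and a stop; when it was left unset, the observed step count simply becomes the inferred epoch length. As a compact sketch (hypothetical helper, not the module's API):

    import logging

    def on_dataset_exhausted(steps_per_epoch, step, epochs):
      """Return (steps_per_epoch, stop_training) once the data ran out."""
      if steps_per_epoch:
        logging.warning(
            'Dataset ran out of data; expected at least %d batches.',
            steps_per_epoch * epochs)
        return steps_per_epoch, True
      if step > 0:
        # Cardinality is now known: pin the epoch length to what we saw.
        return step, False
      # Empty dataset: leave everything unchanged; the loop breaks anyway.
      return steps_per_epoch, False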
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 72c4a29..754bc08 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -34,6 +34,7 @@
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import losses
 from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.callbacks import Callback
@@ -54,6 +55,129 @@
   scipy_sparse = None
 
 
+class CompileTest(keras_parameterized.TestCase):
+
+  def _get_multi_output_model(self):
+    input_a = keras.layers.Input(shape=(3,), name='input_a')
+    output_a = keras.layers.Dense(1, name='dense_1')(input_a)
+    output_b = keras.layers.Dense(1, name='dense_2')(input_a)
+    return keras.models.Model(input_a, [output_a, output_b])
+
+  def _do_test_compile_with_model_and_single_loss(self, model, loss):
+    model.compile(optimizer='adam', loss=loss)
+    self.assertEqual(model.loss, loss)
+
+    loss = losses.get(loss)
+    loss_list = (loss if isinstance(loss, list) else
+                 [loss] * len(model.outputs))
+
+    self.assertEqual(len(model.loss_functions), len(loss_list))
+    for i in range(len(loss_list)):
+      self.assertIsInstance(model.loss_functions[i], losses.LossFunctionWrapper)
+      if not isinstance(loss_list[i], losses.LossFunctionWrapper):
+        self.assertEqual(model.loss_functions[i].fn, loss_list[i])
+    self.assertAllEqual(model.loss_weights_list, [1.] * len(loss_list))
+
+  @keras_parameterized.run_all_keras_modes
+  @parameterized.named_parameters(('loss_string', 'mse'),
+                                  ('loss_function', losses.mean_squared_error),
+                                  ('loss_instance', losses.MeanSquaredError()))
+  def test_compile_with_single_output(self, loss):
+    model = testing_utils.get_small_sequential_mlp(
+        num_hidden=10, num_classes=2, input_dim=3)
+    self._do_test_compile_with_model_and_single_loss(model, loss)
+
+  @keras_parameterized.run_all_keras_modes
+  @parameterized.named_parameters(('loss_string', 'mse'),
+                                  ('loss_function', losses.mean_squared_error),
+                                  ('loss_instance', losses.MeanSquaredError()))
+  def test_compile_with_multi_output(self, loss):
+    model = self._get_multi_output_model()
+    self._do_test_compile_with_model_and_single_loss(model, loss)
+
+  @keras_parameterized.run_all_keras_modes
+  def test_compile_with_multi_output_and_multi_loss(self):
+    model = self._get_multi_output_model()
+    # Test loss is a list.
+    loss = ['mse', 'mae']
+    model.compile(optimizer='adam', loss=loss)
+    self.assertEqual(model.loss_functions[0].fn, losses.mean_squared_error)
+    self.assertEqual(model.loss_functions[1].fn, losses.mean_absolute_error)
+    self.assertAllEqual(model.loss_weights_list, [1., 1.])
+
+    # Test loss is a dict.
+    loss = {'dense_1': 'mae', 'dense_2': 'mse'}
+    model.compile(optimizer='adam', loss=loss)
+    self.assertEqual(model.loss_functions[0].fn, losses.mean_absolute_error)
+    self.assertEqual(model.loss_functions[1].fn, losses.mean_squared_error)
+    self.assertAllEqual(model.loss_weights_list, [1., 1.])
+
+  @keras_parameterized.run_all_keras_modes
+  def test_compile_with_multi_output_and_loss_weights_list(self):
+    model = self._get_multi_output_model()
+    loss_weights = [1., 2.]
+    model.compile(optimizer='adam', loss='mse', loss_weights=loss_weights)
+    self.assertAllEqual(model.loss_weights_list, [1., 2.])
+
+  def test_compile_with_multi_output_and_loss_weights_dict(self):
+    with context.graph_mode():
+      model = self._get_multi_output_model()
+      loss_weights = {'dense_1': 1., 'dense_2': 2.}
+      model.compile(optimizer='adam', loss='mse', loss_weights=loss_weights)
+      self.assertAllEqual(model.loss_weights_list, [1., 2.])
+
+      input_np = np.random.random((10, 3))
+      output_a_np = np.random.random((10, 1))
+      output_b_np = np.random.random((10, 1))
+
+      with self.cached_session() as sess:
+        sess.run(variables_lib.global_variables_initializer())
+        total_loss, y_preds = sess.run(
+            [model.total_loss, model.outputs],
+            feed_dict={
+                'input_a:0': input_np,
+                'dense_1_target:0': output_a_np,
+                'dense_2_target:0': output_b_np
+            })
+        self.assertAllClose(
+            total_loss,
+            np.mean(
+                np.add((output_a_np - y_preds[0])**2,
+                       2 * (output_b_np - y_preds[1])**2)))
+
+  @keras_parameterized.run_all_keras_modes
+  def test_compile_with_incorrect_loss_size(self):
+    model = testing_utils.get_small_sequential_mlp(
+        num_hidden=10, num_classes=2, input_dim=3)
+    with self.assertRaisesRegexp(ValueError, 'The model has 1 outputs'):
+      model.compile(optimizer='adam', loss=['mse', 'mae'])
+
+  @keras_parameterized.run_all_keras_modes
+  def test_compile_with_incorrect_loss_key(self):
+    model = testing_utils.get_small_sequential_mlp(
+        num_hidden=10, num_classes=2, input_dim=3)
+    with self.assertRaisesRegexp(
+        ValueError, 'Unknown entry in loss dictionary: unknown_output'):
+      model.compile(optimizer='adam', loss={'unknown_output': 'mse'})
+
+  @keras_parameterized.run_all_keras_modes
+  def test_compile_with_incorrect_loss_weights_size(self):
+    model = testing_utils.get_small_sequential_mlp(
+        num_hidden=10, num_classes=2, input_dim=3)
+    with self.assertRaisesRegexp(ValueError,
+                                 'it should have one entry per model output'):
+      model.compile(optimizer='adam', loss='mse', loss_weights=[1., 2.])
+
+  @keras_parameterized.run_all_keras_modes
+  def test_compile_with_incorrect_loss_weights_key(self):
+    model = testing_utils.get_small_sequential_mlp(
+        num_hidden=10, num_classes=2, input_dim=3)
+    with self.assertRaisesRegexp(
+        ValueError, 'Unknown entry in loss_weights dictionary: unknown_output'):
+      model.compile(
+          optimizer='adam', loss='mse', loss_weights={'unknown_output': 1.})
+
+
 class TrainingTest(keras_parameterized.TestCase):
 
   @keras_parameterized.run_with_all_model_types(exclude_models='sequential')
@@ -832,6 +956,44 @@
         callbacks=[val_counter])
     self.assertEqual(val_counter.val_runs, expected_runs)
 
+  @keras_parameterized.run_all_keras_modes
+  def test_add_loss_correctness(self):
+    if testing_utils.should_run_eagerly():
+      self.skipTest('b/124303407')
+
+    class Bias(keras.layers.Layer):
+
+      def build(self, input_shape):
+        self.bias = self.add_variable('bias', (1,), initializer='zeros')
+
+      def call(self, inputs):
+        return inputs + self.bias
+
+    inputs = keras.Input(shape=(1,))
+    outputs = Bias()(inputs)
+    model = keras.Model(inputs, outputs)
+    targets = keras.Input(shape=(1,))
+
+    model.add_loss(
+        math_ops.reduce_mean(
+            keras.losses.mean_absolute_error(targets, outputs)))
+
+    # If we want to use the loss class instance as shown below, we will need to
+    # add graph scope as the reduction logic involves some eager mode checks.
+    with keras.backend.get_graph().as_default():
+      model.add_loss(keras.losses.MeanAbsoluteError()(targets, outputs))
+
+    model.compile(
+        keras.optimizer_v2.gradient_descent.SGD(0.033333),
+        loss=keras.losses.MeanAbsoluteError(),
+        target_tensors=[targets],
+        run_eagerly=testing_utils.should_run_eagerly())
+
+    x = np.array([[0.], [1.], [2.]])
+    y = np.array([[0.5], [2.], [3.5]])
+    history = model.fit(x, y, batch_size=3, epochs=5)
+    self.assertAllClose(history.history['loss'], [3., 2.7, 2.4, 2.1, 1.8], 1e-3)
+
 
 class TestExceptionsAndWarnings(keras_parameterized.TestCase):
 
@@ -885,9 +1047,9 @@
                 'dense_1': metrics_module.CategoricalAccuracy(),
             },
             run_eagerly=testing_utils.should_run_eagerly())
-        msg = ('Output "dense_1" missing from loss dictionary. We assume this '
+        msg = ('Output dense_1 missing from loss dictionary. We assume this '
                'was done on purpose. The fit and evaluate APIs will not be '
-               'expecting any data to be passed to "dense_1".')
+               'expecting any data to be passed to dense_1.')
         self.assertRegexpMatches(str(mock_log.call_args), msg)
 
 
@@ -2136,6 +2298,67 @@
     model.evaluate(x_test, y_test, batch_size=5)
     self.assertEqual(self.evaluate(acc_obj.count), 10)
 
+  @keras_parameterized.run_with_all_model_types(exclude_models=['sequential'])
+  @keras_parameterized.run_all_keras_modes
+  def test_metrics_valid_compile_input_formats(self):
+    inp_1 = keras.layers.Input(shape=(1,), name='input_1')
+    inp_2 = keras.layers.Input(shape=(1,), name='input_2')
+    x = keras.layers.Dense(3, kernel_initializer='ones', trainable=False)
+    out_1 = keras.layers.Dense(
+        1, kernel_initializer='ones', name='output_1', trainable=False)
+    out_2 = keras.layers.Dense(
+        1, kernel_initializer='ones', name='output_2', trainable=False)
+
+    branch_a = [inp_1, x, out_1]
+    branch_b = [inp_2, x, out_2]
+    model = testing_utils.get_multi_io_model(branch_a, branch_b)
+
+    # list of metrics.
+    model.compile(
+        optimizer='rmsprop',
+        loss='mse',
+        metrics=[keras.metrics.MeanSquaredError()],
+        weighted_metrics=[keras.metrics.MeanSquaredError()],
+        run_eagerly=testing_utils.should_run_eagerly())
+
+    # list of list of metrics.
+    model.compile(
+        optimizer='rmsprop',
+        loss='mse',
+        metrics=[
+            keras.metrics.MeanSquaredError(),
+            [keras.metrics.MeanSquaredError(),
+             keras.metrics.Accuracy()]
+        ],
+        weighted_metrics=[
+            keras.metrics.MeanSquaredError(),
+            [keras.metrics.MeanSquaredError(),
+             keras.metrics.Accuracy()]
+        ],
+        run_eagerly=testing_utils.should_run_eagerly())
+
+    # dict of metrics.
+    model.compile(
+        optimizer='rmsprop',
+        loss='mse',
+        metrics={
+            'output_1':
+                keras.metrics.MeanSquaredError(),
+            'output_2': [
+                keras.metrics.MeanSquaredError(),
+                keras.metrics.Accuracy()
+            ],
+        },
+        weighted_metrics={
+            'output_1':
+                keras.metrics.MeanSquaredError(),
+            'output_2': [
+                keras.metrics.MeanSquaredError(),
+                keras.metrics.Accuracy()
+            ],
+        },
+        run_eagerly=testing_utils.should_run_eagerly())
+
   @keras_parameterized.run_all_keras_modes
   def test_invalid_metrics(self):
     num_classes = 5
@@ -2153,6 +2376,17 @@
           metrics=metrics_module.CategoricalAccuracy(),
           run_eagerly=testing_utils.should_run_eagerly())
 
+    inp = keras.layers.Input(shape=(1,))
+    x = keras.layers.Dense(3, activation='relu')(inp)
+    out_1 = keras.layers.Dense(1, activation='sigmoid', name='output_1')(x)
+    out_2 = keras.layers.Dense(1, activation='sigmoid', name='output_2')(x)
+    model = keras.models.Model(inp, [out_1, out_2])
+    with self.assertRaisesRegex(
+        ValueError, 'When passing a list of lists as `metrics`, '
+        'it should have one entry per model output. '
+        'The model has 2 outputs, but you passed metrics='):
+      model.compile('rmsprop', loss='mse', metrics=[['mse']])
+
   @keras_parameterized.run_all_keras_modes
   def test_metrics_masking(self):
     if testing_utils.should_run_eagerly():
@@ -2181,40 +2415,44 @@
       scores = model.train_on_batch(x, y, sample_weight=w)
       self.assertArrayNear(scores, [0.3328, 0.8], 0.001)
 
-  @tf_test_util.run_deprecated_v1
-  def test_add_metric_with_tensor_on_model_in_graph_mode(self):
-    with self.cached_session():
-      x = keras.layers.Input(shape=(1,))
-      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
-      model = keras.models.Model(x, y)
-      model.add_metric(
-          math_ops.reduce_sum(y), name='metric_1', aggregation='mean')
+  @keras_parameterized.run_all_keras_modes
+  def test_add_metric_with_tensor_on_model(self):
+    if testing_utils.should_run_eagerly():
+      self.skipTest('b/124303407')
 
-      # test with a metric which does not have the standard signature:
-      # (y_true, y_pred, sample_Weight)
+    x = keras.layers.Input(shape=(1,))
+    y = keras.layers.Dense(1, kernel_initializer='ones')(x)
+    model = keras.models.Model(x, y)
+    model.add_metric(
+        math_ops.reduce_sum(y), name='metric_1', aggregation='mean')
+
+    # test with a metric which does not have the standard signature:
+    # (y_true, y_pred, sample_weight)
+    with keras.backend.get_graph().as_default():
       model.add_metric(metrics_module.Mean(name='metric_2')(y))
-      model.compile('sgd', loss='mse')
+    model.compile(
+        'sgd', loss='mse', run_eagerly=testing_utils.should_run_eagerly())
 
-      inputs = np.ones(shape=(10, 1))
-      targets = np.ones(shape=(10, 1))
-      history = model.fit(
-          inputs,
-          targets,
-          epochs=2,
-          batch_size=5,
-          validation_data=(inputs, targets))
-      self.assertEqual(history.history['metric_1'][-1], 5)
-      self.assertEqual(history.history['metric_2'][-1], 1)
-      self.assertEqual(history.history['val_metric_1'][-1], 5)
-      self.assertEqual(history.history['val_metric_2'][-1], 1)
+    inputs = np.ones(shape=(10, 1))
+    targets = np.ones(shape=(10, 1))
+    history = model.fit(
+        inputs,
+        targets,
+        epochs=2,
+        batch_size=5,
+        validation_data=(inputs, targets))
+    self.assertEqual(history.history['metric_1'][-1], 5)
+    self.assertEqual(history.history['metric_2'][-1], 1)
+    self.assertEqual(history.history['val_metric_1'][-1], 5)
+    self.assertEqual(history.history['val_metric_2'][-1], 1)
 
-      eval_results = model.evaluate(inputs, targets, batch_size=5)
-      self.assertEqual(eval_results[-1], 1)
-      self.assertEqual(eval_results[-2], 5)
+    eval_results = model.evaluate(inputs, targets, batch_size=5)
+    self.assertEqual(eval_results[-1], 1)
+    self.assertEqual(eval_results[-2], 5)
 
-      model.predict(inputs, batch_size=5)
-      model.train_on_batch(inputs, targets)
-      model.test_on_batch(inputs, targets)
+    model.predict(inputs, batch_size=5)
+    model.train_on_batch(inputs, targets)
+    model.test_on_batch(inputs, targets)
 
   @keras_parameterized.run_all_keras_modes
   def test_add_metric_in_model_call(self):
@@ -2254,6 +2492,7 @@
     model.train_on_batch(x, y)
     model.test_on_batch(x, y)
 
+  @keras_parameterized.run_with_all_model_types
   @keras_parameterized.run_all_keras_modes
   def test_add_metric_in_layer_call(self):
 
@@ -2269,9 +2508,11 @@
             math_ops.reduce_sum(inputs), name='metric_1', aggregation='mean')
         return inputs + 1
 
-    model = keras.Sequential()
-    model.add(TestLayer(input_shape=(1,)))
-    model.add(keras.layers.Dense(2, kernel_initializer='ones'))
+    layers = [
+        TestLayer(input_shape=(1,)),
+        keras.layers.Dense(2, kernel_initializer='ones')
+    ]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(1,))
     model.compile(loss='mse', optimizer=RMSPropOptimizer(0.01),
                   run_eagerly=testing_utils.should_run_eagerly())
 
@@ -2281,60 +2522,64 @@
     self.assertEqual(history.history['metric_1'][-1], 5)
     self.assertAlmostEqual(history.history['val_metric_1'][-1], 5, 0)
 
-  @tf_test_util.run_deprecated_v1
+  @keras_parameterized.run_all_keras_modes
   def test_model_metrics_list(self):
-    with self.cached_session():
-      x = keras.layers.Input(shape=(1,))
-      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
-      model = keras.models.Model(x, y)
-      model.add_metric(
-          math_ops.reduce_sum(y), name='metric_1', aggregation='mean')
+    x = keras.layers.Input(shape=(1,))
+    y = keras.layers.Dense(1, kernel_initializer='ones')(x)
+    model = keras.models.Model(x, y)
+    model.add_metric(
+        math_ops.reduce_sum(y), name='metric_1', aggregation='mean')
+    with keras.backend.get_graph().as_default():
       model.add_metric(metrics_module.Mean(name='metric_2')(y))
-      model.compile('sgd', loss='mse', metrics=['acc'])
+    model.compile(
+        'sgd',
+        loss='mse',
+        metrics=['acc'],
+        run_eagerly=testing_utils.should_run_eagerly())
 
-      # Verify that the metrics added using `compile` and `add_metric` API are
-      # included
-      self.assertEqual(model._compile_metrics, ['acc'])
-      names = []
-      for m in model.metrics:
-        if isinstance(m, metrics_module.Metric):
-          names.append(m.name)
-        else:
-          names.append(m.__name__)
-      self.assertEqual(names, ['binary_accuracy', 'metric_1', 'metric_2'])
+    # Verify that the metrics added using the `compile` and `add_metric` APIs
+    # are included.
+    self.assertEqual(model._compile_metrics, ['acc'])
+    names = []
+    for m in model.metrics:
+      if isinstance(m, metrics_module.Metric):
+        names.append(m.name)
+      else:
+        names.append(m.__name__)
+    self.assertEqual(names, ['binary_accuracy', 'metric_1', 'metric_2'])
 
-  def test_model_eager_metrics_list(self):
-    with context.eager_mode():
+  @keras_parameterized.run_all_keras_modes
+  def test_model_metrics_list_in_call(self):
 
-      class TestModel(keras.Model):
+    class TestModel(keras.Model):
 
-        def __init__(self):
-          super(TestModel, self).__init__(name='test_model')
-          self.dense1 = keras.layers.Dense(2, kernel_initializer='ones')
+      def __init__(self):
+        super(TestModel, self).__init__(name='test_model')
+        self.dense1 = keras.layers.Dense(2, kernel_initializer='ones')
 
-        def call(self, x):
-          self.add_metric(
-              math_ops.reduce_sum(x), name='metric_1', aggregation='mean')
-          return self.dense1(x)
+      def call(self, x):
+        self.add_metric(
+            math_ops.reduce_sum(x), name='metric_1', aggregation='mean')
+        return self.dense1(x)
 
-      model = TestModel()
-      model.compile(
-          loss='mse',
-          optimizer=RMSPropOptimizer(0.01),
-          metrics=['acc'],
-          run_eagerly=True)
-      x = np.ones(shape=(10, 1))
-      y = np.ones(shape=(10, 2))
-      model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y))
+    model = TestModel()
+    model.compile(
+        loss='mse',
+        optimizer=RMSPropOptimizer(0.01),
+        metrics=['acc'],
+        run_eagerly=testing_utils.should_run_eagerly())
+    x = np.ones(shape=(10, 1))
+    y = np.ones(shape=(10, 2))
+    model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y))
 
-      self.assertEqual(model._compile_metrics, ['acc'])
-      names = []
-      for m in model.metrics:
-        if isinstance(m, metrics_module.Metric):
-          names.append(m.name)
-        else:
-          names.append(m.__name__)
-      self.assertEqual(names, ['categorical_accuracy', 'metric_1'])
+    self.assertEqual(model._compile_metrics, ['acc'])
+    names = []
+    for m in model.metrics:
+      if isinstance(m, metrics_module.Metric):
+        names.append(m.name)
+      else:
+        names.append(m.__name__)
+    self.assertEqual(names, ['categorical_accuracy', 'metric_1'])
 
   @keras_parameterized.run_all_keras_modes
   def test_multiple_add_metric_calls(self):
@@ -2372,28 +2617,34 @@
     model.train_on_batch(x, y)
     model.test_on_batch(x, y)
 
-  def test_invalid_metric_tensor_in_call(self):
-    with context.eager_mode():
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
+  def test_invalid_metric_tensor(self):
 
-      class TestLayer(keras.layers.Layer):
+    class TestLayer(keras.layers.Layer):
 
-        def call(self, inputs):
-          self.add_metric(metrics_module.Mean(name='metric_1')(inputs))
-          return inputs + 1
+      def build(self, input_shape):
+        self.built = True
 
-      model = keras.Sequential()
-      model.add(TestLayer(input_shape=(1,)))
-      model.add(keras.layers.Dense(2, kernel_initializer='ones'))
+      def call(self, inputs):
+        self.add_metric(math_ops.reduce_mean(inputs), name='metric_1')
+        return inputs + 1
+
+    layers = [TestLayer(input_shape=(1,))]
+    layers.append(keras.layers.Dense(2, kernel_initializer='ones'))
+    x = np.ones(shape=(10, 1))
+    y = np.ones(shape=(10, 2))
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        'We do not support adding an aggregated metric result tensor that is '
+        'not the output of a `tf.keras.metrics.Metric` metric instance.'):
+      model = testing_utils.get_model_from_layers(layers, input_shape=(1,))
       model.compile(
-          loss='mse', optimizer=RMSPropOptimizer(0.01), run_eagerly=True)
-
-      x = np.ones(shape=(10, 1))
-      y = np.ones(shape=(10, 2))
-      with self.assertRaisesRegexp(
-          ValueError,
-          'We do not support adding an aggregated metric tensor in `call` in '
-          'eager execution.'):
-        model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y))
+          loss='mse',
+          optimizer=RMSPropOptimizer(0.01),
+          run_eagerly=testing_utils.should_run_eagerly())
+      model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y))
 
   @keras_parameterized.run_all_keras_modes
   def test_duplicate_metric_name_in_add_metric(self):
@@ -2423,10 +2674,7 @@
       model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y))
 
   @keras_parameterized.run_all_keras_modes
-  def test_multiple_no_name_input_to_add_metric(self):
-    # TODO(kaftan) Test seems to not work, file ticket
-    if testing_utils.should_run_eagerly() and context.executing_eagerly():
-      self.skipTest('Skipping running model eagerly.')
+  def test_add_metric_without_name(self):
 
     class TestModel(keras.Model):
 
@@ -2436,7 +2684,6 @@
 
       def call(self, x):
         self.add_metric(math_ops.reduce_sum(x), aggregation='mean')
-        self.add_metric(math_ops.reduce_sum(x), aggregation='mean')
         return self.dense1(x)
 
     model = TestModel()
@@ -2444,8 +2691,58 @@
                   run_eagerly=testing_utils.should_run_eagerly())
     x = np.ones(shape=(10, 1))
     y = np.ones(shape=(10, 2))
-    model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y))
-    self.assertEqual([m.name for m in model.metrics], ['mean', 'mean_1'])
+
+    with self.assertRaisesRegex(ValueError,
+                                'Please provide a name for your metric like'):
+      model.fit(x, y, epochs=2, batch_size=5, validation_data=(x, y))
+
+  @keras_parameterized.run_all_keras_modes
+  def test_add_metric_correctness(self):
+    if testing_utils.should_run_eagerly():
+      self.skipTest('b/124303407')
+
+    inputs = keras.Input(shape=(1,))
+    targets = keras.Input(shape=(1,))
+
+    class Bias(keras.layers.Layer):
+
+      def build(self, input_shape):
+        self.bias = self.add_variable('bias', (1,), initializer='zeros')
+        self.mae = metrics_module.MeanAbsoluteError(name='mae_1')
+
+      def call(self, inputs):
+        outputs = inputs + self.bias
+        self.add_metric(self.mae(targets, outputs), name='mae_1')
+        return outputs
+
+    outputs = Bias()(inputs)
+    model = keras.Model(inputs, outputs)
+
+    model.add_metric(
+        metrics_module.mean_absolute_error(targets, outputs),
+        name='mae_2',
+        aggregation='mean')
+
+    # If we want to use the metric class instance as shown below, we will need
+    # to add graph scope as the reduction logic involves some eager mode checks.
+    with keras.backend.get_graph().as_default():
+      model.add_metric(
+          metrics_module.MeanAbsoluteError(name='mae_3')(targets, outputs))
+
+    model.compile(
+        loss='mae',
+        optimizer=keras.optimizer_v2.gradient_descent.SGD(0.1),
+        metrics=[metrics_module.MeanAbsoluteError(name='mae_4')],
+        target_tensors=[targets],
+        run_eagerly=testing_utils.should_run_eagerly())
+
+    x = np.array([[0.], [1.], [2.]])
+    y = np.array([[0.5], [2.], [3.5]])
+    history = model.fit(x, y, batch_size=3, epochs=5)
+
+    expected_val = [1., 0.9, 0.8, 0.7, 0.6]
+    for key in ['loss', 'mae_1', 'mae_2', 'mae_3', 'mae_4']:
+      self.assertAllClose(history.history[key], expected_val, 1e-3)
 
   @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
   def test_a1_total_loss_available_with_dict_dataset(self):
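Note: the CompileTest and metrics tests added above pin down the input formats `compile` accepts for multi-output models. Condensed into one usage sketch (TF 2.x-style public API assumed; illustrative only):

    import tensorflow as tf

    inp = tf.keras.Input(shape=(3,))
    out_1 = tf.keras.layers.Dense(1, name='dense_1')(inp)
    out_2 = tf.keras.layers.Dense(1, name='dense_2')(inp)
    model = tf.keras.Model(inp, [out_1, out_2])

    model.compile(
        optimizer='adam',
        # loss: one value, a list aligned 1:1 with outputs, or a dict
        # keyed by output name (missing keys log a warning).
        loss={'dense_1': 'mae', 'dense_2': 'mse'},
        # loss_weights: a list or a dict; missing dict keys default to 1.
        loss_weights={'dense_2': 2.0},
        # metrics: a flat list (applied to every output), a list of
        # lists (one entry per output), or a dict keyed by output name.
        metrics=[['mse'], ['mse', 'accuracy']])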
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 22f976a..16aaa51 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -31,6 +31,7 @@
 from tensorflow.python.data.ops import readers
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -524,7 +525,7 @@
   """Maps metric names and functions to model outputs.
 
   Arguments:
-      metrics: a list or dict of metric functions.
+      metrics: a list or a list of lists or a dict of metric functions.
       output_names: a list of the names (strings) of model outputs.
       output_shapes: a list of the shapes (strings) of model outputs.
       loss_fns: a list of the loss functions corresponding to the model outputs.
@@ -550,20 +551,30 @@
   """
   if not metrics:
     return [{} for _ in output_names]
+
   if isinstance(metrics, list):
-    # we then apply all metrics to all outputs.
-    if len(output_names) > 1:
-      nested_metrics = []
-      for _ in output_names:
-        nested_metrics.append([metrics_module.clone_metric(m) for m in metrics])
+    any_sub_list = any(isinstance(m, list) for m in metrics)
+    if any_sub_list:
+      if len(metrics) != len(output_names):
+        raise ValueError('When passing a list of lists as `metrics`, '
+                         'it should have one entry per model output. '
+                         'The model has ' + str(len(output_names)) +
+                         ' outputs, but you passed metrics=' + str(metrics))
+      # User has provided a list of len = len(outputs).
+      nested_metrics = [generic_utils.to_list(m) for m in metrics]
     else:
-      nested_metrics = [metrics]
+      # If it is a single list we then apply all metrics to all outputs.
+      if len(output_names) > 1:
+        nested_metrics = []
+        for _ in output_names:
+          nested_metrics.append(
+              [metrics_module.clone_metric(m) for m in metrics])
+      else:
+        nested_metrics = [metrics]
   elif isinstance(metrics, dict):
     nested_metrics = []
     for name in output_names:
-      output_metrics = metrics.get(name, [])
-      if not isinstance(output_metrics, list):
-        output_metrics = [output_metrics]
+      output_metrics = generic_utils.to_list(metrics.get(name, []))
       nested_metrics.append(output_metrics)
   else:
     raise TypeError('Type of `metrics` argument not understood. '
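Note: the hunk above adds the list-of-lists case to the metric-to-output mapping. Its normalization rule, restated as a standalone sketch (the real code also clones metric objects when fanning a flat list out to several outputs):

    def nest_metrics(metrics, output_names):
      """Normalize `metrics` into one list of metrics per output."""
      to_list = lambda x: x if isinstance(x, list) else [x]
      if not metrics:
        return [[] for _ in output_names]
      if isinstance(metrics, dict):
        return [to_list(metrics.get(name, [])) for name in output_names]
      if any(isinstance(m, list) for m in metrics):
        if len(metrics) != len(output_names):
          raise ValueError('When passing a list of lists as `metrics`, '
                           'it should have one entry per model output.')
        return [to_list(m) for m in metrics]
      return [list(metrics) for _ in output_names]  # fan out to all

    assert nest_metrics(['mse'], ['a', 'b']) == [['mse'], ['mse']]
    assert nest_metrics([['mse'], ['mae']], ['a', 'b']) == [['mse'], ['mae']]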
@@ -1106,6 +1117,95 @@
   return sample_weights, sample_weight_modes
 
 
+def prepare_loss_functions(loss, output_names):
+  """Converts loss to a list of loss functions.
+
+  Arguments:
+      loss: String (name of objective function), objective function or
+        `tf.losses.Loss` instance. See `tf.losses`. If the model has multiple
+        outputs, you can use a different loss on each output by passing a
+        dictionary or a list of losses. The loss value that will be minimized by
+        the model will then be the sum of all individual losses.
+      output_names: List of model output names.
+
+  Returns:
+      A list of loss objective functions.
+
+  Raises:
+      ValueError: If loss is a dict with keys not in model output names,
+          or if loss is a list with len not equal to model outputs.
+  """
+  if isinstance(loss, collections.Mapping):
+    for name in loss:
+      if name not in output_names:
+        raise ValueError('Unknown entry in loss dictionary: {}. Only expected '
+                         'the following keys: {}'.format(name, output_names))
+    loss_functions = []
+    for name in output_names:
+      if name not in loss:
+        logging.warning(
+            'Output {0} missing from loss dictionary. We assume '
+            'this was done on purpose. The fit and evaluate APIs will not be '
+            'expecting any data to be passed to {0}.'.format(name))
+      loss_functions.append(get_loss_function(loss.get(name, None)))
+  elif isinstance(loss, six.string_types):
+    loss_functions = [get_loss_function(loss) for _ in output_names]
+  elif isinstance(loss, collections.Sequence):
+    if len(loss) != len(output_names):
+      raise ValueError('When passing a list as loss, it should have one entry '
+                       'per model output. The model has {} outputs, but you '
+                       'passed loss={}'.format(len(output_names), loss))
+    loss_functions = nest.map_structure(get_loss_function, loss)
+  else:
+    loss_functions = [get_loss_function(loss) for _ in range(len(output_names))]
+
+  return loss_functions
+
+
+def prepare_loss_weights(output_names, loss_weights=None):
+  """Converts loss weights to a list of loss weights.
+
+  Arguments:
+      output_names: List of model output names.
+      loss_weights: Optional list or dictionary specifying scalar coefficients
+        (Python floats) to weight the loss contributions of different model
+        outputs. The loss value that will be minimized by the model will then be
+        the *weighted sum* of all individual losses, weighted by the
+        `loss_weights` coefficients. If a list, it is expected to have a 1:1
+        mapping to the model's outputs. If a dict, it is expected to map
+        output names (strings) to scalar coefficients.
+
+  Returns:
+      A list of loss weights of python floats.
+
+  Raises:
+      ValueError: If loss_weights is a dict with keys not in model output
+          names, or if loss_weights is a list whose length does not match the
+          number of model outputs.
+  """
+  if loss_weights is None:
+    weights_list = [1.] * len(output_names)
+  elif isinstance(loss_weights, dict):
+    for name in loss_weights:
+      if name not in output_names:
+        raise ValueError('Unknown entry in loss_weights dictionary: {}. '
+                         'Only expected the following keys: {}'.format(
+                             name, output_names))
+    weights_list = [loss_weights.get(name, 1.) for name in output_names]
+  elif isinstance(loss_weights, list):
+    if len(loss_weights) != len(output_names):
+      raise ValueError('When passing a list as loss_weights, '
+                       'it should have one entry per model output. '
+                       'The model has ' + str(len(output_names)) +
+                       ' outputs, but you passed loss_weights=' +
+                       str(loss_weights))
+    weights_list = loss_weights
+  else:
+    raise TypeError('Could not interpret loss_weights argument: ' +
+                    str(loss_weights) + ' - expected a list or a dict.')
+
+  return weights_list
+
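And the analogous normalization for weights, as a minimal sketch (validation of unknown keys and list length elided):

```python
def prepare_weights_sketch(output_names, loss_weights=None):
  if loss_weights is None:
    return [1.] * len(output_names)        # every output weighted equally
  if isinstance(loss_weights, dict):
    # Outputs missing from the dict default to a weight of 1.0.
    return [loss_weights.get(name, 1.) for name in output_names]
  return list(loss_weights)                # list form: one weight per output

print(prepare_weights_sketch(['main', 'aux'], {'main': 0.8}))
# [0.8, 1.0]
```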
+
 # TODO(rohanj): This is a hack to get around not depending on feature_column and
 # create a cyclical dependency. Figure out a cleaner solution
 def is_feature_layer(layer):
@@ -1269,7 +1369,7 @@
 def initialize_iterator(iterator):
   init_op = iterator.initializer
   if not context.executing_eagerly():
-    K.get_session().run(init_op)
+    K.get_session((init_op,)).run(init_op)
 
 
 def extract_tensors_from_dataset(dataset):
@@ -1416,7 +1516,10 @@
         # we have. The user should call `model._set_inputs(placeholders)`
         # to specify custom placeholders if the need arises.
         shape = (None,) + tuple(v.shape[1:])
-        v = K.placeholder(shape=shape, name=k)
+        dtype = dtypes.as_dtype(v.dtype)
+        if dtype.is_floating:
+          dtype = K.floatx()
+        v = K.placeholder(shape=shape, name=k, dtype=dtype)
       elif isinstance(v, tensor_shape.TensorShape):
         shape = (None,) + tuple(v.as_list()[1:])
         v = K.placeholder(shape=shape, name=k)
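The hunk above stops coercing every placeholder to `floatx`: floating inputs still follow the Keras default, but integer and other dtypes are now preserved. A rough numpy-only restatement of the rule (the helper name is illustrative, not TF API):

```python
import numpy as np

def placeholder_dtype_sketch(value, floatx='float32'):
  dtype = np.asarray(value).dtype
  # Floating dtypes collapse to Keras' floatx; everything else
  # (int32, bool, ...) keeps its own dtype.
  return floatx if np.issubdtype(dtype, np.floating) else str(dtype)

print(placeholder_dtype_sketch(np.ones(10, dtype=np.float64)))  # float32
print(placeholder_dtype_sketch(np.ones(10, dtype=np.int32)))    # int32
```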
@@ -1452,7 +1555,7 @@
       does not have a defined input shape.
 
   Raises:
-    ValueError: in case an empty Sequential or Graph Network is passed.
+    ValueError: in case an empty Sequential or Functional model is passed.
   """
 
   def _is_graph_model(layer):
diff --git a/tensorflow/python/keras/engine/training_utils_test.py b/tensorflow/python/keras/engine/training_utils_test.py
index e472dc3..30e3d1f 100644
--- a/tensorflow/python/keras/engine/training_utils_test.py
+++ b/tensorflow/python/keras/engine/training_utils_test.py
@@ -25,7 +25,9 @@
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
 from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_util
+from tensorflow.python.keras import backend
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import training_utils
@@ -45,10 +47,11 @@
     vals = model_inputs.get_symbolic_inputs(return_single_as_list=True)
     self.assertEqual(1, len(vals))
     self.assertTrue(tensor_util.is_tensor(vals[0]))
+    self.assertEqual(backend.floatx(), vals[0].dtype)
 
   def test_single_thing_eager(self):
     with context.eager_mode():
-      a = np.ones(10)
+      a = np.ones(10, dtype=np.int32)
       model_inputs = training_utils.ModelInputs(a)
       self.assertEqual(['input_1'], model_inputs.get_input_names())
       val = model_inputs.get_symbolic_inputs()
@@ -56,6 +59,7 @@
       vals = model_inputs.get_symbolic_inputs(return_single_as_list=True)
       self.assertEqual(1, len(vals))
       self.assertTrue(tf_utils.is_symbolic_tensor(vals[0]))
+      self.assertEqual(dtypes.int32, vals[0].dtype)
 
   def test_list(self):
     a = [np.ones(10), np.ones(20)]
diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py
index 30b919c..8d80eb8 100644
--- a/tensorflow/python/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/layers/convolutional.py
@@ -689,7 +689,7 @@
       - [A guide to convolution arithmetic for deep
         learning](https://arxiv.org/abs/1603.07285v1)
       - [Deconvolutional
-        Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
+        Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)
   """
 
   def __init__(self,
@@ -972,7 +972,7 @@
       - [A guide to convolution arithmetic for deep
         learning](https://arxiv.org/abs/1603.07285v1)
       - [Deconvolutional
-        Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
+        Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)
   """
 
   def __init__(self,
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index 9140ce4..24b61fe 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -23,6 +23,7 @@
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
+from tensorflow.python.framework import test_util
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
@@ -420,6 +421,7 @@
       keras.layers.ZeroPadding3D(padding=None)
 
 
+@test_util.disable_all_xla('b/124289666')  # align_corners=False unimplemented
 @keras_parameterized.run_all_keras_modes
 class UpSamplingTest(keras_parameterized.TestCase):
 
diff --git a/tensorflow/python/keras/layers/kernelized_test.py b/tensorflow/python/keras/layers/kernelized_test.py
index 300b1b8..0bc6e8b 100644
--- a/tensorflow/python/keras/layers/kernelized_test.py
+++ b/tensorflow/python/keras/layers/kernelized_test.py
@@ -288,7 +288,7 @@
       ('other', init_ops.random_uniform_initializer(), 5.0))
   @test_util.run_in_graph_and_eager_modes()
   def test_different_params_similar_approximation(self, initializer, scale):
-    # Layers initialized using different randomness (seed).
+    random_seed.set_random_seed(12345)
     rff_layer1 = kernel_layers.RandomFourierFeatures(
         output_dim=3000,
         kernel_initializer=initializer,
@@ -314,7 +314,7 @@
     # rff_layer(x)^T * rff_layer(y) ~= K(x,y) up to a normalization factor.
     approx_kernel1 = kernelized_utils.inner_product(output_x1, output_y1)
     approx_kernel2 = kernelized_utils.inner_product(output_x2, output_y2)
-    self._assert_all_close(approx_kernel1, approx_kernel2, atol=0.05)
+    self._assert_all_close(approx_kernel1, approx_kernel2, atol=0.08)
 
   @parameterized.named_parameters(
       ('gaussian', 'gaussian', 5.0, _exact_gaussian(stddev=5.0)),
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 79c6197..f90d97c 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -42,15 +42,15 @@
 from tensorflow.python.ops import gen_cudnn_rnn_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import keras_export
 
 
 # The following string constants are used by Defun approach for unified backend
 # of LSTM and GRU.
-_DEFUN_API_NAME_ATTRIBUTE = 'experimental_api_implements'
-_DEFUN_DEVICE_ATTRIBUTE = 'experimental_api_preferred_device'
+_DEFUN_API_NAME_ATTRIBUTE = 'api_implements'
+_DEFUN_DEVICE_ATTRIBUTE = 'api_preferred_device'
 _CPU_DEVICE_NAME = 'CPU'
 _GPU_DEVICE_NAME = 'GPU'
 
@@ -78,7 +78,6 @@
   ```
   """
 
-  @checkpointable.no_automatic_dependency_tracking
   def __init__(self, cells, **kwargs):
     for cell in cells:
       if not hasattr(cell, 'call'):
@@ -443,7 +442,7 @@
   ```
   """
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def __init__(self,
                cell,
                return_sequences=False,
@@ -468,8 +467,8 @@
     self.zero_output_for_mask = kwargs.pop('zero_output_for_mask', False)
     super(RNN, self).__init__(**kwargs)
     self.cell = cell
-    if isinstance(cell, checkpointable.Checkpointable):
-      self._track_checkpointable(self.cell, name='cell')
+    if isinstance(cell, trackable.Trackable):
+      self._track_trackable(self.cell, name='cell')
     self.return_sequences = return_sequences
     self.return_state = return_state
     self.go_backwards = go_backwards
@@ -781,9 +780,9 @@
     else:
       input_shape = K.int_shape(inputs)
     timesteps = input_shape[0] if self.time_major else input_shape[1]
-    if self.unroll and timesteps in [None, 1]:
+    if self.unroll and timesteps is None:
       raise ValueError('Cannot unroll a RNN if the '
-                       'time dimension is undefined or equal to 1. \n'
+                       'time dimension is undefined. \n'
                        '- If using a Sequential model, '
                        'specify the time dimension by passing '
                        'an `input_shape` or `batch_input_shape` '
@@ -2020,6 +2019,8 @@
           True = "after" (default and CuDNN compatible).
   """
 
+  _setattr_tracking = False  # TODO(allenl): Figure out why this is needed.
+
   def __init__(self,
                units,
                activation='tanh',
@@ -2114,8 +2115,7 @@
           time_major=self.time_major,
           zero_output_for_mask=self.zero_output_for_mask)
       # This is a dummy tensor for testing purpose.
-      runtime = constant_op.constant(
-          'unknown', dtype=dtypes.string, name='runtime')
+      runtime = _runtime('unknown')
     else:
       last_output, outputs, runtime, states = self._defun_gru_call(
           inputs, initial_state, training)
@@ -2174,11 +2174,11 @@
             recurrent_activation=self.recurrent_activation,
             time_major=self.time_major)
     else:
-      experimental_api_name = 'gru_' + str(uuid.uuid4())
+      api_name = 'gru_' + str(uuid.uuid4())
       defun_standard_gru = _generate_defun_backend(
-          experimental_api_name, _CPU_DEVICE_NAME, standard_gru)
+          api_name, _CPU_DEVICE_NAME, standard_gru)
       defun_cudnn_gru = _generate_defun_backend(
-          experimental_api_name, _GPU_DEVICE_NAME, cudnn_gru)
+          api_name, _GPU_DEVICE_NAME, cudnn_gru)
       # Call the normal GRU impl and register the CuDNN impl function. The
       # grappler will kick in during session execution to optimize the graph.
       last_output, outputs, new_h, runtime = defun_standard_gru(
@@ -2266,8 +2266,7 @@
       unroll=False,
       time_major=time_major,
       input_length=timesteps)
-  return last_output, outputs, new_states[0], constant_op.constant(
-      'cpu', dtype=dtypes.string, name='runtime')
+  return last_output, outputs, new_states[0], _runtime('cpu')
 
 
 def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, time_major):
@@ -2309,8 +2308,7 @@
   if not time_major:
     outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
   h = h[0]
-  return last_output, outputs, h, constant_op.constant(
-      'cudnn', dtype=dtypes.string, name='runtime')
+  return last_output, outputs, h, _runtime('cudnn')
 
 
 @keras_export('keras.layers.LSTMCell')
@@ -3090,8 +3088,7 @@
           input_length=timesteps,
           time_major=self.time_major,
           zero_output_for_mask=self.zero_output_for_mask)
-      runtime = constant_op.constant(
-          'unknown', dtype=dtypes.string, name='runtime')
+      runtime = _runtime('unknown')
     else:
       # Use the new defun approach for backend implementation swap.
       # Note that different implementations need to have same function
@@ -3129,11 +3126,11 @@
         # identifiable API name, so that Grappler won't get confused when it
         # sees multiple LSTM layers added into same graph, and it will be able
         # to pair up the different implementations across them.
-        experimental_api_name = 'lstm_' + str(uuid.uuid4())
+        api_name = 'lstm_' + str(uuid.uuid4())
         defun_standard_lstm = _generate_defun_backend(
-            experimental_api_name, _CPU_DEVICE_NAME, standard_lstm)
+            api_name, _CPU_DEVICE_NAME, standard_lstm)
         defun_cudnn_lstm = _generate_defun_backend(
-            experimental_api_name, _GPU_DEVICE_NAME, cudnn_lstm)
+            api_name, _GPU_DEVICE_NAME, cudnn_lstm)
 
         # Call the normal LSTM impl and register the CuDNN impl function. The
         # grappler will kick in during session execution to optimize the graph.
@@ -3266,8 +3263,7 @@
       unroll=False,
       time_major=time_major,
       input_length=timesteps)
-  return last_output, outputs, new_states[0], new_states[
-      1], constant_op.constant('cpu', dtype=dtypes.string, name='runtime')
+  return last_output, outputs, new_states[0], new_states[1], _runtime('cpu')
 
 
 def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias,
@@ -3298,8 +3294,7 @@
   h = h[0]
   c = c[0]
 
-  return last_output, outputs, h, c, constant_op.constant(
-      'cudnn', dtype=dtypes.string, name='runtime')
+  return last_output, outputs, h, c, _runtime('cudnn')
 
 
 def _generate_dropout_mask(ones, rate, training=None, count=1):
@@ -3418,3 +3413,9 @@
   }
   return function.defun_with_attributes(func=func,
                                         attributes=function_attributes)
+
+
+def _runtime(runtime_name):
+  with ops.device('/cpu:0'):
+    return constant_op.constant(
+        runtime_name, dtype=dtypes.string, name='runtime')
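For context on the renamed attributes: `_generate_defun_backend` registers two concrete functions under one `api_implements` key with different `api_preferred_device` values, and Grappler's implementation selector may later substitute the GPU variant. A hedged sketch; `defun_with_attributes` is injected rather than imported so the snippet stays self-contained:

```python
import uuid

def make_backend_pair_sketch(standard_fn, cudnn_fn, defun_with_attributes):
  # One unique key per layer instance, so multiple RNN layers in the
  # same graph do not get their implementations confused.
  api_name = 'lstm_' + str(uuid.uuid4())

  def register(func, device):
    return defun_with_attributes(
        func=func,
        attributes={'api_implements': api_name,
                    'api_preferred_device': device})

  # The CPU variant is the one actually called; the GPU variant is only
  # registered so the graph rewriter can swap it in when profitable.
  return register(standard_fn, 'CPU'), register(cudnn_fn, 'GPU')
```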
diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py
index ddea2f4..4b7aeb9 100644
--- a/tensorflow/python/keras/layers/recurrent_test.py
+++ b/tensorflow/python/keras/layers/recurrent_test.py
@@ -40,7 +40,8 @@
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import util as checkpointable_util
+from tensorflow.python.training.tracking import object_identity
+from tensorflow.python.training.tracking import util as trackable_util
 from tensorflow.python.util import nest
 
 # Used for nested input/output/state RNN test.
@@ -715,7 +716,7 @@
         [tuple(o.as_list()) for o in output_shape],
         expected_output_shape)
 
-  def test_checkpointable_dependencies(self):
+  def test_trackable_dependencies(self):
     rnn = keras.layers.SimpleRNN
     x = np.random.random((2, 2, 2))
     y = np.random.random((2, 2))
@@ -728,8 +729,9 @@
     model.fit(x, y, epochs=1, batch_size=1)
 
     # check whether the model variables are present in the
-    # checkpointable list of objects
-    checkpointed_objects = set(checkpointable_util.list_objects(model))
+    # trackable list of objects
+    checkpointed_objects = object_identity.ObjectIdentitySet(
+        trackable_util.list_objects(model))
     for v in model.variables:
       self.assertIn(v, checkpointed_objects)
 
@@ -1162,6 +1164,30 @@
       result_1[5, 3:] = 0
       self.assertAllClose(result_1, result_2)
 
+  def test_unroll_single_step(self):
+    """Even if the time dimension is only one, we should be able to unroll."""
+    cell = keras.layers.SimpleRNNCell(5)
+    x = keras.Input((1, 5))
+    layer = keras.layers.RNN(cell, return_sequences=True, unroll=True)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(
+        optimizer='rmsprop',
+        loss='mse',
+        run_eagerly=testing_utils.should_run_eagerly())
+
+    np_x = np.ones((6, 1, 5))
+    result = model.predict(np_x)
+    self.assertEqual((6, 1, 5), result.shape)
+
+  def test_unroll_undefined_timesteps(self):
+    """If the time dimension is undefined (None), unrolling should fail."""
+    cell = keras.layers.SimpleRNNCell(5)
+    x = keras.Input((None, 5))
+    layer = keras.layers.RNN(cell, return_sequences=True, unroll=True)
+    with self.assertRaisesRegexp(ValueError, 'Cannot unroll a RNN.*'):
+      layer(x)
+
 
 class Minimal2DRNNCell(keras.layers.Layer):
   """The minimal 2D RNN cell is a simple combination of 2 1-D RNN cell.
diff --git a/tensorflow/python/keras/layers/serialization.py b/tensorflow/python/keras/layers/serialization.py
index 932cc58..95adf78 100644
--- a/tensorflow/python/keras/layers/serialization.py
+++ b/tensorflow/python/keras/layers/serialization.py
@@ -39,10 +39,20 @@
 from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
 from tensorflow.python.util.tf_export import keras_export
 
+# TODO(b/124791387): replace mapping with layer attribute.
+_V2_CONVERSION_TABLE = {
+    # BatchNormalization Layer
+    'BatchNormalizationV1': 'BatchNormalization',
+    'BatchNormalizationV2': 'BatchNormalization',
+}
+
 
 @keras_export('keras.layers.serialize')
 def serialize(layer):
-  return {'class_name': layer.__class__.__name__, 'config': layer.get_config()}
+  layer_class_name = layer.__class__.__name__
+  if layer_class_name in _V2_CONVERSION_TABLE:
+    layer_class_name = _V2_CONVERSION_TABLE[layer_class_name]
+  return {'class_name': layer_class_name, 'config': layer.get_config()}
 
 
 @keras_export('keras.layers.deserialize')
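A toy illustration of what the conversion table buys: configs written by either the V1 or V2 class deserialize under one canonical name. The stand-in class below is hypothetical, not the real layer:

```python
_V2_CONVERSION_TABLE = {
    'BatchNormalizationV1': 'BatchNormalization',
    'BatchNormalizationV2': 'BatchNormalization',
}

class BatchNormalizationV2(object):  # toy stand-in for the real layer
  def get_config(self):
    return {'momentum': 0.99}

def serialize_sketch(layer):
  name = layer.__class__.__name__
  name = _V2_CONVERSION_TABLE.get(name, name)  # collapse V1/V2 names
  return {'class_name': name, 'config': layer.get_config()}

print(serialize_sketch(BatchNormalizationV2()))
# {'class_name': 'BatchNormalization', 'config': {'momentum': 0.99}}
```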
diff --git a/tensorflow/python/keras/layers/serialization_test.py b/tensorflow/python/keras/layers/serialization_test.py
index 548c3ec..d322e6a 100644
--- a/tensorflow/python/keras/layers/serialization_test.py
+++ b/tensorflow/python/keras/layers/serialization_test.py
@@ -38,6 +38,17 @@
                      keras.initializers.Ones)
     self.assertEqual(new_layer.units, 3)
 
+  def test_serialize_deserialize_batchnorm(self):
+    layer = keras.layers.BatchNormalization(
+        momentum=0.9, beta_initializer='zeros', gamma_regularizer='l2')
+    config = keras.layers.serialize(layer)
+    self.assertEqual(config['class_name'], 'BatchNormalization')
+    new_layer = keras.layers.deserialize(config)
+    self.assertEqual(new_layer.momentum, 0.9)
+    self.assertEqual(new_layer.beta_initializer.__class__,
+                     keras.initializers.Zeros)
+    self.assertEqual(new_layer.gamma_regularizer.__class__,
+                     keras.regularizers.L1L2)
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py
index f01c9d4..993f5a9 100644
--- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py
+++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py
@@ -27,6 +27,7 @@
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
@@ -40,6 +41,14 @@
   return inputs, outputs
 
 
+def _single_identity_op_at_end():
+  inputs = keras.Input(shape=(10,))
+  x = keras.layers.Dense(10)(inputs)
+  outputs = array_ops.identity(x)
+  assert 'Identity' in outputs.name
+  return inputs, outputs
+
+
 def _multiple_ops_at_end():
   inputs = keras.Input(shape=(10,))
   x = keras.layers.Dense(10)(inputs)
@@ -88,17 +97,62 @@
   return inputs, outputs
 
 
+def _op_with_tensor_list():
+  inputs = keras.Input(shape=(10,))
+  x = array_ops.concat([inputs, inputs], axis=1)
+  outputs = keras.layers.Dense(10)(x)
+  return inputs, outputs
+
+
+def _add_n():
+  inputs = keras.Input(shape=(10,))
+  outputs = math_ops.add_n([inputs, inputs, inputs])
+  return inputs, outputs
+
+
+def _reuse_op():
+  inputs = keras.Input(shape=(10,))
+  # This op needs to be checked multiple times.
+  x = gen_nn_ops.relu(inputs)
+  y = keras.layers.Dense(10)(x)
+  x2 = x * 2
+  y2 = keras.layers.Dense(10)(x2)
+  outputs = y + y2
+  return inputs, outputs
+
+
+class LayerWithLayer(keras.layers.Layer):
+
+  def build(self, input_shape):
+    self.bias = self.add_weight(name='bias', dtype='float32')
+    self.layer = keras.layers.Dense(10)
+
+  def call(self, inputs):
+    inputs = inputs * self.bias
+    # Would throw an error if Keras History was created here.
+    return self.layer(inputs)
+
+
+def _inner_layer():
+  inputs = keras.Input(shape=(10,))
+  outputs = LayerWithLayer()(inputs)
+  return inputs, outputs
+
+
 @keras_parameterized.run_all_keras_modes
 class AutoLambdaTest(keras_parameterized.TestCase):
 
   @parameterized.named_parameters(
       ('single_op_at_end', _single_op_at_end),
+      ('single_identity_op_at_end', _single_identity_op_at_end),
       ('multiple_ops_at_end', _multiple_ops_at_end),
       ('single_op_in_middle', _single_op_in_middle),
       ('multiple_ops_in_middle', _multiple_ops_in_middle),
       ('single_standalone_branch', _single_standalone_branch),
       ('single_op_with_attrs', _single_op_with_attrs),
-      ('multiple_uses', _multiple_uses))
+      ('multiple_uses', _multiple_uses),
+      ('op_with_tensor_list', _op_with_tensor_list),
+      ('add_n', _add_n),
+      ('reuse_op', _reuse_op),
+      ('inner_layer', _inner_layer))
   def test_autolambda(self, model_fn):
     inputs, outputs = model_fn()
     model = keras.Model(inputs, outputs)
@@ -110,6 +164,14 @@
     np_outputs = nest.map_structure(lambda x: np.ones((10, 10), 'float32'),
                                     outputs)
     model.fit(np_inputs, np_outputs, batch_size=2)
+    model(np_inputs)  # Test calling the model directly on inputs.
+
+    new_model = keras.Model.from_config(
+        model.get_config(), custom_objects={'LayerWithLayer': LayerWithLayer})
+    new_model.compile(
+        adam.Adam(0.001), 'mse', run_eagerly=testing_utils.should_run_eagerly())
+    new_model.fit(np_inputs, np_outputs, batch_size=2)
+    new_model(np_inputs)  # Test calling the new model directly on inputs.
 
   def test_numerical_correctness_simple(self):
     x = ops.convert_to_tensor([[-1., 0., -2., 1.]])
@@ -127,7 +189,7 @@
     y = self.evaluate(model(x))
     self.assertAllClose(y, [1.5, 3.])
 
-  def test_serialization(self):
+  def test_numerical_correctness_serialization(self):
     x = ops.convert_to_tensor([-1., 0., -2., 1.])
     inputs = keras.Input(shape=(4,))
     outputs = gen_nn_ops.relu(inputs)
@@ -158,12 +220,25 @@
     size_50 = _construct_graph_of_size(50)
     size_500 = _construct_graph_of_size(500)
 
-    # Check reasonable graph construction time.
-    self.assertLess(size_50, 5)
     # Check construction time grows approx. linearly with size.
     e = 2  # Fudge factor to prevent flakiness.
     self.assertLess(size_500, (10 * e) * size_50)
 
+  def test_no_mask_tracking(self):
+    x = keras.backend.placeholder((10, 10))
+    y = keras.layers.Masking(0.)(x)
+    self.assertTrue(y._keras_mask._keras_history_checked)
+
+  def test_built(self):
+    inputs = keras.Input(shape=(10,))
+    outputs = gen_nn_ops.relu(inputs)
+    model = keras.Model(inputs, outputs)
+    model.compile('sgd', 'mse')
+    for layer in model.layers:
+      self.assertTrue(layer.built)
+    # Test something that requires Layers to be built.
+    model.summary()
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/layers/unified_gru_test.py b/tensorflow/python/keras/layers/unified_gru_test.py
index b25007e..db86104 100644
--- a/tensorflow/python/keras/layers/unified_gru_test.py
+++ b/tensorflow/python/keras/layers/unified_gru_test.py
@@ -47,9 +47,7 @@
 
 # Global config for grappler setting that is used for graph mode test.
 _rewrites = rewriter_config_pb2.RewriterConfig()
-_rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
-_customer_optimizer = _rewrites.custom_optimizers.add()
-_customer_optimizer.name = 'ExperimentalImplementationSelector'
+_rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON
 _rewrites.min_graph_nodes = -1
 _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
 _config = config_pb2.ConfigProto(graph_options=_graph_options)
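Usage note, as a sketch (the session wiring is assumed from the surrounding tests, not shown in this hunk): the ConfigProto built here is what graph-mode tests pass to their session so the selector rewrite actually runs.

```python
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2

rewrites = rewriter_config_pb2.RewriterConfig()
# The first-class flag replaces the old 'ExperimentalImplementationSelector'
# custom-optimizer registration removed above.
rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON
rewrites.min_graph_nodes = -1  # do not skip rewriting small graphs
graph_options = config_pb2.GraphOptions(rewrite_options=rewrites)
config = config_pb2.ConfigProto(graph_options=graph_options)
# e.g. self.session(config=config) inside a graph-mode test
```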
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 08153db..938c87c 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -47,9 +47,7 @@
 
 # Global config for grappler setting that is used for graph mode test.
 _rewrites = rewriter_config_pb2.RewriterConfig()
-_rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
-_customer_optimizer = _rewrites.custom_optimizers.add()
-_customer_optimizer.name = 'ExperimentalImplementationSelector'
+_rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON
 _rewrites.min_graph_nodes = -1
 _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
 _config = config_pb2.ConfigProto(graph_options=_graph_options)
diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py
index a10db50..182d5e3 100644
--- a/tensorflow/python/keras/layers/wrappers.py
+++ b/tensorflow/python/keras/layers/wrappers.py
@@ -29,7 +29,7 @@
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import keras_export
 
@@ -46,7 +46,7 @@
       layer: The layer to be wrapped.
   """
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def __init__(self, layer, **kwargs):
     assert isinstance(layer, Layer)
     self.layer = layer
@@ -170,7 +170,7 @@
           '`Layer` instance. You passed: {input}'.format(input=layer))
     super(TimeDistributed, self).__init__(layer, **kwargs)
     self.supports_masking = True
-    self._track_checkpointable(layer, name='layer')
+    self._track_trackable(layer, name='layer')
 
   def _get_shape_tuple(self, init_tuple, tensor, start_idx, int_shape=None):
     """Finds non-specific dimensions in the static shapes.
@@ -386,7 +386,7 @@
   ```
   """
 
-  @checkpointable.no_automatic_dependency_tracking
+  @trackable.no_automatic_dependency_tracking
   def __init__(self, layer, merge_mode='concat', weights=None, **kwargs):
     if not isinstance(layer, Layer):
       raise ValueError(
@@ -419,8 +419,8 @@
     self._num_constants = None
     super(Bidirectional, self).__init__(layer, **kwargs)
     self.input_spec = layer.input_spec
-    self._track_checkpointable(self.forward_layer, name='forward_layer')
-    self._track_checkpointable(self.backward_layer, name='backward_layer')
+    self._track_trackable(self.forward_layer, name='forward_layer')
+    self._track_trackable(self.backward_layer, name='backward_layer')
 
   @property
   def trainable(self):
diff --git a/tensorflow/python/keras/layers/wrappers_test.py b/tensorflow/python/keras/layers/wrappers_test.py
index 8a0b265..8fa0e7b 100644
--- a/tensorflow/python/keras/layers/wrappers_test.py
+++ b/tensorflow/python/keras/layers/wrappers_test.py
@@ -23,10 +23,12 @@
 import numpy as np
 
 from tensorflow.python import keras
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import util as checkpointable_util
+from tensorflow.python.training.tracking import object_identity
+from tensorflow.python.training.tracking import util as trackable_util
 
 
 class _RNNCellWithConstants(keras.layers.Layer):
@@ -87,8 +89,8 @@
     model.get_config()
 
     # check whether the model variables are present in the
-    # checkpointable list of objects
-    checkpointed_objects = set(checkpointable_util.list_objects(model))
+    # trackable list of objects
+    checkpointed_objects = set(trackable_util.list_objects(model))
     for v in model.variables:
       self.assertIn(v, checkpointed_objects)
 
@@ -302,8 +304,9 @@
         model.fit(x, y, epochs=1, batch_size=1)
 
         # check whether the model variables are present in the
-        # checkpointable list of objects
-        checkpointed_objects = set(checkpointable_util.list_objects(model))
+        # trackable list of objects
+        checkpointed_objects = object_identity.ObjectIdentitySet(
+            trackable_util.list_objects(model))
         for v in model.variables:
           self.assertIn(v, checkpointed_objects)
 
@@ -396,7 +399,6 @@
       model.compile(loss='mse', optimizer='sgd')
       model.fit(x, y, epochs=1, batch_size=1)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_Bidirectional_merged_value(self):
     rnn = keras.layers.LSTM
     samples = 2
@@ -424,10 +426,10 @@
             rnn(units, return_sequences=True), merge_mode=merge_mode)
         f_merged = keras.backend.function([inputs], _to_list(layer(inputs)))
         f_forward = keras.backend.function([inputs],
-                                           [layer.forward_layer.call(inputs)])
+                                           [layer.forward_layer(inputs)])
         f_backward = keras.backend.function(
             [inputs],
-            [keras.backend.reverse(layer.backward_layer.call(inputs), 1)])
+            [keras.backend.reverse(layer.backward_layer(inputs), 1)])
 
         y_merged = f_merged(x)
         y_expected = _to_list(merge_func(f_forward(x)[0], f_backward(x)[0]))
@@ -441,9 +443,9 @@
             rnn(units, return_state=True), merge_mode=merge_mode)
         f_merged = keras.backend.function([inputs], layer(inputs))
         f_forward = keras.backend.function([inputs],
-                                           layer.forward_layer.call(inputs))
+                                           layer.forward_layer(inputs))
         f_backward = keras.backend.function([inputs],
-                                            layer.backward_layer.call(inputs))
+                                            layer.backward_layer(inputs))
         n_states = len(layer.layer.states)
 
         y_merged = f_merged(x)
@@ -527,8 +529,10 @@
       layer.trainable = True
       assert len(layer.trainable_weights) == 6
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_Bidirectional_updates(self):
+    if context.executing_eagerly():
+      self.skipTest('layer.updates is only available in graph mode.')
+
     with self.cached_session():
       x = keras.layers.Input(shape=(3, 2))
       x_reachable_update = x * x
@@ -556,10 +560,15 @@
       assert len(layer.losses) == 4
       assert len(layer.get_losses_for(None)) == 4
       assert not layer.get_losses_for(x)
+
+      # Create a random tensor that is not conditional on the inputs.
+      with keras.backend.get_graph().as_default():
+        const_tensor = constant_op.constant(1)
+
       layer.forward_layer.add_loss(x_reachable_loss, inputs=x)
-      layer.forward_layer.add_loss(1, inputs=None)
+      layer.forward_layer.add_loss(const_tensor, inputs=None)
       layer.backward_layer.add_loss(x_reachable_loss, inputs=x)
-      layer.backward_layer.add_loss(1, inputs=None)
+      layer.backward_layer.add_loss(const_tensor, inputs=None)
       assert len(layer.losses) == 8
       assert len(layer.get_losses_for(None)) == 6
       assert len(layer.get_losses_for(x)) == 2
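The swap from `add_loss(1, ...)` to `const_tensor` reflects that an unconditional loss should be a tensor created inside the Keras graph; a sketch of the pattern (layer construction elided):

```python
from tensorflow.python.framework import constant_op
from tensorflow.python.keras import backend

# Build the unconditional loss as a graph tensor first; passing a raw
# Python int relied on implicit conversion that no longer applies here.
with backend.get_graph().as_default():
  const_tensor = constant_op.constant(1)

# layer.add_loss(const_tensor, inputs=None)   # unconditional
# layer.add_loss(x_reachable_loss, inputs=x)  # conditional on the inputs
```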
diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py
index 366469d..1bffcab 100644
--- a/tensorflow/python/keras/losses.py
+++ b/tensorflow/python/keras/losses.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# pylint: disable=unused-import
 """Built-in loss functions.
 """
 from __future__ import absolute_import
@@ -23,16 +22,14 @@
 
 import six
 
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import smart_cond
 from tensorflow.python.keras import backend as K
+from tensorflow.python.keras.utils import losses_utils
 from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
 from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
-from tensorflow.python.keras.utils.losses_utils import compute_weighted_loss
 from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops.losses import losses_impl
@@ -57,13 +54,13 @@
   ```
 
   Args:
-    reduction: Type of `tf.losses.Reduction` to apply to loss. Default value is
-      `SUM_OVER_BATCH_SIZE`.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
+      Default value is `SUM_OVER_BATCH_SIZE`.
     name: Optional name for the op.
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name=None):
     self.reduction = reduction
     self.name = name
@@ -97,7 +94,7 @@
     with ops.name_scope(scope_name, format(self.__class__.__name__),
                         (y_pred, y_true, sample_weight)):
       losses = self.call(y_true, y_pred)
-      return compute_weighted_loss(
+      return losses_utils.compute_weighted_loss(
           losses, sample_weight, reduction=self.reduction)
 
   @classmethod
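Given the `__call__` flow above (the subclass' `call()` returns unreduced per-sample losses, then `losses_utils.compute_weighted_loss` applies `sample_weight` and `reduction`), a minimal subclass might look like this sketch (public `tf.keras` names assumed):

```python
import tensorflow as tf

class MeanSquaredErrorSketch(tf.keras.losses.Loss):
  """Toy re-implementation: call() returns per-sample losses only;
  the base class handles sample weighting and reduction."""

  def call(self, y_true, y_pred):
    return tf.reduce_mean(tf.square(y_pred - y_true), axis=-1)

mse = MeanSquaredErrorSketch(name='mse_sketch')
# mse(y_true, y_pred, sample_weight=w) -> scalar, reduced SUM_OVER_BATCH_SIZE
```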
@@ -133,7 +130,7 @@
   Args:
     fn: The loss function to wrap, with signature `fn(y_true, y_pred,
       **kwargs)`.
-    reduction: (Optional) Type of `tf.losses.Reduction` to apply to loss.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
       Default value is `SUM_OVER_BATCH_SIZE`.
     name: (Optional) name for the loss.
     **kwargs: The keyword arguments that are passed on to `fn`.
@@ -141,7 +138,7 @@
 
   def __init__(self,
                fn,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name=None,
                **kwargs):
     super(LossFunctionWrapper, self).__init__(reduction=reduction, name=name)
@@ -186,13 +183,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss=tf.keras.losses.MeanSquaredError())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='mean_squared_error'):
     super(MeanSquaredError, self).__init__(
         mean_squared_error, name=name, reduction=reduction)
@@ -216,13 +213,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss=tf.keras.losses.MeanAbsoluteError())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='mean_absolute_error'):
     super(MeanAbsoluteError, self).__init__(
         mean_absolute_error, name=name, reduction=reduction)
@@ -246,13 +243,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss=tf.keras.losses.MeanAbsolutePercentageError())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='mean_absolute_percentage_error'):
     super(MeanAbsolutePercentageError, self).__init__(
         mean_absolute_percentage_error, name=name, reduction=reduction)
@@ -276,13 +273,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss=tf.keras.losses.MeanSquaredLogarithmicError())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='mean_squared_logarithmic_error'):
     super(MeanSquaredLogarithmicError, self).__init__(
         mean_squared_logarithmic_error, name=name, reduction=reduction)
@@ -310,7 +307,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss=tf.keras.losses.BinaryCrossentropy())
   ```
 
@@ -318,15 +315,15 @@
     from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
       we assume that `y_pred` encodes a probability distribution.
     label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
-    reduction: Type of `tf.losses.Reduction` to apply to loss. Default value is
-      `SUM_OVER_BATCH_SIZE`.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
+      Default value is `SUM_OVER_BATCH_SIZE`.
     name: Optional name for the op.
   """
 
   def __init__(self,
                from_logits=False,
                label_smoothing=0,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='binary_crossentropy'):
     super(BinaryCrossentropy, self).__init__(
         binary_crossentropy,
@@ -363,7 +360,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss=tf.keras.losses.CategoricalCrossentropy())
   ```
 
@@ -374,15 +371,15 @@
       meaning the confidence on label values is relaxed, e.g.
       `label_smoothing=0.2` means that we will use a value of `0.1` for label
       `0` and `0.9` for label `1`.
-    reduction: Type of `tf.losses.Reduction` to apply to loss. Default value is
-      `SUM_OVER_BATCH_SIZE`.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
+      Default value is `SUM_OVER_BATCH_SIZE`.
     name: Optional name for the op.
   """
 
   def __init__(self,
                from_logits=False,
                label_smoothing=0,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='categorical_crossentropy'):
     super(CategoricalCrossentropy, self).__init__(
         categorical_crossentropy,
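The smoothing described in the docstring above is the affine map `y * (1 - smoothing) + smoothing / num_classes`; a quick check against the two-class example in the text:

```python
def smooth_labels(y_true, label_smoothing, num_classes):
  return [y * (1. - label_smoothing) + label_smoothing / num_classes
          for y in y_true]

print(smooth_labels([0., 1.], label_smoothing=0.2, num_classes=2))
# [0.1, 0.9] -- the values quoted in the docstring
```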
@@ -420,21 +417,21 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss=tf.keras.losses.SparseCategoricalCrossentropy())
   ```
 
   Args:
     from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
       we assume that `y_pred` encodes a probability distribution.
-    reduction: Type of `tf.losses.Reduction` to apply to loss. Default value is
-      `SUM_OVER_BATCH_SIZE`.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
+      Default value is `SUM_OVER_BATCH_SIZE`.
     name: Optional name for the op.
   """
 
   def __init__(self,
                from_logits=False,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name=None):
     super(SparseCategoricalCrossentropy, self).__init__(
         sparse_categorical_crossentropy,
@@ -453,7 +450,7 @@
   Usage:
 
   ```python
-  h = tf.losses.Hinge()
+  h = tf.keras.losses.Hinge()
   loss = h([-1., 1., 1.], [0.6, -0.7, -0.5])
 
   # loss = max(0, 1 - y_true * y_pred) = [1.6 + 1.7 + 1.5] / 3
@@ -464,13 +461,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.Hinge())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.Hinge())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name=None):
     super(Hinge, self).__init__(hinge, name=name, reduction=reduction)
 
@@ -485,7 +482,7 @@
   Usage:
 
   ```python
-  sh = tf.losses.SquaredHinge()
+  sh = tf.keras.losses.SquaredHinge()
   loss = sh([-1., 1., 1.], [0.6, -0.7, -0.5])
 
   # loss = (max(0, 1 - y_true * y_pred))^2 = [1.6^2 + 1.7^2 + 1.5^2] / 3
@@ -496,13 +493,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.SquaredHinge())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.SquaredHinge())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='squared_hinge'):
     super(SquaredHinge, self).__init__(
         squared_hinge, name=name, reduction=reduction)
@@ -515,7 +512,7 @@
   Usage:
 
   ```python
-  ch = tf.losses.CategoricalHinge()
+  ch = tf.keras.losses.CategoricalHinge()
   loss = ch([0., 1., 1.], [1., 0., 1.])
   print('Loss: ', loss.numpy())  # Loss: 1.0
   ```
@@ -523,13 +520,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.CategoricalHinge())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.CategoricalHinge())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='categorical_hinge'):
     super(CategoricalHinge, self).__init__(
         categorical_hinge, name=name, reduction=reduction)
@@ -544,7 +541,7 @@
   Usage:
 
   ```python
-  l = tf.losses.LogLoss()
+  l = tf.keras.losses.LogLoss()
   loss = l([0., 1., 1.], [1., 0., 1.])
   print('Loss: ', loss.numpy())  # Loss: 10.745
   ```
@@ -552,13 +549,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.LogLoss())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.LogLoss())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='logloss'):
     super(LogLoss, self).__init__(logloss, name=name, reduction=reduction)
 
@@ -572,7 +569,7 @@
   Usage:
 
   ```python
-  p = tf.losses.Poisson()
+  p = tf.keras.losses.Poisson()
   loss = p([1, 9, 2], [4, 8, 12])
   print('Loss: ', loss.numpy())  # Loss: -0.35
   ```
@@ -580,13 +577,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.Poisson())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.Poisson())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='poisson'):
     super(Poisson, self).__init__(poisson, name=name, reduction=reduction)
 
@@ -600,7 +597,7 @@
   Usage:
 
   ```python
-  l = tf.losses.LogCosh()
+  l = tf.keras.losses.LogCosh()
   loss = l([0., 1., 1.], [1., 0., 1.])
   print('Loss: ', loss.numpy())  # Loss: 0.289
   ```
@@ -608,13 +605,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.LogCosh())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.LogCosh())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='logcosh'):
     super(LogCosh, self).__init__(logcosh, name=name, reduction=reduction)
 
@@ -628,7 +625,7 @@
   Usage:
 
   ```python
-  k = tf.losses.KLDivergence()
+  k = tf.keras.losses.KLDivergence()
   loss = k([.4, .9, .2], [.5, .8, .12])
   print('Loss: ', loss.numpy())  # Loss: 0.119
   ```
@@ -636,13 +633,13 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.KLDivergence())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.KLDivergence())
   ```
   """
 
   def __init__(self,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='kullback_leibler_divergence'):
     super(KLDivergence, self).__init__(
         kullback_leibler_divergence, name=name, reduction=reduction)
@@ -663,7 +660,7 @@
   Usage:
 
   ```python
-  l = tf.losses.Huber()
+  l = tf.keras.losses.Huber()
   loss = l([0., 1., 1.], [1., 0., 1.])
   print('Loss: ', loss.numpy())  # Loss: 0.333
   ```
@@ -671,21 +668,21 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.Huber())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.Huber())
   ```
 
   Args:
     delta: A float, the point where the Huber loss function changes from a
       quadratic to linear.
-    reduction: Type of `tf.losses.Reduction` to apply to loss. Default value is
-      `SUM_OVER_BATCH_SIZE`.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
+      Default value is `SUM_OVER_BATCH_SIZE`.
     name: Optional name for the op.
   """
 
   def __init__(self,
                delta=1.0,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name='huber_loss'):
     super(Huber, self).__init__(
         huber_loss, name=name, reduction=reduction, delta=delta)
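To make the quadratic-to-linear switch concrete, a tiny pure-Python check that reproduces the `# Loss: 0.333` value from the usage example above:

```python
def huber_sketch(y_true, y_pred, delta=1.0):
  total = 0.
  for t, p in zip(y_true, y_pred):
    d = abs(p - t)
    # Quadratic inside |d| <= delta, linear (slope delta) outside.
    total += 0.5 * d * d if d <= delta else delta * (d - 0.5 * delta)
  return total / len(y_true)

print(huber_sketch([0., 1., 1.], [1., 0., 1.]))  # 0.333...
```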
@@ -963,7 +960,7 @@
   Usage:
 
   ```python
-  cosine_loss = tf.losses.CosineProximity()
+  cosine_loss = tf.keras.losses.CosineProximity()
   loss = cosine_loss([0., 1., 1.], [1., 0., 1.])
   print('Loss: ', loss.numpy())  # Loss: -0.5
   ```
@@ -971,21 +968,21 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss=tf.losses.CosineProximity())
+  model = tf.keras.Model(inputs, outputs)
+  model.compile('sgd', loss=tf.keras.losses.CosineProximity())
   ```
 
   Args:
     axis: (Optional) Defaults to -1. The dimension along which the cosine
       proximity is computed.
-    reduction: (Optional) Type of `tf.losses.Reduction` to apply to loss.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
       Default value is `SUM_OVER_BATCH_SIZE`.
     name: Optional name for the op.
   """
 
   def __init__(self,
                axis=-1,
-               reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                name=None):
     super(CosineProximity, self).__init__(reduction=reduction, name=name)
     self.axis = axis
diff --git a/tensorflow/python/keras/losses_test.py b/tensorflow/python/keras/losses_test.py
index e722090..48e4b68 100644
--- a/tensorflow/python/keras/losses_test.py
+++ b/tensorflow/python/keras/losses_test.py
@@ -27,7 +27,7 @@
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops.losses import losses_impl
+from tensorflow.python.keras.utils import losses_utils
 from tensorflow.python.platform import test
 
 try:
@@ -186,7 +186,7 @@
 
     self.assertEqual(mse_obj.name, 'mean_squared_error')
     self.assertEqual(mse_obj.reduction,
-                     losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE)
+                     losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)
 
     y_true = constant_op.constant([[1., 9.], [2., 5.]])
     y_pred = constant_op.constant([[4., 8.], [12., 3.]])
@@ -205,9 +205,9 @@
 
   def test_config(self):
     mse_obj = keras.losses.MeanSquaredError(
-        reduction=losses_impl.ReductionV2.SUM, name='mse_1')
+        reduction=losses_utils.ReductionV2.SUM, name='mse_1')
     self.assertEqual(mse_obj.name, 'mse_1')
-    self.assertEqual(mse_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(mse_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_all_correct_unweighted(self):
     mse_obj = keras.losses.MeanSquaredError()
@@ -273,7 +273,7 @@
 
   def test_no_reduction(self):
     mse_obj = keras.losses.MeanSquaredError(
-        reduction=losses_impl.ReductionV2.NONE)
+        reduction=losses_utils.ReductionV2.NONE)
     y_true = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3))
     y_pred = constant_op.constant([4, 8, 12, 8, 1, 3],
                                   shape=(2, 3),
@@ -284,7 +284,7 @@
 
   def test_sum_reduction(self):
     mse_obj = keras.losses.MeanSquaredError(
-        reduction=losses_impl.ReductionV2.SUM)
+        reduction=losses_utils.ReductionV2.SUM)
     y_true = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3))
     y_pred = constant_op.constant([4, 8, 12, 8, 1, 3],
                                   shape=(2, 3),
@@ -298,9 +298,9 @@
 
   def test_config(self):
     mae_obj = keras.losses.MeanAbsoluteError(
-        reduction=losses_impl.ReductionV2.SUM, name='mae_1')
+        reduction=losses_utils.ReductionV2.SUM, name='mae_1')
     self.assertEqual(mae_obj.name, 'mae_1')
-    self.assertEqual(mae_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(mae_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_all_correct_unweighted(self):
     mae_obj = keras.losses.MeanAbsoluteError()
@@ -366,7 +366,7 @@
 
   def test_no_reduction(self):
     mae_obj = keras.losses.MeanAbsoluteError(
-        reduction=losses_impl.ReductionV2.NONE)
+        reduction=losses_utils.ReductionV2.NONE)
     y_true = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3))
     y_pred = constant_op.constant([4, 8, 12, 8, 1, 3],
                                   shape=(2, 3),
@@ -377,7 +377,7 @@
 
   def test_sum_reduction(self):
     mae_obj = keras.losses.MeanAbsoluteError(
-        reduction=losses_impl.ReductionV2.SUM)
+        reduction=losses_utils.ReductionV2.SUM)
     y_true = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3))
     y_pred = constant_op.constant([4, 8, 12, 8, 1, 3],
                                   shape=(2, 3),
@@ -391,9 +391,9 @@
 
   def test_config(self):
     mape_obj = keras.losses.MeanAbsolutePercentageError(
-        reduction=losses_impl.ReductionV2.SUM, name='mape_1')
+        reduction=losses_utils.ReductionV2.SUM, name='mape_1')
     self.assertEqual(mape_obj.name, 'mape_1')
-    self.assertEqual(mape_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(mape_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     mape_obj = keras.losses.MeanAbsolutePercentageError()
@@ -448,9 +448,9 @@
 
   def test_config(self):
     msle_obj = keras.losses.MeanSquaredLogarithmicError(
-        reduction=losses_impl.ReductionV2.SUM, name='mape_1')
+        reduction=losses_utils.ReductionV2.SUM, name='mape_1')
     self.assertEqual(msle_obj.name, 'mape_1')
-    self.assertEqual(msle_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(msle_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     msle_obj = keras.losses.MeanSquaredLogarithmicError()
@@ -522,9 +522,9 @@
 
   def test_config(self):
     cosine_obj = keras.losses.CosineProximity(
-        axis=2, reduction=losses_impl.ReductionV2.SUM, name='cosine_loss')
+        axis=2, reduction=losses_utils.ReductionV2.SUM, name='cosine_loss')
     self.assertEqual(cosine_obj.name, 'cosine_loss')
-    self.assertEqual(cosine_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(cosine_obj.reduction, losses_utils.ReductionV2.SUM)
     self.assertEqual(cosine_obj.axis, 2)
 
   def test_unweighted(self):
@@ -591,9 +591,9 @@
 
   def test_config(self):
     bce_obj = keras.losses.BinaryCrossentropy(
-        reduction=losses_impl.ReductionV2.SUM, name='bce_1')
+        reduction=losses_utils.ReductionV2.SUM, name='bce_1')
     self.assertEqual(bce_obj.name, 'bce_1')
-    self.assertEqual(bce_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(bce_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_all_correct_unweighted(self):
     y_true = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]],
@@ -722,7 +722,7 @@
     logits = constant_op.constant([[100.0, -100.0, 100.0],
                                    [100.0, 100.0, -100.0]])
     bce_obj = keras.losses.BinaryCrossentropy(
-        from_logits=True, reduction=losses_impl.ReductionV2.NONE)
+        from_logits=True, reduction=losses_utils.ReductionV2.NONE)
     loss = bce_obj(y_true, logits)
 
     # Loss = max(x, 0) - x * z + log(1 + exp(-abs(x)))
@@ -757,9 +757,9 @@
 
   def test_config(self):
     cce_obj = keras.losses.CategoricalCrossentropy(
-        reduction=losses_impl.ReductionV2.SUM, name='bce_1')
+        reduction=losses_utils.ReductionV2.SUM, name='bce_1')
     self.assertEqual(cce_obj.name, 'bce_1')
-    self.assertEqual(cce_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(cce_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_all_correct_unweighted(self):
     y_true = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]],
@@ -823,7 +823,7 @@
     y_true = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
     logits = constant_op.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]])
     cce_obj = keras.losses.CategoricalCrossentropy(
-        from_logits=True, reduction=losses_impl.ReductionV2.NONE)
+        from_logits=True, reduction=losses_utils.ReductionV2.NONE)
     loss = cce_obj(y_true, logits)
     self.assertAllClose((0.001822, 0.000459, 0.169846), self.evaluate(loss), 3)
 
@@ -914,7 +914,7 @@
     y_true = constant_op.constant([[0], [1], [2]])
     logits = constant_op.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]])
     cce_obj = keras.losses.SparseCategoricalCrossentropy(
-        from_logits=True, reduction=losses_impl.ReductionV2.NONE)
+        from_logits=True, reduction=losses_utils.ReductionV2.NONE)
     loss = cce_obj(y_true, logits)
     self.assertAllClose((0.001822, 0.000459, 0.169846), self.evaluate(loss), 3)
 
@@ -924,9 +924,9 @@
 
   def test_config(self):
     hinge_obj = keras.losses.Hinge(
-        reduction=losses_impl.ReductionV2.SUM, name='hinge_loss')
+        reduction=losses_utils.ReductionV2.SUM, name='hinge_loss')
     self.assertEqual(hinge_obj.name, 'hinge_loss')
-    self.assertEqual(hinge_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(hinge_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     hinge_obj = keras.losses.Hinge()
@@ -1024,9 +1024,9 @@
 
   def test_config(self):
     sq_hinge_obj = keras.losses.SquaredHinge(
-        reduction=losses_impl.ReductionV2.SUM, name='sq_hinge_loss')
+        reduction=losses_utils.ReductionV2.SUM, name='sq_hinge_loss')
     self.assertEqual(sq_hinge_obj.name, 'sq_hinge_loss')
-    self.assertEqual(sq_hinge_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(sq_hinge_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     sq_hinge_obj = keras.losses.SquaredHinge()
@@ -1133,9 +1133,9 @@
 
   def test_config(self):
     cat_hinge_obj = keras.losses.CategoricalHinge(
-        reduction=losses_impl.ReductionV2.SUM, name='cat_hinge_loss')
+        reduction=losses_utils.ReductionV2.SUM, name='cat_hinge_loss')
     self.assertEqual(cat_hinge_obj.name, 'cat_hinge_loss')
-    self.assertEqual(cat_hinge_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(cat_hinge_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     cat_hinge_obj = keras.losses.CategoricalHinge()
@@ -1214,9 +1214,9 @@
 
   def test_config(self):
     log_loss_obj = keras.losses.LogLoss(
-        reduction=losses_impl.ReductionV2.SUM, name='log')
+        reduction=losses_utils.ReductionV2.SUM, name='log')
     self.assertEqual(log_loss_obj.name, 'log')
-    self.assertEqual(log_loss_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(log_loss_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_all_correct(self):
     self.setup()
@@ -1301,9 +1301,9 @@
 
   def test_config(self):
     logcosh_obj = keras.losses.LogCosh(
-        reduction=losses_impl.ReductionV2.SUM, name='logcosh_loss')
+        reduction=losses_utils.ReductionV2.SUM, name='logcosh_loss')
     self.assertEqual(logcosh_obj.name, 'logcosh_loss')
-    self.assertEqual(logcosh_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(logcosh_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     self.setup()
@@ -1382,9 +1382,9 @@
 
   def test_config(self):
     poisson_obj = keras.losses.Poisson(
-        reduction=losses_impl.ReductionV2.SUM, name='poisson')
+        reduction=losses_utils.ReductionV2.SUM, name='poisson')
     self.assertEqual(poisson_obj.name, 'poisson')
-    self.assertEqual(poisson_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(poisson_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     self.setup()
@@ -1463,9 +1463,9 @@
 
   def test_config(self):
     k_obj = keras.losses.KLDivergence(
-        reduction=losses_impl.ReductionV2.SUM, name='kld')
+        reduction=losses_utils.ReductionV2.SUM, name='kld')
     self.assertEqual(k_obj.name, 'kld')
-    self.assertEqual(k_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(k_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_unweighted(self):
     self.setup()
@@ -1553,9 +1553,9 @@
 
   def test_config(self):
     h_obj = keras.losses.Huber(
-        reduction=losses_impl.ReductionV2.SUM, name='huber')
+        reduction=losses_utils.ReductionV2.SUM, name='huber')
     self.assertEqual(h_obj.name, 'huber')
-    self.assertEqual(h_obj.reduction, losses_impl.ReductionV2.SUM)
+    self.assertEqual(h_obj.reduction, losses_utils.ReductionV2.SUM)
 
   def test_all_correct(self):
     self.setup()
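The loss tests above only swap the module that hosts `ReductionV2` (`losses_impl` to `losses_utils`); the enum's behavior is unchanged. A hedged sketch of what the enum controls, assuming the public TF 2.x API where it is re-exported as `tf.keras.losses.Reduction`:

```python
import tensorflow as tf

y_true = tf.constant([[0., 1.], [1., 1.]])
y_pred = tf.constant([[0.1, 0.9], [0.8, 0.6]])

# NONE keeps one loss value per sample; SUM collapses them to a scalar.
none_loss = tf.keras.losses.BinaryCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE)(y_true, y_pred)
sum_loss = tf.keras.losses.BinaryCrossentropy(
    reduction=tf.keras.losses.Reduction.SUM)(y_true, y_pred)

print(none_loss.shape)  # (2,): per-sample losses
print(sum_loss.shape)   # (): reduced to a scalar
```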
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 63ff5e6..9ceb033 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -109,28 +109,25 @@
   Example subclass implementation:
 
   ```
-  class BinaryTruePositives(Metric):
-    def __init__(self, name='binary_true_positives', dtype=None):
-      super(BinaryTruePositives, self).__init__(name=name, dtype=dtype)
-      self.true_positives = self.add_weight(
-          'true_positives', initializer=init_ops.zeros_initializer)
+  class BinaryTruePositives(tf.keras.metrics.Metric):
+
+    def __init__(self, name='binary_true_positives'):
+      super(BinaryTruePositives, self).__init__(name=name)
+      self.true_positives = self.add_weight(name='tp', initializer='zeros')
 
     def update_state(self, y_true, y_pred, sample_weight=None):
-      y_true = math_ops.cast(y_true, dtypes.bool)
-      y_pred = math_ops.cast(y_pred, dtypes.bool)
-      y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
-          y_pred, y_true, sample_weight)
+      y_true = tf.cast(y_true, tf.bool)
+      y_pred = tf.cast(y_pred, tf.bool)
 
-      values = math_ops.logical_and(
-          math_ops.equal(y_true, True), math_ops.equal(y_pred, True))
-      values = math_ops.cast(values, self._dtype)
+      values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True))
+      values = tf.cast(values, self.dtype)
       if sample_weight is not None:
-        sample_weight = math_ops.cast(sample_weight, self._dtype)
-        values = math_ops.multiply(values, sample_weight)
-      self.true_positives.assign_add(math_ops.reduce_sum(values))
+        sample_weight = tf.cast(sample_weight, self.dtype)
+        values = tf.multiply(values, sample_weight)
+      return self.true_positives.assign_add(tf.reduce_sum(values))
 
     def result(self):
-      return array_ops.identity(self.true_positives)
+      return tf.identity(self.true_positives)
   ```
   """
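For context, a hypothetical usage of the `BinaryTruePositives` subclass sketched in the docstring above, assuming TF 2.x eager execution and that the class has been defined:

```python
m = BinaryTruePositives()
m.update_state([0, 1, 1, 1], [1, 0, 1, 1])
print(m.result().numpy())  # 2.0 -- two true positives

# It can also be attached to a compiled model (`model` assumed to exist):
# model.compile('sgd', loss='mse', metrics=[BinaryTruePositives()])
```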
 
@@ -203,8 +200,7 @@
     This function is called between epochs/steps,
     when a metric is evaluated during training.
     """
-    for v in self.variables:
-      K.set_value(v, 0)
+    K.batch_set_value([(v, 0) for v in self.variables])
 
   @abc.abstractmethod
   def update_state(self, *args, **kwargs):
@@ -374,7 +370,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.add_metric(tf.keras.metrics.Sum(name='sum_1')(outputs))
   model.compile('sgd', loss='mse')
   ```
@@ -416,7 +412,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.add_metric(tf.keras.metrics.Mean(name='mean_1')(outputs))
   model.compile('sgd', loss='mse')
   ```
@@ -460,7 +456,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
     'sgd',
     loss='mse',
@@ -590,7 +586,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.Accuracy()])
   ```
   """
@@ -626,7 +622,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.BinaryAccuracy()])
   ```
   """
@@ -676,7 +672,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
     'sgd',
     loss='mse',
@@ -724,7 +720,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
       'sgd',
       loss='mse',
@@ -752,7 +748,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.TopKCategoricalAccuracy()])
   ```
   """
@@ -785,7 +781,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
     'sgd',
     metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
@@ -864,8 +860,8 @@
 
   def reset_states(self):
     num_thresholds = len(to_list(self.thresholds))
-    for v in self.variables:
-      K.set_value(v, np.zeros((num_thresholds,)))
+    K.batch_set_value(
+        [(v, np.zeros((num_thresholds,))) for v in self.variables])
 
   def get_config(self):
     config = {'thresholds': self.init_thresholds}
@@ -899,7 +895,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.FalsePositives()])
   ```
   """
@@ -949,7 +945,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.FalseNegatives()])
   ```
   """
@@ -999,7 +995,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.TrueNegatives()])
   ```
   """
@@ -1049,7 +1045,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.TruePositives()])
   ```
   """
@@ -1109,7 +1105,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.Precision()])
   ```
   """
@@ -1187,8 +1183,8 @@
 
   def reset_states(self):
     num_thresholds = len(to_list(self.thresholds))
-    for v in self.variables:
-      K.set_value(v, np.zeros((num_thresholds,)))
+    K.batch_set_value(
+        [(v, np.zeros((num_thresholds,))) for v in self.variables])
 
   def get_config(self):
     config = {
@@ -1235,7 +1231,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.Recall()])
   ```
   """
@@ -1313,8 +1309,8 @@
 
   def reset_states(self):
     num_thresholds = len(to_list(self.thresholds))
-    for v in self.variables:
-      K.set_value(v, np.zeros((num_thresholds,)))
+    K.batch_set_value(
+        [(v, np.zeros((num_thresholds,))) for v in self.variables])
 
   def get_config(self):
     config = {
@@ -1391,8 +1387,8 @@
 
   def reset_states(self):
     num_thresholds = len(self.thresholds)
-    for v in self.variables:
-      K.set_value(v, np.zeros((num_thresholds,)))
+    K.batch_set_value(
+        [(v, np.zeros((num_thresholds,))) for v in self.variables])
 
 
 @keras_export('keras.metrics.SensitivityAtSpecificity')
@@ -1426,7 +1422,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
       'sgd',
       loss='mse',
@@ -1507,7 +1503,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
       'sgd',
       loss='mse',
@@ -1602,7 +1598,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.AUC()])
   ```
   """
@@ -1809,8 +1805,8 @@
 
   def reset_states(self):
     num_thresholds = len(self.thresholds)
-    for v in self.variables:
-      K.set_value(v, np.zeros((num_thresholds,)))
+    K.batch_set_value(
+        [(v, np.zeros((num_thresholds,))) for v in self.variables])
 
   def get_config(self):
     config = {
@@ -1834,7 +1830,7 @@
 
   Usage:
   ```python
-  m = tf.metrics.CosineProximity()
+  m = tf.keras.metrics.CosineProximity()
   m.update_state([0, 1, 1], [1, 0, 1])
   print('Final result: ', m.result().numpy())  # Final result: -0.5
   ```
@@ -1842,11 +1838,11 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
       'sgd',
       loss='mse',
-      metrics=[tf.metrics.CosineProximity()])
+      metrics=[tf.keras.metrics.CosineProximity()])
   ```
   """
 
@@ -1871,7 +1867,7 @@
 
   Usage:
   ```python
-  m = tf.metrics.MeanAbsoluteError()
+  m = tf.keras.metrics.MeanAbsoluteError()
   m.update_state([0., 0., 1., 1.], [1., 1., 1., 0.])
   print('Final result: ', m.result().numpy())  # Final result: 0.75
   ```
@@ -1879,7 +1875,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.MeanAbsoluteError()])
   ```
   """
@@ -1907,7 +1903,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.MeanAbsolutePercentageError()])
   ```
   """
@@ -1935,7 +1931,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.MeanSquaredError()])
   ```
   """
@@ -1963,7 +1959,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.MeanSquaredLogarithmicError()])
   ```
   """
@@ -1997,7 +1993,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.Hinge()])
   ```
   """
@@ -2030,7 +2026,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.SquaredHinge()])
   ```
   """
@@ -2057,7 +2053,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.CategoricalHinge()])
   ```
   """
@@ -2081,7 +2077,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.RootMeanSquaredError()])
   ```
   """
@@ -2131,7 +2127,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.LogCoshError()])
   ```
   """
@@ -2157,7 +2153,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.Poisson()])
   ```
   """
@@ -2183,7 +2179,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile('sgd', metrics=[tf.keras.metrics.KLDivergence()])
   ```
   """
@@ -2224,7 +2220,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
     'sgd',
     loss='mse',
@@ -2330,7 +2326,7 @@
   Usage:
 
   ```python
-  m = tf.metrics.MeanTensor()
+  m = tf.keras.metrics.MeanTensor()
   m.update_state([0, 1, 2, 3])
   m.update_state([4, 5, 6, 7])
   print('Result: ', m.result().numpy())  # Result: [2, 3, 4, 5]
@@ -2425,8 +2421,8 @@
 
   def reset_states(self):
     if self._built:
-      for v in self.variables:
-        K.set_value(v, np.zeros(self._shape.as_list()))
+      K.batch_set_value(
+          [(v, np.zeros(self._shape.as_list())) for v in self.variables])
 
 
 @keras_export('keras.metrics.BinaryCrossentropy')
@@ -2458,7 +2454,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
       'sgd',
       loss='mse',
@@ -2524,7 +2520,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
     'sgd',
     loss='mse',
@@ -2598,7 +2594,7 @@
   Usage with tf.keras API:
 
   ```python
-  model = keras.models.Model(inputs, outputs)
+  model = tf.keras.Model(inputs, outputs)
   model.compile(
     'sgd',
     loss='mse',
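Several `reset_states` implementations in this file switch from a Python loop of `K.set_value` calls to a single `K.batch_set_value` call, which in graph mode performs one backend round-trip instead of one per variable. A minimal sketch of the pattern, using illustrative variables:

```python
import numpy as np
from tensorflow.python.keras import backend as K

variables = [K.variable(np.ones(4)), K.variable(np.ones(4))]

# One call assigns all (variable, value) pairs at once.
K.batch_set_value([(v, np.zeros(4)) for v in variables])
print(K.batch_get_value(variables))  # two arrays of zeros
```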
diff --git a/tensorflow/python/keras/metrics_correctness_test.py b/tensorflow/python/keras/metrics_correctness_test.py
index b2385aa..6cb3b36 100644
--- a/tensorflow/python/keras/metrics_correctness_test.py
+++ b/tensorflow/python/keras/metrics_correctness_test.py
@@ -30,7 +30,6 @@
 @keras_parameterized.run_with_all_model_types(exclude_models=['sequential'])
 @keras_parameterized.run_all_keras_modes
 class TestMetricsCorrectnessMultiIO(keras_parameterized.TestCase):
-  # TODO(psv): Remove the run_eagerly checks here when b/123082095 is fixed.
 
   def _get_multi_io_model(self):
     inp_1 = layers.Input(shape=(1,), name='input_1')
@@ -119,7 +118,9 @@
         'output_2_mean_squared_error': [7.5, 7.5],
         'output_1_weighted_mean_squared_error': [9.286, 9.286],
         'output_2_weighted_mean_squared_error': [4.375, 4.375],
-        'loss': [41.25, 41.25]
+        'loss': [41.25, 41.25],
+        'output_1_loss': [32.5, 32.5],
+        'output_2_loss': [8.75, 8.75],
     }
 
     # In the order: 'loss', 'output_1_loss', 'output_2_loss',
@@ -137,11 +138,6 @@
                         batch_size=2,
                         epochs=2,
                         shuffle=False)
-
-    if not model.run_eagerly:
-      self.expected_fit_result['output_1_loss'] = [32.5, 32.5]
-      self.expected_fit_result['output_2_loss'] = [8.75, 8.75]
-
     for key, value in self.expected_fit_result.items():
       self.assertAllClose(history.history[key], value, 1e-3)
 
@@ -153,13 +149,8 @@
                                      'output_1': self.weights_1,
                                      'output_2': self.weights_2,
                                  })
-
-    if model.run_eagerly:
-      self.expected_batch_result = [41.25, 58, 10.75, 7.5, 9.286, 7.5, 4.375]
     self.assertAllClose(eval_result, self.expected_batch_result, 1e-3)
 
-    if model.run_eagerly:
-      return
     # Verify that metric value is same with arbitrary weights and batch size.
     x = np.random.random((50, 1))
     y = np.random.random((50, 1))
@@ -191,18 +182,12 @@
     model = self._get_multi_io_model()
     history = model.fit_generator(
         self._custom_generator(), steps_per_epoch=2, epochs=2)
-
-    if not model.run_eagerly:
-      self.expected_fit_result['output_1_loss'] = [32.5, 32.5]
-      self.expected_fit_result['output_2_loss'] = [8.75, 8.75]
     for key, value in self.expected_fit_result.items():
       self.assertAllClose(history.history[key], value, 1e-3)
 
   def test_eval_generator(self):
     model = self._get_multi_io_model()
     eval_result = model.evaluate_generator(self._custom_generator(), steps=2)
-    if model.run_eagerly:
-      self.expected_batch_result = [41.25, 58, 10.75, 7.5, 9.286, 7.5, 4.375]
     self.assertAllClose(eval_result, self.expected_batch_result, 1e-3)
 
 
diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 4143066..274b0a5 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -37,7 +37,7 @@
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import util as trackable_utils
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -131,7 +131,7 @@
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
     m = metrics.Sum()
-    checkpoint = checkpointable_utils.Checkpoint(sum=m)
+    checkpoint = trackable_utils.Checkpoint(sum=m)
     self.evaluate(variables.variables_initializer(m.variables))
 
     # update state
@@ -149,7 +149,7 @@
 
     # restore to a different checkpoint sum object
     restore_sum = metrics.Sum()
-    restore_checkpoint = checkpointable_utils.Checkpoint(sum=restore_sum)
+    restore_checkpoint = trackable_utils.Checkpoint(sum=restore_sum)
     status = restore_checkpoint.restore(save_path)
     restore_update = restore_sum(300.)
     status.assert_consumed().run_restore_ops()
@@ -267,7 +267,7 @@
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
     m = metrics.Mean()
-    checkpoint = checkpointable_utils.Checkpoint(mean=m)
+    checkpoint = trackable_utils.Checkpoint(mean=m)
     self.evaluate(variables.variables_initializer(m.variables))
 
     # update state
@@ -285,7 +285,7 @@
 
     # restore to a different checkpoint mean object
     restore_mean = metrics.Mean()
-    restore_checkpoint = checkpointable_utils.Checkpoint(mean=restore_mean)
+    restore_checkpoint = trackable_utils.Checkpoint(mean=restore_mean)
     status = restore_checkpoint.restore(save_path)
     restore_update = restore_mean(300.)
     status.assert_consumed().run_restore_ops()
diff --git a/tensorflow/python/keras/mixed_precision/experimental/BUILD b/tensorflow/python/keras/mixed_precision/experimental/BUILD
new file mode 100644
index 0000000..f994ab9
--- /dev/null
+++ b/tensorflow/python/keras/mixed_precision/experimental/BUILD
@@ -0,0 +1,54 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# Description:
+#   Contains the Keras Mixed Precision API (TensorFlow version).
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_library(
+    name = "autocast_variable",
+    srcs = [
+        "autocast_variable.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python/distribute:values",
+    ],
+)
+
+py_test(
+    name = "autocast_variable_test",
+    size = "medium",
+    srcs = ["autocast_variable_test.py"],
+    deps = [
+        ":autocast_variable",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python/distribute:mirrored_strategy",
+        "//tensorflow/python/eager:context",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py
new file mode 100644
index 0000000..a64b517
--- /dev/null
+++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py
@@ -0,0 +1,178 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Contains AutoCastVariable, a variable which automatically casts itself."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.distribute import values as distribute_values
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+
+
+# TODO(reedwm): Make checkpointable?
+class AutoCastVariable(object):
+  """Variable that will cast itself to a different dtype in applicable contexts.
+
+  This class wraps a floating-point tf.Variable. It emulates the variable
+  interface and delegates to the wrapped variable, but it additionally will cast
+  the wrapped variable under a `Graph._enable_auto_casting_variables(dtype)`
+  manager.
+
+  For example:
+
+  ```
+  v = tf.Variable(1.0, dtype=tf.float32)
+  v = AutoCastVariable(v)
+  print(tf.identity(v).dtype)  # tf.float32
+  with ops.get_default_graph()._enable_auto_casting_variables(tf.float16):
+    print(tf.identity(v).dtype)  # tf.float16, as v will cast itself to float16
+    print(v.dtype)  # tf.float16, as v.dtype also changes under the ctx manager.
+  ```
+
+  The purpose of this class is to allow Keras layers to create variables in
+  float32, and automatically cast them to float16 or bfloat16 when the layer is
+  called.
+  """
+
+  def __init__(self, variable):
+    """Creates an AutoCastVariable instance.
+
+    Args:
+      variable: A floating-point resource variable to wrap.
+
+    Raises:
+      ValueError: If `variable` is not a floating-point resource variable
+    """
+    if not resource_variable_ops.is_resource_variable(variable):
+      raise ValueError('variable must be of type tf.ResourceVariable, but got: '
+                       '%s' % variable)
+    if not variable.dtype.is_floating:
+      raise ValueError('variable must be a floating point variable but has '
+                       'type: %s' % variable.dtype.name)
+    self._variable = variable
+
+  @property
+  def name(self):
+    return self._variable.name
+
+  def _should_cast(self):
+    """Returns True if this variable should be casted when accessed."""
+    g = ops.get_default_graph()
+    # pylint:disable=protected-access
+    return (g._auto_cast_variable_read_dtype is not None and
+            self.true_dtype != g._auto_cast_variable_read_dtype)
+    # pylint:enable=protected-access
+
+  @property
+  def dtype(self):
+    """The dtype this variable will be casted to when read."""
+    if self._should_cast():
+      return ops.get_default_graph()._auto_cast_variable_read_dtype  # pylint:disable=protected-access
+    else:
+      return self._variable.dtype
+
+  @property
+  def true_dtype(self):
+    """The dtype of the underlying variable, before any casts are done."""
+    return self._variable.dtype
+
+  def value(self):
+    val = self._variable.value()
+    if not self._should_cast():
+      return val
+    # We colocate_with(None) to ignore the existing device constraints, so that
+    # the cast is always done on the variable's device.
+    with ops.colocate_with(None, ignore_existing=True):
+      with ops.device(val.device):
+        return math_ops.cast(val, self.dtype)
+
+  def read_value(self):
+    val = self._variable.read_value()
+    if not self._should_cast():
+      return val
+    return math_ops.cast(val, self.dtype)
+
+  def sparse_read(self, indices, name=None):
+    """Reads the value of this variable sparsely, using `gather`."""
+    val = self._variable.sparse_read(indices, name=name)
+    if not self._should_cast():
+      return val
+    return math_ops.cast(val, self.dtype)
+
+  def assign(self, value, use_locking=None, name=None, read_value=True):
+    return self._variable.assign(
+        value, use_locking=use_locking, name=name, read_value=read_value)
+
+  def assign_add(self, delta, use_locking=None, name=None, read_value=True):
+    return self._variable.assign_add(
+        delta, use_locking=use_locking, name=name, read_value=read_value)
+
+  def assign_sub(self, delta, use_locking=None, name=None, read_value=True):
+    return self._variable.assign_sub(
+        delta, use_locking=use_locking, name=name, read_value=read_value)
+
+  # TODO(reedwm): Support assigning variables with tf.assign(), var.scatter_add,
+  # etc.
+
+  def __getattr__(self, name):
+    return getattr(self._variable, name)
+
+  def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False):
+    """Converts this variable to a tensor."""
+    if not self._should_cast():
+      return ops.internal_convert_to_tensor(self._variable, dtype, name,
+                                            as_ref)
+    # TODO(reedwm): Support as_ref?
+    assert not as_ref
+    if dtype is not None and not dtype.is_compatible_with(self.dtype):
+      raise ValueError(
+          'Incompatible type conversion requested to type {!r} for variable '
+          'of type {!r}'.format(dtype.name, self.dtype.name))
+    val = ops.internal_convert_to_tensor(self._variable,
+                                         self._variable.dtype, name,
+                                         as_ref=False)
+    with ops.colocate_with(None, ignore_existing=True):
+      with ops.device(val.device):
+        return math_ops.cast(val, self.dtype)
+
+  def _should_act_as_resource_variable(self):
+    """Pass resource_variable_ops.is_resource_variable check."""
+    pass
+
+  # TODO(reedwm): Define operator overloads.
+
+
+ops.register_tensor_conversion_function(
+    AutoCastVariable, AutoCastVariable._dense_var_to_tensor)  # pylint:disable=protected-access
+ops.register_dense_tensor_like_type(AutoCastVariable)
+
+
+# We subclass DistributedVariable to pass
+# isinstance(..., DistributedVariable) checks when wrapping a
+# DistributedVariable.
+# TODO(reedwm): We should not wrap DistributedVariable, but instead have
+# DistributedVariable wrap AutoCastVariable. Subclassing DistributedVariable is
+# messy, because we do not fully implement the interface of DistributedVariable.
+class AutoCastDistributedVariable(AutoCastVariable,
+                                  distribute_values.DistributedVariable):
+  """Version of AutoCastVariable that subclasses DistributedVariable."""
+
+  def __init__(self, variable):
+    if not isinstance(variable, distribute_values.DistributedValues):
+      raise ValueError('variable must be of type DistributedValues, '
+                       'but got: %s' % variable)
+    super(AutoCastDistributedVariable, self).__init__(variable)
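A short usage sketch for the class added above, mirroring the reads exercised in the tests that follow; note that `_enable_auto_casting_variables` is TF-internal and subject to change:

```python
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.keras.mixed_precision.experimental import autocast_variable
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables

v = variables.VariableV1(1., use_resource=True, dtype=dtypes.float32)
v = autocast_variable.AutoCastVariable(v)
print(array_ops.identity(v).dtype)  # float32 outside any autocast scope

with ops.get_default_graph()._enable_auto_casting_variables(dtypes.float16):
  print(array_ops.identity(v).dtype)  # float16: reads are cast on the fly
```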
diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py
new file mode 100644
index 0000000..1caec6a
--- /dev/null
+++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py
@@ -0,0 +1,245 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for AutoCastVariable."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.distribute import mirrored_strategy
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.mixed_precision.experimental import autocast_variable
+
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+TESTCASES = ({
+    'testcase_name': 'base',
+    'distribute': False
+}, {
+    'testcase_name': 'distribute',
+    'distribute': True
+})
+
+
+def get_distribute_scope(distribute):
+
+  class DummyContextManager(object):
+
+    def __enter__(self):
+      pass
+
+    def __exit__(self, *args):
+      pass
+
+  if distribute:
+    return mirrored_strategy.MirroredStrategy(['cpu:0']).scope()
+  else:
+    return DummyContextManager()
+
+
+def get_autocast_var(var, distribute):
+  if distribute:
+    return autocast_variable.AutoCastDistributedVariable(var)
+  else:
+    return autocast_variable.AutoCastVariable(var)
+
+
+def get_var(val, dtype):
+  return variables.VariableV1(val, use_resource=True, dtype=dtype)
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(*TESTCASES)
+  def test_read(self, distribute):
+    with get_distribute_scope(distribute):
+      x = get_var(1., dtypes.float32)
+      x = get_autocast_var(x, distribute)
+      self.evaluate(x.initializer)
+
+      # outside of auto cast scope.
+      self.assertEqual(x.dtype, dtypes.float32)
+      self.assertEqual(x.value().dtype, dtypes.float32)
+      self.assertEqual(x.read_value().dtype, dtypes.float32)
+      self.assertEqual(array_ops.identity(x).dtype, dtypes.float32)
+
+      # within auto cast scope of different dtype
+      with ops.get_default_graph()._enable_auto_casting_variables(
+          dtypes.float16):
+        self.assertEqual(x.dtype, dtypes.float16)
+        self.assertEqual(x.value().dtype, dtypes.float16)
+        self.assertEqual(x.read_value().dtype, dtypes.float16)
+        self.assertEqual(array_ops.identity(x).dtype, dtypes.float16)
+
+      # within auto cast scope of same dtype
+      with ops.get_default_graph()._enable_auto_casting_variables(
+          dtypes.float32):
+        self.assertEqual(x.dtype, dtypes.float32)
+        self.assertEqual(x.value().dtype, dtypes.float32)
+        self.assertEqual(x.read_value().dtype, dtypes.float32)
+        self.assertEqual(array_ops.identity(x).dtype, dtypes.float32)
+
+  @parameterized.named_parameters(*TESTCASES)
+  def test_read_nested_scopes(self, distribute):
+    with get_distribute_scope(distribute):
+      x = get_var(1., dtypes.float32)
+      x = get_autocast_var(x, distribute)
+      self.evaluate(x.initializer)
+
+      with ops.get_default_graph()._enable_auto_casting_variables(
+          dtypes.float16):
+        self.assertEqual(x.dtype, dtypes.float16)
+        self.assertEqual(x.read_value().dtype, dtypes.float16)
+
+        with ops.get_default_graph()._enable_auto_casting_variables(
+            dtypes.float32):
+          self.assertEqual(x.dtype, dtypes.float32)
+          self.assertEqual(x.read_value().dtype, dtypes.float32)
+
+        self.assertEqual(x.dtype, dtypes.float16)
+        self.assertEqual(x.read_value().dtype, dtypes.float16)
+
+  @parameterized.named_parameters(*TESTCASES)
+  def test_operator_overloads(self, distribute):
+    with get_distribute_scope(distribute):
+      x = get_var(1., dtypes.float32)
+      x = get_autocast_var(x, distribute)
+      self.evaluate(x.initializer)
+
+    v1 = constant_op.constant(2., dtype=dtypes.float32)
+    v2 = constant_op.constant(2., dtype=dtypes.float16)
+
+    # Because autocast variables do not yet define operator overloads, the
+    # operator is defined by the non-variable tensor.
+
+    # Test variable as the LHS. Currently, this is not supported with
+    # distributed autocast variables.
+    if not distribute:
+      self.assertEqual(self.evaluate(x + v1), 3.)
+
+      with ops.get_default_graph()._enable_auto_casting_variables(
+          dtypes.float16):
+        self.assertEqual(self.evaluate(x + v2), 3.)
+
+    # Test variable as the RHS
+    self.assertEqual(self.evaluate(v1 + x), 3.)
+
+    with ops.get_default_graph()._enable_auto_casting_variables(
+        dtypes.float16):
+      self.assertEqual(self.evaluate(v2 + x), 3.)
+
+  @parameterized.named_parameters(*TESTCASES)
+  def test_assign(self, distribute):
+    with get_distribute_scope(distribute):
+      x = get_var(0., dtypes.float32)
+      x = get_autocast_var(x, distribute)
+      self.evaluate(x.initializer)
+
+      # outside of auto cast scope.
+      v1 = constant_op.constant(3.14, dtype=dtypes.float32)
+      v2 = constant_op.constant(3.14, dtype=dtypes.float16)
+
+      def run_and_check():
+        # Assign float32 values
+        self.assertAllClose(3.14, self.evaluate(x.assign(v1)))
+        self.assertAllClose(3.14 * 2, self.evaluate(x.assign_add(v1)))
+        self.assertAllClose(3.14, self.evaluate(x.assign_sub(v1)))
+
+        # Attempt to assign float16 values
+        with self.assertRaisesRegexp(
+            ValueError,
+            'conversion requested dtype float32 for Tensor with dtype float16'):
+          self.evaluate(x.assign(v2))
+        with self.assertRaisesRegexp(
+            ValueError,
+            'conversion requested dtype float32 for Tensor with dtype float16'):
+          self.evaluate(x.assign_add(v2))
+        with self.assertRaisesRegexp(
+            ValueError,
+            'conversion requested dtype float32 for Tensor with dtype float16'):
+          self.evaluate(x.assign_sub(v2))
+
+        # Assign Python floats
+        self.assertAllClose(3.14, self.evaluate(x.assign(3.14)))
+        self.assertAllClose(3.14 * 2, self.evaluate(x.assign_add(3.14)))
+        self.assertAllClose(3.14, self.evaluate(x.assign_sub(3.14)))
+
+      run_and_check()
+      # reset x
+      self.evaluate(x.assign(0.))
+      # within auto cast scope.
+      with ops.get_default_graph()._enable_auto_casting_variables(
+          dtypes.float16):
+        # assign still expects float32 values even in a float16 scope
+        run_and_check()
+
+  @parameterized.named_parameters(*TESTCASES)
+  def test_assign_stays_in_true_dtype(self, distribute):
+    with get_distribute_scope(distribute):
+      x = get_var(1., dtypes.float32)
+      x = get_autocast_var(x, distribute)
+      self.evaluate(x.initializer)
+      # small_val is a value such that 1.0 + small_val == 1.0 in fp16, but not
+      # in fp32
+      small_val = np.finfo('float16').eps / 2
+      small_tensor = constant_op.constant(small_val, dtype=dtypes.float32)
+      with ops.get_default_graph()._enable_auto_casting_variables(
+          dtypes.float16):
+        # Variable should be increased, despite it appearing to be the same
+        # float16 value.
+        self.assertEqual(1. + small_val,
+                         self.evaluate(x.assign(1. + small_tensor)))
+        self.assertEqual(1., self.evaluate(x.value()))
+      self.assertEqual(1. + small_val, self.evaluate(x.value()))
+
+      self.evaluate(x.assign(1.))
+      with ops.get_default_graph()._enable_auto_casting_variables(
+          dtypes.float16):
+        self.assertEqual(1. + small_val,
+                         self.evaluate(x.assign_add(small_tensor)))
+        self.assertEqual(1., self.evaluate(x.value()))
+      self.assertEqual(1. + small_val, self.evaluate(x.value()))
+
+  @parameterized.named_parameters(*TESTCASES)
+  def test_invalid_wrapped_variable(self, distribute):
+    with get_distribute_scope(distribute):
+      # Wrap a non-variable
+      with self.assertRaisesRegexp(ValueError, 'variable must be of type'):
+        x = constant_op.constant([1.], dtype=dtypes.float32)
+        get_autocast_var(x, distribute)
+
+      # Wrap a non-floating point variable
+      with self.assertRaisesRegexp(ValueError,
+                                   'variable must be a floating point'):
+        x = get_var(1, dtypes.int32)
+        get_autocast_var(x, distribute)
+
+    if distribute:
+      # Wrap a non-distributed variable with AutoCastDistributedVariable
+      with self.assertRaisesRegexp(ValueError, 'variable must be of type'):
+        x = get_var(1., dtypes.float32)
+        get_autocast_var(x, distribute)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/keras/model_subclassing_test.py b/tensorflow/python/keras/model_subclassing_test.py
index 6d8ff9d..5220f4e 100644
--- a/tensorflow/python/keras/model_subclassing_test.py
+++ b/tensorflow/python/keras/model_subclassing_test.py
@@ -35,7 +35,7 @@
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import data_structures
+from tensorflow.python.training.tracking import data_structures
 
 try:
   import h5py  # pylint:disable=g-import-not-at-top
diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index 9bc5aa2..4872598 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -313,15 +313,16 @@
       continue
     if isinstance(value, Layer):
       attributes_cache[name] = value
-      assert value in model._layers
-      if hasattr(value, '_layers') and value._layers:
+      assert value in model.layers
+      if hasattr(value, 'layers') and value.layers:
         raise ValueError('We do not support the use of nested layers '
                          'in `model_to_estimator` at this time. Found nested '
                          'layer: %s' % value)
     elif isinstance(
         value,
         (list, tuple)) and name not in ('layers', '_layers', 'metrics',
-                                        '_compile_stateful_metric_functions'):
+                                        '_compile_stateful_metric_functions',
+                                        '_output_loss_metrics'):
       # Handle case: list/tuple of layers (also tracked by the Network API).
       if value and all(isinstance(val, Layer) for val in value):
         raise ValueError('We do not support the use of list-of-layers '
@@ -461,7 +462,7 @@
       or functions.
     compile_clone: Boolean, whether to compile model clone (default `True`).
     in_place_reset: Boolean, whether to reset the model in place. Only used if
-      the model is not a graph network. If the model is a subclassed model, then
+      the model is a subclassed model. In the case of a subclassed model,
       this argument must be set to `True` (default `False`). To restore the
       original model, use the function
       `in_place_subclassed_model_state_restoration(model)`.
@@ -504,8 +505,8 @@
   else:
     if not in_place_reset:
       raise ValueError(
-          'Model is not a graph network (usually means that it is a subclassed '
-          'model). The model cannot be cloned, but there is a workaround where '
+          'This model is a subclassed model. '
+          'Such a model cannot be cloned, but there is a workaround where '
           'the model is reset in-place. To use this, please set the argument '
           '`in_place_reset` to `True`. This will reset the attributes in the '
           'original model. To restore the attributes, call '
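A sketch of the workaround the rewritten error message describes, assuming `subclassed_model` is a built, subclassed Keras model; both helpers are defined in this module:

```python
from tensorflow.python.keras import models as keras_models

clone = keras_models.clone_and_build_model(
    subclassed_model, compile_clone=False, in_place_reset=True)
# ... use the clone, then restore the original model's attributes:
keras_models.in_place_subclassed_model_state_restoration(subclassed_model)
```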
diff --git a/tensorflow/python/keras/ops.py b/tensorflow/python/keras/ops.py
index 44e0228..b2d8520 100644
--- a/tensorflow/python/keras/ops.py
+++ b/tensorflow/python/keras/ops.py
@@ -20,7 +20,6 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import init_ops_v2
-from tensorflow.python.ops.losses import losses_impl
 from tensorflow.python.util.tf_export import keras_export
 
 
@@ -101,6 +100,3 @@
 
 
 keras_export("keras.backend.name_scope")(ops.name_scope)
-
-keras_export("keras.losses.Reduction", v1=[])(
-    losses_impl.ReductionV2)
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
index 4fa7c73..965ae86 100644
--- a/tensorflow/python/keras/optimizer_v2/adam.py
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -64,7 +64,7 @@
       $$t := 0 \text{(Initialize timestep)}$$
 
       The update rule for `variable` with gradient `g` uses an optimization
-      described at the end of section2 of the paper:
+      described at the end of section 2 of the paper:
 
       $$t := t + 1$$
       $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
@@ -82,7 +82,7 @@
       $$t := 0 \text{(Initialize timestep)}$$
 
       The update rule for `variable` with gradient `g` uses an optimization
-      described at the end of section2 of the paper:
+      described at the end of section 2 of the paper:
 
       $$t := t + 1$$
       $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index b701c98..26369e9 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -43,7 +43,7 @@
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import keras_export
 
@@ -70,7 +70,7 @@
 
 @six.add_metaclass(abc.ABCMeta)
 @keras_export("keras.optimizers.Optimizer")
-class OptimizerV2(checkpointable.Checkpointable):
+class OptimizerV2(trackable.Trackable):
   """Updated base class for optimizers.
 
   This class defines the API to add Ops to train a model.  You never use this
@@ -132,8 +132,8 @@
   you divide your loss by the global batch size, which is done automatically
   if you use a member of `tf.keras.losses` or `tf.losses`. See the
   `reduction` argument of your loss which should be set to
-  `tf.losses.Reduction.SUM_OVER_BATCH_SIZE` for averaging or
-  `tf.losses.Reduction.SUM` for not.
+  `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` for averaging or
+  `tf.keras.losses.Reduction.SUM` for not.
 
   If you are not using these and you want to average gradients, you should use
   `tf.math.reduce_sum` to add up your per-example losses and then divide by the
@@ -244,9 +244,9 @@
     self._weights = []
     self._iterations = None
 
-    # For implementing Checkpointable. Stores information about how to restore
+    # For implementing Trackable. Stores information about how to restore
     # slot variables which have not yet been created
-    # (checkpointable._CheckpointPosition objects).
+    # (trackable._CheckpointPosition objects).
     #  {slot_name :
     #      {_var_key(variable_to_train): [checkpoint_position, ... ], ... },
     #   ... }
@@ -559,7 +559,7 @@
         self._weights.append(self._iterations)
     for name, value in self._hyper.items():
       if isinstance(value, ops.Tensor) or callable(value):
-        pass
+        continue
       else:
         self._hyper[name] = self.add_weight(
             name,
@@ -648,7 +648,8 @@
     if callable(value):
       return value()
     if isinstance(value, (ops.Tensor, tf_variables.Variable,
-                          distributed_values.TPUMirroredVariable)):
+                          distributed_values.TPUMirroredVariable,
+                          distributed_values.DistributedVariable)):
       return backend.get_value(value)
     return value
 
@@ -828,7 +829,7 @@
       return x.value()
 
   # ---------------
-  # For implementing the checkpointable interface
+  # For implementing the trackable interface
   # ---------------
 
   def _restore_slot_variable(self, slot_name, variable, slot_variable):
@@ -859,8 +860,8 @@
     slot variable needs to be restored).
 
     Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
+      slot_variable_position: A `trackable._CheckpointPosition` object
+        indicating the slot variable `Trackable` object to be restored.
       slot_name: The name of this `Optimizer`'s slot to restore into.
       variable: The variable object this slot is being created for.
     """
@@ -878,7 +879,7 @@
         # (aside from double initialization), and makes variable creator scopes
         # behave the same way they do when graph building.
         and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
-      initializer = checkpointable.CheckpointInitialValue(
+      initializer = trackable.CheckpointInitialValue(
           checkpoint_position=slot_variable_position)
       slot_variable = self.add_slot(
           var=variable,
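The reduction guidance in the `OptimizerV2` docstring above boils down to a one-liner when writing a custom, unreduced loss under a distribution strategy; a sketch of the manual averaging it recommends:

```python
import tensorflow as tf

def averaged_loss(per_example_losses, global_batch_size):
  # Sum the per-example losses, then divide by the *global* batch size
  # (not the per-replica batch size), as the docstring recommends.
  return tf.math.reduce_sum(per_example_losses) / global_batch_size
```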
diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index b704b88..bce5dcb 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -40,7 +40,7 @@
 from tensorflow.python.ops import state_ops
 from tensorflow.python.training import optimizer as tf_optimizer_module
 from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util.tf_export import keras_export
 
 
@@ -710,19 +710,19 @@
     return dict(list(base_config.items()) + list(config.items()))
 
 
-class TFOptimizer(Optimizer, checkpointable.Checkpointable):
+class TFOptimizer(Optimizer, trackable.Trackable):
   """Wrapper class for native TensorFlow optimizers.
   """
 
   def __init__(self, optimizer, iterations=None):  # pylint: disable=super-init-not-called
     self.optimizer = optimizer
-    self._track_checkpointable(optimizer, name='optimizer')
+    self._track_trackable(optimizer, name='optimizer')
     if iterations is None:
       with K.name_scope(self.__class__.__name__):
         self.iterations = K.variable(0, dtype='int64', name='iterations')
     else:
       self.iterations = iterations
-    self._track_checkpointable(self.iterations, name='global_step')
+    self._track_trackable(self.iterations, name='global_step')
 
   def apply_gradients(self, grads):
     self.optimizer.apply_gradients(grads, global_step=self.iterations)
diff --git a/tensorflow/python/keras/regularizers.py b/tensorflow/python/keras/regularizers.py
index b828fa9..2dabe50 100644
--- a/tensorflow/python/keras/regularizers.py
+++ b/tensorflow/python/keras/regularizers.py
@@ -54,6 +54,8 @@
     self.l2 = K.cast_to_floatx(l2)
 
   def __call__(self, x):
+    if not self.l1 and not self.l2:
+      return K.constant(0.)
     regularization = 0.
     if self.l1:
       regularization += math_ops.reduce_sum(self.l1 * math_ops.abs(x))
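The early return added above matters because with `l1 == l2 == 0` the old code returned the Python float `0.` rather than a tensor, which broke callers expecting a tensor penalty (the new `l2_zero` test case below covers this). A sketch, assuming the public API:

```python
import tensorflow as tf

reg = tf.keras.regularizers.l2(0.)
penalty = reg(tf.ones((2, 2)))
print(penalty)  # a scalar 0.0 tensor, not the Python float 0.
```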
diff --git a/tensorflow/python/keras/regularizers_test.py b/tensorflow/python/keras/regularizers_test.py
index 3aca0c7..fb24393 100644
--- a/tensorflow/python/keras/regularizers_test.py
+++ b/tensorflow/python/keras/regularizers_test.py
@@ -67,6 +67,7 @@
   @parameterized.named_parameters([
       ('l1', keras.regularizers.l1()),
       ('l2', keras.regularizers.l2()),
+      ('l2_zero', keras.regularizers.l2(0.)),
   ])
   @test_util.deprecated_graph_mode_only
   def test_activity_regularization(self, regularizer):
diff --git a/tensorflow/python/keras/saving/hdf5_format_test.py b/tensorflow/python/keras/saving/hdf5_format_test.py
index c51eecf..534c78d 100644
--- a/tensorflow/python/keras/saving/hdf5_format_test.py
+++ b/tensorflow/python/keras/saving/hdf5_format_test.py
@@ -40,7 +40,7 @@
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import training as training_module
-from tensorflow.python.training.checkpointable import util as checkpointable
+from tensorflow.python.training.tracking import util as trackable
 
 try:
   import h5py  # pylint:disable=g-import-not-at-top
@@ -994,7 +994,7 @@
 
   @test_util.run_in_graph_and_eager_modes
   def test_incompatible_checkpoint(self):
-    save_path = checkpointable.Checkpoint().save(
+    save_path = trackable.Checkpoint().save(
         os.path.join(self.get_temp_dir(), 'ckpt'))
     m = keras.Model()
     with self.assertRaisesRegexp(AssertionError, 'Nothing to load'):
diff --git a/tensorflow/python/keras/saving/saved_model.py b/tensorflow/python/keras/saving/saved_model.py
index a614359..59781a5 100644
--- a/tensorflow/python/keras/saving/saved_model.py
+++ b/tensorflow/python/keras/saving/saved_model.py
@@ -27,6 +27,7 @@
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras.saving import model_from_json
 from tensorflow.python.keras.saving import saving_utils
+from tensorflow.python.keras.utils import mode_keys
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
@@ -35,9 +36,8 @@
 from tensorflow.python.saved_model import model_utils
 from tensorflow.python.saved_model import save as save_lib
 from tensorflow.python.saved_model import utils_impl as saved_model_utils
-from tensorflow.python.training import mode_keys
 from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training.checkpointable import graph_view
+from tensorflow.python.training.tracking import graph_view
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import keras_export
@@ -300,10 +300,10 @@
         # not counting optimizer objects. Optimizer objects are ignored because
         # if the model has not trained, the slot variables will not have been
         # created yet.
-        # TODO(b/113179535): Replace with checkpointable equivalence.
+        # TODO(b/113179535): Replace with trackable equivalence.
         _assert_same_non_optimizer_objects(model, model_graph, clone, g)
 
-        # TODO(b/113178242): Use value transfer for checkpointable objects.
+        # TODO(b/113178242): Use value transfer for trackable objects.
         clone.load_weights(checkpoint_path)
 
         # Add graph and variables to SavedModel.
@@ -361,14 +361,14 @@
 
 
 def _assert_same_non_optimizer_objects(model, model_graph, clone, clone_graph):  # pylint: disable=unused-argument
-  """Asserts model and clone contain the same checkpointable objects."""
+  """Asserts model and clone contain the same trackable objects."""
 
   # TODO(fchollet, kathywu): make sure this works in eager mode.
   return True
 
 
 @keras_export('keras.experimental.load_from_saved_model')
-def load_from_saved_model(saved_model_path):
+def load_from_saved_model(saved_model_path, custom_objects=None):
   """Loads a keras.Model from a SavedModel created by keras export().
 
   This function reinstantiates model state by:
@@ -397,6 +397,9 @@
 
   Args:
     saved_model_path: a string specifying the path to an existing SavedModel.
+    custom_objects: Optional dictionary mapping names
+        (strings) to custom classes or functions to be
+        considered during deserialization.
 
   Returns:
     a keras.Model instance.
@@ -407,7 +410,7 @@
       compat.as_bytes(constants.ASSETS_DIRECTORY),
       compat.as_bytes(constants.SAVED_MODEL_FILENAME_JSON))
   model_json = file_io.read_file_to_string(model_json_filepath)
-  model = model_from_json(model_json)
+  model = model_from_json(model_json, custom_objects=custom_objects)
 
   # restore model weights
   checkpoint_prefix = os.path.join(
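A hypothetical use of the new `custom_objects` argument, via the `keras.experimental` export declared above; `MyLayer` and the path are placeholders for a user-defined layer and an existing SavedModel directory:

```python
import tensorflow as tf

model = tf.keras.experimental.load_from_saved_model(
    '/tmp/keras_saved_model', custom_objects={'MyLayer': MyLayer})
```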
diff --git a/tensorflow/python/keras/saving/saved_model_test.py b/tensorflow/python/keras/saving/saved_model_test.py
index 8063b8a..ab0b4e4 100644
--- a/tensorflow/python/keras/saving/saved_model_test.py
+++ b/tensorflow/python/keras/saving/saved_model_test.py
@@ -33,13 +33,12 @@
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.saving import saved_model as keras_saved_model
+from tensorflow.python.keras.utils import mode_keys
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import model_utils
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import mode_keys
 from tensorflow.python.training import training as training_module
 
 
@@ -264,10 +263,7 @@
 
 def load_model(sess, path, mode):
   tags = model_utils.EXPORT_TAG_MAP[mode]
-  if mode == mode_keys.ModeKeys.PREDICT:
-    sig_def_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-  else:
-    sig_def_key = mode
+  sig_def_key = model_utils.SIGNATURE_KEY_MAP[mode]
 
   meta_graph_def = loader_impl.load(sess, tags, path)
   inputs = {
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index eff0f39..fdc01d1 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -160,7 +160,7 @@
     weights = model.get_weights()
     recovered_model.set_weights(weights)
     output = recovered_model.predict(input_data)
-    np.testing.assert_allclose(output, actual_output, rtol=1e-3)
+    np.testing.assert_allclose(output, actual_output, rtol=2e-3)
 
   # test training mode (e.g. useful for dropout tests)
   # Rebuild the model to avoid the graph being reused between predict() and
@@ -209,7 +209,7 @@
     weights = model.get_weights()
     recovered_model.set_weights(weights)
     output = recovered_model.predict(input_data)
-    np.testing.assert_allclose(output, actual_output, rtol=1e-3)
+    np.testing.assert_allclose(output, actual_output, rtol=2e-3)
 
   # for further checks in the caller function
   return actual_output
diff --git a/tensorflow/python/keras/utils/composite_tensor_support_test.py b/tensorflow/python/keras/utils/composite_tensor_support_test.py
new file mode 100644
index 0000000..67e7711
--- /dev/null
+++ b/tensorflow/python/keras/utils/composite_tensor_support_test.py
@@ -0,0 +1,133 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Keras composite tensor support."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.keras.layers import core
+from tensorflow.python.keras.layers import Layer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.platform import test
+
+
+# Define test-only Layer classes to validate passing Sparse and Ragged tensors
+# between layers.
+class ToDense(Layer):
+  """Create a dense (standard) tensor from the given input tensor."""
+
+  def __init__(self, default_value, **kwargs):
+    super(ToDense, self).__init__(**kwargs)
+    self._default_value = default_value
+
+  def call(self, inputs):
+    if isinstance(inputs, ragged_tensor.RaggedTensor):
+      return inputs.to_tensor(default_value=self._default_value)
+    elif isinstance(inputs, sparse_tensor.SparseTensor):
+      return sparse_ops.sparse_tensor_to_dense(
+          inputs, default_value=self._default_value)
+    elif isinstance(inputs, ops.Tensor):
+      return inputs
+    else:
+      raise TypeError("Unexpected tensor type %s" % type(inputs).__name__)
+
+
+class ToRagged(Layer):
+  """Create a ragged tensor based on a given dense tensor."""
+
+  def __init__(self, padding, ragged_rank=1, **kwargs):
+    super(ToRagged, self).__init__(**kwargs)
+    self._padding = padding
+    self._ragged_rank = ragged_rank
+
+  def call(self, inputs):
+    return ragged_tensor.RaggedTensor.from_tensor(
+        inputs, padding=self._padding, ragged_rank=self._ragged_rank)
+
+
+class ToSparse(Layer):
+  """Create a sparse tensor based on a given dense tensor."""
+
+  def call(self, inputs):
+    indices = array_ops.where(math_ops.not_equal(inputs, 0))
+    values = array_ops.gather_nd(inputs, indices)
+    shape = array_ops.shape(inputs, out_type=dtypes.int64)
+    return sparse_tensor.SparseTensor(indices, values, dense_shape=shape)
+
+
+@keras_parameterized.run_with_all_model_types
+@keras_parameterized.run_all_keras_modes
+class InternalCompositeTest(keras_parameterized.TestCase):
+
+  def test_model_with_internal_ragged_tensors(self):
+    # Create a model that accepts an input, converts it to Ragged, and
+    # converts the ragged tensor back to a dense tensor.
+    layers = [ToRagged(padding=0), ToDense(default_value=-1)]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(None,))
+
+    # Define some input data with trailing padding.
+    input_data = np.array([[1, 0, 0], [2, 3, 0]])
+    expected_output = np.array([[1, -1], [2, 3]])
+    output = model.predict(input_data)
+    self.assertAllEqual(expected_output, output)
+
+  def test_model_with_internal_sparse_tensors(self):
+    # Create a model that accepts an input, converts it to Sparse, and
+    # converts the sparse tensor back to a dense tensor.
+    layers = [ToSparse(), ToDense(default_value=-1)]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(None,))
+
+    # Define some input data with trailing padding.
+    input_data = np.array([[1, 0, 0], [2, 3, 0]])
+    expected_output = np.array([[1, -1, -1], [2, 3, -1]])
+    output = model.predict(input_data)
+    self.assertAllEqual(expected_output, output)
+
+  def test_training_model_with_internal_ragged_tensors(self):
+
+    # Create a model that implements y=Mx. This is easy to learn and will
+    # demonstrate appropriate gradient passing. (We have to use RaggedTensors
+    # for this test, as ToSparse() doesn't support gradient propagation through
+    # the layer.) TODO(b/124796939): Investigate this.
+    layers = [core.Dense(2), ToRagged(padding=0), ToDense(default_value=-1)]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(1,))
+
+    input_data = np.random.rand(1024, 1)
+    expected_data = np.concatenate((input_data * 3, input_data * .5), axis=-1)
+
+    model.compile(
+        loss="mse",
+        optimizer="adam",
+        run_eagerly=testing_utils.should_run_eagerly())
+    history = model.fit(input_data, expected_data, epochs=10, verbose=0)
+
+    # If the model trained, the loss stored at history[0] should be different
+    # from the one stored at history[-1].
+    self.assertNotEqual(history.history["loss"][-1], history.history["loss"][0])
+
+
+if __name__ == "__main__":
+  test.main()
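
The round trip these tests exercise can be reproduced directly with the same APIs the layers above call; a sketch:

    from tensorflow.python.ops.ragged import ragged_tensor

    rt = ragged_tensor.RaggedTensor.from_tensor(
        [[1, 0, 0], [2, 3, 0]], padding=0)
    # Trailing padding values are stripped: rt is [[1], [2, 3]].
    dense = rt.to_tensor(default_value=-1)
    # Short rows are re-padded with the default: dense is [[1, -1], [2, 3]].
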
diff --git a/tensorflow/python/keras/utils/losses_utils.py b/tensorflow/python/keras/utils/losses_utils.py
index 899780e..4b37c74 100644
--- a/tensorflow/python/keras/utils/losses_utils.py
+++ b/tensorflow/python/keras/utils/losses_utils.py
@@ -27,7 +27,34 @@
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import weights_broadcast_ops
-from tensorflow.python.ops.losses import losses_impl
+from tensorflow.python.util.tf_export import keras_export
+
+
+@keras_export('keras.losses.Reduction', v1=[])
+class ReductionV2(object):
+  """Types of loss reduction.
+
+  Contains the following values:
+
+  * `NONE`: Un-reduced weighted losses with the same shape as input.
+  * `SUM`: Scalar sum of weighted losses.
+  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
+     Note that when using `tf.distribute.Strategy`, this is the global batch
+     size across all the replicas that are contributing to a single step.
+  """
+
+  NONE = 'none'
+  SUM = 'sum'
+  SUM_OVER_BATCH_SIZE = 'sum_over_batch_size'
+
+  @classmethod
+  def all(cls):
+    return (cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)
+
+  @classmethod
+  def validate(cls, key):
+    if key not in cls.all():
+      raise ValueError('Invalid Reduction Key %s.' % key)
 
 
 def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
@@ -141,14 +168,14 @@
     return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)
 
 
-def _reduce_weighted_loss(
-    weighted_losses, reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE):
+def reduce_weighted_loss(weighted_losses,
+                         reduction=ReductionV2.SUM_OVER_BATCH_SIZE):
   """Reduces the individual weighted loss measurements."""
-  if reduction == losses_impl.ReductionV2.NONE:
+  if reduction == ReductionV2.NONE:
     loss = weighted_losses
   else:
     loss = math_ops.reduce_sum(weighted_losses)
-    if reduction == losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE:
+    if reduction == ReductionV2.SUM_OVER_BATCH_SIZE:
       num_replicas = (  # Used to convert from local to global batch size.
           distribution_strategy_context.get_strategy().num_replicas_in_sync)
       loss = _safe_mean(loss, num_replicas * _num_elements(weighted_losses))
@@ -157,7 +184,7 @@
 
 def compute_weighted_loss(losses,
                           sample_weight=None,
-                          reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
+                          reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
                           name=None):
   """Computes the weighted loss.
 
@@ -165,8 +192,8 @@
     losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
     sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
       `losses`, or be broadcastable to `losses`.
-    reduction: Type of `tf.losses.Reduction` to apply to loss. Default value is
-      `SUM_OVER_BATCH_SIZE`.
+    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
+      Default value is `SUM_OVER_BATCH_SIZE`.
     name: Optional name for the op.
 
   Raises:
@@ -176,7 +203,7 @@
     Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
     `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
   """
-  losses_impl.ReductionV2.validate(reduction)
+  ReductionV2.validate(reduction)
   if sample_weight is None:
     sample_weight = 1.0
   with ops.name_scope(name, 'weighted_loss', (losses, sample_weight)):
@@ -201,7 +228,16 @@
     sample_weight.get_shape().assert_is_compatible_with(losses.get_shape())
     weighted_losses = math_ops.multiply(losses, sample_weight)
     # Apply reduction function to the individual weighted losses.
-    loss = _reduce_weighted_loss(weighted_losses, reduction)
+    loss = reduce_weighted_loss(weighted_losses, reduction)
     # Convert the result back to the input type.
     loss = math_ops.cast(loss, input_dtype)
     return loss
+
+
+def scale_loss_for_distribution(loss_value):
+  """Scales and returns the given loss value by the number of replicas."""
+  num_replicas = (
+      distribution_strategy_context.get_strategy().num_replicas_in_sync)
+  if num_replicas > 1:
+    loss_value *= (1. / num_replicas)
+  return loss_value
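
A pure-numpy mirror of the reduction logic above, for illustration only; the string keys match `ReductionV2`, and `num_replicas` stands in for `num_replicas_in_sync`:

    import numpy as np

    def reduce_weighted_loss_reference(weighted_losses,
                                       reduction='sum_over_batch_size',
                                       num_replicas=1):
      if reduction == 'none':
        return weighted_losses
      loss = weighted_losses.sum()
      if reduction == 'sum_over_batch_size':
        # Divide by the global element count: local elements times replicas.
        loss = loss / (num_replicas * weighted_losses.size)
      return loss

    assert reduce_weighted_loss_reference(np.array([1., 2., 3.]), 'sum') == 6.0
    assert reduce_weighted_loss_reference(np.array([1., 2., 3.])) == 2.0
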
diff --git a/tensorflow/python/keras/utils/mode_keys.py b/tensorflow/python/keras/utils/mode_keys.py
new file mode 100644
index 0000000..fb6fc3e
--- /dev/null
+++ b/tensorflow/python/keras/utils/mode_keys.py
@@ -0,0 +1,22 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Keras model mode constants."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=unused-import
+from tensorflow.python.saved_model.model_utils.mode_keys import KerasModeKeys as ModeKeys
+# pylint: enable=unused-import
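
Call sites elsewhere in this change (e.g. saved_model_test.py above) now import the alias from Keras utils; a sketch, assuming the usual TRAIN/TEST/PREDICT keys:

    from tensorflow.python.keras.utils import mode_keys

    mode = mode_keys.ModeKeys.PREDICT  # TRAIN and TEST are the other keys
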
diff --git a/tensorflow/python/keras/utils/tf_utils.py b/tensorflow/python/keras/utils/tf_utils.py
index 387fbf5..1c1d30b 100644
--- a/tensorflow/python/keras/utils/tf_utils.py
+++ b/tensorflow/python/keras/utils/tf_utils.py
@@ -20,9 +20,9 @@
 import six
 
 from tensorflow.python.eager import context
+from tensorflow.python.framework import composite_tensor
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import smart_cond as smart_module
-from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import control_flow_ops
@@ -313,7 +313,9 @@
   """
   if isinstance(tensor, variables.Variable):
     return not context.executing_eagerly()
-  if isinstance(tensor, (ops.Tensor, sparse_tensor.SparseTensor)):
+  if isinstance(tensor, composite_tensor.CompositeTensor):
+    return tensor._is_graph_tensor  # pylint: disable=protected-access
+  if isinstance(tensor, ops.Tensor):
     return hasattr(tensor, 'graph')
   if isinstance(tensor, tuple(_user_convertible_tensor_types)):
     return hasattr(ops.convert_to_tensor(tensor), 'graph')
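
A sketch of what the widened check buys, assuming the patched helper is `tf_utils.is_symbolic_tensor`; `SparseTensor` is one `CompositeTensor` subclass, and `_is_graph_tensor` is the private property consulted above:

    from tensorflow.python.framework import sparse_tensor
    from tensorflow.python.keras.utils import tf_utils

    sp = sparse_tensor.SparseTensor(
        indices=[[0, 0]], values=[1.0], dense_shape=[2, 2])
    # True when built in a graph, False eagerly; RaggedTensor and other
    # CompositeTensor subclasses now take the same code path.
    print(tf_utils.is_symbolic_tensor(sp))
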
diff --git a/tensorflow/python/keras/utils/tf_utils_test.py b/tensorflow/python/keras/utils/tf_utils_test.py
index 9c478af..902ecf9 100644
--- a/tensorflow/python/keras/utils/tf_utils_test.py
+++ b/tensorflow/python/keras/utils/tf_utils_test.py
@@ -89,6 +89,10 @@
         self._input = input_
         self.value = ops.convert_to_tensor(42.)
 
+      @property
+      def dtype(self):
+        return self.value.dtype
+
     ops.register_tensor_conversion_function(
         Foo, lambda x, *args, **kwargs: x.value)
     tf_utils.register_symbolic_tensor_type(Foo)
@@ -128,6 +132,16 @@
     # `Tensor`.
     y = model(ops.convert_to_tensor(7.))
     self.assertIsInstance(y, Foo)
+    # Confirm that (custom) loss sees `Foo` instance, not Tensor.
+    obtained_prediction_box = [None]
+    def custom_loss(y_obs, y_pred):
+      del y_obs
+      obtained_prediction_box[0] = y_pred
+      return y_pred
+    # `compile` builds the training loss by calling the loss function on the
+    # model's symbolic outputs, which is enough to trigger the side-effect.
+    model.compile('SGD', loss=custom_loss)
+    self.assertIsInstance(obtained_prediction_box[0], Foo)
 
 
 class ConvertInnerNodeDataTest(test.TestCase):
diff --git a/tensorflow/python/keras/utils/vis_utils.py b/tensorflow/python/keras/utils/vis_utils.py
index 9394cc1..d396851 100644
--- a/tensorflow/python/keras/utils/vis_utils.py
+++ b/tensorflow/python/keras/utils/vis_utils.py
@@ -145,6 +145,10 @@
           a string specifying the format of the plot:
           'TB' creates a vertical plot;
           'LR' creates a horizontal plot.
+
+  Returns:
+      A Jupyter notebook Image object if Jupyter is installed.
+      This enables in-line display of the model plots in notebooks.
   """
   dot = model_to_dot(model, show_shapes, show_layer_names, rankdir)
   _, extension = os.path.splitext(to_file)
@@ -152,4 +156,13 @@
     extension = 'png'
   else:
     extension = extension[1:]
+  # Save image to disk.
   dot.write(to_file, format=extension)
+  # Return the image as a Jupyter Image object, to be displayed in-line.
+  # Note that we cannot easily detect whether the code is running in a
+  # notebook, and thus we always return the Image if Jupyter is available.
+  try:
+    from IPython import display
+    return display.Image(filename=to_file)
+  except ImportError:
+    pass
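
Usage sketch, assuming pydot/graphviz are installed and `model` is any Keras model:

    from tensorflow.python.keras.utils import vis_utils

    img = vis_utils.plot_model(model, to_file='model.png')
    # In a notebook, `img` renders in-line; in a plain script (no IPython),
    # plot_model still writes model.png and simply returns None.
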
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 60126bb..a883b40 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -131,6 +131,7 @@
         "//tensorflow/python:client_testlib",
     ],
     grpc_enabled = True,
+    tags = ["no_rocm"],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -1150,6 +1151,19 @@
 )
 
 tf_py_test(
+    name = "tridiagonal_solve_op_test",
+    size = "medium",
+    srcs = ["tridiagonal_solve_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:linalg_ops",
+    ],
+    shard_count = 5,
+)
+
+tf_py_test(
     name = "unicode_script_op_test",
     size = "small",
     srcs = ["unicode_script_op_test.py"],
@@ -1675,6 +1689,21 @@
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:nn_ops",
     ],
+    tags = ["no_rocm"],
+    xla_enable_strict_auto_jit = True,
+)
+
+cuda_py_test(
+    name = "conv1d_transpose_test",
+    size = "small",
+    srcs = ["conv1d_transpose_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:nn_ops",
+    ],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -1837,7 +1866,7 @@
 
 cuda_py_test(
     name = "functional_ops_test",
-    size = "small",
+    size = "medium",
     srcs = ["functional_ops_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2172,7 +2201,7 @@
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:math_ops",
     ],
-    shard_count = 4,
+    shard_count = 6,
     tags = ["no_windows_gpu"],
     xla_enable_strict_auto_jit = True,
 )
@@ -2188,12 +2217,12 @@
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:math_ops",
     ],
+    shard_count = 3,
     tags = [
         "manual",
         "no_gpu",
         "nogpu",
         "noguitar",
-        "notap",
     ],
     xla_enable_strict_auto_jit = True,
 )
@@ -2578,6 +2607,7 @@
         "//tensorflow/python/eager:context",
     ],
     flaky = 1,  # create_local_cluster sometimes times out.
+    tags = ["no_rocm"],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -2737,6 +2767,7 @@
     ],
     shard_count = 2,
     tags = [
+        "no_rocm",
         "optonly",  # flaky timeouts unless optimized
     ],
     xla_enable_strict_auto_jit = True,
@@ -2778,6 +2809,7 @@
     ],
     shard_count = 4,
     tags = [
+        "no_rocm",
         "optonly",  # times out
     ],
     xla_enable_strict_auto_jit = True,
@@ -2841,6 +2873,7 @@
         "//tensorflow/python:nn_grad",
         "//tensorflow/python:nn_ops",
     ],
+    tags = ["no_rocm"],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -2860,6 +2893,7 @@
         "//tensorflow/python:nn_ops_gen",
     ],
     shard_count = 4,
+    tags = ["no_rocm"],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -2894,6 +2928,34 @@
 )
 
 cuda_py_test(
+    name = "rnn_cell_test",
+    size = "medium",
+    srcs = ["rnn_cell_test.py"],
+    additional_deps = [
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:rnn",
+        "//tensorflow/python:rnn_cell",
+        "//tensorflow/python:tensor_array_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+    ],
+    shard_count = 10,
+    xla_enable_strict_auto_jit = True,
+)
+
+cuda_py_test(
     name = "scatter_ops_test",
     size = "medium",  # NOTE: This is not run by default.
     srcs = ["scatter_ops_test.py"],
@@ -3043,6 +3105,7 @@
         "//tensorflow/python:util",
         "//tensorflow/python:data_flow_ops",
     ],
+    tags = ["no_oss"],  # b/124474135
     xla_enable_strict_auto_jit = True,
 )
 
@@ -3091,6 +3154,7 @@
         "//tensorflow/python:nn_ops",
     ],
     shard_count = 30,
+    tags = ["no_rocm"],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -3229,8 +3293,11 @@
     ],
     data = ["//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files"],
     shard_count = 20,
-    tags = ["no_windows"],
-    xla_enable_strict_auto_jit = True,
+    tags = [
+        "no_rocm",  # flaky test
+        "no_windows",
+    ],
+    # TODO(kuny): Add xla_enable_strict_auto_jit = True after b/124377352 is fixed.
 )
 
 cuda_py_test(
@@ -3578,3 +3645,24 @@
     grpc_enabled = True,
     xla_enable_strict_auto_jit = True,
 )
+
+cuda_py_test(
+    name = "critical_section_test",
+    size = "medium",
+    srcs = ["critical_section_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:critical_section_ops",
+        "//tensorflow/python:tensor_array_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/eager:context",
+    ],
+)
diff --git a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
index 6b04e8a..e741930 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py
@@ -897,11 +897,36 @@
       self.assertAllClose(expected_logits, logits)
 
   @test_util.run_deprecated_v1
+  def testPredictionOnEmptyEnsembleMultiClass(self):
+    """Tests that prediction on empty ensemble does not fail for multiclass."""
+    with self.cached_session() as session:
+      # Create an empty ensemble.
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto='')
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      feature_0_values = [36, 32]
+      feature_1_values = [11, 27]
+      logits_dimension = 2
+      expected_logits = [[0.0, 0.0], [0.0, 0.0]]
+
+      # Prediction should work fine.
+      predict_op = boosted_trees_ops.predict(
+          tree_ensemble_handle,
+          bucketized_features=[feature_0_values, feature_1_values],
+          logits_dimension=logits_dimension)
+
+      logits = session.run(predict_op)
+      self.assertAllClose(expected_logits, logits)
+
+  @test_util.run_deprecated_v1
   def testPredictionMultipleTree(self):
     """Tests the predictions work when we have multiple trees."""
     with self.cached_session() as session:
       tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
-      text_format.Merge("""
+      text_format.Merge(
+          """
         trees {
           nodes {
             bucketized_split {
@@ -1008,6 +1033,158 @@
       self.assertAllClose(expected_logits, logits)
 
   @test_util.run_deprecated_v1
+  def testPredictionMultipleTreeMultiClass(self):
+    """Tests the predictions work when we have multiple trees."""
+    with self.cached_session() as session:
+      tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
+      text_format.Merge(
+          """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 28
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 7.62
+            }
+          }
+          nodes {
+            leaf {
+              vector: {
+                value: 0.51
+              }
+              vector: {
+                value: 1.14
+              }
+            }
+          }
+          nodes {
+            leaf {
+              vector: {
+                value: 1.29
+              }
+              vector: {
+                value: 8.79
+              }
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 26
+              left_id: 1
+              right_id: 2
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 50
+              left_id: 3
+              right_id: 4
+            }
+          }
+          nodes {
+            leaf {
+              vector: {
+                value: -4.33
+              }
+              vector: {
+                value: 7.0
+              }
+            }
+          }
+          nodes {
+            leaf {
+              vector: {
+                value: 0.2
+              }
+              vector: {
+                value: 5.0
+              }
+            }
+          }
+          nodes {
+            leaf {
+              vector: {
+                value: -4.1
+              }
+              vector: {
+                value: 6.0
+              }
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 34
+              left_id: 1
+              right_id: 2
+            }
+          }
+          nodes {
+            leaf {
+              vector: {
+                value: 2.0
+              }
+              vector: {
+                value: -7.0
+              }
+            }
+          }
+          nodes {
+            leaf {
+              vector: {
+                value: 6.3
+              }
+              vector: {
+                value: 5.0
+              }
+            }
+          }
+        }
+        tree_weights: 0.1
+        tree_weights: 0.2
+        tree_weights: 1.0
+      """, tree_ensemble_config)
+
+      # Create existing ensemble with one root split
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto=tree_ensemble_config.SerializeToString())
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      feature_0_values = [36, 32]
+      feature_1_values = [11, 27]
+
+      # Example 1: tree 0: (0.51, 1.14), tree 1: (0.2, 5.0), tree 2: (6.3, 5.0)
+      #
+      #            logits = (0.1*0.51+0.2*0.2+1*6.3,
+      #                      0.1*1.14+0.2*5.0+1*5)
+      # Example 2: tree 0: (0.51, 1.14), tree 1: (-4.33, 7.0), tree 2: (2.0, -7)
+      #
+      #            logits = (0.1*0.51+0.2*-4.33+1*2.0,
+      #                      0.1*1.14+0.2*7.0+1*-7)
+      logits_dimension = 2
+      expected_logits = [[6.391, 6.114], [1.185, -5.486]]
+
+      # Prediction should work fine.
+      predict_op = boosted_trees_ops.predict(
+          tree_ensemble_handle,
+          bucketized_features=[feature_0_values, feature_1_values],
+          logits_dimension=logits_dimension)
+
+      logits = session.run(predict_op)
+      self.assertAllClose(expected_logits, logits)
+
+  @test_util.run_deprecated_v1
   def testCategoricalSplits(self):
     """Tests the predictions work for categorical splits."""
     with self.cached_session() as session:
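
The expected multiclass logits above are just the tree-weighted sums of the leaf vectors each example reaches; checking the arithmetic with numpy:

    import numpy as np

    tree_weights = np.array([0.1, 0.2, 1.0])
    # Leaf vectors reached by examples 1 and 2 in trees 0..2:
    ex1_leaves = np.array([[0.51, 1.14], [0.2, 5.0], [6.3, 5.0]])
    ex2_leaves = np.array([[0.51, 1.14], [-4.33, 7.0], [2.0, -7.0]])
    print(tree_weights @ ex1_leaves)  # [6.391, 6.114]
    print(tree_weights @ ex2_leaves)  # [1.185, -5.486]
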
diff --git a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
index b9eb239..3c5433c 100644
--- a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
+++ b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
@@ -67,6 +67,36 @@
           self.assertAllEqual(v_tf.eval(), v_np)
 
   @test_util.run_deprecated_v1
+  def testBroadcastToShapeInnerDim(self):
+    input_shape = [2, 1, 3]
+    output_shape = [2, 5, 3]
+    with self.cached_session(use_gpu=True):
+      x = np.array(np.random.randint(5, size=input_shape), dtype=np.int32)
+      v_tf = array_ops.broadcast_to(constant_op.constant(x), output_shape)
+      v_np = np.broadcast_to(x, output_shape)
+      self.assertAllEqual(v_tf.eval(), v_np)
+
+  @test_util.run_deprecated_v1
+  def testBroadcastToShapeLargerDim(self):
+    input_shape = [2, 1, 3, 2, 2, 2]
+    output_shape = [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 15, 3, 2, 2, 2]
+    with self.cached_session(use_gpu=True):
+      x = np.array(np.random.randint(5, size=input_shape), dtype=np.int32)
+      v_tf = array_ops.broadcast_to(constant_op.constant(x), output_shape)
+      v_np = np.broadcast_to(x, output_shape)
+      self.assertAllEqual(v_tf.eval(), v_np)
+
+  @test_util.run_deprecated_v1
+  def testBroadcastToShapeLargerDim2(self):
+    input_shape = [2, 1, 3, 2, 2, 2, 1, 1, 1]
+    output_shape = [1, 1, 1, 2, 5, 3, 2, 2, 2, 3, 3, 3]
+    with self.cached_session(use_gpu=True):
+      x = np.array(np.random.randint(5, size=input_shape), dtype=np.int32)
+      v_tf = array_ops.broadcast_to(constant_op.constant(x), output_shape)
+      v_np = np.broadcast_to(x, output_shape)
+      self.assertAllEqual(v_tf.eval(), v_np)
+
+  @test_util.run_deprecated_v1
   def testBroadcastToScalar(self):
     with self.session(use_gpu=True):
       x = np.array(1, dtype=np.int32)
@@ -78,8 +108,9 @@
   def testBroadcastScalarToNonScalar(self):
     with self.session(use_gpu=True):
       x = np.array(1.0, dtype=np.float)
-      v_tf = array_ops.broadcast_to(constant_op.constant(1.0), [2, 3, 4])
-      v_np = np.broadcast_to(x, [2, 3, 4])
+      v_tf = array_ops.broadcast_to(constant_op.constant(1.0), [2, 3, 4,
+                                                                1, 1, 1])
+      v_np = np.broadcast_to(x, [2, 3, 4, 1, 1, 1])
       self.assertAllEqual(v_tf.eval(), v_np)
 
   @test_util.run_deprecated_v1
@@ -130,14 +161,26 @@
 
   @test_util.run_deprecated_v1
   def testGradientWithBroadcastAllDimensions(self):
-    x = constant_op.constant([[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32)
-    v = array_ops.broadcast_to(x, [5, 4, 6])
+    x = constant_op.constant([1], dtype=dtypes.float32)
+    v = array_ops.broadcast_to(x, [5, 2, 3])
     out = 2 * v
     with self.cached_session():
       err = gradient_checker.compute_gradient_error(x, x.get_shape(),
                                                     out, out.get_shape())
     self.assertLess(err, 1e-4)
 
+  @test_util.run_deprecated_v1
+  def testGradientWithLargeDim(self):
+    input_shape = [2, 1, 3, 2, 2, 2, 1, 1, 1]
+    output_shape = [1, 1, 1, 2, 5, 3, 2, 2, 2, 3, 3, 3]
+    x = constant_op.constant(np.array(np.random.randn(*input_shape),
+                                      dtype=np.float32))
+    v = array_ops.broadcast_to(x, output_shape)
+    out = 2 * v
+    with self.cached_session():
+      err = gradient_checker.compute_gradient_error(x, x.get_shape(),
+                                                    out, out.get_shape())
+    self.assertLess(err, 1e-4)
 
 if __name__ == "__main__":
   test_lib.main()
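
The new inner-dim and large-rank cases rely on the same numpy semantics the tests compare against; for example:

    import numpy as np

    x = np.arange(6).reshape(2, 1, 3)
    y = np.broadcast_to(x, (2, 5, 3))     # a size-1 middle axis is tiled to 5
    z = np.broadcast_to(x, (4, 2, 5, 3))  # new leading axes may be prepended
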
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index a073bb3..982ead7 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -654,6 +654,27 @@
       r = control_flow_ops.cond(pred, fn1, fn2)
       self.assertAllEqual([11, 12], self.evaluate(r))
 
+  @test_util.run_gpu_only
+  @test_util.run_deprecated_v1
+  def testCond_Device(self):
+    x = constant_op.constant(-10)
+
+    # True branch function defined outside of device scope
+    def true_fn():
+      return math_ops.exp(x)
+
+    with ops.device("CPU:0"):
+      r = control_flow_ops.cond(
+          constant_op.constant(True), true_fn, lambda: 0.)
+      self.assertIn("cpu", r.device.lower())
+
+    with session.Session() as sess:
+      options = config_pb2.RunOptions(output_partition_graphs=True)
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(r, options=options, run_metadata=run_metadata)
+      # We expect that everything runs on CPU, even if GPU is available.
+      self.assertEqual(len(run_metadata.partition_graphs), 1)
+
   def testCondListOutput(self):
     with self.cached_session() as sess:
       x = constant_op.constant(10)
@@ -1017,6 +1038,31 @@
     r = control_flow_ops.cond(foo()[1], lambda: 1.0, lambda: 2.0)
     self.assertEqual(self.evaluate(r), 1.0)
 
+  @test_util.run_v1_only("Tests Session.run() pruning logic.")
+  def testCondFeedConstantPredicate(self):
+    with self.cached_session() as sess:
+      value = constant_op.constant(37.0)
+      predicate = constant_op.constant(True)
+      cond_output = control_flow_ops.cond(
+          predicate, lambda: constant_op.constant(0.0), lambda: value)
+      result = array_ops.identity(cond_output)
+      self.assertEqual(37.0, sess.run(result, feed_dict={predicate: False}))
+      self.assertEqual(0.0, sess.run(result, feed_dict={predicate: True}))
+      self.assertEqual(0.0, sess.run(result))
+
+  @test_util.run_v1_only("Tests Session.run() pruning logic.")
+  def testCondFeedPlaceholderWithDefaultPredicate(self):
+    with self.cached_session() as sess:
+      value = constant_op.constant(37.0)
+      predicate = array_ops.placeholder_with_default(
+          constant_op.constant(True), [])
+      cond_output = control_flow_ops.cond(
+          predicate, lambda: constant_op.constant(0.0), lambda: value)
+      result = array_ops.identity(cond_output)
+      self.assertAllEqual(37.0, sess.run(result, feed_dict={predicate: False}))
+      self.assertAllEqual(0.0, sess.run(result, feed_dict={predicate: True}))
+      self.assertAllEqual(0.0, sess.run(result))
+
   @test_util.run_in_graph_and_eager_modes
   def testCondAutoControlDeps(self):
 
@@ -1503,6 +1549,26 @@
       result = r[2]
     self.assertAllEqual(np.array([0, 1, 2, 3, 4, 5, 6]), result)
 
+  @test_util.run_gpu_only
+  @test_util.run_deprecated_v1
+  def testWhile_Device(self):
+
+    # Body function defined outside of device scope
+    def body(x):
+      return math_ops.exp(x)
+
+    with ops.device("CPU:0"):
+      r = control_flow_ops.while_loop(
+          lambda x: x < 10, body, [constant_op.constant(-10.)])
+      self.assertIn("cpu", r.device.lower())
+
+    with session.Session() as sess:
+      options = config_pb2.RunOptions(output_partition_graphs=True)
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(r, options=options, run_metadata=run_metadata)
+      # We expect that everything runs on CPU, even if GPU is available.
+      self.assertEqual(len(run_metadata.partition_graphs), 1)
+
   @test_util.disable_control_flow_v2("b/116338794 (buffer_reuse)")
   @test_util.run_v1_only("b/120545219")
   def testBufferForwarding(self):
@@ -1744,6 +1810,8 @@
 
   @test_util.disable_control_flow_v2("b/116328420 (RaggedTensor)")
   def testWhileShapeInferenceRaggedTensor(self):
+    if context.executing_eagerly():
+      self.skipTest("b/116328420")
     i = constant_op.constant(0)
     x = ragged_factory_ops.constant([[1, 2], [3], [4, 5, 6]])
     c = lambda i, _: i < 10
@@ -1787,6 +1855,8 @@
 
   @test_util.disable_control_flow_v2("b/116328420 (RaggedTensor)")
   def testWhileShapeInferenceRaggedTensorRaggedRank2(self):
+    if context.executing_eagerly():
+      self.skipTest("b/116328420")
     i = constant_op.constant(0)
     x = ragged_factory_ops.constant([[[1, 2], [3], [4, 5, 6]],
                                      [[], [8, 9, 10]]])
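
A sketch of the behavior the new testCond_Device and testWhile_Device cases pin down, reusing this file's module aliases: branch and body functions defined outside any device scope still build their ops on the device of the cond/while_loop call site:

    x = constant_op.constant(-10.)

    def true_fn():  # defined with no device scope of its own
      return math_ops.exp(x)

    with ops.device("CPU:0"):
      r = control_flow_ops.cond(constant_op.constant(True), true_fn, lambda: 0.)
    # r.device names the CPU, and (per the tests) a single CPU partition
    # graph is executed even when a GPU is available.
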
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index e846332..4b44bb6 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -68,7 +68,7 @@
       f = constant_op.constant(
           1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
       output = nn_ops.conv1d_transpose(
-          x, f, y_shape, stride=stride, padding="VALID")
+          x, f, y_shape, strides=stride, padding="VALID")
       value = self.evaluate(output)
 
       cache_values = np.zeros(y_shape, dtype=np.float32)
diff --git a/tensorflow/python/kernel_tests/conv1d_transpose_test.py b/tensorflow/python/kernel_tests/conv1d_transpose_test.py
new file mode 100644
index 0000000..02ac5af
--- /dev/null
+++ b/tensorflow/python/kernel_tests/conv1d_transpose_test.py
@@ -0,0 +1,260 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for convolution related functionality in tensorflow.ops.nn."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import nn_ops
+import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
+from tensorflow.python.platform import test
+
+
+class Conv1DTransposeTest(test.TestCase):
+
+  def testConv1DTransposeSingleStride(self):
+    with self.cached_session():
+      strides = [1, 1, 1]
+
+      # Input, output: [batch, width, depth]
+      x_shape = [2, 6, 3]
+      y_shape = [2, 6, 2]
+
+      # Filter: [kernel_width, output_depth, input_depth]
+      f_shape = [3, 2, 3]
+
+      x = constant_op.constant(
+          1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+      f = constant_op.constant(
+          1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+      output = nn_ops.conv1d_transpose(
+          x, f, y_shape, strides=strides, padding="SAME")
+      value = self.evaluate(output)
+
+      for n in xrange(y_shape[0]):
+        for w in xrange(y_shape[1]):
+          for c in xrange(y_shape[2]):
+            target = 2 * 3.0
+            w_in = w > 0 and w < y_shape[1] - 1
+            if w_in:
+              target += 3.0
+            self.assertAllClose(target, value[n, w, c])
+
+  def testConv1DTransposeSame(self):
+    with self.cached_session():
+      strides = [1, 2, 1]
+
+      # Input, output: [batch, width, depth]
+      x_shape = [2, 4, 3]
+      y_shape = [2, 8, 2]
+
+      # Filter: [kernel_width, output_depth, input_depth]
+      f_shape = [3, 2, 3]
+
+      x = constant_op.constant(
+          1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+      f = constant_op.constant(
+          1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+      output = nn_ops.conv1d_transpose(
+          x, f, y_shape, strides=strides, padding="SAME")
+      value = self.evaluate(output)
+
+      for n in xrange(x_shape[0]):
+        for k in xrange(f_shape[1]):
+          for w in xrange(y_shape[1]):
+            target = 3.0
+            # We add a case for locations divisible by the stride.
+            w_in = w % strides[1] == 0 and w > 0 and w < y_shape[1] - 1
+            if w_in:
+              target += 3.0
+            self.assertAllClose(target, value[n, w, k])
+
+  def testConv1DTransposeValid(self):
+    with self.cached_session():
+      strides = [1, 2, 1]
+
+      # Input, output: [batch, width, depth]
+      x_shape = [2, 4, 3]
+      y_shape = [2, 9, 2]
+
+      # Filter: [kernel_width, output_depth, input_depth]
+      f_shape = [3, 2, 3]
+
+      x = constant_op.constant(
+          1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+      f = constant_op.constant(
+          1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+      output = nn_ops.conv1d_transpose(
+          x, f, y_shape, strides=strides, padding="VALID")
+      value = self.evaluate(output)
+
+      cache_values = np.zeros(y_shape, dtype=np.float32)
+
+      # The amount of padding added
+      pad = 1
+
+      for n in xrange(x_shape[0]):
+        for k in xrange(f_shape[1]):
+          for w in xrange(pad, y_shape[1] - pad):
+            target = 3.0
+            # We add a case for locations divisible by the stride.
+            w_in = w % strides[1] == 0 and w > pad and w < y_shape[1] - 1 - pad
+            if w_in:
+              target += 3.0
+            cache_values[n, w, k] = target
+
+          # copy values in the border
+          cache_values[n, 0, k] = cache_values[n, 1, k]
+          cache_values[n, -1, k] = cache_values[n, -2, k]
+
+    self.assertAllClose(cache_values, value)
+
+  @test_util.run_deprecated_v1
+  def testGradient(self):
+    x_shape = [2, 4, 3]
+    f_shape = [3, 2, 3]
+    y_shape = [2, 8, 2]
+    strides = [1, 2, 1]
+    np.random.seed(1)  # Make it reproducible.
+    x_val = np.random.random_sample(x_shape).astype(np.float64)
+    f_val = np.random.random_sample(f_shape).astype(np.float64)
+    with self.cached_session():
+      x = constant_op.constant(x_val, name="x", dtype=dtypes.float32)
+      f = constant_op.constant(f_val, name="f", dtype=dtypes.float32)
+      output = nn_ops.conv1d_transpose(
+          x, f, y_shape, strides=strides, padding="SAME")
+      err = gradient_checker.compute_gradient_error([x, f], [x_shape, f_shape],
+                                                    output, y_shape)
+    print("conv1d_transpose gradient err = %g " % err)
+    err_tolerance = 0.0005
+    self.assertLess(err, err_tolerance)
+
+  def testConv1DTransposeSingleStrideNCW(self):
+    # The `NCW` data format is only supported on CUDA devices.
+    if test.is_gpu_available(cuda_only=True):
+      with self.session(use_gpu=True):
+        strides = [1, 1, 1]
+
+        # Input, output: [batch, depth, width]
+        x_shape = [2, 3, 4]
+        y_shape = [2, 2, 4]
+
+        # Filter: [kernel_width, output_depth, input_depth]
+        f_shape = [3, 2, 3]
+
+        x = constant_op.constant(
+            1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+        f = constant_op.constant(
+            1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+
+        output = nn_ops.conv1d_transpose(
+            x, f, y_shape, strides=strides, padding="SAME", data_format="NCW")
+
+        value = self.evaluate(output)
+        for n in xrange(x_shape[0]):
+          for k in xrange(f_shape[1]):
+            for w in xrange(y_shape[2]):
+              target = 2 * 3.0
+              w_in = w > 0 and w < y_shape[2] - 1
+              if w_in:
+                target += 3.0
+              self.assertAllClose(target, value[n, k, w])
+
+  def testConv1DTransposeSameNCW(self):
+    # The `NCW` data format is only supported on CUDA devices.
+    if test.is_gpu_available(cuda_only=True):
+      with self.session(use_gpu=True):
+        strides = [1, 1, 2]
+
+        # Input, output: [batch, depth, width]
+        x_shape = [2, 3, 4]
+        y_shape = [2, 2, 8]
+
+        # Filter: [kernel_width, output_depth, input_depth]
+        f_shape = [3, 2, 3]
+
+        x = constant_op.constant(
+            1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+        f = constant_op.constant(
+            1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+
+        output = nn_ops.conv1d_transpose(
+            x, f, y_shape, strides=strides, padding="SAME", data_format="NCW")
+
+        value = self.evaluate(output)
+        for n in xrange(x_shape[0]):
+          for k in xrange(f_shape[1]):
+            for w in xrange(y_shape[2]):
+              target = 3.0
+              # We add a case for locations divisible by the stride.
+              w_in = w % strides[2] == 0 and w > 0 and w < y_shape[2] - 1
+              if w_in:
+                target += 3.0
+              self.assertAllClose(target, value[n, k, w])
+
+  def testConv1DTransposeValidNCW(self):
+    # The `NCW` data format is only supported on CUDA devices.
+    if test.is_gpu_available(cuda_only=True):
+      with self.session(use_gpu=True):
+        strides = [1, 1, 2]
+
+        # Input, output: [batch, depth, width]
+        x_shape = [2, 3, 4]
+        y_shape = [2, 2, 9]
+
+        # Filter: [kernel_width, output_depth, input_depth]
+        f_shape = [3, 2, 3]
+
+        x = constant_op.constant(
+            1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+        f = constant_op.constant(
+            1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+        output = nn_ops.conv1d_transpose(
+            x, f, y_shape, strides=strides, padding="VALID", data_format="NCW")
+
+        value = self.evaluate(output)
+        cache_values = np.zeros(y_shape, dtype=np.float32)
+        # The amount of padding added
+        pad = 1
+        for n in xrange(x_shape[0]):
+          for k in xrange(f_shape[1]):
+            for w in xrange(pad, y_shape[2] - pad):
+              target = 3.0
+              # We add a case for locations divisible by the stride.
+              w_in = (w % strides[2] == 0 and w > pad and
+                      w < y_shape[2] - 1 - pad)
+              if w_in:
+                target += 3.0
+              cache_values[n, k, w] = target
+
+            # copy values in the border
+            cache_values[n, k, 0] = cache_values[n, k, 1]
+            cache_values[n, k, -1] = cache_values[n, k, -2]
+
+        self.assertAllClose(cache_values, value)
+
+
+if __name__ == "__main__":
+  test.main()
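
The output widths used throughout these tests follow the standard transposed-convolution length relation; a reference sketch, not the TF implementation:

    def conv1d_transpose_out_width(in_w, kernel_w, stride, padding):
      if padding == 'SAME':
        return in_w * stride
      return (in_w - 1) * stride + kernel_w  # 'VALID'

    assert conv1d_transpose_out_width(6, 3, 1, 'SAME') == 6   # SingleStride
    assert conv1d_transpose_out_width(4, 3, 2, 'SAME') == 8   # Same
    assert conv1d_transpose_out_width(4, 3, 2, 'VALID') == 9  # Valid
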
diff --git a/tensorflow/python/kernel_tests/critical_section_test.py b/tensorflow/python/kernel_tests/critical_section_test.py
new file mode 100644
index 0000000..7b1519c
--- /dev/null
+++ b/tensorflow/python/kernel_tests/critical_section_test.py
@@ -0,0 +1,423 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""critical section tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import critical_section_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
+# TODO(ebrevdo): Re-enable once CriticalSection is in core.
+# from tensorflow.python.training import saver as saver_lib
+
+
+class CriticalSectionTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def testCreateCriticalSection(self):
+    cs = critical_section_ops.CriticalSection(shared_name="cs")
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+    def fn(a, b):
+      c = v.value()
+      with ops.control_dependencies([c]):
+        nv = v.assign_add(a * b)
+        with ops.control_dependencies([nv]):
+          return array_ops.identity(c)
+
+    num_concurrent = 100
+    r = [cs.execute(lambda: fn(1.0, 2.0)) for _ in range(num_concurrent)]
+    self.evaluate(v.initializer)
+    r_value = self.evaluate(r)
+    self.assertAllClose([2.0 * i for i in range(num_concurrent)],
+                        sorted(r_value))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testCriticalSectionWithControlFlow(self):
+    for outer_cond in [False, True]:
+      for inner_cond in [False, True]:
+        cs = critical_section_ops.CriticalSection(shared_name="cs")
+        v = resource_variable_ops.ResourceVariable(0.0, name="v")
+        num_concurrent = 100
+
+        # pylint: disable=cell-var-from-loop
+        def fn(a, b):
+          c = v.read_value()
+          def true_fn():
+            with ops.control_dependencies([c]):
+              nv = v.assign_add(a * b)
+              with ops.control_dependencies([nv]):
+                return array_ops.identity(c)
+          return control_flow_ops.cond(
+              array_ops.identity(inner_cond), true_fn, lambda: c)
+
+        def execute():
+          return cs.execute(lambda: fn(1.0, 2.0))
+
+        r = [
+            control_flow_ops.cond(array_ops.identity(outer_cond),
+                                  execute,
+                                  v.read_value)
+            for _ in range(num_concurrent)
+        ]
+        # pylint: enable=cell-var-from-loop
+
+        self.evaluate(v.initializer)
+        r_value = self.evaluate(r)
+        if inner_cond and outer_cond:
+          self.assertAllClose([2.0 * i for i in range(num_concurrent)],
+                              sorted(r_value))
+        else:
+          self.assertAllClose([0] * num_concurrent, r_value)
+
+  @test_util.run_v1_only("b/123990562 Sees CancelledError on some calls")
+  def testCriticalSectionInParallelDoesntDeadlockOnError(self):
+    # No eager mode execution of this test because eager does not
+    # run fn() in parallel, which is where the deadlock could
+    # potentially occur (in graph mode).
+    cs = critical_section_ops.CriticalSection(shared_name="cs")
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+    def fn(i):
+      error = control_flow_ops.Assert((i % 2) == 1, ["Error"])
+      with ops.control_dependencies([error]):
+        return v.read_value()
+
+    num_concurrent = 2
+
+    @def_function.function(autograph=False)
+    def run_concurrently():
+      return [cs.execute(lambda: fn(i)) for i in range(num_concurrent)]
+
+    if not context.executing_eagerly():
+      run_concurrently = run_concurrently()
+
+    self.evaluate(v.initializer)
+    for _ in range(100):
+      with self.assertRaisesOpError("Error"):
+        if context.executing_eagerly():
+          run_concurrently()
+        else:
+          self.evaluate(run_concurrently)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testCreateCriticalSectionFnReturnsOp(self):
+    cs = critical_section_ops.CriticalSection(shared_name="cs")
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+    def fn_return_op(a, b):
+      c = v.read_value()
+      with ops.control_dependencies([c]):
+        nv = v.assign_add(a * b)
+        with ops.control_dependencies([nv]):
+          return control_flow_ops.no_op()
+
+    num_concurrent = 100
+    r = [cs.execute(lambda: fn_return_op(1.0, 2.0))
+         for _ in range(num_concurrent)]
+    self.evaluate(v.initializer)
+    self.evaluate(r)
+    final_v = self.evaluate(v)
+    self.assertAllClose(2.0 * num_concurrent, final_v)
+
+  @test_util.run_v1_only("Collections don't exist in TF2")
+  def testCollection(self):
+    cs = critical_section_ops.CriticalSection(shared_name="cs")
+    self.assertIn(
+        cs, ops.get_collection(critical_section_ops.CRITICAL_SECTIONS))
+    add = lambda x: x + 1
+    execute = cs.execute(lambda: add(1.0), name="my_execute")
+    execute_op = [
+        x for x in execute.graph.get_operations()
+        if "my_execute" in x.name and "MutexLock" in x.type
+    ][0]
+    self.assertIn(
+        execute_op,
+        [signature.op for signature in
+         ops.get_collection(critical_section_ops.CRITICAL_SECTION_EXECUTIONS)])
+
+  @test_util.run_v1_only("b/123955885 Can't identify deadlocks in eager mode")
+  def testRecursiveCriticalSectionAccessIsIllegal(self):
+    # This does not work properly in eager mode.  Eager users will
+    # just hit a deadlock if they do this.  But at least it'll be easier
+    # to debug.
+    cs = critical_section_ops.CriticalSection()
+    add = lambda y: y + 1
+    def fn(x):
+      return cs.execute(lambda: add(x))
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r"attempts to directly access the CriticalSection in which it "
+        r"would be running"):
+      cs.execute(lambda: fn(1.0))
+
+  def testRecursiveCriticalSectionAccessViaCapturedTensorIsProtected(self):
+    # This one is subtle, and we're being overly cautious here.  The
+    # deadlock we are ensuring we catch is:
+    #
+    # to_capture = CS[lambda x: x + 1](1.0)
+    # deadlocked = CS[lambda x: x + to_capture](1.0)
+    #
+    # This would have caused a deadlock because executing `deadlocked` will
+    # lock the mutex on CS; but then due to dependencies, will attempt
+    # to compute `to_capture`.  This computation requires locking CS,
+    # but that is not possible now because CS is already locked by
+    # `deadlocked`.
+    #
+    # We check that CriticalSection.execute properly inserts new
+    # control dependencies to its lock to ensure all captured
+    # operations are finished before anything runs within the critical section.
+    cs = critical_section_ops.CriticalSection(shared_name="cs")
+    fn = array_ops.identity
+    to_capture = cs.execute(lambda: fn(1.0))
+    fn_captures = lambda x: x + to_capture
+    to_capture_too = array_ops.identity(to_capture)
+
+    ex_0 = cs.execute(lambda: fn_captures(1.0))
+
+    with ops.control_dependencies([to_capture]):
+      # This is OK because to_capture will execute before this next call
+      ex_1 = cs.execute(lambda: fn_captures(1.0))
+
+    dependency = array_ops.identity(to_capture)
+
+    fn_captures_dependency = lambda x: x + dependency
+
+    ex_2 = cs.execute(lambda: fn_captures_dependency(1.0))
+
+    with ops.control_dependencies([to_capture_too]):
+      ex_3 = cs.execute(lambda: fn_captures_dependency(1.0))
+
+    # Ensure there's no actual deadlock when evaluating these executions.
+    self.assertEqual(2.0, self.evaluate(ex_0))
+    self.assertEqual(2.0, self.evaluate(ex_1))
+    self.assertEqual(2.0, self.evaluate(ex_2))
+    self.assertEqual(2.0, self.evaluate(ex_3))
+
+  def testRecursiveCriticalSectionAccessWithinLoopIsProtected(self):
+    cs = critical_section_ops.CriticalSection(shared_name="cs")
+
+    def body_implicit_capture(i, j):
+      # This would have caused a deadlock if not for logic in execute
+      # that inserts additional control dependencies onto the lock op:
+      #   * Loop body argument j is captured by fn()
+      #   * i is running in parallel to move forward the execution
+      #   * j is not being checked by the predicate function
+      #   * output of cs.execute() is returned as next j.
+      fn = lambda: j + 1
+      return (i + 1, cs.execute(fn))
+
+    (i_n, j_n) = control_flow_ops.while_loop(
+        lambda i, _: i < 1000,
+        body_implicit_capture,
+        [0, 0],
+        parallel_iterations=25)
+    # For consistency between eager and graph mode.
+    i_n = array_ops.identity(i_n)
+    logging.warn(
+        "\n==============\nRunning "
+        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
+        "body_implicit_capture'\n"
+        "==============\n")
+    self.assertEqual((1000, 1000), self.evaluate((i_n, j_n)))
+    logging.warn(
+        "\n==============\nSuccessfully finished running "
+        "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
+        "body_implicit_capture'\n"
+        "==============\n")
+
+    def body_implicit_capture_protected(i, j):
+      # This version is ok because we manually add a control
+      # dependency on j, which is an argument to the while_loop body
+      # and captured by fn.
+      fn = lambda: j + 1
+      with ops.control_dependencies([j]):
+        return (i + 1, cs.execute(fn))
+
+    (i_n, j_n) = control_flow_ops.while_loop(
+        lambda i, _: i < 1000,
+        body_implicit_capture_protected,
+        [0, 0],
+        parallel_iterations=25)
+    # For consistency between eager and graph mode.
+    i_n = array_ops.identity(i_n)
+    logging.warn(
+        "\n==============\nRunning "
+        "'testRecursiveCriticalSectionAccessWithinLoopIsProtected "
+        "body_implicit_capture_protected'\n"
+        "==============\n")
+    self.assertEqual((1000, 1000), self.evaluate((i_n, j_n)))
+    logging.warn(
+        "\n==============\nSuccessfully finished running "
+        "'testRecursiveCriticalSectionAccessWithinLoopIsProtected "
+        "body_implicit_capture_protected'\n"
+        "==============\n")
+
+    def body_args_capture(i, j):
+      # This version is ok because j is an argument to fn and we can
+      # ensure there's a control dependency on j.
+      fn = lambda x: x + 1
+      return (i + 1, cs.execute(lambda: fn(j)))
+
+    (i_n, j_n) = control_flow_ops.while_loop(
+        lambda i, _: i < 1000,
+        body_args_capture,
+        [0, 0],
+        parallel_iterations=25)
+    # For consistency between eager and graph mode.
+    i_n = array_ops.identity(i_n)
+    logging.warn(
+        "\n==============\nRunning "
+        "'testRecursiveCriticalSectionAccessWithinLoopIsProtected "
+        "body_args_capture'\n"
+        "==============\n")
+    self.assertEqual((1000, 1000), self.evaluate((i_n, j_n)))
+    logging.warn(
+        "\n==============\nSuccessfully finished running "
+        "'testRecursiveCriticalSectionAccessWithinLoopIsProtected "
+        "body_args_capture'\n"
+        "==============\n")
+
+  @test_util.run_v1_only("b/123955885 Can't identify deadlocks in eager mode")
+  def testRecursiveCriticalSectionAccessIsIllegalSameSharedName(self):
+    # This does not work properly in eager mode.  Eager users will
+    # just hit a deadlock if they do this.  In graph mode we can at least
+    # raise an error that is easier to debug.
+    cs = critical_section_ops.CriticalSection(shared_name="cs")
+    cs_same = critical_section_ops.CriticalSection(shared_name="cs")
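+    # The identical shared_name means cs and cs_same are backed by the same
+    # underlying resource, so nesting their execute() calls is still
+    # recursive access.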
+    add = lambda x: x + 1
+    def fn(x):
+      return cs_same.execute(lambda: add(x))
+    with self.assertRaisesRegexp(
+        ValueError,
+        r"attempts to directly access the CriticalSection in which it "
+        r"would be running"):
+      cs.execute(lambda: fn(1.0))
+
+  @test_util.run_v1_only("b/123955885 Can't identify deadlocks in eager mode")
+  def testMultipleCSExecutionsRequestSameResource(self):
+    cs0 = critical_section_ops.CriticalSection()
+    cs1 = critical_section_ops.CriticalSection()
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+    cs0.execute(lambda: v + 1)
+    # It's OK for the same CriticalSection to access this resource.
+    cs0.execute(lambda: v - 1)
+    # It's *not* OK for a different CriticalSection to access it by
+    # default.
+    with self.assertRaisesRegexp(
+        ValueError, "requested exclusive resource access"):
+      cs1.execute(lambda: v + 1)
+    # It's not even OK if the second call doesn't request exclusive access.
+    with self.assertRaisesRegexp(
+        ValueError, "requested exclusive resource access"):
+      cs1.execute(lambda: v + 1, exclusive_resource_access=False)
+
+    v2 = resource_variable_ops.ResourceVariable(0.0, name="v2")
+    cs0.execute(lambda: v2 + 1, exclusive_resource_access=False)
+    # It's OK if neither requests exclusive resource access.
+    cs1.execute(lambda: v2 + 1, exclusive_resource_access=False)
+
+    # It's not OK if the second request requires exclusive resource
+    # access.
+    with self.assertRaisesRegexp(
+        ValueError, "requested exclusive resource access"):
+      cs1.execute(lambda: v2 + 1)
+
+  def testControlDependencyFromOutsideWhileLoopMixedWithInsideLoop(self):
+    cs = critical_section_ops.CriticalSection()
+    v = resource_variable_ops.ResourceVariable(0, name="v")
+    # Make sure that the control dependencies on v do not cause issues
+    # in the lock_op's automatic control dependency adder.
+    #
+    # Note, here v must be a resource variable (or something similar),
+    # otherwise it gets hoisted into the while_loop by the time we add
+    # control dependencies to the lock_op.
+    def body(i):
+      add_j = lambda j: v + j + 1
+      return cs.execute(lambda: add_j(i))
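+    # With v == 0, each iteration returns i + 1, so the loop carries
+    # 1, 2, ... and exits once the value reaches 10.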
+    out = control_flow_ops.while_loop(
+        lambda i: i < 10, body, [0])
+    self.evaluate(v.initializer)
+    self.assertEqual(10, self.evaluate(out))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testInsideFunction(self):
+    if test_util.is_gpu_available():
+      self.skipTest(
+          "b/123899495: Colocation errors for critical sections in map on GPU")
+    cs = critical_section_ops.CriticalSection()
+    with ops.device("/gpu:0" if test_util.is_gpu_available() else "/cpu:0"):
+      v = resource_variable_ops.ResourceVariable(1)
+    def fn():
+      return v.read_value()
+
+    # map() creates a TensorFlow function.
+    ds = dataset_ops.Dataset.range(1)
+    if test_util.is_gpu_available():
+      ds = (ds.apply(prefetching_ops.copy_to_device("/gpu:0"))
+            .apply(prefetching_ops.map_on_gpu(lambda _: cs.execute(fn))))
+    else:
+      ds = ds.map(lambda _: cs.execute(fn))
+
+    def get_first():
+      if context.executing_eagerly():
+        return self.evaluate(ds.make_one_shot_iterator().get_next())
+      itr = ds.make_initializable_iterator()
+      self.evaluate([v.initializer, itr.initializer])
+      return self.evaluate(itr.get_next())
+
+    self.assertEqual(1, get_first())
+
+  # TODO(ebrevdo): Re-enable once CriticalSection is in core.
+  #
+  # def testCriticalSectionAndExecuteOpSaverRoundTrip(self):
+  #   cs = critical_section_ops.CriticalSection()
+  #   r = cs.execute(lambda x: x + 1, 1.0)
+  #   graph = ops.get_default_graph()
+  #   meta_graph = saver_lib.export_meta_graph(
+  #       graph=graph, collection_list=graph.get_all_collection_keys())
+  #   graph_copy = ops.Graph()
+  #   with graph_copy.as_default():
+  #     _ = saver_lib.import_meta_graph(meta_graph, import_scope="imported")
+  #     restored_cs = ops.get_collection(critical_section_ops.CRITICAL_SECTIONS)
+  #     restored_exec = ops.get_collection(
+  #         critical_section_ops.CRITICAL_SECTION_EXECUTIONS)
+  #     self.assertEqual(1, len(restored_cs))
+  #     self.assertEqual(1, len(restored_exec))
+  #     self.assertEqual(restored_cs[0].name, "imported/%s" % cs.name)
+  #     self.assertEqual(restored_exec[0].op.name, "imported/%s" % r.op.name)
+
+  # def testToProto(self):
+  #   cs = critical_section_ops.CriticalSection(shared_name="cs")
+  #   proto = cs.to_proto()
+  #   self.assertEqual(proto.critical_section_name, cs._handle.name)
+  #   cs_copy = critical_section_ops.CriticalSection.from_proto(proto)
+  #   self.assertEqual(cs_copy._handle, cs._handle)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py
index 008e59b..bb8d2cf 100644
--- a/tensorflow/python/kernel_tests/decode_raw_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py
@@ -90,6 +90,32 @@
       self.assertAllEqual(expected_result, result)
 
   @test_util.run_deprecated_v1
+  def testToComplex64(self):
+    with self.cached_session():
+      in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
+      decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.complex64)
+      self.assertEqual([None, None], decode.get_shape().as_list())
+
+      expected_result = np.matrix([[1 + 1j, 2 - 2j, -3 + 3j, -4 - 4j]],
+                                  dtype="<c8")
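+      # NumPy's "<c8" is little-endian complex64 (two float32s per value);
+      # "<c16" below is the complex128 analogue.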
+      result = decode.eval(feed_dict={in_bytes: [expected_result.tostring()]})
+
+      self.assertAllEqual(expected_result, result)
+
+  @test_util.run_deprecated_v1
+  def testToComplex128(self):
+    with self.cached_session():
+      in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
+      decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.complex128)
+      self.assertEqual([None, None], decode.get_shape().as_list())
+
+      expected_result = np.matrix([[1 + 1j, 2 - 2j, -3 + 3j, -4 - 4j]],
+                                  dtype="<c16")
+      result = decode.eval(feed_dict={in_bytes: [expected_result.tostring()]})
+
+      self.assertAllEqual(expected_result, result)
+
+  @test_util.run_deprecated_v1
   def testEmptyStringInput(self):
     with self.cached_session():
       in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py
index 48e14d0..0ab2b4b 100644
--- a/tensorflow/python/kernel_tests/functional_ops_test.py
+++ b/tensorflow/python/kernel_tests/functional_ops_test.py
@@ -23,6 +23,7 @@
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -32,6 +33,7 @@
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import functional_ops
+from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
@@ -1110,6 +1112,37 @@
       self.evaluate(op)
 
 
+@test_util.run_all_in_graph_and_eager_modes
+@test_util.with_control_flow_v2
+class FunctionalOpsCaseTest(test.TestCase):
+
+  def testCase(self):
+    @eager_function.defun
+    def two(x):
+      return x * 2
+
+    @eager_function.defun
+    def three(x):
+      return x * 3
+
+    @eager_function.defun
+    def four(x):
+      return x * 4
+
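+    # The case op dispatches on `branch`; as the final two asserts show,
+    # out-of-range indices (negative or >= len(branches)) run the last branch.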
+    def f(branch, x):
+      tmpl = array_ops.zeros_like(x)
+      return array_ops.identity(gen_functional_ops.case(
+          branch, input=[x], Tout=[dtypes.float32],
+          branches=[f.get_concrete_function(tmpl)
+                    for f in (two, three, four)])[0])
+    one = array_ops.ones([])
+    self.assertAllEqual(np.float32(2), self.evaluate(f(0, one)))
+    self.assertAllEqual(np.float32(3), self.evaluate(f(1, one)))
+    self.assertAllEqual(np.float32(4), self.evaluate(f(2, one)))
+    self.assertAllEqual(np.float32(4), self.evaluate(f(-1, one)))  # <0 default
+    self.assertAllEqual(np.float32(4), self.evaluate(f(6, one)))  # >=N default
+
+
 if __name__ == "__main__":
   test.main()
 
diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py
index 1d6f795..65043d9 100644
--- a/tensorflow/python/kernel_tests/gather_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_op_test.py
@@ -467,12 +467,12 @@
   def testErrors(self):
 
     with self.assertRaisesRegexp(
-        ValueError, r"batch_dims = 2 must be less than ndims\(indices\) = 2"):
+        ValueError, r"batch_dims = 2 must be less than rank\(indices\) = 2"):
       array_ops.gather(
           params=[[1, 2], [3, 4]], indices=[[1, 2], [3, 4]], batch_dims=2)
 
     with self.assertRaisesRegexp(
-        ValueError, r"batch_dims = 1 must be less than ndims\(params\) = 1"):
+        ValueError, r"batch_dims = 1 must be less than rank\(params\) = 1"):
       array_ops.gather(
           params=[1, 2, 3, 4], indices=[[1, 2], [3, 4]], batch_dims=1)
 
diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py
index 7d4c045..e350129 100644
--- a/tensorflow/python/kernel_tests/list_ops_test.py
+++ b/tensorflow/python/kernel_tests/list_ops_test.py
@@ -505,16 +505,24 @@
     with self.assertRaisesRegexp(
         errors.InvalidArgumentError,
         "Indices in TensorListScatter must all be non-negative."):
-      l = list_ops.tensor_list_scatter(
-          c0, [-1, -2], ops.convert_to_tensor([], dtype=dtypes.int32))
+      l = list_ops.tensor_list_scatter(c0, [-1, -2], element_shape=[])
       self.evaluate(l)
 
+  def testScatterIntoExistingList(self):
+    l = list_ops.tensor_list_reserve(
+        element_dtype=dtypes.float32, element_shape=[], num_elements=3)
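+    # Passing input_handle makes scatter write into the reserved list
+    # instead of allocating a fresh one.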
+    l = list_ops.tensor_list_scatter(
+        tensor=[1.], indices=[0], element_shape=[], input_handle=l)
+    l = list_ops.tensor_list_scatter(
+        tensor=[2., 3.], indices=[1, 2], element_shape=[], input_handle=l)
+    self.assertAllEqual(
+        list_ops.tensor_list_stack(l, element_dtype=dtypes.float32),
+        [1., 2., 3.])
+
   def testScatterGrad(self):
     with backprop.GradientTape() as tape:
       c0 = constant_op.constant([1.0, 2.0])
       tape.watch(c0)
-      l = list_ops.tensor_list_scatter(
-          c0, [1, 0], ops.convert_to_tensor([], dtype=dtypes.int32))
+      l = list_ops.tensor_list_scatter(c0, [1, 0], element_shape=[])
       t0 = list_ops.tensor_list_get_item(l, 0, element_dtype=dtypes.float32)
       t1 = list_ops.tensor_list_get_item(l, 1, element_dtype=dtypes.float32)
       self.assertAllEqual(self.evaluate(t0), 2.0)
@@ -527,8 +535,7 @@
     with backprop.GradientTape() as tape:
       c0 = constant_op.constant([1.0, 2.0])
       tape.watch(c0)
-      l = list_ops.tensor_list_scatter(
-          c0, [1, 0], ops.convert_to_tensor([], dtype=dtypes.int32))
+      l = list_ops.tensor_list_scatter(c0, [1, 0], element_shape=[])
       t0 = list_ops.tensor_list_get_item(l, 0, element_dtype=dtypes.float32)
       self.assertAllEqual(self.evaluate(t0), 2.0)
       loss = t0 * t0
diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py
index 0b78115..d7980db 100644
--- a/tensorflow/python/kernel_tests/lookup_ops_test.py
+++ b/tensorflow/python/kernel_tests/lookup_ops_test.py
@@ -38,7 +38,7 @@
 from tensorflow.python.platform import test
 from tensorflow.python.training import saver
 from tensorflow.python.training import server_lib
-from tensorflow.python.training.checkpointable import util as checkpointable
+from tensorflow.python.training.tracking import util as trackable
 
 
 class HashTableTest(test.TestCase):
@@ -1123,22 +1123,24 @@
 
       # Invalid data type
       other_type = constant_op.constant(1)
-      with self.assertRaises(ValueError):
+      with self.assertRaises(Exception) as cm:
         lookup_ops.HashTable(
             lookup_ops.TextFileInitializer(
                 other_type, dtypes.string, lookup_ops.TextFileIndex.WHOLE_LINE,
                 dtypes.int64, lookup_ops.TextFileIndex.LINE_NUMBER),
             default_value)
+      self.assertTrue(isinstance(cm.exception, (ValueError, TypeError)))
 
       # Non-scalar filename
       filenames = constant_op.constant([vocabulary_file, vocabulary_file])
       if not context.executing_eagerly():
-        with self.assertRaises(ValueError):
+        with self.assertRaises(Exception) as cm:
           lookup_ops.HashTable(
               lookup_ops.TextFileInitializer(
                   filenames, dtypes.string, lookup_ops.TextFileIndex.WHOLE_LINE,
                   dtypes.int64, lookup_ops.TextFileIndex.LINE_NUMBER),
               default_value)
+        self.assertTrue(isinstance(cm.exception, (ValueError, TypeError)))
       else:
         with self.assertRaises(errors_impl.InvalidArgumentError):
           lookup_ops.HashTable(
@@ -1689,7 +1691,7 @@
     table = lookup_ops.MutableHashTable(
         dtypes.string, dtypes.int64, default_val, name="t1", checkpoint=True)
 
-    checkpoint = checkpointable.Checkpoint(table=table, v0=v0, v1=v1)
+    checkpoint = trackable.Checkpoint(table=table, v0=v0, v1=v1)
     self.evaluate([v0.initializer, v1.initializer])
 
     # Check that the parameter nodes have been initialized.
@@ -1714,7 +1716,7 @@
             constant_op.constant([12, 24], dtypes.int64)))
     self.assertAllEqual(2, self.evaluate(table.size()))
 
-    checkpoint = checkpointable.Checkpoint(table=table, v0=v0, v1=v1)
+    checkpoint = trackable.Checkpoint(table=table, v0=v0, v1=v1)
 
     # Restore the saved values in the parameter nodes.
     checkpoint.restore(save_path).run_restore_ops()
@@ -2510,7 +2512,7 @@
         checkpoint=True,
         initial_num_buckets=32)
 
-    save_checkpoint = checkpointable.Checkpoint(table=save_table)
+    save_checkpoint = trackable.Checkpoint(table=save_table)
 
     self.assertAllEqual(0, self.evaluate(save_table.size()))
     self.evaluate(save_table.insert(keys, values))
@@ -2536,7 +2538,7 @@
     self.assertAllEqual(2, self.evaluate(load_table.size()))
     self.assertAllEqual(64, len(self.evaluate(load_table.export()[0])))
 
-    restore_checkpoint = checkpointable.Checkpoint(table=load_table)
+    restore_checkpoint = trackable.Checkpoint(table=load_table)
 
     # Restore the saved values in the parameter nodes.
     restore_checkpoint.restore(save_path).run_restore_ops()
diff --git a/tensorflow/python/kernel_tests/map_fn_test.py b/tensorflow/python/kernel_tests/map_fn_test.py
index 41d99ea..d2b1d43 100644
--- a/tensorflow/python/kernel_tests/map_fn_test.py
+++ b/tensorflow/python/kernel_tests/map_fn_test.py
@@ -197,17 +197,17 @@
     y = map_fn.map_fn(lambda e: e, x)
     self.assertIs(None, y.get_shape().dims)
 
-  @test_util.disable_control_flow_v2("b/119323354")
-  @test_util.run_in_graph_and_eager_modes
+  # TODO(b/124383826): this test fails in eager: the iterable is of length 0,
+  # so the body of the while loop never executes.
   @test_util.run_v1_only("b/120545219")
   def testMapEmptyScalar(self):
-    map_return = map_fn.map_fn(lambda x: 1, constant_op.constant([]))
+    map_return = map_fn.map_fn(lambda x: 1,
+                               constant_op.constant([], dtype=dtypes.int32))
     self.assertAllEqual([0], map_return.get_shape().dims)
     self.assertAllEqual([0], self.evaluate(map_return).shape)
 
-  # TODO(akshayka): this test fails in eager: the iterable is of length 0 so
+  # TODO(b/124383826): this test fails in eager: the iterable is of length 0,
   # so the body of the while loop never executes
-  @test_util.disable_control_flow_v2("b/119323354")
   @test_util.run_v1_only("b/120545219")
   def testMapEmptyTensor(self):
     with self.cached_session():
diff --git a/tensorflow/python/kernel_tests/random/random_grad_test.py b/tensorflow/python/kernel_tests/random/random_grad_test.py
index aac6eea..38fa44f 100644
--- a/tensorflow/python/kernel_tests/random/random_grad_test.py
+++ b/tensorflow/python/kernel_tests/random/random_grad_test.py
@@ -79,7 +79,7 @@
     shape = [2, 3]
     alpha = array_ops.ones([2, 2])
     beta = array_ops.ones([1, 2])
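+    # Fixing the RNG seed makes the samples, and hence their gradients,
+    # deterministic across runs.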
-    sample = random_ops.random_gamma(shape, alpha, beta)
+    sample = random_ops.random_gamma(shape, alpha, beta, seed=12345)
     grads_alpha, grads_beta = gradients_impl.gradients(sample, [alpha, beta])
     self.assertAllEqual(grads_alpha.shape, alpha.shape)
     self.assertAllEqual(grads_beta.shape, beta.shape)
@@ -89,7 +89,7 @@
     shape = []
     alpha = array_ops.ones([2, 2])
     beta = array_ops.ones([1, 2])
-    sample = random_ops.random_gamma(shape, alpha, beta)
+    sample = random_ops.random_gamma(shape, alpha, beta, seed=12345)
     grads_alpha, grads_beta = gradients_impl.gradients(sample, [alpha, beta])
     self.assertAllEqual(grads_alpha.shape, alpha.shape)
     self.assertAllEqual(grads_beta.shape, beta.shape)
@@ -99,7 +99,7 @@
     shape = array_ops.placeholder(dtypes.int32)
     alpha = array_ops.placeholder(dtypes.float32)
     beta = array_ops.placeholder(dtypes.float32)
-    sample = random_ops.random_gamma(shape, alpha, beta)
+    sample = random_ops.random_gamma(shape, alpha, beta, seed=12345)
     grads_alpha, grads_beta = gradients_impl.gradients(sample, [alpha, beta])
 
     alpha_val = np.ones([1, 2])
@@ -129,7 +129,8 @@
 
       alpha_val = np.logspace(-2, 3, dtype=np_dtype)
       alpha = constant_op.constant(alpha_val)
-      sample = random_ops.random_gamma([], alpha, np_dtype(1.0), dtype=dtype)
+      sample = random_ops.random_gamma(
+          [], alpha, np_dtype(1.0), dtype=dtype, seed=12345)
       actual = gradients_impl.gradients(sample, alpha)[0]
 
       (sample_val, actual_val) = self.evaluate((sample, actual))
@@ -175,7 +176,8 @@
     """
     np_dtype = dtype.as_numpy_dtype
     alpha = constant_op.constant(np.logspace(-2, 3, dtype=np_dtype))
-    sample = random_ops.random_gamma([], alpha, np_dtype(1.0), dtype=dtype)
+    sample = random_ops.random_gamma(
+        [], alpha, np_dtype(1.0), dtype=dtype, seed=12345)
     actual = gradients_impl.gradients(sample, alpha)[0]
 
     sample_sg = array_ops.stop_gradient(sample)
@@ -207,9 +209,9 @@
     Here we verify that the rhs is fairly close to one.
     The convergence speed is not great, so we use many samples and loose bounds.
     """
-    num_samples = 1000
+    num_samples = 10000
     alpha = constant_op.constant([0.8, 1e1, 1e3], dtype=dtypes.float32)
-    sample = random_ops.random_gamma([num_samples], alpha)
+    sample = random_ops.random_gamma([num_samples], alpha, seed=12345)
     # We need to average the gradients, which is equivalent to averaging the
     # samples and then doing backprop.
     mean_sample = math_ops.reduce_mean(sample, axis=0)
@@ -234,13 +236,13 @@
     We compare the Monte-Carlo estimate of the expectation with the
     true gradient.
     """
-    num_samples = 1000
+    num_samples = 10000
     t = 0.3
     alpha = 0.5
     expected = 1 + 2 * alpha - 2 * t
 
     alpha = constant_op.constant(alpha)
-    sample = random_ops.random_gamma([num_samples], alpha, 1.0)
+    sample = random_ops.random_gamma([num_samples], alpha, 1.0, seed=12345)
     loss = math_ops.reduce_mean(math_ops.square(sample - t))
     dloss_dalpha = gradients_impl.gradients(loss, alpha)[0]
     dloss_dalpha_val = self.evaluate(dloss_dalpha)
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index 67a8946..5ab8bc3 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -104,7 +104,8 @@
       for dtype, reductions in [(dtypes.float32,
                                  (math_ops.reduce_sum, math_ops.reduce_mean,
                                   math_ops.reduce_prod, math_ops.reduce_max,
-                                  math_ops.reduce_min)),
+                                  math_ops.reduce_min,
+                                  math_ops.reduce_euclidean_norm)),
                                 (dtypes.bool, (math_ops.reduce_all,
                                                math_ops.reduce_any))]:
         for reduction in reductions:
@@ -487,6 +488,79 @@
         self.assertTrue(np.all(np.isnan(y)))
 
 
+class EuclideanNormReductionTest(BaseReductionTest):
+
+  def _tf_reduce(self, x, reduction_axes, keepdims):
+    return math_ops.reduce_euclidean_norm(x, reduction_axes, keepdims)
+
+  def _np_reduce(self, x, reduction_axes, keepdims):
+    if isinstance(reduction_axes, list) or isinstance(reduction_axes,
+                                                      np.ndarray):
+      reduction_axes = tuple(reduction_axes)
+    if reduction_axes is None or reduction_axes != tuple():
+      np_fro = np.sqrt(
+          np.sum(x * np.conj(x), axis=reduction_axes, keepdims=keepdims))
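+      # Matches the definition ||x|| = sqrt(sum_i |x_i|^2); using x * conj(x)
+      # for |x|^2 lets the same reference implementation cover complex dtypes.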
+    else:
+      np_fro = x
+    if np.issubdtype(x.dtype, np.integer):
+      np_fro = np.floor(np_fro)
+    return np_fro
+
+  @test_util.run_deprecated_v1
+  def testAxesType(self):
+    for dtype in [dtypes.int64, dtypes.int32]:
+      with self.cached_session(use_gpu=True):
+        v = math_ops.reduce_euclidean_norm(
+            [0, 0], constant_op.constant(0, dtype=dtype))
+        tf_v = self.evaluate(v)
+      self.assertAllEqual(tf_v, 0)
+
+  @test_util.run_deprecated_v1
+  def testInfinity(self):
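+    # Squaring maps every +/-inf entry to +inf, so the reduced norm is +inf
+    # for all four sign combinations checked below.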
+    for dtype in [np.float32, np.float64]:
+      for special_value_x in [-np.inf, np.inf]:
+        for special_value_y in [-np.inf, np.inf]:
+          np_arr = np.array([special_value_x, special_value_y]).astype(dtype)
+          self._compareAll(np_arr, None)
+
+  @test_util.run_deprecated_v1
+  def testInt32(self):
+    for rank in range(1, _MAX_RANK + 1):
+      np_arr = self._makeIncremental((2,) * rank, dtypes.int32)
+      self._compareAllAxes(np_arr)
+
+  @test_util.run_deprecated_v1
+  def testFloat32(self):
+    for rank in range(1, _MAX_RANK + 1):
+      np_arr = self._makeIncremental((2,) * rank, dtypes.float32)
+      self._compareAllAxes(np_arr)
+
+  @test_util.run_deprecated_v1
+  def testFloat64(self):
+    for rank in range(1, _MAX_RANK + 1):
+      np_arr = self._makeIncremental((2,) * rank, dtypes.float64)
+      self._compareAllAxes(np_arr)
+
+  @test_util.run_deprecated_v1
+  def testComplex64(self):
+    for rank in range(1, _MAX_RANK + 1):
+      np_arr = self._makeIncremental((2,) * rank, dtypes.complex64)
+      self._compareAllAxes(np_arr)
+
+  @test_util.run_deprecated_v1
+  def testComplex128(self):
+    for rank in range(1, _MAX_RANK + 1):
+      np_arr = self._makeIncremental((2,) * rank, dtypes.complex128)
+      self._compareAllAxes(np_arr)
+
+    with self.session(use_gpu=True):
+      for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
+        # A large number is needed to get Eigen to die
+        x = array_ops.zeros((0, 9938), dtype=dtype)
+        y = math_ops.reduce_euclidean_norm(x, [0]).eval()
+        self.assertEqual(y.shape, (9938,))
+        self.assertAllEqual(y, np.zeros(9938))
+
+
 class ProdReductionTest(BaseReductionTest):
 
   def _tf_reduce(self, x, reduction_axes, keepdims):
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test_big.py b/tensorflow/python/kernel_tests/reduction_ops_test_big.py
index 73bba54..2d5cff3 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test_big.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test_big.py
@@ -22,16 +22,17 @@
 
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
+
 class BaseReductionTest(test.TestCase):
 
   def _tf_reduce(self, x, reduction_axes, keepdims):
     raise NotImplementedError()
 
 
-@test_util.disable_all_xla("b/123864762")  # Test times out
 class BigReductionTest(BaseReductionTest):
   """Test reductions for sum and boolean all over a wide range of shapes."""
 
@@ -47,6 +48,7 @@
   def _tf_reduce_sum(self, x, reduction_axes, keepdims):
     return math_ops.reduce_sum(x, reduction_axes, keepdims)
 
+  @test_util.run_deprecated_v1
   def testFloat32Sum(self):
     # make sure we test all possible kernel invocations
     # logic is the same for all ops, test just float32 for brevity
@@ -65,11 +67,13 @@
         full_sum = np.ones([], dtype=np.float32) * size_x * size_y
 
         with self.session(graph=ops.Graph(), use_gpu=True) as sess:
-          tf_row_sum = self._tf_reduce_sum(arr, 1, False)
-          tf_col_sum = self._tf_reduce_sum(arr, 0, False)
-          tf_full_sum = self._tf_reduce_sum(arr, [0, 1], False)
+          arr_placeholder = array_ops.placeholder(dtype=np.float32,
+                                                  shape=(size_x, size_y))
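+          # Feeding through a placeholder keeps these large arrays out of
+          # the GraphDef, which is capped at 2GB when serialized.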
+          tf_row_sum = self._tf_reduce_sum(arr_placeholder, 1, False)
+          tf_col_sum = self._tf_reduce_sum(arr_placeholder, 0, False)
+          tf_full_sum = self._tf_reduce_sum(arr_placeholder, [0, 1], False)
           tf_out_row, tf_out_col, tf_out_full = sess.run(
-              [tf_row_sum, tf_col_sum, tf_full_sum])
+              [tf_row_sum, tf_col_sum, tf_full_sum], {arr_placeholder: arr})
         self.assertAllClose(col_sum, tf_out_col)
         self.assertAllClose(row_sum, tf_out_row)
         self.assertAllClose(full_sum, tf_out_full)
@@ -83,12 +87,16 @@
           sum_xz = np.ones([size_y], dtype=np.float32)
 
           with self.session(graph=ops.Graph(), use_gpu=True) as sess:
-            tf_sum_xz = self._tf_reduce_mean(arr, [0, 2], False)
-            tf_sum_y = self._tf_reduce_mean(arr, 1, False)
-            tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
+            arr_placeholder = array_ops.placeholder(
+                dtype=np.float32, shape=(size_x, size_y, size_z))
+            tf_sum_xz = self._tf_reduce_mean(arr_placeholder, [0, 2], False)
+            tf_sum_y = self._tf_reduce_mean(arr_placeholder, 1, False)
+            tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y],
+                                                   {arr_placeholder: arr})
           self.assertAllClose(sum_y, tf_out_sum_y)
           self.assertAllClose(sum_xz, tf_out_sum_xz)
 
+  @test_util.run_deprecated_v1
   def testFloat32Max(self):
     # make sure we test all possible kernel invocations
     # logic is the same for all ops, test just float32 for brevity
@@ -108,11 +116,13 @@
         full_max = np.max(col_max)
 
         with self.session(graph=ops.Graph(), use_gpu=True) as sess:
-          tf_row_max = self._tf_reduce_max(arr, 1, False)
-          tf_col_max = self._tf_reduce_max(arr, 0, False)
-          tf_full_max = self._tf_reduce_max(arr, [0, 1], False)
+          arr_placeholder = array_ops.placeholder(dtype=np.float32,
+                                                  shape=(size_x, size_y))
+          tf_row_max = self._tf_reduce_max(arr_placeholder, 1, False)
+          tf_col_max = self._tf_reduce_max(arr_placeholder, 0, False)
+          tf_full_max = self._tf_reduce_max(arr_placeholder, [0, 1], False)
           tf_out_row, tf_out_col, tf_out_full = sess.run(
-              [tf_row_max, tf_col_max, tf_full_max])
+              [tf_row_max, tf_col_max, tf_full_max], {arr_placeholder: arr})
         self.assertAllClose(col_max, tf_out_col)
         self.assertAllClose(row_max, tf_out_row)
         self.assertAllClose(full_max, tf_out_full)
@@ -127,12 +137,16 @@
           sum_xz = np.max(arr, axis=(0, 2))
 
           with self.session(graph=ops.Graph(), use_gpu=True) as sess:
-            tf_sum_xz = self._tf_reduce_max(arr, [0, 2], False)
-            tf_sum_y = self._tf_reduce_max(arr, 1, False)
-            tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
+            arr_placeholder = array_ops.placeholder(
+                dtype=np.float32, shape=(size_x, size_y, size_z))
+            tf_sum_xz = self._tf_reduce_max(arr_placeholder, [0, 2], False)
+            tf_sum_y = self._tf_reduce_max(arr_placeholder, 1, False)
+            tf_out_sum_xz, tf_out_sum_y = sess.run(
+                [tf_sum_xz, tf_sum_y], {arr_placeholder: arr})
           self.assertAllClose(sum_y, tf_out_sum_y)
           self.assertAllClose(sum_xz, tf_out_sum_xz)
 
+  @test_util.run_deprecated_v1
   def testBooleanAll(self):
     # make sure we test all possible kernel invocations
     # test operation where T(0) is not the identity
@@ -151,11 +165,13 @@
         full_sum = np.ones([1], dtype=np.bool).reshape([])
 
         with self.session(graph=ops.Graph(), use_gpu=True) as sess:
-          tf_row_sum = self._tf_reduce_all(arr, 1, False)
-          tf_col_sum = self._tf_reduce_all(arr, 0, False)
-          tf_full_sum = self._tf_reduce_all(arr, [0, 1], False)
+          arr_placeholder = array_ops.placeholder(dtype=np.bool,
+                                                  shape=(size_x, size_y))
+          tf_row_sum = self._tf_reduce_all(arr_placeholder, 1, False)
+          tf_col_sum = self._tf_reduce_all(arr_placeholder, 0, False)
+          tf_full_sum = self._tf_reduce_all(arr_placeholder, [0, 1], False)
           tf_out_row, tf_out_col, tf_out_full = sess.run(
-              [tf_row_sum, tf_col_sum, tf_full_sum])
+              [tf_row_sum, tf_col_sum, tf_full_sum], {arr_placeholder: arr})
         self.assertAllClose(col_sum, tf_out_col)
         self.assertAllClose(row_sum, tf_out_row)
         self.assertAllClose(full_sum, tf_out_full)
@@ -169,9 +185,12 @@
           sum_xz = np.ones([size_y], dtype=np.bool)
 
           with self.session(graph=ops.Graph(), use_gpu=True) as sess:
-            tf_sum_xz = self._tf_reduce_all(arr, [0, 2], False)
-            tf_sum_y = self._tf_reduce_all(arr, 1, False)
-            tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
+            arr_placeholder = array_ops.placeholder(
+                dtype=np.bool, shape=(size_x, size_y, size_z))
+            tf_sum_xz = self._tf_reduce_all(arr_placeholder, [0, 2], False)
+            tf_sum_y = self._tf_reduce_all(arr_placeholder, 1, False)
+            tf_out_sum_xz, tf_out_sum_y = sess.run(
+                [tf_sum_xz, tf_sum_y], {arr_placeholder: arr})
           self.assertAllClose(sum_y, tf_out_sum_y)
           self.assertAllClose(sum_xz, tf_out_sum_xz)
 
diff --git a/tensorflow/python/kernel_tests/rnn_cell_test.py b/tensorflow/python/kernel_tests/rnn_cell_test.py
new file mode 100644
index 0000000..c732c9b
--- /dev/null
+++ b/tensorflow/python/kernel_tests/rnn_cell_test.py
@@ -0,0 +1,3354 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for RNN cells."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+import os
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import layers as keras_layers
+from tensorflow.python.layers import base as base_layer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import rnn
+from tensorflow.python.ops import rnn_cell
+from tensorflow.python.ops import rnn_cell_impl
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+from tensorflow.python.training.tracking import util as trackable_utils
+from tensorflow.python.util import nest
+
+
+class Plus1RNNCell(rnn_cell.RNNCell):
+  """RNN Cell generating (output, new_state) = (input + 1, state + 1)."""
+
+  @property
+  def output_size(self):
+    return 5
+
+  @property
+  def state_size(self):
+    return 5
+
+  def __call__(self, input_, state, scope=None):
+    return (input_ + 1, state + 1)
+
+
+class DummyMultiDimensionalLSTM(rnn_cell.RNNCell):
+  """LSTM Cell generating (output, new_state) = (input + 1, state + 1).
+
+  The input to this cell may have an arbitrary number of dimensions that
+  follow the leading 'Time' and 'Batch' dimensions.
+  """
+
+  def __init__(self, dims):
+    """Initialize the Multi-dimensional LSTM cell.
+
+    Args:
+      dims: tuple that contains the dimensions of the output of the cell,
+        without including 'Time' or 'Batch' dimensions.
+    """
+    if not isinstance(dims, tuple):
+      raise TypeError("The dimensions passed to DummyMultiDimensionalLSTM "
+                      "should be a tuple of ints.")
+    self._dims = dims
+    self._output_size = tensor_shape.TensorShape(self._dims)
+    self._state_size = (tensor_shape.TensorShape(self._dims),
+                        tensor_shape.TensorShape(self._dims))
+
+  @property
+  def output_size(self):
+    return self._output_size
+
+  @property
+  def state_size(self):
+    return self._state_size
+
+  def __call__(self, input_, state, scope=None):
+    h, c = state
+    return (input_ + 1, (h + 1, c + 1))
+
+
+class NestedRNNCell(rnn_cell.RNNCell):
+  """RNN Cell generating (output, new_state) = (input + 1, state + 1).
+
+  The input, output and state of this cell are each a tuple of two tensors.
+  """
+
+  @property
+  def output_size(self):
+    return (5, 5)
+
+  @property
+  def state_size(self):
+    return (6, 6)
+
+  def __call__(self, input_, state, scope=None):
+    h, c = state
+    x, y = input_
+    return ((x + 1, y + 1), (h + 1, c + 1))
+
+
+class TestStateSaver(object):
+
+  def __init__(self, batch_size, state_size):
+    self._batch_size = batch_size
+    self._state_size = state_size
+    self.saved_state = {}
+
+  def state(self, name):
+    if isinstance(self._state_size, dict):
+      state_size = self._state_size[name]
+    else:
+      state_size = self._state_size
+    if isinstance(state_size, int):
+      state_size = (state_size,)
+    elif isinstance(state_size, tuple):
+      pass
+    else:
+      raise TypeError("state_size should either be an int or a tuple")
+
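+    # Fabricate an all-zeros initial state of shape (batch_size,) + state_size.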
+    return array_ops.zeros((self._batch_size,) + state_size)
+
+  def save_state(self, name, state):
+    self.saved_state[name] = state
+    return array_ops.identity(state)
+
+  @property
+  def batch_size(self):
+    return self._batch_size
+
+  @property
+  def state_size(self):
+    return self._state_size
+
+
+class TestStateSaverWithCounters(TestStateSaver):
+  """Class wrapper around TestStateSaver.
+
+  A dummy class used for testing static_state_saving_rnn. It helps test
+  whether the save_state and state functions get called the same number of
+  times when we evaluate the rnn cell's output and state together or
+  separately. It inherits from TestStateSaver and adds counters for those
+  calls.
+  """
+
+  @test_util.run_v1_only("b/124229375")
+  def __init__(self, batch_size, state_size):
+    super(TestStateSaverWithCounters, self).__init__(batch_size, state_size)
+    self._num_state_calls = variables_lib.VariableV1(0)
+    self._num_save_state_calls = variables_lib.VariableV1(0)
+
+  def state(self, name):
+    with ops.control_dependencies(
+        [state_ops.assign_add(self._num_state_calls, 1)]):
+      return super(TestStateSaverWithCounters, self).state(name)
+
+  def save_state(self, name, state):
+    with ops.control_dependencies([state_ops.assign_add(
+        self._num_save_state_calls, 1)]):
+      return super(TestStateSaverWithCounters, self).save_state(name, state)
+
+  @property
+  def num_state_calls(self):
+    return self._num_state_calls
+
+  @property
+  def num_save_state_calls(self):
+    return self._num_save_state_calls
+
+
+class RNNTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  @test_util.run_v1_only("b/124229375")
+  def testInvalidSequenceLengthShape(self):
+    cell = Plus1RNNCell()
+    inputs = [array_ops.placeholder(dtypes.float32, shape=(3, 4))]
+    with self.assertRaisesRegexp(ValueError, "must be a vector"):
+      rnn.static_rnn(cell, inputs, dtype=dtypes.float32, sequence_length=4)
+
+  @test_util.run_v1_only("b/124229375")
+  def testRNN(self):
+    cell = Plus1RNNCell()
+    batch_size = 2
+    input_size = 5
+    max_length = 8  # unrolled up to this length
+    inputs = max_length * [
+        array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+    ]
+    outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+    self.assertEqual(len(outputs), len(inputs))
+    for out, inp in zip(outputs, inputs):
+      self.assertEqual(out.get_shape(), inp.get_shape())
+      self.assertEqual(out.dtype, inp.dtype)
+
+    with self.session(use_gpu=True) as sess:
+      input_value = np.random.randn(batch_size, input_size)
+      values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
+
+      # Outputs
+      for v in values[:-1]:
+        self.assertAllClose(v, input_value + 1.0)
+
+      # Final state
+      self.assertAllClose(values[-1],
+                          max_length * np.ones(
+                              (batch_size, input_size), dtype=np.float32))
+
+  @test_util.run_v1_only("b/124229375")
+  def testDropout(self):
+    cell = Plus1RNNCell()
+    full_dropout_cell = rnn_cell.DropoutWrapper(
+        cell, input_keep_prob=1e-6, seed=0)
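+    # The wrapper should track exactly one checkpoint dependency: the
+    # wrapped cell, registered under the name "cell".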
+    (name, dep), = full_dropout_cell._checkpoint_dependencies
+    self.assertIs(dep, cell)
+    self.assertEqual("cell", name)
+    batch_size = 2
+    input_size = 5
+    max_length = 8
+    inputs = max_length * [
+        array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+    ]
+    with variable_scope.variable_scope("share_scope"):
+      outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+    with variable_scope.variable_scope("drop_scope"):
+      dropped_outputs, _ = rnn.static_rnn(
+          full_dropout_cell, inputs, dtype=dtypes.float32)
+    self.assertEqual(len(outputs), len(inputs))
+    for out, inp in zip(outputs, inputs):
+      self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
+      self.assertEqual(out.dtype, inp.dtype)
+
+    with self.session(use_gpu=True) as sess:
+      input_value = np.random.randn(batch_size, input_size)
+      values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
+      full_dropout_values = sess.run(
+          dropped_outputs, feed_dict={
+              inputs[0]: input_value
+          })
+
+      for v in values[:-1]:
+        self.assertAllClose(v, input_value + 1.0)
+      for d_v in full_dropout_values[:-1]:  # Add 1.0 to dropped_out (all zeros)
+        self.assertAllClose(d_v, np.ones_like(input_value))
+
+  @test_util.run_v1_only("b/124229375")
+  def testDynamicCalculation(self):
+    cell = Plus1RNNCell()
+    sequence_length = array_ops.placeholder(dtypes.int64)
+    batch_size = 2
+    input_size = 5
+    max_length = 8
+    inputs = max_length * [
+        array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+    ]
+    with variable_scope.variable_scope("drop_scope"):
+      dynamic_outputs, dynamic_state = rnn.static_rnn(
+          cell, inputs, sequence_length=sequence_length, dtype=dtypes.float32)
+    self.assertEqual(len(dynamic_outputs), len(inputs))
+
+    with self.session(use_gpu=True) as sess:
+      input_value = np.random.randn(batch_size, input_size)
+      dynamic_values = sess.run(
+          dynamic_outputs,
+          feed_dict={
+              inputs[0]: input_value,
+              sequence_length: [2, 3]
+          })
+      dynamic_state_value = sess.run(
+          [dynamic_state],
+          feed_dict={
+              inputs[0]: input_value,
+              sequence_length: [2, 3]
+          })
+
+      # outputs are fully calculated for t = 0, 1
+      for v in dynamic_values[:2]:
+        self.assertAllClose(v, input_value + 1.0)
+
+      # outputs at t = 2 are zero for entry 0, calculated for entry 1
+      self.assertAllClose(dynamic_values[2],
+                          np.vstack((np.zeros((input_size)),
+                                     1.0 + input_value[1, :])))
+
+      # outputs at t = 3+ are zero
+      for v in dynamic_values[3:]:
+        self.assertAllEqual(v, np.zeros_like(input_value))
+
+      # the final states are:
+      #  entry 0: the values from the calculation at t=1
+      #  entry 1: the values from the calculation at t=2
+      self.assertAllEqual(dynamic_state_value[0],
+                          np.vstack((1.0 * (1 + 1) * np.ones((input_size)),
+                                     1.0 * (2 + 1) * np.ones((input_size)))))
+
+  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
+    with self.session(use_gpu=True, graph=ops.Graph()):
+      if use_outer_scope:
+        with variable_scope.variable_scope(prefix) as scope:
+          factory(scope)
+      else:
+        factory(prefix)
+
+      # Check that all the variable names start with the proper scope.
+      variables_lib.global_variables_initializer()
+      all_vars = variables_lib.global_variables()
+      prefix = prefix or "rnn"
+      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
+      tf_logging.info("RNN with scope: %s (%s)" %
+                      (prefix, "scope" if use_outer_scope else "str"))
+      for v in scope_vars:
+        tf_logging.info(v.name)
+      self.assertEqual(len(scope_vars), len(all_vars))
+
+  @test_util.run_v1_only("b/124229375")
+  def testScope(self):
+
+    def factory(scope):
+      cell = Plus1RNNCell()
+      batch_size = 2
+      input_size = 5
+      max_length = 8  # unrolled up to this length
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+      ]
+      return rnn.static_rnn(cell, inputs, dtype=dtypes.float32, scope=scope)
+
+    self._testScope(factory, use_outer_scope=True)
+    self._testScope(factory, use_outer_scope=False)
+    self._testScope(factory, prefix=None, use_outer_scope=False)
+
+
+class LSTMTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  def testDType(self):
+    # Test case for GitHub issue 16228
+    # Not passing dtype in constructor results in default float32
+    lstm = rnn_cell.LSTMCell(10)
+    input_tensor = array_ops.ones([10, 50])
+    lstm.build(input_tensor.get_shape())
+    self.assertEqual(lstm._bias.dtype.base_dtype, dtypes.float32)
+
+    # Explicitly pass dtype in constructor
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      lstm = rnn_cell.LSTMCell(10, dtype=dtype)
+      input_tensor = array_ops.ones([10, 50])
+      lstm.build(input_tensor.get_shape())
+      self.assertEqual(lstm._bias.dtype.base_dtype, dtype)
+
+  @test_util.run_v1_only("b/124229375")
+  def testNoProjNoSharding(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      cell = rnn_cell.LSTMCell(
+          num_units, initializer=initializer, state_is_tuple=False)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+      ]
+      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+      self.assertEqual(len(outputs), len(inputs))
+      for out in outputs:
+        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      sess.run(outputs, feed_dict={inputs[0]: input_value})
+
+  @test_util.run_v1_only("b/124229375")
+  def testCellClipping(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          cell_clip=0.0,
+          initializer=initializer,
+          state_is_tuple=False)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+      ]
+      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+      self.assertEqual(len(outputs), len(inputs))
+      for out in outputs:
+        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      values = sess.run(outputs, feed_dict={inputs[0]: input_value})
+
+    for value in values:
+      # if cell c is clipped to 0, tanh(c) = 0 => m==0
+      self.assertAllEqual(value, np.zeros((batch_size, num_units)))
+
+  @test_util.run_v1_only("b/124229375")
+  def testNoProjNoShardingSimpleStateSaver(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      state_saver = TestStateSaver(batch_size, 2 * num_units)
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=False,
+          initializer=initializer,
+          state_is_tuple=False)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+      ]
+      with variable_scope.variable_scope("share_scope"):
+        outputs, state = rnn.static_state_saving_rnn(
+            cell, inputs, state_saver=state_saver, state_name="save_lstm")
+      self.assertEqual(len(outputs), len(inputs))
+      for out in outputs:
+        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      (last_state_value, saved_state_value) = sess.run(
+          [state, state_saver.saved_state["save_lstm"]],
+          feed_dict={
+              inputs[0]: input_value
+          })
+      self.assertAllEqual(last_state_value, saved_state_value)
+
+  @test_util.run_v1_only("b/124229375")
+  def testNoProjNoShardingTupleStateSaver(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    max_length = 8
+    with self.session(graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      state_saver = TestStateSaver(batch_size, num_units)
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=False,
+          initializer=initializer,
+          state_is_tuple=True)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+      ]
+      with variable_scope.variable_scope("share_scope"):
+        outputs, state = rnn.static_state_saving_rnn(
+            cell, inputs, state_saver=state_saver, state_name=("c", "m"))
+      self.assertEqual(len(outputs), len(inputs))
+      for out in outputs:
+        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units])
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      last_and_saved_states = sess.run(
+          state + (state_saver.saved_state["c"], state_saver.saved_state["m"]),
+          feed_dict={
+              inputs[0]: input_value
+          })
+      self.assertEqual(4, len(last_and_saved_states))
+      self.assertAllEqual(last_and_saved_states[:2], last_and_saved_states[2:])
+
+  @test_util.run_v1_only("b/124229375")
+  def testNoProjNoShardingNestedTupleStateSaver(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    max_length = 8
+    with self.session(graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      state_saver = TestStateSaver(
+          batch_size, {
+              "c0": num_units,
+              "m0": num_units,
+              "c1": num_units + 1,
+              "m1": num_units + 1,
+              "c2": num_units + 2,
+              "m2": num_units + 2,
+              "c3": num_units + 3,
+              "m3": num_units + 3
+          })
+
+      def _cell(i):
+        return rnn_cell.LSTMCell(
+            num_units + i,
+            use_peepholes=False,
+            initializer=initializer,
+            state_is_tuple=True)
+
+      # This creates a state tuple which has 4 sub-tuples of length 2 each.
+      cell = rnn_cell.MultiRNNCell(
+          [_cell(i) for i in range(4)], state_is_tuple=True)
+
+      self.assertEqual(len(cell.state_size), 4)
+      for i in range(4):
+        self.assertEqual(len(cell.state_size[i]), 2)
+
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
+      ]
+
+      state_names = (("c0", "m0"), ("c1", "m1"), ("c2", "m2"), ("c3", "m3"))
+      with variable_scope.variable_scope("share_scope"):
+        outputs, state = rnn.static_state_saving_rnn(
+            cell, inputs, state_saver=state_saver, state_name=state_names)
+      self.assertEqual(len(outputs), len(inputs))
+
+      # Final output comes from _cell(3) which has state size num_units + 3
+      for out in outputs:
+        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units + 3])
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      last_states = sess.run(
+          list(nest.flatten(state)), feed_dict={
+              inputs[0]: input_value
+          })
+      saved_states = sess.run(
+          list(state_saver.saved_state.values()),
+          feed_dict={
+              inputs[0]: input_value
+          })
+      self.assertEqual(8, len(last_states))
+      self.assertEqual(8, len(saved_states))
+      flat_state_names = nest.flatten(state_names)
+      named_saved_states = dict(
+          zip(state_saver.saved_state.keys(), saved_states))
+
+      for i in range(8):
+        self.assertAllEqual(last_states[i],
+                            named_saved_states[flat_state_names[i]])
+
+  @test_util.run_v1_only("b/124229375")
+  def testProjNoSharding(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+      ]
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          initializer=initializer,
+          state_is_tuple=False)
+      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+      self.assertEqual(len(outputs), len(inputs))
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      sess.run(outputs, feed_dict={inputs[0]: input_value})
+
+  def _testStateTupleWithProjAndSequenceLength(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    max_length = 8
+    sequence_length = [4, 6]
+    with self.session(graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+      ]
+      cell_notuple = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          initializer=initializer,
+          state_is_tuple=False)
+      cell_tuple = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          initializer=initializer,
+          state_is_tuple=True)
+      with variable_scope.variable_scope("root") as scope:
+        outputs_notuple, state_notuple = rnn.static_rnn(
+            cell_notuple,
+            inputs,
+            dtype=dtypes.float32,
+            sequence_length=sequence_length,
+            scope=scope)
+        scope.reuse_variables()
+        # TODO(ebrevdo): For this test, we ensure values are identical and
+        # therefore the weights here are tied.  In the future, we may consider
+        # making the state_is_tuple property mutable so we can avoid
+        # having to do this - especially if users ever need to reuse
+        # the parameters from different RNNCell instances.  Right now,
+        # this seems an unrealistic use case except for testing.
+        cell_tuple._scope = cell_notuple._scope  # pylint: disable=protected-access
+        outputs_tuple, state_tuple = rnn.static_rnn(
+            cell_tuple,
+            inputs,
+            dtype=dtypes.float32,
+            sequence_length=sequence_length,
+            scope=scope)
+      self.assertEqual(len(outputs_notuple), len(inputs))
+      self.assertEqual(len(outputs_tuple), len(inputs))
+      self.assertTrue(isinstance(state_tuple, tuple))
+      self.assertTrue(isinstance(state_notuple, ops.Tensor))
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      outputs_notuple_v = sess.run(
+          outputs_notuple, feed_dict={
+              inputs[0]: input_value
+          })
+      outputs_tuple_v = sess.run(
+          outputs_tuple, feed_dict={
+              inputs[0]: input_value
+          })
+      self.assertAllEqual(outputs_notuple_v, outputs_tuple_v)
+
+      (state_notuple_v,) = sess.run(
+          (state_notuple,), feed_dict={
+              inputs[0]: input_value
+          })
+      state_tuple_v = sess.run(state_tuple, feed_dict={inputs[0]: input_value})
+      self.assertAllEqual(state_notuple_v, np.hstack(state_tuple_v))
+
+  @test_util.run_v1_only("b/124229375")
+  def testProjSharding(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    num_proj_shards = 3
+    num_unit_shards = 2
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+      ]
+
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          num_unit_shards=num_unit_shards,
+          num_proj_shards=num_proj_shards,
+          initializer=initializer,
+          state_is_tuple=False)
+
+      outputs, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+
+      self.assertEqual(len(outputs), len(inputs))
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      sess.run(outputs, feed_dict={inputs[0]: input_value})
+
+  @test_util.run_v1_only("b/124229375")
+  def testDoubleInput(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    num_proj_shards = 3
+    num_unit_shards = 2
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float64, shape=(None, input_size))
+      ]
+
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          num_unit_shards=num_unit_shards,
+          num_proj_shards=num_proj_shards,
+          initializer=initializer,
+          state_is_tuple=False)
+
+      outputs, _ = rnn.static_rnn(
+          cell,
+          inputs,
+          initial_state=cell.zero_state(batch_size, dtypes.float64))
+
+      self.assertEqual(len(outputs), len(inputs))
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.asarray(
+          np.random.randn(batch_size, input_size), dtype=np.float64)
+      values = sess.run(outputs, feed_dict={inputs[0]: input_value})
+      self.assertEqual(values[0].dtype, input_value.dtype)
+
+  @test_util.run_v1_only("b/124229375")
+  def testShardNoShardEquivalentOutput(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    num_proj_shards = 3
+    num_unit_shards = 2
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+      ]
+      initializer = init_ops.constant_initializer(0.001)
+
+      cell_noshard = rnn_cell.LSTMCell(
+          num_units,
+          num_proj=num_proj,
+          use_peepholes=True,
+          initializer=initializer,
+          num_unit_shards=num_unit_shards,
+          num_proj_shards=num_proj_shards,
+          state_is_tuple=False)
+
+      cell_shard = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          initializer=initializer,
+          num_proj=num_proj,
+          state_is_tuple=False)
+
+      with variable_scope.variable_scope("noshard_scope"):
+        outputs_noshard, state_noshard = rnn.static_rnn(
+            cell_noshard, inputs, dtype=dtypes.float32)
+      with variable_scope.variable_scope("shard_scope"):
+        outputs_shard, state_shard = rnn.static_rnn(
+            cell_shard, inputs, dtype=dtypes.float32)
+
+      self.assertEqual(len(outputs_noshard), len(inputs))
+      self.assertEqual(len(outputs_noshard), len(outputs_shard))
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      feeds = dict((x, input_value) for x in inputs)
+      values_noshard = sess.run(outputs_noshard, feed_dict=feeds)
+      values_shard = sess.run(outputs_shard, feed_dict=feeds)
+      state_values_noshard = sess.run([state_noshard], feed_dict=feeds)
+      state_values_shard = sess.run([state_shard], feed_dict=feeds)
+      self.assertEqual(len(values_noshard), len(values_shard))
+      self.assertEqual(len(state_values_noshard), len(state_values_shard))
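+      # Sharded and unsharded weight layouts can accumulate matmul partial
+      # sums in different orders, hence the small absolute tolerance below.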
+      for (v_noshard, v_shard) in zip(values_noshard, values_shard):
+        self.assertAllClose(v_noshard, v_shard, atol=1e-3)
+      for (s_noshard, s_shard) in zip(state_values_noshard, state_values_shard):
+        self.assertAllClose(s_noshard, s_shard, atol=1e-3)
+
+  @test_util.run_v1_only("b/124229375")
+  def testDoubleInputWithDropoutAndDynamicCalculation(self):
+    """Smoke test for using LSTM with doubles, dropout, dynamic calculation."""
+
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    num_proj_shards = 3
+    num_unit_shards = 2
+    max_length = 8
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      sequence_length = array_ops.placeholder(dtypes.int64)
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float64, shape=(None, input_size))
+      ]
+
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          num_unit_shards=num_unit_shards,
+          num_proj_shards=num_proj_shards,
+          initializer=initializer,
+          state_is_tuple=False)
+      dropout_cell = rnn_cell.DropoutWrapper(cell, 0.5, seed=0)
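+      # The positional 0.5 is DropoutWrapper's input_keep_prob; the output
+      # and state keep probabilities default to 1.0. The wrapper forwards
+      # state_size to the inner cell, so cell.zero_state below is a valid
+      # initial state for dropout_cell.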
+
+      outputs, state = rnn.static_rnn(
+          dropout_cell,
+          inputs,
+          sequence_length=sequence_length,
+          initial_state=cell.zero_state(batch_size, dtypes.float64))
+
+      self.assertEqual(len(outputs), len(inputs))
+
+      variables_lib.global_variables_initializer().run(feed_dict={
+          sequence_length: [2, 3]
+      })
+      input_value = np.asarray(
+          np.random.randn(batch_size, input_size), dtype=np.float64)
+      values = sess.run(
+          outputs, feed_dict={
+              inputs[0]: input_value,
+              sequence_length: [2, 3]
+          })
+      state_value = sess.run(
+          [state], feed_dict={
+              inputs[0]: input_value,
+              sequence_length: [2, 3]
+          })
+      self.assertEqual(values[0].dtype, input_value.dtype)
+      self.assertEqual(state_value[0].dtype, input_value.dtype)
+
+  @test_util.run_v1_only("b/124229375")
+  def testSharingWeightsWithReuse(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    max_length = 8
+    with self.session(graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed)
+      initializer_d = init_ops.random_uniform_initializer(
+          -1, 1, seed=self._seed + 1)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+      ]
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          initializer=initializer,
+          state_is_tuple=False)
+      cell_d = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          initializer=initializer_d,
+          state_is_tuple=False)
+
+      with variable_scope.variable_scope("share_scope"):
+        outputs0, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+      with variable_scope.variable_scope("share_scope", reuse=True):
+        outputs1, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+      with variable_scope.variable_scope("diff_scope"):
+        outputs2, _ = rnn.static_rnn(cell_d, inputs, dtype=dtypes.float32)
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      output_values = sess.run(
+          outputs0 + outputs1 + outputs2, feed_dict={
+              inputs[0]: input_value
+          })
+      outputs0_values = output_values[:max_length]
+      outputs1_values = output_values[max_length:2 * max_length]
+      outputs2_values = output_values[2 * max_length:]
+      self.assertEqual(len(outputs0_values), len(outputs1_values))
+      self.assertEqual(len(outputs0_values), len(outputs2_values))
+      for o1, o2, o3 in zip(outputs0_values, outputs1_values, outputs2_values):
+        # Same weights used by both RNNs so outputs should be the same.
+        self.assertAllEqual(o1, o2)
+        # Different weights used so outputs should be different.
+        self.assertTrue(np.linalg.norm(o1 - o3) > 1e-6)
+
+  @test_util.run_v1_only("b/124229375")
+  def testSharingWeightsWithDifferentNamescope(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    max_length = 8
+    with self.session(graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed)
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+      ]
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          initializer=initializer,
+          state_is_tuple=False)
+
+      with ops.name_scope("scope0"):
+        with variable_scope.variable_scope("share_scope"):
+          outputs0, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+      with ops.name_scope("scope1"):
+        with variable_scope.variable_scope("share_scope", reuse=True):
+          outputs1, _ = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
+
+      variables_lib.global_variables_initializer().run()
+      input_value = np.random.randn(batch_size, input_size)
+      output_values = sess.run(
+          outputs0 + outputs1, feed_dict={
+              inputs[0]: input_value
+          })
+      outputs0_values = output_values[:max_length]
+      outputs1_values = output_values[max_length:]
+      self.assertEqual(len(outputs0_values), len(outputs1_values))
+      for out0, out1 in zip(outputs0_values, outputs1_values):
+        self.assertAllEqual(out0, out1)
+
+  @test_util.run_v1_only("b/124229375")
+  def testDynamicRNNAllowsUnknownTimeDimension(self):
+    inputs = array_ops.placeholder(dtypes.float32, shape=[1, None, 20])
+    cell = rnn_cell.GRUCell(30)
+    # Smoke test; this should not raise an error.
+    rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDynamicRNNWithTupleStates(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    max_length = 8
+    sequence_length = [4, 6]
+    in_graph_mode = not context.executing_eagerly()
+    with self.session(graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      if in_graph_mode:
+        inputs = max_length * [
+            array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+        ]
+      else:
+        inputs = max_length * [
+            constant_op.constant(
+                np.random.randn(batch_size, input_size).astype(np.float32))
+        ]
+      inputs_c = array_ops.stack(inputs)
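+      # Stacking the length-max_length list yields the time-major
+      # [max_length, batch_size, input_size] tensor that dynamic_rnn
+      # consumes with time_major=True.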
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          num_proj=num_proj,
+          initializer=initializer,
+          state_is_tuple=True)
+      with variable_scope.variable_scope("root") as scope:
+        outputs_static, state_static = rnn.static_rnn(
+            cell,
+            inputs,
+            dtype=dtypes.float32,
+            sequence_length=sequence_length,
+            scope=scope)
+        scope.reuse_variables()
+        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
+            cell,
+            inputs_c,
+            dtype=dtypes.float32,
+            time_major=True,
+            sequence_length=sequence_length,
+            scope=scope)
+      self.assertTrue(isinstance(state_static, rnn_cell.LSTMStateTuple))
+      self.assertTrue(isinstance(state_dynamic, rnn_cell.LSTMStateTuple))
+      self.assertEqual(state_static[0], state_static.c)
+      self.assertEqual(state_static[1], state_static.h)
+      self.assertEqual(state_dynamic[0], state_dynamic.c)
+      self.assertEqual(state_dynamic[1], state_dynamic.h)
+
+      if in_graph_mode:
+        variables_lib.global_variables_initializer().run()
+        input_value = np.random.randn(batch_size, input_size)
+        outputs_static = sess.run(
+            outputs_static, feed_dict={
+                inputs[0]: input_value
+            })
+        outputs_dynamic = sess.run(
+            outputs_dynamic, feed_dict={
+                inputs[0]: input_value
+            })
+        state_static = sess.run(
+            state_static, feed_dict={
+                inputs[0]: input_value
+            })
+        state_dynamic = sess.run(
+            state_dynamic, feed_dict={
+                inputs[0]: input_value
+            })
+
+      if in_graph_mode:
+        self.assertAllEqual(outputs_static, outputs_dynamic)
+      else:
+        self.assertAllEqual(array_ops.stack(outputs_static), outputs_dynamic)
+      self.assertAllEqual(np.hstack(state_static), np.hstack(state_dynamic))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDynamicRNNWithNestedTupleStates(self):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    num_proj = 4
+    max_length = 8
+    sequence_length = [4, 6]
+    in_graph_mode = not context.executing_eagerly()
+    with self.session(graph=ops.Graph()) as sess:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      if in_graph_mode:
+        inputs = max_length * [
+            array_ops.placeholder(dtypes.float32, shape=(None, input_size))
+        ]
+      else:
+        inputs = max_length * [
+            constant_op.constant(
+                np.random.randn(batch_size, input_size).astype(np.float32))
+        ]
+      inputs_c = array_ops.stack(inputs)
+
+      def _cell(i):
+        return rnn_cell.LSTMCell(
+            num_units + i,
+            use_peepholes=True,
+            num_proj=num_proj + i,
+            initializer=initializer,
+            state_is_tuple=True)
+
+      # This creates a state tuple which has 4 sub-tuples of length 2 each.
+      cell = rnn_cell.MultiRNNCell(
+          [_cell(i) for i in range(4)], state_is_tuple=True)
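+      # With num_units=3 and num_proj=4, layer i contributes an
+      # LSTMStateTuple (c, h) of shapes ([B, 3 + i], [B, 4 + i]), i.e.
+      # ([B, 3], [B, 4]) through ([B, 6], [B, 7]).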
+
+      self.assertEqual(len(cell.state_size), 4)
+      for i in range(4):
+        self.assertEqual(len(cell.state_size[i]), 2)
+
+      test_zero = cell.zero_state(1, dtypes.float32)
+      self.assertEqual(len(test_zero), 4)
+      for i in range(4):
+        self.assertEqual(test_zero[i][0].get_shape()[1], cell.state_size[i][0])
+        self.assertEqual(test_zero[i][1].get_shape()[1], cell.state_size[i][1])
+
+      with variable_scope.variable_scope("root") as scope:
+        outputs_static, state_static = rnn.static_rnn(
+            cell,
+            inputs,
+            dtype=dtypes.float32,
+            sequence_length=sequence_length,
+            scope=scope)
+        scope.reuse_variables()
+        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
+            cell,
+            inputs_c,
+            dtype=dtypes.float32,
+            time_major=True,
+            sequence_length=sequence_length,
+            scope=scope)
+
+      if in_graph_mode:
+        input_value = np.random.randn(batch_size, input_size)
+        variables_lib.global_variables_initializer().run()
+        outputs_static = sess.run(
+            outputs_static, feed_dict={
+                inputs[0]: input_value
+            })
+        outputs_dynamic = sess.run(
+            outputs_dynamic, feed_dict={
+                inputs[0]: input_value
+            })
+        state_static = sess.run(
+            nest.flatten(state_static), feed_dict={
+                inputs[0]: input_value
+            })
+        state_dynamic = sess.run(
+            nest.flatten(state_dynamic), feed_dict={
+                inputs[0]: input_value
+            })
+
+      if in_graph_mode:
+        self.assertAllEqual(outputs_static, outputs_dynamic)
+      else:
+        self.assertAllEqual(array_ops.stack(outputs_static), outputs_dynamic)
+        state_static = nest.flatten(state_static)
+        state_dynamic = nest.flatten(state_dynamic)
+      self.assertAllEqual(np.hstack(state_static), np.hstack(state_dynamic))
+
+  def _testDynamicEquivalentToStaticRNN(self, use_sequence_length):
+    time_steps = 8
+    num_units = 3
+    num_proj = 4
+    input_size = 5
+    batch_size = 2
+
+    input_values = np.random.randn(time_steps, batch_size, input_size).astype(
+        np.float32)
+
+    if use_sequence_length:
+      sequence_length = np.random.randint(0, time_steps, size=batch_size)
+    else:
+      sequence_length = None
+
+    in_graph_mode = not context.executing_eagerly()
+
+    # TODO(b/68017812): Eager ignores operation seeds, so we need to create a
+    # single cell and reuse it across the static and dynamic RNNs. Remove this
+    # special case once the bug is fixed.
+    if not in_graph_mode:
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+      cell = rnn_cell.LSTMCell(
+          num_units,
+          use_peepholes=True,
+          initializer=initializer,
+          num_proj=num_proj,
+          state_is_tuple=False)
+
+    ########## Step 1: Run static graph and generate readouts
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      if in_graph_mode:
+        concat_inputs = array_ops.placeholder(
+            dtypes.float32, shape=(time_steps, batch_size, input_size))
+      else:
+        concat_inputs = constant_op.constant(input_values)
+      inputs = array_ops.unstack(concat_inputs)
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+
+      # TODO(akshayka): Remove special case once b/68017812 is fixed.
+      if in_graph_mode:
+        cell = rnn_cell.LSTMCell(
+            num_units,
+            use_peepholes=True,
+            initializer=initializer,
+            num_proj=num_proj,
+            state_is_tuple=False)
+
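+      # The static graph deliberately reuses the name "dynamic_scope" so
+      # that variable names, and hence the seeded initial values, match the
+      # dynamic graph built in Step 2.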
+      with variable_scope.variable_scope("dynamic_scope"):
+        outputs_static, state_static = rnn.static_rnn(
+            cell, inputs, sequence_length=sequence_length, dtype=dtypes.float32)
+
+      if in_graph_mode:
+        # Generate gradients and run sessions to obtain outputs
+        feeds = {concat_inputs: input_values}
+        # Initialize
+        variables_lib.global_variables_initializer().run(feed_dict=feeds)
+        # Generate gradients of sum of outputs w.r.t. inputs
+        static_gradients = gradients_impl.gradients(
+            outputs_static + [state_static], [concat_inputs])
+        # Generate gradients of individual outputs w.r.t. inputs
+        static_individual_gradients = nest.flatten([
+            gradients_impl.gradients(y, [concat_inputs])
+            for y in [outputs_static[0], outputs_static[-1], state_static]
+        ])
+        # Generate gradients of individual outputs w.r.t. trainable variables
+        trainable_variables = ops.get_collection(
+            ops.GraphKeys.TRAINABLE_VARIABLES)
+        assert len(trainable_variables) > 1, (
+            "Count of trainable variables: %d" % len(trainable_variables))
+        # pylint: disable=bad-builtin
+        static_individual_variable_gradients = nest.flatten([
+            gradients_impl.gradients(y, trainable_variables)
+            for y in [outputs_static[0], outputs_static[-1], state_static]
+        ])
+        # Test forward pass
+        values_static = sess.run(outputs_static, feed_dict=feeds)
+        (state_value_static,) = sess.run((state_static,), feed_dict=feeds)
+
+        # Test gradients of outputs & final state w.r.t. inputs and variables
+        static_grad_values = sess.run(static_gradients, feed_dict=feeds)
+
+        static_individual_grad_values = sess.run(
+            static_individual_gradients, feed_dict=feeds)
+
+        static_individual_var_grad_values = sess.run(
+            static_individual_variable_gradients, feed_dict=feeds)
+
+    ########## Step 2: Run dynamic graph and generate readouts
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      if in_graph_mode:
+        concat_inputs = array_ops.placeholder(
+            dtypes.float32, shape=(time_steps, batch_size, input_size))
+      else:
+        concat_inputs = constant_op.constant(input_values)
+      initializer = init_ops.random_uniform_initializer(
+          -0.01, 0.01, seed=self._seed)
+
+      # TODO(akshayka): Remove this special case once b/68017812 is
+      # fixed.
+      if in_graph_mode:
+        cell = rnn_cell.LSTMCell(
+            num_units,
+            use_peepholes=True,
+            initializer=initializer,
+            num_proj=num_proj,
+            state_is_tuple=False)
+
+      with variable_scope.variable_scope("dynamic_scope"):
+        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
+            cell,
+            inputs=concat_inputs,
+            sequence_length=sequence_length,
+            time_major=True,
+            dtype=dtypes.float32)
+        split_outputs_dynamic = array_ops.unstack(outputs_dynamic, time_steps)
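+        # Unstacking along time turns the [time_steps, batch, num_proj]
+        # dynamic output into a per-step list directly comparable to the
+        # static outputs from Step 1.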
+
+      if in_graph_mode:
+        feeds = {concat_inputs: input_values}
+
+        # Initialize
+        variables_lib.global_variables_initializer().run(feed_dict=feeds)
+
+        # Generate gradients of sum of outputs w.r.t. inputs
+        dynamic_gradients = gradients_impl.gradients(
+            split_outputs_dynamic + [state_dynamic], [concat_inputs])
+
+        # Generate gradients of several individual outputs w.r.t. inputs
+        dynamic_individual_gradients = nest.flatten([
+            gradients_impl.gradients(y, [concat_inputs])
+            for y in [
+                split_outputs_dynamic[0], split_outputs_dynamic[-1],
+                state_dynamic
+            ]
+        ])
+
+        # Generate gradients of individual outputs w.r.t. trainable variables
+        trainable_variables = ops.get_collection(
+            ops.GraphKeys.TRAINABLE_VARIABLES)
+        assert len(trainable_variables) > 1, (
+            "Count of trainable variables: %d" % len(trainable_variables))
+        dynamic_individual_variable_gradients = nest.flatten([
+            gradients_impl.gradients(y, trainable_variables)
+            for y in [
+                split_outputs_dynamic[0], split_outputs_dynamic[-1],
+                state_dynamic
+            ]
+        ])
+
+        # Test forward pass
+        values_dynamic = sess.run(split_outputs_dynamic, feed_dict=feeds)
+        (state_value_dynamic,) = sess.run((state_dynamic,), feed_dict=feeds)
+
+        # Test gradients of outputs & final state w.r.t. inputs and variables
+        dynamic_grad_values = sess.run(dynamic_gradients, feed_dict=feeds)
+
+        dynamic_individual_grad_values = sess.run(
+            dynamic_individual_gradients, feed_dict=feeds)
+
+        dynamic_individual_var_grad_values = sess.run(
+            dynamic_individual_variable_gradients, feed_dict=feeds)
+
+    ########## Step 3: Comparisons
+    if not in_graph_mode:
+      values_static = outputs_static
+      values_dynamic = split_outputs_dynamic
+      state_value_static = state_static
+      state_value_dynamic = state_dynamic
+
+    self.assertEqual(len(values_static), len(values_dynamic))
+    for (value_static, value_dynamic) in zip(values_static, values_dynamic):
+      self.assertAllClose(value_static, value_dynamic)
+    self.assertAllClose(state_value_static, state_value_dynamic)
+
+    if in_graph_mode:
+      self.assertAllClose(static_grad_values, dynamic_grad_values)
+
+      self.assertEqual(
+          len(static_individual_grad_values),
+          len(dynamic_individual_grad_values))
+      self.assertEqual(
+          len(static_individual_var_grad_values),
+          len(dynamic_individual_var_grad_values))
+
+      for i, (a, b) in enumerate(
+          zip(static_individual_grad_values, dynamic_individual_grad_values)):
+        tf_logging.info("Comparing individual gradients iteration %d" % i)
+        self.assertAllClose(a, b)
+
+      for i, (a, b) in enumerate(
+          zip(static_individual_var_grad_values,
+              dynamic_individual_var_grad_values)):
+        tf_logging.info(
+            "Comparing individual variable gradients iteration %d" % i)
+        self.assertAllClose(a, b)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDynamicEquivalentToStaticRNN(self):
+    self._testDynamicEquivalentToStaticRNN(use_sequence_length=False)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDynamicEquivalentToStaticRNNWithSequenceLength(self):
+    self._testDynamicEquivalentToStaticRNN(use_sequence_length=True)
+
+
+class BidirectionalRNNTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  def _createBidirectionalRNN(self, use_shape, use_sequence_length, scope=None):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    max_length = 8
+
+    initializer = init_ops.random_uniform_initializer(
+        -0.01, 0.01, seed=self._seed)
+    sequence_length = array_ops.placeholder(
+        dtypes.int64) if use_sequence_length else None
+    cell_fw = rnn_cell.LSTMCell(
+        num_units, input_size, initializer=initializer, state_is_tuple=False)
+    cell_bw = rnn_cell.LSTMCell(
+        num_units, input_size, initializer=initializer, state_is_tuple=False)
+    inputs = max_length * [
+        array_ops.placeholder(
+            dtypes.float32,
+            shape=(batch_size, input_size) if use_shape else (None, input_size))
+    ]
+    outputs, state_fw, state_bw = rnn.static_bidirectional_rnn(
+        cell_fw,
+        cell_bw,
+        inputs,
+        dtype=dtypes.float32,
+        sequence_length=sequence_length,
+        scope=scope)
+    self.assertEqual(len(outputs), len(inputs))
+    for out in outputs:
+      self.assertEqual(out.get_shape().as_list(),
+                       [batch_size if use_shape else None, 2 * num_units])
+
+    input_value = np.random.randn(batch_size, input_size)
+    outputs = array_ops.stack(outputs)
+
+    return input_value, inputs, outputs, state_fw, state_bw, sequence_length
+
+  def _testBidirectionalRNN(self, use_shape):
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
+          self._createBidirectionalRNN(use_shape, True))
+      variables_lib.global_variables_initializer().run()
+      # Run with pre-specified sequence length of 2, 3
+      out, s_fw, s_bw = sess.run(
+          [outputs, state_fw, state_bw],
+          feed_dict={
+              inputs[0]: input_value,
+              sequence_length: [2, 3]
+          })
+
+      # Since the forward and backward LSTM cells were initialized with the
+      # same parameters, the forward and backward outputs have to be the
+      # same, but reversed in time. The format is output[time][batch][depth],
+      # and due to depth concatenation (as num_units=3 for both RNNs):
+      # - forward output:  out[][][depth] for 0 <= depth < 3
+      # - backward output: out[][][depth] for 3 <= depth < 6
+      #
+      # First sequence in batch is length=2
+      # Check that the time=0 forward output is equal to time=1 backward output
+      self.assertAllClose(out[0][0][0], out[1][0][3])
+      self.assertAllClose(out[0][0][1], out[1][0][4])
+      self.assertAllClose(out[0][0][2], out[1][0][5])
+      # Check that the time=1 forward output is equal to time=0 backward output
+      self.assertAllClose(out[1][0][0], out[0][0][3])
+      self.assertAllClose(out[1][0][1], out[0][0][4])
+      self.assertAllClose(out[1][0][2], out[0][0][5])
+
+      # Second sequence in batch is length=3
+      # Check that the time=0 forward output is equal to time=2 backward output
+      self.assertAllClose(out[0][1][0], out[2][1][3])
+      self.assertAllClose(out[0][1][1], out[2][1][4])
+      self.assertAllClose(out[0][1][2], out[2][1][5])
+      # Check that the time=1 forward output is equal to time=1 backward output
+      self.assertAllClose(out[1][1][0], out[1][1][3])
+      self.assertAllClose(out[1][1][1], out[1][1][4])
+      self.assertAllClose(out[1][1][2], out[1][1][5])
+      # Check that the time=2 forward output is equal to time=0 backward output
+      self.assertAllClose(out[2][1][0], out[0][1][3])
+      self.assertAllClose(out[2][1][1], out[0][1][4])
+      self.assertAllClose(out[2][1][2], out[0][1][5])
+      # Via the reasoning above, the forward and backward final state should be
+      # exactly the same
+      self.assertAllClose(s_fw, s_bw)
+
+  def _testBidirectionalRNNWithoutSequenceLength(self, use_shape):
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      input_value, inputs, outputs, state_fw, state_bw, _ = (
+          self._createBidirectionalRNN(use_shape, False))
+      variables_lib.global_variables_initializer().run()
+      out, s_fw, s_bw = sess.run(
+          [outputs, state_fw, state_bw], feed_dict={
+              inputs[0]: input_value
+          })
+
+      # Since the forward and backward LSTM cells were initialized with the
+      # same parameters, the forward and backward outputs have to be the
+      # same, but reversed in time. The format is output[time][batch][depth],
+      # and due to depth concatenation (as num_units=3 for both RNNs):
+      # - forward output:  out[][][depth] for 0 <= depth < 3
+      # - backward output: out[][][depth] for 3 <= depth < 6
+      #
+      # Both sequences in batch are length=8.  Check that the time=i
+      # forward output is equal to time=8-1-i backward output
+      for i in range(8):
+        self.assertAllClose(out[i][0][0:3], out[8 - 1 - i][0][3:6])
+        self.assertAllClose(out[i][1][0:3], out[8 - 1 - i][1][3:6])
+      # Via the reasoning above, the forward and backward final state should be
+      # exactly the same
+      self.assertAllClose(s_fw, s_bw)
+
+  @test_util.run_v1_only("b/124229375")
+  def testBidirectionalRNN(self):
+    self._testBidirectionalRNN(use_shape=False)
+    self._testBidirectionalRNN(use_shape=True)
+
+  @test_util.run_v1_only("b/124229375")
+  def testBidirectionalRNNWithoutSequenceLength(self):
+    self._testBidirectionalRNNWithoutSequenceLength(use_shape=False)
+    self._testBidirectionalRNNWithoutSequenceLength(use_shape=True)
+
+  def _createBidirectionalDynamicRNN(self,
+                                     use_shape,
+                                     use_state_tuple,
+                                     use_time_major,
+                                     use_sequence_length,
+                                     scope=None):
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    max_length = 8
+
+    initializer = init_ops.random_uniform_initializer(
+        -0.01, 0.01, seed=self._seed)
+    sequence_length = (
+        array_ops.placeholder(dtypes.int64) if use_sequence_length else None)
+    cell_fw = rnn_cell.LSTMCell(
+        num_units, initializer=initializer, state_is_tuple=use_state_tuple)
+    cell_bw = rnn_cell.LSTMCell(
+        num_units, initializer=initializer, state_is_tuple=use_state_tuple)
+    inputs = max_length * [
+        array_ops.placeholder(
+            dtypes.float32,
+            shape=(batch_size if use_shape else None, input_size))
+    ]
+    inputs_c = array_ops.stack(inputs)
+    if not use_time_major:
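+      # Convert the time-major [max_length, batch, depth] stack into the
+      # batch-major [batch, max_length, depth] layout expected when
+      # time_major=False.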
+      inputs_c = array_ops.transpose(inputs_c, [1, 0, 2])
+    outputs, states = rnn.bidirectional_dynamic_rnn(
+        cell_fw,
+        cell_bw,
+        inputs_c,
+        sequence_length,
+        dtype=dtypes.float32,
+        time_major=use_time_major,
+        scope=scope)
+    outputs = array_ops.concat(outputs, 2)
+    state_fw, state_bw = states
+    outputs_shape = [None, max_length, 2 * num_units]
+    if use_shape:
+      outputs_shape[0] = batch_size
+    if use_time_major:
+      outputs_shape[0], outputs_shape[1] = outputs_shape[1], outputs_shape[0]
+    self.assertEqual(outputs.get_shape().as_list(), outputs_shape)
+
+    input_value = np.random.randn(batch_size, input_size)
+
+    return input_value, inputs, outputs, state_fw, state_bw, sequence_length
+
+  def _testBidirectionalDynamicRNN(self, use_shape, use_state_tuple,
+                                   use_time_major, use_sequence_length):
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
+          self._createBidirectionalDynamicRNN(
+              use_shape, use_state_tuple, use_time_major, use_sequence_length))
+      variables_lib.global_variables_initializer().run()
+      # Run with pre-specified sequence length of 2, 3
+      feed_dict = ({sequence_length: [2, 3]} if use_sequence_length else {})
+      feed_dict.update({inputs[0]: input_value})
+      if use_state_tuple:
+        out, c_fw, m_fw, c_bw, m_bw = sess.run(
+            [outputs, state_fw[0], state_fw[1], state_bw[0], state_bw[1]],
+            feed_dict=feed_dict)
+        s_fw = (c_fw, m_fw)
+        s_bw = (c_bw, m_bw)
+      else:
+        out, s_fw, s_bw = sess.run(
+            [outputs, state_fw, state_bw], feed_dict=feed_dict)
+
+      # Since the forward and backward LSTM cells were initialized with the
+      # same parameters, the forward and backward outputs have to be the
+      # same, but reversed in time. The format is output[time][batch][depth],
+      # and due to depth concatenation (as num_units=3 for both RNNs):
+      # - forward output:  out[][][depth] for 0 <= depth < 3
+      # - backward output: out[][][depth] for 3 <= depth < 6
+      #
+      if not use_time_major:
+        out = np.swapaxes(out, 0, 1)
+
+      if use_sequence_length:
+        # First sequence in batch is length=2
+        # Check that the t=0 forward output is equal to t=1 backward output
+        self.assertEqual(out[0][0][0], out[1][0][3])
+        self.assertEqual(out[0][0][1], out[1][0][4])
+        self.assertEqual(out[0][0][2], out[1][0][5])
+        # Check that the t=1 forward output is equal to t=0 backward output
+        self.assertEqual(out[1][0][0], out[0][0][3])
+        self.assertEqual(out[1][0][1], out[0][0][4])
+        self.assertEqual(out[1][0][2], out[0][0][5])
+
+        # Second sequence in batch is length=3
+        # Check that the t=0 forward output is equal to t=2 backward output
+        self.assertEqual(out[0][1][0], out[2][1][3])
+        self.assertEqual(out[0][1][1], out[2][1][4])
+        self.assertEqual(out[0][1][2], out[2][1][5])
+        # Check that the t=1 forward output is equal to t=1 backward output
+        self.assertEqual(out[1][1][0], out[1][1][3])
+        self.assertEqual(out[1][1][1], out[1][1][4])
+        self.assertEqual(out[1][1][2], out[1][1][5])
+        # Check that the t=2 forward output is equal to t=0 backward output
+        self.assertEqual(out[2][1][0], out[0][1][3])
+        self.assertEqual(out[2][1][1], out[0][1][4])
+        self.assertEqual(out[2][1][2], out[0][1][5])
+        # Via the reasoning above, the forward and backward final state should
+        # be exactly the same
+        self.assertAllClose(s_fw, s_bw)
+      else:  # not use_sequence_length
+        max_length = 8  # from _createBidirectionalDynamicRNN
+        for t in range(max_length):
+          self.assertAllEqual(out[t, :, 0:3], out[max_length - t - 1, :, 3:6])
+        self.assertAllClose(s_fw, s_bw)
+
+  @test_util.run_v1_only("b/124229375")
+  def testBidirectionalDynamicRNN(self):
+    # Generate all 2^4 combinations of the four boolean options,
+    # from (True, True, True, True) to (False, False, False, False).
+    options = itertools.product([True, False], repeat=4)
+    for option in options:
+      self._testBidirectionalDynamicRNN(
+          use_shape=option[0],
+          use_state_tuple=option[1],
+          use_time_major=option[2],
+          use_sequence_length=option[3])
+
+  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
+    # REMARKS: factory(scope) is a function accepting a scope
+    #          as an argument; the scope can be None, a string,
+    #          or a VariableScope instance.
+    with self.session(use_gpu=True, graph=ops.Graph()):
+      if use_outer_scope:
+        with variable_scope.variable_scope(prefix) as scope:
+          factory(scope)
+      else:
+        factory(prefix)
+
+      # Check that all the variable names start
+      # with the proper scope.
+      variables_lib.global_variables_initializer()
+      all_vars = variables_lib.global_variables()
+      prefix = prefix or "bidirectional_rnn"
+      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
+      tf_logging.info("BiRNN with scope: %s (%s)" %
+                      (prefix, "scope" if use_outer_scope else "str"))
+      for v in scope_vars:
+        tf_logging.info(v.name)
+      self.assertEqual(len(scope_vars), len(all_vars))
+
+  @test_util.run_v1_only("b/124229375")
+  def testBidirectionalRNNScope(self):
+
+    def factory(scope):
+      return self._createBidirectionalRNN(
+          use_shape=True, use_sequence_length=True, scope=scope)
+
+    self._testScope(factory, use_outer_scope=True)
+    self._testScope(factory, use_outer_scope=False)
+    self._testScope(factory, prefix=None, use_outer_scope=False)
+
+  @test_util.run_v1_only("b/124229375")
+  def testBidirectionalDynamicRNNScope(self):
+
+    def get_factory(use_time_major):
+
+      def factory(scope):
+        return self._createBidirectionalDynamicRNN(
+            use_shape=True,
+            use_state_tuple=True,
+            use_sequence_length=True,
+            use_time_major=use_time_major,
+            scope=scope)
+
+      return factory
+
+    self._testScope(get_factory(True), use_outer_scope=True)
+    self._testScope(get_factory(True), use_outer_scope=False)
+    self._testScope(get_factory(True), prefix=None, use_outer_scope=False)
+    self._testScope(get_factory(False), use_outer_scope=True)
+    self._testScope(get_factory(False), use_outer_scope=False)
+    self._testScope(get_factory(False), prefix=None, use_outer_scope=False)
+
+
+class MultiDimensionalLSTMTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  @test_util.run_v1_only("b/124229375")
+  def testMultiDimensionalLSTMAllRNNContainers(self):
+    feature_dims = (3, 4, 5)
+    input_size = feature_dims
+    batch_size = 2
+    max_length = 8
+    sequence_length = [4, 6]
+    with self.session(graph=ops.Graph()) as sess:
+      inputs = max_length * [
+          array_ops.placeholder(dtypes.float32, shape=(None,) + input_size)
+      ]
+      inputs_using_dim = max_length * [
+          array_ops.placeholder(
+              dtypes.float32, shape=(batch_size,) + input_size)
+      ]
+      inputs_c = array_ops.stack(inputs)
+      # Create a cell for the whole test. This is fine because the cell has no
+      # variables.
+      cell = DummyMultiDimensionalLSTM(feature_dims)
+      state_saver = TestStateSaver(batch_size, input_size)
+      outputs_static, state_static = rnn.static_rnn(
+          cell, inputs, dtype=dtypes.float32, sequence_length=sequence_length)
+      outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
+          cell,
+          inputs_c,
+          dtype=dtypes.float32,
+          time_major=True,
+          sequence_length=sequence_length)
+      outputs_bid, state_fw, state_bw = rnn.static_bidirectional_rnn(
+          cell,
+          cell,
+          inputs_using_dim,
+          dtype=dtypes.float32,
+          sequence_length=sequence_length)
+      outputs_sav, state_sav = rnn.static_state_saving_rnn(
+          cell,
+          inputs_using_dim,
+          sequence_length=sequence_length,
+          state_saver=state_saver,
+          state_name=("h", "c"))
+
+      self.assertEqual(outputs_dynamic.get_shape().as_list(),
+                       inputs_c.get_shape().as_list())
+      for out, inp in zip(outputs_static, inputs):
+        self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
+      for out, inp in zip(outputs_bid, inputs_using_dim):
+        input_shape_list = inp.get_shape().as_list()
+        # fwd and bwd activations are concatenated along the second dim.
+        input_shape_list[1] *= 2
+        self.assertEqual(out.get_shape().as_list(), input_shape_list)
+
+      variables_lib.global_variables_initializer().run()
+
+      input_total_size = (batch_size,) + input_size
+      input_value = np.random.randn(*input_total_size)
+      outputs_static_v = sess.run(
+          outputs_static, feed_dict={
+              inputs[0]: input_value
+          })
+      outputs_dynamic_v = sess.run(
+          outputs_dynamic, feed_dict={
+              inputs[0]: input_value
+          })
+      outputs_bid_v = sess.run(
+          outputs_bid, feed_dict={
+              inputs_using_dim[0]: input_value
+          })
+      outputs_sav_v = sess.run(
+          outputs_sav, feed_dict={
+              inputs_using_dim[0]: input_value
+          })
+
+      self.assertAllEqual(outputs_static_v, outputs_dynamic_v)
+      self.assertAllEqual(outputs_static_v, outputs_sav_v)
+      outputs_static_array = np.array(outputs_static_v)
+      outputs_static_array_double = np.concatenate(
+          (outputs_static_array, outputs_static_array), axis=2)
+      outputs_bid_array = np.array(outputs_bid_v)
+      self.assertAllEqual(outputs_static_array_double, outputs_bid_array)
+
+      state_static_v = sess.run(
+          state_static, feed_dict={
+              inputs[0]: input_value
+          })
+      state_dynamic_v = sess.run(
+          state_dynamic, feed_dict={
+              inputs[0]: input_value
+          })
+      state_bid_fw_v = sess.run(
+          state_fw, feed_dict={
+              inputs_using_dim[0]: input_value
+          })
+      state_bid_bw_v = sess.run(
+          state_bw, feed_dict={
+              inputs_using_dim[0]: input_value
+          })
+      state_sav_v = sess.run(
+          state_sav, feed_dict={
+              inputs_using_dim[0]: input_value
+          })
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_dynamic_v))
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_sav_v))
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_fw_v))
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_bw_v))
+
+
+class NestedLSTMTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  @test_util.run_v1_only("b/124229375")
+  def testNestedIOLSTMAllRNNContainers(self):
+    input_size = 5
+    batch_size = 2
+    state_size = 6
+    max_length = 8
+    sequence_length = [4, 6]
+    with self.session(graph=ops.Graph()) as sess:
+      state_saver = TestStateSaver(batch_size, state_size)
+      single_input = (array_ops.placeholder(
+          dtypes.float32, shape=(None, input_size)),
+                      array_ops.placeholder(
+                          dtypes.float32, shape=(None, input_size)))
+      inputs = max_length * [single_input]
+      inputs_c = (array_ops.stack([input_[0] for input_ in inputs]),
+                  array_ops.stack([input_[1] for input_ in inputs]))
+      single_input_using_dim = (array_ops.placeholder(
+          dtypes.float32, shape=(batch_size, input_size)),
+                                array_ops.placeholder(
+                                    dtypes.float32,
+                                    shape=(batch_size, input_size)))
+      inputs_using_dim = max_length * [single_input_using_dim]
+
+      # Create a cell for the whole test. This is fine because the cell has no
+      # variables.
+      cell = NestedRNNCell()
+      outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
+          cell,
+          inputs_c,
+          dtype=dtypes.float32,
+          time_major=True,
+          sequence_length=sequence_length)
+      outputs_static, state_static = rnn.static_rnn(
+          cell, inputs, dtype=dtypes.float32, sequence_length=sequence_length)
+      outputs_bid, state_fw, state_bw = rnn.static_bidirectional_rnn(
+          cell,
+          cell,
+          inputs_using_dim,
+          dtype=dtypes.float32,
+          sequence_length=sequence_length)
+      outputs_sav, state_sav = rnn.static_state_saving_rnn(
+          cell,
+          inputs_using_dim,
+          sequence_length=sequence_length,
+          state_saver=state_saver,
+          state_name=("h", "c"))
+
+      def _assert_same_shape(input1, input2, double=False):
+        flat_input1 = nest.flatten(input1)
+        flat_input2 = nest.flatten(input2)
+        for inp1, inp2 in zip(flat_input1, flat_input2):
+          input_shape = inp1.get_shape().as_list()
+          if double:
+            input_shape[1] *= 2
+          self.assertEqual(input_shape, inp2.get_shape().as_list())
+
+      _assert_same_shape(inputs_c, outputs_dynamic)
+      _assert_same_shape(inputs, outputs_static)
+      _assert_same_shape(inputs_using_dim, outputs_sav)
+      _assert_same_shape(inputs_using_dim, outputs_bid, double=True)
+
+      variables_lib.global_variables_initializer().run()
+
+      input_total_size = (batch_size, input_size)
+      input_value = (np.random.randn(*input_total_size),
+                     np.random.randn(*input_total_size))
+      outputs_dynamic_v = sess.run(
+          outputs_dynamic, feed_dict={
+              single_input: input_value
+          })
+      outputs_static_v = sess.run(
+          outputs_static, feed_dict={
+              single_input: input_value
+          })
+      outputs_sav_v = sess.run(
+          outputs_sav, feed_dict={
+              single_input_using_dim: input_value
+          })
+      outputs_bid_v = sess.run(
+          outputs_bid, feed_dict={
+              single_input_using_dim: input_value
+          })
+
+      self.assertAllEqual(outputs_static_v,
+                          np.transpose(outputs_dynamic_v, (1, 0, 2, 3)))
+      self.assertAllEqual(outputs_static_v, outputs_sav_v)
+      outputs_static_array = np.array(outputs_static_v)
+      outputs_static_array_double = np.concatenate(
+          (outputs_static_array, outputs_static_array), axis=3)
+      outputs_bid_array = np.array(outputs_bid_v)
+      self.assertAllEqual(outputs_static_array_double, outputs_bid_array)
+
+      state_dynamic_v = sess.run(
+          state_dynamic, feed_dict={
+              single_input: input_value
+          })
+      state_static_v = sess.run(
+          state_static, feed_dict={
+              single_input: input_value
+          })
+      state_bid_fw_v = sess.run(
+          state_fw, feed_dict={
+              single_input_using_dim: input_value
+          })
+      state_bid_bw_v = sess.run(
+          state_bw, feed_dict={
+              single_input_using_dim: input_value
+          })
+      state_sav_v = sess.run(
+          state_sav, feed_dict={
+              single_input_using_dim: input_value
+          })
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_dynamic_v))
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_sav_v))
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_fw_v))
+      self.assertAllEqual(np.hstack(state_static_v), np.hstack(state_bid_bw_v))
+
+
+class StateSaverRNNTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  def _factory(self, scope, state_saver):
+    num_units = state_saver.state_size // 2
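+    # With state_is_tuple=False, the LSTM state is the concatenation [c; m],
+    # so the saver's state_size is 2 * num_units.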
+    batch_size = state_saver.batch_size
+    input_size = 5
+    max_length = 8
+    initializer = init_ops.random_uniform_initializer(
+        -0.01, 0.01, seed=self._seed)
+    cell = rnn_cell.LSTMCell(
+        num_units,
+        use_peepholes=False,
+        initializer=initializer,
+        state_is_tuple=False)
+    inputs = max_length * [
+        array_ops.zeros(dtype=dtypes.float32, shape=(batch_size, input_size))
+    ]
+    out, state = rnn.static_state_saving_rnn(
+        cell,
+        inputs,
+        state_saver=state_saver,
+        state_name="save_lstm",
+        scope=scope)
+    return out, state, state_saver
+
+  def _testScope(self, prefix="prefix", use_outer_scope=True):
+    num_units = 3
+    batch_size = 2
+    state_saver = TestStateSaver(batch_size, 2 * num_units)
+
+    with self.session(use_gpu=True, graph=ops.Graph()):
+      if use_outer_scope:
+        with variable_scope.variable_scope(prefix) as scope:
+          self._factory(scope=scope, state_saver=state_saver)
+      else:
+        self._factory(scope=prefix, state_saver=state_saver)
+        variables_lib.global_variables_initializer()
+
+      # Check that all the variable names start
+      # with the proper scope.
+      all_vars = variables_lib.global_variables()
+      prefix = prefix or "rnn"
+      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
+      tf_logging.info("RNN with scope: %s (%s)" %
+                      (prefix, "scope" if use_outer_scope else "str"))
+      for v in scope_vars:
+        tf_logging.info(v.name)
+      self.assertEqual(len(scope_vars), len(all_vars))
+
+  def testStateSaverRNNScope(self):
+    self._testScope(use_outer_scope=True)
+    self._testScope(use_outer_scope=False)
+    self._testScope(prefix=None, use_outer_scope=False)
+
+  def testStateSaverCallsSaveState(self):
+    """Test that number of calls to state and save_state is equal.
+
+    Test if the order of actual evaluating or skipping evaluation of out,
+    state tensors, which are the output tensors from static_state_saving_rnn,
+    have influence on number of calls to save_state and state methods of
+    state_saver object (the number of calls should be same.)
+    """
+    self.skipTest("b/124196246 Breakage for sess.run([out, ...]): 2 != 1")
+
+    num_units = 3
+    batch_size = 2
+    state_saver = TestStateSaverWithCounters(batch_size, 2 * num_units)
+    out, state, state_saver = self._factory(scope=None, state_saver=state_saver)
+
+    with self.cached_session() as sess:
+      sess.run(variables_lib.global_variables_initializer())
+      sess.run(variables_lib.local_variables_initializer())
+
+      _, _, num_state_calls, num_save_state_calls = sess.run([
+          out,
+          state,
+          state_saver.num_state_calls,
+          state_saver.num_save_state_calls])
+      self.assertEqual(num_state_calls, num_save_state_calls)
+
+      _, num_state_calls, num_save_state_calls = sess.run([
+          out,
+          state_saver.num_state_calls,
+          state_saver.num_save_state_calls])
+      self.assertEqual(num_state_calls, num_save_state_calls)
+
+      _, num_state_calls, num_save_state_calls = sess.run([
+          state,
+          state_saver.num_state_calls,
+          state_saver.num_save_state_calls])
+      self.assertEqual(num_state_calls, num_save_state_calls)
+
+
+class GRUTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  @test_util.run_v1_only("b/124229375")
+  def testDynamic(self):
+    time_steps = 8
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+
+    input_values = np.random.randn(time_steps, batch_size, input_size)
+
+    sequence_length = np.random.randint(0, time_steps, size=batch_size)
+
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
+      concat_inputs = array_ops.placeholder(
+          dtypes.float32, shape=(time_steps, batch_size, input_size))
+
+      cell = rnn_cell.GRUCell(num_units=num_units)
+
+      with variable_scope.variable_scope("dynamic_scope"):
+        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
+            cell,
+            inputs=concat_inputs,
+            sequence_length=sequence_length,
+            time_major=True,
+            dtype=dtypes.float32)
+
+      feeds = {concat_inputs: input_values}
+
+      # Initialize
+      variables_lib.global_variables_initializer().run(feed_dict=feeds)
+
+      sess.run([outputs_dynamic, state_dynamic], feed_dict=feeds)
+
+  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
+    with self.session(use_gpu=True, graph=ops.Graph()):
+      if use_outer_scope:
+        with variable_scope.variable_scope(prefix) as scope:
+          factory(scope)
+      else:
+        factory(prefix)
+        variables_lib.global_variables_initializer()
+
+      # Check that all the variable names start
+      # with the proper scope.
+      all_vars = variables_lib.global_variables()
+      prefix = prefix or "rnn"
+      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
+      tf_logging.info("RNN with scope: %s (%s)" %
+                      (prefix, "scope" if use_outer_scope else "str"))
+      for v in scope_vars:
+        tf_logging.info(v.name)
+      self.assertEqual(len(scope_vars), len(all_vars))
+
+  @test_util.run_v1_only("b/124229375")
+  def testDynamicScope(self):
+    time_steps = 8
+    num_units = 3
+    input_size = 5
+    batch_size = 2
+    sequence_length = np.random.randint(0, time_steps, size=batch_size)
+
+    def factory(scope):
+      concat_inputs = array_ops.placeholder(
+          dtypes.float32, shape=(time_steps, batch_size, input_size))
+      cell = rnn_cell.GRUCell(num_units=num_units)
+      return rnn.dynamic_rnn(
+          cell,
+          inputs=concat_inputs,
+          sequence_length=sequence_length,
+          time_major=True,
+          dtype=dtypes.float32,
+          scope=scope)
+
+    self._testScope(factory, use_outer_scope=True)
+    self._testScope(factory, use_outer_scope=False)
+    self._testScope(factory, prefix=None, use_outer_scope=False)
+
+
+class RawRNNTest(test.TestCase):
+
+  def setUp(self):
+    self._seed = 23489
+    np.random.seed(self._seed)
+
+  @test_util.run_v1_only("b/124229375")
+  def _testRawRNN(self, max_time):
+    with self.session(graph=ops.Graph()) as sess:
+      batch_size = 16
+      input_depth = 4
+      num_units = 3
+
+      inputs = array_ops.placeholder(
+          shape=(max_time, batch_size, input_depth), dtype=dtypes.float32)
+      sequence_length = array_ops.placeholder(
+          shape=(batch_size,), dtype=dtypes.int32)
+      inputs_ta = tensor_array_ops.TensorArray(
+          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
+      inputs_ta = inputs_ta.unstack(inputs)
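+      # Unstacking along axis 0 lets loop_fn pull one [batch_size,
+      # input_depth] slice per step via inputs_ta.read(time_).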
+
+      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+
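+      # raw_rnn's loop_fn receives (time, cell_output, cell_state,
+      # loop_state) and must return (elements_finished, next_input,
+      # next_cell_state, emit_output, next_loop_state); cell_output and
+      # cell_state are None on the first call (time == 0).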
+      def loop_fn(time_, cell_output, cell_state, unused_loop_state):
+        emit_output = cell_output  # is None at time == 0
+        if cell_output is None:  # time == 0
+          next_state = cell.zero_state(batch_size, dtypes.float32)
+        else:
+          next_state = cell_state  # copy state through
+        elements_finished = (time_ >= sequence_length)
+        finished = math_ops.reduce_all(elements_finished)
+        # For the very final iteration, we must emit a dummy input
+        next_input = control_flow_ops.cond(
+            finished,
+            lambda: array_ops.zeros(
+                [batch_size, input_depth], dtype=dtypes.float32),
+            lambda: inputs_ta.read(time_))
+        return (elements_finished, next_input, next_state, emit_output, None)
+
+      reuse_scope = variable_scope.get_variable_scope()
+
+      outputs_ta, final_state, _ = rnn.raw_rnn(
+          cell, loop_fn, scope=reuse_scope)
+      outputs = outputs_ta.stack()
+
+      reuse_scope.reuse_variables()
+      outputs_dynamic_rnn, final_state_dynamic_rnn = rnn.dynamic_rnn(
+          cell,
+          inputs,
+          time_major=True,
+          dtype=dtypes.float32,
+          sequence_length=sequence_length,
+          scope=reuse_scope)
+
+      variables = variables_lib.trainable_variables()
+      gradients = gradients_impl.gradients([outputs, final_state],
+                                           [inputs] + variables)
+      gradients_dynamic_rnn = gradients_impl.gradients(
+          [outputs_dynamic_rnn, final_state_dynamic_rnn], [inputs] + variables)
+
+      variables_lib.global_variables_initializer().run()
+
+      rand_input = np.random.randn(max_time, batch_size, input_depth)
+      if max_time == 0:
+        rand_seq_len = np.zeros(batch_size)
+      else:
+        rand_seq_len = np.random.randint(max_time, size=batch_size)
+
+      # To ensure same output lengths for dynamic_rnn and raw_rnn
+      rand_seq_len[0] = max_time
+
+      (outputs_val, outputs_dynamic_rnn_val, final_state_val,
+       final_state_dynamic_rnn_val) = sess.run(
+           [outputs, outputs_dynamic_rnn, final_state, final_state_dynamic_rnn],
+           feed_dict={
+               inputs: rand_input,
+               sequence_length: rand_seq_len
+           })
+
+      self.assertAllClose(outputs_dynamic_rnn_val, outputs_val)
+      self.assertAllClose(final_state_dynamic_rnn_val, final_state_val)
+
+      # NOTE: Because with 0 time steps, raw_rnn does not have shape
+      # information about the input, it is impossible to perform
+      # gradients comparisons as the gradients eval will fail.  So
+      # this case skips the gradients test.
+      if max_time > 0:
+        self.assertEqual(len(gradients), len(gradients_dynamic_rnn))
+        gradients_val = sess.run(
+            gradients,
+            feed_dict={
+                inputs: rand_input,
+                sequence_length: rand_seq_len
+            })
+        gradients_dynamic_rnn_val = sess.run(
+            gradients_dynamic_rnn,
+            feed_dict={
+                inputs: rand_input,
+                sequence_length: rand_seq_len
+            })
+        self.assertEqual(len(gradients_val), len(gradients_dynamic_rnn_val))
+        input_gradients_val = gradients_val[0]
+        input_gradients_dynamic_rnn_val = gradients_dynamic_rnn_val[0]
+        self.assertAllClose(input_gradients_val,
+                            input_gradients_dynamic_rnn_val)
+        for i in range(1, len(gradients_val)):
+          self.assertAllClose(gradients_dynamic_rnn_val[i], gradients_val[i])
+
+  @test_util.run_v1_only("b/124229375")
+  def testRawRNNZeroLength(self):
+    # NOTE: Because with 0 time steps, raw_rnn does not have shape
+    # information about the input, it is impossible to perform
+    # gradients comparisons as the gradients eval will fail.  So this
+    # case skips the gradients test.
+    self._testRawRNN(max_time=0)
+
+  def testRawRNN(self):
+    self._testRawRNN(max_time=10)
+
+  @test_util.run_v1_only("b/124229375")
+  def testLoopState(self):
+    with self.session(graph=ops.Graph()):
+      max_time = 10
+      batch_size = 16
+      input_depth = 4
+      num_units = 3
+
+      inputs = np.random.randn(max_time, batch_size, input_depth)
+      inputs_ta = tensor_array_ops.TensorArray(
+          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
+      inputs_ta = inputs_ta.unstack(inputs)
+
+      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+
+      def loop_fn(time_, cell_output, cell_state, loop_state):
+        if cell_output is None:
+          loop_state = constant_op.constant([0])
+          next_state = cell.zero_state(batch_size, dtypes.float32)
+        else:
+          loop_state = array_ops.stack([array_ops.squeeze(loop_state) + 1])
+          next_state = cell_state
+        emit_output = cell_output  # == None for time == 0
+        elements_finished = array_ops.tile([time_ >= max_time], [batch_size])
+        finished = math_ops.reduce_all(elements_finished)
+        # For the very final iteration, we must emit a dummy input
+        next_input = control_flow_ops.cond(
+            finished,
+            lambda: array_ops.zeros(
+                [batch_size, input_depth], dtype=dtypes.float32),
+            lambda: inputs_ta.read(time_))
+        return (elements_finished, next_input, next_state, emit_output,
+                loop_state)
+
+      r = rnn.raw_rnn(cell, loop_fn)
+      loop_state = r[-1]
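+      # loop_state starts at [0] on the initial call and is incremented once
+      # per cell step, so after max_time steps it should read [10].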
+      self.assertEqual([10], loop_state.eval())
+
+  @test_util.run_v1_only("b/124229375")
+  def testLoopStateWithTensorArray(self):
+    with self.session(graph=ops.Graph()):
+      max_time = 4
+      batch_size = 16
+      input_depth = 4
+      num_units = 3
+
+      inputs = np.random.randn(max_time, batch_size, input_depth)
+      inputs_ta = tensor_array_ops.TensorArray(
+          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
+      inputs_ta = inputs_ta.unstack(inputs)
+
+      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+
+      def loop_fn(time_, cell_output, cell_state, loop_state):
+        if cell_output is None:
+          loop_state = tensor_array_ops.TensorArray(
+              dynamic_size=True,
+              size=0,
+              dtype=dtypes.int32,
+              clear_after_read=False)
+          loop_state = loop_state.write(0, 1)
+          next_state = cell.zero_state(batch_size, dtypes.float32)
+        else:
+          loop_state = loop_state.write(time_,
+                                        loop_state.read(time_ - 1) + time_)
+          next_state = cell_state
+        emit_output = cell_output  # == None for time == 0
+        elements_finished = array_ops.tile([time_ >= max_time], [batch_size])
+        finished = math_ops.reduce_all(elements_finished)
+        # For the very final iteration, we must emit a dummy input
+        next_input = control_flow_ops.cond(
+            finished,
+            lambda: array_ops.zeros(
+                [batch_size, input_depth], dtype=dtypes.float32),
+            lambda: inputs_ta.read(time_))
+        return (elements_finished, next_input, next_state, emit_output,
+                loop_state)
+
+      r = rnn.raw_rnn(cell, loop_fn)
+      loop_state = r[-1]
+      loop_state = loop_state.stack()
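+      # The TensorArray loop state follows state[0] = 1 and
+      # state[t] = state[t - 1] + t, i.e. [1, 2, 4, 7, 11], which the
+      # expected value below spells out as running sums.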
+      self.assertAllEqual([1, 2, 2 + 2, 4 + 3, 7 + 4], loop_state.eval())
+
+  @test_util.run_v1_only("b/124229375")
+  def testEmitDifferentStructureThanCellOutput(self):
+    with self.session(graph=ops.Graph()) as sess:
+      max_time = 10
+      batch_size = 16
+      input_depth = 4
+      num_units = 3
+
+      inputs = np.random.randn(max_time, batch_size, input_depth)
+      inputs_ta = tensor_array_ops.TensorArray(
+          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
+      inputs_ta = inputs_ta.unstack(inputs)
+      # Verify that emit shapes may be unknown by feeding a placeholder that
+      # determines an emit shape.
+      unknown_dim = array_ops.placeholder(dtype=dtypes.int32)
+
+      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+
+      def loop_fn(time_, cell_output, cell_state, _):
+        if cell_output is None:
+          emit_output = (array_ops.zeros([2, 3], dtype=dtypes.int32),
+                         array_ops.zeros([unknown_dim], dtype=dtypes.int64))
+          next_state = cell.zero_state(batch_size, dtypes.float32)
+        else:
+          emit_output = (array_ops.ones([batch_size, 2, 3], dtype=dtypes.int32),
+                         array_ops.ones(
+                             [batch_size, unknown_dim], dtype=dtypes.int64))
+          next_state = cell_state
+        elements_finished = array_ops.tile([time_ >= max_time], [batch_size])
+        finished = math_ops.reduce_all(elements_finished)
+        # For the very final iteration, we must emit a dummy input
+        next_input = control_flow_ops.cond(
+            finished,
+            lambda: array_ops.zeros(
+                [batch_size, input_depth], dtype=dtypes.float32),
+            lambda: inputs_ta.read(time_))
+        return (elements_finished, next_input, next_state, emit_output, None)
+
+      r = rnn.raw_rnn(cell, loop_fn)
+      output_ta = r[0]
+      self.assertEqual(2, len(output_ta))
+      self.assertEqual([dtypes.int32, dtypes.int64],
+                       [ta.dtype for ta in output_ta])
+      output = [ta.stack() for ta in output_ta]
+      output_vals = sess.run(output, feed_dict={unknown_dim: 1})
+      self.assertAllEqual(
+          np.ones((max_time, batch_size, 2, 3), np.int32), output_vals[0])
+      self.assertAllEqual(
+          np.ones((max_time, batch_size, 1), np.int64), output_vals[1])
+
+  def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
+    with self.session(use_gpu=True, graph=ops.Graph()):
+      if use_outer_scope:
+        with variable_scope.variable_scope(prefix) as scope:
+          factory(scope)
+      else:
+        factory(prefix)
+        variables_lib.global_variables_initializer()
+
+      # Check that all the variable names start with the proper scope.
+      all_vars = variables_lib.global_variables()
+      prefix = prefix or "rnn"
+      scope_vars = [v for v in all_vars if v.name.startswith(prefix + "/")]
+      tf_logging.info("RNN with scope: %s (%s)" %
+                      (prefix, "scope" if use_outer_scope else "str"))
+      for v in scope_vars:
+        tf_logging.info(v.name)
+      self.assertEqual(len(scope_vars), len(all_vars))
+
+  @test_util.run_v1_only("b/124229375")
+  def testRawRNNScope(self):
+    max_time = 10
+    batch_size = 16
+    input_depth = 4
+    num_units = 3
+
+    def factory(scope):
+      inputs = array_ops.placeholder(
+          shape=(max_time, batch_size, input_depth), dtype=dtypes.float32)
+      sequence_length = array_ops.placeholder(
+          shape=(batch_size,), dtype=dtypes.int32)
+      inputs_ta = tensor_array_ops.TensorArray(
+          dtype=dtypes.float32, size=array_ops.shape(inputs)[0])
+      inputs_ta = inputs_ta.unstack(inputs)
+
+      cell = rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+
+      def loop_fn(time_, cell_output, cell_state, unused_loop_state):
+        emit_output = cell_output  # == None for time == 0
+        if cell_output is None:  # time == 0
+          next_state = cell.zero_state(batch_size, dtypes.float32)
+        else:
+          next_state = cell_state
+
+        elements_finished = (time_ >= sequence_length)
+        finished = math_ops.reduce_all(elements_finished)
+        # For the very final iteration, we must emit a dummy input
+        next_input = control_flow_ops.cond(
+            finished,
+            lambda: array_ops.zeros(
+                [batch_size, input_depth], dtype=dtypes.float32),
+            lambda: inputs_ta.read(time_))
+        return (elements_finished, next_input, next_state, emit_output, None)
+
+      return rnn.raw_rnn(cell, loop_fn, scope=scope)
+
+    self._testScope(factory, use_outer_scope=True)
+    self._testScope(factory, use_outer_scope=False)
+    self._testScope(factory, prefix=None, use_outer_scope=False)
+
+
+class DeviceWrapperCell(rnn_cell.RNNCell):
+  """Class to ensure cell calculation happens on a specific device."""
+
+  def __init__(self, cell, device):
+    self._cell = cell
+    self._device = device
+
+  @property
+  def output_size(self):
+    return self._cell.output_size
+
+  @property
+  def state_size(self):
+    return self._cell.state_size
+
+  def __call__(self, input_, state, scope=None):
+    if self._device is not None:
+      with ops.device(self._device):
+        return self._cell(input_, state, scope=scope)
+    else:
+      return self._cell(input_, state, scope=scope)
+
+
+class TensorArrayOnCorrectDeviceTest(test.TestCase):
+
+  def _execute_rnn_on(self,
+                      rnn_device=None,
+                      cell_device=None,
+                      input_device=None):
+    batch_size = 3
+    time_steps = 7
+    input_size = 5
+    num_units = 10
+
+    cell = rnn_cell.LSTMCell(num_units, use_peepholes=True)
+    gpu_cell = DeviceWrapperCell(cell, cell_device)
+    inputs = np.random.randn(batch_size, time_steps, input_size).astype(
+        np.float32)
+    sequence_length = np.random.randint(0, time_steps, size=batch_size)
+
+    if input_device is not None:
+      with ops.device(input_device):
+        inputs = constant_op.constant(inputs)
+
+    if rnn_device is not None:
+      with ops.device(rnn_device):
+        outputs, _ = rnn.dynamic_rnn(
+            gpu_cell,
+            inputs,
+            sequence_length=sequence_length,
+            dtype=dtypes.float32)
+    else:
+      outputs, _ = rnn.dynamic_rnn(
+          gpu_cell,
+          inputs,
+          sequence_length=sequence_length,
+          dtype=dtypes.float32)
+
+    with self.session(use_gpu=True) as sess:
+      opts = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+      variables_lib.global_variables_initializer().run()
+      sess.run(outputs, options=opts, run_metadata=run_metadata)
+
+    return run_metadata
+
+  def _retrieve_cpu_gpu_stats(self, run_metadata):
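+    # Device strings end in ".../cpu:0" or ".../gpu:0"; comparing the last
+    # five characters splits the traced node stats by device.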
+    cpu_stats = None
+    gpu_stats = None
+    step_stats = run_metadata.step_stats
+    for ds in step_stats.dev_stats:
+      if "cpu:0" in ds.device[-5:].lower():
+        cpu_stats = ds.node_stats
+      if "gpu:0" == ds.device[-5:].lower():
+        gpu_stats = ds.node_stats
+    return cpu_stats, gpu_stats
+
+  def testRNNOnCPUCellOnGPU(self):
+    if not test.is_gpu_available():
+      return  # Test requires access to a GPU
+
+    gpu_dev = test.gpu_device_name()
+    run_metadata = self._execute_rnn_on(
+        rnn_device="/cpu:0", cell_device=gpu_dev)
+    cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
+
+    def _assert_in(op_str, in_stats, out_stats):
+      self.assertTrue(any(op_str in s.node_name for s in in_stats))
+      self.assertFalse(any(op_str in s.node_name for s in out_stats))
+
+    # Writes happen at output of RNN cell
+    _assert_in("TensorArrayWrite", gpu_stats, cpu_stats)
+    # Gather happens on final TensorArray
+    _assert_in("TensorArrayGather", gpu_stats, cpu_stats)
+    # Reads happen at input to RNN cell
+    _assert_in("TensorArrayRead", cpu_stats, gpu_stats)
+    # Scatters happen to get initial input into TensorArray
+    _assert_in("TensorArrayScatter", cpu_stats, gpu_stats)
+
+  def testRNNOnCPUCellOnCPU(self):
+    if not test.is_gpu_available():
+      return  # Test requires access to a GPU
+
+    gpu_dev = test.gpu_device_name()
+    run_metadata = self._execute_rnn_on(
+        rnn_device="/cpu:0", cell_device="/cpu:0", input_device=gpu_dev)
+    cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
+
+    def _assert_in(op_str, in_stats, out_stats):
+      self.assertTrue(any(op_str in s.node_name for s in in_stats))
+      self.assertFalse(any(op_str in s.node_name for s in out_stats))
+
+    # All TensorArray operations happen on CPU
+    _assert_in("TensorArray", cpu_stats, gpu_stats)
+
+  def testInputOnGPUCellNotDeclared(self):
+    if not test.is_gpu_available():
+      return  # Test requires access to a GPU
+
+    gpu_dev = test.gpu_device_name()
+    run_metadata = self._execute_rnn_on(input_device=gpu_dev)
+    cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
+
+    def _assert_in(op_str, in_stats, out_stats):
+      self.assertTrue(any(op_str in s.node_name for s in in_stats))
+      self.assertFalse(any(op_str in s.node_name for s in out_stats))
+
+    # Everything happens on GPU
+    _assert_in("TensorArray", gpu_stats, cpu_stats)
+
+
+class RNNCellTest(test.TestCase, parameterized.TestCase):
+
+  @test_util.run_v1_only("b/124229375")
+  def testBasicRNNCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        cell = rnn_cell_impl.BasicRNNCell(2)
+        g, _ = cell(x, m)
+        self.assertEqual([
+            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._BIAS_VARIABLE_NAME
+        ], [v.name for v in cell.trainable_variables])
+        self.assertFalse(cell.non_trainable_variables)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run([g], {
+            x: np.array([[1., 1.]]),
+            m: np.array([[0.1, 0.1]])
+        })
+        self.assertEqual(res[0].shape, (1, 2))
+
+  @test_util.run_v1_only("b/124229375")
+  def testBasicRNNCellNotTrainable(self):
+    with self.cached_session() as sess:
+
+      def not_trainable_getter(getter, *args, **kwargs):
+        kwargs["trainable"] = False
+        return getter(*args, **kwargs)
+
+      with variable_scope.variable_scope(
+          "root",
+          initializer=init_ops.constant_initializer(0.5),
+          custom_getter=not_trainable_getter):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        cell = rnn_cell_impl.BasicRNNCell(2)
+        g, _ = cell(x, m)
+        self.assertFalse(cell.trainable_variables)
+        self.assertEqual([
+            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+            "root/basic_rnn_cell/%s:0" % rnn_cell_impl._BIAS_VARIABLE_NAME
+        ], [v.name for v in cell.non_trainable_variables])
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run([g], {
+            x: np.array([[1., 1.]]),
+            m: np.array([[0.1, 0.1]])
+        })
+        self.assertEqual(res[0].shape, (1, 2))
+
+  @test_util.run_v1_only("b/124229375")
+  def testGRUCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        g, _ = rnn_cell_impl.GRUCell(2)(x, m)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run([g], {
+            x: np.array([[1., 1.]]),
+            m: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.175991, 0.175991]])
+      with variable_scope.variable_scope(
+          "other", initializer=init_ops.constant_initializer(0.5)):
+        # Test GRUCell with input_size != num_units.
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 2])
+        g, _ = rnn_cell_impl.GRUCell(2)(x, m)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run([g], {
+            x: np.array([[1., 1., 1.]]),
+            m: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.156736, 0.156736]])
+
+  @test_util.run_v1_only("b/124229375")
+  def testBasicLSTMCell(self):
+    for dtype in [dtypes.float16, dtypes.float32]:
+      np_dtype = dtype.as_numpy_dtype
+      with self.session(graph=ops.Graph()) as sess:
+        with variable_scope.variable_scope(
+            "root", initializer=init_ops.constant_initializer(0.5)):
+          x = array_ops.zeros([1, 2], dtype=dtype)
+          m = array_ops.zeros([1, 8], dtype=dtype)
+          cell = rnn_cell_impl.MultiRNNCell(
+              [
+                  rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
+                  for _ in range(2)
+              ],
+              state_is_tuple=False)
+          self.assertEqual(cell.dtype, None)
+          self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name)
+          self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name)
+          cell.get_config()  # Should not throw an error
+          g, out_m = cell(x, m)
+          # Layer infers the input type.
+          self.assertEqual(cell.dtype, dtype.name)
+          expected_variable_names = [
+              "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
+              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
+              rnn_cell_impl._BIAS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
+              rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+              "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
+              rnn_cell_impl._BIAS_VARIABLE_NAME
+          ]
+          self.assertEqual(expected_variable_names,
+                           [v.name for v in cell.trainable_variables])
+          self.assertFalse(cell.non_trainable_variables)
+          sess.run([variables_lib.global_variables_initializer()])
+          res = sess.run([g, out_m], {
+              x: np.array([[1., 1.]]),
+              m: 0.1 * np.ones([1, 8])
+          })
+          self.assertEqual(len(res), 2)
+          variables = variables_lib.global_variables()
+          self.assertEqual(expected_variable_names, [v.name for v in variables])
+          # The numbers in results were not calculated; this is just a
+          # smoke test.
+          self.assertAllClose(res[0], np.array(
+              [[0.240, 0.240]], dtype=np_dtype), 1e-2)
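+          # With state_is_tuple=False the state is the concatenation
+          # [c_0, h_0, c_1, h_1] of both layers' (c, h) pairs, hence the
+          # eight values below; the last pair equals the cell output above.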
+          expected_mem = np.array(
+              [[0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240]],
+              dtype=np_dtype)
+          self.assertAllClose(res[1], expected_mem, 1e-2)
+        with variable_scope.variable_scope(
+            "other", initializer=init_ops.constant_initializer(0.5)):
+          # Test BasicLSTMCell with input_size != num_units.
+          x = array_ops.zeros([1, 3], dtype=dtype)
+          m = array_ops.zeros([1, 4], dtype=dtype)
+          g, out_m = rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)(x, m)
+          sess.run([variables_lib.global_variables_initializer()])
+          res = sess.run(
+              [g, out_m], {
+                  x: np.array([[1., 1., 1.]], dtype=np_dtype),
+                  m: 0.1 * np.ones([1, 4], dtype=np_dtype)
+              })
+          self.assertEqual(len(res), 2)
+
+  @test_util.run_v1_only("b/124229375")
+  def testBasicLSTMCellDimension0Error(self):
+    """Tests that dimension 0 in both(x and m) shape must be equal."""
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        num_units = 2
+        state_size = num_units * 2
+        batch_size = 3
+        input_size = 4
+        x = array_ops.zeros([batch_size, input_size])
+        m = array_ops.zeros([batch_size - 1, state_size])
+        with self.assertRaises(ValueError):
+          g, out_m = rnn_cell_impl.BasicLSTMCell(
+              num_units, state_is_tuple=False)(x, m)
+          sess.run([variables_lib.global_variables_initializer()])
+          sess.run(
+              [g, out_m], {
+                  x: 1 * np.ones([batch_size, input_size]),
+                  m: 0.1 * np.ones([batch_size - 1, state_size])
+              })
+
+  def testBasicLSTMCellStateSizeError(self):
+    """Tests that state_size must be num_units * 2."""
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        num_units = 2
+        state_size = num_units * 3  # state_size must be num_units * 2
+        batch_size = 3
+        input_size = 4
+        x = array_ops.zeros([batch_size, input_size])
+        m = array_ops.zeros([batch_size, state_size])
+        with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)):
+          g, out_m = rnn_cell_impl.BasicLSTMCell(
+              num_units, state_is_tuple=False)(x, m)
+          sess.run([variables_lib.global_variables_initializer()])
+          sess.run(
+              [g, out_m], {
+                  x: 1 * np.ones([batch_size, input_size]),
+                  m: 0.1 * np.ones([batch_size, state_size])
+              })
+
+  @test_util.run_v1_only("b/124229375")
+  def testBasicLSTMCellStateTupleType(self):
+    with self.cached_session():
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m0 = (array_ops.zeros([1, 2]),) * 2
+        m1 = (array_ops.zeros([1, 2]),) * 2
+        cell = rnn_cell_impl.MultiRNNCell(
+            [rnn_cell_impl.BasicLSTMCell(2) for _ in range(2)],
+            state_is_tuple=True)
+        self.assertTrue(isinstance(cell.state_size, tuple))
+        self.assertTrue(
+            isinstance(cell.state_size[0], rnn_cell_impl.LSTMStateTuple))
+        self.assertTrue(
+            isinstance(cell.state_size[1], rnn_cell_impl.LSTMStateTuple))
+
+        # Pass in regular tuples
+        _, (out_m0, out_m1) = cell(x, (m0, m1))
+        self.assertTrue(isinstance(out_m0, rnn_cell_impl.LSTMStateTuple))
+        self.assertTrue(isinstance(out_m1, rnn_cell_impl.LSTMStateTuple))
+
+        # Pass in LSTMStateTuples
+        variable_scope.get_variable_scope().reuse_variables()
+        zero_state = cell.zero_state(1, dtypes.float32)
+        self.assertTrue(isinstance(zero_state, tuple))
+        self.assertTrue(isinstance(zero_state[0], rnn_cell_impl.LSTMStateTuple))
+        self.assertTrue(isinstance(zero_state[1], rnn_cell_impl.LSTMStateTuple))
+        _, (out_m0, out_m1) = cell(x, zero_state)
+        self.assertTrue(isinstance(out_m0, rnn_cell_impl.LSTMStateTuple))
+        self.assertTrue(isinstance(out_m1, rnn_cell_impl.LSTMStateTuple))
+
+  @test_util.run_v1_only("b/124229375")
+  def testBasicLSTMCellWithStateTuple(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m0 = array_ops.zeros([1, 4])
+        m1 = array_ops.zeros([1, 4])
+        cell = rnn_cell_impl.MultiRNNCell(
+            [
+                rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
+                for _ in range(2)
+            ],
+            state_is_tuple=True)
+        g, (out_m0, out_m1) = cell(x, (m0, m1))
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(
+            [g, out_m0, out_m1], {
+                x: np.array([[1., 1.]]),
+                m0: 0.1 * np.ones([1, 4]),
+                m1: 0.1 * np.ones([1, 4])
+            })
+        self.assertEqual(len(res), 3)
+        # The numbers in results were not calculated; this is just a smoke
+        # test.  Note, however, that these values should match those of the
+        # original version with state_is_tuple=False.
+        self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
+        expected_mem0 = np.array(
+            [[0.68967271, 0.68967271, 0.44848421, 0.44848421]])
+        expected_mem1 = np.array(
+            [[0.39897051, 0.39897051, 0.24024698, 0.24024698]])
+        self.assertAllClose(res[1], expected_mem0)
+        self.assertAllClose(res[2], expected_mem1)
+
+  @test_util.run_v1_only("b/124229375")
+  def testLSTMCell(self):
+    with self.cached_session() as sess:
+      num_units = 8
+      num_proj = 6
+      state_size = num_units + num_proj
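+      # With a projection layer, the non-tuple LSTM state is the
+      # concatenation [c, h_projected], so its width is num_units + num_proj.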
+      batch_size = 3
+      input_size = 2
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([batch_size, input_size])
+        m = array_ops.zeros([batch_size, state_size])
+        cell = rnn_cell_impl.LSTMCell(
+            num_units=num_units,
+            num_proj=num_proj,
+            forget_bias=1.0,
+            state_is_tuple=False)
+        output, state = cell(x, m)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(
+            [output, state], {
+                x: np.array([[1., 1.], [2., 2.], [3., 3.]]),
+                m: 0.1 * np.ones((batch_size, state_size))
+            })
+        self.assertEqual(len(res), 2)
+        # The numbers in results were not calculated; this is mostly just a
+        # smoke test.
+        self.assertEqual(res[0].shape, (batch_size, num_proj))
+        self.assertEqual(res[1].shape, (batch_size, state_size))
+        # Different inputs so different outputs and states
+        for i in range(1, batch_size):
+          self.assertTrue(
+              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) > 1e-6)
+          self.assertTrue(
+              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) > 1e-6)
+
+  @test_util.run_v1_only("b/124229375")
+  def testLSTMCellVariables(self):
+    with self.cached_session():
+      num_units = 8
+      num_proj = 6
+      state_size = num_units + num_proj
+      batch_size = 3
+      input_size = 2
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([batch_size, input_size])
+        m = array_ops.zeros([batch_size, state_size])
+        cell = rnn_cell_impl.LSTMCell(
+            num_units=num_units,
+            num_proj=num_proj,
+            forget_bias=1.0,
+            state_is_tuple=False)
+        cell(x, m)  # Execute to create variables
+      variables = variables_lib.global_variables()
+      self.assertEqual(variables[0].op.name, "root/lstm_cell/kernel")
+      self.assertEqual(variables[1].op.name, "root/lstm_cell/bias")
+      self.assertEqual(variables[2].op.name,
+                       "root/lstm_cell/projection/kernel")
+
+  @test_util.run_in_graph_and_eager_modes
+  def testWrapperCheckpointing(self):
+    for wrapper_type in [
+        rnn_cell_impl.DropoutWrapper,
+        rnn_cell_impl.ResidualWrapper,
+        lambda cell: rnn_cell_impl.MultiRNNCell([cell])]:
+      cell = rnn_cell_impl.BasicRNNCell(1)
+      wrapper = wrapper_type(cell)
+      wrapper(array_ops.ones([1, 1]),
+              state=wrapper.zero_state(batch_size=1, dtype=dtypes.float32))
+      self.evaluate([v.initializer for v in cell.variables])
+      checkpoint = trackable_utils.Checkpoint(wrapper=wrapper)
+      prefix = os.path.join(self.get_temp_dir(), "ckpt")
+      self.evaluate(cell._bias.assign([40.]))
+      save_path = checkpoint.save(prefix)
+      self.evaluate(cell._bias.assign([0.]))
+      checkpoint.restore(save_path).assert_consumed().run_restore_ops()
+      self.assertAllEqual([40.], self.evaluate(cell._bias))
+
+  @parameterized.parameters(
+      [rnn_cell_impl.ResidualWrapper, rnn_cell_impl.ResidualWrapperV2])
+  @test_util.run_in_graph_and_eager_modes
+  def testResidualWrapper(self, wrapper_type):
+    x = ops.convert_to_tensor(np.array([[1., 1., 1.]]))
+    m = ops.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]))
+    base_cell = rnn_cell_impl.GRUCell(
+        3, kernel_initializer=init_ops.constant_initializer(0.5),
+        bias_initializer=init_ops.constant_initializer(0.5))
+    g, m_new = base_cell(x, m)
+    wrapper_object = wrapper_type(base_cell)
+    (name, dep), = wrapper_object._checkpoint_dependencies
+    wrapper_object.get_config()  # Should not throw an error
+    self.assertIs(dep, base_cell)
+    self.assertEqual("cell", name)
+
+    g_res, m_new_res = wrapper_object(x, m)
+    self.evaluate([variables_lib.global_variables_initializer()])
+    res = self.evaluate([g, g_res, m_new, m_new_res])
+    # Residual connections
+    self.assertAllClose(res[1], res[0] + [1., 1., 1.])
+    # States are left untouched
+    self.assertAllClose(res[2], res[3])
+
+  @parameterized.parameters(
+      [rnn_cell_impl.ResidualWrapper, rnn_cell_impl.ResidualWrapperV2])
+  @test_util.run_in_graph_and_eager_modes
+  def testResidualWrapperWithSlice(self, wrapper_type):
+    x = ops.convert_to_tensor(np.array([[1., 1., 1., 1., 1.]]))
+    m = ops.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]))
+    base_cell = rnn_cell_impl.GRUCell(
+        3, kernel_initializer=init_ops.constant_initializer(0.5),
+        bias_initializer=init_ops.constant_initializer(0.5))
+    g, m_new = base_cell(x, m)
+
+    def residual_with_slice_fn(inp, out):
+      inp_sliced = array_ops.slice(inp, [0, 0], [-1, 3])
+      return inp_sliced + out
+
+    g_res, m_new_res = wrapper_type(
+        base_cell, residual_with_slice_fn)(x, m)
+    self.evaluate([variables_lib.global_variables_initializer()])
+    res_g, res_g_res, res_m_new, res_m_new_res = self.evaluate(
+        [g, g_res, m_new, m_new_res])
+    # Residual connections
+    self.assertAllClose(res_g_res, res_g + [1., 1., 1.])
+    # States are left untouched
+    self.assertAllClose(res_m_new, res_m_new_res)
+
+  @test_util.run_v1_only("b/124229375")
+  def testDeviceWrapper(self):
+    with variable_scope.variable_scope(
+        "root", initializer=init_ops.constant_initializer(0.5)):
+      x = array_ops.zeros([1, 3])
+      m = array_ops.zeros([1, 3])
+      wrapped = rnn_cell_impl.GRUCell(3)
+      cell = rnn_cell_impl.DeviceWrapper(wrapped, "/cpu:14159")
+      (name, dep), = cell._checkpoint_dependencies
+      cell.get_config()  # Should not throw an error
+      self.assertIs(dep, wrapped)
+      self.assertEqual("cell", name)
+
+      outputs, _ = cell(x, m)
+      self.assertTrue("cpu:14159" in outputs.device.lower())
+
+  def _retrieve_cpu_gpu_stats(self, run_metadata):
+    cpu_stats = None
+    gpu_stats = None
+    step_stats = run_metadata.step_stats
+    for ds in step_stats.dev_stats:
+      if "cpu:0" in ds.device[-5:].lower():
+        cpu_stats = ds.node_stats
+      if "gpu:0" == ds.device[-5:].lower():
+        gpu_stats = ds.node_stats
+    return cpu_stats, gpu_stats
+
+  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
+    if not test.is_gpu_available():
+      # Can't perform this test w/o a GPU
+      return
+
+    gpu_dev = test.gpu_device_name()
+    with self.session(use_gpu=True) as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 1, 3])
+        cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev)
+        with ops.device("/cpu:0"):
+          outputs, _ = rnn.dynamic_rnn(
+              cell=cell, inputs=x, dtype=dtypes.float32)
+        run_metadata = config_pb2.RunMetadata()
+        opts = config_pb2.RunOptions(
+            trace_level=config_pb2.RunOptions.FULL_TRACE)
+
+        sess.run([variables_lib.global_variables_initializer()])
+        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)
+
+      cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
+      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
+      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
+
+  @test_util.run_v1_only("b/124229375")
+  def testMultiRNNCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 4])
+        multi_rnn_cell = rnn_cell_impl.MultiRNNCell(
+            [rnn_cell_impl.GRUCell(2) for _ in range(2)],
+            state_is_tuple=False)
+        _, ml = multi_rnn_cell(x, m)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(ml, {
+            x: np.array([[1., 1.]]),
+            m: np.array([[0.1, 0.1, 0.1, 0.1]])
+        })
+        # The numbers in results were not calculated; this is just a smoke
+        # test.
+        self.assertAllClose(res, [[0.175991, 0.175991, 0.13248, 0.13248]])
+        self.assertEqual(len(multi_rnn_cell.weights), 2 * 4)
+        self.assertTrue(
+            all(x.dtype == dtypes.float32 for x in multi_rnn_cell.weights))
+
+  @test_util.run_v1_only("b/124229375")
+  def testMultiRNNCellWithStateTuple(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m_bad = array_ops.zeros([1, 4])
+        m_good = (array_ops.zeros([1, 2]), array_ops.zeros([1, 2]))
+
+        # Test incorrectness of state
+        with self.assertRaisesRegexp(ValueError, "Expected state .* a tuple"):
+          rnn_cell_impl.MultiRNNCell(
+              [rnn_cell_impl.GRUCell(2) for _ in range(2)],
+              state_is_tuple=True)(x, m_bad)
+
+        _, ml = rnn_cell_impl.MultiRNNCell(
+            [rnn_cell_impl.GRUCell(2) for _ in range(2)],
+            state_is_tuple=True)(x, m_good)
+
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(
+            ml, {
+                x: np.array([[1., 1.]]),
+                m_good[0]: np.array([[0.1, 0.1]]),
+                m_good[1]: np.array([[0.1, 0.1]])
+            })
+
+        # The numbers in results were not calculated; this is just a
+        # smoke test.  However, these numbers should match those of
+        # the test testMultiRNNCell.
+        self.assertAllClose(res[0], [[0.175991, 0.175991]])
+        self.assertAllClose(res[1], [[0.13248, 0.13248]])
+
+  @parameterized.parameters(
+      [[rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2],
+       [rnn_cell_impl.ResidualWrapper, rnn_cell_impl.ResidualWrapperV2]])
+  @test_util.run_in_graph_and_eager_modes
+  def testWrapperKerasStyle(self, wrapper, wrapper_v2):
+    """Tests if wrapper cell is instantiated in keras style scope."""
+    wrapped_cell_v2 = wrapper_v2(rnn_cell_impl.BasicRNNCell(1))
+    self.assertTrue(wrapped_cell_v2._keras_style)
+
+    wrapped_cell = wrapper(rnn_cell_impl.BasicRNNCell(1))
+    self.assertFalse(wrapped_cell._keras_style)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
+  @test_util.run_in_graph_and_eager_modes
+  def testWrapperV2VariableNames(self, wrapper):
+    """Tests that variables names do not depend on wrapper in RNN layer."""
+
+    def _rnn_input(apply_wrapper, name):
+      """Creates a RNN layer with/without wrapper and returns built rnn cell."""
+      with base_layer.keras_style_scope():
+        base_cell = rnn_cell_impl.MultiRNNCell(
+            [rnn_cell_impl.BasicRNNCell(1, name="basic_rnn_cell")
+             for _ in range(2)])
+      if apply_wrapper:
+        rnn_cell = wrapper(base_cell)
+      else:
+        rnn_cell = base_cell
+      rnn_layer = keras_layers.RNN(rnn_cell, name=name)
+      inputs = ops.convert_to_tensor([[[1]]], dtype=dtypes.float32)
+      _ = rnn_layer(inputs)
+      return base_cell._cells[0]
+
+    rnn_1 = _rnn_input(True, name="rnn_0")
+    rnn_2 = _rnn_input(False, name="rnn_1")
+
+    for i, cell in enumerate([rnn_1, rnn_2]):
+      var_prefix = "rnn_{}/cell_0/basic_rnn_cell/".format(i)
+      self.assertCountEqual([v.name for v in cell.weights],
+                            (var_prefix + "kernel:0", var_prefix + "bias:0"))
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
+  @test_util.run_in_graph_and_eager_modes
+  def testWrapperWeights(self, wrapper):
+    """Tests that wrapper weights contain wrapped cells weights."""
+
+    with base_layer.keras_style_scope():
+      base_cell = rnn_cell_impl.BasicRNNCell(1, name="basic_rnn_cell")
+    rnn_cell = wrapper(base_cell)
+    rnn_layer = keras_layers.RNN(rnn_cell)
+    inputs = ops.convert_to_tensor([[[1]]], dtype=dtypes.float32)
+    rnn_layer(inputs)
+
+    expected_weights = ["rnn/" + var for var in ("kernel:0", "bias:0")]
+    self.assertEqual(len(rnn_cell.weights), 2)
+    self.assertCountEqual([v.name for v in rnn_cell.weights], expected_weights)
+    self.assertCountEqual([v.name for v in rnn_cell.trainable_variables],
+                          expected_weights)
+    self.assertCountEqual([v.name for v in rnn_cell.non_trainable_variables],
+                          [])
+    self.assertCountEqual([v.name for v in rnn_cell._cell.weights],
+                          expected_weights)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
+  @test_util.run_in_graph_and_eager_modes
+  def testWrapperV2Caller(self, wrapper):
+    """Tests that wrapper V2 is using the LayerRNNCell's caller."""
+
+    with base_layer.keras_style_scope():
+      base_cell = rnn_cell_impl.MultiRNNCell(
+          [rnn_cell_impl.BasicRNNCell(1) for _ in range(2)])
+    rnn_cell = wrapper(base_cell)
+    inputs = ops.convert_to_tensor([[1]], dtype=dtypes.float32)
+    state = ops.convert_to_tensor([[1]], dtype=dtypes.float32)
+    _ = rnn_cell(inputs, [state, state])
+    weights = base_cell._cells[0].weights
+    self.assertLen(weights, expected_len=2)
+    self.assertTrue(all(["_wrapper" in v.name for v in weights]))
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapperV2, rnn_cell_impl.ResidualWrapperV2])
+  @test_util.run_in_graph_and_eager_modes
+  def testWrapperV2Build(self, wrapper):
+    cell = rnn_cell_impl.LSTMCell(10)
+    wrapper = wrapper(cell)
+    wrapper.build((1,))
+    self.assertTrue(cell.built)
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class DropoutWrapperTest(test.TestCase, parameterized.TestCase):
+
+  def _testDropoutWrapper(self,
+                          batch_size=None,
+                          time_steps=None,
+                          parallel_iterations=None,
+                          wrapper_type=None,
+                          scope="root",
+                          **kwargs):
+    if batch_size is None and time_steps is None:
+      # 2 time steps, batch size 1, depth 3
+      batch_size = 1
+      time_steps = 2
+      x = constant_op.constant(
+          [[[2., 2., 2.]], [[1., 1., 1.]]], dtype=dtypes.float32)
+      m = rnn_cell_impl.LSTMStateTuple(
+          *[constant_op.constant([[0.1, 0.1, 0.1]], dtype=dtypes.float32)] * 2)
+    else:
+      x = constant_op.constant(
+          np.random.randn(time_steps, batch_size, 3).astype(np.float32))
+      m = rnn_cell_impl.LSTMStateTuple(*[
+          constant_op.constant(
+              [[0.1, 0.1, 0.1]] * batch_size, dtype=dtypes.float32)
+      ] * 2)
+    outputs, final_state = rnn.dynamic_rnn(
+        cell=wrapper_type(
+            rnn_cell_impl.LSTMCell(
+                3, initializer=init_ops.constant_initializer(0.5)),
+            dtype=x.dtype, **kwargs),
+        time_major=True,
+        parallel_iterations=parallel_iterations,
+        inputs=x,
+        initial_state=m,
+        scope=scope)
+    self.evaluate([variables_lib.global_variables_initializer()])
+    res = self.evaluate([outputs, final_state])
+    self.assertEqual(res[0].shape, (time_steps, batch_size, 3))
+    self.assertEqual(res[1].c.shape, (batch_size, 3))
+    self.assertEqual(res[1].h.shape, (batch_size, 3))
+    return res
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperProperties(self, wrapper_type):
+    cell = rnn_cell_impl.BasicRNNCell(10)
+    wrapper = wrapper_type(cell)
+    # GitHub issue 15810
+    self.assertEqual(wrapper.wrapped_cell, cell)
+    self.assertEqual(wrapper.state_size, 10)
+    self.assertEqual(wrapper.output_size, 10)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperZeroState(self, wrapper_type):
+    class _Cell(rnn_cell_impl.BasicRNNCell):
+
+      def zero_state(self, batch_size=None, dtype=None):
+        return "wrapped_cell_zero_state"
+    wrapper = wrapper_type(_Cell(10))
+    self.assertEqual(wrapper.zero_state(10, dtypes.float32),
+                     "wrapped_cell_zero_state")
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepAllConstantInput(self, wrapper_type):
+    keep = array_ops.ones([])
+    res = self._testDropoutWrapper(
+        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep,
+        wrapper_type=wrapper_type)
+    true_full_output = np.array(
+        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
+        dtype=np.float32)
+    true_full_final_c = np.array(
+        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
+    self.assertAllClose(true_full_output, res[0])
+    self.assertAllClose(true_full_output[1], res[1].h)
+    self.assertAllClose(true_full_final_c, res[1].c)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepAll(self, wrapper_type):
+    keep = variable_scope.get_variable("all", initializer=1.0)
+    res = self._testDropoutWrapper(
+        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep,
+        wrapper_type=wrapper_type)
+    true_full_output = np.array(
+        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
+        dtype=np.float32)
+    true_full_final_c = np.array(
+        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
+    self.assertAllClose(true_full_output, res[0])
+    self.assertAllClose(true_full_output[1], res[1].h)
+    self.assertAllClose(true_full_final_c, res[1].c)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperWithSeed(self, wrapper_type):
+    keep_some = 0.5
+    random_seed.set_random_seed(2)
+    ## Use parallel_iterations = 1 in both calls to
+    ## _testDropoutWrapper to ensure the (per-time step) dropout is
+    ## consistent across both calls.  Otherwise the seed may not end
+    ## up being munged consistently across both graphs.
+    res_standard_1 = self._testDropoutWrapper(
+        input_keep_prob=keep_some,
+        output_keep_prob=keep_some,
+        state_keep_prob=keep_some,
+        seed=10,
+        parallel_iterations=1,
+        wrapper_type=wrapper_type,
+        scope="root_1")
+    random_seed.set_random_seed(2)
+    res_standard_2 = self._testDropoutWrapper(
+        input_keep_prob=keep_some,
+        output_keep_prob=keep_some,
+        state_keep_prob=keep_some,
+        seed=10,
+        parallel_iterations=1,
+        wrapper_type=wrapper_type,
+        scope="root_2")
+    self.assertAllClose(res_standard_1[0], res_standard_2[0])
+    self.assertAllClose(res_standard_1[1].c, res_standard_2[1].c)
+    self.assertAllClose(res_standard_1[1].h, res_standard_2[1].h)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepNoOutput(self, wrapper_type):
+    keep_all = variable_scope.get_variable("all", initializer=1.0)
+    keep_none = variable_scope.get_variable("none", initializer=1e-6)
+    res = self._testDropoutWrapper(
+        input_keep_prob=keep_all,
+        output_keep_prob=keep_none,
+        state_keep_prob=keep_all,
+        wrapper_type=wrapper_type)
+    true_full_output = np.array(
+        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
+        dtype=np.float32)
+    true_full_final_c = np.array(
+        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
+    self.assertAllClose(np.zeros(res[0].shape), res[0])
+    self.assertAllClose(true_full_output[1], res[1].h)
+    self.assertAllClose(true_full_final_c, res[1].c)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepNoStateExceptLSTMCellMemory(self, wrapper_type):
+    keep_all = variable_scope.get_variable("all", initializer=1.0)
+    keep_none = variable_scope.get_variable("none", initializer=1e-6)
+    # Even though we drop out the state, by default DropoutWrapper never
+    # drops out the memory ("c") term of an LSTMStateTuple.
+    res = self._testDropoutWrapper(
+        input_keep_prob=keep_all,
+        output_keep_prob=keep_all,
+        state_keep_prob=keep_none,
+        wrapper_type=wrapper_type)
+    true_c_state = np.array([[1.713925, 1.713925, 1.713925]], dtype=np.float32)
+    true_full_output = np.array(
+        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
+        dtype=np.float32)
+    self.assertAllClose(true_full_output[0], res[0][0])
+    # Second output is modified by zero input state
+    self.assertGreater(np.linalg.norm(true_full_output[1] - res[0][1]), 1e-4)
+    # h state has been set to zero
+    self.assertAllClose(np.zeros(res[1].h.shape), res[1].h)
+    # c state of an LSTMStateTuple is NEVER modified.
+    self.assertAllClose(true_c_state, res[1].c)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepNoInput(self, wrapper_type):
+    keep_all = variable_scope.get_variable("all", initializer=1.0)
+    keep_none = variable_scope.get_variable("none", initializer=1e-6)
+    true_full_output = np.array(
+        [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
+        dtype=np.float32)
+    true_full_final_c = np.array(
+        [[1.949385, 1.949385, 1.949385]], dtype=np.float32)
+    # All outputs are different because inputs are zeroed out
+    res = self._testDropoutWrapper(
+        input_keep_prob=keep_none,
+        output_keep_prob=keep_all,
+        state_keep_prob=keep_all,
+        wrapper_type=wrapper_type)
+    self.assertGreater(np.linalg.norm(res[0] - true_full_output), 1e-4)
+    self.assertGreater(np.linalg.norm(res[1].h - true_full_output[1]), 1e-4)
+    self.assertGreater(np.linalg.norm(res[1].c - true_full_final_c), 1e-4)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperRecurrentOutput(self, wrapper_type):
+    keep_some = 0.8
+    keep_all = variable_scope.get_variable("all", initializer=1.0)
+    res = self._testDropoutWrapper(
+        input_keep_prob=keep_all,
+        output_keep_prob=keep_some,
+        state_keep_prob=keep_all,
+        variational_recurrent=True,
+        wrapper_type=wrapper_type,
+        input_size=3,
+        batch_size=5,
+        time_steps=7)
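+    # With variational_recurrent=True a single dropout mask is sampled once
+    # and reused at every time step rather than resampled per step, which is
+    # what the mask comparison below checks.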
+    # Ensure the same dropout pattern for all time steps
+    output_mask = np.abs(res[0]) > 1e-6
+    for m in output_mask[1:]:
+      self.assertAllClose(output_mask[0], m)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperRecurrentStateInputAndOutput(self, wrapper_type):
+    keep_some = 0.9
+    res = self._testDropoutWrapper(
+        input_keep_prob=keep_some,
+        output_keep_prob=keep_some,
+        state_keep_prob=keep_some,
+        variational_recurrent=True,
+        wrapper_type=wrapper_type,
+        input_size=3,
+        batch_size=5,
+        time_steps=7)
+
+    # Smoke test for the state/input masks.
+    output_mask = np.abs(res[0]) > 1e-6
+    for time_step in output_mask:
+      # Ensure the same dropout output pattern for all time steps
+      self.assertAllClose(output_mask[0], time_step)
+      for batch_entry in time_step:
+        # Assert all batch entries get the same mask
+        self.assertAllClose(batch_entry, time_step[0])
+
+    # For state, ensure all batch entries have the same mask
+    state_c_mask = np.abs(res[1].c) > 1e-6
+    state_h_mask = np.abs(res[1].h) > 1e-6
+    for batch_entry in state_c_mask:
+      self.assertAllClose(batch_entry, state_c_mask[0])
+    for batch_entry in state_h_mask:
+      self.assertAllClose(batch_entry, state_h_mask[0])
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperRecurrentStateInputAndOutputWithSeed(
+      self, wrapper_type):
+    keep_some = 0.9
+    random_seed.set_random_seed(2347)
+    np.random.seed(23487)
+    res0 = self._testDropoutWrapper(
+        input_keep_prob=keep_some,
+        output_keep_prob=keep_some,
+        state_keep_prob=keep_some,
+        variational_recurrent=True,
+        wrapper_type=wrapper_type,
+        input_size=3,
+        batch_size=5,
+        time_steps=7,
+        seed=-234987,
+        scope="root_0")
+    random_seed.set_random_seed(2347)
+    np.random.seed(23487)
+    res1 = self._testDropoutWrapper(
+        input_keep_prob=keep_some,
+        output_keep_prob=keep_some,
+        state_keep_prob=keep_some,
+        variational_recurrent=True,
+        wrapper_type=wrapper_type,
+        input_size=3,
+        batch_size=5,
+        time_steps=7,
+        seed=-234987,
+        scope="root_1")
+
+    output_mask = np.abs(res0[0]) > 1e-6
+    for time_step in output_mask:
+      # Ensure the same dropout output pattern for all time steps
+      self.assertAllClose(output_mask[0], time_step)
+      for batch_entry in time_step:
+        # Assert all batch entries get the same mask
+        self.assertAllClose(batch_entry, time_step[0])
+
+    # For state, ensure all batch entries have the same mask
+    state_c_mask = np.abs(res0[1].c) > 1e-6
+    state_h_mask = np.abs(res0[1].h) > 1e-6
+    for batch_entry in state_c_mask:
+      self.assertAllClose(batch_entry, state_c_mask[0])
+    for batch_entry in state_h_mask:
+      self.assertAllClose(batch_entry, state_h_mask[0])
+
+    # Ensure seeded calculation is identical.
+    self.assertAllClose(res0[0], res1[0])
+    self.assertAllClose(res0[1].c, res1[1].c)
+    self.assertAllClose(res0[1].h, res1[1].h)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/signal/BUILD b/tensorflow/python/kernel_tests/signal/BUILD
index 554bf38..4caecc8 100644
--- a/tensorflow/python/kernel_tests/signal/BUILD
+++ b/tensorflow/python/kernel_tests/signal/BUILD
@@ -29,6 +29,7 @@
         "//tensorflow/python:spectral_ops_test_util",
         "//tensorflow/python/ops/signal",
     ],
+    tags = ["no_rocm"],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -45,7 +46,10 @@
         "//tensorflow/python/ops/signal",
     ],
     shard_count = 4,
-    tags = ["optonly"],
+    tags = [
+        "no_rocm",
+        "optonly",
+    ],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -73,6 +77,7 @@
         "//tensorflow/python/ops/signal",
         "//tensorflow/python:spectral_ops_test_util",
     ],
+    tags = ["no_rocm"],
     xla_enable_strict_auto_jit = True,
 )
 
@@ -130,7 +135,10 @@
         "//tensorflow/python:spectral_ops_test_util",
         "//tensorflow/python/ops/signal",
     ],
-    tags = ["nomac"],
+    tags = [
+        "no_rocm",
+        "nomac",
+    ],
     xla_enable_strict_auto_jit = True,
 )
 
diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index b4544ba..056e3b9 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -1430,6 +1430,19 @@
       self.assertAllEqual([1.0, -1.0], read_val)
       self.assertAllEqual([[2.0, 3.0], [0.0, 0.0]], grad_val)
 
+  def testScatterIntoExistingList(self):
+    ta = tensor_array_ops.TensorArray(
+        dtype=dtypes.float32, tensor_array_name="foo", size=5)
+
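+    # scatter writes one row of `value` at each index and leaves the other
+    # entries untouched; unwritten entries read back as zeros when stacked.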
+    ta = ta.scatter(indices=[3, 4], value=array_ops.ones([2]))
+    self.assertAllEqual(ta.stack(), [0., 0., 0., 1., 1.])
+
+    ta = ta.scatter(indices=[1], value=array_ops.ones([1]))
+    self.assertAllEqual(ta.stack(), [0., 1., 0., 1., 1.])
+
+    ta = ta.scatter(indices=[0, 2], value=[5., 6.])
+    self.assertAllEqual(ta.stack(), [5., 1., 6., 1., 1.])
+
   @test_util.run_v1_only("b/118890905")
   def testTensorArrayWriteGatherAndGradients(self):
     with self.session(use_gpu=True) as session:
diff --git a/tensorflow/python/kernel_tests/tridiagonal_solve_op_test.py b/tensorflow/python/kernel_tests/tridiagonal_solve_op_test.py
new file mode 100644
index 0000000..3fd603a
--- /dev/null
+++ b/tensorflow/python/kernel_tests/tridiagonal_solve_op_test.py
@@ -0,0 +1,456 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow.ops.linalg.linalg_impl.tridiagonal_solve."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.linalg import linalg_impl
+from tensorflow.python.platform import benchmark
+from tensorflow.python.platform import test
+
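+# _sample_diags is in the "compact" format: rows hold the superdiagonal, main
+# diagonal and subdiagonal. The superdiagonal's last element and the
+# subdiagonal's first element are padding and are ignored.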
+_sample_diags = np.array([[2, 1, 4, 0], [1, 3, 2, 2], [0, 1, -1, 1]])
+_sample_rhs = np.array([1, 2, 3, 4])
+_sample_result = np.array([-9, 5, -4, 4])
+
+
+def _tfconst(array):
+  return constant_op.constant(array, dtypes.float64)
+
+
+def _tf_ones(shape):
+  return array_ops.ones(shape, dtype=dtypes.float64)
+
+
+class TridiagonalSolveOpTest(test.TestCase):
+
+  def _test(self,
+            diags,
+            rhs,
+            expected,
+            diags_format="compact",
+            transpose_rhs=False,
+            conjugate_rhs=False):
+    with self.cached_session(use_gpu=False):
+      result = linalg_impl.tridiagonal_solve(diags, rhs, diags_format,
+                                             transpose_rhs, conjugate_rhs)
+      self.assertAllClose(self.evaluate(result), expected)
+
+  def _testWithLists(self,
+                     diags,
+                     rhs,
+                     expected,
+                     diags_format="compact",
+                     transpose_rhs=False,
+                     conjugate_rhs=False):
+    self._test(
+        _tfconst(diags), _tfconst(rhs), _tfconst(expected), diags_format,
+        transpose_rhs, conjugate_rhs)
+
+  def _assertRaises(self, diags, rhs, diags_format="compact"):
+    with self.assertRaises(ValueError):
+      linalg_impl.tridiagonal_solve(diags, rhs, diags_format)
+
+  # Tests with various dtypes
+
+  def testReal(self):
+    for dtype in dtypes.float32, dtypes.float64:
+      self._test(
+          diags=constant_op.constant(_sample_diags, dtype),
+          rhs=constant_op.constant(_sample_rhs, dtype),
+          expected=constant_op.constant(_sample_result, dtype))
+
+  def testComplex(self):
+    for dtype in dtypes.complex64, dtypes.complex128:
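+      # Scaling the diagonals by (1+1j) and the rhs by (1-1j) scales the
+      # solution by the ratio (1-1j)/(1+1j).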
+      self._test(
+          diags=constant_op.constant(_sample_diags, dtype) * (1 + 1j),
+          rhs=constant_op.constant(_sample_rhs, dtype) * (1 - 1j),
+          expected=constant_op.constant(_sample_result, dtype) * (1 - 1j) /
+          (1 + 1j))
+
+  # Tests with small matrix sizes
+
+  def test3x3(self):
+    self._testWithLists(
+        diags=[[2, -1, 0], [1, 3, 1], [0, -1, -2]],
+        rhs=[1, 2, 3],
+        expected=[-3, 2, 7])
+
+  def test2x2(self):
+    self._testWithLists(
+        diags=[[2, 0], [1, 3], [0, 1]], rhs=[1, 4], expected=[-5, 3])
+
+  def test1x1(self):
+    self._testWithLists(diags=[[0], [3], [0]], rhs=[6], expected=[2])
+
+  def test0x0(self):
+    self._test(
+        diags=constant_op.constant(0, shape=(3, 0), dtype=dtypes.float32),
+        rhs=constant_op.constant(0, shape=(0, 1), dtype=dtypes.float32),
+        expected=constant_op.constant(0, shape=(0, 1), dtype=dtypes.float32))
+
+  # Other edge cases
+
+  def testCaseRequiringPivoting(self):
+    # Without partial pivoting (e.g. the Thomas algorithm) this would fail.
+    self._testWithLists(
+        diags=[[2, -1, 1, 0], [1, 4, 1, -1], [0, 2, -2, 3]],
+        rhs=[1, 2, 3, 4],
+        expected=[8, -3.5, 0, -4])
+
+  def testCaseRequiringPivotingLastRows(self):
+    self._testWithLists(
+        diags=[[2, 1, -1, 0], [1, -1, 2, 1], [0, 1, -6, 1]],
+        rhs=[1, 2, -1, -2],
+        expected=[5, -2, -5, 3])
+
+  def testNotInvertible(self):
+    with self.assertRaises(errors_impl.InvalidArgumentError):
+      self._testWithLists(
+          diags=[[2, -1, 1, 0], [1, 4, 1, -1], [0, 2, 0, 3]],
+          rhs=[1, 2, 3, 4],
+          expected=[8, -3.5, 0, -4])
+
+  def testDiagonal(self):
+    self._testWithLists(
+        diags=[[0, 0, 0, 0], [1, 2, -1, -2], [0, 0, 0, 0]],
+        rhs=[1, 2, 3, 4],
+        expected=[1, 1, -3, -2])
+
+  def testUpperTriangular(self):
+    self._testWithLists(
+        diags=[[2, 4, -1, 0], [1, 3, 1, 2], [0, 0, 0, 0]],
+        rhs=[1, 6, 4, 4],
+        expected=[13, -6, 6, 2])
+
+  def testLowerTriangular(self):
+    self._testWithLists(
+        diags=[[0, 0, 0, 0], [2, -1, 3, 1], [0, 1, 4, 2]],
+        rhs=[4, 5, 6, 1],
+        expected=[2, -3, 6, -11])
+
+  # Multiple right-hand sides and batching
+
+  def testWithTwoRightHandSides(self):
+    self._testWithLists(
+        diags=_sample_diags,
+        rhs=np.transpose([_sample_rhs, 2 * _sample_rhs]),
+        expected=np.transpose([_sample_result, 2 * _sample_result]))
+
+  def testBatching(self):
+    self._testWithLists(
+        diags=np.array([_sample_diags, -_sample_diags]),
+        rhs=np.array([_sample_rhs, 2 * _sample_rhs]),
+        expected=np.array([_sample_result, -2 * _sample_result]))
+
+  def testBatchingAndTwoRightHandSides(self):
+    rhs = np.transpose([_sample_rhs, 2 * _sample_rhs])
+    expected_result = np.transpose([_sample_result, 2 * _sample_result])
+    self._testWithLists(
+        diags=np.array([_sample_diags, -_sample_diags]),
+        rhs=np.array([rhs, 2 * rhs]),
+        expected=np.array([expected_result, -2 * expected_result]))
+
+  # Various input formats
+
+  def testSequenceFormat(self):
+    self._test(
+        diags=(_tfconst([2, 1, 4]), _tfconst([1, 3, 2, 2]), _tfconst([1, -1,
+                                                                      1])),
+        rhs=_tfconst([1, 2, 3, 4]),
+        expected=_tfconst([-9, 5, -4, 4]),
+        diags_format="sequence")
+
+  def testSequenceFormatWithDummyElements(self):
+    dummy = 20
+    self._test(
+        diags=(_tfconst([2, 1, 4, dummy]), _tfconst([1, 3, 2, 2]),
+               _tfconst([dummy, 1, -1, 1])),
+        rhs=_tfconst([1, 2, 3, 4]),
+        expected=_tfconst([-9, 5, -4, 4]),
+        diags_format="sequence")
+
+  def testSequenceFormatWithBatching(self):
+    self._test(
+        diags=(_tfconst([[2, 1, 4], [-2, -1, -4]]),
+               _tfconst([[1, 3, 2, 2], [-1, -3, -2, -2]]),
+               _tfconst([[1, -1, 1], [-1, 1, -1]])),
+        rhs=_tfconst([[1, 2, 3, 4], [1, 2, 3, 4]]),
+        expected=_tfconst([[-9, 5, -4, 4], [9, -5, 4, -4]]),
+        diags_format="sequence")
+
+  def testMatrixFormat(self):
+    self._testWithLists(
+        diags=[[1, 2, 0, 0], [1, 3, 1, 0], [0, -1, 2, 4], [0, 0, 1, 2]],
+        rhs=[1, 2, 3, 4],
+        expected=[-9, 5, -4, 4],
+        diags_format="matrix")
+
+  def testMatrixFormatWithMultipleRightHandSides(self):
+    self._testWithLists(
+        diags=[[1, 2, 0, 0], [1, 3, 1, 0], [0, -1, 2, 4], [0, 0, 1, 2]],
+        rhs=[[1, -1], [2, -2], [3, -3], [4, -4]],
+        expected=[[-9, 9], [5, -5], [-4, 4], [4, -4]],
+        diags_format="matrix")
+
+  def testMatrixFormatWithBatching(self):
+    self._testWithLists(
+        diags=[[[1, 2, 0, 0], [1, 3, 1, 0], [0, -1, 2, 4], [0, 0, 1, 2]],
+               [[-1, -2, 0, 0], [-1, -3, -1, 0], [0, 1, -2, -4], [0, 0, -1,
+                                                                  -2]]],
+        rhs=[[1, 2, 3, 4], [1, 2, 3, 4]],
+        expected=[[-9, 5, -4, 4], [9, -5, 4, -4]],
+        diags_format="matrix")
+
+  def testRightHandSideAsColumn(self):
+    self._testWithLists(
+        diags=_sample_diags,
+        rhs=np.transpose([_sample_rhs]),
+        expected=np.transpose([_sample_result]),
+        diags_format="compact")
+
+  # Tests with transpose and adjoint
+
+  def testTransposeRhs(self):
+    self._testWithLists(
+        diags=_sample_diags,
+        rhs=np.array([_sample_rhs, 2 * _sample_rhs]),
+        expected=np.array([_sample_result, 2 * _sample_result]),
+        transpose_rhs=True)
+
+  def testConjugateRhs(self):
+    self._testWithLists(
+        diags=_sample_diags,
+        rhs=np.transpose([_sample_rhs * (1 + 1j), _sample_rhs * (1 - 2j)]),
+        expected=np.transpose(
+            [_sample_result * (1 - 1j), _sample_result * (1 + 2j)]),
+        conjugate_rhs=True)
+
+  def testAdjointRhs(self):
+    self._testWithLists(
+        diags=_sample_diags,
+        rhs=np.array([_sample_rhs * (1 + 1j), _sample_rhs * (1 - 2j)]),
+        expected=np.array(
+            [_sample_result * (1 - 1j), _sample_result * (1 + 2j)]),
+        transpose_rhs=True,
+        conjugate_rhs=True)
+
+  def testTransposeRhsWithBatching(self):
+    self._testWithLists(
+        diags=np.array([_sample_diags, -_sample_diags]),
+        rhs=np.array([[_sample_rhs, 2 * _sample_rhs],
+                      [3 * _sample_rhs, 4 * _sample_rhs]]),
+        expected=np.array([[_sample_result, 2 * _sample_result],
+                           [-3 * _sample_result, -4 * _sample_result]]),
+        transpose_rhs=True)
+
+  def testTransposeRhsWithRhsAsVector(self):
+    self._testWithLists(
+        diags=_sample_diags,
+        rhs=_sample_rhs,
+        expected=_sample_result,
+        transpose_rhs=True)
+
+  def testConjugateRhsWithRhsAsVector(self):
+    self._testWithLists(
+        diags=_sample_diags,
+        rhs=_sample_rhs * (1 + 1j),
+        expected=_sample_result * (1 - 1j),
+        conjugate_rhs=True)
+
+  def testTransposeRhsWithRhsAsVectorAndBatching(self):
+    self._testWithLists(
+        diags=np.array([_sample_diags, -_sample_diags]),
+        rhs=np.array([_sample_rhs, 2 * _sample_rhs]),
+        expected=np.array([_sample_result, -2 * _sample_result]),
+        transpose_rhs=True)
+
+  # Invalid input shapes
+
+  def testInvalidShapesCompactFormat(self):
+
+    def test_raises(diags_shape, rhs_shape):
+      self._assertRaises(_tf_ones(diags_shape), _tf_ones(rhs_shape), "compact")
+
+    test_raises((5, 4, 4), (5, 4))
+    test_raises((5, 3, 4), (4, 5))
+    test_raises((5, 3, 4), (5,))
+    test_raises((5,), (5, 4))
+
+  def testInvalidShapesSequenceFormat(self):
+
+    def test_raises(diags_tuple_shapes, rhs_shape):
+      diagonals = tuple(_tf_ones(shape) for shape in diags_tuple_shapes)
+      self._assertRaises(diagonals, _tf_ones(rhs_shape), "sequence")
+
+    test_raises(((5, 4), (5, 4)), (5, 4))
+    test_raises(((5, 4), (5, 4), (5, 6)), (5, 4))
+    test_raises(((5, 3), (5, 4), (5, 6)), (5, 4))
+    test_raises(((5, 6), (5, 4), (5, 3)), (5, 4))
+    test_raises(((5, 4), (7, 4), (5, 4)), (5, 4))
+    test_raises(((5, 4), (7, 4), (5, 4)), (3, 4))
+
+  def testInvalidShapesMatrixFormat(self):
+
+    def test_raises(diags_shape, rhs_shape):
+      self._assertRaises(_tf_ones(diags_shape), _tf_ones(rhs_shape), "matrix")
+
+    test_raises((5, 4, 7), (5, 4))
+    test_raises((5, 4, 4), (3, 4))
+    test_raises((5, 4, 4), (5, 3))
+
+  # Tests with placeholders
+
+  def _testWithPlaceholders(self,
+                            diags_shape,
+                            rhs_shape,
+                            diags_feed,
+                            rhs_feed,
+                            expected,
+                            diags_format="compact"):
+    if context.executing_eagerly():
+      return
+    diags = array_ops.placeholder(dtypes.float64, shape=diags_shape)
+    rhs = array_ops.placeholder(dtypes.float64, shape=rhs_shape)
+    x = linalg_impl.tridiagonal_solve(diags, rhs, diags_format)
+    with self.cached_session(use_gpu=False) as sess:
+      result = sess.run(x, feed_dict={diags: diags_feed, rhs: rhs_feed})
+      self.assertAllClose(result, expected)
+
+  def testCompactFormatAllDimsUnknown(self):
+    self._testWithPlaceholders(
+        diags_shape=[None, None],
+        rhs_shape=[None],
+        diags_feed=_sample_diags,
+        rhs_feed=_sample_rhs,
+        expected=_sample_result)
+
+  def testCompactFormatUnknownMatrixSize(self):
+    self._testWithPlaceholders(
+        diags_shape=[3, None],
+        rhs_shape=[4],
+        diags_feed=_sample_diags,
+        rhs_feed=_sample_rhs,
+        expected=_sample_result)
+
+  def testCompactFormatUnknownRhsCount(self):
+    self._testWithPlaceholders(
+        diags_shape=[3, 4],
+        rhs_shape=[4, None],
+        diags_feed=_sample_diags,
+        rhs_feed=np.transpose([_sample_rhs, 2 * _sample_rhs]),
+        expected=np.transpose([_sample_result, 2 * _sample_result]))
+
+  def testCompactFormatUnknownBatchSize(self):
+    self._testWithPlaceholders(
+        diags_shape=[None, 3, 4],
+        rhs_shape=[None, 4],
+        diags_feed=np.array([_sample_diags, -_sample_diags]),
+        rhs_feed=np.array([_sample_rhs, 2 * _sample_rhs]),
+        expected=np.array([_sample_result, -2 * _sample_result]))
+
+  def testMatrixFormatWithUnknownDims(self):
+    if context.executing_eagerly():
+      return
+
+    def test_with_matrix_shapes(matrix_shape):
+      matrix = np.array([[1, 2, 0, 0], [1, 3, 1, 0], [0, -1, 2, 4],
+                         [0, 0, 1, 2]])
+      rhs = np.array([1, 2, 3, 4])
+      x = np.array([-9, 5, -4, 4])
+      self._testWithPlaceholders(
+          diags_shape=matrix_shape,
+          rhs_shape=[None, None],
+          diags_feed=matrix,
+          rhs_feed=np.transpose([rhs, 2 * rhs]),
+          expected=np.transpose([x, 2 * x]),
+          diags_format="matrix")
+
+    test_with_matrix_shapes(matrix_shape=[4, 4])
+    test_with_matrix_shapes(matrix_shape=[None, 4])
+    test_with_matrix_shapes(matrix_shape=[4, None])
+    with self.assertRaises(ValueError):
+      test_with_matrix_shapes(matrix_shape=[None, None])
+
+  def testSequenceFormatWithUnknownDims(self):
+    if context.executing_eagerly():
+      return
+    superdiag = array_ops.placeholder(dtypes.float64, shape=[None])
+    diag = array_ops.placeholder(dtypes.float64, shape=[None])
+    subdiag = array_ops.placeholder(dtypes.float64, shape=[None])
+    rhs = array_ops.placeholder(dtypes.float64, shape=[None])
+
+    x = linalg_impl.tridiagonal_solve((superdiag, diag, subdiag),
+                                      rhs,
+                                      diagonals_format="sequence")
+    with self.cached_session(use_gpu=False) as sess:
+      result = sess.run(
+          x,
+          feed_dict={
+              subdiag: [20, 1, -1, 1],
+              diag: [1, 3, 2, 2],
+              superdiag: [2, 1, 4, 20],
+              rhs: [1, 2, 3, 4]
+          })
+      self.assertAllClose(result, [-9, 5, -4, 4])
+
+  # Benchmark
+
+  class TridiagonalSolveBenchmark(test.Benchmark):
+    sizes = [(100000, 1, 1), (1000000, 1, 1), (10000000, 1, 1), (100000, 10, 1),
+             (100000, 100, 1), (10000, 1, 100), (10000, 1, 1000),
+             (10000, 1, 10000)]
+
+    def _generateData(self, matrix_size, batch_size, num_rhs, seed=42):
+      data = random_ops.random_normal(
+          shape=(batch_size, 3 + num_rhs, matrix_size),
+          dtype=dtypes.float64,
+          seed=seed)
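+      # Rows 0..2 of each batch entry form the compact diagonals; the
+      # remaining num_rhs rows are the right-hand sides.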
+      diags = array_ops.stack([data[:, 0], data[:, 1], data[:, 2]], axis=-2)
+      rhs = data[:, 3:, :]
+      return diags, rhs
+
+    def benchmarkTridiagonalSolveOp(self):
+      for matrix_size, batch_size, num_rhs in self.sizes:
+        with ops.Graph().as_default(), \
+                session.Session(config=benchmark.benchmark_config()) as sess, \
+                ops.device("/cpu:0"):
+          diags, rhs = self._generateData(matrix_size, batch_size, num_rhs)
+          x = linalg_impl.tridiagonal_solve(diags, rhs, transpose_rhs=True)
+          variables.global_variables_initializer().run()
+          self.run_op_benchmark(
+              sess,
+              control_flow_ops.group(x),
+              min_iters=10,
+              store_memory_usage=False,
+              name=("tridiagonal_solve_matrix_size_{}_batch_size_{}_"
+                    "num_rhs_{}").format(matrix_size, batch_size, num_rhs))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 1b84ec1..bb50442 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -26,7 +26,7 @@
 from tensorflow.python.keras.engine import base_layer_utils
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops import variables as tf_variables
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import function_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_contextlib
@@ -554,7 +554,7 @@
 
   def __setattr__(self, value, name):
     # By-pass the automatic dependency tracking performed by the parent Layer.
-    super(checkpointable.Checkpointable, self).__setattr__(value, name)
+    super(trackable.Trackable, self).__setattr__(value, name)
 
 
 def _add_elements_to_collection(elements, collection_list):
diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index b2d54a9..7e12dca 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -64,7 +64,7 @@
       `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
     name: String, the name of the layer. Layers with the same name will
       share weights, but to avoid mistakes we require reuse=True in such cases.
-    reuse: Boolean, whether to reuse the weights of a previous layer
+    _reuse: Boolean, whether to reuse the weights of a previous layer
       by the same name.
 
   Properties:
diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc
index f681cff..77fbfd5 100644
--- a/tensorflow/python/lib/core/py_seq_tensor.cc
+++ b/tensorflow/python/lib/core/py_seq_tensor.cc
@@ -64,6 +64,19 @@
          PyIsInstance(obj, &PyFloatingArrType_Type);  // NumPy float types
 }
 
+// If the input is a zero-dimensional PyArray, return it converted to a
+// scalar. Otherwise return the input with its reference count incremented.
+// Users must Py_DECREF the output of this method.
+PyObject* ZeroDimArrayToScalar(PyObject* obj) {
+  if (PyArray_IsZeroDim(obj) && !PyArray_IsScalar(obj, Generic)) {
+    auto pyarray_obj = reinterpret_cast<PyArrayObject*>(obj);
+    obj = PyArray_ToScalar(PyArray_DATA(pyarray_obj), pyarray_obj);
+  } else {
+    Py_INCREF(obj);
+  }
+  return obj;
+}
+
 // Converts Python object `c` that should hold a Python string into a
 // C++ string in *out.  Returns nullptr on success, or a message on error.
 // Defined below, but forward declared here for use in PyRepr.
@@ -130,6 +143,10 @@
 Status InferShapeAndType(PyObject* obj, TensorShape* shape, DataType* dtype) {
   std::vector<Safe_PyObjectPtr> refs_to_clean;
   while (true) {
+    // Before anything else, convert any zero-dimensional numpy array to a
+    // scalar. Keep a reference in refs_to_clean so it is released on exit.
+    obj = ZeroDimArrayToScalar(obj);
+    refs_to_clean.push_back(make_safe(obj));
     // We test strings first, in case a string is considered a sequence.
     if (IsPyString(obj)) {
       *dtype = DT_STRING;
@@ -240,7 +257,9 @@
       }                                                                   \
       PyObject** l = PySequence_Fast_ITEMS(seq.get());                    \
       for (int64 i = 0; i < s; ++i) {                                     \
-        const char* error = CONVERT(l[i], *buf);                          \
+        auto scalar = ZeroDimArrayToScalar(l[i]);                         \
+        const char* error = CONVERT(scalar, *buf);                        \
+        Py_DECREF(scalar);                                                \
         if (TF_PREDICT_FALSE(error != nullptr)) return error;             \
         ++*buf;                                                           \
       }                                                                   \
@@ -253,7 +272,9 @@
     Tensor result(TYPE_ENUM, shape);                                      \
     if (shape.dims() == 0) { /* Scalar case */                            \
       TYPE value;                                                         \
-      const char* error = CONVERT(obj, &value);                           \
+      auto scalar = ZeroDimArrayToScalar(obj);                            \
+      const char* error = CONVERT(scalar, &value);                        \
+      Py_DECREF(scalar);                                                  \
       if (error != nullptr) return error;                                 \
       result.scalar<TYPE>()() = value;                                    \
     } else {                                                              \
@@ -331,8 +352,25 @@
 
 template <class T>
 const char* ConvertOneFloat(PyObject* v, T* out) {
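+  // Bail out early if a Python exception is already pending from an earlier
+  // conversion attempt.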
+  if (PyErr_Occurred()) {
+    return nullptr;
+  }
   if (TF_PREDICT_TRUE(PyFloat_Check(v))) {
-    *out = PyFloat_AS_DOUBLE(v);
+    double as_double = PyFloat_AsDouble(v);
+    // Handle infinity first; +/-inf would otherwise trip the overflow
+    // check below and be misreported as out of range.
+    if (as_double == std::numeric_limits<double>::infinity()) {
+      *out = std::numeric_limits<T>::infinity();
+      return nullptr;
+    } else if (as_double == -std::numeric_limits<double>::infinity()) {
+      *out = -std::numeric_limits<T>::infinity();
+      return nullptr;
+    }
+    // Check for overflow.
+    if (as_double > std::numeric_limits<T>::max() ||
+        as_double < std::numeric_limits<T>::lowest()) {
+      return ErrorOutOfRangeDouble;
+    }
+    *out = static_cast<T>(as_double);
     return nullptr;
   }
 #if PY_MAJOR_VERSION < 3
@@ -348,6 +386,9 @@
   }
   if (PyIsInstance(v, &PyFloatingArrType_Type)) {  // NumPy float types
     Safe_PyObjectPtr as_float = make_safe(PyNumber_Float(v));
+    if (PyErr_Occurred()) {
+      return nullptr;
+    }
     return ConvertOneFloat<T>(as_float.get(), out);
   }
   if (PyIsInstance(v, &PyIntegerArrType_Type)) {  // NumPy integers
@@ -356,6 +397,9 @@
 #else
     Safe_PyObjectPtr as_int = make_safe(PyNumber_Long(v));
 #endif
+    if (PyErr_Occurred()) {
+      return nullptr;
+    }
     return ConvertOneFloat<T>(as_int.get(), out);
   }
   return ErrorMixedTypes;
diff --git a/tensorflow/python/module/BUILD b/tensorflow/python/module/BUILD
index 8aa3a19..55909cc 100644
--- a/tensorflow/python/module/BUILD
+++ b/tensorflow/python/module/BUILD
@@ -13,7 +13,7 @@
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:util",
         "//tensorflow/python:variables",
-        "//tensorflow/python/training/checkpointable:tracking",
+        "//tensorflow/python/training/tracking",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/module/module.py b/tensorflow/python/module/module.py
index 6bd2755..fe4cdfd 100644
--- a/tensorflow/python/module/module.py
+++ b/tensorflow/python/module/module.py
@@ -18,33 +18,37 @@
 from __future__ import division
 from __future__ import print_function
 
+import abc
 import re
 import sys
 
 import six
 
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import variables
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
+NO_MODULE_NAME_SCOPE = "__no_module_name_scope__"
 
-class ModuleMetaclass(type):
+
+class ModuleMetaclass(abc.ABCMeta):
   """Metaclass for `tf.Module`."""
 
   def __new__(mcs, name, bases, clsdict):
     for key, value in clsdict.items():
-      if key in ("__init__", "name_scope"):
+      if key == "name_scope":
+        continue
+
+      elif key.startswith("__") and key != "__call__":
+        # Don't patch methods like `__getattr__` or `__del__`.
         continue
 
       elif tf_inspect.isfunction(value):
-        if getattr(value, "_no_module_name_scope", False):
-          # The function has been annotated to say that no autoscoping should
-          # be applied, so do not patch it.
-          continue
         clsdict[key] = with_name_scope(value)
 
       elif isinstance(value, property):
@@ -54,7 +58,7 @@
             value.fdel if not value.fdel else with_name_scope(value.fdel),
             doc=value.__doc__)
 
-    return type.__new__(mcs, name, bases, clsdict)
+    return super(ModuleMetaclass, mcs).__new__(mcs, name, bases, clsdict)
 
   def __call__(cls, *args, **kwargs):
     # Call new such that we have an un-initialized module instance that we can
@@ -91,7 +95,7 @@
     return module
 
 
-def with_name_scope(unbound_method):
+def wrap_with_name_scope(unbound_method):
   """Patches the given method so it enters the modules name scope."""
   def enter_name_scope(self, *args, **kwargs):
     """Decorator that calls the given function in the module name scope.
@@ -118,11 +122,37 @@
       # for a particular method annotate it with `@no_module_name_scope`.
       return unbound_method(self, *args, **kwargs)
 
-  return tf_decorator.make_decorator(unbound_method, enter_name_scope)
+  return enter_name_scope
 
 
-@tf_export("experimental.Module")
-class Module(six.with_metaclass(ModuleMetaclass, tracking.AutoCheckpointable)):
+def wrap_with_name_scope_no_exception(unbound_method):
+  """Patches the given method so it enters the modules name scope."""
+  def enter_name_scope(self, *args, **kwargs):
+    with self.name_scope:
+      # tf.Module enters the module name scope for all methods. To disable
+      # this for a particular method, annotate it with `@Module.no_name_scope`.
+      return unbound_method(self, *args, **kwargs)
+  return enter_name_scope
+
+
+def with_name_scope(unbound_method):
+  """Patches the given method so it enters the modules name scope."""
+  if getattr(unbound_method, NO_MODULE_NAME_SCOPE, False):
+    # The function has been annotated to say that no autoscoping should be
+    # applied, so do not patch it.
+    return unbound_method
+
+  if isinstance(unbound_method, def_function.Function):
+    # Autograph cannot convert functions that have try/catch.
+    unbound_method._decorate(wrap_with_name_scope_no_exception)  # pylint: disable=protected-access
+    return unbound_method
+  else:
+    return tf_decorator.make_decorator(unbound_method,
+                                       wrap_with_name_scope(unbound_method))
+
+
+@tf_export("Module")
+class Module(six.with_metaclass(ModuleMetaclass, tracking.AutoTrackable)):
   """Base neural network module class.
 
   A module is a named container for `tf.Variable`s, other `tf.Module`s and
@@ -207,7 +237,8 @@
 
     Returns:
       A sequence of variables for the current module (sorted by attribute
-      name) followed by variables from all submodules recursively (depth first).
+      name) followed by variables from all submodules recursively (breadth
+      first).
     """
     return tuple(self._flatten(predicate=_IS_VARIABLE))
 
@@ -221,7 +252,8 @@
 
     Returns:
       A sequence of variables for the current module (sorted by attribute
-      name) followed by variables from all submodules recursively (depth first).
+      name) followed by variables from all submodules recursively (breadth
+      first).
     """
     return tuple(self._flatten(predicate=_IS_TRAINABLE_VARIABLE))
 
@@ -232,9 +264,9 @@
     Submodules are modules which are properties of this module, or found as
     properties of modules which are properties of this module (and so on).
 
-    >>> a = tf.experimental.Module()
-    >>> b = tf.experimental.Module()
-    >>> c = tf.experimental.Module()
+    >>> a = tf.Module()
+    >>> b = tf.Module()
+    >>> c = tf.Module()
     >>> a.b = b
     >>> b.c = c
     >>> assert list(a.submodules) == [b, c]
@@ -249,7 +281,8 @@
   def _flatten(self,
                recursive=True,
                predicate=None,
-               attribute_traversal_key=None):
+               attribute_traversal_key=None,
+               with_path=False):
     """Flattened attribute values in sorted order by attribute name.
 
     Modules are flattened by first walking their attributes in name order.
@@ -258,7 +291,7 @@
     flattened to find leaves. Finally every leaf value is optionally tested
     against the given `predicate` and finally yielded.
 
-    >>> class Foo(tf.experimental.Module):
+    >>> class Foo(tf.Module):
     ...   def __init__(self):
     ...     super(Foo, self).__init__()
     ...     self.x = [tf.constant('a'), tf.constant('b')]
@@ -267,11 +300,15 @@
     ...
     ...   @property
     ...   def tensors(self):
-    ...     return tuple(self._flatten(predicate=is_tensor))
+    ...     return tuple(self._flatten(predicate=is_tensor, with_path=True))
 
     >>> foo = Foo()
     >>> foo.tensors
-    (<tf.Tensor...'a'>, <tf.Tensor...'b'>, ...'c'>, ...'d'>, ...'e'>)
+    ((('x', 0),   <tf.Tensor: ...'a'>),
+     (('x', 1),   <tf.Tensor: ...'b'>),
+     (('y', 'i'), <tf.Tensor: ...'c'>),
+     (('y', 'j'), <tf.Tensor: ...'d'>),
+     (('z',),     <tf.Tensor: ...'e'>))
 
     `attribute_traversal_key` controls the order object properties are visited.
     If not set objects are visited in ascending order by name.
@@ -284,6 +321,10 @@
       attribute_traversal_key: (Optional) Method to rekey object attributes
         before they are sorted. Contract is the same as `key` argument to
         builtin `sorted` and only applies to object properties.
+      with_path: (Optional) Whether to include the path to the object as well
+        as the object itself. If `with_path` is `True` then leaves will not be
+        de-duplicated (e.g. if the same leaf instance is reachable via multiple
+        modules then it will be yielded multiple times with different paths).
 
     Returns:
       Flat generator for leaves of the current module and optionally all
@@ -297,7 +338,7 @@
         recursive=recursive,
         predicate=predicate,
         attribute_traversal_key=attribute_traversal_key,
-        seen=set())
+        with_path=with_path)
 
   @classmethod
   def no_name_scope(cls, method):
@@ -319,7 +360,7 @@
     Returns:
       The method, with a flag indicating no name scope wrapping should occur.
     """
-    setattr(method, "_no_module_name_scope", True)
+    setattr(method, NO_MODULE_NAME_SCOPE, True)
     return method
 
 _IS_VARIABLE = lambda o: isinstance(o, variables.Variable)
@@ -337,8 +378,20 @@
   return _CAMEL_TO_SNAKE_R.sub(r"_\1", value).lower()
 
 
-def _flatten_module(module, recursive, predicate, attribute_traversal_key,
-                    seen):
+# AutoTrackable adds object attributes that users will not expect us to
+# include when flattening (these reference dependencies reachable via other
+# object attributes).
+AUTO_CHECKPOINTABLE_ATTRS = ("_unconditional_checkpoint_dependencies",
+                             "_unconditional_dependency_names")
+
+
+def _flatten_module(module,
+                    recursive,
+                    predicate,
+                    attribute_traversal_key,
+                    with_path,
+                    module_path=(),
+                    seen=None):
   """Implementation of `flatten`."""
   if seen is None:
     seen = set([id(module)])
@@ -347,25 +400,37 @@
   submodules = []
 
   for key in sorted(module_dict, key=attribute_traversal_key):
-    for leaf in nest.flatten(module_dict[key]):
-      leaf_id = id(leaf)
-      if leaf_id in seen:
-        continue
+    if key in AUTO_CHECKPOINTABLE_ATTRS:
+      continue
 
-      seen.add(leaf_id)
+    for leaf_path, leaf in nest.flatten_with_tuple_paths(module_dict[key]):
+      leaf_path = (key,) + leaf_path
+
+      # TODO(tomhennigan) Handle cycles for `with_path=True` (e.g. `a.a = a`).
+      if not with_path:
+        leaf_id = id(leaf)
+        if leaf_id in seen:
+          continue
+        seen.add(leaf_id)
+
       if predicate(leaf):
-        yield leaf
+        if with_path:
+          yield module_path + leaf_path, leaf
+        else:
+          yield leaf
 
       if recursive and isinstance(leaf, Module):
         # Walk direct properties first then recurse.
-        submodules.append(leaf)
+        submodules.append((module_path + leaf_path, leaf))
 
-  for submodule in submodules:
+  for submodule_path, submodule in submodules:
     subvalues = _flatten_module(
         submodule,
         recursive=recursive,
         predicate=predicate,
         attribute_traversal_key=attribute_traversal_key,
+        with_path=with_path,
+        module_path=submodule_path,
         seen=seen)
 
     for subvalue in subvalues:
diff --git a/tensorflow/python/module/module_test.py b/tensorflow/python/module/module_test.py
index da9bcf7..52bb97b 100644
--- a/tensorflow/python/module/module_test.py
+++ b/tensorflow/python/module/module_test.py
@@ -18,11 +18,14 @@
 from __future__ import division
 from __future__ import print_function
 
+import abc
 import collections
 
 from absl.testing import parameterized
+import six
 
 from tensorflow.python.compat import v2_compat
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import ops
 from tensorflow.python.module import module
 from tensorflow.python.ops import variables
@@ -84,6 +87,20 @@
     # `foo` is not a method so we do not re-enter the name scope.
     self.assertEqual(mod.foo(), "")
 
+  def test_property(self):
+    mod = PropertyModule()
+    mod.some_property = None, None  # None, None for the linter.
+    getter_scope_name, setter_scope_name = mod.some_property
+    self.assertEqual(getter_scope_name, "property_module/")
+    self.assertEqual(setter_scope_name, "property_module/")
+
+  def test_property_no_name_scope(self):
+    mod = PropertyModule()
+    mod.no_name_scope_property = None, None  # None, None for the linter.
+    getter_scope_name, setter_scope_name = mod.no_name_scope_property
+    self.assertEqual(getter_scope_name, "")
+    self.assertEqual(setter_scope_name, "")
+
   def test_invalid_name(self):
     msg = ".* is not a valid module name"
     with self.assertRaisesRegexp(ValueError, msg):
@@ -133,6 +150,34 @@
 
     self.assertEqual("", get_name_scope())
 
+  def test_get_attr_doesnt_enter_name_scope(self):
+    scope_names = []
+
+    class GetAttrModule(module.Module):
+
+      def __getattr__(self, name):
+        scope_names.append((name, get_name_scope()))
+        return super(GetAttrModule, self).__getattr__(name)
+
+    mod = GetAttrModule()
+    with self.assertRaises(AttributeError):
+      mod.does_not_exist  # pylint: disable=pointless-statement
+    self.assertIn(("does_not_exist", ""), scope_names)
+
+  def test_get_attribute_doesnt_enter_name_scope(self):
+    scope_names = []
+
+    class GetAttributeModule(module.Module):
+
+      def __getattribute__(self, name):
+        scope_names.append((name, get_name_scope()))
+        return super(GetAttributeModule, self).__getattribute__(name)
+
+    mod = GetAttributeModule()
+    with self.assertRaises(AttributeError):
+      mod.does_not_exist  # pylint: disable=pointless-statement
+    self.assertIn(("does_not_exist", ""), scope_names)
+
 
 class VariableNamingTest(test.TestCase):
 
@@ -200,6 +245,43 @@
         CallsMethodBeforeSuperConstructorModule(allowed_method=True))
 
 
+class ForwardMethodsTest(test.TestCase):
+
+  def testFunctionType(self):
+    mod = ModuleWithFunctionAnnotatedCall()
+    self.assertTrue(isinstance(mod.forward, def_function.Function))
+    self.assertTrue(isinstance(mod.forward_ag, def_function.Function))
+
+  def testEntersNameScope_call(self):
+    mod = ModuleWithFunctionAnnotatedCall()
+    self.assertEqual(mod.forward().numpy(),
+                     b"module_with_function_annotated_call/")
+    self.assertEqual(mod.forward_ag().numpy(),
+                     b"module_with_function_annotated_call/")
+
+  def testEntersNameScope_concreteFunction(self):
+    mod = ModuleWithFunctionAnnotatedCall()
+    self.assertEqual(mod.forward.get_concrete_function()().numpy(),
+                     b"module_with_function_annotated_call/")
+    self.assertEqual(mod.forward_ag.get_concrete_function()().numpy(),
+                     b"module_with_function_annotated_call/")
+
+
+class AbcTest(test.TestCase):
+
+  def testAbstract(self):
+    msg = "Can't instantiate .* abstract methods"
+    with self.assertRaisesRegexp(TypeError, msg):
+      AbstractModule()  # pylint: disable=abstract-class-instantiated
+
+  def testConcrete(self):
+    mod = ConcreteModule()
+    x, scope_name = mod(2.)
+    self.assertEqual(x, 4.)
+    self.assertEqual(scope_name, "concrete_module/")
+    self.assertEqual(get_name_scope(), "")
+
+
 def get_name_scope():
   with ops.name_scope("x") as ns:
     return ns[:-2]
@@ -231,6 +313,20 @@
     self.w = variables.Variable(1.0, trainable=trainable, name="mushroom")
 
 
+@six.add_metaclass(abc.ABCMeta)
+class AbstractModule(module.Module):
+
+  @abc.abstractmethod
+  def __call__(self, x):
+    pass
+
+
+class ConcreteModule(AbstractModule):
+
+  def __call__(self, x):
+    return x ** 2, get_name_scope()
+
+
 class TreeModule(module.Module):
 
   def __init__(self, name=None):
@@ -295,14 +391,52 @@
   def with_name_scope(self):
     pass
 
+
+class ModuleWithFunctionAnnotatedCall(module.Module):
+
+  @def_function.function(autograph=False)
+  def forward(self):
+    return get_name_scope()
+
+  @def_function.function(autograph=True)
+  def forward_ag(self):
+    return get_name_scope()
+
+
+class PropertyModule(module.Module):
+
+  def __init__(self):
+    super(PropertyModule, self).__init__()
+    self._setter_scope_name = None
+
+  @property
+  def some_property(self):
+    getter_scope_name = get_name_scope()
+    return getter_scope_name, self._setter_scope_name
+
+  @some_property.setter
+  def some_property(self, my_property):
+    self._setter_scope_name = get_name_scope()
+
+  @property
+  @module.Module.no_name_scope
+  def no_name_scope_property(self):
+    getter_scope_name = get_name_scope()
+    return getter_scope_name, self._setter_scope_name
+
+  @no_name_scope_property.setter
+  @module.Module.no_name_scope
+  def no_name_scope_property(self, my_property):
+    self._setter_scope_name = get_name_scope()
+
 NamedPair = collections.namedtuple("NamedPair", ("first", "second"))
 mk_index_dict = lambda v: dict(enumerate(v))
 
 
-class WalkTest(parameterized.TestCase, test.TestCase):
+class FlattenTest(parameterized.TestCase, test.TestCase):
 
   @parameterized.parameters(lambda v: NamedPair(*v), list, tuple, mk_index_dict)
-  def test_walk(self, container_type):
+  def test_flatten(self, container_type):
     parent = SimpleModule(container_type=container_type)
     child = parent.c
 
@@ -320,6 +454,23 @@
         mod.variables,
         mod._trainable_variables + mod._non_trainable_variables + [mod._bonus])
 
+  def test_with_path(self):
+    mod = module.Module()
+    mod.w = variables.Variable(1.)
+    mod.encoder = module.Module()
+    mod.encoder.w = [({"k": mod.w}, {"k": mod.w})]
+    mod.decoder = mod.encoder
+
+    state_dict = dict(
+        mod._flatten(with_path=True, predicate=module._IS_VARIABLE))
+
+    self.assertEqual(state_dict,
+                     {("w",): mod.w,
+                      ("encoder", "w", 0, 0, "k"): mod.encoder.w[0][0]["k"],
+                      ("encoder", "w", 0, 1, "k"): mod.encoder.w[0][1]["k"],
+                      ("decoder", "w", 0, 0, "k"): mod.decoder.w[0][0]["k"],
+                      ("decoder", "w", 0, 1, "k"): mod.decoder.w[0][1]["k"]},)
+
 
 class LayerModule(module.Module):
 
@@ -363,6 +514,43 @@
 IS_MEMBER = lambda v: isinstance(v, MemberType)
 IS_MODULE = lambda v: isinstance(v, module.Module)
 
+
+class CustomMetaclass(type):
+
+  TAG = "__custom_metaclass__"
+
+  def __new__(mcs, name, bases, clsdict):
+    new_type = super(CustomMetaclass, mcs).__new__(mcs, name, bases, clsdict)
+    setattr(new_type, CustomMetaclass.TAG, True)
+    return new_type
+
+
+class CombiningMetaclass(module.ModuleMetaclass, CustomMetaclass):
+
+  TAG = "__combining_metaclass__"
+
+  def __new__(mcs, name, bases, clsdict):
+    new_type = super(CombiningMetaclass, mcs).__new__(mcs, name, bases, clsdict)
+    setattr(new_type, CombiningMetaclass.TAG, True)
+    return new_type
+
+
+@six.add_metaclass(CombiningMetaclass)
+class ModuleWithCustomMetaclass(module.Module):
+
+  def __init__(self):
+    super(ModuleWithCustomMetaclass, self).__init__()
+    self.init_name_scope = get_name_scope()
+
+
+class CustomMetaclassTest(test.TestCase):
+
+  def testSupportsCustomMetaclass(self):
+    m = ModuleWithCustomMetaclass()
+    self.assertEqual(m.init_name_scope, "module_with_custom_metaclass/")
+    self.assertTrue(getattr(ModuleWithCustomMetaclass, CombiningMetaclass.TAG))
+    self.assertTrue(getattr(ModuleWithCustomMetaclass, CustomMetaclass.TAG))
+
 if __name__ == "__main__":
   v2_compat.enable_v2_behavior()
   test.main()
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 45e741e..5757232 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -839,16 +839,11 @@
 def _BroadcastToGrad(op, grad):
   input_value = op.inputs[0]
   broadcast_shape = op.inputs[1]
-  # Assign ids for each position in input_value.
   input_value_shape = array_ops.shape(input_value)
-  input_value_size = array_ops.size(input_value)
-  ids = array_ops.reshape(math_ops.range(input_value_size), input_value_shape)
-  broadcast_ids = array_ops.broadcast_to(ids, broadcast_shape)
-  # Group by ids and sum its gradients.
-  grad_flatten = array_ops.reshape(grad, [-1])
-  broadcast_ids_flatten = array_ops.reshape(broadcast_ids, [-1])
-  updates_grad_flatten = math_ops.unsorted_segment_sum(grad_flatten,
-                                                       broadcast_ids_flatten,
-                                                       input_value_size)
-  updates_grad = array_ops.reshape(updates_grad_flatten, input_value_shape)
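+  # broadcast_gradient_args yields the axes along which input_value was
+  # broadcast up to broadcast_shape; summing the gradient over those axes
+  # (keeping dims) and reshaping undoes the broadcast.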
+  _, reduction_axes = gen_array_ops.broadcast_gradient_args(broadcast_shape,
+                                                            input_value_shape)
+  updates_grad_reshaped = math_ops.reduce_sum(grad,
+                                              axis=reduction_axes,
+                                              keepdims=True)
+  updates_grad = array_ops.reshape(updates_grad_reshaped, input_value_shape)
   return [updates_grad, None]
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 230ce62..8db23c4 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -67,9 +67,9 @@
   Returns:
     A `Tensor`. Has the same type as `input`.
   """
-  if context.executing_eagerly():
+  if context.executing_eagerly() and not hasattr(input, "graph"):
     input = ops.convert_to_tensor(input)
-    in_device = input.device
+    in_device = input.backing_device
     # TODO(ashankar): Does 'identity' need to invoke execution callbacks?
     context_device = context.context().device_name
     if not context_device:
@@ -3278,8 +3278,8 @@
            batch_dims=0):
   r"""Gather slices from params axis axis according to indices.
 
-  Gather slices from params axis axis according to indices.  `indices` must be
-  an integer tensor of any dimension (usually 0-D or 1-D).
+  Gather slices from params axis `axis` according to `indices`.  `indices` must
+  be an integer tensor of any dimension (usually 0-D or 1-D).
 
   For 0-D (scalar) `indices`:
 
@@ -3336,17 +3336,17 @@
       to `batch_dims`.  Defaults to the first non-batch dimension. Supports
       negative indexes.
     batch_dims: An `integer`.  The number of batch dimensions.  Must be less
-      than `ndims(inices)`.
+      than `rank(indices)`.
 
   Returns:
     A `Tensor`. Has the same type as `params`.
   """
   del validate_indices
-  if axis is None:
-    axis = batch_dims
   if batch_dims != 0:
     with ops.name_scope(name, "Gather", [params, indices, axis]):
       return _batch_gather(params, indices, batch_dims, axis)
+  if axis is None:
+    axis = batch_dims
   if axis != 0:
     # Note that we do a sparse_read here to avoid snapshotting the entire
     # resource variable and doing a gather, which can be inefficient and lead to
@@ -3393,15 +3393,15 @@
   This operation assumes that the leading `batch_dims` dimensions of `indices`
   and `params` are batch dimensions; and performs a `tf.gather` operation within
   each batch. (If `batch_dims` is not specified, then it defaults to
-  `ndims(indices) - 1`.)  In the case in which `batch_dims==0`, this operation
+  `rank(indices)-1`.)  In the case in which `batch_dims==0`, this operation
   is equivalent to `tf.gather`.
 
   Args:
     params: A Tensor. The tensor from which to gather values.
     indices: A Tensor. Must be one of the following types: int32, int64. Index
       tensor. Must be in range `[0, params.shape[batch_dims]]`.
-    batch_dims: An integer.  The number of batch dimensions.  Must be less than
-      ndims(inices).  Defaults to `ndims(indices) - 1` if not specified.
+    batch_dims: An integer or `None`.  The number of batch dimensions.  Must be
+      less than `rank(indices)`.  Defaults to `rank(indices) - 1` if None.
     axis: A `Tensor`. Must be one of the following types: `int32`, `int64`. The
       `axis` in `params` to gather `indices` from. Must be greater than or equal
       to `batch_dims`.  Defaults to the first non-batch dimension. Supports
@@ -3427,10 +3427,10 @@
   if batch_dims < 0:
     batch_dims += indices_ndims
   if batch_dims < 0 or batch_dims >= indices_ndims:
-    raise ValueError("batch_dims = %d must be less than ndims(indices) = %d" %
+    raise ValueError("batch_dims = %d must be less than rank(indices) = %d" %
                      (batch_dims, indices_ndims))
   if params.shape.ndims is not None and batch_dims >= params.shape.ndims:
-    raise ValueError("batch_dims = %d must be less than ndims(params) = %d" %
+    raise ValueError("batch_dims = %d must be less than rank(params) = %d" %
                      (batch_dims, params.shape.ndims))
 
   # Handle axis by transposing the axis dimension to be the first non-batch
diff --git a/tensorflow/python/ops/bitwise_ops_test.py b/tensorflow/python/ops/bitwise_ops_test.py
index c182874..d154b67 100644
--- a/tensorflow/python/ops/bitwise_ops_test.py
+++ b/tensorflow/python/ops/bitwise_ops_test.py
@@ -35,7 +35,6 @@
     super(BitwiseOpTest, self).__init__(method_name)
 
   @test_util.run_deprecated_v1
-  @test_util.disable_xla("This test never passed for XLA")
   def testBinaryOps(self):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
                   dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
@@ -73,7 +72,6 @@
         self.assertAllEqual(truth, popcnt_result)
 
   @test_util.run_deprecated_v1
-  @test_util.disable_xla("This test never passed for XLA")
   def testInvertOp(self):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
                   dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
@@ -99,7 +97,6 @@
           self.assertAllEqual(inverted, expected)
 
   @test_util.run_deprecated_v1
-  @test_util.disable_xla("This test never passed for XLA")
   def testShiftsWithPositiveLHS(self):
     dtype_list = [np.int8, np.int16, np.int32, np.int64,
                   np.uint8, np.uint16, np.uint32, np.uint64]
diff --git a/tensorflow/python/ops/boosted_trees_ops.py b/tensorflow/python/ops/boosted_trees_ops.py
index f6c3702..362c17e 100644
--- a/tensorflow/python/ops/boosted_trees_ops.py
+++ b/tensorflow/python/ops/boosted_trees_ops.py
@@ -43,7 +43,7 @@
 # pylint: enable=unused-import
 
 from tensorflow.python.training import saver
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 
 
 class PruningMode(object):
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index a237cff..767dcb9 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -126,8 +126,8 @@
   In this case, the L2-norm of the output tensor is `clip_norm`.
 
   As another example, if `t` is a matrix and `axes == [1]`, then each row
-  of the output will have L2-norm equal to `clip_norm`. If `axes == [0]`
-  instead, each column of the output will be clipped.
+  of the output will have L2-norm less than or equal to `clip_norm`. If
+  `axes == [0]` instead, each column of the output will be clipped.
 
   This operation is typically used to clip gradients before applying them with
   an optimizer.
diff --git a/tensorflow/python/ops/collective_ops.py b/tensorflow/python/ops/collective_ops.py
index 98668fa..32a71fc 100644
--- a/tensorflow/python/ops/collective_ops.py
+++ b/tensorflow/python/ops/collective_ops.py
@@ -48,7 +48,7 @@
   if not device.canonical_name(t.device):
     raise ValueError('Device assignment required for collective ops')
   if group_size <= 1:
-    raise ValueError('Parameter group_size to add_reduce must be at least 2.')
+    raise ValueError('Parameter group_size to all_reduce must be at least 2.')
   return gen_collective_ops.collective_reduce(t,
                                               group_size=group_size,
                                               group_key=group_key,
@@ -58,6 +58,35 @@
                                               subdiv_offsets=subdiv_offsets)
 
 
+def all_gather(t, group_size, group_key, instance_key):
+  """Accumulates tensors collectively, across devices, along first dimension.
+
+  Args:
+    t: the tensor to participate in the accumulation.
+    group_size: the total number of tensors to be collectively gathered.
+      Each must reside on a different device.
+    group_key: an integer identifying the group of devices.
+    instance_key: an integer identifying the participating group of Ops.
+
+  Returns:
+    An Op implementing the distributed operation.
+
+  Raises:
+    ValueError: if any of the input parameter constraints are not met.
+  """
+  if not device.canonical_name(t.device):
+    raise ValueError('Device assignment required for collective ops')
+  if group_size <= 1:
+    raise ValueError('Parameter group_size to all_gather must be at least 2.')
+  dims = t.shape.as_list()
+  output_shape = [dims[0] * group_size] + dims[1:]
+  return gen_collective_ops.collective_gather(t,
+                                              shape=output_shape,
+                                              group_size=group_size,
+                                              group_key=group_key,
+                                              instance_key=instance_key)
+
+
 def broadcast_send(t, shape, dtype, group_size, group_key, instance_key):
   """Broadcasts one tensor to a group of others, across devices.
 
diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py
index 9c5a39b..c9b376c 100644
--- a/tensorflow/python/ops/collective_ops_test.py
+++ b/tensorflow/python/ops/collective_ops_test.py
@@ -4,7 +4,7 @@
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,8 +25,6 @@
 from tensorflow.python.ops import collective_ops
 from tensorflow.python.platform import test
 
-# TODO(tucker): Make these ops work in eager mode. b/79776476
-
 
 class CollectiveOpTest(test.TestCase):
 
@@ -114,6 +112,42 @@
   def testCollectiveBroadcast(self):
     self._testCollectiveBroadcast([0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1])
 
+  def _testCollectiveGather(self, t0, t1, expected, set_graph_key):
+    group_key = 1
+    instance_key = 1
+    with self.session(
+        config=config_pb2.ConfigProto(device_count={'CPU': 2})) as sess:
+      with ops.device('/CPU:0'):
+        in0 = constant_op.constant(t0)
+        colred0 = collective_ops.all_gather(in0, 2, group_key, instance_key)
+      with ops.device('/CPU:1'):
+        in1 = constant_op.constant(t1)
+        colred1 = collective_ops.all_gather(in1, 2, group_key, instance_key)
+      run_options = config_pb2.RunOptions()
+      if set_graph_key:
+        run_options.experimental.collective_graph_key = 1
+      results = sess.run([colred0, colred1], options=run_options)
+    self.assertAllClose(results[0], expected, rtol=1e-5, atol=1e-5)
+    self.assertAllClose(results[1], expected, rtol=1e-5, atol=1e-5)
+
+  @test_util.run_deprecated_v1
+  def testCollectiveGather(self):
+    self._testCollectiveGather([0, 1, 2, 3, 4, 5, 6, 7],
+                               [10, 11, 12, 13, 14, 15, 16, 17],
+                               [0, 1, 2, 3, 4, 5, 6, 7,
+                                10, 11, 12, 13, 14, 15, 16, 17],
+                               True)
+    self._testCollectiveGather([[0, 1, 2, 3], [4, 5, 6, 7]],
+                               [[10, 11, 12, 13], [14, 15, 16, 17]],
+                               [[0, 1, 2, 3], [4, 5, 6, 7],
+                                [10, 11, 12, 13], [14, 15, 16, 17]],
+                               True)
+    self._testCollectiveGather([[[0, 1], [2, 3]], [[4, 5], [6, 7]]],
+                               [[[10, 11], [12, 13]], [[14, 15], [16, 17]]],
+                               [[[0, 1], [2, 3]], [[4, 5], [6, 7]],
+                                [[10, 11], [12, 13]], [[14, 15], [16, 17]]],
+                               True)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index e0b83c4..9726cd9 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -3443,6 +3443,12 @@
         if try_to_pack and not isinstance(loop_vars, (list, _basetuple)):
           packed = True
           loop_vars = (loop_vars,)
+
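+      # Convert each loop variable to a Tensor up front; TensorArrays flow
+      # through unchanged since they cannot be converted to Tensors.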
+      def convert(x):
+        if isinstance(x, tensor_array_ops.TensorArray):
+          return x
+        return ops.convert_to_tensor(x)
+      loop_vars = nest.map_structure(convert, loop_vars)
       if maximum_iterations is not None:
         return loop_vars[1]
       else:
diff --git a/tensorflow/python/ops/critical_section_ops.py b/tensorflow/python/ops/critical_section_ops.py
new file mode 100644
index 0000000..21872ff
--- /dev/null
+++ b/tensorflow/python/ops/critical_section_ops.py
@@ -0,0 +1,453 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Critical Section object and execution logic."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+# TODO(ebrevdo): Re-enable once CriticalSection is in core.
+# from tensorflow.core.protobuf import critical_section_pb2
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_resource_variable_ops
+from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import tf_export
+
+
+__all__ = ["CriticalSection"]
+
+
+# Graph Keys
+CRITICAL_SECTIONS = "critical_sections"
+CRITICAL_SECTION_EXECUTIONS = "critical_section_executions"
+
+
+class _ExecutionSignature(
+    collections.namedtuple("_ExecutionSignature",
+                           ("op", "handle",
+                            "resources", "exclusive_resource_access"))):
+  """A class storing an `ExecuteInCriticalResource` op and associated attrs."""
+  pass
+
+
+def _identity(x):
+  """Identity op that recognizes `TensorArray`, `Operation`, and `Tensor`."""
+  if isinstance(x, tensor_array_ops.TensorArray):
+    return x.identity()
+  elif isinstance(x, ops.Operation):
+    return control_flow_ops.group(x)
+  elif context.executing_eagerly() and x is None:
+    return None
+  else:
+    return array_ops.identity(x)
+
+
+def _get_colocation(op):
+  """Get colocation symbol from op, if any."""
+  try:
+    return op.get_attr("_class")
+  except ValueError:
+    return None
+
+
+@tf_export("CriticalSection")
+class CriticalSection(object):
+  """Critical section.
+
+  A `CriticalSection` object is a resource in the graph that executes subgraphs
+  in **serial** order.  A common example of a subgraph one may wish to run
+  exclusively is the one given by the following function:
+
+  ```python
+  v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+  def count():
+    value = v.read_value()
+    with tf.control_dependencies([value]):
+      with tf.control_dependencies([v.assign_add(1)]):
+        return tf.identity(value)
+  ```
+
+  Here, a snapshot of `v` is captured in `value`; and then `v` is updated.
+  The snapshot value is returned.
+
+  If multiple workers or threads all execute `count` in parallel, there is no
+  guarantee that access to the variable `v` is atomic at any point within
+  any thread's calculation of `count`.  In fact, even implementing an atomic
+  counter that guarantees that the user will see each value `0, 1, ...` is
+  currently impossible.
+
+  The solution is to ensure any access to the underlying resource `v` is
+  only processed through a critical section:
+
+  ```python
+  cs = CriticalSection()
+  f1 = cs.execute(count)
+  f2 = cs.execute(count)
+  output = f1 + f2
+  session.run(output)
+  ```
+  The functions `f1` and `f2` will be executed serially, and updates to `v`
+  will be atomic.
+
+  **NOTES**
+
+  All resource objects, including the critical section and any captured
+  variables of functions executed on that critical section, will be
+  colocated to the same device (host and cpu/gpu).
+
+  When using multiple critical sections on the same resources, there is no
+  guarantee of exclusive access to those resources.  This behavior is disallowed
+  by default (but see the kwarg `exclusive_resource_access`).
+
+  For example, running the same function in two separate critical sections
+  will not ensure serial execution:
+
+  ```python
+  v = tf.get_variable("v", initializer=0.0, use_resource=True)
+  def accumulate(up):
+    x = v.read_value()
+    with tf.control_dependencies([x]):
+      with tf.control_dependencies([v.assign_add(up)]):
+        return tf.identity(x)
+  ex1 = CriticalSection().execute(
+    accumulate, 1.0, exclusive_resource_access=False)
+  ex2 = CriticalSection().execute(
+    accumulate, 1.0, exclusive_resource_access=False)
+  bad_sum = ex1 + ex2
+  sess.run(v.initializer)
+  sess.run(bad_sum)  # May return 0.0
+  ```
+  """
+
+  def __init__(self, name=None, shared_name=None,
+               critical_section_def=None, import_scope=None):
+    """Creates a critical section."""
+    if critical_section_def and name is not None:
+      raise ValueError("critical_section_def and shared_name are "
+                       "mutually exclusive.")
+    if critical_section_def:
+      self._init_from_proto(critical_section_def, import_scope=import_scope)
+    else:
+      self._init_from_args(name, shared_name)
+
+  def _init_from_proto(self, critical_section_def, import_scope):  # pylint: disable=invalid-name
+    raise NotImplementedError("Not yet implemented")
+    # TODO(ebrevdo): Re-enable once CriticalSection is in core.
+    # assert isinstance(
+    #     critical_section_def, critical_section_pb2.CriticalSectionDef)
+    # # Create from critical_section_def.
+    # g = ops.get_default_graph()
+    # self._handle = g.as_graph_element(
+    #     ops.prepend_name_scope(
+    #         critical_section_def.critical_section_name,
+    #         import_scope=import_scope))
+
+  def _init_from_args(self, name, shared_name):  # pylint: disable=invalid-name
+    """Initialize the CriticalSection from constructor arguments."""
+    with ops.name_scope(name, "CriticalSection", []) as name:
+      with ops.init_scope():
+        # pylint: disable=protected-access
+        container = ops.get_default_graph()._container
+        # pylint: enable=protected-access
+        if shared_name is None:
+          shared_name = name
+        if container is None:
+          container = ""
+        self._handle = gen_resource_variable_ops.mutex_v2(
+            shared_name=shared_name, container=container, name=name)
+
+    if not context.executing_eagerly():
+      ops.add_to_collections(CRITICAL_SECTIONS, self)
+
+  @property
+  def name(self):
+    return self._handle.op.name
+
+  def execute(self, fn, exclusive_resource_access=True, name=None):
+    """Execute function `fn()` inside the critical section.
+
+    `fn` should not accept any arguments.  To add extra arguments when
+    calling `fn` in the critical section, create a lambda:
+
+    ```python
+    critical_section.execute(lambda: fn(*my_args, **my_kwargs))
+    ```
+
+    Args:
+      fn: The function to execute.  Must return at least one tensor.
+      exclusive_resource_access: Whether the resources required by
+        `fn` should be exclusive to this `CriticalSection`.  Default: `True`.
+        You may want to set this to `False` if you will be accessing a
+        resource in read-only mode in two different CriticalSections.
+      name: The name to use when creating the execute operation.
+
+    Returns:
+      The tensors returned from `fn()`.
+
+    Raises:
+      ValueError: If `fn` attempts to lock this `CriticalSection` in any nested
+        or lazy way that may cause a deadlock.
+      ValueError: If `exclusive_resource_access == True` and
+        another `CriticalSection` has an execution requesting the same
+        resources as `fn`.  Note: even if `exclusive_resource_access` is
+        `False`, if another execution in another `CriticalSection` was created
+        with `exclusive_resource_access=True`, a `ValueError` will be raised.
+    """
+    with ops.name_scope(name, "critical_section_execute", []):
+
+      # Ensure that mutex locking only happens *after* all args and
+      # kwargs have been executed.  This avoids certain types of deadlocks.
+      lock = gen_resource_variable_ops.mutex_lock(self._handle)
+
+      if not context.executing_eagerly():
+        # NOTE(ebrevdo): This is to ensure we don't pick up spurious
+        # Operations created by other threads.
+        with ops.get_default_graph()._lock:  # pylint: disable=protected-access
+          existing_ops = ops.get_default_graph().get_operations()
+          with ops.control_dependencies([lock]):
+            r = fn()
+          # TODO(ebrevdo): If creating critical sections in a python loop, this
+          # makes graph creation time quadratic.  Revisit if this
+          # becomes a problem.
+          created_ops = (set(ops.get_default_graph().get_operations())
+                         .difference(existing_ops))
+      else:
+        with ops.control_dependencies([lock]):
+          r = fn()
+
+      if not context.executing_eagerly():
+        self._add_control_dependencies_to_lock(created_ops, lock.op)
+
+        # captured_resources is a set of resources that are directly
+        # accessed only by ops created during fn(), not by any
+        # ancestors of those ops in the graph.
+        captured_resources = set([
+            input_ for op in created_ops
+            for input_ in op.inputs
+            if input_.dtype == dtypes.resource
+        ])
+
+        # NOTE(ebrevdo): The only time self._is_self_handle() is True
+        # in this call is if one of the recently created ops, within
+        # the execute(), themselves attempt to access the
+        # CriticalSection.  This will cause a deadlock.
+        if any(self._is_self_handle(x) for x in captured_resources):
+          raise ValueError("The function fn attempts to directly access the "
+                           "CriticalSection in which it would be running.  "
+                           "This is illegal and would cause deadlocks.")
+
+        self._check_multiple_access_to_resources(
+            captured_resources, exclusive_resource_access)
+
+      r_flat = [_identity(x) for x in nest.flatten(r)]
+
+      with ops.control_dependencies(r_flat):
+        # The identity must run on the same machine as self._handle
+        with ops.colocate_with(self._handle):
+          # Do not use array_ops.identity as there are special
+          # optimizations within TensorFlow which seem to elide it
+          # even when optimizations are disabled(!).
+          ensure_lock_exists = gen_resource_variable_ops.consume_mutex_lock(
+              lock)
+
+        # Make sure that if any element of r is accessed, all of
+        # them are executed together.
+        r = nest.pack_sequence_as(r, control_flow_ops.tuple(nest.flatten(r)))
+
+      with ops.control_dependencies([ensure_lock_exists]):
+        outputs = nest.map_structure(_identity, r)
+
+      if not context.executing_eagerly():
+        signature = _ExecutionSignature(
+            op=lock.op,
+            handle=self._handle,
+            resources=list(captured_resources),
+            exclusive_resource_access=exclusive_resource_access)
+        ops.add_to_collections(
+            CRITICAL_SECTION_EXECUTIONS, signature)
+
+      return outputs
+
+  def _add_control_dependencies_to_lock(self, created_ops, lock_op):
+    """To avoid deadlocks, all args must be executed before lock_op."""
+    # Get all arguments (explicit and captured) of all ops created by fn().
+    all_args = set([input_.op for op in created_ops for input_ in op.inputs])
+    all_args.update(
+        input_op for op in created_ops for input_op in op.control_inputs)
+    # Unfortunately, we can't use sets throughout because TF seems to
+    # create new Operation objects for the same op sometimes; and we
+    # can't rely on id(op).
+
+    # pylint: disable=protected-access
+    all_args_dict = dict((op._id, op) for op in all_args)
+
+    # Remove ops created within fn, or that lock_op already has a
+    # control dependency on.  Also remove a possible self-loop.
+    for op in created_ops:
+      all_args_dict.pop(op._id, None)
+    for op in lock_op.control_inputs:
+      all_args_dict.pop(op._id, None)
+    for input_ in lock_op.inputs:
+      all_args_dict.pop(input_.op._id, None)
+    all_args_dict.pop(lock_op._id, None)
+
+    all_args = all_args_dict.values()
+
+    if not all_args:
+      # No control dependencies to add; return early.
+      return
+
+    # This group is important: it ensures that any ops in all_args
+    # outside the control context of the lock_op (and this fn, which
+    # runs in the same context) are added to this context before
+    # being added to the control dependencies of lock_op.
+    all_args = control_flow_ops.group(*all_args)
+
+    lock_op._add_control_input(all_args)
+    # pylint: enable=protected-access
+
+  def _is_self_handle(self, x):
+    """Check if the tensor `x` is the same Mutex as `self._handle`."""
+    if isinstance(x, ops.EagerTensor):
+      return x is self._handle
+    return (x.op.type == "MutexV2"
+            # blank shared_name means the op will create a unique one.
+            and x.op.get_attr("shared_name")
+            and (x.op.get_attr("shared_name") ==
+                 self._handle.op.get_attr("shared_name"))
+            and (x.op.device == self._handle.op.device
+                 or _get_colocation(x.op) == _get_colocation(self._handle.op)))
+
+  def _check_multiple_access_to_resources(
+      self, captured_resources, exclusive_resource_access):
+    """Raise if captured_resources are accessed by another CriticalSection.
+
+    Args:
+      captured_resources: Set of tensors of type resource.
+      exclusive_resource_access: Whether this execution requires exclusive
+        resource access.
+
+    Raises:
+      ValueError: If any tensors in `captured_resources` are also accessed
+        by another `CriticalSection`, and at least one of them requires
+        exclusive resource access.
+    """
+    # Collections and op introspection do not work in eager
+    # mode.  This is generally ok, since eager mode (as of this
+    # writing) executes sequentially anyway.
+    for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS):
+      if self._is_self_handle(sg.handle):
+        # Other executions in the same critical section are allowed.
+        continue
+      if not (exclusive_resource_access or sg.exclusive_resource_access):
+        # Neither execution requested exclusive access.
+        continue
+      resource_intersection = captured_resources.intersection(sg.resources)
+      if resource_intersection:
+        raise ValueError(
+            "This execution would access resources: %s.  Either this "
+            "lock (CriticalSection: %s) or lock '%s' "
+            "(CriticalSection: %s) requested exclusive resource access "
+            "of this resource.  Did you mean to call execute with keyword "
+            "argument exclusive_resource_access=False?" %
+            (list(resource_intersection), self._handle, sg, sg.handle))
+
+  # TODO(ebrevdo): Re-enable once CriticalSection is in core.
+
+  # def to_proto(self, export_scope=None):
+  #   """Converts a `CriticalSection` to a `CriticalSectoinDef` protocol buffer.
+
+  #   Args:
+  #     export_scope: Optional `string`. Name scope to remove.
+
+  #   Returns:
+  #     A `CriticalSectionDef` protocol buffer, or `None` if the
+  #     `CriticalSection` is not in the specified name scope.
+  #   """
+  #   if export_scope is None or self.handle.name.startswith(export_scope):
+  #     cs_def = critical_section_pb2.CriticalSectionDef()
+  #     cs_def.critical_section_name = ops.strip_name_scope(
+  #         self._handle.name, export_scope)
+  #     return cs_def
+  #   else:
+  #     return None
+
+  # @staticmethod
+  # def from_proto(critical_section_def, import_scope=None):
+  #   return CriticalSection(
+  #       critical_section_def=critical_section_def, import_scope=import_scope)
+
+
+# TODO(ebrevdo): Re-enable once CriticalSection is in core.
+
+# def _execution_to_proto_fn(execution_signature, export_scope=None):
+#   """Converts `_ExecutionSignature` to a `CriticalSectionExecutionDef`.
+#   # TODO(ebrevdo): Update for _ExecutionSignature storing resource list.
+
+#   Args:
+#     execution_signature: Instance of `_ExecutionSignature`.
+#     export_scope: The export scope, if any.
+
+#   Returns:
+#     An instance of `CriticalSectionExecutionDef`.
+#   """
+#   if (export_scope is None
+#       or execution_signature.op.name.startswith(export_scope)):
+#     op_def = critical_section_pb2.CriticalSectionExecutionDef()
+#     op_def.execute_in_critical_section_name = ops.strip_name_scope(
+#         execution_signature.op.name, export_scope)
+#     op_def.exclusive_resource_access = (
+#         execution_signature.exclusive_resource_access)
+#     return op_def
+#   else:
+#     return None
+
+
+# def _execution_from_proto_fn(op_def, import_scope=None):
+#   """Converts a `CriticalSectionExecutionDef` to a `_ExecutionSignature`."""
+#   # TODO(ebrevdo): Update for _ExecutionSignature storing resource list.
+#   assert isinstance(
+#       op_def, critical_section_pb2.CriticalSectionExecutionDef)
+
+#   # Create from op_def.
+#   g = ops.get_default_graph()
+#   execution_op = g.as_graph_element(
+#       ops.prepend_name_scope(
+#           op_def.execute_in_critical_section_name,
+#           import_scope=import_scope))
+#   return _ExecutionSignature(
+#       op=execution_op,
+#       exclusive_resource_access=op_def.exclusive_resource_access)
+
+# ops.register_proto_function(
+#     CRITICAL_SECTIONS,
+#     proto_type=critical_section_pb2.CriticalSectionDef,
+#     to_proto=CriticalSection.to_proto,
+#     from_proto=CriticalSection.from_proto)
+
+# ops.register_proto_function(
+#     CRITICAL_SECTION_EXECUTIONS,
+#     proto_type=critical_section_pb2.CriticalSectionExecutionDef,
+#     to_proto=_execution_to_proto_fn,
+#     from_proto=_execution_from_proto_fn)
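
A usage note on the self-access check in `execute` above, as a hedged graph-mode sketch (not part of the diff): nesting an `execute` on the same `CriticalSection` is caught at graph-construction time and rejected instead of deadlocking at runtime.

```python
import tensorflow as tf

cs = tf.CriticalSection()
v = tf.get_variable("v", initializer=0.0, use_resource=True)

def add_one():
  return v.assign_add(1.0)

ok = cs.execute(add_one)  # Fine: access to v is serialized through cs.

def nested():
  return cs.execute(add_one)  # Locks cs again while cs is already held.

try:
  cs.execute(nested)  # Detected via _is_self_handle on captured resources.
except ValueError as e:
  print(e)  # "The function fn attempts to directly access the CriticalSection..."
```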
diff --git a/tensorflow/python/ops/image_grad_test.py b/tensorflow/python/ops/image_grad_test.py
index c481266..f363f1b 100644
--- a/tensorflow/python/ops/image_grad_test.py
+++ b/tensorflow/python/ops/image_grad_test.py
@@ -28,6 +28,7 @@
 from tensorflow.python.platform import test
 
 
+@test_util.disable_all_xla("b/124289666")  # align_corners=False unimplemented
 class ResizeNearestNeighborOpTest(test.TestCase):
 
   TYPES = [np.float32, np.float64]
@@ -149,6 +150,7 @@
     self.assertLess(err, 1e-3)
 
   @test_util.run_deprecated_v1
+  @test_util.disable_xla("b/124290659")  # align_corners=False unimplemented
   def testCompareGpuVsCpu(self):
     in_shape = [2, 4, 6, 3]
     out_shape = [2, 8, 16, 3]
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index b032d64..490e80e 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -2399,6 +2399,7 @@
         self.assertAllEqual(img_shape, newshape)
         self.assertAllClose(resized, img_np, atol=1e-5)
 
+  @test_util.disable_xla("b/124289666")  # align_corners=False unimplemented
   def testResizeDown(self):
     # This test is also conducted with int8, so 127 is the maximum
     # value that can be used.
@@ -2430,6 +2431,7 @@
               resized = self.evaluate(y)
               self.assertAllClose(resized, expected, atol=1e-5)
 
+  @test_util.disable_xla("b/124289666")  # align_corners=False unimplemented
   def testResizeUpAlignCornersFalse(self):
     img_shape = [1, 3, 2, 1]
     data = [64, 32, 32, 64, 50, 100]
@@ -2553,6 +2555,7 @@
       resized = self.evaluate(y)
       self.assertAllClose(resized, expected, atol=1)
 
+  @test_util.disable_xla("b/124289666")  # align_corners=False unimplemented
   def testCompareNearestNeighbor(self):
     if test.is_gpu_available():
       input_shape = [1, 5, 6, 3]
@@ -3612,7 +3615,8 @@
     # If we negate all pixel-values then the total variation is unchanged.
     self._test(-a, tot_var)
 
-    # Scale the pixel-values by a float. This scales the total variation as well.
+    # Scale the pixel-values by a float. This scales the total variation as
+    # well.
     b = 1.1 * a
     self._test(b, 1.1 * tot_var)
 
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index a4cebc8..035534e 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -38,6 +38,7 @@
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_linalg_ops
 from tensorflow.python.ops import linalg_ops_impl
@@ -531,7 +532,7 @@
     else:
       scale /= max(1., (fan_in + fan_out) / 2.)
     if self.distribution == "normal" or self.distribution == "truncated_normal":
-    # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
+      # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
       stddev = math.sqrt(scale) / .87962566103423978
       return random_ops.truncated_normal(
           shape, 0.0, stddev, dtype, seed=self.seed)
@@ -605,7 +606,8 @@
     num_rows = 1
     for dim in shape[:-1]:
       num_rows *= dim
-    num_cols = shape[-1]
+    num_rows = int(num_rows)
+    num_cols = int(shape[-1])
     if num_rows < num_cols:
       flat_shape = (num_cols, num_rows)
     else:
@@ -1207,6 +1209,8 @@
           "Identity matrix initializer can only be used for 2D matrices.")
     if dtype is None:
       dtype = self.dtype
+    if isinstance(full_shape, tensor_shape.TensorShape):
+      full_shape = full_shape.as_list()
     initializer = linalg_ops_impl.eye(*full_shape, dtype=dtype)
     if partition_info is not None:
       initializer = array_ops.slice(initializer, partition_info.var_offset,
@@ -1263,9 +1267,10 @@
   """The Glorot normal initializer, also called Xavier normal initializer.
 
   It draws samples from a truncated normal distribution centered on 0
-  with `stddev = sqrt(2 / (fan_in + fan_out))`
-  where `fan_in` is the number of input units in the weight tensor
-  and `fan_out` is the number of output units in the weight tensor.
+  with standard deviation (after truncation) given by
+  `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number
+  of input units in the weight tensor and `fan_out` is the number of
+  output units in the weight tensor.
 
   Args:
     seed: A Python integer. Used to create random seeds. See
@@ -1321,8 +1326,9 @@
   """LeCun normal initializer.
 
   It draws samples from a truncated normal distribution centered on 0
-  with `stddev = sqrt(1 / fan_in)`
-  where `fan_in` is the number of input units in the weight tensor.
+  with standard deviation (after truncation) given by
+  `stddev = sqrt(1 / fan_in)` where `fan_in` is the number of
+  input units in the weight tensor.
 
   Arguments:
       seed: A Python integer. Used to seed the random generator.
@@ -1371,8 +1377,9 @@
   """He normal initializer.
 
   It draws samples from a truncated normal distribution centered on 0
-  with `stddev = sqrt(2 / fan_in)`
-  where `fan_in` is the number of input units in the weight tensor.
+  with standard deviation (after truncation) given by
+  `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of
+  input units in the weight tensor.
 
   Arguments:
       seed: A Python integer. Used to seed the random generator.
@@ -1422,7 +1429,7 @@
     shape: Integer shape tuple or TF tensor shape.
 
   Returns:
-    A tuple of scalars (fan_in, fan_out).
+    A tuple of integer scalars (fan_in, fan_out).
   """
   if len(shape) < 1:  # Just to avoid errors for constants.
     fan_in = fan_out = 1
@@ -1434,12 +1441,12 @@
   else:
     # Assuming convolution kernels (2D, 3D, or more).
     # kernel shape: (..., input_depth, depth)
-    receptive_field_size = 1.
+    receptive_field_size = 1
     for dim in shape[:-2]:
       receptive_field_size *= dim
     fan_in = shape[-2] * receptive_field_size
     fan_out = shape[-1] * receptive_field_size
-  return fan_in, fan_out
+  return int(fan_in), int(fan_out)
 
 
 def _assert_float_dtype(dtype):
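
The `int(...)` coercions in `_compute_fans` above matter because shape entries may be `tf.Dimension` objects rather than plain ints. A standalone mirror of the computation for illustration (not part of the diff; assumes plain int shapes):

```python
def compute_fans(shape):
  if len(shape) < 1:      # Scalars: avoid errors for constants.
    fan_in = fan_out = 1
  elif len(shape) == 1:
    fan_in = fan_out = shape[0]
  elif len(shape) == 2:   # Dense kernel: (input_units, output_units).
    fan_in, fan_out = shape[0], shape[1]
  else:                   # Conv kernel: (..., input_depth, depth).
    receptive_field_size = 1
    for dim in shape[:-2]:
      receptive_field_size *= dim
    fan_in = shape[-2] * receptive_field_size
    fan_out = shape[-1] * receptive_field_size
  return int(fan_in), int(fan_out)

# A 3x3 conv kernel with 64 input and 128 output channels:
assert compute_fans((3, 3, 64, 128)) == (3 * 3 * 64, 3 * 3 * 128)
```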
diff --git a/tensorflow/python/ops/init_ops_test.py b/tensorflow/python/ops/init_ops_test.py
index b3cdec9d..1205f36 100644
--- a/tensorflow/python/ops/init_ops_test.py
+++ b/tensorflow/python/ops/init_ops_test.py
@@ -24,6 +24,7 @@
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape as tensor_shape_lib
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import variable_scope
@@ -54,114 +55,126 @@
       self.assertGreater(lim, abs(output.min() - target_min))
 
   def test_uniform(self):
-    tensor_shape = (9, 6, 7)
+    shape = (9, 6, 99)
     with self.cached_session():
-      self._runner(
-          init_ops.RandomUniform(minval=-1, maxval=1, seed=124),
-          tensor_shape,
-          target_mean=0.,
-          target_max=1,
-          target_min=-1)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        self._runner(
+            init_ops.RandomUniform(minval=-1, maxval=1, seed=124),
+            tensor_shape,
+            target_mean=0.,
+            target_max=1,
+            target_min=-1)
 
   def test_normal(self):
-    tensor_shape = (8, 12, 99)
+    shape = (8, 12, 99)
     with self.cached_session():
-      self._runner(
-          init_ops.RandomNormal(mean=0, stddev=1, seed=153),
-          tensor_shape,
-          target_mean=0.,
-          target_std=1)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        self._runner(
+            init_ops.RandomNormal(mean=0, stddev=1, seed=153),
+            tensor_shape,
+            target_mean=0.,
+            target_std=1)
 
   def test_truncated_normal(self):
-    tensor_shape = (12, 99, 7)
+    shape = (12, 99, 7)
     with self.cached_session():
-      self._runner(
-          init_ops.TruncatedNormal(mean=0, stddev=1, seed=126),
-          tensor_shape,
-          target_mean=0.,
-          target_max=2,
-          target_min=-2)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        self._runner(
+            init_ops.TruncatedNormal(mean=0, stddev=1, seed=126),
+            tensor_shape,
+            target_mean=0.,
+            target_max=2,
+            target_min=-2)
 
   def test_constant(self):
-    tensor_shape = (5, 6, 4)
+    shape = (5, 6, 4)
     with self.cached_session():
-      self._runner(
-          init_ops.Constant(2),
-          tensor_shape,
-          target_mean=2,
-          target_max=2,
-          target_min=2)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        self._runner(
+            init_ops.Constant(2),
+            tensor_shape,
+            target_mean=2,
+            target_max=2,
+            target_min=2)
 
   def test_lecun_uniform(self):
-    tensor_shape = (5, 6, 4, 2)
+    shape = (5, 6, 4, 2)
     with self.cached_session():
-      fan_in, _ = init_ops._compute_fans(tensor_shape)
-      std = np.sqrt(1. / fan_in)
-      self._runner(
-          init_ops.lecun_uniform(seed=123),
-          tensor_shape,
-          target_mean=0.,
-          target_std=std)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        fan_in, _ = init_ops._compute_fans(tensor_shape)
+        std = np.sqrt(1. / fan_in)
+        self._runner(
+            init_ops.lecun_uniform(seed=123),
+            tensor_shape,
+            target_mean=0.,
+            target_std=std)
 
   def test_glorot_uniform_initializer(self):
-    tensor_shape = (5, 6, 4, 2)
+    shape = (5, 6, 4, 2)
     with self.cached_session():
-      fan_in, fan_out = init_ops._compute_fans(tensor_shape)
-      std = np.sqrt(2. / (fan_in + fan_out))
-      self._runner(
-          init_ops.glorot_uniform_initializer(seed=123),
-          tensor_shape,
-          target_mean=0.,
-          target_std=std)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        fan_in, fan_out = init_ops._compute_fans(tensor_shape)
+        std = np.sqrt(2. / (fan_in + fan_out))
+        self._runner(
+            init_ops.glorot_uniform_initializer(seed=123),
+            tensor_shape,
+            target_mean=0.,
+            target_std=std)
 
   def test_he_uniform(self):
-    tensor_shape = (5, 6, 4, 2)
+    shape = (5, 6, 4, 2)
     with self.cached_session():
-      fan_in, _ = init_ops._compute_fans(tensor_shape)
-      std = np.sqrt(2. / fan_in)
-      self._runner(
-          init_ops.he_uniform(seed=123),
-          tensor_shape,
-          target_mean=0.,
-          target_std=std)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        fan_in, _ = init_ops._compute_fans(tensor_shape)
+        std = np.sqrt(2. / fan_in)
+        self._runner(
+            init_ops.he_uniform(seed=123),
+            tensor_shape,
+            target_mean=0.,
+            target_std=std)
 
   def test_lecun_normal(self):
-    tensor_shape = (5, 6, 4, 2)
+    shape = (5, 6, 4, 2)
     with self.cached_session():
-      fan_in, _ = init_ops._compute_fans(tensor_shape)
-      std = np.sqrt(1. / fan_in)
-      self._runner(
-          init_ops.lecun_normal(seed=123),
-          tensor_shape,
-          target_mean=0.,
-          target_std=std)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        fan_in, _ = init_ops._compute_fans(tensor_shape)
+        std = np.sqrt(1. / fan_in)
+        self._runner(
+            init_ops.lecun_normal(seed=123),
+            tensor_shape,
+            target_mean=0.,
+            target_std=std)
 
   def test_glorot_normal_initializer(self):
-    tensor_shape = (5, 6, 4, 2)
+    shape = (5, 6, 4, 2)
     with self.cached_session():
-      fan_in, fan_out = init_ops._compute_fans(tensor_shape)
-      std = np.sqrt(2. / (fan_in + fan_out))
-      self._runner(
-          init_ops.glorot_normal_initializer(seed=123),
-          tensor_shape,
-          target_mean=0.,
-          target_std=std)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        fan_in, fan_out = init_ops._compute_fans(tensor_shape)
+        std = np.sqrt(2. / (fan_in + fan_out))
+        self._runner(
+            init_ops.glorot_normal_initializer(seed=123),
+            tensor_shape,
+            target_mean=0.,
+            target_std=std)
 
   def test_he_normal(self):
-    tensor_shape = (5, 6, 4, 2)
+    shape = (5, 6, 4, 2)
     with self.cached_session():
-      fan_in, _ = init_ops._compute_fans(tensor_shape)
-      std = np.sqrt(2. / fan_in)
-      self._runner(
-          init_ops.he_normal(seed=123),
-          tensor_shape,
-          target_mean=0.,
-          target_std=std)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        fan_in, _ = init_ops._compute_fans(tensor_shape)
+        std = np.sqrt(2. / fan_in)
+        self._runner(
+            init_ops.he_normal(seed=123),
+            tensor_shape,
+            target_mean=0.,
+            target_std=std)
 
   def test_Orthogonal(self):
-    tensor_shape = (20, 20)
+    shape = (20, 20)
     with self.cached_session():
-      self._runner(init_ops.Orthogonal(seed=123), tensor_shape, target_mean=0.)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        self._runner(
+            init_ops.Orthogonal(seed=123), tensor_shape, target_mean=0.)
 
   def testVariablePlacementWithOrthogonalInitializer(self):
     if not context.context().num_gpus():
@@ -199,31 +212,36 @@
 
   def test_Identity(self):
     with self.cached_session():
-      tensor_shape = (3, 4, 5)
-      with self.assertRaises(ValueError):
+      shape = (3, 4, 5)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        with self.assertRaises(ValueError):
+          self._runner(
+              init_ops.Identity(),
+              tensor_shape,
+              target_mean=1. / int(tensor_shape[0]),
+              target_max=1.)
+
+      shape = (3, 3)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
         self._runner(
             init_ops.Identity(),
             tensor_shape,
-            target_mean=1. / tensor_shape[0],
+            target_mean=1. / int(tensor_shape[0]),
             target_max=1.)
 
-      tensor_shape = (3, 3)
-      self._runner(
-          init_ops.Identity(),
-          tensor_shape,
-          target_mean=1. / tensor_shape[0],
-          target_max=1.)
-
   def test_Zeros(self):
-    tensor_shape = (4, 5)
+    shape = (4, 5)
     with self.cached_session():
-      self._runner(
-          init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        self._runner(
+            init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.)
 
   def test_Ones(self):
-    tensor_shape = (4, 5)
+    shape = (4, 5)
     with self.cached_session():
-      self._runner(init_ops.Ones(), tensor_shape, target_mean=1., target_max=1.)
+      for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
+        self._runner(
+            init_ops.Ones(), tensor_shape, target_mean=1., target_max=1.)
 
 
 if __name__ == '__main__':
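These test changes exercise the init_ops fixes above: initializers and `_compute_fans` now accept a `tf.TensorShape` as well as a plain tuple. A small sketch of what now works (not part of the diff):

```python
import tensorflow as tf

init = tf.glorot_uniform_initializer(seed=123)
shape = tf.TensorShape((5, 6, 4, 2))
value = init(shape)  # Behaves the same as init((5, 6, 4, 2)).
```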
diff --git a/tensorflow/python/ops/linalg/linalg_impl.py b/tensorflow/python/ops/linalg/linalg_impl.py
index 2259eaa..fec2b27 100644
--- a/tensorflow/python/ops/linalg/linalg_impl.py
+++ b/tensorflow/python/ops/linalg/linalg_impl.py
@@ -21,6 +21,7 @@
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_linalg_ops
@@ -329,3 +330,189 @@
           result,
           array_ops.concat((batch_shape, array_ops.shape(result)[-2:]), axis=0))
     return array_ops.reshape(result, batch_shape.concatenate(result.shape[-2:]))
+
+
+@tf_export('linalg.tridiagonal_solve')
+def tridiagonal_solve(diagonals,
+                      rhs,
+                      diagonals_format='compact',
+                      transpose_rhs=False,
+                      conjugate_rhs=False,
+                      name=None):
+  r"""Solves tridiagonal systems of equations.
+
+  The solution is computed via Gaussian elimination with partial pivoting.
+
+  The input can be supplied in various formats: `matrix`, `sequence`, and
+  `compact`, specified by the `diagonals_format` arg.
+
+  In `matrix` format, `diagonals` must be a tensor of shape `[..., M, M]`, with
+  two inner-most dimensions representing the square tridiagonal matrices.
+  Elements outside of the three diagonals will be ignored.
+
+  In `sequence` format, `diagonals` are supplied as a tuple or list of three
+  tensors of shapes `[..., N]`, `[..., M]`, `[..., N]` representing
+  superdiagonals, diagonals, and subdiagonals, respectively. `N` can be either
+  `M-1` or `M`; in the latter case, the last element of the superdiagonal and
+  the first element of the subdiagonal will be ignored.
+
+  In `compact` format the three diagonals are brought together into one tensor
+  of shape `[..., 3, M]`, with the last two dimensions containing
+  superdiagonals, diagonals, and subdiagonals, in order. As in the `sequence`
+  format, `diagonals[..., 0, M-1]` and `diagonals[..., 2, 0]` are ignored.
+
+  The `compact` format is recommended as the one with the best performance. In
+  case you need to convert a tensor into the compact format manually, use
+  `tf.gather_nd`. An example for a tensor of shape `[m, m]`:
+
+  ```python
+  rhs = tf.constant([...])
+  matrix = tf.constant([[...]])
+  m = matrix.shape[0]
+  dummy_idx = [0, 0]  # An arbitrary element to use as a dummy
+  indices = [[[i, i + 1] for i in range(m - 1)] + [dummy_idx],  # Superdiagonal
+             [[i, i] for i in range(m)],                        # Diagonal
+             [dummy_idx] + [[i + 1, i] for i in range(m - 1)]]  # Subdiagonal
+  diagonals = tf.gather_nd(matrix, indices)
+  x = tf.linalg.tridiagonal_solve(diagonals, rhs)
+  ```
+
+  Regardless of the `diagonals_format`, `rhs` is a tensor of shape `[..., M]` or
+  `[..., M, K]`. The latter allows solving `K` systems simultaneously with the
+  same left-hand sides and `K` different right-hand sides. If `transpose_rhs`
+  is set to `True` the expected shape is `[..., M]` or `[..., K, M]`.
+
+  The batch dimensions, denoted as `...`, must be the same in `diagonals` and
+  `rhs`.
+
+  The output is a tensor of the same shape as `rhs`: either `[..., M]` or
+  `[..., M, K]`.
+
+  Args:
+    diagonals: A `Tensor` or tuple of `Tensor`s describing left-hand sides. The
+      shape depends on `diagonals_format`; see the description above. Must be
+      `float32`, `float64`, `complex64`, or `complex128`.
+    rhs: A `Tensor` of shape [..., M] or [..., M, K] and with the same dtype as
+      `diagonals`.
+    diagonals_format: one of `matrix`, `sequence`, or `compact`. Default is
+      `compact`.
+    transpose_rhs: If `True`, `rhs` is transposed before solving (has no effect
+      if the shape of rhs is [..., M]).
+    conjugate_rhs: If `True`, `rhs` is conjugated before solving.
+    name:  A name to give this `Op` (optional).
+
+  Returns:
+    A `Tensor` of shape [..., M] or [..., M, K] containing the solutions.
+
+  Raises:
+    ValueError: An unsupported type is provided as input, or when the input
+      tensors have incorrect shapes.
+
+  """
+  if diagonals_format == 'compact':
+    return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
+                                             conjugate_rhs, name)
+
+  if diagonals_format == 'sequence':
+    if not isinstance(diagonals, (tuple, list)) or len(diagonals) != 3:
+      raise ValueError('Expected diagonals to be a sequence of length 3.')
+
+    superdiag, maindiag, subdiag = diagonals
+    if (not subdiag.shape[:-1].is_compatible_with(maindiag.shape[:-1]) or
+        not superdiag.shape[:-1].is_compatible_with(maindiag.shape[:-1])):
+      raise ValueError(
+          'Tensors representing the three diagonals must have the same shape, '
+          'except for the last dimension, got {}, {}, {}'.format(
+              subdiag.shape, maindiag.shape, superdiag.shape))
+
+    m = tensor_shape.dimension_value(maindiag.shape[-1])
+
+    def pad_if_necessary(t, name, last_dim_padding):
+      n = tensor_shape.dimension_value(t.shape[-1])
+      if not n or n == m:
+        return t
+      if n == m - 1:
+        paddings = (
+            [[0, 0] for _ in range(len(t.shape) - 1)] + [last_dim_padding])
+        return array_ops.pad(t, paddings)
+      raise ValueError('Expected {} to have length {} or {}, got {}.'.format(
+          name, m, m - 1, n))
+
+    subdiag = pad_if_necessary(subdiag, 'subdiagonal', [1, 0])
+    superdiag = pad_if_necessary(superdiag, 'superdiagonal', [0, 1])
+
+    diagonals = array_ops.stack((superdiag, maindiag, subdiag), axis=-2)
+    return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
+                                             conjugate_rhs, name)
+
+  if diagonals_format == 'matrix':
+    m1 = tensor_shape.dimension_value(diagonals.shape[-1])
+    m2 = tensor_shape.dimension_value(diagonals.shape[-2])
+    if m1 and m2 and m1 != m2:
+      raise ValueError(
+          'Expected the last two dimensions of diagonals to be the same, '
+          'got {} and {}'.format(m1, m2))
+    m = m1 or m2
+    if not m:
+      raise ValueError('The size of the matrix needs to be known for '
+                       'diagonals_format="matrix"')
+
+    # Extract diagonals; use input[..., 0, 0] as "dummy" m-th elements of sub-
+    # and superdiagonal.
+    # gather_nd slices into first indices, whereas we need to slice into the
+    # last two, so transposing back and forth is necessary.
+    dummy_idx = [0, 0]
+    indices = ([[[1, 0], [0, 0], dummy_idx]] + [
+        [[i + 1, i], [i, i], [i - 1, i]] for i in range(1, m - 1)
+    ] + [[dummy_idx, [m - 1, m - 1], [m - 2, m - 1]]])
+    diagonals = array_ops.transpose(
+        array_ops.gather_nd(array_ops.transpose(diagonals), indices))
+    return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
+                                             conjugate_rhs, name)
+
+  raise ValueError('Unrecognized diagonals_format: {}'.format(diagonals_format))
+
+
+def _tridiagonal_solve_compact_format(diagonals,
+                                      rhs,
+                                      transpose_rhs=False,
+                                      conjugate_rhs=False,
+                                      name=None):
+  """Helper function used after the input has been cast to compact form."""
+  diags_rank, rhs_rank = len(diagonals.shape), len(rhs.shape)
+
+  if diags_rank < 2:
+    raise ValueError(
+        'Expected diagonals to have rank at least 2, got {}'.format(diags_rank))
+  if rhs_rank != diags_rank and rhs_rank != diags_rank - 1:
+    raise ValueError('Expected the rank of rhs to be {} or {}, got {}'.format(
+        diags_rank - 1, diags_rank, rhs_rank))
+  if diagonals.shape[-2] != 3:
+    raise ValueError('Expected 3 diagonals, got {}'.format(diagonals.shape[-2]))
+  if not diagonals.shape[:-2].is_compatible_with(rhs.shape[:diags_rank - 2]):
+    raise ValueError('Batch shapes {} and {} are incompatible'.format(
+        diagonals.shape[:-2], rhs.shape[:diags_rank - 2]))
+
+  def check_num_lhs_matches_num_rhs():
+    if diagonals.shape[-1] != rhs.shape[-2]:
+      raise ValueError('Expected number of left-hand sides and right-hand '
+                       'sides to be equal, got {} and {}'.format(
+                           diagonals.shape[-1], rhs.shape[-2]))
+
+  if rhs_rank == diags_rank - 1:
+    # Rhs provided as a vector, ignoring transpose_rhs
+    if conjugate_rhs:
+      rhs = math_ops.conj(rhs)
+    rhs = array_ops.expand_dims(rhs, -1)
+    check_num_lhs_matches_num_rhs()
+    return array_ops.squeeze(
+        linalg_ops.tridiagonal_solve(diagonals, rhs, name), -1)
+
+  if transpose_rhs:
+    rhs = array_ops.matrix_transpose(rhs, conjugate=conjugate_rhs)
+  elif conjugate_rhs:
+    rhs = math_ops.conj(rhs)
+
+  check_num_lhs_matches_num_rhs()
+  result = linalg_ops.tridiagonal_solve(diagonals, rhs, name)
+  return array_ops.matrix_transpose(result) if transpose_rhs else result
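
A minimal usage sketch for the recommended `compact` format (not part of the diff): solving the 3x3 system [[2, 1, 0], [1, 2, 1], [0, 1, 2]] @ x = [1, 2, 3].

```python
import tensorflow as tf

# Rows are superdiagonal, main diagonal, subdiagonal; diagonals[0, -1] and
# diagonals[2, 0] are ignored padding.
diagonals = tf.constant([[1., 1., 0.],
                         [2., 2., 2.],
                         [0., 1., 1.]], dtype=tf.float64)
rhs = tf.constant([1., 2., 3.], dtype=tf.float64)
x = tf.linalg.tridiagonal_solve(diagonals, rhs)  # x == [0.5, 0., 1.5]
```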
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index e8652ba..6696030 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -619,6 +619,8 @@
         result = math_ops.sqrt(
             math_ops.reduce_sum(
                 tensor * math_ops.conj(tensor), axis, keepdims=True))
+        # TODO(rmlarsen): Replace with the following, once gradients are defined
+        # result = math_ops.reduce_euclidean_norm(tensor, axis, keepdims=True)
     else:
       result = math_ops.abs(tensor)
       if ord == 1:
diff --git a/tensorflow/python/ops/list_ops.py b/tensorflow/python/ops/list_ops.py
index 87409eb..ee01ff7 100644
--- a/tensorflow/python/ops/list_ops.py
+++ b/tensorflow/python/ops/list_ops.py
@@ -89,28 +89,44 @@
       name=name)
 
 
-def tensor_list_gather(input_handle, indices, element_dtype, name=None):
+def tensor_list_gather(input_handle,
+                       indices,
+                       element_dtype,
+                       element_shape=None,
+                       name=None):
   return gen_list_ops.tensor_list_gather(
       input_handle=input_handle,
       indices=indices,
-      element_shape=-1,
+      element_shape=_build_element_shape(element_shape),
       element_dtype=element_dtype,
       name=name)
 
 
-def tensor_list_scatter(tensor, indices, element_shape, name=None):
-  return gen_list_ops.tensor_list_scatter_v2(
-      tensor=tensor,
-      indices=indices,
-      element_shape=_build_element_shape(element_shape),
-      num_elements=-1,
-      name=name)
+def tensor_list_scatter(tensor,
+                        indices,
+                        element_shape=None,
+                        input_handle=None,
+                        name=None):
+  if input_handle is not None:
+    return gen_list_ops.tensor_list_scatter_into_existing_list(
+        input_handle=input_handle, tensor=tensor, indices=indices, name=name)
+  else:
+    return gen_list_ops.tensor_list_scatter_v2(
+        tensor=tensor,
+        indices=indices,
+        element_shape=_build_element_shape(element_shape),
+        num_elements=-1,
+        name=name)
 
 
-def tensor_list_stack(input_handle, element_dtype, num_elements=-1, name=None):
+def tensor_list_stack(input_handle,
+                      element_dtype,
+                      num_elements=-1,
+                      element_shape=None,
+                      name=None):
   return gen_list_ops.tensor_list_stack(
       input_handle=input_handle,
-      element_shape=-1,
+      element_shape=_build_element_shape(element_shape),
       element_dtype=element_dtype,
       num_elements=num_elements,
       name=name)
@@ -269,12 +285,12 @@
 def _TensorListGatherGrad(op, dtensor):
   """Gradient function for TensorListGather."""
   input_list, indices, _ = op.inputs
-  dlist = gen_list_ops.tensor_list_scatter_v2(
-      tensor=dtensor,
-      indices=indices,
-      element_shape=gen_list_ops.tensor_list_element_shape(
-          input_list, shape_type=dtypes.int32),
-      num_elements=gen_list_ops.tensor_list_length(input_list))
+  element_shape = gen_list_ops.tensor_list_element_shape(
+      input_list, shape_type=dtypes.int32)
+  num_elements = gen_list_ops.tensor_list_length(input_list)
+  dlist = tensor_list_reserve(element_shape, num_elements, dtensor.dtype)
+  dlist = tensor_list_scatter(
+      tensor=dtensor, indices=indices, input_handle=dlist)
   return dlist, None, None
 
 
@@ -295,6 +311,20 @@
     return dtensor, None, None
 
 
+@ops.RegisterGradient("TensorListScatterIntoExistingList")
+def _TensorListScatterIntoExistingListGrad(op, dlist):
+  """Gradient function for TensorListScatterIntoExistingList."""
+  _, tensor, indices = op.inputs
+  dtensor = gen_list_ops.tensor_list_gather(
+      dlist,
+      indices,
+      element_shape=array_ops.slice(array_ops.shape(tensor), [1], [-1]),
+      element_dtype=tensor.dtype)
+  zeros = array_ops.zeros_like(tensor)
+  dlist = tensor_list_scatter(zeros, indices, input_handle=dlist)
+  return dlist, dtensor, None
+
+
 def _build_element_shape(shape):
   """Converts shape to a format understood by list_ops for element_shape.
 
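A hedged sketch of the new `input_handle` path (not part of the diff; assumes eager execution): scattering into an existing list overwrites only the addressed elements, which is what the rewritten `TensorListGather` gradient above relies on.

```python
import tensorflow as tf
from tensorflow.python.ops import list_ops

l = list_ops.tensor_list_reserve(
    element_shape=[], num_elements=4, element_dtype=tf.float32)
l = list_ops.tensor_list_scatter(
    tensor=tf.constant([10., 20.]), indices=[0, 2], input_handle=l)
t = list_ops.tensor_list_stack(l, element_dtype=tf.float32)
# Expected: [10., 0., 20., 0.] -- slots never written surface as zeros,
# the semantics the gradient functions above rely on.
```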
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 127565d..8f200c6 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -38,10 +38,10 @@
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_lookup_ops import *
-# pylint: enable=wildcard-import
-from tensorflow.python.training.checkpointable import base as checkpointable_base
-from tensorflow.python.training.checkpointable import tracking as checkpointable
 from tensorflow.python.training.saver import BaseSaverBuilder
+# pylint: enable=wildcard-import
+from tensorflow.python.training.tracking import base as trackable_base
+from tensorflow.python.training.tracking import tracking as trackable
 from tensorflow.python.util import compat
 from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.tf_export import tf_export
@@ -102,7 +102,7 @@
                     (table.value_dtype, value_dtype))
 
 
-class LookupInterface(checkpointable.TrackableResource):
+class LookupInterface(trackable.TrackableResource):
   """Represent a lookup table that persists across different steps."""
 
   def __init__(self, key_dtype, value_dtype):
@@ -165,8 +165,8 @@
     self._default_value = ops.convert_to_tensor(
         default_value, dtype=self._value_dtype)
     self._default_value.get_shape().merge_with(tensor_shape.scalar())
-    if isinstance(initializer, checkpointable_base.Checkpointable):
-      self._initializer = self._track_checkpointable(
+    if isinstance(initializer, trackable_base.Trackable):
+      self._initializer = self._track_trackable(
           initializer, "_initializer")
     self._resource_handle = self.create_resource()
     self._init_op = self.initialize()
@@ -314,7 +314,7 @@
     return exported_keys, exported_values
 
 
-class TableInitializerBase(checkpointable_base.Checkpointable):
+class TableInitializerBase(trackable_base.Trackable):
   """Base class for lookup table initializers."""
 
   def __init__(self, key_dtype, value_dtype):
@@ -543,8 +543,8 @@
     self._vocab_size = vocab_size
     self._delimiter = delimiter
     self._name = name
-    self._filename = self._track_checkpointable(
-        checkpointable.TrackableAsset(filename),
+    self._filename = self._track_trackable(
+        trackable.TrackableAsset(filename),
         "_filename")
 
     super(TextFileInitializer, self).__init__(key_dtype, value_dtype)
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 1169c45..6cd1d8e 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -35,33 +35,6 @@
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("losses.Reduction", v1=[])
-class ReductionV2(object):
-  """Types of loss reduction.
-
-  Contains the following values:
-
-  * `NONE`: Un-reduced weighted losses with the same shape as input.
-  * `SUM`: Scalar sum of weighted losses.
-  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
-     Note that when using `tf.distribute.Strategy`, this is the global batch
-     size across all the replicas that are contributing to a single step.
-  """
-
-  NONE = "none"
-  SUM = "sum"
-  SUM_OVER_BATCH_SIZE = "sum_over_batch_size"
-
-  @classmethod
-  def all(cls):
-    return (cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)
-
-  @classmethod
-  def validate(cls, key):
-    if key not in cls.all():
-      raise ValueError("Invalid Reduction Key %s." % key)
-
-
 @tf_export(v1=["losses.Reduction"])
 class Reduction(object):
   """Types of loss reduction.
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index dc23409..8f8e3bd 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -47,6 +47,10 @@
   return [None, None]
 
 
+# TODO(rmlarsen): Implement gradient.
+ops.NotDifferentiable("EuclideanNorm")
+
+
 @ops.RegisterGradient("Sum")
 def _SumGrad(op, grad):
   """Gradient for Sum."""
@@ -99,7 +103,7 @@
   num_selected = array_ops.reshape(
       math_ops.reduce_sum(indicators, op.inputs[1]), output_shape_kept_dims)
 
-  return [math_ops.div(indicators, num_selected) * grad, None]
+  return [math_ops.divide(indicators, num_selected) * grad, None]
 
 
 @ops.RegisterGradient("Max")
@@ -196,7 +200,7 @@
       array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)
   ], 0)
   ones = array_ops.fill(ones_shape, constant_op.constant(1, dtype=grad.dtype))
-  scaled_grad = math_ops.div(grad, math_ops.segment_sum(ones, op.inputs[1]))
+  scaled_grad = math_ops.divide(grad, math_ops.segment_sum(ones, op.inputs[1]))
   return array_ops.gather(scaled_grad, op.inputs[1]), None
 
 
@@ -260,7 +264,7 @@
                                       op.inputs[1])
   # Compute the gradient for each segment. The gradient for the ith segment is
   # divided evenly among the selected elements in that segment.
-  weighted_grads = math_ops.div(grad, num_selected)
+  weighted_grads = math_ops.divide(grad, num_selected)
   gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])
   return array_ops.where(is_selected, gathered_grads, zeros), None
 
@@ -314,7 +318,7 @@
       math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2])
   # Compute the gradient for each segment. The gradient for the ith segment is
   # divided evenly among the selected elements in that segment.
-  weighted_grads = math_ops.div(grad, num_selected)
+  weighted_grads = math_ops.divide(grad, num_selected)
   gathered_grads, _, _ = _GatherDropNegatives(weighted_grads, None,
                                               zero_clipped_indices,
                                               is_positive)
@@ -956,10 +960,11 @@
   rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
   x = math_ops.conj(x)
   y = math_ops.conj(y)
-  return (array_ops.reshape(math_ops.reduce_sum(math_ops.div(grad, y), rx), sx),
+  return (array_ops.reshape(
+      math_ops.reduce_sum(math_ops.divide(grad, y), rx), sx),
           array_ops.reshape(
-              math_ops.reduce_sum(grad * math_ops.div(math_ops.div(-x, y), y),
-                                  ry), sy))
+              math_ops.reduce_sum(
+                  grad * math_ops.divide(math_ops.divide(-x, y), y), ry), sy))
 
 
 @ops.RegisterGradient("FloorDiv")
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 7306a45..77a06f8 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1386,6 +1386,47 @@
           name=name))
 
 
+@tf_export("math.reduce_euclidean_norm")
+def reduce_euclidean_norm(input_tensor, axis=None, keepdims=False, name=None):
+  """Computes the Euclidean norm of elements across dimensions of a tensor.
+
+  Reduces `input_tensor` along the dimensions given in `axis`.
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  are retained with length 1.
+
+  If `axis` is None, all dimensions are reduced, and a
+  tensor with a single element is returned.
+
+  For example:
+
+  ```python
+  x = tf.constant([[1, 2, 3], [1, 1, 1]])
+  tf.math.reduce_euclidean_norm(x)  # sqrt(17)
+  tf.math.reduce_euclidean_norm(x, 0)  # [sqrt(2), sqrt(5), sqrt(10)]
+  tf.math.reduce_euclidean_norm(x, 1)  # [sqrt(14), sqrt(3)]
+  tf.math.reduce_euclidean_norm(x, 1, keepdims=True)  # [[sqrt(14)], [sqrt(3)]]
+  tf.math.reduce_euclidean_norm(x, [0, 1])  # sqrt(17)
+  ```
+
+  Args:
+    input_tensor: The tensor to reduce. Should have numeric type.
+    axis: The dimensions to reduce. If `None` (the default), reduces all
+      dimensions. Must be in the range `[-rank(input_tensor),
+      rank(input_tensor))`.
+    keepdims: If true, retains reduced dimensions with length 1.
+    name: A name for the operation (optional).
+
+  Returns:
+    The reduced tensor, of the same dtype as the input_tensor.
+  """
+  return _may_reduce_to_scalar(
+      keepdims, axis,
+      gen_math_ops.euclidean_norm(
+          input_tensor, _ReductionDims(input_tensor, axis), keepdims,
+          name=name))
+
+
 @tf_export(v1=["math.count_nonzero", "count_nonzero"])
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index a3d3c7b..e8b7b4c 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -85,6 +85,58 @@
   ]
 
 
+@ops.RegisterGradient("DepthwiseConv2dNativeBackpropInput")
+def _DepthwiseConv2dNativeBackpropInputGrad(op, grad):
+  """The derivatives for deconvolution.
+
+  Args:
+    op: the Deconvolution op.
+    grad: the tensor representing the gradient w.r.t. the output
+
+  Returns:
+    the gradients w.r.t. the input and the filter
+  """
+  return [
+      None,
+      nn_ops.depthwise_conv2d_native_backprop_filter(
+          grad,
+          array_ops.shape(op.inputs[1]),
+          op.inputs[2],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          data_format=op.get_attr("data_format")),
+      nn_ops.depthwise_conv2d_native(
+          grad,
+          op.inputs[1],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          data_format=op.get_attr("data_format"))
+  ]
+
+
+@ops.RegisterGradient("DepthwiseConv2dNativeBackpropFilter")
+def _DepthwiseConv2dNativeBackpropFilterGrad(op, grad):
+  return [
+      nn_ops.depthwise_conv2d_native_backprop_input(
+          array_ops.shape(op.inputs[0]),
+          grad,
+          op.inputs[2],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          data_format=op.get_attr("data_format")), None,
+      nn_ops.depthwise_conv2d_native(
+          op.inputs[0],
+          grad,
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          data_format=op.get_attr("data_format"))
+  ]
+
+
 @ops.RegisterGradient("Conv3D")
 def _Conv3DGrad(op, grad):
   data_format = op.get_attr("data_format").decode()
@@ -232,8 +284,8 @@
 
   Args:
      op: the Softmax op.
-     grad_softmax:  the tensor representing the gradient w.r.t. the
-       softmax output.
+     grad_softmax:  the tensor representing the gradient w.r.t. the softmax
+       output.
 
   Returns:
      gradient w.r.t the input to the softmax
@@ -309,7 +361,6 @@
     data_format = None
 
   shape = array_ops.shape(op.inputs[0])
-  rank = array_ops.rank(op.inputs[0])
   bias_shape = array_ops.shape(received_grad)
 
   if data_format == b"NCHW":
@@ -360,9 +411,9 @@
 def _EluGradGrad(op, grad):
   elu_x = op.inputs[1]
   return (gen_nn_ops.elu_grad(grad, op.outputs[0]),
-          array_ops.where(elu_x < 0, grad * op.inputs[0],
-                          array_ops.zeros(
-                              shape=array_ops.shape(elu_x), dtype=elu_x.dtype)))
+          array_ops.where(
+              elu_x < 0, grad * op.inputs[0],
+              array_ops.zeros(shape=array_ops.shape(elu_x), dtype=elu_x.dtype)))
 
 
 @ops.RegisterGradient("SeluGrad")
@@ -370,11 +421,9 @@
   x = op.inputs[1]
   scale_alpha = 1.7580993408473768599402175208123
   return (gen_nn_ops.elu_grad(grad, op.outputs[0]),
-          array_ops.where(x < 0.,
-                          gen_nn_ops.elu_grad(grad,
-                                              op.outputs[0] + scale_alpha),
-                          array_ops.zeros(
-                              shape=array_ops.shape(x), dtype=x.dtype)))
+          array_ops.where(
+              x < 0., gen_nn_ops.elu_grad(grad, op.outputs[0] + scale_alpha),
+              array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)))
 
 
 @ops.RegisterGradient("Relu6")
@@ -485,10 +534,10 @@
     softmax = nn_ops.softmax(logits)
 
     grad += ((grad_grad - array_ops.squeeze(
-        math_ops.matmul(array_ops.expand_dims(grad_grad, 1),
-                        array_ops.expand_dims(softmax, 2)),
-        axis=1)) *
-             softmax)
+        math_ops.matmul(
+            array_ops.expand_dims(grad_grad, 1),
+            array_ops.expand_dims(softmax, 2)),
+        axis=1)) * softmax)
 
   return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits))
 
@@ -785,9 +834,9 @@
   Args:
     op: The BatchNormOp for which we need to compute gradients.
     use_v2: Boolean indicating whether to use the V2 version of the fused batch
-            norm gradient.
-    *grad: An argument list for tensors of gradients wrt the outputs
-          with grad[0] as grad_y.
+      norm gradient.
+    *grad: An argument list for tensors of gradients wrt the outputs with
+      grad[0] as grad_y.
 
   Returns:
     grad_x: gradient for x, which is scale * rsqrt(variance + epsilon) *
@@ -874,8 +923,7 @@
     epsilon: A small float number added to the variance of x.
     data_format: The data format for input. Either b"NHWC" or b"NCHW".
     is_training: A bool value to indicate the operation is for training
-      (default)
-        or inference.
+      (default) or inference.
 
   Returns:
     A tuple (grad_x, grad_scale, grad_offset), where grad_x is the gradient
@@ -939,9 +987,9 @@
 
   Args:
     op: The FusedBatchNormGradOp for which we need to compute gradients.
-    *grad: An argument list for tensors of gradients wrt the outputs
-          with grad[0] as grad_grad_x, grad[1] as grad_grad_scale,
-          grad[2] as grad_grad_offset.
+    *grad: An argument list for tensors of gradients wrt the outputs with
+      grad[0] as grad_grad_x, grad[1] as grad_grad_scale, grad[2] as
+      grad_grad_offset.
 
   Returns:
     A tuple (grad_grad_y, grad_x, grad_scale, None, None), where grad_grad_y
@@ -1007,29 +1055,31 @@
   ind_shape = array_ops.shape(op.outputs[1])
 
   # int32 is not supported on GPU hence up-casting
-  ind_lastdim = array_ops.gather(math_ops.cast(
-      ind_shape, dtypes.int64), array_ops.size(ind_shape) - 1)
+  ind_lastdim = array_ops.gather(
+      math_ops.cast(ind_shape, dtypes.int64),
+      array_ops.size(ind_shape) - 1)
   # Flatten indices to 2D.
   ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack([-1, ind_lastdim]))
 
-  in_lastdim = array_ops.gather(math_ops.cast(
-      in_shape, dtypes.int64), array_ops.size(in_shape) - 1)
+  in_lastdim = array_ops.gather(
+      math_ops.cast(in_shape, dtypes.int64),
+      array_ops.size(in_shape) - 1)
   outerdim = array_ops.shape(ind_2d)[0]
   # Compute linear indices (flattened to 1D).
-  ind = array_ops.reshape(ind_2d + math_ops.cast(array_ops.expand_dims(
-      math_ops.range(0, math_ops.cast(outerdim, dtypes.int64)
-                     * in_lastdim, in_lastdim), -1), dtypes.int32), [-1])
+  ind = array_ops.reshape(
+      ind_2d + math_ops.cast(
+          array_ops.expand_dims(
+              math_ops.range(0,
+                             math_ops.cast(outerdim, dtypes.int64) * in_lastdim,
+                             in_lastdim), -1), dtypes.int32), [-1])
 
   # Substitute grad to appropriate locations and fill the rest with zeros,
   # finally reshaping it to the original input shape.
   return [
       array_ops.reshape(
           array_ops.scatter_nd(
-              array_ops.expand_dims(ind, -1),
-              array_ops.reshape(grad, [-1]),
-              [math_ops.reduce_prod(in_shape)]
-          ),
-          in_shape),
+              array_ops.expand_dims(ind, -1), array_ops.reshape(grad, [-1]),
+              [math_ops.reduce_prod(in_shape)]), in_shape),
       array_ops.zeros([], dtype=dtypes.int32)
   ]
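A small numeric sketch of the flattened-index arithmetic above (values assumed): for a `[2, 4]` input with `k=2`, `outerdim` is 2 and `in_lastdim` is 4, so each row's top-k column indices are shifted by a per-row offset into the flat range `[0, 8)`.

    import numpy as np

    in_lastdim, outerdim = 4, 2
    ind_2d = np.array([[1, 3],     # top-k column indices for row 0
                       [0, 2]])    # and for row 1
    offsets = np.arange(0, outerdim * in_lastdim, in_lastdim)[:, None]
    ind = (ind_2d + offsets).reshape(-1)
    print(ind)  # [1 3 4 6] -- positions in the flattened [2, 4] input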
 
diff --git a/tensorflow/python/ops/nn_grad_test.py b/tensorflow/python/ops/nn_grad_test.py
index 95e05a9..783656a 100644
--- a/tensorflow/python/ops/nn_grad_test.py
+++ b/tensorflow/python/ops/nn_grad_test.py
@@ -23,9 +23,11 @@
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
+from tensorflow.python.ops import nn_impl
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.platform import test
 
@@ -49,5 +51,111 @@
       self.assertLess(error, 1e-4)
 
 
+class Conv2dOpTest(test.TestCase):
+
+  def run_test(self, x, y):
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(x,
+                                                      x.get_shape().as_list(),
+                                                      y,
+                                                      y.get_shape().as_list())
+      self.assertLess(error, 1e-3)
+
+  @test_util.run_deprecated_v1
+  def testConv2dGradWRTInput(self):
+    x = array_ops.placeholder(
+        dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
+    f = constant_op.constant([0.5],
+                             dtype=dtypes.float32,
+                             shape=[2, 2, 3, 2],
+                             name='filter')
+    y = nn_ops.conv2d(x, f, [1, 1, 1, 1], 'SAME')
+    self.run_test(x, y)
+
+  @test_util.run_deprecated_v1
+  def testConv2dGradWRTFilter(self):
+    x = constant_op.constant([0.5],
+                             dtype=dtypes.float32,
+                             shape=[1, 4, 4, 3],
+                             name='input')
+    f = array_ops.placeholder(
+        dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
+    y = nn_ops.conv2d(x, f, [1, 1, 1, 1], 'SAME')
+    self.run_test(f, y)
+
+  @test_util.run_deprecated_v1
+  def testConv2dBackpropFilterGrad(self):
+    x = array_ops.placeholder(
+        dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
+    f = constant_op.constant([0.5],
+                             dtype=dtypes.float32,
+                             shape=[2, 2, 3, 2],
+                             name='filter')
+    strides = [1, 1, 1, 1]
+    padding = 'SAME'
+    out = nn_ops.conv2d(x, f, strides, padding)
+
+    grad_wrt_input = gradients_impl.gradients(out, x)[0]
+    self.run_test(f, grad_wrt_input)
+
+    grad_wrt_filter = gradients_impl.gradients(out, f)[0]
+    self.run_test(x, grad_wrt_filter)
+
+
+class DepthwiseConv2dTest(test.TestCase):
+
+  def run_test(self, x, y):
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(x,
+                                                      x.get_shape().as_list(),
+                                                      y,
+                                                      y.get_shape().as_list())
+      self.assertLess(error, 1e-3)
+
+  @test_util.run_deprecated_v1
+  def testDepthwiseConv2dGradWRTInput(self):
+    x = array_ops.placeholder(
+        dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
+    f = constant_op.constant([0.5],
+                             dtype=dtypes.float32,
+                             shape=[2, 2, 3, 2],
+                             name='filter')
+    strides = [1, 1, 1, 1]
+    padding = 'SAME'
+    y = nn_impl.depthwise_conv2d(x, f, strides, padding)
+    self.run_test(x, y)
+
+  @test_util.run_deprecated_v1
+  def testDepthwiseConv2dGradWRTFilter(self):
+    x = constant_op.constant([0.5],
+                             dtype=dtypes.float32,
+                             shape=[1, 4, 4, 3],
+                             name='input')
+    f = array_ops.placeholder(
+        dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
+    strides = [1, 1, 1, 1]
+    padding = 'SAME'
+    y = nn_impl.depthwise_conv2d(x, f, strides, padding)
+    self.run_test(f, y)
+
+  @test_util.run_deprecated_v1
+  def testDepthwiseConv2dBackpropFilterGrad(self):
+    x = array_ops.placeholder(
+        dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
+    f = constant_op.constant([0.5],
+                             dtype=dtypes.float32,
+                             shape=[2, 2, 3, 2],
+                             name='filter')
+    strides = [1, 1, 1, 1]
+    padding = 'SAME'
+    out = nn_impl.depthwise_conv2d(x, f, strides, padding)
+
+    grad_wrt_input = gradients_impl.gradients(out, x)[0]
+    self.run_test(f, grad_wrt_input)
+
+    grad_wrt_filter = gradients_impl.gradients(out, f)[0]
+    self.run_test(x, grad_wrt_filter)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index f04a3ae..6e17883 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -530,7 +530,7 @@
     if spatial_dims != orig_spatial_dims or any(x < 1 for x in spatial_dims):
       raise ValueError(
           "spatial_dims must be a montonically increasing sequence of positive "
-          "integers")  # pylint: disable=line-too-long
+          "integers")
 
     if data_format is not None and data_format.startswith("NC"):
       expected_input_rank = spatial_dims[-1]
@@ -767,7 +767,6 @@
     data_format=None,
     filters=None,
     dilations=None):
-  # pylint: disable=line-too-long
   """Computes sums of N-D convolutions (actually cross-correlation).
 
   This also supports either output striding via the optional `strides` parameter
@@ -853,6 +852,7 @@
       "NCW".  For N=2, the valid values are "NHWC" (default) and "NCHW".
       For N=3, the valid values are "NDHWC" (default) and "NCDHW".
     filters: Alias of filter.
+    dilations: Alias of dilation_rate.
 
   Returns:
     A `Tensor` with the same type as `input` of shape
@@ -927,15 +927,23 @@
     dilations=None,
     name=None):
   """Internal function which performs rank agnostic convolution."""
-  with ops.name_scope(name, "convolution", [input, filter]) as name:
-    if input.shape is not None:
+  with ops.name_scope(name, "convolution", [input, filters]) as name:
+    if (isinstance(input.shape, tensor_shape.TensorShape) and
+        input.shape.rank is not None):
       n = len(input.shape) - 2
-    elif filters.shape is not None:
+    elif (not isinstance(input.shape, tensor_shape.TensorShape) and
+          input.shape is not None):
+      n = len(input.shape) - 2
+    elif (isinstance(filters.shape, tensor_shape.TensorShape) and
+          filters.shape.rank is not None):
+      n = len(filters.shape) - 2
+    elif (not isinstance(filters.shape, tensor_shape.TensorShape) and
+          filters.shape is not None):
       n = len(filters.shape) - 2
     else:
       raise ValueError("rank of input or filter must be known")
 
-    if n < 1 or n > 3:
+    if not 1 <= n <= 3:
       raise ValueError(
           "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2))
 
@@ -951,7 +959,7 @@
 
     if all(i == 1 for i in dilations):
       # fast path if no dilation as gradient only supported on GPU for dilations
-      op = conv_ops.get(n)
+      op = conv_ops[n]
       return op(
           input,
           filters,
@@ -1081,7 +1089,6 @@
     name=None,
     data_format=None,
     dilations=None):
-  # pylint: disable=line-too-long
   """Performs an N-D pooling operation.
 
   In the case that `data_format` does not start with "NC", computes for
@@ -1700,70 +1707,6 @@
                            name=name)
 
 
-@tf_export("nn.conv2d_backprop_filter", v1=[])
-def conv2d_backprop_filter_v2(input,  # pylint: disable=redefined-builtin
-                              filter_sizes,
-                              out_backprop,
-                              strides,
-                              padding,
-                              data_format="NHWC",
-                              dilations=None,
-                              name=None):
-  r"""Computes the gradients of convolution with respect to the filter.
-
-  Args:
-    input: A `Tensor`. Must be one of the following types:
-      `half`, `bfloat16`, `float32`, `float64`.
-      4-D with shape `[batch, in_height, in_width, in_channels]`.
-    filter_sizes: A `Tensor` of type `int32`.
-      An integer vector representing the tensor shape of `filter`,
-      where `filter` is a 4-D
-      `[filter_height, filter_width, in_channels, out_channels]` tensor.
-    out_backprop: A `Tensor`. Must have the same type as `input`.
-      4-D with shape `[batch, out_height, out_width, out_channels]`.
-      Gradients w.r.t. the output of the convolution.
-    strides: A list of `ints`.
-      The stride of the sliding window for each dimension of the input
-      of the convolution. Must be in the same order as the dimension specified
-      with format.
-    padding: Either the `string `"SAME"` or `"VALID"` indicating the type of
-      padding algorithm to use, or a list indicating the explicit paddings at
-      the start and end of each dimension. When explicit padding is used and
-      data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top,
-      pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used
-      and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0],
-      [pad_top, pad_bottom], [pad_left, pad_right]]`.
-    data_format: An optional `string` from: `"NHWC", "NCHW"`.
-      Defaults to `"NHWC"`.
-      Specify the data format of the input and output data. With the
-      default format "NHWC", the data is stored in the order of:
-          [batch, in_height, in_width, in_channels].
-      Alternatively, the format could be "NCHW", the data storage order of:
-          [batch, in_channels, in_height, in_width].
-    dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`.
-      1-D tensor of length 4.  The dilation factor for each dimension of
-      `input`. If set to k > 1, there will be k-1 skipped cells between each
-      filter element on that dimension. The dimension order is determined by
-      the value of `data_format`, see above for details. Dilations in the batch
-      and depth dimensions must be 1.
-    name: A name for the operation (optional).
-
-  Returns:
-    A `Tensor`. Has the same type as `input`.
-  """
-  if dilations is None:
-    dilations = [1, 1, 1, 1]
-  return conv2d_backprop_filter(input,  # pylint: disable=redefined-builtin
-                                filter_sizes,
-                                out_backprop,
-                                strides,
-                                padding,
-                                use_cudnn_on_gpu=True,
-                                data_format=data_format,
-                                dilations=dilations,
-                                name=name)
-
-
 @tf_export(v1=["nn.conv2d_backprop_filter"])
 def conv2d_backprop_filter(  # pylint: disable=redefined-builtin,dangerous-default-value
     input,
@@ -1824,70 +1767,6 @@
       explicit_paddings, data_format, dilations, name)
 
 
-@tf_export("nn.conv2d_backprop_input", v1=[])
-def conv2d_backprop_input_v2(input_sizes,
-                             filters,
-                             out_backprop,
-                             strides,
-                             padding,
-                             data_format="NHWC",
-                             dilations=None,
-                             name=None):
-  r"""Computes the gradients of convolution with respect to the input.
-
-  Args:
-    input_sizes: A `Tensor` of type `int32`.
-      An integer vector representing the shape of `input`,
-      where `input` is a 4-D `[batch, height, width, channels]` tensor.
-    filters: A `Tensor`. Must be one of the following types:
-      `half`, `bfloat16`, `float32`, `float64`.
-      4-D with shape
-      `[filter_height, filter_width, in_channels, out_channels]`.
-    out_backprop: A `Tensor`. Must have the same type as `filters`.
-      4-D with shape `[batch, out_height, out_width, out_channels]`.
-      Gradients w.r.t. the output of the convolution.
-    strides: A list of `ints`.
-      The stride of the sliding window for each dimension of the input
-      of the convolution. Must be in the same order as the dimension specified
-      with format.
-    padding: Either the `string `"SAME"` or `"VALID"` indicating the type of
-      padding algorithm to use, or a list indicating the explicit paddings at
-      the start and end of each dimension. When explicit padding is used and
-      data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top,
-      pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used
-      and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0],
-      [pad_top, pad_bottom], [pad_left, pad_right]]`.
-    data_format: An optional `string` from: `"NHWC", "NCHW"`.
-      Defaults to `"NHWC"`.
-      Specify the data format of the input and output data. With the
-      default format "NHWC", the data is stored in the order of:
-          [batch, in_height, in_width, in_channels].
-      Alternatively, the format could be "NCHW", the data storage order of:
-          [batch, in_channels, in_height, in_width].
-    dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`.
-      1-D tensor of length 4.  The dilation factor for each dimension of
-      `input`. If set to k > 1, there will be k-1 skipped cells between each
-      filter element on that dimension. The dimension order is determined by
-      the value of `data_format`, see above for details. Dilations in the batch
-      and depth dimensions must be 1.
-    name: A name for the operation (optional).
-
-  Returns:
-    A `Tensor`. Has the same type as `filters`.
-  """
-  if dilations is None:
-    dilations = [1, 1, 1, 1]
-  return conv2d_backprop_input(input_sizes,
-                               filters,
-                               out_backprop,
-                               strides,
-                               padding,
-                               use_cudnn_on_gpu=True,
-                               data_format=data_format,
-                               dilations=dilations,
-                               name=name)
-
-
 @tf_export(v1=["nn.conv2d_backprop_input"])
 def conv2d_backprop_input(  # pylint: disable=redefined-builtin,dangerous-default-value
     input_sizes,
@@ -1962,12 +1841,13 @@
     data_format="NHWC",
     name=None,
     input=None,  # pylint: disable=redefined-builtin
-    filters=None):
+    filters=None,
+    dilations=None):
   """The transpose of `conv2d`.
 
   This operation is sometimes called "deconvolution" after [Deconvolutional
-  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
-  actually the transpose (gradient) of `conv2d` rather than an actual
+  Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf),
+  but is really the transpose (gradient) of `conv2d` rather than an actual
   deconvolution.
 
   Args:
@@ -1990,6 +1870,14 @@
     name: Optional name for the returned tensor.
     input: Alias for value.
     filters: Alias for filter.
+    dilations: An int or list of `ints` that has length `1`, `2` or `4`,
+      defaults to 1. The dilation factor for each dimension of `input`. If a
+      single value is given it is replicated in the `H` and `W` dimension. By
+      default the `N` and `C` dimensions are set to 1. If set to k > 1, there
+      will be k-1 skipped cells between each filter element on that dimension.
+      The dimension order is determined by the value of `data_format`, see above
+      for details. If a 4-d list is given, the dilations in the batch and depth
+      dimensions must be 1.
 
   Returns:
     A `Tensor` with the same type as `value`.
@@ -2002,70 +1890,86 @@
   filter = deprecated_argument_lookup("filters", filters, "filter", filter)
   with ops.name_scope(name, "conv2d_transpose",
                       [value, filter, output_shape]) as name:
-    if data_format not in ("NCHW", "NHWC"):
-      raise ValueError("data_format has to be either NCHW or NHWC.")
-    value = ops.convert_to_tensor(value, name="value")
-    filter = ops.convert_to_tensor(filter, name="filter")  # pylint: disable=redefined-builtin
-    axis = 3 if data_format == "NHWC" else 1
-    if not value.get_shape().dims[axis].is_compatible_with(
-        filter.get_shape()[3]):
-      raise ValueError("input channels does not match filter's input channels, "
-                       "{} != {}".format(value.get_shape()[axis],
-                                         filter.get_shape()[3]))
-
-    output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
-    if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(4)):
-      raise ValueError("output_shape must have shape (4,), got {}".format(
-          output_shape_.get_shape()))
-
-    if isinstance(output_shape, (list, np.ndarray)):
-      # output_shape's shape should be == [4] if reached this point.
-      if not filter.get_shape().dims[2].is_compatible_with(
-          output_shape[axis]):
-        raise ValueError(
-            "output_shape does not match filter's output channels, "
-            "{} != {}".format(output_shape[axis],
-                              filter.get_shape()[2]))
-
-    if padding != "VALID" and padding != "SAME":
-      raise ValueError("padding must be either VALID or SAME:"
-                       " {}".format(padding))
-
-    strides = _get_sequence(strides, 2, axis, "strides")
-
-    return gen_nn_ops.conv2d_backprop_input(
-        input_sizes=output_shape_,
-        filter=filter,
-        out_backprop=value,
-        strides=strides,
+    return conv2d_transpose_v2(
+        value,
+        filter,
+        output_shape,
+        strides,
         padding=padding,
         data_format=data_format,
+        dilations=dilations,
         name=name)
 
 
-# pylint: disable=redefined-builtin
 @tf_export("nn.conv2d_transpose", v1=[])
 def conv2d_transpose_v2(
-    input,
+    input,  # pylint: disable=redefined-builtin
     filters,  # pylint: disable=redefined-builtin
     output_shape,
     strides,
     padding="SAME",
     data_format="NHWC",
+    dilations=None,
     name=None):
-  return conv2d_transpose(
-      input,
-      filters,
-      output_shape,
-      strides,
-      padding=padding,
-      data_format=data_format,
-      name=name)
-# pylint: enable=redefined-builtin
-conv2d_transpose_v2.__doc__ = deprecation.rewrite_argument_docstring(
-    deprecation.rewrite_argument_docstring(
-        conv2d_transpose.__doc__, "filter", "filters"),
-    "value", "input")
+  """The transpose of `conv2d`.
+
+  This operation is sometimes called "deconvolution" after [Deconvolutional
+  Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf),
+  but is really the transpose (gradient) of `conv2d` rather than an actual
+  deconvolution.
+
+  Args:
+    input: A 4-D `Tensor` of type `float` and shape `[batch, height, width,
+      in_channels]` for `NHWC` data format or `[batch, in_channels, height,
+      width]` for `NCHW` data format.
+    filters: A 4-D `Tensor` with the same type as `input` and shape `[height,
+      width, output_channels, in_channels]`.  `filters`' `in_channels` dimension
+      must match that of `input`.
+    output_shape: A 1-D `Tensor` representing the output shape of the
+      deconvolution op.
+    strides: An int or list of `ints` that has length `1`, `2` or `4`.  The
+      stride of the sliding window for each dimension of `input`. If a single
+      value is given it is replicated in the `H` and `W` dimension. By default
+      the `N` and `C` dimensions are set to 1. The dimension order is determined
+      by the value of `data_format`, see below for details.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
+      the "returns" section of `tf.nn.convolution` for details.
+    data_format: A string. 'NHWC' and 'NCHW' are supported.
+    dilations: An int or list of `ints` that has length `1`, `2` or `4`,
+      defaults to 1. The dilation factor for each dimension of `input`. If a
+      single value is given it is replicated in the `H` and `W` dimension. By
+      default the `N` and `C` dimensions are set to 1. If set to k > 1, there
+      will be k-1 skipped cells between each filter element on that dimension.
+      The dimension order is determined by the value of `data_format`, see above
+      for details. If a 4-d list is given, the dilations in the batch and depth
+      dimensions must be 1.
+    name: Optional name for the returned tensor.
+
+  Returns:
+    A `Tensor` with the same type as `input`.
+
+  Raises:
+    ValueError: If input/output depth does not match `filters`' shape, or if
+      padding is other than `'VALID'` or `'SAME'`.
+  """
+  with ops.name_scope(name, "conv2d_transpose",
+                      [input, filters, output_shape]) as name:
+    if data_format is None:
+      data_format = "NHWC"
+    channel_index = 1 if data_format.startswith("NC") else 3
+
+    strides = _get_sequence(strides, 2, channel_index, "strides")
+    dilations = _get_sequence(dilations, 2, channel_index, "dilations")
+
+    return gen_nn_ops.conv2d_backprop_input(
+        input_sizes=output_shape,
+        filter=filters,
+        out_backprop=input,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilations=dilations,
+        name=name)
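For illustration, a hedged call to the reworked op (shapes assumed): with `padding='SAME'` and stride 2 the spatial output size is `in_size * stride`, and an int stride is accepted because `_get_sequence` expands it.

    import tensorflow as tf

    x = tf.ones([1, 4, 4, 2])   # gradient-side input, NHWC
    f = tf.ones([3, 3, 5, 2])   # [height, width, output_channels, in_channels]
    y = tf.nn.conv2d_transpose(x, f, output_shape=[1, 8, 8, 5],
                               strides=2, padding='SAME')
    print(y.shape)  # (1, 8, 8, 5)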
 
 
 @tf_export("nn.atrous_conv2d_transpose")
@@ -2078,9 +1982,9 @@
   """The transpose of `atrous_conv2d`.
 
   This operation is sometimes called "deconvolution" after [Deconvolutional
-  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
-  actually the transpose (gradient) of `atrous_conv2d` rather than an actual
-  deconvolution.
+  Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf),
+  but is really the transpose (gradient) of `atrous_conv2d` rather than an
+  actual deconvolution.
 
   Args:
     value: A 4-D `Tensor` of type `float`. It needs to be in the default `NHWC`
@@ -2226,7 +2130,7 @@
               name=None):
   if dilations is None:
     dilations = [1, 1, 1, 1, 1]
-  return gen_nn_ops.conv3d(input,  # pylint: disable=redefined-builtin
+  return gen_nn_ops.conv3d(input,
                            filters,
                            strides,
                            padding,
@@ -2265,12 +2169,13 @@
     data_format="NDHWC",
     name=None,
     input=None,  # pylint: disable=redefined-builtin
-    filters=None):
+    filters=None,
+    dilations=None):
   """The transpose of `conv3d`.
 
   This operation is sometimes called "deconvolution" after [Deconvolutional
-  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
-  actually the transpose (gradient) of `conv3d` rather than an actual
+  Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf),
+  but is really the transpose (gradient) of `conv3d` rather than an actual
   deconvolution.
 
   Args:
@@ -2290,6 +2195,14 @@
     name: Optional name for the returned tensor.
     input: Alias of value.
     filters: Alias of filter.
+    dilations: An int or list of `ints` that has length `1`, `3` or `5`,
+      defaults to 1. The dilation factor for each dimension of `input`. If a
+      single value is given it is replicated in the `D`, `H` and `W` dimension.
+      By default the `N` and `C` dimensions are set to 1. If set to k > 1, there
+      will be k-1 skipped cells between each filter element on that dimension.
+      The dimension order is determined by the value of `data_format`, see above
+      for details. If a 5-d list is given, the dilations in the batch and depth
+      dimensions must be 1.
 
   Returns:
     A `Tensor` with the same type as `value`.
@@ -2300,68 +2213,81 @@
   """
   filter = deprecated_argument_lookup("filters", filters, "filter", filter)
   value = deprecated_argument_lookup("input", input, "value", value)
-  with ops.name_scope(name, "conv3d_transpose",
-                      [value, filter, output_shape]) as name:
-    value = ops.convert_to_tensor(value, name="value")
-    filter = ops.convert_to_tensor(filter, name="filter")  # pylint: disable=redefined-builtin
-    axis = 1 if data_format == "NCDHW" else 4
-    if not value.get_shape().dims[axis].is_compatible_with(
-        filter.get_shape()[4]):
-      raise ValueError("input channels does not match filter's input channels, "
-                       "{} != {}".format(value.get_shape()[axis],
-                                         filter.get_shape()[4]))
-
-    output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
-    if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(5)):
-      raise ValueError("output_shape must have shape (5,), got {}".format(
-          output_shape_.get_shape()))
-
-    if isinstance(output_shape, (list, np.ndarray)):
-      # output_shape's shape should be == [5] if reached this point.
-      if not filter.get_shape().dims[3].is_compatible_with(
-          output_shape[axis]):
-        raise ValueError(
-            "output_shape does not match filter's output channels, "
-            "{} != {}".format(output_shape[axis],
-                              filter.get_shape()[3]))
-
-    if padding != "VALID" and padding != "SAME":
-      raise ValueError("padding must be either VALID or SAME:"
-                       " {}".format(padding))
-
-    return gen_nn_ops.conv3d_backprop_input_v2(
-        input_sizes=output_shape_,
-        filter=filter,
-        out_backprop=value,
-        strides=strides,
-        padding=padding,
-        data_format=data_format,
-        name=name)
-
-
-# pylint: disable=redefined-builtin
-@tf_export("nn.conv3d_transpose", v1=[])
-def conv3d_transpose_v2(
-    input,
-    filters,
-    output_shape,
-    strides,
-    padding="SAME",
-    data_format="NDHWC",
-    name=None):
-  return conv3d_transpose(
-      input,
-      filters,
+  return conv3d_transpose_v2(
+      value,
+      filter,
       output_shape,
       strides,
       padding=padding,
       data_format=data_format,
+      dilations=dilations,
       name=name)
-# pylint: enable=redefined-builtin
-conv3d_transpose_v2.__doc__ = deprecation.rewrite_argument_docstring(
-    deprecation.rewrite_argument_docstring(
-        conv3d_transpose.__doc__, "filter", "filters"),
-    "value", "input")
+
+
+@tf_export("nn.conv3d_transpose", v1=[])
+def conv3d_transpose_v2(input,  # pylint: disable=redefined-builtin
+                        filters,
+                        output_shape,
+                        strides,
+                        padding="SAME",
+                        data_format="NDHWC",
+                        dilations=None,
+                        name=None):
+  """The transpose of `conv3d`.
+
+  This operation is sometimes called "deconvolution" after [Deconvolutional
+  Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf),
+  but is really the transpose (gradient) of `conv3d` rather than an actual
+  deconvolution.
+
+  Args:
+    input: A 5-D `Tensor` of type `float` and shape `[batch, depth, height,
+      width, in_channels]` for `NDHWC` data format or `[batch, in_channels,
+      depth, height, width]` for `NCDHW` data format.
+    filters: A 5-D `Tensor` with the same type as `input` and shape `[depth,
+      height, width, output_channels, in_channels]`.  `filters`' `in_channels`
+      dimension must match that of `input`.
+    output_shape: A 1-D `Tensor` representing the output shape of the
+      deconvolution op.
+    strides: An int or list of `ints` that has length `1`, `3` or `5`.  The
+      stride of the sliding window for each dimension of `input`. If a single
+      value is given it is replicated in the `D`, `H` and `W` dimension. By
+      default the `N` and `C` dimensions are set to 1. The dimension order is
+      determined by the value of `data_format`, see below for details.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
+      the "returns" section of `tf.nn.convolution` for details.
+    data_format: A string. 'NDHWC' and 'NCDHW' are supported.
+    dilations: An int or list of `ints` that has length `1`, `3` or `5`,
+      defaults to 1. The dilation factor for each dimension of `input`. If a
+      single value is given it is replicated in the `D`, `H` and `W` dimension.
+      By default the `N` and `C` dimensions are set to 1. If set to k > 1, there
+      will be k-1 skipped cells between each filter element on that dimension.
+      The dimension order is determined by the value of `data_format`, see above
+      for details. If a 5-d list is given, the dilations in the batch and depth
+      dimensions must be 1.
+    name: Optional name for the returned tensor.
+
+  Returns:
+    A `Tensor` with the same type as `input`.
+  """
+  with ops.name_scope(name, "conv3d_transpose",
+                      [input, filters, output_shape]) as name:
+    if data_format is None:
+      data_format = "NDHWC"
+    channel_index = 1 if data_format.startswith("NC") else 4
+
+    strides = _get_sequence(strides, 3, channel_index, "strides")
+    dilations = _get_sequence(dilations, 3, channel_index, "dilations")
+
+    return gen_nn_ops.conv3d_backprop_input_v2(
+        input_sizes=output_shape,
+        filter=filters,
+        out_backprop=input,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilations=dilations,
+        name=name)
 
 
 @tf_export("nn.bias_add")
@@ -2379,13 +2305,21 @@
     bias: A 1-D `Tensor` with size matching the last dimension of `value`.
       Must be the same type as `value` unless `value` is a quantized type,
       in which case a different quantized type may be used.
-    data_format: A string. 'NHWC' and 'NCHW' are supported.
+    data_format: A string. 'N...C' and 'NC...' are supported.
     name: A name for the operation (optional).
 
   Returns:
     A `Tensor` with the same type as `value`.
   """
   with ops.name_scope(name, "BiasAdd", [value, bias]) as name:
+    if data_format is not None:
+      if data_format.startswith("NC"):
+        data_format = "NCHW"
+      elif data_format.startswith("N") and data_format.endswith("C"):
+        data_format = "NHWC"
+      else:
+        raise ValueError("data_format must be of the form `N...C` or `NC...`")
+
     if not context.executing_eagerly():
       value = ops.convert_to_tensor(value, name="input")
       bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias")
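A brief sketch of the normalization above (illustrative shapes): any `N...C` string maps to `NHWC` and any `NC...` string to `NCHW` before the kernel runs, so a rank-3 channels-last tensor can be passed with `data_format="NWC"`.

    import tensorflow as tf

    value = tf.ones([2, 6, 4])              # [batch, width, channels]
    bias = tf.constant([0.1, 0.2, 0.3, 0.4])
    # "NWC" starts with "N" and ends with "C" -> normalized to "NHWC".
    out = tf.nn.bias_add(value, bias, data_format="NWC")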
@@ -2563,7 +2497,7 @@
 
   # We need its original shape for shape inference.
   shape = logits.get_shape()
-  is_last_dim = (dim is -1) or (dim == shape.ndims - 1)
+  is_last_dim = (dim == -1) or (dim == shape.ndims - 1)
 
   if is_last_dim:
     return compute_op(logits, name=name)
@@ -2571,7 +2505,7 @@
   dim_val = dim
   if isinstance(dim, ops.Tensor):
     dim_val = tensor_util.constant_value(dim)
-  if dim_val is not None and (dim_val < -shape.ndims or dim_val >= shape.ndims):
+  if dim_val is not None and not -shape.ndims <= dim_val < shape.ndims:
     raise errors_impl.InvalidArgumentError(
         None, None,
         "Dimension (%d) must be in the range [%d, %d) where %d is the number of"
@@ -3100,8 +3034,71 @@
         return cost
 
 
-@tf_export("nn.avg_pool")
-def avg_pool(value, ksize, strides, padding, data_format="NHWC", name=None):
+@tf_export("nn.avg_pool", v1=["nn.avg_pool_v2"])
+def avg_pool_v2(input, ksize, strides, padding, data_format=None, name=None):  # pylint: disable=redefined-builtin
+  """Performs the avg pooling on the input.
+
+  Each entry in `output` is the mean of the corresponding size `ksize`
+  window in `input`.
+
+  Args:
+    input:  Tensor of rank N+2, of shape `[batch_size] + input_spatial_shape +
+      [num_channels]` if `data_format` does not start with "NC" (default), or
+      `[batch_size, num_channels] + input_spatial_shape` if data_format starts
+      with "NC". Pooling happens over the spatial dimensions only.
+    ksize: An int or list of `ints` that has length `1`, `N` or `N+2`. The size
+      of the window for each dimension of the input tensor.
+    strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The
+      stride of the sliding window for each dimension of the input tensor.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
+      the "returns" section of `tf.nn.convolution` for details.
+    data_format: A string. Specifies the channel dimension. For N=1 it can be
+      either "NWC" (default) or "NCW", for N=2 it can be either "NHWC" (default)
+      or "NCHW" and for N=3 either "NDHWC" (default) or "NCDHW".
+    name: Optional name for the operation.
+
+  Returns:
+    A `Tensor` of format specified by `data_format`.
+    The average pooled output tensor.
+  """
+  if input.shape is not None:
+    n = len(input.shape) - 2
+  elif data_format is not None:
+    n = len(data_format) - 2
+  else:
+    raise ValueError(
+        "The input must have a rank or a data format must be given.")
+  if not 1 <= n <= 3:
+    raise ValueError(
+        "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2))
+
+  if data_format is None:
+    channel_index = n + 1
+  else:
+    channel_index = 1 if data_format.startswith("NC") else n + 1
+
+  ksize = _get_sequence(ksize, n, channel_index, "ksize")
+  strides = _get_sequence(strides, n, channel_index, "strides")
+
+  avg_pooling_ops = {
+      1: avg_pool1d,
+      2: gen_nn_ops.avg_pool,
+      3: gen_nn_ops.avg_pool3d
+  }
+
+  op = avg_pooling_ops[n]
+  return op(
+      input,
+      ksize=ksize,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      name=name)
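The dispatch above selects the kernel from the inferred spatial rank `n`; a hedged sketch exercising all three branches (shapes assumed).

    import tensorflow as tf
    from tensorflow.python.ops import nn_ops

    x1 = tf.ones([2, 8, 3])         # n=1 -> avg_pool1d
    x2 = tf.ones([2, 8, 8, 3])      # n=2 -> gen_nn_ops.avg_pool
    x3 = tf.ones([2, 8, 8, 8, 3])   # n=3 -> gen_nn_ops.avg_pool3d
    for x in (x1, x2, x3):
      print(nn_ops.avg_pool_v2(x, ksize=2, strides=2, padding="SAME").shape)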
+
+
+@tf_export(v1=["nn.avg_pool", "nn.avg_pool2d"])
+def avg_pool(value, ksize, strides, padding, data_format="NHWC",
+             name=None, input=None):  # pylint: disable=redefined-builtin
   """Performs the average pooling on the input.
 
   Each entry in `output` is the mean of the corresponding size `ksize`
@@ -3110,10 +3107,53 @@
   Args:
     value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type
       `float32`, `float64`, `qint8`, `quint8`, or `qint32`.
-    ksize: A list or tuple of 4 ints. The size of the window for each dimension
-      of the input tensor.
-    strides: A list or tuple of 4 ints. The stride of the sliding window for
-      each dimension of the input tensor.
+    ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size of
+      the window for each dimension of the input tensor.
+    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
+      stride of the sliding window for each dimension of the input tensor.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
+      See the "returns" section of `tf.nn.convolution` for details.
+    data_format: A string. 'NHWC' and 'NCHW' are supported.
+    name: Optional name for the operation.
+    input: Alias for value.
+
+  Returns:
+    A `Tensor` with the same type as `value`.  The average pooled output tensor.
+  """
+  with ops.name_scope(name, "AvgPool", [value]) as name:
+    value = deprecation.deprecated_argument_lookup(
+        "input", input, "value", value)
+
+    if data_format is None:
+      data_format = "NHWC"
+    channel_index = 1 if data_format.startswith("NC") else 3
+
+    ksize = _get_sequence(ksize, 2, channel_index, "ksize")
+    strides = _get_sequence(strides, 2, channel_index, "strides")
+
+    return gen_nn_ops.avg_pool(
+        value,
+        ksize=ksize,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        name=name)
+
+
+@tf_export("nn.avg_pool2d", v1=[])
+def avg_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None):  # pylint: disable=redefined-builtin
+  """Performs the average pooling on the input.
+
+  Each entry in `output` is the mean of the corresponding size `ksize`
+  window in `input`.
+
+  Args:
+    input: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type
+      `float32`, `float64`, `qint8`, `quint8`, or `qint32`.
+    ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size of
+      the window for each dimension of the input tensor.
+    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
+      stride of the sliding window for each dimension of the input tensor.
     padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
       See the "returns" section of `tf.nn.convolution` for details.
     data_format: A string. 'NHWC' and 'NCHW' are supported.
@@ -3122,10 +3162,100 @@
   Returns:
     A `Tensor` with the same type as `value`.  The average pooled output tensor.
   """
-  with ops.name_scope(name, "AvgPool", [value]) as name:
-    value = ops.convert_to_tensor(value, name="input")
+  with ops.name_scope(name, "AvgPool2D", [input]) as name:
+    if data_format is None:
+      data_format = "NHWC"
+    channel_index = 1 if data_format.startswith("NC") else 3
+
+    ksize = _get_sequence(ksize, 2, channel_index, "ksize")
+    strides = _get_sequence(strides, 2, channel_index, "strides")
+
     return gen_nn_ops.avg_pool(
-        value,
+        input,
+        ksize=ksize,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        name=name)
+
+
+@tf_export("nn.avg_pool1d")
+def avg_pool1d(input, ksize, strides, padding, data_format="NWC", name=None):  # pylint: disable=redefined-builtin
+  """Performs the average pooling on the input.
+
+  Each entry in `output` is the mean of the corresponding size `ksize`
+  window in `input`.
+
+  Note that internally this op reshapes the input and uses the underlying
+  2-D operation.
+
+  Args:
+    input: A 3-D `Tensor` of the format specified by `data_format`.
+    ksize: An int or list of `ints` that has length `1` or `3`. The size of the
+      window for each dimension of the input tensor.
+    strides: An int or list of `ints` that has length `1` or `3`. The stride of
+      the sliding window for each dimension of the input tensor.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
+      the "returns" section of `tf.nn.convolution` for details.
+    data_format: An optional string from: "NWC", "NCW". Defaults to "NWC".
+    name: A name for the operation (optional).
+
+  Returns:
+    A `Tensor` of format specified by `data_format`.
+    The average pooled output tensor.
+  """
+  with ops.name_scope(name, "AvgPool1D", [input]) as name:
+    if data_format is None:
+      data_format = "NWC"
+    channel_index = 1 if data_format.startswith("NC") else 2
+    ksize = [1] + _get_sequence(ksize, 1, channel_index, "ksize")
+    strides = [1] + _get_sequence(strides, 1, channel_index, "strides")
+
+    data_format = "NHWC" if data_format == "NWC" else "NCHW"
+    expanding_dim = 1 if data_format == "NWC" else 2
+
+    input = array_ops.expand_dims_v2(input, expanding_dim)
+    result = gen_nn_ops.avg_pool(
+        input,
+        ksize=ksize,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        name=name)
+    return array_ops.squeeze(result, expanding_dim)
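A hedged equivalence check of the expand-pool-squeeze trick above (values assumed, and assuming `tf.debugging.assert_near` is available): pooling a dummy height axis with the 2-D kernel should match the 1-D op.

    import tensorflow as tf

    x = tf.random.uniform([2, 10, 3])   # NWC
    y1 = tf.nn.avg_pool1d(x, ksize=2, strides=2, padding="VALID")
    y2 = tf.squeeze(
        tf.nn.avg_pool2d(tf.expand_dims(x, 1),   # dummy H axis
                         ksize=[1, 2], strides=[1, 2], padding="VALID"), 1)
    tf.debugging.assert_near(y1, y2)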
+
+
+@tf_export("nn.avg_pool3d")
+def avg_pool3d(input, ksize, strides, padding, data_format="NDHWC", name=None):  # pylint: disable=redefined-builtin
+  """Performs the average pooling on the input.
+
+  Each entry in `output` is the mean of the corresponding size `ksize`
+  window in `input`.
+
+  Args:
+    input: A 5-D `Tensor` of shape `[batch, depth, height, width, channels]`
+      and type `float32`, `float64`, `qint8`, `quint8`, or `qint32`.
+    ksize: An int or list of `ints` that has length `1`, `3` or `5`. The size of
+      the window for each dimension of the input tensor.
+    strides: An int or list of `ints` that has length `1`, `3` or `5`. The
+      stride of the sliding window for each dimension of the input tensor.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
+      See the "returns" section of `tf.nn.convolution` for details.
+    data_format: A string. 'NDHWC' and 'NCDHW' are supported.
+    name: Optional name for the operation.
+
+  Returns:
+    A `Tensor` with the same type as `input`.  The average pooled output tensor.
+  """
+  with ops.name_scope(name, "AvgPool3D", [input]) as name:
+    if data_format is None:
+      data_format = "NDHWC"
+    channel_index = 1 if data_format.startswith("NC") else 3
+
+    ksize = _get_sequence(ksize, 3, channel_index, "ksize")
+    strides = _get_sequence(strides, 3, channel_index, "strides")
+
+    return gen_nn_ops.avg_pool3d(
+        input,
         ksize=ksize,
         strides=strides,
         padding=padding,
@@ -3165,7 +3295,7 @@
   else:
     raise ValueError(
         "The input must have a rank or a data format must be given.")
-  if n < 1 or n > 3:
+  if not 1 <= n <= 3:
     raise ValueError(
         "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2))
 
@@ -3183,7 +3313,7 @@
       3: gen_nn_ops.max_pool3d
   }
 
-  op = max_pooling_ops.get(n)
+  op = max_pooling_ops[n]
   return op(
       input,
       ksize=ksize,
@@ -3191,8 +3321,6 @@
       padding=padding,
       data_format=data_format,
       name=name)
-
-
 # pylint: enable=redefined-builtin
 
 
@@ -3365,9 +3493,8 @@
 # pylint: enable=redefined-builtin
 
 
-# pylint: disable=redefined-builtin
 @tf_export("nn.max_pool_with_argmax", v1=[])
-def max_pool_with_argmax_v2(input,
+def max_pool_with_argmax_v2(input,  # pylint: disable=redefined-builtin
                             ksize,
                             strides,
                             padding,
@@ -3428,12 +3555,12 @@
 
 
 @tf_export(v1=["nn.max_pool_with_argmax"])
-def max_pool_with_argmax_v1(input,  # pylint: disable=missing-docstring,invalid-name
+def max_pool_with_argmax_v1(input,  # pylint: disable=missing-docstring,redefined-builtin,invalid-name
                             ksize,
                             strides,
                             padding,
                             data_format="NHWC",
-                            Targmax=None,  # pylint: disable=invalid-name
+                            Targmax=None,
                             name=None,
                             output_dtype=None):
   if data_format != "NHWC":
@@ -3447,7 +3574,6 @@
       input, ksize, strides, padding, Targmax, name)
 
 max_pool_with_argmax_v1.__doc__ = gen_nn_ops.max_pool_with_argmax.__doc__
-# pylint: enable=redefined-builtin
 
 
 @ops.RegisterStatistics("Conv2D", "flops")
@@ -3517,7 +3643,7 @@
     return bias_add(mm, biases, name=name)
 
 
-def xw_plus_b_v1(x, weights, biases, name=None):  # pylint: disable=invalid-name
+def xw_plus_b_v1(x, weights, biases, name=None):
   """Computes matmul(x, weights) + biases.
 
   This is a deprecated version of that will soon be removed.
@@ -3571,7 +3697,7 @@
                              "Rate should be set to `rate = 1 - keep_prob`.",
                              "keep_prob")
 def dropout(x, keep_prob=None, noise_shape=None, seed=None, name=None,
-            rate=None):  # pylint: disable=invalid-name
+            rate=None):
   """Computes dropout.
 
   For each element of `x`, with probability `rate`, outputs `0`, and otherwise
@@ -3621,7 +3747,7 @@
 
 
 @tf_export("nn.dropout", v1=[])
-def dropout_v2(x, rate, noise_shape=None, seed=None, name=None):  # pylint: disable=invalid-name
+def dropout_v2(x, rate, noise_shape=None, seed=None, name=None):
   """Computes dropout.
 
   With probability `rate`, drops elements of `x`. Input that are kept are
@@ -4057,10 +4183,10 @@
     warn_once=True,
     data_format="NHWC")
 def conv1d(
-    value,
-    filters,
-    stride,
-    padding,
+    value=None,
+    filters=None,
+    stride=None,
+    padding=None,
     use_cudnn_on_gpu=None,
     data_format=None,
     name=None,
@@ -4208,35 +4334,41 @@
       dilations=dilations)
 
 
+@tf_export("nn.conv1d_transpose")
 def conv1d_transpose(
-    value,
-    filter,  # pylint: disable=redefined-builtin
+    input,  # pylint: disable=redefined-builtin
+    filters,
     output_shape,
-    stride,
+    strides,
     padding="SAME",
     data_format="NWC",
+    dilations=None,
     name=None):
   """The transpose of `conv1d`.
 
   This operation is sometimes called "deconvolution" after [Deconvolutional
-  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
-  actually the transpose (gradient) of `conv1d` rather than an actual
+  Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf),
+  but is really the transpose (gradient) of `conv1d` rather than an actual
   deconvolution.
 
   Args:
-    value: A 3-D `Tensor` of type `float` and shape
+    input: A 3-D `Tensor` of type `float` and shape
       `[batch, in_width, in_channels]` for `NWC` data format or
       `[batch, in_channels, in_width]` for `NCW` data format.
-    filter: A 3-D `Tensor` with the same type as `value` and shape
+    filters: A 3-D `Tensor` with the same type as `input` and shape
      `[filter_width, output_channels, in_channels]`.  `filters`'
      `in_channels` dimension must match that of `input`.
     output_shape: A 1-D `Tensor`, containing three elements, representing the
       output shape of the deconvolution op.
-    stride: An `integer`.  The number of entries by which
-      the filter is moved right at each step.
+    strides: An int or list of `ints` that has length `1` or `3`.  The number of
+      entries by which the filter is moved right at each step.
     padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
       See the "returns" section of `tf.nn.convolution` for details.
     data_format: A string. `'NWC'` and `'NCW'` are supported.
+    dilations: An int or list of `ints` that has length `1` or `3` which
+      defaults to 1. The dilation factor for each dimension of input. If set to
+      k > 1, there will be k-1 skipped cells between each filter element on that
+      dimension. Dilations in the batch and depth dimensions must be 1.
     name: Optional name for the returned tensor.
 
   Returns:
@@ -4248,64 +4380,39 @@
       `'VALID'` or `'SAME'`, or if `data_format` is invalid.
   """
   with ops.name_scope(name, "conv1d_transpose",
-                      [value, filter, output_shape]) as name:
-    output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
-    if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)):
-      raise ValueError("output_shape must have shape (3,), got {}".format(
-          output_shape_.get_shape()))
-
+                      [input, filters, output_shape]) as name:
     # The format could be either NWC or NCW, map to NHWC or NCHW
     if data_format is None or data_format == "NWC":
-      data_format_2d = "NHWC"
-      axis = 2
+      data_format = "NHWC"
+      spatial_start_dim = 1
+      channel_index = 2
     elif data_format == "NCW":
-      data_format_2d = "NCHW"
-      axis = 1
+      data_format = "NCHW"
+      spatial_start_dim = 2
+      channel_index = 1
     else:
       raise ValueError("data_format must be \"NWC\" or \"NCW\".")
 
-    if not value.get_shape().dims[axis].is_compatible_with(
-        filter.get_shape()[2]):
-      raise ValueError("input channels does not match filter's input channels, "
-                       "{} != {}".format(value.get_shape()[axis],
-                                         filter.get_shape()[2]))
-
-    if isinstance(output_shape, (list, np.ndarray)):
-      # output_shape's shape should be == [3] if reached this point.
-      if not filter.get_shape().dims[1].is_compatible_with(
-          output_shape[axis]):
-        raise ValueError(
-            "output_shape does not match filter's output channels, "
-            "{} != {}".format(output_shape[axis],
-                              filter.get_shape()[1]))
-
-    if padding != "VALID" and padding != "SAME":
-      raise ValueError("padding must be either VALID or SAME:"
-                       " {}".format(padding))
-
     # Reshape the input tensor to [batch, 1, in_width, in_channels]
-    if data_format_2d == "NHWC":
-      output_shape_ = array_ops.concat(
-          [output_shape_[:1], [1], output_shape_[1:]], axis=0)
-      spatial_start_dim = 1
-      strides = [1, 1, stride, 1]
-    else:
-      output_shape_ = array_ops.concat(
-          [output_shape_[:2], [1], output_shape_[2:]], axis=0)
-      spatial_start_dim = 2
-      strides = [1, 1, 1, stride]
-    value = array_ops.expand_dims(value, spatial_start_dim)
-    filter = array_ops.expand_dims(filter, 0)  # pylint: disable=redefined-builtin
+    strides = [1] + _get_sequence(strides, 1, channel_index, "stride")
+    dilations = [1] + _get_sequence(dilations, 1, channel_index, "dilations")
+
+    input = array_ops.expand_dims(input, spatial_start_dim)
+    filters = array_ops.expand_dims(filters, 0)
+    output_shape = list(output_shape)
+    output_shape = (output_shape[:spatial_start_dim] + [1] +
+                    output_shape[spatial_start_dim:])
 
     result = gen_nn_ops.conv2d_backprop_input(
-        input_sizes=output_shape_,
-        filter=filter,
-        out_backprop=value,
+        input_sizes=output_shape,
+        filter=filters,
+        out_backprop=input,
         strides=strides,
         padding=padding,
-        data_format=data_format_2d,
+        data_format=data_format,
+        dilations=dilations,
         name=name)
-    return array_ops.squeeze(result, [spatial_start_dim])
+    return array_ops.squeeze(result, spatial_start_dim)
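A short hedged example of the newly exported endpoint (shapes assumed): with the default `padding='SAME'` and stride 2 the output width doubles.

    import tensorflow as tf

    x = tf.ones([1, 8, 4])   # [batch, in_width, in_channels]
    f = tf.ones([3, 6, 4])   # [filter_width, output_channels, in_channels]
    y = tf.nn.conv1d_transpose(x, f, output_shape=[1, 16, 6], strides=2)
    print(y.shape)  # (1, 16, 6)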
 
 
 @ops.RegisterStatistics("Dilation2D", "flops")
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 3279dca..8426d39 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -24,9 +24,11 @@
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
@@ -41,7 +43,6 @@
 from tensorflow.python.platform import test as test_lib
 
 
-@test_util.disable_all_xla("This test never passed for XLA")
 class ZeroFractionTest(test_lib.TestCase):
 
   def _ZeroFraction(self, x):
@@ -1018,11 +1019,10 @@
 class SwishTest(test_lib.TestCase):
 
   @test_util.run_deprecated_v1
-  @test_util.disable_xla("This test never passed for XLA")
   def testValues(self):
     np_values = np.array(
-        [np.linspace(-10.0, 0.0, 100),
-         np.linspace(0.0, 10.0, 100)],
+        [np.linspace(-7.0, 0.0, 100),
+         np.linspace(0.0, 7.0, 100)],
         dtype=np.float32)
     tf_values = constant_op.constant(np_values)
     actual_tf_outputs = nn_impl.swish(tf_values)
@@ -1242,6 +1242,70 @@
 
 
 @test_util.run_all_in_graph_and_eager_modes
+class AvgPoolTest(test_lib.TestCase):
+
+  def test1DTensor(self):
+    x = array_ops.ones([3, 6, 5])
+    ksize = 2
+    strides = 2
+
+    y1 = nn_ops.avg_pool_v2(x, ksize, strides, "SAME")
+    y2 = nn_ops.avg_pool1d(x, ksize, strides, "SAME")
+
+    self.assertAllEqual(self.evaluate(y1), self.evaluate(y2))
+
+  def test1DNumpy(self):
+    x = np.ones([3, 6, 5])
+    ksize = 2
+    strides = 2
+
+    y1 = nn_ops.avg_pool_v2(x, ksize, strides, "SAME")
+    y2 = nn_ops.avg_pool1d(x, ksize, strides, "SAME")
+
+    self.assertAllEqual(self.evaluate(y1), self.evaluate(y2))
+
+  def test2DTensor(self):
+    x = array_ops.ones([3, 6, 6, 5])
+    ksize = 2
+    strides = 2
+
+    y1 = nn_ops.avg_pool_v2(x, ksize, strides, "SAME")
+    y2 = nn_ops.avg_pool(x, ksize, strides, "SAME")
+
+    self.assertAllEqual(self.evaluate(y1), self.evaluate(y2))
+
+  def test2DNumpy(self):
+    x = np.ones([3, 6, 6, 5])
+    ksize = 2
+    strides = 2
+
+    y1 = nn_ops.avg_pool_v2(x, ksize, strides, "SAME")
+    y2 = nn_ops.avg_pool(x, ksize, strides, "SAME")
+
+    self.assertAllEqual(self.evaluate(y1), self.evaluate(y2))
+
+  def test3DTensor(self):
+    x = array_ops.ones([3, 7, 6, 6, 5])
+    ksize = 2
+    strides = 2
+
+    y1 = nn_ops.avg_pool_v2(x, ksize, strides, "SAME")
+    y2 = nn_ops.avg_pool3d(x, ksize, strides, "SAME")
+
+    self.assertAllEqual(self.evaluate(y1), self.evaluate(y2))
+
+  def test3DNumpy(self):
+    x = np.ones([3, 7, 6, 6, 5], dtype=np.float32)
+    ksize = 2
+    strides = 2
+
+    y1 = nn_ops.avg_pool_v2(x, ksize, strides, "SAME")
+    y2 = nn_ops.avg_pool3d(x, ksize, strides, "SAME")
+
+    self.assertAllEqual(self.evaluate(y1), self.evaluate(y2))
+
+
+@test_util.run_all_in_graph_and_eager_modes
 class MaxPoolTest(test_lib.TestCase):
 
   def test1DTensor(self):
@@ -1317,5 +1381,19 @@
       nn_ops.max_pool_v2(x, 2, 2, "SAME")
 
 
+@test_util.run_all_in_graph_and_eager_modes
+class ConvolutionTest(test_lib.TestCase):
+
+  def testUnknownSize(self):
+    x = tensor_spec.TensorSpec(None, dtypes.float32, name="x")
+    k = np.ones([3, 6, 6, 5])
+
+    @def_function.function
+    def F(value):
+      return nn_ops.convolution(value, k, "SAME")
+
+    F.get_concrete_function(x)
+
+
 if __name__ == "__main__":
   test_lib.main()
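
The new `AvgPoolTest` cases check that the rank-generic `avg_pool_v2` dispatches to the 1-D, 2-D, and 3-D pooling ops and accepts NumPy arrays as well as Tensors. From the caller's side, roughly (assuming `avg_pool_v2` is exported as `tf.nn.avg_pool` in the v2 API):

```python
import numpy as np
import tensorflow as tf

x1 = tf.ones([3, 6, 5])                          # rank 3 -> 1-D pooling op
x3 = np.ones([3, 7, 6, 6, 5], dtype=np.float32)  # rank 5 -> 3-D pooling op

y1 = tf.nn.avg_pool(x1, ksize=2, strides=2, padding="SAME")
y3 = tf.nn.avg_pool(x3, ksize=2, strides=2, padding="SAME")
print(y1.shape, y3.shape)  # (3, 3, 5) (3, 4, 3, 3, 5)
```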
diff --git a/tensorflow/python/ops/parallel_for/BUILD b/tensorflow/python/ops/parallel_for/BUILD
index 05d2e4c..0a2f3e2 100644
--- a/tensorflow/python/ops/parallel_for/BUILD
+++ b/tensorflow/python/ops/parallel_for/BUILD
@@ -115,6 +115,7 @@
         "//tensorflow/python:random_ops",
         "//tensorflow/python:util",
     ],
+    tags = ["no_rocm"],
 )
 
 cuda_py_test(
@@ -129,6 +130,7 @@
         "//tensorflow/python:util",
         "//tensorflow/python/eager:backprop",
     ],
+    xla_enable_strict_auto_jit = True,
 )
 
 cuda_py_test(
@@ -143,6 +145,7 @@
         "//tensorflow/python:util",
     ],
     tags = ["optonly"],  # Too slow in non-opt mode
+    xla_enable_strict_auto_jit = True,
 )
 
 py_library(
diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py
index 7a5bef7..8a081e1 100644
--- a/tensorflow/python/ops/parallel_for/math_test.py
+++ b/tensorflow/python/ops/parallel_for/math_test.py
@@ -161,7 +161,6 @@
         math_ops.divide,
         math_ops.div_no_nan,
         math_ops.equal,
-        math_ops.floor_div,
         math_ops.floor_mod,
         math_ops.greater,
         math_ops.greater_equal,
@@ -182,6 +181,10 @@
         safe_polygamma,
         safe_zeta,
     ]
+    # FloorDiv fails on XLA due to floor's discontinuities exacerbating small
+    # division differences.
+    if not test_util.is_xla_enabled():
+      float_ops += [math_ops.floor_div]
     for op in logical_ops + float_ops:
       x = random_ops.random_uniform([7, 3, 5])
       y = random_ops.random_uniform([3, 5])
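
The comment's point: `floor` maps values that are arbitrarily close but on opposite sides of an integer to results a full unit apart, so a tiny backend difference in the computed quotient (e.g., from XLA's fusion) can flip `floor_div`. A float32 illustration:

```python
import numpy as np

x = np.float32(1.0)
y = np.float32(1.0) - np.float32(2.0) ** -24  # one ulp below 1.0

# The inputs differ by ~6e-8, yet floor differs by a whole unit.
print(np.floor(x), np.floor(y))  # 1.0 0.0
```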
diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py
index 019d4f2..b9f7a0f 100644
--- a/tensorflow/python/ops/parallel_for/pfor.py
+++ b/tensorflow/python/ops/parallel_for/pfor.py
@@ -1903,7 +1903,7 @@
 def _convert_biasadd(pfor_input):
   t, t_stacked, _ = pfor_input.input(0)
   bias, bias_stacked, _ = pfor_input.input(1)
-  data_format = pfor_input.get_attr("data_format")
+  data_format = pfor_input.get_attr("data_format").decode()
   if bias_stacked:
     # BiasAdd only supports 1-D biases, so cast bias to match value and use Add.
     pfor_input.expanddim_inputs_for_broadcast()
@@ -1921,7 +1921,7 @@
       shape = array_ops.shape(t)
       flattened_shape = array_ops.concat([[-1], shape[2:]], axis=0)
       t = array_ops.reshape(t, flattened_shape)
-      t = nn_ops.bias_add(t, bias, data_format=b"NCHW")
+      t = nn_ops.bias_add(t, bias, data_format="NCHW")
       t = array_ops.reshape(t, shape)
       return wrap(t, True)
     return wrap(nn_ops.bias_add(t, bias, data_format=data_format), True)
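
The `.decode()` fix matters because string attrs read back from a `NodeDef` are `bytes` in Python 3, so comparisons against `str` literals elsewhere in this converter would silently fail without it:

```python
# What get_attr("data_format") effectively returns in Python 3:
data_format = b"NCHW"

print(data_format == "NCHW")           # False: bytes != str in Python 3
print(data_format.decode() == "NCHW")  # True after decoding
```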
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch.py b/tensorflow/python/ops/ragged/ragged_dispatch.py
index 0c9e6ef..3bda777 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch.py
@@ -470,7 +470,8 @@
       _BINARY_ELEMENTWISE_OPS + [x[0] for x in _RAGGED_DISPATCH_OPS])
   for op in op_list:
     _, undecorated_op = tf_decorator.unwrap(op)
-    if not hasattr(undecorated_op, tf_export.API_ATTRS['tensorflow'].names):
+    if not hasattr(undecorated_op,
+                   tf_export.API_ATTRS[tf_export.TENSORFLOW_API_NAME].names):
       raise AssertionError('Expected %s to be an exported symbol '
                            '(while adding a RaggedTensor dispatcher)')
 
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index c1f11e8..afc9e97 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -43,7 +43,7 @@
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_resource_variable_ops import *
 # pylint: enable=wildcard-import
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import compat
 from tensorflow.python.util.deprecation import deprecated
 
@@ -505,8 +505,8 @@
     if constraint is not None and not callable(constraint):
       raise ValueError("The `constraint` argument must be a callable.")
 
-    if isinstance(initial_value, checkpointable.CheckpointInitialValue):
-      self._maybe_initialize_checkpointable()
+    if isinstance(initial_value, trackable.CheckpointInitialValue):
+      self._maybe_initialize_trackable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
       initial_value = initial_value.wrapped_value
 
@@ -1684,7 +1684,7 @@
       constraint=var._constraint,
       dtype=var.dtype,
       name=var._shared_name)
-  new_variable._maybe_initialize_checkpointable()
+  new_variable._maybe_initialize_trackable()
   # pylint: enable=protected-access
   return new_variable
 
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 603baea..cb9377d 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -50,7 +50,7 @@
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.tf_export import tf_export
@@ -1095,8 +1095,8 @@
   def __init__(self, cell):
     super(_RNNCellWrapperV1, self).__init__()
     self._cell = cell
-    if isinstance(cell, checkpointable.Checkpointable):
-      self._track_checkpointable(self._cell, name="cell")
+    if isinstance(cell, trackable.Trackable):
+      self._track_trackable(self._cell, name="cell")
 
   def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs):
     """Calls the wrapped cell and performs the wrapping logic.
@@ -1611,8 +1611,8 @@
     """
     super(DeviceWrapper, self).__init__()
     self._cell = cell
-    if isinstance(cell, checkpointable.Checkpointable):
-      self._track_checkpointable(self._cell, name="cell")
+    if isinstance(cell, trackable.Trackable):
+      self._track_trackable(self._cell, name="cell")
     self._device = device
 
   @property
@@ -1678,11 +1678,11 @@
 
     self._cells = cells
     for cell_number, cell in enumerate(self._cells):
-      # Add Checkpointable dependencies on these cells so their variables get
+      # Add Trackable dependencies on these cells so their variables get
       # saved with this object when using object-based saving.
-      if isinstance(cell, checkpointable.Checkpointable):
-        # TODO(allenl): Track down non-Checkpointable callers.
-        self._track_checkpointable(cell, name="cell-%d" % (cell_number,))
+      if isinstance(cell, trackable.Trackable):
+        # TODO(allenl): Track down non-Trackable callers.
+        self._track_trackable(cell, name="cell-%d" % (cell_number,))
     self._state_is_tuple = state_is_tuple
     if not state_is_tuple:
       if any(nest.is_sequence(c.state_size) for c in self._cells):
diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py
index a5b31af..63b5eab 100644
--- a/tensorflow/python/ops/script_ops.py
+++ b/tensorflow/python/ops/script_ops.py
@@ -391,12 +391,16 @@
 
 @deprecation.deprecated(
     date=None,
-    instructions="""tf.py_func is deprecated in TF V2. Instead, use
-    tf.py_function, which takes a python function which manipulates tf eager
+    instructions="""tf.py_func is deprecated in TF V2. Instead, there are two
+    options available in V2.
+    - tf.py_function takes a python function which manipulates tf eager
     tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
     an ndarray (just call tensor.numpy()) but having access to eager tensors
     means `tf.py_function`s can use accelerators such as GPUs as well as
     being differentiable using a gradient tape.
+    - tf.numpy_function maintains the semantics of the deprecated tf.py_func
+    (it is not differentiable, and manipulates numpy arrays). It drops the
+    stateful argument, making all functions stateful.
     """)
 @tf_export(v1=["py_func"])
 def py_func(func, inp, Tout, stateful=True, name=None):
@@ -467,6 +471,13 @@
   return _internal_py_func(
       func=func, inp=inp, Tout=Tout, stateful=stateful, eager=False, name=name)
 
+@tf_export("numpy_function", v1=[])
+def numpy_function(func, inp, Tout, name=None):
+  return py_func(func, inp, Tout, stateful=True, name=name)
+
+numpy_function.__doc__ = py_func.__doc__.replace(
+    "py_func", "numpy_function")
+
 
 ops.NotDifferentiable("PyFunc")
 ops.NotDifferentiable("PyFuncStateless")
diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py
index 0224e7e..41ba060 100644
--- a/tensorflow/python/ops/special_math_ops_test.py
+++ b/tensorflow/python/ops/special_math_ops_test.py
@@ -119,7 +119,6 @@
           special_math_ops.lbeta(x).get_shape())
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.disable_xla('This test never passed for XLA')
   def test_length_1_last_dimension_results_in_one(self):
     # If there is only one coefficient, the formula still works, and we get one
     # as the answer, always.
@@ -127,7 +126,9 @@
     x_b = [0.1]
     with self.session(use_gpu=True):
       self.assertAllClose(
-          1, self.evaluate(math_ops.exp(special_math_ops.lbeta(x_a))))
+          1,
+          self.evaluate(math_ops.exp(special_math_ops.lbeta(x_a))),
+          rtol=3e-6)
       self.assertAllClose(
           1, self.evaluate(math_ops.exp(special_math_ops.lbeta(x_b))))
       self.assertEqual((), special_math_ops.lbeta(x_a).get_shape())
diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py
index ba3bd09..5e217d8 100644
--- a/tensorflow/python/ops/standard_ops.py
+++ b/tensorflow/python/ops/standard_ops.py
@@ -54,6 +54,7 @@
 # pylint: enable=redefined-builtin
 from tensorflow.python.eager import wrap_function
 from tensorflow.python.ops.control_flow_ops import while_loop
+from tensorflow.python.ops.critical_section_ops import *
 from tensorflow.python.ops.data_flow_ops import *
 from tensorflow.python.ops.functional_ops import *
 from tensorflow.python.ops.gradients import *
diff --git a/tensorflow/python/ops/stateful_random_ops.py b/tensorflow/python/ops/stateful_random_ops.py
index 155ad96..91ca252 100644
--- a/tensorflow/python/ops/stateful_random_ops.py
+++ b/tensorflow/python/ops/stateful_random_ops.py
@@ -27,7 +27,7 @@
 from tensorflow.python.ops import gen_stateful_random_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
-from tensorflow.python.training.checkpointable import \
+from tensorflow.python.training.tracking import \
 tracking
 from tensorflow.python.util.tf_export import tf_export
 
@@ -51,6 +51,7 @@
 
 
 STATE_TYPE = SEED_TYPE
+ALGORITHM_TYPE = STATE_TYPE
 RNG_ALG_PHILOX = 1
 DEFAULT_ALGORITHM = RNG_ALG_PHILOX
 
@@ -119,26 +120,18 @@
     raise ValueError("Unsupported algorithm id: %s" % algorithm)
 
 
-def create_rng_state(seed, algorithm=None):
+@tf_export("random.create_rng_state")
+def create_rng_state(seed, algorithm):
   """Creates a RNG state.
 
   Args:
     seed: an integer or 1-D tensor.
-    algorithm: (optional) an integer representing the RNG algorithm. If None, an
-      algorithm will be auto-selected.
+    algorithm: an integer representing the RNG algorithm.
 
   Returns:
-    a 1-D tensor "rng_state" with:
-    * rng_state[0] is a value that identifies the RNG algorithm;
-    * rng_state[1:] holds the RNG state itself (size dependent on the
-        algorithm).
+    a 1-D tensor whose size depends on the algorithm.
   """
-  if algorithm is None:
-    # TODO(wangpeng): more sophisticated algorithm selection
-    algorithm = DEFAULT_ALGORITHM
-  state = _make_state_from_seed(seed, algorithm)
-  return np.concatenate((np.array([algorithm], dtype=STATE_TYPE), state),
-                        axis=None)
+  return _make_state_from_seed(seed, algorithm)
 
 
 def _shape_tensor(shape):
@@ -151,24 +144,48 @@
 
 
 @tf_export("random.experimental.Generator")
-class Generator(tracking.AutoCheckpointable):
+class Generator(tracking.AutoTrackable):
   """Random-number generator.
 
   It uses Variable to manage its internal state.
   """
 
   def __init__(self, copy_from=None, seed=None, algorithm=None):
+    """Creates a generator.
+
+    Args:
+      copy_from: (optional) a generator to be copied from.
+      seed: (optional) the seed for the RNG. If None, it will be chosen
+            nondeterministically.
+      algorithm: (optional) the RNG algorithm. If None, it will be
+                 auto-selected.
+    """
     if copy_from is None:
       if seed is None:
         seed = non_deterministic_seed()
+      if algorithm is None:
+        # TODO(wangpeng): more sophisticated algorithm selection
+        algorithm = DEFAULT_ALGORITHM
       state = create_rng_state(seed, algorithm)
       self._state_var = variables.Variable(state, dtype=STATE_TYPE)
+      self._alg_var = variables.Variable(initial_value=algorithm,
+                                         dtype=ALGORITHM_TYPE)
     else:
       assert seed is None
-      state = copy_from.state
-      self._state_var = variables.Variable(state, dtype=STATE_TYPE)
+      self._state_var = variables.Variable(copy_from.state, dtype=STATE_TYPE)
+      self._alg_var = variables.Variable(initial_value=copy_from.algorithm,
+                                         dtype=ALGORITHM_TYPE)
 
   def reset(self, seed):
+    """Resets the generator.
+
+    This function is not thread-safe: if it is run concurrently with a call to
+    sampling, the latter might see the new algorithm but the old state, or
+    vice versa.
+
+    Args:
+      seed: the seed to reset the RNG to.
+    """
     algorithm = int(self.algorithm)
     state = create_rng_state(seed, algorithm)
     self._state_var.assign(state)
@@ -179,13 +196,14 @@
 
   @property
   def algorithm(self):
-    return self._state_var[0]
+    return self._alg_var
 
   # The following functions return a tensor and as a side effect update
   # self._state_var.
   def standard_normal(self, shape, dtype=dtypes.float32):
-    return gen_stateful_random_ops.stateful_standard_normal(
-        self.state.handle, shape, dtype)
+    return gen_stateful_random_ops.stateful_standard_normal_v2(
+        self.state.handle, self.algorithm, shape, dtype)
 
   def normal(self, shape, mean=0.0, stddev=1.0, dtype=dtypes.float32,
              name=None):
@@ -237,5 +255,6 @@
 def reset_global_generator(seed, algorithm=None):
   global global_generator
   if algorithm is None:
-    algorithm = int(global_generator.algorithm)  # preserve the old algorithm
+    # preserve the old algorithm
+    algorithm = int(get_global_generator().algorithm)
   global_generator = Generator(seed=seed, algorithm=algorithm)
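
With this change the algorithm id is no longer packed into `state[0]`: it lives in its own variable on the generator, and `create_rng_state` takes a now-mandatory `algorithm` and returns only the raw state. A minimal sketch against the API as it appears in this diff:

```python
import tensorflow as tf

RNG_ALG_PHILOX = 1  # the only algorithm id defined in this revision

# The returned state tensor no longer carries the algorithm prefix.
state = tf.random.create_rng_state(seed=1234, algorithm=RNG_ALG_PHILOX)

g = tf.random.experimental.Generator(seed=1234)
samples = g.standard_normal([2, 3])
print(int(g.algorithm))  # 1: stored in a separate variable on the generator
```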
diff --git a/tensorflow/python/ops/stateful_random_ops_test.py b/tensorflow/python/ops/stateful_random_ops_test.py
index 92419a0..0436736 100644
--- a/tensorflow/python/ops/stateful_random_ops_test.py
+++ b/tensorflow/python/ops/stateful_random_ops_test.py
@@ -18,15 +18,19 @@
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import gen_random_ops
+from tensorflow.python.ops import gen_stateful_random_ops
 from tensorflow.python.ops import logging_ops
 from tensorflow.python.ops import stateful_random_ops as \
 random
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -39,8 +43,8 @@
                                     random.RNG_ALG_PHILOX)
     self.assertAllEqual(
         list(map(random._uint_to_int,
-                 [random.RNG_ALG_PHILOX, 0xFFAA666677778888,
-                  0xFFFF222233334444] + [0] * (random.PHILOX_STATE_SIZE - 2))),
+                 [0xFFAA666677778888, 0xFFFF222233334444] +
+                 [0] * (random.PHILOX_STATE_SIZE - 2))),
         state)
 
   @test_util.run_v2_only
@@ -112,28 +116,82 @@
     compare(True, False)
 
   @test_util.run_v2_only
-  def testSameAsOldRandomOps(self):
-    """Tests that the generated numbers are the same as the old random_ops.py .
+  def testCPUSameAsOldRandomOps(self):
+    """Tests that the generated numbers are the same as the old random_ops.py.
+
+    The CPU version.
     """
-    seed1, seed2 = 50, 60
+    seed1, seed2 = 79, 25
     # note how the two seeds for the old op correspond to the seed for the new
     # op
-    random.get_global_generator().reset([0, seed2, seed1])
-    shape = constant_op.constant([2, 3])
-    dtype = dtypes.float32
+    with ops.device("/device:CPU:0"):
+      random.reset_global_generator([0, seed2, seed1])
+    shape = constant_op.constant([4, 7])
+    dtype = dtypes.float64
+
     # create a graph for the old op in order to call it many times
     @def_function.function
     def old():
-      return gen_random_ops.random_standard_normal(
-          shape, dtype=dtype, seed=seed1, seed2=seed2)
+      with ops.device("/device:CPU:0"):
+        return gen_random_ops.random_standard_normal(
+            shape, dtype=dtype, seed=seed1, seed2=seed2)
 
     def new():
-      return random.get_global_generator().standard_normal(shape, dtype=dtype)
+      with ops.device("/device:CPU:0"):
+        return random.get_global_generator().standard_normal(shape, dtype=dtype)
 
     for _ in range(100):
       self.assertAllEqual(old(), new())
 
   @test_util.run_v2_only
+  @test_util.run_cuda_only
+  def testGPUSameAsOldRandomOps(self):
+    """Tests that the generated numbers are the same as the old random_ops.py.
+
+    The GPU version.
+    """
+    seed1, seed2 = 79, 25
+    with ops.device(test_util.gpu_device_name()):
+      random.reset_global_generator([0, seed2, seed1])
+    shape = constant_op.constant([4, 7])
+    dtype = dtypes.float64
+
+    @def_function.function
+    def old():
+      with ops.device(test_util.gpu_device_name()):
+        return gen_random_ops.random_standard_normal(
+            shape, dtype=dtype, seed=seed1, seed2=seed2)
+
+    def new():
+      with ops.device(test_util.gpu_device_name()):
+        return random.get_global_generator().standard_normal(shape, dtype=dtype)
+
+    for _ in range(100):
+      self.assertAllEqual(old(), new())
+
+  @test_util.run_v2_only
+  def testStatefulStandardNormal(self):
+    """Tests that op 'StatefulStandardNormal' still works.
+    """
+    shape = constant_op.constant([4, 7])
+    dtype = dtypes.float64
+    seed = 1234
+    algorithm = random.RNG_ALG_PHILOX
+    state = random._make_state_from_seed(seed, algorithm)
+    with ops.device("/device:CPU:0"):
+      var1 = variables.Variable(
+          np.concatenate((np.array([algorithm], dtype=random.STATE_TYPE),
+                          state), axis=None),
+          dtype=random.STATE_TYPE)
+      var2 = variables.Variable(state, dtype=random.STATE_TYPE)
+      for _ in range(100):
+        t1 = gen_stateful_random_ops.stateful_standard_normal(
+            var1.handle, shape, dtype)
+        t2 = gen_stateful_random_ops.stateful_standard_normal_v2(
+            var2.handle, algorithm, shape, dtype)
+        self.assertAllEqual(t1, t2)
+
+  @test_util.run_v2_only
   def testResetGlobalGeneratorBadWithDefun(self):
     """Demonstrates that reset_global_generator don't work properly with defun.
     """
@@ -145,7 +203,7 @@
 
     random.reset_global_generator(50)
     with self.assertRaisesWithPredicateMatch(
-        errors_impl.NotFoundError, "Resource .+ does not exist"):
+        AssertionError, "variable.*deleted"):
       a = f()
       random.reset_global_generator(50)
       b = f()
diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py
index 168cb97..eb2ee7c 100644
--- a/tensorflow/python/ops/summary_ops_v2.py
+++ b/tensorflow/python/ops/summary_ops_v2.py
@@ -45,11 +45,6 @@
 from tensorflow.python.util.tf_export import tf_export
 
 
-# Dictionary mapping graph keys to a boolean Tensor (or callable returning
-# a boolean Tensor) indicating whether we should record summaries for the
-# graph identified by the key of the dictionary.
-_SHOULD_RECORD_SUMMARIES = {}
-
 # A global dictionary mapping graph keys to a list of summary writer init ops.
 _SUMMARY_WRITER_INIT_OP = {}
 
@@ -61,10 +56,8 @@
 def _should_record_summaries_internal():
   """Returns boolean Tensor if summaries should/shouldn't be recorded, or None.
   """
-  global _SHOULD_RECORD_SUMMARIES
-  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
-  should = _SHOULD_RECORD_SUMMARIES.get(key)
-  return should() if callable(should) else should
+  condition = context.context().recording_summaries
+  return condition() if callable(condition) else condition
 
 
 def _should_record_summaries_v2():
@@ -83,32 +76,28 @@
   return False if result is None else result
 
 
+@tf_export("summary.record_if", v1=[])
 @tf_contextlib.contextmanager
-def _record_summaries(boolean=True):
+def record_if(condition):
   """Sets summary recording on or off per the provided boolean value.
 
   The provided value can be a python boolean, a scalar boolean Tensor, or
   a callable providing such a value; if a callable is passed it will be
-  invoked each time should_record_summaries() is called to determine whether
-  summary writing should be enabled.
+  invoked on-demand to determine whether summary writing will occur.
 
   Args:
-    boolean: can be True, False, a bool Tensor, or a callable providing such.
-      Defaults to True.
+    condition: can be True, False, a bool Tensor, or a callable providing such.
 
   Yields:
     Returns a context manager that sets this value on enter and restores the
     previous value on exit.
   """
-  # TODO(nickfelt): make this threadlocal
-  global _SHOULD_RECORD_SUMMARIES
-  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
-  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, None)
+  old = context.context().recording_summaries
   try:
-    _SHOULD_RECORD_SUMMARIES[key] = boolean
+    context.context().recording_summaries = condition
     yield
   finally:
-    _SHOULD_RECORD_SUMMARIES[key] = old
+    context.context().recording_summaries = old
 
 
 # TODO(apassos) consider how to handle local step here.
@@ -120,17 +109,17 @@
     should = lambda: math_ops.equal(global_step % n, 0)
     if not context.executing_eagerly():
       should = should()
-  return _record_summaries(should)
+  return record_if(should)
 
 
 def always_record_summaries():
   """Sets the should_record_summaries Tensor to always true."""
-  return _record_summaries(True)
+  return record_if(True)
 
 
 def never_record_summaries():
   """Sets the should_record_summaries Tensor to always false."""
-  return _record_summaries(False)
+  return record_if(False)
 
 
 @tf_export("summary.SummaryWriter", v1=[])
@@ -195,16 +184,19 @@
       return self._close()
 
 
+@tf_export(v1=["summary.initialize"])
 def initialize(
     graph=None,  # pylint: disable=redefined-outer-name
     session=None):
   """Initializes summary writing for graph execution mode.
 
+  This operation is a no-op when executing eagerly.
+
   This helper method provides a higher-level alternative to using
   `tf.contrib.summary.summary_writer_initializer_op` and
   `tf.contrib.summary.graph`.
 
-  Most users will also want to call `tf.train.create_global_step`
+  Most users will also want to call `tf.compat.v1.train.create_global_step`
   which can happen before or after this function is called.
 
   Args:
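
`record_if` replaces the private per-graph `_SHOULD_RECORD_SUMMARIES` dictionary with state on the eager context, and is now a public v2 symbol. A usage sketch (the writer-creation and scalar names are assumed from the TF2 summary API, and the log path is illustrative):

```python
import tensorflow as tf

step = tf.Variable(0, dtype=tf.int64)
writer = tf.summary.create_file_writer("/tmp/logs")

# The callable is re-evaluated on demand, so recording tracks `step`.
with writer.as_default(), tf.summary.record_if(
    lambda: tf.equal(step % 10, 0)):
  tf.summary.scalar("loss", 0.5, step=step)
```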
diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py
index e02175d..ff4f23a 100644
--- a/tensorflow/python/ops/template.py
+++ b/tensorflow/python/ops/template.py
@@ -26,8 +26,8 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.training.checkpointable import util as checkpointable_util
+from tensorflow.python.training.tracking import base as trackable
+from tensorflow.python.training.tracking import util as trackable_util
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util.deprecation import deprecated
@@ -232,7 +232,7 @@
   return stacktrace[-1:]
 
 
-class Template(checkpointable.Checkpointable):
+class Template(trackable.Trackable):
   """Wrap a function to aid in variable sharing.
 
   Templates are functions that create variables the first time they are called
@@ -306,8 +306,8 @@
         result = self._func(*args, **kwargs)
       else:
         # The first time we run, restore variables if necessary (via
-        # Checkpointable).
-        with checkpointable_util.capture_dependencies(template=self):
+        # Trackable).
+        with trackable_util.capture_dependencies(template=self):
           result = self._func(*args, **kwargs)
 
       if self._variables_created:
@@ -577,8 +577,8 @@
         result = self._func(*args, **kwargs)
       else:
         # The first time we run, restore variables if necessary (via
-        # Checkpointable).
-        with checkpointable_util.capture_dependencies(template=self):
+        # Trackable).
+        with trackable_util.capture_dependencies(template=self):
           result = self._func(*args, **kwargs)
 
       if self._variables_created:
diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py
index 1a11c33..65dee77 100644
--- a/tensorflow/python/ops/tensor_array_ops.py
+++ b/tensorflow/python/ops/tensor_array_ops.py
@@ -575,18 +575,29 @@
   def stack(self, name=None):
     """See TensorArray."""
     with ops.name_scope(name, "TensorArrayV2Stack", [self._flow]):
+      if self._element_shape:
+        element_shape = self._element_shape[0]
+      else:
+        element_shape = tensor_shape.TensorShape(None)
       value = list_ops.tensor_list_stack(
-          input_handle=self._flow, element_dtype=self._dtype)
+          input_handle=self._flow,
+          element_dtype=self._dtype,
+          element_shape=element_shape)
       if self._element_shape and self._element_shape[0].dims is not None:
         value.set_shape([None] + self._element_shape[0].dims)
       return value
 
   def gather(self, indices, name=None):
     """See TensorArray."""
+    if self._element_shape:
+      element_shape = self._element_shape[0]
+    else:
+      element_shape = tensor_shape.TensorShape(None)
     value = list_ops.tensor_list_gather(
         input_handle=self._flow,
         indices=indices,
         element_dtype=self._dtype,
+        element_shape=element_shape,
         name=name)
     if self._element_shape and self._element_shape[0].dims is not None:
       value.set_shape([None] + self._element_shape[0].dims)
@@ -627,7 +638,7 @@
         self._merge_element_shape(value.shape[1:])
       element_shape = self._element_shape[0] if self._element_shape else None
       flow_out = list_ops.tensor_list_scatter(
-          tensor=value, indices=indices, element_shape=element_shape)
+          tensor=value, indices=indices, input_handle=self._flow)
       return build_ta_with_new_flow(self, flow_out)
 
   @tf_should_use.should_use_result
@@ -860,7 +871,9 @@
   def gather(self, indices, name=None):
     """See TensorArray."""
     del name  # not meaningful when executing eagerly.
-    return array_ops.stack([self._maybe_zero(i) for i in indices.numpy()])
+    if isinstance(indices, ops.EagerTensor):
+      indices = indices.numpy()
+    return array_ops.stack([self._maybe_zero(i) for i in indices])
 
   def concat(self, name=None):
     """See TensorArray."""
@@ -893,7 +906,9 @@
   def scatter(self, indices, value, name=None):
     """See TensorArray."""
     del name  # not meaningful when executing eagerly.
-    for index, val in zip(indices.numpy(), array_ops.unstack(value)):
+    if isinstance(indices, ops.EagerTensor):
+      indices = indices.numpy()
+    for index, val in zip(indices, array_ops.unstack(value)):
       self._write(index, val)  # pylint: disable=protected-access
     return self.parent()
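
Passing `element_shape` through to `tensor_list_stack`/`tensor_list_gather` lets the list ops validate and report element shapes instead of returning unknown-shaped tensors. From the user side, roughly (a sketch using the public `tf.TensorArray` wrapper):

```python
import tensorflow as tf

ta = tf.TensorArray(tf.float32, size=3, element_shape=[2])
ta = ta.write(0, [1.0, 2.0]).write(1, [3.0, 4.0]).write(2, [5.0, 6.0])

print(ta.stack().shape)         # (3, 2)
print(ta.gather([0, 2]).shape)  # (2, 2)
```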
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 7788f28..219ba7f 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -35,7 +35,7 @@
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import compat
 from tensorflow.python.util import tf_should_use
 from tensorflow.python.util.deprecation import deprecated
@@ -204,7 +204,7 @@
 
 @tf_export("Variable", v1=[])
 class Variable(six.with_metaclass(VariableMetaclass,
-                                  checkpointable.Checkpointable)):
+                                  trackable.Trackable)):
   """See the [Variables Guide](https://tensorflow.org/guide/variables).
 
   A variable maintains state in the graph across calls to `run()`. You add a
@@ -1018,8 +1018,8 @@
     return self.shape
 
   def _gather_saveables_for_checkpoint(self):
-    """For implementing `Checkpointable`. This object is saveable on its own."""
-    return {checkpointable.VARIABLE_VALUE_KEY: self}
+    """For implementing `Trackable`. This object is saveable on its own."""
+    return {trackable.VARIABLE_VALUE_KEY: self}
 
   def to_proto(self, export_scope=None):
     """Converts a `Variable` to a `VariableDef` protocol buffer.
@@ -1506,8 +1506,8 @@
     # Store the graph key so optimizers know how to only retrieve variables from
     # this graph.
     self._graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
-    if isinstance(initial_value, checkpointable.CheckpointInitialValue):
-      self._maybe_initialize_checkpointable()
+    if isinstance(initial_value, trackable.CheckpointInitialValue):
+      self._maybe_initialize_trackable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
       initial_value = initial_value.wrapped_value
 
diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD
index 0654104..fcab57c 100644
--- a/tensorflow/python/profiler/BUILD
+++ b/tensorflow/python/profiler/BUILD
@@ -61,6 +61,7 @@
         "no_pip",
         "oss_serial",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 cuda_py_test(
@@ -76,6 +77,7 @@
         "//tensorflow/python:variables",
     ],
     tags = ["no_pip"],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 py_library(
@@ -130,6 +132,7 @@
         "//tensorflow/python:variables",
     ],
     tags = ["no_pip"],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
 
 py_library(
diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD
index 994206c..0a6ba12 100644
--- a/tensorflow/python/profiler/internal/BUILD
+++ b/tensorflow/python/profiler/internal/BUILD
@@ -69,4 +69,5 @@
     tags = [
         "no_pip",
     ],
+    xla_enable_strict_auto_jit = False,  # Node names are different with autojit
 )
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index dd74f12..1485ff7 100755
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -25,6 +25,8 @@
 %rename("%s") TFE_ContextHasFunction;
 %rename("%s") TFE_ContextEnableRunMetadata;
 %rename("%s") TFE_ContextDisableRunMetadata;
+%rename("%s") TFE_ContextEnableGraphCollection;
+%rename("%s") TFE_ContextDisableGraphCollection;
 %rename("%s") TFE_ContextExportRunMetadata;
 %rename("%s") TFE_ContextClearCaches;
 %rename("%s") TFE_ContextGetDevicePlacementPolicy;
@@ -34,8 +36,12 @@
 %rename("%s") TFE_ContextAsyncWait;
 %rename("%s") TFE_ContextAsyncClearError;
 %rename("%s") TFE_NewProfiler;
+%rename("%s") TFE_ProfilerIsOk;
 %rename("%s") TFE_DeleteProfiler;
 %rename("%s") TFE_ProfilerSerializeToString;
+%rename("%s") TFE_NewProfilerContext;
+%rename("%s") TFE_ProfilerContextSetEagerContext;
+%rename("%s") TFE_DeleteProfilerContext;
 %rename("%s") TFE_StartProfilerServer;
 %rename("%s") TFE_OpNameGetAttrType;
 %rename("%s") TFE_Py_InitEagerTensor;
diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD
index b605aa7..8c6f734 100644
--- a/tensorflow/python/saved_model/BUILD
+++ b/tensorflow/python/saved_model/BUILD
@@ -262,6 +262,24 @@
 )
 
 py_library(
+    name = "signature_serialization",
+    srcs = [
+        "signature_serialization.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":revived_types",
+        ":signature_constants",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:tensor_spec",
+        "//tensorflow/python:util",
+        "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/eager:function",
+        "//tensorflow/python/training/tracking:base",
+    ],
+)
+
+py_library(
     name = "save",
     srcs = [
         "save.py",
@@ -276,6 +294,7 @@
         ":saved_object_graph_py",
         ":signature_constants",
         ":signature_def_utils",
+        ":signature_serialization",
         ":tag_constants",
         ":utils",
         "//tensorflow/core:protos_all_py",
@@ -291,12 +310,12 @@
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:def_function",
         "//tensorflow/python/eager:function",
-        "//tensorflow/python/training/checkpointable:base",
-        "//tensorflow/python/training/checkpointable:graph_view",
-        "//tensorflow/python/training/checkpointable:object_identity",
-        "//tensorflow/python/training/checkpointable:tracking",
-        "//tensorflow/python/training/checkpointable:util",
         "//tensorflow/python/training/saving:functional_saver",
+        "//tensorflow/python/training/tracking",
+        "//tensorflow/python/training/tracking:base",
+        "//tensorflow/python/training/tracking:graph_view",
+        "//tensorflow/python/training/tracking:object_identity",
+        "//tensorflow/python/training/tracking:util",
     ],
 )
 
@@ -323,6 +342,7 @@
     deps = [
         ":constants",
         ":function_deserialization",
+        ":load_v1_in_v2",
         ":loader",
         ":nested_structure_coder",
         ":revived_types",
@@ -336,10 +356,26 @@
         "//tensorflow/python:tensor_util",
         "//tensorflow/python:util",
         "//tensorflow/python:variables",
-        "//tensorflow/python/training/checkpointable:base",
-        "//tensorflow/python/training/checkpointable:graph_view",
-        "//tensorflow/python/training/checkpointable:tracking",
-        "//tensorflow/python/training/checkpointable:util",
+        "//tensorflow/python/training/tracking",
+        "//tensorflow/python/training/tracking:base",
+        "//tensorflow/python/training/tracking:graph_view",
+        "//tensorflow/python/training/tracking:util",
+    ],
+)
+
+py_library(
+    name = "load_v1_in_v2",
+    srcs = [
+        "load_v1_in_v2.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":loader",
+        ":signature_serialization",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:saver",
+        "//tensorflow/python/eager:wrap_function",
+        "//tensorflow/python/training/tracking",
     ],
 )
 
@@ -356,7 +392,33 @@
         "//tensorflow/python:tensor_spec",
         "//tensorflow/python/eager:def_function",
         "//tensorflow/python/eager:test",
-        "//tensorflow/python/training/checkpointable:tracking",
+        "//tensorflow/python/training/tracking:tracking",
+    ],
+)
+
+tf_py_test(
+    name = "load_v1_in_v2_test",
+    srcs = ["load_v1_in_v2_test.py"],
+    additional_deps = [
+        ":builder",
+        ":load",
+        ":save",
+        ":signature_def_utils",
+        ":simple_save",
+        ":utils",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:tensor_spec",
+        "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/training/tracking:tracking",
+        "//tensorflow/python:variables",
     ],
 )
 
diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py
index 992b62a..d1ad371 100644
--- a/tensorflow/python/saved_model/function_deserialization.py
+++ b/tensorflow/python/saved_model/function_deserialization.py
@@ -24,6 +24,7 @@
 from tensorflow.core.framework import function_pb2
 from tensorflow.python.eager import def_function
 from tensorflow.python.eager import function as function_lib
+from tensorflow.python.framework import func_graph as func_graph_lib
 from tensorflow.python.framework import function_def_to_graph as function_def_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
@@ -32,39 +33,89 @@
 from tensorflow.python.saved_model import nested_structure_coder
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
+from tensorflow.python.util import tf_decorator
+from tensorflow.python.util import tf_inspect
 
 
 def _is_tensor(t):
   return isinstance(t, (ops.Tensor, resource_variable_ops.ResourceVariable))
 
 
-def _inputs_compatible(args, stored_inputs):
-  """Checks whether function arguments are compatible with parameters."""
-  if len(args) != len(stored_inputs):
+def _call_concrete_function(function, inputs):
+  """Calls a restored Function with structured inputs.
+
+  This differs from `function.__call__` in that inputs and outputs are
+  structured and that it casts inputs to tensors if needed.
+
+  Note: this does not check that non-tensor inputs match. That should be
+  done beforehand via `_concrete_function_callable_with`.
+
+  Args:
+    function: ConcreteFunction to call.
+    inputs: Structured inputs compatible with
+        `function.graph.structured_input_signature`.
+
+  Returns:
+    The structured function output.
+  """
+  expected_structure = function.graph.structured_input_signature
+  flatten_inputs = nest.flatten_up_to(expected_structure, inputs)
+  tensor_inputs = []
+  for arg, expected in zip(flatten_inputs, nest.flatten(expected_structure)):
+    if isinstance(expected, tensor_spec.TensorSpec):
+      tensor_inputs.append(
+          ops.convert_to_tensor(arg, dtype_hint=expected.dtype))
+  result = function._call_flat(tensor_inputs)  # pylint: disable=protected-access
+  if isinstance(result, ops.Operation):
+    return None
+  return result
+
+
+def _try_convert_to_tensor_spec(arg, dtype_hint):
+  """Returns None or TensorSpec obtained if `arg` is converted to tensor."""
+  try:
+    # Note: try conversion in a FuncGraph to avoid polluting the current
+    # context.
+    with func_graph_lib.FuncGraph(name="guess_conversion").as_default():
+      result = ops.convert_to_tensor(arg, dtype_hint=dtype_hint)
+      return tensor_spec.TensorSpec(shape=result.shape, dtype=result.dtype)
+  except (TypeError, ValueError):
+    return None
+
+
+def _concrete_function_callable_with(function, inputs, allow_conversion):
+  """Returns whether concrete `function` can be called with `inputs`."""
+  expected_structure = function.graph.structured_input_signature
+  try:
+    flatten_inputs = nest.flatten_up_to(expected_structure, inputs)
+  except (TypeError, ValueError):
     return False
-
-  for arg, stored_input in zip(args, stored_inputs):
-    if not function_lib.is_same_structure(arg, stored_input):
-      return False
-
-    flattened_arg = nest.flatten(arg)
-    flattened_stored_input = nest.flatten(stored_input)
-
-    for a, b in zip(flattened_arg, flattened_stored_input):
-      if _is_tensor(a):
-        if not isinstance(b, tensor_spec.TensorSpec):
-          return False
-        if a.dtype != b.dtype or not b.shape.is_compatible_with(a.shape):
-          return False
-      else:
-        if a != b:
-          return False
+  for arg, expected in zip(flatten_inputs, nest.flatten(expected_structure)):
+    if isinstance(expected, tensor_spec.TensorSpec):
+      if allow_conversion:
+        arg = _try_convert_to_tensor_spec(arg, dtype_hint=expected.dtype)
+      if not _is_tensor(arg) and not isinstance(arg, tensor_spec.TensorSpec):
+        return False
+      if arg.dtype != expected.dtype:
+        return False
+      if not expected.shape.is_compatible_with(arg.shape):
+        return False
+    else:
+      if arg != expected:
+        return False
   return True
 
 
 def _deserialize_function_spec(function_spec_proto, coder):
   """Deserialize a FunctionSpec object from its proto representation."""
-  fullargspec = coder.decode_proto(function_spec_proto.fullargspec)
+  typeless_fullargspec = coder.decode_proto(function_spec_proto.fullargspec)
+  fullargspec = tf_inspect.FullArgSpec(
+      args=typeless_fullargspec.args,
+      varargs=typeless_fullargspec.varargs,
+      varkw=typeless_fullargspec.varkw,
+      defaults=typeless_fullargspec.defaults,
+      kwonlyargs=typeless_fullargspec.kwonlyargs,
+      kwonlydefaults=typeless_fullargspec.kwonlydefaults,
+      annotations=typeless_fullargspec.annotations)
   is_method = function_spec_proto.is_method
   args_to_prepend = coder.decode_proto(function_spec_proto.args_to_prepend)
   kwargs_to_include = coder.decode_proto(function_spec_proto.kwargs_to_include)
@@ -146,36 +197,40 @@
           "Cannot canonicalize input args %r and kwargs %r. Error: %r." %
           (args, kwargs, e))
 
-    debug_considered_signatures = []
-    for concrete_function_name in saved_function.concrete_functions:
-      function_obj = concrete_functions[concrete_function_name]
-      canonicalized_original_inputs = (
-          function_obj.graph.structured_input_signature)
-      debug_considered_signatures.append(canonicalized_original_inputs)
+    # First try to find a concrete function that can be called without input
+    # conversions. This allows picking a more specific trace when a more
+    # general (and more expensive) one that accepts tensors also exists.
+    for allow_conversion in [False, True]:
+      for function_name in saved_function.concrete_functions:
+        function = concrete_functions[function_name]
+        if _concrete_function_callable_with(function,
+                                            canonicalized_inputs,
+                                            allow_conversion):
+          return _call_concrete_function(function, canonicalized_inputs)
 
-      if _inputs_compatible(canonicalized_inputs,
-                            canonicalized_original_inputs):
-        flattened_inputs = nest.flatten(canonicalized_inputs)
-        filtered_inputs = [t for t in flattened_inputs if _is_tensor(t)]
-
-        result = function_obj._call_flat(filtered_inputs)  # pylint: disable=protected-access
-        if isinstance(result, ops.Operation):
-          return None
-        return result
-
-    raise AssertionError(
+    available_signatures = [
+        concrete_functions[function_name].graph.structured_input_signature
+        for function_name in saved_function.concrete_functions
+    ]
+    raise ValueError(
         "Could not find matching function to call for canonicalized inputs %r. "
         "Only existing signatures are %r."
-        % (canonicalized_inputs, debug_considered_signatures))
+        % (canonicalized_inputs, available_signatures))
 
   concrete_function_objects = []
   for concrete_function_name in saved_function.concrete_functions:
     concrete_function_objects.append(concrete_functions[concrete_function_name])
 
-  return RestoredFunction(restored_function_body,
-                          restored_function_body.__name__,
-                          function_spec,
-                          concrete_function_objects)
+  restored_function = RestoredFunction(
+      restored_function_body,
+      restored_function_body.__name__,
+      function_spec,
+      concrete_function_objects)
+
+  return tf_decorator.make_decorator(
+      restored_function_body,
+      restored_function,
+      decorator_argspec=function_spec.fullargspec)
 
 
 def load_function_def_library(library):
@@ -196,8 +251,9 @@
   """
   functions = {}
 
+  load_shared_name_suffix = "_load_{}".format(ops.uid())
   for fdef in _sort_function_defs(library):
-    copy = _fix_fdef(fdef, functions)
+    copy = _fix_fdef(fdef, functions, load_shared_name_suffix)
 
     func_graph = function_def_lib.function_def_to_graph(copy)
     for dep in _list_function_deps(fdef):
@@ -247,7 +303,7 @@
   return [reverse[x] for x in output]
 
 
-def _fix_fdef(orig_fdef, functions):
+def _fix_fdef(orig_fdef, functions, shared_name_suffix):
   """Fixes a FunctionDef proto to be loaded in current context.
 
   In particular, when loading a function library into an eager context, one
@@ -256,6 +312,10 @@
   Args:
     orig_fdef: FunctionDef proto to fix. It is not modified.
     functions: map from function name to a ConcreteFunction instance.
+    shared_name_suffix: A unique string for this load which helps to avoid
+      `shared_name` collisions across loads. Two functions from the same load
+      using the same `shared_name` still need to share, but functions from
+      different loads with the same `shared_name` should not.
 
   Returns:
     A fixed copy of the original FunctionDef.
@@ -279,6 +339,12 @@
       if attr_value.func.name:
         attr_value.func.name = functions[attr_value.func.name].name
 
+    # TODO(b/124205571): Avoid accidental sharing and destruction of restored
+    # resources. For now uniquify "shared_name" when loading functions to avoid
+    # sharing.
+    if "shared_name" in node_def.attr:
+      node_def.attr["shared_name"].s += compat.as_bytes(shared_name_suffix)
+
   fdef.signature.name = _clean_function_name(fdef.signature.name)
   return fdef
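
The two-pass loop above first looks for a trace that matches without converting inputs, and only then retries with Python values coerced to `TensorSpec`s, so the most specific trace wins while plain-Python arguments still resolve. A round-trip sketch (v2 SavedModel names assumed; at this revision they may still live under `experimental`, and the path is illustrative):

```python
import tensorflow as tf

root = tf.train.Checkpoint()
root.f = tf.function(lambda x: x + 1.0)
root.f(tf.constant(1.0))             # record a tensor-typed concrete function
tf.saved_model.save(root, "/tmp/m")

loaded = tf.saved_model.load("/tmp/m")
print(loaded.f(tf.constant(2.0)))  # exact match on the first pass
print(loaded.f(2.0))               # matched on the second pass via conversion
```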
 
diff --git a/tensorflow/python/saved_model/function_serialization.py b/tensorflow/python/saved_model/function_serialization.py
index 95dfec7..19801e6 100644
--- a/tensorflow/python/saved_model/function_serialization.py
+++ b/tensorflow/python/saved_model/function_serialization.py
@@ -49,7 +49,7 @@
         "captures tensor %s which is unsupported or not reachable from root. "
         "One reason could be that a stateful object or a variable that the "
         "function depends on is not assigned to an attribute of the serialized "
-        "checkpointable object "
+        "trackable object "
         "(see SaveTest.test_captures_unreachable_variable)."
         % (concrete_function.name, capture))
   concrete_function_proto = saved_object_graph_pb2.SavedConcreteFunction()
diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py
index e8474f2..fd3c3db 100644
--- a/tensorflow/python/saved_model/load.py
+++ b/tensorflow/python/saved_model/load.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Import a checkpointable object from a SavedModel."""
+"""Import a trackable object from a SavedModel."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -30,16 +30,18 @@
 from tensorflow.python.ops import variables
 from tensorflow.python.saved_model import constants
 from tensorflow.python.saved_model import function_deserialization
+from tensorflow.python.saved_model import load_v1_in_v2
 from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import nested_structure_coder
 from tensorflow.python.saved_model import revived_types
 from tensorflow.python.saved_model import saved_object_graph_pb2
 from tensorflow.python.saved_model import utils_impl as saved_model_utils
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import graph_view
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import graph_view
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
 from tensorflow.python.util import compat
+from tensorflow.python.util import nest
 
 
 class _Loader(object):
@@ -56,59 +58,74 @@
         function_deserialization.load_function_def_library(
             meta_graph.graph_def.library))
     self._load_all()
-    self._setup_functions()
+    # TODO(b/124045874): There are limitations with functions whose captures
+    # trigger other functions to be executed. For now it is only guaranteed to
+    # work if the captures of a function only trigger functions without
+    # captures.
+    self._setup_functions_structures()
+    self._setup_functions_captures()
     self._restore_checkpoint()
 
-  def _setup_concrete_function(self, proto, concrete_function, coder):
-    """Setup captured tensors and outputs for a single concrete function."""
-    bound_inputs = [
-        self._get_tensor_from_node(node_id)
-        for node_id in proto.bound_inputs]
-    bound_variables = [
-        self._nodes[node_id]
-        for node_id in proto.bound_inputs
-        if self._proto.nodes[node_id].WhichOneof("kind") == "variable"
-    ]
-    # TODO(andresp): This is only injecting the captured inputs into the
-    # concrete function, note that we did not modify the FuncGraph
-    # itself.
-    concrete_function._captured_inputs = bound_inputs  # pylint: disable=protected-access
-    concrete_function._func_graph.variables = bound_variables  # pylint: disable=protected-access
-    # By setting the structured_outputs directly, we can rely on this
-    # function_lib.ConcreteFunction object to perform the output repacking
-    # logic. The only limitation of that logic is that it only works
-    # with output that is convertible to Tensors and the conversion
-    # always happens. For example tf.TensorShape([2, 3]) will be
-    # converted to Tensor representing [2, 3].
-    original_outputs = coder.decode_proto(proto.output_signature)
-    # The original_outputs here had Tensors converted to TensorSpecs, so
-    # the restored function's structured_outputs field will not be
-    # exactly the same. Fortunately the repacking logic cares only about
-    # the structure.
-    # TODO(vbardiovsky): Should we just replicate the structures, with
-    # Nones instead of real objects?
-    concrete_function._func_graph.structured_outputs = original_outputs  # pylint: disable=protected-access
-    concrete_function._func_graph.structured_input_signature = (  # pylint: disable=protected-access
-        coder.decode_proto(proto.canonicalized_input_signature))
+    for node in self._nodes:
+      if isinstance(node, tracking.TrackableResource):
+        init_op = node.initialize()
+        ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op)
 
-  def _setup_functions(self):
-    """Setup captures and output structure in restored functions."""
+  def _setup_functions_structures(self):
+    """Setup structure for inputs and outputs of restored functions."""
     coder = nested_structure_coder.StructureCoder()
-    for name, concrete_function_proto in self._proto.concrete_functions.items():
-      self._setup_concrete_function(
-          concrete_function_proto,
-          self._concrete_functions[name],
-          coder)
+    for name, proto in sorted(self._proto.concrete_functions.items()):
+      concrete_function = self._concrete_functions[name]
+      # By setting the structured_outputs directly, we can rely on this
+      # function_lib.ConcreteFunction object to perform the output repacking
+      # logic. The only limitation of that logic is that it only works
+      # with output that is convertible to Tensors and the conversion
+      # always happens. For example tf.TensorShape([2, 3]) will be
+      # converted to Tensor representing [2, 3].
+      original_outputs = coder.decode_proto(proto.output_signature)
+      # The original_outputs here had Tensors converted to TensorSpecs, so
+      # the restored function's structured_outputs field will not be
+      # exactly the same. Fortunately the repacking logic cares only about
+      # the structure.
+      # TODO(vbardiovsky): Should we just replicate the structures, with
+      # Nones instead of real objects?
+      concrete_function._func_graph.structured_outputs = original_outputs  # pylint: disable=protected-access
+      concrete_function._func_graph.structured_input_signature = (  # pylint: disable=protected-access
+          coder.decode_proto(proto.canonicalized_input_signature))
+
+  def _setup_functions_captures(self):
+    """Setup captures and variables in restored functions."""
+    concrete_functions = sorted(self._proto.concrete_functions.items())
+    for name, proto in concrete_functions:
+      concrete_function = self._concrete_functions[name]
+      bound_inputs = [
+          self._get_tensor_from_node(node_id)
+          for node_id in proto.bound_inputs]
+      bound_variables = [
+          self._nodes[node_id]
+          for node_id in proto.bound_inputs
+          if self._proto.nodes[node_id].WhichOneof("kind") == "variable"
+      ]
+      # TODO(andresp): This is only injecting the captured inputs into the
+      # concrete function, note that we did not modify the FuncGraph
+      # itself.
+      concrete_function._captured_inputs = bound_inputs  # pylint: disable=protected-access
+      concrete_function._func_graph.variables = bound_variables  # pylint: disable=protected-access
 
   def _get_tensor_from_node(self, node_id):
-    obj = self._nodes[node_id]
-    if resource_variable_ops.is_resource_variable(obj):
-      return obj.handle
-    elif isinstance(obj, tracking.TrackableAsset):
-      return obj.asset_path.handle
-    elif tensor_util.is_tensor(obj):
-      return obj
-    raise ValueError("Can't convert node %s to tensor" % (type(obj)))
+    """Resolves a node id into a tensor to be captured for a function."""
+    with ops.init_scope():
+      obj = self._nodes[node_id]
+      if resource_variable_ops.is_resource_variable(obj):
+        return obj.handle
+      elif isinstance(obj, tracking.TrackableAsset):
+        return obj.asset_path
+      elif tensor_util.is_tensor(obj):
+        return obj
+      elif isinstance(obj, tracking.TrackableResource):
+        # Note: this executes restored functions in the TrackableResource.
+        return obj.resource_handle
+      raise ValueError("Can't convert node %s to tensor" % (type(obj)))
 
   def _load_all(self):
     """Load all saved objects and wire their properties."""
@@ -132,16 +149,16 @@
   def _restore_checkpoint(self):
     """Load state from checkpoint into the deserialized objects."""
     variables_path = saved_model_utils.get_variables_path(self._export_dir)
-    # TODO(andresp): Clean use of private methods of CheckpointableSaver.
+    # TODO(andresp): Clean use of private methods of TrackableSaver.
     # pylint: disable=protected-access
-    saver = util.CheckpointableSaver(graph_view.ObjectGraphView(self.get(0)))
+    saver = util.TrackableSaver(graph_view.ObjectGraphView(self.get(0)))
     saver._file_prefix_placeholder = constant_op.constant(variables_path)
     load_status = saver.restore(variables_path)
     load_status.assert_existing_objects_matched()
     checkpoint = load_status._checkpoint
 
     # When running in eager mode, the `restore` call above has already run and
-    # restored the state of checkpointables, call `position.restore_ops()` will
+    # restored the state of trackables, so calling `position.restore_ops()` will
     # return an empty list as there is nothing left to do. In graph mode, that
     # will return the list of ops that must run to restore the object on that
     # position. We have to wire them in the initializers of the objects so that
@@ -173,6 +190,7 @@
             proto.bare_concrete_function),
         "variable": lambda: self._recreate_variable(proto.variable),
         "constant": lambda: self._recreate_constant(proto.constant),
+        "resource": lambda: self._recreate_resource(proto.resource),
     }
     kind = proto.WhichOneof("kind")
     if kind not in factory:
@@ -187,7 +205,7 @@
       # individually callable by adding a `__call__` method to the classes of
       # the objects instances that have a `__call__` property.
 
-      class _UserObject(tracking.AutoCheckpointable):
+      class _UserObject(tracking.AutoTrackable):
         pass
 
       return _UserObject(), setattr
@@ -218,6 +236,30 @@
         tensor_util.MakeNdarray(tensor_proto))
     return imported_constant, setattr
 
+  def _recreate_resource(self, proto):
+    del proto
+    return _RestoredResource(), setattr
+
+
+# TODO(b/124205571,b/124092991): Solve destruction of resources.
+class _RestoredResource(tracking.TrackableResource):
+  """Restored SavedResource."""
+
+  def create_resource(self):
+    raise RuntimeError()
+
+  def initialize(self):
+    raise RuntimeError()
+
+  def _list_functions_for_serialization(self):
+    # Override this method to prevent the base class implementation from
+    # re-wrapping the polymorphic functions in another layer of `tf.function`.
+    return {
+        "create_resource": self.create_resource,
+        "initialize": self.initialize,
+    }
+
 
 def _call_attribute(instance, *args, **kwargs):
   return instance.__call__(*args, **kwargs)
@@ -229,14 +271,64 @@
     return saved_object_graph_pb2.SavedObjectGraph.FromString(contents)
 
 
-def load(export_dir):
-  """Load a SavedModel from `export_dir`."""
+def load(export_dir, tags=None):
+  """Load a SavedModel from `export_dir`.
+
+  Signatures associated with the SavedModel are available as functions:
+
+  ```python
+  imported = tf.saved_model.load(path)
+  f = imported.signatures["serving_default"]
+  print(f(x=tf.constant([[1.]])))
+  ```
+
+  Objects exported with `tf.saved_model.save` additionally have trackable
+  objects and functions assigned to attributes:
+
+  ```python
+  exported = tf.train.Checkpoint(v=tf.Variable(3.))
+  exported.f = tf.function(
+      lambda x: exported.v * x,
+      input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)])
+  tf.saved_model.save(exported, path)
+  imported = tf.saved_model.load(path)
+  assert 3. == imported.v.numpy()
+  assert 6. == imported.f(x=tf.constant(2.)).numpy()
+  ```
+
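+  SavedModels exported from TF 1.x APIs identify MetaGraphs by tags, and
+  loading them requires `tags` when more than one MetaGraph is present. A
+  minimal sketch, assuming the SavedModel contains a MetaGraph tagged "serve"
+  with a "serving_default" signature:
+
+  ```python
+  imported = tf.saved_model.load(path, tags=["serve"])
+  f = imported.signatures["serving_default"]
+  ```
+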
+  Args:
+    export_dir: The SavedModel directory to load from.
+    tags: A tag or sequence of tags identifying the MetaGraph to load. Optional
+      if the SavedModel contains a single MetaGraph, as for those exported from
+      `tf.saved_model.save`.
+
+  Returns:
+    A trackable object with a `signatures` attribute mapping from signature
+    keys to functions. If the SavedModel was exported by `tf.saved_model.save`,
+    it also points to trackable objects and functions which were attached
+    to the exported object.
+
+  Raises:
+    ValueError: If `tags` don't match a MetaGraph in the SavedModel.
+  """
+  if tags is not None:
+    # Supports e.g. tags=SERVING and tags=[SERVING]
+    tags = nest.flatten(tags)
   saved_model_proto = loader_impl.parse_saved_model(export_dir)
   object_graph_filename = os.path.join(
       compat.as_bytes(export_dir),
       compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY),
       compat.as_bytes("object_graph.pb"))
-  if file_io.file_exists(object_graph_filename):
+  if (file_io.file_exists(object_graph_filename)
+      and len(saved_model_proto.meta_graphs) == 1):
+    meta_graph_def = saved_model_proto.meta_graphs[0]
+    if (tags is not None
+        and set(tags) != set(meta_graph_def.meta_info_def.tags)):
+      raise ValueError(
+          ("The SavedModel at {} has one MetaGraph with tags {}, but got an "
+           "incompatible argument tags={} to tf.saved_model.load. You may omit "
+           "it, pass 'None', or pass matching tags.")
+          .format(export_dir, meta_graph_def.meta_info_def.tags, tags))
     object_graph_proto = _load_saved_object_graph_proto(object_graph_filename)
     with ops.init_scope():
       loader = _Loader(object_graph_proto,
@@ -244,7 +336,6 @@
                        export_dir)
       root = loader.get(0)
   else:
-    raise NotImplementedError(
-        "Currently only SavedModels exported with `tf.saved_model.save` may be "
-        "imported. Other SavedModels may eventually be supported via load().")
+    with ops.init_scope():
+      root = load_v1_in_v2.load(export_dir, tags)
   return root
diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py
index d61fa86..6b0734a 100644
--- a/tensorflow/python/saved_model/load_test.py
+++ b/tensorflow/python/saved_model/load_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for checkpointable object SavedModel loading."""
+"""Tests for trackable object SavedModel loading."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -38,9 +38,11 @@
 from tensorflow.python.ops import variables
 from tensorflow.python.saved_model import load
 from tensorflow.python.saved_model import save
+from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.training import monitored_session
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
+from tensorflow.python.util import tf_inspect
 
 
 @parameterized.named_parameters(
@@ -61,17 +63,17 @@
     return loaded
 
   def test_structure_import(self, cycles):
-    root = tracking.AutoCheckpointable()
-    root.dep_one = tracking.AutoCheckpointable()
-    root.dep_two = tracking.AutoCheckpointable()
-    root.dep_two.dep = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
+    root.dep_one = tracking.AutoTrackable()
+    root.dep_two = tracking.AutoTrackable()
+    root.dep_two.dep = tracking.AutoTrackable()
     root.dep_three = root.dep_two.dep
     imported = self.cycle(root, cycles)
     self.assertIs(imported.dep_three, imported.dep_two.dep)
     self.assertIsNot(imported.dep_one, imported.dep_two)
 
   def test_variables(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.v1 = variables.Variable(1., trainable=True)
     root.v2 = variables.Variable(2., trainable=False)
     imported = self.cycle(root, cycles)
@@ -81,7 +83,7 @@
     self.assertFalse(imported.v2.trainable)
 
   def test_capture_variables(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.weights = variables.Variable(2.)
     root.f = def_function.function(
         lambda x: root.weights * x,
@@ -101,7 +103,7 @@
     file1 = self._make_asset("contents 1")
     file2 = self._make_asset("contents 2")
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.asset1 = tracking.TrackableAsset(file1)
     root.asset2 = tracking.TrackableAsset(file2)
 
@@ -120,21 +122,44 @@
       self.assertEqual("contents 2", f.read())
 
   def test_capture_assets(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.vocab = tracking.TrackableAsset(self._make_asset("contents"))
     root.f = def_function.function(
         lambda: root.vocab.asset_path,
         input_signature=[])
     imported = self.cycle(root, cycles)
-    origin_output = root.f().numpy()
+    original_output = root.f().numpy()
     imported_output = imported.f().numpy()
-    self.assertNotEqual(origin_output, imported_output)
+    self.assertNotEqual(original_output, imported_output)
     with open(imported_output, "r") as f:
       self.assertEqual("contents", f.read())
 
+  def test_capture_assets_in_graph(self, cycles):
+    root = tracking.AutoTrackable()
+    root.vocab = tracking.TrackableAsset(self._make_asset("contents"))
+    root.f = def_function.function(
+        lambda: root.vocab.asset_path,
+        input_signature=[])
+
+    original_output = root.f().numpy()
+
+    if cycles > 1:
+      root = self.cycle(root, cycles - 1)
+    path = tempfile.mkdtemp(prefix=self.get_temp_dir())
+    save.save(root, path)
+
+    with ops.Graph().as_default():
+      imported = load.load(path)
+      imported_tensor = imported.f()
+      with monitored_session.MonitoredSession() as sess:
+        imported_output = sess.run(imported_tensor)
+        self.assertNotEqual(original_output, imported_output)
+        with open(imported_output, "r") as f:
+          self.assertEqual("contents", f.read())
+
   def test_dedup_assets(self, cycles):
     vocab = self._make_asset("contents")
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.asset1 = tracking.TrackableAsset(vocab)
     root.asset2 = tracking.TrackableAsset(vocab)
     imported = self.cycle(root, cycles)
@@ -146,7 +171,7 @@
     def func(x):
       return 2 * x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func
 
     # Add two traces.
@@ -164,7 +189,7 @@
     def func(x):
       return 2 * x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func
 
     imported = self.cycle(root, cycles)
@@ -175,7 +200,7 @@
     def func(x):
       return 2 * x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func
 
     imported = self.cycle(
@@ -194,7 +219,7 @@
         lambda x: f(x) + 1.0,
         input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.g = g
     imported = self.cycle(root, cycles)
     imported.g(constant_op.constant([1.0]))
@@ -207,7 +232,7 @@
       else:
         return 7
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(func)
 
     self.assertEqual(20, root.f(constant_op.constant(10), True).numpy())
@@ -227,7 +252,7 @@
       else:
         return array_ops.zeros(shape=x.shape, dtype=dtypes.float32)
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(func)
 
     self.assertAllEqual([0.0, 0.0, 0.0],
@@ -261,17 +286,17 @@
 
   def test_function_no_return(self, cycles):
 
-    class CheckpointableWithOneVariable(tracking.AutoCheckpointable):
+    class TrackableWithOneVariable(tracking.AutoTrackable):
 
       def __init__(self, initial_value=0.0):
-        super(CheckpointableWithOneVariable, self).__init__()
+        super(TrackableWithOneVariable, self).__init__()
         self.variable = variables.Variable(initial_value)
 
       @def_function.function
       def increase(self, by=1.0):
         self.variable.assign_add(by)
 
-    obj = CheckpointableWithOneVariable(5.0)
+    obj = TrackableWithOneVariable(5.0)
 
     obj.increase(constant_op.constant(10.0))
     self.assertEqual(15.0, obj.variable.numpy())
@@ -295,7 +320,7 @@
       else:
         return 7
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(func)
 
     x = constant_op.constant(10)
@@ -311,7 +336,7 @@
 
     imported = self.cycle(root, cycles)
 
-    with self.assertRaisesRegexp(AssertionError,
+    with self.assertRaisesRegexp(ValueError,
                                  "Could not find matching function to call"):
       imported.f(input2)
 
@@ -327,7 +352,7 @@
       named_tuple = named_tuple_type(a=input1 + input2, b=input1 * input2)
       return [named_tuple, input2, {"x": 0.5}]
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(func)
 
     result = root.f(constant_op.constant(2), constant_op.constant(3))
@@ -357,7 +382,7 @@
       else:
         return 7
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(func)
 
     self.assertEqual(20, root.f(constant_op.constant(10), True).numpy())
@@ -379,7 +404,7 @@
       else:
         return 7
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(func)
 
     x = constant_op.constant(10)
@@ -387,17 +412,17 @@
 
     imported = self.cycle(root, cycles)
 
-    with self.assertRaisesRegexp(AssertionError,
+    with self.assertRaisesRegexp(ValueError,
                                  "Could not find matching function to call.*"):
       imported.f(x, learning_rate=0.5, epochs=4)
 
     self.assertEqual(7, imported.f(x, learning_rate=0.5, epochs=3).numpy())
 
   def test_member_function(self, cycles):
-    class CheckpointableWithMember(tracking.AutoCheckpointable):
+    class TrackableWithMember(tracking.AutoTrackable):
 
       def __init__(self):
-        super(CheckpointableWithMember, self).__init__()
+        super(TrackableWithMember, self).__init__()
         self._some_value = 20
 
       @def_function.function
@@ -407,7 +432,7 @@
         else:
           return 7 + self._some_value
 
-    root = CheckpointableWithMember()
+    root = TrackableWithMember()
 
     self.assertEqual(20, root.f(constant_op.constant(10), True).numpy())
     self.assertEqual(27, root.f(constant_op.constant(1)).numpy())
@@ -419,7 +444,7 @@
     self.assertEqual(27, imported.f(constant_op.constant(2)).numpy())
 
   def test_side_effect_listing(self, cycles):
-    class M(tracking.AutoCheckpointable):
+    class M(tracking.AutoTrackable):
 
       def __init__(self):
         super(M, self).__init__()
@@ -443,7 +468,7 @@
         lambda x: x*weight + bias,
         input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.weight = weight
     root.bias = bias
     root.g = g
@@ -483,7 +508,7 @@
     def h(x):
       return g(x) + bias,
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.weight = weight
     root.bias = bias
     root.g = h
@@ -496,16 +521,16 @@
     self.assertAllClose(grad, [3.5, 2.0])
 
   def test_callable(self, cycles):
-    class M1(tracking.AutoCheckpointable):
+    class M1(tracking.AutoTrackable):
 
       @def_function.function(
           input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
       def __call__(self, x):
         return x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.m1 = M1()
-    root.m2 = tracking.AutoCheckpointable()
+    root.m2 = tracking.AutoTrackable()
     root.m2.__call__ = def_function.function(
         input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])(
             lambda x: x*3.0)
@@ -528,9 +553,9 @@
     func = def_function.function(
         input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])(
             lambda x: x*3.0)
-    root = tracking.AutoCheckpointable()
-    root.__call__ = tracking.AutoCheckpointable()
-    root.__call__.__call__ = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
+    root.__call__ = tracking.AutoTrackable()
+    root.__call__.__call__ = tracking.AutoTrackable()
     root.__call__.__call__.__call__ = func
 
     imported = self.cycle(root, cycles)
@@ -539,7 +564,7 @@
     self.assertAllEqual(imported(x).numpy(), 3.0)
 
   def test_load_in_graph_mode(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.v1 = variables.Variable(1.)
     root.v2 = variables.Variable(2.)
     root.f = def_function.function(
@@ -560,7 +585,7 @@
         self.assertEqual(4.0, sess.run(output))
 
   def test_load_in_func_graph(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.v1 = variables.Variable(1.)
     root.v2 = variables.Variable(2.)
     root.f = def_function.function(
@@ -572,7 +597,7 @@
     path = tempfile.mkdtemp(prefix=self.get_temp_dir())
     save.save(root, path)
 
-    closure = tracking.AutoCheckpointable()
+    closure = tracking.AutoTrackable()
     @def_function.function
     def func(x):
       if not hasattr(closure, "model"):
@@ -589,7 +614,7 @@
     def func(x):
       return 2 * x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func
 
     self.assertAllEqual([2], root.f(constant_op.constant([1])).numpy())
@@ -625,7 +650,7 @@
         tensor_spec.TensorSpec([None], dtypes.int32), True)
     func.get_concrete_function(tensor_spec.TensorSpec([None], dtypes.float32))
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func
 
     imported = self.cycle(root, cycles)
@@ -635,7 +660,7 @@
 
     self.assertAllEqual([2, 4, 6, 8],
                         concrete(x=constant_op.constant([1, 2, 3, 4])).numpy())
-    with self.assertRaisesRegexp(AssertionError,
+    with self.assertRaisesRegexp(ValueError,
                                  "Could not find matching function to call"):
       imported.f.get_concrete_function(
           tensor_spec.TensorSpec([None], dtypes.int32))
@@ -649,7 +674,7 @@
     def func(x):
       return 2 * x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func.get_concrete_function()
 
     self.assertAllEqual([2], root.f(constant_op.constant([1])).numpy())
@@ -670,7 +695,7 @@
     def func(x):
       return 2 * x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func.get_concrete_function()
 
     self.assertAllEqual([2], root.f(constant_op.constant([1])).numpy())
@@ -686,7 +711,7 @@
     def func(x):
       return 2 * x
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func.get_concrete_function(constant_op.constant([1]))
     self.assertAllEqual([4], root.f(constant_op.constant([2])).numpy())
     # TODO(andresp): Fix exporting of loaded concrete functions as signatures.
@@ -699,7 +724,7 @@
         input_signature=[tensor_spec.TensorSpec([None], dtypes.float32)])
     def func(x):
       return x ** 2.
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func.get_concrete_function()
 
     def _compute_gradient(function):
@@ -719,7 +744,7 @@
     @def_function.function
     def func(x, y):
       return x * (y + 1.)
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func.get_concrete_function(
         tensor_spec.TensorSpec([], dtypes.float32),
         tensor_spec.TensorSpec([], dtypes.float32))
@@ -736,7 +761,7 @@
     def func(*args):
       x, y = args
       return x * (y + 1.)
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func.get_concrete_function(
         tensor_spec.TensorSpec([], dtypes.float32, name="x"),
         tensor_spec.TensorSpec([], dtypes.float32, name="y"))
@@ -757,7 +782,7 @@
       capture.assign_sub(1)
 
     vsave = variables.Variable(1)
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = func.get_concrete_function(vsave)
     root.capture = capture
     self.assertEqual(1, vsave.numpy())
@@ -780,7 +805,7 @@
     def func(v):
       return v + 1
 
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.func = func
     root.concrete_func = func.get_concrete_function(
         tensor_spec.TensorSpec(None, dtypes.int32))
@@ -792,7 +817,7 @@
     self.assertEqual(2, imported.concrete_func(one).numpy())
 
   def test_dict(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.variables = dict(a=variables.Variable(1.))
     root.variables["b"] = variables.Variable(2.)
     root.variables["c"] = 1
@@ -807,7 +832,7 @@
     self.assertEqual(100., imported.funcs["conc"]().numpy())
 
   def test_list(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.variables = [variables.Variable(1.)]
     root.variables.append(1)
     root.variables.append(variables.Variable(3.))
@@ -818,7 +843,7 @@
     self.assertEqual(3, len(imported.variables))
 
   def test_functions_list(self, cycles):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     v1 = variables.Variable(1.)
     root.losses = [def_function.function(lambda: math_ops.reduce_sum(v1 ** 2))]
     root.variables = [v1]
@@ -840,7 +865,7 @@
 
   def test_captured_constant(self, cycles):
     const = array_ops.zeros([100])
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(lambda: const + 1.)
     root.g = def_function.function(lambda: const + 2.)
     self.assertAllClose(array_ops.ones([100]), root.f())
@@ -858,25 +883,207 @@
     self.assertIs(f_concrete.captured_inputs[0],
                   g_concrete.captured_inputs[0])
 
+  def test_functions_accessed_once(self, cycles):
+
+    class Exported(tracking.AutoTrackable):
+
+      def __init__(self):
+        self._counter = 0
+
+      @property
+      def make_func(self):
+        @def_function.function
+        def f():
+          return constant_op.constant(self._counter)
+        f.get_concrete_function()  # force a trace
+        self._counter += 1
+        return f
+
+    exported = Exported()
+    imported = self.cycle(exported, cycles)
+    self.assertEqual(0, imported.make_func().numpy())
+    self.assertEqual(1, exported.make_func().numpy())
+
+  def test_overwritten_signatures_error(self, cycles):
+    exported = tracking.AutoTrackable()
+    exported.f = def_function.function(lambda: constant_op.constant(1.))
+    imported = self.cycle(
+        exported, cycles,
+        signatures={"key": exported.f.get_concrete_function()})
+    self.assertEqual(1., imported.signatures["key"]()["output_0"].numpy())
+    imported.signatures = {"key1": imported.signatures["key"]}
+    with self.assertRaisesRegexp(ValueError, "signatures"):
+      save.save(imported, tempfile.mkdtemp(prefix=self.get_temp_dir()))
+
+  def test_signature_loading(self, cycles):
+
+    class Exported(tracking.AutoTrackable):
+
+      def __init__(self):
+        self.v = variables.Variable(3.)
+
+      @def_function.function
+      def do(self, x):
+        return self.v * x
+
+    exported = Exported()
+    imported = self.cycle(
+        exported,
+        signatures=exported.do.get_concrete_function(
+            tensor_spec.TensorSpec(None, dtypes.float32)))
+    for _ in range(cycles - 1):
+      imported = self.cycle(imported, signatures=imported.signatures)
+    self.assertEqual(["serving_default"], list(imported.signatures.keys()))
+    imported_function = imported.signatures["serving_default"]
+    two = constant_op.constant(2.)
+    self.assertEqual(6., imported_function(x=two)["output_0"].numpy())
+    imported.v.assign(4.)
+    self.assertEqual(8., imported_function(x=two)["output_0"].numpy())
+    with self.assertRaisesRegexp(TypeError, "positional"):
+      imported_function(two)
+    with self.assertRaises(TypeError):
+      # The signatures mapping is immutable
+      imported.signatures["random_key"] = 3
+
+  def _make_model_with_tables(self):
+    default_val = -1
+    keys = constant_op.constant(["brain", "salad", "surgery"])
+    values = constant_op.constant([0, 1, 2], dtypes.int64)
+    table1_initializer = lookup_ops.KeyValueTensorInitializer(keys, values)
+    table1 = lookup_ops.HashTable(table1_initializer, default_val)
+
+    table2_file = self._make_asset("test\nfoo\nbrain\n")
+    table2_initializer = lookup_ops.TextFileIdTableInitializer(table2_file)
+    table2 = lookup_ops.HashTable(table2_initializer, default_val)
+
+    def _make_lookup_function(table):
+      signature = [tensor_spec.TensorSpec(None, dtypes.string)]
+      return def_function.function(input_signature=signature)(
+          lambda x: table.lookup(x))  # pylint: disable=unnecessary-lambda
+
+    root = tracking.AutoTrackable()
+    root.table1 = table1
+    root.lookup1 = _make_lookup_function(table1)
+    root.table2 = table2
+    root.lookup2 = _make_lookup_function(table2)
+    return root
+
   def test_table(self, cycles):
-    # TODO(b/123408779): Handle generic TrackableResources and enable this test
-    self.skipTest("Need to handle generic TrackableResources")
-    vocab_path = self._make_asset("alpha\nbeta\ngamma\n")
-    initializer = lookup_ops.TextFileInitializer(
-        vocab_path,
-        key_dtype=dtypes.string,
-        key_index=lookup_ops.TextFileIndex.WHOLE_LINE,
-        value_dtype=dtypes.int64,
-        value_index=lookup_ops.TextFileIndex.LINE_NUMBER)
-    root = util.Checkpoint(table=lookup_ops.HashTable(
-        initializer, default_value=-1))
-    root.table_user = def_function.function(
-        root.table.lookup,
-        input_signature=[tensor_spec.TensorSpec(None, dtypes.string)])
-    self.assertEqual(2, root.table_user(constant_op.constant("gamma")).numpy())
+    root = self._make_model_with_tables()
+    imported = self.cycle(root, cycles, signatures={})
+    keys = constant_op.constant(["brain", "test", "foo", "surgery"])
+    self.assertAllEqual([0, -1, -1, 2], imported.lookup1(keys).numpy())
+    self.assertAllEqual([2, 0, 1, -1], imported.lookup2(keys).numpy())
+
+  def test_table_in_graph(self, cycles):
+    root = self._make_model_with_tables()
+
+    if cycles > 1:
+      root = self.cycle(root, cycles - 1)
+    path = tempfile.mkdtemp(prefix=self.get_temp_dir())
+    save.save(root, path)
+    imported = self.cycle(root, 1)
+
+    with ops.Graph().as_default():
+      imported = load.load(path)
+      keys = constant_op.constant(["brain", "test", "foo", "surgery"])
+      output1 = imported.lookup1(keys)
+      output2 = imported.lookup2(keys)
+      with monitored_session.MonitoredSession() as sess:
+        self.assertAllEqual([0, -1, -1, 2], sess.run(output1))
+        self.assertAllEqual([2, 0, 1, -1], sess.run(output2))
+
+  def test_preserve_argspec(self, cycles):
+    def f(a, b, c):  # pylint: disable=unused-argument
+      return None
+
+    original_fullargspec = tf_inspect.getfullargspec(f)
+
+    root = tracking.AutoTrackable()
+    root.f = def_function.function(f)
     imported = self.cycle(root, cycles)
-    self.assertEqual(
-        2, imported.table_user(constant_op.constant("gamma")).numpy())
+
+    restored_fullargspec = tf_inspect.getfullargspec(imported.f)
+    self.assertEqual(original_fullargspec, restored_fullargspec)
+
+  def test_canonicalize_inputs(self, cycles):
+    @def_function.function(autograph=False)
+    def func(a=1, b=2, c=3, training=True):
+      if training:
+        return [a, b, c, training]
+      else:
+        return [c, b, a, training]
+
+    # TODO(b/123501567): Workaround to trigger generic traces of a function
+    # with extra non-Tensor args.
+    signature = 3*[tensor_spec.TensorSpec(None, dtypes.float32)]
+    @def_function.function(input_signature=signature)
+    def trigger(a, b, c):
+      func(a, b, c, True)
+      func(a, b, c, False)
+
+    trigger.get_concrete_function()
+
+    root = tracking.AutoTrackable()
+    root.f = func
+    root = self.cycle(root, cycles)
+    self.assertAllEqual(root.f(), [1.0, 2.0, 3.0, True])
+    self.assertAllEqual(root.f(-1.0, training=False), [3.0, 2.0, -1.0, False])
+
+    with self.assertRaisesRegexp(ValueError,
+                                 "Could not find matching function"):
+      root.f(["hello", 1.0])
+
+  def test_prefer_specific_trace(self, cycles):
+    @def_function.function(autograph=False)
+    def func(a):
+      if isinstance(a, int):
+        return a
+      else:
+        return a + 1
+
+    self.assertAllEqual(2, func(2).numpy())
+    self.assertAllEqual(3, func(constant_op.constant(2)).numpy())
+
+    root = tracking.AutoTrackable()
+    root.f = func
+    root = self.cycle(root, cycles)
+    self.assertAllEqual(2, root.f(2).numpy())
+    self.assertAllEqual(4, root.f(3).numpy())
+    self.assertAllEqual(3, root.f(constant_op.constant(2)).numpy())
+    self.assertAllEqual(4, root.f(constant_op.constant(3)).numpy())
+
+
+class SingleCycleTests(test.TestCase, parameterized.TestCase):
+
+  def test_load_with_tags(self):
+    root = tracking.AutoTrackable()
+    path = tempfile.mkdtemp(prefix=self.get_temp_dir())
+    save.save(root, path)
+    with self.assertRaises(ValueError):
+      load.load(path, tags=[tag_constants.EVAL])
+    load.load(path, tags=[tag_constants.SERVING])
+    load.load(path, tags=tag_constants.SERVING)
+
+  def test_docstring_examples(self):
+    path = tempfile.mkdtemp(prefix=self.get_temp_dir())
+    exported = util.Checkpoint(v=variables.Variable(3.))
+    exported.f = def_function.function(
+        lambda x: exported.v * x,
+        input_signature=[
+            tensor_spec.TensorSpec(shape=None, dtype=dtypes.float32)])
+    save.save(exported, path)
+    imported = load.load(path)
+    self.assertEqual(3., imported.v.numpy())
+    self.assertEqual(6., imported.f(x=constant_op.constant(2.)).numpy())
+
+    save.save(exported, path, exported.f.get_concrete_function())
+    imported = load.load(path)
+    f = imported.signatures["serving_default"]
+    self.assertAllEqual(
+        [[-3.]],
+        f(x=constant_op.constant([[-1.]]))["output_0"].numpy())
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/saved_model/load_v1_in_v2.py b/tensorflow/python/saved_model/load_v1_in_v2.py
new file mode 100644
index 0000000..c5ba010
--- /dev/null
+++ b/tensorflow/python/saved_model/load_v1_in_v2.py
@@ -0,0 +1,154 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Import a TF v1-style SavedModel when executing eagerly."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+
+from tensorflow.python.eager import wrap_function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.saved_model import loader_impl
+from tensorflow.python.saved_model import signature_serialization
+from tensorflow.python.training import saver as tf_saver
+from tensorflow.python.training.tracking import tracking
+
+
+class _Initializer(tracking.TrackableResource):
+  """Represents an initialization operation restored from a SavedModel.
+
+  Without this object, re-export of imported 1.x SavedModels would omit the
+  original SavedModel's initialization procedure.
+
+  Created when `tf.saved_model.load` loads a TF 1.x-style SavedModel with an
+  initialization op. This object holds a function which runs the
+  initialization. It does not require any manual user intervention;
+  `tf.saved_model.save` will see this object and automatically add it to the
+  exported SavedModel, and `tf.saved_model.load` runs the initialization
+  function automatically.
+  """
+
+  def __init__(self, init_fn, asset_paths):
+    super(_Initializer, self).__init__()
+    self._asset_paths = asset_paths
+    self._init_fn = init_fn
+
+  def create_resource(self):
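+    # _Initializer carries only an initialization function and owns no real
+    # resource, so hand back an unused placeholder handle.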
+    return array_ops.placeholder(
+        dtype=dtypes.resource, shape=[], name="unused_resource")
+
+  def initialize(self):
+    self._init_fn(*[path.asset_path for path in self._asset_paths])
+
+
+class _EagerSavedModelLoader(loader_impl.SavedModelLoader):
+  """Loads a SavedModel without using Sessions."""
+
+  def get_meta_graph_def_from_tags(self, tags):
+    """Override to support implicit one-MetaGraph loading with tags=None."""
+    if tags is None:
+      if len(self._saved_model.meta_graphs) != 1:
+        tag_sets = [mg.meta_info_def.tags
+                    for mg in self._saved_model.meta_graphs]
+        raise ValueError(
+            ("Importing a SavedModel with tf.saved_model.load requires a "
+             "'tags=' argument if there is more than one MetaGraph. Got "
+             "'tags=None', but there are {} MetaGraphs in the SavedModel with "
+             "tag sets {}. Pass a 'tags=' argument to load this SavedModel.")
+            .format(len(self._saved_model.meta_graphs), tag_sets))
+      return self._saved_model.meta_graphs[0]
+    return super(_EagerSavedModelLoader, self).get_meta_graph_def_from_tags(
+        tags)
+
+  def load_graph(self, returns, meta_graph_def):
+    """Called from wrap_function to import `meta_graph_def`."""
+    # pylint: disable=protected-access
+    saver, _ = tf_saver._import_meta_graph_with_return_elements(
+        meta_graph_def)
+    # pylint: enable=protected-access
+    returns[0] = saver
+
+  def restore_variables(self, wrapped, saver):
+    """Restores variables from the checkpoint."""
+    if saver is not None:
+      saver_def = saver.saver_def
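+      # Prune a ConcreteFunction that feeds the checkpoint filename tensor and
+      # fetches the MetaGraph's restore op, then run it on the variables path.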
+      restore_fn = wrapped.prune(
+          feeds=[wrapped.graph.as_graph_element(
+              saver_def.filename_tensor_name)],
+          fetches=[wrapped.graph.as_graph_element(saver_def.restore_op_name)])
+      restore_fn(constant_op.constant(self._variables_path))
+
+  def _extract_signatures(self, wrapped, meta_graph_def):
+    """Creates ConcreteFunctions for signatures in `meta_graph_def`."""
+    signature_functions = {}
+    for signature_key, signature_def in meta_graph_def.signature_def.items():
+      input_names, input_specs = zip(*signature_def.inputs.items())
+      # TODO(allenl): Support optional arguments
+      signature_fn = wrapped.prune(
+          feeds=[wrapped.graph.as_graph_element(inp.name)
+                 for inp in input_specs],
+          fetches={name: wrapped.graph.as_graph_element(out.name)
+                   for name, out in signature_def.outputs.items()})
+      # pylint: disable=protected-access
+      signature_fn._arg_keywords = input_names
+      signature_fn._num_positional_args = 0
+      # pylint: enable=protected-access
+      signature_functions[signature_key] = signature_fn
+    return signature_functions
+
+  def load(self, tags):
+    """Creates an object from the MetaGraph identified by `tags`."""
+    meta_graph_def = self.get_meta_graph_def_from_tags(tags)
+    load_graph_returns = [None]
+    wrapped = wrap_function.wrap_function(
+        functools.partial(self.load_graph, load_graph_returns, meta_graph_def),
+        signature=[])
+    saver, = load_graph_returns
+    self.restore_variables(wrapped, saver)
+    with wrapped.graph.as_default():
+      init_op = loader_impl.get_init_op(meta_graph_def)
+    root = tracking.AutoTrackable()
+    if init_op is not None:
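+      # Wire the SavedModel's asset file paths into a pruned initialization
+      # function so tables and similar resources restore from the assets
+      # directory rather than from paths baked into the original graph.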
+      asset_feed_tensors = []
+      asset_paths = []
+      for tensor_name, value in loader_impl.get_asset_tensors(
+          self._export_dir, meta_graph_def).items():
+        asset_feed_tensors.append(wrapped.graph.as_graph_element(tensor_name))
+        asset_paths.append(tracking.TrackableAsset(value))
+      init_fn = wrapped.prune(
+          feeds=asset_feed_tensors,
+          fetches=[wrapped.graph.as_graph_element(init_op)])
+      initializer = _Initializer(init_fn, asset_paths)
+      initializer.initialize()
+      root.initializer = initializer
+      root.asset_paths = asset_paths
+    else:
+      root.asset_paths = []
+    signature_functions = self._extract_signatures(wrapped, meta_graph_def)
+
+    root.signatures = signature_serialization.create_signature_map(
+        signature_functions)
+    root.variables = list(wrapped.graph.variables)
+    return root
+
+
+def load(export_dir, tags):
+  """Load a v1-style SavedModel as an object."""
+  loader = _EagerSavedModelLoader(export_dir)
+  return loader.load(tags=tags)
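+
+# A minimal usage sketch (assumes a TF 1.x SavedModel at `export_dir` with a
+# single MetaGraph defining a "serving_default" signature):
+#
+#   root = load(export_dir, tags=None)
+#   fn = root.signatures["serving_default"]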
diff --git a/tensorflow/python/saved_model/load_v1_in_v2_test.py b/tensorflow/python/saved_model/load_v1_in_v2_test.py
new file mode 100644
index 0000000..e8bd61a
--- /dev/null
+++ b/tensorflow/python/saved_model/load_v1_in_v2_test.py
@@ -0,0 +1,210 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for importing a TF v1-style SavedModel when executing eagerly."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.saved_model import builder_impl
+from tensorflow.python.saved_model import load
+from tensorflow.python.saved_model import save
+from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.saved_model import simple_save
+from tensorflow.python.saved_model import utils_impl
+
+
+class LoadTest(test.TestCase):
+
+  def _v1_single_metagraph_saved_model(self, use_resource):
+    export_graph = ops.Graph()
+    with export_graph.as_default():
+      start = array_ops.placeholder(
+          shape=[None], dtype=dtypes.float32, name="start")
+      if use_resource:
+        distractor = variables.RefVariable(-1., name="distractor")
+        v = resource_variable_ops.ResourceVariable(3., name="v")
+      else:
+        # "distractor" gets saved in the checkpoint and so used in the restore
+        # function, but not in the pruned function for the signature. This tests
+        # node naming: it needs to be consistent (and ideally always the same as
+        # the node in the original GraphDef) for the resource manager to find
+        # the right variable.
+        distractor = variables.RefVariable(-1., name="distractor")
+        v = variables.RefVariable(3., name="v")
+      local_variable = variables.VariableV1(
+          1.,
+          collections=[ops.GraphKeys.LOCAL_VARIABLES],
+          trainable=False,
+          use_resource=True)
+      output = array_ops.identity(start * v * local_variable, name="output")
+      with session_lib.Session() as session:
+        session.run([v.initializer, distractor.initializer,
+                     local_variable.initializer])
+        path = os.path.join(self.get_temp_dir(), "saved_model", str(ops.uid()))
+        simple_save.simple_save(
+            session,
+            path,
+            inputs={"start": start},
+            outputs={"output": output},
+            legacy_init_op=local_variable.initializer)
+    return path
+
+  def test_resource_variable_import(self):
+    imported = load.load(self._v1_single_metagraph_saved_model(
+        use_resource=True))
+    fn = imported.signatures["serving_default"]
+    with self.assertRaisesRegexp(TypeError, "positional"):
+      fn(constant_op.constant(2.))
+    self.assertEqual({"output": 6.},
+                     self.evaluate(fn(start=constant_op.constant(2.))))
+    self.assertAllEqual([3., 1.], self.evaluate(imported.variables))
+    imported.variables[0].assign(4.)
+    self.assertEqual({"output": 8.},
+                     self.evaluate(fn(start=constant_op.constant(2.))))
+    imported.variables[1].assign(2.)
+    self.assertEqual({"output": 24.},
+                     self.evaluate(fn(start=constant_op.constant(3.))))
+    self.assertTrue(imported.variables[0].trainable)
+    self.assertFalse(imported.variables[1].trainable)
+    with backprop.GradientTape() as tape:
+      output = fn(start=constant_op.constant(4.))
+    self.assertEqual(imported.variables[:1], list(tape.watched_variables()))
+    self.assertEqual(8., tape.gradient(output, imported.variables[0]).numpy())
+
+  def test_ref_variable_import(self):
+    saved = self._v1_single_metagraph_saved_model(use_resource=False)
+    imported = load.load(saved)
+    fn = imported.signatures["serving_default"]
+    self.assertEqual(6., fn(start=constant_op.constant(2.))["output"].numpy())
+
+  def _v1_multi_metagraph_saved_model(self):
+    export_graph = ops.Graph()
+    with export_graph.as_default():
+      start = array_ops.placeholder(
+          shape=[None], dtype=dtypes.float32, name="start")
+      v = resource_variable_ops.ResourceVariable(21.)
+      first_output = array_ops.identity(start * v, name="first_output")
+      second_output = array_ops.identity(v, name="second_output")
+      with session_lib.Session() as session:
+        session.run(v.initializer)
+        path = os.path.join(self.get_temp_dir(), "saved_model", str(ops.uid()))
+        builder = builder_impl.SavedModelBuilder(path)
+        builder.add_meta_graph_and_variables(
+            session, tags=["first"],
+            signature_def_map={
+                "first_key": signature_def_utils.build_signature_def(
+                    {"first_start": utils_impl.build_tensor_info(start)},
+                    {"first_output": utils_impl.build_tensor_info(
+                        first_output)})})
+        builder.add_meta_graph(
+            tags=["second"],
+            signature_def_map={
+                "second_key": signature_def_utils.build_signature_def(
+                    {"second_start": utils_impl.build_tensor_info(start)},
+                    {"second_output": utils_impl.build_tensor_info(
+                        second_output)})})
+        builder.save()
+    return path
+
+  def test_multi_meta_graph_loading(self):
+    with self.assertRaisesRegexp(ValueError, "2 MetaGraphs"):
+      load.load(self._v1_multi_metagraph_saved_model())
+    first_imported = load.load(self._v1_multi_metagraph_saved_model(),
+                               tags=["first"])
+    self.assertEqual({"first_output": 42.},
+                     self.evaluate(first_imported.signatures["first_key"](
+                         first_start=constant_op.constant(2.))))
+    second_imported = load.load(self._v1_multi_metagraph_saved_model(),
+                                tags=["second"])
+    with self.assertRaisesRegexp(TypeError, "second_start"):
+      second_imported.signatures["second_key"](x=constant_op.constant(2.))
+    with self.assertRaisesRegexp(TypeError, "second_start"):
+      second_imported.signatures["second_key"](
+          second_start=constant_op.constant(2.),
+          x=constant_op.constant(2.))
+    self.assertEqual({"second_output": 21.},
+                     self.evaluate(second_imported.signatures["second_key"](
+                         second_start=constant_op.constant(2.))))
+
+  def _v1_asset_saved_model(self):
+    export_graph = ops.Graph()
+    vocab_path = os.path.join(self.get_temp_dir(), "vocab.txt")
+    with open(vocab_path, "w") as f:
+      f.write("alpha\nbeta\ngamma\n")
+    with export_graph.as_default():
+      initializer = lookup_ops.TextFileInitializer(
+          vocab_path,
+          key_dtype=dtypes.string,
+          key_index=lookup_ops.TextFileIndex.WHOLE_LINE,
+          value_dtype=dtypes.int64,
+          value_index=lookup_ops.TextFileIndex.LINE_NUMBER)
+      table = lookup_ops.HashTable(
+          initializer, default_value=-1)
+      start = array_ops.placeholder(
+          shape=None, dtype=dtypes.string, name="in")
+      output = table.lookup(start, name="out")
+      with session_lib.Session() as session:
+        session.run([table.initializer])
+        path = os.path.join(self.get_temp_dir(), "saved_model", str(ops.uid()))
+        simple_save.simple_save(
+            session,
+            path,
+            inputs={"start": start},
+            outputs={"output": output},
+            legacy_init_op=table.initializer)
+    file_io.delete_file(vocab_path)
+    return path
+
+  def test_asset_loading(self):
+    first_path = self._v1_asset_saved_model()
+    imported = load.load(first_path)
+    fn = imported.signatures["serving_default"]
+    self.assertAllClose({"output": [2, 0]},
+                        fn(start=constant_op.constant(["gamma", "alpha"])))
+    second_path = os.path.join(self.get_temp_dir(), "saved_model",
+                               str(ops.uid()))
+    save.save(imported, second_path, signatures=imported.signatures)
+    shutil.rmtree(first_path)
+    second_import = load.load(second_path)
+    fn = second_import.signatures["serving_default"]
+    self.assertAllClose({"output": [2, 0]},
+                        fn(start=constant_op.constant(["gamma", "alpha"])))
+
+    third_path = os.path.join(self.get_temp_dir(), "saved_model",
+                              str(ops.uid()))
+    save.save(second_import, third_path, signatures=second_import.signatures)
+    shutil.rmtree(second_path)
+    third_import = load.load(third_path)
+    fn = third_import.signatures["serving_default"]
+    self.assertAllClose({"output": [2, 0]},
+                        fn(start=constant_op.constant(["gamma", "alpha"])))
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py
index e5be03a..bfabef9 100644
--- a/tensorflow/python/saved_model/loader_impl.py
+++ b/tensorflow/python/saved_model/loader_impl.py
@@ -88,7 +88,7 @@
 _parse_saved_model = parse_saved_model
 
 
-def _get_asset_tensors(export_dir, meta_graph_def_to_load, import_scope=None):
+def get_asset_tensors(export_dir, meta_graph_def_to_load, import_scope=None):
   """Gets the asset tensors, if defined in the meta graph def to load.
 
   Args:
@@ -393,7 +393,7 @@
     meta_graph_def = self.get_meta_graph_def_from_tags(tags)
     with sess.graph.as_default():
       # Get asset tensors, if any.
-      asset_tensors_dictionary = _get_asset_tensors(
+      asset_tensors_dictionary = get_asset_tensors(
           self._export_dir, meta_graph_def, import_scope=import_scope)
 
       init_op = get_init_op(meta_graph_def, import_scope)
diff --git a/tensorflow/python/saved_model/model_utils/BUILD b/tensorflow/python/saved_model/model_utils/BUILD
index 192a610..493574a 100644
--- a/tensorflow/python/saved_model/model_utils/BUILD
+++ b/tensorflow/python/saved_model/model_utils/BUILD
@@ -30,6 +30,7 @@
     deps = [
         ":export_output",
         ":export_utils",
+        ":mode_keys",
     ],
 )
 
@@ -70,7 +71,7 @@
     srcs_version = "PY2AND3",
     deps = [
         ":export_output",
-        "//tensorflow/python:mode_keys",
+        ":mode_keys",
         "//tensorflow/python:platform",
         "//tensorflow/python:util",
         "//tensorflow/python/saved_model:signature_constants",
@@ -98,3 +99,19 @@
         "//tensorflow/python/saved_model:signature_def_utils",
     ],
 )
+
+py_library(
+    name = "mode_keys",
+    srcs = ["mode_keys.py"],
+    srcs_version = "PY2AND3",
+)
+
+py_test(
+    name = "mode_keys_test",
+    srcs = ["mode_keys_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":mode_keys",
+        "//tensorflow/python:client_testlib",
+    ],
+)
diff --git a/tensorflow/python/saved_model/model_utils/__init__.py b/tensorflow/python/saved_model/model_utils/__init__.py
index 84540ba..3f54c96 100644
--- a/tensorflow/python/saved_model/model_utils/__init__.py
+++ b/tensorflow/python/saved_model/model_utils/__init__.py
@@ -25,4 +25,5 @@
 from tensorflow.python.saved_model.model_utils.export_utils import get_export_outputs
 from tensorflow.python.saved_model.model_utils.export_utils import get_temp_export_dir
 from tensorflow.python.saved_model.model_utils.export_utils import get_timestamped_export_dir
+from tensorflow.python.saved_model.model_utils.export_utils import SIGNATURE_KEY_MAP
 # pylint: enable=wildcard-import
diff --git a/tensorflow/python/saved_model/model_utils/export_test.py b/tensorflow/python/saved_model/model_utils/export_test.py
index ef51215..c87d2ee 100644
--- a/tensorflow/python/saved_model/model_utils/export_test.py
+++ b/tensorflow/python/saved_model/model_utils/export_test.py
@@ -24,7 +24,6 @@
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
@@ -32,21 +31,7 @@
 from tensorflow.python.saved_model import signature_def_utils
 from tensorflow.python.saved_model.model_utils import export_output
 from tensorflow.python.saved_model.model_utils import export_utils
-
-
-class LabeledTensorMock(object):
-  """Mock class emulating LabeledTensor."""
-
-  def __init__(self):
-    self.tensor = constant_op.constant([1])
-
-
-def _convert_labeled_tensor_mock_to_tensor(value, *args, **kwargs):
-  return ops.internal_convert_to_tensor(value.tensor, *args, **kwargs)
-
-
-ops.register_tensor_conversion_function(LabeledTensorMock,
-                                        _convert_labeled_tensor_mock_to_tensor)
+from tensorflow.python.saved_model.model_utils.mode_keys import KerasModeKeys
 
 
 class ExportTest(test_util.TensorFlowTestCase):
@@ -251,6 +236,53 @@
 
     self.assertDictEqual(expected_signature_defs, signature_defs)
 
+  @test_util.deprecated_graph_mode_only
+  def test_export_outputs_for_mode(self):
+    predictions = {"predictions": constant_op.constant([1.])}
+    loss = {"loss": constant_op.constant([2.])}
+    metrics = {
+        "metrics": (constant_op.constant([3.]), constant_op.constant([4.]))}
+    expected_metrics = {
+        "metrics/value": metrics["metrics"][0],
+        "metrics/update_op": metrics["metrics"][1]
+    }
+
+    def _build_export_output(mode):
+      return export_utils.export_outputs_for_mode(
+          mode, None, predictions, loss, metrics)
+
+    ret = _build_export_output(KerasModeKeys.TRAIN)
+    self.assertIn(signature_constants.DEFAULT_TRAIN_SIGNATURE_DEF_KEY, ret)
+    export_out = ret[signature_constants.DEFAULT_TRAIN_SIGNATURE_DEF_KEY]
+    self.assertIsInstance(export_out, export_output.TrainOutput)
+    self.assertEqual(export_out.predictions, predictions)
+    self.assertEqual(export_out.loss, loss)
+    self.assertEqual(export_out.metrics, expected_metrics)
+
+    ret = _build_export_output(KerasModeKeys.TEST)
+    self.assertIn(signature_constants.DEFAULT_EVAL_SIGNATURE_DEF_KEY, ret)
+    export_out = ret[signature_constants.DEFAULT_EVAL_SIGNATURE_DEF_KEY]
+    self.assertIsInstance(export_out, export_output.EvalOutput)
+    self.assertEqual(export_out.predictions, predictions)
+    self.assertEqual(export_out.loss, loss)
+    self.assertEqual(export_out.metrics, expected_metrics)
+
+    ret = _build_export_output(KerasModeKeys.PREDICT)
+    self.assertIn(signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, ret)
+    export_out = ret[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
+    self.assertIsInstance(export_out, export_output.PredictOutput)
+    self.assertEqual(export_out.outputs, predictions)
+
+    classes = constant_op.constant(["class5"])
+    ret = export_utils.export_outputs_for_mode(
+        KerasModeKeys.PREDICT,
+        {"classify": export_output.ClassificationOutput(
+            classes=classes)})
+    self.assertIn("classify", ret)
+    export_out = ret["classify"]
+    self.assertIsInstance(export_out, export_output.ClassificationOutput)
+    self.assertEqual(export_out.classes, classes)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/saved_model/model_utils/export_utils.py b/tensorflow/python/saved_model/model_utils/export_utils.py
index 4f89337..adb6bf2 100644
--- a/tensorflow/python/saved_model/model_utils/export_utils.py
+++ b/tensorflow/python/saved_model/model_utils/export_utils.py
@@ -30,21 +30,32 @@
 from tensorflow.python.saved_model import signature_def_utils
 from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.saved_model.model_utils import export_output as export_output_lib
-from tensorflow.python.training import mode_keys
+from tensorflow.python.saved_model.model_utils import mode_keys
+from tensorflow.python.saved_model.model_utils.mode_keys import KerasModeKeys as ModeKeys
 from tensorflow.python.util import compat
 
 
 # Mapping of the modes to appropriate MetaGraph tags in the SavedModel.
-EXPORT_TAG_MAP = {
-    mode_keys.ModeKeys.PREDICT: [tag_constants.SERVING],
-    mode_keys.ModeKeys.TRAIN: [tag_constants.TRAINING],
-    mode_keys.ModeKeys.TEST: [tag_constants.EVAL],
-}
+EXPORT_TAG_MAP = mode_keys.ModeKeyMap(**{
+    ModeKeys.PREDICT: [tag_constants.SERVING],
+    ModeKeys.TRAIN: [tag_constants.TRAINING],
+    ModeKeys.TEST: [tag_constants.EVAL]})
 
+# For every exported mode, a SignatureDef map should be created using the
+# functions `export_outputs_for_mode` and `build_all_signature_defs`. By
+# default, this map will contain a single Signature that defines the input
+# tensors and output predictions, losses, and/or metrics (depending on the mode).
+# The default keys used in the SignatureDef map are defined below.
+SIGNATURE_KEY_MAP = mode_keys.ModeKeyMap(**{
+    ModeKeys.PREDICT: signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY,
+    ModeKeys.TRAIN: signature_constants.DEFAULT_TRAIN_SIGNATURE_DEF_KEY,
+    ModeKeys.TEST: signature_constants.DEFAULT_EVAL_SIGNATURE_DEF_KEY})
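+
+# A minimal usage sketch for `export_outputs_for_mode` (the `predictions`,
+# `loss`, and `metrics` tensors below are illustrative, not part of this
+# module):
+#
+#   outputs = export_outputs_for_mode(
+#       ModeKeys.TRAIN, None, predictions, loss, metrics)
+#   # -> {SIGNATURE_KEY_MAP[ModeKeys.TRAIN]: TrainOutput(...)}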
 
-_SINGLE_FEATURE_DEFAULT_NAME = 'feature'
-_SINGLE_RECEIVER_DEFAULT_NAME = 'input'
-_SINGLE_LABEL_DEFAULT_NAME = 'label'
+# Default names used in the SignatureDef input map, which maps strings to
+# TensorInfo protos.
+SINGLE_FEATURE_DEFAULT_NAME = 'feature'
+SINGLE_RECEIVER_DEFAULT_NAME = 'input'
+SINGLE_LABEL_DEFAULT_NAME = 'label'
 
 ### Below utilities are specific to SavedModel exports.
 
@@ -80,7 +91,7 @@
     ValueError: if export_outputs is not a dict
   """
   if not isinstance(receiver_tensors, dict):
-    receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors}
+    receiver_tensors = {SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors}
   if export_outputs is None or not isinstance(export_outputs, dict):
     raise ValueError('export_outputs must be a dict and not '
                      '{}'.format(type(export_outputs)))
@@ -100,7 +111,7 @@
         six.iteritems(receiver_tensors_alternatives)):
       if not isinstance(receiver_tensors_alt, dict):
         receiver_tensors_alt = {
-            _SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt
+            SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt
         }
       for output_key, export_output in export_outputs.items():
         signature_name = '{}:{}'.format(receiver_name or 'None', output_key or
@@ -262,18 +273,21 @@
   Raises:
     ValueError: if an appropriate ExportOutput cannot be found for the mode.
   """
-  # TODO(b/113185250): move all model export helper functions into an util file.
-  if mode == mode_keys.ModeKeys.PREDICT:
+  if mode not in SIGNATURE_KEY_MAP:
+    raise ValueError(
+        'Export output type not found for mode: {}. Expected one of: {}.\n'
+        'One likely error is that V1 Estimator ModeKeys were somehow passed to '
+        'this function. Please ensure that you are using the new ModeKeys.'
+        .format(mode, SIGNATURE_KEY_MAP.keys()))
+  signature_key = SIGNATURE_KEY_MAP[mode]
+  if mode_keys.is_predict(mode):
     return get_export_outputs(serving_export_outputs, predictions)
-  elif mode == mode_keys.ModeKeys.TRAIN:
-    return {mode: export_output_lib.TrainOutput(
-        loss=loss, predictions=predictions, metrics=metrics)}
-  elif mode == mode_keys.ModeKeys.TEST:
-    return {mode: export_output_lib.EvalOutput(
+  elif mode_keys.is_train(mode):
+    return {signature_key: export_output_lib.TrainOutput(
         loss=loss, predictions=predictions, metrics=metrics)}
   else:
-    raise ValueError(
-        'Export output type not found for mode: {}'.format(mode))
+    return {signature_key: export_output_lib.EvalOutput(
+        loss=loss, predictions=predictions, metrics=metrics)}
 
 
 def get_export_outputs(export_outputs, predictions):
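
Because `SIGNATURE_KEY_MAP` is a `ModeKeyMap`, lookups are keyed by mode rather
than by raw string, so the Keras and Estimator vocabularies resolve to the same
entry. A quick sketch of the intended behavior:

    from tensorflow.python.saved_model.model_utils import export_utils
    from tensorflow.python.saved_model.model_utils import mode_keys

    # "predict" (Keras) and "infer" (Estimator) map to the same signature key.
    assert (export_utils.SIGNATURE_KEY_MAP[mode_keys.KerasModeKeys.PREDICT] ==
            export_utils.SIGNATURE_KEY_MAP[mode_keys.EstimatorModeKeys.PREDICT])
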
diff --git a/tensorflow/python/saved_model/model_utils/mode_keys.py b/tensorflow/python/saved_model/model_utils/mode_keys.py
new file mode 100644
index 0000000..2912de7
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/mode_keys.py
@@ -0,0 +1,109 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utils for managing different mode strings used by Keras and Estimator models.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+
+class KerasModeKeys(object):
+  """Standard names for model modes.
+
+  The following standard keys are defined:
+
+  * `TRAIN`: training/fitting mode.
+  * `TEST`: testing/evaluation mode.
+  * `PREDICT`: prediction/inference mode.
+  """
+
+  TRAIN = 'train'
+  TEST = 'test'
+  PREDICT = 'predict'
+
+
+# TODO(kathywu): Remove copy in Estimator after nightlies
+class EstimatorModeKeys(object):
+  """Standard names for Estimator model modes.
+
+  The following standard keys are defined:
+
+  * `TRAIN`: training/fitting mode.
+  * `EVAL`: testing/evaluation mode.
+  * `PREDICT`: prediction/inference mode.
+  """
+
+  TRAIN = 'train'
+  EVAL = 'eval'
+  PREDICT = 'infer'
+
+
+def is_predict(mode):
+  return mode in [KerasModeKeys.PREDICT, EstimatorModeKeys.PREDICT]
+
+
+def is_eval(mode):
+  return mode in [KerasModeKeys.TEST, EstimatorModeKeys.EVAL]
+
+
+def is_train(mode):
+  return mode in [KerasModeKeys.TRAIN, EstimatorModeKeys.TRAIN]
+
+
+class ModeKeyMap(collections.Mapping):
+  """Map using ModeKeys as keys.
+
+  This class creates an immutable mapping from modes to values. For example,
+  SavedModel exports of Keras and Estimator models use this to map modes to
+  their corresponding MetaGraph tags/SignatureDef keys.
+
+  Since this class uses modes, rather than strings, as keys, both "predict"
+  (Keras's PREDICT ModeKey) and "infer" (Estimator's PREDICT ModeKey) map to the
+  same value.
+  """
+
+  def __init__(self, **kwargs):
+    self._internal_dict = {}
+    self._keys = []
+    for key in kwargs:
+      self._keys.append(key)
+      dict_key = self._get_internal_key(key)
+      if dict_key in self._internal_dict:
+        raise ValueError(
+            'Error creating ModeKeyMap. Multiple keys/values found for {} mode.'
+            .format(dict_key))
+      self._internal_dict[dict_key] = kwargs[key]
+
+  def _get_internal_key(self, key):
+    """Return keys used for the internal dictionary."""
+    if is_train(key):
+      return KerasModeKeys.TRAIN
+    if is_eval(key):
+      return KerasModeKeys.TEST
+    if is_predict(key):
+      return KerasModeKeys.PREDICT
+    raise ValueError('Invalid mode key: {}.'.format(key))
+
+  def __getitem__(self, key):
+    return self._internal_dict[self._get_internal_key(key)]
+
+  def __iter__(self):
+    return iter(self._keys)
+
+  def __len__(self):
+    return len(self._keys)
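
The predicates above make mode checks vocabulary-agnostic; `ModeKeyMap` itself
is exercised by the unit test that follows. A small sketch of the predicates:

    from tensorflow.python.saved_model.model_utils import mode_keys

    assert mode_keys.is_eval(mode_keys.KerasModeKeys.TEST)
    assert mode_keys.is_eval(mode_keys.EstimatorModeKeys.EVAL)
    assert not mode_keys.is_eval(mode_keys.KerasModeKeys.TRAIN)
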
diff --git a/tensorflow/python/saved_model/model_utils/mode_keys_test.py b/tensorflow/python/saved_model/model_utils/mode_keys_test.py
new file mode 100644
index 0000000..26795ef
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/mode_keys_test.py
@@ -0,0 +1,65 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""ModeKey Tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model.model_utils import mode_keys
+
+
+class ModeKeyMapTest(test.TestCase):
+
+  def test_map(self):
+    mode_map = mode_keys.ModeKeyMap(**{
+        mode_keys.KerasModeKeys.PREDICT: 3,
+        mode_keys.KerasModeKeys.TEST: 1
+    })
+
+    # Test dictionary __getitem__
+    self.assertEqual(3, mode_map[mode_keys.KerasModeKeys.PREDICT])
+    self.assertEqual(3, mode_map[mode_keys.EstimatorModeKeys.PREDICT])
+    self.assertEqual(1, mode_map[mode_keys.KerasModeKeys.TEST])
+    self.assertEqual(1, mode_map[mode_keys.EstimatorModeKeys.EVAL])
+    with self.assertRaises(KeyError):
+      _ = mode_map[mode_keys.KerasModeKeys.TRAIN]
+    with self.assertRaises(KeyError):
+      _ = mode_map[mode_keys.EstimatorModeKeys.TRAIN]
+    with self.assertRaisesRegexp(ValueError, 'Invalid mode'):
+      _ = mode_map['serve']
+
+    # Test common dictionary methods
+    self.assertLen(mode_map, 2)
+    self.assertEqual({1, 3}, set(mode_map.values()))
+    self.assertEqual(
+        {mode_keys.KerasModeKeys.TEST, mode_keys.KerasModeKeys.PREDICT},
+        set(mode_map.keys()))
+
+    # Map is immutable
+    with self.assertRaises(TypeError):
+      mode_map[mode_keys.KerasModeKeys.TEST] = 1
+
+  def test_invalid_init(self):
+    with self.assertRaisesRegexp(ValueError, 'Multiple keys/values found'):
+      _ = mode_keys.ModeKeyMap(**{
+          mode_keys.KerasModeKeys.PREDICT: 3,
+          mode_keys.EstimatorModeKeys.PREDICT: 1
+      })
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/saved_model/nested_structure_coder.py b/tensorflow/python/saved_model/nested_structure_coder.py
index 5cf9a5b..3d335de 100644
--- a/tensorflow/python/saved_model/nested_structure_coder.py
+++ b/tensorflow/python/saved_model/nested_structure_coder.py
@@ -361,10 +361,7 @@
   """Codec for `TensorShape`."""
 
   def can_encode(self, pyobj):
-    return isinstance(pyobj, (tensor_shape.TensorShape,
-                              # TODO(b/121255889): Should not need these.
-                              tensor_shape.TensorShapeV1,
-                              tensor_shape.TensorShapeV2))
+    return isinstance(pyobj, tensor_shape.TensorShape)
 
   def do_encode(self, tensor_shape_value, encode_fn):
     del encode_fn
diff --git a/tensorflow/python/saved_model/revived_types.py b/tensorflow/python/saved_model/revived_types.py
index ae06320..39b55e2 100644
--- a/tensorflow/python/saved_model/revived_types.py
+++ b/tensorflow/python/saved_model/revived_types.py
@@ -31,7 +31,7 @@
 
     Args:
       object_factory: A callable which takes a SavedUserObject proto and returns
-        a checkpointable object. Dependencies are added later via `setter`.
+        a trackable object. Dependencies are added later via `setter`.
       version: An integer, the producer version of this wrapper type. When
         making incompatible changes to a wrapper, add a new
         `VersionedTypeRegistration` with an incremented `version`. The most
@@ -45,11 +45,11 @@
         with this object. `min_consumer_version` should be set to the lowest
         version number which can successfully load protos saved by this
         object. If no matching registration is available on load, the object
-        will be revived with a generic checkpointable type.
+        will be revived with a generic trackable type.
 
         `min_consumer_version` and `bad_consumers` are a blunt tool, and using
         them will generally break forward compatibility: previous versions of
-        TensorFlow will revive newly saved objects as opaque checkpointable
+        TensorFlow will revive newly saved objects as opaque trackable
         objects rather than wrapped objects. When updating wrappers, prefer
         saving new information but preserving compatibility with previous
         wrapper versions. They are, however, useful for ensuring that
@@ -83,7 +83,7 @@
             bad_consumers=self._bad_consumers))
 
   def from_proto(self, proto):
-    """Recreate a checkpointable object from a SavedUserObject proto."""
+    """Recreate a trackable object from a SavedUserObject proto."""
     return self._object_factory(proto)
 
   def should_load(self, proto):
@@ -111,7 +111,7 @@
   Args:
     identifier: A unique string identifying this class of objects.
     predicate: A Boolean predicate for this registration. Takes a
-      checkpointable object as an argument. If True, `type_registration` may be
+      trackable object as an argument. If True, `type_registration` may be
       used to save and restore the object.
     versions: A list of `VersionedTypeRegistration` objects.
   """
@@ -138,7 +138,7 @@
 
 
 def serialize(obj):
-  """Create a SavedUserObject from a checkpointable object."""
+  """Create a SavedUserObject from a trackable object."""
   for identifier in _TYPE_IDENTIFIERS:
     predicate, versions = _REVIVED_TYPE_REGISTRY[identifier]
     if predicate(obj):
@@ -148,15 +148,15 @@
 
 
 def deserialize(proto):
-  """Create a checkpointable object from a SavedUserObject proto.
+  """Create a trackable object from a SavedUserObject proto.
 
   Args:
     proto: A SavedUserObject to deserialize.
 
   Returns:
-    A tuple of (checkpointable, assignment_fn) where assignment_fn has the same
+    A tuple of (trackable, assignment_fn) where assignment_fn has the same
     signature as setattr and should be used to add dependencies to
-    `checkpointable` when they are available.
+    `trackable` when they are available.
   """
   _, type_registrations = _REVIVED_TYPE_REGISTRY.get(
       proto.identifier, (None, None))
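
For context, a hedged sketch of registering a revivable type with this module;
`MyTrackable` and the "my_trackable" identifier are hypothetical, and the
keyword arguments mirror the "signature_map" registration added later in this
change:

    from tensorflow.python.saved_model import revived_types
    from tensorflow.python.training.tracking import tracking

    class MyTrackable(tracking.AutoTrackable):  # hypothetical example type
      pass

    revived_types.register_revived_type(
        "my_trackable",  # hypothetical identifier
        lambda obj: isinstance(obj, MyTrackable),
        versions=[revived_types.VersionedTypeRegistration(
            object_factory=lambda proto: MyTrackable(),
            version=1,
            min_producer_version=1,
            min_consumer_version=1,
            # deserialize() returns this alongside the object; it has the same
            # signature as setattr and is used to attach dependencies later.
            setter=setattr)])

    # serialize() now produces a SavedUserObject proto for matching objects.
    assert revived_types.serialize(MyTrackable()) is not None
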
diff --git a/tensorflow/python/saved_model/revived_types_test.py b/tensorflow/python/saved_model/revived_types_test.py
index ede5922..c58a0be 100644
--- a/tensorflow/python/saved_model/revived_types_test.py
+++ b/tensorflow/python/saved_model/revived_types_test.py
@@ -22,10 +22,10 @@
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import revived_types
 from tensorflow.python.saved_model import saved_object_graph_pb2
-from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.tracking import tracking
 
 
-class CustomTestClass(tracking.AutoCheckpointable):
+class CustomTestClass(tracking.AutoTrackable):
 
   def __init__(self, version):
     self.version = version
@@ -56,7 +56,7 @@
 class RegistrationMatchingTest(test.TestCase):
 
   def test_save_typecheck(self):
-    self.assertIs(revived_types.serialize(tracking.AutoCheckpointable()), None)
+    self.assertIs(revived_types.serialize(tracking.AutoTrackable()), None)
 
   def test_load_identifier_not_found(self):
     nothing_matches = revived_types.deserialize(
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index 3c8bf1b..617d314 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -12,14 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Exports a SavedModel from a Checkpointable Python object."""
+"""Exports a SavedModel from a Trackable Python object."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 import collections
-import functools
 import os
 
 from tensorflow.core.framework import versions_pb2
@@ -44,19 +43,18 @@
 from tensorflow.python.saved_model import saved_object_graph_pb2
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.saved_model import signature_serialization
 from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.saved_model import utils_impl
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import graph_view
-from tensorflow.python.training.checkpointable import object_identity
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
 from tensorflow.python.training.saving import functional_saver
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import graph_view
+from tensorflow.python.training.tracking import object_identity
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
 from tensorflow.python.util import compat
-from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
-DEFAULT_SIGNATURE_ATTR = "_default_save_signature"
 _UNCOPIABLE_DTYPES = frozenset((dtypes.resource, dtypes.variant))
 
 
@@ -66,40 +64,88 @@
     "_CapturedConstant", ["eager_tensor", "graph_tensor"])
 
 
+class _AugmentedGraphView(graph_view.ObjectGraphView):
+  """An extendable graph which also tracks functions attached to objects.
+
+  Extensions through `add_object` appear in the object graph and any checkpoints
+  generated from it, even if they are not dependencies of the node they were
+  attached to in the saving program. For example a `.signatures` attribute is
+  added to exported SavedModel root objects without modifying the root object
+  itself.
+
+  Also tracks functions attached to objects in the graph, through the caching
+  `list_functions` method. Enumerating functions only through this method
+  ensures that we get a consistent view of functions, even if object attributes
+  create new functions every time they are accessed.
+  """
+
+  def __init__(self, root):
+    super(_AugmentedGraphView, self).__init__(root)
+    # Object -> (name -> dep)
+    self._extra_dependencies = object_identity.ObjectIdentityDictionary()
+    self._functions = object_identity.ObjectIdentityDictionary()
+
+  def add_object(self, parent_node, name_in_parent, subgraph_root):
+    """Attach an object to `parent_node`, overriding any existing dependency."""
+    self._extra_dependencies.setdefault(
+        parent_node, {})[name_in_parent] = subgraph_root
+
+  def list_dependencies(self, obj):
+    """Overrides a parent method to include `add_object` objects."""
+    extra_dependencies = self._extra_dependencies.get(obj, {})
+    used_names = set()
+    for name, dep in super(_AugmentedGraphView, self).list_dependencies(obj):
+      used_names.add(name)
+      if name in extra_dependencies:
+        yield base.TrackableReference(name, extra_dependencies[name])
+      else:
+        yield base.TrackableReference(name, dep)
+    for name, dep in extra_dependencies.items():
+      if name in used_names:
+        continue
+      yield base.TrackableReference(name, dep)
+
+  def list_functions(self, obj):
+    obj_functions = self._functions.get(obj, None)
+    if obj_functions is None:
+      obj_functions = obj._list_functions_for_serialization()  # pylint: disable=protected-access
+      self._functions[obj] = obj_functions
+    return obj_functions
+
+
 class _SaveableView(object):
-  """Provides a stable view over a checkpointable root.
+  """Provides a frozen view over a trackable root.
 
   This class helps create a single stable view over an object to save. The
   saving code should access properties and functions via this class and not via
   the original object, as there are cases where an object constructs its
-  checkpointable attributes and functions dynamically per call and will yield
+  trackable attributes and functions dynamically per call and will yield
   different objects if invoked more than once.
+
+  Changes to the graph, for example adding objects, must happen in
+  `checkpoint_view` (an `_AugmentedGraphView`) before the `_SaveableView` is
+  constructed. Changes after the `_SaveableView` has been constructed will be
+  ignored.
   """
 
   def __init__(self, checkpoint_view):
     self.checkpoint_view = checkpoint_view
-    checkpointable_objects, node_ids, slot_variables = (
+    trackable_objects, node_ids, slot_variables = (
         self.checkpoint_view.objects_ids_and_slot_variables())
-    self.nodes = checkpointable_objects
+    self.nodes = trackable_objects
     self.node_ids = node_ids
     self.captured_tensor_node_ids = object_identity.ObjectIdentityDictionary()
     self.slot_variables = slot_variables
-    self.functions = object_identity.ObjectIdentityDictionary()
     self.concrete_functions = []
 
     # Also add `Function`s as nodes.
     nodes_without_functions = list(self.nodes)
     seen_function_names = set()
-    for obj in nodes_without_functions:
-      self.functions[obj] = obj._list_functions_for_serialization()  # pylint: disable=protected-access
-      for function in self.functions[obj].values():
+    for node in nodes_without_functions:
+      for function in checkpoint_view.list_functions(node).values():
         if function not in self.node_ids:
           self.node_ids[function] = len(self.nodes)
           self.nodes.append(function)
-          # Avoids recursing into functions to see if other functions are
-          # assigned to attributes. This is sometimes true for concrete
-          # functions but not helpful.
-          self.functions[function] = {}
         if isinstance(function, def_function.Function):
           # Force listing the concrete functions for the side effects:
           #  - populate the cache for functions that have an input_signature
@@ -128,11 +174,12 @@
       if isinstance(node, (def_function.Function, defun.ConcreteFunction,
                            _CapturedConstant)):
         continue
-      for child in node._checkpoint_dependencies:  # pylint: disable=protected-access
+      for child in self.checkpoint_view.list_dependencies(node):
         child_proto = object_proto.children.add()
         child_proto.node_id = self.node_ids[child.ref]
         child_proto.local_name = child.name
-      for local_name, ref_function in self.functions[node].items():
+      for local_name, ref_function in (
+          self.checkpoint_view.list_functions(node).items()):
         child_proto = object_proto.children.add()
         child_proto.node_id = self.node_ids[ref_function]
         child_proto.local_name = local_name
@@ -177,7 +224,7 @@
         self.captured_tensor_node_ids[obj.handle] = node_id
       elif isinstance(obj, tracking.TrackableAsset):
         _process_asset(obj, asset_info, resource_map)
-        self.captured_tensor_node_ids[obj.asset_path.handle] = node_id
+        self.captured_tensor_node_ids[obj.asset_path] = node_id
 
     for concrete_function in self.concrete_functions:
       for capture in concrete_function.captured_inputs:
@@ -197,108 +244,6 @@
     return object_map, resource_map, asset_info
 
 
-def _get_signature(function):
-  if (isinstance(function, (defun.Function, def_function.Function)) and
-      function._input_signature is not None):  # pylint: disable=protected-access
-    function = function.get_concrete_function()
-  if not isinstance(function, defun.ConcreteFunction):
-    return None
-  return function
-
-
-def _valid_signature(concrete_function):
-  """Returns whether concrete function can be converted to a signature."""
-  if not concrete_function.outputs:
-    # Functions without outputs don't make sense as signatures. We just don't
-    # have any way to run an Operation with no outputs as a SignatureDef in the
-    # 1.x style.
-    return False
-  try:
-    _normalize_outputs(concrete_function.structured_outputs, "unused", "unused")
-  except ValueError:
-    return False
-  return True
-
-
-def _find_function_to_export(saveable_view):
-  """Function to export, None if no suitable function was found."""
-  # If the user did not specify signatures, check the root object for a function
-  # that can be made into a signature.
-  functions = saveable_view.functions[saveable_view.root]
-  signature = functions.get(DEFAULT_SIGNATURE_ATTR, None)
-  if signature is not None:
-    return signature
-
-  # TODO(andresp): Discuss removing this behaviour. It can lead to WTFs when a
-  # user decides to annotate more functions with tf.function and suddenly
-  # serving that model way later in the process stops working.
-  if len(functions) == 1:
-    single_function = list(functions.values())[0]
-    signature = _get_signature(single_function)
-    if signature and  _valid_signature(signature):
-      return signature
-  return None
-
-
-def _canonicalize_signatures(signatures):
-  """Converts `signatures` into a dictionary of concrete functions."""
-  if signatures is None:
-    return {}
-  if not isinstance(signatures, collections.Mapping):
-    signatures = {
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signatures}
-  concrete_signatures = {}
-  for signature_key, function in signatures.items():
-    signature_function = _get_signature(function)
-    if signature_function is None:
-      raise ValueError(
-          ("Expected a TensorFlow function to generate a signature for, but "
-           "got {}. Only `tf.functions` with an input signature or "
-           "concrete functions can be used as a signature.").format(function))
-    concrete_signatures[signature_key] = signature_function
-  return concrete_signatures
-
-
-def _is_flat(sequence):
-  sequence_flat = nest.flatten(sequence)
-  try:
-    nest.assert_same_structure(sequence_flat, sequence)
-    return True
-  except ValueError:
-    return False
-  except TypeError:
-    return False
-
-
-def _normalize_outputs(outputs, function_name, signature_key):
-  """Construct an output dictionary from unnormalized function outputs."""
-  if isinstance(outputs, collections.Mapping):
-    for key, value in outputs.items():
-      if not isinstance(value, ops.Tensor):
-        raise ValueError(
-            ("Got a dictionary containing non-Tensor value {} for key {} "
-             "in the output of the function {} used to generate a SavedModel "
-             "signature. Dictionaries outputs for functions used as signatures "
-             "should have one Tensor output per string key.")
-            .format(value, key, compat.as_str_any(function_name)))
-    return outputs
-  else:
-    original_outputs = outputs
-    if not isinstance(outputs, collections.Sequence):
-      outputs = [outputs]
-    if not _is_flat(outputs):
-      raise ValueError(
-          ("Got non-flat outputs '{}' from '{}' for SavedModel "
-           "signature '{}'. Signatures have one Tensor per output, so "
-           "to have predictable names Python functions used to generate "
-           "these signatures should avoid outputting Tensors in nested "
-           "structures.")
-          .format(original_outputs, function_name, signature_key))
-    return {("output_{}".format(output_index)): output
-            for output_index, output
-            in enumerate(outputs)}
-
-
 def _tensor_dict_to_tensorinfo(tensor_dict):
   return {key: utils_impl.build_tensor_info_internal(value)
           for key, value in tensor_dict.items()}
@@ -327,14 +272,12 @@
   for exterior, interior in original_captures.items():
     mapped_resource = resource_map.get(exterior, None)
     if mapped_resource is None:
-      if exterior.dtype == dtypes.resource:
-        raise AssertionError(
-            ("Tried to export a function which references untracked stateful "
-             "object {}. Stateful TensorFlow objects (e.g. tf.Variable) must "
-             "be tracked by the main object. Objects may be tracked by "
-             "assigning them to an attribute of another tracked object, or to "
-             "an attribute of the main object directly.")
-            .format(interior))
+      raise AssertionError(
+          ("Tried to export a function which references untracked object {}."
+           "TensorFlow objects (e.g. tf.Variable) captured by functions must "
+           "be tracked by assigning them to an attribute of a tracked object "
+           "or assigned to an attribute of the main object directly.")
+          .format(interior))
     export_captures.append(mapped_resource)
   return export_captures
 
@@ -428,8 +371,8 @@
 
   Args:
     signature_functions: A dictionary mapping string keys to concrete TensorFlow
-      functions (e.g. from `_canonicalize_signatures`) which will be used to
-      generate SignatureDefs.
+      functions (e.g. from `signature_serialization.canonicalize_signatures`)
+      which will be used to generate SignatureDefs.
     resource_map: A dictionary mapping from resource tensors in the eager
       context to resource tensors in the Graph being exported. This dictionary
       is used to re-bind resources captured by functions to tensors which will
@@ -460,10 +403,8 @@
     mapped_inputs, exterior_argument_placeholders = (
         _map_function_arguments_to_created_inputs(
             argument_inputs, signature_key, function.name))
-    outputs = _normalize_outputs(
-        _call_function_with_mapped_captures(
-            function, mapped_inputs, resource_map),
-        function.name, signature_key)
+    outputs = _call_function_with_mapped_captures(
+        function, mapped_inputs, resource_map)
     signatures[signature_key] = signature_def_utils.build_signature_def(
         _tensor_dict_to_tensorinfo(exterior_argument_placeholders),
         _tensor_dict_to_tensorinfo(outputs),
@@ -479,10 +420,13 @@
     obj.initialize()
     return constant_op.constant(1.)  # Dummy control output
 
+  def _wrap_obj_initializer(obj):
+    return lambda: _wrap_initializer(obj)
+
   for obj in accessible_objects:
     if isinstance(obj, tracking.TrackableResource):
       resource_initializers.append(def_function.function(
-          functools.partial(_wrap_initializer, obj),
+          _wrap_obj_initializer(obj),
           # All inputs are captures.
           input_signature=[]).get_concrete_function())
   return resource_initializers
@@ -522,10 +466,10 @@
   asset_def.filename = path
   asset_def.tensor_info.name = asset_path_initializer.name
   asset_info.asset_defs.append(asset_def)
-  asset_info.asset_initializers_by_resource[original_variable.handle] = (
+  asset_info.asset_initializers_by_resource[original_variable] = (
       asset_variable.initializer)
   asset_info.asset_index[trackable_asset] = len(asset_info.asset_defs) - 1
-  resource_map[original_variable.handle] = asset_variable.handle
+  resource_map[original_variable] = asset_variable
 
 
 def _fill_meta_graph_def(meta_graph_def, saveable_view, signature_functions):
@@ -560,6 +504,8 @@
         resource_initializer_ops.append(
             _call_function_with_mapped_captures(
                 resource_initializer_function, [], resource_map))
+    resource_initializer_ops.extend(
+        asset_info.asset_initializers_by_resource.values())
     with ops.control_dependencies(resource_initializer_ops):
       init_op = control_flow_ops.no_op()
     # Add the same op to the main_op collection and to the init_op
@@ -598,7 +544,7 @@
 
 def _write_object_graph(saveable_view, export_dir, asset_file_def_index):
   """Save a SavedObjectGraph proto for `root`."""
-  # SavedObjectGraph is similar to the CheckpointableObjectGraph proto in the
+  # SavedObjectGraph is similar to the TrackableObjectGraph proto in the
   # checkpoint. It will eventually go into the SavedModel.
   proto = saved_object_graph_pb2.SavedObjectGraph()
   saveable_view.fill_object_graph_proto(proto)
@@ -641,6 +587,8 @@
         function_serialization.serialize_bare_concrete_function(obj))
   elif isinstance(obj, _CapturedConstant):
     proto.constant.operation = obj.graph_tensor.op.name
+  elif isinstance(obj, tracking.TrackableResource):
+    proto.resource.SetInParent()
   else:
     registered_type_proto = revived_types.serialize(obj)
     if registered_type_proto is None:
@@ -655,7 +603,7 @@
 @tf_export("saved_model.save", v1=["saved_model.experimental.save"])
 def save(obj, export_dir, signatures=None):
   # pylint: disable=line-too-long
-  """Exports the Checkpointable object `obj` to [SavedModel format](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md).
+  """Exports the Trackable object `obj` to [SavedModel format](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md).
 
   Example usage:
 
@@ -699,7 +647,11 @@
   which case outputs will be numbered, or a dictionary mapping string keys to
   `Tensor`, in which case the keys will be used to name outputs.
 
-  Since `tf.keras.Model` objects are also Checkpointable, this function can be
+  Signatures are available in objects returned by `tf.saved_model.load` as a
+  `.signatures` attribute. This is a reserved attribute: `tf.saved_model.save`
+  on an object with a custom `.signatures` attribute will raise an exception.
+
+  Since `tf.keras.Model` objects are also Trackable, this function can be
   used to export Keras models. For example, exporting with a signature
   specified:
 
@@ -785,7 +737,7 @@
   prior to the TensorFlow 2.0 release.
 
   Args:
-    obj: A checkpointable object to export.
+    obj: A trackable object to export.
     export_dir: A directory in which to write the SavedModel.
     signatures: Optional, either a `tf.function` with an input signature
       specified or the result of `f.get_concrete_function` on a
@@ -798,7 +750,7 @@
       `tf.saved_model.signature_constants` module.
 
   Raises:
-    ValueError: If `obj` is not checkpointable.
+    ValueError: If `obj` is not trackable.
 
   @compatibility(eager)
   Not supported when graph building. From TensorFlow 1.x,
@@ -819,28 +771,35 @@
             "tf.enable_eager_execution() must run first when calling it from "
             "TensorFlow 1.x.")
   # pylint: enable=line-too-long
-  if not isinstance(obj, base.Checkpointable):
+  if not isinstance(obj, base.Trackable):
     raise ValueError(
-        "Expected a Checkpointable object for export, got {}.".format(obj))
+        "Expected a Trackable object for export, got {}.".format(obj))
 
-  # Use _SaveableView to provide a stable listing of properties and functions.
+  checkpoint_graph_view = _AugmentedGraphView(obj)
+  if signatures is None:
+    signatures = signature_serialization.find_function_to_export(
+        checkpoint_graph_view)
+
+  signatures = signature_serialization.canonicalize_signatures(signatures)
+  signature_serialization.validate_saveable_view(checkpoint_graph_view)
+  signature_map = signature_serialization.create_signature_map(signatures)
+  checkpoint_graph_view.add_object(
+      parent_node=checkpoint_graph_view.root,
+      name_in_parent=signature_serialization.SIGNATURE_ATTRIBUTE_NAME,
+      subgraph_root=signature_map)
+
+  # Use _SaveableView to provide a frozen listing of properties and functions.
   # Note we run this twice since, while constructing the view the first time
   # there can be side effects of creating variables.
-  checkpoint_graph_view = graph_view.ObjectGraphView(obj)
   _ = _SaveableView(checkpoint_graph_view)
   saveable_view = _SaveableView(checkpoint_graph_view)
 
-  if signatures is None:
-    signatures = _find_function_to_export(saveable_view)
-
-  signatures = _canonicalize_signatures(signatures)
-
   # TODO(allenl): Factor out some subset of SavedModelBuilder which is 2.x
   # compatible (no sessions) and share it with this export API rather than
   # making a SavedModel proto and writing it directly.
   saved_model = saved_model_pb2.SavedModel()
   meta_graph_def = saved_model.meta_graphs.add()
-  object_saver = util.CheckpointableSaver(checkpoint_graph_view)
+  object_saver = util.TrackableSaver(checkpoint_graph_view)
   asset_info, exported_graph = _fill_meta_graph_def(
       meta_graph_def, saveable_view, signatures)
   saved_model.saved_model_schema_version = (
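
A short sketch of the user-facing flow described in the docstring changes
above, using the 2.x-style public API (the export path is hypothetical):

    import tensorflow as tf

    root = tf.train.Checkpoint(v=tf.Variable(2.))
    root.f = tf.function(
        lambda x: root.v * x,
        input_signature=[tf.TensorSpec(None, tf.float32)])

    # Signatures passed here are canonicalized and attached to the root object
    # under the reserved ".signatures" attribute before the object graph is
    # frozen into a _SaveableView.
    tf.saved_model.save(root, "/tmp/saved_model_example", signatures=root.f)
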
diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py
index cbca51a..ca1d573 100644
--- a/tensorflow/python/saved_model/save_test.py
+++ b/tensorflow/python/saved_model/save_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for checkpointable object SavedModel save."""
+"""Tests for trackable object SavedModel save."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -41,8 +41,9 @@
 from tensorflow.python.saved_model import save
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import tag_constants
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
+from tensorflow.python.util import compat
 
 
 class _ModelWithOptimizer(util.Checkpoint):
@@ -86,7 +87,7 @@
 class SaveTest(test.TestCase):
 
   def test_method_save_signature(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(
         lambda x: 2. * x,
         input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
@@ -98,7 +99,7 @@
         _import_and_infer(save_dir, {"x": 1.}))
 
   def test_method_save_concrete(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(
         lambda z: {"out": 2. * z})
     root.f(constant_op.constant(1.))
@@ -114,7 +115,7 @@
             save_dir, {"z": 1.}, signature_key="non_default_key"))
 
   def test_non_concrete_error(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(lambda x: 2. * x)
     root.f(constant_op.constant(1.))
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
@@ -123,7 +124,7 @@
       save.save(root, save_dir, root.f)
 
   def test_captures_unreachable_variable(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     unreachable_variable = variables.Variable([5.0, 2.0])
     root.reachable_variable = variables.Variable([1.0, 3.0])
 
@@ -142,7 +143,7 @@
       save.save(root, save_dir)
 
   def test_nested_inputs(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(
         lambda x: 2. * x[0],
         input_signature=([tensor_spec.TensorSpec(None, dtypes.float32),
@@ -155,7 +156,7 @@
       root.f.get_concrete_function()
 
   def test_nested_outputs(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(lambda x: (2. * x, (3. * x, 4. * x)))
     root.f(constant_op.constant(1.))
     to_save = root.f.get_concrete_function(constant_op.constant(1.))
@@ -176,7 +177,7 @@
       save.save(root, save_dir, to_save)
 
   def test_variable(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.v1 = variables.Variable(3.)
     root.v2 = variables.Variable(2.)
     root.f = def_function.function(
@@ -213,7 +214,7 @@
                                     {"x": [[3., 4.]], "y": [2.]}))
 
   def test_single_function_default_signature(self):
-    model = tracking.AutoCheckpointable()
+    model = tracking.AutoTrackable()
     model.f = def_function.function(lambda: 3., input_signature=())
     model.f()
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
@@ -222,7 +223,7 @@
                         _import_and_infer(save_dir, {}))
 
   def test_single_function_no_signature(self):
-    model = tracking.AutoCheckpointable()
+    model = tracking.AutoTrackable()
     model.f = def_function.function(lambda: 3.)
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
     save.save(model, save_dir)
@@ -303,6 +304,14 @@
       self.assertNotIn("T", complex_node.attr)
       self.assertNotIn("Tout", complex_node.attr)
 
+  def test_signature_attribute_reserved(self):
+    root = util.Checkpoint(signatures=variables.Variable(1.))
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    with self.assertRaisesRegexp(ValueError, "del obj.signatures"):
+      save.save(root, save_dir)
+    del root.signatures
+    save.save(root, save_dir)
+
 
 class AssetTests(test.TestCase):
 
@@ -312,6 +321,18 @@
     with open(self._vocab_path, "w") as f:
       f.write("alpha\nbeta\ngamma\n")
 
+  def test_asset_path_returned(self):
+    root = tracking.AutoTrackable()
+    root.path = tracking.TrackableAsset(self._vocab_path)
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    root.get_asset = def_function.function(lambda: root.path.asset_path)
+    save.save(root, save_dir, signatures=root.get_asset.get_concrete_function())
+    second_dir = os.path.join(self.get_temp_dir(), "second_dir")
+    file_io.rename(save_dir, second_dir)
+    imported_path = _import_and_infer(second_dir, {})["output_0"]
+    self.assertIn(compat.as_str_any(second_dir),
+                  compat.as_str_any(imported_path))
+
   def test_table(self):
     initializer = lookup_ops.TextFileInitializer(
         self._vocab_path,
@@ -341,7 +362,7 @@
         _import_and_infer(second_dir, {"keys": ["gamma", "beta"]}))
 
   def test_unused_asset(self):
-    root = tracking.AutoCheckpointable()
+    root = tracking.AutoTrackable()
     root.f = def_function.function(
         lambda x: 2. * x,
         input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
diff --git a/tensorflow/python/saved_model/saved_object_graph.proto b/tensorflow/python/saved_model/saved_object_graph.proto
index c57bf15..bfc83e4 100644
--- a/tensorflow/python/saved_model/saved_object_graph.proto
+++ b/tensorflow/python/saved_model/saved_object_graph.proto
@@ -1,6 +1,6 @@
 syntax = "proto3";
 
-import "tensorflow/core/protobuf/checkpointable_object_graph.proto";
+import "tensorflow/core/protobuf/trackable_object_graph.proto";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
 import "tensorflow/core/framework/versions.proto";
@@ -14,9 +14,9 @@
 // describes the directed graph of Python objects (or equivalent in other
 // languages) that make up a model, with nodes[0] at the root.
 
-// SavedObjectGraph shares some structure with CheckpointableObjectGraph, but
+// SavedObjectGraph shares some structure with TrackableObjectGraph, but
 // ObjectGraph belongs to the SavedModel and contains pointers to functions and
-// type information, while CheckpointableObjectGraph lives in the checkpoint and
+// type information, while TrackableObjectGraph lives in the checkpoint and
 // contains pointers only to variable values.
 
 // NOTE: This protocol buffer format is experimental and subject to change.
@@ -38,10 +38,9 @@
   // graph.
   //
   // Note: only valid if kind == "object".
-  repeated CheckpointableObjectGraph.CheckpointableObject.ObjectReference
-      children = 1;
+  repeated TrackableObjectGraph.TrackableObject.ObjectReference children = 1;
 
-  // Removed when forking from CheckpointableObjectGraph.
+  // Removed when forking from TrackableObjectGraph.
   reserved "attributes";
   reserved 2;
 
@@ -50,7 +49,7 @@
   // depend on the others directly.
   //
   // Note: only valid if kind == "object".
-  repeated CheckpointableObjectGraph.CheckpointableObject.SlotVariableReference
+  repeated TrackableObjectGraph.TrackableObject.SlotVariableReference
       slot_variables = 3;
 
   oneof kind {
@@ -60,6 +59,7 @@
     SavedVariable variable = 7;
     SavedBareConcreteFunction bare_concrete_function = 8;
     SavedConstant constant = 9;
+    SavedResource resource = 10;
   }
 }
 
@@ -78,9 +78,9 @@
 
 // A SavedAsset represents a file in a SavedModel.
 //
-// When bound to a function this object evaluates to a Variable from which the
-// absolute filename can be read. Users should not expect the filename to be
-// maintained.
+// When bound to a function this object evaluates to a tensor with the absolute
+// filename. Users should not depend on a particular part of the filename to
+// remain stable (e.g. basename could be changed).
 message SavedAsset {
   // Index into `MetaGraphDef.asset_file_def[]` that describes the Asset.
   //
@@ -153,3 +153,9 @@
   // The input signature, if specified.
   StructuredValue input_signature = 5;
 }
+
+// A SavedResource represents a TF object that holds state during its lifetime.
+message SavedResource {
+  // An object of this type can have references to a create_resource()
+  // function and an initialize() function.
+}
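
Since `save.py` above now writes `proto.resource.SetInParent()` for
`TrackableResource` objects, the new oneof kind can be observed through the
generated proto module using the standard protobuf API:

    from tensorflow.python.saved_model import saved_object_graph_pb2

    obj = saved_object_graph_pb2.SavedObject()
    obj.resource.SetInParent()       # mark this node as a SavedResource
    assert obj.WhichOneof("kind") == "resource"
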
diff --git a/tensorflow/python/saved_model/signature_constants.py b/tensorflow/python/saved_model/signature_constants.py
index 0efe176..525d18d 100644
--- a/tensorflow/python/saved_model/signature_constants.py
+++ b/tensorflow/python/saved_model/signature_constants.py
@@ -136,6 +136,9 @@
 ################################################################################
 # Train/Eval API constants.
 # Not exported while export_all_saved_models is experimental.
+DEFAULT_TRAIN_SIGNATURE_DEF_KEY = "train"
+
+DEFAULT_EVAL_SIGNATURE_DEF_KEY = "eval"
 
 SUPERVISED_TRAIN_METHOD_NAME = "tensorflow/supervised/training"
 
diff --git a/tensorflow/python/saved_model/signature_def_utils_impl.py b/tensorflow/python/saved_model/signature_def_utils_impl.py
index f6e6e1d..2e0a0af 100644
--- a/tensorflow/python/saved_model/signature_def_utils_impl.py
+++ b/tensorflow/python/saved_model/signature_def_utils_impl.py
@@ -30,7 +30,6 @@
 
 
 @tf_export(
-    'saved_model.build_signature_def',
     v1=[
         'saved_model.build_signature_def',
         'saved_model.signature_def_utils.build_signature_def'
@@ -63,7 +62,6 @@
 
 
 @tf_export(
-    'saved_model.regression_signature_def',
     v1=[
         'saved_model.regression_signature_def',
         'saved_model.signature_def_utils.regression_signature_def'
@@ -112,7 +110,6 @@
 
 
 @tf_export(
-    'saved_model.classification_signature_def',
     v1=[
         'saved_model.classification_signature_def',
         'saved_model.signature_def_utils.classification_signature_def'
@@ -172,7 +169,6 @@
 
 
 @tf_export(
-    'saved_model.predict_signature_def',
     v1=[
         'saved_model.predict_signature_def',
         'saved_model.signature_def_utils.predict_signature_def'
@@ -270,7 +266,6 @@
 
 
 @tf_export(
-    'saved_model.is_valid_signature',
     v1=[
         'saved_model.is_valid_signature',
         'saved_model.signature_def_utils.is_valid_signature'
diff --git a/tensorflow/python/saved_model/signature_serialization.py b/tensorflow/python/saved_model/signature_serialization.py
new file mode 100644
index 0000000..181ee83
--- /dev/null
+++ b/tensorflow/python/saved_model/signature_serialization.py
@@ -0,0 +1,248 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Helpers for working with signatures in tf.saved_model.save."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.python.eager import def_function
+from tensorflow.python.eager import function as defun
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.saved_model import revived_types
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.training.tracking import base
+from tensorflow.python.util import compat
+from tensorflow.python.util import nest
+
+
+DEFAULT_SIGNATURE_ATTR = "_default_save_signature"
+SIGNATURE_ATTRIBUTE_NAME = "signatures"
+
+
+def _get_signature(function):
+  if (isinstance(function, (defun.Function, def_function.Function)) and
+      function._input_signature is not None):  # pylint: disable=protected-access
+    function = function.get_concrete_function()
+  if not isinstance(function, defun.ConcreteFunction):
+    return None
+  return function
+
+
+def _valid_signature(concrete_function):
+  """Returns whether concrete function can be converted to a signature."""
+  if not concrete_function.outputs:
+    # Functions without outputs don't make sense as signatures. We just don't
+    # have any way to run an Operation with no outputs as a SignatureDef in the
+    # 1.x style.
+    return False
+  try:
+    _normalize_outputs(concrete_function.structured_outputs, "unused", "unused")
+  except ValueError:
+    return False
+  return True
+
+
+def find_function_to_export(saveable_view):
+  """Function to export, None if no suitable function was found."""
+  # If the user did not specify signatures, check the root object for a function
+  # that can be made into a signature.
+  functions = saveable_view.list_functions(saveable_view.root)
+  signature = functions.get(DEFAULT_SIGNATURE_ATTR, None)
+  if signature is not None:
+    return signature
+
+  # TODO(andresp): Discuss removing this behaviour. It can lead to WTFs when a
+  # user decides to annotate more functions with tf.function and suddenly
+  # serving that model way later in the process stops working.
+  possible_signatures = []
+  for function in functions.values():
+    concrete = _get_signature(function)
+    if concrete is not None and _valid_signature(concrete):
+      possible_signatures.append(concrete)
+  if len(possible_signatures) == 1:
+    # Each entry of `possible_signatures` has already passed `_get_signature`
+    # and `_valid_signature`, so a single candidate can be returned directly.
+    return possible_signatures[0]
+  return None
+
+
+def canonicalize_signatures(signatures):
+  """Converts `signatures` into a dictionary of concrete functions."""
+  if signatures is None:
+    return {}
+  if not isinstance(signatures, collections.Mapping):
+    signatures = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signatures}
+  concrete_signatures = {}
+  for signature_key, function in signatures.items():
+    signature_function = _get_signature(function)
+    if signature_function is None:
+      raise ValueError(
+          ("Expected a TensorFlow function to generate a signature for, but "
+           "got {}. Only `tf.functions` with an input signature or "
+           "concrete functions can be used as a signature.").format(function))
+
+    # Re-wrap the function so that it only takes keyword arguments and it
+    # returns a dictionary of Tensors. This matches the format of 1.x-style
+    # signatures.
+    # pylint: disable=cell-var-from-loop
+    @def_function.function
+    def signature_wrapper(**kwargs):
+      structured_outputs = signature_function(**kwargs)
+      return _normalize_outputs(
+          structured_outputs, signature_function.name, signature_key)
+    # TODO(b/123902469): Use ConcreteFunction.structured_inputs once their names
+    # always match keyword arguments.
+    tensor_spec_signature = {}
+    for keyword, tensor in zip(
+        signature_function._arg_keywords,  # pylint: disable=protected-access
+        signature_function.inputs):
+      keyword = compat.as_str(keyword)
+      tensor_spec_signature[keyword] = tensor_spec.TensorSpec.from_tensor(
+          tensor, name=keyword)
+    concrete_signatures[signature_key] = (
+        signature_wrapper.get_concrete_function(**tensor_spec_signature))
+    # pylint: enable=cell-var-from-loop
+  return concrete_signatures
+
+
+def _is_flat(sequence):
+  sequence_flat = nest.flatten(sequence)
+  try:
+    nest.assert_same_structure(sequence_flat, sequence)
+    return True
+  except ValueError:
+    return False
+  except TypeError:
+    return False
+
+
+def _normalize_outputs(outputs, function_name, signature_key):
+  """Construct an output dictionary from unnormalized function outputs."""
+  if isinstance(outputs, collections.Mapping):
+    for key, value in outputs.items():
+      if not isinstance(value, ops.Tensor):
+        raise ValueError(
+            ("Got a dictionary containing non-Tensor value {} for key {} "
+             "in the output of the function {} used to generate a SavedModel "
+             "signature. Dictionaries outputs for functions used as signatures "
+             "should have one Tensor output per string key.")
+            .format(value, key, compat.as_str_any(function_name)))
+    return outputs
+  else:
+    original_outputs = outputs
+    if not isinstance(outputs, collections.Sequence):
+      outputs = [outputs]
+    if not _is_flat(outputs):
+      raise ValueError(
+          ("Got non-flat outputs '{}' from '{}' for SavedModel "
+           "signature '{}'. Signatures have one Tensor per output, so "
+           "to have predictable names Python functions used to generate "
+           "these signatures should avoid outputting Tensors in nested "
+           "structures.")
+          .format(original_outputs, function_name, signature_key))
+    return {("output_{}".format(output_index)): output
+            for output_index, output
+            in enumerate(outputs)}
+
+
+# _SignatureMap is immutable to ensure that users do not expect changes to be
+# reflected in the SavedModel. Using public APIs, tf.saved_model.load() is the
+# only way to create a _SignatureMap and there is no way to modify it. So we can
+# safely ignore/overwrite ".signatures" attributes attached to objects being
+# saved if they contain a _SignatureMap. A ".signatures" attribute containing
+# any other type (e.g. a regular dict) will raise an exception asking the user
+# to first "del obj.signatures" if they want it overwritten.
+class _SignatureMap(collections.Mapping, base.Trackable):
+  """A collection of SavedModel signatures."""
+
+  def __init__(self):
+    self._signatures = {}
+
+  def _add_signature(self, name, concrete_function):
+    """Adds a signature to the _SignatureMap."""
+    # Ideally this object would be immutable, but restore is streaming so we do
+    # need a private API for adding new signatures to an existing object.
+    self._signatures[name] = concrete_function
+
+  def __getitem__(self, key):
+    return self._signatures[key]
+
+  def __iter__(self):
+    return iter(self._signatures)
+
+  def __len__(self):
+    return len(self._signatures)
+
+  def __repr__(self):
+    return "_SignatureMap({})".format(self._signatures)
+
+  def _list_functions_for_serialization(self):
+    return {
+        key: value for key, value in self.items()
+        if isinstance(value, (def_function.Function, defun.ConcreteFunction))
+    }
+
+
+revived_types.register_revived_type(
+    "signature_map",
+    lambda obj: isinstance(obj, _SignatureMap),
+    versions=[revived_types.VersionedTypeRegistration(
+        # Standard dependencies are enough to reconstruct the trackable
+        # items in dictionaries, so we don't need to save any extra information.
+        object_factory=lambda proto: _SignatureMap(),
+        version=1,
+        min_producer_version=1,
+        min_consumer_version=1,
+        setter=_SignatureMap._add_signature  # pylint: disable=protected-access
+    )])
+
+
+def create_signature_map(signatures):
+  """Creates an object containing `signatures`."""
+  signature_map = _SignatureMap()
+  for name, func in signatures.items():
+    # This is true of any signature that came from canonicalize_signatures.
+    # These checks are a sanity check at save time; crashing on load (e.g. in
+    # _add_signature) would be more problematic in case future export changes
+    # violated these assertions.
+    assert isinstance(func, defun.ConcreteFunction)
+    assert isinstance(func.structured_outputs, collections.Mapping)
+    assert 0 == func._num_positional_args  # pylint: disable=protected-access
+    signature_map._add_signature(name, func)  # pylint: disable=protected-access
+  return signature_map
+
+
+def validate_saveable_view(saveable_view):
+  """Performs signature-related sanity checks on `saveable_view`."""
+  for name, dep in saveable_view.list_dependencies(
+      saveable_view.root):
+    if name == SIGNATURE_ATTRIBUTE_NAME:
+      if not isinstance(dep, _SignatureMap):
+        raise ValueError(
+            ("Exporting an object {} which has an attribute named "
+             "'{signatures}'. This is a reserved attribute used to store "
+             "SavedModel signatures in objects which come from "
+             "`tf.saved_model.load`. Delete this attribute "
+             "(e.g. 'del obj.{signatures}') before saving if this shadowing is "
+             "acceptable.").format(
+                 saveable_view.root,
+                 signatures=SIGNATURE_ATTRIBUTE_NAME))
+      break
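
A hedged sketch of the canonicalization path defined above: a bare function is
keyed under the default serving key and re-wrapped so its outputs form a
dictionary, per `_normalize_outputs`:

    from tensorflow.python.eager import def_function
    from tensorflow.python.framework import dtypes
    from tensorflow.python.framework import tensor_spec
    from tensorflow.python.saved_model import signature_serialization

    f = def_function.function(
        lambda x: 2. * x,
        input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
    signatures = signature_serialization.canonicalize_signatures(f)
    # -> {"serving_default": <ConcreteFunction>} whose structured_outputs is a
    #    dict like {"output_0": <Tensor>}.
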
diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD
index b8ad31e..e483155 100644
--- a/tensorflow/python/tools/BUILD
+++ b/tensorflow/python/tools/BUILD
@@ -13,21 +13,26 @@
 # Transitive dependencies of this target will be included in the pip package.
 py_library(
     name = "tools_pip",
+    data = [
+        ":freeze_graph",
+        ":import_pb_to_tensorboard",
+        ":inspect_checkpoint",
+        ":optimize_for_inference",
+        ":print_selective_registration_header",
+        ":saved_model_cli",
+        ":strip_unused",
+        # Include the TF upgrade script so users can run it directly after installing TF
+        "//tensorflow/tools/compatibility:tf_upgrade_v2",
+    ],
     deps = [
-        ":freeze_graph_lib",
-        ":import_pb_to_tensorboard_lib",
-        ":inspect_checkpoint_lib",
-        ":optimize_for_inference_lib",
-        ":print_selective_registration_header_lib",
-        ":saved_model_cli_lib",
         ":saved_model_utils",
-        ":strip_unused_lib",
         # The following py_library are needed because
         # py_binary may not depend on them when --define=no_tensorflow_py_deps=true
         # is specified. See https://github.com/tensorflow/tensorflow/issues/22390
+        ":freeze_graph_lib",
+        ":optimize_for_inference_lib",
         ":selective_registration_header_lib",
-        # Include the TF upgrade script to users can run it directly after install TF
-        "//tensorflow/tools/compatibility:tf_upgrade_v2_lib",
+        ":strip_unused_lib",
     ],
 )
 
@@ -74,6 +79,13 @@
     name = "freeze_graph",
     srcs = ["freeze_graph.py"],
     srcs_version = "PY2AND3",
+    deps = [":freeze_graph_main_lib"],
+)
+
+py_library(
+    name = "freeze_graph_main_lib",
+    srcs = ["freeze_graph.py"],
+    srcs_version = "PY2AND3",
     deps = [
         ":freeze_graph_lib",
     ],
@@ -204,6 +216,13 @@
     name = "optimize_for_inference",
     srcs = ["optimize_for_inference.py"],
     srcs_version = "PY2AND3",
+    deps = [":optimize_for_inference_main_lib"],
+)
+
+py_library(
+    name = "optimize_for_inference_main_lib",
+    srcs = ["optimize_for_inference.py"],
+    srcs_version = "PY2AND3",
     deps = [
         ":optimize_for_inference_lib",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 5095f9f..19af602 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -27,7 +27,6 @@
     "linalg/__init__.py",
     "lite/__init__.py",
     "lite/constants/__init__.py",
-    "losses/__init__.py",
     "math/__init__.py",
     "nest/__init__.py",
     "nn/__init__.py",
diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py
index 06a6e7d..ab82ee9 100644
--- a/tensorflow/python/tools/freeze_graph.py
+++ b/tensorflow/python/tools/freeze_graph.py
@@ -240,7 +240,7 @@
 
 
 def _parse_input_graph_proto(input_graph, input_binary):
-  """Parser input tensorflow graph into GraphDef proto."""
+  """Parses input tensorflow graph into GraphDef proto."""
   if not gfile.Exists(input_graph):
     print("Input graph file '" + input_graph + "' does not exist!")
     return -1
@@ -255,7 +255,7 @@
 
 
 def _parse_input_meta_graph_proto(input_graph, input_binary):
-  """Parser input tensorflow graph into MetaGraphDef proto."""
+  """Parses input tensorflow graph into MetaGraphDef proto."""
   if not gfile.Exists(input_graph):
     print("Input meta graph file '" + input_graph + "' does not exist!")
     return -1
@@ -271,7 +271,7 @@
 
 
 def _parse_input_saver_proto(input_saver, input_binary):
-  """Parser input tensorflow Saver into SaverDef proto."""
+  """Parses input tensorflow Saver into SaverDef proto."""
   if not gfile.Exists(input_saver):
     print("Input saver file '" + input_saver + "' does not exist!")
     return -1
diff --git a/tensorflow/python/tpu/BUILD b/tensorflow/python/tpu/BUILD
new file mode 100644
index 0000000..a76c620
--- /dev/null
+++ b/tensorflow/python/tpu/BUILD
@@ -0,0 +1,334 @@
+# Description: Operations defined for Cloud TPUs
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_custom_op_library",
+    "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
+    "tf_py_test",
+)
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
+
+licenses(["notice"])  # Apache 2.0
+
+package(
+    default_visibility = [
+        "//cloud/vmm/testing/tests/tpu:__subpackages__",
+        "//knowledge/cerebra/sense/im2query:__subpackages__",
+        "//learning/brain:__subpackages__",
+        "//learning/deepmind:__subpackages__",
+        "//medical/pathology:__subpackages__",
+        "//tensorflow:__subpackages__",
+        "//vr/perception:__subpackages__",
+    ],
+)
+
+py_library(
+    name = "tpu_py",
+    srcs = ["ops/tpu_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:tpu_ops_gen",
+    ],
+)
+
+py_library(
+    name = "async_checkpoint",
+    srcs = ["async_checkpoint.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:summary",
+        "//tensorflow/python:summary_ops_v2",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/estimator:estimator_py",
+    ],
+)
+
+py_library(
+    name = "tpu_estimator",
+    srcs = [
+        "_tpu_estimator_embedding.py",
+        "error_handling.py",
+        "tpu_config.py",
+        "tpu_context.py",
+        "tpu_estimator.py",
+        "util.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":async_checkpoint",
+        ":feature_column",
+        ":functional",
+        ":tpu_embedding",
+        ":tpu_lib",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:function",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:session",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:summary",
+        "//tensorflow/python:summary_ops_v2",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/estimator:util",
+        "@six_archive//:six",
+    ],
+)
+
+py_library(
+    name = "functional",
+    srcs = ["functional.py"],
+    srcs_version = "PY2AND3",
+    visibility = [
+        "//visibility:public",
+    ],
+    deps = [
+        "//tensorflow/python:tpu_ops_gen",
+    ],
+)
+
+py_library(
+    name = "tpu",
+    srcs = [
+        "__init__.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":feature_column",
+        ":tpu_embedding",
+        ":tpu_estimator",
+        ":tpu_lib",
+    ],
+)
+
+py_library(
+    name = "tpu_lib",
+    srcs = [
+        "__init__.py",
+        "bfloat16.py",
+        "device_assignment.py",
+        "session_support.py",
+        "tensor_tracer.py",
+        "topology.py",
+        "tpu.py",
+        "tpu_feed.py",
+        "tpu_function.py",
+        "tpu_optimizer.py",
+        "tpu_sharding.py",
+        "tpu_system_metadata.py",
+        "training_loop.py",
+        "xla.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":datasets",
+        ":functional",
+        ":tpu_py",
+        "//tensorflow/compiler/xla/experimental/xla_sharding",
+        "//tensorflow/compiler/xla/python_api:xla_shape",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/core/protobuf/tpu:compilation_result_proto_py",
+        "//tensorflow/core/protobuf/tpu:dynamic_padding_proto_py",
+        "//tensorflow/core/protobuf/tpu:optimization_parameters_proto_py",
+        "//tensorflow/core/protobuf/tpu:topology_proto_py",
+        "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_py",
+        "//tensorflow/core/protobuf/tpu:tpu_embedding_output_layout_proto_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:control_flow_util",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:tpu_ops_gen",
+        "//tensorflow/python:training",
+        "//tensorflow/python:util",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/ops/losses",
+        "//tensorflow/python/tpu/profiler",
+    ],
+)
+
+py_library(
+    name = "datasets",
+    srcs = [
+        "datasets.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
+
+tf_py_test(
+    name = "datasets_test",
+    size = "medium",
+    srcs = ["datasets_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        ":datasets",
+    ],
+    grpc_enabled = True,
+    shard_count = 4,
+    tags = ["no_oss"],
+)
+
+tf_py_test(
+    name = "tpu_test",
+    size = "small",
+    srcs = ["tpu_test.py"],
+    additional_deps = [
+        ":tpu",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:layers",
+    ],
+    tags = ["no_windows"],  # TODO: needs investigation on Windows
+)
+
+tf_py_test(
+    name = "tpu_sharding_test",
+    size = "small",
+    srcs = ["tpu_sharding_test.py"],
+    additional_deps = [
+        ":tpu",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+    ],
+)
+
+tf_py_test(
+    name = "bfloat16_test",
+    size = "small",
+    srcs = ["bfloat16_test.py"],
+    additional_deps = [
+        ":tpu",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+    ],
+)
+
+tf_py_test(
+    name = "tpu_infeed_test",
+    size = "small",
+    srcs = ["tpu_infeed_test.py"],
+    additional_deps = [
+        ":tpu",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+tf_py_test(
+    name = "tpu_config_test",
+    size = "small",
+    srcs = ["tpu_config_test.py"],
+    additional_deps = [
+        ":tpu_estimator",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+tf_py_test(
+    name = "tpu_estimator_signals_test",
+    size = "small",
+    srcs = ["tpu_estimator_signals_test.py"],
+    additional_deps = [
+        ":tpu_estimator",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    # TODO(jhseu): Remove. Fails in OSS on Python 3.
+    tags = ["no_oss"],
+)
+
+tf_py_test(
+    name = "topology_test",
+    size = "medium",
+    srcs = ["topology_test.py"],
+    additional_deps = [
+        ":tpu",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_library(
+    name = "tpu_embedding",
+    srcs = [
+        "tpu_embedding.py",
+        "tpu_embedding_gradient.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":tpu_lib",
+        "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:partitioned_variables",
+        "//tensorflow/python:tpu_ops_gen",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "@six_archive//:six",
+    ],
+)
+
+py_library(
+    name = "feature_column",
+    srcs = ["feature_column.py"],
+    deps = [
+        ":tpu_lib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/feature_column",
+        "//tensorflow/python/feature_column:feature_column_py",
+    ],
+)
+
+tf_py_test(
+    name = "feature_column_test",
+    srcs = [
+        "feature_column_test.py",
+    ],
+    additional_deps = [
+        ":feature_column",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:lookup_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/feature_column",
+        "//tensorflow/python/feature_column:feature_column_py",
+    ],
+    main = "feature_column_test.py",
+)
diff --git a/tensorflow/python/tpu/__init__.py b/tensorflow/python/tpu/__init__.py
new file mode 100644
index 0000000..0dffd70
--- /dev/null
+++ b/tensorflow/python/tpu/__init__.py
@@ -0,0 +1,20 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Ops related to Tensor Processing Units."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/python/tpu/_tpu_estimator_embedding.py b/tensorflow/python/tpu/_tpu_estimator_embedding.py
new file mode 100644
index 0000000..08e0e96
--- /dev/null
+++ b/tensorflow/python/tpu/_tpu_estimator_embedding.py
@@ -0,0 +1,334 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""Tooling for support TPU embedding in TPUEstimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.feature_column import feature_column as core_fc
+from tensorflow.python.feature_column import feature_column_lib as core_fc_lib
+from tensorflow.python.tpu import feature_column as tpu_fc
+from tensorflow.python.tpu import tpu_embedding
+
+# pylint: disable=protected-access
+_TPU_EMBEDDING_COLUMN_CLASSES = (tpu_fc._TPUEmbeddingColumn,
+                                 tpu_fc._TPUSharedEmbeddingColumn)
+_EMBEDDING_COLUMN_CLASSES = (core_fc._EmbeddingColumn,
+                             core_fc_lib.EmbeddingColumn,
+                             core_fc._SharedEmbeddingColumn)
+_SUPPORTED_FEATURE_COLUMNS = (core_fc._NumericColumn, core_fc_lib.NumericColumn)
+
+# pylint: enable=protected-access
+
+_TABLE_NAME_PREFIX = 'tbl_'
+_LEN_TABLE_NAME_PREFIX = len(_TABLE_NAME_PREFIX)
+
+
+def _get_table_name_from_embedding_var_name(embedding_var_name):
+  return '{}{}'.format(_TABLE_NAME_PREFIX, embedding_var_name)
+
+
+def _get_embedding_var_name_from_table_name(table_name):
+  return table_name[_LEN_TABLE_NAME_PREFIX:]
+
+
+def _get_embedding_variable_name(scope_name, var_name):
+  return '{}/{}'.format(scope_name, var_name)
+
+
+def _get_slot_variable_names(scope_name, var_name, optimization_parameters):
+  """Return embedding variable names which are consistent with CPU runs."""
+  if isinstance(optimization_parameters, tpu_embedding.AdagradParameters):
+    return tpu_embedding.AdagradSlotVariableName(
+        '{}/{}/Adagrad'.format(scope_name, var_name)
+    )
+  elif isinstance(optimization_parameters, tpu_embedding.AdamParameters):
+    return tpu_embedding.AdamSlotVariableNames(
+        '{}/{}/Adam/m'.format(scope_name, var_name),
+        '{}/{}/Adam/v'.format(scope_name, var_name)
+    )
+  elif isinstance(optimization_parameters,
+                  tpu_embedding.StochasticGradientDescentParameters):
+    return None
+  else:
+    raise ValueError('Inferring the full variable name for '
+                     'optimization_parameters {} is not supported.'
+                     .format(optimization_parameters))
+
+
+def get_full_variable_names(
+    graph, table_to_config_dict, optimization_parameters):
+  """Return embedding variable names and slot variables which are consistent with CPU runs."""
+  collection = graph.get_collection_ref(tpu_fc._TPU_FC_TO_SCOPE)  # pylint: disable=protected-access
+  if not collection:
+    raise RuntimeError(
+        'Embedding feature column did not capture anything. Make sure the '
+        'feature columns passed to the TPUEstimator constructor are actually '
+        'used in model_fn.')
+
+  embedding_variable_name_by_table = {}
+  slot_variable_names_by_table = {}
+  for table_name in table_to_config_dict:
+    embedding_var_name = _get_embedding_var_name_from_table_name(table_name)
+    (scope_name, var_name) = collection[0][embedding_var_name]
+    embedding_variable_name_by_table[table_name] = (
+        _get_embedding_variable_name(scope_name, var_name))
+    slot_variable_names_by_table[table_name] = _get_slot_variable_names(
+        scope_name, var_name, optimization_parameters)
+
+  graph.clear_collection(tpu_fc._TPU_FC_TO_SCOPE)  # pylint: disable=protected-access
+  return embedding_variable_name_by_table, slot_variable_names_by_table
+
+
+def get_tpu_embedding_config_from_feature_columns(feature_columns):
+  """Create configs for TPUEmbedding from a list of feature columns.
+
+  This function will place one embedding tensor per table; the return value is
+  intended to be used as input to TPUEmbedding.
+
+  Args:
+    feature_columns: a list of supported feature columns.
+
+  Returns:
+    A pair of dicts, the first maps tables to their config, the second maps
+    features to tables.
+  """
+
+  allowed = (tpu_fc._TPUEmbeddingColumn, tpu_fc._TPUSharedEmbeddingColumn)  # pylint: disable=protected-access
+
+  for column in feature_columns:
+    if not isinstance(column, allowed):
+      raise TypeError(
+          'Unsupported feature column {}. Supported types are {}.'.format(
+              type(column), allowed))
+
+  table_to_config = {}
+  feature_to_table = {}
+  for column in feature_columns:
+    feature_name = column.get_feature_key_name()
+    table_name = _get_table_name_from_embedding_var_name(
+        column.get_embedding_var_name())
+    if feature_name in feature_to_table:
+      raise ValueError(
+          'Feature column {} is used with multiple embeddings and this is '
+          'not supported.'.format(feature_name))
+    feature_to_table[feature_name] = table_name
+    vocabulary_size, dimension = column.get_embedding_table_size()
+    table_to_config[table_name] = tpu_embedding.TableConfig(
+        vocabulary_size=vocabulary_size,
+        dimension=dimension,
+        initializer=column.get_initializer(),
+        combiner=column.get_combiner())
+
+  return table_to_config, feature_to_table
+
+
+def _get_tpu_embedding_optimization_parameters(embedding_config_spec):
+  """Get tpu_embedding._OptimizationParameters from EmbeddingConfigSpec."""
+  if embedding_config_spec.optimizer_type == 'adagrad':
+    return tpu_embedding.AdagradParameters(
+        embedding_config_spec.learning_rate,
+        embedding_config_spec.adagrad_initial_accumulator,
+        embedding_config_spec.use_gradient_accumulation)
+  elif embedding_config_spec.optimizer_type == 'sgd':
+    return tpu_embedding.StochasticGradientDescentParameters(
+        embedding_config_spec.learning_rate,
+        embedding_config_spec.use_gradient_accumulation)
+  elif embedding_config_spec.optimizer_type == 'adam':
+    return tpu_embedding.AdamParameters(
+        embedding_config_spec.learning_rate,
+        embedding_config_spec.adam_parameters.beta1,
+        embedding_config_spec.adam_parameters.beta2,
+        embedding_config_spec.adam_parameters.epsilon,
+        use_gradient_accumulation=embedding_config_spec
+        .use_gradient_accumulation)
+  else:
+    raise ValueError("optimizer_type must be 'adagrad', 'sgd' or 'adam' for now.")
+
+
+AdamParameters = collections.namedtuple('AdamParameters',
+                                        ['beta1', 'beta2', 'epsilon'])
+
+
+# TODO(shizhiw): Improve the API to support more optimizer parameters.
+class EmbeddingConfigSpec(
+    collections.namedtuple('EmbeddingConfigSpec', [
+        'feature_columns', 'learning_rate', 'optimizer_type',
+        'adagrad_initial_accumulator', 'clipping_limit',
+        'use_gradient_accumulation', 'adam_parameters'
+    ])):
+  """Class to keep track of embedding config specification."""
+
+  def __new__(cls,
+              feature_columns,
+              learning_rate,
+              optimizer_type='adagrad',
+              adagrad_initial_accumulator=None,
+              clipping_limit=None,
+              use_gradient_accumulation=False,
+              adam_parameters=None):
+    """Creates an EmbeddingConfigSpec instance.
+
+    Args:
+      feature_columns: All `FeatureColumn`s used by model.
+      learning_rate: embedding optimizer learning rate.
+      optimizer_type: (String) Name of the optimizer for embedding gradients
+        updates. Must be either 'adagrad' ( `tf.train.AdagradOptimizer`, default
+        value), 'sgd' (`tf.train.GradientDescentOptimizer`), or 'adam'
+        (`tf.contrib.opt.LazyAdamOptimizer`) for lazy Adam. This optimizer will
+        be applied to all embedding variables specified by `feature_columns`.
+      adagrad_initial_accumulator: Initial accumulator for Adagrad. Used when
+        optimizer_type is 'adagrad'. Default is `0.1`.
+      clipping_limit: (Optional) Clipping limit (absolute value).
+      use_gradient_accumulation: (Experimental) Whether to accumulate the
+        gradients across TPU embedding mini-batches. Gradient accumulation does
+        not affect SGD and therefore this is applicable only for Adagrad.
+      adam_parameters: AdamParameters. Used when optimizer_type is 'adam'.
+        Default is 0.9 for beta1, 0.999 for beta2 and 1e-8 for epsilon.
+
+    Returns:
+      An EmbeddingConfigSpec instance.
+
+    Raises:
+      ValueError: If the feature_columns are not specified.
+      TypeError: If the feature columns are not of the correct type (one of
+        _SUPPORTED_FEATURE_COLUMNS, _TPU_EMBEDDING_COLUMN_CLASSES or
+        _EMBEDDING_COLUMN_CLASSES).
+      ValueError: If use_gradient_accumulation is True for SGD.
+      ValueError: If `optimizer_type` is not one of "adagrad" or "sgd" or
+        "adam".
+    """
+    if not feature_columns:
+      raise ValueError('`feature_columns` cannot be `None` or empty.')
+
+    # At this point it is unknown whether the TPUEstimator will run in CPU or
+    # TPU mode, so non-TPU embedding columns must be allowed as well.
+    supported_classes = tuple(
+        list(_SUPPORTED_FEATURE_COLUMNS) + list(_TPU_EMBEDDING_COLUMN_CLASSES) +
+        list(_EMBEDDING_COLUMN_CLASSES))
+
+    for column in feature_columns:
+      if not isinstance(column, supported_classes):
+        raise TypeError(
+            'All feature columns must be supported types in {}. Got {}'.format(
+                supported_classes, type(column)))
+
+    if optimizer_type == 'adagrad':
+      if adagrad_initial_accumulator is None:
+        adagrad_initial_accumulator = 0.1
+      if adagrad_initial_accumulator <= 0:
+        raise ValueError('Adagrad initial_accumulator must be positive')
+    elif optimizer_type == 'sgd':
+      if use_gradient_accumulation:
+        raise ValueError('Gradient accumulation makes sense for Adagrad only.')
+    elif optimizer_type == 'adam':
+      if adam_parameters is None:
+        adam_parameters = AdamParameters(0.9, 0.999, 1e-8)
+      if adam_parameters.beta1 < 0. or adam_parameters.beta1 >= 1.:
+        raise ValueError('beta1 must be between 0. and 1; got {}.'.format(
+            adam_parameters.beta1))
+      if adam_parameters.beta2 < 0. or adam_parameters.beta2 >= 1.:
+        raise ValueError('beta2 must be between 0. and 1; got {}.'.format(
+            adam_parameters.beta2))
+      if adam_parameters.epsilon <= 0.:
+        raise ValueError('epsilon must be positive; got {}.'.format(
+            adam_parameters.epsilon))
+    else:
+      raise ValueError("optimizer_type must be 'adagrad', 'sgd' or 'adam' for now.")
+
+    return super(EmbeddingConfigSpec, cls).__new__(
+        cls,
+        feature_columns=feature_columns,
+        learning_rate=learning_rate,
+        optimizer_type=optimizer_type,
+        adagrad_initial_accumulator=adagrad_initial_accumulator,
+        clipping_limit=clipping_limit,
+        use_gradient_accumulation=use_gradient_accumulation,
+        adam_parameters=adam_parameters)
+
+
+class EmbeddingConfig(object):
+  """This is the internal immutable object for embedding config.
+
+  `EmbeddingConfig` is responsible for _translating_ the user-provided
+  `EmbeddingConfigSpec` into internal data structures, mostly constructor
+  arguments of `TPUEmbedding`.
+  """
+
+  def __init__(self, embedding_config_spec, train_batch_size, eval_batch_size,
+               num_hosts, num_cores, master):
+    self._embedding_config_spec = embedding_config_spec
+    self._train_batch_size = train_batch_size
+    self._eval_batch_size = eval_batch_size
+    self._num_hosts = num_hosts
+    self._num_cores = num_cores
+    self._master = master
+
+    self._table_to_config_dict, self._feature_to_table_dict = (
+        get_tpu_embedding_config_from_feature_columns(
+            embedding_config_spec.feature_columns))
+    self._optimization_parameters = _get_tpu_embedding_optimization_parameters(
+        self._embedding_config_spec)
+    self._mode_to_tpu_embedding_dict = {}
+    self.dummy_table_variables = None
+
+  def has_embedding_tables(self):
+    return bool(self._table_to_config_dict)
+
+  def _create_tpu_embedding(self, mode):
+    """Create tpu_embedding.TPUEmbedding based on mode."""
+    if mode == model_fn_lib.ModeKeys.TRAIN:
+      batch_size = self._train_batch_size
+    else:
+      batch_size = self._eval_batch_size
+
+    if mode == model_fn_lib.ModeKeys.TRAIN:
+      tpu_embedding_mode = tpu_embedding.TRAINING
+    elif (mode == model_fn_lib.ModeKeys.EVAL or
+          mode == model_fn_lib.ModeKeys.PREDICT):
+      tpu_embedding_mode = tpu_embedding.INFERENCE
+    else:
+      raise ValueError('Mode {} is not supported.'.format(mode))
+
+    tpu_embedding_ = tpu_embedding.TPUEmbedding(
+        self._table_to_config_dict,
+        self._feature_to_table_dict,
+        batch_size,
+        tpu_embedding_mode,
+        self._master,
+        self._optimization_parameters,
+    )
+    return tpu_embedding_
+
+  def get_tpu_embedding(self, mode):
+    if mode not in self._mode_to_tpu_embedding_dict:
+      self._mode_to_tpu_embedding_dict[mode] = (
+          self._create_tpu_embedding(mode))
+    return self._mode_to_tpu_embedding_dict[mode]
+
+
+def split_inputs(ctx, features, labels):
+  """Splits the dense and sparse tensors inside the features and labels."""
+  sparse_features = collections.OrderedDict()
+  if ctx.embedding_config:
+    tpu_embedding_ = ctx.embedding_config.tpu_embedding
+    for feature_key in tpu_embedding_.feature_to_table_dict:
+      sparse_features[feature_key] = features.pop(feature_key)
+
+  return features, labels, sparse_features
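The constructor-time checks in `EmbeddingConfigSpec.__new__` are the main validation surface in this module. A standalone sketch of those per-optimizer rules (the helper names here are hypothetical, not the module's API):

```python
import collections

AdamParams = collections.namedtuple('AdamParams', ['beta1', 'beta2', 'epsilon'])


def validate_optimizer(optimizer_type, adagrad_initial_accumulator=None,
                       use_gradient_accumulation=False, adam_parameters=None):
  """Mirrors the per-optimizer checks performed in EmbeddingConfigSpec."""
  if optimizer_type == 'adagrad':
    accumulator = (0.1 if adagrad_initial_accumulator is None
                   else adagrad_initial_accumulator)
    if accumulator <= 0:
      raise ValueError('Adagrad initial_accumulator must be positive')
  elif optimizer_type == 'sgd':
    if use_gradient_accumulation:
      raise ValueError('Gradient accumulation makes sense for Adagrad only.')
  elif optimizer_type == 'adam':
    adam = adam_parameters or AdamParams(0.9, 0.999, 1e-8)
    if not 0. <= adam.beta1 < 1. or not 0. <= adam.beta2 < 1.:
      raise ValueError('beta1/beta2 must be in [0, 1).')
    if adam.epsilon <= 0.:
      raise ValueError('epsilon must be positive.')
  else:
    raise ValueError("optimizer_type must be 'adagrad', 'sgd' or 'adam'.")


validate_optimizer('adam')  # Defaults are applied and pass validation.
```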
diff --git a/tensorflow/python/tpu/async_checkpoint.py b/tensorflow/python/tpu/async_checkpoint.py
new file mode 100644
index 0000000..1b09ce1
--- /dev/null
+++ b/tensorflow/python/tpu/async_checkpoint.py
@@ -0,0 +1,212 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Hook for asynchronous checkpointing.
+
+This hook dispatches checkpoint writing operations in a separate thread to
+allow execution to continue on the main thread.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+import time
+
+from tensorflow.core.util.event_pb2 import SessionLog
+from tensorflow.python.framework import meta_graph
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import training_util
+from tensorflow.python.training.session_run_hook import SessionRunArgs
+from tensorflow.python.training.summary_io import SummaryWriterCache
+
+
+class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
+  """Saves checkpoints every N steps or seconds."""
+
+  def __init__(self,
+               checkpoint_dir,
+               save_secs=None,
+               save_steps=None,
+               saver=None,
+               checkpoint_basename="model.ckpt",
+               scaffold=None,
+               listeners=None):
+    """Initializes a `CheckpointSaverHook`.
+
+    Args:
+      checkpoint_dir: `str`, base directory for the checkpoint files.
+      save_secs: `int`, save every N secs.
+      save_steps: `int`, save every N steps.
+      saver: `Saver` object, used for saving.
+      checkpoint_basename: `str`, base name for the checkpoint files.
+      scaffold: `Scaffold`, use to get saver object.
+      listeners: List of `CheckpointSaverListener` subclass instances. Used for
+        callbacks that run immediately before or after this hook saves the
+        checkpoint.
+
+    Raises:
+      ValueError: One of `save_steps` or `save_secs` should be set.
+      ValueError: At most one of `saver` or `scaffold` should be set.
+    """
+    logging.info("Create AsyncCheckpointSaverHook.")
+    if saver is not None and scaffold is not None:
+      raise ValueError("You cannot provide both saver and scaffold.")
+    self._saver = saver
+    self._save_thread = None
+    self._write_graph_thread = None
+    self._checkpoint_dir = checkpoint_dir
+    self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
+    self._scaffold = scaffold
+    self._timer = basic_session_run_hooks.SecondOrStepTimer(
+        every_secs=save_secs, every_steps=save_steps)
+    self._listeners = listeners or []
+    self._steps_per_run = 1
+    self._summary_writer = None
+    self._global_step_tensor = None
+
+    self._last_checkpoint_step = None
+
+  def _set_steps_per_run(self, steps_per_run):
+    self._steps_per_run = steps_per_run
+
+  def begin(self):
+    self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
+    self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
+    if self._global_step_tensor is None:
+      raise RuntimeError(
+          "Global step should be created to use CheckpointSaverHook.")
+    for l in self._listeners:
+      l.begin()
+
+  def after_create_session(self, session, coord):
+    global_step = session.run(self._global_step_tensor)
+
+    # We write the graph and saver_def once, here in after_create_session.
+    # We cannot do this in begin, since other hooks may still change the graph
+    # and add variables in begin; the graph is finalized after all begin calls.
+    def _write_graph_fn(self):
+      training_util.write_graph(
+          ops.get_default_graph().as_graph_def(add_shapes=True),
+          self._checkpoint_dir, "graph.pbtxt")
+    self._write_graph_thread = threading.Thread(target=_write_graph_fn,
+                                                args=[self])
+    self._write_graph_thread.start()
+
+    saver_def = self._get_saver().saver_def if self._get_saver() else None
+    graph = ops.get_default_graph()
+    meta_graph_def = meta_graph.create_meta_graph_def(
+        graph_def=graph.as_graph_def(add_shapes=True), saver_def=saver_def)
+    self._summary_writer.add_graph(graph)
+    self._summary_writer.add_meta_graph(meta_graph_def)
+    # The checkpoint saved here is the state at step "global_step".
+    self._save(session, global_step)
+    self._timer.update_last_triggered_step(global_step)
+
+  def before_run(self, run_context):  # pylint: disable=unused-argument
+    return SessionRunArgs(self._global_step_tensor)
+
+  def after_run(self, run_context, run_values):
+    global_step = run_context.session.run(self._global_step_tensor)
+    if self._timer.should_trigger_for_step(global_step):
+      self._timer.update_last_triggered_step(global_step)
+      logging.info("Triggering checkpoint. %s", global_step)
+      if self._save(run_context.session, global_step):
+        run_context.request_stop()
+
+  def end(self, session):
+    if self._save_thread:
+      logging.info("Waiting for any pending checkpoints to finish.")
+      self._save_thread.join()
+    if self._write_graph_thread:
+      logging.info("Waiting for any pending write_graph to finish.")
+      self._write_graph_thread.join()
+
+    last_step = session.run(self._global_step_tensor)
+
+    if self._last_checkpoint_step != last_step:
+      self._save(session, last_step, asynchronous=False)
+
+    for l in self._listeners:
+      l.end(session, last_step)
+
+  def _save(self, session, step, asynchronous=True):
+    """Saves the latest checkpoint, returns should_stop."""
+
+    # Skip saving on step 0
+    if step == 0:
+      return
+
+    def _save_fn():
+      """Run the saver process."""
+      logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
+
+      start_time = time.time()
+      for l in self._listeners:
+        l.before_save(session, step)
+
+      self._get_saver().save(session, self._save_path, global_step=step)
+      self._summary_writer.add_session_log(
+          SessionLog(
+              status=SessionLog.CHECKPOINT, checkpoint_path=self._save_path),
+          step)
+
+      for l in self._listeners:
+        l.after_save(session, step)
+
+      end_time = time.time()
+      logging.info("Checkpoint actual writing time: (%.3f sec)",
+                   end_time - start_time)
+      logging.info("Checkpoint finished for %d into %s.", step, self._save_path)
+
+    if not asynchronous:
+      self._last_checkpoint_step = step
+      _save_fn()
+      return
+
+    if self._save_thread is not None:
+      self._save_thread.join(timeout=0.1)
+      if self._save_thread.is_alive():
+        logging.info("Saver thread still in progress, skipping checkpoint.")
+        return
+
+    self._last_checkpoint_step = step
+    self._save_thread = threading.Thread(target=_save_fn)
+    self._save_thread.start()
+
+  def _get_saver(self):
+    if self._saver is not None:
+      return self._saver
+    elif self._scaffold is not None:
+      return self._scaffold.saver
+
+    # Get saver from the SAVERS collection if present.
+    collection_key = ops.GraphKeys.SAVERS
+    savers = ops.get_collection(collection_key)
+    if not savers:
+      raise RuntimeError(
+          "No items in collection {}. Please add a saver to the collection "
+          "or provide a saver or scaffold.".format(collection_key))
+    elif len(savers) > 1:
+      raise RuntimeError(
+          "More than one item in collection {}. "
+          "Please indicate which one to use by passing it to the constructor."
+          .format(collection_key))
+
+    self._saver = savers[0]
+    return savers[0]
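The core concurrency policy of the hook above is: one save thread at a time, and if the previous save is still running when a trigger fires, skip that checkpoint. A minimal standalone sketch of that policy (illustrative names, plain threading only):

```python
import threading
import time


class AsyncSaver(object):
  """One in-flight save at a time; new triggers are skipped while busy."""

  def __init__(self):
    self._save_thread = None

  def save(self, step):
    if self._save_thread is not None:
      self._save_thread.join(timeout=0.1)  # Give the old save a moment.
      if self._save_thread.is_alive():
        print('save still in progress, skipping step', step)
        return
    self._save_thread = threading.Thread(target=self._do_save, args=(step,))
    self._save_thread.start()

  def _do_save(self, step):
    time.sleep(0.5)  # Stand-in for checkpoint I/O.
    print('saved step', step)


saver = AsyncSaver()
saver.save(100)
saver.save(200)  # Likely skipped: the step-100 save is still writing.
saver._save_thread.join()
```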
diff --git a/tensorflow/python/tpu/bfloat16.py b/tensorflow/python/tpu/bfloat16.py
new file mode 100644
index 0000000..fa74f65
--- /dev/null
+++ b/tensorflow/python/tpu/bfloat16.py
@@ -0,0 +1,77 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Helper context for running models with bfloat16."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.util import tf_contextlib
+
+
+def _get_custom_getter():
+  """Returns a custom getter that this class's methods must be called under.
+
+  All methods of this class must be called under a variable scope that was
+  passed this custom getter. Example:
+
+  ```python
+  network = ConvNetBuilder(...)
+  with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
+    network.conv(...)
+    # Call more methods of network here
+  ```
+
+  Currently, this custom getter only does anything if self.use_tf_layers is
+  True. In that case, it causes variables to be stored as dtype
+  self.variable_type, then casted to the requested dtype, instead of directly
+  storing the variable as the requested dtype.
+  """
+
+  def inner_custom_getter(getter, *args, **kwargs):
+    """Custom getter that forces variables to have type self.variable_type."""
+    cast_to_bfloat16 = False
+    requested_dtype = kwargs['dtype']
+    if requested_dtype == dtypes.bfloat16:
+      # Only change the variable dtype if doing so does not decrease variable
+      # precision.
+      kwargs['dtype'] = dtypes.float32
+      cast_to_bfloat16 = True
+    var = getter(*args, **kwargs)
+    # This if statement is needed to guard the cast, because batch norm
+    # assigns directly to the return value of this custom getter. The cast
+    # makes the return value not a variable so it cannot be assigned. Batch
+    # norm variables are always in fp32 so this if statement is never
+    # triggered for them.
+    if cast_to_bfloat16:
+      var = math_ops.cast(var, dtypes.bfloat16)
+    return var
+
+  return inner_custom_getter
+
+
+@tf_contextlib.contextmanager
+def bfloat16_scope():
+  """Scope class for bfloat16 variables so that the model uses custom getter.
+
+  This enables variables to be read as bfloat16 type when using get_variable.
+  """
+  with variable_scope.variable_scope(
+      '', custom_getter=_get_custom_getter()) as varscope:
+    yield varscope
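A usage sketch, mirroring the test below; it assumes a TF 1.x build where these private modules are importable:

```python
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import variable_scope
from tensorflow.python.tpu import bfloat16

with bfloat16.bfloat16_scope() as scope:
  v = variable_scope.get_variable('v', [], dtype=dtypes.bfloat16)

# Reads of `v` are bfloat16, but the variable underneath is stored as
# float32, so the scope reports float32 for its global variables.
assert v.dtype.base_dtype == dtypes.bfloat16
```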
diff --git a/tensorflow/python/tpu/bfloat16_test.py b/tensorflow/python/tpu/bfloat16_test.py
new file mode 100644
index 0000000..3308e01
--- /dev/null
+++ b/tensorflow/python/tpu/bfloat16_test.py
@@ -0,0 +1,49 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Tests for bfloat16 helper."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import bfloat16
+
+
+class BFloat16ScopeTest(test.TestCase):
+
+  def testScopeName(self):
+    """Test if name for the variable scope is propogated correctly.
+    """
+    with bfloat16.bfloat16_scope() as bf:
+      self.assertEqual(bf.name, "")
+
+  def testRequestedDType(self):
+    """Test if requested dtype is honored in the getter.
+    """
+    with bfloat16.bfloat16_scope() as scope:
+      v1 = variable_scope.get_variable("v1", [])
+      self.assertEqual(v1.dtype.base_dtype, dtypes.float32)
+      v2 = variable_scope.get_variable("v2", [], dtype=dtypes.bfloat16)
+      self.assertEqual(v2.dtype.base_dtype, dtypes.bfloat16)
+      self.assertEqual([dtypes.float32, dtypes.float32],
+                       [v.dtype.base_dtype for v in scope.global_variables()])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/tpu/datasets.py b/tensorflow/python/tpu/datasets.py
new file mode 100644
index 0000000..bc0cd41
--- /dev/null
+++ b/tensorflow/python/tpu/datasets.py
@@ -0,0 +1,191 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Library of Cloud TPU helper functions for data loading."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import interleave_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.ops import readers
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import functional_ops
+
+
+def _TextLineDataset(filename):
+  buffer_size = 8 * 1024 * 1024  # 8 MiB per file
+  dataset = readers.TextLineDataset(filename, buffer_size=buffer_size)
+  return dataset
+
+
+def _TFRecordDataset(filename):
+  buffer_size = 8 * 1024 * 1024  # 8 MiB per file
+  dataset = readers.TFRecordDataset(filename, buffer_size=buffer_size)
+  return dataset
+
+
+_FILETYPE_MAP = {
+    'tfrecord': _TFRecordDataset,
+    'textline': _TextLineDataset,
+    'text': _TextLineDataset,
+}
+
+
+def StreamingFilesDataset(files,
+                          filetype=None,
+                          file_reader_job=None,
+                          worker_job=None,
+                          num_epochs=None,
+                          filename_shuffle_buffer_size=None,
+                          num_parallel_reads=None,
+                          batch_transfer_size=None,
+                          sloppy=None):
+  """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM).
+
+  Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read
+  files local to your GCE VM. In order to train using files stored on your local
+  VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset
+  helper to generate a dataset to feed your Cloud TPU with files from your GCE
+  VM.
+
+  The resulting dataset may raise an OutOfRangeError if there are no files
+  found as a result of the fileglob expansion.
+
+  Note: StreamingFilesDataset assumes that the session is using a
+  TPUClusterResolver and therefore has a worker and a coordinator job. File
+  loading will be done on the coordinator job.
+
+  Args:
+    files: A string glob to match files, or a `tf.data.Dataset` generating file
+      names.
+    filetype: A string (one of 'tfrecord', or 'textline') or a single-argument
+      TensorFlow function that when given a filename returns a dataset.
+    file_reader_job: An optional string that corresponds to the job that should
+      perform the file reads.
+    worker_job: An optional string that corresponds to the job that should
+      process the tensors (i.e. your GPU or TPU worker).
+    num_epochs: The number of epochs through the training set that should be
+      generated. By default, it will repeat infinitely.
+    filename_shuffle_buffer_size: An optional integer whose value controls the
+      shuffling of the file names. If you would like to read from the files in
+      the same order, set to 0 or False.
+    num_parallel_reads: An optional integer controlling the number of files to
+      read from concurrently. (Set to 1 for no parallelism.)
+    batch_transfer_size: An optional integer controlling the batching used to
+      amortize the remote function invocation overhead. Set to a very large
+      number to increase throughput. Set to a very small number to reduce memory
+      consumption. Set to False to skip batching.
+    sloppy: (Optional.) If `False`, read input data while maintaining a
+      deterministic order. (This may have significant performance impacts.)
+      Defaults to `True`.
+
+  Returns:
+    A `tf.data.Dataset` with an infinite stream of elements generated by a
+    parallel interleaving of the set of files matched (or generated) by
+    `files`, whose element type is the output of the dataset specified by
+    `filetype`.
+
+  Raises:
+    ValueError: if any argument is not of the expected type.
+  """
+  if filetype is None:
+    filetype = 'tfrecord'
+
+  if isinstance(filetype, str):
+    if filetype not in _FILETYPE_MAP:
+      raise ValueError('Unexpected filetype: %s' % filetype)
+    reader_fn = _FILETYPE_MAP[filetype]
+  elif callable(filetype):
+    reader_fn = filetype
+  else:
+    raise ValueError('filetype should be a string or a callable')
+
+  file_reader_job = file_reader_job or 'coordinator'
+
+  worker_job = worker_job or 'worker'
+
+  if filename_shuffle_buffer_size is None:
+    filename_shuffle_buffer_size = 4096
+
+  num_parallel_reads = num_parallel_reads or 8
+
+  if batch_transfer_size is None:
+    batch_transfer_size = 256
+
+  if sloppy is None:
+    sloppy = True
+
+  with ops.device('/job:%s' % file_reader_job):
+    if isinstance(files, str):
+      source_dataset = dataset_ops.Dataset.list_files(files)
+    elif isinstance(files, dataset_ops.DatasetV2):
+      source_dataset = files
+    else:
+      raise ValueError('files was not a string or a dataset: %s' % files)
+
+    if filename_shuffle_buffer_size:
+      source_dataset = source_dataset.shuffle(
+          buffer_size=filename_shuffle_buffer_size)
+
+    source_dataset = source_dataset.apply(
+        interleave_ops.parallel_interleave(
+            reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy))
+
+    source_dataset = source_dataset.repeat(num_epochs)
+
+    if batch_transfer_size:
+      source_dataset = source_dataset.batch(batch_transfer_size)
+
+    source_dataset = source_dataset.prefetch(1)
+
+    source_iterator = dataset_ops.make_one_shot_iterator(source_dataset)
+    source_handle = source_iterator.string_handle()
+
+  @function.Defun(dtypes.string)
+  def LoadingFunc(h):
+    remote_iterator = iterator_ops.Iterator.from_string_handle(
+        h, source_dataset.output_types, source_dataset.output_shapes)
+    return remote_iterator.get_next()
+
+  def MapFn(unused_input):
+    if isinstance(source_dataset.output_types, dtypes.DType):
+      output_types = [source_dataset.output_types]
+    elif isinstance(source_dataset.output_types, (list, tuple)):
+      output_types = source_dataset.output_types
+    else:
+      raise ValueError('source dataset has invalid output types')
+    remote_calls = functional_ops.remote_call(
+        args=[source_handle],
+        Tout=output_types,
+        f=LoadingFunc,
+        target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)
+    if len(remote_calls) == 1:
+      return remote_calls[0]
+    else:
+      return remote_calls
+
+  with ops.device('/job:%s' % worker_job):
+    output_dataset = dataset_ops.Dataset.range(2).repeat().map(
+        MapFn, num_parallel_calls=4 if sloppy else None)
+    output_dataset = output_dataset.prefetch(1)
+
+    if batch_transfer_size:
+      # Undo the batching used during the transfer.
+      output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1)
+
+  return output_dataset
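A usage sketch under the docstring's assumptions (a session whose ClusterDef defines 'coordinator' and 'worker' jobs, as in the tests below; the file pattern is illustrative):

```python
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.framework import ops
from tensorflow.python.tpu import datasets

dataset = datasets.StreamingFilesDataset('/data/train-*.tfrecord',
                                         filetype='tfrecord')
with ops.device('/job:worker'):
  iterator = dataset_ops.make_initializable_iterator(dataset)
# In a session connected to that cluster:
#   sess.run(iterator.initializer)
#   record = sess.run(iterator.get_next())
```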
diff --git a/tensorflow/python/tpu/datasets_test.py b/tensorflow/python/tpu/datasets_test.py
new file mode 100644
index 0000000..416dd94
--- /dev/null
+++ b/tensorflow/python/tpu/datasets_test.py
@@ -0,0 +1,214 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TPU datasets tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.core.protobuf import cluster_pb2
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.lib.io import python_io
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import datasets
+from tensorflow.python.training import server_lib
+from tensorflow.python.util import compat
+
+_NUM_FILES = 10
+_NUM_ENTRIES = 20
+
+
+class DatasetsTest(test.TestCase):
+
+  def setUp(self):
+    super(DatasetsTest, self).setUp()
+    self._coord = server_lib.Server.create_local_server()
+    self._worker = server_lib.Server.create_local_server()
+
+    self._cluster_def = cluster_pb2.ClusterDef()
+    worker_job = self._cluster_def.job.add()
+    worker_job.name = 'worker'
+    worker_job.tasks[0] = self._worker.target[len('grpc://'):]
+    coord_job = self._cluster_def.job.add()
+    coord_job.name = 'coordinator'
+    coord_job.tasks[0] = self._coord.target[len('grpc://'):]
+
+    session_config = config_pb2.ConfigProto(cluster_def=self._cluster_def)
+
+    self._sess = session.Session(self._worker.target, config=session_config)
+    self._worker_device = '/job:' + worker_job.name
+
+  def testTextLineDataset(self):
+    all_contents = []
+    for i in range(_NUM_FILES):
+      filename = os.path.join(self.get_temp_dir(), 'text_line.%d.txt' % i)
+      contents = []
+      for j in range(_NUM_ENTRIES):
+        contents.append(compat.as_bytes('%d: %d' % (i, j)))
+      with open(filename, 'wb') as f:
+        f.write(b'\n'.join(contents))
+      all_contents.extend(contents)
+
+    dataset = datasets.StreamingFilesDataset(
+        os.path.join(self.get_temp_dir(), 'text_line.*.txt'), filetype='text')
+
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
+    self._sess.run(iterator.initializer)
+    get_next = iterator.get_next()
+
+    retrieved_values = []
+    for _ in range(4 * len(all_contents)):
+      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
+
+    self.assertEqual(set(all_contents), set(retrieved_values))
+
+  def testTFRecordDataset(self):
+    all_contents = []
+    for i in range(_NUM_FILES):
+      filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i)
+      writer = python_io.TFRecordWriter(filename)
+      for j in range(_NUM_ENTRIES):
+        record = compat.as_bytes('Record %d of file %d' % (j, i))
+        writer.write(record)
+        all_contents.append(record)
+      writer.close()
+
+    dataset = datasets.StreamingFilesDataset(
+        os.path.join(self.get_temp_dir(), 'tf_record*'), filetype='tfrecord')
+
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
+    self._sess.run(iterator.initializer)
+    get_next = iterator.get_next()
+
+    retrieved_values = []
+    for _ in range(4 * len(all_contents)):
+      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
+
+    self.assertEqual(set(all_contents), set(retrieved_values))
+
+  def testTFRecordDatasetFromDataset(self):
+    filenames = []
+    all_contents = []
+    for i in range(_NUM_FILES):
+      filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i)
+      filenames.append(filename)
+      writer = python_io.TFRecordWriter(filename)
+      for j in range(_NUM_ENTRIES):
+        record = compat.as_bytes('Record %d of file %d' % (j, i))
+        writer.write(record)
+        all_contents.append(record)
+      writer.close()
+
+    filenames = dataset_ops.Dataset.from_tensor_slices(filenames)
+
+    dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord')
+
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
+    self._sess.run(iterator.initializer)
+    get_next = iterator.get_next()
+
+    retrieved_values = []
+    for _ in range(4 * len(all_contents)):
+      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
+
+    self.assertEqual(set(all_contents), set(retrieved_values))
+
+  def testArbitraryReaderFunc(self):
+
+    def MakeRecord(i, j):
+      return compat.as_bytes('%04d-%04d' % (i, j))
+
+    record_bytes = len(MakeRecord(10, 200))
+
+    all_contents = []
+    for i in range(_NUM_FILES):
+      filename = os.path.join(self.get_temp_dir(), 'fixed_length.%d' % i)
+      with open(filename, 'wb') as f:
+        for j in range(_NUM_ENTRIES):
+          record = MakeRecord(i, j)
+          f.write(record)
+          all_contents.append(record)
+
+    def FixedLengthFile(filename):
+      return readers.FixedLengthRecordDataset(filename, record_bytes)
+
+    dataset = datasets.StreamingFilesDataset(
+        os.path.join(self.get_temp_dir(), 'fixed_length*'),
+        filetype=FixedLengthFile)
+
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
+    self._sess.run(iterator.initializer)
+    get_next = iterator.get_next()
+
+    retrieved_values = []
+    for _ in range(4 * len(all_contents)):
+      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))
+
+    self.assertEqual(set(all_contents), set(retrieved_values))
+
+  def testArbitraryReaderFuncFromDatasetGenerator(self):
+
+    def my_generator():
+      yield (1, [1] * 10)
+
+    def gen_dataset(dummy):
+      return dataset_ops.Dataset.from_generator(
+          my_generator, (dtypes.int64, dtypes.int64),
+          (tensor_shape.TensorShape([]), tensor_shape.TensorShape([10])))
+
+    dataset = datasets.StreamingFilesDataset(
+        dataset_ops.Dataset.range(10), filetype=gen_dataset)
+
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
+    self._sess.run(iterator.initializer)
+    get_next = iterator.get_next()
+
+    retrieved_values = self._sess.run(get_next)
+
+    self.assertIsInstance(retrieved_values, (list, tuple))
+    self.assertEqual(len(retrieved_values), 2)
+    self.assertEqual(retrieved_values[0], 1)
+    self.assertItemsEqual(retrieved_values[1], [1] * 10)
+
+  def testUnexpectedFiletypeString(self):
+    with self.assertRaises(ValueError):
+      datasets.StreamingFilesDataset(
+          os.path.join(self.get_temp_dir(), '*'), filetype='foo')
+
+  def testUnexpectedFiletypeType(self):
+    with self.assertRaises(ValueError):
+      datasets.StreamingFilesDataset(
+          os.path.join(self.get_temp_dir(), '*'), filetype=3)
+
+  def testUnexpectedFilesType(self):
+    with self.assertRaises(ValueError):
+      datasets.StreamingFilesDataset(123, filetype='tfrecord')
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/tpu/device_assignment.py b/tensorflow/python/tpu/device_assignment.py
new file mode 100644
index 0000000..fd8a7a3
--- /dev/null
+++ b/tensorflow/python/tpu/device_assignment.py
@@ -0,0 +1,313 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Library of TPU helper functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.python.tpu.topology import Topology
+
+
+SINGLE_CORE_ASSIGNMENT = [[[0, 0, 0]]]
+
+
+def _compute_task_and_cores_to_replicas(core_assignment, topology):
+  """Computes a nested dict which maps task and logical core to replicas."""
+  task_and_cores_to_replicas = {}
+  for replica in xrange(core_assignment.shape[0]):
+    for logical_core in xrange(core_assignment.shape[1]):
+      coordinates = core_assignment[replica, logical_core, :]
+      task_id = topology.task_ordinal_at_coordinates(coordinates)
+      if task_id not in task_and_cores_to_replicas:
+        task_and_cores_to_replicas[task_id] = {}
+      if logical_core not in task_and_cores_to_replicas[task_id]:
+        task_and_cores_to_replicas[task_id][logical_core] = set()
+
+      task_and_cores_to_replicas[task_id][logical_core].add(replica)
+
+  task_to_sorted_replica_id = {}
+
+  for task, core_to_replicas in task_and_cores_to_replicas.items():
+    core_to_sorted_replicas = {}
+    for core, replicas in core_to_replicas.items():
+      core_to_sorted_replicas[core] = sorted(replicas)
+
+    task_to_sorted_replica_id[task] = core_to_sorted_replicas
+  return task_to_sorted_replica_id
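
Editor's note: a toy illustration (not part of this change) of the nested
mapping this helper builds; a duck-typed stub stands in for a real Topology,
and all coordinates are made up.

    import numpy as np
    from tensorflow.python.tpu.device_assignment import (
        _compute_task_and_cores_to_replicas)

    class _StubTopology(object):
      # Pretend the last coordinate (the core index) selects the task.
      def task_ordinal_at_coordinates(self, coordinates):
        return int(coordinates[2])

    # 2 replicas x 1 logical core, at coordinates (0,0,0) and (0,0,1).
    core_assignment = np.array([[[0, 0, 0]], [[0, 0, 1]]], dtype=np.int32)
    print(_compute_task_and_cores_to_replicas(core_assignment,
                                              _StubTopology()))
    # -> {0: {0: [0]}, 1: {0: [1]}}
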
+
+
+class DeviceAssignment(object):
+  """Mapping from logical cores in a computation to the physical TPU topology.
+
+  Prefer to use the `device_assignment()` helper to construct a
+  `DeviceAssignment`; it is easier, if less flexible, than constructing a
+  `DeviceAssignment` directly.
+  """
+
+  def __init__(self, topology, core_assignment):
+    """Constructs a `DeviceAssignment` object.
+
+    Args:
+      topology: A `Topology` object that describes the physical TPU topology.
+      core_assignment: A logical to physical core mapping, represented as a
+        rank 3 numpy array. See the description of the `core_assignment`
+        property for more details.
+
+    Raises:
+      ValueError: If `topology` is not a `Topology` object.
+      ValueError: If `core_assignment` is not a rank 3 numpy array.
+    """
+    if not isinstance(topology, Topology):
+      raise ValueError("topology must be a Topology object, got {}".format(
+          type(topology)))
+    core_assignment = np.asarray(core_assignment, dtype=np.int32)
+
+    self._topology = topology
+
+    if core_assignment.ndim != 3:
+      raise ValueError("core_assignment must be a rank 3 numpy array, "
+                       "got shape {}".format(core_assignment.shape))
+
+    self._num_replicas = core_assignment.shape[0]
+    self._num_cores_per_replica = core_assignment.shape[1]
+
+    if core_assignment.shape[-1] != topology.mesh_rank:
+      raise ValueError(
+          "minor dimension of core_assignment must have size equal to topology "
+          "rank ({}), got shape {}".format(topology.mesh_rank,
+                                           core_assignment.shape))
+
+    self._core_assignment = core_assignment
+    self._task_and_cores_to_replicas = _compute_task_and_cores_to_replicas(
+        self._core_assignment, topology)
+
+  @property
+  def topology(self):
+    """A `Topology` that describes the TPU topology."""
+    return self._topology
+
+  @property
+  def num_cores_per_replica(self):
+    """The number of cores per replica."""
+    return self._num_cores_per_replica
+
+  @property
+  def num_replicas(self):
+    """The number of replicas of the computation."""
+    return self._num_replicas
+
+  @property
+  def core_assignment(self):
+    """The logical to physical core mapping.
+
+    Returns:
+      An integer numpy array of rank 3, with shape
+      `[num_replicas, num_cores_per_replica, topology_rank]`. Maps
+      (replica, logical core) pairs to physical topology coordinates.
+    """
+    return self._core_assignment
+
+  def coordinates(self, replica, logical_core):
+    """Returns the physical topology coordinates of a logical core."""
+    return tuple(self.core_assignment[replica, logical_core, :])
+
+  def lookup_replicas(self, task_id, logical_core):
+    """Lookup replica ids by task number and logical core.
+
+    Args:
+      task_id: TensorFlow task number.
+      logical_core: An integer, identifying a logical core.
+    Returns:
+      A sorted list of the replicas that are attached to that task and
+      logical_core.
+    Raises:
+      ValueError: If no replica exists in the task which contains the logical
+      core.
+    """
+    try:
+      return self._task_and_cores_to_replicas[task_id][logical_core]
+    except KeyError:
+      raise ValueError(
+          "Can not find any replica in task: {} contains logical_core: {} ".
+          format(task_id, logical_core))
+
+  def tpu_ordinal(self, replica=0, logical_core=0):
+    """Returns the ordinal of the TPU device assigned to a logical core."""
+    coordinates = self.coordinates(replica, logical_core)
+    return self._topology.tpu_device_ordinal_at_coordinates(coordinates)
+
+  def host_device(self, replica=0, logical_core=0, job=None):
+    """Returns the CPU device attached to a logical core."""
+    coordinates = self.coordinates(replica, logical_core)
+    return self._topology.cpu_device_name_at_coordinates(coordinates, job=job)
+
+  def tpu_device(self, replica=0, logical_core=0, job=None):
+    """Returns the name of the TPU device assigned to a logical core."""
+    coordinates = self.coordinates(replica, logical_core)
+    return self._topology.tpu_device_name_at_coordinates(coordinates, job=job)
+
+
+def device_assignment(topology,
+                      computation_shape=None,
+                      computation_stride=None,
+                      num_replicas=1):
+  """Computes a device_assignment of a computation across a TPU topology.
+
+  Attempts to choose a compact grid of cores for locality.
+
+  Returns a `DeviceAssignment` that maps each logical core of each replica to
+  a physical core in the topology.
+
+  `computation_shape` and `computation_stride` values should be powers of 2 for
+  optimal packing.
+
+  Args:
+    topology: A `Topology` object that describes the TPU cluster topology.
+      To obtain a TPU topology, evaluate the `Tensor` returned by
+      `initialize_system` using `Session.run`. Either a serialized
+      `TopologyProto` or a `Topology` object may be passed. Note: you must
+      evaluate the `Tensor` first; you cannot pass an unevaluated `Tensor` here.
+    computation_shape: A rank 1 int32 numpy array with size equal to the
+      topology rank, describing the shape of the computation's block of cores.
+      If None, the `computation_shape` is `[1] * topology_rank`.
+    computation_stride: A rank 1 int32 numpy array of size `topology_rank`,
+      describing the inter-core spacing of the `computation_shape` cores in the
+      TPU topology. If None, the `computation_stride` is `[1] * topology_rank`.
+    num_replicas: The number of computation replicas to run. The replicas will
+      be packed into the free spaces of the topology.
+
+  Returns:
+    A DeviceAssignment object, which describes the mapping between the logical
+    cores in each computation replica and the physical cores in the TPU
+    topology.
+
+  Raises:
+    ValueError: If `topology` is not a valid `Topology` object.
+    ValueError: If `computation_shape` or `computation_stride` are not 1D int32
+      numpy arrays of shape [topology_rank] with all values positive.
+    ValueError: If the computation's replicas cannot fit into the TPU topology.
+  """
+  # Deserialize the Topology proto, if it is a string.
+  if isinstance(topology, bytes):
+    topology = Topology(serialized=topology)
+
+  if not isinstance(topology, Topology):
+    raise ValueError("`topology` is not a Topology object; got {}".format(
+        type(topology)))
+
+  topology_rank = len(topology.mesh_shape)
+  mesh_shape = topology.mesh_shape
+  if computation_shape is None:
+    computation_shape = np.array([1] * topology_rank, dtype=np.int32)
+  else:
+    computation_shape = np.asarray(computation_shape, dtype=np.int32)
+
+  if computation_stride is None:
+    computation_stride = np.array([1] * topology_rank, dtype=np.int32)
+  else:
+    computation_stride = np.asarray(computation_stride, dtype=np.int32)
+
+  if computation_shape.shape != (topology_rank,):
+    raise ValueError("computation_shape must have shape [{}]; got {}".format(
+        topology_rank, computation_shape.shape))
+  if computation_stride.shape != (topology_rank,):
+    raise ValueError("computation_stride must have shape [{}]; got {}".format(
+        topology_rank, computation_stride.shape))
+
+  if any(computation_shape < 1):
+    raise ValueError(
+        "computation_shape must be positive; got computation_shape={}".format(
+            computation_shape))
+  if any(computation_stride < 1):
+    raise ValueError(
+        "computation_stride must be positive; got computation_stride={}".format(
+            computation_stride))
+
+  # Computes the physical size of one computation instance.
+  computation_footprint = computation_shape * computation_stride
+  if any(computation_footprint > mesh_shape):
+    raise ValueError(
+        "computation footprint {} does not fit in TPU topology shape {}".format(
+            computation_footprint, mesh_shape))
+
+  # Computes how many copies of the computation footprint fit in the mesh.
+  block_counts = mesh_shape // computation_footprint
+
+  replica_counts = block_counts * computation_stride
+  max_replicas = np.prod(replica_counts)
+  if num_replicas > max_replicas:
+    raise ValueError(
+        "requested {} replicas but only {} replicas with shape {} and "
+        "computation_stride {} fit in a TPU mesh of shape {}".format(
+            num_replicas, max_replicas, computation_shape, computation_stride,
+            mesh_shape))
+
+  def ceil_of_ratio(n, m):
+    return (n + m - 1) // m
+
+  replica_shape = [0] * topology_rank
+  if num_replicas > 0:
+    remaining_replicas = num_replicas
+    remaining_dims = topology_rank
+
+    # Choose dimensions as close to an equal cube as possible, in order of
+    # increasing dimension size. By visiting dimensions in increasing size, we
+    # assign the most constrained dimension first, so we won't make infeasible
+    # choices.
+    #
+    # As a secondary sort order, visit the dimensions in reverse order. This
+    # means we try to use both cores on the same chip in preference to two cores
+    # on different chips.
+    for x, ni in sorted(((x, -i) for (i, x) in enumerate(replica_counts))):
+      i = -ni
+      target_size = int(math.ceil(remaining_replicas**(1.0 / remaining_dims)))
+      replica_shape[i] = min(target_size, x)
+      remaining_replicas = ceil_of_ratio(remaining_replicas, replica_shape[i])
+      remaining_dims -= 1
+
+    assert remaining_replicas == 1 and remaining_dims == 0
+
+  # Assigns an offset to each replica such that no two replicas overlap.
+  replica_offsets = np.full([num_replicas, topology_rank], -1, dtype=np.int32)
+  for replica in xrange(num_replicas):
+    # Chooses a replica number in each axis.
+    t = replica
+    pos = []
+    for dim in replica_shape[::-1]:
+      pos.append(t % dim)
+      t //= dim
+    replica_pos = np.array(pos[::-1], dtype=np.int32)
+
+    # Determines where that replica starts in each axis.
+    outer = replica_pos // computation_stride
+    inner = replica_pos % computation_stride
+    replica_offsets[replica, :] = outer * computation_footprint + inner
+
+  # Computes a complete logical core -> physical core mapping for each replica.
+  indices = [
+      np.arange(0, computation_shape[i] * computation_stride[i],
+                computation_stride[i]) for i in xrange(topology_rank)
+  ]
+  indices = np.concatenate(
+      [i[..., np.newaxis] for i in np.meshgrid(*indices, indexing="ij")],
+      axis=-1)
+  indices = indices.reshape((-1, topology_rank))
+  assignment = indices + replica_offsets[:, np.newaxis, :]
+  return DeviceAssignment(topology, core_assignment=assignment)
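
Editor's note: a minimal usage sketch for device_assignment(), not part of
this change. It hand-builds a tiny 1x1x2 Topology (one task owning two
cores); the mesh shape and coordinates are illustrative assumptions.

    import numpy as np
    from tensorflow.python.tpu.device_assignment import device_assignment
    from tensorflow.python.tpu.topology import Topology

    # One task, two TPU cores at mesh coordinates (0, 0, 0) and (0, 0, 1).
    topology = Topology(
        mesh_shape=[1, 1, 2],
        device_coordinates=np.array([[[0, 0, 0], [0, 0, 1]]], dtype=np.int32))

    # Two single-core replicas packed into the two available cores.
    da = device_assignment(topology, num_replicas=2)
    print(da.num_replicas, da.num_cores_per_replica)  # 2 1
    print(da.coordinates(replica=1, logical_core=0))  # (0, 0, 1)
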
diff --git a/tensorflow/python/tpu/error_handling.py b/tensorflow/python/tpu/error_handling.py
new file mode 100644
index 0000000..52e1ea4
--- /dev/null
+++ b/tensorflow/python/tpu/error_handling.py
@@ -0,0 +1,132 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""ErrorRendezvous handler for collecting errors from multiple threads."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import sys
+import threading
+import time
+
+import six
+
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import tf_logging as logging
+
+_UNINTERESTING_ERRORS = (errors.CancelledError,)
+
+
+class ErrorRendezvous(object):
+  """Resolve errors from multiple threads during TPU execution.
+
+  TPU errors can occur on the infeed or outfeed threads as well as the main
+  training thread.
+
+  Depending on which thread "wins" and receives the session error first, we may
+  end up showing users a confusing and non-actionable error message (session
+  cancelled) instead of a root cause (e.g. a bad filename).
+
+  The rendezvous object provides a location to capture these errors until all
+  threads terminate.  At that point we can choose the most informative error
+  to report.
+  """
+
+  def __init__(self, num_sources):
+    # string -> (message, traceback)
+    self._errors = {}
+    self._num_sources = num_sources
+    self._session_cancel_timer = None
+
+  def record_error(self, source, exc_info, session=None):
+    """Report an exception from the given source.
+
+    If a session is passed, a timer will be registered to close it after a few
+    seconds.  This is necessary to ensure the main training loop does not hang
+    if an infeed/outfeed error occurs.  We sleep a few seconds to allow a more
+    interesting error from another thread to propagate.
+
+    Args:
+      source: string, source of the error
+      exc_info: Output from `sys.exc_info` (type, value, traceback)
+      session: Session to close after delay.
+    """
+    _, value, _ = exc_info
+    self._errors[source] = exc_info
+    logging.info('Error recorded from %s: %s', source, value)
+
+    if session is not None and self._session_cancel_timer is None:
+
+      def _cancel_session():
+        time.sleep(5)
+        try:
+          session.close()
+        except:  # pylint: disable=bare-except
+          pass
+
+      self._session_cancel_timer = threading.Thread(target=_cancel_session)
+      self._session_cancel_timer.daemon = True
+      self._session_cancel_timer.start()
+
+  def record_done(self, source):
+    """Mark execution source `source` as done.
+
+    If an error was originally reported from `source` it is left intact.
+
+    Args:
+      source: `str`, source being recorded
+    """
+    logging.info('%s marked as finished', source)
+    if source not in self._errors:
+      self._errors[source] = None
+
+  @contextlib.contextmanager
+  def catch_errors(self, source, session=None):
+    """Context manager to report any errors within a block."""
+    try:
+      yield
+    except Exception:  # pylint: disable=broad-except
+      self.record_error(source, sys.exc_info(), session)
+
+  def raise_errors(self, timeout_sec=0):
+    """Wait for up to `timeout` seconds for all error sources to finish.
+
+    Preferentially raise "interesting" errors (errors not in the
+    _UNINTERESTING_ERRORS) set.
+
+    Args:
+      timeout_sec: Seconds to wait for other error sources.
+    """
+    for _ in range(timeout_sec):
+      if len(self._errors) == self._num_sources:
+        break
+      time.sleep(1)
+
+    kept_errors = [(k, v) for (k, v) in self._errors.items() if v is not None]
+
+    # First check for any interesting errors, then fall back on the session
+    # cancelled errors etc.
+    for k, (typ, value, traceback) in kept_errors:
+      if isinstance(value, _UNINTERESTING_ERRORS):
+        continue
+      else:
+        logging.warn('Reraising captured error')
+        six.reraise(typ, value, traceback)
+
+    for k, (typ, value, traceback) in kept_errors:
+      logging.warn('Reraising captured error')
+      six.reraise(typ, value, traceback)
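
Editor's note: an illustrative, hypothetical use of ErrorRendezvous, not part
of this change. Two sources report errors; raise_errors() then re-raises the
"interesting" ValueError in preference to the CancelledError.

    from tensorflow.python.framework import errors
    from tensorflow.python.tpu import error_handling

    rendezvous = error_handling.ErrorRendezvous(num_sources=2)

    with rendezvous.catch_errors(source='infeed'):
      raise errors.CancelledError(None, None, 'Session was cancelled')

    with rendezvous.catch_errors(source='train'):
      raise ValueError('bad filename')

    # Re-raises the ValueError rather than the uninteresting CancelledError.
    rendezvous.raise_errors(timeout_sec=0)
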
diff --git a/tensorflow/python/tpu/feature_column.py b/tensorflow/python/tpu/feature_column.py
new file mode 100644
index 0000000..2f7e939
--- /dev/null
+++ b/tensorflow/python/tpu/feature_column.py
@@ -0,0 +1,435 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""TPU Feature Column Library."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.feature_column import feature_column_lib as fc_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.tpu import tpu
+from tensorflow.python.tpu import tpu_function
+# pylint: disable=protected-access
+
+
+_TPU_FC_TO_SCOPE = '_tpu_feature_column_scope'
+_SUPPORTED_CATEGORICAL_COLUMNS = (fc._IdentityCategoricalColumn,
+                                  fc._VocabularyFileCategoricalColumn,
+                                  fc._VocabularyListCategoricalColumn,
+                                  fc._WeightedCategoricalColumn,
+                                  fc_lib.IdentityCategoricalColumn,
+                                  fc_lib.VocabularyFileCategoricalColumn,
+                                  fc_lib.VocabularyListCategoricalColumn,
+                                  fc_lib.WeightedCategoricalColumn)
+
+
+def embedding_column(categorical_column,
+                     dimension,
+                     combiner='mean',
+                     initializer=None):
+  """TPU embedding_column for `tf.feature_column.embedding_column`.
+
+  Note that the interface for TPU embedding_column is different from the
+  non-TPU version. The following args, available in the non-TPU version, are
+  NOT supported: ckpt_to_load_from, tensor_name_in_ckpt, max_norm and
+  trainable.
+
+  Args:
+    categorical_column: A categorical_column returned from
+        categorical_column_with_identity,  weighted_categorical_column,
+        categorical_column_with_vocabulary_list or
+        categorical_column_with_vocabulary_file.
+    dimension: An integer specifying dimension of the embedding, must be > 0.
+    combiner: A string specifying how to reduce if there are multiple entries
+      in a single row. For more information, see
+      `tf.feature_column.embedding_column`.
+    initializer: A variable initializer function to be used in embedding
+      variable initialization. If not specified, defaults to
+      `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
+      `1/sqrt(dimension)`.
+
+  Returns:
+    A `_TPUEmbeddingColumn`.
+
+  Raises:
+    ValueError: if `dimension` is not > 0.
+    ValueError: if `initializer` is specified but not callable.
+  """
+  if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
+    raise TypeError(
+        'categorical_column for tpu '
+        'embedding_column must be type %s, got %s.' % (' or '.join([
+            cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
+        ]), type(categorical_column)))
+  if (dimension is None) or (dimension < 1):
+    raise ValueError('Invalid dimension {}.'.format(dimension))
+
+  if (initializer is not None) and (not callable(initializer)):
+    raise ValueError('initializer must be callable if specified. '
+                     'Embedding of column_name: {}'.format(
+                         categorical_column.name))
+  if initializer is None:
+    initializer = init_ops.truncated_normal_initializer(
+        mean=0.0, stddev=1 / math.sqrt(dimension))
+
+  embedding_shape = categorical_column._num_buckets, dimension  # pylint: disable=protected-access
+
+  def _creator(weight_collections, scope):
+    embedding_column_layer = fc._EmbeddingColumnLayer(
+        embedding_shape=embedding_shape,
+        initializer=initializer,
+        weight_collections=weight_collections,
+        trainable=True,
+        name='embedding_column_layer')
+    return embedding_column_layer(None, scope=scope)  # pylint: disable=not-callable
+
+  column = _TPUEmbeddingColumn(
+      categorical_column=categorical_column,
+      dimension=dimension,
+      combiner=combiner,
+      layer_creator=_creator,
+      ckpt_to_load_from=None,
+      tensor_name_in_ckpt=None,
+      max_norm=None,
+      trainable=True)
+  # For Embedding column, the initializer is hidden inside the creator Fn,
+  # which is not accessible later. So, we attach it to a special field. Also
+  # note that non-TPU Embedding column and non-TPU shared Embedding column
+  # handle the initializer differently. See shared_embedding_columns for
+  # details.
+  column._tpu_initializer = initializer
+  return column
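
Editor's note: a small construction example for embedding_column(), not part
of this change; the key name and sizes are made up.

    from tensorflow.python.feature_column import feature_column_lib as fc_lib
    from tensorflow.python.tpu import feature_column as tpu_fc

    video_id = fc_lib.categorical_column_with_identity(
        key='video_id', num_buckets=1000)
    video_emb = tpu_fc.embedding_column(video_id, dimension=32)
    print(video_emb.name)                        # 'video_id_embedding'
    print(video_emb.get_embedding_table_size())  # (1000, 32)
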
+
+
+def shared_embedding_columns(categorical_columns,
+                             dimension,
+                             combiner='mean',
+                             initializer=None,
+                             shared_embedding_collection_name=None):
+  """List of dense columns that convert from sparse, categorical input."""
+  for categorical_column in categorical_columns:
+    if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
+      raise TypeError(
+          'categorical_column for tpu '
+          'shared_embedding_columns must be type %s, got %s.' % (' or '.join([
+              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
+          ]), type(categorical_column)))
+  columns = fc_lib.shared_embedding_columns(
+      categorical_columns,
+      dimension,
+      combiner=combiner,
+      initializer=initializer,
+      shared_embedding_collection_name=shared_embedding_collection_name,
+      ckpt_to_load_from=None,
+      tensor_name_in_ckpt=None,
+      max_norm=None,
+      trainable=True)
+
+  # Use the initializer and shared_embedding_collection_name to create the
+  # TPU version.
+  initializer = columns[0].initializer
+  shared_embedding_collection_name = columns[0].shared_embedding_collection_name
+  tpu_columns = []
+
+  # Create the state (_SharedEmbeddingColumnLayer) here.
+  for categorical_column in categorical_columns:
+    column = _TPUSharedEmbeddingColumn(
+        categorical_column=categorical_column,
+        dimension=dimension,
+        combiner=combiner,
+        initializer=initializer,
+        shared_embedding_collection_name=shared_embedding_collection_name,
+        ckpt_to_load_from=None,
+        tensor_name_in_ckpt=None,
+        max_norm=None,
+        trainable=True)
+    tpu_columns.append(column)
+
+  return tpu_columns
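
Editor's note: a hypothetical sketch of shared_embedding_columns(), not part
of this change, mirroring the defaults exercised in feature_column_test.py
below.

    from tensorflow.python.feature_column import feature_column_lib as fc_lib
    from tensorflow.python.tpu import feature_column as tpu_fc

    col_a = fc_lib.categorical_column_with_identity(key='aaa', num_buckets=3)
    col_b = fc_lib.categorical_column_with_identity(key='bbb', num_buckets=3)
    emb_a, emb_b = tpu_fc.shared_embedding_columns([col_a, col_b], dimension=2)
    # Both columns share one table, and hence one embedding variable name.
    print(emb_a.get_embedding_var_name())  # 'aaa_bbb_shared_embedding'
    print(emb_b.get_embedding_var_name())  # 'aaa_bbb_shared_embedding'
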
+
+
+class _TPUBaseEmbeddingColumn(object):
+  """Base class for TPU Embedding Column."""
+
+  def __init__(self, categorical_column):
+    self._tpu_categorical_column = categorical_column
+
+  def get_combiner(self):
+    """Returns the embedding combiner."""
+    raise NotImplementedError('not implemented')
+
+  def get_embedding_table_size(self):
+    """Returns the embedding table size, tuple of vocab size and dimension."""
+    raise NotImplementedError('not implemented')
+
+  def get_feature_key_name(self):
+    """Returns the feature key name in the features dict."""
+    raise NotImplementedError('not implemented')
+
+  def get_weight_key_name(self):
+    """Return the key name for weights."""
+    raise NotImplementedError('not implemented')
+
+  def get_embedding_var_name(self):
+    """Returns the embedding variable name.
+
+    Feature key name and embedding variable name usually map one-to-one, but
+    for shared embedding columns the mapping is many-to-one.
+    """
+    raise NotImplementedError('not implemented')
+
+  def get_initializer(self):
+    """Returns the initializer."""
+    raise NotImplementedError('not implemented')
+
+  def is_categorical_column_weighted(self):
+    """Check if the categorical column of the embedding column is weighted."""
+    raise NotImplementedError('not implemented')
+
+
+class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
+  """Core Embedding Column."""
+
+  def __new__(cls,
+              categorical_column,
+              dimension,
+              combiner='mean',
+              layer_creator=None,
+              ckpt_to_load_from=None,
+              tensor_name_in_ckpt=None,
+              max_norm=None,
+              trainable=True):
+    # Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable
+    # are not supported on TPU. They are solely for matching the signature of
+    # __new__ of parent class fc._EmbeddingColumn.
+    return fc._EmbeddingColumn.__new__(
+        cls,
+        categorical_column,
+        dimension,
+        combiner=combiner,
+        layer_creator=layer_creator,
+        ckpt_to_load_from=ckpt_to_load_from,
+        tensor_name_in_ckpt=tensor_name_in_ckpt,
+        max_norm=max_norm,
+        trainable=trainable)
+
+  def __init__(self,
+               categorical_column,
+               dimension,
+               combiner='mean',
+               layer_creator=None,
+               ckpt_to_load_from=None,
+               tensor_name_in_ckpt=None,
+               max_norm=None,
+               trainable=True):
+    _TPUBaseEmbeddingColumn.__init__(self, categorical_column)
+    self._key = None
+
+  def get_combiner(self):
+    return self.combiner
+
+  def get_embedding_table_size(self):
+    """Returns num_ids and width."""
+    return (self.categorical_column._num_buckets, self.dimension)
+
+  def get_feature_key_name(self):
+    """get_feature_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.categorical_column.name
+    return self.categorical_column.name
+
+  def get_weight_key_name(self):
+    """get_weight_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.weight_feature_key
+    return None
+
+  def get_embedding_var_name(self):
+    """get_embedding_var_name."""
+    return self.categorical_column.name
+
+  def get_initializer(self):
+    return self._tpu_initializer
+
+  def is_categorical_column_weighted(self):
+    """Check if the categorical column of the embedding column is weighted."""
+    if isinstance(
+        self.categorical_column,
+        (
+            fc._WeightedCategoricalColumn,  # pylint: disable=protected-access
+            fc_lib.WeightedCategoricalColumn)):
+      return True
+    return False
+
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    if tpu.under_tpu_inference_context():
+      def host_computation():
+        return fc._EmbeddingColumn._get_dense_tensor(
+            self, inputs, weight_collections, trainable)
+      return tpu.outside_compilation(host_computation)
+
+    if _is_running_on_cpu():
+      return fc._EmbeddingColumn._get_dense_tensor(
+          self, inputs, weight_collections, trainable)
+
+    # TPU mode
+    # Get the embeddings from the LazyBuilder.
+    tensor = inputs.get(self.get_feature_key_name())
+
+    # Add to collection for _create_tpu_embedding_variables_and_ops
+    _record_variable_scope_and_name(self.get_embedding_var_name(),
+                                    'embedding_weights')
+
+    return tensor
+
+
+class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
+                                fc._SharedEmbeddingColumn):
+  """Core Shared Embedding Column."""
+
+  def __new__(cls,
+              categorical_column,
+              dimension,
+              combiner='mean',
+              initializer=None,
+              shared_embedding_collection_name=None,
+              ckpt_to_load_from=None,
+              tensor_name_in_ckpt=None,
+              max_norm=None,
+              trainable=True):
+    return fc._SharedEmbeddingColumn.__new__(
+        cls,
+        categorical_column,
+        dimension,
+        combiner=combiner,
+        initializer=initializer,
+        shared_embedding_collection_name=shared_embedding_collection_name,
+        ckpt_to_load_from=ckpt_to_load_from,
+        tensor_name_in_ckpt=tensor_name_in_ckpt,
+        max_norm=max_norm,
+        trainable=trainable)
+
+  def __init__(self,
+               categorical_column,
+               dimension,
+               combiner='mean',
+               initializer=None,
+               shared_embedding_collection_name=None,
+               ckpt_to_load_from=None,
+               tensor_name_in_ckpt=None,
+               max_norm=None,
+               trainable=True):
+
+    _TPUBaseEmbeddingColumn.__init__(self, categorical_column)
+    self._key = None
+
+  def get_combiner(self):
+    return self.combiner
+
+  def get_embedding_table_size(self):
+    """Returns num_ids and width."""
+    return (self.categorical_column._num_buckets, self.dimension)
+
+  def get_feature_key_name(self):
+    """get_feature_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.categorical_column.name
+    return self.categorical_column.name
+
+  def get_weight_key_name(self):
+    """get_weight_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.weight_feature_key
+    return None
+
+  def get_embedding_var_name(self):
+    """get_embedding_var_name."""
+    return self.shared_embedding_collection_name
+
+  def get_initializer(self):
+    return self.initializer
+
+  def is_categorical_column_weighted(self):
+    """Check if the categorical column of the embedding column is weighted."""
+    if isinstance(
+        self.categorical_column,
+        (
+            fc._WeightedCategoricalColumn,  # pylint: disable=protected-access
+            fc_lib.WeightedCategoricalColumn)):
+      return True
+    return False
+
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    if tpu.under_tpu_inference_context():
+      def host_computation():
+        return fc._SharedEmbeddingColumn._get_dense_tensor(
+            self, inputs, weight_collections, trainable)
+      return tpu.outside_compilation(host_computation)
+
+    if _is_running_on_cpu():
+      return fc._SharedEmbeddingColumn._get_dense_tensor(
+          self, inputs, weight_collections, trainable)
+
+    # TPU mode
+    # Get the embeddings from the LazyBuilder.
+    tensor = inputs.get(self.get_feature_key_name())
+
+    # Add to collection for _create_tpu_embedding_variables_and_ops
+    _record_variable_scope_and_name(
+        self.get_embedding_var_name(),
+        'embedding_weights',
+        is_shared_embedding=True)
+    return tensor
+
+
+def _record_variable_scope_and_name(embedding_var_name,
+                                    embedding_var_name_in_fc,
+                                    is_shared_embedding=False):
+  """Add embedding variable name and scope to collection."""
+  g = ops.get_default_graph()
+  collection = g.get_collection_ref(_TPU_FC_TO_SCOPE)
+  if not collection:
+    collection.append({})
+
+  var_def_dict = collection[0]
+
+  captured_scope = variable_scope.get_variable_scope()
+  captured_scope_name = captured_scope.name
+
+  if embedding_var_name in var_def_dict:
+    if (var_def_dict[embedding_var_name][0] != captured_scope_name
+        and not is_shared_embedding):
+      raise ValueError(
+          'For embedding var name {}, the variable scope name is different, '
+          'got {}; expected {}'.format(embedding_var_name,
+                                       captured_scope_name,
+                                       var_def_dict[embedding_var_name][0]))
+    if var_def_dict[embedding_var_name][1] != embedding_var_name_in_fc:
+      raise ValueError(
+          'For embedding var name {}, the embedding name is different, '
+          'got {}; expected {}'.format(embedding_var_name,
+                                       embedding_var_name_in_fc,
+                                       var_def_dict[embedding_var_name][1]))
+  else:
+    var_def_dict[embedding_var_name] = (captured_scope_name,
+                                        embedding_var_name_in_fc)
+
+
+def _is_running_on_cpu():
+  """Returns True if the current context is CPU model."""
+  return tpu_function.get_tpu_context().number_of_shards is None
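
Editor's note: an illustrative peek at the bookkeeping these private helpers
do, not part of this change; the embedding name is made up. Outside any TPU
shard context, _is_running_on_cpu() is True, and recording a name stores the
capturing variable scope in the _TPU_FC_TO_SCOPE collection.

    from tensorflow.python.framework import ops
    from tensorflow.python.tpu import feature_column as tpu_fc

    print(tpu_fc._is_running_on_cpu())  # True (no TPU shard context is set)

    tpu_fc._record_variable_scope_and_name('video_id', 'embedding_weights')
    g = ops.get_default_graph()
    # The top-level variable scope name is the empty string:
    print(g.get_collection(tpu_fc._TPU_FC_TO_SCOPE)[0])
    # -> {'video_id': ('', 'embedding_weights')}
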
diff --git a/tensorflow/python/tpu/feature_column_test.py b/tensorflow/python/tpu/feature_column_test.py
new file mode 100644
index 0000000..6feeb01
--- /dev/null
+++ b/tensorflow/python/tpu/feature_column_test.py
@@ -0,0 +1,286 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""Tests for python.tpu.feature_column."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.feature_column import feature_column_lib as fc_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import feature_column as tpu_fc
+
+
+def _initialized_session():
+  sess = session.Session()
+  sess.run(variables_lib.global_variables_initializer())
+  sess.run(lookup_ops.tables_initializer())
+  return sess
+
+
+class EmbeddingColumnTest(test.TestCase):
+
+  def test_defaults(self):
+    categorical_column = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column = tpu_fc.embedding_column(
+        categorical_column, dimension=embedding_dimension)
+    self.assertIs(categorical_column, embedding_column.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column.dimension)
+    self.assertEqual('mean', embedding_column.combiner)
+    self.assertEqual('aaa_embedding', embedding_column.name)
+    self.assertEqual('aaa_embedding', embedding_column._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column._parse_example_spec)
+
+  def test_all_constructor_args(self):
+    categorical_column = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column = tpu_fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        combiner='my_combiner',
+        initializer=lambda: 'my_initializer')
+    self.assertIs(categorical_column, embedding_column.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column.dimension)
+    self.assertEqual('my_combiner', embedding_column.combiner)
+    self.assertEqual('aaa_embedding', embedding_column.name)
+    self.assertEqual('aaa_embedding', embedding_column._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column._parse_example_spec)
+
+  def test_get_dense_tensor(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = tpu_fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup = embedding_column._get_dense_tensor(
+        fc._LazyBuilder({
+            'aaa': sparse_input
+        }))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, global_vars[0].eval())
+      self.assertAllEqual(expected_lookups, embedding_lookup.eval())
+
+
+class SharedEmbeddingColumnTest(test.TestCase):
+
+  def test_defaults(self):
+    categorical_column_a = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc_lib.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_b, embedding_column_a = tpu_fc.shared_embedding_columns(
+        [categorical_column_b, categorical_column_a],
+        dimension=embedding_dimension)
+    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
+    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column_a.dimension)
+    self.assertEqual(embedding_dimension, embedding_column_b.dimension)
+    self.assertEqual('mean', embedding_column_a.combiner)
+    self.assertEqual('mean', embedding_column_b.combiner)
+    self.assertIsNotNone(embedding_column_a.initializer)
+    self.assertIsNotNone(embedding_column_b.initializer)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_a.shared_embedding_collection_name)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_b.shared_embedding_collection_name)
+    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
+    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_a._var_scope_name)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_b._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column_a._variable_shape)
+    self.assertEqual((embedding_dimension,), embedding_column_b._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_a._parse_example_spec)
+    self.assertEqual({
+        'bbb': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_b._parse_example_spec)
+
+  def test_all_constructor_args(self):
+    categorical_column_a = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc_lib.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension,
+        combiner='my_combiner',
+        initializer=lambda: 'my_initializer',
+        shared_embedding_collection_name='var_scope_name')
+    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
+    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column_a.dimension)
+    self.assertEqual(embedding_dimension, embedding_column_b.dimension)
+    self.assertEqual('my_combiner', embedding_column_a.combiner)
+    self.assertEqual('my_combiner', embedding_column_b.combiner)
+    self.assertEqual('my_initializer', embedding_column_a.initializer())
+    self.assertEqual('my_initializer', embedding_column_b.initializer())
+    self.assertEqual('var_scope_name',
+                     embedding_column_a.shared_embedding_collection_name)
+    self.assertEqual('var_scope_name',
+                     embedding_column_b.shared_embedding_collection_name)
+    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
+    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
+    self.assertEqual('var_scope_name', embedding_column_a._var_scope_name)
+    self.assertEqual('var_scope_name', embedding_column_b._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column_a._variable_shape)
+    self.assertEqual((embedding_dimension,), embedding_column_b._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_a._parse_example_spec)
+    self.assertEqual({
+        'bbb': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_b._parse_example_spec)
+
+  def test_get_dense_tensor(self):
+    # Inputs.
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array([
+        [2, -1, -1],  # example 0, ids [2]
+        [0, 1, -1]
+    ])  # example 1, ids [0, 1]
+    input_b = np.array([
+        [0, -1, -1],  # example 0, ids [0]
+        [-1, -1, -1]
+    ])  # example 1, ids []
+    input_features = {'aaa': input_a, 'bbb': input_b}
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups_a = (
+        # example 0:
+        (7., 11.),  # ids [2], embedding = [7, 11]
+        # example 1:
+        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+    )
+    expected_lookups_b = (
+        # example 0:
+        (1., 2.),  # ids [0], embedding = [1, 2]
+        # example 1:
+        (0., 0.),  # ids [], embedding = [0, 0]
+    )
+
+    # Build columns.
+    categorical_column_a = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc_lib.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup_a = embedding_column_a._get_dense_tensor(
+        fc._LazyBuilder(input_features))
+    embedding_lookup_b = embedding_column_b._get_dense_tensor(
+        fc._LazyBuilder(input_features))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    embedding_var = global_vars[0]
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, embedding_var.eval())
+      self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
+      self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/tpu/functional.py b/tensorflow/python/tpu/functional.py
new file mode 100644
index 0000000..045ec52
--- /dev/null
+++ b/tensorflow/python/tpu/functional.py
@@ -0,0 +1,23 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Functional operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.tpu.ops import tpu_ops
+
+TPUPartitionedCall = tpu_ops.tpu_partitioned_call  # pylint: disable=invalid-name
diff --git a/tensorflow/python/tpu/ops/tpu_ops.py b/tensorflow/python/tpu/ops/tpu_ops.py
new file mode 100644
index 0000000..e6e411c
--- /dev/null
+++ b/tensorflow/python/tpu/ops/tpu_ops.py
@@ -0,0 +1,418 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Operations for TPUs."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import platform
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import tpu_function
+
+if platform.system() != "Windows":
+  # pylint: disable=wildcard-import,unused-import,g-import-not-at-top
+  from tensorflow.python.ops import gen_tpu_ops
+  from tensorflow.python.ops.gen_tpu_ops import *
+  # pylint: enable=wildcard-import,unused-import,g-import-not-at-top
+
+  def _create_default_group_assignment():
+    num_shards = tpu_function.get_tpu_context().number_of_shards
+    if num_shards is None:
+      logging.warning(
+          "cross_replica_sum should be used within a tpu_shard_context, but "
+          "got unset number_of_shards. Assuming 1.")
+      num_shards = 1
+    group_assignment = [list(range(num_shards))]
+    return group_assignment
+
+  def all_to_all(x,
+                 concat_dimension,
+                 split_dimension,
+                 split_count,
+                 group_assignment=None,
+                 name=None):
+    """Exchange data across TPU replicas.
+
+    Args:
+      x: The local tensor.
+      concat_dimension: The dimension number to concatenate.
+      split_dimension: The dimension number to split.
+      split_count: The number of splits; this number must equal the sub-group
+        size (group_assignment.get_shape()[1]).
+      group_assignment: Optional 2d int32 lists with shape [num_groups,
+        num_replicas_per_group]. `group_assignment[i]` represents the replica
+        ids in the ith subgroup.
+      name: Optional op name.
+
+    Returns:
+      A `Tensor` which is concatenated by data from different replicas.
+    """
+    if group_assignment is None:
+      group_assignment = _create_default_group_assignment()
+    return gen_tpu_ops.all_to_all(
+        x,
+        group_assignment,
+        concat_dimension=concat_dimension,
+        split_dimension=split_dimension,
+        split_count=split_count,
+        name=name)
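
Editor's note: a schematic numpy model of the exchange all_to_all performs,
not part of this change and not the real TPU op; it only illustrates the
split/concat semantics described above for a single group of replicas.

    import numpy as np

    def simulated_all_to_all(replica_inputs, concat_dimension,
                             split_dimension, split_count):
      # Each replica splits its tensor along split_dimension; replica i then
      # receives the i-th block from every peer, concatenated along
      # concat_dimension.
      blocks = [np.split(x, split_count, axis=split_dimension)
                for x in replica_inputs]
      return [np.concatenate([b[i] for b in blocks], axis=concat_dimension)
              for i in range(split_count)]

    a = np.arange(4).reshape(2, 2)      # replica 0: [[0, 1], [2, 3]]
    b = np.arange(4).reshape(2, 2) + 4  # replica 1: [[4, 5], [6, 7]]
    out = simulated_all_to_all([a, b], concat_dimension=0, split_dimension=0,
                               split_count=2)
    # out[0] == [[0, 1], [4, 5]]; out[1] == [[2, 3], [6, 7]]
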
+
+  @ops.RegisterGradient("AllToAll")
+  def _all_to_all_grad(op, grad):
+    # The gradient of an all-to-all is also an all-to-all, but with the
+    # split_dimension and concat_dimension swapped.
+    # The gradient with respect to group_assignment is None.
+    return [
+        gen_tpu_ops.all_to_all(
+            grad,
+            op.inputs[1],
+            concat_dimension=op.get_attr("split_dimension"),
+            split_dimension=op.get_attr("concat_dimension"),
+            split_count=op.get_attr("split_count")), None
+    ]
+
+  def cross_replica_sum(x, group_assignment=None, name=None):
+    """Sum the input tensor across replicas according to group_assignment.
+
+    Args:
+      x: The local tensor to the sum.
+      group_assignment: Optional 2d int32 lists with shape [num_groups,
+        num_replicas_per_group]. `group_assignment[i]` represents the replica
+        ids in the ith subgroup.
+      name: Optional op name.
+
+    Returns:
+      A `Tensor` which is summed across replicas.
+    """
+    if group_assignment is None:
+      group_assignment = _create_default_group_assignment()
+
+    return gen_tpu_ops.cross_replica_sum(x, group_assignment, name=name)
+
+  def collective_permute(x, source_target_pairs, name=None):
+    """Permute the input tensor across replicas given source_target_pairs.
+
+    For each source_target_pair <a, b>, we send replica a's input to replica b.
+    Each replica id may appear at most once in the source column, and at most
+    once in the target column.
+    For any replica id not in the target column, this op returns a zero tensor
+    with the same shape and dtype as the input x.
+
+    For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing
+    source_target_pairs=`[[0,1],[1,2],[2,3]]` gets the outputs:
+    `[0, A, B, C]`.
+
+    Args:
+      x: The local tensor to be permuted.
+      source_target_pairs: 2d int lists with shape [num_pairs, 2].
+        source_target_pairs[i][0] represents the source replica id and
+        source_target_pairs[i][1] represents the target replica id.
+      name: Optional op name.
+
+    Returns:
+      A `Tensor` which is permuted.
+    """
+    return gen_tpu_ops.collective_permute(x, source_target_pairs, name=name)
+
+  @ops.RegisterGradient("CollectivePermute")
+  def _collective_permute_grad(op, grad):
+    # The gradient of a collective permute operation is also a collective
+    # permute, but with source/target pairs reversed. The gradient with respect
+    # to input argument `source_target_pairs` is `None`.
+    source_target_pairs = op.inputs[1][:, ::-1]
+    return [gen_tpu_ops.collective_permute(grad, source_target_pairs), None]
+
+  @ops.RegisterGradient("CrossReplicaSum")
+  def _cross_replica_sum_grad(op, grad):
+    # The gradient of a cross replica sum is also a cross-replica sum.
+    # The gradient with respect to group_assignment is None.
+    return [gen_tpu_ops.cross_replica_sum(grad, op.inputs[1]), None]
+
+  # This extra type checking exists to give a more helpful error message in
+  # the common case that uint8 and int64 values are infed. Remove when both
+  # types are supported.
+
+  _SUPPORTED_INFEED_DTYPES = set([
+      dtypes.bool, dtypes.int32, dtypes.int64, dtypes.bfloat16, dtypes.float32,
+      dtypes.complex64, dtypes.uint32
+  ])
+
+  @ops.RegisterGradient("TPUEmbeddingActivations")
+  def _embedding_activations_grad(activations_op, grad_wrt_activations):
+    """Saves the gradient of embedding activations ops in a graph collection."""
+    g = ops.get_default_graph()
+    table_id = activations_op.get_attr("table_id")
+    lookup_id = activations_op.get_attr("lookup_id")
+    table_gradients = g.get_collection_ref(
+        "tpu_embedding_gradients_table_%d" % table_id)
+
+    if not table_gradients:
+      raise RuntimeError(
+          "Gradients for TPUEmbedding have been generated in non-training mode."
+          "This is not expected. Consider putting your Optimizer.minimize code "
+          "behind the training mode condition check. For Estimator, you can "
+          "do \n\n"
+          "    if mode == tf.estimator.ModeKeys.TRAIN:\n"
+          "        train_op = opt.minimize(loss)\n"
+          "\n")
+
+    table_gradients[lookup_id] = array_ops.identity(grad_wrt_activations)
+    return [
+        # RegisterGradient requires that value be returned for all inputs. Since
+        # the first argument (tpu_gradient_variable_{table_name}) has shape [1],
+        # we will return zeros(shape=[1]). The actual gradient w.r.t. the
+        # embedding activations (grad_wrt_activations) has the same shape as the
+        # activations returned by embedding_activations.
+        array_ops.zeros(arg.shape, dtype=dtypes.float32)
+        for arg in activations_op.inputs
+    ]
+
+  def infeed_dequeue(dtype, shape, name=None):
+    """A placeholder op for a value that will be fed into the computation.
+
+    Args:
+      dtype: A `tf.DType`. The type of elements in the tensor.
+      shape: A `tf.TensorShape` or list of `ints`. The shape of the tensor.
+      name: A name for the operation (optional).
+
+    Returns:
+      A `Tensor` of type `dtype`.
+      A tensor that will be provided using the infeed mechanism.
+
+    Raises:
+      TypeError: If `dtype` is not a supported infeed type.
+    """
+    if dtype not in _SUPPORTED_INFEED_DTYPES:
+      raise TypeError(
+          "{} is not a supported TPU infeed type. Supported types are: "
+          "{}".format(dtype, list(_SUPPORTED_INFEED_DTYPES)))
+
+    return gen_tpu_ops.infeed_dequeue(dtype, shape, name=name)
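+
+  # Illustrative sketch (dtype and shape are hypothetical): inside a TPU
+  # computation, a host-enqueued batch can be consumed as
+  #
+  #   images = infeed_dequeue(dtypes.float32, shape=[128, 28, 28, 1])
+  #
+  # where the dtype/shape must match what the host enqueues through the
+  # corresponding infeed enqueue op.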
+
+  # pylint: disable=redefined-outer-name
+  def infeed_dequeue_tuple(dtypes, shapes, name=None):
+    """A placeholder op for values fed into the TPU simultaneously as a tuple.
+
+    Args:
+      dtypes: A list of `tf.DType`s that has length `>= 1`.
+        The element types of each element in `outputs`.
+      shapes: A list of shapes (each a `tf.TensorShape` or list of `ints`).
+        The shapes of each tensor in `outputs`.
+      name: A name for the operation (optional).
+
+    Returns:
+      A list of `Tensor` objects of type `dtypes`.
+      A list of tensors that will be provided using the infeed mechanism.
+
+    Raises:
+      TypeError: If a type in `dtypes` is not a supported infeed type.
+    """
+    for dtype in dtypes:
+      if dtype not in _SUPPORTED_INFEED_DTYPES:
+        raise TypeError(
+            "{} is not a supported TPU infeed type. Supported types are: "
+            "{}".format(dtype, list(_SUPPORTED_INFEED_DTYPES)))
+    return gen_tpu_ops.infeed_dequeue_tuple(dtypes, shapes, name=name)
+  # pylint: enable=redefined-outer-name
+
+  # pylint: disable=protected-access
+  def send_tpu_embedding_gradients(inputs,
+                                   config,
+                                   learning_rates=None,
+                                   name=None):
+    """A placeholder op for feeding per-sample gradients to the embedding layer.
+
+    Args:
+      inputs: A TensorList of gradients with which to update embedding tables.
+          This argument has the same length and shapes as the return value of
+          RecvTPUEmbeddingActivations, but contains gradients of the model's
+          loss with respect to the embedding activations. The embedding tables
+          are updated from these gradients via the optimizers specified in the
+          TPU embedding configuration given to tpu.initialize_system.
+      config: Serialized TPUEmbeddingConfiguration proto.
+      learning_rates: A TensorList of float32 scalars, one for each dynamic
+          learning rate tag: see the comments in
+          //third_party/tensorflow/core/protobuf/tpu/optimization_parameters.proto.
+          Multiple tables can share the same dynamic learning rate tag as
+          specified in the configuration. If the learning rates for all tables
+          are constant, this list should be empty.
+      name: A name for the operation (optional).
+
+    Returns:
+      A SendTPUEmbeddingGradients operation.
+    """
+    if learning_rates is None:
+      learning_rates = []
+    return gen_tpu_ops.send_tpu_embedding_gradients(
+        inputs=inputs, learning_rates=learning_rates, config=config, name=name)
+
+  send_tpu_embedding_gradients.__doc__ = (
+      gen_tpu_ops.send_tpu_embedding_gradients.__doc__)
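+
+  # Illustrative sketch (`loss`, `activations`, and `embedding_config` are
+  # hypothetical names for values produced elsewhere in the training graph):
+  #
+  #   grads = tf.gradients(loss, activations)
+  #   send_op = send_tpu_embedding_gradients(grads, config=embedding_config)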
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_integer_batch(batch,
+                                          device_ordinal,
+                                          mode_override=None,
+                                          name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      batch: A list of 1D tensors, one for each embedding table, containing the
+        indices into the tables.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingIntegerBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops.enqueue_tpu_embedding_integer_batch(
+        batch=batch,
+        device_ordinal=device_ordinal,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_integer_batch.__doc__ = (
+      gen_tpu_ops.enqueue_tpu_embedding_integer_batch.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_batch(sample_indices,
+                                         embedding_indices,
+                                         aggregation_weights,
+                                         device_ordinal,
+                                         combiners=None,
+                                         mode_override=None,
+                                         name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        and feature to which the corresponding embedding_indices and
+        aggregation_weights values belong. sample_indices[i] must equal b * nf +
+        f, where nf is the number of features from the corresponding table, f is
+        in [0, nf), and b is in [0, batch size).
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables.
+      aggregation_weights: A list of rank 1 Tensors containing per sample --
+        i.e. per (training example, feature) -- aggregation weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table,
+        specifying how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops.enqueue_tpu_embedding_sparse_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_batch.__doc__ = (
+      gen_tpu_ops.enqueue_tpu_embedding_sparse_batch.__doc__)
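+
+  # Worked example of the sample_indices encoding (numbers are illustrative):
+  # for a table contributing nf = 2 features, the entry of sample_indices for
+  # training example b = 3 and feature f = 1 is b * nf + f = 3 * 2 + 1 = 7.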
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices,
+                                                embedding_indices,
+                                                aggregation_weights,
+                                                table_ids,
+                                                device_ordinal,
+                                                combiners=None,
+                                                mode_override=None,
+                                                name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        to which the corresponding embedding_indices and aggregation_weights
+        values belong. It corresponds to sp_ids.indices[:,0] in
+        embedding_lookup_sparse().
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables. It corresponds to sp_ids.values in embedding_lookup_sparse().
+      aggregation_weights: A list of rank 1 Tensors containing per training
+        example aggregation weights. It corresponds to sp_weights.values in
+        embedding_lookup_sparse().
+      table_ids: A list of integers specifying the identifier of the embedding
+        table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to
+        lookup the corresponding input. The ith input is looked up using
+        table_ids[i]. The size of the table_ids list must be equal to that of
+        sample_indices, embedding_indices and aggregation_weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table,
+        specifying how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseTensorBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops.enqueue_tpu_embedding_sparse_tensor_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        table_ids=table_ids,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = (
+      gen_tpu_ops.enqueue_tpu_embedding_sparse_tensor_batch.__doc__)
+
+else:
+  # We have already built the appropriate libraries into the binary via CMake
+  # if we have built contrib, so we don't need this.
+  pass
diff --git a/tensorflow/python/tpu/ops/tpu_ordinal_selector_op.py b/tensorflow/python/tpu/ops/tpu_ordinal_selector_op.py
new file mode 100644
index 0000000..1f2dce2
--- /dev/null
+++ b/tensorflow/python/tpu/ops/tpu_ordinal_selector_op.py
@@ -0,0 +1,20 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Operations to select TPU core to run."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/python/tpu/profiler/BUILD b/tensorflow/python/tpu/profiler/BUILD
new file mode 100644
index 0000000..bfe79454
--- /dev/null
+++ b/tensorflow/python/tpu/profiler/BUILD
@@ -0,0 +1,27 @@
+licenses(["notice"])  # Apache 2.0
+
+package(
+    default_visibility = [
+        "//tensorflow:__subpackages__",
+    ],
+)
+
+py_library(
+    name = "profiler",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":tpu_profiler_analysis_pb2_grpc",
+        "//tensorflow/core/profiler:profiler_analysis_proto_py",
+        "//tensorflow/core/profiler:protos_all_py",
+        "//tensorflow/python:util",
+    ],
+)
+
+py_library(
+    name = "tpu_profiler_analysis_pb2_grpc",
+    srcs = ["tpu_profiler_analysis_pb2_grpc.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = ["//tensorflow/core/profiler:profiler_analysis_proto_py"],
+)
diff --git a/tensorflow/python/tpu/profiler/__init__.py b/tensorflow/python/tpu/profiler/__init__.py
new file mode 100644
index 0000000..0c183aa
--- /dev/null
+++ b/tensorflow/python/tpu/profiler/__init__.py
@@ -0,0 +1,31 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Classes for TPU trace events."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.core.profiler.trace_events_pb2 import *
+from tensorflow.core.profiler.profiler_analysis_pb2 import *
+# pylint: enable=wildcard-import,unused-import
+
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = ['Trace', 'Resource', 'Device', 'TraceEvent']
+
+remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py b/tensorflow/python/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py
similarity index 100%
rename from tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py
rename to tensorflow/python/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py
diff --git a/tensorflow/python/tpu/session_support.py b/tensorflow/python/tpu/session_support.py
new file mode 100644
index 0000000..3df7185
--- /dev/null
+++ b/tensorflow/python/tpu/session_support.py
@@ -0,0 +1,438 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Operations for handling session logging and shutdown notifications."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import threading
+import time
+
+from google.protobuf import text_format
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.util import event_pb2
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu.ops import tpu_ops
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training_util
+
+_WATCHDOG = None
+
+
+class CoordinatorShutdownException(Exception):
+  """Raised when the coordinator needs to shutdown."""
+  pass
+
+
+def _clone_session(session, graph=None):
+  return session_lib.Session(
+      target=session.sess_str,
+      config=session._config,  # pylint: disable=protected-access
+      graph=graph if graph else session.graph)
+
+
+def _make_heartbeat_op(session, device, request_ph):
+  """Return a heartbeat op or None if heartbeats are not supported by device."""
+  try:
+    # Test if we can connect in an isolated graph + session
+    with ops.Graph().as_default():
+      with _clone_session(session) as temp_session:
+        with ops.device(device):
+          heartbeat_op = tpu_ops.worker_heartbeat('')
+          options = config_pb2.RunOptions(timeout_in_ms=5000)
+          temp_session.run(heartbeat_op, options=options)
+  except errors.InvalidArgumentError as _:
+    logging.warning('Error running heartbeat on %s', device)
+    return None
+  except errors.DeadlineExceededError as _:
+    logging.warning('Timeout connecting to %s when testing heartbeat', device)
+    return None
+
+  # If we successfully connected and pinged the worker, go ahead and construct
+  # the operation.
+  with ops.device(device):
+    return tpu_ops.worker_heartbeat(request_ph)
+
+
+class WorkerHeartbeatManager(object):
+  """Manages the status/heartbeat monitor for a set of workers."""
+
+  def __init__(self, session, devices, heartbeat_ops, request_placeholder):
+    """Construct a new WorkerHeartbeatManager.
+
+    (Prefer using `WorkerHeartbeatManager.from_devices` when possible.)
+
+    Args:
+      session: `tf.Session`, session to use for heartbeat operations.
+      devices: `list[string]` Set of devices to connect to.
+      heartbeat_ops: `list[tf.Operation]` Heartbeat operations.
+      request_placeholder: `tf.Placeholder[String]` Placeholder used to specify
+        the WorkerHeartbeatRequest protocol buffer.
+    """
+    self._session = session
+    self._devices = devices
+    self._ops = heartbeat_ops
+    self._request_placeholder = request_placeholder
+
+  @staticmethod
+  def from_devices(session, devices):
+    """Construct a heartbeat manager for the given devices."""
+    if not devices:
+      logging.error('Trying to create heartbeat manager with no devices?')
+
+    logging.info('Creating heartbeat manager for %s', devices)
+    request_placeholder = array_ops.placeholder(
+        name='worker_heartbeat_request', dtype=dtypes.string)
+
+    heartbeat_ops = []
+    kept_devices = []
+    for device in devices:
+      heartbeat_op = _make_heartbeat_op(session, device, request_placeholder)
+      if heartbeat_op is not None:
+        kept_devices.append(device)
+        heartbeat_ops.append(heartbeat_op)
+      else:
+        logging.warning('Heartbeat support not available for %s', device)
+
+    return WorkerHeartbeatManager(session, kept_devices, heartbeat_ops,
+                                  request_placeholder)
+
+  def num_workers(self):
+    return len(self._devices)
+
+  def configure(self, message):
+    """Configure heartbeat manager for all devices.
+
+    Args:
+      message: `event_pb2.WorkerHeartbeatRequest`
+    Returns: `None`
+    """
+    logging.info('Configuring worker heartbeat: %s',
+                 text_format.MessageToString(message))
+    self._session.run(self._ops,
+                      {self._request_placeholder: message.SerializeToString()})
+
+  def ping(self, request=None, timeout_in_ms=5000):
+    """Ping all workers, returning the parsed status results."""
+    if request is None:
+      request = event_pb2.WorkerHeartbeatRequest()
+
+    options = config_pb2.RunOptions(timeout_in_ms=timeout_in_ms)
+    results = self._session.run(
+        self._ops,
+        feed_dict={self._request_placeholder: request.SerializeToString()},
+        options=options)
+    parsed_results = [
+        event_pb2.WorkerHeartbeatResponse.FromString(res_pb)
+        for res_pb in results
+    ]
+    logging.debug('Ping results: %s', parsed_results)
+    return parsed_results
+
+  def lame_workers(self):
+    """Ping all workers, returning manager containing lame workers (or None)."""
+    ping_results = self.ping()
+    lame_workers = []
+
+    for ping_response, device, op in zip(ping_results, self._devices,
+                                         self._ops):
+      if ping_response.health_status != event_pb2.OK:
+        lame_workers.append((device, op))
+
+    if not lame_workers:
+      return None
+
+    bad_devices, bad_ops = zip(*lame_workers)
+    return WorkerHeartbeatManager(self._session, bad_devices, bad_ops,
+                                  self._request_placeholder)
+
+  def __repr__(self):
+    return 'HeartbeatManager(%s)' % ','.join(self._devices)
+
+  def shutdown(self, timeout_ms=10000):
+    """Shutdown all workers after `shutdown_timeout_secs`."""
+    logging.info('Shutting down %s.', self)
+    req = event_pb2.WorkerHeartbeatRequest(
+        watchdog_config=event_pb2.WatchdogConfig(timeout_ms=timeout_ms),
+        shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR)
+    self.configure(req)
+
+    # Wait for workers to shutdown.  This isn't strictly required
+    # but it avoids triggering multiple checkpoints with the same lame worker.
+    logging.info('Waiting %dms for worker shutdown.', timeout_ms)
+    time.sleep(timeout_ms / 1000)
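+
+  # Illustrative usage sketch (assumes an existing `session` and `devices`):
+  #
+  #   manager = WorkerHeartbeatManager.from_devices(session, devices)
+  #   lame = manager.lame_workers()
+  #   if lame is not None:
+  #     lame.shutdown(timeout_ms=10000)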
+
+
+def all_worker_devices(session):
+  """Return a list of devices for each worker in the system."""
+  devices = session.list_devices()
+  return [
+      device.name
+      for device in devices
+      if ':CPU:' in device.name and 'coordinator' not in device.name
+  ]
+
+
+class WatchdogManager(threading.Thread):
+  """Configures worker watchdog timer and handles periodic pings.
+
+  Usage:
+    # Ping workers every minute, shutting down workers if they haven't received
+    # a ping after 1 hour.
+    watchdog_manager = WatchdogManager(
+        session, ping_interval=60, shutdown_timeout=3600)
+
+    # Use as a context manager, resetting watchdog on context exit:
+    with watchdog_manager:
+      session.run(...)
+
+    # Or setup globally; watchdog will remain active until program exit.
+    watchdog_manager.configure_and_run()
+  """
+
+  def __init__(self,
+               session,
+               devices=None,
+               ping_interval=60,
+               shutdown_timeout=3600):
+    """Initialize a watchdog manager.
+
+    Args:
+      session: Session connected to worker devices.  A cloned session and graph
+        will be created for managing worker pings.
+      devices: Set of devices to monitor.  If None, all workers will be
+        monitored.
+      ping_interval: Time, in seconds, between watchdog pings.
+      shutdown_timeout: Time, in seconds, before watchdog timeout.
+    """
+    threading.Thread.__init__(self)
+    self.ping_interval = ping_interval
+    self.shutdown_timeout = shutdown_timeout
+    self.daemon = True
+    self._config = session._config  # pylint: disable=protected-access
+    self._target = session.sess_str
+    self._running = False
+    self._devices = devices
+
+    self._graph = None
+    self._session = None
+    self._worker_manager = None
+
+  def _reset_manager(self):
+    """Reset the graph, session and worker manager."""
+    self._graph = ops.Graph()
+    self._session = session_lib.Session(
+        target=self._target,
+        graph=self._graph,
+        config=self._config,
+    )
+
+    if self._devices is None:
+      self._devices = all_worker_devices(self._session)
+
+    with self._graph.as_default():
+      self._worker_manager = WorkerHeartbeatManager.from_devices(
+          self._session, self._devices)
+
+    self._worker_manager.configure(
+        event_pb2.WorkerHeartbeatRequest(
+            watchdog_config=event_pb2.WatchdogConfig(
+                timeout_ms=self.shutdown_timeout * 1000,),
+            shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR))
+
+  def configure_and_run(self):
+    logging.info(
+        'Enabling watchdog timer with %d second timeout '
+        'and %d second ping interval.', self.shutdown_timeout,
+        self.ping_interval)
+    self._reset_manager()
+    self._running = True
+    self.start()
+
+  def stop(self):
+    logging.info('Stopping worker watchdog.')
+    self._worker_manager.configure(
+        event_pb2.WorkerHeartbeatRequest(
+            watchdog_config=event_pb2.WatchdogConfig(timeout_ms=-1,),
+            shutdown_mode=event_pb2.NOT_CONFIGURED))
+    self._running = False
+    self.join()
+
+  def __enter__(self):
+    self.configure_and_run()
+
+  def __exit__(self, exc_type, exc_val, exc_tb):
+    self.stop()
+
+  def run(self):
+    # Don't fetch logs or adjust timing: just ping the watchdog.
+    #
+    # If we hit an exception, reset our session as it is likely broken.
+    while self._running:
+      try:
+        self._worker_manager.ping(request=None)
+        time.sleep(self.ping_interval)
+      except errors.OpError as e:
+        # Catch any TF errors that occur so we don't stop sending heartbeats
+        logging.debug('Caught error while sending heartbeat: %s', e)
+        self._reset_manager()
+
+
+def start_worker_watchdog(session,
+                          devices=None,
+                          ping_interval=60,
+                          shutdown_timeout=3600):
+  """Start global worker watchdog to shutdown workers on coordinator exit."""
+  global _WATCHDOG
+  if _WATCHDOG is None:
+    # Ensure we can send a few pings before we timeout!
+    ping_interval = min(shutdown_timeout / 10., ping_interval)
+    _WATCHDOG = WatchdogManager(session, devices, ping_interval,
+                                shutdown_timeout)
+    _WATCHDOG.configure_and_run()
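+
+# Illustrative usage sketch (assumes an existing `session` connected to the
+# worker cluster):
+#
+#   start_worker_watchdog(session, ping_interval=60, shutdown_timeout=3600)
+#
+# Workers will shut themselves down if the coordinator stops pinging for
+# longer than `shutdown_timeout` seconds.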
+
+
+class GracefulShutdownHook(session_run_hook.SessionRunHook):
+  """Session hook that watches for shutdown events.
+
+  If a shutdown is indicated, `saver.save(checkpoint_prefix)` is executed, and a
+  SystemShutdown exception is raised to terminate the main session.  If `saver`
+  is None the `SAVERS` collection will be read to find a saver.
+
+  `on_shutdown_hooks` is an optional list of functions that should be called
+  after checkpointing.  The function is called with (`run_context`,
+  `all_workers`, `lame_workers`).
+
+  If `heartbeat_group` is not specified, it will default to all CPU workers
+  in the system.
+  """
+
+  def __init__(self, checkpoint_prefix, saver=None, on_shutdown_hooks=None):
+    self._saver = saver
+    self._checkpoint_prefix = checkpoint_prefix
+    self._on_shutdown_hooks = on_shutdown_hooks if on_shutdown_hooks else []
+
+    # Worker heartbeats are managed independently of the main training graph.
+    self._graph = ops.Graph()
+    self._workers = None
+    self._session = None
+    self._heartbeat_supported = False
+
+  def after_create_session(self, training_session, coord):  # pylint: disable=unused-argument
+    # N.B. We have to pull the global step here to avoid it being unavailable
+    # at checkpoint time; the graph has been frozen at that point.
+    if training_util.get_global_step() is None and self.saver() is not None:
+      raise ValueError(
+          'Saver defined but no global step.  Run `get_or_create_global_step()`'
+          ' in your model definition to allow checkpointing.')
+
+    with self._graph.as_default():
+      logging.info('Installing graceful shutdown hook.')
+      self._session = _clone_session(training_session, self._graph)
+      self._workers = WorkerHeartbeatManager.from_devices(
+          self._session, all_worker_devices(self._session))
+      self._heartbeat_supported = self._workers.num_workers() > 0
+      if self._heartbeat_supported:
+        self._workers.configure(
+            event_pb2.WorkerHeartbeatRequest(
+                shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR))
+      else:
+        logging.warn(
+            'No workers support heartbeats. Failure handling will be disabled.')
+
+  def saver(self):
+    if self._saver:
+      return self._saver
+
+    savers = ops.get_collection(ops.GraphKeys.SAVERS)
+    if not savers:
+      return None
+
+    if not isinstance(savers, list):
+      return savers
+
+    if len(savers) > 1:
+      logging.error(
+          'Multiple savers in the SAVERS collection.  On-demand checkpointing '
+          'will be disabled. Pass an explicit `saver` to the constructor to '
+          'override this behavior.')
+      return None
+
+    return savers[0]
+
+  def after_run(self, run_context, run_values):
+    del run_values
+
+    if not self._heartbeat_supported:
+      return
+
+    lame_workers = self._workers.lame_workers()
+    if lame_workers:
+      logging.info('ShutdownHook: lame workers found: %s', lame_workers)
+
+      if self.saver():
+        logging.info('ShutdownHook: saving checkpoint to %s',
+                     self._checkpoint_prefix)
+        self.saver().save(
+            run_context.session,
+            self._checkpoint_prefix,
+            global_step=training_util.get_global_step(),
+            write_state=True,
+        )
+      else:
+        logging.info('ShutdownHook: no Saver defined.')
+
+      for fn in self._on_shutdown_hooks:
+        fn(run_context, self._workers, lame_workers)
+
+
+class RestartComputation(object):
+  """Restart the entire computation.
+
+  This hook shuts down all workers and returns control to the top-level by
+  throwing a CoordinatorShutdownException.
+  """
+
+  def __init__(self, timeout_ms=10000):
+    self.timeout_ms = timeout_ms
+
+  def __call__(self, run_context, all_workers, lame_workers):
+    del run_context, lame_workers
+    all_workers.shutdown(timeout_ms=self.timeout_ms)
+
+    logging.info('Terminating coordinator.')
+    raise CoordinatorShutdownException()
+
+
+class ShutdownLameWorkers(object):
+  """Shutdown lamed workers.
+
+  Processing will continue normally (typically by waiting for the down
+  workers to be restarted).
+  """
+
+  def __init__(self, timeout_ms=10000):
+    self.timeout_in_ms = timeout_ms
+
+  def __call__(self, run_context, all_workers, lame_workers):
+    lame_workers.shutdown(timeout_ms=self.timeout_in_ms)
diff --git a/tensorflow/python/tpu/tensor_tracer.py b/tensorflow/python/tpu/tensor_tracer.py
new file mode 100644
index 0000000..6d41d87
--- /dev/null
+++ b/tensorflow/python/tpu/tensor_tracer.py
@@ -0,0 +1,1638 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========================================================================
+"""A utility to trace tensor values on TPU."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import os.path
+import re
+import sys
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import graph_io
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import tpu
+from tensorflow.python.tpu.ops import tpu_ops
+
+_TRACER_LOG_PREFIX = ' [>>>TT>>>]'
+_DEVICE_TYPE_TPU = 'tpu'
+_DEVICE_TYPE_CPU = 'cpu'
+_TRACE_MODE_NAN_INF = 'nan-inf'
+_TRACE_MODE_PART_TENSOR = 'part-tensor'
+_TRACE_MODE_PART_TENSOR_SIZE = 3
+_TRACE_MODE_FULL_TENSOR = 'full-tensor'
+_TRACE_MODE_NORM = 'norm'
+_TRACE_MODE_MAX_ABS = 'max-abs'
+_SUBMODE_BRIEF = 'brief'
+_SUBMODE_DETAILED = 'detailed'
+_REASON_OUTSIDE_OP_RANGE = 'not-traced-outside-op-range'
+_REASON_UNSAFE_OP = 'not-traced-unsafe-op'
+_REASON_WHILELOOP_OP = 'not-traced-special-whileloop-op'
+_REASON_UNSAFE_SCALAR = 'not-traced-unsafe-scalar'
+_REASON_LESS_INTERESTING_OP = 'not-traced-less-interesting-op'
+_REASON_DEVICE_MISMATCH = 'not-traced-device-mismatch'
+_REASON_DYNAMIC_SHAPE = 'not-traced-dynamic-shape'
+_REASON_SCALAR_GET_TRACED = 'traced-scalar'
+_REASON_TENSOR_GET_TRACED = 'traced-tensor'
+_REASON_USER_INCLUDED = 'traced-user-included'
+_REASON_USER_EXCLUDED = 'not-traced-user-excluded'
+_REASON_NOT_EXECUTED = 'not-traced-not-in-exec-path'
+_REASON_NON_NUMERIC_TENSOR = 'not-traced-non-numeric-tensor'
+_REASON_FEEDS_WHILELOOP_OP = 'not-traced-feeds-special-whileloop-op'
+_MARKER_SECTION_BEGIN = '!!!!!!! section-begin:'
+_MARKER_SECTION_END = '!!!!!!! section-end:'
+_SECTION_NAME_CONFIG = 'configuration'
+_SECTION_NAME_REASON = 'reason'
+_SECTION_NAME_OP_LIST = 'op-list'
+_SECTION_NAME_TENSOR_LIST = 'tensor-list'
+_SECTION_NAME_CACHE_INDEX_MAP = 'cache-index-map'
+_SECTION_NAME_GRAPH = 'graph'
+_FIELD_NAME_VERSION = 'version:'
+_FIELD_NAME_DEVICE = 'device:'
+_FIELD_NAME_TRACE_MODE = 'trace-mode:'
+_FIELD_NAME_SUBMODE = 'submode:'
+_FIELD_NAME_NUM_REPLICAS = 'num-replicas:'
+_FIELD_NAME_NUM_REPLICAS_PER_HOST = 'num-replicas-per-host:'
+_FIELD_NAME_NUM_HOSTS = 'num-hosts:'
+_FIELD_NAME_NUM_OPS = 'number-of-ops:'
+_FIELD_NAME_NUM_TENSORS = 'number-of-tensors:'
+_FIELD_NAME_NUM_CACHE_INDICES = 'number-of-indices:'
+_FIELD_NAME_TOPOLOGICAL_SORT_SUCCEED = 'topological-sort-succeed:'
+_FLAGS_ENV_VAR = 'TENSOR_TRACER_FLAGS'
+_FLAG_SINGLE_QUOTE_PAT = re.compile(r"\s*--([^=]+)='([^']*)'")
+_FLAG_DOUBLE_QUOTE_PAT = re.compile(r'\s*--([^=]+)="([^"]*)"')
+_FLAG_NO_QUOTE_PAT = re.compile(r'\s*--([^=]+)=(\S*)')
+_FLAG_NO_EQUAL_PAT = re.compile(r'\s*--([^=]+)\s*')
+_FLAG_NAME_ENABLE = 'enable'
+_FLAG_NAME_TRACE_MODE = 'trace_mode'
+_FLAG_NAME_USE_COMPACT_TRACE = 'compact_trace'
+_FLAG_NAME_SUBMODE = 'submode'
+_FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS = 'include_less_interesting_ops'
+_FLAG_NAME_EXCLUDED_OPNAMES = 'excluded_opnames'
+_FLAG_NAME_EXCLUDED_OPTYPES = 'excluded_optypes'
+_FLAG_NAME_INCLUDED_OPNAMES = 'included_opnames'
+_FLAG_NAME_INCLUDED_OPTYPES = 'included_optypes'
+_FLAG_NAME_TRACE_DIR = 'trace_dir'
+_FLAG_NAME_REPORT_FILE = 'report_file'
+_FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR = 'use_test_undeclared_outputs_dir'
+_FLAG_NAME_OP_RANGE = 'op_range'
+# Folder to dump the graphs before and after the tensor tracer updates (the
+# "pre" and "post" graphs).
+_FLAG_DUMP_BEFORE_AFTER_GRAPHS = 'dump_graphs'
+_OP_RANGE_PAT = re.compile(r'(\d+):(\d+)')
+_OUTPUT_STREAM_ESCAPE = 'file://'
+_TEST_UNDECLARED_OUTPUTS_DIR_ENV_VAR = 'TEST_UNDECLARED_OUTPUTS_DIR'
+_TENSOR_TRACER_COLLECTION = 'tensor_tracer_variables'
+_TENSOR_TRACER_CHECKPOINT = 'tensor_tracer_checkpoint'
+_TRACE_FILE_NAME = 'trace.all'
+_COMPACT_TRACE_FILE_PREFIX = 'compact_trace.'
+_COMPACT_TRACE_ENTRY_INIT_VALUE = -1.0
+_TENSOR_TRACER_STORAGE = 'tensor_tracer_storage'
+_TENSOR_VALUES_CACHE = 'tensor_values_cache'
+_REPLICA_ID_TAG = '#replica-id: '
+
+
+def tensor_tracepoint(tensor, checkpoint_name):
+  """Adds a checkpoint with the given checkpoint name for the given tensor.
+
+  The tensor will be added to the list of tensors that will be traced by the
+  tensor tracer.
+
+  Args:
+     tensor: the tensor object for which the tracing is requested.
+     checkpoint_name: a string name for the checkpoint. This name has to be
+     unique if used within model comparison. The tensors that have the same
+     checkpoint identifier are compared in model comparison.
+  Returns:
+    The provided tensor.
+  """
+
+  tensor.graph.get_collection(_TENSOR_TRACER_COLLECTION)
+  tensor.graph.add_to_collection(_TENSOR_TRACER_COLLECTION,
+                                 (tensor, checkpoint_name))
+  return tensor
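+
+# Illustrative usage sketch (`logits` is a hypothetical tensor from the
+# surrounding model):
+#
+#   logits = tensor_tracepoint(logits, 'logits_checkpoint')
+#
+# The tensor is returned unchanged, so the call can be dropped inline without
+# altering the rest of the graph.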
+
+
+def keras_layer_tracepoint(layer, checkpoint_name):
+  """An interface for adding the tensor outputs of a keras layer.
+
+  Encapsulates tensor_tracepoint.
+
+  Args:
+     layer: A keras layer.
+     checkpoint_name: a string name for the checkpoint. This name has to be
+     unique if used within model comparison. The tensors that have the same
+     checkpoint identifier are compared in model comparison.
+
+  Returns:
+    The provided layer.
+  """
+  try:
+    outputs = layer.output
+    if tensor_util.is_tensor(outputs):
+      tensor_tracepoint(outputs, '%s' % (checkpoint_name))
+    else:
+      for idx, output_tensor in enumerate(outputs):
+        # Check each element, not the containing `outputs` object.
+        if tensor_util.is_tensor(output_tensor):
+          tensor_tracepoint(output_tensor, '%s_%d' % (checkpoint_name, idx))
+  except AttributeError:
+    pass
+  except RuntimeError:
+    pass
+  return layer
+
+
+def _trace_files_need_precreated(output_dir):
+  """Return True if trace files must be pre-created by users."""
+
+  return output_dir.startswith('/cns/')
+
+
+def _get_tensor_values_cache(graph=None):
+  """Returns the variable that implements tensor-value caching."""
+
+  graph = graph or ops.get_default_graph()
+  collection = graph.get_collection(_TENSOR_TRACER_STORAGE)
+  if len(collection) == 1:
+    return collection[0]
+  elif not collection:
+    raise RuntimeError('%s has not been created'%_TENSOR_VALUES_CACHE)
+  else:
+    raise RuntimeError('Multiple %s created'%_TENSOR_VALUES_CACHE)
+
+
+def _create_tensor_values_cache(graph, num_tensors):
+  """Creates a variable as the cache to store intermediate tensor values."""
+  graph = graph or ops.get_default_graph()
+  # Create in proper graph and base name_scope.
+  with graph.as_default() as g, g.name_scope(None):
+    return variable_scope.get_variable(
+        _TENSOR_VALUES_CACHE,
+        shape=[num_tensors],
+        dtype=dtypes.float32,
+        initializer=init_ops.constant_initializer(
+            _COMPACT_TRACE_ENTRY_INIT_VALUE),
+        trainable=False,
+        use_resource=True,
+        collections=[_TENSOR_TRACER_STORAGE, ops.GraphKeys.GLOBAL_VARIABLES])
+
+
+class TensorTracer(object):
+  """A software construct for tracing tensor values in a TF graph on TPU.
+
+  This utility is disabled by default. It can be enabled by setting
+  the TENSOR_TRACER_FLAGS env variable as:
+    export TENSOR_TRACER_FLAGS="--enable=1"
+  If it is enabled, it will trace the output tensor values of
+  selected Ops in the graph. It has two outputs: (1) the traces and (2)
+  a report. The traces are dumped to a specified local file on the TPU
+  host. The report is printed to the log.info of the TPU job.
+  By passing options via the env variable, users can change:
+     (1) the trace mode (e.g., detecting NaN/Inf, printing partial or
+         full tensor values)
+     (2) which Ops to be traced (via op.name or op.type)
+     (3) output trace file path.
+  """
+  # The set of graphs that are rewritten by tensor tracer.
+  _traced_graphs = set()
+
+  @staticmethod
+  def _match_next_flag(flags, pos):
+    """Returns the match for the next TensorTracer flag.
+
+    Args:
+       flags: a string that contains the flags.
+       pos: where in flags to start the search.
+
+    Returns:
+       A pair where the first element is the regular-expression
+       match found and the second element indicates if the match
+       has a value.
+    """
+
+    match = _FLAG_DOUBLE_QUOTE_PAT.match(flags, pos)
+    if match:
+      return match, True
+    match = _FLAG_SINGLE_QUOTE_PAT.match(flags, pos)
+    if match:
+      return match, True
+    match = _FLAG_NO_QUOTE_PAT.match(flags, pos)
+    if match:
+      return match, True
+    match = _FLAG_NO_EQUAL_PAT.match(flags, pos)
+    if match:
+      # The flag is found but is not given a value.
+      return match, False
+    # The flag is not found.
+    return None, False
+
+  @staticmethod
+  def validate_flag_names():
+    """Validates if the TensorTrace flags passed are valid."""
+    valid_flag_names = [_FLAG_NAME_ENABLE, _FLAG_NAME_TRACE_MODE,
+                        _FLAG_NAME_USE_COMPACT_TRACE,
+                        _FLAG_NAME_SUBMODE,
+                        _FLAG_NAME_EXCLUDED_OPNAMES,
+                        _FLAG_NAME_EXCLUDED_OPTYPES,
+                        _FLAG_NAME_INCLUDED_OPNAMES,
+                        _FLAG_NAME_INCLUDED_OPTYPES,
+                        _FLAG_NAME_TRACE_DIR,
+                        _FLAG_NAME_REPORT_FILE,
+                        _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR,
+                        _FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS,
+                        _FLAG_NAME_OP_RANGE,
+                        _FLAG_DUMP_BEFORE_AFTER_GRAPHS]
+    tensor_tracer_flags = os.environ.get(_FLAGS_ENV_VAR)
+    if not tensor_tracer_flags:
+      return
+    pos = 0
+    while True:
+      match, _ = TensorTracer._match_next_flag(tensor_tracer_flags, pos)
+      if not match:
+        break
+      flag_name = match.group(1)
+      if flag_name not in valid_flag_names:
+        raise ValueError(
+            'The flag name "%s" passed via the environment variable "%s" '
+            'is invalid. Valid flag names are:'
+            '\n%s'%(flag_name, _FLAGS_ENV_VAR, valid_flag_names))
+      pos = match.end()
+
+  @staticmethod
+  def print_flag_values():
+    """Prints all TensorTracer flags passed via environment variables."""
+
+    tensor_tracer_flags = os.environ.get(_FLAGS_ENV_VAR)
+    if not tensor_tracer_flags:
+      return 'Env variable "%s" is not set'%_FLAGS_ENV_VAR
+    result = 'Env variable "%s" is set to "%s"\n'%(_FLAGS_ENV_VAR,
+                                                   tensor_tracer_flags)
+    result += 'Individual flag values:\n'
+    pos = 0
+    while True:
+      match, has_value = TensorTracer._match_next_flag(
+          tensor_tracer_flags, pos)
+      if not match:
+        break
+      flag_name = match.group(1)
+      if has_value:
+        flag_value = match.group(2)
+      else:
+        flag_value = None
+      result += '  %s: %s\n'%(flag_name, flag_value)
+      pos = match.end()
+    result += '\n'
+    return result
+
+  @staticmethod
+  def get_flag_value(wanted_flag_name):
+    """Returns the value of a TensorTracer flags.
+
+    Args:
+      wanted_flag_name: the name the the flag we are looking for.
+
+    Returns:
+      A pair where the first element indicates if the flag is
+      found and the second element is the value of the flag.
+
+    Raises:
+      RuntimeError: If supposedly deadcode is reached.
+    """
+
+    tensor_tracer_flags = os.getenv(_FLAGS_ENV_VAR)
+    if not tensor_tracer_flags:
+      return False, None
+    pos = 0
+    while True:
+      match, has_value = TensorTracer._match_next_flag(
+          tensor_tracer_flags, pos)
+      if not match:
+        return False, None
+      flag_name = match.group(1)
+      if has_value:
+        flag_value = match.group(2)
+      else:
+        flag_value = None
+      if flag_name == wanted_flag_name:
+        return True, flag_value
+      pos = match.end()
+    raise RuntimeError('Should not reach here.')
+
+  @staticmethod
+  def flag_value_to_re_list(flag_name):
+    """Converts list of strings to compiled RE."""
+
+    re_list = []
+    found, flag_value = TensorTracer.get_flag_value(flag_name)
+    if not found or not flag_value:
+      return re_list
+    list_of_values = flag_value.split()
+    for v in list_of_values:
+      r = re.compile(v)
+      re_list.append(r)
+    return re_list
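+
+  # Illustrative sketch (flag values are hypothetical): with
+  #
+  #   export TENSOR_TRACER_FLAGS='--enable=1 --included_opnames="dense.* bias.*"'
+  #
+  # this helper returns [re.compile('dense.*'), re.compile('bias.*')] for
+  # _FLAG_NAME_INCLUDED_OPNAMES, since the flag value is split on whitespace.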
+
+  @staticmethod
+  def _is_flag_on(flag_name):
+    """Returns True if the given flag is on."""
+
+    found, flag_value = TensorTracer.get_flag_value(flag_name)
+    if not found:
+      return False
+    if flag_value is None:
+      return True
+    # Depends on the flag value.
+    flag_value = flag_value.lower()
+    enabled = flag_value in ['1', 't', 'true', 'y', 'yes']
+    return enabled
+
+  @staticmethod
+  def is_enabled():
+    """Returns True if TensorTracer is enabled."""
+
+    return TensorTracer._is_flag_on(_FLAG_NAME_ENABLE)
+
+  @staticmethod
+  def use_test_undeclared_outputs_dir():
+    """Decides the output directory of the report and trace files.
+
+    Args:
+       None.
+
+    Returns:
+       True if the output files should be written to the
+       test-undeclared-outputs-directory defined via an
+       env variable.
+    """
+
+    return TensorTracer._is_flag_on(
+        _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR)
+
+  @staticmethod
+  def use_compact_trace():
+    return TensorTracer._is_flag_on(
+        _FLAG_NAME_USE_COMPACT_TRACE)
+
+  @staticmethod
+  def check_device_type(device_type):
+    """Checks if the given device type is valid."""
+
+    if device_type not in [_DEVICE_TYPE_TPU, _DEVICE_TYPE_CPU]:
+      raise ValueError('Invalid device_type "%s"'%device_type)
+
+  @staticmethod
+  def check_trace_mode(trace_mode):
+    """Checks if the given trace mode is valid."""
+
+    valid_trace_modes = [_TRACE_MODE_NAN_INF, _TRACE_MODE_PART_TENSOR,
+                         _TRACE_MODE_FULL_TENSOR, _TRACE_MODE_NORM,
+                         _TRACE_MODE_MAX_ABS]
+    if trace_mode not in valid_trace_modes:
+      raise ValueError('Invalid trace mode "%s" given to the Tensor_Tracer. '
+                       'Valid trace modes are: %s'%(trace_mode,
+                                                    valid_trace_modes))
+
+  @staticmethod
+  def check_submode(submode):
+    """Checks if the given submode is valid."""
+
+    if not submode:
+      return
+    valid_submodes = [_SUBMODE_DETAILED, _SUBMODE_BRIEF]
+    if submode not in valid_submodes:
+      raise ValueError('Invalid submode "%s" given to the Tensor_Tracer. '
+                       'Valid submodes are: %s'%(submode,
+                                                 valid_submodes))
+
+  @staticmethod
+  def loop_cond_op(op):
+    return op.type in ('LoopCond', 'RefLoopCond')
+
+  @staticmethod
+  def while_loop_op(op):
+    """Returns true if op is one of the special ops of in a while loop.
+
+    Args:
+       op: A tf.Operation.
+
+    Returns:
+       True if the given op is one of [Switch, Merge, Enter, Exit,
+       NextIteration, LoopCond], which are all building blocks for TF while
+       loops.
+    """
+    return (control_flow_util.IsLoopSwitch(op) or
+             control_flow_util.IsLoopMerge(op) or
+             control_flow_util.IsLoopEnter(op) or
+             control_flow_util.IsLoopExit(op) or
+             TensorTracer.loop_cond_op(op) or
+             op.type in ('RefNextIteration', 'NextIteration'))
+
+  @staticmethod
+  def unsafe_op(op):
+    """Returns True if this op is not safe to be traced."""
+
+    if control_flow_util.IsInCond(op):
+      return True
+    # Reasons for not including the following op types:
+    #    Assign: causes incorrect results with CPU tracing.
+    if op.type in ['Assign']:
+      return True
+    return False
+
+  @staticmethod
+  def device_mismatch(device_type, op):
+    if device_type == _DEVICE_TYPE_TPU:
+      # pylint: disable=protected-access
+      return tpu._TPU_REPLICATE_ATTR not in op.node_def.attr
+      # pylint: enable=protected-access
+    return False
+
+  @staticmethod
+  def unsafe_scalar_trace(op):
+    """Return true if scalar output tensor from Op is not safe to be traced."""
+
+    # Tracing the following causes cycle in the graph on TPU.
+    if op.type in ['LoopCond', 'Enter', 'Merge', 'Const',
+                   'Switch', 'Less', 'ReadVariableOp']:
+      return True
+    # Tracing the following will cause casting-issue
+    # with the norm tracing mode or other compilation issues on CPU.
+    if op.type in ['VarHandleOp', 'IteratorToStringHandle',
+                   'IteratorGetNext', 'OneShotIterator',
+                   'IteratorV2', 'MakeIterator',
+                   'BatchDatasetV2', 'MapDataset',
+                   'FixedLengthRecordDataset', 'TakeDataset', 'ZipDataset',
+                   'Placeholder', 'PlaceholderWithDefault', 'StridedSlice']:
+      return True
+    return False
+
+  @staticmethod
+  def less_interesting_op(op):
+    """Returns True if the given Op is not an interesting one to be traced."""
+
+    found, _ = TensorTracer.get_flag_value(
+        _FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS)
+    if found:
+      # The user forced inclusion of all ops.
+      return False
+    # The following ops are highly unlikely to cause bugs.
+    return op.type in ['Const', 'Identity', 'Cast', 'Shape']
+
+  @staticmethod
+  def reason(op_idx, details):
+    """Returns reason why the Op at op_idx is traced or not."""
+
+    return '%d %s'%(op_idx, details)
+
+  @staticmethod
+  def topological_sort(g):
+    """Performs topological sort on the given graph.
+
+    Args:
+       g: the graph.
+
+    Returns:
+       A pair where the first element indicates if the topological
+       sort succeeded (True if there is no cycle found; False if a
+       cycle is found) and the second element is either the sorted
+       list of nodes or the cycle of nodes found.
+    """
+
+    def visit(op, cycle, permanently_marked_ops,
+              temporarily_marked_ops, sorted_ops):
+      """Recursively visits all Ops in a graph.
+
+      Args:
+         op: the current Op being visited.
+         cycle: a set that is populated with the cycle of Ops, if one is found.
+         permanently_marked_ops: the set of Ops that were already visited.
+         temporarily_marked_ops: the set of Ops that we have visited during
+                                 the current descent.
+         sorted_ops: the list of Ops sorted in topological order.
+      """
+
+      if cycle:
+        return
+      if op in permanently_marked_ops:
+        return
+      if op in temporarily_marked_ops:
+        # A back edge closes a cycle: mutate the shared `cycle` set in place
+        # so the caller can observe it (rebinding the parameter would not
+        # propagate).
+        cycle.update(temporarily_marked_ops)
+        return
+      temporarily_marked_ops.add(op)
+      for out_tensor in op.outputs:
+        for consumer_op in out_tensor.consumers():
+          visit(consumer_op, cycle, permanently_marked_ops,
+                temporarily_marked_ops, sorted_ops)
+      # pylint: disable=protected-access
+      for ctrl_output_op in op._control_outputs:
+        # pylint: enable=protected-access
+        visit(ctrl_output_op, cycle, permanently_marked_ops,
+              temporarily_marked_ops, sorted_ops)
+      temporarily_marked_ops.remove(op)
+      permanently_marked_ops.add(op)
+      sorted_ops.insert(0, op)
+
+    graph_cycle = set([])
+    sorted_ops = []
+    permanently_marked_ops = set([])
+    temporarily_marked_ops = set([])
+    unsorted_ops = g.get_operations()
+    for op in unsorted_ops:
+      visit(op, graph_cycle, permanently_marked_ops,
+            temporarily_marked_ops, sorted_ops)
+    if graph_cycle:
+      return (False, graph_cycle)
+    else:
+      assert len(unsorted_ops) == len(sorted_ops)
+      return (True, sorted_ops)
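+
+  # Illustrative sketch: on an acyclic graph the call returns
+  # (True, sorted_ops) with every producer ordered before its consumers; if a
+  # cycle is found it returns (False, cycle), where `cycle` holds the ops on
+  # the descent path when the back edge was detected.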
+
+  @staticmethod
+  def _make_op_and_tensor_maps(op_list):
+    """Creates various maps and lists from op_list.
+
+    Args:
+       op_list: a list of Ops
+
+    Returns:
+       opname_idx_map: a map from Op's name to its index in op_list.
+       tensor_list: a list of output tensors of the Ops in op_list.
+       tensorname_idx_map: a map from output tensor name to its index
+                           in tensor_list.
+    """
+
+    opname_idx_map = {}
+    tensor_list = []
+    tensorname_idx_map = {}
+    for op_id, op in enumerate(op_list):
+      if op.name in opname_idx_map:
+        raise ValueError('Duplicated Op name: %s'%op.name)
+      opname_idx_map[op.name] = op_id
+      for output_tensor in op.outputs:
+        if output_tensor.name not in tensorname_idx_map:
+          tensor_list.append(output_tensor)
+          tensorname_idx_map[output_tensor.name] = len(tensor_list)-1
+    return (opname_idx_map, tensor_list, tensorname_idx_map)
+
+  def __init__(self):
+    """Initializes a TensorTracer.
+
+    Sets the various member fields from the flags (if given) or the defaults.
+    """
+    self._version = 'use-outside-compilation'
+    self._device_type = None
+    TensorTracer.validate_flag_names()
+    found, self._trace_mode = TensorTracer.get_flag_value(_FLAG_NAME_TRACE_MODE)
+    if not found or not self._trace_mode:
+      self._trace_mode = _TRACE_MODE_NAN_INF
+    TensorTracer.check_trace_mode(self._trace_mode)
+    found, self._submode = TensorTracer.get_flag_value(_FLAG_NAME_SUBMODE)
+    if not found or not self._submode:
+      self._submode = _SUBMODE_DETAILED
+    TensorTracer.check_submode(self._submode)
+    self._part_tensor_size = _TRACE_MODE_PART_TENSOR_SIZE
+    self._instrument_records = {}
+    self._set_trace_dir()
+    self._set_report_file()
+    self._set_op_range()
+    self._set_excluded_opnames()
+    self._set_excluded_optypes()
+    self._set_included_opnames()
+    self._set_included_optypes()
+    self._num_replicas = None
+    self._num_replicas_per_host = None
+    self._num_hosts = None
+    self._replica_id = None
+    _, self._graph_dump_path = TensorTracer.get_flag_value(
+        _FLAG_DUMP_BEFORE_AFTER_GRAPHS)
+
+  def _add_replica_id_to_graph(self):
+    """Adds nodes for computing the replica ID to the graph."""
+
+    if self._num_replicas:
+      with ops.control_dependencies(None):
+        # Uses None as dependency to run outside of TPU graph rewrites.
+        self._replica_id = tpu_ops.tpu_replicated_input(
+            list(range(self._num_replicas)),
+            name='tt_replica_id')
+    else:
+      self._replica_id = 'unknown'
+
+  def _set_trace_dir(self):
+    found, self._trace_dir = TensorTracer.get_flag_value(_FLAG_NAME_TRACE_DIR)
+    if (found and self._trace_dir
+        and TensorTracer.use_test_undeclared_outputs_dir()):
+      raise ValueError('Cannot use --%s and --%s at the same time'
+                       %(_FLAG_NAME_TRACE_DIR,
+                         _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR))
+    if TensorTracer.use_test_undeclared_outputs_dir():
+      self._trace_dir = os.environ.get(_TEST_UNDECLARED_OUTPUTS_DIR_ENV_VAR)
+
+  def _set_report_file(self):
+    """Sets the path of the output report file."""
+
+    found, self._report_file_path = TensorTracer.get_flag_value(
+        _FLAG_NAME_REPORT_FILE)
+    if (found and self._report_file_path
+        and TensorTracer.use_test_undeclared_outputs_dir()):
+      if os.path.isabs(self._report_file_path):
+        raise ValueError('If use_test_undeclared_outputs_dir is set, '
+                         'report_file_path cannot be an absolute path (%s)'
+                         %self._report_file_path)
+      outputs_dir = os.environ.get(_TEST_UNDECLARED_OUTPUTS_DIR_ENV_VAR)
+      self._report_file_path = os.path.join(outputs_dir,
+                                            self._report_file_path)
+    if not self._report_file_path:
+      self._report_file = None
+      return
+    self._report_file = gfile.Open(self._report_file_path, 'w')
+
+  def _close_report_file(self):
+    if self._report_file:
+      self._report_file.close()
+
+  def _set_op_range(self):
+    """Sets the index range of the Ops that we will consider tracing."""
+
+    found, op_range = TensorTracer.get_flag_value(_FLAG_NAME_OP_RANGE)
+    if not found or not op_range:
+      self._op_range = (-1, -1)  # this means including all ops.
+      return
+    match = _OP_RANGE_PAT.match(op_range)
+    if not match:
+      self._op_range = (-1, -1)  # this means including all ops.
+      return
+    self._op_range = (int(match.group(1)), int(match.group(2)))
+
+  def _inside_op_range(self, idx):
+    """Return True if the given index is inside the selected range."""
+
+    if idx < self._op_range[0]:
+      return False
+    return self._op_range[1] < 0 or idx <= self._op_range[1]
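+    # Editor's example: with self._op_range == (10, 20), indices 10..20 are
+    # inside the range; with the default (-1, -1) every op index is included.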
+
+  def _set_excluded_opnames(self):
+    self._excluded_opname_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_EXCLUDED_OPNAMES)
+
+  def _set_excluded_optypes(self):
+    self._excluded_optype_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_EXCLUDED_OPTYPES)
+
+  def _set_included_opnames(self):
+    self._included_opname_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_INCLUDED_OPNAMES)
+
+  def _set_included_optypes(self):
+    self._included_optype_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_INCLUDED_OPTYPES)
+
+  def _is_user_included_op(self, op):
+    for opname_re in self._included_opname_re_list:
+      if opname_re.match(op.name):
+        return True
+    for optype_re in self._included_optype_re_list:
+      if optype_re.match(op.type):
+        return True
+    return False
+
+  def _is_user_excluded_op(self, op):
+    for opname_re in self._excluded_opname_re_list:
+      if opname_re.match(op.name):
+        return True
+    for optype_re in self._excluded_optype_re_list:
+      if optype_re.match(op.type):
+        return True
+    return False
+
+  def _use_tensor_values_cache(self):
+    """Returns True if immediate tensors should be first saved to a cache."""
+
+    if self._trace_mode not in set([_TRACE_MODE_NAN_INF,
+                                    _TRACE_MODE_NORM, _TRACE_MODE_MAX_ABS]):
+      return False
+    if self._trace_dir and _trace_files_need_precreated(self._trace_dir):
+      return True
+    if TensorTracer.use_compact_trace():
+      return True
+    return False
+
+  def _save_tensor_value_to_cache_op(self, graph, cache_idx, updates):
+    """Returns an Op that will save the given updates to an entry in the cache."""
+
+    cache = _get_tensor_values_cache(graph)
+    indices = constant_op.constant([cache_idx])
+    return state_ops.scatter_update(cache, indices, updates).op
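+    # Editor's note: scatter_update writes `updates` into row `cache_idx` of
+    # the cache variable, so each traced tensor owns one fixed cache slot.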
+
+  def _write_report(self, content):
+    """Writes the given content to the report."""
+
+    line = '%s %s'%(_TRACER_LOG_PREFIX, content)
+    if self._report_file:
+      self._report_file.write(line)
+    else:
+      logging.info(line)
+
+  def _write_config_section(self):
+    """Writes the config section of the report."""
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_CONFIG))
+    self._write_report('%s %s\n'%(_FIELD_NAME_VERSION, self._version))
+    self._write_report('%s %s\n'%(_FIELD_NAME_DEVICE, self._device_type))
+    self._write_report('%s %s\n'%(_FIELD_NAME_TRACE_MODE, self._trace_mode))
+    self._write_report('%s %s\n'%(_FIELD_NAME_SUBMODE, self._submode))
+    self._write_report('%s %s\n'%(_FIELD_NAME_NUM_REPLICAS, self._num_replicas))
+    self._write_report('%s %s\n'%(_FIELD_NAME_NUM_REPLICAS_PER_HOST,
+                                  self._num_replicas_per_host))
+    self._write_report('%s %s\n'%(_FIELD_NAME_NUM_HOSTS, self._num_hosts))
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_CONFIG))
+
+  def _write_reason_section(self):
+    """Writes the reason section of the report."""
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_REASON))
+    for key in sorted(self._instrument_records):
+      self._write_report('"%s" %s\n'%(key, self._instrument_records[key]))
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_REASON))
+
+  def _write_op_list_section(self, op_list):
+    """Writes the Op-list section of the report."""
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_OP_LIST))
+    self._write_report('%s %d\n'%(_FIELD_NAME_NUM_OPS, len(op_list)))
+    for i, op in enumerate(op_list):
+      line = '%d "%s" %s'%(i, op.name, op.type)
+      for out_tensor in op.outputs:
+        if out_tensor.name not in self._tensorname_idx_map:
+          raise ValueError(
+              'out_tensor %s is not in tensorname_idx_map'%out_tensor.name)
+        line += ' %d'%self._tensorname_idx_map[out_tensor.name]
+      line += '\n'
+      self._write_report(line)
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_OP_LIST))
+
+  def _write_tensor_list_section(self, tensor_list, opname_idx_map):
+    """Writes the tensor-list section of the report."""
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
+                                  _SECTION_NAME_TENSOR_LIST))
+    self._write_report('%s %d\n'%(_FIELD_NAME_NUM_TENSORS, len(tensor_list)))
+    for i, tensor in enumerate(tensor_list):
+      line = '%d "%s"'%(i, tensor.name)
+      for consumer_op in tensor.consumers():
+        if consumer_op.name not in opname_idx_map:
+          raise ValueError(
+              'consumer_op %s is not in opname_idx_map'%consumer_op.name)
+        line += ' %d'%opname_idx_map[consumer_op.name]
+      line += '\n'
+      self._write_report(line)
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
+                                  _SECTION_NAME_TENSOR_LIST))
+
+  def _write_cache_index_map_section(self):
+    """Writes the mapping from cache index to tensor index to the report."""
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
+                                  _SECTION_NAME_CACHE_INDEX_MAP))
+    self._write_report('%s %d\n'%(_FIELD_NAME_NUM_CACHE_INDICES,
+                                  len(self._cache_idx_to_tensor_idx)))
+    for cache_idx, tensor_idx in enumerate(self._cache_idx_to_tensor_idx):
+      line = '%d %d\n'%(cache_idx, tensor_idx)
+      self._write_report(line)
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
+                                  _SECTION_NAME_CACHE_INDEX_MAP))
+
+  def _write_graph_section(self, succeed, sorted_or_cycle):
+    """Writes the graph section of the report."""
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_GRAPH))
+    self._write_report('%s %s\n'%(_FIELD_NAME_TOPOLOGICAL_SORT_SUCCEED,
+                                  succeed))
+    for i, op in enumerate(sorted_or_cycle):
+      self._write_report('%d "%s"\n'%(i, op.name))
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_GRAPH))
+
+  def _preprocess_traced_tensor(self, tensor):
+    """Computes NAN/Norm/Max on TPUs before sending to CPU.
+
+    Args:
+      tensor: The tensor to be traced.
+    Returns:
+      A tensor that should be input to the trace_function.
+    Raises:
+      RuntimeError: If the trace mode is invalid.
+    """
+
+    def _detect_nan_inf(tensor):
+      """Trace function for detecting any NaN/Inf in the tensor."""
+
+      if tensor.dtype.is_floating:
+        mask = math_ops.reduce_any(
+            gen_math_ops.logical_or(
+                gen_math_ops.is_nan(tensor), gen_math_ops.is_inf(tensor)))
+        output_tensor = control_flow_ops.cond(mask,
+                                              lambda: constant_op.constant(1.0),
+                                              lambda: constant_op.constant(0.0))
+      else:
+        output_tensor = constant_op.constant(0.0)
+      # The output must have shape [1]; reshape in case the shape
+      # information is missing.
+      output_tensor = array_ops.reshape(output_tensor, [1])
+      return output_tensor
+
+    def _show_norm(tensor):
+      tensor = math_ops.cast(tensor, dtypes.float32)
+      output_tensor = linalg_ops.norm(tensor)
+      # The output must have shape [1]; reshape in case the shape
+      # information is missing.
+      output_tensor = array_ops.reshape(output_tensor, [1])
+      return output_tensor
+
+    def _show_max_abs(tensor):
+      tensor = math_ops.cast(tensor, dtypes.float32)
+      output_tensor = math_ops.reduce_max(math_ops.abs(tensor))
+      zero = constant_op.constant(0, dtypes.float32)
+      output_tensor = gen_math_ops.maximum(zero, output_tensor)
+      # The output must have shape [1]; reshape in case the shape
+      # information is missing.
+      output_tensor = array_ops.reshape(output_tensor, [1])
+      return output_tensor
+
+    if self._trace_mode == _TRACE_MODE_NAN_INF:
+      return _detect_nan_inf(tensor)
+    if self._trace_mode == _TRACE_MODE_PART_TENSOR:
+      return tensor
+    if self._trace_mode == _TRACE_MODE_FULL_TENSOR:
+      return tensor
+    if self._trace_mode == _TRACE_MODE_NORM:
+      return _show_norm(tensor)
+    if self._trace_mode == _TRACE_MODE_MAX_ABS:
+      return _show_max_abs(tensor)
+    raise RuntimeError(
+        'Tensor trace fun for %s is not yet implemented' % self._trace_mode)
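+    # Editor's sketch: e.g. under _TRACE_MODE_NORM a [128, 128] activation is
+    # reduced on the TPU to a shape-[1] float32 L2 norm, so only that scalar
+    # is sent to the host instead of the full tensor.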
+
+  def _make_tensor_trace_fun(self, tensor_name):
+    """Makes the tensor tracing function called by outside compilation.
+
+    Args:
+      tensor_name: name of the tensor being traced.
+
+    Returns:
+      A function to be passed as the first argument to outside compilation.
+
+    Raises:
+      RuntimeError: If the trace mode is invalid.
+    """
+
+    def _print_tensor(tensor_name, num_elements, tensor, output_tensor):
+      """Prints a tensor value to a file.
+
+      Args:
+        tensor_name: name of the tensor being traced.
+        num_elements: number of elements to print (-1 means print all).
+        tensor: the tensor needs to be returned.
+        output_tensor: the tensor needs to be printed.
+
+      Returns:
+        The same tensor passed via the "tensor" argument.
+
+      Raises:
+        ValueError: If tensor_name is not already in
+                    self._tensorname_idx_map.
+      """
+
+      if self._submode == _SUBMODE_BRIEF:
+        if tensor_name not in self._tensorname_idx_map:
+          raise ValueError(
+              'Tensor name %s is not in the tensorname_idx_map'%tensor_name)
+        msg = '%d'%self._tensorname_idx_map[tensor_name]
+      else:
+        msg = '"%s"'%tensor_name
+
+      if self._trace_dir:
+        output_path = os.path.join(self._trace_dir, _TRACE_FILE_NAME)
+        output_stream = _OUTPUT_STREAM_ESCAPE + output_path
+      else:
+        output_stream = sys.stderr
+      return logging_ops.print_v2(msg, array_ops.shape(output_tensor),
+                                  '@', self._replica_id,
+                                  '\n', output_tensor, '\n',
+                                  summarize=num_elements,
+                                  output_stream=output_stream)
+
+    def _show_part_tensor(tensor):
+      """Trace function for printing part of the tensor."""
+
+      return _print_tensor(tensor_name, self._part_tensor_size,
+                           tensor, tensor)
+
+    def _show_full_tensor(tensor):
+      """Trace function for printing the entire tensor."""
+
+      return _print_tensor(tensor_name, -1, tensor, tensor)
+
+    if self._trace_mode == _TRACE_MODE_PART_TENSOR:
+      return _show_part_tensor
+    # The input tensor has a shape of "[1]" for _TRACE_MODE_NAN_INF,
+    # _TRACE_MODE_NORM, and _TRACE_MODE_MAX_ABS, as related computations are
+    # performed within TPUs and only their results are transferred to CPU.
+    # Simply print the full tensor for these trace modes.
+    if self._trace_mode in [
+        _TRACE_MODE_NAN_INF, _TRACE_MODE_NORM, _TRACE_MODE_FULL_TENSOR,
+        _TRACE_MODE_MAX_ABS
+    ]:
+      return _show_full_tensor
+
+    raise RuntimeError('Tensor trace fun for %s is not yet implemented'
+                       %self._trace_mode)
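+    # Editor's note (approximate format): each print_v2 call above emits a
+    # record like
+    #   "<tensor name>" [<shape>] @ <replica id>
+    #   <traced values>
+    # to stderr, or to <trace_dir>/<_TRACE_FILE_NAME> when a trace directory
+    # is configured.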
+
+  def _skip_op(self, op_id, op, user_included, user_excluded,
+               in_exec_path=True):
+    """Returns True if we should not trace Op."""
+
+    if TensorTracer.while_loop_op(op):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_WHILELOOP_OP)
+      return True
+    if TensorTracer.unsafe_op(op):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_UNSAFE_OP)
+      return True
+    if TensorTracer.device_mismatch(self._device_type, op):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_DEVICE_MISMATCH)
+      return True
+    if not in_exec_path:
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_NOT_EXECUTED)
+      return True
+
+    if not self._inside_op_range(op_id):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_OUTSIDE_OP_RANGE)
+      return True
+    if TensorTracer.less_interesting_op(op):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_LESS_INTERESTING_OP)
+      return True
+    if user_included:
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_USER_INCLUDED)
+      return False
+    if user_excluded:
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_USER_EXCLUDED)
+      return True
+    return False
+
+  def _skip_tensor(self, op_id, out_tensor, user_included,
+                   user_excluded):
+    """Returns True if we should not trace out_tensor."""
+
+    # Skips a tensor if the tensor has a non-numeric type.
+    #   Note: we cannot use check_ops.is_numeric_tensor(out_tensor)
+    #         because it also excludes tensors with the dtypes bool and
+    #         float32_ref, which we actually want to trace.
+    non_numeric_tensor_types = set([dtypes.variant, dtypes.resource,
+                                    dtypes.string])
+    if out_tensor.dtype in non_numeric_tensor_types:
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_NON_NUMERIC_TENSOR)
+      return True
+    # Skip a tensor if it feeds a special while loop op.
+    if any(TensorTracer.while_loop_op(consumer)
+           for consumer in out_tensor.consumers()):
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_FEEDS_WHILELOOP_OP)
+      return True
+    if user_included:
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_USER_INCLUDED)
+      return False
+    if user_excluded:
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_USER_EXCLUDED)
+      return True
+    if not out_tensor.get_shape().is_fully_defined():
+      # If trace mode is nan-inf, norm or max, then the tensor will be reduced
+      # to a scalar before the outside compilation call.
+      if self._trace_mode in [
+          _TRACE_MODE_NAN_INF, _TRACE_MODE_NORM, _TRACE_MODE_MAX_ABS
+      ]:
+        self._instrument_records[out_tensor.name] = TensorTracer.reason(
+            op_id, _REASON_TENSOR_GET_TRACED)
+        return False
+      else:
+        self._instrument_records[out_tensor.name] = TensorTracer.reason(
+            op_id, _REASON_DYNAMIC_SHAPE)
+        return True
+    rank = len(out_tensor.shape)
+    if rank < 1:
+      # scalar
+      if TensorTracer.unsafe_scalar_trace(out_tensor.op):
+        self._instrument_records[out_tensor.name] = TensorTracer.reason(
+            op_id, _REASON_UNSAFE_SCALAR)
+        return True
+      else:
+        self._instrument_records[out_tensor.name] = TensorTracer.reason(
+            op_id, _REASON_SCALAR_GET_TRACED)
+        return False
+    else:
+      # tensor
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_TENSOR_GET_TRACED)
+      return False
+
+  def _filter_execution_path_operations(self, operations, fetches):
+    """Returns the set of ops in the execution path to compute given fetches."""
+
+    # If no fetch provided, then return all operations.
+    if fetches is None:
+      return set(operations)
+    # Convert to list, if a single element is provided.
+    if not isinstance(fetches, (list, tuple)):
+      fetches = [fetches]
+    # If a tensor is given as fetch, convert it to op.
+    op_fetches = []
+    for fetch in fetches:
+      if isinstance(fetch, ops.Operation):
+        op_fetches.append(fetch)
+      elif isinstance(fetch, ops.Tensor):
+        op_fetches.append(fetch.op)
+      else:
+        raise RuntimeError('Given fetch: %s is neither a tensor nor an op.'
+                           %fetch)
+
+    execution_path_operations = set(op_fetches)
+    traverse_stack = list(op_fetches)
+    while traverse_stack:
+      head_op = traverse_stack.pop()
+      input_ops = [tensor_input.op for tensor_input in head_op.inputs]
+      input_ops.extend(head_op.control_inputs)
+
+      for input_op in input_ops:
+        if input_op not in execution_path_operations:
+          # Filter out loop condition operations, tracing them causes a cycle.
+          # Trace only the loop-body.
+          if TensorTracer.loop_cond_op(input_op):
+            continue
+          execution_path_operations.add(input_op)
+          traverse_stack.append(input_op)
+    return execution_path_operations
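+    # Editor's note: the loop above is a reverse DFS from the fetches over
+    # both data and control inputs; loop-condition ops are skipped so tracing
+    # cannot introduce a cycle into a while loop.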
+
+  def _determine_traced_tensors(self, graph, ops_in_exec_path):
+    """Determines the tensors that will be traced."""
+
+    self._traced_tensorname_to_cache_idx_map = {}
+    self._cache_idx_to_tensor_idx = []
+    operations = graph.get_operations()
+    checkpoint_operations = self._get_checkpoints(graph)
+    for op_id, op in enumerate(operations):
+      if checkpoint_operations and op.name not in checkpoint_operations:
+        continue
+      user_included = self._is_user_included_op(op)
+      user_excluded = self._is_user_excluded_op(op)
+      in_exec_path = op in ops_in_exec_path
+      if self._skip_op(op_id, op, user_included, user_excluded, in_exec_path):
+        continue
+      for out_tensor in op.outputs:
+        if self._skip_tensor(op_id, out_tensor, user_included,
+                             user_excluded):
+          continue
+        tensor_name = out_tensor.name
+        if tensor_name in self._traced_tensorname_to_cache_idx_map:
+          raise ValueError(
+              'Tensor name %s should not be already in '
+              'traced_tensorname_to_cache_idx_map'%tensor_name)
+        if tensor_name not in self._tensorname_idx_map:
+          raise ValueError(
+              'Tensor name %s is not in the tensorname_idx_map'%tensor_name)
+        tensor_idx = self._tensorname_idx_map[tensor_name]
+        cache_idx = len(self._traced_tensorname_to_cache_idx_map)
+        self._traced_tensorname_to_cache_idx_map[tensor_name] = cache_idx
+        self._cache_idx_to_tensor_idx.append(tensor_idx)
+        if len(self._traced_tensorname_to_cache_idx_map) != len(
+            self._cache_idx_to_tensor_idx):
+          raise RuntimeError(
+              'len(self._traced_tensorname_to_cache_idx_map) != '
+              'len(self._cache_idx_to_tensor_idx)')
+
+  def _check_trace_files(self):
+    """Checks if any requirements for trace files are satisfied."""
+
+    if not self._trace_dir:
+      # traces will be written to stderr. No need to check trace files.
+      return
+    if _trace_files_need_precreated(self._trace_dir):
+      for replica_id in range(0, self._num_replicas):
+        trace_file_path = os.path.join(
+            self._trace_dir,
+            _COMPACT_TRACE_FILE_PREFIX) + '%d'%replica_id
+        if not gfile.Exists(trace_file_path):
+          raise RuntimeError(
+              '%s must be pre-created with the '
+              'appropriate properties.'%trace_file_path)
+    else:
+      if not gfile.Exists(self._trace_dir):
+        gfile.MkDir(self._trace_dir)
+        if not gfile.Exists(self._trace_dir):
+          raise RuntimeError('Failed to create %s'%self._trace_dir)
+
+  def _pre_tracing(self, graph, fetches):
+    """Work needs to be done prior to TPU or CPU tracing."""
+
+    self._check_trace_files()
+    operations = graph.get_operations()
+    (opname_idx_map, tensor_list, self._tensorname_idx_map) = (
+        TensorTracer._make_op_and_tensor_maps(operations))
+    self._write_config_section()
+    self._write_op_list_section(operations)
+    self._write_tensor_list_section(tensor_list, opname_idx_map)
+    # Filter out the operations that won't be executed.
+    # if fetches=None, then ops_in_exec_path = set(operations)
+    ops_in_exec_path = self._filter_execution_path_operations(operations,
+                                                              fetches)
+    self._determine_traced_tensors(graph, ops_in_exec_path)
+    self._write_cache_index_map_section()
+    # Does the topological sort before adding any nodes to the graph.
+    (succeed, sorted_or_cycle) = TensorTracer.topological_sort(graph)
+    if self._use_tensor_values_cache():
+      _create_tensor_values_cache(graph,
+                                  len(self._cache_idx_to_tensor_idx))
+    return (ops_in_exec_path, succeed, sorted_or_cycle)
+
+  def _post_tracing(self, succeed, sorted_or_cycle):
+    """Work needs to be done after TPU or CPU tracing."""
+
+    self._write_reason_section()
+    self._write_graph_section(succeed, sorted_or_cycle)
+    self._close_report_file()
+
+  def _get_checkpoints(self, graph):
+    """Returns the list of Ops that produce the tensors traced with API.
+
+    Args:
+      graph: the graph of Ops.
+
+    Returns:
+      A set of operation names which should be traced.
+    """
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
+                                  _TENSOR_TRACER_CHECKPOINT))
+    checkpoint_operations = set()
+    tensor_tracer_variables = graph.get_collection(_TENSOR_TRACER_COLLECTION)
+    for (tensor, checkpoint_name) in tensor_tracer_variables:
+      self._write_report('%s %s\n'%(tensor.name, checkpoint_name))
+      checkpoint_operations.add(tensor.op.name)
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
+                                  _TENSOR_TRACER_CHECKPOINT))
+    return checkpoint_operations
+
+  def _generate_flush_cache_op(self, graph, start_replica, on_tpu):
+    """Generates an Op that will flush the cache to file.
+
+    Args:
+      graph: the graph of Ops
+      start_replica: the ID of the first replica being flushed by this Op.
+      on_tpu: if the graph is executed on TPU.
+
+    Returns:
+      The Op to flush the cache to file.
+    """
+    def _make_flush_fun(replica_id):
+      """Makes a function for flushing the cache for the given replica."""
+
+      def _fun():
+        """A function that flushes the cache to a file."""
+
+        def _flush_fun(cache):
+          """Flushes the cache to a file."""
+
+          if isinstance(replica_id, str):
+            replica_id_str = replica_id
+          else:
+            replica_id_str = '%d'%replica_id
+          if self._trace_dir:
+            output_path = (os.path.join(self._trace_dir,
+                                        _COMPACT_TRACE_FILE_PREFIX)
+                           + replica_id_str)
+            output_stream = _OUTPUT_STREAM_ESCAPE + output_path
+          else:
+            output_stream = sys.stderr
+          new_step_line = _REPLICA_ID_TAG + replica_id_str
+          print_op = logging_ops.print_v2(
+              new_step_line, '\n',
+              cache, '\n',
+              summarize=-1,
+              output_stream=output_stream)
+          with ops.control_dependencies([print_op]):
+            return constant_op.constant(0).op
+
+        cache = _get_tensor_values_cache(graph)
+        if on_tpu:
+          flush_op = tpu.outside_compilation(_flush_fun, cache.value())
+        else:
+          flush_op = _flush_fun(cache.value())
+        with ops.control_dependencies([flush_op]):
+          reset_value = constant_op.constant(_COMPACT_TRACE_ENTRY_INIT_VALUE,
+                                             dtype=cache.dtype,
+                                             shape=cache.shape)
+          assign_op = state_ops.assign(cache, reset_value).op
+          with ops.control_dependencies([assign_op]):
+            return flush_op.outputs[0]
+
+      return _fun
+
+    def _f(replica_id):
+      return _make_flush_fun(replica_id)
+    def _eq(x):
+      return math_ops.equal(x, self._replica_id)
+    def _do_nothing():
+      return constant_op.constant(0)
+
+    return control_flow_ops.case(
+        {
+            _eq(start_replica): _f(start_replica),
+            _eq(start_replica + 1): _f(start_replica + 1),
+            _eq(start_replica + 2): _f(start_replica + 2),
+            _eq(start_replica + 3): _f(start_replica + 3),
+            _eq(start_replica + 4): _f(start_replica + 4),
+            _eq(start_replica + 5): _f(start_replica + 5),
+            _eq(start_replica + 6): _f(start_replica + 6),
+            _eq(start_replica + 7): _f(start_replica + 7),
+        },
+        default=_do_nothing,
+        exclusive=True).op
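+    # Editor's note: the case above enumerates exactly 8 replicas per host,
+    # which is why trace_tpu() rejects num_replicas_per_host > 8.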
+
+  def _flush_tensor_values_cache(self, graph, tensor_fetches, op_fetches,
+                                 on_tpu):
+    """Flushes the intermediate tensor values in the graph to the cache.
+
+    Args:
+      graph: the graph of Ops
+      tensor_fetches: list of tensor results returned by the model_fn.
+      op_fetches: list of ops that are returned by the model_fn, e.g., train_op.
+      on_tpu: if the graph is executed on TPU.
+
+    Returns:
+      An identical copy of tensor_fetches.
+    """
+    # Add a dependency to op and tensor fetches to make sure that all tracing
+    # ops are executed before flushing trace results.
+    with ops.control_dependencies(op_fetches +
+                                  [tensor.op for tensor in tensor_fetches]):
+      flush_cache_op_list = []
+      for host in range(self._num_hosts):
+        start_replica = host * 8
+        flush_op = self._generate_flush_cache_op(graph, start_replica, on_tpu)
+        flush_cache_op_list.append(flush_op)
+      return control_flow_ops.tuple(tensor_fetches,
+                                    control_inputs=flush_cache_op_list)
+
+  def _process_tensor_fetches(self, tensor_fetches):
+    """Check that tensor_fetches is not empty and have valid tensors."""
+    # If none or empty list.
+    if tensor_fetches is None:
+      raise RuntimeError('tensor_fetches provided to tensor_tracer cannot be '
+                         'None.')
+    if not isinstance(tensor_fetches, (list, tuple)):
+      tensor_fetches = [tensor_fetches]
+    elif not tensor_fetches:
+      raise RuntimeError('tensor_fetches provided to tensor_tracer cannot be '
+                         'empty list.')
+    fetches = []
+    for fetch in tensor_fetches:
+      if isinstance(fetch, ops.Tensor):
+        fetches.append(fetch)
+      else:
+        raise RuntimeError('Given tensor_fetch:%s is not a tensor.' % fetch)
+    return fetches
+
+  def _process_op_fetches(self, op_fetches):
+    """Check that op_fetches have valid ops."""
+    if op_fetches is None:
+      return []
+
+    if not isinstance(op_fetches, (list, tuple)):
+      op_fetches = [op_fetches]
+
+    fetches = []
+    for fetch in op_fetches:
+      if isinstance(fetch, ops.Operation):
+        fetches.append(fetch)
+      else:
+        logging.warning('Ignoring the given op_fetch:%s, which is not an op.' %
+                        fetch)
+    return fetches
+
+  def _convert_fetches_to_input_format(self, input_fetches, current_fetches):
+    """Changes current_fetches' format, so that it matches input_fetches."""
+    if isinstance(input_fetches, ops.Tensor):
+      if len(current_fetches) != 1:
+        raise RuntimeError('Tensor tracer input/output fetches do not match.')
+      return current_fetches[0]
+    else:
+      if len(current_fetches) != len(input_fetches):
+        raise RuntimeError('Tensor tracer input/output fetches do not match.')
+      elif isinstance(input_fetches, tuple):
+        return tuple(current_fetches)
+      else:
+        return current_fetches
+
+  def _get_op_control_flow_context(self, op):
+    """Returns the control flow of the given op.
+
+    Args:
+      op: tf.Operation for which the control flow context is requested.
+    Returns:
+      op_control_flow_context: the control flow context of the given op. If
+      the operation type is LoopExit, returns the outer control flow
+      context.
+    """
+    # pylint: disable=protected-access
+    op_control_flow_context = op._control_flow_context
+    # pylint: enable=protected-access
+    if control_flow_util.IsLoopExit(op):
+      op_control_flow_context = op_control_flow_context.outer_context
+    return op_control_flow_context
+
+  def _trace_execution(self, graph,
+                       tensor_fetches,
+                       op_fetches=None,
+                       on_tpu=True):
+    """Commong tracing function for both CPU and TPUs.
+
+    The caller function should set _device_type, _num_replicas,
+    _num_replicas_per_host, _num_hosts and _replica_id before calling
+    _trace_execution.
+
+    Args:
+      graph: the graph of Ops executed on the TPU.
+      tensor_fetches: a (list, tuple, or a single object) of tensor fetches
+        returned by model_fn given to session.run. The function must be
+        provided with at least one tensor to fetch.
+      op_fetches: A list of op fetches returned by model_fn given to
+        session.run. op_fetches and tensor_fetches are used to determine the
+        nodes that will be executed. Can be None.
+      on_tpu: True if executing on TPU.
+
+    Returns:
+      tensor_fetches: an exact copy of tensor_fetches that has additional
+                      dependencies.
+    Raises:
+      RuntimeError: If tensor_fetches is None or empty.
+    """
+    def _cast_unsupported_dtypes(tensor):
+      """Casts tensor to a supported type."""
+
+      if tensor.dtype == dtypes.int64:
+        # outside-compilation doesn't support int64 input yet.
+        return math_ops.cast(tensor, dtypes.int32)
+      if tensor.dtype in (dtypes.bfloat16, dtypes.float16):
+        # Since the host can't handle bf16, convert the tensor to f32.
+        return math_ops.cast(tensor, dtypes.float32)
+      return tensor
+
+    TensorTracer.check_device_type(self._device_type)
+    # Check in_tensor_fetches, and op_fetches and convert them to lists.
+    processed_t_fetches = self._process_tensor_fetches(tensor_fetches)
+    op_fetches = self._process_op_fetches(op_fetches)
+    all_fetches = op_fetches + [tensor.op for tensor in processed_t_fetches]
+
+    # Filter the set of ops that will be executed, and topological sort.
+    (exec_op_set, succeed, sorted_or_cycle) = self._pre_tracing(graph,
+                                                                all_fetches)
+
+    tensor_fetch_set = set(processed_t_fetches)
+    tracing_ops = []
+
+    # pylint: disable=protected-access
+    current_control_flow_context = graph._get_control_flow_context()
+    # pylint: enable=protected-access
+
+    # Trace ops only if they are in the execution path.
+    for op in exec_op_set:
+      for out_tensor in op.outputs:
+        tensor_name = out_tensor.name
+        if tensor_name not in self._traced_tensorname_to_cache_idx_map:
+          continue
+        # Create the list of consumers before calling _preprocess_traced_tensor.
+        # Otherwise, adding the control input below would introduce a cycle
+        # in the graph.
+        consumers = out_tensor.consumers()
+        # Not all consumers may be in the exec path. Filter out the consumers
+        # to keep the graph simpler.
+        consumers = [cop for cop in consumers if cop in exec_op_set]
+
+        # If there is no consumer of the tensor, there is no need to trace it;
+        # unless the tensor itself is one of the fetches.
+        is_a_fetched_tensor = out_tensor in tensor_fetch_set
+        if (not consumers) and (not is_a_fetched_tensor):
+          continue
+
+        op_control_flow_context = self._get_op_control_flow_context(op)
+        # pylint: disable=protected-access
+        graph._set_control_flow_context(op_control_flow_context)
+        # pylint: enable=protected-access
+        processed_out_tensor = self._preprocess_traced_tensor(out_tensor)
+
+        if on_tpu:
+          processed_out_tensor = _cast_unsupported_dtypes(processed_out_tensor)
+
+        if self._use_tensor_values_cache():
+          cache_idx = self._traced_tensorname_to_cache_idx_map[tensor_name]
+          trace_op = self._save_tensor_value_to_cache_op(graph,
+                                                         cache_idx,
+                                                         processed_out_tensor)
+        elif on_tpu:
+          trace_op = tpu.outside_compilation(
+              self._make_tensor_trace_fun(tensor_name), processed_out_tensor)
+        else:
+          trace_fun = self._make_tensor_trace_fun(tensor_name)
+          trace_op = trace_fun(processed_out_tensor)
+
+        if is_a_fetched_tensor:
+          tracing_ops.append(trace_op)
+          continue
+        # Add it to all consumers, as some consumers may not be executed if they
+        # are in a control flow.
+        for consumer_op in consumers:
+          # pylint: disable=protected-access
+          consumer_op._add_control_input(trace_op)
+          # pylint: enable=protected-access
+
+    # pylint: disable=protected-access
+    graph._set_control_flow_context(current_control_flow_context)
+    # pylint: enable=protected-access
+    if tracing_ops:
+      # If we are tracing a fetched tensor, its dependency is stored in
+      # tracing_ops.
+      processed_t_fetches = control_flow_ops.tuple(processed_t_fetches,
+                                                   control_inputs=tracing_ops)
+    if self._use_tensor_values_cache():
+      processed_t_fetches = self._flush_tensor_values_cache(graph,
+                                                            processed_t_fetches,
+                                                            op_fetches,
+                                                            on_tpu=on_tpu)
+    self._post_tracing(succeed, sorted_or_cycle)
+    # processed_t_fetches is a list at this point. Convert it to the same
+    # format as given in tensor_fetches.
+    return self._convert_fetches_to_input_format(tensor_fetches,
+                                                 processed_t_fetches)
+
+  def trace_tpu(self, graph,
+                tensor_fetches,
+                op_fetches=None,
+                num_replicas=None,
+                num_replicas_per_host=None,
+                num_hosts=None):
+    """Traces the tensors generated by TPU Ops in a TF graph.
+
+    Args:
+      graph: the graph of Ops executed on the TPU.
+      tensor_fetches: a (list, tuple, or a single object) of tensor fetches
+        returned by model_fn given to session.run. The function must be
+        provided with at least one tensor to fetch.
+      op_fetches: A list of op fetches returned by model_fn given to
+        session.run. op_fetches and tensor_fetches are used to determine the
+        nodes that will be executed. Can be None.
+      num_replicas: number of replicas used on the TPU.
+      num_replicas_per_host: number of replicas per TPU host.
+      num_hosts: total number of TPU hosts.
+
+    Returns:
+      tensor_fetches: an exact copy of tensor_fetches that has additional
+                      dependencies.
+    Raises:
+      RuntimeError: If num_replicas_per_host > 8.
+      RuntimeError: If tensor_fetches is None or empty.
+    """
+
+    if graph in TensorTracer._traced_graphs:
+      logging.warning('Graph is already rewritten with tensor tracer, ignoring '
+                      'multiple calls.')
+      return tensor_fetches
+    else:
+      TensorTracer._traced_graphs.add(graph)
+    self._device_type = _DEVICE_TYPE_TPU
+    self._num_replicas = num_replicas
+    self._num_replicas_per_host = num_replicas_per_host
+    self._num_hosts = num_hosts
+    if self._num_replicas is not None:
+      if self._num_replicas_per_host is None:
+        self._num_replicas_per_host = 8
+      if self._num_hosts is None:
+        self._num_hosts = (num_replicas // self._num_replicas_per_host +
+                           (num_replicas % self._num_replicas_per_host > 0))
+
+    if self._num_replicas_per_host and self._num_replicas_per_host > 8:
+      # Checks for the assumption in _generate_flush_cache_op().
+      raise RuntimeError('num_replicas_per_host (%d) is '
+                         'greater than 8'%self._num_replicas_per_host)
+    if self._graph_dump_path:
+      graph_io.write_graph(graph, self._graph_dump_path,
+                           'graph_before_tt.pbtxt')
+    with graph.as_default():
+      self._add_replica_id_to_graph()
+      tensor_fetches = self._trace_execution(graph, tensor_fetches, op_fetches,
+                                             on_tpu=True)
+    if self._graph_dump_path:
+      graph_io.write_graph(graph, self._graph_dump_path,
+                           'graph_after_tt.pbtxt')
+    return tensor_fetches
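+  # Editor's usage sketch (illustrative names, not part of the original
+  # change): inside the code that builds the TPU graph one might write
+  #   tt = TensorTracer()
+  #   fetches = tt.trace_tpu(ops.get_default_graph(), fetches,
+  #                          num_replicas=num_replicas)
+  # i.e. the rewrite is applied to the already-built graph, before
+  # session.run.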
+
+  def trace_cpu(self, graph, tensor_fetches, op_fetches=None):
+    """Traces the tensors generated by CPU Ops in a TF graph.
+
+    Args:
+      graph: the graph of Ops executed on the CPU.
+      tensor_fetches: a (list, tuple, or a single object) of tensor fetches
+        returned by model_fn given to session.run. The function must be
+        provided with at least one tensor to fetch.
+      op_fetches: A list of op fetches returned by model_fn given to
+        session.run. op_fetches and tensor_fetches are used to determine the
+        nodes that will be executed. Can be None.
+
+    Returns:
+      tensor_fetches: an exact copy of tensor_fetches that has additional
+                      dependencies.
+    Raises:
+      RuntimeError: If tensor_fetches is None or empty.
+    """
+
+    if graph in TensorTracer._traced_graphs:
+      logging.warning('Graph is already rewritten with tensor tracer, ignoring '
+                      'multiple calls.')
+      return tensor_fetches
+    else:
+      TensorTracer._traced_graphs.add(graph)
+
+    self._device_type = _DEVICE_TYPE_CPU
+    self._num_replicas = 1
+    self._num_replicas_per_host = 1
+    self._num_hosts = 1
+    self._replica_id = 0
+    if self._graph_dump_path:
+      graph_io.write_graph(graph, self._graph_dump_path,
+                           'graph_before_tt.pbtxt')
+    with graph.as_default():
+      tensor_fetches = self._trace_execution(graph, tensor_fetches, op_fetches,
+                                             on_tpu=False)
+    if self._graph_dump_path:
+      graph_io.write_graph(graph, self._graph_dump_path,
+                           'graph_after_tt.pbtxt')
+    return tensor_fetches
+
+
diff --git a/tensorflow/python/tpu/topology.py b/tensorflow/python/tpu/topology.py
new file mode 100644
index 0000000..00ee21e
--- /dev/null
+++ b/tensorflow/python/tpu/topology.py
@@ -0,0 +1,220 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Defines the `Topology` class, that describes a TPU fabric topology."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.core.protobuf.tpu import topology_pb2
+
+
+def _tpu_device_name(job, task, device):
+  """Returns the device name for the TPU `device` on `task` of `job`."""
+  if job is None:
+    return "/task:%d/device:TPU:%d" % (task, device)
+  else:
+    return "/job:%s/task:%d/device:TPU:%d" % (job, task, device)
+
+
+def _tpu_host_device_name(job, task):
+  """Returns the device name for the CPU device on `task` of `job`."""
+  if job is None:
+    return "/task:%d/device:CPU:0" % task
+  else:
+    return "/job:%s/task:%d/device:CPU:0" % (job, task)
+
+
+class Topology(object):
+  """Describes a set of TPU devices.
+
+  Represents both the shape of the physical mesh and the mapping from
+  TensorFlow TPU devices to physical mesh coordinates.
+  """
+
+  def __init__(self, serialized=None, mesh_shape=None, device_coordinates=None):
+    """Builds a Topology object.
+
+    If `serialized` is not `None`, the topology is parsed from `serialized` and
+    the other arguments are ignored. Otherwise, the topology is computed from
+    `mesh_shape` and `device_coordinates`.
+
+    Args:
+      serialized: A serialized `TopologyProto`, or `None`. If not `None`, the
+        serialized proto is parsed to discover the topology.
+      mesh_shape: A sequence of 3 positive integers, or `None`. If not `None`,
+        the shape of the TPU topology, in number of cores. Ignored if
+        `serialized` is not `None`.
+      device_coordinates: A rank 3 numpy array that describes the mapping from
+        TensorFlow TPU devices to TPU fabric coordinates, or `None`. Ignored
+        if `serialized` is not `None`.
+
+    Raises:
+      ValueError: If `serialized` does not describe a well-formed topology.
+      ValueError: If `serialized` is `None` and `mesh_shape` is not a sequence
+        of 3 positive integers.
+      ValueError: If `serialized` is `None` and `device_coordinates` is not a
+        rank 3 numpy int32 array that describes a valid coordinate mapping.
+    """
+
+    self._serialized = serialized
+
+    if serialized:
+      self._parse_topology(serialized)
+    else:
+      self._mesh_shape = np.asarray(mesh_shape, dtype=np.int32)
+      self._device_coordinates = np.asarray(device_coordinates, np.int32)
+      if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1):
+        raise ValueError("`mesh_shape` must be a sequence of 3 positive "
+                         "entries; got {}".format(self._mesh_shape))
+
+      if (len(self._device_coordinates.shape) != 3 or
+          self._device_coordinates.shape[2] != len(self._mesh_shape)):
+        raise ValueError("`device_coordinates` must be a rank 3 int32 array "
+                         "with minor dimension equal to the mesh shape rank")
+
+    self._topology_tasks, self._topology_devices = self._invert_topology()
+
+  def _parse_topology(self, serialized):
+    """Parses a serialized `TopologyProto` into `self`."""
+    proto = topology_pb2.TopologyProto()
+    proto.ParseFromString(serialized)
+
+    self._mesh_shape = np.array(proto.mesh_shape, dtype=np.int32)
+    if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1):
+      raise ValueError("`mesh_shape` must be a vector of size 3 with positive "
+                       "entries; got {}".format(self._mesh_shape))
+
+    if proto.num_tasks < 0:
+      raise ValueError("`num_tasks` must be >= 0; got {}".format(
+          proto.num_tasks))
+    if proto.num_tpu_devices_per_task < 0:
+      raise ValueError("`num_tpu_devices_per_task` must be >= 0; got {}".format(
+          proto.num_tpu_devices_per_task))
+
+    expected_coordinates_size = (
+        proto.num_tasks * proto.num_tpu_devices_per_task * len(
+            proto.mesh_shape))
+    if len(proto.device_coordinates) != expected_coordinates_size:
+      raise ValueError("`device_coordinates` must have shape num_tasks ({}) * "
+                       "num_tpu_devices_per_task ({}) * len(mesh_shape) ({}); "
+                       "got shape {}".format(proto.num_tasks,
+                                             proto.num_tpu_devices_per_task,
+                                             proto.mesh_shape,
+                                             len(proto.device_coordinates)))
+
+    coords = np.array(proto.device_coordinates, dtype=np.int32)
+    if any(coords < 0):
+      raise ValueError("`device_coordinates` must be >= 0")
+    coords = coords.reshape((proto.num_tasks, proto.num_tpu_devices_per_task,
+                             len(proto.mesh_shape)))
+    self._device_coordinates = coords
+
+  def _invert_topology(self):
+    """Inverts a [task,device,axis] topology to [x,y,z] -> task/device maps."""
+    tasks = np.full(list(self.mesh_shape), -1, dtype=np.int32)
+    devices = np.full(list(self.mesh_shape), -1, dtype=np.int32)
+    for task in xrange(self.device_coordinates.shape[0]):
+      for device in xrange(self.device_coordinates.shape[1]):
+        x, y, z = self.device_coordinates[task, device, :]
+        tasks[x, y, z] = task
+        devices[x, y, z] = device
+    return tasks, devices
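+    # Editor's example: if device_coordinates[1, 3] == [0, 1, 0], then after
+    # inversion tasks[0, 1, 0] == 1 and devices[0, 1, 0] == 3.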
+
+  @property
+  def mesh_shape(self):
+    """A rank 1 int32 array describing the shape of the TPU topology."""
+    return self._mesh_shape
+
+  @property
+  def mesh_rank(self):
+    """Returns the number of dimensions in the mesh."""
+    return len(self._mesh_shape)
+
+  @property
+  def device_coordinates(self):
+    """Describes the mapping from TPU devices to topology coordinates.
+
+    Returns:
+      A rank 3 int32 array with shape `[tasks, devices, axis]`.
+      `tasks` is the number of tasks in the TPU cluster, `devices` is the number
+      of TPU devices per task, and `axis` is the number of axes in the TPU
+      cluster topology. Each entry gives the `axis`-th coordinate in the
+      topology of a task/device pair. TPU topologies are 3-dimensional, with
+      dimensions `(x, y, core number)`.
+    """
+    return self._device_coordinates
+
+  def task_ordinal_at_coordinates(self, device_coordinates):
+    """Returns the TensorFlow task number attached to `device_coordinates`.
+
+    Args:
+      device_coordinates: An integer sequence describing a device's physical
+        coordinates in the TPU fabric.
+
+    Returns:
+      The TensorFlow task number that contains the TPU device with those
+      physical coordinates.
+    """
+    return self._topology_tasks[tuple(device_coordinates)]
+
+  def tpu_device_ordinal_at_coordinates(self, device_coordinates):
+    """Returns the TensorFlow device number at `device_coordinates`.
+
+    Args:
+      device_coordinates: An integer sequence describing a device's physical
+        coordinates in the TPU fabric.
+
+    Returns:
+      The TensorFlow device number within the task that is attached to the
+      device with those physical coordinates.
+    """
+    return self._topology_devices[tuple(device_coordinates)]
+
+  def cpu_device_name_at_coordinates(self, device_coordinates, job=None):
+    """Returns the CPU device attached to a logical core."""
+    return _tpu_host_device_name(
+        job, self._topology_tasks[tuple(device_coordinates)])
+
+  def tpu_device_name_at_coordinates(self, device_coordinates, job=None):
+    """Returns the name of the TPU device assigned to a logical core."""
+    return _tpu_device_name(job,
+                            self._topology_tasks[tuple(device_coordinates)],
+                            self._topology_devices[tuple(device_coordinates)])
+
+  @property
+  def num_tasks(self):
+    """Returns the number of TensorFlow tasks in the TPU slice."""
+    return self._device_coordinates.shape[0]
+
+  @property
+  def num_tpus_per_task(self):
+    """Returns the number of TPU devices per task in the TPU slice."""
+    return self._device_coordinates.shape[1]
+
+  def serialized(self):
+    """Returns the serialized form of the topology."""
+    if self._serialized is None:
+      proto = topology_pb2.TopologyProto()
+      proto.mesh_shape[:] = list(self._mesh_shape)
+      proto.num_tasks = self._device_coordinates.shape[0]
+      proto.num_tpu_devices_per_task = self._device_coordinates.shape[1]
+      proto.device_coordinates.extend(list(self._device_coordinates.flatten()))
+      self._serialized = proto.SerializeToString()
+
+    return self._serialized
diff --git a/tensorflow/python/tpu/topology_test.py b/tensorflow/python/tpu/topology_test.py
new file mode 100644
index 0000000..9e1b7de
--- /dev/null
+++ b/tensorflow/python/tpu/topology_test.py
@@ -0,0 +1,45 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Tests for topology.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import topology
+
+
+class TopologyTest(test.TestCase):
+
+  def testSerialization(self):
+    """Tests if the class is able to generate serialized strings."""
+    original_topology = topology.Topology(
+        mesh_shape=[1, 1, 2],
+        device_coordinates=[[[0, 0, 0], [0, 0, 1]]],
+    )
+    serialized_str = original_topology.serialized()
+    new_topology = topology.Topology(serialized=serialized_str)
+
+    # Make sure the topology recovered from serialized str is same as the
+    # original topology.
+    self.assertAllEqual(
+        original_topology.mesh_shape, new_topology.mesh_shape)
+    self.assertAllEqual(
+        original_topology.device_coordinates, new_topology.device_coordinates)
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/tpu/tpu.py b/tensorflow/python/tpu/tpu.py
new file mode 100644
index 0000000..02489a9
--- /dev/null
+++ b/tensorflow/python/tpu/tpu.py
@@ -0,0 +1,1576 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+
+"""Library of TPU helper functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.core.framework import attr_value_pb2
+from tensorflow.core.protobuf.tpu import dynamic_padding_pb2 as dynamic_padding
+from tensorflow.python.compat import compat as api_compat
+from tensorflow.python.framework import device as pydev
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import tpu_function
+from tensorflow.python.tpu import xla
+from tensorflow.python.tpu.ops import tpu_ops
+from tensorflow.python.util import compat
+from tensorflow.python.util import nest
+
+
+# Operations that indicate some error in the user's graph, e.g. a placeholder
+# that's introduced outside of the infeed.
+_BLACKLISTED_OPS = set([
+    "Placeholder",
+])
+
+# XLA doesn't currently support reading of intermediate tensors, thus some ops
+# are not supported.
+_UNSUPPORTED_OPS = set([
+    "AudioSummary",
+    "AudioSummaryV2",
+    "HistogramSummary",
+    "ImageSummary",
+    "MergeSummary",
+    "Print",
+    "ScalarSummary",
+    "TensorSummary",
+    "TensorSummaryV2",
+    ])
+
+_MAX_WARNING_LINES = 5
+
+_TPU_REPLICATE_ATTR = "_tpu_replicate"
+_TPU_COMPILATION_STATUS_ATTR = "_tpu_compilation_status"
+_OUTSIDE_COMPILATION_ATTR = "_xla_outside_compilation"
+
+
+def _tpu_system_device_name(job):
+  """Returns the device name for the TPU_SYSTEM device of `job`."""
+  if job is None:
+    return "/device:TPU_SYSTEM:0"
+  else:
+    return "/job:%s/device:TPU_SYSTEM:0" % job
+
+
+def initialize_system(embedding_config=None, job=None):
+  """Initializes a distributed TPU system for use with TensorFlow.
+
+  Args:
+    embedding_config: If not None, a `TPUEmbeddingConfiguration` proto
+      describing the desired configuration of the hardware embedding lookup
+      tables. If embedding_config is None, no hardware embeddings can be used.
+    job: The job (the XXX in TensorFlow device specification /job:XXX) that
+      contains the TPU devices that will be initialized. If job=None it is
+      assumed there is only one job in the TensorFlow flock, and an error will
+      be returned if this assumption does not hold.
+  Returns:
+    A serialized `TopologyProto` that describes the TPU system. Note:
+      the topology must be evaluated using `Session.run` before it can be used.
+  """
+  config_string = ("" if embedding_config is None else
+                   embedding_config.SerializeToString())
+  with ops.device(_tpu_system_device_name(job)):
+    return tpu_ops.configure_distributed_tpu(embedding_config=config_string)
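+  # Editor's usage sketch (illustrative, not part of the original change):
+  #   serialized = session.run(initialize_system())
+  #   topology = topology_lib.Topology(serialized=serialized)
+  # where `topology_lib` stands for tensorflow.python.tpu.topology.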
+
+
+def shutdown_system(job=None):
+  """Shuts down a running a distributed TPU system."""
+  with ops.device(_tpu_system_device_name(job)):
+    shutdown_distributed_tpu = tpu_ops.shutdown_distributed_tpu()
+  return shutdown_distributed_tpu
+
+
+def core(num):
+  """Returns the device name for a core in a replicated TPU computation.
+
+  Args:
+    num: the virtual core number within each replica to which operators should
+    be assigned.
+  Returns:
+    A device name, suitable for passing to `tf.device()`.
+  """
+  return "device:TPU_REPLICATED_CORE:{}".format(num)
+
+
+class TPUReplicateContext(control_flow_ops.XLAControlFlowContext):
+  """A `ControlFlowContext` for nodes inside a TPU computation.
+
+  The primary role of `TPUReplicateContext` is to mark operators inside a
+  tpu.replicate() computation with the attribute "_tpu_replicate=XYZ", where XYZ
+  is a unique name.
+
+  We use a `ControlFlowContext` to perform the annotation since it integrates
+  with Tensorflow constructs like ResourceVariables. For example, if a
+  `ResourceVariable` is constructed inside a tpu.replicate() block, the
+  `ResourceVariable` implementation can use
+  `with ops.control_dependencies(None)` to build the variable's definition
+  outside the replicated computation.
+  """
+
+  def __init__(self, name, num_replicas, pivot):
+    """Builds a new TPUReplicateContext.
+
+    Args:
+      name: a unique name for the context, used to populate the `_tpu_replicate`
+        attribute.
+      num_replicas: an integer that gives the number of replicas for the
+        computation.
+      pivot: a pivot node. Nodes in the TPUReplicateContext that do not have any
+        inputs will have a control dependency on the pivot node. This ensures
+        that nodes are correctly included in any enclosing control flow
+        contexts.
+    """
+    super(TPUReplicateContext, self).__init__()
+    self._num_replicas = num_replicas
+    self._outer_device_function_stack = None
+    self._oc_dev_fn_stack = None
+    self._outside_compilation_cluster = None
+    self._outside_compilation_counter = 0
+    self._in_gradient_colocation = None
+    self._gradient_colocation_stack = []
+    self._host_compute_core = []
+    self._name = name
+    self._name_as_bytes = compat.as_bytes(name)
+    self._unsupported_ops = []
+    self._pivot = pivot
+    self._replicated_vars = {}
+
+  def get_replicated_var_handle(self, name, vars_):
+    """Returns a variable handle for replicated TPU variable 'var'.
+
+    This is a method used by an experimental replicated variable implementation
+    and is not intended as a public API.
+
+    Args:
+      name: The common name of the variable.
+      vars_: The replicated TPU variables.
+
+    Returns:
+      The handle of the TPU replicated input node.
+    """
+    handle = self._replicated_vars.get(name)
+    if handle is not None:
+      return handle
+
+    # Builds a TPUReplicatedInput node for the variable, if one does not already
+    # exist. The TPUReplicatedInput node must belong to the enclosing
+    # control-flow scope of the TPUReplicateContext.
+    # TODO(phawkins): consider changing the contract of the TPU encapsulation
+    # so the TPUReplicatedInput nodes go inside the TPUReplicateContext scope
+    # instead.
+
+    # pylint: disable=protected-access
+    graph = ops.get_default_graph()
+    saved_context = graph._get_control_flow_context()
+    graph._set_control_flow_context(self.outer_context)
+    handle = tpu_ops.tpu_replicated_input(
+        [v.handle for v in vars_], name=name + "/handle")
+    graph._set_control_flow_context(saved_context)
+    # pylint: enable=protected-access
+    self._replicated_vars[name] = handle
+    return handle
+
+  def report_unsupported_operations(self):
+    if self._unsupported_ops:
+      op_str = "\n".join(["  %s (%s)" % (op.type, op.name)
+                          for op in self._unsupported_ops[:_MAX_WARNING_LINES]])
+      logging.warning("%d unsupported operations found: \n%s",
+                      len(self._unsupported_ops), op_str)
+      if len(self._unsupported_ops) > _MAX_WARNING_LINES:
+        logging.warning("... and %d more" %
+                        (len(self._unsupported_ops) - _MAX_WARNING_LINES))
+
+  def EnterGradientColocation(self, op, gradient_uid):
+    if op is not None:
+      self._gradient_colocation_stack.append(op)
+      if not self._outside_compilation_cluster:
+        try:
+          outside_attr = op.get_attr(_OUTSIDE_COMPILATION_ATTR)
+          if self._in_gradient_colocation:
+            raise NotImplementedError(
+                "Cannot nest gradient colocation operations outside compilation"
+            )
+          if gradient_uid == "__unsupported__":
+            raise NotImplementedError(
+                "No gradient_uid calling gradient within outside_compilation")
+          # When we take the gradient of an op X in an outside_compilation
+          # cluster C in a forward computation we would like to put the ops
+          # corresponding to the gradient of X into a new outside_compilation
+          # cluster C'. However, if we take the gradient of X twice, the second
+          # one should get yet another new outside_compilation cluster C''.
+          #
+          # The mechanism we adopt is to use a 'root_cluster' which is the
+          # cluster that X was in before we took gradients, and a 'gradient_uid'
+          # which is different for every invocation of gradients, and put the
+          # gradient of X in cluster 'root_cluster.gradient_uid'.
+          #
+          # When taking a gradient of a gradient, some ops will be colocated
+          # with Op in the forward pass (e.g., cluster root_cluster) and some in
+          # the backward pass (e.g., cluster root_cluster.initial_gradient_uid).
+          # We need all of the grad-of-grad ops to be in the same cluster to
+          # avoid cyclic dependencies between clusters. We adopt a heuristic
+          # that puts any op clustered with root_cluster.<xxx> in
+          # root_cluster.gradient_uid, even if xxx was initial_gradient_uid.
+          self._in_gradient_colocation = op
+          parts = outside_attr.split(".")
+          cluster = parts[0] + "." + gradient_uid
+          self._EnterOutsideCompilationScope(cluster=cluster)
+        except ValueError:
+          # The attr was not present: do nothing.
+          pass
+
+  def ExitGradientColocation(self, op, gradient_uid):
+    if op is not None:
+      if not self._gradient_colocation_stack:
+        raise errors.InternalError(
+            op.node_def, op,
+            "Badly nested gradient colocation: empty stack when popping Op " +
+            op.name)
+      last_op = self._gradient_colocation_stack.pop()
+      if op is last_op:
+        if op is self._in_gradient_colocation:
+          self._in_gradient_colocation = None
+          self._ExitOutsideCompilationScope()
+      else:
+        raise errors.InternalError(
+            op.node_def, op, "Badly nested gradient colocation, expected " +
+            last_op + ", got " + op.name)
+
+  def _EnterOutsideCompilationScope(self, cluster=None):
+
+    class FakeOp(object):
+      """A helper class to determine the current device.
+
+      Supports only the type and device set/get methods needed to run the
+      graph's _apply_device_functions method.
+      """
+
+      def __init__(self):
+        self._device = ""
+
+      @property
+      def type(self):
+        return "FakeOp"
+
+      @property
+      def device(self):
+        return self._device
+
+      def _set_device(self, device):
+        if isinstance(device, pydev.DeviceSpec):
+          self._device = device.to_string()
+        else:
+          self._device = device
+
+    if self._outside_compilation_cluster:
+      raise NotImplementedError("Cannot nest outside_compilation clusters")
+    if cluster:
+      self._outside_compilation_cluster = cluster
+    else:
+      self._outside_compilation_cluster = str(self._outside_compilation_counter)
+      self._outside_compilation_counter += 1
+    graph = ops.get_default_graph()
+    fake_op = FakeOp()
+    graph._apply_device_functions(fake_op)  # pylint: disable=protected-access
+    device = pydev.DeviceSpec.from_string(fake_op.device)
+    if (device.device_type == "TPU_REPLICATED_CORE" and
+        device.device_index is not None):
+      self._host_compute_core.append(self._outside_compilation_cluster + ":" +
+                                     str(device.device_index))
+    self._oc_dev_fn_stack = graph._device_function_stack  # pylint: disable=protected-access
+    graph._device_function_stack = self._outer_device_function_stack  # pylint: disable=protected-access
+
+  def _ExitOutsideCompilationScope(self):
+    if not self._outside_compilation_cluster:
+      raise NotImplementedError(
+          "Attempted to exit outside_compilation scope when not in scope")
+    self._outside_compilation_cluster = None
+    graph = ops.get_default_graph()
+    graph._device_function_stack = self._oc_dev_fn_stack  # pylint: disable=protected-access
+
+  def Enter(self):
+    if not self._outer_device_function_stack:
+      # Capture the device function stack at the time of first entry
+      # since that is the stack that will be used outside_compilation.
+      graph = ops.get_default_graph()
+      # pylint: disable=protected-access
+      self._outer_device_function_stack = graph._device_function_stack.copy()
+      # pylint: enable=protected-access
+    super(TPUReplicateContext, self).Enter()
+
+  def HostComputeCore(self):
+    return self._host_compute_core
+
+  def _RemoveExternalControlEdges(self, op):
+    """Remove any external control dependency on this op."""
+    internal_control_inputs = []
+    external_control_inputs = []
+    for x in op.control_inputs:
+      # pylint: disable=protected-access
+      is_internal_op = False
+      ctxt = x._get_control_flow_context()
+      while ctxt is not None:
+        if ctxt == self:
+          is_internal_op = True
+          break
+        ctxt = ctxt._outer_context
+      if is_internal_op:
+        internal_control_inputs.append(x)
+      else:
+        external_control_inputs.append(x)
+      # pylint: enable=protected-access
+    # pylint: disable=protected-access
+    op._remove_all_control_inputs()
+    op._add_control_inputs(internal_control_inputs)
+    # pylint: enable=protected-access
+    return internal_control_inputs, external_control_inputs
+
+  def AddOp(self, op):
+    # pylint: disable=protected-access
+    if op.type in _BLACKLISTED_OPS:
+      logging.error("Operation of type %s (%s) is not supported on the TPU. "
+                    "Execution will fail if this op is used in the graph. " %
+                    (op.type, op.name))
+
+    if op.type in _UNSUPPORTED_OPS:
+      self._unsupported_ops.append(op)
+
+    if any(x.dtype._is_ref_dtype for x in op.inputs):
+      raise NotImplementedError(
+          "Non-resource Variables are not supported inside TPU computations "
+          "(operator name: %s)" % op.name)
+    if _TPU_REPLICATE_ATTR in op.node_def.attr:
+      raise ValueError("TPU computations cannot be nested")
+    op._set_attr(_TPU_REPLICATE_ATTR,
+                 attr_value_pb2.AttrValue(s=self._name_as_bytes))
+    if self._outside_compilation_cluster:
+      op._set_attr(
+          _OUTSIDE_COMPILATION_ATTR,
+          attr_value_pb2.AttrValue(
+              s=compat.as_bytes(self._outside_compilation_cluster)))
+    if self._num_replicas > 1 or not self._outside_compilation_cluster:
+      # Prevent feeding or fetching anything that is being compiled,
+      # and any replicated outside_compilation Op.
+      op.graph.prevent_feeding(op)
+      op.graph.prevent_fetching(op)
+
+    # Remove any control edges from outer control flow contexts. These may cause
+    # mismatched frame errors.
+    (internal_control_inputs,
+     external_control_inputs) = self._RemoveExternalControlEdges(op)
+
+    if not op.inputs:
+      # Add a control edge from the control pivot to this op.
+      if not internal_control_inputs:
+        # pylint: disable=protected-access
+        op._add_control_input(self.GetControlPivot())
+        # pylint: enable=protected-access
+    else:
+      for index in xrange(len(op.inputs)):
+        x = op.inputs[index]
+        real_x = self.AddValue(x)
+        if real_x != x:
+          op._update_input(index, real_x)  # pylint: disable=protected-access
+
+    if external_control_inputs:
+      # Use an identity to pull control inputs as data inputs. Note that we
+      # ignore ops which don't have outputs. TODO(phawkins): fix that.
+      with ops.control_dependencies(None):
+        self.Enter()
+        external_control_inputs = [
+            array_ops.identity(x.outputs[0]).op
+            for x in external_control_inputs
+            if x.outputs
+        ]
+        self.Exit()
+      # pylint: disable=protected-access
+      op._add_control_inputs(external_control_inputs)
+      # pylint: enable=protected-access
+
+    # Mark op's outputs as seen by this context and any outer contexts.
+    output_names = [x.name for x in op.outputs]
+    context = self
+    while context is not None:
+      # pylint: disable=protected-access
+      context._values.update(output_names)
+      context = context._outer_context
+      # pylint: enable=protected-access
+
+    if self._outer_context:
+      self._outer_context.AddInnerOp(op)
+
+  def AddValue(self, val):
+    """Add `val` to the current context and its outer context recursively."""
+    if val.name in self._values:
+      # Use the real value if it comes from outer context.
+      result = self._external_values.get(val.name)
+      return val if result is None else result
+
+    result = val
+    self._values.add(val.name)
+    if self._outer_context:
+      result = self._outer_context.AddValue(val)
+      self._values.add(result.name)
+
+    self._external_values[val.name] = result
+
+    return result
+
+  def AddInnerOp(self, op):
+    self.AddOp(op)
+    if self._outer_context:
+      self._outer_context.AddInnerOp(op)
+
+  @property
+  def grad_state(self):
+    # Define the gradient loop state associated with the TPUReplicateContext
+    # to be None, as the TPUReplicateContext does not get nested, nor does
+    # the grad_state outside the TPUReplicateContext affect the graph inside,
+    # so the grad_state should be as if this were the top-level gradient
+    # state.
+    return None
+
+  @property
+  def back_prop(self):
+    """Forwards to the enclosing while context, if any."""
+    if self.GetWhileContext():
+      return self.GetWhileContext().back_prop
+    return False
+
+  def GetControlPivot(self):
+    return self._pivot
+
+
+def outside_compilation(computation, *args, **kwargs):
+  """Builds part of a computation outside any current TPU replicate scope.
+
+  Args:
+    computation: A Python function that builds the computation to
+      place on the host.
+    *args: the positional arguments for the computation.
+    **kwargs: the keyword arguments for the computation.
+
+  Returns:
+    The Tensors returned by computation.
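+
+  Example (a minimal sketch; `host_print` is a hypothetical host-side
+  function):
+
+    def host_print(x):
+      return tf.Print(x, [x], "executed on the host: ")
+
+    def tpu_computation(x):
+      y = x * x
+      # `host_print` runs on the host, outside the TPU replicate scope.
+      y = tpu.outside_compilation(host_print, y)
+      return y + 1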
+  """
+  args = [] if args is None else args
+  graph = ops.get_default_graph()
+
+  # If we are in a TPUReplicateContext, signal that we are now
+  # outside_compilation
+  initial_context = graph._get_control_flow_context()  # pylint: disable=protected-access
+  context = initial_context
+  while context:
+    if isinstance(context, TPUReplicateContext):
+      context._EnterOutsideCompilationScope()  # pylint: disable=protected-access
+    context = context.outer_context
+
+  retval = computation(*args, **kwargs)
+
+  # If we are in a TPUReplicateContext, signal that we are no longer
+  # outside_compilation
+  final_context = graph._get_control_flow_context()  # pylint: disable=protected-access
+  if initial_context is not final_context:
+    raise NotImplementedError(
+        "Control-flow context cannot be different at start and end of an "
+        "outside_compilation scope")
+  context = initial_context
+  while context:
+    if isinstance(context, TPUReplicateContext):
+      context._ExitOutsideCompilationScope()  # pylint: disable=protected-access
+    context = context.outer_context
+
+  return retval
+
+
+def replicate(computation,
+              inputs=None,
+              infeed_queue=None,
+              device_assignment=None,
+              name=None,
+              maximum_shapes=None):
+  """Builds a graph operator that runs a replicated TPU computation.
+
+  Args:
+    computation: A Python function that builds the computation to replicate.
+    inputs: A list of lists of input tensors or `None` (equivalent to
+      `[[]]`), indexed by `[replica_num][input_num]`. All replicas must
+      have the same number of inputs. Each input can be a nested structure
+      containing values that are convertible to tensors. Note that passing an
+      N-dimensional list of compatible values will result in an N-dimensional
+      list of scalar tensors rather than a single rank-N tensor. If you need
+      different behavior, convert parts of `inputs` to tensors with
+      `tf.convert_to_tensor`.
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
+      of arguments as inputs to computation.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each replica of the computation uses
+      only one core, and there is either only one replica, or the number of
+      replicas is equal to the number of cores in the TPU system.
+    name: (Deprecated) Does nothing.
+    maximum_shapes: A nested structure of tf.TensorShape representing the shape
+      to which the respective component of each input element in each replica
+      should be padded. Any unknown dimensions (e.g. tf.Dimension(None) in a
+      tf.TensorShape or -1 in a tensor-like object) will be padded to the
+      maximum size of that dimension over all replicas. Note that if an input
+      dimension is already static, no padding is applied to it, and
+      `maximum_shapes` must specify the same value or `None` for that
+      dimension. The structure of `maximum_shapes` must match that of
+      `inputs[0]`.
+  Returns:
+    A list of outputs, indexed by `[replica_num]`. Each output can be a nested
+    structure, the same as what computation() returns, with a few exceptions.
+
+    Exceptions include:
+      1) None output: a NoOp is returned that control-depends on computation.
+      2) Single-value output: a tuple containing the value is returned.
+      3) Operation-only outputs: a NoOp is returned that control-depends on
+         computation.
+      TODO(b/121383831): Investigate removing these special cases.
+
+  Raises:
+    ValueError: If all replicas do not have equal numbers of input tensors.
+    ValueError: If the number of inputs per replica does not match
+      the number of formal parameters to `computation`.
+    ValueError: If the static `inputs` dimensions don't match with the values
+      given in `maximum_shapes`.
+    ValueError: If the structure of inputs per replica does not match
+      the structure of `maximum_shapes`.
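+
+  Example (a minimal sketch; `my_net`, `x0` and `x1` are hypothetical):
+
+    def my_net(x):
+      return tf.reduce_sum(x * x)
+
+    # Two replicas, each receiving one input tensor.
+    outputs = tpu.replicate(my_net, inputs=[[x0], [x1]])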
+  """
+  return split_compile_and_replicate(
+      computation,
+      inputs,
+      infeed_queue,
+      device_assignment,
+      name,
+      maximum_shapes=maximum_shapes)[1]
+
+
+def _pad_all_input(inputs, padded_shapes):
+  """Pad all input tensors given padded_shapes.
+
+  The real shape tensors will be concatenated with the padded original inputs.
+
+  Args:
+    inputs: The original inputs.
+    padded_shapes: A list of padded shapes for each input.
+
+  Returns:
+    The padded inputs and a PaddingMap list which maps the padded input
+    dimension to the real shape argument index.
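+
+  For example (illustrative): if each replica's first input has static shape
+  [None, 5] and runtime shapes [2, 5] and [3, 5], and padded_shapes is
+  [TensorShape([None, 5])], both inputs are padded at run time to [3, 5] (the
+  maximum over replicas), and each replica's input list is extended with a
+  uint32 scalar carrying its real size of dimension 0.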
+  """
+  input_shape_tensors = []
+  for core_idx, inputs_per_core in enumerate(inputs):
+    for idx, input_tensor in enumerate(inputs_per_core):
+      if core_idx == 0:
+        input_shape_tensors.append([])
+      input_shape_tensors[idx].append(array_ops.shape(input_tensor))
+
+  maximum_shapes = []
+  for shapes_per_input in input_shape_tensors:
+    maximum_shapes.append(
+        math_ops.reduce_max(array_ops.stack(shapes_per_input), axis=0))
+
+  padded_inputs = []
+  real_shapes = []
+  padding_maps = []
+  for core_idx, inputs_per_core in enumerate(inputs):
+    padded_inputs.append([])
+    real_shapes.append([])
+    real_shape_idx = len(inputs_per_core) - 1
+    for idx, input_tensor in enumerate(inputs_per_core):
+      input_shape_tensor = input_shape_tensors[idx][core_idx]
+      input_shape = input_tensor.get_shape()
+      padded_shape = padded_shapes[idx]
+
+      # The static shape of inputs should be compatible with the given padded
+      # shapes.
+      input_shape.assert_is_compatible_with(padded_shape)
+
+      if input_shape.is_fully_defined():
+        # Do nothing if the shape of the whole tensor is already static.
+        padded_inputs[core_idx].append(input_tensor)
+      else:
+        # Only pad the non static shape dimension.
+        for i, s in enumerate(input_shape):
+          if s.value is None:
+            if core_idx == 0:
+              real_shape_idx += 1
+              padding_map = dynamic_padding.PaddingMap()
+              padding_map.arg_index = idx
+              padding_map.shape_index = i
+              padding_map.padding_arg_index = real_shape_idx
+              padding_maps.append(padding_map)
+            real_shapes[core_idx].append(
+                math_ops.cast(input_shape_tensor[i], dtypes.uint32))
+
+        paddings = []
+        for i, s in enumerate(padded_shape):
+          if input_shape[i].value:
+            # Don't pad if input shape is already static.
+            padding = [0, 0]
+          else:
+            if s.value:
+              # Pad to the given maximum value.
+              padding = [0, s.value - input_shape_tensor[i]]
+            else:
+              # If maximum value is not given, then pad to the maximum dimension
+              # among all the cores.
+              padding = [0, maximum_shapes[idx][i] - input_shape_tensor[i]]
+          paddings.append(padding)
+
+        padded_input = array_ops.pad(input_tensor, paddings)
+        padded_inputs[core_idx].append(padded_input)
+
+  num_replicas = len(padded_inputs)
+  for i in range(num_replicas):
+    padded_inputs[i].extend(real_shapes[i])
+
+  return padded_inputs, padding_maps
+
+
+def split_compile_and_replicate(computation,
+                                inputs=None,
+                                infeed_queue=None,
+                                device_assignment=None,
+                                name=None,
+                                use_tpu=True,
+                                maximum_shapes=None):
+  """Builds graph operators that runs compilation and replicated computation.
+
+  This is a lower level interface than replicate that returns a separate compile
+  and execute output tensor. In the generated graph the compile op feeds into
+  the execute op and no additional compilation is incurred when running the
+  compile op before the execute op. The compile op returns additional
+  information about the compilation but does not return the compiled program.
+
+  Args:
+    computation: A Python function that builds the computation to replicate.
+    inputs: A list of lists of input tensors or `None` (equivalent to
+      `[[]]`), indexed by `[replica_num][input_num]`. All replicas must
+      have the same number of inputs. Each input can be a nested structure
+      containing values that are convertible to tensors. Note that passing an
+      N-dimensional list of compatible values will result in an N-dimensional
+      list of scalar tensors rather than a single rank-N tensor. If you need
+      different behavior, convert parts of `inputs` to tensors with
+      `tf.convert_to_tensor`.
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
+      of arguments as inputs to computation.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each replica of the computation uses
+      only one core, and there is either only one replica, or the number of
+      replicas is equal to the number of cores in the TPU system.
+    name: (Deprecated) Does nothing.
+    use_tpu: When false, the input `computation` is executed on the XLA CPU/GPU
+      backends. Currently, only supports a default placement (computation is
+      placed on GPU if one is available, and on CPU if not).
+    maximum_shapes: A nested structure of tf.TensorShape representing the shape
+      to which the respective component of each input element in each replica
+      should be padded. Any unknown dimensions (e.g. tf.Dimension(None) in a
+      tf.TensorShape or -1 in a tensor-like object) will be padded to the
+      maximum size of that dimension over all replicas. Note that if an input
+      dimension is already static, no padding is applied to it, and
+      `maximum_shapes` must specify the same value or `None` for that
+      dimension. The structure of `maximum_shapes` must match that of
+      `inputs[0]`.
+
+  Returns:
+    A list of lists with the first list corresponding to the compile op and the
+    second a list of output tensors, indexed by `[replica_num][output_num]`.
+  Raises:
+    ValueError: If all replicas do not have equal numbers of input tensors.
+    ValueError: If the number of inputs per replica does not match
+      the number of formal parameters to `computation`.
+    ValueError: If the static `inputs` dimensions don't match with the values
+      given in `maximum_shapes`.
+    ValueError: If the structure of inputs per replica does not match
+      the structure of `maximum_shapes`.
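+
+  Example (a minimal sketch; `my_net`, `x0` and `x1` are hypothetical):
+
+    compile_op, outputs = tpu.split_compile_and_replicate(
+        my_net, inputs=[[x0], [x1]])
+    # Running `compile_op` first surfaces compilation errors before the
+    # replicas execute; `outputs` is indexed by [replica_num][output_num].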
+  """
+  del name
+  inputs = [[]] if inputs is None else inputs
+
+  metadata_kwargs = {}
+  if device_assignment is not None:
+    # Turn the Numpy array into a flattened list so we can pass it as an
+    # operator attribute.
+    metadata_kwargs = {
+        "topology":
+            device_assignment.topology.serialized(),
+        "device_assignment":
+            device_assignment.core_assignment.flatten().tolist()
+    }
+    # TODO(phawkins): remove this case after the forward compatibility window
+    # expires on 2018-10-5.
+    if api_compat.forward_compatible(2018, 10, 5):
+      metadata_kwargs["num_cores_per_replica"] = (
+          device_assignment.num_cores_per_replica)
+    else:
+      metadata_kwargs["computation_shape"] = [
+          device_assignment.num_cores_per_replica
+      ]
+
+  if ((not isinstance(inputs, list)) or
+      any(not isinstance(inp, (list, tuple)) for inp in inputs)):
+    raise TypeError("tpu.replicate() inputs must be a list of lists/tuples")
+
+  num_replicas = len(inputs)
+
+  # No replicas? Nothing to do.
+  if num_replicas == 0:
+    return []
+
+  # Checks all replicas have the same structure.
+  for i in xrange(1, num_replicas):
+    nest.assert_same_structure(inputs[0], inputs[i])
+
+  # Flatten inputs.
+  flat_inputs = [
+      nest.flatten(per_replica_input) for per_replica_input in inputs
+  ]
+  # Converts inputs to Tensors.
+  flat_inputs = [[ops.convert_to_tensor(x) for x in inp] for inp in flat_inputs]
+
+  # Verifies that all replicas have matching numbers and types of inputs.
+  flat_input_types = [x.dtype for x in flat_inputs[0]]
+  input_arity = len(inputs[0])
+  flat_input_arity = len(flat_input_types)
+  for i in range(num_replicas):
+    if len(inputs[i]) != input_arity:
+      raise ValueError("Replicas must have the same number of inputs. "
+                       "Replica 0 had {} inputs, replica {} had {} "
+                       "inputs.".format(input_arity, i, len(inputs[i])))
+
+    types = [x.dtype for x in flat_inputs[i]]
+    if types != flat_input_types:
+      raise ValueError("Replicas must have matching input types. Replica 0 had "
+                       "input types {}, replica {} had input types {}".format(
+                           flat_input_types, i, types))
+
+  arg_error = xla.check_function_argument_count(
+      computation, input_arity, infeed_queue)
+  if arg_error is not None:
+    if infeed_queue is None:
+      raise TypeError(
+          "Supplied computation cannot be called with the specified inputs. "
+          "You specified %d inputs: %s, but the computation needs %s" % (
+              input_arity, str([i.name for i in inputs[0]]), arg_error))
+    else:
+      raise TypeError(
+          "Supplied computation cannot be called with the specified inputs. "
+          "You specified %d inputs: %s and %d additional inputs from infeed,"
+          " but the computation needs %s" % (input_arity, str(
+              [i.name
+               for i in inputs[0]]), infeed_queue.number_of_tuple_elements,
+                                             arg_error))
+
+  if maximum_shapes:
+    if infeed_queue:
+      raise ValueError(
+          "Dynamic input shapes are not supported with infeed queues")
+
+    # Make sure maximum_shapes has the same structure as inputs.
+    nest.assert_same_structure(inputs[0], maximum_shapes, check_types=False)
+
+    # Flatten padded shapes.
+    flat_maximum_shapes = nest.flatten(maximum_shapes)
+    flat_maximum_shapes = [
+        tensor_shape.TensorShape(s) for s in flat_maximum_shapes
+    ]
+
+    flat_inputs, padding_maps = _pad_all_input(flat_inputs, flat_maximum_shapes)
+
+    serialized_padding_maps = []
+    for padding_map in padding_maps:
+      serialized_padding_maps.append(padding_map.SerializeToString())
+    metadata_kwargs["padding_map"] = serialized_padding_maps
+
+  metadata_kwargs["step_marker_location"] = getattr(
+      computation, "step_marker_location", "STEP_MARK_AT_ENTRY")
+
+  graph = ops.get_default_graph()
+
+  # Fan-in: Builds a TPUReplicatedInput node for each input.
+  flat_replicated_inputs = []
+  for i in range(0, len(flat_inputs[0])):
+    replicas = [flat_inputs[replica][i] for replica in xrange(num_replicas)]
+    flat_replicated_inputs.append(
+        tpu_ops.tpu_replicated_input(replicas, name="input{}".format(i)))
+
+  cluster_name = graph.unique_name("cluster")
+  pivot = control_flow_ops.no_op(name=cluster_name + "/pivot")
+  context = TPUReplicateContext(
+      name=cluster_name, num_replicas=num_replicas, pivot=pivot)
+  try:
+    context.Enter()
+
+    metadata = tpu_ops.tpu_replicate_metadata(
+        num_replicas=num_replicas, use_tpu=use_tpu, **metadata_kwargs)
+
+    with tpu_function.tpu_shard_context(
+        num_replicas), ops.control_dependencies([metadata]):
+
+      # Add identity ops so even unused inputs are "consumed" by the
+      # computation. This is to avoid orphaned TPUReplicatedInput nodes.
+      # TODO(phawkins): consider instead pruning unused TPUReplicatedInput
+      # and eliding trivial TPUReplicatedInput/TPUReplicatedOutput pairs.
+      flat_replicated_inputs = [
+          array_ops.identity(x, name="replicated_input_{}".format(i))
+          for i, x in enumerate(flat_replicated_inputs)
+      ]
+      for i in flat_replicated_inputs:
+        # pylint: disable=protected-access
+        # Add an attribute to the identity node so that it can be removed in
+        # the encapsulate-TPU-computation pass if unused. However, we don't
+        # remove inputs when dynamic padding is enabled.
+        # TODO(rxsang): Use other ways except argument index in padding_map so
+        # outside compilation can work with dynamic padding correctly.
+        if maximum_shapes is None:
+          i.op._set_attr("_tpu_input_identity",
+                         attr_value_pb2.AttrValue(b=True))
+        # pylint: enable=protected-access
+
+      # Unflatten the computation inputs to match original input structure.
+      computation_inputs = nest.pack_sequence_as(
+          structure=inputs[0],
+          flat_sequence=flat_replicated_inputs[:flat_input_arity])
+
+      # If there is an infeed queue, adds the dequeued values to the
+      # computation's inputs.
+      if infeed_queue is not None:
+        infeed_queue.set_number_of_shards(num_replicas)
+        for t in infeed_queue.generate_dequeue_op():
+          computation_inputs.append(t)
+
+      # Only resource variables work inside a TPU computation, so turn on
+      # resource variables for the computation.
+      # TODO(phawkins): consider removing this code. It will
+      # be less confusing to clients if they knowingly choose to use resource
+      # variables.
+      # Partitioned variables are not supported (b/112311320).
+      vscope = variable_scope.get_variable_scope()
+      saved_use_resource = vscope.use_resource
+      saved_custom_getter = vscope.custom_getter
+
+      def custom_getter(getter, name, *args, **kwargs):
+        """Variables on TPU have a few restrictions."""
+        partitioner = kwargs["partitioner"]
+        if partitioner is not None:
+          kwargs["partitioner"] = None
+          logging.warning(
+              "Partitioned variables are not supported on TPU. Got "
+              "`partitioner` that is {} for variable {}. "
+              "Setting `partitioner` to `None`."
+              .format(partitioner, name))
+        if saved_custom_getter is None:
+          return getter(name, *args, **kwargs)
+        else:
+          return saved_custom_getter(getter, name, *args, **kwargs)
+
+      vscope.set_use_resource(True)
+      vscope.set_custom_getter(custom_getter)
+
+      outputs = computation(*computation_inputs)
+
+      vscope.set_use_resource(saved_use_resource)
+      vscope.set_custom_getter(saved_custom_getter)
+
+    outputs_is_flat = xla.is_flat(outputs)
+    if outputs_is_flat:
+      output_tensors, control_deps = _postprocess_flat_outputs(outputs)
+    else:
+      output_tensors, control_deps = _postprocess_non_flat_outputs(outputs)
+
+    # tensor_tracer imports tpu.py; import it locally here to avoid an
+    # import cycle.
+    # pylint: disable=g-import-not-at-top
+    from tensorflow.python.tpu import tensor_tracer
+    # pylint: enable=g-import-not-at-top
+    if tensor_tracer.TensorTracer.is_enabled():
+      tt = tensor_tracer.TensorTracer()
+      output_tensors = tt.trace_tpu(ops.get_default_graph(),
+                                    output_tensors, control_deps,
+                                    num_replicas)
+
+    context.ExitResult(output_tensors)
+  finally:
+    context.report_unsupported_operations()
+    context.Exit()
+    host_compute_core = context.HostComputeCore()
+
+  if host_compute_core:
+    attr_value = attr_value_pb2.AttrValue()
+    attr_value.list.s.extend([compat.as_bytes(x) for x in host_compute_core])
+    metadata._set_attr("host_compute_core", attr_value)  # pylint: disable=protected-access
+
+  with ops.control_dependencies([metadata]):
+    if use_tpu:
+      compile_status = tpu_ops.tpu_compilation_result()
+      op = compile_status.op
+      attr_value = attr_value_pb2.AttrValue(s=compat.as_bytes(cluster_name))
+      op._set_attr(_TPU_COMPILATION_STATUS_ATTR, attr_value)  # pylint: disable=protected-access
+    else:
+      compile_status = control_flow_ops.no_op(name="compilation_status")
+
+  if not output_tensors:
+    # Returns a list of NoOps dependent on the replication Op, indexed by
+    # [replica_num].
+    return [
+        compile_status,
+        [
+            control_flow_ops.group(control_deps, name="shard_%d" % i)
+            for i in range(num_replicas)
+        ]
+    ]
+
+  # Fan-out: Builds a TPUReplicatedOutput node for each output.
+  replicated_outputs = [[] for i in xrange(num_replicas)]
+  for i, t in enumerate(output_tensors):
+    ys = tpu_ops.tpu_replicated_output(
+        t, num_replicas, name="output{}".format(i))
+
+    # Wraps the outputs in identity operators so the names of any possible
+    # `fetch` nodes are preserved by the replication rewrite.
+    with ops.control_dependencies(control_deps):
+      for replica in xrange(num_replicas):
+        replicated_outputs[replica].append(
+            array_ops.identity(
+                ys[replica], name="output_%d_shard_%d" % (i, replica)))
+
+  if not outputs_is_flat:
+    replicated_outputs = [
+        nest.pack_sequence_as(outputs, replica_outs)
+        for replica_outs in replicated_outputs
+    ]
+
+  return [compile_status, replicated_outputs]
+
+
+def _postprocess_flat_outputs(outputs):
+  """Validates non-flat outputs, add backs device assignments and other attrs.
+
+  Args:
+    outputs: Output from `computation` inside `tpu.rewrite`.
+
+  Returns:
+    Tensors and Operations extracted from outputs.
+  """
+  # The following code segment preserves legacy behavior. Previously we only
+  # supported flat outputs, and for consistency it was convenient to convert
+  # even a single element into a tuple. But now that we support arbitrary
+  # output structures, this is no longer necessary.
+  # TODO(b/121383831): Migrate all legacy use cases and delete this special
+  # case.
+  # If the computation returns `None`, make it an empty tuple.
+  if outputs is None:
+    outputs = tuple()
+  # If the computation only returned one value, makes it a tuple.
+  if not isinstance(outputs, collections.Sequence):
+    outputs = (outputs,)
+
+  # Append `no_op` here so that fetching any return value of this function
+  # will trigger TPUExecute node.
+  outputs += (control_flow_ops.no_op(),)
+  try:
+    with ops.device(core(0)):
+      outputs = [
+          o if isinstance(o, ops.Operation) else ops.convert_to_tensor(o)
+          for o in outputs
+      ]
+  except Exception as e:
+    raise ValueError(
+        "TPU function return values must all either be Operations or "
+        "convertible to Tensors. Got '%s'" % str(e))
+
+  # Separates the returned Operations and Tensors.
+  output_operations = [o for o in outputs if isinstance(o, ops.Operation)]
+  output_tensors = [o for o in outputs if not isinstance(o, ops.Operation)]
+
+  if outputs != output_tensors + output_operations:
+    raise ValueError(
+        "TPU functions must return zero-or more Tensor values followed by "
+        "zero or more Operations.")
+
+  # Wraps outputs in Identity ops. Otherwise a replicated input copied
+  # straight to an output would bypass the replicate(). This would be bad
+  # because the TPUReplicatedInput/TPUReplicatedOutput operator would not
+  # be rewritten away, leading to a runtime error.
+  # TODO(phawkins): extend the rewrite to elide these nodes instead.
+  new_output_tensors = []
+  for t in output_tensors:
+    with ops.device(t.device if t.device else core(0)):
+      o = array_ops.identity(t)
+      # pylint: disable=protected-access
+      o.op._set_attr("_tpu_output_identity", attr_value_pb2.AttrValue(b=True))
+      # pylint: enable=protected-access
+      new_output_tensors.append(o)
+  return new_output_tensors, output_operations
+
+
+def _postprocess_non_flat_outputs(outputs):
+  """Validates non-flat outputs, add backs device assignments and other attrs.
+
+  Args:
+    outputs: Output from `computation` inside `tpu.rewrite`.
+
+  Returns:
+    Tensors extracted from outputs, and an empty list because Operations are
+    not allowed in non-flat outputs.
+  """
+
+  # Flatten output items.
+  flat_outputs = nest.flatten(outputs)
+
+  # Convert all non-Operation outputs to Tensors.
+  for i, o in enumerate(flat_outputs):
+    if isinstance(o, ops.Operation):
+      raise ValueError(
+          "tpu.rewrite does not support Operation as return value in non-flat "
+          "output structure. You can set returned Operations as control "
+          "dependencies of returned Tensors so Operations are triggered when "
+          'Tensors are evaluated. Operation found: "%s"' % o.name)
+
+    try:
+      o = ops.convert_to_tensor(o)
+    except Exception as e:
+      raise ValueError(
+          "TPU function return values must all either be Operations or "
+          'convertible to Tensors. Got error: "%s"' % str(e))
+
+    # Wraps outputs in Identity ops. Otherwise a replicated input copied
+    # straight to an output would bypass the replicate(). This would be bad
+    # because the TPUReplicatedInput/TPUReplicatedOutput operator would not
+    # be rewritten away, leading to a runtime error.
+    # TODO(phawkins): extend the rewrite to elide these nodes instead.
+    with ops.device(core(0)):
+      o = array_ops.identity(o)
+      # pylint: disable=protected-access
+      o.op._set_attr("_tpu_output_identity", attr_value_pb2.AttrValue(b=True))
+      # pylint: enable=protected-access
+      flat_outputs[i] = array_ops.identity(o)
+
+  # All flat_outputs are Tensors; none are Operations.
+  return flat_outputs, []
+
+
+def split_compile_and_shard(computation,
+                            inputs=None,
+                            num_shards=1,
+                            input_shard_axes=None,
+                            outputs_from_all_shards=True,
+                            output_shard_axes=None,
+                            infeed_queue=None,
+                            device_assignment=None,
+                            name=None):
+  """Shards `computation` for parallel execution.
+
+  `inputs` must be a list of Tensors or None (equivalent to an empty list), each
+  of which has a corresponding split axis (from `input_shard_axes`). Each input
+  is split into `num_shards` pieces along the corresponding axis, and
+  computation is applied to each shard in parallel.
+
+  Tensors are broadcast to all shards if they are lexically captured by
+  `computation`. e.g.,
+
+  x = tf.constant(7)
+  def computation():
+    return x + 3
+  ... = shard(computation, ...)
+
+  If `outputs_from_all_shards` is true, the outputs from all shards of
+  `computation` are concatenated back together along their `output_shard_axes`.
+  Otherwise, each output is taken from an arbitrary shard.
+
+  Inputs and outputs of the computation must be at least rank-1 Tensors.
+
+  Args:
+    computation: A Python function that builds a computation to apply to each
+      shard of the input.
+    inputs: A list of input tensors or None (equivalent to an empty list). Each
+      input tensor has a corresponding shard axis, given by `input_shard_axes`;
+      the tensor's size along that axis must be divisible by `num_shards`.
+    num_shards: The number of shards.
+    input_shard_axes: A list of dimensions along which to shard `inputs`, or
+      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
+      there must be one dimension per input.
+    outputs_from_all_shards: Boolean or list of boolean. For each output, if
+      `True`, outputs from all shards are concatenated along the corresponding
+      `output_shard_axes` entry. Otherwise, each output is taken
+      from an arbitrary shard. If the argument is a boolean, the argument's
+      value is used for each output.
+    output_shard_axes: A list of dimensions along which to concatenate the
+      outputs of `computation`, or `None`. `None` means "concatenate all outputs
+      along dimension 0". If not `None`, there must be one dimension per output.
+      Ignored if `outputs_from_all_shards` is False.
+    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
+      of `computation`.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each shard of the computation uses
+      only one core, and there is either only one shard, or the number of shards
+      is equal to the number of cores in the TPU system.
+    name: (Deprecated) Does nothing.
+  Returns:
+    A tuple of (compile op, [output tensors]).
+  Raises:
+    ValueError: If num_shards <= 0
+    ValueError: If len(input_shard_axes) != len(inputs)
+    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
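+
+  Example (a minimal sketch; `my_net` and `features` are hypothetical):
+
+    # Splits a batch of 8 along dimension 0 into two shards of 4 each.
+    compile_op, outputs = tpu.split_compile_and_shard(
+        my_net, inputs=[features], num_shards=2)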
+  """
+  # TODO(phawkins): consider adding support for broadcasting Tensors passed as
+  # inputs.
+
+  if num_shards <= 0:
+    raise ValueError("num_shards must be a positive integer.")
+
+  inputs = [] if inputs is None else inputs
+  if not isinstance(inputs, list):
+    raise TypeError("tpu.shard()'s inputs must be a list of Tensors or None.")
+
+  # Converts inputs to Tensors.
+  inputs = [ops.convert_to_tensor(x) for x in inputs]
+
+  if input_shard_axes is None:
+    input_shard_axes = [0] * len(inputs)
+  if len(inputs) != len(input_shard_axes):
+    raise ValueError("Length of input_shard_axes must be equal to the number "
+                     "of inputs.")
+
+  if inputs:
+    # Splits the `inputs` along the corresponding `input_shard_axes`, giving
+    # lists with layout [input][shard]
+    split_inputs = [
+        array_ops.split(x, num_shards, axis=axis)
+        for (axis, x) in zip(input_shard_axes, inputs)]
+
+    # Transposes the input lists to have layout [shard][input]
+    transposed_inputs = [list(i) for i in zip(*split_inputs)]
+  else:
+    # Build independent empty lists per shard to avoid aliasing one list.
+    transposed_inputs = [[] for _ in range(num_shards)]
+
+  compile_op, outputs = split_compile_and_replicate(
+      computation,
+      transposed_inputs,
+      infeed_queue=infeed_queue,
+      device_assignment=device_assignment,
+      name=name)
+
+  # There must be at least one shard since num_shards > 0.
+  # TODO(b/36647078) remove disable when pylint bug is fixed.
+  # pylint: disable=indexing-exception
+  if isinstance(outputs[0], ops.Operation):
+    # pylint: enable=indexing-exception
+    # There were no outputs from the computation and replicate returned a list
+    # of NoOps with control dependencies on the computation. Return the first
+    # one so it can be used as a control dependency or fetch node.
+    # TODO(b/36647078) remove disable when pylint bug is fixed.
+    # pylint: disable=indexing-exception
+    return compile_op, [outputs[0]]
+    # pylint: enable=indexing-exception
+
+  # TODO(b/36647078) remove disable when pylint bug is fixed.
+  # pylint: disable=indexing-exception
+  num_outputs = len(outputs[0])
+  # pylint: enable=indexing-exception
+
+  if output_shard_axes is None:
+    output_shard_axes = [0] * num_outputs
+  if num_outputs != len(output_shard_axes):
+    raise ValueError("Length of output_shard_axes must be equal to the number "
+                     "of outputs.")
+
+  if isinstance(outputs_from_all_shards, bool):
+    outputs_from_all_shards = [outputs_from_all_shards] * num_outputs
+
+  if num_outputs != len(outputs_from_all_shards):
+    raise ValueError("Length of outputs_from_all_shards must be equal to the "
+                     "number of outputs.")
+
+  results = []
+  for (axis, all_shards, x) in zip(output_shard_axes, outputs_from_all_shards,
+                                   zip(*outputs)):
+    if all_shards:
+      # Concatenate all of the outputs together (use stack for scalars).
+      shape = x[0].shape
+      is_scalar = shape is not None and (shape.ndims == 0)
+      results.append((array_ops.stack(list(x)) if is_scalar
+                      else array_ops.concat(list(x), axis=axis)))
+    else:
+      # TODO(phawkins): use a smarter policy, e.g., round-robin across shards.
+      results.append(x[0])
+
+  return compile_op, results
+
+
+def shard(computation,
+          inputs=None,
+          num_shards=1,
+          input_shard_axes=None,
+          outputs_from_all_shards=True,
+          output_shard_axes=None,
+          infeed_queue=None,
+          device_assignment=None,
+          name=None):
+  """Shards `computation` for parallel execution.
+
+  `inputs` must be a list of Tensors or None (equivalent to an empty list), each
+  of which has a corresponding split axis (from `input_shard_axes`). Each input
+  is split into `num_shards` pieces along the corresponding axis, and
+  computation is applied to each shard in parallel.
+
+  Tensors are broadcast to all shards if they are lexically captured by
+  `computation`. e.g.,
+
+  x = tf.constant(7)
+  def computation():
+    return x + 3
+  ... = shard(computation, ...)
+
+  TODO(phawkins): consider adding support for broadcasting Tensors passed
+  as inputs.
+
+  If `outputs_from_all_shards` is true, the outputs from all shards of
+  `computation` are concatenated back together along their `output_shard_axes`.
+  Otherwise, each output is taken from an arbitrary shard.
+
+  Inputs and outputs of the computation must be at least rank-1 Tensors.
+
+  Args:
+    computation: A Python function that builds a computation to apply to each
+      shard of the input.
+    inputs: A list of input tensors or None (equivalent to an empty list). Each
+      input tensor has a corresponding shard axis, given by `input_shard_axes`;
+      the tensor's size along that axis must be divisible by `num_shards`.
+    num_shards: The number of shards.
+    input_shard_axes: A list of dimensions along which to shard `inputs`, or
+      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
+      there must be one dimension per input.
+    outputs_from_all_shards: Boolean or list of boolean. For each output, if
+      `True`, outputs from all shards are concatenated along the corresponding
+      `output_shard_axes` entry. Otherwise, each output is taken
+      from an arbitrary shard. If the argument is a boolean, the argument's
+      value is used for each output.
+    output_shard_axes: A list of dimensions along which to concatenate the
+      outputs of `computation`, or `None`. `None` means "concatenate all outputs
+      along dimension 0". If not `None`, there must be one dimension per output.
+      Ignored if `outputs_from_all_shards` is False.
+    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
+      of `computation`.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each shard of the computation uses
+      only one core, and there is either only one shard, or the number of shards
+      is equal to the number of cores in the TPU system.
+    name: (Deprecated) Does nothing.
+  Returns:
+    A list of output tensors.
+  Raises:
+    ValueError: If num_shards <= 0
+    ValueError: If len(input_shard_axes) != len(inputs)
+    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
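+
+  Example (a minimal sketch; `my_net` and `features` are hypothetical):
+
+    # Each of two shards receives half of `features` along dimension 0; the
+    # per-shard outputs are concatenated back along dimension 0.
+    [logits] = tpu.shard(my_net, inputs=[features], num_shards=2)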
+  """
+  return split_compile_and_shard(
+      computation,
+      inputs=inputs,
+      num_shards=num_shards,
+      input_shard_axes=input_shard_axes,
+      outputs_from_all_shards=outputs_from_all_shards,
+      output_shard_axes=output_shard_axes,
+      infeed_queue=infeed_queue,
+      device_assignment=device_assignment,
+      name=name)[1]
+
+
+def batch_parallel(computation,
+                   inputs=None,
+                   num_shards=1,
+                   infeed_queue=None,
+                   device_assignment=None,
+                   name=None):
+  """Shards `computation` along the batch dimension for parallel execution.
+
+  Convenience wrapper around shard().
+
+  `inputs` must be a list of Tensors or None (equivalent to an empty list).
+  Each input is split into `num_shards` pieces along the 0-th dimension, and
+  computation is applied to each shard in parallel.
+
+  Tensors are broadcast to all shards if they are lexically captured by
+  `computation`. e.g.,
+
+  x = tf.constant(7)
+  def computation():
+    return x + 3
+  ... = shard(computation, ...)
+
+  The outputs from all shards are concatenated back together along their 0-th
+  dimension.
+
+  Inputs and outputs of the computation must be at least rank-1 Tensors.
+
+  Args:
+    computation: A Python function that builds a computation to apply to each
+      shard of the input.
+    inputs: A list of input tensors or None (equivalent to an empty list). The
+      0-th dimension of each Tensor must have size divisible by `num_shards`.
+    num_shards: The number of shards.
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
+      of arguments as inputs to `computation`.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each shard of the computation uses
+      only one core, and there is either only one shard, or the number of shards
+      is equal to the number of cores in the TPU system.
+    name: (Deprecated) Does nothing.
+  Returns:
+    A list of output tensors.
+  Raises:
+    ValueError: If `num_shards <= 0`
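+
+  Example (a minimal sketch; `my_net` and `images` are hypothetical):
+
+    # `images` has shape [8, ...]; each of 4 shards receives a [2, ...] slice.
+    [logits] = tpu.batch_parallel(my_net, inputs=[images], num_shards=4)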
+  """
+  return shard(
+      computation,
+      inputs,
+      num_shards=num_shards,
+      infeed_queue=infeed_queue,
+      device_assignment=device_assignment,
+      name=name)
+
+
+def rewrite(computation,
+            inputs=None,
+            infeed_queue=None,
+            device_assignment=None,
+            name=None):
+  """Rewrites `computation` for execution on a TPU system.
+
+  Args:
+    computation: A Python function that builds a computation to apply to the
+      input. If the function takes n inputs, 'inputs' should be a list of n
+      tensors.
+
+      `computation` may return a list of operations and tensors. Tensors must
+      come before operations in the returned list.  The return value of
+      `rewrite` is a list of tensors corresponding to the tensors from the
+      output of `computation`.
+
+      All `Operation`s constructed during `computation` will be executed when
+      evaluating any of the returned output tensors, not just the ones returned.
+    inputs: A list of input tensors or `None` (equivalent to an empty list).
+      Each input can be a nested structure containing values that are
+      convertible to tensors. Note that passing an N-dimensional list of
+      compatible values will result in an N-dimensional list of scalar tensors
+      rather than a single rank-N tensor. If you need different behavior,
+      convert parts of `inputs` to tensors with `tf.convert_to_tensor`.
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
+      of arguments as inputs to `computation`.
+    device_assignment: if not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. May be omitted for a single-core computation, in which
+      case the core attached to task 0, TPU device 0 is used.
+    name: (Deprecated) Does nothing.
+  Returns:
+    The same data structure as if computation(*inputs) were called directly,
+    with some exceptions for correctness. Exceptions include:
+      1) None output: a NoOp is returned that control-depends on computation.
+      2) Single-value output: a tuple containing the value is returned.
+      3) Operation-only outputs: a NoOp is returned that control-depends on
+         computation.
+      TODO(b/121383831): Investigate removing these special cases.
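+
+  Example (a minimal sketch; `my_net` and `images` are hypothetical):
+
+    # Single-value outputs are wrapped in a tuple (see the exceptions above).
+    (loss,) = tpu.rewrite(my_net, inputs=[images])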
+  """
+  # TODO(b/36647078) remove disable when pylint bug is fixed.
+  # pylint: disable=indexing-exception
+  return replicate(
+      computation,
+      None if inputs is None else [inputs],
+      infeed_queue=infeed_queue,
+      device_assignment=device_assignment,
+      name=name)[0]
+  # pylint: enable=indexing-exception
+
+
+# Operations that indicate some error in the user's inference graph.
+_BLACKLISTED_INFERENCE_OPS = set([
+    "ReadVariableOp",
+    "AssignVariableOp",
+    "AssignAddVariableOp",
+    "AssignSubVariableOp",
+    "VarHandleOp",
+    "Variable",
+    "VariableV2",
+])
+
+
+def under_tpu_inference_context():
+  """Check if it is currently under `tpu.rewrite_for_inference()`."""
+  graph = ops.get_default_graph()
+
+  context = graph._get_control_flow_context()  # pylint: disable=protected-access
+  while context:
+    if isinstance(context, _TPUInferenceContext):
+      return True
+    context = context.outer_context
+
+  return False
+
+
+class _TPUInferenceContext(control_flow_ops.XLAControlFlowContext):
+  """A `ControlFlowContext` for nodes inside a TPU inference computation.
+
+  The primary role of `_TPUInferenceContext` is to sanity-check operators
+  inside a tpu.rewrite_for_inference() computation.
+  """
+
+  def __init__(self, name):
+    super(_TPUInferenceContext, self).__init__()
+    self._name = name
+
+  def AddOp(self, op):
+    self._AddOpInternal(op)
+
+  def _AddOpInternal(self, op):
+    # pylint: disable=protected-access
+    if op.type in _BLACKLISTED_INFERENCE_OPS:
+      raise NotImplementedError(
+          "Operation of type %s (%s) is not supported on the TPU for inference."
+          " Execution will fail if this op is used in the graph. Make sure your"
+          " variables are using variable_scope." % (op.type, op.name))
+    if self._outer_context:
+      self._outer_context.AddInnerOp(op)
+
+  def AddValue(self, val):
+    result = val
+    if self._outer_context:
+      result = self._outer_context.AddValue(val)
+    return result
+
+  def AddInnerOp(self, op):
+    self._AddOpInternal(op)
+
+  @property
+  def grad_state(self):
+    return None
+
+
+def validate_inference_rewrite_for_variables(graph):
+  """Validates whether rewrite_for_inference() 'worked' for variables.
+
+     The rewrite_for_inference() method is supposed to append GuaranteeConstOps
+     after ReadVariableOps, but this mechanism works only if you are using
+     tf.get_variable() to create and access variables in your tpu computation.
+     This validation method can be called immediately after calling
+     tpu.rewrite_for_inference() to check whether GuaranteeConstOps were added
+     to the graph.
+
+     Typical usage:
+       tpu.validate_inference_rewrite_for_variables(tf.get_default_graph())
+
+       tpu.validate_inference_rewrite_for_variables(sess.graph)
+
+  Args:
+    graph: The graph which needs to be validated.
+  Raises:
+    RuntimeError: if validation failed.
+  """
+  if not any(x.type == "GuaranteeConst" for x in graph.get_operations()):
+    raise RuntimeError(
+        "No GuaranteeConst ops found in the graph after running "
+        "tpu.rewrite_for_inference(...). Please check that you are using "
+        "tf.get_variable() to create and access variables in your tpu "
+        "computation.")
+
+
+def rewrite_for_inference(computation,
+                          inputs=None,
+                          infeed_queue=None,
+                          device_assignment=None,
+                          name=None):
+  """Rewrites `computation` for inference on a TPU system.
+
+     Other than 'rewriting' the computation to run on a TPU, if your
+     computation uses variables, it moves the ReadVariableOps outside the TPU
+     computation and adds GuaranteeConst ops just after the ReadVariableOps.
+     This mechanism works only if you are using tf.get_variable() to create and
+     access variables in your tpu computation. You can validate whether this
+     worked by calling the validate_inference_rewrite_for_variables() method
+     immediately after this method to check whether GuaranteeConstOps were
+     added to the graph.
+
+  Args:
+    computation: A Python function that builds a computation to apply to the
+      input. If the function takes n inputs, 'inputs' should be a list of n
+      tensors. If the function returns m outputs, rewrite will return a list of
+      m tensors.
+    inputs: A list of input tensors or `None` (equivalent to an empty list).
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
+      of arguments as inputs to `computation`.
+    device_assignment: if not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation and physical cores in
+      the TPU topology. May be omitted for a single-core computation, in which
+      case the core attached to task 0, TPU device 0 is used.
+    name: The name of the operator.
+  Returns:
+    A list of output tensors.
+  """
+
+  def guarantee_const_getter(getter, name, *args, **kwargs):
+    with ops.control_dependencies(None):
+      return array_ops.guarantee_const(
+          getter(name, *args, **kwargs), name=name + "/GuaranteeConst")
+
+  def wrapped_computation(*args, **kwargs):
+    """Execute computation under `_TPUInferenceContext`."""
+    context = _TPUInferenceContext(
+        name=ops.get_default_graph().unique_name("rewrite_for_inference"))
+    try:
+      context.Enter()
+
+      vscope = variable_scope.get_variable_scope()
+      prev_custom_getter = vscope.custom_getter
+      prev_caching_device = vscope.caching_device
+      vscope.set_custom_getter(guarantee_const_getter)
+      vscope.set_caching_device(lambda op: op.device)
+
+      result = computation(*args, **kwargs)
+
+      vscope.set_custom_getter(prev_custom_getter)
+      vscope.set_caching_device(prev_caching_device)
+    finally:
+      context.Exit()
+    return result
+
+  # pylint: disable=undefined-variable
+  return rewrite(
+      wrapped_computation,
+      inputs=inputs,
+      infeed_queue=infeed_queue,
+      device_assignment=device_assignment,
+      name=name)
+  # pylint: enable=undefined-variable
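+
+# A hedged end-to-end sketch of the inference rewrite (illustrative only;
+# `serving_fn` and `images` are hypothetical placeholders, not part of this
+# module):
+#
+#   def serving_fn(images):
+#     # Variables must be created via tf.get_variable() so that the rewrite
+#     # can append GuaranteeConst ops after their ReadVariableOps.
+#     w = tf.get_variable('w', [1024, 10])
+#     return tf.matmul(images, w)
+#
+#   outputs = rewrite_for_inference(serving_fn, inputs=[images])
+#   validate_inference_rewrite_for_variables(tf.get_default_graph())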
diff --git a/tensorflow/python/tpu/tpu_config.py b/tensorflow/python/tpu/tpu_config.py
new file mode 100644
index 0000000..a9038a9
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_config.py
@@ -0,0 +1,276 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+
+"""A RunConfig subclass with TPU support."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+import os
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import util as util_lib
+
+# pylint: disable=protected-access
+_TF_CONFIG_ENV = run_config_lib._TF_CONFIG_ENV
+_SERVICE_KEY = run_config_lib._SERVICE_KEY
+_TPU_WORKER_JOB_NAME = 'tpu_worker_job_name'
+# pylint: enable=protected-access
+
+
+class InputPipelineConfig(object):
+  r"""Please see the definition of these values in TPUConfig."""
+  PER_SHARD_V1 = 1
+  PER_HOST_V1 = 2
+  PER_HOST_V2 = 3
+  BROADCAST = 4
+
+
+class TPUConfig(
+    collections.namedtuple('TPUConfig', [
+        'iterations_per_loop',
+        'num_shards',
+        'num_cores_per_replica',
+        'per_host_input_for_training',
+        'tpu_job_name',
+        'initial_infeed_sleep_secs',
+        'input_partition_dims',
+    ])):
+  r"""TPU related configuration required by `TPUEstimator`.
+
+  Args:
+    iterations_per_loop: The number of train steps running in the TPU system
+      before returning to the CPU host for each `Session.run`. This means the
+      global step is increased `iterations_per_loop` times in one
+      `Session.run`. It is recommended to set this to the number of global
+      steps until the next checkpoint.
+    num_shards: (Deprecated, ignored by TPUEstimator).
+      The number of model replicas in the system. In the non-model-parallelism
+      case, this number equals the total number of TPU cores. For
+      model-parallelism, the total number of TPU cores equals
+      num_cores_per_replica * num_shards.
+    num_cores_per_replica: Defaults to `None`, which disables model
+      parallelism. An integer which describes the number of TPU cores per model
+      replica. This is required by model-parallelism, which enables
+      partitioning the model across multiple cores. Currently
+      num_cores_per_replica must be 1, 2, 4, 8, or 16.
+    per_host_input_for_training: If `True`, `PER_HOST_V1`, or `PER_HOST_V2`,
+      `input_fn` is invoked once on each host. With the per-core input pipeline
+      configuration, it is invoked once for each core.
+      With a global batch size `train_batch_size` in `TPUEstimator` constructor,
+      the batch size for each shard is `train_batch_size` // #hosts in the
+      `True` or `PER_HOST_V1` mode. In `PER_HOST_V2` mode, it is
+      `train_batch_size` // #cores. In `BROADCAST` mode, `input_fn` is only
+      invoked once on host 0 and the tensors are broadcast to all other
+      replicas. The batch size equals `train_batch_size`. With the per-core
+      input pipeline configuration, the shard batch size is also
+      `train_batch_size` // #cores.
+      Note: per_host_input_for_training==PER_SHARD_V1 only supports mode.TRAIN.
+    tpu_job_name: The name of the TPU job. Typically, this name is auto-inferred
+      within TPUEstimator, however when using ClusterSpec propagation in more
+      esoteric cluster configurations, you may need to specify the job name as a
+      string.
+    initial_infeed_sleep_secs: The number of seconds the infeed thread should
+      wait before enqueueing the first batch. This helps avoid timeouts for
+      models that require a long compilation time.
+    input_partition_dims: A nested list describing the partition dims for all
+      the tensors from input_fn(). The structure of input_partition_dims must
+      match the structure of `features` and `labels` from input_fn(). The total
+      number of partitions must match `num_cores_per_replica`. For example, if
+      input_fn() returns two tensors, images with shape [N, H, W, C] and labels
+      with shape [N], then input_partition_dims = [[1, 2, 2, 1], None] will
+      split the images into 4 pieces and feed them to 4 TPU cores. The labels
+      tensor is broadcast directly to all the TPU cores since its partition
+      dims value is `None`.
+      Current limitations: This feature is only supported with the PER_HOST_V2
+      input mode.
+
+  Raises:
+    ValueError: If `num_cores_per_replica` is not 1, 2, 4, 8, or 16.
+  """
+
+  def __new__(cls,
+              iterations_per_loop=2,
+              num_shards=None,
+              num_cores_per_replica=None,
+              per_host_input_for_training=True,
+              tpu_job_name=None,
+              initial_infeed_sleep_secs=None,
+              input_partition_dims=None):
+
+    # Check iterations_per_loop.
+    util_lib.check_positive_integer(iterations_per_loop,
+                                    'TPUConfig iterations_per_loop')
+
+    # Check num_shards.
+    if num_shards is not None:
+      util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')
+
+    if input_partition_dims is not None:
+      if len(input_partition_dims) != 1 and len(input_partition_dims) != 2:
+        raise ValueError(
+            'input_partition_dims must be a list/tuple with one or two'
+            ' elements.')
+
+      if per_host_input_for_training is not InputPipelineConfig.PER_HOST_V2:
+        raise ValueError(
+            'input_partition_dims is only supported in PER_HOST_V2 mode.')
+
+      if num_cores_per_replica is None:
+        raise ValueError(
+            'input_partition_dims requires setting num_cores_per_replica.')
+
+    # Check num_cores_per_replica
+    if num_cores_per_replica is not None:
+      if num_cores_per_replica not in [1, 2, 4, 8, 16]:
+        raise ValueError(
+            'num_cores_per_replica must be 1, 2, 4, 8, or 16; got {}'.format(
+                str(num_cores_per_replica)))
+
+    # per_host_input_for_training may be True, False, or an integer in [1..4].
+    # Map legacy values (True, False) to numeric values.
+    if per_host_input_for_training is False:
+      per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
+    elif per_host_input_for_training is True:
+      per_host_input_for_training = InputPipelineConfig.PER_HOST_V1
+
+    # Check initial_infeed_sleep_secs.
+    if initial_infeed_sleep_secs:
+      util_lib.check_positive_integer(initial_infeed_sleep_secs,
+                                      'TPUConfig initial_infeed_sleep_secs')
+
+    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()
+
+    return super(TPUConfig, cls).__new__(
+        cls,
+        iterations_per_loop=iterations_per_loop,
+        num_shards=num_shards,
+        num_cores_per_replica=num_cores_per_replica,
+        per_host_input_for_training=per_host_input_for_training,
+        tpu_job_name=tpu_job_name,
+        initial_infeed_sleep_secs=initial_infeed_sleep_secs,
+        input_partition_dims=input_partition_dims)
+
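+# A hedged configuration sketch (illustrative only; the 4-way model-parallel
+# setup below is a hypothetical example, not a recommended default):
+#
+#   config = TPUConfig(
+#       iterations_per_loop=100,
+#       num_cores_per_replica=4,
+#       per_host_input_for_training=InputPipelineConfig.PER_HOST_V2,
+#       input_partition_dims=[[1, 2, 2, 1], None])
+#
+# The four partitions from [1, 2, 2, 1] match num_cores_per_replica=4, and
+# input_partition_dims is only accepted in PER_HOST_V2 mode, as validated in
+# TPUConfig.__new__ above.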
+
+class RunConfig(run_config_lib.RunConfig):
+  """RunConfig with TPU support."""
+
+  def __init__(self,
+               tpu_config=None,
+               evaluation_master=None,
+               master=None,
+               cluster=None,
+               **kwargs):
+    """Constructs a RunConfig.
+
+    Args:
+      tpu_config: the TPUConfig that specifies TPU-specific configuration.
+      evaluation_master: a string. The address of the master to use for eval.
+        Defaults to master if not set.
+      master: a string. The address of the master to use for training.
+      cluster: a ClusterResolver.
+      **kwargs: keyword config parameters.
+
+    Raises:
+      ValueError: if cluster is not None and the provided session_config has a
+        cluster_def already.
+    """
+    super(RunConfig, self).__init__(**kwargs)
+    self._tpu_config = tpu_config or TPUConfig()
+    self._cluster = cluster
+
+    # If user sets master and/or evaluation_master explicitly, including empty
+    # string '', take it. Otherwise, take the values set by parent class.
+    if master is not None:
+      if cluster is not None:
+        raise ValueError('Both master and cluster are set.')
+      self._master = master
+    else:
+      if cluster:
+        self._master = cluster.master()
+
+    if evaluation_master is not None:
+      self._evaluation_master = evaluation_master
+    elif (not self._evaluation_master and
+          self.task_type != run_config_lib.TaskType.EVALUATOR):
+      # If the task type is EVALUATOR, it means some cluster manager sets the
+      # TF_CONFIG. In that case, we respect the configuration in TF_CONFIG.
+      #
+      # Otherwise, it means user executes the code without external cluster
+      # manager. For that, we optimize the user experience by setting
+      # evaluation_master to master, unless user overwrites it.
+      self._evaluation_master = self._master
+
+    # Set the ClusterSpec to use
+    if cluster:
+      self._cluster_spec = cluster.cluster_spec()
+
+      # Merge the cluster_def into the ConfigProto.
+      if self._session_config is None:  # pylint: disable=access-member-before-definition
+        self._session_config = config_pb2.ConfigProto(
+            allow_soft_placement=True, isolate_session_state=True)
+      if self._session_config.HasField('cluster_def'):
+        raise ValueError(
+            'You cannot provide a ClusterResolver and '
+            'session_config.cluster_def.')
+      if self._cluster_spec:
+        self._session_config.cluster_def.CopyFrom(
+            self._cluster_spec.as_cluster_def())
+
+  def _maybe_overwrite_session_config_for_distributed_training(self):
+    # Overrides the parent class's session_config overwrite for between-graph
+    # replication. TPU runs with in-graph replication, which should not have a
+    # device filter. Doing nothing ("pass") effectively disables it.
+    pass
+
+  @property
+  def evaluation_master(self):
+    return self._evaluation_master
+
+  @property
+  def master(self):
+    return self._master
+
+  @property
+  def tpu_config(self):
+    return self._tpu_config
+
+  @property
+  def cluster(self):
+    return self._cluster
+
+  def replace(self, **kwargs):
+    if 'tpu_config' not in kwargs:
+      return super(RunConfig, self).replace(**kwargs)
+
+    tpu_config = kwargs.pop('tpu_config')
+    new_instance = super(RunConfig, self).replace(**kwargs)
+    new_instance._tpu_config = tpu_config  # pylint: disable=protected-access
+    return new_instance
+
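+# A hedged usage sketch (illustrative only; the master address is a
+# hypothetical placeholder):
+#
+#   run_config = RunConfig(
+#       master='grpc://10.0.0.1:8470',
+#       tpu_config=TPUConfig(iterations_per_loop=100))
+#   # replace() keeps the TPU-specific fields alongside the parent's fields:
+#   new_config = run_config.replace(save_checkpoints_steps=500)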
+
+def _get_tpu_job_name_from_tf_config():
+  """Extracts the TPU job name from TF_CONFIG env variable."""
+  # TODO(xiejw): Extends this to support both TF_CONFIG env variable and cluster
+  # spec propagation.
+  tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
+  tpu_job_name = tf_config.get(_SERVICE_KEY, {}).get(_TPU_WORKER_JOB_NAME)
+  if tpu_job_name:
+    logging.info('Load TPU job name from TF_CONFIG: %s', tpu_job_name)
+  return tpu_job_name
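+
+# For reference, a TF_CONFIG value carrying a TPU job name looks roughly like
+# the following (a sketch; the job name '_my_tpu_worker' is hypothetical):
+#
+#   os.environ['TF_CONFIG'] = json.dumps(
+#       {'service': {'tpu_worker_job_name': '_my_tpu_worker'}})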
diff --git a/tensorflow/python/tpu/tpu_config_test.py b/tensorflow/python/tpu/tpu_config_test.py
new file mode 100644
index 0000000..22fb303
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_config_test.py
@@ -0,0 +1,181 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TPU RunConfig tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import tpu_config as tpu_config_lib
+
+
+def _set_tf_config_env_variable(tf_config):
+  return test.mock.patch.dict('os.environ', {
+      'TF_CONFIG': json.dumps(tf_config)
+  })
+
+
+class TPURunConfigTest(test.TestCase):
+
+  def test_no_session_config_set_in_local_case(self):
+    run_config = tpu_config_lib.RunConfig()
+    self.assertIsNone(run_config.session_config)
+
+  def test_no_session_config_overwrite_in_local_case(self):
+    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    run_config = tpu_config_lib.RunConfig(session_config=session_config)
+    self.assertEqual(session_config, run_config.session_config)
+
+  def test_no_session_config_set_with_cluster_spec(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3'],
+            run_config_lib.TaskType.WORKER: ['host3:4']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig()
+      self.assertIsNone(run_config.session_config)
+
+  def test_no_session_config_overwrite_with_cluster_spec(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3'],
+            run_config_lib.TaskType.WORKER: ['host3:4']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    with _set_tf_config_env_variable(tf_config):
+      session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+      run_config = tpu_config_lib.RunConfig(session_config=session_config)
+      self.assertEqual(session_config, run_config.session_config)
+
+  def test_fail_with_invalid_num_shards(self):
+    with self.assertRaisesRegexp(ValueError, 'must be positive'):
+      tpu_config_lib.RunConfig(
+          tpu_config=tpu_config_lib.TPUConfig(num_shards=0))
+
+  def test_fail_with_iterations_per_loop(self):
+    with self.assertRaisesRegexp(ValueError, 'must be positive'):
+      tpu_config_lib.RunConfig(
+          tpu_config=tpu_config_lib.TPUConfig(iterations_per_loop=0))
+
+  def test_fail_with_invalid_num_cores_per_replica(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'num_cores_per_replica must be 1, 2, 4, 8, or 16;'
+        ' got 7'):
+      tpu_config_lib.TPUConfig(num_cores_per_replica=7)
+
+
+class TPURunConfigMasterTest(test.TestCase):
+
+  def test_default_values(self):
+    run_config = tpu_config_lib.RunConfig()
+    self.assertEqual('', run_config.master)
+    self.assertEqual('', run_config.evaluation_master)
+
+  def test_user_provided_master_and_evaluation_master(self):
+    run_config = tpu_config_lib.RunConfig(
+        master='_master_123', evaluation_master='_eval_master_123')
+    self.assertEqual('_master_123', run_config.master)
+    self.assertEqual('_eval_master_123', run_config.evaluation_master)
+
+  def test_evaluation_master_defaults_to_master(self):
+    run_config = tpu_config_lib.RunConfig(master='_master_123')
+    self.assertEqual('_master_123', run_config.master)
+    self.assertEqual('_master_123', run_config.evaluation_master)
+
+  def test_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+        'eval_session_master': '_eval_master_123'
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig()
+      self.assertEqual('_master_123', run_config.master)
+      self.assertEqual('_eval_master_123', run_config.evaluation_master)
+
+  def test_evaluation_master_defaults_to_master_in_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig()
+      self.assertEqual('_master_123', run_config.master)
+      self.assertEqual('_master_123', run_config.evaluation_master)
+
+  def test_respect_evaluation_master_in_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 0
+        },
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig(master='_something')
+      self.assertEqual('', run_config.evaluation_master)
+
+  def test_user_overwrites_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+        'eval_session_master': '_eval_master_123'
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig(
+          master='_new_master_123', evaluation_master='_new_eval_master_123')
+      self.assertEqual('_new_master_123', run_config.master)
+      self.assertEqual('_new_eval_master_123', run_config.evaluation_master)
+
+  def test_user_overwrites_master_in_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+        'eval_session_master': '_eval_master_123'
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig(master='_new_master_123')
+      self.assertEqual('_new_master_123', run_config.master)
+      self.assertEqual('_eval_master_123', run_config.evaluation_master)
+
+
+class TPUJobNameTest(test.TestCase):
+
+  def test_default_name(self):
+    config = tpu_config_lib.RunConfig()
+    self.assertIsNone(config.tpu_config.tpu_job_name)
+
+  def test_with_tf_config(self):
+    tf_config = {'service': {'tpu_worker_job_name': '_my_new_name',}}
+    with _set_tf_config_env_variable(tf_config):
+      config = tpu_config_lib.RunConfig()
+      self.assertEqual('_my_new_name', config.tpu_config.tpu_job_name)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/tpu/tpu_context.py b/tensorflow/python/tpu/tpu_context.py
new file mode 100644
index 0000000..2511e42
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_context.py
@@ -0,0 +1,763 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""TPU system metadata and associated tooling."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from contextlib import contextmanager
+import copy
+
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import _tpu_estimator_embedding
+from tensorflow.python.tpu import device_assignment as tpu_device_assignment
+from tensorflow.python.tpu import tpu_config
+from tensorflow.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
+
+
+_DEFAULT_JOB_NAME = 'tpu_worker'
+_DEFAULT_COORDINATOR_JOB_NAME = 'coordinator'
+_LOCAL_MASTERS = ('', 'local')
+_NUM_CORES_TO_COMPUTATION_SHAPE = {
+    1: [1, 1, 1],
+    2: [1, 1, 2],
+    4: [1, 2, 2],
+    8: [2, 2, 2],
+    16: [4, 2, 2],
+}
+
+
+class TPUContext(object):
+  """A context that holds the current configuration of the TPU computation."""
+
+  def __init__(self,
+               internal_ctx,
+               input_device=None,
+               invocation_index=None,
+               call_from_input_fn=True):
+    self._internal_ctx = internal_ctx
+    self._input_device = input_device
+    self._invocation_index = invocation_index
+    self._call_from_input_fn = call_from_input_fn
+
+  def current_input_fn_deployment(self):
+    """The configuration of the current input_fn invocation.
+
+    The configuration depends on `TPUConfig.per_host_input_for_training`. See
+    `TPUConfig` for details.
+
+    This information is only available in the params dict of input_fn.
+
+    Returns:
+      A tuple of
+        1. Device spec string: String, is the current CPU host where the
+           input_fn is invoked.
+        2. Current invocation index: Int, 0-based index of the input_fn
+           invocation. See next item for details.
+        3. Total invocation count: Int, the total number of times to invoke the
+           input_fn on all CPU hosts. Each invocation will be passed with a new
+           `TPUContext` instance with current invocation index set properly.
+        4. Total number of replicas consumed by the current invocation: Int,
+           the number of replicas fed by the data returned by the current
+           input_fn. For example, for per-core input pipeline deployment and
+           non-model-parallelism, the total invocation count equals the number
+           of cores in the system, and the number of replicas consumed by the
+           current invocation is 1. For per-host v2 input pipeline deployment,
+           the total invocation count equals the number of hosts in the system,
+           and the number of replicas consumed by the current invocation equals
+           the number of cores per host.
+
+    Raises:
+      RuntimeError: If this method is not called from input_fn.
+    """
+    if not self._call_from_input_fn:
+      raise RuntimeError('This TPUContext instance must not be called from'
+                         ' model_fn.')
+
+    if self._internal_ctx.is_input_sharded_per_core():
+      total_invocation_count = (self._internal_ctx.num_hosts
+                                * self._internal_ctx.num_of_replicas_per_host)
+      replicas_consumed = 1
+    elif self._internal_ctx.is_input_broadcast_with_iterators():
+      total_invocation_count = 1
+      replicas_consumed = self._internal_ctx.num_replicas
+    else:
+      total_invocation_count = self._internal_ctx.num_hosts
+      replicas_consumed = self._internal_ctx.num_of_replicas_per_host
+    return (self._input_device, self._invocation_index,
+            total_invocation_count, replicas_consumed)
+
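+  # A hedged input_fn sketch using the deployment info above (a sketch,
+  # assuming TPUEstimator passes this TPUContext as params['context'];
+  # `make_dataset` is a hypothetical helper):
+  #
+  #   def input_fn(params):
+  #     ctx = params['context']
+  #     _, index, count, _ = ctx.current_input_fn_deployment()
+  #     dataset = make_dataset().shard(count, index)
+  #     return dataset.batch(params['batch_size'])
+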
+  @property
+  def num_replicas(self):
+    """The total number of replicas.
+
+    For non-model-parallelism, num_replicas equals the total number of TPU
+    cores in the system.
+
+    Returns:
+      The number of replicas.
+    """
+    return self._internal_ctx.num_replicas
+
+  @property
+  def num_hosts(self):
+    """The number of hosts for the TPU system."""
+    return self._internal_ctx.num_hosts
+
+  @property
+  def current_host(self):
+    """The current host index for the TPU system."""
+    return self._invocation_index
+
+  @property
+  def num_of_replicas_per_host(self):
+    """The number of replicas for each host."""
+    if self._internal_ctx.model_parallelism_enabled:
+      raise ValueError(
+          'num_of_replicas_per_host is not supported for model_parallelism')
+    return self._internal_ctx.num_of_replicas_per_host
+
+  @property
+  def device_assignment(self):
+    """Returns device_assignment object."""
+    if self._call_from_input_fn:
+      raise RuntimeError('This TPUContext instance must not be called from'
+                         ' input_fn.')
+    return self._internal_ctx.device_assignment
+
+  def device_for_replica(self, replica_id):
+    """Returns the tuple of (CPU device and device ordinal) for replica.
+
+    This should be used for full replicate for non-model-parallelism.
+
+    Args:
+       replica_id: Int, the replica index.
+
+    Returns:
+       A tuple of device spec for CPU device and int device ordinal.
+    """
+    # Note: for non-model parallelism, the mapping could be a random
+    # permutation. The order should not matter in most cases, as long as the
+    # model is replicated to all cores in the system.
+    return self._internal_ctx.device_for_replica(replica_id)
+
+  @property
+  def tpu_host_placement_function(self):
+    """Returns the TPU host place function.
+
+    The place function takes host_id as the input and returns the TF device
+    for the corresponding host.
+    """
+
+    def _placement_function(host_id):
+      """Return the host device given host_id."""
+      return self._internal_ctx.tpu_host_placement_function(host_id=host_id)
+
+    return _placement_function
+
+
+class _InternalTPUContext(object):
+  """A context holds immutable states of TPU computation.
+
+  This immutable object holds TPUEstimator config, train/eval batch size, and
+  `TPUEstimator.use_tpu`, which is expected to be passed around. It also
+  provides utility functions, based on the current state, to determine other
+  information commonly required by TPU computation, such as TPU device names,
+  TPU hosts, shard batch size, etc.
+
+  If eval_on_tpu is False, then execution of eval on TPU is disabled.
+  If eval_on_tpu is True but use_tpu is False, a warning is issued
+  and TPU execution is disabled for all modes.
+
+  N.B. As `mode` is not immutable state in Estimator, but essential to
+  distinguish between TPU training and evaluation, a common usage for
+  _InternalTPUContext with `mode` is as follows:
+  ```
+  with _ctx.with_mode(mode) as ctx:
+    if ctx.is_running_on_cpu():
+       ...
+  ```
+  """
+
+  def __init__(self,
+               config,
+               train_batch_size,
+               eval_batch_size,
+               predict_batch_size,
+               use_tpu,
+               eval_on_tpu=True,
+               embedding_config_spec=None):
+    self._config = config
+    self._train_batch_size = train_batch_size
+    self._eval_batch_size = eval_batch_size
+    self._predict_batch_size = predict_batch_size
+    self._use_tpu = use_tpu
+    logging.info('_TPUContext: eval_on_tpu %s', eval_on_tpu)
+    if not use_tpu and eval_on_tpu:
+      logging.warning('eval_on_tpu ignored because use_tpu is False.')
+
+    self._eval_on_tpu = eval_on_tpu
+    self._model_parallelism_enabled = (
+        use_tpu and config.tpu_config.num_cores_per_replica)
+    self._mode = None
+    num_cores_per_replica = config.tpu_config.num_cores_per_replica
+    if self._model_parallelism_enabled:
+      self._computation_shape = _NUM_CORES_TO_COMPUTATION_SHAPE[
+          num_cores_per_replica]
+    else:
+      self._computation_shape = None
+    self._lazy_tpu_system_metadata_dict = {}  # key by master address
+    self._lazy_device_assignment_dict = {}  # key by master address
+    self._lazy_validation_dict = {}  # key by ModeKeys
+    self._embedding_config_spec = embedding_config_spec
+    self._lazy_embedding_config_dict = {}  # key by master address
+
+  def _assert_mode(self):
+    if self._mode is None:
+      raise RuntimeError(
+          '`mode` needs to be set via contextmanager `with_mode`.')
+    return self._mode
+
+  @contextmanager
+  def with_mode(self, mode):
+    # NOTE(xiejw): Shallow copy is enough. It will share the lazy dictionaries,
+    # such as _lazy_tpu_system_metadata_dict, between the new copy and the
+    # original one. Note that all lazy states stored in _lazy_foo properties
+    # are effectively immutable, as they should be the same for the process
+    # lifetime.
+    new_ctx = copy.copy(self)
+    new_ctx._mode = mode  # pylint: disable=protected-access
+    yield new_ctx
+
+  @property
+  def mode(self):
+    return self._assert_mode()
+
+  def _get_master_address(self):
+    mode = self._assert_mode()
+    config = self._config
+    master = (
+        config.master
+        if mode != model_fn_lib.ModeKeys.EVAL else config.evaluation_master)
+    return master
+
+  def _get_tpu_system_metadata(self):
+    """Gets the (maybe cached) TPU system metadata."""
+    master = self._get_master_address()
+    tpu_system_metadata = self._lazy_tpu_system_metadata_dict.get(master)
+    if tpu_system_metadata is not None:
+      return tpu_system_metadata
+
+    cluster_def = None
+    if (self._config.session_config and
+        self._config.session_config.cluster_def.job):
+      cluster_def = self._config.session_config.cluster_def
+
+    # pylint: disable=protected-access
+    tpu_system_metadata = (
+        tpu_system_metadata_lib._query_tpu_system_metadata(
+            master,
+            cluster_def=cluster_def,
+            query_topology=self.model_parallelism_enabled))
+
+    self._lazy_tpu_system_metadata_dict[master] = tpu_system_metadata
+    return tpu_system_metadata
+
+  def _get_device_assignment(self):
+    """Gets the (maybe cached) TPU device assignment."""
+    master = self._get_master_address()
+    device_assignment = self._lazy_device_assignment_dict.get(master)
+    if device_assignment is not None:
+      return device_assignment
+
+    tpu_system_metadata = self._get_tpu_system_metadata()
+
+    device_assignment = tpu_device_assignment.device_assignment(
+        tpu_system_metadata.topology,
+        computation_shape=self._computation_shape,
+        num_replicas=self.num_replicas)
+
+    logging.info('num_cores_per_replica: %s',
+                 str(self._config.tpu_config.num_cores_per_replica))
+    logging.info('computation_shape: %s', str(self._computation_shape))
+    logging.info('num_replicas: %d', self.num_replicas)
+    logging.info('device_assignment.topology.device_coordinates: %s',
+                 str(device_assignment.topology.device_coordinates))
+    logging.info('device_assignment.core_assignment: %s',
+                 str(device_assignment.core_assignment))
+
+    self._lazy_device_assignment_dict[master] = device_assignment
+    return device_assignment
+
+  @property
+  def embedding_config(self):
+    """Returns the embedding config based on current mode."""
+    master = self._get_master_address()
+    if master in self._lazy_embedding_config_dict:
+      embedding_config = self._lazy_embedding_config_dict[master]
+    else:
+      embedding_config = None
+      if self._use_tpu and self._embedding_config_spec:
+        embedding_config = _tpu_estimator_embedding.EmbeddingConfig(
+            self._embedding_config_spec, self._train_batch_size,
+            self._eval_batch_size, self.num_hosts, self.num_cores, master)
+        if not embedding_config.has_embedding_tables():
+          embedding_config = None
+      self._lazy_embedding_config_dict[master] = embedding_config
+
+    if embedding_config is not None:
+      mode = self._assert_mode()
+      # Dynamically attach tpu_embedding based on mode. With
+      # this, we could keep embedding_config immutable but call site always
+      # accesses the unified API '.tpu_embedding'.
+      embedding_config.tpu_embedding = embedding_config.get_tpu_embedding(mode)
+    return embedding_config
+
+  @property
+  def model_parallelism_enabled(self):
+    return self._model_parallelism_enabled
+
+  @property
+  def input_partition_dims(self):
+    return self._config.tpu_config.input_partition_dims
+
+  @property
+  def device_assignment(self):
+    return (self._get_device_assignment()
+            if self._model_parallelism_enabled else None)
+
+  @property
+  def num_of_cores_per_host(self):
+    metadata = self._get_tpu_system_metadata()
+    return metadata.num_of_cores_per_host
+
+  @property
+  def num_cores(self):
+    metadata = self._get_tpu_system_metadata()
+    return metadata.num_cores
+
+  @property
+  def num_of_replicas_per_host(self):
+    """Return the number of replicas per host."""
+    if self.model_parallelism_enabled:
+      return self.num_replicas // self.num_hosts
+    else:
+      return self.num_of_cores_per_host
+
+  @property
+  def num_replicas(self):
+    num_cores_in_system = self.num_cores
+
+    if self.model_parallelism_enabled:
+      num_cores_per_replica = self._config.tpu_config.num_cores_per_replica
+      if num_cores_per_replica > num_cores_in_system:
+        raise ValueError(
+            'The num of cores required by the model parallelism, specified by '
+            'TPUConfig.num_cores_per_replica, is larger than the total num of '
+            'TPU cores in the system. num_cores_per_replica: {}, num cores '
+            'in the system: {}'.format(num_cores_per_replica,
+                                       num_cores_in_system))
+
+      if num_cores_in_system % num_cores_per_replica != 0:
+        raise RuntimeError(
+            'The num of cores in the system ({}) is not divisible by the num '
+            'of cores ({}) required by the model parallelism, specified by '
+            'TPUConfig.num_cores_per_replica. This should never happen!'.format(
+                num_cores_in_system, num_cores_per_replica))
+
+      return num_cores_in_system // num_cores_per_replica
+    else:
+      return num_cores_in_system
+
+  @property
+  def num_hosts(self):
+    metadata = self._get_tpu_system_metadata()
+    return metadata.num_hosts
+
+  @property
+  def config(self):
+    return self._config
+
+  def is_input_sharded_per_core(self):
+    """Return true if input_fn is invoked per-core (other than per-host)."""
+    mode = self._assert_mode()
+    return (mode == model_fn_lib.ModeKeys.TRAIN and
+            (self._config.tpu_config.per_host_input_for_training is
+             tpu_config.InputPipelineConfig.PER_SHARD_V1))
+
+  def is_input_per_host_with_iterators(self):
+    """Return true if input_fn should be run in the per-host v2 config."""
+    return (self._config.tpu_config.per_host_input_for_training is
+            tpu_config.InputPipelineConfig.PER_HOST_V2)
+
+  def is_input_broadcast_with_iterators(self):
+    """Return true if input_fn should be run in the full_replicae config."""
+    return (self._config.tpu_config.per_host_input_for_training is
+            tpu_config.InputPipelineConfig.BROADCAST)
+
+  def is_running_on_cpu(self, is_export_mode=False):
+    """Determines whether the input_fn and model_fn should be invoked on CPU.
+
+    This API also validates user-provided configuration, such as batch size,
+    according to the lazily initialized TPU system metadata.
+
+    Args:
+      is_export_mode: Indicates whether the current mode is for exporting the
+        model, when mode == PREDICT. Only with this bool can we tell whether
+        the user is calling Estimator.predict or Estimator.export_savedmodel,
+        which run on TPU and CPU respectively. The parent class Estimator does
+        not distinguish these two.
+
+    Returns:
+      bool, whether current input_fn or model_fn should be running on CPU.
+
+    Raises:
+      ValueError: If any configuration is invalid.
+    """
+
+    is_running_on_cpu = self._is_running_on_cpu(is_export_mode)
+    if not is_running_on_cpu:
+      self._validate_tpu_configuration()
+    return is_running_on_cpu
+
+  def _is_running_on_cpu(self, is_export_mode):
+    """Determines whether the input_fn and model_fn should be invoked on CPU."""
+    mode = self._assert_mode()
+
+    if not self._use_tpu:
+      return True
+
+    if mode == model_fn_lib.ModeKeys.EVAL and not self._eval_on_tpu:
+      logging.info('_is_running_on_cpu: eval_on_tpu disabled')
+      return True
+
+    if is_export_mode:
+      return True
+
+    return False
+
+  @property
+  def global_batch_size(self):
+    mode = self._assert_mode()
+    if mode == model_fn_lib.ModeKeys.TRAIN:
+      return self._train_batch_size
+    elif mode == model_fn_lib.ModeKeys.EVAL:
+      return self._eval_batch_size
+    elif mode == model_fn_lib.ModeKeys.PREDICT:
+      return self._predict_batch_size
+    else:
+      return None
+
+  @property
+  def batch_size_for_input_fn(self):
+    """Returns the shard batch size for `input_fn`."""
+    global_batch_size = self.global_batch_size
+
+    if (self.is_running_on_cpu() or self.is_input_broadcast_with_iterators()):
+      return global_batch_size
+
+    # On TPU
+    if self.is_input_sharded_per_core() or (
+        self.is_input_per_host_with_iterators()):
+      return global_batch_size // self.num_replicas
+    else:
+      return global_batch_size // self.num_hosts
+
+  @property
+  def batch_size_for_model_fn(self):
+    """Returns the shard batch size for `model_fn`."""
+    global_batch_size = self.global_batch_size
+
+    if (self.is_running_on_cpu() or self.is_input_broadcast_with_iterators()):
+      return global_batch_size
+
+    # On TPU, the batch is always sharded per replica.
+    return global_batch_size // self.num_replicas
+
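+  # A worked example of the sharding arithmetic above (a sketch, assuming a
+  # hypothetical 2-host, 16-core system with no model parallelism, so
+  # num_replicas == 16 and num_hosts == 2, with train_batch_size == 1024):
+  #
+  #   PER_HOST_V1: batch_size_for_input_fn == 1024 // 2  == 512 per host
+  #   PER_HOST_V2: batch_size_for_input_fn == 1024 // 16 == 64 per invocation
+  #   batch_size_for_model_fn == 1024 // 16 == 64 per replica
+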
+  @property
+  def master_job(self):
+    """Returns the job name to use to place TPU computations on.
+
+    Returns:
+      A string containing the job name, or None if no job should be specified.
+
+    Raises:
+      ValueError: If the user needs to specify a tpu_job_name, because we are
+        unable to infer the job name automatically, or if the user-specified job
+        names are inappropriate.
+    """
+    run_config = self._config
+    # If the user specifies the tpu_job_name, use that.
+    if run_config.tpu_config.tpu_job_name:
+      return run_config.tpu_config.tpu_job_name
+
+    # The tpu job is determined by the run_config. Right now, this method is
+    # required as tpu_config is not part of the RunConfig.
+    mode = self._assert_mode()
+    master = (
+        run_config.evaluation_master
+        if mode == model_fn_lib.ModeKeys.EVAL else run_config.master)
+    if master in _LOCAL_MASTERS:
+      return None
+
+    if (not run_config.session_config or
+        not run_config.session_config.cluster_def.job):
+      return _DEFAULT_JOB_NAME
+    cluster_def = run_config.session_config.cluster_def
+    job_names = set([job.name for job in cluster_def.job])
+    if _DEFAULT_JOB_NAME in job_names:
+      # b/37868888 tracks allowing ClusterSpec propagation to reuse job names.
+      raise ValueError('Currently, tpu_worker is not an allowed job name.')
+    if len(job_names) == 1:
+      return cluster_def.job[0].name
+    if len(job_names) == 2:
+      if _DEFAULT_COORDINATOR_JOB_NAME in job_names:
+        job_names.remove(_DEFAULT_COORDINATOR_JOB_NAME)
+        return job_names.pop()
+      # TODO(b/67716447): Include more sophisticated heuristics.
+    raise ValueError(
+        'Could not infer TPU job name. Please specify a tpu_job_name as part '
+        'of your TPUConfig.')
+
+  @property
+  def tpu_host_placement_function(self):
+    """Returns the TPU host place function."""
+
+    master = self.master_job
+
+    def _placement_function(_sentinel=None, replica_id=None, host_id=None):  # pylint: disable=invalid-name
+      """Return the host device given replica_id or host_id."""
+      assert _sentinel is None
+      if replica_id is not None and host_id is not None:
+        raise RuntimeError(
+            'replica_id and host_id can have only one non-None value.')
+
+      if master is None:
+        return '/replica:0/task:0/device:CPU:0'
+      else:
+        if replica_id is not None:
+          if self.model_parallelism_enabled:
+            return self.device_assignment.host_device(
+                replica=replica_id, job=master)
+          else:
+            host_id = replica_id // self.num_of_cores_per_host
+
+        return '/job:%s/task:%d/device:CPU:0' % (master, host_id)
+
+    return _placement_function
+
+  @property
+  def tpu_device_placement_function(self):
+    """Returns a TPU device placement Fn."""
+    master = self.master_job
+    job_device = '' if master is None else ('/job:%s' % master)
+
+    def _placement_function(i):
+      if self.model_parallelism_enabled:
+        return self.device_assignment.tpu_device(replica=i, job=master)
+      else:
+        num_of_cores_per_host = self.num_of_cores_per_host
+        host_id = i // num_of_cores_per_host
+        ordinal_id = i % num_of_cores_per_host
+        return '%s/task:%d/device:TPU:%d' % (job_device, host_id, ordinal_id)
+
+    return _placement_function
+
+  def tpu_ordinal_function(self, host_id):
+    """Returns the TPU ordinal fn."""
+
+    def _tpu_ordinal_function(shard_index_in_host):
+      """Return the TPU ordinal associated with a shard.
+
+      Required because the enqueue ops are placed on CPU.
+
+      Args:
+        shard_index_in_host: the shard index
+
+      Returns:
+        The ordinal of the TPU device the shard's infeed should be placed on.
+      """
+      if self.model_parallelism_enabled:
+        # We put both enqueue/dequeue ops at tpu.core(0) in each replica.
+        replica = self.device_assignment.lookup_replicas(host_id,
+                                                         0)[shard_index_in_host]
+        return self.device_assignment.tpu_ordinal(replica=replica)
+      else:
+        return shard_index_in_host % self.num_of_cores_per_host
+
+    return _tpu_ordinal_function
+
+  def _validate_tpu_configuration(self):
+    """Validates the configuration based on the TPU system metadata."""
+    mode = self._assert_mode()
+    if self._lazy_validation_dict.get(mode):
+      return
+
+    # All following information is obtained from TPU system metadata.
+    num_cores = self.num_cores
+    num_replicas = self.num_replicas
+    num_hosts = self.num_hosts
+
+    if not num_cores:
+      tpu_system_metadata = self._get_tpu_system_metadata()
+      raise RuntimeError(
+          'Cannot find any TPU cores in the system. Please double check '
+          'Tensorflow master address and TPU worker(s). Available devices '
+          'are {}.'.format(tpu_system_metadata.devices))
+
+    if self._config.tpu_config.num_shards:
+      user_provided_num_replicas = self._config.tpu_config.num_shards
+      if user_provided_num_replicas != num_replicas:
+        message = (
+            'TPUConfig.num_shards is not set correctly. According to TPU '
+            'system metadata for Tensorflow master ({}): num_replicas should '
+            'be ({}), got ({}). For non-model-parallelism, num_replicas should '
+            'be the total num of TPU cores in the system. For '
+            'model-parallelism, the total number of TPU cores should be '
+            'num_cores_per_replica * num_replicas. Please set it '
+            'accordingly or leave it as `None`'.format(
+                self._get_master_address(), num_replicas,
+                user_provided_num_replicas))
+
+        raise ValueError(message)
+
+    if self._config.tpu_config.num_cores_per_replica:
+      num_cores_per_replica = self._config.tpu_config.num_cores_per_replica
+      num_cores_per_host = self._get_tpu_system_metadata().num_of_cores_per_host
+      if num_cores_per_replica > num_cores_per_host:
+        raise ValueError(
+            'The num of cores required by the model parallelism, specified by '
+            'TPUConfig.num_cores_per_replica, is larger than the '
+            'num_cores_per_host. num_cores_per_replica: {}, '
+            'num_cores_per_host: {}'.format(num_cores_per_replica,
+                                            num_cores_per_host))
+
+    if mode == model_fn_lib.ModeKeys.TRAIN:
+      if (self._train_batch_size % num_replicas != 0 and
+          not self.is_input_broadcast_with_iterators()):
+        raise ValueError(
+            'train batch size {} must be divisible by number of replicas {}'
+            .format(self._train_batch_size, num_replicas))
+
+    elif mode == model_fn_lib.ModeKeys.EVAL:
+      if self._eval_batch_size is None:
+        raise ValueError(
+            'eval_batch_size in TPUEstimator constructor cannot be `None`'
+            'if .evaluate is running on TPU.')
+      if (self._eval_batch_size % num_replicas != 0 and
+          not self.is_input_broadcast_with_iterators()):
+        raise ValueError(
+            'eval batch size {} must be divisible by number of replicas {}'
+            .format(self._eval_batch_size, num_replicas))
+      if num_hosts > 1 and not self.is_input_broadcast_with_iterators():
+        raise ValueError(
+            'TPUEstimator.evaluate should be running on single TPU'
+            ' instead of a Pod.')
+    else:
+      assert mode == model_fn_lib.ModeKeys.PREDICT
+      if self._predict_batch_size is None:
+        raise ValueError(
+            'predict_batch_size in TPUEstimator constructor should not be '
+            '`None` if .predict is running on TPU.')
+      if (self._predict_batch_size % num_replicas != 0 and
+          not self.is_input_broadcast_with_iterators()):
+        raise ValueError(
+            'predict batch size {} must be divisible by number of replicas {}'
+            .format(self._predict_batch_size, num_replicas))
+      if num_hosts > 1 and not self.is_input_broadcast_with_iterators():
+        raise ValueError(
+            'TPUEstimator.predict should be running on single TPU worker. '
+            'got {}.'.format(num_hosts))
+
+    # Record the state "validated" into lazy dictionary.
+    self._lazy_validation_dict[mode] = True
+
+  def device_for_replica(self, replica_id):
+    """Returns the tuple of (CPU device and device ordinal) for replica.
+
+    This should be used for full replicate for non-model-parallelism.
+
+    Args:
+       replica_id: Int, the replica index.
+
+    Returns:
+       A tuple of device spec for CPU device and int device ordinal.
+    """
+    master = self.master_job
+
+    if self.model_parallelism_enabled:
+      return (self.device_assignment.host_device(
+          replica=replica_id, job=master),
+              self.device_assignment.tpu_ordinal(replica=replica_id))
+
+    job_device = '' if master is None else ('/job:%s' % master)
+
+    num_of_replicas_per_host = self.num_of_replicas_per_host
+    host_id = replica_id // num_of_replicas_per_host
+    ordinal_id = replica_id % num_of_replicas_per_host
+
+    host_device = '%s/task:%d/device:CPU:0' % (job_device, host_id)
+    return (host_device, ordinal_id)
+
+
+class _OneCoreTPUContext(_InternalTPUContext):
+  """Special _InternalTPUContext for one core usage."""
+
+  def __init__(self, config, train_batch_size, eval_batch_size,
+               predict_batch_size, use_tpu):
+
+    super(_OneCoreTPUContext, self).__init__(
+        config, train_batch_size, eval_batch_size,
+        predict_batch_size, use_tpu)
+
+  def _get_tpu_system_metadata(self):
+    """Gets the (maybe cached) TPU system metadata."""
+    master = self._get_master_address()
+    tpu_system_metadata = self._lazy_tpu_system_metadata_dict.get(master)
+    if tpu_system_metadata is not None:
+      return tpu_system_metadata
+
+    tpu_system_metadata = (
+        tpu_system_metadata_lib._TPUSystemMetadata(  # pylint: disable=protected-access
+            num_cores=1,
+            num_hosts=1,
+            num_of_cores_per_host=1,
+            topology=None,
+            devices=[]))
+
+    self._lazy_tpu_system_metadata_dict[master] = tpu_system_metadata
+    return tpu_system_metadata
+
+
+def _get_tpu_context(config, train_batch_size, eval_batch_size,
+                     predict_batch_size, use_tpu, eval_on_tpu,
+                     embedding_config_spec):
+  """Returns an instance of `_InternalTPUContext`."""
+
+  if (config.tpu_config.num_shards == 1 and
+      config.tpu_config.num_cores_per_replica is None):
+    if embedding_config_spec is not None:
+      raise ValueError('Setting TPUConfig.num_shards==1 is unsupported '
+                       'when embedding_config_spec is not None.')
+    logging.warning(
+        'Setting TPUConfig.num_shards==1 is an unsupported behavior. '
+        'Please fix as soon as possible (leave num_shards as None).')
+    return _OneCoreTPUContext(config, train_batch_size, eval_batch_size,
+                              predict_batch_size, use_tpu)
+
+  return _InternalTPUContext(config, train_batch_size, eval_batch_size,
+                             predict_batch_size, use_tpu, eval_on_tpu,
+                             embedding_config_spec)
diff --git a/tensorflow/python/tpu/tpu_embedding.py b/tensorflow/python/tpu/tpu_embedding.py
new file mode 100644
index 0000000..ee2658f
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_embedding.py
@@ -0,0 +1,1105 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TPU embedding APIs."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import math
+import re
+import six
+
+from tensorflow.core.protobuf.tpu import optimization_parameters_pb2
+from tensorflow.core.protobuf.tpu import tpu_embedding_configuration_pb2 as elc
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
+from tensorflow.python.tpu.ops import tpu_ops
+
+TRAINING = elc.TPUEmbeddingConfiguration.TRAINING
+INFERENCE = elc.TPUEmbeddingConfiguration.INFERENCE
+
+
+class TableConfig(
+    collections.namedtuple(
+        'TableConfig',
+        ['vocabulary_size', 'dimension', 'initializer', 'combiner'])):
+  """Embedding table configuration."""
+
+  def __new__(cls,
+              vocabulary_size,
+              dimension,
+              initializer=None,
+              combiner='mean'):
+    """Embedding table configuration.
+
+    Args:
+      vocabulary_size: Number of vocabulary entries (rows) in the table.
+      dimension: The embedding dimension.
+      initializer: A variable initializer function to be used in embedding
+        variable initialization. If not specified, defaults to
+        `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
+        `1/sqrt(dimension)`.
+      combiner: A string specifying how to reduce if there are multiple entries
+        in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with
+        'mean' the default. 'sqrtn' often achieves good accuracy, in particular
+        with bag-of-words columns. For more information, see
+        `tf.nn.embedding_lookup_sparse`.
+
+    Returns:
+      `TableConfig`.
+
+    Raises:
+      ValueError: if `vocabulary_size` is not a positive integer.
+      ValueError: if `dimension` is not a positive integer.
+      ValueError: if `initializer` is specified and is not callable.
+      ValueError: if `combiner` is not supported.
+    """
+    if not isinstance(vocabulary_size, int) or vocabulary_size < 1:
+      raise ValueError('Invalid vocabulary_size {}.'.format(vocabulary_size))
+
+    if not isinstance(dimension, int) or dimension < 1:
+      raise ValueError('Invalid dimension {}.'.format(dimension))
+
+    if (initializer is not None) and (not callable(initializer)):
+      raise ValueError('initializer must be callable if specified.')
+    if initializer is None:
+      initializer = init_ops.truncated_normal_initializer(
+          mean=0.0, stddev=1 / math.sqrt(dimension))
+
+    if combiner not in ('mean', 'sum', 'sqrtn'):
+      raise ValueError('Invalid combiner {}'.format(combiner))
+
+    return super(TableConfig, cls).__new__(cls, vocabulary_size, dimension,
+                                           initializer, combiner)
+
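+# A minimal TableConfig sketch (illustrative only; the vocabulary size and
+# dimension are hypothetical):
+#
+#   table = TableConfig(
+#       vocabulary_size=100000,  # number of rows
+#       dimension=64,            # embedding width
+#       combiner='sqrtn')        # often good for bag-of-words columns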
+
+AdamSlotVariableNames = collections.namedtuple(
+    'AdamSlotVariableNames', ['m', 'v'])
+
+AdagradSlotVariableName = collections.namedtuple(
+    'AdagradSlotVariableName', ['accumulator'])
+
+AdamSlotVariables = collections.namedtuple(
+    'AdamSlotVariables', ['m', 'v'])
+
+AdagradSlotVariable = collections.namedtuple(
+    'AdagradSlotVariable', ['accumulator'])
+
+VariablesAndOps = collections.namedtuple(
+    'VariablesAndOps',
+    ['embedding_variables_by_table', 'slot_variables_by_table',
+     'load_ops', 'retrieve_ops']
+)
+
+
+# TODO(shizhiw): Factor `use_gradient_accumulation` and
+# `pipeline_execution_with_tensor_core` out of `_OptimizationParameters`.
+class _OptimizationParameters(object):
+  """Parameters common to all optimizations."""
+
+  def __init__(self, learning_rate, use_gradient_accumulation,
+               pipeline_execution_with_tensor_core):
+    self.learning_rate = learning_rate
+    self.use_gradient_accumulation = use_gradient_accumulation
+    self.pipeline_execution_with_tensor_core = (
+        pipeline_execution_with_tensor_core)
+
+
+class AdagradParameters(_OptimizationParameters):
+  """Optimization parameters for Adagrad."""
+
+  def __init__(self, learning_rate, initial_accumulator,
+               use_gradient_accumulation=False,
+               pipeline_execution_with_tensor_core=True):
+    """Optimization parameters for Adagrad.
+
+    Args:
+      learning_rate: used for updating the embedding table.
+      initial_accumulator: initial accumulator for Adagrad.
+      use_gradient_accumulation: setting this to `True` makes embedding
+        gradient calculation more accurate but slower. Please see
+        `optimization_parameters.proto` for details.
+      pipeline_execution_with_tensor_core: setting this to `True` makes
+        training faster, but the trained model will be different if step N and
+        step N+1 involve the same set of embedding IDs. Please see
+        `tpu_embedding_configuration.proto` for details.
+    """
+    super(AdagradParameters, self).__init__(learning_rate,
+                                            use_gradient_accumulation,
+                                            pipeline_execution_with_tensor_core)
+    self.initial_accumulator = initial_accumulator
+
+
+class AdamParameters(_OptimizationParameters):
+  """Optimization parameters for Adam."""
+
+  def __init__(self, learning_rate,
+               beta1=0.9,
+               beta2=0.999,
+               epsilon=1e-08,
+               lazy_adam=True,
+               sum_inside_sqrt=True,
+               use_gradient_accumulation=False,
+               pipeline_execution_with_tensor_core=True):
+    """Optimization parameters for Adam.
+
+    Args:
+      learning_rate: a floating point value. The learning rate.
+      beta1: A float value.
+        The exponential decay rate for the 1st moment estimates.
+      beta2: A float value.
+        The exponential decay rate for the 2nd moment estimates.
+      epsilon: A small constant for numerical stability.
+      lazy_adam: Use lazy Adam instead of Adam. Lazy Adam trains faster.
+        Please see `optimization_parameters.proto` for details.
+      sum_inside_sqrt: This improves training speed. Please see
+        `optimization_parameters.proto` for details.
+      use_gradient_accumulation: setting this to `True` makes embedding
+        gradient calculation more accurate but slower. Please see
+        `optimization_parameters.proto` for details.
+      pipeline_execution_with_tensor_core: setting this to `True` makes
+        training faster, but the trained model will be different if step N and
+        step N+1 involve the same set of embedding IDs. Please see
+        `tpu_embedding_configuration.proto` for details.
+    """
+    super(AdamParameters, self).__init__(learning_rate,
+                                         use_gradient_accumulation,
+                                         pipeline_execution_with_tensor_core)
+    self.beta1 = beta1
+    self.beta2 = beta2
+    self.epsilon = epsilon
+    self.lazy_adam = lazy_adam
+    self.sum_inside_sqrt = sum_inside_sqrt
+
+
+class StochasticGradientDescentParameters(_OptimizationParameters):
+  """Optimization parameters for stochastic gradient descent.
+
+  Args:
+    learning_rate: a floating point value. The learning rate.
+    use_gradient_accumulation: setting this to `True` makes embedding
+      gradient calculation more accurate but slower. Please see
+      `optimization_parameters.proto` for details.
+    pipeline_execution_with_tensor_core: setting this to `True` makes training
+      faster, but the trained model will be different if step N and step N+1
+      involve the same set of embedding IDs. Please see
+      `tpu_embedding_configuration.proto` for details.
+  """
+
+  def __init__(self, learning_rate, use_gradient_accumulation=False,
+               pipeline_execution_with_tensor_core=True):
+    super(StochasticGradientDescentParameters, self).__init__(
+        learning_rate, use_gradient_accumulation,
+        pipeline_execution_with_tensor_core)
+
+
+class TPUEmbedding(object):
+  """API for using TPU for embedding.
+
+    Example:
+    ```
+    table_config_video = tpu_embedding.TableConfig(
+        vocabulary_size=8, dimension=2,
+        initializer=initializer, combiner='mean')
+    table_config_user = tpu_embedding.TableConfig(
+        vocabulary_size=4, dimension=2,
+        initializer=initializer, combiner='mean')
+    table_to_config_dict = {'video': table_config_video,
+                            'user': table_config_user}
+    feature_to_table_dict = {'watched': 'video',
+                             'favorited': 'video',
+                             'friends': 'user'}
+    batch_size = 4
+    optimization_parameters = tpu_embedding.AdagradParameters(1., 1.)
+    mode = tpu_embedding.TRAINING
+    embedding = tpu_embedding.TPUEmbedding(
+        table_to_config_dict, feature_to_table_dict,
+        batch_size, mode, master, optimization_parameters)
+
+    batch_size_per_core = embedding.batch_size_per_core
+    sparse_features_list = []
+    for host in hosts:
+      with ops.device(host):
+        for _ in range(embedding.num_cores_per_host):
+          sparse_features = {}
+          sparse_features['watched'] = sparse_tensor.SparseTensor(...)
+          sparse_features['favorited'] = sparse_tensor.SparseTensor(...)
+          sparse_features['friends'] = sparse_tensor.SparseTensor(...)
+          sparse_features_list.append(sparse_features)
+
+    enqueue_ops = embedding.generate_enqueue_ops(sparse_features_list)
+    embedding_variables_and_ops = embedding.create_variables_and_ops()
+
+    def computation():
+      activations = embedding.get_activations()
+      loss = compute_loss(activations)
+
+      base_optimizer = gradient_descent.GradientDescentOptimizer(
+          learning_rate=1)
+      cross_shard_optimizer = tpu_optimizer.CrossShardOptimizer(
+          base_optimizer)
+
+      train_op = cross_shard_optimizer.minimize(loss)
+      gradients = (
+          tpu_embedding_gradient.get_gradients_through_compute_gradients(
+              cross_shard_optimizer, loss, activations))
+      send_gradients_op = embedding.generate_send_gradients_op(gradients)
+      with ops.control_dependencies([train_op, send_gradients_op]):
+        loss = array_ops.identity(loss)
+      return loss
+
+    loss = tpu.shard(computation,
+                     num_shards=embedding.num_cores)
+
+    with tf.Session(master) as sess:
+      sess.run(tpu.initialize_system(embedding_config=
+                                     embedding.config_proto))
+      sess.run(variables.global_variables_initializer())
+      sess.run(embedding_variables_and_ops.load_ops())
+      sess.run(enqueue_ops)
+      loss_val = sess.run(loss)
+    ```
+  """
+
+  # TODO(shizhiw): Instead of `feature_to_table_dict` which maps to table
+  # name, consider `feature_to_config_dict` which maps to `FeatureConfig`.
+  # `FeatureConfig` could have fields other than table name. For example, it
+  # could have a field to indicate that the feature should not be used to
+  # update embedding table (cr/204852758, cr/204940540). Also, this can support
+  # different combiners for different features within the same table.
+  # TODO(shizhiw, b/118512626): Remove `batch_size` from `__init__` and move it
+  # to `FeatureConfig`?
+
+  # TODO(shizhiw): will it be cleaner to make `table_to_config_dict` and
+  # `feature_to_table_dict` lists of `TableSpec` and `FeatureSpec` respectively?
+
+  # TODO(shizhiw): Consider adding `input_fn` as an option to remove boilerplate
+  # for-loops around construction of inputs.
+
+  # `optimization_parameter` applies to all tables. If the need arises,
+  # we can add `optimization_parameters` to `TableConfig` to override this
+  # global setting.
+  def __init__(self,
+               table_to_config_dict,
+               feature_to_table_dict,
+               batch_size,
+               mode,
+               master,
+               optimization_parameters=None):
+    """API for using TPU for embedding lookups.
+
+    Args:
+      table_to_config_dict: A dictionary mapping from string of table name to
+        `TableConfig`. Table refers to an embedding table, e.g. `params`
+        argument to `tf.nn.embedding_lookup_sparse()`.
+      feature_to_table_dict: A dictionary mapping from string of feature name
+        to string of table name. Feature refers to ids to lookup in embedding
+        table, e.g. `sp_ids` argument to `tf.nn.embedding_lookup_sparse()`.
+      batch_size: An `int` representing the global batch size.
+      mode: `TRAINING` or `INFERENCE`.
+      master: A `string` representing the TensorFlow master to use.
+      optimization_parameters: `AdagradParameters`, `AdamParameters`,
+        `StochasticGradientDescentParameters`. Must be set in training and must
+        be `None` in inference.
+
+    Raises:
+      ValueError: if any input is invalid.
+    """
+    _validate_table_to_config_dict(table_to_config_dict)
+    # Avoid nondeterminism from `Dict` iteration order by using `OrderedDict`.
+    self._table_to_config_dict = _create_ordered_dict(table_to_config_dict)
+    self._combiners = _create_combiners(self._table_to_config_dict)
+
+    _validate_feature_to_table_dict(table_to_config_dict, feature_to_table_dict)
+    self._feature_to_table_dict = _create_ordered_dict(feature_to_table_dict)
+    self._table_to_features_dict = _create_table_to_features_dict(
+        self._feature_to_table_dict)
+
+    self._batch_size = batch_size
+
+    self._master = master
+    self._tpu_system_metadata = (
+        tpu_system_metadata_lib._query_tpu_system_metadata(self._master))  # pylint: disable=protected-access
+    if self._tpu_system_metadata.num_cores == 0:
+      raise ValueError('TPUEmbedding needs TPUs, but master {} does not have '
+                       'TPUs.'.format(self._master))
+    self._num_hosts = self._tpu_system_metadata.num_hosts
+    self._hosts = [device.name for device in self._tpu_system_metadata.devices
+                   if 'device:CPU:' in device.name]
+    self._num_cores_per_host = self._tpu_system_metadata.num_of_cores_per_host
+    self._num_cores = self._tpu_system_metadata.num_cores
+
+    _validate_batch_size(self._batch_size, self._num_cores)
+    self._batch_size_per_core = self._batch_size // self._num_cores
+
+    # TODO(shizhiw): remove `mode`?
+    if mode == TRAINING:
+      _validate_optimization_parameters(optimization_parameters)
+      self._optimization_parameters = optimization_parameters
+    elif mode == INFERENCE:
+      if optimization_parameters is not None:
+        raise ValueError('`optimization_parameters` should be `None` '
+                         'for inference mode.')
+      self._optimization_parameters = (
+          StochasticGradientDescentParameters(1.))
+    else:
+      raise ValueError('`mode` only supports {} and {}; got {}.'
+                       .format(TRAINING, INFERENCE, mode))
+    self._mode = mode
+
+    # TODO(shizhiw): move `optimization_parameters` into `_optimizer_handler`
+    # and create special handler for inference that inherits from
+    # StochasticGradientDescentHandler with more user-friendly error message
+    # on get_slot().
+    self._optimizer_handler = _get_optimization_handler(
+        self._optimization_parameters)
+
+    self._config_proto = self._create_config_proto()
+
+  @property
+  def hosts(self):
+    """A list of device names for CPU hosts.
+
+    Returns:
+      A list of device names for CPU hosts.
+    """
+    return copy.copy(self._hosts)
+
+  # TODO(shizhiw): change to num_tensor_cores_per_host to be more explicit and
+  # to be consistent with `tpu_embedding_configuration.proto`.
+  @property
+  def num_cores_per_host(self):
+    """Number of TPU cores on a CPU host.
+
+    Returns:
+      Number of TPU cores on a CPU host.
+    """
+    return self._num_cores_per_host
+
+  @property
+  def num_cores(self):
+    """Total number of TPU cores on all hosts.
+
+    Returns:
+      Total number of TPU cores on all hosts.
+    """
+    return self._num_cores
+
+  @property
+  def batch_size_per_core(self):
+    """Batch size for each TPU core.
+
+    The sparse tensors in the `sparse_features_list` passed to
+    `generate_enqueue_ops` must have a batch dimension equal to this.
+
+    Returns:
+      Batch size for each TPU core.
+    """
+    return self._batch_size_per_core
+
+  @property
+  def config_proto(self):
+    """Create embedding config proto for `tpu.initialize_system()`.
+
+    Returns:
+      an `TPUEmbeddingConfiguration` proto describing the desired
+         configuration of the hardware embedding lookup tables, which
+         is passed to `tpu.initialize_system()`.
+    """
+    return self._config_proto
+
+  @property
+  def table_to_config_dict(self):
+    return copy.copy(self._table_to_config_dict)
+
+  @property
+  def feature_to_table_dict(self):
+    return copy.copy(self._feature_to_table_dict)
+
+  @property
+  def table_to_features_dict(self):
+    return copy.copy(self._table_to_features_dict)
+
+  @property
+  def optimization_parameters(self):
+    return self._optimization_parameters
+
+  def _create_config_proto(self):
+    """Create `TPUEmbeddingConfiguration`."""
+    config_proto = elc.TPUEmbeddingConfiguration()
+    for table in self._table_to_config_dict:
+      table_descriptor = config_proto.table_descriptor.add()
+      table_descriptor.name = table
+
+      table_config = self._table_to_config_dict[table]
+      table_descriptor.vocabulary_size = table_config.vocabulary_size
+      table_descriptor.dimension = table_config.dimension
+
+      features_for_table = self._table_to_features_dict[table]
+      table_descriptor.num_features = len(features_for_table)
+
+      table_descriptor.optimization_parameters.learning_rate.constant = (
+          self._optimization_parameters.learning_rate)
+      table_descriptor.optimization_parameters.gradient_accumulation_status = (
+          optimization_parameters_pb2.GradientAccumulationStatus.ENABLED
+          if self._optimization_parameters.use_gradient_accumulation else
+          optimization_parameters_pb2.GradientAccumulationStatus.DISABLED)
+      # For compatibility with old TPU workers.
+      table_descriptor.optimization_parameters.use_gradient_accumulation = (
+          self._optimization_parameters.use_gradient_accumulation)
+      self._optimizer_handler.set_optimization_parameters(table_descriptor)
+
+    config_proto.mode = self._mode
+    config_proto.batch_size_per_tensor_core = self._batch_size_per_core
+    config_proto.num_hosts = self._num_hosts
+    config_proto.num_tensor_cores = self._num_cores
+    config_proto.sharding_strategy = elc.TPUEmbeddingConfiguration.DIV_DEFAULT
+    config_proto.pipeline_execution_with_tensor_core = (
+        self._optimization_parameters.pipeline_execution_with_tensor_core)
+
+    return config_proto
+
+  def create_variables_and_ops(self, embedding_variable_name_by_table=None,
+                               slot_variable_names_by_table=None):
+    """Create embedding and slot variables, with ops to load and retrieve them.
+
+    Args:
+      embedding_variable_name_by_table: A dictionary mapping from string of
+        table name to string of embedding variable name. If `None`, the table
+        name will be used.
+      slot_variable_names_by_table: A dictionary mapping from string of table
+        name to `AdamSlotVariableNames`, `AdagradSlotVariableNames` etc. If
+        `None`, defaults from `get_default_slot_variable_names()` will be used.
+
+    Returns:
+      `tpu_embedding.VariablesAndOps` with:
+        A dictionary mapping from string of table name to embedding variables,
+        A dictionary mapping from string of table name to `AdagradSlotVariable`,
+          `AdamSlotVariables` etc. holding the slot variables,
+        A function which returns a list of ops to load embedding and slot
+          variables from CPU to TPU,
+        A function which returns a list of ops to retrieve embedding and slot
+          variables from TPU to CPU.
+    """
+    embedding_variables_by_table = {}
+    slot_variables_by_table = {}
+    load_op_fns = []
+    retrieve_op_fns = []
+    for table in self._table_to_config_dict:
+      if embedding_variable_name_by_table:
+        embedding_variable_name = embedding_variable_name_by_table[table]
+      else:
+        embedding_variable_name = table
+      if slot_variable_names_by_table:
+        slot_variable_names = slot_variable_names_by_table[table]
+      else:
+        slot_variable_names = (
+            self._optimizer_handler.get_default_slot_variable_names(table))
+
+      device_fn = _create_device_fn(self._hosts)
+      with ops.device(device_fn):
+        table_variables = _create_partitioned_variables(
+            name=embedding_variable_name,
+            num_hosts=self._num_hosts,
+            vocabulary_size=self._table_to_config_dict[table].vocabulary_size,
+            embedding_dimension=self._table_to_config_dict[table].dimension,
+            initializer=self._table_to_config_dict[table].initializer,
+            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
+        embedding_variables_by_table[table] = table_variables
+
+        slot_variables_for_table, load_ops_fn, retrieve_ops_fn = (
+            self._optimizer_handler.create_variables_and_ops(
+                table, slot_variable_names, self._num_hosts,
+                self._table_to_config_dict[table], table_variables)
+        )
+        slot_variables_by_table[table] = slot_variables_for_table
+        load_op_fns.append(load_ops_fn)
+        retrieve_op_fns.append(retrieve_ops_fn)
+
+    def load_ops():
+      """Calls and returns the load ops for each embedding table.
+
+      Returns:
+        A list of ops to load embedding and slot variables from CPU to TPU.
+      """
+      load_ops_list = []
+      for load_op_fn in load_op_fns:
+        load_ops_list.extend(load_op_fn())
+      return load_ops_list
+
+    def retrieve_ops():
+      """Calls and returns the retrieve ops for each embedding table.
+
+      Returns:
+        A list of ops to retrieve embedding and slot variables from TPU to CPU.
+      """
+      retrieve_ops_list = []
+      for retrieve_op_fn in retrieve_op_fns:
+        retrieve_ops_list.extend(retrieve_op_fn())
+      return retrieve_ops_list
+
+    return VariablesAndOps(embedding_variables_by_table,
+                           slot_variables_by_table,
+                           load_ops, retrieve_ops)
+
+  def generate_enqueue_ops(self, sparse_features_list):
+    """Generate enqueue ops.
+
+    Args:
+      sparse_features_list: a list of dictionaries mapping feature name
+        strings to `SparseTensor`s. Each dictionary is for one TPU core.
+        Dictionaries for the same host should be contiguous in the list.
+
+    Returns:
+      Ops to enqueue to TPU for embedding.
+    """
+    self._validate_generate_enqueue_ops_sparse_features_list(
+        sparse_features_list)
+    return [
+        self._generate_enqueue_op(
+            sparse_features, device_ordinal=i % self._num_cores_per_host)
+        for i, sparse_features in enumerate(sparse_features_list)
+    ]
+
+  def _validate_generate_enqueue_ops_sparse_features_list(
+      self, sparse_features_list):
+    """Validate `sparse_features_list`."""
+    if len(sparse_features_list) != self._num_cores:
+      raise ValueError('Length of `sparse_features_list` should match the '
+                       'number of cores; '
+                       '`len(sparse_features_list)` is {}, '
+                       'number of cores is {}.'.format(
+                           len(sparse_features_list), self._num_cores))
+
+    feature_set = set(self._feature_to_table_dict.keys())
+    contiguous_device = None
+    for i, sparse_features in enumerate(sparse_features_list):
+      used_feature_set = set(sparse_features.keys())
+
+      # Check features are valid.
+      missing_feature_set = feature_set - used_feature_set
+      if missing_feature_set:
+        raise ValueError('`sparse_features_list[{}]` misses a feature that is '
+                         'in `feature_to_table_dict`: {}.'.format(
+                             i, missing_feature_set))
+
+      extra_feature_set = used_feature_set - feature_set
+      if extra_feature_set:
+        raise ValueError('`sparse_features_list[{}]` has a feature that is not '
+                         'in `feature_to_table_dict`: {}.'.format(
+                             i, extra_feature_set))
+
+      device = None
+      device_feature = None
+      for feature, tensor in six.iteritems(sparse_features):
+        if not isinstance(tensor, sparse_tensor.SparseTensor):
+          raise ValueError('`sparse_features_list[{}]` has a feature that is '
+                           'not mapped to `SparseTensor`. '
+                           '`feature`: {}, type: {}'.format(
+                               i, feature, type(tensor)))
+
+        # Check all features are on the same device.
+        if device is None:
+          device = tensor.op.device
+          device_feature = feature
+        else:
+          if device != tensor.op.device:
+            raise ValueError('Devices are different between features in '
+                             '`sparse_features_list[{}]`; '
+                             'devices: {}, {}; features: {}, {}.'.format(
+                                 i, device, tensor.op.device, feature,
+                                 device_feature))
+
+      if i % self._num_cores_per_host:
+        if device != contiguous_device:
+          raise ValueError('We expect the `sparse_features` which are on the '
+                           'same host to be contiguous in '
+                           '`sparse_features_list`, '
+                           '`sparse_features_list[{}]` is on device {}, '
+                           'but is expected to be on device {}.'.format(
+                               i, device, contiguous_device))
+      else:
+        contiguous_device = device
+
+  def _generate_enqueue_op(self, sparse_features, device_ordinal):
+    with ops.colocate_with(list(sparse_features.values())[0]):
+      sample_idcs, embedding_idcs, aggregation_weights = (
+          self._format_for_tpu_embedding_sparse_batch(sparse_features))
+      return tpu_ops.enqueue_tpu_embedding_sparse_batch(
+          sample_idcs,
+          embedding_idcs,
+          aggregation_weights,
+          combiners=self._combiners,
+          device_ordinal=device_ordinal)
+
+  def _format_for_tpu_embedding_sparse_batch(self, sparse_features):
+    """Format sparse features for `enqueue_tpu_embedding_sparse_batch()`.
+
+    Args:
+      sparse_features: a `Dict` of `SparseTensor`s for embedding.
+
+    Returns:
+      Arguments for `enqueue_tpu_embedding_sparse_batch()`: three lists, each
+      with one entry per table (int32 sample indices, int32 embedding indices
+      and float32 weights); the i-th feature of a table has its sample
+      indices offset by `i * batch_size_per_core`.
+    """
+
+    sample_idcs, embedding_idcs, aggregation_weights = list(), list(), list()
+    for table in self._table_to_features_dict:
+      sample_t, indices_t, weights_t = list(), list(), list()
+
+      features = self._table_to_features_dict[table]
+      for i, feature in enumerate(features):
+        tensor = sparse_features[feature]
+        sample_indices = tensor.indices[:, 0]
+        embedding_indices = tensor.values
+        weights = array_ops.ones_like(embedding_indices)
+        sample_t.append(i * self._batch_size_per_core + sample_indices)
+        indices_t.append(embedding_indices)
+        weights_t.append(weights)
+
+      sample_idcs.append(
+          math_ops.cast(array_ops.concat(sample_t, axis=0), dtype=dtypes.int32))
+      embedding_idcs.append(
+          math_ops.cast(
+              array_ops.concat(indices_t, axis=0), dtype=dtypes.int32))
+      aggregation_weights.append(
+          math_ops.cast(
+              array_ops.concat(weights_t, axis=0), dtype=dtypes.float32))
+
+    return sample_idcs, embedding_idcs, aggregation_weights
+
+  def get_activations(self):
+    """Get activations for features.
+
+    This should be called within `computation` that is passed to
+      `tpu.replicate` and friends.
+
+    Returns:
+      A dictionary mapping from `String` of feature name to `Tensor`
+        of activation.
+    """
+    recv_activations = tpu_ops.recv_tpu_embedding_activations(
+        num_outputs=len(self._table_to_config_dict),
+        config=self._config_proto.SerializeToString())
+
+    activations = collections.OrderedDict()
+    for table_id, table in enumerate(self._table_to_features_dict):
+      features = self._table_to_features_dict[table]
+      for lookup_id, feature in enumerate(features):
+        start_row = lookup_id * self._batch_size_per_core
+        end_row = start_row + self._batch_size_per_core
+        activations[feature] = recv_activations[table_id][start_row:end_row, :]
+    return activations
+
+  def generate_send_gradients_op(self, feature_to_gradient_dict):
+    """Send gradient to TPU embedding.
+
+    Args:
+      feature_to_gradient_dict: dict mapping feature names to gradient wrt
+        activations.
+
+    Returns:
+      SendTPUEmbeddingGradients Op.
+
+    Raises:
+      RuntimeError: If `mode` is not `TRAINING`.
+    """
+    if self._mode != TRAINING:
+      raise RuntimeError('Gradients are sent to TPU embedding only in '
+                         'training mode; got mode {}.'.format(self._mode))
+    gradients = []
+    for table in self._table_to_features_dict:
+      features = self._table_to_features_dict[table]
+      table_gradients = [
+          feature_to_gradient_dict[feature] for feature in features
+      ]
+      concat_table_grads = array_ops.concat(table_gradients, axis=0)
+      gradients.append(concat_table_grads)
+    return tpu_ops.send_tpu_embedding_gradients(
+        inputs=gradients, config=self.config_proto.SerializeToString())
+
+
+def _validate_table_to_config_dict(table_to_config_dict):
+  """Validate `table_to_config_dict`."""
+  for k, v in six.iteritems(table_to_config_dict):
+    if not isinstance(v, TableConfig):
+      raise ValueError('Value of `table_to_config_dict` must be of type '
+                       '`TableConfig`, got {} for {}.'.format(type(v), k))
+
+
+def _validate_feature_to_table_dict(table_to_config_dict,
+                                    feature_to_table_dict):
+  """Validate `feature_to_table_dict`."""
+  used_table_set = set(feature_to_table_dict.values())
+  table_set = set(table_to_config_dict.keys())
+
+  unused_table_set = table_set - used_table_set
+  if unused_table_set:
+    raise ValueError('`table_to_config_dict` specifies a table that is not '
+                     'used in `feature_to_table_dict`: {}.'
+                     .format(unused_table_set))
+
+  extra_table_set = used_table_set - table_set
+  if extra_table_set:
+    raise ValueError('`feature_to_table_dict` refers to a table that is not '
+                     'specified in `table_to_config_dict`: {}.'
+                     .format(extra_table_set))
+
+
+def _validate_batch_size(batch_size, num_cores):
+  if batch_size % num_cores:
+    raise ValueError('`batch_size` is not a multiple of number of '
+                     'cores. `batch_size`={}, `_num_cores`={}.'.format(
+                         batch_size, num_cores))
+
+
+def _validate_optimization_parameters(optimization_parameters):
+  if not isinstance(optimization_parameters, _OptimizationParameters):
+    raise ValueError('`optimization_parameters` must inherit from '
+                     '`_OptimizationParameters`. '
+                     '`type(optimization_parameters)`={}'.format(
+                         type(optimization_parameters)))
+
+
+class _OptimizerHandler(object):
+  """Interface class for handling optimizer specific logic."""
+
+  def __init__(self, optimization_parameters):
+    self._optimization_parameters = optimization_parameters
+
+  def set_optimization_parameters(self, table_descriptor):
+    raise NotImplementedError()
+
+  def get_default_slot_variable_names(self, table):
+    raise NotImplementedError()
+
+  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
+                               table_config, table_variables):
+    raise NotImplementedError()
+
+
+class _AdagradHandler(_OptimizerHandler):
+  """Handles Adagrad specific logic."""
+
+  def __init__(self, optimization_parameters):
+    super(_AdagradHandler, self).__init__(optimization_parameters)
+    self._table_to_accumulator_variables_dict = {}
+
+  def set_optimization_parameters(self, table_descriptor):
+    table_descriptor.optimization_parameters.adagrad.SetInParent()
+
+  def get_default_slot_variable_names(self, table):
+    return AdagradSlotVariableName('{}/{}'.format(table, 'Adagrad'))
+
+  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
+                               table_config, table_variables):
+    accumulator_initializer = init_ops.constant_initializer(
+        self._optimization_parameters.initial_accumulator)
+    accumulator_variables = _create_partitioned_variables(
+        name=slot_variable_names.accumulator,
+        num_hosts=num_hosts,
+        vocabulary_size=table_config.vocabulary_size,
+        embedding_dimension=table_config.dimension,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+        initializer=accumulator_initializer)
+    slot_variables = AdagradSlotVariable(accumulator_variables)
+
+    def load_ops_fn():
+      """Returns the retrieve ops for AdaGrad embedding tables.
+
+      Returns:
+        A list of ops to load embedding and slot variables from CPU to TPU.
+      """
+      load_op_list = []
+      for host_id, table_variable, accumulator_variable in (zip(
+          range(num_hosts), table_variables, accumulator_variables)):
+        with ops.colocate_with(table_variable):
+          load_parameters_op = (
+              tpu_ops.load_tpu_embedding_adagrad_parameters(
+                  parameters=table_variable,
+                  accumulators=accumulator_variable,
+                  table_name=table,
+                  num_shards=num_hosts,
+                  shard_id=host_id))
+        load_op_list.append(load_parameters_op)
+      return load_op_list
+
+    def retrieve_ops_fn():
+      """Returns the retrieve ops for AdaGrad embedding tables.
+
+      Returns:
+        A list of ops to retrieve embedding and slot variables from TPU to CPU.
+      """
+      retrieve_op_list = []
+      for host_id, table_variable, accumulator_variable in (zip(
+          range(num_hosts), table_variables, accumulator_variables)):
+        with ops.colocate_with(table_variable):
+          retrieved_table, retrieved_accumulator = (
+              tpu_ops.retrieve_tpu_embedding_adagrad_parameters(
+                  table_name=table,
+                  num_shards=num_hosts,
+                  shard_id=host_id))
+          retrieve_parameters_op = control_flow_ops.group(
+              state_ops.assign(table_variable, retrieved_table),
+              state_ops.assign(accumulator_variable, retrieved_accumulator))
+        retrieve_op_list.append(retrieve_parameters_op)
+      return retrieve_op_list
+
+    return slot_variables, load_ops_fn, retrieve_ops_fn
+
+
+class _AdamHandler(_OptimizerHandler):
+  """Handles Adam specific logic."""
+
+  def __init__(self, optimization_parameters):
+    super(_AdamHandler, self).__init__(optimization_parameters)
+    self._table_to_m_variables_dict = {}
+    self._table_to_v_variables_dict = {}
+
+  def set_optimization_parameters(self, table_descriptor):
+    table_descriptor.optimization_parameters.adam.beta1 = (
+        self._optimization_parameters.beta1)
+    table_descriptor.optimization_parameters.adam.beta2 = (
+        self._optimization_parameters.beta2)
+    table_descriptor.optimization_parameters.adam.epsilon = (
+        self._optimization_parameters.epsilon)
+    table_descriptor.optimization_parameters.adam.use_non_lazy_adam = (
+        not self._optimization_parameters.lazy_adam)
+    table_descriptor.optimization_parameters.adam.use_sum_inside_sqrt = (
+        self._optimization_parameters.sum_inside_sqrt)
+
+  def get_default_slot_variable_names(self, table):
+    return AdamSlotVariableNames('{}/{}/m'.format(table, 'Adam'),
+                                 '{}/{}/v'.format(table, 'Adam'))
+
+  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
+                               table_config, table_variables):
+    m_initializer = init_ops.zeros_initializer()
+    m_variables = _create_partitioned_variables(
+        name=slot_variable_names.m,
+        num_hosts=num_hosts,
+        vocabulary_size=table_config.vocabulary_size,
+        embedding_dimension=table_config.dimension,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+        initializer=m_initializer)
+    v_initializer = init_ops.zeros_initializer()
+    v_variables = _create_partitioned_variables(
+        name=slot_variable_names.v,
+        num_hosts=num_hosts,
+        vocabulary_size=table_config.vocabulary_size,
+        embedding_dimension=table_config.dimension,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+        initializer=v_initializer)
+    slot_variables = AdamSlotVariables(m_variables, v_variables)
+
+    def load_ops_fn():
+      """Returns the retrieve ops for AdaGrad embedding tables.
+
+      Returns:
+        A list of ops to load embedding and slot variables from CPU to TPU.
+      """
+      load_op_list = []
+      for host_id, table_variable, m_variable, v_variable in (zip(
+          range(num_hosts), table_variables,
+          m_variables, v_variables)):
+        with ops.colocate_with(table_variable):
+          load_parameters_op = (
+              tpu_ops.load_tpu_embedding_adam_parameters(
+                  parameters=table_variable,
+                  momenta=m_variable,
+                  velocities=v_variable,
+                  table_name=table,
+                  num_shards=num_hosts,
+                  shard_id=host_id))
+
+        load_op_list.append(load_parameters_op)
+      return load_op_list
+
+    def retrieve_ops_fn():
+      """Returns the retrieve ops for Adam embedding tables.
+
+      Returns:
+        A list of ops to retrieve embedding and slot variables from TPU to CPU.
+      """
+
+      retrieve_op_list = []
+      for host_id, table_variable, m_variable, v_variable in (zip(
+          range(num_hosts), table_variables,
+          m_variables, v_variables)):
+        with ops.colocate_with(table_variable):
+          retrieved_table, retrieved_m, retrieved_v = (
+              tpu_ops.retrieve_tpu_embedding_adam_parameters(
+                  table_name=table,
+                  num_shards=num_hosts,
+                  shard_id=host_id))
+          retrieve_parameters_op = control_flow_ops.group(
+              state_ops.assign(table_variable, retrieved_table),
+              state_ops.assign(m_variable, retrieved_m),
+              state_ops.assign(v_variable, retrieved_v))
+
+        retrieve_op_list.append(retrieve_parameters_op)
+      return retrieve_op_list
+
+    return slot_variables, load_ops_fn, retrieve_ops_fn
+
+
+class _StochasticGradientDescentHandler(_OptimizerHandler):
+  """Handles stochastic gradient descent specific logic."""
+
+  def set_optimization_parameters(self, table_descriptor):
+    (table_descriptor.optimization_parameters.stochastic_gradient_descent
+     .SetInParent())
+
+  def get_default_slot_variable_names(self, table):
+    return None
+
+  def create_variables_and_ops(self, table, slot_variable_names, num_hosts,
+                               table_config, table_variables):
+    del table_config
+
+    def load_ops_fn():
+      """Returns the retrieve ops for AdaGrad embedding tables.
+
+      Returns:
+        A list of ops to load embedding and slot variables from CPU to TPU.
+      """
+      load_op_list = []
+      for host_id, table_variable in (zip(
+          range(num_hosts), table_variables)):
+        with ops.colocate_with(table_variable):
+          load_parameters_op = (
+              tpu_ops
+              .load_tpu_embedding_stochastic_gradient_descent_parameters(
+                  parameters=table_variable,
+                  table_name=table,
+                  num_shards=num_hosts,
+                  shard_id=host_id))
+
+        load_op_list.append(load_parameters_op)
+      return load_op_list
+
+    def retrieve_ops_fn():
+      """Returns the retrieve ops for SGD embedding tables.
+
+      Returns:
+        A list of ops to retrieve embedding and slot variables from TPU to CPU.
+      """
+
+      retrieve_op_list = []
+      for host_id, table_variable in (zip(
+          range(num_hosts), table_variables)):
+        with ops.colocate_with(table_variable):
+          retrieved_table = (
+              tpu_ops
+              .retrieve_tpu_embedding_stochastic_gradient_descent_parameters(
+                  table_name=table,
+                  num_shards=num_hosts,
+                  shard_id=host_id))
+          retrieve_parameters_op = control_flow_ops.group(
+              state_ops.assign(table_variable, retrieved_table))
+
+        retrieve_op_list.append(retrieve_parameters_op)
+      return retrieve_op_list
+
+    return None, load_ops_fn, retrieve_ops_fn
+
+
+def _get_optimization_handler(optimization_parameters):
+  if isinstance(optimization_parameters, AdagradParameters):
+    return _AdagradHandler(optimization_parameters)
+  elif isinstance(optimization_parameters, AdamParameters):
+    return _AdamHandler(optimization_parameters)
+  elif isinstance(optimization_parameters, StochasticGradientDescentParameters):
+    return _StochasticGradientDescentHandler(optimization_parameters)
+  else:
+    raise NotImplementedError()
+
+
+def _create_ordered_dict(d):
+  """Create an OrderedDict from Dict."""
+  return collections.OrderedDict((k, d[k]) for k in sorted(d))
+
+
+def _create_combiners(table_to_config_dict):
+  return [table_to_config_dict[t].combiner for t in table_to_config_dict]
+
+
+def _create_table_to_features_dict(feature_to_table_dict):
+  """Create mapping from table to a list of its features."""
+  table_to_features_dict_tmp = {}
+  for feature, table in six.iteritems(feature_to_table_dict):
+    if table in table_to_features_dict_tmp:
+      table_to_features_dict_tmp[table].append(feature)
+    else:
+      table_to_features_dict_tmp[table] = [feature]
+
+  table_to_features_dict = collections.OrderedDict()
+  for table in sorted(table_to_features_dict_tmp):
+    table_to_features_dict[table] = sorted(table_to_features_dict_tmp[table])
+  return table_to_features_dict
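+
+# For example, {'watched': 'video', 'favorited': 'video', 'friends': 'user'}
+# becomes OrderedDict([('user', ['friends']),
+#                      ('video', ['favorited', 'watched'])]), with both the
+# tables and their feature lists sorted for determinism.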
+
+
+def _create_device_fn(hosts):
+  """Create device_fn() to use with _create_partitioned_variables()."""
+
+  def device_fn(op):
+    """Returns the `device` for `op`."""
+    part_match = re.match(r'.*/part_(\d+)(/|$)', op.name)
+
+    if part_match:
+      idx = int(part_match.group(1))
+    else:
+      raise RuntimeError('Internal Error: '
+                         'Expected %s to contain /part_*.' % op.name)
+
+    device = hosts[idx]
+    return device
+
+  return device_fn
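+
+# As an illustration, with hosts = ['/job:worker/task:0/device:CPU:0',
+# '/job:worker/task:1/device:CPU:0'], an op named 'emb/part_1/Assign' would
+# be placed on the task-1 host, since its partition index is 1.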
+
+
+def _create_partitioned_variables(name,
+                                  num_hosts,
+                                  vocabulary_size,
+                                  embedding_dimension,
+                                  initializer,
+                                  collections=None):  # pylint: disable=redefined-outer-name
+  """Creates ParitionedVariables based on `num_hosts` for `table`."""
+  # TODO(shizhiw): automatically place embedding lookup elsewhere?
+  if vocabulary_size < num_hosts:
+    raise ValueError('`vocabulary_size`({}) is smaller than `num_hosts`({}). '
+                     'As TPU embedding is not optimized for small tables, '
+                     'please consider other ways for this embedding lookup.'
+                     .format(vocabulary_size, num_hosts))
+
+  return list(variable_scope.get_variable(
+      name,
+      shape=(vocabulary_size, embedding_dimension),
+      partitioner=partitioned_variables.fixed_size_partitioner(num_hosts),
+      dtype=dtypes.float32,
+      initializer=initializer,
+      collections=collections,
+      trainable=False))
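+
+
+# For example, name='emb', vocabulary_size=1000, embedding_dimension=64 and
+# num_hosts=4 would yield four shards of shape (250, 64) named 'emb/part_0'
+# through 'emb/part_3', which _create_device_fn() then pins to the four hosts.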
diff --git a/tensorflow/python/tpu/tpu_embedding_gradient.py b/tensorflow/python/tpu/tpu_embedding_gradient.py
new file mode 100644
index 0000000..d7de661
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_embedding_gradient.py
@@ -0,0 +1,153 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""Optional helper for gradient handling."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.tpu.ops import tpu_ops
+
+
+def get_gradients_through_compute_gradients(optimizer, loss, activations):
+  """Compute gradients to send to TPU embedding.
+
+  Args:
+    optimizer: a subclass of optimizer.Optimizer, usually CrossShardOptimizer.
+      Used to call compute_gradients().
+    loss: a Tensor to call optimizer.compute_gradients() on.
+    activations: an OrderedDict mapping feature_name to Tensors of activations.
+
+  Returns:
+    An OrderedDict mapping from feature name Strings to Tensors of gradients of
+      the loss wrt the activations of the features.
+  """
+  activation_list = list(activations.values())
+  grads_and_vars = optimizer.compute_gradients(loss, activation_list)
+  grads = [grad for grad, _ in grads_and_vars]
+  feature_to_gradient_dict = collections.OrderedDict(
+      zip(activations.keys(), grads))
+  return feature_to_gradient_dict
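+
+# A sketch of the intended call pattern, mirroring the `TPUEmbedding`
+# docstring example (`embedding`, `cross_shard_optimizer` and `loss` are
+# assumed to already exist):
+#
+#   activations = embedding.get_activations()
+#   grads = get_gradients_through_compute_gradients(
+#       cross_shard_optimizer, loss, activations)
+#   send_op = embedding.generate_send_gradients_op(grads)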
+
+
+def create_dummy_table_variables(tpu_embedding):
+  """Create dummy embedding table variables.
+
+  The sole purpose of these dummy variables is to trigger gradient
+  calculation wrt them so that the gradients wrt the activations can be
+  captured and later sent to TPU embedding.
+
+  Args:
+    tpu_embedding: TPUEmbedding, dummy table variables will be created for use
+      with tpu_embedding.
+
+  Returns:
+    A tuple of dummy variables and their initializer.
+
+  Raises:
+    RuntimeError: if the collection to store gradients already exists and is
+      not empty.
+  """
+  dummy_table_variables = collections.OrderedDict()
+  for table_id, table in enumerate(tpu_embedding.table_to_features_dict):
+    dummy_table_variables[table] = (
+        # Explicitly specifying collections prevents this variable from
+        # being added to the GLOBAL_VARIABLES collection, so that Saver()
+        # ignores it.
+        # However, the TensorFlow optimizer creates slot variables for these
+        # dummy variables, e.g.
+        # tpu_embedding_dummy_table_variable_mlp_user/Adam{_1}, which will be
+        # in the GLOBAL_VARIABLES collection.
+        variable_scope.get_variable(
+            'tpu_embedding_dummy_table_variable_{}'.format(table),
+            dtype=dtypes.float32,
+            shape=[1],
+            use_resource=True,
+            trainable=True,
+            collections=['tpu_embedding_dummy_table_variables']))
+
+    g = ops.get_default_graph()
+    table_gradients = g.get_collection_ref(
+        'tpu_embedding_gradients_table_{}'.format(table_id))
+    if table_gradients:
+      raise RuntimeError(
+          'tpu_embedding_gradients_table_{} is not empty.'.format(table_id))
+    table_gradients.extend(
+        [None] * len(tpu_embedding.table_to_features_dict[table]))
+
+  return (dummy_table_variables,
+          variables.variables_initializer(
+              dummy_table_variables.values(),
+              name='tpu_embedding_dummy_table_variables_init'))
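+
+# A sketch of how the dummy-variable path fits together (`embedding`, `model`
+# and `optimizer` are assumed to exist; gradient computation populates the
+# 'tpu_embedding_gradients_table_*' collections):
+#
+#   dummy_vars, dummy_init = create_dummy_table_variables(embedding)
+#   activations = hook_dummy_table_variables_to_activations(
+#       embedding, embedding.get_activations(), dummy_vars)
+#   loss = model(activations)
+#   train_op = optimizer.minimize(loss)
+#   grads = get_gradients_through_dummy_table_variables(embedding)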
+
+
+def hook_dummy_table_variables_to_activations(tpu_embedding, activations,
+                                              dummy_table_variables):
+  """Have activations depend on dummy table variables for gradient intercept.
+
+  Args:
+    tpu_embedding: TPUEmbedding, activations and dummy_table_variables are from
+      tpu_embedding.
+    activations: An OrderedDict of feature name String to activation tensors.
+    dummy_table_variables: An OrderedDict of table name String to dummy table
+      variables.
+
+  Returns:
+    An OrderedDict of feature name String to activation tensors, which can be
+      used just as the activations input.
+  """
+  new_activations = collections.OrderedDict()
+  for feature in activations:
+    table = tpu_embedding.feature_to_table_dict[feature]
+    new_activations[feature] = tpu_ops.tpu_embedding_activations(
+        dummy_table_variables[table],
+        activations[feature],
+        table_id=list(tpu_embedding.table_to_config_dict.keys()).index(table),
+        lookup_id=tpu_embedding.table_to_features_dict[table].index(feature))
+  return new_activations
+
+
+def get_gradients_through_dummy_table_variables(tpu_embedding):
+  """Get gradients wrt the activations of each feature.
+
+  Args:
+    tpu_embedding: TPUEmbedding, create dummy table variable to be used with
+      tpu_embedding.
+
+  Returns:
+    An OrderedDict mapping feature name to gradient.
+
+  Raises:
+    ValueError: if some gradients are not defined.
+  """
+  g = ops.get_default_graph()
+  feature_to_gradient_dict = collections.OrderedDict()
+  for table_id, table in enumerate(tpu_embedding.table_to_config_dict):
+    table_gradients = g.get_collection(
+        'tpu_embedding_gradients_table_{}'.format(table_id))
+    if any(gradient is None for gradient in table_gradients):
+      raise ValueError(
+          'Table {} with id {} has undefined gradients: this is probably '
+          'because the model asked TPUEmbedding to compute activations that '
+          'were not used.'.format(table, table_id))
+    for feature, gradient in zip(tpu_embedding.table_to_features_dict[table],
+                                 table_gradients):
+      feature_to_gradient_dict[feature] = gradient
+  return feature_to_gradient_dict
diff --git a/tensorflow/python/tpu/tpu_estimator.py b/tensorflow/python/tpu/tpu_estimator.py
new file mode 100644
index 0000000..ea6b297
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_estimator.py
@@ -0,0 +1,3760 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""TPUEstimator class."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import os
+import signal
+import sys
+import threading
+import time
+
+import numpy as np
+import six
+from six.moves import queue as Queue  # pylint: disable=redefined-builtin
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.core.framework import variable_pb2
+from tensorflow.core.framework.summary_pb2 import Summary
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf.tpu import compilation_result_pb2 as tpu_compilation_result
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest as data_nest
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.export import export_output as export_output_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import summary_ops_v2 as contrib_summary
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import tag_constants
+from tensorflow.python.summary import summary
+from tensorflow.python.tpu import _tpu_estimator_embedding
+from tensorflow.python.tpu import error_handling
+from tensorflow.python.tpu import functional as tpu_functional
+from tensorflow.python.tpu import session_support
+from tensorflow.python.tpu import tensor_tracer
+from tensorflow.python.tpu import tpu
+from tensorflow.python.tpu import tpu_config
+from tensorflow.python.tpu import tpu_context
+from tensorflow.python.tpu import tpu_embedding_gradient
+from tensorflow.python.tpu import tpu_feed
+from tensorflow.python.tpu import tpu_function
+from tensorflow.python.tpu import training_loop
+from tensorflow.python.tpu import util as util_lib
+from tensorflow.python.tpu._tpu_estimator_embedding import AdamParameters  # pylint: disable=unused-import
+from tensorflow.python.tpu._tpu_estimator_embedding import EmbeddingConfigSpec  # pylint: disable=unused-import
+from tensorflow.python.tpu.ops import tpu_ops
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import evaluation
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+from tensorflow.python.util import function_utils
+from tensorflow.python.util import nest
+from tensorflow.python.util import tf_inspect
+
+_INITIAL_LOSS = 1e7
+_ZERO_LOSS = 0.
+_TPU_ESTIMATOR = 'tpu_estimator'
+_ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop'
+_BATCH_SIZE_KEY = 'batch_size'
+_CTX_KEY = 'context'
+_USE_TPU_KEY = 'use_tpu'
+_CROSS_REPLICA_SUM_OP = 'CrossReplicaSum'
+_ONE_GIGABYTE = 1024 * 1024 * 1024
+_TPU_ENQUEUE_OPS = '_tpu_enqueue_ops'
+_TPU_TRAIN_OP = '_tpu_train_op'
+_REWRITE_FOR_INFERENCE_MODE = '_rewrite_for_inference'
+_KEY_WHEN_PREDICTIONS_IS_A_TENSOR = '_key_when_predictions_is_a_tensor'
+
+# Ideally _USE_TPU_KEY should be reserved as well. However there are already
+# models that make use of this key, thus it can not be reserved now to prevent
+# breakage. In the long run, we would like to mitigate this by migrating models
+# off of using _USE_TPU_KEY.
+_RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY, _CTX_KEY]
+
+# TODO(b/65703635): Flip the value and remove all dead code. Currently, this is
+# only used for per-core based deployments. For per-host based pipelines, if a
+# user returns a Dataset instance it will be automatically wrapped in a
+# tf.while_loop (This can be disabled by returning features and labels
+# explicitly).
+_WRAP_INPUT_FN_INTO_WHILE_LOOP = False
+
+ops.register_proto_function(
+    '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR),
+    proto_type=variable_pb2.VariableDef,
+    to_proto=resource_variable_ops._to_proto_fn,  # pylint: disable=protected-access
+    from_proto=resource_variable_ops._from_proto_fn)  # pylint: disable=protected-access
+
+
+def _is_iterable(obj):
+  """A Python 2 and 3 compatible util to check whether `obj` is iterable."""
+  try:
+    iter(obj)
+    return True
+  except TypeError:
+    return False
+
+
+class CatchInvalidHostcallFunctions(control_flow_ops.XLAControlFlowContext):
+
+  def AddOp(self, op):
+    if op.type in [
+        'AudioSummary', 'AudioSummaryV2', 'HistogramSummary', 'ImageSummary',
+        'MergeSummary', 'ScalarSummary', 'TensorSummary', 'TensorSummaryV2'
+    ]:
+      raise ValueError('Use tf.contrib.summary inside of host_calls.')
+
+
+def _create_global_step(graph):
+  graph = graph or ops.get_default_graph()
+  if training.get_global_step(graph) is not None:
+    raise ValueError('"global_step" already exists.')
+  # Create in proper graph and base name_scope.
+  with graph.as_default() as g, g.name_scope(None):
+    return variable_scope.get_variable(
+        ops.GraphKeys.GLOBAL_STEP,
+        shape=[],
+        dtype=dtypes.int64,
+        initializer=init_ops.zeros_initializer(),
+        trainable=False,
+        use_resource=True,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.GLOBAL_STEP])
+
+
+def _create_or_get_iterations_per_loop():
+  """Creates or gets the iterations_per_loop variable.
+
+  In TPUEstimator, the user-provided computation, the model_fn, is wrapped
+  inside a tf.while_loop for peak performance. The iterations of the loop are
+  specified by this variable, which adjusts its value on the CPU after each TPU
+  program execution and before the next TPU execution.
+
+  The purpose of using a variable, rather than a constant, is to allow
+  TPUEstimator to adapt the TPU training iterations according to the final
+  steps specified by users. For example, if the user sets iterations_per_loop
+  to 4 in TPUConfig and steps to 10 in TPUEstimator.train(), the
+  iterations_per_loop variable will have the following value before each TPU
+  training:
+
+      - 1st TPU execution: iterations_per_loop = 4
+      - 2nd TPU execution: iterations_per_loop = 4
+      - 3rd TPU execution: iterations_per_loop = 2
+
+  As model_fn increases the global step once per train_op invocation, the global
+  step is 10 after all TPU executions, matching the steps=10 argument passed
+  in by users.
+
+  Returns:
+    A TF non-trainable resource variable.
+
+  Raises:
+    RuntimeError: If multiple iterations_per_loop variables are found.
+  """
+  graph = ops.get_default_graph()
+  collection_name = '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR)
+  iter_vars = graph.get_collection(collection_name)
+  if len(iter_vars) == 1:
+    return iter_vars[0]
+  elif len(iter_vars) > 1:
+    raise RuntimeError('Multiple iterations_per_loop_var in collection.')
+
+  with ops.colocate_with(training_util.get_global_step()):
+    with variable_scope.variable_scope(
+        _TPU_ESTIMATOR, reuse=variable_scope.AUTO_REUSE):
+      return variable_scope.get_variable(
+          _ITERATIONS_PER_LOOP_VAR,
+          initializer=init_ops.zeros_initializer(),
+          shape=[],
+          dtype=dtypes.int32,
+          trainable=False,
+          collections=[collection_name, ops.GraphKeys.LOCAL_VARIABLES],
+          use_resource=True)
+
+
+def _sync_variables_ops(ctx):
+  """Create varriables synchronization ops.
+
+  Gets the variables back from TPU nodes. This means the variables updated
+  by TPU will now be *synced* to host memory.
+  In BROADCAST mode, we skip this sync since the variables are ususally too
+  big to transmit via RPC.
+
+  Args:
+    ctx: A `_InternalTPUContext` instance with mode.
+
+  Returns:
+    A list of sync ops.
+  """
+
+  if not ctx.is_input_broadcast_with_iterators():
+    return [
+        array_ops.check_numerics(v.read_value(),
+                                 'Gradient for %s is NaN' % v.name).op
+        for v in variables.trainable_variables()
+    ]
+  else:
+    return [control_flow_ops.no_op()]
+
+
+def _increase_eval_step_op(iterations_per_loop):
+  """Returns an op to increase the eval step for TPU evaluation.
+
+  Args:
+    iterations_per_loop: Tensor. The number of eval steps running in TPU system
+      before returning to CPU host for each `Session.run`.
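+
+  For example, with `iterations_per_loop = 4`, the returned op adds 3 to the
+  eval step; combined with Estimator's default increment of 1 per loop, the
+  eval step advances by 4 per `Session.run`.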
+
+  Returns:
+    An operation that adds `iterations_per_loop - 1` to the eval step.
+  """
+  eval_step = evaluation._get_or_create_eval_step()  # pylint: disable=protected-access
+  # Estimator's evaluate increments the eval step by 1 per loop by default,
+  # so we add the remaining difference here.
+  return state_ops.assign_add(
+      eval_step,
+      math_ops.cast(iterations_per_loop - 1, dtype=eval_step.dtype),
+      use_locking=True)
+
+
+def _extract_key_names(tensor_or_dict):
+  if isinstance(tensor_or_dict, dict):
+    return sorted(tensor_or_dict.keys())
+  return []
+
+
+class _SIGNAL(object):
+  """Signal used to control the thread of infeed/outfeed.
+
+  All reserved signals must be negative numbers. Positive numbers are used to
+  indicate the number of iterations for next training/evaluation loop.
+  """
+  NEXT_BATCH = -1
+  STOP = -2
+
+
+class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
+  """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.
+
+  See `EstimatorSpec` for `mode`, `predictions`, `loss`, `train_op`, and
+  `export_outputs`.
+
+  For evaluation, `eval_metrics` is a tuple of `metric_fn` and `tensors`, where
+  `metric_fn` runs on CPU to generate metrics and `tensors` represents the
+  `Tensor`s transferred from TPU system to CPU host and passed to `metric_fn`.
+  To be precise, TPU evaluation expects a slightly different signature from the
+  `tf.estimator.Estimator`. While `EstimatorSpec.eval_metric_ops` expects a
+  dict, `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`.
+  The `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. The
+  `tensors` usually specify the model logits, which are transferred back from
+  TPU system to CPU host. All tensors must be batch-major, i.e., the batch
+  size is the first dimension. Once all tensors are available at CPU host from
+  all shards, they are concatenated (on CPU) and passed as positional arguments
+  to the `metric_fn` if `tensors` is a list, or as keyword arguments if
+  `tensors` is a dict. `metric_fn` takes the `tensors` and returns a dict from
+  metric string name to the result of calling a metric function, namely a
+  `(metric_tensor, update_op)` tuple. See `TPUEstimator` for an MNIST example
+  of how to specify `eval_metrics`.
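+
+  A minimal sketch of `eval_metrics` (hypothetical names `my_metric_fn`,
+  `labels`, and `logits`):
+
+      def my_metric_fn(labels, logits):
+        predictions = tf.argmax(logits, axis=-1)
+        return {'accuracy': tf.metrics.accuracy(labels, predictions)}
+
+      spec = TPUEstimatorSpec(
+          mode=tf.estimator.ModeKeys.EVAL,
+          loss=loss,
+          eval_metrics=(my_metric_fn, [labels, logits]))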
+
+  `scaffold_fn` is a function running on CPU to generate the `Scaffold`. This
+  function should not capture any Tensors in `model_fn`.
+
+  `host_call` is a tuple of a `function` and a list or dictionary of `tensors`
+  to pass to that function; the function returns a list of Tensors. `host_call`
+  currently works for train() and evaluate(). The function is executed on the
+  CPU on every step, so there is communication overhead when
+  sending tensors from TPU to CPU. To reduce the overhead, try reducing the
+  size of the tensors. The `tensors` are concatenated along their major (batch)
+  dimension, and so must be >= rank 1. The `host_call` is useful for writing
+  summaries with `tf.contrib.summary.create_file_writer`.
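+
+  A minimal sketch of a `host_call` (simplified; the names and log directory
+  are hypothetical):
+
+      def host_call_fn(global_step, loss):
+        with tf.contrib.summary.create_file_writer('/tmp/logs').as_default():
+          with tf.contrib.summary.always_record_summaries():
+            tf.contrib.summary.scalar('loss', loss[0], step=global_step[0])
+            return tf.contrib.summary.all_summary_ops()
+
+      # Tensors must be batch-major and >= rank 1, hence the reshapes.
+      global_step_t = tf.reshape(tf.train.get_global_step(), [1])
+      loss_t = tf.reshape(loss, [1])
+      host_call = (host_call_fn, [global_step_t, loss_t])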
+  """
+
+  def __new__(cls,
+              mode,
+              predictions=None,
+              loss=None,
+              train_op=None,
+              eval_metrics=None,
+              export_outputs=None,
+              scaffold_fn=None,
+              host_call=None,
+              training_hooks=None,
+              evaluation_hooks=None,
+              prediction_hooks=None):
+    """Creates a validated `TPUEstimatorSpec` instance."""
+    host_calls = {}
+    if eval_metrics is not None:
+      host_calls['eval_metrics'] = eval_metrics
+    if host_call is not None:
+      host_calls['host_call'] = host_call
+    _OutfeedHostCall.validate(host_calls)
+
+    training_hooks = tuple(training_hooks or [])
+    evaluation_hooks = tuple(evaluation_hooks or [])
+    prediction_hooks = tuple(prediction_hooks or [])
+
+    for hook in training_hooks + evaluation_hooks + prediction_hooks:
+      if not isinstance(hook, session_run_hook.SessionRunHook):
+        raise TypeError('All hooks must be SessionRunHook instances, given: {}'
+                        .format(hook))
+
+    return super(TPUEstimatorSpec, cls).__new__(
+        cls,
+        mode=mode,
+        predictions=predictions,
+        loss=loss,
+        train_op=train_op,
+        eval_metrics=eval_metrics,
+        export_outputs=export_outputs,
+        scaffold_fn=scaffold_fn,
+        host_call=host_call,
+        training_hooks=training_hooks,
+        evaluation_hooks=evaluation_hooks,
+        prediction_hooks=prediction_hooks)
+
+  def as_estimator_spec(self):
+    """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
+    host_calls = {}
+    if self.eval_metrics is not None:
+      host_calls['eval_metrics'] = self.eval_metrics
+    if self.host_call is not None:
+      host_calls['host_call'] = self.host_call
+    host_call_ret = _OutfeedHostCall.create_cpu_hostcall(host_calls)
+    eval_metric_ops = None
+    if self.eval_metrics is not None:
+      eval_metric_ops = host_call_ret['eval_metrics']
+    hooks = None
+    if self.host_call is not None:
+      hooks = [_OutfeedHostCallHook(host_call_ret['host_call'])]
+    loss = self.loss
+    if (tensor_tracer.TensorTracer.is_enabled() and
+        self.train_op is not None):
+      tt = tensor_tracer.TensorTracer()
+      loss = tt.trace_cpu(ops.get_default_graph(), loss, self.train_op)
+
+    hooks = tuple(hooks or [])
+    scaffold = self.scaffold_fn() if self.scaffold_fn else None
+    return model_fn_lib.EstimatorSpec(
+        mode=self.mode,
+        predictions=self.predictions,
+        loss=loss,
+        train_op=self.train_op,
+        eval_metric_ops=eval_metric_ops,
+        export_outputs=self.export_outputs,
+        scaffold=scaffold,
+        training_hooks=self.training_hooks + hooks,
+        evaluation_hooks=self.evaluation_hooks + hooks,
+        prediction_hooks=self.prediction_hooks + hooks)
+
+
+class _OpQueueContext(object):
+  """Manages work queue and thread for a infeed/outfeed thread."""
+
+  def __init__(self, name, target, args):
+    self._name = name
+    self._queue = Queue.Queue()
+    args = (self,) + args
+    self._thread = threading.Thread(name=name, target=target, args=args)
+    self._thread.daemon = True
+    self._thread.start()
+
+  def stop(self):
+    self._queue.put(_SIGNAL.STOP)
+
+  def send_next_batch_signal(self, iterations):
+    self._queue.put(iterations)
+
+  def read_iteration_counts(self):
+    while True:
+      iterations = self._queue.get(block=True)
+      logging.debug('%s read iterations %s', self._name, iterations)
+      if iterations == _SIGNAL.STOP:
+        logging.info('%s received shutdown signal, stopping.', self._name)
+        return
+      yield iterations
+
+  def join(self):
+    logging.info('Shutting down %s thread.', self._name)
+    self.stop()
+    self._thread.join()
+
+
+class _OpSignalOnceQueueContext(_OpQueueContext):
+  """Manages work queue and thread for a infeed/outfeed thread.
+
+  This subclass only signals once.
+  """
+
+  def __init__(self, name, target, args):
+    super(_OpSignalOnceQueueContext, self).__init__(name, target, args)
+    self._has_signaled = False
+
+  def send_next_batch_signal(self, iterations):
+    if not self._has_signaled:
+      self._queue.put(iterations)
+      self._has_signaled = True
+
+
+class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
+  """A Session hook setting up the TPU initialization, infeed, and outfeed.
+
+  This hook does two major things:
+  1. initializes and shuts down the TPU system.
+  2. launches and joins the threads for infeed enqueue and (optional) outfeed
+     dequeue.
+  """
+
+  def __init__(self,
+               ctx,
+               enqueue_ops,
+               dequeue_ops,
+               tpu_compile_op,
+               run_infeed_loop_on_coordinator=True,
+               rendezvous=None,
+               master=None,
+               session_config=None,
+               tpu_init_ops=None):
+    self._master_job = ctx.master_job
+    self._enqueue_ops = enqueue_ops
+    self._dequeue_ops = dequeue_ops
+    self._rendezvous = rendezvous
+    self._master = master
+    self._session_config = session_config
+    self._init_ops = list(tpu_init_ops or [])
+    if ctx.embedding_config is None:
+      self._embedding_layer_config = None
+    else:
+      self._embedding_layer_config = (
+          ctx.embedding_config.tpu_embedding.config_proto)
+    self._run_infeed_loop_on_coordinator = run_infeed_loop_on_coordinator
+    self._initial_infeed_sleep_secs = (
+        ctx.config.tpu_config.initial_infeed_sleep_secs)
+
+    self._feed_error = None
+    self._finished = False
+    # When using model parallelism, the TPU is pre-initialized at startup to
+    # fetch mesh information.  We skip re-initializing it here to avoid
+    # suspected issues due to the mesh layout changing on the second
+    # initialization.
+    self._should_initialize_tpu = not ctx.model_parallelism_enabled
+    self._tpu_compile_op = tpu_compile_op
+
+  def begin(self):
+    logging.info('TPU job name %s', self._master_job)
+    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
+    if self._should_initialize_tpu:
+      self._finalize_ops = [tpu.shutdown_system(job=self._master_job)]
+    else:
+      self._finalize_ops = []
+
+    summary_writer_init_ops = contrib_summary.summary_writer_initializer_op()
+    self._init_ops.extend(summary_writer_init_ops)
+    # Get all the writer resources from the initializer, so we know what to
+    # flush.
+    for op in summary_writer_init_ops:
+      self._finalize_ops.append(contrib_summary.flush(writer=op.inputs[0]))
+
+  def _run_infeed(self, queue_ctx, session):
+    logging.info('Starting infeed thread controller.')
+    if self._initial_infeed_sleep_secs:
+      logging.info('Infeed thread sleeping for %d seconds.',
+                   self._initial_infeed_sleep_secs)
+      time.sleep(self._initial_infeed_sleep_secs)
+      logging.info('Infeed thread starting after sleep')
+
+    with self._rendezvous.catch_errors(source='infeed', session=session):
+      if self._run_infeed_loop_on_coordinator:
+        for count, steps in enumerate(queue_ctx.read_iteration_counts()):
+          for i in xrange(steps):
+            logging.debug('Infeed enqueue for iteration (%d, %d)', count, i)
+            session.run(self._enqueue_ops)
+      else:
+        for _ in queue_ctx.read_iteration_counts():
+          session.run(self._enqueue_ops)
+      logging.info('Infeed thread finished, shutting down.')
+
+  def _run_outfeed(self, queue_ctx, session):
+    logging.info('Starting outfeed thread controller.')
+    with self._rendezvous.catch_errors(source='outfeed', session=session):
+      for count, steps in enumerate(queue_ctx.read_iteration_counts()):
+        for i in xrange(steps):
+          logging.debug('Outfeed dequeue for iteration (%d, %d)', count, i)
+          session.run(self._dequeue_ops)
+      logging.info('Outfeed thread finished, shutting down.')
+
+  def _create_infeed_controller(self, name, target, args):
+    return _OpQueueContext(name=name, target=target, args=args)
+
+  def _assertCompilationSucceeded(self, result, coord):
+    proto = tpu_compilation_result.CompilationResultProto()
+    proto.ParseFromString(result)
+    if proto.status_error_message:
+      logging.error('Compilation failed: {}'.format(proto.status_error_message))
+      coord.request_stop()
+    else:
+      logging.info('Compilation succeeded')
+
+  def after_create_session(self, session, coord):
+    if self._should_initialize_tpu:
+      logging.info('Init TPU system')
+      start = time.time()
+      with ops.Graph().as_default():
+        with tf_session.Session(
+            self._master, config=self._session_config) as sess:
+          sess.run(
+              tpu.initialize_system(
+                  job=self._master_job,
+                  embedding_config=self._embedding_layer_config))
+      logging.info('Initialized TPU in %d seconds', time.time() - start)
+
+    session.run(self._init_ops,
+                options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000))
+
+    if os.environ.get('TPU_SPLIT_COMPILE_AND_EXECUTE', '') == '1':
+      logging.info('Compiling user program: this may take a while...')
+      self._assertCompilationSucceeded(session.run(self._tpu_compile_op), coord)
+
+    self._infeed_controller = self._create_infeed_controller(
+        name='InfeedController', target=self._run_infeed, args=(session,))
+
+    self._outfeed_controller = _OpQueueContext(
+        name='OutfeedController', target=self._run_outfeed, args=(session,))
+
+    # Enable the worker watchdog to terminate workers on coordinator exit.
+    watchdog_timeout = int(os.environ.get('TF_TPU_WATCHDOG_TIMEOUT', '0'))
+    if watchdog_timeout > 0:
+      session_support.start_worker_watchdog(session,
+                                            shutdown_timeout=watchdog_timeout)
+
+  def before_run(self, run_context):
+    self._feed_error = None
+
+    iterations = run_context.session.run(self._iterations_per_loop_var)
+
+    logging.info('Enqueue next (%d) batch(es) of data to infeed.', iterations)
+    self._infeed_controller.send_next_batch_signal(iterations)
+
+    logging.info('Dequeue next (%d) batch(es) of data from outfeed.',
+                 iterations)
+    self._outfeed_controller.send_next_batch_signal(iterations)
+
+  def end(self, session):
+    self._finished = True
+    logging.info('Stop infeed thread controller')
+    self._infeed_controller.join()
+    self._rendezvous.record_done('infeed')
+
+    logging.info('Stop outfeed thread controller')
+    self._outfeed_controller.join()
+    self._rendezvous.record_done('outfeed')
+
+    logging.info('Shutdown TPU system.')
+    session.run(self._finalize_ops)
+
+
+class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook):
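+  """A `TPUInfeedOutfeedSessionHook` for prediction; signals infeed only once."""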
+
+  def __init__(self, ctx, enqueue_ops, dequeue_ops, tpu_compile_op,
+               rendezvous=None, master=None, session_config=None):
+    super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__(
+        ctx,
+        enqueue_ops,
+        dequeue_ops,
+        tpu_compile_op=tpu_compile_op,
+        run_infeed_loop_on_coordinator=False,
+        rendezvous=rendezvous,
+        master=master,
+        session_config=session_config)
+
+  def _create_infeed_controller(self, name, target, args):
+    return _OpSignalOnceQueueContext(name=name, target=target, args=args)
+
+
+class _TPUStopAtStepHook(session_run_hook.SessionRunHook):
+  """Hook that requests stop at a specified step.
+
+  This hook is similar to the `session_run_hook._StopAfterNEvalsHook` with
+  following differences for TPU training:
+
+  1. This hook sets the variable for iterations_per_loop, which is used by
+     `TPUInfeedOutfeedSessionHook` to control the iterations for infeed/outfeed.
+     Because hook execution order is not guaranteed, the variable update is
+     handled in `after_create_session` and `after_run`, since
+     `TPUInfeedOutfeedSessionHook` reads the variable value in `before_run`.
+
+  2. For each training loop (session.run), the global step could be increased
+     multiple times on TPU. The global step tensor value will be explicitly
+     read again in `after_run` to ensure the latest value is retrieved,
+     avoiding a race condition.
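+
+  For example, with `iterations=4` and `num_steps=10`, the hook loads
+  `iterations_per_loop` with 4, 4, and finally 2, then requests stop once the
+  global step reaches 10.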
+  """
+
+  def __init__(self, iterations, num_steps=None, last_step=None):
+    """Initializes a `StopAtStepHook`.
+
+    Args:
+      iterations: The number of iterations to run optimizer per training loop.
+      num_steps: Number of steps to execute.
+      last_step: Step after which to stop.
+
+    Raises:
+      ValueError: If one of the arguments is invalid.
+    """
+    if num_steps is None and last_step is None:
+      raise ValueError('One of num_steps or last_step must be specified.')
+    if num_steps is not None and last_step is not None:
+      raise ValueError('Only one of num_steps or last_step can be specified.')
+    self._num_steps = num_steps
+    self._last_step = last_step
+    self._iterations = iterations
+
+  def _next_iterations(self, global_step, last_step):
+    gap = last_step - global_step
+    return min(gap, self._iterations)
+
+  def begin(self):
+    self._global_step_tensor = training_util.get_global_step()
+    if self._global_step_tensor is None:
+      raise RuntimeError('Global step should be created.')
+
+    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
+
+  def after_create_session(self, session, coord):
+    global_step = session.run(self._global_step_tensor)
+    if self._last_step is None:
+      self._last_step = global_step + self._num_steps
+
+    iterations = self._next_iterations(global_step, self._last_step)
+
+    self._iterations_per_loop_var.load(iterations, session=session)
+
+  def after_run(self, run_context, run_values):
+    # Global step cannot be retrieved via SessionRunArgs and before_run due to
+    # a race condition.
+    global_step = run_context.session.run(self._global_step_tensor)
+    if global_step >= self._last_step:
+      run_context.request_stop()
+    else:
+      iterations = self._next_iterations(global_step, self._last_step)
+      self._iterations_per_loop_var.load(
+          iterations, session=run_context.session)
+
+
+class _SetEvalIterationsHook(session_run_hook.SessionRunHook):
+  """Hook that requests stop at a specified step."""
+
+  def __init__(self, num_steps):
+    """Initializes a `_SetEvalIterationsHook`.
+
+    Args:
+      num_steps: Number of steps to execute.
+    """
+    self._num_steps = num_steps
+
+  def begin(self):
+    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
+
+  def after_create_session(self, session, coord):
+    self._iterations_per_loop_var.load(self._num_steps, session=session)
+
+
+class _StoppingPredictHook(session_run_hook.SessionRunHook):
+  """Hook that requests stop according to the stopping signal in prediction."""
+
+  def __init__(self, scalar_stopping_signal):
+    self._scalar_stopping_signal = scalar_stopping_signal
+
+  def begin(self):
+    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
+
+  def after_create_session(self, session, coord):
+    # This is not necessary, as we do not run infeed enqueue and outfeed
+    # dequeue in side threads for the prediction model. But it makes
+    # TPUInfeedOutfeedSessionHook print a nicer message.
+    self._iterations_per_loop_var.load(1, session=session)
+
+  def before_run(self, run_context):
+    return session_run_hook.SessionRunArgs(self._scalar_stopping_signal)
+
+  def after_run(self, run_context, run_values):
+    _ = run_context
+    scalar_stopping_signal = run_values.results
+    if _StopSignals.should_stop(scalar_stopping_signal):
+      # NOTE(xiejw): In prediction, stopping signals are inserted for each
+      # batch. And we append one more batch to signal the system it should stop.
+      # The data flow might look like
+      #
+      #  batch   0: images, labels, stop = 0  (user provided)
+      #  batch   1: images, labels, stop = 0  (user provided)
+      #  ...
+      #  batch  99: images, labels, stop = 0  (user provided)
+      #  batch 100: images, labels, stop = 1  (TPUEstimator appended)
+      #
+      # where the final batch (id = 100) is appended by TPUEstimator, so we
+      # should drop it before returning the predictions to user.
+      # To achieve that, we throw the OutOfRangeError in after_run. Once
+      # Monitored Session sees this error in SessionRunHook.after_run, the
+      # "current" prediction, i.e., batch with id=100, will be discarded
+      # immediately.
+      raise errors.OutOfRangeError(None, None, 'Stopped by stopping signal.')
+
+
+def generate_per_core_enqueue_ops_fn_for_host(
+    ctx, input_fn, inputs_structure_recorder, host_device, host_id):
+  """Generates infeed enqueue ops for per-core input_fn on a single host."""
+  captured_infeed_queue = _CapturedObject()
+  tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
+
+  def enqueue_ops_fn():
+    """A fn returns enqueue_ops."""
+    num_cores_per_host = ctx.num_of_cores_per_host
+    per_host_sharded_inputs = []
+    for core_ordinal in range(num_cores_per_host):
+      with ops.name_scope('ordinal_%d' % (core_ordinal)):
+        user_context = tpu_context.TPUContext(
+            internal_ctx=ctx,
+            input_device=host_device,
+            invocation_index=host_id * ctx.num_of_cores_per_host + core_ordinal)
+        inputs = _Inputs.from_input_fn(input_fn(user_context))
+        if inputs.is_dataset:
+          raise TypeError(
+              '`input_fn` returning `Dataset` is not yet supported in '
+              'per-Core input pipeline deployment. Please set '
+              'TPUConfig.per_host_input_for_training to True or return '
+              '`features` and `labels` from `input_fn`.')
+        features, labels = inputs.features_and_labels()
+
+        inputs_structure_recorder.validate_and_record_structure(
+            features, labels)
+        flattened_inputs = (
+            inputs_structure_recorder.flatten_features_and_labels(
+                features, labels))
+        per_host_sharded_inputs.append(flattened_inputs)
+
+    infeed_queue = tpu_feed.InfeedQueue(
+        number_of_tuple_elements=len(per_host_sharded_inputs[0]))
+    captured_infeed_queue.capture(infeed_queue)
+
+    per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
+        per_host_sharded_inputs, tpu_ordinal_function=tpu_ordinal_function_impl)
+    return per_host_enqueue_ops
+
+  return enqueue_ops_fn, captured_infeed_queue
+
+
+def generate_per_host_enqueue_ops_fn_for_host(
+    ctx, input_fn, inputs_structure_recorder, batch_axis, device, host_id):
+  """Generates infeed enqueue ops for per-host input_fn on a single host."""
+  captured_infeed_queue = _CapturedObject()
+
+  dataset_initializer = None
+
+  with ops.device(device):
+    user_context = tpu_context.TPUContext(
+        internal_ctx=ctx, input_device=device, invocation_index=host_id)
+    inputs = _Inputs.from_input_fn(input_fn(user_context))
+
+    is_dataset = inputs.is_dataset
+    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
+      if not is_dataset:
+        raise TypeError(
+            'For mode PREDICT, `input_fn` must return `Dataset` instead of '
+            '`features` and `labels`.')
+      if batch_axis is not None:
+        raise TypeError('For mode PREDICT, batch_axis is not supported yet.')
+      inputs = _InputsWithStoppingSignals(
+          dataset=inputs.dataset,
+          batch_size=ctx.batch_size_for_input_fn,
+          add_padding=True)
+
+    if is_dataset:
+      dataset_initializer = inputs.dataset_initializer()
+
+    tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
+
+  def enqueue_ops_fn():
+    """A Fn returning the TPU infeed enqueue ops.
+
+    By providing this as a Fn, it can be invoked inside the tf.while_loop such
+    that the input pipeline for multiple iterations can be executed by one
+    Session.run call.
+
+    Returns:
+      A list of enqueue ops, or a dict of enqueue ops and stopping signals.
+    """
+    with ops.device(device):
+      num_of_replicas_per_host = ctx.num_of_replicas_per_host
+      # Convert user input to features and labels.  If the user returns a
+      # dataset, it is initialized and the features and labels extracted via
+      # `dataset.iterator.get_next()`
+      features, labels = inputs.features_and_labels()
+      signals = inputs.signals()
+
+      inputs_structure_recorder.validate_and_record_structure(features, labels)
+      unsharded_tensor_list = (
+          inputs_structure_recorder.flatten_features_and_labels(
+              features, labels, signals))
+
+      infeed_queue = tpu_feed.InfeedQueue(
+          tuple_types=[t.dtype for t in unsharded_tensor_list],
+          tuple_shapes=[t.shape for t in unsharded_tensor_list],
+          shard_dimensions=batch_axis)
+      captured_infeed_queue.capture(infeed_queue)
+      infeed_queue.set_number_of_shards(num_of_replicas_per_host)
+      per_host_enqueue_ops = (
+          infeed_queue.split_inputs_and_generate_enqueue_ops(
+              unsharded_tensor_list,
+              placement_function=lambda x: device,
+              tpu_ordinal_function=tpu_ordinal_function_impl))
+      if signals is None:
+        return per_host_enqueue_ops
+      else:
+        return {
+            'ops': per_host_enqueue_ops,
+            'signals': signals,
+        }
+
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
+
+
+def generate_per_host_v2_enqueue_ops_fn_for_host(
+    ctx, input_fn, inputs_structure_recorder, device, host_id):
+  """Generates infeed enqueue ops for per-host input_fn on a single host."""
+  captured_infeed_queue = _CapturedObject()
+  dataset_initializer = None
+
+  with ops.device(device):
+    user_context = tpu_context.TPUContext(
+        internal_ctx=ctx, input_device=device, invocation_index=host_id)
+    inputs = _Inputs.from_input_fn(input_fn(user_context))
+
+    is_dataset = inputs.is_dataset
+    if not is_dataset:
+      raise TypeError('`input_fn` must return a `Dataset` for the PER_HOST_V2 '
+                      'input pipeline configuration.')
+
+    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
+      inputs = _InputsWithStoppingSignals(
+          dataset=inputs.dataset,
+          batch_size=ctx.batch_size_for_input_fn,
+          add_padding=True,
+          num_invocations_per_step=ctx.num_of_replicas_per_host)
+
+    dataset_initializer = inputs.dataset_initializer()
+    tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
+
+  def enqueue_ops_fn():
+    """Generates the per_host enqueue ops."""
+    control_deps = []
+    per_host_sharded_inputs = []
+    sparse_features_list = []
+    num_replicas_per_host = ctx.num_of_replicas_per_host
+    cached_signals = None
+    with ops.device(device):
+      if not inputs.is_dataset:
+        raise TypeError('`input_fn` must return a `Dataset` for this mode.')
+      for _ in range(num_replicas_per_host):
+        # Use control dependencies to ensure a deterministic ordering.
+        with ops.control_dependencies(control_deps):
+          features, labels = inputs.features_and_labels()  # Calls get_next()
+          signals = inputs.signals()
+
+          # All the replicas share replica 0's stopping signal.
+          # This avoids inconsistent state among different model replicas.
+          if cached_signals:
+            signals['stopping'] = cached_signals['stopping']
+          else:
+            cached_signals = signals
+
+        features, labels, sparse_features = (
+            _tpu_estimator_embedding.split_inputs(ctx, features, labels))
+        sparse_features_list.append(sparse_features)
+
+        inputs_structure_recorder.validate_and_record_structure(
+            features, labels)
+        flattened_inputs = (
+            inputs_structure_recorder.flatten_features_and_labels(
+                features, labels, signals))
+        control_deps.extend(flattened_inputs)
+        per_host_sharded_inputs.append(flattened_inputs)
+
+      if inputs_structure_recorder.flattened_input_dims:
+        input_partition_dims = inputs_structure_recorder.flattened_input_dims
+        if signals:
+          input_partition_dims += [None] * len(signals)
+        # pylint: disable=protected-access
+        infeed_queue = tpu_feed._PartitionedInfeedQueue(
+            number_of_tuple_elements=len(per_host_sharded_inputs[0]),
+            host_id=host_id,
+            input_partition_dims=input_partition_dims,
+            device_assignment=ctx.device_assignment)
+        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
+            per_host_sharded_inputs)
+      else:
+        infeed_queue = tpu_feed.InfeedQueue(
+            number_of_tuple_elements=len(per_host_sharded_inputs[0]))
+        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
+            per_host_sharded_inputs,
+            tpu_ordinal_function=tpu_ordinal_function_impl)
+      captured_infeed_queue.capture(infeed_queue)
+
+    if ctx.embedding_config:
+      per_host_enqueue_ops.extend(
+          ctx.embedding_config.tpu_embedding.generate_enqueue_ops(
+              sparse_features_list))
+
+    if signals is None:
+      return per_host_enqueue_ops
+    else:
+      return {
+          'ops': per_host_enqueue_ops,
+          'signals': signals,
+      }
+
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
+
+
+def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
+                                      num_hosts):
+  """Generates infeed enqueue ops for one input_fn on all the hosts."""
+  captured_infeed_queue = _CapturedObject()
+  dataset_initializer = None
+  device_0 = ctx.tpu_host_placement_function(host_id=0)
+  with ops.device(device_0):
+    user_context = tpu_context.TPUContext(
+        internal_ctx=ctx, input_device=device_0, invocation_index=0)
+    inputs = _Inputs.from_input_fn(input_fn(user_context))
+
+    is_dataset = inputs.is_dataset
+    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
+      if not is_dataset:
+        raise TypeError(
+            'For mode PREDICT, `input_fn` must return `Dataset` instead of '
+            '`features` and `labels`.')
+
+      inputs = _InputsWithStoppingSignals(
+          dataset=inputs.dataset,
+          batch_size=ctx.batch_size_for_input_fn,
+          add_padding=True)
+
+    if is_dataset:
+      dataset_initializer = inputs.dataset_initializer()
+    num_replicas_per_host = ctx.num_of_replicas_per_host
+
+  def tpu_ordinal_function_impl(replica_id):
+    if ctx.device_assignment:
+      return ctx.device_assignment.tpu_ordinal(replica=replica_id)
+    else:
+      return replica_id % num_replicas_per_host
+
+  def device_function_impl(replica_id):
+    return ctx.tpu_host_placement_function(replica_id=replica_id)
+
+  def enqueue_ops_fn():
+    """Generates enqueue ops for all the hosts."""
+    broadcasted_inputs = []
+    flattened_inputs = None  # Cache result from input_fn.
+    signals = None
+    for host_id in xrange(num_hosts):
+      with ops.device(ctx.tpu_host_placement_function(host_id=host_id)):
+        for _ in xrange(ctx.num_of_replicas_per_host):
+          # Note: input_fn is only called once at host 0 for the first replica.
+          # The features and labels returned from that invocation are
+          # broadcasted to other replicas (including the replicas on other
+          # hosts).
+          if flattened_inputs is None:
+            features, labels = inputs.features_and_labels()  # Calls get_next()
+            signals = inputs.signals()
+
+            inputs_structure_recorder.validate_and_record_structure(
+                features, labels)
+            flattened_inputs = (
+                inputs_structure_recorder.flatten_features_and_labels(
+                    features, labels, signals))
+          broadcasted_inputs.append(flattened_inputs)
+
+    infeed_queue = tpu_feed.InfeedQueue(
+        number_of_tuple_elements=len(broadcasted_inputs[0]))
+    captured_infeed_queue.capture(infeed_queue)
+    enqueue_ops = infeed_queue.generate_enqueue_ops(
+        broadcasted_inputs,
+        tpu_ordinal_function=tpu_ordinal_function_impl,
+        placement_function=device_function_impl)
+
+    if signals is None:
+      return enqueue_ops
+    else:
+      return {
+          'ops': enqueue_ops,
+          'signals': signals,
+      }
+
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
+
+
+class _InputPipeline(object):
+  """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue.
+
+  `_InputPipeline` abstracts the per-core/per-host `input_fn` invocation from
+  the call site.  To be precise, based on the configuration in
+  `_InternalTPUContext`, it invokes `input_fn` for all cores (usually
+  multi-host TPU training) or for one host (usually for single-host TPU
+  evaluation), and sends all `features` and `labels` returned by `input_fn` to
+  TPU infeed. For per-core invocation, `features` and `labels` are piped to
+  infeed directly, one tuple for each core. For per-host invocation, `features`
+  and `labels` are split at host (with respect to `batch_axis`) and piped to all
+  cores accordingly.
+
+  In addition, flatten/unflatten are handled by `_InputPipeline` also.  Model
+  inputs returned by the `input_fn` can have one of the following forms:
+  1. features
+  2. (features, labels)
+  3. ((arbitrarily nested structure of features), labels)
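+
+  For example, an `input_fn` producing form 2 might look like (a sketch;
+  `images` and `labels` are hypothetical in-memory arrays):
+
+      def input_fn(params):
+        dataset = tf.data.Dataset.from_tensor_slices((images, labels))
+        return dataset.batch(params['batch_size'], drop_remainder=True)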
+
+  Internally, form 1 is reformed to `(features, None)` as features and labels
+  are passed separately to underlying methods. For TPU training, TPUEstimator
+  may expect multiple `features` and `labels` tuples, one for each core.
+
+  TPUEstimator allows various different structures for inputs (namely `features`
+  and `labels`).  Both `features` and `labels` can be any nested structure
+  supported by TF nest (namely, dict, tuples, namedtuples or any nested
+  structure of such Tensors).  `labels` could be `None` as well.
+
+  These are flattened before they are passed to the infeed/outfeed library
+  as it expects flattened lists.
+  """
+
+  class InputsStructureRecorder(object):
+    """The recorder to record inputs structure."""
+
+    def __init__(self, input_partition_dims=None):
+      # Holds the structure of inputs
+      self._feature_structure = {}
+      self._flattened_input_dims = None
+
+      if input_partition_dims:
+        # This should have been validated in TPUConfig.
+        assert len(input_partition_dims) <= 2, 'must have 1 or 2 elements.'
+        if len(input_partition_dims) == 2:
+          self._feature_dims, self._label_dims = input_partition_dims
+        else:
+          self._feature_dims = input_partition_dims[0]
+          self._label_dims = None
+
+        assert self._feature_dims is not None, ('input_partition_dims[0] must '
+                                                'not be None')
+      else:
+        self._feature_dims = None
+        self._label_dims = None
+
+      # Internal state.
+      self._initialized = False
+
+    @property
+    def flattened_input_dims(self):
+      assert self._initialized, 'InputsStructureRecorder is not initialized.'
+      return self._flattened_input_dims
+
+    def has_labels(self):
+      return 'labels' in self._feature_structure
+
+    def _flatten_input_dims(self, feature_dims, feature_dims_names, label_dims,
+                            label_dims_names, label_names, has_labels):
+      """Flatten input dims with the same order as flattened input tensors."""
+      flattened_input_dims = []
+      if feature_dims_names:
+        # We need a fixed ordering for matching the tensors in features.
+        flattened_input_dims.extend(
+            [feature_dims[name] for name in feature_dims_names])
+      else:
+        flattened_input_dims.append(feature_dims)
+
+      if label_dims_names:
+        # We need a fixed ordering for matching the tensors in labels.
+        flattened_input_dims.extend(
+            [label_dims[name] for name in label_dims_names])
+      else:
+        if label_names:
+          num_tensors_in_label = len(label_names)
+        else:
+          num_tensors_in_label = int(has_labels)
+        # Setting `None` in input_partition_dims[1] will apply `None` to
+        # all the tensors in labels, regardless of internal structure.
+        flattened_input_dims.extend([label_dims] * num_tensors_in_label)
+
+      return flattened_input_dims
+
+    def validate_and_record_structure(self, features, labels):
+      """Validates and records the structure of `features` and `labels`."""
+      # Extract structure.
+      has_labels = labels is not None
+      feature_names = _extract_key_names(features)
+      label_names = _extract_key_names(labels)
+
+      if not self._initialized:
+        # Record structure.
+        self._initialized = True
+        if self._feature_dims is not None:
+          feature_dims_names = _extract_key_names(self._feature_dims)
+          if feature_dims_names != feature_names:
+            raise ValueError(
+                'TPUConfig.input_partition_dims[0] mismatched feature'
+                ' keys. Expected {}, got {}'.format(feature_names,
+                                                    feature_dims_names))
+
+          label_dims_names = _extract_key_names(self._label_dims)
+          if self._label_dims is not None and label_dims_names != label_names:
+            raise ValueError(
+                'TPUConfig.input_partition_dims[1] mismatched label'
+                ' keys. Expected {}, got {}'.format(label_names,
+                                                    label_dims_names))
+
+          self._flattened_input_dims = self._flatten_input_dims(
+              self._feature_dims, feature_dims_names, self._label_dims,
+              label_dims_names, label_names, has_labels)
+
+    def flatten_features_and_labels(self, features, labels, signals=None):
+      """Flattens the `features` and `labels` to a single tensor list."""
+      self._feature_structure['features'] = features
+      if labels is not None:
+        self._feature_structure['labels'] = labels
+      if signals is not None:
+        self._feature_structure['signals'] = signals
+      return data_nest.flatten(self._feature_structure)
+
+    def unflatten_features_and_labels(self, flattened_inputs):
+      """Restores the flattened inputs to original features and labels form.
+
+      Args:
+        flattened_inputs: Flattened inputs for each shard.
+
+      Returns:
+        A tuple of (`features`, `labels`), where `labels` could be None.
+        Each one, if present, should have the same structure (single tensor vs
+        dict) as the one returned by input_fn.
+
+      Raises:
+        ValueError: If the number of expected tensors from `flattened_inputs`
+          mismatches the recorded structure.
+      """
+
+      unflattened_inputs = data_nest.pack_sequence_as(self._feature_structure,
+                                                      flattened_inputs)
+      return _Inputs(
+          unflattened_inputs['features'],
+          unflattened_inputs.get('labels'),
+          signals=unflattened_inputs.get('signals'))
+
+  def __init__(self, input_fn, batch_axis, ctx):
+    """Constructor.
+
+    Args:
+      input_fn: input fn for train or eval.
+      batch_axis: A python tuple of int values describing how each tensor
+        produced by the Estimator `input_fn` should be split across the TPU
+        compute shards.
+      ctx: A `_InternalTPUContext` instance with mode.
+
+    Raises:
+      ValueError: If both `sharded_features` and `num_cores` are `None`.
+    """
+    self._inputs_structure_recorder = _InputPipeline.InputsStructureRecorder(
+        ctx.input_partition_dims)
+
+    self._sharded_per_core = ctx.is_input_sharded_per_core()
+    self._input_fn = input_fn
+    self._infeed_queue = None
+    self._ctx = ctx
+    self._batch_axis = batch_axis
+
+  def generate_infeed_enqueue_ops_and_dequeue_fn(self):
+    """Generates infeed enqueue ops and dequeue_fn."""
+    # When tf.while_loop is called, the body function, which invokes the
+    # `enqueue_fn` passed in, is called to construct the graph, so the
+    # input_fn structure is recorded.
+    enqueue_ops, all_hooks, run_infeed_loop_on_coordinator = (
+        self._invoke_input_fn_and_record_structure())
+
+    self._validate_input_pipeline()
+
+    def dequeue_fn():
+      """dequeue_fn is used by TPU to retrieve the tensors."""
+      # In the model-parallel case, both the host-side and device-side
+      # computations must agree on the core on which infeed takes place. We
+      # choose to perform infeed on logical core 0 of each replica.
+      values = self._infeed_queue.generate_dequeue_op(tpu_device=0)
+      # The unflatten process uses the structure information recorded above.
+      return self._inputs_structure_recorder.unflatten_features_and_labels(
+          values)
+
+    return (enqueue_ops, dequeue_fn, all_hooks, run_infeed_loop_on_coordinator)
+
+  def _invoke_input_fn_and_record_structure(self):
+    """Deploys the input pipeline and record input structure."""
+    enqueue_ops = []
+    infeed_queues = []
+    all_dataset_initializers = []
+    num_hosts = self._ctx.num_hosts
+    tpu_host_placement_fn = self._ctx.tpu_host_placement_function
+
+    run_infeed_loop_on_coordinator = True
+
+    if self._sharded_per_core:
+      # Per-Core input pipeline deployment.
+      # Invoke the input pipeline for each core, placed on the corresponding
+      # host.
+      for host_id in range(num_hosts):
+        host_device = tpu_host_placement_fn(host_id=host_id)
+        with ops.device(host_device):
+          with ops.name_scope('input_pipeline_task%d' % (host_id)):
+            enqueue_ops_fn, captured_infeed_queue = (
+                generate_per_core_enqueue_ops_fn_for_host(
+                    self._ctx, self._input_fn, self._inputs_structure_recorder,
+                    host_device, host_id))
+
+            if _WRAP_INPUT_FN_INTO_WHILE_LOOP:
+              run_infeed_loop_on_coordinator = False
+              enqueue_ops.append(
+                  _wrap_computation_in_while_loop(
+                      device=host_device, op_fn=enqueue_ops_fn))
+            else:
+              enqueue_ops.append(enqueue_ops_fn())
+            # `captured_infeed_queue.get()` must be called after
+            # `enqueue_ops_fn` has been called.
+            infeed_queues.append(captured_infeed_queue.get())
+
+    elif self._ctx.is_input_broadcast_with_iterators():
+      # Only calls input_fn in host 0.
+      host_device = tpu_host_placement_fn(host_id=0)
+      enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
+          generate_broadcast_enqueue_ops_fn(self._ctx, self._input_fn,
+                                            self._inputs_structure_recorder,
+                                            num_hosts))
+      if dataset_initializer:
+        all_dataset_initializers.append(dataset_initializer)
+        run_infeed_loop_on_coordinator = False
+        wrap_fn = (
+            _wrap_computation_in_while_loop
+            if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else
+            _wrap_computation_in_while_loop_with_stopping_signals)
+        enqueue_ops.append(wrap_fn(device=host_device, op_fn=enqueue_ops_fn))
+      else:
+        enqueue_ops.append(enqueue_ops_fn())
+      infeed_queues.append(captured_infeed_queue.get())
+    else:
+      for host_id in range(num_hosts):
+        host_device = tpu_host_placement_fn(host_id=host_id)
+        with ops.device(host_device):
+          with ops.name_scope('input_pipeline_task%d' % (host_id)):
+            if self._ctx.is_input_per_host_with_iterators():
+              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
+                  generate_per_host_v2_enqueue_ops_fn_for_host(
+                      self._ctx, self._input_fn,
+                      self._inputs_structure_recorder, host_device, host_id))
+            else:
+              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
+                  generate_per_host_enqueue_ops_fn_for_host(
+                      self._ctx, self._input_fn,
+                      self._inputs_structure_recorder, self._batch_axis,
+                      host_device, host_id))
+
+            # NOTE(xiejw): We dispatch here based on the return type of the
+            # users `input_fn`.
+            #
+            # 1. If input_fn returns a Dataset instance, we initialize the
+            # iterator outside of tf.while_loop, and call the iterator.get_next
+            # inside tf.while_loop.  This should always be safe.
+            #
+            # 2. If input_fn returns (features, labels), it is too late to wrap
+            # them inside tf.while_loop, as resource initialization cannot be
+            # handled in TF control flow properly. In this case, we will use
+            # python loop to enqueue the data into TPU system.  This may be
+            # slow compared to the previous case.
+            if dataset_initializer:
+              all_dataset_initializers.append(dataset_initializer)
+              run_infeed_loop_on_coordinator = False
+              wrap_fn = (
+                  _wrap_computation_in_while_loop
+                  if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else
+                  _wrap_computation_in_while_loop_with_stopping_signals)
+              enqueue_ops.append(
+                  wrap_fn(device=host_device, op_fn=enqueue_ops_fn))
+            else:
+              enqueue_ops.append(enqueue_ops_fn())
+            infeed_queues.append(captured_infeed_queue.get())
+    # infeed_queue is used to generate dequeue ops. The only thing it uses for
+    # dequeue is dtypes and shapes. So, any one can be used. Here, grab the
+    # first one.
+    self._infeed_queue = infeed_queues[0]
+    return enqueue_ops, [
+        util_lib.MultiHostDatasetInitializerHook(all_dataset_initializers)
+    ], run_infeed_loop_on_coordinator
+
+  def _validate_input_pipeline(self):
+    """Validates the input pipeline.
+
+    Performs some sanity checks to provide user-friendly information. We should
+    error out to give users a better error message. But, if
+    _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break
+    user code, so we only log a warning.
+
+    Raises:
+      RuntimeError: If the validation failed.
+    """
+    if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS):
+      err_msg = ('Input pipeline contains one or more QueueRunners. '
+                 'It could be slow and not scalable. Please consider '
+                 'converting your input pipeline to use `tf.data` instead (see '
+                 'https://www.tensorflow.org/guide/datasets for '
+                 'instructions).')
+      if _WRAP_INPUT_FN_INTO_WHILE_LOOP:
+        raise RuntimeError(err_msg)
+      else:
+        logging.warn(err_msg)
+
+
+def call_computation(computation,
+                     experimental_exported_model_uses_all_cores=True):
+  """Call computation.
+
+  computation uses a single-core for TPU inference. If
+  `experimental_exported_model_uses_all_cores` is `True`, this function will
+  round-robin
+  computation among all TPU cores visible to the host; otherwise, it will use
+  a single core.
+
+  Args:
+    computation: A Python function that takes no inputs and builds computation
+      graph. If `computation` returns m outputs, this function will return a
+      list of m Tensors.
+    experimental_exported_model_uses_all_cores: Whether to round-robin among all
+      cores visible to the host, or to use a single core.
+
+  Returns:
+    A list of output tensors.
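+
+  A usage sketch (`build_inference_graph` is a hypothetical function that
+  takes no inputs and builds the inference graph):
+
+      outputs = call_computation(
+          build_inference_graph,
+          experimental_exported_model_uses_all_cores=True)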
+  """
+  if experimental_exported_model_uses_all_cores:
+    # Using `TPUPartitionedCall` makes it possible to target a different
+    # TPU core with every `Session.run()` call. Note that the entire inference
+    # graph executes on a single core, and that invocations of this graph
+    # will round-robin among the cores attached to a host.
+    @function.Defun(capture_resource_var_by_value=False)
+    def tpu_subgraph():
+      return computation()
+
+    return tpu_functional.TPUPartitionedCall(
+        args=tpu_subgraph.captured_inputs,
+        device_ordinal=tpu_ops.tpu_ordinal_selector(),
+        Tout=[o.type for o in tpu_subgraph.definition.signature.output_arg],
+        f=tpu_subgraph)
+  else:
+    return computation()
+
+
+class _ModelFnWrapper(object):
+  """A `model_fn` wrapper.
+
+  This makes calling model_fn on CPU and TPU easier and more consistent, and
+  performs the necessary checks and mutations required by TPU training and
+  evaluation.
+
+  In addition, this wrapper manages converting the `model_fn` to a single TPU
+  train and eval step.
+  """
+
+  def __init__(self, model_fn, config, params, ctx):
+    self._model_fn = model_fn
+    self._config = config
+    self._params = params
+    self._ctx = ctx
+
+  def call_without_tpu(self, features, labels, is_export_mode):
+    return self._call_model_fn(features, labels, is_export_mode=is_export_mode)
+
+  def _add_embedding_features(self, features, hook_dummy_table_variables):
+    """Add embedding features, optionally add hook to intercept gradient."""
+    if self._ctx.embedding_config:
+      tpu_embedding_ = self._ctx.embedding_config.tpu_embedding
+      embedding_activations = tpu_embedding_.get_activations()
+      if hook_dummy_table_variables:
+        new_embedding_activations = (
+            tpu_embedding_gradient.hook_dummy_table_variables_to_activations(
+                tpu_embedding_, embedding_activations,
+                self._ctx.embedding_config.dummy_table_variables))
+        features.update(new_embedding_activations)
+      else:
+        features.update(embedding_activations)
+
+  def convert_to_single_tpu_train_step(self, dequeue_fn):
+    """Converts user provided model_fn` as a single train step on TPU.
+
+    The user provided `model_fn` takes input tuple
+    (features, labels) and produces the EstimatorSpec with train_op and loss for
+    train `mode`. This usually represents a single train computation on CPU.
+
+    For TPU training, a train (computation) step is first wrapped in a
+    tf.while_loop control flow to repeat many times and then replicated to
+    all TPU shards. Besides, the input should be taken from the TPU infeed
+    rather than the input pipeline (input_fn) directly. To fit the TPU loop
+    and replicate pattern, the original train computation should be reformed,
+    which is the returned `train_step`.
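+
+    A simplified sketch of the repeat pattern (the actual implementation uses
+    TPU-specific loop and replication primitives; `initial_loss` and
+    `iterations_per_loop` are hypothetical here):
+
+        loss = tf.while_loop(
+            lambda loss: True, lambda loss: train_step(loss),
+            [initial_loss], maximum_iterations=iterations_per_loop)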
+
+    Args:
+      dequeue_fn: The function to retrieve inputs, features and labels, from TPU
+        infeed dequeue channel.
+
+    Returns:
+      A tuple of train_fn, host_calls, captured scaffold_fn, and captured
+      training hooks. The train_fn represents the train step for TPU.
+    """
+
+    host_call = _OutfeedHostCall(self._ctx)
+    captured_scaffold_fn = _CapturedObject()
+    captured_training_hooks = _CapturedObject()
+
+    def train_step(loss):
+      """Training step function for use inside a while loop."""
+      del loss  # unused; required in function signature.
+      inputs = dequeue_fn()
+      features, labels = inputs.features_and_labels()
+      self._add_embedding_features(features, True)
+
+      estimator_spec = self._verify_estimator_spec(
+          self._call_model_fn(features, labels))
+      loss, train_op = estimator_spec.loss, estimator_spec.train_op
+
+      if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
+        captured_scaffold_fn.capture(estimator_spec.scaffold_fn)
+      else:
+        captured_scaffold_fn.capture(None)
+
+      captured_training_hooks.capture(estimator_spec.training_hooks)
+
+      if self._ctx.embedding_config is None:
+        apply_sparse_grads = []
+      else:
+        tpu_embedding_ = self._ctx.embedding_config.tpu_embedding
+        gradients = (
+            tpu_embedding_gradient.get_gradients_through_dummy_table_variables(
+                tpu_embedding_)
+        )
+        apply_sparse_grads = [
+            tpu_embedding_.generate_send_gradients_op(gradients)
+        ]
+
+      # We must run train_op to update the variables prior to running the
+      # outfeed.
+      with ops.control_dependencies([train_op] + apply_sparse_grads):
+        host_call_outfeed_ops = []
+        if (isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)  # pylint: disable=protected-access
+            and estimator_spec.host_call is not None):
+          host_call.record({'host_call': estimator_spec.host_call})
+          host_call_outfeed_ops = host_call.create_enqueue_op()
+        with ops.control_dependencies(host_call_outfeed_ops):
+          return array_ops.identity(loss)
+
+    return (train_step, host_call, captured_scaffold_fn,
+            captured_training_hooks)
+
+  def convert_to_single_tpu_eval_step(self, dequeue_fn):
+    """Converts user provided model_fn` as a single eval step on TPU.
+
+    Similar to training, the user provided `model_fn` takes input tuple
+    (features, labels) and produces the TPUEstimatorSpec with eval_metrics for
+    eval `mode`. This usually represents a single evaluation computation on CPU.
+
+    For TPU evaluation, an eval (computation) step is first wrapped in a
+    tf.while_loop control flow to repeat many times and then replicated to
+    all TPU shards. Besides, the input and output are slightly different. Input,
+    features and labels, should be taken from TPU infeed rather than input
+    pipeline (input_fn) directly. Output is managed in two stages.  First, the
+    model outputs as the result of evaluation computation, usually model logits,
+    should be transferred from TPU system to CPU. Then, all model outputs are
+    concatenated first on CPU and sent to the metric_fn for metrics computation.
+    To fit the TPU evaluation pattern, the original eval computation should be
+    reformed, which is the returned `eval_step`.
+
+    Args:
+      dequeue_fn: The function to retrieve inputs, features and labels, from TPU
+        infeed dequeue channel.
+
+    Returns:
+      A tuple of eval_fn, host_calls, captured scaffold_fn, and captured eval
+      hooks. The eval_fn represents the eval step for TPU.
+    """
+    host_calls = _OutfeedHostCall(self._ctx)
+    captured_scaffold_fn = _CapturedObject()
+    captured_eval_hooks = _CapturedObject()
+
+    def eval_step(total_loss):
+      """Evaluation step function for use inside a while loop."""
+      inputs = dequeue_fn()
+      features, labels = inputs.features_and_labels()
+      self._add_embedding_features(features, False)
+
+      tpu_estimator_spec = self._call_model_fn(features, labels)
+      if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
+        raise RuntimeError(
+            'estimator_spec used by TPU evaluation must have type '
+            '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec)))
+
+      loss = tpu_estimator_spec.loss
+      captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn)
+      captured_eval_hooks.capture(tpu_estimator_spec.evaluation_hooks)
+
+      to_record = {}
+      if tpu_estimator_spec.eval_metrics:
+        to_record['eval_metrics'] = tpu_estimator_spec.eval_metrics
+      if tpu_estimator_spec.host_call is not None:
+        # We assume that evaluate won't update global step, so we don't wrap
+        # this host_call.
+        to_record['host_call'] = tpu_estimator_spec.host_call
+      host_calls.record(to_record)
+
+      with ops.control_dependencies(host_calls.create_enqueue_op()):
+        return math_ops.add(total_loss, loss)
+
+    return eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks
+
+  def convert_to_single_tpu_predict_step(self, dequeue_fn):
+    """Converts user provided model_fn` as a single predict step on TPU.
+
+    Args:
+      dequeue_fn: The function to retrieve inputs, features and labels, from TPU
+        infeed dequeue channel.
+
+    Returns:
+      A tuple of predict_fn, host_calls, captured scaffold_fn, and captured
+      prediction hooks. The predict_fn represents the predict step for TPU.
+    """
+    host_calls = _OutfeedHostCall(self._ctx)
+    captured_scaffold_fn = _CapturedObject()
+    captured_predict_hooks = _CapturedObject()
+
+    def predict_step(unused_scalar_stopping_signal):
+      """Evaluation step function for use inside a while loop."""
+      inputs = dequeue_fn()
+      features, labels = inputs.features_and_labels()
+      stopping_signals = inputs.signals()
+
+      assert stopping_signals is not None, (
+          'Internal Error: `signals` is missing.')
+
+      tpu_estimator_spec = self._call_model_fn(
+          features, labels, is_export_mode=False)
+      if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
+        raise RuntimeError(
+            'estimator_spec used by TPU prediction must have type '
+            '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec)))
+
+      self._verify_tpu_spec_predictions(tpu_estimator_spec.predictions)
+
+      captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn)
+      captured_predict_hooks.capture(tpu_estimator_spec.prediction_hooks)
+      to_record = {}
+      identity_fn = lambda **kwargs: kwargs
+      to_record['predictions'] = [identity_fn, tpu_estimator_spec.predictions]
+      to_record['signals'] = [identity_fn, stopping_signals]
+      if tpu_estimator_spec.host_call is not None:
+        to_record['host_call'] = tpu_estimator_spec.host_call
+      host_calls.record(to_record)
+
+      with ops.control_dependencies(host_calls.create_enqueue_op()):
+        return _StopSignals.as_scalar_stopping_signal(stopping_signals)
+
+    return (predict_step, host_calls, captured_scaffold_fn,
+            captured_predict_hooks)
+
+  def _verify_tpu_spec_predictions(self, predictions):
+    """Validates TPUEstimatorSpec.predictions dict."""
+    # TODO(xiejw): Add validation for the prediction dictionary.
+    # TODO(xiejw): Add support for a single tensor as predictions.
+    if not isinstance(predictions, dict):
+      raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.')
+
+    for (key, tensor) in predictions.items():
+      if tensor.shape.dims[0].value is None:
+        raise ValueError(
+            'The tensor with key ({}) in TPUEstimatorSpec.predictions has '
+            'dynamic shape (should be static). Tensor: {}'.format(key, tensor))
+    return predictions
+
+  def _validate_model_features_and_labels(self, features, labels,
+                                          is_export_mode):
+    """Validates that the features and labels for the model function are valid.
+
+    A valid features/labels object is the one with:
+    - Type: A tensor or any nested structure of tensors supported by TF nest,
+        namely nested dictionary, tuple, namedtuple, or sequence of tensors.
+    - Static shape if is_export_mode is False.
+
+    Args:
+      features: the features that would be input to the model function.
+      labels: the labels that would be input to the model function.
+      is_export_mode: boolean value specifying if in export mode.
+
+    Raises:
+      TypeError: If features/labels are not of the correct type.
+      ValueError: If features/labels have dynamic shape.
+    """
+
+    def validate(obj, obj_name):
+      """Helper validate function."""
+      if is_export_mode or self._ctx.is_running_on_cpu(is_export_mode):
+        return
+      if isinstance(obj, ops.Tensor):
+        if not obj.get_shape().is_fully_defined():
+          raise ValueError(
+              'The {} to the model returned by input_fn must have static shape.'
+              ' Tensor: {}'.format(obj_name, obj))
+      else:
+        for tensor in data_nest.flatten(obj):
+          if not tensor.get_shape().is_fully_defined():
+            raise ValueError(
+                ('The {} to the model returned by input_fn must have static '
+                 'shape. Tensor: {}').format(obj_name, tensor))
+
+    validate(features, 'features')
+    if labels is not None:
+      validate(labels, 'labels')
+
+  def _call_model_fn(self, features, labels, is_export_mode=False):
+    """Calls the model_fn with required parameters."""
+    self._validate_model_features_and_labels(features, labels, is_export_mode)
+    model_fn_args = function_utils.fn_args(self._model_fn)
+    kwargs = {}
+
+    # Deep-copy `config` and `params` in case the user mutates them.
+    config = copy.deepcopy(self._config)
+    params = copy.deepcopy(self._params)
+
+    if 'labels' in model_fn_args:
+      kwargs['labels'] = labels
+    elif labels is not None:
+      raise ValueError(
+          'model_fn does not take labels, but input_fn returns labels.')
+    if 'mode' in model_fn_args:
+      kwargs['mode'] = self._ctx.mode
+    if 'config' in model_fn_args:
+      kwargs['config'] = config
+    if 'params' in model_fn_args:
+      kwargs['params'] = params
+
+    if 'params' not in model_fn_args:
+      raise ValueError('model_fn ({}) does not include params argument, '
+                       'required by TPUEstimator to pass batch size as '
+                       'params[\'batch_size\']'.format(self._model_fn))
+
+    if is_export_mode:
+      batch_size_for_model_fn = None
+    else:
+      batch_size_for_model_fn = self._ctx.batch_size_for_model_fn
+
+    if batch_size_for_model_fn is not None:
+      _add_item_to_params(params, _BATCH_SIZE_KEY, batch_size_for_model_fn)
+
+    running_on_cpu = self._ctx.is_running_on_cpu(is_export_mode)
+    _add_item_to_params(params, _USE_TPU_KEY, not running_on_cpu)
+
+    if not running_on_cpu:
+      user_context = tpu_context.TPUContext(
+          internal_ctx=self._ctx, call_from_input_fn=False)
+      _add_item_to_params(params, _CTX_KEY, user_context)
+
+    estimator_spec = self._model_fn(features=features, **kwargs)
+    if (running_on_cpu and
+        isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)):  # pylint: disable=protected-access
+      # The estimator_spec will be passed to `Estimator` directly, which expects
+      # type `EstimatorSpec`.
+      return estimator_spec.as_estimator_spec()
+    else:
+      return estimator_spec
+
+  def _verify_estimator_spec(self, estimator_spec):
+    """Validates the estimator_spec."""
+    if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
+      return estimator_spec
+
+    err_msg = '{} returned by EstimatorSpec is not supported in TPUEstimator.'
+    if estimator_spec.training_chief_hooks:
+      raise ValueError(
+          err_msg.format('training_chief_hooks') +
+          ' If you want to pass training hooks, please pass via '
+          'training_hooks.')
+
+    if estimator_spec.scaffold:
+      logging.warning('EstimatorSpec.scaffold is ignored by TPU train/eval. '
+                      'Please use TPUEstimatorSpec.')
+    return estimator_spec
+
+
+class _OutfeedHostCall(object):
+  """Support for `eval_metrics` and `host_call` in TPUEstimatorSpec."""
+
+  def __init__(self, ctx):
+    self._ctx = ctx
+    self._names = []
+    # All of these are dictionaries of lists keyed on the name.
+    self._host_fns = {}
+    self._tensor_keys = collections.defaultdict(list)
+    self._tensors = collections.defaultdict(list)
+    self._tensor_dtypes = collections.defaultdict(list)
+    self._tensor_shapes = collections.defaultdict(list)
+
+  @staticmethod
+  def validate(host_calls):
+    """Validates the `eval_metrics` and `host_call` in `TPUEstimatorSpec`."""
+
+    for name, host_call in host_calls.items():
+      if not isinstance(host_call, (tuple, list)):
+        raise ValueError('{} should be a tuple or list.'.format(name))
+      if len(host_call) != 2:
+        raise ValueError('{} should have two elements.'.format(name))
+      if not callable(host_call[0]):
+        raise TypeError('{}[0] should be callable.'.format(name))
+      if not isinstance(host_call[1], (tuple, list, dict)):
+        raise ValueError(
+            '{}[1] should be a tuple, list, or dict.'.format(name))
+
+      if isinstance(host_call[1], (tuple, list)):
+        fullargspec = tf_inspect.getfullargspec(host_call[0])
+        fn_args = function_utils.fn_args(host_call[0])
+        # wrapped_hostcall_with_global_step uses varargs, so we allow that.
+        if fullargspec.varargs is None and len(host_call[1]) != len(fn_args):
+          raise RuntimeError(
+              'In TPUEstimatorSpec.{}, length of tensors {} does not match '
+              'method args of the function, which takes {}.'.format(
+                  name, len(host_call[1]), len(fn_args)))
+
+  @staticmethod
+  def create_cpu_hostcall(host_calls):
+    """Runs on the host_call on CPU instead of TPU when use_tpu=False."""
+
+    _OutfeedHostCall.validate(host_calls)
+    ret = {}
+    for name, host_call in host_calls.items():
+      host_fn, tensors = host_call
+      if isinstance(tensors, (tuple, list)):
+        ret[name] = host_fn(*tensors)
+      else:
+        # Must be dict.
+        try:
+          ret[name] = host_fn(**tensors)
+        except TypeError as e:
+          logging.warning(
+              'Exception while calling %s: %s. It is likely the tensors '
+              '(%s[1]) do not match the function\'s arguments', name, e, name)
+          raise
+    return ret
+
+  def record(self, host_calls):
+    """Records the host_call structure."""
+
+    for name, host_call in host_calls.items():
+      host_fn, tensor_list_or_dict = host_call
+      self._names.append(name)
+      self._host_fns[name] = host_fn
+
+      if isinstance(tensor_list_or_dict, dict):
+        for (key, tensor) in six.iteritems(tensor_list_or_dict):
+          self._tensor_keys[name].append(key)
+          self._tensors[name].append(tensor)
+          self._tensor_dtypes[name].append(tensor.dtype)
+          self._tensor_shapes[name].append(tensor.shape)
+      else:
+        # List or tuple.
+        self._tensor_keys[name] = None
+        for tensor in tensor_list_or_dict:
+          self._tensors[name].append(tensor)
+          self._tensor_dtypes[name].append(tensor.dtype)
+          self._tensor_shapes[name].append(tensor.shape)
+
+  def create_enqueue_op(self):
+    """Create the op to enqueue the recorded host_calls.
+
+    Returns:
+      A list of enqueue ops, which is empty if there are no host calls.
+    """
+    if not self._names:
+      return []
+
+    tensors = []
+    # TODO(jhseu): Consider deduping tensors.
+    for name in self._names:
+      tensors.extend(self._tensors[name])
+
+    with ops.device(tpu.core(0)):
+      return [tpu_ops.outfeed_enqueue_tuple(tensors)]
+
+  def create_tpu_hostcall(self):
+    """Sends the tensors through outfeed and runs the host_fn on CPU.
+
+    The tensors are concatenated along dimension 0 to form a global tensor
+    across all shards. The concatenated tensors are then passed to the
+    host_fn, which is executed on the first host.
+
+    Returns:
+      A dictionary mapping name to the return type of the host_call by that
+      name.
+
+    Raises:
+      RuntimeError: If outfeed tensor is scalar.
+    """
+    if not self._names:
+      return {}
+
+    ret = {}
+    # For each i, dequeue_ops[i] is a list containing the tensors from all
+    # shards. This list is concatenated later.
+    dequeue_ops = []
+    tensor_dtypes = []
+    tensor_shapes = []
+    for name in self._names:
+      for _ in self._tensors[name]:
+        dequeue_ops.append([])
+      for dtype in self._tensor_dtypes[name]:
+        tensor_dtypes.append(dtype)
+      for shape in self._tensor_shapes[name]:
+        tensor_shapes.append(shape)
+
+    # Outfeed ops execute on each replica's first logical core. Note: we must
+    # constrain it such that we have at most one outfeed dequeue and enqueue
+    # per replica.
+    for i in xrange(self._ctx.num_replicas):
+      host_device, ordinal_id = self._ctx.device_for_replica(i)
+      with ops.device(host_device):
+        outfeed_tensors = tpu_ops.outfeed_dequeue_tuple(
+            dtypes=tensor_dtypes,
+            shapes=tensor_shapes,
+            device_ordinal=ordinal_id)
+        for j, item in enumerate(outfeed_tensors):
+          dequeue_ops[j].append(item)
+
+    # Deconstruct dequeue ops.
+    flat_dequeue_ops = []
+    for l in dequeue_ops:
+      flat_dequeue_ops.extend(l)
+
+    dequeue_ops_by_name = {}
+    pos = 0
+    for name in self._names:
+      dequeue_ops_by_name[name] = dequeue_ops[pos:pos +
+                                              len(self._tensors[name])]
+      pos += len(self._tensors[name])
+
+    def _call_host_fn(fn, *args, **kw):
+      context = CatchInvalidHostcallFunctions()
+      context.Enter()
+      result = fn(*args, **kw)
+      context.Exit()
+      context.ExitResult(result)
+      return result
+
+    # It is assumed that evaluation always happens on a single-host TPU
+    # system, so place all ops on the TPU host if possible.
+    #
+    # TODO(jhseu): Evaluate whether this is right for summaries.
+    with ops.device(self._ctx.tpu_host_placement_function(replica_id=0)):
+      for name in self._names:
+        dequeue_ops = dequeue_ops_by_name[name]
+        for i, item in enumerate(dequeue_ops):
+          if dequeue_ops[i][0].shape.ndims == 0:
+            raise RuntimeError(
+                'All tensors outfed from TPU should preserve batch size '
+                'dimension, but got scalar {}'.format(dequeue_ops[i][0]))
+          # TODO(xiejw): Make the specification of the outfeed combination
+          # function more explicit and well-documented.  We may want to give the
+          # user the option of concatenating along any axis.
+          if (self._ctx.config.tpu_config.per_host_input_for_training is
+              tpu_config.InputPipelineConfig.BROADCAST):
+            # If the infeed is in BROADCAST mode (each core receiving the same
+            # input), then we assume that the cores also produce identical
+            # copies of the same output, and we simply take the output from
+            # the first core.  This mode is used by Mesh-TensorFlow.
+            with ops.control_dependencies(dequeue_ops[i]):
+              dequeue_ops[i] = array_ops.identity(dequeue_ops[i][0])
+          else:
+            # Assume that the input has been batch-split and that axis 0 of the
+            # output tensors represents the batch size.  Concatenate along
+            # the axis 0 to re-combine the batch.
+            dequeue_ops[i] = array_ops.concat(dequeue_ops[i], axis=0)
+
+        if self._tensor_keys[name] is not None:
+          # The user-provided eval_metrics[1] is a dict.
+          dequeue_ops = dict(zip(self._tensor_keys[name], dequeue_ops))
+          try:
+            ret[name] = _call_host_fn(self._host_fns[name], **dequeue_ops)
+          except TypeError as e:
+            logging.warning(
+                'Exception while calling %s: %s. It is likely the tensors '
+                '(%s[1]) do not match the function\'s arguments', name, e,
+                name)
+            raise
+        else:
+          ret[name] = _call_host_fn(self._host_fns[name], *dequeue_ops)
+
+    # Force all dequeue operations to run even if they are not consumed by
+    # the host calls.
+    ret['__force_dequeue'] = control_flow_ops.group(*flat_dequeue_ops)
+    return ret
+
+
+class _OutfeedHostCallHook(session_run_hook.SessionRunHook):
+  """Hook to run host calls when use_tpu=False."""
+
+  def __init__(self, tensors):
+    self._tensors = tensors
+
+  def begin(self):
+    # We duplicate this code from the TPUInfeedOutfeedSessionHook rather than
+    # create a separate hook to guarantee execution order, because summaries
+    # need to be initialized before the outfeed thread starts.
+    # TODO(jhseu): Make a wrapper hook instead?
+    self._init_ops = contrib_summary.summary_writer_initializer_op()
+    # Get all the writer resources from the initializer, so we know what to
+    # flush.
+    self._finalize_ops = []
+    for op in self._init_ops:
+      self._finalize_ops.append(contrib_summary.flush(writer=op.inputs[0]))
+
+  def after_create_session(self, session, coord):
+    session.run(self._init_ops)
+
+  def before_run(self, run_context):
+    return basic_session_run_hooks.SessionRunArgs(self._tensors)
+
+  def end(self, session):
+    session.run(self._finalize_ops)
+
+
+class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
+  """Calculate and report global_step/sec and examples/sec during runtime."""
+
+  def __init__(self,
+               batch_size,
+               every_n_steps=100,
+               every_n_secs=None,
+               output_dir=None,
+               summary_writer=None):
+    self._batch_size = batch_size
+    super(ExamplesPerSecondHook, self).__init__(
+        every_n_steps=every_n_steps,
+        every_n_secs=every_n_secs,
+        output_dir=output_dir,
+        summary_writer=summary_writer)
+
+  def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
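+    # Steps per second over the elapsed window; examples/sec scales this by
+    # the (global) batch size supplied at construction time.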
+    global_step_per_sec = elapsed_steps / elapsed_time
+    examples_per_sec = self._batch_size * global_step_per_sec
+    if self._summary_writer is not None:
+      global_step_summary = Summary(value=[
+          Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec)
+      ])
+      example_summary = Summary(value=[
+          Summary.Value(tag='examples/sec', simple_value=examples_per_sec)
+      ])
+      self._summary_writer.add_summary(global_step_summary, global_step)
+      self._summary_writer.add_summary(example_summary, global_step)
+    logging.info('global_step/sec: %g', global_step_per_sec)
+    logging.info('examples/sec: %g', examples_per_sec)
+
+
+class InstallSignalHandlerHook(session_run_hook.SessionRunHook):
+  """Change SIGINT (CTRL^C) handler to force quit the process.
+
+  The default behavior often results in hanging processes.
+  The original handler is restored after training/evaluation.
+  """
+
+  def __init__(self):
+    self._signal_fn = signal.getsignal(signal.SIGINT)
+
+  def before_run(self, run_context):
+    signal.signal(signal.SIGINT, signal.SIG_DFL)
+
+  def end(self, session):
+    signal.signal(signal.SIGINT, self._signal_fn)
+
+
+class TPUEstimator(estimator_lib.Estimator):
+  """Estimator with TPU support.
+
+  TPUEstimator also supports training on CPU and GPU. You don't need to define
+  a separate `tf.estimator.Estimator`.
+
+  TPUEstimator handles many of the details of running on TPU devices, such as
+  replicating inputs and models for each core, and returning to host
+  periodically to run hooks.
+
+  TPUEstimator transforms a global batch size in params to a per-shard batch
+  size when calling the `input_fn` and `model_fn`. Users should specify the
+  global batch size in the constructor and then read the batch size for each
+  shard in `input_fn` and `model_fn` from `params['batch_size']`.
+
+  - For training, `model_fn` gets per-core batch size; `input_fn` may get
+    per-core or per-host batch size depending on `per_host_input_for_training`
+    in `TPUConfig` (See docstring for TPUConfig for details).
+
+  - For evaluation and prediction, `model_fn` gets per-core batch size and
+    `input_fn` gets per-host batch size (see the sketch below).
+
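+  A minimal sketch of reading the per-shard batch size inside `model_fn` (the
+  numbers are illustrative; the actual shard count depends on the TPU
+  topology and `TPUConfig`):
+
+  ```
+  def model_fn(features, labels, mode, params):
+    # E.g. train_batch_size=1024 over 8 shards -> params['batch_size'] == 128.
+    batch_size = params['batch_size']
+    ...
+  ```
+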
+  Evaluation
+  ==========
+
+  `model_fn` should return `TPUEstimatorSpec`, which expects the `eval_metrics`
+  for TPU evaluation. If eval_on_tpu is False, the evaluation will execute on
+  CPU or GPU; in this case the following discussion on TPU evaluation does not
+  apply.
+
+  `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`, where
+  `tensors` could be a list of any nested structure of `Tensor`s (See
+  `TPUEstimatorSpec` for details).  `metric_fn` takes the `tensors` and returns
+  a dict from metric string name to the result of calling a metric function,
+  namely a `(metric_tensor, update_op)` tuple.
+
+  One can set `use_tpu` to `False` for testing. All training, evaluation, and
+  prediction will be executed on CPU. `input_fn` and `model_fn` will receive
+  `train_batch_size` or `eval_batch_size` unmodified as `params['batch_size']`.
+
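+  A minimal sketch of such a CPU-only setup (`my_model_fn` and the batch
+  sizes are placeholders):
+
+  ```
+  est = tf.contrib.tpu.TPUEstimator(
+      model_fn=my_model_fn,
+      config=tf.contrib.tpu.RunConfig(),
+      use_tpu=False,
+      train_batch_size=64,
+      eval_batch_size=64)
+  ```
+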
+  Current limitations:
+  --------------------
+
+  1. TPU evaluation only works on a single host (one TPU worker), except in
+     BROADCAST mode.
+
+  2. `input_fn` for evaluation should **NOT** raise an end-of-input exception
+     (`OutOfRangeError` or `StopIteration`), and all evaluation steps and all
+     batches should have the same size.
+
+  Example (MNIST):
+  ----------------
+
+  ```
+  # The metric fn which runs on CPU.
+  def metric_fn(labels, logits):
+    predictions = tf.argmax(logits, 1)
+    return {
+      'accuracy': tf.metrics.accuracy(
+          labels=labels, predictions=predictions),
+    }
+
+  # Your model fn which runs on TPU (eval_metrics is a list in this example).
+  def model_fn(features, labels, mode, config, params):
+    ...
+    logits = ...
+
+    if mode == tf.estimator.ModeKeys.EVAL:
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=loss,
+          eval_metrics=(metric_fn, [labels, logits]))
+
+  # Or specify the eval_metrics tensors as a dict.
+  def model_fn(features, labels, mode, config, params):
+    ...
+    final_layer_output = ...
+
+    if mode == tf.estimator.ModeKeys.EVAL:
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=loss,
+          eval_metrics=(metric_fn, {
+              'labels': labels,
+              'logits': final_layer_output,
+          }))
+  ```
+
+  Prediction
+  ==========
+
+  Prediction on TPU is an experimental feature to support large batch
+  inference. It is not designed for latency-critical systems. In addition,
+  due to some usability issues, for prediction with a small dataset, CPU
+  `.predict`, i.e., creating a new `TPUEstimator` instance with
+  `use_tpu=False`, might be more convenient.
+
+  Note: In contrast to TPU training/evaluation, the `input_fn` for prediction
+  *should* raise an end-of-input exception (`OutOfRangeError` or
+  `StopIteration`), which serves as the stopping signal to `TPUEstimator`. To be
+  precise, the ops created by `input_fn` produce one batch of the data.
+  The `predict()` API processes one batch at a time. When reaching the end of
+  the data source, an end-of-input exception should be raised by one of these
+  operations. The user usually does not need to do this manually. As long as the
+  dataset is not repeated forever, the `tf.data` API will raise an end-of-input
+  exception automatically after the last batch has been produced.
+
+  Note: Estimator.predict returns a Python generator. Please consume all the
+  data from the generator so that TPUEstimator can shut down the TPU system
+  properly for the user.
+
+  Current limitations:
+  --------------------
+  1. TPU prediction only works on a single host (one TPU worker).
+
+  2. `input_fn` must return a `Dataset` instance rather than `features`. In
+  fact, .train() and .evaluate() also support Dataset as a return value.
+
+  Example (MNIST):
+  ----------------
+  ```
+  height = 32
+  width = 32
+  total_examples = 100
+
+  def predict_input_fn(params):
+    batch_size = params['batch_size']
+
+    images = tf.random_uniform(
+        [total_examples, height, width, 3], minval=-1, maxval=1)
+
+    dataset = tf.data.Dataset.from_tensor_slices(images)
+    dataset = dataset.map(lambda images: {'image': images})
+
+    dataset = dataset.batch(batch_size)
+    return dataset
+
+  def model_fn(features, labels, params, mode):
+    # Generate predictions, called 'output', from features['image'].
+
+    if mode == tf.estimator.ModeKeys.PREDICT:
+      return tf.contrib.tpu.TPUEstimatorSpec(
+          mode=mode,
+          predictions={
+              'predictions': output,
+              'is_padding': features['is_padding']
+          })
+
+  tpu_est = TPUEstimator(
+      model_fn=model_fn,
+      ...,
+      predict_batch_size=16)
+
+  # Fully consume the generator so that TPUEstimator can shutdown the TPU
+  # system.
+  for item in tpu_est.predict(input_fn=predict_input_fn):
+    # Filter out item if the `is_padding` is 1.
+    # Process the 'predictions'
+  ```
+
+  Exporting
+  =========
+
+  `export_savedmodel` exports 2 metagraphs, one with `tag_constants.SERVING`,
+  and another with `tag_constants.SERVING` and `tag_constants.TPU`.
+  At serving time, these tags are used to select the metagraph to load.
+
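+  A sketch of the export call (the path and the serving input fn below are
+  placeholders):
+
+  ```
+  est.export_savedmodel(export_dir_base='/tmp/export',
+                        serving_input_receiver_fn=serving_input_receiver_fn)
+  ```
+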
+  Before running the graph on TPU, the TPU system needs to be initialized. If
+  the TensorFlow Serving model-server is used, this is done automatically. If
+  not, please call `session.run(tpu.initialize_system())`, as sketched below.
+
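+  A minimal sketch of manual initialization (`master` is a placeholder for
+  the TPU worker address):
+
+  ```
+  with tf.Session(master) as sess:
+    sess.run(tf.contrib.tpu.initialize_system())
+  ```
+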
+  `tpu.outside_compilation` can be used to wrap TPU incompatible ops in
+  `model_fn`.
+
+  Example:
+  ----------------
+
+  ```
+  def model_fn(features, labels, mode, config, params):
+    ...
+    logits = ...
+    export_outputs = {
+      'logits': export_output_lib.PredictOutput(
+        {'logits': logits})
+    }
+
+    def host_call(logits):
+      class_ids = math_ops.argmax(logits)
+      classes = string_ops.as_string(class_ids)
+      export_outputs['classes'] = export_output_lib.ClassificationOutput(
+          classes=classes)
+
+    tpu.outside_compilation(host_call, logits)
+
+    ...
+  ```
+
+  """
+
+  def __init__(self,
+               model_fn=None,
+               model_dir=None,
+               config=None,
+               params=None,
+               use_tpu=True,
+               train_batch_size=None,
+               eval_batch_size=None,
+               predict_batch_size=None,
+               batch_axis=None,
+               eval_on_tpu=True,
+               export_to_tpu=True,
+               export_to_cpu=True,
+               warm_start_from=None,
+               experimental_exported_model_uses_all_cores=False,
+               experimental_export_device_assignment=False,
+               experimental_embedding_config_spec=None):
+    """Constructs an `TPUEstimator` instance.
+
+    Args:
+      model_fn: Model function as required by `Estimator` which returns
+        EstimatorSpec or TPUEstimatorSpec. `training_hooks`,
+        `evaluation_hooks`, and `prediction_hooks` must not capture any TPU
+        Tensor inside the model_fn.
+      model_dir: Directory to save model parameters, graph, etc. This can
+        also be used to load checkpoints from the directory into an estimator
+        to continue training a previously saved model. If `None`, the
+        model_dir in `config` will be used if set. If both are set, they must
+        be the same. If both are `None`, a temporary directory will be used.
+      config: A `tpu_config.RunConfig` configuration object. Cannot be `None`.
+      params: An optional `dict` of hyper parameters that will be passed into
+        `input_fn` and `model_fn`.  Keys are names of parameters, values are
+        basic python types. There are reserved keys for `TPUEstimator`,
+        including 'batch_size'.
+      use_tpu: A bool indicating whether TPU support is enabled. Currently:
+        - TPU training and evaluation respect this bit, but eval_on_tpu can
+          override execution of eval (see below).
+        - Predict still happens on CPU.
+      train_batch_size: An int representing the global training batch size.
+        TPUEstimator transforms this global batch size to a per-shard batch
+        size, as params['batch_size'], when calling `input_fn` and `model_fn`.
+        Cannot be `None` if `use_tpu` is `True`. Must be divisible by the
+        total number of replicas.
+      eval_batch_size: An int representing the evaluation batch size. Must be
+        divisible by the total number of replicas.
+      predict_batch_size: An int representing the prediction batch size. Must
+        be divisible by the total number of replicas.
+      batch_axis: A python tuple of int values describing how each tensor
+        produced by the Estimator `input_fn` should be split across the TPU
+        compute shards. For example, if your input_fn produced (images, labels)
+        where the images tensor is in `HWCN` format, your shard dimensions would
+        be [3, 0], where 3 corresponds to the `N` dimension of your images
+        Tensor, and 0 corresponds to the dimension along which to split the
+        labels to match up with the corresponding images. If None is supplied,
+        and per_host_input_for_training is True, batches will be sharded based
+        on the major dimension. If tpu_config.per_host_input_for_training is
+        False or `PER_HOST_V2`, batch_axis is ignored.
+      eval_on_tpu: If False, evaluation runs on CPU or GPU. In this case, the
+        model_fn must return `EstimatorSpec` when called with `mode` as `EVAL`.
+      export_to_tpu: If True, `export_savedmodel()` exports a metagraph for
+        serving on TPU. Note that unsupported export modes such as EVAL will be
+        ignored. For those modes, only a CPU model will be exported.
+        Currently, export_to_tpu only supports PREDICT.
+      export_to_cpu: If True, `export_savedmodel()` exports a metagraph for
+        serving on CPU.
+      warm_start_from: Optional string filepath to a checkpoint or SavedModel to
+        warm-start from, or a `tf.estimator.WarmStartSettings` object to fully
+        configure warm-starting.  If the string filepath is provided instead of
+        a `WarmStartSettings`, then all variables are warm-started, and it is
+        assumed that vocabularies and Tensor names are unchanged.
+      experimental_exported_model_uses_all_cores: Whether to round-robin among
+        all cores visible to the host which is serving the saved model, or to
+        use a single core. This is a temporary flag to enable using all TPU
+        cores for inference with TPUPartitionedCall(). Once outside compilation
+        is supported in TPUPartitionedCall(), this flag will be enabled by
+        default.
+      experimental_export_device_assignment: Whether to include the device
+        assignment in the exported model. Doing so is useful in case of model
+        parallel inference but will tie the exported model to the TPU topology
+        used to export the model.
+      experimental_embedding_config_spec: Optional EmbeddingConfigSpec instance
+        to support using TPU embedding. IT IS STILL WORK IN PROGRESS, SO PLEASE
+        DO NOT USE.
+
+    Raises:
+      ValueError: `params` has reserved keys already.
+    """
+    if config is None or not isinstance(config, tpu_config.RunConfig):
+      raise ValueError(
+          '`config` must be provided with type `tpu_config.RunConfig`')
+
+    if params is not None and any(k in params for k in _RESERVED_PARAMS_KEYS):
+      raise ValueError('{} are reserved keys but existed in params {}.'.format(
+          _RESERVED_PARAMS_KEYS, params))
+
+    if use_tpu:
+      # Perform some very basic validations. More validations will be found in
+      # _InternalTPUContext.
+      if train_batch_size is None:
+        raise ValueError('`train_batch_size` cannot be `None`')
+      util_lib.check_positive_integer(train_batch_size, 'train_batch_size')
+
+      if (config.tpu_config.per_host_input_for_training is
+          tpu_config.InputPipelineConfig.PER_SHARD_V1 and
+          config.tpu_config.num_cores_per_replica):
+        raise ValueError(
+            'Model parallelism only supports per host input for training. '
+            'Please adjust TPUConfig.per_host_input_for_training.')
+
+      if eval_batch_size is not None:
+        util_lib.check_positive_integer(eval_batch_size, 'eval_batch_size')
+
+      if predict_batch_size is not None:
+        util_lib.check_positive_integer(predict_batch_size,
+                                        'predict_batch_size')
+
+    # Verifies the model_fn signature according to Estimator framework.
+    estimator_lib._verify_model_fn_args(model_fn, params)  # pylint: disable=protected-access
+    # We cannot store config and params in this constructor as parent
+    # constructor might change them, such as assigning a temp dir for
+    # config.model_dir.
+    model_function = self._augment_model_fn(model_fn, batch_axis)
+
+    # Overwrite log_step_count_steps to prevent TensorLoggingHook and
+    # StepCounterHook from being created in Estimator. TPUEstimator already
+    # adds equivalent hooks in _augment_model_fn above.
+    self._log_every_n_steps = config.log_step_count_steps
+    config = config.replace(log_step_count_steps=None)
+
+    # Pass non-None params, as the wrapped model_fn expects them.
+    params = params or {}
+    super(TPUEstimator, self).__init__(
+        model_fn=model_function,
+        model_dir=model_dir,
+        config=config,
+        params=params,
+        warm_start_from=warm_start_from)
+    self._iterations_per_training_loop = (
+        self._config.tpu_config.iterations_per_loop)
+
+    # All properties passed to _InternalTPUContext are immutable.
+    # pylint: disable=protected-access
+    self._ctx = tpu_context._get_tpu_context(
+        self._config, train_batch_size, eval_batch_size, predict_batch_size,
+        use_tpu, eval_on_tpu, experimental_embedding_config_spec)
+
+    self._export_to_cpu = export_to_cpu
+    self._export_to_tpu = export_to_tpu
+    self._experimental_exported_model_uses_all_cores = (
+        experimental_exported_model_uses_all_cores)
+    self._experimental_export_device_assignment = (
+        experimental_export_device_assignment)
+    if (experimental_exported_model_uses_all_cores and
+        experimental_export_device_assignment):
+      raise ValueError('experimental_exported_model_uses_all_cores and '
+                       'experimental_export_device_assignment are not '
+                       'supported at the same time.')
+
+    self._is_input_fn_invoked = None
+    self._rendezvous = {}
+
+  def _add_meta_graph_for_mode(self,
+                               builder,
+                               input_receiver_fn_map,
+                               checkpoint_path,
+                               save_variables=True,
+                               mode=model_fn_lib.ModeKeys.PREDICT,
+                               export_tags=None,
+                               check_variables=True):
+    if self._export_to_tpu and mode != model_fn_lib.ModeKeys.PREDICT:
+      logging.warning('TPUEstimator only handles mode PREDICT for exporting '
+                      'when `export_to_tpu` is `True`; Mode {} will be ignored '
+                      'for TPU.'.format(mode))
+
+    if not self._export_to_cpu and not self._export_to_tpu:
+      raise ValueError('One of export_to_cpu and export_to_tpu must be true.')
+
+    if self._export_to_cpu:
+      (super(TPUEstimator, self)._add_meta_graph_for_mode(
+          builder,
+          input_receiver_fn_map,
+          checkpoint_path,
+          save_variables,
+          mode=mode,
+          export_tags=export_tags,
+          check_variables=check_variables))
+
+    if self._export_to_tpu and mode == model_fn_lib.ModeKeys.PREDICT:
+      input_receiver_fn_map = {
+          _REWRITE_FOR_INFERENCE_MODE: input_receiver_fn_map[mode]
+      }
+      export_tags = [tag_constants.SERVING, tag_constants.TPU]
+      mode = _REWRITE_FOR_INFERENCE_MODE
+
+      # See b/110052256 for why `check_variables` is `False`.
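+      # When a CPU graph is also exported, it has already saved and checked
+      # the variables, so the TPU metagraph skips both steps.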
+      if not self._export_to_cpu:
+        check_variables = save_variables = True
+      else:
+        check_variables = save_variables = False
+      (super(TPUEstimator, self)._add_meta_graph_for_mode(
+          builder,
+          input_receiver_fn_map,
+          checkpoint_path,
+          save_variables=save_variables,
+          mode=mode,
+          export_tags=export_tags,
+          check_variables=check_variables))
+
+  def _call_model_fn(self, features, labels, mode, config):
+    if mode == _REWRITE_FOR_INFERENCE_MODE:
+      return self._call_model_fn_for_inference(features, labels, mode, config)
+    else:
+      return super(TPUEstimator, self)._call_model_fn(features, labels, mode,
+                                                      config)
+
+  def _call_model_fn_for_inference(self, features, labels, mode, config):
+    """Wraps `_call_model_fn` for `export_savedmodel`."""
+    if mode != _REWRITE_FOR_INFERENCE_MODE:
+      raise ValueError('mode must be {}; '
+                       'got {}.'.format(_REWRITE_FOR_INFERENCE_MODE, mode))
+
+    computation, capture = self._build_computation_for_inference(
+        features, labels, mode, config)
+    tensors = call_computation(
+        computation,
+        experimental_exported_model_uses_all_cores=self
+        ._experimental_exported_model_uses_all_cores)
+    estimator_spec, export_outputs_dict, predictions_dict, none_indices = (
+        capture.get())
+    predictions_list = tensors[:len(predictions_dict)]
+    export_outputs_list_without_none = tensors[len(predictions_dict):]
+
+    # Reinsert `None`s which we've taken out in
+    # `_build_computation_for_inference()`.
+    export_outputs_list = []
+    while none_indices or export_outputs_list_without_none:
+      if none_indices and none_indices[0] == len(export_outputs_list):
+        export_outputs_list.append(None)
+        none_indices.pop(0)
+      else:
+        export_outputs_list.append(export_outputs_list_without_none.pop(0))
+
+    # Reconstruct `export_outputs` with updated tensors.
+    new_export_outputs_dict = nest.pack_sequence_as(export_outputs_dict,
+                                                    export_outputs_list)
+    export_outputs = estimator_spec.export_outputs
+    new_export_outputs = collections.OrderedDict(
+        (k, _clone_export_output_with_tensors(export_outputs[k], v))
+        for k, v in six.iteritems(new_export_outputs_dict))
+    # Reconstruct `predictions` with updated tensors.
+    new_predictions = nest.pack_sequence_as(predictions_dict, predictions_list)
+    if (len(new_predictions) == 1 and
+        _KEY_WHEN_PREDICTIONS_IS_A_TENSOR in new_predictions):
+      new_predictions = new_predictions[_KEY_WHEN_PREDICTIONS_IS_A_TENSOR]
+
+    return estimator_spec._replace(
+        export_outputs=new_export_outputs, predictions=new_predictions)
+
+  def _build_computation_for_inference(self, features, labels, mode, config):
+    capture = _CapturedObject()
+
+    def computation():
+      """Computation to be passed to `TPUPartitionedCall()`."""
+      tpu_computation, tpu_capture = self._build_tpu_computation_for_inference(
+          features, labels, mode, config)
+
+      if self._experimental_export_device_assignment:
+        # Export the device assignment as part of the model. This is useful
+        # for model-parallel use cases where the model relies on the mapping
+        # between logical and physical devices.
+        with self._ctx.with_mode(mode) as ctx:
+          device_assignment = ctx.device_assignment
+      else:
+        device_assignment = None
+
+      if self._experimental_exported_model_uses_all_cores:
+        tensors_on_cpu = tpu.rewrite(
+            tpu_computation, device_assignment=device_assignment)
+      else:
+        tensors_on_cpu = tpu.rewrite_for_inference(
+            tpu_computation, device_assignment=device_assignment)
+
+      (estimator_spec, export_outputs_dict, export_outputs_list,
+       predictions_dict) = (
+           tpu_capture.get())
+      predictions_list = tensors_on_cpu[:len(predictions_dict)]
+      export_outputs_tpu_on_cpu_list = tensors_on_cpu[len(predictions_dict):]
+
+      # Reconstruct tensors used in export_outputs, with TPU tensors replaced
+      # with their CPU counterpart returned from `rewrite_for_inference()`.
+      # `function.Defun()` does not like `None`s in return values, so we leave
+      # `None`s out but record their positions for later reconstruction.
+      export_outputs_list_without_none = []
+      none_indices = []
+      for i, t in enumerate(export_outputs_list):
+        if t is None:
+          none_indices.append(i)
+        else:
+          export_outputs_list_without_none.append(
+              export_outputs_tpu_on_cpu_list.pop(0))
+
+      capture.capture((estimator_spec, export_outputs_dict, predictions_dict,
+                       none_indices))
+      return predictions_list + export_outputs_list_without_none
+
+    return computation, capture
+
+  def _build_tpu_computation_for_inference(self, features, labels, mode,
+                                           config):
+    capture = _CapturedObject()
+
+    def computation():
+      """Compute tpu tensors used in export_outputs.
+
+      Passed to rewrite_for_inference so that model_fn will be called under
+      the rewriting contexts. Only tpu tensors are returned, but export_outputs
+      and scaffold are captured.
+
+      Returns:
+         A list of Tensors used in export_outputs and not marked for
+         outside_compilation.
+      """
+      # We should only call model fn once and it should be inside `computation`
+      # so that building the graph will happen under `rewrite_for_inference`.
+      mode = model_fn_lib.ModeKeys.PREDICT
+      estimator_spec = self._call_model_fn(features, labels, mode, config)
+
+      # We pick the TPU tensors out from `export_output` and later return them
+      # from `computation` for rewriting.
+      export_outputs_dict = collections.OrderedDict(
+          (k, _export_output_to_tensors(v))
+          for k, v in six.iteritems(estimator_spec.export_outputs))
+      export_outputs_list = nest.flatten(export_outputs_dict)
+      export_outputs_tpu_list = [
+          t for t in export_outputs_list if t is not None
+      ]
+
+      if isinstance(estimator_spec.predictions, dict):
+        predictions_dict = collections.OrderedDict(
+            (k, v) for k, v in six.iteritems(estimator_spec.predictions))
+      else:
+        predictions_dict = {
+            _KEY_WHEN_PREDICTIONS_IS_A_TENSOR: estimator_spec.predictions
+        }
+      predictions_list = nest.flatten(predictions_dict)
+
+      # We cannot return everything we want through the return values, so
+      # capture the rest here for later use.
+      capture.capture((estimator_spec, export_outputs_dict, export_outputs_list,
+                       predictions_dict))
+      return predictions_list + export_outputs_tpu_list
+
+    return computation, capture
+
+  def _create_global_step(self, graph):
+    """Creates a global step suitable for TPUs.
+
+    Args:
+      graph: The graph in which to create the global step.
+
+    Returns:
+      A global step `Tensor`.
+
+    Raises:
+      ValueError: if the global step tensor is already defined.
+    """
+    return _create_global_step(graph)
+
+  def _convert_train_steps_to_hooks(self, steps, max_steps):
+    with self._ctx.with_mode(model_fn_lib.ModeKeys.TRAIN) as ctx:
+      if ctx.is_running_on_cpu():
+        return super(TPUEstimator, self)._convert_train_steps_to_hooks(
+            steps, max_steps)
+
+    # On TPU.
+    if steps is None and max_steps is None:
+      raise ValueError(
+          'For TPU training, one of `steps` or `max_steps` must be set. '
+          'Cannot be both `None`.')
+
+    # Estimator.train has explicit positiveness check.
+    if steps is not None:
+      util_lib.check_positive_integer(steps, 'Train steps')
+    if max_steps is not None:
+      util_lib.check_positive_integer(max_steps, 'Train max_steps')
+
+    return [
+        _TPUStopAtStepHook(self._iterations_per_training_loop, steps, max_steps)
+    ]
+
+  def _convert_eval_steps_to_hooks(self, steps):
+    with self._ctx.with_mode(model_fn_lib.ModeKeys.EVAL) as ctx:
+      if ctx.is_running_on_cpu():
+        return super(TPUEstimator, self)._convert_eval_steps_to_hooks(steps)
+
+    if steps is None:
+      raise ValueError('Evaluate `steps` must be set on TPU. Cannot be `None`.')
+
+    util_lib.check_positive_integer(steps, 'Eval steps')
+
+    return [
+        evaluation._StopAfterNEvalsHook(  # pylint: disable=protected-access
+            num_evals=steps),
+        _SetEvalIterationsHook(steps)
+    ]
+
+  def _call_input_fn(self, input_fn, mode):
+    """Calls the input function.
+
+    Args:
+      input_fn: The input function.
+      mode: ModeKeys
+
+    Returns:
+      In TPU mode, returns an input_fn to be called later in model_fn.
+      Otherwise, calls the input_fn and returns either features or
+        (features, labels).
+
+    Raises:
+      ValueError: if input_fn takes invalid arguments or does not have `params`.
+    """
+    input_fn_args = function_utils.fn_args(input_fn)
+    config = self.config  # a deep copy.
+    kwargs = {}
+    if 'params' in input_fn_args:
+      kwargs['params'] = self.params  # a deep copy.
+    else:
+      raise ValueError('input_fn ({}) does not include params argument, '
+                       'required by TPUEstimator to pass batch size as '
+                       'params["batch_size"]'.format(input_fn))
+    if 'config' in input_fn_args:
+      kwargs['config'] = config
+
+    if 'mode' in input_fn_args:
+      kwargs['mode'] = mode
+
+    # Records the fact input_fn has been invoked.
+    self._is_input_fn_invoked = True
+
+    with self._ctx.with_mode(mode) as ctx:
+      # Set the batch size in params first. This helps users use the same
+      # input_fn for use_tpu=True/False.
+      batch_size_for_input_fn = ctx.batch_size_for_input_fn
+      if batch_size_for_input_fn is not None:
+        _add_item_to_params(kwargs['params'], _BATCH_SIZE_KEY,
+                            batch_size_for_input_fn)
+
+      # For export_savedmodel, input_fn is never passed to Estimator. So,
+      # `is_export_mode` must be False.
+      if ctx.is_running_on_cpu(is_export_mode=False):
+        with ops.device('/device:CPU:0'):
+          return input_fn(**kwargs)
+
+      # For TPU computation, input_fn should be invoked in a tf.while_loop for
+      # performance. While constructing the tf.while_loop, the structure of
+      # the inputs returned by `input_fn` needs to be recorded. The structure
+      # includes whether features or labels is a dict or a single Tensor, the
+      # dict keys, tensor shapes, and dtypes. The recorded structure is used
+      # to create the infeed dequeue ops, which must be wrapped and passed as
+      # a function to be called inside the TPU computation, as the TPU
+      # computation itself is wrapped inside a tf.while_loop. So, we either
+      # pass input_fn to model_fn or pass dequeue_fn to model_fn. Here,
+      # `input_fn` is passed directly as `features` in the `model_fn`
+      # signature.
+      def _input_fn(ctx):
+        _add_item_to_params(kwargs['params'], _CTX_KEY, ctx)
+        return input_fn(**kwargs)
+
+      return _input_fn
+
+  def _validate_features_in_predict_input(self, result):
+    """Skip the validation.
+
+    For TPUEstimator, we do not need to check the result type. `_InputPipeline`
+    has stronger check. Parent class's check generates confusing warning msg.
+
+    Args:
+      result: `features` returned by input_fn.
+    """
+    pass
+
+  def train(self,
+            input_fn,
+            hooks=None,
+            steps=None,
+            max_steps=None,
+            saving_listeners=None):
+    rendezvous = error_handling.ErrorRendezvous(num_sources=3)
+    self._rendezvous[model_fn_lib.ModeKeys.TRAIN] = rendezvous
+    try:
+      return super(TPUEstimator, self).train(
+          input_fn=input_fn,
+          hooks=hooks,
+          steps=steps,
+          max_steps=max_steps,
+          saving_listeners=saving_listeners)
+    except Exception:  # pylint: disable=broad-except
+      rendezvous.record_error('training_loop', sys.exc_info())
+    finally:
+      rendezvous.record_done('training_loop')
+      rendezvous.raise_errors()
+
+  def evaluate(self,
+               input_fn,
+               steps=None,
+               hooks=None,
+               checkpoint_path=None,
+               name=None):
+    rendezvous = error_handling.ErrorRendezvous(num_sources=3)
+    self._rendezvous[model_fn_lib.ModeKeys.EVAL] = rendezvous
+    try:
+      return super(TPUEstimator, self).evaluate(
+          input_fn,
+          steps=steps,
+          hooks=hooks,
+          checkpoint_path=checkpoint_path,
+          name=name)
+    except Exception:  # pylint: disable=broad-except
+      rendezvous.record_error('evaluation_loop', sys.exc_info())
+    finally:
+      rendezvous.record_done('evaluation_loop')
+      rendezvous.raise_errors()
+
+  def predict(self,
+              input_fn,
+              predict_keys=None,
+              hooks=None,
+              checkpoint_path=None,
+              yield_single_examples=True):
+    rendezvous = error_handling.ErrorRendezvous(num_sources=3)
+    self._rendezvous[model_fn_lib.ModeKeys.PREDICT] = rendezvous
+    try:
+      for result in super(TPUEstimator, self).predict(
+          input_fn=input_fn,
+          predict_keys=predict_keys,
+          hooks=hooks,
+          checkpoint_path=checkpoint_path,
+          yield_single_examples=yield_single_examples):
+        yield result
+    except Exception:  # pylint: disable=broad-except
+      rendezvous.record_error('prediction_loop', sys.exc_info())
+    finally:
+      rendezvous.record_done('prediction_loop')
+      rendezvous.raise_errors()
+
+    rendezvous.record_done('prediction_loop')
+    rendezvous.raise_errors()
+
+  def _augment_model_fn(self, model_fn, batch_axis):
+    """Returns a new model_fn, which wraps the TPU support."""
+
+    def _model_fn(features, labels, mode, config, params):
+      """A Estimator `model_fn` for TPUEstimator."""
+      with self._ctx.with_mode(mode) as ctx:
+        model_fn_wrapper = _ModelFnWrapper(model_fn, config, params, ctx)
+
+        # `input_fn` is called in `train()`, `evaluate()`, and `predict()`,
+        # but not in `export_savedmodel()`.
+        if self._is_input_fn_invoked:
+          is_export_mode = False
+        else:
+          is_export_mode = True
+
+        # Clear the bit.
+        self._is_input_fn_invoked = None
+
+        # examples_hook is added to training_hooks for both CPU and TPU
+        # execution.
+        if self._log_every_n_steps is not None:
+          examples_hook = ExamplesPerSecondHook(
+              ctx.global_batch_size,
+              # pylint:disable=g-long-ternary
+              output_dir=(self.model_dir
+                          if not config or config.save_summary_steps
+                          else None),
+              # pylint:enable=g-long-ternary
+              every_n_steps=self._log_every_n_steps)
+
+        if ctx.is_running_on_cpu(is_export_mode=is_export_mode):
+          logging.info('Running %s on CPU', mode)
+          estimator_spec = model_fn_wrapper.call_without_tpu(
+              features, labels, is_export_mode=is_export_mode)
+          if self._log_every_n_steps is not None:
+            estimator_spec = estimator_spec._replace(
+                training_hooks=estimator_spec.training_hooks + (examples_hook,))
+          return estimator_spec
+
+        assert labels is None, '`labels` passed to `model_fn` must be `None`.'
+        # TPUEstimator._call_input_fn passes `input_fn` as features to here.
+        assert callable(features), '`input_fn` is not callable.'
+        input_fn = features
+
+        tpu_init_ops = []
+        if ctx.embedding_config and mode == model_fn_lib.ModeKeys.TRAIN:
+          dummy_table_variables, dummy_table_variables_init = (
+              tpu_embedding_gradient.create_dummy_table_variables(
+                  ctx.embedding_config.tpu_embedding))
+          ctx.embedding_config.dummy_table_variables = dummy_table_variables
+          tpu_init_ops.append(dummy_table_variables_init)
+
+        input_holders = _InputPipeline(input_fn, batch_axis, ctx)
+        enqueue_ops, dequeue_fn, input_hooks, run_infeed_loop_on_coordinator = (
+            input_holders.generate_infeed_enqueue_ops_and_dequeue_fn())
+
+        graph = ops.get_default_graph()
+        for enqueue_op in enqueue_ops:
+          if isinstance(enqueue_op, list):
+            graph.get_collection_ref(_TPU_ENQUEUE_OPS).extend(enqueue_op)
+          else:
+            graph.add_to_collection(_TPU_ENQUEUE_OPS, enqueue_op)
+
+        if mode == model_fn_lib.ModeKeys.TRAIN:
+          compile_op, loss, host_call, scaffold, training_hooks = (
+              _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
+          if ctx.embedding_config:
+            g = ops.get_default_graph()
+            table_to_config_dict = (
+                ctx.embedding_config.tpu_embedding.table_to_config_dict)
+            optimization_parameters = (
+                ctx.embedding_config.tpu_embedding.optimization_parameters)
+            embedding_variable_name_by_table, slot_variable_names_by_table = (
+                _tpu_estimator_embedding.get_full_variable_names(
+                    g, table_to_config_dict, optimization_parameters
+                )
+            )
+            embedding_variables_and_ops = (
+                ctx.embedding_config.tpu_embedding.create_variables_and_ops(
+                    embedding_variable_name_by_table,
+                    slot_variable_names_by_table
+                ))
+            tpu_init_ops.extend(embedding_variables_and_ops.load_ops())
+
+          host_ops = host_call.create_tpu_hostcall()
+          if host_ops is None:
+            host_ops = []
+
+          shutdown_hooks = []
+          shutdown_mode = os.environ.get('TF_TPU_GRACEFUL_SHUTDOWN_MODE',
+                                         'shutdown_worker')
+          if shutdown_mode:
+            if shutdown_mode == 'shutdown_worker':
+              finalizer_hooks = [
+                  session_support.ShutdownLameWorkers(timeout_ms=60 * 1000),
+              ]
+            elif shutdown_mode == 'shutdown_computation':
+              finalizer_hooks = [
+                  session_support.RestartComputation(timeout_ms=60 * 1000),
+              ]
+            else:
+              raise ValueError(
+                  'Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE "%s"' % shutdown_mode)
+
+            shutdown_hooks.append(
+                session_support.GracefulShutdownHook(
+                    checkpoint_prefix=self.model_dir + '/model.ckpt',
+                    on_shutdown_hooks=finalizer_hooks))
+
+          with ops.control_dependencies([loss]):
+            global_step = array_ops.identity(training.get_global_step())
+          hooks = input_hooks + shutdown_hooks
+          hooks.extend([
+              TPUInfeedOutfeedSessionHook(
+                  ctx,
+                  enqueue_ops,
+                  host_ops,
+                  tpu_compile_op=compile_op,
+                  run_infeed_loop_on_coordinator=(
+                      run_infeed_loop_on_coordinator),
+                  rendezvous=self._rendezvous[mode],
+                  master=self._config.master,
+                  session_config=self._session_config,
+                  tpu_init_ops=tpu_init_ops),
+              InstallSignalHandlerHook()
+          ])
+          if self._log_every_n_steps is not None:
+            logging_hook_frequency = (  # Divide and round up
+                (self._log_every_n_steps +
+                 self._config.tpu_config.iterations_per_loop - 1) //
+                self._config.tpu_config.iterations_per_loop)
+            hooks.append(
+                training.LoggingTensorHook({
+                    'loss': array_ops.identity(loss),
+                    'step': global_step,
+                },
+                                           every_n_iter=logging_hook_frequency))
+            examples_hook._set_steps_per_run(  # pylint: disable=protected-access
+                self._config.tpu_config.iterations_per_loop)
+            hooks.append(examples_hook)
+
+          if training_hooks:
+            hooks.extend(training_hooks)
+
+          chief_hooks = []
+          if (self._config.save_checkpoints_secs or
+              self._config.save_checkpoints_steps):
+            checkpoint_hook = training.CheckpointSaverHook(
+                self.model_dir,
+                save_secs=self._config.save_checkpoints_secs,
+                save_steps=self._config.save_checkpoints_steps,
+                scaffold=scaffold)
+            checkpoint_hook._set_steps_per_run(  # pylint: disable=protected-access
+                self._config.tpu_config.iterations_per_loop)
+            chief_hooks.append(checkpoint_hook)
+
+          summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss)
+          with ops.control_dependencies([loss]):
+            update_ops = _sync_variables_ops(ctx)
+            if ctx.embedding_config:
+              update_ops.extend(embedding_variables_and_ops.retrieve_ops())
+
+          # Validate the TPU training graph to catch basic errors
+          _validate_tpu_training_graph()
+
+          train_op = control_flow_ops.group(*update_ops)
+          graph.add_to_collection(_TPU_TRAIN_OP, train_op)
+
+          return model_fn_lib.EstimatorSpec(
+              mode,
+              loss=loss,
+              training_chief_hooks=chief_hooks,
+              training_hooks=hooks,
+              train_op=train_op,
+              scaffold=scaffold)
+
+        if mode == model_fn_lib.ModeKeys.EVAL:
+          compile_op, total_loss, host_calls, scaffold, eval_hooks = (
+              _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
+          iterations_per_loop_var = _create_or_get_iterations_per_loop()
+          mean_loss = math_ops.div(
+              total_loss,
+              math_ops.cast(iterations_per_loop_var, dtype=total_loss.dtype))
+
+          with ops.control_dependencies([mean_loss]):
+            # After the TPU evaluation computation is done (the mean_loss
+            # tensor), read all variables back from the TPU and update the
+            # eval step counter properly.
+            internal_ops_to_run = _sync_variables_ops(ctx)
+            internal_ops_to_run.append(
+                _increase_eval_step_op(iterations_per_loop_var))
+
+          host_call_ret = host_calls.create_tpu_hostcall()
+          eval_metric_ops = {}
+          eval_update_ops = []
+
+          eval_metrics = host_call_ret.get('eval_metrics', {})
+          if eval_metrics:
+            # Creates a dummy metric update_op for all metrics. Estimator
+            # expects all metrics in `eval_metric_ops` to have an update_op
+            # and calls them one by one. The real metric update_ops are
+            # invoked in a separate thread. So, here we give Estimator a
+            # dummy op for all metrics.
+            with ops.control_dependencies(internal_ops_to_run):
+              dummy_update_op = control_flow_ops.no_op()
+
+            for k, v in eval_metrics.items():
+              eval_metric_ops[k] = (v[0], dummy_update_op)
+              eval_update_ops.append(v[1])
+          else:
+            # If no eval metrics are passed, create an identity node for the
+            # loss and add `internal_ops_to_run` to its dependencies so that
+            # `internal_ops_to_run` gets executed.
+            with ops.control_dependencies(internal_ops_to_run):
+              mean_loss = array_ops.identity(mean_loss)
+
+          if 'host_call' not in host_call_ret:
+            host_ops = []
+          else:
+            host_ops = host_call_ret['host_call']
+          hooks = [
+              TPUInfeedOutfeedSessionHook(
+                  ctx,
+                  enqueue_ops,
+                  eval_update_ops + host_ops,
+                  tpu_compile_op=compile_op,
+                  run_infeed_loop_on_coordinator=(
+                      run_infeed_loop_on_coordinator),
+                  rendezvous=self._rendezvous[mode],
+                  master=self._config.evaluation_master,
+                  session_config=self._session_config,
+                  tpu_init_ops=tpu_init_ops)
+          ] + input_hooks
+
+          if eval_hooks:
+            hooks.extend(eval_hooks)
+
+          return model_fn_lib.EstimatorSpec(
+              mode,
+              loss=mean_loss,
+              evaluation_hooks=hooks,
+              eval_metric_ops=eval_metric_ops,
+              scaffold=scaffold)
+
+        # Predict
+        assert mode == model_fn_lib.ModeKeys.PREDICT
+
+        (compile_op, dummy_predict_op, host_calls,
+         scaffold, prediction_hooks) = _predict_on_tpu_system(
+             ctx, model_fn_wrapper, dequeue_fn)
+        with ops.control_dependencies([dummy_predict_op]):
+          internal_ops_to_run = _sync_variables_ops(ctx)
+          with ops.control_dependencies(internal_ops_to_run):
+            dummy_predict_op = control_flow_ops.no_op()
+
+        # In train and evaluation, the main TPU program is passed to monitored
+        # training session to run. Infeed enqueue and outfeed dequeue are
+        # executed in side threads. This is not the configuration for
+        # prediction mode.
+        #
+        # For prediction, the Estimator executes the EstimatorSpec.predictions
+        # directly and yields each element (via a generator) to the call site.
+        # So, the outfeed-based prediction must be passed to MonitoredSession
+        # directly.
+        # Other parts of the TPU execution are organized as follows.
+        #
+        # 1. All outfeed-based Tensors must be grouped with the predictions
+        #    Tensors to form a single invocation. This avoids the issue where
+        #    we might incorrectly trigger multiple outfeeds. To achieve this,
+        #    `host_call` is placed in the control_dependencies of
+        #    `stopping_signals`, and `stopping_signals` is passed into
+        #    _StoppingPredictHook, which sets `stopping_signals` as
+        #    SessionRunArgs. MonitoredSession merges all SessionRunArgs with
+        #    the fetches in session.run.
+        #
+        # 2. The TPU program (dummy_predict_op) and enqueue_ops (infeed Enqueue)
+        #    are grouped together. They will be launched once and only once in
+        #    side threads and they quit naturally according to the SAME stopping
+        #    condition.
+        enqueue_ops.append(dummy_predict_op)
+
+        host_call_ret = host_calls.create_tpu_hostcall()
+        if 'host_call' not in host_call_ret:
+          host_ops = []
+        else:
+          host_ops = host_call_ret['host_call']
+
+        predictions = host_call_ret['predictions']
+        _verify_cross_hosts_transfer_size(
+            predictions,
+            message=(
+                'The estimated size for TPUEstimatorSpec.predictions is too '
+                'large.'))
+        signals = host_call_ret['signals']
+
+        with ops.control_dependencies(host_ops):
+          host_ops = []  # Empty; we do not need it anymore.
+          scalar_stopping_signal = _StopSignals.as_scalar_stopping_signal(
+              signals)
+          predictions = _PaddingSignals.slice_tensor_or_dict(
+              predictions, signals)
+
+        hooks = [
+            _StoppingPredictHook(scalar_stopping_signal),
+            TPUInfeedOutfeedSessionHookForPrediction(
+                ctx, enqueue_ops, host_ops, rendezvous=self._rendezvous[mode],
+                tpu_compile_op=compile_op,
+                master=self._config.master,
+                session_config=self._session_config),
+        ] + input_hooks
+
+        if prediction_hooks:
+          hooks.extend(prediction_hooks)
+
+        return model_fn_lib.EstimatorSpec(
+            mode,
+            prediction_hooks=hooks,
+            predictions=predictions,
+            scaffold=scaffold)
+
+    return _model_fn
+
+
+def _export_output_to_tensors(export_output):
+  """Get a list of `Tensors` used in `export_output`.
+
+  Args:
+    export_output: an `ExportOutput` object such as `ClassificationOutput`,
+      `RegressionOutput`, or `PredictOutput`.
+
+  Returns:
+    a list of tensors used in export_output.
+
+  Raises:
+    ValueError: if `export_output` is not one of `ClassificationOutput`,
+        `RegressionOutput`, or `PredictOutput`.
+  """
+  if isinstance(export_output, export_output_lib.ClassificationOutput):
+    return [export_output.scores, export_output.classes]
+  elif isinstance(export_output, export_output_lib.RegressionOutput):
+    return [export_output.value]
+  elif isinstance(export_output, export_output_lib.PredictOutput):
+    return list(export_output.outputs.values())
+  else:
+    raise ValueError(
+        '`export_output` must have type `ClassificationOutput`, '
+        '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output))
+
+
+def _clone_export_output_with_tensors(export_output, tensors):
+  """Clones `export_output` but with new `tensors`.
+
+  Args:
+    export_output: an `ExportOutput` object such as `ClassificationOutput`,
+      `RegressionOutput`, or `PredictOutput`.
+    tensors: a list of `Tensors` used to construct a new `export_output`.
+
+  Returns:
+    A dict similar to `export_output` but with `tensors`.
+
+  Raises:
+    ValueError: if `export_output` is not one of `ClassificationOutput`,
+        `RegressionOutput`, or `PredictOutput`.
+  """
+  if isinstance(export_output, export_output_lib.ClassificationOutput):
+    if len(tensors) != 2:
+      raise ValueError('tensors must be of length 2; '
+                       'got {}.'.format(len(tensors)))
+    return export_output_lib.ClassificationOutput(*tensors)
+  elif isinstance(export_output, export_output_lib.RegressionOutput):
+    if len(tensors) != 1:
+      raise ValueError('tensors must be of length 1; '
+                       'got {}'.format(len(tensors)))
+    return export_output_lib.RegressionOutput(*tensors)
+  elif isinstance(export_output, export_output_lib.PredictOutput):
+    return export_output_lib.PredictOutput(
+        dict(zip(export_output.outputs.keys(), tensors)))
+  else:
+    raise ValueError(
+        '`export_output` must have type `ClassificationOutput`, '
+        '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output))
+
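+
+# NOTE: an illustrative sketch (not from the original change) of the round
+# trip the two helpers above are designed for: flatten an `ExportOutput` into
+# tensors, transform the tensors, then rebuild an output of the same type.
+# The demo function is hypothetical and is never called.
+def _demo_clone_export_output():
+  """Round-trips a RegressionOutput through the helpers above."""
+  original = export_output_lib.RegressionOutput(
+      value=constant_op.constant([[1.0], [2.0]]))
+  tensors = _export_output_to_tensors(original)  # -> [original.value]
+  rebuilt = _clone_export_output_with_tensors(
+      original, [array_ops.identity(t) for t in tensors])
+  assert isinstance(rebuilt, export_output_lib.RegressionOutput)
+  return rebuilt
+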
+
+def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
+  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
+  iterations_per_loop_var = _create_or_get_iterations_per_loop()
+
+  (single_tpu_eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks
+  ) = model_fn_wrapper.convert_to_single_tpu_eval_step(dequeue_fn)
+
+  def multi_tpu_eval_steps_on_single_shard():
+    return training_loop.repeat(iterations_per_loop_var, single_tpu_eval_step,
+                                [_ZERO_LOSS])
+
+  (compile_op, loss,) = tpu.split_compile_and_shard(
+      multi_tpu_eval_steps_on_single_shard,
+      inputs=[],
+      num_shards=ctx.num_replicas,
+      outputs_from_all_shards=False,
+      device_assignment=ctx.device_assignment)
+
+  loss = loss[0]
+  scaffold = _get_scaffold(captured_scaffold_fn)
+  return compile_op, loss, host_calls, scaffold, captured_eval_hooks.get()
+
+
+def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
+  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
+  iterations_per_loop_var = _create_or_get_iterations_per_loop()
+
+  (single_tpu_train_step, host_call, captured_scaffold_fn,
+   captured_training_hooks) = (
+       model_fn_wrapper.convert_to_single_tpu_train_step(dequeue_fn))
+
+  @tpu_function.on_device_training_loop
+  def multi_tpu_train_steps_on_single_shard():
+    return training_loop.repeat(iterations_per_loop_var, single_tpu_train_step,
+                                [_INITIAL_LOSS])
+
+  (compile_op, loss,) = tpu.split_compile_and_shard(
+      multi_tpu_train_steps_on_single_shard,
+      inputs=[],
+      num_shards=ctx.num_replicas,
+      outputs_from_all_shards=False,
+      device_assignment=ctx.device_assignment)
+
+  loss = loss[0]
+  scaffold = _get_scaffold(captured_scaffold_fn)
+  return compile_op, loss, host_call, scaffold, captured_training_hooks.get()
+
+
+def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
+  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
+  (single_tpu_predict_step, host_calls, captured_scaffold_fn,
+   captured_predict_hooks
+  ) = model_fn_wrapper.convert_to_single_tpu_predict_step(dequeue_fn)
+
+  @tpu_function.on_device_training_loop
+  def multi_tpu_predict_steps_on_single_shard():
+
+    def cond(scalar_stopping_signal):
+      return math_ops.logical_not(
+          _StopSignals.should_stop(scalar_stopping_signal))
+
+    inputs = [_StopSignals.NON_STOPPING_SIGNAL]
+    outputs = training_loop.while_loop(
+        cond, single_tpu_predict_step, inputs=inputs, name=b'loop')
+    return outputs
+
+  (compile_op, dummy_predict_op,) = tpu.split_compile_and_shard(
+      multi_tpu_predict_steps_on_single_shard,
+      inputs=[],
+      num_shards=ctx.num_replicas,
+      outputs_from_all_shards=False,
+      device_assignment=ctx.device_assignment)
+
+  dummy_predict_op = dummy_predict_op[0]
+  scaffold = _get_scaffold(captured_scaffold_fn)
+  return (compile_op, dummy_predict_op, host_calls, scaffold,
+          captured_predict_hooks.get())
+
+
+def _wrap_computation_in_while_loop(device, op_fn):
+  """Wraps the ops generated by `op_fn` in tf.while_loop."""
+
+  def computation(i):
+    with ops.control_dependencies(op_fn()):
+      return i + 1
+
+  iterations_per_loop_var = _create_or_get_iterations_per_loop()
+  # By setting parallel_iterations=1, the parallel execution in while_loop is
+  # basically turned off.
+  with ops.device(device):
+    iterations = array_ops.identity(iterations_per_loop_var)
+    return control_flow_ops.while_loop(
+        lambda i: i < iterations,
+        computation, [constant_op.constant(0)],
+        parallel_iterations=1)
+
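+
+# NOTE: a plain-Python analog of the loop above, added only to clarify the
+# intended semantics; it is not from the original change. The graph version
+# runs the ops from `op_fn` once per iteration, and parallel_iterations=1
+# forces strictly sequential execution.
+def _python_analog_of_wrapped_while_loop(op_fn, iterations):
+  """Runs op_fn `iterations` times sequentially, mirroring computation(i)."""
+  i = 0
+  while i < iterations:
+    op_fn()  # side effects happen once per iteration
+    i += 1   # plays the role of the loop variable `i` in computation above
+  return i
+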
+
+def _wrap_computation_in_while_loop_with_stopping_signals(device, op_fn):
+  """Wraps the ops generated by `op_fn` in tf.while_loop."""
+
+  def cond(scalar_stopping_signal):
+    return math_ops.logical_not(
+        _StopSignals.should_stop(scalar_stopping_signal))
+
+  def computation(unused_scalar_stopping_signal):
+    return_value = op_fn()
+    execute_ops = return_value['ops']
+    signals = return_value['signals']
+    with ops.control_dependencies(execute_ops):
+      return _StopSignals.as_scalar_stopping_signal(signals)
+
+  # By setting parallel_iterations=1, the parallel execution in while_loop is
+  # basically turned off.
+  with ops.device(device):
+    return control_flow_ops.while_loop(
+        cond,
+        computation, [_StopSignals.NON_STOPPING_SIGNAL],
+        parallel_iterations=1)
+
+
+def _validate_tpu_training_graph():
+  """Validate graph before running distributed training.
+
+  Raises:
+    ValueError: If the graph seems invalid for running on device
+  """
+  operations = ops.get_default_graph().get_operations()
+
+  # Check if there is at least one CrossReplicaSum operation in the graph.
+  # This should be introduced by using the CrossShardOptimizer wrapper.
+  cross_replica_sum_ops = [
+      o for o in operations if o.type == _CROSS_REPLICA_SUM_OP
+  ]
+  if not cross_replica_sum_ops:
+    raise ValueError(
+        'CrossShardOptimizer must be used for model training on TPUs.')
+
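+
+# NOTE: an illustrative sketch (not from the original change) of how a
+# model_fn typically satisfies the check above. The import path is an
+# assumption (tensorflow.python.tpu.tpu_optimizer); wrapping an optimizer in
+# CrossShardOptimizer makes apply_gradients emit CrossReplicaSum ops.
+def _demo_cross_shard_optimizer(loss):
+  """Builds a train_op whose graph passes _validate_tpu_training_graph."""
+  from tensorflow.python.tpu import tpu_optimizer  # assumed import path
+  optimizer = tpu_optimizer.CrossShardOptimizer(
+      training.GradientDescentOptimizer(learning_rate=0.01))
+  return optimizer.minimize(loss, global_step=training.get_global_step())
+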
+
+class _CapturedObject(object):
+  """A placeholder to capture an object.
+
+  This is useful when we need to capture a Python object in the TensorFlow
+  control flow body function and use it outside the control flow.
+  """
+
+  def __init__(self):
+    self._object = None
+    self._captured = False
+
+  def capture(self, o):
+    if self._captured:
+      raise RuntimeError(
+          'InternalError: Object can be captured only once. '
+          'Please file a bug.')
+
+    self._captured = True
+    self._object = o
+
+  def get(self):
+    if not self._captured:
+      raise RuntimeError(
+          'InternalError: Object is not captured properly before `get`. '
+          'Please file a bug.')
+    return self._object
+
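+
+# NOTE: a minimal usage sketch for _CapturedObject, not from the original
+# change. The `body` function stands in for code traced by a TF control flow
+# primitive; the capture-once/get-after contract is the point.
+def _demo_captured_object():
+  """Captures a value inside a traced body and reads it outside."""
+  captured = _CapturedObject()
+
+  def body():  # imagine this is traced by tpu.shard or a while_loop
+    captured.capture('created inside the traced body')
+
+  body()
+  return captured.get()  # -> 'created inside the traced body'
+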
+
+def _get_scaffold(captured_scaffold_fn):
+  """Retrieves the Scaffold from `captured_scaffold_fn`."""
+  with _CapturingContext(message='Inside scaffold_fn'):
+    scaffold_fn = captured_scaffold_fn.get()
+    if scaffold_fn:
+      scaffold = scaffold_fn()
+      if scaffold is None:
+        raise ValueError(
+            'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed')
+    else:
+      scaffold = None
+
+  if scaffold:
+    wrapped_finalize = scaffold.finalize
+
+    def _finalize():
+      with _CapturingContext('Inside Scaffold.finalize'):
+        wrapped_finalize()
+
+    scaffold.finalize = _finalize
+  return scaffold
+
+
+class _CapturingContext(control_flow_ops.ControlFlowContext):
+  """Tracks references to Tensors defined in TPU replication."""
+
+  def __init__(self, message):
+    control_flow_ops.ControlFlowContext.__init__(self)
+    self._message = message
+
+  def to_control_flow_context_def(self, context_def, export_scope=None):
+    # pylint: disable=useless-super-delegation
+    # NOTE(slebedev): the method is required by `ControlFlowContext`.
+    super(_CapturingContext, self).to_control_flow_context_def(
+        context_def, export_scope)
+
+  def AddOp(self, op):  # pylint: disable=invalid-name
+    for c in op.inputs:
+      if tpu._TPU_REPLICATE_ATTR in c.op.node_def.attr:  # pylint: disable=protected-access
+        raise ValueError('{}: Op {} depends on TPU computation {}, '
+                         'which is not allowed.'.format(self._message, op, c))
+
+  def __enter__(self):
+    # pylint: disable=protected-access
+    self._g = ops.get_default_graph()
+    self._old = self._g._get_control_flow_context()
+    self._g._set_control_flow_context(self)
+    # pylint: enable=protected-access
+
+  def __exit__(self, _, __, ___):  # pylint: disable=invalid-name
+    self._g._set_control_flow_context(self._old)  # pylint: disable=protected-access
+
+
+class _Inputs(object):
+  """A data structure representing the input_fn returned values.
+
+  This also supports the case where input_fn returns a `Dataset`.
+  """
+
+  def __init__(self, features=None, labels=None, dataset=None, signals=None):
+    if dataset is not None and (features is not None or labels is not None or
+                                signals is not None):
+      raise RuntimeError('Internal Error: Either (features and labels) or '
+                         'dataset should be provided, not both. Please file '
+                         'a bug.')
+
+    self._features = features
+    self._labels = labels
+    self._signals = signals
+
+    self._dataset = dataset
+    self._iterator = None
+
+  @staticmethod
+  def from_input_fn(return_values):
+    """Returns an `_Inputs` instance according to `input_fn` return value."""
+    if isinstance(return_values, dataset_ops.DatasetV2):
+      dataset = return_values
+      return _Inputs(dataset=dataset)
+
+    features, labels = _Inputs._parse_inputs(return_values)
+    return _Inputs(features, labels)
+
+  @staticmethod
+  def _parse_inputs(return_values):
+    if isinstance(return_values, tuple):
+      features, labels = return_values
+    else:
+      features, labels = return_values, None
+    return features, labels
+
+  @property
+  def is_dataset(self):
+    """Returns True if the return value from input_fn is Dataset."""
+    return self._dataset is not None
+
+  def dataset_initializer(self):
+    """Returns the dataset's initializer.
+
+    The initializer must be run before calling `features_and_labels`.
+    """
+    self._iterator = dataset_ops.make_initializable_iterator(self._dataset)
+    return self._iterator.initializer
+
+  def features_and_labels(self):
+    """Gets `features` and `labels`."""
+    if self.is_dataset:
+      if self._iterator is None:
+        raise RuntimeError('Internal error: Must run dataset_initializer '
+                           'before calling features_and_labels(). Please file '
+                           'a bug!')
+      return _Inputs._parse_inputs(self._iterator.get_next())
+
+    return (self._features, self._labels)
+
+  def signals(self):
+    return self._signals
+
+  @property
+  def dataset(self):
+    return self._dataset
+
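+
+# NOTE: an illustrative sketch (not from the original change) of driving
+# _Inputs with a Dataset-returning input_fn. The values are arbitrary.
+def _demo_inputs_from_dataset():
+  """Builds an _Inputs from a Dataset and fetches one (features, labels)."""
+  dataset = dataset_ops.Dataset.from_tensor_slices(
+      ({'x': [1., 2., 3., 4.]}, [0., 1., 0., 1.])).batch(2)
+  inputs = _Inputs.from_input_fn(dataset)
+  assert inputs.is_dataset
+  initializer = inputs.dataset_initializer()  # must run before get_next
+  features, labels = inputs.features_and_labels()
+  return initializer, features, labels
+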
+
+class _InputsWithStoppingSignals(_Inputs):
+  """Inputs with `_StopSignals` inserted into the dataset."""
+
+  def __init__(self,
+               dataset,
+               batch_size,
+               add_padding=False,
+               num_invocations_per_step=1):
+
+    assert dataset is not None
+    user_provided_dataset = dataset.map(
+        _InputsWithStoppingSignals.insert_stopping_signal(
+            stop=False, batch_size=batch_size, add_padding=add_padding))
+    if num_invocations_per_step == 1:
+      final_batch_dataset = dataset.take(1).map(
+          _InputsWithStoppingSignals.insert_stopping_signal(
+              stop=True, batch_size=batch_size, add_padding=add_padding))
+    else:
+      # We append (2 * num_invocations_per_step - 1) batches to exhaust the
+      # user_provided_dataset and stop properly.
+      # For example, if num_invocations_per_step is 2, we append 3 additional
+      # padding batches: b1, b2, b3.
+      # If user_provided_dataset contains two batches: a1, a2
+      # Step 1: [a1, a2]
+      # Step 2: [b1, b2] -> STOP
+      # If user_provided_dataset contains three batches: a1, a2, a3.
+      # The training loops:
+      # Step 1: [a1, a2]
+      # Step 2: [a3, b1]
+      # Step 3: [b2, b3] -> STOP.
+      final_batch_dataset = dataset.take(1).map(
+          _InputsWithStoppingSignals.insert_stopping_signal(
+              stop=True, batch_size=batch_size, add_padding=add_padding))
+      final_batch_dataset = final_batch_dataset.repeat(
+          2 * num_invocations_per_step - 1)
+
+      def _set_mask(data_dict):
+        signals = data_dict['signals']
+        signals['padding_mask'] = array_ops.ones_like(signals['padding_mask'])
+        data_dict['signals'] = signals
+        return data_dict
+
+      # Mask out the extra batch.
+      final_batch_dataset = final_batch_dataset.map(_set_mask)
+
+    dataset = user_provided_dataset.concatenate(final_batch_dataset).prefetch(2)
+
+    super(_InputsWithStoppingSignals, self).__init__(dataset=dataset)
+    self._current_inputs = None
+
+  def features_and_labels(self):
+    if self._current_inputs is not None:
+      raise RuntimeError(
+          'Internal Error: The previous inputs have not been properly '
+          'consumed. First call features_and_labels, then call signals.')
+
+    inputs_with_signals = self._iterator.get_next()
+    features = inputs_with_signals['features']
+    labels = inputs_with_signals.get('labels')
+
+    self._current_inputs = inputs_with_signals
+    return features, labels
+
+  def signals(self):
+    """Returns the `Signals` from `_Inputs`."""
+    if self._current_inputs is None:
+      raise RuntimeError(
+          'Internal Error: The current inputs have not been properly '
+          'generated. First call features_and_labels, then call signals.')
+    signals = self._current_inputs['signals']
+    self._current_inputs = None
+    return signals
+
+  @staticmethod
+  def insert_stopping_signal(stop, batch_size, add_padding=False):
+    """Inserts stopping_signal into dataset via _map_fn.
+
+    Here we change the data structure in the dataset, such that the return
+    value is now a dictionary in which `features`, `labels`, and `signals`
+    are three distinct keys. This provides a better structure, which makes
+    it easier to decompose the inputs (see `features_and_labels`).
+
+    Args:
+      stop: bool, state of current stopping signals.
+      batch_size: int, batch size.
+      add_padding: bool, whether to pad the tensor to full batch size.
+
+    Returns:
+      A map_fn passed to dataset.map API.
+    """
+
+    def _map_fn(*args):
+      """The map fn to insert signals."""
+      if len(args) == 1:
+        # Unpack the single Tensor/dict argument as features. This is required
+        # when the input_fn returns no labels.
+        args = args[0]
+      features, labels = _Inputs._parse_inputs(args)
+      new_input_dict = {}
+
+      if add_padding:
+        padding_mask, features, labels = (
+            _PaddingSignals.pad_features_and_labels(features, labels,
+                                                    batch_size))
+
+        new_input_dict['features'] = features
+        if labels is not None:
+          new_input_dict['labels'] = labels
+
+      else:
+        new_input_dict['features'] = features
+        if labels is not None:
+          new_input_dict['labels'] = labels
+        padding_mask = None
+
+      new_input_dict['signals'] = _StopSignals(
+          stop=stop, batch_size=batch_size,
+          padding_mask=padding_mask).as_dict()
+
+      return new_input_dict
+
+    return _map_fn
+
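+
+# NOTE: an illustrative sketch (not from the original change) of the element
+# structure produced by insert_stopping_signal when mapped over a batched
+# dataset (no labels, no padding). The values are arbitrary.
+def _demo_stopping_signal_structure():
+  """Maps insert_stopping_signal over a small batched dataset."""
+  dataset = dataset_ops.Dataset.from_tensor_slices(
+      {'x': [[1.0], [2.0]]}).batch(2)
+  dataset = dataset.map(
+      _InputsWithStoppingSignals.insert_stopping_signal(
+          stop=False, batch_size=2, add_padding=False))
+  # Each element is now a dict:
+  #   {'features': {'x': <float32 [2, 1]>},
+  #    'signals': {'stopping': <bool [2, 1]>, all False here}}
+  return dataset
+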
+
+class _StopSignals(object):
+  """Signals class holding all logic to handle TPU stopping condition."""
+
+  NON_STOPPING_SIGNAL = False
+  STOPPING_SIGNAL = True
+
+  def __init__(self, stop, batch_size, padding_mask=None):
+    self._stop = stop
+    self._batch_size = batch_size
+    self._padding_mask = padding_mask
+
+  def as_dict(self):
+    """Returns the signals as Python dict."""
+    shape = [self._batch_size, 1]
+    dtype = dtypes.bool
+
+    if self._stop:
+      stopping = array_ops.ones(shape=shape, dtype=dtype)
+    else:
+      stopping = array_ops.zeros(shape=shape, dtype=dtype)
+
+    signals = {'stopping': stopping}
+    if self._padding_mask is not None:
+      signals['padding_mask'] = self._padding_mask
+    return signals
+
+  @staticmethod
+  def as_scalar_stopping_signal(signals):
+    return array_ops.identity(signals['stopping'][0][0])
+
+  @staticmethod
+  def should_stop(scalar_stopping_signal):
+    """Detects whether scalar_stopping_signal indicates stopping."""
+    if isinstance(scalar_stopping_signal, ops.Tensor):
+      # STOPPING_SIGNAL is a constant True. Here, the logical_and is just the
+      # TF way to express a bool check of whether scalar_stopping_signal is
+      # True.
+      return math_ops.logical_and(scalar_stopping_signal,
+                                  _StopSignals.STOPPING_SIGNAL)
+    else:
+      # In the non-Tensor case, this is used in a SessionRunHook, so we cannot
+      # modify the graph anymore. Here, we use pure Python.
+      return bool(scalar_stopping_signal)
+
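+
+# NOTE: a small sketch (not from the original change) contrasting the two
+# code paths of _StopSignals.should_stop above.
+def _demo_should_stop():
+  """Graph path returns a bool Tensor; Python path returns a plain bool."""
+  in_graph = _StopSignals.should_stop(constant_op.constant(True))  # Tensor
+  in_python = _StopSignals.should_stop(True)  # plain Python True
+  return in_graph, in_python
+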
+
+class _PaddingSignals(object):
+  """Signals class holding all logic to handle padding."""
+
+  @staticmethod
+  def pad_features_and_labels(features, labels, batch_size):
+    """Pads out the batch dimension of features and labels."""
+    real_batch_size = array_ops.shape(
+        _PaddingSignals._find_any_tensor(features))[0]
+
+    batch_size_tensor = constant_op.constant(batch_size, dtypes.int32)
+
+    check_greater = check_ops.assert_greater_equal(
+        batch_size_tensor,
+        real_batch_size,
+        data=(batch_size_tensor, real_batch_size),
+        message='The real batch size should not be greater than batch_size.')
+
+    with ops.control_dependencies([check_greater]):
+      missing_count = batch_size_tensor - real_batch_size
+
+    def pad_single_tensor(tensor):
+      """Pads out the batch dimension of a tensor to the complete batch_size."""
+      rank = len(tensor.shape)
+      assert rank > 0
+      padding = array_ops.stack([[0, missing_count]] + [[0, 0]] * (rank - 1))
+      padded_shape = (batch_size,) + tuple(tensor.shape[1:])
+      padded_tensor = array_ops.pad(tensor, padding)
+      padded_tensor.set_shape(padded_shape)
+      return padded_tensor
+
+    def nest_pad(tensor_or_dict):
+      return nest.map_structure(pad_single_tensor, tensor_or_dict)
+
+    features = nest_pad(features)
+    if labels is not None:
+      labels = nest_pad(labels)
+
+    padding_mask = _PaddingSignals._padding_mask(real_batch_size, missing_count,
+                                                 batch_size)
+
+    return padding_mask, features, labels
+
+  @staticmethod
+  def slice_tensor_or_dict(tensor_or_dict, signals):
+    """Slice the real Tensors according to padding mask in signals."""
+
+    padding_mask = signals['padding_mask']
+    batch_size = array_ops.shape(padding_mask)[0]
+
+    def verify_batch_size(tensor):
+      check_batch_size = math_ops.equal(batch_size, tensor.shape[0])
+      with ops.control_dependencies([check_batch_size]):
+        return array_ops.identity(tensor)
+
+    def slice_single_tensor(tensor):
+      rank = len(tensor.shape)
+      assert rank > 0
+      real_batch_size = batch_size - math_ops.reduce_sum(padding_mask)
+      return verify_batch_size(tensor)[0:real_batch_size]
+
+    # As we split the Tensors across all TPU cores and concat them back, it
+    # is important to ensure the real data is placed before the padded data,
+    # i.e., the order is preserved. Given that, the sliced padding mask should
+    # be all 0's. If this assertion fails, the slice logic here would not
+    # hold.
+    sliced_padding_mask = slice_single_tensor(padding_mask)
+    assert_padding_mask = math_ops.equal(
+        math_ops.reduce_sum(sliced_padding_mask), 0)
+
+    with ops.control_dependencies([assert_padding_mask]):
+      should_stop = _StopSignals.should_stop(
+          _StopSignals.as_scalar_stopping_signal(signals))
+
+    is_full_batch = math_ops.equal(math_ops.reduce_sum(padding_mask), 0)
+
+    def slice_fn(tensor):
+      # If the current batch is full batch or part of stopping signals, we do
+      # not need to slice to save performance.
+      return control_flow_ops.cond(
+          math_ops.logical_or(should_stop, is_full_batch),
+          (lambda: verify_batch_size(tensor)),
+          (lambda: slice_single_tensor(tensor)))
+
+    return nest.map_structure(slice_fn, tensor_or_dict)
+
+  @staticmethod
+  def _find_any_tensor(batch_features):
+    tensors = [
+        x for x in nest.flatten(batch_features) if isinstance(x, ops.Tensor)
+    ]
+    if not tensors:
+      raise ValueError('Cannot find any Tensor in features dict.')
+    return tensors[0]
+
+  @staticmethod
+  def _padding_mask(real_batch_size, missing_count, batch_size):
+    padding_mask = array_ops.concat([
+        array_ops.zeros((real_batch_size,), dtype=dtypes.int32),
+        array_ops.ones((missing_count,), dtype=dtypes.int32)
+    ],
+                                    axis=0)
+    padding_mask.set_shape((batch_size,))
+    return padding_mask
+
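+
+# NOTE: numeric intuition (not from the original change) for how the padding
+# mask interacts with slicing: real rows get 0, padded rows get 1, and
+# slice_tensor_or_dict keeps exactly the rows marked 0. Plain numpy stands in
+# for the TF ops here.
+def _demo_padding_mask_values():
+  """For batch_size=4 with 3 real rows, the mask is [0, 0, 0, 1]."""
+  real_batch_size, batch_size = 3, 4
+  return np.concatenate([
+      np.zeros((real_batch_size,), dtype=np.int32),
+      np.ones((batch_size - real_batch_size,), dtype=np.int32)
+  ])  # -> array([0, 0, 0, 1], dtype=int32)
+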
+
+def _verify_cross_hosts_transfer_size(tensor_dict, message):
+  total_size = 0
+  tensor_structure = {}
+  for key, tensor in tensor_dict.items():
+    shape = tensor.shape
+    size = np.product(shape) * tensor.dtype.size
+    tensor_structure[key] = shape
+    total_size += size
+  if total_size >= _ONE_GIGABYTE:
+    raise ValueError(
+        '{} The transfer size is larger than the protobuf limit. Please '
+        'consider using Tensors with smaller shapes or reducing the batch '
+        'size. Given:\n'
+        '{}'.format(
+            message, '\n'.join([
+                ' -- Key: {}, Shape: {}'.format(k, v)
+                for k, v in tensor_structure.items()
+            ])))
+
+
+def _add_item_to_params(params, key, value):
+  """Adds a new item into `params`."""
+  if hasattr(params, 'set_hparam'):
+    # For HParams, we need to use its special API.
+    if key in params:
+      params.set_hparam(key, value)
+    else:
+      params.add_hparam(key, value)
+  else:
+    # Now params is Python dict.
+    params[key] = value
+
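+
+# NOTE: a trivial sketch of _add_item_to_params on a plain dict, not from the
+# original change; the key and value are arbitrary.
+def _demo_add_item_to_params():
+  """The dict path is plain item assignment; HParams uses set/add_hparam."""
+  params = {'batch_size': 1024}
+  _add_item_to_params(params, 'use_tpu', True)
+  return params  # -> {'batch_size': 1024, 'use_tpu': True}
+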
+
+def export_estimator_savedmodel(estimator,
+                                export_dir_base,
+                                serving_input_receiver_fn,
+                                assets_extra=None,
+                                as_text=False,
+                                checkpoint_path=None,
+                                strip_default_attrs=False):
+  """Export `Estimator` trained model for TPU inference.
+
+  Args:
+    estimator: `Estimator` with which model has been trained.
+    export_dir_base: A string containing a directory in which to create
+      timestamped subdirectories containing exported SavedModels.
+    serving_input_receiver_fn: A function that takes no argument and returns a
+      `ServingInputReceiver` or `TensorServingInputReceiver`.
+    assets_extra: A dict specifying how to populate the assets.extra directory
+      within the exported SavedModel, or `None` if no extra assets are needed.
+    as_text: whether to write the SavedModel proto in text format.
+    checkpoint_path: The checkpoint path to export.  If `None` (the default),
+      the most recent checkpoint found within the model directory is chosen.
+    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+      removed from the NodeDefs.
+
+  Returns:
+    The string path to the exported directory.
+  """
+  # `TPUEstimator` requires `tpu_config.RunConfig`, so we cannot use
+  # `estimator.config`.
+  config = tpu_config.RunConfig(model_dir=estimator.model_dir)
+  est = TPUEstimator(
+      estimator._model_fn,  # pylint: disable=protected-access
+      config=config,
+      params=estimator.params,
+      use_tpu=True,
+      train_batch_size=2048,  # Does not matter.
+      eval_batch_size=2048,  # Does not matter.
+  )
+  return est.export_savedmodel(export_dir_base, serving_input_receiver_fn,
+                               assets_extra, as_text, checkpoint_path,
+                               strip_default_attrs)
diff --git a/tensorflow/python/tpu/tpu_estimator_signals_test.py b/tensorflow/python/tpu/tpu_estimator_signals_test.py
new file mode 100644
index 0000000..ca3eeaa
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_estimator_signals_test.py
@@ -0,0 +1,339 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TPU Estimator Signalling Tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import tpu_estimator
+
+
+def make_input_fn(num_samples):
+  a = np.linspace(0, 100.0, num=num_samples)
+  b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1))
+
+  def input_fn(params):
+    batch_size = params['batch_size']
+    da1 = dataset_ops.Dataset.from_tensor_slices(a)
+    da2 = dataset_ops.Dataset.from_tensor_slices(b)
+
+    dataset = dataset_ops.Dataset.zip((da1, da2))
+    dataset = dataset.map(lambda fa, fb: {'a': fa, 'b': fb})
+    dataset = dataset.batch(batch_size)
+    return dataset
+  return input_fn, (a, b)
+
+
+def make_input_fn_with_labels(num_samples):
+  a = np.linspace(0, 100.0, num=num_samples)
+  b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1))
+
+  def input_fn(params):
+    batch_size = params['batch_size']
+    da1 = dataset_ops.Dataset.from_tensor_slices(a)
+    da2 = dataset_ops.Dataset.from_tensor_slices(b)
+
+    dataset = dataset_ops.Dataset.zip((da1, da2))
+    dataset = dataset.map(lambda fa, fb: ({'a': fa}, fb))
+    dataset = dataset.batch(batch_size)
+    return dataset
+  return input_fn, (a, b)
+
+
+class TPUEstimatorStoppingSignalsTest(test.TestCase):
+
+  def test_normal_output_without_signals(self):
+    num_samples = 4
+    batch_size = 2
+
+    params = {'batch_size': batch_size}
+    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
+
+    with ops.Graph().as_default():
+      dataset = input_fn(params)
+      features = dataset_ops.make_one_shot_iterator(dataset).get_next()
+
+      # With tf.data.Dataset.batch, the batch dimension is None, i.e., it has
+      # a dynamic shape.
+      self.assertIsNone(features['a'].shape.as_list()[0])
+
+      with session.Session() as sess:
+        result = sess.run(features)
+        self.assertAllEqual(a[:batch_size], result['a'])
+        self.assertAllEqual(b[:batch_size], result['b'])
+
+        # This run should work as num_samples / batch_size = 2.
+        result = sess.run(features)
+        self.assertAllEqual(a[batch_size:num_samples], result['a'])
+        self.assertAllEqual(b[batch_size:num_samples], result['b'])
+
+        with self.assertRaises(errors.OutOfRangeError):
+          # Given num_samples and batch_size, this run should fail.
+          sess.run(features)
+
+  def test_output_with_stopping_signals(self):
+    num_samples = 4
+    batch_size = 2
+
+    params = {'batch_size': batch_size}
+    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
+
+    with ops.Graph().as_default():
+      dataset = input_fn(params)
+      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size)
+      dataset_initializer = inputs.dataset_initializer()
+      features, _ = inputs.features_and_labels()
+      signals = inputs.signals()
+
+      # With tf.data.Dataset.batch, the batch dimension is None, i.e., it has
+      # a dynamic shape.
+      self.assertIsNone(features['a'].shape.as_list()[0])
+
+      with session.Session() as sess:
+        sess.run(dataset_initializer)
+
+        result, evaluated_signals = sess.run([features, signals])
+        self.assertAllEqual(a[:batch_size], result['a'])
+        self.assertAllEqual(b[:batch_size], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+
+        # This run should work as num_samples / batch_size = 2.
+        result, evaluated_signals = sess.run([features, signals])
+        self.assertAllEqual(a[batch_size:num_samples], result['a'])
+        self.assertAllEqual(b[batch_size:num_samples], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+
+        # This run should work, *but* we should see STOP ('1') in the signals.
+        _, evaluated_signals = sess.run([features, signals])
+        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
+
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(features)
+
+
+class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
+
+  def test_num_samples_divisible_by_batch_size(self):
+    num_samples = 4
+    batch_size = 2
+
+    params = {'batch_size': batch_size}
+    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
+
+    with ops.Graph().as_default():
+      dataset = input_fn(params)
+      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
+                                                        add_padding=True)
+      dataset_initializer = inputs.dataset_initializer()
+      features, _ = inputs.features_and_labels()
+      signals = inputs.signals()
+
+      # With padding, all shapes are static now.
+      self.assertEqual(batch_size, features['a'].shape.as_list()[0])
+
+      with session.Session() as sess:
+        sess.run(dataset_initializer)
+
+        result, evaluated_signals = sess.run([features, signals])
+        self.assertAllEqual(a[:batch_size], result['a'])
+        self.assertAllEqual(b[:batch_size], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+        self.assertAllEqual([0.] * batch_size,
+                            evaluated_signals['padding_mask'])
+
+        # This run should work as num_samples / batch_size = 2.
+        result, evaluated_signals = sess.run([features, signals])
+        self.assertAllEqual(a[batch_size:num_samples], result['a'])
+        self.assertAllEqual(b[batch_size:num_samples], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+        self.assertAllEqual([0.] * batch_size,
+                            evaluated_signals['padding_mask'])
+
+        # This run should work, *but* we should see STOP ('1') in the signals.
+        _, evaluated_signals = sess.run([features, signals])
+        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
+
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(features)
+
+  def test_num_samples_not_divisible_by_batch_size(self):
+    num_samples = 5
+    batch_size = 2
+
+    params = {'batch_size': batch_size}
+    input_fn, (a, b) = make_input_fn_with_labels(num_samples=num_samples)
+
+    with ops.Graph().as_default():
+      dataset = input_fn(params)
+      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
+                                                        add_padding=True)
+      dataset_initializer = inputs.dataset_initializer()
+      features, labels = inputs.features_and_labels()
+      signals = inputs.signals()
+
+      # With padding, all shapes are static.
+      self.assertEqual(batch_size, features['a'].shape.as_list()[0])
+
+      with session.Session() as sess:
+        sess.run(dataset_initializer)
+
+        evaluated_features, evaluated_labels, evaluated_signals = (
+            sess.run([features, labels, signals]))
+        self.assertAllEqual(a[:batch_size], evaluated_features['a'])
+        self.assertAllEqual(b[:batch_size], evaluated_labels)
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+        self.assertAllEqual([0.] * batch_size,
+                            evaluated_signals['padding_mask'])
+
+        # This run should work as num_samples / batch_size >= 2.
+        evaluated_features, evaluated_labels, evaluated_signals = (
+            sess.run([features, labels, signals]))
+        self.assertAllEqual(a[batch_size:2*batch_size], evaluated_features['a'])
+        self.assertAllEqual(b[batch_size:2*batch_size], evaluated_labels)
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+        self.assertAllEqual([0.] * batch_size,
+                            evaluated_signals['padding_mask'])
+
+        # This is the final partial batch.
+        evaluated_features, evaluated_labels, evaluated_signals = (
+            sess.run([features, labels, signals]))
+        real_batch_size = num_samples % batch_size
+
+        # Assert the real part.
+        self.assertAllEqual(a[2*batch_size:num_samples],
+                            evaluated_features['a'][:real_batch_size])
+        self.assertAllEqual(b[2*batch_size:num_samples],
+                            evaluated_labels[:real_batch_size])
+        # Assert the padded part.
+        self.assertAllEqual([0.0] * (batch_size - real_batch_size),
+                            evaluated_features['a'][real_batch_size:])
+        self.assertAllEqual([[0.0]] * (batch_size - real_batch_size),
+                            evaluated_labels[real_batch_size:])
+
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+
+        padding = ([.0] * real_batch_size
+                   + [1.] * (batch_size - real_batch_size))
+        self.assertAllEqual(padding, evaluated_signals['padding_mask'])
+
+        # This run should work, *but* we should see STOP ('1') in the signals.
+        _, evaluated_signals = sess.run([features, signals])
+        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
+
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(features)
+
+  def test_slice(self):
+    num_samples = 3
+    batch_size = 2
+
+    params = {'batch_size': batch_size}
+    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
+
+    with ops.Graph().as_default():
+      dataset = input_fn(params)
+      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
+                                                        add_padding=True)
+      dataset_initializer = inputs.dataset_initializer()
+      features, _ = inputs.features_and_labels()
+      signals = inputs.signals()
+
+      sliced_features = (
+          tpu_estimator._PaddingSignals.slice_tensor_or_dict(
+              features, signals))
+
+      with session.Session() as sess:
+        sess.run(dataset_initializer)
+
+        result, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertAllEqual(a[:batch_size], result['a'])
+        self.assertAllEqual(b[:batch_size], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+
+        # This is the final partial batch.
+        result, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertEqual(1, len(result['a']))
+        self.assertAllEqual(a[batch_size:num_samples], result['a'])
+        self.assertAllEqual(b[batch_size:num_samples], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+
+        # This run should work, *but* we should see STOP ('1') in the signals.
+        _, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
+
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(sliced_features)
+
+  def test_slice_with_multi_invocations_per_step(self):
+    num_samples = 3
+    batch_size = 2
+
+    params = {'batch_size': batch_size}
+    input_fn, (a, b) = make_input_fn(num_samples=num_samples)
+
+    with ops.Graph().as_default():
+      dataset = input_fn(params)
+      inputs = tpu_estimator._InputsWithStoppingSignals(
+          dataset, batch_size, add_padding=True, num_invocations_per_step=2)
+      dataset_initializer = inputs.dataset_initializer()
+      features, _ = inputs.features_and_labels()
+      signals = inputs.signals()
+
+      sliced_features = (
+          tpu_estimator._PaddingSignals.slice_tensor_or_dict(features, signals))
+
+      with session.Session() as sess:
+        sess.run(dataset_initializer)
+
+        result, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertAllEqual(a[:batch_size], result['a'])
+        self.assertAllEqual(b[:batch_size], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+
+        # This is the final partial batch.
+        result, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertEqual(1, len(result['a']))
+        self.assertAllEqual(a[batch_size:num_samples], result['a'])
+        self.assertAllEqual(b[batch_size:num_samples], result['b'])
+        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
+
+        # We should see 3 consecutive batches with STOP ('1') as the signals,
+        # all of which have padding mask 1.
+        _, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
+        self.assertAllEqual([1.] * batch_size,
+                            evaluated_signals['padding_mask'])
+
+        _, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
+        self.assertAllEqual([1.] * batch_size,
+                            evaluated_signals['padding_mask'])
+
+        _, evaluated_signals = sess.run([sliced_features, signals])
+        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])
+        self.assertAllEqual([1.] * batch_size,
+                            evaluated_signals['padding_mask'])
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(sliced_features)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/tpu/tpu_feed.py b/tensorflow/python/tpu/tpu_feed.py
new file mode 100644
index 0000000..de1adc8
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_feed.py
@@ -0,0 +1,919 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+
+"""Helper library for handling infeed between hosts and TPUs.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.tpu import tpu
+from tensorflow.python.tpu import tpu_sharding
+from tensorflow.python.tpu.ops import tpu_ops
+
+from tensorflow.python.util import nest
+
+
+def partition_or_replicate_on_host(tensor, dims):
+  """Partitions or replicates the input tensor.
+
+  The ops inside this function are placed on the host side.
+
+  Args:
+    tensor: The input tensor which will be partitioned or replicated.
+    dims: A list of integers describing how to partition the input tensor.
+
+  Returns:
+    An iterator of `Tensor`s or a list of partitioned tensors.
+  """
+  if dims is None:
+    return itertools.repeat(tensor)
+  dims = np.array(dims)
+  output = [tensor]
+  shape_list = np.array(tensor.shape.as_list())
+  quotients, remainders = np.divmod(shape_list, dims)
+  for axis, (quotient, remainder, dim, original_size) in enumerate(
+      zip(quotients, remainders, dims, shape_list)):
+    if dim <= 1:
+      continue
+    if remainder > 0:
+      # For each dimension, when it cannot be evenly partitioned, XLA assumes
+      # tensors are partitioned in a greedy manner by using
+      # ceil_ratio(size/dim) first. E.g. 2D tensor with shape (5, 14) and dims
+      # are (2, 4). Since 5 % 2 = 1 and 14 % 4 = 2, [5, 14] =>
+      # [[(3, 4), (3, 4), (2, 4), (2, 2)],
+      # [(2, 4), (2, 4), (2, 4), (2, 2)]]
+      ceil_ratio = quotient + 1
+      num_full_slots, left_over = np.divmod(original_size, ceil_ratio)
+      num_or_size_splits = [ceil_ratio] * num_full_slots + [left_over]
+      if len(num_or_size_splits) < dim:
+        num_or_size_splits += [0] * (dim - len(num_or_size_splits))
+      new_output = []
+      for x in output:
+        new_output.append(
+            array_ops.split(
+                x, num_or_size_splits=num_or_size_splits, axis=axis))
+      output = new_output
+    else:
+      output = [array_ops.split(x, dim, axis=axis) for x in output]
+    output = nest.flatten(output)
+  return output
+
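+
+# NOTE: a numeric check (not from the original change) of the greedy
+# ceil-ratio split described in the comment above, using plain numpy.
+def _demo_uneven_partition_sizes():
+  """For a dimension of size 14 split 4 ways, the sizes are [4, 4, 4, 2]."""
+  original_size, dim = 14, 4
+  quotient, remainder = np.divmod(original_size, dim)  # -> (3, 2)
+  assert remainder > 0  # the uneven branch above
+  ceil_ratio = quotient + 1
+  num_full_slots, left_over = np.divmod(original_size, ceil_ratio)
+  num_or_size_splits = [ceil_ratio] * num_full_slots + [left_over]
+  if len(num_or_size_splits) < dim:
+    num_or_size_splits += [0] * (dim - len(num_or_size_splits))
+  return num_or_size_splits  # -> [4, 4, 4, 2], summing to 14
+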
+
+def _tag_sharding_attribute_for_dequeued_tensor(tensor, dims):
+  """Tags appropriate XLA sharding attribute to the dequeued tensor.
+
+  Args:
+    tensor: The dequeued tensor on TPU.
+    dims: A list of integers describing how the tensor is partitioned.
+
+  Returns:
+    The same tensor with the xla_sharding attribute.
+  """
+  if dims is None:
+    return xla_sharding.replicate(tensor)
+  elif np.prod(dims) == 1:
+    return xla_sharding.assign_device(tensor, 0)
+  else:
+    tile_assignment = np.arange(np.prod(dims)).reshape(dims)
+    return xla_sharding.tile(tensor=tensor, tile_assignment=tile_assignment)
+
+
+def tag_sharding_attribute_for_dequeued_tensors(dequeues, dims):
+  """Tags appropriate XLA sharding attribute to the dequeued tensors.
+
+  Args:
+    dequeues: A list of dequeued tensors on TPU.
+    dims: A list of integers describing how the tensors are partitioned.
+
+  Returns:
+    The same dequeues with appropriate xla_sharding attribute.
+  """
+  nest.assert_shallow_structure(dequeues, dims)
+  return nest.map_structure_up_to(
+      dequeues, _tag_sharding_attribute_for_dequeued_tensor, dequeues, dims)
+
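+
+# NOTE: a small sketch (not from the original change) of the tile assignment
+# _tag_sharding_attribute_for_dequeued_tensor builds for a partitioned
+# tensor; dims=[2, 2] is an arbitrary example.
+def _demo_tile_assignment():
+  """For dims=[2, 2], tiles are numbered [[0, 1], [2, 3]], one per device."""
+  dims = [2, 2]
+  return np.arange(np.prod(dims)).reshape(dims)
+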
+
+class InfeedQueue(object):
+  """A helper object to build a device infeed queue.
+
+  The InfeedQueue builds the host-side and device-side Ops to enqueue and
+  dequeue elements, respectively, and ensures that their types and
+  shapes match.
+  """
+
+  def __init__(self,
+               number_of_tuple_elements=None,
+               tuple_types=None,
+               tuple_shapes=None,
+               shard_dimensions=None,
+               name=None):
+    """Creates a new InfeedQueue with the given configuration.
+
+    The configuration need not be fully specified at creation since it
+    can be modified subsequently by methods that set the values
+    explicitly or infer them from the shapes of inputs.
+
+    Args:
+      number_of_tuple_elements: the number of Tensors fed atomically through the
+        queue; must be present unless it can be inferred from other arguments.
+      tuple_types: if not None, a list of types of the elements of the queue.
+      tuple_shapes: if not None, a list of shapes of the elements of the queue.
+      shard_dimensions: if not None, a list of dimensions on which the
+        elements of the queue should be sharded during automatic
+        parallelization.
+      name: the name of the queue.
+
+    Raises:
+      ValueError: if number_of_tuple_elements <= 0; or
+        number_of_tuple_elements, tuple_types, tuple_shapes, and
+        shard_dimensions are all None; or the length of tuple_types,
+        tuple_shapes, or shard_dimensions is not equal to
+        number_of_tuple_elements; or any element of shard_dimensions
+        can't be converted to a Dimension.
+      TypeError: if any element of tuple_types or tuple_shapes can't
+        be converted to a dtype or TensorShape, respectively.
+    """
+    self._frozen = False
+    self._generated_enqueue_ops = False
+    self._generated_dequeue_op = False
+    self._name = "InfeedQueue" if name is None else name
+    if number_of_tuple_elements is None:
+      if tuple_types is not None:
+        number_of_tuple_elements = len(tuple_types)
+      elif tuple_shapes is not None:
+        number_of_tuple_elements = len(tuple_shapes)
+      elif shard_dimensions is not None:
+        number_of_tuple_elements = len(shard_dimensions)
+      else:
+        raise ValueError(
+            "number of tuple elements cannot be inferred from InfeedQueue "
+            "constructor")
+    if number_of_tuple_elements <= 0:
+      raise ValueError("number_of_tuple_elements %d must be > 0" %
+                       number_of_tuple_elements)
+    # Make an empty sharding policy for each tuple element.
+    self._sharding_policies = [
+        tpu_sharding.ShardingPolicy()
+        for _ in xrange(number_of_tuple_elements)
+    ]
+    if tuple_types is not None:
+      self.set_tuple_types(tuple_types)
+    else:
+      self._tuple_types = None
+    if tuple_shapes is not None:
+      self.set_tuple_shapes(tuple_shapes)
+    else:
+      self._tuple_shapes = None
+    if shard_dimensions is not None:
+      self.set_shard_dimensions(shard_dimensions)
+    self._validate()
+
+  def _validate(self):
+    """Checks that the configuration is self-consistent.
+
+    Raises:
+      ValueError: if the shapes and sharding policies don't match.
+    """
+    if self.tuple_shapes is not None:
+      for (policy, shape) in zip(self._sharding_policies, self._tuple_shapes):
+        # Raise an error if the policy is incompatible with the shape.
+        _ = policy.get_sharded_shape(shape)
+
+  @property
+  def number_of_tuple_elements(self):
+    """Returns the number of InfeedQueue tuple elements."""
+    return len(self._sharding_policies)
+
+  @property
+  def tuple_types(self):
+    """Returns the types of the InfeedQueue tuple elements."""
+    return self._tuple_types
+
+  def set_tuple_types(self, tuple_types):
+    """Sets the type of each element of the queue.
+
+    tuple_types must be a list of length
+    self.number_of_tuple_elements, and each element must be
+    convertible to a dtype.
+
+    Args:
+      tuple_types: the types of each queue element.
+
+    Raises:
+      ValueError: if tuple_types is not of length
+        self.number_of_tuple_elements.
+      TypeError: if an element of tuple_types cannot be converted to a
+        dtype.
+    """
+    if len(tuple_types) != self.number_of_tuple_elements:
+      raise ValueError("tuple_types is %s, but must be a list of length %d" %
+                       (str(tuple_types), self.number_of_tuple_elements))
+    if self._frozen:
+      for (frozen, updated) in zip(self._tuple_types, tuple_types):
+        if frozen != updated:
+          raise ValueError(
+              "Trying to update InfeedQueue with frozen configuration with an "
+              "incompatible type. Frozen types are %s, updated types are %s" % (
+                  str(self._tuple_types), str(tuple_types)))
+    else:
+      try:
+        self._tuple_types = [dtypes.as_dtype(t) for t in tuple_types]
+      except TypeError as e:
+        raise TypeError(
+            "tuple_types is %s, but must be a list of elements each "
+            "convertible to dtype: got error %s" % (str(tuple_types), str(e)))
+
+  @property
+  def tuple_shapes(self):
+    """Returns the shapes of the InfeedQueue tuple elements."""
+    return self._tuple_shapes
+
+  def set_tuple_shapes(self, tuple_shapes):
+    """Sets the shape of each element of the queue.
+
+    tuple_shapes must be a list of length
+    self.number_of_tuple_elements, and each element must be
+    convertible to a TensorShape.
+
+    Args:
+      tuple_shapes: the shapes of each queue element.
+
+    Raises:
+      ValueError: if tuple_shapes is not of length
+        self.number_of_tuple_elements.
+      TypeError: if an element of tuple_shapes cannot be converted to
+        a TensorShape.
+    """
+    if len(tuple_shapes) != self.number_of_tuple_elements:
+      raise ValueError("tuple_shapes is %s, but must be a list of length %d" %
+                       (str(tuple_shapes), self.number_of_tuple_elements))
+    try:
+      tuple_shapes = [tensor_shape.as_shape(shape) for shape in tuple_shapes]
+    except (ValueError, TypeError) as e:
+      raise TypeError(
+          "tuple_shapes is %s, but must be a list of elements each "
+          "convertible to TensorShape: got error %s" % (str(tuple_shapes),
+                                                        str(e)))
+    if self._frozen:
+      for (frozen, updated) in zip(self._tuple_shapes, tuple_shapes):
+        if frozen != updated:
+          raise ValueError(
+              "Trying to update a frozen InfeedQueue configuration with an "
+              "incompatible shape. Frozen shapes are %s, updated shapes are %s"
+              % (str(self._tuple_shapes), str(tuple_shapes)))
+    else:
+      self._tuple_shapes = tuple_shapes
+    self._validate()
+
+  @property
+  def sharding_policies(self):
+    """Returns the sharding policies of the InfeedQueue tuple elements."""
+    return self._sharding_policies
+
+  @property
+  def shard_dimensions(self):
+    """Gets the shard dimension of each tuple element.
+
+    Returns:
+      A list of length number_of_tuple_elements, where each list entry
+      is the shard dimension of that tuple element or None if the
+      shard dimension has not been set.
+    """
+    # Each tuple element has its own policy, so shard dimensions may differ.
+    return [policy.shard_dimension for policy in self._sharding_policies]
+
+  def set_shard_dimensions(self, shard_dimensions):
+    """Sets the shard_dimension of each element of the queue.
+
+    shard_dimensions must be a list of length
+    self.number_of_tuple_elements, and each element must be
+    convertible to a Dimension compatible with self.tuple_shapes.
+
+    Args:
+      shard_dimensions: the dimensions of each queue element.
+
+    Raises:
+      ValueError: if shard_dimensions is not of length
+        self.number_of_tuple_elements; or an element of
+        shard_dimensions cannot be converted to a Dimension; or an
+        element of shard_dimensions is a Dimension that is out of
+        range for the corresponding tuple element shape.
+    """
+    if len(shard_dimensions) != self.number_of_tuple_elements:
+      raise ValueError("shard_dimensions is %s, but must be a list of length %d"
+                       % (str(shard_dimensions),
+                          self.number_of_tuple_elements))
+    for (policy, dimension) in zip(self._sharding_policies, shard_dimensions):
+      policy.set_shard_dimension(dimension)
+    self._validate()
+
+  @property
+  def number_of_shards(self):
+    """Gets the number of shards to use for the InfeedQueue.
+
+    Returns:
+      Number of shards or None if the number of shards has not been set.
+    """
+    # The number of shards is always the same for all the policies.
+    return self._sharding_policies[0].number_of_shards
+
+  def set_number_of_shards(self, number_of_shards):
+    """Sets the number of shards to use for the InfeedQueue.
+
+    Args:
+      number_of_shards: number of ways to shard the InfeedQueue.
+
+    Raises:
+      ValueError: if number_of_shards is not > 0; or the policies have
+        been frozen and number_of_shards was already set to something
+        else.
+    """
+    for policy in self._sharding_policies:
+      policy.set_number_of_shards(number_of_shards)
+    self._validate()
+
+  def set_configuration_from_input_tensors(self, input_tensors):
+    """Sets the shapes and types of the queue tuple elements.
+
+    input_tensors is a list of Tensors whose types and shapes are used
+    to set the queue configuration.
+
+    Args:
+      input_tensors: list of Tensors of the same types and shapes as
+        the desired queue Tuple.
+
+    Raises:
+      ValueError: if input_tensors is not a list of length
+        self.number_of_tuple_elements
+    """
+    if len(input_tensors) != self.number_of_tuple_elements:
+      raise ValueError("input_tensors is %s, but should be a list of %d Tensors"
+                       % (str(input_tensors), self.number_of_tuple_elements))
+    self.set_tuple_shapes([t.shape for t in input_tensors])
+    self.set_tuple_types([t.dtype for t in input_tensors])
+
+  def set_configuration_from_sharded_input_tensors(self, input_tensors):
+    """Sets the shapes and types of the queue tuple elements.
+
+    input_tensors is a list of lists of Tensors whose types and shapes are used
+    to set the queue configuration. The length of the outer list is the number
+    of shards required, and each inner list is the tuple of Tensors to use to
+    determine the types and shapes of the corresponding shard. This method
+    depends on the shard dimension, and calling it freezes the shard policy.
+
+    Args:
+      input_tensors: list of lists of Tensors. The outer list length corresponds
+        to the desired number of shards, and each inner list contains Tensors
+        whose types and shapes determine the configuration of the
+        corresponding shard.
+
+    Raises:
+      ValueError: if any inner list is not a list of length
+        self.number_of_tuple_elements; or the inner lists do not combine to
+        form a consistent unsharded shape.
+      TypeError: if the types of the Tensors in the inner lists do not match.
+    """
+    if not self._frozen:
+      # Unset the tuple shapes in case the configuration becomes
+      # transiently inconsistent.
+      self._tuple_shapes = None
+    number_of_shards = len(input_tensors)
+    self.set_number_of_shards(number_of_shards)
+    for t in input_tensors:
+      if len(t) != self.number_of_tuple_elements:
+        raise ValueError(
+            "input_tensors is %s but must be a list of lists, where each inner"
+            " list has length number_of_tuple_elements=%d" % (
+                str(input_tensors), self.number_of_tuple_elements))
+    # Transpose the inputs to make a list of shard shapes for each tuple
+    # element.
+    sharded_shapes = [[t[i].shape for t in input_tensors]
+                      for i in xrange(self.number_of_tuple_elements)]
+    # For each tuple, get the unsharded shape using that tuple's policy.
+    unsharded_shapes = [
+        policy.get_unsharded_shape(s)
+        for (policy, s) in zip(self._sharding_policies, sharded_shapes)
+    ]
+    self.set_tuple_shapes(unsharded_shapes)
+    for i in xrange(1, self.number_of_shards):
+      for (t1, t2) in zip(input_tensors[0], input_tensors[i]):
+        if t1.dtype != t2.dtype:
+          raise TypeError(
+              "types of the tuple elements of input_tensors %s are not "
+              "consistent" % str(input_tensors))
+    self.set_tuple_types([t.dtype for t in input_tensors[0]])
+
+  def freeze(self):
+    """Freezes the InfeedQueue so it can no longer be modified.
+
+    The configuration is implicitly frozen before any host-side or
+    device-side Ops are generated. The configuration cannot be frozen
+    until the types and shapes of the tuple elements have been set.
+
+    Raises:
+      ValueError: if the types or shapes of the tuple elements have not been
+        set.
+    """
+    self._frozen = True
+    if self._tuple_types is None:
+      raise ValueError(
+          "Can't freeze an InfeedQueue without setting all tuple types.")
+    if self._tuple_shapes is None:
+      raise ValueError(
+          "Can't freeze an InfeedQueue without setting all tuple shapes.")
+    for shape in self._tuple_shapes:
+      if shape.dims is None:
+        raise ValueError(
+            "Can't freeze an InfeedQueue without setting all tuple shapes.")
+    for policy in self._sharding_policies:
+      policy.freeze()
+    self._validate()
+
+  def generate_dequeue_op(self, tpu_device=0):
+    """Generates the device-side Op to dequeue a tuple from the queue.
+
+    Implicitly freezes the queue configuration if it is not already
+    frozen, which will raise errors if the shapes and types have not
+    been fully specified.
+
+    Args:
+      tpu_device: The TPU device ordinal where the infeed instruction should be
+        placed. If None, no explicit placement will be performed, and it is up
+        to the user to call this API from within a proper TPU device scope.
+        The XLA code will fail if the TPU dequeue instruction is not bound to
+        any device.
+
+    Returns:
+      A list of Outputs corresponding to a shard of infeed dequeued
+      into XLA, suitable for use within a replicated block.
+
+    Raises:
+      ValueError: if the types or shapes of the tuple elements have not been
+        set; or if a dequeue op has already been generated.
+    """
+    self.freeze()
+    if self._generated_dequeue_op:
+      raise ValueError("Can't generate two dequeue Ops from the same queue")
+    self._generated_dequeue_op = True
+    full_name = "%s/dequeue" % self._name
+    sharded_shapes = [
+        policy.get_sharded_shape(shape)
+        for (shape, policy) in zip(self._tuple_shapes, self._sharding_policies)
+    ]
+    if tpu_device is not None:
+      with ops.device(tpu.core(tpu_device)):
+        return tpu_ops.infeed_dequeue_tuple(
+            dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name)
+    else:
+      return tpu_ops.infeed_dequeue_tuple(
+          dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name)
+
+  def _generate_enqueue_op(self,
+                           inputs,
+                           name_prefix,
+                           index,
+                           device=None,
+                           tpu_ordinal=-1):
+    """Generate a host-side Op to enqueue a tuple to the queue.
+
+    If device is None the inputs are all required to have the same
+    device specification, and the enqueue Op is colocated with
+    inputs[0]. Otherwise the enqueue Op is placed on 'device'.
+
+    Args:
+      inputs: a list of Tensors with the types and shapes of the tuple elements.
+      name_prefix: the base name for the Op.
+      index: the shard index, used to uniquify the Op name.
+      device: device to place the Op on, or None if it should be
+        colocated with the inputs.
+      tpu_ordinal: ordinal of the TPU device on the host to use for
+        infeed if device is a CPU device. Should be set to -1 if device
+        is a TPU device.
+
+    Returns:
+      An Op corresponding to a shard of infeed enqueued at the host,
+      suitable for use within a replicated block.
+
+    Raises:
+      ValueError: if device is None and inputs do not all have the
+        same device specification.
+    """
+    full_name = "%s/%d" % (name_prefix, index)
+    shapes = [t.shape for t in inputs]
+    if device is None:
+      devices = [t.device for t in inputs]
+      for i in xrange(1, self.number_of_tuple_elements):
+        if devices[0] != devices[i]:
+          raise ValueError(
+              "input devices for shard %d are %s, but should all be the same" %
+              (index, str(devices)))
+      with ops.colocate_with(inputs[0]):
+        return tpu_ops.infeed_enqueue_tuple(
+            inputs=inputs,
+            shapes=shapes,
+            name=full_name,
+            device_ordinal=tpu_ordinal)
+    else:
+      with ops.device(device):
+        return tpu_ops.infeed_enqueue_tuple(
+            inputs=inputs,
+            shapes=shapes,
+            name=full_name,
+            device_ordinal=tpu_ordinal)
+
+  def generate_enqueue_ops(self,
+                           sharded_inputs,
+                           tpu_ordinal_function=None,
+                           placement_function=None):
+    """Generates the host-side Ops to enqueue the shards of a tuple.
+
+    sharded_inputs is a list, one for each shard, of lists of
+    Tensors. sharded_inputs[0] is the tuple of Tensors to use to feed
+    shard 0 of the queue. Returns the host-side Ops that must be run to
+    enqueue the sharded tuple. The Op for shard i is colocated with the inputs
+    for shard i.
+
+    Implicitly freezes the queue configuration if it is not already
+    frozen. If the configuration has already been frozen, and is not
+    compatible with the types and shapes of sharded_inputs, an error
+    will be raised.
+
+    Args:
+      sharded_inputs: a list of lists of Tensors. The length of the outer list
+        determines the number of shards. Each inner list indicates the types
+        and shapes of the tuples in the corresponding shard.
+      tpu_ordinal_function: if not None, a function that takes the
+        shard index as input and returns the ordinal of the TPU device
+        the shard's infeed should be placed on. tpu_ordinal_function must be
+        set if the inputs are placed on CPU devices.
+      placement_function: if not None, a function that takes the shard index as
+        input and returns the host device where the enqueue Op should be
+        placed.
+
+    Returns:
+      A list of host-side Ops, one for each shard, that when executed together
+      will enqueue a full-size element of infeed.
+
+    Raises:
+      ValueError: if the queue configuration has previously been frozen and the
+        shapes of the elements of sharded_inputs are not compatible with the
+        frozen configuration; or if the shapes of the elements of sharded_inputs
+        don't form a consistent unsharded tuple; or if the elements of a tuple
+        have different device constraints.
+      TypeError: if the queue configuration has previously been frozen and the
+        types of the elements of sharded_inputs are not compatible with the
+        frozen configuration; or if the types of the elements of sharded_inputs
+        don't form a consistent unsharded tuple.
+    """
+    self.set_configuration_from_sharded_input_tensors(sharded_inputs)
+    self.freeze()
+    if self._generated_enqueue_ops:
+      raise ValueError("Can't generate two enqueue Ops from the same queue")
+    self._generated_enqueue_ops = True
+    if tpu_ordinal_function is None:
+      tpu_ordinal_function = lambda index: -1
+    name_prefix = "%s/enqueue" % self._name
+    return [
+        self._generate_enqueue_op(
+            shard,
+            name_prefix,
+            index,
+            tpu_ordinal=tpu_ordinal_function(index),
+            device=placement_function(index) if placement_function else None)
+        for (shard, index) in zip(sharded_inputs, xrange(self.number_of_shards))
+    ]
+
+  # TODO(misard) Generalize this to the case of systems that don't
+  # have 8 devices per host, and figure out what to do with
+  # model-parallelism.
+  def _default_placement_function(self, index):
+    return "/task:%d/device:CPU:0" % (index / 8)
+
+  def _default_ordinal_function(self, index):
+    return index % 8
+
+  # TODO(b/36470756) remove this from tutorials once we have a better story
+  # for automatic placement of input pipelines.
+  def split_inputs_and_generate_enqueue_ops(self,
+                                            inputs,
+                                            device_assignment=None,
+                                            placement_function=None,
+                                            tpu_ordinal_function=None):
+    """POORLY-PERFORMING ON MULTI-HOST SYSTEMS.
+
+    Generates the host-side Ops to enqueue a tuple.
+
+    This method performs poorly because it takes an entire input on a single
+    host, splits it, and distributes it to all of the cores. It is present only
+    to simplify tutorial examples.
+
+    inputs is a list of Tensors to use to feed the queue. Each input is split
+    into self.number_of_shards shards. Returns an Op for each shard to enqueue
+    the shard. The Op for shard i is placed on device placement_function(i).
+
+    Implicitly freezes the queue configuration if it is not already
+    frozen. If the configuration has already been frozen, and is not
+    compatible with the types and shapes of inputs, an error
+    will be raised.
+
+    Args:
+      inputs: a list of Tensors which indicates the types and shapes of the
+        queue tuple.
+      device_assignment: if not `None`, a TPU `DeviceAssignment`. If
+        device_assignment is not `None`, but `placement_function` and
+        `tpu_ordinal_function` are None, then `device_assignment` will be used
+        to place infeeds on the first k TPU shards, where k is the number of
+        shards in the queue. If all three are `None`, then default placement
+        and ordinal functions are used.
+      placement_function: if not None, a function that takes the shard
+        index as input and returns a device string indicating which
+        device the shard's infeed should be placed on. If placement_function
+        and tpu_ordinal_function are None, inputs are sharded round-robin
+        across the devices in the system.
+      tpu_ordinal_function: if not None, a function that takes the
+        shard index as input and returns the ordinal of the TPU device
+        the shard's infeed should be placed on. If placement_function
+        and tpu_ordinal_function are None, inputs are sharded round-robin
+        across the devices in the system.
+
+    Returns:
+      A list of host-side Ops, one for each shard, that when executed together
+      will enqueue a full-size element of infeed.
+
+    Raises:
+      ValueError: if the queue configuration has previously been frozen and the
+        shapes of the elements of inputs are not compatible with the frozen
+        configuration.
+      TypeError: if the queue configuration has previously been frozen and the
+        types of the elements of inputs are not compatible with the frozen
+        configuration.
+    """
+    if device_assignment is None:
+      if placement_function is None:
+        placement_function = self._default_placement_function
+      if tpu_ordinal_function is None:
+        tpu_ordinal_function = self._default_ordinal_function
+    else:
+
+      def _placement_function_from_map(index):
+        return device_assignment.host_device(replica=index)
+
+      def _ordinal_function_from_map(index):
+        return device_assignment.tpu_ordinal(replica=index)
+
+      if placement_function is None:
+        placement_function = _placement_function_from_map
+      if tpu_ordinal_function is None:
+        tpu_ordinal_function = _ordinal_function_from_map
+    self.set_configuration_from_input_tensors(inputs)
+    self.freeze()
+    if self._generated_enqueue_ops:
+      raise ValueError("Can't generate two enqueue Ops from the same queue")
+    self._generated_enqueue_ops = True
+    split_name_prefix = "%s/split" % self._name
+    if self.number_of_shards == 1:
+      transposed_sharded_inputs = [[inp] for inp in inputs]
+    else:
+
+      def split_fn(inp, num_shards, axis, name):
+        with ops.colocate_with(inp):
+          return array_ops.split(inp, num_shards, axis=axis, name=name)
+
+      transposed_sharded_inputs = [
+          split_fn(
+              inp,
+              self.number_of_shards,
+              axis=policy.shard_dimension,
+              name="%s/%d" % (split_name_prefix, index))
+          for (inp, policy, index) in zip(inputs, self._sharding_policies,
+                                          xrange(self.number_of_tuple_elements))
+      ]
+    sharded_inputs = [[shard[i] for shard in transposed_sharded_inputs]
+                      for i in xrange(self.number_of_shards)]
+    name_prefix = "%s/enqueue" % self._name
+    return [
+        self._generate_enqueue_op(
+            shard,
+            name_prefix,
+            index,
+            device=placement_function(index),
+            tpu_ordinal=tpu_ordinal_function(index))
+        for (shard, index) in zip(sharded_inputs, xrange(self.number_of_shards))
+    ]
+
+
+class _PartitionedInfeedQueue(InfeedQueue):
+  """A helper object to build a device infeed queue with input partition.
+
+  Args:
+    number_of_tuple_elements: the number of Tensors fed atomically through the
+      queue, must be present unless it can be inferred from other arguments.
+    device_assignment: A TPU `DeviceAssignment` which is used to place all the
+      partitions to different TPU infeed queues.
+    host_id: The id of the host machine.
+    input_partition_dims: A nested list/tuple of integers. Each inner
+      list/tuple describes how to partition the corresponding input tensor.
+    tuple_types: If not None, a list of types of the elements of the queue.
+    tuple_shapes: If not None, a list of shapes of the elements of the queue.
+    name: The name of the queue.
+  """
+
+  def __init__(self,
+               number_of_tuple_elements,
+               device_assignment,
+               host_id,
+               input_partition_dims=None,
+               tuple_types=None,
+               tuple_shapes=None,
+               name=None):
+    super(_PartitionedInfeedQueue, self).__init__(
+        number_of_tuple_elements=number_of_tuple_elements,
+        tuple_types=tuple_types,
+        tuple_shapes=None,
+        shard_dimensions=None,
+        name="PartitionedInfeedQueue" if name is None else name)
+    self._input_partition_dims = input_partition_dims
+    self._host_id = host_id
+    self._device_assignment = device_assignment
+
+  def generate_dequeue_op(self, tpu_device=0):
+    """Generate TPU dequeue ops.
+
+    Args:
+      tpu_device: The TPU device ordinal where the infeed instruction should be
+        placed.
+
+    Returns:
+      A list of Outputs corresponding to a partition of infeed dequeued
+      into XLA, suitable for use within a replicated block.
+
+    Raises:
+      ValueError: if the types or shapes of the tuple elements have not been
+        set; or if a dequeue op has already been generated.
+    """
+    self.freeze()
+    if self._generated_dequeue_op:
+      raise ValueError("Can't generate two dequeue Ops from the same queue")
+    self._generated_dequeue_op = True
+    full_name = "%s/dequeue" % self._name
+    sharded_shapes = [
+        policy.get_sharded_shape(shape)
+        for (shape, policy) in zip(self._tuple_shapes, self._sharding_policies)
+    ]
+    with ops.device(tpu.core(tpu_device)):
+      values = tpu_ops.infeed_dequeue_tuple(
+          dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name)
+    return tag_sharding_attribute_for_dequeued_tensors(
+        values, self._input_partition_dims)
+
+  def generate_enqueue_ops(self, per_host_sharded_inputs):
+    """Generates the host-side Ops to enqueue the partitioned inputs.
+
+    per_host_sharded_inputs is a list, one for each replica, of lists of
+    Tensors. per_host_sharded_inputs[i] is the tuple of Tensors to use to feed
+    replica i. per_host_sharded_inputs[i][j] is partitioned by
+    self._input_partition_dims[j].
+
+    For example, if per_host_sharded_inputs[i][j] is a 2-D Tensor:
+    [[A, B, C, D],
+     [E, F, G, H]]
+    and self._input_partition_dims[j] is [2, 4], then
+    per_host_sharded_inputs[i][j] will be partitioned and flattened into:
+    [A, B, C, D, E, F, G, H] and fed into the logical core ids:
+    [0, 1, 2, 3, 4, 5, 6, 7] respectively.
+
+    Args:
+      per_host_sharded_inputs: a list of lists of Tensors. The length of the
+        outer list determines the number of replicas on this host. Each inner
+        list indicates the types and shapes of the tuples in the corresponding
+        replica.
+
+    Returns:
+      A list of host-side Ops, one for each shard, that when executed together
+      will enqueue a full-size element of infeed.
+
+    Raises:
+      ValueError: if the queue configuration has previously been frozen and the
+        shapes of the elements of sharded_inputs are not compatible with the
+        frozen configuration; or if the shapes of the elements of sharded_inputs
+        don't form a consistent unsharded tuple; or if the elements of a tuple
+        have different device constraints; or if the partition dims are invalid.
+      TypeError: if the queue configuration has previously been frozen and the
+        types of the elements of sharded_inputs are not compatible with the
+        frozen configuration; or if the types of the elements of sharded_inputs
+        don't form a consistent unsharded tuple.
+    """
+    self.set_configuration_from_sharded_input_tensors(per_host_sharded_inputs)
+    number_of_replicas_per_host = len(per_host_sharded_inputs)
+    number_of_tuple_elements = len(per_host_sharded_inputs[0])
+
+    assert len(self._input_partition_dims) == number_of_tuple_elements
+    per_host_enqueue_ops = []
+
+    for replica_index in range(number_of_replicas_per_host):
+      flattened_inputs = per_host_sharded_inputs[replica_index]
+      inputs_part_dims_flat = nest.flatten_up_to(flattened_inputs,
+                                                 self._input_partition_dims)
+      inputs_parted_iters = [
+          iter(self._check_dims_and_partition_or_replicate_on_host(x, dims))
+          for x, dims in zip(per_host_sharded_inputs[replica_index],
+                             inputs_part_dims_flat)
+      ]
+
+      for logical_core in xrange(self._device_assignment.num_cores_per_replica):
+        # Places different partitions on different logical cores.
+        replica_id = self._device_assignment.lookup_replicas(
+            self._host_id, logical_core)[replica_index]
+        ordinal = self._device_assignment.tpu_ordinal(
+            replica=replica_id, logical_core=logical_core)
+        infeed_inputs = []
+        for it in inputs_parted_iters:
+          input_for_device = next(it, None)
+          if input_for_device is not None:
+            infeed_inputs.append(input_for_device)
+
+        if infeed_inputs:
+          per_host_enqueue_ops.append(
+              tpu_ops.infeed_enqueue_tuple(
+                  inputs=infeed_inputs,
+                  shapes=[x.shape for x in infeed_inputs],
+                  name="enqueue/replica_{0}/input_{1}".format(
+                      replica_index, logical_core),
+                  device_ordinal=ordinal))
+    return per_host_enqueue_ops
+
+  def _check_input_partition_dims(self, tensor, dims):
+    """Checks that input partition dims are valid for the `Tensor`.
+
+    Args:
+      tensor: Input tensor for partitioning.
+      dims: A list of integers describing how to partition the input tensor.
+
+    Raises:
+      ValueError: If the tensor can't be partitioned by dims, or if
+        num_cores_per_replica doesn't match the number of
+        partitions (dims.prod()).
+    """
+    # No partitioning specified, so don't perform further checks.
+    if dims is None:
+      return
+
+    dims = np.array(dims)
+
+    if (dims < 1).any():
+      raise ValueError("All input partition dims must be >= 1.")
+
+    # No partitioning, so don't perform further checks.
+    if dims.prod() == 1:
+      return
+
+    if dims.prod() != self._device_assignment.num_cores_per_replica:
+      raise ValueError(
+          "The product of the input partition dims must equal "
+          "num_cores_per_replica. (dims = {}, num_cores_per_replica "
+          "= {})".format(dims, self._device_assignment.num_cores_per_replica))
+    if dims.shape[0] != tensor.shape.ndims:
+      raise ValueError(
+          "Input partition dims must have the same number of dimensions "
+          "as the `Tensor` to be partitioned. (tensor shape = {}, input "
+          "partition dims = {}).".format(tensor.shape.as_list(), dims))
+
+    tensor.shape.assert_is_fully_defined()
+
+  def _check_dims_and_partition_or_replicate_on_host(self, tensor, dims):
+    """Checks dims and partitions or replicates the input tensor.
+
+    The ops inside this function are placed on the host side.
+
+    Args:
+      tensor: The input tensor which will be partitioned or replicated.
+      dims: A list of integers describing how to partition the input tensor.
+
+    Returns:
+      An iterator of `Tensor`s or a list of partitioned tensors.
+    """
+    self._check_input_partition_dims(tensor, dims)
+    return partition_or_replicate_on_host(tensor, dims)
diff --git a/tensorflow/python/tpu/tpu_function.py b/tensorflow/python/tpu/tpu_function.py
new file mode 100644
index 0000000..422c7d3
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_function.py
@@ -0,0 +1,66 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Helper library for functions used during TPU compilation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+
+
+class TpuContext(object):
+  """A context object holding state about the TPU computation being built."""
+
+  def __init__(self):
+    """Creates a new TpuContext."""
+    self._number_of_shards = None
+
+  @property
+  def number_of_shards(self):
+    return self._number_of_shards
+
+  def set_number_of_shards(self, number_of_shards):
+    self._number_of_shards = number_of_shards
+
+
+# The Tpu context holds the number of shards when a sharded computation is
+# being built, or None if no computation is being built.
+_current_tpu_context = TpuContext()
+
+
+@contextlib.contextmanager
+def tpu_shard_context(number_of_shards):
+  if _current_tpu_context.number_of_shards is not None:
+    raise NotImplementedError("tpu_shard_context cannot be nested.")
+  try:
+    _current_tpu_context.set_number_of_shards(number_of_shards)
+    yield
+  finally:
+    _current_tpu_context.set_number_of_shards(None)
+
+
+def get_tpu_context():
+  return _current_tpu_context
+
+
+# Decorator for a TPU computation func that was passed to tpu.rewrite(). If
+# there is an embedded training loop in this func, trace tools will generate
+# step markers for each iteration.
+def on_device_training_loop(func):
+  # Value for this attribute is from xla.DebugOptions.StepMarkerLocation.
+  setattr(func, "step_marker_location", "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP")
+  return func
diff --git a/tensorflow/python/tpu/tpu_infeed_test.py b/tensorflow/python/tpu/tpu_infeed_test.py
new file mode 100644
index 0000000..3e90979
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_infeed_test.py
@@ -0,0 +1,130 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Tests for TPU InfeedQueue methods."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import tpu_feed
+
+
+class InfeedTest(test.TestCase):
+
+  def testConstructor(self):
+    """Tests that the constructor can be called with different arguments."""
+    i = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
+    self.assertEqual(i.number_of_tuple_elements, 2)
+    self.assertEqual(i.tuple_types, None)
+    self.assertEqual(i.tuple_shapes, None)
+    self.assertEqual(i.number_of_shards, None)
+    i = tpu_feed.InfeedQueue(
+        tuple_types=[dtypes.float32, dtypes.int32, dtypes.int32])
+    self.assertEqual(i.number_of_tuple_elements, 3)
+    self.assertEqual(i.tuple_types,
+                     [dtypes.float32, dtypes.int32, dtypes.int32])
+    self.assertEqual(i.tuple_shapes, None)
+    self.assertEqual(i.number_of_shards, None)
+    i = tpu_feed.InfeedQueue(tuple_shapes=[[1], [2, 3]])
+    self.assertEqual(i.number_of_tuple_elements, 2)
+    self.assertEqual(i.tuple_types, None)
+    self.assertEqual(i.tuple_shapes, [[1], [2, 3]])
+    self.assertEqual(i.number_of_shards, None)
+    i = tpu_feed.InfeedQueue(shard_dimensions=[1, 0, 7])
+    self.assertEqual(i.number_of_tuple_elements, 3)
+    self.assertEqual(i.tuple_types, None)
+    self.assertEqual(i.tuple_shapes, None)
+    self.assertEqual([p.shard_dimension
+                      for p in i.sharding_policies], [1, 0, 7])
+    with self.assertRaises(ValueError):
+      i = tpu_feed.InfeedQueue()
+    with self.assertRaises(ValueError):
+      i = tpu_feed.InfeedQueue(
+          number_of_tuple_elements=2, tuple_types=[dtypes.float32])
+    with self.assertRaises(ValueError):
+      i = tpu_feed.InfeedQueue(number_of_tuple_elements=2, tuple_shapes=[[1]])
+    with self.assertRaises(ValueError):
+      i = tpu_feed.InfeedQueue(number_of_tuple_elements=2, shard_dimensions=[1])
+    with self.assertRaises(ValueError):
+      i = tpu_feed.InfeedQueue(tuple_shapes=[[1], [2, 3]], shard_dimensions=[1])
+
+  def testModification(self):
+    """Tests modification of the queue post-construction."""
+    i = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
+    i.set_tuple_types([dtypes.float32, dtypes.int32])
+    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
+    i.set_tuple_types([dtypes.float32, dtypes.float32])
+    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.float32])
+    with self.assertRaises(ValueError):
+      i.set_tuple_types([dtypes.float32])
+    i.set_tuple_shapes([[1], [2, 3]])
+    self.assertEqual(i.tuple_shapes, [[1], [2, 3]])
+    i.set_tuple_shapes([[1, 2], [3, 4]])
+    self.assertEqual(i.tuple_shapes, [[1, 2], [3, 4]])
+    with self.assertRaises(ValueError):
+      i.set_tuple_shapes([[1, 2]])
+    i.set_number_of_shards(2)
+    self.assertEqual(i.number_of_shards, 2)
+    i.set_number_of_shards(3)
+    self.assertEqual(i.number_of_shards, 3)
+    t1 = constant_op.constant(1, dtypes.int32, shape=[6])
+    t2 = constant_op.constant(2.0, dtypes.float32, shape=[3, 18])
+    i.set_configuration_from_input_tensors([t1, t2])
+    self.assertEqual(i.tuple_shapes, [[6], [3, 18]])
+    self.assertEqual(i.tuple_types, [dtypes.int32, dtypes.float32])
+    i.set_configuration_from_sharded_input_tensors([[t2, t1], [t2, t1]])
+    self.assertEqual(i.number_of_shards, 2)
+    self.assertEqual(i.tuple_shapes, [[6, 18], [12]])
+    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
+    i.set_shard_dimensions([1, 0])
+    i.set_number_of_shards(3)
+    with self.assertRaises(ValueError):
+      i.set_number_of_shards(4)
+
+  def testFreezing(self):
+    """Tests freezing the queue."""
+    i = tpu_feed.InfeedQueue(number_of_tuple_elements=2)
+    t1 = constant_op.constant(1, dtypes.int32, shape=[2])
+    t2 = constant_op.constant(2.0, dtypes.float32, shape=[2, 4])
+    i.set_configuration_from_sharded_input_tensors([[t2, t1], [t2, t1]])
+    self.assertEqual(i.number_of_shards, 2)
+    self.assertEqual(i.tuple_shapes, [[4, 4], [4]])
+    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
+    self.assertEqual(i.shard_dimensions, [0, 0])
+    i.freeze()
+    i.set_number_of_shards(2)
+    i.set_tuple_shapes([[4, 4], [4]])
+    i.set_tuple_types([dtypes.float32, dtypes.int32])
+    i.set_shard_dimensions([0, 0])
+    with self.assertRaises(ValueError):
+      i.set_number_of_shards(1)
+    with self.assertRaises(ValueError):
+      i.set_tuple_shapes([[8, 8], [8]])
+    with self.assertRaises(ValueError):
+      i.set_tuple_types([dtypes.int32, dtypes.float32])
+    with self.assertRaises(ValueError):
+      i.set_shard_dimensions([1, 0])
+    self.assertEqual(i.number_of_shards, 2)
+    self.assertEqual(i.tuple_shapes, [[4, 4], [4]])
+    self.assertEqual(i.tuple_types, [dtypes.float32, dtypes.int32])
+    self.assertEqual(i.shard_dimensions, [0, 0])
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/tpu/tpu_optimizer.py b/tensorflow/python/tpu/tpu_optimizer.py
new file mode 100644
index 0000000..22c409e
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_optimizer.py
@@ -0,0 +1,203 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Optimizer that implements cross-shard gradient reduction for TPU."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import tpu_function
+from tensorflow.python.tpu.ops import tpu_ops
+from tensorflow.python.training import optimizer
+
+
+class CrossShardOptimizer(optimizer.Optimizer):
+  """An optimizer that averages gradients across TPU shards."""
+
+  def __init__(self,
+               opt,
+               reduction=losses.Reduction.MEAN,
+               name="CrossShardOptimizer",
+               group_assignment=None):
+    """Construct a new cross-shard optimizer.
+
+    Args:
+      opt: An existing `Optimizer` to encapsulate.
+      reduction: The reduction to apply to the shard losses.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "CrossShardOptimizer".
+      group_assignment: Optional 2d int32 list with shape
+        [num_groups, num_replicas_per_group] which describes how to apply the
+        optimizer to subgroups.
+
+    Raises:
+      ValueError: If reduction is not a valid cross-shard reduction.
+    """
+    if reduction not in (losses.Reduction.SUM, losses.Reduction.MEAN):
+      raise ValueError("Unsupported reduction: %s." % reduction)
+
+    super(CrossShardOptimizer, self).__init__(False, name)
+    self._opt = opt
+    self._reduction = reduction
+    self._group_assignment = group_assignment
+
+  def _verify_and_get_subgroup_size(self, group_assignment, num_shards):
+    """Verify group_assignment and get the subgroup size".
+
+    Args:
+      group_assignment: list of group ids for applying the optimizer
+        to subgroups.
+      num_shards: The number of TPU shards.
+
+    Returns:
+      The size of one subgroup in group_assignment.
+
+    Raises:
+      ValueError: If group_assignment is invalid.
+    """
+    if not group_assignment:
+      return None
+    if not (isinstance(group_assignment, list) and
+            all(isinstance(i, list) for i in group_assignment)):
+      raise ValueError("group_assignment must be a list of list. Got {}".format(
+          group_assignment))
+
+    replica_ids = set()
+    for g in group_assignment:
+      for i in g:
+        replica_ids.add(i)
+
+    if set(range(num_shards)) != replica_ids:
+      raise ValueError("group_assignment must be a permutation of range({0})."
+                       " Got group_assignment={1}".format(
+                           num_shards, group_assignment))
+
+    subgroup_size_list = [len(group) for group in group_assignment]
+    if all(subgroup_size_list[0] == size for size in subgroup_size_list):
+      return subgroup_size_list[0]
+    else:
+      raise ValueError("The size of each subgroup in group_assignment must "
+                       "be equal. Got group_assignment={}".format(
+                           self._group_assignment))
+
+  def compute_gradients(self, loss, var_list=None, **kwargs):
+    """Compute gradients of "loss" for the variables in "var_list".
+
+    This simply wraps the compute_gradients() from the real optimizer. The
+    gradients are aggregated in apply_gradients() so that the user can modify
+    them first, e.g. by clipping with a per-replica global norm. Computing the
+    global norm over aggregated gradients can be problematic, since one
+    replica's outsized gradients would distort the gradients of the other
+    replicas.
+
+    Args:
+      loss: A Tensor containing the value to minimize.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKey.TRAINABLE_VARIABLES`.
+      **kwargs: Keyword arguments for compute_gradients().
+
+    Returns:
+      A list of (gradient, variable) pairs.
+
+    Raises:
+      ValueError: If not within a tpu_shard_context or group_assignment is
+        invalid.
+    """
+    num_shards = tpu_function.get_tpu_context().number_of_shards
+    if num_shards is None:
+      logging.warning(
+          "CrossShardOptimizer should be used within a tpu_shard_context, but "
+          "got unset number_of_shards. Assuming 1.")
+      num_shards = 1
+
+    subgroup_size = self._verify_and_get_subgroup_size(self._group_assignment,
+                                                       num_shards)
+
+    if num_shards > 1 and self._reduction == losses.Reduction.MEAN:
+      if self._group_assignment:
+        scale = 1.0 / subgroup_size
+      else:
+        scale = 1.0 / num_shards
+      loss *= scale
+
+    return self._opt.compute_gradients(loss, var_list=var_list, **kwargs)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to variables.
+
+    Calls tpu_ops.cross_replica_sum() to sum gradient contributions across
+    replicas, and then applies the real optimizer.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        compute_gradients().
+      global_step: Optional Variable to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the Optimizer constructor.
+
+    Returns:
+      An `Operation` that applies the gradients. If `global_step` was not None,
+      that operation also increments `global_step`.
+
+    Raises:
+      ValueError: If the grads_and_vars is malformed.
+    """
+    summed_grads_and_vars = []
+    for (grad, var) in grads_and_vars:
+      if grad is None:
+        summed_grads_and_vars.append((grad, var))
+      else:
+        with ops.colocate_with(grad):
+          summed_grads_and_vars.append((tpu_ops.cross_replica_sum(
+              grad, self._group_assignment), var))
+    return self._opt.apply_gradients(summed_grads_and_vars, global_step, name)
+
+  def get_slot(self, *args, **kwargs):
+    """Return a slot named "name" created for "var" by the Optimizer.
+
+    This simply wraps the get_slot() from the actual optimizer.
+
+    Args:
+      *args: Arguments for get_slot().
+      **kwargs: Keyword arguments for get_slot().
+
+    Returns:
+      The `Variable` for the slot if it was created, `None` otherwise.
+    """
+    return self._opt.get_slot(*args, **kwargs)
+
+  def get_slot_names(self, *args, **kwargs):
+    """Return a list of the names of slots created by the `Optimizer`.
+
+    This simply wraps the get_slot_names() from the actual optimizer.
+
+    Args:
+      *args: Arguments for get_slot_names().
+      **kwargs: Keyword arguments for get_slot_names().
+
+    Returns:
+      A list of strings.
+    """
+    return self._opt.get_slot_names(*args, **kwargs)
+
+  def variables(self):
+    """Forwarding the variables from the underlying optimizer."""
+    return self._opt.variables()
diff --git a/tensorflow/python/tpu/tpu_sharding.py b/tensorflow/python/tpu/tpu_sharding.py
new file mode 100644
index 0000000..f5af03f
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_sharding.py
@@ -0,0 +1,253 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Helper library for sharding during TPU compilation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.python.framework import tensor_shape
+
+_DEFAULT_NUMBER_OF_SHARDS = 1
+_DEFAULT_SHARD_DIMENSION = 0
+
+
+# TODO(b/36777903) change other parts of tpu.py to use this class.
+class ShardingPolicy(object):
+  """An object use to hold the sharding policy for a Tensor.
+  """
+
+  def __init__(self):
+    self._number_of_shards = None
+    self._shard_dimension = None
+    self._frozen = False
+
+  def __str__(self):
+    if self.number_of_shards is None or self.shard_dimension is None:
+      return "ShardingPolicy(unset)"
+    else:
+      return ("ShardingPolicy(%d shards dimension %d)" %
+              (self.number_of_shards, self.shard_dimension))
+
+  def _fill_default_values(self):
+    if self._number_of_shards is None:
+      self._number_of_shards = _DEFAULT_NUMBER_OF_SHARDS
+    if self._shard_dimension is None:
+      self._shard_dimension = tensor_shape.as_dimension(
+          _DEFAULT_SHARD_DIMENSION)
+
+  def freeze(self):
+    """Prevents further modification to the sharding policy.
+
+    Any values that have not been set when freeze is called are set to
+    defaults. If the ShardingPolicy is already frozen, this is a NoOp.
+    """
+    if not self._frozen:
+      self._fill_default_values()
+      self._frozen = True
+
+  @property
+  def number_of_shards(self):
+    """Returns the number of shards in the policy or None if unspecified."""
+    return self._number_of_shards
+
+  def set_number_of_shards(self, number_of_shards):
+    """Sets the number of shards for the current policy.
+
+    If the policy has been frozen then number_of_shards must match the
+    existing setting.
+
+    Args:
+      number_of_shards: The number of shards to use in the policy.
+
+    Raises:
+      ValueError: If the policy has been frozen and number_of_shards
+        differs from the frozen value; or number_of_shards <= 0.
+    """
+    if self._frozen:
+      if self._number_of_shards != number_of_shards:
+        raise ValueError(
+            "Can't set sharding policy to use %d shards since it has been "
+            "frozen to use %d." % (number_of_shards, self._number_of_shards))
+    else:
+      if number_of_shards > 0:
+        self._number_of_shards = number_of_shards
+      else:
+        raise ValueError(
+            "Can't set sharding policy to use %s shards; value must be >0",
+            str(number_of_shards))
+
+  @property
+  def shard_dimension(self):
+    """Returns the shard dimension of the policy or None if unspecified."""
+    return self._shard_dimension
+
+  def set_shard_dimension(self, shard_dimension):
+    """Sets the shard dimension for the current policy.
+
+    If the policy has been frozen then shard_dimension must match the
+    existing setting.
+
+    Args:
+      shard_dimension: The shard dimension to use in the policy.
+
+    Raises:
+      ValueError: If the policy has been frozen and shard_dimension
+        differs from the frozen value, or shard_dimension can't be
+        interpreted as a Dimension.
+    """
+    if self._frozen:
+      if self._shard_dimension != shard_dimension:
+        raise ValueError(
+            "Can't set shard dimension to %d since it has been frozen to "
+            "use %d." % (shard_dimension, self._shard_dimension))
+    else:
+      self._shard_dimension = tensor_shape.as_dimension(shard_dimension)
+
+  def merge(self, other):
+    """Merges the policy of another policy into the current policy.
+
+    Args:
+      other: The policy to merge into this one.
+
+    Raises:
+      ValueError: If this policy has been frozen and the merge conflicts with
+      the frozen policy.
+    """
+    if other.number_of_shards is not None:
+      self.set_number_of_shards(other.number_of_shards)
+    if other.shard_dimension is not None:
+      self.set_shard_dimension(other.shard_dimension)
+
+  def get_sharded_shape(self, shape, shard_index=None):
+    """Returns the shape of a shard of a full Tensor.
+
+    When given the shape of a 'full-size' Tensor, returns the shape of
+    the sub-Tensor after it has been sharded. Freezes the policy if it
+    has not yet been frozen.
+
+    Args:
+      shape: The shape of the full-size Tensor to be sharded.
+      shard_index: The index of the shard whose shape should be returned.
+        shard_index can be None for sharding policies that use the same
+        shape for every shard.
+
+    Returns:
+      The shape of the sharded version of the Tensor.
+
+    Raises:
+      ValueError: If shard_index is None when shards are of different
+        shapes; or shard_index is not None and
+        !(0<=shard_index<number_of_shards); or shape does not have at
+        least self.shard_dimension+1 dimensions; or the value of
+        shape's shard dimension is not a multiple of
+        self.number_of_shards
+    """
+    if self._shard_dimension is None or self._number_of_shards is None:
+      # Don't raise an error if the config is unset.
+      return None
+    if shard_index is not None:
+      if shard_index < 0 or shard_index >= self.number_of_shards:
+        raise ValueError("shard_index %d, but must be in [0,%d)." %
+                         (shard_index, self._number_of_shards))
+    shape = tensor_shape.as_shape(shape)
+    if self._number_of_shards == 1:
+      # Don't do anything when there's only one shard.
+      return shape
+    ndims = shape.ndims
+    if ndims is None:
+      raise ValueError("shape must be a specified shape not Unknown")
+    if ndims <= self._shard_dimension:
+      raise ValueError("shape %s does not contain shard_dimension %d" %
+                       (shape.as_list(), self._shard_dimension))
+    dims = shape.as_list()
+    if dims[self._shard_dimension] is None:
+      raise ValueError("shape %s must have a fixed size for dimension %d "
+                       "that is known at graph construction time." %
+                       (shape.as_list(), self._shard_dimension))
+    if (dims[self._shard_dimension] % self._number_of_shards) != 0:
+      raise ValueError("shape %s cannot be sharded %d ways along dimension %d" %
+                       (shape.as_list(), self._number_of_shards,
+                        self._shard_dimension))
+    dims[self._shard_dimension] //= self._number_of_shards
+    return tensor_shape.as_shape(dims)
+
+  def _unshard_shape(self, shape):
+    """Return the unsharded shape that would generate a given sharded shape.
+
+    Args:
+      shape: the sharded shape to unshard
+
+    Returns:
+      The unsharded shape.
+
+    Raises:
+      ValueError: if shape is unknown or does not contain
+        self.shard_dimension
+      TypeError: if shape is not convertible to a TensorShape
+    """
+    shape = tensor_shape.as_shape(shape)
+    if self._number_of_shards == 1:
+      # Don't do anything when there's only one shard.
+      return shape
+    ndims = shape.ndims
+    if ndims is None:
+      raise ValueError("shape must be a specified shape not Unknown")
+    if ndims <= self._shard_dimension:
+      raise ValueError("shape %s does not contain shard_dimension %d" %
+                       (shape.as_list(), self._shard_dimension))
+    dims = shape.as_list()
+    dims[self._shard_dimension] *= self._number_of_shards
+    return tensor_shape.as_shape(dims)
+
+  def get_unsharded_shape(self, shapes):
+    """Returns the shape of an unsharded Tensor given a list of shards.
+
+    When given a list of shapes of shards, returns the shape of the
+    unsharded Tensor that would generate the shards. Sets defaults for the
+    policy if number_of_shards or shard_dimension is None.
+
+    Args:
+      shapes: The shapes of the Tensor shards to be combined.
+
+    Returns:
+      The shape of the unsharded version of the Tensor.
+
+    Raises:
+      ValueError: if shapes is not a list of length
+        self.number_of_shards; or any element of shapes is not a valid
+        shape consistent with the sharding policy; or the list of
+        shapes is not a valid sharding of a full shape.
+      TypeError: if an element of shapes is not convertible to a
+        TensorShape
+    """
+    self._fill_default_values()
+    if len(shapes) != self.number_of_shards:
+      raise ValueError(
+          "shapes is %s but must be a list of length number_of_shards=%d" % (
+              str(shapes), self.number_of_shards))
+    unsharded_shapes = [self._unshard_shape(s) for s in shapes]
+    for i in xrange(self.number_of_shards - 1):
+      if not unsharded_shapes[i].is_compatible_with(
+          unsharded_shapes[self.number_of_shards - 1]):
+        raise ValueError(
+            "sharded shapes %s are not consistent shards of a full shape "
+            "sharded %d ways along dimension %d" % (
+                str(shapes), self.number_of_shards, self.shard_dimension))
+    return unsharded_shapes[0]
diff --git a/tensorflow/python/tpu/tpu_sharding_test.py b/tensorflow/python/tpu/tpu_sharding_test.py
new file mode 100644
index 0000000..21d2a08
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_sharding_test.py
@@ -0,0 +1,138 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Tests for tpu_function helpers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import tpu_sharding
+
+
+class ShardingTest(test.TestCase):
+
+  def testFreeze(self):
+    """Tests that freezing a policy applies default values."""
+    p1 = tpu_sharding.ShardingPolicy()
+    p1.freeze()
+    self.assertEqual(p1.number_of_shards,
+                     tpu_sharding._DEFAULT_NUMBER_OF_SHARDS)
+    self.assertEqual(p1.shard_dimension, tpu_sharding._DEFAULT_SHARD_DIMENSION)
+    p2 = tpu_sharding.ShardingPolicy()
+    p2.set_number_of_shards(17)
+    p2.set_shard_dimension(23)
+    p2.freeze()
+    self.assertEqual(p2.number_of_shards, 17)
+    self.assertEqual(p2.shard_dimension, 23)
+
+  def testFrozen(self):
+    """Tests that frozen policies can't be changed."""
+    p1 = tpu_sharding.ShardingPolicy()
+    p1.freeze()
+    with self.assertRaises(ValueError):
+      p1.set_number_of_shards(17)
+    with self.assertRaises(ValueError):
+      p1.set_shard_dimension(22)
+
+  def testStr(self):
+    """Tests the string representation."""
+    p1 = tpu_sharding.ShardingPolicy()
+    self.assertEqual(str(p1), "ShardingPolicy(unset)")
+    p1.set_number_of_shards(17)
+    self.assertEqual(str(p1), "ShardingPolicy(unset)")
+    p1.set_shard_dimension(8)
+    self.assertEqual(str(p1), "ShardingPolicy(17 shards dimension 8)")
+
+  def testMerge(self):
+    """Tests that merging works."""
+    p1 = tpu_sharding.ShardingPolicy()
+    p1.set_number_of_shards(17)
+    p1.set_shard_dimension(23)
+    p2 = tpu_sharding.ShardingPolicy()
+    p2.merge(p1)
+    self.assertEqual(p2.number_of_shards, 17)
+    self.assertEqual(p2.shard_dimension, 23)
+    p1 = tpu_sharding.ShardingPolicy()
+    p1.set_shard_dimension(12)
+    p2.merge(p1)
+    self.assertEqual(p2.number_of_shards, 17)
+    self.assertEqual(p2.shard_dimension, 12)
+    p2.freeze()
+    p2.merge(p1)
+    self.assertEqual(p2.number_of_shards, 17)
+    self.assertEqual(p2.shard_dimension, 12)
+    p1.set_number_of_shards(1)
+    with self.assertRaises(ValueError):
+      p2.merge(p1)
+    p1 = tpu_sharding.ShardingPolicy()
+    p1.set_number_of_shards(17)
+    p2.merge(p1)
+    p1.set_shard_dimension(2)
+    with self.assertRaises(ValueError):
+      p2.merge(p1)
+
+  def testGetShardedShape(self):
+    """Tests getting a sharded shape."""
+    p = tpu_sharding.ShardingPolicy()
+    p.set_number_of_shards(3)
+    p.set_shard_dimension(1)
+    self.assertEqual(p.get_sharded_shape([4, 9]), [4, 3])
+    p.freeze()
+    with self.assertRaises(ValueError):
+      p.set_shard_dimension(0)
+    with self.assertRaises(ValueError):
+      _ = p.get_sharded_shape([4, 9], shard_index=4)
+    with self.assertRaises(ValueError):
+      _ = p.get_sharded_shape([4, 9], shard_index=-1)
+    with self.assertRaises(TypeError):
+      _ = p.get_sharded_shape("not_a_shape")
+    with self.assertRaises(ValueError):
+      _ = p.get_sharded_shape(tensor_shape.TensorShape(None))
+    with self.assertRaises(ValueError):
+      _ = p.get_sharded_shape([4, 10], shard_index=-1)
+
+  def testGetUnshardedShape(self):
+    """Tests getting an unsharded shape."""
+    p = tpu_sharding.ShardingPolicy()
+    p.set_number_of_shards(2)
+    p.set_shard_dimension(1)
+    self.assertEqual(p.get_unsharded_shape([[4, 3], [4, 3]]), [4, 6])
+    with self.assertRaises(ValueError):
+      _ = p.get_unsharded_shape([[4, 3]])
+    with self.assertRaises(ValueError):
+      _ = p.get_unsharded_shape([[4, 3], [4, 3], [4, 3]])
+    with self.assertRaises(ValueError):
+      _ = p.get_unsharded_shape([[4, 3], [4, 2]])
+    with self.assertRaises(TypeError):
+      _ = p.get_unsharded_shape([[4, 3], "not_a_shape"])
+    with self.assertRaises(ValueError):
+      _ = p.get_unsharded_shape([None, [4, 3]])
+    with self.assertRaises(ValueError):
+      _ = p.get_unsharded_shape([[2], [4, 3]])
+
+  def testScalar(self):
+    """Tests sharding and unsharding scalars."""
+    p = tpu_sharding.ShardingPolicy()
+    p.freeze()
+    self.assertEqual(p.get_sharded_shape([]), [])
+    self.assertEqual(p.get_unsharded_shape([[]]), [])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/tpu/tpu_system_metadata.py b/tensorflow/python/tpu/tpu_system_metadata.py
new file mode 100644
index 0000000..8668959
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_system_metadata.py
@@ -0,0 +1,156 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""TPU system metadata and associated tooling."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import tpu
+
+_PINGING_MASTER_TIMEOUT_IN_MS = 60 * 1000  # 1 min
+_RETRY_TIMES = 120
+_INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS = 300 * 1000  # 5 mins
+
+_TPU_DEVICE_REG = re.compile(r'.*task:(\d+)/.*device:TPU:(\d+)$')
+
+# _TPUSystemMetadata is used by TPUEstimator to hold TPU configuration,
+# including num_cores and num_hosts.
+_TPUSystemMetadata = collections.namedtuple('_TPUSystemMetadata', [
+    'num_cores',
+    'num_hosts',
+    'num_of_cores_per_host',
+    'topology',
+    'devices',
+])
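+
+# Illustrative example (an assumption, not part of the original change): a
+# single-host setup with eight TPU cores would be reported as
+#   _TPUSystemMetadata(num_cores=8, num_hosts=1, num_of_cores_per_host=8,
+#                      topology=None, devices=[...])
+# so num_cores == num_hosts * num_of_cores_per_host whenever cores are found.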
+
+
+def _query_tpu_system_metadata(master_address, cluster_def=None,
+                               query_topology=False):
+  """Automatically detects the TPU system metadata in the system."""
+  tpu_core_count = 0
+  devices = []
+  device_dict = collections.defaultdict(list)
+
+  # TODO(b/120564445): Replace with standard library for retries.
+  retry_count = 1
+  while True:
+    logging.info('Querying TensorFlow master (%s) for TPU system metadata.',
+                 master_address)
+    try:
+      with ops.Graph().as_default():
+        with session_lib.Session(
+            master_address,
+            config=get_session_config_with_timeout(
+                _PINGING_MASTER_TIMEOUT_IN_MS,
+                cluster_def)) as sess:
+          devices = sess.list_devices()
+          for device in devices:
+            match = _TPU_DEVICE_REG.match(device.name)
+            if match:
+              host_id = match.group(1)
+              core_id = match.group(2)
+              device_dict[host_id].append(core_id)
+              tpu_core_count += 1
+          break
+    except errors.DeadlineExceededError:
+      msg = ('Failed to connect to the TensorFlow master. The TPU worker may '
+             'not be ready (still scheduling) or the TensorFlow master '
+             'address is incorrect: got (%s).' %
+             (master_address))
+
+      # TODO(xiejw): For local or grpc master we might not need retry logic
+      # here.
+      if retry_count <= _RETRY_TIMES:
+        logging.warning('%s', msg)
+        logging.warning('Retrying (%d/%d).', retry_count, _RETRY_TIMES)
+        retry_count += 1
+      else:
+        raise ValueError(msg)
+
+  num_of_cores_per_host = 0
+  if tpu_core_count:
+    num_cores_per_host_set = set(
+        [len(core_ids) for core_ids in device_dict.values()])
+    if len(num_cores_per_host_set) != 1:
+      raise RuntimeError(
+          'The number of TPU cores on each host is not the same. This '
+          'should not happen. devices: {}'.format(devices))
+    num_of_cores_per_host = num_cores_per_host_set.pop()
+
+  topology = None
+  if query_topology:
+    if not tpu_core_count:
+      raise RuntimeError(
+          'Cannot find any TPU cores in the system (master address {}). '
+          'This usually means the master address is incorrect or the '
+          'TPU worker has some problems. Available devices: {}'.format(
+              master_address, devices))
+
+    topology = _obtain_topology(master_address, cluster_def)
+
+  metadata = _TPUSystemMetadata(
+      num_cores=tpu_core_count,
+      num_hosts=len(device_dict),
+      num_of_cores_per_host=num_of_cores_per_host,
+      topology=topology,
+      devices=devices)
+
+  if tpu_core_count:
+    logging.info('Found TPU system:')
+    logging.info('*** Num TPU Cores: %d', metadata.num_cores)
+    logging.info('*** Num TPU Workers: %d', metadata.num_hosts)
+    logging.info('*** Num TPU Cores Per Worker: %d',
+                 metadata.num_of_cores_per_host)
+    for device in metadata.devices:
+      logging.info('*** Available Device: %s', device)
+  else:
+    logging.info('Failed to find TPU: %s', metadata)
+  return metadata
+
+
+def _obtain_topology(master_address, cluster_def):
+  """Obtains TPU fabric topology."""
+  try:
+    logging.info('Initializing TPU system (master: %s) to fetch topology '
+                 'for model parallelism. This might take a while.',
+                 master_address)
+    with ops.Graph().as_default():
+      session_config = get_session_config_with_timeout(
+          _INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS, cluster_def)
+      with session_lib.Session(
+          master_address, config=session_config) as sess:
+        topology = sess.run(tpu.initialize_system())
+        return topology
+  except errors.DeadlineExceededError:
+    raise ValueError(
+        'Failed to initialize the TPU system with master (%s). '
+        'Please double-check that the TPU system is functional.' % (
+            master_address))
+
+
+def get_session_config_with_timeout(timeout_in_ms, cluster_def):
+  """Returns a session config proto with the given timeout and cluster."""
+  config = config_pb2.ConfigProto(
+      operation_timeout_in_ms=timeout_in_ms, cluster_def=cluster_def)
+  return config
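+
+# Illustrative sketch (not part of the original change): pinging a master
+# with a one-minute timeout. The empty target string is an assumption
+# standing in for a real master address.
+#
+#   config = get_session_config_with_timeout(60 * 1000, cluster_def=None)
+#   with session_lib.Session('', config=config) as sess:
+#     print(sess.list_devices())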
diff --git a/tensorflow/python/tpu/tpu_test.py b/tensorflow/python/tpu/tpu_test.py
new file mode 100644
index 0000000..69b0381
--- /dev/null
+++ b/tensorflow/python/tpu/tpu_test.py
@@ -0,0 +1,80 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Tests for tpu_function helpers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.layers import convolutional
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+from tensorflow.python.tpu import tpu
+from tensorflow.python.tpu import tpu_feed
+from tensorflow.python.tpu import training_loop
+
+
+class TPUContextTest(test.TestCase):
+
+  def testIsInContext(self):
+    """Test that control_flow_util can check that we're in a TPU context."""
+    z1 = array_ops.identity(1)
+    pivot = control_flow_ops.no_op()
+    context = tpu.TPUReplicateContext(b"context", 1, pivot=pivot)
+    context.Enter()
+    z2 = array_ops.identity(1)
+    context.Exit()
+    self.assertFalse(control_flow_util.IsInXLAContext(z1.op))
+    self.assertTrue(control_flow_util.IsInXLAContext(z2.op))
+
+
+class TPULayerRewriteTest(test.TestCase):
+
+  def testUsingInfeedQueueWithRegularizer(self):
+    """Test that Layer regularizers can reference data created in loops."""
+
+    def make_regularizer(scale):
+      return lambda inputs: scale * math_ops.reduce_sum(math_ops.square(inputs))
+
+    def training_step(inputs, scale):
+      outputs = convolutional.conv2d(
+          inputs,
+          filters=16,
+          kernel_size=(3, 3),
+          data_format="channels_first",
+          kernel_regularizer=make_regularizer(scale))
+      loss = math_ops.reduce_mean(math_ops.square(outputs))
+      return loss.op
+
+    inputs = array_ops.zeros(shape=(128, 32, 32, 16))
+    scale = array_ops.ones(shape=())
+    infeed = tpu_feed.InfeedQueue(
+        tuple_types=[dtypes.float32, dtypes.float32],
+        tuple_shapes=[inputs.shape, scale.shape])
+
+    def loop():
+      return training_loop.repeat(5, training_step, infeed_queue=infeed)
+
+    # This should not throw an error.
+    tpu.rewrite(loop)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/tpu/training_loop.py b/tensorflow/python/tpu/training_loop.py
new file mode 100644
index 0000000..cffeb7e
--- /dev/null
+++ b/tensorflow/python/tpu/training_loop.py
@@ -0,0 +1,222 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Library for constructing a training loop, suitable for TPUs."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.tpu import tensor_tracer
+from tensorflow.python.tpu import tpu_function
+from tensorflow.python.tpu import xla
+
+
+def while_loop(condition, body, inputs=None, infeed_queue=None, name=None):
+  """Builds a training loop for TPUs.
+
+  The set of loop-carried tensors corresponds to `inputs`.  Both
+  `condition` and `body` take the current values of the loop-carried
+  tensors. `body` additionally takes a tuple of arguments dequeued from
+  `infeed_queue` if `infeed_queue` is not None. `condition` must return a
+  single boolean value that determines whether iteration
+  continues. `body` must return an updated list of values for the
+  loop-carried tensors.
+
+  Args:
+    condition: a Python function that builds the loop condition.
+    body: a Python function that builds the loop body.
+    inputs: a list of initial values passed into the training loop, or
+      None (equivalent to an empty list).
+    infeed_queue: if not None, the infeed queue from which to append a tuple
+      of arguments as inputs to `body`. Infeed is not passed to `condition`.
+    name: (Deprecated) Does nothing.
+
+  Returns:
+    The final values of the loop-carried tensors.
+
+  Raises:
+    TypeError: if body or condition has the wrong signature.
+  """
+  del name
+  # Converts inputs to Tensors.
+  inputs = [] if inputs is None else [ops.convert_to_tensor(x) for
+                                      x in inputs]
+  input_types = [x.dtype for x in inputs]
+  input_arity = len(inputs)
+
+  body_arg_error = xla.check_function_argument_count(
+      body, input_arity, infeed_queue)
+  if body_arg_error is not None:
+    if infeed_queue is None:
+      raise TypeError(
+          "Supplied loop body function cannot be called with the specified "
+          "inputs. You specified %d inputs: %s, but the loop body needs %s" % (
+              input_arity, str([i.name for i in inputs]), body_arg_error))
+    else:
+      raise TypeError(
+          "Supplied loop body function cannot be called with the specified "
+          "inputs. You specified %d inputs: %s and %d additional inputs from "
+          "infeed, but the computation needs %s" %
+          (input_arity, str([i.name for i in inputs]),
+           infeed_queue.number_of_tuple_elements, body_arg_error))
+  condition_arg_error = xla.check_function_argument_count(
+      condition, input_arity, None)
+  if condition_arg_error is not None:
+    if infeed_queue is None:
+      raise TypeError(
+          "Supplied loop condition function cannot be called with the "
+          "specified inputs. You specified %d inputs: %s, but the loop "
+          "condition needs %s" % (input_arity, str([i.name for i in inputs]),
+                                  condition_arg_error))
+    else:
+      raise TypeError(
+          "Supplied loop condition function cannot be called with the "
+          "specified inputs. You specified %d inputs: %s, but the loop "
+          "condition needs %s. Note that infeed is not passed to the loop "
+          "condition." % (input_arity, str([i.name for i in inputs]),
+                          condition_arg_error))
+
+  def condition_wrapper(*inputs):
+    # Discards the dummy output added for arity-0 loops.
+    if input_arity == 0:
+      inputs = []
+    return condition(*inputs)
+
+  def body_wrapper(*inputs):
+    """Wrapper around `body` that handles infeed queues and control deps."""
+    inputs = list(inputs)
+
+    # Discards the dummy output added for arity-0 loops.
+    if input_arity == 0:
+      inputs = []
+
+    # Runs `body` with the dequeue_ops appended.
+    if infeed_queue:
+      number_of_shards = tpu_function.get_tpu_context().number_of_shards
+      if number_of_shards is None:
+        raise ValueError("Can't build training loop with infeed when there is "
+                         "no tpu_shard_context. Are you building a loop or "
+                         "graph directly rather than from inside tpu.rewrite, "
+                         "tpu.batch_parallel, tpu.shard, or tpu.replicate?")
+      infeed_queue.set_number_of_shards(number_of_shards)
+      dequeue_ops = list(infeed_queue.generate_dequeue_op())
+    else:
+      dequeue_ops = []
+    outputs = body(*(inputs + dequeue_ops))
+
+    # If the computation only returned one value, make it a tuple.
+    if not isinstance(outputs, (list, tuple)):
+      outputs = (outputs,)
+
+    outputs = [
+        o if isinstance(o, ops.Operation) else ops.convert_to_tensor(o)
+        for o in outputs
+    ]
+
+    # Separates the returned Operations and Tensors.
+    output_operations = [o for o in outputs if isinstance(o, ops.Operation)]
+    output_tensors = [o for o in outputs
+                      if not isinstance(o, ops.Operation)]
+
+    if outputs != output_tensors + output_operations:
+      raise ValueError(
+          "TPU training loop body must return zero or more Tensor values "
+          "followed by zero or more Operations.")
+
+    output_types = [op.dtype for op in output_tensors]
+    if input_types != output_types:
+      raise TypeError(
+          "Mismatch between input types and output types for training loop "
+          "body: {} vs {}".format(input_types, output_types))
+
+    # Add the dequeue operations to output_operations to ensure they are run
+    # by the loop, even if the programmer's loop body does not use them.
+    output_operations += dequeue_ops
+
+    # Add a dummy output, if needed.
+    if not output_tensors:
+      output_tensors = array_ops.constant(0)
+
+    if output_operations:
+      # TODO(phawkins): in principle this is too restrictive since it
+      # serializes the training loop steps. In practice it does not matter
+      # since this loop will be compiled by XLA.
+      output_tensors = control_flow_ops.tuple(output_tensors,
+                                              control_inputs=output_operations)
+
+    if tensor_tracer.TensorTracer.is_enabled():
+      num_replicas = tpu_function.get_tpu_context().number_of_shards
+      if num_replicas is None:
+        num_replicas = 1
+      tt = tensor_tracer.TensorTracer()
+      output_tensors = tt.trace_tpu(ops.get_default_graph(),
+                                    output_tensors, None,
+                                    num_replicas)
+    return output_tensors
+
+  # If the body has arity 0, add a dummy loop-carried value to which we can add
+  # control dependencies from any side-effecting operations.
+  if input_arity == 0:
+    inputs = [array_ops.constant(0)]
+  return control_flow_ops.while_loop(
+      condition_wrapper, body_wrapper, inputs, name="", parallel_iterations=1)
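+
+# Illustrative sketch (not part of the original change): a loop that doubles
+# a scalar until it reaches 1024, typically built inside tpu.rewrite:
+#
+#   def _cond(x):
+#     return x < 1024
+#
+#   def _body(x):
+#     return [x * 2]
+#
+#   result = while_loop(_cond, _body, inputs=[1])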
+
+
+def repeat(n, body, inputs=None, infeed_queue=None, name=None):
+  """Builds a training loop that executes a fixed number of iterations.
+
+  The set of loop-carried tensors corresponds to `inputs`.
+  `body` must be a function that takes and returns the values of the
+  loop-carried tensors.
+
+  Args:
+    n: the number of loop iterations
+    body: a Python function that builds the loop body.
+    inputs: a list of initial values passed into the training loop or
+      None (equivalent to an empty list).
+    infeed_queue: if not None, the infeed queue from which to append a tuple
+      of arguments as inputs to `body`.
+    name: (Deprecated) Does nothing.
+
+  Returns:
+    The final values of the loop-carried tensors.
+
+  Raises:
+    TypeError: if `body` has the wrong signature.
+  """
+  def _convert_to_list(xs):
+    if not isinstance(xs, (list, tuple)):
+      return [xs]
+    else:
+      return list(xs)
+
+  def cond(i, *args):
+    del args
+    return i < n
+
+  def body_wrapper(i, *args):
+    return [i + 1] + _convert_to_list(body(*args))
+
+  inputs = [0] if inputs is None else [0] + _convert_to_list(inputs)
+  outputs = while_loop(
+      cond, body_wrapper, inputs=inputs, infeed_queue=infeed_queue, name=name)
+  outputs = _convert_to_list(outputs)
+  if len(outputs) == 1:
+    # Returns the Op rather than an empty list.
+    return outputs[0].op
+  else:
+    return outputs[1:]
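+
+# Illustrative sketch (not part of the original change), mirroring the usage
+# in tpu_test.py above: run `training_step` five times per rewrite, drawing
+# arguments from an infeed queue.
+#
+#   def loop():
+#     return repeat(5, training_step, infeed_queue=infeed)
+#
+#   tpu.rewrite(loop)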
diff --git a/tensorflow/python/tpu/util.py b/tensorflow/python/tpu/util.py
new file mode 100644
index 0000000..dfb8ce1
--- /dev/null
+++ b/tensorflow/python/tpu/util.py
@@ -0,0 +1,51 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+
+"""Utilities for the functionalities."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+import six
+
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import training
+
+
+def check_positive_integer(value, name):
+  """Checks whether `value` is a positive integer."""
+  if not isinstance(value, six.integer_types):
+    raise TypeError('{} must be int, got {}'.format(name, type(value)))
+
+  if value <= 0:
+    raise ValueError('{} must be positive, got {}'.format(name, value))
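+
+# Examples (illustrative, not part of the original change):
+#   check_positive_integer(8, 'num_shards')    # passes
+#   check_positive_integer(0, 'num_shards')    # raises ValueError
+#   check_positive_integer(2.0, 'num_shards')  # raises TypeError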
+
+
+# TODO(b/118302029) Remove this copy of MultiHostDatasetInitializerHook after we
+# release a tensorflow_estimator with MultiHostDatasetInitializerHook in
+# python/estimator/util.py.
+class MultiHostDatasetInitializerHook(training.SessionRunHook):
+  """Creates a SessionRunHook that initializes all passed iterators."""
+
+  def __init__(self, dataset_initializers):
+    self._initializers = dataset_initializers
+
+  def after_create_session(self, session, coord):
+    del coord
+    start = time.time()
+    session.run(self._initializers)
+    logging.info('Initialized dataset iterators in %d seconds',
+                 time.time() - start)
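+
+# Illustrative sketch (not part of the original change): the hook is intended
+# for MonitoredSession-style loops; `dataset` is an assumed initializable
+# tf.data.Dataset.
+#
+#   iterator = dataset.make_initializable_iterator()
+#   hook = MultiHostDatasetInitializerHook([iterator.initializer])
+#   with training.MonitoredTrainingSession(hooks=[hook]) as sess:
+#     ...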
diff --git a/tensorflow/python/tpu/xla.py b/tensorflow/python/tpu/xla.py
new file mode 100644
index 0000000..58476fa
--- /dev/null
+++ b/tensorflow/python/tpu/xla.py
@@ -0,0 +1,106 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""XLA utility functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.python.util import tf_inspect
+
+
+def is_flat(outputs):
+  """Checks if outputs is a flat structure.
+
+    Following structures and values are considered flat:
+    1) None
+    2) A single object
+    3) A list or tuple of Tensors/Operations
+
+    The only structures that this function understands are sequences and
+    dictionaries.  E.g. this means that if outputs contains a single
+    user-defined Object, it is considered to be flat. Errors are raised later on
+    if that Object cannot be converted to a Tensor.
+
+  Args:
+    outputs: Output from `computation` inside `xla.compile`.
+
+  Returns:
+    A boolean indicates whether outputs is flat.
+  """
+  # If outputs is a list or tuple, check if it has any nested structure. If
+  # there is, then outputs is non-flat.
+  if isinstance(outputs, collections.Sequence):
+    for o in outputs:
+      if isinstance(o, (collections.Sequence, dict)):
+        return False
+
+  # If outputs is a dict, it is non-flat.
+  if isinstance(outputs, dict):
+    return False
+
+  # Getting here means either outputs itself is a single non-structured value
+  # or it is a flat list of single non-structured values.
+  return True
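+
+# Examples (illustrative, not part of the original change), where a and b
+# are Tensors:
+#   is_flat(None)        # True
+#   is_flat(a)           # True
+#   is_flat([a, b])      # True
+#   is_flat([a, [b]])    # False (nested sequence)
+#   is_flat({'out': a})  # False (dictionary)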
+
+
+def check_function_argument_count(func, input_arity, infeed_queue):
+  """Validate the number of input arguments to an XLA function.
+
+  Args:
+    func: the Python function that will be called to generate the body of an XLA
+      computation graph.
+    input_arity: the number of explicit arguments supplied by the caller.
+    infeed_queue: if not None, the infeed queue that will supply
+      additional arguments to the function.
+
+  Returns:
+    None if `func` can be called with the supplied number of
+    arguments, or an error string if it cannot.
+  """
+  def format_error(complaint, quantity):
+    return '%s %d argument%s' % (complaint, quantity,
+                                 '' if quantity == 1 else 's')
+
+  num_args_supplied = input_arity
+  if infeed_queue is not None:
+    num_args_supplied += infeed_queue.number_of_tuple_elements
+  arg_spec = tf_inspect.getargspec(func)
+  num_func_args = len(arg_spec.args)
+  if arg_spec.defaults is None:
+    num_func_defaults = 0
+  else:
+    num_func_defaults = len(arg_spec.defaults)
+  min_func_args = num_func_args - num_func_defaults
+  if num_args_supplied < min_func_args:
+    # The required number of arguments is not enough to call the function.
+    if num_func_defaults == 0 and arg_spec.varargs is None:
+      return format_error('exactly', num_func_args)
+    else:
+      return format_error('at least', min_func_args)
+  if arg_spec.varargs is None and num_args_supplied > num_func_args:
+    # The required number of arguments is too many to call the function.
+    if num_func_defaults == 0:
+      return format_error('exactly', num_func_args)
+    else:
+      return format_error('at most', num_func_args)
+  # Reaching here means either
+  # 1) There are varargs, func can accept any number of arguments greater than
+  # the minimum.
+  # 2) Number of supplied arguments falls in range of acceptable argument count
+  # of func.
+  return None
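+
+# Examples (illustrative, not part of the original change):
+#   def f(a, b, c=1):
+#     ...
+#   check_function_argument_count(f, 3, None)  # None: callable as supplied
+#   check_function_argument_count(f, 1, None)  # 'at least 2 arguments'
+#   check_function_argument_count(f, 4, None)  # 'at most 3 arguments'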
diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index 81e03c7..46ec3be 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -52,7 +52,7 @@
     $$t := 0 \text{(Initialize timestep)}$$
 
     The update rule for `variable` with gradient `g` uses an optimization
-    described at the end of section2 of the paper:
+    described at the end of section 2 of the paper:
 
     $$t := t + 1$$
     $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
diff --git a/tensorflow/python/training/checkpoint_management_test.py b/tensorflow/python/training/checkpoint_management_test.py
index 8606ec4..053298d 100644
--- a/tensorflow/python/training/checkpoint_management_test.py
+++ b/tensorflow/python/training/checkpoint_management_test.py
@@ -38,7 +38,7 @@
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import saver as saver_module
 from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState
-from tensorflow.python.training.checkpointable import util
+from tensorflow.python.training.tracking import util
 
 
 class LatestCheckpointWithRelativePaths(test.TestCase):
diff --git a/tensorflow/python/training/checkpointable/BUILD b/tensorflow/python/training/checkpointable/BUILD
deleted file mode 100644
index e1f58a9..0000000
--- a/tensorflow/python/training/checkpointable/BUILD
+++ /dev/null
@@ -1,251 +0,0 @@
-# Description:
-#   Utilities for reading and writing object-based checkpoints.
-
-package(
-    default_visibility = [
-        "//tensorflow:internal",
-    ],
-)
-
-licenses(["notice"])  # Apache 2.0
-
-exports_files(["LICENSE"])
-
-load("//tensorflow:tensorflow.bzl", "tf_py_test")
-load("//tensorflow/compiler/tests:build_defs.bzl", "tf_xla_py_test")
-
-py_library(
-    name = "base",
-    srcs = ["base.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops_gen",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:util",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/training/saving:saveable_object",
-        "@six_archive//:six",
-    ],
-)
-
-tf_py_test(
-    name = "base_test",
-    srcs = ["base_test.py"],
-    additional_deps = [
-        ":base",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_library(
-    name = "tracking",
-    srcs = ["tracking.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":base",
-        ":data_structures",
-    ],
-)
-
-tf_py_test(
-    name = "tracking_test",
-    srcs = ["tracking_test.py"],
-    additional_deps = [
-        ":base",
-        ":tracking",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_library(
-    name = "layer_utils",
-    srcs = ["layer_utils.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_library(
-    name = "data_structures",
-    srcs = ["data_structures.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":base",
-        ":layer_utils",
-        "//tensorflow/python/saved_model:revived_types",
-    ],
-)
-
-tf_py_test(
-    name = "data_structures_test",
-    srcs = ["data_structures_test.py"],
-    additional_deps = [
-        ":data_structures",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:layers",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/eager:test",
-        "//tensorflow/python/keras:engine",
-        "//tensorflow/python/keras:layers",
-    ],
-)
-
-py_library(
-    name = "object_identity",
-    srcs = ["object_identity.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_library(
-    name = "graph_view",
-    srcs = ["graph_view.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":base",
-        ":object_identity",
-        ":tracking",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/training/saving:saveable_object",
-        "//tensorflow/python/training/saving:saveable_object_util",
-    ],
-)
-
-py_library(
-    name = "util",
-    srcs = ["util.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":base",
-        ":data_structures",
-        ":graph_view",
-        ":object_identity",
-        ":tracking",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:checkpoint_management",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:io_ops_gen",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:pywrap_tensorflow",
-        "//tensorflow/python:saver",
-        "//tensorflow/python:session",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/eager:def_function",
-        "//tensorflow/python/training/saving:functional_saver",
-        "//tensorflow/python/training/saving:saveable_object_util",
-    ],
-)
-
-tf_py_test(
-    name = "util_test",
-    srcs = ["util_test.py"],
-    additional_deps = [
-        ":base",
-        ":graph_view",
-        ":tracking",
-        ":util",
-        "@absl_py//absl/testing:parameterized",
-        "@six_archive//:six",
-        "//tensorflow/python/keras/optimizer_v2",
-        "//tensorflow/python:checkpoint_management",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:pywrap_tensorflow",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:saver",
-        "//tensorflow/python:session",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:template",
-        "//tensorflow/python:training_util",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/eager:backprop",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/eager:def_function",
-        "//tensorflow/python/eager:test",
-        "//tensorflow/python/keras:engine",
-        "//tensorflow/python/keras:layers",
-        "//tensorflow/python:variables",
-    ],
-    tags = ["notsan"],  # b/74395663
-)
-
-tf_xla_py_test(
-    name = "util_xla_test",
-    srcs = ["util_xla_test.py"],
-    tags = [
-        "no_pip",
-        "nomac",
-        "notsan",  # b/74395663
-    ],
-    deps = [
-        ":tracking",
-        ":util",
-        "//tensorflow/compiler/tests:xla_test",
-        "//tensorflow/python:checkpoint_management",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/eager:backprop",
-        "//tensorflow/python/keras:engine",
-        "//tensorflow/python/keras:layers",
-        "//tensorflow/python/keras/optimizer_v2",
-    ],
-)
-
-tf_py_test(
-    name = "util_with_v1_optimizers_test",
-    srcs = ["util_with_v1_optimizers_test.py"],
-    additional_deps = [
-        ":base",
-        ":graph_view",
-        ":tracking",
-        ":util",
-        "@absl_py//absl/testing:parameterized",
-        "@six_archive//:six",
-        "//tensorflow/python:checkpoint_management",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:pywrap_tensorflow",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:saver",
-        "//tensorflow/python:session",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:template",
-        "//tensorflow/python:training",
-        "//tensorflow/python:training_util",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/distribute:mirrored_strategy",
-        "//tensorflow/python/eager:backprop",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/eager:def_function",
-        "//tensorflow/python/eager:test",
-        "//tensorflow/python/keras:engine",
-        "//tensorflow/python/keras:layers",
-    ],
-    tags = ["notsan"],  # b/74395663
-)
diff --git a/tensorflow/python/training/checkpointable/base.py b/tensorflow/python/training/checkpointable/base.py
deleted file mode 100644
index 0d659ce..0000000
--- a/tensorflow/python/training/checkpointable/base.py
+++ /dev/null
@@ -1,877 +0,0 @@
-"""An object-local variable management scheme."""
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import abc
-import collections
-import functools
-import json
-import weakref
-
-import six
-
-from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gen_io_ops as io_ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training.saving import saveable_object
-from tensorflow.python.util import nest
-from tensorflow.python.util import serialization
-from tensorflow.python.util import tf_decorator
-
-
-# Key where the object graph proto is saved in a TensorBundle
-OBJECT_GRAPH_PROTO_KEY = "_CHECKPOINTABLE_OBJECT_GRAPH"
-
-
-# A key indicating a variable's value in an object's checkpointed Tensors
-# (Checkpointable._gather_saveables_for_checkpoint). If this is the only key and
-# the object has no dependencies, then its value may be restored on object
-# creation (avoiding double assignment when executing eagerly).
-VARIABLE_VALUE_KEY = "VARIABLE_VALUE"
-OBJECT_CONFIG_JSON_KEY = "OBJECT_CONFIG_JSON"
-
-CheckpointableReference = collections.namedtuple(
-    "CheckpointableReference",
-    [
-        # The local name for this dependency.
-        "name",
-        # The Checkpointable object being referenced.
-        "ref"
-    ])
-
-
-class CheckpointInitialValue(ops.Tensor):
-  """Tensor wrapper for managing update UIDs in `Variables`.
-
-  When supplied as an initial value, objects of this type let a `Variable`
-  (`Variable`, `ResourceVariable`, etc.) know the UID of the restore the initial
-  value came from. This allows deferred restorations to be sequenced in the
-  order the user specified them, and lets us fall back on assignment if an
-  initial value is not set (e.g. due to a custom getter interfering).
-
-  See comments in _add_variable_with_custom_getter for more information about
-  how `CheckpointInitialValue` is used.
-  """
-
-  def __init__(self, checkpoint_position, shape=None):
-    self.wrapped_value = checkpoint_position.value_tensors()[
-        VARIABLE_VALUE_KEY]
-    if shape:
-      # We need to set the static shape information on the initializer if
-      # possible so we don't get a variable with an unknown shape.
-      self.wrapped_value.set_shape(shape)
-    self._checkpoint_position = checkpoint_position
-
-  def __getattr__(self, attr):
-    try:
-      return getattr(self.wrapped_value, attr)
-    except AttributeError:
-      return self.__getattribute__(attr)
-
-  @property
-  def checkpoint_position(self):
-    return self._checkpoint_position
-
-
-class NoRestoreSaveable(saveable_object.SaveableObject):
-  """Embeds a tensor in a checkpoint with no restore ops."""
-
-  def __init__(self, tensor, name, dtype=None):
-    spec = saveable_object.SaveSpec(tensor, "", name, dtype=dtype)
-    super(NoRestoreSaveable, self).__init__(tensor, [spec], name)
-
-  def restore(self, restored_tensors, restored_shapes):
-    return control_flow_ops.no_op()
-
-
-@six.add_metaclass(abc.ABCMeta)
-class PythonStateSaveable(saveable_object.SaveableObject):
-  """An interface for saving/restoring volatile Python state."""
-
-  @abc.abstractmethod
-  def feed_dict_additions(self):
-    """When running a graph, indicates fresh state to feed.
-
-    Returns:
-      A dictionary mapping `Tensor`s to current Python state.
-    """
-    pass
-
-  @abc.abstractmethod
-  def freeze(self):
-    """Create a new `SaveableObject` which freezes current state as a constant.
-
-    Used when executing eagerly to embed the current state as a constant, or
-    when creating a static tf.train.Saver with the frozen current Python state.
-
-    Returns:
-      A `SaveableObject` which is not a `PythonStateSaveable` instance (i.e. has
-      no Python state associated with it).
-    """
-    pass
-
-
-class PythonStringStateSaveable(PythonStateSaveable):
-  """Saves Python state in a checkpoint."""
-
-  def __init__(self, name, state_callback, restore_callback=None):
-    """Configure saving.
-
-    Args:
-      name: The checkpoint key to write to.
-      state_callback: A function taking no arguments which returns a
-        string. This function is run every time a checkpoint is written.
-      restore_callback: A function taking a Python string, used to restore
-        state. Optional; defaults to doing nothing, in which case it is ignored
-        by status assertions such as assert_consumed().
-    """
-    self._has_trivial_state_callback = (restore_callback is None)
-    def _state_callback_wrapper():
-      with ops.init_scope():
-        return state_callback()
-    self._state_callback = _state_callback_wrapper
-    self._restore_callback = restore_callback
-    with ops.device("/cpu:0"):
-      self._save_string = constant_op.constant("", dtype=dtypes.string)
-    spec = saveable_object.SaveSpec(
-        self._save_string, "", name, dtype=dtypes.string)
-    super(PythonStringStateSaveable, self).__init__(
-        self._save_string, [spec], name)
-
-  @property
-  def optional_restore(self):
-    """For values with no restore, relaxes assert_consumed()."""
-    return self._has_trivial_state_callback
-
-  def feed_dict_additions(self):
-    """When running a graph, indicates fresh state to feed."""
-    return {self._save_string: self._state_callback()}
-
-  def freeze(self):
-    """Create a frozen `SaveableObject` which saves the current state."""
-    def _constant_state():
-      return constant_op.constant(self._state_callback(), dtype=dtypes.string)
-    return NoRestoreSaveable(
-        tensor=_constant_state,
-        dtype=dtypes.string,
-        name=self.name)
-
-  def python_restore(self, restored_strings):
-    """Called to restore Python state."""
-    if self._restore_callback:
-      restored, = restored_strings
-      self._restore_callback(restored)
-
-  def restore(self, restored_tensors, restored_shapes):
-    """Called to restore TensorFlow state (nothing to do)."""
-    return control_flow_ops.no_op()
-
-
-class CheckpointPosition(object):
-  """Indicates a position within a `_CheckpointRestoreCoordinator`."""
-
-  def __init__(self, checkpoint, proto_id):
-    """Specify an object within a checkpoint.
-
-    Args:
-      checkpoint: A _CheckpointRestoreCoordinator object.
-      proto_id: The index of this object in CheckpointableObjectGraph.nodes.
-    """
-    self._checkpoint = checkpoint
-    self._proto_id = proto_id
-
-  def restore(self, checkpointable):
-    """Restore this value into `checkpointable`."""
-    with ops.init_scope():
-      if self.bind_object(checkpointable):
-        # This object's correspondence with a checkpointed object is new, so
-        # process deferred restorations for it and its dependencies.
-        restore_ops = checkpointable._restore_from_checkpoint_position(self)  # pylint: disable=protected-access
-        if restore_ops:
-          self._checkpoint.new_restore_ops(restore_ops)
-
-  def bind_object(self, checkpointable):
-    """Set a checkpoint<->object correspondence and process slot variables.
-
-    Args:
-      checkpointable: The object to record a correspondence for.
-    Returns:
-      True if this is a new assignment, False if this object has already been
-      mapped to a checkpointed `Object` proto.
-    Raises:
-      AssertionError: If another object is already bound to the `Object` proto.
-    """
-    checkpoint = self.checkpoint
-    checkpoint.all_python_objects.add(checkpointable)
-    current_assignment = checkpoint.object_by_proto_id.get(self._proto_id, None)
-    if current_assignment is None:
-      checkpoint.object_by_proto_id[self._proto_id] = checkpointable
-      for deferred_slot_restoration in (
-          checkpoint.deferred_slot_restorations.pop(self._proto_id, ())):
-        checkpointable._create_or_restore_slot_variable(  # pylint: disable=protected-access
-            slot_variable_position=CheckpointPosition(
-                checkpoint=checkpoint,
-                proto_id=deferred_slot_restoration.slot_variable_id),
-            variable=deferred_slot_restoration.original_variable,
-            slot_name=deferred_slot_restoration.slot_name)
-      for slot_restoration in checkpoint.slot_restorations.pop(
-          self._proto_id, ()):
-        optimizer_object = checkpoint.object_by_proto_id.get(
-            slot_restoration.optimizer_id, None)
-        if optimizer_object is None:
-          # The optimizer has not yet been created or tracked. Record in the
-          # checkpoint that the slot variables need to be restored when it is.
-          checkpoint.deferred_slot_restorations.setdefault(
-              slot_restoration.optimizer_id, []).append(
-                  _DeferredSlotVariableRestoration(
-                      original_variable=checkpointable,
-                      slot_variable_id=slot_restoration.slot_variable_id,
-                      slot_name=slot_restoration.slot_name))
-        else:
-          optimizer_object._create_or_restore_slot_variable(  # pylint: disable=protected-access
-              slot_variable_position=CheckpointPosition(
-                  checkpoint=checkpoint,
-                  proto_id=slot_restoration.slot_variable_id),
-              variable=checkpointable,
-              slot_name=slot_restoration.slot_name)
-      return True  # New assignment
-    else:
-      # The object was already mapped for this checkpoint load, which means
-      # we don't need to do anything besides check that the mapping is
-      # consistent (if the dependency DAG is not a tree then there are
-      # multiple paths to the same object).
-      if current_assignment is not checkpointable:
-        logging.warning(
-            ("Inconsistent references when loading the checkpoint into this "
-             "object graph. Either the Checkpointable object references in the "
-             "Python program have changed in an incompatible way, or the "
-             "checkpoint was generated in an incompatible program.\n\nTwo "
-             "checkpoint references resolved to different objects (%s and %s).")
-            % (current_assignment, checkpointable))
-      return False  # Not a new assignment
-
-  def is_simple_variable(self):
-    """Determine whether this value is restorable with a Tensor initializer."""
-    attributes = self.object_proto.attributes
-    return (len(attributes) == 1
-            and attributes[0].name == VARIABLE_VALUE_KEY
-            and not self.object_proto.children)
-
-  def value_tensors(self):
-    """Create value `Tensor`s for this object's attributes.
-
-    Does not require that the Python object has been created. Used for
-    restore-on-create when executing eagerly.
-
-    Returns:
-      A dictionary mapping from object attribute names to `Tensor`s.
-    """
-    value_tensors = {}
-    for serialized_tensor in self.object_proto.attributes:
-      checkpoint_key = serialized_tensor.checkpoint_key
-      dtype = self._checkpoint.dtype_map[checkpoint_key]
-      base_type = dtype.base_dtype
-      with ops.init_scope():
-        with ops.device("/cpu:0"):
-          # Run the restore itself on the CPU.
-          value, = io_ops.restore_v2(
-              prefix=self._checkpoint.save_path_tensor,
-              tensor_names=[checkpoint_key],
-              shape_and_slices=[""],
-              dtypes=[base_type],
-              name="%s_checkpoint_read" % (serialized_tensor.name,))
-        # Copy the value to the current device if necessary.
-        value_tensors[serialized_tensor.name] = array_ops.identity(value)
-      return value_tensors
-
-  def _gather_ops_or_named_saveables(self):
-    """Looks up or creates SaveableObjects which don't have cached ops."""
-    saveables = self.checkpointable._gather_saveables_for_checkpoint()  # pylint: disable=protected-access
-    # Name saveables based on the name this object had when it was checkpointed.
-    named_saveables = {}
-    python_saveables = []
-    existing_restore_ops = []
-    for serialized_tensor in self.object_proto.attributes:
-      if context.executing_eagerly():
-        existing_op = None
-      else:
-        existing_op = self._checkpoint.restore_ops_by_name.get(
-            serialized_tensor.checkpoint_key, None)
-      if existing_op is not None:
-        existing_restore_ops.append(existing_op)
-        continue
-
-      # Only if we don't have cached ops for this SaveableObject, we'll see if
-      # the SaveableObject itself has been cached. If not, we'll make it, and
-      # either way we'll extract new ops from it (or if it has Python state to
-      # restore, we'll run that).
-      saveables_cache = self._checkpoint.graph_view.saveables_cache
-      if saveables_cache is None:
-        # No SaveableObject caching when executing eagerly.
-        saveable = None
-      else:
-        # If we've already created and cached a SaveableObject for this
-        # attribute, we can re-use it to avoid re-creating some ops when graph
-        # building.
-        saveable_list = saveables_cache.get(
-            self.checkpointable, {}).get(serialized_tensor.name, (None,))
-        if len(saveable_list) == 1:
-          # Almost every attribute will have exactly one SaveableObject.
-          saveable, = saveable_list
-        else:
-          # Don't use cached SaveableObjects for partitioned variables, which is
-          # the only case where we'd have a list of SaveableObjects. Op caching
-          # will catch them.
-          saveable = None
-      if saveable is not None:
-        # The name of this attribute has changed, so we need to re-generate
-        # the SaveableObject.
-        if serialized_tensor.checkpoint_key not in saveable.name:
-          saveable = None
-          del saveables_cache[self.checkpointable]
-          break
-      if saveable is None:
-        # If there was no cached SaveableObject, we should check if the Python
-        # object has the attribute.
-        saveable_factory = saveables.get(serialized_tensor.name, None)
-        if saveable_factory is None:
-          # Purposefully does not throw an exception if attributes have been
-          # added or deleted. Stores unused attributes so an exception can be
-          # raised if the user decides to check that everything in the
-          # checkpoint was loaded.
-          if not serialized_tensor.optional_restore:
-            self._checkpoint.unused_attributes.setdefault(
-                self.checkpointable, []).append(serialized_tensor.name)
-          continue
-        if callable(saveable_factory):
-          saveable = saveable_factory(name=serialized_tensor.checkpoint_key)
-        else:
-          saveable = saveable_factory
-        if saveables_cache is not None:
-          saveables_cache.setdefault(
-              self.checkpointable, {})[serialized_tensor.name] = [saveable]
-      if isinstance(saveable, PythonStateSaveable):
-        python_saveables.append(saveable)
-      else:
-        named_saveables[serialized_tensor.checkpoint_key] = saveable
-    return existing_restore_ops, named_saveables, python_saveables
-
-  def restore_ops(self):
-    """Create or fetch restore ops for this object's attributes.
-
-    Requires that the `Checkpointable` Python object has been bound to an object
-    ID in the checkpoint.
-
-    Returns:
-      A list of operations when graph building, or an empty list when executing
-      eagerly.
-    """
-    (restore_ops,
-     tensor_saveables,
-     python_saveables) = self._gather_ops_or_named_saveables()
-    restore_ops.extend(self._checkpoint.restore_saveables(
-        tensor_saveables, python_saveables))
-    return restore_ops
-
-  @property
-  def checkpoint(self):
-    return self._checkpoint
-
-  @property
-  def checkpointable(self):
-    return self._checkpoint.object_by_proto_id[self._proto_id]
-
-  @property
-  def object_proto(self):
-    return self._checkpoint.object_graph_proto.nodes[self._proto_id]
-
-  @property
-  def restore_uid(self):
-    return self._checkpoint.restore_uid
-
-  def __repr__(self):
-    return repr(self.object_proto)
-
-
-_DeferredSlotVariableRestoration = collections.namedtuple(
-    "_DeferredSlotVariableRestoration",
-    [
-        "original_variable",
-        "slot_variable_id",
-        "slot_name",
-    ]
-)
-
-_SlotVariableRestoration = collections.namedtuple(
-    "_SlotVariableRestoration",
-    [
-        # The checkpoint proto id of the optimizer object.
-        "optimizer_id",
-        # The checkpoint proto id of the slot variable.
-        "slot_variable_id",
-        "slot_name",
-    ])
-
-
-def no_automatic_dependency_tracking(method):
-  """Disables automatic dependency tracking on attribute assignment.
-
-  Use to decorate any method of a Checkpointable object. Attribute assignment in
-  that method will not add dependencies (also respected in Model). Harmless if
-  used in a class which does not do automatic dependency tracking (which means
-  it's safe to use in base classes which may have subclasses which also inherit
-  from Checkpointable).
-
-  Args:
-    method: The method to decorate.
-  Returns:
-    A decorated method which sets and un-sets automatic dependency tracking for
-    the object the method is called on (not thread safe).
-  """
-
-  def _method_wrapper(self, *args, **kwargs):
-    previous_value = getattr(self, "_setattr_tracking", True)
-    self._setattr_tracking = False  # pylint: disable=protected-access
-    try:
-      method(self, *args, **kwargs)
-    finally:
-      self._setattr_tracking = previous_value  # pylint: disable=protected-access
-
-  return tf_decorator.make_decorator(
-      target=method, decorator_func=_method_wrapper)
-
-
-class Checkpointable(object):
-  """Base class for `Checkpointable` objects without automatic dependencies.
-
-  This class has no __setattr__ override for performance reasons. Dependencies
-  must be added explicitly. Unless attribute assignment is performance-critical,
-  use `AutoCheckpointable` instead. Use `Checkpointable` for `isinstance`
-  checks.
-  """
-
-  # Checkpointable does not do automatic dependency tracking, but uses the
-  # no_automatic_dependency_tracking decorator so it can avoid adding
-  # dependencies if a subclass is Checkpointable / inherits from Model (both of
-  # which have __setattr__ overrides).
-  @no_automatic_dependency_tracking
-  def _maybe_initialize_checkpointable(self):
-    """Initialize dependency management.
-
-    Not __init__, since most objects will forget to call it.
-    """
-    if hasattr(self, "_unconditional_checkpoint_dependencies"):
-      # __init__ already called. This check means that we don't need
-      # Checkpointable.__init__() in the constructor of every TensorFlow object.
-      return
-    # A list of CheckpointableReference objects. Some classes implementing
-    # `Checkpointable`, notably `Optimizer`s, may override the
-    # _checkpoint_dependencies property with conditional dependencies
-    # (e.g. based on the current graph when saving).
-    self._unconditional_checkpoint_dependencies = []
-    # Maps names -> Checkpointable objects
-    self._unconditional_dependency_names = {}
-    # Restorations for other Checkpointable objects on which this object may
-    # eventually depend. Maps local name -> CheckpointPosition list. Optimizers
-    # tack on conditional dependencies, and so need separate management of
-    # deferred dependencies too.
-    self._unconditional_deferred_dependencies = {}
-    # The UID of the highest assignment to this object. Used to ensure that the
-    # last requested assignment determines the final value of an object.
-    if hasattr(self, "_update_uid"):
-      raise AssertionError(
-          "Internal error: the object had an update UID set before its "
-          "initialization code was run.")
-    self._update_uid = -1
-    # When executing eagerly, holds a collection of _NameBasedRestoreCoordinator
-    # instances, which should be checked when creating variables or other
-    # saveables. These are passed on recursively to all dependencies, since
-    # unlike object-based checkpoint restores we don't know which subgraph is
-    # being restored in advance. This mechanism is only necessary for
-    # restore-on-create when executing eagerly, and so is unused when graph
-    # building.
-    self._name_based_restores = set()
-
-  def _no_dependency(self, value):
-    """If automatic dependency tracking is enabled, ignores `value`."""
-    return value
-
-  def _name_based_attribute_restore(self, checkpoint):
-    """Restore the object's attributes from a name-based checkpoint."""
-    self._name_based_restores.add(checkpoint)
-    if self._update_uid < checkpoint.restore_uid:
-      checkpoint.eager_restore(self)
-      self._update_uid = checkpoint.restore_uid
-
-  @property
-  def _checkpoint_dependencies(self):
-    """All dependencies of this object.
-
-    May be overridden to include conditional dependencies.
-
-    Returns:
-      A list of `CheckpointableReference` objects indicating named
-      `Checkpointable` dependencies which should be saved along with this
-      object.
-    """
-    return self._unconditional_checkpoint_dependencies
-
-  @property
-  def _deferred_dependencies(self):
-    """A dictionary with deferred dependencies.
-
-    Stores restorations for other Checkpointable objects on which this object
-    may eventually depend. May be overridden by sub-classes (e.g. Optimizers use
-    conditional dependencies based on the current graph, and so need separate
-    management of deferred dependencies too).
-
-    Returns:
-      A dictionary mapping from local name to a list of CheckpointPosition
-      objects.
-    """
-    return self._unconditional_deferred_dependencies
-
-  def _lookup_dependency(self, name):
-    """Look up a dependency by name.
-
-    May be overridden to include conditional dependencies.
-
-    Args:
-      name: The local name of the dependency.
-    Returns:
-      A `Checkpointable` object, or `None` if no dependency by this name was
-      found.
-    """
-    return self._unconditional_dependency_names.get(name, None)
-
-  def _add_variable_with_custom_getter(
-      self, name, shape=None, dtype=dtypes.float32,
-      initializer=None, getter=None, overwrite=False,
-      **kwargs_for_getter):
-    """Restore-on-create for a variable to be saved with this `Checkpointable`.
-
-    If the user has requested that this object or another `Checkpointable` which
-    depends on this object be restored from a checkpoint (deferred loading
-    before variable object creation), `initializer` may be ignored and the value
-    from the checkpoint used instead.
-
-    Args:
-      name: A name for the variable. Must be unique within this object.
-      shape: The shape of the variable.
-      dtype: The data type of the variable.
-      initializer: The initializer to use. Ignored if there is a deferred
-        restoration left over from a call to
-        `_restore_from_checkpoint_position`.
-      getter: The getter to wrap which actually fetches the variable.
-      overwrite: If True, disables unique name and type checks.
-      **kwargs_for_getter: Passed to the getter.
-
-    Returns:
-      The new variable object.
-
-    Raises:
-      ValueError: If the variable name is not unique.
-    """
-    self._maybe_initialize_checkpointable()
-    with ops.init_scope():
-      if context.executing_eagerly():
-        # If this is a variable with a single Tensor stored in the checkpoint,
-        # we can set that value as an initializer rather than initializing and
-        # then assigning (when executing eagerly). This call returns None if
-        # there is nothing to restore.
-        checkpoint_initializer = self._preload_simple_restoration(
-            name=name, shape=shape)
-      else:
-        checkpoint_initializer = None
-      if (checkpoint_initializer is not None
-          and not (
-              isinstance(initializer, CheckpointInitialValue)
-              and (initializer.restore_uid
-                   > checkpoint_initializer.restore_uid))):
-        # If multiple Checkpointable objects are "creating" the same variable
-        # via the magic of custom getters, the one with the highest restore UID
-        # (the one called last) has to make the final initializer. If another
-        # custom getter interrupts this process by overwriting the initializer,
-        # then we'll catch that when we call _track_checkpointable. So this is
-        # "best effort" to set the initializer with the highest restore UID.
-        initializer = checkpoint_initializer
-        shape = None
-    new_variable = getter(
-        name=name, shape=shape, dtype=dtype, initializer=initializer,
-        **kwargs_for_getter)
-
-    # If we set an initializer and the variable processed it, tracking will not
-    # assign again. It will add this variable to our dependencies, and if there
-    # is a non-trivial restoration queued, it will handle that. This also
-    # handles slot variables.
-    if not overwrite or isinstance(new_variable, Checkpointable):
-      return self._track_checkpointable(new_variable, name=name,
-                                        overwrite=overwrite)
-    else:
-      # TODO(allenl): Some variable types are not yet supported. Remove this
-      # fallback once all get_variable() return types are Checkpointable.
-      return new_variable
-
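-  # A sketch of how a subclass might call this hook (hypothetical shapes and
-  # names; `variable_scope.get_variable` is one getter used in this codebase):
-  #
-  #   v = self._add_variable_with_custom_getter(
-  #       name="kernel", shape=[3, 4],
-  #       getter=variable_scope.get_variable)
-  #
-  # When executing eagerly with a pending simple restore for "kernel", the
-  # checkpointed value is used as the initializer; otherwise any queued
-  # restoration runs when the new variable is tracked.
-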
-  def _preload_simple_restoration(self, name, shape):
-    """Return a dependency's value for restore-on-create.
-
-    Note the restoration is not deleted; if for some reason preload is called
-    and then not assigned to the variable (for example because a custom getter
-    overrides the initializer), the assignment will still happen once the
-    variable is tracked (determined based on checkpoint.restore_uid).
-
-    Args:
-      name: The object-local name of the dependency holding the variable's
-        value.
-      shape: The shape of the variable being loaded into.
-    Returns:
-      A callable for use as a variable's initializer/initial_value, or None if
-      one should not be set (either because there was no variable with this name
-      in the checkpoint or because it needs more complex deserialization). Any
-      non-trivial deserialization will happen when the variable object is
-      tracked.
-    """
-    deferred_dependencies_list = self._deferred_dependencies.get(name, ())
-    if not deferred_dependencies_list:
-      # Nothing to do; we don't have a restore for this dependency queued up.
-      return
-    for checkpoint_position in deferred_dependencies_list:
-      if not checkpoint_position.is_simple_variable():
-        # If _any_ pending restoration is too complicated to fit in an
-        # initializer (because it has dependencies, or because there are
-        # multiple Tensors to restore), bail and let the general tracking code
-        # handle it.
-        return None
-    checkpoint_position = max(
-        deferred_dependencies_list,
-        key=lambda restore: restore.checkpoint.restore_uid)
-    return CheckpointInitialValue(
-        checkpoint_position=checkpoint_position, shape=shape)
-
-  def _track_checkpointable(self, checkpointable, name, overwrite=False):
-    """Declare a dependency on another `Checkpointable` object.
-
-    Indicates that checkpoints for this object should include variables from
-    `checkpointable`.
-
-    Variables in a checkpoint are mapped to `Checkpointable`s based on the names
-    provided when the checkpoint was written. To avoid breaking existing
-    checkpoints when modifying a class, neither variable names nor dependency
-    names (the names passed to `_track_checkpointable`) may change.
-
-    Args:
-      checkpointable: A `Checkpointable` which this object depends on.
-      name: A local name for `checkpointable`, used for loading checkpoints into
-        the correct objects.
-      overwrite: Boolean, whether silently replacing dependencies is OK. Used
-        for __setattr__, where throwing an error on attribute reassignment would
-        be inappropriate.
-
-    Returns:
-      `checkpointable`, for convenience when declaring a dependency and
-      assigning to a member variable in one statement.
-
-    Raises:
-      TypeError: If `checkpointable` does not inherit from `Checkpointable`.
-      ValueError: If another object is already tracked by this name.
-    """
-    self._maybe_initialize_checkpointable()
-    if not isinstance(checkpointable, Checkpointable):
-      raise TypeError(
-          ("Checkpointable._track_checkpointable() passed type %s, not a "
-           "Checkpointable.") % (type(checkpointable),))
-    new_reference = CheckpointableReference(name=name, ref=checkpointable)
-    current_object = self._lookup_dependency(name)
-    if (current_object is not None
-        and current_object is not checkpointable):
-      if not overwrite:
-        raise ValueError(
-            ("Called Checkpointable._track_checkpointable() with name='%s', "
-             "but a Checkpointable with this name is already declared as a "
-             "dependency. Names must be unique (or overwrite=True).") % (name,))
-      # This is a weird thing to do, but we're not going to stop people from
-      # using __setattr__.
-      for index, (old_name, _) in enumerate(
-          self._unconditional_checkpoint_dependencies):
-        if name == old_name:
-          self._unconditional_checkpoint_dependencies[index] = new_reference
-    elif current_object is None:
-      self._unconditional_checkpoint_dependencies.append(new_reference)
-      self._handle_deferred_dependencies(
-          name=name, checkpointable=checkpointable)
-    self._unconditional_dependency_names[name] = checkpointable
-    return checkpointable
-
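-  # Typical usage sketch (hypothetical attribute and layer):
-  #
-  #   self._embedding = self._track_checkpointable(
-  #       embedding_layer, name="embedding")
-  #
-  # Checkpoints written afterwards address the layer's variables under the
-  # dependency name "embedding", so renaming it would break old checkpoints.
-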
-  def _handle_deferred_dependencies(self, name, checkpointable):
-    """Pop and load any deferred checkpoint restores into `checkpointable`.
-
-    This method does not add a new dependency on `checkpointable`, but it does
-    check if any outstanding/deferred dependencies have been queued waiting for
-    this dependency to be added (matched based on `name`). If so,
-    `checkpointable` and its dependencies are restored. The restorations are
-    considered fulfilled and so are deleted.
-
-    `_track_checkpointable` is more appropriate for adding a
-    normal/unconditional dependency, and includes handling for deferred
-    restorations. This method allows objects such as `Optimizer` to use the same
-    restoration logic while managing conditional dependencies themselves, by
-    overriding `_checkpoint_dependencies` and `_lookup_dependency` to change the
-    object's dependencies based on the context it is saved/restored in (a single
-    optimizer instance can have state associated with multiple graphs).
-
-    Args:
-      name: The name of the dependency within this object (`self`), used to
-        match `checkpointable` with values saved in a checkpoint.
-      checkpointable: The Checkpointable object to restore (inheriting from
-        `Checkpointable`).
-    """
-    self._maybe_initialize_checkpointable()
-    checkpointable._maybe_initialize_checkpointable()  # pylint: disable=protected-access
-    deferred_dependencies_list = self._deferred_dependencies.pop(name, ())
-    for checkpoint_position in sorted(
-        deferred_dependencies_list,
-        key=lambda restore: restore.checkpoint.restore_uid,
-        reverse=True):
-      checkpoint_position.restore(checkpointable)
-
-    # Pass on any name-based restores queued in this object.
-    for name_based_restore in sorted(
-        self._name_based_restores,
-        key=lambda checkpoint: checkpoint.restore_uid,
-        reverse=True):
-      checkpointable._name_based_attribute_restore(name_based_restore)  # pylint: disable=protected-access
-
-  def _restore_from_checkpoint_position(self, checkpoint_position):
-    """Restore this object and its dependencies (may be deferred)."""
-    # Attempt a breadth-first traversal, since presumably the user has more
-    # control over shorter paths. If we don't have all of the dependencies at
-    # this point, the end result is not breadth-first (since other deferred
-    # traversals will happen later).
-    visit_queue = collections.deque([checkpoint_position])
-    restore_ops = []
-    while visit_queue:
-      current_position = visit_queue.popleft()
-      restore_ops.extend(nest.flatten(
-          current_position.checkpointable  # pylint: disable=protected-access
-          ._single_restoration_from_checkpoint_position(
-              checkpoint_position=current_position,
-              visit_queue=visit_queue)))
-    return restore_ops
-
-  def _single_restoration_from_checkpoint_position(
-      self, checkpoint_position, visit_queue):
-    """Restore this object, and either queue its dependencies or defer them."""
-    self._maybe_initialize_checkpointable()
-    checkpoint = checkpoint_position.checkpoint
-    # If the UID of this restore is lower than our current update UID, we don't
-    # need to actually restore the object. However, we should pass the
-    # restoration on to our dependencies.
-    if checkpoint.restore_uid > self._update_uid:
-      restore_ops = checkpoint_position.restore_ops()
-      self._update_uid = checkpoint.restore_uid
-    else:
-      restore_ops = ()
-    for child in checkpoint_position.object_proto.children:
-      child_position = CheckpointPosition(
-          checkpoint=checkpoint,
-          proto_id=child.node_id)
-      local_object = self._lookup_dependency(child.local_name)
-      if local_object is None:
-        # We don't yet have a dependency registered with this name. Save it
-        # in case we do.
-        self._deferred_dependencies.setdefault(child.local_name, []).append(
-            child_position)
-      else:
-        if child_position.bind_object(checkpointable=local_object):
-          # This object's correspondence is new, so dependencies need to be
-          # visited. Delay doing it so that we get a breadth-first dependency
-          # resolution order (shallowest paths first). The caller is responsible
-          # for emptying visit_queue.
-          visit_queue.append(child_position)
-    return restore_ops
-
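-  # Deferred-restore sketch (hypothetical objects; `util.Checkpoint` lives in
-  # the sibling util module): a child in the checkpoint with no matching
-  # dependency is parked in _deferred_dependencies and replayed later:
-  #
-  #   status = util.Checkpoint(root=root).restore(save_path)  # deferred
-  #   root._track_checkpointable(child, name="child")  # replays the restore
-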
-  def _gather_saveables_for_checkpoint(self):
-    """Returns a dictionary of values to checkpoint with this object.
-
-    Keys in the returned dictionary are local to this object and in a separate
-    namespace from dependencies. Values may either be `SaveableObject` factories
-    or variables easily converted to `SaveableObject`s (as in `tf.train.Saver`'s
-    `var_list` constructor argument).
-
-    `SaveableObjects` have a name set, which Checkpointable needs to generate
-    itself. So rather than returning `SaveableObjects` directly, this method
-    should return a dictionary of callables which take `name` arguments and
-    return `SaveableObjects` with that name.
-
-    If this object may also be passed to the global-name-based `tf.train.Saver`,
-    the returned callables should have a default value for their name argument
-    (i.e. be callable with no arguments).
-
-    Returned values must be saved only by this object; if any value may be
-    shared, it should instead be a dependency. For example, variable objects
-    save their own values with the key `VARIABLE_VALUE_KEY`, but objects which
-    reference variables simply add a dependency.
-
-    Returns:
-      The dictionary mapping attribute names to `SaveableObject` factories
-      described above. For example:
-      {VARIABLE_VALUE_KEY:
-       lambda name="global_name_for_this_object":
-       SaveableObject(name=name, ...)}
-    """
-    if not hasattr(self, "get_config"):
-      return {}
-    try:
-      self.get_config()
-    except NotImplementedError:
-      return {}
-    weak_self = weakref.ref(self)
-    def _state_callback():
-      """Serializes `self.get_config()` for saving."""
-      dereferenced_self = weak_self()
-      if dereferenced_self:
-        try:
-          return json.dumps(dereferenced_self,
-                            default=serialization.get_json_type,
-                            sort_keys=True).encode("utf8")
-        except TypeError:
-          # Even if get_config worked, objects may have produced garbage.
-          return ""
-      else:
-        return ""
-    return {OBJECT_CONFIG_JSON_KEY: functools.partial(
-        PythonStringStateSaveable,
-        state_callback=_state_callback)}
-
-  def _list_functions_for_serialization(self):
-    """Lists the functions of this checkpointable to serialize.
-
-    Internal sub-classes can override this with specific logic. E.g.
-    `AutoCheckpointable` provides an implementation that returns the
-    attributes which are functions.
-
-    Returns:
-        A dictionary mapping attribute names to `Function` or
-        `ConcreteFunction`.
-    """
-    return dict()
diff --git a/tensorflow/python/training/checkpointable/base_test.py b/tensorflow/python/training/checkpointable/base_test.py
deleted file mode 100644
index 5863f5a..0000000
--- a/tensorflow/python/training/checkpointable/base_test.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import os
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import util
-
-
-class InterfaceTests(test.TestCase):
-
-  def testOverwrite(self):
-    root = base.Checkpointable()
-    leaf = base.Checkpointable()
-    root._track_checkpointable(leaf, name="leaf")
-    (current_name, current_dependency), = root._checkpoint_dependencies
-    self.assertIs(leaf, current_dependency)
-    self.assertEqual("leaf", current_name)
-    duplicate_name_dep = base.Checkpointable()
-    with self.assertRaises(ValueError):
-      root._track_checkpointable(duplicate_name_dep, name="leaf")
-    root._track_checkpointable(duplicate_name_dep, name="leaf", overwrite=True)
-    (current_name, current_dependency), = root._checkpoint_dependencies
-    self.assertIs(duplicate_name_dep, current_dependency)
-    self.assertEqual("leaf", current_name)
-
-  def testAddVariableOverwrite(self):
-    root = base.Checkpointable()
-    a = root._add_variable_with_custom_getter(
-        name="v", shape=[], getter=variable_scope.get_variable)
-    self.assertEqual([root, a], util.list_objects(root))
-    with ops.Graph().as_default():
-      b = root._add_variable_with_custom_getter(
-          name="v", shape=[], overwrite=True,
-          getter=variable_scope.get_variable)
-      self.assertEqual([root, b], util.list_objects(root))
-    with ops.Graph().as_default():
-      with self.assertRaisesRegexp(
-          ValueError, "already declared as a dependency"):
-        root._add_variable_with_custom_getter(
-            name="v", shape=[], overwrite=False,
-            getter=variable_scope.get_variable)
-
-  def testAssertConsumedWithUnusedPythonState(self):
-    has_config = base.Checkpointable()
-    has_config.get_config = lambda: {}
-    saved = util.Checkpoint(obj=has_config)
-    save_path = saved.save(os.path.join(self.get_temp_dir(), "ckpt"))
-    restored = util.Checkpoint(obj=base.Checkpointable())
-    restored.restore(save_path).assert_consumed()
-
-  def testAssertConsumedFailsWithUsedPythonState(self):
-    has_config = base.Checkpointable()
-    attributes = {
-        "foo_attr": functools.partial(
-            base.PythonStringStateSaveable,
-            state_callback=lambda: "",
-            restore_callback=lambda x: None)}
-    has_config._gather_saveables_for_checkpoint = lambda: attributes
-    saved = util.Checkpoint(obj=has_config)
-    save_path = saved.save(os.path.join(self.get_temp_dir(), "ckpt"))
-    restored = util.Checkpoint(obj=base.Checkpointable())
-    status = restored.restore(save_path)
-    with self.assertRaisesRegexp(AssertionError, "foo_attr"):
-      status.assert_consumed()
-
-  def testBuggyGetConfig(self):
-
-    class NotSerializable(object):
-      pass
-
-    class GetConfigRaisesError(base.Checkpointable):
-
-      def get_config(self):
-        return NotSerializable()
-
-    util.Checkpoint(obj=GetConfigRaisesError()).save(
-        os.path.join(self.get_temp_dir(), "ckpt"))
-
-
-if __name__ == "__main__":
-  ops.enable_eager_execution()
-  test.main()
diff --git a/tensorflow/python/training/checkpointable/data_structures.py b/tensorflow/python/training/checkpointable/data_structures.py
deleted file mode 100644
index ae3ab3f..0000000
--- a/tensorflow/python/training/checkpointable/data_structures.py
+++ /dev/null
@@ -1,842 +0,0 @@
-"""Checkpointable data structures."""
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import copy
-import operator
-import sys
-
-import six
-
-from tensorflow.python.eager import def_function
-from tensorflow.python.eager import function as defun
-from tensorflow.python.ops import variables
-from tensorflow.python.saved_model import revived_types
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import layer_utils
-
-
-class NoDependency(object):
-  """Allows attribute assignment to `Checkpointable` objects with no dependency.
-
-  Example usage:
-  ```python
-  obj = Checkpointable()
-  obj.has_dependency = tf.Variable(0., name="dep")
-  obj.no_dependency = NoDependency(tf.Variable(1., name="nodep"))
-  assert obj.no_dependency.name == "nodep:0"
-  ```
-
-  `obj` in this example has a dependency on the variable "dep", and both
-  attributes contain un-wrapped `Variable` objects.
-
-  `NoDependency` also works with `tf.keras.Model`, but only for checkpoint
-  dependencies: wrapping a `Layer` in `NoDependency` will assign the (unwrapped)
-  `Layer` to the attribute without a checkpoint dependency, but the `Model` will
-  still track the `Layer` (so it will appear in `Model.layers`, and its
-  variables will appear in `Model.variables`).
-  """
-
-  def __init__(self, value):
-    self.value = value
-
-
-def _wrap_or_unwrap(value):
-  """Wraps basic data structures, unwraps NoDependency objects."""
-  if isinstance(value, NoDependency):
-    return value.value
-  if isinstance(value, base.Checkpointable):
-    return value  # Skip conversion for already checkpointable objects.
-  elif isinstance(value, dict):
-    return _DictWrapper(value)
-  elif isinstance(value, list):
-    return _ListWrapper(value)
-  else:
-    return value
-  # TODO(allenl): Handle other common data structures. Tuples will require
-  # special casing (tuple subclasses are not weak referenceable, so replacement
-  # with a wrapper that subclasses tuple on attribute assignment works poorly,
-  # and replacement with a wrapper that isn't a tuple is also problematic),
-  # probably a tree traversal where the leaves are non-tuples(/namedtuples) to
-  # come up with names. Dictionaries should look like lists.
-
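-# Behavior sketch:
-#
-#   _wrap_or_unwrap([layer])              # -> _ListWrapper([layer])
-#   _wrap_or_unwrap({"a": layer})         # -> _DictWrapper({"a": layer})
-#   _wrap_or_unwrap(NoDependency(value))  # -> value, unwrapped
-#   _wrap_or_unwrap(checkpointable_obj)   # -> returned as-is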
-
-def sticky_attribute_assignment(checkpointable, name, value):
-  """Adds dependencies, generally called from __setattr__.
-
-  This behavior is shared between Checkpointable and Model.
-
-  Respects NoDependency indicators, but otherwise makes checkpointable objects
-  out of common data structures and tracks objects by their attribute names.
-
-  Args:
-    checkpointable: The object to add dependencies to (generally the one having
-      an attribute assigned).
-    name: The attribute name being assigned.
-    value: The value being assigned. Not necessarily a checkpointable object.
-
-  Returns:
-    The value which should be stored in the attribute (unwrapped from a
-    NoDependency object if necessary).
-  """
-  if isinstance(value, NoDependency):
-    add_dependency = False
-  else:
-    add_dependency = True
-  value = _wrap_or_unwrap(value)
-  if not add_dependency:
-    return value
-  if isinstance(value, base.Checkpointable):
-    checkpointable._track_checkpointable(  # pylint: disable=protected-access
-        value, name=name,
-        # Allow the user to switch the Checkpointable which is tracked by this
-        # name, since assigning a new variable to an attribute has
-        # historically been fine (e.g. Adam did this).
-        overwrite=True)
-  return value
-
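-# Roughly how an automatic-tracking __setattr__ override can use this (a
-# sketch; hypothetical class):
-#
-#   class AutoTracking(base.Checkpointable):
-#
-#     def __setattr__(self, name, value):
-#       value = sticky_attribute_assignment(
-#           checkpointable=self, name=name, value=value)
-#       super(AutoTracking, self).__setattr__(name, value)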
-
-class CheckpointableDataStructure(base.Checkpointable):
-  """Base class for data structures which contain checkpointable objects."""
-
-  def __init__(self):
-    self.trainable = True
-    self._extra_variables = []
-
-  def _track_value(self, value, name):
-    """Add a dependency on `value`."""
-    value = sticky_attribute_assignment(
-        checkpointable=self, value=value, name=name)
-    if isinstance(value, variables.Variable):
-      self._extra_variables.append(value)
-    if not isinstance(value, base.Checkpointable):
-      raise ValueError(
-          ("Only checkpointable objects (such as Layers or Optimizers) may be "
-           "stored in a List object. Got %s, which does not inherit from "
-           "Checkpointable.") % (value,))
-    if hasattr(value, "_use_resource_variables"):
-      # In subclassed models, legacy layers (tf.layers) must always use
-      # resource variables.
-      value._use_resource_variables = True  # pylint: disable=protected-access
-    return value
-
-  @property
-  def _values(self):
-    """An iterable/sequence which may contain checkpointable objects."""
-    raise NotImplementedError("Abstract method")
-
-  @property
-  def _layers(self):
-    """All Layers and Layer containers, including empty containers."""
-    # Filter objects on demand so that wrapper objects use values from the thing
-    # they're wrapping if out of sync.
-    collected = []
-    for obj in self._values:
-      if (isinstance(obj, CheckpointableDataStructure)
-          or layer_utils.is_layer(obj)
-          or layer_utils.has_weights(obj)):
-        collected.append(obj)
-    return collected
-
-  @property
-  def layers(self):
-    return layer_utils.filter_empty_layer_containers(self._layers)
-
-  @property
-  def trainable_weights(self):
-    return layer_utils.gather_trainable_weights(
-        trainable=self.trainable,
-        sub_layers=self._layers,
-        extra_variables=self._extra_variables)
-
-  @property
-  def non_trainable_weights(self):
-    return layer_utils.gather_non_trainable_weights(
-        trainable=self.trainable,
-        sub_layers=self._layers,
-        extra_variables=self._extra_variables)
-
-  @property
-  def weights(self):
-    return self.trainable_weights + self.non_trainable_weights
-
-  @property
-  def trainable_variables(self):
-    return self.trainable_weights
-
-  @property
-  def non_trainable_variables(self):
-    return self.non_trainable_weights
-
-  @property
-  def variables(self):
-    return self.weights
-
-  @property
-  def updates(self):
-    """Aggregate updates from any `Layer` instances."""
-    # Updates and conditional losses are forwarded as-is rather than being
-    # filtered based on inputs, since this is just a container and won't ever
-    # have any inputs.
-    aggregated = []
-    for layer in self.layers:
-      if hasattr(layer, "updates"):
-        aggregated += layer.updates
-    return aggregated
-
-  @property
-  def losses(self):
-    """Aggregate losses from any `Layer` instances."""
-    aggregated = []
-    for layer in self.layers:
-      if hasattr(layer, "losses"):
-        aggregated += layer.losses
-    return aggregated
-
-  def __hash__(self):
-    # Support object-identity hashing, so these structures can be used as keys
-    # in sets/dicts.
-    return id(self)
-
-  def __eq__(self, other):
-    # Similar to Tensors, checkpointable data structures use object-identity
-    # equality to support set/dict membership.
-    return self is other
-
-
-class List(CheckpointableDataStructure, collections.Sequence):
-  """An append-only sequence type which is checkpointable.
-
-  Maintains checkpoint dependencies on its contents (which must also be
-  checkpointable), and forwards any `Layer` metadata such as updates and losses.
-
-  Note that `List` is purely a container. It lets a `tf.keras.Model` or
-  other checkpointable object know about its contents, but does not call any
-  `Layer` instances which are added to it. To indicate a sequence of `Layer`
-  instances which should be called sequentially, use `tf.keras.Sequential`.
-
-  Example usage:
-  ```python
-  class HasList(tf.keras.Model):
-
-    def __init__(self):
-      super(HasList, self).__init__()
-      self.layer_list = tf.contrib.checkpoint.List([layers.Dense(3)])
-      self.layer_list.append(layers.Dense(4))
-
-    def call(self, x):
-      aggregation = 0.
-      for l in self.layer_list:
-        x = l(x)
-        aggregation += tf.reduce_sum(x)
-      return aggregation
-  ```
-
-  This kind of wrapping is necessary because `Checkpointable` objects do not
-  (yet) deeply inspect regular Python data structures, so for example assigning
-  a regular list (`self.layer_list = [layers.Dense(3)]`) does not create a
-  checkpoint dependency and does not add the `Layer` instance's weights to its
-  parent `Model`.
-  """
-
-  def __init__(self, *args, **kwargs):
-    """Construct a new sequence. Arguments are passed to `list()`."""
-    super(List, self).__init__()
-    self._storage = self._make_storage(*args, **kwargs)
-    for index, element in enumerate(self._storage):
-      self._storage[index] = self._track_value(
-          element, name=self._name_element(index))
-
-  def copy(self):
-    return type(self)(copy.copy(self._storage))
-
-  def __copy__(self):
-    return self.copy()
-
-  def __deepcopy__(self, memo):
-    return type(self)(copy.deepcopy(self._storage, memo))
-
-  def _make_storage(self, *args, **kwargs):
-    """Determines the backing storage (overridden in subclasses)."""
-    return list(*args, **kwargs)
-
-  def _name_element(self, index):
-    return "%d" % (index,)
-
-  @property
-  def _values(self):
-    return self
-
-  def append(self, value):
-    """Add a new checkpointable value."""
-    value = self._track_value(value, self._name_element(len(self._storage)))
-    self._storage.append(value)
-
-  def extend(self, values):
-    """Add a sequence of checkpointable values."""
-    for value in values:
-      self.append(value)
-
-  def __iadd__(self, values):
-    self.extend(values)
-    return self
-
-  def __add__(self, other):
-    return self.__class__(self._storage + getattr(other, "_storage", other))
-
-  def __imul__(self, y):
-    if y <= 0:
-      raise ValueError(
-          "List only supports append; multiplying in place by %d would "
-          "remove elements." % y)
-
-    n = len(self._storage)
-    for _ in range(y - 1):
-      for i in range(n):
-        self.append(self._storage[i])
-
-    return self
-
-  def __mul__(self, n):
-    return self.__class__(self._storage * n)
-
-  def __rmul__(self, n):
-    return self * n
-
-  def __radd__(self, other):
-    return self + other
-
-  def __getitem__(self, key):
-    return self._storage[key]
-
-  def __getslice__(self, i, j):
-    return self._storage[slice(i, j)]
-
-  def __len__(self):
-    return len(self._storage)
-
-  def __repr__(self):
-    return "List(%s)" % (repr(self._storage),)
-
-  def __sizeof__(self):
-    return super(List, self).__sizeof__() + sys.getsizeof(self._storage)
-
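-# Append-only behavior sketch (hypothetical layers):
-#
-#   l = List([layer_a])
-#   l.append(layer_b)  # tracked under the dependency name "1"
-#   l[0] = layer_c     # TypeError: List does not define __setitem__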
-
-# TODO(tomhennigan) Update to collections.UserList?
-class _ListWrapper(List, collections.MutableSequence,
-                   # Shadowed, but there for isinstance checks.
-                   list):
-  """Wraps the built-in `list` to support restore-on-create for variables.
-
-  Unlike `List`, this sequence type is mutable in the same ways built-in lists
-  are. Instead of throwing an error immediately like `List`, it records
-  problematic mutations (e.g. assigning a new element to a position already
-  occupied, meaning both elements get the same names at different times) and
-  refuses to save.
-
-  On assignment to an attribute of a Model or Checkpointable object, Python
-  lists are replaced with _ListWrapper. Wrapping a list in a
-  `tf.contrib.checkpoint.NoDependency` object prevents this.
-  """
-
-  def __init__(self, wrapped_list):
-    """Construct a new list wrapper.
-
-    Args:
-      wrapped_list: The initial value of the data structure. A shallow copy may
-        be maintained for error checking. `wrapped_list` itself should not be
-        modified directly after constructing the `_ListWrapper`, and if changes
-        are detected the `_ListWrapper` will throw an exception on save.
-    """
-    # Monotonic flags which indicate this object would not be restored properly,
-    # and therefore should throw an error on save to avoid giving the impression
-    # that restoring it will work.
-    self._non_append_mutation = False
-    self._external_modification = False
-    super(_ListWrapper, self).__init__(wrapped_list)
-    self._last_wrapped_list_snapshot = list(self._storage)
-
-  # pylint: disable=protected-access
-  def __copy__(self):
-    copied = super(_ListWrapper, self).__copy__()
-    copied._non_append_mutation = self._non_append_mutation
-    copied._external_modification = self._external_modification
-    return copied
-
-  def __deepcopy__(self, memo):
-    copied = super(_ListWrapper, self).__deepcopy__(memo)
-    copied._non_append_mutation = self._non_append_mutation
-    copied._external_modification = self._external_modification
-    return copied
-  # pylint: enable=protected-access
-
-  def _make_storage(self, wrapped_list):
-    """Use the user's original list for storage."""
-    return wrapped_list
-
-  def _check_external_modification(self):
-    """Checks for any changes to the wrapped list not through the wrapper."""
-    if self._external_modification or self._non_append_mutation:
-      return
-    if self._storage != self._last_wrapped_list_snapshot:
-      self._external_modification = True
-      self._last_wrapped_list_snapshot = None
-
-  def _update_snapshot(self):
-    """Acknowledges tracked changes to the wrapped list."""
-    if self._external_modification or self._non_append_mutation:
-      return
-    self._last_wrapped_list_snapshot = list(self._storage)
-
-  @property
-  def _checkpoint_dependencies(self):
-    self._check_external_modification()
-    if self._non_append_mutation:
-      raise ValueError(
-          ("Unable to save the object %s (a list wrapper constructed to track "
-           "checkpointable TensorFlow objects). A list element was replaced "
-           "(__setitem__, __setslice__), deleted (__delitem__, __delslice__), "
-           "or moved (sort). In order to support restoration on object "
-           "creation, tracking is exclusively for append-only data structures."
-           "\n\nIf you don't need this list checkpointed, wrap it in a "
-           "tf.contrib.checkpoint.NoDependency object; it will be "
-           "automatically un-wrapped and subsequently ignored." % (self,)))
-    if self._external_modification:
-      raise ValueError(
-          ("Unable to save the object %s (a list wrapper constructed to track "
-           "checkpointable TensorFlow objects). The wrapped list was modified "
-           "outside the wrapper (its final value was %s, its value when a "
-           "checkpoint dependency was added was %s), which breaks restoration "
-           "on object creation.\n\nIf you don't need this list checkpointed, "
-           "wrap it in a tf.contrib.checkpoint.NoDependency object; it will be "
-           "automatically un-wrapped and subsequently ignored." % (
-               self, self._storage, self._last_wrapped_list_snapshot)))
-    return super(_ListWrapper, self)._checkpoint_dependencies
-
-  def __delitem__(self, key):
-    self._non_append_mutation = True
-    del self._storage[key]
-
-  def __setitem__(self, key, value):
-    self._check_external_modification()
-
-    if isinstance(key, slice):
-      # Note: this is quite inefficient, but the list API supports a broad range
-      # of slice setters (e.g. truncate, extend, replace) and imitating this
-      # for a range of Python versions is non-trivial.
-      storage_copy = list(self._storage)
-      self._storage[key] = value
-
-      len_before = len(storage_copy)
-      len_now = len(self._storage)
-      for i in range(max(len_before, len_now)):
-        value_now = self._storage[i] if i < len_now else None
-        value_before = storage_copy[i] if i < len_before else None
-
-        if isinstance(value_before, base.Checkpointable):
-          self._non_append_mutation = True
-
-        if value_now is not None and value_now != value_before:
-          self._storage[i] = self._track_value(self._storage[i],
-                                               self._name_element(i))
-
-    else:
-      if isinstance(self._storage[key], base.Checkpointable):
-        self._non_append_mutation = True
-      self._storage[key] = self._track_value(value, self._name_element(key))
-
-    self._update_snapshot()
-
-  def append(self, value):
-    """Add a new checkpointable value."""
-    self._check_external_modification()
-    super(_ListWrapper, self).append(value)
-    self._update_snapshot()
-
-  def extend(self, values):
-    """Add a sequence of checkpointable values."""
-    self._check_external_modification()
-    super(_ListWrapper, self).extend(values)
-    self._update_snapshot()
-
-  def __eq__(self, other):
-    return self._storage == getattr(other, "_storage", other)
-
-  def __ne__(self, other):
-    return self._storage != getattr(other, "_storage", other)
-
-  def __lt__(self, other):
-    return self._storage < getattr(other, "_storage", other)
-
-  def __le__(self, other):
-    return self._storage <= getattr(other, "_storage", other)
-
-  def __gt__(self, other):
-    return self._storage > getattr(other, "_storage", other)
-
-  def __ge__(self, other):
-    return self._storage >= getattr(other, "_storage", other)
-
-  def __hash__(self):
-    # List wrappers need to compare like regular lists, and so like regular
-    # lists they don't belong in hash tables.
-    raise TypeError("unhashable type: 'ListWrapper'")
-
-  def insert(self, index, obj):
-    self._non_append_mutation = True
-    self._storage.insert(index, obj)
-
-  def sort(self):
-    self._non_append_mutation = True
-    self._storage.sort()
-
-  def __setslice__(self, i, j, y):
-    self.__setitem__(slice(i, j), y)
-
-  def __delslice__(self, i, j):
-    self._non_append_mutation = True
-    del self._storage[slice(i, j)]
-
-  def _track_value(self, value, name):
-    """Allows storage of non-checkpointable objects."""
-    try:
-      value = super(_ListWrapper, self)._track_value(value=value, name=name)
-    except ValueError:
-      # Even if this value isn't checkpointable, we need to make sure
-      # NoDependency objects get unwrapped.
-      value = sticky_attribute_assignment(
-          checkpointable=self, value=value, name=name)
-    return value
-
-  def __repr__(self):
-    return "ListWrapper(%s)" % (repr(self._storage),)
-
-  def _list_functions_for_serialization(self):
-    return {
-        str(key): value for key, value in enumerate(self)
-        if _is_function(value)
-    }
-
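-# Sketch of a mutation this wrapper records (hypothetical model and layers):
-#
-#   model.l = [layer_a]       # the plain list becomes a _ListWrapper
-#   model.l[0] = layer_b      # sets _non_append_mutation
-#   model.save_weights(path)  # ValueError when dependencies are gathered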
-
-class Mapping(CheckpointableDataStructure, collections.Mapping):
-  """An append-only checkpointable mapping data structure with string keys.
-
-  Maintains checkpoint dependencies on its contents (which must also be
-  checkpointable), named based on its keys.
-
-  Note that once a key has been added, it may not be deleted or replaced. If
-  names may not be unique, see `tf.contrib.checkpoint.UniqueNameTracker`.
-  """
-
-  def __init__(self, *args, **kwargs):
-    """Construct a new mapping. Arguments are passed to `dict()`."""
-    super(Mapping, self).__init__()
-    self._storage = self._make_storage(*args, **kwargs)
-    self._storage.update(
-        {key: self._track_value(
-            value, name=self._name_element(key))
-         for key, value in self._storage.items()})
-
-  def __copy__(self):
-    return type(self)(copy.copy(self._storage))
-
-  def __deepcopy__(self, memo):
-    return type(self)(copy.deepcopy(self._storage, memo))
-
-  def _make_storage(self, *args, **kwargs):
-    return dict(*args, **kwargs)
-
-  @property
-  def _values(self):
-    # Sort items deterministically by key
-    ordered = list(zip(*sorted(self.items(), key=lambda it: it[0])))
-    if ordered:
-      return ordered[1]
-    return []
-
-  def _name_element(self, key):
-    if not isinstance(key, six.string_types):
-      raise TypeError(
-          "Mapping accepts only string keys, but got a key %s."
-          % repr(key))
-    return str(key)
-
-  def __setitem__(self, key, value):
-    name = self._name_element(key)
-    value = self._track_value(value, name=name)
-    current_value = self._storage.setdefault(key, value)
-    if current_value is not value:
-      raise ValueError(
-          ("Mappings are an append-only data structure. Tried to overwrite the "
-           "key '%s' with value %s, but it already contains %s")
-          % (key, value, current_value))
-
-  def update(self, *args, **kwargs):
-    for key, value in dict(*args, **kwargs).items():
-      self[key] = value
-
-  def __getitem__(self, key):
-    return self._storage[key]
-
-  def __len__(self):
-    return len(self._storage)
-
-  def __repr__(self):
-    return "Mapping(%s)" % (repr(self._storage),)
-
-  def __iter__(self):
-    return iter(self._storage)
-
-
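-# Append-only usage sketch (hypothetical layers):
-#
-#   m = Mapping(dense=layer_a)
-#   m["conv"] = layer_b  # OK: a new key
-#   m["conv"] = layer_c  # ValueError: keys may not be overwritten
-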
-# Unlike _ListWrapper, having _DictWrapper inherit from dict and pass isinstance
-# checks seems infeasible. CPython will not call Python methods/properties on
-# dictionary subclasses when running e.g. {}.update(dict_subclass), and instead
-# collects elements directly from dict_subclass's C structs. So subclassing dict
-# implies that the storage has to be "self" (i.e. the C structs for the object
-# must be updated correctly), but we also need that storage to be the wrapped
-# dictionary to avoid synchronization bugs (un-tracked external modifications
-# should still show up when the dict is accessed through the wrapper). Monkey
-# patching all of the "wrapped" dict's methods instead of creating a wrapper
-# object is an option, but not a very attractive one (replacing methods without
-# creating reference cycles is difficult, and then dicts would need to be
-# special cased everywhere as being checkpointable).
-class _DictWrapper(Mapping, collections.MutableMapping):
-  """Wraps built-in dicts to support restore-on-create for variables.
-
-  _DictWrapper is to Mapping as _ListWrapper is to List. Unlike Mapping,
-  _DictWrapper allows non-string keys and values and arbitrary mutations (delete
-  keys, reassign values). Like _ListWrapper, it records such mutations and
-  raises an exception on save instead of restoring incorrectly.
-  """
-
-  def __new__(cls, *args):
-    if len(args) == 1 and isinstance(args[0], dict):
-      return super(_DictWrapper, cls).__new__(cls)
-    else:
-      # Allow construction from a sequence, e.g. for nest.pack_sequence_as. In
-      # this case there's nothing to wrap, so we make a normal dictionary. Also
-      # allows constructing empty instances of the _DictWrapper type, as Session
-      # is wont to do (and again there's nothing to wrap, so a normal dictionary
-      # makes more sense).
-      return dict(*args)
-
-  def __init__(self, wrapped_dict):
-    self._non_string_key = False
-    self._non_append_mutation = False
-    self._external_modification = False
-    super(_DictWrapper, self).__init__(wrapped_dict)
-    self._update_snapshot()
-
-  # pylint: disable=protected-access
-  def __copy__(self):
-    copied = super(_DictWrapper, self).__copy__()
-    copied._non_append_mutation = self._non_append_mutation
-    copied._external_modification = self._external_modification
-    copied._non_string_key = self._non_string_key
-    return copied
-
-  def __deepcopy__(self, memo):
-    copied = super(_DictWrapper, self).__deepcopy__(memo)
-    copied._non_append_mutation = self._non_append_mutation
-    copied._external_modification = self._external_modification
-    copied._non_string_key = self._non_string_key
-    return copied
-  # pylint: enable=protected-access
-
-  def _make_storage(self, wrapped_dict):
-    """Re-use the wrapped dict for storage (to force them to be in sync)."""
-    return wrapped_dict
-
-  @property
-  def _checkpoint_dependencies(self):
-    """Check that the object is saveable before listing its dependencies."""
-    self._check_external_modification()
-    if self._non_string_key:
-      raise ValueError(
-          "Unable to save the object %s (a dictionary wrapper constructed "
-          "automatically on attribute assignment). The wrapped dictionary "
-          "contains a non-string key which maps to a checkpointable object or "
-          "mutable data structure.\n\nIf you don't need this dictionary "
-          "checkpointed, wrap it in a tf.contrib.checkpoint.NoDependency "
-          "object; it will be automatically un-wrapped and subsequently "
-          "ignored." % (self,))
-    if self._non_append_mutation:
-      raise ValueError(
-          "Unable to save the object %s (a dictionary wrapper constructed "
-          "automatically on attribute assignment). A key mapping to a "
-          "checkpointable object was overwritten or deleted, which would "
-          "cause problems for restoration.\n\nIf you don't need this "
-          "dictionary checkpointed, wrap it in a "
-          "tf.contrib.checkpoint.NoDependency object; it will be automatically "
-          "un-wrapped and subsequently ignored." % (self,))
-    if self._external_modification:
-      raise ValueError(
-          "Unable to save the object %s (a dictionary wrapper constructed "
-          "automatically on attribute assignment). The wrapped dictionary was "
-          "modified outside the wrapper (its final value was %s, its value "
-          "when a checkpoint dependency was added was %s), which breaks "
-          "restoration on object creation.\n\nIf you don't need this "
-          "dictionary checkpointed, wrap it in a "
-          "tf.contrib.checkpoint.NoDependency object; it will be automatically "
-          "un-wrapped and subsequently ignored." % (
-              self, self, self._last_wrapped_dict_snapshot))
-    # Any reason for dirtiness should have raised an exception above.
-    assert not self._dirty
-    return super(_DictWrapper, self)._checkpoint_dependencies
-
-  @property
-  def _dirty(self):
-    """Check if there has already been a mutation which prevents saving."""
-    return (self._external_modification
-            or self._non_append_mutation
-            or self._non_string_key)
-
-  def _check_external_modification(self):
-    """Checks for any changes to the wrapped dict not through the wrapper."""
-    if self._dirty:
-      return
-    if self != self._last_wrapped_dict_snapshot:
-      self._external_modification = True
-      self._last_wrapped_dict_snapshot = None
-
-  def _update_snapshot(self):
-    """Acknowledges tracked changes to the wrapped dict."""
-    if self._dirty:
-      return
-    self._last_wrapped_dict_snapshot = dict(self)
-
-  def _track_value(self, value, name):
-    """Allows storage of non-checkpointable objects."""
-    if isinstance(name, six.string_types):
-      string_key = True
-    else:
-      name = "-non_string_key"
-      string_key = False
-    try:
-      no_dependency = isinstance(value, NoDependency)
-      value = super(_DictWrapper, self)._track_value(value=value, name=name)
-      if not (string_key or no_dependency):
-        # A non-string key maps to a checkpointable value. This data structure
-        # is not saveable.
-        self._non_string_key = True
-      return value
-    except ValueError:
-      # Even if this value isn't checkpointable, we need to make sure
-      # NoDependency objects get unwrapped.
-      return sticky_attribute_assignment(
-          checkpointable=self, value=value, name=name)
-
-  def _name_element(self, key):
-    """Don't throw errors for non-string keys."""
-    if isinstance(key, six.string_types):
-      return super(_DictWrapper, self)._name_element(key)
-    else:
-      return key
-
-  def __setitem__(self, key, value):
-    """Allow any modifications, but possibly mark the wrapper as unsaveable."""
-    self._check_external_modification()
-    no_dep = isinstance(value, NoDependency)
-    if isinstance(key, six.string_types):
-      existing_dependency = self._lookup_dependency(key)
-      value = self._track_value(value, name=key)
-    else:
-      value = _wrap_or_unwrap(value)
-      existing_dependency = None
-      if not no_dep and isinstance(value, base.Checkpointable):
-        # Non-string keys are OK as long as we have no reason to add a
-        # dependency on the value (either because the value is not
-        # checkpointable, or because it was wrapped in a NoDependency object).
-        self._non_string_key = True
-    current_value = self._storage.setdefault(key, value)
-    if current_value is not value:
-      if ((not no_dep and isinstance(value, base.Checkpointable))
-          # We don't want to just check that the existing object is
-          # checkpointable, since it may have been wrapped in a NoDependency
-          # object.
-          or existing_dependency is not None):
-        # A checkpointable object was replaced under the same key; this means
-        # that restoring would be error-prone, so we'll throw an exception on
-        # save.
-        self._non_append_mutation = True
-      self._storage[key] = value
-
-    self._update_snapshot()
-
-  def __delitem__(self, key):
-    self._check_external_modification()
-    existing_value = self[key]
-    if isinstance(existing_value, base.Checkpointable):
-      # Deleting tracked checkpointable values means restoring is problematic,
-      # so we'll throw an exception on save.
-      self._non_append_mutation = True
-    del self._storage[key]
-    self._update_snapshot()
-
-  def __repr__(self):
-    return "DictWrapper(%s)" % (repr(self._storage),)
-
-  def __hash__(self):
-    raise TypeError("unhashable type: 'DictWrapper'")
-
-  def __eq__(self, other):
-    return self._storage == getattr(other, "_storage", other)
-
-  def update(self, *args, **kwargs):
-    for key, value in dict(*args, **kwargs).items():
-      self[key] = value
-
-  def _list_functions_for_serialization(self):
-    return {
-        key: value for key, value in self.items()
-        if _is_function(value)
-    }
-
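-# Unsaveable-mutation sketch for _DictWrapper (hypothetical model and layer):
-#
-#   model.d = {"a": layer_a}  # the plain dict becomes a _DictWrapper
-#   del model.d["a"]          # sets _non_append_mutation
-#   model.save_weights(path)  # ValueError when dependencies are gathered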
-
-def _is_function(x):
-  return isinstance(x, (def_function.Function, defun.ConcreteFunction))
-
-revived_types.register_revived_type(
-    "checkpointable_dict_wrapper",
-    lambda obj: isinstance(obj, _DictWrapper),
-    versions=[revived_types.VersionedTypeRegistration(
-        # Standard dependencies are enough to reconstruct the checkpointable
-        # items in dictionaries, so we don't need to save any extra information.
-        object_factory=lambda proto: _DictWrapper({}),
-        version=1,
-        min_producer_version=1,
-        min_consumer_version=1,
-        setter=operator.setitem)])
-
-
-def _set_list_item(list_object, index_string, value):
-  item_index = int(index_string)
-  if len(list_object) <= item_index:
-    list_object.extend([None] * (1 + item_index - len(list_object)))
-  list_object[item_index] = value
-
-
-revived_types.register_revived_type(
-    "checkpointable_list_wrapper",
-    lambda obj: isinstance(obj, _ListWrapper),
-    versions=[revived_types.VersionedTypeRegistration(
-        object_factory=lambda proto: _ListWrapper([]),
-        version=1,
-        min_producer_version=1,
-        min_consumer_version=1,
-        setter=_set_list_item)])
diff --git a/tensorflow/python/training/checkpointable/data_structures_test.py b/tensorflow/python/training/checkpointable/data_structures_test.py
deleted file mode 100644
index 7204587..0000000
--- a/tensorflow/python/training/checkpointable/data_structures_test.py
+++ /dev/null
@@ -1,704 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import copy
-import os
-
-import numpy
-import six
-
-from tensorflow.python.eager import context
-from tensorflow.python.eager import test
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.layers import core
-from tensorflow.python.keras.layers import normalization
-from tensorflow.python.layers import core as non_keras_core
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.training.checkpointable import data_structures
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
-
-
-class HasList(training.Model):
-
-  def __init__(self):
-    super(HasList, self).__init__()
-    self.layer_list = data_structures.List([core.Dense(3)])
-    self.layer_list.append(core.Dense(4))
-    self.layer_list.extend(
-        [core.Dense(5),
-         core.Dense(6, kernel_regularizer=math_ops.reduce_sum)])
-    self.layer_list += [
-        core.Dense(7, bias_regularizer=math_ops.reduce_sum),
-        core.Dense(8)
-    ]
-    self.layer_list += (
-        data_structures.List([core.Dense(9)]) + data_structures.List(
-            [core.Dense(10)]))
-    self.layer_list.extend(
-        data_structures.List(
-            list([core.Dense(11)]) + [core.Dense(12)]))
-    self.layers_with_updates = data_structures.List(
-        sequence=(normalization.BatchNormalization(),))
-
-  def call(self, x):
-    aggregation = 0.
-    for l in self.layer_list:
-      x = l(x)
-      aggregation += math_ops.reduce_sum(x)
-    bn, = self.layers_with_updates
-    return bn(x) / aggregation
-
-
-class ListTests(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  @test_util.run_v1_only("b/120545219")
-  def testTracking(self):
-    model = HasList()
-    output = model(array_ops.ones([32, 2]))
-    self.assertAllEqual([32, 12], output.shape)
-    self.assertEqual(11, len(model.layers))
-    self.assertEqual(10, len(model.layer_list.layers))
-    six.assertCountEqual(
-        self,
-        model.layers,
-        model.layer_list.layers + model.layers_with_updates)
-    for index in range(10):
-      self.assertEqual(3 + index, model.layer_list.layers[index].units)
-    self.assertEqual(2, len(model._checkpoint_dependencies))
-    self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref)
-    self.assertIs(model.layers_with_updates,
-                  model._checkpoint_dependencies[1].ref)
-    self.assertEqual(
-        10, len(model._checkpoint_dependencies[0].ref._checkpoint_dependencies))
-    self.evaluate([v.initializer for v in model.variables])
-    self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    model.save_weights(save_path)
-    self.evaluate(model.variables[0].assign(array_ops.zeros([2, 3])))
-    model.load_weights(save_path)
-    self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
-                        self.evaluate(model.variables[0]))
-    v = variables.Variable(1.)
-    model.var_list = [v]
-    self.assertIn(v, model.variables)
-    self.assertIn(v, model.trainable_variables)
-    self.assertNotIn(v, model.non_trainable_variables)
-
-  @test_util.run_v1_only("b/120545219")
-  def testUpdatesForwarded(self):
-    with context.graph_mode():
-      model = HasList()
-      model_input = array_ops.ones([32, 2])
-      model(model_input)
-      self.assertGreater(len(model.layers_with_updates[0].updates), 0)
-      self.assertEqual(set(model.layers_with_updates[0].updates),
-                       set(model.updates))
-
-    with context.eager_mode():
-      model = HasList()
-      model_input = array_ops.ones([32, 2])
-      model(model_input)
-      self.assertEqual(0, len(model.updates))
-
-  @test_util.run_in_graph_and_eager_modes
-  @test_util.run_v1_only("b/120545219")
-  def testLossesForwarded(self):
-    model = HasList()
-    model_input = array_ops.ones([32, 2])
-    model(model_input)
-    self.assertEqual(2, len(model.losses))
-
-  def testModelContainersCompareEqual(self):
-    class HasEqualContainers(training.Model):
-
-      def __init__(self):
-        super(HasEqualContainers, self).__init__()
-        self.l1 = []
-        self.l2 = []
-
-    model = HasEqualContainers()
-    first_layer = HasEqualContainers()
-    model.l1.append(first_layer)
-    second_layer = HasEqualContainers()
-    model.l2.append(second_layer)
-    self.assertEqual([first_layer, second_layer], model.layers)
-
-  def testNotCheckpointable(self):
-    class NotCheckpointable(object):
-      pass
-
-    with self.assertRaises(ValueError):
-      data_structures.List([NotCheckpointable()])
-
-  def testCallNotImplemented(self):
-    with self.assertRaisesRegexp(TypeError, "not callable"):
-      data_structures.List()(1.)
-
-  def testNoPop(self):
-    with self.assertRaises(AttributeError):
-      data_structures.List().pop()
-
-  @test_util.run_in_graph_and_eager_modes
-  def testTensorConversion(self):
-
-    class ListToTensor(training.Model):
-
-      def __init__(self):
-        super(ListToTensor, self).__init__()
-        self.l = [1., 2., 3.]
-
-    self.assertAllEqual(
-        [1., 2., 3.],
-        self.evaluate(constant_op.constant(ListToTensor().l)))
-
-    self.assertAllEqual(
-        [1., 2., 3.],
-        self.evaluate(array_ops.stack(ListToTensor().l)))
-
-  def testNesting(self):
-    with context.graph_mode():
-      inner = data_structures.List()
-      outer = data_structures.List([inner])
-      inner.append(non_keras_core.Dense(1))
-      inner[0](array_ops.ones([2, 3]))
-      self.assertEqual(2, len(outer.variables))
-      self.assertIsInstance(
-          outer.variables[0],
-          resource_variable_ops.ResourceVariable)
-
-  def testNonLayerVariables(self):
-    v = resource_variable_ops.ResourceVariable([1.])
-    l = data_structures.List([v])
-    self.assertTrue(l.trainable)
-    self.assertEqual([], l.layers)
-    self.assertEqual([v], l.variables)
-    self.assertEqual([v], l.trainable_weights)
-    self.assertEqual([], l.non_trainable_variables)
-    l.trainable = False
-    self.assertEqual([v], l.variables)
-    self.assertEqual([], l.trainable_variables)
-    self.assertEqual([v], l.non_trainable_variables)
-    l.trainable = True
-    v2 = resource_variable_ops.ResourceVariable(1., trainable=False)
-    l.append(v2)
-    self.assertEqual([v, v2], l.weights)
-    self.assertEqual([v], l.trainable_weights)
-    self.assertEqual([v2], l.non_trainable_weights)
-
-  def testCopy(self):
-    v1 = resource_variable_ops.ResourceVariable(1.)
-    v2 = resource_variable_ops.ResourceVariable(1.)
-    v3 = resource_variable_ops.ResourceVariable(1.)
-
-    l1 = data_structures.List([v1, v2])
-    l2 = l1.copy()
-    l2.append(v3)
-    self.assertEqual(list(l1), [v1, v2])
-    self.assertEqual(list(l2), [v1, v2, v3])
-
-  def testSlicing(self):
-    v1 = resource_variable_ops.ResourceVariable(1.)
-    v2 = resource_variable_ops.ResourceVariable(1.)
-    v3 = resource_variable_ops.ResourceVariable(1.)
-    v4 = resource_variable_ops.ResourceVariable(1.)
-
-    l = data_structures.List([v1, v2, v3, v4])
-    self.assertEqual(l[1:], [v2, v3, v4])
-    self.assertEqual(l[1:-1], [v2, v3])
-    self.assertEqual(l[:-1], [v1, v2, v3])
-
-  def testHash(self):
-    has_sequences = set([data_structures.List(),
-                         data_structures.List()])
-    self.assertEqual(2, len(has_sequences))
-    self.assertNotIn(data_structures.List(), has_sequences)
-
-  def testIMul_zero(self):
-    l = data_structures.List([])
-    with self.assertRaisesRegexp(ValueError, "List only supports append"):
-      l *= 0
-
-  def testIMul(self):
-    v = resource_variable_ops.ResourceVariable(1.)
-    l = data_structures.List([v])
-    l *= 2
-    self.assertEqual(list(l), [v] * 2)
-
-  def testMul(self):
-    v = resource_variable_ops.ResourceVariable(1.)
-    l = data_structures.List([v, v, v])
-    self.assertEqual(list(l * 2), [v, v, v] * 2)
-
-  def testRMul(self):
-    v = resource_variable_ops.ResourceVariable(1.)
-    l = data_structures.List([v, v, v])
-    self.assertEqual(list(2 * l), [v, v, v] * 2)
-
-
-class ListWrapperTest(test.TestCase):
-
-  IGNORED = ("__new__", "__init__", "__subclasshook__", "__getattribute__")
-
-  def test_overrides_all_list_methods(self):
-    not_overridden = []
-
-    for name in dir(list):
-      if name in ListWrapperTest.IGNORED:
-        continue
-
-      list_method = getattr(list, name)
-
-      if not callable(list_method):
-        continue
-
-      object_method = getattr(object, name, None)
-      if object_method is not None and object_method == list_method:
-        # Skip methods that aren't overridden from object.
-        continue
-
-      if list_method == getattr(data_structures._ListWrapper, name):
-        not_overridden.append(name)
-
-    if not_overridden:
-      self.fail("_ListWrapper does not override %s" % (not_overridden))
-
-  def testListWrapperBasic(self):
-    # _ListWrapper, unlike List, compares like the built-in list type (since it
-    # is used to automatically replace lists).
-    a = tracking.AutoCheckpointable()
-    b = tracking.AutoCheckpointable()
-    self.assertEqual([a, a],
-                     [a, a])
-    self.assertEqual(data_structures._ListWrapper([a, a]),
-                     data_structures._ListWrapper([a, a]))
-    self.assertEqual([a, a],
-                     data_structures._ListWrapper([a, a]))
-    self.assertEqual(data_structures._ListWrapper([a, a]),
-                     [a, a])
-    self.assertNotEqual([a, a],
-                        [b, a])
-    self.assertNotEqual(data_structures._ListWrapper([a, a]),
-                        data_structures._ListWrapper([b, a]))
-    self.assertNotEqual([a, a],
-                        data_structures._ListWrapper([b, a]))
-    self.assertLess([a], [a, b])
-    self.assertLess(data_structures._ListWrapper([a]),
-                    data_structures._ListWrapper([a, b]))
-    self.assertLessEqual([a], [a, b])
-    self.assertLessEqual(data_structures._ListWrapper([a]),
-                         data_structures._ListWrapper([a, b]))
-    self.assertGreater([a, b], [a])
-    self.assertGreater(data_structures._ListWrapper([a, b]),
-                       data_structures._ListWrapper([a]))
-    self.assertGreaterEqual([a, b], [a])
-    self.assertGreaterEqual(data_structures._ListWrapper([a, b]),
-                            data_structures._ListWrapper([a]))
-    self.assertEqual([a], data_structures._ListWrapper([a]))
-    self.assertEqual([a], list(data_structures.List([a])))
-    self.assertEqual([a, a], data_structures._ListWrapper([a]) + [a])
-    self.assertEqual([a, a], [a] + data_structures._ListWrapper([a]))
-    self.assertIsInstance(data_structures._ListWrapper([a]), list)
-
-  def testAcceptsNonCheckpointableContent(self):
-    l = data_structures._ListWrapper([1, 2, 3])
-    self.assertEqual(l, [1, 2, 3])
-
-  def testWrapperChangesList(self):
-    l = []
-    l_wrapper = data_structures._ListWrapper(l)
-    l_wrapper.append(1)
-    self.assertEqual([1], l)
-
-  def testListChangesWrapper(self):
-    l = []
-    l_wrapper = data_structures._ListWrapper(l)
-    l.append(1)
-    self.assertEqual([1], l_wrapper)
-
-  def testLayerCollectionWithExternalMutation(self):
-    l = []
-    l_wrapper = data_structures._ListWrapper(l)
-    layer = core.Dense(1)
-    l.append(layer)
-    self.assertEqual([layer], l_wrapper.layers)
-
-  def testNotHashable(self):
-    with self.assertRaises(TypeError):
-      hash(data_structures._ListWrapper())
-
-  def testDelItem(self):
-    l = data_structures._ListWrapper([1, 2, 3, 4])
-    del l[0]
-    self.assertEqual(l, [2, 3, 4])
-    self.assertUnableToSave(l, "Unable to save .*__delitem__")
-
-  def testDelSlice(self):
-    l = data_structures._ListWrapper([1, 2, 3, 4])
-    del l[2:3]
-    self.assertEqual(l, [1, 2, 4])
-    self.assertUnableToSave(l, "Unable to save .*__delslice__")
-
-  def testSetSlice_canSaveForNonCheckpointableItems(self):
-    l = data_structures._ListWrapper([1, 2, 3, 4])
-    l[:] = 2, 8, 9, 0
-    self.assertEqual(l, [2, 8, 9, 0])
-    l._maybe_initialize_checkpointable()  # pylint: disable=protected-access
-    self.assertEqual(len(l._checkpoint_dependencies), 0)  # pylint: disable=protected-access
-
-  def testSetSlice_cannotSaveIfCheckpointableModified(self):
-    v1 = resource_variable_ops.ResourceVariable(1.)
-    v2 = resource_variable_ops.ResourceVariable(1.)
-    l = data_structures._ListWrapper([1, 2, v1, v2])
-    l[:] = 2, 8, 9, v2
-    self.assertEqual(l, [2, 8, 9, v2])
-    self.assertUnableToSave(l, "Unable to save .*__setslice__")
-
-  def testSetSlice_truncate(self):
-    l = data_structures._ListWrapper([1, 2, 3, 4])
-    l[:] = []
-    self.assertEqual(l, [])
-
-  def testSetSlice_extend(self):
-    l = data_structures._ListWrapper([1, 2, 3, 4])
-    l[2:] = 1, 2, 3, 4
-    self.assertEqual(l, [1, 2, 1, 2, 3, 4])
-
-  def testSort(self):
-    l = data_structures._ListWrapper([1, 2, 3, 4])
-    l.sort()
-    self.assertEqual(l, [1, 2, 3, 4])
-    # Regardless of being a no-op for the input list, we still refuse to save.
-    # This is intentional since otherwise we would end up with a hard to debug
-    # case for users (e.g. sometimes sort on a ListWrapper is checkpointable and
-    # other times it is not).
-    self.assertUnableToSave(l, "Unable to save .*sort")
-
-  def assertUnableToSave(self, l, msg):
-    l._maybe_initialize_checkpointable()  # pylint: disable=protected-access
-    with self.assertRaisesRegexp(ValueError, msg):
-      return l._checkpoint_dependencies  # pylint: disable=protected-access
-
-
-class HasMapping(training.Model):
-
-  def __init__(self):
-    super(HasMapping, self).__init__()
-    self.layer_dict = data_structures.Mapping(output=core.Dense(7))
-    self.layer_dict["norm"] = data_structures.List()
-    self.layer_dict["dense"] = data_structures.List()
-    self.layer_dict["dense"].extend(
-        [core.Dense(5),
-         core.Dense(6, kernel_regularizer=math_ops.reduce_sum)])
-    self.layer_dict["norm"].append(
-        normalization.BatchNormalization())
-    self.layer_dict["norm"].append(
-        normalization.BatchNormalization())
-
-  def call(self, x):
-    aggregation = 0.
-    for norm, dense in zip(self.layer_dict["norm"], self.layer_dict["dense"]):
-      x = norm(dense(x))
-      aggregation += math_ops.reduce_sum(x)
-    return self.layer_dict["output"](x) / aggregation
-
-
-class MappingTests(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  @test_util.run_v1_only("b/120545219")
-  def testTracking(self):
-    model = HasMapping()
-    output = model(array_ops.ones([32, 2]))
-    self.assertAllEqual([32, 7], output.shape)
-    self.assertEqual(5, len(model.layers))
-    six.assertCountEqual(self, model.layers, model.layer_dict.layers)
-    self.assertEqual(1, len(model._checkpoint_dependencies))
-    self.assertIs(model.layer_dict, model._checkpoint_dependencies[0].ref)
-    self.evaluate([v.initializer for v in model.variables])
-    test_var = model.layer_dict["output"].kernel
-    self.evaluate(test_var.assign(array_ops.ones([6, 7])))
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    model.save_weights(save_path)
-    self.evaluate(test_var.assign(array_ops.zeros([6, 7])))
-    model.load_weights(save_path)
-    self.assertAllEqual(numpy.ones([6, 7]),
-                        self.evaluate(test_var))
-
-  def testNoOverwrite(self):
-    mapping = data_structures.Mapping()
-    original = data_structures.List()
-    mapping["a"] = original
-    with self.assertRaises(ValueError):
-      mapping["a"] = data_structures.List()
-    self.assertIs(original, mapping["a"])
-    with self.assertRaises(AttributeError):
-      del mapping["a"]
-    mapping.update(b=data_structures.Mapping())
-    with self.assertRaises(ValueError):
-      mapping.update({"b": data_structures.Mapping()})
-
-  def testNonStringKeys(self):
-    mapping = data_structures.Mapping()
-    with self.assertRaises(TypeError):
-      mapping[1] = data_structures.List()
-
-  def testLayerCollectionWithExternalMutation(self):
-    d = {}
-    root = tracking.AutoCheckpointable()
-    root.wrapper = d
-    self.assertEqual([], root.wrapper.layers)
-    self.assertEqual([], root.wrapper.trainable_weights)
-    layer1 = core.Dense(1)
-    layer2 = core.Dense(1)
-    d["a"] = layer1
-    d["b"] = layer2
-    self.assertEqual([layer1, layer2], root.wrapper.layers)
-    # The layers still have not created variables.
-    self.assertEqual([], root.wrapper.trainable_weights)
-
-  def testHashing(self):
-    has_mappings = set([data_structures.Mapping(),
-                        data_structures.Mapping()])
-    self.assertEqual(2, len(has_mappings))
-    self.assertNotIn(data_structures.Mapping(), has_mappings)
-    # In contrast to Mapping, dict wrappers are not hashable
-    a = tracking.AutoCheckpointable()
-    a.d = {}
-    self.assertEqual({}, a.d)
-    self.assertFalse({} != a.d)  # pylint: disable=g-explicit-bool-comparison
-    self.assertNotEqual({1: 2}, a.d)
-    with self.assertRaisesRegexp(TypeError, "unhashable"):
-      set([a.d])
-
-  def testDictWrapperBadKeys(self):
-    a = tracking.AutoCheckpointable()
-    a.d = {}
-    a.d[1] = data_structures.List()
-    model = training.Model()
-    model.sub = a
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    with self.assertRaisesRegexp(ValueError, "non-string key"):
-      model.save_weights(save_path)
-
-  def testDictWrapperNoDependency(self):
-    a = tracking.AutoCheckpointable()
-    a.d = data_structures.NoDependency({})
-    a.d[1] = [3]
-    self.assertEqual([a], util.list_objects(a))
-    model = training.Model()
-    model.sub = a
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    model.save_weights(save_path)
-    model.load_weights(save_path)
-
-  def testNonStringKeyNotCheckpointableValue(self):
-    a = tracking.AutoCheckpointable()
-    a.d = {}
-    a.d["a"] = [3]
-    a.d[1] = data_structures.NoDependency([3])
-    self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a))
-    model = training.Model()
-    model.sub = a
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    model.save_weights(save_path)
-    model.load_weights(save_path)
-
-  def testNonAppendNotCheckpointable(self):
-    # Non-append mutations (deleting or overwriting values) are OK when the
-    # values aren't tracked.
-    a = tracking.AutoCheckpointable()
-    a.d = {}
-    a.d["a"] = [3]
-    a.d[1] = 3
-    a.d[1] = 2
-    self.assertEqual(2, a.d[1])
-    del a.d[1]
-    a.d[2] = data_structures.NoDependency(tracking.AutoCheckpointable())
-    second = tracking.AutoCheckpointable()
-    a.d[2] = data_structures.NoDependency(second)
-    self.assertIs(second, a.d[2])
-    self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a))
-    model = training.Model()
-    model.sub = a
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    model.save_weights(save_path)
-    model.load_weights(save_path)
-
-  def testDelNoSave(self):
-    model = training.Model()
-    model.d = {}
-    model.d["a"] = []
-    del model.d["a"]
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    with self.assertRaisesRegexp(ValueError, "overwritten or deleted"):
-      model.save_weights(save_path)
-
-  def testPopNoSave(self):
-    model = training.Model()
-    model.d = {}
-    model.d["a"] = []
-    model.d.pop("a")
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    with self.assertRaisesRegexp(ValueError, "overwritten or deleted"):
-      model.save_weights(save_path)
-
-  def testExternalModificationNoSave(self):
-    model = training.Model()
-    external_reference = {}
-    model.d = external_reference
-    external_reference["a"] = []
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    with self.assertRaisesRegexp(ValueError, "modified outside the wrapper"):
-      model.save_weights(save_path)
-
-  def testOverwriteNoSave(self):
-    model = training.Model()
-    model.d = {}
-    model.d["a"] = {}
-    model.d["a"] = {}
-    save_path = os.path.join(self.get_temp_dir(), "ckpt")
-    with self.assertRaisesRegexp(ValueError, "overwritten or deleted"):
-      model.save_weights(save_path)
-
-  def testIter(self):
-    model = training.Model()
-    model.d = {1: 3}
-    model.d[1] = 3
-    self.assertEqual([1], list(model.d))
-    new_dict = {}
-    # This update() is super tricky. If the dict wrapper subclasses dict,
-    # CPython will access its storage directly instead of calling any
-    # methods/properties on the object. So the options are either not to
-    # subclass dict (in which case update() will call the normal iter methods,
-    # but the object won't pass isinstance checks) or to subclass dict and
-    # keep that storage updated (rather than shadowing all of its methods the
-    # way _ListWrapper does).
-    new_dict.update(model.d)
-    self.assertEqual({1: 3}, new_dict)
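-    # A sketch of the failure mode described above (hypothetical subclass,
-    # added for illustration): a dict subclass that keeps its real storage
-    # elsewhere and only overrides __iter__ looks empty to update(), since
-    # CPython reads the argument's C-level dict storage directly.
-    #
-    #   class ShadowDict(dict):
-    #     def __init__(self, real):
-    #       super(ShadowDict, self).__init__()
-    #       self._real = real
-    #     def __iter__(self):
-    #       return iter(self._real)
-    #
-    #   d = {}
-    #   d.update(ShadowDict({1: 3}))  # d stays {}; __iter__ is never called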
-
-  def testListShallowCopy(self):
-    root = tracking.AutoCheckpointable()
-    orig_list = [[1.]]
-    root.a = orig_list
-    copied = copy.copy(root.a)
-    self.assertAllEqual([[1.]], copied)
-    self.assertIsNot(root.a, copied)
-    self.assertIs(root.a[0], copied[0])
-
-    # Dirtiness should be inherited
-    util.list_objects(root.a)
-    orig_list.append(1.)
-    with self.assertRaises(ValueError):
-      util.list_objects(root.a)
-    with self.assertRaises(ValueError):
-      util.list_objects(copy.copy(root.a))
-
-  def testListDeepCopy(self):
-    root = tracking.AutoCheckpointable()
-    orig_list = [[1.]]
-    root.a = orig_list
-    copied = copy.deepcopy(root.a)
-    self.assertAllEqual([[1.]], copied)
-    self.assertIsNot(root.a, copied)
-    self.assertIsNot(root.a[0], copied[0])
-
-    # Dirtiness should be inherited
-    util.list_objects(root.a)
-    orig_list.append(1.)
-    with self.assertRaises(ValueError):
-      util.list_objects(root.a)
-    with self.assertRaises(ValueError):
-      util.list_objects(copy.deepcopy(root.a))
-
-  def testDictShallowCopy(self):
-    root = tracking.AutoCheckpointable()
-    orig_dict = {"a": [1.]}
-    root.a = orig_dict
-    copied = copy.copy(root.a)
-    self.assertAllEqual([1.], copied["a"])
-    self.assertIsNot(root.a, copied)
-    self.assertIs(root.a["a"], copied["a"])
-
-    # Dirtiness should be inherited
-    util.list_objects(root.a)
-    orig_dict["b"] = []
-    with self.assertRaises(ValueError):
-      util.list_objects(root.a)
-    with self.assertRaises(ValueError):
-      util.list_objects(copy.copy(root.a))
-
-  def testDictDeepCopy(self):
-    root = tracking.AutoCheckpointable()
-    orig_dict = {"a": [1.]}
-    root.a = orig_dict
-    copied = copy.deepcopy(root.a)
-    self.assertAllEqual([1.], copied["a"])
-    self.assertIsNot(root.a, copied)
-    self.assertIsNot(root.a["a"], copied["a"])
-
-    # Dirtiness should be inherited
-    util.list_objects(root.a)
-    orig_dict["b"] = []
-    with self.assertRaises(ValueError):
-      util.list_objects(root.a)
-    with self.assertRaises(ValueError):
-      util.list_objects(copy.deepcopy(root.a))
-
-  def testShallowCopyCheckpointable(self):
-    original = tracking.AutoCheckpointable()
-    original_sub = tracking.AutoCheckpointable()
-    original.a = [[1.]]
-    original.b = {"a": original_sub}
-    shallow_copied = copy.copy(original)
-    self.assertIs(original_sub, shallow_copied.b["a"])
-    self.assertIsNot(original, shallow_copied)
-    self.assertEqual([[1.]], shallow_copied.a)
-    shallow_deps = util.list_objects(shallow_copied)
-    self.assertIn(shallow_copied.a, shallow_deps)
-    self.assertIn(shallow_copied.b, shallow_deps)
-    self.assertIn(shallow_copied.b["a"], shallow_deps)
-
-  def testDeepCopyCheckpointable(self):
-    original = tracking.AutoCheckpointable()
-    original_sub = tracking.AutoCheckpointable()
-    original.a = [[1.]]
-    original.b = {"a": original_sub}
-    deep_copied = copy.deepcopy(original)
-    self.assertIsNot(original, deep_copied)
-    self.assertIsNot(original_sub, deep_copied.b["a"])
-    self.assertEqual([[1.]], deep_copied.a)
-    self.assertIsInstance(deep_copied.b["a"], tracking.AutoCheckpointable)
-    deps = util.list_objects(deep_copied)
-    self.assertIn(deep_copied.a, deps)
-    self.assertIn(deep_copied.b, deps)
-    self.assertIn(deep_copied.b["a"], deps)
-    self.assertNotIn(original_sub, deps)
-
-  def testConstructableFromSequence(self):
-    result = data_structures._DictWrapper([(1, 2), (3, 4)])
-    self.assertIsInstance(result, dict)
-    self.assertEqual({1: 2, 3: 4}, result)
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/training/checkpointable/graph_view.py b/tensorflow/python/training/checkpointable/graph_view.py
deleted file mode 100644
index 1f5f867..0000000
--- a/tensorflow/python/training/checkpointable/graph_view.py
+++ /dev/null
@@ -1,432 +0,0 @@
-"""Manages a graph of Checkpointable objects."""
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import weakref
-
-from tensorflow.core.protobuf import checkpointable_object_graph_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.training import optimizer as optimizer_v1
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import object_identity
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.saving import saveable_object as saveable_object_lib
-from tensorflow.python.training.saving import saveable_object_util
-
-
-_ESCAPE_CHAR = "."  # For avoiding conflicts with user-specified names.
-
-# Keyword for identifying that the next bit of a checkpoint variable name is a
-# slot name. Checkpoint names for slot variables look like:
-#
-#   <path to variable>/<_OPTIMIZER_SLOTS_NAME>/<path to optimizer>/<slot name>
-#
-# Where <path to variable> is a full path from the checkpoint root to the
-# variable being slotted for.
-_OPTIMIZER_SLOTS_NAME = _ESCAPE_CHAR + "OPTIMIZER_SLOT"
-# Keyword for separating the path to an object from the name of an
-# attribute in checkpoint names. Used like:
-#   <path to variable>/<_OBJECT_ATTRIBUTES_NAME>/<name of attribute>
-_OBJECT_ATTRIBUTES_NAME = _ESCAPE_CHAR + "ATTRIBUTES"
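-# For example, a variable's value is typically stored under a checkpoint key
-# like "dense/kernel/.ATTRIBUTES/VARIABLE_VALUE" (illustrative; the exact
-# prefix depends on the path through the object graph).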
-
-
-def _escape_local_name(name):
-  # We need to support slashes in local names for compatibility, since this
-  # naming scheme is being patched in to things like Layer.add_variable where
-  # slashes were previously accepted. We also want to use slashes to indicate
-  # edges traversed to reach the variable, so we escape forward slashes in
-  # names.
-  return (name.replace(_ESCAPE_CHAR, _ESCAPE_CHAR + _ESCAPE_CHAR)
-          .replace(r"/", _ESCAPE_CHAR + "S"))
-
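-# Illustration of the escaping above (added for clarity):
-#   _escape_local_name("dense/kernel")  # -> "dense.Skernel"
-#   _escape_local_name("my.name")       # -> "my..name"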
-
-def _object_prefix_from_path(path_to_root):
-  return "/".join(
-      (_escape_local_name(checkpointable.name)
-       for checkpointable in path_to_root))
-
-
-def _slot_variable_naming_for_optimizer(optimizer_path):
-  """Make a function for naming slot variables in an optimizer."""
-  # Name slot variables:
-  #
-  #   <variable name>/<_OPTIMIZER_SLOTS_NAME>/<optimizer path>/<slot name>
-  #
-  # where <variable name> is exactly the checkpoint name used for the original
-  # variable, including the path from the checkpoint root and the local name in
-  # the object which owns it. Note that we only save slot variables if the
-  # variable it's slotting for is also being saved.
-
-  optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, optimizer_path)
-
-  def _name_slot_variable(variable_path, slot_name):
-    """With an optimizer specified, name a slot variable."""
-    return (variable_path
-            + optimizer_identifier
-            + _escape_local_name(slot_name))
-
-  return _name_slot_variable
-
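-# For example (illustrative): with variable path "dense/kernel", optimizer
-# path "optimizer", and slot name "momentum", the checkpoint name is
-# "dense/kernel/.OPTIMIZER_SLOT/optimizer/momentum".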
-
-def _serialize_slot_variables(checkpointable_objects, node_ids, object_names):
-  """Gather and name slot variables."""
-  non_slot_objects = list(checkpointable_objects)
-  slot_variables = object_identity.ObjectIdentityDictionary()
-  for checkpointable in non_slot_objects:
-    if (isinstance(checkpointable, optimizer_v1.Optimizer)
-        # TODO(b/110718070): Fix Keras imports.
-        or hasattr(checkpointable, "_create_or_restore_slot_variable")):
-      naming_scheme = _slot_variable_naming_for_optimizer(
-          optimizer_path=object_names[checkpointable])
-      slot_names = checkpointable.get_slot_names()
-      for slot_name in slot_names:
-        for original_variable_node_id, original_variable in enumerate(
-            non_slot_objects):
-          try:
-            slot_variable = checkpointable.get_slot(
-                original_variable, slot_name)
-          except (AttributeError, KeyError):
-            slot_variable = None
-          if slot_variable is None:
-            continue
-          slot_variable._maybe_initialize_checkpointable()  # pylint: disable=protected-access
-          if slot_variable._checkpoint_dependencies:  # pylint: disable=protected-access
-            # TODO(allenl): Gather dependencies of slot variables.
-            raise NotImplementedError(
-                "Currently only variables with no dependencies can be saved as "
-                "slot variables. File a feature request if this limitation "
-                "bothers you.")
-          if slot_variable in node_ids:
-            raise NotImplementedError(
-                "A slot variable was re-used as a dependency of a "
-                "Checkpointable object. This is not currently allowed. File a "
-                "feature request if this limitation bothers you.")
-          checkpoint_name = naming_scheme(
-              variable_path=object_names[original_variable],
-              slot_name=slot_name)
-          object_names[slot_variable] = checkpoint_name
-          slot_variable_node_id = len(checkpointable_objects)
-          node_ids[slot_variable] = slot_variable_node_id
-          checkpointable_objects.append(slot_variable)
-          slot_variable_proto = (
-              checkpointable_object_graph_pb2.CheckpointableObjectGraph
-              .CheckpointableObject.SlotVariableReference(
-                  slot_name=slot_name,
-                  original_variable_node_id=original_variable_node_id,
-                  slot_variable_node_id=slot_variable_node_id))
-          slot_variables.setdefault(checkpointable, []).append(
-              slot_variable_proto)
-  return slot_variables
-
-
-def _fill_object_graph_proto(checkpointable_objects,
-                             node_ids,
-                             slot_variables,
-                             object_graph_proto=None):
-  """Name non-slot `Checkpointable`s and add them to `object_graph_proto`."""
-  if object_graph_proto is None:
-    object_graph_proto = (
-        checkpointable_object_graph_pb2.CheckpointableObjectGraph())
-  for checkpoint_id, checkpointable in enumerate(checkpointable_objects):
-    assert node_ids[checkpointable] == checkpoint_id
-    object_proto = object_graph_proto.nodes.add()
-    object_proto.slot_variables.extend(slot_variables.get(checkpointable, ()))
-    for child in checkpointable._checkpoint_dependencies:  # pylint: disable=protected-access
-      child_proto = object_proto.children.add()
-      child_proto.node_id = node_ids[child.ref]
-      child_proto.local_name = child.name
-  return object_graph_proto
-
-
-class ObjectGraphView(object):
-  """Gathers and serializes an object graph."""
-
-  def __init__(self, root, saveables_cache=None):
-    """Configure the graph view.
-
-    Args:
-      root: A `Checkpointable` object whose variables (including the variables
-        of dependencies, recursively) should be saved. May be a weak reference.
-      saveables_cache: A dictionary mapping `Checkpointable` objects ->
-        attribute names -> SaveableObjects, used to avoid re-creating
-        SaveableObjects when graph building.
-    """
-    self._root_ref = root
-    self._saveables_cache = saveables_cache
-
-  def _list_dependencies(self, obj):
-    # pylint: disable=protected-access
-    obj._maybe_initialize_checkpointable()
-    return obj._checkpoint_dependencies
-    # pylint: enable=protected-access
-
-  @property
-  def saveables_cache(self):
-    """Maps Checkpointable objects -> attribute names -> list(SaveableObjects).
-
-    Used to avoid re-creating SaveableObjects when graph building. None when
-    executing eagerly.
-
-    Returns:
-      The cache (an object-identity dictionary), or None if caching is disabled.
-    """
-    return self._saveables_cache
-
-  @property
-  def root(self):
-    if isinstance(self._root_ref, weakref.ref):
-      derefed = self._root_ref()
-      assert derefed is not None
-      return derefed
-    else:
-      return self._root_ref
-
-  def _breadth_first_traversal(self):
-    """Find shortest paths to all dependencies of self.root."""
-    bfs_sorted = []
-    to_visit = collections.deque([self.root])
-    path_to_root = object_identity.ObjectIdentityDictionary()
-    path_to_root[self.root] = ()
-    while to_visit:
-      current_checkpointable = to_visit.popleft()
-      if isinstance(current_checkpointable, tracking.NotCheckpointable):
-        raise NotImplementedError(
-            ("The object %s does not support object-based saving. File a "
-             "feature request if this limitation bothers you. In the meantime, "
-             "you can remove the dependency on this object and save everything "
-             "else.")
-            % (current_checkpointable,))
-      bfs_sorted.append(current_checkpointable)
-      for name, dependency in self._list_dependencies(current_checkpointable):
-        if dependency not in path_to_root:
-          path_to_root[dependency] = (
-              path_to_root[current_checkpointable] + (
-                  base.CheckpointableReference(name, dependency),))
-          to_visit.append(dependency)
-    return bfs_sorted, path_to_root
-
-  def _add_attributes_to_object_graph(
-      self, checkpointable_objects, object_graph_proto, node_ids, object_names,
-      object_map):
-    """Create SaveableObjects and corresponding SerializedTensor protos."""
-    named_saveable_objects = []
-    if self._saveables_cache is None:
-      # No SaveableObject caching. Either we're executing eagerly, or building a
-      # static save which is specialized to the current Python state.
-      feed_additions = None
-    else:
-      # If we are caching SaveableObjects, we need to build up a feed_dict with
-      # functions computing volatile Python state to be saved with the
-      # checkpoint.
-      feed_additions = {}
-    for checkpoint_id, (checkpointable, object_proto) in enumerate(
-        zip(checkpointable_objects, object_graph_proto.nodes)):
-      assert node_ids[checkpointable] == checkpoint_id
-      object_name = object_names[checkpointable]
-      if object_map is None:
-        object_to_save = checkpointable
-      else:
-        object_to_save = object_map.get(checkpointable, checkpointable)
-      if self._saveables_cache is not None:
-        cached_attributes = self._saveables_cache.setdefault(object_to_save, {})
-      else:
-        cached_attributes = None
-
-      for name, saveable_factory in (
-          object_to_save._gather_saveables_for_checkpoint().items()):  # pylint: disable=protected-access
-        attribute = object_proto.attributes.add()
-        attribute.name = name
-        attribute.checkpoint_key = "%s/%s/%s" % (
-            object_name, _OBJECT_ATTRIBUTES_NAME, _escape_local_name(name))
-        if cached_attributes is None:
-          saveables = None
-        else:
-          saveables = cached_attributes.get(name, None)
-          if saveables is not None:
-            for saveable in saveables:
-              if attribute.checkpoint_key not in saveable.name:
-                # The checkpoint key for this SaveableObject is different. We
-                # need to re-create it.
-                saveables = None
-                del cached_attributes[name]
-                break
-        if saveables is None:
-          if callable(saveable_factory):
-            maybe_saveable = saveable_factory(name=attribute.checkpoint_key)
-          else:
-            maybe_saveable = saveable_factory
-          if isinstance(maybe_saveable, saveable_object_lib.SaveableObject):
-            saveables = (maybe_saveable,)
-          else:
-            # Figure out the name-based Saver's name for this variable. If it's
-            # already a SaveableObject we'd just get the checkpoint key back, so
-            # we leave full_name blank.
-            saver_dict = saveable_object_util.op_list_to_dict(
-                [maybe_saveable], convert_variable_to_tensor=False)
-            full_name, = saver_dict.keys()
-            saveables = tuple(saveable_object_util.saveable_objects_for_op(
-                op=maybe_saveable, name=attribute.checkpoint_key))
-            for saveable in saveables:
-              saveable.full_name = full_name
-          for saveable in saveables:
-            if attribute.checkpoint_key not in saveable.name:
-              raise AssertionError(
-                  ("The object %s produced a SaveableObject with name '%s' for "
-                   "attribute '%s'. Expected a name containing '%s'.")
-                  % (checkpointable, name, saveable.name,
-                     attribute.checkpoint_key))
-          if cached_attributes is not None:
-            cached_attributes[name] = saveables
-
-        optional_restore = None
-        for saveable in saveables:
-          if optional_restore is None:
-            optional_restore = saveable.optional_restore
-          else:
-            optional_restore = optional_restore and saveable.optional_restore
-
-          if hasattr(saveable, "full_name"):
-            attribute.full_name = saveable.full_name
-          if isinstance(saveable, base.PythonStateSaveable):
-            if feed_additions is None:
-              assert self._saveables_cache is None
-              # If we're not caching saveables, then we're either executing
-              # eagerly or building a static save/restore (e.g. for a
-              # SavedModel). In either case, we should embed the current Python
-              # state in the graph rather than relying on a feed dict.
-              saveable = saveable.freeze()
-            else:
-              saveable_feed_dict = saveable.feed_dict_additions()
-              for new_feed_key in saveable_feed_dict.keys():
-                if new_feed_key in feed_additions:
-                  raise AssertionError(
-                      ("The object %s tried to feed a value for the Tensor %s "
-                       "when saving, but another object is already feeding a "
-                       "value.")
-                      % (checkpointable, new_feed_key))
-              feed_additions.update(saveable_feed_dict)
-          named_saveable_objects.append(saveable)
-        if optional_restore is None:
-          optional_restore = False
-        attribute.optional_restore = optional_restore
-
-    return named_saveable_objects, feed_additions
-
-  def _serialize_gathered_objects(self, checkpointable_objects, path_to_root,
-                                  object_map=None):
-    """Create SaveableObjects and protos for gathered objects."""
-    object_names = object_identity.ObjectIdentityDictionary()
-    for obj, path in path_to_root.items():
-      object_names[obj] = _object_prefix_from_path(path)
-    node_ids = object_identity.ObjectIdentityDictionary()
-    for node_id, node in enumerate(checkpointable_objects):
-      node_ids[node] = node_id
-    slot_variables = _serialize_slot_variables(
-        checkpointable_objects=checkpointable_objects,
-        node_ids=node_ids,
-        object_names=object_names)
-    object_graph_proto = _fill_object_graph_proto(
-        checkpointable_objects=checkpointable_objects,
-        node_ids=node_ids,
-        slot_variables=slot_variables)
-    named_saveable_objects, feed_additions = (
-        self._add_attributes_to_object_graph(
-            checkpointable_objects=checkpointable_objects,
-            object_graph_proto=object_graph_proto,
-            node_ids=node_ids,
-            object_names=object_names,
-            object_map=object_map))
-    return named_saveable_objects, object_graph_proto, feed_additions
-
-  def serialize_object_graph(self):
-    """Determine checkpoint keys for variables and build a serialized graph.
-
-    Non-slot variables are keyed based on a shortest path from the checkpoint
-    root to the object which owns the variable (i.e. the one which called
-    `Checkpointable._add_variable` to create it).
-
-    Slot variables are keyed based on a shortest path to the variable being
-    slotted for, a shortest path to their optimizer, and the slot name.
-
-    Returns:
-      A tuple of (named_variables, object_graph_proto, feed_additions):
-        named_variables: A dictionary mapping names to variable objects.
-        object_graph_proto: A CheckpointableObjectGraph protocol buffer
-          containing the serialized object graph and variable references.
-        feed_additions: A dictionary mapping from Tensors to values which should
-          be fed when saving.
-
-    Raises:
-      ValueError: If there are invalid characters in an optimizer's slot names.
-    """
-    checkpointable_objects, path_to_root = self._breadth_first_traversal()
-    return self._serialize_gathered_objects(
-        checkpointable_objects, path_to_root)
-
-  def frozen_saveable_objects(self, object_map=None, to_graph=None):
-    """Creates SaveableObjects with the current object graph frozen."""
-    checkpointable_objects, path_to_root = self._breadth_first_traversal()
-    if to_graph:
-      target_context = to_graph.as_default
-    else:
-      target_context = ops.NullContextmanager
-    with target_context():
-      named_saveable_objects, graph_proto, _ = self._serialize_gathered_objects(
-          checkpointable_objects,
-          path_to_root,
-          object_map)
-      with ops.device("/cpu:0"):
-        object_graph_tensor = constant_op.constant(
-            graph_proto.SerializeToString(), dtype=dtypes.string)
-      named_saveable_objects.append(
-          base.NoRestoreSaveable(
-              tensor=object_graph_tensor,
-              name=base.OBJECT_GRAPH_PROTO_KEY))
-    return named_saveable_objects
-
-  def objects_ids_and_slot_variables(self):
-    """Traverse the object graph and list all accessible objects.
-
-    Looks for `Checkpointable` objects which are dependencies of
-    `root_checkpointable`. Includes slot variables only if the variable they are
-    slotting for and the optimizer are dependencies of `root_checkpointable`
-    (i.e. if they would be saved with a checkpoint).
-
-    Returns:
-      A tuple of (checkpointable objects, object -> node id, slot variables)
-    """
-    checkpointable_objects, path_to_root = self._breadth_first_traversal()
-    object_names = object_identity.ObjectIdentityDictionary()
-    for obj, path in path_to_root.items():
-      object_names[obj] = _object_prefix_from_path(path)
-    node_ids = object_identity.ObjectIdentityDictionary()
-    for node_id, node in enumerate(checkpointable_objects):
-      node_ids[node] = node_id
-    slot_variables = _serialize_slot_variables(
-        checkpointable_objects=checkpointable_objects,
-        node_ids=node_ids,
-        object_names=object_names)
-    return checkpointable_objects, node_ids, slot_variables
-
-  def list_objects(self):
-    """Traverse the object graph and list all accessible objects."""
-    checkpointable_objects, _, _ = self.objects_ids_and_slot_variables()
-    return checkpointable_objects
diff --git a/tensorflow/python/training/checkpointable/layer_utils.py b/tensorflow/python/training/checkpointable/layer_utils.py
deleted file mode 100644
index 9d45c48..0000000
--- a/tensorflow/python/training/checkpointable/layer_utils.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utilities related to layer/model functionality."""
-
-# TODO(b/110718070): Move these functions back to tensorflow/python/keras/utils
-# once __init__ files no longer require all of tf.keras to be imported together.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-def is_layer(obj):
-  """Implicit check for Layer-like objects."""
-  # TODO(b/110718070): Replace with isinstance(obj, base_layer.Layer).
-  return hasattr(obj, "_is_layer")
-
-
-def has_weights(obj):
-  """Implicit check for Layer-like objects."""
-  # TODO(b/110718070): Replace with isinstance(obj, base_layer.Layer).
-  return (hasattr(obj, "trainable_weights")
-          and hasattr(obj, "non_trainable_weights"))
-
-
-def filter_empty_layer_containers(layer_list):
-  """Filter out empty Layer-like containers."""
-  filtered = []
-  for obj in layer_list:
-    if is_layer(obj):
-      filtered.append(obj)
-    elif hasattr(obj, "layers"):
-      # Checkpointable data structures will not show up in ".layers" lists, but
-      # the layers they contain will.
-      filtered.extend(obj.layers)
-  return filtered
-
-
-def gather_trainable_weights(trainable, sub_layers, extra_variables):
-  """Lists the trainable weights for an object with sub-layers.
-
-  Args:
-    trainable: Whether the object collecting the variables is trainable.
-    sub_layers: A flat list of Layer objects owned by this object, to collect
-      variables from.
-    extra_variables: Any extra variables to include. Their `.trainable` property
-      is used to categorize them.
-
-  Returns:
-    A list of collected trainable weights/variables.
-  """
-  if not trainable:
-    return []
-  weights = []
-  for layer in sub_layers:
-    weights += layer.trainable_weights
-  trainable_extra_variables = [
-      v for v in extra_variables if v.trainable]
-  return weights + trainable_extra_variables
-
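-# Note (added for clarity): when `trainable` is False, the object reports no
-# trainable weights at all; gather_non_trainable_weights below then reports
-# the sub-layers' trainable weights as non-trainable instead.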
-
-def gather_non_trainable_weights(trainable, sub_layers, extra_variables):
-  """Lists the non-trainable weights for an object with sub-layers.
-
-  Args:
-    trainable: Whether the object collecting the variables is trainable.
-    sub_layers: A flat list of Layer objects owned by this object, to collect
-      variables from.
-    extra_variables: Any extra variables to include. Their `.trainable` property
-      is used to categorize them.
-
-  Returns:
-    A list of collected non-trainable weights/variables.
-  """
-  trainable_extra_variables = []
-  non_trainable_extra_variables = []
-  for v in extra_variables:
-    if v.trainable:
-      trainable_extra_variables.append(v)
-    else:
-      non_trainable_extra_variables.append(v)
-  weights = []
-  for layer in sub_layers:
-    weights += layer.non_trainable_weights
-  if not trainable:
-    trainable_weights = []
-    for layer in sub_layers:
-      trainable_weights += layer.trainable_weights
-    return (trainable_weights + trainable_extra_variables
-            + weights + non_trainable_extra_variables)
-  return weights + non_trainable_extra_variables
diff --git a/tensorflow/python/training/checkpointable/object_identity.py b/tensorflow/python/training/checkpointable/object_identity.py
deleted file mode 100644
index 2d3056b..0000000
--- a/tensorflow/python/training/checkpointable/object_identity.py
+++ /dev/null
@@ -1,156 +0,0 @@
-"""Utilities for collecting objects based on "is" comparison."""
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import weakref
-
-
-class _ObjectIdentityWrapper(object):
-  """Wraps an object, mapping __eq__ on wrapper to "is" on wrapped.
-
-  Since __eq__ is based on object identity, it's safe to also define __hash__
-  based on object ids. This lets us add unhashable types like checkpointable
-  _ListWrapper objects to object-identity collections.
-  """
-
-  def __init__(self, wrapped):
-    self._wrapped = wrapped
-
-  @property
-  def unwrapped(self):
-    return self._wrapped
-
-  def __eq__(self, other):
-    if isinstance(other, _ObjectIdentityWrapper):
-      return self._wrapped is other._wrapped  # pylint: disable=protected-access
-    return self._wrapped is other
-
-  def __hash__(self):
-    # Wrapper id() is also fine for weakrefs. In fact, we rely on
-    # id(weakref.ref(a)) == id(weakref.ref(a)) and weakref.ref(a) is
-    # weakref.ref(a) in _WeakObjectIdentityWrapper.
-    return id(self._wrapped)
-
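-# Illustration of the identity semantics above (added for clarity):
-#   a, b = [], []
-#   _ObjectIdentityWrapper(a) == _ObjectIdentityWrapper(a)  # True: same object
-#   _ObjectIdentityWrapper(a) == _ObjectIdentityWrapper(b)  # False: a is not b
-#   _ObjectIdentityWrapper(a) == a                          # also True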
-
-class _WeakObjectIdentityWrapper(_ObjectIdentityWrapper):
-
-  def __init__(self, wrapped):
-    super(_WeakObjectIdentityWrapper, self).__init__(weakref.ref(wrapped))
-
-  @property
-  def unwrapped(self):
-    return self._wrapped()
-
-
-class ObjectIdentityDictionary(collections.MutableMapping):
-  """A mutable mapping data structure which compares using "is".
-
-  This is necessary because we have checkpointable objects (_ListWrapper) which
-  have behavior identical to built-in Python lists (including being unhashable
-  and comparing based on the equality of their contents by default).
-  """
-
-  def __init__(self):
-    self._storage = {}
-
-  def _wrap_key(self, key):
-    return _ObjectIdentityWrapper(key)
-
-  def __getitem__(self, key):
-    return self._storage[self._wrap_key(key)]
-
-  def __setitem__(self, key, value):
-    self._storage[self._wrap_key(key)] = value
-
-  def __delitem__(self, key):
-    del self._storage[self._wrap_key(key)]
-
-  def __len__(self):
-    return len(self._storage)
-
-  def __iter__(self):
-    for key in self._storage:
-      yield key.unwrapped
-
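-# Example usage (added, illustrative; shows why identity comparison matters
-# for unhashable keys such as checkpointable list wrappers):
-#   d = ObjectIdentityDictionary()
-#   key = [1, 2]        # unhashable as a key in a plain dict
-#   d[key] = "value"    # fine here: keys are hashed and compared by id()
-#   assert d[key] == "value"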
-
-class ObjectIdentityWeakKeyDictionary(ObjectIdentityDictionary):
-  """Like weakref.WeakKeyDictionary, but compares objects with "is"."""
-
-  def _wrap_key(self, key):
-    return _WeakObjectIdentityWrapper(key)
-
-  def __len__(self):
-    # Iterate, discarding old weak refs
-    return len(list(self._storage))
-
-  def __iter__(self):
-    keys = self._storage.keys()
-    for key in keys:
-      unwrapped = key.unwrapped
-      if unwrapped is None:
-        del self[key]
-      else:
-        yield unwrapped
-
-
-class ObjectIdentitySet(collections.MutableSet):
-  """Like the built-in set, but compares objects with "is"."""
-
-  def __init__(self, *args):
-    self._storage = set([self._wrap_key(obj) for obj in list(*args)])
-
-  def _wrap_key(self, key):
-    return _ObjectIdentityWrapper(key)
-
-  def __contains__(self, key):
-    return self._wrap_key(key) in self._storage
-
-  def discard(self, key):
-    self._storage.discard(self._wrap_key(key))
-
-  def add(self, key):
-    self._storage.add(self._wrap_key(key))
-
-  def __len__(self):
-    return len(self._storage)
-
-  def __iter__(self):
-    keys = list(self._storage)
-    for key in keys:
-      yield key.unwrapped
-
-
-class ObjectIdentityWeakSet(ObjectIdentitySet):
-  """Like weakref.WeakSet, but compares objects with "is"."""
-
-  def _wrap_key(self, key):
-    return _WeakObjectIdentityWrapper(key)
-
-  def __len__(self):
-    # Iterate, discarding old weak refs
-    return len([_ for _ in self])
-
-  def __iter__(self):
-    keys = list(self._storage)
-    for key in keys:
-      unwrapped = key.unwrapped
-      if unwrapped is None:
-        self.discard(key)
-      else:
-        yield unwrapped
diff --git a/tensorflow/python/training/checkpointable/tracking.py b/tensorflow/python/training/checkpointable/tracking.py
deleted file mode 100644
index 83b0d8b..0000000
--- a/tensorflow/python/training/checkpointable/tracking.py
+++ /dev/null
@@ -1,212 +0,0 @@
-"""Dependency tracking for checkpointable objects."""
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.eager import context
-from tensorflow.python.eager import def_function
-from tensorflow.python.eager import function as defun
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import data_structures
-from tensorflow.python.util import tf_contextlib
-
-
-# global _RESOURCE_TRACKER_STACK
-_RESOURCE_TRACKER_STACK = []
-
-
-class NotCheckpointable(object):
-  """Marks instances of child classes as unsaveable using an object-based API.
-
-  Useful for marking objects which would otherwise look checkpointable because
-  of inheritance (e.g. through `Layer`) as not checkpointable. Inheriting from
-  `NotCheckpointable` does not prevent an object from being assigned to any
-  attributes, but will throw an error on save/restore.
-  """
-  pass
-
-
-class AutoCheckpointable(base.Checkpointable):
-  """Manages dependencies on other objects.
-
-  `Checkpointable` objects may have dependencies: other `Checkpointable` objects
-  which should be saved if the object declaring the dependency is saved. A
-  correctly saveable program has a dependency graph such that if changing a
-  global variable affects an object (e.g. changes the behavior of any of its
-  methods) then there is a chain of dependencies from the influenced object to
-  the variable.
-
-  Dependency edges have names, and are created implicitly when a
-  `Checkpointable` object is assigned to an attribute of another
-  `Checkpointable` object. For example:
-
-  ```
-  obj = Checkpointable()
-  obj.v = ResourceVariable(0.)
-  ```
-
-  The `Checkpointable` object `obj` now has a dependency named "v" on a
-  variable.
-
-  `Checkpointable` objects may specify `Tensor`s to be saved and restored
-  directly (e.g. a `Variable` indicating how to save itself) rather than through
-  dependencies on other objects. See
-  `Checkpointable._gather_saveables_for_checkpoint` for details.
-  """
-
-  def __setattr__(self, name, value):
-    """Support self.foo = checkpointable syntax."""
-    if getattr(self, "_setattr_tracking", True):
-      value = data_structures.sticky_attribute_assignment(
-          checkpointable=self, value=value, name=name)
-    super(AutoCheckpointable, self).__setattr__(name, value)
-
-  def __delattr__(self, name):
-    self._maybe_initialize_checkpointable()
-    if name in self._unconditional_dependency_names:
-      del self._unconditional_dependency_names[name]
-      for index, (dep_name, _) in enumerate(
-          self._unconditional_checkpoint_dependencies):
-        if dep_name == name:
-          del self._unconditional_checkpoint_dependencies[index]
-          break
-    super(AutoCheckpointable, self).__delattr__(name)
-
-  def _no_dependency(self, value):
-    """Override to allow CheckpointableBase to disable dependency tracking."""
-    return data_structures.NoDependency(value)
-
-  def _list_functions_for_serialization(self):
-    """Return a dict of `Function`s of a checkpointable."""
-    functions = dict()
-    for attribute_name in dir(self):
-      try:
-        attribute_value = getattr(self, attribute_name, None)
-      except Exception:  # pylint: disable=broad-except
-        # We really don't want to throw an exception just because some object's
-        # attribute accessor is broken.
-        attribute_value = None
-      if isinstance(attribute_value, (def_function.Function,
-                                      defun.ConcreteFunction)):
-        functions[attribute_name] = attribute_value
-    return functions
-
-
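For reference, a minimal sketch of the attribute-based dependency tracking `AutoCheckpointable` implements, assuming the `checkpointable` package deleted by this diff (or its renamed successor) is still importable; `root`, `child`, and `loose` are illustrative names:

```python
from tensorflow.python.training.checkpointable import data_structures
from tensorflow.python.training.checkpointable import tracking

root = tracking.AutoCheckpointable()
root.child = tracking.AutoCheckpointable()  # records a dependency named "child"
root.loose = data_structures.NoDependency(tracking.AutoCheckpointable())

# Only the plain assignment created a dependency edge.
assert [name for name, _ in root._checkpoint_dependencies] == ["child"]

del root.child  # __delattr__ drops the edge along with the attribute
assert not root._checkpoint_dependencies
```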
-class ResourceTracker(object):
-  """An object that tracks a list of resources."""
-
-  def __init__(self):
-    self._resources = []
-
-  @property
-  def resources(self):
-    return self._resources
-
-  def add_resource(self, resource):
-    self._resources.append(resource)
-
-
-@tf_contextlib.contextmanager
-def resource_tracker_scope(resource_tracker):
-  """A context to manage resource trackers.
-
-  Use this in order to collect up all resources created within a block of code.
-  Example usage:
-
-  ```python
-  resource_tracker = ResourceTracker()
-  with resource_tracker_scope(resource_tracker):
-    resource = TrackableResource()
-
-  assert resource_tracker.resources == [resource]
-  ```
-
-  Args:
-    resource_tracker: The `ResourceTracker` to record new resources in while
-      the scope is active.
-
-  Yields:
-    A scope in which the resource_tracker is active.
-  """
-  global _RESOURCE_TRACKER_STACK
-  old = list(_RESOURCE_TRACKER_STACK)
-  _RESOURCE_TRACKER_STACK.append(resource_tracker)
-  try:
-    yield
-  finally:
-    _RESOURCE_TRACKER_STACK = old
-
-
-class TrackableResource(base.Checkpointable):
-  """Base class for all resources that need to be tracked."""
-
-  def __init__(self):
-    global _RESOURCE_TRACKER_STACK
-    for resource_tracker in _RESOURCE_TRACKER_STACK:
-      resource_tracker.add_resource(self)
-
-    self._resource_handle = None
-
-  def create_resource(self):
-    """A function that creates a resource handle."""
-    raise NotImplementedError("TrackableResource.create_resource not "
-                              "implemented.")
-
-  def initialize(self):
-    """A function that initializes the resource. Optional."""
-    pass
-
-  @property
-  def resource_handle(self):
-    """Returns the resource handle associated with this Resource."""
-    if self._resource_handle is None:
-      self._resource_handle = self.create_resource()
-    return self._resource_handle
-
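A minimal sketch of a concrete `TrackableResource`, mirroring the `_DummyResource` helper in the test file deleted below: `create_resource` runs lazily on first access to `resource_handle`, and its result is cached. The class and handle names are illustrative:

```python
from tensorflow.python.training.checkpointable import tracking

class StringHandleResource(tracking.TrackableResource):
  """Illustrative resource whose "handle" is just a string."""

  def __init__(self, handle_name):
    self._handle_name = handle_name
    super(StringHandleResource, self).__init__()

  def create_resource(self):
    return self._handle_name

tracker = tracking.ResourceTracker()
with tracking.resource_tracker_scope(tracker):
  res = StringHandleResource("table_handle")

assert tracker.resources == [res]
assert res.resource_handle == "table_handle"  # created lazily, then cached
```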
-
-class TrackableAsset(base.Checkpointable):
-  """Base class for asset files which need to be tracked."""
-
-  def __init__(self, path):
-    """Record the full path to the asset."""
-    # We use a variable here so that @tf.functions do not capture a literal
-    # value. The init_scope prevents functions from capturing `path` in an
-    # initialization graph, since it is transient and should not end up in a
-    # serialized function body. When serialized in a SavedModel, the variable
-    # will be set during the loading process to its location in the assets/
-    # directory.
-    with ops.init_scope():
-      if context.executing_eagerly():
-        self._path = self._no_dependency(
-            resource_variable_ops.ResourceVariable(
-                path, dtype=dtypes.string,
-                name="asset_path"))
-      else:
-        # Adding a variable is too disruptive when v1-style graph building,
-        # since things may get fed and local variable initializers would then
-        # need to be run.
-        self._path = path
-
-  @property
-  def asset_path(self):
-    """Fetch the current asset path."""
-    return self._path
-
-ops.register_tensor_conversion_function(
-    TrackableAsset,
-    lambda asset, **kw: ops.internal_convert_to_tensor(asset.asset_path, **kw))
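A hedged sketch of the eager-mode behavior `TrackableAsset` describes above: the path is wrapped in a string `ResourceVariable`, so functions capture the variable rather than a baked-in literal. The path is hypothetical and the file need not exist:

```python
import tensorflow as tf  # 1.x era, matching this code

from tensorflow.python.training.checkpointable import tracking

tf.enable_eager_execution()
asset = tracking.TrackableAsset("/tmp/vocab.txt")  # hypothetical path
print(asset.asset_path.numpy())  # b'/tmp/vocab.txt' (held in a variable)
```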
diff --git a/tensorflow/python/training/checkpointable/tracking_test.py b/tensorflow/python/training/checkpointable/tracking_test.py
deleted file mode 100644
index 87c6603..0000000
--- a/tensorflow/python/training/checkpointable/tracking_test.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-import numpy
-import six
-
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras.engine import training
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import test
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import data_structures
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util
-from tensorflow.python.util import nest
-
-
-class InterfaceTests(test.TestCase):
-
-  def testMultipleAssignment(self):
-    root = tracking.AutoCheckpointable()
-    root.leaf = tracking.AutoCheckpointable()
-    root.leaf = root.leaf
-    duplicate_name_dep = tracking.AutoCheckpointable()
-    with self.assertRaisesRegexp(ValueError, "already declared"):
-      root._track_checkpointable(duplicate_name_dep, name="leaf")
-    # No error; we're overriding __setattr__, so we can't really stop people
-    # from doing this while maintaining backward compatibility.
-    root.leaf = duplicate_name_dep
-    root._track_checkpointable(duplicate_name_dep, name="leaf", overwrite=True)
-    self.assertIs(duplicate_name_dep, root._lookup_dependency("leaf"))
-    (_, dep_object), = root._checkpoint_dependencies
-    self.assertIs(duplicate_name_dep, dep_object)
-
-  def testNoDependency(self):
-    root = tracking.AutoCheckpointable()
-    hasdep = tracking.AutoCheckpointable()
-    root.hasdep = hasdep
-    nodep = tracking.AutoCheckpointable()
-    root.nodep = data_structures.NoDependency(nodep)
-    self.assertEqual(1, len(root._checkpoint_dependencies))
-    self.assertIs(root._checkpoint_dependencies[0].ref, root.hasdep)
-    self.assertIs(root.hasdep, hasdep)
-    self.assertIs(root.nodep, nodep)
-
-    class NoDependencyModel(training.Model):
-
-      @base.no_automatic_dependency_tracking
-      def __init__(self):
-        super(NoDependencyModel, self).__init__()
-        self.a = []
-        self.b = tracking.AutoCheckpointable()
-
-    nodeps = NoDependencyModel()
-    self.assertEqual([nodeps], util.list_objects(nodeps))
-
-  def testRemoveDependency(self):
-    root = tracking.AutoCheckpointable()
-    root.a = tracking.AutoCheckpointable()
-    self.assertEqual(1, len(root._checkpoint_dependencies))
-    self.assertEqual(1, len(root._unconditional_checkpoint_dependencies))
-    self.assertIs(root.a, root._checkpoint_dependencies[0].ref)
-    del root.a
-    self.assertFalse(hasattr(root, "a"))
-    self.assertEqual(0, len(root._checkpoint_dependencies))
-    self.assertEqual(0, len(root._unconditional_checkpoint_dependencies))
-    root.a = tracking.AutoCheckpointable()
-    self.assertEqual(1, len(root._checkpoint_dependencies))
-    self.assertEqual(1, len(root._unconditional_checkpoint_dependencies))
-    self.assertIs(root.a, root._checkpoint_dependencies[0].ref)
-
-  def testListBasic(self):
-    a = tracking.AutoCheckpointable()
-    b = tracking.AutoCheckpointable()
-    a.l = [b]
-    c = tracking.AutoCheckpointable()
-    a.l.append(c)
-    a_deps = util.list_objects(a)
-    self.assertIn(b, a_deps)
-    self.assertIn(c, a_deps)
-    direct_a_dep, = a._checkpoint_dependencies
-    self.assertEqual("l", direct_a_dep.name)
-    self.assertIn(b, direct_a_dep.ref)
-    self.assertIn(c, direct_a_dep.ref)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testMutationDirtiesList(self):
-    a = tracking.AutoCheckpointable()
-    b = tracking.AutoCheckpointable()
-    a.l = [b]
-    c = tracking.AutoCheckpointable()
-    a.l.insert(0, c)
-    checkpoint = util.Checkpoint(a=a)
-    with self.assertRaisesRegexp(ValueError, "A list element was replaced"):
-      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testOutOfBandEditDirtiesList(self):
-    a = tracking.AutoCheckpointable()
-    b = tracking.AutoCheckpointable()
-    held_reference = [b]
-    a.l = held_reference
-    c = tracking.AutoCheckpointable()
-    held_reference.append(c)
-    checkpoint = util.Checkpoint(a=a)
-    with self.assertRaisesRegexp(ValueError, "The wrapped list was modified"):
-      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testNestedLists(self):
-    a = tracking.AutoCheckpointable()
-    a.l = []
-    b = tracking.AutoCheckpointable()
-    a.l.append([b])
-    c = tracking.AutoCheckpointable()
-    a.l[0].append(c)
-    a_deps = util.list_objects(a)
-    self.assertIn(b, a_deps)
-    self.assertIn(c, a_deps)
-    a.l[0].append(1)
-    d = tracking.AutoCheckpointable()
-    a.l[0].append(d)
-    a_deps = util.list_objects(a)
-    self.assertIn(d, a_deps)
-    self.assertIn(b, a_deps)
-    self.assertIn(c, a_deps)
-    self.assertNotIn(1, a_deps)
-    e = tracking.AutoCheckpointable()
-    f = tracking.AutoCheckpointable()
-    a.l1 = [[], [e]]
-    a.l1[0].append(f)
-    a_deps = util.list_objects(a)
-    self.assertIn(e, a_deps)
-    self.assertIn(f, a_deps)
-    checkpoint = util.Checkpoint(a=a)
-    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-    a.l[0].append(data_structures.NoDependency([]))
-    a.l[0][-1].append(5)
-    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-    # Dirtying the inner list means the root object is unsaveable.
-    a.l[0][1] = 2
-    with self.assertRaisesRegexp(ValueError, "A list element was replaced"):
-      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDictionariesBasic(self):
-    a = training.Model()
-    b = training.Model()
-    a.attribute = {"b": b}
-    c = training.Model()
-    a.attribute["c"] = []
-    a.attribute["c"].append(c)
-    a_deps = util.list_objects(a)
-    self.assertIn(b, a_deps)
-    self.assertIn(c, a_deps)
-    self.assertIs(b, a.attribute["b"])
-    six.assertCountEqual(
-        self,
-        ["b", "c"],
-        [dep.name for dep in a.attribute._checkpoint_dependencies])
-    self.assertEqual([b, c], a.layers)
-    self.assertEqual([b, c], a.attribute.layers)
-    self.assertEqual([c], a.attribute["c"].layers)
-    checkpoint = util.Checkpoint(a=a)
-    save_path = checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-    with self.cached_session():
-      checkpoint.restore(save_path).assert_consumed().initialize_or_restore()
-
-  @test_util.run_in_graph_and_eager_modes
-  def testNoDepList(self):
-    a = training.Model()
-    a.l1 = data_structures.NoDependency([])
-    a.l1.insert(1, 0)
-    self.assertIsInstance(a.l1, list)
-    checkpoint = util.Checkpoint(a=a)
-    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-    a.l2 = []
-    a.l2.insert(1, 0)
-    with self.assertRaisesRegexp(ValueError, "A list element was replaced"):
-      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testAssertions(self):
-    a = tracking.AutoCheckpointable()
-    a.l = {"k": [numpy.zeros([2, 2])]}
-    self.assertAllEqual(nest.flatten({"k": [numpy.zeros([2, 2])]}),
-                        nest.flatten(a.l))
-    self.assertAllClose({"k": [numpy.zeros([2, 2])]}, a.l)
-    nest.map_structure(self.assertAllClose, a.l, {"k": [numpy.zeros([2, 2])]})
-    a.tensors = {"k": [array_ops.ones([2, 2]), array_ops.zeros([3, 3])]}
-    self.assertAllClose({"k": [numpy.ones([2, 2]), numpy.zeros([3, 3])]},
-                        self.evaluate(a.tensors))
-
-
-class _DummyResource(tracking.TrackableResource):
-
-  def __init__(self, handle_name):
-    self._handle_name = handle_name
-    super(_DummyResource, self).__init__()
-
-  def create_resource(self):
-    return self._handle_name
-
-
-class ResourceTrackerTest(test.TestCase):
-
-  def testBasic(self):
-    resource_tracker = tracking.ResourceTracker()
-    with tracking.resource_tracker_scope(resource_tracker):
-      dummy_resource1 = _DummyResource("test1")
-      dummy_resource2 = _DummyResource("test2")
-
-    self.assertEqual(2, len(resource_tracker.resources))
-    self.assertEqual("test1", resource_tracker.resources[0].resource_handle)
-    self.assertEqual("test2", resource_tracker.resources[1].resource_handle)
-
-  def testTwoScopes(self):
-    resource_tracker1 = tracking.ResourceTracker()
-    with tracking.resource_tracker_scope(resource_tracker1):
-      dummy_resource1 = _DummyResource("test1")
-
-    resource_tracker2 = tracking.ResourceTracker()
-    with tracking.resource_tracker_scope(resource_tracker2):
-      dummy_resource2 = _DummyResource("test2")
-
-    self.assertEqual(1, len(resource_tracker1.resources))
-    self.assertEqual("test1", resource_tracker1.resources[0].resource_handle)
-    self.assertEqual(1, len(resource_tracker2.resources))
-    self.assertEqual("test2", resource_tracker2.resources[0].resource_handle)
-
-  def testNestedScopes(self):
-    resource_tracker = tracking.ResourceTracker()
-    with tracking.resource_tracker_scope(resource_tracker):
-      resource_tracker1 = tracking.ResourceTracker()
-      with tracking.resource_tracker_scope(resource_tracker1):
-        dummy_resource1 = _DummyResource("test1")
-
-      resource_tracker2 = tracking.ResourceTracker()
-      with tracking.resource_tracker_scope(resource_tracker2):
-        dummy_resource2 = _DummyResource("test2")
-
-    self.assertEqual(1, len(resource_tracker1.resources))
-    self.assertEqual("test1", resource_tracker1.resources[0].resource_handle)
-    self.assertEqual(1, len(resource_tracker2.resources))
-    self.assertEqual("test2", resource_tracker2.resources[0].resource_handle)
-    self.assertEqual(2, len(resource_tracker.resources))
-    self.assertEqual("test1", resource_tracker.resources[0].resource_handle)
-    self.assertEqual("test2", resource_tracker.resources[1].resource_handle)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
deleted file mode 100644
index 1ba9720..0000000
--- a/tensorflow/python/training/checkpointable/util.py
+++ /dev/null
@@ -1,1448 +0,0 @@
-"""Utilities for saving/loading Checkpointable objects."""
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import abc
-import os
-import weakref
-
-from tensorflow.core.protobuf import checkpointable_object_graph_pb2
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.client import session as session_lib
-from tensorflow.python.eager import context
-from tensorflow.python.eager import def_function
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors_impl
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.lib.io import file_io
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_io_ops as io_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver as v1_saver_lib
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import data_structures
-from tensorflow.python.training.checkpointable import graph_view as graph_view_lib
-from tensorflow.python.training.checkpointable import object_identity
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.saving import functional_saver
-from tensorflow.python.training.saving import saveable_object_util
-from tensorflow.python.util import compat
-from tensorflow.python.util import deprecation
-from tensorflow.python.util import tf_contextlib
-from tensorflow.python.util.tf_export import tf_export
-
-
-class _CheckpointRestoreCoordinator(object):
-  """Holds the status of an object-based checkpoint load."""
-
-  def __init__(self, object_graph_proto, save_path, save_path_tensor,
-               restore_op_cache, graph_view):
-    """Specify the checkpoint being loaded.
-
-    Args:
-      object_graph_proto: The CheckpointableObjectGraph protocol buffer
-        associated with this checkpoint.
-      save_path: A string, the path to the checkpoint, as returned by
-        `tf.train.latest_checkpoint`.
-      save_path_tensor: A string `Tensor` which contains or will be fed the save
-        path.
-      restore_op_cache: A dictionary shared between
-        `_CheckpointRestoreCoordinator`s for the same Python objects, used to
-        look up restore ops by name to avoid re-creating them across multiple
-        `restore()` calls.
-      graph_view: A graph_view_lib.ObjectGraphView object for the restored
-        objects.
-    """
-    self.object_graph_proto = object_graph_proto
-    self.restore_uid = ops.uid()
-    # Maps from objects to lists of attributes which were in the checkpoint but
-    # not loaded into any object, for error checking.
-    self.unused_attributes = weakref.WeakKeyDictionary()
-    # Dictionary mapping from an id in the protocol buffer flat array to
-    # Checkpointable Python objects. This mapping may be deferred if a
-    # checkpoint is restored before all dependencies have been tracked. Uses
-    # weak references so that partial restorations don't create reference cycles
-    # (as objects with deferred dependencies will generally have references to
-    # this object).
-    self.object_by_proto_id = weakref.WeakValueDictionary()
-    # A set of all Python objects we've seen as dependencies, even if we didn't
-    # use them (for example because of inconsistent references when
-    # loading). Used to make status assertions fail when loading checkpoints
-    # that don't quite match.
-    self.all_python_objects = object_identity.ObjectIdentityWeakSet()
-    self.save_path_tensor = save_path_tensor
-    self.save_path_string = save_path
-    self.dtype_map = pywrap_tensorflow.NewCheckpointReader(
-        save_path).get_variable_to_dtype_map()
-    # When graph building, contains a list of ops to run to restore objects from
-    # this checkpoint.
-    self.restore_ops = []
-    self.restore_ops_by_name = restore_op_cache
-    self.graph_view = graph_view
-    self.new_restore_ops_callback = None
-    # A mapping from optimizer proto ids to lists of slot variables to be
-    # restored when the optimizer is tracked. Only includes slot variables whose
-    # regular variables have already been created, and only for optimizer
-    # objects which have not yet been created/tracked.
-    self.deferred_slot_restorations = {}
-    # A mapping from variable proto ids to lists of slot variables to be
-    # restored when the variable is created/tracked. These get shifted over to
-    # deferred_slot_restorations if the optimizer hasn't been created when that
-    # happens.
-    self.slot_restorations = {}
-    for node_index, node in enumerate(self.object_graph_proto.nodes):
-      for slot_reference in node.slot_variables:
-        # `node` refers to an `Optimizer`, since only these have slot variables.
-        self.slot_restorations.setdefault(
-            slot_reference.original_variable_node_id, []).append(
-                base._SlotVariableRestoration(  # pylint: disable=protected-access
-                    optimizer_id=node_index,
-                    slot_variable_id=slot_reference.slot_variable_node_id,
-                    slot_name=slot_reference.slot_name))
-
-  def new_restore_ops(self, new_ops):
-    self.restore_ops.extend(new_ops)
-    if self.new_restore_ops_callback:
-      self.new_restore_ops_callback(new_ops)  # pylint: disable=not-callable
-
-  def restore_saveables(self, tensor_saveables, python_saveables):
-    """Run or build restore operations for SaveableObjects.
-
-    Args:
-      tensor_saveables: `SaveableObject`s which correspond to Tensors.
-      python_saveables: `PythonStateSaveable`s which correspond to Python
-        values.
-
-    Returns:
-      When graph building, a list of restore operations, either cached or newly
-      created, to restore `tensor_saveables`.
-    """
-    restore_ops = []
-    # Eagerly run restorations for Python state.
-    reader = pywrap_tensorflow.NewCheckpointReader(
-        self.save_path_string)
-    for saveable in python_saveables:
-      spec_names = [spec.name for spec in saveable.specs]
-      saveable.python_restore(
-          [reader.get_tensor(name) for name in spec_names])
-
-    # If we have new SaveableObjects, extract and cache restore ops.
-    if tensor_saveables:
-      validated_saveables = saveable_object_util.validate_and_slice_inputs(
-          tensor_saveables)
-      validated_names = set(saveable.name for saveable in validated_saveables)
-      if set(tensor_saveables.keys()) != validated_names:
-        raise AssertionError(
-            ("Saveable keys changed when validating. Got back %s, was "
-             "expecting %s") % (tensor_saveables.keys(), validated_names))
-      new_restore_ops = functional_saver.restore_from_saveable_objects(
-          self.save_path_tensor, validated_saveables)
-      if not context.executing_eagerly():
-        restore_ops.extend(new_restore_ops)
-        for saveable, restore_op in zip(validated_saveables, new_restore_ops):
-          assert saveable.name not in self.restore_ops_by_name
-          self.restore_ops_by_name[saveable.name] = restore_op
-    return restore_ops
-
-
-class _NameBasedRestoreCoordinator(object):
-  """Keeps the status of a name-based checkpoint restore."""
-
-  def __init__(self, save_path, dtype_map=None):
-    self.save_path = save_path
-    self.dtype_map = dtype_map
-    self.unused_attributes = weakref.WeakKeyDictionary()
-    self.restore_uid = ops.uid()
-
-  def globally_named_object_attributes(self, checkpointable):
-    """Create globally named SaveableObjects from attributes.
-
-    If an object's attribute has no global name specified (default construction
-    for the SaveableObject factory), records the failure in
-    `self.unused_attributes` (which can then be used to make status assertions
-    fail; see `NameBasedSaverStatus`).
-
-    Args:
-      checkpointable: An object to save.
-
-    Yields:
-      SaveableObjects for `checkpointable`'s attributes.
-    """
-    for attribute_name, saveable_factory in (
-        checkpointable._gather_saveables_for_checkpoint().items()):  # pylint: disable=protected-access
-      if callable(saveable_factory):
-        try:
-          saveable = saveable_factory()
-        except TypeError:
-          # This saveable object factory does not have a default name= argument,
-          # which means there's no way to save/restore it using a name-based
-          # checkpoint. Ignore the error now and make sure assert_consumed()
-          # fails.
-          self.unused_attributes.setdefault(checkpointable, []).append(
-              attribute_name)
-          continue
-      else:
-        saveable = saveable_factory
-      names_to_saveables = saveable_object_util.op_list_to_dict(
-          [saveable],
-          convert_variable_to_tensor=False)
-      for name, op in names_to_saveables.items():
-        for saveable_object in saveable_object_util.saveable_objects_for_op(
-            op=op, name=name):
-          yield saveable_object
-
-  def eager_restore(self, checkpointable):
-    """Runs restore ops for `checkpointable`'s attributes."""
-    # When graph building, we don't add any restore ops to the graph until
-    # run_restore_ops/initialize_or_restore on the status object for name-based
-    # checkpoints.
-    assert context.executing_eagerly()
-    for saveable in self.globally_named_object_attributes(
-        checkpointable):
-      restored_tensors = []
-      tensor_missing = False
-      for spec in saveable.specs:
-        if spec.name in self.dtype_map:
-          with ops.device("cpu:0"):
-            restored, = io_ops.restore_v2(
-                prefix=self.save_path,
-                tensor_names=[spec.name],
-                shape_and_slices=[""],
-                dtypes=[self.dtype_map[spec.name]],
-                name="%s_checkpoint_read" % (spec.name,))
-          restored_tensors.append(array_ops.identity(restored))
-        else:
-          tensor_missing = True
-
-      if not tensor_missing:
-        # Ignores values missing from the checkpoint, as with object-based
-        # restore. Status assertions can be used to check exact matches,
-        # although it's unlikely to ever happen for name-based checkpoints.
-        saveable.restore(restored_tensors=restored_tensors,
-                         restored_shapes=None)
-
-
-# TODO(allenl): If this ends up in a public API, consider adding LINT.IfChange
-# or consolidating the implementation with get_variable.
-def _default_getter(name, shape, dtype, initializer=None,
-                    partition_info=None, **kwargs):
-  """A pared-down version of get_variable which does not reuse variables."""
-  dtype = dtypes.as_dtype(dtype)
-  shape_object = tensor_shape.as_shape(shape)
-  with ops.init_scope():
-    if initializer is None:
-      initializer, initializing_from_value = (
-          variable_scope._get_default_variable_store()._get_default_initializer(  # pylint: disable=protected-access
-              name=name, shape=shape_object, dtype=dtype))
-    else:
-      initializing_from_value = not callable(initializer)
-    # Same logic as get_variable
-    variable_dtype = dtype.base_dtype
-    if initializing_from_value:
-      if shape is not None:
-        raise ValueError("If initializer is a constant, do not specify shape.")
-      initial_value = initializer
-    else:
-      # Instantiate initializer if provided initializer is a type object.
-      if isinstance(initializer, type(init_ops.Initializer)):
-        initializer = initializer(dtype=dtype)
-      def initial_value():
-        return initializer(
-            shape_object.as_list(), dtype=dtype, partition_info=partition_info)
-    return variables.VariableV1(
-        initial_value=initial_value,
-        name=name,
-        dtype=variable_dtype,
-        use_resource=True,
-        **kwargs
-    )
-
-
-def add_variable(checkpointable, name, shape=None, dtype=dtypes.float32,
-                 initializer=None):
-  """Add a variable to a Checkpointable with no scope influence."""
-  return checkpointable._add_variable_with_custom_getter(  # pylint: disable=protected-access
-      name=name, shape=shape, dtype=dtype,
-      initializer=initializer, getter=_default_getter)
-
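A short graph-mode sketch of `add_variable`, under the same importability assumptions: the variable is tracked under the plain name "v" regardless of any enclosing variable scope:

```python
from tensorflow.python.ops import init_ops
from tensorflow.python.training.checkpointable import tracking
from tensorflow.python.training.checkpointable import util

obj = tracking.AutoCheckpointable()
v = util.add_variable(
    obj, name="v", shape=[], initializer=init_ops.ones_initializer())
# The dependency name is exactly "v"; scopes do not leak into it.
assert "v" in [name for name, _ in obj._checkpoint_dependencies]
```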
-
-def object_metadata(save_path):
-  """Retrieves information about the objects in a checkpoint.
-
-  Example usage:
-
-  ```python
-  object_graph = tf.contrib.checkpoint.object_metadata(
-      tf.train.latest_checkpoint(checkpoint_directory))
-  ckpt_variable_names = set()
-  for node in object_graph.nodes:
-    for attribute in node.attributes:
-      ckpt_variable_names.add(attribute.full_name)
-  ```
-
-  Args:
-    save_path: The path to the checkpoint, as returned by `save` or
-      `tf.train.latest_checkpoint`.
-  Returns:
-    A parsed `tf.contrib.checkpoint.CheckpointableObjectGraph` protocol buffer.
-  Raises:
-    ValueError: If an object graph was not found in the checkpoint.
-  """
-  reader = pywrap_tensorflow.NewCheckpointReader(save_path)
-  try:
-    object_graph_string = reader.get_tensor(
-        base.OBJECT_GRAPH_PROTO_KEY)
-  except errors_impl.NotFoundError:
-    raise ValueError(
-        ('The specified checkpoint "%s" does not appear to be object-based (it '
-         'is missing the key "%s"). Likely it was created with a name-based '
-         'saver and does not contain an object dependency graph.') % (
-             save_path, base.OBJECT_GRAPH_PROTO_KEY))
-  object_graph_proto = (
-      checkpointable_object_graph_pb2.CheckpointableObjectGraph())
-  object_graph_proto.ParseFromString(object_graph_string)
-  return object_graph_proto
-
-
-def list_objects(root_checkpointable):
-  """Traverse the object graph and list all accessible objects.
-
-  Looks for `Checkpointable` objects which are dependencies of
-  `root_checkpointable`. Includes slot variables only if the variable they are
-  slotting for and the optimizer are dependencies of `root_checkpointable`
-  (i.e. if they would be saved with a checkpoint).
-
-  Args:
-    root_checkpointable: A `Checkpointable` object whose dependencies should be
-      flattened.
-  Returns:
-    A flat list of objects.
-  """
-  return graph_view_lib.ObjectGraphView(root_checkpointable).list_objects()
-
-
-def gather_initializers(root_checkpointable):
-  """Traverse the object graph and find initialization ops.
-
-  Looks for `Checkpointable` objects which are dependencies of
-  `root_checkpointable` and which have an `initializer` property. Includes
-  initializers for slot variables only if the variable they are slotting for and
-  the optimizer are dependencies of `root_checkpointable` (i.e. if they would be
-  saved with a checkpoint).
-
-  Args:
-    root_checkpointable: A `Checkpointable` object to gather initializers for.
-  Returns:
-    A list of initialization ops.
-  """
-  checkpointable_objects = list_objects(root_checkpointable)
-  return [c.initializer for c in checkpointable_objects
-          if hasattr(c, "initializer") and c.initializer is not None]
-
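To make the traversal functions concrete, a minimal sketch under the same assumptions: `list_objects` flattens the dependency graph, and `gather_initializers` collects initializers from anything that has one (here, just the variable):

```python
from tensorflow.python.ops import init_ops
from tensorflow.python.training.checkpointable import tracking
from tensorflow.python.training.checkpointable import util

root = tracking.AutoCheckpointable()
root.leaf = tracking.AutoCheckpointable()
w = util.add_variable(
    root.leaf, name="w", shape=[2], initializer=init_ops.zeros_initializer())

objects = util.list_objects(root)              # root, leaf, and the variable
initializers = util.gather_initializers(root)  # just w's initializer
assert len(objects) == 3 and len(initializers) == 1
```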
-
-@tf_contextlib.contextmanager
-def capture_dependencies(template):
-  """Capture variables created within this scope as `Template` dependencies.
-
-  Requires that `template.variable_scope` is active.
-
-  This scope is intended as a compatibility measure, allowing a checkpointable
-  object to add dependencies on variables created in a block of code which is
-  not aware of object-based saving (and instead uses variable names
-  heavily). This is how `Template` objects add dependencies on variables and
-  sub-`Template`s. Where possible, use `tf.make_template` directly.
-
-  Args:
-    template: The `Template` object to register dependencies with.
-
-  Yields:
-    None (when used as a context manager).
-  """
-  name_prefix = template.variable_scope.name
-
-  def _checkpointable_custom_creator(next_creator, name, initial_value,
-                                     checkpointable_parent=None, **kwargs):
-    """A variable creation hook which adds Checkpointable dependencies.
-
-    Set for example during a `Template`'s first wrapped function
-    execution. Ensures that (a) `template` depends on any checkpointable
-    objects using their own `capture_dependencies` scope inside this scope which
-    create variables, and (b) that any variables not in a more deeply nested
-    scope are added as dependencies directly.
-
-    The `checkpointable_parent` argument is passed between custom creators but
-    ignored when the variable object itself is created. This argument indicates
-    (if not `None`) that a more deeply nested scope has already added the
-    variable as a dependency, and that parent scopes should add a dependency on
-    that object rather than on the variable directly.
-
-    Args:
-      next_creator: See `variable_scope.variable_creator_scope`; the next
-        creator in the chain.
-      name: The (full, scope-influenced) name of the variable. The `name_prefix`
-        itself is stripped for the purposes of object-based dependency tracking,
-        but scopes opened within this scope are respected.
-      initial_value: See `variable_scope.variable_creator_scope`. Taken
-        explicitly so the argument can be re-named and used with
-        `Checkpointable._add_variable_with_custom_getter`.
-      checkpointable_parent: If not None, a more deeply nested checkpointable
-        object and its name prefix which were passed to `capture_dependencies`
-        to add a dependency on (rather than depending on the variable directly).
-      **kwargs: Passed through to the next creator.
-
-    Returns:
-      The output of `next_creator`: the fetched/created variable object.
-    """
-    def _call_next_creator_renaming_initializer(initializer, **inner_kwargs):
-      # Pop the scope-stripped name: it is ignored here, and we don't want to
-      # propagate it to the next creator.
-      inner_kwargs.pop("name")
-      return next_creator(
-          initial_value=initializer,
-          name=name,
-          **inner_kwargs)
-    if name is not None and name.startswith(name_prefix):
-      scope_stripped_name = name[len(name_prefix) + 1:]
-      if not checkpointable_parent:
-        return template._add_variable_with_custom_getter(  # pylint: disable=protected-access
-            initializer=initial_value,
-            name=scope_stripped_name,
-            getter=_call_next_creator_renaming_initializer,
-            # Disable error checking for Checkpointable. Exceptions are instead
-            # raised if necessary when the object-based saver tries to
-            # save/restore the object.
-            overwrite=True,
-            checkpointable_parent=(template, name_prefix),
-            **kwargs)
-      else:
-        parent_object, parent_name_prefix = checkpointable_parent
-        template._track_checkpointable(  # pylint: disable=protected-access
-            parent_object,
-            name=parent_name_prefix[len(name_prefix) + 1:],
-            overwrite=True)
-    return next_creator(
-        name=name, initial_value=initial_value,
-        checkpointable_parent=(template, name_prefix), **kwargs)
-
-  with variable_scope.variable_creator_scope(_checkpointable_custom_creator):
-    yield
-
-
-class _LoadStatus(object):
-  """Abstract base for load status callbacks."""
-
-  @abc.abstractmethod
-  def assert_consumed(self):
-    """Raises an exception unless a non-trivial restoration has completed."""
-    pass
-
-  @abc.abstractmethod
-  def assert_existing_objects_matched(self):
-    """Raises an exception unless existing Python objects have been matched."""
-    pass
-
-  @abc.abstractmethod
-  def assert_nontrivial_match(self):
-    """Raises an exception if only the root object matched."""
-    pass
-
-  @abc.abstractmethod
-  def run_restore_ops(self, session=None):
-    """Runs restore ops from the checkpoint. Requires a valid checkpoint."""
-    pass
-
-  @abc.abstractmethod
-  def initialize_or_restore(self, session=None):
-    """Runs restore ops from the checkpoint, or initializes variables."""
-    pass
-
-
-def streaming_restore(status, session=None):
-  """When graph building, runs restore ops as soon as they come in.
-
-  Args:
-    status: A `_LoadStatus` object from an object-based saver's
-      restore(). Streaming restore from name-based checkpoints is not currently
-      supported.
-    session: A session to run new restore ops in.
-  """
-  if context.executing_eagerly():
-    # Streaming restore is the default/only behavior when executing eagerly.
-    return
-  if session is None:
-    session = ops.get_default_session()
-  if isinstance(status, NameBasedSaverStatus):
-    raise NotImplementedError(
-        "Streaming restore not supported from name-based checkpoints. File a "
-        "feature request if this limitation bothers you.")
-  status.run_restore_ops(session=session)
-  # pylint: disable=protected-access
-  status._checkpoint.new_restore_ops_callback = (
-      lambda ops: session.run(ops, feed_dict=status._feed_dict))
-  # pylint: enable=protected-access
-
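A hedged sketch of the graph-building usage: after `streaming_restore`, restore ops for dependencies tracked later run as soon as they are created. The checkpoint prefix is hypothetical and must name an existing object-based checkpoint:

```python
from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import ops
from tensorflow.python.training.checkpointable import graph_view
from tensorflow.python.training.checkpointable import tracking
from tensorflow.python.training.checkpointable import util

with ops.Graph().as_default(), session_lib.Session() as sess:
  root = tracking.AutoCheckpointable()
  saver = util.CheckpointableSaver(graph_view.ObjectGraphView(root))
  status = saver.restore("/tmp/ckpt-1")  # hypothetical checkpoint prefix
  util.streaming_restore(status, session=sess)
  # From here on, newly tracked dependencies with checkpointed values have
  # their restore ops run immediately via the installed callback.
  root.child = tracking.AutoCheckpointable()
```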
-
-class CheckpointLoadStatus(_LoadStatus):
-  """Checks the status of checkpoint loading and manages restore ops.
-
-  Returned from `Saver.restore`. Since `restore` may defer the loading of values
-  in the checkpoint which don't yet have corresponding Python objects,
-  `CheckpointLoadStatus` provides a callback to verify that checkpoint loading
-  is complete (`assert_consumed`).
-
-  When graph building, `restore` does not run restore ops itself since their
-  creation may be deferred. The `run_restore_ops` method must be called once all
-  Python objects with values to restore have been created and added to the
-  dependency graph (this does not necessarily have to be the whole checkpoint;
-  calling `run_restore_ops` while `assert_consumed` fails is supported and will
-  partially restore the checkpoint).
-
-  See `Saver.restore` for usage examples.
-  """
-
-  def __init__(self, checkpoint, feed_dict, graph_view):
-    self._checkpoint = checkpoint
-    self._feed_dict = feed_dict
-    self._graph_view = graph_view
-
-  def assert_consumed(self):
-    """Asserts that all objects in the checkpoint have been created/matched.
-
-    Returns:
-      `self` for chaining.
-    Raises:
-      AssertionError: If there are any Python objects in the dependency graph
-        which have not been restored from this checkpoint or a later `restore`,
-        or if there are any checkpointed values which have not been matched to
-        Python objects.
-    """
-    self.assert_existing_objects_matched()
-    for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes):
-      checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None)
-      if checkpointable is None:
-        raise AssertionError("Unresolved object in checkpoint: %s" % (node,))
-    if self._checkpoint.slot_restorations:
-      # Sanity check; this collection should be clear if everything has been
-      # restored.
-      raise AssertionError("Unresolved slot restorations: %s" % (
-          self._checkpoint.slot_restorations,))
-    if self._checkpoint.unused_attributes:
-      raise AssertionError(
-          ("Unused attributes in these objects (the attributes exist in the "
-           "checkpoint but not in the objects): %s") % (
-               list(self._checkpoint.unused_attributes.items()),))
-    return self
-
-  def assert_existing_objects_matched(self):
-    """Asserts that checkpointable Python objects have been matched.
-
-    Note that this is a weaker assertion than `assert_consumed`. It will only
-    fail for existing Python objects which are (transitive) dependencies of the
-    root object and which do not have an entry in the checkpoint.
-
-    It will not fail, for example, if a `tf.keras.Layer` object has not yet been
-    built and so has not created any `tf.Variable` objects.
-
-    Returns:
-      `self` for chaining.
-
-    Raises:
-      AssertionError: If a Python object exists in the transitive dependencies
-        of the root object but does not have a value in the checkpoint.
-    """
-    for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes):
-      checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None)
-      if (checkpointable is not None
-          and checkpointable._update_uid < self._checkpoint.restore_uid):  # pylint: disable=protected-access
-        raise AssertionError(
-            "Object not assigned a value from checkpoint: %s" % (node,))
-    for checkpointable_object in self._graph_view.list_objects():
-      # Remove data structures that do not contain any variables from
-      # restoration checks.
-      if (isinstance(checkpointable_object,
-                     data_structures.CheckpointableDataStructure) and
-          not checkpointable_object._checkpoint_dependencies):
-        continue
-      self._checkpoint.all_python_objects.add(checkpointable_object)
-    unused_python_objects = (
-        object_identity.ObjectIdentitySet(self._checkpoint.all_python_objects)
-        - object_identity.ObjectIdentitySet(
-            self._checkpoint.object_by_proto_id.values()))
-    if unused_python_objects:
-      raise AssertionError(
-          ("Some Python objects were not bound to checkpointed values, likely "
-           "due to changes in the Python program: %s")
-          % (list(unused_python_objects),))
-    return self
-
-  def assert_nontrivial_match(self):
-    """Raises an exception if only the root object matched."""
-    for checkpointable_object in self._graph_view.list_objects():
-      self._checkpoint.all_python_objects.add(checkpointable_object)
-    if len(self._checkpoint.object_by_proto_id) <= 1:
-      unused_python_objects = (
-          object_identity.ObjectIdentitySet(
-              self._checkpoint.all_python_objects)
-          - object_identity.ObjectIdentitySet(
-              self._checkpoint.object_by_proto_id.values()))
-      if unused_python_objects:
-        raise AssertionError(
-            ("Nothing except the root object matched a checkpointed value. "
-             "Typically this means that the checkpoint does not match the "
-             "Python program. The following objects have no matching "
-             "checkpointed value: %s") % (list(unused_python_objects),))
-      else:
-        raise AssertionError(
-            "Nothing to load. No dependencies have been added to %s yet." % (
-                self._graph_view.root,))
-    return self
-
-  def run_restore_ops(self, session=None):
-    """Run operations to restore objects in the dependency graph."""
-    if context.executing_eagerly():
-      return  # Run eagerly
-    if session is None:
-      session = ops.get_default_session()
-    session.run(self._checkpoint.restore_ops, feed_dict=self._feed_dict)
-
-  def initialize_or_restore(self, session=None):
-    """Run operations to initialize or restore objects in the dependency graph.
-
-    Any objects in the dependency graph which have initializers but are not in
-    the checkpoint will have those initializers run, unless those variables are
-    being restored by a later call to `tf.train.Checkpoint.restore()`.
-
-    This method has a sibling in `InitializationOnlyStatus` which instead
-    initializes variables. That type is returned if no checkpoint is specified
-    in `Saver.restore`.
-
-    Args:
-      session: The session to run init/restore ops in. If `None`, uses the
-        default session.
-    """
-    if context.executing_eagerly():
-      return  # Initialization and restoration ops are run eagerly
-    if session is None:
-      session = ops.get_default_session()
-    all_objects = self._graph_view.list_objects()
-    already_initialized_objects = object_identity.ObjectIdentitySet(
-        self._checkpoint.object_by_proto_id.values())
-    initializers_for_non_restored_variables = [
-        c.initializer for c in all_objects
-        if hasattr(c, "initializer")
-        and c not in already_initialized_objects
-        and (getattr(c, "_update_uid", self._checkpoint.restore_uid - 1)
-             < self._checkpoint.restore_uid)]
-    self.run_restore_ops(session=session)
-    session.run(initializers_for_non_restored_variables)
-
-
-class InitializationOnlyStatus(_LoadStatus):
-  """Returned from `Saver.restore` when no checkpoint has been specified.
-
-  Objects of this type have the same `assert_consumed` method as
-  `CheckpointLoadStatus`, but it always fails. However,
-  `initialize_or_restore` works on objects of both types, and will
-  initialize variables in `InitializationOnlyStatus` objects or restore them
-  otherwise.
-  """
-
-  def __init__(self, graph_view, restore_uid):
-    self._restore_uid = restore_uid
-    self._graph_view = graph_view
-
-  def assert_consumed(self):
-    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
-    raise AssertionError(
-        "No checkpoint specified (save_path=None); nothing is being restored.")
-
-  def assert_existing_objects_matched(self):
-    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
-    raise AssertionError(
-        "No checkpoint specified (save_path=None); nothing is being restored.")
-
-  def assert_nontrivial_match(self):
-    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
-    raise AssertionError(
-        "No checkpoint specified (save_path=None); nothing is being restored.")
-
-  def run_restore_ops(self, session=None):
-    """For consistency with `CheckpointLoadStatus`.
-
-    Use `initialize_or_restore` for initializing if no checkpoint was passed
-    to `Saver.restore` and restoring otherwise.
-
-    Args:
-      session: Not used.
-    """
-    raise AssertionError(
-        "No checkpoint specified, so no restore ops are available "
-        "(save_path=None to Saver.restore).")
-
-  def initialize_or_restore(self, session=None):
-    """Runs initialization ops for variables.
-
-    Objects which would be saved by `Saver.save` will be initialized, unless
-    those variables are being restored by a later call to
-    `tf.train.Checkpoint.restore()`.
-
-    This method does nothing when executing eagerly (initializers get run
-    eagerly).
-
-    Args:
-      session: The session to run initialization ops in. If `None`, uses the
-        default session.
-    """
-    if context.executing_eagerly():
-      return  # run eagerly
-    if session is None:
-      session = ops.get_default_session()
-    checkpointable_objects = self._graph_view.list_objects()
-    initializers = [
-        c.initializer for c in checkpointable_objects
-        if hasattr(c, "initializer") and c.initializer is not None
-        and (getattr(c, "_update_uid", self._restore_uid - 1)
-             < self._restore_uid)]
-    session.run(initializers)
-
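A minimal sketch of the no-checkpoint path: `restore(None)` returns an `InitializationOnlyStatus`, and `initialize_or_restore` falls back to running initializers (same importability assumptions as above):

```python
from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import ops
from tensorflow.python.ops import init_ops
from tensorflow.python.training.checkpointable import graph_view
from tensorflow.python.training.checkpointable import tracking
from tensorflow.python.training.checkpointable import util

with ops.Graph().as_default(), session_lib.Session() as sess:
  root = tracking.AutoCheckpointable()
  v = util.add_variable(
      root, name="v", shape=[], initializer=init_ops.zeros_initializer())
  saver = util.CheckpointableSaver(graph_view.ObjectGraphView(root))
  status = saver.restore(save_path=None)      # InitializationOnlyStatus
  status.initialize_or_restore(session=sess)  # runs v's initializer
  assert sess.run(v) == 0.0
```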
-
-_DEPRECATED_RESTORE_INSTRUCTIONS = (
-    "Restoring a name-based tf.train.Saver checkpoint using the object-based "
-    "restore API. This mode uses global names to match variables, and so is "
-    "somewhat fragile. It also adds new restore ops to the graph each time it "
-    "is called when graph building. Prefer re-encoding training checkpoints in "
-    "the object-based format: run save() on the object-based saver (the same "
-    "one this message is coming from) and use that checkpoint in the future.")
-
-
-class NameBasedSaverStatus(_LoadStatus):
-  """Status for loading a name-based training checkpoint."""
-
-  # Ideally this deprecation decorator would be on the class, but that
-  # interferes with isinstance checks.
-  @deprecation.deprecated(
-      date=None, instructions=_DEPRECATED_RESTORE_INSTRUCTIONS)
-  def __init__(self, checkpoint, graph_view):
-    self._checkpoint = checkpoint
-    self._graph_view = graph_view
-
-  def assert_consumed(self):
-    """Raises an exception if any variables/objects are unmatched."""
-    unused_attributes = dict(self._checkpoint.unused_attributes)
-    if unused_attributes:
-      raise AssertionError(
-          "Some objects had attributes which were not restored: %s"
-          % (unused_attributes,))
-    for checkpointable in self._graph_view.list_objects():
-      # pylint: disable=protected-access
-      checkpointable._maybe_initialize_checkpointable()
-      if checkpointable._update_uid < self._checkpoint.restore_uid:
-        raise AssertionError("Object not restored: %s" % (checkpointable,))
-      # pylint: enable=protected-access
-    return self
-
-  def assert_existing_objects_matched(self):
-    """Raises an exception if currently created objects are unmatched."""
-    # For name-based checkpoints there's no object information in the
-    # checkpoint, so there's no distinction between
-    # assert_existing_objects_matched and assert_consumed (and both are less
-    # useful since we don't touch Python objects or Python state).
-    return self.assert_consumed()
-
-  def assert_nontrivial_match(self):
-    """Raises an exception if currently created objects are unmatched."""
-    # For name-based checkpoints there's no object information in the
-    # checkpoint, so there's no distinction between
-    # assert_nontrivial_match and assert_consumed (and both are less
-    # useful since we don't touch Python objects or Python state).
-    return self.assert_consumed()
-
-  def _gather_saveable_objects(self):
-    """Walk the object graph, using global names for SaveableObjects."""
-    objects = self._graph_view.list_objects()
-    saveable_objects = []
-    for checkpointable in objects:
-      # pylint: disable=protected-access
-      checkpointable._maybe_initialize_checkpointable()
-      if checkpointable._update_uid < self._checkpoint.restore_uid:
-        checkpointable._update_uid = self._checkpoint.restore_uid
-      else:
-        continue
-      # pylint: enable=protected-access
-      saveable_objects.extend(
-          self._checkpoint.globally_named_object_attributes(
-              checkpointable))
-    return saveable_objects
-
-  def run_restore_ops(self, session=None):
-    """Load the name-based training checkpoint using a new `tf.train.Saver`."""
-    if context.executing_eagerly():
-      return  # Nothing to do, variables are restored on creation.
-    if session is None:
-      session = ops.get_default_session()
-    with ops.device("/cpu:0"):
-      saveables = self._gather_saveable_objects()
-      v1_saver_lib.Saver(saveables).restore(
-          sess=session, save_path=self._checkpoint.save_path)
-
-  def initialize_or_restore(self, session=None):
-    """Alias for `run_restore_ops`."""
-    self.run_restore_ops(session=session)
-
-
-class _SessionWithFeedDictAdditions(session_lib.SessionInterface):
-  """Pretends to be a session, inserts extra feeds on run()."""
-
-  def __init__(self, session, feed_additions):
-    self._wrapped_session = session
-    self._feed_additions = feed_additions
-
-  def run(self, fetches, feed_dict=None, **kwargs):
-    if feed_dict is None:
-      feed_dict = {}
-    else:
-      feed_dict = feed_dict.copy()
-    feed_dict.update(self._feed_additions)
-    return self._wrapped_session.run(
-        fetches=fetches, feed_dict=feed_dict, **kwargs)
-
-
-class CheckpointableSaver(object):
-  """Saves and restores a `Checkpointable` object and its dependencies.
-
-  See `Checkpointable` for details of dependency management. `Saver` wraps
-  `tf.train.Saver` for saving, including extra information about the graph of
-  dependencies between Python objects. When restoring, it uses this information
-  about the save-time dependency graph to more robustly match objects with their
-  checkpointed values. When executing eagerly, it supports restoring variables
-  on object creation (see `Saver.restore`).
-
-  Values in a checkpoint are mapped to `Checkpointable` Python objects
-  (`Variable`s, `Optimizer`s, `Layer`s) based on the names provided when the
-  checkpoint was written. To avoid breaking existing checkpoints when modifying
-  a class, dependency names (the names of attributes to which `Checkpointable`
-  objects are assigned) may not change. These names are local to objects, in
-  contrast to the `Variable.name`-based save/restore from `tf.train.Saver`, and
-  so allow additional program transformations.
-  """
-
-  def __init__(self, graph_view):
-    """Configure saving.
-
-    Args:
-      graph_view: A `GraphView` object containing a description of the object
-        graph to save.
-    """
-    # The file prefix placeholder is created lazily when graph building (and not
-    # at all when executing eagerly) to avoid creating ops in the constructor
-    # (when they may never be necessary).
-    self._file_prefix_placeholder = None
-
-    # Op caching for save
-    self._object_graph_feed_tensor = None
-    self._last_save_object_graph = None
-    self._file_prefix_feed_tensor = None
-    self._cached_save_operation = None
-
-    # Op caching for restore, shared between _CheckpointRestoreCoordinators
-    self._restore_op_cache = {}
-    self._graph_view = graph_view
-
-  def _gather_saveables(
-      self, object_graph_tensor=None):
-    """Wraps _serialize_object_graph to include the object graph proto."""
-    (named_saveable_objects, graph_proto,
-     feed_additions) = self._graph_view.serialize_object_graph()
-    if object_graph_tensor is None:
-      with ops.device("/cpu:0"):
-        object_graph_tensor = constant_op.constant(
-            graph_proto.SerializeToString(), dtype=dtypes.string)
-    else:
-      feed_additions.update(
-          {object_graph_tensor: graph_proto.SerializeToString()})
-    assert base.OBJECT_GRAPH_PROTO_KEY not in named_saveable_objects
-    named_saveable_objects.append(
-        base.NoRestoreSaveable(
-            tensor=object_graph_tensor,
-            name=base.OBJECT_GRAPH_PROTO_KEY))
-    return named_saveable_objects, graph_proto, feed_additions
-
-  def _save_cached_when_graph_building(
-      self,
-      file_prefix,
-      object_graph_tensor=None):
-    """Create or retrieve save ops.
-
-    Args:
-      file_prefix: The prefix for saved checkpoint files.
-      object_graph_tensor: A `Tensor` to which the current object graph will be
-        fed.
-
-    Returns:
-      A two-element tuple with a filename tensor and a feed_dict of tensors to
-      feed when running it (if graph building). The feed dict contains the
-      current object graph and any Python state to be saved in the
-      checkpoint. When executing eagerly only the first argument is meaningful.
-    """
-    (named_saveable_objects, graph_proto,
-     feed_additions) = self._gather_saveables(
-         object_graph_tensor=object_graph_tensor)
-    if (self._last_save_object_graph != graph_proto
-        # When executing eagerly, we need to re-create SaveableObjects each time
-        # save() is called so they pick up new Tensors passed to their
-        # constructors. That means the Saver needs to be copied with a new
-        # var_list.
-        or context.executing_eagerly()):
-      saver = functional_saver.Saver(named_saveable_objects)
-      with ops.device("/cpu:0"):
-        self._cached_save_operation = saver.save(file_prefix)
-      self._last_save_object_graph = graph_proto
-    return self._cached_save_operation, feed_additions
-
-  def save(self, file_prefix, checkpoint_number=None, session=None):
-    """Save a training checkpoint.
-
-    The saved checkpoint includes variables created by this object and any
-    Checkpointable objects it depends on at the time `Saver.save()` is called.
-
-    Args:
-      file_prefix: A prefix to use for the checkpoint filenames
-        (/path/to/directory/and_a_prefix). Names are generated based on this
-        prefix and `checkpoint_number`, if provided.
-      checkpoint_number: An integer variable or Tensor, used to number
-        checkpoints. Typically this value is saved along with other variables in
-        training checkpoints, which will happen automatically if it was created
-        by `root_checkpointable` or one of its dependencies (via
-        `Checkpointable._add_variable`).
-      session: The session to evaluate variables in. Ignored when executing
-        eagerly. If not provided when graph building, the default session is
-        used.
-
-    Returns:
-      The full path to the checkpoint.
-    """
-    feed_dict = {}
-    graph_building = not context.executing_eagerly()
-    if checkpoint_number:
-      file_prefix = "%s-%d" % (file_prefix, checkpoint_number)
-    if graph_building:
-      if self._object_graph_feed_tensor is None:
-        with ops.device("/cpu:0"):
-          self._object_graph_feed_tensor = constant_op.constant(
-              "", dtype=dtypes.string)
-          self._file_prefix_feed_tensor = constant_op.constant(
-              "", dtype=dtypes.string)
-      object_graph_tensor = self._object_graph_feed_tensor
-      file_prefix_tensor = self._file_prefix_feed_tensor
-      feed_dict[file_prefix_tensor] = file_prefix
-    else:
-      with ops.device("/cpu:0"):
-        file_prefix_tensor = constant_op.constant(
-            file_prefix, dtype=dtypes.string)
-      object_graph_tensor = None
-
-    file_io.recursive_create_dir(os.path.dirname(file_prefix))
-    save_path, new_feed_additions = self._save_cached_when_graph_building(
-        file_prefix=file_prefix_tensor,
-        object_graph_tensor=object_graph_tensor)
-    if new_feed_additions:
-      feed_dict.update(new_feed_additions)
-    if not graph_building:
-      session = None
-    elif session is None:
-      session = ops.get_default_session()
-
-    if session:
-      save_path = session.run(save_path, feed_dict=feed_dict)
-    else:
-      save_path = save_path.numpy()
-    return save_path
-
-  def restore(self, save_path):
-    """Restore a training checkpoint.
-
-    Restores `root_checkpointable` and any objects that it tracks
-    (transitive). Either assigns values immediately if variables to restore have
-    been created already, or defers restoration until the variables are
-    created. Dependencies added to the `root_checkpointable` passed to the
-    constructor after this call will be matched if they have a corresponding
-    object in the checkpoint.
-
-    When building a graph, restorations are added to the graph but not run.
-
-    To disallow deferred loading, assert immediately that all checkpointed
-    variables have been matched to variable objects:
-
-    ```python
-    saver = Saver(root)
-    saver.restore(path).assert_consumed()
-    ```
-
-    An exception will be raised unless every object was matched and its
-    variables already exist.
-
-    When graph building, `assert_consumed()` indicates that all of the restore
-    ops which will be created for this checkpoint have been created. They can be
-    run via the `run_restore_ops()` function of the status object:
-
-    ```python
-    saver.restore(path).assert_consumed().run_restore_ops()
-    ```
-
-    If the checkpoint has not been consumed completely, then the list of restore
-    ops will grow as more objects are added to the dependency graph.
-
-    Name-based `tf.train.Saver` checkpoints can be loaded using this
-    method. There is no deferred loading, and names are used to match
-    variables. No restore ops are created/run until `run_restore_ops()` or
-    `initialize_or_restore()` is called on the returned status object, even
-    when executing eagerly. Re-encode name-based checkpoints using this
-    object-based `Saver.save` as soon as possible.
-
-    Args:
-      save_path: The path to the checkpoint, as returned by `save` or
-        `tf.train.latest_checkpoint`. If None (as when there is no latest
-        checkpoint for `tf.train.latest_checkpoint` to return), returns an
-        object which may run initializers for objects in the dependency
-        graph. If the checkpoint was written by the name-based `tf.train.Saver`,
-        names are used to match variables.
-
-    Returns:
-      A load status object, which can be used to make assertions about the
-      status of checkpoint restoration and run initialization/restore ops
-      (of type `CheckpointLoadStatus`, or `InitializationOnlyStatus` if
-      `save_path` is `None`).
-
-      If `save_path` points to a name-based checkpoint, a `NameBasedSaverStatus`
-      object is returned which runs restore ops from a name-based saver.
-    """
-    if save_path is None:
-      return InitializationOnlyStatus(self._graph_view, ops.uid())
-    reader = pywrap_tensorflow.NewCheckpointReader(save_path)
-    graph_building = not context.executing_eagerly()
-    if graph_building:
-      dtype_map = None
-    else:
-      dtype_map = reader.get_variable_to_dtype_map()
-    try:
-      object_graph_string = reader.get_tensor(
-          base.OBJECT_GRAPH_PROTO_KEY)
-    except errors_impl.NotFoundError:
-      # The object graph proto does not exist in this checkpoint. Try the
-      # name-based compatibility mode.
-      restore_coordinator = _NameBasedRestoreCoordinator(
-          save_path=save_path, dtype_map=dtype_map)
-      if not graph_building:
-        for existing_checkpointable in self._graph_view.list_objects():
-          # pylint: disable=protected-access
-          existing_checkpointable._maybe_initialize_checkpointable()
-          existing_checkpointable._name_based_restores.add(restore_coordinator)
-          existing_checkpointable._name_based_attribute_restore(
-              restore_coordinator)
-          # pylint: enable=protected-access
-      return NameBasedSaverStatus(
-          restore_coordinator, graph_view=self._graph_view)
-
-    if graph_building:
-      if self._file_prefix_placeholder is None:
-        with ops.device("/cpu:0"):
-          self._file_prefix_placeholder = constant_op.constant("model")
-      file_prefix_tensor = self._file_prefix_placeholder
-      file_prefix_feed_dict = {self._file_prefix_placeholder: save_path}
-    else:
-      with ops.device("/cpu:0"):
-        file_prefix_tensor = constant_op.constant(save_path)
-      file_prefix_feed_dict = None
-    object_graph_proto = (
-        checkpointable_object_graph_pb2.CheckpointableObjectGraph())
-    object_graph_proto.ParseFromString(object_graph_string)
-    checkpoint = _CheckpointRestoreCoordinator(
-        object_graph_proto=object_graph_proto,
-        save_path=save_path,
-        save_path_tensor=file_prefix_tensor,
-        restore_op_cache=self._restore_op_cache,
-        graph_view=self._graph_view)
-    base.CheckpointPosition(checkpoint=checkpoint, proto_id=0).restore(
-        self._graph_view.root)
-    load_status = CheckpointLoadStatus(
-        checkpoint,
-        graph_view=self._graph_view,
-        feed_dict=file_prefix_feed_dict)
-    return load_status
-
-
-def frozen_saver(root_checkpointable):
-  """Creates a static `tf.train.Saver` from a checkpointable object.
-
-  The returned `Saver` saves object-based checkpoints, but these checkpoints
-  will no longer reflect structural changes to the object graph, only changes to
-  the values of `Variable`s added as dependencies of the root object before
-  `freeze` was called.
-
-  `restore` works on the returned `Saver`, but requires that the object graph of
-  the checkpoint being loaded exactly matches the object graph when `freeze` was
-  called. This is in contrast to the object-based restore performed by
-  `tf.train.Checkpoint`, which attempts a fuzzy matching between a
-  checkpoint's object graph and the current Python object graph.
-
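-  A minimal sketch of the save/restore round trip (assuming eager execution;
-  the path is illustrative):
-
-  ```python
-  checkpoint = tf.train.Checkpoint(v=tf.Variable(1.))
-  saver = frozen_saver(checkpoint)
-  prefix = tf.constant("/tmp/frozen_ckpt")
-  save_path = saver.save(prefix)
-  saver.restore(prefix)
-  ```
-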
-  Args:
-    root_checkpointable: A checkpointable object to save.
-
-  Returns:
-    A saver which saves object-based checkpoints for the object graph frozen at
-    the time `frozen_saver` was called.
-  """
-  named_saveable_objects = graph_view_lib.ObjectGraphView(
-      root_checkpointable).frozen_saveable_objects()
-  return functional_saver.Saver(named_saveable_objects)
-
-
-def saver_with_op_caching(obj):
-  """A CheckpointableSaver with a SaveableObject cache when graph building."""
-  if context.executing_eagerly():
-    saveables_cache = None
-  else:
-    saveables_cache = object_identity.ObjectIdentityWeakKeyDictionary()
-  return CheckpointableSaver(graph_view_lib.ObjectGraphView(
-      weakref.ref(obj),
-      saveables_cache=saveables_cache))
-
-
-@tf_export("train.Checkpoint")
-class Checkpoint(tracking.AutoCheckpointable):
-  """Groups checkpointable objects, saving and restoring them.
-
-  `Checkpoint`'s constructor accepts keyword arguments whose values are
-  objects that contain checkpointable state, such as `tf.train.Optimizer`
-  implementations, `tf.Variable`, `tf.keras.Layer` implementations, or
-  `tf.keras.Model` implementations. It saves these values with a checkpoint, and
-  maintains a `save_counter` for numbering checkpoints.
-
-  Example usage when graph building:
-
-  ```python
-  import tensorflow as tf
-  import os
-
-  checkpoint_directory = "/tmp/training_checkpoints"
-  checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-
-  checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
-  status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
-  train_op = optimizer.minimize( ... )
-  status.assert_consumed()  # Optional sanity checks.
-  with tf.Session() as session:
-    # Use the Session to restore variables, or initialize them if
-    # tf.train.latest_checkpoint returned None.
-    status.initialize_or_restore(session)
-    for _ in range(num_training_steps):
-      session.run(train_op)
-    checkpoint.save(file_prefix=checkpoint_prefix)
-  ```
-
-  Example usage with eager execution enabled:
-
-  ```python
-  import tensorflow as tf
-  import os
-
-  tf.enable_eager_execution()
-
-  checkpoint_directory = "/tmp/training_checkpoints"
-  checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-
-  checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
-  status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
-  for _ in range(num_training_steps):
-    optimizer.minimize( ... )  # Variables will be restored on creation.
-  status.assert_consumed()  # Optional sanity checks.
-  checkpoint.save(file_prefix=checkpoint_prefix)
-  ```
-
-  `Checkpoint.save` and `Checkpoint.restore` write and read object-based
-  checkpoints, in contrast to `tf.train.Saver`, which writes and reads
-  `variable.name` based checkpoints. Object-based checkpointing saves a graph of
-  dependencies between Python objects (`Layer`s, `Optimizer`s, `Variable`s,
-  etc.) with named edges, and this graph is used to match variables when
-  restoring a checkpoint. It can be more robust to changes in the Python
-  program, and helps to support restore-on-create for variables when executing
-  eagerly. Prefer `tf.train.Checkpoint` over `tf.train.Saver` for new code.
-
-  `Checkpoint` objects have dependencies on the objects passed as keyword
-  arguments to their constructors, and each dependency is given a name that is
-  identical to the name of the keyword argument for which it was created.
-  TensorFlow classes like `Layer`s and `Optimizer`s will automatically add
-  dependencies on their variables (e.g. "kernel" and "bias" for
-  `tf.keras.layers.Dense`). Inheriting from `tf.keras.Model` makes managing
-  dependencies easy in user-defined classes, since `Model` hooks into attribute
-  assignment. For example:
-
-  ```python
-  class Regress(tf.keras.Model):
-
-    def __init__(self):
-      super(Regress, self).__init__()
-      self.input_transform = tf.keras.layers.Dense(10)
-      # ...
-
-    def call(self, inputs):
-      x = self.input_transform(inputs)
-      # ...
-  ```
-
-  This `Model` has a dependency named "input_transform" on its `Dense` layer,
-  which in turn depends on its variables. As a result, saving an instance of
-  `Regress` using `tf.train.Checkpoint` will also save all the variables created
-  by the `Dense` layer.
-
-  Attributes:
-    save_counter: Incremented when `save()` is called. Used to number
-      checkpoints.
-  """
-
-  def __init__(self, **kwargs):
-    """Group objects into a training checkpoint.
-
-    Args:
-      **kwargs: Keyword arguments are set as attributes of this object, and are
-        saved with the checkpoint. Values must be checkpointable objects.
-    Raises:
-      ValueError: If objects in `kwargs` are not checkpointable.
-    """
-    super(Checkpoint, self).__init__()
-    for k, v in sorted(kwargs.items(), key=lambda item: item[0]):
-      if not isinstance(v, (base.Checkpointable, def_function.Function)):
-        raise ValueError(
-            ("`Checkpoint` was expecting a checkpointable object (an object "
-             "derived from `CheckpointableBase`), got %s. If you believe this "
-             "object should be checkpointable (i.e. it is part of the "
-             "TensorFlow Python API and manages state), please open an issue.")
-            % (v,))
-      setattr(self, k, v)
-    self._save_counter = None  # Created lazily for restore-on-create.
-    self._save_assign_op = None
-    self._saver = saver_with_op_caching(self)
-
-  def _maybe_create_save_counter(self):
-    """Create a save counter if it does not yet exist."""
-    if self._save_counter is None:
-      # Initialized to 0 and incremented before saving.
-      with ops.device("/cpu:0"):
-        # add_variable creates a dependency named "save_counter"; NoDependency
-        # prevents creating a second dependency named "_save_counter".
-        self._save_counter = data_structures.NoDependency(
-            add_variable(self, name="save_counter", initializer=0,
-                         dtype=dtypes.int64))
-
-  def write(self, file_prefix, session=None):
-    """Writes a training checkpoint.
-
-    The checkpoint includes variables created by this object and any
-    checkpointable objects it depends on at the time `Checkpoint.write()` is
-    called.
-
-    `write` does not number checkpoints, increment `save_counter`, or update the
-    metadata used by `tf.train.latest_checkpoint`. It is primarily intended for
-    use by higher-level checkpoint management utilities. `save` provides a very
-    basic implementation of these features.
-
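-    For example, a minimal sketch (assuming eager execution; the path is
-    illustrative):
-
-    ```python
-    checkpoint = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64))
-    path = checkpoint.write("/tmp/ckpt-custom")
-    # path == "/tmp/ckpt-custom"; save_counter and the metadata used by
-    # tf.train.latest_checkpoint are left unchanged.
-    ```
-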
-    Args:
-      file_prefix: A prefix to use for the checkpoint filenames
-        (/path/to/directory/and_a_prefix).
-      session: The session to evaluate variables in. Ignored when executing
-        eagerly. If not provided when graph building, the default session is
-        used.
-
-    Returns:
-      The full path to the checkpoint (i.e. `file_prefix`).
-    """
-    return compat.as_str(self._saver.save(
-        file_prefix=file_prefix,
-        session=session))
-
-  @property
-  def save_counter(self):
-    """An integer variable which starts at zero and is incremented on save.
-
-    Used to number checkpoints.
-
-    Returns:
-      The save counter variable.
-    """
-    self._maybe_create_save_counter()
-    return self._save_counter
-
-  def save(self, file_prefix, session=None):
-    """Saves a training checkpoint and provides basic checkpoint management.
-
-    The saved checkpoint includes variables created by this object and any
-    checkpointable objects it depends on at the time `Checkpoint.save()` is
-    called.
-
-    `save` is a basic convenience wrapper around the `write` method,
-    sequentially numbering checkpoints using `save_counter` and updating the
-    metadata used by `tf.train.latest_checkpoint`. More advanced checkpoint
-    management, such as garbage collection and custom numbering, may be
-    provided by other utilities which also wrap `write` (for example,
-    `tf.contrib.checkpoint.CheckpointManager`).
-
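-    For example, a minimal sketch (assuming eager execution; the path is
-    illustrative):
-
-    ```python
-    checkpoint = tf.train.Checkpoint(v=tf.Variable(1.))
-    checkpoint.save("/tmp/ckpt")  # writes "/tmp/ckpt-1"
-    checkpoint.save("/tmp/ckpt")  # writes "/tmp/ckpt-2"
-    ```
-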
-    Args:
-      file_prefix: A prefix to use for the checkpoint filenames
-        (/path/to/directory/and_a_prefix). Names are generated based on this
-        prefix and `Checkpoint.save_counter`.
-      session: The session to evaluate variables in. Ignored when executing
-        eagerly. If not provided when graph building, the default session is
-        used.
-
-    Returns:
-      The full path to the checkpoint.
-    """
-    graph_building = not context.executing_eagerly()
-    if graph_building:
-      if session is None:
-        session = ops.get_default_session()
-      if self._save_counter is None:
-        # When graph building, if this is a new save counter variable then it
-        # needs to be initialized before assign_add. This is only an issue if
-        # restore() has not been called first.
-        session.run(self.save_counter.initializer)
-    if not graph_building or self._save_assign_op is None:
-      with ops.colocate_with(self.save_counter):
-        assign_op = self.save_counter.assign_add(1, read_value=True)
-      if graph_building:
-        self._save_assign_op = data_structures.NoDependency(assign_op)
-    if graph_building:
-      checkpoint_number = session.run(self._save_assign_op)
-    else:
-      checkpoint_number = assign_op.numpy()
-    file_path = self.write("%s-%d" % (file_prefix, checkpoint_number),
-                           session=session)
-    checkpoint_management.update_checkpoint_state_internal(
-        save_dir=os.path.dirname(file_prefix),
-        model_checkpoint_path=file_path,
-        all_model_checkpoint_paths=[file_path])
-    return file_path
-
-  def restore(self, save_path):
-    """Restore a training checkpoint.
-
-    Restores this `Checkpoint` and any objects it depends on.
-
-    When executing eagerly, either assigns values immediately if variables to
-    restore have been created already, or defers restoration until the variables
-    are created. Dependencies added after this call will be matched if they have
-    a corresponding object in the checkpoint (the restore request is queued on
-    each checkpointable object until the expected dependency is added).
-
-    When graph building, restoration ops are added to the graph but not run
-    immediately.
-
-    To ensure that loading is complete and no more assignments will take place,
-    use the `assert_consumed()` method of the status object returned by
-    `restore`:
-
-    ```python
-    checkpoint = tf.train.Checkpoint( ... )
-    checkpoint.restore(path).assert_consumed()
-    ```
-
-    An exception will be raised if any Python objects in the dependency graph
-    were not found in the checkpoint, or if any checkpointed values do not have
-    a matching Python object.
-
-    When graph building, `assert_consumed()` indicates that all of the restore
-    ops that will be created for this checkpoint have been created. They can be
-    run via the `run_restore_ops()` method of the status object:
-
-    ```python
-    checkpoint.restore(path).assert_consumed().run_restore_ops()
-    ```
-
-    If the checkpoint has not been consumed completely, then the list of restore
-    ops will grow as more objects are added to the dependency graph.
-
-    Name-based `tf.train.Saver` checkpoints can be loaded using this
-    method. Names are used to match variables. No restore ops are created/run
-    until `run_restore_ops()` or `initialize_or_restore()` is called on the
-    returned status object when graph building, but there is restore-on-creation
-    when executing eagerly. Re-encode name-based checkpoints using
-    `tf.train.Checkpoint.save` as soon as possible.
-
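-    For example, a sketch of re-encoding a name-based checkpoint (paths are
-    illustrative):
-
-    ```python
-    checkpoint = tf.train.Checkpoint(model=model)
-    checkpoint.restore("/tmp/name_based_ckpt").run_restore_ops()
-    checkpoint.save("/tmp/object_based_ckpt")
-    ```
-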
-    Args:
-      save_path: The path to the checkpoint, as returned by `save` or
-        `tf.train.latest_checkpoint`. If None (as when there is no latest
-        checkpoint for `tf.train.latest_checkpoint` to return), returns an
-        object which may run initializers for objects in the dependency
-        graph. If the checkpoint was written by the name-based `tf.train.Saver`,
-        names are used to match variables.
-
-    Returns:
-      A load status object, which can be used to make assertions about the
-      status of a checkpoint restoration and run initialization/restore ops.
-
-      The returned status object has the following methods:
-
-      * `assert_consumed()`:
-          Raises an exception if any variables/objects are unmatched: either
-          checkpointed values which don't have a matching Python object or
-          Python objects in the dependency graph with no values in the
-          checkpoint. This method returns the status object, and so may be
-          chained with `initialize_or_restore` or `run_restore_ops`.
-
-      * `assert_existing_objects_matched()`:
-          Raises an exception if any existing Python objects in the dependency
-          graph are unmatched. Unlike `assert_consumed`, this assertion will
-          pass if values in the checkpoint have no corresponding Python
-          objects. For example a `tf.keras.Layer` object which has not yet been
-          built, and so has not created any variables, will pass this assertion
-          but fail `assert_consumed`. Useful when loading part of a larger
-          checkpoint into a new Python program, e.g. a training checkpoint with
-          a `tf.train.Optimizer` was saved but only the state required for
-          inference is being loaded. This method returns the status object, and
-          so may be chained with `initialize_or_restore` or `run_restore_ops`.
-
-      * `assert_nontrivial_match()`:
-          Asserts that something aside from the root object was matched. This
-          is a very weak assertion, but is useful for sanity checking in
-          library code where objects may exist in the checkpoint which haven't
-          been created in Python and some Python objects may not have a
-          checkpointed value.
-
-      * `initialize_or_restore(session=None)`:
-          When graph building, runs variable initializers if `save_path` is
-          `None`, but otherwise runs restore operations. If no `session` is
-          explicitly specified, the default session is used. No effect when
-          executing eagerly (variables are initialized or restored eagerly).
-
-      * `run_restore_ops(session=None)`:
-          When graph building, runs restore operations. If no `session` is
-          explicitly specified, the default session is used. No effect when
-          executing eagerly (restore operations are run eagerly). May only be
-          called when `save_path` is not `None`.
-    """
-    status = self._saver.restore(save_path=save_path)
-    # Create the save counter now so it gets initialized with other variables
-    # when graph building. Creating it earlier would lead to double
-    # initialization when executing eagerly.
-    self._maybe_create_save_counter()
-    return status
diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py
deleted file mode 100644
index 4f9abcc..0000000
--- a/tensorflow/python/training/checkpointable/util_test.py
+++ /dev/null
@@ -1,1550 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import json
-import os
-
-from absl.testing import parameterized
-import six
-
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
-from tensorflow.python.eager import def_function
-from tensorflow.python.eager import test
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras.engine import sequential
-from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.layers import core
-from tensorflow.python.keras.optimizer_v2 import adam
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import template
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import base
-from tensorflow.python.training.checkpointable import graph_view
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
-
-
-class NonLayerCheckpointable(tracking.AutoCheckpointable):
-
-  def __init__(self):
-    super(NonLayerCheckpointable, self).__init__()
-    self.a_variable = checkpointable_utils.add_variable(
-        self, name="a_variable", shape=[])
-
-
-# pylint: disable=not-callable
-class MyModel(training.Model):
-  """A concrete Model for testing."""
-
-  def __init__(self):
-    super(MyModel, self).__init__()
-    self._named_dense = core.Dense(1, use_bias=True)
-    self._second = core.Dense(1, use_bias=False)
-    # We can still track Checkpointables which aren't Layers.
-    self._non_layer = NonLayerCheckpointable()
-
-  def call(self, values):
-    ret = self._second(self._named_dense(values))
-    return ret
-
-
-class InterfaceTests(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
-  def testAddVariable(self):
-    obj = NonLayerCheckpointable()
-    with self.assertRaisesRegexp(ValueError, "do not specify shape"):
-      checkpointable_utils.add_variable(
-          obj, name="shape_specified_twice", shape=[], initializer=1)
-    constant_initializer = checkpointable_utils.add_variable(
-        obj, name="constant_initializer", initializer=1)
-    with variable_scope.variable_scope("some_variable_scope"):
-      ones_initializer = checkpointable_utils.add_variable(
-          obj,
-          name="ones_initializer",
-          shape=[2],
-          initializer=init_ops.ones_initializer(dtype=dtypes.float32))
-    bare_initializer = checkpointable_utils.add_variable(
-        obj,
-        name="bare_initializer",
-        shape=[2, 2],
-        dtype=dtypes.float64,
-        initializer=init_ops.zeros_initializer)
-
-    # Even in graph mode, there are no naming conflicts between objects, only
-    # naming conflicts within an object.
-    other_duplicate = resource_variable_ops.ResourceVariable(
-        name="duplicate", initial_value=1.)
-    duplicate = checkpointable_utils.add_variable(
-        obj, name="duplicate", shape=[])
-    with self.assertRaisesRegexp(ValueError, "'duplicate'.*already declared"):
-      checkpointable_utils.add_variable(obj, name="duplicate", shape=[])
-
-    self.evaluate(checkpointable_utils.gather_initializers(obj))
-    self.assertEqual("constant_initializer:0", constant_initializer.name)
-    self.assertEqual(1, self.evaluate(constant_initializer))
-    self.assertEqual("some_variable_scope/ones_initializer:0",
-                     ones_initializer.name)
-    self.assertAllEqual([1, 1], self.evaluate(ones_initializer))
-    self.assertAllEqual([[0., 0.],
-                         [0., 0.]], self.evaluate(bare_initializer))
-    self.assertEqual("a_variable:0", obj.a_variable.name)
-    self.assertEqual("duplicate:0", other_duplicate.name)
-    if context.executing_eagerly():
-      # When executing eagerly, there's no uniquification of variable names. The
-      # checkpoint name will be the same.
-      self.assertEqual("duplicate:0", duplicate.name)
-    else:
-      # The .name attribute may be globally influenced, but the checkpoint name
-      # won't be (tested below).
-      self.assertEqual("duplicate_1:0", duplicate.name)
-    named_variables, _, _ = (
-        graph_view.ObjectGraphView(obj).serialize_object_graph())
-    expected_checkpoint_names = (
-        "a_variable/.ATTRIBUTES/VARIABLE_VALUE",
-        "bare_initializer/.ATTRIBUTES/VARIABLE_VALUE",
-        "constant_initializer/.ATTRIBUTES/VARIABLE_VALUE",
-        "duplicate/.ATTRIBUTES/VARIABLE_VALUE",
-        "ones_initializer/.ATTRIBUTES/VARIABLE_VALUE",
-    )
-    six.assertCountEqual(
-        self, expected_checkpoint_names, [v.name for v in named_variables])
-
-  def testInitNotCalled(self):
-
-    class NoInit(tracking.AutoCheckpointable):
-
-      def __init__(self):
-        pass
-
-    # __init__ for Checkpointable will be called implicitly.
-    checkpointable_utils.add_variable(NoInit(), "var", shape=[])
-
-  def testShapeDtype(self):
-    root = tracking.AutoCheckpointable()
-    v1 = checkpointable_utils.add_variable(
-        root, name="v1", initializer=3., dtype=dtypes.float64)
-    self.assertEqual(dtypes.float64, v1.dtype)
-    v2 = checkpointable_utils.add_variable(
-        root,
-        name="v2",
-        shape=[3],
-        initializer=init_ops.ones_initializer,
-        dtype=dtypes.float64)
-    self.assertEqual(dtypes.float64, v2.dtype)
-    self.assertAllEqual([1., 1., 1.], self.evaluate(v2))
-
-  def testObjectMetadata(self):
-    with context.eager_mode():
-      checkpoint_directory = self.get_temp_dir()
-      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-      dense = core.Dense(1)
-      checkpoint = checkpointable_utils.Checkpoint(dense=dense)
-      dense(constant_op.constant([[1.]]))
-      save_path = checkpoint.save(checkpoint_prefix)
-
-    objects = checkpointable_utils.object_metadata(save_path)
-    all_variable_names = []
-    for obj in objects.nodes:
-      for attribute in obj.attributes:
-        all_variable_names.append(attribute.full_name)
-    self.assertIn("dense/kernel", all_variable_names)
-
-  def testNotCheckpointable(self):
-
-    class CallsFunctionalStuff(
-        tracking.NotCheckpointable, tracking.AutoCheckpointable):
-      pass
-
-    test_dir = self.get_temp_dir()
-    prefix = os.path.join(test_dir, "ckpt")
-    checkpoint = checkpointable_utils.Checkpoint(x=CallsFunctionalStuff())
-    with self.assertRaises(NotImplementedError):
-      checkpoint.save(prefix)
-
-    class CallsFunctionalStuffOtherMRO(
-        tracking.AutoCheckpointable, tracking.NotCheckpointable):
-      pass
-
-    checkpoint_reversed = checkpointable_utils.Checkpoint(
-        x=CallsFunctionalStuffOtherMRO())
-    with self.assertRaises(NotImplementedError):
-      checkpoint_reversed.save(prefix)
-
-
-class _MirroringSaveable(saver_lib.BaseSaverBuilder.SaveableObject):
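-  """Saves one variable's value and restores it into two variables."""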
-
-  def __init__(self, primary_variable, mirrored_variable, name):
-    self._primary_variable = primary_variable
-    self._mirrored_variable = mirrored_variable
-    tensor = self._primary_variable.read_value()
-    spec = saver_lib.BaseSaverBuilder.SaveSpec(
-        tensor=tensor,
-        slice_spec="",
-        name=name)
-    super(_MirroringSaveable, self).__init__(
-        tensor, [spec], name)
-
-  def restore(self, restored_tensors, restored_shapes):
-    """Restore the same value into both variables."""
-    tensor, = restored_tensors
-    return control_flow_ops.group(
-        self._primary_variable.assign(tensor),
-        self._mirrored_variable.assign(tensor))
-
-
-class _OwnsMirroredVariables(base.Checkpointable):
-  """A Checkpointable object which returns a more complex SaveableObject."""
-
-  def __init__(self):
-    self.non_dep_variable = variable_scope.get_variable(
-        name="non_dep_variable", initializer=6., use_resource=True)
-    self.mirrored = variable_scope.get_variable(
-        name="mirrored", initializer=15., use_resource=True)
-
-  def _gather_saveables_for_checkpoint(self):
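-    # Map the VARIABLE_VALUE attribute to a factory; the checkpointing
-    # machinery calls the factory to build the custom SaveableObject.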
-    def _saveable_factory(name=self.non_dep_variable.name):
-      return _MirroringSaveable(
-          primary_variable=self.non_dep_variable,
-          mirrored_variable=self.mirrored,
-          name=name)
-    return {base.VARIABLE_VALUE_KEY: _saveable_factory}
-
-  # The Saver sorts by name before parsing, so we need a name property.
-  @property
-  def name(self):
-    return self.non_dep_variable.name
-
-
-class CheckpointingTests(parameterized.TestCase, test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
-  def testNamingWithOptimizer(self):
-    input_value = constant_op.constant([[3.]])
-    model = MyModel()
-    # A nuisance Model using the same optimizer. Its slot variables should not
-    # go in the checkpoint, since it is never depended on.
-    other_model = MyModel()
-    optimizer = adam.Adam(0.001)
-    step = training_util.get_or_create_global_step()
-    root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model, step=step)
-
-    with backprop.GradientTape() as tape:
-      loss = model(input_value)
-    variables = model.trainable_variables
-    gradients = tape.gradient(loss, variables)
-    train_op = control_flow_ops.group(
-        optimizer.apply_gradients(zip(gradients, variables)),
-        step.assign_add(1))
-
-    with backprop.GradientTape() as tape:
-      loss = other_model(input_value)
-    variables = other_model.trainable_variables
-    gradients = tape.gradient(loss, variables)
-    optimizer.apply_gradients(zip(gradients, variables))
-
-    self.evaluate(checkpointable_utils.gather_initializers(
-        root_checkpointable))
-    self.evaluate(train_op)
-    named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
-        root_checkpointable).serialize_object_graph()
-    expected_slot_keys = (
-        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
-        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
-        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
-        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
-        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
-        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
-    )
-    expected_checkpoint_names = (
-        # Created in the root node, so no prefix.
-        "step",
-        "model/_second/kernel",
-        "model/_named_dense/kernel",
-        "model/_named_dense/bias",
-        # non-Layer dependency of the model
-        "model/_non_layer/a_variable",
-        "optimizer/learning_rate",
-        "optimizer/beta_1",
-        "optimizer/beta_2",
-        "optimizer/epsilon",
-        "optimizer/iter",
-        "optimizer/decay",
-    ) + expected_slot_keys
-    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
-    expected_checkpoint_names = [
-        name + suffix for name in expected_checkpoint_names]
-    expected_checkpoint_names.append(
-        "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON")
-    # The Dense layers also save get_config() JSON
-    expected_checkpoint_names.extend(
-        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
-         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
-    named_variables = {v.name: v for v in named_variables}
-    six.assertCountEqual(self, expected_checkpoint_names,
-                         named_variables.keys())
-    # Check that we've mapped to the right variable objects (not exhaustive)
-    self.assertEqual(
-        "global_step",
-        named_variables["step" + suffix].full_name)
-    self.assertEqual(
-        "my_model/dense_1/kernel",
-        named_variables["model/_second/kernel" + suffix].full_name)
-    self.assertEqual(
-        "my_model/dense/kernel",
-        named_variables["model/_named_dense/kernel" + suffix].full_name)
-    self.assertEqual(
-        "beta_1",
-        named_variables["optimizer/beta_1" + suffix].full_name)
-    self.assertEqual(
-        "beta_2",
-        named_variables["optimizer/beta_2" + suffix].full_name)
-    # Spot check the generated protocol buffers.
-    self.assertEqual("optimizer",
-                     serialized_graph.nodes[0].children[1].local_name)
-    optimizer_node = serialized_graph.nodes[
-        serialized_graph.nodes[0].children[1].node_id]
-    children = [node.local_name for node in optimizer_node.children]
-    six.assertCountEqual(
-        self,
-        # Non-slot dependencies
-        ["beta_1", "beta_2", "iter", "decay", "epsilon", "learning_rate"],
-        children)
-    serialized_slot_keys = []
-    for slot in optimizer_node.slot_variables:
-      for attribute in (
-          serialized_graph.nodes[slot.slot_variable_node_id].attributes):
-        serialized_slot_keys.append(attribute.checkpoint_key)
-    six.assertCountEqual(
-        self,
-        [key + suffix for key in expected_slot_keys],
-        serialized_slot_keys)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testMoreComplexSaveableReturned(self):
-    v = _OwnsMirroredVariables()
-    checkpoint = checkpointable_utils.Checkpoint(v=v)
-    test_dir = self.get_temp_dir()
-    prefix = os.path.join(test_dir, "ckpt")
-    self.evaluate(v.non_dep_variable.assign(42.))
-    save_path = checkpoint.save(prefix)
-    self.evaluate(v.non_dep_variable.assign(43.))
-    self.evaluate(v.mirrored.assign(44.))
-    checkpoint.restore(save_path).assert_consumed().initialize_or_restore()
-    self.assertEqual(42., self.evaluate(v.non_dep_variable))
-    self.assertEqual(42., self.evaluate(v.mirrored))
-    self.evaluate(v.non_dep_variable.assign(44.))
-    save_path = checkpoint.save(prefix)
-    self.evaluate(v.non_dep_variable.assign(45.))
-    checkpoint.restore(save_path).assert_consumed().initialize_or_restore()
-    self.assertEqual(44., self.evaluate(v.non_dep_variable))
-    self.assertEqual(44., self.evaluate(v.mirrored))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testMoreComplexSaveableReturnedWithGlobalName(self):
-    # The same object can also be saved using the name-based saver.
-    v = _OwnsMirroredVariables()
-    saver = saver_lib.Saver(var_list=[v])
-    test_dir = self.get_temp_dir()
-    prefix = os.path.join(test_dir, "ckpt")
-    with self.cached_session() as sess:
-      self.evaluate(v.non_dep_variable.assign(42.))
-      save_path = saver.save(sess, prefix)
-      self.evaluate(v.non_dep_variable.assign(43.))
-      self.evaluate(v.mirrored.assign(44.))
-      saver.restore(sess, save_path)
-      self.assertEqual(42., self.evaluate(v.non_dep_variable))
-      self.assertEqual(42., self.evaluate(v.mirrored))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testSaveRestore(self):
-    model = MyModel()
-    optimizer = adam.Adam(0.001)
-    root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model)
-    input_value = constant_op.constant([[3.]])
-    with backprop.GradientTape() as tape:
-      loss = model(input_value)
-    variables = model.trainable_variables
-    gradients = tape.gradient(loss, variables)
-    train_op = optimizer.apply_gradients(zip(gradients, variables))
-    root_checkpointable.save_counter  # pylint: disable=pointless-statement
-    self.evaluate(checkpointable_utils.gather_initializers(
-        root_checkpointable))
-    self.evaluate(train_op)
-    prefix = os.path.join(self.get_temp_dir(), "ckpt")
-    self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
-    m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
-    self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
-    save_path = root_checkpointable.save(file_prefix=prefix)
-    self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
-    self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
-    optimizer_variables = self.evaluate(
-        sorted(optimizer.variables(), key=lambda v: v.name))
-    self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
-    # Immediate restoration
-    status = root_checkpointable.restore(save_path=save_path).assert_consumed()
-    status.run_restore_ops()
-    self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
-    self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
-    self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
-    if not context.executing_eagerly():
-      return  # Restore-on-create is only supported when executing eagerly
-    on_create_model = MyModel()
-    on_create_optimizer = adam.Adam(0.001)
-    on_create_root = checkpointable_utils.Checkpoint(
-        optimizer=on_create_optimizer, model=on_create_model)
-    # Deferred restoration
-    status = on_create_root.restore(save_path=save_path)
-    status.assert_nontrivial_match()
-    status.assert_existing_objects_matched()
-    with self.assertRaises(AssertionError):
-      status.assert_consumed()
-    on_create_model(constant_op.constant([[3.]]))  # create variables
-    self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
-    self.assertAllEqual([42.],
-                        self.evaluate(
-                            on_create_model._named_dense.variables[1]))
-    on_create_m_bias_slot = on_create_optimizer.get_slot(
-        on_create_model._named_dense.variables[1], "m")
-    status.assert_existing_objects_matched()
-    if not context.executing_eagerly():
-      with self.assertRaises(AssertionError):
-        status.assert_consumed()
-    # Optimizer slot variables are created when the original variable is
-    # restored.
-    self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
-    dummy_var = resource_variable_ops.ResourceVariable([1.])
-    on_create_optimizer.minimize(loss=dummy_var.read_value,
-                                 var_list=[dummy_var])
-    status.assert_existing_objects_matched()
-    status.assert_consumed()
-    self.assertAllEqual(
-        optimizer_variables,
-        # Creation order is different, so .variables() needs to be re-sorted.
-        self.evaluate(sorted(optimizer.variables(), key=lambda v: v.name)))
-
-  # TODO(allenl): Debug garbage created by this test in python3.
-  def testDeferredRestorationUsageEager(self):
-    """An idiomatic eager execution example."""
-    num_training_steps = 10
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    for training_continuation in range(3):
-      model = MyModel()
-      optimizer = adam.Adam(0.001)
-      root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model)
-      root.restore(checkpoint_management.latest_checkpoint(
-          checkpoint_directory))
-      for _ in range(num_training_steps):
-        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
-        input_value = constant_op.constant([[3.]])
-        with backprop.GradientTape() as tape:
-          loss = model(input_value)
-        variables = model.trainable_variables
-        gradients = tape.gradient(loss, variables)
-        optimizer.apply_gradients(zip(gradients, variables))
-      root.save(file_prefix=checkpoint_prefix)
-      self.assertEqual((training_continuation + 1) * num_training_steps,
-                       root.optimizer.iterations.numpy())
-
-  def testUsageGraph(self):
-    """Expected usage when graph building."""
-    with context.graph_mode():
-      num_training_steps = 10
-      checkpoint_directory = self.get_temp_dir()
-      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-      for training_continuation in range(3):
-        with ops.Graph().as_default():
-          model = MyModel()
-          optimizer = adam.Adam(0.001)
-          root = checkpointable_utils.Checkpoint(
-              optimizer=optimizer, model=model)
-          input_value = constant_op.constant([[3.]])
-          with backprop.GradientTape() as tape:
-            loss = model(input_value)
-          variables = model.trainable_variables
-          gradients = tape.gradient(loss, variables)
-          train_op = optimizer.apply_gradients(zip(gradients, variables))
-
-          checkpoint_path = checkpoint_management.latest_checkpoint(
-              checkpoint_directory)
-          with self.session(graph=ops.get_default_graph()) as session:
-            status = root.restore(save_path=checkpoint_path)
-            status.initialize_or_restore(session=session)
-            if checkpoint_path is None:
-              self.assertEqual(0, training_continuation)
-              with self.assertRaises(AssertionError):
-                status.assert_consumed()
-              with self.assertRaises(AssertionError):
-                status.assert_existing_objects_matched()
-            else:
-              status.assert_consumed()
-              status.assert_existing_objects_matched()
-            for _ in range(num_training_steps):
-              session.run(train_op)
-            root.save(file_prefix=checkpoint_prefix, session=session)
-            self.assertEqual((training_continuation + 1) * num_training_steps,
-                             session.run(root.optimizer.iterations))
-            self.assertEqual(training_continuation + 1,
-                             session.run(root.save_counter))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testAgnosticUsage(self):
-    """Graph/eager agnostic usage."""
-    # Does create garbage when executing eagerly due to ops.Graph() creation.
-    num_training_steps = 10
-    checkpoint_directory = self.get_temp_dir()
-    def _train_fn(model, input_value):
-      with backprop.GradientTape() as tape:
-        loss = model(input_value)
-      variables = model.trainable_variables
-      gradients = tape.gradient(loss, variables)
-      return optimizer.apply_gradients(zip(gradients, variables))
-    for training_continuation in range(3):
-      with test_util.device(use_gpu=True):
-        model = MyModel()
-        optimizer = adam.Adam(0.001)
-        root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model)
-        manager = checkpoint_management.CheckpointManager(
-            root, checkpoint_directory, max_to_keep=1)
-        status = root.restore(save_path=manager.latest_checkpoint)
-        input_value = constant_op.constant([[3.]])
-        train_fn = functools.partial(_train_fn, model, input_value)
-        if not context.executing_eagerly():
-          train_fn = functools.partial(self.evaluate, train_fn())
-        status.initialize_or_restore()
-        for _ in range(num_training_steps):
-          train_fn()
-        manager.save()
-        self.assertEqual((training_continuation + 1) * num_training_steps,
-                         self.evaluate(root.optimizer.iterations))
-        self.assertEqual(training_continuation + 1,
-                         self.evaluate(root.save_counter))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testFreezing(self):
-    with test_util.use_gpu():
-      # Save an object-based checkpoint using a frozen saver
-      directory = self.get_temp_dir()
-      prefix = os.path.join(directory, "ckpt")
-      v = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
-      checkpoint = checkpointable_utils.Checkpoint(v=v)
-      self.evaluate(v.assign(3))
-      # Create the save counter so assert_consumed doesn't complain about it not
-      # existing in the checkpoint on restore.
-      self.evaluate(checkpoint.save_counter.assign(12))
-      saver = checkpointable_utils.frozen_saver(checkpoint)
-      with ops.device("cpu:0"):
-        prefix_tensor = constant_op.constant(prefix)
-      save_path = self.evaluate(saver.save(prefix_tensor))
-      self.evaluate(v.assign(10))
-      # Use the frozen saver to restore the same object graph
-      self.evaluate(saver.restore(prefix_tensor))
-      self.assertEqual(3, self.evaluate(v))
-
-      # Restore using another frozen saver on an identical object graph
-      del v, checkpoint, saver
-      v = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
-      checkpoint = checkpointable_utils.Checkpoint(v=v)
-      saver = checkpointable_utils.frozen_saver(checkpoint)
-      self.evaluate(saver.restore(prefix_tensor))
-      self.assertEqual(3, self.evaluate(v))
-
-      # Restore as an object-based checkpoint
-      del v, checkpoint, saver
-      checkpoint = checkpointable_utils.Checkpoint()
-      status = checkpoint.restore(save_path)
-      v = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
-      if context.executing_eagerly():
-        self.assertEqual(12, self.evaluate(checkpoint.save_counter))
-        self.assertEqual(0, self.evaluate(v))
-      checkpoint.v = v
-      status.assert_consumed().run_restore_ops()
-      self.assertEqual(3, self.evaluate(v))
-      self.assertEqual(12, self.evaluate(checkpoint.save_counter))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testCustomNumbering(self):
-    directory = self.get_temp_dir()
-    prefix = os.path.join(directory, "ckpt")
-    step = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
-    checkpoint = checkpointable_utils.Checkpoint(step=step)
-    self.evaluate(step.initializer)
-    for i in range(5):
-      path = checkpoint.write("%s-%d" % (prefix, self.evaluate(step)))
-      expected_suffix = "-%d" % (2 * i,)
-      if not path.endswith(expected_suffix):
-        self.fail("%s should have suffix %s" % (path, expected_suffix))
-      self.evaluate(step.assign_add(2))
-
-  # pylint: disable=cell-var-from-loop
-  @test_util.run_in_graph_and_eager_modes
-  @test_util.run_v1_only("b/120545219")
-  def testWithDefun(self):
-    num_training_steps = 2
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    for training_continuation in range(3):
-      with test_util.device(use_gpu=True):
-        model = MyModel()
-        # Don't actually train so we can test variable values
-        optimizer = adam.Adam(0.)
-        root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model)
-        checkpoint_path = checkpoint_management.latest_checkpoint(
-            checkpoint_directory)
-        status = root.restore(save_path=checkpoint_path)
-        def train_fn():
-          @def_function.function
-          def _call_model(x):
-            return model(x)
-          with backprop.GradientTape() as tape:
-            loss = _call_model(constant_op.constant([[3.]]))
-          gradients = tape.gradient(loss, model.variables)
-          return optimizer.apply_gradients(zip(gradients, model.variables))
-        if not context.executing_eagerly():
-          train_fn = functools.partial(
-              self.evaluate, train_fn())
-        status.initialize_or_restore()
-        for _ in range(num_training_steps):
-          train_fn()
-        if training_continuation > 0:
-          status.assert_consumed()
-          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
-        else:
-          self.evaluate(model.variables[0].assign([[42.]]))
-        root.save(file_prefix=checkpoint_prefix)
-        self.assertEqual((training_continuation + 1) * num_training_steps,
-                         self.evaluate(optimizer.iterations))
-        self.assertEqual(training_continuation + 1,
-                         self.evaluate(root.save_counter))
-  # pylint: enable=cell-var-from-loop
-
-  def _get_checkpoint_name(self, name):
-    root = tracking.AutoCheckpointable()
-    checkpointable_utils.add_variable(
-        root, name=name, shape=[1, 2], dtype=dtypes.float64)
-    (named_variable,), _, _ = graph_view.ObjectGraphView(
-        root).serialize_object_graph()
-    with ops.name_scope("root/" + named_variable.name):
-      pass  # Make sure we can use this as an op name if we prefix it.
-    return named_variable.name
-
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
-  def testVariableNameEscaping(self):
-    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
-    self.assertEqual(r"a.Sb.Sc" + suffix, self._get_checkpoint_name(r"a/b/c"))
-    self.assertEqual(r"b" + suffix, self._get_checkpoint_name(r"b"))
-    self.assertEqual(r"c.S" + suffix, self._get_checkpoint_name(r"c/"))
-    self.assertEqual(r"d.S..S" + suffix, self._get_checkpoint_name(r"d/.S"))
-    self.assertEqual(r"d.S..ATTRIBUTES.Sf" + suffix,
-                     self._get_checkpoint_name(r"d/.ATTRIBUTES/f"))
-
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
-  def testNumberedPath(self):
-    root = tracking.AutoCheckpointable()
-    leaf = tracking.AutoCheckpointable()
-    root.leaf = leaf
-    checkpointable_utils.add_variable(leaf, name="v", shape=[])
-    (named_variable,), _, _ = graph_view.ObjectGraphView(
-        root).serialize_object_graph()
-    self.assertEqual(r"leaf/v/.ATTRIBUTES/VARIABLE_VALUE", named_variable.name)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testLocalNameValidation(self):
-    root = tracking.AutoCheckpointable()
-    leaf = tracking.AutoCheckpointable()
-    # Dots are escaped, which avoids conflicts with reserved names.
-    root._track_checkpointable(leaf, name=".ATTRIBUTES")
-    checkpointable_utils.add_variable(checkpointable=leaf, name="a", shape=[])
-    (named_variable,), _, _ = graph_view.ObjectGraphView(
-        root).serialize_object_graph()
-    self.assertEqual("..ATTRIBUTES/a/.ATTRIBUTES/VARIABLE_VALUE",
-                     named_variable.name)
-
-  def testAnonymousVarsInInit(self):
-
-    class Model(training.Model):
-
-      def __init__(self):
-        super(Model, self).__init__()
-        self.w = resource_variable_ops.ResourceVariable(0.0)
-        self.b = resource_variable_ops.ResourceVariable(0.0)
-        self.vars = [self.w, self.b]
-
-      def call(self, x):
-        return x * self.w + self.b
-
-    with context.eager_mode():
-      model = Model()
-      optimizer = adam.Adam(learning_rate=0.05)
-      checkpoint_directory = self.get_temp_dir()
-      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-      checkpoint = checkpointable_utils.Checkpoint(
-          model=model, optimizer=optimizer)
-      for _ in range(2):
-        checkpoint.save(checkpoint_prefix)
-        with backprop.GradientTape() as tape:
-          loss = (constant_op.constant(1.)
-                  - model(constant_op.constant(1.))) ** 2
-        grad = tape.gradient(loss, model.vars)
-        optimizer.apply_gradients(
-            [(g, v) for g, v in zip(grad, model.vars)])
-
-  @test_util.run_in_graph_and_eager_modes
-  def testLateDependencyTracking(self):
-
-    class Dependency(tracking.AutoCheckpointable):
-
-      def build(self):
-        self.var = checkpointable_utils.add_variable(
-            self, "var", initializer=0.)
-
-    class LateDependencies(checkpointable_utils.Checkpoint):
-
-      def add_dep(self):
-        self.dep = Dependency()
-        self.dep.build()
-
-    original = LateDependencies()
-    original.add_dep()
-    self.evaluate(state_ops.assign(original.dep.var, 123.))
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    save_path = original.save(checkpoint_prefix)
-    load_into = LateDependencies()
-    status = load_into.restore(save_path)
-    status.assert_existing_objects_matched()
-    with self.assertRaises(AssertionError):
-      status.assert_consumed()
-    load_into.add_dep()
-    status.assert_consumed()
-    status.assert_existing_objects_matched().run_restore_ops()
-    self.assertEqual(123., self.evaluate(load_into.dep.var))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDepAfterVar(self):
-
-    class Dependency(tracking.AutoCheckpointable):
-
-      def build(self):
-        self.var = checkpointable_utils.add_variable(
-            self, "var", initializer=0.)
-
-    class DepAfterVar(checkpointable_utils.Checkpoint):
-
-      def add_dep(self):
-        dep = Dependency()
-        dep.build()
-        self.dep = dep
-
-    dep_after_var = DepAfterVar()
-    dep_after_var.add_dep()
-    self.evaluate(state_ops.assign(dep_after_var.dep.var, -14.))
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    save_path = dep_after_var.save(checkpoint_prefix)
-
-    loaded_dep_after_var = DepAfterVar()
-    status = loaded_dep_after_var.restore(save_path)
-    loaded_dep_after_var.add_dep()
-    status.assert_consumed()
-    status.run_restore_ops()
-    self.assertEqual(-14., self.evaluate(loaded_dep_after_var.dep.var))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDeferredSlotRestoration(self):
-    checkpoint_directory = self.get_temp_dir()
-
-    root = checkpointable_utils.Checkpoint()
-    root.var = checkpointable_utils.add_variable(
-        root, name="var", initializer=0.)
-    optimizer = adam.Adam(0.1)
-    variables = [root.var]
-    gradients = [1.]
-    train_op = optimizer.apply_gradients(zip(gradients, variables))
-    # Note that `optimizer` has not been added as a dependency of
-    # `root`. Create a one-off grouping so that slot variables for `root.var`
-    # get initialized too.
-    self.evaluate(checkpointable_utils.gather_initializers(
-        checkpointable_utils.Checkpoint(root=root, optimizer=optimizer)))
-    self.evaluate(train_op)
-    self.evaluate(state_ops.assign(root.var, 12.))
-    no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots"))
-    root.optimizer = optimizer
-    self.evaluate(state_ops.assign(root.var, 13.))
-    self.evaluate(state_ops.assign(
-        optimizer.get_slot(slot_name="m", var=root.var),
-        14.))
-    slots_path = root.save(os.path.join(checkpoint_directory, "with_slots"))
-    new_root = checkpointable_utils.Checkpoint()
-    # Load the slot-containing checkpoint (deferred), then immediately overwrite
-    # the non-slot variable (also deferred).
-    slot_status = new_root.restore(slots_path)
-    no_slot_status = new_root.restore(no_slots_path)
-    with self.assertRaises(AssertionError):
-      no_slot_status.assert_consumed()
-    new_root.var = checkpointable_utils.add_variable(
-        new_root, name="var", shape=[])
-    no_slot_status.assert_consumed()
-    no_slot_status.run_restore_ops()
-    self.assertEqual(12., self.evaluate(new_root.var))
-    new_root.optimizer = adam.Adam(0.1)
-    slot_status.assert_existing_objects_matched()
-    if not context.executing_eagerly():
-      with self.assertRaisesRegexp(AssertionError, "Unresolved object"):
-        slot_status.assert_consumed()
-    self.assertEqual(12., self.evaluate(new_root.var))
-    if context.executing_eagerly():
-      # Slot variables are only created with restoring initializers when
-      # executing eagerly.
-      self.assertEqual(14., self.evaluate(
-          new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
-    else:
-      # Slot variables are not created eagerly when graph building.
-      with self.assertRaises(KeyError):
-        new_root.optimizer.get_slot(slot_name="m", var=new_root.var)
-    variables = [new_root.var]
-    gradients = [1.]
-    train_op = new_root.optimizer.apply_gradients(zip(gradients, variables))
-    # The slot variable now exists; restore() didn't create it, but we should
-    # now have a restore op for it.
-    slot_status.run_restore_ops()
-    if not context.executing_eagerly():
-      # The train op hasn't run when graph building, so the slot variable has
-      # its restored value. It has run in eager, so the value will be different.
-      self.assertEqual(14., self.evaluate(
-          new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
-    self.evaluate(train_op)
-    slot_status.assert_consumed()
-
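Deferred optimizer-state restoration, as asserted above, has a public-API
analogue: state saved with an optimizer is applied once the new optimizer
creates the corresponding variables. A sketch, assuming TF 2.x eager
execution and a hypothetical scratch path:

import tensorflow as tf

var = tf.Variable(1.0)
opt = tf.keras.optimizers.Adam(0.1)
opt.apply_gradients([(tf.constant(1.0), var)])  # Creates optimizer state.
path = tf.train.Checkpoint(var=var, opt=opt).save("/tmp/slots/ckpt")

new_var = tf.Variable(0.0)
new_opt = tf.keras.optimizers.Adam(0.1)
status = tf.train.Checkpoint(var=new_var, opt=new_opt).restore(path)
status.assert_existing_objects_matched()  # Moments don't exist yet; still OK.
# The first apply_gradients creates the moment variables, consuming the
# saved values for them.
new_opt.apply_gradients([(tf.constant(1.0), new_var)])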
-  @test_util.run_in_graph_and_eager_modes
-  def testOverlappingRestores(self):
-    checkpoint_directory = self.get_temp_dir()
-    save_root = checkpointable_utils.Checkpoint()
-    save_root.dep = tracking.AutoCheckpointable()
-    save_root.dep.var = checkpointable_utils.add_variable(
-        save_root.dep, name="var", initializer=0.)
-    self.evaluate(state_ops.assign(save_root.dep.var, 12.))
-    first_path = save_root.save(os.path.join(checkpoint_directory, "first"))
-    self.evaluate(state_ops.assign(save_root.dep.var, 13.))
-    second_path = save_root.save(os.path.join(checkpoint_directory, "second"))
-
-    first_root = checkpointable_utils.Checkpoint()
-    second_root = checkpointable_utils.Checkpoint()
-    first_status = first_root.restore(first_path)
-    second_status = second_root.restore(second_path)
-    load_dep = tracking.AutoCheckpointable()
-    load_dep.var = checkpointable_utils.add_variable(
-        load_dep, name="var", shape=[])
-    first_root.dep = load_dep
-    first_status.assert_consumed()
-    first_status.run_restore_ops()
-    self.assertEqual(12., self.evaluate(load_dep.var))
-    second_root.dep = load_dep
-    second_status.assert_consumed()
-    second_status.run_restore_ops()
-    self.assertEqual(13., self.evaluate(load_dep.var))
-
-    # Try again with the order of the restore() reversed. The last restore
-    # determines the final value.
-    first_root = checkpointable_utils.Checkpoint()
-    second_root = checkpointable_utils.Checkpoint()
-    second_status = second_root.restore(second_path)
-    first_status = first_root.restore(first_path)
-    load_dep = tracking.AutoCheckpointable()
-    load_dep.var = checkpointable_utils.add_variable(
-        load_dep, name="var", shape=[])
-    first_root.dep = load_dep
-    first_status.assert_consumed()
-    first_status.run_restore_ops()
-    self.assertEqual(12., self.evaluate(load_dep.var))
-    second_root.dep = load_dep
-    second_status.assert_consumed()
-    second_status.run_restore_ops()
-    self.assertEqual(12., self.evaluate(load_dep.var))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testAmbiguousLoad(self):
-    # Not OK to split one checkpointed object into two Python objects on load
-    checkpoint_directory = self.get_temp_dir()
-    save_root = checkpointable_utils.Checkpoint()
-    save_root.dep_one = tracking.AutoCheckpointable()
-    save_root.dep_two = tracking.AutoCheckpointable()
-    dep_three = tracking.AutoCheckpointable()
-    save_root.dep_one.dep_three = dep_three
-    save_root.dep_two.dep_three = dep_three
-    checkpointable_utils.add_variable(dep_three, name="var", initializer=0.)
-    self.evaluate(checkpointable_utils.gather_initializers(save_root))
-    save_path = save_root.save(os.path.join(checkpoint_directory, "ckpt"))
-    load_root = checkpointable_utils.Checkpoint()
-    status = load_root.restore(save_path)
-    load_root.dep_one = tracking.AutoCheckpointable()
-    load_root.dep_two = tracking.AutoCheckpointable()
-    load_root.dep_one.dep_three = tracking.AutoCheckpointable()
-    load_root.dep_two.dep_three = tracking.AutoCheckpointable()
-    checkpointable_utils.add_variable(
-        load_root.dep_one.dep_three, name="var", initializer=0.)
-    with self.assertRaises(AssertionError):
-      status.assert_consumed()
-    with self.assertRaises(AssertionError):
-      status.assert_existing_objects_matched()
-
-  @test_util.run_in_graph_and_eager_modes
-  def testObjectsCombined(self):
-    # Currently fine to load two checkpoint objects into one Python object
-    checkpoint_directory = self.get_temp_dir()
-    save_root = checkpointable_utils.Checkpoint()
-    save_root.dep_one = tracking.AutoCheckpointable()
-    save_root.dep_two = tracking.AutoCheckpointable()
-    checkpointable_utils.add_variable(
-        save_root.dep_one, name="var1", initializer=32., dtype=dtypes.float64)
-    checkpointable_utils.add_variable(
-        save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64)
-    self.evaluate(checkpointable_utils.gather_initializers(save_root))
-    save_path = save_root.save(os.path.join(checkpoint_directory, "ckpt"))
-    load_root = checkpointable_utils.Checkpoint()
-    load_root.dep_one = tracking.AutoCheckpointable()
-    load_root.dep_two = load_root.dep_one
-    v1 = checkpointable_utils.add_variable(
-        load_root.dep_one, name="var1", shape=[], dtype=dtypes.float64)
-    v2 = checkpointable_utils.add_variable(
-        load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64)
-    status = load_root.restore(
-        save_path).assert_consumed().assert_existing_objects_matched()
-    status.run_restore_ops()
-    self.assertEqual(32., self.evaluate(v1))
-    self.assertEqual(64., self.evaluate(v2))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDependencyLoop(self):
-    # Note: this test creates garbage during eager execution because it
-    # purposefully creates a reference cycle.
-    first = checkpointable_utils.Checkpoint()
-    second = checkpointable_utils.Checkpoint()
-    first.second = second
-    second.first = first
-    first.v = checkpointable_utils.add_variable(
-        first, "v1", initializer=[3., 1., 4.])
-    second.v = checkpointable_utils.add_variable(
-        second, "v2", initializer=[1., 1., 2., 3.])
-    self.evaluate(checkpointable_utils.gather_initializers(first))
-    checkpoint_directory = self.get_temp_dir()
-    save_path = first.save(os.path.join(checkpoint_directory, "ckpt"))
-
-    # Test deferred loading
-    first_load = checkpointable_utils.Checkpoint()
-    status = first_load.restore(save_path)
-    second_load = tracking.AutoCheckpointable()
-    first_load.second = second_load
-    second_load.first = first_load
-    with self.assertRaises(AssertionError):
-      status.assert_consumed()
-    first_load.v = checkpointable_utils.add_variable(
-        first_load, "v1", shape=[3])
-    second_load.v = checkpointable_utils.add_variable(
-        second_load, "v2", shape=[4])
-    status.assert_consumed()
-    status.run_restore_ops()
-    self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v))
-    self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v))
-
-    # Test loading when variables have already been created
-    self.evaluate(first_load.v.assign([2., 7., 1.]))
-    self.assertAllEqual([2., 7., 1.], self.evaluate(first_load.v))
-    self.evaluate(second_load.v.assign([2., 7., 1., 8.]))
-    self.assertAllEqual([2., 7., 1., 8.], self.evaluate(second_load.v))
-    status = first_load.restore(save_path).assert_consumed()
-    status.run_restore_ops()
-    self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v))
-    self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v))
-
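Reference cycles like the one above are legal in the checkpointed object
graph: shared nodes are serialized once and referenced by id, so saving
terminates. A public-API sketch, assuming TF 2.x eager execution:

import tensorflow as tf

first = tf.train.Checkpoint(v=tf.Variable([3.0, 1.0, 4.0]))
second = tf.train.Checkpoint(v=tf.Variable([1.0, 1.0, 2.0, 3.0]))
first.second = second
second.first = first  # Cycle; the object graph dedupes shared nodes.
path = first.save("/tmp/loop/ckpt")  # Hypothetical scratch path.
first.restore(path).assert_consumed()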
-  @test_util.run_in_graph_and_eager_modes
-  def testRestoreOnAssign(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    first = checkpointable_utils.Checkpoint()
-    first.var1 = variables_lib.Variable(0., name="outside_var")
-    first.var2 = variables_lib.Variable(0., name="blah")
-    self.evaluate(first.var1.assign(4.))
-    self.evaluate(first.var2.assign(8.))
-    save_path = first.save(checkpoint_prefix)
-
-    second = checkpointable_utils.Checkpoint()
-    second.var2 = variables_lib.Variable(0., name="blah")
-    status = second.restore(save_path)
-    recreated_var1 = variables_lib.Variable(0., name="outside_var")
-    status.run_restore_ops()
-    self.assertEqual(8., self.evaluate(second.var2))
-    self.evaluate(recreated_var1.assign(-2.))
-    self.assertEqual(-2., self.evaluate(recreated_var1))
-    second.var1 = recreated_var1
-    status.run_restore_ops()
-    self.assertEqual(4., self.evaluate(recreated_var1))
-
-  def testManySavesGraph(self):
-    """Saves after the first should not modify the graph."""
-    with context.graph_mode():
-      graph = ops.Graph()
-      with graph.as_default(), self.session(graph):
-        checkpoint_directory = self.get_temp_dir()
-        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-        obj = checkpointable_utils.Checkpoint()
-        obj.var = variables_lib.Variable(0., name="v")
-        obj.opt = adam.Adam(0.1)
-        variables = [obj.var]
-        gradients = [1.]
-        obj.opt.apply_gradients(zip(gradients, variables))
-        self.evaluate(checkpointable_utils.gather_initializers(obj))
-        obj.save(checkpoint_prefix)
-        graph.finalize()
-        obj.save(checkpoint_prefix)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testCheckpointState(self):
-    # No checkpoints are deleted by default
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    obj = tracking.AutoCheckpointable()
-    obj.var = variable_scope.get_variable(name="v", initializer=0.)
-    self.evaluate(checkpointable_utils.gather_initializers(obj))
-    saver = checkpointable_utils.Checkpoint(obj=obj)
-    for _ in range(10):
-      saver.save(checkpoint_prefix)
-    expected_filenames = ["checkpoint"]
-    for checkpoint_number in range(1, 11):
-      expected_filenames.append("ckpt-%d.index" % (checkpoint_number,))
-      expected_filenames.append(
-          "ckpt-%d.data-00000-of-00001" % (checkpoint_number,))
-    six.assertCountEqual(
-        self,
-        expected_filenames,
-        os.listdir(checkpoint_directory))
-
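The filenames enumerated above follow the standard layout: each save writes
an index file plus one or more sharded data files, and a small `checkpoint`
state file records the latest prefix. A sketch, assuming TF 2.x and a
hypothetical scratch directory:

import os

import tensorflow as tf

directory = "/tmp/ckpt_state"
ckpt = tf.train.Checkpoint(v=tf.Variable(0.0))
prefix = os.path.join(directory, "ckpt")
for _ in range(3):
  ckpt.save(prefix)  # Writes ckpt-1, ckpt-2, ckpt-3; nothing is deleted.
print(sorted(os.listdir(directory)))
# ['checkpoint', 'ckpt-1.data-00000-of-00001', 'ckpt-1.index', ...]
print(tf.train.latest_checkpoint(directory))  # .../ckpt-3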
-  @test_util.run_in_graph_and_eager_modes
-  def testCheckpointStateChangingVarList(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    obj = tracking.AutoCheckpointable()
-    obj.var = variable_scope.get_variable(name="v", initializer=0.)
-    self.evaluate(checkpointable_utils.gather_initializers(obj))
-    checkpoint = checkpointable_utils.Checkpoint(obj=obj)
-    looped_variables = []
-    for iteration in range(10):
-      new_variable = resource_variable_ops.ResourceVariable(iteration)
-      self.evaluate(new_variable.initializer)
-      setattr(checkpoint, "var_%d" % iteration, new_variable)
-      checkpoint.save(checkpoint_prefix)
-      looped_variables.append(new_variable)
-    expected_filenames = ["checkpoint"]
-    # A new saver is created each time the set of tracked variables changes,
-    # but checkpoint management should still be consistent. Nothing gets
-    # deleted.
-    for checkpoint_number in range(1, 11):
-      expected_filenames.append("ckpt-%d.index" % (checkpoint_number,))
-      expected_filenames.append(
-          "ckpt-%d.data-00000-of-00001" % (checkpoint_number,))
-    six.assertCountEqual(
-        self,
-        expected_filenames,
-        os.listdir(checkpoint_directory))
-    self.assertEqual(
-        checkpoint_prefix + "-10",
-        checkpoint_management.latest_checkpoint(checkpoint_directory))
-    # The checkpoint list only contains the most recent checkpoint, but they're
-    # all on disk. This means we won't eventually run into proto size limits.
-    self.assertEqual(
-        [checkpoint_prefix + "-10"],
-        (checkpoint_management.get_checkpoint_state(checkpoint_directory)
-         .all_model_checkpoint_paths))
-    for v in looped_variables:
-      self.evaluate(v.assign(314))
-    checkpoint.restore(checkpoint_prefix + "-6").run_restore_ops()
-    self.assertEqual(314, self.evaluate(checkpoint.var_9))
-    self.assertEqual(314, self.evaluate(checkpoint.var_8))
-    self.assertEqual(314, self.evaluate(checkpoint.var_6))
-    self.assertEqual(5, self.evaluate(checkpoint.var_5))
-    self.assertEqual(1, self.evaluate(checkpoint.var_1))
-    self.assertEqual(0, self.evaluate(checkpoint.var_0))
-    checkpoint.restore(checkpoint_prefix + "-10").run_restore_ops()
-    self.assertEqual(9, self.evaluate(checkpoint.var_9))
-    self.assertEqual(8, self.evaluate(checkpoint.var_8))
-    self.assertEqual(1, self.evaluate(checkpoint.var_1))
-    self.assertEqual(0, self.evaluate(checkpoint.var_0))
-
-  def testManyRestoresGraph(self):
-    """Restores after the first should not modify the graph."""
-    with context.graph_mode():
-      graph = ops.Graph()
-      with graph.as_default(), self.session(graph):
-        checkpoint_directory = self.get_temp_dir()
-        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-        obj = checkpointable_utils.Checkpoint()
-        obj.var = variables_lib.Variable(0., name="v")
-        obj.opt = adam.Adam(0.1)
-        variables = [obj.var]
-        gradients = [1.]
-        obj.opt.apply_gradients(zip(gradients, variables))
-        self.evaluate(checkpointable_utils.gather_initializers(obj))
-        save_path = obj.save(checkpoint_prefix)
-        obj.restore(save_path)
-        graph.finalize()
-        obj.restore(save_path)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_sequential(self):
-    model = sequential.Sequential()
-    checkpoint = checkpointable_utils.Checkpoint(model=model)
-    model.add(core.Dense(4))
-    second_dense = core.Dense(5)
-    model.add(second_dense)
-    model(constant_op.constant([[1.]]))
-    checkpoint.restore(None).initialize_or_restore()
-    self.evaluate(second_dense.bias.assign(
-        constant_op.constant([1., 2., 3., 4., 5.])))
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    save_path = checkpoint.save(checkpoint_prefix)
-    self.evaluate(second_dense.bias.assign(
-        constant_op.constant([5., 6., 7., 8., 9.])))
-    checkpoint.restore(save_path).assert_consumed().run_restore_ops()
-    self.assertAllEqual([1., 2., 3., 4., 5.], self.evaluate(second_dense.bias))
-
-    deferred_sequential = sequential.Sequential()
-    deferred_sequential_checkpoint = checkpointable_utils.Checkpoint(
-        model=deferred_sequential)
-    status = deferred_sequential_checkpoint.restore(save_path)
-    deferred_sequential.add(core.Dense(4))
-    deferred_sequential(constant_op.constant([[1.]]))
-    deferred_second_dense = core.Dense(5)
-    deferred_sequential.add(deferred_second_dense)
-    deferred_sequential(constant_op.constant([[1.]]))
-    status.run_restore_ops()
-    self.assertAllEqual([1., 2., 3., 4., 5.],
-                        self.evaluate(deferred_second_dense.bias))
-
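The deferred Sequential flow above also works through the public Keras API:
restoration waits until the clone's first call creates its variables. A
sketch, assuming TF 2.x eager execution and a hypothetical path:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(5)])
model(tf.constant([[1.0]]))  # Build the variables so they can be saved.
path = tf.train.Checkpoint(model=model).save("/tmp/seq/ckpt")

clone = tf.keras.Sequential([tf.keras.layers.Dense(5)])
status = tf.train.Checkpoint(model=clone).restore(path)
clone(tf.constant([[1.0]]))  # Variable creation triggers the restore.
status.assert_existing_objects_matched()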
-  @test_util.run_in_graph_and_eager_modes
-  def test_initialize_if_not_restoring(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
-    with test_util.device(use_gpu=True):
-      model = MyModel()
-      optimizer = adam.Adam(0.001)
-      root = checkpointable_utils.Checkpoint(
-          model=model)  # Do not save the optimizer with the checkpoint.
-      optimizer_checkpoint = checkpointable_utils.Checkpoint(
-          optimizer=optimizer)
-
-      checkpoint_path = checkpoint_management.latest_checkpoint(
-          checkpoint_directory)
-      status = root.restore(save_path=checkpoint_path)
-      input_value = constant_op.constant([[3.]])
-      def train_fn():
-        with backprop.GradientTape() as tape:
-          loss = model(input_value)
-        variables = model.trainable_variables
-        gradients = tape.gradient(loss, variables)
-        return optimizer.apply_gradients(zip(gradients, variables))
-      if not context.executing_eagerly():
-        train_fn = functools.partial(self.evaluate, train_fn())
-      status.initialize_or_restore()
-      # TODO(tanzheny): Add hyper variables to .variables(), and set them with
-      # set_weights etc.
-      variables_not_in_the_variables_property = [
-          obj for obj in optimizer._hyper.values()
-          if isinstance(obj, variables_lib.Variable)]
-      self.evaluate([
-          v.initializer for v in
-          optimizer.variables() + variables_not_in_the_variables_property])
-      train_fn()
-      model_save_path = root.save(file_prefix=checkpoint_prefix)
-      self.evaluate(optimizer.beta_1.assign(42.))
-      optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
-    del train_fn
-
-    # Restore into a graph with the optimizer
-    with test_util.device(use_gpu=True):
-      model = MyModel()
-      optimizer = adam.Adam(0.001)
-      root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model)
-      status = root.restore(save_path=model_save_path)
-      input_value = constant_op.constant([[3.]])
-      def train_fn1():
-        with backprop.GradientTape() as tape:
-          loss = model(input_value)
-        variables = model.trainable_variables
-        gradients = tape.gradient(loss, variables)
-        return optimizer.apply_gradients(zip(gradients, variables))
-      if not context.executing_eagerly():
-        train_fn1 = functools.partial(self.evaluate, train_fn1())
-      status.initialize_or_restore()
-      train_fn1()
-      with self.assertRaises(AssertionError):
-        status.assert_existing_objects_matched()
-      with self.assertRaises(AssertionError):
-        status.assert_consumed()
-    del train_fn1
-
-    # Make sure initialization doesn't clobber later restores
-    with test_util.device(use_gpu=True):
-      model = MyModel()
-      optimizer = adam.Adam(0.001, beta_1=1.0)
-      root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model)
-      opt_root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer)
-      status = root.restore(save_path=model_save_path)
-      init_only_optimizer_status = opt_root.restore(save_path=None)
-      optimizer_status = opt_root.restore(save_path=optimizer_save_path)
-      input_value = constant_op.constant([[3.]])
-      def train_fn2():
-        with backprop.GradientTape() as tape:
-          loss = model(input_value)
-        variables = model.trainable_variables
-        gradients = tape.gradient(loss, variables)
-        return optimizer.apply_gradients(zip(gradients, variables))
-      if not context.executing_eagerly():
-        train_fn2 = functools.partial(self.evaluate, train_fn2())
-      optimizer_status.run_restore_ops()
-      status.initialize_or_restore()
-      init_only_optimizer_status.initialize_or_restore()
-      train_fn2()
-      self.assertEqual(42., self.evaluate(optimizer.beta_1))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_restore_after_adding_empty_checkpointable_data_structure(self):
-    model = NonLayerCheckpointable()
-    checkpoint = checkpointable_utils.Checkpoint(model=model)
-    checkpoint.restore(None).initialize_or_restore()
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    save_path = checkpoint.save(checkpoint_prefix)
-
-    del model, checkpoint
-
-    model = NonLayerCheckpointable()
-    model.dict = {"a": 1}
-    model.list = {"b": 1}
-    checkpoint = checkpointable_utils.Checkpoint(model=model)
-    load_status = checkpoint.restore(save_path)
-    load_status.assert_existing_objects_matched().run_restore_ops()
-
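Plain dict and list attributes on a tracked object are wrapped and tracked
automatically, which is why the restore above still matches after the new
attributes appear. A sketch, assuming TF 2.x eager execution and a
hypothetical path:

import tensorflow as tf

root = tf.train.Checkpoint()
root.layers = [tf.Variable(1.0), tf.Variable(2.0)]  # Tracked list wrapper.
root.table = {"w": tf.Variable(3.0)}  # Tracked dict wrapper.
path = root.save("/tmp/structs/ckpt")

restored = tf.train.Checkpoint()
restored.layers = [tf.Variable(0.0), tf.Variable(0.0)]
restored.table = {"w": tf.Variable(0.0)}
restored.restore(path).assert_consumed()
assert restored.table["w"].numpy() == 3.0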
-
-class _ManualScope(tracking.AutoCheckpointable):
-
-  def __call__(self):
-    with variable_scope.variable_scope("ManualScope") as vs:
-      self.variable_scope = vs
-      with checkpointable_utils.capture_dependencies(template=self):
-        return self._build()
-
-  def _build(self):
-    return variable_scope.get_variable(name="in_manual_scope", shape=[])
-
-
-class TemplateTests(parameterized.TestCase, test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_checkpointable_save_restore(self):
-
-    def _templated():
-      v = variable_scope.get_variable(
-          "v", shape=[1], initializer=init_ops.zeros_initializer(),
-          use_resource=True)
-      v2 = variable_scope.get_variable(
-          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
-          use_resource=True)
-      manual = _ManualScope()
-      return v, v + 1., v2, manual, manual()
-
-    save_template = template.make_template("s1", _templated)
-    v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
-    six.assertCountEqual(
-        self,
-        [v1_save, v2_save, manual_scope, manual_scope_v, save_template],
-        checkpointable_utils.list_objects(save_template))
-    manual_dep, = manual_scope._checkpoint_dependencies
-    self.assertEqual("in_manual_scope", manual_dep.name)
-    self.assertIs(manual_scope_v, manual_dep.ref)
-    optimizer = adam.Adam(0.0)
-    save_root = checkpointable_utils.Checkpoint(
-        my_template=save_template, optimizer=optimizer)
-    optimizer.minimize(v1_save.read_value,
-                       var_list=[v1_save])
-    self.evaluate([v.initializer for v in save_template.variables])
-    self.evaluate([v.initializer for v in optimizer.variables()])
-    self.evaluate(v1_save.assign([12.]))
-    self.evaluate(v2_save.assign([14.]))
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    save_path = save_root.save(checkpoint_prefix)
-
-    load_template = template.make_template("s2", _templated)
-    load_optimizer = adam.Adam(0.0)
-    load_root = checkpointable_utils.Checkpoint(
-        my_template=load_template, optimizer=load_optimizer)
-    status = load_root.restore(save_path)
-    var, var_plus_one, var2, _, _ = load_template()
-    load_optimizer.minimize(var.read_value, var_list=[var])
-    self.assertLen(load_template._checkpoint_dependencies, 3)
-    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
-    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
-    self.assertEqual("ManualScope",
-                     load_template._checkpoint_dependencies[2].name)
-    status.assert_consumed().run_restore_ops()
-    self.assertAllEqual([12.], self.evaluate(var))
-    self.assertAllEqual([13.], self.evaluate(var_plus_one))
-    self.assertAllEqual([14.], self.evaluate(var2))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_checkpointable_save_restore_nested(self):
-
-    def _inner_template():
-      v = variable_scope.get_variable(
-          "v", shape=[1], initializer=init_ops.zeros_initializer())
-      return v
-
-    def _outer_template():
-      first_inner = template.make_template("i1", _inner_template)
-      second_inner = template.make_template("i2", _inner_template)
-      v1 = first_inner()
-      v2 = second_inner()
-      v3 = second_inner()
-      return (first_inner, second_inner), (v1, v2, v3)
-
-    with variable_scope.variable_scope("ignored"):
-      save_template = template.make_template("s1", _outer_template)
-      save_root = checkpointable_utils.Checkpoint(my_template=save_template)
-      (inner_template_one, inner_template_two), _ = save_template()
-    self.evaluate(inner_template_one.variables[0].assign([20.]))
-    self.evaluate(inner_template_two.variables[0].assign([25.]))
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    save_path = save_root.save(checkpoint_prefix)
-
-    load_template = template.make_template("s2", _outer_template)
-    load_root = checkpointable_utils.Checkpoint(my_template=load_template)
-    status = load_root.restore(save_path)
-    (inner_template_one, inner_template_two), (v1, v2, v3) = load_template()
-    outer_template_dependencies = load_root.my_template._checkpoint_dependencies
-    self.assertLen(outer_template_dependencies, 2)
-    self.assertEqual("i1", outer_template_dependencies[0].name)
-    self.assertIs(inner_template_one, outer_template_dependencies[0].ref)
-    self.assertEqual("i2", outer_template_dependencies[1].name)
-    self.assertIs(inner_template_two, outer_template_dependencies[1].ref)
-    self.assertLen(inner_template_one._checkpoint_dependencies, 1)
-    self.assertEqual("v", inner_template_one._checkpoint_dependencies[0].name)
-    self.assertLen(inner_template_two._checkpoint_dependencies, 1)
-    self.assertEqual("v", inner_template_two._checkpoint_dependencies[0].name)
-    status.assert_consumed().run_restore_ops()
-    self.assertAllEqual([20.], self.evaluate(v1))
-    self.assertAllEqual([25.], self.evaluate(v2))
-    self.assertAllEqual([25.], self.evaluate(v3))
-
-
-class CheckpointCompatibilityTests(test.TestCase):
-
-  def _initialized_model(self):
-    input_value = constant_op.constant([[3.]])
-    model = MyModel()
-    optimizer = adam.Adam(0.001)
-    root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model)
-    with backprop.GradientTape() as tape:
-      loss = model(input_value)
-    variables = model.trainable_variables
-    gradients = tape.gradient(loss, variables)
-    train_op = optimizer.apply_gradients(zip(gradients, variables))
-    self.evaluate(checkpointable_utils.gather_initializers(
-        root_checkpointable))
-    self.evaluate(train_op)
-    # A regular variable, a slot variable, and a non-slot Optimizer variable
-    # with known values to check when loading.
-    self.evaluate(model._named_dense.bias.assign([1.]))
-    self.evaluate(optimizer.get_slot(
-        var=model._named_dense.bias, slot_name="m").assign([2.]))
-    self.evaluate(optimizer.beta_1.assign(3.))
-    return root_checkpointable
-
-  def _set_sentinels(self, root_checkpointable):
-    self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
-    self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, slot_name="m")
-        .assign([102.]))
-    self.evaluate(root_checkpointable.optimizer.beta_1.assign(103.))
-
-  def _check_sentinels(self, root_checkpointable):
-    self.assertAllEqual(
-        [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
-    self.assertAllEqual([2.], self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, slot_name="m")))
-    self.assertAllEqual(3.,
-                        self.evaluate(root_checkpointable.optimizer.beta_1))
-
-  def _write_name_based_checkpoint(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    with context.graph_mode():
-      save_graph = ops.Graph()
-      with save_graph.as_default(), self.session(
-          graph=save_graph) as session:
-        root = self._initialized_model()
-        name_saver = saver_lib.Saver()
-        return name_saver.save(
-            sess=session, save_path=checkpoint_prefix,
-            global_step=root.optimizer.iterations)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testLoadFromNameBasedSaver(self):
-    """Save a name-based checkpoint, load it using the object-based API."""
-    with test_util.device(use_gpu=True):
-      save_path = self._write_name_based_checkpoint()
-      root = self._initialized_model()
-      self._set_sentinels(root)
-      with self.assertRaises(AssertionError):
-        self._check_sentinels(root)
-      object_saver = checkpointable_utils.CheckpointableSaver(
-          graph_view.ObjectGraphView(root))
-      self._set_sentinels(root)
-      status = object_saver.restore(save_path)
-      if context.executing_eagerly():
-        self._check_sentinels(root)
-        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
-          status.assert_consumed()
-        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
-          status.assert_existing_objects_matched()
-        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
-          status.assert_nontrivial_match()
-      else:
-        # When graph building, we haven't read any keys, so we don't know
-        # whether the restore will be complete.
-        with self.assertRaisesRegexp(AssertionError, "not restored"):
-          status.assert_consumed()
-        with self.assertRaisesRegexp(AssertionError, "not restored"):
-          status.assert_existing_objects_matched()
-        with self.assertRaisesRegexp(AssertionError, "not restored"):
-          status.assert_nontrivial_match()
-      status.run_restore_ops()
-      self._check_sentinels(root)
-      self._set_sentinels(root)
-      status = object_saver.restore(save_path)
-      status.initialize_or_restore()
-      self._check_sentinels(root)
-      # Check that there is no error when keys are missing from the name-based
-      # checkpoint.
-      root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable([1.])
-      status = object_saver.restore(save_path)
-      with self.assertRaises(AssertionError):
-        status.assert_existing_objects_matched()
-
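Name-based checkpoints (written by tf.compat.v1.train.Saver) carry no object
graph, which is why the status assertions above raise. The difference is
visible in the stored keys; a sketch, assuming TF 2.x and a hypothetical
path:

import tensorflow as tf

ckpt = tf.train.Checkpoint(v=tf.Variable(1.0))
path = ckpt.save("/tmp/obj/ckpt")
for name, shape in tf.train.list_variables(path):
  print(name, shape)
# Object-based keys look like 'v/.ATTRIBUTES/VARIABLE_VALUE', alongside a
# serialized '_CHECKPOINTABLE_OBJECT_GRAPH' entry; a name-based checkpoint
# instead stores bare variable names such as 'my_model/dense/kernel'.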
-  def testSaveGraphLoadEager(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    with context.graph_mode():
-      save_graph = ops.Graph()
-      with save_graph.as_default(), self.session(
-          graph=save_graph) as session:
-        root = self._initialized_model()
-        save_path = root.save(session=session, file_prefix=checkpoint_prefix)
-    with context.eager_mode():
-      root = self._initialized_model()
-      self._set_sentinels(root)
-      root.restore(save_path).assert_consumed()
-      self._check_sentinels(root)
-
-  def testSaveEagerLoadGraph(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    with context.eager_mode():
-      root = self._initialized_model()
-      save_path = root.save(file_prefix=checkpoint_prefix)
-    with context.graph_mode():
-      save_graph = ops.Graph()
-      with save_graph.as_default(), self.session(
-          graph=save_graph):
-        root = self._initialized_model()
-        self._set_sentinels(root)
-        root.restore(save_path).assert_consumed().run_restore_ops()
-        self._check_sentinels(root)
-
-
-class PythonMetadataTests(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  def testSaveLoad(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    dense = core.Dense(1)
-    checkpoint = checkpointable_utils.Checkpoint(dense=dense)
-    dense(constant_op.constant([[1.]]))
-    checkpoint.restore(None).initialize_or_restore()
-    save_path = checkpoint.save(checkpoint_prefix)
-
-    def _get_dense_node_from_object_graph(object_graph_proto):
-      root_node = object_graph_proto.nodes[0]
-      for child in root_node.children:
-        if child.local_name == "dense":
-          break
-      else:
-        raise AssertionError(
-            "Expected a 'dense' dependency of root, didn't find one.")
-      dense_node = object_graph_proto.nodes[child.node_id]  # pylint: disable=undefined-loop-variable
-      self.assertEqual(1, len(dense_node.attributes))
-      reader = pywrap_tensorflow.NewCheckpointReader(save_path)
-      layer_json = reader.get_tensor(dense_node.attributes[0].checkpoint_key)
-      return json.loads(layer_json.decode("utf-8"))
-
-    layer_data = _get_dense_node_from_object_graph(
-        checkpointable_utils.object_metadata(save_path))
-    self.assertEqual("Dense", layer_data["class_name"])
-    self.assertEqual(1, layer_data["config"]["units"])
-
-    # Check that no new ops are added to the graph the second time we save.
-    ops.get_default_graph().finalize()
-
-    dense.units = 42
-    save_path = checkpoint.save(checkpoint_prefix)
-    layer_data = _get_dense_node_from_object_graph(
-        checkpointable_utils.object_metadata(save_path))
-    self.assertEqual("Dense", layer_data["class_name"])
-    self.assertEqual(42, layer_data["config"]["units"])
-
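The raw-reader pattern used by _get_dense_node_from_object_graph has a
public counterpart: tf.train.load_checkpoint returns a reader whose
get_tensor accepts the same checkpoint keys. A sketch, assuming TF 2.x and a
hypothetical path:

import tensorflow as tf

path = tf.train.Checkpoint(v=tf.Variable([1.0, 2.0])).save("/tmp/meta/ckpt")
reader = tf.train.load_checkpoint(path)
print(reader.get_tensor("v/.ATTRIBUTES/VARIABLE_VALUE"))  # [1. 2.]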
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/training/checkpointable/util_with_v1_optimizers_test.py b/tensorflow/python/training/checkpointable/util_with_v1_optimizers_test.py
deleted file mode 100644
index d7158c0..0000000
--- a/tensorflow/python/training/checkpointable/util_with_v1_optimizers_test.py
+++ /dev/null
@@ -1,933 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for object-based saving which use tf.train.* optimizers."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import os
-
-import six
-
-from tensorflow.python.client import session as session_lib
-from tensorflow.python.distribute import mirrored_strategy
-from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
-from tensorflow.python.eager import def_function
-from tensorflow.python.eager import test
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.layers import core
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import template
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.training import adam
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import graph_view
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
-
-
-class NonLayerCheckpointable(tracking.AutoCheckpointable):
-
-  def __init__(self):
-    super(NonLayerCheckpointable, self).__init__()
-    self.a_variable = checkpointable_utils.add_variable(
-        self, name="a_variable", shape=[])
-
-
-# pylint: disable=not-callable
-class MyModel(training.Model):
-  """A concrete Model for testing."""
-
-  def __init__(self):
-    super(MyModel, self).__init__()
-    self._named_dense = core.Dense(1, use_bias=True)
-    self._second = core.Dense(1, use_bias=False)
-    # We can still track Checkpointables which aren't Layers.
-    self._non_layer = NonLayerCheckpointable()
-
-  def call(self, values):
-    ret = self._second(self._named_dense(values))
-    return ret
-
-
-class CheckpointingTests(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
-  def testNamingWithOptimizer(self):
-    input_value = constant_op.constant([[3.]])
-    model = MyModel()
-    # A nuisance Model using the same optimizer. Its slot variables should not
-    # go in the checkpoint, since it is never depended on.
-    other_model = MyModel()
-    optimizer = adam.AdamOptimizer(0.001)
-    optimizer_step = training_util.get_or_create_global_step()
-    root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
-    if context.executing_eagerly():
-      optimizer.minimize(
-          lambda: model(input_value),
-          global_step=optimizer_step)
-      optimizer.minimize(
-          lambda: other_model(input_value),
-          global_step=optimizer_step)
-    else:
-      train_op = optimizer.minimize(
-          model(input_value), global_step=optimizer_step)
-      optimizer.minimize(
-          other_model(input_value),
-          global_step=optimizer_step)
-      self.evaluate(checkpointable_utils.gather_initializers(
-          root_checkpointable))
-      self.evaluate(train_op)
-    named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
-        root_checkpointable).serialize_object_graph()
-    expected_checkpoint_names = (
-        # Created in the root node, so no prefix.
-        "optimizer_step",
-        "model/_second/kernel",
-        "model/_named_dense/kernel",
-        "model/_named_dense/bias",
-        # non-Layer dependency of the model
-        "model/_non_layer/a_variable",
-        # The optimizer creates two non-slot variables
-        "optimizer/beta1_power",
-        "optimizer/beta2_power",
-        # Slot variables
-        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
-        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
-        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
-        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
-        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
-        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
-    )
-    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
-    expected_checkpoint_names = [
-        name + suffix for name in expected_checkpoint_names]
-    # The Dense layers also save get_config() JSON
-    expected_checkpoint_names.extend(
-        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
-         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
-    named_variables = {v.name: v for v in named_variables}
-    six.assertCountEqual(self, expected_checkpoint_names,
-                         named_variables.keys())
-    # Check that we've mapped to the right variable objects (not exhaustive)
-    self.assertEqual(
-        "global_step",
-        named_variables["optimizer_step" + suffix].full_name)
-    self.assertEqual(
-        "my_model/dense_1/kernel",
-        named_variables["model/_second/kernel" + suffix].full_name)
-    self.assertEqual(
-        "my_model/dense/kernel",
-        named_variables["model/_named_dense/kernel" + suffix].full_name)
-    self.assertEqual(
-        "beta1_power",
-        named_variables["optimizer/beta1_power" + suffix].full_name)
-    self.assertEqual(
-        "beta2_power",
-        named_variables["optimizer/beta2_power" + suffix].full_name)
-    # Spot check the generated protocol buffers.
-    self.assertEqual("optimizer",
-                     serialized_graph.nodes[0].children[1].local_name)
-    optimizer_node = serialized_graph.nodes[
-        serialized_graph.nodes[0].children[1].node_id]
-    self.assertEqual("beta1_power",
-                     optimizer_node.children[0].local_name)
-    self.assertEqual("beta1_power",
-                     serialized_graph.nodes[optimizer_node.children[0].node_id]
-                     .attributes[0].full_name)
-    self.assertEqual(
-        "my_model/dense/kernel",
-        serialized_graph.nodes[optimizer_node.slot_variables[0]
-                               .original_variable_node_id]
-        .attributes[0].full_name)
-    # We strip off the :0 suffix, as variable.name-based saving does.
-    self.assertEqual(
-        "my_model/dense/kernel/Adam",
-        serialized_graph.nodes[optimizer_node.slot_variables[0]
-                               .slot_variable_node_id]
-        .attributes[0].full_name)
-    self.assertEqual(
-        "my_model/dense/kernel/Adam:0",
-        optimizer.get_slot(
-            var=model._named_dense.kernel,
-            name="m").name)
-    self.assertEqual(
-        "model/_named_dense/kernel" + suffix,
-        serialized_graph.nodes[
-            optimizer_node.slot_variables[0]
-            .original_variable_node_id].attributes[0].checkpoint_key)
-    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
-    self.assertEqual(
-        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
-        serialized_graph.nodes[
-            optimizer_node.slot_variables[0]
-            .slot_variable_node_id].attributes[0].checkpoint_key)
-
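The key scheme asserted above is mechanical: each key is the attribute path
from the root object plus a '/.ATTRIBUTES/<attribute>' suffix, with slot
variables additionally spliced in under the '.OPTIMIZER_SLOT/<optimizer>/<slot>'
segment shown in the expected names. A public-API sketch of the plain case,
assuming TF 2.x and a hypothetical path:

import tensorflow as tf

root = tf.train.Checkpoint()
root.child = tf.train.Checkpoint(kernel=tf.Variable([1.0]))
path = root.save("/tmp/names/ckpt")
for name, _ in tf.train.list_variables(path):
  print(name)
# Prints '_CHECKPOINTABLE_OBJECT_GRAPH', 'save_counter/.ATTRIBUTES/...', and
# 'child/kernel/.ATTRIBUTES/VARIABLE_VALUE': the attribute path from the
# root, plus the suffix the assertions above build by hand.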
-  @test_util.run_in_graph_and_eager_modes
-  def testSaveRestore(self):
-    model = MyModel()
-    optimizer = adam.AdamOptimizer(0.001)
-    root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model)
-    input_value = constant_op.constant([[3.]])
-    if context.executing_eagerly():
-      optimizer.minimize(
-          lambda: model(input_value))
-    else:
-      train_op = optimizer.minimize(model(input_value))
-      # TODO(allenl): Make initialization more pleasant when graph building.
-      root_checkpointable.save_counter  # pylint: disable=pointless-statement
-      self.evaluate(checkpointable_utils.gather_initializers(
-          root_checkpointable))
-      self.evaluate(train_op)
-    prefix = os.path.join(self.get_temp_dir(), "ckpt")
-    self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
-    m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
-    self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
-    save_path = root_checkpointable.save(file_prefix=prefix)
-    self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
-    self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
-    optimizer_variables = self.evaluate(optimizer.variables())
-    self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
-    # Immediate restoration
-    status = root_checkpointable.restore(save_path=save_path).assert_consumed()
-    status.run_restore_ops()
-    self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
-    self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
-    self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
-    if not context.executing_eagerly():
-      return  # Restore-on-create is only supported when executing eagerly
-    on_create_model = MyModel()
-    on_create_optimizer = adam.AdamOptimizer(
-        0.001,
-        # Preserve beta1_power and beta2_power when applying gradients so we can
-        # test that they've been restored correctly.
-        beta1=1.0, beta2=1.0)
-    on_create_root = checkpointable_utils.Checkpoint(
-        optimizer=on_create_optimizer, model=on_create_model)
-    # Deferred restoration
-    status = on_create_root.restore(save_path=save_path)
-    status.assert_nontrivial_match()
-    status.assert_existing_objects_matched()
-    with self.assertRaises(AssertionError):
-      status.assert_consumed()
-    on_create_model(constant_op.constant([[3.]]))  # create variables
-    self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
-    self.assertAllEqual([42.],
-                        self.evaluate(
-                            on_create_model._named_dense.variables[1]))
-    on_create_m_bias_slot = on_create_optimizer.get_slot(
-        on_create_model._named_dense.variables[1], "m")
-    status.assert_existing_objects_matched()
-    with self.assertRaises(AssertionError):
-      status.assert_consumed()
-    # Optimizer slot variables are created when the original variable is
-    # restored.
-    self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
-    self.assertAllEqual(optimizer_variables[2:],
-                        self.evaluate(on_create_optimizer.variables()))
-    dummy_var = resource_variable_ops.ResourceVariable([1.])
-    on_create_optimizer.minimize(loss=dummy_var.read_value)
-    status.assert_existing_objects_matched()
-    status.assert_consumed()
-    beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators()
-    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power))
-    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power))
-
-  # TODO(allenl): Debug garbage created by this test in python3.
-  def testDeferredRestorationUsageEager(self):
-    """An idiomatic eager execution example."""
-    num_training_steps = 10
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    for training_continuation in range(3):
-      model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001)
-      root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model,
-          optimizer_step=training_util.get_or_create_global_step())
-      root.restore(checkpoint_management.latest_checkpoint(
-          checkpoint_directory))
-      for _ in range(num_training_steps):
-        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
-        input_value = constant_op.constant([[3.]])
-        optimizer.minimize(
-            lambda: model(input_value),  # pylint: disable=cell-var-from-loop
-            global_step=root.optimizer_step)
-      root.save(file_prefix=checkpoint_prefix)
-      self.assertEqual((training_continuation + 1) * num_training_steps,
-                       root.optimizer_step.numpy())
-
-  def testEagerDistributionStrategy(self):
-    num_training_steps = 10
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-
-    def _train_fn(optimizer, model):
-      input_value = constant_op.constant([[3.]])
-      optimizer.minimize(
-          functools.partial(model, input_value),
-          global_step=root.optimizer_step)
-
-    for training_continuation in range(3):
-      strategy = mirrored_strategy.MirroredStrategy()
-      with strategy.scope():
-        model = MyModel()
-        optimizer = adam.AdamOptimizer(0.001)
-        root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model,
-            optimizer_step=training_util.get_or_create_global_step())
-        root.restore(checkpoint_management.latest_checkpoint(
-            checkpoint_directory))
-
-        for _ in range(num_training_steps):
-          strategy.extended.call_for_each_replica(
-              functools.partial(_train_fn, optimizer, model))
-        root.save(file_prefix=checkpoint_prefix)
-        self.assertEqual((training_continuation + 1) * num_training_steps,
-                         root.optimizer_step.numpy())
-
-  def testGraphDistributionStrategy(self):
-    self.skipTest("b/121381184")
-    num_training_steps = 10
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-
-    def _train_fn(optimizer, model):
-      input_value = constant_op.constant([[3.]])
-      return optimizer.minimize(
-          functools.partial(model, input_value),
-          global_step=root.optimizer_step)
-
-    for training_continuation in range(3):
-      with ops.Graph().as_default():
-        strategy = mirrored_strategy.MirroredStrategy()
-        with strategy.scope():
-          model = MyModel()
-          optimizer = adam.AdamOptimizer(0.001)
-          root = checkpointable_utils.Checkpoint(
-              optimizer=optimizer, model=model,
-              optimizer_step=training_util.get_or_create_global_step())
-          status = root.restore(checkpoint_management.latest_checkpoint(
-              checkpoint_directory))
-          train_op = strategy.extended.call_for_each_replica(
-              functools.partial(_train_fn, optimizer, model))
-          with self.session() as session:
-            if training_continuation > 0:
-              status.assert_consumed()
-            status.initialize_or_restore()
-            for _ in range(num_training_steps):
-              session.run(train_op)
-            root.save(file_prefix=checkpoint_prefix)
-        self.assertEqual((training_continuation + 1) * num_training_steps,
-                         root.optimizer_step.numpy())
-
-  def testUsageGraph(self):
-    """Expected usage when graph building."""
-    with context.graph_mode():
-      num_training_steps = 10
-      checkpoint_directory = self.get_temp_dir()
-      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-      for training_continuation in range(3):
-        with ops.Graph().as_default():
-          model = MyModel()
-          optimizer = adam.AdamOptimizer(0.001)
-          root = checkpointable_utils.Checkpoint(
-              optimizer=optimizer, model=model,
-              global_step=training_util.get_or_create_global_step())
-          input_value = constant_op.constant([[3.]])
-          train_op = optimizer.minimize(
-              model(input_value),
-              global_step=root.global_step)
-          checkpoint_path = checkpoint_management.latest_checkpoint(
-              checkpoint_directory)
-          with self.session(graph=ops.get_default_graph()) as session:
-            status = root.restore(save_path=checkpoint_path)
-            status.initialize_or_restore(session=session)
-            if checkpoint_path is None:
-              self.assertEqual(0, training_continuation)
-              with self.assertRaises(AssertionError):
-                status.assert_consumed()
-              with self.assertRaises(AssertionError):
-                status.assert_existing_objects_matched()
-            else:
-              status.assert_consumed()
-              status.assert_existing_objects_matched()
-            for _ in range(num_training_steps):
-              session.run(train_op)
-            root.save(file_prefix=checkpoint_prefix, session=session)
-            self.assertEqual((training_continuation + 1) * num_training_steps,
-                             session.run(root.global_step))
-            self.assertEqual(training_continuation + 1,
-                             session.run(root.save_counter))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testAgnosticUsage(self):
-    """Graph/eager agnostic usage."""
-    # Does create garbage when executing eagerly due to ops.Graph() creation.
-    num_training_steps = 10
-    checkpoint_directory = self.get_temp_dir()
-    for training_continuation in range(3):
-      with test_util.device(use_gpu=True):
-        model = MyModel()
-        optimizer = adam.AdamOptimizer(0.001)
-        root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model,
-            global_step=training_util.get_or_create_global_step())
-        manager = checkpoint_management.CheckpointManager(
-            root, checkpoint_directory, max_to_keep=1)
-        status = root.restore(save_path=manager.latest_checkpoint)
-        input_value = constant_op.constant([[3.]])
-        train_fn = functools.partial(
-            optimizer.minimize,
-            functools.partial(model, input_value),
-            global_step=root.global_step)
-        if not context.executing_eagerly():
-          train_fn = functools.partial(self.evaluate, train_fn())
-        status.initialize_or_restore()
-        for _ in range(num_training_steps):
-          train_fn()
-        manager.save()
-        self.assertEqual((training_continuation + 1) * num_training_steps,
-                         self.evaluate(root.global_step))
-        self.assertEqual(training_continuation + 1,
-                         self.evaluate(root.save_counter))
-
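CheckpointManager, used above with max_to_keep=1, owns the numbering, the
`checkpoint` state file, and deletion of stale checkpoints. A minimal loop
using the public API, assuming TF 2.x and a hypothetical directory:

import tensorflow as tf

ckpt = tf.train.Checkpoint(step=tf.Variable(0))
manager = tf.train.CheckpointManager(ckpt, "/tmp/managed", max_to_keep=1)
ckpt.restore(manager.latest_checkpoint)  # No-op on the first run.
for _ in range(10):
  ckpt.step.assign_add(1)
manager.save()  # Checkpoints beyond max_to_keep are deleted.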
-  # pylint: disable=cell-var-from-loop
-  @test_util.run_in_graph_and_eager_modes
-  def testWithDefun(self):
-    num_training_steps = 2
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    for training_continuation in range(3):
-      with test_util.device(use_gpu=True):
-        model = MyModel()
-        # Don't actually train so we can test variable values
-        optimizer = adam.AdamOptimizer(0.)
-        root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model,
-            global_step=training_util.get_or_create_global_step())
-        checkpoint_path = checkpoint_management.latest_checkpoint(
-            checkpoint_directory)
-        status = root.restore(save_path=checkpoint_path)
-        def train_fn():
-          @def_function.function
-          def _call_model(x):
-            return model(x)
-          with backprop.GradientTape() as tape:
-            loss = _call_model(constant_op.constant([[3.]]))
-          gradients = tape.gradient(loss, model.variables)
-          return optimizer.apply_gradients(zip(gradients, model.variables),
-                                           global_step=root.global_step)
-        if not context.executing_eagerly():
-          train_fn = functools.partial(
-              self.evaluate, train_fn())
-        status.initialize_or_restore()
-        for _ in range(num_training_steps):
-          train_fn()
-        if training_continuation > 0:
-          status.assert_consumed()
-          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
-        else:
-          self.evaluate(model.variables[0].assign([[42.]]))
-        root.save(file_prefix=checkpoint_prefix)
-        self.assertEqual((training_continuation + 1) * num_training_steps,
-                         self.evaluate(root.global_step))
-        self.assertEqual(training_continuation + 1,
-                         self.evaluate(root.save_counter))
-  # pylint: enable=cell-var-from-loop
-
-  def _get_checkpoint_name(self, name):
-    root = tracking.AutoCheckpointable()
-    checkpointable_utils.add_variable(
-        root, name=name, shape=[1, 2], dtype=dtypes.float64)
-    (named_variable,), _, _ = checkpointable_utils._serialize_object_graph(
-        root, saveables_cache=None)
-    with ops.name_scope("root/" + named_variable.name):
-      pass  # Make sure we can use this as an op name if we prefix it.
-    return named_variable.name
-
-  def testAnonymousVarsInInit(self):
-
-    class Model(training.Model):
-
-      def __init__(self):
-        super(Model, self).__init__()
-        self.w = resource_variable_ops.ResourceVariable(0.0)
-        self.b = resource_variable_ops.ResourceVariable(0.0)
-        self.vars = [self.w, self.b]
-
-      def call(self, x):
-        return x * self.w + self.b
-
-    with context.eager_mode():
-      model = Model()
-      optimizer = adam.AdamOptimizer(learning_rate=0.05)
-      checkpoint_directory = self.get_temp_dir()
-      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-      checkpoint = checkpointable_utils.Checkpoint(
-          model=model, optimizer=optimizer)
-      for _ in range(2):
-        checkpoint.save(checkpoint_prefix)
-        with backprop.GradientTape() as tape:
-          loss = (constant_op.constant(1.)
-                  - model(constant_op.constant(1.))) ** 2
-        grad = tape.gradient(loss, model.vars)
-        optimizer.apply_gradients(
-            [(g, v) for g, v in zip(grad, model.vars)])
-
-  @test_util.run_in_graph_and_eager_modes
-  def testDeferredSlotRestoration(self):
-    checkpoint_directory = self.get_temp_dir()
-
-    root = checkpointable_utils.Checkpoint()
-    root.var = checkpointable_utils.add_variable(
-        root, name="var", initializer=0.)
-    optimizer = adam.AdamOptimizer(0.1)
-    if context.executing_eagerly():
-      optimizer.minimize(root.var.read_value)
-    else:
-      train_op = optimizer.minimize(root.var)
-      # Note that `optimizer` has not been added as a dependency of
-      # `root`. Create a one-off grouping so that slot variables for `root.var`
-      # get initialized too.
-      self.evaluate(checkpointable_utils.gather_initializers(
-          checkpointable_utils.Checkpoint(root=root, optimizer=optimizer)))
-      self.evaluate(train_op)
-    self.evaluate(state_ops.assign(root.var, 12.))
-    no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots"))
-    root.optimizer = optimizer
-    self.evaluate(state_ops.assign(root.var, 13.))
-    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
-                                   14.))
-    slots_path = root.save(os.path.join(checkpoint_directory, "with_slots"))
-    new_root = checkpointable_utils.Checkpoint()
-    # Load the slot-containing checkpoint (deferred), then immediately overwrite
-    # the non-slot variable (also deferred).
-    slot_status = new_root.restore(slots_path)
-    no_slot_status = new_root.restore(no_slots_path)
-    with self.assertRaises(AssertionError):
-      no_slot_status.assert_consumed()
-    new_root.var = checkpointable_utils.add_variable(
-        new_root, name="var", shape=[])
-    no_slot_status.assert_consumed()
-    no_slot_status.run_restore_ops()
-    self.assertEqual(12., self.evaluate(new_root.var))
-    new_root.optimizer = adam.AdamOptimizer(0.1)
-    slot_status.assert_existing_objects_matched()
-    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
-      slot_status.assert_consumed()
-    self.assertEqual(12., self.evaluate(new_root.var))
-    if context.executing_eagerly():
-      # Slot variables are only created with restoring initializers when
-      # executing eagerly.
-      self.assertEqual(14., self.evaluate(
-          new_root.optimizer.get_slot(name="m", var=new_root.var)))
-    else:
-      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
-                    None)
-    if context.executing_eagerly():
-      new_root.optimizer.minimize(new_root.var.read_value)
-    else:
-      train_op = new_root.optimizer.minimize(new_root.var)
-      # The slot variable now exists; restore() didn't create it, but we should
-      # now have a restore op for it.
-      slot_status.run_restore_ops()
-      self.assertEqual(14., self.evaluate(
-          new_root.optimizer.get_slot(name="m", var=new_root.var)))
-      self.evaluate(train_op)
-    slot_status.assert_consumed()
-
-  def testManySavesGraph(self):
-    """Saves after the first should not modify the graph."""
-    with context.graph_mode():
-      graph = ops.Graph()
-      with graph.as_default(), self.session(graph):
-        checkpoint_directory = self.get_temp_dir()
-        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-        obj = checkpointable_utils.Checkpoint()
-        obj.var = variable_scope.get_variable(name="v", initializer=0.)
-        obj.opt = adam.AdamOptimizer(0.1)
-        obj.opt.minimize(obj.var.read_value())
-        self.evaluate(checkpointable_utils.gather_initializers(obj))
-        obj.save(checkpoint_prefix)
-        before_ops = graph.get_operations()
-        obj.save(checkpoint_prefix)
-        self.assertEqual(before_ops, graph.get_operations())
-
-  def testManyRestoresGraph(self):
-    """Restores after the first should not modify the graph."""
-    with context.graph_mode():
-      graph = ops.Graph()
-      with graph.as_default(), self.session(graph):
-        checkpoint_directory = self.get_temp_dir()
-        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-        obj = checkpointable_utils.Checkpoint()
-        obj.var = variable_scope.get_variable(name="v", initializer=0.)
-        obj.opt = adam.AdamOptimizer(0.1)
-        obj.opt.minimize(obj.var.read_value())
-        self.evaluate(checkpointable_utils.gather_initializers(obj))
-        save_path = obj.save(checkpoint_prefix)
-        obj.restore(save_path)
-        before_ops = graph.get_operations()
-        obj.restore(save_path)
-        self.assertEqual(before_ops, graph.get_operations())
-
-  def testMultipleGraphsNonSlotVariables(self):
-    with context.graph_mode():
-      checkpoint_directory = self.get_temp_dir()
-      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-      optimizer = adam.AdamOptimizer(0.001)
-      # Construct a model in one graph
-      first_graph = ops.Graph()
-      first_session = session_lib.Session(graph=first_graph)
-      with first_graph.as_default(), first_session.as_default():
-        first_variable = resource_variable_ops.ResourceVariable([1.])
-        first_root_checkpointable = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, variable=first_variable)
-        train_op = optimizer.minimize(first_variable.read_value)
-        self.evaluate(checkpointable_utils.gather_initializers(
-            first_root_checkpointable))
-        self.evaluate(train_op)
-        self.evaluate(first_variable.assign([1.]))
-        self.evaluate(optimizer.get_slot(
-            var=first_variable, name="m").assign([2.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(3.))
-
-      # Save and load in a second graph
-      second_graph = ops.Graph()
-      with second_graph.as_default(), session_lib.Session(graph=second_graph):
-        second_variable = resource_variable_ops.ResourceVariable([1.])
-        second_root_checkpointable = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, variable=second_variable)
-        train_op = optimizer.minimize(second_variable.read_value)
-        second_root_checkpointable.restore(None).initialize_or_restore()
-        self.evaluate(train_op)
-        self.evaluate(second_variable.assign([4.]))
-        self.evaluate(optimizer.get_slot(
-            var=second_variable, name="m").assign([5.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(6.))
-        save_path = second_root_checkpointable.save(checkpoint_prefix)
-        self.evaluate(second_variable.assign([7.]))
-        self.evaluate(optimizer.get_slot(
-            var=second_variable, name="m").assign([8.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
-        status = second_root_checkpointable.restore(save_path)
-        status.assert_consumed().run_restore_ops()
-        self.assertAllEqual([4.], self.evaluate(second_variable))
-        self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
-            var=second_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
-
-      # Check that the first graph is unmolested
-      with first_graph.as_default(), first_session.as_default():
-        self.assertAllEqual([1.], self.evaluate(first_variable))
-        self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
-            var=first_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(3., self.evaluate(beta1_power))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_initialize_if_not_restoring(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
-    with test_util.device(use_gpu=True):
-      model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001)
-      root = checkpointable_utils.Checkpoint(
-          model=model,  # Do not save the optimizer with the checkpoint.
-          global_step=training_util.get_or_create_global_step())
-      optimizer_checkpoint = checkpointable_utils.Checkpoint(
-          optimizer=optimizer)
-
-      checkpoint_path = checkpoint_management.latest_checkpoint(
-          checkpoint_directory)
-      status = root.restore(save_path=checkpoint_path)
-      input_value = constant_op.constant([[3.]])
-      train_fn = functools.partial(
-          optimizer.minimize,
-          functools.partial(model, input_value),
-          global_step=root.global_step)
-      if not context.executing_eagerly():
-        train_fn = functools.partial(self.evaluate, train_fn())
-      status.initialize_or_restore()
-      self.evaluate([v.initializer for v in optimizer.variables()])
-      train_fn()
-      model_save_path = root.save(file_prefix=checkpoint_prefix)
-      self.evaluate(optimizer.variables()[0].assign(42.))
-      optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
-
-    # Restore into a graph with the optimizer
-    with test_util.device(use_gpu=True):
-      model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001)
-      root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model,
-          global_step=training_util.get_or_create_global_step())
-      status = root.restore(save_path=model_save_path)
-      input_value = constant_op.constant([[3.]])
-      train_fn = functools.partial(
-          optimizer.minimize,
-          functools.partial(model, input_value),
-          global_step=root.global_step)
-      if not context.executing_eagerly():
-        train_fn = functools.partial(self.evaluate, train_fn())
-      status.initialize_or_restore()
-      train_fn()
-      with self.assertRaises(AssertionError):
-        status.assert_existing_objects_matched()
-      with self.assertRaises(AssertionError):
-        status.assert_consumed()
-
-    # Make sure initialization doesn't clobber later restores
-    with test_util.device(use_gpu=True):
-      model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001, beta1=1.0)
-      root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model,
-          global_step=training_util.get_or_create_global_step())
-      opt_root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer)
-      status = root.restore(save_path=model_save_path)
-      init_only_optimizer_status = opt_root.restore(save_path=None)
-      optimizer_status = opt_root.restore(save_path=optimizer_save_path)
-      input_value = constant_op.constant([[3.]])
-      train_fn = functools.partial(
-          optimizer.minimize,
-          functools.partial(model, input_value),
-          global_step=root.global_step)
-      if not context.executing_eagerly():
-        train_fn = functools.partial(self.evaluate, train_fn())
-      optimizer_status.run_restore_ops()
-      status.initialize_or_restore()
-      init_only_optimizer_status.initialize_or_restore()
-      train_fn()
-      self.assertEqual(42., self.evaluate(optimizer.variables()[0]))
-
-
-class _ManualScope(tracking.AutoCheckpointable):
-
-  def __call__(self):
-    with variable_scope.variable_scope("ManualScope") as vs:
-      self.variable_scope = vs
-      with checkpointable_utils.capture_dependencies(template=self):
-        return self._build()
-
-  def _build(self):
-    return variable_scope.get_variable(name="in_manual_scope", shape=[])
-
-
-class TemplateTests(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_checkpointable_save_restore(self):
-
-    def _templated():
-      v = variable_scope.get_variable(
-          "v", shape=[1], initializer=init_ops.zeros_initializer(),
-          use_resource=True)
-      v2 = variable_scope.get_variable(
-          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
-          use_resource=True)
-      manual = _ManualScope()
-      return v, v + 1., v2, manual, manual()
-
-    save_template = template.make_template("s1", _templated)
-    v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
-    six.assertCountEqual(
-        self,
-        [v1_save, v2_save, manual_scope, manual_scope_v, save_template],
-        checkpointable_utils.list_objects(save_template))
-    manual_dep, = manual_scope._checkpoint_dependencies
-    self.assertEqual("in_manual_scope", manual_dep.name)
-    self.assertIs(manual_scope_v, manual_dep.ref)
-    optimizer = adam.AdamOptimizer(0.0)
-    save_root = checkpointable_utils.Checkpoint(
-        my_template=save_template, optimizer=optimizer)
-    optimizer.minimize(v1_save.read_value)
-    self.evaluate([v.initializer for v in save_template.variables])
-    self.evaluate([v.initializer for v in optimizer.variables()])
-    self.evaluate(v1_save.assign([12.]))
-    self.evaluate(v2_save.assign([14.]))
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    save_path = save_root.save(checkpoint_prefix)
-
-    load_template = template.make_template("s2", _templated)
-    load_optimizer = adam.AdamOptimizer(0.0)
-    load_root = checkpointable_utils.Checkpoint(
-        my_template=load_template, optimizer=load_optimizer)
-    status = load_root.restore(save_path)
-    var, var_plus_one, var2, _, _ = load_template()
-    load_optimizer.minimize(var.read_value)
-    self.assertEqual(3, len(load_template._checkpoint_dependencies))
-    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
-    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
-    self.assertEqual("ManualScope",
-                     load_template._checkpoint_dependencies[2].name)
-    status.assert_consumed().run_restore_ops()
-    self.assertAllEqual([12.], self.evaluate(var))
-    self.assertAllEqual([13.], self.evaluate(var_plus_one))
-    self.assertAllEqual([14.], self.evaluate(var2))
-
-
-class CheckpointCompatibilityTests(test.TestCase):
-
-  def _initialized_model(self):
-    input_value = constant_op.constant([[3.]])
-    model = MyModel()
-    optimizer = adam.AdamOptimizer(0.001)
-    optimizer_step = training_util.get_or_create_global_step()
-    root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
-    train_op = optimizer.minimize(
-        functools.partial(model, input_value),
-        global_step=optimizer_step)
-    self.evaluate(checkpointable_utils.gather_initializers(
-        root_checkpointable))
-    self.evaluate(train_op)
-    # A regular variable, a slot variable, and a non-slot Optimizer variable
-    # with known values to check when loading.
-    self.evaluate(model._named_dense.bias.assign([1.]))
-    self.evaluate(optimizer.get_slot(
-        var=model._named_dense.bias, name="m").assign([2.]))
-    beta1_power, _ = optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(3.))
-    return root_checkpointable
-
-  def _set_sentinels(self, root_checkpointable):
-    self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
-    self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")
-        .assign([102.]))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(103.))
-
-  def _check_sentinels(self, root_checkpointable):
-    self.assertAllEqual(
-        [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
-    self.assertAllEqual([2.], self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.assertAllEqual(3., self.evaluate(beta1_power))
-
-  def _write_name_based_checkpoint(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    with context.graph_mode():
-      save_graph = ops.Graph()
-      with save_graph.as_default(), self.session(
-          graph=save_graph) as session:
-        root = self._initialized_model()
-        name_saver = saver_lib.Saver()
-        return name_saver.save(
-            sess=session, save_path=checkpoint_prefix,
-            global_step=root.optimizer_step)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testLoadFromNameBasedSaver(self):
-    """Save a name-based checkpoint, load it using the object-based API."""
-    with test_util.device(use_gpu=True):
-      save_path = self._write_name_based_checkpoint()
-      root = self._initialized_model()
-      self._set_sentinels(root)
-      with self.assertRaises(AssertionError):
-        self._check_sentinels(root)
-      object_saver = checkpointable_utils.CheckpointableSaver(
-          graph_view.ObjectGraphView(root))
-      self._set_sentinels(root)
-      status = object_saver.restore(save_path)
-      if context.executing_eagerly():
-        self._check_sentinels(root)
-      if context.executing_eagerly():
-        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
-          status.assert_consumed()
-        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
-          status.assert_existing_objects_matched()
-        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
-          status.assert_nontrivial_match()
-      else:
-        # When graph building, we haven't read any keys, so we don't know
-        # whether the restore will be complete.
-        with self.assertRaisesRegexp(AssertionError, "not restored"):
-          status.assert_consumed()
-        with self.assertRaisesRegexp(AssertionError, "not restored"):
-          status.assert_existing_objects_matched()
-        with self.assertRaisesRegexp(AssertionError, "not restored"):
-          status.assert_nontrivial_match()
-      status.run_restore_ops()
-      self._check_sentinels(root)
-      self._set_sentinels(root)
-      status = object_saver.restore(save_path)
-      status.initialize_or_restore()
-      self._check_sentinels(root)
-      # Check that there is no error when keys are missing from the name-based
-      # checkpoint.
-      root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable([1.])
-      status = object_saver.restore(save_path)
-      with self.assertRaises(AssertionError):
-        status.assert_existing_objects_matched()
-
-  def testSaveGraphLoadEager(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    with context.graph_mode():
-      save_graph = ops.Graph()
-      with save_graph.as_default(), self.session(
-          graph=save_graph) as session:
-        root = self._initialized_model()
-        save_path = root.save(session=session, file_prefix=checkpoint_prefix)
-    with context.eager_mode():
-      root = self._initialized_model()
-      self._set_sentinels(root)
-      root.restore(save_path).assert_consumed()
-      self._check_sentinels(root)
-
-  def testSaveEagerLoadGraph(self):
-    checkpoint_directory = self.get_temp_dir()
-    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-    with context.eager_mode():
-      root = self._initialized_model()
-      save_path = root.save(file_prefix=checkpoint_prefix)
-    with context.graph_mode():
-      save_graph = ops.Graph()
-      with save_graph.as_default(), self.session(
-          graph=save_graph):
-        root = self._initialized_model()
-        self._set_sentinels(root)
-        root.restore(save_path).assert_consumed().run_restore_ops()
-        self._check_sentinels(root)
-
-
-if __name__ == "__main__":
-  test.main()
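Among the behaviors exercised by the deleted suite above is loading a name-based
`tf.train.Saver` checkpoint through the object-based API. A hedged sketch of that
compatibility pattern, assuming the public `tf.train.Checkpoint` entry point (the
path and variable are illustrative, not taken from the tests):

    import tensorflow as tf

    # Assume "/tmp/name_based/ckpt" was written by a name-based tf.train.Saver.
    root = tf.train.Checkpoint(v=tf.Variable(0., name="v"))
    status = root.restore("/tmp/name_based/ckpt")
    # With no object graph in the checkpoint, restoration falls back to
    # matching variables by name rather than by object path.
    status.run_restore_ops()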
diff --git a/tensorflow/python/training/checkpointable/util_xla_test.py b/tensorflow/python/training/checkpointable/util_xla_test.py
deleted file mode 100644
index 4e96a75..0000000
--- a/tensorflow/python/training/checkpointable/util_xla_test.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.compiler.tests import xla_test
-from tensorflow.python.eager import backprop
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.layers import core
-from tensorflow.python.keras.optimizer_v2 import adam
-from tensorflow.python.platform import test
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training.checkpointable import tracking
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
-
-
-class NonLayerCheckpointable(tracking.AutoCheckpointable):
-
-  def __init__(self):
-    super(NonLayerCheckpointable, self).__init__()
-    self.a_variable = checkpointable_utils.add_variable(
-        self, name="a_variable", shape=[])
-
-
-class Subclassed(training.Model):
-  """A concrete Model for testing."""
-
-  def __init__(self):
-    super(Subclassed, self).__init__()
-    self._named_dense = core.Dense(1, use_bias=True)
-    self._second = core.Dense(1, use_bias=False)
-    # We can still track Checkpointables which aren't Layers.
-    self._non_layer = NonLayerCheckpointable()
-
-  def call(self, values):
-    ret = self._second(self._named_dense(values))
-    return ret
-
-
-class CheckpointingTests(xla_test.XLATestCase):
-
-  def testDeferredRestorationUsageEager(self):
-    """An idiomatic eager execution example."""
-    num_training_steps = 10
-    checkpoint_directory = self.get_temp_dir()
-    for training_continuation in range(3):
-      with self.test_scope():
-        model = Subclassed()
-        optimizer = adam.Adam(0.001)
-        root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model)
-        manager = checkpoint_management.CheckpointManager(
-            root, checkpoint_directory, max_to_keep=2)
-        root.restore(manager.latest_checkpoint)
-        for _ in range(num_training_steps):
-          input_value = constant_op.constant([[3.]])
-          with backprop.GradientTape() as tape:
-            loss = model(input_value)
-          variables = model.trainable_variables
-          gradients = tape.gradient(loss, variables)
-          optimizer.apply_gradients(zip(gradients, variables))
-        manager.save()
-        self.assertEqual((training_continuation + 1) * num_training_steps,
-                         root.optimizer.iterations.numpy())
-
-
-if __name__ == "__main__":
-  ops.enable_eager_execution()
-  test.main()
diff --git a/tensorflow/python/training/mode_keys.py b/tensorflow/python/training/mode_keys.py
deleted file mode 100644
index ef64554..0000000
--- a/tensorflow/python/training/mode_keys.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Model modeKeys for TensorFlow and Estimator."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-class ModeKeys(object):
-  """Standard names for model modes.
-
-  The following standard keys are defined:
-
-  * `TRAIN`: training/fitting mode.
-  * `TEST`: testing/evaluation mode.
-  * `PREDICT`: prediction/inference mode.
-  """
-
-  TRAIN = 'train'
-  TEST = 'test'
-  PREDICT = 'predict'
diff --git a/tensorflow/python/training/mode_keys_test.py b/tensorflow/python/training/mode_keys_test.py
deleted file mode 100644
index c4435b7..0000000
--- a/tensorflow/python/training/mode_keys_test.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for `tf.train.ModeKeys."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.platform import test
-from tensorflow.python.training import mode_keys
-
-
-class ModeKeysTest(test.TestCase):
-
-  def testKeyEquality(self):
-    self.assertEqual(mode_keys.ModeKeys.PREDICT, 'predict')
-    self.assertEqual(mode_keys.ModeKeys.TRAIN, 'train')
-    self.assertEqual(mode_keys.ModeKeys.TEST, 'test')
diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index 1687898..7d7e95c 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -41,8 +41,8 @@
 from tensorflow.python.training import saver as training_saver
 from tensorflow.python.training import session_manager as sm
 from tensorflow.python.training import session_run_hook
-from tensorflow.python.training.checkpointable import graph_view
-from tensorflow.python.training.checkpointable import util as checkpointable_util
+from tensorflow.python.training.tracking import graph_view
+from tensorflow.python.training.tracking import util as trackable_util
 from tensorflow.python.util import function_utils
 from tensorflow.python.util.tf_export import tf_export
 
@@ -228,7 +228,7 @@
     if self._saver is None:
       self._saver = training_saver._get_saver_or_default()  # pylint: disable=protected-access
     # pylint: enable=g-long-lambda
-    if isinstance(self._saver, checkpointable_util.Checkpoint):
+    if isinstance(self._saver, trackable_util.Checkpoint):
       self._saver = training_saver.Saver(
           var_list=graph_view.ObjectGraphView(
               self._saver).frozen_saveable_objects(),
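The scaffold change above adapts object-based checkpoints to the name-based
Saver API. A minimal hedged sketch of that adaptation, using only the renamed
tracking modules from this diff (the helper name is illustrative):

    from tensorflow.python.training import saver as training_saver
    from tensorflow.python.training.tracking import graph_view
    from tensorflow.python.training.tracking import util as trackable_util

    def as_name_based_saver(saver_or_checkpoint):
      # If given a trackable_util.Checkpoint, freeze its object graph into
      # SaveableObjects and wrap them in a conventional Saver; otherwise
      # pass the saver through unchanged.
      if isinstance(saver_or_checkpoint, trackable_util.Checkpoint):
        return training_saver.Saver(
            var_list=graph_view.ObjectGraphView(
                saver_or_checkpoint).frozen_saveable_objects())
      return saver_or_checkpoint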
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index a98fcc2..4361f07 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -39,7 +39,7 @@
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import slot_creator
-from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
@@ -214,10 +214,10 @@
 
 @tf_export(v1=["train.Optimizer"])
 class Optimizer(
-    # Optimizers inherit from CheckpointableBase rather than Checkpointable
+    # Optimizers inherit from Trackable rather than AutoTrackable
     # since they do most of their dependency management themselves (slot
     # variables are special-cased, and non-slot variables are keyed to graphs).
-    checkpointable.Checkpointable):
+    trackable.Trackable):
   """Base class for optimizers.
 
   This class defines the API to add Ops to train a model.  You never use this
@@ -333,9 +333,9 @@
     #   ... }
     self._slots = {}
     self._non_slot_dict = {}
-    # For implementing Checkpointable. Stores information about how to restore
+    # For implementing Trackable. Stores information about how to restore
     # slot variables which have not yet been created
-    # (checkpointable._CheckpointPosition objects).
+    # (trackable._CheckpointPosition objects).
     #  {slot_name :
     #      {_var_key(variable_to_train): [checkpoint_position, ... ], ... },
     #   ... }
@@ -796,7 +796,7 @@
     key = (name, graph)
     v = self._non_slot_dict.get(key, None)
     if v is None:
-      self._maybe_initialize_checkpointable()
+      self._maybe_initialize_trackable()
       distribution_strategy = distribute_ctx.get_strategy()
       with distribution_strategy.extended.colocate_vars_with(colocate_with):
         if eager:
@@ -809,19 +809,19 @@
             use_resource=resource_variable_ops.is_resource_variable(
                 colocate_with))
       # Restore this variable by name if necessary, but don't add a
-      # Checkpointable dependency. Optimizers return the current graph's
+      # Trackable dependency. Optimizers return the current graph's
       # non-slot variables from _checkpoint_dependencies explicitly rather
       # than unconditionally adding dependencies (since there may be multiple
       # non-slot variables with the same name in different graphs, trying to
       # save all of them would result in errors).
-      self._handle_deferred_dependencies(name=name, checkpointable=v)
+      self._handle_deferred_dependencies(name=name, trackable=v)
       self._non_slot_dict[key] = v
 
     return v
 
   @property
   def _checkpoint_dependencies(self):
-    """From Checkpointable. Gather graph-specific non-slot variables to save."""
+    """From Trackable. Gather graph-specific non-slot variables to save."""
     current_graph_non_slot_variables = []
     current_graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
     for (name, _), variable_object in sorted(self._non_slot_dict.items(),
@@ -829,13 +829,13 @@
                                              key=lambda item: item[0][0]):
       if variable_object._graph_key == current_graph_key:  # pylint: disable=protected-access
         current_graph_non_slot_variables.append(
-            checkpointable.CheckpointableReference(
+            trackable.TrackableReference(
                 name=name, ref=variable_object))
     return (super(Optimizer, self)._checkpoint_dependencies
             + current_graph_non_slot_variables)
 
   def _lookup_dependency(self, name):
-    """From Checkpointable. Find a non-slot variable in the current graph."""
+    """From Trackable. Find a non-slot variable in the current graph."""
     unconditional = super(Optimizer, self)._lookup_dependency(name)
     if unconditional is not None:
       return unconditional
@@ -1140,7 +1140,7 @@
     return named_slots[_var_key(var)]
 
   # --------------
-  # For implementing the Checkpointable interface.
+  # For implementing the Trackable interface.
   # --------------
 
   def _restore_slot_variable(self, slot_name, variable, slot_variable):
@@ -1171,8 +1171,8 @@
     slot variable needs to be restored).
 
     Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
+      slot_variable_position: A `trackable._CheckpointPosition` object
+        indicating the slot variable `Trackable` object to be restored.
       slot_name: The name of this `Optimizer`'s slot to restore into.
       variable: The variable object this slot is being created for.
     """
@@ -1190,7 +1190,7 @@
         # (aside from double initialization), and makes variable creator scopes
         # behave the same way they do when graph building.
         and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
-      initializer = checkpointable.CheckpointInitialValue(
+      initializer = trackable.CheckpointInitialValue(
           checkpoint_position=slot_variable_position)
       slot_variable = self._get_or_make_slot(
           var=variable,
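The slot-restoration plumbing above lets restore() run before the optimizer or
its slot variables exist. A hedged end-to-end sketch of that deferred pattern,
adapted from the test coverage this change touches; it assumes a TF 1.x build
with eager execution enabled, and paths/values are illustrative:

    import os
    import tensorflow as tf

    tf.enable_eager_execution()
    prefix = os.path.join("/tmp/deferred", "ckpt")  # illustrative path

    root = tf.train.Checkpoint(var=tf.Variable(13.))
    root.optimizer = tf.train.AdamOptimizer(0.1)
    root.optimizer.minimize(root.var.read_value)  # creates the "m"/"v" slots
    save_path = root.save(prefix)

    new_root = tf.train.Checkpoint()        # no variables or optimizer yet
    status = new_root.restore(save_path)    # restores are queued, not applied
    new_root.var = tf.Variable(0.)
    new_root.optimizer = tf.train.AdamOptimizer(0.1)
    # minimize() creates the slot variables, which consume the queued
    # _CheckpointPosition objects via _create_or_restore_slot_variable.
    new_root.optimizer.minimize(new_root.var.read_value)
    status.assert_consumed()                # everything matched up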
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index ee0e215..215fc39 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -17,7 +17,7 @@
 """Save and restore variables.
 
 Symbols in this file are deprecated. See replacements in
-tensorflow/python/training/checkpointable and tensorflow/python/training/saving.
+tensorflow/python/training/tracking and tensorflow/python/training/saving.
 """
 from __future__ import absolute_import
 from __future__ import division
@@ -29,10 +29,9 @@
 import uuid
 
 import numpy as np
-
-from tensorflow.core.protobuf import checkpointable_object_graph_pb2
 from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.core.protobuf import saver_pb2
+from tensorflow.core.protobuf import trackable_object_graph_pb2
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
@@ -51,9 +50,9 @@
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.training.saving import saveable_object
 from tensorflow.python.training.saving import saveable_object_util
+from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import compat
 from tensorflow.python.util.tf_export import tf_export
 
@@ -1552,12 +1551,13 @@
     RuntimeError: If called with eager execution enabled.
 
   @compatibility(eager)
-  Exporting/importing meta graphs is not supported. No graph exists when eager
-  execution is enabled.
+  Exporting/importing meta graphs is not supported unless both `graph_def` and
+  `graph` are provided. No graph exists when eager execution is enabled.
   @end_compatibility
   """
   # pylint: enable=line-too-long
-  if context.executing_eagerly():
+  if context.executing_eagerly() and not (graph_def is not None and
+                                          graph is not None):
     raise RuntimeError("Exporting/importing meta graphs is not supported when "
                        "eager execution is enabled. No graph exists when eager "
                        "execution is enabled.")
@@ -1604,9 +1604,9 @@
   """
   reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_path)
   object_graph_string = reader.get_tensor(
-      checkpointable.OBJECT_GRAPH_PROTO_KEY)
+      trackable.OBJECT_GRAPH_PROTO_KEY)
   object_graph_proto = (
-      checkpointable_object_graph_pb2.CheckpointableObjectGraph())
+      trackable_object_graph_pb2.TrackableObjectGraph())
   object_graph_proto.ParseFromString(object_graph_string)
   names_to_keys = {}
   for node in object_graph_proto.nodes:
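The hunk above switches the checkpoint's object-graph metadata to the renamed
TrackableObjectGraph proto. A short hedged sketch of inspecting that proto
directly, mirroring the added lines (the checkpoint prefix is illustrative):

    from tensorflow.core.protobuf import trackable_object_graph_pb2
    from tensorflow.python import pywrap_tensorflow
    from tensorflow.python.training.tracking import base as trackable

    reader = pywrap_tensorflow.NewCheckpointReader("/tmp/ckpt/model")
    # The serialized object graph is stored under a reserved key in the bundle.
    proto_bytes = reader.get_tensor(trackable.OBJECT_GRAPH_PROTO_KEY)
    object_graph = trackable_object_graph_pb2.TrackableObjectGraph()
    object_graph.ParseFromString(proto_bytes)
    for node in object_graph.nodes:
      for attribute in node.attributes:
        # Maps user-visible names to the keys actually stored on disk.
        print(attribute.full_name, "->", attribute.checkpoint_key)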
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index dfc43ee..9b2a1da 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -73,9 +73,9 @@
 from tensorflow.python.training import saver as saver_module
 from tensorflow.python.training import saver_test_utils
 from tensorflow.python.training import training_util
-from tensorflow.python.training.checkpointable import base as checkpointable_base
-from tensorflow.python.training.checkpointable import tracking as checkpointable_tracking
-from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.training.tracking import base as trackable_base
+from tensorflow.python.training.tracking import tracking as trackable_tracking
+from tensorflow.python.training.tracking import util as trackable_utils
 from tensorflow.python.util import compat
 
 
@@ -2775,15 +2775,15 @@
       self.assertEqual(2.0, self.evaluate(var_dict2["variable2:0"]))
 
 
-class _OwnsAVariableSimple(checkpointable_base.Checkpointable):
-  """A Checkpointable object which can be saved using a tf.train.Saver."""
+class _OwnsAVariableSimple(trackable_base.Trackable):
+  """A Trackable object which can be saved using a tf.train.Saver."""
 
   def __init__(self):
     self.non_dep_variable = variable_scope.get_variable(
         name="non_dep_variable", initializer=6., use_resource=True)
 
   def _gather_saveables_for_checkpoint(self):
-    return {checkpointable_base.VARIABLE_VALUE_KEY: self.non_dep_variable}
+    return {trackable_base.VARIABLE_VALUE_KEY: self.non_dep_variable}
 
   # The Saver sorts by name before parsing, so we need a name property.
   @property
@@ -2808,8 +2808,8 @@
         self._mirrored_variable.assign(tensor))
 
 
-class _OwnsMirroredVariables(checkpointable_base.Checkpointable):
-  """A Checkpointable object which returns a more complex SaveableObject."""
+class _OwnsMirroredVariables(trackable_base.Trackable):
+  """A Trackable object which returns a more complex SaveableObject."""
 
   def __init__(self):
     self.non_dep_variable = variable_scope.get_variable(
@@ -2823,7 +2823,7 @@
           primary_variable=self.non_dep_variable,
           mirrored_variable=self.mirrored,
           name=name)
-    return {checkpointable_base.VARIABLE_VALUE_KEY: _saveable_factory}
+    return {trackable_base.VARIABLE_VALUE_KEY: _saveable_factory}
 
   # The Saver sorts by name before parsing, so we need a name property.
   @property
@@ -2831,11 +2831,11 @@
     return self.non_dep_variable.name
 
 
-class NonLayerCheckpointable(checkpointable_tracking.AutoCheckpointable):
+class NonLayerTrackable(trackable_tracking.AutoTrackable):
 
   def __init__(self):
-    super(NonLayerCheckpointable, self).__init__()
-    self.a_variable = checkpointable_utils.add_variable(
+    super(NonLayerTrackable, self).__init__()
+    self.a_variable = trackable_utils.add_variable(
         self, name="a_variable", shape=[])
 
 
@@ -2846,19 +2846,19 @@
     super(MyModel, self).__init__()
     self._named_dense = core.Dense(1, use_bias=True)
     self._second = core.Dense(1, use_bias=False)
-    # We can still track Checkpointables which aren't Layers.
-    self._non_layer = NonLayerCheckpointable()
+    # We can still track Trackables which aren't Layers.
+    self._non_layer = NonLayerTrackable()
 
   def call(self, values):
     ret = self._second(self._named_dense(values))
     return ret
 
 
-class CheckpointableCompatibilityTests(test.TestCase):
+class TrackableCompatibilityTests(test.TestCase):
 
   # TODO(allenl): Track down python3 reference cycles in these tests.
   @test_util.run_in_graph_and_eager_modes
-  def testNotSaveableButIsCheckpointable(self):
+  def testNotSaveableButIsTrackable(self):
     v = _OwnsAVariableSimple()
     test_dir = self.get_temp_dir()
     prefix = os.path.join(test_dir, "ckpt")
@@ -2923,13 +2923,13 @@
     model = MyModel()
     optimizer = adam.AdamOptimizer(0.001)
     optimizer_step = training_util.get_or_create_global_step()
-    root_checkpointable = checkpointable_utils.Checkpoint(
+    root_trackable = trackable_utils.Checkpoint(
         optimizer=optimizer, model=model, optimizer_step=optimizer_step)
     train_op = optimizer.minimize(
         functools.partial(model, input_value),
         global_step=optimizer_step)
-    self.evaluate(checkpointable_utils.gather_initializers(
-        root_checkpointable))
+    self.evaluate(trackable_utils.gather_initializers(
+        root_trackable))
     self.evaluate(train_op)
     # A regular variable, a slot variable, and a non-slot Optimizer variable
     # with known values to check when loading.
@@ -2938,24 +2938,24 @@
         var=model._named_dense.bias, name="m").assign([2.]))
     beta1_power, _ = optimizer._get_beta_accumulators()
     self.evaluate(beta1_power.assign(3.))
-    return root_checkpointable
+    return root_trackable
 
-  def _set_sentinels(self, root_checkpointable):
-    self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
+  def _set_sentinels(self, root_trackable):
+    self.evaluate(root_trackable.model._named_dense.bias.assign([101.]))
     self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, name="m")
         .assign([102.]))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
     self.evaluate(beta1_power.assign(103.))
 
-  def _check_sentinels(self, root_checkpointable):
+  def _check_sentinels(self, root_trackable):
     self.assertAllEqual(
-        [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
+        [1.], self.evaluate(root_trackable.model._named_dense.bias))
     self.assertAllEqual([2.], self.evaluate(
-        root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, name="m")))
+    beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
     self.assertAllEqual(3., self.evaluate(beta1_power))
 
   def testVariableNotFoundErrorRaised(self):
@@ -3012,13 +3012,13 @@
     save_graph = ops_lib.Graph()
     with save_graph.as_default(), self.session(graph=save_graph) as sess:
       root = self._initialized_model()
-      object_saver = checkpointable_utils.Checkpoint(root=root)
+      object_saver = trackable_utils.Checkpoint(root=root)
       save_path = object_saver.save(file_prefix=checkpoint_prefix)
 
       # An incompatible object-based checkpoint to check error messages
       var = resource_variable_ops.ResourceVariable(1., name="a")
       self.evaluate(var.initializer)
-      second_saver = checkpointable_utils.Checkpoint(v=var)
+      second_saver = trackable_utils.Checkpoint(v=var)
       second_path = second_saver.save(file_prefix=os.path.join(
           checkpoint_directory, "second"))
 
@@ -3046,7 +3046,7 @@
     save_graph = ops_lib.Graph()
     with save_graph.as_default(), self.session(graph=save_graph):
       root = self._initialized_model()
-      object_saver = checkpointable_utils.Checkpoint(root=root)
+      object_saver = trackable_utils.Checkpoint(root=root)
       save_path = object_saver.save(file_prefix=checkpoint_prefix)
 
     with context.eager_mode():
diff --git a/tensorflow/python/training/saving/BUILD b/tensorflow/python/training/saving/BUILD
index 67ccd59..adb50f9 100644
--- a/tensorflow/python/training/saving/BUILD
+++ b/tensorflow/python/training/saving/BUILD
@@ -49,7 +49,7 @@
     deps = [
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:variables",
-        "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/tracking:base",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/training/saving/saveable_object_util.py b/tensorflow/python/training/saving/saveable_object_util.py
index b8cc662..eeec19e 100644
--- a/tensorflow/python/training/saving/saveable_object_util.py
+++ b/tensorflow/python/training/saving/saveable_object_util.py
@@ -26,8 +26,8 @@
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
-from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.training.saving import saveable_object
+from tensorflow.python.training.tracking import base as trackable
 
 
 # Op names which identify variable reads which should be saved.
@@ -137,7 +137,7 @@
   if not isinstance(name, six.string_types):
     raise TypeError(
         "names_to_saveables must be a dict mapping string names to "
-        "checkpointable operations. Name is not a string: %s" % name)
+        "trackable operations. Name is not a string: %s" % name)
   if isinstance(op, saveable_object.SaveableObject):
     yield op
   elif isinstance(op, (list, tuple, variables.PartitionedVariable)):
@@ -165,11 +165,11 @@
         yield ResourceVariableSaveable(
             variable, variable._save_slice_info.spec, name)
     # pylint: enable=protected-access
-  elif isinstance(op, checkpointable.Checkpointable) and not isinstance(
+  elif isinstance(op, trackable.Trackable) and not isinstance(
       op, variables.Variable):
     # pylint: disable=protected-access
     for attr, factory in op._gather_saveables_for_checkpoint().items():
-      if attr == checkpointable.VARIABLE_VALUE_KEY:
+      if attr == trackable.VARIABLE_VALUE_KEY:
         # Keep original name for classes masquerading as variables.
         full_name = name
       else:
@@ -250,15 +250,18 @@
         names_to_saveables[name].append(var)
       else:
         names_to_saveables[name] = [var]
-    elif (isinstance(var, checkpointable.Checkpointable)
+    elif (isinstance(var, trackable.Trackable)
           and not isinstance(var, variables.Variable)):
-      checkpointable_saveables = [
+      trackable_saveables = [
           (factory() if callable(factory) else factory)
           for factory in var._gather_saveables_for_checkpoint().values()]
       names_to_saveables.update(
-          op_list_to_dict(checkpointable_saveables))
+          op_list_to_dict(trackable_saveables))
     else:
-      if context.executing_eagerly():
+      # Variables (reference and resource) have an _in_graph_mode property
+      # indicating whether they were created in a graph building context. We
+      # also get Tensors when graph building, which do not have this property.
+      if not getattr(var, "_in_graph_mode", True):
         if not isinstance(var, resource_variable_ops.ResourceVariable):
           raise ValueError(
               "Can only save/restore ResourceVariables when eager execution "
@@ -323,7 +326,7 @@
 
   Raises:
     TypeError: If any of the keys are not strings or any of the
-      values are not one of Tensor or Variable or a checkpointable operation.
+      values are not one of Tensor or Variable or a trackable operation.
     ValueError: If the same operation is given in more than one value
       (this also applies to slices of SlicedVariables).
   """
diff --git a/tensorflow/python/training/tracking/BUILD b/tensorflow/python/training/tracking/BUILD
new file mode 100644
index 0000000..40a6e93
--- /dev/null
+++ b/tensorflow/python/training/tracking/BUILD
@@ -0,0 +1,255 @@
+# Description:
+#   Utilities for reading and writing object-based checkpoints.
+
+package(
+    default_visibility = [
+        "//tensorflow:internal",
+    ],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
+load("//tensorflow/compiler/tests:build_defs.bzl", "tf_xla_py_test")
+
+py_library(
+    name = "base",
+    srcs = ["base.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops_gen",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:util",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/training/saving:saveable_object",
+        "@six_archive//:six",
+    ],
+)
+
+tf_py_test(
+    name = "base_test",
+    srcs = ["base_test.py"],
+    additional_deps = [
+        ":base",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_library(
+    name = "tracking",
+    srcs = ["tracking.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":base",
+        ":data_structures",
+    ],
+)
+
+tf_py_test(
+    name = "tracking_test",
+    srcs = ["tracking_test.py"],
+    additional_deps = [
+        ":base",
+        ":tracking",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_library(
+    name = "layer_utils",
+    srcs = ["layer_utils.py"],
+    srcs_version = "PY2AND3",
+)
+
+py_library(
+    name = "data_structures",
+    srcs = ["data_structures.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":base",
+        ":layer_utils",
+        "//tensorflow/python/saved_model:revived_types",
+    ],
+)
+
+tf_py_test(
+    name = "data_structures_test",
+    srcs = ["data_structures_test.py"],
+    additional_deps = [
+        ":data_structures",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:layers",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras:engine",
+        "//tensorflow/python/keras:layers",
+    ],
+)
+
+py_library(
+    name = "object_identity",
+    srcs = ["object_identity.py"],
+    srcs_version = "PY2AND3",
+)
+
+py_library(
+    name = "graph_view",
+    srcs = ["graph_view.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":base",
+        ":object_identity",
+        ":tracking",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/training/saving:saveable_object",
+        "//tensorflow/python/training/saving:saveable_object_util",
+    ],
+)
+
+py_library(
+    name = "util",
+    srcs = ["util.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":base",
+        ":data_structures",
+        ":graph_view",
+        ":object_identity",
+        ":tracking",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:checkpoint_management",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:io_ops_gen",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:pywrap_tensorflow",
+        "//tensorflow/python:saver",
+        "//tensorflow/python:session",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/training/saving:functional_saver",
+        "//tensorflow/python/training/saving:saveable_object_util",
+    ],
+)
+
+tf_py_test(
+    name = "util_test",
+    srcs = ["util_test.py"],
+    additional_deps = [
+        ":base",
+        ":graph_view",
+        ":tracking",
+        ":util",
+        "@absl_py//absl/testing:parameterized",
+        "@six_archive//:six",
+        "//tensorflow/python/keras/optimizer_v2",
+        "//tensorflow/python:checkpoint_management",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:pywrap_tensorflow",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:saver",
+        "//tensorflow/python:session",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:template",
+        "//tensorflow/python:training_util",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/eager:backprop",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras:engine",
+        "//tensorflow/python/keras:layers",
+        "//tensorflow/python:variables",
+    ],
+    tags = ["notsan"],  # b/74395663
+)
+
+tf_xla_py_test(
+    name = "util_xla_test",
+    srcs = ["util_xla_test.py"],
+    tags = [
+        "no_pip",
+        "no_rocm",
+        "nomac",
+        "notsan",  # b/74395663
+    ],
+    deps = [
+        ":tracking",
+        ":util",
+        "//tensorflow/compiler/tests:xla_test",
+        "//tensorflow/python:checkpoint_management",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/eager:backprop",
+        "//tensorflow/python/keras:engine",
+        "//tensorflow/python/keras:layers",
+        "//tensorflow/python/keras/optimizer_v2",
+    ],
+)
+
+tf_py_test(
+    name = "util_with_v1_optimizers_test",
+    srcs = ["util_with_v1_optimizers_test.py"],
+    additional_deps = [
+        ":base",
+        ":graph_view",
+        ":tracking",
+        ":util",
+        "@absl_py//absl/testing:parameterized",
+        "@six_archive//:six",
+        "//tensorflow/python:checkpoint_management",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:pywrap_tensorflow",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:saver",
+        "//tensorflow/python:session",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:template",
+        "//tensorflow/python:training",
+        "//tensorflow/python:training_util",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/distribute:mirrored_strategy",
+        "//tensorflow/python/eager:backprop",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras:engine",
+        "//tensorflow/python/keras:layers",
+    ],
+    tags = [
+        "no_windows",  # b/124401331
+        "notsan",  # b/74395663
+    ],
+)
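The "base" and "tracking" targets above provide the Trackable and AutoTrackable
classes referenced throughout this change. A hedged sketch of the behavioral
difference the optimizer.py comment relies on (the class names `Manual` and
`Automatic` are illustrative):

    import tensorflow as tf
    from tensorflow.python.training.tracking import base as trackable
    from tensorflow.python.training.tracking import tracking

    class Manual(trackable.Trackable):
      """Manages its own dependencies, as Optimizer does."""

    class Automatic(tracking.AutoTrackable):
      """Tracks dependencies automatically on attribute assignment."""

    auto = Automatic()
    auto.v = tf.Variable(1.)    # recorded as a named checkpoint dependency
    manual = Manual()
    manual.v = tf.Variable(1.)  # plain Python attribute; nothing recorded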
diff --git a/tensorflow/python/training/tracking/base.py b/tensorflow/python/training/tracking/base.py
new file mode 100644
index 0000000..4a8960d
--- /dev/null
+++ b/tensorflow/python/training/tracking/base.py
@@ -0,0 +1,875 @@
+"""An object-local variable management scheme."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import collections
+import functools
+import json
+import weakref
+
+import six
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_io_ops as io_ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training.saving import saveable_object
+from tensorflow.python.util import nest
+from tensorflow.python.util import serialization
+from tensorflow.python.util import tf_decorator
+
+
+# Key where the object graph proto is saved in a TensorBundle
+OBJECT_GRAPH_PROTO_KEY = "_CHECKPOINTABLE_OBJECT_GRAPH"
+
+
+# A key indicating a variable's value in an object's checkpointed Tensors
+# (Trackable._gather_saveables_for_checkpoint). If this is the only key and
+# the object has no dependencies, then its value may be restored on object
+# creation (avoiding double assignment when executing eagerly).
+VARIABLE_VALUE_KEY = "VARIABLE_VALUE"
+OBJECT_CONFIG_JSON_KEY = "OBJECT_CONFIG_JSON"
+
+TrackableReference = collections.namedtuple(
+    "TrackableReference",
+    [
+        # The local name for this dependency.
+        "name",
+        # The Trackable object being referenced.
+        "ref"
+    ])
+
+
+class CheckpointInitialValue(ops.Tensor):
+  """Tensor wrapper for managing update UIDs in `Variables`.
+
+  When supplied as an initial value, objects of this type let a `Variable`
+  (`Variable`, `ResourceVariable`, etc.) know the UID of the restore the initial
+  value came from. This allows deferred restorations to be sequenced in the
+  order the user specified them, and lets us fall back on assignment if an
+  initial value is not set (e.g. due to a custom getter interfering).
+
+  See comments in _add_variable_with_custom_getter for more information about
+  how `CheckpointInitialValue` is used.
+  """
+
+  def __init__(self, checkpoint_position, shape=None):
+    self.wrapped_value = checkpoint_position.value_tensors()[
+        VARIABLE_VALUE_KEY]
+    if shape:
+      # We need to set the static shape information on the initializer if
+      # possible so we don't get a variable with an unknown shape.
+      self.wrapped_value.set_shape(shape)
+    self._checkpoint_position = checkpoint_position
+
+  def __getattr__(self, attr):
+    try:
+      return getattr(self.wrapped_value, attr)
+    except AttributeError:
+      return self.__getattribute__(attr)
+
+  @property
+  def checkpoint_position(self):
+    return self._checkpoint_position
+
+
+class NoRestoreSaveable(saveable_object.SaveableObject):
+  """Embeds a tensor in a checkpoint with no restore ops."""
+
+  def __init__(self, tensor, name, dtype=None):
+    spec = saveable_object.SaveSpec(tensor, "", name, dtype=dtype)
+    super(NoRestoreSaveable, self).__init__(tensor, [spec], name)
+
+  def restore(self, restored_tensors, restored_shapes):
+    return control_flow_ops.no_op()
+
+
+@six.add_metaclass(abc.ABCMeta)
+class PythonStateSaveable(saveable_object.SaveableObject):
+  """An interface for saving/restoring volatile Python state."""
+
+  @abc.abstractmethod
+  def feed_dict_additions(self):
+    """When running a graph, indicates fresh state to feed.
+
+    Returns:
+      A dictionary mapping `Tensor`s to current Python state.
+    """
+    pass
+
+  @abc.abstractmethod
+  def freeze(self):
+    """Create a new `SaveableObject` which freezes current state as a constant.
+
+    Used when executing eagerly to embed the current state as a constant, or
+    when creating a static tf.train.Saver with the frozen current Python state.
+
+    Returns:
+      A `SaveableObject` which is not a `PythonStateSaveable` instance (i.e. has
+      no Python state associated with it).
+    """
+    pass
+
+
+class PythonStringStateSaveable(PythonStateSaveable):
+  """Saves Python state in a checkpoint."""
+
+  def __init__(self, name, state_callback, restore_callback=None):
+    """Configure saving.
+
+    Args:
+      name: The checkpoint key to write to.
+      state_callback: A function taking no arguments which returns a
+        string. This function is run every time a checkpoint is written.
+      restore_callback: A function taking a Python string, used to restore
+        state. Optional; defaults to doing nothing, in which case it is ignored
+        by status assertions such as assert_consumed().
+    """
+    self._has_trivial_state_callback = (restore_callback is None)
+    def _state_callback_wrapper():
+      with ops.init_scope():
+        return state_callback()
+    self._state_callback = _state_callback_wrapper
+    self._restore_callback = restore_callback
+    with ops.device("/cpu:0"):
+      self._save_string = constant_op.constant("", dtype=dtypes.string)
+    spec = saveable_object.SaveSpec(
+        self._save_string, "", name, dtype=dtypes.string)
+    super(PythonStringStateSaveable, self).__init__(
+        self._save_string, [spec], name)
+
+  @property
+  def optional_restore(self):
+    """For values with no restore, relaxes assert_consumed()."""
+    return self._has_trivial_state_callback
+
+  def feed_dict_additions(self):
+    """When running a graph, indicates fresh state to feed."""
+    return {self._save_string: self._state_callback()}
+
+  def freeze(self):
+    """Create a frozen `SaveableObject` which saves the current state."""
+    def _constant_state():
+      return constant_op.constant(self._state_callback(), dtype=dtypes.string)
+    return NoRestoreSaveable(
+        tensor=_constant_state,
+        dtype=dtypes.string,
+        name=self.name)
+
+  def python_restore(self, restored_strings):
+    """Called to restore Python state."""
+    if self._restore_callback:
+      restored, = restored_strings
+      self._restore_callback(restored)
+
+  def restore(self, restored_tensors, restored_shapes):
+    """Called to restore TensorFlow state (nothing to do)."""
+    return control_flow_ops.no_op()
+
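+# Editor's note: a minimal usage sketch for `PythonStringStateSaveable`
+# (hypothetical names, not part of this change). It pairs a save callback,
+# run on every checkpoint write, with an optional restore callback applied
+# via `python_restore()`:
+#
+#   state = {"step": 0}
+#   saveable = PythonStringStateSaveable(
+#       name="py_state",
+#       state_callback=lambda: json.dumps(state),
+#       restore_callback=lambda s: state.update(json.loads(s)))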
+
+class CheckpointPosition(object):
+  """Indicates a position within a `_CheckpointRestoreCoordinator`."""
+
+  def __init__(self, checkpoint, proto_id):
+    """Specify an object within a checkpoint.
+
+    Args:
+      checkpoint: A _CheckpointRestoreCoordinator object.
+      proto_id: The index of this object in TrackableObjectGraph.nodes.
+    """
+    self._checkpoint = checkpoint
+    self._proto_id = proto_id
+
+  def restore(self, trackable):
+    """Restore this value into `trackable`."""
+    with ops.init_scope():
+      if self.bind_object(trackable):
+        # This object's correspondence with a checkpointed object is new, so
+        # process deferred restorations for it and its dependencies.
+        restore_ops = trackable._restore_from_checkpoint_position(self)  # pylint: disable=protected-access
+        if restore_ops:
+          self._checkpoint.new_restore_ops(restore_ops)
+
+  def bind_object(self, trackable):
+    """Set a checkpoint<->object correspondence and process slot variables.
+
+    Args:
+      trackable: The object to record a correspondence for.
+    Returns:
+      True if this is a new assignment, False if this object has already been
+      mapped to a checkpointed `Object` proto.
+    Raises:
+      AssertionError: If another object is already bound to the `Object` proto.
+    """
+    checkpoint = self.checkpoint
+    checkpoint.all_python_objects.add(trackable)
+    current_assignment = checkpoint.object_by_proto_id.get(self._proto_id, None)
+    if current_assignment is None:
+      checkpoint.object_by_proto_id[self._proto_id] = trackable
+      for deferred_slot_restoration in (
+          checkpoint.deferred_slot_restorations.pop(self._proto_id, ())):
+        trackable._create_or_restore_slot_variable(  # pylint: disable=protected-access
+            slot_variable_position=CheckpointPosition(
+                checkpoint=checkpoint,
+                proto_id=deferred_slot_restoration.slot_variable_id),
+            variable=deferred_slot_restoration.original_variable,
+            slot_name=deferred_slot_restoration.slot_name)
+      for slot_restoration in checkpoint.slot_restorations.pop(
+          self._proto_id, ()):
+        optimizer_object = checkpoint.object_by_proto_id.get(
+            slot_restoration.optimizer_id, None)
+        if optimizer_object is None:
+          # The optimizer has not yet been created or tracked. Record in the
+          # checkpoint that the slot variables need to be restored when it is.
+          checkpoint.deferred_slot_restorations.setdefault(
+              slot_restoration.optimizer_id, []).append(
+                  _DeferredSlotVariableRestoration(
+                      original_variable=trackable,
+                      slot_variable_id=slot_restoration.slot_variable_id,
+                      slot_name=slot_restoration.slot_name))
+        else:
+          optimizer_object._create_or_restore_slot_variable(  # pylint: disable=protected-access
+              slot_variable_position=CheckpointPosition(
+                  checkpoint=checkpoint,
+                  proto_id=slot_restoration.slot_variable_id),
+              variable=trackable,
+              slot_name=slot_restoration.slot_name)
+      return True  # New assignment
+    else:
+      # The object was already mapped for this checkpoint load, which means
+      # we don't need to do anything besides check that the mapping is
+      # consistent (if the dependency DAG is not a tree then there are
+      # multiple paths to the same object).
+      if current_assignment is not trackable:
+        logging.warning(
+            ("Inconsistent references when loading the checkpoint into this "
+             "object graph. Either the Trackable object references in the "
+             "Python program have changed in an incompatible way, or the "
+             "checkpoint was generated in an incompatible program.\n\nTwo "
+             "checkpoint references resolved to different objects (%s and %s).")
+            % (current_assignment, trackable))
+      return False  # Not a new assignment
+
+  def is_simple_variable(self):
+    """Determine whether this value is restorable with a Tensor initializer."""
+    attributes = self.object_proto.attributes
+    return (len(attributes) == 1
+            and attributes[0].name == VARIABLE_VALUE_KEY
+            and not self.object_proto.children)
+
+  def value_tensors(self):
+    """Create value `Tensor`s for this object's attributes.
+
+    Does not require that the Python object has been created. Used for
+    restore-on-create when executing eagerly.
+
+    Returns:
+      A dictionary mapping from object attribute names to `Tensor`s.
+    """
+    value_tensors = {}
+    for serialized_tensor in self.object_proto.attributes:
+      checkpoint_key = serialized_tensor.checkpoint_key
+      dtype = self._checkpoint.dtype_map[checkpoint_key]
+      base_type = dtype.base_dtype
+      with ops.init_scope():
+        with ops.device("/cpu:0"):
+          # Run the restore itself on the CPU.
+          value, = io_ops.restore_v2(
+              prefix=self._checkpoint.save_path_tensor,
+              tensor_names=[checkpoint_key],
+              shape_and_slices=[""],
+              dtypes=[base_type],
+              name="%s_checkpoint_read" % (serialized_tensor.name,))
+        # Copy the value to the current device if necessary.
+        value_tensors[serialized_tensor.name] = array_ops.identity(value)
+    return value_tensors
+
+  def _gather_ops_or_named_saveables(self):
+    """Looks up or creates SaveableObjects which don't have cached ops."""
+    saveables = self.trackable._gather_saveables_for_checkpoint()  # pylint: disable=protected-access
+    # Name saveables based on the name this object had when it was checkpointed.
+    named_saveables = {}
+    python_saveables = []
+    existing_restore_ops = []
+    for serialized_tensor in self.object_proto.attributes:
+      if context.executing_eagerly():
+        existing_op = None
+      else:
+        existing_op = self._checkpoint.restore_ops_by_name.get(
+            serialized_tensor.checkpoint_key, None)
+      if existing_op is not None:
+        existing_restore_ops.append(existing_op)
+        continue
+
+      # If we don't have cached ops for this SaveableObject, we'll see if the
+      # SaveableObject itself has been cached. If not, we'll make it, and
+      # either way we'll extract new ops from it (or if it has Python state to
+      # restore, we'll run that).
+      saveables_cache = self._checkpoint.graph_view.saveables_cache
+      if saveables_cache is None:
+        # No SaveableObject caching when executing eagerly.
+        saveable = None
+      else:
+        # If we've already created and cached a SaveableObject for this
+        # attribute, we can re-use it to avoid re-creating some ops when graph
+        # building.
+        saveable_list = saveables_cache.get(
+            self.trackable, {}).get(serialized_tensor.name, (None,))
+        if len(saveable_list) == 1:
+          # Almost every attribute will have exactly one SaveableObject.
+          saveable, = saveable_list
+        else:
+          # Don't use cached SaveableObjects for partitioned variables, which is
+          # the only case where we'd have a list of SaveableObjects. Op caching
+          # will catch them.
+          saveable = None
+      if saveable is not None:
+        if serialized_tensor.checkpoint_key not in saveable.name:
+          # The name of this attribute has changed, so the cached
+          # SaveableObject is stale and needs to be re-generated.
+          saveable = None
+          del saveables_cache[self.trackable]
+          break
+      if saveable is None:
+        # If there was no cached SaveableObject, we should check if the Python
+        # object has the attribute.
+        saveable_factory = saveables.get(serialized_tensor.name, None)
+        if saveable_factory is None:
+          # Purposefully does not throw an exception if attributes have been
+          # added or deleted. Stores unused attributes so an exception can be
+          # raised if the user decides to check that everything in the
+          # checkpoint was loaded.
+          if not serialized_tensor.optional_restore:
+            self._checkpoint.unused_attributes.setdefault(
+                self.trackable, []).append(serialized_tensor.name)
+          continue
+        if callable(saveable_factory):
+          saveable = saveable_factory(name=serialized_tensor.checkpoint_key)
+        else:
+          saveable = saveable_factory
+        if saveables_cache is not None:
+          saveables_cache.setdefault(
+              self.trackable, {})[serialized_tensor.name] = [saveable]
+      if isinstance(saveable, PythonStateSaveable):
+        python_saveables.append(saveable)
+      else:
+        named_saveables[serialized_tensor.checkpoint_key] = saveable
+    return existing_restore_ops, named_saveables, python_saveables
+
+  def restore_ops(self):
+    """Create or fetch restore ops for this object's attributes.
+
+    Requires that the `Trackable` Python object has been bound to an object
+    ID in the checkpoint.
+
+    Returns:
+      A list of operations when graph building, or an empty list when executing
+      eagerly.
+    """
+    (restore_ops,
+     tensor_saveables,
+     python_saveables) = self._gather_ops_or_named_saveables()
+    restore_ops.extend(self._checkpoint.restore_saveables(
+        tensor_saveables, python_saveables))
+    return restore_ops
+
+  @property
+  def checkpoint(self):
+    return self._checkpoint
+
+  @property
+  def trackable(self):
+    return self._checkpoint.object_by_proto_id[self._proto_id]
+
+  @property
+  def object_proto(self):
+    return self._checkpoint.object_graph_proto.nodes[self._proto_id]
+
+  @property
+  def restore_uid(self):
+    return self._checkpoint.restore_uid
+
+  def __repr__(self):
+    return repr(self.object_proto)
+
+
+_DeferredSlotVariableRestoration = collections.namedtuple(
+    "_DeferredSlotVariableRestoration",
+    [
+        "original_variable",
+        "slot_variable_id",
+        "slot_name",
+    ]
+)
+
+_SlotVariableRestoration = collections.namedtuple(
+    "_SlotVariableRestoration",
+    [
+        # The checkpoint proto id of the optimizer object.
+        "optimizer_id",
+        # The checkpoint proto id of the slot variable.
+        "slot_variable_id",
+        "slot_name",
+    ])
+
+
+def no_automatic_dependency_tracking(method):
+  """Disables automatic dependency tracking on attribute assignment.
+
+  Use to decorate any method of a Trackable object. Attribute assignment in
+  that method will not add dependencies (also respected in Model). Harmless if
+  used in a class which does not do automatic dependency tracking (which means
+  it's safe to use in base classes which may have subclasses which also inherit
+  from Trackable).
+
+  Args:
+    method: The method to decorate.
+  Returns:
+    A decorated method which sets and un-sets automatic dependency tracking for
+    the object the method is called on (not thread safe).
+  """
+
+  def _method_wrapper(self, *args, **kwargs):
+    previous_value = getattr(self, "_setattr_tracking", True)
+    self._setattr_tracking = False  # pylint: disable=protected-access
+    try:
+      result = method(self, *args, **kwargs)
+    finally:
+      self._setattr_tracking = previous_value  # pylint: disable=protected-access
+    return result
+
+  return tf_decorator.make_decorator(
+      target=method, decorator_func=_method_wrapper)
+
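+# Editor's note: an illustrative sketch (hypothetical, not part of this
+# change). A class with automatic dependency tracking can opt a method out:
+#
+#   class MyModel(AutoTrackable):
+#
+#     @no_automatic_dependency_tracking
+#     def _init_scratch(self):
+#       self._scratch = []  # No checkpoint dependency is created here.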
+
+class Trackable(object):
+  """Base class for `Trackable` objects without automatic dependencies.
+
+  This class has no __setattr__ override for performance reasons. Dependencies
+  must be added explicitly. Unless attribute assignment is performance-critical,
+  use `AutoTrackable` instead. Use `Trackable` for `isinstance`
+  checks.
+  """
+
+  # Trackable does not do automatic dependency tracking, but uses the
+  # no_automatic_dependency_tracking decorator so it can avoid adding
+  # dependencies if a subclass is AutoTrackable / inherits from Model (both of
+  # which have __setattr__ overrides).
+  @no_automatic_dependency_tracking
+  def _maybe_initialize_trackable(self):
+    """Initialize dependency management.
+
+    Not __init__, since most objects will forget to call it.
+    """
+    if hasattr(self, "_unconditional_checkpoint_dependencies"):
+      # __init__ already called. This check means that we don't need
+      # Trackable.__init__() in the constructor of every TensorFlow object.
+      return
+    # A list of TrackableReference objects. Some classes implementing
+    # `Trackable`, notably `Optimizer`s, may override the
+    # _checkpoint_dependencies property with conditional dependencies
+    # (e.g. based on the current graph when saving).
+    self._unconditional_checkpoint_dependencies = []
+    # Maps names -> Trackable objects
+    self._unconditional_dependency_names = {}
+    # Restorations for other Trackable objects on which this object may
+    # eventually depend. Maps local name -> CheckpointPosition list. Optimizers
+    # tack on conditional dependencies, and so need separate management of
+    # deferred dependencies too.
+    self._unconditional_deferred_dependencies = {}
+    # The UID of the highest assignment to this object. Used to ensure that the
+    # last requested assignment determines the final value of an object.
+    if hasattr(self, "_update_uid"):
+      raise AssertionError(
+          "Internal error: the object had an update UID set before its "
+          "initialization code was run.")
+    self._update_uid = -1
+    # When executing eagerly, holds a collection of _NameBasedRestoreCoordinator
+    # instances, which should be checked when creating variables or other
+    # saveables. These are passed on recursively to all dependencies, since
+    # unlike object-based checkpoint restores we don't know which subgraph is
+    # being restored in advance. This mechanism is only necessary for
+    # restore-on-create when executing eagerly, and so is unused when graph
+    # building.
+    self._name_based_restores = set()
+
+  def _no_dependency(self, value):
+    """If automatic dependency tracking is enabled, ignores `value`."""
+    return value
+
+  def _name_based_attribute_restore(self, checkpoint):
+    """Restore the object's attributes from a name-based checkpoint."""
+    self._name_based_restores.add(checkpoint)
+    if self._update_uid < checkpoint.restore_uid:
+      checkpoint.eager_restore(self)
+      self._update_uid = checkpoint.restore_uid
+
+  @property
+  def _checkpoint_dependencies(self):
+    """All dependencies of this object.
+
+    May be overridden to include conditional dependencies.
+
+    Returns:
+      A list of `TrackableReference` objects indicating named
+      `Trackable` dependencies which should be saved along with this
+      object.
+    """
+    return self._unconditional_checkpoint_dependencies
+
+  @property
+  def _deferred_dependencies(self):
+    """A dictionary with deferred dependencies.
+
+    Stores restorations for other Trackable objects on which this object
+    may eventually depend. May be overridden by sub-classes (e.g. Optimizers use
+    conditional dependencies based on the current graph, and so need separate
+    management of deferred dependencies too).
+
+    Returns:
+      A dictionary mapping from local name to a list of CheckpointPosition
+      objects.
+    """
+    return self._unconditional_deferred_dependencies
+
+  def _lookup_dependency(self, name):
+    """Look up a dependency by name.
+
+    May be overridden to include conditional dependencies.
+
+    Args:
+      name: The local name of the dependency.
+    Returns:
+      A `Trackable` object, or `None` if no dependency by this name was
+      found.
+    """
+    return self._unconditional_dependency_names.get(name, None)
+
+  def _add_variable_with_custom_getter(
+      self, name, shape=None, dtype=dtypes.float32,
+      initializer=None, getter=None, overwrite=False,
+      **kwargs_for_getter):
+    """Restore-on-create for a variable be saved with this `Trackable`.
+
+    If the user has requested that this object or another `Trackable` which
+    depends on this object be restored from a checkpoint (deferred loading
+    before variable object creation), `initializer` may be ignored and the value
+    from the checkpoint used instead.
+
+    Args:
+      name: A name for the variable. Must be unique within this object.
+      shape: The shape of the variable.
+      dtype: The data type of the variable.
+      initializer: The initializer to use. Ignored if there is a deferred
+        restoration left over from a call to
+        `_restore_from_checkpoint_position`.
+      getter: The getter to wrap which actually fetches the variable.
+      overwrite: If True, disables unique name and type checks.
+      **kwargs_for_getter: Passed to the getter.
+
+    Returns:
+      The new variable object.
+
+    Raises:
+      ValueError: If the variable name is not unique.
+    """
+    self._maybe_initialize_trackable()
+    with ops.init_scope():
+      if context.executing_eagerly():
+        # If this is a variable with a single Tensor stored in the checkpoint,
+        # we can set that value as an initializer rather than initializing and
+        # then assigning (when executing eagerly). This call returns None if
+        # there is nothing to restore.
+        checkpoint_initializer = self._preload_simple_restoration(
+            name=name, shape=shape)
+      else:
+        checkpoint_initializer = None
+      if (checkpoint_initializer is not None
+          and not (
+              isinstance(initializer, CheckpointInitialValue)
+              and (initializer.restore_uid
+                   > checkpoint_initializer.restore_uid))):
+        # If multiple Trackable objects are "creating" the same variable
+        # via the magic of custom getters, the one with the highest restore UID
+        # (the one called last) has to make the final initializer. If another
+        # custom getter interrupts this process by overwriting the initializer,
+        # then we'll catch that when we call _track_trackable. So this is
+        # "best effort" to set the initializer with the highest restore UID.
+        initializer = checkpoint_initializer
+        shape = None
+    new_variable = getter(
+        name=name, shape=shape, dtype=dtype, initializer=initializer,
+        **kwargs_for_getter)
+
+    # If we set an initializer and the variable processed it, tracking will not
+    # assign again. It will add this variable to our dependencies, and if there
+    # is a non-trivial restoration queued, it will handle that. This also
+    # handles slot variables.
+    if not overwrite or isinstance(new_variable, Trackable):
+      return self._track_trackable(new_variable, name=name,
+                                   overwrite=overwrite)
+    else:
+      # TODO(allenl): Some variable types are not yet supported. Remove this
+      # fallback once all get_variable() return types are Trackable.
+      return new_variable
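+
+  # Editor's note: a hypothetical sketch of routing variable creation through
+  # this hook (mirroring the usage in base_test.py) so restore-on-create can
+  # take effect:
+  #
+  #   v = obj._add_variable_with_custom_getter(
+  #       name="v", shape=[], getter=variable_scope.get_variable)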
+
+  def _preload_simple_restoration(self, name, shape):
+    """Return a dependency's value for restore-on-create.
+
+    Note the restoration is not deleted; if for some reason preload is called
+    and then not assigned to the variable (for example because a custom getter
+    overrides the initializer), the assignment will still happen once the
+    variable is tracked (determined based on checkpoint.restore_uid).
+
+    Args:
+      name: The object-local name of the dependency holding the variable's
+        value.
+      shape: The shape of the variable being loaded into.
+    Returns:
+      A callable for use as a variable's initializer/initial_value, or None if
+      one should not be set (either because there was no variable with this name
+      in the checkpoint or because it needs more complex deserialization). Any
+      non-trivial deserialization will happen when the variable object is
+      tracked.
+    """
+    deferred_dependencies_list = self._deferred_dependencies.get(name, ())
+    if not deferred_dependencies_list:
+      # Nothing to do; we don't have a restore for this dependency queued up.
+      return
+    for checkpoint_position in deferred_dependencies_list:
+      if not checkpoint_position.is_simple_variable():
+        # If _any_ pending restoration is too complicated to fit in an
+        # initializer (because it has dependencies, or because there are
+        # multiple Tensors to restore), bail and let the general tracking code
+        # handle it.
+        return None
+    checkpoint_position = max(
+        deferred_dependencies_list,
+        key=lambda restore: restore.checkpoint.restore_uid)
+    return CheckpointInitialValue(
+        checkpoint_position=checkpoint_position, shape=shape)
+
+  def _track_trackable(self, trackable, name, overwrite=False):
+    """Declare a dependency on another `Trackable` object.
+
+    Indicates that checkpoints for this object should include variables from
+    `trackable`.
+
+    Variables in a checkpoint are mapped to `Trackable`s based on the names
+    provided when the checkpoint was written. To avoid breaking existing
+    checkpoints when modifying a class, neither variable names nor dependency
+    names (the names passed to `_track_trackable`) may change.
+
+    Args:
+      trackable: A `Trackable` which this object depends on.
+      name: A local name for `trackable`, used for loading checkpoints into
+        the correct objects.
+      overwrite: Boolean, whether silently replacing dependencies is OK. Used
+        for __setattr__, where throwing an error on attribute reassignment would
+        be inappropriate.
+
+    Returns:
+      `trackable`, for convenience when declaring a dependency and
+      assigning to a member variable in one statement.
+
+    Raises:
+      TypeError: If `trackable` does not inherit from `Trackable`.
+      ValueError: If another object is already tracked by this name.
+    """
+    self._maybe_initialize_trackable()
+    if not isinstance(trackable, Trackable):
+      raise TypeError(
+          ("Trackable._track_trackable() passed type %s, not a "
+           "Trackable.") % (type(trackable),))
+    new_reference = TrackableReference(name=name, ref=trackable)
+    current_object = self._lookup_dependency(name)
+    if (current_object is not None
+        and current_object is not trackable):
+      if not overwrite:
+        raise ValueError(
+            ("Called Trackable._track_trackable() with name='%s', "
+             "but a Trackable with this name is already declared as a "
+             "dependency. Names must be unique (or overwrite=True).") % (name,))
+      # This is a weird thing to do, but we're not going to stop people from
+      # using __setattr__.
+      for index, (old_name, _) in enumerate(
+          self._unconditional_checkpoint_dependencies):
+        if name == old_name:
+          self._unconditional_checkpoint_dependencies[index] = new_reference
+    elif current_object is None:
+      self._unconditional_checkpoint_dependencies.append(new_reference)
+      self._handle_deferred_dependencies(
+          name=name, trackable=trackable)
+    self._unconditional_dependency_names[name] = trackable
+    return trackable
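+
+  # Editor's note: a minimal sketch of declaring a dependency (hypothetical,
+  # not part of this change):
+  #
+  #   parent, child = Trackable(), Trackable()
+  #   parent._track_trackable(child, name="child")
+  #   assert parent._lookup_dependency("child") is child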
+
+  def _handle_deferred_dependencies(self, name, trackable):
+    """Pop and load any deferred checkpoint restores into `trackable`.
+
+    This method does not add a new dependency on `trackable`, but it does
+    check if any outstanding/deferred dependencies have been queued waiting for
+    this dependency to be added (matched based on `name`). If so,
+    `trackable` and its dependencies are restored. The restorations are
+    considered fulfilled and so are deleted.
+
+    `_track_trackable` is more appropriate for adding a
+    normal/unconditional dependency, and includes handling for deferred
+    restorations. This method allows objects such as `Optimizer` to use the same
+    restoration logic while managing conditional dependencies themselves, by
+    overriding `_checkpoint_dependencies` and `_lookup_dependency` to change the
+    object's dependencies based on the context it is saved/restored in (a single
+    optimizer instance can have state associated with multiple graphs).
+
+    Args:
+      name: The name of the dependency within this object (`self`), used to
+        match `trackable` with values saved in a checkpoint.
+      trackable: The Trackable object to restore (inheriting from
+        `Trackable`).
+    """
+    self._maybe_initialize_trackable()
+    trackable._maybe_initialize_trackable()  # pylint: disable=protected-access
+    deferred_dependencies_list = self._deferred_dependencies.pop(name, ())
+    for checkpoint_position in sorted(
+        deferred_dependencies_list,
+        key=lambda restore: restore.checkpoint.restore_uid,
+        reverse=True):
+      checkpoint_position.restore(trackable)
+
+    # Pass on any name-based restores queued in this object.
+    for name_based_restore in sorted(
+        self._name_based_restores,
+        key=lambda checkpoint: checkpoint.restore_uid,
+        reverse=True):
+      trackable._name_based_attribute_restore(name_based_restore)  # pylint: disable=protected-access
+
+  def _restore_from_checkpoint_position(self, checkpoint_position):
+    """Restore this object and its dependencies (may be deferred)."""
+    # Attempt a breadth-first traversal, since presumably the user has more
+    # control over shorter paths. If we don't have all of the dependencies at
+    # this point, the end result is not breadth-first (since other deferred
+    # traversals will happen later).
+    visit_queue = collections.deque([checkpoint_position])
+    restore_ops = []
+    while visit_queue:
+      current_position = visit_queue.popleft()
+      restore_ops.extend(nest.flatten(
+          current_position.trackable  # pylint: disable=protected-access
+          ._single_restoration_from_checkpoint_position(
+              checkpoint_position=current_position,
+              visit_queue=visit_queue)))
+    return restore_ops
+
+  def _single_restoration_from_checkpoint_position(
+      self, checkpoint_position, visit_queue):
+    """Restore this object, and either queue its dependencies or defer them."""
+    self._maybe_initialize_trackable()
+    checkpoint = checkpoint_position.checkpoint
+    # If the UID of this restore is lower than our current update UID, we don't
+    # need to actually restore the object. However, we should pass the
+    # restoration on to our dependencies.
+    if checkpoint.restore_uid > self._update_uid:
+      restore_ops = checkpoint_position.restore_ops()
+      self._update_uid = checkpoint.restore_uid
+    else:
+      restore_ops = ()
+    for child in checkpoint_position.object_proto.children:
+      child_position = CheckpointPosition(
+          checkpoint=checkpoint,
+          proto_id=child.node_id)
+      local_object = self._lookup_dependency(child.local_name)
+      if local_object is None:
+        # We don't yet have a dependency registered with this name. Save it
+        # in case we do.
+        self._deferred_dependencies.setdefault(child.local_name, []).append(
+            child_position)
+      else:
+        if child_position.bind_object(trackable=local_object):
+          # This object's correspondence is new, so dependencies need to be
+          # visited. Delay doing it so that we get a breadth-first dependency
+          # resolution order (shallowest paths first). The caller is responsible
+          # for emptying visit_queue.
+          visit_queue.append(child_position)
+    return restore_ops
+
+  def _gather_saveables_for_checkpoint(self):
+    """Returns a dictionary of values to checkpoint with this object.
+
+    Keys in the returned dictionary are local to this object and in a separate
+    namespace from dependencies. Values may either be `SaveableObject` factories
+    or variables easily converted to `SaveableObject`s (as in `tf.train.Saver`'s
+    `var_list` constructor argument).
+
+    `SaveableObjects` have a name set, which Trackable needs to generate
+    itself. So rather than returning `SaveableObjects` directly, this method
+    should return a dictionary of callables which take `name` arguments and
+    return `SaveableObjects` with that name.
+
+    If this object may also be passed to the global-name-based `tf.train.Saver`,
+    the returned callables should have a default value for their name argument
+    (i.e. be callable with no arguments).
+
+    Returned values must be saved only by this object; if any value may be
+    shared, it should instead be a dependency. For example, variable objects
+    save their own values with the key `VARIABLE_VALUE_KEY`, but objects which
+    reference variables simply add a dependency.
+
+    Returns:
+      The dictionary mapping attribute names to `SaveableObject` factories
+      described above. For example:
+      {VARIABLE_VALUE_KEY:
+       lambda name="global_name_for_this_object":
+       SaveableObject(name=name, ...)}
+    """
+    if not hasattr(self, "get_config"):
+      return {}
+    try:
+      self.get_config()
+    except NotImplementedError:
+      return {}
+    weak_self = weakref.ref(self)
+    def _state_callback():
+      """Serializes `self.get_config()` for saving."""
+      dereferenced_self = weak_self()
+      if dereferenced_self:
+        return json.dumps(
+            dereferenced_self,
+            default=serialization.get_json_type,
+            sort_keys=True).encode("utf8")
+      else:
+        return ""
+    return {OBJECT_CONFIG_JSON_KEY: functools.partial(
+        PythonStringStateSaveable,
+        state_callback=_state_callback)}
+
+  def _list_functions_for_serialization(self):
+    """Lists the functions of this trackable to serialize.
+
+    Internal sub-classes can override this with specific logic. E.g.
+    `AutoTrackable` provides an implementation that returns the attributes
+    whose values are functions.
+
+    Returns:
+        A dictionary mapping attribute names to `Function` or
+        `ConcreteFunction`.
+    """
+    return dict()
diff --git a/tensorflow/python/training/tracking/base_test.py b/tensorflow/python/training/tracking/base_test.py
new file mode 100644
index 0000000..4a74417
--- /dev/null
+++ b/tensorflow/python/training/tracking/base_test.py
@@ -0,0 +1,89 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import os
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.platform import test
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import util
+
+
+class InterfaceTests(test.TestCase):
+
+  def testOverwrite(self):
+    root = base.Trackable()
+    leaf = base.Trackable()
+    root._track_trackable(leaf, name="leaf")
+    (current_name, current_dependency), = root._checkpoint_dependencies
+    self.assertIs(leaf, current_dependency)
+    self.assertEqual("leaf", current_name)
+    duplicate_name_dep = base.Trackable()
+    with self.assertRaises(ValueError):
+      root._track_trackable(duplicate_name_dep, name="leaf")
+    root._track_trackable(duplicate_name_dep, name="leaf", overwrite=True)
+    (current_name, current_dependency), = root._checkpoint_dependencies
+    self.assertIs(duplicate_name_dep, current_dependency)
+    self.assertEqual("leaf", current_name)
+
+  def testAddVariableOverwrite(self):
+    root = base.Trackable()
+    a = root._add_variable_with_custom_getter(
+        name="v", shape=[], getter=variable_scope.get_variable)
+    self.assertEqual([root, a], util.list_objects(root))
+    with ops.Graph().as_default():
+      b = root._add_variable_with_custom_getter(
+          name="v", shape=[], overwrite=True,
+          getter=variable_scope.get_variable)
+      self.assertEqual([root, b], util.list_objects(root))
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(
+          ValueError, "already declared as a dependency"):
+        root._add_variable_with_custom_getter(
+            name="v", shape=[], overwrite=False,
+            getter=variable_scope.get_variable)
+
+  def testAssertConsumedWithUnusedPythonState(self):
+    has_config = base.Trackable()
+    has_config.get_config = lambda: {}
+    saved = util.Checkpoint(obj=has_config)
+    save_path = saved.save(os.path.join(self.get_temp_dir(), "ckpt"))
+    restored = util.Checkpoint(obj=base.Trackable())
+    restored.restore(save_path).assert_consumed()
+
+  def testAssertConsumedFailsWithUsedPythonState(self):
+    has_config = base.Trackable()
+    attributes = {
+        "foo_attr": functools.partial(
+            base.PythonStringStateSaveable,
+            state_callback=lambda: "",
+            restore_callback=lambda x: None)}
+    has_config._gather_saveables_for_checkpoint = lambda: attributes
+    saved = util.Checkpoint(obj=has_config)
+    save_path = saved.save(os.path.join(self.get_temp_dir(), "ckpt"))
+    restored = util.Checkpoint(obj=base.Trackable())
+    status = restored.restore(save_path)
+    with self.assertRaisesRegexp(AssertionError, "foo_attr"):
+      status.assert_consumed()
+
+
+if __name__ == "__main__":
+  ops.enable_eager_execution()
+  test.main()
diff --git a/tensorflow/python/training/tracking/data_structures.py b/tensorflow/python/training/tracking/data_structures.py
new file mode 100644
index 0000000..7f1cc50
--- /dev/null
+++ b/tensorflow/python/training/tracking/data_structures.py
@@ -0,0 +1,849 @@
+"""Trackable data structures."""
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import operator
+import sys
+
+import six
+
+from tensorflow.python.eager import def_function
+from tensorflow.python.eager import function as defun
+from tensorflow.python.ops import variables
+from tensorflow.python.saved_model import revived_types
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import layer_utils
+
+
+class NoDependency(object):
+  """Allows attribute assignment to `Trackable` objects with no dependency.
+
+  Example usage:
+  ```python
+  obj = Trackable()
+  obj.has_dependency = tf.Variable(0., name="dep")
+  obj.no_dependency = NoDependency(tf.Variable(1., name="nodep"))
+  assert obj.no_dependency.name == "nodep:0"
+  ```
+
+  `obj` in this example has a dependency on the variable "dep", and both
+  attributes contain un-wrapped `Variable` objects.
+
+  `NoDependency` also works with `tf.keras.Model`, but only for checkpoint
+  dependencies: wrapping a `Layer` in `NoDependency` will assign the (unwrapped)
+  `Layer` to the attribute without a checkpoint dependency, but the `Model` will
+  still track the `Layer` (so it will appear in `Model.layers`, and its
+  variables will appear in `Model.variables`).
+  """
+
+  def __init__(self, value):
+    self.value = value
+
+
+def _wrap_or_unwrap(value):
+  """Wraps basic data structures, unwraps NoDependency objects."""
+  # pylint: disable=unidiomatic-typecheck
+  # Exact type checking to avoid mucking up custom logic in list/dict
+  # subclasses, e.g. collections.Counter.
+  if isinstance(value, NoDependency):
+    return value.value
+  if isinstance(value, base.Trackable):
+    return value  # Skip conversion for already trackable objects.
+  elif type(value) == dict:
+    return _DictWrapper(value)
+  elif type(value) == collections.OrderedDict:
+    return _DictWrapper(value)
+  elif type(value) == list:
+    return _ListWrapper(value)
+  else:
+    return value
+  # pylint: enable=unidiomatic-typecheck
+  # TODO(allenl): Handle other common data structures. Tuples will require
+  # special casing (tuple subclasses are not weak referenceable, so replacement
+  # with a wrapper that subclasses tuple on attribute assignment works poorly,
+  # and replacement with a wrapper that isn't a tuple is also problematic),
+  # probably a tree traversal where the leaves are non-tuples(/namedtuples) to
+  # come up with names. Dictionaries should look like lists.
+
+
+def sticky_attribute_assignment(trackable, name, value):
+  """Adds dependencies, generally called from __setattr__.
+
+  This behavior is shared between Trackable and Model.
+
+  Respects NoDependency indicators, but otherwise makes trackable objects
+  out of common data structures and tracks objects by their attribute names.
+
+  Args:
+    trackable: The object to add dependencies to (generally the one having
+      an attribute assigned).
+    name: The attribute name being assigned.
+    value: The value being assigned. Not necessarily a trackable object.
+
+  Returns:
+    The value which should be stored in the attribute (unwrapped from a
+    NoDependency object if necessary).
+  """
+  if isinstance(value, NoDependency):
+    add_dependency = False
+  else:
+    add_dependency = True
+  value = _wrap_or_unwrap(value)
+  if not add_dependency:
+    return value
+  if isinstance(value, base.Trackable):
+    trackable._track_trackable(  # pylint: disable=protected-access
+        value, name=name,
+        # Allow the user to switch the Trackable which is tracked by this
+        # name, since assigning a new variable to an attribute has
+        # historically been fine (e.g. Adam did this).
+        overwrite=True)
+  return value
+
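+# Editor's note: an illustrative sketch (hypothetical, not part of this
+# change). A __setattr__ override can delegate to this helper; plain lists
+# and dicts are wrapped, and trackable values gain a named dependency:
+#
+#   value = sticky_attribute_assignment(
+#       trackable=obj, name="blocks", value=[block_a, block_b])
+#   # `value` is now a _ListWrapper tracked under the name "blocks".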
+
+class TrackableDataStructure(base.Trackable):
+  """Base class for data structures which contain trackable objects."""
+
+  def __init__(self):
+    self.trainable = True
+    self._extra_variables = []
+
+  def _track_value(self, value, name):
+    """Add a dependency on `value`."""
+    value = sticky_attribute_assignment(
+        trackable=self, value=value, name=name)
+    if isinstance(value, variables.Variable):
+      self._extra_variables.append(value)
+    if not isinstance(value, base.Trackable):
+      raise ValueError(
+          ("Only trackable objects (such as Layers or Optimizers) may be "
+           "stored in a List object. Got %s, which does not inherit from "
+           "Trackable.") % (value,))
+    if hasattr(value, "_use_resource_variables"):
+      # In subclassed models, legacy layers (tf.layers) must always use
+      # resource variables.
+      value._use_resource_variables = True  # pylint: disable=protected-access
+    return value
+
+  @property
+  def _values(self):
+    """An iterable/sequence which may contain trackable objects."""
+    raise NotImplementedError("Abstract method")
+
+  @property
+  def _layers(self):
+    """All Layers and Layer containers, including empty containers."""
+    # Filter on demand so that, if a wrapper and the object it wraps get out
+    # of sync, the wrapper reflects the wrapped object's current values.
+    collected = []
+    for obj in self._values:
+      if (isinstance(obj, TrackableDataStructure)
+          or layer_utils.is_layer(obj)
+          or layer_utils.has_weights(obj)):
+        collected.append(obj)
+    return collected
+
+  @property
+  def layers(self):
+    return layer_utils.filter_empty_layer_containers(self._layers)
+
+  @property
+  def trainable_weights(self):
+    return layer_utils.gather_trainable_weights(
+        trainable=self.trainable,
+        sub_layers=self._layers,
+        extra_variables=self._extra_variables)
+
+  @property
+  def non_trainable_weights(self):
+    return layer_utils.gather_non_trainable_weights(
+        trainable=self.trainable,
+        sub_layers=self._layers,
+        extra_variables=self._extra_variables)
+
+  @property
+  def weights(self):
+    return self.trainable_weights + self.non_trainable_weights
+
+  @property
+  def trainable_variables(self):
+    return self.trainable_weights
+
+  @property
+  def non_trainable_variables(self):
+    return self.non_trainable_weights
+
+  @property
+  def variables(self):
+    return self.weights
+
+  @property
+  def updates(self):
+    """Aggregate updates from any `Layer` instances."""
+    # Updates and conditional losses are forwarded as-is rather than being
+    # filtered based on inputs, since this is just a container and won't ever
+    # have any inputs.
+    aggregated = []
+    for layer in self.layers:
+      if hasattr(layer, "updates"):
+        aggregated += layer.updates
+    return aggregated
+
+  @property
+  def losses(self):
+    """Aggregate losses from any `Layer` instances."""
+    aggregated = []
+    for layer in self.layers:
+      if hasattr(layer, "losses"):
+        aggregated += layer.losses
+    return aggregated
+
+  def __hash__(self):
+    # Support object-identity hashing, so these structures can be used as keys
+    # in sets/dicts.
+    return id(self)
+
+  def __eq__(self, other):
+    # Similar to Tensors, trackable data structures use object-identity
+    # equality to support set/dict membership.
+    return self is other
+
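+# Editor's note (hypothetical sketch): identity semantics mean two containers
+# holding the same values are still distinct set/dict keys (here `v` is some
+# trackable value, e.g. a `tf.Variable`):
+#
+#   a, b = List([v]), List([v])
+#   assert a is not b and len({a, b}) == 2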
+
+class List(TrackableDataStructure, collections.Sequence):
+  """An append-only sequence type which is trackable.
+
+  Maintains checkpoint dependencies on its contents (which must also be
+  trackable), and forwards any `Layer` metadata such as updates and losses.
+
+  Note that `List` is purely a container. It lets a `tf.keras.Model` or
+  other trackable object know about its contents, but does not call any
+  `Layer` instances which are added to it. To indicate a sequence of `Layer`
+  instances which should be called sequentially, use `tf.keras.Sequential`.
+
+  Example usage:
+  ```python
+  class HasList(tf.keras.Model):
+
+    def __init__(self):
+      super(HasList, self).__init__()
+      self.layer_list = tf.contrib.checkpoint.List([layers.Dense(3)])
+      self.layer_list.append(layers.Dense(4))
+
+    def call(self, x):
+      aggregation = 0.
+      for l in self.layer_list:
+        x = l(x)
+        aggregation += tf.reduce_sum(x)
+      return aggregation
+  ```
+
+  This kind of wrapping is necessary because `Trackable` objects do not
+  (yet) deeply inspect regular Python data structures, so for example assigning
+  a regular list (`self.layer_list = [layers.Dense(3)]`) does not create a
+  checkpoint dependency and does not add the `Layer` instance's weights to its
+  parent `Model`.
+  """
+
+  def __init__(self, *args, **kwargs):
+    """Construct a new sequence. Arguments are passed to `list()`."""
+    super(List, self).__init__()
+    self._storage = self._make_storage(*args, **kwargs)
+    for index, element in enumerate(self._storage):
+      self._storage[index] = self._track_value(
+          element, name=self._name_element(index))
+
+  def copy(self):
+    return type(self)(copy.copy(self._storage))
+
+  def __copy__(self):
+    return self.copy()
+
+  def __deepcopy__(self, memo):
+    return type(self)(copy.deepcopy(self._storage, memo))
+
+  def _make_storage(self, *args, **kwargs):
+    """Determines the backing storage (overridden in subclasses)."""
+    return list(*args, **kwargs)
+
+  def _name_element(self, index):
+    return "%d" % (index,)
+
+  @property
+  def _values(self):
+    return self
+
+  def append(self, value):
+    """Add a new trackable value."""
+    value = self._track_value(value, self._name_element(len(self._storage)))
+    self._storage.append(value)
+
+  def extend(self, values):
+    """Add a sequence of trackable values."""
+    for value in values:
+      self.append(value)
+
+  def __iadd__(self, values):
+    self.extend(values)
+    return self
+
+  def __add__(self, other):
+    return self.__class__(self._storage + getattr(other, "_storage", other))
+
+  def __imul__(self, y):
+    if y <= 0:
+      raise ValueError(
+          "List only supports append, multiplying in place by %d removes "
+          "elements." % y)
+
+    n = len(self._storage)
+    for _ in range(y - 1):
+      for i in range(n):
+        self.append(self._storage[i])
+
+    return self
+
+  def __mul__(self, n):
+    return self.__class__(self._storage * n)
+
+  def __rmul__(self, n):
+    return self * n
+
+  def __radd__(self, other):
+    return self.__class__(other) + self
+
+  def __getitem__(self, key):
+    return self._storage[key]
+
+  def __getslice__(self, i, j):
+    return self._storage[slice(i, j)]
+
+  def __len__(self):
+    return len(self._storage)
+
+  def __repr__(self):
+    return "List(%s)" % (repr(self._storage),)
+
+  def __sizeof__(self):
+    return super(List, self).__sizeof__() + sys.getsizeof(self._storage)
+
+
+# TODO(tomhennigan) Update to collections.UserList?
+class _ListWrapper(List, collections.MutableSequence,
+                   # Shadowed, but there for isinstance checks.
+                   list):
+  """Wraps the built-in `list` to support restore-on-create for variables.
+
+  Unlike `List`, this sequence type is mutable in the same ways built-in lists
+  are. Instead of throwing an error immediately like `List`, it records
+  problematic mutations (e.g. assigning a new element to a position already
+  occupied, meaning both elements get the same names at different times) and
+  refuses to save.
+
+  On assignment to an attribute of a Model or Trackable object, Python
+  lists are replaced with _ListWrapper. Wrapping a list in a
+  `tf.contrib.checkpoint.NoDependency` object prevents this.
+  """
+
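+  # Editor's note (hypothetical sketch): a non-append mutation poisons saving:
+  #
+  #   model.blocks = [block_a]      # the list becomes a _ListWrapper
+  #   model.blocks[0] = block_b     # recorded as a non-append mutation
+  #   tf.train.Checkpoint(m=model).save(path)  # raises ValueError
+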
+  def __init__(self, wrapped_list):
+    """Construct a new list wrapper.
+
+    Args:
+      wrapped_list: The initial value of the data structure. A shallow copy may
+        be maintained for error checking. `wrapped_list` itself should not be
+        modified directly after constructing the `_ListWrapper`, and if changes
+        are detected the `_ListWrapper` will throw an exception on save.
+    """
+    # Monotonic flags which indicate this object would not be restored properly,
+    # and therefore should throw an error on save to avoid giving the impression
+    # that restoring it will work.
+    self._non_append_mutation = False
+    self._external_modification = False
+    super(_ListWrapper, self).__init__(wrapped_list)
+    self._last_wrapped_list_snapshot = list(self._storage)
+
+  # pylint: disable=protected-access
+  def __copy__(self):
+    copied = super(_ListWrapper, self).__copy__()
+    copied._non_append_mutation = self._non_append_mutation
+    copied._external_modification = self._external_modification
+    return copied
+
+  def __deepcopy__(self, memo):
+    copied = super(_ListWrapper, self).__deepcopy__(memo)
+    copied._non_append_mutation = self._non_append_mutation
+    copied._external_modification = self._external_modification
+    return copied
+  # pylint: enable=protected-access
+
+  def _make_storage(self, wrapped_list):
+    """Use the user's original list for storage."""
+    return wrapped_list
+
+  def _check_external_modification(self):
+    """Checks for any changes to the wrapped list not through the wrapper."""
+    if self._external_modification or self._non_append_mutation:
+      return
+    if self._storage != self._last_wrapped_list_snapshot:
+      self._external_modification = True
+      self._last_wrapped_list_snapshot = None
+
+  def _update_snapshot(self):
+    """Acknowledges tracked changes to the wrapped list."""
+    if self._external_modification or self._non_append_mutation:
+      return
+    self._last_wrapped_list_snapshot = list(self._storage)
+
+  @property
+  def _checkpoint_dependencies(self):
+    self._check_external_modification()
+    if self._non_append_mutation:
+      raise ValueError(
+          ("Unable to save the object %s (a list wrapper constructed to track "
+           "trackable TensorFlow objects). A list element was replaced "
+           "(__setitem__, __setslice__), deleted (__delitem__, __delslice__), "
+           "or moved (sort). In order to support restoration on object "
+           "creation, tracking is exclusively for append-only data structures."
+           "\n\nIf you don't need this list checkpointed, wrap it in a "
+           "tf.contrib.checkpoint.NoDependency object; it will be "
+           "automatically un-wrapped and subsequently ignored." % (self,)))
+    if self._external_modification:
+      raise ValueError(
+          ("Unable to save the object %s (a list wrapper constructed to track "
+           "trackable TensorFlow objects). The wrapped list was modified "
+           "outside the wrapper (its final value was %s, its value when a "
+           "checkpoint dependency was added was %s), which breaks restoration "
+           "on object creation.\n\nIf you don't need this list checkpointed, "
+           "wrap it in a tf.contrib.checkpoint.NoDependency object; it will be "
+           "automatically un-wrapped and subsequently ignored." % (
+               self, self._storage, self._last_wrapped_list_snapshot)))
+    return super(_ListWrapper, self)._checkpoint_dependencies
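+
+  # For example (a sketch; `a` and `b` stand for any trackable values such
+  # as layers or variables):
+  #
+  #   wrapper = _ListWrapper([a, b])
+  #   del wrapper[0]                     # append-only contract broken
+  #   wrapper._checkpoint_dependencies   # raises ValueError at save time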
+
+  def __delitem__(self, key):
+    self._non_append_mutation = True
+    del self._storage[key]
+
+  def __setitem__(self, key, value):
+    self._check_external_modification()
+
+    if isinstance(key, slice):
+      # Note: this is quite inefficient, but the list API supports a broad range
+      # of slice setters (e.g. truncate, extend, replace) and imitating this
+      # for a range of Python versions is non-trivial.
+      storage_copy = list(self._storage)
+      self._storage[key] = value
+
+      len_before = len(storage_copy)
+      len_now = len(self._storage)
+      for i in range(max(len_before, len_now)):
+        value_now = self._storage[i] if i < len_now else None
+        value_before = storage_copy[i] if i < len_before else None
+
+        if isinstance(value_before, base.Trackable):
+          self._non_append_mutation = True
+
+        if value_now is not None and value_now != value_before:
+          self._storage[i] = self._track_value(self._storage[i],
+                                               self._name_element(i))
+
+    else:
+      if isinstance(self._storage[key], base.Trackable):
+        self._non_append_mutation = True
+      self._storage[key] = self._track_value(value, self._name_element(key))
+
+    self._update_snapshot()
+
+  def append(self, value):
+    """Add a new trackable value."""
+    self._check_external_modification()
+    super(_ListWrapper, self).append(value)
+    self._update_snapshot()
+
+  def extend(self, values):
+    """Add a sequence of trackable values."""
+    self._check_external_modification()
+    super(_ListWrapper, self).extend(values)
+    self._update_snapshot()
+
+  def __eq__(self, other):
+    return self._storage == getattr(other, "_storage", other)
+
+  def __ne__(self, other):
+    return self._storage != getattr(other, "_storage", other)
+
+  def __lt__(self, other):
+    return self._storage < getattr(other, "_storage", other)
+
+  def __le__(self, other):
+    return self._storage <= getattr(other, "_storage", other)
+
+  def __gt__(self, other):
+    return self._storage > getattr(other, "_storage", other)
+
+  def __ge__(self, other):
+    return self._storage >= getattr(other, "_storage", other)
+
+  def __hash__(self):
+    # List wrappers need to compare like regular lists, and so like regular
+    # lists they don't belong in hash tables.
+    raise TypeError("unhashable type: 'ListWrapper'")
+
+  def insert(self, index, obj):
+    self._non_append_mutation = True
+    self._storage.insert(index, obj)
+
+  def sort(self):
+    self._non_append_mutation = True
+    self._storage.sort()
+
+  def __setslice__(self, i, j, y):
+    self.__setitem__(slice(i, j), y)
+
+  def __delslice__(self, i, j):
+    self._non_append_mutation = True
+    del self._storage[slice(i, j)]
+
+  def _track_value(self, value, name):
+    """Allows storage of non-trackable objects."""
+    try:
+      value = super(_ListWrapper, self)._track_value(value=value, name=name)
+    except ValueError:
+      # Even if this value isn't trackable, we need to make sure
+      # NoDependency objects get unwrapped.
+      value = sticky_attribute_assignment(
+          trackable=self, value=value, name=name)
+    return value
+
+  def __repr__(self):
+    return "ListWrapper(%s)" % (repr(self._storage),)
+
+  def _list_functions_for_serialization(self):
+    return {
+        str(key): value for key, value in enumerate(self)
+        if _is_function(value)
+    }
+
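+# As the `_ListWrapper` docstring notes, user code rarely constructs this
+# class directly: assigning a plain list to an attribute of a trackable
+# object swaps in a `_ListWrapper`. A rough sketch of the resulting contract
+# (`model` is any trackable object, `layer_a`/`layer_b` are illustrative):
+#
+#   model.layer_list = [layer_a]       # plain list in, _ListWrapper stored
+#   model.layer_list.append(layer_b)   # appends keep it saveable
+#   model.layer_list.sort()            # non-append mutation: save raises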
+
+class Mapping(TrackableDataStructure, collections.Mapping):
+  """An append-only trackable mapping data structure with string keys.
+
+  Maintains checkpoint dependencies on its contents (which must also be
+  trackable), named based on its keys.
+
+  Note that once a key has been added, it may not be deleted or replaced. If
+  names may not be unique, see `tf.contrib.checkpoint.UniqueNameTracker`.
+  """
+
+  def __init__(self, *args, **kwargs):
+    """Construct a new sequence. Arguments are passed to `dict()`."""
+    super(Mapping, self).__init__()
+    self._storage = self._make_storage(*args, **kwargs)
+    self._storage.update(
+        {key: self._track_value(
+            value, name=self._name_element(key))
+         for key, value in self._storage.items()})
+
+  def __copy__(self):
+    return type(self)(copy.copy(self._storage))
+
+  def __deepcopy__(self, memo):
+    return type(self)(copy.deepcopy(self._storage, memo))
+
+  def _make_storage(self, *args, **kwargs):
+    return dict(*args, **kwargs)
+
+  @property
+  def _values(self):
+    # Sort items deterministically by key
+    ordered = list(zip(*sorted(self.items(), key=lambda it: it[0])))
+    if ordered:
+      return ordered[1]
+    return []
+
+  def _name_element(self, key):
+    if not isinstance(key, six.string_types):
+      raise TypeError(
+          "Mapping accepts only string keys, but got a key %s."
+          % repr(key))
+    return str(key)
+
+  def __setitem__(self, key, value):
+    name = self._name_element(key)
+    value = self._track_value(value, name=name)
+    current_value = self._storage.setdefault(key, value)
+    if current_value is not value:
+      raise ValueError(
+          ("Mappings are an append-only data structure. Tried to overwrite the "
+           "key '%s' with value %s, but it already contains %s")
+          % (key, value, current_value))
+
+  def update(self, *args, **kwargs):
+    for key, value in dict(*args, **kwargs).items():
+      self[key] = value
+
+  def __getitem__(self, key):
+    return self._storage[key]
+
+  def __len__(self):
+    return len(self._storage)
+
+  def __repr__(self):
+    return "Mapping(%s)" % (repr(self._storage),)
+
+  def __iter__(self):
+    return iter(self._storage)
+
+
+# Unlike _ListWrapper, having _DictWrapper inherit from dict and pass isinstance
+# checks seems infeasible. CPython will not call Python methods/properties on
+# dictionary subclasses when running e.g. {}.update(dict_subclass), and instead
+# collects elements directly from dict_subclass's C structs. So subclassing dict
+# implies that the storage has to be "self" (i.e. the C structs for the object
+# must be updated correctly), but we also need that storage to be the wrapped
+# dictionary to avoid synchronization bugs (un-tracked external modifications
+# should still show up when the dict is accessed through the wrapper). Monkey
+# patching all of the "wrapped" dict's methods instead of creating a wrapper
+# object is an option, but not a very attractive one (replacing methods without
+# creating reference cycles is difficult, and then dicts would need to be
+# special cased everywhere as being trackable).
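+#
+# For reference, the behavior in question (a self-contained sketch; CPython
+# takes the C fast path for dict subclasses whose iteration is not
+# overridden, and did so unconditionally in the Python versions of this era):
+#
+#   class D(dict):
+#     def keys(self):
+#       raise AssertionError("never called by dict.update")
+#
+#   plain = {}
+#   plain.update(D(a=1))  # copies from D's C storage; keys() is skipped
+#   assert plain == {"a": 1}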
+class _DictWrapper(Mapping, collections.MutableMapping):
+  """Wraps built-in dicts to support restore-on-create for variables.
+
+  _DictWrapper is to Mapping as _ListWrapper is to List. Unlike Mapping,
+  _DictWrapper allows non-string keys and values and arbitrary mutations (delete
+  keys, reassign values). Like _ListWrapper, these mutations mean that
+  _DictWrapper will raise an exception on save.
+  """
+
+  def __new__(cls, *args):
+    if len(args) == 1 and isinstance(args[0], dict):
+      return super(_DictWrapper, cls).__new__(cls)
+    else:
+      # Allow construction from a sequence, e.g. for nest.pack_sequence_as. In
+      # this case there's nothing to wrap, so we make a normal dictionary. Also
+      # allows constructing empty instances of the _DictWrapper type, as Session
+      # is wont to do (and again there's nothing to wrap, so a normal dictionary
+      # makes more sense).
+      return dict(*args)
+
+  def __init__(self, wrapped_dict):
+    self._non_string_key = False
+    self._non_append_mutation = False
+    self._external_modification = False
+    super(_DictWrapper, self).__init__(wrapped_dict)
+    self._update_snapshot()
+
+  # pylint: disable=protected-access
+  def __copy__(self):
+    copied = super(_DictWrapper, self).__copy__()
+    copied._non_append_mutation = self._non_append_mutation
+    copied._external_modification = self._external_modification
+    copied._non_string_key = self._non_string_key
+    return copied
+
+  def __deepcopy__(self, memo):
+    copied = super(_DictWrapper, self).__deepcopy__(memo)
+    copied._non_append_mutation = self._non_append_mutation
+    copied._external_modification = self._external_modification
+    copied._non_string_key = self._non_string_key
+    return copied
+  # pylint: enable=protected-access
+
+  def _make_storage(self, wrapped_dict):
+    """Re-use the wrapped dict for storage (to force them to be in sync)."""
+    return wrapped_dict
+
+  @property
+  def _checkpoint_dependencies(self):
+    """Check that the object is saveable before listing its dependencies."""
+    self._check_external_modification()
+    if self._non_string_key:
+      raise ValueError(
+          "Unable to save the object %s (a dictionary wrapper constructed "
+          "automatically on attribute assignment). The wrapped dictionary "
+          "contains a non-string key which maps to a trackable object or "
+          "mutable data structure.\n\nIf you don't need this dictionary "
+          "checkpointed, wrap it in a tf.contrib.checkpoint.NoDependency "
+          "object; it will be automatically un-wrapped and subsequently "
+          "ignored." % (self,))
+    if self._non_append_mutation:
+      raise ValueError(
+          "Unable to save the object %s (a dictionary wrapper constructed "
+          "automatically on attribute assignment). A key mapping to a "
+          "trackable object was overwritten or deleted, which would "
+          "cause problems for restoration.\n\nIf you don't need this "
+          "dictionary checkpointed, wrap it in a "
+          "tf.contrib.checkpoint.NoDependency object; it will be automatically "
+          "un-wrapped and subsequently ignored." % (self,))
+    if self._external_modification:
+      raise ValueError(
+          "Unable to save the object %s (a dictionary wrapper constructed "
+          "automatically on attribute assignment). The wrapped dictionary was "
+          "modified outside the wrapper (its final value was %s, its value "
+          "when a checkpoint dependency was added was %s), which breaks "
+          "restoration on object creation.\n\nIf you don't need this "
+          "dictionary checkpointed, wrap it in a "
+          "tf.contrib.checkpoint.NoDependency object; it will be automatically "
+          "un-wrapped and subsequently ignored." % (
+              self, self, self._last_wrapped_dict_snapshot))
+    assert not self._dirty  # Any reason for dirtiness should have an exception.
+    return super(_DictWrapper, self)._checkpoint_dependencies
+
+  @property
+  def _dirty(self):
+    """Check if there has already been a mutation which prevents saving."""
+    return (self._external_modification
+            or self._non_append_mutation
+            or self._non_string_key)
+
+  def _check_external_modification(self):
+    """Checks for any changes to the wrapped dict not through the wrapper."""
+    if self._dirty:
+      return
+    if self != self._last_wrapped_dict_snapshot:
+      self._external_modification = True
+      self._last_wrapped_dict_snapshot = None
+
+  def _update_snapshot(self):
+    """Acknowledges tracked changes to the wrapped dict."""
+    if self._dirty:
+      return
+    self._last_wrapped_dict_snapshot = dict(self)
+
+  def _track_value(self, value, name):
+    """Allows storage of non-trackable objects."""
+    if isinstance(name, six.string_types):
+      string_key = True
+    else:
+      name = "-non_string_key"
+      string_key = False
+    try:
+      no_dependency = isinstance(value, NoDependency)
+      value = super(_DictWrapper, self)._track_value(value=value, name=name)
+      if not (string_key or no_dependency):
+        # A non-string key maps to a trackable value. This data structure
+        # is not saveable.
+        self._non_string_key = True
+      return value
+    except ValueError:
+      # Even if this value isn't trackable, we need to make sure
+      # NoDependency objects get unwrapped.
+      return sticky_attribute_assignment(
+          trackable=self, value=value, name=name)
+
+  def _name_element(self, key):
+    """Don't throw errors for non-string keys."""
+    if isinstance(key, six.string_types):
+      return super(_DictWrapper, self)._name_element(key)
+    else:
+      return key
+
+  def __setitem__(self, key, value):
+    """Allow any modifications, but possibly mark the wrapper as unsaveable."""
+    self._check_external_modification()
+    no_dep = isinstance(value, NoDependency)
+    if isinstance(key, six.string_types):
+      existing_dependency = self._lookup_dependency(key)
+      value = self._track_value(value, name=key)
+    else:
+      value = _wrap_or_unwrap(value)
+      existing_dependency = None
+      if not no_dep and isinstance(value, base.Trackable):
+        # Non-string keys are OK as long as we have no reason to add a
+        # dependency on the value (either because the value is not
+        # trackable, or because it was wrapped in a NoDependency object).
+        self._non_string_key = True
+    if key in self._storage:
+      previous_value = self._storage[key]
+      if previous_value is not value:
+        if ((not no_dep and isinstance(value, base.Trackable))
+            # We don't want to just check that the existing object is
+            # trackable, since it may have been wrapped in a NoDependency
+            # object.
+            or existing_dependency is not None):
+          # A trackable object was replaced under the same key; this means
+          # that restoring would be error-prone, so we'll throw an exception on
+          # save.
+          self._non_append_mutation = True
+    self._storage[key] = value
+
+    self._update_snapshot()
+
+  def __delitem__(self, key):
+    self._check_external_modification()
+    existing_value = self[key]
+    if isinstance(existing_value, base.Trackable):
+      # Deleting tracked trackable values means restoring is problematic,
+      # so we'll throw an exception on save.
+      self._non_append_mutation = True
+    del self._storage[key]
+    self._update_snapshot()
+
+  def __repr__(self):
+    return "DictWrapper(%s)" % (repr(self._storage),)
+
+  def __hash__(self):
+    raise TypeError("unhashable type: 'DictWrapper'")
+
+  def __eq__(self, other):
+    return self._storage == getattr(other, "_storage", other)
+
+  def update(self, *args, **kwargs):
+    for key, value in dict(*args, **kwargs).items():
+      self[key] = value
+
+  def _list_functions_for_serialization(self):
+    return {
+        key: value for key, value in self.items()
+        if _is_function(value)
+    }
+
+
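+# Putting `_DictWrapper.__setitem__` together with the save-time checks in
+# `_checkpoint_dependencies`, a sketch of the contract (`obj` is any
+# trackable object and `layer` any trackable value; names illustrative):
+#
+#   obj.d = {}              # plain dict in, _DictWrapper stored
+#   obj.d["layer"] = layer  # string key: dependency tracked, still saveable
+#   obj.d[1] = layer        # non-string key mapping to a trackable:
+#                           # _non_string_key set, save raises ValueError
+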
+def _is_function(x):
+  return isinstance(x, (def_function.Function, defun.ConcreteFunction))
+
+revived_types.register_revived_type(
+    "trackable_dict_wrapper",
+    lambda obj: isinstance(obj, _DictWrapper),
+    versions=[revived_types.VersionedTypeRegistration(
+        # Standard dependencies are enough to reconstruct the trackable
+        # items in dictionaries, so we don't need to save any extra information.
+        object_factory=lambda proto: _DictWrapper({}),
+        version=1,
+        min_producer_version=1,
+        min_consumer_version=1,
+        setter=operator.setitem)])
+
+
+def _set_list_item(list_object, index_string, value):
+  item_index = int(index_string)
+  if len(list_object) <= item_index:
+    list_object.extend([None] * (1 + item_index - len(list_object)))
+  list_object[item_index] = value
+
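+# For example, restoring the key "3" into a two-element list pads with None
+# first: given l == [a, b], _set_list_item(l, "3", c) leaves l == [a, b,
+# None, c].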
+
+revived_types.register_revived_type(
+    "trackable_list_wrapper",
+    lambda obj: isinstance(obj, _ListWrapper),
+    versions=[revived_types.VersionedTypeRegistration(
+        object_factory=lambda proto: _ListWrapper([]),
+        version=1,
+        min_producer_version=1,
+        min_consumer_version=1,
+        setter=_set_list_item)])
diff --git a/tensorflow/python/training/tracking/data_structures_test.py b/tensorflow/python/training/tracking/data_structures_test.py
new file mode 100644
index 0000000..a03614b
--- /dev/null
+++ b/tensorflow/python/training/tracking/data_structures_test.py
@@ -0,0 +1,716 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import os
+
+import numpy
+import six
+
+from tensorflow.python.eager import context
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.layers import core
+from tensorflow.python.keras.layers import normalization
+from tensorflow.python.layers import core as non_keras_core
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.training.tracking import data_structures
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
+
+
+class HasList(training.Model):
+
+  def __init__(self):
+    super(HasList, self).__init__()
+    self.layer_list = data_structures.List([core.Dense(3)])
+    self.layer_list.append(core.Dense(4))
+    self.layer_list.extend(
+        [core.Dense(5),
+         core.Dense(6, kernel_regularizer=math_ops.reduce_sum)])
+    self.layer_list += [
+        core.Dense(7, bias_regularizer=math_ops.reduce_sum),
+        core.Dense(8)
+    ]
+    self.layer_list += (
+        data_structures.List([core.Dense(9)]) + data_structures.List(
+            [core.Dense(10)]))
+    self.layer_list.extend(
+        data_structures.List(
+            sequence=[core.Dense(11)]) + [core.Dense(12)])
+    self.layers_with_updates = data_structures.List(
+        sequence=(normalization.BatchNormalization(),))
+
+  def call(self, x):
+    aggregation = 0.
+    for l in self.layer_list:
+      x = l(x)
+      aggregation += math_ops.reduce_sum(x)
+    bn, = self.layers_with_updates
+    return bn(x) / aggregation
+
+
+class ListTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
+  def testTracking(self):
+    model = HasList()
+    output = model(array_ops.ones([32, 2]))
+    self.assertAllEqual([32, 12], output.shape)
+    self.assertEqual(11, len(model.layers))
+    self.assertEqual(10, len(model.layer_list.layers))
+    six.assertCountEqual(
+        self,
+        model.layers,
+        model.layer_list.layers + model.layers_with_updates)
+    for index in range(10):
+      self.assertEqual(3 + index, model.layer_list.layers[index].units)
+    self.assertEqual(2, len(model._checkpoint_dependencies))
+    self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref)
+    self.assertIs(model.layers_with_updates,
+                  model._checkpoint_dependencies[1].ref)
+    self.assertEqual(
+        10, len(model._checkpoint_dependencies[0].ref._checkpoint_dependencies))
+    self.evaluate([v.initializer for v in model.variables])
+    self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    model.save_weights(save_path)
+    self.evaluate(model.variables[0].assign(array_ops.zeros([2, 3])))
+    model.load_weights(save_path)
+    self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
+                        self.evaluate(model.variables[0]))
+    v = variables.Variable(1.)
+    model.var_list = [v]
+    self.assertIn(v, model.variables)
+    self.assertIn(v, model.trainable_variables)
+    self.assertNotIn(v, model.non_trainable_variables)
+
+  @test_util.run_v1_only("b/120545219")
+  def testUpdatesForwarded(self):
+    with context.graph_mode():
+      model = HasList()
+      model_input = array_ops.ones([32, 2])
+      model(model_input)
+      self.assertGreater(len(model.layers_with_updates[0].updates), 0)
+      self.assertEqual(set(model.layers_with_updates[0].updates),
+                       set(model.updates))
+
+    with context.eager_mode():
+      model = HasList()
+      model_input = array_ops.ones([32, 2])
+      model(model_input)
+      self.assertEqual(0, len(model.updates))
+
+  @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
+  def testLossesForwarded(self):
+    model = HasList()
+    model_input = array_ops.ones([32, 2])
+    model(model_input)
+    self.assertEqual(2, len(model.losses))
+
+  def testModelContainersCompareEqual(self):
+    class HasEqualContainers(training.Model):
+
+      def __init__(self):
+        super(HasEqualContainers, self).__init__()
+        self.l1 = []
+        self.l2 = []
+
+    model = HasEqualContainers()
+    first_layer = HasEqualContainers()
+    model.l1.append(first_layer)
+    second_layer = HasEqualContainers()
+    model.l2.append(second_layer)
+    self.assertEqual([first_layer, second_layer], model.layers)
+
+  def testNotTrackable(self):
+    class NotTrackable(object):
+      pass
+
+    with self.assertRaises(ValueError):
+      data_structures.List([NotTrackable()])
+
+  def testCallNotImplemented(self):
+    with self.assertRaisesRegexp(TypeError, "not callable"):
+      data_structures.List()(1.)
+
+  def testNoPop(self):
+    with self.assertRaises(AttributeError):
+      data_structures.List().pop()
+
+  @test_util.run_in_graph_and_eager_modes
+  def testTensorConversion(self):
+
+    class ListToTensor(training.Model):
+
+      def __init__(self):
+        super(ListToTensor, self).__init__()
+        self.l = [1., 2., 3.]
+
+    self.assertAllEqual(
+        [1., 2., 3.],
+        self.evaluate(constant_op.constant(ListToTensor().l)))
+
+    self.assertAllEqual(
+        [1., 2., 3.],
+        self.evaluate(array_ops.stack(ListToTensor().l)))
+
+  def testNesting(self):
+    with context.graph_mode():
+      inner = data_structures.List()
+      outer = data_structures.List([inner])
+      inner.append(non_keras_core.Dense(1))
+      inner[0](array_ops.ones([2, 3]))
+      self.assertEqual(2, len(outer.variables))
+      self.assertIsInstance(
+          outer.variables[0],
+          resource_variable_ops.ResourceVariable)
+
+  def testNonLayerVariables(self):
+    v = resource_variable_ops.ResourceVariable([1.])
+    l = data_structures.List([v])
+    self.assertTrue(l.trainable)
+    self.assertEqual([], l.layers)
+    self.assertEqual([v], l.variables)
+    self.assertEqual([v], l.trainable_weights)
+    self.assertEqual([], l.non_trainable_variables)
+    l.trainable = False
+    self.assertEqual([v], l.variables)
+    self.assertEqual([], l.trainable_variables)
+    self.assertEqual([v], l.non_trainable_variables)
+    l.trainable = True
+    v2 = resource_variable_ops.ResourceVariable(1., trainable=False)
+    l.append(v2)
+    self.assertEqual([v, v2], l.weights)
+    self.assertEqual([v], l.trainable_weights)
+    self.assertEqual([v2], l.non_trainable_weights)
+
+  def testCopy(self):
+    v1 = resource_variable_ops.ResourceVariable(1.)
+    v2 = resource_variable_ops.ResourceVariable(1.)
+    v3 = resource_variable_ops.ResourceVariable(1.)
+
+    l1 = data_structures.List([v1, v2])
+    l2 = l1.copy()
+    l2.append(v3)
+    self.assertEqual(list(l1), [v1, v2])
+    self.assertEqual(list(l2), [v1, v2, v3])
+
+  def testSlicing(self):
+    v1 = resource_variable_ops.ResourceVariable(1.)
+    v2 = resource_variable_ops.ResourceVariable(1.)
+    v3 = resource_variable_ops.ResourceVariable(1.)
+    v4 = resource_variable_ops.ResourceVariable(1.)
+
+    l = data_structures.List([v1, v2, v3, v4])
+    self.assertEqual(l[1:], [v2, v3, v4])
+    self.assertEqual(l[1:-1], [v2, v3])
+    self.assertEqual(l[:-1], [v1, v2, v3])
+
+  def testHash(self):
+    has_sequences = set([data_structures.List(),
+                         data_structures.List()])
+    self.assertEqual(2, len(has_sequences))
+    self.assertNotIn(data_structures.List(), has_sequences)
+
+  def testIMul_zero(self):
+    l = data_structures.List([])
+    with self.assertRaisesRegexp(ValueError, "List only supports append"):
+      l *= 0
+
+  def testIMul(self):
+    v = resource_variable_ops.ResourceVariable(1.)
+    l = data_structures.List([v])
+    l *= 2
+    self.assertEqual(list(l), [v] * 2)
+
+  def testMul(self):
+    v = resource_variable_ops.ResourceVariable(1.)
+    l = data_structures.List([v, v, v])
+    self.assertEqual(list(l * 2), [v, v, v] * 2)
+
+  def testRMul(self):
+    v = resource_variable_ops.ResourceVariable(1.)
+    l = data_structures.List([v, v, v])
+    self.assertEqual(list(2 * l), [v, v, v] * 2)
+
+
+class ListWrapperTest(test.TestCase):
+
+  IGNORED = ("__new__", "__init__", "__subclasshook__", "__getattribute__")
+
+  def test_overrides_all_list_methods(self):
+    not_overridden = []
+
+    for name in dir(list):
+      if name in ListWrapperTest.IGNORED:
+        continue
+
+      list_method = getattr(list, name)
+
+      if not callable(list_method):
+        continue
+
+      object_method = getattr(object, name, None)
+      if object_method is not None and object_method == list_method:
+        # Skip methods that aren't overridden from object.
+        continue
+
+      if list_method == getattr(data_structures._ListWrapper, name):
+        not_overridden.append(name)
+
+    if not_overridden:
+      self.fail("_ListWrapper does not override %s" % (not_overridden))
+
+  def testListWrapperBasic(self):
+    # _ListWrapper, unlike List, compares like the built-in list type (since it
+    # is used to automatically replace lists).
+    a = tracking.AutoTrackable()
+    b = tracking.AutoTrackable()
+    self.assertEqual([a, a],
+                     [a, a])
+    self.assertEqual(data_structures._ListWrapper([a, a]),
+                     data_structures._ListWrapper([a, a]))
+    self.assertEqual([a, a],
+                     data_structures._ListWrapper([a, a]))
+    self.assertEqual(data_structures._ListWrapper([a, a]),
+                     [a, a])
+    self.assertNotEqual([a, a],
+                        [b, a])
+    self.assertNotEqual(data_structures._ListWrapper([a, a]),
+                        data_structures._ListWrapper([b, a]))
+    self.assertNotEqual([a, a],
+                        data_structures._ListWrapper([b, a]))
+    self.assertLess([a], [a, b])
+    self.assertLess(data_structures._ListWrapper([a]),
+                    data_structures._ListWrapper([a, b]))
+    self.assertLessEqual([a], [a, b])
+    self.assertLessEqual(data_structures._ListWrapper([a]),
+                         data_structures._ListWrapper([a, b]))
+    self.assertGreater([a, b], [a])
+    self.assertGreater(data_structures._ListWrapper([a, b]),
+                       data_structures._ListWrapper([a]))
+    self.assertGreaterEqual([a, b], [a])
+    self.assertGreaterEqual(data_structures._ListWrapper([a, b]),
+                            data_structures._ListWrapper([a]))
+    self.assertEqual([a], data_structures._ListWrapper([a]))
+    self.assertEqual([a], list(data_structures.List([a])))
+    self.assertEqual([a, a], data_structures._ListWrapper([a]) + [a])
+    self.assertEqual([a, a], [a] + data_structures._ListWrapper([a]))
+    self.assertIsInstance(data_structures._ListWrapper([a]), list)
+
+  def testAcceptsNonTrackableContent(self):
+    l = data_structures._ListWrapper([1, 2, 3])
+    self.assertEqual(l, [1, 2, 3])
+
+  def testWrapperChangesList(self):
+    l = []
+    l_wrapper = data_structures._ListWrapper(l)
+    l_wrapper.append(1)
+    self.assertEqual([1], l)
+
+  def testListChangesWrapper(self):
+    l = []
+    l_wrapper = data_structures._ListWrapper(l)
+    l.append(1)
+    self.assertEqual([1], l_wrapper)
+
+  def testLayerCollectionWithExternalMutation(self):
+    l = []
+    l_wrapper = data_structures._ListWrapper(l)
+    layer = core.Dense(1)
+    l.append(layer)
+    self.assertEqual([layer], l_wrapper.layers)
+
+  def testNotHashable(self):
+    with self.assertRaises(TypeError):
+      hash(data_structures._ListWrapper([]))
+
+  def testDelItem(self):
+    l = data_structures._ListWrapper([1, 2, 3, 4])
+    del l[0]
+    self.assertEqual(l, [2, 3, 4])
+    self.assertUnableToSave(l, "Unable to save .*__delitem__")
+
+  def testDelSlice(self):
+    l = data_structures._ListWrapper([1, 2, 3, 4])
+    del l[2:3]
+    self.assertEqual(l, [1, 2, 4])
+    self.assertUnableToSave(l, "Unable to save .*__delslice__")
+
+  def testSetSlice_canSaveForNonTrackableItems(self):
+    l = data_structures._ListWrapper([1, 2, 3, 4])
+    l[:] = 2, 8, 9, 0
+    self.assertEqual(l, [2, 8, 9, 0])
+    l._maybe_initialize_trackable()  # pylint: disable=protected-access
+    self.assertEqual(len(l._checkpoint_dependencies), 0)  # pylint: disable=protected-access
+
+  def testSetSlice_cannotSaveIfTrackableModified(self):
+    v1 = resource_variable_ops.ResourceVariable(1.)
+    v2 = resource_variable_ops.ResourceVariable(1.)
+    l = data_structures._ListWrapper([1, 2, v1, v2])
+    l[:] = 2, 8, 9, v2
+    self.assertEqual(l, [2, 8, 9, v2])
+    self.assertUnableToSave(l, "Unable to save .*__setslice__")
+
+  def testSetSlice_truncate(self):
+    l = data_structures._ListWrapper([1, 2, 3, 4])
+    l[:] = []
+    self.assertEqual(l, [])
+
+  def testSetSlice_extend(self):
+    l = data_structures._ListWrapper([1, 2, 3, 4])
+    l[2:] = 1, 2, 3, 4
+    self.assertEqual(l, [1, 2, 1, 2, 3, 4])
+
+  def testSort(self):
+    l = data_structures._ListWrapper([1, 2, 3, 4])
+    l.sort()
+    self.assertEqual(l, [1, 2, 3, 4])
+    # Regardless of being a no-op for the input list, we still refuse to save.
+    # This is intentional since otherwise we would end up with a hard to debug
+    # case for users (e.g. sometimes sort on a ListWrapper is trackable and
+    # other times it is not).
+    self.assertUnableToSave(l, "Unable to save .*sort")
+
+  def assertUnableToSave(self, l, msg):
+    l._maybe_initialize_trackable()  # pylint: disable=protected-access
+    with self.assertRaisesRegexp(ValueError, msg):
+      return l._checkpoint_dependencies  # pylint: disable=protected-access
+
+
+class HasMapping(training.Model):
+
+  def __init__(self):
+    super(HasMapping, self).__init__()
+    self.layer_dict = data_structures.Mapping(output=core.Dense(7))
+    self.layer_dict["norm"] = data_structures.List()
+    self.layer_dict["dense"] = data_structures.List()
+    self.layer_dict["dense"].extend(
+        [core.Dense(5),
+         core.Dense(6, kernel_regularizer=math_ops.reduce_sum)])
+    self.layer_dict["norm"].append(
+        normalization.BatchNormalization())
+    self.layer_dict["norm"].append(
+        normalization.BatchNormalization())
+
+  def call(self, x):
+    aggregation = 0.
+    for norm, dense in zip(self.layer_dict["norm"], self.layer_dict["dense"]):
+      x = norm(dense(x))
+      aggregation += math_ops.reduce_sum(x)
+    return self.layer_dict["output"](x) / aggregation
+
+
+class MappingTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
+  def testTracking(self):
+    model = HasMapping()
+    output = model(array_ops.ones([32, 2]))
+    self.assertAllEqual([32, 7], output.shape)
+    self.assertEqual(5, len(model.layers))
+    six.assertCountEqual(self, model.layers, model.layer_dict.layers)
+    self.assertEqual(1, len(model._checkpoint_dependencies))
+    self.assertIs(model.layer_dict, model._checkpoint_dependencies[0].ref)
+    self.evaluate([v.initializer for v in model.variables])
+    test_var = model.layer_dict["output"].kernel
+    self.evaluate(test_var.assign(array_ops.ones([6, 7])))
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    model.save_weights(save_path)
+    self.evaluate(test_var.assign(array_ops.zeros([6, 7])))
+    model.load_weights(save_path)
+    self.assertAllEqual(numpy.ones([6, 7]),
+                        self.evaluate(test_var))
+
+  def testNoOverwrite(self):
+    mapping = data_structures.Mapping()
+    original = data_structures.List()
+    mapping["a"] = original
+    with self.assertRaises(ValueError):
+      mapping["a"] = data_structures.List()
+    self.assertIs(original, mapping["a"])
+    with self.assertRaises(AttributeError):
+      del mapping["a"]
+    mapping.update(b=data_structures.Mapping())
+    with self.assertRaises(ValueError):
+      mapping.update({"b": data_structures.Mapping()})
+
+  def testNonStringKeys(self):
+    mapping = data_structures.Mapping()
+    with self.assertRaises(TypeError):
+      mapping[1] = data_structures.List()
+
+  def testLayerCollectionWithExternalMutation(self):
+    d = {}
+    root = tracking.AutoTrackable()
+    root.wrapper = d
+    self.assertEqual([], root.wrapper.layers)
+    self.assertEqual([], root.wrapper.trainable_weights)
+    layer1 = core.Dense(1)
+    layer2 = core.Dense(1)
+    d["a"] = layer1
+    d["b"] = layer2
+    self.assertEqual([layer1, layer2], root.wrapper.layers)
+    # The layers have not created variables yet
+    self.assertEqual([], root.wrapper.trainable_weights)
+
+  def testHashing(self):
+    has_mappings = set([data_structures.Mapping(),
+                        data_structures.Mapping()])
+    self.assertEqual(2, len(has_mappings))
+    self.assertNotIn(data_structures.Mapping(), has_mappings)
+    # In contrast to Mapping, dict wrappers are not hashable
+    a = tracking.AutoTrackable()
+    a.d = {}
+    self.assertEqual({}, a.d)
+    self.assertFalse({} != a.d)  # pylint: disable=g-explicit-bool-comparison
+    self.assertNotEqual({1: 2}, a.d)
+    with self.assertRaisesRegexp(TypeError, "unhashable"):
+      set([a.d])
+
+  def testDictWrapperBadKeys(self):
+    a = tracking.AutoTrackable()
+    a.d = {}
+    a.d[1] = data_structures.List()
+    model = training.Model()
+    model.sub = a
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    with self.assertRaisesRegexp(ValueError, "non-string key"):
+      model.save_weights(save_path)
+
+  def testDictWrapperNoDependency(self):
+    a = tracking.AutoTrackable()
+    a.d = data_structures.NoDependency({})
+    a.d[1] = [3]
+    self.assertEqual([a], util.list_objects(a))
+    model = training.Model()
+    model.sub = a
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    model.save_weights(save_path)
+    model.load_weights(save_path)
+
+  def testNonStringKeyNotTrackableValue(self):
+    a = tracking.AutoTrackable()
+    a.d = {}
+    a.d["a"] = [3]
+    a.d[1] = data_structures.NoDependency([3])
+    self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a))
+    model = training.Model()
+    model.sub = a
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    model.save_weights(save_path)
+    model.load_weights(save_path)
+
+  def testNonAppendNotTrackable(self):
+    # Non-append mutations (deleting or overwriting values) are OK when the
+    # values aren't tracked.
+    a = tracking.AutoTrackable()
+    a.d = {}
+    a.d["a"] = [3]
+    a.d[1] = 3
+    a.d[1] = 2
+    self.assertEqual(2, a.d[1])
+    del a.d[1]
+    a.d[2] = data_structures.NoDependency(tracking.AutoTrackable())
+    second = tracking.AutoTrackable()
+    a.d[2] = data_structures.NoDependency(second)
+    self.assertIs(second, a.d[2])
+    self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a))
+    model = training.Model()
+    model.sub = a
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    model.save_weights(save_path)
+    model.load_weights(save_path)
+
+  def testDelNoSave(self):
+    model = training.Model()
+    model.d = {}
+    model.d["a"] = []
+    del model.d["a"]
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    with self.assertRaisesRegexp(ValueError, "overwritten or deleted"):
+      model.save_weights(save_path)
+
+  def testPopNoSave(self):
+    model = training.Model()
+    model.d = {}
+    model.d["a"] = []
+    model.d.pop("a")
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    with self.assertRaisesRegexp(ValueError, "overwritten or deleted"):
+      model.save_weights(save_path)
+
+  def testExternalModificationNoSave(self):
+    model = training.Model()
+    external_reference = {}
+    model.d = external_reference
+    external_reference["a"] = []
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    with self.assertRaisesRegexp(ValueError, "modified outside the wrapper"):
+      model.save_weights(save_path)
+
+  def testOverwriteNoSave(self):
+    model = training.Model()
+    model.d = {}
+    model.d["a"] = {}
+    model.d["a"] = {}
+    save_path = os.path.join(self.get_temp_dir(), "ckpt")
+    with self.assertRaisesRegexp(ValueError, "overwritten or deleted"):
+      model.save_weights(save_path)
+
+  def testIter(self):
+    model = training.Model()
+    model.d = {1: 3}
+    model.d[1] = 3
+    self.assertEqual([1], list(model.d))
+    new_dict = {}
+    # This update() is super tricky. If the dict wrapper subclasses dict,
+    # CPython will access its storage directly instead of calling any
+    # methods/properties on the object. So the options are either not to
+    # subclass dict (in which case update will call the normal iteration
+    # methods, but the object won't pass isinstance checks) or to subclass
+    # dict and keep that storage updated (without shadowing all of its
+    # methods the way _ListWrapper does).
+    new_dict.update(model.d)
+    self.assertEqual({1: 3}, new_dict)
+
+  def testListShallowCopy(self):
+    root = tracking.AutoTrackable()
+    orig_list = [[1.]]
+    root.a = orig_list
+    copied = copy.copy(root.a)
+    self.assertAllEqual([[1.]], copied)
+    self.assertIsNot(root.a, copied)
+    self.assertIs(root.a[0], copied[0])
+
+    # Dirtiness should be inherited
+    util.list_objects(root.a)
+    orig_list.append(1.)
+    with self.assertRaises(ValueError):
+      util.list_objects(root.a)
+    with self.assertRaises(ValueError):
+      util.list_objects(copy.copy(root.a))
+
+  def testListDeepCopy(self):
+    root = tracking.AutoTrackable()
+    orig_list = [[1.]]
+    root.a = orig_list
+    copied = copy.deepcopy(root.a)
+    self.assertAllEqual([[1.]], copied)
+    self.assertIsNot(root.a, copied)
+    self.assertIsNot(root.a[0], copied[0])
+
+    # Dirtiness should be inherited
+    util.list_objects(root.a)
+    orig_list.append(1.)
+    with self.assertRaises(ValueError):
+      util.list_objects(root.a)
+    with self.assertRaises(ValueError):
+      util.list_objects(copy.deepcopy(root.a))
+
+  def testDictShallowCopy(self):
+    root = tracking.AutoTrackable()
+    orig_dict = {"a": [1.]}
+    root.a = orig_dict
+    copied = copy.copy(root.a)
+    self.assertAllEqual([1.], copied["a"])
+    self.assertIsNot(root.a, copied)
+    self.assertIs(root.a["a"], copied["a"])
+
+    # Dirtiness should be inherited
+    util.list_objects(root.a)
+    orig_dict["b"] = []
+    with self.assertRaises(ValueError):
+      util.list_objects(root.a)
+    with self.assertRaises(ValueError):
+      util.list_objects(copy.copy(root.a))
+
+  def testDictDeepCopy(self):
+    root = tracking.AutoTrackable()
+    orig_dict = {"a": [1.]}
+    root.a = orig_dict
+    copied = copy.deepcopy(root.a)
+    self.assertAllEqual([1.], copied["a"])
+    self.assertIsNot(root.a, copied)
+    self.assertIsNot(root.a["a"], copied["a"])
+
+    # Dirtiness should be inherited
+    util.list_objects(root.a)
+    orig_dict["b"] = []
+    with self.assertRaises(ValueError):
+      util.list_objects(root.a)
+    with self.assertRaises(ValueError):
+      util.list_objects(copy.deepcopy(root.a))
+
+  def testShallowCopyTrackable(self):
+    original = tracking.AutoTrackable()
+    original_sub = tracking.AutoTrackable()
+    original.a = [[1.]]
+    original.b = {"a": original_sub}
+    shallow_copied = copy.copy(original)
+    self.assertIs(original_sub, shallow_copied.b["a"])
+    self.assertIsNot(original, shallow_copied)
+    self.assertEqual([[1.]], shallow_copied.a)
+    shallow_deps = util.list_objects(shallow_copied)
+    self.assertIn(shallow_copied.a, shallow_deps)
+    self.assertIn(shallow_copied.b, shallow_deps)
+    self.assertIn(shallow_copied.b["a"], shallow_deps)
+
+  def testDeepCopyTrackable(self):
+    original = tracking.AutoTrackable()
+    original_sub = tracking.AutoTrackable()
+    original.a = [[1.]]
+    original.b = {"a": original_sub}
+    deep_copied = copy.deepcopy(original)
+    self.assertIsNot(original, deep_copied)
+    self.assertIsNot(original_sub, deep_copied.b["a"])
+    self.assertEqual([[1.]], deep_copied.a)
+    self.assertIsInstance(deep_copied.b["a"], tracking.AutoTrackable)
+    deps = util.list_objects(deep_copied)
+    self.assertIn(deep_copied.a, deps)
+    self.assertIn(deep_copied.b, deps)
+    self.assertIn(deep_copied.b["a"], deps)
+    self.assertNotIn(original_sub, deps)
+
+  def testConstructableFromSequence(self):
+    result = data_structures._DictWrapper([(1, 2), (3, 4)])
+    self.assertIsInstance(result, dict)
+    self.assertEqual({1: 2, 3: 4}, result)
+
+  def testListAddOrder(self):
+    self.assertEqual([1., 2.],
+                     data_structures._ListWrapper([1.])
+                     + data_structures._ListWrapper([2.]))
+    self.assertEqual([1., 2.],
+                     data_structures._ListWrapper([1.])
+                     + [2.])
+    self.assertEqual([1., 2.],
+                     [1.]
+                     + data_structures._ListWrapper([2.]))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/tracking/graph_view.py b/tensorflow/python/training/tracking/graph_view.py
new file mode 100644
index 0000000..ba23878
--- /dev/null
+++ b/tensorflow/python/training/tracking/graph_view.py
@@ -0,0 +1,431 @@
+"""Manages a graph of Trackable objects."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import weakref
+
+from tensorflow.core.protobuf import trackable_object_graph_pb2
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.training import optimizer as optimizer_v1
+from tensorflow.python.training.saving import saveable_object as saveable_object_lib
+from tensorflow.python.training.saving import saveable_object_util
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import object_identity
+from tensorflow.python.training.tracking import tracking
+
+
+_ESCAPE_CHAR = "."  # For avoiding conflicts with user-specified names.
+
+# Keyword for identifying that the next bit of a checkpoint variable name is a
+# slot name. Checkpoint names for slot variables look like:
+#
+#   <path to variable>/<_OPTIMIZER_SLOTS_NAME>/<path to optimizer>/<slot name>
+#
+# Where <path to variable> is a full path from the checkpoint root to the
+# variable being slotted for.
+_OPTIMIZER_SLOTS_NAME = _ESCAPE_CHAR + "OPTIMIZER_SLOT"
+# Keyword for separating the path to an object from the name of an
+# attribute in checkpoint names. Used like:
+#   <path to variable>/<_OBJECT_ATTRIBUTES_NAME>/<name of attribute>
+_OBJECT_ATTRIBUTES_NAME = _ESCAPE_CHAR + "ATTRIBUTES"
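+#
+# E.g. a kernel variable reached from the checkpoint root along the path
+# "dense/kernel" is saved under the key
+# "dense/kernel/.ATTRIBUTES/VARIABLE_VALUE", "VARIABLE_VALUE" being the
+# attribute name variables register.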
+
+
+def _escape_local_name(name):
+  # We need to support slashes in local names for compatibility, since this
+  # naming scheme is being patched in to things like Layer.add_variable where
+  # slashes were previously accepted. We also want to use slashes to indicate
+  # edges traversed to reach the variable, so we escape forward slashes in
+  # names.
+  return (name.replace(_ESCAPE_CHAR, _ESCAPE_CHAR + _ESCAPE_CHAR)
+          .replace(r"/", _ESCAPE_CHAR + "S"))
+
+
+def _object_prefix_from_path(path_to_root):
+  return "/".join(
+      (_escape_local_name(trackable.name)
+       for trackable in path_to_root))
+
+
+def _slot_variable_naming_for_optimizer(optimizer_path):
+  """Make a function for naming slot variables in an optimizer."""
+  # Name slot variables:
+  #
+  #   <variable name>/<_OPTIMIZER_SLOTS_NAME>/<optimizer path>/<slot name>
+  #
+  # where <variable name> is exactly the checkpoint name used for the original
+  # variable, including the path from the checkpoint root and the local name in
+  # the object which owns it. Note that we only save slot variables if the
+  # variable it's slotting for is also being saved.
+
+  optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, optimizer_path)
+
+  def _name_slot_variable(variable_path, slot_name):
+    """With an optimizer specified, name a slot variable."""
+    return (variable_path
+            + optimizer_identifier
+            + _escape_local_name(slot_name))
+
+  return _name_slot_variable
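+
+# E.g. with an optimizer saved at path "optimizer", Adam's first-moment slot
+# "m" for a variable whose checkpoint path is "dense/kernel" is named:
+#
+#   "dense/kernel/.OPTIMIZER_SLOT/optimizer/m"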
+
+
+def _serialize_slot_variables(trackable_objects, node_ids, object_names):
+  """Gather and name slot variables."""
+  non_slot_objects = list(trackable_objects)
+  slot_variables = object_identity.ObjectIdentityDictionary()
+  for trackable in non_slot_objects:
+    if (isinstance(trackable, optimizer_v1.Optimizer)
+        # TODO(b/110718070): Fix Keras imports.
+        or hasattr(trackable, "_create_or_restore_slot_variable")):
+      naming_scheme = _slot_variable_naming_for_optimizer(
+          optimizer_path=object_names[trackable])
+      slot_names = trackable.get_slot_names()
+      for slot_name in slot_names:
+        for original_variable_node_id, original_variable in enumerate(
+            non_slot_objects):
+          try:
+            slot_variable = trackable.get_slot(
+                original_variable, slot_name)
+          except (AttributeError, KeyError):
+            slot_variable = None
+          if slot_variable is None:
+            continue
+          slot_variable._maybe_initialize_trackable()  # pylint: disable=protected-access
+          if slot_variable._checkpoint_dependencies:  # pylint: disable=protected-access
+            # TODO(allenl): Gather dependencies of slot variables.
+            raise NotImplementedError(
+                "Currently only variables with no dependencies can be saved as "
+                "slot variables. File a feature request if this limitation "
+                "bothers you.")
+          if slot_variable in node_ids:
+            raise NotImplementedError(
+                "A slot variable was re-used as a dependency of a "
+                "Trackable object. This is not currently allowed. File a "
+                "feature request if this limitation bothers you.")
+          checkpoint_name = naming_scheme(
+              variable_path=object_names[original_variable],
+              slot_name=slot_name)
+          object_names[slot_variable] = checkpoint_name
+          slot_variable_node_id = len(trackable_objects)
+          node_ids[slot_variable] = slot_variable_node_id
+          trackable_objects.append(slot_variable)
+          slot_variable_proto = (
+              trackable_object_graph_pb2.TrackableObjectGraph
+              .TrackableObject.SlotVariableReference(
+                  slot_name=slot_name,
+                  original_variable_node_id=original_variable_node_id,
+                  slot_variable_node_id=slot_variable_node_id))
+          slot_variables.setdefault(trackable, []).append(
+              slot_variable_proto)
+  return slot_variables
+
+
+class ObjectGraphView(object):
+  """Gathers and serializes an object graph."""
+
+  def __init__(self, root, saveables_cache=None):
+    """Configure the graph view.
+
+    Args:
+      root: A `Trackable` object whose variables (including the variables
+        of dependencies, recursively) should be saved. May be a weak reference.
+      saveables_cache: A dictionary mapping `Trackable` objects ->
+        attribute names -> SaveableObjects, used to avoid re-creating
+        SaveableObjects when graph building.
+    """
+    self._root_ref = root
+    self._saveables_cache = saveables_cache
+
+  def list_dependencies(self, obj):
+    # pylint: disable=protected-access
+    obj._maybe_initialize_trackable()
+    return obj._checkpoint_dependencies
+    # pylint: enable=protected-access
+
+  @property
+  def saveables_cache(self):
+    """Maps Trackable objects -> attribute names -> list(SaveableObjects).
+
+    Used to avoid re-creating SaveableObjects when graph building. None when
+    executing eagerly.
+
+    Returns:
+      The cache (an object-identity dictionary), or None if caching is disabled.
+    """
+    return self._saveables_cache
+
+  @property
+  def root(self):
+    if isinstance(self._root_ref, weakref.ref):
+      derefed = self._root_ref()
+      assert derefed is not None
+      return derefed
+    else:
+      return self._root_ref
+
+  def _breadth_first_traversal(self):
+    """Find shortest paths to all dependencies of self.root."""
+    bfs_sorted = []
+    to_visit = collections.deque([self.root])
+    path_to_root = object_identity.ObjectIdentityDictionary()
+    path_to_root[self.root] = ()
+    while to_visit:
+      current_trackable = to_visit.popleft()
+      if isinstance(current_trackable, tracking.NotTrackable):
+        raise NotImplementedError(
+            ("The object %s does not support object-based saving. File a "
+             "feature request if this limitation bothers you. In the meantime, "
+             "you can remove the dependency on this object and save everything "
+             "else.")
+            % (current_trackable,))
+      bfs_sorted.append(current_trackable)
+      for name, dependency in self.list_dependencies(current_trackable):
+        if dependency not in path_to_root:
+          path_to_root[dependency] = (
+              path_to_root[current_trackable] + (
+                  base.TrackableReference(name, dependency),))
+          to_visit.append(dependency)
+    return bfs_sorted, path_to_root
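+
+  # Note on the traversal above: because it is breadth-first, an object
+  # reachable along several paths gets a shortest one. E.g. if the same
+  # variable is reachable both as root.v and as root.opt.v (illustrative
+  # attribute names), its checkpoint prefix is derived from "v".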
+
+  def _add_attributes_to_object_graph(
+      self, trackable_objects, object_graph_proto, node_ids, object_names,
+      object_map):
+    """Create SaveableObjects and corresponding SerializedTensor protos."""
+    named_saveable_objects = []
+    if self._saveables_cache is None:
+      # No SaveableObject caching. Either we're executing eagerly, or building a
+      # static save which is specialized to the current Python state.
+      feed_additions = None
+    else:
+      # If we are caching SaveableObjects, we need to build up a feed_dict with
+      # functions computing volatile Python state to be saved with the
+      # checkpoint.
+      feed_additions = {}
+    for checkpoint_id, (trackable, object_proto) in enumerate(
+        zip(trackable_objects, object_graph_proto.nodes)):
+      assert node_ids[trackable] == checkpoint_id
+      object_name = object_names[trackable]
+      if object_map is None:
+        object_to_save = trackable
+      else:
+        object_to_save = object_map.get(trackable, trackable)
+      if self._saveables_cache is not None:
+        cached_attributes = self._saveables_cache.setdefault(object_to_save, {})
+      else:
+        cached_attributes = None
+
+      for name, saveable_factory in (
+          object_to_save._gather_saveables_for_checkpoint().items()):  # pylint: disable=protected-access
+        attribute = object_proto.attributes.add()
+        attribute.name = name
+        attribute.checkpoint_key = "%s/%s/%s" % (
+            object_name, _OBJECT_ATTRIBUTES_NAME, _escape_local_name(name))
+        if cached_attributes is None:
+          saveables = None
+        else:
+          saveables = cached_attributes.get(name, None)
+          if saveables is not None:
+            for saveable in saveables:
+              if attribute.checkpoint_key not in saveable.name:
+                # The checkpoint key for this SaveableObject is different. We
+                # need to re-create it.
+                saveables = None
+                del cached_attributes[name]
+                break
+        if saveables is None:
+          if callable(saveable_factory):
+            maybe_saveable = saveable_factory(name=attribute.checkpoint_key)
+          else:
+            maybe_saveable = saveable_factory
+          if isinstance(maybe_saveable, saveable_object_lib.SaveableObject):
+            saveables = (maybe_saveable,)
+          else:
+            # Figure out the name-based Saver's name for this variable. If it's
+            # already a SaveableObject we'd just get the checkpoint key back, so
+            # we leave full_name blank.
+            saver_dict = saveable_object_util.op_list_to_dict(
+                [maybe_saveable], convert_variable_to_tensor=False)
+            full_name, = saver_dict.keys()
+            saveables = tuple(saveable_object_util.saveable_objects_for_op(
+                op=maybe_saveable, name=attribute.checkpoint_key))
+            for saveable in saveables:
+              saveable.full_name = full_name
+          for saveable in saveables:
+            if attribute.checkpoint_key not in saveable.name:
+              raise AssertionError(
+                  ("The object %s produced a SaveableObject with name '%s' for "
+                   "attribute '%s'. Expected a name containing '%s'.")
+                  % (trackable, saveable.name, name,
+                     attribute.checkpoint_key))
+          if cached_attributes is not None:
+            cached_attributes[name] = saveables
+
+        optional_restore = None
+        for saveable in saveables:
+          if optional_restore is None:
+            optional_restore = saveable.optional_restore
+          else:
+            optional_restore = optional_restore and saveable.optional_restore
+
+          if hasattr(saveable, "full_name"):
+            attribute.full_name = saveable.full_name
+          if isinstance(saveable, base.PythonStateSaveable):
+            if feed_additions is None:
+              assert self._saveables_cache is None
+              # If we're not caching saveables, then we're either executing
+              # eagerly or building a static save/restore (e.g. for a
+              # SavedModel). In either case, we should embed the current Python
+              # state in the graph rather than relying on a feed dict.
+              saveable = saveable.freeze()
+            else:
+              saveable_feed_dict = saveable.feed_dict_additions()
+              for new_feed_key in saveable_feed_dict.keys():
+                if new_feed_key in feed_additions:
+                  raise AssertionError(
+                      ("The object %s tried to feed a value for the Tensor %s "
+                       "when saving, but another object is already feeding a "
+                       "value.")
+                      % (trackable, new_feed_key))
+              feed_additions.update(saveable_feed_dict)
+          named_saveable_objects.append(saveable)
+        if optional_restore is None:
+          optional_restore = False
+        attribute.optional_restore = optional_restore
+
+    return named_saveable_objects, feed_additions
+
+  def _fill_object_graph_proto(self, trackable_objects,
+                               node_ids,
+                               slot_variables,
+                               object_graph_proto=None):
+    """Name non-slot `Trackable`s and add them to `object_graph_proto`."""
+    if object_graph_proto is None:
+      object_graph_proto = (
+          trackable_object_graph_pb2.TrackableObjectGraph())
+    for checkpoint_id, trackable in enumerate(trackable_objects):
+      assert node_ids[trackable] == checkpoint_id
+      object_proto = object_graph_proto.nodes.add()
+      object_proto.slot_variables.extend(slot_variables.get(trackable, ()))
+      for child in self.list_dependencies(trackable):
+        child_proto = object_proto.children.add()
+        child_proto.node_id = node_ids[child.ref]
+        child_proto.local_name = child.name
+    return object_graph_proto
+
+  def _serialize_gathered_objects(self, trackable_objects, path_to_root,
+                                  object_map=None):
+    """Create SaveableObjects and protos for gathered objects."""
+    object_names = object_identity.ObjectIdentityDictionary()
+    for obj, path in path_to_root.items():
+      object_names[obj] = _object_prefix_from_path(path)
+    node_ids = object_identity.ObjectIdentityDictionary()
+    for node_id, node in enumerate(trackable_objects):
+      node_ids[node] = node_id
+    slot_variables = _serialize_slot_variables(
+        trackable_objects=trackable_objects,
+        node_ids=node_ids,
+        object_names=object_names)
+    object_graph_proto = self._fill_object_graph_proto(
+        trackable_objects=trackable_objects,
+        node_ids=node_ids,
+        slot_variables=slot_variables)
+    named_saveable_objects, feed_additions = (
+        self._add_attributes_to_object_graph(
+            trackable_objects=trackable_objects,
+            object_graph_proto=object_graph_proto,
+            node_ids=node_ids,
+            object_names=object_names,
+            object_map=object_map))
+    return named_saveable_objects, object_graph_proto, feed_additions
+
+  def serialize_object_graph(self):
+    """Determine checkpoint keys for variables and build a serialized graph.
+
+    Non-slot variables are keyed based on a shortest path from the root saveable
+    to the object which owns the variable (i.e. the one which called
+    `Trackable._add_variable` to create it).
+
+    Slot variables are keyed based on a shortest path to the variable being
+    slotted for, a shortest path to their optimizer, and the slot name.
+
+    Returns:
+      A tuple of (named_saveable_objects, object_graph_proto, feed_additions):
+        named_saveable_objects: A list of `SaveableObject`s for the gathered
+          variables and other attributes.
+        object_graph_proto: A TrackableObjectGraph protocol buffer
+          containing the serialized object graph and variable references.
+        feed_additions: A dictionary mapping from Tensors to values which should
+          be fed when saving.
+
+    Raises:
+      ValueError: If there are invalid characters in an optimizer's slot names.
+    """
+    trackable_objects, path_to_root = self._breadth_first_traversal()
+    return self._serialize_gathered_objects(
+        trackable_objects, path_to_root)
+
+  def frozen_saveable_objects(self, object_map=None, to_graph=None):
+    """Creates SaveableObjects with the current object graph frozen."""
+    trackable_objects, path_to_root = self._breadth_first_traversal()
+    if to_graph:
+      target_context = to_graph.as_default
+    else:
+      target_context = ops.NullContextmanager
+    with target_context():
+      named_saveable_objects, graph_proto, _ = self._serialize_gathered_objects(
+          trackable_objects,
+          path_to_root,
+          object_map)
+      with ops.device("/cpu:0"):
+        object_graph_tensor = constant_op.constant(
+            graph_proto.SerializeToString(), dtype=dtypes.string)
+      named_saveable_objects.append(
+          base.NoRestoreSaveable(
+              tensor=object_graph_tensor,
+              name=base.OBJECT_GRAPH_PROTO_KEY))
+    return named_saveable_objects
+
+  def objects_ids_and_slot_variables(self):
+    """Traverse the object graph and list all accessible objects.
+
+    Looks for `Trackable` objects which are dependencies of
+    `root_trackable`. Includes slot variables only if the variable they are
+    slotting for and the optimizer are dependencies of `root_trackable`
+    (i.e. if they would be saved with a checkpoint).
+
+    Returns:
+      A tuple of (trackable objects, object -> node id, slot variables)
+    """
+    trackable_objects, path_to_root = self._breadth_first_traversal()
+    object_names = object_identity.ObjectIdentityDictionary()
+    for obj, path in path_to_root.items():
+      object_names[obj] = _object_prefix_from_path(path)
+    node_ids = object_identity.ObjectIdentityDictionary()
+    for node_id, node in enumerate(trackable_objects):
+      node_ids[node] = node_id
+    slot_variables = _serialize_slot_variables(
+        trackable_objects=trackable_objects,
+        node_ids=node_ids,
+        object_names=object_names)
+    return trackable_objects, node_ids, slot_variables
+
+  def list_objects(self):
+    """Traverse the object graph and list all accessible objects."""
+    trackable_objects, _, _ = self.objects_ids_and_slot_variables()
+    return trackable_objects
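
A minimal sketch of how the traversal above turns attribute paths into checkpoint keys (`root`/`child`/`v` are illustrative names; the `.ATTRIBUTES/VARIABLE_VALUE` suffix reflects the attribute keys built in `_add_attributes_to_object_graph`):

```python
# Serialize a tiny object graph and inspect the resulting checkpoint keys.
from tensorflow.python.ops import variables
from tensorflow.python.training.tracking import graph_view
from tensorflow.python.training.tracking import tracking

root = tracking.AutoTrackable()
root.child = tracking.AutoTrackable()
root.child.v = variables.Variable(1.)  # dependency path: root -> "child" -> "v"

view = graph_view.ObjectGraphView(root)
saveables, proto, _ = view.serialize_object_graph()
for saveable in saveables:
  print(saveable.name)   # e.g. "child/v/.ATTRIBUTES/VARIABLE_VALUE"
print(len(proto.nodes))  # one node per reachable trackable, root included
```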
diff --git a/tensorflow/python/training/tracking/layer_utils.py b/tensorflow/python/training/tracking/layer_utils.py
new file mode 100644
index 0000000..818563c
--- /dev/null
+++ b/tensorflow/python/training/tracking/layer_utils.py
@@ -0,0 +1,111 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities related to layer/model functionality."""
+
+# TODO(b/110718070): Move these functions back to tensorflow/python/keras/utils
+# once __init__ files no longer require all of tf.keras to be imported together.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.training.tracking import object_identity
+
+
+def is_layer(obj):
+  """Implicit check for Layer-like objects."""
+  # TODO(b/110718070): Replace with isinstance(obj, base_layer.Layer).
+  return hasattr(obj, "_is_layer")
+
+
+def has_weights(obj):
+  """Implicit check for Layer-like objects."""
+  # TODO(b/110718070): Replace with isinstance(obj, base_layer.Layer).
+  return (hasattr(obj, "trainable_weights")
+          and hasattr(obj, "non_trainable_weights"))
+
+
+def filter_empty_layer_containers(layer_list):
+  """Filter out empty Layer-like containers and uniquify."""
+  existing = object_identity.ObjectIdentitySet()
+  to_visit = layer_list[::-1]
+  filtered = []
+  while to_visit:
+    obj = to_visit.pop()
+    if obj in existing:
+      continue
+    existing.add(obj)
+    if is_layer(obj):
+      filtered.append(obj)
+    elif hasattr(obj, "layers"):
+      # Trackable data structures will not show up in ".layers" lists, but
+      # the layers they contain will.
+      to_visit.extend(obj.layers[::-1])
+  return filtered
+
+
+def gather_trainable_weights(trainable, sub_layers, extra_variables):
+  """Lists the trainable weights for an object with sub-layers.
+
+  Args:
+    trainable: Whether the object collecting the variables is trainable.
+    sub_layers: A flat list of Layer objects owned by this object, to collect
+      variables from.
+    extra_variables: Any extra variables to include. Their `.trainable` property
+      is used to categorize them.
+
+  Returns:
+    A list of collected trainable weights/variables.
+  """
+  if not trainable:
+    return []
+  weights = []
+  for layer in sub_layers:
+    weights += layer.trainable_weights
+  trainable_extra_variables = [
+      v for v in extra_variables if v.trainable]
+  return weights + trainable_extra_variables
+
+
+def gather_non_trainable_weights(trainable, sub_layers, extra_variables):
+  """Lists the non-trainable weights for an object with sub-layers.
+
+  Args:
+    trainable: Whether the object collecting the variables is trainable.
+    sub_layers: A flat list of Layer objects owned by this object, to collect
+      variables from.
+    extra_variables: Any extra variables to include. Their `.trainable` property
+      is used to categorize them.
+
+  Returns:
+    A list of collected non-trainable weights/variables.
+  """
+  trainable_extra_variables = []
+  non_trainable_extra_variables = []
+  for v in extra_variables:
+    if v.trainable:
+      trainable_extra_variables.append(v)
+    else:
+      non_trainable_extra_variables.append(v)
+  weights = []
+  for layer in sub_layers:
+    weights += layer.non_trainable_weights
+  if not trainable:
+    trainable_weights = []
+    for layer in sub_layers:
+      trainable_weights += layer.trainable_weights
+    return (trainable_weights + trainable_extra_variables
+            + weights + non_trainable_extra_variables)
+  return weights + non_trainable_extra_variables
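
Because these helpers duck-type rather than `isinstance`-check (see the TODOs above), any object with the right attributes participates. A small sketch, with a hypothetical `FakeLayer` standing in for a real Keras layer:

```python
# FakeLayer only needs the attributes checked by is_layer/has_weights.
from tensorflow.python.training.tracking import layer_utils


class FakeLayer(object):
  _is_layer = True

  def __init__(self, trainable_weights, non_trainable_weights):
    self.trainable_weights = trainable_weights
    self.non_trainable_weights = non_trainable_weights


layer = FakeLayer(trainable_weights=["w"], non_trainable_weights=["b"])
assert layer_utils.is_layer(layer) and layer_utils.has_weights(layer)
assert layer_utils.gather_trainable_weights(
    trainable=True, sub_layers=[layer], extra_variables=[]) == ["w"]
# With trainable=False, trainable weights are reported as non-trainable:
assert layer_utils.gather_trainable_weights(
    trainable=False, sub_layers=[layer], extra_variables=[]) == []
assert layer_utils.gather_non_trainable_weights(
    trainable=False, sub_layers=[layer], extra_variables=[]) == ["w", "b"]
```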
diff --git a/tensorflow/python/training/tracking/object_identity.py b/tensorflow/python/training/tracking/object_identity.py
new file mode 100644
index 0000000..d4eef5b
--- /dev/null
+++ b/tensorflow/python/training/tracking/object_identity.py
@@ -0,0 +1,156 @@
+"""Utilities for collecting objects based on "is" comparison."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import weakref
+
+
+class _ObjectIdentityWrapper(object):
+  """Wraps an object, mapping __eq__ on wrapper to "is" on wrapped.
+
+  Since __eq__ is based on object identity, it's safe to also define __hash__
+  based on object ids. This lets us add unhashable types like trackable
+  _ListWrapper objects to object-identity collections.
+  """
+
+  def __init__(self, wrapped):
+    self._wrapped = wrapped
+
+  @property
+  def unwrapped(self):
+    return self._wrapped
+
+  def __eq__(self, other):
+    if isinstance(other, _ObjectIdentityWrapper):
+      return self._wrapped is other._wrapped  # pylint: disable=protected-access
+    return self._wrapped is other
+
+  def __hash__(self):
+    # Wrapper id() is also fine for weakrefs. In fact, we rely on
+    # id(weakref.ref(a)) == id(weakref.ref(a)) and weakref.ref(a) is
+    # weakref.ref(a) in _WeakObjectIdentityWrapper.
+    return id(self._wrapped)
+
+
+class _WeakObjectIdentityWrapper(_ObjectIdentityWrapper):
+
+  def __init__(self, wrapped):
+    super(_WeakObjectIdentityWrapper, self).__init__(weakref.ref(wrapped))
+
+  @property
+  def unwrapped(self):
+    return self._wrapped()
+
+
+class ObjectIdentityDictionary(collections.MutableMapping):
+  """A mutable mapping data structure which compares using "is".
+
+  This is necessary because we have trackable objects (_ListWrapper) which
+  have behavior identical to built-in Python lists (including being unhashable
+  and comparing based on the equality of their contents by default).
+  """
+
+  def __init__(self):
+    self._storage = {}
+
+  def _wrap_key(self, key):
+    return _ObjectIdentityWrapper(key)
+
+  def __getitem__(self, key):
+    return self._storage[self._wrap_key(key)]
+
+  def __setitem__(self, key, value):
+    self._storage[self._wrap_key(key)] = value
+
+  def __delitem__(self, key):
+    del self._storage[self._wrap_key(key)]
+
+  def __len__(self):
+    return len(self._storage)
+
+  def __iter__(self):
+    for key in self._storage:
+      yield key.unwrapped
+
+
+class ObjectIdentityWeakKeyDictionary(ObjectIdentityDictionary):
+  """Like weakref.WeakKeyDictionary, but compares objects with "is"."""
+
+  def _wrap_key(self, key):
+    return _WeakObjectIdentityWrapper(key)
+
+  def __len__(self):
+    # Iterate through self rather than the raw storage so dead weak refs are
+    # discarded before counting.
+    return len(list(self))
+
+  def __iter__(self):
+    # Snapshot the keys so entries can be removed while iterating.
+    keys = list(self._storage.keys())
+    for key in keys:
+      unwrapped = key.unwrapped
+      if unwrapped is None:
+        # `key` is already a wrapper; delete it from storage directly rather
+        # than re-wrapping it through __delitem__.
+        del self._storage[key]
+      else:
+        yield unwrapped
+
+
+class ObjectIdentitySet(collections.MutableSet):
+  """Like the built-in set, but compares objects with "is"."""
+
+  def __init__(self, *args):
+    self._storage = set([self._wrap_key(obj) for obj in list(*args)])
+
+  def _wrap_key(self, key):
+    return _ObjectIdentityWrapper(key)
+
+  def __contains__(self, key):
+    return self._wrap_key(key) in self._storage
+
+  def discard(self, key):
+    self._storage.discard(self._wrap_key(key))
+
+  def add(self, key):
+    self._storage.add(self._wrap_key(key))
+
+  def __len__(self):
+    return len(self._storage)
+
+  def __iter__(self):
+    keys = list(self._storage)
+    for key in keys:
+      yield key.unwrapped
+
+
+class ObjectIdentityWeakSet(ObjectIdentitySet):
+  """Like weakref.WeakSet, but compares objects with "is"."""
+
+  def _wrap_key(self, key):
+    return _WeakObjectIdentityWrapper(key)
+
+  def __len__(self):
+    # Iterate, discarding old weak refs
+    return len([_ for _ in self])
+
+  def __iter__(self):
+    keys = list(self._storage)
+    for key in keys:
+      unwrapped = key.unwrapped
+      if unwrapped is None:
+        # `key` is already a wrapper; discard it from storage directly so the
+        # dead reference is actually removed.
+        self._storage.discard(key)
+      else:
+        yield unwrapped
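
The practical difference from the built-in containers is easiest to see with equal-but-distinct unhashable objects; a minimal sketch:

```python
# Two equal lists are distinct keys here; a plain dict would reject them
# entirely (lists are unhashable), and equality-based containers would
# conflate them.
from tensorflow.python.training.tracking import object_identity

a, b = [1, 2], [1, 2]
ids = object_identity.ObjectIdentityDictionary()
ids[a] = "first"
ids[b] = "second"  # does not overwrite the entry for `a`
assert ids[a] == "first" and ids[b] == "second"
assert len(ids) == 2

s = object_identity.ObjectIdentitySet([a])
assert a in s and b not in s
```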
diff --git a/tensorflow/python/training/tracking/tracking.py b/tensorflow/python/training/tracking/tracking.py
new file mode 100644
index 0000000..c8024e1
--- /dev/null
+++ b/tensorflow/python/training/tracking/tracking.py
@@ -0,0 +1,215 @@
+"""Dependency tracking for trackable objects."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.eager import def_function
+from tensorflow.python.eager import function as defun
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import data_structures
+from tensorflow.python.util import tf_contextlib
+
+
+_RESOURCE_TRACKER_STACK = []
+
+
+class NotTrackable(object):
+  """Marks instances of child classes as unsaveable using an object-based API.
+
+  Useful for marking objects which would otherwise look trackable because
+  of inheritance (e.g. through `Layer`) as not trackable. Inheriting from
+  `NotTrackable` does not prevent an object from being assigned to
+  attributes, but saving or restoring such an object will raise an error.
+  """
+  pass
+
+
+class AutoTrackable(base.Trackable):
+  """Manages dependencies on other objects.
+
+  `Trackable` objects may have dependencies: other `Trackable` objects
+  which should be saved if the object declaring the dependency is saved. A
+  correctly saveable program has a dependency graph such that if changing a
+  global variable affects an object (e.g. changes the behavior of any of its
+  methods) then there is a chain of dependencies from the influenced object to
+  the variable.
+
+  Dependency edges have names, and are created implicitly when a
+  `Trackable` object is assigned to an attribute of another
+  `Trackable` object. For example:
+
+  ```
+  obj = Trackable()
+  obj.v = ResourceVariable(0.)
+  ```
+
+  The `Trackable` object `obj` now has a dependency named "v" on a
+  variable.
+
+  `Trackable` objects may specify `Tensor`s to be saved and restored
+  directly (e.g. a `Variable` indicating how to save itself) rather than through
+  dependencies on other objects. See
+  `Trackable._gather_saveables_for_checkpoint` for details.
+  """
+
+  def __setattr__(self, name, value):
+    """Support self.foo = trackable syntax."""
+    if getattr(self, "_setattr_tracking", True):
+      value = data_structures.sticky_attribute_assignment(
+          trackable=self, value=value, name=name)
+    super(AutoTrackable, self).__setattr__(name, value)
+
+  def __delattr__(self, name):
+    self._maybe_initialize_trackable()
+    if name in self._unconditional_dependency_names:
+      del self._unconditional_dependency_names[name]
+      for index, (dep_name, _) in enumerate(
+          self._unconditional_checkpoint_dependencies):
+        if dep_name == name:
+          del self._unconditional_checkpoint_dependencies[index]
+          break
+    super(AutoTrackable, self).__delattr__(name)
+
+  def _no_dependency(self, value):
+    """Override to allow TrackableBase to disable dependency tracking."""
+    return data_structures.NoDependency(value)
+
+  def _list_functions_for_serialization(self):
+    """Return a dict of `Function`s of a trackable."""
+    functions = dict()
+    for attribute_name in dir(self):
+      try:
+        attribute_value = getattr(self, attribute_name, None)
+      except Exception:  # pylint: disable=broad-except
+        # We really don't want to throw an exception just because some object's
+        # attribute accessor is broken.
+        attribute_value = None
+      if isinstance(attribute_value, (def_function.Function,
+                                      defun.ConcreteFunction)):
+        functions[attribute_name] = attribute_value
+    return functions
+
+
+class ResourceTracker(object):
+  """An object that tracks a list of resources."""
+
+  def __init__(self):
+    self._resources = []
+
+  @property
+  def resources(self):
+    return self._resources
+
+  def add_resource(self, resource):
+    self._resources.append(resource)
+
+
+@tf_contextlib.contextmanager
+def resource_tracker_scope(resource_tracker):
+  """A context to manage resource trackers.
+
+  Use this to collect all resources created within a block of code.
+  Example usage:
+
+  ```python
+  resource_tracker = ResourceTracker()
+  with resource_tracker_scope(resource_tracker):
+    resource = TrackableResource()
+
+  assert resource_tracker.resources == [resource]
+  ```
+
+  Args:
+    resource_tracker: The `ResourceTracker` object to collect resources into.
+
+  Yields:
+    A scope in which the resource_tracker is active.
+  """
+  global _RESOURCE_TRACKER_STACK
+  old = list(_RESOURCE_TRACKER_STACK)
+  _RESOURCE_TRACKER_STACK.append(resource_tracker)
+  try:
+    yield
+  finally:
+    _RESOURCE_TRACKER_STACK = old
+
+
+class TrackableResource(base.Trackable):
+  """Base class for all resources that need to be tracked."""
+
+  def __init__(self):
+    global _RESOURCE_TRACKER_STACK
+    for resource_tracker in _RESOURCE_TRACKER_STACK:
+      resource_tracker.add_resource(self)
+
+    self._resource_handle = None
+
+  def create_resource(self):
+    """A function that creates a resource handle."""
+    raise NotImplementedError("TrackableResource.create_resource not "
+                              "implemented.")
+
+  def initialize(self):
+    """A function that initializes the resource. Optional."""
+    pass
+
+  @property
+  def resource_handle(self):
+    """Returns the resource handle associated with this Resource."""
+    if self._resource_handle is None:
+      self._resource_handle = self.create_resource()
+    return self._resource_handle
+
+  def _list_functions_for_serialization(self):
+    @def_function.function(input_signature=[], autograph=False)
+    def _creator():
+      resource = self.create_resource()
+      return resource
+
+    @def_function.function(input_signature=[], autograph=False)
+    def _initializer():
+      self.initialize()
+      return 1  # Dummy return
+
+    return {
+        "create_resource": _creator,
+        "initialize": _initializer,
+    }
+
+
+class TrackableAsset(base.Trackable):
+  """Base class for asset files which need to be tracked."""
+
+  def __init__(self, path):
+    """Record the full path to the asset."""
+    # The init_scope prevents functions from capturing `path` in an
+    # initialization graph, since it is transient and should not end up in a
+    # serialized function body.
+    with ops.init_scope():
+      self._path = ops.internal_convert_to_tensor(path, dtype=dtypes.string,
+                                                  name="asset_path")
+
+  @property
+  def asset_path(self):
+    """Fetch the current asset path."""
+    return self._path
+
+ops.register_tensor_conversion_function(
+    TrackableAsset,
+    lambda asset, **kw: ops.internal_convert_to_tensor(asset.asset_path, **kw))
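
A quick illustration of the implicit dependency edges described in the `AutoTrackable` docstring, using the private `_checkpoint_dependencies` accessor inherited from `base.Trackable` (a sketch, not a public API):

```python
# Attribute assignment creates a named dependency edge; deletion removes it.
from tensorflow.python.training.tracking import tracking

root = tracking.AutoTrackable()
root.leaf = tracking.AutoTrackable()  # creates a dependency named "leaf"
(name, ref), = root._checkpoint_dependencies
assert name == "leaf" and ref is root.leaf

del root.leaf  # __delattr__ drops the edge again
assert not root._checkpoint_dependencies
```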
diff --git a/tensorflow/python/training/tracking/tracking_test.py b/tensorflow/python/training/tracking/tracking_test.py
new file mode 100644
index 0000000..37e1417
--- /dev/null
+++ b/tensorflow/python/training/tracking/tracking_test.py
@@ -0,0 +1,269 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import numpy
+import six
+
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import data_structures
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util
+from tensorflow.python.util import nest
+
+
+class InterfaceTests(test.TestCase):
+
+  def testMultipleAssignment(self):
+    root = tracking.AutoTrackable()
+    root.leaf = tracking.AutoTrackable()
+    root.leaf = root.leaf
+    duplicate_name_dep = tracking.AutoTrackable()
+    with self.assertRaisesRegexp(ValueError, "already declared"):
+      root._track_trackable(duplicate_name_dep, name="leaf")
+    # No error; we're overriding __setattr__, so we can't really stop people
+    # from doing this while maintaining backward compatibility.
+    root.leaf = duplicate_name_dep
+    root._track_trackable(duplicate_name_dep, name="leaf", overwrite=True)
+    self.assertIs(duplicate_name_dep, root._lookup_dependency("leaf"))
+    (_, dep_object), = root._checkpoint_dependencies
+    self.assertIs(duplicate_name_dep, dep_object)
+
+  def testNoDependency(self):
+    root = tracking.AutoTrackable()
+    hasdep = tracking.AutoTrackable()
+    root.hasdep = hasdep
+    nodep = tracking.AutoTrackable()
+    root.nodep = data_structures.NoDependency(nodep)
+    self.assertEqual(1, len(root._checkpoint_dependencies))
+    self.assertIs(root._checkpoint_dependencies[0].ref, root.hasdep)
+    self.assertIs(root.hasdep, hasdep)
+    self.assertIs(root.nodep, nodep)
+
+    class NoDependencyModel(training.Model):
+
+      @base.no_automatic_dependency_tracking
+      def __init__(self):
+        super(NoDependencyModel, self).__init__()
+        self.a = []
+        self.b = tracking.AutoTrackable()
+
+    nodeps = NoDependencyModel()
+    self.assertEqual([nodeps], util.list_objects(nodeps))
+
+  def testRemoveDependency(self):
+    root = tracking.AutoTrackable()
+    root.a = tracking.AutoTrackable()
+    self.assertEqual(1, len(root._checkpoint_dependencies))
+    self.assertEqual(1, len(root._unconditional_checkpoint_dependencies))
+    self.assertIs(root.a, root._checkpoint_dependencies[0].ref)
+    del root.a
+    self.assertFalse(hasattr(root, "a"))
+    self.assertEqual(0, len(root._checkpoint_dependencies))
+    self.assertEqual(0, len(root._unconditional_checkpoint_dependencies))
+    root.a = tracking.AutoTrackable()
+    self.assertEqual(1, len(root._checkpoint_dependencies))
+    self.assertEqual(1, len(root._unconditional_checkpoint_dependencies))
+    self.assertIs(root.a, root._checkpoint_dependencies[0].ref)
+
+  def testListBasic(self):
+    a = tracking.AutoTrackable()
+    b = tracking.AutoTrackable()
+    a.l = [b]
+    c = tracking.AutoTrackable()
+    a.l.append(c)
+    a_deps = util.list_objects(a)
+    self.assertIn(b, a_deps)
+    self.assertIn(c, a_deps)
+    direct_a_dep, = a._checkpoint_dependencies
+    self.assertEqual("l", direct_a_dep.name)
+    self.assertIn(b, direct_a_dep.ref)
+    self.assertIn(c, direct_a_dep.ref)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testMutationDirtiesList(self):
+    a = tracking.AutoTrackable()
+    b = tracking.AutoTrackable()
+    a.l = [b]
+    c = tracking.AutoTrackable()
+    a.l.insert(0, c)
+    checkpoint = util.Checkpoint(a=a)
+    with self.assertRaisesRegexp(ValueError, "A list element was replaced"):
+      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testOutOfBandEditDirtiesList(self):
+    a = tracking.AutoTrackable()
+    b = tracking.AutoTrackable()
+    held_reference = [b]
+    a.l = held_reference
+    c = tracking.AutoTrackable()
+    held_reference.append(c)
+    checkpoint = util.Checkpoint(a=a)
+    with self.assertRaisesRegexp(ValueError, "The wrapped list was modified"):
+      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNestedLists(self):
+    a = tracking.AutoTrackable()
+    a.l = []
+    b = tracking.AutoTrackable()
+    a.l.append([b])
+    c = tracking.AutoTrackable()
+    a.l[0].append(c)
+    a_deps = util.list_objects(a)
+    self.assertIn(b, a_deps)
+    self.assertIn(c, a_deps)
+    a.l[0].append(1)
+    d = tracking.AutoTrackable()
+    a.l[0].append(d)
+    a_deps = util.list_objects(a)
+    self.assertIn(d, a_deps)
+    self.assertIn(b, a_deps)
+    self.assertIn(c, a_deps)
+    self.assertNotIn(1, a_deps)
+    e = tracking.AutoTrackable()
+    f = tracking.AutoTrackable()
+    a.l1 = [[], [e]]
+    a.l1[0].append(f)
+    a_deps = util.list_objects(a)
+    self.assertIn(e, a_deps)
+    self.assertIn(f, a_deps)
+    checkpoint = util.Checkpoint(a=a)
+    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+    a.l[0].append(data_structures.NoDependency([]))
+    a.l[0][-1].append(5)
+    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+    # Dirtying the inner list means the root object is unsaveable.
+    a.l[0][1] = 2
+    with self.assertRaisesRegexp(ValueError, "A list element was replaced"):
+      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDictionariesBasic(self):
+    a = training.Model()
+    b = training.Model()
+    a.attribute = {"b": b}
+    c = training.Model()
+    a.attribute["c"] = []
+    a.attribute["c"].append(c)
+    a_deps = util.list_objects(a)
+    self.assertIn(b, a_deps)
+    self.assertIn(c, a_deps)
+    self.assertIs(b, a.attribute["b"])
+    six.assertCountEqual(
+        self,
+        ["b", "c"],
+        [dep.name for dep in a.attribute._checkpoint_dependencies])
+    self.assertEqual([b, c], a.layers)
+    self.assertEqual([b, c], a.attribute.layers)
+    self.assertEqual([c], a.attribute["c"].layers)
+    checkpoint = util.Checkpoint(a=a)
+    save_path = checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+    with self.cached_session():
+      checkpoint.restore(save_path).assert_consumed().initialize_or_restore()
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoDepList(self):
+    a = training.Model()
+    a.l1 = data_structures.NoDependency([])
+    a.l1.insert(1, 0)
+    self.assertTrue(isinstance(a.l1, list))
+    checkpoint = util.Checkpoint(a=a)
+    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+    a.l2 = []
+    a.l2.insert(1, 0)
+    with self.assertRaisesRegexp(ValueError, "A list element was replaced"):
+      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testAssertions(self):
+    a = tracking.AutoTrackable()
+    a.l = {"k": [numpy.zeros([2, 2])]}
+    self.assertAllEqual(nest.flatten({"k": [numpy.zeros([2, 2])]}),
+                        nest.flatten(a.l))
+    self.assertAllClose({"k": [numpy.zeros([2, 2])]}, a.l)
+    nest.map_structure(self.assertAllClose, a.l, {"k": [numpy.zeros([2, 2])]})
+    a.tensors = {"k": [array_ops.ones([2, 2]), array_ops.zeros([3, 3])]}
+    self.assertAllClose({"k": [numpy.ones([2, 2]), numpy.zeros([3, 3])]},
+                        self.evaluate(a.tensors))
+
+
+class _DummyResource(tracking.TrackableResource):
+
+  def __init__(self, handle_name):
+    self._handle_name = handle_name
+    super(_DummyResource, self).__init__()
+
+  def create_resource(self):
+    return self._handle_name
+
+
+class ResourceTrackerTest(test.TestCase):
+
+  def testBasic(self):
+    resource_tracker = tracking.ResourceTracker()
+    with tracking.resource_tracker_scope(resource_tracker):
+      dummy_resource1 = _DummyResource("test1")
+      dummy_resource2 = _DummyResource("test2")
+
+    self.assertEqual(2, len(resource_tracker.resources))
+    self.assertEqual("test1", resource_tracker.resources[0].resource_handle)
+    self.assertEqual("test2", resource_tracker.resources[1].resource_handle)
+
+  def testTwoScopes(self):
+    resource_tracker1 = tracking.ResourceTracker()
+    with tracking.resource_tracker_scope(resource_tracker1):
+      dummy_resource1 = _DummyResource("test1")
+
+    resource_tracker2 = tracking.ResourceTracker()
+    with tracking.resource_tracker_scope(resource_tracker2):
+      dummy_resource2 = _DummyResource("test2")
+
+    self.assertEqual(1, len(resource_tracker1.resources))
+    self.assertEqual("test1", resource_tracker1.resources[0].resource_handle)
+    self.assertEqual(1, len(resource_tracker2.resources))
+    self.assertEqual("test2", resource_tracker2.resources[0].resource_handle)
+
+  def testNestedScopes(self):
+    resource_tracker = tracking.ResourceTracker()
+    with tracking.resource_tracker_scope(resource_tracker):
+      resource_tracker1 = tracking.ResourceTracker()
+      with tracking.resource_tracker_scope(resource_tracker1):
+        dummy_resource1 = _DummyResource("test1")
+
+      resource_tracker2 = tracking.ResourceTracker()
+      with tracking.resource_tracker_scope(resource_tracker2):
+        dummy_resource2 = _DummyResource("test2")
+
+    self.assertEqual(1, len(resource_tracker1.resources))
+    self.assertEqual("test1", resource_tracker1.resources[0].resource_handle)
+    self.assertEqual(1, len(resource_tracker2.resources))
+    self.assertEqual("test2", resource_tracker2.resources[0].resource_handle)
+    self.assertEqual(2, len(resource_tracker.resources))
+    self.assertEqual("test1", resource_tracker.resources[0].resource_handle)
+    self.assertEqual("test2", resource_tracker.resources[1].resource_handle)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/tracking/util.py b/tensorflow/python/training/tracking/util.py
new file mode 100644
index 0000000..f5f70a4
--- /dev/null
+++ b/tensorflow/python/training/tracking/util.py
@@ -0,0 +1,1468 @@
+"""Utilities for saving/loading Trackable objects."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import os
+import weakref
+
+from tensorflow.core.protobuf import trackable_object_graph_pb2
+from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_io_ops as io_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as v1_saver_lib
+from tensorflow.python.training.saving import functional_saver
+from tensorflow.python.training.saving import saveable_object_util
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import data_structures
+from tensorflow.python.training.tracking import graph_view as graph_view_lib
+from tensorflow.python.training.tracking import object_identity
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
+from tensorflow.python.util import tf_contextlib
+from tensorflow.python.util.tf_export import tf_export
+
+
+class _CheckpointRestoreCoordinator(object):
+  """Holds the status of an object-based checkpoint load."""
+
+  def __init__(self, object_graph_proto, save_path, save_path_tensor,
+               restore_op_cache, graph_view):
+    """Specify the checkpoint being loaded.
+
+    Args:
+      object_graph_proto: The TrackableObjectGraph protocol buffer
+        associated with this checkpoint.
+      save_path: A string, the path to the checkpoint, as returned by
+        `tf.train.latest_checkpoint`.
+      save_path_tensor: A string `Tensor` which contains or will be fed the save
+        path.
+      restore_op_cache: A dictionary shared between
+        `_CheckpointRestoreCoordinator`s for the same Python objects, used to
+        look up restore ops by name to avoid re-creating them across multiple
+        `restore()` calls.
+      graph_view: A graph_view_lib.ObjectGraphView object for the restored
+        objects.
+    """
+    self.object_graph_proto = object_graph_proto
+    self.restore_uid = ops.uid()
+    # Maps from objects to lists of attributes which were in the checkpoint but
+    # not loaded into any object, for error checking.
+    self.unused_attributes = weakref.WeakKeyDictionary()
+    # Dictionary mapping from an id in the protocol buffer flat array to
+    # Trackable Python objects. This mapping may be deferred if a
+    # checkpoint is restored before all dependencies have been tracked. Uses
+    # weak references so that partial restorations don't create reference cycles
+    # (as objects with deferred dependencies will generally have references to
+    # this object).
+    self.object_by_proto_id = weakref.WeakValueDictionary()
+    # A set of all Python objects we've seen as dependencies, even if we didn't
+    # use them (for example because of inconsistent references when
+    # loading). Used to make status assertions fail when loading checkpoints
+    # that don't quite match.
+    self.all_python_objects = object_identity.ObjectIdentityWeakSet()
+    self.save_path_tensor = save_path_tensor
+    self.save_path_string = save_path
+    self.dtype_map = pywrap_tensorflow.NewCheckpointReader(
+        save_path).get_variable_to_dtype_map()
+    # When graph building, contains a list of ops to run to restore objects from
+    # this checkpoint.
+    self.restore_ops = []
+    self.restore_ops_by_name = restore_op_cache
+    self.graph_view = graph_view
+    self.new_restore_ops_callback = None
+    # A mapping from optimizer proto ids to lists of slot variables to be
+    # restored when the optimizer is tracked. Only includes slot variables whose
+    # regular variables have already been created, and only for optimizer
+    # objects which have not yet been created/tracked.
+    self.deferred_slot_restorations = {}
+    # A mapping from variable proto ids to lists of slot variables to be
+    # restored when the variable is created/tracked. These get shifted over to
+    # deferred_slot_restorations if the optimizer hasn't been created when that
+    # happens.
+    self.slot_restorations = {}
+    for node_index, node in enumerate(self.object_graph_proto.nodes):
+      for slot_reference in node.slot_variables:
+        # `node` refers to an `Optimizer`, since only these have slot variables.
+        self.slot_restorations.setdefault(
+            slot_reference.original_variable_node_id, []).append(
+                base._SlotVariableRestoration(  # pylint: disable=protected-access
+                    optimizer_id=node_index,
+                    slot_variable_id=slot_reference.slot_variable_node_id,
+                    slot_name=slot_reference.slot_name))
+
+  def new_restore_ops(self, new_ops):
+    self.restore_ops.extend(new_ops)
+    if self.new_restore_ops_callback:
+      self.new_restore_ops_callback(new_ops)  # pylint: disable=not-callable
+
+  def restore_saveables(self, tensor_saveables, python_saveables):
+    """Run or build restore operations for SaveableObjects.
+
+    Args:
+      tensor_saveables: `SaveableObject`s which correspond to Tensors.
+      python_saveables: `PythonStateSaveable`s which correspond to Python
+        values.
+
+    Returns:
+      When graph building, a list of restore operations, either cached or newly
+      created, to restore `tensor_saveables`.
+    """
+    restore_ops = []
+    # Eagerly run restorations for Python state.
+    reader = pywrap_tensorflow.NewCheckpointReader(
+        self.save_path_string)
+    for saveable in python_saveables:
+      spec_names = [spec.name for spec in saveable.specs]
+      saveable.python_restore(
+          [reader.get_tensor(name) for name in spec_names])
+
+    # If we have new SaveableObjects, extract and cache restore ops.
+    if tensor_saveables:
+      validated_saveables = saveable_object_util.validate_and_slice_inputs(
+          tensor_saveables)
+      validated_names = set(saveable.name for saveable in validated_saveables)
+      if set(tensor_saveables.keys()) != validated_names:
+        raise AssertionError(
+            ("Saveable keys changed when validating. Got back %s, was "
+             "expecting %s") % (tensor_saveables.keys(), validated_names))
+      new_restore_ops = functional_saver.restore_from_saveable_objects(
+          self.save_path_tensor, validated_saveables)
+      if not context.executing_eagerly():
+        restore_ops.extend(new_restore_ops)
+        for saveable, restore_op in zip(validated_saveables, new_restore_ops):
+          assert saveable.name not in self.restore_ops_by_name
+          self.restore_ops_by_name[saveable.name] = restore_op
+    return restore_ops
+
+
+class _NameBasedRestoreCoordinator(object):
+  """Keeps the status of a name-based checkpoint restore."""
+
+  def __init__(self, save_path, dtype_map=None):
+    self.save_path = save_path
+    self.dtype_map = dtype_map
+    self.unused_attributes = weakref.WeakKeyDictionary()
+    self.restore_uid = ops.uid()
+
+  def globally_named_object_attributes(self, trackable):
+    """Create globally named SaveableObjects from attributes.
+
+    If an object's attribute has no global name specified (default construction
+    for the SaveableObject factory), records the failure in
+    `self.unused_attributes` (which can then be used to make status assertions
+    fail; see `NameBasedSaverStatus`).
+
+    Args:
+      trackable: An object to save.
+
+    Yields:
+      SaveableObjects for `trackable`'s attributes.
+    """
+    for attribute_name, saveable_factory in (
+        trackable._gather_saveables_for_checkpoint().items()):  # pylint: disable=protected-access
+      if callable(saveable_factory):
+        try:
+          saveable = saveable_factory()
+        except TypeError:
+          # This saveable object factory requires a name= argument, meaning
+          # there's no way to save/restore it using a name-based checkpoint.
+          # Record the failure so assert_consumed() fails, and move on.
+          self.unused_attributes.setdefault(trackable, []).append(
+              attribute_name)
+          continue
+      else:
+        saveable = saveable_factory
+      names_to_saveables = saveable_object_util.op_list_to_dict(
+          [saveable],
+          convert_variable_to_tensor=False)
+      for name, op in names_to_saveables.items():
+        for saveable_object in saveable_object_util.saveable_objects_for_op(
+            op=op, name=name):
+          yield saveable_object
+
+  def eager_restore(self, trackable):
+    """Runs restore ops for `trackable`'s attributes."""
+    # When graph building, we don't add any restore ops to the graph until
+    # run_restore_ops/initialize_or_restore on the status object for name-based
+    # checkpoints.
+    assert context.executing_eagerly()
+    for saveable in self.globally_named_object_attributes(
+        trackable):
+      restored_tensors = []
+      tensor_missing = False
+      for spec in saveable.specs:
+        if spec.name in self.dtype_map:
+          with ops.device("cpu:0"):
+            restored, = io_ops.restore_v2(
+                prefix=self.save_path,
+                tensor_names=[spec.name],
+                shape_and_slices=[""],
+                dtypes=[self.dtype_map[spec.name]],
+                name="%s_checkpoint_read" % (spec.name,))
+          restored_tensors.append(array_ops.identity(restored))
+        else:
+          tensor_missing = True
+
+      if not tensor_missing:
+        # Ignores values missing from the checkpoint, as with object-based
+        # restore. Status assertions can be used to check exact matches,
+        # although it's unlikely to ever happen for name-based checkpoints.
+        saveable.restore(restored_tensors=restored_tensors,
+                         restored_shapes=None)
+
+
+# TODO(allenl): If this ends up in a public API, consider adding LINT.IfChange
+# or consolidating the implementation with get_variable.
+def _default_getter(name, shape, dtype, initializer=None,
+                    partition_info=None, **kwargs):
+  """A pared-down version of get_variable which does not reuse variables."""
+  dtype = dtypes.as_dtype(dtype)
+  shape_object = tensor_shape.as_shape(shape)
+  with ops.init_scope():
+    if initializer is None:
+      initializer, initializing_from_value = (
+          variable_scope._get_default_variable_store()._get_default_initializer(  # pylint: disable=protected-access
+              name=name, shape=shape_object, dtype=dtype))
+    else:
+      initializing_from_value = not callable(initializer)
+    # Same logic as get_variable
+    variable_dtype = dtype.base_dtype
+    if initializing_from_value:
+      if shape is not None:
+        raise ValueError("If initializer is a constant, do not specify shape.")
+      initial_value = initializer
+    else:
+      # Instantiate initializer if provided initializer is a type object.
+      if isinstance(initializer, type(init_ops.Initializer)):
+        initializer = initializer(dtype=dtype)
+      def initial_value():
+        return initializer(
+            shape_object.as_list(), dtype=dtype, partition_info=partition_info)
+    return variables.VariableV1(
+        initial_value=initial_value,
+        name=name,
+        dtype=variable_dtype,
+        use_resource=True,
+        **kwargs
+    )
+
+
+def add_variable(trackable, name, shape=None, dtype=dtypes.float32,
+                 initializer=None):
+  """Add a variable to a Trackable with no scope influence."""
+  return trackable._add_variable_with_custom_getter(  # pylint: disable=protected-access
+      name=name, shape=shape, dtype=dtype,
+      initializer=initializer, getter=_default_getter)
+
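A brief sketch of `add_variable` with `_default_getter`: the name passed here (not a `variable_scope` name) is what drives object-based checkpointing. `obj` is an illustrative stand-in for any `Trackable`:

```python
from tensorflow.python.ops import init_ops
from tensorflow.python.training.tracking import tracking
from tensorflow.python.training.tracking import util

# Create and track a variable with no variable_scope reuse or name mangling.
obj = tracking.AutoTrackable()
v = util.add_variable(obj, name="v", shape=[],
                      initializer=init_ops.zeros_initializer())
assert obj._lookup_dependency("v") is v  # tracked under the name "v"
```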
+
+def object_metadata(save_path):
+  """Retrieves information about the objects in a checkpoint.
+
+  Example usage:
+
+  ```python
+  object_graph = tf.contrib.checkpoint.object_metadata(
+      tf.train.latest_checkpoint(checkpoint_directory))
+  ckpt_variable_names = set()
+  for node in object_graph.nodes:
+    for attribute in node.attributes:
+      ckpt_variable_names.add(attribute.full_name)
+  ```
+
+  Args:
+    save_path: The path to the checkpoint, as returned by `save` or
+      `tf.train.latest_checkpoint`.
+  Returns:
+    A parsed `tf.contrib.checkpoint.TrackableObjectGraph` protocol buffer.
+  Raises:
+    ValueError: If an object graph was not found in the checkpoint.
+  """
+  reader = pywrap_tensorflow.NewCheckpointReader(save_path)
+  try:
+    object_graph_string = reader.get_tensor(
+        base.OBJECT_GRAPH_PROTO_KEY)
+  except errors_impl.NotFoundError:
+    raise ValueError(
+        ('The specified checkpoint "%s" does not appear to be object-based (it '
+         'is missing the key "%s"). Likely it was created with a name-based '
+         'saver and does not contain an object dependency graph.') % (
+             save_path, base.OBJECT_GRAPH_PROTO_KEY))
+  object_graph_proto = (
+      trackable_object_graph_pb2.TrackableObjectGraph())
+  object_graph_proto.ParseFromString(object_graph_string)
+  return object_graph_proto
+
+
+def list_objects(root_trackable):
+  """Traverse the object graph and list all accessible objects.
+
+  Looks for `Trackable` objects which are dependencies of
+  `root_trackable`. Includes slot variables only if the variable they are
+  slotting for and the optimizer are dependencies of `root_trackable`
+  (i.e. if they would be saved with a checkpoint).
+
+  Args:
+    root_trackable: A `Trackable` object whose dependencies should be
+      flattened.
+  Returns:
+    A flat list of objects.
+  """
+  return graph_view_lib.ObjectGraphView(root_trackable).list_objects()
+
+
+def gather_initializers(root_trackable):
+  """Traverse the object graph and find initialization ops.
+
+  Looks for `Trackable` objects which are dependencies of
+  `root_trackable` and which have an `initializer` property. Includes
+  initializers for slot variables only if the variable they are slotting for and
+  the optimizer are dependencies of `root_trackable` (i.e. if they would be
+  saved with a checkpoint).
+
+  Args:
+    root_trackable: A `Trackable` object to gather initializers for.
+  Returns:
+    A list of initialization ops.
+  """
+  trackable_objects = list_objects(root_trackable)
+  return [c.initializer for c in trackable_objects
+          if hasattr(c, "initializer") and c.initializer is not None]
+
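A usage sketch for graph building (eager variables initialize themselves, so this is only needed when constructing graphs):

```python
from tensorflow.python.client import session as session_lib
from tensorflow.python.ops import variables
from tensorflow.python.training.tracking import tracking
from tensorflow.python.training.tracking import util

root = tracking.AutoTrackable()
root.v = variables.Variable(0.)
with session_lib.Session() as sess:
  # Run every initializer reachable from `root` before reading variables.
  sess.run(util.gather_initializers(root))
  assert sess.run(root.v) == 0.
```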
+
+@tf_contextlib.contextmanager
+def capture_dependencies(template):
+  """Capture variables created within this scope as `Template` dependencies.
+
+  Requires that `template.variable_scope` is active.
+
+  This scope is intended as a compatibility measure, allowing a trackable
+  object to add dependencies on variables created in a block of code which is
+  not aware of object-based saving (and instead uses variable names
+  heavily). This is how `Template` objects add dependencies on variables and
+  sub-`Template`s. Where possible, use `tf.make_template` directly.
+
+  Args:
+    template: The `Template` object to register dependencies with.
+
+  Yields:
+    None (when used as a context manager).
+  """
+  name_prefix = template.variable_scope.name
+
+  def _trackable_custom_creator(next_creator, name, initial_value,
+                                trackable_parent=None, **kwargs):
+    """A variable creation hook which adds Trackable dependencies.
+
+    Set for example during a `Template`'s first wrapped function
+    execution. Ensures that (a) `template` depends on any trackable objects
+    which open their own `capture_dependencies` scope inside this one and
+    create variables, and (b) any variables not created in a more deeply
+    nested scope are added as dependencies directly.
+
+    The `trackable_parent` argument is passed between custom creators but
+    ignored when the variable object itself is created. This argument indicates
+    (if not `None`) that a more deeply nested scope has already added the
+    variable as a dependency, and that parent scopes should add a dependency on
+    that object rather than on the variable directly.
+
+    Args:
+      next_creator: See `variable_scope.variable_creator_scope`; the next
+        creator in the chain.
+      name: The (full, scope-influenced) name of the variable. The `name_prefix`
+        itself is stripped for the purposes of object-based dependency tracking,
+        but scopes opened within this scope are respected.
+      initial_value: See `variable_scope.variable_creator_scope`. Taken
+        explicitly so the argument can be re-named and used with
+        `Trackable._add_variable_with_custom_getter`.
+      trackable_parent: If not None, a more deeply nested trackable
+        object and its name prefix which were passed to `capture_dependencies`
+        to add a dependency on (rather than depending on the variable directly).
+      **kwargs: Passed through to the next creator.
+
+    Returns:
+      The output of `next_creator`: the fetched/created variable object.
+    """
+    def _call_next_creator_renaming_initializer(initializer, **inner_kwargs):
+      # Ignore the scope-stripped name passed in; propagate the original
+      # full name instead.
+      inner_kwargs.pop("name")
+      return next_creator(
+          initial_value=initializer,
+          name=name,
+          **inner_kwargs)
+    if name is not None and name.startswith(name_prefix):
+      scope_stripped_name = name[len(name_prefix) + 1:]
+      if not trackable_parent:
+        return template._add_variable_with_custom_getter(  # pylint: disable=protected-access
+            initializer=initial_value,
+            name=scope_stripped_name,
+            getter=_call_next_creator_renaming_initializer,
+            # Disable error checking for Trackable. Exceptions are instead
+            # raised if necessary when the object-based saver tries to
+            # save/restore the object.
+            overwrite=True,
+            trackable_parent=(template, name_prefix),
+            **kwargs)
+      else:
+        parent_object, parent_name_prefix = trackable_parent
+        template._track_trackable(  # pylint: disable=protected-access
+            parent_object,
+            name=parent_name_prefix[len(name_prefix) + 1:],
+            overwrite=True)
+    return next_creator(
+        name=name, initial_value=initial_value,
+        trackable_parent=(template, name_prefix), **kwargs)
+
+  with variable_scope.variable_creator_scope(_trackable_custom_creator):
+    yield
+
+
+class _LoadStatus(object):
+  """Abstract base for load status callbacks."""
+
+  @abc.abstractmethod
+  def assert_consumed(self):
+    """Raises an exception unless a non-trivial restoration has completed."""
+    pass
+
+  @abc.abstractmethod
+  def assert_existing_objects_matched(self):
+    """Raises an exception unless existing Python objects have been matched."""
+    pass
+
+  @abc.abstractmethod
+  def assert_nontrivial_match(self):
+    """Raises an exception if only the root object matched."""
+    pass
+
+  @abc.abstractmethod
+  def run_restore_ops(self, session=None):
+    """Runs restore ops from the checkpoint. Requires a valid checkpoint."""
+    pass
+
+  @abc.abstractmethod
+  def initialize_or_restore(self, session=None):
+    """Runs restore ops from the checkpoint, or initializes variables."""
+    pass
+
+
+def streaming_restore(status, session=None):
+  """When graph building, runs restore ops as soon as they come in.
+
+  Args:
+    status: A _LoadStatus object from an object-based saver's
+      restore(). Streaming restore from name-based checkpoints is not
+      currently supported.
+    session: A session to run new restore ops in.
+  """
+  if context.executing_eagerly():
+    # Streaming restore is the default/only behavior when executing eagerly.
+    return
+  if session is None:
+    session = ops.get_default_session()
+  if isinstance(status, NameBasedSaverStatus):
+    raise NotImplementedError(
+        "Streaming restore not supported from name-based checkpoints. File a "
+        "feature request if this limitation bothers you.")
+  status.run_restore_ops(session=session)
+  # pylint: disable=protected-access
+  status._checkpoint.new_restore_ops_callback = (
+      lambda ops: session.run(ops, feed_dict=status._feed_dict))
+  # pylint: enable=protected-access
+
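An end-to-end sketch of the intended call pattern when graph building, assuming `util.Checkpoint` (defined later in this file) behaves like the 1.x `tf.train.Checkpoint`; paths and names are illustrative:

```python
import os
import tempfile

from tensorflow.python.client import session as session_lib
from tensorflow.python.ops import variables
from tensorflow.python.training.tracking import tracking
from tensorflow.python.training.tracking import util

root = tracking.AutoTrackable()
root.v = variables.Variable(42.)
checkpoint = util.Checkpoint(root=root)
prefix = os.path.join(tempfile.mkdtemp(), "ckpt")

with session_lib.Session() as session:
  session.run(root.v.initializer)
  save_path = checkpoint.save(prefix, session=session)

with session_lib.Session() as session:
  status = checkpoint.restore(save_path)
  # Restores already-created variables now; restore ops for variables created
  # later are run in `session` as soon as they appear.
  util.streaming_restore(status, session=session)
  assert session.run(root.v) == 42.
```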
+
+class CheckpointLoadStatus(_LoadStatus):
+  """Checks the status of checkpoint loading and manages restore ops.
+
+  Returned from `Saver.restore`. Since `restore` may defer the loading of values
+  in the checkpoint which don't yet have corresponding Python objects,
+  `CheckpointLoadStatus` provides a callback to verify that checkpoint loading
+  is complete (`assert_consumed`).
+
+  When graph building, `restore` does not run restore ops itself since their
+  creation may be deferred. The `run_restore_ops` method must be called once all
+  Python objects with values to restore have been created and added to the
+  dependency graph (this does not necessarily have to be the whole checkpoint;
+  calling `run_restore_ops` while `assert_consumed` fails is supported and will
+  partially restore the checkpoint).
+
+  See `Saver.restore` for usage examples.
+  """
+
+  def __init__(self, checkpoint, feed_dict, graph_view):
+    self._checkpoint = checkpoint
+    self._feed_dict = feed_dict
+    self._graph_view = graph_view
+
+  def assert_consumed(self):
+    """Asserts that all objects in the checkpoint have been created/matched.
+
+    Returns:
+      `self` for chaining.
+    Raises:
+      AssertionError: If there are any Python objects in the dependency graph
+        which have not been restored from this checkpoint or a later `restore`,
+        or if there are any checkpointed values which have not been matched to
+        Python objects.
+    """
+    self.assert_existing_objects_matched()
+    for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes):
+      trackable = self._checkpoint.object_by_proto_id.get(node_id, None)
+      if trackable is None:
+        raise AssertionError("Unresolved object in checkpoint: %s" % (node,))
+    if self._checkpoint.slot_restorations:
+      # Sanity check; this collection should be clear if everything has been
+      # restored.
+      raise AssertionError("Unresolved slot restorations: %s" % (
+          self._checkpoint.slot_restorations,))
+    if self._checkpoint.unused_attributes:
+      raise AssertionError(
+          ("Unused attributes in these objects (the attributes exist in the "
+           "checkpoint but not in the objects): %s") % (
+               list(self._checkpoint.unused_attributes.items()),))
+    return self
+
+  def assert_existing_objects_matched(self):
+    """Asserts that trackable Python objects have been matched.
+
+    Note that this is a weaker assertion than `assert_consumed`. It will only
+    fail for existing Python objects which are (transitive) dependencies of the
+    root object and which do not have an entry in the checkpoint.
+
+    It will not fail, for example, if a `tf.keras.Layer` object has not yet been
+    built and so has not created any `tf.Variable` objects.
+
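+    A sketch of the distinction (assumes eager execution and a checkpoint at
+    `save_path` written with a built `Dense` layer):
+
+    ```python
+    status = tf.train.Checkpoint(layer=tf.keras.layers.Dense(2)).restore(
+        save_path)
+    status.assert_existing_objects_matched()  # Passes: the layer exists.
+    # status.assert_consumed() would raise, since the layer has not been
+    # built and the checkpointed kernel/bias have no matching variables yet.
+    ```
+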
+    Returns:
+      `self` for chaining.
+
+    Raises:
+      AssertionError: If a Python object exists in the transitive dependencies
+        of the root object but does not have a value in the checkpoint.
+    """
+    for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes):
+      trackable = self._checkpoint.object_by_proto_id.get(node_id, None)
+      if (trackable is not None
+          and trackable._update_uid < self._checkpoint.restore_uid):  # pylint: disable=protected-access
+        raise AssertionError(
+            "Object not assigned a value from checkpoint: %s" % (node,))
+    for trackable_object in self._graph_view.list_objects():
+      # Remove data structures that do not contain any variables from
+      # restoration checks.
+      if (isinstance(trackable_object,
+                     data_structures.TrackableDataStructure) and
+          not trackable_object._checkpoint_dependencies):
+        continue
+      self._checkpoint.all_python_objects.add(trackable_object)
+    unused_python_objects = (
+        object_identity.ObjectIdentitySet(self._checkpoint.all_python_objects)
+        - object_identity.ObjectIdentitySet(
+            self._checkpoint.object_by_proto_id.values()))
+    if unused_python_objects:
+      raise AssertionError(
+          ("Some Python objects were not bound to checkpointed values, likely "
+           "due to changes in the Python program: %s")
+          % (list(unused_python_objects),))
+    return self
+
+  def assert_nontrivial_match(self):
+    """Raises an exception if only the root object matched."""
+    for trackable_object in self._graph_view.list_objects():
+      self._checkpoint.all_python_objects.add(trackable_object)
+    if len(self._checkpoint.object_by_proto_id) <= 1:
+      unused_python_objects = (
+          object_identity.ObjectIdentitySet(
+              self._checkpoint.all_python_objects)
+          - object_identity.ObjectIdentitySet(
+              self._checkpoint.object_by_proto_id.values()))
+      if unused_python_objects:
+        raise AssertionError(
+            ("Nothing except the root object matched a checkpointed value. "
+             "Typically this means that the checkpoint does not match the "
+             "Python program. The following objects have no matching "
+             "checkpointed value: %s") % (list(unused_python_objects),))
+      else:
+        raise AssertionError(
+            "Nothing to load. No dependencies have been added to %s yet." % (
+                self._graph_view.root,))
+    return self
+
+  def run_restore_ops(self, session=None):
+    """Run operations to restore objects in the dependency graph."""
+    if context.executing_eagerly():
+      return  # Run eagerly
+    if session is None:
+      session = ops.get_default_session()
+    session.run(self._checkpoint.restore_ops, feed_dict=self._feed_dict)
+
+  def initialize_or_restore(self, session=None):
+    """Run operations to initialize or restore objects in the dependency graph.
+
+    Any objects in the dependency graph which have initializers but are not in
+    the checkpoint will have those initializers run, unless those variables are
+    being restored by a later call to `tf.train.Checkpoint.restore()`.
+
+    This method has a sibling in `InitializationOnlyStatus` which instead
+    initializes variables. That type is returned if no checkpoint is specified
+    in `Saver.restore`.
+
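+    A sketch when graph building (assumes `status` was returned by
+    `Saver.restore(path)` and that some variables are not in the checkpoint):
+
+    ```python
+    with tf.Session() as session:
+      # Restores variables found in the checkpoint and runs initializers
+      # for the rest.
+      status.initialize_or_restore(session)
+    ```
+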
+    Args:
+      session: The session to run init/restore ops in. If `None`, uses the
+        default session.
+    """
+    if context.executing_eagerly():
+      return  # Initialization and restoration ops are run eagerly
+    if session is None:
+      session = ops.get_default_session()
+    all_objects = self._graph_view.list_objects()
+    already_initialized_objects = object_identity.ObjectIdentitySet(
+        self._checkpoint.object_by_proto_id.values())
+    initializers_for_non_restored_variables = [
+        c.initializer for c in all_objects
+        if hasattr(c, "initializer")
+        and c not in already_initialized_objects
+        and (getattr(c, "_update_uid", self._checkpoint.restore_uid - 1)
+             < self._checkpoint.restore_uid)]
+    self.run_restore_ops(session=session)
+    session.run(initializers_for_non_restored_variables)
+
+
+class InitializationOnlyStatus(_LoadStatus):
+  """Returned from `Saver.restore` when no checkpoint has been specified.
+
+  Objects of this type have the same `assert_consumed` method as
+  `CheckpointLoadStatus`, but it always fails. However,
+  `initialize_or_restore` works on objects of both types, and will
+  initialize variables in `InitializationOnlyStatus` objects or restore them
+  otherwise.
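+
+  A sketch when graph building (assumes `saver` is an object-based saver):
+
+  ```python
+  status = saver.restore(save_path=None)  # An InitializationOnlyStatus.
+  with tf.Session() as session:
+    status.initialize_or_restore(session)  # Runs variable initializers.
+  ```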
+  """
+
+  def __init__(self, graph_view, restore_uid):
+    self._restore_uid = restore_uid
+    self._graph_view = graph_view
+
+  def assert_consumed(self):
+    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
+    raise AssertionError(
+        "No checkpoint specified (save_path=None); nothing is being restored.")
+
+  def assert_existing_objects_matched(self):
+    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
+    raise AssertionError(
+        "No checkpoint specified (save_path=None); nothing is being restored.")
+
+  def assert_nontrivial_match(self):
+    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
+    raise AssertionError(
+        "No checkpoint specified (save_path=None); nothing is being restored.")
+
+  def run_restore_ops(self, session=None):
+    """For consistency with `CheckpointLoadStatus`.
+
+    Use `initialize_or_restore` for initializing if no checkpoint was passed
+    to `Saver.restore` and restoring otherwise.
+
+    Args:
+      session: Not used.
+    """
+    raise AssertionError(
+        "No checkpoint specified, so no restore ops are available "
+        "(save_path=None to Saver.restore).")
+
+  def initialize_or_restore(self, session=None):
+    """Runs initialization ops for variables.
+
+    Objects which would be saved by `Saver.save` will be initialized, unless
+    those variables are being restored by a later call to
+    `tf.train.Checkpoint.restore()`.
+
+    This method does nothing when executing eagerly (initializers get run
+    eagerly).
+
+    Args:
+      session: The session to run initialization ops in. If `None`, uses the
+        default session.
+    """
+    if context.executing_eagerly():
+      return  # run eagerly
+    if session is None:
+      session = ops.get_default_session()
+    trackable_objects = self._graph_view.list_objects()
+    initializers = [
+        c.initializer for c in trackable_objects
+        if hasattr(c, "initializer") and c.initializer is not None
+        and (getattr(c, "_update_uid", self._restore_uid - 1)
+             < self._restore_uid)]
+    session.run(initializers)
+
+
+_DEPRECATED_RESTORE_INSTRUCTIONS = (
+    "Restoring a name-based tf.train.Saver checkpoint using the object-based "
+    "restore API. This mode uses global names to match variables, and so is "
+    "somewhat fragile. It also adds new restore ops to the graph each time it "
+    "is called when graph building. Prefer re-encoding training checkpoints in "
+    "the object-based format: run save() on the object-based saver (the same "
+    "one this message is coming from) and use that checkpoint in the future.")
+
+
+class NameBasedSaverStatus(_LoadStatus):
+  """Status for loading a name-based training checkpoint."""
+
+  # Ideally this deprecation decorator would be on the class, but that
+  # interferes with isinstance checks.
+  @deprecation.deprecated(
+      date=None, instructions=_DEPRECATED_RESTORE_INSTRUCTIONS)
+  def __init__(self, checkpoint, graph_view):
+    self._checkpoint = checkpoint
+    self._graph_view = graph_view
+
+  def assert_consumed(self):
+    """Raises an exception if any variables/objects are unmatched."""
+    unused_attributes = dict(self._checkpoint.unused_attributes)
+    if unused_attributes:
+      raise AssertionError(
+          "Some objects had attributes which were not restored: %s"
+          % (unused_attributes,))
+    for trackable in self._graph_view.list_objects():
+      # pylint: disable=protected-access
+      trackable._maybe_initialize_trackable()
+      if trackable._update_uid < self._checkpoint.restore_uid:
+        raise AssertionError("Object not restored: %s" % (trackable,))
+      # pylint: enable=protected-access
+    return self
+
+  def assert_existing_objects_matched(self):
+    """Raises an exception if currently created objects are unmatched."""
+    # For name-based checkpoints there's no object information in the
+    # checkpoint, so there's no distinction between
+    # assert_existing_objects_matched and assert_consumed (and both are less
+    # useful since we don't touch Python objects or Python state).
+    return self.assert_consumed()
+
+  def assert_nontrivial_match(self):
+    """Raises an exception if currently created objects are unmatched."""
+    # For name-based checkpoints there's no object information in the
+    # checkpoint, so there's no distinction between
+    # assert_nontrivial_match and assert_consumed (and both are less
+    # useful since we don't touch Python objects or Python state).
+    return self.assert_consumed()
+
+  def _gather_saveable_objects(self):
+    """Walk the object graph, using global names for SaveableObjects."""
+    objects = self._graph_view.list_objects()
+    saveable_objects = []
+    for trackable in objects:
+      # pylint: disable=protected-access
+      trackable._maybe_initialize_trackable()
+      if trackable._update_uid < self._checkpoint.restore_uid:
+        trackable._update_uid = self._checkpoint.restore_uid
+      else:
+        continue
+      # pylint: enable=protected-access
+      saveable_objects.extend(
+          self._checkpoint.globally_named_object_attributes(
+              trackable))
+    return saveable_objects
+
+  def run_restore_ops(self, session=None):
+    """Load the name-based training checkpoint using a new `tf.train.Saver`."""
+    if context.executing_eagerly():
+      return  # Nothing to do, variables are restored on creation.
+    if session is None:
+      session = ops.get_default_session()
+    with ops.device("/cpu:0"):
+      saveables = self._gather_saveable_objects()
+      v1_saver_lib.Saver(saveables).restore(
+          sess=session, save_path=self._checkpoint.save_path)
+
+  def initialize_or_restore(self, session=None):
+    """Alias for `run_restore_ops`."""
+    self.run_restore_ops(session=session)
+
+
+class _SessionWithFeedDictAdditions(session_lib.SessionInterface):
+  """Pretends to be a session, inserts extra feeds on run()."""
+
+  def __init__(self, session, feed_additions):
+    self._wrapped_session = session
+    self._feed_additions = feed_additions
+
+  def run(self, fetches, feed_dict=None, **kwargs):
+    if feed_dict is None:
+      feed_dict = {}
+    else:
+      feed_dict = feed_dict.copy()
+    feed_dict.update(self._feed_additions)
+    return self._wrapped_session.run(
+        fetches=fetches, feed_dict=feed_dict, **kwargs)
+
+
+class TrackableSaver(object):
+  """Saves and restores a `Trackable` object and its dependencies.
+
+  See `Trackable` for details of dependency management. `Saver` wraps
+  `tf.train.Saver` for saving, including extra information about the graph of
+  dependencies between Python objects. When restoring, it uses this information
+  about the save-time dependency graph to more robustly match objects with their
+  checkpointed values. When executing eagerly, it supports restoring variables
+  on object creation (see `Saver.restore`).
+
+  Values in a checkpoint are mapped to `Trackable` Python objects
+  (`Variable`s, `Optimizer`s, `Layer`s) based on the names provided when the
+  checkpoint was written. To avoid breaking existing checkpoints when modifying
+  a class, dependency names (the names of attributes to which `Trackable`
+  objects are assigned) may not change. These names are local to objects, in
+  contrast to the `Variable.name`-based save/restore from `tf.train.Saver`, and
+  so allow additional program transformations.
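+
+  A minimal sketch (assumes eager execution and that `root` is a trackable
+  object whose variables have been created):
+
+  ```python
+  saver = TrackableSaver(graph_view_lib.ObjectGraphView(root))
+  path = saver.save("/tmp/tf_ckpt/prefix")
+  saver.restore(path).assert_consumed()
+  ```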
+  """
+
+  def __init__(self, graph_view):
+    """Configure saving.
+
+    Args:
+      graph_view: A `GraphView` object containing a description of the object
+        graph to save.
+    """
+    # The file prefix placeholder is created lazily when graph building (and not
+    # at all when executing eagerly) to avoid creating ops in the constructor
+    # (when they may never be necessary).
+    self._file_prefix_placeholder = None
+
+    # Op caching for save
+    self._object_graph_feed_tensor = None
+    self._last_save_object_graph = None
+    self._file_prefix_feed_tensor = None
+    self._cached_save_operation = None
+
+    # Op caching for restore, shared between _CheckpointRestoreCoordinators
+    self._restore_op_cache = {}
+    self._graph_view = graph_view
+
+  def _gather_saveables(
+      self, object_graph_tensor=None):
+    """Wraps _serialize_object_graph to include the object graph proto."""
+    (named_saveable_objects, graph_proto,
+     feed_additions) = self._graph_view.serialize_object_graph()
+    if object_graph_tensor is None:
+      with ops.device("/cpu:0"):
+        object_graph_tensor = constant_op.constant(
+            graph_proto.SerializeToString(), dtype=dtypes.string)
+    else:
+      feed_additions.update(
+          {object_graph_tensor: graph_proto.SerializeToString()})
+    assert base.OBJECT_GRAPH_PROTO_KEY not in named_saveable_objects
+    named_saveable_objects.append(
+        base.NoRestoreSaveable(
+            tensor=object_graph_tensor,
+            name=base.OBJECT_GRAPH_PROTO_KEY))
+    return named_saveable_objects, graph_proto, feed_additions
+
+  def _save_cached_when_graph_building(
+      self,
+      file_prefix,
+      object_graph_tensor=None):
+    """Create or retrieve save ops.
+
+    Args:
+      file_prefix: The prefix for saved checkpoint files.
+      object_graph_tensor: A `Tensor` to which the current object graph will be
+        fed.
+
+    Returns:
+      A two-element tuple with a filename tensor and a feed_dict of tensors to
+      feed when running it (if graph building). The feed dict contains the
+      current object graph and any Python state to be saved in the
+      checkpoint. When executing eagerly, only the first element is
+      meaningful.
+    """
+    (named_saveable_objects, graph_proto,
+     feed_additions) = self._gather_saveables(
+         object_graph_tensor=object_graph_tensor)
+    if (self._last_save_object_graph != graph_proto
+        # When executing eagerly, we need to re-create SaveableObjects each time
+        # save() is called so they pick up new Tensors passed to their
+        # constructors. That means the Saver needs to be copied with a new
+        # var_list.
+        or context.executing_eagerly()
+        or ops.inside_function()):
+      saver = functional_saver.Saver(named_saveable_objects)
+      with ops.device("/cpu:0"):
+        self._cached_save_operation = saver.save(file_prefix)
+      self._last_save_object_graph = graph_proto
+    return self._cached_save_operation, feed_additions
+
+  def save(self, file_prefix, checkpoint_number=None, session=None):
+    """Save a training checkpoint.
+
+    The saved checkpoint includes variables created by this object and any
+    Trackable objects it depends on at the time `Saver.save()` is called.
+
+    Args:
+      file_prefix: A prefix to use for the checkpoint filenames
+        (/path/to/directory/and_a_prefix). Names are generated based on this
+        prefix and `checkpoint_number`, if provided.
+      checkpoint_number: An integer variable or Tensor, used to number
+        checkpoints. Typically this value is saved along with other variables in
+        training checkpoints, which will happen automatically if it was created
+        by `root_trackable` or one of its dependencies (via
+        `Trackable._add_variable`).
+      session: The session to evaluate variables in. Ignored when executing
+        eagerly. If not provided when graph building, the default session is
+        used.
+
+    Returns:
+      The full path to the checkpoint.
+    """
+    feed_dict = {}
+    use_session = (not context.executing_eagerly()
+                   and not ops.inside_function())
+    if checkpoint_number:
+      file_prefix = "%s-%d" % (file_prefix, checkpoint_number)
+    if use_session:
+      if self._object_graph_feed_tensor is None:
+        with ops.device("/cpu:0"):
+          self._object_graph_feed_tensor = constant_op.constant(
+              "", dtype=dtypes.string)
+          self._file_prefix_feed_tensor = constant_op.constant(
+              "", dtype=dtypes.string)
+      object_graph_tensor = self._object_graph_feed_tensor
+      file_prefix_tensor = self._file_prefix_feed_tensor
+      feed_dict[file_prefix_tensor] = file_prefix
+    else:
+      with ops.device("/cpu:0"):
+        file_prefix_tensor = constant_op.constant(
+            file_prefix, dtype=dtypes.string)
+      object_graph_tensor = None
+
+    file_io.recursive_create_dir(os.path.dirname(file_prefix))
+    save_path, new_feed_additions = self._save_cached_when_graph_building(
+        file_prefix=file_prefix_tensor,
+        object_graph_tensor=object_graph_tensor)
+    if new_feed_additions:
+      feed_dict.update(new_feed_additions)
+    if not use_session:
+      session = None
+    elif session is None:
+      session = ops.get_default_session()
+
+    if session:
+      return session.run(save_path, feed_dict=feed_dict)
+    else:
+      return save_path
+
+  def restore(self, save_path):
+    """Restore a training checkpoint.
+
+    Restores `root_trackable` and any objects that it tracks
+    (transitively). Either assigns values immediately if variables to restore
+    have
+    been created already, or defers restoration until the variables are
+    created. Dependencies added to the `root_trackable` passed to the
+    constructor after this call will be matched if they have a corresponding
+    object in the checkpoint.
+
+    When building a graph, restorations are added to the graph but not run.
+
+    To disallow deferred loading, assert immediately that all checkpointed
+    variables have been matched to variable objects:
+
+    ```python
+    saver = Saver(root)
+    saver.restore(path).assert_consumed()
+    ```
+
+    An exception will be raised unless every object was matched and its
+    variables already exist.
+
+    When graph building, `assert_consumed()` indicates that all of the restore
+    ops which will be created for this checkpoint have been created. They can be
+    run via the `run_restore_ops()` function of the status object:
+
+    ```python
+    saver.restore(path).assert_consumed().run_restore_ops()
+    ```
+
+    If the checkpoint has not been consumed completely, then the list of restore
+    ops will grow as more objects are added to the dependency graph.
+
+    Name-based `tf.train.Saver` checkpoints can be loaded using this
+    method. There is no deferred loading, and names are used to match
+    variables. No restore ops are created/run until `run_restore_ops()` or
+    `initialize_or_restore()` is called on the returned status object, even
+    when executing eagerly. Re-encode name-based checkpoints using this
+    object-based `Saver.save` as soon as possible.
+
+    Args:
+      save_path: The path to the checkpoint, as returned by `save` or
+        `tf.train.latest_checkpoint`. If None (as when there is no latest
+        checkpoint for `tf.train.latest_checkpoint` to return), returns an
+        object which may run initializers for objects in the dependency
+        graph. If the checkpoint was written by the name-based `tf.train.Saver`,
+        names are used to match variables.
+
+    Returns:
+      A load status object, which can be used to make assertions about the
+      status of checkpoint restoration and run initialization/restore ops
+      (of type `CheckpointLoadStatus`, or `InitializationOnlyStatus` if
+      `save_path` is `None`).
+
+      If `save_path` points to a name-based checkpoint, a `NameBasedSaverStatus`
+      object is returned which runs restore ops from a name-based saver.
+    """
+    if save_path is None:
+      return InitializationOnlyStatus(self._graph_view, ops.uid())
+    reader = pywrap_tensorflow.NewCheckpointReader(save_path)
+    graph_building = not context.executing_eagerly()
+    if graph_building:
+      dtype_map = None
+    else:
+      dtype_map = reader.get_variable_to_dtype_map()
+    try:
+      object_graph_string = reader.get_tensor(
+          base.OBJECT_GRAPH_PROTO_KEY)
+    except errors_impl.NotFoundError:
+      # The object graph proto does not exist in this checkpoint. Try the
+      # name-based compatibility mode.
+      restore_coordinator = _NameBasedRestoreCoordinator(
+          save_path=save_path, dtype_map=dtype_map)
+      if not graph_building:
+        for existing_trackable in self._graph_view.list_objects():
+          # pylint: disable=protected-access
+          existing_trackable._maybe_initialize_trackable()
+          existing_trackable._name_based_restores.add(restore_coordinator)
+          existing_trackable._name_based_attribute_restore(
+              restore_coordinator)
+          # pylint: enable=protected-access
+      return NameBasedSaverStatus(
+          restore_coordinator, graph_view=self._graph_view)
+
+    if graph_building:
+      if self._file_prefix_placeholder is None:
+        with ops.device("/cpu:0"):
+          self._file_prefix_placeholder = constant_op.constant("model")
+      file_prefix_tensor = self._file_prefix_placeholder
+      file_prefix_feed_dict = {self._file_prefix_placeholder: save_path}
+    else:
+      with ops.device("/cpu:0"):
+        file_prefix_tensor = constant_op.constant(save_path)
+      file_prefix_feed_dict = None
+    object_graph_proto = (
+        trackable_object_graph_pb2.TrackableObjectGraph())
+    object_graph_proto.ParseFromString(object_graph_string)
+    checkpoint = _CheckpointRestoreCoordinator(
+        object_graph_proto=object_graph_proto,
+        save_path=save_path,
+        save_path_tensor=file_prefix_tensor,
+        restore_op_cache=self._restore_op_cache,
+        graph_view=self._graph_view)
+    base.CheckpointPosition(checkpoint=checkpoint, proto_id=0).restore(
+        self._graph_view.root)
+    load_status = CheckpointLoadStatus(
+        checkpoint,
+        graph_view=self._graph_view,
+        feed_dict=file_prefix_feed_dict)
+    return load_status
+
+
+def frozen_saver(root_trackable):
+  """Creates a static `tf.train.Saver` from a trackable object.
+
+  The returned `Saver` saves object-based checkpoints, but these checkpoints
+  will no longer reflect structural changes to the object graph, only changes to
+  the values of `Variable`s added as dependencies of the root object before
+  `freeze` was called.
+
+  `restore` works on the returned `Saver`, but requires that the object graph of
+  the checkpoint being loaded exactly matches the object graph when
+  `frozen_saver` was called. This is in contrast to the object-based restore
+  performed by `tf.train.Checkpoint`, which attempts a fuzzy matching between
+  a checkpoint's object graph and the current Python object graph.
+
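+  A minimal sketch (assumes eager execution and that `root` already owns all
+  of its variables):
+
+  ```python
+  saver = frozen_saver(root)
+  saver.save("/tmp/frozen/prefix")
+  saver.restore("/tmp/frozen/prefix")  # Object graphs must match exactly.
+  ```
+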
+  Args:
+    root_trackable: A trackable object to save.
+
+  Returns:
+    A saver which saves object-based checkpoints for the object graph frozen at
+    the time `frozen_saver` was called.
+  """
+  named_saveable_objects = graph_view_lib.ObjectGraphView(
+      root_trackable).frozen_saveable_objects()
+  return functional_saver.Saver(named_saveable_objects)
+
+
+def saver_with_op_caching(obj):
+  """A TrackableSaver with a SaveableObject cache when graph building."""
+  if context.executing_eagerly():
+    saveables_cache = None
+  else:
+    saveables_cache = object_identity.ObjectIdentityWeakKeyDictionary()
+  return TrackableSaver(graph_view_lib.ObjectGraphView(
+      weakref.ref(obj),
+      saveables_cache=saveables_cache))
+
+
+@tf_export("train.Checkpoint")
+class Checkpoint(tracking.AutoTrackable):
+  """Groups trackable objects, saving and restoring them.
+
+  `Checkpoint`'s constructor accepts keyword arguments whose values are
+  objects that contain trackable state, such as `tf.train.Optimizer`
+  implementations, `tf.Variable`, `tf.keras.Layer` implementations, or
+  `tf.keras.Model` implementations. It saves these values with a checkpoint, and
+  maintains a `save_counter` for numbering checkpoints.
+
+  Example usage when graph building:
+
+  ```python
+  import tensorflow as tf
+  import os
+
+  checkpoint_directory = "/tmp/training_checkpoints"
+  checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+
+  checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
+  status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
+  train_op = optimizer.minimize( ... )
+  status.assert_consumed()  # Optional sanity checks.
+  with tf.Session() as session:
+    # Use the Session to restore variables, or initialize them if
+    # tf.train.latest_checkpoint returned None.
+    status.initialize_or_restore(session)
+    for _ in range(num_training_steps):
+      session.run(train_op)
+    checkpoint.save(file_prefix=checkpoint_prefix)
+  ```
+
+  Example usage with eager execution enabled:
+
+  ```python
+  import tensorflow as tf
+  import os
+
+  tf.enable_eager_execution()
+
+  checkpoint_directory = "/tmp/training_checkpoints"
+  checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+
+  checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
+  status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
+  for _ in range(num_training_steps):
+    optimizer.minimize( ... )  # Variables will be restored on creation.
+  status.assert_consumed()  # Optional sanity checks.
+  checkpoint.save(file_prefix=checkpoint_prefix)
+  ```
+
+  `Checkpoint.save` and `Checkpoint.restore` write and read object-based
+  checkpoints, in contrast to `tf.train.Saver`, which writes and reads
+  `variable.name`-based checkpoints. Object-based checkpointing saves a graph
+  of
+  dependencies between Python objects (`Layer`s, `Optimizer`s, `Variable`s,
+  etc.) with named edges, and this graph is used to match variables when
+  restoring a checkpoint. It can be more robust to changes in the Python
+  program, and helps to support restore-on-create for variables when executing
+  eagerly. Prefer `tf.train.Checkpoint` over `tf.train.Saver` for new code.
+
+  `Checkpoint` objects have dependencies on the objects passed as keyword
+  arguments to their constructors, and each dependency is given a name that is
+  identical to the name of the keyword argument for which it was created.
+  TensorFlow classes like `Layer`s and `Optimizer`s will automatically add
+  dependencies on their variables (e.g. "kernel" and "bias" for
+  `tf.keras.layers.Dense`). Inheriting from `tf.keras.Model` makes managing
+  dependencies easy in user-defined classes, since `Model` hooks into attribute
+  assignment. For example:
+
+  ```python
+  class Regress(tf.keras.Model):
+
+    def __init__(self):
+      super(Regress, self).__init__()
+      self.input_transform = tf.keras.layers.Dense(10)
+      # ...
+
+    def call(self, inputs):
+      x = self.input_transform(inputs)
+      # ...
+  ```
+
+  This `Model` has a dependency named "input_transform" on its `Dense` layer,
+  which in turn depends on its variables. As a result, saving an instance of
+  `Regress` using `tf.train.Checkpoint` will also save all the variables created
+  by the `Dense` layer.
+
+  Attributes:
+    save_counter: Incremented when `save()` is called. Used to number
+      checkpoints.
+  """
+
+  def __init__(self, **kwargs):
+    """Group objects into a training checkpoint.
+
+    Args:
+      **kwargs: Keyword arguments are set as attributes of this object, and are
+        saved with the checkpoint. Values must be trackable objects.
+    Raises:
+      ValueError: If objects in `kwargs` are not trackable.
+    """
+    super(Checkpoint, self).__init__()
+    for k, v in sorted(kwargs.items(), key=lambda item: item[0]):
+      if not isinstance(v, (base.Trackable, def_function.Function)):
+        raise ValueError(
+            ("`Checkpoint` was expecting a trackable object (an object "
+             "derived from `TrackableBase`), got %s. If you believe this "
+             "object should be trackable (i.e. it is part of the "
+             "TensorFlow Python API and manages state), please open an issue.")
+            % (v,))
+      setattr(self, k, v)
+    self._save_counter = None  # Created lazily for restore-on-create.
+    self._save_assign_op = None
+    self._saver = saver_with_op_caching(self)
+
+  def _maybe_create_save_counter(self):
+    """Create a save counter if it does not yet exist."""
+    if self._save_counter is None:
+      # Initialized to 0 and incremented before saving.
+      with ops.device("/cpu:0"):
+        # add_variable creates a dependency named "save_counter"; NoDependency
+        # prevents creating a second dependency named "_save_counter".
+        self._save_counter = data_structures.NoDependency(
+            add_variable(self, name="save_counter", initializer=0,
+                         dtype=dtypes.int64))
+
+  def write(self, file_prefix, session=None):
+    """Writes a training checkpoint.
+
+    The checkpoint includes variables created by this object and any
+    trackable objects it depends on at the time `Checkpoint.write()` is
+    called.
+
+    `write` does not number checkpoints, increment `save_counter`, or update the
+    metadata used by `tf.train.latest_checkpoint`. It is primarily intended for
+    use by higher-level checkpoint management utilities. `save` provides a very
+    basic implementation of these features.
+
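+    A sketch of custom checkpoint naming built on `write` (paths are
+    illustrative):
+
+    ```python
+    path = checkpoint.write("/tmp/model/ckpt-custom-7")
+    # tf.train.latest_checkpoint will not see this checkpoint; keep the
+    # returned path and restore it explicitly:
+    checkpoint.restore(path)
+    ```
+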
+    Args:
+      file_prefix: A prefix to use for the checkpoint filenames
+        (/path/to/directory/and_a_prefix).
+      session: The session to evaluate variables in. Ignored when executing
+        eagerly. If not provided when graph building, the default session is
+        used.
+
+    Returns:
+      The full path to the checkpoint (i.e. `file_prefix`).
+    """
+    output = self._saver.save(
+        file_prefix=file_prefix,
+        session=session)
+    if tensor_util.is_tensor(output):
+      if context.executing_eagerly():
+        return compat.as_str(output.numpy())
+      else:
+        # Function building
+        return output
+    else:
+      # Graph + Session: the save path was already evaluated via session.run.
+      return compat.as_str(output)
+
+  @property
+  def save_counter(self):
+    """An integer variable which starts at zero and is incremented on save.
+
+    Used to number checkpoints.
+
+    Returns:
+      The save counter variable.
+    """
+    self._maybe_create_save_counter()
+    return self._save_counter
+
+  def save(self, file_prefix, session=None):
+    """Saves a training checkpoint and provides basic checkpoint management.
+
+    The saved checkpoint includes variables created by this object and any
+    trackable objects it depends on at the time `Checkpoint.save()` is
+    called.
+
+    `save` is a basic convenience wrapper around the `write` method,
+    sequentially numbering checkpoints using `save_counter` and updating the
+    metadata used by `tf.train.latest_checkpoint`. More advanced checkpoint
+    management, for example garbage collection and custom numbering, may be
+    provided by other utilities which also wrap `write`
+    (`tf.contrib.checkpoint.CheckpointManager` for example).
+
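+    A sketch of such a wrapper (assumes the contrib `CheckpointManager` API
+    mentioned above):
+
+    ```python
+    manager = tf.contrib.checkpoint.CheckpointManager(
+        checkpoint, directory="/tmp/model", max_to_keep=5)
+    manager.save()  # Numbers checkpoints and deletes older ones.
+    ```
+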
+    Args:
+      file_prefix: A prefix to use for the checkpoint filenames
+        (/path/to/directory/and_a_prefix). Names are generated based on this
+        prefix and `Checkpoint.save_counter`.
+      session: The session to evaluate variables in. Ignored when executing
+        eagerly. If not provided when graph building, the default session is
+        used.
+
+    Returns:
+      The full path to the checkpoint.
+    """
+    graph_building = not context.executing_eagerly()
+    if graph_building:
+      if ops.inside_function():
+        raise NotImplementedError(
+            "Calling tf.train.Checkpoint.save() from a function is not "
+            "supported, as save() modifies saving metadata in ways not "
+            "supported by TensorFlow Operations. Consider using "
+            "tf.train.Checkpoint.write(), a lower-level API which does not "
+            "update metadata. tf.train.latest_checkpoint and related APIs will "
+            "not see this checkpoint.")
+      if session is None:
+        session = ops.get_default_session()
+      if self._save_counter is None:
+        # When graph building, if this is a new save counter variable then it
+        # needs to be initialized before assign_add. This is only an issue if
+        # restore() has not been called first.
+        session.run(self.save_counter.initializer)
+    if not graph_building or self._save_assign_op is None:
+      with ops.colocate_with(self.save_counter):
+        assign_op = self.save_counter.assign_add(1, read_value=True)
+      if graph_building:
+        self._save_assign_op = data_structures.NoDependency(assign_op)
+    if graph_building:
+      checkpoint_number = session.run(self._save_assign_op)
+    else:
+      checkpoint_number = assign_op.numpy()
+    file_path = self.write("%s-%d" % (file_prefix, checkpoint_number),
+                           session=session)
+    checkpoint_management.update_checkpoint_state_internal(
+        save_dir=os.path.dirname(file_prefix),
+        model_checkpoint_path=file_path,
+        all_model_checkpoint_paths=[file_path],
+        save_relative_paths=True)
+    return file_path
+
+  def restore(self, save_path):
+    """Restore a training checkpoint.
+
+    Restores this `Checkpoint` and any objects it depends on.
+
+    When executing eagerly, either assigns values immediately if variables to
+    restore have been created already, or defers restoration until the variables
+    are created. Dependencies added after this call will be matched if they have
+    a corresponding object in the checkpoint (the restore request will queue in
+    any trackable object waiting for the expected dependency to be added).
+
+    When graph building, restoration ops are added to the graph but not run
+    immediately.
+
+    To ensure that loading is complete and no more assignments will take place,
+    use the `assert_consumed()` method of the status object returned by
+    `restore`:
+
+    ```python
+    checkpoint = tf.train.Checkpoint( ... )
+    checkpoint.restore(path).assert_consumed()
+    ```
+
+    An exception will be raised if any Python objects in the dependency graph
+    were not found in the checkpoint, or if any checkpointed values do not have
+    a matching Python object.
+
+    When graph building, `assert_consumed()` indicates that all of the restore
+    ops that will be created for this checkpoint have been created. They can be
+    run via the `run_restore_ops()` method of the status object:
+
+    ```python
+    checkpoint.restore(path).assert_consumed().run_restore_ops()
+    ```
+
+    If the checkpoint has not been consumed completely, then the list of restore
+    ops will grow as more objects are added to the dependency graph.
+
+    Name-based `tf.train.Saver` checkpoints can be loaded using this
+    method. Names are used to match variables. No restore ops are created/run
+    until `run_restore_ops()` or `initialize_or_restore()` is called on the
+    returned status object when graph building, but there is restore-on-creation
+    when executing eagerly. Re-encode name-based checkpoints using
+    `tf.train.Checkpoint.save` as soon as possible.
+
+    Args:
+      save_path: The path to the checkpoint, as returned by `save` or
+        `tf.train.latest_checkpoint`. If None (as when there is no latest
+        checkpoint for `tf.train.latest_checkpoint` to return), returns an
+        object which may run initializers for objects in the dependency
+        graph. If the checkpoint was written by the name-based `tf.train.Saver`,
+        names are used to match variables.
+
+    Returns:
+      A load status object, which can be used to make assertions about the
+      status of a checkpoint restoration and run initialization/restore ops.
+
+      The returned status object has the following methods:
+
+      * `assert_consumed()`:
+          Raises an exception if any variables/objects are unmatched: either
+          checkpointed values which don't have a matching Python object or
+          Python objects in the dependency graph with no values in the
+          checkpoint. This method returns the status object, and so may be
+          chained with `initialize_or_restore` or `run_restore_ops`.
+
+      * `assert_existing_objects_matched()`:
+          Raises an exception if any existing Python objects in the dependency
+          graph are unmatched. Unlike `assert_consumed`, this assertion will
+          pass if values in the checkpoint have no corresponding Python
+          objects. For example a `tf.keras.Layer` object which has not yet been
+          built, and so has not created any variables, will pass this assertion
+          but fail `assert_consumed`. Useful when loading part of a larger
+          checkpoint into a new Python program, e.g. a training checkpoint with
+          a `tf.train.Optimizer` was saved but only the state required for
+          inference is being loaded. This method returns the status object, and
+          so may be chained with `initialize_or_restore` or `run_restore_ops`.
+
+      * `assert_nontrivial_match()`: Asserts that something aside from the root
+          object was matched. This is a very weak assertion, but is useful for
+          sanity checking in library code where objects may exist in the
+          checkpoint which haven't been created in Python and some Python
+          objects may not have a checkpointed value.
+
+      * `initialize_or_restore(session=None)`:
+          When graph building, runs variable initializers if `save_path` is
+          `None`, but otherwise runs restore operations. If no `session` is
+          explicitly specified, the default session is used. No effect when
+          executing eagerly (variables are initialized or restored eagerly).
+
+      * `run_restore_ops(session=None)`:
+          When graph building, runs restore operations. If no `session` is
+          explicitly specified, the default session is used. No effect when
+          executing eagerly (restore operations are run eagerly). May only be
+          called when `save_path` is not `None`.
+    """
+    status = self._saver.restore(save_path=save_path)
+    # Create the save counter now so it gets initialized with other variables
+    # when graph building. Creating it earlier would lead to double
+    # initialization when executing eagerly.
+    self._maybe_create_save_counter()
+    return status
diff --git a/tensorflow/python/training/tracking/util_test.py b/tensorflow/python/training/tracking/util_test.py
new file mode 100644
index 0000000..e287891
--- /dev/null
+++ b/tensorflow/python/training/tracking/util_test.py
@@ -0,0 +1,1582 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import json
+import os
+
+from absl.testing import parameterized
+import six
+
+from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import sequential
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.layers import core
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import template
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import training_util
+from tensorflow.python.training.tracking import base
+from tensorflow.python.training.tracking import graph_view
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util as trackable_utils
+
+
+class NonLayerTrackable(tracking.AutoTrackable):
+
+  def __init__(self):
+    super(NonLayerTrackable, self).__init__()
+    self.a_variable = trackable_utils.add_variable(
+        self, name="a_variable", shape=[])
+
+
+# pylint: disable=not-callable
+class MyModel(training.Model):
+  """A concrete Model for testing."""
+
+  def __init__(self):
+    super(MyModel, self).__init__()
+    self._named_dense = core.Dense(1, use_bias=True)
+    self._second = core.Dense(1, use_bias=False)
+    # We can still track Trackables which aren't Layers.
+    self._non_layer = NonLayerTrackable()
+
+  def call(self, values):
+    ret = self._second(self._named_dense(values))
+    return ret
+
+
+class InterfaceTests(test.TestCase):
+
+  def testLayerDeduplication(self):
+    model = training.Model()
+    layer_one = core.Dense(1)
+    layer_two = core.Dense(1)
+    model.other_path = [layer_one, layer_two]
+    model.l2 = layer_two
+    model.l1 = layer_one
+    self.assertEqual([layer_one, layer_two], model.layers)
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testAddVariable(self):
+    obj = NonLayerTrackable()
+    with self.assertRaisesRegexp(ValueError, "do not specify shape"):
+      trackable_utils.add_variable(
+          obj, name="shape_specified_twice", shape=[], initializer=1)
+    constant_initializer = trackable_utils.add_variable(
+        obj, name="constant_initializer", initializer=1)
+    with variable_scope.variable_scope("some_variable_scope"):
+      ones_initializer = trackable_utils.add_variable(
+          obj,
+          name="ones_initializer",
+          shape=[2],
+          initializer=init_ops.ones_initializer(dtype=dtypes.float32))
+    bare_initializer = trackable_utils.add_variable(
+        obj,
+        name="bare_initializer",
+        shape=[2, 2],
+        dtype=dtypes.float64,
+        initializer=init_ops.zeros_initializer)
+
+    # Even in graph mode, there are no naming conflicts between objects, only
+    # naming conflicts within an object.
+    other_duplicate = resource_variable_ops.ResourceVariable(
+        name="duplicate", initial_value=1.)
+    duplicate = trackable_utils.add_variable(
+        obj, name="duplicate", shape=[])
+    with self.assertRaisesRegexp(ValueError, "'duplicate'.*already declared"):
+      trackable_utils.add_variable(obj, name="duplicate", shape=[])
+
+    self.evaluate(trackable_utils.gather_initializers(obj))
+    self.assertEqual("constant_initializer:0", constant_initializer.name)
+    self.assertEqual(1, self.evaluate(constant_initializer))
+    self.assertEqual("some_variable_scope/ones_initializer:0",
+                     ones_initializer.name)
+    self.assertAllEqual([1, 1], self.evaluate(ones_initializer))
+    self.assertAllEqual([[0., 0.],
+                         [0., 0.]], self.evaluate(bare_initializer))
+    self.assertEqual("a_variable:0", obj.a_variable.name)
+    self.assertEqual("duplicate:0", other_duplicate.name)
+    if context.executing_eagerly():
+      # When executing eagerly, there's no uniquification of variable names. The
+      # checkpoint name will be the same.
+      self.assertEqual("duplicate:0", duplicate.name)
+    else:
+      # The .name attribute may be globally influenced, but the checkpoint name
+      # won't be (tested below).
+      self.assertEqual("duplicate_1:0", duplicate.name)
+    named_variables, _, _ = (
+        graph_view.ObjectGraphView(obj).serialize_object_graph())
+    expected_checkpoint_names = (
+        "a_variable/.ATTRIBUTES/VARIABLE_VALUE",
+        "bare_initializer/.ATTRIBUTES/VARIABLE_VALUE",
+        "constant_initializer/.ATTRIBUTES/VARIABLE_VALUE",
+        "duplicate/.ATTRIBUTES/VARIABLE_VALUE",
+        "ones_initializer/.ATTRIBUTES/VARIABLE_VALUE",
+    )
+    six.assertCountEqual(
+        self, expected_checkpoint_names, [v.name for v in named_variables])
+
+  def testInitNotCalled(self):
+
+    class NoInit(tracking.AutoTrackable):
+
+      def __init__(self):
+        pass
+
+    # __init__ for Trackable will be called implicitly.
+    trackable_utils.add_variable(NoInit(), "var", shape=[])
+
+  def testShapeDtype(self):
+    root = tracking.AutoTrackable()
+    v1 = trackable_utils.add_variable(
+        root, name="v1", initializer=3., dtype=dtypes.float64)
+    self.assertEqual(dtypes.float64, v1.dtype)
+    v2 = trackable_utils.add_variable(
+        root,
+        name="v2",
+        shape=[3],
+        initializer=init_ops.ones_initializer,
+        dtype=dtypes.float64)
+    self.assertEqual(dtypes.float64, v2.dtype)
+    self.assertAllEqual([1., 1., 1.], self.evaluate(v2))
+
+  def testObjectMetadata(self):
+    with context.eager_mode():
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      dense = core.Dense(1)
+      checkpoint = trackable_utils.Checkpoint(dense=dense)
+      dense(constant_op.constant([[1.]]))
+      save_path = checkpoint.save(checkpoint_prefix)
+
+    objects = trackable_utils.object_metadata(save_path)
+    all_variable_names = []
+    for obj in objects.nodes:
+      for attribute in obj.attributes:
+        all_variable_names.append(attribute.full_name)
+    self.assertIn("dense/kernel", all_variable_names)
+
+  def testNotTrackable(self):
+
+    class CallsFunctionalStuff(
+        tracking.NotTrackable, tracking.AutoTrackable):
+      pass
+
+    test_dir = self.get_temp_dir()
+    prefix = os.path.join(test_dir, "ckpt")
+    checkpoint = trackable_utils.Checkpoint(x=CallsFunctionalStuff())
+    with self.assertRaises(NotImplementedError):
+      checkpoint.save(prefix)
+
+    class CallsFunctionalStuffOtherMRO(
+        tracking.AutoTrackable, tracking.NotTrackable):
+      pass
+
+    checkpoint_reversed = trackable_utils.Checkpoint(
+        x=CallsFunctionalStuffOtherMRO())
+    with self.assertRaises(NotImplementedError):
+      checkpoint_reversed.save(prefix)
+
+
+class _MirroringSaveable(saver_lib.BaseSaverBuilder.SaveableObject):
+
+  def __init__(self, primary_variable, mirrored_variable, name):
+    self._primary_variable = primary_variable
+    self._mirrored_variable = mirrored_variable
+    tensor = self._primary_variable.read_value()
+    spec = saver_lib.BaseSaverBuilder.SaveSpec(
+        tensor=tensor,
+        slice_spec="",
+        name=name)
+    super(_MirroringSaveable, self).__init__(
+        tensor, [spec], name)
+
+  def restore(self, restored_tensors, restored_shapes):
+    """Restore the same value into both variables."""
+    tensor, = restored_tensors
+    return control_flow_ops.group(
+        self._primary_variable.assign(tensor),
+        self._mirrored_variable.assign(tensor))
+
+
+class _OwnsMirroredVariables(base.Trackable):
+  """A Trackable object which returns a more complex SaveableObject."""
+
+  def __init__(self):
+    self.non_dep_variable = variable_scope.get_variable(
+        name="non_dep_variable", initializer=6., use_resource=True)
+    self.mirrored = variable_scope.get_variable(
+        name="mirrored", initializer=15., use_resource=True)
+
+  def _gather_saveables_for_checkpoint(self):
+    def _saveable_factory(name=self.non_dep_variable.name):
+      return _MirroringSaveable(
+          primary_variable=self.non_dep_variable,
+          mirrored_variable=self.mirrored,
+          name=name)
+    return {base.VARIABLE_VALUE_KEY: _saveable_factory}
+
+  # The Saver sorts by name before parsing, so we need a name property.
+  @property
+  def name(self):
+    return self.non_dep_variable.name
+
+
+class CheckpointingTests(parameterized.TestCase, test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNamingWithOptimizer(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    # A nuisance Model using the same optimizer. Its slot variables should not
+    # go in the checkpoint, since it is never depended on.
+    other_model = MyModel()
+    optimizer = adam.Adam(0.001)
+    step = training_util.get_or_create_global_step()
+    root_trackable = trackable_utils.Checkpoint(
+        optimizer=optimizer, model=model, step=step)
+
+    with backprop.GradientTape() as tape:
+      loss = model(input_value)
+    variables = model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    train_op = control_flow_ops.group(
+        optimizer.apply_gradients(zip(gradients, variables)),
+        step.assign_add(1))
+
+    with backprop.GradientTape() as tape:
+      loss = other_model(input_value)
+    variables = other_model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    optimizer.apply_gradients(zip(gradients, variables))
+
+    self.evaluate(trackable_utils.gather_initializers(
+        root_trackable))
+    self.evaluate(train_op)
+    named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
+        root_trackable).serialize_object_graph()
+    expected_slot_keys = (
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
+    )
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "step",
+        "model/_second/kernel",
+        "model/_named_dense/kernel",
+        "model/_named_dense/bias",
+        # non-Layer dependency of the model
+        "model/_non_layer/a_variable",
+        "optimizer/learning_rate",
+        "optimizer/beta_1",
+        "optimizer/beta_2",
+        "optimizer/epsilon",
+        "optimizer/iter",
+        "optimizer/decay",
+    ) + expected_slot_keys
+    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
+    expected_checkpoint_names = [
+        name + suffix for name in expected_checkpoint_names]
+    expected_checkpoint_names.append(
+        "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON")
+    # The Dense layers also save get_config() JSON
+    expected_checkpoint_names.extend(
+        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
+    named_variables = {v.name: v for v in named_variables}
+    six.assertCountEqual(self, expected_checkpoint_names,
+                         named_variables.keys())
+    # Check that we've mapped to the right variable objects (not exhaustive)
+    self.assertEqual(
+        "global_step",
+        named_variables["step" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense_1/kernel",
+        named_variables["model/_second/kernel" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        named_variables["model/_named_dense/kernel" + suffix].full_name)
+    self.assertEqual(
+        "beta_1",
+        named_variables["optimizer/beta_1" + suffix].full_name)
+    self.assertEqual(
+        "beta_2",
+        named_variables["optimizer/beta_2" + suffix].full_name)
+    # Spot check the generated protocol buffers.
+    self.assertEqual("optimizer",
+                     serialized_graph.nodes[0].children[1].local_name)
+    optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
+        1].node_id]
+    children = [node.local_name for node in optimizer_node.children]
+    six.assertCountEqual(
+        self,
+        # Non-slot dependencies
+        ["beta_1", "beta_2", "iter", "decay", "epsilon", "learning_rate"],
+        children)
+    serialized_slot_keys = []
+    for slot in optimizer_node.slot_variables:
+      for attribute in (
+          serialized_graph.nodes[slot.slot_variable_node_id].attributes):
+        serialized_slot_keys.append(attribute.checkpoint_key)
+    six.assertCountEqual(
+        self,
+        [key + suffix for key in expected_slot_keys],
+        serialized_slot_keys)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testMoreComplexSaveableReturned(self):
+    v = _OwnsMirroredVariables()
+    checkpoint = trackable_utils.Checkpoint(v=v)
+    test_dir = self.get_temp_dir()
+    prefix = os.path.join(test_dir, "ckpt")
+    self.evaluate(v.non_dep_variable.assign(42.))
+    save_path = checkpoint.save(prefix)
+    self.evaluate(v.non_dep_variable.assign(43.))
+    self.evaluate(v.mirrored.assign(44.))
+    checkpoint.restore(save_path).assert_consumed().initialize_or_restore()
+    self.assertEqual(42., self.evaluate(v.non_dep_variable))
+    self.assertEqual(42., self.evaluate(v.mirrored))
+    self.evaluate(v.non_dep_variable.assign(44.))
+    save_path = checkpoint.save(prefix)
+    self.evaluate(v.non_dep_variable.assign(45.))
+    checkpoint.restore(save_path).assert_consumed().initialize_or_restore()
+    self.assertEqual(44., self.evaluate(v.non_dep_variable))
+    self.assertEqual(44., self.evaluate(v.mirrored))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testMoreComplexSaveableReturnedWithGlobalName(self):
+    # The same object can also be saved using the name-based saver.
+    v = _OwnsMirroredVariables()
+    saver = saver_lib.Saver(var_list=[v])
+    test_dir = self.get_temp_dir()
+    prefix = os.path.join(test_dir, "ckpt")
+    with self.cached_session() as sess:
+      self.evaluate(v.non_dep_variable.assign(42.))
+      save_path = saver.save(sess, prefix)
+      self.evaluate(v.non_dep_variable.assign(43.))
+      self.evaluate(v.mirrored.assign(44.))
+      saver.restore(sess, save_path)
+      self.assertEqual(42., self.evaluate(v.non_dep_variable))
+      self.assertEqual(42., self.evaluate(v.mirrored))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testSaveRestore(self):
+    model = MyModel()
+    optimizer = adam.Adam(0.001)
+    root_trackable = trackable_utils.Checkpoint(
+        optimizer=optimizer, model=model)
+    input_value = constant_op.constant([[3.]])
+    with backprop.GradientTape() as tape:
+      loss = model(input_value)
+    variables = model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    train_op = optimizer.apply_gradients(zip(gradients, variables))
+    root_trackable.save_counter  # pylint: disable=pointless-statement
+    self.evaluate(trackable_utils.gather_initializers(
+        root_trackable))
+    self.evaluate(train_op)
+    prefix = os.path.join(self.get_temp_dir(), "ckpt")
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
+    m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
+    self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
+    save_path = root_trackable.save(file_prefix=prefix)
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
+    self.evaluate(state_ops.assign(root_trackable.save_counter, 3))
+    optimizer_variables = self.evaluate(
+        sorted(optimizer.variables(), key=lambda v: v.name))
+    self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
+    # Immediate restoration
+    status = root_trackable.restore(save_path=save_path).assert_consumed()
+    status.run_restore_ops()
+    self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
+    self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
+    self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
+    if not context.executing_eagerly():
+      return  # Restore-on-create is only supported when executing eagerly
+    on_create_model = MyModel()
+    on_create_optimizer = adam.Adam(0.001)
+    on_create_root = trackable_utils.Checkpoint(
+        optimizer=on_create_optimizer, model=on_create_model)
+    # Deferred restoration
+    status = on_create_root.restore(save_path=save_path)
+    status.assert_nontrivial_match()
+    status.assert_existing_objects_matched()
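+    # assert_consumed must fail here: the new model's variables haven't been
+    # built yet, so part of the checkpoint is still unclaimed.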
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    on_create_model(constant_op.constant([[3.]]))  # create variables
+    self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
+    self.assertAllEqual([42.],
+                        self.evaluate(
+                            on_create_model._named_dense.variables[1]))
+    on_create_m_bias_slot = on_create_optimizer.get_slot(
+        on_create_model._named_dense.variables[1], "m")
+    status.assert_existing_objects_matched()
+    if not context.executing_eagerly():
+      with self.assertRaises(AssertionError):
+        status.assert_consumed()
+    # Optimizer slot variables are created when the original variable is
+    # restored.
+    self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
+    dummy_var = resource_variable_ops.ResourceVariable([1.])
+    on_create_optimizer.minimize(loss=dummy_var.read_value,
+                                 var_list=[dummy_var])
+    status.assert_existing_objects_matched()
+    status.assert_consumed()
+    self.assertAllEqual(
+        optimizer_variables,
+        # Creation order is different, so .variables() needs to be re-sorted.
+        self.evaluate(sorted(optimizer.variables(), key=lambda v: v.name)))
+
+  # TODO(allenl): Debug garbage created by this test in python3.
+  def testDeferredRestorationUsageEager(self):
+    """An idiomatic eager execution example."""
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      model = MyModel()
+      optimizer = adam.Adam(0.001)
+      root = trackable_utils.Checkpoint(
+          optimizer=optimizer, model=model)
+      root.restore(checkpoint_management.latest_checkpoint(
+          checkpoint_directory))
+      for _ in range(num_training_steps):
+        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
+        input_value = constant_op.constant([[3.]])
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        optimizer.apply_gradients(zip(gradients, variables))
+      root.save(file_prefix=checkpoint_prefix)
+      self.assertEqual((training_continuation + 1) * num_training_steps,
+                       root.optimizer.iterations.numpy())
+
+  def testUsageGraph(self):
+    """Expected usage when graph building."""
+    with context.graph_mode():
+      num_training_steps = 10
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      for training_continuation in range(3):
+        with ops.Graph().as_default():
+          model = MyModel()
+          optimizer = adam.Adam(0.001)
+          root = trackable_utils.Checkpoint(
+              optimizer=optimizer, model=model)
+          input_value = constant_op.constant([[3.]])
+          with backprop.GradientTape() as tape:
+            loss = model(input_value)
+          variables = model.trainable_variables
+          gradients = tape.gradient(loss, variables)
+          train_op = optimizer.apply_gradients(zip(gradients, variables))
+
+          checkpoint_path = checkpoint_management.latest_checkpoint(
+              checkpoint_directory)
+          with self.session(graph=ops.get_default_graph()) as session:
+            status = root.restore(save_path=checkpoint_path)
+            status.initialize_or_restore(session=session)
+            if checkpoint_path is None:
+              self.assertEqual(0, training_continuation)
+              with self.assertRaises(AssertionError):
+                status.assert_consumed()
+              with self.assertRaises(AssertionError):
+                status.assert_existing_objects_matched()
+            else:
+              status.assert_consumed()
+              status.assert_existing_objects_matched()
+            for _ in range(num_training_steps):
+              session.run(train_op)
+            root.save(file_prefix=checkpoint_prefix, session=session)
+            self.assertEqual((training_continuation + 1) * num_training_steps,
+                             session.run(root.optimizer.iterations))
+            self.assertEqual(training_continuation + 1,
+                             session.run(root.save_counter))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testAgnosticUsage(self):
+    """Graph/eager agnostic usage."""
+    # Does create garbage when executing eagerly due to ops.Graph() creation.
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    def _train_fn(model, input_value):
+      with backprop.GradientTape() as tape:
+        loss = model(input_value)
+      variables = model.trainable_variables
+      gradients = tape.gradient(loss, variables)
+      return optimizer.apply_gradients(zip(gradients, variables))
+    for training_continuation in range(3):
+      with test_util.device(use_gpu=True):
+        model = MyModel()
+        optimizer = adam.Adam(0.001)
+        root = trackable_utils.Checkpoint(
+            optimizer=optimizer, model=model)
+        manager = checkpoint_management.CheckpointManager(
+            root, checkpoint_directory, max_to_keep=1)
+        status = root.restore(save_path=manager.latest_checkpoint)
+        input_value = constant_op.constant([[3.]])
+        train_fn = functools.partial(_train_fn, model, input_value)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        manager.save()
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.optimizer.iterations))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testFreezing(self):
+    with test_util.use_gpu():
+      # Save an object-based checkpoint using a frozen saver
+      directory = self.get_temp_dir()
+      prefix = os.path.join(directory, "ckpt")
+      v = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
+      checkpoint = trackable_utils.Checkpoint(v=v)
+      self.evaluate(v.assign(3))
+      # Create the save counter so assert_consumed doesn't complain about it not
+      # existing in the checkpoint on restore.
+      self.evaluate(checkpoint.save_counter.assign(12))
+      saver = trackable_utils.frozen_saver(checkpoint)
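+      # The frozen saver fixes the object graph at creation time; its save and
+      # restore ops can be reused, even against a second identical graph below.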
+      with ops.device("cpu:0"):
+        prefix_tensor = constant_op.constant(prefix)
+      save_path = self.evaluate(saver.save(prefix_tensor))
+      self.evaluate(v.assign(10))
+      # Use the frozen saver to restore the same object graph
+      self.evaluate(saver.restore(prefix_tensor))
+      self.assertEqual(3, self.evaluate(v))
+
+      # Restore using another frozen saver on an identical object graph
+      del v, checkpoint, saver
+      v = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
+      checkpoint = trackable_utils.Checkpoint(v=v)
+      saver = trackable_utils.frozen_saver(checkpoint)
+      self.evaluate(saver.restore(prefix_tensor))
+      self.assertEqual(3, self.evaluate(v))
+
+      # Restore as an object-based checkpoint
+      del v, checkpoint, saver
+      checkpoint = trackable_utils.Checkpoint()
+      status = checkpoint.restore(save_path)
+      v = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
+      if context.executing_eagerly():
+        self.assertEqual(12, self.evaluate(checkpoint.save_counter))
+        self.assertEqual(0, self.evaluate(v))
+      checkpoint.v = v
+      status.assert_consumed().run_restore_ops()
+      self.assertEqual(3, self.evaluate(v))
+      self.assertEqual(12, self.evaluate(checkpoint.save_counter))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testCustomNumbering(self):
+    directory = self.get_temp_dir()
+    prefix = os.path.join(directory, "ckpt")
+    step = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
+    checkpoint = trackable_utils.Checkpoint(step=step)
+    self.evaluate(step.initializer)
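+    # Checkpoint.write (unlike save) uses the supplied path verbatim and does
+    # no numbering of its own, so the step baked into the name is kept.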
+    for i in range(5):
+      path = checkpoint.write("%s-%d" % (prefix, self.evaluate(step)))
+      expected_suffix = "-%d" % (2 * i,)
+      if not path.endswith(expected_suffix):
+        self.fail("%s should have suffix %s" % (path, expected_suffix))
+      self.evaluate(step.assign_add(2))
+
+  # pylint: disable=cell-var-from-loop
+  @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
+  def testWithDefun(self):
+    num_training_steps = 2
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      with test_util.device(use_gpu=True):
+        model = MyModel()
+        # Don't actually train so we can test variable values
+        optimizer = adam.Adam(0.)
+        root = trackable_utils.Checkpoint(
+            optimizer=optimizer, model=model)
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            checkpoint_directory)
+        status = root.restore(save_path=checkpoint_path)
+        def train_fn():
+          @def_function.function
+          def _call_model(x):
+            return model(x)
+          with backprop.GradientTape() as tape:
+            loss = _call_model(constant_op.constant([[3.]]))
+          gradients = tape.gradient(loss, model.variables)
+          return optimizer.apply_gradients(zip(gradients, model.variables))
+        if not context.executing_eagerly():
+          train_fn = functools.partial(
+              self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        if training_continuation > 0:
+          status.assert_consumed()
+          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
+        else:
+          self.evaluate(model.variables[0].assign([[42.]]))
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(optimizer.iterations))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+  # pylint: enable=cell-var-from-loop
+
+  def _get_checkpoint_name(self, name):
+    root = tracking.AutoTrackable()
+    trackable_utils.add_variable(
+        root, name=name, shape=[1, 2], dtype=dtypes.float64)
+    (named_variable,), _, _ = graph_view.ObjectGraphView(
+        root).serialize_object_graph()
+    with ops.name_scope("root/" + named_variable.name):
+      pass  # Make sure we can use this as an op name if we prefix it.
+    return named_variable.name
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testVariableNameEscaping(self):
+    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
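+    # Escaping: "/" becomes ".S" and "." becomes "..", so user-supplied names
+    # cannot collide with reserved markers such as .ATTRIBUTES.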
+    self.assertEqual(r"a.Sb.Sc" + suffix, self._get_checkpoint_name(r"a/b/c"))
+    self.assertEqual(r"b" + suffix, self._get_checkpoint_name(r"b"))
+    self.assertEqual(r"c.S" + suffix, self._get_checkpoint_name(r"c/"))
+    self.assertEqual(r"d.S..S" + suffix, self._get_checkpoint_name(r"d/.S"))
+    self.assertEqual(r"d.S..ATTRIBUTES.Sf" + suffix,
+                     self._get_checkpoint_name(r"d/.ATTRIBUTES/f"))
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNumberedPath(self):
+    root = tracking.AutoTrackable()
+    leaf = tracking.AutoTrackable()
+    root.leaf = leaf
+    trackable_utils.add_variable(leaf, name="v", shape=[])
+    (named_variable,), _, _ = graph_view.ObjectGraphView(
+        root).serialize_object_graph()
+    self.assertEqual(r"leaf/v/.ATTRIBUTES/VARIABLE_VALUE", named_variable.name)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testLocalNameValidation(self):
+    root = tracking.AutoTrackable()
+    leaf = tracking.AutoTrackable()
+    # Dots are escaped, which avoids conflicts with reserved names.
+    root._track_trackable(leaf, name=".ATTRIBUTES")
+    trackable_utils.add_variable(trackable=leaf, name="a", shape=[])
+    (named_variable,), _, _ = graph_view.ObjectGraphView(
+        root).serialize_object_graph()
+    self.assertEqual("..ATTRIBUTES/a/.ATTRIBUTES/VARIABLE_VALUE",
+                     named_variable.name)
+
+  def testAnonymousVarsInInit(self):
+
+    class Model(training.Model):
+
+      def __init__(self):
+        super(Model, self).__init__()
+        self.w = resource_variable_ops.ResourceVariable(0.0)
+        self.b = resource_variable_ops.ResourceVariable(0.0)
+        self.vars = [self.w, self.b]
+
+      def call(self, x):
+        return x * self.w + self.b
+
+    with context.eager_mode():
+      model = Model()
+      optimizer = adam.Adam(learning_rate=0.05)
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      checkpoint = trackable_utils.Checkpoint(
+          model=model, optimizer=optimizer)
+      for _ in range(2):
+        checkpoint.save(checkpoint_prefix)
+        with backprop.GradientTape() as tape:
+          loss = (constant_op.constant(1.)
+                  - model(constant_op.constant(1.))) ** 2
+        grad = tape.gradient(loss, model.vars)
+        optimizer.apply_gradients(
+            [(g, v) for g, v in zip(grad, model.vars)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testLateDependencyTracking(self):
+
+    class Dependency(tracking.AutoTrackable):
+
+      def build(self):
+        self.var = trackable_utils.add_variable(
+            self, "var", initializer=0.)
+
+    class LateDependencies(trackable_utils.Checkpoint):
+
+      def add_dep(self):
+        self.dep = Dependency()
+        self.dep.build()
+
+    original = LateDependencies()
+    original.add_dep()
+    self.evaluate(state_ops.assign(original.dep.var, 123.))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = original.save(checkpoint_prefix)
+    load_into = LateDependencies()
+    status = load_into.restore(save_path)
+    status.assert_existing_objects_matched()
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    load_into.add_dep()
+    status.assert_consumed()
+    status.assert_existing_objects_matched().run_restore_ops()
+    self.assertEqual(123., self.evaluate(load_into.dep.var))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDepAfterVar(self):
+
+    class Dependency(tracking.AutoTrackable):
+
+      def build(self):
+        self.var = trackable_utils.add_variable(
+            self, "var", initializer=0.)
+
+    class DepAfterVar(trackable_utils.Checkpoint):
+
+      def add_dep(self):
+        dep = Dependency()
+        dep.build()
+        self.dep = dep
+
+    dep_after_var = DepAfterVar()
+    dep_after_var.add_dep()
+    self.evaluate(state_ops.assign(dep_after_var.dep.var, -14.))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = dep_after_var.save(checkpoint_prefix)
+
+    loaded_dep_after_var = DepAfterVar()
+    status = loaded_dep_after_var.restore(save_path)
+    loaded_dep_after_var.add_dep()
+    status.assert_consumed()
+    status.run_restore_ops()
+    self.assertEqual(-14., self.evaluate(loaded_dep_after_var.dep.var))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeferredSlotRestoration(self):
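+    """Slot variables restore correctly when the optimizer is added later."""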
+    checkpoint_directory = self.get_temp_dir()
+
+    root = trackable_utils.Checkpoint()
+    root.var = trackable_utils.add_variable(
+        root, name="var", initializer=0.)
+    optimizer = adam.Adam(0.1)
+    variables = [root.var]
+    gradients = [1.]
+    train_op = optimizer.apply_gradients(zip(gradients, variables))
+    # Note that `optimizer` has not been added as a dependency of
+    # `root`. Create a one-off grouping so that slot variables for `root.var`
+    # get initialized too.
+    self.evaluate(trackable_utils.gather_initializers(
+        trackable_utils.Checkpoint(root=root, optimizer=optimizer)))
+    self.evaluate(train_op)
+    self.evaluate(state_ops.assign(root.var, 12.))
+    no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots"))
+    root.optimizer = optimizer
+    self.evaluate(state_ops.assign(root.var, 13.))
+    self.evaluate(state_ops.assign(
+        optimizer.get_slot(slot_name="m", var=root.var),
+        14.))
+    slots_path = root.save(os.path.join(checkpoint_directory, "with_slots"))
+    new_root = trackable_utils.Checkpoint()
+    # Load the slot-containing checkpoint (deferred), then immediately overwrite
+    # the non-slot variable (also deferred).
+    slot_status = new_root.restore(slots_path)
+    no_slot_status = new_root.restore(no_slots_path)
+    with self.assertRaises(AssertionError):
+      no_slot_status.assert_consumed()
+    new_root.var = trackable_utils.add_variable(
+        new_root, name="var", shape=[])
+    no_slot_status.assert_consumed()
+    no_slot_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    new_root.optimizer = adam.Adam(0.1)
+    slot_status.assert_existing_objects_matched()
+    if not context.executing_eagerly():
+      with self.assertRaisesRegexp(AssertionError, "Unresolved object"):
+        slot_status.assert_consumed()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    if context.executing_eagerly():
+      # Slot variables are only created with restoring initializers when
+      # executing eagerly.
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
+    else:
+      # Slot variables are not created eagerly when graph building.
+      with self.assertRaises(KeyError):
+        new_root.optimizer.get_slot(slot_name="m", var=new_root.var)
+    variables = [new_root.var]
+    gradients = [1.]
+    train_op = new_root.optimizer.apply_gradients(zip(gradients, variables))
+    # The slot variable now exists; restore() didn't create it, but we should
+    # now have a restore op for it.
+    slot_status.run_restore_ops()
+    if not context.executing_eagerly():
+      # When graph building, the train op hasn't run yet, so the slot variable
+      # still holds its restored value. In eager execution the update has
+      # already been applied, so the value would differ.
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
+    self.evaluate(train_op)
+    slot_status.assert_consumed()
+
+  @test_util.run_in_graph_and_eager_modes
+  def testOverlappingRestores(self):
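+    """When restores overlap, the most recent restore() call wins."""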
+    checkpoint_directory = self.get_temp_dir()
+    save_root = trackable_utils.Checkpoint()
+    save_root.dep = tracking.AutoTrackable()
+    save_root.dep.var = trackable_utils.add_variable(
+        save_root.dep, name="var", initializer=0.)
+    self.evaluate(state_ops.assign(save_root.dep.var, 12.))
+    first_path = save_root.save(os.path.join(checkpoint_directory, "first"))
+    self.evaluate(state_ops.assign(save_root.dep.var, 13.))
+    second_path = save_root.save(os.path.join(checkpoint_directory, "second"))
+
+    first_root = trackable_utils.Checkpoint()
+    second_root = trackable_utils.Checkpoint()
+    first_status = first_root.restore(first_path)
+    second_status = second_root.restore(second_path)
+    load_dep = tracking.AutoTrackable()
+    load_dep.var = trackable_utils.add_variable(
+        load_dep, name="var", shape=[])
+    first_root.dep = load_dep
+    first_status.assert_consumed()
+    first_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(load_dep.var))
+    second_root.dep = load_dep
+    second_status.assert_consumed()
+    second_status.run_restore_ops()
+    self.assertEqual(13., self.evaluate(load_dep.var))
+
+    # Try again with the order of the restore() reversed. The last restore
+    # determines the final value.
+    first_root = trackable_utils.Checkpoint()
+    second_root = trackable_utils.Checkpoint()
+    second_status = second_root.restore(second_path)
+    first_status = first_root.restore(first_path)
+    load_dep = tracking.AutoTrackable()
+    load_dep.var = trackable_utils.add_variable(
+        load_dep, name="var", shape=[])
+    first_root.dep = load_dep
+    first_status.assert_consumed()
+    first_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(load_dep.var))
+    second_root.dep = load_dep
+    second_status.assert_consumed()
+    second_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(load_dep.var))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testAmbiguousLoad(self):
+    # Not OK to split one checkpoint object into two
+    checkpoint_directory = self.get_temp_dir()
+    save_root = trackable_utils.Checkpoint()
+    save_root.dep_one = tracking.AutoTrackable()
+    save_root.dep_two = tracking.AutoTrackable()
+    dep_three = tracking.AutoTrackable()
+    save_root.dep_one.dep_three = dep_three
+    save_root.dep_two.dep_three = dep_three
+    trackable_utils.add_variable(dep_three, name="var", initializer=0.)
+    self.evaluate(trackable_utils.gather_initializers(save_root))
+    save_path = save_root.save(os.path.join(checkpoint_directory, "ckpt"))
+    load_root = trackable_utils.Checkpoint()
+    status = load_root.restore(save_path)
+    load_root.dep_one = tracking.AutoTrackable()
+    load_root.dep_two = tracking.AutoTrackable()
+    load_root.dep_one.dep_three = tracking.AutoTrackable()
+    load_root.dep_two.dep_three = tracking.AutoTrackable()
+    trackable_utils.add_variable(
+        load_root.dep_one.dep_three, name="var", initializer=0.)
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    with self.assertRaises(AssertionError):
+      status.assert_existing_objects_matched()
+
+  @test_util.run_in_graph_and_eager_modes
+  def testObjectsCombined(self):
+    # Currently fine to load two checkpoint objects into one Python object
+    checkpoint_directory = self.get_temp_dir()
+    save_root = trackable_utils.Checkpoint()
+    save_root.dep_one = tracking.AutoTrackable()
+    save_root.dep_two = tracking.AutoTrackable()
+    trackable_utils.add_variable(
+        save_root.dep_one, name="var1", initializer=32., dtype=dtypes.float64)
+    trackable_utils.add_variable(
+        save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64)
+    self.evaluate(trackable_utils.gather_initializers(save_root))
+    save_path = save_root.save(os.path.join(checkpoint_directory, "ckpt"))
+    load_root = trackable_utils.Checkpoint()
+    load_root.dep_one = tracking.AutoTrackable()
+    load_root.dep_two = load_root.dep_one
+    v1 = trackable_utils.add_variable(
+        load_root.dep_one, name="var1", shape=[], dtype=dtypes.float64)
+    v2 = trackable_utils.add_variable(
+        load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64)
+    status = load_root.restore(
+        save_path).assert_consumed().assert_existing_objects_matched()
+    status.run_restore_ops()
+    self.assertEqual(32., self.evaluate(v1))
+    self.assertEqual(64., self.evaluate(v2))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDependencyLoop(self):
+    # Note: this test creates garbage during eager execution because it
+    # purposefully creates a reference cycle.
+    first = trackable_utils.Checkpoint()
+    second = trackable_utils.Checkpoint()
+    first.second = second
+    second.first = first
+    first.v = trackable_utils.add_variable(
+        first, "v1", initializer=[3., 1., 4.])
+    second.v = trackable_utils.add_variable(
+        second, "v2", initializer=[1., 1., 2., 3.])
+    self.evaluate(trackable_utils.gather_initializers(first))
+    checkpoint_directory = self.get_temp_dir()
+    save_path = first.save(os.path.join(checkpoint_directory, "ckpt"))
+
+    # Test deferred loading
+    first_load = trackable_utils.Checkpoint()
+    status = first_load.restore(save_path)
+    second_load = tracking.AutoTrackable()
+    first_load.second = second_load
+    second_load.first = first_load
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    first_load.v = trackable_utils.add_variable(
+        first_load, "v1", shape=[3])
+    second_load.v = trackable_utils.add_variable(
+        second_load, "v2", shape=[4])
+    status.assert_consumed()
+    status.run_restore_ops()
+    self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v))
+    self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v))
+
+    # Test loading when variables have already been created
+    self.evaluate(first_load.v.assign([2., 7., 1.]))
+    self.assertAllEqual([2., 7., 1.], self.evaluate(first_load.v))
+    self.evaluate(second_load.v.assign([2., 7., 1., 8.]))
+    self.assertAllEqual([2., 7., 1., 8.], self.evaluate(second_load.v))
+    status = first_load.restore(save_path).assert_consumed()
+    status.run_restore_ops()
+    self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v))
+    self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testRestoreOnAssign(self):
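+    """Attaching an object after restore() queues a restore op for it."""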
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    first = trackable_utils.Checkpoint()
+    first.var1 = variables_lib.Variable(0., name="outside_var")
+    first.var2 = variables_lib.Variable(0., name="blah")
+    self.evaluate(first.var1.assign(4.))
+    self.evaluate(first.var2.assign(8.))
+    save_path = first.save(checkpoint_prefix)
+
+    second = trackable_utils.Checkpoint()
+    second.var2 = variables_lib.Variable(0., name="blah")
+    status = second.restore(save_path)
+    recreated_var1 = variables_lib.Variable(0., name="outside_var")
+    status.run_restore_ops()
+    self.assertEqual(8., self.evaluate(second.var2))
+    self.evaluate(recreated_var1.assign(-2.))
+    self.assertEqual(-2., self.evaluate(recreated_var1))
+    second.var1 = recreated_var1
+    status.run_restore_ops()
+    self.assertEqual(4., self.evaluate(recreated_var1))
+
+  def testManySavesGraph(self):
+    """Saves after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = trackable_utils.Checkpoint()
+        obj.var = variables_lib.Variable(0., name="v")
+        obj.opt = adam.Adam(0.1)
+        variables = [obj.var]
+        gradients = [1.]
+        obj.opt.apply_gradients(zip(gradients, variables))
+        self.evaluate(trackable_utils.gather_initializers(obj))
+        obj.save(checkpoint_prefix)
+        graph.finalize()
+        obj.save(checkpoint_prefix)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testCheckpointState(self):
+    # No checkpoints are deleted by default
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    obj = tracking.AutoTrackable()
+    obj.var = variable_scope.get_variable(name="v", initializer=0.)
+    self.evaluate(trackable_utils.gather_initializers(obj))
+    saver = trackable_utils.Checkpoint(obj=obj)
+    for _ in range(10):
+      saver.save(checkpoint_prefix)
+    expected_filenames = ["checkpoint"]
+    for checkpoint_number in range(1, 11):
+      expected_filenames.append("ckpt-%d.index" % (checkpoint_number,))
+      expected_filenames.append(
+          "ckpt-%d.data-00000-of-00001" % (checkpoint_number,))
+    six.assertCountEqual(
+        self,
+        expected_filenames,
+        os.listdir(checkpoint_directory))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testCheckpointStateChangingVarList(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    obj = tracking.AutoTrackable()
+    obj.var = variable_scope.get_variable(name="v", initializer=0.)
+    self.evaluate(trackable_utils.gather_initializers(obj))
+    checkpoint = trackable_utils.Checkpoint(obj=obj)
+    looped_variables = []
+    for iteration in range(10):
+      new_variable = resource_variable_ops.ResourceVariable(iteration)
+      self.evaluate(new_variable.initializer)
+      setattr(checkpoint, "var_%d" % iteration, new_variable)
+      checkpoint.save(checkpoint_prefix)
+      looped_variables.append(new_variable)
+    expected_filenames = ["checkpoint"]
+    # A new saver is built each time the variable list changes, but checkpoint
+    # management should still be consistent. Nothing gets deleted.
+    for checkpoint_number in range(1, 11):
+      expected_filenames.append("ckpt-%d.index" % (checkpoint_number,))
+      expected_filenames.append(
+          "ckpt-%d.data-00000-of-00001" % (checkpoint_number,))
+    six.assertCountEqual(
+        self,
+        expected_filenames,
+        os.listdir(checkpoint_directory))
+    self.assertEqual(
+        checkpoint_prefix + "-10",
+        checkpoint_management.latest_checkpoint(checkpoint_directory))
+    # The checkpoint state proto lists only the most recent checkpoint, even
+    # though all of them remain on disk, so it never grows past proto size
+    # limits.
+    self.assertEqual(
+        [checkpoint_prefix + "-10"],
+        (checkpoint_management.get_checkpoint_state(checkpoint_directory)
+         .all_model_checkpoint_paths))
+    for v in looped_variables:
+      self.evaluate(v.assign(314))
+    checkpoint.restore(checkpoint_prefix + "-6").run_restore_ops()
+    self.assertEqual(314, self.evaluate(checkpoint.var_9))
+    self.assertEqual(314, self.evaluate(checkpoint.var_8))
+    self.assertEqual(314, self.evaluate(checkpoint.var_6))
+    self.assertEqual(5, self.evaluate(checkpoint.var_5))
+    self.assertEqual(1, self.evaluate(checkpoint.var_1))
+    self.assertEqual(0, self.evaluate(checkpoint.var_0))
+    checkpoint.restore(checkpoint_prefix + "-10").run_restore_ops()
+    self.assertEqual(9, self.evaluate(checkpoint.var_9))
+    self.assertEqual(8, self.evaluate(checkpoint.var_8))
+    self.assertEqual(1, self.evaluate(checkpoint.var_1))
+    self.assertEqual(0, self.evaluate(checkpoint.var_0))
+
+  def testManyRestoresGraph(self):
+    """Restores after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = trackable_utils.Checkpoint()
+        obj.var = variables_lib.Variable(0., name="v")
+        obj.opt = adam.Adam(0.1)
+        variables = [obj.var]
+        gradients = [1.]
+        obj.opt.apply_gradients(zip(gradients, variables))
+        self.evaluate(trackable_utils.gather_initializers(obj))
+        save_path = obj.save(checkpoint_prefix)
+        obj.restore(save_path)
+        graph.finalize()
+        obj.restore(save_path)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_sequential(self):
+    model = sequential.Sequential()
+    checkpoint = trackable_utils.Checkpoint(model=model)
+    model.add(core.Dense(4))
+    second_dense = core.Dense(5)
+    model.add(second_dense)
+    model(constant_op.constant([[1.]]))
+    checkpoint.restore(None).initialize_or_restore()
+    self.evaluate(second_dense.bias.assign(
+        constant_op.constant([1., 2., 3., 4., 5.])))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = checkpoint.save(checkpoint_prefix)
+    self.evaluate(second_dense.bias.assign(
+        constant_op.constant([5., 6., 7., 8., 9.])))
+    checkpoint.restore(save_path).assert_consumed().run_restore_ops()
+    self.assertAllEqual([1., 2., 3., 4., 5.], self.evaluate(second_dense.bias))
+
+    deferred_sequential = sequential.Sequential()
+    deferred_sequential_checkpoint = trackable_utils.Checkpoint(
+        model=deferred_sequential)
+    status = deferred_sequential_checkpoint.restore(save_path)
+    deferred_sequential.add(core.Dense(4))
+    deferred_sequential(constant_op.constant([[1.]]))
+    deferred_second_dense = core.Dense(5)
+    deferred_sequential.add(deferred_second_dense)
+    deferred_sequential(constant_op.constant([[1.]]))
+    status.run_restore_ops()
+    self.assertAllEqual([1., 2., 3., 4., 5.],
+                        self.evaluate(deferred_second_dense.bias))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_initialize_if_not_restoring(self):
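+    """initialize_or_restore initializes objects missing from the checkpoint."""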
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.Adam(0.001)
+      root = trackable_utils.Checkpoint(
+          model=model)  # Do not save the optimizer with the checkpoint.
+      optimizer_checkpoint = trackable_utils.Checkpoint(
+          optimizer=optimizer)
+
+      checkpoint_path = checkpoint_management.latest_checkpoint(
+          checkpoint_directory)
+      status = root.restore(save_path=checkpoint_path)
+      input_value = constant_op.constant([[3.]])
+      def train_fn():
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        return optimizer.apply_gradients(zip(gradients, variables))
+      if not context.executing_eagerly():
+        train_fn = functools.partial(self.evaluate, train_fn())
+      status.initialize_or_restore()
+      # TODO(tanzheny): Add hyper variables to .variables(), and set them with
+      # set_weights etc.
+      variables_not_in_the_variables_property = [
+          obj for obj in optimizer._hyper.values()
+          if isinstance(obj, variables_lib.Variable)]
+      self.evaluate([v.initializer for v
+                     in optimizer.variables()
+                     + variables_not_in_the_variables_property])
+      train_fn()
+      model_save_path = root.save(file_prefix=checkpoint_prefix)
+      self.evaluate(optimizer.beta_1.assign(42.))
+      optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
+    del train_fn
+
+    # Restore into a graph with the optimizer
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.Adam(0.001)
+      root = trackable_utils.Checkpoint(
+          optimizer=optimizer, model=model)
+      status = root.restore(save_path=model_save_path)
+      input_value = constant_op.constant([[3.]])
+      def train_fn1():
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        return optimizer.apply_gradients(zip(gradients, variables))
+      if not context.executing_eagerly():
+        train_fn1 = functools.partial(self.evaluate, train_fn1())
+      status.initialize_or_restore()
+      train_fn1()
+      with self.assertRaises(AssertionError):
+        status.assert_existing_objects_matched()
+      with self.assertRaises(AssertionError):
+        status.assert_consumed()
+    del train_fn1
+
+    # Make sure initialization doesn't clobber later restores
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.Adam(0.001, beta_1=1.0)
+      root = trackable_utils.Checkpoint(
+          optimizer=optimizer, model=model)
+      opt_root = trackable_utils.Checkpoint(
+          optimizer=optimizer)
+      status = root.restore(save_path=model_save_path)
+      init_only_optimizer_status = opt_root.restore(save_path=None)
+      optimizer_status = opt_root.restore(save_path=optimizer_save_path)
+      input_value = constant_op.constant([[3.]])
+      def train_fn2():
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        return optimizer.apply_gradients(zip(gradients, variables))
+      if not context.executing_eagerly():
+        train_fn2 = functools.partial(self.evaluate, train_fn2())
+      optimizer_status.run_restore_ops()
+      status.initialize_or_restore()
+      init_only_optimizer_status.initialize_or_restore()
+      train_fn2()
+      self.assertEqual(42., self.evaluate(optimizer.beta_1))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_restore_after_adding_empty_trackable_data_structure(self):
+    model = NonLayerTrackable()
+    checkpoint = trackable_utils.Checkpoint(model=model)
+    checkpoint.restore(None).initialize_or_restore()
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = checkpoint.save(checkpoint_prefix)
+
+    del model, checkpoint
+
+    model = NonLayerTrackable()
+    model.dict = {"a": 1}
+    model.list = ["b"]
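+    # Neither structure has trackable children, so the dependency graph is
+    # unchanged and the old checkpoint should still match.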
+    checkpoint = trackable_utils.Checkpoint(model=model)
+    load_status = checkpoint.restore(save_path)
+    load_status.assert_existing_objects_matched().run_restore_ops()
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_write_checkpoint_from_function(self):
+    checkpoint_prefix = os.path.join(self.get_temp_dir(), "ckpt")
+    save_checkpoint = trackable_utils.Checkpoint(
+        v=variables_lib.Variable(1.))
+
+    @def_function.function
+    def _write_checkpoint():
+      save_path = save_checkpoint.write(checkpoint_prefix)
+      return save_path
+
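+    # Tracing write() in a def_function.function builds the save ops once; the
+    # returned path tensor can then be evaluated like any other op.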
+    self.evaluate([save_checkpoint.v.initializer])
+    self.evaluate(_write_checkpoint())
+    load_checkpoint = trackable_utils.Checkpoint(
+        v=variables_lib.Variable(0.))
+    load_checkpoint.restore(checkpoint_prefix).run_restore_ops()
+    self.assertEqual(1., self.evaluate(load_checkpoint.v))
+    self.evaluate(save_checkpoint.v.assign(3.))
+    self.evaluate(_write_checkpoint())
+    self.evaluate(save_checkpoint.v.assign(0.))
+    load_checkpoint.restore(checkpoint_prefix).run_restore_ops()
+    self.assertEqual(3., self.evaluate(load_checkpoint.v))
+
+
+class _ManualScope(tracking.AutoTrackable):
+
+  def __call__(self):
+    with variable_scope.variable_scope("ManualScope") as vs:
+      self.variable_scope = vs
+      with trackable_utils.capture_dependencies(template=self):
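+        # capture_dependencies makes variables created in this block
+        # checkpoint dependencies of `self`, named after their variable scope.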
+        return self._build()
+
+  def _build(self):
+    return variable_scope.get_variable(name="in_manual_scope", shape=[])
+
+
+class TemplateTests(parameterized.TestCase, test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_trackable_save_restore(self):
+
+    def _templated():
+      v = variable_scope.get_variable(
+          "v", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      v2 = variable_scope.get_variable(
+          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      manual = _ManualScope()
+      return v, v + 1., v2, manual, manual()
+
+    save_template = template.make_template("s1", _templated)
+    v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
+    six.assertCountEqual(
+        self,
+        [v1_save, v2_save, manual_scope, manual_scope_v, save_template],
+        trackable_utils.list_objects(save_template))
+    manual_dep, = manual_scope._checkpoint_dependencies
+    self.assertEqual("in_manual_scope", manual_dep.name)
+    self.assertIs(manual_scope_v, manual_dep.ref)
+    optimizer = adam.Adam(0.0)
+    save_root = trackable_utils.Checkpoint(
+        my_template=save_template, optimizer=optimizer)
+    optimizer.minimize(v1_save.read_value,
+                       var_list=[v1_save])
+    self.evaluate([v.initializer for v in save_template.variables])
+    self.evaluate([v.initializer for v in optimizer.variables()])
+    self.evaluate(v1_save.assign([12.]))
+    self.evaluate(v2_save.assign([14.]))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = save_root.save(checkpoint_prefix)
+
+    load_template = template.make_template("s2", _templated)
+    load_optimizer = adam.Adam(0.0)
+    load_root = trackable_utils.Checkpoint(
+        my_template=load_template, optimizer=load_optimizer)
+    status = load_root.restore(save_path)
+    var, var_plus_one, var2, _, _ = load_template()
+    load_optimizer.minimize(var.read_value, var_list=[var])
+    self.assertLen(load_template._checkpoint_dependencies, 3)
+    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
+    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
+    self.assertEqual("ManualScope",
+                     load_template._checkpoint_dependencies[2].name)
+    status.assert_consumed().run_restore_ops()
+    self.assertAllEqual([12.], self.evaluate(var))
+    self.assertAllEqual([13.], self.evaluate(var_plus_one))
+    self.assertAllEqual([14.], self.evaluate(var2))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_trackable_save_restore_nested(self):
+
+    def _inner_template():
+      v = variable_scope.get_variable(
+          "v", shape=[1], initializer=init_ops.zeros_initializer())
+      return v
+
+    def _outer_template():
+      first_inner = template.make_template("i1", _inner_template)
+      second_inner = template.make_template("i2", _inner_template)
+      v1 = first_inner()
+      v2 = second_inner()
+      v3 = second_inner()
+      return (first_inner, second_inner), (v1, v2, v3)
+
+    with variable_scope.variable_scope("ignored"):
+      save_template = template.make_template("s1", _outer_template)
+      save_root = trackable_utils.Checkpoint(my_template=save_template)
+      (inner_template_one, inner_template_two), _ = save_template()
+    self.evaluate(inner_template_one.variables[0].assign([20.]))
+    self.evaluate(inner_template_two.variables[0].assign([25.]))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = save_root.save(checkpoint_prefix)
+
+    load_template = template.make_template("s2", _outer_template)
+    load_root = trackable_utils.Checkpoint(my_template=load_template)
+    status = load_root.restore(save_path)
+    (inner_template_one, inner_template_two), (v1, v2, v3) = load_template()
+    outer_template_dependencies = load_root.my_template._checkpoint_dependencies
+    self.assertLen(outer_template_dependencies, 2)
+    self.assertEqual("i1", outer_template_dependencies[0].name)
+    self.assertIs(inner_template_one, outer_template_dependencies[0].ref)
+    self.assertEqual("i2", outer_template_dependencies[1].name)
+    self.assertIs(inner_template_two, outer_template_dependencies[1].ref)
+    self.assertLen(inner_template_one._checkpoint_dependencies, 1)
+    self.assertEqual("v", inner_template_one._checkpoint_dependencies[0].name)
+    self.assertLen(inner_template_two._checkpoint_dependencies, 1)
+    self.assertEqual("v", inner_template_two._checkpoint_dependencies[0].name)
+    status.assert_consumed().run_restore_ops()
+    self.assertAllEqual([20.], self.evaluate(v1))
+    self.assertAllEqual([25.], self.evaluate(v2))
+    self.assertAllEqual([25.], self.evaluate(v3))
+
+
+class CheckpointCompatibilityTests(test.TestCase):
+
+  def _initialized_model(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    optimizer = adam.Adam(0.001)
+    root_trackable = trackable_utils.Checkpoint(
+        optimizer=optimizer, model=model)
+    with backprop.GradientTape() as tape:
+      loss = model(input_value)
+    variables = model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    train_op = optimizer.apply_gradients(zip(gradients, variables))
+    self.evaluate(trackable_utils.gather_initializers(
+        root_trackable))
+    self.evaluate(train_op)
+    # Set a regular variable, a slot variable, and a non-slot Optimizer
+    # variable to known values so the load below can be verified.
+    self.evaluate(model._named_dense.bias.assign([1.]))
+    self.evaluate(optimizer.get_slot(
+        var=model._named_dense.bias, slot_name="m").assign([2.]))
+    self.evaluate(optimizer.beta_1.assign(3.))
+    return root_trackable
+
+  def _set_sentinels(self, root_trackable):
+    self.evaluate(root_trackable.model._named_dense.bias.assign([101.]))
+    self.evaluate(
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, slot_name="m")
+        .assign([102.]))
+    self.evaluate(root_trackable.optimizer.beta_1.assign(103.))
+
+  def _check_sentinels(self, root_trackable):
+    self.assertAllEqual(
+        [1.], self.evaluate(root_trackable.model._named_dense.bias))
+    self.assertAllEqual([2.], self.evaluate(
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, slot_name="m")))
+    self.assertAllEqual(3.,
+                        self.evaluate(root_trackable.optimizer.beta_1))
+
+  def _write_name_based_checkpoint(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        name_saver = saver_lib.Saver()
+        return name_saver.save(
+            sess=session, save_path=checkpoint_prefix,
+            global_step=root.optimizer.iterations)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testLoadFromNameBasedSaver(self):
+    """Save a name-based checkpoint, load it using the object-based API."""
+    with test_util.device(use_gpu=True):
+      save_path = self._write_name_based_checkpoint()
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      with self.assertRaises(AssertionError):
+        self._check_sentinels(root)
+      object_saver = trackable_utils.TrackableSaver(
+          graph_view.ObjectGraphView(root))
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
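+      # In eager mode the name-based values are restored immediately, so the
+      # sentinels are already overwritten before any status assertions.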
+      if context.executing_eagerly():
+        self._check_sentinels(root)
+      if context.executing_eagerly():
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_consumed()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_nontrivial_match()
+      else:
+        # When graph building, we haven't read any keys, so we don't know
+        # whether the restore will be complete.
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_consumed()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_nontrivial_match()
+      status.run_restore_ops()
+      self._check_sentinels(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      status.initialize_or_restore()
+      self._check_sentinels(root)
+      # Check that there is no error when keys are missing from the name-based
+      # checkpoint.
+      root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable([1.])
+      status = object_saver.restore(save_path)
+      with self.assertRaises(AssertionError):
+        status.assert_existing_objects_matched()
+
+  def testSaveGraphLoadEager(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        save_path = root.save(session=session, file_prefix=checkpoint_prefix)
+    with context.eager_mode():
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      root.restore(save_path).assert_consumed()
+      self._check_sentinels(root)
+
+  def testSaveEagerLoadGraph(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.eager_mode():
+      root = self._initialized_model()
+      save_path = root.save(file_prefix=checkpoint_prefix)
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph):
+        root = self._initialized_model()
+        self._set_sentinels(root)
+        root.restore(save_path).assert_consumed().run_restore_ops()
+        self._check_sentinels(root)
+
+
+class PythonMetadataTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def testSaveLoad(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    dense = core.Dense(1)
+    checkpoint = trackable_utils.Checkpoint(dense=dense)
+    dense(constant_op.constant([[1.]]))
+    checkpoint.restore(None).initialize_or_restore()
+    save_path = checkpoint.save(checkpoint_prefix)
+
+    def _get_dense_node_from_object_graph(object_graph_proto):
+      root_node = object_graph_proto.nodes[0]
+      for child in root_node.children:
+        if child.local_name == "dense":
+          break
+      else:
+        raise AssertionError(
+            "Expected a 'dense' dependency of root, didn't find one.")
+      dense_node = object_graph_proto.nodes[child.node_id]  # pylint: disable=undefined-loop-variable
+      self.assertEqual(1, len(dense_node.attributes))
+      reader = pywrap_tensorflow.NewCheckpointReader(save_path)
+      layer_json = reader.get_tensor(dense_node.attributes[0].checkpoint_key)
+      return json.loads(layer_json.decode("utf-8"))
+
+    layer_data = _get_dense_node_from_object_graph(
+        trackable_utils.object_metadata(save_path))
+    self.assertEqual("Dense", layer_data["class_name"])
+    self.assertEqual(1, layer_data["config"]["units"])
+
+    # Check that no new ops are added to the graph the second time we save.
+    ops.get_default_graph().finalize()
+
+    dense.units = 42
+    save_path = checkpoint.save(checkpoint_prefix)
+    layer_data = _get_dense_node_from_object_graph(
+        trackable_utils.object_metadata(save_path))
+    self.assertEqual("Dense", layer_data["class_name"])
+    self.assertEqual(42, layer_data["config"]["units"])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/tracking/util_with_v1_optimizers_test.py b/tensorflow/python/training/tracking/util_with_v1_optimizers_test.py
new file mode 100644
index 0000000..7939293
--- /dev/null
+++ b/tensorflow/python/training/tracking/util_with_v1_optimizers_test.py
@@ -0,0 +1,933 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for object-based saving which use tf.train.* optimizers."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import os
+
+import six
+
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.distribute import mirrored_strategy
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.layers import core
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import template
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import adam
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import training_util
+from tensorflow.python.training.tracking import graph_view
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util as trackable_utils
+
+
+class NonLayerTrackable(tracking.AutoTrackable):
+
+  def __init__(self):
+    super(NonLayerTrackable, self).__init__()
+    self.a_variable = trackable_utils.add_variable(
+        self, name="a_variable", shape=[])
+
+
+# pylint: disable=not-callable
+class MyModel(training.Model):
+  """A concrete Model for testing."""
+
+  def __init__(self):
+    super(MyModel, self).__init__()
+    self._named_dense = core.Dense(1, use_bias=True)
+    self._second = core.Dense(1, use_bias=False)
+    # We can still track Trackables which aren't Layers.
+    self._non_layer = NonLayerTrackable()
+
+  def call(self, values):
+    ret = self._second(self._named_dense(values))
+    return ret
+
+
+class CheckpointingTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNamingWithOptimizer(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    # A nuisance Model using the same optimizer. Its slot variables should not
+    # go in the checkpoint, since it is never depended on.
+    other_model = MyModel()
+    optimizer = adam.AdamOptimizer(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_trackable = trackable_utils.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value),
+          global_step=optimizer_step)
+      optimizer.minimize(
+          lambda: other_model(input_value),
+          global_step=optimizer_step)
+    else:
+      train_op = optimizer.minimize(
+          model(input_value), global_step=optimizer_step)
+      optimizer.minimize(
+          other_model(input_value),
+          global_step=optimizer_step)
+      self.evaluate(trackable_utils.gather_initializers(
+          root_trackable))
+      self.evaluate(train_op)
+    named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
+        root_trackable).serialize_object_graph()
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "optimizer_step",
+        "model/_second/kernel",
+        "model/_named_dense/kernel",
+        "model/_named_dense/bias",
+        # non-Layer dependency of the model
+        "model/_non_layer/a_variable",
+        # The optimizer creates two non-slot variables
+        "optimizer/beta1_power",
+        "optimizer/beta2_power",
+        # Slot variables
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
+    )
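+    # Every variable value is saved under a ".ATTRIBUTES/VARIABLE_VALUE"
+    # attribute of its node in the object graph, appended as a suffix below.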
+    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
+    expected_checkpoint_names = [
+        name + suffix for name in expected_checkpoint_names]
+    # The Dense layers also save get_config() JSON
+    expected_checkpoint_names.extend(
+        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
+    named_variables = {v.name: v for v in named_variables}
+    six.assertCountEqual(self, expected_checkpoint_names,
+                         named_variables.keys())
+    # Check that we've mapped to the right variable objects (not exhaustive)
+    self.assertEqual(
+        "global_step",
+        named_variables["optimizer_step" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense_1/kernel",
+        named_variables["model/_second/kernel" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        named_variables["model/_named_dense/kernel" + suffix].full_name)
+    self.assertEqual(
+        "beta1_power",
+        named_variables["optimizer/beta1_power" + suffix].full_name)
+    self.assertEqual(
+        "beta2_power",
+        named_variables["optimizer/beta2_power" + suffix].full_name)
+    # Spot check the generated protocol buffers.
+    self.assertEqual("optimizer",
+                     serialized_graph.nodes[0].children[1].local_name)
+    optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
+        1].node_id]
+    self.assertEqual("beta1_power",
+                     optimizer_node.children[0].local_name)
+    self.assertEqual("beta1_power",
+                     serialized_graph.nodes[optimizer_node.children[0].node_id]
+                     .attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .original_variable_node_id]
+        .attributes[0].full_name)
+    # We strip off the :0 suffix, as variable.name-based saving does.
+    self.assertEqual(
+        "my_model/dense/kernel/Adam",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .slot_variable_node_id]
+        .attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel/Adam:0",
+        optimizer.get_slot(
+            var=model._named_dense.kernel,
+            name="m").name)
+    self.assertEqual(
+        "model/_named_dense/kernel" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .original_variable_node_id].attributes[0].checkpoint_key)
+    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
+    self.assertEqual(
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .slot_variable_node_id].attributes[0].checkpoint_key)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testSaveRestore(self):
+    model = MyModel()
+    optimizer = adam.AdamOptimizer(0.001)
+    root_trackable = trackable_utils.Checkpoint(
+        optimizer=optimizer, model=model)
+    input_value = constant_op.constant([[3.]])
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value))
+    else:
+      train_op = optimizer.minimize(model(input_value))
+      # TODO(allenl): Make initialization more pleasant when graph building.
+      root_trackable.save_counter  # pylint: disable=pointless-statement
+      self.evaluate(trackable_utils.gather_initializers(
+          root_trackable))
+      self.evaluate(train_op)
+    prefix = os.path.join(self.get_temp_dir(), "ckpt")
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
+    m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
+    self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
+    save_path = root_trackable.save(file_prefix=prefix)
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
+    self.evaluate(state_ops.assign(root_trackable.save_counter, 3))
+    optimizer_variables = self.evaluate(optimizer.variables())
+    self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
+    # Immediate restoration
+    status = root_trackable.restore(save_path=save_path).assert_consumed()
+    status.run_restore_ops()
+    self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
+    self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
+    self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
+    if not context.executing_eagerly():
+      return  # Restore-on-create is only supported when executing eagerly
+    on_create_model = MyModel()
+    on_create_optimizer = adam.AdamOptimizer(
+        0.001,
+        # Preserve beta1_power and beta2_power when applying gradients so we can
+        # test that they've been restored correctly.
+        beta1=1.0, beta2=1.0)
+    on_create_root = trackable_utils.Checkpoint(
+        optimizer=on_create_optimizer, model=on_create_model)
+    # Deferred restoration
+    status = on_create_root.restore(save_path=save_path)
+    status.assert_nontrivial_match()
+    status.assert_existing_objects_matched()
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    on_create_model(constant_op.constant([[3.]]))  # create variables
+    self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
+    self.assertAllEqual([42.],
+                        self.evaluate(
+                            on_create_model._named_dense.variables[1]))
+    on_create_m_bias_slot = on_create_optimizer.get_slot(
+        on_create_model._named_dense.variables[1], "m")
+    status.assert_existing_objects_matched()
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    # Optimizer slot variables are created when the original variable is
+    # restored.
+    self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
+    self.assertAllEqual(optimizer_variables[2:],
+                        self.evaluate(on_create_optimizer.variables()))
+    dummy_var = resource_variable_ops.ResourceVariable([1.])
+    on_create_optimizer.minimize(loss=dummy_var.read_value)
+    status.assert_existing_objects_matched()
+    status.assert_consumed()
+    beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators()
+    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power))
+    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power))
+
+  # TODO(allenl): Debug garbage created by this test in python3.
+  def testDeferredRestorationUsageEager(self):
+    """An idiomatic eager execution example."""
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001)
+      root = trackable_utils.Checkpoint(
+          optimizer=optimizer, model=model,
+          optimizer_step=training_util.get_or_create_global_step())
+      root.restore(checkpoint_management.latest_checkpoint(
+          checkpoint_directory))
+      for _ in range(num_training_steps):
+        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
+        input_value = constant_op.constant([[3.]])
+        optimizer.minimize(
+            lambda: model(input_value),  # pylint: disable=cell-var-from-loop
+            global_step=root.optimizer_step)
+      root.save(file_prefix=checkpoint_prefix)
+      self.assertEqual((training_continuation + 1) * num_training_steps,
+                       root.optimizer_step.numpy())
+
+  def testEagerDistributionStrategy(self):
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+
+    def _train_fn(optimizer, model):
+      input_value = constant_op.constant([[3.]])
+      optimizer.minimize(
+          functools.partial(model, input_value),
+          global_step=root.optimizer_step)
+
+    for training_continuation in range(3):
+      strategy = mirrored_strategy.MirroredStrategy()
+      with strategy.scope():
+        model = MyModel()
+        optimizer = adam.AdamOptimizer(0.001)
+        root = trackable_utils.Checkpoint(
+            optimizer=optimizer, model=model,
+            optimizer_step=training_util.get_or_create_global_step())
+        root.restore(checkpoint_management.latest_checkpoint(
+            checkpoint_directory))
+
+        for _ in range(num_training_steps):
+          strategy.extended.call_for_each_replica(
+              functools.partial(_train_fn, optimizer, model))
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         root.optimizer_step.numpy())
+
+  def testGraphDistributionStrategy(self):
+    self.skipTest("b/121381184")
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+
+    def _train_fn(optimizer, model):
+      input_value = constant_op.constant([[3.]])
+      return optimizer.minimize(
+          functools.partial(model, input_value),
+          global_step=root.optimizer_step)
+
+    for training_continuation in range(3):
+      with ops.Graph().as_default():
+        strategy = mirrored_strategy.MirroredStrategy()
+        with strategy.scope():
+          model = MyModel()
+          optimizer = adam.AdamOptimizer(0.001)
+          root = trackable_utils.Checkpoint(
+              optimizer=optimizer, model=model,
+              optimizer_step=training_util.get_or_create_global_step())
+          status = root.restore(checkpoint_management.latest_checkpoint(
+              checkpoint_directory))
+          train_op = strategy.extended.call_for_each_replica(
+              functools.partial(_train_fn, optimizer, model))
+          with self.session() as session:
+            if training_continuation > 0:
+              status.assert_consumed()
+            status.initialize_or_restore()
+            for _ in range(num_training_steps):
+              session.run(train_op)
+            root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         root.optimizer_step.numpy())
+
+  def testUsageGraph(self):
+    """Expected usage when graph building."""
+    with context.graph_mode():
+      num_training_steps = 10
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      for training_continuation in range(3):
+        with ops.Graph().as_default():
+          model = MyModel()
+          optimizer = adam.AdamOptimizer(0.001)
+          root = trackable_utils.Checkpoint(
+              optimizer=optimizer, model=model,
+              global_step=training_util.get_or_create_global_step())
+          input_value = constant_op.constant([[3.]])
+          train_op = optimizer.minimize(
+              model(input_value),
+              global_step=root.global_step)
+          checkpoint_path = checkpoint_management.latest_checkpoint(
+              checkpoint_directory)
+          with self.session(graph=ops.get_default_graph()) as session:
+            status = root.restore(save_path=checkpoint_path)
+            status.initialize_or_restore(session=session)
+            if checkpoint_path is None:
+              self.assertEqual(0, training_continuation)
+              with self.assertRaises(AssertionError):
+                status.assert_consumed()
+              with self.assertRaises(AssertionError):
+                status.assert_existing_objects_matched()
+            else:
+              status.assert_consumed()
+              status.assert_existing_objects_matched()
+            for _ in range(num_training_steps):
+              session.run(train_op)
+            root.save(file_prefix=checkpoint_prefix, session=session)
+            self.assertEqual((training_continuation + 1) * num_training_steps,
+                             session.run(root.global_step))
+            self.assertEqual(training_continuation + 1,
+                             session.run(root.save_counter))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testAgnosticUsage(self):
+    """Graph/eager agnostic usage."""
+    # Does create garbage when executing eagerly due to ops.Graph() creation.
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    for training_continuation in range(3):
+      with test_util.device(use_gpu=True):
+        model = MyModel()
+        optimizer = adam.AdamOptimizer(0.001)
+        root = trackable_utils.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
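+        # CheckpointManager numbers the checkpoints and handles retention;
+        # with max_to_keep=1 only the most recent save stays on disk.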
+        manager = checkpoint_management.CheckpointManager(
+            root, checkpoint_directory, max_to_keep=1)
+        status = root.restore(save_path=manager.latest_checkpoint)
+        input_value = constant_op.constant([[3.]])
+        train_fn = functools.partial(
+            optimizer.minimize,
+            functools.partial(model, input_value),
+            global_step=root.global_step)
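+        # When graph building, construct the train op once and have train_fn
+        # run it; when executing eagerly, each train_fn() call takes a step.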
+        if not context.executing_eagerly():
+          train_fn = functools.partial(self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        manager.save()
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+
+  # pylint: disable=cell-var-from-loop
+  @test_util.run_in_graph_and_eager_modes
+  def testWithDefun(self):
+    num_training_steps = 2
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      with test_util.device(use_gpu=True):
+        model = MyModel()
+        # Don't actually train so we can test variable values
+        optimizer = adam.AdamOptimizer(0.)
+        root = trackable_utils.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            checkpoint_directory)
+        status = root.restore(save_path=checkpoint_path)
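+        # Route the forward pass through a tf.function so checkpointing is
+        # exercised through a traced function as well.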
+        def train_fn():
+          @def_function.function
+          def _call_model(x):
+            return model(x)
+          with backprop.GradientTape() as tape:
+            loss = _call_model(constant_op.constant([[3.]]))
+          gradients = tape.gradient(loss, model.variables)
+          return optimizer.apply_gradients(zip(gradients, model.variables),
+                                           global_step=root.global_step)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(
+              self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        if training_continuation > 0:
+          status.assert_consumed()
+          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
+        else:
+          self.evaluate(model.variables[0].assign([[42.]]))
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+  # pylint: enable=cell-var-from-loop
+
+  def _get_checkpoint_name(self, name):
+    root = tracking.AutoTrackable()
+    trackable_utils.add_variable(
+        root, name=name, shape=[1, 2], dtype=dtypes.float64)
+    (named_variable,), _, _ = trackable_utils._serialize_object_graph(
+        root, saveables_cache=None)
+    with ops.name_scope("root/" + named_variable.name):
+      pass  # Make sure we can use this as an op name if we prefix it.
+    return named_variable.name
+
+  def testAnonymousVarsInInit(self):
+
+    class Model(training.Model):
+
+      def __init__(self):
+        super(Model, self).__init__()
+        self.w = resource_variable_ops.ResourceVariable(0.0)
+        self.b = resource_variable_ops.ResourceVariable(0.0)
+        self.vars = [self.w, self.b]
+
+      def call(self, x):
+        return x * self.w + self.b
+
+    with context.eager_mode():
+      model = Model()
+      optimizer = adam.AdamOptimizer(learning_rate=0.05)
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      checkpoint = trackable_utils.Checkpoint(
+          model=model, optimizer=optimizer)
+      for _ in range(2):
+        checkpoint.save(checkpoint_prefix)
+        with backprop.GradientTape() as tape:
+          loss = (constant_op.constant(1.)
+                  - model(constant_op.constant(1.))) ** 2
+        grad = tape.gradient(loss, model.vars)
+        optimizer.apply_gradients(
+            [(g, v) for g, v in zip(grad, model.vars)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeferredSlotRestoration(self):
+    checkpoint_directory = self.get_temp_dir()
+
+    root = trackable_utils.Checkpoint()
+    root.var = trackable_utils.add_variable(
+        root, name="var", initializer=0.)
+    optimizer = adam.AdamOptimizer(0.1)
+    if context.executing_eagerly():
+      optimizer.minimize(root.var.read_value)
+    else:
+      train_op = optimizer.minimize(root.var)
+      # Note that `optimizer` has not been added as a dependency of
+      # `root`. Create a one-off grouping so that slot variables for `root.var`
+      # get initialized too.
+      self.evaluate(trackable_utils.gather_initializers(
+          trackable_utils.Checkpoint(root=root, optimizer=optimizer)))
+      self.evaluate(train_op)
+    self.evaluate(state_ops.assign(root.var, 12.))
+    no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots"))
+    root.optimizer = optimizer
+    self.evaluate(state_ops.assign(root.var, 13.))
+    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
+                                   14.))
+    slots_path = root.save(os.path.join(checkpoint_directory, "with_slots"))
+    new_root = trackable_utils.Checkpoint()
+    # Load the slot-containing checkpoint (deferred), then immediately overwrite
+    # the non-slot variable (also deferred).
+    slot_status = new_root.restore(slots_path)
+    no_slot_status = new_root.restore(no_slots_path)
+    with self.assertRaises(AssertionError):
+      no_slot_status.assert_consumed()
+    new_root.var = trackable_utils.add_variable(
+        new_root, name="var", shape=[])
+    no_slot_status.assert_consumed()
+    no_slot_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    new_root.optimizer = adam.AdamOptimizer(0.1)
+    slot_status.assert_existing_objects_matched()
+    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
+      slot_status.assert_consumed()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    if context.executing_eagerly():
+      # Slot variables are only created with restoring initializers when
+      # executing eagerly; when graph building, creation is deferred until
+      # the optimizer makes the slot.
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+    else:
+      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
+                    None)
+    if context.executing_eagerly():
+      new_root.optimizer.minimize(new_root.var.read_value)
+    else:
+      train_op = new_root.optimizer.minimize(new_root.var)
+      # The slot variable now exists; restore() didn't create it, but we should
+      # now have a restore op for it.
+      slot_status.run_restore_ops()
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+      self.evaluate(train_op)
+    slot_status.assert_consumed()
+
+  def testManySavesGraph(self):
+    """Saves after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = trackable_utils.Checkpoint()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.AdamOptimizer(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(trackable_utils.gather_initializers(obj))
+        obj.save(checkpoint_prefix)
+        before_ops = graph.get_operations()
+        obj.save(checkpoint_prefix)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testManyRestoresGraph(self):
+    """Restores after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = trackable_utils.Checkpoint()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.AdamOptimizer(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(trackable_utils.gather_initializers(obj))
+        save_path = obj.save(checkpoint_prefix)
+        obj.restore(save_path)
+        before_ops = graph.get_operations()
+        obj.restore(save_path)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testMultipleGraphsNonSlotVariables(self):
+    with context.graph_mode():
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      optimizer = adam.AdamOptimizer(0.001)
+      # Construct a model in one graph
+      first_graph = ops.Graph()
+      first_session = session_lib.Session(graph=first_graph)
+      with first_graph.as_default(), first_session.as_default():
+        first_variable = resource_variable_ops.ResourceVariable([1.])
+        first_root_trackable = trackable_utils.Checkpoint(
+            optimizer=optimizer, variable=first_variable)
+        train_op = optimizer.minimize(first_variable.read_value)
+        self.evaluate(trackable_utils.gather_initializers(
+            first_root_trackable))
+        self.evaluate(train_op)
+        self.evaluate(first_variable.assign([1.]))
+        self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m").assign([2.]))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta1_power.assign(3.))
+
+      # Save and load in a second graph
+      second_graph = ops.Graph()
+      with second_graph.as_default(), session_lib.Session(graph=second_graph):
+        second_variable = resource_variable_ops.ResourceVariable([1.])
+        second_root_trackable = trackable_utils.Checkpoint(
+            optimizer=optimizer, variable=second_variable)
+        train_op = optimizer.minimize(second_variable.read_value)
+        second_root_trackable.restore(None).initialize_or_restore()
+        self.evaluate(train_op)
+        self.evaluate(second_variable.assign([4.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([5.]))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta1_power.assign(6.))
+        save_path = second_root_trackable.save(checkpoint_prefix)
+        self.evaluate(second_variable.assign([7.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([8.]))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta1_power))
+        status = second_root_trackable.restore(save_path)
+        status.assert_consumed().run_restore_ops()
+        self.assertAllEqual([4.], self.evaluate(second_variable))
+        self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m")))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta1_power))
+
+      # Check that the first graph is unmolested
+      with first_graph.as_default(), first_session.as_default():
+        self.assertAllEqual([1.], self.evaluate(first_variable))
+        self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m")))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(3., self.evaluate(beta1_power))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_initialize_if_not_restoring(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001)
+      root = trackable_utils.Checkpoint(
+          model=model,  # Do not save the optimizer with the checkpoint.
+          global_step=training_util.get_or_create_global_step())
+      optimizer_checkpoint = trackable_utils.Checkpoint(
+          optimizer=optimizer)
+
+      checkpoint_path = checkpoint_management.latest_checkpoint(
+          checkpoint_directory)
+      status = root.restore(save_path=checkpoint_path)
+      input_value = constant_op.constant([[3.]])
+      train_fn = functools.partial(
+          optimizer.minimize,
+          functools.partial(model, input_value),
+          global_step=root.global_step)
+      if not context.executing_eagerly():
+        train_fn = functools.partial(self.evaluate, train_fn())
+      status.initialize_or_restore()
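+      # The optimizer is deliberately left out of `root`, so
+      # initialize_or_restore() does not cover its variables; initialize
+      # them explicitly before training.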
+      self.evaluate([v.initializer for v in optimizer.variables()])
+      train_fn()
+      model_save_path = root.save(file_prefix=checkpoint_prefix)
+      self.evaluate(optimizer.variables()[0].assign(42.))
+      optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
+
+    # Restore into a graph with the optimizer
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001)
+      root = trackable_utils.Checkpoint(
+          optimizer=optimizer, model=model,
+          global_step=training_util.get_or_create_global_step())
+      status = root.restore(save_path=model_save_path)
+      input_value = constant_op.constant([[3.]])
+      train_fn = functools.partial(
+          optimizer.minimize,
+          functools.partial(model, input_value),
+          global_step=root.global_step)
+      if not context.executing_eagerly():
+        train_fn = functools.partial(self.evaluate, train_fn())
+      status.initialize_or_restore()
+      train_fn()
+      with self.assertRaises(AssertionError):
+        status.assert_existing_objects_matched()
+      with self.assertRaises(AssertionError):
+        status.assert_consumed()
+
+    # Make sure initialization doesn't clobber later restores
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001, beta1=1.0)
+      root = trackable_utils.Checkpoint(
+          optimizer=optimizer, model=model,
+          global_step=training_util.get_or_create_global_step())
+      opt_root = trackable_utils.Checkpoint(
+          optimizer=optimizer)
+      status = root.restore(save_path=model_save_path)
+      init_only_optimizer_status = opt_root.restore(save_path=None)
+      optimizer_status = opt_root.restore(save_path=optimizer_save_path)
+      input_value = constant_op.constant([[3.]])
+      train_fn = functools.partial(
+          optimizer.minimize,
+          functools.partial(model, input_value),
+          global_step=root.global_step)
+      if not context.executing_eagerly():
+        train_fn = functools.partial(self.evaluate, train_fn())
+      optimizer_status.run_restore_ops()
+      status.initialize_or_restore()
+      init_only_optimizer_status.initialize_or_restore()
+      train_fn()
+      self.assertEqual(42., self.evaluate(optimizer.variables()[0]))
+
+
+class _ManualScope(tracking.AutoTrackable):
+
+  def __call__(self):
+    with variable_scope.variable_scope("ManualScope") as vs:
+      self.variable_scope = vs
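+      # capture_dependencies records variables created inside this block as
+      # checkpoint dependencies of `self`, mimicking template tracking.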
+      with trackable_utils.capture_dependencies(template=self):
+        return self._build()
+
+  def _build(self):
+    return variable_scope.get_variable(name="in_manual_scope", shape=[])
+
+
+class TemplateTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_trackable_save_restore(self):
+
+    def _templated():
+      v = variable_scope.get_variable(
+          "v", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      v2 = variable_scope.get_variable(
+          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      manual = _ManualScope()
+      return v, v + 1., v2, manual, manual()
+
+    save_template = template.make_template("s1", _templated)
+    v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
+    six.assertCountEqual(
+        self,
+        [v1_save, v2_save, manual_scope, manual_scope_v, save_template],
+        trackable_utils.list_objects(save_template))
+    manual_dep, = manual_scope._checkpoint_dependencies
+    self.assertEqual("in_manual_scope", manual_dep.name)
+    self.assertIs(manual_scope_v, manual_dep.ref)
+    optimizer = adam.AdamOptimizer(0.0)
+    save_root = trackable_utils.Checkpoint(
+        my_template=save_template, optimizer=optimizer)
+    optimizer.minimize(v1_save.read_value)
+    self.evaluate([v.initializer for v in save_template.variables])
+    self.evaluate([v.initializer for v in optimizer.variables()])
+    self.evaluate(v1_save.assign([12.]))
+    self.evaluate(v2_save.assign([14.]))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = save_root.save(checkpoint_prefix)
+
+    load_template = template.make_template("s2", _templated)
+    load_optimizer = adam.AdamOptimizer(0.0)
+    load_root = trackable_utils.Checkpoint(
+        my_template=load_template, optimizer=load_optimizer)
+    status = load_root.restore(save_path)
+    var, var_plus_one, var2, _, _ = load_template()
+    load_optimizer.minimize(var.read_value)
+    self.assertEqual(3, len(load_template._checkpoint_dependencies))
+    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
+    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
+    self.assertEqual("ManualScope",
+                     load_template._checkpoint_dependencies[2].name)
+    status.assert_consumed().run_restore_ops()
+    self.assertAllEqual([12.], self.evaluate(var))
+    self.assertAllEqual([13.], self.evaluate(var_plus_one))
+    self.assertAllEqual([14.], self.evaluate(var2))
+
+
+class CheckpointCompatibilityTests(test.TestCase):
+
+  def _initialized_model(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    optimizer = adam.AdamOptimizer(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_trackable = trackable_utils.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    train_op = optimizer.minimize(
+        functools.partial(model, input_value),
+        global_step=optimizer_step)
+    self.evaluate(trackable_utils.gather_initializers(
+        root_trackable))
+    self.evaluate(train_op)
+    # A regular variable, a slot variable, and a non-slot Optimizer variable
+    # with known values to check when loading.
+    self.evaluate(model._named_dense.bias.assign([1.]))
+    self.evaluate(optimizer.get_slot(
+        var=model._named_dense.bias, name="m").assign([2.]))
+    beta1_power, _ = optimizer._get_beta_accumulators()
+    self.evaluate(beta1_power.assign(3.))
+    return root_trackable
+
+  def _set_sentinels(self, root_trackable):
+    self.evaluate(root_trackable.model._named_dense.bias.assign([101.]))
+    self.evaluate(
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, name="m")
+        .assign([102.]))
+    beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
+    self.evaluate(beta1_power.assign(103.))
+
+  def _check_sentinels(self, root_trackable):
+    self.assertAllEqual(
+        [1.], self.evaluate(root_trackable.model._named_dense.bias))
+    self.assertAllEqual([2.], self.evaluate(
+        root_trackable.optimizer.get_slot(
+            var=root_trackable.model._named_dense.bias, name="m")))
+    beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
+    self.assertAllEqual(3., self.evaluate(beta1_power))
+
+  def _write_name_based_checkpoint(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        name_saver = saver_lib.Saver()
+        return name_saver.save(
+            sess=session, save_path=checkpoint_prefix,
+            global_step=root.optimizer_step)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testLoadFromNameBasedSaver(self):
+    """Save a name-based checkpoint, load it using the object-based API."""
+    with test_util.device(use_gpu=True):
+      save_path = self._write_name_based_checkpoint()
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      with self.assertRaises(AssertionError):
+        self._check_sentinels(root)
+      object_saver = trackable_utils.TrackableSaver(
+          graph_view.ObjectGraphView(root))
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      if context.executing_eagerly():
+        self._check_sentinels(root)
+      if context.executing_eagerly():
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_consumed()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_nontrivial_match()
+      else:
+        # When graph building, we haven't read any keys, so we don't know
+        # whether the restore will be complete.
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_consumed()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_nontrivial_match()
+      status.run_restore_ops()
+      self._check_sentinels(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      status.initialize_or_restore()
+      self._check_sentinels(root)
+      # Check that there is no error when keys are missing from the name-based
+      # checkpoint.
+      root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable([1.])
+      status = object_saver.restore(save_path)
+      with self.assertRaises(AssertionError):
+        status.assert_existing_objects_matched()
+
+  def testSaveGraphLoadEager(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        save_path = root.save(session=session, file_prefix=checkpoint_prefix)
+    with context.eager_mode():
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      root.restore(save_path).assert_consumed()
+      self._check_sentinels(root)
+
+  def testSaveEagerLoadGraph(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.eager_mode():
+      root = self._initialized_model()
+      save_path = root.save(file_prefix=checkpoint_prefix)
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph):
+        root = self._initialized_model()
+        self._set_sentinels(root)
+        root.restore(save_path).assert_consumed().run_restore_ops()
+        self._check_sentinels(root)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/tracking/util_xla_test.py b/tensorflow/python/training/tracking/util_xla_test.py
new file mode 100644
index 0000000..4e8dd0a
--- /dev/null
+++ b/tensorflow/python/training/tracking/util_xla_test.py
@@ -0,0 +1,84 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.compiler.tests import xla_test
+from tensorflow.python.eager import backprop
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.layers import core
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.platform import test
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training.tracking import tracking
+from tensorflow.python.training.tracking import util as trackable_utils
+
+
+class NonLayerTrackable(tracking.AutoTrackable):
+
+  def __init__(self):
+    super(NonLayerTrackable, self).__init__()
+    self.a_variable = trackable_utils.add_variable(
+        self, name="a_variable", shape=[])
+
+
+class Subclassed(training.Model):
+  """A concrete Model for testing."""
+
+  def __init__(self):
+    super(Subclassed, self).__init__()
+    self._named_dense = core.Dense(1, use_bias=True)
+    self._second = core.Dense(1, use_bias=False)
+    # We can still track Trackables which aren't Layers.
+    self._non_layer = NonLayerTrackable()
+
+  def call(self, values):
+    ret = self._second(self._named_dense(values))
+    return ret
+
+
+class CheckpointingTests(xla_test.XLATestCase):
+
+  def testDeferredRestorationUsageEager(self):
+    """An idiomatic eager execution example."""
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    for training_continuation in range(3):
+      with self.test_scope():
+        model = Subclassed()
+        optimizer = adam.Adam(0.001)
+        root = trackable_utils.Checkpoint(
+            optimizer=optimizer, model=model)
+        manager = checkpoint_management.CheckpointManager(
+            root, checkpoint_directory, max_to_keep=2)
+        root.restore(manager.latest_checkpoint)
+        for _ in range(num_training_steps):
+          input_value = constant_op.constant([[3.]])
+          with backprop.GradientTape() as tape:
+            loss = model(input_value)
+          variables = model.trainable_variables
+          gradients = tape.gradient(loss, variables)
+          optimizer.apply_gradients(zip(gradients, variables))
+        manager.save()
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         root.optimizer.iterations.numpy())
+
+
+if __name__ == "__main__":
+  ops.enable_eager_execution()
+  test.main()
diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py
index ae71a62..5a15278 100644
--- a/tensorflow/python/training/training.py
+++ b/tensorflow/python/training/training.py
@@ -68,7 +68,7 @@
 from tensorflow.python.training.basic_session_run_hooks import FeedFnHook
 from tensorflow.python.training.basic_session_run_hooks import ProfilerHook
 from tensorflow.python.training.basic_loops import basic_train_loop
-from tensorflow.python.training.checkpointable.util import Checkpoint
+from tensorflow.python.training.tracking.util import Checkpoint
 from tensorflow.python.training.checkpoint_utils import init_from_checkpoint
 from tensorflow.python.training.checkpoint_utils import list_variables
 from tensorflow.python.training.checkpoint_utils import load_checkpoint
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index 2435694..23a4306 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -184,6 +184,20 @@
 is_sequence_or_composite = _pywrap_tensorflow.IsSequenceOrComposite
 
 
+@tf_export("nest.is_nested")
+def is_nested(seq):
+  """Returns true if its input is a collections.Sequence (except strings).
+
+  Args:
+    seq: an input sequence.
+
+  Returns:
+    True if the sequence is not a string and is a collections.Sequence or a
+    dict.
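+
+  Example (illustrative, matching the updated nest_test):
+
+  ```python
+  nest.is_nested([1, 3, [4, 5]])  # True
+  nest.is_nested({"a": 1})        # True
+  nest.is_nested("1234")          # False
+  nest.is_nested(set([1, 2]))     # False
+  ```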
+  """
+  return is_sequence(seq)
+
+
 @tf_export("nest.flatten")
 def flatten(structure, expand_composites=False):
   """Returns a flat list from a given nested structure.
@@ -258,7 +272,7 @@
         size. Note that namedtuples with identical name and fields are always
         considered to have the same shallow structure. Two types will also be
         considered the same if they are both list subtypes (which allows "list"
-        and "_ListWrapper" from checkpointable dependency tracking to compare
+        and "_ListWrapper" from trackable dependency tracking to compare
         equal).
     expand_composites: If true, then composite tensors such as `tf.SparseTensor`
         and `tf.RaggedTensor` are expanded into their component tensors.
@@ -626,7 +640,7 @@
   The following code will not raise an exception:
   ```python
     shallow_tree = ["a", "b"]
-    input_tree = ["c", ["d", "e"]]
+    input_tree = ["c", ["d", "e"], "f"]
     assert_shallow_structure(shallow_tree, input_tree)
   ```
 
@@ -876,6 +890,14 @@
   Examples:
 
   ```python
+  shallow_tree = [None, None]
+  inp_val = [1, 2, 3]
+  out = map_structure_up_to(shallow_tree, lambda x: 2 * x, inp_val)
+
+  # Output is: [2, 4]
+  ```
+
+  ```python
   ab_tuple = collections.namedtuple("ab_tuple", "a, b")
   op_tuple = collections.namedtuple("op_tuple", "add, mul")
   inp_val = ab_tuple(a=2, b=3)
diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py
index ec559bd..0540f71 100644
--- a/tensorflow/python/util/nest_test.py
+++ b/tensorflow/python/util/nest_test.py
@@ -231,17 +231,17 @@
                             ["and", "goodbye", "again"])
 
   @test_util.assert_no_new_pyobjects_executing_eagerly
-  def testIsSequence(self):
-    self.assertFalse(nest.is_sequence("1234"))
-    self.assertTrue(nest.is_sequence([1, 3, [4, 5]]))
-    self.assertTrue(nest.is_sequence(((7, 8), (5, 6))))
-    self.assertTrue(nest.is_sequence([]))
-    self.assertTrue(nest.is_sequence({"a": 1, "b": 2}))
-    self.assertFalse(nest.is_sequence(set([1, 2])))
+  def testIsNested(self):
+    self.assertFalse(nest.is_nested("1234"))
+    self.assertTrue(nest.is_nested([1, 3, [4, 5]]))
+    self.assertTrue(nest.is_nested(((7, 8), (5, 6))))
+    self.assertTrue(nest.is_nested([]))
+    self.assertTrue(nest.is_nested({"a": 1, "b": 2}))
+    self.assertFalse(nest.is_nested(set([1, 2])))
     ones = array_ops.ones([2, 3])
-    self.assertFalse(nest.is_sequence(ones))
-    self.assertFalse(nest.is_sequence(math_ops.tanh(ones)))
-    self.assertFalse(nest.is_sequence(np.ones((4, 5))))
+    self.assertFalse(nest.is_nested(ones))
+    self.assertFalse(nest.is_nested(math_ops.tanh(ones)))
+    self.assertFalse(nest.is_nested(np.ones((4, 5))))
 
   @parameterized.parameters({"mapping_type": _CustomMapping},
                             {"mapping_type": dict})
diff --git a/tensorflow/python/util/serialization.py b/tensorflow/python/util/serialization.py
index cff864c..2164ba4 100644
--- a/tensorflow/python/util/serialization.py
+++ b/tensorflow/python/util/serialization.py
@@ -18,6 +18,8 @@
 from __future__ import division
 from __future__ import print_function
 
+import collections
+
 import numpy as np
 
 from tensorflow.python.framework import tensor_shape
@@ -61,4 +63,7 @@
   if isinstance(obj, tensor_shape.TensorShape):
     return obj.as_list()
 
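+  # Presumably added so dict-like wrappers (such as the trackable
+  # _DictWrapper) serialize as plain dicts instead of failing as unknown
+  # types.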
+  if isinstance(obj, collections.Mapping):
+    return dict(obj)
+
   raise TypeError('Not JSON Serializable:', obj)
diff --git a/tensorflow/python/util/tf_decorator.py b/tensorflow/python/util/tf_decorator.py
index f018e1a..21ed2d7 100644
--- a/tensorflow/python/util/tf_decorator.py
+++ b/tensorflow/python/util/tf_decorator.py
@@ -95,6 +95,11 @@
     decorator_func.__name__ = target.__name__
   if hasattr(target, '__module__'):
     decorator_func.__module__ = target.__module__
+  if hasattr(target, '__dict__'):
+    # Copy dict entries from target which are not overridden by decorator_func.
+    for name in target.__dict__:
+      if name not in decorator_func.__dict__:
+        decorator_func.__dict__[name] = target.__dict__[name]
   if hasattr(target, '__doc__'):
     decorator_func.__doc__ = decorator.__doc__
   decorator_func.__wrapped__ = target
diff --git a/tensorflow/python/util/tf_decorator_test.py b/tensorflow/python/util/tf_decorator_test.py
index 9198f0b..cd5cdfb 100644
--- a/tensorflow/python/util/tf_decorator_test.py
+++ b/tensorflow/python/util/tf_decorator_test.py
@@ -199,6 +199,20 @@
     decorator = getattr(decorated, '_tf_decorator')
     self.assertEqual('test decorator doc', decorator.decorator_doc)
 
+  def testUpdatesDictWithMissingEntries(self):
+    test_function.foobar = True
+    decorated = tf_decorator.make_decorator(test_function, test_wrapper)
+    self.assertTrue(decorated.foobar)
+    del test_function.foobar
+
+  def testUpdatesDict_doesNotOverridePresentEntries(self):
+    test_function.foobar = True
+    test_wrapper.foobar = False
+    decorated = tf_decorator.make_decorator(test_function, test_wrapper)
+    self.assertFalse(decorated.foobar)
+    del test_function.foobar
+    del test_wrapper.foobar
+
   def testSetsTFDecoratorArgSpec(self):
     argspec = tf_inspect.ArgSpec(
         args=['a', 'b', 'c'],
diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index 980c92a..03c8d6f 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -84,6 +84,7 @@
         "stream_executor_internal.h",
     ],
     deps = [
+        ":allocator_stats",
         ":dnn_proto_cc",
         ":host_or_device_scalar",
         ":stream_executor_headers",
@@ -128,6 +129,7 @@
         "trace_listener.h",
     ],
     deps = [
+        ":allocator_stats",
         ":device_memory",
         ":dnn_proto_cc",
         ":host_or_device_scalar",
@@ -190,6 +192,7 @@
     ],
     visibility = ["//visibility:public"],
     deps = [
+        ":allocator_stats",
         ":dnn_proto_cc",
         ":host_or_device_scalar",
         ":stream_executor_headers",
@@ -386,6 +389,7 @@
         "trace_listener.h",
     ],
     deps = [
+        ":allocator_stats",
         ":dnn_proto_cc",
         ":host_or_device_scalar",
         ":stream_executor_headers",
@@ -497,6 +501,7 @@
         "stream_executor_internal.h",
     ],
     deps = [
+        ":allocator_stats",
         ":device_description",
         ":device_memory",
         ":device_options",
@@ -510,6 +515,7 @@
         "//tensorflow/core:lib",
         "//tensorflow/stream_executor/lib",
         "//tensorflow/stream_executor/platform",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/types:optional",
         "@com_google_absl//absl/types:span",
     ],
@@ -527,6 +533,7 @@
     ],
     visibility = ["//visibility:public"],
     deps = [
+        ":allocator_stats",
         ":dnn_proto_cc",
         ":platform",
         ":stream_executor_headers",
@@ -561,6 +568,7 @@
         "//tensorflow/stream_executor/platform",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:optional",
     ],
 )
 
@@ -601,6 +609,7 @@
     ],
     visibility = ["//visibility:public"],
     deps = [
+        ":allocator_stats",
         ":dnn_proto_cc",
         ":host_or_device_scalar",
         "//tensorflow/core:lib",
@@ -638,6 +647,19 @@
     ],
 )
 
+cc_library(
+    name = "allocator_stats",
+    srcs = [
+        "allocator_stats.cc",
+    ],
+    hdrs = ["allocator_stats.h"],
+    deps = [
+        "//tensorflow/stream_executor/platform",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/types:optional",
+    ],
+)
+
 tf_cc_test(
     name = "stream_test",
     size = "small",
diff --git a/tensorflow/stream_executor/allocator_stats.cc b/tensorflow/stream_executor/allocator_stats.cc
new file mode 100644
index 0000000..440d6f4
--- /dev/null
+++ b/tensorflow/stream_executor/allocator_stats.cc
@@ -0,0 +1,32 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/stream_executor/allocator_stats.h"
+#include "absl/strings/str_format.h"
+
+namespace stream_executor {
+
+string AllocatorStats::DebugString() const {
+  return absl::StrFormat(
+      "Limit:        %20lld\n"
+      "InUse:        %20lld\n"
+      "MaxInUse:     %20lld\n"
+      "NumAllocs:    %20lld\n"
+      "MaxAllocSize: %20lld\n",
+      this->bytes_limit ? *this->bytes_limit : 0, this->bytes_in_use,
+      this->peak_bytes_in_use, this->num_allocs, this->largest_alloc_size);
+}
+
+}  // namespace stream_executor
diff --git a/tensorflow/stream_executor/allocator_stats.h b/tensorflow/stream_executor/allocator_stats.h
new file mode 100644
index 0000000..786ceb0
--- /dev/null
+++ b/tensorflow/stream_executor/allocator_stats.h
@@ -0,0 +1,50 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_ALLOCATOR_STATS_H_
+#define TENSORFLOW_STREAM_EXECUTOR_ALLOCATOR_STATS_H_
+
+#include <string>
+
+#include "absl/types/optional.h"
+#include "tensorflow/stream_executor/platform/port.h"
+
+namespace stream_executor {
+
+// Runtime statistics collected by an allocator. Exactly the same as
+// tensorflow::AllocatorStats, but independently defined to preserve the mutual
+// independence of StreamExecutor and TensorFlow.
+struct AllocatorStats {
+  int64 num_allocs;          // Number of allocations.
+  int64 bytes_in_use;        // Number of bytes in use.
+  int64 peak_bytes_in_use;   // The peak bytes in use.
+  int64 largest_alloc_size;  // The largest single allocation seen.
+
+  // The upper limit of bytes of user allocatable device memory, if such a limit
+  // is known.
+  absl::optional<int64> bytes_limit;
+
+  AllocatorStats()
+      : num_allocs(0),
+        bytes_in_use(0),
+        peak_bytes_in_use(0),
+        largest_alloc_size(0) {}
+
+  string DebugString() const;
+};
+
+}  // namespace stream_executor
+
+#endif  // TENSORFLOW_STREAM_EXECUTOR_ALLOCATOR_STATS_H_
diff --git a/tensorflow/stream_executor/build_defs.bzl b/tensorflow/stream_executor/build_defs.bzl
index 1d8a7ec..b2da13e 100644
--- a/tensorflow/stream_executor/build_defs.bzl
+++ b/tensorflow/stream_executor/build_defs.bzl
@@ -17,5 +17,5 @@
 # Returns whether any GPU backend is configured.
 def if_gpu_is_configured(x):
     if cuda_is_configured() or rocm_is_configured():
-      return x
+        return x
     return []
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index 080c26f..5ef821a 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -837,6 +837,10 @@
 
 /* static */ void* GpuDriver::DeviceAllocate(GpuContext* context,
                                              uint64 bytes) {
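+  // cuMemAlloc returns an error for zero-byte requests, so treat them as a
+  // successful no-op and hand back a null pointer.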
+  if (bytes == 0) {
+    return nullptr;
+  }
+
   ScopedActivateContext activated{context};
   CUdeviceptr result = 0;
   CUresult res = tensorflow::wrap::cuMemAlloc(&result, bytes);
diff --git a/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h b/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h
index 657eea8..0de27d5 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h
+++ b/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h
@@ -44,22 +44,22 @@
 #define TO_STR_(x) #x
 #define TO_STR(x) TO_STR_(x)
 
-#define STREAM_EXECUTOR_LIBCUDA_WRAP(cudaSymbolName)                        \
-  template <typename... Args>                                               \
-  auto cudaSymbolName(Args... args)->decltype(::cudaSymbolName(args...)) {  \
-    using FuncPtrT = std::add_pointer<decltype(::cudaSymbolName)>::type;    \
-    static FuncPtrT loaded = []() -> FuncPtrT {                             \
-      static const char *kName = TO_STR(cudaSymbolName);                    \
-      void *f;                                                              \
-      auto s = stream_executor::port::Env::Default()->GetSymbolFromLibrary( \
-          stream_executor::internal::CachedDsoLoader::GetLibcudaDsoHandle() \
-              .ValueOrDie(),                                                \
-          kName, &f);                                                       \
-      CHECK(s.ok()) << "could not find " << kName                           \
-                    << " in libcuda DSO; dlerror: " << s.error_message();   \
-      return reinterpret_cast<FuncPtrT>(f);                                 \
-    }();                                                                    \
-    return loaded(args...);                                                 \
+#define STREAM_EXECUTOR_LIBCUDA_WRAP(cudaSymbolName)                           \
+  template <typename... Args>                                                  \
+  auto cudaSymbolName(Args... args)->decltype(::cudaSymbolName(args...)) {     \
+    using FuncPtrT = std::add_pointer<decltype(::cudaSymbolName)>::type;       \
+    static FuncPtrT loaded = []() -> FuncPtrT {                                \
+      static const char *kName = TO_STR(cudaSymbolName);                       \
+      void *f;                                                                 \
+      auto s = stream_executor::port::Env::Default()->GetSymbolFromLibrary(    \
+          stream_executor::internal::CachedDsoLoader::GetCudaDriverDsoHandle() \
+              .ValueOrDie(),                                                   \
+          kName, &f);                                                          \
+      CHECK(s.ok()) << "could not find " << kName                              \
+                    << " in libcuda DSO; dlerror: " << s.error_message();      \
+      return reinterpret_cast<FuncPtrT>(f);                                    \
+    }();                                                                       \
+    return loaded(args...);                                                    \
   }
 #endif
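For context on how the wrapper macro is consumed: each driver entry point is wrapped once, producing a lazily resolving forwarder with the original signature. A sketch, where the enclosing namespace and the chosen symbols are assumptions based on the call sites visible in cuda_driver.cc above:

namespace tensorflow {
namespace wrap {
// Each line defines tensorflow::wrap::<symbol>(...), which dlopens libcuda
// once, resolves <symbol> on first use, and forwards every subsequent call.
STREAM_EXECUTOR_LIBCUDA_WRAP(cuMemAlloc)
STREAM_EXECUTOR_LIBCUDA_WRAP(cuMemFree)
}  // namespace wrap
}  // namespace tensorflow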
 
diff --git a/tensorflow/stream_executor/cuda/cudart_stub.cc b/tensorflow/stream_executor/cuda/cudart_stub.cc
index c5fc43d..8878700 100644
--- a/tensorflow/stream_executor/cuda/cudart_stub.cc
+++ b/tensorflow/stream_executor/cuda/cudart_stub.cc
@@ -21,19 +21,19 @@
 #include "tensorflow/stream_executor/platform/dso_loader.h"
 
 namespace {
-void *GetDsoHandle() {
-  static auto handle = [] {
-    void *result = nullptr;
-    using DsoLoader = stream_executor::internal::DsoLoader;
-    DsoLoader::GetLibcudartDsoHandle(&result).IgnoreError();
-    return result;
+void* GetDsoHandle() {
+  static auto handle = []() -> void* {
+    auto handle_or =
+        stream_executor::internal::DsoLoader::GetCudaRuntimeDsoHandle();
+    if (!handle_or.ok()) return nullptr;
+    return handle_or.ValueOrDie();
   }();
   return handle;
 }
 
 template <typename T>
-T LoadSymbol(const char *symbol_name) {
-  void *symbol = nullptr;
+T LoadSymbol(const char* symbol_name) {
+  void* symbol = nullptr;
   auto env = stream_executor::port::Env::Default();
   env->GetSymbolFromLibrary(GetDsoHandle(), symbol_name, &symbol).IgnoreError();
   return reinterpret_cast<T>(symbol);
@@ -41,28 +41,28 @@
 cudaError_t GetSymbolNotFoundError() {
   return cudaErrorSharedObjectSymbolNotFound;
 }
-const char *GetSymbolNotFoundStrError() {
+const char* GetSymbolNotFoundStrError() {
   return "cudaErrorSharedObjectSymbolNotFound";
 }
 }  // namespace
 
 // Code below is auto-generated.
 extern "C" {
-cudaError_t CUDART_CB cudaFree(void *devPtr) {
-  using FuncPtr = cudaError_t (*)(void *devPtr);
+cudaError_t CUDART_CB cudaFree(void* devPtr) {
+  using FuncPtr = cudaError_t (*)(void* devPtr);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudaFree");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(devPtr);
 }
 
-cudaError_t CUDART_CB cudaGetDevice(int *device) {
-  using FuncPtr = cudaError_t (*)(int *device);
+cudaError_t CUDART_CB cudaGetDevice(int* device) {
+  using FuncPtr = cudaError_t (*)(int* device);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudaGetDevice");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(device);
 }
 
-cudaError_t CUDART_CB cudaGetDeviceProperties(cudaDeviceProp *prop,
+cudaError_t CUDART_CB cudaGetDeviceProperties(cudaDeviceProp* prop,
                                               int device) {
   using FuncPtr = cudaError_t (*)(cudaDeviceProp * prop, int device);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudaGetDeviceProperties");
@@ -70,8 +70,8 @@
   return func_ptr(prop, device);
 }
 
-const char *CUDART_CB cudaGetErrorString(cudaError_t error) {
-  using FuncPtr = const char *(*)(cudaError_t error);
+const char* CUDART_CB cudaGetErrorString(cudaError_t error) {
+  using FuncPtr = const char* (*)(cudaError_t error);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudaGetErrorString");
   if (!func_ptr) return GetSymbolNotFoundStrError();
   return func_ptr(error);
@@ -86,27 +86,27 @@
 
 cudaError_t CUDART_CB cudaStreamAddCallback(cudaStream_t stream,
                                             cudaStreamCallback_t callback,
-                                            void *userData,
+                                            void* userData,
                                             unsigned int flags) {
   using FuncPtr =
       cudaError_t (*)(cudaStream_t stream, cudaStreamCallback_t callback,
-                      void *userData, unsigned int flags);
+                      void* userData, unsigned int flags);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamAddCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stream, callback, userData, flags);
 }
 
-cudaError_t CUDART_CB cudaGetDeviceCount(int *count) {
-  using FuncPtr = cudaError_t (*)(int *count);
+cudaError_t CUDART_CB cudaGetDeviceCount(int* count) {
+  using FuncPtr = cudaError_t (*)(int* count);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudaGetDeviceCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(count);
 }
 
 cudaError_t CUDART_CB cudaPointerGetAttributes(
-    struct cudaPointerAttributes *attributes, const void *ptr) {
+    struct cudaPointerAttributes* attributes, const void* ptr) {
   using FuncPtr = cudaError_t (*)(struct cudaPointerAttributes * attributes,
-                                  const void *ptr);
+                                  const void* ptr);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudaPointerGetAttributes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(attributes, ptr);
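The stub pattern above degrades gracefully when the CUDA runtime is absent: each exported symbol resolves itself once via LoadSymbol and otherwise reports cudaErrorSharedObjectSymbolNotFound. A short sketch of what a caller observes (illustrative):

// Illustrative caller: linking against the stub never hard-fails at load
// time; a missing libcudart shows up as an error code at call time.
int device_count = 0;
cudaError_t err = cudaGetDeviceCount(&device_count);
if (err == cudaErrorSharedObjectSymbolNotFound) {
  // CUDA runtime unavailable; proceed CPU-only.
}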
diff --git a/tensorflow/stream_executor/logging.proto b/tensorflow/stream_executor/logging.proto
index 68021d2..3351752 100644
--- a/tensorflow/stream_executor/logging.proto
+++ b/tensorflow/stream_executor/logging.proto
@@ -15,6 +15,18 @@
   int32 minor = 2;
 }
 
+// NOTE: this proto is temporarily duplicated in other places, outside of
+// stream_executor. The plan is to move all custom logging (tensorflow::Logger
+// related) behavior out of StreamExecutor. There are two reasons:
+// * Technical: stream_executor is part of libtensorflow_framework.so. It is
+//   extremely hard to keep a single definition of the protos inside the .so
+//   and have callers link against that definition. The complication lives in
+//   cc_proto_library, which provides both a header-only and an impl version.
+// * Functional: we want to log autotuning stats from the callers. The
+//   autotuning stats are not available in SE.
+//
+// TODO(timshen): remove this proto once both XLA and TF log autotuning
+// results.
 message CudaInfo {
   CudnnVersion cudnn_version = 1;
   ComputeCapability compute_capability = 2;
diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc
index 8592455..ad8112b 100644
--- a/tensorflow/stream_executor/platform/default/dso_loader.cc
+++ b/tensorflow/stream_executor/platform/default/dso_loader.cc
@@ -12,298 +12,130 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/stream_executor/platform/default/dso_loader.h"
 
-// TODO(jhen): Replace hardcoded, platform specific path strings in GetXXXPath()
-// with a function in e.g. cuda.h.
-
-#include <limits.h>
 #include <stdlib.h>
-#include <initializer_list>
-#include <vector>
 
 #include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "cuda/cuda_config.h"
 #include "tensorflow/core/platform/load_library.h"
 #include "tensorflow/stream_executor/lib/env.h"
 #include "tensorflow/stream_executor/lib/error.h"
 #include "tensorflow/stream_executor/lib/path.h"
-#include "tensorflow/stream_executor/lib/str_util.h"
-#include "tensorflow/stream_executor/lib/stringprintf.h"
-#include "tensorflow/stream_executor/platform/default/dso_loader.h"
 #include "tensorflow/stream_executor/platform/logging.h"
 #include "tensorflow/stream_executor/platform/port.h"
 
-#if !defined(PLATFORM_GOOGLE)
-#include "absl/strings/string_view.h"
-#include "cuda/cuda_config.h"
-#endif
-
 namespace stream_executor {
 namespace internal {
 
+namespace {
 string GetCudaVersion() { return TF_CUDA_VERSION; }
 string GetCudnnVersion() { return TF_CUDNN_VERSION; }
 
-/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
-                                      "cublas", GetCudaVersion()),
-                                  GetCudaLibraryDirPath()),
-                      dso_handle);
-}
+port::StatusOr<void*> GetDsoHandle(const string& name, const string& version) {
+  auto filename = port::Env::Default()->FormatLibraryFileName(name, version);
+  void* dso_handle;
+  port::Status status =
+      port::Env::Default()->LoadLibrary(filename.c_str(), &dso_handle);
+  if (status.ok()) {
+    LOG(INFO) << "Successfully opened CUDA library " << filename;
+    return dso_handle;
+  }
 
-/* static */ port::Status DsoLoader::GetCudnnDsoHandle(void** dso_handle) {
-  // libcudnn is versioned differently than the other libraries and may have a
-  // different version number than other CUDA libraries.  See b/22397368 for
-  // some details about the complications surrounding this.
-  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
-                                      "cudnn", GetCudnnVersion()),
-                                  GetCudaLibraryDirPath()),
-                      dso_handle);
+  auto message = absl::StrCat("Could not dlopen library '", filename,
+                              "'; dlerror: ", status.error_message());
+#if !defined(PLATFORM_WINDOWS)
+  if (const char* ld_library_path = getenv("LD_LIBRARY_PATH")) {
+    message += absl::StrCat("; LD_LIRARY_PATH: ", ld_library_path);
+  }
+#endif
+  LOG(INFO) << message;
+  return port::Status(port::error::FAILED_PRECONDITION, message);
 }
+}  // namespace
 
-/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
-                                      "cufft", GetCudaVersion()),
-                                  GetCudaLibraryDirPath()),
-                      dso_handle);
-}
-
-/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
-                                      "curand", GetCudaVersion()),
-                                  GetCudaLibraryDirPath()),
-                      dso_handle);
-}
-
-/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
+namespace DsoLoader {
+port::StatusOr<void*> GetCudaDriverDsoHandle() {
 #if defined(PLATFORM_WINDOWS)
-  return GetDsoHandle(
-      FindDsoPath(port::Env::Default()->FormatLibraryFileName("nvcuda", ""),
-                  GetCudaDriverLibraryPath()),
-      dso_handle);
-#else
-  port::Status status = GetDsoHandle(
-      FindDsoPath(port::Env::Default()->FormatLibraryFileName("cuda", "1"),
-                  GetCudaDriverLibraryPath()),
-      dso_handle);
-#if defined(__APPLE__)
+  return GetDsoHandle("nvcuda", "");
+#elif defined(__APPLE__)
   // On Mac OS X, CUDA sometimes installs libcuda.dylib instead of
   // libcuda.1.dylib.
-  return status.ok()
-             ? status
-             : GetDsoHandle(
-                   FindDsoPath(
-                       port::Env::Default()->FormatLibraryFileName("cuda", ""),
-                       GetCudaDriverLibraryPath()),
-                   dso_handle);
-#else
-  return status;
+  auto handle_or = GetDsoHandle("cuda", "");
+  if (handle_or.ok()) {
+    return handle_or;
+  }
 #endif
-#endif
+  return GetDsoHandle("cuda", "1");
 }
 
-/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
+port::StatusOr<void*> GetCudaRuntimeDsoHandle() {
+  return GetDsoHandle("cudart", GetCudaVersion());
+}
+
+port::StatusOr<void*> GetCublasDsoHandle() {
+  return GetDsoHandle("cublas", GetCudaVersion());
+}
+
+port::StatusOr<void*> GetCufftDsoHandle() {
+  return GetDsoHandle("cufft", GetCudaVersion());
+}
+
+port::StatusOr<void*> GetCurandDsoHandle() {
+  return GetDsoHandle("curand", GetCudaVersion());
+}
+
+port::StatusOr<void*> GetCuptiDsoHandle() {
 #if defined(ANDROID_TEGRA)
   // On Android devices the CUDA version number is not added to the library
   // name.
-  return GetDsoHandle(
-      FindDsoPath(port::Env::Default()->FormatLibraryFileName("cupti", ""),
-                  GetCudaCuptiLibraryPath()),
-      dso_handle);
+  return GetDsoHandle("cupti", "");
 #else
-  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
-                                      "cupti", GetCudaVersion()),
-                                  GetCudaCuptiLibraryPath()),
-                      dso_handle);
+  return GetDsoHandle("cupti", GetCudaVersion());
 #endif
 }
 
-/* static */ port::Status DsoLoader::GetLibcudartDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
-                                      "cudart", GetCudaVersion()),
-                                  GetCudaLibraryDirPath()),
-                      dso_handle);
+port::StatusOr<void*> GetCudnnDsoHandle() {
+  return GetDsoHandle("cudnn", GetCudnnVersion());
+}
+}  // namespace DsoLoader
+
+namespace CachedDsoLoader {
+port::StatusOr<void*> GetCudaDriverDsoHandle() {
+  static auto result = new auto(DsoLoader::GetCudaDriverDsoHandle());
+  return *result;
 }
 
-static mutex& GetRpathMutex() {
-  static mutex* mu = new mutex;
-  return *mu;
+port::StatusOr<void*> GetCudaRuntimeDsoHandle() {
+  static auto result = new auto(DsoLoader::GetCudaRuntimeDsoHandle());
+  return *result;
 }
 
-/* static */ void DsoLoader::RegisterRpath(absl::string_view path) {
-  mutex_lock lock{GetRpathMutex()};
-  GetRpaths()->emplace_back(path);
+port::StatusOr<void*> GetCublasDsoHandle() {
+  static auto result = new auto(DsoLoader::GetCublasDsoHandle());
+  return *result;
 }
 
-/* static */ port::Status DsoLoader::GetDsoHandle(absl::string_view path,
-                                                  void** dso_handle,
-                                                  LoadKind load_kind) {
-  if (load_kind != LoadKind::kLocal) {
-    return port::Status(port::error::INVALID_ARGUMENT,
-                        "Only LoadKind::kLocal is currently supported");
-  }
-  string path_string(path);
-  port::Status s =
-      port::Env::Default()->LoadLibrary(path_string.c_str(), dso_handle);
-  if (!s.ok()) {
-#if !defined(PLATFORM_WINDOWS)
-    char* ld_library_path = getenv("LD_LIBRARY_PATH");
-#endif
-    LOG(INFO) << "Couldn't open CUDA library " << path
-#if !defined(PLATFORM_WINDOWS)
-              << ". LD_LIBRARY_PATH: "
-              << (ld_library_path != nullptr ? ld_library_path : "")
-#endif
-        ;
-    return port::Status(port::error::FAILED_PRECONDITION,
-                        absl::StrCat("could not dlopen DSO: ", path,
-                                     "; dlerror: ", s.error_message()));
-  }
-  LOG(INFO) << "successfully opened CUDA library " << path << " locally";
-  return port::Status::OK();
+port::StatusOr<void*> GetCurandDsoHandle() {
+  static auto result = new auto(DsoLoader::GetCurandDsoHandle());
+  return *result;
 }
 
-/* static */ string DsoLoader::GetBinaryDirectory(bool strip_executable_name) {
-  string exe_path = port::Env::Default()->GetExecutablePath();
-  return strip_executable_name ? string(port::Dirname(exe_path)) : exe_path;
+port::StatusOr<void*> GetCufftDsoHandle() {
+  static auto result = new auto(DsoLoader::GetCufftDsoHandle());
+  return *result;
 }
 
-// Creates a heap-allocated vector for initial rpaths.
-// Ownership is transferred to the caller.
-static std::vector<string>* CreatePrimordialRpaths() {
-  auto rpaths = new std::vector<string>;
-#if defined(__APPLE__)
-  rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib");
-#else
-  rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib64");
-#endif
-  return rpaths;
+port::StatusOr<void*> GetCuptiDsoHandle() {
+  static auto result = new auto(DsoLoader::GetCuptiDsoHandle());
+  return *result;
 }
 
-/* static */ std::vector<string>* DsoLoader::GetRpaths() {
-  static std::vector<string>* rpaths = CreatePrimordialRpaths();
-  return rpaths;
+port::StatusOr<void*> GetCudnnDsoHandle() {
+  static auto result = new auto(DsoLoader::GetCudnnDsoHandle());
+  return *result;
 }
-
-/* static */ bool DsoLoader::TrySymbolicDereference(string* candidate) {
-#if defined(PLATFORM_WINDOWS)
-  return false;
-#else
-  char buf[PATH_MAX];
-  char* result = realpath(candidate->c_str(), buf);
-  if (result == nullptr) {
-    return false;
-  }
-  VLOG(3) << "realpath resolved candidate path \"" << *candidate << "\" to \""
-          << result << "\"";
-  *candidate = result;
-  return true;
-#endif
-}
-
-/* static */ string DsoLoader::FindDsoPath(absl::string_view library_name,
-                                           absl::string_view runfiles_relpath) {
-  // Keep a record of the paths we attempted so we can dump out meaningful
-  // diagnostics if no path is found.
-  std::vector<string> attempted;
-
-  using StringPieces = std::vector<absl::string_view>;
-  string candidate;
-
-  // Otherwise, try binary-plus-rpath locations.
-  string binary_directory =
-      GetBinaryDirectory(true /* = strip_executable_name */);
-  mutex_lock lock{GetRpathMutex()};
-  for (const string& rpath : *GetRpaths()) {
-    candidate =
-        port::Join(StringPieces{binary_directory, rpath, library_name}, "/");
-    if (TrySymbolicDereference(&candidate)) {
-      return candidate;
-    }
-  }
-  attempted.push_back(candidate);
-
-  return string(library_name);
-}
-
-/* static */ string DsoLoader::GetCudaLibraryDirPath() {
-#if defined(__APPLE__)
-  return "external/local_config_cuda/cuda/lib";
-#else
-  return "external/local_config_cuda/cuda/lib64";
-#endif
-}
-
-/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
-#if defined(__APPLE__)
-  return "external/local_config_cuda/cuda/driver/lib";
-#elif defined(PLATFORM_WINDOWS)
-  return "";
-#else
-  return "external/local_config_cuda/cuda/driver/lib64";
-#endif
-}
-
-/* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
-#if defined(__APPLE__)
-  return "external/local_config_cuda/cuda/extras/CUPTI/lib";
-#else
-  return "external/local_config_cuda/cuda/extras/CUPTI/lib64";
-#endif
-}
-
-// -- CachedDsoLoader
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
-  static port::StatusOr<void*> result =
-      FetchHandleResult(DsoLoader::GetCublasDsoHandle);
-  return result;
-}
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::GetCurandDsoHandle() {
-  static port::StatusOr<void*> result =
-      FetchHandleResult(DsoLoader::GetCurandDsoHandle);
-  return result;
-}
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::GetCudnnDsoHandle() {
-  static port::StatusOr<void*> result =
-      FetchHandleResult(DsoLoader::GetCudnnDsoHandle);
-  return result;
-}
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::GetCufftDsoHandle() {
-  static port::StatusOr<void*> result =
-      FetchHandleResult(DsoLoader::GetCufftDsoHandle);
-  return result;
-}
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::GetLibcudaDsoHandle() {
-  static port::StatusOr<void*> result =
-      FetchHandleResult(DsoLoader::GetLibcudaDsoHandle);
-  return result;
-}
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::GetLibcuptiDsoHandle() {
-  static port::StatusOr<void*> result =
-      FetchHandleResult(DsoLoader::GetLibcuptiDsoHandle);
-  return result;
-}
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::GetLibcudartDsoHandle() {
-  static port::StatusOr<void*> result =
-      FetchHandleResult(DsoLoader::GetLibcudartDsoHandle);
-  return result;
-}
-
-/* static */ port::StatusOr<void*> CachedDsoLoader::FetchHandleResult(
-    std::function<port::Status(void**)> load_dso) {
-  void* handle;
-  auto status = load_dso(&handle);
-  if (!status.ok()) {
-    return status;
-  }
-  return handle;
-}
-
+}  // namespace CachedDsoLoader
 }  // namespace internal
 }  // namespace stream_executor
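The CachedDsoLoader functions above all share one idiom: `static auto result = new auto(DsoLoader::Get...())` runs the load exactly once (thread-safe under C++11 static initialization) and intentionally leaks the StatusOr so it survives static destruction. A distilled sketch under an illustrative name:

// Sketch of the caching idiom, detached from any particular library.
// "Foo" is illustrative; the pattern matches the functions above.
port::StatusOr<void*> GetFooDsoHandle() {
  // Initialized once, never destroyed: both a successful handle and a
  // failure status are cached for the life of the process.
  static auto* result = new auto(DsoLoader::GetFooDsoHandle());
  return *result;
}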
diff --git a/tensorflow/stream_executor/platform/default/dso_loader.h b/tensorflow/stream_executor/platform/default/dso_loader.h
index 92c0db7..45a8315 100644
--- a/tensorflow/stream_executor/platform/default/dso_loader.h
+++ b/tensorflow/stream_executor/platform/default/dso_loader.h
@@ -31,88 +31,30 @@
 namespace stream_executor {
 namespace internal {
 
-// Permits StreamExecutor code to dynamically load a pre-determined set of
-// relevant DSOs via dlopen.
-//
-// Thread-safe.
-class DsoLoader {
- public:
-  // The following methods either load the DSO of interest and return a dlopen
-  // handle or error status in the canonical namespace.
-
-  static port::Status GetCublasDsoHandle(void** dso_handle);
-  static port::Status GetCudnnDsoHandle(void** dso_handle);
-  static port::Status GetCufftDsoHandle(void** dso_handle);
-  static port::Status GetCurandDsoHandle(void** dso_handle);
-  static port::Status GetLibcudaDsoHandle(void** dso_handle);
-  static port::Status GetLibcuptiDsoHandle(void** dso_handle);
-  static port::Status GetLibcudartDsoHandle(void** dso_handle);
-
-  // Registers a new binary-relative path to use as a dlopen search path.
-  static void RegisterRpath(absl::string_view path);
-
- private:
-  // Registered rpaths (singleton vector) and a mutex that guards it.
-  static std::vector<string>* GetRpaths();
-
-  // Descriptive boolean wrapper to indicate whether symbols are made available
-  // to resolve in later-loaded libraries.
-  enum class LoadKind { kLocal, kGlobal };
-
-  // Loads a DSO from the given "path" (which can technically be any dlopen-able
-  // name). If the load kind is global, the symbols in the loaded DSO are
-  // visible to subsequent DSO loading operations.
-  static port::Status GetDsoHandle(absl::string_view path, void** dso_handle,
-                                   LoadKind load_kind = LoadKind::kLocal);
-
-  // Returns the binary directory (or binary path) associated with the currently
-  // executing program. If strip_executable_name is true, the executable file is
-  // stripped off of the path.
-  static string GetBinaryDirectory(bool strip_executable_name);
-
-  // Invokes realpath on the original path; updates candidate and returns true
-  // if it succeeds (i.e. a file exists at the path); otherwise, returns false.
-  static bool TrySymbolicDereference(string* candidate);
-
-  // Attempts to find a path to the DSO of interest, otherwise returns the
-  // bare library name:
-  // Arguments:
-  //   library_name: the filename in tree; e.g. libOpenCL.so.1.0.0
-  //   runfiles_relpath: where to look for the library relative to the runfiles
-  //      root; e.g. third_party/gpus/cuda/lib64
-  static string FindDsoPath(absl::string_view library_name,
-                            absl::string_view runfiles_relpath);
-
-  // Return platform dependent paths for DSOs
-  static string GetCudaLibraryDirPath();
-  static string GetCudaDriverLibraryPath();
-  static string GetCudaCuptiLibraryPath();
-
-  SE_DISALLOW_COPY_AND_ASSIGN(DsoLoader);
-};
+namespace DsoLoader {
+// The following functions load the DSO of interest and return either a
+// dlopen handle or an error status.
+port::StatusOr<void*> GetCudaDriverDsoHandle();
+port::StatusOr<void*> GetCudaRuntimeDsoHandle();
+port::StatusOr<void*> GetCublasDsoHandle();
+port::StatusOr<void*> GetCufftDsoHandle();
+port::StatusOr<void*> GetCurandDsoHandle();
+port::StatusOr<void*> GetCuptiDsoHandle();
+port::StatusOr<void*> GetCudnnDsoHandle();
+}  // namespace DsoLoader
 
 // Wrapper around the DsoLoader that prevents us from dlopen'ing any of the DSOs
 // more than once.
-class CachedDsoLoader {
- public:
-  // Cached versions of the corresponding DsoLoader methods above.
-  static port::StatusOr<void*> GetCublasDsoHandle();
-  static port::StatusOr<void*> GetCudnnDsoHandle();
-  static port::StatusOr<void*> GetCufftDsoHandle();
-  static port::StatusOr<void*> GetCurandDsoHandle();
-  static port::StatusOr<void*> GetLibcudaDsoHandle();
-  static port::StatusOr<void*> GetLibcuptiDsoHandle();
-  static port::StatusOr<void*> GetLibcudartDsoHandle();
-
- private:
-  // Fetches a DSO handle via "load_dso" and returns the StatusOr form of the
-  // result.
-  static port::StatusOr<void*> FetchHandleResult(
-      std::function<port::Status(void**)> load_dso);
-
-  SE_DISALLOW_COPY_AND_ASSIGN(CachedDsoLoader);
-};
-
+namespace CachedDsoLoader {
+// Cached versions of the corresponding DsoLoader methods above.
+port::StatusOr<void*> GetCudaDriverDsoHandle();
+port::StatusOr<void*> GetCudaRuntimeDsoHandle();
+port::StatusOr<void*> GetCublasDsoHandle();
+port::StatusOr<void*> GetCufftDsoHandle();
+port::StatusOr<void*> GetCurandDsoHandle();
+port::StatusOr<void*> GetCuptiDsoHandle();
+port::StatusOr<void*> GetCudnnDsoHandle();
+}  // namespace CachedDsoLoader
 }  // namespace internal
 }  // namespace stream_executor
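A hedged caller sketch for the refactored API (the logging and fallback shown are illustrative, not prescribed):

// Prefer the cached variant so repeated queries never re-dlopen the DSO.
auto handle_or =
    stream_executor::internal::CachedDsoLoader::GetCudnnDsoHandle();
if (!handle_or.ok()) {
  LOG(WARNING) << "cuDNN unavailable: " << handle_or.status().error_message();
} else {
  void* handle = handle_or.ValueOrDie();
  // Resolve symbols from `handle` via Env::GetSymbolFromLibrary as needed.
}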
 
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index e234e5d..36eabda 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -27,6 +27,8 @@
 #include <utility>
 #include <vector>
 
+#include "absl/types/optional.h"
+#include "tensorflow/stream_executor/allocator_stats.h"
 #include "tensorflow/stream_executor/device_description.h"
 #include "tensorflow/stream_executor/device_memory.h"
 #include "tensorflow/stream_executor/device_options.h"
@@ -367,6 +369,11 @@
   // as a platform.
   virtual void *GpuContextHack() { return nullptr; }
 
+  // Returns allocator statistics.
+  virtual absl::optional<AllocatorStats> GetAllocatorStats() {
+    return absl::nullopt;
+  }
+
  private:
   SE_DISALLOW_COPY_AND_ASSIGN(StreamExecutorInterface);
 };
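Platform implementations that track allocations can override the new hook; the base returns absl::nullopt, so untracked platforms need no change. A sketch in which the class and members are illustrative and the remaining pure-virtual overrides are elided:

class TrackingExecutor : public internal::StreamExecutorInterface {
 public:
  absl::optional<AllocatorStats> GetAllocatorStats() override {
    return stats_;  // Kept up to date by this implementation's alloc paths.
  }
  // ... remaining StreamExecutorInterface overrides elided ...

 private:
  AllocatorStats stats_;
};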
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index c680a02..aae1efc 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -871,6 +871,10 @@
   return true;
 }
 
+absl::optional<AllocatorStats> StreamExecutor::GetAllocatorStats() {
+  return implementation_->GetAllocatorStats();
+}
+
 template <typename TraceCallT, typename... ArgsT>
 void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&... args) {
   if (tracing_enabled_) {
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index 508273e..09fe0a5 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "absl/base/macros.h"
+#include "absl/types/optional.h"
 #include "tensorflow/stream_executor/lib/status.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
@@ -485,6 +486,9 @@
   // previously registered.
   bool UnregisterTraceListener(TraceListener* listener);
 
+  // Returns allocator statistics.
+  absl::optional<AllocatorStats> GetAllocatorStats();
+
  private:
   template <typename BeginCallT, typename CompleteCallT,
             typename ReturnT, typename... BeginArgsT>
@@ -856,7 +860,7 @@
   }
   CreateAllocRecord(opaque, sizeof(T) * element_count);
   return DeviceMemory<T>(DeviceMemoryBase(opaque, sizeof(T) * element_count,
-                                    true /* = is_sub_buffer */));
+                                          true /* = is_sub_buffer */));
 }
 
 template <typename... Params, typename... Args>
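End to end, the public surface is now: StreamExecutor::GetAllocatorStats() forwards to the implementation's virtual hook. A minimal caller sketch (acquisition of `executor` elided):

// Illustrative: query and log stats through the public facade.
absl::optional<stream_executor::AllocatorStats> stats =
    executor->GetAllocatorStats();
if (stats.has_value()) {
  VLOG(1) << "allocator stats: " << stats->DebugString();
}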
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 6c8b445..a7205a4 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -45,6 +45,7 @@
     "//third_party/ngraph:build_defs.bzl",
     "if_ngraph",
 )
+
 def register_extension_info(**kwargs):
     pass
 
@@ -612,6 +613,7 @@
             clean_dep("//tensorflow/core:core_cpu"),
             clean_dep("//tensorflow/core:framework"),
             clean_dep("//tensorflow/core:lib"),
+            clean_dep("//tensorflow/core:ops"),
             clean_dep("//tensorflow/core:protos_all_cc"),
         ]) + if_android([
             clean_dep("//tensorflow/core:android_tensorflow_lib"),
@@ -628,6 +630,7 @@
             clean_dep("//tensorflow/core:core_cpu"),
             clean_dep("//tensorflow/core:framework"),
             clean_dep("//tensorflow/core:lib"),
+            clean_dep("//tensorflow/core:ops"),
             clean_dep("//tensorflow/core:protos_all_cc"),
         ]) + if_android([
             clean_dep("//tensorflow/core:android_tensorflow_lib"),
@@ -1461,7 +1464,7 @@
 
 def tf_custom_op_library_additional_deps():
     return [
-      "@protobuf_archive//:protobuf_headers",
+        "@protobuf_archive//:protobuf_headers",
         clean_dep("//third_party/eigen3"),
         clean_dep("//tensorflow/core:framework_headers_lib"),
     ] + if_windows(["//tensorflow/python:pywrap_tensorflow_import_lib"])
@@ -1471,8 +1474,8 @@
 # exporting symbols from _pywrap_tensorflow.dll on Windows.
 def tf_custom_op_library_additional_deps_impl():
     return [
-      "@protobuf_archive//:protobuf",
-      "@nsync//:nsync_cpp",
+        "@protobuf_archive//:protobuf",
+        "@nsync//:nsync_cpp",
         # for //third_party/eigen3
         clean_dep("//third_party/eigen3"),
         # for //tensorflow/core:framework_headers_lib
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-critical-section.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-critical-section.pbtxt
new file mode 100644
index 0000000..024a208
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-critical-section.pbtxt
@@ -0,0 +1,17 @@
+path: "tensorflow.CriticalSection"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.critical_section_ops.CriticalSection\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'shared_name\', \'critical_section_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "execute"
+    argspec: "args=[\'self\', \'fn\', \'exclusive_resource_access\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-module.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-module.pbtxt
new file mode 100644
index 0000000..8d599d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-module.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.Module"
+tf_class {
+  is_instance: "<class \'tensorflow.python.module.module.Module\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.tracking.AutoTrackable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name_scope"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "submodules"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "no_name_scope"
+    argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.-function-graphs.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.-function-graphs.pbtxt
new file mode 100644
index 0000000..d2e2f58
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.-function-graphs.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.RunMetadata.FunctionGraphs"
+tf_proto {
+  descriptor {
+    name: "FunctionGraphs"
+    field {
+      name: "partition_graphs"
+      number: 1
+      label: LABEL_REPEATED
+      type: TYPE_MESSAGE
+      type_name: ".tensorflow.GraphDef"
+    }
+    field {
+      name: "pre_optimization_graph"
+      number: 2
+      label: LABEL_OPTIONAL
+      type: TYPE_MESSAGE
+      type_name: ".tensorflow.GraphDef"
+    }
+    field {
+      name: "post_optimization_graph"
+      number: 3
+      label: LABEL_OPTIONAL
+      type: TYPE_MESSAGE
+      type_name: ".tensorflow.GraphDef"
+    }
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt
index 1287940..777b889 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt
@@ -23,5 +23,36 @@
       type: TYPE_MESSAGE
       type_name: ".tensorflow.GraphDef"
     }
+    field {
+      name: "function_graphs"
+      number: 4
+      label: LABEL_REPEATED
+      type: TYPE_MESSAGE
+      type_name: ".tensorflow.RunMetadata.FunctionGraphs"
+    }
+    nested_type {
+      name: "FunctionGraphs"
+      field {
+        name: "partition_graphs"
+        number: 1
+        label: LABEL_REPEATED
+        type: TYPE_MESSAGE
+        type_name: ".tensorflow.GraphDef"
+      }
+      field {
+        name: "pre_optimization_graph"
+        number: 2
+        label: LABEL_OPTIONAL
+        type: TYPE_MESSAGE
+        type_name: ".tensorflow.GraphDef"
+      }
+      field {
+        name: "post_optimization_graph"
+        number: 3
+        label: LABEL_OPTIONAL
+        type: TYPE_MESSAGE
+        type_name: ".tensorflow.GraphDef"
+      }
+    }
   }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt
index d11e927..60518ff 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.TensorShape"
 tf_class {
-  is_instance: "<class \'tensorflow.python.framework.tensor_shape.TensorShapeV1\'>"
+  is_instance: "<class \'tensorflow.python.framework.tensor_shape.TensorShape\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "dims"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt
index 341ace0..9a43638 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.ops.variables.VariableV1\'>"
   is_instance: "<class \'tensorflow.python.ops.variables.Variable\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "SaveSliceInfo"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt
index d7e4529..5663e64 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt
@@ -4,4 +4,8 @@
     name: "experimental_connect_to_host"
     argspec: "args=[\'remote_host\', \'job_name\'], varargs=None, keywords=None, defaults=[\'None\', \'worker\'], "
   }
+  member_method {
+    name: "experimental_run_functions_eagerly"
+    argspec: "args=[\'run_eagerly\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
index f7d388d..1c55f81 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
@@ -40,6 +40,10 @@
     argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "filter_with_legacy_function"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "flat_map"
     argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
   }
@@ -80,6 +84,10 @@
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "map_with_legacy_function"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "options"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
index d73168b..5488449 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -42,6 +42,10 @@
     argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "filter_with_legacy_function"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "flat_map"
     argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
   }
@@ -82,6 +86,10 @@
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "map_with_legacy_function"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "options"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt
index 682a2b9..87af112 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.data.Iterator"
 tf_class {
   is_instance: "<class \'tensorflow.python.data.ops.iterator_ops.Iterator\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "initializer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
index 51224cd..0927dd0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -42,6 +42,10 @@
     argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "filter_with_legacy_function"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "flat_map"
     argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
   }
@@ -82,6 +86,10 @@
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "map_with_legacy_function"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "options"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
index a10add1..bab1e39 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
@@ -42,6 +42,10 @@
     argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "filter_with_legacy_function"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "flat_map"
     argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
   }
@@ -82,6 +86,10 @@
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "map_with_legacy_function"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "options"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
index 71b597c..68cf023 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -42,6 +42,10 @@
     argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "filter_with_legacy_function"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "flat_map"
     argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
   }
@@ -82,6 +86,10 @@
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "map_with_legacy_function"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "options"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
index 20646e8..6d3f88e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -42,6 +42,10 @@
     argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "filter_with_legacy_function"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "flat_map"
     argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
   }
@@ -82,6 +86,10 @@
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "map_with_legacy_function"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "options"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
index 86c5ff5..bc4943e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -42,6 +42,10 @@
     argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "filter_with_legacy_function"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "flat_map"
     argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
   }
@@ -82,6 +86,10 @@
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "map_with_legacy_function"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "options"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
index abc98a7..0e3999c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -142,7 +142,7 @@
   }
   member_method {
     name: "make_csv_dataset"
-    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\'], "
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\', \'ignore_errors\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\', \'False\'], "
   }
   member_method {
     name: "make_saveable_from_iterator"
@@ -153,6 +153,10 @@
     argspec: "args=[\'map_func\', \'batch_size\', \'num_parallel_batches\', \'drop_remainder\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
   }
   member_method {
+    name: "map_and_batch_with_legacy_function"
+    argspec: "args=[\'map_func\', \'batch_size\', \'num_parallel_batches\', \'drop_remainder\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
+  member_method {
     name: "parallel_interleave"
     argspec: "args=[\'map_func\', \'cycle_length\', \'block_length\', \'sloppy\', \'buffer_output_elements\', \'prefetch_input_elements\'], varargs=None, keywords=None, defaults=[\'1\', \'False\', \'None\', \'None\'], "
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-cross-device-ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-cross-device-ops.pbtxt
new file mode 100644
index 0000000..a2ea234
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-cross-device-ops.pbtxt
@@ -0,0 +1,33 @@
+path: "tensorflow.distribute.CrossDeviceOps"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-hierarchical-copy-all-reduce.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-hierarchical-copy-all-reduce.pbtxt
new file mode 100644
index 0000000..a38c4b2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-hierarchical-copy-all-reduce.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.distribute.HierarchicalCopyAllReduce"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.HierarchicalCopyAllReduce\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.AllReduceCrossDeviceOps\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'num_packs\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-nccl-all-reduce.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-nccl-all-reduce.pbtxt
new file mode 100644
index 0000000..bdc09bc
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-nccl-all-reduce.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.distribute.NcclAllReduce"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.NcclAllReduce\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.AllReduceCrossDeviceOps\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'num_packs\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-one-device-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-one-device-strategy.pbtxt
new file mode 100644
index 0000000..5d7943a
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-one-device-strategy.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.distribute.OneDeviceStrategy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy\'>"
+  is_instance: "<class \'tensorflow.python.distribute.distribute_lib.DistributionStrategy\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "extended"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "num_replicas_in_sync"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'device\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "colocate_vars_with"
+    argspec: "args=[\'self\', \'colocate_with_variable\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "configure"
+    argspec: "args=[\'self\', \'session_config\', \'cluster_spec\', \'task_type\', \'task_id\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "experimental_make_numpy_iterator"
+    argspec: "args=[\'self\', \'numpy_input\', \'batch_size\', \'num_epochs\', \'shuffle\', \'session\'], varargs=None, keywords=None, defaults=[\'1\', \'1024\', \'None\'], "
+  }
+  member_method {
+    name: "experimental_run"
+    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "group"
+    argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_dataset_iterator"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "make_input_fn_iterator"
+    argspec: "args=[\'self\', \'input_fn\', \'replication_mode\'], varargs=None, keywords=None, defaults=[\'InputReplicationMode.PER_WORKER\'], "
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "scope"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "unwrap"
+    argspec: "args=[\'self\', \'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_config_proto"
+    argspec: "args=[\'self\', \'config_proto\'], varargs=None, keywords=None, defaults=None"
+  }
+}
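
For orientation, the new OneDeviceStrategy surface above can be exercised as follows; this is a minimal sketch against the TF 1.x API recorded in this golden file, with the device string chosen purely for illustration:

    import tensorflow as tf

    # Place all variables and computation on a single device.
    strategy = tf.distribute.OneDeviceStrategy("/cpu:0")

    with strategy.scope():
      # Variables created under the scope land on /cpu:0.
      v = tf.Variable(1.0)
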
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-reduction-to-one-device.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-reduction-to-one-device.pbtxt
new file mode 100644
index 0000000..f5ade9f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-reduction-to-one-device.pbtxt
@@ -0,0 +1,34 @@
+path: "tensorflow.distribute.ReductionToOneDevice"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.ReductionToOneDevice\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduce_to_device\', \'accumulation_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
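
ReductionToOneDevice is a CrossDeviceOps implementation; the usual pattern is to inject it into a strategy. A minimal sketch, assuming tf.distribute.MirroredStrategy accepts a cross_device_ops argument in this release (MirroredStrategy itself is not part of the hunks above):

    import tensorflow as tf

    # Gather per-replica values onto one device (here the CPU) and reduce there.
    # Both constructor arguments default to None, per the argspec above.
    cross_ops = tf.distribute.ReductionToOneDevice(reduce_to_device="/cpu:0")

    strategy = tf.distribute.MirroredStrategy(cross_device_ops=cross_ops)
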
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt
index 5b5c9e2..5ea6b5b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt
@@ -1,6 +1,14 @@
 path: "tensorflow.distribute"
 tf_module {
   member {
+    name: "CrossDeviceOps"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "HierarchicalCopyAllReduce"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "InputContext"
     mtype: "<type \'type\'>"
   }
@@ -13,10 +21,22 @@
     mtype: "<type \'type\'>"
   }
   member {
+    name: "NcclAllReduce"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "OneDeviceStrategy"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "ReduceOp"
     mtype: "<class \'enum.EnumMeta\'>"
   }
   member {
+    name: "ReductionToOneDevice"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "ReplicaContext"
     mtype: "<type \'type\'>"
   }
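
The other newly exported cross-device reduction classes can be constructed the same way; a sketch, assuming their constructors keep the no-required-argument form of their tf.contrib.distribute predecessors:

    import tensorflow as tf

    # NCCL-based all-reduce for multi-GPU setups.
    nccl_ops = tf.distribute.NcclAllReduce()
    # Hierarchical copy all-reduce, intended for DGX-1-like topologies.
    hier_ops = tf.distribute.HierarchicalCopyAllReduce()
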
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
index bf7c1ab..d537527 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.ModeKeys"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.ModeKeys\'>"
+  is_instance: "<class \'tensorflow.python.saved_model.model_utils.mode_keys.EstimatorModeKeys\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "EVAL"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.experimental.-module.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.experimental.-module.pbtxt
deleted file mode 100644
index 3c5add1..0000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.experimental.-module.pbtxt
+++ /dev/null
@@ -1,35 +0,0 @@
-path: "tensorflow.experimental.Module"
-tf_class {
-  is_instance: "<class \'tensorflow.python.module.module.Module\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.tracking.AutoCheckpointable\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name_scope"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "submodules"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "no_name_scope"
-    argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt
index a7ee6d3..0c3f04e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt
@@ -1,9 +1,5 @@
 path: "tensorflow.experimental"
 tf_module {
-  member {
-    name: "Module"
-    mtype: "<class \'tensorflow.python.module.module.ModuleMetaclass\'>"
-  }
   member_method {
     name: "function_executor_type"
     argspec: "args=[\'executor_type\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.feature_column.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.feature_column.pbtxt
index f06e798..79ed45c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.feature_column.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.feature_column.pbtxt
@@ -49,6 +49,26 @@
     argspec: "args=[\'key\', \'shape\', \'default_value\', \'dtype\', \'normalizer_fn\'], varargs=None, keywords=None, defaults=[\'(1,)\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
   member_method {
+    name: "sequence_categorical_column_with_hash_bucket"
+    argspec: "args=[\'key\', \'hash_bucket_size\', \'dtype\'], varargs=None, keywords=None, defaults=[\"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "sequence_categorical_column_with_identity"
+    argspec: "args=[\'key\', \'num_buckets\', \'default_value\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "sequence_categorical_column_with_vocabulary_file"
+    argspec: "args=[\'key\', \'vocabulary_file\', \'vocabulary_size\', \'num_oov_buckets\', \'default_value\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "sequence_categorical_column_with_vocabulary_list"
+    argspec: "args=[\'key\', \'vocabulary_list\', \'dtype\', \'default_value\', \'num_oov_buckets\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\', \'0\'], "
+  }
+  member_method {
+    name: "sequence_numeric_column"
+    argspec: "args=[\'key\', \'shape\', \'default_value\', \'dtype\', \'normalizer_fn\'], varargs=None, keywords=None, defaults=[\'(1,)\', \'0.0\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
+  member_method {
     name: "shared_embedding_columns"
     argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
   }
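
The sequence feature-column additions above mirror the existing non-sequence variants; a minimal sketch using the argspecs recorded here (column names are illustrative):

    import tensorflow as tf

    # Categorical sequence column hashed into 1000 buckets, embedded densely.
    tokens = tf.feature_column.sequence_categorical_column_with_hash_bucket(
        "tokens", hash_bucket_size=1000)
    token_emb = tf.feature_column.embedding_column(tokens, dimension=8)

    # Numeric sequence column; shape, default_value, and dtype come from the
    # defaults in the argspec above.
    amounts = tf.feature_column.sequence_numeric_column("amounts")
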
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
index bb44ba0..5e13718 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
index 44fc15e..4bbe98b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
index 712e3ad..c64a908 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
@@ -198,7 +198,7 @@
   }
   member_method {
     name: "get_session"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'op_input_list\'], varargs=None, keywords=None, defaults=[\'()\'], "
   }
   member_method {
     name: "get_uid"
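
The get_session change above is backward compatible: the new op_input_list parameter defaults to an empty tuple, so existing zero-argument calls keep working. A sketch:

    import tensorflow as tf

    # Unchanged call site; op_input_list appears intended to let the backend
    # return a session compatible with the given ops (an assumption here).
    sess = tf.keras.backend.get_session()
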
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
index 2e0f77e..66bacd8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.keras.callbacks.TensorBoard"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.callbacks.TensorBoard\'>"
+  is_instance: "<class \'tensorflow.python.keras.callbacks_v1.TensorBoard\'>"
   is_instance: "<class \'tensorflow.python.keras.callbacks.Callback\'>"
   is_instance: "<type \'object\'>"
   member_method {
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
index 4f7ace4..2f3cb0b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.PeepholeLSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt
new file mode 100644
index 0000000..5a75f44
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt
@@ -0,0 +1,184 @@
+path: "tensorflow.keras.experimental.SequenceFeatures"
+tf_class {
+  is_instance: "<class \'tensorflow.python.feature_column.sequence_feature_column.SequenceFeatures\'>"
+  is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'feature_columns\', \'trainable\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'features\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+}
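
SequenceFeatures consumes the sequence feature columns added earlier in this diff. A minimal sketch, assuming the layer returns a (sequence_input, sequence_length) pair as its feature-column documentation describes; feature names and values are illustrative:

    import tensorflow as tf

    ids = tf.feature_column.sequence_categorical_column_with_identity(
        "watch_ids", num_buckets=1000)
    emb = tf.feature_column.embedding_column(ids, dimension=16)
    layer = tf.keras.experimental.SequenceFeatures([emb])

    # One example sequence of two ids, as a SparseTensor of shape [1, 2].
    features = {"watch_ids": tf.SparseTensor(
        indices=[[0, 0], [0, 1]], values=[3, 7], dense_shape=[1, 2])}
    sequence_input, sequence_length = layer(features)
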
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt
index 721c188..2ae3561 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt
@@ -26,6 +26,10 @@
   }
   member_method {
     name: "load_from_saved_model"
-    argspec: "args=[\'saved_model_path\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'saved_model_path\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member {
+    name: "SequenceFeatures"
+    mtype: "<type \'type\'>"
   }
 }
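
The new custom_objects argument to load_from_saved_model parallels tf.keras.models.load_model: it maps names to user-defined classes or functions referenced by the saved model. A sketch with a hypothetical custom layer and path:

    import tensorflow as tf

    class MyLayer(tf.keras.layers.Layer):  # hypothetical custom layer
      pass

    model = tf.keras.experimental.load_from_saved_model(
        "/tmp/exported_model", custom_objects={"MyLayer": MyLayer})
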
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt
index eab888c..0c6c0a3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Activation\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt
index 96c7acc..15bf039 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.ActivityRegularization\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt
index 9e8aae1..b265384 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Add\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt
index 01fc730..3a0882d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.noise.AlphaDropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index 8b6a151..d2ee310 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt
index 3c78457..1da079f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt
index e6e96a0..d96751c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt
index ec2d5b1..3819e52 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Average\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index afff790..47f6b39 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt
index d7ab835..4b8cadc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt
index 6654f86..5c66da4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt
index a328d9f..203fea1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.normalization.BatchNormalizationV1\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.normalization.BatchNormalizationV2\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt
index 94f3a46..95eb6f6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Bidirectional\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Wrapper\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt
index e0eae17..0941013 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Concatenate\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
index ec8a44c..941b6aa 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional_recurrent.ConvRNN2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt
index 350d49a..4bf8336 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index 9b48eb6..221addf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt
index 1708d6a..1c95fcc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index 5018492..994a507 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt
index fd24af3..ae251b5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt
index fbc7609..1d73eec 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 671a004..d37ec0f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt
index dd6519c..bb3c37d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 648f480..fc29f1c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt
index 87a07ea..2658fb4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt
index 6f3a153..58567ea 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Cropping1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt
index a1c418c..42be76f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Cropping2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt
index ad98f9c..11092f2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Cropping3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
index e35403b..e618a11 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.cudnn_recurrent._CuDNNRNN\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
index 90d03ea..cf8a67b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.cudnn_recurrent._CuDNNRNN\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt
new file mode 100644
index 0000000..d6f7f30
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt
@@ -0,0 +1,184 @@
+path: "tensorflow.keras.layers.DenseFeatures"
+tf_class {
+  is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2.DenseFeatures\'>"
+  is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'feature_columns\', \'trainable\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'features\', \'cols_to_output_tensors\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+}
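
DenseFeatures is the non-sequence counterpart of SequenceFeatures: it composes dense-valued feature columns into a single tensor, and per the argspec above its call() also accepts a cols_to_output_tensors mapping. A minimal sketch with an illustrative column:

    import tensorflow as tf

    price = tf.feature_column.numeric_column("price")
    layer = tf.keras.layers.DenseFeatures([price])

    # Maps a features dict to a dense [batch, total_width] tensor.
    dense = layer({"price": tf.constant([[1.0], [2.0]])})
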
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt
index ef12b2e..339c9f5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dense\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
index eacfb37..c2992de 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt
index 7928ceb..b37f4c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Dot\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt
index a7fa545..d21f577 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt
index 483ba65..f1e086b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.ELU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt
index 4d0e5e1..eb3496a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.embeddings.Embedding\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt
index 5947047..bbe324c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Flatten\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt
index b4efdf3..dd93e32 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.GRUCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
index db4d981..9f25b3c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.GRU\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt
index 1686768..e248626 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.noise.GaussianDropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt
index 69bca6a..b1b5759 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.noise.GaussianNoise\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 9a4119d..9c4087a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
index 2ca1eb1..d564294 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
index 4331adc..089145e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 6e91b4a..2bc02b6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
index 85887a5..c2510d1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
index dd20fd1..845d6b1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 3372ae7..f6fa865 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
index 0fb1882..1285e21 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
index 5b1c850..a1417e4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 49e59e0..ff4da8b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
index 9504f64..7140d57 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
index 42de6ae..4edeb97 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt
index f388b84..4860956 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.engine.input_layer.InputLayer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
index d2634dd..7398613 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
index 94ec432..a8f60e8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTM\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt
index da2373c..88f1f8b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Lambda\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt
index 2e47132..c95f915 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.keras.layers.Layer"
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
index a74e935..80d1c32 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.LeakyReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt
index 0f4c071..b050302 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.local.LocallyConnected1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt
index 5eea071..3bb780c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.local.LocallyConnected2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt
index a16ceef..690208b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Masking\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
index e61d730..02f3186 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt
index a21c403..f2e9a3b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt
index fb8613a..868faa0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index a433d49..8e16626 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt
index fa6ad6f..ab96640 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt
index 05e2ace..4f492f5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt
index ce62223..702f2e8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Maximum\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt
index a0ff4f9..fef9390 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Minimum\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt
index 558cc0d..1e2db3f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Multiply\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt
index 5863fbb..1450047 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.PReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt
index 4d7413b..d5d3e03 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Permute\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
index 67ab60b..f1151f4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt
index eb32ba2..0874240 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.ReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt
index 81ac253..de9f8fb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.RepeatVector\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt
index dd4dc49..a125754 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Reshape\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt
index c8724f0..01fac3a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt
index 8c47395..80628d7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
index c0b6ad4..41e96fd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
index c5566c1..f48b0b3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
index f91aac8..5e79932 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.SimpleRNNCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
index eb2a7b9..60893bb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.SimpleRNN\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt
index f0411e2..c96405a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.Softmax\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
index 2a2fd2e..153b7bc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.SpatialDropout1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
index e4d1d43..44e0811 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.SpatialDropout2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
index 4e641a8e..6e9f624 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.SpatialDropout3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
index 591796e..57da4c0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.StackedRNNCells\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt
index 67555db..27eb794 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Subtract\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
index 0ed7da5..733070e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.ThresholdedReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt
index 9492b0b..009ecca 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.TimeDistributed\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Wrapper\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt
index 16c31d3..f465aa6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.UpSampling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index cf1a076..049da3d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.UpSampling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt
index 5cded98..1d50c89 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.UpSampling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt
index 16f3f06..6604ac0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Wrapper\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt
index 59997a8..2c8d527 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.ZeroPadding1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt
index 9a327c2..bf9f43c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.ZeroPadding2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt
index 7933868..a78cfa8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.ZeroPadding3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt
index 6c8faef..9b5598e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.normalization.LayerNormalization\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt
index ad74ab3..cc0fdab 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt
@@ -125,6 +125,10 @@
     mtype: "<type \'type\'>"
   }
   member {
+    name: "DenseFeatures"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "DepthwiseConv2D"
     mtype: "<type \'type\'>"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt
index beaa6f0..6e00a3a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-a-u-c.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.AUC\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt
index 5f00797..18cde2f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt
index a0a3ae8..c5d2fc9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt
index b6ce9e0..a866245 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-crossentropy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
index 587ffdd..998c4cb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt
index 8cd173c..04f1794 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt
index 99414dd..19442b5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-hinge.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-cosine-proximity.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-cosine-proximity.pbtxt
index b059e03..b247e5c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-cosine-proximity.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-cosine-proximity.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt
index 5432f7f..5a94569 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt
index 75541bf..9033d9e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt
index f45a57d..dedc64f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-hinge.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt
index 25bcf85..af8366b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-k-l-divergence.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt
index fb2df7f..a7e072e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-log-cosh-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt
index 11538c6..75173ad 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt
index adec3fe..7be81b6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt
index 47b550a..21e44ed 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-io-u.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.MeanIoU\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt
index a1ec7cb..8ef17fc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-relative-error.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt
index af960fa..363f532 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt
index 417f92f..712f10c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt
index 2a5218f..fff91d2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean-tensor.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.MeanTensor\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt
index b089109..cffb444 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt
index 2216043..ce746ab 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-metric.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt
index 8e91098..570b774 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-poisson.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt
index 9aeaa56..83535d5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Precision\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt
index 748cec0..9ec2bbc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Recall\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt
index 4dd60bb..e2bdbd5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
index 97aeb68..172c40e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
index 5a7bef4..8a24088 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt
index c968768..0cadc9d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt
index ea0f2a7..c135b8f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
index 85f80b0..4f68187 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt
index 15895da..90bbb08 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-squared-hinge.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt
index 5ca1c6c..30ef19e0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sum.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt
index 9cf1bf2..e59476a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt
index 4bc9383..6627a46 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt
index 2eae4df..8c3c2cb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
index 3132e8d..63100a2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
index b5ef70e..5c9ba04 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
index 0a56293..8471803 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adadelta.Adadelta\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
index 14d0894..0466ea6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adagrad.Adagrad\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
index fdb1ea8..9762fad 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
index ece63ec..f477a60 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adamax.Adamax\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt
index f952f88..3ffb4bb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.nadam.Nadam\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
index 27bae90..9639c71 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.keras.optimizers.Optimizer"
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
index e523443..2a7603d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
index d2721f8..4163555 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.gradient_descent.SGD\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt
index 6d826a8..d012bd9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt
index 9505c90..90a27e5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt
index 5b1b8f7..d653a0c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt
index ef4c57b..32f9345 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.normalization.BatchNormalizationV2\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt
index b5ee2e7..0e7adfe 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt
index 57f6d7c..5296597 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt
index 88c616b..5ae9568 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt
index b70a907..aa0da6d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt
index 33e8765..516f0fa 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt
index 1ac13b5..d92af8f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dense\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt
index 77faa3c..614643f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt
index 0b26314..31022d3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.Flatten\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt
index 0a3414d..03bbf39 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt
index ffc5cf1..63a301e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt
index ff2cf2b..d81a336 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt
index 09c8a31..48d93d5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt
index 549e13a..2f1f1c1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt
index 169ecde..bd7549a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
index 973850c..53564e3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
@@ -204,4 +204,8 @@
     name: "triangular_solve"
     argspec: "args=[\'matrix\', \'rhs\', \'lower\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'False\', \'None\'], "
   }
+  member_method {
+    name: "tridiagonal_solve"
+    argspec: "args=[\'diagonals\', \'rhs\', \'diagonals_format\', \'transpose_rhs\', \'conjugate_rhs\', \'name\'], varargs=None, keywords=None, defaults=[\'compact\', \'False\', \'False\', \'None\'], "
+  }
 }
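The `tensorflow.linalg` golden gains `tridiagonal_solve`. Per the argspec above, `diagonals_format` defaults to `'compact'`, where the three diagonals are stacked along a `[..., 3, M]` axis. A minimal sketch, assuming the compact layout is (superdiagonal, main diagonal, subdiagonal) with the unused corner entries ignored; the 3x3 system is illustrative:

    import tensorflow as tf

    # Solve A x = rhs for a 3x3 tridiagonal A, compact format:
    #   row 0: superdiagonal (last entry ignored)
    #   row 1: main diagonal
    #   row 2: subdiagonal (first entry ignored)
    diagonals = tf.constant([[1., 1., 0.],
                             [4., 4., 4.],
                             [0., 1., 1.]])
    rhs = tf.constant([[5.], [6.], [5.]])
    x = tf.linalg.tridiagonal_solve(diagonals, rhs)  # solution: [[1.], [1.], [1.]]
    # In TF 1.x graph mode, evaluate x with a Session; under eager
    # execution it holds the solution directly.
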
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.-optimize.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.-optimize.pbtxt
new file mode 100644
index 0000000..fedb5ee
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.-optimize.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.lite.Optimize"
+tf_class {
+  is_instance: "<enum \'Optimize\'>"
+  member {
+    name: "OPTIMIZE_FOR_LATENCY"
+    mtype: "<enum \'Optimize\'>"
+  }
+  member {
+    name: "OPTIMIZE_FOR_SIZE"
+    mtype: "<enum \'Optimize\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.-representative-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.-representative-dataset.pbtxt
new file mode 100644
index 0000000..d14b695
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.-representative-dataset.pbtxt
@@ -0,0 +1,9 @@
+path: "tensorflow.lite.RepresentativeDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.lite.python.lite.RepresentativeDataset\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'input_gen\', \'output_gen\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt
index 154dd00..18664f7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt
@@ -13,6 +13,14 @@
     mtype: "<class \'enum.EnumMeta\'>"
   }
   member {
+    name: "Optimize"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
+  member {
+    name: "RepresentativeDataset"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "TFLiteConverter"
     mtype: "<type \'type\'>"
   }
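These hunks register two new public symbols under `tf.lite`: the `Optimize` enum (`OPTIMIZE_FOR_LATENCY`, `OPTIMIZE_FOR_SIZE`) and the `RepresentativeDataset` wrapper, whose `__init__` takes an `input_gen` callable and an optional `output_gen`. A hedged sketch of how the two are meant to compose on a converter; the saved-model path and generator are hypothetical, and the `optimizations` / `representative_dataset` converter attributes are assumed from the TFLite converter API of this era rather than shown in this diff:

    import numpy as np
    import tensorflow as tf

    def input_gen():
        # Yield sample inputs matching the model's input signature so the
        # converter can calibrate quantization ranges.
        for _ in range(10):
            yield [np.random.rand(1, 224, 224, 3).astype(np.float32)]

    converter = tf.lite.TFLiteConverter.from_saved_model('/tmp/my_saved_model')
    converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    converter.representative_dataset = tf.lite.RepresentativeDataset(input_gen)
    tflite_model = converter.convert()
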
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
index 4e211be..1e1c6ec 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
@@ -301,6 +301,10 @@
     argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
+    name: "reduce_euclidean_norm"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
+  member_method {
     name: "reduce_logsumexp"
     argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
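`tf.math.reduce_euclidean_norm` is new here: it reduces by the square root of the sum of squares over the given axes, with `keepdims` defaulting to `False` per the argspec. A small illustrative check, with values chosen so the norms come out exactly:

    import tensorflow as tf

    x = tf.constant([[3., 4.],
                     [0., 12.]])
    tf.math.reduce_euclidean_norm(x)          # sqrt(9 + 16 + 0 + 144) = 13.0
    tf.math.reduce_euclidean_norm(x, axis=1)  # [5.0, 12.0]
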
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt
index ad5360e..70bb6d7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt
@@ -9,6 +9,10 @@
     argspec: "args=[\'structure\', \'expand_composites\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
   member_method {
+    name: "is_nested"
+    argspec: "args=[\'seq\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "map_structure"
     argspec: "args=[\'func\'], varargs=structure, keywords=kwargs, defaults=None"
   }
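`tf.nest.is_nested` is added with a single `seq` argument and no defaults: it reports whether its argument is a structure that `tf.nest` would recurse into (lists, tuples, dicts) rather than a leaf. A quick sketch of the expected behavior:

    import tensorflow as tf

    tf.nest.is_nested([1, 2])           # True: lists are structures
    tf.nest.is_nested({'a': 1})         # True: dicts are structures
    tf.nest.is_nested((1,))             # True: tuples are structures
    tf.nest.is_nested(tf.constant(1.))  # False: tensors are leaves
    tf.nest.is_nested('abc')            # False: strings are treated as leaves
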
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
index b8a56e1..626112b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
@@ -22,13 +22,25 @@
   }
   member_method {
     name: "avg_pool"
-    argspec: "args=[\'value\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'value\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\', \'input\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "avg_pool1d"
+    argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NWC\', \'None\'], "
+  }
+  member_method {
+    name: "avg_pool2d"
+    argspec: "args=[\'value\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\', \'input\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], "
   }
   member_method {
     name: "avg_pool3d"
     argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\'], "
   }
   member_method {
+    name: "avg_pool_v2"
+    argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
     name: "batch_norm_with_global_normalization"
     argspec: "args=[\'t\', \'m\', \'v\', \'beta\', \'gamma\', \'variance_epsilon\', \'scale_after_normalization\', \'name\', \'input\', \'mean\', \'variance\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
@@ -54,7 +66,11 @@
   }
   member_method {
     name: "conv1d"
-    argspec: "args=[\'value\', \'filters\', \'stride\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\', \'input\', \'dilations\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'value\', \'filters\', \'stride\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\', \'input\', \'dilations\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "conv1d_transpose"
+    argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NWC\', \'None\', \'None\'], "
   }
   member_method {
     name: "conv2d"
@@ -70,7 +86,7 @@
   }
   member_method {
     name: "conv2d_transpose"
-    argspec: "args=[\'value\', \'filter\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'name\', \'input\', \'filters\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'SAME\', \'NHWC\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'value\', \'filter\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'name\', \'input\', \'filters\', \'dilations\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'SAME\', \'NHWC\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "conv3d"
@@ -86,7 +102,7 @@
   }
   member_method {
     name: "conv3d_transpose"
-    argspec: "args=[\'value\', \'filter\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'name\', \'input\', \'filters\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'SAME\', \'NDHWC\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'value\', \'filter\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'name\', \'input\', \'filters\', \'dilations\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'SAME\', \'NDHWC\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "convolution"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
index 4251206..9513615 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
index 20af246..912f78f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
index 3205c6a..58d004b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
index f273e11..a7b63a7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
index e43547b..3f17805 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
index 99381cd..055485f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
index 1fbde9d..23272f4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
index 8ba92fc..a9f7e85 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
index 4003e87..ecf4361 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 64f5720..103fdd0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -33,6 +33,10 @@
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
   member {
+    name: "CriticalSection"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "DType"
     mtype: "<type \'type\'>"
   }
@@ -137,6 +141,10 @@
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
   member {
+    name: "Module"
+    mtype: "<class \'tensorflow.python.module.module.ModuleMetaclass\'>"
+  }
+  member {
     name: "NameAttrList"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
index 28f26fe..86c62b3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
@@ -533,6 +533,10 @@
     argspec: "args=[\'input_dataset\', \'filename\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "Case"
+    argspec: "args=[\'branch_index\', \'input\', \'Tout\', \'branches\', \'output_shapes\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Cast"
     argspec: "args=[\'x\', \'DstT\', \'Truncate\'], varargs=None, keywords=None, defaults=None"
   }
@@ -569,6 +573,10 @@
     argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "CollectiveGather"
+    argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "CollectiveReduce"
     argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'subdiv_offsets\', \'wait_for\'], varargs=None, keywords=None, defaults=None"
   }
@@ -973,6 +981,10 @@
     argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "EuclideanNorm"
+    argspec: "args=[\'input\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Exit"
     argspec: "args=[\'data\'], varargs=None, keywords=None, defaults=None"
   }
@@ -2645,10 +2657,18 @@
     argspec: "args=[\'input\', \'input_min\', \'input_max\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "RequantizationRangePerChannel"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'clip_value_max\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Requantize"
     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'requested_output_min\', \'requested_output_max\', \'out_type\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "RequantizePerChannel"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'requested_output_min\', \'requested_output_max\', \'out_type\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Reshape"
     argspec: "args=[\'tensor\', \'shape\'], varargs=None, keywords=None, defaults=None"
   }
@@ -3773,6 +3793,10 @@
     argspec: "args=[\'tensor\', \'indices\', \'element_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "TensorListScatterIntoExistingList"
+    argspec: "args=[\'input_handle\', \'tensor\', \'indices\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "TensorListScatterV2"
     argspec: "args=[\'tensor\', \'indices\', \'element_shape\', \'num_elements\'], varargs=None, keywords=None, defaults=None"
   }
@@ -3853,6 +3877,10 @@
     argspec: "args=[\'x\', \'perm\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "TridiagonalSolve"
+    argspec: "args=[\'diagonals\', \'rhs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "TruncateDiv"
     argspec: "args=[\'x\', \'y\'], varargs=None, keywords=None, defaults=None"
   }
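
Note: among the raw-op additions above, EuclideanNorm is the easiest to exercise directly. A sketch via tf.raw_ops using the exact argument names pinned in the golden file; the assumption that the generated wrapper takes these as keywords is mine, not stated in this diff:

    import tensorflow as tf

    x = tf.constant([[3.0, 4.0], [6.0, 8.0]])
    # L2 norm along axis 1: sqrt(9+16)=5, sqrt(36+64)=10.
    norms = tf.raw_ops.EuclideanNorm(input=x, reduction_indices=[1],
                                     keep_dims=False)   # -> [5.0, 10.0]
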
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt
index 7ed9cd7..3879645 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt
@@ -45,6 +45,10 @@
     argspec: "args=[\'name\', \'tensor\', \'max_outputs\', \'collections\', \'family\'], varargs=None, keywords=None, defaults=[\'3\', \'None\', \'None\'], "
   }
   member_method {
+    name: "initialize"
+    argspec: "args=[\'graph\', \'session\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
     name: "merge"
     argspec: "args=[\'inputs\', \'collections\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-adadelta-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-adadelta-optimizer.pbtxt
index 65a2b60..6ed8f93 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-adadelta-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-adadelta-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.adadelta.AdadeltaOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-d-a-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-d-a-optimizer.pbtxt
index 179272d..c57b3d8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-d-a-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-d-a-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.adagrad_da.AdagradDAOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-optimizer.pbtxt
index 15c2ef4..897df3e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-adagrad-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.adagrad.AdagradOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-adam-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-adam-optimizer.pbtxt
index 9c902e5..cb8b5d3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-adam-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-adam-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.adam.AdamOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-checkpoint.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-checkpoint.pbtxt
index 42dcdac..629bc13 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-checkpoint.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-checkpoint.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.train.Checkpoint"
 tf_class {
-  is_instance: "<class \'tensorflow.python.training.checkpointable.util.Checkpoint\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.tracking.AutoCheckpointable\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.util.Checkpoint\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.tracking.AutoTrackable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "save_counter"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt
index f41d9f1..1d1aceb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.ftrl.FtrlOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-gradient-descent-optimizer.pbtxt
index 7399750..b998e84 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-gradient-descent-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-gradient-descent-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.gradient_descent.GradientDescentOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-momentum-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-momentum-optimizer.pbtxt
index 9bbaa14..2de61d6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-momentum-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-momentum-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.momentum.MomentumOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-optimizer.pbtxt
index 448e17a..8baa569 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-optimizer.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.train.Optimizer"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-adagrad-optimizer.pbtxt
index eb1782e..626b753 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-adagrad-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-adagrad-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.proximal_adagrad.ProximalAdagradOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt
index eb9a861..9c0dca0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.proximal_gradient_descent.ProximalGradientDescentOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-r-m-s-prop-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-r-m-s-prop-optimizer.pbtxt
index 2cf4c2e..61ae458 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-r-m-s-prop-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-r-m-s-prop-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.rmsprop.RMSPropOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-sync-replicas-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-sync-replicas-optimizer.pbtxt
index ecce082..b812d6f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.-sync-replicas-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-sync-replicas-optimizer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.training.sync_replicas_optimizer.SyncReplicasOptimizer\'>"
   is_instance: "<class \'tensorflow.python.training.optimizer.Optimizer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "GATE_GRAPH"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-critical-section.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-critical-section.pbtxt
new file mode 100644
index 0000000..024a208
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-critical-section.pbtxt
@@ -0,0 +1,17 @@
+path: "tensorflow.CriticalSection"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.critical_section_ops.CriticalSection\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'shared_name\', \'critical_section_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "execute"
+    argspec: "args=[\'self\', \'fn\', \'exclusive_resource_access\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
+  }
+}
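
Note: tf.CriticalSection is newly exported in the v2 namespace (and added to the v1 tensorflow.pbtxt earlier in this diff). A minimal sketch per the argspec above, where execute(fn) runs fn under the section's mutual-exclusion resource:

    import tensorflow as tf

    counter = tf.Variable(0)
    cs = tf.CriticalSection(name='counter_cs')

    def add_one():
      return counter.assign_add(1)

    # exclusive_resource_access defaults to True per the golden argspec.
    result = cs.execute(add_one)
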
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-module.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-module.pbtxt
new file mode 100644
index 0000000..8d599d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-module.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.Module"
+tf_class {
+  is_instance: "<class \'tensorflow.python.module.module.Module\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.tracking.AutoTrackable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name_scope"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "submodules"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "no_name_scope"
+    argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None"
+  }
+}
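
Note: tf.Module graduates out of tf.experimental here (the tensorflow.experimental.-module.pbtxt golden is deleted below). A minimal subclassing sketch with illustrative names; the variable-collection behavior follows the trainable_variables/variables properties pinned above:

    import tensorflow as tf

    class Dense(tf.Module):
      def __init__(self, in_dim, out_dim, name=None):
        super(Dense, self).__init__(name=name)
        self.w = tf.Variable(tf.random.normal([in_dim, out_dim]), name='w')
        self.b = tf.Variable(tf.zeros([out_dim]), name='b')

      def __call__(self, x):
        return tf.matmul(x, self.w) + self.b

    layer = Dense(3, 2)
    print(len(layer.trainable_variables))  # 2, collected via Trackable tracking
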
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt
index bee1952..60518ff 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt
@@ -1,7 +1,6 @@
 path: "tensorflow.TensorShape"
 tf_class {
-  is_instance: "<class \'tensorflow.python.framework.tensor_shape.TensorShapeV2\'>"
-  is_instance: "<class \'tensorflow.python.framework.tensor_shape.TensorShapeV1\'>"
+  is_instance: "<class \'tensorflow.python.framework.tensor_shape.TensorShape\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "dims"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
index a80726d..03fd32f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.Variable"
 tf_class {
   is_instance: "<class \'tensorflow.python.ops.variables.Variable\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "SaveSliceInfo"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt
index d7e4529..5663e64 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt
@@ -4,4 +4,8 @@
     name: "experimental_connect_to_host"
     argspec: "args=[\'remote_host\', \'job_name\'], varargs=None, keywords=None, defaults=[\'None\', \'worker\'], "
   }
+  member_method {
+    name: "experimental_run_functions_eagerly"
+    argspec: "args=[\'run_eagerly\'], varargs=None, keywords=None, defaults=None"
+  }
 }
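
Note: the new tf.config.experimental_run_functions_eagerly switch above takes a single run_eagerly argument. A sketch of the intended debugging pattern, assuming tf.function-traced code elsewhere in the program:

    import tensorflow as tf

    tf.config.experimental_run_functions_eagerly(True)   # step through tf.function bodies
    # ... debug ...
    tf.config.experimental_run_functions_eagerly(False)  # restore traced execution
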
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
index abc98a7..48d9002 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -142,7 +142,7 @@
   }
   member_method {
     name: "make_csv_dataset"
-    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\'], "
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\', \'ignore_errors\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\', \'False\'], "
   }
   member_method {
     name: "make_saveable_from_iterator"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-cross-device-ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-cross-device-ops.pbtxt
new file mode 100644
index 0000000..a2ea234
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-cross-device-ops.pbtxt
@@ -0,0 +1,33 @@
+path: "tensorflow.distribute.CrossDeviceOps"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-hierarchical-copy-all-reduce.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-hierarchical-copy-all-reduce.pbtxt
new file mode 100644
index 0000000..a38c4b2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-hierarchical-copy-all-reduce.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.distribute.HierarchicalCopyAllReduce"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.HierarchicalCopyAllReduce\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.AllReduceCrossDeviceOps\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'num_packs\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-nccl-all-reduce.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-nccl-all-reduce.pbtxt
new file mode 100644
index 0000000..bdc09bc
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-nccl-all-reduce.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.distribute.NcclAllReduce"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.NcclAllReduce\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.AllReduceCrossDeviceOps\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'num_packs\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt
new file mode 100644
index 0000000..5d7943a
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.distribute.OneDeviceStrategy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy\'>"
+  is_instance: "<class \'tensorflow.python.distribute.distribute_lib.DistributionStrategy\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "extended"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "num_replicas_in_sync"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'device\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "colocate_vars_with"
+    argspec: "args=[\'self\', \'colocate_with_variable\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "configure"
+    argspec: "args=[\'self\', \'session_config\', \'cluster_spec\', \'task_type\', \'task_id\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "experimental_make_numpy_iterator"
+    argspec: "args=[\'self\', \'numpy_input\', \'batch_size\', \'num_epochs\', \'shuffle\', \'session\'], varargs=None, keywords=None, defaults=[\'1\', \'1024\', \'None\'], "
+  }
+  member_method {
+    name: "experimental_run"
+    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "group"
+    argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_dataset_iterator"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "make_input_fn_iterator"
+    argspec: "args=[\'self\', \'input_fn\', \'replication_mode\'], varargs=None, keywords=None, defaults=[\'InputReplicationMode.PER_WORKER\'], "
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "scope"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "unwrap"
+    argspec: "args=[\'self\', \'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_config_proto"
+    argspec: "args=[\'self\', \'config_proto\'], varargs=None, keywords=None, defaults=None"
+  }
+}
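
Note: tf.distribute.OneDeviceStrategy is newly exported with a single required device argument per the __init__ argspec above. A minimal sketch:

    import tensorflow as tf

    strategy = tf.distribute.OneDeviceStrategy('/cpu:0')
    with strategy.scope():
      v = tf.Variable(1.0)         # created on the strategy's single device
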
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-reduction-to-one-device.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-reduction-to-one-device.pbtxt
new file mode 100644
index 0000000..f5ade9f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-reduction-to-one-device.pbtxt
@@ -0,0 +1,34 @@
+path: "tensorflow.distribute.ReductionToOneDevice"
+tf_class {
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.ReductionToOneDevice\'>"
+  is_instance: "<class \'tensorflow.python.distribute.cross_device_ops.CrossDeviceOps\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduce_to_device\', \'accumulation_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "batch_reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch_reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'value_destination_pairs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "broadcast_implementation"
+    argspec: "args=[\'self\', \'tensor\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce_implementation"
+    argspec: "args=[\'self\', \'reduce_op\', \'per_replica_value\', \'destinations\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt
index 0d0d350..a4b78d4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt
@@ -1,6 +1,14 @@
 path: "tensorflow.distribute"
 tf_module {
   member {
+    name: "CrossDeviceOps"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "HierarchicalCopyAllReduce"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "InputContext"
     mtype: "<type \'type\'>"
   }
@@ -13,10 +21,22 @@
     mtype: "<type \'type\'>"
   }
   member {
+    name: "NcclAllReduce"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "OneDeviceStrategy"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "ReduceOp"
     mtype: "<class \'enum.EnumMeta\'>"
   }
   member {
+    name: "ReductionToOneDevice"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "ReplicaContext"
     mtype: "<type \'type\'>"
   }
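
Note: the cross-device reduction classes exported above (CrossDeviceOps, NcclAllReduce, HierarchicalCopyAllReduce, ReductionToOneDevice) are meant to be plugged into a distribution strategy. A sketch assuming MirroredStrategy's cross_device_ops constructor argument, which this hunk does not itself pin:

    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy(
        cross_device_ops=tf.distribute.HierarchicalCopyAllReduce(num_packs=1))
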
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
index f3dfe72..d537527 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.ModeKeys"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.mode_keys.ModeKeysV2\'>"
+  is_instance: "<class \'tensorflow.python.saved_model.model_utils.mode_keys.EstimatorModeKeys\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "EVAL"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.experimental.-module.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.experimental.-module.pbtxt
deleted file mode 100644
index 3c5add1..0000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.experimental.-module.pbtxt
+++ /dev/null
@@ -1,35 +0,0 @@
-path: "tensorflow.experimental.Module"
-tf_class {
-  is_instance: "<class \'tensorflow.python.module.module.Module\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.tracking.AutoCheckpointable\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name_scope"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "submodules"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "no_name_scope"
-    argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt
index a7ee6d3..0c3f04e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt
@@ -1,9 +1,5 @@
 path: "tensorflow.experimental"
 tf_module {
-  member {
-    name: "Module"
-    mtype: "<class \'tensorflow.python.module.module.ModuleMetaclass\'>"
-  }
   member_method {
     name: "function_executor_type"
     argspec: "args=[\'executor_type\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.feature_column.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.feature_column.pbtxt
index d6d5fc9..4e4fd78 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.feature_column.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.feature_column.pbtxt
@@ -41,6 +41,26 @@
     argspec: "args=[\'key\', \'shape\', \'default_value\', \'dtype\', \'normalizer_fn\'], varargs=None, keywords=None, defaults=[\'(1,)\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
   member_method {
+    name: "sequence_categorical_column_with_hash_bucket"
+    argspec: "args=[\'key\', \'hash_bucket_size\', \'dtype\'], varargs=None, keywords=None, defaults=[\"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "sequence_categorical_column_with_identity"
+    argspec: "args=[\'key\', \'num_buckets\', \'default_value\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "sequence_categorical_column_with_vocabulary_file"
+    argspec: "args=[\'key\', \'vocabulary_file\', \'vocabulary_size\', \'num_oov_buckets\', \'default_value\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "sequence_categorical_column_with_vocabulary_list"
+    argspec: "args=[\'key\', \'vocabulary_list\', \'dtype\', \'default_value\', \'num_oov_buckets\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\', \'0\'], "
+  }
+  member_method {
+    name: "sequence_numeric_column"
+    argspec: "args=[\'key\', \'shape\', \'default_value\', \'dtype\', \'normalizer_fn\'], varargs=None, keywords=None, defaults=[\'(1,)\', \'0.0\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
+  member_method {
     name: "shared_embeddings"
     argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
index bb44ba0..5e13718 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
index 44fc15e..4bbe98b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
index 2e0f77e..abf4286 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\', \'update_freq\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\', \'epoch\'], "
+    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'write_graph\', \'write_images\', \'update_freq\'], varargs=None, keywords=kwargs, defaults=[\'./logs\', \'0\', \'True\', \'False\', \'epoch\'], "
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
index 4f7ace4..2f3cb0b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.PeepholeLSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt
new file mode 100644
index 0000000..5a75f44
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt
@@ -0,0 +1,184 @@
+path: "tensorflow.keras.experimental.SequenceFeatures"
+tf_class {
+  is_instance: "<class \'tensorflow.python.feature_column.sequence_feature_column.SequenceFeatures\'>"
+  is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'feature_columns\', \'trainable\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'features\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+}
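
Note: the new tf.keras.experimental.SequenceFeatures layer pairs with the sequence feature columns exported in tensorflow.feature_column above. A sketch with illustrative feature names, sizes, and values; the (dense_tensor, sequence_length) return pair is assumed from the layer's design rather than pinned in this diff:

    import tensorflow as tf

    token_col = tf.feature_column.sequence_categorical_column_with_identity(
        'token_ids', num_buckets=1000)
    token_emb = tf.feature_column.embedding_column(token_col, dimension=16)

    layer = tf.keras.experimental.SequenceFeatures([token_emb])
    features = {'token_ids': tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                                             values=[7, 42, 3],
                                             dense_shape=[2, 2])}
    sequence_input, sequence_length = layer(features)  # [batch, T, 16], [batch]
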
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt
index 721c188..2ae3561 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt
@@ -26,6 +26,10 @@
   }
   member_method {
     name: "load_from_saved_model"
-    argspec: "args=[\'saved_model_path\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'saved_model_path\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member {
+    name: "SequenceFeatures"
+    mtype: "<type \'type\'>"
   }
 }
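
Note: load_from_saved_model now accepts custom_objects, mirroring tf.keras.models.load_model. A sketch with a hypothetical custom layer and an illustrative export path:

    import tensorflow as tf

    class MyLayer(tf.keras.layers.Layer):   # hypothetical custom layer
      pass

    model = tf.keras.experimental.load_from_saved_model(
        '/tmp/exported_model',              # illustrative path
        custom_objects={'MyLayer': MyLayer})
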
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt
index eab888c..0c6c0a3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Activation\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt
index 96c7acc..15bf039 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.ActivityRegularization\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt
index 9e8aae1..b265384 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Add\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt
index 01fc730..3a0882d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.noise.AlphaDropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index 8b6a151..d2ee310 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt
index 3c78457..1da079f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt
index e6e96a0..d96751c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt
index ec2d5b1..3819e52 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Average\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index afff790..47f6b39 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt
index d7ab835..4b8cadc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt
index 6654f86..5c66da4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.AveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt
index 05ac793..9ae69aa 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.normalization.BatchNormalizationV2\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt
index 94f3a46..95eb6f6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Bidirectional\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Wrapper\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt
index e0eae17..0941013 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Concatenate\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
index ec8a44c..941b6aa 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional_recurrent.ConvRNN2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt
index 350d49a..4bf8336 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index 9b48eb6..221addf 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt
index 1708d6a..1c95fcc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index 5018492..994a507 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt
index fd24af3..ae251b5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt
index fbc7609..1d73eec 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 671a004..d37ec0f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt
index dd6519c..bb3c37d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 648f480..fc29f1c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt
index 87a07ea..2658fb4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt
index 6f3a153..58567ea 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Cropping1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt
index a1c418c..42be76f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Cropping2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt
index ad98f9c..11092f2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Cropping3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt
index ca6a327..d6f7f30 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2.DenseFeatures\'>"
   is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt
index ef12b2e..339c9f5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dense\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
index eacfb37..c2992de 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt
index 7928ceb..b37f4c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Dot\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt
index a7fa545..d21f577 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt
index 483ba65..f1e086b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.ELU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt
index 4d0e5e1..eb3496a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.embeddings.Embedding\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
index 5947047..bbe324c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Flatten\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
index b4efdf3..dd93e32 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.GRUCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
index 811990a..32e6985 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.GRU\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
index 1686768..e248626 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.noise.GaussianDropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
index 69bca6a..b1b5759 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.noise.GaussianNoise\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 9a4119d..9c4087a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
index 2ca1eb1..d564294 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
index 4331adc..089145e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 6e91b4a..2bc02b6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
index 85887a5..c2510d1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
index dd20fd1..845d6b1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalAveragePooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 3372ae7..f6fa865 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
index 0fb1882..1285e21 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
index 5b1c850..a1417e4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 49e59e0..ff4da8b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
index 9504f64..7140d57 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
index 42de6ae..4edeb97 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalMaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.GlobalPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt
index f388b84..4860956 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.engine.input_layer.InputLayer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
index d2634dd..7398613 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
index 622a8e2..c9b759d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTM\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt
index da2373c..88f1f8b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Lambda\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt
index 2e47132..c95f915 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.keras.layers.Layer"
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
index a74e935..80d1c32 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.LeakyReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
index bf6c84b..9bafe01 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt
index 0f4c071..b050302 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.local.LocallyConnected1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt
index 5eea071..3bb780c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.local.LocallyConnected2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt
index a16ceef..690208b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Masking\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
index e61d730..02f3186 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt
index a21c403..f2e9a3b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt
index fb8613a..868faa0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index a433d49..8e16626 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt
index fa6ad6f..ab96640 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt
index 05e2ace..4f492f5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.MaxPooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.pooling.Pooling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt
index ce62223..702f2e8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Maximum\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt
index a0ff4f9..fef9390 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Minimum\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt
index 558cc0d..1e2db3f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Multiply\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt
index 5863fbb..1450047 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.PReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt
index 4d7413b..d5d3e03 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Permute\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
index 67ab60b..f1151f4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt
index eb32ba2..0874240 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.ReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt
index 81ac253..de9f8fb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.RepeatVector\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt
index dd4dc49..a125754 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.core.Reshape\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt
index c8724f0..01fac3a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt
index 8c47395..80628d7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
index c0b6ad4..41e96fd 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
index c5566c1..f48b0b3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.SeparableConv\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.Conv\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
index f91aac8..5e79932 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.SimpleRNNCell\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
index eb2a7b9..60893bb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.SimpleRNN\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activation"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt
index f0411e2..c96405a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.Softmax\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
index 2a2fd2e..153b7bc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.SpatialDropout1D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
index e4d1d43..44e0811 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.SpatialDropout2D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
index 4e641a8e..6e9f624 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.core.SpatialDropout3D\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.core.Dropout\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
index 591796e..57da4c0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.StackedRNNCells\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt
index 67555db..27eb794 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.merge.Subtract\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.merge._Merge\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
index 0ed7da5..733070e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.advanced_activations.ThresholdedReLU\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt
index 9492b0b..009ecca 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.TimeDistributed\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Wrapper\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt
index 16c31d3..f465aa6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.UpSampling1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index cf1a076..049da3d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.UpSampling2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt
index 5cded98..1d50c89 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.UpSampling3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt
index 16f3f06..6604ac0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.wrappers.Wrapper\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt
index 59997a8..2c8d527 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.ZeroPadding1D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt
index 9a327c2..bf9f43c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.ZeroPadding2D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt
index 7933868..a78cfa8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.convolutional.ZeroPadding3D\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt
index 6c8faef..9b5598e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-layer-normalization.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.layers.normalization.LayerNormalization\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.losses.-reduction.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.losses.-reduction.pbtxt
index f20ed26..e93be80 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.losses.-reduction.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.losses.-reduction.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.keras.losses.Reduction"
 tf_class {
-  is_instance: "<class \'tensorflow.python.ops.losses.losses_impl.ReductionV2\'>"
+  is_instance: "<class \'tensorflow.python.keras.utils.losses_utils.ReductionV2\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "NONE"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt
index beaa6f0..6e00a3a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-a-u-c.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.AUC\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt
index 5f00797..18cde2f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt
index a0a3ae8..c5d2fc9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt
index b6ce9e0..a866245 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-crossentropy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
index 587ffdd..998c4cb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt
index 8cd173c..04f1794 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-crossentropy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt
index 99414dd..19442b5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-hinge.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-cosine-proximity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-cosine-proximity.pbtxt
index b059e03..b247e5c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-cosine-proximity.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-cosine-proximity.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt
index 5432f7f..5a94569 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt
index 75541bf..9033d9e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt
index f45a57d..dedc64f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-hinge.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt
index 25bcf85..af8366b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-k-l-divergence.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt
index fb2df7f..a7e072e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-log-cosh-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt
index 11538c6..75173ad 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt
index adec3fe..7be81b6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-absolute-percentage-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt
index 47b550a..21e44ed 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-io-u.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.MeanIoU\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt
index a1ec7cb..8ef17fc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-relative-error.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt
index af960fa..363f532 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt
index 417f92f..712f10c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-squared-logarithmic-error.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt
index 2a5218f..fff91d2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean-tensor.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.MeanTensor\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt
index b089109..cffb444 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt
index 2216043..ce746ab 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-metric.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt
index 8e91098..570b774 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-poisson.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt
index 9aeaa56..83535d5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Precision\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt
index 748cec0..9ec2bbc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Recall\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt
index 4dd60bb..e2bdbd5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-root-mean-squared-error.pbtxt
@@ -5,7 +5,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
index 97aeb68..172c40e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
index 5a7bef4..8a24088 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt
index c968768..0cadc9d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-crossentropy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt
index ea0f2a7..c135b8f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-top-k-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
index 85f80b0..4f68187 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt
index 15895da..90bbb08 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-squared-hinge.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt
index 5ca1c6c..30ef19e0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sum.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt
index 9cf1bf2..e59476a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-top-k-categorical-accuracy.pbtxt
@@ -6,7 +6,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt
index 4bc9383..6627a46 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt
index 2eae4df..8c3c2cb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
index 3132e8d..63100a2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
index b5ef70e..5c9ba04 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.keras.engine.training.Model\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.network.Network\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
index 0a56293..8471803 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adadelta.Adadelta\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
index 14d0894..0466ea6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adagrad.Adagrad\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
index fdb1ea8..9762fad 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
index ece63ec..f477a60 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adamax.Adamax\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt
index f952f88..3ffb4bb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.nadam.Nadam\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
index 27bae90..9639c71 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.keras.optimizers.Optimizer"
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
index e523443..2a7603d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
index d2721f8..4163555 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
@@ -2,7 +2,7 @@
 tf_class {
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.gradient_descent.SGD\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "iterations"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
index 08b928e..e46cb44 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
@@ -204,4 +204,8 @@
     name: "triangular_solve"
     argspec: "args=[\'matrix\', \'rhs\', \'lower\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'False\', \'None\'], "
   }
+  member_method {
+    name: "tridiagonal_solve"
+    argspec: "args=[\'diagonals\', \'rhs\', \'diagonals_format\', \'transpose_rhs\', \'conjugate_rhs\', \'name\'], varargs=None, keywords=None, defaults=[\'compact\', \'False\', \'False\', \'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.-optimize.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.-optimize.pbtxt
new file mode 100644
index 0000000..fedb5ee
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.-optimize.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.lite.Optimize"
+tf_class {
+  is_instance: "<enum \'Optimize\'>"
+  member {
+    name: "OPTIMIZE_FOR_LATENCY"
+    mtype: "<enum \'Optimize\'>"
+  }
+  member {
+    name: "OPTIMIZE_FOR_SIZE"
+    mtype: "<enum \'Optimize\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.-representative-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.-representative-dataset.pbtxt
new file mode 100644
index 0000000..d14b695
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.-representative-dataset.pbtxt
@@ -0,0 +1,9 @@
+path: "tensorflow.lite.RepresentativeDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.lite.python.lite.RepresentativeDataset\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'input_gen\', \'output_gen\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.-toco-converter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.-toco-converter.pbtxt
deleted file mode 100644
index 3ef90b8..0000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.lite.-toco-converter.pbtxt
+++ /dev/null
@@ -1,24 +0,0 @@
-path: "tensorflow.lite.TocoConverter"
-tf_class {
-  is_instance: "<class \'tensorflow.lite.python.lite.TocoConverter\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-  }
-  member_method {
-    name: "from_frozen_graph"
-    argspec: "args=[\'cls\', \'graph_def_file\', \'input_arrays\', \'output_arrays\', \'input_shapes\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "from_keras_model_file"
-    argspec: "args=[\'cls\', \'model_file\', \'input_arrays\', \'input_shapes\', \'output_arrays\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "from_saved_model"
-    argspec: "args=[\'cls\', \'saved_model_dir\', \'input_arrays\', \'input_shapes\', \'output_arrays\', \'tag_set\', \'signature_key\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "from_session"
-    argspec: "args=[\'cls\', \'sess\', \'input_tensors\', \'output_tensors\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt
index 154dd00..6fea044 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt
@@ -13,19 +13,19 @@
     mtype: "<class \'enum.EnumMeta\'>"
   }
   member {
-    name: "TFLiteConverter"
+    name: "Optimize"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
+  member {
+    name: "RepresentativeDataset"
     mtype: "<type \'type\'>"
   }
   member {
-    name: "TocoConverter"
+    name: "TFLiteConverter"
     mtype: "<type \'type\'>"
   }
   member {
     name: "constants"
     mtype: "<type \'module\'>"
   }
-  member_method {
-    name: "toco_convert"
-    argspec: "args=[\'input_data\', \'input_tensors\', \'output_tensors\'], varargs=args, keywords=kwargs, defaults=None"
-  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-binary-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-binary-crossentropy.pbtxt
new file mode 100644
index 0000000..1d180a9
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-binary-crossentropy.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.BinaryCrossentropy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.BinaryCrossentropy\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'from_logits\', \'label_smoothing\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'0\', \'sum_over_batch_size\', \'binary_crossentropy\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-categorical-crossentropy.pbtxt
new file mode 100644
index 0000000..3937dfa
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-categorical-crossentropy.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.CategoricalCrossentropy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.CategoricalCrossentropy\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'from_logits\', \'label_smoothing\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'0\', \'sum_over_batch_size\', \'categorical_crossentropy\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-categorical-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-categorical-hinge.pbtxt
new file mode 100644
index 0000000..d2a064d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-categorical-hinge.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.CategoricalHinge"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.CategoricalHinge\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'categorical_hinge\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-cosine-proximity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-cosine-proximity.pbtxt
new file mode 100644
index 0000000..3ec2902
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-cosine-proximity.pbtxt
@@ -0,0 +1,22 @@
+path: "tensorflow.losses.CosineProximity"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.CosineProximity\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'axis\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'sum_over_batch_size\', \'None\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-hinge.pbtxt
new file mode 100644
index 0000000..155154c
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-hinge.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.Hinge"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.Hinge\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'None\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-huber.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-huber.pbtxt
new file mode 100644
index 0000000..5052c19
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-huber.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.Huber"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.Huber\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'delta\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'1.0\', \'sum_over_batch_size\', \'huber_loss\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-k-l-divergence.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-k-l-divergence.pbtxt
new file mode 100644
index 0000000..b739c05
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-k-l-divergence.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.KLDivergence"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.KLDivergence\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'kullback_leibler_divergence\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-log-cosh.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-log-cosh.pbtxt
new file mode 100644
index 0000000..557cc21
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-log-cosh.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.LogCosh"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.LogCosh\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'logcosh\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-log-loss.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-log-loss.pbtxt
new file mode 100644
index 0000000..65b1fea
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-log-loss.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.LogLoss"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.LogLoss\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'logloss\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-loss.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-loss.pbtxt
new file mode 100644
index 0000000..2bcc6f8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-loss.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.losses.Loss"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'None\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-absolute-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-absolute-error.pbtxt
new file mode 100644
index 0000000..a33db29
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-absolute-error.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.MeanAbsoluteError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.MeanAbsoluteError\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'mean_absolute_error\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-absolute-percentage-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-absolute-percentage-error.pbtxt
new file mode 100644
index 0000000..4c79a5e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-absolute-percentage-error.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.MeanAbsolutePercentageError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.MeanAbsolutePercentageError\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'mean_absolute_percentage_error\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-squared-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-squared-error.pbtxt
new file mode 100644
index 0000000..b99e194
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-squared-error.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.MeanSquaredError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.MeanSquaredError\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'mean_squared_error\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-squared-logarithmic-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-squared-logarithmic-error.pbtxt
new file mode 100644
index 0000000..e018273
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-mean-squared-logarithmic-error.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.MeanSquaredLogarithmicError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.MeanSquaredLogarithmicError\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'mean_squared_logarithmic_error\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-poisson.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-poisson.pbtxt
new file mode 100644
index 0000000..b6603cb
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-poisson.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.Poisson"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.Poisson\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'poisson\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-reduction.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-reduction.pbtxt
index 6a44e4c..e4ae87e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.losses.-reduction.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-reduction.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.losses.Reduction"
 tf_class {
-  is_instance: "<class \'tensorflow.python.ops.losses.losses_impl.ReductionV2\'>"
+  is_instance: "<class \'tensorflow.python.keras.utils.losses_utils.ReductionV2\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "NONE"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-sparse-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-sparse-categorical-crossentropy.pbtxt
new file mode 100644
index 0000000..5e3ce6f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-sparse-categorical-crossentropy.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.SparseCategoricalCrossentropy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.SparseCategoricalCrossentropy\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'from_logits\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'sum_over_batch_size\', \'None\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.-squared-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.-squared-hinge.pbtxt
new file mode 100644
index 0000000..b5e3757
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.-squared-hinge.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.losses.SquaredHinge"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.losses.SquaredHinge\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.LossFunctionWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.losses.Loss\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'reduction\', \'name\'], varargs=None, keywords=None, defaults=[\'sum_over_batch_size\', \'squared_hinge\'], "
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.losses.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.losses.pbtxt
index 36007d3..d906734 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.losses.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.losses.pbtxt
@@ -1,7 +1,187 @@
 path: "tensorflow.losses"
 tf_module {
   member {
+    name: "BinaryCrossentropy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CategoricalCrossentropy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CategoricalHinge"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineProximity"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Hinge"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Huber"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "KLDivergence"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "LogCosh"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "LogLoss"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Loss"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanAbsoluteError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanAbsolutePercentageError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanSquaredError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanSquaredLogarithmicError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Poisson"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "Reduction"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SparseCategoricalCrossentropy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SquaredHinge"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "KLD"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MAE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MAPE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MSE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MSLE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "binary_crossentropy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'False\', \'0\'], "
+  }
+  member_method {
+    name: "categorical_crossentropy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'False\', \'0\'], "
+  }
+  member_method {
+    name: "categorical_hinge"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "cosine"
+    argspec: "args=[\'y_true\', \'y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'-1\'], "
+  }
+  member_method {
+    name: "cosine_proximity"
+    argspec: "args=[\'y_true\', \'y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'-1\'], "
+  }
+  member_method {
+    name: "deserialize"
+    argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get"
+    argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "hinge"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "kld"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "kullback_leibler_divergence"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "logcosh"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mae"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mape"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_absolute_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_absolute_percentage_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_squared_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_squared_logarithmic_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mse"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "msle"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "poisson"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "serialize"
+    argspec: "args=[\'loss\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "sparse_categorical_crossentropy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], "
+  }
+  member_method {
+    name: "squared_hinge"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
index a0b8e9e..b1118f0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
@@ -301,6 +301,10 @@
     argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
   }
   member_method {
+    name: "reduce_euclidean_norm"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
+  member_method {
     name: "reduce_logsumexp"
     argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-a-u-c.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-a-u-c.pbtxt
new file mode 100644
index 0000000..9aa5937
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-a-u-c.pbtxt
@@ -0,0 +1,200 @@
+path: "tensorflow.metrics.AUC"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.AUC\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'num_thresholds\', \'curve\', \'summation_method\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'200\', \'ROC\', \'interpolation\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interpolate_pr_auc"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-accuracy.pbtxt
new file mode 100644
index 0000000..f4cfade
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-accuracy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.Accuracy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Accuracy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'accuracy\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-accuracy.pbtxt
new file mode 100644
index 0000000..84198f3
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-accuracy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.BinaryAccuracy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.BinaryAccuracy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\', \'threshold\'], varargs=None, keywords=None, defaults=[\'binary_accuracy\', \'None\', \'0.5\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
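Per the __init__ argspec above, tf.metrics.BinaryAccuracy adds a threshold argument (default 0.5) on top of the common (name, dtype) pair. A usage sketch, assuming the standard semantics that y_pred is binarized at the threshold before comparison:

    import tensorflow as tf

    m = tf.metrics.BinaryAccuracy(threshold=0.5)
    m.update_state([1, 1, 0, 0], [0.98, 0.6, 0.3, 0.2])
    print(m.result())  # 1.0 -- all four predictions land on the correct side of 0.5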
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-crossentropy.pbtxt
new file mode 100644
index 0000000..d51c6a7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-crossentropy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.BinaryCrossentropy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.BinaryCrossentropy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'binary_crossentropy\', \'None\', \'False\', \'0\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
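The from_logits and label_smoothing arguments recorded in the argspec above mirror the corresponding loss. A sketch of the streaming mean this metric computes (values assume natural-log crossentropy):

    import tensorflow as tf

    m = tf.metrics.BinaryCrossentropy(from_logits=False)
    m.update_state([1.0, 0.0], [0.9, 0.1])
    print(m.result())  # ~0.105 -- mean of -log(0.9) and -log(1 - 0.1)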
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-accuracy.pbtxt
new file mode 100644
index 0000000..67e14fa
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-accuracy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.CategoricalAccuracy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.CategoricalAccuracy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'categorical_accuracy\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
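CategoricalAccuracy takes one-hot y_true and compares argmaxes; a sketch under that assumption:

    import tensorflow as tf

    m = tf.metrics.CategoricalAccuracy()
    m.update_state([[0, 0, 1], [0, 1, 0]],
                   [[0.1, 0.1, 0.8], [0.6, 0.3, 0.1]])
    print(m.result())  # 0.5 -- the argmax matches the one-hot label for the first sample only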
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-crossentropy.pbtxt
new file mode 100644
index 0000000..33cd4c5
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-crossentropy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.CategoricalCrossentropy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.CategoricalCrossentropy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'categorical_crossentropy\', \'None\', \'False\', \'0\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
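CategoricalCrossentropy has the same constructor surface as the binary variant (from_logits, label_smoothing), applied to one-hot multi-class targets. A sketch:

    import tensorflow as tf

    m = tf.metrics.CategoricalCrossentropy()
    m.update_state([[0, 0, 1]], [[0.1, 0.2, 0.7]])
    print(m.result())  # ~0.357 = -log(0.7), the probability assigned to the true class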
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-hinge.pbtxt
new file mode 100644
index 0000000..f5c90fa
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-hinge.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.CategoricalHinge"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.CategoricalHinge\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'categorical_hinge\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
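CategoricalHinge streams the mean of max(0, 1 + neg - pos), where pos is the score assigned to the true class and neg the best wrong-class score. A sketch under that standard definition:

    import tensorflow as tf

    m = tf.metrics.CategoricalHinge()
    m.update_state([[0.0, 1.0]], [[0.2, 0.8]])
    print(m.result())  # 0.4 = max(0, 1 + 0.2 - 0.8)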
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-cosine-proximity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-cosine-proximity.pbtxt
new file mode 100644
index 0000000..329d64f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-cosine-proximity.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.CosineProximity"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.CosineProximity\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\', \'axis\'], varargs=None, keywords=None, defaults=[\'cosine_proximity\', \'None\', \'-1\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
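CosineProximity is the one metric here with an axis argument (default -1, per the argspec above). Sign conventions for cosine proximity varied across Keras versions, so the sketch below sticks to orthogonal vectors, where the result is 0 under either convention:

    import tensorflow as tf

    m = tf.metrics.CosineProximity(axis=-1)
    m.update_state([[1.0, 0.0]], [[0.0, 1.0]])
    print(m.result())  # 0.0 -- orthogonal vectors have zero cosine similarity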
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-negatives.pbtxt
new file mode 100644
index 0000000..e9e32ad
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-negatives.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.FalseNegatives"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.FalseNegatives\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
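Note the _ConfusionMatrixConditionCount base class in the is_instance list above, and that the constructor takes thresholds (it accepts a scalar or a list) rather than a single threshold. FalseNegatives counts actual positives predicted below the threshold; a sketch:

    import tensorflow as tf

    m = tf.metrics.FalseNegatives(thresholds=0.5)
    m.update_state([1, 1, 0, 1], [0.9, 0.3, 0.1, 0.8])
    print(m.result())  # 1.0 -- only the second sample is an actual positive scored below 0.5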
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-positives.pbtxt
new file mode 100644
index 0000000..45a2c48
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-positives.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.FalsePositives"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.FalsePositives\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
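FalsePositives shares its entire surface with FalseNegatives and counts the opposite confusion-matrix cell, actual negatives predicted above the threshold:

    import tensorflow as tf

    m = tf.metrics.FalsePositives(thresholds=0.5)
    m.update_state([0, 0, 1, 0], [0.9, 0.2, 0.8, 0.6])
    print(m.result())  # 2.0 -- two actual negatives are scored above 0.5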
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-hinge.pbtxt
new file mode 100644
index 0000000..d2132fd
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-hinge.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.Hinge"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Hinge\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'hinge\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
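Hinge streams mean(max(0, 1 - y_true * y_pred)). In this era of Keras that formula is applied to the labels as given (no 0/1 conversion), so the sketch uses -1/+1 targets, where the convention is unambiguous:

    import tensorflow as tf

    m = tf.metrics.Hinge()
    m.update_state([[-1.0, 1.0]], [[-0.5, 0.7]])
    print(m.result())  # 0.4 = mean(max(0, 1 - 0.5), max(0, 1 - 0.7))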
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-k-l-divergence.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-k-l-divergence.pbtxt
new file mode 100644
index 0000000..bbd0db9
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-k-l-divergence.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.KLDivergence"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.KLDivergence\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'kullback_leibler_divergence\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
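
Per the argspec above, KLDivergence defaults its name to 'kullback_leibler_divergence' and, like the other MeanMetricWrapper subclasses in this change, averages a per-sample loss value over batches. A hedged sketch (values illustrative; Keras clips predictions by a small epsilon before taking logs, so the zero entry contributes negligibly):

import tensorflow as tf

kld = tf.metrics.KLDivergence()
# per-sample value: sum over the last axis of y_true * log(y_true / y_pred)
kld.update_state(y_true=[[0.0, 1.0]], y_pred=[[0.4, 0.6]])
print(kld.result().numpy())  # approximately log(1 / 0.6) ~= 0.51 for this batch
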
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-log-cosh-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-log-cosh-error.pbtxt
new file mode 100644
index 0000000..b66eda8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-log-cosh-error.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.LogCoshError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.LogCoshError\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'logcosh\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
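
LogCoshError wires the 'logcosh' loss named in its __init__ default into the same MeanMetricWrapper machinery; log(cosh(x)) behaves like x**2 / 2 for small errors and like |x| - log(2) for large ones, giving a smooth, outlier-tolerant squared error. A sketch under the same TF 2.x eager assumption:

import tensorflow as tf

lce = tf.metrics.LogCoshError()
lce.update_state(y_true=[[0.0], [1.0]], y_pred=[[1.0], [1.0]])
# mean over samples of log(cosh(y_pred - y_true))
print(lce.result().numpy())  # ~0.217 = log(cosh(1.0)) / 2 for this batch
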
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-absolute-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-absolute-error.pbtxt
new file mode 100644
index 0000000..90a6b06
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-absolute-error.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.MeanAbsoluteError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanAbsoluteError\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean_absolute_error\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
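
MeanAbsoluteError is the streaming counterpart of the 'mean_absolute_error' loss in its __init__ default, and its update_state accepts the optional sample_weight shown above. Illustrative sketch:

import tensorflow as tf

mae = tf.metrics.MeanAbsoluteError()
mae.update_state(y_true=[[0.0], [2.0]], y_pred=[[1.0], [2.0]])
print(mae.result().numpy())  # 0.5: mean of |0-1| and |2-2|
mae.update_state(y_true=[[0.0]], y_pred=[[3.0]], sample_weight=[2.0])
print(mae.result().numpy())  # weighted running mean across both updates
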
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-absolute-percentage-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-absolute-percentage-error.pbtxt
new file mode 100644
index 0000000..8710a45
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-absolute-percentage-error.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.MeanAbsolutePercentageError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanAbsolutePercentageError\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean_absolute_percentage_error\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
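
MeanAbsolutePercentageError reports the error as a percentage of y_true (Keras clips the denominator away from zero), so a result of 25.0 means predictions are off by 25% on average. Sketch with illustrative values:

import tensorflow as tf

mape = tf.metrics.MeanAbsolutePercentageError()
mape.update_state(y_true=[[2.0], [4.0]], y_pred=[[1.0], [4.0]])
# 100 * mean(|y_true - y_pred| / y_true) = 100 * mean([0.5, 0.0]) = 25.0
print(mape.result().numpy())
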
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-io-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-io-u.pbtxt
new file mode 100644
index 0000000..dcb4883
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-io-u.pbtxt
@@ -0,0 +1,196 @@
+path: "tensorflow.metrics.MeanIoU"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanIoU\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'num_classes\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
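
Unlike the wrapper metrics, MeanIoU subclasses Metric directly and takes a required num_classes: it accumulates a num_classes x num_classes confusion matrix across batches and reports the mean per-class intersection-over-union. Sketch with dense integer labels (values illustrative):

import tensorflow as tf

miou = tf.metrics.MeanIoU(num_classes=2)
miou.update_state(y_true=[0, 0, 1, 1], y_pred=[0, 1, 0, 1])
# confusion matrix [[1, 1], [1, 1]] -> IoU = 1 / (2 + 2 - 1) = 1/3 per class
print(miou.result().numpy())  # ~0.3333
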
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-relative-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-relative-error.pbtxt
new file mode 100644
index 0000000..207f262
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-relative-error.pbtxt
@@ -0,0 +1,198 @@
+path: "tensorflow.metrics.MeanRelativeError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanRelativeError\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'normalizer\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
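
MeanRelativeError subclasses Mean rather than MeanMetricWrapper, and its required normalizer argument scales each absolute error before the running mean is taken. Sketch:

import tensorflow as tf

mre = tf.metrics.MeanRelativeError(normalizer=[1.0, 3.0, 2.0, 3.0])
mre.update_state(y_true=[1.0, 3.0, 2.0, 3.0], y_pred=[2.0, 4.0, 6.0, 8.0])
# mean(|y_pred - y_true| / normalizer) = mean([1, 1/3, 2, 5/3]) = 1.25
print(mre.result().numpy())
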
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-squared-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-squared-error.pbtxt
new file mode 100644
index 0000000..0ce4c95
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-squared-error.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.MeanSquaredError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanSquaredError\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean_squared_error\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
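
MeanSquaredError streams the familiar squared-error mean; because it averages per-sample values, updating batch by batch matches evaluating one large equally weighted batch. Sketch:

import tensorflow as tf

mse = tf.metrics.MeanSquaredError()
mse.update_state(y_true=[[0.0], [1.0]], y_pred=[[1.0], [1.0]])
print(mse.result().numpy())  # 0.5: mean of (0-1)**2 and (1-1)**2
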
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-squared-logarithmic-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-squared-logarithmic-error.pbtxt
new file mode 100644
index 0000000..6ad4089
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-squared-logarithmic-error.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.MeanSquaredLogarithmicError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanSquaredLogarithmicError\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean_squared_logarithmic_error\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
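
MeanSquaredLogarithmicError averages (log(1 + y_true) - log(1 + y_pred))**2, penalizing relative rather than absolute differences, which suits targets spanning orders of magnitude. Sketch:

import tensorflow as tf

msle = tf.metrics.MeanSquaredLogarithmicError()
msle.update_state(y_true=[[1.0]], y_pred=[[3.0]])
# (log(2) - log(4))**2 = log(2)**2 ~= 0.4805
print(msle.result().numpy())
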
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-tensor.pbtxt
new file mode 100644
index 0000000..31dc0cc
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean-tensor.pbtxt
@@ -0,0 +1,204 @@
+path: "tensorflow.metrics.MeanTensor"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanTensor\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "count"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "total"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean_tensor\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'values\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
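Judging from the default name 'mean_tensor' in the __init__ argspec just above, this golden appears to pin tf.metrics.MeanTensor, which tracks an element-wise running mean rather than a scalar. A minimal usage sketch, assuming eager execution on the 2.x preview builds this change targets:

    import tensorflow as tf

    m = tf.metrics.MeanTensor()            # default name 'mean_tensor'
    m.update_state([0., 1., 2., 3.])       # sample_weight defaults to None
    m.update_state([4., 5., 6., 7.])
    print(m.result().numpy())              # element-wise mean: [2. 3. 4. 5.]
    m.reset_states()                       # clears accumulated totals and counts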
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean.pbtxt
new file mode 100644
index 0000000..7946651
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.Mean"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'values\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
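tf.metrics.Mean accumulates a scalar (optionally weighted) mean across calls; per the argspec above, update_state takes values plus an optional sample_weight. A minimal eager-mode sketch:

    import tensorflow as tf

    m = tf.metrics.Mean()                      # default name 'mean'
    m.update_state([1., 3., 5., 7.])           # running total 16, count 4
    print(m.result().numpy())                  # 4.0
    m.update_state([9.], sample_weight=[2.])   # total 16 + 18, count 4 + 2
    print(m.result().numpy())                  # 34 / 6 ~= 5.667
    m.reset_states()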
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-metric.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-metric.pbtxt
new file mode 100644
index 0000000..d182698
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-metric.pbtxt
@@ -0,0 +1,195 @@
+path: "tensorflow.metrics.Metric"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+}
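tf.metrics.Metric is the abstract base every other golden in this directory inherits from: a subclass creates its state with add_weight (whose VariableAggregation.SUM / VariableSynchronization.ON_READ defaults above keep the state safe to aggregate across replicas) and implements update_state and result. A hypothetical subclass, as a sketch rather than canonical style:

    import tensorflow as tf

    class RunningSum(tf.metrics.Metric):
      """Hypothetical metric: a plain sum of every value seen so far."""

      def __init__(self, name='running_sum', dtype=None):
        super(RunningSum, self).__init__(name=name, dtype=dtype)
        # add_weight defaults to shape=(), SUM aggregation, ON_READ sync.
        self.total = self.add_weight('total', initializer='zeros')

      def update_state(self, values, sample_weight=None):
        values = tf.cast(values, self.dtype)
        if sample_weight is not None:
          values = values * tf.cast(sample_weight, self.dtype)
        self.total.assign_add(tf.reduce_sum(values))

      def result(self):
        return self.total

    m = RunningSum()
    m.update_state([1., 2., 3.])
    print(m.result().numpy())  # 6.0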
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-poisson.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-poisson.pbtxt
new file mode 100644
index 0000000..2027397
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-poisson.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.Poisson"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Poisson\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'poisson\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
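tf.metrics.Poisson is a MeanMetricWrapper (see its is_instance chain above): each update_state computes the Poisson loss, roughly y_pred - y_true * log(y_pred) per example, and folds it into a running mean. Sketch:

    import tensorflow as tf

    m = tf.metrics.Poisson()
    m.update_state(y_true=[1., 9.], y_pred=[2., 8.])
    # result() is the mean of the per-example Poisson losses so far
    print(m.result().numpy())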
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-precision.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-precision.pbtxt
new file mode 100644
index 0000000..2ebe442
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-precision.pbtxt
@@ -0,0 +1,196 @@
+path: "tensorflow.metrics.Precision"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Precision\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'thresholds\', \'top_k\', \'class_id\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
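tf.metrics.Precision accumulates true positives over predicted positives; thresholds, top_k, and class_id in the argspec above control how y_pred is binarized (thresholds=None behaves like a single 0.5 cut-off). Sketch:

    import tensorflow as tf

    m = tf.metrics.Precision()
    m.update_state([0, 1, 1, 1], [1, 0, 1, 1])
    # predicted positives: 3 (indices 0, 2, 3); true positives: 2 (indices 2, 3)
    print(m.result().numpy())  # 2 / 3 ~= 0.6667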
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-recall.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-recall.pbtxt
new file mode 100644
index 0000000..74489f6
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-recall.pbtxt
@@ -0,0 +1,196 @@
+path: "tensorflow.metrics.Recall"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Recall\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'thresholds\', \'top_k\', \'class_id\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
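tf.metrics.Recall is the mirror image: true positives over actual positives, with the same thresholds/top_k/class_id knobs. Sketch:

    import tensorflow as tf

    m = tf.metrics.Recall()
    m.update_state([0, 1, 1, 1], [1, 0, 1, 1])
    # actual positives: 3 (indices 1, 2, 3); true positives: 2 (indices 2, 3)
    print(m.result().numpy())  # 2 / 3 ~= 0.6667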
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-root-mean-squared-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-root-mean-squared-error.pbtxt
new file mode 100644
index 0000000..f408836
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-root-mean-squared-error.pbtxt
@@ -0,0 +1,198 @@
+path: "tensorflow.metrics.RootMeanSquaredError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.RootMeanSquaredError\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'root_mean_squared_error\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
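tf.metrics.RootMeanSquaredError subclasses Mean (per the chain above): it accumulates squared errors like a mean metric and takes the square root in result. Sketch:

    import tensorflow as tf

    m = tf.metrics.RootMeanSquaredError()
    m.update_state([0., 0., 1., 1.], [1., 0., 1., 0.])
    # squared errors [1, 0, 0, 1] -> mean 0.5 -> sqrt(0.5)
    print(m.result().numpy())  # ~0.7071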
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sensitivity-at-specificity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sensitivity-at-specificity.pbtxt
new file mode 100644
index 0000000..a6233b6
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sensitivity-at-specificity.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.SensitivityAtSpecificity"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivityAtSpecificity\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'specificity\', \'num_thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'200\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
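tf.metrics.SensitivityAtSpecificity sweeps num_thresholds candidate thresholds (200 by default, per the argspec above) and reports the sensitivity at the threshold that best satisfies the requested specificity; only specificity is required. A sketch; the exact result depends on how the threshold grid lands:

    import tensorflow as tf

    m = tf.metrics.SensitivityAtSpecificity(specificity=0.5)
    m.update_state([0, 0, 0, 1, 1], [0.0, 0.3, 0.8, 0.3, 0.8])
    # sensitivity measured at the swept threshold meeting specificity >= 0.5
    print(m.result().numpy())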
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
new file mode 100644
index 0000000..7dfbf3f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.SparseCategoricalAccuracy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.SparseCategoricalAccuracy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_accuracy\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
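tf.metrics.SparseCategoricalAccuracy compares integer labels against per-class score rows, counting a hit when the argmax of the row matches the label. Sketch:

    import tensorflow as tf

    m = tf.metrics.SparseCategoricalAccuracy()
    # row 0 argmax is class 1 (label 2: miss); row 1 argmax is class 1 (label 1: hit)
    m.update_state([[2], [1]], [[0.1, 0.6, 0.3], [0.05, 0.95, 0.0]])
    print(m.result().numpy())  # 0.5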
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-crossentropy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-crossentropy.pbtxt
new file mode 100644
index 0000000..0cb0007
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-crossentropy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.SparseCategoricalCrossentropy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.SparseCategoricalCrossentropy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_crossentropy\', \'None\', \'False\', \'-1\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
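[Editor's note: the golden file above pins down the stateful-metric protocol for the new tf.metrics.SparseCategoricalCrossentropy: update_state(y_true, y_pred, sample_weight=None) accumulates, result() reads the running value, reset_states() clears it. A minimal sketch against that argspec, with illustrative values and assuming eager TF 2.x:

    import tensorflow as tf

    m = tf.metrics.SparseCategoricalCrossentropy()  # from_logits=False, axis=-1 per the argspec
    m.update_state(y_true=[1, 2],
                   y_pred=[[0.05, 0.9, 0.05], [0.1, 0.1, 0.8]])
    print(m.result().numpy())  # running mean of the per-example crossentropy
    m.reset_states()           # clear accumulators, e.g. between epochs
]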
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-top-k-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-top-k-categorical-accuracy.pbtxt
new file mode 100644
index 0000000..d7e4344
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-top-k-categorical-accuracy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.SparseTopKCategoricalAccuracy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.SparseTopKCategoricalAccuracy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'k\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'5\', \'sparse_top_k_categorical_accuracy\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
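[Editor's note: per the __init__ argspec above, SparseTopKCategoricalAccuracy takes integer labels and defaults to k=5. A short sketch with toy values (k lowered to 2 so the example is self-checking):

    m = tf.metrics.SparseTopKCategoricalAccuracy(k=2)
    m.update_state(y_true=[2, 1],
                   y_pred=[[0.1, 0.2, 0.7], [0.05, 0.95, 0.0]])
    print(m.result().numpy())  # fraction of labels found in each row's top-2 scores
]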
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-specificity-at-sensitivity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-specificity-at-sensitivity.pbtxt
new file mode 100644
index 0000000..a9dbf70
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-specificity-at-sensitivity.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.SpecificityAtSensitivity"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.SpecificityAtSensitivity\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'sensitivity\', \'num_thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'200\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
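[Editor's note: SpecificityAtSensitivity's argspec requires a sensitivity target and defaults num_thresholds to 200. A hedged sketch with illustrative binary labels and scores; the reported value is the specificity at the best threshold that meets the sensitivity constraint:

    m = tf.metrics.SpecificityAtSensitivity(sensitivity=0.8)  # num_thresholds=200 by default
    m.update_state(y_true=[0, 0, 1, 1], y_pred=[0.1, 0.4, 0.35, 0.9])
    print(m.result().numpy())
]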
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-squared-hinge.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-squared-hinge.pbtxt
new file mode 100644
index 0000000..76ecc8c
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-squared-hinge.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.SquaredHinge"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.SquaredHinge\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'squared_hinge\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sum.pbtxt
new file mode 100644
index 0000000..1642e1e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sum.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.Sum"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.Sum\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'sum\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'values\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
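[Editor's note: unlike the wrapper metrics above, Sum's update_state takes raw values rather than a (y_true, y_pred) pair, as its argspec shows. A minimal sketch:

    m = tf.metrics.Sum()
    m.update_state(values=[1.0, 2.0, 3.0], sample_weight=[1.0, 1.0, 0.0])
    print(m.result().numpy())  # 3.0: the weighted running total
]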
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-top-k-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-top-k-categorical-accuracy.pbtxt
new file mode 100644
index 0000000..d47520f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-top-k-categorical-accuracy.pbtxt
@@ -0,0 +1,199 @@
+path: "tensorflow.metrics.TopKCategoricalAccuracy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.TopKCategoricalAccuracy\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Reduce\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'k\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'5\', \'top_k_categorical_accuracy\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-negatives.pbtxt
new file mode 100644
index 0000000..e8b55f8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-negatives.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.TrueNegatives"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.TrueNegatives\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-positives.pbtxt
new file mode 100644
index 0000000..9e7274d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-positives.pbtxt
@@ -0,0 +1,197 @@
+path: "tensorflow.metrics.TruePositives"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.metrics.TruePositives\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
+  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "activity_regularizer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "inbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "input_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "losses"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "non_trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "outbound_nodes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_mask"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable_weights"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "updates"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "variables"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_loss"
+    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_metric"
+    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "add_update"
+    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "add_variable"
+    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\', \'dtype\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "build"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "call"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "compute_mask"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "count_params"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_input_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_losses_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_mask_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_output_shape_at"
+    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates_for"
+    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset_states"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "result"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_state"
+    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
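[Editor's note: TruePositives (and its siblings TrueNegatives, FalsePositives, FalseNegatives) accept a thresholds argument, defaulting to None per the argspec above. An illustrative sketch with an explicit 0.5 cut:

    m = tf.metrics.TruePositives(thresholds=0.5)
    m.update_state(y_true=[0, 1, 1, 1], y_pred=[0.2, 0.8, 0.4, 0.9])
    print(m.result().numpy())  # 2.0: predictions above 0.5 whose label is 1
]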
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.pbtxt
new file mode 100644
index 0000000..639186a
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.metrics.pbtxt
@@ -0,0 +1,271 @@
+path: "tensorflow.metrics"
+tf_module {
+  member {
+    name: "AUC"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Accuracy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "BinaryAccuracy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "BinaryCrossentropy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CategoricalAccuracy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CategoricalCrossentropy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CategoricalHinge"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineProximity"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FalseNegatives"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FalsePositives"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Hinge"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "KLDivergence"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "LogCoshError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Mean"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanAbsoluteError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanAbsolutePercentageError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanIoU"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanRelativeError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanSquaredError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanSquaredLogarithmicError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "MeanTensor"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Metric"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Poisson"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Precision"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Recall"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RootMeanSquaredError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SensitivityAtSpecificity"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseCategoricalAccuracy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseCategoricalCrossentropy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseTopKCategoricalAccuracy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SpecificityAtSensitivity"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SquaredHinge"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Sum"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TopKCategoricalAccuracy"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TrueNegatives"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TruePositives"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "KLD"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MAE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MAPE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MSE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "MSLE"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "binary_accuracy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'threshold\'], varargs=None, keywords=None, defaults=[\'0.5\'], "
+  }
+  member_method {
+    name: "binary_crossentropy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'False\', \'0\'], "
+  }
+  member_method {
+    name: "categorical_accuracy"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "categorical_crossentropy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'label_smoothing\'], varargs=None, keywords=None, defaults=[\'False\', \'0\'], "
+  }
+  member_method {
+    name: "cosine"
+    argspec: "args=[\'y_true\', \'y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'-1\'], "
+  }
+  member_method {
+    name: "cosine_proximity"
+    argspec: "args=[\'y_true\', \'y_pred\', \'axis\'], varargs=None, keywords=None, defaults=[\'-1\'], "
+  }
+  member_method {
+    name: "deserialize"
+    argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get"
+    argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "hinge"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "kld"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "kullback_leibler_divergence"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mae"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mape"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_absolute_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_absolute_percentage_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_squared_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mean_squared_logarithmic_error"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "mse"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "msle"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "poisson"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "serialize"
+    argspec: "args=[\'metric\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "sparse_categorical_accuracy"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "sparse_categorical_crossentropy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'from_logits\', \'axis\'], varargs=None, keywords=None, defaults=[\'False\', \'-1\'], "
+  }
+  member_method {
+    name: "sparse_top_k_categorical_accuracy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'k\'], varargs=None, keywords=None, defaults=[\'5\'], "
+  }
+  member_method {
+    name: "squared_hinge"
+    argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "top_k_categorical_accuracy"
+    argspec: "args=[\'y_true\', \'y_pred\', \'k\'], varargs=None, keywords=None, defaults=[\'5\'], "
+  }
+}
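[Editor's note: the module golden above also exposes the functional forms alongside the classes. A short sketch exercising two of the listed argspecs, with illustrative tensors:

    labels = tf.constant([1, 2])
    logits = tf.constant([[2.0, 1.0, 0.1], [0.5, 0.3, 2.2]])
    # sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1)
    per_example = tf.metrics.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    # binary_accuracy(y_true, y_pred, threshold=0.5)
    acc = tf.metrics.binary_accuracy(
        tf.constant([1.0, 0.0]), tf.constant([0.7, 0.4]))
]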
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt
index ad5360e..70bb6d7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt
@@ -9,6 +9,10 @@
     argspec: "args=[\'structure\', \'expand_composites\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
   member_method {
+    name: "is_nested"
+    argspec: "args=[\'seq\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "map_structure"
     argspec: "args=[\'func\'], varargs=structure, keywords=kwargs, defaults=None"
   }
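[Editor's note: the new tf.nest.is_nested(seq) added above reports whether tf.nest will recurse into its argument. Illustrative calls, assuming the usual tf.nest semantics where tensors and strings are atoms:

    tf.nest.is_nested([1, 2, 3])         # True: lists are structures
    tf.nest.is_nested({'a': 1})          # True: so are dicts
    tf.nest.is_nested(tf.constant(1.0))  # False: a lone tensor is an atom
    tf.nest.is_nested("abc")             # False: strings are atoms, not sequences
]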
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
index 930cce3..5ec8856 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
@@ -22,7 +22,15 @@
   }
   member_method {
     name: "avg_pool"
-    argspec: "args=[\'value\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "avg_pool1d"
+    argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NWC\', \'None\'], "
+  }
+  member_method {
+    name: "avg_pool2d"
+    argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
   }
   member_method {
     name: "avg_pool3d"
@@ -53,32 +61,24 @@
     argspec: "args=[\'input\', \'filters\', \'stride\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NWC\', \'None\', \'None\'], "
   }
   member_method {
+    name: "conv1d_transpose"
+    argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NWC\', \'None\', \'None\'], "
+  }
+  member_method {
     name: "conv2d"
     argspec: "args=[\'input\', \'filters\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], "
   }
   member_method {
-    name: "conv2d_backprop_filter"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "conv2d_backprop_input"
-    argspec: "args=[\'input_sizes\', \'filters\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], "
-  }
-  member_method {
     name: "conv2d_transpose"
-    argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NHWC\', \'None\', \'None\'], "
   }
   member_method {
     name: "conv3d"
     argspec: "args=[\'input\', \'filters\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\', \'None\'], "
   }
   member_method {
-    name: "conv3d_backprop_filter"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], "
-  }
-  member_method {
     name: "conv3d_transpose"
-    argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NDHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NDHWC\', \'None\', \'None\'], "
   }
   member_method {
     name: "convolution"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
index 3205c6a..58d004b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
@@ -4,7 +4,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
index 8ba92fc..a9f7e85 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
@@ -3,7 +3,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt
new file mode 100644
index 0000000..2b476fa
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt
@@ -0,0 +1,71 @@
+path: "tensorflow.optimizers.Adadelta"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adadelta.Adadelta\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'Adadelta\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt
new file mode 100644
index 0000000..be2fedf
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt
@@ -0,0 +1,71 @@
+path: "tensorflow.optimizers.Adagrad"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adagrad.Adagrad\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'Adagrad\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt
new file mode 100644
index 0000000..919c433
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt
@@ -0,0 +1,71 @@
+path: "tensorflow.optimizers.Adam"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'Adam\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt
new file mode 100644
index 0000000..67fce4f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt
@@ -0,0 +1,71 @@
+path: "tensorflow.optimizers.Adamax"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adamax.Adamax\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'Adamax\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt
new file mode 100644
index 0000000..0636323
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt
@@ -0,0 +1,71 @@
+path: "tensorflow.optimizers.Nadam"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.nadam.Nadam\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'Nadam\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt
new file mode 100644
index 0000000..041922b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt
@@ -0,0 +1,70 @@
+path: "tensorflow.optimizers.Optimizer"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt
new file mode 100644
index 0000000..5deef61
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt
@@ -0,0 +1,71 @@
+path: "tensorflow.optimizers.RMSprop"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'RMSprop\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt
new file mode 100644
index 0000000..381f727
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt
@@ -0,0 +1,71 @@
+path: "tensorflow.optimizers.SGD"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.gradient_descent.SGD\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.0\', \'False\', \'SGD\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_gradients"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_updates"
+    argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_weights"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_weights"
+    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.pbtxt
new file mode 100644
index 0000000..ed846c7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.pbtxt
@@ -0,0 +1,51 @@
+path: "tensorflow.optimizers"
+tf_module {
+  member {
+    name: "Adadelta"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Adagrad"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Adam"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Adamax"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Nadam"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Optimizer"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RMSprop"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SGD"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "schedules"
+    mtype: "<type \'module\'>"
+  }
+  member_method {
+    name: "deserialize"
+    argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get"
+    argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "serialize"
+    argspec: "args=[\'optimizer\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-exponential-decay.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-exponential-decay.pbtxt
new file mode 100644
index 0000000..f174d6c
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-exponential-decay.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.optimizers.schedules.ExponentialDecay"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.ExponentialDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'decay_rate\', \'staircase\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-inverse-time-decay.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-inverse-time-decay.pbtxt
new file mode 100644
index 0000000..851d21c
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-inverse-time-decay.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.optimizers.schedules.InverseTimeDecay"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.InverseTimeDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'decay_rate\', \'staircase\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-learning-rate-schedule.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-learning-rate-schedule.pbtxt
new file mode 100644
index 0000000..36db36e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-learning-rate-schedule.pbtxt
@@ -0,0 +1,16 @@
+path: "tensorflow.optimizers.schedules.LearningRateSchedule"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-piecewise-constant-decay.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-piecewise-constant-decay.pbtxt
new file mode 100644
index 0000000..6613bed
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-piecewise-constant-decay.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.optimizers.schedules.PiecewiseConstantDecay"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.PiecewiseConstantDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'boundaries\', \'values\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-polynomial-decay.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-polynomial-decay.pbtxt
new file mode 100644
index 0000000..fbd5bce
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.-polynomial-decay.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.optimizers.schedules.PolynomialDecay"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.PolynomialDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'end_learning_rate\', \'power\', \'cycle\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0001\', \'1.0\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.pbtxt
new file mode 100644
index 0000000..8e1e61b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.schedules.pbtxt
@@ -0,0 +1,31 @@
+path: "tensorflow.optimizers.schedules"
+tf_module {
+  member {
+    name: "ExponentialDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "InverseTimeDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "LearningRateSchedule"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PiecewiseConstantDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PolynomialDecay"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "deserialize"
+    argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "serialize"
+    argspec: "args=[\'learning_rate_schedule\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index f020682..d26406d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -5,6 +5,10 @@
     mtype: "<type \'type\'>"
   }
   member {
+    name: "CriticalSection"
+    mtype: "<type \'type\'>"
+  }
+  member {
     name: "DType"
     mtype: "<type \'type\'>"
   }
@@ -21,6 +25,10 @@
     mtype: "<type \'type\'>"
   }
   member {
+    name: "Module"
+    mtype: "<class \'tensorflow.python.module.module.ModuleMetaclass\'>"
+  }
+  member {
     name: "Operation"
     mtype: "<type \'type\'>"
   }
@@ -209,6 +217,10 @@
     mtype: "<type \'module\'>"
   }
   member {
+    name: "metrics"
+    mtype: "<type \'module\'>"
+  }
+  member {
     name: "name_scope"
     mtype: "<type \'type\'>"
   }
@@ -229,6 +241,10 @@
     mtype: "<type \'type\'>"
   }
   member {
+    name: "optimizers"
+    mtype: "<type \'module\'>"
+  }
+  member {
     name: "qint16"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
@@ -765,6 +781,10 @@
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "numpy_function"
+    argspec: "args=[\'func\', \'inp\', \'Tout\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "one_hot"
     argspec: "args=[\'indices\', \'depth\', \'on_value\', \'off_value\', \'axis\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
index 28f26fe..86c62b3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
@@ -533,6 +533,10 @@
     argspec: "args=[\'input_dataset\', \'filename\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "Case"
+    argspec: "args=[\'branch_index\', \'input\', \'Tout\', \'branches\', \'output_shapes\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Cast"
     argspec: "args=[\'x\', \'DstT\', \'Truncate\'], varargs=None, keywords=None, defaults=None"
   }
@@ -569,6 +573,10 @@
     argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "CollectiveGather"
+    argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "CollectiveReduce"
     argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'subdiv_offsets\', \'wait_for\'], varargs=None, keywords=None, defaults=None"
   }
@@ -973,6 +981,10 @@
     argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "EuclideanNorm"
+    argspec: "args=[\'input\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Exit"
     argspec: "args=[\'data\'], varargs=None, keywords=None, defaults=None"
   }
@@ -2645,10 +2657,18 @@
     argspec: "args=[\'input\', \'input_min\', \'input_max\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "RequantizationRangePerChannel"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'clip_value_max\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Requantize"
     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'requested_output_min\', \'requested_output_max\', \'out_type\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "RequantizePerChannel"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'requested_output_min\', \'requested_output_max\', \'out_type\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "Reshape"
     argspec: "args=[\'tensor\', \'shape\'], varargs=None, keywords=None, defaults=None"
   }
@@ -3773,6 +3793,10 @@
     argspec: "args=[\'tensor\', \'indices\', \'element_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "TensorListScatterIntoExistingList"
+    argspec: "args=[\'input_handle\', \'tensor\', \'indices\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "TensorListScatterV2"
     argspec: "args=[\'tensor\', \'indices\', \'element_shape\', \'num_elements\'], varargs=None, keywords=None, defaults=None"
   }
@@ -3853,6 +3877,10 @@
     argspec: "args=[\'x\', \'perm\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "TridiagonalSolve"
+    argspec: "args=[\'diagonals\', \'rhs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
     name: "TruncateDiv"
     argspec: "args=[\'x\', \'y\'], varargs=None, keywords=None, defaults=None"
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.rnn.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.rnn.-dropout-wrapper.pbtxt
index 9f6ce04..53b61f0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.rnn.-dropout-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.rnn.-dropout-wrapper.pbtxt
@@ -8,7 +8,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.rnn.-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.rnn.-residual-wrapper.pbtxt
index 51dc8c1..9a303df 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.rnn.-residual-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.rnn.-residual-wrapper.pbtxt
@@ -8,7 +8,7 @@
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "activity_regularizer"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
index f6ab7ac..32d7c48 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
@@ -89,30 +89,10 @@
     mtype: "<type \'str\'>"
   }
   member_method {
-    name: "build_signature_def"
-    argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "classification_signature_def"
-    argspec: "args=[\'examples\', \'classes\', \'scores\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
     name: "contains_saved_model"
     argspec: "args=[\'export_dir\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
-    name: "is_valid_signature"
-    argspec: "args=[\'signature_def\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict_signature_def"
-    argspec: "args=[\'inputs\', \'outputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "regression_signature_def"
-    argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
     name: "save"
     argspec: "args=[\'obj\', \'export_dir\', \'signatures\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt
index c59f1b8..85edef9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt
@@ -5,6 +5,10 @@
     mtype: "<type \'type\'>"
   }
   member_method {
+    name: "audio"
+    argspec: "args=[\'name\', \'data\', \'sample_rate\', \'step\', \'max_outputs\', \'encoding\', \'description\'], varargs=None, keywords=None, defaults=[\'3\', \'None\', \'None\'], "
+  }
+  member_method {
     name: "create_file_writer"
     argspec: "args=[\'logdir\', \'max_queue\', \'flush_millis\', \'filename_suffix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
@@ -13,14 +17,34 @@
     argspec: "args=[\'writer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
+    name: "histogram"
+    argspec: "args=[\'name\', \'data\', \'step\', \'buckets\', \'description\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "image"
+    argspec: "args=[\'name\', \'data\', \'step\', \'max_outputs\', \'description\'], varargs=None, keywords=None, defaults=[\'3\', \'None\'], "
+  }
+  member_method {
     name: "import_event"
     argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
+    name: "record_if"
+    argspec: "args=[\'condition\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "scalar"
+    argspec: "args=[\'name\', \'data\', \'step\', \'description\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "summary_scope"
     argspec: "args=[\'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'summary\', \'None\'], "
   }
   member_method {
+    name: "text"
+    argspec: "args=[\'name\', \'data\', \'step\', \'description\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
     name: "write"
     argspec: "args=[\'tag\', \'tensor\', \'step\', \'metadata\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
index 3c77788..ac9dd8f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
@@ -17,6 +17,10 @@
     argspec: "args=[], varargs=None, keywords=None, defaults=None"
   }
   member_method {
+    name: "compute_gradient"
+    argspec: "args=[\'f\', \'x\', \'delta\'], varargs=None, keywords=None, defaults=[\'0.001\'], "
+  }
+  member_method {
     name: "create_local_cluster"
     argspec: "args=[\'num_workers\', \'num_ps\', \'protocol\', \'worker_config\', \'ps_config\'], varargs=None, keywords=None, defaults=[\'grpc\', \'None\', \'None\'], "
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-checkpoint.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.-checkpoint.pbtxt
index 42dcdac..629bc13 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-checkpoint.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.-checkpoint.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.train.Checkpoint"
 tf_class {
-  is_instance: "<class \'tensorflow.python.training.checkpointable.util.Checkpoint\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.tracking.AutoCheckpointable\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.Checkpointable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.util.Checkpoint\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.tracking.AutoTrackable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "save_counter"
diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD
index 3cbea41..b81ce69 100644
--- a/tensorflow/tools/api/tests/BUILD
+++ b/tensorflow/tools/api/tests/BUILD
@@ -15,11 +15,7 @@
 
 py_test(
     name = "api_compatibility_test",
-    srcs = [
-        "api_compatibility_test.py",
-        "//tensorflow:tf_python_api_gen_v2",
-    ],
-    args = ["--only_test_core_api=true"],
+    srcs = ["api_compatibility_test.py"],
     data = [
         "//tensorflow/tools/api/golden:api_golden_v1",
         "//tensorflow/tools/api/golden:api_golden_v2",
@@ -27,6 +23,7 @@
         "//tensorflow/tools/api/tests:README.txt",
     ],
     srcs_version = "PY2AND3",
+    tags = ["no_rocm"],
     deps = [
         "//tensorflow:tensorflow_py",
         "//tensorflow/python:client_testlib",
@@ -35,6 +32,7 @@
         "//tensorflow/tools/api/lib:python_object_to_proto_visitor",
         "//tensorflow/tools/common:public_api",
         "//tensorflow/tools/common:traverse",
+        "@six_archive//:six",
     ],
 )
 
diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index 9350ebb..fe52a2b 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -32,8 +32,8 @@
 import re
 import sys
 
+import six
 import tensorflow as tf
-from tensorflow._api.v2 import v2 as tf_v2
 
 from google.protobuf import message
 from google.protobuf import text_format
@@ -78,6 +78,13 @@
 _NON_CORE_PACKAGES = ['estimator']
 
 
+# TODO(annarev): remove this once we test with a newer version of
+# estimator that actually has a compat v1 version.
+if not hasattr(tf.compat.v1, 'estimator'):
+  tf.compat.v1.estimator = tf.estimator
+  tf.compat.v2.estimator = tf.estimator
+
+
 def _KeyToFilePath(key, api_version):
   """From a given key, construct a filepath.
 
@@ -135,6 +142,29 @@
   return filtered_file_list
 
 
+def _FilterGoldenProtoDict(golden_proto_dict, omit_golden_symbols_map):
+  """Filter out golden proto dict symbols that should be omitted."""
+  if not omit_golden_symbols_map:
+    return golden_proto_dict
+  filtered_proto_dict = dict(golden_proto_dict)
+  for key, symbol_list in six.iteritems(omit_golden_symbols_map):
+    api_object = api_objects_pb2.TFAPIObject()
+    api_object.CopyFrom(filtered_proto_dict[key])
+    filtered_proto_dict[key] = api_object
+    module_or_class = None
+    if api_object.HasField('tf_module'):
+      module_or_class = api_object.tf_module
+    elif api_object.HasField('tf_class'):
+      module_or_class = api_object.tf_class
+    if module_or_class is not None:
+      for members in (module_or_class.member, module_or_class.member_method):
+        filtered_members = [m for m in members if m.name not in symbol_list]
+        # Two steps because protobuf repeated fields disallow slice assignment.
+        del members[:]
+        members.extend(filtered_members)
+  return filtered_proto_dict
+
+
 class ApiCompatibilityTest(test.TestCase):
 
   def __init__(self, *args, **kwargs):
@@ -256,7 +286,7 @@
     visitor.do_not_descend_map['tf'].append('contrib')
     if FLAGS.only_test_core_api:
       visitor.do_not_descend_map['tf'].extend(_NON_CORE_PACKAGES)
-    traverse.traverse(tf_v2.compat.v1, visitor)
+    traverse.traverse(tf.compat.v1, visitor)
 
   def testNoSubclassOfMessageV2(self):
     if not hasattr(tf.compat, 'v2'):
@@ -271,7 +301,8 @@
                                    root,
                                    golden_file_pattern,
                                    api_version,
-                                   additional_private_map=None):
+                                   additional_private_map=None,
+                                   omit_golden_symbols_map=None):
     # Extract all API stuff.
     visitor = python_object_to_proto_visitor.PythonObjectToProtoVisitor()
 
@@ -304,6 +335,8 @@
         _FileNameToKey(filename): _ReadFileToProto(filename)
         for filename in golden_file_list
     }
+    golden_proto_dict = _FilterGoldenProtoDict(golden_proto_dict,
+                                               omit_golden_symbols_map)
 
     # Diff them. Do not fail if called with update.
     # If the test is run to update goldens, only report diffs but do not fail.
@@ -316,7 +349,7 @@
 
   @test_util.run_v1_only('b/120545219')
   def testAPIBackwardsCompatibility(self):
-    api_version = 1
+    api_version = 2 if '_api.v2' in tf.__name__ else 1
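+    # Uses the v2 goldens when the loaded `tf` module is the v2 API package.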
     golden_file_pattern = os.path.join(
         resource_loader.get_root_dir_with_all_resources(),
         _KeyToFilePath('*', api_version))
@@ -339,7 +372,7 @@
     golden_file_pattern = os.path.join(
         resource_loader.get_root_dir_with_all_resources(),
         _KeyToFilePath('*', api_version))
-    self._checkBackwardsCompatibility(tf_v2.compat.v1, golden_file_pattern,
+    self._checkBackwardsCompatibility(tf.compat.v1, golden_file_pattern,
                                       api_version)
 
   def testAPIBackwardsCompatibilityV2(self):
@@ -347,11 +380,17 @@
     golden_file_pattern = os.path.join(
         resource_loader.get_root_dir_with_all_resources(),
         _KeyToFilePath('*', api_version))
+    omit_golden_symbols_map = {}
+    if FLAGS.only_test_core_api:
+      # In TF 2.0 these summary symbols are imported from TensorBoard.
+      omit_golden_symbols_map['tensorflow.summary'] = [
+          'audio', 'histogram', 'image', 'scalar', 'text']
     self._checkBackwardsCompatibility(
-        tf_v2,
+        tf.compat.v2,
         golden_file_pattern,
         api_version,
-        additional_private_map={'tf.compat': ['v1', 'v2']})
+        additional_private_map={'tf.compat': ['v1', 'v2']},
+        omit_golden_symbols_map=omit_golden_symbols_map)
 
 
 if __name__ == '__main__':
@@ -363,7 +402,7 @@
   parser.add_argument(
       '--only_test_core_api',
       type=bool,
-      default=False,
+      default=True,  # only_test_core_api default value
       help=_ONLY_TEST_CORE_API_HELP)
   parser.add_argument(
       '--verbose_diffs', type=bool, default=True, help=_VERBOSE_DIFFS_HELP)
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index de93b12..e5187ab 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -254,6 +254,7 @@
   tensorflow::ConfigProto& config = options.config;
   if (num_threads > 0) {
     config.set_intra_op_parallelism_threads(num_threads);
+    config.set_inter_op_parallelism_threads(num_threads);
   }
   LOG(INFO) << "Got config, " << config.device_count_size() << " devices";
 
diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
index aadaa8b..f190199 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rocm
+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
@@ -44,9 +44,12 @@
   libboost-filesystem-dev \
   rpm \
   libnuma-dev \
+  pciutils \
   virtualenv \
   python-pip \
   python3-pip \
+  libxml2 \
+  libxml2-dev \
   wget && \
   apt-get clean && \
   rm -rf /var/lib/apt/lists/*
@@ -54,15 +57,12 @@
 # Install rocm pkgs
 RUN apt-get update --allow-insecure-repositories && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
-    rocm-dev rocm-libs rocm-utils \
+    rocm-dev rocm-libs rocm-utils rocm-cmake \
     rocfft miopen-hip miopengemm rocblas hipblas rocrand \
     rocm-profiler cxlactivitylogger && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
-RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb
-
 ENV HCC_HOME=$ROCM_PATH/hcc
 ENV HIP_PATH=$ROCM_PATH/hip
 ENV OPENCL_ROOT=$ROCM_PATH/opencl
@@ -71,12 +71,7 @@
 ENV PATH="$OPENCL_ROOT/bin:${PATH}"
 
 # Add target file to help determine which device(s) to build for
-RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst
-
-# Setup environment variables, and add those environment variables at the end of ~/.bashrc 
-ARG HCC_HOME=/opt/rocm/hcc
-ARG HIP_PATH=/opt/rocm/hip
-ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH
+RUN bash -c 'echo -e "gfx803\ngfx900\ngfx906" >> /opt/rocm/bin/target.lst'
 
 # Copy and run the install scripts.
 COPY install/*.sh /install/
diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh
new file mode 100755
index 0000000..03fc577
--- /dev/null
+++ b/tensorflow/tools/ci_build/builds/pip_new.sh
@@ -0,0 +1,668 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Build the Python PIP installation package for TensorFlow and install
+# the package.
+#
+# Usage:
+#   pip_new.sh
+#
+# Required step(s):
+#   Run configure.py prior to running this script.
+#
+# Required environment variable(s):
+#   CONTAINER_TYPE:      (CPU | GPU | ROCM)
+#   OS_TYPE:             (UBUNTU | MACOS)
+#   TF_PYTHON_VERSION:   (python2 | python2.7 | python3.5 | python3.7)
+#
+# Optional environment variables. If provided, overwrites any default values.
+#   TF_BUILD_FLAGS:      Bazel build flags excluding `--test_tag_filters` and
+#                        test targets.
+#                          e.g. TF_BUILD_FLAGS="--verbose_failures=true \
+#                               --build_tests_only --test_output=errors"
+#   TF_TEST_FILTER_TAGS: Filtering tags for bazel tests. More specifically,
+#                        input tags for the `--test_tag_filters` flag.
+#                          e.g. TF_TEST_FILTER_TAGS="no_pip,-nomac,no_oss"
+#   TF_TEST_TARGETS:     Bazel test targets.
+#                          e.g. TF_TEST_TARGETS="//tensorflow/contrib/... \
+#                               //tensorflow/... \
+#                               //tensorflow/python/... "
+#   TF_PIP_TESTS:        PIP tests to run. If NOT specified, skips all tests.
+#                          e.g. TF_PIP_TESTS="test_pip_virtualenv_clean \
+#                               test_pip_virtualenv_non_clean \
+#                               test_pip_virtualenv_oss_serial"
+#   IS_NIGHTLY:          Nightly run flag.
+#                          e.g. IS_NIGHTLY=1  # nightly runs
+#                          e.g. IS_NIGHTLY=0  # non-nightly runs
+#   TF_PROJECT_NAME:     Name of the project. This string becomes part of
+#                        the wheel file name. For nightly builds, it will be
+#                        overwritten to 'tf_nightly'. For gpu builds, '_gpu'
+#                        will be appended.
+#                          e.g. TF_PROJECT_NAME="tensorflow"
+#                          e.g. TF_PROJECT_NAME="tf_nightly_gpu"
+#   TF_PIP_TEST_ROOT:    Root directory for building and testing pip pkgs.
+#                          e.g. TF_PIP_TEST_ROOT="pip_test"
+#
+# To-be-deprecated variable(s).
+#   GIT_TAG_OVERRIDE:    Values for `--git_tag_override`. This flag gets passed
+#                        in as `--action_env` for bazel build and tests.
+#   TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES
+#                        Additional pip packages to be installed.
+#                        Caveat: the pip version needs to be checked beforehand.
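+#
+# Example invocation (illustrative; values depend on your CI environment):
+#   CONTAINER_TYPE=CPU OS_TYPE=UBUNTU TF_PYTHON_VERSION=python2.7 \
+#   TF_PIP_TESTS="test_pip_virtualenv_clean" \
+#   tensorflow/tools/ci_build/builds/pip_new.sh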
+
+# set bash options
+set -e
+set -x
+
+###########################################################################
+# General helper function(s)
+###########################################################################
+
+# Strip leading and trailing whitespace
+str_strip () {
+  echo -e "$1" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
+}
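+# e.g. str_strip "  --config=opt  " yields "--config=opt"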
+
+# Convert string to all lower case
+lowercase() {
+  if [[ -z "${1}" ]]; then
+    die "Nothing to convert to lowercase. No argument given."
+  fi
+  echo "${1}" | tr '[:upper:]' '[:lower:]'
+}
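+# e.g. lowercase "GPU" yields "gpu"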
+
+check_global_vars() {
+  # Check container type
+  if ! [[ ${CONTAINER_TYPE} == "cpu" ]] && \
+     ! [[ ${CONTAINER_TYPE} == "rocm" ]] && \
+     ! [[ ${CONTAINER_TYPE} == "gpu" ]]; then
+    die "Error: Provided CONTAINER_TYPE \"${CONTAINER_TYPE}\" "\
+        "is not supported."
+  fi
+  # Check OS type
+  if ! [[ ${OS_TYPE} == "ubuntu" ]] && \
+     ! [[ ${OS_TYPE} == "macos" ]]; then
+    die"Error: Provided OS_TYPE \"${OS_TYPE}\" is not supported."
+  fi
+}
+
+add_test_filter_tag() {
+  EMPTY=""
+  while true; do
+    FILTER="${1:$EMPTY}"
+    if ! [[ $BAZEL_TEST_FILTER_TAGS == *"${FILTER}"* ]]; then
+      BAZEL_TEST_FILTER_TAGS="${FILTER},${BAZEL_TEST_FILTER_TAGS}"
+    fi
+    shift
+    if [[ -z "${1}" ]]; then
+      break
+    fi
+  done
+}
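+# e.g. (illustrative) with BAZEL_TEST_FILTER_TAGS="-no_oss",
+# `add_test_filter_tag -no_pip` yields BAZEL_TEST_FILTER_TAGS="-no_pip,-no_oss"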
+
+remove_test_filter_tag() {
+  EMPTY=""
+  while true; do
+    FILTER="${1:$EMPTY}"
+    BAZEL_TEST_FILTER_TAGS="$(echo ${BAZEL_TEST_FILTER_TAGS} | sed -e 's/^'${FILTER}',//g' -e 's/,'${FILTER}'//g')"
+    shift
+    if [[ -z "${1}" ]]; then
+      break
+    fi
+  done
+}
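+# e.g. (illustrative) with BAZEL_TEST_FILTER_TAGS="-no_pip,-no_oss",
+# `remove_test_filter_tag -no_oss` yields BAZEL_TEST_FILTER_TAGS="-no_pip"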
+
+# Clean up bazel build & test flags with proper configuration.
+update_bazel_flags() {
+  # Add git tag override flag if necessary.
+  GIT_TAG_STR=" --action_env=GIT_TAG_OVERRIDE"
+  if [[ -z "${GIT_TAG_OVERRIDE}" ]] && \
+    ! [[ ${BAZEL_BUILD_FLAGS} = *${GIT_TAG_STR}* ]]; then
+    BAZEL_BUILD_FLAGS+="${GIT_TAG_STR}"
+  fi
+  # Clean up whitespaces
+  BAZEL_BUILD_FLAGS=$(str_strip "${BAZEL_BUILD_FLAGS}")
+  # Cleaned bazel flags
+  echo "Bazel build flags (cleaned):\n" "${BAZEL_BUILD_FLAGS}"
+}
+
+update_test_filter_tags() {
+  # Add test filter tags
+  # This script is for validating built PIP packages. Add pip tags.
+  add_test_filter_tag -no_pip -nopip
+  # MacOS filter tags
+  if [[ ${OS_TYPE} == "macos" ]]; then
+    remove_test_filter_tag nomac no_mac
+    add_test_filter_tag -nomac -no_mac
+  fi
+  echo "Final test filter tags: ${BAZEL_TEST_FILTER_TAGS}"
+}
+
+# Check currently running python and pip version
+check_python_pip_version() {
+  # Check if only the major version of python is provided by the user.
+  MAJOR_VER_ONLY=0
+  if [[ ${#PYTHON_VER} -lt 9 ]]; then
+    # User only provided major version (e.g. 'python2' instead of 'python2.7')
+    MAJOR_VER_ONLY=1
+  fi
+
+  # Retrieve only the version number of the user requested python.
+  PYTHON_VER_REQUESTED=${PYTHON_VER:6:3}
+  echo "PYTHON_VER_REQUESTED: ${PYTHON_VER_REQUESTED}"
+
+  # Retrieve only the version numbers of the python & pip in use currently.
+  PYTHON_VER_IN_USE=$(python --version 2>&1)
+  PYTHON_VER_IN_USE=${PYTHON_VER_IN_USE:7:3}
+  PIP_VER_IN_USE=$(pip --version)
+  PIP_VER_IN_USE=${PIP_VER_IN_USE:${#PIP_VER_IN_USE}-4:3}
+
+  # If only major versions are applied, drop minor versions.
+  if [[ $MAJOR_VER_ONLY == 1 ]]; then
+    PYTHON_VER_IN_USE=${PYTHON_VER_IN_USE:0:1}
+    PIP_VER_IN_USE=${PIP_VER_IN_USE:0:1}
+  fi
+
+  # Check if all versions match.
+  echo -e "User requested python version: '${PYTHON_VER_REQUESTED}'\n" \
+    "Detected python version in use: '${PYTHON_VER_IN_USE}'\n"\
+    "Detected pip version in use: '${PIP_VER_IN_USE}'"
+  if ! [[ $PYTHON_VER_REQUESTED == $PYTHON_VER_IN_USE ]]; then
+    die "Error: Mismatch in python versions detected."
+  else
+    echo "Python and PIP versions in use match the requested versions."
+  fi
+}
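+# e.g. PYTHON_VER="python3.5" requests "3.5"; a `python --version` output of
+# "Python 3.5.2" is parsed as "3.5", so the check passes.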
+
+###########################################################################
+# Setup: directories, local/global variables
+###########################################################################
+
+# Script directory and source necessary files.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/builds_common.sh"
+
+# Required global variables
+# Checks on values for these vars are done in "Build TF PIP Package" section.
+CONTAINER_TYPE=$(lowercase "${CONTAINER_TYPE}")
+OS_TYPE=$(lowercase "${OS_TYPE}")
+PYTHON_VER=$(lowercase "${TF_PYTHON_VERSION}")
+
+# Python bin path
+if [[ -z "$PYTHON_BIN_PATH" ]]; then
+  die "Error: PYTHON_BIN_PATH was not provided. Did you run configure?"
+fi
+# Get python version for configuring pip later in installation.
+PYTHON_VER_CFG=$(${PYTHON_BIN_PATH} -V 2>&1 | awk '{print $NF}' | cut -d. -f-2)
+echo "PYTHON_BIN_PATH: ${PYTHON_BIN_PATH} (version: ${PYTHON_VER_CFG})"
+
+# Default values for optional global variables in case they are not user
+# defined.
+DEFAULT_BAZEL_BUILD_FLAGS='--test_output=errors --verbose_failures=true'
+DEFAULT_BAZEL_TEST_FILTERS='-no_oss,-oss_serial'
+DEFAULT_BAZEL_TEST_TARGETS='//tensorflow/python/... -//tensorflow/core/... -//tensorflow/compiler/... '
+DEFAULT_PIP_TESTS="" # Do not run any tests by default
+DEFAULT_IS_NIGHTLY=0 # Not nightly by default
+DEFAULT_PROJECT_NAME="tensorflow"
+DEFAULT_PIP_TEST_ROOT="pip_test"
+
+# Take in optional global variables
+BAZEL_BUILD_FLAGS=${TF_BUILD_FLAGS:-$DEFAULT_BAZEL_BUILD_FLAGS}
+BAZEL_TEST_TARGETS=${TF_TEST_TARGETS:-$DEFAULT_BAZEL_TEST_TARGETS}
+BAZEL_TEST_FILTER_TAGS=${TF_TEST_FILTER_TAGS:-$DEFAULT_BAZEL_TEST_FILTERS}
+PIP_TESTS=${TF_PIP_TESTS:-$DEFAULT_PIP_TESTS}
+IS_NIGHTLY=${IS_NIGHTLY:-$DEFAULT_IS_NIGHTLY}
+PROJECT_NAME=${TF_PROJECT_NAME:-$DEFAULT_PROJECT_NAME}
+PIP_TEST_ROOT=${TF_PIP_TEST_ROOT:-$DEFAULT_PIP_TEST_ROOT}
+
+# Local variables
+PIP_WHL_DIR="${KOKORO_ARTIFACTS_DIR}/tensorflow/${PIP_TEST_ROOT}/whl"
+mkdir -p "${PIP_WHL_DIR}"
+PIP_WHL_DIR=$(realpath "${PIP_WHL_DIR}") # Get absolute path
+WHL_PATH=""
+# Determine the major.minor versions of python being used (e.g., 2.7).
+# Useful for determining the directory of the local pip installation.
+PY_MAJOR_MINOR_VER=$(${PYTHON_BIN_PATH} -V 2>&1 | awk '{print $NF}' | cut -d. -f-2)
+if [[ -z "${PY_MAJOR_MINOR_VER}" ]]; then
+  die "ERROR: Unable to determine the major.minor version of Python."
+fi
+echo "Python binary path to be used in PIP install: ${PYTHON_BIN_PATH} "\
+"(Major.Minor version: ${PY_MAJOR_MINOR_VER})"
+PYTHON_BIN_PATH_INIT=${PYTHON_BIN_PATH}
+PIP_BIN_PATH="$(which pip${PYTHON_VER_CFG})"
+PIP_BIN_PATH_INIT=${PIP_BIN_PATH}
+
+# PIP packages
+INSTALL_EXTRA_PIP_PACKAGES=${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES}
+
+###########################################################################
+# Build TF PIP Package
+###########################################################################
+
+# First, remove any existing binaries to ensure a clean build and test.
+if [[ -d ${PIP_TEST_ROOT} ]]; then
+  echo "Test root directory ${PIP_TEST_ROOT} already exists. Deleting it."
+  sudo rm -rf ${PIP_TEST_ROOT}
+fi
+
+# Check that global variables are properly set.
+check_global_vars
+
+# Check if in a virtualenv and exit if yes.
+IN_VENV=$(python -c 'import sys; print("1" if hasattr(sys, "real_prefix") else "0")')
+if [[ "$IN_VENV" == "1" ]]; then
+  echo "It appears that we are already in a virtualenv. Deactivating..."
+  deactivate || source deactivate || die "FAILED: Unable to deactivate from existing virtualenv."
+fi
+
+# Configure python. Obtain the path to python binary.
+source tools/python_bin_path.sh
+# Assume PYTHON_BIN_PATH is exported by the script above.
+if [[ -z "$PYTHON_BIN_PATH" ]]; then
+  die "PYTHON_BIN_PATH was not provided. Did you run configure?"
+fi
+
+# Bazel build the file.
+PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
+# Clean bazel cache.
+bazel clean
+# Clean up and update bazel flags
+update_bazel_flags
+# Build. This outputs the file `build_pip_package`.
+bazel build ${BAZEL_BUILD_FLAGS} ${PIP_BUILD_TARGET} || \
+  die "Error: Bazel build failed for target: '${PIP_BUILD_TARGET}'"
+
+###########################################################################
+# Test function(s)
+###########################################################################
+
+test_pip_virtualenv_clean() {
+  # Create a clean directory.
+  CLEAN_VENV_DIR="${PIP_TEST_ROOT}/venv_clean"
+
+  # activate virtual environment and install tensorflow with PIP.
+  create_activate_virtualenv --clean "${CLEAN_VENV_DIR}"
+  # Install TF with pip
+  install_tensorflow_pip "${WHL_PATH}"
+
+  # cd to a temporary directory to avoid picking up Python files in the source
+  # tree.
+  TMP_DIR=$(mktemp -d)
+  pushd "${TMP_DIR}"
+
+  # Run a quick check on tensorflow installation.
+  RET_VAL=$(python -c "import tensorflow as tf; t1=tf.constant([1,2,3,4]); t2=tf.constant([5,6,7,8]); print(tf.add(t1,t2))")
+
+  # Deactivate virtualenv.
+  deactivate || source deactivate || die "FAILED: Unable to deactivate from existing virtualenv."
+
+  # Return to original directory. Remove temp dirs.
+  popd
+  sudo rm -rf "${TMP_DIR}" "${CLEAN_VENV_DIR}"
+
+  # Check result to see if tensorflow is properly installed.
+  if [[ ${RET_VAL} == *'Tensor("Add:0", shape=(4,), dtype=int32)'* ]]; then
+    echo "PIP test on clean virtualenv PASSED."
+    return 0
+  else
+    echo "PIP test on clean virtualenv FAILED."
+    return 1
+  fi
+}
+
+test_pip_virtualenv_non_clean() {
+  # Create virtualenv directory for install test
+  VENV_DIR="${PIP_TEST_ROOT}/venv"
+
+  # Activate virtualenv
+  create_activate_virtualenv "${VENV_DIR}"
+  # Install TF with pip
+  install_tensorflow_pip "${WHL_PATH}"
+
+  # cd to a temporary directory to avoid picking up Python files in the source
+  # tree.
+  TMP_DIR=$(mktemp -d)
+  pushd "${TMP_DIR}"
+
+  # Run a quick check on tensorflow installation.
+  RET_VAL=$(python -c "import tensorflow as tf; t1=tf.constant([1,2,3,4]); t2=tf.constant([5,6,7,8]); print(tf.add(t1,t2))")
+
+  # Return to original directory. Remove temp dirs.
+  popd
+  sudo rm -rf "${TMP_DIR}"
+
+  # Check result to see if tensorflow is properly installed.
+  if ! [[ ${RET_VAL} == *'Tensor("Add:0", shape=(4,), dtype=int32)'* ]]; then
+    echo "PIP test on virtualenv (non-clean) FAILED"
+    return 1
+  fi
+
+  # Install extra pip packages, if specified.
+  for PACKAGE in ${INSTALL_EXTRA_PIP_PACKAGES}; do
+    echo "Installing extra pip package required by test-on-install: ${PACKAGE}"
+
+    ${PIP_BIN_PATH} install ${PACKAGE}
+    if [[ $? != 0 ]]; then
+      echo "${PIP_BIN_PATH} install ${PACKAGE} FAILED."
+      deactivate || source deactivate || die "FAILED: Unable to deactivate from existing virtualenv."
+      return 1
+    fi
+  done
+
+  # Run bazel test.
+  run_test_with_bazel
+  RESULT=$?
+
+  # Deactivate from virtualenv.
+  deactivate || source deactivate || die "FAILED: Unable to deactivate from existing virtualenv."
+  sudo rm -rf "${VENV_DIR}"
+
+  if [[ $RESULT -ne 0 ]]; then
+    echo "PIP test on virtualenv (non-clean) FAILED."
+    return 1
+  else
+    echo "PIP test on virtualenv (non-clean) PASSED."
+    return 0
+  fi
+}
+
+test_pip_virtualenv_oss_serial() {
+  # Create virtualenv directory
+  VENV_DIR="${PIP_TEST_ROOT}/venv"
+
+  create_activate_virtualenv "${VENV_DIR}"
+  run_test_with_bazel --oss_serial
+  RESULT=$?
+
+  # deactivate virtualenv
+  deactivate || source deactivate || die "FAILED: Unable to deactivate from existing virtualenv."
+
+  if [[ ${RESULT} -ne 0 ]]; then
+    echo "PIP test on virtualenv (oss-serial) FAILED."
+    return 1
+  else
+    echo "PIP test on virtualenv (oss-serial) PASSED."
+    return 0
+  fi
+}
+
+###########################################################################
+# Test helper function(s)
+###########################################################################
+
+create_activate_virtualenv() {
+  VIRTUALENV_FLAGS="--system-site-packages"
+  if [[ "${1}" == "--clean" ]]; then
+    shift
+  fi
+
+  VIRTUALENV_DIR="${1}"
+  if [[ -d "${VIRTUALENV_DIR}" ]]; then
+    if sudo rm -rf "${VIRTUALENV_DIR}"
+    then
+      echo "Removed existing virtualenv directory: ${VIRTUALENV_DIR}"
+    else
+      die "Failed to remove existing virtualenv directory: ${VIRTUALENV_DIR}"
+    fi
+  fi
+
+  if mkdir -p "${VIRTUALENV_DIR}"
+  then
+    echo "Created virtualenv directory: ${VIRTUALENV_DIR}"
+  else
+    die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}"
+  fi
+
+  # Use the virtualenv from the default python version (i.e., python-virtualenv)
+  # to create the virtualenv directory for testing. Use the -p flag to specify
+  # the python version inside the to-be-created virtualenv directory.
+  ${PYTHON_BIN_PATH_INIT} -m virtualenv -p ${PYTHON_BIN_PATH_INIT} ${VIRTUALENV_FLAGS} ${VIRTUALENV_DIR} || \
+    die "FAILED: Unable to create virtualenv"
+
+  source "${VIRTUALENV_DIR}/bin/activate" || \
+    die "FAILED: Unable to activate virtualenv in ${VIRTUALENV_DIR}"
+
+  # Update .tf_configure.bazelrc with venv python path for bazel test.
+  PYTHON_BIN_PATH="$(which python)"
+  yes "" | ./configure
+}
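+# e.g. create_activate_virtualenv --clean "${PIP_TEST_ROOT}/venv_clean"
+#      (as used by test_pip_virtualenv_clean above)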
+
+install_tensorflow_pip() {
+  if [[ -z "${1}" ]]; then
+    die "Please provide a proper wheel file path."
+  fi
+
+  # Set path to pip.
+  PIP_BIN_PATH="$(which pip${PYTHON_VER_CFG})"
+
+  # Print python and pip bin paths
+  echo "PYTHON_BIN_PATH to be used to install the .whl: ${PYTHON_BIN_PATH}"
+  echo "PIP_BIN_PATH to be used to install the .whl: ${PIP_BIN_PATH}"
+
+  # Upgrade pip so it supports tags such as cp27mu, manylinux1 etc.
+  echo "Upgrade pip in virtualenv"
+
+  # NOTE: `pip install --upgrade pip` leads to a documented TLS issue for
+  # some versions of python
+  curl https://bootstrap.pypa.io/get-pip.py | ${PYTHON_BIN_PATH} || \
+    die "Error: pip install (get-pip.py) FAILED"
+
+  # Check that requested python version matches configured one.
+  check_python_pip_version
+
+  # Force upgrade of setuptools. This must happen before the pip install of the
+  # WHL_PATH, which pulls in absl-py, which uses install_requires notation
+  # introduced in setuptools >=20.5. The default version of setuptools is 5.5.1,
+  # which is too old for absl-py.
+  ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 || \
+    die "Error: setuptools install, upgrade FAILED"
+
+  # Force tensorflow reinstallation. Otherwise it may not get installed from
+  # last build if it had the same version number as previous build.
+  PIP_FLAGS="--upgrade --force-reinstall"
+  ${PIP_BIN_PATH} install -v ${PIP_FLAGS} ${WHL_PATH} || \
+    die "pip install (forcing to reinstall tensorflow) FAILED"
+  echo "Successfully installed pip package ${WHL_PATH}"
+
+  # Force downgrade of setuptools. This must happen after the pip install of the
+  # WHL_PATH, which ends up upgrading to the latest version of setuptools.
+  # Versions of setuptools >= 39.1.0 will cause tests to fail like this:
+  #   ImportError: cannot import name py31compat
+  ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 || \
+    die "Error: setuptools install, upgrade FAILED"
+}
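+# e.g. install_tensorflow_pip "${WHL_PATH}"  # as called by the tests above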
+
+run_test_with_bazel() {
+  IS_OSS_SERIAL=0
+  if [[ "${1}" == "--oss_serial" ]]; then
+    IS_OSS_SERIAL=1
+  fi
+  TF_GPU_COUNT=${TF_GPU_COUNT:-4}
+
+  # PIP tests should have a "different" path. Different than the one we place
+  # virtualenv, because we are deleting and recreating it here.
+  PIP_TEST_PREFIX=bazel_pip
+  TEST_ROOT=$(pwd)/${PIP_TEST_PREFIX}
+  sudo rm -rf $TEST_ROOT
+  mkdir -p $TEST_ROOT
+  ln -s $(pwd)/tensorflow $TEST_ROOT/tensorflow
+
+  if [[ "${IS_OSS_SERIAL}" == "1" ]]; then
+    remove_test_filter_tag -no_oss
+    add_test_filter_tag oss_serial
+  else
+    add_test_filter_tag -oss_serial
+  fi
+
+  # Clean the bazel cache
+  bazel clean
+  # Clean up flags before running bazel commands
+  update_bazel_flags
+  # Clean up and update test filter tags
+  update_test_filter_tags
+
+  # Figure out how many tests can run concurrently, then run them.
+  BAZEL_PARALLEL_TEST_FLAGS=""
+  if [[ $CONTAINER_TYPE == "gpu" ]]; then
+    # Number of test threads is the number of GPU cards available.
+    if [[ $OS_TYPE == "macos" ]]; then
+      BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=1"
+    else
+      BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=${TF_GPU_COUNT} \
+        --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute"
+    fi
+  else
+    # Number of test threads is the number of logical CPUs.
+    if [[ $OS_TYPE == "macos" ]]; then
+      BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=$(sysctl -n hw.ncpu)"
+    else
+      BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=$(grep -c ^processor /proc/cpuinfo)"
+    fi
+  fi
+
+  if [[ ${IS_OSS_SERIAL} == 1 ]]; then
+    BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=1"
+  fi
+
+  # TODO(hyey): Update test target after validation.
+  # Run the test.
+  bazel test --build_tests_only ${BAZEL_BUILD_FLAGS} ${BAZEL_PARALLEL_TEST_FLAGS} --test_tag_filters=${BAZEL_TEST_FILTER_TAGS} -k -- //$PIP_TEST_PREFIX/tensorflow/python/...
+
+  unlink ${TEST_ROOT}/tensorflow
+}
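+# e.g. run_test_with_bazel               # pip tests, default filter tags
+#      run_test_with_bazel --oss_serial  # oss_serial tests, one at a time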
+
+run_all_tests() {
+  if [[ -z "${PIP_TESTS}" ]]; then
+    echo "No test was specified to run. Skipping all tests."
+    return 0
+  fi
+  FAIL_COUNTER=0
+  PASS_COUNTER=0
+  for TEST in ${PIP_TESTS[@]}; do
+
+    # Run tests.
+    case "${TEST}" in
+    "test_pip_virtualenv_clean")
+      test_pip_virtualenv_clean
+      ;;
+    "test_pip_virtualenv_non_clean")
+      test_pip_virtualenv_non_clean
+      ;;
+    "test_pip_virtualenv_oss_serial")
+      test_pip_virtualenv_oss_serial
+      ;;
+    *)
+      die "No matching test ${TEST} was found. Stopping test."
+      ;;
+    esac
+
+    # Check and update the results.
+    RETVAL=$?
+
+    # Update results counter
+    if [ ${RETVAL} -eq 0 ]; then
+      echo "Test (${TEST}) PASSED. (PASS COUNTER: ${PASS_COUNTER})"
+      PASS_COUNTER=$(($PASS_COUNTER+1))
+    else
+      echo "Test (${TEST}) FAILED. (FAIL COUNTER: ${FAIL_COUNTER})"
+      FAIL_COUNTER=$(($FAIL_COUNTER+1))
+    fi
+  done
+  printf "${PASS_COUNTER} PASSED | ${FAIL_COUNTER} FAILED"
+  if [[ "${FAIL_COUNTER}" == "0" ]]; then
+    printf "PIP tests ${COLOR_GREEN}PASSED${COLOR_NC}\n"
+    return 0
+  else
+    printf "PIP tests ${COLOR_RED}FAILED${COLOR_NC}\n"
+    return 1
+  fi
+}
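+# e.g. with TF_PIP_TESTS="test_pip_virtualenv_clean test_pip_virtualenv_non_clean",
+# run_all_tests runs both tests and prints the PASSED/FAILED counters.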
+
+###########################################################################
+# Build TF PIP Wheel file
+###########################################################################
+
+# Update the build flags for building whl.
+# Flags: GPU, OS, tf_nightly, project name
+GPU_FLAG=""
+NIGHTLY_FLAG=""
+
+# TF Nightly flag
+if [[ "$IS_NIGHTLY" == 1 ]]; then
+  # If 'nightly' is not specified in the project name already, then add.
+  if ! [[ $PROJECT_NAME == *"nightly"* ]]; then
+    echo "WARNING: IS_NIGHTLY=${IS_NIGHTLY} but requested project name \
+    (PROJECT_NAME=${PROJECT_NAME}) does not include 'nightly' string. \
+    Renaming it to 'tf_nightly'."
+    PROJECT_NAME="tf_nightly"
+  fi
+  NIGHTLY_FLAG="--nightly_flag"
+fi
+
+# CPU / GPU flag
+if [[ ${CONTAINER_TYPE} == "gpu" ]]; then
+  GPU_FLAG="--gpu"
+  if ! [[ $PROJECT_NAME == *"gpu"* ]]; then
+    echo "WARNING: GPU is specified but requested project name (PROJECT_NAME=${PROJECT_NAME}) \
+    does not include 'gpu'. Appending '_gpu' to the project name."
+    PROJECT_NAME="${PROJECT_NAME}_gpu"
+  fi
+fi
+
+./bazel-bin/tensorflow/tools/pip_package/build_pip_package ${PIP_WHL_DIR} ${GPU_FLAG} ${NIGHTLY_FLAG} "--project_name" ${PROJECT_NAME} || die "build_pip_package FAILED"
+
+PY_MAJOR_MINOR_VER=$(echo $PY_MAJOR_MINOR_VER | tr -d '.')
+if [[ $PY_MAJOR_MINOR_VER == "2" ]]; then
+  PY_MAJOR_MINOR_VER="27"
+fi
+
+# Set wheel path and verify that there is only one .whl file in the path.
+WHL_PATH=$(ls "${PIP_WHL_DIR}"/"${PROJECT_NAME}"-*"${PY_MAJOR_MINOR_VER}"*"${PY_MAJOR_MINOR_VER}"*.whl)
+if [[ $(echo "${WHL_PATH}" | wc -w) -ne 1 ]]; then
+  echo "ERROR: Failed to find exactly one built TensorFlow .whl file in "\
+  "directory: ${PIP_WHL_DIR}"
+fi
+
+WHL_DIR=$(dirname "${WHL_PATH}")
+WHL_BASE_NAME=$(basename "${WHL_PATH}")
+AUDITED_WHL_NAME="${WHL_DIR}"/$(echo "${WHL_BASE_NAME//linux/manylinux1}")
+
+# Print the size of the wheel file.
+echo "Size of the PIP wheel file built: $(ls -l ${WHL_PATH} | awk '{print $5}')"
+
+# Run tests (if any is specified).
+run_all_tests
+
+for WHL_PATH in $(ls ${PIP_WHL_DIR}/${PROJECT_NAME}*.whl); do
+  if [[ "${TF_NEED_CUDA}" -eq "1" ]]; then
+    # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
+    WHL_PATH=${AUDITED_WHL_NAME}
+    cp "${WHL_DIR}"/"${WHL_BASE_NAME}" "${WHL_PATH}"
+    echo "Copied manylinux1 wheel file at ${WHL_PATH}"
+  else
+    # Repair the wheels for cpu manylinux1
+    echo "auditwheel repairing ${WHL_PATH}"
+    auditwheel repair -w "${WHL_DIR}" "${WHL_PATH}"
+
+    if [[ -f ${AUDITED_WHL_NAME} ]]; then
+      WHL_PATH=${AUDITED_WHL_NAME}
+      echo "Repaired manylinux1 wheel file at: ${WHL_PATH}"
+    else
+      die "ERROR: Cannot find repaired wheel."
+    fi
+  fi
+done
+
+echo "EOF: Successfully ran pip_new.sh"
diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh
index 2c348a0..afb2827 100755
--- a/tensorflow/tools/ci_build/ci_sanity.sh
+++ b/tensorflow/tools/ci_build/ci_sanity.sh
@@ -540,9 +540,12 @@
   python file_name_test.py
 }
 
+do_libtensorflow_framework_not_depend_on_cuda_check() {
+  bazel build --action_env=TF_NEED_CUDA=1 --define framework_shared_object=true --config=cuda --nobuild_tests_only tensorflow/core/platform/default/build_config:libtensorflow_cuda_check_deps
+}
 # Supply all sanity step commands and descriptions
-SANITY_STEPS=("do_pylint PYTHON2" "do_pylint PYTHON3" "do_check_futures_test" "do_buildifier" "do_bazel_nobuild" "do_pip_package_licenses_check" "do_lib_package_licenses_check" "do_java_package_licenses_check" "do_pip_smoke_test" "do_check_load_py_test" "do_code_link_check" "do_check_file_name_test")
-SANITY_STEPS_DESC=("Python 2 pylint" "Python 3 pylint" "Check that python files have certain __future__ imports" "buildifier check" "bazel nobuild" "pip: license check for external dependencies" "C library: license check for external dependencies" "Java Native Library: license check for external dependencies" "Pip Smoke Test: Checking py_test dependencies exist in pip package" "Check load py_test: Check that BUILD files with py_test target properly load py_test" "Code Link Check: Check there are no broken links" "Check file names for cases")
+SANITY_STEPS=("do_pylint PYTHON2" "do_pylint PYTHON3" "do_check_futures_test" "do_buildifier" "do_bazel_nobuild" "do_pip_package_licenses_check" "do_lib_package_licenses_check" "do_java_package_licenses_check" "do_pip_smoke_test" "do_check_load_py_test" "do_code_link_check" "do_check_file_name_test" "do_libtensorflow_framework_not_depend_on_cuda_check")
+SANITY_STEPS_DESC=("Python 2 pylint" "Python 3 pylint" "Check that python files have certain __future__ imports" "buildifier check" "bazel nobuild" "pip: license check for external dependencies" "C library: license check for external dependencies" "Java Native Library: license check for external dependencies" "Pip Smoke Test: Checking py_test dependencies exist in pip package" "Check load py_test: Check that BUILD files with py_test target properly load py_test" "Code Link Check: Check there are no broken links" "Check file names for cases" "Check gpu libtensorflow_framework.so does not depend on cuda shared libraries.")
 
 INCREMENTAL_FLAG=""
 DEFAULT_BAZEL_CONFIGS=""
diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py
index 40a7443..856d64e 100755
--- a/tensorflow/tools/ci_build/copy_binary.py
+++ b/tensorflow/tools/ci_build/copy_binary.py
@@ -32,7 +32,7 @@
 import tempfile
 import zipfile
 
-TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)-(\d\.[\d]{1,2}"
+TF_NIGHTLY_REGEX = (r"(.+)(tf_nightly.*)-(\d\.[\d]{1,2}"
                     r"\.\d.dev[\d]{0,8})-(.+)\.whl")
 BINARY_STRING_TEMPLATE = "%s-%s-%s.whl"
 
@@ -43,7 +43,7 @@
     raise RuntimeError("%s not found." % filename)
 
 
-def copy_binary(directory, origin_tag, new_tag, version, gpu=False):
+def copy_binary(directory, origin_tag, new_tag, version, package):
   """Rename and copy binaries for different python versions.
 
   Arguments:
@@ -51,14 +51,10 @@
     origin_tag: str of the old python version tag
     new_tag: str of the new tag
     version: the version of the package
-    gpu: bool if its a gpu build or not
+    package: str, name of the package
 
   """
   print("Rename and copy binaries with %s to %s." % (origin_tag, new_tag))
-  if gpu:
-    package = "tf_nightly_gpu"
-  else:
-    package = "tf_nightly"
   origin_binary = BINARY_STRING_TEMPLATE % (package, version, origin_tag)
   new_binary = BINARY_STRING_TEMPLATE % (package, version, new_tag)
   zip_ref = zipfile.ZipFile(os.path.join(directory, origin_binary), "r")
@@ -120,7 +116,7 @@
   check_existence(args.filename)
   regex_groups = re.search(TF_NIGHTLY_REGEX, args.filename)
   directory = regex_groups.group(1)
-  gpu = regex_groups.group(2)
+  package = regex_groups.group(2)
   version = regex_groups.group(3)
   origin_tag = regex_groups.group(4)
   old_py_ver = re.search(r"(cp\d\d)", origin_tag).group(1)
@@ -129,7 +125,7 @@
   new_tag = origin_tag.replace(old_py_ver, "cp" + args.new_py_ver)
 
   # Copy the binary with the info we have
-  copy_binary(directory, origin_tag, new_tag, version, gpu)
+  copy_binary(directory, origin_tag, new_tag, version, package)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 0fa8c6c..131950d 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -40,8 +40,8 @@
 pip3 install virtualenv
 
 # Install six.
-pip2 install --upgrade six==1.11.0
-pip3 install --upgrade six==1.11.0
+pip2 install --upgrade six==1.12.0
+pip3 install --upgrade six==1.12.0
 
 # Install absl-py.
 pip2 install --upgrade absl-py
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
index 1d0b838..1944183 100755
--- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@@ -19,9 +19,10 @@
 set -x
 
 N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+N_GPUS=$(lspci | grep 'VGA' | grep 'AMD/ATI' | wc -l)
 
 echo ""
-echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
 echo ""
 
 # Run configure.
@@ -29,11 +30,14 @@
 export CC_OPT_FLAGS='-mavx'
 
 export TF_NEED_ROCM=1
+export TF_GPU_COUNT=${N_GPUS}
 
 yes "" | $PYTHON_BIN_PATH configure.py
 
 # Run bazel test command. Double test timeouts to avoid flakes.
-bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
-    --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
-    --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test -k \
+    --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 600,900,2400,7200 \
+    --build_tests_only --test_output=errors --local_test_jobs=${TF_GPU_COUNT} --config=opt \
+    --test_sharding_strategy=disabled \
+    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute -- \
     //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 9741fba..a938928 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -60,6 +60,7 @@
 TEST_TARGET="//${PY_TEST_DIR}/tensorflow/python/..."
 PROJECT_NAME=""
 EXTRA_BUILD_FLAGS=""
+EXTRA_TEST_FLAGS=""
 
 # --skip_test            Skip running tests
 # --enable_remote_cache  Add options to enable remote cache for build and test
@@ -89,6 +90,13 @@
       fi
       PROJECT_NAME="$1"
       ;;
+    --extra_test_flags)
+      shift
+      if [[ -z "$1" ]]; then
+        break
+      fi
+      EXTRA_TEST_FLAGS="$1"
+      ;;
     *)
   esac
   shift
@@ -156,6 +164,7 @@
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
 bazel test --announce_rc --config=opt -k --test_output=errors \
+  ${EXTRA_TEST_FLAGS} \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_oss,-gpu \
   --build_tag_filters=-no_pip,-no_windows,-no_oss,-gpu --build_tests_only \
diff --git a/tensorflow/tools/compatibility/ast_edits.py b/tensorflow/tools/compatibility/ast_edits.py
index ed388b8..3d421f6 100644
--- a/tensorflow/tools/compatibility/ast_edits.py
+++ b/tensorflow/tools/compatibility/ast_edits.py
@@ -286,7 +286,7 @@
     arg_warnings = self._get_applicable_dict("function_arg_warnings",
                                              full_name, name)
 
-    for (kwarg, arg), (level, warning) in arg_warnings.items():
+    for (kwarg, arg), (level, warning) in sorted(arg_warnings.items()):
       present, _ = get_arg_value(node, kwarg, arg)
       if present:
         warned = True
@@ -525,7 +525,7 @@
     text += "Processing file %r\n outputting to %r\n" % (in_filename,
                                                          out_filename)
     text += "-" * 80 + "\n\n"
-    text += "\n".join(log)
+    text += "\n".join(log) + "\n"
     text += "-" * 80 + "\n\n"
     return text
 
@@ -568,7 +568,7 @@
       in_place: Allow the conversion of an entire directory in place.
 
     Returns:
-      A tuple of files processed, the report string ofr all files, and a dict
+      A tuple of files processed, the report string for all files, and a dict
         mapping filenames to errors encountered in that file.
     """
 
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index c29b259..9d883d5 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -205,8 +205,6 @@
     'tf.get_variable': 'tf.compat.v1.get_variable',
     'tf.get_variable_scope': 'tf.compat.v1.get_variable_scope',
     'tf.gfile.FastGFile': 'tf.compat.v1.gfile.FastGFile',
-    'tf.gfile.GFile': 'tf.io.gfile.GFile',
-    'tf.gfile.Open': 'tf.io.gfile.GFile',
     'tf.global_norm': 'tf.linalg.global_norm',
     'tf.global_variables': 'tf.compat.v1.global_variables',
     'tf.global_variables_initializer': 'tf.compat.v1.global_variables_initializer',
@@ -323,6 +321,8 @@
     'tf.lbeta': 'tf.math.lbeta',
     'tf.lgamma': 'tf.math.lgamma',
     'tf.lin_space': 'tf.linspace',
+    'tf.lite.toco_convert': 'tf.compat.v1.lite.toco_convert',
+    'tf.lite.TocoConverter': 'tf.compat.v1.lite.TocoConverter',
     'tf.local_variables': 'tf.compat.v1.local_variables',
     'tf.local_variables_initializer': 'tf.compat.v1.local_variables_initializer',
     'tf.log': 'tf.math.log',
@@ -349,6 +349,7 @@
     'tf.logging.warn': 'tf.compat.v1.logging.warn',
     'tf.logging.warning': 'tf.compat.v1.logging.warning',
     'tf.logical_xor': 'tf.math.logical_xor',
+    'tf.losses.Reduction': 'tf.compat.v1.losses.Reduction',
     'tf.losses.absolute_difference': 'tf.compat.v1.losses.absolute_difference',
     'tf.losses.add_loss': 'tf.compat.v1.losses.add_loss',
     'tf.losses.compute_weighted_loss': 'tf.compat.v1.losses.compute_weighted_loss',
@@ -421,8 +422,11 @@
     'tf.min_max_variable_partitioner': 'tf.compat.v1.min_max_variable_partitioner',
     'tf.model_variables': 'tf.compat.v1.model_variables',
     'tf.moving_average_variables': 'tf.compat.v1.moving_average_variables',
+    'tf.nn.avg_pool_v2': 'tf.nn.avg_pool',
     'tf.nn.bidirectional_dynamic_rnn': 'tf.compat.v1.nn.bidirectional_dynamic_rnn',
-    'tf.nn.conv3d_backprop_filter_v2': 'tf.nn.conv3d_backprop_filter',
+    'tf.nn.conv2d_backprop_filter': 'tf.compat.v1.nn.conv2d_backprop_filter',
+    'tf.nn.conv3d_backprop_filter': 'tf.compat.v1.nn.conv3d_backprop_filter',
+    'tf.nn.conv3d_backprop_filter_v2': 'tf.compat.v1.nn.conv3d_backprop_filter_v2',
     'tf.nn.ctc_beam_search_decoder_v2': 'tf.nn.ctc_beam_search_decoder',
     'tf.nn.ctc_loss_v2': 'tf.nn.ctc_loss',
     'tf.nn.depthwise_conv2d_native': 'tf.compat.v1.nn.depthwise_conv2d_native',
@@ -498,8 +502,10 @@
     'tf.saved_model.Builder': 'tf.compat.v1.saved_model.Builder',
     'tf.saved_model.LEGACY_INIT_OP_KEY': 'tf.compat.v1.saved_model.LEGACY_INIT_OP_KEY',
     'tf.saved_model.MAIN_OP_KEY': 'tf.compat.v1.saved_model.MAIN_OP_KEY',
+    'tf.saved_model.build_signature_def': 'tf.compat.v1.saved_model.build_signature_def',
     'tf.saved_model.build_tensor_info': 'tf.compat.v1.saved_model.build_tensor_info',
     'tf.saved_model.builder.SavedModelBuilder': 'tf.compat.v1.saved_model.builder.SavedModelBuilder',
+    'tf.saved_model.classification_signature_def': 'tf.compat.v1.saved_model.classification_signature_def',
     'tf.saved_model.constants.ASSETS_DIRECTORY': 'tf.saved_model.ASSETS_DIRECTORY',
     'tf.saved_model.constants.ASSETS_KEY': 'tf.saved_model.ASSETS_KEY',
     'tf.saved_model.constants.LEGACY_INIT_OP_KEY': 'tf.compat.v1.saved_model.constants.LEGACY_INIT_OP_KEY',
@@ -511,6 +517,7 @@
     'tf.saved_model.constants.VARIABLES_FILENAME': 'tf.saved_model.VARIABLES_FILENAME',
     'tf.saved_model.experimental.save': 'tf.saved_model.save',
     'tf.saved_model.get_tensor_from_tensor_info': 'tf.compat.v1.saved_model.get_tensor_from_tensor_info',
+    'tf.saved_model.is_valid_signature': 'tf.compat.v1.saved_model.is_valid_signature',
     'tf.saved_model.load': 'tf.compat.v1.saved_model.load',
     'tf.saved_model.loader.load': 'tf.compat.v1.saved_model.loader.load',
     'tf.saved_model.loader.maybe_saved_model_directory': 'tf.compat.v1.saved_model.loader.maybe_saved_model_directory',
@@ -518,6 +525,8 @@
     'tf.saved_model.main_op.main_op_with_restore': 'tf.compat.v1.saved_model.main_op.main_op_with_restore',
     'tf.saved_model.main_op_with_restore': 'tf.compat.v1.saved_model.main_op_with_restore',
     'tf.saved_model.maybe_saved_model_directory': 'tf.compat.v1.saved_model.maybe_saved_model_directory',
+    'tf.saved_model.predict_signature_def': 'tf.compat.v1.saved_model.predict_signature_def',
+    'tf.saved_model.regression_signature_def': 'tf.compat.v1.saved_model.regression_signature_def',
     'tf.saved_model.signature_constants.CLASSIFY_INPUTS': 'tf.saved_model.CLASSIFY_INPUTS',
     'tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME': 'tf.saved_model.CLASSIFY_METHOD_NAME',
     'tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES': 'tf.saved_model.CLASSIFY_OUTPUT_CLASSES',
@@ -529,11 +538,11 @@
     'tf.saved_model.signature_constants.REGRESS_INPUTS': 'tf.saved_model.REGRESS_INPUTS',
     'tf.saved_model.signature_constants.REGRESS_METHOD_NAME': 'tf.saved_model.REGRESS_METHOD_NAME',
     'tf.saved_model.signature_constants.REGRESS_OUTPUTS': 'tf.saved_model.REGRESS_OUTPUTS',
-    'tf.saved_model.signature_def_utils.build_signature_def': 'tf.saved_model.build_signature_def',
-    'tf.saved_model.signature_def_utils.classification_signature_def': 'tf.saved_model.classification_signature_def',
-    'tf.saved_model.signature_def_utils.is_valid_signature': 'tf.saved_model.is_valid_signature',
-    'tf.saved_model.signature_def_utils.predict_signature_def': 'tf.saved_model.predict_signature_def',
-    'tf.saved_model.signature_def_utils.regression_signature_def': 'tf.saved_model.regression_signature_def',
+    'tf.saved_model.signature_def_utils.build_signature_def': 'tf.compat.v1.saved_model.signature_def_utils.build_signature_def',
+    'tf.saved_model.signature_def_utils.classification_signature_def': 'tf.compat.v1.saved_model.signature_def_utils.classification_signature_def',
+    'tf.saved_model.signature_def_utils.is_valid_signature': 'tf.compat.v1.saved_model.signature_def_utils.is_valid_signature',
+    'tf.saved_model.signature_def_utils.predict_signature_def': 'tf.compat.v1.saved_model.signature_def_utils.predict_signature_def',
+    'tf.saved_model.signature_def_utils.regression_signature_def': 'tf.compat.v1.saved_model.signature_def_utils.regression_signature_def',
     'tf.saved_model.simple_save': 'tf.compat.v1.saved_model.simple_save',
     'tf.saved_model.tag_constants.GPU': 'tf.saved_model.GPU',
     'tf.saved_model.tag_constants.SERVING': 'tf.saved_model.SERVING',
@@ -621,6 +630,7 @@
     'tf.summary.get_summary_description': 'tf.compat.v1.summary.get_summary_description',
     'tf.summary.histogram': 'tf.compat.v1.summary.histogram',
     'tf.summary.image': 'tf.compat.v1.summary.image',
+    'tf.summary.initialize': 'tf.compat.v1.summary.initialize',
     'tf.summary.merge': 'tf.compat.v1.summary.merge',
     'tf.summary.merge_all': 'tf.compat.v1.summary.merge_all',
     'tf.summary.scalar': 'tf.compat.v1.summary.scalar',
diff --git a/tensorflow/tools/compatibility/reorders_v2.py b/tensorflow/tools/compatibility/reorders_v2.py
index e7edf3f..14c2d0c 100644
--- a/tensorflow/tools/compatibility/reorders_v2.py
+++ b/tensorflow/tools/compatibility/reorders_v2.py
@@ -58,9 +58,9 @@
     'tf.math.reduce_prod': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
     'tf.math.reduce_sum': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
     'tf.multinomial': ['logits', 'num_samples', 'seed', 'name', 'output_dtype'],
-    'tf.nn.conv1d': ['value', 'filters', 'stride', 'padding', 'use_cudnn_on_gpu', 'data_format', 'name', 'input'],
+    'tf.nn.avg_pool': ['value', 'ksize', 'strides', 'padding', 'data_format', 'name', 'input'],
+    'tf.nn.conv1d': ['value', 'filters', 'stride', 'padding', 'use_cudnn_on_gpu', 'data_format', 'name', 'input', 'dilations'],
     'tf.nn.conv2d': ['input', 'filter', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name', 'filters'],
-    'tf.nn.conv2d_backprop_filter': ['input', 'filter_sizes', 'out_backprop', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name'],
     'tf.nn.conv2d_backprop_input': ['input_sizes', 'filter', 'out_backprop', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name', 'filters'],
     'tf.nn.convolution': ['input', 'filter', 'padding', 'strides', 'dilation_rate', 'name', 'data_format', 'filters', 'dilations'],
     'tf.nn.crelu': ['features', 'name', 'axis'],
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index 18271ef..a2d846f 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -23,7 +23,6 @@
 import sys
 
 import pasta
-import six
 
 from tensorflow.tools.compatibility import ast_edits
 from tensorflow.tools.compatibility import renames_v2
@@ -226,6 +225,14 @@
         "tf.nn.max_pool": {
             "value": "input"
         },
+
+        "tf.nn.avg_pool": {
+            "value": "input"
+        },
+
+        "tf.nn.avg_pool2d": {
+            "value": "input"
+        },
         "tf.multinomial": {
             "output_dtype": "dtype",
         },
@@ -405,12 +412,29 @@
             "filter": "filters",
             "use_cudnn_on_gpu": None,
         },
-        "tf.nn.conv2d_backprop_filter": {
-            "use_cudnn_on_gpu": None,
-        },
         "tf.nn.conv2d_backprop_input": {
-            "filter": "filters",
             "use_cudnn_on_gpu": None,
+            "input_sizes": "output_shape",
+            "out_backprop": "input",
+            "filter": "filters",
+        },
+        "tf.contrib.summary.audio": {
+            "tensor": "data",
+            "family": None,
+        },
+        "tf.contrib.summary.histogram": {
+            "tensor": "data",
+            "family": None,
+        },
+        "tf.contrib.summary.image": {
+            "tensor": "data",
+            "bad_color": None,
+            "max_images": "max_outputs",
+            "family": None,
+        },
+        "tf.contrib.summary.scalar": {
+            "tensor": "data",
+            "family": None,
         },
     }
 
@@ -423,7 +447,7 @@
         "tf.batch_to_space_nd":
             "tf.batch_to_space",
         "tf.batch_gather":
-            "tf.gather",
+            "tf.compat.v1.batch_gather",
         "tf.space_to_batch_nd":
             "tf.space_to_batch",
         "tf.nn.space_to_batch":
@@ -440,6 +464,8 @@
             "tf.io.gfile.exists",
         "tf.gfile.Glob":
             "tf.io.gfile.glob",
+        "tf.gfile.GFile":
+            "tf.io.gfile.GFile",
         "tf.gfile.IsDirectory":
             "tf.io.gfile.isdir",
         "tf.gfile.ListDirectory":
@@ -448,6 +474,8 @@
             "tf.io.gfile.makedirs",
         "tf.gfile.MkDir":
             "tf.io.gfile.mkdir",
+        "tf.gfile.Open":
+            "tf.io.gfile.GFile",
         "tf.gfile.Remove":
             "tf.io.gfile.remove",
         "tf.gfile.Rename":
@@ -540,17 +568,31 @@
             "tf.data.experimental.unbatch",
         "tf.contrib.data.unique":
             "tf.data.experimental.unique",
+        "tf.contrib.estimator.make_early_stopping_hook":
+            "tf.estimator.experimental.make_early_stopping_hook",
+        "tf.contrib.estimator.stop_if_higher_hook":
+            "tf.estimator.experimental.stop_if_higher_hook",
+        "tf.contrib.estimator.stop_if_lower_hook":
+            "tf.estimator.experimental.stop_if_lower_hook",
+        "tf.contrib.estimator.stop_if_no_decrease_hook":
+            "tf.estimator.experimental.stop_if_no_decrease_hook",
+        "tf.contrib.estimator.stop_if_no_increase_hook":
+            "tf.estimator.experimental.stop_if_no_increase_hook",
+        "tf.contrib.framework.CriticalSection":
+            "tf.CriticalSection",
         "tf.contrib.framework.is_tensor":
             "tf.is_tensor",
         "tf.contrib.framework.nest.assert_same_structure":
             "tf.nest.assert_same_structure",
         "tf.contrib.framework.nest.flatten":
             "tf.nest.flatten",
+        "tf.contrib.framework.nest.is_sequence":
+            "tf.nest.is_nested",
         "tf.contrib.framework.nest.map_structure":
             "tf.nest.map_structure",
         "tf.contrib.framework.nest.pack_sequence_as":
             "tf.nest.pack_sequence_as",
-        "tf.contrib.framework.constant_value":
+        "tf.contrib.util.constant_value":
             "tf.get_static_value",
         "tf.contrib.saved_model.load_keras_model":
             "tf.keras.experimental.load_from_saved_model",
@@ -560,10 +602,30 @@
             "tf.nn.rnn_cell.RNNCell",
         "tf.contrib.rnn.LSTMStateTuple":
             "tf.nn.rnn_cell.LSTMStateTuple",
+        "tf.contrib.rnn.BasicLSTMCell":
+            "tf.compat.v1.nn.rnn_cell.BasicLSTMCell",
+        "tf.contrib.rnn.BasicRNNCell":
+            "tf.compat.v1.nn.rnn_cell.BasicRNNCell",
+        "tf.contrib.rnn.GRUCell":
+            "tf.compat.v1.nn.rnn_cell.GRUCell",
+        "tf.contrib.rnn.LSTMCell":
+            "tf.compat.v1.nn.rnn_cell.LSTMCell",
+        "tf.contrib.rnn.MultiRNNCell":
+            "tf.compat.v1.nn.rnn_cell.MultiRNNCell",
         "tf.contrib.framework.sort":
             "tf.sort",
         "tf.contrib.framework.argsort":
             "tf.argsort",
+        "tf.contrib.summary.audio":
+            "tf.compat.v2.summary.audio",
+        "tf.contrib.summary.histogram":
+            "tf.compat.v2.summary.histogram",
+        "tf.contrib.summary.image":
+            "tf.compat.v2.summary.image",
+        "tf.contrib.summary.initialize":
+            "tf.compat.v1.summary.initialize",
+        "tf.contrib.summary.scalar":
+            "tf.compat.v2.summary.scalar",
         "tf.count_nonzero":
             "tf.math.count_nonzero",
         "tf.manip.batch_to_space_nd":
@@ -700,16 +762,20 @@
             "tf.compat.v1.debugging.assert_rank_in",
         "tf.assert_rank":
             "tf.compat.v1.assert_rank",
-        "tf.contrib.framework.argsort":
-            "tf.argsort",
         "tf.nn.max_pool":
             "tf.nn.max_pool2d",
-        'tf.keras.initializers.zeros':
-            'tf.compat.v1.keras.initializers.zeros',
-        'tf.keras.initializers.ones':
-            'tf.compat.v1.keras.initializers.ones',
-        'tf.keras.initializers.constant':
-            'tf.compat.v1.keras.initializers.constant',
+        "tf.nn.avg_pool":
+            "tf.nn.avg_pool2d",
+        "tf.keras.initializers.zeros":
+            "tf.compat.v1.keras.initializers.zeros",
+        "tf.keras.initializers.ones":
+            "tf.compat.v1.keras.initializers.ones",
+        "tf.keras.initializers.constant":
+            "tf.compat.v1.keras.initializers.constant",
+        "tf.data.experimental.map_and_batch_with_legacy_function":
+            "tf.compat.v1.data.experimental.map_and_batch_with_legacy_function",
+        "tf.nn.conv2d_backprop_input":
+            "tf.nn.conv2d_transpose"
     }
     # pylint: enable=line-too-long
 
@@ -743,7 +809,6 @@
         "tf.convert_to_tensor",
         "tf.nn.conv1d",
         "tf.nn.conv2d",
-        "tf.nn.conv2d_backprop_filter",
         "tf.nn.conv2d_backprop_input",
         "tf.nn.ctc_beam_search_decoder",
         "tf.nn.moments",
@@ -823,12 +888,27 @@
         "tf.gradients",
         "tf.hessians",
         "tf.nn.max_pool",
+        "tf.nn.avg_pool",
     }
 
+    # Manual mapping of function names to be reordered to their list of argument
+    # names, in order. Only use this if argument names cannot be autodetected,
+    # e.g. if the functions are in contrib.
+    self.manual_function_reorders = {
+        "tf.contrib.summary.audio": [
+            "name", "tensor", "sample_rate", "max_outputs", "family", "step"],
+        "tf.contrib.summary.histogram": [
+            "name", "tensor", "family", "step"],
+        "tf.contrib.summary.image": [
+            "name", "tensor", "bad_color", "max_images", "family", "step"],
+        "tf.contrib.summary.scalar": [
+            "name", "tensor", "family", "step"],
+    }
     # Functions that were reordered should be changed to the new keyword args
     # for safety, if positional arguments are used. If you have reversed the
     # positional arguments yourself, this could do the wrong thing.
-    self.function_reorders = reorders_v2.reorders
+    self.function_reorders = dict(reorders_v2.reorders)
+    self.function_reorders.update(self.manual_function_reorders)
 
     contrib_warning = (
         ast_edits.ERROR,
@@ -917,6 +997,14 @@
         "only effects core estimator. If you are using "
         "tf.contrib.learn.Estimator, please switch to using core estimator.")
 
+    # TODO(b/124529441): if possible eliminate need for manual checking.
+    contrib_summary_comment = (
+        ast_edits.WARNING,
+        "(Manual check required) tf.contrib.summary.* functions have been "
+        "migrated best-effort to tf.compat.v2.summary.* equivalents where "
+        "possible, but the resulting code may not always work. Please check "
+        "manually; you can report migration failures on b/124529441.")
+
     # Function warnings. <function name> placeholder inside warnings will be
     # replaced by function name.
     # You can use *. to add items which do not check the FQN, and apply to e.g.,
@@ -958,6 +1046,14 @@
             assert_rank_comment,
         "tf.assert_rank_in":
             assert_rank_comment,
+        "tf.contrib.summary.audio":
+            contrib_summary_comment,
+        "tf.contrib.summary.histogram":
+            contrib_summary_comment,
+        "tf.contrib.summary.image":
+            contrib_summary_comment,
+        "tf.contrib.summary.scalar":
+            contrib_summary_comment,
         "tf.debugging.assert_equal":
             assert_return_type_comment,
         "tf.debugging.assert_greater":
@@ -1257,11 +1353,45 @@
                 "tf.cond no longer takes 'strict' argument, it behaves as "
                 "if was set to True.")
         },
+        "tf.contrib.summary.audio": {
+            ("family", 4): (
+                ast_edits.WARNING,
+                "tf.contrib.summary.* functions no longer take the 'family' "
+                "argument; instead name scoping should be used. This call site "
+                "specifies a family argument so it cannot be converted safely.")
+        },
+        "tf.contrib.summary.histogram": {
+            ("family", 2): (
+                ast_edits.WARNING,
+                "tf.contrib.summary.* functions no longer take the 'family' "
+                "argument; instead name scoping should be used. This call site "
+                "specifies a family argument so it cannot be converted safely.")
+        },
+        "tf.contrib.summary.image": {
+            ("bad_color", 2): (
+                ast_edits.WARNING,
+                "tf.contrib.summary.image no longer takes the 'bad_color' "
+                "argument; caller must now preprocess if needed. This call "
+                "site specifies a bad_color argument so it cannot be converted "
+                "safely."),
+            ("family", 4): (
+                ast_edits.WARNING,
+                "tf.contrib.summary.* functions no longer take the 'family' "
+                "argument; instead name scoping should be used. This call site "
+                "specifies a family argument so it cannot be converted safely.")
+        },
+        "tf.contrib.summary.scalar": {
+            ("family", 2): (
+                ast_edits.WARNING,
+                "tf.contrib.summary.* functions no longer take the 'family' "
+                "argument; instead name scoping should be used. This call site "
+                "specifies a family argument so it cannot be converted safely.")
+        },
     }
 
     # Specially handled functions
     # Each transformer is a callable which will be called with the arguments
-    #   transformer(parent, node, full_name, name, logs, errors)
+    #   transformer(parent, node, full_name, name, logs)
     # Where logs is a list to which (level, line, col, msg) tuples can be
     # appended, full_name is the FQN of the function called (or None if that is
     # unknown), name is the name of the function called (or None is that is
@@ -1332,6 +1462,10 @@
             _add_argument_transformer,
             arg_name="data_format",
             arg_value_ast=ast.Str("NHWC")),
+        "tf.contrib.summary.audio": _add_summary_step_transformer,
+        "tf.contrib.summary.histogram": _add_summary_step_transformer,
+        "tf.contrib.summary.image": _add_summary_step_transformer,
+        "tf.contrib.summary.scalar": _add_summary_step_transformer,
     }
 
     self.module_deprecations = {
@@ -1443,7 +1577,7 @@
                               arg_name, arg_value_ast):
   """Adds an argument (as a final kwarg arg_name=arg_value_ast)."""
   node.keywords.append(ast.keyword(arg=arg_name, value=arg_value_ast))
-  logs.add((
+  logs.append((
       ast_edits.INFO, node.lineno, node.col_offset,
       "Adding argument '%s' to call to %s." % (pasta.dump(node.keywords[-1],
                                                           full_name or name))
@@ -1562,11 +1696,18 @@
   """Wrap labels argument with stop_gradients."""
   def _wrap_label(parent, old_value):
     """Wrap labels with tf.stop_gradient."""
-    if six.PY3:
+    already_stop_grad = (isinstance(old_value, ast.Call) and
+                         isinstance(old_value.func, ast.Attribute) and
+                         old_value.func.attr == "stop_gradient" and
+                         isinstance(old_value.func.value, ast.Name) and
+                         old_value.func.value.id == "tf")
+    if already_stop_grad:
+      return False
+    try:
       new_value = ast.Call(
           ast.Name(id="tf.stop_gradient", ctx=ast.Load()),
           [old_value], [])
-    else:
+    except TypeError:
       new_value = ast.Call(
           ast.Name(id="tf.stop_gradient", ctx=ast.Load()),
           [old_value], [], None, None)
@@ -1574,16 +1715,17 @@
     # This copies the prefix and suffix on old_value to new_value.
     pasta.ast_utils.replace_child(parent, old_value, new_value)
     ast.copy_location(new_value, old_value)
+    return True
 
   # Check if we have a labels keyword arg
   for karg in node.keywords:
     if karg.arg == "labels":
-      logs.append((ast_edits.INFO, node.lineno, node.col_offset,
-                   "Changing labels arg of "
-                   "tf.nn.softmax_cross_entropy_with_logits to "
-                   "tf.stop_gradient(labels). Please check this "
-                   "transformation.\n"))
-      _wrap_label(karg, karg.value)
+      if _wrap_label(karg, karg.value):
+        logs.append((ast_edits.INFO, node.lineno, node.col_offset,
+                     "Changing labels arg of "
+                     "tf.nn.softmax_cross_entropy_with_logits to "
+                     "tf.stop_gradient(labels). Please check this "
+                     "transformation.\n"))
       return node
   return node
 
@@ -1717,3 +1859,22 @@
                  "Changing uniform_noise arg of tf.image.extract_glimpse to "
                  "noise, and recomputing value.\n"))
     return node
+
+
+def _add_summary_step_transformer(parent, node, full_name, name, logs):
+  """Adds a step argument to the summary API call if not specified.
+
+  The inserted argument value is tf.compat.v1.train.get_or_create_global_step().
+  """
+  for keyword_arg in node.keywords:
+    if keyword_arg.arg == "step":
+      return node
+  default_value = "tf.compat.v1.train.get_or_create_global_step()"
+  # Parse with pasta instead of ast to avoid emitting a spurious trailing \n.
+  ast_value = pasta.parse(default_value)
+  node.keywords.append(ast.keyword(arg="step", value=ast_value))
+  logs.append((
+      ast_edits.WARNING, node.lineno, node.col_offset,
+      "Summary API writing function %s now requires a 'step' argument; "
+      "inserting default of %s." % (full_name or name, default_value)))
+  return node
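
A hedged, standalone sketch of the same insert-if-missing logic, using ast.parse(mode="eval") where the code above uses pasta.parse (pasta is preferred there because it does not emit a spurious trailing newline):

```python
import ast

STEP_DEFAULT = "tf.compat.v1.train.get_or_create_global_step()"

def add_step_if_missing(call):
    # No-op when the caller already passes step=...; otherwise append
    # the default expression as a new keyword argument.
    if any(kw.arg == "step" for kw in call.keywords):
        return call
    default = ast.parse(STEP_DEFAULT, mode="eval").body
    call.keywords.append(ast.keyword(arg="step", value=default))
    return call

tree = ast.parse("tf.contrib.summary.scalar('loss', loss)")
add_step_if_missing(tree.body[0].value)
ast.fix_missing_locations(tree)
print(ast.unparse(tree))
# -> tf.contrib.summary.scalar('loss', loss,
#        step=tf.compat.v1.train.get_or_create_global_step())
# (output is a single line; wrapped here for width)
```
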
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_main.py b/tensorflow/tools/compatibility/tf_upgrade_v2_main.py
index aecd74e..c34d659 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_main.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_main.py
@@ -27,7 +27,7 @@
 def main():
   parser = argparse.ArgumentParser(
       formatter_class=argparse.RawDescriptionHelpFormatter,
-      description="""Convert a TensorFlow Python file to 2.0
+      description="""Convert a TensorFlow Python file from 1.* to 2.0
 
 Simple usage:
   tf_upgrade_v2.py --infile foo.py --outfile bar.py
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index ede0ccf..65622aa 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -343,6 +343,8 @@
         tf_upgrade_v2.TFAPIChangeSpec().reordered_function_names)
     function_reorders = (
         tf_upgrade_v2.TFAPIChangeSpec().function_reorders)
+    manual_function_reorders = (
+        tf_upgrade_v2.TFAPIChangeSpec().manual_function_reorders)
 
     added_names_message = """Some function names in
 self.reordered_function_names are not in reorders_v2.py.
@@ -362,6 +364,8 @@
     # function_reorders should contain reordered_function_names
     # and their TensorFlow V1 aliases.
     for name in function_reorders:
+      if name in manual_function_reorders:
+        continue
       # get other names for this function
       attr = get_symbol_for_name(tf.compat.v1, name)
       _, attr = tf_decorator.unwrap(attr)
@@ -818,6 +822,36 @@
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(expected_text, new_text)
 
+  def testSoftMaxCrossEntropyWithLogitsDoesntNest(self):
+    text = ("tf.nn.softmax_cross_entropy_with_logits("
+            "labels=tf.stop_gradient(labels), logits=logits, dim=2)")
+    expected_text = (
+        "tf.nn.softmax_cross_entropy_with_logits("
+        "labels=tf.stop_gradient(labels), logits=logits, axis=2)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+    text = ("tf.nn.softmax_cross_entropy_with_logits("
+            "labels=tf.stop_gradient(foo(bar)))")
+    expected_text = ("tf.nn.softmax_cross_entropy_with_logits("
+                     "labels=tf.stop_gradient(foo(bar)))")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(expected_text, new_text)
+
+    text = ("tf.nn.softmax_cross_entropy_with_logits("
+            "labels=foo())")
+    expected_text = ("tf.nn.softmax_cross_entropy_with_logits("
+                     "labels=tf.stop_gradient(foo()))")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(expected_text, new_text)
+
+    text = ("tf.nn.softmax_cross_entropy_with_logits("
+            "labels=foo().zz())")
+    expected_text = ("tf.nn.softmax_cross_entropy_with_logits("
+                     "labels=tf.stop_gradient(foo().zz()))")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(expected_text, new_text)
+
   def testSparseMatmul(self):
     text = ("tf.sparse_matmul(a, b, c, d, e, f, g)\n")
     expected_text = ("tf.linalg.matmul(a=a, b=b, transpose_a=c, transpose_b=d, "
@@ -879,9 +913,8 @@
         "tf.nn.conv2d_backprop_filter(input, filter_sizes, out_backprop, "
         "strides, padding, use_cudnn_on_gpu, data_format)")
     expected_text = (
-        "tf.nn.conv2d_backprop_filter(input=input, filter_sizes=filter_sizes, "
-        "out_backprop=out_backprop, strides=strides, padding=padding, "
-        "data_format=data_format)")
+        "tf.compat.v1.nn.conv2d_backprop_filter(input, filter_sizes, "
+        "out_backprop, strides, padding, use_cudnn_on_gpu, data_format)")
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
@@ -890,8 +923,8 @@
         "tf.nn.conv2d_backprop_input(input_sizes, filter, out_backprop, "
         "strides, padding, use_cudnn_on_gpu, data_format)")
     expected_text = (
-        "tf.nn.conv2d_backprop_input(input_sizes=input_sizes, filters=filter, "
-        "out_backprop=out_backprop, strides=strides, padding=padding, "
+        "tf.nn.conv2d_transpose(output_shape=input_sizes, filters=filter, "
+        "input=out_backprop, strides=strides, padding=padding, "
         "data_format=data_format)")
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
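
Spelled out, the argument mapping these two tests encode (taken directly from the expected strings above): the positional arguments of tf.nn.conv2d_backprop_input become keyword arguments of tf.nn.conv2d_transpose, and use_cudnn_on_gpu is dropped.

```python
# Mapping encoded by expected_text above (illustrative only).
BACKPROP_INPUT_TO_TRANSPOSE = {
    "input_sizes": "output_shape",
    "filter": "filters",
    "out_backprop": "input",
    "strides": "strides",
    "padding": "padding",
    "data_format": "data_format",
    # use_cudnn_on_gpu has no counterpart and is dropped.
}
```
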
@@ -1037,6 +1070,13 @@
       _, unused_report, unused_errors, actual = self._upgrade(text)
       self.assertEqual(actual, expected)
 
+  def testMapAndBatch(self):
+    suffix = ".data.experimental.map_and_batch_with_legacy_function(args)"
+    text = "tf" + suffix
+    expected = "tf.compat.v1" + suffix
+    _, unused_report, unused_errors, actual = self._upgrade(text)
+    self.assertEqual(actual, expected)
+
   def testCast(self):
     for (name, dtype) in [("int32", "int32"),
                           ("int64", "int64"),
@@ -1150,6 +1190,12 @@
     _, _, _, new_text = self._upgrade(text)
     self.assertEqual(expected, new_text)
 
+  def test_CriticalSection_upgrade(self):
+    text = "tf.contrib.framework.CriticalSection(shared_name='blah')"
+    expected = "tf.CriticalSection(shared_name='blah')"
+    _, _, _, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+
   def test_sample_distorted_bounding_box(self):
     # pylint: disable=line-too-long
     text = "tf.image.sample_distorted_bounding_box(a, b, c, d, e, f, g, h, i, j)"
@@ -1158,6 +1204,12 @@
     _, _, _, new_text = self._upgrade(text)
     self.assertEqual(expected, new_text)
 
+  def test_contrib_initialize(self):
+    text = "tf.contrib.summary.initialize"
+    expected = "tf.compat.v1.summary.initialize"
+    _, _, _, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+
   def test_contrib_framework_argsort(self):
     text = "tf.contrib.framework.argsort"
     expected = "tf.argsort"
@@ -1179,6 +1231,106 @@
     _, _, _, new_text = self._upgrade(text)
     self.assertEqual(expected_text, new_text)
 
+  def test_contrib_estimator_early_stopping(self):
+    api_symbols = [
+        "make_early_stopping_hook", "stop_if_higher_hook", "stop_if_lower_hook",
+        "stop_if_no_decrease_hook", "stop_if_no_increase_hook"
+    ]
+    for symbol in api_symbols:
+      text = "tf.contrib.estimator." + symbol
+      expected_text = "tf.estimator.experimental." + symbol
+      _, _, _, new_text = self._upgrade(text)
+      self.assertEqual(expected_text, new_text)
+
+  def test_contrib_rnn(self):
+    api_symbols = ["BasicLSTMCell", "BasicRNNCell", "GRUCell", "LSTMCell",
+                   "MultiRNNCell"]
+    for symbol in api_symbols:
+      text = "tf.contrib.rnn." + symbol
+      expected_text = "tf.compat.v1.nn.rnn_cell." + symbol
+      _, _, _, new_text = self._upgrade(text)
+      self.assertEqual(expected_text, new_text)
+
+  def test_contrib_summary_audio(self):
+    text = "tf.contrib.summary.audio('foo', myval, 44100, 3, 'fam', 42)"
+    expected = ("tf.compat.v2.summary.audio(name='foo', data=myval, "
+                "sample_rate=44100, max_outputs=3, step=42)")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'family' argument", errors[0])
+    self.assertIn("Manual check required", errors[1])
+
+  def test_contrib_summary_histogram(self):
+    text = "tf.contrib.summary.histogram('foo', myval, 'fam', 42)"
+    expected = ("tf.compat.v2.summary.histogram(name='foo', data=myval, "
+                "step=42)")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'family' argument", errors[0])
+    self.assertIn("Manual check required", errors[1])
+
+  def test_contrib_summary_image(self):
+    text = "tf.contrib.summary.image('foo', myval, red, 3, 'fam', 42)"
+    expected = ("tf.compat.v2.summary.image(name='foo', data=myval, "
+                "max_outputs=3, step=42)")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'bad_color' argument", errors[0])
+    self.assertIn("'family' argument", errors[1])
+    self.assertIn("Manual check required", errors[2])
+
+  def test_contrib_summary_scalar(self):
+    text = "tf.contrib.summary.scalar('foo', myval, 'fam', 42)"
+    expected = ("tf.compat.v2.summary.scalar(name='foo', data=myval, "
+                "step=42)")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'family' argument", errors[0])
+    self.assertIn("Manual check required", errors[1])
+
+  def test_contrib_summary_audio_nostep(self):
+    text = "tf.contrib.summary.audio('foo', myval, 44100)"
+    expected = ("tf.compat.v2.summary.audio(name='foo', data=myval, "
+                "sample_rate=44100, "
+                "step=tf.compat.v1.train.get_or_create_global_step())")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'step' argument", errors[0])
+    self.assertIn("Manual check required", errors[1])
+
+  def test_contrib_summary_histogram_nostep(self):
+    text = "tf.contrib.summary.histogram('foo', myval)"
+    expected = ("tf.compat.v2.summary.histogram(name='foo', data=myval, "
+                "step=tf.compat.v1.train.get_or_create_global_step())")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'step' argument", errors[0])
+    self.assertIn("Manual check required", errors[1])
+
+  def test_contrib_summary_image_nostep(self):
+    text = "tf.contrib.summary.image('foo', myval)"
+    expected = ("tf.compat.v2.summary.image(name='foo', data=myval, "
+                "step=tf.compat.v1.train.get_or_create_global_step())")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'step' argument", errors[0])
+    self.assertIn("Manual check required", errors[1])
+
+  def test_contrib_summary_scalar_nostep(self):
+    text = "tf.contrib.summary.scalar('foo', myval)"
+    expected = ("tf.compat.v2.summary.scalar(name='foo', data=myval, "
+                "step=tf.compat.v1.train.get_or_create_global_step())")
+    _, _, errors, new_text = self._upgrade(text)
+    self.assertEqual(expected, new_text)
+    self.assertIn("'step' argument", errors[0])
+    self.assertIn("Manual check required", errors[1])
+
+  def test_avg_pool_2d(self):
+    text = "tf.nn.avg_pool(value=4)"
+    expected_text = "tf.nn.avg_pool2d(input=4)"
+    _, _, _, new_text = self._upgrade(text)
+    self.assertEqual(expected_text, new_text)
+
 
 class TestUpgradeFiles(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py
index 09537b7..83b72cb 100644
--- a/tensorflow/tools/dockerfiles/assembler.py
+++ b/tensorflow/tools/dockerfiles/assembler.py
@@ -34,6 +34,7 @@
 import itertools
 import multiprocessing
 import os
+import platform
 import re
 import shutil
 import sys
@@ -552,6 +553,13 @@
       if not FLAGS.build_images:
         continue
 
+      # Only build images for the host architecture.
+      proc_arch = platform.processor()
+      is_x86 = proc_arch.startswith('x86')
+      if (is_x86 and any(arch in tag for arch in ['ppc64le']) or
+          not is_x86 and proc_arch not in tag):
+        continue
+
       # Generate a temporary Dockerfile to use to build, since docker-py
       # needs a filepath relative to the build context (i.e. the current
       # directory)
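
In isolation, the host-architecture filter added here behaves like the sketch below. Note that platform.processor() can return an empty string on some systems; this sketch, like the hunk above, would then treat the host as non-x86.

```python
import platform

def should_build(tag, foreign_arches=("ppc64le",)):
    """Build a tag only when it targets the host architecture."""
    proc_arch = platform.processor()
    is_x86 = proc_arch.startswith("x86")
    if is_x86:
        # On x86 hosts, skip tags that name a known foreign arch.
        return not any(arch in tag for arch in foreign_arches)
    # On other hosts, build only tags that mention the host arch.
    return proc_arch in tag
```
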
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile
index c09a958..c806fa4 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile
@@ -47,8 +47,11 @@
 #   tensorflow-gpu
 #   tf-nightly
 #   tf-nightly-gpu
+# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
+# Installs the latest version by default.
 ARG TF_PACKAGE=tensorflow
-RUN ${PIP} install ${TF_PACKAGE}
+ARG TF_PACKAGE_VERSION=
+RUN ${PIP} install ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
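
The ${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} expansion uses the shell's :+ form: it produces the ==<version> suffix only when TF_PACKAGE_VERSION is set and non-empty, so leaving the build arg unset still installs the latest release. The same logic in Python, for reference (pip_spec is an illustrative name):

```python
def pip_spec(package="tensorflow", version=""):
    # Mirrors ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}:
    # the '==version' suffix appears only when version is non-empty.
    return package + ("==" + version if version else "")

print(pip_spec())                           # -> tensorflow
print(pip_spec("tensorflow", "1.11.0rc0"))  # -> tensorflow==1.11.0rc0
```
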
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile
index 857b5e2..a82577b 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile
@@ -47,8 +47,11 @@
 #   tensorflow-gpu
 #   tf-nightly
 #   tf-nightly-gpu
+# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
+# Installs the latest version by default.
 ARG TF_PACKAGE=tensorflow
-RUN ${PIP} install ${TF_PACKAGE}
+ARG TF_PACKAGE_VERSION=
+RUN ${PIP} install ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
index 099c2dc..dc5b5d4 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
@@ -30,7 +30,6 @@
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         rsync \
@@ -43,12 +42,14 @@
         && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
- 
+
 ENV CI_BUILD_PYTHON python
 
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
 # Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
 ARG CHECKOUT_TF_SRC=0
-RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
 
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
index a401763..da81397 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
@@ -30,7 +30,6 @@
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         rsync \
@@ -43,12 +42,14 @@
         && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
- 
+
 ENV CI_BUILD_PYTHON python
 
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
 # Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
 ARG CHECKOUT_TF_SRC=0
-RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
 
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
index 44481eb..24309e3 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
@@ -21,23 +21,33 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
+ARG CUDNN_MAJOR_VERSION=7
+ARG LIB_DIR_PREFIX=x86_64
 
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-10-0 \
-        cuda-cublas-dev-10-0 \
-        cuda-cudart-dev-10-0 \
-        cuda-cufft-dev-10-0 \
-        cuda-curand-dev-10-0 \
-        cuda-cusolver-dev-10-0 \
-        cuda-cusparse-dev-10-0 \
-        libcudnn7=7.4.1.5-1+cuda10.0 \
-        libcudnn7-dev=7.4.1.5-1+cuda10.0 \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-dev-${CUDA/./-} \
+        cuda-cudart-dev-${CUDA/./-} \
+        cuda-cufft-dev-${CUDA/./-} \
+        cuda-curand-dev-${CUDA/./-} \
+        cuda-cusolver-dev-${CUDA/./-} \
+        cuda-cusparse-dev-${CUDA/./-} \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
+        libcudnn7-dev=${CUDNN}+cuda${CUDA} \
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         rsync \
@@ -48,27 +58,29 @@
         wget \
         git \
         && \
-    find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
-    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
+    find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a
 
-RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
         && apt-get update \
-        && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \
-        && rm -rf /var/lib/apt/lists/*
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*; }
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
-ENV TF_NEED_TENSORRT 1
+ENV TF_NEED_TENSORRT 0
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
-ENV TF_CUDA_VERSION=10.0
-ENV TF_CUDNN_VERSION=7
-
-# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ENV TF_CUDA_VERSION=${CUDA}
+ENV TF_CUDNN_VERSION=${CUDNN_MAJOR_VERSION}
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
 ARG CHECKOUT_TF_SRC=0
-RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
 
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
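
The SHELL directive introduced above is needed because ${CUDA/./-} is a bash pattern substitution, not POSIX sh: it replaces the first '.' in the CUDA version with '-', producing the apt package suffix (10.0 -> 10-0). The equivalent transformation in Python, for reference:

```python
def apt_suffix(cuda="10.0"):
    # bash ${CUDA/./-} replaces only the first match; count=1 mirrors that.
    return cuda.replace(".", "-", 1)

print("cuda-cufft-dev-" + apt_suffix("10.0"))  # -> cuda-cufft-dev-10-0
```
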
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
index 160abc8..6bc4e32 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
@@ -21,23 +21,33 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
+ARG CUDNN_MAJOR_VERSION=7
+ARG LIB_DIR_PREFIX=x86_64
 
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-10-0 \
-        cuda-cublas-dev-10-0 \
-        cuda-cudart-dev-10-0 \
-        cuda-cufft-dev-10-0 \
-        cuda-curand-dev-10-0 \
-        cuda-cusolver-dev-10-0 \
-        cuda-cusparse-dev-10-0 \
-        libcudnn7=7.4.1.5-1+cuda10.0 \
-        libcudnn7-dev=7.4.1.5-1+cuda10.0 \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-dev-${CUDA/./-} \
+        cuda-cudart-dev-${CUDA/./-} \
+        cuda-cufft-dev-${CUDA/./-} \
+        cuda-curand-dev-${CUDA/./-} \
+        cuda-cusolver-dev-${CUDA/./-} \
+        cuda-cusparse-dev-${CUDA/./-} \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
+        libcudnn7-dev=${CUDNN}+cuda${CUDA} \
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         rsync \
@@ -48,27 +58,29 @@
         wget \
         git \
         && \
-    find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
-    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
+    find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a
 
-RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
         && apt-get update \
-        && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \
-        && rm -rf /var/lib/apt/lists/*
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*; }
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
-ENV TF_NEED_TENSORRT 1
+ENV TF_NEED_TENSORRT 0
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
-ENV TF_CUDA_VERSION=10.0
-ENV TF_CUDNN_VERSION=7
-
-# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ENV TF_CUDA_VERSION=${CUDA}
+ENV TF_CUDNN_VERSION=${CUDNN_MAJOR_VERSION}
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
 ARG CHECKOUT_TF_SRC=0
-RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
 
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile
index 0883107..85a32fa 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile
@@ -21,32 +21,41 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
 
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
 # Pick up some TF dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-10-0 \
-        cuda-cublas-10-0 \
-        cuda-cufft-10-0 \
-        cuda-curand-10-0 \
-        cuda-cusolver-10-0 \
-        cuda-cusparse-10-0 \
-        libcudnn7=7.4.1.5-1+cuda10.0 \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-${CUDA/./-} \
+        cuda-cufft-${CUDA/./-} \
+        cuda-curand-${CUDA/./-} \
+        cuda-cusolver-${CUDA/./-} \
+        cuda-cusparse-${CUDA/./-} \
+        curl \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         software-properties-common \
         unzip
 
-RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+RUN [ "${ARCH}" = "ppc64le" ] || (apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
         && apt-get update \
-        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
         && apt-get clean \
-        && rm -rf /var/lib/apt/lists/*
+        && rm -rf /var/lib/apt/lists/*)
 
 # For CUDA profiling, TensorFlow requires CUPTI.
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
@@ -75,8 +84,11 @@
 #   tensorflow-gpu
 #   tf-nightly
 #   tf-nightly-gpu
+# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
+# Installs the latest version by default.
 ARG TF_PACKAGE=tensorflow
-RUN ${PIP} install ${TF_PACKAGE}
+ARG TF_PACKAGE_VERSION=
+RUN ${PIP} install ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
index 80e427f..c661341 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
@@ -21,32 +21,41 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
 
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
 # Pick up some TF dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-10-0 \
-        cuda-cublas-10-0 \
-        cuda-cufft-10-0 \
-        cuda-curand-10-0 \
-        cuda-cusolver-10-0 \
-        cuda-cusparse-10-0 \
-        libcudnn7=7.4.1.5-1+cuda10.0 \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-${CUDA/./-} \
+        cuda-cufft-${CUDA/./-} \
+        cuda-curand-${CUDA/./-} \
+        cuda-cusolver-${CUDA/./-} \
+        cuda-cusparse-${CUDA/./-} \
+        curl \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         software-properties-common \
         unzip
 
-RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+RUN [ "${ARCH}" = "ppc64le" ] || (apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
         && apt-get update \
-        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
         && apt-get clean \
-        && rm -rf /var/lib/apt/lists/*
+        && rm -rf /var/lib/apt/lists/*)
 
 # For CUDA profiling, TensorFlow requires CUPTI.
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
@@ -75,8 +84,11 @@
 #   tensorflow-gpu
 #   tf-nightly
 #   tf-nightly-gpu
+# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
+# Installs the latest version by default.
 ARG TF_PACKAGE=tensorflow
-RUN ${PIP} install ${TF_PACKAGE}
+ARG TF_PACKAGE_VERSION=
+RUN ${PIP} install ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile
new file mode 100644
index 0000000..63bf205
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile
@@ -0,0 +1,94 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+FROM ubuntu:${UBUNTU_VERSION} as base
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+# Options:
+#   tensorflow
+#   tensorflow-gpu
+#   tf-nightly
+#   tf-nightly-gpu
+ARG TF_PACKAGE=tensorflow
+RUN apt-get update && apt-get install -y wget libhdf5-dev
+RUN ${PIP} install --global-option=build_ext \
+            --global-option=-I/usr/include/hdf5/serial/ \
+            --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \
+            h5py
+
+# CACHE_STOP is used to rerun future commands; otherwise the downloaded .whl would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tensorflow ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    fi; \
+    MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \
+    MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \
+    PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \
+    wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \
+    ${PIP} install ${PACKAGE}
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
+
+RUN ${PIP} install jupyter matplotlib
+RUN ${PIP} install jupyter_http_over_ws
+RUN jupyter serverextension enable --py jupyter_http_over_ws
+
+RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/
+RUN mkdir /.local && chmod a+rwx /.local
+RUN apt-get install -y --no-install-recommends wget
+WORKDIR /tf/tensorflow-tutorials
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb
+COPY readme-for-jupyter.md README.md
+RUN apt-get autoremove -y && apt-get remove -y wget
+WORKDIR /tf
+EXPOSE 8888
+
+RUN ${PYTHON} -m ipykernel.kernelspec
+
+CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile
new file mode 100644
index 0000000..083d61b
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile
@@ -0,0 +1,75 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+FROM ubuntu:${UBUNTU_VERSION} as base
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+# Options:
+#   tensorflow
+#   tensorflow-gpu
+#   tf-nightly
+#   tf-nightly-gpu
+ARG TF_PACKAGE=tensorflow
+RUN apt-get update && apt-get install -y wget libhdf5-dev
+RUN ${PIP} install --global-option=build_ext \
+            --global-option=-I/usr/include/hdf5/serial/ \
+            --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \
+            h5py
+
+# CACHE_STOP is used to rerun future commands; otherwise the downloaded .whl would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tensorflow ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    fi; \
+    MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \
+    MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \
+    PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \
+    wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \
+    ${PIP} install ${PACKAGE}
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile
new file mode 100644
index 0000000..c8384f7
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile
@@ -0,0 +1,127 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+FROM ubuntu:${UBUNTU_VERSION} AS base
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        git \
+        libcurl3-dev \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libzmq3-dev \
+        pkg-config \
+        rsync \
+        software-properties-common \
+        unzip \
+        zip \
+        zlib1g-dev \
+        openjdk-8-jdk \
+        openjdk-8-jre-headless \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+ENV CI_BUILD_PYTHON python
+
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    openjdk-8-jdk \
+    ${PYTHON}-dev \
+    swig
+
+RUN ${PIP} --no-cache-dir install \
+    Pillow \
+    h5py \
+    keras_applications \
+    keras_preprocessing \
+    matplotlib \
+    mock \
+    numpy \
+    scipy \
+    sklearn \
+    pandas \
+    && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \
+    enum34
+
+# Build and install bazel
+ENV BAZEL_VERSION 0.15.0
+WORKDIR /
+RUN mkdir /bazel && \
+    cd /bazel && \
+    curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \
+    unzip bazel-$BAZEL_VERSION-dist.zip && \
+    bash ./compile.sh && \
+    cp output/bazel /usr/local/bin/ && \
+    rm -rf /bazel && \
+    cd -
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
+
+RUN ${PIP} install jupyter matplotlib
+RUN ${PIP} install jupyter_http_over_ws
+RUN jupyter serverextension enable --py jupyter_http_over_ws
+
+RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/
+RUN mkdir /.local && chmod a+rwx /.local
+RUN apt-get install -y --no-install-recommends wget
+WORKDIR /tf/tensorflow-tutorials
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb
+COPY readme-for-jupyter.md README.md
+RUN apt-get autoremove -y && apt-get remove -y wget
+WORKDIR /tf
+EXPOSE 8888
+
+RUN ${PYTHON} -m ipykernel.kernelspec
+
+CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le.Dockerfile
new file mode 100644
index 0000000..08f880e
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le.Dockerfile
@@ -0,0 +1,108 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+FROM ubuntu:${UBUNTU_VERSION} AS base
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        git \
+        libcurl3-dev \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libzmq3-dev \
+        pkg-config \
+        rsync \
+        software-properties-common \
+        unzip \
+        zip \
+        zlib1g-dev \
+        openjdk-8-jdk \
+        openjdk-8-jre-headless \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+ENV CI_BUILD_PYTHON python
+
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    openjdk-8-jdk \
+    ${PYTHON}-dev \
+    swig
+
+RUN ${PIP} --no-cache-dir install \
+    Pillow \
+    h5py \
+    keras_applications \
+    keras_preprocessing \
+    matplotlib \
+    mock \
+    numpy \
+    scipy \
+    sklearn \
+    pandas \
+    && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \
+    enum34
+
+# Build and install bazel
+ENV BAZEL_VERSION 0.15.0
+WORKDIR /
+RUN mkdir /bazel && \
+    cd /bazel && \
+    curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \
+    unzip bazel-$BAZEL_VERSION-dist.zip && \
+    bash ./compile.sh && \
+    cp output/bazel /usr/local/bin/ && \
+    rm -rf /bazel && \
+    cd -
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile
new file mode 100644
index 0000000..c508a0f
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile
@@ -0,0 +1,158 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
+ARG CUDNN_MAJOR_VERSION=7
+ARG LIB_DIR_PREFIX=x86_64
+
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-dev-${CUDA/./-} \
+        cuda-cudart-dev-${CUDA/./-} \
+        cuda-cufft-dev-${CUDA/./-} \
+        cuda-curand-dev-${CUDA/./-} \
+        cuda-cusolver-dev-${CUDA/./-} \
+        cuda-cusparse-dev-${CUDA/./-} \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
+        libcudnn7-dev=${CUDNN}+cuda${CUDA} \
+        libcurl3-dev \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libzmq3-dev \
+        pkg-config \
+        rsync \
+        software-properties-common \
+        unzip \
+        zip \
+        zlib1g-dev \
+        wget \
+        git \
+        && \
+    find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a
+
+RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*; }
+
+# Configure the build for our CUDA configuration.
+ENV CI_BUILD_PYTHON python
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
+ENV TF_NEED_CUDA 1
+ENV TF_NEED_TENSORRT 0
+ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
+ENV TF_CUDA_VERSION=${CUDA}
+ENV TF_CUDNN_VERSION=${CUDNN_MAJOR_VERSION}
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    openjdk-8-jdk \
+    ${PYTHON}-dev \
+    swig
+
+RUN ${PIP} --no-cache-dir install \
+    Pillow \
+    h5py \
+    keras_applications \
+    keras_preprocessing \
+    matplotlib \
+    mock \
+    numpy \
+    scipy \
+    sklearn \
+    pandas \
+    && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \
+    enum34
+
+# Build and install bazel
+ENV BAZEL_VERSION 0.15.0
+WORKDIR /
+RUN mkdir /bazel && \
+    cd /bazel && \
+    curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \
+    unzip bazel-$BAZEL_VERSION-dist.zip && \
+    bash ./compile.sh && \
+    cp output/bazel /usr/local/bin/ && \
+    rm -rf /bazel && \
+    cd -
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
+
+RUN ${PIP} install jupyter matplotlib
+RUN ${PIP} install jupyter_http_over_ws
+RUN jupyter serverextension enable --py jupyter_http_over_ws
+
+RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/
+RUN mkdir /.local && chmod a+rwx /.local
+RUN apt-get install -y --no-install-recommends wget
+WORKDIR /tf/tensorflow-tutorials
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb
+COPY readme-for-jupyter.md README.md
+RUN apt-get autoremove -y && apt-get remove -y wget
+WORKDIR /tf
+EXPOSE 8888
+
+RUN ${PYTHON} -m ipykernel.kernelspec
+
+CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile
new file mode 100644
index 0000000..f910cb2
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile
@@ -0,0 +1,139 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
+ARG CUDNN_MAJOR_VERSION=7
+ARG LIB_DIR_PREFIX=x86_64
+
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-dev-${CUDA/./-} \
+        cuda-cudart-dev-${CUDA/./-} \
+        cuda-cufft-dev-${CUDA/./-} \
+        cuda-curand-dev-${CUDA/./-} \
+        cuda-cusolver-dev-${CUDA/./-} \
+        cuda-cusparse-dev-${CUDA/./-} \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
+        libcudnn7-dev=${CUDNN}+cuda${CUDA} \
+        libcurl3-dev \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libzmq3-dev \
+        pkg-config \
+        rsync \
+        software-properties-common \
+        unzip \
+        zip \
+        zlib1g-dev \
+        wget \
+        git \
+        && \
+    find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a
+
+RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*; }
+
+# Configure the build for our CUDA configuration.
+ENV CI_BUILD_PYTHON python
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
+ENV TF_NEED_CUDA 1
+ENV TF_NEED_TENSORRT 0
+ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
+ENV TF_CUDA_VERSION=${CUDA}
+ENV TF_CUDNN_VERSION=${CUDNN_MAJOR_VERSION}
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone would be cached and the most recent version would never be pulled
+ARG CACHE_STOP=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    openjdk-8-jdk \
+    ${PYTHON}-dev \
+    swig
+
+RUN ${PIP} --no-cache-dir install \
+    Pillow \
+    h5py \
+    keras_applications \
+    keras_preprocessing \
+    matplotlib \
+    mock \
+    numpy \
+    scipy \
+    sklearn \
+    pandas \
+    && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \
+    enum34
+
+# Build and install bazel
+ENV BAZEL_VERSION 0.15.0
+WORKDIR /
+RUN mkdir /bazel && \
+    cd /bazel && \
+    curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \
+    unzip bazel-$BAZEL_VERSION-dist.zip && \
+    bash ./compile.sh && \
+    cp output/bazel /usr/local/bin/ && \
+    rm -rf /bazel && \
+    cd -
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile
new file mode 100644
index 0000000..1e82ca2
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile
@@ -0,0 +1,131 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
+
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
+# Pick up some TF dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-${CUDA/./-} \
+        cuda-cufft-${CUDA/./-} \
+        cuda-curand-${CUDA/./-} \
+        cuda-cusolver-${CUDA/./-} \
+        cuda-cusparse-${CUDA/./-} \
+        curl \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libzmq3-dev \
+        pkg-config \
+        software-properties-common \
+        unzip
+
+RUN [ ${ARCH} = ppc64le ] || (apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*)
+
+# For CUDA profiling, TensorFlow requires CUPTI.
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+# Options:
+#   tensorflow
+#   tensorflow-gpu
+#   tf-nightly
+#   tf-nightly-gpu
+ARG TF_PACKAGE=tensorflow
+RUN apt-get update && apt-get install -y wget libhdf5-dev
+RUN ${PIP} install --global-option=build_ext \
+            --global-option=-I/usr/include/hdf5/serial/ \
+            --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \
+            h5py
+
+# CACHE_STOP is used to rerun future commands; otherwise the .whl download is cached and will not pull the most recent version
+ARG CACHE_STOP=1
+RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tensorflow ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    fi; \
+    MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \
+    MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \
+    PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \
+    wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \
+    ${PIP} install ${PACKAGE}
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
+
+RUN ${PIP} install jupyter matplotlib
+RUN ${PIP} install jupyter_http_over_ws
+RUN jupyter serverextension enable --py jupyter_http_over_ws
+
+RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/
+RUN mkdir /.local && chmod a+rwx /.local
+RUN apt-get install -y --no-install-recommends wget
+WORKDIR /tf/tensorflow-tutorials
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb
+COPY readme-for-jupyter.md README.md
+RUN apt-get autoremove -y && apt-get remove -y wget
+WORKDIR /tf
+EXPOSE 8888
+
+RUN ${PYTHON} -m ipykernel.kernelspec
+
+CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile
new file mode 100644
index 0000000..e957275
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile
@@ -0,0 +1,112 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+#
+# THIS IS A GENERATED DOCKERFILE.
+#
+# This file was assembled from multiple pieces, whose use is documented
+# throughout. Please refer to the TensorFlow dockerfiles documentation
+# for more information.
+
+ARG UBUNTU_VERSION=16.04
+
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
+
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
+# Pick up some TF dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-${CUDA/./-} \
+        cuda-cufft-${CUDA/./-} \
+        cuda-curand-${CUDA/./-} \
+        cuda-cusolver-${CUDA/./-} \
+        cuda-cusparse-${CUDA/./-} \
+        curl \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libzmq3-dev \
+        pkg-config \
+        software-properties-common \
+        unzip
+
+RUN [ ${ARCH} = ppc64le ] || (apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*)
+
+# For CUDA profiling, TensorFlow requires CUPTI.
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
+
+ARG USE_PYTHON_3_NOT_2
+ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
+ARG PYTHON=python${_PY_SUFFIX}
+ARG PIP=pip${_PY_SUFFIX}
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+    ${PYTHON} \
+    ${PYTHON}-pip
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 
+
+# Options:
+#   tensorflow
+#   tensorflow-gpu
+#   tf-nightly
+#   tf-nightly-gpu
+ARG TF_PACKAGE=tensorflow
+RUN apt-get update && apt-get install -y wget libhdf5-dev
+RUN ${PIP} install --global-option=build_ext \
+            --global-option=-I/usr/include/hdf5/serial/ \
+            --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \
+            h5py
+
+# CACHE_STOP is used to rerun future commands; otherwise the .whl download is cached and will not pull the most recent version
+ARG CACHE_STOP=1
+RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tensorflow ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    fi; \
+    MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \
+    MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \
+    PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \
+    wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \
+    ${PIP} install ${PACKAGE}
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/partials/tensorflow-ppc64le.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/tensorflow-ppc64le.partial.Dockerfile
new file mode 100644
index 0000000..1e79574
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/partials/tensorflow-ppc64le.partial.Dockerfile
@@ -0,0 +1,28 @@
+# Options:
+#   tensorflow
+#   tensorflow-gpu
+#   tf-nightly
+#   tf-nightly-gpu
+ARG TF_PACKAGE=tensorflow
+RUN apt-get update && apt-get install -y wget libhdf5-dev
+RUN ${PIP} install --global-option=build_ext \
+            --global-option=-I/usr/include/hdf5/serial/ \
+            --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \
+            h5py
+
+# CACHE_STOP is used to rerun future commands; otherwise the .whl download is cached and will not pull the most recent version
+ARG CACHE_STOP=1
+RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tensorflow ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \
+    elif [ ${TF_PACKAGE} = tf-nightly ]; then \
+        BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \
+    fi; \
+    MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \
+    MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \
+    PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \
+    wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \
+    ${PIP} install ${PACKAGE}
diff --git a/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile
index 76758bd..2ae8406 100644
--- a/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile
@@ -3,5 +3,8 @@
 #   tensorflow-gpu
 #   tf-nightly
 #   tf-nightly-gpu
+# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
+# Installs the latest version by default.
 ARG TF_PACKAGE=tensorflow
-RUN ${PIP} install ${TF_PACKAGE}
+ARG TF_PACKAGE_VERSION=
+RUN ${PIP} install ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
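
The new install line leans on the shell's ${var:+word} expansion: when TF_PACKAGE_VERSION is unset or empty the suffix vanishes and pip installs the latest release; otherwise an ==<version> pin is appended. For example:

    TF_PACKAGE=tensorflow
    TF_PACKAGE_VERSION=
    echo "${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}"  # tensorflow
    TF_PACKAGE_VERSION=1.11.0rc0
    echo "${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}"  # tensorflow==1.11.0rc0
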
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile
new file mode 100644
index 0000000..0397ab5
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile
@@ -0,0 +1,33 @@
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    openjdk-8-jdk \
+    ${PYTHON}-dev \
+    swig
+
+RUN ${PIP} --no-cache-dir install \
+    Pillow \
+    h5py \
+    keras_applications \
+    keras_preprocessing \
+    matplotlib \
+    mock \
+    numpy \
+    scipy \
+    sklearn \
+    pandas \
+    && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \
+    enum34
+
+# Build and install bazel
+ENV BAZEL_VERSION 0.15.0
+WORKDIR /
+RUN mkdir /bazel && \
+    cd /bazel && \
+    curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \
+    unzip bazel-$BAZEL_VERSION-dist.zip && \
+    bash ./compile.sh && \
+    cp output/bazel /usr/local/bin/ && \
+    rm -rf /bazel && \
+    cd -
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
index 0652ac4..a1fd901 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
@@ -7,7 +7,6 @@
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         rsync \
@@ -20,9 +19,11 @@
         && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
- 
+
 ENV CI_BUILD_PYTHON python
 
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone is cached and will not pull the most recent version
+ARG CACHE_STOP=1
 # Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
 ARG CHECKOUT_TF_SRC=0
-RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
index 2b4494a..cf3e38b 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
@@ -1,20 +1,30 @@
-FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
+ARG CUDNN_MAJOR_VERSION=7
+ARG LIB_DIR_PREFIX=x86_64
 
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-10-0 \
-        cuda-cublas-dev-10-0 \
-        cuda-cudart-dev-10-0 \
-        cuda-cufft-dev-10-0 \
-        cuda-curand-dev-10-0 \
-        cuda-cusolver-dev-10-0 \
-        cuda-cusparse-dev-10-0 \
-        libcudnn7=7.4.1.5-1+cuda10.0 \
-        libcudnn7-dev=7.4.1.5-1+cuda10.0 \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-dev-${CUDA/./-} \
+        cuda-cudart-dev-${CUDA/./-} \
+        cuda-cufft-dev-${CUDA/./-} \
+        cuda-curand-dev-${CUDA/./-} \
+        cuda-cusolver-dev-${CUDA/./-} \
+        cuda-cusparse-dev-${CUDA/./-} \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
+        libcudnn7-dev=${CUDNN}+cuda${CUDA} \
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         rsync \
@@ -25,24 +35,26 @@
         wget \
         git \
         && \
-    find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
-    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
+    find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a
 
-RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
         && apt-get update \
-        && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \
-        && rm -rf /var/lib/apt/lists/*
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*; }
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
-ENV TF_NEED_TENSORRT 1
+ENV TF_NEED_TENSORRT 0
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
-ENV TF_CUDA_VERSION=10.0
-ENV TF_CUDNN_VERSION=7
-
-# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ENV TF_CUDA_VERSION=${CUDA}
+ENV TF_CUDNN_VERSION=${CUDNN_MAJOR_VERSION}
+# CACHE_STOP is used to rerun future commands; otherwise the TensorFlow clone is cached and will not pull the most recent version
+ARG CACHE_STOP=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
 ARG CHECKOUT_TF_SRC=0
-RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile
index a6393a3..041ee87 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile
@@ -1,29 +1,38 @@
-FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
+ARG ARCH=
+ARG CUDA=10.0
+FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
+# ARCH and CUDA are specified again because the FROM directive resets ARGs
+# (but their default value is retained if set previously)
+ARG ARCH
+ARG CUDA
+ARG CUDNN=7.4.1.5-1
 
+# Needed for string substitution 
+SHELL ["/bin/bash", "-c"]
 # Pick up some TF dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-10-0 \
-        cuda-cublas-10-0 \
-        cuda-cufft-10-0 \
-        cuda-curand-10-0 \
-        cuda-cusolver-10-0 \
-        cuda-cusparse-10-0 \
-        libcudnn7=7.4.1.5-1+cuda10.0 \
+        cuda-command-line-tools-${CUDA/./-} \
+        cuda-cublas-${CUDA/./-} \
+        cuda-cufft-${CUDA/./-} \
+        cuda-curand-${CUDA/./-} \
+        cuda-cusolver-${CUDA/./-} \
+        cuda-cusparse-${CUDA/./-} \
+        curl \
+        libcudnn7=${CUDNN}+cuda${CUDA} \
         libfreetype6-dev \
         libhdf5-serial-dev \
-        libpng12-dev \
         libzmq3-dev \
         pkg-config \
         software-properties-common \
         unzip
 
-RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+RUN [ ${ARCH} = ppc64le ] || (apt-get update && \
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \
         && apt-get update \
-        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
         && apt-get clean \
-        && rm -rf /var/lib/apt/lists/*
+        && rm -rf /var/lib/apt/lists/*)
 
 # For CUDA profiling, TensorFlow requires CUPTI.
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
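
Two bash substitutions carry the new parameterization: ${ARCH:+-$ARCH} expands to an architecture infix only when ARCH is non-empty, and ${CUDA/./-} (which needs bash, hence the SHELL directive) turns the dotted CUDA version into the dashed form used in apt package names:

    ARCH=ppc64le
    CUDA=10.0
    echo "nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu16.04"   # nvidia/cuda-ppc64le:10.0-base-ubuntu16.04
    echo "cuda-command-line-tools-${CUDA/./-}"                   # cuda-command-line-tools-10-0
    ARCH=
    echo "nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu16.04"   # nvidia/cuda:10.0-base-ubuntu16.04
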
diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml
index 19d96e7..6fddfe0 100644
--- a/tensorflow/tools/dockerfiles/spec.yml
+++ b/tensorflow/tools/dockerfiles/spec.yml
@@ -55,6 +55,8 @@
         tag_specs:
             - "{ubuntu}{jupyter}"
             - "{ubuntu-devel}{jupyter}"
+            - "{ubuntu-ppc64le}{jupyter}"
+            - "{ubuntu-devel-ppc64le}{jupyter}"
 
 slice_sets:
 
@@ -122,6 +124,70 @@
           args:
               - CHECKOUT_TF_SRC=1
 
+    ubuntu-ppc64le:
+        - add_to_name: "-ppc64le"
+          dockerfile_exclusive_name: "cpu-ppc64le"
+          dockerfile_subdirectory: "ppc64le"
+          args:
+              - UBUNTU_VERSION=18.04
+          partials:
+              - ubuntu/version
+              - ubuntu/cpu
+              - ubuntu/python
+              - tensorflow-ppc64le
+              - shell
+        - add_to_name: "-gpu-ppc64le"
+          dockerfile_exclusive_name: "gpu-ppc64le"
+          dockerfile_subdirectory: "ppc64le"
+          args:
+              - UBUNTU_VERSION=18.04
+              - ARCH=ppc64le
+              - CUDA=10.0
+              - TF_PACKAGE=tensorflow-gpu
+          partials:
+              - ubuntu/version
+              - ubuntu/nvidia
+              - ubuntu/python
+              - tensorflow-ppc64le
+              - shell
+          tests:
+              - import-gpu.sh
+          test_runtime: nvidia
+
+    ubuntu-devel-ppc64le:
+        - add_to_name: "devel-ppc64le"
+          dockerfile_exclusive_name: "devel-cpu-ppc64le"
+          dockerfile_subdirectory: "ppc64le"
+          partials:
+              - ubuntu/version
+              - ubuntu/devel-cpu
+              - ubuntu/python
+              - ubuntu/bazelbuild
+              - shell
+          tests:
+              - build-cpu.sh
+          args:
+              - UBUNTU_VERSION=18.04
+              - CHECKOUT_TF_SRC=1
+        - add_to_name: "devel-gpu-ppc64le"
+          dockerfile_exclusive_name: "devel-gpu-ppc64le"
+          dockerfile_subdirectory: "ppc64le"
+          args:
+              - UBUNTU_VERSION=18.04
+              - ARCH=ppc64le
+              - CUDA=10.0
+              - LIB_DIR_PREFIX=powerpc64le
+              - CHECKOUT_TF_SRC=1
+          partials:
+              - ubuntu/version
+              - ubuntu/devel-nvidia
+              - ubuntu/python
+              - ubuntu/bazelbuild
+              - shell
+          tests:
+              - build-gpu.sh
+          test_runtime: nvidia
+
     nightly:
         - add_to_name: "nightly"
           partials:
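
Once the assembler regenerates the Dockerfiles from these slice sets, the ppc64le files land in the ppc64le/ subdirectory seen earlier in this change and can be built like any other generated file, for example (tag name illustrative; paths assume the repository root as the working directory):

    docker build \
        -f tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile \
        -t tensorflow:cpu-ppc64le \
        tensorflow/tools/dockerfiles
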
diff --git a/tensorflow/tools/dockerfiles/tools.Dockerfile b/tensorflow/tools/dockerfiles/tools.Dockerfile
index e892929..a96b257 100644
--- a/tensorflow/tools/dockerfiles/tools.Dockerfile
+++ b/tensorflow/tools/dockerfiles/tools.Dockerfile
@@ -17,7 +17,7 @@
 #
 # You can use this image to quickly develop changes to the Dockerfile assembler
 # or set of TF Docker partials. See README.md for usage instructions.
-FROM debian:stretch
+FROM ubuntu:16.04
 LABEL maintainer="Austin Anderson <angerson@google.com>"
 
 RUN apt-get update && apt-get install -y python3 python3-pip bash curl
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 30ee516..90dfca2 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -80,6 +80,7 @@
     "//tensorflow/python/data/benchmarks:benchmark_base",
     "//tensorflow/python/data/experimental/kernel_tests/serialization:dataset_serialization_test_base",
     "//tensorflow/python/data/experimental/kernel_tests:stats_dataset_test_base",
+    "//tensorflow/python/data/kernel_tests:filter_test_base",
     "//tensorflow/python/data/kernel_tests:test_base",
     "//tensorflow/python/debug:debug_pip",
     "//tensorflow/python/eager:eager_pip",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index 952c71c..3bcc4fc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -34,6 +34,7 @@
 # pip smoke test.
 BUILD_BLACKLIST = [
     "tensorflow/lite/examples/android",
+    "tensorflow/lite/experimental/objc",
     "tensorflow/lite/experimental/swift",
 ]
 
@@ -88,8 +89,8 @@
     "//tensorflow/python/feature_column:vocabulary_testdata",
     "//tensorflow/python:framework/test_file_system.so",
     # lite
-    "//tensorflow/lite/experimental/examples/lstm:tflite_lstm",
-    "//tensorflow/lite/experimental/examples/lstm:tflite_lstm.py",
+    "//tensorflow/lite/experimental/examples/lstm:rnn_cell",
+    "//tensorflow/lite/experimental/examples/lstm:rnn_cell.py",
     "//tensorflow/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test",  # pylint:disable=line-too-long
     "//tensorflow/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test.py",  # pylint:disable=line-too-long
     "//tensorflow/lite/python:interpreter",
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index bbe3050..83081a1 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -51,7 +51,7 @@
     'absl-py >= 0.1.6',
     'astor >= 0.6.0',
     'gast >= 0.2.0',
-    'google_pasta >= 0.1.1',
+    'google_pasta >= 0.1.2',
     'keras_applications >= 1.0.6',
     'keras_preprocessing >= 1.0.5',
     'numpy >= 1.14.5, < 2.0',
@@ -284,6 +284,7 @@
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
         'Topic :: Scientific/Engineering',
         'Topic :: Scientific/Engineering :: Mathematics',
         'Topic :: Scientific/Engineering :: Artificial Intelligence',
diff --git a/tensorflow/tools/test/BUILD b/tensorflow/tools/test/BUILD
index 4b2026b..ef12226 100644
--- a/tensorflow/tools/test/BUILD
+++ b/tensorflow/tools/test/BUILD
@@ -57,6 +57,14 @@
     srcs = ["run_and_gather_logs.py"],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
+    deps = [":run_and_gather_logs_main_lib"],
+)
+
+py_library(
+    name = "run_and_gather_logs_main_lib",
+    srcs = ["run_and_gather_logs.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
     deps = [
         ":run_and_gather_logs_lib",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/tools/test/performance.bzl b/tensorflow/tools/test/performance.bzl
index 3486871..9786111 100644
--- a/tensorflow/tools/test/performance.bzl
+++ b/tensorflow/tools/test/performance.bzl
@@ -4,60 +4,66 @@
 
 # Create a benchmark test target of a TensorFlow C++ test (tf_cc_*_test)
 def tf_cc_logged_benchmark(
-    name=None,
-    target=None,
-    benchmarks="..",
-    tags=[],
-    test_log_output_prefix="",
-    benchmark_type="cpp_microbenchmark"):
-  if not name:
-    fail("Must provide a name")
-  if not target:
-    fail("Must provide a target")
-  if (not ":" in target
-      or not target.startswith("//")
-      or target.endswith(":all")
-      or target.endswith(".")):
-    fail(" ".join(("Target must be a single well-defined test, e.g.,",
-                   "//path/to:test. Received: %s" % target)))
+        name = None,
+        target = None,
+        benchmarks = "..",
+        tags = [],
+        test_log_output_prefix = "",
+        benchmark_type = "cpp_microbenchmark"):
+    if not name:
+        fail("Must provide a name")
+    if not target:
+        fail("Must provide a target")
+    if (not ":" in target or
+        not target.startswith("//") or
+        target.endswith(":all") or
+        target.endswith(".")):
+        fail(" ".join((
+            "Target must be a single well-defined test, e.g.,",
+            "//path/to:test. Received: %s" % target,
+        )))
 
-  all_tags = (
-    depset(tags) + depset(
-      ["benchmark-test", "local", "manual", "regression-test"])).to_list()
+    all_tags = (
+        depset(tags) + depset(
+            ["benchmark-test", "local", "manual", "regression-test"],
+        )
+    ).to_list()
 
-  tf_py_test(
-      name = name,
-      tags = all_tags,
-      size = "large",
-      srcs = ["//tensorflow/tools/test:run_and_gather_logs"],
-      args = [
-          "--name=//%s:%s" % (native.package_name(), name),
-          "--test_name=" + target,
-          "--test_args=--benchmarks=%s" % benchmarks,
-          "--benchmark_type=%s" % benchmark_type,
-      ],
-      data = [
-        target,
-      ],
-      main = "run_and_gather_logs.py",
-      additional_deps = [
-          "//tensorflow/tools/test:run_and_gather_logs"
-      ])
+    tf_py_test(
+        name = name,
+        tags = all_tags,
+        size = "large",
+        srcs = ["//tensorflow/tools/test:run_and_gather_logs"],
+        args = [
+            "--name=//%s:%s" % (native.package_name(), name),
+            "--test_name=" + target,
+            "--test_args=--benchmarks=%s" % benchmarks,
+            "--benchmark_type=%s" % benchmark_type,
+        ],
+        data = [
+            target,
+        ],
+        main = "run_and_gather_logs.py",
+        additional_deps = [
+            "//tensorflow/tools/test:run_and_gather_logs",
+        ],
+    )
 
 # Create a benchmark test target of a TensorFlow python test (*py_tests)
 def tf_py_logged_benchmark(
-    name=None,
-    target=None,
-    benchmarks="..",
-    tags=[],
-    test_log_output_prefix=""):
-  # For now generating a py benchmark is the same as generating a C++
-  # benchmark target. In the future this may change, so we have
-  # two macros just in case
-  tf_cc_logged_benchmark(
-    name=name,
-    target=target,
-    benchmarks=benchmarks,
-    tags=tags,
-    test_log_output_prefix=test_log_output_prefix,
-    benchmark_type="python_benchmark")
+        name = None,
+        target = None,
+        benchmarks = "..",
+        tags = [],
+        test_log_output_prefix = ""):
+    # For now generating a py benchmark is the same as generating a C++
+    # benchmark target. In the future this may change, so we have
+    # two macros just in case
+    tf_cc_logged_benchmark(
+        name = name,
+        target = target,
+        benchmarks = benchmarks,
+        tags = tags,
+        test_log_output_prefix = test_log_output_prefix,
+        benchmark_type = "python_benchmark",
+    )
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 93e73df..d2072a5 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -7,7 +7,6 @@
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
 load("//third_party/git:git_configure.bzl", "git_configure")
 load("//third_party/py:python_configure.bzl", "python_configure")
-
 load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
 load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure")
 load("//third_party/toolchains/remote:configure.bzl", "remote_execution_configure")
@@ -140,12 +139,12 @@
     tf_http_archive(
         name = "eigen_archive",
         build_file = clean_dep("//third_party:eigen.BUILD"),
-        patch_file = clean_dep("//third_party/eigen3:gebp_neon.patch"),
-        sha256 = "48678550a32665331d729be87076e576f2502fff325f5b6c2c78ebf7b1b22c7b",
-        strip_prefix = "eigen-eigen-bcc817c0ba98",
+        patch_file = clean_dep("//third_party/eigen3:gpu_packet_math.patch"),
+        sha256 = "61f0017318a24cf940db14e57febecc524b24a9faa8ff4fa7f9f91630c4cd09d",
+        strip_prefix = "eigen-eigen-5a4931dafc1c",
         urls = [
-            "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/bcc817c0ba98.tar.gz",
-            "https://bitbucket.org/eigen/eigen/get/bcc817c0ba98.tar.gz",
+            "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/5a4931dafc1c.tar.gz",
+            "https://bitbucket.org/eigen/eigen/get/5a4931dafc1c.tar.gz",
         ],
     )
 
@@ -186,15 +185,15 @@
 
     tf_http_archive(
         name = "com_github_googlecloudplatform_google_cloud_cpp",
-        sha256 = "886bcba3616d5f362838a2d86ae0198dd3670a84a84c82291cda6c30e14779fc",
-        strip_prefix = "google-cloud-cpp-0.5.0",
+        sha256 = "8e3a302d37f232dec041bf3f3916ca3fa5689216d42112898a4e36581f2f4ce5",
+        strip_prefix = "google-cloud-cpp-0.6.1",
         system_build_file = clean_dep("//third_party/systemlibs:google_cloud_cpp.BUILD"),
         system_link_files = {
             "//third_party/systemlibs:google_cloud_cpp.google.cloud.bigtable.BUILD": "google/cloud/bigtable/BUILD",
         },
         urls = [
-            "https://mirror.bazel.build/github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.5.0.tar.gz",
-            "https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.5.0.tar.gz",
+            "https://mirror.bazel.build/github.com/googleapis/google-cloud-cpp/archive/v0.6.1.tar.gz",
+            "https://github.com/googleapis/google-cloud-cpp/archive/v0.6.1.tar.gz",
         ],
     )
 
@@ -478,12 +477,12 @@
     # WARNING: make sure ncteisen@ and vpai@ are cc-ed on any CL to change the below rule
     tf_http_archive(
         name = "grpc",
-        sha256 = "1aa84387232dda273ea8fdfe722622084f72c16f7b84bfc519ac7759b71cdc91",
-        strip_prefix = "grpc-69b6c047bc767b4d80e7af4d00ccb7c45b683dae",
+        sha256 = "fb56c4a03b9eef675ba70e9bf19a25c31f5d06e8d4f36907441eede678b51e47",
+        strip_prefix = "grpc-69b2d570428623d2d9097ec9e21b3e0a02268ab9",
         system_build_file = clean_dep("//third_party/systemlibs:grpc.BUILD"),
         urls = [
-            "https://mirror.bazel.build/github.com/grpc/grpc/archive/69b6c047bc767b4d80e7af4d00ccb7c45b683dae.tar.gz",
-            "https://github.com/grpc/grpc/archive/69b6c047bc767b4d80e7af4d00ccb7c45b683dae.tar.gz",
+            "https://mirror.bazel.build/github.com/grpc/grpc/archive/69b2d570428623d2d9097ec9e21b3e0a02268ab9.tar.gz",
+            "https://github.com/grpc/grpc/archive/69b2d570428623d2d9097ec9e21b3e0a02268ab9.tar.gz",
         ],
     )
 
@@ -719,16 +718,6 @@
     )
 
     tf_http_archive(
-        name = "bazel_toolchains",
-        sha256 = "07dfbe80638eb1fe681f7c07e61b34b579c6710c691e49ee90ccdc6e9e75ebbb",
-        strip_prefix = "bazel-toolchains-9a111bd82161c1fbe8ed17a593ca1023fd941c70",
-        urls = [
-            "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz",
-            "https://github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz",
-        ],
-    )
-
-    tf_http_archive(
         name = "arm_neon_2_x86_sse",
         build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"),
         sha256 = "213733991310b904b11b053ac224fee2d4e0179e46b52fe7f8735b8831e04dcc",
diff --git a/third_party/android/android.bzl.tpl b/third_party/android/android.bzl.tpl
index e6ed499..c498f58 100644
--- a/third_party/android/android.bzl.tpl
+++ b/third_party/android/android.bzl.tpl
@@ -1,9 +1,9 @@
 """Set up configurable Android SDK and NDK dependencies."""
 
 def android_workspace():
-  # String for replacement in Bazel template.
-  # These will either be replaced by android_sdk_repository if various ENV
-  # variables are set when `local_config_android` repo_rule is run, or they
-  # will be replaced by noops otherwise.
-  MAYBE_ANDROID_SDK_REPOSITORY
-  MAYBE_ANDROID_NDK_REPOSITORY
+    # String for replacement in Bazel template.
+    # These will either be replaced by android_sdk_repository if various ENV
+    # variables are set when `local_config_android` repo_rule is run, or they
+    # will be replaced by noops otherwise.
+    MAYBE_ANDROID_SDK_REPOSITORY
+    MAYBE_ANDROID_NDK_REPOSITORY
diff --git a/third_party/android/android_configure.bzl b/third_party/android/android_configure.bzl
index da09bdf..646ed73 100644
--- a/third_party/android/android_configure.bzl
+++ b/third_party/android/android_configure.bzl
@@ -36,33 +36,39 @@
 """
 
 def _android_autoconf_impl(repository_ctx):
-  """Implementation of the android_autoconf repository rule."""
-  sdk_home = repository_ctx.os.environ.get(_ANDROID_SDK_HOME)
-  sdk_api_level = repository_ctx.os.environ.get(_ANDROID_SDK_API_VERSION)
-  build_tools_version = repository_ctx.os.environ.get(
-      _ANDROID_BUILD_TOOLS_VERSION)
-  ndk_home = repository_ctx.os.environ.get(_ANDROID_NDK_HOME)
-  ndk_api_level = repository_ctx.os.environ.get(_ANDROID_NDK_API_VERSION)
+    """Implementation of the android_autoconf repository rule."""
+    sdk_home = repository_ctx.os.environ.get(_ANDROID_SDK_HOME)
+    sdk_api_level = repository_ctx.os.environ.get(_ANDROID_SDK_API_VERSION)
+    build_tools_version = repository_ctx.os.environ.get(
+        _ANDROID_BUILD_TOOLS_VERSION,
+    )
+    ndk_home = repository_ctx.os.environ.get(_ANDROID_NDK_HOME)
+    ndk_api_level = repository_ctx.os.environ.get(_ANDROID_NDK_API_VERSION)
 
-  sdk_rule = "pass"
-  if all([sdk_home, sdk_api_level, build_tools_version]):
-    sdk_rule = _ANDROID_SDK_REPO_TEMPLATE % (
-        sdk_home, sdk_api_level, build_tools_version)
+    sdk_rule = "pass"
+    if all([sdk_home, sdk_api_level, build_tools_version]):
+        sdk_rule = _ANDROID_SDK_REPO_TEMPLATE % (
+            sdk_home,
+            sdk_api_level,
+            build_tools_version,
+        )
 
-  ndk_rule = "pass"
-  if all([ndk_home, ndk_api_level]):
-    ndk_rule = _ANDROID_NDK_REPO_TEMPLATE % (ndk_home, ndk_api_level)
+    ndk_rule = "pass"
+    if all([ndk_home, ndk_api_level]):
+        ndk_rule = _ANDROID_NDK_REPO_TEMPLATE % (ndk_home, ndk_api_level)
 
-  repository_ctx.template(
-      "BUILD",
-      Label("//third_party/android:android_configure.BUILD.tpl"))
-  repository_ctx.template(
-      "android.bzl",
-      Label("//third_party/android:android.bzl.tpl"),
-      substitutions={
-          "MAYBE_ANDROID_SDK_REPOSITORY": sdk_rule,
-          "MAYBE_ANDROID_NDK_REPOSITORY": ndk_rule,
-      })
+    repository_ctx.template(
+        "BUILD",
+        Label("//third_party/android:android_configure.BUILD.tpl"),
+    )
+    repository_ctx.template(
+        "android.bzl",
+        Label("//third_party/android:android.bzl.tpl"),
+        substitutions = {
+            "MAYBE_ANDROID_SDK_REPOSITORY": sdk_rule,
+            "MAYBE_ANDROID_NDK_REPOSITORY": ndk_rule,
+        },
+    )
 
 android_configure = repository_rule(
     implementation = _android_autoconf_impl,
diff --git a/third_party/common.bzl b/third_party/common.bzl
index db981a5..8134bf3 100644
--- a/third_party/common.bzl
+++ b/third_party/common.bzl
@@ -21,11 +21,11 @@
 #   substitutions: A dictionary mapping strings to their substitutions
 
 def template_rule_impl(ctx):
-  ctx.template_action(
-      template = ctx.file.src,
-      output = ctx.outputs.out,
-      substitutions = ctx.attr.substitutions,
-  )
+    ctx.template_action(
+        template = ctx.file.src,
+        output = ctx.outputs.out,
+        substitutions = ctx.attr.substitutions,
+    )
 
 template_rule = rule(
     attrs = {
diff --git a/third_party/eigen3/gebp_neon.patch b/third_party/eigen3/gebp_neon.patch
deleted file mode 100644
index d0022e9..0000000
--- a/third_party/eigen3/gebp_neon.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h	2019-01-22 20:46:51.000000000 -0800
-+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h	2019-01-25 13:48:49.000000000 -0800
-@@ -1031,7 +1031,7 @@
- 
-   EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
-   {
--    c += a * b;
-+    c = vfmaq_n_f32(c, a, b);
-   }
- 
-   EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
diff --git a/third_party/eigen3/gpu_packet_math.patch b/third_party/eigen3/gpu_packet_math.patch
new file mode 100644
index 0000000..0634753
--- /dev/null
+++ b/third_party/eigen3/gpu_packet_math.patch
@@ -0,0 +1,18 @@
+--- a/Eigen/src/Core/arch/GPU/PacketMath.h
++++ b/Eigen/src/Core/arch/GPU/PacketMath.h
+@@ -100,6 +100,7 @@
+   return make_double2(from, from);
+ }
+ 
++#if defined(EIGEN_CUDA_ARCH)
+ namespace {
+ 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
+@@ -211,6 +212,7 @@
+ pcmp_eq<double2>(const double2& a, const double2& b) {
+   return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));
+ }
++#endif  // EIGEN_CUDA_ARCH
+ 
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
+   return make_float4(a, a+1, a+2, a+3);
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index 223ea4d..8df6782 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -178,25 +178,37 @@
 struct unpacket_traits<Packet32q8i> {
   typedef QInt8 type;
   typedef Packet16q8i half;
-  enum { size = 32, alignment = Aligned32 };
+  enum { size = 32, alignment = Aligned32, vectorizable = true };
+};
+template <>
+struct unpacket_traits<Packet16q8i> {
+  typedef QInt8 type;
+  typedef Packet16q8i half;
+  enum { size = 16, alignment = Aligned32, vectorizable = true };
 };
 template <>
 struct unpacket_traits<Packet16q16i> {
   typedef QInt16 type;
   typedef Packet8q16i half;
-  enum { size = 16, alignment = Aligned32 };
+  enum { size = 16, alignment = Aligned32, vectorizable = true };
+};
+template <>
+struct unpacket_traits<Packet8q16i> {
+  typedef QInt16 type;
+  typedef Packet8q16i half;
+  enum { size = 8, alignment = Aligned32, vectorizable = true };
 };
 template <>
 struct unpacket_traits<Packet32q8u> {
   typedef QUInt8 type;
   typedef Packet16q8u half;
-  enum { size = 32, alignment = Aligned32 };
+  enum { size = 32, alignment = Aligned32, vectorizable = true };
 };
 template <>
 struct unpacket_traits<Packet8q32i> {
   typedef QInt32 type;
   typedef Packet4q32i half;
-  enum { size = 8, alignment = Aligned32 };
+  enum { size = 8, alignment = Aligned32, vectorizable = true };
 };
 
 // Unaligned load
@@ -206,6 +218,11 @@
       reinterpret_cast<const __m256i*>(from));
 }
 template <>
+EIGEN_STRONG_INLINE Packet16q8i ploadu<Packet16q8i>(const QInt8* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(
+      reinterpret_cast<const __m128i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet32q8u ploadu<Packet32q8u>(const QUInt8* from) {
   EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
       reinterpret_cast<const __m256i*>(from));
@@ -215,6 +232,11 @@
   EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
       reinterpret_cast<const __m256i*>(from));
 }
+template<>
+EIGEN_STRONG_INLINE Packet8q16i ploadu<Packet8q16i>(const QInt16* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(
+      reinterpret_cast<const __m128i*>(from));
+}
 template <>
 EIGEN_STRONG_INLINE Packet8q32i ploadu<Packet8q32i>(const QInt32* from) {
   EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
@@ -228,6 +250,11 @@
       reinterpret_cast<const __m256i*>(from));
 }
 template <>
+EIGEN_STRONG_INLINE Packet16q8i pload<Packet16q8i>(const QInt8* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(
+      reinterpret_cast<const __m128i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet32q8u pload<Packet32q8u>(const QUInt8* from) {
   EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(
       reinterpret_cast<const __m256i*>(from));
@@ -238,6 +265,11 @@
       reinterpret_cast<const __m256i*>(from));
 }
 template <>
+EIGEN_STRONG_INLINE Packet8q16i pload<Packet8q16i>(const QInt16* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(
+      reinterpret_cast<const __m128i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet8q32i pload<Packet8q32i>(const QInt32* from) {
   EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(
       reinterpret_cast<const __m256i*>(from));
@@ -250,6 +282,11 @@
       reinterpret_cast<__m256i*>(to), from.val);
 }
 template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt8>(QInt8* to, const Packet16q8i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(
+      reinterpret_cast<__m128i*>(to), from.val);
+}
+template <>
 EIGEN_STRONG_INLINE void pstoreu<QUInt8>(QUInt8* to, const Packet32q8u& from) {
   EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
       reinterpret_cast<__m256i*>(to), from.val);
@@ -260,6 +297,11 @@
       reinterpret_cast<__m256i*>(to), from.val);
 }
 template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet8q16i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(
+      reinterpret_cast<__m128i*>(to), from.val);
+}
+template <>
 EIGEN_STRONG_INLINE void pstoreu<QInt32>(QInt32* to, const Packet8q32i& from) {
   EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
       reinterpret_cast<__m256i*>(to), from.val);
@@ -277,6 +319,11 @@
                                                from.val);
 }
 template <>
+EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet8q16i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to),
+                                            from.val);
+}
+template <>
 EIGEN_STRONG_INLINE void pstore<QUInt8>(QUInt8* to, const Packet32q8u& from) {
   EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
                                                from.val);
@@ -286,6 +333,11 @@
   EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
                                                from.val);
 }
+template <>
+EIGEN_STRONG_INLINE void pstore<QInt8>(QInt8* to, const Packet16q8i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to),
+                                            from.val);
+}
 
 // Extract first element.
 template <>
diff --git a/third_party/git/git_configure.bzl b/third_party/git/git_configure.bzl
index 8e2839b..fc18fdb 100644
--- a/third_party/git/git_configure.bzl
+++ b/third_party/git/git_configure.bzl
@@ -8,49 +8,57 @@
 _PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
 
 def _fail(msg):
-  """Output failure message when auto configuration fails."""
-  red = "\033[0;31m"
-  no_color = "\033[0m"
-  fail("%sGit Configuration Error:%s %s\n" % (red, no_color, msg))
+    """Output failure message when auto configuration fails."""
+    red = "\033[0;31m"
+    no_color = "\033[0m"
+    fail("%sGit Configuration Error:%s %s\n" % (red, no_color, msg))
 
 def _get_python_bin(repository_ctx):
-  """Gets the python bin path."""
-  python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
-  if python_bin != None:
-    return python_bin
-  python_bin_path = repository_ctx.which("python")
-  if python_bin_path != None:
-    return str(python_bin_path)
-  _fail("Cannot find python in PATH, please make sure " +
-        "python is installed and add its directory in PATH, or --define " +
-        "%s='/something/else'.\nPATH=%s" % (
-            _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", "")))
-
+    """Gets the python bin path."""
+    python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
+    if python_bin != None:
+        return python_bin
+    python_bin_path = repository_ctx.which("python")
+    if python_bin_path != None:
+        return str(python_bin_path)
+    _fail("Cannot find python in PATH, please make sure " +
+          "python is installed and add its directory in PATH, or --define " +
+          "%s='/something/else'.\nPATH=%s" % (
+              _PYTHON_BIN_PATH,
+              repository_ctx.os.environ.get("PATH", ""),
+          ))
 
 def _git_conf_impl(repository_ctx):
-  repository_ctx.template(
-      "BUILD",
-      Label("//third_party/git:BUILD.tpl"))
+    repository_ctx.template(
+        "BUILD",
+        Label("//third_party/git:BUILD.tpl"),
+    )
 
-  tensorflow_root_path = str(repository_ctx.path(
-      Label("@org_tensorflow//:BUILD")))[:-len("BUILD")]
-  python_script_path = repository_ctx.path(
-      Label("@org_tensorflow//tensorflow/tools/git:gen_git_source.py"))
-  generated_files_path = repository_ctx.path("gen")
+    tensorflow_root_path = str(repository_ctx.path(
+        Label("@org_tensorflow//:BUILD"),
+    ))[:-len("BUILD")]
+    python_script_path = repository_ctx.path(
+        Label("@org_tensorflow//tensorflow/tools/git:gen_git_source.py"),
+    )
+    generated_files_path = repository_ctx.path("gen")
 
-  r = repository_ctx.execute(
-      ["test", "-f", "%s/.git/logs/HEAD" % tensorflow_root_path])
-  if r.return_code == 0:
-    unused_var = repository_ctx.path(Label("//:.git/HEAD")) # pylint: disable=unused-variable
+    r = repository_ctx.execute(
+        ["test", "-f", "%s/.git/logs/HEAD" % tensorflow_root_path],
+    )
+    if r.return_code == 0:
+        unused_var = repository_ctx.path(Label("//:.git/HEAD"))  # pylint: disable=unused-variable
 
-  result = repository_ctx.execute([
-      _get_python_bin(repository_ctx),
-      python_script_path, "--configure", tensorflow_root_path,
-      "--gen_root_path", generated_files_path], quiet=False)
+    result = repository_ctx.execute([
+        _get_python_bin(repository_ctx),
+        python_script_path,
+        "--configure",
+        tensorflow_root_path,
+        "--gen_root_path",
+        generated_files_path,
+    ], quiet = False)
 
-  if not result.return_code == 0:
-    _fail(result.stderr)
-
+    if not result.return_code == 0:
+        _fail(result.stderr)
 
 git_configure = repository_rule(
     implementation = _git_conf_impl,
diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
index 0e175b3..221ccb2 100644
--- a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
+++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
@@ -152,7 +152,11 @@
     # Removal of unused code and data at link time (can this increase binary size in some cases?).
     compiler_flag: "-ffunction-sections"
     compiler_flag: "-fdata-sections"
-    linker_flag: "-Wl,--gc-sections"
+
+    # With hipcc -fno-gpu-rdc, objects and libraries contain a .kernel
+    # section that would be removed by ld, so --gc-sections must not be
+    # used here.
+    #linker_flag: "-Wl,--gc-sections"
   }
   linking_mode_flags { mode: DYNAMIC }
 }
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
index 57c7b63..c159531 100755
--- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
@@ -30,6 +30,12 @@
 
 HIPCC_PATH = '%{hipcc_path}'
 PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
+HIPCC_ENV = '%{hipcc_env}'
+HIP_RUNTIME_PATH = '%{hip_runtime_path}'
+HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
+HCC_RUNTIME_PATH = '%{hcc_runtime_path}'
+HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}'
+VERBOSE = '%{crosstool_verbose}'=='1'
 
 def Log(s):
   print('gpus/crosstool: {0}'.format(s))
@@ -165,6 +171,11 @@
 
   hipccopts = ' '
   hipccopts += ' ' + hipcc_compiler_options
+  # Use -fno-gpu-rdc by default for early GPU kernel finalization.
+  # This flag causes GPU kernels to be generated at compile time instead
+  # of link time, which allows the default host compiler (gcc) to be used
+  # as the linker for TensorFlow on the ROCm platform.
+  hipccopts += ' -fno-gpu-rdc '
   hipccopts += undefines
   hipccopts += defines
   hipccopts += std_options
@@ -205,37 +216,46 @@
   args, leftover = parser.parse_known_args(sys.argv[1:])
 
   if args.x and args.x[0] == 'rocm':
-    # XXX use hipcc to link
-    if args.pass_exit_codes:
-      gpu_compiler_flags = [flag for flag in sys.argv[1:]
-                                 if not flag.startswith(('-pass-exit-codes'))]
-
-      # special handling for $ORIGIN
-      # - guard every argument with ''
-      modified_gpu_compiler_flags = []
-      for flag in gpu_compiler_flags:
-        modified_gpu_compiler_flags.append("'" + flag + "'")
-
-      if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags)))
-      return subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags)
-
+    # compilation for GPU objects
     if args.rocm_log: Log('-x rocm')
     leftover = [pipes.quote(s) for s in leftover]
     if args.rocm_log: Log('using hipcc')
     return InvokeHipcc(leftover, log=args.rocm_log)
 
-  # Strip our flags before passing through to the CPU compiler for files which
-  # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
-  # We not only want to pass -x to the CPU compiler, but also keep it in its
-  # relative location in the argv list (the compiler is actually sensitive to
-  # this).
-  cpu_compiler_flags = [flag for flag in sys.argv[1:]
-                             if not flag.startswith(('--rocm_log'))]
+  elif args.pass_exit_codes:
+    # link
+    # With the hipcc compiler now invoked with -fno-gpu-rdc by default, it is
+    # OK to use the host compiler as the linker, but we have to link with the
+    # HCC/HIP runtime. This restriction will be revisited as the bazel scripts
+    # are improved to fine-tune the dependencies on ROCm libraries.
+    gpu_linker_flags = [flag for flag in sys.argv[1:]
+                               if not flag.startswith(('--rocm_log'))]
 
-  # XXX: SE codes need to be built with gcc, but need this macro defined
-  cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
+    gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH)
+    gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH)
+    gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY)
+    gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH)
+    gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH)
+    gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY)
 
-  return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
+    if VERBOSE: print(' '.join([CPU_COMPILER] + gpu_linker_flags))
+    return subprocess.call([CPU_COMPILER] + gpu_linker_flags)
+
+  else:
+    # compilation for host objects
+
+    # Strip our flags before passing through to the CPU compiler for files which
+    # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
+    # We not only want to pass -x to the CPU compiler, but also keep it in its
+    # relative location in the argv list (the compiler is actually sensitive to
+    # this).
+    cpu_compiler_flags = [flag for flag in sys.argv[1:]
+                               if not flag.startswith(('--rocm_log'))]
+
+    # XXX: SE codes need to be built with gcc, but need this macro defined
+    cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
+    if VERBOSE: print(' '.join([CPU_COMPILER] + cpu_compiler_flags))
+    return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
 
 if __name__ == '__main__':
   sys.exit(main())
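
With this restructuring, a link step (recognized by -pass-exit-codes) is handed to the host compiler with the HIP and HCC runtimes appended. A sketch of the resulting command line, using made-up placeholder values where the real wrapper substitutes %{...} template fields:

    # Placeholder values purely for illustration; the real ones come from the
    # crosstool template substitutions above.
    CPU_COMPILER=gcc
    HCC_RUNTIME_PATH=/opt/rocm/hcc/lib;  HCC_RUNTIME_LIBRARY=hcc_runtime
    HIP_RUNTIME_PATH=/opt/rocm/hip/lib;  HIP_RUNTIME_LIBRARY=hip_runtime
    echo ${CPU_COMPILER} main.o -o libfoo.so \
        -L${HCC_RUNTIME_PATH} -Wl,-rpath=${HCC_RUNTIME_PATH} -l${HCC_RUNTIME_LIBRARY} \
        -L${HIP_RUNTIME_PATH} -Wl,-rpath=${HIP_RUNTIME_PATH} -l${HIP_RUNTIME_LIBRARY}
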
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 40c5e37..f13aacb 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -126,141 +126,142 @@
 )
 
 def _get_python_bin(repository_ctx):
-  """Gets the python bin path."""
-  python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
-  if python_bin != None:
-    return python_bin
-  python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
-  python_bin_path = repository_ctx.which(python_bin_name)
-  if python_bin_path != None:
-    return str(python_bin_path)
-  auto_configure_fail(
-      "Cannot find python in PATH, please make sure " +
-      "python is installed and add its directory in PATH, or --define " +
-      "%s='/something/else'.\nPATH=%s" % (
-          _PYTHON_BIN_PATH,
-          repository_ctx.os.environ.get("PATH", ""),
-      ))
-
+    """Gets the python bin path."""
+    python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
+    if python_bin != None:
+        return python_bin
+    python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
+    python_bin_path = repository_ctx.which(python_bin_name)
+    if python_bin_path != None:
+        return str(python_bin_path)
+    auto_configure_fail(
+        "Cannot find python in PATH, please make sure " +
+        "python is installed and add its directory in PATH, or --define " +
+        "%s='/something/else'.\nPATH=%s" % (
+            _PYTHON_BIN_PATH,
+            repository_ctx.os.environ.get("PATH", ""),
+        ),
+    )
 
 def _get_nvcc_tmp_dir_for_windows(repository_ctx):
-  """Return the tmp directory for nvcc to generate intermediate source files."""
-  escaped_tmp_dir = escape_string(
-      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
-          "\\", "\\\\"),)
-  return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
-
+    """Return the tmp directory for nvcc to generate intermediate source files."""
+    escaped_tmp_dir = escape_string(
+        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+            "\\",
+            "\\\\",
+        ),
+    )
+    return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
 
 def _get_msvc_compiler(repository_ctx):
-  vc_path = find_vc_path(repository_ctx)
-  return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
-
+    vc_path = find_vc_path(repository_ctx)
+    return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
 
 def _get_win_cuda_defines(repository_ctx):
-  """Return CROSSTOOL defines for Windows"""
+    """Return CROSSTOOL defines for Windows"""
 
-  # If we are not on Windows, return empty vaules for Windows specific fields.
-  # This ensures the CROSSTOOL file parser is happy.
-  if not _is_windows(repository_ctx):
-    return {
-        "%{msvc_env_tmp}": "",
-        "%{msvc_env_path}": "",
-        "%{msvc_env_include}": "",
-        "%{msvc_env_lib}": "",
-        "%{msvc_cl_path}": "",
-        "%{msvc_ml_path}": "",
-        "%{msvc_link_path}": "",
-        "%{msvc_lib_path}": "",
-        "%{cxx_builtin_include_directory}": "",
-    }
+    # If we are not on Windows, return empty values for Windows-specific fields.
+    # This ensures the CROSSTOOL file parser is happy.
+    if not _is_windows(repository_ctx):
+        return {
+            "%{msvc_env_tmp}": "",
+            "%{msvc_env_path}": "",
+            "%{msvc_env_include}": "",
+            "%{msvc_env_lib}": "",
+            "%{msvc_cl_path}": "",
+            "%{msvc_ml_path}": "",
+            "%{msvc_link_path}": "",
+            "%{msvc_lib_path}": "",
+            "%{cxx_builtin_include_directory}": "",
+        }
 
-  vc_path = find_vc_path(repository_ctx)
-  if not vc_path:
-    auto_configure_fail(
-        "Visual C++ build tools not found on your machine." +
-        "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using"
+    vc_path = find_vc_path(repository_ctx)
+    if not vc_path:
+        auto_configure_fail(
+            "Visual C++ build tools not found on your machine." +
+            "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using",
+        )
+        return {}
+
+    env = setup_vc_env_vars(repository_ctx, vc_path)
+    escaped_paths = escape_string(env["PATH"])
+    escaped_include_paths = escape_string(env["INCLUDE"])
+    escaped_lib_paths = escape_string(env["LIB"])
+    escaped_tmp_dir = escape_string(
+        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+            "\\",
+            "\\\\",
+        ),
     )
-    return {}
 
-  env = setup_vc_env_vars(repository_ctx, vc_path)
-  escaped_paths = escape_string(env["PATH"])
-  escaped_include_paths = escape_string(env["INCLUDE"])
-  escaped_lib_paths = escape_string(env["LIB"])
-  escaped_tmp_dir = escape_string(
-      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
-          "\\", "\\\\"),)
+    msvc_cl_path = _get_python_bin(repository_ctx)
+    msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
+        "\\",
+        "/",
+    )
+    msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
+        "\\",
+        "/",
+    )
+    msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace(
+        "\\",
+        "/",
+    )
 
-  msvc_cl_path = _get_python_bin(repository_ctx)
-  msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
-      "\\", "/")
-  msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
-      "\\", "/")
-  msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace(
-      "\\", "/")
+    # nvcc will generate some temporary source files under %{nvcc_tmp_dir}.
+    # The generated files are guaranteed to have unique names, so they can
+    # share the same tmp directory.
+    escaped_cxx_include_directories = [
+        "cxx_builtin_include_directory: \"%s\"" %
+        _get_nvcc_tmp_dir_for_windows(repository_ctx),
+    ]
+    for path in escaped_include_paths.split(";"):
+        if path:
+            escaped_cxx_include_directories.append(
+                "cxx_builtin_include_directory: \"%s\"" % path,
+            )
 
-  # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
-  # The generated files are guranteed to have unique name, so they can share the same tmp directory
-  escaped_cxx_include_directories = [
-      "cxx_builtin_include_directory: \"%s\"" %
-      _get_nvcc_tmp_dir_for_windows(repository_ctx)
-  ]
-  for path in escaped_include_paths.split(";"):
-    if path:
-      escaped_cxx_include_directories.append(
-          "cxx_builtin_include_directory: \"%s\"" % path)
-
-  return {
-      "%{msvc_env_tmp}":
-          escaped_tmp_dir,
-      "%{msvc_env_path}":
-          escaped_paths,
-      "%{msvc_env_include}":
-          escaped_include_paths,
-      "%{msvc_env_lib}":
-          escaped_lib_paths,
-      "%{msvc_cl_path}":
-          msvc_cl_path,
-      "%{msvc_ml_path}":
-          msvc_ml_path,
-      "%{msvc_link_path}":
-          msvc_link_path,
-      "%{msvc_lib_path}":
-          msvc_lib_path,
-      "%{cxx_builtin_include_directory}":
-          "\n".join(escaped_cxx_include_directories),
-  }
+    return {
+        "%{msvc_env_tmp}": escaped_tmp_dir,
+        "%{msvc_env_path}": escaped_paths,
+        "%{msvc_env_include}": escaped_include_paths,
+        "%{msvc_env_lib}": escaped_lib_paths,
+        "%{msvc_cl_path}": msvc_cl_path,
+        "%{msvc_ml_path}": msvc_ml_path,
+        "%{msvc_link_path}": msvc_link_path,
+        "%{msvc_lib_path}": msvc_lib_path,
+        "%{cxx_builtin_include_directory}": "\n".join(escaped_cxx_include_directories),
+    }
 
 # TODO(dzc): Once these functions have been factored out of Bazel's
 # cc_configure.bzl, load them from @bazel_tools instead.
 # BEGIN cc_configure common functions.
 def find_cc(repository_ctx):
-  """Find the C++ compiler."""
-  if _is_windows(repository_ctx):
-    return _get_msvc_compiler(repository_ctx)
+    """Find the C++ compiler."""
+    if _is_windows(repository_ctx):
+        return _get_msvc_compiler(repository_ctx)
 
-  if _use_cuda_clang(repository_ctx):
-    target_cc_name = "clang"
-    cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
-    if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
-      return "extra_tools/bin/clang"
-  else:
-    target_cc_name = "gcc"
-    cc_path_envvar = _GCC_HOST_COMPILER_PATH
-  cc_name = target_cc_name
+    if _use_cuda_clang(repository_ctx):
+        target_cc_name = "clang"
+        cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
+        if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
+            return "extra_tools/bin/clang"
+    else:
+        target_cc_name = "gcc"
+        cc_path_envvar = _GCC_HOST_COMPILER_PATH
+    cc_name = target_cc_name
 
-  if cc_path_envvar in repository_ctx.os.environ:
-    cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
-    if cc_name_from_env:
-      cc_name = cc_name_from_env
-  if cc_name.startswith("/"):
-    # Absolute path, maybe we should make this supported by our which function.
-    return cc_name
-  cc = repository_ctx.which(cc_name)
-  if cc == None:
-    fail(("Cannot find {}, either correct your path or set the {}" +
-          " environment variable").format(target_cc_name, cc_path_envvar))
-  return cc
-
+    if cc_path_envvar in repository_ctx.os.environ:
+        cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+        if cc_name_from_env:
+            cc_name = cc_name_from_env
+    if cc_name.startswith("/"):
+        # Absolute path; maybe we should add support for this to our which() lookup.
+        return cc_name
+    cc = repository_ctx.which(cc_name)
+    if cc == None:
+        fail(("Cannot find {}, either correct your path or set the {}" +
+              " environment variable").format(target_cc_name, cc_path_envvar))
+    return cc
 
 _INC_DIR_MARKER_BEGIN = "#include <...>"
 
@@ -269,480 +270,484 @@
 _OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
 
 def _cxx_inc_convert(path):
-  """Convert path returned by cc -E xc++ in a complete path."""
-  path = path.strip()
-  if path.endswith(_OSX_FRAMEWORK_SUFFIX):
-    path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
-  return path
-
+    """Convert path returned by cc -E xc++ in a complete path."""
+    path = path.strip()
+    if path.endswith(_OSX_FRAMEWORK_SUFFIX):
+        path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
+    return path
 
 def _normalize_include_path(repository_ctx, path):
-  """Normalizes include paths before writing them to the crosstool.
+    """Normalizes include paths before writing them to the crosstool.
 
-    If path points inside the 'crosstool' folder of the repository, a relative
-    path is returned.
-    If path points outside the 'crosstool' folder, an absolute path is returned.
-    """
-  path = str(repository_ctx.path(path))
-  crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
+      If path points inside the 'crosstool' folder of the repository, a relative
+      path is returned.
+      If path points outside the 'crosstool' folder, an absolute path is returned.
+      """
+    path = str(repository_ctx.path(path))
+    crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
 
-  if path.startswith(crosstool_folder):
-    # We drop the path to "$REPO/crosstool" and a trailing path separator.
-    return path[len(crosstool_folder) + 1:]
-  return path
-
+    if path.startswith(crosstool_folder):
+        # We drop the path to "$REPO/crosstool" and a trailing path separator.
+        return path[len(crosstool_folder) + 1:]
+    return path
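
To make the two cases concrete, a quick sketch with assumed paths:

    crosstool_folder = "/repo/crosstool"
    inside = "/repo/crosstool/clang/include"
    outside = "/usr/include"
    assert inside[len(crosstool_folder) + 1:] == "clang/include"   # relative
    assert not outside.startswith(crosstool_folder)                # stays absolute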
 
 def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
-  """Compute the list of default C or C++ include directories."""
-  if lang_is_cpp:
-    lang = "c++"
-  else:
-    lang = "c"
-  result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
-  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
-  if index1 == -1:
-    return []
-  index1 = result.stderr.find("\n", index1)
-  if index1 == -1:
-    return []
-  index2 = result.stderr.rfind("\n ")
-  if index2 == -1 or index2 < index1:
-    return []
-  index2 = result.stderr.find("\n", index2 + 1)
-  if index2 == -1:
-    inc_dirs = result.stderr[index1 + 1:]
-  else:
-    inc_dirs = result.stderr[index1 + 1:index2].strip()
+    """Compute the list of default C or C++ include directories."""
+    if lang_is_cpp:
+        lang = "c++"
+    else:
+        lang = "c"
+    result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
+    index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+    if index1 == -1:
+        return []
+    index1 = result.stderr.find("\n", index1)
+    if index1 == -1:
+        return []
+    index2 = result.stderr.rfind("\n ")
+    if index2 == -1 or index2 < index1:
+        return []
+    index2 = result.stderr.find("\n", index2 + 1)
+    if index2 == -1:
+        inc_dirs = result.stderr[index1 + 1:]
+    else:
+        inc_dirs = result.stderr[index1 + 1:index2].strip()
 
-  return [
-      _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
-      for p in inc_dirs.split("\n")
-  ]
-
+    return [
+        _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
+        for p in inc_dirs.split("\n")
+    ]
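
The index arithmetic above slices out the block between the "#include <...>" marker line and the last indented entry. A runnable sketch against a hypothetical stderr dump:

    stderr = (
        "#include <...> search starts here:\n"
        " /usr/include/c++/7\n"
        " /usr/include\n"
        "End of search list.\n"
    )
    begin = stderr.find("\n", stderr.find("#include <...>"))
    end = stderr.find("\n", stderr.rfind("\n ") + 1)
    inc_dirs = stderr[begin + 1:end].strip()
    assert [p.strip() for p in inc_dirs.split("\n")] == \
        ["/usr/include/c++/7", "/usr/include"]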
 
 def get_cxx_inc_directories(repository_ctx, cc):
-  """Compute the list of default C and C++ include directories."""
+    """Compute the list of default C and C++ include directories."""
 
-  # For some reason `clang -xc` sometimes returns include paths that are
-  # different from the ones from `clang -xc++`. (Symlink and a dir)
-  # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
-  includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
-  includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+    # For some reason `clang -xc` sometimes returns include paths that are
+    # different from the ones from `clang -xc++` (e.g. a symlink and a dir),
+    # so we run the compiler with both `-xc` and `-xc++` and merge the
+    # resulting lists.
+    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
 
-  includes_cpp_set = depset(includes_cpp)
-  return includes_cpp + [
-      inc for inc in includes_c if inc not in includes_cpp_set
-  ]
-
+    includes_cpp_set = depset(includes_cpp)
+    return includes_cpp + [
+        inc
+        for inc in includes_c
+        if inc not in includes_cpp_set
+    ]
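
For illustration, with assumed outputs from the two runs, the merge keeps the C++ order and appends only the C-only entries:

    includes_cpp = ["/usr/include/c++/7", "/usr/include"]
    includes_c = ["/usr/include", "/usr/lib/gcc/include"]
    merged = includes_cpp + [d for d in includes_c if d not in includes_cpp]
    assert merged == ["/usr/include/c++/7", "/usr/include", "/usr/lib/gcc/include"]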
 
 def auto_configure_fail(msg):
-  """Output failure message when cuda configuration fails."""
-  red = "\033[0;31m"
-  no_color = "\033[0m"
-  fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
+    """Output failure message when cuda configuration fails."""
+    red = "\033[0;31m"
+    no_color = "\033[0m"
+    fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
 
 # END cc_configure common functions (see TODO above).
 
 def _host_compiler_includes(repository_ctx, cc):
-  """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+    """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
 
-    Args:
-      repository_ctx: The repository context.
-      cc: The path to the gcc host compiler.
+      Args:
+        repository_ctx: The repository context.
+        cc: The path to the gcc host compiler.
 
-    Returns:
-      A string containing the cxx_builtin_include_directory for each of the gcc
-      host compiler include directories, which can be added to the CROSSTOOL
-      file.
-    """
-  inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
-  inc_entries = []
-  for inc_dir in inc_dirs:
-    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
-  return "\n".join(inc_entries)
-
+      Returns:
+        A string containing the cxx_builtin_include_directory for each of the gcc
+        host compiler include directories, which can be added to the CROSSTOOL
+        file.
+      """
+    inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+    inc_entries = []
+    for inc_dir in inc_dirs:
+        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+    return "\n".join(inc_entries)
 
 def _cuda_include_path(repository_ctx, cuda_config):
-  """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
+    """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
 
-    Args:
-      repository_ctx: The repository context.
-      cc: The path to the gcc host compiler.
+      Args:
+        repository_ctx: The repository context.
+        cuda_config: The CUDA config as returned by _get_cuda_config.
 
-    Returns:
-      A string containing the cxx_builtin_include_directory for each of the gcc
-      host compiler include directories, which can be added to the CROSSTOOL
-      file.
-    """
-  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
-      cuda_config.cuda_toolkit_path,
-      ".exe" if cuda_config.cpu_value == "Windows" else "",
-  ))
-  result = repository_ctx.execute([
-      nvcc_path,
-      "-v",
-      "/dev/null",
-      "-o",
-      "/dev/null",
-  ])
-  target_dir = ""
-  for one_line in result.stderr.splitlines():
-    if one_line.startswith("#$ _TARGET_DIR_="):
-      target_dir = (
-          cuda_config.cuda_toolkit_path + "/" + one_line.replace(
-              "#$ _TARGET_DIR_=", "") + "/include")
-  inc_entries = []
-  if target_dir != "":
-    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
-  default_include = cuda_config.cuda_toolkit_path + "/include"
-  inc_entries.append(
-      "  cxx_builtin_include_directory: \"%s\"" % default_include)
-  return "\n".join(inc_entries)
-
+      Returns:
+        A string containing the cxx_builtin_include_directory entries for the
+        CUDA toolkit include directories, which can be added to the CROSSTOOL
+        file.
+      """
+    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+        cuda_config.cuda_toolkit_path,
+        ".exe" if cuda_config.cpu_value == "Windows" else "",
+    ))
+    result = repository_ctx.execute([
+        nvcc_path,
+        "-v",
+        "/dev/null",
+        "-o",
+        "/dev/null",
+    ])
+    target_dir = ""
+    for one_line in result.stderr.splitlines():
+        if one_line.startswith("#$ _TARGET_DIR_="):
+            target_dir = (
+                cuda_config.cuda_toolkit_path + "/" + one_line.replace(
+                    "#$ _TARGET_DIR_=",
+                    "",
+                ) + "/include"
+            )
+    inc_entries = []
+    if target_dir != "":
+        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
+    default_include = cuda_config.cuda_toolkit_path + "/include"
+    inc_entries.append(
+        "  cxx_builtin_include_directory: \"%s\"" % default_include,
+    )
+    return "\n".join(inc_entries)
 
 def enable_cuda(repository_ctx):
-  if "TF_NEED_CUDA" in repository_ctx.os.environ:
-    enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
-    return enable_cuda == "1"
-  return False
-
+    if "TF_NEED_CUDA" in repository_ctx.os.environ:
+        enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
+        return enable_cuda == "1"
+    return False
 
 def cuda_toolkit_path(repository_ctx):
-  """Finds the cuda toolkit directory.
+    """Finds the cuda toolkit directory.
 
-    Args:
-      repository_ctx: The repository context.
+      Args:
+        repository_ctx: The repository context.
 
-    Returns:
-      A speculative real path of the cuda toolkit install directory.
-    """
-  cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
-  if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
-    cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
-  if not repository_ctx.path(cuda_toolkit_path).exists:
-    auto_configure_fail("Cannot find cuda toolkit path.")
-  return str(repository_ctx.path(cuda_toolkit_path).realpath)
-
+      Returns:
+        A speculative real path of the cuda toolkit install directory.
+      """
+    cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
+    if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
+        cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
+    if not repository_ctx.path(cuda_toolkit_path).exists:
+        auto_configure_fail("Cannot find cuda toolkit path.")
+    return str(repository_ctx.path(cuda_toolkit_path).realpath)
 
 def _cudnn_install_basedir(repository_ctx):
-  """Finds the cudnn install directory."""
-  cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
-  if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
-    cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
-  if not repository_ctx.path(cudnn_install_path).exists:
-    auto_configure_fail("Cannot find cudnn install path.")
-  return cudnn_install_path
-
+    """Finds the cudnn install directory."""
+    cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
+    if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
+        cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
+    if not repository_ctx.path(cudnn_install_path).exists:
+        auto_configure_fail("Cannot find cudnn install path.")
+    return cudnn_install_path
 
 def matches_version(environ_version, detected_version):
-  """Checks whether the user-specified version matches the detected version.
+    """Checks whether the user-specified version matches the detected version.
 
-    This function performs a weak matching so that if the user specifies only
-    the
-    major or major and minor versions, the versions are still considered
-    matching
-    if the version parts match. To illustrate:
+      This function performs a weak matching so that if the user specifies only
+      the major, or major and minor versions, the versions are still considered
+      matching if the version parts match. To illustrate:
 
-        environ_version  detected_version  result
-        -----------------------------------------
-        5.1.3            5.1.3             True
-        5.1              5.1.3             True
-        5                5.1               True
-        5.1.3            5.1               False
-        5.2.3            5.1.3             False
+          environ_version  detected_version  result
+          -----------------------------------------
+          5.1.3            5.1.3             True
+          5.1              5.1.3             True
+          5                5.1               True
+          5.1.3            5.1               False
+          5.2.3            5.1.3             False
 
-    Args:
-      environ_version: The version specified by the user via environment
-        variables.
-      detected_version: The version autodetected from the CUDA installation on
-        the system.
-    Returns: True if user-specified version matches detected version and False
-      otherwise.
-  """
-  environ_version_parts = environ_version.split(".")
-  detected_version_parts = detected_version.split(".")
-  if len(detected_version_parts) < len(environ_version_parts):
-    return False
-  for i, part in enumerate(detected_version_parts):
-    if i >= len(environ_version_parts):
-      break
-    if part != environ_version_parts[i]:
-      return False
-  return True
-
+      Args:
+        environ_version: The version specified by the user via environment
+          variables.
+        detected_version: The version autodetected from the CUDA installation on
+          the system.
+      Returns: True if user-specified version matches detected version and False
+        otherwise.
+    """
+    environ_version_parts = environ_version.split(".")
+    detected_version_parts = detected_version.split(".")
+    if len(detected_version_parts) < len(environ_version_parts):
+        return False
+    for i, part in enumerate(detected_version_parts):
+        if i >= len(environ_version_parts):
+            break
+        if part != environ_version_parts[i]:
+            return False
+    return True
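
The weak matching boils down to a version-part prefix check; an equivalent sketch (helper name hypothetical) reproducing the table above:

    def weak_match(environ_version, detected_version):
        env = environ_version.split(".")
        det = detected_version.split(".")
        return len(det) >= len(env) and det[:len(env)] == env

    assert weak_match("5.1.3", "5.1.3")
    assert weak_match("5.1", "5.1.3")
    assert weak_match("5", "5.1")
    assert not weak_match("5.1.3", "5.1")    # user more specific than detected
    assert not weak_match("5.2.3", "5.1.3")  # mismatching version part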
 
 _NVCC_VERSION_PREFIX = "Cuda compilation tools, release "
 
 def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value):
-  """Detects the version of CUDA installed on the system.
+    """Detects the version of CUDA installed on the system.
 
-    Args:
-      repository_ctx: The repository context.
-      cuda_toolkit_path: The CUDA install directory.
+      Args:
+        repository_ctx: The repository context.
+        cuda_toolkit_path: The CUDA install directory.
 
-    Returns:
-      String containing the version of CUDA.
-    """
+      Returns:
+        String containing the version of CUDA.
+      """
 
-  # Run nvcc --version and find the line containing the CUDA version.
-  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
-      cuda_toolkit_path,
-      ".exe" if cpu_value == "Windows" else "",
-  ))
-  if not nvcc_path.exists:
-    auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
-  result = repository_ctx.execute([str(nvcc_path), "--version"])
-  if result.stderr:
-    auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
-  lines = result.stdout.splitlines()
-  version_line = lines[len(lines) - 1]
-  if version_line.find(_NVCC_VERSION_PREFIX) == -1:
-    auto_configure_fail(
-        "Could not parse CUDA version from nvcc --version. Got: %s" %
-        result.stdout,)
+    # Run nvcc --version and find the line containing the CUDA version.
+    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+        cuda_toolkit_path,
+        ".exe" if cpu_value == "Windows" else "",
+    ))
+    if not nvcc_path.exists:
+        auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
+    result = repository_ctx.execute([str(nvcc_path), "--version"])
+    if result.stderr:
+        auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
+    lines = result.stdout.splitlines()
+    version_line = lines[len(lines) - 1]
+    if version_line.find(_NVCC_VERSION_PREFIX) == -1:
+        auto_configure_fail(
+            "Could not parse CUDA version from nvcc --version. Got: %s" %
+            result.stdout,
+        )
 
-  # Parse the CUDA version from the line containing the CUDA version.
-  prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
-  parts = prefix_removed.split(",")
-  if len(parts) != 2 or len(parts[0]) < 2:
-    auto_configure_fail(
-        "Could not parse CUDA version from nvcc --version. Got: %s" %
-        result.stdout,)
-  full_version = parts[1].strip()
-  if full_version.startswith("V"):
-    full_version = full_version[1:]
+    # Parse the CUDA version from the line containing the CUDA version.
+    prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
+    parts = prefix_removed.split(",")
+    if len(parts) != 2 or len(parts[0]) < 2:
+        auto_configure_fail(
+            "Could not parse CUDA version from nvcc --version. Got: %s" %
+            result.stdout,
+        )
+    full_version = parts[1].strip()
+    if full_version.startswith("V"):
+        full_version = full_version[1:]
 
-  # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
-  # match the detected version.
-  environ_version = ""
-  if _TF_CUDA_VERSION in repository_ctx.os.environ:
-    environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
-  if environ_version and not matches_version(environ_version, full_version):
-    auto_configure_fail(
-        ("CUDA version detected from nvcc (%s) does not match " +
-         "TF_CUDA_VERSION (%s)") % (full_version, environ_version),)
+    # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
+    # match the detected version.
+    environ_version = ""
+    if _TF_CUDA_VERSION in repository_ctx.os.environ:
+        environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
+    if environ_version and not matches_version(environ_version, full_version):
+        auto_configure_fail(
+            ("CUDA version detected from nvcc (%s) does not match " +
+             "TF_CUDA_VERSION (%s)") % (full_version, environ_version),
+        )
 
-  # We only use the version consisting of the major and minor version numbers.
-  version_parts = full_version.split(".")
-  if len(version_parts) < 2:
-    auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
-  if cpu_value == "Windows":
-    version = "64_%s%s" % (version_parts[0], version_parts[1])
-  else:
-    version = "%s.%s" % (version_parts[0], version_parts[1])
-  return version
-
+    # We only use the version consisting of the major and minor version numbers.
+    version_parts = full_version.split(".")
+    if len(version_parts) < 2:
+        auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
+    if cpu_value == "Windows":
+        version = "64_%s%s" % (version_parts[0], version_parts[1])
+    else:
+        version = "%s.%s" % (version_parts[0], version_parts[1])
+    return version
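
To see the parsing in action on a sample last line of `nvcc --version` (the exact text is an assumption about nvcc's output format):

    line = "Cuda compilation tools, release 10.0, V10.0.130"
    prefix = "Cuda compilation tools, release "
    parts = line.replace(prefix, "").split(",")   # ["10.0", " V10.0.130"]
    full_version = parts[1].strip()
    if full_version.startswith("V"):
        full_version = full_version[1:]           # "10.0.130"
    major, minor = full_version.split(".")[:2]
    assert "%s.%s" % (major, minor) == "10.0"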
 
 _DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR"
 _DEFINE_CUDNN_MINOR = "#define CUDNN_MINOR"
 _DEFINE_CUDNN_PATCHLEVEL = "#define CUDNN_PATCHLEVEL"
 
 def find_cuda_define(repository_ctx, header_dir, header_file, define):
-  """Returns the value of a #define in a header file.
+    """Returns the value of a #define in a header file.
 
-    Greps through a header file and returns the value of the specified #define.
-    If the #define is not found, then raise an error.
+      Greps through a header file and returns the value of the specified #define.
+      If the #define is not found, an error is raised.
 
-    Args:
-      repository_ctx: The repository context.
-      header_dir: The directory containing the header file.
-      header_file: The header file name.
-      define: The #define to search for.
+      Args:
+        repository_ctx: The repository context.
+        header_dir: The directory containing the header file.
+        header_file: The header file name.
+        define: The #define to search for.
 
-    Returns:
-      The value of the #define found in the header.
-    """
+      Returns:
+        The value of the #define found in the header.
+      """
 
-  # Confirm location of the header and grep for the line defining the macro.
-  h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
-  if not h_path.exists:
-    auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
-  result = repository_ctx.execute(
-      # Grep one more lines as some #defines are splitted into two lines.
-      ["grep", "--color=never", "-A1", "-E", define,
-       str(h_path)],)
-  if result.stderr:
-    auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
+    # Confirm location of the header and grep for the line defining the macro.
+    h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
+    if not h_path.exists:
+        auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
+    result = repository_ctx.execute(
+        # Grep one more line, as some #defines are split across two lines.
+        [
+            "grep",
+            "--color=never",
+            "-A1",
+            "-E",
+            define,
+            str(h_path),
+        ],
+    )
+    if result.stderr:
+        auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
 
-  # Parse the version from the line defining the macro.
-  if result.stdout.find(define) == -1:
-    auto_configure_fail(
-        "Cannot find line containing '%s' in %s" % (define, h_path))
+    # Parse the version from the line defining the macro.
+    if result.stdout.find(define) == -1:
+        auto_configure_fail(
+            "Cannot find line containing '%s' in %s" % (define, h_path),
+        )
 
-  # Split results to lines
-  lines = result.stdout.split("\n")
-  num_lines = len(lines)
-  for l in range(num_lines):
-    line = lines[l]
-    if define in line:  # Find the line with define
-      version = line
-      if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
-        version = version[:-1] + lines[l + 1]
-      break
+    # Split the results into lines.
+    lines = result.stdout.split("\n")
+    num_lines = len(lines)
+    for l in range(num_lines):
+        line = lines[l]
+        if define in line:  # Find the line with define
+            version = line
+            if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
+                version = version[:-1] + lines[l + 1]
+            break
 
-  # Remove any comments
-  version = version.split("//")[0]
+    # Remove any comments
+    version = version.split("//")[0]
 
-  # Remove define name
-  version = version.replace(define, "").strip()
+    # Remove define name
+    version = version.replace(define, "").strip()
 
-  # Remove the code after the version number.
-  version_end = version.find(" ")
-  if version_end != -1:
-    if version_end == 0:
-      auto_configure_fail(
-          "Cannot extract the version from line containing '%s' in %s" %
-          (define, str(h_path)),)
-    version = version[:version_end].strip()
-  return version
-
+    # Remove the code after the version number.
+    version_end = version.find(" ")
+    if version_end != -1:
+        if version_end == 0:
+            auto_configure_fail(
+                "Cannot extract the version from line containing '%s' in %s" %
+                (define, str(h_path)),
+            )
+        version = version[:version_end].strip()
+    return version
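
A compact sketch of the multi-line #define handling above, using a hypothetical `grep -A1` result where the value continues on the next line:

    stdout = "#define CUDNN_MAJOR \\\n    7  // major version"
    lines = stdout.split("\n")
    version = lines[0]
    if version.endswith("\\"):                    # line continuation: merge next line
        version = version[:-1] + lines[1]
    version = version.split("//")[0]              # drop trailing comment
    version = version.replace("#define CUDNN_MAJOR", "").strip()
    assert version == "7"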
 
 def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value):
-  """Detects the version of cuDNN installed on the system.
+    """Detects the version of cuDNN installed on the system.
 
-    Args:
-      repository_ctx: The repository context.
-      cpu_value: The name of the host operating system.
-      cudnn_install_basedir: The cuDNN install directory.
+      Args:
+        repository_ctx: The repository context.
+        cudnn_install_basedir: The cuDNN install directory.
+        cpu_value: The name of the host operating system.
 
-    Returns:
-      A string containing the version of cuDNN.
-    """
-  cudnn_header_dir = _find_cudnn_header_dir(
-      repository_ctx,
-      cudnn_install_basedir,
-  )
-  major_version = find_cuda_define(
-      repository_ctx,
-      cudnn_header_dir,
-      "cudnn.h",
-      _DEFINE_CUDNN_MAJOR,
-  )
-  minor_version = find_cuda_define(
-      repository_ctx,
-      cudnn_header_dir,
-      "cudnn.h",
-      _DEFINE_CUDNN_MINOR,
-  )
-  patch_version = find_cuda_define(
-      repository_ctx,
-      cudnn_header_dir,
-      "cudnn.h",
-      _DEFINE_CUDNN_PATCHLEVEL,
-  )
-  full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+      Returns:
+        A string containing the version of cuDNN.
+      """
+    cudnn_header_dir = _find_cudnn_header_dir(
+        repository_ctx,
+        cudnn_install_basedir,
+    )
+    major_version = find_cuda_define(
+        repository_ctx,
+        cudnn_header_dir,
+        "cudnn.h",
+        _DEFINE_CUDNN_MAJOR,
+    )
+    minor_version = find_cuda_define(
+        repository_ctx,
+        cudnn_header_dir,
+        "cudnn.h",
+        _DEFINE_CUDNN_MINOR,
+    )
+    patch_version = find_cuda_define(
+        repository_ctx,
+        cudnn_header_dir,
+        "cudnn.h",
+        _DEFINE_CUDNN_PATCHLEVEL,
+    )
+    full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
 
-  # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
-  # match the detected version.
-  environ_version = ""
-  if _TF_CUDNN_VERSION in repository_ctx.os.environ:
-    environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
-  if environ_version and not matches_version(environ_version, full_version):
-    cudnn_h_path = repository_ctx.path(
-        "%s/include/cudnn.h" % cudnn_install_basedir)
-    auto_configure_fail(("cuDNN version detected from %s (%s) does not match " +
-                         "TF_CUDNN_VERSION (%s)") %
-                        (str(cudnn_h_path), full_version, environ_version),)
-  # Only use the major version to match the SONAME of the library.
-  version = major_version
-  if cpu_value == "Windows":
-    version = "64_" + version
-  return version
+    # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
+    # match the detected version.
+    environ_version = ""
+    if _TF_CUDNN_VERSION in repository_ctx.os.environ:
+        environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
+    if environ_version and not matches_version(environ_version, full_version):
+        cudnn_h_path = repository_ctx.path(
+            "%s/include/cudnn.h" % cudnn_install_basedir,
+        )
+        auto_configure_fail(("cuDNN version detected from %s (%s) does not match " +
+                             "TF_CUDNN_VERSION (%s)") %
+                            (str(cudnn_h_path), full_version, environ_version))
 
+    # Only use the major version to match the SONAME of the library.
+    version = major_version
+    if cpu_value == "Windows":
+        version = "64_" + version
+    return version
 
 def compute_capabilities(repository_ctx):
-  """Returns a list of strings representing cuda compute capabilities."""
-  if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
-    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-  capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
-  capabilities = capabilities_str.split(",")
-  for capability in capabilities:
-    # Workaround for Skylark's lack of support for regex. This check should
-    # be equivalent to checking:
-    #     if re.match("[0-9]+.[0-9]+", capability) == None:
-    parts = capability.split(".")
-    if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
-      auto_configure_fail("Invalid compute capability: %s" % capability)
-  return capabilities
-
+    """Returns a list of strings representing cuda compute capabilities."""
+    if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
+        return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+    capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
+    capabilities = capabilities_str.split(",")
+    for capability in capabilities:
+        # Workaround for Skylark's lack of support for regex. This check should
+        # be equivalent to checking:
+        #     if re.match("[0-9]+.[0-9]+", capability) == None:
+        parts = capability.split(".")
+        if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
+            auto_configure_fail("Invalid compute capability: %s" % capability)
+    return capabilities
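
A standalone version of the regex-free validity check (helper name hypothetical):

    def is_valid_capability(capability):
        parts = capability.split(".")
        return len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit()

    assert is_valid_capability("7.0")
    assert not is_valid_capability("sm_70")   # old-style names are rejected
    assert not is_valid_capability("7")       # major version alone is rejected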
 
 def get_cpu_value(repository_ctx):
-  """Returns the name of the host operating system.
+    """Returns the name of the host operating system.
 
-    Args:
-      repository_ctx: The repository context.
+      Args:
+        repository_ctx: The repository context.
 
-    Returns:
-      A string containing the name of the host operating system.
-    """
-  os_name = repository_ctx.os.name.lower()
-  if os_name.startswith("mac os"):
-    return "Darwin"
-  if os_name.find("windows") != -1:
-    return "Windows"
-  result = repository_ctx.execute(["uname", "-s"])
-  return result.stdout.strip()
-
+      Returns:
+        A string containing the name of the host operating system.
+      """
+    os_name = repository_ctx.os.name.lower()
+    if os_name.startswith("mac os"):
+        return "Darwin"
+    if os_name.find("windows") != -1:
+        return "Windows"
+    result = repository_ctx.execute(["uname", "-s"])
+    return result.stdout.strip()
 
 def _is_windows(repository_ctx):
-  """Returns true if the host operating system is windows."""
-  return get_cpu_value(repository_ctx) == "Windows"
-
+    """Returns true if the host operating system is windows."""
+    return get_cpu_value(repository_ctx) == "Windows"
 
 def lib_name(base_name, cpu_value, version = None, static = False):
-  """Constructs the platform-specific name of a library.
+    """Constructs the platform-specific name of a library.
 
-    Args:
-      base_name: The name of the library, such as "cudart"
-      cpu_value: The name of the host operating system.
-      version: The version of the library.
-      static: True the library is static or False if it is a shared object.
+      Args:
+        base_name: The name of the library, such as "cudart"
+        cpu_value: The name of the host operating system.
+        version: The version of the library.
+        static: True if the library is static, or False if it is a shared object.
 
-    Returns:
-      The platform-specific name of the library.
-    """
-  version = "" if not version else "." + version
-  if cpu_value in ("Linux", "FreeBSD"):
-    if static:
-      return "lib%s.a" % base_name
-    return "lib%s.so%s" % (base_name, version)
-  elif cpu_value == "Windows":
-    return "%s.lib" % base_name
-  elif cpu_value == "Darwin":
-    if static:
-      return "lib%s.a" % base_name
-    return "lib%s%s.dylib" % (base_name, version)
-  else:
-    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+      Returns:
+        The platform-specific name of the library.
+      """
+    version = "" if not version else "." + version
+    if cpu_value in ("Linux", "FreeBSD"):
+        if static:
+            return "lib%s.a" % base_name
+        return "lib%s.so%s" % (base_name, version)
+    elif cpu_value == "Windows":
+        return "%s.lib" % base_name
+    elif cpu_value == "Darwin":
+        if static:
+            return "lib%s.a" % base_name
+        return "lib%s%s.dylib" % (base_name, version)
+    else:
+        auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
 
 def find_lib(repository_ctx, paths, check_soname = True):
-  """
-    Finds a library among a list of potential paths.
+    """
+      Finds a library among a list of potential paths.
 
-    Args:
-      paths: List of paths to inspect.
+      Args:
+        repository_ctx: The repository context.
+        paths: List of paths to inspect.
+        check_soname: Whether each candidate's SONAME must match its basename.
 
-    Returns:
-      Returns the first path in paths that exist.
-  """
-  objdump = repository_ctx.which("objdump")
-  mismatches = []
-  for path in [repository_ctx.path(path) for path in paths]:
-    if not path.exists:
-      continue
-    if check_soname and objdump != None:
-      output = repository_ctx.execute([objdump, "-p", str(path)]).stdout
-      output = [line for line in output.splitlines() if "SONAME" in line]
-      sonames = [line.strip().split(" ")[-1] for line in output]
-      if not any([soname == path.basename for soname in sonames]):
-        mismatches.append(str(path))
-        continue
-    return path
-  if mismatches:
-    auto_configure_fail(
-        "None of the libraries match their SONAME: " + ", ".join(mismatches))
-  auto_configure_fail("No library found under: " + ", ".join(paths))
-
+      Returns:
+        Returns the first path in paths that exists.
+    """
+    objdump = repository_ctx.which("objdump")
+    mismatches = []
+    for path in [repository_ctx.path(path) for path in paths]:
+        if not path.exists:
+            continue
+        if check_soname and objdump != None:
+            output = repository_ctx.execute([objdump, "-p", str(path)]).stdout
+            output = [line for line in output.splitlines() if "SONAME" in line]
+            sonames = [line.strip().split(" ")[-1] for line in output]
+            if not any([soname == path.basename for soname in sonames]):
+                mismatches.append(str(path))
+                continue
+        return path
+    if mismatches:
+        auto_configure_fail(
+            "None of the libraries match their SONAME: " + ", ".join(mismatches),
+        )
+    auto_configure_fail("No library found under: " + ", ".join(paths))
 
 def _find_cuda_lib(
         lib,
@@ -751,263 +756,257 @@
         basedir,
         version,
         static = False):
-  """Finds the given CUDA or cuDNN library on the system.
+    """Finds the given CUDA or cuDNN library on the system.
 
-    Args:
-      lib: The name of the library, such as "cudart"
-      repository_ctx: The repository context.
-      cpu_value: The name of the host operating system.
-      basedir: The install directory of CUDA or cuDNN.
-      version: The version of the library.
-      static: True if static library, False if shared object.
+      Args:
+        lib: The name of the library, such as "cudart"
+        repository_ctx: The repository context.
+        cpu_value: The name of the host operating system.
+        basedir: The install directory of CUDA or cuDNN.
+        version: The version of the library.
+        static: True if static library, False if shared object.
 
-    Returns:
-      Returns the path to the library.
-    """
-  file_name = lib_name(lib, cpu_value, version, static)
-  return find_lib(repository_ctx, [
-      "%s/%s%s" % (basedir, path, file_name) for path in CUDA_LIB_PATHS
-  ], check_soname = version and not static)
-
+      Returns:
+        Returns the path to the library.
+      """
+    file_name = lib_name(lib, cpu_value, version, static)
+    return find_lib(repository_ctx, [
+        "%s/%s%s" % (basedir, path, file_name)
+        for path in CUDA_LIB_PATHS
+    ], check_soname = version and not static)
 
 def _find_cupti_header_dir(repository_ctx, cuda_config):
-  """Returns the path to the directory containing cupti.h
+    """Returns the path to the directory containing cupti.h
 
-    On most systems, the cupti library is not installed in the same directory as
-    the other CUDA libraries but rather in a special extras/CUPTI directory.
+      On most systems, the cupti library is not installed in the same directory as
+      the other CUDA libraries but rather in a special extras/CUPTI directory.
 
-    Args:
-      repository_ctx: The repository context.
-      cuda_config: The CUDA config as returned by _get_cuda_config
+      Args:
+        repository_ctx: The repository context.
+        cuda_config: The CUDA config as returned by _get_cuda_config
 
-    Returns:
-      The path of the directory containing the cupti header.
-    """
-  cuda_toolkit_path = cuda_config.cuda_toolkit_path
-  for relative_path in CUPTI_HEADER_PATHS:
-    if repository_ctx.path(
-        "%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
-      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-  auto_configure_fail("Cannot find cupti.h under %s" % ", ".join(
-      [cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
-
+      Returns:
+        The path of the directory containing the cupti header.
+      """
+    cuda_toolkit_path = cuda_config.cuda_toolkit_path
+    for relative_path in CUPTI_HEADER_PATHS:
+        if repository_ctx.path(
+            "%s/%scupti.h" % (cuda_toolkit_path, relative_path),
+        ).exists:
+            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+    auto_configure_fail("Cannot find cupti.h under %s" % ", ".join(
+        [cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS],
+    ))
 
 def _find_cupti_lib(repository_ctx, cuda_config):
-  """Finds the cupti library on the system.
+    """Finds the cupti library on the system.
 
-    On most systems, the cupti library is not installed in the same directory as
-    the other CUDA libraries but rather in a special extras/CUPTI directory.
+      On most systems, the cupti library is not installed in the same directory as
+      the other CUDA libraries but rather in a special extras/CUPTI directory.
 
-    Args:
-      repository_ctx: The repository context.
-      cuda_config: The cuda configuration as returned by _get_cuda_config.
+      Args:
+        repository_ctx: The repository context.
+        cuda_config: The cuda configuration as returned by _get_cuda_config.
 
-    Returns:
-      Returns the path to the library.
-    """
-  file_name = lib_name(
-      "cupti",
-      cuda_config.cpu_value,
-      cuda_config.cuda_version,
-  )
-  basedir = cuda_config.cuda_toolkit_path
-  return find_lib(repository_ctx, [
-      "%s/%s%s" % (basedir, path, file_name) for path in CUPTI_LIB_PATHS
-  ])
-
+      Returns:
+        Returns the path to the library.
+      """
+    file_name = lib_name(
+        "cupti",
+        cuda_config.cpu_value,
+        cuda_config.cuda_version,
+    )
+    basedir = cuda_config.cuda_toolkit_path
+    return find_lib(repository_ctx, [
+        "%s/%s%s" % (basedir, path, file_name)
+        for path in CUPTI_LIB_PATHS
+    ])
 
 def _find_libs(repository_ctx, cuda_config):
-  """Returns the CUDA and cuDNN libraries on the system.
+    """Returns the CUDA and cuDNN libraries on the system.
 
-    Args:
-      repository_ctx: The repository context.
-      cuda_config: The CUDA config as returned by _get_cuda_config
+      Args:
+        repository_ctx: The repository context.
+        cuda_config: The CUDA config as returned by _get_cuda_config
 
-    Returns:
-      Map of library names to structs of filename and path.
-    """
-  cpu_value = cuda_config.cpu_value
-  return {
-      "cuda":
-          _find_cuda_lib(
-              "cuda",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cuda_toolkit_path,
-              None),
-      "cudart":
-          _find_cuda_lib(
-              "cudart",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cuda_toolkit_path,
-              cuda_config.cuda_version,
-          ),
-      "cudart_static":
-          _find_cuda_lib(
-              "cudart_static",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cuda_toolkit_path,
-              cuda_config.cuda_version,
-              static=True,
-          ),
-      "cublas":
-          _find_cuda_lib(
-              "cublas",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cuda_toolkit_path,
-              cuda_config.cuda_version,
-          ),
-      "cusolver":
-          _find_cuda_lib(
-              "cusolver",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cuda_toolkit_path,
-              cuda_config.cuda_version,
-          ),
-      "curand":
-          _find_cuda_lib(
-              "curand",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cuda_toolkit_path,
-              cuda_config.cuda_version,
-          ),
-      "cufft":
-          _find_cuda_lib(
-              "cufft",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cuda_toolkit_path,
-              cuda_config.cuda_version,
-          ),
-      "cudnn":
-          _find_cuda_lib(
-              "cudnn",
-              repository_ctx,
-              cpu_value,
-              cuda_config.cudnn_install_basedir,
-              cuda_config.cudnn_version,
-          ),
-      "cupti":
-          _find_cupti_lib(repository_ctx, cuda_config),
-  }
-
+      Returns:
+        Map of library names to structs of filename and path.
+      """
+    cpu_value = cuda_config.cpu_value
+    return {
+        "cuda": _find_cuda_lib(
+            "cuda",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cuda_toolkit_path,
+            None,
+        ),
+        "cudart": _find_cuda_lib(
+            "cudart",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cuda_toolkit_path,
+            cuda_config.cuda_version,
+        ),
+        "cudart_static": _find_cuda_lib(
+            "cudart_static",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cuda_toolkit_path,
+            cuda_config.cuda_version,
+            static = True,
+        ),
+        "cublas": _find_cuda_lib(
+            "cublas",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cuda_toolkit_path,
+            cuda_config.cuda_version,
+        ),
+        "cusolver": _find_cuda_lib(
+            "cusolver",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cuda_toolkit_path,
+            cuda_config.cuda_version,
+        ),
+        "curand": _find_cuda_lib(
+            "curand",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cuda_toolkit_path,
+            cuda_config.cuda_version,
+        ),
+        "cufft": _find_cuda_lib(
+            "cufft",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cuda_toolkit_path,
+            cuda_config.cuda_version,
+        ),
+        "cudnn": _find_cuda_lib(
+            "cudnn",
+            repository_ctx,
+            cpu_value,
+            cuda_config.cudnn_install_basedir,
+            cuda_config.cudnn_version,
+        ),
+        "cupti": _find_cupti_lib(repository_ctx, cuda_config),
+    }
 
 def _find_cuda_include_path(repository_ctx, cuda_config):
-  """Returns the path to the directory containing cuda.h
+    """Returns the path to the directory containing cuda.h
 
-    Args:
-      repository_ctx: The repository context.
-      cuda_config: The CUDA config as returned by _get_cuda_config
+      Args:
+        repository_ctx: The repository context.
+        cuda_config: The CUDA config as returned by _get_cuda_config
 
-    Returns:
-      The path of the directory containing the CUDA headers.
-    """
-  cuda_toolkit_path = cuda_config.cuda_toolkit_path
-  for relative_path in CUDA_INCLUDE_PATHS:
-    if repository_ctx.path(
-        "%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists:
-      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-  auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
-
+      Returns:
+        The path of the directory containing the CUDA headers.
+      """
+    cuda_toolkit_path = cuda_config.cuda_toolkit_path
+    for relative_path in CUDA_INCLUDE_PATHS:
+        if repository_ctx.path(
+            "%s/%scuda.h" % (cuda_toolkit_path, relative_path),
+        ).exists:
+            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+    auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
 
 def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
-  """Returns the path to the directory containing cudnn.h
+    """Returns the path to the directory containing cudnn.h
 
-    Args:
-      repository_ctx: The repository context.
-      cudnn_install_basedir: The cudnn install directory as returned by
-        _cudnn_install_basedir.
+      Args:
+        repository_ctx: The repository context.
+        cudnn_install_basedir: The cudnn install directory as returned by
+          _cudnn_install_basedir.
 
-    Returns:
-      The path of the directory containing the cudnn header.
-    """
-  for relative_path in CUDA_INCLUDE_PATHS:
-    if repository_ctx.path(
-        "%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists:
-      return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
-  if repository_ctx.path("/usr/include/cudnn.h").exists:
-    return "/usr/include"
-  auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
-
+      Returns:
+        The path of the directory containing the cudnn header.
+      """
+    for relative_path in CUDA_INCLUDE_PATHS:
+        if repository_ctx.path(
+            "%s/%scudnn.h" % (cudnn_install_basedir, relative_path),
+        ).exists:
+            return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
+    if repository_ctx.path("/usr/include/cudnn.h").exists:
+        return "/usr/include"
+    auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
 
 def _find_nvvm_libdevice_dir(repository_ctx, cuda_config):
-  """Returns the path to the directory containing libdevice in bitcode format.
+    """Returns the path to the directory containing libdevice in bitcode format.
 
-    Args:
-      repository_ctx: The repository context.
-      cuda_config: The CUDA config as returned by _get_cuda_config
+      Args:
+        repository_ctx: The repository context.
+        cuda_config: The CUDA config as returned by _get_cuda_config
 
-    Returns:
-      The path of the directory containing the CUDA headers.
-    """
-  cuda_toolkit_path = cuda_config.cuda_toolkit_path
-  for libdevice_file in NVVM_LIBDEVICE_FILES:
-    for relative_path in NVVM_LIBDEVICE_PATHS:
-      if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path,
-                                          libdevice_file)).exists:
-        return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-  auto_configure_fail(
-      "Cannot find libdevice*.bc files under %s" % cuda_toolkit_path)
-
+      Returns:
+        The path of the directory containing the libdevice files.
+      """
+    cuda_toolkit_path = cuda_config.cuda_toolkit_path
+    for libdevice_file in NVVM_LIBDEVICE_FILES:
+        for relative_path in NVVM_LIBDEVICE_PATHS:
+            if repository_ctx.path("%s/%s%s" % (
+                cuda_toolkit_path,
+                relative_path,
+                libdevice_file,
+            )).exists:
+                return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+    auto_configure_fail(
+        "Cannot find libdevice*.bc files under %s" % cuda_toolkit_path,
+    )
 
 def _cudart_static_linkopt(cpu_value):
-  """Returns additional platform-specific linkopts for cudart."""
-  return "" if cpu_value == "Darwin" else "\"-lrt\","
-
+    """Returns additional platform-specific linkopts for cudart."""
+    return "" if cpu_value == "Darwin" else "\"-lrt\","
 
 def _get_cuda_config(repository_ctx):
-  """Detects and returns information about the CUDA installation on the system.
+    """Detects and returns information about the CUDA installation on the system.
 
-    Args:
-      repository_ctx: The repository context.
+      Args:
+        repository_ctx: The repository context.
 
-    Returns:
-      A struct containing the following fields:
-        cuda_toolkit_path: The CUDA toolkit installation directory.
-        cudnn_install_basedir: The cuDNN installation directory.
-        cuda_version: The version of CUDA on the system.
-        cudnn_version: The version of cuDNN on the system.
-        compute_capabilities: A list of the system's CUDA compute capabilities.
-        cpu_value: The name of the host operating system.
-    """
-  cpu_value = get_cpu_value(repository_ctx)
-  toolkit_path = cuda_toolkit_path(repository_ctx)
-  cuda_version = _cuda_version(repository_ctx, toolkit_path, cpu_value)
-  cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
-  cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir,
-                                 cpu_value)
-  return struct(
-      cuda_toolkit_path=toolkit_path,
-      cudnn_install_basedir=cudnn_install_basedir,
-      cuda_version=cuda_version,
-      cudnn_version=cudnn_version,
-      compute_capabilities=compute_capabilities(repository_ctx),
-      cpu_value=cpu_value,
-  )
-
+      Returns:
+        A struct containing the following fields:
+          cuda_toolkit_path: The CUDA toolkit installation directory.
+          cudnn_install_basedir: The cuDNN installation directory.
+          cuda_version: The version of CUDA on the system.
+          cudnn_version: The version of cuDNN on the system.
+          compute_capabilities: A list of the system's CUDA compute capabilities.
+          cpu_value: The name of the host operating system.
+      """
+    cpu_value = get_cpu_value(repository_ctx)
+    toolkit_path = cuda_toolkit_path(repository_ctx)
+    cuda_version = _cuda_version(repository_ctx, toolkit_path, cpu_value)
+    cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
+    cudnn_version = _cudnn_version(
+        repository_ctx,
+        cudnn_install_basedir,
+        cpu_value,
+    )
+    return struct(
+        cuda_toolkit_path = toolkit_path,
+        cudnn_install_basedir = cudnn_install_basedir,
+        cuda_version = cuda_version,
+        cudnn_version = cudnn_version,
+        compute_capabilities = compute_capabilities(repository_ctx),
+        cpu_value = cpu_value,
+    )
 
 def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
-  if not out:
-    out = tpl.replace(":", "/")
-  repository_ctx.template(
-      out,
-      Label("//third_party/gpus/%s.tpl" % tpl),
-      substitutions,
-  )
-
+    if not out:
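+        # Derive the output path from the template label, e.g. "cuda:BUILD" -> "cuda/BUILD".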
+        out = tpl.replace(":", "/")
+    repository_ctx.template(
+        out,
+        Label("//third_party/gpus/%s.tpl" % tpl),
+        substitutions,
+    )
 
 def _file(repository_ctx, label):
-  repository_ctx.template(
-      label.replace(":", "/"),
-      Label("//third_party/gpus/%s.tpl" % label),
-      {},
-  )
-
+    repository_ctx.template(
+        label.replace(":", "/"),
+        Label("//third_party/gpus/%s.tpl" % label),
+        {},
+    )
 
 _DUMMY_CROSSTOOL_BZL_FILE = """
 def error_gpu_disabled():
@@ -1035,99 +1034,83 @@
 """
 
 def _create_dummy_repository(repository_ctx):
-  cpu_value = get_cpu_value(repository_ctx)
+    cpu_value = get_cpu_value(repository_ctx)
 
-  # Set up BUILD file for cuda/.
-  _tpl(
-      repository_ctx,
-      "cuda:build_defs.bzl",
-      {
-          "%{cuda_is_configured}": "False",
-          "%{cuda_extra_copts}": "[]",
-      },
-  )
-  _tpl(
-      repository_ctx,
-      "cuda:BUILD",
-      {
-          "%{cuda_driver_lib}":
-              lib_name("cuda", cpu_value),
-          "%{cudart_static_lib}":
-              lib_name(
-                  "cudart_static",
-                  cpu_value,
-                  static=True,
-              ),
-          "%{cudart_static_linkopt}":
-              _cudart_static_linkopt(cpu_value),
-          "%{cudart_lib}":
-              lib_name("cudart", cpu_value),
-          "%{cublas_lib}":
-              lib_name("cublas", cpu_value),
-          "%{cusolver_lib}":
-              lib_name("cusolver", cpu_value),
-          "%{cudnn_lib}":
-              lib_name("cudnn", cpu_value),
-          "%{cufft_lib}":
-              lib_name("cufft", cpu_value),
-          "%{curand_lib}":
-              lib_name("curand", cpu_value),
-          "%{cupti_lib}":
-              lib_name("cupti", cpu_value),
-          "%{copy_rules}":
-              "",
-          "%{cuda_headers}":
-              "",
-      },
-  )
+    # Set up BUILD file for cuda/.
+    _tpl(
+        repository_ctx,
+        "cuda:build_defs.bzl",
+        {
+            "%{cuda_is_configured}": "False",
+            "%{cuda_extra_copts}": "[]",
+        },
+    )
+    _tpl(
+        repository_ctx,
+        "cuda:BUILD",
+        {
+            "%{cuda_driver_lib}": lib_name("cuda", cpu_value),
+            "%{cudart_static_lib}": lib_name(
+                "cudart_static",
+                cpu_value,
+                static = True,
+            ),
+            "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
+            "%{cudart_lib}": lib_name("cudart", cpu_value),
+            "%{cublas_lib}": lib_name("cublas", cpu_value),
+            "%{cusolver_lib}": lib_name("cusolver", cpu_value),
+            "%{cudnn_lib}": lib_name("cudnn", cpu_value),
+            "%{cufft_lib}": lib_name("cufft", cpu_value),
+            "%{curand_lib}": lib_name("curand", cpu_value),
+            "%{cupti_lib}": lib_name("cupti", cpu_value),
+            "%{copy_rules}": "",
+            "%{cuda_headers}": "",
+        },
+    )
 
-  # Create dummy files for the CUDA toolkit since they are still required by
-  # tensorflow/core/platform/default/build_config:cuda.
-  repository_ctx.file("cuda/cuda/include/cuda.h")
-  repository_ctx.file("cuda/cuda/include/cublas.h")
-  repository_ctx.file("cuda/cuda/include/cudnn.h")
-  repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h")
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cuda", cpu_value))
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cudart", cpu_value))
-  repository_ctx.file(
-      "cuda/cuda/lib/%s" % lib_name("cudart_static", cpu_value))
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cublas", cpu_value))
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cusolver", cpu_value))
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cudnn", cpu_value))
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("curand", cpu_value))
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cufft", cpu_value))
-  repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cupti", cpu_value))
+    # Create dummy files for the CUDA toolkit since they are still required by
+    # tensorflow/core/platform/default/build_config:cuda.
+    repository_ctx.file("cuda/cuda/include/cuda.h")
+    repository_ctx.file("cuda/cuda/include/cublas.h")
+    repository_ctx.file("cuda/cuda/include/cudnn.h")
+    repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h")
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cuda", cpu_value))
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cudart", cpu_value))
+    repository_ctx.file(
+        "cuda/cuda/lib/%s" % lib_name("cudart_static", cpu_value),
+    )
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cublas", cpu_value))
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cusolver", cpu_value))
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cudnn", cpu_value))
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("curand", cpu_value))
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cufft", cpu_value))
+    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cupti", cpu_value))
 
-  # Set up cuda_config.h, which is used by
-  # tensorflow/stream_executor/dso_loader.cc.
-  _tpl(
-      repository_ctx,
-      "cuda:cuda_config.h",
-      {
-          "%{cuda_version}":
-              _DEFAULT_CUDA_VERSION,
-          "%{cudnn_version}":
-              _DEFAULT_CUDNN_VERSION,
-          "%{cuda_compute_capabilities}":
-              ",".join([
-                  "CudaVersion(\"%s\")" % c
-                  for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-              ]),
-          "%{cuda_toolkit_path}":
-              _DEFAULT_CUDA_TOOLKIT_PATH,
-      },
-      "cuda/cuda/cuda_config.h",
-  )
+    # Set up cuda_config.h, which is used by
+    # tensorflow/stream_executor/dso_loader.cc.
+    _tpl(
+        repository_ctx,
+        "cuda:cuda_config.h",
+        {
+            "%{cuda_version}": _DEFAULT_CUDA_VERSION,
+            "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
+            "%{cuda_compute_capabilities}": ",".join([
+                "CudaVersion(\"%s\")" % c
+                for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+            ]),
+            "%{cuda_toolkit_path}": _DEFAULT_CUDA_TOOLKIT_PATH,
+        },
+        "cuda/cuda/cuda_config.h",
+    )
 
-  # If cuda_configure is not configured to build with GPU support, and the user
-  # attempts to build with --config=cuda, add a dummy build rule to intercept
-  # this and fail with an actionable error message.
-  repository_ctx.file(
-      "crosstool/error_gpu_disabled.bzl",
-      _DUMMY_CROSSTOOL_BZL_FILE,
-  )
-  repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
-
+    # If cuda_configure is not configured to build with GPU support, and the user
+    # attempts to build with --config=cuda, add a dummy build rule to intercept
+    # this and fail with an actionable error message.
+    repository_ctx.file(
+        "crosstool/error_gpu_disabled.bzl",
+        _DUMMY_CROSSTOOL_BZL_FILE,
+    )
+    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
 def _execute(
         repository_ctx,
@@ -1135,43 +1118,44 @@
         error_msg = None,
         error_details = None,
         empty_stdout_fine = False):
-  """Executes an arbitrary shell command.
+    """Executes an arbitrary shell command.
 
-    Args:
-      repository_ctx: the repository_ctx object
-      cmdline: list of strings, the command to execute
-      error_msg: string, a summary of the error if the command fails
-      error_details: string, details about the error or steps to fix it
-      empty_stdout_fine: bool, if True, an empty stdout result is fine,
-        otherwise it's an error
-    Return: the result of repository_ctx.execute(cmdline)
-  """
-  result = repository_ctx.execute(cmdline)
-  if result.stderr or not (empty_stdout_fine or result.stdout):
-    auto_configure_fail(
-        "\n".join([
-            error_msg.strip() if error_msg else "Repository command failed",
-            result.stderr.strip(),
-            error_details if error_details else "",
-        ]),)
-  return result
-
+      Args:
+        repository_ctx: the repository_ctx object
+        cmdline: list of strings, the command to execute
+        error_msg: string, a summary of the error if the command fails
+        error_details: string, details about the error or steps to fix it
+        empty_stdout_fine: bool, if True, an empty stdout result is fine,
+          otherwise it's an error
+      Returns: the result of repository_ctx.execute(cmdline)
+    """
+    result = repository_ctx.execute(cmdline)
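+    # Any stderr output counts as failure; an empty stdout is also an error
+    # unless empty_stdout_fine is set.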
+    if result.stderr or not (empty_stdout_fine or result.stdout):
+        auto_configure_fail(
+            "\n".join([
+                error_msg.strip() if error_msg else "Repository command failed",
+                result.stderr.strip(),
+                error_details if error_details else "",
+            ]),
+        )
+    return result
 
 def _norm_path(path):
-  """Returns a path with '/' and remove the trailing slash."""
-  path = path.replace("\\", "/")
-  if path[-1] == "/":
-    path = path[:-1]
-  return path
+    """Returns a path with '/' and remove the trailing slash."""
+    path = path.replace("\\", "/")
+    if path[-1] == "/":
+        path = path[:-1]
+    return path
 
 def make_copy_files_rule(repository_ctx, name, srcs, outs):
-  """Returns a rule to copy a set of files."""
-  cmds = []
-  # Copy files.
-  for src, out in zip(srcs, outs):
-    cmds.append('cp -f "%s" $(location %s)' % (src, out))
-  outs = [('        "%s",' % out) for out in outs]
-  return """genrule(
+    """Returns a rule to copy a set of files."""
+    cmds = []
+
+    # Copy files.
+    for src, out in zip(srcs, outs):
+        cmds.append('cp -f "%s" $(location %s)' % (src, out))
+    outs = [('        "%s",' % out) for out in outs]
+    return """genrule(
     name = "%s",
     outs = [
 %s
@@ -1180,15 +1164,16 @@
 )""" % (name, "\n".join(outs), " && ".join(cmds))
 
 def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir):
-  """Returns a rule to recursively copy a directory."""
-  src_dir = _norm_path(src_dir)
-  out_dir = _norm_path(out_dir)
-  outs = _read_dir(repository_ctx, src_dir)
-  outs = [('        "%s",' % out.replace(src_dir, out_dir)) for out in outs]
-  # '@D' already contains the relative path for a single file, see
-  # http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables
-  out_dir = "$(@D)/%s" % out_dir if len(outs) > 1 else "$(@D)"
-  return """genrule(
+    """Returns a rule to recursively copy a directory."""
+    src_dir = _norm_path(src_dir)
+    out_dir = _norm_path(out_dir)
+    outs = _read_dir(repository_ctx, src_dir)
+    outs = [('        "%s",' % out.replace(src_dir, out_dir)) for out in outs]
+
+    # '@D' already contains the relative path for a single file, see
+    # http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables
+    out_dir = "$(@D)/%s" % out_dir if len(outs) > 1 else "$(@D)"
+    return """genrule(
     name = "%s",
     outs = [
 %s
@@ -1197,346 +1182,323 @@
 )""" % (name, "\n".join(outs), src_dir, out_dir)
 
 def _read_dir(repository_ctx, src_dir):
-  """Returns a string with all files in a directory.
+    """Returns a string with all files in a directory.
 
-    Finds all files inside a directory, traversing subfolders and following
-    symlinks. The returned string contains the full path of all files
-    separated by line breaks.
-    """
-  if _is_windows(repository_ctx):
-    src_dir = src_dir.replace("/", "\\")
-    find_result = _execute(
-        repository_ctx,
-        ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
-        empty_stdout_fine=True,
-    )
+      Finds all files inside a directory, traversing subfolders and following
+      symlinks. The returned list contains the full path of each file,
+      sorted lexicographically.
+      """
+    if _is_windows(repository_ctx):
+        src_dir = src_dir.replace("/", "\\")
+        find_result = _execute(
+            repository_ctx,
+            ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
+            empty_stdout_fine = True,
+        )
 
-    # src_files will be used in genrule.outs where the paths must
-    # use forward slashes.
-    result = find_result.stdout.replace("\\", "/")
-  else:
-    find_result = _execute(
-        repository_ctx,
-        ["find", src_dir, "-follow", "-type", "f"],
-        empty_stdout_fine=True,
-    )
-    result = find_result.stdout
-  return sorted(result.splitlines())
-
+        # src_files will be used in genrule.outs where the paths must
+        # use forward slashes.
+        result = find_result.stdout.replace("\\", "/")
+    else:
+        find_result = _execute(
+            repository_ctx,
+            ["find", src_dir, "-follow", "-type", "f"],
+            empty_stdout_fine = True,
+        )
+        result = find_result.stdout
+    return sorted(result.splitlines())
 
 def _flag_enabled(repository_ctx, flag_name):
-  if flag_name in repository_ctx.os.environ:
-    value = repository_ctx.os.environ[flag_name].strip()
-    return value == "1"
-  return False
-
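+    # A flag counts as enabled only when its environment variable is set to exactly "1".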
+    if flag_name in repository_ctx.os.environ:
+        value = repository_ctx.os.environ[flag_name].strip()
+        return value == "1"
+    return False
 
 def _use_cuda_clang(repository_ctx):
-  return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
-
+    return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
 
 def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
-  if _use_cuda_clang(repository_ctx):
-    capability_flags = [
-        "--cuda-gpu-arch=sm_" + cap.replace(".", "")
-        for cap in compute_capabilities
-    ]
-  else:
-    # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
-    # TODO(csigg): Make this consistent with cuda clang and pass to crosstool.
-    capability_flags = []
-  return str(capability_flags)
-
+    if _use_cuda_clang(repository_ctx):
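+        # e.g. compute capability "5.2" becomes the clang flag "--cuda-gpu-arch=sm_52".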
+        capability_flags = [
+            "--cuda-gpu-arch=sm_" + cap.replace(".", "")
+            for cap in compute_capabilities
+        ]
+    else:
+        # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" wrapper for nvcc.
+        # TODO(csigg): Make this consistent with cuda clang and pass to crosstool.
+        capability_flags = []
+    return str(capability_flags)
 
 def _create_local_cuda_repository(repository_ctx):
-  """Creates the repository containing files set up to build with CUDA."""
-  cuda_config = _get_cuda_config(repository_ctx)
+    """Creates the repository containing files set up to build with CUDA."""
+    cuda_config = _get_cuda_config(repository_ctx)
 
-  cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
-  cudnn_header_dir = _find_cudnn_header_dir(
-      repository_ctx,
-      cuda_config.cudnn_install_basedir,
-  )
-  cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
-  nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
+    cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
+    cudnn_header_dir = _find_cudnn_header_dir(
+        repository_ctx,
+        cuda_config.cudnn_install_basedir,
+    )
+    cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
+    nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
 
-  # Create genrule to copy files from the installed CUDA toolkit into execroot.
-  copy_rules = [
-      make_copy_dir_rule(
-          repository_ctx,
-          name = "cuda-include",
-          src_dir = cuda_include_path,
-          out_dir = "cuda/include",
-      ),
-      make_copy_dir_rule(
-          repository_ctx,
-          name = "cuda-nvvm",
-          src_dir = nvvm_libdevice_dir,
-          out_dir = "cuda/nvvm/libdevice",
-      ),
-      make_copy_dir_rule(
-          repository_ctx,
-          name = "cuda-extras",
-          src_dir = cupti_header_dir,
-          out_dir = "cuda/extras/CUPTI/include",
-      ),
-  ]
+    # Create genrules to copy files from the installed CUDA toolkit into the execroot.
+    copy_rules = [
+        make_copy_dir_rule(
+            repository_ctx,
+            name = "cuda-include",
+            src_dir = cuda_include_path,
+            out_dir = "cuda/include",
+        ),
+        make_copy_dir_rule(
+            repository_ctx,
+            name = "cuda-nvvm",
+            src_dir = nvvm_libdevice_dir,
+            out_dir = "cuda/nvvm/libdevice",
+        ),
+        make_copy_dir_rule(
+            repository_ctx,
+            name = "cuda-extras",
+            src_dir = cupti_header_dir,
+            out_dir = "cuda/extras/CUPTI/include",
+        ),
+    ]
 
-  cuda_libs = _find_libs(repository_ctx, cuda_config)
-  cuda_lib_srcs = []
-  cuda_lib_outs = []
-  for path in cuda_libs.values():
-    cuda_lib_srcs.append(str(path))
-    cuda_lib_outs.append("cuda/lib/" + path.basename)
-  copy_rules.append(make_copy_files_rule(
-      repository_ctx,
-      name = "cuda-lib",
-      srcs = cuda_lib_srcs,
-      outs = cuda_lib_outs,
-  ))
-
-  copy_rules.append(make_copy_dir_rule(
-      repository_ctx,
-      name = "cuda-bin",
-      src_dir = cuda_config.cuda_toolkit_path + "/bin",
-      out_dir = "cuda/bin"
-  ))
-
-  # Copy cudnn.h if cuDNN was not installed to CUDA_TOOLKIT_PATH.
-  included_files = _read_dir(repository_ctx, cuda_include_path)
-  if not any([file.endswith("cudnn.h") for file in included_files]):
+    cuda_libs = _find_libs(repository_ctx, cuda_config)
+    cuda_lib_srcs = []
+    cuda_lib_outs = []
+    for path in cuda_libs.values():
+        cuda_lib_srcs.append(str(path))
+        cuda_lib_outs.append("cuda/lib/" + path.basename)
     copy_rules.append(make_copy_files_rule(
         repository_ctx,
-        name = "cudnn-include",
-        srcs = [cudnn_header_dir + "/cudnn.h"],
-        outs = ["cuda/include/cudnn.h"],
+        name = "cuda-lib",
+        srcs = cuda_lib_srcs,
+        outs = cuda_lib_outs,
     ))
-  else:
-    copy_rules.append("filegroup(name = 'cudnn-include')\n")
 
-  # Set up BUILD file for cuda/
-  _tpl(
-      repository_ctx,
-      "cuda:build_defs.bzl",
-      {
-          "%{cuda_is_configured}":
-              "True",
-          "%{cuda_extra_copts}":
-              _compute_cuda_extra_copts(
-                  repository_ctx,
-                  cuda_config.compute_capabilities,
-              ),
-      },
-  )
-  _tpl(
-      repository_ctx,
-      "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
-      {
-          "%{cuda_driver_lib}":
-              cuda_libs["cuda"].basename,
-          "%{cudart_static_lib}":
-              cuda_libs["cudart_static"].basename,
-          "%{cudart_static_linkopt}":
-              _cudart_static_linkopt(cuda_config.cpu_value,),
-          "%{cudart_lib}":
-              cuda_libs["cudart"].basename,
-          "%{cublas_lib}":
-              cuda_libs["cublas"].basename,
-          "%{cusolver_lib}":
-              cuda_libs["cusolver"].basename,
-          "%{cudnn_lib}":
-              cuda_libs["cudnn"].basename,
-          "%{cufft_lib}":
-              cuda_libs["cufft"].basename,
-          "%{curand_lib}":
-              cuda_libs["curand"].basename,
-          "%{cupti_lib}":
-              cuda_libs["cupti"].basename,
-          "%{copy_rules}":
-              "\n".join(copy_rules),
-          "%{cuda_headers}": ('":cuda-include",\n' + '        ":cudnn-include",'
-                             ),
-      },
-      "cuda/BUILD",
-  )
+    copy_rules.append(make_copy_dir_rule(
+        repository_ctx,
+        name = "cuda-bin",
+        src_dir = cuda_config.cuda_toolkit_path + "/bin",
+        out_dir = "cuda/bin",
+    ))
 
-  is_cuda_clang = _use_cuda_clang(repository_ctx)
+    # Copy cudnn.h if cuDNN was not installed to CUDA_TOOLKIT_PATH.
+    included_files = _read_dir(repository_ctx, cuda_include_path)
+    if not any([file.endswith("cudnn.h") for file in included_files]):
+        copy_rules.append(make_copy_files_rule(
+            repository_ctx,
+            name = "cudnn-include",
+            srcs = [cudnn_header_dir + "/cudnn.h"],
+            outs = ["cuda/include/cudnn.h"],
+        ))
+    else:
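+        # cudnn.h already lives in the CUDA include directory; emit an empty
+        # filegroup so the ":cudnn-include" label referenced below still resolves.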
+        copy_rules.append("filegroup(name = 'cudnn-include')\n")
 
-  should_download_clang = is_cuda_clang and _flag_enabled(
-      repository_ctx,
-      _TF_DOWNLOAD_CLANG,
-  )
-  if should_download_clang:
-    download_clang(repository_ctx, "crosstool/extra_tools")
+    # Set up BUILD file for cuda/
+    _tpl(
+        repository_ctx,
+        "cuda:build_defs.bzl",
+        {
+            "%{cuda_is_configured}": "True",
+            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
+                repository_ctx,
+                cuda_config.compute_capabilities,
+            ),
+        },
+    )
+    _tpl(
+        repository_ctx,
+        "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
+        {
+            "%{cuda_driver_lib}": cuda_libs["cuda"].basename,
+            "%{cudart_static_lib}": cuda_libs["cudart_static"].basename,
+            "%{cudart_static_linkopt}": _cudart_static_linkopt(cuda_config.cpu_value),
+            "%{cudart_lib}": cuda_libs["cudart"].basename,
+            "%{cublas_lib}": cuda_libs["cublas"].basename,
+            "%{cusolver_lib}": cuda_libs["cusolver"].basename,
+            "%{cudnn_lib}": cuda_libs["cudnn"].basename,
+            "%{cufft_lib}": cuda_libs["cufft"].basename,
+            "%{curand_lib}": cuda_libs["curand"].basename,
+            "%{cupti_lib}": cuda_libs["cupti"].basename,
+            "%{copy_rules}": "\n".join(copy_rules),
+            "%{cuda_headers}": (
+                '":cuda-include",\n' + '        ":cudnn-include",'
+            ),
+        },
+        "cuda/BUILD",
+    )
 
-  # Set up crosstool/
-  cc = find_cc(repository_ctx)
-  cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
+    is_cuda_clang = _use_cuda_clang(repository_ctx)
 
-  host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
-  cuda_defines = {}
-  # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
-  # https://github.com/bazelbuild/bazel/issues/760).
-  # However, this stops our custom clang toolchain from picking the provided
-  # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
-  # toolchain.
-  # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
-  #       flag from the CROSSTOOL completely (see
-  #       https://github.com/bazelbuild/bazel/issues/5634)
-  if should_download_clang:
-    cuda_defines["%{linker_bin_path_flag}"] = ""
-  else:
-    cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+    should_download_clang = is_cuda_clang and _flag_enabled(
+        repository_ctx,
+        _TF_DOWNLOAD_CLANG,
+    )
+    if should_download_clang:
+        download_clang(repository_ctx, "crosstool/extra_tools")
 
-  if is_cuda_clang:
-    cuda_defines["%{host_compiler_path}"] = str(cc)
-    cuda_defines["%{host_compiler_warnings}"] = """
+    # Set up crosstool/
+    cc = find_cc(repository_ctx)
+    cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
+
+    host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
+    cuda_defines = {}
+
+    # Bazel sets the '-B/usr/bin' flag to work around build errors on RHEL (see
+    # https://github.com/bazelbuild/bazel/issues/760).
+    # However, this stops our custom clang toolchain from picking the provided
+    # LLD linker, so we only add '-B/usr/bin' when using the non-downloaded
+    # toolchain.
+    # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
+    #       flag from the CROSSTOOL completely (see
+    #       https://github.com/bazelbuild/bazel/issues/5634)
+    if should_download_clang:
+        cuda_defines["%{linker_bin_path_flag}"] = ""
+    else:
+        cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+
+    if is_cuda_clang:
+        cuda_defines["%{host_compiler_path}"] = str(cc)
+        cuda_defines["%{host_compiler_warnings}"] = """
         # Some parts of the codebase set -Werror and hit this warning, so
         # switch it off for now.
         flag: "-Wno-invalid-partial-specialization"
     """
-    cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
-    cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
-    _tpl(repository_ctx, "crosstool:BUILD", {
-        "%{linker_files}": ":empty",
-        "%{win_linker_files}": ":empty"
-    })
-    repository_ctx.file(
-        "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
-    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
-  else:
-    cuda_defines[
-        "%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
-    cuda_defines["%{host_compiler_warnings}"] = ""
+        cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
+        cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
+        _tpl(repository_ctx, "crosstool:BUILD", {
+            "%{linker_files}": ":empty",
+            "%{win_linker_files}": ":empty",
+        })
+        repository_ctx.file(
+            "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
+            "",
+        )
+        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
+    else:
+        cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
+        cuda_defines["%{host_compiler_warnings}"] = ""
 
-    # nvcc has the system include paths built in and will automatically
-    # search them; we cannot work around that, so we add the relevant cuda
-    # system paths to the allowed compiler specific include paths.
-    cuda_defines["%{host_compiler_includes}"] = (
-        host_compiler_includes + "\n" + _cuda_include_path(
-            repository_ctx, cuda_config) +
-        "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
-        "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
+        # nvcc has the system include paths built in and will automatically
+        # search them; we cannot work around that, so we add the relevant CUDA
+        # system paths to the allowed compiler-specific include paths.
+        cuda_defines["%{host_compiler_includes}"] = (
+            host_compiler_includes + "\n" + _cuda_include_path(
+                repository_ctx,
+                cuda_config,
+            ) +
+            "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
+            "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir
+        )
 
-    # For gcc, do not canonicalize system header paths; some versions of gcc
-    # pick the shortest possible path for system includes when creating the
-    # .d file - given that includes that are prefixed with "../" multiple
-    # time quickly grow longer than the root of the tree, this can lead to
-    # bazel's header check failing.
-    cuda_defines["%{extra_no_canonical_prefixes_flags}"] = (
-        "flag: \"-fno-canonical-system-headers\"")
-    nvcc_path = str(
-        repository_ctx.path("%s/bin/nvcc%s" % (
-            cuda_config.cuda_toolkit_path,
-            ".exe" if _is_windows(repository_ctx) else "",
-        )))
+        # For gcc, do not canonicalize system header paths; some versions of gcc
+        # pick the shortest possible path for system includes when creating the
+        # .d file - given that includes that are prefixed with "../" multiple
+        # times quickly grow longer than the root of the tree, this can lead to
+        # bazel's header check failing.
+        cuda_defines["%{extra_no_canonical_prefixes_flags}"] = (
+            "flag: \"-fno-canonical-system-headers\""
+        )
+        nvcc_path = str(
+            repository_ctx.path("%s/bin/nvcc%s" % (
+                cuda_config.cuda_toolkit_path,
+                ".exe" if _is_windows(repository_ctx) else "",
+            )),
+        )
+        _tpl(
+            repository_ctx,
+            "crosstool:BUILD",
+            {
+                "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
+                "%{win_linker_files}": ":windows_msvc_wrapper_files",
+            },
+        )
+        wrapper_defines = {
+            "%{cpu_compiler}": str(cc),
+            "%{cuda_version}": cuda_config.cuda_version,
+            "%{nvcc_path}": nvcc_path,
+            "%{gcc_host_compiler_path}": str(cc),
+            "%{cuda_compute_capabilities}": ", ".join(
+                ["\"%s\"" % c for c in cuda_config.compute_capabilities],
+            ),
+            "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx),
+        }
+        _tpl(
+            repository_ctx,
+            "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
+            wrapper_defines,
+        )
+        _tpl(
+            repository_ctx,
+            "crosstool:windows/msvc_wrapper_for_nvcc.py",
+            wrapper_defines,
+        )
+
     _tpl(
         repository_ctx,
-        "crosstool:BUILD",
+        "crosstool:CROSSTOOL",
+        cuda_defines + _get_win_cuda_defines(repository_ctx),
+        out = "crosstool/CROSSTOOL",
+    )
+
+    # Set up cuda_config.h, which is used by
+    # tensorflow/stream_executor/dso_loader.cc.
+    _tpl(
+        repository_ctx,
+        "cuda:cuda_config.h",
         {
-            "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
-            "%{win_linker_files}": ":windows_msvc_wrapper_files",
+            "%{cuda_version}": cuda_config.cuda_version,
+            "%{cudnn_version}": cuda_config.cudnn_version,
+            "%{cuda_compute_capabilities}": ",".join([
+                "CudaVersion(\"%s\")" % c
+                for c in cuda_config.compute_capabilities
+            ]),
+            "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path,
         },
+        "cuda/cuda/cuda_config.h",
     )
-    wrapper_defines = {
-        "%{cpu_compiler}":
-            str(cc),
-        "%{cuda_version}":
-            cuda_config.cuda_version,
-        "%{nvcc_path}":
-            nvcc_path,
-        "%{gcc_host_compiler_path}":
-            str(cc),
-        "%{cuda_compute_capabilities}":
-            ", ".join(
-                ["\"%s\"" % c for c in cuda_config.compute_capabilities],),
-        "%{nvcc_tmp_dir}":
-            _get_nvcc_tmp_dir_for_windows(repository_ctx),
-    }
-    _tpl(
-        repository_ctx,
-        "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
-        wrapper_defines,
-    )
-    _tpl(
-        repository_ctx,
-        "crosstool:windows/msvc_wrapper_for_nvcc.py",
-        wrapper_defines,
-    )
-
-  _tpl(
-      repository_ctx,
-      "crosstool:CROSSTOOL",
-      cuda_defines + _get_win_cuda_defines(repository_ctx),
-      out="crosstool/CROSSTOOL",
-  )
-
-  # Set up cuda_config.h, which is used by
-  # tensorflow/stream_executor/dso_loader.cc.
-  _tpl(
-      repository_ctx,
-      "cuda:cuda_config.h",
-      {
-          "%{cuda_version}":
-              cuda_config.cuda_version,
-          "%{cudnn_version}":
-              cuda_config.cudnn_version,
-          "%{cuda_compute_capabilities}":
-              ",".join([
-                  "CudaVersion(\"%s\")" % c
-                  for c in cuda_config.compute_capabilities
-              ],),
-          "%{cuda_toolkit_path}":
-              cuda_config.cuda_toolkit_path,
-      },
-      "cuda/cuda/cuda_config.h",
-  )
-
 
 def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
-  """Creates pointers to a remotely configured repo set up to build with CUDA."""
-  _tpl(
-      repository_ctx,
-      "cuda:build_defs.bzl",
-      {
-          "%{cuda_is_configured}":
-              "True",
-          "%{cuda_extra_copts}":
-              _compute_cuda_extra_copts(
-                  repository_ctx,
-                  compute_capabilities(repository_ctx),
-              ),
-      },
-  )
-  repository_ctx.template(
-      "cuda/BUILD",
-      Label(remote_config_repo + "/cuda:BUILD"),
-      {},
-  )
-  repository_ctx.template(
-      "cuda/build_defs.bzl",
-      Label(remote_config_repo + "/cuda:build_defs.bzl"),
-      {},
-  )
-  repository_ctx.template(
-      "cuda/cuda/cuda_config.h",
-      Label(remote_config_repo + "/cuda:cuda/cuda_config.h"),
-      {},
-  )
-
+    """Creates pointers to a remotely configured repo set up to build with CUDA."""
+    _tpl(
+        repository_ctx,
+        "cuda:build_defs.bzl",
+        {
+            "%{cuda_is_configured}": "True",
+            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
+                repository_ctx,
+                compute_capabilities(repository_ctx),
+            ),
+        },
+    )
+    repository_ctx.template(
+        "cuda/BUILD",
+        Label(remote_config_repo + "/cuda:BUILD"),
+        {},
+    )
+    repository_ctx.template(
+        "cuda/build_defs.bzl",
+        Label(remote_config_repo + "/cuda:build_defs.bzl"),
+        {},
+    )
+    repository_ctx.template(
+        "cuda/cuda/cuda_config.h",
+        Label(remote_config_repo + "/cuda:cuda/cuda_config.h"),
+        {},
+    )
 
 def _cuda_autoconf_impl(repository_ctx):
-  """Implementation of the cuda_autoconf repository rule."""
-  if not enable_cuda(repository_ctx):
-    _create_dummy_repository(repository_ctx)
-  elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
-    _create_remote_cuda_repository(
-        repository_ctx,
-        repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
-    )
-  else:
-    _create_local_cuda_repository(repository_ctx)
-
+    """Implementation of the cuda_autoconf repository rule."""
+    if not enable_cuda(repository_ctx):
+        _create_dummy_repository(repository_ctx)
+    elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
+        _create_remote_cuda_repository(
+            repository_ctx,
+            repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
+        )
+    else:
+        _create_local_cuda_repository(repository_ctx)
 
 cuda_configure = repository_rule(
     implementation = _cuda_autoconf_impl,
diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
index f67fb8a..63b8cd4 100644
--- a/third_party/gpus/rocm_configure.bzl
+++ b/third_party/gpus/rocm_configure.bzl
@@ -232,6 +232,42 @@
             auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target)
     return amdgpu_targets
 
+def _hipcc_env(repository_ctx):
+    """Returns the environment variable string for hipcc.
+
+    Args:
+        repository_ctx: The repository context.
+
+    Returns:
+        A string containing environment variables for hipcc.
+    """
+    hipcc_env = ""
+    for name in [
+        "HIP_CLANG_PATH",
+        "DEVICE_LIB_PATH",
+        "HIP_VDI_HOME",
+        "HIPCC_VERBOSE",
+        "HIPCC_COMPILE_FLAGS_APPEND",
+    ]:
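+        # Emit each variable that is set as NAME="value"; the segments are later
+        # substituted for %{hipcc_env} in the crosstool wrapper template.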
+        if name in repository_ctx.os.environ:
+            hipcc_env = (hipcc_env + " " + name + "=\"" +
+                         repository_ctx.os.environ[name].strip() + "\";")
+    return hipcc_env.strip()
+
+def _crosstool_verbose(repository_ctx):
+    """Returns the environment variable value CROSSTOOL_VERBOSE.
+
+    Args:
+        repository_ctx: The repository context.
+
+    Returns:
+        A string containing the value of the CROSSTOOL_VERBOSE environment variable.
+    """
+    name = "CROSSTOOL_VERBOSE"
+    if name in repository_ctx.os.environ:
+        return repository_ctx.os.environ[name].strip()
+    return "0"
+
 def _cpu_value(repository_ctx):
     """Returns the name of the host operating system.
 
@@ -605,7 +641,6 @@
         outs = rocm_lib_outs,
     ))
 
-
     # Set up BUILD file for rocm/
     _tpl(
         repository_ctx,
@@ -656,6 +691,12 @@
         {
             "%{cpu_compiler}": str(cc),
             "%{hipcc_path}": "/opt/rocm/bin/hipcc",
+            "%{hipcc_env}": _hipcc_env(repository_ctx),
+            "%{hip_runtime_path}": "/opt/rocm/hip/lib",
+            "%{hip_runtime_library}": "hip_hcc",
+            "%{hcc_runtime_path}": "/opt/rocm/hcc/lib",
+            "%{hcc_runtime_library}": "mcwamp",
+            "%{crosstool_verbose}": _crosstool_verbose(repository_ctx),
             "%{gcc_host_compiler_path}": str(cc),
             "%{rocm_amdgpu_targets}": ",".join(
                 ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index 3a836fa..07e4ad7 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -118,8 +118,8 @@
 
 def _nccl_configure_impl(repository_ctx):
     """Implementation of the nccl_configure repository rule."""
-    if not enable_cuda(repository_ctx) or \
-       get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD"):
+    if (not enable_cuda(repository_ctx) or
+        get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD")):
         # Add a dummy build file to make bazel query happy.
         repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
         return
diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl
index a07f549..970dddb 100644
--- a/third_party/nccl/system.BUILD.tpl
+++ b/third_party/nccl/system.BUILD.tpl
@@ -1,26 +1,25 @@
 filegroup(
-  name = "LICENSE",
-  visibility = ["//visibility:public"],
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
 )
 
 cc_library(
-  name = "nccl",
-  srcs = ["libnccl.so.%{version}"],
-  hdrs = ["nccl.h"],
-  include_prefix = "third_party/nccl",
-  deps = [
-      "@local_config_cuda//cuda:cuda_headers",
-  ],
-  visibility = ["//visibility:public"],
+    name = "nccl",
+    srcs = ["libnccl.so.%{version}"],
+    hdrs = ["nccl.h"],
+    include_prefix = "third_party/nccl",
+    visibility = ["//visibility:public"],
+    deps = [
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
 )
 
 genrule(
-  name = "nccl-files",
-  outs = [
-    "libnccl.so.%{version}",
-    "nccl.h",
-  ],
-  cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" &&
+    name = "nccl-files",
+    outs = [
+        "libnccl.so.%{version}",
+        "nccl.h",
+    ],
+    cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" &&
            cp "%{install_path}/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
 )
-
diff --git a/third_party/pasta/workspace.bzl b/third_party/pasta/workspace.bzl
index e46cc4a..9961835 100644
--- a/third_party/pasta/workspace.bzl
+++ b/third_party/pasta/workspace.bzl
@@ -6,11 +6,11 @@
     third_party_http_archive(
         name = "pasta",
         urls = [
-            "https://mirror.bazel.build/github.com/google/pasta/archive/c3d72cdee6fc806251949e912510444d58d7413c.tar.gz",
-            "https://github.com/google/pasta/archive/c3d72cdee6fc806251949e912510444d58d7413c.tar.gz",
+            "https://mirror.bazel.build/github.com/google/pasta/archive/v0.1.2.tar.gz",
+            "https://github.com/google/pasta/archive/v0.1.2.tar.gz",
         ],
-        strip_prefix = "pasta-c3d72cdee6fc806251949e912510444d58d7413c",
-        sha256 = "b5905f9cecc4b28363c563f3c4cb0545288bd35f7cc72c55066e97e53befc084",
+        strip_prefix = "pasta-0.1.2",
+        sha256 = "53e4c009a5eac38e942deb48bfc2d3cfca62cd457255fa86ffedb7e40f726a0c",
         build_file = "//third_party/pasta:BUILD.bazel",
         system_build_file = "//third_party/pasta:BUILD.system",
     )
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl
index 9a7581c..d1b1e59 100644
--- a/third_party/py/python_configure.bzl
+++ b/third_party/py/python_configure.bzl
@@ -11,300 +11,337 @@
 _PYTHON_LIB_PATH = "PYTHON_LIB_PATH"
 _TF_PYTHON_CONFIG_REPO = "TF_PYTHON_CONFIG_REPO"
 
-
-def _tpl(repository_ctx, tpl, substitutions={}, out=None):
-  if not out:
-    out = tpl
-  repository_ctx.template(
-      out,
-      Label("//third_party/py:%s.tpl" % tpl),
-      substitutions)
-
+def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
+    if not out:
+        out = tpl
+    repository_ctx.template(
+        out,
+        Label("//third_party/py:%s.tpl" % tpl),
+        substitutions,
+    )
 
 def _fail(msg):
-  """Output failure message when auto configuration fails."""
-  red = "\033[0;31m"
-  no_color = "\033[0m"
-  fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg))
-
+    """Output failure message when auto configuration fails."""
+    red = "\033[0;31m"
+    no_color = "\033[0m"
+    fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg))
 
 def _is_windows(repository_ctx):
-  """Returns true if the host operating system is windows."""
-  os_name = repository_ctx.os.name.lower()
-  if os_name.find("windows") != -1:
-    return True
-  return False
+    """Returns true if the host operating system is windows."""
+    os_name = repository_ctx.os.name.lower()
+    if os_name.find("windows") != -1:
+        return True
+    return False
 
+def _execute(
+        repository_ctx,
+        cmdline,
+        error_msg = None,
+        error_details = None,
+        empty_stdout_fine = False):
+    """Executes an arbitrary shell command.
 
-def _execute(repository_ctx, cmdline, error_msg=None, error_details=None,
-             empty_stdout_fine=False):
-  """Executes an arbitrary shell command.
-
-  Args:
-    repository_ctx: the repository_ctx object
-    cmdline: list of strings, the command to execute
-    error_msg: string, a summary of the error if the command fails
-    error_details: string, details about the error or steps to fix it
-    empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
-      it's an error
-  Return:
-    the result of repository_ctx.execute(cmdline)
-  """
-  result = repository_ctx.execute(cmdline)
-  if result.stderr or not (empty_stdout_fine or result.stdout):
-    _fail("\n".join([
-        error_msg.strip() if error_msg else "Repository command failed",
-        result.stderr.strip(),
-        error_details if error_details else ""]))
-  return result
-
+    Args:
+      repository_ctx: the repository_ctx object
+      cmdline: list of strings, the command to execute
+      error_msg: string, a summary of the error if the command fails
+      error_details: string, details about the error or steps to fix it
+      empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
+        it's an error
+    Returns:
+      the result of repository_ctx.execute(cmdline)
+    """
+    result = repository_ctx.execute(cmdline)
+    if result.stderr or not (empty_stdout_fine or result.stdout):
+        _fail("\n".join([
+            error_msg.strip() if error_msg else "Repository command failed",
+            result.stderr.strip(),
+            error_details if error_details else "",
+        ]))
+    return result
 
 def _read_dir(repository_ctx, src_dir):
-  """Returns a string with all files in a directory.
+    """Returns a string with all files in a directory.
 
-  Finds all files inside a directory, traversing subfolders and following
-  symlinks. The returned string contains the full path of all files
-  separated by line breaks.
-  """
-  if _is_windows(repository_ctx):
-    src_dir = src_dir.replace("/", "\\")
-    find_result = _execute(
-        repository_ctx, ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
-        empty_stdout_fine=True)
-    # src_files will be used in genrule.outs where the paths must
-    # use forward slashes.
-    result = find_result.stdout.replace("\\", "/")
-  else:
-    find_result = _execute(
-        repository_ctx, ["find", src_dir, "-follow", "-type", "f"],
-        empty_stdout_fine=True)
-    result = find_result.stdout
-  return result
+    Finds all files inside a directory, traversing subfolders and following
+    symlinks. The returned string contains the full path of all files
+    separated by line breaks.
+    """
+    if _is_windows(repository_ctx):
+        src_dir = src_dir.replace("/", "\\")
+        find_result = _execute(
+            repository_ctx,
+            ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
+            empty_stdout_fine = True,
+        )
 
+        # src_files will be used in genrule.outs where the paths must
+        # use forward slashes.
+        result = find_result.stdout.replace("\\", "/")
+    else:
+        find_result = _execute(
+            repository_ctx,
+            ["find", src_dir, "-follow", "-type", "f"],
+            empty_stdout_fine = True,
+        )
+        result = find_result.stdout
+    return result
 
 def _genrule(src_dir, genrule_name, command, outs):
-  """Returns a string with a genrule.
+    """Returns a string with a genrule.
 
-  Genrule executes the given command and produces the given outputs.
-  """
-  return (
-      'genrule(\n' +
-      '    name = "' +
-      genrule_name + '",\n' +
-      '    outs = [\n' +
-      outs +
-      '\n    ],\n' +
-      '    cmd = """\n' +
-      command +
-      '\n   """,\n' +
-      ')\n'
-  )
-
+    The genrule executes the given command and produces the given outputs.
+    """
+    return (
+        "genrule(\n" +
+        '    name = "' +
+        genrule_name + '",\n' +
+        "    outs = [\n" +
+        outs +
+        "\n    ],\n" +
+        '    cmd = """\n' +
+        command +
+        '\n   """,\n' +
+        ")\n"
+    )
 
 def _norm_path(path):
-  """Returns a path with '/' and remove the trailing slash."""
-  path = path.replace("\\", "/")
-  if path[-1] == "/":
-    path = path[:-1]
-  return path
+    """Returns a path with '/' and remove the trailing slash."""
+    path = path.replace("\\", "/")
+    if path[-1] == "/":
+        path = path[:-1]
+    return path
 
+def _symlink_genrule_for_dir(
+        repository_ctx,
+        src_dir,
+        dest_dir,
+        genrule_name,
+        src_files = [],
+        dest_files = []):
+    """Returns a genrule to symlink(or copy if on Windows) a set of files.
 
-def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
-    src_files = [], dest_files = []):
-  """Returns a genrule to symlink(or copy if on Windows) a set of files.
+    If src_dir is passed, files will be read from the given directory; otherwise
+    we assume the files are listed in src_files and dest_files.
+    """
+    if src_dir != None:
+        src_dir = _norm_path(src_dir)
+        dest_dir = _norm_path(dest_dir)
+        files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
 
-  If src_dir is passed, files will be read from the given directory; otherwise
-  we assume files are in src_files and dest_files
-  """
-  if src_dir != None:
-    src_dir = _norm_path(src_dir)
-    dest_dir = _norm_path(dest_dir)
-    files = '\n'.join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
-    # Create a list with the src_dir stripped to use for outputs.
-    dest_files = files.replace(src_dir, '').splitlines()
-    src_files = files.splitlines()
-  command = []
-  outs = []
-  for i in range(len(dest_files)):
-    if dest_files[i] != "":
-      # If we have only one file to link we do not want to use the dest_dir, as
-      # $(@D) will include the full path to the file.
-      dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i]
-      # Copy the headers to create a sandboxable setup.
-      cmd = 'cp -f'
-      command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest))
-      outs.append('        "' + dest_dir + dest_files[i] + '",')
-  genrule = _genrule(src_dir, genrule_name, " && ".join(command),
-                     "\n".join(outs))
-  return genrule
+        # Create a list with the src_dir stripped to use for outputs.
+        dest_files = files.replace(src_dir, "").splitlines()
+        src_files = files.splitlines()
+    command = []
+    outs = []
+    for i in range(len(dest_files)):
+        if dest_files[i] != "":
+            # If we have only one file to link, we do not want to use the dest_dir, as
+            # $(@D) will include the full path to the file.
+            dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
 
+            # Copy the headers to create a sandboxable setup.
+            cmd = "cp -f"
+            command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
+            outs.append('        "' + dest_dir + dest_files[i] + '",')
+    genrule = _genrule(
+        src_dir,
+        genrule_name,
+        " && ".join(command),
+        "\n".join(outs),
+    )
+    return genrule
 
 def _get_python_bin(repository_ctx):
-  """Gets the python bin path."""
-  python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
-  if python_bin != None:
-    return python_bin
-  python_bin_path = repository_ctx.which("python")
-  if python_bin_path != None:
-    return str(python_bin_path)
-  _fail("Cannot find python in PATH, please make sure " +
-        "python is installed and add its directory in PATH, or --define " +
-        "%s='/something/else'.\nPATH=%s" % (
-            _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", "")))
-
+    """Gets the python bin path."""
+    python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
+    if python_bin != None:
+        return python_bin
+    python_bin_path = repository_ctx.which("python")
+    if python_bin_path != None:
+        return str(python_bin_path)
+    _fail("Cannot find python in PATH, please make sure " +
+          "python is installed and add its directory in PATH, or --define " +
+          "%s='/something/else'.\nPATH=%s" % (
+              _PYTHON_BIN_PATH,
+              repository_ctx.os.environ.get("PATH", ""),
+          ))
 
 def _get_bash_bin(repository_ctx):
-  """Gets the bash bin path."""
-  bash_bin = repository_ctx.os.environ.get(_BAZEL_SH)
-  if bash_bin != None:
-    return bash_bin
-  else:
-    bash_bin_path = repository_ctx.which("bash")
-    if bash_bin_path != None:
-      return str(bash_bin_path)
+    """Gets the bash bin path."""
+    bash_bin = repository_ctx.os.environ.get(_BAZEL_SH)
+    if bash_bin != None:
+        return bash_bin
     else:
-      _fail("Cannot find bash in PATH, please make sure " +
-            "bash is installed and add its directory in PATH, or --define " +
-            "%s='/path/to/bash'.\nPATH=%s" % (
-                _BAZEL_SH, repository_ctx.os.environ.get("PATH", "")))
-
+        bash_bin_path = repository_ctx.which("bash")
+        if bash_bin_path != None:
+            return str(bash_bin_path)
+        else:
+            _fail("Cannot find bash in PATH, please make sure " +
+                  "bash is installed and add its directory in PATH, or --define " +
+                  "%s='/path/to/bash'.\nPATH=%s" % (
+                      _BAZEL_SH,
+                      repository_ctx.os.environ.get("PATH", ""),
+                  ))
 
 def _get_python_lib(repository_ctx, python_bin):
-  """Gets the python lib path."""
-  python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH)
-  if python_lib != None:
-    return python_lib
-  print_lib = ("<<END\n" +
-      "from __future__ import print_function\n" +
-      "import site\n" +
-      "import os\n" +
-      "\n" +
-      "try:\n" +
-      "  input = raw_input\n" +
-      "except NameError:\n" +
-      "  pass\n" +
-      "\n" +
-      "python_paths = []\n" +
-      "if os.getenv('PYTHONPATH') is not None:\n" +
-      "  python_paths = os.getenv('PYTHONPATH').split(':')\n" +
-      "try:\n" +
-      "  library_paths = site.getsitepackages()\n" +
-      "except AttributeError:\n" +
-      " from distutils.sysconfig import get_python_lib\n" +
-      " library_paths = [get_python_lib()]\n" +
-      "all_paths = set(python_paths + library_paths)\n" +
-      "paths = []\n" +
-      "for path in all_paths:\n" +
-      "  if os.path.isdir(path):\n" +
-      "    paths.append(path)\n" +
-      "if len(paths) >=1:\n" +
-      "  print(paths[0])\n" +
-      "END")
-  cmd = '%s - %s' % (python_bin, print_lib)
-  result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd])
-  return result.stdout.strip('\n')
-
+    """Gets the python lib path."""
+    python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH)
+    if python_lib != None:
+        return python_lib
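+    # The heredoc below is fed to "python -" via bash and prints the first existing
+    # directory among PYTHONPATH entries and the site-packages paths.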
+    print_lib = ("<<END\n" +
+                 "from __future__ import print_function\n" +
+                 "import site\n" +
+                 "import os\n" +
+                 "\n" +
+                 "try:\n" +
+                 "  input = raw_input\n" +
+                 "except NameError:\n" +
+                 "  pass\n" +
+                 "\n" +
+                 "python_paths = []\n" +
+                 "if os.getenv('PYTHONPATH') is not None:\n" +
+                 "  python_paths = os.getenv('PYTHONPATH').split(':')\n" +
+                 "try:\n" +
+                 "  library_paths = site.getsitepackages()\n" +
+                 "except AttributeError:\n" +
+                 " from distutils.sysconfig import get_python_lib\n" +
+                 " library_paths = [get_python_lib()]\n" +
+                 "all_paths = set(python_paths + library_paths)\n" +
+                 "paths = []\n" +
+                 "for path in all_paths:\n" +
+                 "  if os.path.isdir(path):\n" +
+                 "    paths.append(path)\n" +
+                 "if len(paths) >=1:\n" +
+                 "  print(paths[0])\n" +
+                 "END")
+    cmd = "%s - %s" % (python_bin, print_lib)
+    result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd])
+    return result.stdout.strip("\n")
 
 def _check_python_lib(repository_ctx, python_lib):
-  """Checks the python lib path."""
-  cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib)
-  result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd])
-  if result.return_code == 1:
-    _fail("Invalid python library path: %s" % python_lib)
-
+    """Checks the python lib path."""
+    cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib)
+    result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd])
+    if result.return_code == 1:
+        _fail("Invalid python library path: %s" % python_lib)
 
 def _check_python_bin(repository_ctx, python_bin):
-  """Checks the python bin path."""
-  cmd =  '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin)
-  result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd])
-  if result.return_code == 1:
-    _fail("--define %s='%s' is not executable. Is it the python binary?" % (
-        _PYTHON_BIN_PATH, python_bin))
-
+    """Checks the python bin path."""
+    cmd = '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin)
+    result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd])
+    if result.return_code == 1:
+        _fail("--define %s='%s' is not executable. Is it the python binary?" % (
+            _PYTHON_BIN_PATH,
+            python_bin,
+        ))
 
 def _get_python_include(repository_ctx, python_bin):
-  """Gets the python include path."""
-  result = _execute(
-      repository_ctx,
-      [python_bin, "-c",
-       'from __future__ import print_function;' +
-       'from distutils import sysconfig;' +
-       'print(sysconfig.get_python_inc())'],
-      error_msg="Problem getting python include path.",
-      error_details=("Is the Python binary path set up right? " +
-                     "(See ./configure or " + _PYTHON_BIN_PATH + ".) " +
-                     "Is distutils installed?"))
-  return result.stdout.splitlines()[0]
-
+    """Gets the python include path."""
+    result = _execute(
+        repository_ctx,
+        [
+            python_bin,
+            "-c",
+            "from __future__ import print_function;" +
+            "from distutils import sysconfig;" +
+            "print(sysconfig.get_python_inc())",
+        ],
+        error_msg = "Problem getting python include path.",
+        error_details = ("Is the Python binary path set up right? " +
+                         "(See ./configure or " + _PYTHON_BIN_PATH + ".) " +
+                         "Is distutils installed?"),
+    )
+    return result.stdout.splitlines()[0]
 
 def _get_python_import_lib_name(repository_ctx, python_bin):
-  """Get Python import library name (pythonXY.lib) on Windows."""
-  result = _execute(
-      repository_ctx,
-      [python_bin, "-c",
-       'import sys;' +
-       'print("python" + str(sys.version_info[0]) + ' +
-       '      str(sys.version_info[1]) + ".lib")'],
-      error_msg="Problem getting python import library.",
-      error_details=("Is the Python binary path set up right? " +
-                     "(See ./configure or " + _PYTHON_BIN_PATH + ".) "))
-  return result.stdout.splitlines()[0]
-
+    """Get Python import library name (pythonXY.lib) on Windows."""
+    result = _execute(
+        repository_ctx,
+        [
+            python_bin,
+            "-c",
+            "import sys;" +
+            'print("python" + str(sys.version_info[0]) + ' +
+            '      str(sys.version_info[1]) + ".lib")',
+        ],
+        error_msg = "Problem getting python import library.",
+        error_details = ("Is the Python binary path set up right? " +
+                         "(See ./configure or " + _PYTHON_BIN_PATH + ".) "),
+    )
+    return result.stdout.splitlines()[0]
 
 def _get_numpy_include(repository_ctx, python_bin):
-  """Gets the numpy include path."""
-  return _execute(repository_ctx,
-                  [python_bin, "-c",
-                   'from __future__ import print_function;' +
-                   'import numpy;' +
-                   ' print(numpy.get_include());'],
-                  error_msg="Problem getting numpy include path.",
-                  error_details="Is numpy installed?").stdout.splitlines()[0]
-
+    """Gets the numpy include path."""
+    return _execute(
+        repository_ctx,
+        [
+            python_bin,
+            "-c",
+            "from __future__ import print_function;" +
+            "import numpy;" +
+            " print(numpy.get_include());",
+        ],
+        error_msg = "Problem getting numpy include path.",
+        error_details = "Is numpy installed?",
+    ).stdout.splitlines()[0]
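
The three probes above share one pattern: run python_bin with a -c expression
and keep the first line of stdout. A standalone sketch in plain Python
(outside Bazel; the helper name is hypothetical):

    import subprocess

    def probe(python_bin, expr):
        # Mirrors _execute(...).stdout.splitlines()[0] from the code above.
        out = subprocess.check_output([python_bin, "-c", expr])
        return out.decode("utf-8").splitlines()[0]

    # e.g. probe("python3", "import numpy; print(numpy.get_include())")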
 
 def _create_local_python_repository(repository_ctx):
-  """Creates the repository containing files set up to build with Python."""
-  python_bin = _get_python_bin(repository_ctx)
-  _check_python_bin(repository_ctx, python_bin)
-  python_lib = _get_python_lib(repository_ctx, python_bin)
-  _check_python_lib(repository_ctx, python_lib)
-  python_include = _get_python_include(repository_ctx, python_bin)
-  numpy_include = _get_numpy_include(repository_ctx, python_bin) + '/numpy'
-  python_include_rule = _symlink_genrule_for_dir(
-      repository_ctx, python_include, 'python_include', 'python_include')
-  python_import_lib_genrule = ""
-  # To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib
-  # See https://docs.python.org/3/extending/windows.html
-  if _is_windows(repository_ctx):
-    python_include = _norm_path(python_include)
-    python_import_lib_name = _get_python_import_lib_name(repository_ctx, python_bin)
-    python_import_lib_src = python_include.rsplit('/', 1)[0] + "/libs/" + python_import_lib_name
-    python_import_lib_genrule = _symlink_genrule_for_dir(
-      repository_ctx, None, '', 'python_import_lib',
-      [python_import_lib_src], [python_import_lib_name])
-  numpy_include_rule = _symlink_genrule_for_dir(
-      repository_ctx, numpy_include, 'numpy_include/numpy', 'numpy_include')
-  _tpl(repository_ctx, "BUILD", {
-      "%{PYTHON_INCLUDE_GENRULE}": python_include_rule,
-      "%{PYTHON_IMPORT_LIB_GENRULE}": python_import_lib_genrule,
-      "%{NUMPY_INCLUDE_GENRULE}": numpy_include_rule,
-  })
+    """Creates the repository containing files set up to build with Python."""
+    python_bin = _get_python_bin(repository_ctx)
+    _check_python_bin(repository_ctx, python_bin)
+    python_lib = _get_python_lib(repository_ctx, python_bin)
+    _check_python_lib(repository_ctx, python_lib)
+    python_include = _get_python_include(repository_ctx, python_bin)
+    numpy_include = _get_numpy_include(repository_ctx, python_bin) + "/numpy"
+    python_include_rule = _symlink_genrule_for_dir(
+        repository_ctx,
+        python_include,
+        "python_include",
+        "python_include",
+    )
+    python_import_lib_genrule = ""
 
+    # To build Python C/C++ extensions on Windows, we need to link against the
+    # Python import library pythonXY.lib.
+    # See https://docs.python.org/3/extending/windows.html
+    if _is_windows(repository_ctx):
+        python_include = _norm_path(python_include)
+        python_import_lib_name = _get_python_import_lib_name(repository_ctx, python_bin)
+        python_import_lib_src = python_include.rsplit("/", 1)[0] + "/libs/" + python_import_lib_name
+        python_import_lib_genrule = _symlink_genrule_for_dir(
+            repository_ctx,
+            None,
+            "",
+            "python_import_lib",
+            [python_import_lib_src],
+            [python_import_lib_name],
+        )
+    numpy_include_rule = _symlink_genrule_for_dir(
+        repository_ctx,
+        numpy_include,
+        "numpy_include/numpy",
+        "numpy_include",
+    )
+    _tpl(repository_ctx, "BUILD", {
+        "%{PYTHON_INCLUDE_GENRULE}": python_include_rule,
+        "%{PYTHON_IMPORT_LIB_GENRULE}": python_import_lib_genrule,
+        "%{NUMPY_INCLUDE_GENRULE}": numpy_include_rule,
+    })
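
The _tpl helper used here is defined earlier in this file (outside this hunk);
it follows the same pattern as the sycl_configure version further down. A
minimal sketch, with the template label assumed:

    def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
        if not out:
            out = tpl.replace(":", "/")
        repository_ctx.template(
            out,
            Label("//third_party/py:%s.tpl" % tpl),  # label assumed
            substitutions,
        )

So the call above renders a BUILD.tpl into the external repository's BUILD
file, substituting the three %{...}_GENRULE placeholders.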
 
 def _create_remote_python_repository(repository_ctx, remote_config_repo):
-  """Creates pointers to a remotely configured repo set up to build with Python.
-  """
-  repository_ctx.template("BUILD", Label(remote_config_repo + ":BUILD"), {})
-
+    """Creates pointers to a remotely configured repo set up to build with Python."""
+    repository_ctx.template("BUILD", Label(remote_config_repo + ":BUILD"), {})
 
 def _python_autoconf_impl(repository_ctx):
-  """Implementation of the python_autoconf repository rule."""
-  if _TF_PYTHON_CONFIG_REPO in repository_ctx.os.environ:
-      _create_remote_python_repository(repository_ctx,
-          repository_ctx.os.environ[_TF_PYTHON_CONFIG_REPO])
-  else:
-    _create_local_python_repository(repository_ctx)
-
+    """Implementation of the python_autoconf repository rule."""
+    if _TF_PYTHON_CONFIG_REPO in repository_ctx.os.environ:
+        _create_remote_python_repository(
+            repository_ctx,
+            repository_ctx.os.environ[_TF_PYTHON_CONFIG_REPO],
+        )
+    else:
+        _create_local_python_repository(repository_ctx)
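
Typical wiring from a WORKSPACE or workspace macro looks like this (a sketch;
the repository name is illustrative):

    load("//third_party/py:python_configure.bzl", "python_configure")

    python_configure(name = "local_config_python")

With TF_PYTHON_CONFIG_REPO set, the rule forwards to that pre-configured
remote repository instead of probing the local interpreter.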
 
 python_configure = repository_rule(
     implementation = _python_autoconf_impl,
diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl
index 33386f8..a726c8d 100755
--- a/third_party/sycl/sycl/build_defs.bzl.tpl
+++ b/third_party/sycl/sycl/build_defs.bzl.tpl
@@ -11,7 +11,7 @@
     return select({
         "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
         "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1],
-        "//conditions:default": if_false
+        "//conditions:default": if_false,
     })
 
 def if_ccpp(if_true, if_false = []):
@@ -24,5 +24,5 @@
     return select({
         "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
         "@local_config_sycl//sycl:using_sycl_trisycl": if_false,
-        "//conditions:default": if_false
+        "//conditions:default": if_false,
     })
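
A BUILD file consumes these select()-based helpers roughly as follows (a
sketch; the if_sycl macro name is assumed from the surrounding template):

    load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")

    cc_library(
        name = "sycl_kernels",  # hypothetical target
        copts = if_sycl(["-DTENSORFLOW_USE_SYCL"], []),
    )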
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
index 5b9d0eb..deba6c4 100644
--- a/third_party/sycl/sycl_configure.bzl
+++ b/third_party/sycl/sycl_configure.bzl
@@ -11,122 +11,124 @@
 """
 
 _HOST_CXX_COMPILER = "HOST_CXX_COMPILER"
-_HOST_C_COMPILER= "HOST_C_COMPILER"
+_HOST_C_COMPILER = "HOST_C_COMPILER"
 _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH"
 _TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR"
 _PYTHON_LIB_PATH = "PYTHON_LIB_PATH"
 
 def _enable_sycl(repository_ctx):
-  if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ:
-    enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip()
-    return enable_sycl == "1"
-  return False
+    if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ:
+        enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip()
+        return enable_sycl == "1"
+    return False
 
 def _enable_compute_cpp(repository_ctx):
-  return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ
+    return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ
 
 def auto_configure_fail(msg):
-  """Output failure message when auto configuration fails."""
-  red = "\033[0;31m"
-  no_color = "\033[0m"
-  fail("\n%sAuto-Configuration Error:%s %s\n" % (red, no_color, msg))
+    """Output failure message when auto configuration fails."""
+    red = "\033[0;31m"
+    no_color = "\033[0m"
+    fail("\n%sAuto-Configuration Error:%s %s\n" % (red, no_color, msg))
+
 # END cc_configure common functions (see TODO above).
 
 def find_c(repository_ctx):
-  """Find host C compiler."""
-  c_name = "gcc"
-  if _HOST_C_COMPILER in repository_ctx.os.environ:
-    c_name = repository_ctx.os.environ[_HOST_C_COMPILER].strip()
-  if c_name.startswith("/"):
-    return c_name
-  c = repository_ctx.which(c_name)
-  if c == None:
-    fail("Cannot find C compiler, please correct your path.")
-  return c
+    """Find host C compiler."""
+    c_name = "gcc"
+    if _HOST_C_COMPILER in repository_ctx.os.environ:
+        c_name = repository_ctx.os.environ[_HOST_C_COMPILER].strip()
+    if c_name.startswith("/"):
+        return c_name
+    c = repository_ctx.which(c_name)
+    if c == None:
+        fail("Cannot find C compiler, please correct your path.")
+    return c
 
 def find_cc(repository_ctx):
-  """Find host C++ compiler."""
-  cc_name = "g++"
-  if _HOST_CXX_COMPILER in repository_ctx.os.environ:
-    cc_name = repository_ctx.os.environ[_HOST_CXX_COMPILER].strip()
-  if cc_name.startswith("/"):
-    return cc_name
-  cc = repository_ctx.which(cc_name)
-  if cc == None:
-    fail("Cannot find C++ compiler, please correct your path.")
-  return cc
+    """Find host C++ compiler."""
+    cc_name = "g++"
+    if _HOST_CXX_COMPILER in repository_ctx.os.environ:
+        cc_name = repository_ctx.os.environ[_HOST_CXX_COMPILER].strip()
+    if cc_name.startswith("/"):
+        return cc_name
+    cc = repository_ctx.which(cc_name)
+    if cc == None:
+        fail("Cannot find C++ compiler, please correct your path.")
+    return cc
 
 def find_computecpp_root(repository_ctx):
-  """Find ComputeCpp compiler."""
-  sycl_name = ""
-  if _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ:
-    sycl_name = repository_ctx.os.environ[_COMPUTECPP_TOOLKIT_PATH].strip()
-  if sycl_name.startswith("/"):
-    return sycl_name
-  fail("Cannot find SYCL compiler, please correct your path")
+    """Find ComputeCpp compiler."""
+    sycl_name = ""
+    if _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ:
+        sycl_name = repository_ctx.os.environ[_COMPUTECPP_TOOLKIT_PATH].strip()
+    if sycl_name.startswith("/"):
+        return sycl_name
+    fail("Cannot find SYCL compiler, please correct your path.")
 
 def find_trisycl_include_dir(repository_ctx):
-  """Find triSYCL include directory. """
-  if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
-    sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
-    if sycl_name.startswith("/"):
-      return sycl_name
-  fail( "Cannot find triSYCL include directory, please correct your path")
+    """Find triSYCL include directory."""
+    if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
+        sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
+        if sycl_name.startswith("/"):
+            return sycl_name
+    fail("Cannot find triSYCL include directory, please correct your path.")
 
 def find_python_lib(repository_ctx):
-  """Returns python path."""
-  if _PYTHON_LIB_PATH in repository_ctx.os.environ:
-    return repository_ctx.os.environ[_PYTHON_LIB_PATH].strip()
-  fail("Environment variable PYTHON_LIB_PATH was not specified re-run ./configure")
-
+    """Returns python path."""
+    if _PYTHON_LIB_PATH in repository_ctx.os.environ:
+        return repository_ctx.os.environ[_PYTHON_LIB_PATH].strip()
+    fail("Environment variable PYTHON_LIB_PATH was not specified, re-run ./configure")
 
 def _check_lib(repository_ctx, toolkit_path, lib):
-  """Checks if lib exists under sycl_toolkit_path or fail if it doesn't.
+    """Checks if lib exists under sycl_toolkit_path or fails if it doesn't.
 
-  Args:
-    repository_ctx: The repository context.
-    toolkit_path: The toolkit directory containing the libraries.
-    ib: The library to look for under toolkit_path.
-  """
-  lib_path = toolkit_path + "/" + lib
-  if not repository_ctx.path(lib_path).exists:
-    auto_configure_fail("Cannot find %s" % lib_path)
+    Args:
+      repository_ctx: The repository context.
+      toolkit_path: The toolkit directory containing the libraries.
+      lib: The library to look for under toolkit_path.
+    """
+    lib_path = toolkit_path + "/" + lib
+    if not repository_ctx.path(lib_path).exists:
+        auto_configure_fail("Cannot find %s" % lib_path)
 
 def _check_dir(repository_ctx, directory):
-  """Checks whether the directory exists and fail if it does not.
+    """Checks whether the directory exists and fails if it does not.
 
-  Args:
-    repository_ctx: The repository context.
-    directory: The directory to check the existence of.
-  """
-  if not repository_ctx.path(directory).exists:
-    auto_configure_fail("Cannot find dir: %s" % directory)
+    Args:
+      repository_ctx: The repository context.
+      directory: The directory to check the existence of.
+    """
+    if not repository_ctx.path(directory).exists:
+        auto_configure_fail("Cannot find dir: %s" % directory)
 
 def _symlink_dir(repository_ctx, src_dir, dest_dir):
-  """Symlinks all the files in a directory.
+    """Symlinks all the files in a directory.
 
-  Args:
-    repository_ctx: The repository context.
-    src_dir: The source directory.
-    dest_dir: The destination directory to create the symlinks in.
-  """
-  files = repository_ctx.path(src_dir).readdir()
-  for src_file in files:
-    repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
+    Args:
+      repository_ctx: The repository context.
+      src_dir: The source directory.
+      dest_dir: The destination directory to create the symlinks in.
+    """
+    files = repository_ctx.path(src_dir).readdir()
+    for src_file in files:
+        repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
 
-def _tpl(repository_ctx, tpl, substitutions={}, out=None):
-  if not out:
-    out = tpl.replace(":", "/")
-  repository_ctx.template(
-      out,
-      Label("//third_party/sycl/%s.tpl" % tpl),
-      substitutions)
+def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
+    if not out:
+        out = tpl.replace(":", "/")
+    repository_ctx.template(
+        out,
+        Label("//third_party/sycl/%s.tpl" % tpl),
+        substitutions,
+    )
 
 def _file(repository_ctx, label):
-  repository_ctx.template(
-      label.replace(":", "/"),
-      Label("//third_party/sycl/%s" % label),
-      {})
+    repository_ctx.template(
+        label.replace(":", "/"),
+        Label("//third_party/sycl/%s" % label),
+        {},
+    )
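
Worked example of the two helpers (paths follow from the labels above):

    # Renders //third_party/sycl/sycl:build_defs.bzl.tpl to
    # sycl/build_defs.bzl with no substitutions.
    _tpl(repository_ctx, "sycl:build_defs.bzl")

    # Copies //third_party/sycl/sycl:LICENSE.text verbatim.
    _file(repository_ctx, "sycl:LICENSE.text")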
 
 _DUMMY_CROSSTOOL_BZL_FILE = """
 def error_sycl_disabled():
@@ -147,7 +149,6 @@
   )
 """
 
-
 _DUMMY_CROSSTOOL_BUILD_FILE = """
 load("//crosstool:error_sycl_disabled.bzl", "error_sycl_disabled")
 
@@ -155,87 +156,97 @@
 """
 
 def _create_dummy_repository(repository_ctx):
-  # Set up BUILD file for sycl/.
-  _tpl(repository_ctx, "sycl:build_defs.bzl")
-  _tpl(repository_ctx, "sycl:BUILD")
-  _file(repository_ctx, "sycl:LICENSE.text")
-  _tpl(repository_ctx, "sycl:platform.bzl")
-
-  # Create dummy files for the SYCL toolkit since they are still required by
-  # tensorflow/sycl/platform/default/build_config:sycl.
-  repository_ctx.file("sycl/include/sycl.hpp", "")
-  repository_ctx.file("sycl/lib/libComputeCpp.so", "")
-
-  # If sycl_configure is not configured to build with SYCL support, and the user
-  # attempts to build with --config=sycl, add a dummy build rule to intercept
-  # this and fail with an actionable error message.
-  repository_ctx.file("crosstool/error_sycl_disabled.bzl",
-                      _DUMMY_CROSSTOOL_BZL_FILE)
-  repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
-
-
-def _sycl_autoconf_imp(repository_ctx):
-  """Implementation of the sycl_autoconf rule."""
-  if not _enable_sycl(repository_ctx):
-    _create_dummy_repository(repository_ctx)
-  else:
-    # copy template files
+    # Set up BUILD file for sycl/.
     _tpl(repository_ctx, "sycl:build_defs.bzl")
     _tpl(repository_ctx, "sycl:BUILD")
-    _tpl(repository_ctx, "sycl:platform.bzl")
-    _tpl(repository_ctx, "crosstool:BUILD")
     _file(repository_ctx, "sycl:LICENSE.text")
+    _tpl(repository_ctx, "sycl:platform.bzl")
 
-    if _enable_compute_cpp(repository_ctx):
-      _tpl(repository_ctx, "crosstool:computecpp",
-      {
-        "%{host_cxx_compiler}" : find_cc(repository_ctx),
-        "%{host_c_compiler}" : find_c(repository_ctx)
-      })
+    # Create dummy files for the SYCL toolkit since they are still required by
+    # tensorflow/sycl/platform/default/build_config:sycl.
+    repository_ctx.file("sycl/include/sycl.hpp", "")
+    repository_ctx.file("sycl/lib/libComputeCpp.so", "")
 
-      computecpp_root = find_computecpp_root(repository_ctx);
-      _check_dir(repository_ctx, computecpp_root)
+    # If sycl_configure is not configured to build with SYCL support, and the user
+    # attempts to build with --config=sycl, add a dummy build rule to intercept
+    # this and fail with an actionable error message.
+    repository_ctx.file(
+        "crosstool/error_sycl_disabled.bzl",
+        _DUMMY_CROSSTOOL_BZL_FILE,
+    )
+    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
-      _tpl(repository_ctx, "crosstool:CROSSTOOL",
-      {
-        "%{sycl_include_dir}" : computecpp_root,
-        "%{sycl_impl}" : "computecpp",
-        "%{c++_std}" : "-std=c++11",
-        "%{python_lib_path}" : find_python_lib(repository_ctx),
-      })
-
-      # symlink libraries
-      _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" )
-      _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
-      _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
-      _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
+def _sycl_autoconf_imp(repository_ctx):
+    """Implementation of the sycl_autoconf rule."""
+    if not _enable_sycl(repository_ctx):
+        _create_dummy_repository(repository_ctx)
     else:
+        # copy template files
+        _tpl(repository_ctx, "sycl:build_defs.bzl")
+        _tpl(repository_ctx, "sycl:BUILD")
+        _tpl(repository_ctx, "sycl:platform.bzl")
+        _tpl(repository_ctx, "crosstool:BUILD")
+        _file(repository_ctx, "sycl:LICENSE.text")
 
-      trisycl_include_dir = find_trisycl_include_dir(repository_ctx);
-      _check_dir(repository_ctx, trisycl_include_dir)
+        if _enable_compute_cpp(repository_ctx):
+            _tpl(
+                repository_ctx,
+                "crosstool:computecpp",
+                {
+                    "%{host_cxx_compiler}": find_cc(repository_ctx),
+                    "%{host_c_compiler}": find_c(repository_ctx),
+                },
+            )
 
-      _tpl(repository_ctx, "crosstool:trisycl",
-      {
-        "%{host_cxx_compiler}" : find_cc(repository_ctx),
-        "%{host_c_compiler}" : find_c(repository_ctx),
-        "%{trisycl_include_dir}" : trisycl_include_dir
-      })
+            computecpp_root = find_computecpp_root(repository_ctx)
+            _check_dir(repository_ctx, computecpp_root)
 
+            _tpl(
+                repository_ctx,
+                "crosstool:CROSSTOOL",
+                {
+                    "%{sycl_include_dir}": computecpp_root,
+                    "%{sycl_impl}": "computecpp",
+                    "%{c++_std}": "-std=c++11",
+                    "%{python_lib_path}": find_python_lib(repository_ctx),
+                },
+            )
 
-      _tpl(repository_ctx, "crosstool:CROSSTOOL",
-      {
-        "%{sycl_include_dir}" : trisycl_include_dir,
-        "%{sycl_impl}" : "trisycl",
-        "%{c++_std}" : "-std=c++1y",
-        "%{python_lib_path}" : find_python_lib(repository_ctx),
-      })
+            # symlink libraries
+            _check_lib(repository_ctx, computecpp_root + "/lib", "libComputeCpp.so")
+            _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
+            _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
+            _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
+        else:
+            trisycl_include_dir = find_trisycl_include_dir(repository_ctx)
+            _check_dir(repository_ctx, trisycl_include_dir)
 
-      _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include")
+            _tpl(
+                repository_ctx,
+                "crosstool:trisycl",
+                {
+                    "%{host_cxx_compiler}": find_cc(repository_ctx),
+                    "%{host_c_compiler}": find_c(repository_ctx),
+                    "%{trisycl_include_dir}": trisycl_include_dir,
+                },
+            )
 
+            _tpl(
+                repository_ctx,
+                "crosstool:CROSSTOOL",
+                {
+                    "%{sycl_include_dir}": trisycl_include_dir,
+                    "%{sycl_impl}": "trisycl",
+                    "%{c++_std}": "-std=c++1y",
+                    "%{python_lib_path}": find_python_lib(repository_ctx),
+                },
+            )
+
+            _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include")
 
 sycl_configure = repository_rule(
-  implementation = _sycl_autoconf_imp,
-  local = True,
+    implementation = _sycl_autoconf_imp,
+    local = True,
 )
 """Detects and configures the SYCL toolchain.
 
diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl
index c6de25b..1d019a1 100644
--- a/third_party/tensorrt/tensorrt_configure.bzl
+++ b/third_party/tensorrt/tensorrt_configure.bzl
@@ -10,13 +10,13 @@
 load(
     "//third_party/gpus:cuda_configure.bzl",
     "auto_configure_fail",
-    "get_cpu_value",
     "find_cuda_define",
     "find_lib",
+    "get_cpu_value",
     "lib_name",
-    "matches_version",
     "make_copy_dir_rule",
     "make_copy_files_rule",
+    "matches_version",
 )
 
 _TENSORRT_INSTALL_PATH = "TENSORRT_INSTALL_PATH"
@@ -30,185 +30,200 @@
 _DEFINE_TENSORRT_SONAME_MINOR = "#define NV_TENSORRT_SONAME_MINOR"
 _DEFINE_TENSORRT_SONAME_PATCH = "#define NV_TENSORRT_SONAME_PATCH"
 
-
 def _headers_exist(repository_ctx, path):
-  """Returns whether all TensorRT header files could be found in 'path'.
+    """Returns whether all TensorRT header files could be found in 'path'.
 
-  Args:
-    repository_ctx: The repository context.
-    path: The TensorRT include path to check.
+    Args:
+      repository_ctx: The repository context.
+      path: The TensorRT include path to check.
 
-  Returns:
-    True if all TensorRT header files can be found in the path.
-  """
-  for h in _TF_TENSORRT_HEADERS:
-    if not repository_ctx.path("%s/%s" % (path, h)).exists:
-      return False
-  return True
-
+    Returns:
+      True if all TensorRT header files can be found in the path.
+    """
+    for h in _TF_TENSORRT_HEADERS:
+        if not repository_ctx.path("%s/%s" % (path, h)).exists:
+            return False
+    return True
 
 def _find_trt_header_dir(repository_ctx, trt_install_path):
-  """Returns the path to the directory containing headers of TensorRT.
+    """Returns the path to the directory containing headers of TensorRT.
 
-  Args:
-    repository_ctx: The repository context.
-    trt_install_path: The TensorRT library install directory.
+    Args:
+      repository_ctx: The repository context.
+      trt_install_path: The TensorRT library install directory.
 
-  Returns:
-    The path of the directory containing the TensorRT header.
-  """
-  if trt_install_path == "/usr/lib/x86_64-linux-gnu":
-    path = "/usr/include/x86_64-linux-gnu"
+    Returns:
+      The path of the directory containing the TensorRT headers.
+    """
+    if trt_install_path == "/usr/lib/x86_64-linux-gnu":
+        path = "/usr/include/x86_64-linux-gnu"
+        if _headers_exist(repository_ctx, path):
+            return path
+    if trt_install_path == "/usr/lib/aarch64-linux-gnu":
+        path = "/usr/include/aarch64-linux-gnu"
+        if _headers_exist(repository_ctx, path):
+            return path
+    path = str(repository_ctx.path("%s/../include" % trt_install_path).realpath)
     if _headers_exist(repository_ctx, path):
-      return path
-  if trt_install_path == "/usr/lib/aarch64-linux-gnu":
-    path = "/usr/include/aarch64-linux-gnu"
-    if _headers_exist(repository_ctx, path):
-      return path
-  path = str(repository_ctx.path("%s/../include" % trt_install_path).realpath)
-  if _headers_exist(repository_ctx, path):
-    return path
-  auto_configure_fail(
-      "Cannot find NvInfer.h with TensorRT install path %s" % trt_install_path)
-
+        return path
+    auto_configure_fail(
+        "Cannot find NvInfer.h with TensorRT install path %s" % trt_install_path,
+    )
 
 def _trt_lib_version(repository_ctx, trt_install_path):
-  """Detects the library (e.g. libnvinfer) version of TensorRT.
+    """Detects the library (e.g. libnvinfer) version of TensorRT.
 
-  Args:
-    repository_ctx: The repository context.
-    trt_install_path: The TensorRT library install directory.
+    Args:
+      repository_ctx: The repository context.
+      trt_install_path: The TensorRT library install directory.
 
-  Returns:
-    A string containing the library version of TensorRT.
-  """
-  trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path)
-  major_version = find_cuda_define(repository_ctx, trt_header_dir, "NvInfer.h",
-                                   _DEFINE_TENSORRT_SONAME_MAJOR)
-  minor_version = find_cuda_define(repository_ctx, trt_header_dir, "NvInfer.h",
-                                   _DEFINE_TENSORRT_SONAME_MINOR)
-  patch_version = find_cuda_define(repository_ctx, trt_header_dir, "NvInfer.h",
-                                   _DEFINE_TENSORRT_SONAME_PATCH)
-  full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
-  environ_version = repository_ctx.os.environ[_TF_TENSORRT_VERSION].strip()
-  if not matches_version(environ_version, full_version):
-    auto_configure_fail(
-        ("TensorRT library version detected from %s/%s (%s) does not match " +
-         "TF_TENSORRT_VERSION (%s). To fix this rerun configure again.") %
-        (trt_header_dir, "NvInfer.h", full_version, environ_version))
-  # Only use the major version to match the SONAME of the library.
-  return major_version
+    Returns:
+      A string containing the library version of TensorRT.
+    """
+    trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path)
+    major_version = find_cuda_define(
+        repository_ctx,
+        trt_header_dir,
+        "NvInfer.h",
+        _DEFINE_TENSORRT_SONAME_MAJOR,
+    )
+    minor_version = find_cuda_define(
+        repository_ctx,
+        trt_header_dir,
+        "NvInfer.h",
+        _DEFINE_TENSORRT_SONAME_MINOR,
+    )
+    patch_version = find_cuda_define(
+        repository_ctx,
+        trt_header_dir,
+        "NvInfer.h",
+        _DEFINE_TENSORRT_SONAME_PATCH,
+    )
+    full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+    environ_version = repository_ctx.os.environ[_TF_TENSORRT_VERSION].strip()
+    if not matches_version(environ_version, full_version):
+        auto_configure_fail(
+            ("TensorRT library version detected from %s/%s (%s) does not match " +
+             "TF_TENSORRT_VERSION (%s). To fix this, re-run ./configure.") %
+            (trt_header_dir, "NvInfer.h", full_version, environ_version),
+        )
 
+    # Only use the major version to match the SONAME of the library.
+    return major_version
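
find_cuda_define (imported from cuda_configure) pulls a #define's value out of
a header; a minimal stand-in, with the extraction behavior assumed:

    def _find_define_sketch(header_text, prefix):
        # Returns the token following e.g. "#define NV_TENSORRT_SONAME_MAJOR".
        for line in header_text.splitlines():
            if line.startswith(prefix):
                return line[len(prefix):].strip()
        return None

With defines of 5, 0 and 2 (values illustrative), full_version is "5.0.2",
matches_version checks it against TF_TENSORRT_VERSION, and only the major
"5" is returned so the library SONAME (libnvinfer.so.5) can be matched.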
 
 def _find_trt_libs(repository_ctx, cpu_value, trt_install_path, trt_lib_version):
-  """Finds the given TensorRT library on the system.
+    """Finds the given TensorRT library on the system.
 
-  Adapted from code contributed by Sami Kama (https://github.com/samikama).
+    Adapted from code contributed by Sami Kama (https://github.com/samikama).
 
-  Args:
-    repository_ctx: The repository context.
-    trt_install_path: The TensorRT library installation directory.
-    trt_lib_version: The version of TensorRT library files as returned
-      by _trt_lib_version.
+    Args:
+      repository_ctx: The repository context.
+      cpu_value: The name of the host operating system.
+      trt_install_path: The TensorRT library installation directory.
+      trt_lib_version: The version of TensorRT library files as returned
+        by _trt_lib_version.
 
-  Returns:
-    The path to the library.
-  """
-  result = {}
-  for lib in _TF_TENSORRT_LIBS:
-    file_name = lib_name("nvinfer", cpu_value, trt_lib_version)
-    path = find_lib(repository_ctx, ["%s/%s" % (trt_install_path, file_name)])
-    result[file_name] = path
-  return result
-
+    Returns:
+      A dict mapping TensorRT library file names to their paths.
+    """
+    result = {}
+    for lib in _TF_TENSORRT_LIBS:
+        file_name = lib_name("nvinfer", cpu_value, trt_lib_version)
+        path = find_lib(repository_ctx, ["%s/%s" % (trt_install_path, file_name)])
+        result[file_name] = path
+    return result
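
Note that the loop variable lib is unused: every iteration resolves the name
"nvinfer", so the returned dict effectively holds a single entry. For
illustration (lib_name behavior assumed from cuda_configure),
lib_name("nvinfer", "Linux", "5") would yield "libnvinfer.so.5", giving
result = {"libnvinfer.so.5": <resolved path>}.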
 
 def _tpl(repository_ctx, tpl, substitutions):
-  repository_ctx.template(tpl, Label("//third_party/tensorrt:%s.tpl" % tpl),
-                          substitutions)
-
+    repository_ctx.template(
+        tpl,
+        Label("//third_party/tensorrt:%s.tpl" % tpl),
+        substitutions,
+    )
 
 def _create_dummy_repository(repository_ctx):
-  """Create a dummy TensorRT repository."""
-  _tpl(repository_ctx, "build_defs.bzl", {"%{if_tensorrt}": "if_false"})
+    """Create a dummy TensorRT repository."""
+    _tpl(repository_ctx, "build_defs.bzl", {"%{if_tensorrt}": "if_false"})
 
-  _tpl(repository_ctx, "BUILD", {
-      "%{tensorrt_genrules}": "",
-      "%{tensorrt_headers}": "[]",
-      "%{tensorrt_libs}": "[]"
-  })
+    _tpl(repository_ctx, "BUILD", {
+        "%{tensorrt_genrules}": "",
+        "%{tensorrt_headers}": "[]",
+        "%{tensorrt_libs}": "[]",
+    })
 
 def _tensorrt_configure_impl(repository_ctx):
-  """Implementation of the tensorrt_configure repository rule."""
-  if _TF_TENSORRT_CONFIG_REPO in repository_ctx.os.environ:
-    # Forward to the pre-configured remote repository.
-    remote_config_repo = repository_ctx.os.environ[_TF_TENSORRT_CONFIG_REPO]
-    repository_ctx.template("BUILD", Label(remote_config_repo + ":BUILD"), {})
-    repository_ctx.template(
-        "build_defs.bzl",
-        Label(remote_config_repo + ":build_defs.bzl"),
-        {},
+    """Implementation of the tensorrt_configure repository rule."""
+    if _TF_TENSORRT_CONFIG_REPO in repository_ctx.os.environ:
+        # Forward to the pre-configured remote repository.
+        remote_config_repo = repository_ctx.os.environ[_TF_TENSORRT_CONFIG_REPO]
+        repository_ctx.template("BUILD", Label(remote_config_repo + ":BUILD"), {})
+        repository_ctx.template(
+            "build_defs.bzl",
+            Label(remote_config_repo + ":build_defs.bzl"),
+            {},
+        )
+        return
+
+    if _TENSORRT_INSTALL_PATH not in repository_ctx.os.environ:
+        _create_dummy_repository(repository_ctx)
+        return
+
+    cpu_value = get_cpu_value(repository_ctx)
+    if cpu_value != "Linux":
+        auto_configure_fail("TensorRT is supported only on Linux.")
+    if _TF_TENSORRT_VERSION not in repository_ctx.os.environ:
+        auto_configure_fail("TensorRT library (libnvinfer) version is not set.")
+    trt_install_path = repository_ctx.os.environ[_TENSORRT_INSTALL_PATH].strip()
+    if not repository_ctx.path(trt_install_path).exists:
+        auto_configure_fail(
+            "Cannot find TensorRT install path %s." % trt_install_path,
+        )
+
+    # Copy the library files.
+    trt_lib_version = _trt_lib_version(repository_ctx, trt_install_path)
+    trt_libs = _find_trt_libs(repository_ctx, cpu_value, trt_install_path, trt_lib_version)
+    trt_lib_srcs = []
+    trt_lib_outs = []
+    for path in trt_libs.values():
+        trt_lib_srcs.append(str(path))
+        trt_lib_outs.append("tensorrt/lib/" + path.basename)
+    copy_rules = [make_copy_files_rule(
+        repository_ctx,
+        name = "tensorrt_lib",
+        srcs = trt_lib_srcs,
+        outs = trt_lib_outs,
+    )]
+
+    # Copy the header files.
+    trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path)
+    trt_header_srcs = [
+        "%s/%s" % (trt_header_dir, header)
+        for header in _TF_TENSORRT_HEADERS
+    ]
+    trt_header_outs = [
+        "tensorrt/include/" + header
+        for header in _TF_TENSORRT_HEADERS
+    ]
+    copy_rules.append(
+        make_copy_files_rule(
+            repository_ctx,
+            name = "tensorrt_include",
+            srcs = trt_header_srcs,
+            outs = trt_header_outs,
+        ),
     )
-    return
 
-  if _TENSORRT_INSTALL_PATH not in repository_ctx.os.environ:
-    _create_dummy_repository(repository_ctx)
-    return
+    # Set up config file.
+    _tpl(repository_ctx, "build_defs.bzl", {"%{if_tensorrt}": "if_true"})
 
-  cpu_value = get_cpu_value(repository_ctx)
-  if (cpu_value != "Linux"):
-    auto_configure_fail("TensorRT is supported only on Linux.")
-  if _TF_TENSORRT_VERSION not in repository_ctx.os.environ:
-    auto_configure_fail("TensorRT library (libnvinfer) version is not set.")
-  trt_install_path = repository_ctx.os.environ[_TENSORRT_INSTALL_PATH].strip()
-  if not repository_ctx.path(trt_install_path).exists:
-    auto_configure_fail(
-        "Cannot find TensorRT install path %s." % trt_install_path)
-
-  # Copy the library files.
-  trt_lib_version = _trt_lib_version(repository_ctx, trt_install_path)
-  trt_libs = _find_trt_libs(repository_ctx, cpu_value, trt_install_path, trt_lib_version)
-  trt_lib_srcs = []
-  trt_lib_outs = []
-  for path in trt_libs.values():
-    trt_lib_srcs.append(str(path))
-    trt_lib_outs.append("tensorrt/lib/" + path.basename)
-  copy_rules = [make_copy_files_rule(
-      repository_ctx,
-      name = "tensorrt_lib",
-      srcs = trt_lib_srcs,
-      outs = trt_lib_outs,
-  )]
-
-  # Copy the header files header files.
-  trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path)
-  trt_header_srcs = [
-      "%s/%s" % (trt_header_dir, header) for header in _TF_TENSORRT_HEADERS
-  ]
-  trt_header_outs = [
-      "tensorrt/include/" + header for header in _TF_TENSORRT_HEADERS
-  ]
-  copy_rules.append(
-      make_copy_files_rule(
-          repository_ctx,
-          name = "tensorrt_include",
-          srcs = trt_header_srcs,
-          outs = trt_header_outs,
-  ))
-
-  # Set up config file.
-  _tpl(repository_ctx, "build_defs.bzl", {"%{if_tensorrt}": "if_true"})
-
-  # Set up BUILD file.
-  _tpl(repository_ctx, "BUILD", {
-      "%{copy_rules}": "\n".join(copy_rules),
-      "%{tensorrt_headers}": '":tensorrt_include"',
-      "%{tensorrt_libs}": str(trt_lib_outs),
-  })
-
+    # Set up BUILD file.
+    _tpl(repository_ctx, "BUILD", {
+        "%{copy_rules}": "\n".join(copy_rules),
+        "%{tensorrt_headers}": '":tensorrt_include"',
+        "%{tensorrt_libs}": str(trt_lib_outs),
+    })
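
In local mode the rule therefore expects an environment such as (version value
illustrative; the install path matches the preconfigured toolchains in
generate.bzl below):

    TENSORRT_INSTALL_PATH=/usr/lib/x86_64-linux-gnu
    TF_TENSORRT_VERSION=5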
 
 tensorrt_configure = repository_rule(
-    implementation=_tensorrt_configure_impl,
-    environ=[
+    implementation = _tensorrt_configure_impl,
+    environ = [
         _TENSORRT_INSTALL_PATH,
         _TF_TENSORRT_VERSION,
     ],
diff --git a/third_party/toolchains/clang6/repo.bzl b/third_party/toolchains/clang6/repo.bzl
index b81f445..e4b6422 100644
--- a/third_party/toolchains/clang6/repo.bzl
+++ b/third_party/toolchains/clang6/repo.bzl
@@ -1,30 +1,37 @@
 """Repository rule for Debian 8 Jessie Clang-6.0 portable Linux builds."""
 
 def _clang6_configure(ctx):
-  # TODO(jart): It'd probably be better to use Bazel's struct.to_proto()
-  #             method to generate a gigantic CROSSTOOL file that allows
-  #             Clang to support everything.
-  ctx.symlink(
-      ctx.os.environ.get('TF_LLVM_PATH',
-                         '/usr/lib/llvm-6.0'),
-      'clang6/llvm')
-  ctx.symlink(
-      ctx.os.environ.get('STRIP', '/usr/bin/strip'),
-      'clang6/sbin/strip')
-  ctx.symlink(
-      ctx.os.environ.get('OBJDUMP', '/usr/bin/objdump'),
-      'clang6/sbin/objdump')
-  ctx.symlink(ctx.attr._build, 'clang6/BUILD')
-  ctx.template('clang6/CROSSTOOL', ctx.attr._crosstool, {
-      '%package(@local_config_clang6//clang6)%': str(ctx.path('clang6')),
-  })
+    # TODO(jart): It'd probably be better to use Bazel's struct.to_proto()
+    #             method to generate a gigantic CROSSTOOL file that allows
+    #             Clang to support everything.
+    ctx.symlink(
+        ctx.os.environ.get(
+            "TF_LLVM_PATH",
+            "/usr/lib/llvm-6.0",
+        ),
+        "clang6/llvm",
+    )
+    ctx.symlink(
+        ctx.os.environ.get("STRIP", "/usr/bin/strip"),
+        "clang6/sbin/strip",
+    )
+    ctx.symlink(
+        ctx.os.environ.get("OBJDUMP", "/usr/bin/objdump"),
+        "clang6/sbin/objdump",
+    )
+    ctx.symlink(ctx.attr._build, "clang6/BUILD")
+    ctx.template("clang6/CROSSTOOL", ctx.attr._crosstool, {
+        "%package(@local_config_clang6//clang6)%": str(ctx.path("clang6")),
+    })
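
The resulting layout inside the external repository (defaults as in the code
above; each may be overridden by its environment variable):

    clang6/llvm          -> TF_LLVM_PATH (default /usr/lib/llvm-6.0)
    clang6/sbin/strip    -> STRIP        (default /usr/bin/strip)
    clang6/sbin/objdump  -> OBJDUMP      (default /usr/bin/objdump)

plus clang6/BUILD and clang6/CROSSTOOL rendered from the rule's attrs.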
 
 clang6_configure = repository_rule(
     implementation = _clang6_configure,
     attrs = {
-        '_build': attr.label(
-            default=str(Label('//third_party/toolchains/clang6:clang.BUILD'))),
-        '_crosstool': attr.label(
-            default=str(Label('//third_party/toolchains/clang6:CROSSTOOL.tpl'))),
+        "_build": attr.label(
+            default = str(Label("//third_party/toolchains/clang6:clang.BUILD")),
+        ),
+        "_crosstool": attr.label(
+            default = str(Label("//third_party/toolchains/clang6:CROSSTOOL.tpl")),
+        ),
     },
 )
diff --git a/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl b/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
index ab6eac1..d675e95 100644
--- a/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
+++ b/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
@@ -1,38 +1,38 @@
 # -*- Python -*-
 """Repository rule for arm compiler autoconfiguration."""
 
-def _tpl(repository_ctx, tpl, substitutions={}, out=None):
-  if not out:
-    out = tpl
-  repository_ctx.template(
-      out,
-      Label("//third_party/toolchains/cpus/arm:%s.tpl" % tpl),
-      substitutions)
-
+def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
+    if not out:
+        out = tpl
+    repository_ctx.template(
+        out,
+        Label("//third_party/toolchains/cpus/arm:%s.tpl" % tpl),
+        substitutions,
+    )
 
 def _arm_compiler_configure_impl(repository_ctx):
-  # We need to find a cross-compilation include directory for Python, so look
-  # for an environment variable. Be warned, this crosstool template is only
-  # regenerated on the first run of Bazel, so if you change the variable after
-  # it may not be reflected in later builds. Doing a shutdown and clean of Bazel
-  # doesn't fix this, you'll need to delete the generated file at something like:
-  # external/local_config_arm_compiler/CROSSTOOL in your Bazel install.
-  if "CROSSTOOL_PYTHON_INCLUDE_PATH" in repository_ctx.os.environ:
-    python_include_path = repository_ctx.os.environ["CROSSTOOL_PYTHON_INCLUDE_PATH"]
-  else:
-    python_include_path = "/usr/include/python2.7"
-  _tpl(repository_ctx, "CROSSTOOL", {
-      "%{ARM_COMPILER_PATH}%": str(repository_ctx.path(
-          repository_ctx.attr.remote_config_repo)),
-      "%{PYTHON_INCLUDE_PATH}%": python_include_path,
-  })
-  repository_ctx.symlink(repository_ctx.attr.build_file, "BUILD")
-
+    # We need to find a cross-compilation include directory for Python, so look
+    # for an environment variable. Be warned, this crosstool template is only
+    # regenerated on the first run of Bazel, so if you change the variable
+    # afterwards it may not be reflected in later builds. Doing a shutdown and
+    # clean of Bazel doesn't fix this; you'll need to delete the generated file
+    # at something like external/local_config_arm_compiler/CROSSTOOL in your
+    # Bazel install.
+    if "CROSSTOOL_PYTHON_INCLUDE_PATH" in repository_ctx.os.environ:
+        python_include_path = repository_ctx.os.environ["CROSSTOOL_PYTHON_INCLUDE_PATH"]
+    else:
+        python_include_path = "/usr/include/python2.7"
+    _tpl(repository_ctx, "CROSSTOOL", {
+        "%{ARM_COMPILER_PATH}%": str(repository_ctx.path(
+            repository_ctx.attr.remote_config_repo,
+        )),
+        "%{PYTHON_INCLUDE_PATH}%": python_include_path,
+    })
+    repository_ctx.symlink(repository_ctx.attr.build_file, "BUILD")
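
For example (path illustrative): exporting
CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.5 before the first Bazel
run cross-compiles against a Python 3.5 sysroot; once the template has been
generated, the caveat in the comment above applies and the cached CROSSTOOL
must be deleted by hand.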
 
 arm_compiler_configure = repository_rule(
     implementation = _arm_compiler_configure_impl,
     attrs = {
-        "remote_config_repo": attr.string(mandatory = False, default =""),
+        "remote_config_repo": attr.string(mandatory = False, default = ""),
         "build_file": attr.label(),
     },
 )
diff --git a/third_party/toolchains/preconfig/generate/archives.bzl b/third_party/toolchains/preconfig/generate/archives.bzl
index bafc7d4..a26d2c6 100644
--- a/third_party/toolchains/preconfig/generate/archives.bzl
+++ b/third_party/toolchains/preconfig/generate/archives.bzl
@@ -1,13 +1,12 @@
-load("//tensorflow:version_check.bzl", "parse_bazel_version")
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 
 def bazel_toolchains_archive():
     http_archive(
-      name = "bazel_toolchains",
-      sha256 = "ee854b5de299138c1f4a2edb5573d22b21d975acfc7aa938f36d30b49ef97498",
-      strip_prefix = "bazel-toolchains-37419a124bdb9af2fec5b99a973d359b6b899b61",
-      urls = [
-        "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/37419a124bdb9af2fec5b99a973d359b6b899b61.tar.gz",
-        "https://github.com/bazelbuild/bazel-toolchains/archive/37419a124bdb9af2fec5b99a973d359b6b899b61.tar.gz",
-      ],
+        name = "bazel_toolchains",
+        sha256 = "109a99384f9d08f9e75136d218ebaebc68cc810c56897aea2224c57932052d30",
+        strip_prefix = "bazel-toolchains-94d31935a2c94fe7e7c7379a0f3393e181928ff7",
+        urls = [
+            "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/94d31935a2c94fe7e7c7379a0f3393e181928ff7.tar.gz",
+            "https://github.com/bazelbuild/bazel-toolchains/archive/94d31935a2c94fe7e7c7379a0f3393e181928ff7.tar.gz",
+        ],
     )
diff --git a/third_party/toolchains/preconfig/generate/generate.bzl b/third_party/toolchains/preconfig/generate/generate.bzl
index 40e0957..475db0f 100644
--- a/third_party/toolchains/preconfig/generate/generate.bzl
+++ b/third_party/toolchains/preconfig/generate/generate.bzl
@@ -27,6 +27,7 @@
 
     if cuda_version != None:
         base = "@cuda%s-cudnn%s-ubuntu14.04//image" % (cuda_version, cudnn_version)
+
         # The cuda toolchain currently contains its own C++ toolchain definition,
         # so we do not fetch local_config_cc.
         config_repos = [
@@ -42,7 +43,7 @@
             "TF_CUDNN_VERSION": cudnn_version,
             "TF_CUDA_VERSION": cuda_version,
             "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
-            "TF_NEED_TENSORRT" : "1",
+            "TF_NEED_TENSORRT": "1",
             "TF_TENSORRT_VERSION": tensorrt_version,
             "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
             "GCC_HOST_COMPILER_PATH": compiler if compiler != "clang" else "",
diff --git a/third_party/toolchains/preconfig/generate/workspace.bzl b/third_party/toolchains/preconfig/generate/workspace.bzl
index 0495173..bce2d5b 100644
--- a/third_party/toolchains/preconfig/generate/workspace.bzl
+++ b/third_party/toolchains/preconfig/generate/workspace.bzl
@@ -1,7 +1,10 @@
 load(
+    "@io_bazel_rules_docker//repositories:repositories.bzl",
+    container_repositories = "repositories",
+)
+load(
     "@io_bazel_rules_docker//container:container.bzl",
     "container_pull",
-    container_repositories = "repositories",
 )
 load(":containers.bzl", "container_digests")